def estimate_upper_bound_f1(self, input_path, src_dir):
    """Estimate the best F1 achievable if the ranker always chose the best candidate."""
    questions = json.load(codecs.open(input_path, 'r', encoding='utf-8'))
    all_f1s = []
    for q in questions:
        ques_id = q["ID"]
        ques_type = q["compositionality_type"]  # renamed: `type` shadows the builtin
        ground_answers = q["Answers"]
        if ques_type in ("conjunction", "composition"):
            ques_path = os.path.join(src_dir, ques_id + ".json")
            if not os.path.exists(ques_path):
                continue
            print(ques_id)
            ques_json = json.load(codecs.open(ques_path, 'r', encoding='utf-8'))
            max_f1 = 0.0
            for topic in ques_json.keys():
                for path in ques_json[topic]:
                    # Both sub1 and sub2 paths count, as long as they carry a
                    # positive approximate label (the two branches were identical).
                    if path["src"] in ("sub1", "sub2") and path["approx_label"] == 1:
                        f1 = metricUtils.compute_f1(ground_answers, path["entities"])
                        max_f1 = max(f1[2], max_f1)
            all_f1s.append(max_f1)
    macro_avg_f1 = np.mean(all_f1s)
    print(macro_avg_f1)
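# All of the helpers in this file lean on metricUtils.compute_f1, whose source
# is not included in this section. A minimal sketch consistent with how it is
# used here (a set-based (precision, recall, f1) triple over gold vs. predicted
# entities, with the index order implied by the record dicts in evaluate0
# below) might look like the following. This is an assumption for
# illustration, not the actual metricUtils implementation.
def compute_f1_sketch(ground_answers, predicted_answers):
    gold, pred = set(ground_answers), set(predicted_answers)
    common = gold & pred
    if not common:
        return 0.0, 0.0, 0.0
    precision = float(len(common)) / len(pred)
    recall = float(len(common)) / len(gold)
    f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1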
def eval_quality(self, ques_src, cands_dir):
    questions = json.load(codecs.open(ques_src, 'r', encoding='utf-8'))["Questions"]
    all_f1s = []
    for q in questions:
        ques_id = q["QuestionId"]
        parses = q["Parses"]
        entity_ans_dict = {}
        for parse in parses:
            topic_entity = parse["TopicEntityMid"]
            answer_entities = [a["AnswerArgument"] for a in parse["Answers"]]
            entity_ans_dict[topic_entity] = answer_entities
        ques_path = os.path.join(cands_dir, ques_id + ".json")
        if not os.path.exists(ques_path):
            continue
        print(ques_id)
        main_entity_paths = json.load(codecs.open(ques_path, 'r', encoding='utf-8'))
        best_f1 = 0.0
        for topic in main_entity_paths:
            ground_ans = entity_ans_dict.get(topic, [])
            if len(ground_ans) == 0:
                continue
            for path in main_entity_paths[topic]:
                predicted_ans = path["entities"]
                f1 = metricUtils.compute_f1(ground_ans, predicted_ans)[2]
                if f1 > best_f1:
                    best_f1 = f1
        all_f1s.append(best_f1)
    print(np.mean(all_f1s))
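# eval_quality above (and recompute_rewards below) assume the WebQuestionsSP
# JSON layout implied by the field names they read; abbreviated sketch of the
# assumed schema (the identifier values are illustrative):
#
# {
#   "Questions": [
#     {
#       "QuestionId": "WebQTest-0",
#       "Parses": [
#         {
#           "TopicEntityMid": "m.0f2y0",
#           "Answers": [{"AnswerArgument": "m.01428y"}]
#         }
#       ]
#     }
#   ]
# }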
def __get_query_graph_cands__(self, topic_entity, main_relation, constraints, ans_entities):
    """Build one candidate per constraint combination, rewarded against the gold answers."""
    constraint_combinations = self.__get_constraint_combinations__(constraints)
    answer_entities = set(ans_entities)
    cands = []
    for combination in constraint_combinations:
        entity_names = set(
            self.sparql.eval_all_constraints_named(
                topic_entity, main_relation, combination, False))
        if len(answer_entities) == 0:
            reward = 0, 0, 0
        else:
            reward = metricUtils.compute_f1(answer_entities, entity_names)
        cand = {
            "relations": main_relation,
            "entities": list(entity_names),
            "constraints": [ob.__dict__ for ob in combination],
            "reward": reward
        }
        cands.append(cand)
    return cands
def compose(self, sub1_candidates, ground_answers, dest_path):
    candidates = {}
    for sub1_topic_entity in sub1_candidates.keys():
        for relation_path1 in sub1_candidates[sub1_topic_entity]:
            relation_path1['src'] = 'sub1'
            if not self.queryGraphGen.__is_valid_rel_path__(relation_path1['relations']):
                continue
            if "is_reverse" not in relation_path1:
                relation_path1["is_reverse"] = False
            # Skip first hops whose interim result set is empty or too large to expand.
            rel_counts = self.sparql.get_interim_size(
                sub1_topic_entity, relation_path1['relations'],
                relation_path1["is_reverse"])
            if int(rel_counts) > 2000 or int(rel_counts) == 0:
                continue
            max_reward = 0, 0, 0
            positive_cands = []
            negative_cands = []
            cands = self.__connecting_paths__(sub1_topic_entity, relation_path1,
                                              ground_answers)
            for cand in cands:
                if len(ground_answers) == 0:
                    reward = 0, 0, 0
                else:
                    reward = metricUtils.compute_f1(ground_answers, cand["entities"])
                cand["reward"] = reward
                cand["src"] = "sub2"
                if reward[2] == 0:
                    negative_cands.append(cand)
                else:
                    positive_cands.append(cand)
                if reward[2] > max_reward[2]:
                    max_reward = reward
            # The first hop inherits the best reward achievable through any second hop.
            relation_path1["reward"] = max_reward
            # Sample the negatives proportionally to the number of positives.
            random.shuffle(negative_cands)
            n_keep = min(int(len(positive_cands) * MAX_NEGATIVES_FRAC),
                         len(negative_cands))  # cast: slice bounds must be ints
            negative_cands_sample = negative_cands[:n_keep]
            # We key the second main path by the topic entity, not the interim entity.
            entity_cands = candidates.get(sub1_topic_entity, [])
            entity_cands += [relation_path1] + positive_cands + negative_cands_sample
            candidates[sub1_topic_entity] = entity_cands
    with open(dest_path, 'w') as fp:
        json.dump(candidates, fp, indent=4)
def get_record_f1(self, row, ans_dict):
    ground_answers = ans_dict[row['qid']].get(row['topic'], [])
    if len(ground_answers) == 0:
        precision, recall, f1 = 0.0, 0.0, 0.0
    else:
        # compute_f1 returns (precision, recall, f1), the order implied by the
        # record dicts in evaluate0; the original unpacked the first two values
        # in swapped order (harmless here, since only f1 is used).
        precision, recall, f1 = metricUtils.compute_f1(ground_answers,
                                                       row['pred_entities'])
    return f1
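# Hypothetical usage of get_record_f1 (the evaluator/ans_dict names are for
# illustration only): given a predictions DataFrame with the 'qid', 'topic',
# and 'pred_entities' columns used elsewhere in this module, a per-row F1
# column could be derived with
#
#   preds_df["f1"] = preds_df.apply(
#       lambda row: evaluator.get_record_f1(row, ans_dict), axis=1)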
def get_average_f1(self, src, n_best=25):
    df = pd.read_csv(src)
    results = {}
    all_f1s = []
    all_precision = []
    all_recall = []
    prec_1 = 0
    total_prec1 = 0
    for qid, group in df.groupby("qid"):
        total_prec1 += 1
        question = self.questions_dict[qid]
        ground_answers = question["Answers"]
        group_df = group.reset_index()
        # Assign instead of fillna(inplace=True) to avoid mutating a slice.
        group_df['sub2_score'] = group_df['sub2_score'].fillna(0.0)
        group_df['agg'] = group_df['sub1_score'] + group_df['sub2_score']
        group_df['pred_entities'] = group_df['pred_entities'].apply(ast.literal_eval)
        group_df = group_df.sort_values(by=["agg"], ascending=False)
        group_df_sub_records = group_df.head(min(len(group_df), n_best)).to_dict('records')
        best_f1 = 0.0
        best_recall = 0.0
        best_precision = 0.0
        is_true = False
        for record in group_df_sub_records:
            if len(ground_answers) == 0:
                precision, recall, f1 = 0.0, 0.0, 0.0
            else:
                precision, recall, f1 = metricUtils.compute_f1(
                    ground_answers, record['pred_entities'])
            if f1 > best_f1:
                best_f1 = f1
                best_precision = precision
                best_recall = recall
            if is_true:
                continue
            # Count a hit if any prediction in the n-best list is a gold answer.
            for pred in record['pred_entities']:
                if pred in ground_answers:
                    is_true = True
                    break
        if is_true:
            prec_1 += 1
        all_f1s.append(best_f1)
        all_recall.append(best_recall)
        all_precision.append(best_precision)
    # Macro-average over all questions, including those with no prediction rows.
    macro_avg_f1 = float(np.sum(all_f1s)) / float(len(self.questions_dict))
    prec_at_1 = float(prec_1) / float(len(self.questions_dict))
    results["macro_f1"] = macro_avg_f1
    results["hit1"] = prec_at_1
    print(len(all_f1s))
    return results
def merge_with_empty(self, sub1_candidates, ground_answers, dest_path):
    for topic in sub1_candidates.keys():
        for rel in sub1_candidates[topic]:
            if len(ground_answers) == 0:
                reward = 0, 0, 0
            else:
                reward = metricUtils.compute_f1(ground_answers, rel["entities"])
            rel["reward"] = reward
            rel["src"] = "sub1"
    if dest_path:
        with open(dest_path, 'w') as fp:
            json.dump(sub1_candidates, fp, indent=4)
    return sub1_candidates
def recompute_rewards(self):
    file_json = json.load(open(QUESTION_PATH, 'r'))
    questions = file_json.get("Questions")
    if questions is None:
        return
    # Map question id -> {topic entity -> gold answer entities}.
    ans_dict = {}
    for question in questions:
        entity_ans_dict = {}
        questionId = question["QuestionId"]
        for parse in question["Parses"]:
            topic_entity = parse["TopicEntityMid"]
            answer_entities = [a["AnswerArgument"] for a in parse["Answers"]]
            entity_ans_dict[topic_entity] = answer_entities
        ans_dict[questionId] = entity_ans_dict
    if not os.path.exists(DEST_F1_PATH):
        os.makedirs(DEST_F1_PATH)
    files = [f for f in os.listdir(DEST_PATH)
             if os.path.isfile(os.path.join(DEST_PATH, f))]
    for f in files:
        q_id = f.replace(".json", "")
        print(q_id)
        try:
            file_json = json.load(open(os.path.join(DEST_PATH, f), 'r'))
        except ValueError:  # skip malformed candidate files
            continue
        is_valid = False
        for topic_entity in file_json.keys():
            ground_ans = ans_dict.get(q_id, {}).get(topic_entity, [])
            if len(ground_ans) == 0:
                continue
            is_valid = True
            for path in file_json[topic_entity]:
                predicted_ans = path["entities"]
                path["reward"] = metricUtils.compute_f1(ground_ans, predicted_ans)
        if is_valid:
            with open(os.path.join(DEST_F1_PATH, q_id + ".json"), 'w') as fp:
                json.dump(file_json, fp)
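# recompute_rewards reads the module-level constants QUESTION_PATH, DEST_PATH,
# and DEST_F1_PATH, which are defined elsewhere in this project. Hypothetical
# values, shown only to make the data flow concrete:
#
#   QUESTION_PATH = "data/WebQSP.test.json"   # WebQSP-style question file
#   DEST_PATH = "data/candidates"             # per-question candidate JSONs
#   DEST_F1_PATH = "data/candidates_f1"       # same files, rewritten with F1 rewards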
def get_average_f1(self, final_results_path, n_best=25):
    df = pd.read_csv(final_results_path)
    all_f1s = []
    prec_1 = 0
    total_prec1 = 0
    results = {}
    for qid, group in df.groupby("qid"):
        total_prec1 += 1
        group_df = group.reset_index()
        group_df['pred_entities'] = group_df['pred_entities'].apply(ast.literal_eval)
        group_df = group_df.sort_values(by=["sub1_score"], ascending=False)
        group_df_sub_records = group_df.head(min(len(group_df), n_best)).to_dict('records')
        best_f1 = 0.0
        is_true = False
        for record in group_df_sub_records:
            ground_answers = self.ground_ans_dict[qid].get(record['topic'], [])
            if len(ground_answers) == 0:
                f1 = 0.0
            else:
                f1 = metricUtils.compute_f1(ground_answers, record['pred_entities'])[2]
            if f1 > best_f1:
                best_f1 = f1
            if is_true:
                continue
            for pred in record['pred_entities']:
                if pred in ground_answers:
                    is_true = True
                    break
        if is_true:
            prec_1 += 1
        all_f1s.append(best_f1)
    # Macro-average over all questions, not just those with result rows.
    macro_avg_f1 = float(sum(all_f1s)) / len(self.questions)
    prec_at_1 = float(prec_1) / len(self.questions)
    results["macro"] = macro_avg_f1
    results["hit1"] = prec_at_1
    print(results)  # was a Python 2 print statement
    return results
def evaluate0_no_queries(self, sub2_df, ques_sub2_predictions, main_data_dir, question):
    ans_entities = question["Answers"]
    qid = question["ID"]
    print(qid + "\t" + question["compositionality_type"])
    records = []
    path_dict1 = self.__get_sub1_paths__(qid)
    path_dict2 = self.__get_sub2_paths__(main_data_dir, qid)
    sub1_scores_dict = self.__get_sub1_scores__(main_data_dir, question)
    parents_with_sub2 = set()
    for r in ques_sub2_predictions:
        index = r["index"]
        parent_index = r["parent_index"]
        parents_with_sub2.add(parent_index)
        topic1, rel1_data = self.__get_sub1_paths_data__(parent_index, path_dict1)
        if parent_index not in sub1_scores_dict:
            # Can happen if the parent did not survive the top-k cut.
            continue
        sub1_score = sub1_scores_dict[parent_index]["sub1_score"]
        if rel1_data is None:
            continue
        topic2, rel2_data = self.__get_sub2_paths_data__(sub2_df, index,
                                                         parent_index, path_dict2)
        if rel2_data is None:
            continue
        sub2_score = r["sub2_score"]
        ques_type = question["compositionality_type"]
        if ques_type == "conjunction":
            entities, query = self.evaluate_conjunction(topic1, topic2,
                                                        rel1_data, rel2_data)
        elif ques_type == "composition":
            entities, query = self.evaluate_composition(topic1, rel1_data, rel2_data)
        else:
            entities = []
        agg_score = sub1_score + 0.8 * sub2_score
        if len(ans_entities) == 0:
            f1_score = 0, 0, 0
        else:
            f1_score = metricUtils.compute_f1(ans_entities, entities)
        records.append({
            "qid": qid,
            "sub1_index": parent_index,
            "sub1_relation": rel1_data.get("relations", []),
            "sub1_constraints": rel1_data.get("constraints", []),
            "sub1_score": sub1_score,
            "sub2_index": index,
            "sub2_relation": rel2_data.get("relations", []),
            "sub2_constraints": rel2_data.get("constraints", []),
            "sub2_score": sub2_score,
            "pred_entities": entities,
            "precision": f1_score[0],
            "recall": f1_score[1],
            "f1_score": f1_score[2],
            "agg_score": agg_score
        })
    # Also evaluate sub1 candidates that have no sub2 candidates.
    all_parent_cands = self.test_index[self.test_index["qid"] == qid].to_dict('records')
    for parent_cand in all_parent_cands:
        parent_index = parent_cand["index"]
        if parent_index in parents_with_sub2:
            continue
        if parent_index not in sub1_scores_dict:  # not in top-k
            continue
        topic1, rel1_data = self.__get_sub1_paths_data__(parent_index, path_dict1)
        sub1_score = sub1_scores_dict[parent_index]["sub1_score"]
        entities = rel1_data["entities"]
        if len(ans_entities) == 0:
            f1_score = 0, 0, 0
        else:
            f1_score = metricUtils.compute_f1(ans_entities, entities)
        records.append({
            "qid": qid,
            "sub1_index": parent_index,
            "sub1_relation": rel1_data.get("relations", []),
            "sub1_constraints": rel1_data.get("constraints", []),
            "sub1_score": sub1_score,
            "sub2_index": None,
            "sub2_relation": None,
            "sub2_constraints": None,
            "sub2_score": None,
            "pred_entities": entities,
            "precision": f1_score[0],
            "recall": f1_score[1],
            "f1_score": f1_score[2],
            "agg_score": sub1_score
        })
    if len(records) == 0:
        print("no evaluation records found in " + qid)
    return records
def evaluate0(self, sub2_df, ques_sub2_predictions, main_data_dir, question, output_dir):
    qid = question["ID"]
    output_path = os.path.join(output_dir, "query_" + qid + ".json")
    ans_entities = question["Answers"]
    print(qid + "\t" + question["compositionality_type"])
    records = []
    path_dict1 = self.__get_sub1_paths__(qid)
    path_dict2 = self.__get_sub2_paths__(main_data_dir, qid)
    sub1_scores_dict = self.__get_sub1_scores__(main_data_dir, question)
    parents_with_sub2 = set()
    for r in ques_sub2_predictions:
        index = r["index"]
        parent_index = r["parent_index"]
        parents_with_sub2.add(parent_index)
        topic1, rel1_data = self.__get_sub1_paths_data__(parent_index, path_dict1)
        if parent_index not in sub1_scores_dict:
            # Can happen if the parent did not survive the top-k cut.
            continue
        sub1_score = sub1_scores_dict[parent_index]["sub1_score"]
        if rel1_data is None:
            continue
        topic2, rel2_data = self.__get_sub2_paths_data__(sub2_df, index,
                                                         parent_index, path_dict2)
        if rel2_data is None:
            continue
        sub2_score = r["sub2_score"]
        ques_type = question["compositionality_type"]
        if ques_type == "conjunction":
            entities, query = self.evaluate_conjunction(topic1, topic2,
                                                        rel1_data, rel2_data)
        elif ques_type == "composition":
            entities, query = self.evaluate_composition(topic1, rel1_data, rel2_data)
        else:
            entities, query = [], ""
        agg_score = sub1_score + 0.8 * sub2_score
        if len(ans_entities) == 0:
            f1_score = 0, 0, 0
        else:
            f1_score = metricUtils.compute_f1(ans_entities, entities)
        records.append({
            "qid": qid,
            "sub1_index": parent_index,
            "sub1_relation": rel1_data.get("relations", []),
            "sub1_constraints": rel1_data.get("constraints", []),
            "sub1_score": sub1_score,
            "sub2_index": index,
            "sub2_relation": rel2_data.get("relations", []),
            "sub2_constraints": rel2_data.get("constraints", []),
            "sub2_score": sub2_score,
            "pred_entities": entities,
            "precision": f1_score[0],
            "recall": f1_score[1],
            "f1_score": f1_score[2],
            "agg_score": agg_score,
            "query": query
        })
    # Also evaluate sub1 candidates that have no sub2 candidates.
    all_parent_cands = self.test_index[self.test_index["qid"] == qid].to_dict('records')
    for parent_cand in all_parent_cands:
        parent_index = parent_cand["index"]
        if parent_index in parents_with_sub2:
            continue
        if parent_index not in sub1_scores_dict:  # not in top-k
            continue
        topic1, rel1_data = self.__get_sub1_paths_data__(parent_index, path_dict1)
        sub1_score = sub1_scores_dict[parent_index]["sub1_score"]
        entities = rel1_data["entities"]
        if len(ans_entities) == 0:
            f1_score = 0, 0, 0
        else:
            f1_score = metricUtils.compute_f1(ans_entities, entities)
        # Rebuild the SPARQL query for the sub1-only candidate.
        r_chain, f = self.__get_rel_chain__(rel1_data.get("relations", []),
                                            rel1_data.get("constraints", []))
        if rel1_data["is_reverse"]:
            core_chain = "?x " + r_chain + " ns:" + topic1 + " ."
        else:
            core_chain = "ns:" + topic1 + " " + r_chain + " ?x ."
        f += "\n" + self.sparql.__get_entity_filter__("?x")
        query = self.template.substitute(r=core_chain, f=f)
        records.append({
            "qid": qid,
            "sub1_index": parent_index,
            "sub1_relation": rel1_data.get("relations", []),
            "sub1_constraints": rel1_data.get("constraints", []),
            "sub1_score": sub1_score,
            "sub2_index": None,
            "sub2_relation": None,
            "sub2_constraints": None,
            "sub2_score": None,
            "pred_entities": entities,
            "precision": f1_score[0],
            "recall": f1_score[1],
            "f1_score": f1_score[2],
            "agg_score": sub1_score,
            "query": query
        })
    if len(records) == 0:
        print("no evaluation records found in " + qid)
    preds_df = pd.DataFrame(records)
    preds_df.to_csv(output_path, index=False)
    # The query strings are persisted to CSV but not returned to callers.
    for rec in records:
        rec.pop("query", None)
    return records
def merge_with_named(self, sub1_candidates, sub2_named_entities, ground_answers, dest_path=None):
    candidates = {}
    for topic in sub1_candidates.keys():
        for rel in sub1_candidates[topic]:
            if not self.queryGraphGen.__is_valid_rel_path__(rel['relations']):
                continue
            if len(ground_answers) == 0:
                reward = 0, 0, 0
            else:
                reward = metricUtils.compute_f1(ground_answers, rel["entities"])
            rel["reward"] = reward
            rel["src"] = "sub1"
            to_update = candidates.get(topic, {})
            rel_key = topic + "_" + str(rel["relations"]) + ":" + str(rel["constraints"])
            to_update[rel_key] = rel
            candidates[topic] = to_update
            for sub2_named_entity in sub2_named_entities:
                rel2_paths = self.sparql.get_conjunction_path(topic, rel, sub2_named_entity)
                for rel2 in rel2_paths:
                    conjunction_ans_set = self.sparql.evaluate_conjunction_path(
                        topic, rel, rel2, sub2_named_entity)
                    # Guard before len(): evaluate_conjunction_path may return None,
                    # which the original code passed to len() unconditionally.
                    if conjunction_ans_set is None:
                        continue
                    print(len(conjunction_ans_set))
                    if len(ground_answers) > 0:
                        reward = metricUtils.compute_f1(ground_answers, conjunction_ans_set)
                        # Update the sub1 path's reward if the conjunction improves it.
                        prev_reward = candidates[topic][rel_key]["reward"]
                        if prev_reward[2] < reward[2]:
                            candidates[topic][rel_key]["reward"] = reward
                            candidates[topic][rel_key]["src"] = "sub1"
    # Flatten {topic: {rel_key: rel}} back into {topic: [rel, ...]}.
    cands = {}
    for topic in candidates.keys():
        cands[topic] = list(candidates[topic].values())
    if dest_path:
        with open(dest_path, 'w') as fp:
            json.dump(cands, fp, indent=4)
    return cands
def merge(self, sub1_candidates, sub2_candidates, ground_answers, dest_path=None):
    # Reverse maps: answer entity -> candidate paths that produce that entity.
    sub1_reverse_map = self.__get_entities_reverse_map__(sub1_candidates)
    sub2_reverse_map = self.__get_entities_reverse_map__(sub2_candidates)
    candidates = {}
    for topic in sub1_candidates.keys():
        for rel in sub1_candidates[topic]:
            if len(ground_answers) == 0:
                reward = 0, 0, 0
            else:
                reward = metricUtils.compute_f1(ground_answers, rel["entities"])
            rel["reward"] = reward
            rel["src"] = "sub1"
            to_update = candidates.get(topic, {})
            rel_key = topic + "_" + str(rel["relations"]) + ":" + str(rel["constraints"])
            to_update[rel_key] = rel
            candidates[topic] = to_update
    for topic in sub2_candidates.keys():
        for rel in sub2_candidates[topic]:
            if len(ground_answers) == 0:
                reward = 0, 0, 0
            else:
                reward = metricUtils.compute_f1(ground_answers, rel["entities"])
            rel["reward"] = reward
            rel["src"] = "sub2"
            if "constraints" not in rel:
                rel["constraints"] = []
            to_update = candidates.get(topic, {})
            rel_key = topic + "_" + str(rel["relations"])
            to_update[rel_key] = rel
            candidates[topic] = to_update
    # Entities reachable from both sub-questions are join candidates.
    join_cands = set(sub1_reverse_map.keys()).intersection(sub2_reverse_map.keys())
    rel_cache = set()
    for join_cand in join_cands:
        rel1_paths = sub1_reverse_map[join_cand]
        rel2_paths = sub2_reverse_map[join_cand]
        for rel1_path in rel1_paths:
            for rel2_path in rel2_paths:
                rel_1 = rel1_path["relations"]
                rel_2 = rel2_path["relations"]
                if str(rel_1) == str(rel_2):
                    # A conjunction should not repeat the same relation.
                    continue
                if not self.queryGraphGen.__is_valid_rel_path__(rel_1) \
                        or not self.queryGraphGen.__is_valid_rel_path__(rel_2):
                    continue
                topic1 = rel1_path["topic_entity"]
                topic2 = rel2_path["topic_entity"]
                if topic1 == topic2:
                    continue
                constraints_1 = rel1_path["constraints"]
                constraints_2 = []
                rel1_path["is_reverse"] = rel1_path.get("is_reverse", False)
                rel2_path["is_reverse"] = rel2_path.get("is_reverse", False)
                key = topic1 + "_" + str(rel_1) + "_" + str(constraints_1) + \
                    ":" + topic2 + "_" + str(rel_2) + "_" + str(constraints_2)
                if key in rel_cache:
                    continue
                rel_cache.add(key)
                ans_set = list(set(rel1_path["entities"]).intersection(rel2_path["entities"]))
                if len(ground_answers) > 0:
                    reward = metricUtils.compute_f1(ground_answers, ans_set)
                    # Propagate the conjunction reward back to both component paths.
                    rel_key = topic1 + "_" + str(rel_1) + ":" + str(constraints_1)
                    prev_reward = candidates[topic1][rel_key]["reward"]
                    if prev_reward[2] < reward[2]:
                        candidates[topic1][rel_key]["reward"] = reward
                        candidates[topic1][rel_key]["src"] = "sub1"
                    rel_key = topic2 + "_" + str(rel_2)
                    prev_reward = candidates[topic2][rel_key]["reward"]
                    if prev_reward[2] < reward[2]:
                        candidates[topic2][rel_key]["reward"] = reward
                        candidates[topic2][rel_key]["src"] = "sub2"
    pos_count = 0
    cands = {}
    for topic in candidates.keys():
        rels = []
        for rel_val in candidates[topic].values():
            if rel_val["reward"][2] > 0:
                pos_count += 1
            rels.append(rel_val)
        cands[topic] = rels
    print("conjunction positive count " + str(pos_count))  # was a Python 2 print statement
    if dest_path:
        with open(dest_path, 'w') as fp:
            json.dump(cands, fp, indent=4)
    return cands
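# merge relies on __get_entities_reverse_map__, which is not shown in this
# section. Judging from how join_cands and rel1_path["topic_entity"] are used
# above, it inverts {topic: [paths]} into {answer entity: [paths that produce
# it]}, tagging each path with its topic entity. A sketch under that
# assumption (name and details are illustrative):
def get_entities_reverse_map_sketch(candidates):
    reverse_map = {}
    for topic, paths in candidates.items():
        for path in paths:
            path["topic_entity"] = topic
            for entity in path.get("entities", []):
                reverse_map.setdefault(entity, []).append(path)
    return reverse_map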