def get_paragraph_forward_pair(tag, ruleterm_doc_results, is_training,
                               debug=False, ignore_non_verifiable=False):
    """Build paragraph-level forward items from rule/term document-retrieval results."""
    if tag == 'dev':
        d_list = common.load_jsonl(config.FEVER_DEV)
    elif tag == 'train':
        d_list = common.load_jsonl(config.FEVER_TRAIN)
    elif tag == 'test':
        d_list = common.load_jsonl(config.FEVER_TEST)
    else:
        raise ValueError(f"Tag: {tag} not supported.")

    if debug:
        d_list = d_list[:100]
        ruleterm_doc_results = ruleterm_doc_results[:100]

    ruleterm_doc_results_dict = list_dict_data_tool.list_to_dict(ruleterm_doc_results, 'id')
    db_cursor = fever_db.get_cursor()
    fitems = build_full_wiki_document_forward_item(ruleterm_doc_results_dict, d_list,
                                                   is_training, db_cursor,
                                                   ignore_non_verifiable)
    return fitems
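
# Usage sketch (not part of the original file): feed merged document-retrieval
# output for a tag into get_paragraph_forward_pair. The jsonl path mirrors the
# one used in the __main__ block below and is assumed to exist.
def _example_paragraph_forward_pair():
    ruleterm_doc_results = common.load_jsonl(
        config.RESULT_PATH / "doc_retri_results/fever_results/merged_doc_results/m_doc_dev.jsonl")
    fitems = get_paragraph_forward_pair('dev', ruleterm_doc_results,
                                        is_training=False, debug=True)
    print(f"{len(fitems)} paragraph-level forward items")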
def get_sentences(tag, is_training, debug=False):
    """Build sentence-level forward items from the merged document-retrieval results."""
    if tag == 'dev':
        d_list = common.load_jsonl(config.FEVER_DEV)
    elif tag == 'train':
        d_list = common.load_jsonl(config.FEVER_TRAIN)
    elif tag == 'test':
        d_list = common.load_jsonl(config.FEVER_TEST)
    else:
        raise ValueError(f"Tag: {tag} not supported.")

    if debug:
        d_list = d_list[:50]

    doc_results = common.load_jsonl(
        config.RESULT_PATH /
        f"doc_retri_results/fever_results/merged_doc_results/m_doc_{tag}.jsonl")
    doc_results_dict = list_dict_data_tool.list_to_dict(doc_results, 'id')
    fever_db_cursor = fever_db.get_cursor(config.FEVER_DB)
    forward_items = build_full_wiki_document_forward_item(doc_results_dict, d_list,
                                                          is_training=is_training,
                                                          db_cursor=fever_db_cursor)
    return forward_items
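
# Usage sketch: build sentence-level training items. debug=True keeps only the
# first 50 claims, so the FEVER DB and the merged doc-retrieval file for the
# tag (paths hard-coded above) are the only prerequisites.
def _example_get_sentences():
    forward_items = get_sentences('train', is_training=True, debug=True)
    print(f"{len(forward_items)} sentence-level forward items")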
def get_inference_pair(tag, is_training, sent_result_path, debug_num=None,
                       evidence_filtering_threshold=0.01):
    """Build NLI forward items by attaching top-k filtered sentence-level evidence."""
    if tag == 'dev':
        d_list = common.load_jsonl(config.FEVER_DEV)
    elif tag == 'train':
        d_list = common.load_jsonl(config.FEVER_TRAIN)
    elif tag == 'test':
        d_list = common.load_jsonl(config.FEVER_TEST)
    else:
        raise ValueError(f"Tag: {tag} not supported.")

    if debug_num is not None:
        d_list = d_list[:50]

    d_dict = list_dict_data_tool.list_to_dict(d_list, 'id')
    threshold_value = evidence_filtering_threshold

    # Accept either a saved jsonl of sentence results or the in-memory list.
    if isinstance(sent_result_path, Path):
        sent_list = common.load_jsonl(sent_result_path, debug_num)
    elif isinstance(sent_result_path, list):
        sent_list = sent_result_path
    else:
        raise ValueError(
            f"{sent_result_path} is not a valid argument type; expected list or Path.")

    list_dict_data_tool.append_subfield_from_list_to_dict(sent_list, d_dict,
                                                          'oid', 'fid', check=True)
    filtered_sent_dict = select_top_k_and_to_results_dict(d_dict, top_k=5,
                                                          threshold=threshold_value)
    list_dict_data_tool.append_item_from_dict_to_list(
        d_list, filtered_sent_dict, 'id',
        ['predicted_evidence', 'predicted_scored_evidence'])
    fever_db_cursor = fever_db.get_cursor(config.FEVER_DB)
    forward_items = build_nli_forward_item(d_list, is_training=is_training,
                                           db_cursor=fever_db_cursor)
    return forward_items, d_list
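
# Usage sketch: sent_result_path accepts either a Path to saved sentence
# scores or the in-memory list itself, so one entry point serves offline
# evaluation and an end-to-end pipeline. sent_results is assumed to hold dicts
# with the 'oid'/'fid' fields consumed above.
def _example_get_inference_pair(sent_results):
    forward_items, d_list = get_inference_pair('dev', is_training=False,
                                               sent_result_path=sent_results,
                                               evidence_filtering_threshold=0.01)
    print(len(forward_items), len(d_list))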
def get_nli_pair(tag, is_training, sent_level_results_list, debug=None,
                 sent_top_k=5, sent_filter_value=0.05):
    """Build NLI forward items from sentence-level selection results."""
    if tag == 'dev':
        d_list = common.load_jsonl(config.FEVER_DEV)
    elif tag == 'train':
        d_list = common.load_jsonl(config.FEVER_TRAIN)
    elif tag == 'test':
        d_list = common.load_jsonl(config.FEVER_TEST)
    else:
        raise ValueError(f"Tag: {tag} not supported.")

    if debug:
        d_list = d_list[:100]

    d_dict = list_dict_data_tool.list_to_dict(d_list, 'id')

    if debug:
        # Keep only the sentence results that belong to the truncated claim list.
        id_set = {item['id'] for item in d_list}
        sent_level_results_list = [item for item in sent_level_results_list
                                   if item['qid'] in id_set]

    list_dict_data_tool.append_subfield_from_list_to_dict(sent_level_results_list,
                                                          d_dict, 'qid', 'fid',
                                                          check=True)
    filtered_sent_dict = select_top_k_and_to_results_dict(
        d_dict, score_field_name='prob', top_k=sent_top_k,
        filter_value=sent_filter_value, result_field='predicted_evidence')
    list_dict_data_tool.append_item_from_dict_to_list_hotpot_style(
        d_list, filtered_sent_dict, 'id',
        ['predicted_evidence', 'selected_scored_results'])
    fever_db_cursor = fever_db.get_cursor(config.FEVER_DB)
    forward_items = build_nli_forward_item(d_list, is_training=is_training,
                                           db_cursor=fever_db_cursor)
    return forward_items, d_list
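
# Usage sketch: each entry of sent_level_results_list is assumed to carry
# 'qid', 'fid' and a 'prob' score, matching the fields consumed above; the
# top-5 sentences above prob 0.05 become 'predicted_evidence' on each claim.
def _example_get_nli_pair(sent_level_results_list):
    forward_items, d_list = get_nli_pair('dev', is_training=False,
                                         sent_level_results_list=sent_level_results_list,
                                         sent_top_k=5, sent_filter_value=0.05)
    print(f"{len(forward_items)} NLI forward items")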
def get_sentence_forward_pair(tag, ruleterm_doc_results, is_training, debug=False,
                              ignore_non_verifiable=False, top_k=5, filter_value=0.005):
    """Build sentence-level forward items from scored document-retrieval results."""
    if tag == 'dev':
        d_list = common.load_jsonl(config.FEVER_DEV)
    elif tag == 'train':
        d_list = common.load_jsonl(config.FEVER_TRAIN)
    elif tag == 'test':
        d_list = common.load_jsonl(config.FEVER_TEST)
    else:
        raise ValueError(f"Tag: {tag} not supported.")

    if debug:
        d_list = d_list[:100]
        ruleterm_doc_results = ruleterm_doc_results[:100]

    d_o_dict = list_dict_data_tool.list_to_dict(d_list, 'id')
    copied_d_o_dict = copy.deepcopy(d_o_dict)
    list_dict_data_tool.append_subfield_from_list_to_dict(ruleterm_doc_results,
                                                          copied_d_o_dict,
                                                          'qid', 'fid', check=True)
    cur_results_dict_filtered = select_top_k_and_to_results_dict(
        copied_d_o_dict, score_field_name='prob', top_k=top_k,
        filter_value=filter_value)

    db_cursor = fever_db.get_cursor()
    fitems = build_full_wiki_sentence_forward_item(cur_results_dict_filtered, d_list,
                                                   is_training, db_cursor,
                                                   ignore_non_verifiable)
    return fitems
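
# Usage sketch: unlike get_paragraph_forward_pair, the retrieval results here
# are expected to be scored ('qid', 'fid', 'prob'); only the top-5 documents
# above prob 0.005 contribute candidate sentences.
def _example_sentence_forward_pair(scored_doc_results):
    fitems = get_sentence_forward_pair('dev', scored_doc_results,
                                       is_training=False, debug=True,
                                       top_k=5, filter_value=0.005)
    print(f"{len(fitems)} sentence-level forward items")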
def down_sample_neg(fitems, ratio=None):
    # NOTE: the head of this function was truncated in this revision. The split
    # below is reconstructed from how the tail uses pos_items/neg_items and
    # from the "s_labels == 'true'" check in the __main__ block; treat it as an
    # assumption, not the original code.
    pos_items = [item for item in fitems if item['s_labels'] == 'true']
    neg_items = [item for item in fitems if item['s_labels'] == 'false']
    pos_count = len(pos_items)
    if ratio is None:
        return fitems

    random.shuffle(pos_items)
    random.shuffle(neg_items)
    # Keep roughly pos_count / ratio negatives, then shuffle the merged list.
    neg_sample_count = int(pos_count / ratio)
    sampled_neg = neg_items[:neg_sample_count]
    print(f"After sampling, we have {pos_count}/{len(sampled_neg)} (pos/neg).")
    sampled_list = sampled_neg + pos_items
    random.shuffle(sampled_list)
    return sampled_list


if __name__ == '__main__':
    d_list = common.load_jsonl(config.FEVER_DEV)
    doc_results = common.load_jsonl(
        config.RESULT_PATH / "doc_retri_results/fever_results/merged_doc_results/m_doc_dev.jsonl")
    doc_results_dict = list_dict_data_tool.list_to_dict(doc_results, 'id')
    fever_db_cursor = fever_db.get_cursor(config.FEVER_DB)
    forward_items = build_full_wiki_document_forward_item(doc_results_dict, d_list,
                                                          is_training=False,
                                                          db_cursor=fever_db_cursor)
    # Each forward item carries 'query', 'context', 'sid', 'cid', 'fid' and 's_labels'.
    print(len(forward_items))
    # down_sample_neg(forward_items, ratio=0.2)
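
# Self-contained sketch of the down-sampling behaviour on synthetic items (no
# DB required); it relies on the reconstructed pos/neg split above. With
# ratio=0.5, at most pos_count / 0.5 = 2x the positives survive from the
# negatives.
def _example_down_sample_neg():
    fake_items = ([{'s_labels': 'true'} for _ in range(10)] +
                  [{'s_labels': 'false'} for _ in range(100)])
    sampled = down_sample_neg(fake_items, ratio=0.5)
    print(len(sampled))  # expected 30: 10 positives + 20 sampled negatives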
def adv_simi_sample_with_prob_v1_1(input_file, additional_file, prob_dict_file,
                                   tokenized=False):
    """Sample additional evidence for each claim and attach upstream sentence
    probabilities to every evidence sentence."""
    cursor = fever_db.get_cursor()
    d_list = common.load_jsonl(input_file)

    if prob_dict_file is None:
        prob_dict_file = dict()

    if isinstance(additional_file, list):
        additional_d_list = additional_file
    else:
        additional_d_list = common.load_jsonl(additional_file)

    additional_data_dict = dict()
    for add_item in additional_d_list:
        additional_data_dict[add_item['id']] = add_item

    sampled_data_list = []
    missing_prob_count = 0

    for item in tqdm(d_list):
        sampled_e_list, flags = sample_additional_data_for_item_v1_1(
            item, additional_data_dict)
        for i, (sampled_evidence, flag) in enumerate(zip(sampled_e_list, flags)):
            # Build a fresh item rather than deep-copying the original;
            # copy.deepcopy(item) might be needed later for error analysis.
            new_item = dict()
            evidence_text_list = evidence_list_to_text_list(cursor, sampled_evidence,
                                                            contain_head=True,
                                                            id_tokenized=tokenized)
            evidences = sorted(sampled_evidence, key=lambda x: (x[0], x[1]))
            item_id = int(item['id'])

            evidence_text_list_with_prob = []
            for text, (doc_id, ln) in zip(evidence_text_list, evidences):
                ssid = (item_id, doc_id, int(ln))
                if ssid not in prob_dict_file:
                    # Some sentence pairs have no upstream score; back off to 0.5.
                    missing_prob_count += 1
                    prob = 0.5
                else:
                    prob = prob_dict_file[ssid]['prob']
                    assert item['claim'] == prob_dict_file[ssid]['claim']
                evidence_text_list_with_prob.append((text, prob))

            new_item['id'] = str(item['id']) + '#' + str(i)
            if tokenized:
                new_item['claim'] = item['claim']
            else:
                raise NotImplementedError("Non-tokenized claims are not supported.")
            new_item['evid'] = evidence_text_list_with_prob
            new_item['verifiable'] = item['verifiable']
            new_item['label'] = item['label']
            sampled_data_list.append(new_item)

    cursor.close()
    print(f"{missing_prob_count} evidence sentences had no 'prob'; defaulted to 0.5.")
    return sampled_data_list
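
# Sketch of the prob_dict_file structure expected above: keys are
# (claim_id, doc_id, line_num) triples and values carry 'prob' and 'claim'
# (the claim text is re-checked by the assert). Building it from flat
# (claim_id, doc_id, line_num, claim, prob) records is an assumption about the
# upstream output format.
def _example_build_prob_dict(scored_records):
    prob_dict = {}
    for claim_id, doc_id, line_num, claim, prob in scored_records:
        prob_dict[(int(claim_id), doc_id, int(line_num))] = {'prob': prob,
                                                             'claim': claim}
    return prob_dict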
def select_sent_with_prob_for_eval(input_file, additional_file, prob_dict_file,
                                   tokenized=False, pipeline=False):
    """Select sentences with upstream sentence retrieval for evaluation.

    :param input_file: The file with the 5 selected sentences per claim.
    :param additional_file: Per-claim upstream results carrying 'predicted_sentids'.
    :param prob_dict_file: Dict keyed by (claim_id, doc_id, line_num) with sentence scores.
    :return: The claim list with evidence text and probabilities attached.
    """
    cursor = fever_db.get_cursor()

    if prob_dict_file is None:
        prob_dict_file = dict()

    if isinstance(additional_file, list):
        additional_d_list = additional_file
    else:
        additional_d_list = common.load_jsonl(additional_file)

    additional_data_dict = dict()
    for add_item in additional_d_list:
        additional_data_dict[add_item['id']] = add_item

    d_list = common.load_jsonl(input_file)

    for item in tqdm(d_list):
        e_list = additional_data_dict[item['id']]['predicted_sentids']
        if not pipeline:
            assert additional_data_dict[item['id']]['label'] == item['label']
            assert additional_data_dict[item['id']]['verifiable'] == item['verifiable']
            assert additional_data_dict[item['id']]['id'] == item['id']

        pred_evidence_list = []
        for i, cur_e in enumerate(e_list):
            # Sentence ids have the form "<doc_id><SENT_LINE><line_number>". (Bug fixed July 21.)
            doc_id = cur_e.split(fever_scorer.SENT_LINE)[0]
            ln = int(cur_e.split(fever_scorer.SENT_LINE)[1])
            pred_evidence_list.append((doc_id, ln))

        pred_evidence = check_sentences.Evidences(pred_evidence_list)
        evidence_text_list = evidence_list_to_text_list(cursor, pred_evidence,
                                                        contain_head=True,
                                                        id_tokenized=tokenized)
        evidences = sorted(pred_evidence, key=lambda x: (x[0], x[1]))
        item_id = int(item['id'])

        evidence_text_list_with_prob = []
        for text, (doc_id, ln) in zip(evidence_text_list, evidences):
            ssid = (item_id, doc_id, int(ln))
            if ssid not in prob_dict_file:
                # No upstream score for this sentence pair; back off to 0.5.
                prob = 0.5
            else:
                prob = prob_dict_file[ssid]['prob']
                assert item['claim'] == prob_dict_file[ssid]['claim']
            evidence_text_list_with_prob.append((text, prob))

        if not tokenized:
            raise NotImplementedError("Non-tokenized claims are not supported.")

        item['evid'] = evidence_text_list_with_prob
        item['predicted_evidence'] = convert_evidence2scoring_format(e_list)
        item['predicted_sentids'] = e_list  # This change needs to be saved.

    return d_list
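
# Usage sketch for evaluation: the additional-file path below is hypothetical;
# it must provide 'predicted_sentids' per claim, and the prob dict follows the
# (claim_id, doc_id, line_num) keying shown in _example_build_prob_dict.
# tokenized=True is required because the non-tokenized branch raises.
def _example_select_for_eval(prob_dict):
    d_list = select_sent_with_prob_for_eval(
        config.FEVER_DEV,
        config.RESULT_PATH / "sent_results/dev_sent_results.jsonl",  # hypothetical path
        prob_dict, tokenized=True)
    print(f"{len(d_list)} claims with scored evidence attached")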