def utest_for_ground_truth(d_list):
    """Attach the gold evidence to each item as if it were the prediction,
    then print how many SUPPORTS / REFUTES / NOT ENOUGH INFO items were seen.

    Mutates every item in ``d_list`` in place: sets ``predicted_sentids``,
    ``predicted_evidence`` and ``predicted_label`` from the gold annotation.
    """
    label_counts = {'SUPPORTS': 0, 'REFUTES': 0, 'NOT ENOUGH INFO': 0}
    for item in tqdm(d_list):
        cleaned_evidence = check_sentences.check_and_clean_evidence(item)
        sent_id_list = []
        evidence_pairs = []
        if item['verifiable'] == "VERIFIABLE":
            # Only the first gold evidence group is used.
            first_group = list(cleaned_evidence)[0]
            for doc_id, ln in first_group:
                sent_id_list.append(doc_id + c_scorer.SENT_LINE + str(ln))
                evidence_pairs.append([doc_id, ln])
        elif item['verifiable'] == "NOT VERIFIABLE":
            sent_id_list = []
        item["predicted_sentids"] = sent_id_list
        item['predicted_evidence'] = evidence_pairs
        item['predicted_label'] = item["label"]
        if item["label"] in label_counts:
            label_counts[item["label"]] += 1
    print(label_counts['SUPPORTS'], label_counts['REFUTES'],
          label_counts['NOT ENOUGH INFO'])
def disabuigation_training_build_v0(item, cursor, contain_first_sentence=False, only_found=True):
    """Build disambiguation training examples for one item (v0).

    Positives are gold doc ids containing '-LRB-' whose tokenized text is
    absent from the claim; negatives are 6-8 randomly sampled retrieved doc
    ids containing '-LRB-' that are not gold.

    :param item: data item with 'prioritized_docids', 'claim_tokens', 'id'
                 and gold evidence readable by check_sentences.
    :param cursor: DB cursor forwarded to make_examples.
    :param contain_first_sentence: forwarded to make_examples.
    :param only_found: if True, sample negatives only when at least one
                       positive was found.
    :return: whatever make_examples returns for (eid, positives, negatives).
    """
    doc_t_list = [it[0] for it in item['prioritized_docids']]
    evidence_group = check_sentences.check_and_clean_evidence(item)
    all_true_t_list = set()
    t_claim = ' '.join(item['claim_tokens'])
    for ground_truth_evid in evidence_group:
        true_t_list = set([it[0] for it in ground_truth_evid])
        all_true_t_list = set.union(all_true_t_list, true_t_list)
    all_true_t_list = list(all_true_t_list)

    positive_list = []
    negative_list = []
    eid = item['id']

    found_pos = False
    for doc_id in all_true_t_list:
        if '-LRB-' in doc_id and common.doc_id_to_tokenized_text(
                doc_id) not in t_claim:
            positive_list.append(doc_id)
            found_pos = True

    # The original had this sampling code duplicated verbatim in two
    # branches; merged into one guarded block.  Negatives are sampled when
    # a positive was found (only_found mode) or unconditionally otherwise.
    if (found_pos and only_found) or not only_found:
        random.shuffle(doc_t_list)
        num_neg = random.randint(6, 8)
        for doc_id in doc_t_list[:num_neg]:
            if '-LRB-' in doc_id and doc_id not in all_true_t_list:
                negative_list.append(doc_id)

    return make_examples(eid, positive_list, negative_list, t_claim, cursor,
                         contain_first_sentence=contain_first_sentence)
def utest_for_sample():
    """Smoke-test additional-evidence sampling on the FEVER dev set,
    printing the flag list produced for every item."""
    # cursor = fever_db.get_cursor()
    dev_items = load_data(config.FEVER_DEV_JSONL)
    retrieved_items = load_data(
        config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/dev.jsonl")

    # Index the retrieved sentence results by claim id.
    retrieved_by_id = {entry['id']: entry for entry in retrieved_items}

    for item in dev_items:
        e_list = check_sentences.check_and_clean_evidence(item)
        # e_text_list = convert_to_normalized_format(cursor, e_list)
        r_list, flags = sample_additional_data_for_item(item, retrieved_by_id)
        print(flags)
def filter_contain_parenthese_valid(item):
    """Return True iff some gold doc id contains '-LRB-', was also
    retrieved for this item, and its tokenized text does not appear in
    the claim — i.e. the item is a valid disambiguation case."""
    retrieved_ids = [it[0] for it in item['prioritized_docids']]
    claim_text = ' '.join(item['claim_tokens'])

    gold_ids = set()
    for evidence in check_sentences.check_and_clean_evidence(item):
        gold_ids |= {it[0] for it in evidence}

    return any(
        '-LRB-' in doc_id
        and doc_id in retrieved_ids
        and common.doc_id_to_tokenized_text(doc_id) not in claim_text
        for doc_id in list(gold_ids)
    )
def utest_for_ground_truth(d_list):
    """Debug variant of utest_for_ground_truth: attaches gold evidence as
    the prediction and prints each cleaned evidence list.

    NOTE(review): this redefines the earlier function of the same name in
    this module, shadowing the counting version — confirm intentional.
    """
    for item in tqdm(d_list):
        cleaned_evidence = check_sentences.check_and_clean_evidence(item)
        print(cleaned_evidence)
        sent_id_list = []
        evidence_pairs = []
        if item['verifiable'] == "VERIFIABLE":
            # Only the first gold evidence group is used.
            for doc_id, ln in list(cleaned_evidence)[0]:
                sent_id_list.append(doc_id + c_scorer.SENT_LINE + str(ln))
                evidence_pairs.append([doc_id, ln])
        elif item['verifiable'] == "NOT VERIFIABLE":
            sent_id_list = []
        item["predicted_sentids"] = sent_id_list
        item['predicted_evidence'] = evidence_pairs
        item['predicted_label'] = item["label"]
def disabuigation_training_build(item, cursor, contain_first_sentence=False):
    """Build disambiguation training examples for one item.

    Positives: gold doc ids containing '-LRB-' whose tokenized text is
    absent from the claim.  Negatives: ALL retrieved doc ids containing
    '-LRB-' that are not gold (unlike the v0 variant, no random sampling).

    :param item: data item with 'prioritized_docids', 'claim_tokens', 'id'
                 and gold evidence readable by check_sentences.
    :param cursor: DB cursor forwarded to make_examples.
    :param contain_first_sentence: forwarded to make_examples.
    :return: whatever make_examples returns for (eid, positives, negatives).
    """
    doc_t_list = [it[0] for it in item['prioritized_docids']]
    evidence_group = check_sentences.check_and_clean_evidence(item)
    all_true_t_list = set()
    t_claim = ' '.join(item['claim_tokens'])
    for ground_truth_evid in evidence_group:
        true_t_list = set([it[0] for it in ground_truth_evid])
        all_true_t_list = set.union(all_true_t_list, true_t_list)
    all_true_t_list = list(all_true_t_list)

    positive_list = []
    negative_list = []
    eid = item['id']

    for doc_id in all_true_t_list:
        if '-LRB-' in doc_id and common.doc_id_to_tokenized_text(
                doc_id) not in t_claim:
            positive_list.append(doc_id)

    for doc_id in doc_t_list:
        if '-LRB-' in doc_id and doc_id not in all_true_t_list:
            negative_list.append(doc_id)

    return make_examples(eid, positive_list, negative_list, t_claim, cursor,
                         contain_first_sentence=contain_first_sentence)
def sample_additional_data_for_item_v1_0(item, additional_data_dictionary):
    """Pad gold evidence with sampled retrieved sentences (v1.0).

    VERIFIABLE items: each gold evidence group with fewer than 5 sentences
    is padded with 0..(5-n) randomly sampled retrieved sentence ids.
    NOT VERIFIABLE items: a single evidence set of 2-5 randomly sampled
    retrieved sentences is fabricated (gold evidence is empty).

    :param item: data item with 'verifiable', 'label', 'id' and gold
                 evidence readable by check_sentences.
    :param additional_data_dictionary: maps item id -> dict with
        'predicted_sentids' and 'scored_sentids' from sentence retrieval.
    :return: (res_sentids_list, flags) — one Evidences object per gold
             group (or exactly one for NEI), plus a descriptive flag each.

    NOTE(review): random.shuffle mutates the 'predicted_sentids' list held
    in the shared dictionary in place.
    """
    res_sentids_list = []
    flags = []
    if item['verifiable'] == "VERIFIABLE":
        assert item['label'] == 'SUPPORTS' or item['label'] == 'REFUTES'
        e_list = check_sentences.check_and_clean_evidence(item)
        current_id = item['id']
        assert current_id in additional_data_dictionary
        additional_data = additional_data_dictionary[current_id][
            'predicted_sentids']
        # Fetched but never used in this v1.0 variant (v1.1 uses it).
        additional_data_with_score = additional_data_dictionary[current_id][
            'scored_sentids']
        # print(len(additional_data))
        for evidences in e_list:
            # print(evidences)
            new_evidences = copy.deepcopy(evidences)
            n_e = len(evidences)
            if n_e < 5:
                # Pad up to at most 5 sentences with random retrieved ones.
                current_sample_num = random.randint(0, 5 - n_e)
                random.shuffle(additional_data)
                for sampled_e in additional_data[:current_sample_num]:
                    # sampled_e presumably "DocId<SENT_LINE>line_no" — confirm
                    doc_ids = sampled_e.split(c_scorer.SENT_LINE)[0]
                    ln = int(sampled_e.split(c_scorer.SENT_LINE)[1])
                    new_evidences.add_sent(doc_ids, ln)
            if new_evidences != evidences:
                flag = f"verifiable.non_eq.{len(new_evidences) - len(evidences)}"
                flags.append(flag)
                pass
            else:
                flag = "verifiable.eq.0"
                flags.append(flag)
                pass
            res_sentids_list.append(new_evidences)
        assert len(res_sentids_list) == len(e_list)
    elif item['verifiable'] == "NOT VERIFIABLE":
        assert item['label'] == 'NOT ENOUGH INFO'
        e_list = check_sentences.check_and_clean_evidence(item)
        current_id = item['id']
        additional_data = additional_data_dictionary[current_id][
            'predicted_sentids']
        # print(len(additional_data))
        random.shuffle(additional_data)
        # NEI items get 2-5 fabricated evidence sentences.
        current_sample_num = random.randint(2, 5)
        raw_evidences_list = []
        for sampled_e in additional_data[:current_sample_num]:
            doc_ids = sampled_e.split(c_scorer.SENT_LINE)[0]
            ln = int(sampled_e.split(c_scorer.SENT_LINE)[1])
            raw_evidences_list.append((doc_ids, ln))
        new_evidences = check_sentences.Evidences(raw_evidences_list)
        if len(new_evidences) == 0:
            # NOTE(review): flag says "verifiable" inside the NOT VERIFIABLE
            # branch — looks like a copy-paste label; left unchanged because
            # downstream code may match on the exact string.
            flag = f"verifiable.eq.0"
            flags.append(flag)
            pass
        else:
            flag = f"not_verifiable.non_eq.{len(new_evidences)}"
            flags.append(flag)
            # Gold evidence for NEI items must be empty.
            assert all(len(e) == 0 for e in e_list)
        res_sentids_list.append(new_evidences)
        assert len(res_sentids_list) == 1
    assert len(res_sentids_list) == len(flags)
    return res_sentids_list, flags
def sample_additional_data_for_item(item, additional_data_dictionary):
    #TODO check this code, very messy!!!
    """Pad gold evidence with sampled retrieved sentences (original version).

    VERIFIABLE items: each gold evidence group with fewer than 5 sentences
    is padded with 0..(5-n) randomly sampled retrieved sentence ids.
    NOT VERIFIABLE items: a single evidence set of 2-5 randomly sampled
    retrieved sentences is fabricated (gold evidence is empty).

    :param item: data item with 'verifiable', 'label', 'id' and gold
                 evidence readable by check_sentences.
    :param additional_data_dictionary: maps item id -> dict with
        'predicted_sentids' from sentence retrieval.
    :return: (res_sentids_list, flags) — one Evidences object per gold
             group (or exactly one for NEI), plus a descriptive flag each.

    NOTE(review): the flag strings here differ from the v1_0/v1_1 variants
    ("verifiable.eq.N" here vs "verifiable.non_eq.N" there) — confirm which
    spelling downstream consumers expect before unifying.
    """
    res_sentids_list = []
    flags = []
    # count = 0
    # total = 0
    # print(count, total)
    if item['verifiable'] == "VERIFIABLE":
        assert item['label'] == 'SUPPORTS' or item['label'] == 'REFUTES'
        e_list = check_sentences.check_and_clean_evidence(item)
        current_id = item['id']
        assert current_id in additional_data_dictionary
        additional_data = additional_data_dictionary[current_id][
            'predicted_sentids']
        for evidences in e_list:
            # print(evidences)
            new_evidences = copy.deepcopy(evidences)
            n_e = len(evidences)
            if n_e < 5:
                # Pad up to at most 5 sentences with random retrieved ones.
                # NOTE(review): shuffle mutates the shared dict's list in place.
                current_sample_num = random.randint(0, 5 - n_e)
                random.shuffle(additional_data)
                for sampled_e in additional_data[:current_sample_num]:
                    # sampled_e presumably "DocId<SENT_LINE>line_no" — confirm
                    doc_ids = sampled_e.split(c_scorer.SENT_LINE)[0]
                    ln = int(sampled_e.split(c_scorer.SENT_LINE)[1])
                    new_evidences.add_sent(doc_ids, ln)
            if new_evidences != evidences:
                # NOTE(review): says "eq" although this is the changed case.
                flag = f"verifiable.eq.{len(new_evidences) - len(evidences)}"
                flags.append(flag)
                # count += 1
                # print("Oh")
                # print(evidences)
                # print(new_evidences)
                pass
            else:
                flag = "verifiable.eq.0"
                flags.append(flag)
                pass
                # print("Yes")
            res_sentids_list.append(new_evidences)
            # total += 1
        assert len(res_sentids_list) == len(e_list)
    elif item['verifiable'] == "NOT VERIFIABLE":
        assert item['label'] == 'NOT ENOUGH INFO'
        e_list = check_sentences.check_and_clean_evidence(item)
        current_id = item['id']
        additional_data = additional_data_dictionary[current_id][
            'predicted_sentids']
        random.shuffle(additional_data)
        # NEI items get 2-5 fabricated evidence sentences.
        current_sample_num = random.randint(2, 5)
        raw_evidences_list = []
        for sampled_e in additional_data[:current_sample_num]:
            doc_ids = sampled_e.split(c_scorer.SENT_LINE)[0]
            ln = int(sampled_e.split(c_scorer.SENT_LINE)[1])
            raw_evidences_list.append((doc_ids, ln))
        new_evidences = check_sentences.Evidences(raw_evidences_list)
        # print(e_list)
        # print(new_evidences)
        if len(new_evidences) == 0:
            # NOTE(review): "verifiable" label inside the NOT VERIFIABLE
            # branch — likely copy-paste; left unchanged (runtime string).
            flag = f"verifiable.eq.0"
            flags.append(flag)
            pass
        else:
            flag = f"not_verifiable.eq.{len(new_evidences)}"
            flags.append(flag)
            # print("Oh")
            # global count
            # count += 1
            # print(additional_data)
            # Gold evidence for NEI items must be empty.
            assert all(len(e) == 0 for e in e_list)
        res_sentids_list.append(new_evidences)
        assert len(res_sentids_list) == 1
    assert len(res_sentids_list) == len(flags)
    return res_sentids_list, flags
def sample_additional_data_for_item_v1_1(item, additional_data_dictionary):
    """Pad gold evidence with sampled retrieved sentences (v1.1).

    Like v1.0 for VERIFIABLE items.  For NOT VERIFIABLE items the fabricated
    evidence always includes the top-2 highest-scored retrieved sentences
    plus 0-2 random ones (v1.0 used 2-5 purely random).

    :param item: data item with 'verifiable', 'label', 'id' and gold
                 evidence readable by check_sentences.
    :param additional_data_dictionary: maps item id -> dict with
        'predicted_sentids' and 'scored_sentids' (triples of
        (sid, score, prob) per the comment below) from sentence retrieval.
    :return: (res_sentids_list, flags) — one Evidences object per gold
             group (or exactly one for NEI), plus a descriptive flag each.
    """
    res_sentids_list = []
    flags = []
    if item['verifiable'] == "VERIFIABLE":
        assert item['label'] == 'SUPPORTS' or item['label'] == 'REFUTES'
        e_list = check_sentences.check_and_clean_evidence(item)
        current_id = item['id']
        assert current_id in additional_data_dictionary
        additional_data = additional_data_dictionary[current_id][
            'predicted_sentids']
        # additional_data_with_score = additional_data_dictionary[current_id]['scored_sentids']
        # print(len(additional_data))
        for evidences in e_list:
            # print(evidences)
            new_evidences = copy.deepcopy(evidences)
            n_e = len(evidences)
            if n_e < 5:
                # Pad up to at most 5 sentences with random retrieved ones.
                # NOTE(review): shuffle mutates the shared dict's list in place.
                current_sample_num = random.randint(0, 5 - n_e)
                random.shuffle(additional_data)
                for sampled_e in additional_data[:current_sample_num]:
                    # sampled_e presumably "DocId<SENT_LINE>line_no" — confirm
                    doc_ids = sampled_e.split(c_scorer.SENT_LINE)[0]
                    ln = int(sampled_e.split(c_scorer.SENT_LINE)[1])
                    new_evidences.add_sent(doc_ids, ln)
            if new_evidences != evidences:
                flag = f"verifiable.non_eq.{len(new_evidences) - len(evidences)}"
                flags.append(flag)
                pass
            else:
                flag = "verifiable.eq.0"
                flags.append(flag)
                pass
            res_sentids_list.append(new_evidences)
        assert len(res_sentids_list) == len(e_list)
    elif item['verifiable'] == "NOT VERIFIABLE":
        assert item['label'] == 'NOT ENOUGH INFO'
        e_list = check_sentences.check_and_clean_evidence(item)
        current_id = item['id']
        additional_data = additional_data_dictionary[current_id][
            'predicted_sentids']
        prioritized_additional_evidence_list = additional_data_dictionary[
            current_id]['scored_sentids']
        # cur_predicted_sentids.append((sent_i['sid'], sent_i['score'], sent_i['prob']))
        # Always keep the top-k sentences by descending score.
        certain_k = 2
        prioritized_additional_evidence_list = sorted(
            prioritized_additional_evidence_list, key=lambda x: -x[1])
        top_two_sent = [
            sid for sid, _, _ in prioritized_additional_evidence_list[:certain_k]
        ]
        random.shuffle(additional_data)
        # Plus 0-2 extra random retrieved sentences.
        current_sample_num = random.randint(0, 2)
        raw_evidences_list = []
        # Debug
        # print(prioritized_additional_evidence_list)
        # print(top_two_sent)
        for sampled_e in top_two_sent + additional_data[:current_sample_num]:
            doc_ids = sampled_e.split(c_scorer.SENT_LINE)[0]
            ln = int(sampled_e.split(c_scorer.SENT_LINE)[1])
            raw_evidences_list.append((doc_ids, ln))
        new_evidences = check_sentences.Evidences(raw_evidences_list)
        if len(new_evidences) == 0:
            # NOTE(review): "verifiable" label inside the NOT VERIFIABLE
            # branch — likely copy-paste; left unchanged (runtime string).
            flag = f"verifiable.eq.0"
            flags.append(flag)
            pass
        else:
            flag = f"not_verifiable.non_eq.{len(new_evidences)}"
            flags.append(flag)
            # Gold evidence for NEI items must be empty.
            assert all(len(e) == 0 for e in e_list)
        res_sentids_list.append(new_evidences)
        assert len(res_sentids_list) == 1
        # Debug
        # print(res_sentids_list)
    assert len(res_sentids_list) == len(flags)
    return res_sentids_list, flags
# # ssid = (item_id, doc_id, ln) # print(ssid) # train_sent_id_dict.add(ssid) selection_dict = simi_sampler.paired_selection_score_dict(train_sent_list) selection_dict = simi_sampler.paired_selection_score_dict( remaining_sent_list, selection_dict) # for k, v in selection_dict.items(): # print(k, v) total = 0 hit = 0 for item in tqdm(training_list): item_id = int(item['id']) e_list = check_sentences.check_and_clean_evidence(item) for evidences in e_list: for doc_id, ln in evidences: ssid: Tuple[int, str, int] = (item_id, doc_id, ln) if ssid in selection_dict: assert item['claim'] == selection_dict[ssid]['claim'] hit += 1 total += 1 print(hit, total, hit / total, total - hit) # for doc_id, ln in evidences: # if (item_id, doc_id, ln) not in selection_dict: # print(item)