def main():
    mean_len_knows = 0
    with open("test_data/cmu_dog/test_cmu_dog.src.tk", "w",
              encoding="utf-8") as out1, open(
                  "test_data/cmu_dog/test_cmu_dog.tgt.tk", "w",
                  encoding="utf-8") as out2:
        for i in range(len(SRC)):
            # Normalize separators and curly apostrophes in query/response.
            query_line = SRC[i].strip().replace(" < SEP > ",
                                                "<#Q#>").replace("’", "'")
            tgt_line = TGT[i].strip().replace("’", "'")
            # choice no.3: take the three knowledge fields in order (2, 0, 1)
            # and sentence-split each of them.
            know_fields = KNL[i].strip().split(" < SEP > ")
            knows = (nltk.sent_tokenize(know_fields[2].replace("’", "'")) +
                     nltk.sent_tokenize(know_fields[0].replace("’", "'")) +
                     nltk.sent_tokenize(know_fields[1].replace("’", "'")))
            # Pick the knowledge sentence with the highest BLEU-2 overlap
            # against the gold response.
            max_b2 = 0
            check_sentence = ""
            for know_line in knows:
                pro_know = normalize_answer(move_stop_words(know_line.strip()))
                pro_response = normalize_answer(
                    move_stop_words(tgt_line.strip()))
                b1, b2, b3 = bleu_metric([pro_know], [pro_response])
                if b2 >= max_b2:
                    max_b2 = b2
                    check_sentence = know_line
            mean_len_knows += len(knows)
            # Move the selected sentence to the front of the knowledge list.
            use_know_list = knows
            if check_sentence in use_know_list:
                index = use_know_list.index(check_sentence)
                use_know_list[0], use_know_list[index] = \
                    use_know_list[index], use_know_list[0]
            else:
                use_know_list[0] = check_sentence
            assert use_know_list.index(check_sentence) == 0
            used_know_line = " <#K#> ".join(use_know_list)
            src_line = query_line + " <#Q2K#> " + used_know_line
            out1.write(" ".join(tokenizer.tokenize(src_line.strip())) + "\n")
            out2.write(" ".join(tokenizer.tokenize(tgt_line.strip())) + "\n")
            if i % 1000 == 0:
                print("processed {} / {} examples".format(i, len(SRC)))
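# ---------------------------------------------------------------------------
# Hedged sketch, not part of the original script: main() assumes the helpers
# `move_stop_words`, `normalize_answer`, and `bleu_metric` are defined
# elsewhere in this repo. The stand-ins below only match how they are called
# here (strings in, BLEU-1/2/3 out); the stop-word list, normalization rules,
# and smoothing method are assumptions. Note that dev_step() below unpacks
# four values from bleu_metric, so the repo's real helper likely goes one
# n-gram order further.
import re
import string

from nltk.corpus import stopwords  # requires nltk.download('stopwords')
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

_STOP_WORDS = set(stopwords.words("english"))


def move_stop_words(text):
    # Drop English stop words, keeping the remaining token order.
    return " ".join(w for w in text.split() if w.lower() not in _STOP_WORDS)


def normalize_answer(text):
    # Lowercase, strip punctuation and articles, squeeze whitespace
    # (SQuAD-style answer normalization).
    text = "".join(ch for ch in text.lower() if ch not in string.punctuation)
    text = re.sub(r"\b(a|an|the)\b", " ", text)
    return " ".join(text.split())


def bleu_metric(hyps, refs):
    # Average smoothed sentence-level BLEU-1/2/3 over aligned pairs.
    smooth = SmoothingFunction().method1
    totals = [0.0, 0.0, 0.0]
    for hyp, ref in zip(hyps, refs):
        hyp_tok, ref_tok = hyp.split(), ref.split()
        for k, weights in enumerate([(1.0,), (0.5, 0.5), (1 / 3,) * 3]):
            totals[k] += sentence_bleu([ref_tok], hyp_tok, weights=weights,
                                       smoothing_function=smooth)
    n = max(len(hyps), 1)
    return tuple(t / n for t in totals)
# ---------------------------------------------------------------------------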
def compute_bleu_between_rule_and_model(rule_path, model_path):
    # Compare rule-based outputs against model decodes with BLEU and F1.
    with open(rule_path, "r", encoding='utf-8') as rule_file:
        rule = rule_file.readlines()
    with open(model_path, "r", encoding='utf-8') as model_file:
        model = model_file.readlines()
    model = [detokenize(item.lower()) for item in model]
    b1, b2, b3 = bleu_metric(rule, model)
    print("b1:{},b2:{},b3:{}".format(round(b1, 4), round(b2, 4), round(b3, 4)))
    res = f_one(rule, model)
    print('f1:{}'.format(res[0]))
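# Hedged usage sketch: the paths below are placeholders, not files shipped
# with this repo, and `detokenize` / `f_one` are assumed to be the repo's own
# helpers (lowercase detokenization, and a token-level F1 that returns the
# score as the first element of a tuple).
if __name__ == "__main__":
    compute_bleu_between_rule_and_model(
        "test_data/cmu_dog/test_cmu_dog.tgt.tk",  # rule/reference lines
        "output/decoded.txt")                     # model decode lines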
def calc_bleu(data_file):
    bleu_list = []
    debug = True
    for (history, label, knowledge_sentences) in data_generator(data_file):
        # process knowledge
        checked_sentence = knowledge_sentences[0]
        if debug:
            print('checked_sentence: {}, label: {}'.format(
                checked_sentence, label))
            debug = False
        pro_know = normalize_answer(move_stop_words(checked_sentence.strip()))
        pro_response = normalize_answer(move_stop_words(label.strip()))
        b1, b2, b3 = bleu_metric([pro_know], [pro_response])
        bleu_list.append(b2)
    return bleu_list
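# Hedged sketch of the `data_generator` contract calc_bleu() relies on: it is
# assumed to yield (history, label, knowledge_sentences) triples with the
# checked/gold knowledge sentence at index 0. A minimal JSON-lines reader
# under that assumption (the field names are guesses, not the repo's schema):
import json


def data_generator(data_file):
    with open(data_file, "r", encoding="utf-8") as f:
        for line in f:
            example = json.loads(line)
            yield (example["history"], example["label"],
                   example["knowledge_sentences"])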
def calc_bleu():
    mean_len_knows = 0
    bleu_list = []
    for i in range(len(SRC)):
        query_line = SRC[i].strip().replace(" < SEP > ",
                                            "<#Q#>").replace("’", "'")
        tgt_line = TGT[i].strip().replace("’", "'")
        # choice no.3: take the three knowledge fields in order (2, 0, 1)
        # and sentence-split each of them.
        know_fields = KNL[i].strip().split(" < SEP > ")
        knows = (nltk.sent_tokenize(know_fields[2].replace("’", "'")) +
                 nltk.sent_tokenize(know_fields[0].replace("’", "'")) +
                 nltk.sent_tokenize(know_fields[1].replace("’", "'")))
        # Score every knowledge sentence against the response; keep the best
        # BLEU-2 as this example's score.
        max_b2 = 0
        check_sentence = ""
        for know_line in knows:
            pro_know = normalize_answer(move_stop_words(know_line.strip()))
            pro_response = normalize_answer(move_stop_words(tgt_line.strip()))
            b1, b2, b3 = bleu_metric([pro_know], [pro_response])
            if b2 >= max_b2:
                max_b2 = b2
                check_sentence = know_line
        bleu_list.append(max_b2)
        mean_len_knows += len(knows)
        # Move the best sentence to the front, mirroring main(); `src_line`
        # below is built for parity but not written out when only scoring.
        use_know_list = knows
        if check_sentence in use_know_list:
            index = use_know_list.index(check_sentence)
            use_know_list[0], use_know_list[index] = \
                use_know_list[index], use_know_list[0]
        else:
            use_know_list[0] = check_sentence
        assert use_know_list.index(check_sentence) == 0
        used_know_line = " <#K#> ".join(use_know_list)
        src_line = query_line + " <#Q2K#> " + used_know_line
        if i % 1000 == 0:
            print("processed {} / {} examples".format(i, len(SRC)))
    return bleu_list
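# Hedged usage sketch: calc_bleu() returns one best-knowledge BLEU-2 score
# per example, so the number a caller would presumably report is the mean
# over the whole split.
if __name__ == "__main__":
    scores = calc_bleu()
    print("mean best-knowledge BLEU-2: {:.4f}".format(
        sum(scores) / max(len(scores), 1)))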
def main():
    all_query = []
    all_know = []
    all_other_know = []
    count = 0
    no_know = 0
    for dialog_id, dialog_turn in conversations_file.items():
        count += 1
        dialog_content = dialog_turn["content"]
        knowledge_turn = know_file[dialog_id]
        query_line = ""
        for every_content in dialog_content:
            sentence = every_content["message"]
            sentence = sentence.encode('unicode_escape').decode('utf-8')
            know_ids = every_content["knowledge_source"]  # list
            single_knowledges = knowledge_turn[every_content["agent"]]
            article_knowledge = knowledge_turn["article"]
            other_know_ids = ["FS1", "FS2", "FS3", "AS1", "AS2", "AS3"]
            other_know_ids = list(set(other_know_ids) - set(know_ids))
            knowledge_list = []
            other_knowledge_list = []
            # Collect the knowledge actually cited for this message.
            for know_id in know_ids:
                if know_id == "Personal Knowledge":
                    pass
                elif know_id in single_knowledges:
                    assert ("shortened_wiki_lead_section"
                            in single_knowledges[know_id]
                            or "summarized_wiki_lead_section"
                            in single_knowledges[know_id])
                    if "shortened_wiki_lead_section" in single_knowledges[
                            know_id]:
                        knowledge_list.extend(
                            nltk.sent_tokenize(single_knowledges[know_id]
                                               ["shortened_wiki_lead_section"]))
                    else:
                        knowledge_list.extend(
                            nltk.sent_tokenize(
                                single_knowledges[know_id]
                                ["summarized_wiki_lead_section"]))
                    for item in single_knowledges[know_id]["fun_facts"]:
                        knowledge_list.append(item)
                elif know_id in article_knowledge:
                    knowledge_list.extend(
                        nltk.sent_tokenize(article_knowledge[know_id]))
            # Collect the remaining (uncited) knowledge as distractors.
            for other_know_id in other_know_ids:
                if other_know_id in single_knowledges:
                    assert ("shortened_wiki_lead_section"
                            in single_knowledges[other_know_id]
                            or "summarized_wiki_lead_section"
                            in single_knowledges[other_know_id])
                    if "shortened_wiki_lead_section" in single_knowledges[
                            other_know_id]:
                        other_knowledge_list.extend(
                            nltk.sent_tokenize(
                                single_knowledges[other_know_id]
                                ["shortened_wiki_lead_section"]))
                    else:
                        other_knowledge_list.extend(
                            nltk.sent_tokenize(
                                single_knowledges[other_know_id]
                                ["summarized_wiki_lead_section"]))
                    for item in single_knowledges[other_know_id]["fun_facts"]:
                        other_knowledge_list.append(item)
                elif (dialog_turn["config"] !=
                      "C") and other_know_id in article_knowledge:
                    other_knowledge_list.extend(
                        nltk.sent_tokenize(article_knowledge[other_know_id]))
            if knowledge_list == []:
                no_know += 1
                knowledge_list = ["__no_knowledge__"]
            know_line = ""
            for k in knowledge_list:
                k = k.encode('unicode_escape').decode('utf-8')
                know_line += k
                know_line += "\t"
            all_know.append(know_line)
            other_know_line = ""
            for k in other_knowledge_list:
                k = k.encode('unicode_escape').decode('utf-8')
                other_know_line += k
                other_know_line += "\t"
            all_other_know.append(other_know_line)
            query_line += sentence
            query_line += " <#Q#> "
        all_query.append(query_line)
    assert len(all_other_know) == len(all_know)

    num = 0
    src = []
    tgt = []
    for i in range(len(all_query)):
        query_list = all_query[i].strip().split("<#Q#>")[:-1]
        for t in range(len(query_list)):
            history = " <#Q#> ".join(query_list[:t])
            if history.strip() == "":
                history = "__no_history__"
            knows = all_know[num].strip().split("\t")
            # Pick the knowledge sentence with the best BLEU-2 overlap with
            # the current turn and move it to the front.
            max_b2 = 0
            for one_know in knows:
                b1, b2, b3 = bleu_metric(
                    [normalize_answer(move_stop_words(query_list[t].strip()))],
                    [normalize_answer(move_stop_words(one_know))])
                if b2 >= max_b2:
                    max_b2 = b2
                    check = one_know
            assert check in knows
            loc = knows.index(check)
            knows[loc], knows[0] = knows[0], knows[loc]
            other_knows = all_other_know[num].strip().split("\t")[:-1]  # unused below
            know_str = " <#K#> ".join(knows)
            src_line = history.strip() + " <#Q2K#> " + know_str.strip()
            tgt_line = query_list[t].strip()
            src.append(src_line)
            tgt.append(tgt_line)
            num += 1
    assert num == len(all_know)

    with open("test_data/{}/test_{}.src.tk".format(part, part), "w") as src_out, \
            open("test_data/{}/test_{}.tgt.tk".format(part, part), "w") as tgt_out:
        mean_know = 0
        for i in range(len(src)):
            # Word-tokenize history and knowledge separately, then subword-
            # tokenize the joined line.
            query_list = src[i].strip().split("<#Q2K#>")[0].split("<#Q#>")
            query_list = [
                " ".join(word_tokenize(item.strip())) for item in query_list
            ]
            query_line = " <#Q#> ".join(query_list).strip()
            know_list = src[i].strip().split("<#Q2K#>")[1].split("<#K#>")
            know_list = [
                " ".join(word_tokenize(item.strip())) for item in know_list
            ]
            mean_know += len(know_list)
            know_line = " <#K#> ".join(know_list).strip()
            pro_src_line = " ".join(
                tokenizer.tokenize(query_line + " <#Q2K#> " + know_line))
            pro_tgt_line = " ".join(
                tokenizer.tokenize(" ".join(word_tokenize(tgt[i].strip()))))
            src_out.write(pro_src_line)
            src_out.write("\n")
            tgt_out.write(pro_tgt_line)
            tgt_out.write("\n")
            if i % 1000 == 0:
                print("processed {} examples".format(i))
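# Hedged sketch of the module-level objects main() assumes: NLTK's
# word_tokenize plus a subword `tokenizer`. The BERT vocabulary below is a
# guess, not necessarily what the repo loads, and `conversations_file`,
# `know_file`, and `part` are likewise assumed to be loaded from the
# Topical-Chat style conversation/reading-set JSON files before main() runs.
from nltk import word_tokenize
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")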
def dev_step(split, global_step):
    if split == 'test_seen':
        test_loader = test_seen_loader
    elif split == 'test_unseen':
        test_loader = test_unseen_loader
    else:
        raise ValueError
    dis_model.eval()
    gen_model.eval()
    n_token, test_loss = 0, 0.0  # ppl
    test_hyp, test_ref = [], []
    count = 0
    with torch.no_grad():
        for knowledges, histories, users, responses, knowledge_lens in test_loader:
            knowledges = [know.split('\n\n') for know in knowledges]
            histories = [his.split('\n\n') for his in histories]
            # Knowledge selection: the discriminator picks one knowledge
            # sentence per example.
            dis_args = dis_batcher(knowledges, histories, knowledge_lens,
                                   args.n_sent)
            dis_out = dis_model(*dis_args)
            dis_knowledges = [[knowledges[bi][dis_out[0][bi].item()]]
                              for bi in range(len(knowledges))]
            # Teacher-forced generator loss, accumulated per token for ppl.
            gen_args = gen_batcher(dis_knowledges, histories, users, responses,
                                   args.segment, True)
            loss = gen_criterion(
                gen_model(gen_args[0], token_type_ids=gen_args[1])[0],
                gen_args[2])
            n_token += loss.size(0)
            test_loss += loss.sum().item()
            # Free-running decoding for BLEU/Distinct/F1.
            for bi in range(len(dis_knowledges)):
                dec_in = gen_batcher(dis_knowledges[bi:bi + 1],
                                     histories[bi:bi + 1],
                                     users[bi:bi + 1],
                                     segment=args.segment,
                                     training=False)
                dec_out = gen_model.batch_decode(
                    dec_in, args.max_length, args.min_length,
                    args.early_stopping, args.beam_size,
                    args.repetition_penalty, gen_batcher.eos_id,
                    args.length_penalty, args.no_repeat_ngram_size)
                # Keep only the newly generated tokens after the prompt.
                dec_out = dec_out[0].tolist()[dec_in.size(1):]
                _hyp = gen_batcher.tokenizer.decode(
                    dec_out,
                    skip_special_tokens=True,
                    clean_up_tokenization_spaces=False)
                _ref = responses[bi]
                test_hyp.append(_hyp)
                test_ref.append(_ref)
                count += 1
                if count % 1000 == 0:
                    print(count)
    with open(
            os.path.join(out_dir,
                         '{}-decoded-iter-{}.txt'.format(split, global_step)),
            'w') as f:
        for _hyp, _ref in zip(test_hyp, test_ref):
            f.write('{} ||| {}\n'.format(_hyp, _ref))

    MeanLoss = test_loss / n_token
    b1, b2, b3, b4 = bleu_metric(test_hyp, test_ref)
    d1, d2 = distinct_metric(test_hyp)
    f1 = f1_metric(test_hyp, test_ref)
    time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("**********************************")
    print("{} results..........".format(split))
    print('hypothesis: ', len(test_hyp))
    print("Step: %d \t| ppl: %.3f \t| %s" %
          (global_step, math.exp(MeanLoss), time_str))
    print("BLEU-1/2/3/4: {:.4f}/{:.4f}/{:.4f}/{:.4f}".format(b1, b2, b3, b4))
    print("Distinct-1/2: {:.4f}/{:.4f}".format(d1, d2))
    print("F1: {:.4f}".format(f1))
    print("**********************************")
    return {
        'f1': f1,
        'loss': MeanLoss,
        'bleu1': b1,
        'bleu2': b2,
        'bleu3': b3,
        'bleu4': b4,
        'distinct1': d1,
        'distinct2': d2
    }
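# Hedged usage sketch: dev_step() closes over dis_model, gen_model, the two
# test loaders, the batchers, gen_criterion, args, and out_dir, all assumed
# to be built by the surrounding training script; the best-checkpoint
# bookkeeping below is likewise an assumption, not the repo's actual logic.
best_f1 = 0.0
for eval_split in ('test_seen', 'test_unseen'):
    metrics = dev_step(eval_split, global_step)
    if eval_split == 'test_seen' and metrics['f1'] > best_f1:
        best_f1 = metrics['f1']
        torch.save(gen_model.state_dict(),
                   os.path.join(out_dir, 'best_gen_model.pt'))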