def smatch_from_counts(counts):
    """Return the F-score for Smatch counts pooled over several entries.

    ``counts`` is traversed three times (once per position), so it has to
    be re-iterable. Items 0, 1 and 2 of every entry are summed separately,
    the three totals are passed to ``smatch.compute_f`` and only the third
    element of its result (the F-score) is returned.
    """
    first_total = sum(entry[0] for entry in counts)
    second_total = sum(entry[1] for entry in counts)
    third_total = sum(entry[2] for entry in counts)
    return smatch.compute_f(first_total, second_total, third_total)[2]
async def __call__(self, gold_lines, silver_lines, verbose=False, seed=None):
    """Collect all matched sentences and compute the document-level scores.

    The async iterator ``self.sentences(...)`` is drained first; afterwards
    the totals held in ``self.total_match_num``, ``self.total_test_num``
    and ``self.total_gold_num`` are turned into precision, recall and
    F-score by ``smatch.compute_f``.
    (Presumably those counters are updated while the sentences are
    produced -- confirm against ``self.sentences``.)

    Returns:
        A ``Dict`` with ``sentences``, ``precision``, ``recall``,
        ``best_f_score``, ``skipped`` and ``good``.
    """
    sentences = [
        matched
        async for matched in self.sentences(
            gold_lines, silver_lines, verbose=verbose, seed=seed)
    ]

    # Read the counters only after the iterator has been exhausted.
    totals = (self.total_match_num, self.total_test_num, self.total_gold_num)
    precision, recall, best_f_score = smatch.compute_f(*totals)

    if verbose:
        print("Total:")
        print()
        print("Precision: %.4f" % precision)
        print("Recall: %.4f" % recall)
        print("Smatch score: %.4f" % best_f_score)

    return Dict(
        sentences=sentences,
        precision=precision,
        recall=recall,
        best_f_score=best_f_score,
        skipped=self.skipped,
        good=self.good,
    )
def __init__(self, best_match_num, test_triple_num, gold_triple_num):
    """Initialise the statistics from raw Smatch triple counts.

    The parent class receives a ``SummaryStatistics`` for ``PRIMARY`` built
    from the number of matched triples, the test triples that were not
    matched and the gold triples that were not matched. Precision, recall
    and F1 as computed by ``smatch.compute_f`` are stored on ``self.p``,
    ``self.r`` and ``self.f1``.
    """
    only_guessed = test_triple_num - best_match_num
    only_reference = gold_triple_num - best_match_num
    primary_stats = evaluation.SummaryStatistics(
        best_match_num, only_guessed, only_reference)
    super().__init__({PRIMARY: primary_stats},
                     default={PRIMARY.name: PRIMARY})

    scores = smatch.compute_f(best_match_num, test_triple_num, gold_triple_num)
    self.p, self.r, self.f1 = scores
def update(self, sample_id, tokens, oracle_actions, actions):
    """Score a hypothesis against the rule oracle and keep it if it wins.

    The oracle's Smatch counts are computed the first time ``sample_id`` is
    seen; they are then recorded in ``self.original_actions`` and in
    ``self.oracle_smatch_counts_cache`` and reused on later calls. The
    hypothesis ``actions`` are scored on every call and stored in
    ``self.mined_actions`` (without ``'</s>'`` entries) when their F-score
    is strictly higher than the oracle's.

    Returns:
        ``(hypo_smatch, oracle_smatch)``, the two F-scores.
    """
    counts_cache = self.oracle_smatch_counts_cache
    if sample_id in counts_cache:
        # Oracle already scored for this sample: reuse its counts.
        oracle_counts = counts_cache[sample_id]
        oracle_smatch = smatch.compute_f(*oracle_counts)[2]
    else:
        oracle_counts = get_smatch_counts(
            self.ref_amr_lines[sample_id],
            get_amr(tokens, oracle_actions, self.entity_rules),
            self.restart_num,
        )
        oracle_smatch = smatch.compute_f(*oracle_counts)[2]
        # Remember the oracle actions together with their counts ...
        self.original_actions[sample_id] = (oracle_actions, oracle_counts)
        # ... and cache the counts for subsequent calls.
        counts_cache[sample_id] = oracle_counts

    # Score the hypothesis against the same reference.
    # NOTE(review): unlike the oracle call, get_amr receives no entity
    # rules here -- confirm this is intentional.
    hypo_counts = get_smatch_counts(
        self.ref_amr_lines[sample_id],
        get_amr(tokens, actions),
        self.restart_num,
    )
    hypo_smatch = smatch.compute_f(*hypo_counts)[2]

    if oracle_smatch < hypo_smatch:
        kept_actions = [action for action in actions if action != '</s>']
        self.mined_actions[sample_id] = (kept_actions, hypo_counts)

    return hypo_smatch, oracle_smatch
def compute_smatch(test_entries, gold_entries):
    """Compute Smatch precision, recall and F-score over paired entries.

    Every ``(test, gold)`` pair is handed to ``match_pair`` in a worker
    pool; each result is a triple whose items are added to the running
    match / test / gold totals, which are finally passed to
    ``smatch.compute_f``.

    Args:
        test_entries: iterable of test-side entries.
        gold_entries: iterable of gold-side entries, paired positionally
            with ``test_entries`` via ``zip`` (extra items on the longer
            side are ignored).

    Returns:
        ``(precision, recall, f_score)`` as returned by
        ``smatch.compute_f``.
    """
    num_match = num_test = num_gold = 0
    pool = Pool()
    try:
        pairs = zip(test_entries, gold_entries)
        for n_match, n_test, n_gold in pool.imap_unordered(match_pair, pairs):
            num_match += n_match
            num_test += n_test
            num_gold += n_gold
        pool.close()
    except BaseException:
        # A failing worker (or an interrupt) must not leave the pool's
        # processes behind: stop them before re-raising.
        pool.terminate()
        raise
    finally:
        pool.join()
    precision, recall, f_score = smatch.compute_f(num_match, num_test, num_gold)
    return precision, recall, f_score
def compute_smatch_2(list1, list2):
    """Compute Smatch precision, recall and F-score over two paired lists.

    Every ``(item1, item2)`` pair is handed to ``match_pair_2`` in a worker
    pool; each result is a triple whose items are added to the running
    match / test / gold totals, which are finally passed to
    ``smatch.compute_f``.

    Args:
        list1: iterable of entries for the first side.
        list2: iterable of entries for the second side, paired positionally
            with ``list1`` via ``zip`` (extra items on the longer side are
            ignored).

    Returns:
        ``(precision, recall, f_score)`` as returned by
        ``smatch.compute_f``.
    """
    num_match, num_test, num_gold = (0, 0, 0)
    pool = Pool()
    try:
        pairs = zip(list1, list2)
        for n_match, n_test, n_gold in pool.imap_unordered(match_pair_2, pairs):
            num_match += n_match
            num_test += n_test
            num_gold += n_gold
        pool.close()
    except BaseException:
        # Do not leak worker processes when scoring fails part-way.
        pool.terminate()
        raise
    finally:
        pool.join()
    precision, recall, f_score = smatch.compute_f(num_match, num_test, num_gold)
    return precision, recall, f_score
def process_sentence(sentence): nonlocal nr sentence = Dict(sentence) sentence.gold = Dict(sentence.gold) sentence.silver = Dict(sentence.silver) nr += 1 if not sentence: if verbose: print('Skipping sentence:', gold_amr.text) self.skipped += 1 return if verbose: print(sentence.gold.text) gold_triple_num = len(sentence.gold.instances) + len( sentence.gold.attributes) + len(sentence.gold.relations) test_triple_num = len(sentence.silver.instances) + len( sentence.silver.attributes) + len(sentence.silver.relations) gold_triple_num += 1 if sentence.gold.top else 0 test_triple_num += 1 if sentence.silver.top else 0 # if each AMR pair should have a score, compute and output it here sentence.precision, sentence.recall, sentence.best_f_score = smatch.compute_f( sentence.best_match_num, test_triple_num, gold_triple_num) # sentence.precision = precision # sentence.recall = recall # sentence.best_f_score = best_f_score self.total_match_num += sentence.best_match_num self.total_test_num += test_triple_num self.total_gold_num += gold_triple_num if verbose: print() print("Precision: %.4f" % sentence.precision) print("Recall: %.4f" % sentence.recall) print("Smatch score: %.4f" % sentence.best_f_score) print() else: print('.', end='', flush=True) self.good += 1 sentence.nr = nr return sentence
def print_score_action_stats(scored_actions):
    """Print the pooled Smatch F-score and the mining-type tallies.

    For every entry, items 1, 2 and 3 are added to three running totals
    that are passed to ``compute_f`` (whose third result is reported), and
    item 6 is tallied; the tallies for ``'score'`` and ``'length'`` are
    printed next to the F-score.
    """
    totals = [0, 0, 0]
    tally = Counter()
    for entry in scored_actions:
        tally[entry[6]] += 1
        totals = [
            running + entry[position]
            for position, running in enumerate(totals, start=1)
        ]
    f_score = compute_f(*totals)[2]
    print('Smatch {:.3f} scored mined {:d} length mined {:d}'.format(
        f_score, tally['score'], tally['length']
    ))
def process_sentence(sentence): nonlocal nr sentence = Dict(sentence) sentence.gold = Dict(sentence.gold) sentence.silver = Dict(sentence.silver) nr += 1 if not sentence: if verbose: print('Skipping sentence:', gold_amr.text) self.skipped += 1 return if verbose: print(sentence.gold.text) gold_triple_num = len(sentence.gold.instances) + len(sentence.gold.attributes) + len(sentence.gold.relations) test_triple_num = len(sentence.silver.instances) + len(sentence.silver.attributes) + len(sentence.silver.relations) gold_triple_num += 1 if sentence.gold.top else 0 test_triple_num += 1 if sentence.silver.top else 0 # if each AMR pair should have a score, compute and output it here sentence.precision, sentence.recall, sentence.best_f_score = smatch.compute_f(sentence.best_match_num, test_triple_num, gold_triple_num) # sentence.precision = precision # sentence.recall = recall # sentence.best_f_score = best_f_score self.total_match_num += sentence.best_match_num self.total_test_num += test_triple_num self.total_gold_num += gold_triple_num if verbose: print() print("Precision: %.4f" % sentence.precision) print("Recall: %.4f" % sentence.recall) print("Smatch score: %.4f" % sentence.best_f_score) print() else: print('.', end='', flush=True) self.good += 1 sentence.nr = nr return sentence
async def __call__(self, gold_lines, silver_lines, verbose=False, seed=None):
    """Run the matcher over a document and return its summary.

    All items produced by the async iterator ``self.sentences(...)`` are
    gathered, then ``smatch.compute_f`` is applied to the totals stored in
    ``self.total_match_num``, ``self.total_test_num`` and
    ``self.total_gold_num``. With ``verbose`` the three document-level
    scores are printed.

    Returns:
        A ``Dict`` with ``sentences``, ``precision``, ``recall``,
        ``best_f_score``, ``skipped`` and ``good``.
    """
    collected = []
    sentence_stream = self.sentences(
        gold_lines, silver_lines, verbose=verbose, seed=seed)
    async for item in sentence_stream:
        collected.append(item)

    precision, recall, best_f_score = smatch.compute_f(
        self.total_match_num, self.total_test_num, self.total_gold_num)

    if verbose:
        print("Total:")
        print()
        report = (
            ("Precision: %.4f", precision),
            ("Recall: %.4f", recall),
            ("Smatch score: %.4f", best_f_score),
        )
        for template, value in report:
            print(template % value)

    return Dict(sentences=collected,
                precision=precision,
                recall=recall,
                best_f_score=best_f_score,
                skipped=self.skipped,
                good=self.good)
def make_matched_document(gold_lines, silver_lines, verbose=False):
    """Match gold and silver AMRs pairwise and score the whole document.

    Both inputs are parsed with ``parse_amr_iter`` and consumed in
    lockstep. A pair is skipped when either AMR is not ``valid``;
    otherwise it is matched with ``make_matched_sentence``, scored with
    ``smatch.compute_f`` and its counts are added to the document totals.
    Without ``verbose`` a dot is printed per scored pair.

    Returns:
        A ``Dict`` with ``sentences``, ``precision``, ``recall``,
        ``best_f_score``, ``skipped`` and ``good``.
    """
    gold_amrs = parse_amr_iter(gold_lines)
    silver_amrs = parse_amr_iter(silver_lines)

    sentences = []
    skipped = 0
    total_match_num = total_test_num = total_gold_num = 0

    # ``nr`` is the 1-based position of the pair, skipped pairs included.
    for nr, (gold_amr, silver_amr) in enumerate(zip(gold_amrs, silver_amrs), start=1):
        if not (gold_amr.valid and silver_amr.valid):
            if verbose:
                print('Skipping sentence:', gold_amr.text)
            skipped += 1
            continue

        if verbose:
            print(gold_amr.text)

        sentence = make_matched_sentence(gold_amr, silver_amr)

        gold_triple_num = sum(
            len(part) for part in (sentence.gold.instances,
                                   sentence.gold.attributes,
                                   sentence.gold.relations))
        test_triple_num = sum(
            len(part) for part in (sentence.silver.instances,
                                   sentence.silver.attributes,
                                   sentence.silver.relations))
        # A top node counts as one extra triple.
        if sentence.gold.top:
            gold_triple_num += 1
        if sentence.silver.top:
            test_triple_num += 1

        # Per-pair scores.
        pair_scores = smatch.compute_f(
            sentence.best_match_num, test_triple_num, gold_triple_num)
        sentence.precision, sentence.recall, sentence.best_f_score = pair_scores

        total_match_num += sentence.best_match_num
        total_test_num += test_triple_num
        total_gold_num += gold_triple_num

        if verbose:
            print()
            print("Precision: %.4f" % sentence.precision)
            print("Recall: %.4f" % sentence.recall)
            print("Smatch score: %.4f" % sentence.best_f_score)
            print()
        else:
            print('.', end='', flush=True)

        sentence.nr = nr
        sentences.append(sentence)

    precision, recall, best_f_score = smatch.compute_f(
        total_match_num, total_test_num, total_gold_num)

    if verbose:
        print("Total:")
        print()
        print("Precision: %.4f" % precision)
        print("Recall: %.4f" % recall)
        print("Smatch score: %.4f" % best_f_score)

    # Each iterator is advanced once more; a leftover item is currently
    # ignored.
    if next(gold_amrs, None):
        pass
    if next(silver_amrs, None):
        pass

    # Every scored pair was appended, so the list length is the good count.
    return Dict(sentences=sentences,
                precision=precision,
                recall=recall,
                best_f_score=best_f_score,
                skipped=skipped,
                good=len(sentences))
def _pair_f_score(gold, silver):
    """Return the Smatch F-score of ``silver`` against ``gold``.

    Returns 0 without scoring when both arguments are the same object or
    when either AMR is not ``valid``.
    """
    if silver is gold:
        return 0
    if not silver.amr.valid or not gold.amr.valid:
        return 0
    sentence = make_matched_sentence(gold.amr, silver.amr)
    sentence.text = gold.text or silver.text
    gold_triple_num = (len(sentence.gold.instances)
                       + len(sentence.gold.attributes)
                       + len(sentence.gold.relations))
    test_triple_num = (len(sentence.silver.instances)
                       + len(sentence.silver.attributes)
                       + len(sentence.silver.relations))
    # A top node counts as one extra triple on its side.
    gold_triple_num += 1 if sentence.gold.top else 0
    test_triple_num += 1 if sentence.silver.top else 0
    sentence.precision, sentence.recall, sentence.best_f_score = smatch.compute_f(
        sentence.best_match_num, test_triple_num, gold_triple_num)
    return sentence.best_f_score


def choose_best(input_sentences, score_on_sentences=None, **options):
    """Pick the candidate whose summed F-score against the others is highest.

    Every candidate in ``input_sentences`` is scored against each sentence
    in ``score_on_sentences`` (the candidates themselves when that is
    empty or omitted) and the F-scores are summed. If the sum is 0 and a
    separate scoring set was used, the candidate is re-scored against the
    candidates instead.

    Args:
        input_sentences: candidate sentences; each needs ``amr``, ``text``
            and, with ``require_parsed_dates``, ``amr_string_collapsed``.
        score_on_sentences: sentences to score against.
        **options: ``require_parsed_dates`` drops candidates whose
            collapsed AMR string still contains a ``YYYY-MM-DD`` pattern,
            unless that would drop every candidate.

    Returns:
        ``(best, results)``. ``results`` holds one ``Dict(amr, gold,
        score)`` per candidate (``gold`` is the last sentence iterated
        while scoring). ``best`` is the top result with ``min_dist`` set to
        its lead over the runner-up (0 when there is none), or ``None``
        when there are no candidates.
    """
    if not score_on_sentences:
        score_on_sentences = input_sentences
    results = []
    options = Dict(options)

    # skip silver AMRs with unparsed date
    if options.require_parsed_dates:
        unparsed_date_re = re.compile(r'\d\d\d\d-\d\d-\d\d')
        without_unparsed_date_sentences = [
            sentence for sentence in input_sentences
            if not unparsed_date_re.search(sentence.amr_string_collapsed)
        ]
        if without_unparsed_date_sentences:
            input_sentences = without_unparsed_date_sentences

    for silver in input_sentences:
        score = 0
        for gold in score_on_sentences:
            score += _pair_f_score(gold, silver)
        # Fall back on scoring against the candidates themselves if the
        # separate scoring set produced nothing.
        if score == 0 and input_sentences is not score_on_sentences:
            for gold in input_sentences:
                score += _pair_f_score(gold, silver)
        results.append(Dict(amr=silver, gold=gold, score=score))

    if not results:
        # No candidates: previously this crashed on ``None.min_dist``.
        return None, results

    best = None
    second_best = None
    for result in results:
        if best is None or best.score < result.score:
            second_best = best
            best = result
        elif second_best is None or second_best.score < result.score:
            second_best = result
    best.min_dist = best.score - (second_best.score if second_best else best.score)
    return best, results
def compute_files(user1, user2, file_list, dir_pre, start_num):
    """
    Compute the smatch score for a list of files annotated by two users.

    For every name in file_list the first AMR of
    dir_pre + user1 + "/" + name + ".txt" (test side) is matched against
    the first AMR of the same file under user2 (gold side); the match and
    triple counts are summed over all files.

    Args:
        user1: user 1 name
        user2: user 2 name
        file_list: file names without the ".txt" extension
        dir_pre: the file location prefix (concatenated as-is)
        start_num: not used by this function
    Returns:
        the f score formatted with "%.2f", or -1.00 if one of the files
        does not exist. Files with an empty AMR are skipped; processing
        stops at the first file that cannot be opened or read.
    """
    match_total = 0
    test_total = 0
    gold_total = 0
    for fi in file_list:
        file1 = dir_pre + user1 + "/" + fi + ".txt"
        file2 = dir_pre + user2 + "/" + fi + ".txt"
        if not os.path.exists(file1):
            print >> ERROR_LOG, "*********Error: ", file1, "does not exist*********"
            return -1.00
        if not os.path.exists(file2):
            print >> ERROR_LOG, "*********Error: ", file2, "does not exist*********"
            return -1.00
        # Only the first AMR of each file is needed, so both handles are
        # closed as soon as it has been read. On failure we stop instead
        # of continuing with unbound (or stale) file handles.
        try:
            with open(file1, "r") as file1_h:
                with open(file2, "r") as file2_h:
                    cur_amr1 = smatch.get_amr_line(file1_h)
                    cur_amr2 = smatch.get_amr_line(file2_h)
        except IOError:
            print >> ERROR_LOG, "Cannot open the files", file1, file2
            break
        if cur_amr1 == "":
            print >> ERROR_LOG, "AMR 1 is empty"
            continue
        if cur_amr2 == "":
            print >> ERROR_LOG, "AMR 2 is empty"
            continue
        amr1 = amr.AMR.parse_AMR_line(cur_amr1)
        amr2 = amr.AMR.parse_AMR_line(cur_amr2)
        test_label = "a"
        gold_label = "b"
        amr1.rename_node(test_label)
        amr2.rename_node(gold_label)
        (test_inst, test_rel1, test_rel2) = amr1.get_triples2()
        (gold_inst, gold_rel1, gold_rel2) = amr2.get_triples2()
        if verbose:
            print >> ERROR_LOG, "Instance triples of file 1:", len(test_inst)
            print >> ERROR_LOG, test_inst
            print >> sys.stderr, "Relation triples of file 1:", len(test_rel1) + len(test_rel2)
            print >> sys.stderr, test_rel1
            print >> sys.stderr, test_rel2
            print >> ERROR_LOG, "Instance triples of file 2:", len(gold_inst)
            print >> ERROR_LOG, gold_inst
            print >> sys.stderr, "Relation triples of file 2:", len(gold_rel1) + len(gold_rel2)
            print >> sys.stderr, gold_rel1
            print >> sys.stderr, gold_rel2
        # The side with fewer instance triples is passed first.
        if len(test_inst) < len(gold_inst):
            (best_match, best_match_num) = smatch.get_fh(
                test_inst, test_rel1, test_rel2,
                gold_inst, gold_rel1, gold_rel2,
                test_label, gold_label)
            if verbose:
                print >> ERROR_LOG, "best match number", best_match_num
                print >> ERROR_LOG, "Best Match:", smatch.print_alignment(
                    best_match, test_inst, gold_inst)
        else:
            (best_match, best_match_num) = smatch.get_fh(
                gold_inst, gold_rel1, gold_rel2,
                test_inst, test_rel1, test_rel2,
                gold_label, test_label)
            if verbose:
                print >> ERROR_LOG, "best match number", best_match_num
                print >> ERROR_LOG, "Best Match:", smatch.print_alignment(
                    best_match, gold_inst, test_inst, True)
        match_total += best_match_num
        test_total += len(test_inst) + len(test_rel1) + len(test_rel2)
        gold_total += len(gold_inst) + len(gold_rel1) + len(gold_rel2)
        # Reset smatch's module-level match dictionary between files.
        smatch.match_num_dict.clear()
    (precision, recall, f_score) = smatch.compute_f(match_total, test_total, gold_total)
    return "%.2f" % f_score
def compute_files(user1, user2, file_list, dir_pre, start_num):
    """
    Compute the smatch score for a file list between two users.

    The first AMR of each user1 file is the test side and the first AMR of
    the matching user2 file is the gold side. Match counts and triple
    counts are accumulated over all files before the f score is computed.

    Args:
        user1: user 1 name
        user2: user 2 name
        file_list: file names without the ".txt" extension
        dir_pre: the file location prefix (concatenated as-is)
        start_num: not used by this function
    Returns:
        the f score formatted with "%.2f", or -1.00 if one of the files
        does not exist. A file pair with an empty AMR is skipped; the loop
        ends at the first pair that cannot be opened or read.
    """
    match_total = 0
    test_total = 0
    gold_total = 0
    for fi in file_list:
        file1 = dir_pre + user1 + "/" + fi + ".txt"
        file2 = dir_pre + user2 + "/" + fi + ".txt"
        if not os.path.exists(file1):
            print >> ERROR_LOG, "*********Error: ", file1, "does not exist*********"
            return -1.00
        if not os.path.exists(file2):
            print >> ERROR_LOG, "*********Error: ", file2, "does not exist*********"
            return -1.00
        # Read the first AMR of both files and close them right away; a
        # failure ends the loop rather than falling through to handles
        # that are unbound or left over from the previous iteration.
        try:
            with open(file1, "r") as file1_h:
                with open(file2, "r") as file2_h:
                    cur_amr1 = smatch.get_amr_line(file1_h)
                    cur_amr2 = smatch.get_amr_line(file2_h)
        except IOError:
            print >> ERROR_LOG, "Cannot open the files", file1, file2
            break
        if cur_amr1 == "":
            print >> ERROR_LOG, "AMR 1 is empty"
            continue
        if cur_amr2 == "":
            print >> ERROR_LOG, "AMR 2 is empty"
            continue
        amr1 = amr.AMR.parse_AMR_line(cur_amr1)
        amr2 = amr.AMR.parse_AMR_line(cur_amr2)
        test_label = "a"
        gold_label = "b"
        amr1.rename_node(test_label)
        amr2.rename_node(gold_label)
        (test_inst, test_rel1, test_rel2) = amr1.get_triples2()
        (gold_inst, gold_rel1, gold_rel2) = amr2.get_triples2()
        if verbose:
            print >> ERROR_LOG, "Instance triples of file 1:", len(test_inst)
            print >> ERROR_LOG, test_inst
            print >> sys.stderr, "Relation triples of file 1:", len(test_rel1) + len(test_rel2)
            print >> sys.stderr, test_rel1
            print >> sys.stderr, test_rel2
            print >> ERROR_LOG, "Instance triples of file 2:", len(gold_inst)
            print >> ERROR_LOG, gold_inst
            print >> sys.stderr, "Relation triples of file 2:", len(gold_rel1) + len(gold_rel2)
            print >> sys.stderr, gold_rel1
            print >> sys.stderr, gold_rel2
        # The AMR with fewer instance triples goes first in get_fh.
        if len(test_inst) < len(gold_inst):
            (best_match, best_match_num) = smatch.get_fh(
                test_inst, test_rel1, test_rel2,
                gold_inst, gold_rel1, gold_rel2,
                test_label, gold_label)
            if verbose:
                print >> ERROR_LOG, "best match number", best_match_num
                print >> ERROR_LOG, "Best Match:", smatch.print_alignment(
                    best_match, test_inst, gold_inst)
        else:
            (best_match, best_match_num) = smatch.get_fh(
                gold_inst, gold_rel1, gold_rel2,
                test_inst, test_rel1, test_rel2,
                gold_label, test_label)
            if verbose:
                print >> ERROR_LOG, "best match number", best_match_num
                print >> ERROR_LOG, "Best Match:", smatch.print_alignment(
                    best_match, gold_inst, test_inst, True)
        match_total += best_match_num
        test_total += len(test_inst) + len(test_rel1) + len(test_rel2)
        gold_total += len(gold_inst) + len(gold_rel1) + len(gold_rel2)
        # Clear smatch's module-level match dictionary before the next pair.
        smatch.match_num_dict.clear()
    (precision, recall, f_score) = smatch.compute_f(
        match_total, test_total, gold_total)
    return "%.2f" % f_score
def compute_files(user1, user2, file_list, dir_pre, start_num):
    """
    Compute the smatch scores for a file list between two users

    The first AMR of each user1 file is the test side and the first AMR of
    the matching user2 file is the gold side; counts are summed over all
    files before the f score is computed.

    Args:
        user1: user 1 name
        user2: user 2 name
        file_list: file list (names without the ".txt" extension)
        dir_pre: the file location prefix
        start_num: the number of restarts in smatch (not used by this
            function body)
    Returns:
        smatch f score formatted with "%.2f", or -1.00 if one of the files
        does not exist. Pairs with an empty AMR are skipped; the loop ends
        at the first pair that cannot be opened or read.
    """
    match_total = 0
    test_total = 0
    gold_total = 0
    for fi in file_list:
        file1 = dir_pre + user1 + "/" + fi + ".txt"
        file2 = dir_pre + user2 + "/" + fi + ".txt"
        if not os.path.exists(file1):
            print("*********Error: ", file1, "does not exist*********", file=ERROR_LOG)
            return -1.00
        if not os.path.exists(file2):
            print("*********Error: ", file2, "does not exist*********", file=ERROR_LOG)
            return -1.00
        # Only the first AMR of each file is read, so the handles are
        # closed immediately afterwards (also when the second open fails).
        try:
            with open(file1, "r") as file1_h, open(file2, "r") as file2_h:
                cur_amr1 = smatch.get_amr_line(file1_h)
                cur_amr2 = smatch.get_amr_line(file2_h)
        except IOError:
            print("Cannot open the files", file1, file2, file=ERROR_LOG)
            break
        if cur_amr1 == "":
            print("AMR 1 is empty", file=ERROR_LOG)
            continue
        if cur_amr2 == "":
            print("AMR 2 is empty", file=ERROR_LOG)
            continue
        amr1 = amr.AMR.parse_AMR_line(cur_amr1)
        amr2 = amr.AMR.parse_AMR_line(cur_amr2)
        test_label = "a"
        gold_label = "b"
        amr1.rename_node(test_label)
        amr2.rename_node(gold_label)
        (test_inst, test_rel1, test_rel2) = amr1.get_triples()
        (gold_inst, gold_rel1, gold_rel2) = amr2.get_triples()
        if verbose:
            print("Instance triples of file 1:", len(test_inst), file=DEBUG_LOG)
            print(test_inst, file=DEBUG_LOG)
            print("Attribute triples of file 1:", len(test_rel1), file=DEBUG_LOG)
            print(test_rel1, file=DEBUG_LOG)
            print("Relation triples of file 1:", len(test_rel2), file=DEBUG_LOG)
            print(test_rel2, file=DEBUG_LOG)
            print("Instance triples of file 2:", len(gold_inst), file=DEBUG_LOG)
            print(gold_inst, file=DEBUG_LOG)
            print("Attribute triples of file 2:", len(gold_rel1), file=DEBUG_LOG)
            print(gold_rel1, file=DEBUG_LOG)
            print("Relation triples of file 2:", len(gold_rel2), file=DEBUG_LOG)
            print(gold_rel2, file=DEBUG_LOG)
        (best_match, best_match_num) = smatch.get_best_match(
            test_inst, test_rel1, test_rel2,
            gold_inst, gold_rel1, gold_rel2,
            test_label, gold_label)
        if verbose:
            print("best match number", best_match_num, file=DEBUG_LOG)
            print("Best Match:",
                  smatch.print_alignment(best_match, test_inst, gold_inst),
                  file=DEBUG_LOG)
        match_total += best_match_num
        test_total += (len(test_inst) + len(test_rel1) + len(test_rel2))
        gold_total += (len(gold_inst) + len(gold_rel1) + len(gold_rel2))
        # Reset smatch's module-level match dictionary between file pairs.
        smatch.match_triple_dict.clear()
    (precision, recall, f_score) = smatch.compute_f(match_total, test_total, gold_total)
    return "%.2f" % f_score
def _pair_f_score(gold, silver):
    """Return the Smatch F-score of ``silver`` measured against ``gold``.

    Pairs that consist of one and the same object, or in which either AMR
    is not ``valid``, are not scored and contribute 0.
    """
    if silver is gold:
        return 0
    if not silver.amr.valid or not gold.amr.valid:
        return 0
    sentence = make_matched_sentence(gold.amr, silver.amr)
    sentence.text = gold.text or silver.text
    gold_triple_num = len(sentence.gold.instances) + len(sentence.gold.attributes) + len(sentence.gold.relations)
    test_triple_num = len(sentence.silver.instances) + len(sentence.silver.attributes) + len(sentence.silver.relations)
    # The top node, when present, is one more triple on its side.
    gold_triple_num += 1 if sentence.gold.top else 0
    test_triple_num += 1 if sentence.silver.top else 0
    sentence.precision, sentence.recall, sentence.best_f_score = smatch.compute_f(
        sentence.best_match_num, test_triple_num, gold_triple_num)
    return sentence.best_f_score


def choose_best(input_sentences, score_on_sentences=None, **options):
    """Select the candidate with the highest summed F-score.

    Each candidate in ``input_sentences`` is compared with every sentence
    in ``score_on_sentences`` (or with the other candidates when that
    argument is empty or omitted) and the resulting F-scores are added up.
    A candidate whose sum is 0 against a separate scoring set is scored
    against the candidates as a fallback.

    Args:
        input_sentences: candidate sentences; each needs ``amr``, ``text``
            and, with ``require_parsed_dates``, ``amr_string_collapsed``.
        score_on_sentences: sentences to score against.
        **options: with ``require_parsed_dates`` set, candidates whose
            collapsed AMR string contains a ``YYYY-MM-DD`` pattern are
            dropped, unless no candidate would remain.

    Returns:
        ``(best, results)``. ``results`` contains one ``Dict(amr, gold,
        score)`` per candidate, where ``gold`` is the last sentence
        iterated while scoring. ``best`` is the highest-scoring result,
        with ``min_dist`` holding its margin over the second best (0 if
        there is no second), or ``None`` if there were no candidates.
    """
    if not score_on_sentences:
        score_on_sentences = input_sentences
    options = Dict(options)

    # skip silver AMRs with unparsed date
    if options.require_parsed_dates:
        unparsed_date_re = re.compile(r'\d\d\d\d-\d\d-\d\d')
        without_unparsed_date_sentences = [
            sentence for sentence in input_sentences
            if not unparsed_date_re.search(sentence.amr_string_collapsed)
        ]
        if without_unparsed_date_sentences:
            input_sentences = without_unparsed_date_sentences

    results = []
    for silver in input_sentences:
        score = 0
        for gold in score_on_sentences:
            score += _pair_f_score(gold, silver)
        # let's fallback on using silver sentences for scoring if not done
        # that before
        if score == 0 and input_sentences is not score_on_sentences:
            for gold in input_sentences:
                score += _pair_f_score(gold, silver)
        results.append(Dict(amr=silver, gold=gold, score=score))

    if not results:
        # Nothing to choose from; avoids ``AttributeError`` on ``None``.
        return None, results

    best = None
    second_best = None
    for result in results:
        if best is None or best.score < result.score:
            second_best = best
            best = result
        elif second_best is None or second_best.score < result.score:
            second_best = result
    best.min_dist = best.score - (second_best.score if second_best else best.score)
    return best, results
def compute_files(user1, user2, file_list, dir_pre, start_num):
    """
    Compute the smatch scores for a file list between two users

    The first AMR of each user1 file is the test side and the first AMR of
    the matching user2 file is the gold side; counts are summed over all
    files before the f score is computed.

    Args:
        user1: user 1 name
        user2: user 2 name
        file_list: file list (names without the ".txt" extension)
        dir_pre: the file location prefix
        start_num: the number of restarts in smatch (not used by this
            function body)
    Returns:
        smatch f score formatted with "%.2f", or -1.00 if one of the files
        does not exist. Pairs with an empty AMR are skipped; the loop ends
        at the first pair that cannot be opened or read.
    """
    match_total = 0
    test_total = 0
    gold_total = 0
    for fi in file_list:
        file1 = dir_pre + user1 + "/" + fi + ".txt"
        file2 = dir_pre + user2 + "/" + fi + ".txt"
        if not os.path.exists(file1):
            print >> ERROR_LOG, "*********Error: ", file1, "does not exist*********"
            return -1.00
        if not os.path.exists(file2):
            print >> ERROR_LOG, "*********Error: ", file2, "does not exist*********"
            return -1.00
        # Only the first AMR of each file is read, so the handles are
        # closed immediately afterwards (also when the second open fails).
        try:
            with open(file1, "r") as file1_h:
                with open(file2, "r") as file2_h:
                    cur_amr1 = smatch.get_amr_line(file1_h)
                    cur_amr2 = smatch.get_amr_line(file2_h)
        except IOError:
            print >> ERROR_LOG, "Cannot open the files", file1, file2
            break
        if cur_amr1 == "":
            print >> ERROR_LOG, "AMR 1 is empty"
            continue
        if cur_amr2 == "":
            print >> ERROR_LOG, "AMR 2 is empty"
            continue
        amr1 = amr.AMR.parse_AMR_line(cur_amr1)
        amr2 = amr.AMR.parse_AMR_line(cur_amr2)
        test_label = "a"
        gold_label = "b"
        amr1.rename_node(test_label)
        amr2.rename_node(gold_label)
        (test_inst, test_rel1, test_rel2) = amr1.get_triples()
        (gold_inst, gold_rel1, gold_rel2) = amr2.get_triples()
        if verbose:
            print >> DEBUG_LOG, "Instance triples of file 1:", len(test_inst)
            print >> DEBUG_LOG, test_inst
            print >> DEBUG_LOG, "Attribute triples of file 1:", len(test_rel1)
            print >> DEBUG_LOG, test_rel1
            print >> DEBUG_LOG, "Relation triples of file 1:", len(test_rel2)
            print >> DEBUG_LOG, test_rel2
            print >> DEBUG_LOG, "Instance triples of file 2:", len(gold_inst)
            print >> DEBUG_LOG, gold_inst
            print >> DEBUG_LOG, "Attribute triples of file 2:", len(gold_rel1)
            print >> DEBUG_LOG, gold_rel1
            print >> DEBUG_LOG, "Relation triples of file 2:", len(gold_rel2)
            print >> DEBUG_LOG, gold_rel2
        (best_match, best_match_num) = smatch.get_best_match(
            test_inst, test_rel1, test_rel2,
            gold_inst, gold_rel1, gold_rel2,
            test_label, gold_label)
        if verbose:
            print >> DEBUG_LOG, "best match number", best_match_num
            print >> DEBUG_LOG, "Best Match:", smatch.print_alignment(best_match, test_inst, gold_inst)
        match_total += best_match_num
        test_total += (len(test_inst) + len(test_rel1) + len(test_rel2))
        gold_total += (len(gold_inst) + len(gold_rel1) + len(gold_rel2))
        # Reset smatch's module-level match dictionary between file pairs.
        smatch.match_triple_dict.clear()
    (precision, recall, f_score) = smatch.compute_f(match_total, test_total, gold_total)
    return "%.2f" % f_score
def make_matched_document(gold_lines, silver_lines, verbose=False):
    """Score a silver document against a gold document, sentence by sentence.

    The two inputs are parsed with ``parse_amr_iter`` and walked in
    parallel. Pairs in which either AMR is not ``valid`` are counted as
    skipped; all others are matched with ``make_matched_sentence``, get
    their own precision / recall / F-score, and contribute their counts to
    the document totals. Without ``verbose`` a dot is printed per scored
    pair.

    Returns:
        A ``Dict`` with ``sentences``, ``precision``, ``recall``,
        ``best_f_score``, ``skipped`` and ``good``.
    """
    gold_amrs = parse_amr_iter(gold_lines)
    silver_amrs = parse_amr_iter(silver_lines)

    sentences = []
    totals = {'match': 0, 'test': 0, 'gold': 0}
    skipped = 0
    good = 0
    nr = 0

    for gold_amr, silver_amr in zip(gold_amrs, silver_amrs):
        nr += 1
        both_valid = gold_amr.valid and silver_amr.valid
        if not both_valid:
            if verbose:
                print('Skipping sentence:', gold_amr.text)
            skipped += 1
            continue

        if verbose:
            print(gold_amr.text)

        sentence = make_matched_sentence(gold_amr, silver_amr)

        gold_size = len(sentence.gold.instances)
        gold_size += len(sentence.gold.attributes)
        gold_size += len(sentence.gold.relations)
        silver_size = len(sentence.silver.instances)
        silver_size += len(sentence.silver.attributes)
        silver_size += len(sentence.silver.relations)
        # The top node, when present, is counted as one more triple.
        if sentence.gold.top:
            gold_size += 1
        if sentence.silver.top:
            silver_size += 1

        # Scores for this AMR pair.
        sentence.precision, sentence.recall, sentence.best_f_score = smatch.compute_f(
            sentence.best_match_num, silver_size, gold_size)

        totals['match'] += sentence.best_match_num
        totals['test'] += silver_size
        totals['gold'] += gold_size

        if not verbose:
            print('.', end='', flush=True)
        else:
            print()
            print("Precision: %.4f" % sentence.precision)
            print("Recall: %.4f" % sentence.recall)
            print("Smatch score: %.4f" % sentence.best_f_score)
            print()

        good += 1
        sentence.nr = nr
        sentences.append(sentence)

    precision, recall, best_f_score = smatch.compute_f(
        totals['match'], totals['test'], totals['gold'])

    if verbose:
        print("Total:")
        print()
        for template, value in (("Precision: %.4f", precision),
                                ("Recall: %.4f", recall),
                                ("Smatch score: %.4f", best_f_score)):
            print(template % value)

    # Both iterators are advanced one more step; a leftover item is
    # currently ignored.
    if next(gold_amrs, None):
        pass
    if next(silver_amrs, None):
        pass

    return Dict(
        sentences=sentences,
        precision=precision,
        recall=recall,
        best_f_score=best_f_score,
        skipped=skipped,
        good=good,
    )