def match(self, verbose=False):
    """Align the test AMR with the gold AMR and store the result on both.

    Clears ``smatch.match_triple_dict``, renames the nodes of both parsed
    graphs with the prefixes ``'g'`` (gold) and ``'t'`` (test), and runs
    ``smatch.get_best_match`` with the test triples first.  The triples are
    then passed through ``self._rename`` with maps built from the
    pre-renaming instance triples, and instances, attributes, top, relations
    and matches are written onto ``self.gold_amr`` and ``self.test_amr``.
    The match count returned by smatch is kept in ``self.best_match_num``.

    Args:
        verbose: when true, print both instance lists and the matches to
            stdout, and the alignment to stderr.
    """
    smatch.match_triple_dict.clear()
    smatch.seed = self.seed
    if self.seed is not None:
        # Seed from the instance attribute; there is no local name ``seed``
        # bound anywhere in this method.
        random.seed(self.seed)

    gold_label = 'g'
    test_label = 't'

    # Capture the instance triples before renaming so the original
    # identifiers can be put back afterwards.
    gold_inst_orig, _, _ = self.gold_amr.parsed.get_triples()
    test_inst_orig, _, _ = self.test_amr.parsed.get_triples()
    self.gold_amr.parsed.rename_node(gold_label)
    self.test_amr.parsed.rename_node(test_label)
    gold_inst, gold_attr, gold_rel = self.gold_amr.parsed.get_triples()
    test_inst, test_attr, test_rel = self.test_amr.parsed.get_triples()

    # Element 1 of each renamed instance triple -> element 1 of the triple
    # at the same position before renaming (presumably the node variable).
    gold_map = {a[1]: b[1] for a, b in zip(gold_inst, gold_inst_orig)}
    test_map = {a[1]: b[1] for a, b in zip(test_inst, test_inst_orig)}

    # The test triples go first; every later use of best_mapping below
    # follows the same (test, gold) order.
    best_mapping, self.best_match_num = smatch.get_best_match(
        test_inst, test_attr, test_rel,
        gold_inst, gold_attr, gold_rel,
        test_label, gold_label)

    gold_inst, gold_attr, gold_rel = self._rename(
        gold_inst, gold_attr, gold_rel, gold_map)
    test_inst, test_attr, test_rel = self._rename(
        test_inst, test_attr, test_rel, test_map)

    if verbose:
        print('Gold instances:')
        for instance in gold_inst:
            print(' ', instance)
        print('Test instances:')
        for instance in test_inst:
            print(' ', instance)
        # Same argument order as get_best_match (test first, gold second).
        print("Best Match:",
              smatch.print_alignment(best_mapping, test_inst, gold_inst),
              file=sys.stderr)
        print("Matches:", self._matches(best_mapping, test_inst, gold_inst))

    gold_amr = self.gold_amr
    test_amr = self.test_amr
    gold_amr.instances, gold_amr.attributes, gold_amr.top = self._convert(
        gold_inst, gold_attr)
    gold_amr.relations = gold_rel
    test_amr.instances, test_amr.attributes, test_amr.top = self._convert(
        test_inst, test_attr)
    test_amr.relations = test_rel
    test_amr.matches, gold_amr.matches = self._matches(
        best_mapping, test_inst, gold_inst)
def score_amr_pair(ref_amr_line, rec_amr_line, restart_num,
                   justinstance=False, justattribute=False,
                   justrelation=False):
    """Run smatch on two AMR lines and return the match and triple counts.

    Both lines are parsed with ``AMR.parse_AMR_line``; the first graph's
    nodes are renamed with prefix "a" and the second's with prefix "b"
    before their triples are handed to ``smatch.get_best_match``.

    Args:
        ref_amr_line: AMR text parsed as the first graph.
        rec_amr_line: AMR text parsed as the second graph.
        restart_num: forwarded positionally to ``smatch.get_best_match``
            right after the two prefixes.
        justinstance: compare and count instance triples only.
        justattribute: compare and count attribute triples only.
        justrelation: compare and count relation triples only.

    Returns:
        ``(best_match_num, test_triple_num, gold_triple_num)`` where the
        "test" count comes from the first graph and the "gold" count from
        the second.
    """
    label_a, label_b = "a", "b"

    graph_a = AMR.parse_AMR_line(ref_amr_line)
    graph_b = AMR.parse_AMR_line(rec_amr_line)
    graph_a.rename_node(label_a)
    graph_b.rename_node(label_b)

    inst_a, attr_a, rel_a = graph_a.get_triples()
    inst_b, attr_b, rel_b = graph_b.get_triples()

    # A triple kind stays enabled unless one of the *other* two
    # "just..." switches was requested.
    use_instances = not (justattribute or justrelation)
    use_attributes = not (justinstance or justrelation)
    use_relations = not (justinstance or justattribute)

    _, best_match_num = smatch.get_best_match(
        inst_a, attr_a, rel_a,
        inst_b, attr_b, rel_b,
        label_a, label_b, restart_num,
        doinstance=use_instances,
        doattribute=use_attributes,
        dorelation=use_relations,
    )

    # Decide which triple lists are counted; the first switch that is set
    # wins, in the order instance, attribute, relation.
    if justinstance:
        counted_a, counted_b = (inst_a,), (inst_b,)
    elif justattribute:
        counted_a, counted_b = (attr_a,), (attr_b,)
    elif justrelation:
        counted_a, counted_b = (rel_a,), (rel_b,)
    else:
        counted_a = (inst_a, attr_a, rel_a)
        counted_b = (inst_b, attr_b, rel_b)

    test_triple_num = sum(len(part) for part in counted_a)
    gold_triple_num = sum(len(part) for part in counted_b)
    return best_match_num, test_triple_num, gold_triple_num
def match(self, verbose=False):
    """Compute the best smatch alignment between the gold and test AMRs.

    The method clears ``smatch.match_triple_dict``, prefixes the node names
    of the gold graph with ``'g'`` and of the test graph with ``'t'``, and
    calls ``smatch.get_best_match`` with the test triples as the first
    graph.  Afterwards the triples go through ``self._rename`` using maps
    derived from the instance triples captured before renaming, and the
    results (instances, attributes, top, relations, matches) are assigned to
    ``self.gold_amr`` and ``self.test_amr``.  ``self.best_match_num``
    receives the match count returned by smatch.

    Args:
        verbose: when true, print the instance triples and the matches to
            stdout and the alignment string to stderr.
    """
    smatch.match_triple_dict.clear()
    smatch.seed = self.seed
    if self.seed is not None:
        # Use the attribute that was just checked; the bare name ``seed``
        # is not bound inside this method.
        random.seed(self.seed)

    gold_label = 'g'
    test_label = 't'

    # Instance triples as they look before rename_node changes them.
    gold_inst_orig, _, _ = self.gold_amr.parsed.get_triples()
    test_inst_orig, _, _ = self.test_amr.parsed.get_triples()

    self.gold_amr.parsed.rename_node(gold_label)
    self.test_amr.parsed.rename_node(test_label)

    gold_inst, gold_attr, gold_rel = self.gold_amr.parsed.get_triples()
    test_inst, test_attr, test_rel = self.test_amr.parsed.get_triples()

    # Pair triples positionally: renamed element 1 -> original element 1
    # (presumably the node variable name).
    gold_map = {a[1]: b[1] for a, b in zip(gold_inst, gold_inst_orig)}
    test_map = {a[1]: b[1] for a, b in zip(test_inst, test_inst_orig)}

    # Test graph first, gold graph second; the calls that consume
    # best_mapping below use the same order.
    best_mapping, self.best_match_num = smatch.get_best_match(
        test_inst, test_attr, test_rel,
        gold_inst, gold_attr, gold_rel,
        test_label, gold_label)

    gold_inst, gold_attr, gold_rel = self._rename(
        gold_inst, gold_attr, gold_rel, gold_map)
    test_inst, test_attr, test_rel = self._rename(
        test_inst, test_attr, test_rel, test_map)

    if verbose:
        print('Gold instances:')
        for instance in gold_inst:
            print(' ', instance)
        print('Test instances:')
        for instance in test_inst:
            print(' ', instance)
        # Pass the instances in the order used for get_best_match.
        print("Best Match:",
              smatch.print_alignment(best_mapping, test_inst, gold_inst),
              file=sys.stderr)
        print("Matches:", self._matches(best_mapping, test_inst, gold_inst))

    gold_amr = self.gold_amr
    test_amr = self.test_amr
    gold_amr.instances, gold_amr.attributes, gold_amr.top = self._convert(
        gold_inst, gold_attr)
    gold_amr.relations = gold_rel
    test_amr.instances, test_amr.attributes, test_amr.top = self._convert(
        test_inst, test_attr)
    test_amr.relations = test_rel
    test_amr.matches, gold_amr.matches = self._matches(
        best_mapping, test_inst, gold_inst)
def match_amr(gold_amr, test_amr, verbose=False):
    """Align ``test_amr`` with ``gold_amr`` and return the converted pair.

    Clears ``smatch.match_triple_dict``, renames the nodes of the gold graph
    with prefix ``'g'`` and of the test graph with prefix ``'t'`` (both
    arguments are modified in place by ``rename_node``), and calls
    ``smatch.get_best_match`` with the test triples first.  The triples are
    then passed through ``rename`` with maps built from the instance triples
    taken before renaming, and handed to ``convert``.

    Args:
        gold_amr: gold graph; must provide ``get_triples`` and
            ``rename_node``.
        test_amr: test graph with the same interface.
        verbose: when true, print both instance lists and the matches to
            stdout, and the alignment to stderr.

    Returns:
        ``(amr1, amr2, best_match_num)`` where ``amr1`` is the result of
        ``convert`` for the gold graph, ``amr2`` the result for the test
        graph (each with a ``matches`` attribute set), and
        ``best_match_num`` is the count returned by smatch.
    """
    smatch.match_triple_dict.clear()
    gold_label = 'g'
    test_label = 't'

    # Keep the pre-renaming instance triples so names can be mapped back.
    gold_inst_orig, _, _ = gold_amr.get_triples()
    test_inst_orig, _, _ = test_amr.get_triples()
    gold_amr.rename_node(gold_label)
    test_amr.rename_node(test_label)
    gold_inst, gold_attr, gold_rel = gold_amr.get_triples()
    test_inst, test_attr, test_rel = test_amr.get_triples()

    # Renamed element 1 -> original element 1, paired by position
    # (presumably the node variable name).
    gold_map = {a[1]: b[1] for a, b in zip(gold_inst, gold_inst_orig)}
    test_map = {a[1]: b[1] for a, b in zip(test_inst, test_inst_orig)}

    # Test triples first; later consumers of best_mapping use the same
    # (test, gold) order.
    best_mapping, best_match_num = smatch.get_best_match(
        test_inst, test_attr, test_rel,
        gold_inst, gold_attr, gold_rel,
        test_label, gold_label)

    gold_inst, gold_attr, gold_rel = rename(
        gold_inst, gold_attr, gold_rel, gold_map)
    test_inst, test_attr, test_rel = rename(
        test_inst, test_attr, test_rel, test_map)

    if verbose:
        print('Gold instances:')
        for instance in gold_inst:
            print(' ', instance)
        print('Test instances:')
        for instance in test_inst:
            print(' ', instance)
        # Same argument order as get_best_match (test first, gold second).
        print("Best Match:",
              smatch.print_alignment(best_mapping, test_inst, gold_inst),
              file=sys.stderr)
        print("Matches:", matches(best_mapping, test_inst, gold_inst))

    amr1 = convert(gold_amr, gold_inst, gold_attr, gold_rel)
    amr2 = convert(test_amr, test_inst, test_attr, test_rel)
    amr2.matches, amr1.matches = matches(best_mapping, test_inst, gold_inst)
    return amr1, amr2, best_match_num
def match_pair_2(pair):
    """Score one pair of AMRs given as (list, dict) relation descriptions.

    Args:
        pair: two 2-tuples, ``((lst_amr1, dic_amr1), (lst_amr2, dic_amr2))``;
            each inner tuple is passed to ``parse_relations``.

    Returns:
        ``(best_match_num, num_test, num_gold)``: the match count returned by
        ``smatch.get_best_match`` followed by the total number of triples
        (instances + attributes + relations) of the first and of the second
        graph.
    """
    first, second = pair
    lst_a, dic_a = first
    lst_b, dic_b = second

    smatch.match_triple_dict.clear()

    graph_a = parse_relations(lst_a, dic_a)
    graph_b = parse_relations(lst_b, dic_b)

    # Prefix the node names with "a" / "b" before extracting triples.
    label_a, label_b = 'a', 'b'
    graph_a.rename_node(label_a)
    graph_b.rename_node(label_b)

    triples_a = graph_a.get_triples()
    triples_b = graph_b.get_triples()
    inst_a, attrib_a, rel_a = triples_a
    inst_b, attrib_b, rel_b = triples_b

    result = smatch.get_best_match(inst_a, attrib_a, rel_a,
                                   inst_b, attrib_b, rel_b,
                                   label_a, label_b)
    _, best_match_num = result

    num_test = sum(len(part) for part in (inst_a, attrib_a, rel_a))
    num_gold = sum(len(part) for part in (inst_b, attrib_b, rel_b))
    return best_match_num, num_test, num_gold
def match_amr(gold_amr, test_amr, verbose=False):
    """Compute the best smatch alignment for a gold/test pair of graphs.

    The function clears ``smatch.match_triple_dict``, prefixes the node
    names of ``gold_amr`` with ``'g'`` and of ``test_amr`` with ``'t'``
    (``rename_node`` is called on the arguments themselves), and calls
    ``smatch.get_best_match`` with the test triples as the first graph.
    The triples then go through ``rename`` using maps derived from the
    instance triples captured before renaming, and through ``convert``.

    Args:
        gold_amr: gold graph exposing ``get_triples`` and ``rename_node``.
        test_amr: test graph exposing the same two methods.
        verbose: when true, print the instance triples and the matches to
            stdout and the alignment string to stderr.

    Returns:
        A 3-tuple ``(amr1, amr2, best_match_num)``: the ``convert`` result
        for the gold graph, the ``convert`` result for the test graph (both
        with ``matches`` assigned), and the match count from smatch.
    """
    smatch.match_triple_dict.clear()
    gold_label = 'g'
    test_label = 't'

    # Instance triples as they look before rename_node changes them.
    gold_inst_orig, _, _ = gold_amr.get_triples()
    test_inst_orig, _, _ = test_amr.get_triples()

    gold_amr.rename_node(gold_label)
    test_amr.rename_node(test_label)

    gold_inst, gold_attr, gold_rel = gold_amr.get_triples()
    test_inst, test_attr, test_rel = test_amr.get_triples()

    # Positional pairing: renamed element 1 -> original element 1
    # (presumably the node variable name).
    gold_map = {a[1]: b[1] for a, b in zip(gold_inst, gold_inst_orig)}
    test_map = {a[1]: b[1] for a, b in zip(test_inst, test_inst_orig)}

    # Test graph first, gold graph second; the calls that consume
    # best_mapping below keep that order.
    best_mapping, best_match_num = smatch.get_best_match(
        test_inst, test_attr, test_rel,
        gold_inst, gold_attr, gold_rel,
        test_label, gold_label)

    gold_inst, gold_attr, gold_rel = rename(
        gold_inst, gold_attr, gold_rel, gold_map)
    test_inst, test_attr, test_rel = rename(
        test_inst, test_attr, test_rel, test_map)

    if verbose:
        print('Gold instances:')
        for instance in gold_inst:
            print(' ', instance)
        print('Test instances:')
        for instance in test_inst:
            print(' ', instance)
        # Pass the instances in the order used for get_best_match.
        print("Best Match:",
              smatch.print_alignment(best_mapping, test_inst, gold_inst),
              file=sys.stderr)
        print("Matches:", matches(best_mapping, test_inst, gold_inst))

    amr1 = convert(gold_amr, gold_inst, gold_attr, gold_rel)
    amr2 = convert(test_amr, test_inst, test_attr, test_rel)
    amr2.matches, amr1.matches = matches(best_mapping, test_inst, gold_inst)
    return amr1, amr2, best_match_num
def compute_files(user1, user2, file_list, dir_pre, start_num):
    """
    Compute the smatch scores for a file list between two users

    For every name in ``file_list`` the files
    ``dir_pre + user + "/" + name + ".txt"`` of both users are read, one AMR
    is taken from each with ``smatch.get_amr_line``, and the match / triple
    counts are accumulated over all files before the f-score is computed.

    Args:
        user1: user 1 name (its AMRs are treated as the "test" side)
        user2: user 2 name (its AMRs are treated as the "gold" side)
        file_list: file list (names without directory and ".txt" suffix)
        dir_pre: the file location prefix
        start_num: the number of restarts in smatch; accepted for interface
            compatibility, not used inside this function
    Returns:
        smatch f score formatted with "%.2f" (a string), or the float -1.00
        as soon as one of the expected files does not exist.

    """
    match_total = 0
    test_total = 0
    gold_total = 0
    for fi in file_list:
        file1 = dir_pre + user1 + "/" + fi + ".txt"
        file2 = dir_pre + user2 + "/" + fi + ".txt"
        if not os.path.exists(file1):
            print("*********Error: ", file1, "does not exist*********",
                  file=ERROR_LOG)
            return -1.00
        if not os.path.exists(file2):
            print("*********Error: ", file2, "does not exist*********",
                  file=ERROR_LOG)
            return -1.00
        try:
            file1_h = open(file1, "r")
            try:
                file2_h = open(file2, "r")
            except IOError:
                # Do not leak the first handle when the second open fails.
                file1_h.close()
                raise
        except IOError:
            print("Cannot open the files", file1, file2, file=ERROR_LOG)
            break
        # Close both handles as soon as the AMR lines are read, so the
        # early ``continue`` paths below no longer leave files open.
        with file1_h, file2_h:
            cur_amr1 = smatch.get_amr_line(file1_h)
            cur_amr2 = smatch.get_amr_line(file2_h)
        if cur_amr1 == "":
            print("AMR 1 is empty", file=ERROR_LOG)
            continue
        if cur_amr2 == "":
            print("AMR 2 is empty", file=ERROR_LOG)
            continue
        amr1 = amr.AMR.parse_AMR_line(cur_amr1)
        amr2 = amr.AMR.parse_AMR_line(cur_amr2)
        test_label = "a"
        gold_label = "b"
        amr1.rename_node(test_label)
        amr2.rename_node(gold_label)
        # get_triples returns (instances, attributes, relations); the
        # *_rel1 names hold the attribute triples, *_rel2 the relations.
        (test_inst, test_rel1, test_rel2) = amr1.get_triples()
        (gold_inst, gold_rel1, gold_rel2) = amr2.get_triples()
        if verbose:
            print("Instance triples of file 1:", len(test_inst), file=DEBUG_LOG)
            print(test_inst, file=DEBUG_LOG)
            print("Attribute triples of file 1:", len(test_rel1), file=DEBUG_LOG)
            print(test_rel1, file=DEBUG_LOG)
            print("Relation triples of file 1:", len(test_rel2), file=DEBUG_LOG)
            print(test_rel2, file=DEBUG_LOG)
            print("Instance triples of file 2:", len(gold_inst), file=DEBUG_LOG)
            print(gold_inst, file=DEBUG_LOG)
            print("Attribute triples of file 2:", len(gold_rel1), file=DEBUG_LOG)
            print(gold_rel1, file=DEBUG_LOG)
            print("Relation triples of file 2:", len(gold_rel2), file=DEBUG_LOG)
            print(gold_rel2, file=DEBUG_LOG)
        (best_match, best_match_num) = smatch.get_best_match(
            test_inst, test_rel1, test_rel2,
            gold_inst, gold_rel1, gold_rel2,
            test_label, gold_label)
        if verbose:
            print("best match number", best_match_num, file=DEBUG_LOG)
            print("Best Match:",
                  smatch.print_alignment(best_match, test_inst, gold_inst),
                  file=DEBUG_LOG)
        match_total += best_match_num
        test_total += (len(test_inst) + len(test_rel1) + len(test_rel2))
        gold_total += (len(gold_inst) + len(gold_rel1) + len(gold_rel2))
        # Reset smatch's triple cache before the next file pair.
        smatch.match_triple_dict.clear()
    (_, _, f_score) = smatch.compute_f(match_total, test_total, gold_total)
    return "%.2f" % f_score
def compute_files(user1, user2, file_list, dir_pre, start_num):
    """
    Compute the smatch scores for a file list between two users

    For every name in ``file_list`` the files
    ``dir_pre + user + "/" + name + ".txt"`` of both users are read, one AMR
    is taken from each with ``smatch.get_amr_line``, and the match / triple
    counts are accumulated over all files before the f-score is computed.

    Args:
        user1: user 1 name (its AMRs are treated as the "test" side)
        user2: user 2 name (its AMRs are treated as the "gold" side)
        file_list: file list (names without directory and ".txt" suffix)
        dir_pre: the file location prefix
        start_num: the number of restarts in smatch; accepted for interface
            compatibility, not used inside this function
    Returns:
        smatch f score formatted with "%.2f" (a string), or the float -1.00
        as soon as one of the expected files does not exist.

    """
    match_total = 0
    test_total = 0
    gold_total = 0
    for fi in file_list:
        file1 = dir_pre + user1 + "/" + fi + ".txt"
        file2 = dir_pre + user2 + "/" + fi + ".txt"
        # Messages are written with the print function (``file=``), the
        # form used elsewhere in this file, instead of ``print >> stream``.
        if not os.path.exists(file1):
            print("*********Error: ", file1, "does not exist*********",
                  file=ERROR_LOG)
            return -1.00
        if not os.path.exists(file2):
            print("*********Error: ", file2, "does not exist*********",
                  file=ERROR_LOG)
            return -1.00
        try:
            file1_h = open(file1, "r")
            try:
                file2_h = open(file2, "r")
            except IOError:
                # Release the first handle if the second file cannot be
                # opened.
                file1_h.close()
                raise
        except IOError:
            print("Cannot open the files", file1, file2, file=ERROR_LOG)
            break
        # Both handles are closed right after reading, which also covers
        # the early ``continue`` paths below.
        with file1_h, file2_h:
            cur_amr1 = smatch.get_amr_line(file1_h)
            cur_amr2 = smatch.get_amr_line(file2_h)
        if cur_amr1 == "":
            print("AMR 1 is empty", file=ERROR_LOG)
            continue
        if cur_amr2 == "":
            print("AMR 2 is empty", file=ERROR_LOG)
            continue
        amr1 = amr.AMR.parse_AMR_line(cur_amr1)
        amr2 = amr.AMR.parse_AMR_line(cur_amr2)
        test_label = "a"
        gold_label = "b"
        amr1.rename_node(test_label)
        amr2.rename_node(gold_label)
        # get_triples returns (instances, attributes, relations); the
        # *_rel1 names hold the attribute triples, *_rel2 the relations.
        (test_inst, test_rel1, test_rel2) = amr1.get_triples()
        (gold_inst, gold_rel1, gold_rel2) = amr2.get_triples()
        if verbose:
            print("Instance triples of file 1:", len(test_inst), file=DEBUG_LOG)
            print(test_inst, file=DEBUG_LOG)
            print("Attribute triples of file 1:", len(test_rel1), file=DEBUG_LOG)
            print(test_rel1, file=DEBUG_LOG)
            print("Relation triples of file 1:", len(test_rel2), file=DEBUG_LOG)
            print(test_rel2, file=DEBUG_LOG)
            print("Instance triples of file 2:", len(gold_inst), file=DEBUG_LOG)
            print(gold_inst, file=DEBUG_LOG)
            print("Attribute triples of file 2:", len(gold_rel1), file=DEBUG_LOG)
            print(gold_rel1, file=DEBUG_LOG)
            print("Relation triples of file 2:", len(gold_rel2), file=DEBUG_LOG)
            print(gold_rel2, file=DEBUG_LOG)
        (best_match, best_match_num) = smatch.get_best_match(
            test_inst, test_rel1, test_rel2,
            gold_inst, gold_rel1, gold_rel2,
            test_label, gold_label)
        if verbose:
            print("best match number", best_match_num, file=DEBUG_LOG)
            print("Best Match:",
                  smatch.print_alignment(best_match, test_inst, gold_inst),
                  file=DEBUG_LOG)
        match_total += best_match_num
        test_total += (len(test_inst) + len(test_rel1) + len(test_rel2))
        gold_total += (len(gold_inst) + len(gold_rel1) + len(gold_rel2))
        # Reset smatch's triple cache before the next file pair.
        smatch.match_triple_dict.clear()
    (_, _, f_score) = smatch.compute_f(match_total, test_total, gold_total)
    return "%.2f" % f_score