Beispiel #1
0
    def match(self, verbose=False):
        """Align the test AMR with the gold AMR via smatch and store the result.

        The nodes of both parsed graphs are renamed with the prefixes ``'g'``
        (gold) and ``'t'`` (test) before ``smatch.get_best_match`` is called;
        afterwards the triples are mapped back to the original node names
        with ``self._rename``.

        Args:
            verbose: If true, print the gold and test instance triples and
                the matches to stdout, and the best alignment to stderr.

        Side effects:
            Clears ``smatch.match_triple_dict``, sets ``smatch.seed`` and,
            when ``self.seed`` is not None, seeds the ``random`` module.
            Calls ``rename_node`` on both ``parsed`` graphs. Sets
            ``self.best_match_num`` and the ``instances``, ``attributes``,
            ``top``, ``relations`` and ``matches`` attributes of
            ``self.gold_amr`` and ``self.test_amr``.
        """
        smatch.match_triple_dict.clear()
        smatch.seed = self.seed

        if self.seed is not None:
            # Fix: this used to be random.seed(seed), but no name ``seed``
            # is bound in this method; the guard above shows the value
            # meant here is self.seed.
            random.seed(self.seed)

        gold_label = 'g'
        test_label = 't'

        # Instance triples under the original node names, captured before
        # the graphs are renamed.
        gold_inst_orig, _, _ = self.gold_amr.parsed.get_triples()
        test_inst_orig, _, _ = self.test_amr.parsed.get_triples()

        self.gold_amr.parsed.rename_node(gold_label)
        self.test_amr.parsed.rename_node(test_label)

        gold_inst, gold_attr, gold_rel = self.gold_amr.parsed.get_triples()
        test_inst, test_attr, test_rel = self.test_amr.parsed.get_triples()

        # Renamed node name -> original node name.
        # Assumes get_triples() yields the instances in the same order
        # before and after rename_node() -- TODO confirm.
        gold_map = {a[1]: b[1] for a, b in zip(gold_inst, gold_inst_orig)}
        test_map = {a[1]: b[1] for a, b in zip(test_inst, test_inst_orig)}

        # The test graph is deliberately passed first, so best_mapping is
        # positional over test_inst and refers to entries of gold_inst.
        best_mapping, self.best_match_num = smatch.get_best_match(
            test_inst, test_attr, test_rel, gold_inst, gold_attr, gold_rel,
            test_label, gold_label)

        gold_inst, gold_attr, gold_rel = self._rename(gold_inst, gold_attr,
                                                      gold_rel, gold_map)
        test_inst, test_attr, test_rel = self._rename(test_inst, test_attr,
                                                      test_rel, test_map)

        if verbose:
            print('Gold instances:')
            for instance in gold_inst:
                print('   ', instance)
            print('Test instances:')
            for instance in test_inst:
                print('   ', instance)
            # Fix: the instance lists are given in the same (test, gold)
            # order that produced best_mapping; they used to be swapped.
            print("Best Match:",
                  smatch.print_alignment(best_mapping, test_inst, gold_inst),
                  file=sys.stderr)
            print("Matches:", self._matches(best_mapping, test_inst,
                                            gold_inst))

        gold_amr = self.gold_amr
        test_amr = self.test_amr

        gold_amr.instances, gold_amr.attributes, gold_amr.top = self._convert(
            gold_inst, gold_attr)
        gold_amr.relations = gold_rel
        test_amr.instances, test_amr.attributes, test_amr.top = self._convert(
            test_inst, test_attr)
        test_amr.relations = test_rel

        # _matches is called test-first, so its first result belongs to the
        # test AMR and its second to the gold AMR.
        test_amr.matches, gold_amr.matches = self._matches(
            best_mapping, test_inst, gold_inst)
Beispiel #2
0
def score_amr_pair(ref_amr_line, rec_amr_line, restart_num,
                   justinstance=False, justattribute=False,
                   justrelation=False):
    """Run smatch on one pair of AMR lines and return the raw counts.

    Both lines are parsed with ``AMR.parse_AMR_line``, their nodes are
    renamed with the prefixes "a" (for ``ref_amr_line``) and "b" (for
    ``rec_amr_line``), and the resulting triples are handed to
    ``smatch.get_best_match``.

    Args:
        ref_amr_line: first AMR line; its triples are passed to smatch first.
        rec_amr_line: second AMR line; its triples are passed second.
        restart_num: forwarded positionally to ``smatch.get_best_match``
            right after the two prefixes.
        justinstance: compare and count instance triples only.
        justattribute: compare and count attribute triples only.
        justrelation: compare and count relation triples only.

    Returns:
        ``(best_match_num, test_triple_num, gold_triple_num)``. Note that
        ``test_triple_num`` is counted on the triples of ``ref_amr_line``
        and ``gold_triple_num`` on those of ``rec_amr_line``.
    """
    ref_prefix, rec_prefix = "a", "b"

    ref_graph = AMR.parse_AMR_line(ref_amr_line)
    rec_graph = AMR.parse_AMR_line(rec_amr_line)

    # Give the two graphs disjoint node names before extracting triples.
    ref_graph.rename_node(ref_prefix)
    rec_graph.rename_node(rec_prefix)

    ref_inst, ref_attr, ref_rel = ref_graph.get_triples()
    rec_inst, rec_attr, rec_rel = rec_graph.get_triples()

    # Each "just*" flag switches the two other triple kinds off; if several
    # flags are set at once, every kind ends up disabled.
    use_instances = not (justattribute or justrelation)
    use_attributes = not (justinstance or justrelation)
    use_relations = not (justinstance or justattribute)

    _, matched = smatch.get_best_match(
        ref_inst, ref_attr, ref_rel,
        rec_inst, rec_attr, rec_rel,
        ref_prefix, rec_prefix,
        restart_num,
        doinstance=use_instances,
        doattribute=use_attributes,
        dorelation=use_relations,
    )

    # Choose which triple kinds contribute to the totals; the first flag
    # that is set wins.
    if justinstance:
        counted = ((ref_inst, rec_inst),)
    elif justattribute:
        counted = ((ref_attr, rec_attr),)
    elif justrelation:
        counted = ((ref_rel, rec_rel),)
    else:
        counted = ((ref_inst, rec_inst),
                   (ref_attr, rec_attr),
                   (ref_rel, rec_rel))

    test_triple_num = sum(len(ref_part) for ref_part, _ in counted)
    gold_triple_num = sum(len(rec_part) for _, rec_part in counted)
    return matched, test_triple_num, gold_triple_num
Beispiel #3
0
    def match(self, verbose=False):
        """Run smatch on the gold/test AMR pair and record the alignment.

        Both parsed graphs get their nodes renamed ('g' prefix for gold,
        't' prefix for test) so smatch can tell them apart; once the best
        mapping is found, ``self._rename`` restores the original node names
        in the triples.

        Args:
            verbose: If true, print both instance lists and the matches to
                stdout, and the best alignment to stderr.

        Side effects:
            Clears ``smatch.match_triple_dict``, sets ``smatch.seed``, and
            seeds ``random`` when ``self.seed`` is not None. Calls
            ``rename_node`` on both ``parsed`` graphs. Stores
            ``self.best_match_num`` plus ``instances``, ``attributes``,
            ``top``, ``relations`` and ``matches`` on ``self.gold_amr`` and
            ``self.test_amr``.
        """
        smatch.match_triple_dict.clear()
        smatch.seed = self.seed

        if self.seed is not None:
            # Fix: was random.seed(seed); ``seed`` is not bound anywhere in
            # this method, self.seed is the value checked just above.
            random.seed(self.seed)

        gold_label = 'g'
        test_label = 't'

        # Keep the instance triples with their original node names.
        gold_inst_orig, _, _ = self.gold_amr.parsed.get_triples()
        test_inst_orig, _, _ = self.test_amr.parsed.get_triples()

        self.gold_amr.parsed.rename_node(gold_label)
        self.test_amr.parsed.rename_node(test_label)

        gold_inst, gold_attr, gold_rel = self.gold_amr.parsed.get_triples()
        test_inst, test_attr, test_rel = self.test_amr.parsed.get_triples()

        # Renamed node name -> original node name.
        # Assumes get_triples() keeps the instance order across
        # rename_node() -- TODO confirm.
        gold_map = {a[1]: b[1] for a, b in zip(gold_inst, gold_inst_orig)}
        test_map = {a[1]: b[1] for a, b in zip(test_inst, test_inst_orig)}

        # Test triples go first on purpose: best_mapping is positional over
        # test_inst and refers to entries of gold_inst.
        best_mapping, self.best_match_num = smatch.get_best_match(test_inst, test_attr, test_rel, gold_inst, gold_attr, gold_rel, test_label, gold_label)

        gold_inst, gold_attr, gold_rel = self._rename(gold_inst, gold_attr, gold_rel, gold_map)
        test_inst, test_attr, test_rel = self._rename(test_inst, test_attr, test_rel, test_map)

        if verbose:
            print('Gold instances:')
            for instance in gold_inst:
                print('   ', instance)
            print('Test instances:')
            for instance in test_inst:
                print('   ', instance)
            # Fix: pass the instance lists in the (test, gold) order that
            # best_mapping was computed for; they used to be swapped.
            print("Best Match:", smatch.print_alignment(best_mapping, test_inst, gold_inst), file=sys.stderr)
            print("Matches:", self._matches(best_mapping, test_inst, gold_inst))

        gold_amr = self.gold_amr
        test_amr = self.test_amr

        gold_amr.instances, gold_amr.attributes, gold_amr.top = self._convert(gold_inst, gold_attr)
        gold_amr.relations = gold_rel
        test_amr.instances, test_amr.attributes, test_amr.top = self._convert(test_inst, test_attr)
        test_amr.relations = test_rel

        # Test-first call: first result is for the test AMR, second for gold.
        test_amr.matches, gold_amr.matches = self._matches(best_mapping, test_inst, gold_inst)
Beispiel #4
0
    def match_amr(gold_amr, test_amr, verbose=False):
        """Align ``test_amr`` with ``gold_amr`` using smatch.

        The nodes of both graphs are renamed with the prefixes 'g' (gold)
        and 't' (test) for the smatch run; the triples are then mapped back
        to the original node names with ``rename`` before being handed to
        ``convert``.

        Args:
            gold_amr: gold graph; must provide ``get_triples()`` and
                ``rename_node()``. Its nodes are renamed by this call.
            test_amr: test graph, same requirements as ``gold_amr``.
            verbose: If true, print both instance lists and the matches to
                stdout, and the best alignment to stderr.

        Returns:
            ``(amr1, amr2, best_match_num)`` where ``amr1`` is the result of
            ``convert`` for the gold graph and ``amr2`` for the test graph,
            each with a ``matches`` attribute set.
        """
        smatch.match_triple_dict.clear()

        gold_label = 'g'
        test_label = 't'

        # Instance triples under the original node names.
        gold_inst_orig, _, _ = gold_amr.get_triples()
        test_inst_orig, _, _ = test_amr.get_triples()

        gold_amr.rename_node(gold_label)
        test_amr.rename_node(test_label)

        gold_inst, gold_attr, gold_rel = gold_amr.get_triples()
        test_inst, test_attr, test_rel = test_amr.get_triples()

        # Renamed node name -> original node name.
        # Assumes get_triples() keeps the instance order across
        # rename_node() -- TODO confirm.
        gold_map = {a[1]: b[1] for a, b in zip(gold_inst, gold_inst_orig)}
        test_map = {a[1]: b[1] for a, b in zip(test_inst, test_inst_orig)}

        # The test graph is passed first on purpose, so best_mapping is
        # positional over test_inst and refers to entries of gold_inst.
        best_mapping, best_match_num = smatch.get_best_match(
            test_inst, test_attr, test_rel, gold_inst, gold_attr, gold_rel,
            test_label, gold_label)

        gold_inst, gold_attr, gold_rel = rename(gold_inst, gold_attr, gold_rel,
                                                gold_map)
        test_inst, test_attr, test_rel = rename(test_inst, test_attr, test_rel,
                                                test_map)

        if verbose:
            print('Gold instances:')
            for instance in gold_inst:
                print('   ', instance)
            print('Test instances:')
            for instance in test_inst:
                print('   ', instance)
            # Fix: the instance lists follow the (test, gold) order that
            # produced best_mapping; they used to be passed gold-first.
            print("Best Match:",
                  smatch.print_alignment(best_mapping, test_inst, gold_inst),
                  file=sys.stderr)
            print("Matches:", matches(best_mapping, test_inst, gold_inst))

        amr1 = convert(gold_amr, gold_inst, gold_attr, gold_rel)
        amr2 = convert(test_amr, test_inst, test_attr, test_rel)
        # matches() is called test-first, so its first result belongs to
        # the test graph (amr2) and its second to the gold graph (amr1).
        amr2.matches, amr1.matches = matches(best_mapping, test_inst,
                                             gold_inst)

        return amr1, amr2, best_match_num
def match_pair_2(pair):
    """Compute the smatch counts for one pair of AMR descriptions.

    Args:
        pair: two ``(list, dict)`` tuples; each one is passed to
            ``parse_relations`` to build a graph.

    Returns:
        ``(best_match_num, num_test, num_gold)`` where ``num_test`` is the
        total number of triples of the first graph and ``num_gold`` that of
        the second graph.
    """
    (first_list, first_dict), (second_list, second_dict) = pair
    test_prefix, gold_prefix = 'a', 'b'

    # Drop whatever a previous smatch run stored in this shared dict.
    smatch.match_triple_dict.clear()

    test_graph = parse_relations(first_list, first_dict)
    gold_graph = parse_relations(second_list, second_dict)

    # Give the two graphs disjoint node names before extracting triples.
    test_graph.rename_node(test_prefix)
    gold_graph.rename_node(gold_prefix)

    test_inst, test_attr, test_rel = test_graph.get_triples()
    gold_inst, gold_attr, gold_rel = gold_graph.get_triples()

    result = smatch.get_best_match(test_inst, test_attr, test_rel,
                                   gold_inst, gold_attr, gold_rel,
                                   test_prefix, gold_prefix)
    _, best_match_num = result

    num_test = sum(map(len, (test_inst, test_attr, test_rel)))
    num_gold = sum(map(len, (gold_inst, gold_attr, gold_rel)))
    return best_match_num, num_test, num_gold
Beispiel #6
0
    def match_amr(gold_amr, test_amr, verbose=False):
        """Run smatch on a gold/test graph pair and return the converted AMRs.

        Nodes are renamed with a 'g' prefix (gold) and a 't' prefix (test)
        for the smatch run; ``rename`` then restores the original node names
        in the triples, which are passed on to ``convert``.

        Args:
            gold_amr: gold graph; must provide ``get_triples()`` and
                ``rename_node()``. Its nodes are renamed by this call.
            test_amr: test graph, same requirements as ``gold_amr``.
            verbose: If true, print both instance lists and the matches to
                stdout, and the best alignment to stderr.

        Returns:
            ``(amr1, amr2, best_match_num)``: ``amr1`` comes from ``convert``
            on the gold graph, ``amr2`` from ``convert`` on the test graph;
            both get a ``matches`` attribute.
        """
        smatch.match_triple_dict.clear()

        gold_label = 'g'
        test_label = 't'

        # Keep the instance triples with their original node names.
        gold_inst_orig, _, _ = gold_amr.get_triples()
        test_inst_orig, _, _ = test_amr.get_triples()

        gold_amr.rename_node(gold_label)
        test_amr.rename_node(test_label)

        gold_inst, gold_attr, gold_rel = gold_amr.get_triples()
        test_inst, test_attr, test_rel = test_amr.get_triples()

        # Renamed node name -> original node name.
        # Assumes get_triples() keeps the instance order across
        # rename_node() -- TODO confirm.
        gold_map = {a[1]: b[1] for a, b in zip(gold_inst, gold_inst_orig)}
        test_map = {a[1]: b[1] for a, b in zip(test_inst, test_inst_orig)}

        # Test triples go first on purpose: best_mapping is positional over
        # test_inst and refers to entries of gold_inst.
        best_mapping, best_match_num = smatch.get_best_match(test_inst, test_attr, test_rel, gold_inst, gold_attr, gold_rel, test_label, gold_label)

        gold_inst, gold_attr, gold_rel = rename(gold_inst, gold_attr, gold_rel, gold_map)
        test_inst, test_attr, test_rel = rename(test_inst, test_attr, test_rel, test_map)

        if verbose:
            print('Gold instances:')
            for instance in gold_inst:
                print('   ', instance)
            print('Test instances:')
            for instance in test_inst:
                print('   ', instance)
            # Fix: pass the instance lists in the (test, gold) order that
            # best_mapping was computed for; they used to be gold-first.
            print("Best Match:", smatch.print_alignment(best_mapping, test_inst, gold_inst), file=sys.stderr)
            print("Matches:", matches(best_mapping, test_inst, gold_inst))

        amr1 = convert(gold_amr, gold_inst, gold_attr, gold_rel)
        amr2 = convert(test_amr, test_inst, test_attr, test_rel)
        # Test-first call: first result is for amr2 (test), second for amr1.
        amr2.matches, amr1.matches = matches(best_mapping, test_inst, gold_inst)

        return amr1, amr2, best_match_num
Beispiel #7
0
def compute_files(user1, user2, file_list, dir_pre, start_num):
    """
    Compute the smatch scores for a file list between two users

    For every entry of file_list the first AMR of
    dir_pre + user + "/" + entry + ".txt" is read for each user, the two
    AMRs are matched with smatch, and the counts are accumulated over all
    files before the F-score is computed.

    Args:
    user1: user 1 name (its AMRs are passed to smatch first, prefix "a")
    user2: user 2 name (its AMRs are passed to smatch second, prefix "b")
    file_list: file list (names without the ".txt" extension)
    dir_pre: the file location prefix
    start_num: the number of restarts in smatch (not used in this body)
    Returns:
    smatch f score formatted with "%.2f" (a string), or the float -1.00 as
    soon as one of the expected files does not exist.

    """
    # NOTE(review): ``verbose``, ``ERROR_LOG`` and ``DEBUG_LOG`` are not
    # defined in this function; presumably module-level globals -- confirm.
    match_total = 0
    test_total = 0
    gold_total = 0
    for fi in file_list:
        file1 = dir_pre + user1 + "/" + fi + ".txt"
        file2 = dir_pre + user2 + "/" + fi + ".txt"
        if not os.path.exists(file1):
            print("*********Error: ",
                  file1,
                  "does not exist*********",
                  file=ERROR_LOG)
            return -1.00
        if not os.path.exists(file2):
            print("*********Error: ",
                  file2,
                  "does not exist*********",
                  file=ERROR_LOG)
            return -1.00
        # Fix: the handles used to stay open for every file pair. The
        # finally clause now closes whatever was opened, including the
        # first file when opening the second one fails.
        file1_h = file2_h = None
        try:
            try:
                file1_h = open(file1, "r")
                file2_h = open(file2, "r")
            except IOError:
                print("Cannot open the files", file1, file2, file=ERROR_LOG)
                # Stop processing; the score is computed from the pairs
                # handled so far.
                break
            cur_amr1 = smatch.get_amr_line(file1_h)
            cur_amr2 = smatch.get_amr_line(file2_h)
        finally:
            for handle in (file1_h, file2_h):
                if handle is not None:
                    handle.close()
        if cur_amr1 == "":
            print("AMR 1 is empty", file=ERROR_LOG)
            continue
        if cur_amr2 == "":
            print("AMR 2 is empty", file=ERROR_LOG)
            continue
        amr1 = amr.AMR.parse_AMR_line(cur_amr1)
        amr2 = amr.AMR.parse_AMR_line(cur_amr2)
        test_label = "a"
        gold_label = "b"
        amr1.rename_node(test_label)
        amr2.rename_node(gold_label)
        # get_triples() returns three lists; the debug output below labels
        # them instance, attribute (rel1) and relation (rel2) triples.
        (test_inst, test_rel1, test_rel2) = amr1.get_triples()
        (gold_inst, gold_rel1, gold_rel2) = amr2.get_triples()
        if verbose:
            print("Instance triples of file 1:",
                  len(test_inst),
                  file=DEBUG_LOG)
            print(test_inst, file=DEBUG_LOG)
            print("Attribute triples of file 1:",
                  len(test_rel1),
                  file=DEBUG_LOG)
            print(test_rel1, file=DEBUG_LOG)
            print("Relation triples of file 1:",
                  len(test_rel2),
                  file=DEBUG_LOG)
            print(test_rel2, file=DEBUG_LOG)
            print("Instance triples of file 2:",
                  len(gold_inst),
                  file=DEBUG_LOG)
            print(gold_inst, file=DEBUG_LOG)
            print("Attribute triples of file 2:",
                  len(gold_rel1),
                  file=DEBUG_LOG)
            print(gold_rel1, file=DEBUG_LOG)
            print("Relation triples of file 2:",
                  len(gold_rel2),
                  file=DEBUG_LOG)
            print(gold_rel2, file=DEBUG_LOG)
        (best_match, best_match_num) = smatch.get_best_match(
            test_inst, test_rel1, test_rel2, gold_inst, gold_rel1, gold_rel2,
            test_label, gold_label)
        if verbose:
            print("best match number", best_match_num, file=DEBUG_LOG)
            print("Best Match:",
                  smatch.print_alignment(best_match, test_inst, gold_inst),
                  file=DEBUG_LOG)
        match_total += best_match_num
        test_total += (len(test_inst) + len(test_rel1) + len(test_rel2))
        gold_total += (len(gold_inst) + len(gold_rel1) + len(gold_rel2))
        # Reset smatch's shared dict before the next file pair.
        smatch.match_triple_dict.clear()
    (precision, recall, f_score) = smatch.compute_f(match_total, test_total,
                                                    gold_total)
    return "%.2f" % f_score
Beispiel #8
0
def compute_files(user1, user2, file_list, dir_pre, start_num):

    """
    Compute the smatch scores for a file list between two users

    For every entry of file_list the first AMR of
    dir_pre + user + "/" + entry + ".txt" is read for each user, the two
    AMRs are matched with smatch, and the counts are accumulated over all
    files before the F-score is computed.

    This variant uses Python 2 print statements (``print >> stream, ...``).

    Args:
    user1: user 1 name (its AMRs are passed to smatch first, prefix "a")
    user2: user 2 name (its AMRs are passed to smatch second, prefix "b")
    file_list: file list (names without the ".txt" extension)
    dir_pre: the file location prefix
    start_num: the number of restarts in smatch (not used in this body)
    Returns:
    smatch f score formatted with "%.2f" (a string), or the float -1.00 as
    soon as one of the expected files does not exist.

    """
    # NOTE(review): ``verbose``, ``ERROR_LOG`` and ``DEBUG_LOG`` are not
    # defined in this function; presumably module-level globals -- confirm.
    match_total = 0
    test_total = 0
    gold_total = 0
    for fi in file_list:
        file1 = dir_pre + user1 + "/" + fi + ".txt"
        file2 = dir_pre + user2 + "/" + fi + ".txt"
        if not os.path.exists(file1):
            print >> ERROR_LOG, "*********Error: ", file1, "does not exist*********"
            return -1.00
        if not os.path.exists(file2):
            print >> ERROR_LOG, "*********Error: ", file2, "does not exist*********"
            return -1.00
        # Fix: the handles used to stay open for every file pair. The
        # finally clause now closes whatever was opened, including the
        # first file when opening the second one fails.
        file1_h = file2_h = None
        try:
            try:
                file1_h = open(file1, "r")
                file2_h = open(file2, "r")
            except IOError:
                print >> ERROR_LOG, "Cannot open the files", file1, file2
                # Stop processing; the score is computed from the pairs
                # handled so far.
                break
            cur_amr1 = smatch.get_amr_line(file1_h)
            cur_amr2 = smatch.get_amr_line(file2_h)
        finally:
            for handle in (file1_h, file2_h):
                if handle is not None:
                    handle.close()
        if cur_amr1 == "":
            print >> ERROR_LOG, "AMR 1 is empty"
            continue
        if cur_amr2 == "":
            print >> ERROR_LOG, "AMR 2 is empty"
            continue
        amr1 = amr.AMR.parse_AMR_line(cur_amr1)
        amr2 = amr.AMR.parse_AMR_line(cur_amr2)
        test_label = "a"
        gold_label = "b"
        amr1.rename_node(test_label)
        amr2.rename_node(gold_label)
        # get_triples() returns three lists; the debug output below labels
        # them instance, attribute (rel1) and relation (rel2) triples.
        (test_inst, test_rel1, test_rel2) = amr1.get_triples()
        (gold_inst, gold_rel1, gold_rel2) = amr2.get_triples()
        if verbose:
            print >> DEBUG_LOG, "Instance triples of file 1:", len(test_inst)
            print >> DEBUG_LOG, test_inst
            print >> DEBUG_LOG, "Attribute triples of file 1:", len(test_rel1)
            print >> DEBUG_LOG, test_rel1
            print >> DEBUG_LOG, "Relation triples of file 1:", len(test_rel2)
            print >> DEBUG_LOG, test_rel2
            print >> DEBUG_LOG, "Instance triples of file 2:", len(gold_inst)
            print >> DEBUG_LOG, gold_inst
            print >> DEBUG_LOG, "Attribute triples of file 2:", len(gold_rel1)
            print >> DEBUG_LOG, gold_rel1
            print >> DEBUG_LOG, "Relation triples of file 2:", len(gold_rel2)
            print >> DEBUG_LOG, gold_rel2
        (best_match, best_match_num) = smatch.get_best_match(test_inst, test_rel1, test_rel2,
                                                             gold_inst, gold_rel1, gold_rel2,
                                                             test_label, gold_label)
        if verbose:
            print >> DEBUG_LOG, "best match number", best_match_num
            print >> DEBUG_LOG, "Best Match:", smatch.print_alignment(best_match, test_inst, gold_inst)
        match_total += best_match_num
        test_total += (len(test_inst) + len(test_rel1) + len(test_rel2))
        gold_total += (len(gold_inst) + len(gold_rel1) + len(gold_rel2))
        # Reset smatch's shared dict before the next file pair.
        smatch.match_triple_dict.clear()
    (precision, recall, f_score) = smatch.compute_f(match_total, test_total, gold_total)
    return "%.2f" % f_score