def dummy_constituent_tree(token_yield,
                           full_token_yield,
                           dummy_label,
                           dummy_root,
                           label=None):
    """
    Generate a dummy tree for a given yield, using dummy_label as the symbol
    of every inner node and dummy_root as the symbol of the root node.

    The tokens of token_yield are arranged in a chain below the root ('n0'):
    every token except the last two gets its own level, on which a fresh
    inner node ('n1', 'n2', ...) and the token itself are attached to the
    current parent; the last two tokens are attached to the deepest node.
    Tokens of full_token_yield that do not occur in token_yield are
    registered through add_punct instead of add_leaf.

    :param token_yield: connected yield of a parse tree
    :type token_yield: list[ConstituentTerminal]
    :param full_token_yield: full yield of the parse tree
    :type full_token_yield: list[ConstituentTerminal]
    :param dummy_label: symbol for inner nodes (converted with str())
    :param dummy_root: symbol for the root node
    :param label: value handed to the ConstituentTree constructor
    :return: dummy constituent tree
    :rtype: ConstituentTree
    """
    tree = ConstituentTree(label)

    # Register every token of the full yield; its position in
    # full_token_yield serves as its id in the tree.
    for token in full_token_yield:
        register = tree.add_leaf if token in token_yield else tree.add_punct
        register(full_token_yield.index(token), token.pos(), token.form())

    # The root always exists, even for an empty yield.
    root_id = 'n0'
    tree.add_node(root_id, ConstituentCategory(dummy_root))
    tree.add_to_root(root_id)

    # One inner node per token, except for the last two tokens.
    attach_to = root_id
    for depth, token in enumerate(token_yield[:-2], start=1):
        inner_id = 'n' + str(depth)
        tree.add_node(inner_id, ConstituentCategory(str(dummy_label)))
        tree.add_child(attach_to, inner_id)
        tree.add_child(attach_to, full_token_yield.index(token))
        attach_to = inner_id

    # The trailing (at most two) tokens share the deepest node; for a yield
    # of length one this attaches the single token directly to the root.
    for token in token_yield[-2:]:
        tree.add_child(attach_to, full_token_yield.index(token))

    return tree
def main():
    """
    Ad-hoc debugging driver that compares a loaded tree with a hand-built one.

    Loads sentence ``s40564`` from the German SPMRL dev gold file and builds a
    small system tree by hand (a ``PN`` node over the first token of the
    loaded sentence plus one punctuation token). For every loaded hybrid tree
    it prints: the running index, the drawn converted tree, the word forms of
    the full yield, the drawn converted system tree, ``tree[::-1]``, the POS
    tags, the ``TreePairResult`` scores, and the result of comparing the
    hybrid tree with itself via ``TreeComparator``.

    :return: nothing; all results are printed
    :rtype: None
    """
    corpus_path = '../res/SPMRL_SHARED_2014_NO_ARABIC/GERMAN_SPMRL/gold/xml/dev/dev.German.gold.xml'
    # Only a single sentence is inspected. To look at a larger slice use
    # ["s" + str(i) for i in range(40675, 40700)] instead.
    names = ['s40564']
    corpus = sentence_names_to_hybridtrees(names, path=corpus_path, hold=False)

    comparator = TreeComparator()

    # Hand-built system tree used as the comparison partner.
    tree_sys = ConstituentTree()
    tree_sys.add_node('0', ConstituentCategory('PN'))
    tree_sys.add_node('1', corpus[0].token_yield()[0], order=True)
    tree_sys.add_punct("3", '$.', '.')
    tree_sys.add_to_root('0')
    tree_sys.add_child('0', '1')

    param = build_param()

    for i, hybridtree in enumerate(corpus):
        print(i)

        tree, sent = convert_tree(hybridtree)
        tree2, sent2 = convert_tree(tree_sys)

        print(DrawTree(tree, sent))

        print(' '.join(token.form()
                       for token in hybridtree.full_token_yield()))

        print(DrawTree(tree2, sent2))

        print(tree[::-1])

        print('POS', tree.pos())

        result = TreePairResult(i, tree, sent, tree2, sent2, param)
        print(result.scores())

        # The tree is compared with itself here.
        print("Comparator: ",
              comparator.compare_hybridtrees(hybridtree, hybridtree))
# Example #3
# 0
def constituent_tree_1_pos_stripped():
    """
    Build the fixed example tree "s1" in which every token carries the
    placeholder POS tag "--".

    Leaves are "hat", "schnell" and "gearbeitet"; "." is added as
    punctuation. Node V has children f1 and f3, node ADV has child f2, and
    the root VP has children V and ADV.

    :return: the constructed example tree
    :rtype: ConstituentTree
    """
    tree = ConstituentTree("s1")

    for leaf_id, form in (("f1", "hat"),
                          ("f2", "schnell"),
                          ("f3", "gearbeitet")):
        tree.add_leaf(leaf_id, "--", form)
    tree.add_punct("f4", "--", ".")

    # (node id, children); each node is labelled with its own id.
    structure = (
        ("V", ("f1", "f3")),
        ("ADV", ("f2",)),
        ("VP", ("V", "ADV")),
    )
    for node_id, children in structure:
        tree.set_label(node_id, node_id)
        for child_id in children:
            tree.add_child(node_id, child_id)

    tree.add_to_root("VP")

    return tree
def flat_dummy_constituent_tree(token_yield,
                                full_token_yield,
                                dummy_label,
                                dummy_root,
                                label=None,
                                gold_pos=True):
    """
    Generate a flat dummy tree for a given yield: every token of token_yield
    becomes a leaf attached directly under the root node 'n_root'.

    Tokens of full_token_yield that do not occur in token_yield are
    registered through add_punct and are not attached to the root.

    :param token_yield: connected yield of a parse tree
    :type token_yield: list[ConstituentTerminal]
    :param full_token_yield: full yield of the parse tree
    :type full_token_yield: list[ConstituentTerminal]
    :param dummy_label: not used by this function
    :param dummy_root: symbol for the root node
    :param label: value handed to the ConstituentTree constructor
    :param gold_pos: if true, keep each token's own POS tag; otherwise use
                     the placeholder '--'
    :return: dummy constituent tree
    :rtype: ConstituentTree
    """
    tree = ConstituentTree(label)

    # The root is created first so that leaves can be attached on the fly.
    root_id = 'n_root'
    tree.add_node(root_id, ConstituentCategory(dummy_root))
    tree.add_to_root(root_id)

    for token in full_token_yield:
        if gold_pos:
            pos = token.pos()
        else:
            pos = '--'

        if token in token_yield:
            # The position in the full yield serves as the leaf id.
            position = full_token_yield.index(token)
            tree.add_leaf(position,
                          pos,
                          token.form(),
                          morph=token.morph_feats(),
                          lemma=token.lemma())
            tree.add_child(root_id, position)
            continue

        tree.add_punct(full_token_yield.index(token), pos, token.form())

    return tree
# Example #5
# 0
def constituent_tree_2():
    """
    Build the fixed example tree "s2" for "John hit the Ball .".

    NP has children l3 and l4, VP has children l2 and NP, and the root S has
    children l1 and VP; "." (l5) is added as punctuation.

    :return: the constructed example tree
    :rtype: ConstituentTree
    """
    tree = ConstituentTree("s2")

    for leaf_id, pos, form in (("l1", "N", "John"),
                               ("l2", "V", "hit"),
                               ("l3", "D", "the"),
                               ("l4", "N", "Ball")):
        tree.add_leaf(leaf_id, pos, form)
    tree.add_punct("l5", "PUNC", ".")

    # (node id, children); each node is labelled with its own id.
    structure = (
        ("NP", ("l3", "l4")),
        ("VP", ("l2", "NP")),
        ("S", ("l1", "VP")),
    )
    for node_id, children in structure:
        tree.set_label(node_id, node_id)
        for child_id in children:
            tree.add_child(node_id, child_id)

    tree.add_to_root("S")

    return tree
    def setUp(self):
        """
        Create the three fixture trees self.tree, self.tree2 and self.tree3.

        self.tree ("s1") and self.tree2 ("s2") share one shape: VP1 over VP2
        (f1, f3) and ADVP (f2), with f4 as punctuation. Only self.tree2 is
        attached to a root here; no add_to_root call is made for self.tree.
        self.tree3 ("s3") is a flat NP over four leaves f1..f4 and is
        attached to the root.
        """
        # Edges and labels shared by the first two trees, in call order.
        shared_edges = (
            ("VP2", "f1"),
            ("VP2", "f3"),
            ("ADVP", "f2"),
            ("VP1", "VP2"),
            ("VP1", "ADVP"),
        )
        shared_labels = (
            ("VP2", "VP"),
            ("VP1", "VP"),
            ("ADVP", "ADVP"),
        )

        first = ConstituentTree("s1")
        first.add_leaf("f1",
                       "VAFIN",
                       "hat",
                       morph=[("number", "Sg"), ("person", "3"),
                              ("tense", "Past"), ("mood", "Ind")])
        first.add_leaf("f2", "ADV", "schnell", morph=[("degree", "Pos")])
        first.add_leaf("f3", "VVPP", "gearbeitet")
        first.add_punct("f4", "PUNC", ".")
        for parent_id, child_id in shared_edges:
            first.add_child(parent_id, child_id)
        for node_id, category in shared_labels:
            first.set_label(node_id, category)
        self.tree = first

        second = ConstituentTree("s2")
        second.add_leaf("f1",
                        "VAFIN",
                        "haben",
                        morph=[("number", "Pl"), ("person", "3"),
                               ("tense", "Past"), ("mood", "Ind")])
        second.add_leaf("f2", "ADV", "gut", morph=[("degree", "Pos")])
        second.add_leaf("f3", "VVPP", "gekocht")
        second.add_punct("f4", "PUNC", ".")
        for parent_id, child_id in shared_edges:
            second.add_child(parent_id, child_id)
        for node_id, category in shared_labels:
            second.set_label(node_id, category)
        second.add_to_root("VP1")
        self.tree2 = second

        # Flat noun phrase: four leaves, all with edge label "NK".
        self.tree3 = ConstituentTree("s3")
        noun_phrase = self.tree3
        noun_phrase_leaves = (
            ("f1", "ADJA", "Allgemeiner", [("number", "Sg")]),
            ("f2", "ADJA", "Deutscher",
             [("degree", "Pos"), ("number", "Sg")]),
            ("f3", "NN", "Fahrrad",
             [("number", "Sg"), ("gender", "Neut")]),
            ("f4", "NN", "Club",
             [("number", "Sg"), ("gender", "Neut")]),
        )
        for leaf_id, pos, form, morph in noun_phrase_leaves:
            noun_phrase.add_leaf(leaf_id, pos, form, edge="NK", morph=morph)
        for leaf_id in ("f1", "f2", "f3", "f4"):
            noun_phrase.add_child("NP", leaf_id)
        noun_phrase.set_label("NP", "NP")
        noun_phrase.add_to_root("NP")