def _yield_positions(token_yield, full_token_yield):
    """
    Map every token of token_yield to a position in full_token_yield.

    Each token is assigned the first position holding an equal token that has
    not been assigned to an earlier token, so repeated tokens are spread over
    their occurrences instead of all collapsing onto the first one.

    :param token_yield: tokens to locate
    :type token_yield: list[ConstituentTerminal]
    :param full_token_yield: sequence in which the tokens are looked up
    :type full_token_yield: list[ConstituentTerminal]
    :return: one position per token, in the order of token_yield
    :rtype: list[int]
    :raises ValueError: if a token does not occur in full_token_yield
    """
    claimed = set()
    positions = []
    for token in token_yield:
        first = full_token_yield.index(token)
        position = first
        while position in claimed:
            try:
                position = full_token_yield.index(token, position + 1)
            except ValueError:
                # Every occurrence is already taken: reuse the first match,
                # i.e. the result a plain index() lookup would have given.
                position = first
                break
        claimed.add(position)
        positions.append(position)
    return positions


def dummy_constituent_tree(token_yield, full_token_yield, dummy_label, dummy_root, label=None):
    """
    Generate a dummy tree for a given yield using dummy_label as inner node symbol.

    The root node 'n0' is labelled dummy_root. Below it hangs a chain of inner
    nodes 'n1', 'n2', ...: each node on the chain gets the next inner node and
    one token of the yield as children, and the innermost node receives the
    last two tokens. A yield of length one is attached to the root directly;
    for an empty yield the root stays without children.

    :param token_yield: connected yield of a parse tree
    :type token_yield: list[ConstituentTerminal]
    :param full_token_yield: full yield of the parse tree
    :type full_token_yield: list[ConstituentTerminal]
    :param dummy_label: symbol of the inner nodes (converted with str())
    :param dummy_root: symbol of the root node
    :param label: label passed on to the ConstituentTree constructor
    :return: dummy constituent tree
    :rtype: ConstituentTree
    :raises ValueError: if a token of token_yield is missing from full_token_yield
    """
    tree = ConstituentTree(label)

    # Create all leaves and punctuation. The position comes from enumerate()
    # rather than full_token_yield.index(token): index() rescans the list for
    # every token and always reports the first equal token, so two equal
    # tokens would end up on the same position.
    for position, token in enumerate(full_token_yield):
        if token in token_yield:
            tree.add_leaf(position, token.pos(), token.form())
        else:
            tree.add_punct(position, token.pos(), token.form())

    # generate root node
    root_id = 'n0'
    tree.add_node(root_id, ConstituentCategory(dummy_root))
    tree.add_to_root(root_id)

    positions = _yield_positions(token_yield, full_token_yield)

    # generate inner nodes of branching tree: all tokens but the last two
    # open a new inner node each
    parent = root_id
    for i, position in enumerate(positions[:-2], start=1):
        node_id = 'n' + str(i)
        tree.add_node(node_id, ConstituentCategory(str(dummy_label)))
        tree.add_child(parent, node_id)
        tree.add_child(parent, position)
        parent = node_id

    # the remaining (at most two) tokens go below the innermost node
    for position in positions[-2:]:
        tree.add_child(parent, position)
    return tree
def main():
    """
    Debugging entry point: load one sentence of the German SPMRL dev set,
    compare it against a small hand-built system tree and print the results.

    For every loaded hybrid tree the function prints its index, a drawing of
    the converted tree, the sentence forms, a drawing of the hand-built tree,
    the reversed tree, its POS tags, the scores of a TreePairResult and the
    outcome of comparing the hybrid tree with itself.
    """
    corpus_path = '../res/SPMRL_SHARED_2014_NO_ARABIC/GERMAN_SPMRL/gold/xml/dev/dev.German.gold.xml'
    names = ['s40564']
    corpus = sentence_names_to_hybridtrees(names, path=corpus_path, hold=False)

    cp = TreeComparator()

    # hand-built system tree: a single 'PN' node over the first token of the
    # first corpus sentence, plus one punctuation token
    tree_sys = ConstituentTree()
    tree_sys.add_node('0', ConstituentCategory('PN'))
    tree_sys.add_node('1', corpus[0].token_yield()[0], order=True)
    tree_sys.add_punct("3", '$.', '.')
    tree_sys.add_to_root('0')
    tree_sys.add_child('0', '1')

    param = build_param()

    for i, hybridtree in enumerate(corpus):
        print(i)

        tree, sent = convert_tree(hybridtree)
        tree2, sent2 = convert_tree(tree_sys)

        print(DrawTree(tree, sent))
        print(' '.join(token.form() for token in hybridtree.full_token_yield()))
        print(DrawTree(tree2, sent2))

        print(tree[::-1])
        print('POS', tree.pos())

        result = TreePairResult(i, tree, sent, tree2, sent2, param)
        print(result.scores())

        print("Comparator: ", cp.compare_hybridtrees(hybridtree, hybridtree))
def constituent_tree_1_pos_stripped():
    """
    Build the example tree "s1" in which every token carries the POS tag "--".

    Leaves "f1", "f2", "f3" hold "hat", "schnell", "gearbeitet" and "f4" is the
    punctuation ".". Node "V" covers f1 and f3, node "ADV" covers f2, and the
    root "VP" has "V" and "ADV" as children.

    :return: the example tree
    :rtype: ConstituentTree
    """
    tree = ConstituentTree("s1")

    words = (("f1", "hat"), ("f2", "schnell"), ("f3", "gearbeitet"))
    for leaf_id, form in words:
        tree.add_leaf(leaf_id, "--", form)
    tree.add_punct("f4", "--", ".")

    # inner nodes in bottom-up order; each node id is also used as its label
    inner_nodes = (
        ("V", ("f1", "f3")),
        ("ADV", ("f2",)),
        ("VP", ("V", "ADV")),
    )
    for node_id, children in inner_nodes:
        tree.set_label(node_id, node_id)
        for child_id in children:
            tree.add_child(node_id, child_id)

    tree.add_to_root("VP")
    return tree
def flat_dummy_constituent_tree(token_yield, full_token_yield, dummy_label, dummy_root, label=None, gold_pos=True):
    """
    Generate a flat dummy tree for a given yield where all nodes are attached
    under the root.

    The root node 'n_root' is labelled dummy_root. Every token of
    full_token_yield that is contained in token_yield becomes a leaf (with its
    morphological features and lemma) directly below the root; every other
    token is added as punctuation.

    :param token_yield: connected yield of a parse tree
    :type token_yield: list[ConstituentTerminal]
    :param full_token_yield: full yield of the parse tree
    :type full_token_yield: list[ConstituentTerminal]
    :param dummy_label: not used by this function; the parameter mirrors the
                        signature of dummy_constituent_tree
    :param dummy_root: symbol of the root node
    :param label: label passed on to the ConstituentTree constructor
    :param gold_pos: if true, the tokens keep their POS tags, otherwise
                     every token is tagged '--'
    :type gold_pos: bool
    :return: dummy constituent tree
    :rtype: ConstituentTree
    """
    tree = ConstituentTree(label)

    # generate root node
    root_id = 'n_root'
    tree.add_node(root_id, ConstituentCategory(dummy_root))
    tree.add_to_root(root_id)

    # Create all leaves and punctuation. The position comes from enumerate()
    # rather than full_token_yield.index(token): index() rescans the list for
    # every token and always reports the first equal token, so two equal
    # tokens would end up on the same position.
    for idx, token in enumerate(full_token_yield):
        pos = token.pos() if gold_pos else '--'
        if token in token_yield:
            tree.add_leaf(idx, pos, token.form(), morph=token.morph_feats(), lemma=token.lemma())
            tree.add_child(root_id, idx)
        else:
            tree.add_punct(idx, pos, token.form())
    return tree
def constituent_tree_2():
    """
    Build the example tree "s2" for the sentence "John hit the Ball .".

    Leaves "l1" to "l4" carry the tags N, V, D, N and "l5" is the punctuation
    ".". Node "NP" covers l3 and l4, node "VP" covers l2 and "NP", and the
    root "S" has l1 and "VP" as children.

    :return: the example tree
    :rtype: ConstituentTree
    """
    tree = ConstituentTree("s2")

    words = (
        ("l1", "N", "John"),
        ("l2", "V", "hit"),
        ("l3", "D", "the"),
        ("l4", "N", "Ball"),
    )
    for leaf_id, pos, form in words:
        tree.add_leaf(leaf_id, pos, form)
    tree.add_punct("l5", "PUNC", ".")

    # inner nodes in bottom-up order; each node id is also used as its label
    inner_nodes = (
        ("NP", ("l3", "l4")),
        ("VP", ("l2", "NP")),
        ("S", ("l1", "VP")),
    )
    for node_id, children in inner_nodes:
        tree.set_label(node_id, node_id)
        for child_id in children:
            tree.add_child(node_id, child_id)

    tree.add_to_root("S")
    return tree
def setUp(self):
    """
    Create the three fixture trees self.tree, self.tree2 and self.tree3.

    self.tree ("s1") and self.tree2 ("s2") share one structure: a node "VP1"
    (label VP) over "VP2" (label VP, covering f1 and f3) and "ADVP" (covering
    f2), plus the punctuation f4. Only self.tree2 is attached to the root;
    add_to_root is not called for self.tree. self.tree3 ("s3") is a flat "NP"
    over four leaves and is attached to the root.
    """

    def verb_phrase_tree(name, finite_verb, number, adverb, participle):
        # Shared construction of the s1/s2 trees; only the word forms and
        # the number feature of the finite verb differ between them.
        vp_tree = ConstituentTree(name)
        finite_morph = [("number", number), ("person", "3"), ("tense", "Past"), ("mood", "Ind")]
        vp_tree.add_leaf("f1", "VAFIN", finite_verb, morph=finite_morph)
        vp_tree.add_leaf("f2", "ADV", adverb, morph=[("degree", "Pos")])
        vp_tree.add_leaf("f3", "VVPP", participle)
        vp_tree.add_punct("f4", "PUNC", ".")

        edges = (
            ("VP2", "f1"),
            ("VP2", "f3"),
            ("ADVP", "f2"),
            ("VP1", "VP2"),
            ("VP1", "ADVP"),
        )
        for parent_id, child_id in edges:
            vp_tree.add_child(parent_id, child_id)

        for node_id, category in (("VP2", "VP"), ("VP1", "VP"), ("ADVP", "ADVP")):
            vp_tree.set_label(node_id, category)
        return vp_tree

    self.tree = verb_phrase_tree("s1", "hat", "Sg", "schnell", "gearbeitet")

    second = verb_phrase_tree("s2", "haben", "Pl", "gut", "gekocht")
    second.add_to_root("VP1")
    self.tree2 = second

    self.tree3 = noun_phrase = ConstituentTree("s3")
    np_leaves = (
        ("f1", "ADJA", "Allgemeiner", [("number", "Sg")]),
        ("f2", "ADJA", "Deutscher", [("degree", "Pos"), ("number", "Sg")]),
        ("f3", "NN", "Fahrrad", [("number", "Sg"), ("gender", "Neut")]),
        ("f4", "NN", "Club", [("number", "Sg"), ("gender", "Neut")]),
    )
    for leaf_id, pos, form, morph in np_leaves:
        noun_phrase.add_leaf(leaf_id, pos, form, edge="NK", morph=morph)
    for leaf_id, _, _, _ in np_leaves:
        noun_phrase.add_child("NP", leaf_id)
    noun_phrase.set_label("NP", "NP")
    noun_phrase.add_to_root("NP")