def dummy_constituent_tree(token_yield, full_token_yield, dummy_label, dummy_root, label=None):
    """
    Builds a dummy tree over a given yield: a chain of inner nodes in which every
    node but the deepest carries the next inner node and one token, and the deepest
    node carries the remaining (up to two) tokens.

    :param token_yield: connected yield of a parse tree
    :type token_yield: list[ConstituentTerminal]
    :param full_token_yield: full yield of the parse tree
    :type full_token_yield: list[ConstituentTerminal]
    :param dummy_label: symbol of the inner nodes (converted with str() before use)
    :param dummy_root: symbol of the root node
    :param label: passed on to the ConstituentTree constructor
    :return: dummy constituent tree
    :rtype: ConstituentTree
    """
    tree = ConstituentTree(label)

    # Every token of the full yield becomes a terminal. Tokens missing from the
    # connected yield are registered through add_punct, the others through add_leaf.
    # Terminal ids are positions in full_token_yield as reported by list.index,
    # i.e. the position of the first element comparing equal to the token.
    for token in full_token_yield:
        position = full_token_yield.index(token)
        register = tree.add_leaf if token in token_yield else tree.add_punct
        register(position, token.pos(), token.form())

    # The root node always exists, even for an empty connected yield.
    root_id = 'n0'
    tree.add_node(root_id, ConstituentCategory(dummy_root))
    tree.add_to_root(root_id)

    parent = root_id

    # All tokens except the last two each introduce a fresh inner node 'n1', 'n2', ...
    # The current parent receives that inner node first and the token second, and
    # the inner node then becomes the parent for the next step.
    for counter, token in enumerate(token_yield[:-2], start=1):
        inner_id = 'n' + str(counter)
        tree.add_node(inner_id, ConstituentCategory(str(dummy_label)))
        tree.add_child(parent, inner_id)
        tree.add_child(parent, full_token_yield.index(token))
        parent = inner_id

    # The trailing tokens hang directly below the deepest node. The slice holds two
    # tokens for longer yields, the single token of a one-element yield (attached
    # to the root), and nothing for an empty yield.
    for token in token_yield[-2:]:
        tree.add_child(parent, full_token_yield.index(token))

    return tree
def main():
    """
    Ad-hoc debugging driver: loads sentence s40564 from the German SPMRL dev corpus,
    builds a small hand-made ConstituentTree, passes both through convert_tree and
    prints the drawings, the token forms, the TreePairResult scores and the result
    of comparing each corpus tree with itself.
    """
    # The training split is located at
    # ../res/SPMRL_SHARED_2014_NO_ARABIC/GERMAN_SPMRL/gold/xml/train/train.German.gold.xml
    corpus_path = '../res/SPMRL_SHARED_2014_NO_ARABIC/GERMAN_SPMRL/gold/xml/dev/dev.German.gold.xml'
    sentence_names = ['s40564']
    corpus = sentence_names_to_hybridtrees(sentence_names, path=corpus_path, hold=False)

    comparator = TreeComparator()

    # Hand-made tree: node '0' labelled 'PN' is the root and has node '1' as its
    # only child; node '1' carries the first token of the first corpus sentence.
    # A full stop is registered as punctuation under id "3".
    system_tree = ConstituentTree()
    system_tree.add_node('0', ConstituentCategory('PN'))
    first_token = corpus[0].token_yield()[0]
    system_tree.add_node('1', first_token, order=True)
    system_tree.add_punct("3", '$.', '.')
    system_tree.add_to_root('0')
    system_tree.add_child('0', '1')

    param = build_param()

    for number, hybridtree in enumerate(corpus):
        print(number)

        corpus_tree, corpus_sent = convert_tree(hybridtree)
        # The hand-made tree is converted anew in every iteration, as before.
        system_converted, system_sent = convert_tree(system_tree)

        print(DrawTree(corpus_tree, corpus_sent))

        forms = [token.form() for token in hybridtree.full_token_yield()]
        print(' '.join(forms))

        print(DrawTree(system_converted, system_sent))

        print(corpus_tree[::-1])

        print('POS', corpus_tree.pos())

        pair_result = TreePairResult(number, corpus_tree, corpus_sent, system_converted, system_sent, param)
        print(pair_result.scores())

        print("Comparator: ", comparator.compare_hybridtrees(hybridtree, hybridtree))
def flat_dummy_constituent_tree(token_yield, full_token_yield, dummy_label, dummy_root, label=None, gold_pos=True):
    """
    Builds a flat dummy tree for a given yield: every token of the connected yield
    is attached directly under the root node.

    :param token_yield: connected yield of a parse tree
    :type token_yield: list[ConstituentTerminal]
    :param full_token_yield: full yield of the parse tree
    :type full_token_yield: list[ConstituentTerminal]
    :param dummy_label: not used by this function
    :param dummy_root: symbol of the root node
    :param label: passed on to the ConstituentTree constructor
    :param gold_pos: if true, terminals keep the POS tag of their token; otherwise '--' is used
    :return: dummy constituent tree
    :rtype: ConstituentTree
    """
    tree = ConstituentTree(label)

    # The root is created first so that leaves can be attached while they are added.
    root_id = 'n_root'
    tree.add_node(root_id, ConstituentCategory(dummy_root))
    tree.add_to_root(root_id)

    for token in full_token_yield:
        tag = token.pos() if gold_pos else '--'
        # Terminal id: position of the first element of full_token_yield that
        # compares equal to the token (list.index semantics).
        position = full_token_yield.index(token)

        if token in token_yield:
            # Part of the connected yield: a leaf with morphology and lemma,
            # hooked directly below the root.
            tree.add_leaf(position, tag, token.form(), morph=token.morph_feats(), lemma=token.lemma())
            tree.add_child(root_id, position)
        else:
            # Outside the connected yield: registered via add_punct and not
            # attached to any node.
            tree.add_punct(position, tag, token.form())

    return tree