def test_multiroot(self):
    tree = multi_dep_tree()
    term_pos = the_terminal_labeling_factory().get_strategy('pos').token_label
    fanout_1 = the_recursive_partitioning_factory().get_partitioning('fanout-1')
    for top_level_labeling_strategy in ['strict', 'child']:
        labeling_strategy = the_labeling_factory().create_simple_labeling_strategy(
            top_level_labeling_strategy, 'pos+deprel')
        for recursive_partitioning in [[direct_extraction], fanout_1, [left_branching]]:
            (_, grammar) = induce_grammar([tree], labeling_strategy, term_pos,
                                          recursive_partitioning, 'START')
            print(grammar)

            parser = LCFRS_parser(grammar, 'pA pB pC pD pE'.split(' '))
            print(parser.best_derivation_tree())

            cleaned_tokens = copy.deepcopy(tree.full_token_yield())
            for token in cleaned_tokens:
                token.set_edge_label('_')
            hybrid_tree = HybridTree()
            hybrid_tree = parser.dcp_hybrid_tree_best_derivation(
                hybrid_tree, cleaned_tokens, True, construct_conll_token)
            print(hybrid_tree)
            self.assertEqual(tree, hybrid_tree)

def test_conll_grammar_induction(self):
    ignore_punctuation = True
    trees = parse_conll_corpus(TEST_FILE, False)
    trees = disconnect_punctuation(trees)
    terminal_labeling = the_terminal_labeling_factory().get_strategy('pos')
    nonterminal_labeling = the_labeling_factory().create_simple_labeling_strategy('child', 'pos')
    (_, grammar) = d_i.induce_grammar(trees, nonterminal_labeling,
                                      terminal_labeling.token_label,
                                      [direct_extraction], 'START')

    trees2 = parse_conll_corpus(TEST_FILE_MODIFIED, False)
    trees2 = disconnect_punctuation(trees2)
    for tree in trees2:
        parser = LCFRS_parser(
            grammar, terminal_labeling.prepare_parser_input(tree.token_yield()))
        cleaned_tokens = copy.deepcopy(tree.full_token_yield())
        for token in cleaned_tokens:
            token.set_edge_label('_')
        h_tree = HybridTree()
        h_tree = parser.dcp_hybrid_tree_best_derivation(
            h_tree, cleaned_tokens, ignore_punctuation, construct_conll_token)
        # print(h_tree)
        print('input -> hybrid-tree -> output')
        print(tree_to_conll_str(tree))
        print('parsed tokens')
        print(list(map(str, h_tree.full_token_yield())))
        print('test_parser output')
        print(tree_to_conll_str(h_tree))

def test_induction_from_corpus_tree(self):
    dsg = sentence_names_to_deep_syntax_graphs(
        ["s26954"], "res/tiger/tiger_s26954.xml", hold=False)[0]

    def label_edge(edge):
        if isinstance(edge.label, ConstituentTerminal):
            return edge.label.pos()
        else:
            return edge.label

    labeling = lambda nodes, dsg: simple_labeling(nodes, dsg, label_edge)

    term_labeling_token = PosTerminals()

    def term_labeling(token):
        if isinstance(token, ConstituentTerminal):
            return term_labeling_token.token_label(token)
        else:
            return token

    rec_part_strategy = the_recursive_partitioning_factory().get_partitioning('cfg')[0]
    rec_part = rec_part_strategy(dsg)
    dcmp = compute_decomposition(dsg, rec_part)
    grammar = induce_grammar_from(dsg, rec_part, dcmp, labeling=labeling,
                                  terminal_labeling=term_labeling)
    print(grammar)

    parser = LCFRS_parser(grammar)
    parser.set_input(term_labeling_token.prepare_parser_input(dsg.sentence))
    parser.parse()
    self.assertTrue(parser.recognized())
    derivation = parser.best_derivation_tree()
    self.assertNotEqual(derivation, None)

def test_induction_with_labeling_strategies(self):
    dsg = build_dsg()
    rec_part_strategy = the_recursive_partitioning_factory().get_partitioning('right-branching')[0]
    rec_part = rec_part_strategy(dsg)
    dcmp = compute_decomposition(dsg, rec_part)
    grammar = induce_grammar_from(dsg, rec_part, dcmp, labeling=simple_labeling,
                                  terminal_labeling=str)
    print(grammar)

    parser = LCFRS_parser(grammar)
    # ["Sie", "entwickelt", "und", "druckt", "Verpackungen", "und", "Etiketten"]
    parser.set_input(dsg.sentence)
    parser.parse()
    self.assertTrue(parser.recognized())
    derivation = parser.best_derivation_tree()
    self.assertNotEqual(derivation, None)

    dog, sync_list = dog_evaluation(derivation)
    self.assertEqual(dog, dsg.dog)
    self.assertEqual(len(sync_list), len(dsg.sentence))
    # print(dog)
    # print(sync_list)

    morphism, _ = dsg.dog.compute_isomorphism(dog)
    for i in range(len(dsg.sentence)):
        self.assertListEqual(
            list(map(lambda x: morphism[x], dsg.get_graph_position(i))),
            sync_list[i])

def __process_single_dsg(self, i, dsg, rec_part_strat, terminal_labeling):
    if True or len(dsg.dog.outputs) > 1:
        print(i, dsg, dsg.label)
        # if i == 89:
        #     render_and_view_dog(dsg.dog, 'dm0', 'dm0')
        #     render_and_view_dog(corpus[1].dog, 'dm1', 'dm1')
        #     print(dsg.sentence, dsg.synchronization, dsg.label)
        # dog39 = dsg.dog.extract_dog([39], [], enforce_outputs=False)
        # render_and_view_dog(dog39, "dog39")

        rec_part = rec_part_strat(dsg)
        if False and i == 89:
            pretty_print_rec_partitioning(rec_part)

        decomp = compute_decomposition(dsg, rec_part)
        # print(decomp)

        grammar = induce_grammar_from(dsg, rec_part, decomp,
                                      terminal_labeling=terminal_labeling,
                                      enforce_outputs=False, normalize=True)
        if False and i == 89:
            print(grammar)

        parser = LCFRS_parser(grammar)
        parser.set_input(list(map(terminal_labeling, dsg.sentence)))
        print("parsing")
        parser.parse()
        self.assertTrue(parser.recognized())
        derivation = parser.best_derivation_tree()
        self.assertNotEqual(derivation, None)

        dog, sync_list = dog_evaluation(derivation)
        dsg.dog.project_labels(terminal_labeling)
        if False and i == 89:
            render_and_view_dog(dsg.dog, "corpus", "corpus_graph")
            render_and_view_dog(dog, "parse_result", "parse_result")
        print("comparing")
        self.assertEqual(dog, dsg.dog)

def test_dsg(self):
    dsg = build_dsg()
    rec_part = dsg.recursive_partitioning()
    self.assertEqual(
        rec_part,
        ({0, 1, 2, 3, 4, 5, 6},
         [({0, 1}, [({0}, []), ({1}, [])]),
          ({2}, []),
          ({3, 4, 5, 6},
           [({3}, []),
            ({4, 5, 6}, [({4}, []), ({5}, []), ({6}, [])])])]))
    dcmp = compute_decomposition(dsg, rec_part)
    self.assertEqual(
        dcmp,
        ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
         [([1, 4, 5], [([4], []), ([5], [])]),
          ([2], []),
          ([3, 6, 7, 8, 9, 10],
           [([7], []),
            ([6, 8, 9, 10], [([8], []), ([9], []), ([10], [])])])]))
    self.__structurally_equal(rec_part, dcmp)

    grammar = induce_grammar_from(dsg, rec_part, dcmp, terminal_labeling=str)
    # print(grammar)

    for nont, label in zip(["[4]", "[5]", "[2]", "[7]", "[8]", "[9]", "[10]"],
                           ["Sie", "entwickelt", "und", "druckt",
                            "Verpackungen", "und", "Etiketten"]):
        for rule in grammar.lhs_nont_to_rules(nont):
            self.assertEqual(rule.dcp()[0], build_terminal_dog(label))

    for nont, graph in zip(
            ["[1, 4, 5]", "[6, 8, 9, 10]", "[3, 6, 7, 8, 9, 10]",
             "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"],
            [dog_s1(), dog_s13(), dog_s3(), dog_se()]):
        for rule in grammar.lhs_nont_to_rules(nont):
            self.assertEqual(rule.dcp()[0], graph)

    parser = LCFRS_parser(grammar)
    # ["Sie", "entwickelt", "und", "druckt", "Verpackungen", "und", "Etiketten"]
    parser.set_input(dsg.sentence)
    parser.parse()
    self.assertTrue(parser.recognized())
    derivation = parser.best_derivation_tree()
    self.assertNotEqual(derivation, None)

    dog, sync_list = dog_evaluation(derivation)
    self.assertEqual(dog, dsg.dog)
    self.assertEqual(len(sync_list), len(dsg.sentence))
    # print(dog)
    # print(sync_list)

    morphism, _ = dsg.dog.compute_isomorphism(dog)
    for i in range(len(dsg.sentence)):
        self.assertListEqual(
            list(map(lambda x: morphism[x], dsg.get_graph_position(i))),
            sync_list[i])

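# The expected values in test_dsg show that a recursive partitioning and its
# decomposition share one shape: nested (item collection, children) pairs. Below is
# a minimal sketch of the kind of shape check that __structurally_equal presumably
# performs -- an illustrative helper, not the repository's implementation:
def _structurally_equal_sketch(rec_part, decomp):
    # Both arguments are (items, children) pairs; only the nesting shape is compared.
    _, part_children = rec_part
    _, decomp_children = decomp
    if len(part_children) != len(decomp_children):
        return False
    return all(_structurally_equal_sketch(p, d)
               for p, d in zip(part_children, decomp_children))
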
def test_induction_and_parsing_with_pos_recovery(self):
    naming = 'child'

    def rec_part(tree):
        return left_branching_partitioning(len(tree.id_yield()))

    tree = self.tree
    tree.add_to_root("VP1")
    print(tree)

    grammar = fringe_extract_lcfrs(tree, rec_part(tree), naming=naming,
                                   isolate_pos=True, term_labeling=FormTerminals())
    print(grammar)

    parser = LCFRS_parser(grammar)
    parser.set_input([token.form() for token in tree.token_yield()])
    parser.parse()
    self.assertTrue(parser.recognized())
    derivation = parser.best_derivation_tree()

    e = DCP_evaluator(derivation)
    dcp_term = e.getEvaluation()
    print(str(dcp_term[0]))

    t = ConstituentTree()
    dcp_to_hybridtree(
        t, dcp_term,
        [construct_constituent_token(token.form(), '--', True)
         for token in tree.token_yield()],
        ignore_punctuation=False,
        construct_token=construct_constituent_token)
    print(t)

    self.assertEqual(len(tree.token_yield()), len(t.token_yield()))
    for tok1, tok2 in zip(tree.token_yield(), t.token_yield()):
        self.assertEqual(tok1.form(), tok2.form())
        self.assertEqual(tok1.pos(), tok2.pos())

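# rec_part above wraps left_branching_partitioning. Mirroring the nested
# (position set, children) format shown in test_dsg, a left-branching partitioning
# of n terminal positions nests all structure in the left child. The following is
# an illustrative reconstruction under that assumption, not the library function:
def _left_branching_sketch(n):
    if n == 1:
        return ({0}, [])
    prefix = _left_branching_sketch(n - 1)
    return (set(range(n)), [prefix, ({n - 1}, [])])

# e.g. _left_branching_sketch(3)
#   == ({0, 1, 2}, [({0, 1}, [({0}, []), ({1}, [])]), ({2}, [])])
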
def test_negra_to_dag_parsing(self):
    names = list(map(str, [26954]))
    fd_, primary_file = tempfile.mkstemp(suffix='.export')
    with open(primary_file, mode='w') as pf:
        for s in names:
            dsg = tp.sentence_names_to_deep_syntax_graphs(
                ["s" + s], "res/tiger/tiger_s%s.xml" % s,
                hold=False, ignore_puntcuation=False)[0]
            dsg.set_label(dsg.label[1:])
            lines = np.serialize_hybrid_dag_to_negra(
                [dsg], 0, 500, use_sentence_names=True)
            print(''.join(lines), file=pf)

    _, binarized_file = tempfile.mkstemp(suffix='.export')
    subprocess.call(["discodop", "treetransforms", "--binarize", "-v", "1",
                     "-h", "1", primary_file, binarized_file])
    print(primary_file)
    print(binarized_file)

    corpus = np.sentence_names_to_hybridtrees(names, primary_file, secedge=True)
    corpus2 = np.sentence_names_to_hybridtrees(names, binarized_file, secedge=True)

    dag = corpus[0]
    print(dag)
    assert isinstance(dag, HybridDag)
    self.assertEqual(8, len(dag.token_yield()))
    for token in dag.token_yield():
        print(token.form() + '/' + token.pos(), end=' ')
    print()

    dag_bin = corpus2[0]
    print(dag_bin)
    for token in dag_bin.token_yield():
        print(token.form() + '/' + token.pos(), end=' ')
    print()
    self.assertEqual(8, len(dag_bin.token_yield()))

    for node, token in zip(dag_bin.nodes(),
                           list(map(str, map(dag_bin.node_token, dag_bin.nodes())))):
        print(node, token)
    print()

    print(top(dag_bin, {'500', '101', '102'}))
    self.assertSetEqual({'101', '500'}, top(dag_bin, {'500', '101', '102'}))
    print(bottom(dag_bin, {'500', '101', '102'}))
    self.assertSetEqual({'502'}, bottom(dag_bin, {'500', '101', '102'}))

    grammar = direct_extract_lcfrs_from_prebinarized_corpus(dag_bin)
    print(grammar)

    parser = LCFRS_parser(grammar)
    poss = list(map(lambda x: x.pos(), dag_bin.token_yield()))
    print(poss)
    parser.set_input(poss)
    parser.parse()
    self.assertTrue(parser.recognized())
    der = parser.best_derivation_tree()
    print(der)

    dcp_term = DCP_evaluator(der).getEvaluation()
    print(dcp_term[0])
    dag_eval = HybridDag(dag_bin.sent_label())
    dcp_to_hybriddag(dag_eval, dcp_term, copy.deepcopy(dag_bin.token_yield()),
                     False, construct_token=construct_constituent_token)
    print(dag_eval)
    for node in dag_eval.nodes():
        token = dag_eval.node_token(node)
        if token.type() == "CONSTITUENT-CATEGORY":
            label = token.category()
        elif token.type() == "CONSTITUENT-TERMINAL":
            label = token.form(), token.pos()
        print(node, label, dag_eval.children(node),
              dag_eval.sec_children(node), dag_eval.sec_parents(node))

    lines = np.serialize_hybridtrees_to_negra([dag_eval], 1, 500,
                                              use_sentence_names=True)
    for line in lines:
        print(line, end='')
    print()
    with open(primary_file) as pcf:
        for line in pcf:
            print(line, end='')

def test_negra_to_dag_parsing_and_reducts(self):
    names = list(map(str, [26954]))
    fd_, primary_file = tempfile.mkstemp(suffix='.export')
    with open(primary_file, mode='w') as pf:
        for s in names:
            dsg = tp.sentence_names_to_deep_syntax_graphs(
                ["s" + s], "res/tiger/tiger_s%s.xml" % s,
                hold=False, ignore_puntcuation=False)[0]
            dsg.set_label(dsg.label[1:])
            lines = np.serialize_hybrid_dag_to_negra(
                [dsg], 0, 500, use_sentence_names=True)
            print(''.join(lines), file=pf)

    _, binarized_file = tempfile.mkstemp(suffix='.export')
    subprocess.call(["discodop", "treetransforms", "--binarize", "-v", "1",
                     "-h", "1", primary_file, binarized_file])
    print(primary_file)
    print(binarized_file)

    corpus = np.sentence_names_to_hybridtrees(names, primary_file, secedge=True)
    corpus2 = np.sentence_names_to_hybridtrees(names, binarized_file, secedge=True)

    dag = corpus[0]
    print(dag)
    assert isinstance(dag, HybridDag)
    self.assertEqual(8, len(dag.token_yield()))
    for token in dag.token_yield():
        print(token.form() + '/' + token.pos(), end=' ')
    print()

    dag_bin = corpus2[0]
    print(dag_bin)
    for token in dag_bin.token_yield():
        print(token.form() + '/' + token.pos(), end=' ')
    print()
    self.assertEqual(8, len(dag_bin.token_yield()))

    for node, token in zip(dag_bin.nodes(),
                           list(map(str, map(dag_bin.node_token, dag_bin.nodes())))):
        print(node, token)
    print()

    print(top(dag_bin, {'500', '101', '102'}))
    self.assertSetEqual({'101', '500'}, top(dag_bin, {'500', '101', '102'}))
    print(bottom(dag_bin, {'500', '101', '102'}))
    self.assertSetEqual({'502'}, bottom(dag_bin, {'500', '101', '102'}))

    nont_labeling = BasicNonterminalLabeling()
    term_labeling = FormTerminals()  # PosTerminals()

    grammar = direct_extract_lcfrs_from_prebinarized_corpus(
        dag_bin, term_labeling, nont_labeling)
    # print(grammar)
    for rule in grammar.rules():
        print(rule.get_idx(), rule)

    print("Testing LCFRS parsing and DCP evaluation".center(80, '='))

    parser = LCFRS_parser(grammar)
    parser_input = term_labeling.prepare_parser_input(dag_bin.token_yield())
    print(parser_input)
    parser.set_input(parser_input)
    parser.parse()
    self.assertTrue(parser.recognized())
    der = parser.best_derivation_tree()
    print(der)

    dcp_term = DCP_evaluator(der).getEvaluation()
    print(dcp_term[0])
    dag_eval = HybridDag(dag_bin.sent_label())
    dcp_to_hybriddag(dag_eval, dcp_term, copy.deepcopy(dag_bin.token_yield()),
                     False, construct_token=construct_constituent_token)
    print(dag_eval)
    for node in dag_eval.nodes():
        token = dag_eval.node_token(node)
        if token.type() == "CONSTITUENT-CATEGORY":
            label = token.category()
        elif token.type() == "CONSTITUENT-TERMINAL":
            label = token.form(), token.pos()
        print(node, label, dag_eval.children(node),
              dag_eval.sec_children(node), dag_eval.sec_parents(node))

    lines = np.serialize_hybridtrees_to_negra([dag_eval], 1, 500,
                                              use_sentence_names=True)
    for line in lines:
        print(line, end='')
    print()
    with open(primary_file) as pcf:
        for line in pcf:
            print(line, end='')

    print('Testing reduct computation with Schick parser'.center(80, '='))

    grammar_path = '/tmp/lcfrs_dcp_grammar.gr'
    derivation_manager = PyDerivationManager(grammar)
    with open(grammar_path, 'w') as grammar_file:
        nonterminal_enc, terminal_enc = linearize(
            grammar, nont_labeling, term_labeling, grammar_file,
            delimiter=' : ',
            nonterminal_encoder=derivation_manager.get_nonterminal_map())

    print(np.negra_to_json(dag, terminal_enc, term_labeling))
    json_data = np.export_corpus_to_json([dag], terminal_enc, term_labeling)

    corpus_path = '/tmp/json_dags.json'
    with open(corpus_path, 'w') as data_file:
        json.dump(json_data, data_file)

    reduct_dir = '/tmp/schick_parser_reducts'
    if os.path.isdir(reduct_dir):
        shutil.rmtree(reduct_dir)
    os.makedirs(reduct_dir)

    p = subprocess.Popen(
        [' '.join(["java", "-jar", os.path.join("util", SCHICK_PARSER_JAR),
                   'reduct', '-g', grammar_path, '-t', corpus_path,
                   "--input-format", "json", "-o", reduct_dir])],
        shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    print("stdout", p.stdout.name)
    while True:
        nextline = p.stdout.readline()
        if nextline == b'' and p.poll() is not None:
            break
        print(nextline.decode('unicode_escape'), end='')
        # sys.stdout.write(nextline)
        # sys.stdout.flush()
    p.wait()
    p.stdout.close()
    self.assertEqual(0, p.returncode)

    rtgs = []

    def decode_nonterminals(s):
        return derivation_manager.get_nonterminal_map().index_object(int(s))

    for i in range(1, len(corpus) + 1):
        rtgs.append(read_rtg(os.path.join(reduct_dir, str(i) + '.gra'),
                             symbol_offset=-1, rule_prefix='r',
                             process_nonterminal=decode_nonterminals))

    print("Reduct RTG")
    for rule in rtgs[0].rules:
        print(rule.lhs, "->", rule.symbol, rule.rhs)

    derivation_manager.get_nonterminal_map().print_index()
    derivation_manager.convert_rtgs_to_hypergraphs(rtgs)
    derivation_manager.serialize(bytes('/tmp/reduct_manager.trace', encoding='utf8'))

    derivations = [LCFRSDerivationWrapper(der)
                   for der in derivation_manager.enumerate_derivations(0, grammar)]
    self.assertGreaterEqual(len(derivations), 1)
    if len(derivations) >= 1:
        print("Sentence", i)
        for der in derivations:
            print(der)
            self.assertTrue(der.check_integrity_recursive(der.root_id(), grammar.start()))

def test_stanford_unking_scheme(self):
    naming = 'child'

    def rec_part(tree):
        return left_branching_partitioning(len(tree.id_yield()))

    tree = self.tree
    tree.add_to_root("VP1")
    print(tree)

    terminal_labeling = StanfordUNKing([tree])

    grammar = fringe_extract_lcfrs(tree, rec_part(tree), naming=naming,
                                   isolate_pos=True, term_labeling=terminal_labeling)
    print(grammar)

    parser = LCFRS_parser(grammar)
    parser.set_input([token.form() for token in tree.token_yield()])
    parser.parse()
    self.assertTrue(parser.recognized())
    derivation = parser.best_derivation_tree()

    e = DCP_evaluator(derivation)
    dcp_term = e.getEvaluation()
    print(str(dcp_term[0]))

    t = ConstituentTree()
    dcp_to_hybridtree(
        t, dcp_term,
        [construct_constituent_token(token.form(), '--', True)
         for token in tree.token_yield()],
        ignore_punctuation=False,
        construct_token=construct_constituent_token)
    print(t)

    self.assertEqual(len(tree.token_yield()), len(t.token_yield()))
    for tok1, tok2 in zip(tree.token_yield(), t.token_yield()):
        self.assertEqual(tok1.form(), tok2.form())
        self.assertEqual(tok1.pos(), tok2.pos())

    rules = terminal_labeling.create_smoothed_rules()
    print(rules)

    new_rules = {}
    for rule in grammar.rules():
        if rule.rhs() == []:
            assert len(rule.dcp()) == 1
            dcp = rule.dcp()[0]
            assert len(dcp.rhs()) == 1
            term = dcp.rhs()[0]
            head = term.head()
            pos = head.pos()

            for tag, form in rules:
                if tag == pos:
                    lhs = LCFRS_lhs(rule.lhs().nont())
                    lhs.add_arg([form])
                    new_rules[lhs, dcp] = rules[tag, form]

    for lhs, dcp in new_rules:
        print(str(lhs), str(dcp), new_rules[(lhs, dcp)])

    tokens = [construct_constituent_token('hat', '--', True),
              construct_constituent_token('HAT', '--', True)]
    self.assertEqual(terminal_labeling.token_label(tokens[0]), 'hat')
    self.assertEqual(terminal_labeling.token_label(tokens[1]), '_UNK')
    terminal_labeling.test_mode = True
    self.assertEqual(terminal_labeling.token_label(tokens[0]), 'hat')
    self.assertEqual(terminal_labeling.token_label(tokens[1]), 'hat')

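# The assertions above pin down the observable behaviour of StanfordUNKing: forms
# seen in training keep their own label, while unseen forms collapse to '_UNK'
# (until test_mode relaxes the lookup). A self-contained sketch of that vocabulary
# thresholding idea -- a hypothetical class, not the StanfordUNKing implementation:
class _UnkLabelingSketch:
    def __init__(self, training_forms, unk='_UNK'):
        self.known = set(training_forms)  # vocabulary observed during training
        self.unk = unk

    def token_label(self, form):
        # Forms outside the training vocabulary map to the single UNK terminal.
        return form if form in self.known else self.unk
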
def test_single_root_induction(self):
    tree = hybrid_tree_1()
    # print tree.children("v")
    # print tree
    #
    # for id_set in ['v v1 v2 v21'.split(' '), 'v1 v2'.split(' '),
    #                'v v21'.split(' '), ['v'], ['v1'], ['v2'], ['v21']]:
    #     print id_set, 'top:', top(tree, id_set), 'bottom:', bottom(tree, id_set)
    #     print id_set, 'top_max:', max(tree, top(tree, id_set)), 'bottom_max:', max(tree, bottom(tree, id_set))
    #
    # print "some rule"
    # for mem, arg in [(-1, 0), (0, 0), (1, 0)]:
    #     print create_DCP_rule(mem, arg, top_max(tree, ['v', 'v1', 'v2', 'v21']),
    #                           bottom_max(tree, ['v', 'v1', 'v2', 'v21']),
    #                           [(top_max(tree, l), bottom_max(tree, l))
    #                            for l in [['v1', 'v2'], ['v', 'v21']]])
    #
    # print "some other rule"
    # for mem, arg in [(-1, 1), (1, 0)]:
    #     print create_DCP_rule(mem, arg, top_max(tree, ['v1', 'v2']),
    #                           bottom_max(tree, ['v1', 'v2']),
    #                           [(top_max(tree, l), bottom_max(tree, l))
    #                            for l in [['v1'], ['v2']]])
    #
    # print 'strict:', strict_labeling(tree, top_max(tree, ['v', 'v21']), bottom_max(tree, ['v', 'v21']))
    # print 'child:', child_labeling(tree, top_max(tree, ['v', 'v21']), bottom_max(tree, ['v', 'v21']))
    # print '---'
    # print 'strict:', strict_labeling(tree, top_max(tree, ['v1', 'v21']), bottom_max(tree, ['v1', 'v21']))
    # print 'child:', child_labeling(tree, top_max(tree, ['v1', 'v21']), bottom_max(tree, ['v1', 'v21']))
    # print '---'
    # print 'strict:', strict_labeling(tree, top_max(tree, ['v', 'v1', 'v21']), bottom_max(tree, ['v', 'v1', 'v21']))
    # print 'child:', child_labeling(tree, top_max(tree, ['v', 'v1', 'v21']), bottom_max(tree, ['v', 'v1', 'v21']))

    tree2 = hybrid_tree_2()
    # print tree2.children("v")
    # print tree2
    #
    # print 'siblings v211', tree2.siblings('v211')
    # print top(tree2, ['v', 'v1', 'v211'])
    # print top_max(tree2, ['v', 'v1', 'v211'])
    #
    # print '---'
    # print 'strict:', strict_labeling(tree2, top_max(tree2, ['v', 'v1', 'v211']), bottom_max(tree2, ['v', 'v11', 'v211']))
    # print 'child:', child_labeling(tree2, top_max(tree2, ['v', 'v1', 'v211']), bottom_max(tree2, ['v', 'v11', 'v211']))

    # rec_par = ('v v1 v2 v21'.split(' '),
    #            [('v1 v2'.split(' '), [(['v1'], []), (['v2'], [])]),
    #             ('v v21'.split(' '), [(['v'], []), (['v21'], [])])
    #            ])
    #
    # grammar = LCFRS(nonterminal_str(tree, top_max(tree, rec_par[0]), bottom_max(tree, rec_par[0]), 'strict'))
    #
    # add_rules_to_grammar_rec(tree, rec_par, grammar, 'child')
    #
    # grammar.make_proper()
    # print grammar

    print(tree.recursive_partitioning())

    terminal_labeling = the_terminal_labeling_factory().get_strategy('pos')
    (_, grammar) = induce_grammar(
        [tree, tree2],
        the_labeling_factory().create_simple_labeling_strategy('empty', 'pos'),
        # the_labeling_factory().create_simple_labeling_strategy('child', 'pos+deprel'),
        terminal_labeling.token_label,
        [direct_extraction],
        'START')
    print(max([grammar.fanout(nont) for nont in grammar.nonts()]))
    print(grammar)

    parser = LCFRS_parser(grammar, 'NP N V V'.split(' '))
    print(parser.best_derivation_tree())

    tokens = [construct_conll_token(form, pos)
              for form, pos in zip('Piet Marie helpen lezen'.split(' '),
                                   'NP N V V'.split(' '))]
    hybrid_tree = HybridTree()
    hybrid_tree = parser.dcp_hybrid_tree_best_derivation(
        hybrid_tree, tokens, True, construct_conll_token)
    print(list(map(str, hybrid_tree.full_token_yield())))
    print(hybrid_tree)

    string = "foo"
    dcp_string = DCP_string(string)
    dcp_string.set_edge_label("bar")
    print(dcp_string, dcp_string.edge_label())

    linearize(
        grammar,
        the_labeling_factory().create_simple_labeling_strategy('child', 'pos+deprel'),
        the_terminal_labeling_factory().get_strategy('pos'),
        sys.stdout)