def test_induction_and_parsing_with_pos_recovery(self):
        naming = 'child'

        def rec_part(tree):
            return left_branching_partitioning(len(tree.id_yield()))

        tree = self.tree
        tree.add_to_root("VP1")

        print(tree)

        grammar = fringe_extract_lcfrs(tree,
                                       rec_part(tree),
                                       naming=naming,
                                       isolate_pos=True,
                                       term_labeling=FormTerminals())
        print(grammar)

        parser = LCFRS_parser(grammar)
        parser.set_input([token.form() for token in tree.token_yield()])
        parser.parse()
        self.assertTrue(parser.recognized())
        derivation = parser.best_derivation_tree()
        e = DCP_evaluator(derivation)
        dcp_term = e.getEvaluation()
        print(str(dcp_term[0]))
        t = ConstituentTree()
        dcp_to_hybridtree(
            t,
            dcp_term, [
                construct_constituent_token(token.form(), '--', True)
                for token in tree.token_yield()
            ],
            ignore_punctuation=False,
            construct_token=construct_constituent_token)
        print(t)
        self.assertEqual(len(tree.token_yield()), len(t.token_yield()))
        for tok1, tok2 in zip(tree.token_yield(), t.token_yield()):
            self.assertEqual(tok1.form(), tok2.form())
            self.assertEqual(tok1.pos(), tok2.pos())
    def test_stanford_unking_scheme(self):
        naming = 'child'

        def rec_part(tree):
            return left_branching_partitioning(len(tree.id_yield()))

        tree = self.tree
        tree.add_to_root("VP1")

        print(tree)

        terminal_labeling = StanfordUNKing([tree])

        grammar = fringe_extract_lcfrs(tree,
                                       rec_part(tree),
                                       naming=naming,
                                       isolate_pos=True,
                                       term_labeling=terminal_labeling)
        print(grammar)

        parser = LCFRS_parser(grammar)
        parser.set_input([token.form() for token in tree.token_yield()])
        parser.parse()
        self.assertTrue(parser.recognized())
        derivation = parser.best_derivation_tree()
        e = DCP_evaluator(derivation)
        dcp_term = e.getEvaluation()
        print(str(dcp_term[0]))
        t = ConstituentTree()
        dcp_to_hybridtree(
            t,
            dcp_term, [
                construct_constituent_token(token.form(), '--', True)
                for token in tree.token_yield()
            ],
            ignore_punctuation=False,
            construct_token=construct_constituent_token)
        print(t)
        self.assertEqual(len(tree.token_yield()), len(t.token_yield()))
        for tok1, tok2 in zip(tree.token_yield(), t.token_yield()):
            self.assertEqual(tok1.form(), tok2.form())
            self.assertEqual(tok1.pos(), tok2.pos())

        rules = terminal_labeling.create_smoothed_rules()
        print(rules)

        new_rules = {}

        for rule in grammar.rules():
            if rule.rhs() == []:
                assert len(rule.dcp()) == 1
                dcp = rule.dcp()[0]
                assert len(dcp.rhs()) == 1
                term = dcp.rhs()[0]
                head = term.head()
                pos = head.pos()

                for tag, form in rules:
                    if tag == pos:
                        lhs = LCFRS_lhs(rule.lhs().nont())
                        lhs.add_arg([form])
                        new_rules[lhs, dcp] = rules[tag, form]

        for lhs, dcp in new_rules:
            print(str(lhs), str(dcp), new_rules[(lhs, dcp)])

        tokens = [
            construct_constituent_token('hat', '--', True),
            construct_constituent_token('HAT', '--', True)
        ]
        self.assertEqual(terminal_labeling.token_label(tokens[0]), 'hat')
        self.assertEqual(terminal_labeling.token_label(tokens[1]), '_UNK')
        terminal_labeling.test_mode = True
        self.assertEqual(terminal_labeling.token_label(tokens[0]), 'hat')
        self.assertEqual(terminal_labeling.token_label(tokens[1]), 'hat')