Example #1
0
 def latent_viterbi_derivation(self, debug=False):
     manager = PyDerivationManager(self.grammar, self.nontMap)
     manager.convert_chart_to_hypergraph(self.chart, self.disco_grammar, debug=False)
     if debug:
         manager.serialize(b'/tmp/my_debug_hypergraph.hg')
     if isinstance(self.la, list):
         la = self.la[0]
     else:
         la = self.la
     vit_der = manager.latent_viterbi_derivation(0, la, self.grammar, debug=debug)
     # if len(self.input) < 15 and not debug:
     #     for weight, der in self.k_best_derivation_trees():
     #         if der != vit_der:
     #             print(weight, der, vit_der)
     #             vit_der2 = self.latent_viterbi_derivation(debug=True)
     #             print("vit2", vit_der2)
     #             if vit_der2 != vit_der:
     #                 print("first and second viterbi derivation differ")
     #             if vit_der2 == der:
     #                 print("second viterbi derivation = 1-best-disco-dop derivation")
     #         print("##############################", flush=True)
     #         break
     #         # raise Exception("too much to read")
     if vit_der is not None:
         vit_der = LCFRSDerivationWrapper(vit_der)
     return vit_der
Example #2
0
    def __projection_based_derivation_tree(self, la, variational=False, op=prod):
        if self.nontMap is None:
            print("A nonterminal map is required for weight projection based parsing!")
            return None
        manager = PyDerivationManager(self.grammar, self.nontMap)
        manager.convert_chart_to_hypergraph(self.chart, self.disco_grammar, debug=False)
        if self.grammarInfo is not None:
            assert manager.is_consistent_with_grammar(self.grammarInfo)
        manager.set_io_cycle_limit(200)
        manager.set_io_precision(0.000001)

        if not isinstance(la, list):
            la = [la]

        edge_weights = None

        for l in la:
            edge_weights_l = py_edge_weight_projection(l, manager, variational=variational, debug=self.debug,
                                                 log_mode=self.log_mode)
            if edge_weights is None:
                edge_weights = edge_weights_l
            else:
                if self.log_mode:
                    edge_weights = [w1 + w2 for w1, w2 in zip(edge_weights, edge_weights_l)]
                else:
                    edge_weights = [op(w1, w2) for w1, w2 in zip(edge_weights, edge_weights_l)]

        if self.debug:
            nans = 0
            infs = 0
            zeros = 0
            for weight in edge_weights:
                if weight == float("nan"):
                    nans += 1
                if weight == float("inf") or weight == float("-inf"):
                    infs += 1
                if weight == 0.0:
                    zeros += 1
            print("[", len(edge_weights), nans, infs, zeros, "]")
            if len(edge_weights) < 100:
                print(edge_weights)

        der = manager.viterbi_derivation(0, edge_weights, self.grammar, op=op, log_mode=self.log_mode)
        if der is None:
            print("p", end="")
            der = self.latent_viterbi_derivation(debug=self.debug)
        if der is not None:
            der = LCFRSDerivationWrapper(der)
        if der is None:
            _, der = next(self.k_best_derivation_trees())
        return der
Example #3
0
    def test_individual_parsing_stages(self):
        grammar = self.build_grammar()

        for r in transform_grammar(grammar):
            pprint(r)

        rule_list = list(transform_grammar(grammar))
        pprint(rule_list)
        disco_grammar = Grammar(rule_list, start=grammar.start())
        print(disco_grammar)

        inp = ["a"] * 3
        estimates = 'SXlrgaps', getestimates(disco_grammar, 40, grammar.start())
        print(type(estimates))
        chart, msg = parse(inp, disco_grammar, estimates=estimates)
        print(chart)
        print(msg)
        chart.filter()
        print("filtered chart")
        print(disco_grammar.nonterminals)
        print(type(disco_grammar.nonterminals))

        print(chart)
        # print(help(chart))

        root = chart.root()
        print("root", root, type(root))
        print(chart.indices(root))
        print(chart.itemstr(root))
        print(chart.stats())
        print("root label", chart.label(root))
        print(root, chart.itemid1(chart.label(root), chart.indices(root)))
        for i in range(1, chart.numitems() + 1):
            print(i, chart.label(i), chart.indices(i), chart.numedges(i))
            if True or len(chart.indices(i)) > 1:
                for edge_num in range(chart.numedges(i)):
                    edge = chart.getEdgeForItem(i, edge_num)
                    if isinstance(edge, tuple):
                        print("\t", disco_grammar.nonterminalstr(chart.label(i)) + "[" + str(i) + "]", "->", ' '.join([disco_grammar.nonterminalstr(chart.label(j)) + "[" + str(j) + "]" for j in [edge[1], edge[2]] if j != 0]))
                    else:
                        print("\t", disco_grammar.nonterminalstr(chart.label(i)) + "[" + str(i) + "]", "->", inp[edge])
        print(chart.getEdgeForItem(root, 0))
        # print(lazykbest(chart, 5))

        manager = PyDerivationManager(grammar)
        manager.convert_chart_to_hypergraph(chart, disco_grammar, debug=True)

        file = tempfile.mktemp()
        print(file)
        manager.serialize(bytes(file, encoding="utf-8"))

        gi = PyGrammarInfo(grammar, manager.get_nonterminal_map())
        sm = PyStorageManager()
        la = build_PyLatentAnnotation_initial(grammar, gi, sm)

        vec = py_edge_weight_projection(la, manager, variational=True, debug=True, log_mode=False)
        print(vec)
        self.assertEqual([1.0, 1.0, 1.0, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25, 0.25, 0.25, 1.0], vec)

        vec = py_edge_weight_projection(la, manager, variational=False, debug=True, log_mode=False)
        print(vec)
        self.assertEqual([1.0, 1.0, 1.0, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 1.0], vec)

        der = manager.viterbi_derivation(0, vec, grammar)
        print(der)

        # print(disco_grammar.rulenos)
        # print(disco_grammar.numrules)
        # print(disco_grammar.lexicalbylhs)
        # print(disco_grammar.lexicalbyword)
        # print(disco_grammar.lexicalbynum)
        # print(disco_grammar.origrules, type(disco_grammar.origrules))
        # print(disco_grammar.numbinary)
        # print(disco_grammar.numunary)
        # print(disco_grammar.toid)
        # print(disco_grammar.tolabel)
        # print(disco_grammar.bitpar)
        # striplabelre = re.compile(r'-\d+$')
        # msg = disco_grammar.getmapping(None, None)
        # disco_grammar.getrulemapping(disco_grammar, striplabelre)
        # mapping = disco_grammar.rulemapping
        # print(mapping)
        # for idx, group in enumerate(mapping):
        #     print("Index", idx)
        #     for elem in group:
        #         print(grammar.rule_index(elem))

        # for _, item in zip(range(20), chart.parseforest):
        #     edge = chart.parseforest[item]
        #     print(item, item.binrepr(), item.__repr__(), item.lexidx())
        #     print(type(edge))
        for _ in range(5):
            vec2 = py_edge_weight_projection(la, manager, debug=True, log_mode=True)
            print(vec2)