def __projection_based_derivation_tree(self, la, variational=False, op=prod):
    """Pick a derivation via edge-weight projection over the base parser's k-best list.

    The k-best derivations of ``self.base_parser`` are merged into a
    hypergraph, edge weights are obtained from ``py_edge_weight_projection``
    for the annotation ``la``, and ``manager.viterbi_derivation`` selects a
    derivation. If that yields ``None``, weight statistics are printed and the
    first entry of ``self.k_best_derivation_trees()`` is returned instead.

    Side effects: ``self.debug`` is set to ``False`` and ``self.log_mode`` to
    ``True`` on every call.

    :param la: annotation object handed to ``py_edge_weight_projection``
    :param variational: forwarded to ``py_edge_weight_projection``
    :param op: forwarded to ``manager.viterbi_derivation``
    :return: the selected derivation
    """
    manager = PyDerivationManager(self.grammar, self.nontMap)
    manager.convert_derivations_to_hypergraph(
        [tree for _, tree in self.base_parser.k_best_derivation_trees()]
    )
    manager.set_io_cycle_limit(200)
    manager.set_io_precision(0.000001)

    # This variant always runs with debugging off and in log mode.
    self.debug = False
    self.log_mode = True

    def project(debug):
        # Single place for the projection call so the re-runs below stay in sync.
        return py_edge_weight_projection(
            la, manager, variational=variational, debug=debug, log_mode=self.log_mode
        )

    edge_weights = project(self.debug)
    best = manager.viterbi_derivation(
        0, edge_weights, self.grammar, op=op, log_mode=self.log_mode
    )
    if best is not None:
        return best

    # Diagnostics are emitted on every failure, independent of self.debug.
    nan_count = sum(1 for w in edge_weights if math.isnan(w))
    inf_count = sum(1 for w in edge_weights if math.isinf(w))
    zero_count = sum(1 for w in edge_weights if w == 0.0)
    print("[", len(edge_weights), nan_count, inf_count, zero_count, "]")

    if len(edge_weights) < 200:
        # For small weight vectors, re-run the projection twice with debugging
        # enabled and print each result next to the original.
        print("orig:", edge_weights)
        edge_weights = project(True)
        print("1:", edge_weights)
        edge_weights = project(True)
        print("2:", edge_weights)

    print("p", end="")
    _, best = next(self.k_best_derivation_trees())
    return best
def __projection_based_derivation_tree(self, la, variational=False, op=prod):
    """Pick a derivation via edge-weight projection over the parse chart.

    ``self.chart`` is converted into a hypergraph, edge weights are obtained
    from ``py_edge_weight_projection`` for each annotation in ``la`` and
    combined element-wise, and ``manager.viterbi_derivation`` selects a
    derivation. Fallbacks, in order: ``self.latent_viterbi_derivation``
    (wrapped in ``LCFRSDerivationWrapper``), then the first entry of
    ``self.k_best_derivation_trees()``.

    :param la: a single annotation object or a list of them
    :param variational: forwarded to ``py_edge_weight_projection``
    :param op: combines the weights of several annotations when
        ``self.log_mode`` is false; also forwarded to
        ``manager.viterbi_derivation``
    :return: the selected derivation, or ``None`` if ``self.nontMap`` is unset
    """
    # Function-scope import keeps this block self-contained.
    import math

    if self.nontMap is None:
        print("A nonterminal map is required for weight projection based parsing!")
        return None

    manager = PyDerivationManager(self.grammar, self.nontMap)
    manager.convert_chart_to_hypergraph(self.chart, self.disco_grammar, debug=False)
    if self.grammarInfo is not None:
        assert manager.is_consistent_with_grammar(self.grammarInfo)

    manager.set_io_cycle_limit(200)
    manager.set_io_precision(0.000001)

    annotations = la if isinstance(la, list) else [la]

    edge_weights = None
    for annotation in annotations:
        projected = py_edge_weight_projection(
            annotation,
            manager,
            variational=variational,
            debug=self.debug,
            log_mode=self.log_mode,
        )
        if edge_weights is None:
            edge_weights = projected
        elif self.log_mode:
            # In log mode the per-annotation weights are combined by addition
            # (presumably the log-space counterpart of `op` -- TODO confirm).
            edge_weights = [w1 + w2 for w1, w2 in zip(edge_weights, projected)]
        else:
            edge_weights = [op(w1, w2) for w1, w2 in zip(edge_weights, projected)]

    if self.debug:
        # NaN never compares equal to anything (not even itself), so the
        # counts must use math.isnan / math.isinf rather than `==`.
        nans = 0
        infs = 0
        zeros = 0
        for weight in edge_weights:
            if math.isnan(weight):
                nans += 1
            if math.isinf(weight):
                infs += 1
            if weight == 0.0:
                zeros += 1
        print("[", len(edge_weights), nans, infs, zeros, "]")
        if len(edge_weights) < 100:
            print(edge_weights)

    der = manager.viterbi_derivation(
        0, edge_weights, self.grammar, op=op, log_mode=self.log_mode
    )
    if der is None:
        print("p", end="")
        der = self.latent_viterbi_derivation(debug=self.debug)
        if der is not None:
            der = LCFRSDerivationWrapper(der)
    if der is None:
        # Last resort: take the top entry of the k-best list.
        _, der = next(self.k_best_derivation_trees())
    return der
def test_individual_parsing_stages(self):
    """Run the parsing pipeline stage by stage on the input ``a a a``.

    Stages exercised: grammar transformation, chart parsing and filtering,
    chart inspection (items and edges are printed), conversion of the chart
    into a hypergraph, serialization of the manager to a temporary file, and
    edge-weight projection. The projected weights are asserted for the
    variational and the non-variational setting; everything else is only
    printed for manual inspection.
    """
    grammar = self.build_grammar()
    for r in transform_grammar(grammar):
        pprint(r)

    rule_list = list(transform_grammar(grammar))
    pprint(rule_list)
    disco_grammar = Grammar(rule_list, start=grammar.start())
    print(disco_grammar)

    inp = ["a"] * 3
    estimates = 'SXlrgaps', getestimates(disco_grammar, 40, grammar.start())
    print(type(estimates))
    chart, msg = parse(inp, disco_grammar, estimates=estimates)
    print(chart)
    print(msg)
    chart.filter()
    print("filtered chart")
    print(disco_grammar.nonterminals)
    print(type(disco_grammar.nonterminals))
    print(chart)
    # print(help(chart))

    root = chart.root()
    print("root", root, type(root))
    print(chart.indices(root))
    print(chart.itemstr(root))
    print(chart.stats())
    print("root label", chart.label(root))
    print(root, chart.itemid1(chart.label(root), chart.indices(root)))

    # Chart items are enumerated starting at 1; edges are listed for every item.
    for i in range(1, chart.numitems() + 1):
        print(i, chart.label(i), chart.indices(i), chart.numedges(i))
        for edge_num in range(chart.numedges(i)):
            edge = chart.getEdgeForItem(i, edge_num)
            lhs = disco_grammar.nonterminalstr(chart.label(i)) + "[" + str(i) + "]"
            if isinstance(edge, tuple):
                # Tuple edges carry child item ids at positions 1 and 2;
                # a child id of 0 is skipped.
                rhs = ' '.join([
                    disco_grammar.nonterminalstr(chart.label(j)) + "[" + str(j) + "]"
                    for j in [edge[1], edge[2]]
                    if j != 0
                ])
                print("\t", lhs, "->", rhs)
            else:
                # Non-tuple edges are used as an index into the input sentence.
                print("\t", lhs, "->", inp[edge])

    print(chart.getEdgeForItem(root, 0))
    # print(lazykbest(chart, 5))

    manager = PyDerivationManager(grammar)
    manager.convert_chart_to_hypergraph(chart, disco_grammar, debug=True)

    # tempfile.mktemp() is deprecated: the returned name can be claimed by
    # another process before it is used. NamedTemporaryFile creates the file
    # atomically; delete=False keeps the path available for serialize().
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        path = tmp.name
    print(path)
    manager.serialize(bytes(path, encoding="utf-8"))

    gi = PyGrammarInfo(grammar, manager.get_nonterminal_map())
    sm = PyStorageManager()
    la = build_PyLatentAnnotation_initial(grammar, gi, sm)

    vec = py_edge_weight_projection(la, manager, variational=True, debug=True, log_mode=False)
    print(vec)
    self.assertEqual([1.0, 1.0, 1.0, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25, 0.25, 0.25, 1.0], vec)

    vec = py_edge_weight_projection(la, manager, variational=False, debug=True, log_mode=False)
    print(vec)
    self.assertEqual([1.0, 1.0, 1.0, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 1.0], vec)

    der = manager.viterbi_derivation(0, vec, grammar)
    print(der)

    # Disabled exploration snippets for the disco-dop grammar/chart API:
    # print(disco_grammar.rulenos)
    # print(disco_grammar.numrules)
    # print(disco_grammar.lexicalbylhs)
    # print(disco_grammar.lexicalbyword)
    # print(disco_grammar.lexicalbynum)
    # print(disco_grammar.origrules, type(disco_grammar.origrules))
    # print(disco_grammar.numbinary)
    # print(disco_grammar.numunary)
    # print(disco_grammar.toid)
    # print(disco_grammar.tolabel)
    # print(disco_grammar.bitpar)
    # striplabelre = re.compile(r'-\d+$')
    # msg = disco_grammar.getmapping(None, None)
    # disco_grammar.getrulemapping(disco_grammar, striplabelre)
    # mapping = disco_grammar.rulemapping
    # print(mapping)
    # for idx, group in enumerate(mapping):
    #     print("Index", idx)
    #     for elem in group:
    #         print(grammar.rule_index(elem))

    # for _, item in zip(range(20), chart.parseforest):
    #     edge = chart.parseforest[item]
    #     print(item, item.binrepr(), item.__repr__(), item.lexidx())
    #     print(type(edge))

    # Repeat the log-mode projection several times and print each result.
    for _ in range(5):
        vec2 = py_edge_weight_projection(la, manager, debug=True, log_mode=True)
        print(vec2)