def parse(self, tokens):
    """
    Parse ``tokens`` with a queue-driven bottom-up chart search.

    The grammar's ``check_coverage`` is called on the tokens first.  The
    queue is seeded with the edges produced by the bottom-up init rule.
    Edges are then popped one at a time -- after ``sort_queue`` has
    re-ordered the queue and, when ``beam_size`` is set, ``_prune`` has
    been applied -- and the bottom-up predict rule and the fundamental
    rule are applied to each popped edge until the queue is empty.

    With ``_trace > 1`` the initial edges are printed; with
    ``_trace > 0`` every popped edge is printed.

    :param tokens: the tokens to parse.
    :return: an iterator over the parses the chart reports for the
        grammar's start symbol, in order of decreasing ``prob()``.
    """
    self._grammar.check_coverage(tokens)
    grammar = self._grammar
    chart = Chart(list(tokens))

    # The three chart rules that drive the search.
    init_rule = ProbabilisticBottomUpInitRule()
    predict_rule = ProbabilisticBottomUpPredictRule()
    fundamental_rule = SingleEdgeProbabilisticFundamentalRule()

    def show(item):
        # One trace line: the formatted edge followed by its probability.
        print(' %-50s [%s]' % (chart.pretty_format_edge(item, width=2), item.prob()))

    # Seed the agenda from the init rule.
    agenda = []
    for seed in init_rule.apply(chart, grammar):
        if self._trace > 1:
            show(seed)
        agenda.append(seed)

    while agenda:
        # Re-order on every pass; the edge to process is taken from the end.
        self.sort_queue(agenda, chart)

        # Apply the beam, if one was configured.
        if self.beam_size:
            self._prune(agenda, chart)

        current = agenda.pop()
        if self._trace > 0:
            show(current)

        # Anything the rules produce goes back onto the agenda.
        agenda.extend(predict_rule.apply(chart, grammar, current))
        agenda.extend(fundamental_rule.apply(chart, grammar, current))

    # Collect the complete parses from the chart.
    results = list(chart.parses(grammar.start(), ProbabilisticTree))

    # Production probabilities keyed by (lhs, rhs), used to score the trees.
    prod_probs = {
        (production.lhs(), production.rhs()): production.prob()
        for production in grammar.productions()
    }
    for result in results:
        self._setprob(result, prod_probs)

    # Highest probability first.
    results.sort(key=lambda tree: tree.prob(), reverse=True)
    return iter(results)
def parse(self, tokens):
    """
    Run the bottom-up probabilistic chart parse over ``tokens``.

    Steps, in order:

    1. ``check_coverage`` is called on the grammar with the tokens.
    2. A chart is built over the tokens and the queue is filled with the
       edges yielded by the bottom-up init rule.
    3. Until the queue is empty: ``sort_queue`` is called, ``_prune`` is
       called when ``beam_size`` is truthy, one edge is popped, and the
       predict and fundamental rules are applied to it, with their
       results appended to the queue.
    4. The chart's parses for the grammar's start symbol are given
       probabilities via ``_setprob`` and sorted.

    Trace output is printed for initial edges when ``_trace > 1`` and for
    each popped edge when ``_trace > 0``.

    :param tokens: the tokens to parse.
    :return: an iterator over the parses, largest ``prob()`` first.
    """
    self._grammar.check_coverage(tokens)
    chart = Chart(list(tokens))
    grammar = self._grammar

    # Chart parser rules.
    bu_init = ProbabilisticBottomUpInitRule()
    bu = ProbabilisticBottomUpPredictRule()
    fr = SingleEdgeProbabilisticFundamentalRule()

    # Layout shared by both trace statements.
    trace_fmt = ' %-50s [%s]'
    verbosity_for_seeds = 1
    verbosity_for_pops = 0

    # Fill the work list from the init rule.
    work = []
    for new_edge in bu_init.apply(chart, grammar):
        if self._trace > verbosity_for_seeds:
            print(trace_fmt % (chart.pretty_format_edge(new_edge, width=2), new_edge.prob()))
        work.append(new_edge)

    while True:
        if not work:
            break

        # Re-sort, then trim to the beam when one is defined.
        self.sort_queue(work, chart)
        if self.beam_size:
            self._prune(work, chart)

        # The edge at the end of the sorted list is processed next.
        picked = work.pop()
        if self._trace > verbosity_for_pops:
            print(trace_fmt % (chart.pretty_format_edge(picked, width=2), picked.prob()))

        # Predict first, then the fundamental rule.
        for rule in (bu, fr):
            work.extend(rule.apply(chart, grammar, picked))

    # Table of production probabilities keyed by (lhs, rhs).
    prod_probs = dict(
        ((p.lhs(), p.rhs()), p.prob()) for p in grammar.productions()
    )

    # Score every complete parse found in the chart.
    found_trees = list(chart.parses(grammar.start(), ProbabilisticTree))
    for found_tree in found_trees:
        self._setprob(found_tree, prod_probs)

    def by_prob(tree):
        return tree.prob()

    found_trees.sort(key=by_prob, reverse=True)
    return iter(found_trees)
def nbest_parse(self, tokens, n=None):
    """
    Return the parses of ``tokens``, largest ``prob()`` first.

    The queue is seeded from the bottom-up init rule.  Each popped edge
    then has the emission, bottom-up predict and fundamental rules
    applied to it, in that order, until the queue is empty.  The chart's
    parses for the grammar's start symbol are scored with ``_setprob``
    (which is also passed ``grammar.density()``) and sorted.

    Trace output is printed for initial edges when ``_trace > 1`` and for
    each popped edge when ``_trace > 0``.

    :param tokens: the tokens to parse.
    :param n: upper bound used to slice the sorted result
        (``parses[:n]``); ``None`` returns every parse.
    :return: a list of parses sorted by decreasing ``prob()``.
    """
    # The tokens now come from a continuous set.
    # NOTE(review): presumably this is why no coverage check is made
    # here -- confirm against the grammar class.
    chart = Chart(list(tokens))
    grammar = self._grammar

    bu_init = ProbabilisticBottomUpInitRule()
    bu = ProbabilisticBottomUpPredictRule()
    fr = SingleEdgeProbabilisticFundamentalRule()
    em = ProbabilisticEmissionRule()

    # Seed the queue from the init rule.
    queue = []
    for edge in bu_init.apply_iter(chart, grammar):
        if self._trace > 1:
            print(' %-50s [%s]' % (chart.pp_edge(edge, width=2), edge.prob()))
        queue.append(edge)

    while queue:
        # Re-sort, then trim to the beam when one is defined.
        self.sort_queue(queue, chart)
        if self.beam_size:
            self._prune(queue, chart)

        edge = queue.pop()
        if self._trace > 0:
            print(' %-50s [%s]' % (chart.pp_edge(edge, width=2), edge.prob()))

        # Emission first, then predict, then the fundamental rule.
        queue.extend(em.apply(chart, grammar, edge))
        queue.extend(bu.apply(chart, grammar, edge))
        queue.extend(fr.apply(chart, grammar, edge))

    # Materialise the result so that it can be sorted in place and sliced
    # even when ``chart.parses`` hands back an iterator rather than a list.
    parses = list(chart.parses(grammar.start(), ProbabilisticTree))

    # Production probabilities keyed by (lhs, rhs), used to score the trees.
    prod_probs = {}
    for prod in grammar.productions():
        prod_probs[prod.lhs(), prod.rhs()] = prod.prob()
    density = grammar.density()
    for parse in parses:
        self._setprob(parse, prod_probs, density)

    parses.sort(reverse=True, key=lambda tree: tree.prob())
    return parses[:n]
def parse(self, tokens, notify=True, max=0):
    """Run a probabilistic parse of tokens.

    The grammar's ``check_coverage`` is called on the tokens first.
    Edges are then popped from a queue one at a time -- after
    ``sort_queue`` has re-ordered it and, when ``beam_size`` is set,
    ``_prune`` has been applied -- and the fundamental rule followed by
    the bottom-up predict rule is applied to each popped edge.

    A popped edge counts as a found parse when it is complete, spans the
    whole input and has the grammar's start symbol as its left-hand side.

    :param tokens: the tokens to parse.
    :param notify: if true, display each complete parse as it is found
        and print the total number found at the end.
    :param max: if ``max > 0``, quit after finding that many parses.
        The name shadows the builtin ``max``; it is kept because callers
        may pass it by keyword.
    :return: an iterator over the parses the chart reports for the
        start symbol, each with its probability assigned, in order of
        decreasing ``prob()``.
    """
    self._grammar.check_coverage(tokens)
    chart = Chart(list(tokens))
    chart._trace = self._trace  # Bad form. . .
    grammar = self._grammar
    start = grammar.start()

    # (lhs, rhs) -> production probability; filled on first use.
    prod_probs = {}

    def ensure_prod_probs():
        # Build the table only once it is actually needed.
        if not prod_probs:
            for prod in grammar.productions():
                prod_probs[prod.lhs(), prod.rhs()] = prod.prob()

    # Chart parser rules.
    bu_init = ProbabilisticBottomUpInitRule()
    bu = BetterPBPR()  # avoid infinite numbers of parses :-(
    fr = BetterSEPFR()  # don't look at pending edges

    # Our queue
    queue = []

    # Initialize the chart.
    for edge in bu_init.apply(chart, grammar):
        if self._trace > 1:
            print(' %-50s [%.4g]' % (chart.pretty_format_edge(
                edge, width=2), cost(edge.prob())))
        queue.append(edge)

    found = 0
    while queue and (max < 1 or found < max):
        # Re-sort the queue.
        self.sort_queue(queue, chart)

        # Prune the queue to the correct size if a beam was defined
        if self.beam_size:
            self._prune(queue, chart)

        # Get the best edge.
        edge = queue.pop()
        edge.pending = False
        if self._trace > 0:
            print(' %-50s [%.4g]' % (chart.pretty_format_edge(
                edge, width=2), cost(edge.prob())))

        if (edge.start() == 0
                and edge.end() == chart._num_leaves
                and edge.lhs() == start
                and edge.is_complete()):
            ensure_prod_probs()
            if notify:
                print("****")
                for tree in chart.trees(edge, tree_class=ProbabilisticTree,
                                        complete=True):
                    self._setprob(tree, prod_probs)
                    print('{}{:.4g}({:.4g})'.format(
                        tree, cost(tree.prob()), cost(edge.prob())))
                print("****")
            found += 1

        # Apply BU & FR to it.
        queue.extend(fr.apply(chart, grammar, edge))
        queue.extend(bu.apply(chart, grammar, edge))

    # Get a list of complete parses.
    parses = list(chart.parses(grammar.start(), ProbabilisticTree))

    # Score every returned parse, whatever ``notify`` is: the sort below
    # needs a probability on each tree.  The table is (re)checked here
    # because the chart may report parses even though no full-span edge
    # was popped in the loop above.
    if parses:
        ensure_prod_probs()
        for parse in parses:
            self._setprob(parse, prod_probs)

    # Sort by probability
    parses.sort(key=lambda tree: tree.prob(), reverse=True)
    if notify:
        print("{0} total parses found".format(found))
    return iter(parses)