コード例 #1
0
ファイル: pchart.py プロジェクト: prz3m/kind2anki
    def parse(self, tokens):
        """Parse ``tokens`` bottom-up and return an iterator over the parses.

        The queue is seeded with the edges produced by the bottom-up init
        rule.  On every pass it is re-sorted with ``self.sort_queue``,
        trimmed with ``self._prune`` when ``self.beam_size`` is truthy, and
        the last edge is popped and expanded with the predict rule followed
        by the fundamental rule.

        :param tokens: the tokens to parse; ``check_coverage`` is called on
            the grammar with them before any work is done.
        :return: an iterator over the complete parses rooted at the
            grammar's start symbol, ordered by descending ``prob()``.
        """
        grammar = self._grammar
        grammar.check_coverage(tokens)
        chart = Chart(list(tokens))

        # Chart parser rules.
        init_rule = ProbabilisticBottomUpInitRule()
        predict_rule = ProbabilisticBottomUpPredictRule()
        fundamental_rule = SingleEdgeProbabilisticFundamentalRule()

        def show(edge, threshold):
            # Echo an edge only when the trace level exceeds ``threshold``.
            if self._trace > threshold:
                print(
                    '  %-50s [%s]'
                    % (chart.pretty_format_edge(edge, width=2), edge.prob())
                )

        # Seed the agenda from the chart's initial edges.
        agenda = []
        for seed in init_rule.apply(chart, grammar):
            show(seed, 1)
            agenda.append(seed)

        while agenda:
            # Keep the agenda ordered, and bounded when a beam is set.
            self.sort_queue(agenda, chart)
            if self.beam_size:
                self._prune(agenda, chart)

            # The edge to process is taken from the end of the agenda.
            best = agenda.pop()
            show(best, 0)

            # Predict first, then the fundamental rule, each fully consumed
            # before the next rule is applied.
            for rule in (predict_rule, fundamental_rule):
                agenda.extend(rule.apply(chart, grammar, best))

        # Collect the complete parses and attach probabilities to them.
        ranked = list(chart.parses(grammar.start(), ProbabilisticTree))
        rule_probs = {
            (production.lhs(), production.rhs()): production.prob()
            for production in grammar.productions()
        }
        for tree in ranked:
            self._setprob(tree, rule_probs)

        # Highest probability first.
        ranked.sort(key=lambda tree: tree.prob(), reverse=True)
        return iter(ranked)
コード例 #2
0
ファイル: pchart.py プロジェクト: hfiuza/Text-Mining-and-NLP
    def parse(self, tokens):
        """Return an iterator over the parses of ``tokens``, best first.

        Runs a queue-driven bottom-up chart parse: the queue starts with the
        edges from the bottom-up init rule; each iteration re-sorts it via
        ``self.sort_queue``, prunes it via ``self._prune`` if
        ``self.beam_size`` is truthy, pops one edge and feeds it to the
        bottom-up predict rule and then to the fundamental rule.

        :param tokens: tokens to parse; the grammar's ``check_coverage`` is
            invoked on them first.
        :return: iterator over complete parses for the grammar's start
            symbol, sorted by ``prob()`` in descending order.
        """
        self._grammar.check_coverage(tokens)
        grammar = self._grammar
        chart = Chart(list(tokens))

        # Rule that seeds the chart, and the rules applied to popped edges
        # (in this order).
        seed_rule = ProbabilisticBottomUpInitRule()
        expansion_rules = (
            ProbabilisticBottomUpPredictRule(),
            SingleEdgeProbabilisticFundamentalRule(),
        )

        # Layout shared by both trace messages.
        edge_line = '  %-50s [%s]'

        pending = []
        for seed in seed_rule.apply(chart, grammar):
            if self._trace > 1:
                print(edge_line % (
                    chart.pretty_format_edge(seed, width=2), seed.prob()))
            pending.append(seed)

        while pending:
            # Restore the queue order before choosing the next edge.
            self.sort_queue(pending, chart)

            # Enforce the beam, when one is configured.
            if self.beam_size:
                self._prune(pending, chart)

            current = pending.pop()
            if self._trace > 0:
                print(edge_line % (
                    chart.pretty_format_edge(current, width=2),
                    current.prob()))

            for rule in expansion_rules:
                pending.extend(rule.apply(chart, grammar, current))

        # Gather complete parses, then label them with probabilities looked
        # up by (lhs, rhs) of each grammar production.
        trees = list(chart.parses(grammar.start(), ProbabilisticTree))
        weights = dict(
            ((production.lhs(), production.rhs()), production.prob())
            for production in grammar.productions()
        )
        for tree in trees:
            self._setprob(tree, weights)

        return iter(sorted(trees, key=lambda tree: tree.prob(), reverse=True))
コード例 #3
0
  def nbest_parse(self, tokens, n=None):
    """Return up to ``n`` parses of ``tokens``, ordered by descending prob().

    The queue is seeded by the bottom-up init rule; each popped edge is
    passed to the emission rule, the bottom-up predict rule and the
    fundamental rule, in that order.  Unlike the other steps, tokens are
    not checked for grammar coverage here.

    :param tokens: the tokens to parse.
        NOTE(review): the original comment says the tokens come from a
        continuous set -- confirm against the emission rule.
    :param n: maximum number of parses to return; ``None`` returns all.
    :return: the first ``n`` items of the sorted parses.
    """
    chart = Chart(list(tokens))
    grammar = self._grammar

    seed_rule = ProbabilisticBottomUpInitRule()
    predict_rule = ProbabilisticBottomUpPredictRule()
    fundamental_rule = SingleEdgeProbabilisticFundamentalRule()
    emission_rule = ProbabilisticEmissionRule()

    def show(edge, threshold):
      # Print the edge only when the trace level exceeds ``threshold``.
      if self._trace > threshold:
        print('  %-50s [%s]' % (chart.pp_edge(edge,width=2),
                                edge.prob()))

    agenda = []
    for seed in seed_rule.apply_iter(chart, grammar):
      show(seed, 1)
      agenda.append(seed)

    while agenda:
      # Re-order the agenda, then trim it if a beam size is set.
      self.sort_queue(agenda, chart)
      if self.beam_size:
        self._prune(agenda, chart)

      current = agenda.pop()
      show(current, 0)

      # Emission, then prediction, then the fundamental rule.
      for rule in (emission_rule, predict_rule, fundamental_rule):
        agenda.extend(rule.apply(chart, grammar, current))

    parses = chart.parses(grammar.start(), ProbabilisticTree)

    # Probability of every production, keyed by (lhs, rhs).
    rule_probs = {
      (production.lhs(), production.rhs()): production.prob()
      for production in grammar.productions()
    }

    # _setprob additionally receives the grammar's density here.
    for tree in parses:
      self._setprob(tree, rule_probs, grammar.density())

    # Sorted in place, exactly as returned by the chart.
    parses.sort(key=lambda tree: tree.prob(), reverse=True)

    return parses[:n]
コード例 #4
0
    def parse(self, tokens, notify=True, max=0):
        """Run a probabilistic bottom-up chart parse of ``tokens``.

        :param tokens: the tokens to parse; the grammar's ``check_coverage``
            is called on them first.
        :param notify: if true, print each complete parse as soon as its
            edge is taken off the queue, and print the total count at the
            end.
        :param max: if greater than zero, stop after that many complete
            edges spanning the whole input have been found.  (The name
            shadows the builtin but is kept for caller compatibility.)
        :return: an iterator over the complete parses in the chart, ordered
            by descending ``prob()``.
        """
        self._grammar.check_coverage(tokens)
        chart = Chart(list(tokens))
        chart._trace = self._trace  # Bad form. . .
        grammar = self._grammar
        start = grammar.start()

        # Build the production-probability table once, up front.  It used to
        # be filled only when a complete edge was popped, which left it empty
        # for the final _setprob pass if such an edge never left the queue
        # (e.g. it was pruned by the beam) although the chart held a parse.
        prod_probs = {
            (prod.lhs(), prod.rhs()): prod.prob()
            for prod in grammar.productions()
        }

        # Chart parser rules.
        bu_init = ProbabilisticBottomUpInitRule()
        bu = BetterPBPR()  # avoid infinite numbers of parses :-(
        fr = BetterSEPFR()  # don't look at pending edges

        # Our queue
        queue = []

        # Initialize the chart.
        for edge in bu_init.apply(chart, grammar):
            if self._trace > 1:
                print('  %-50s [%.4g]' % (chart.pretty_format_edge(
                    edge, width=2), cost(edge.prob())))
            queue.append(edge)

        found = 0
        while queue and (max < 1 or found < max):
            # Re-sort the queue.
            self.sort_queue(queue, chart)

            # Prune the queue to the correct size if a beam was defined
            if self.beam_size:
                self._prune(queue, chart)

            # Get the best edge and mark it as no longer pending.
            edge = queue.pop()
            edge.pending = False
            if self._trace > 0:
                print('  %-50s [%.4g]' % (chart.pretty_format_edge(
                    edge, width=2), cost(edge.prob())))

            # A complete edge for the start symbol that spans the whole
            # input counts as one found parse.
            if (edge.start() == 0 and edge.end() == chart._num_leaves
                    and edge.lhs() == start and edge.is_complete()):
                if notify:
                    print("****")
                    for tree in chart.trees(edge,
                                            tree_class=ProbabilisticTree,
                                            complete=True):
                        self._setprob(tree, prod_probs)
                        print('{}{:.4g}({:.4g})'.format(
                            tree, cost(tree.prob()), cost(edge.prob())))
                    print("****")
                found += 1

            # Apply FR & BU to it.
            queue.extend(fr.apply(chart, grammar, edge))
            queue.extend(bu.apply(chart, grammar, edge))

        # Get a list of complete parses.
        parses = list(chart.parses(start, ProbabilisticTree))

        # Assign probabilities regardless of ``notify``: the trees printed
        # above come from a separate chart.trees() call, so the trees
        # returned here would otherwise carry no probability to sort on.
        for parse in parses:
            self._setprob(parse, prod_probs)

        # Sort by probability (previously sorted on tree[0], the first
        # child, which does not reflect the probability).
        parses.sort(key=lambda tree: tree.prob(), reverse=True)
        if notify:
            print("{0} total parses found".format(found))
        return iter(parses)