Ejemplos de ProbabilisticTree en Python, ejemplos de nltk.tree.ProbabilisticTree en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: hw4_parser.py Proyecto: Diana-Zhang/Parser-Improved

    def get_subparse(backpointer):
        """Expand a chart backpointer into every parse tree it derives.

        Child backpointers are followed through the enclosing ``table``.

        Args:
            backpointer: chart entry exposing ``prod`` (the production used)
                and ``l_child`` / ``r_child``.  Both children are ``None``
                for a lexical entry; otherwise each is indexed as
                ``[0]`` / ``[1]`` to locate the child cell in ``table``.

        Returns:
            list of ProbabilisticTree: a single-element list for a lexical
            entry, otherwise one tree per (left parse, right parse) pair.
        """
        rule = backpointer.prod
        # Probability of tree for input S, P(T, S) = P(T)P(S|T) = P(T)
        rule_prob = rule.prob()

        if backpointer.l_child is None and backpointer.r_child is None:
            leaf = ProbabilisticTree(str(rule.lhs()), [str(rule.rhs()[0])],
                                     prob=rule_prob)
            return [leaf]

        def expand(symbol, cell):
            # Collect the parses of every backpointer stored for `symbol`
            # in the cell addressed by `cell`.
            found = []
            for entry in table[cell[0]][cell[1]][symbol]:
                found.extend(get_subparse(entry))
            return found

        left_parses = expand(rule.rhs()[0], backpointer.l_child)
        right_parses = expand(rule.rhs()[1], backpointer.r_child)

        # Cross product of the alternatives; each tree's probability is the
        # rule probability times both sub-parse probabilities.
        return [
            ProbabilisticTree(str(rule.lhs()), [lt, rt],
                              prob=rule_prob * lt.prob() * rt.prob())
            for lt in left_parses
            for rt in right_parses
        ]

Ejemplo n.º 2

0

Mostrar archivo

Archivo: cky_parser.py Proyecto: Aadesh-Magare/PCFG_CKY_NLU

    def parse(self, tokens):
        """Return the most probable parse of ``tokens``, or ``None``.

        Words the grammar does not cover are replaced by ``self.unk``
        before parsing.  The replacement is done **in place**, so the
        caller's list is modified.  Every occurrence of an uncovered word
        is replaced, not only the first one.

        Args:
            tokens: mutable list of hashable tokens (they are used as part
                of the parse-table keys).

        Returns:
            The entry stored for ``(0, len(tokens), self.grammar.start())``
            in the parse table, or ``None`` when no such entry was built.
        """
        try:
            self.grammar.check_coverage(tokens)
        except ValueError as v:
            # The uncovered words are recovered from the exception text:
            # the part after the first ':' with all quotes and the final
            # character removed, split on commas.
            # NOTE(review): this relies on the exact message format of
            # check_coverage; words containing ':' or ',' would be mangled.
            listing = v.args[0].split(':')[1].replace('"', '').replace(
                "'", "")[:-1]
            unknown = {word.strip() for word in listing.split(',')}
            for idx, token in enumerate(tokens):
                if token in unknown:
                    tokens[idx] = self.unk

        # Seed the table with the tokens themselves so that lexical
        # productions can match them.
        parse_table = {}
        for index, token in enumerate(tokens):
            parse_table[index, index + 1, token] = token

        n = len(tokens)
        for length in range(1, n + 1):
            for start in range(n - length + 1):
                span = (start, start + length)

                # Repeat until a full pass adds nothing: an entry added in
                # one pass can enable further productions over the same span.
                changed = True
                while changed:
                    changed = False

                    span_coverage = [
                        (production, matching_rule)
                        for production in self.grammar.productions()
                        for matching_rule in self.find_matching_rules(
                            production.rhs(), span, parse_table)
                    ]

                    for (production, children) in span_coverage:
                        # Production probability times the probability of
                        # every child that is itself a tree.
                        p = production.prob()
                        for child in children:
                            if isinstance(child, Tree):
                                p = p * child.prob()

                        key = (span[0], span[1], production.lhs())
                        current = parse_table.get(key)

                        # Keep only the most probable tree per (span, lhs).
                        if current is None or current.prob() < p:
                            parse_table[key] = ProbabilisticTree(
                                production.lhs().symbol(), children, prob=p)
                            changed = True

        return parse_table.get((0, n, self.grammar.start()))

Ejemplo n.º 3

0

Mostrar archivo

    def _apply_binary_rules(self, N, chart):
        """Fill the chart cells above the bottom row using binary rules.

        Spans are visited in the order produced by ``ordered_spans(N)``.
        For every split point of a span, each pair of trees taken from the
        two sub-cells is looked up with ``self._grammar.lookup_rhs``; a
        resulting head replaces the tree currently stored for it in the
        span's cell only when its total log-probability is strictly higher.

        Args:
          - N: the number of words
          - chart: the chart to populate; ``chart[(i, j)][head]`` is a
            ProbabilisticTree.
        """
        # Handle nonterminal rules A -> B C for every span and split point.
        for (start, end) in ordered_spans(N):
            for mid in xrange(start + 1, end):
                for left in chart[(start, mid)].values():
                    for right in chart[(mid, end)].values():
                        rules = self._grammar.lookup_rhs(left.label(),
                                                         right.label())
                        for (head, rule_score) in rules:
                            candidate = (left.logprob() + right.logprob()
                                         + rule_score)
                            if candidate > chart[(start, end)][head].logprob():
                                chart[(start, end)][head] = ProbabilisticTree(
                                    head, [left, right], logprob=candidate)

Ejemplo n.º 4

0

Mostrar archivo

    def _parse(self, table):
        """
        Helper function of :func:`CKYParser.parse` that fills ``table``
        bottom-up with the trees derivable for every span of two or more
        leaves.

        Args:
            - table :class:`ParserTable`: a probabilistic CKY matrix; each
              ``table[start][end]`` cell is overwritten with the list of
              trees accepted for that span.

        NOTE(review): the visible code contains no ``return`` statement;
        results are only stored in ``table``.
        """
        leaf_count = table.num_leaves()
        for end in xrange(1, leaf_count + 1):
            for start in xrange(end - 2, -1, -1):
                top_node = table.top_node(start, end)
                accepted = []
                # Reset before each cell; presumably consulted by
                # self.acceptable -- confirm against its definition.
                self.best_prob = 0.0
                for split in xrange(start + 1, end):
                    pairs = product(table[start][split], table[split][end])
                    for left, right in pairs:
                        for rule in self.find_productions(left, right):
                            # Skip "linking" productions whenever the span
                            # has a top node.
                            if not (top_node and linking(rule)):
                                score = (rule.prob() * left.prob()
                                         * right.prob())
                                label = rule.lhs().symbol()
                                if self.acceptable(label, score, accepted):
                                    accepted.append(
                                        ProbabilisticTree(label,
                                                          [left, right],
                                                          prob=score))
                table[start][end] = accepted

Ejemplo n.º 5

0

Mostrar archivo

    def _apply_preterminal_rules(self, words, chart):
        """Populate the bottom row of the CKY chart.

        For the i-th word, every entry found under ``(word,)`` in
        ``self._grammar.parsing_index`` becomes a one-word tree stored in
        ``chart[(i, i + 1)]`` under its tag.

        Args:
          - words: sequence of words to parse
          - chart: the chart to populate

        Returns: False as soon as a word has no entry in the parsing index
                 (cells of earlier words stay populated); True otherwise.
        """
        # Handle preterminal rules A -> a
        for position, word in enumerate(words):
            lookup_key = (word,)
            lexicon = self._grammar.parsing_index
            if lookup_key not in lexicon:
                return False

            span = (position, position + 1)
            for entry in lexicon[lookup_key]:
                tag, score = entry[0], entry[1]
                chart[span][tag] = ProbabilisticTree(tag, [word],
                                                     logprob=score)

        return True

Ejemplo n.º 6

0

Mostrar archivo

    def _apply_preterminal_rules(self, words, chart):
        """Populate the bottom row of the CKY chart.

        Specifically, apply preterminal unary rules to go from word to
        preterminal.  ``self._grammar.lookup_rhs(word)`` supplies the
        available unary rules; each item is indexed as ``[0]`` (part of
        speech) and ``[1]`` (score).

        A `chart` is a two level structure.  The first key is a tuple
        representing the span, the second key is a part of speech that can
        be produced by that span, and the value is a ProbabilisticTree:

            chart[(i, i+1)][pos_tag] = ProbabilisticTree(pos_tag, [word], logprob=score)

        Args:
          - words: sequence of words to parse
          - chart: the chart to populate

        Returns: False as soon as a word has no preterminal rule in the
                 grammar; True otherwise (including for empty ``words``).
        """
        #### YOUR CODE HERE ####
        # Handle preterminal rules A -> a
        # For the ith token, populate cell (i,i+1).
        for i, word in enumerate(words):
            rules = self._grammar.lookup_rhs(word)
            # Check every word, not just the last one: a single word
            # without a preterminal makes the sentence unparseable.
            if not rules:
                return False
            cell_key = (i, i + 1)
            for rule in rules:
                pos_tag, score = rule[0], rule[1]
                chart[cell_key][pos_tag] = ProbabilisticTree(pos_tag, [word],
                                                             logprob=score)
        #### END(YOUR CODE) ####
        return True

Ejemplo n.º 7

0

Mostrar archivo

Archivo: pparse.py Proyecto: INL/MBMP-morphological-parser

 def initialize(tokens):
     """Build a parser table whose first diagonal holds the lexical trees.

     For each leaf position ``start``, cell ``[start][start + 1]`` receives
     one ProbabilisticTree per production returned by
     ``grammar.productions(rhs=tokens[start])``.
     """
     table = ParserTable(tokens)
     for start in xrange(table.num_leaves()):
         lexical_trees = []
         for rule in grammar.productions(rhs=tokens[start]):
             lexical_trees.append(
                 ProbabilisticTree(rule.lhs().symbol(), rule.rhs(),
                                   prob=rule.prob()))
         table[start][start + 1] = lexical_trees
     return table

Ejemplo n.º 8

0

Mostrar archivo

Archivo: hw4_parser.py Proyecto: Diana-Zhang/Parser-Improved

    def get_subparse(backpointer):
        """Rebuild the single tree rooted at ``backpointer``.

        Args:
            backpointer: chart entry exposing ``prod``, ``prob`` and
                ``l_child`` / ``r_child`` (both ``None`` for a lexical
                entry, otherwise indexed as ``[0]`` / ``[1]`` to locate the
                child cell in the enclosing ``table``).

        Returns:
            ProbabilisticTree labelled with the production's left-hand side.
            Its probability is the value stored in ``backpointer.prob``,
            used as-is.
        """
        # Probability of tree for input S, P(T, S) = P(T)P(S|T) = P(T)
        tree_prob = backpointer.prob
        rule = backpointer.prod

        if backpointer.l_child is None and backpointer.r_child is None:
            children = [str(rule.rhs()[0])]
        else:
            left_sym, left_at = rule.rhs()[0], backpointer.l_child
            right_sym, right_at = rule.rhs()[1], backpointer.r_child

            left_cell = table[left_at[0]][left_at[1]]
            right_cell = table[right_at[0]][right_at[1]]

            # Each cell keeps one backpointer per symbol; recurse into both.
            children = [get_subparse(left_cell[left_sym]),
                        get_subparse(right_cell[right_sym])]

        return ProbabilisticTree(str(rule.lhs()), children, prob=tree_prob)

Ejemplo n.º 9

0

Mostrar archivo

def make_tree(table, splits, left, right, nonterminal):
    """Rebuild the tree for ``nonterminal`` over the span ``[left, right)``.

    Args:
        table: mapping ``(left, right, nonterminal) -> probability``.
        splits: mapping ``(left, right, nonterminal) ->
            (leftsym, rightsym, split)``; ``rightsym`` is ``None`` for a
            unary production.
        left, right: span boundaries.
        nonterminal: the symbol to expand; a plain string is treated as a
            terminal.

    Returns:
        The string itself for a terminal, a ProbabilisticTree for a
        recorded nonterminal, or ``None`` when the span/symbol has no entry
        in ``splits`` or ``table``.
    """
    # `basestring` only exists on Python 2; fall back to `str` so the
    # terminal check also works on Python 3.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str

    if isinstance(nonterminal, string_types):
        # actually a terminal -- we're done
        return nonterminal

    try:
        leftsym, rightsym, split = splits[left, right, nonterminal]
        prob = table[left, right, nonterminal]
    except KeyError:
        return None

    if rightsym is None:
        # unary production: the single child covers the whole span
        children = [make_tree(table, splits, left, right, leftsym)]
    else:
        children = [make_tree(table, splits, left, split, leftsym),
                    make_tree(table, splits, split, right, rightsym)]

    return ProbabilisticTree(nonterminal.symbol(), children, prob=prob)

Ejemplo n.º 10

0

Mostrar archivo

    def _apply_binary_rules(self, N, chart):
        """Populate the remainder of the chart, assuming the bottom row is complete.

        Spans are taken from ``ordered_spans(N)``.  For each split point,
        every pair of heads (B, C) present in the two sub-cells is passed to
        ``self._grammar.lookup_rhs(B, C)``, which yields ``(A, weight)``
        pairs.  The tree stored for A in the span's cell is replaced when
        the combined log-probability is strictly higher than the stored one.

        The new tree's children are the full left and right trees, passed
        as a ``(left, right)`` tuple.

        Args:
          - N: the number of words
          - chart: the chart to populate; ``chart[(i, j)][head]`` is a
            ProbabilisticTree.
        """
        #### YOUR CODE HERE ####
        # Handle nonterminal rules A -> B C, span by span from the bottom up.
        for (i, j) in ordered_spans(N):
            for split in range(i + 1, j):
                # Snapshot the heads available on each side of the split.
                left_heads = list(chart[(i, split)])
                right_heads = list(chart[(split, j)])
                for B in left_heads:
                    for C in right_heads:
                        for A, rule_weight in self._grammar.lookup_rhs(B, C):
                            left_tree = chart[(i, split)][B]
                            right_tree = chart[(split, j)][C]
                            candidate = (left_tree.logprob()
                                         + right_tree.logprob()
                                         + rule_weight)

                            if candidate > chart[(i, j)][A].logprob():
                                chart[(i, j)][A] = ProbabilisticTree(
                                    A, (left_tree, right_tree),
                                    logprob=candidate)

Ejemplo n.º 11

0

Mostrar archivo

Archivo: hw4_improved_parser.py Proyecto: HaoboGu/LING571

def pcky(sentence, pcfg):
    """
    Probabilistic CKY algorithm.

    Each word seeds its diagonal cell with one tree per entry that
    ``create_map`` associates with the word; words missing from the map use
    the entries stored under ``'*unknown*'`` instead.  Wider cells are then
    filled through ``update_cell``.

    :param sentence: List[str]
    :param pcfg: nltk.PCFG
    :return: the filled table
    """
    # map RHS to a set of LHS according to the grammar
    symbol_map = create_map(pcfg.productions())
    table = initialize_table(len(sentence) + 1)
    unknown_key = ('*unknown*',)

    for end, word in enumerate(sentence, 1):
        key = (word,)
        if key not in symbol_map:
            key = unknown_key
        # Lexical step: the tree always keeps the surface word as its leaf.
        for symbol_prob in symbol_map[key]:
            table[end - 1][end].append(
                ProbabilisticTree(symbol_prob[0], [word],
                                  prob=symbol_prob[1]))

        # Combine shorter spans ending at `end`, widest start last.
        for start in range(end - 2, -1, -1):
            for split in range(start + 1, end):
                table = update_cell(table, start, split, end, symbol_map)
    return table

Ejemplo n.º 12

0

Mostrar archivo

Archivo: hw4_improved_parser.py Proyecto: HaoboGu/LING571

def update_cell(table, i, k, j, symbol_map, beam_size=28):
    """
    For current cell [i,j], update tree list and probability based on cell
    [i,k] and [k,j], then prune the cell to its most probable trees.

    :param table: chart indexed as ``table[row, col]``; each cell is a list
        of trees
    :param i: start of the span
    :param k: split point
    :param j: end of the span
    :param symbol_map: maps an RHS tuple of labels to an iterable of
        entries indexed as ``[0]`` (LHS symbol) and ``[1]`` (probability)
    :param beam_size: maximum number of trees kept in cell [i,j] after the
        update (default 28, the previously hard-coded value); ``None``
        keeps every tree
    :return: updated table
    """
    for s1 in table[i, k]:
        for s2 in table[k, j]:  # s1 and s2 are trees
            rhs = (s1.label(), s2.label())
            if rhs not in symbol_map:  # no rule in the grammar for this pair
                continue
            for l_symbol_prob in symbol_map[rhs]:
                # add current tree to cell [i,j]
                table[i, j].append(
                    ProbabilisticTree(
                        l_symbol_prob[0], [s1, s2],
                        prob=l_symbol_prob[1] * s1.prob() * s2.prob()))
    # Beam pruning: keep the most probable trees, best first.
    ranked = sorted(table[i, j], key=lambda tree: tree.prob(), reverse=True)
    table[i, j] = ranked[:beam_size]
    return table

Ejemplo n.º 13

0

Mostrar archivo

    def _apply_binary_rules(self, N, chart):
        """Populate the remainder of the chart, assuming the bottom row is complete.

        Working through ``ordered_spans(N)``, each split of a span pairs
        every head B of the left sub-cell with every head C of the right
        sub-cell.  ``self._grammar.lookup_rhs(B, C)`` yields ``(A, weight)``
        pairs; A's entry in the span's cell is overwritten when the summed
        log-probability is strictly higher than the one already stored.

        The new tree's children are the full left and right trees.

        Args:
          - N: the number of words
          - chart: the chart to populate; ``chart[(i, j)][head]`` is a
            ProbabilisticTree.
        """
        #### YOUR CODE HERE ####
        # Handle nonterminal rules A -> B C, one span at a time.
        for (i, j) in ordered_spans(N):
            for split in range(i + 1, j):
                for B, left_tree in chart[(i, split)].items():
                    for C, right_tree in chart[(split, j)].items():
                        for A, weight in self._grammar.lookup_rhs(B, C):
                            total = (left_tree.logprob()
                                     + right_tree.logprob() + weight)
                            stored = chart[(i, j)][A].logprob()
                            if total > stored:
                                chart[(i, j)][A] = ProbabilisticTree(
                                    A, [left_tree, right_tree], logprob=total)

Ejemplo n.º 14

0

Mostrar archivo

Archivo: viterbi.py Proyecto: AirJunda/cs114spring2019hw

    def _add_constituents_spanning(self, span, constituents, tokens):
        """
        Find any constituents that might cover ``span``, and add them
        to the most likely constituents table.

        :rtype: None
        :type span: tuple(int, int)
        :param span: The section of the text to cover, as
            ``(first included token index, first excluded token index)``,
            i.e. ``text[span[0]:span[1]]``.
        :type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree)
        :param constituents: The most likely constituents table, keyed by
            ``(start, end, lhs)``.  It is updated in place: an entry is
            written when none exists for the key or when the new tree is
            strictly more probable.  On entry it should already contain
            all possible constituents shorter than ``span``.
        :type tokens: list of tokens
        :param tokens: The text we are parsing.  It is passed to
            ``_find_instantiations``; its length is used for trace output.
        """
        # Unary productions can feed on constituents added in the same
        # pass, so keep sweeping until a pass adds nothing.
        made_progress = True
        while made_progress:
            made_progress = False

            # Every way of instantiating a grammar production over the span.
            instantiations = self._find_instantiations(span, constituents,
                                                       tokens)

            for (production, children) in instantiations:
                # Tree probability = production probability times the
                # probability of each child that is itself a tree.
                p = production.prob()
                for child in children:
                    if isinstance(child, Tree):
                        p = p * child.prob()

                lhs = production.lhs()
                tree = ProbabilisticTree(lhs.symbol(), children, prob=p)

                key = (span[0], span[1], lhs)
                c = constituents.get(key)
                improves = c is None or c.prob() < tree.prob()

                # Trace only: report the decision unless the candidate
                # equals the stored tree.
                if self._trace > 1 and (c is None or c != tree):
                    print('   Insert:' if improves else '  Discard:', end=' ')
                    self._trace_production(production, p, span,
                                           len(tokens))

                if improves:
                    constituents[key] = tree
                    made_progress = True

Ejemplo n.º 15

0

Mostrar archivo

Archivo: viterbi.py Proyecto: B-Rich/Fem-Coding-Challenge

    def _add_constituents_spanning(self, span, constituents, tokens):
        """
        Find any constituents that might cover C{span}, and add them
        to the most likely constituents table.

        C{constituents} is updated in place; the search is repeated until
        a full pass over the instantiations changes nothing.

        @rtype: C{None}
        @type span: C{(int, int)}
        @param span: The section of the text for which we are
            trying to find possible constituents.  The span is
            specified as a pair of integers, where the first integer
            is the index of the first token that should be included in
            the constituent; and the second integer is the index of
            the first token that should not be included in the
            constituent.  I.e., the constituent should cover
            C{M{text}[span[0]:span[1]]}, where C{M{text}} is the text
            that we are parsing.

        @type constituents: C{dictionary} from
            C{(int,int,Nonterminal)} to (C{ProbabilisticToken} or
            C{ProbabilisticTree}).
        @param constituents: The most likely constituents table.  This
            table records the most probable tree representation for
            any given span and node value.  In particular,
            C{constituents(M{s},M{e},M{nv})} is the most likely
            C{ProbabilisticTree} that covers C{M{text}[M{s}:M{e}]}
            and has a node value C{M{nv}.symbol()}, where C{M{text}}
            is the text that we are parsing.  When
            C{_add_constituents_spanning} is called, C{constituents}
            should contain all possible constituents that are shorter
            than C{span}.

        @type tokens: C{list} of tokens
        @param tokens: The text we are parsing.  Only its length is used
            here, for trace output.
        """
        # Since some of the grammar productions may be unary, we need to
        # repeatedly try all of the productions until none of them add any
        # new constituents.
        changed = True
        while changed:
            changed = False
            
            # Find all instantiations of the grammar productions that
            # cover the span.
            instantiations = self._find_instantiations(span, constituents)

            # For each production instantiation, build a new
            # ProbabilisticTree whose probability is the product
            # of the children's probabilities and the production's
            # probability.  Children that are not Tree instances
            # contribute no factor.
            for (production, children) in instantiations:
                subtrees = [c for c in children if isinstance(c, Tree)]
                p = reduce(lambda pr,t:pr*t.prob(),
                           subtrees, production.prob())
                node = production.lhs().symbol()
                tree = ProbabilisticTree(node, children, prob=p)

                # Look up the tree currently stored for this span and
                # left-hand side (None when there is none yet).
                c = constituents.get((span[0], span[1], production.lhs()))
                # Trace output only: report whether the candidate is about
                # to be inserted or discarded, unless it equals the stored
                # tree.  The trailing comma on the Python 2 print statement
                # suppresses the newline.
                if self._trace > 1:
                    if c is None or c != tree:
                        if c is None or c.prob() < tree.prob():
                            print '   Insert:',
                        else:
                            print '  Discard:',
                        self._trace_production(production, p, span, len(tokens))
                # Store the candidate when it is new or strictly more
                # probable than the stored tree, and schedule another pass.
                if c is None or c.prob() < tree.prob():
                    constituents[span[0], span[1], production.lhs()] = tree
                    changed = True

Ejemplo n.º 16

0

Mostrar archivo

Archivo: NLP_all_sentences_AR.py Proyecto: OMARI1988/robot_simulation_modified

    def _add_constituents_spanning(self, span, constituents, tokens):
        """
        Find any constituents that might cover ``span``, and add them
        to the most likely constituents table.

        :rtype: None

        :type span: tuple(int, int)
        :param span: ``(start, end)`` token indices of the text section to
            cover; ``start`` is included and ``end`` is excluded, so the
            constituent covers ``text[span[0]:span[1]]``.

        :type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree)
        :param constituents: The most likely constituents table, keyed by
            ``(start, end, lhs)`` and modified in place.  A tree is stored
            when the key is absent or when the tree is strictly more
            probable than the stored one.  It should already hold all
            possible constituents shorter than ``span``.

        :type tokens: list of tokens
        :param tokens: The text we are parsing.  This is only used for
            trace output.
        """
        # A unary production may apply to a constituent added earlier in
        # the same sweep, so sweep again until nothing new is stored.
        while True:
            stored_any = False

            # All instantiations of grammar productions covering the span.
            for production, children in self._find_instantiations(
                    span, constituents):
                # Multiply the production probability by the probability
                # of every child that is a tree.
                p = production.prob()
                for subtree in [c for c in children if isinstance(c, Tree)]:
                    p = p * subtree.prob()

                head = production.lhs()
                tree = ProbabilisticTree(head.symbol(), children, prob=p)

                slot = (span[0], span[1], head)
                current = constituents.get(slot)
                is_better = current is None or current.prob() < tree.prob()

                # Trace only; candidates equal to the stored tree are
                # not reported.
                if self._trace > 1 and (current is None or current != tree):
                    if is_better:
                        print('   Insert:', end=' ')
                    else:
                        print('  Discard:', end=' ')
                    self._trace_production(production, p, span, len(tokens))

                if is_better:
                    constituents[slot] = tree
                    stored_any = True

            if not stored_any:
                break

Ejemplo n.º 17

0

Mostrar archivo

def make_chart():
    """Create an empty chart.

    The chart is a two-level ``defaultdict``: a missing span key yields a
    fresh cell, and a missing head inside a cell yields a placeholder
    ProbabilisticTree with an empty label, no children and a log
    probability of negative infinity.
    """
    def new_placeholder_tree():
        # Placeholder used for heads that have not been built yet.
        return ProbabilisticTree('', [], logprob=-np.inf)

    def new_cell():
        return collections.defaultdict(new_placeholder_tree)

    return collections.defaultdict(new_cell)

Ejemplo n.º 18

0

Mostrar archivo

Archivo: viterbi.py Proyecto: wrand/tweater

    def _add_constituents_spanning(self, span, constituents, tokens):
        """
        Find any constituents that might cover C{span}, and add them
        to the most likely constituents table.

        C{constituents} is updated in place; the search is repeated until
        a full pass over the instantiations changes nothing.

        @rtype: C{None}
        @type span: C{(int, int)}
        @param span: The section of the text for which we are
            trying to find possible constituents.  The span is
            specified as a pair of integers, where the first integer
            is the index of the first token that should be included in
            the constituent; and the second integer is the index of
            the first token that should not be included in the
            constituent.  I.e., the constituent should cover
            C{M{text}[span[0]:span[1]]}, where C{M{text}} is the text
            that we are parsing.

        @type constituents: C{dictionary} from
            C{(int,int,Nonterminal)} to (C{ProbabilisticToken} or
            C{ProbabilisticTree}).
        @param constituents: The most likely constituents table.  This
            table records the most probable tree representation for
            any given span and node value.  In particular,
            C{constituents(M{s},M{e},M{nv})} is the most likely
            C{ProbabilisticTree} that covers C{M{text}[M{s}:M{e}]}
            and has a node value C{M{nv}.symbol()}, where C{M{text}}
            is the text that we are parsing.  When
            C{_add_constituents_spanning} is called, C{constituents}
            should contain all possible constituents that are shorter
            than C{span}.

        @type tokens: C{list} of tokens
        @param tokens: The text we are parsing.  Only its length is used
            here, for trace output.
        """
        # Since some of the grammar productions may be unary, we need to
        # repeatedly try all of the productions until none of them add any
        # new constituents.
        changed = True
        while changed:
            changed = False

            # Find all instantiations of the grammar productions that
            # cover the span.
            instantiations = self._find_instantiations(span, constituents)

            # For each production instantiation, build a new
            # ProbabilisticTree whose probability is the product
            # of the children's probabilities and the production's
            # probability.  Children that are not Tree instances
            # contribute no factor.
            for (production, children) in instantiations:
                subtrees = [c for c in children if isinstance(c, Tree)]
                p = reduce(lambda pr, t: pr * t.prob(), subtrees,
                           production.prob())
                node = production.lhs().symbol()
                tree = ProbabilisticTree(node, children, prob=p)

                # Look up the tree currently stored for this span and
                # left-hand side (None when there is none yet).
                c = constituents.get((span[0], span[1], production.lhs()))
                # Trace output only: report whether the candidate is about
                # to be inserted or discarded, unless it equals the stored
                # tree.  The trailing comma on the Python 2 print statement
                # suppresses the newline.
                if self._trace > 1:
                    if c is None or c != tree:
                        if c is None or c.prob() < tree.prob():
                            print '   Insert:',
                        else:
                            print '  Discard:',
                        self._trace_production(production, p, span,
                                               len(tokens))
                # Store the candidate when it is new or strictly more
                # probable than the stored tree, and schedule another pass.
                if c is None or c.prob() < tree.prob():
                    constituents[span[0], span[1], production.lhs()] = tree
                    changed = True