Example #1
0
    def rightbranch(tree):
        """
        Return the list of transformed children for a subtree on a right branch.

        A preterminal is returned flagged, as a one-element list.  Any other
        node must have exactly two children (the unpacking fails otherwise);
        its label becomes Anc and its left spine is processed by leftbranch.
        """
        def leftbranch(subtree, transformed_right):
            """
            Walk down the left spine starting at subtree.

            transformed_right is the transformed right material between the
            current node and Anc.  Each step down wraps it, together with the
            transformed right child, in a new nonterminal labelled
            make_pair(Anc, <label of the left child>).  Returns the two-element
            list [preterminal reached, transformed_right].
            """
            # Iterative form of the tail recursion down the left spine.
            while not tb.is_preterminal(subtree):
                lchild, rchild = tb.tree_children(subtree)
                pair_label = make_pair(Anc, tb.tree_label(lchild))
                new_children = rightbranch(rchild) + [transformed_right]
                transformed_right = tb.make_nonterminal(pair_label, new_children)
                subtree = lchild
            return [subtree, transformed_right]

        if tb.is_preterminal(tree):
            return [flag(tree)]

        # Anc is read by leftbranch through its closure.
        Anc = tb.tree_label(tree)
        lchild, rchild = tb.tree_children(tree)
        spine_top = tb.make_nonterminal(make_pair(Anc, tb.tree_label(lchild)),
                                        rightbranch(rchild))
        return leftbranch(lchild, spine_top)
Example #2
0
 def leftbranch(subtree, continuation, X1):
     """
     Walk down the left spine from subtree, growing continuation.

     At every node that is not a preterminal, continuation is replaced by a
     one-element list holding a new nonterminal labelled
     make_pair(Anc, <label of the left child>); its children are the
     transformed right child followed by the previous continuation.  The
     preterminal finally reached is relabelled with X1 and put in front of
     the continuation.

     Anc, rightbranch and relabel are not defined in this block; they come
     from the enclosing scope.
     """
     # Iterative form of the tail recursion; X1 is carried along unchanged.
     while not tb.is_preterminal(subtree):
         lchild, rchild = tb.tree_children(subtree)
         X2 = tb.tree_label(lchild) + '>'
         wrapped = tb.make_nonterminal(
             make_pair(Anc, tb.tree_label(lchild)),
             rightbranch(rchild, X2) + continuation)
         continuation = [wrapped]
         subtree = lchild
     return [relabel(subtree, X1)] + continuation
Example #3
0
 def leftbranch(subtree, continuation):
     """
     Transform the left spine below subtree.

     A preterminal ends the descent and is returned in front of
     continuation.  Otherwise the node must have exactly two children: the
     transformed right child plus the current continuation become the
     children of a new nonterminal labelled
     make_pair(Anc, <label of the left child>), and the descent continues
     into the left child with that nonterminal as the only continuation.

     Anc and rightbranch are taken from the enclosing scope.
     """
     if tb.is_preterminal(subtree):
         return [subtree] + continuation

     lchild, rchild = tb.tree_children(subtree)
     pair_label = make_pair(Anc, tb.tree_label(lchild))
     wrapped = tb.make_nonterminal(pair_label,
                                   rightbranch(rchild) + continuation)
     return leftbranch(lchild, [wrapped])
Example #4
0
def searchTree(labels, regex, tree):
    """
    Record the terminal yield of every node whose label matches regex.

    labels maps a label (the part of the node label before the first '#')
    to a dict that is handed to incr together with the node's word.  The
    word is the concatenation of the node's terminals with every backslash
    removed.  All subtrees are searched recursively; terminals are skipped.

    Nothing is returned; labels is updated in place.
    """
    if tb.is_terminal(tree):
        return

    label = tb.tree_label(tree).split('#')[0]
    if regex.match(label):
        # Create the per-label table on first sight of this label.
        per_label = labels.setdefault(label, {})
        joined = ''.join(tb.terminals(tree))
        word = joined.replace("\\", "")
        # incr is defined elsewhere; presumably it bumps the count of word
        # in per_label -- confirm against its definition.
        incr(word, per_label)

    for child in tb.tree_subtrees(tree):
        searchTree(labels, regex, child)
Example #5
0
 def visit(node, sofar):
     """
     Append the label derived from node to sofar, then visit node's
     remaining children.

     node must be phrasal with a pair label (A, X) and a preterminal first
     child.  The constructor used depends on the number of children and on
     whether the first child is flagged:

       1 child,  flagged        -> cat_c
       2 children, flagged      -> cat_b
       2 children, not flagged  -> cat_e
       3 children, not flagged  -> cat_d

     Any other number of children terminates the program via sys.exit.
     Every child after the first is visited recursively.  Returns sofar.

     xtree, the cat_* constructors and the pair/flag helpers come from
     outside this block.
     """
     assert tb.is_phrasal(node)
     label = tb.tree_label(node)
     assert is_pair(label)
     A, X = pair_categories(label)
     children = tb.tree_children(node)
     nchildren = len(children)
     assert nchildren > 0
     assert tb.is_preterminal(children[0])

     # Reject unsupported arities only after the assertions above have run.
     if nchildren not in (1, 2, 3):
         sys.exit("error: ill-formed subtree {}\n in tree {}".format(
             node, xtree))

     head = children[0]
     if nchildren == 1:
         assert is_flagged(head)
         xf = cat_c(A, X, unflag(head))
     elif nchildren == 3:
         assert not is_flagged(head)
         C, _ = pair_categories(tb.tree_label(children[1]))
         A1, B = pair_categories(tb.tree_label(children[2]))
         assert A == A1
         xf = cat_d(A, X, B, C, head)
     elif is_flagged(head):
         # Two children, flagged first child.
         A1, B = pair_categories(tb.tree_label(children[1]))
         assert A1 == A
         xf = cat_b(A, X, B, unflag(head))
     else:
         # Two children, unflagged first child.
         C, a = pair_categories(tb.tree_label(children[1]))
         assert a == tb.tree_label(head), \
                "error in label of node.children[1] a = {}, node = {}".format(a, node)
         xf = cat_e(A, X, C, head)

     sofar.append(xf)
     for later_child in children[1:]:
         sofar = visit(later_child, sofar)
     return sofar
 def visit(node, wordssofar, segssofar):
     """
     Collect words from the tree rooted at node.

     A terminal not matched by ignore_terminal_rex is passed through
     simplify_terminal and appended to segssofar.  For any other node the
     children are visited left to right first; afterwards, if the node's
     label matches word_rex and segments are pending, the segments are
     joined into one string, appended to wordssofar, and a new empty
     segment list is started.

     Returns the (wordssofar, segssofar) pair to continue with.
     """
     if tb.is_terminal(node):
         if not ignore_terminal_rex.match(node):
             segssofar.append(simplify_terminal(node))
         return wordssofar, segssofar

     for kid in tb.tree_children(node):
         wordssofar, segssofar = visit(kid, wordssofar, segssofar)

     if word_rex.match(tb.tree_label(node)) and segssofar != []:
         wordssofar.append(''.join(segssofar))
         # Rebind to a new list instead of clearing the existing one in place.
         segssofar = []
     return wordssofar, segssofar
Example #7
0
 def visit(node, wordssofar, segssofar):
     """
     Accumulate segments and completed words while walking the tree at node.

     Non-terminal nodes: every child is visited in order, threading the two
     accumulators through; then, when the node's label matches word_rex and
     at least one segment is pending, the pending segments are concatenated
     into a word, the word is appended to wordssofar, and segssofar is
     rebound to a new empty list.

     Terminal nodes: the simplified terminal is appended to segssofar unless
     ignore_terminal_rex matches it.

     Returns (wordssofar, segssofar).
     """
     if not tb.is_terminal(node):
         for child in tb.tree_children(node):
             wordssofar, segssofar = visit(child, wordssofar, segssofar)
         closes_word = word_rex.match(tb.tree_label(node))
         if closes_word and segssofar != []:
             wordssofar.append(''.join(segssofar))
             segssofar = []
     elif not ignore_terminal_rex.match(node):
         segssofar.append(simplify_terminal(node))
     return wordssofar, segssofar
Example #8
0
def lcx2tree_labels0(xtree):
    """
    Map an lcx2 tree to its list of labels (per the original author's note,
    as in their 1996 paper).

    xtree must have exactly two children.  The first label is
    cat_a(<root label>, <first child>); the remaining labels are produced
    by walking the second child with the nested visit helper.

    Returns the list of labels.
    """
    def visit(node, sofar):
        """
        Append the label for node to sofar, then visit every child of node
        after the first.  Returns sofar.

        The label constructor (cat_b .. cat_e) is chosen from the number of
        children and whether the first child is flagged.  A node with more
        than three children terminates the program via sys.exit.
        """
        assert tb.is_phrasal(node)
        node_label = tb.tree_label(node)
        assert is_pair(node_label)
        A, X = pair_categories(node_label)
        kids = tb.tree_children(node)
        assert len(kids) > 0
        assert tb.is_preterminal(kids[0])

        if len(kids) == 1:
            # Lone flagged preterminal.
            assert is_flagged(kids[0])
            xf = cat_c(A, X, unflag(kids[0]))
        elif len(kids) == 2:
            second_label = tb.tree_label(kids[1])
            if is_flagged(kids[0]):
                A1, B = pair_categories(second_label)
                assert A1 == A
                xf = cat_b(A, X, B, unflag(kids[0]))
            else:
                C, a = pair_categories(second_label)
                assert a == tb.tree_label(kids[0]), \
                       "error in label of node.children[1] a = {}, node = {}".format(a, node)
                xf = cat_e(A, X, C, kids[0])
        elif len(kids) == 3:
            assert not is_flagged(kids[0])
            C, _ = pair_categories(tb.tree_label(kids[1]))
            A1, B = pair_categories(tb.tree_label(kids[2]))
            assert A == A1
            xf = cat_d(A, X, B, C, kids[0])
        else:
            sys.exit("error: ill-formed subtree {}\n in tree {}".format(
                node, xtree))

        sofar.append(xf)
        for kid in kids[1:]:
            sofar = visit(kid, sofar)
        return sofar

    top_label = tb.tree_label(xtree)
    top_children = tb.tree_children(xtree)
    assert len(top_children) == 2, "nonbinary xtree = {}".format(xtree)
    labels = [cat_a(top_label, top_children[0])]
    return visit(top_children[1], labels)
Example #9
0
def lcx(root):
    """
    Map a binary tree to its left-corner transform (per the original
    author's note, the transform of their 1996 paper).

    A preterminal root is returned unchanged.  Otherwise a new nonterminal
    with the root's label is built over the transformed children.
    """
    def rightbranch(tree):
        """
        Return the transformed children for tree.

        Starting at tree, the left spine is followed down to a preterminal.
        At each step the transformed right child, followed by what has been
        built so far, becomes the children of a new nonterminal labelled
        make_pair(<label of tree>, <label of the left child>).  The result
        is the preterminal followed by the material built so far.
        """
        anc = tb.tree_label(tree)
        node = tree
        continuation = []
        # Every non-preterminal on the spine must have exactly two children.
        while not tb.is_preterminal(node):
            lchild, rchild = tb.tree_children(node)
            wrapped = tb.make_nonterminal(
                make_pair(anc, tb.tree_label(lchild)),
                rightbranch(rchild) + continuation)
            continuation = [wrapped]
            node = lchild
        return [node] + continuation

    if tb.is_preterminal(root):
        return root
    return tb.make_nonterminal(tb.tree_label(root), rightbranch(root))
Example #10
0
def lct(root):
    """
    Apply the same tree transform as lcx(), additionally relabelling
    preterminals.

    According to the original author's note, the relabelling is meant to
    implement the transduction of their 1996 paper but is not complete.

    A preterminal root is returned unchanged.
    """
    def relabel(tree, label):
        """Rebuild tree with ' ' + label appended to tree[0], keeping its children."""
        new_label = tree[0] + ' ' + label
        return tb.make_nonterminal(new_label, tb.tree_children(tree))

    def rightbranch(tree, X0):
        """
        Return the transformed children for tree, a subtree on a right branch.

        X0 is the relabelling string handed down from the caller.
        """
        def leftbranch(subtree, continuation, X1):
            """
            Follow the left spine from subtree; the preterminal reached is
            relabelled with X1 and placed before the accumulated continuation.
            """
            while not tb.is_preterminal(subtree):
                lchild, rchild = tb.tree_children(subtree)
                X2 = tb.tree_label(lchild) + '>'
                wrapped = tb.make_nonterminal(
                    make_pair(Anc, tb.tree_label(lchild)),
                    rightbranch(rchild, X2) + continuation)
                continuation = [wrapped]
                subtree = lchild
            return [relabel(subtree, X1)] + continuation

        if tb.is_preterminal(tree):
            closing = X0 + '<' + tb.tree_label(tree) + ']'
            return [relabel(tree, closing)]

        # Anc is read by leftbranch through its closure.
        Anc = tb.tree_label(tree)
        lchild, rchild = tb.tree_children(tree)
        X2 = tb.tree_label(lchild) + '>'
        first = tb.make_nonterminal(make_pair(Anc, tb.tree_label(lchild)),
                                    rightbranch(rchild, X2))
        return leftbranch(lchild, [first], X0)

    if tb.is_preterminal(root):
        return root
    return tb.make_nonterminal(tb.tree_label(root), rightbranch(root, ''))
Example #11
0
def lcx2(root):
    """
    Map a binary tree to its left-corner transform, flagging preterminals
    that are right children.

    Per the original author's note, this is the transform of their 1996
    paper; the flagged preterminals are the ones generated under schema
    (11b) and (11c), which makes schema (11b) and (11e) distinguishable.

    A preterminal root is returned unchanged.
    """
    def rightbranch(tree):
        """
        Return the transformed children for a subtree on a right branch.
        A preterminal yields a one-element list holding its flagged form.
        """
        def leftbranch(subtree, transformed_right):
            """
            Transform a subtree on a left branch.

            transformed_right is the transformed right material between
            this node and Anc.  Returns [preterminal, transformed_right]
            once the bottom of the left spine is reached.
            """
            if tb.is_preterminal(subtree):
                return [subtree, transformed_right]

            lower, sibling = tb.tree_children(subtree)
            pair_label = make_pair(Anc, tb.tree_label(lower))
            wrapped = tb.make_nonterminal(
                pair_label, rightbranch(sibling) + [transformed_right])
            return leftbranch(lower, wrapped)

        if tb.is_preterminal(tree):
            return [flag(tree)]

        # Anc is read by leftbranch through its closure.
        Anc = tb.tree_label(tree)
        lower, sibling = tb.tree_children(tree)
        pair_label = make_pair(Anc, tb.tree_label(lower))
        return leftbranch(
            lower, tb.make_nonterminal(pair_label, rightbranch(sibling)))

    if tb.is_preterminal(root):
        return root
    return tb.make_nonterminal(tb.tree_label(root), rightbranch(root))
 def visit(node, wordssofar, segssofar):
     """
     Collect words (optionally paired with a topic) from the tree at node.

     A terminal not matched by ignore_terminal_rex is appended to segssofar
     as is.  For any other node the children are visited left to right
     first; afterwards, if the node's label matches score_cat_rex and
     segments are pending, the segments are joined into a word and a new
     empty segment list is started.  The word is appended to wordssofar as
     (word, topic) when the match has a non-None 'topic' group, and as the
     bare word otherwise.

     Returns the (wordssofar, segssofar) pair to continue with.
     """
     if tb.is_terminal(node):
         if not ignore_terminal_rex.match(node):
             segssofar.append(node)
         return wordssofar, segssofar

     for child in tb.tree_children(node):
         wordssofar, segssofar = visit(child, wordssofar, segssofar)

     mo = score_cat_rex.match(tb.tree_label(node))
     if mo and segssofar:
         word = ''.join(segssofar)
         # Rebind to a new list instead of clearing the existing one in place.
         segssofar = []
         try:
             topic = mo.group('topic')
         except IndexError:
             # The pattern defines no group named 'topic'.
             topic = None
         wordssofar.append(word if topic is None else (word, topic))
     return wordssofar, segssofar