Ejemplo n.º 1
0
def build_tree(score, bptr, tag2idx, idx2tag, leaves):
    """Rebuild the parse tree covering the whole input and undo CNF on it.

    The tree is rooted at the tag index ``tag2idx['S']`` and spans positions
    ``0`` through ``len(bptr) - 1``.  Construction itself is delegated to
    ``build_tree_util``; the resulting tree is then passed through
    ``ttf.un_chomsky_normal_form`` before being returned.

    ``score`` is accepted for interface compatibility but is not used here.
    """
    root_id = tag2idx['S']
    last = len(bptr) - 1
    parsed = build_tree_util(root_id, 0, last, bptr, tag2idx, idx2tag, leaves)
    ttf.un_chomsky_normal_form(parsed)
    return parsed
Ejemplo n.º 2
0
def demo():
    """
    A demonstration showing how each tree transform can be used.

    Parses a bracketed WSJ sentence, then successively collapses unary
    subtrees, converts to Chomsky normal form (plain and with Markov
    annotation), converts back, prints the original and round-tripped
    bracketings with a comparison, and finally draws all five trees.
    """

    from copy import deepcopy

    from nltk import tree, treetransforms
    from nltk.draw.tree import draw_trees

    # original tree from WSJ bracketed text
    sentence = """(TOP
  (S
    (S
      (VP
        (VBN Turned)
        (ADVP (RB loose))
        (PP
          (IN in)
          (NP
            (NP (NNP Shane) (NNP Longman) (POS 's))
            (NN trading)
            (NN room)))))
    (, ,)
    (NP (DT the) (NN yuppie) (NNS dealers))
    (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
    (. .)))"""
    t = tree.Tree.fromstring(sentence, remove_empty_top_bracketing=True)

    # collapse subtrees with only one child
    collapsedTree = deepcopy(t)
    treetransforms.collapse_unary(collapsedTree)

    # convert the tree to CNF
    cnfTree = deepcopy(collapsedTree)
    treetransforms.chomsky_normal_form(cnfTree)

    # convert the tree to CNF with parent annotation (one level) and horizontal smoothing of order two
    parentTree = deepcopy(collapsedTree)
    treetransforms.chomsky_normal_form(parentTree, horzMarkov=2, vertMarkov=1)

    # convert the tree back to its original form (used to make CYK results comparable)
    original = deepcopy(parentTree)
    treetransforms.un_chomsky_normal_form(original)

    # convert tree back to bracketed text
    # pformat() returns the bracketed string; pprint() only prints it and
    # returns None, which would make the comparison below always False.
    sentence2 = original.pformat()
    print(sentence)
    print(sentence2)
    print("Sentences the same? ", sentence == sentence2)

    draw_trees(t, collapsedTree, cnfTree, parentTree, original)
Ejemplo n.º 3
0
def demo():
    """
    A demonstration showing how each tree transform can be used.

    Parses a bracketed WSJ sentence, then successively collapses unary
    subtrees, converts to Chomsky normal form (plain and with Markov
    annotation), converts back, prints the original and round-tripped
    bracketings with a comparison, and finally draws all five trees.

    Written for Python 3 and the NLTK 3 ``Tree`` API (``fromstring`` /
    ``pformat``).
    """

    from nltk.draw.tree import draw_trees
    from nltk import tree, treetransforms
    from copy import deepcopy

    # original tree from WSJ bracketed text
    sentence = """(TOP
  (S
    (S
      (VP
        (VBN Turned)
        (ADVP (RB loose))
        (PP
          (IN in)
          (NP
            (NP (NNP Shane) (NNP Longman) (POS 's))
            (NN trading)
            (NN room)))))
    (, ,)
    (NP (DT the) (NN yuppie) (NNS dealers))
    (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
    (. .)))"""
    # Tree.fromstring replaces the older Tree.parse constructor.
    t = tree.Tree.fromstring(sentence, remove_empty_top_bracketing=True)

    # collapse subtrees with only one child
    collapsedTree = deepcopy(t)
    treetransforms.collapse_unary(collapsedTree)

    # convert the tree to CNF
    cnfTree = deepcopy(collapsedTree)
    treetransforms.chomsky_normal_form(cnfTree)

    # convert the tree to CNF with parent annotation (one level) and horizontal smoothing of order two
    parentTree = deepcopy(collapsedTree)
    treetransforms.chomsky_normal_form(parentTree, horzMarkov=2, vertMarkov=1)

    # convert the tree back to its original form (used to make CYK results comparable)
    original = deepcopy(parentTree)
    treetransforms.un_chomsky_normal_form(original)

    # convert tree back to bracketed text
    # pformat() returns the bracketed string; pprint() only prints it and
    # returns None, which would make the comparison below always False.
    sentence2 = original.pformat()
    print(sentence)
    print(sentence2)
    print("Sentences the same? ", sentence == sentence2)

    draw_trees(t, collapsedTree, cnfTree, parentTree, original)
Ejemplo n.º 4
0
def un_cnf(tree, old_grammar):
    """Undo the CNF conversion of ``tree`` in place.

    First restores unary chains via ``reinsert_unary_chains(tree,
    old_grammar)`` and reverses binarization with
    ``treetransforms.un_chomsky_normal_form``.  The tree is then walked
    with an explicit stack: every subtree whose label contains ``'$'`` is
    replaced, at the same position in its parent, by its first child.  The
    substituted child is not visited further.  Nothing is returned.
    """
    reinsert_unary_chains(tree, old_grammar)
    treetransforms.un_chomsky_normal_form(tree)

    # Each entry pairs a node with the container that holds it; the root
    # is paired with an empty list because it has no parent.
    pending = [(tree, [])]
    while pending:
        current, owner = pending.pop()
        if not isinstance(current, Tree):
            # Leaves carry no label and have nothing to descend into.
            continue
        if '$' not in current.label():
            pending.extend((kid, current) for kid in current)
            continue
        # Splice the first child into the slot of the '$'-labelled node.
        slot = owner.index(current)
        owner.remove(owner[slot])
        owner.insert(slot, current[0])
Ejemplo n.º 5
0
def un_cnf(tree, old_grammar):
    """Convert back to the original grammar.

    To convert a tree output from CKY back to the original form of the
    grammar: ``un_cnf(tree, cnf_grammar_wunaries)``.

    The tree is modified in place: unary chains are reinserted with
    ``reinsert_unary_chains``, binarization is reversed with
    ``treetransforms.un_chomsky_normal_form``, and then each subtree whose
    label contains ``'$'`` is swapped for its first child at the same
    index in its parent.  Children of a swapped-out node are not traversed.
    """
    reinsert_unary_chains(tree, old_grammar)
    treetransforms.un_chomsky_normal_form(tree)

    # Depth-first walk over (node, parent) pairs; the root gets an empty
    # list as a stand-in parent.
    stack = [(tree, [])]
    while len(stack) > 0:
        subtree, container = stack.pop()
        if isinstance(subtree, Tree):
            if '$' not in subtree.label():
                # Ordinary node: schedule all of its children.
                for child in subtree:
                    stack.append((child, subtree))
            else:
                # '$'-labelled node: replace it with its first child.
                position = container.index(subtree)
                container.remove(container[position])
                container.insert(position, subtree[0])