Esempio n. 1
0
def add_node(tree, span, label, position=0, in_place=True):
    '''Introduce a new node labelled label over span in the tree.

    Position indicates what to do when a node already exists with the same
    span.  Zero indicates above any current nodes, one indicates beneath the
    first, and so on.

    The edit is applied to the root of tree, or to a clone of that root when
    in_place is false.

    Returns (True, (root, new_node)) on success, and (False, message) with a
    string describing the problem when the node cannot be added.
    '''
    tree = tree.root()
    if not in_place:
        tree = tree.clone()

    # Find the node(s) that should be within the new span
    nodes = tree.get_spanning_nodes(*span)
    # Covers both an empty list and a None result; report it through the
    # normal failure value rather than crashing on nodes[0] below.
    if not nodes:
        return (False, "No nodes found for the span ({} - {})".format(*span))
    # Do not operate on the root node
    if nodes[0].parent is None:
        nodes = nodes[0].subtrees[:]
        if not nodes:
            return (False, "The root node has no children to place "
                           "beneath a new node")
    for _ in range(position):
        # Descending is only possible from a single node that has children.
        # NOTE(review): only the first child is followed, even when the node
        # has several children - confirm that this is intended.
        if len(nodes) > 1 or not nodes[0].subtrees:
            return (False, "Position {} is too deep".format(position))
        nodes[0] = nodes[0].subtrees[0]
    nodes.sort(key=lambda x: x.span)

    # Check that all of the nodes are at the same level
    parent = None
    for node in nodes:
        if parent is None:
            parent = node.parent
        if parent != node.parent:
            return (False,
                    "The span ({} - {}) would cross brackets".format(*span))

    # Create the node, placing it where the first covered node currently is
    nnode = pstree.PSTree(None, label, span, parent)
    insert_at = parent.subtrees.index(nodes[0])
    parent.subtrees.insert(insert_at, nnode)

    # Move the subtrees
    for node in nodes:
        node.parent.subtrees.remove(node)
        nnode.subtrees.append(node)
        node.parent = nnode

    return (True, (tree, nnode))
Esempio n. 2
0
def construct_node(node, tree, ner_raw_data, head_raw_data, text_raw_data,
        character_to_index, word_to_index, pos_to_index, index_to_lexicon,
        pos_count, ne_count, pos_ne_count, lexicon_hits, span_to_node):
    '''Fill node with features read from tree, then recurse over the children.

    Sets pos_index, word_split, word_index, head_split, head_index, ne, span
    and lexicon_hit on node, and attaches one new Node per subtree of tree.

    Arguments, as used here:
      node, tree         - the node to fill and the tree it is built from.
      ner_raw_data       - maps a span to its entity label; a span that is
                           absent gets the label "NONE".
      head_raw_data      - maps (span, label) to a sequence whose item 1 is
                           the head word.
      text_raw_data      - sequence of tokens, sliced with the span.
      character_to_index, word_to_index, pos_to_index - index lookups.
      index_to_lexicon   - maps a position in lexicon_hit to a container
                           tested with "in" against the lower-cased
                           constituent text.
      pos_count, ne_count, pos_ne_count - counters updated in place.
      lexicon_hits       - item 0 is incremented when any lexicon matches.
      span_to_node       - updated in place to map the span to node.

    Side effect: when tree has more than two subtrees, tree.subtrees is
    replaced by a two-element list (see the binarization step below).

    Returns the number of nodes in the structure rooted at node, including
    node itself.
    '''
    pos = tree.label
    word = tree.word
    span = tree.span
    # A tree that already carries a head (for example one created by the
    # binarization step below) is not looked up in head_raw_data.
    head = tree.head if hasattr(tree, "head") else head_raw_data[(span, pos)][1]
    ne = ner_raw_data[span] if span in ner_raw_data else "NONE"
    constituent = " ".join(text_raw_data[span[0]:span[1]]).lower()

    # Process pos info
    node.pos_index = pos_to_index[pos]
    pos_count[pos] += 1

    # Process word info; -1 marks a node without a word
    node.word_split = [character_to_index[character] for character in word] if word else []
    node.word_index = word_to_index[word] if word else -1

    # Process head info
    node.head_split = [character_to_index[character] for character in head]
    node.head_index = word_to_index[head]

    # Process ne info
    node.ne = ne
    if ne != "NONE":
        # The entity is counted only when the parent covers a different span
        # (presumably so that stacked nodes over one span count once), while
        # the per-label count is updated for every labelled node.
        if not node.parent or node.parent.span!=span:
            ne_count[ne] += 1
        pos_ne_count[pos] += 1

    # Process span info
    node.span = span
    span_to_node[span] = node

    # Process lexicon info
    node.lexicon_hit = [0] * len(index_to_lexicon)
    hits = 0
    # items() rather than iteritems() so this runs on Python 2 and Python 3
    for index, lexicon in index_to_lexicon.items():
        if constituent in lexicon:
            node.lexicon_hit[index] = 1
            hits = 1
    lexicon_hits[0] += hits

    # Binarize children: split off one edge child and group the rest under a
    # new tree with this label and head.  The group may still have more than
    # two subtrees; it is split again when the recursion below reaches it.
    if len(tree.subtrees) > 2:
        side_child_pos = tree.subtrees[-1].label
        side_child_span = tree.subtrees[-1].span
        side_child_head = head_raw_data[(side_child_span, side_child_pos)][1]
        # Split off the last child unless its head word equals this node's
        # head word; in that case split off the first child instead.
        if side_child_head != head:
            sub_subtrees = tree.subtrees[:-1]
        else:
            sub_subtrees = tree.subtrees[1:]
        new_span = (sub_subtrees[0].span[0], sub_subtrees[-1].span[1])
        new_tree = pstree.PSTree(label=pos, span=new_span, subtrees=sub_subtrees)
        new_tree.head = head
        if side_child_head != head:
            tree.subtrees = [new_tree, tree.subtrees[-1]]
        else:
            tree.subtrees = [tree.subtrees[0], new_tree]

    # Process children
    nodes = 1
    for subtree in tree.subtrees:
        child = Node()
        node.add_child(child)
        child_nodes = construct_node(child, subtree, ner_raw_data, head_raw_data, text_raw_data,
            character_to_index, word_to_index, pos_to_index, index_to_lexicon,
            pos_count, ne_count, pos_ne_count, lexicon_hits, span_to_node)
        nodes += child_nodes
    return nodes