Ejemplo n.º 1
0
def boh_tree(fr):
    """Build a flat "BOH" (bag-of-heads) tree for the words between two mentions.

    Walks the leaves located strictly between the start index of the first
    mention and the start index of the second mention.  For every phrase
    whose label is listed in ``phrase_heads``, the last child that carries
    one of the head tags for that phrase (and whose right sibling does not)
    is taken as the head, unless its word equals the head of either mention.

    :param fr: relation record; it is handed to the mention/head helpers and
        its ``article`` attribute selects the parsed sentences.
    :return: ``ParentedTree`` labelled ``"BOH"`` with one ``(head *)`` child
        per collected head word.
    """
    ordered_mentions = _get_mentions_in_order_(fr)
    mention1, mention2 = ordered_mentions[0], ordered_mentions[1]
    head_of_m1 = _head_of_m1_(fr)
    head_of_m2 = _head_of_m2_(fr)
    s_tree = SYNTAX_PARSE_SENTENCES[fr.article][mention1[4]]

    collected = []
    leaf_idx = mention1[1] + 1
    while leaf_idx < mention2[1]:
        leaf_pos = s_tree.leaf_treeposition(leaf_idx)
        first_child = leaf_pos[-2]
        phrase = s_tree[leaf_pos[:-2]]
        found_head = None
        skipped = 0
        if phrase.node in phrase_heads:
            head_tags = phrase_heads[phrase.node]
            for offset, child in enumerate(phrase[first_child:]):
                is_candidate = child.node in head_tags
                word = child[0]
                differs_from_m1 = word != head_of_m1
                differs_from_m2 = word != head_of_m2
                neighbour = child.right_sibling()
                if isinstance(neighbour, ParentedTree) \
                        and neighbour.node in head_tags:
                    # a later sibling is also a head candidate; keep looking
                    continue
                if is_candidate and differs_from_m1 and differs_from_m2:
                    found_head = word
                    skipped = offset
        # only unicode leaves are accepted as head words
        if isinstance(found_head, unicode):
            collected.append(found_head)
        # jump past the siblings that were consumed for this phrase
        leaf_idx += skipped + 1

    return ParentedTree("BOH", [ParentedTree(w, ["*"]) for w in collected])
Ejemplo n.º 2
0
def lp_tree(fr):
    """Build a flat "LP" (label path) tree between two mentions.

    Collects the node labels met while climbing from the first token of the
    first mention up to (and including) the lowest common ancestor, followed
    by the labels met while climbing from the last token of the second
    mention up to (but excluding) the lowest common ancestor, the latter in
    top-down order.  Consecutive identical labels within a branch are
    collapsed into one.

    :param fr: relation record providing ``article`` and ``i_sentence``.
    :return: ``ParentedTree`` labelled ``"LP"`` with one ``(label *)`` child
        per label on the path.
    """
    s_tree = SYNTAX_PARSE_SENTENCES[fr.article][int(fr.i_sentence)]
    lwca = _get_lowest_common_ancestor_(fr, s_tree)
    ordered_mentions = _get_mentions_in_order_(fr)
    mention1, mention2 = ordered_mentions[0], ordered_mentions[1]
    left_start = s_tree[s_tree.leaf_treeposition(int(mention1[1]))[:-1]]
    right_start = s_tree[s_tree.leaf_treeposition(int(mention2[2]) - 1)[:-1]]

    def climb(node, stop):
        """Labels from ``node`` upwards until ``stop``, adjacent repeats dropped."""
        labels = []
        while node != stop:
            if not labels or labels[-1] != node.node:
                labels.append(node.node)
            node = node.parent()
        return labels

    # the left branch stops above the common ancestor, so its label is included
    left_labels = climb(left_start, lwca.parent())
    right_labels = climb(right_start, lwca)
    right_labels.reverse()

    return ParentedTree(
        "LP", [ParentedTree(label, ["*"]) for label in left_labels + right_labels])
Ejemplo n.º 3
0
    def from_root_to_m2(pos_token_tree):
        """Build the right branch: the path down to ``pos_token_tree`` plus
        everything to its left, below the lowest common ancestor.

        Returns ``None`` when both tokens share the same parent, because
        ``from_root_to_m1`` already produces the complete tree in that case.
        """
        if s_tree[i_tuple[:-2]] == s_tree[j_tuple[:-2]]:
            # tokens have the same parent: from_root_to_m1 has taken care of this
            return

        # Seed the branch with the (POS token) subtree and its left
        # siblings, if any, e.g. (NNP W.) (NNP Bush).
        anchor = pos_token_tree.parent()
        seed = []
        for sibling in anchor:
            seed.append(sibling.copy(deep=True))
            if sibling == pos_token_tree:
                break
        right_branch = ParentedTree(anchor.node, seed)

        # Climb towards the lowest common ancestor, keeping the left
        # siblings at every level and ignoring the ones on the right.
        current = anchor
        while isinstance(current.parent(), ParentedTree) \
                and current.parent() != lowest_common_ancestor:
            level = []
            for sibling in current.parent():
                if sibling == current:
                    break
                level.append(sibling.copy(deep=True))
            level.append(right_branch)
            # wrap what has been built so far (bottom-up construction)
            right_branch = ParentedTree(current.parent().node, level)
            current = current.parent()
        return right_branch
Ejemplo n.º 4
0
def boh_np_tree(fr):
    """Build a flat "BOH-NPs" tree with the nominal heads between two mentions.

    Walks the leaves strictly between the start of the first mention and the
    start of the second one.  Within each phrase, the last sibling tagged as
    a noun, WH noun phrase or pronoun is kept, provided its word differs from
    the head of both mentions.

    :param fr: relation record; its ``article`` attribute selects the parsed
        sentences.
    :return: ``ParentedTree`` labelled ``"BOH-NPs"`` with one ``(head *)``
        child per collected head word.
    """
    nominal_tags = ('NN', 'NNS', 'NNP', 'NNPS', 'WHNP', "PRP")

    ordered_mentions = _get_mentions_in_order_(fr)
    mention1, mention2 = ordered_mentions[0], ordered_mentions[1]
    head_of_m1 = _head_of_m1_(fr)
    head_of_m2 = _head_of_m2_(fr)
    s_tree = SYNTAX_PARSE_SENTENCES[fr.article][mention1[4]]

    collected = []
    leaf_idx = mention1[1] + 1
    while leaf_idx < mention2[1]:
        leaf_pos = s_tree.leaf_treeposition(leaf_idx)
        phrase = s_tree[leaf_pos[:-2]]
        found_head = None
        skipped = 0
        for offset, child in enumerate(phrase[leaf_pos[-2]:]):
            if child.node not in nominal_tags:
                continue
            word = child[0]
            if word != head_of_m1 and word != head_of_m2:
                found_head = word
                skipped = offset
        # only unicode leaves are accepted as head words
        if isinstance(found_head, unicode):
            collected.append(found_head)
        # jump past the siblings consumed for this phrase
        leaf_idx += skipped + 1

    return ParentedTree("BOH-NPs", [ParentedTree(w, ["*"]) for w in collected])
Ejemplo n.º 5
0
    def tree_transformation(self, parent, ls_pos_children=None, type=''):
        """
        Transform a non-binary part of ``self.nltk_tree`` towards binary form.

        The tree is modified in place.

        :param parent: for ``type == 'single'`` the tree position of the node
            to pad; for ``type == 'multiple'`` a dict whose ``'parent'`` entry
            is the tree position of the node to regroup and whose ``'max'``
            entry is the highest child index (child count minus one)
        :param ls_pos_children: for the multiple case, the tree positions of
            the children of the parent that are to be regrouped
        :param type: ``'single'`` for a node with one child, ``'multiple'``
            for a node with more than two children
        :return: ``True`` once a transformation has been applied; ``None`` for
            any other value of ``type`` (nothing is done)
        """
        # avoid sharing one default list object between calls
        if ls_pos_children is None:
            ls_pos_children = []

        if type == 'single':
            # pad the only child with a dummy sibling carrying the same label
            self.nltk_tree[parent].append(
                ParentedTree(self.nltk_tree[parent].label(), ['@@']))
            return True

        if type != 'multiple':
            return None

        tree_parent = self.nltk_tree[parent['parent']]
        cnt_children = parent['max']
        cnt_sub_tree_pair = (cnt_children + 1) // 2  # number of child pairs
        remainder = (cnt_children + 1) % 2  # 0 -> even, 1 -> odd
        label_parent = tree_parent.label()

        for i in range(cnt_sub_tree_pair):
            tree1 = self.nltk_tree[ls_pos_children[2 * i]]
            tree2 = self.nltk_tree[ls_pos_children[2 * i + 1]]
            # Re-parse the string form to get parent-less copies, then hang
            # the pair under a new node appended at the end of the parent.
            tree_parent.append(
                ParentedTree(label_parent, [
                    ParentedTree.fromstring(str(tree1)),
                    ParentedTree.fromstring(str(tree2))
                ]))

        if remainder != 0:
            # odd child count: the last child is carried over unpaired
            tree_remaining = self.nltk_tree[ls_pos_children[-1]]
            tree_parent.append(tree_remaining.copy(deep=True))

        # Overwrite every merged child with a placeholder so that it can be
        # located and removed without invalidating the other positions.
        marker = 'lexica_replaced_leaf'
        for child in ls_pos_children:
            self.nltk_tree[child] = ParentedTree('LEXICA_REPLACED', [marker])

        # Remove the placeholders one at a time.  The leaf list is recomputed
        # after each deletion because every deletion shifts the leaf indices;
        # looking indices up in a stale snapshot only works when the
        # placeholders happen to be contiguous.
        while True:
            leaves = self.nltk_tree.leaves()
            if marker not in leaves:
                break
            marker_position = self.nltk_tree.leaf_treeposition(
                leaves.index(marker))
            del self.nltk_tree[marker_position[:-1]]  # drop the placeholder
        return True
Ejemplo n.º 6
0
    def cut_what(self, tree, min_length=0, length=0):
        """
        Shorten a verb phrase by copying the parse tree depth first and
        stopping after the first noun phrase found at each level.

        :param tree: Tree to cut
        :type tree: ParentedTree
        :param min_length: Desired minimal number of tokens
        :type min_length: Integer
        :param length: Number of tokens already included by the calling level
        :type length: Integer

        :return: A new subtree; the input tree is not modified
        """
        if type(tree[0]) is not ParentedTree:
            # first child is not a ParentedTree: treat this node as a leaf holder
            return ParentedTree(tree.label(), [tree[0]])

        kept = []
        for sub in tree:
            trimmed = self.cut_what(sub, min_length, length)
            length += len(trimmed.leaves())
            kept.append(trimmed)
            if sub.label() != 'NP':
                continue
            # An NP ends this level; a directly following PP is kept as well
            # while the token budget has not been reached.
            neighbour = sub.right_sibling()
            if length < min_length and neighbour is not None \
                    and neighbour.label() == 'PP':
                kept.append(neighbour.copy(deep=True))
            break
        return ParentedTree(tree.label(), kept)
Ejemplo n.º 7
0
def lp_head_tree(fr):
    """
    Build a flat "LP-head" tree: the labels on the path between two mentions,
    with the lowest common ancestor augmented by its head word.

    The left branch climbs from the first token of the first mention, the
    right branch from the last token of the second mention; both stop below
    the lowest common ancestor and collapse consecutive identical labels.
    Between them sits a node labelled like the lowest common ancestor whose
    only child is the value returned by ``_find_head_of_tree_``.

    :param fr: relation record providing ``article`` and ``i_sentence``.
    :return: ``ParentedTree`` labelled ``"LP-head"``.
    """
    s_tree = SYNTAX_PARSE_SENTENCES[fr.article][int(fr.i_sentence)]
    lwca = _get_lowest_common_ancestor_(fr, s_tree)
    ordered_mentions = _get_mentions_in_order_(fr)
    mention1, mention2 = ordered_mentions[0], ordered_mentions[1]
    left_tree = s_tree[s_tree.leaf_treeposition(int(mention1[1]))[0:-1]]
    right_tree = s_tree[s_tree.leaf_treeposition(int(mention2[2]) - 1)[0:-1]]

    def _branch_below_lwca(start):
        """``(label *)`` nodes from ``start`` up to, but excluding, ``lwca``."""
        nodes = []
        curr_tree = start
        while curr_tree != lwca:
            if not nodes or nodes[-1].node != curr_tree.node:
                nodes.append(ParentedTree(curr_tree.node, ["*"]))
            curr_tree = curr_tree.parent()
        return nodes

    nodes_left_branch = _branch_below_lwca(left_tree)
    # Avoid repeating the ancestor's label right before the head node.  The
    # branch is empty when the start node already equals the ancestor, so it
    # must be checked before indexing.
    if nodes_left_branch and nodes_left_branch[-1].node == lwca.node:
        nodes_left_branch.pop()
    # add head of lwca
    nodes_left_branch.append(
        ParentedTree(lwca.node, [_find_head_of_tree_(lwca)]))

    nodes_right_branch = _branch_below_lwca(right_tree)
    nodes_right_branch.reverse()

    return ParentedTree("LP-head", nodes_left_branch + nodes_right_branch)
Ejemplo n.º 8
0
def bow_tree(fr):
    """Build a flat "BOW" (bag-of-words) tree from the words between m1 and m2.

    The list returned by ``_get_words_in_between_`` is shortened in place by
    removing up to two entries from its end; every remaining ``(word, pos)``
    pair contributes a ``(word *)`` child.

    NOTE(review): an earlier description said the first and the last word
    are excluded, but the code drops the last two entries - confirm which
    one is intended.
    """
    words = _get_words_in_between_(fr)
    for _ in range(2):
        if len(words) >= 1:
            words.pop()
    return ParentedTree("BOW", [ParentedTree(word, ["*"]) for word, pos in words])
Ejemplo n.º 9
0
 def create_tree(tree):
     """Rebuild the children of ``tree`` as a list of new ParentedTree nodes.

     A child that contains further subtrees is rebuilt recursively under its
     own label.  Any other child becomes a node holding one
     ``(decoded token, decoded label)`` tuple; that node is labelled with the
     label of the child's parent, or ``None`` when there is no parent or the
     parent is labelled 'S' or 'ROOT'.
     """
     rebuilt = []
     for n in tree:
         inner = list(n.subtrees(filter=lambda k: k != n))
         if len(inner) > 0:
             rebuilt.append(ParentedTree(n.label(), create_tree(n)))
             continue
         owner = n.parent()
         if owner is not None and owner.label() not in ['S', 'ROOT']:
             parent_label = owner.label()
         else:
             parent_label = None
         token = (self.__decode_(n[0]), self.__decode_(n.label()))
         rebuilt.append(ParentedTree(parent_label, [token]))
     return rebuilt
Ejemplo n.º 10
0
def path_enclosed_tree_augmented(fr):
    """Return the path-enclosed tree of ``fr`` built on the augmented parse.

    When the two sentence indices of ``fr`` differ, a placeholder tree
    ``(None *)`` is returned instead.
    """
    if fr.i_sentence != fr.j_sentence:
        # just in case: the mentions are not in the same sentence
        return ParentedTree("None", ["*"])
    augmented = AUGMENTED_TREES[fr.article][int(fr.i_sentence)]
    return _generate_enclosed_tree(fr, ParentedTree.convert(augmented))
Ejemplo n.º 11
0
def conll2tree(arr):
    """Turn CoNLL-style rows into a dependency tree rooted at index 0.

    Row format: idx, word, _, pos, pos, _, head, _, _, _ - only column 0
    (token index) and column 6 (head index) are read.

    Several tokens attached to head 0 (multiple roots) are not detected here.

    :param arr: iterable of rows, each indexable by position.
    :return: the tree produced by ``updateTree`` for head 0.
    """
    # dependent index -> head index
    head_of = {}
    for row in arr:
        head_of[int(row[0])] = int(row[6])

    # head index -> list of dependent indices
    dependents_of = {}
    for dep in head_of:
        dependents_of.setdefault(head_of[dep], []).append(dep)

    # head index -> one-level tree whose leaves are the dependent indices
    tree_of = {}
    for head in dependents_of:
        tree_of[head] = ParentedTree(head, dependents_of[head])

    return updateTree(tree_of, 0)
Ejemplo n.º 12
0
 def merge_both_branches(left_branch, right_branch):
     """Join both branches under a node labelled like lowest_common_ancestor.

     Without a right branch the left branch is returned unchanged.  Otherwise
     the result holds the left branch, deep copies of the children of the
     lowest common ancestor lying between the child containing the first
     token and the child containing the later token, and the right branch.
     """
     if right_branch is None:
         return left_branch

     first_parts = set(first_token.split("_"))
     later_parts = set(later_token.split("_"))
     merged = [left_branch]
     passed_m1 = False
     passed_m2 = False
     for child in lowest_common_ancestor:
         child_leaves = set(child.leaves())
         holds_m1 = len(first_parts.intersection(child_leaves)) > 0
         holds_m2 = len(later_parts.intersection(child_leaves)) > 0
         if passed_m1 and passed_m2:
             break
         if holds_m2 and passed_m1:
             passed_m2 = True
         elif passed_m1 and not passed_m2:
             # strictly between the two mentions: keep a copy
             merged.append(child.copy(deep=True))
         if holds_m1:
             passed_m1 = True
     merged.append(right_branch)
     return ParentedTree(lowest_common_ancestor.node, merged)
Ejemplo n.º 13
0
def updateTree(treeheadHash, idx):
    """Recursively expand the tree stored under ``idx``.

    Every child of ``treeheadHash[idx]`` that is itself a key of
    ``treeheadHash`` is replaced by its own expanded tree; other children
    are kept as they are.  The rebuilt tree is stored back under ``idx``
    and returned.
    """
    expanded = [
        updateTree(treeheadHash, kid) if kid in treeheadHash else kid
        for kid in treeheadHash[idx]
    ]
    rebuilt = ParentedTree(idx, expanded)
    treeheadHash[idx] = rebuilt
    return rebuilt
Ejemplo n.º 14
0
    def __get_nltk_parse_tree__(self, tree):
        """Convert ``tree`` into a simplified ParentedTree rooted at 'S'.

        The input is converted with ``ParentedTree.convert``, rebuilt by
        ``create_tree`` under a new 'S' root, printed to stdout, and finally
        passed through ``move_up``, whose return value is returned.
        """

        def create_tree(tree):
            # Rebuild the children of `tree` as a list of new ParentedTree nodes.
            nodes = []
            for n in tree:
                # proper subtrees of n (n itself is filtered out)
                subtrees = [
                    subtree for subtree in n.subtrees(filter=lambda k: k != n)
                ]
                if len(subtrees) > 0:
                    # n has inner structure: rebuild it recursively under its own label
                    subnodes = create_tree(n)
                    nodes.append(ParentedTree(n.label(), subnodes))
                else:
                    # n has no inner subtrees: label the new node with the label of
                    # n's parent, or None when there is no parent or the parent is
                    # labelled 'S' or 'ROOT'
                    parent_label = n.parent().label() if n.parent() is not None \
                                                         and n.parent().label() not in ['S', 'ROOT'] else None
                    # the single child is a (decoded first child, decoded label) tuple
                    nodes.append(
                        ParentedTree(parent_label, [(self.__decode_(
                            n[0]), self.__decode_(n.label()))]))
            return nodes

        def move_up(tree):
            # Splice subtrees whose label is None or repeats the label of the
            # enclosing child into their parent.  Works in place and returns `tree`.
            # The index range is computed once, before anything is modified.
            for i in range(len(tree[:])):
                n = tree[i]
                if isinstance(n, Tree):
                    # enumerate the subtrees of n; n itself passes the filter only
                    # when its label is None
                    subtrees = [(ind, subtree) for ind, subtree in enumerate(
                        n.subtrees(
                            filter=lambda k: k != n or k.label() is None))]
                    if i == 0:
                        # the first child is processed in reverse order
                        subtrees = subtrees[::-1]
                    for ind, subtree in subtrees:
                        if subtree.label() == n.label(
                        ) or subtree.label() is None:
                            parent = subtree.parent()
                            if parent is not None:
                                parent.remove(subtree)
                            subsub = [
                                s for s in subtree.subtrees(
                                    filter=lambda k: k != subtree)
                            ]
                            if len(subsub) == 0:
                                # no nested subtrees: re-insert the leaves into the
                                # former parent at index i; iterating backwards
                                # keeps their original order
                                # NOTE(review): i is the index of n within `tree`,
                                # not the removed subtree's index within `parent`
                                # - confirm this is the intended position
                                for k in range(
                                        len(subtree.leaves()) - 1, -1, -1):
                                    if parent is not None:
                                        parent.insert(i, subtree.leaves()[k])
                            else:
                                # nested subtrees remain: process n's own children
                                move_up(n)
            return tree

        tree = ParentedTree.convert(tree)
        new_tree = ParentedTree('S', create_tree(tree))
        # NOTE(review): prints the intermediate tree on every call - looks like
        # leftover debug output; confirm before removing
        print(new_tree)
        return move_up(new_tree)
Ejemplo n.º 15
0
def generate_subtrees(simplified_sentences, full_tree):
    """Derive one pruned copy of ``full_tree`` per simplified sentence.

    Each entry of ``simplified_sentences`` is a sequence aligned with the
    leaves of ``full_tree``; a falsy item marks the leaf at that index for
    removal.  Sentences with nothing to remove produce no subtree.

    :param simplified_sentences: iterable of per-leaf sequences.
    :param full_tree: tree that can be converted to ``ParentedTree``.
    :return: list of ``BoTree`` objects, one per sentence that removed at
        least one leaf; the copy for sentence ``n`` has ``--extra{n}``
        appended to its root label.
    """
    parented_tree = ParentedTree(0, []).convert(full_tree)
    subtrees = []
    for n, sent in enumerate(simplified_sentences):
        # Delete from the highest index down so that the indices of the
        # leaves still to be deleted remain valid.
        to_del = [num for num, word in enumerate(sent) if not word]
        to_del.reverse()
        if not to_del:
            continue

        new_tree = parented_tree.copy(deep=True)
        new_tree.set_label(f"{new_tree.label()}--extra{n}")
        for num in to_del:
            postn = new_tree.leaf_treeposition(num)
            # Go up while the node holding the leaf is an only child, so that
            # no empty ancestors are left behind.  Stop below the root: the
            # root never has siblings and its position () cannot be
            # shortened any further, which would otherwise loop forever.
            while len(postn) > 1 and not (
                    new_tree[postn[:-1]].left_sibling()
                    or new_tree[postn[:-1]].right_sibling()):
                postn = postn[:-1]

            if len(postn) > 1:
                del new_tree[postn[:-1]]
            else:
                # The chain reached the root, which cannot be deleted itself;
                # remove the root's child at this position instead.
                del new_tree[postn]

        subtrees.append(BoTree(0, []).convert(new_tree))

    return subtrees
Ejemplo n.º 16
0
 def wrap_leaves(node):
     """Replace, in place, every non-Tree child below ``node`` by an empty
     ParentedTree labelled with that child's value."""
     for position, item in enumerate(node):
         if not isinstance(item, Tree):
             node[position] = ParentedTree(item, [])
             continue
         wrap_leaves(item)
Ejemplo n.º 17
0
    def from_root_to_m1(pos_token_tree):
        """Build the left branch of the path-enclosed tree.

        Starts from ``pos_token_tree`` (the (POS token) subtree of mention 1),
        keeps it together with the siblings to its right, and then climbs
        towards ``lowest_common_ancestor``, adding at every level the
        subtrees that lie to the right of the path.

        Returns the complete tree when mention 2 is a direct sibling of
        ``pos_token_tree``, a copy of ``pos_token_tree`` alone when its parent
        is the lowest common ancestor, and otherwise the accumulated left
        branch.
        """

        #initiate left_branch with token and pos, and its
        #right siblings.
        children_to_add = []
        found = False
        same_subtree = False

        #Building the "proto" left_branch tree: add the (POS Mention1) tree and all its right siblings.
        # Don't add anything until Mention1 is found. Not going up yet
        for child in pos_token_tree.parent():
            # True when any "_"-separated part of later_token is a leaf of this child
            j_in_leaves = len(
                set(later_token.split("_")).intersection(set(
                    child.leaves()))) > 0
            if child == pos_token_tree:  #eg. (JJ Republican)
                children_to_add.append(child.copy(deep=True))
                found = True
            elif child == later_tree:  #M2 is in that same subtree!
                children_to_add.append(child.copy(deep=True))
                same_subtree = True  #Eg. Mention1 = Republican and M2= candidate.
                break  #don't want to keep adding stuff after M2!
            elif j_in_leaves and not same_subtree:
                break  #M2 is deep embedded in tree sibling to (POS Mention1). #from_root_to_M2 will take care of it.
            elif found:
                # right sibling of Mention1 that does not contain M2
                children_to_add.append(child.copy(deep=True))

        #proto left-branch eg.
        left_branch = ParentedTree(pos_token_tree.parent().node,
                                   children_to_add)

        #check whether M1 and M2 in same pre-leaf phrase (eg. NP Republican candidate)
        if same_subtree:
            return left_branch  #no need to keep going upwards, this is the path-enclosed tree.
        else:
            if pos_token_tree.parent() == lowest_common_ancestor:
                # (POS Mention1) will be the left branch of the path-enclosed tree.
                left_branch = pos_token_tree.copy(deep=True)
                return left_branch
            else:
                #we have to go further up
                subtree = pos_token_tree.parent()

            ##Keep going up, looping over the children of each parent, adding branches that are
            ##right to m1 until the lowest common ancestor is hit.
            found = False
            seen = False
            while isinstance(subtree.parent(),ParentedTree) and \
                            subtree.parent()!=lowest_common_ancestor:
                # the branch built so far becomes the first child of the next level up
                children = []
                children.append(left_branch)
                for child in subtree.parent():
                    if child == subtree:
                        seen = True
                        found = True
                    elif found and seen:  #= if m1 was found and the current subtree is on the right side of m1
                        children.append(child.copy(deep=True))
                left_branch = ParentedTree(subtree.parent().node, children)
                subtree = subtree.parent()
                # seen is reset for every level; found stays True once it has been set
                seen = False
                #left_branch.draw()
            return left_branch
Ejemplo n.º 18
0
    del newtree[mymoves[1][0]]
    return newtree
    '''


for i in xrange(0, len(alignfi)):
    if alignfi[i].strip() == '': break
    align_c2s = {}
    align_s2c = {}
    for x in alignfi[i].split():
        c_num = int(x.split('-')[1])
        s_num = int(x.split('-')[0])
        align_c2s[c_num] = align_c2s.setdefault(c_num, []) + [s_num]
        align_s2c[s_num] = align_s2c.setdefault(s_num, []) + [c_num]

    comptree = ParentedTree(compfi[i])
    simptree = ParentedTree(simpfi[i])
    if DEBUG:
        print '######################'
        print 'comptree:', comptree
        print 'simptree:', simptree
        print 'c2s align:', align_c2s
    complength = len(comptree.leaves())
    simplength = len(simptree.leaves())
    if complength > simplength:
        maxlength = complength
        longdict = align_c2s
        chunk_list = []
        mychunk = []
        for j in xrange(0, maxlength):
            if j in longdict:
Ejemplo n.º 19
0
            newlist[j] = (newlist[j][0], new_s1, new_s2)
    return newlist


import sys

# Read both inputs completely, line by line: argv[2] is the simple-side
# file and argv[1] the complex-side file.  The context managers close the
# handles as soon as the lines have been read.
with open(sys.argv[2]) as simp_handle:
    simpfi = simp_handle.readlines()
with open(sys.argv[1]) as comp_handle:
    compfi = comp_handle.readlines()

#print Tree(simpfi[0]).leaves()
#for chunk in common_chunks(Tree(simpfi[0]).leaves(), Tree(compfi[0]).leaves()):
#    print chunk
#    print [Tree(simpfi[0]).leaves()[tup[0]] for tup in chunk]
#print longest_common_substring(Tree(simpfi[0]).leaves(), Tree(compfi[0]).leaves())
# For every line pair, parse the lower-cased simple and complex trees and
# build "simpleIndex-complexIndex" alignment strings from the chunks that
# get_substrings reports for their leaves.
for i in xrange(0, len(simpfi)):
    simptree = ParentedTree(simpfi[i].lower())
    comptree = ParentedTree(compfi[i].lower())

    # NOTE(review): the meaning of the two trailing arguments (seed chunk and
    # accumulator?) is defined by get_substrings - confirm against it
    chunk_list = get_substrings(comptree.leaves(), simptree.leaves(),
                                ([''], (0, 0), (0, 0)), [])
    #print chunk_list
    #print comptree
    alignlist = []
    for chunk in chunk_list:
        #print chunk
        # chunk[1] is read as a range over the complex leaves and chunk[2] as a
        # range over the simple leaves; presumably (start, end) pairs - confirm
        comprange = chunk[1]
        simprange = chunk[2]
        simpidx = simprange[0]
        # pair consecutive simple indices with the complex indices of the chunk
        for j in xrange(comprange[0], comprange[1]):
            alignlist.append(str(simpidx) + '-' + str(j))
            simpidx += 1
def split(sent, cc_tuple):
    """Split ``sent`` into several sentences around a coordinating conjunction.

    The sentence is tokenized, POS-tagged and parsed; comma tokens are
    inserted into the tagged sequence depending on the number of PP and SBAR
    subtrees, and the sequence is parsed again.  If ``cc_tuple`` occurs in
    the tagged sequence, the children of the phrase containing the
    conjunction are removed from the tree, grouped into parts and replaced by
    an ``(INSRT *)`` marker; every part is then substituted for the marker to
    produce one output sentence.

    :param sent: sentence text.
    :param cc_tuple: ``(word, tag)`` pair of the conjunction to split on.
    :return: list of strings, one per part, each joined with single spaces.
    """
    parser = stanford.StanfordParser()
    pos_tagged = pos_tag(tokenize(sent))
    tree = next(parser.tagged_parse(pos_tagged))
    tree1 = ParentedTree.convert(tree)
    #tree.draw()
    count = 0  # number of PP subtrees in the first parse
    m = 0  # number of SBAR subtrees in the first parse
    for t in tree1.subtrees():
        if t.label() == 'PP':
            count = count + 1

    index = []  # positions of CC tags in pos_tagged
    index1 = 0  # position of the last CC tag (only set when m > 0)
    # NOTE(review): tokenized_sent is not assigned in this function; it must
    # exist at module level or this raises NameError when count > 0 - confirm
    if count > 0 and (
        ('to') not in tokenized_sent and
        ('washed') not in tokenized_sent) and (tokenized_sent.count(",") < 2):
        # insert a comma token after a VBD/VBZ verb unless the neighbouring tags
        # rule it out
        # NOTE(review): the range is computed before the inserts and
        # pos_tagged[i - 1] wraps around to the last element for i == 0 -
        # confirm both are intended
        for i in range(len(pos_tagged) - 3):
            if (pos_tagged[i][1] == 'VBD' or pos_tagged[i][1] == 'VBZ'
                ) and pos_tagged[i + 1][1] != 'VBG' and pos_tagged[
                    i + 3][1] != 'CC' and pos_tagged[
                        i + 1][1] != 'NNP' and pos_tagged[i - 1][1] != 'CC':
                pos_tagged.insert(i + 1, (',', ','))

        for j in range(len(pos_tagged)):
            if pos_tagged[j][1] == 'CC':
                index.append(j)

    for t in tree1.subtrees():
        if t.label() == 'SBAR':
            m = m + 1
    if len(index) > 0 and count > 0 and m == 0:
        # put a comma token in front of every recorded CC; c compensates for the
        # shift caused by the commas inserted so far
        c = 0
        for i in range(len(index)):
            pos_tagged.insert(index[i] + c, (',', ','))
            c = c + 1
    if m > 0:
        for j in range(len(pos_tagged)):
            if pos_tagged[j][1] == 'CC':
                index1 = j

    if (index1 > 0 and m > 0) and count == 0:
        # surround the last CC with comma tokens; note the differing spacing
        # inside the two token strings
        pos_tagged.insert(index1, (' ,', ','))  # the token text here is ' ,'
        pos_tagged.insert(index1 + 2, (', ', ','))  # the token text here is ', '


#print(pos_tagged)
    # parse again, now including the inserted comma tokens
    tree = next(parser.tagged_parse(pos_tagged))
    p_tree = ParentedTree.convert(tree)

    leaf_values = p_tree.leaves()
    parts = []
    ht_3_last_obj = []  # leaves of NN/NNS children removed from a height-3 phrase

    if cc_tuple in pos_tagged:
        # first leaf equal to the conjunction word, and the phrase two levels
        # above that leaf
        leaf_index = leaf_values.index(cc_tuple[0])
        tree_location = p_tree.leaf_treeposition(leaf_index)
        parent = p_tree[tree_location[:-2]]
        #print(parent.height())

        if parent.height() == 3:
            # find the noun being referred to
            for subtree in reversed(list(parent.subtrees())):
                if subtree.parent() == parent:
                    if subtree.label() == 'NN' or subtree.label() == 'NNS':
                        ht_3_last_obj = subtree.leaves() + ht_3_last_obj
                        del p_tree[subtree.treeposition()]
            #print("ht 3 last obj -> ", ht_3_last_obj)
            # walk the remaining direct children right to left; a ',' or CC child
            # closes the current part, every visited child is deleted
            part = []
            for subtree in reversed(list(parent.subtrees())):
                if subtree.parent() == parent:
                    # print(subtree)
                    if subtree.label() != ',' and subtree.label() != 'CC':
                        part = subtree.leaves() + part
                    else:
                        parts.append(part + ht_3_last_obj)
                        part = []
                    del p_tree[subtree.treeposition()]
            parts.append(part + ht_3_last_obj)
            #print('parent', parent)
            #print('treeloc', tree_location)
            # marker for the place where each part is substituted later
            parent.append(ParentedTree('INSRT', ['*']))

        else:
            # every direct child other than ',' and CC becomes its own part; all
            # direct children are deleted
            for subtree in reversed(list(parent.subtrees())):
                if subtree.parent() == parent:
                    # print(subtree)
                    if subtree.label() != ',' and subtree.label() != 'CC':
                        parts.append(subtree.leaves() + ht_3_last_obj)
                    del p_tree[subtree.treeposition()]
            #print('parent', parent)
            #print('treeloc', tree_location)
            # marker for the place where each part is substituted later
            parent.append(ParentedTree('INSRT', ['*']))

    #p_tree.draw()
    #print(parts)

    # NOTE(review): from here on the local name `split` hides this function
    split = []
    rem = p_tree.leaves()
    # NOTE(review): list.index raises ValueError when no '*' leaf exists, i.e.
    # when cc_tuple was not found above and no marker was added - confirm
    # callers rely on that
    start_idx = rem.index('*')

    # parts were collected right to left, so reverse them to restore the order
    for part in reversed(parts):
        offset = start_idx
        r_clone = rem.copy()
        del r_clone[offset]  # drop the '*' marker
        for i, word in enumerate(part):
            r_clone.insert(offset + i, word)
        split.append(r_clone)

    #print("split", split)

    split = [" ".join(sent) for sent in split]

    return split
Ejemplo n.º 21
0
def path_enclosed_tree(fr):
    """Return the path-enclosed tree of ``fr`` built on the syntactic parse.

    When the two sentence indices of ``fr`` differ, a placeholder tree
    ``(None *)`` is returned instead.
    """
    if fr.i_sentence != fr.j_sentence:
        # just in case: the mentions are not in the same sentence
        return ParentedTree("None", ["*"])
    sentence_tree = SYNTAX_PARSE_SENTENCES[fr.article][int(fr.i_sentence)]
    return _generate_enclosed_tree(fr, sentence_tree)