def appositive(pair,parsed_sentences):
    """Return True when the two mentions of ``pair`` form an ``NP , NP`` pattern.

    The mentions qualify when they are in the same sentence, the lowest
    constituent spanning both start tokens is labelled ``NP``, each start
    token lies under a child of that constituent labelled ``NP``, and those
    two children are separated by exactly one child labelled ``,``.

    Args:
        pair: object with ``first`` and ``second`` mentions. Each mention
            provides ``sentenceID`` and ``offsets``; only ``offsets[0]`` is
            read. Both values are strings that are evaluated to integers.
        parsed_sentences: sequence of parse trees indexed by sentence id.

    Returns:
        bool: True for the ``NP , NP`` configuration, False otherwise
        (including mentions in different sentences or mentions that start
        on the same token).
    """
    if pair.first.sentenceID != pair.second.sentenceID:
        return False

    # NOTE(review): eval() executes whatever expression the mention fields
    # contain. If they always hold plain integer strings, int() would be the
    # safer choice -- confirm against the code that builds the mentions.
    index1 = eval(pair.first.offsets[0])
    index2 = eval(pair.second.offsets[0])
    if index1 == index2:
        # Same start token: the spanning position would be the leaf itself,
        # which has no label and no children, so it cannot be "NP , NP".
        return False
    tree = parsed_sentences[eval(pair.first.sentenceID)]

    def label_of(node):
        # NLTK 3 exposes label(); older releases used the ``node`` attribute.
        # ``label`` is probed first because NLTK 3's ``node`` property raises
        # instead of being absent. Plain leaves have neither and yield None.
        if hasattr(node, "label"):
            return node.label()
        return getattr(node, "node", None)

    path1 = list(tree.leaf_treeposition(index1))
    path2 = list(tree.leaf_treeposition(index2))
    low, high = sorted((index1, index2))
    dominate_path = tree.treeposition_spanning_leaves(low, high + 1)
    dept = len(dominate_path)

    # Walk down to the dominating constituent. The tree is only read, so no
    # copy is needed.
    subtree = tree
    for step in dominate_path:
        subtree = subtree[step]
    if label_of(subtree) != 'NP':
        return False

    # Indices of the children of the dominating node that contain each
    # mention's start token. They exist because two distinct leaves always
    # diverge below their lowest common ancestor.
    child1 = path1[dept]
    child2 = path2[dept]
    if label_of(subtree[child1]) != 'NP' or label_of(subtree[child2]) != 'NP':
        return False

    # Exactly one sibling must sit between the two NPs, and it must be ','.
    if child1 == child2 - 2:
        return label_of(subtree[child1 + 1]) == ','
    if child1 == child2 + 2:
        return label_of(subtree[child1 - 1]) == ','
    return False
def tree_distance(pair,parsed_sentences):
    """Count the tree edges separating the start tokens of two mentions.

    For mentions in the same sentence the result is the combined length of
    both leaf tree positions minus twice the length of their shared prefix.
    For mentions in different sentences the string ``"unknown"`` is returned.

    Args:
        pair: object with ``first`` and ``second`` mentions, each providing
            ``sentenceID`` and ``offsets`` (only ``offsets[0]`` is read).
        parsed_sentences: sequence of parse trees indexed by sentence id.

    Returns:
        int | str: the distance, or ``"unknown"`` across sentences.
    """
    first, second = pair.first, pair.second
    if first.sentenceID != second.sentenceID:
        return "unknown"

    # NOTE(review): eval() runs the field contents as Python expressions;
    # presumably they are plain integer strings -- confirm with the loader.
    leaf_a = eval(first.offsets[0])
    leaf_b = eval(second.offsets[0])
    sentence_tree = parsed_sentences[eval(first.sentenceID)]

    position_a = list(sentence_tree.leaf_treeposition(leaf_a))
    position_b = list(sentence_tree.leaf_treeposition(leaf_b))

    # Length of the common prefix of the two tree positions.
    shared = 0
    limit = min(len(position_a), len(position_b))
    while shared < limit and position_a[shared] == position_b[shared]:
        shared += 1

    return len(position_a) + len(position_b) - 2 * shared
Exemple #3
0
def padding_leaves(tree):
    """Tag every leaf with its index and wrap leaves that have siblings.

    First, each leaf ``w`` is rewritten in place as ``"NNN||||w"`` where
    ``NNN`` is the leaf index zero-padded to width 3. Then the leaves are
    visited again; whenever a leaf's parent has more than one child, the
    leaf is replaced by a new ``Tree`` that carries the parent's label and
    holds that leaf as its only child.

    The tree is modified in place and nothing is returned.
    """
    leaf_total = len(tree.leaves())
    positions = [tree.leaf_treeposition(k) for k in range(leaf_total)]
    for k, position in enumerate(positions):
        tree[position] = "{0:03}".format(k) + "||||" + tree[position]

    for k in range(len(tree.leaves())):
        # Looked up again on every pass because earlier wraps change the tree.
        position = tree.leaf_treeposition(k)
        parent = tree[position[:-1]]
        if len(parent) > 1:
            tree[position] = Tree(parent.label(), [tree.leaves()[k]])
Exemple #4
0
def padding_leaves_wnum(leaves, tree):
    """Overwrite the first ``len(leaves)`` leaves of ``tree`` with their index.

    Leaf ``i`` of ``tree`` is replaced by the decimal string of ``i``. Only
    the length of ``leaves`` is used. The tree is modified in place and
    nothing is returned.
    """
    # An earlier variant (index prefix plus wrapping of leaves that have
    # siblings) was disabled in favour of this plain renumbering.
    i = 0
    total = len(leaves)
    while i < total:
        position = tree.leaf_treeposition(i)
        tree[position] = f'{i}'
        i += 1
def depth(entity,parsed_sentences):
    """Return the length of the tree position of the mention's start token.

    Args:
        entity: mention providing ``sentenceID`` and ``offsets``; only
            ``offsets[0]`` is read. Both are evaluated to integers.
        parsed_sentences: sequence of parse trees indexed by sentence id.

    Returns:
        int: number of steps in the leaf's tree position.
    """
    # NOTE(review): eval() runs the field contents as Python expressions;
    # presumably they are plain integer strings -- confirm with the loader.
    leaf_index = eval(entity.offsets[0])
    sentence_tree = parsed_sentences[eval(entity.sentenceID)]
    return len(list(sentence_tree.leaf_treeposition(leaf_index)))
def get_PTP(pair,parsed_sentences):
    """Collect the node labels on the tree path between two mentions.

    The path runs from the start token of ``pair.first`` up to the lowest
    common ancestor of both start tokens and down to the start token of
    ``pair.second``. Only labels of tree nodes are collected; the leaf
    tokens themselves are skipped.

    Args:
        pair: object with ``first`` and ``second`` mentions.
            ``first.offsets[0]`` and ``second.offsets[0]`` are used directly
            as leaf indices and ``first.sentenceID`` as the key into
            ``parsed_sentences``.
        parsed_sentences: container of parse trees addressed by sentence id.

    Returns:
        list: the distinct labels found on the path. The list is built from
        a set, so its order carries no meaning.

    Raises:
        ValueError: if the upward and downward label chains do not meet at
            the same label. Both chains start at the common ancestor, so
            this only acts as a sanity check.
    """
    m1_index = pair.first.offsets[0]
    m2_index = pair.second.offsets[0]
    senID = pair.first.sentenceID

    tree = parsed_sentences[senID]
    # An index at or past the end of the sentence is stepped back by one.
    if m2_index >= len(tree.leaves()):
        m2_index -= 1
    path1 = list(tree.leaf_treeposition(m1_index))
    path2 = list(tree.leaf_treeposition(m2_index))

    # Length of the tree-position prefix shared by both leaves.
    n = 0
    for i, j in zip(path1, path2):
        if i != j:
            break
        n += 1
    share_path = path1[:n]

    def node_label(node):
        # NLTK 3 exposes label(); older releases used the ``node`` attribute.
        # ``label`` is probed first because NLTK 3's ``node`` property raises
        # instead of being absent.
        if hasattr(node, "label"):
            return node.label()
        return node.node

    def get_labels(stree, path):
        # Follow ``path`` from ``stree`` and return the last tree node
        # reached plus the labels seen on the way, starting with ``stree``'s
        # own. The tree is only read, so it is walked without copying.
        subtree = stree
        labels = [node_label(subtree)]
        for i in path:
            child = subtree[i]
            if isinstance(child, nltk.tree.Tree):
                labels.append(node_label(child))
                subtree = child
        return subtree, labels

    subtree = get_labels(tree, share_path)[0]
    path1_labels = get_labels(subtree, path1[n:])[1]
    path2_labels = get_labels(subtree, path2[n:])[1]

    # Orient the first chain bottom-up so it ends at the common ancestor.
    # The second chain stays top-down so it starts there. The junction check
    # therefore compares the ancestor with itself.
    path1_labels.reverse()
    if path1_labels[-1] != path2_labels[0]:
        raise ValueError("Path cannot connect:%s,%s" % (path1_labels,path2_labels ))
    # Drop the duplicated ancestor label from the second chain before merging.
    return list(set(path1_labels + path2_labels[1:]))
Exemple #7
0
def remap_chars(tree):
    """Replace leaves that are keys of ``SPECIAL_CHAR_MBACK`` by their value.

    The leaves are visited by index. A leaf found in the externally defined
    ``SPECIAL_CHAR_MBACK`` lookup is overwritten in place with the value
    stored under it; all other leaves are left as they are. Nothing is
    returned.
    """
    leaf_total = len(tree.leaves())
    for idx in range(leaf_total):
        current = tree.leaves()[idx]
        if current not in SPECIAL_CHAR_MBACK:
            continue
        tree[tree.leaf_treeposition(idx)] = SPECIAL_CHAR_MBACK[current]