def _mention_tree_distance(full_tree, first_arg, second_arg):
    """Return the tree distance between two mention tokens, or -1.

    Every subtree of ``full_tree`` that has ``first_arg`` (respectively
    ``second_arg``) among its leaves is collected. ``get_smallest_height``
    then picks one subtree per token and ``get_tree_distance`` measures the
    distance between the two picks.

    Args:
        full_tree: Parsed sentence tree exposing ``subtrees()``.
        first_arg: Leaf token of the first mention argument.
        second_arg: Leaf token of the second mention argument.

    Returns:
        The value of ``get_tree_distance`` for the two chosen subtrees, or
        ``-1`` when either token is not a leaf of ``full_tree``.
    """
    arg1_subtrees = []
    arg2_subtrees = []

    # Scan the WHOLE tree before choosing. Stopping at the first subtree that
    # contains both tokens would always select the root, because the root is
    # visited first and contains every leaf.
    for subtree in full_tree.subtrees():
        for node in subtree.leaves():
            if node == first_arg:
                arg1_subtrees.append(subtree)
            elif node == second_arg:
                arg2_subtrees.append(subtree)

    if not arg1_subtrees or not arg2_subtrees:
        return -1

    # get_smallest_height returns a (height, subtree) pair; only the subtree
    # is needed here.
    _, arg1_subtree = get_smallest_height(arg1_subtrees)
    _, arg2_subtree = get_smallest_height(arg2_subtrees)
    return get_tree_distance(arg1_subtree, arg2_subtree)


def open_syntax_file(file, mentions):
    """Compute one syntactic tree distance per mention pair.

    Reads ``<file>.head.rel.tokenized.raw.parse`` from the module-level
    ``parsed_path`` directory (defined outside this block), treating each
    non-blank line as one bracketed parse tree. Lines and mentions are
    consumed in lockstep: while the first token of the current mention occurs
    in the current line, that mention is scored against the line's tree and
    the mention pointer advances.

    NOTE(review): this assumes ``mentions`` is ordered the same way as the
    sentences in the parse file -- confirm against the caller.

    Args:
        file: Base name of the parse file (the suffix is appended here).
        mentions: Sequence of items whose element ``[0]`` and element ``[1]``
            are the two tokens of a mention pair.

    Returns:
        A list with exactly ``len(mentions)`` entries. Entry ``i`` is the tree
        distance for ``mentions[i]``, or ``-1`` if the pair could not be
        located (either no line contained its first token, or the two tokens
        were not both leaves of the matching tree).
    """
    parse_file_path = os.path.join(
        parsed_path, file + '.head.rel.tokenized.raw.parse')
    total_mentions = len(mentions)
    distances = []
    mention_counter = 0

    with open(parse_file_path) as raw_syntax_file:
        for line in raw_syntax_file:
            if mention_counter >= total_mentions:
                break
            if not line.strip():
                continue

            # Parse lazily: most lines may not contain the current mention.
            full_tree = None
            # The bounds check comes first so the index is never out of range.
            while (mention_counter < total_mentions
                   and mentions[mention_counter][0] in line):
                if full_tree is None:
                    full_tree = ParentedTree.fromstring(line)
                mention = mentions[mention_counter]
                distances.append(
                    _mention_tree_distance(full_tree, mention[0], mention[1]))
                mention_counter += 1

    # Mentions never matched to any line are reported as -1 so the result
    # stays aligned with ``mentions``.
    distances.extend([-1] * (total_mentions - len(distances)))
    return distances
# Example no. 2
# 0
def get_terminals(ptree: ParentedTree) -> list:
    """Return the subtrees of ``ptree`` that contain no nested subtree.

    A node is kept when iterating its own ``subtrees()`` produces exactly one
    item. The result is checked against the number of leaves of ``ptree``: an
    ``AssertionError`` is raised when the two counts differ.

    Args:
        ptree: Tree to scan.

    Returns:
        List of the selected subtrees, in the order ``ptree.subtrees()``
        yields them.
    """
    def has_single_subtree(node):
        # Per NLTK, subtrees() includes the node itself, so a count of one
        # means there is no tree nested below it.
        return len(list(node.subtrees())) == 1

    terminal_nodes = [
        node for node in ptree.subtrees() if has_single_subtree(node)
    ]
    leaf_count = len(ptree.leaves())
    assert leaf_count == len(terminal_nodes)  # Pull out to unit test?

    return terminal_nodes