Ejemplo n.º 1
0
 def __iterate(tree: ParentedTree, index: int = 1):
     """Walk *tree* depth-first and yield ``(tag, token)`` for each leaf.

     The tree is modified while it is walked:

     * a label containing ``'-'`` is cut down to the text before the first
       ``'-'``;
     * after a leaf has been yielded and the generator is resumed, the
       leaf's token (``tree[0]``) is overwritten with its running number.
       The last leaf is therefore only renumbered if the generator is run
       to exhaustion.

     ``index`` is the number assigned to the first leaf found under *tree*.
     What counts as a leaf is decided by ``_is_leaf`` (defined elsewhere).
     """
     label = tree.label()
     if '-' in label:
         tree.set_label(label.split('-')[0])

     if not _is_leaf(tree):
         for child in tree:
             for pair in __iterate(child, index):
                 yield pair
                 # One leaf was just produced below us, so the next leaf
                 # (in this child or the following one) gets the next number.
                 index += 1
         return

     # Yield before overwriting so the consumer receives the original token.
     yield tree.label(), tree[0]  # (tag, token)
     tree[0] = index
Ejemplo n.º 2
0
        def __iterate(tree: ParentedTree):
            """Rewrite every label under *tree* in place as ``'<head>|<label>'``.

            For a leaf (as decided by ``_is_leaf``, defined elsewhere) the
            head part is ``tree[0]``. For any other node the head part is
            copied from one of its children, chosen with
            ``self.head_rules[label]``: a sequence of rules, each a mapping
            with a ``'direction'`` (``'r'`` means scan the children right to
            left) and a list of ``'tags'`` tried in order. A rule with no
            tags simply takes the first child in scan order. If no rule
            matches at all, the label is left without a head prefix.
            """
            label = tree.label()

            if _is_leaf(tree):
                tree.set_label('{}|{}'.format(tree[0], label))
                return

            # Children first: the selection below reads their rewritten labels.
            for child in tree:
                __iterate(child)

            if label not in self.head_rules:
                # Tag not covered by the head rules: use the last child's head.
                fallback_head = tree[-1].label().split('|')[0]
                tree.set_label('{}|{}'.format(fallback_head, label))
                return

            for rule in self.head_rules[label]:
                candidates = [child.label().split('|') for child in tree]
                if rule['direction'] == 'r':
                    candidates.reverse()

                wanted_tags = rule['tags']
                if not wanted_tags:
                    # Catch-all rule: take the first child in scan order.
                    tree.set_label('{}|{}'.format(candidates[0][0], label))
                    return

                # Remember, per tag, the head of the first child carrying it.
                first_head_by_tag = {}
                for child_head, child_tag in candidates:
                    first_head_by_tag.setdefault(child_tag, child_head)

                for tag in wanted_tags:
                    if tag in first_head_by_tag:
                        tree.set_label(
                            '{}|{}'.format(first_head_by_tag[tag], label))
                        return
Ejemplo n.º 3
0
    def _find_special_nodes(self, node: ParentedTree):
        """Classify *node* and record qualifying positions in ``self.special_labels``.

        Return codes:

        * ``0`` - the node's label is in ``self.avoid``, or some child
          returned ``0``;
        * ``1`` - the node's label is in ``self.look_for`` and no child
          returned ``0``; its ``treeposition()`` is added to
          ``self.special_labels``;
        * ``2`` - anything else, including plain ``str`` leaves.

        Assumes the input is a tree (each node has a single parent), so no
        visited-set is kept.
        """
        # Leaves are bare strings and carry no label.
        if type(node) is str:
            return 2

        label = node.label()
        is_wanted = label in self.look_for
        is_avoided = label in self.avoid

        # Visit every child unconditionally (no short-circuit): nested nodes
        # must still get the chance to record themselves.
        child_codes = [self._find_special_nodes(child) for child in node]

        if is_avoided or 0 in child_codes:
            return 0

        if is_wanted:
            self.special_labels.add(node.treeposition())
            return 1

        return 2
Ejemplo n.º 4
0
    def __iterate(tree: ParentedTree):
        """Yield ``(index, parent_index)`` relations for the tree rooted at *tree*.

        Labels are expected in the form ``'<index>|<tag>'``; the part before
        the first ``'|'`` is the node's index. A node without a parent
        yields ``(index, 0)``. Every child whose index differs from its
        parent's yields ``(child_index, parent_index)``, followed by the
        relations found inside that child. Nothing below a leaf (as decided
        by ``_is_leaf``, defined elsewhere) is visited.
        """
        own_index = tree.label().split('|')[0]

        # The root is attached to the artificial index 0.
        if not tree.parent():
            yield own_index, 0

        if _is_leaf(tree):
            return

        for child in tree:
            child_index = child.label().split('|')[0]
            if child_index != own_index:
                yield child_index, own_index
            yield from __iterate(child)
    def traverse_and_store(self, tree: ParentedTree,
                           parse_tree_stored: List[Dict]):
        """Append a phrase record for *tree* and each of its subtrees.

        Every leaf is expected to look like ``'<word>_<index>'``. The leaf is
        split on its LAST underscore only, so a word that itself contains
        ``'_'`` is kept whole instead of being cut at its first underscore.

        A record is appended for a (sub)tree only when its height exceeds
        ``self.TREE_HEIGHT`` and it spans fewer than ``self.NGRAM_LIMIT``
        leaves. Records have the keys ``'phrase_label'``, ``'phrase'``
        (space-joined words), ``'ngram'`` (leaf count) and ``'indices'``.

        Args:
            tree: the (sub)tree to record and descend into.
            parse_tree_stored: list the records are appended to, in place.

        Returns:
            The same ``parse_tree_stored`` list that was passed in.

        Raises:
            ValueError: if the text after a leaf's last underscore (or the
                whole leaf, when it has no underscore) is not an integer.
        """
        words = []
        indices = []
        for leaf in tree.leaves():
            # rsplit with maxsplit=1 keeps underscores inside the word; for a
            # leaf without any underscore both fields are the leaf itself.
            fields = leaf.rsplit('_', 1)
            words.append(fields[0])
            indices.append(int(fields[-1]))

        ngram_info = len(words)
        if tree.height() > self.TREE_HEIGHT and ngram_info < self.NGRAM_LIMIT:
            parse_tree_stored.append({
                'phrase_label': tree.label(),
                'phrase': " ".join(words),
                'ngram': ngram_info,
                'indices': indices,
            })

        # Leaves are plain tokens; only tree nodes are descended into.
        for subtree in tree:
            if isinstance(subtree, ParentedTree):
                self.traverse_and_store(tree=subtree,
                                        parse_tree_stored=parse_tree_stored)

        return parse_tree_stored
Ejemplo n.º 6
0
def IsAnArray(tree:ParentedTree):
    """Return True when the label of *tree* is exactly ``'FiguresArray'``."""
    node_label = tree.label()
    return node_label == 'FiguresArray'
Ejemplo n.º 7
0
def IsConcatenated(tree:ParentedTree):
    """Return True when the label of *tree* is exactly ``'Concatenated'``."""
    node_label = tree.label()
    return node_label == 'Concatenated'