Ejemplo n.º 1
0
 def __iterate(tree: ParentedTree, index: int = 1):
     """Yield ``(tag, token)`` for each leaf under ``tree`` and number the leaves.

     The tree is modified in place while it is traversed:

     * a label containing ``'-'`` is cut at the first ``'-'``
       (e.g. ``'NP-SBJ'`` becomes ``'NP'``);
     * for every node accepted by ``_is_leaf`` the token stored at ``tree[0]``
       is replaced by its running index.

     The replacement of a token happens only when the generator is resumed
     after yielding that leaf, so the generator has to be exhausted for every
     token to be replaced.

     :param tree: node to traverse; ``_is_leaf`` (defined elsewhere) decides
         whether it is treated as a leaf.
     :param index: number assigned to the first leaf found under ``tree``.
     :return: generator of ``(label, token)`` pairs in traversal order.
     """
     # Clean the tags which contain '-'.  Labels that *start* with '-'
     # (such as '-NONE-' or '-LRB-') are left untouched: cutting them at the
     # first '-' would leave an empty tag.
     prefix, dash, _ = tree.label().partition('-')
     if dash and prefix:
         tree.set_label(prefix)

     if _is_leaf(tree):
         yield tree.label(), tree[0]  # (tag, token)
         tree[0] = index  # replace the token with its index number
         return

     for subtree in tree:
         for _item in __iterate(subtree, index):
             yield _item
             # One leaf has been emitted by this subtree, so the next
             # sibling must start numbering one position later.
             index += 1
Ejemplo n.º 2
0
        def __iterate(tree: ParentedTree):
            """Prefix the label of every node under ``tree`` with its head.

            Labels are rewritten in place to ``'<head>|<label>'``.  For a node
            accepted by ``_is_leaf`` the head is ``tree[0]``; for any other
            node it is the head of one of its children, chosen with
            ``self.head_rules[label]``.  Each rule is a mapping with a
            ``'direction'`` (``'r'`` scans the children from the right) and
            an ordered ``'tags'`` list; a rule with no tags simply picks the
            first child in scan order.  When the label has no entry in
            ``self.head_rules`` the last child is used.

            NOTE(review): if every rule has tags and none of them matches a
            child, the label is left unchanged -- confirm the rule tables
            always end with an empty-``tags`` rule.
            """
            label = tree.label()

            if _is_leaf(tree):
                tree.set_label('{}|{}'.format(tree[0], label))
                return

            # Children first, so that each child label already carries its head.
            for child in tree:
                __iterate(child)

            # Tag not covered by the head rules: take the last child as head.
            if label not in self.head_rules:
                head = tree[-1].label().split('|')[0]
                tree.set_label('{}|{}'.format(head, label))
                return

            # Child labels cannot change below this point; split them once.
            child_parts = [child.label().split('|') for child in tree]

            for rule in self.head_rules[label]:
                if rule['direction'] == 'r':
                    ordered = child_parts[::-1]  # scan from the right
                else:
                    ordered = child_parts

                wanted_tags = rule['tags']

                # Catch-all rule: the first child in scan order is the head.
                if not wanted_tags:
                    tree.set_label('{}|{}'.format(ordered[0][0], label))
                    return

                # Map each child tag to the head of its first occurrence in
                # scan order.
                first_head_for = {}
                for child_head, child_tag in ordered:
                    first_head_for.setdefault(child_tag, child_head)

                # Tags are tried in the priority order given by the rule.
                for tag in wanted_tags:
                    if tag in first_head_for:
                        tree.set_label('{}|{}'.format(first_head_for[tag], label))
                        return