Example #1
0
def simplify_tree(ptree: nltk.ParentedTree, collapse_root=False):
    """Collapse unary chains of phrase-level nodes in a parse tree.

    A node is merged with its only child when that child is itself a tree
    whose first child is also a tree (so a node sitting directly above a
    leaf token is never merged away).  The merged node takes the label
    ``parent+child``, or keeps its label unchanged when both labels are
    equal, and adopts the child's children.

    Side effect: the root label of ``ptree`` itself is set to ``'S'``
    before the tree is converted; the structural rewriting is then done on
    the ``nltk.Tree`` conversion of it.

    Args:
        ptree: Parse tree to simplify.
        collapse_root: When false and the root has exactly one child, the
            traversal starts at that child, so the root node itself is
            never merged.  When true the root is treated like any other
            node.

    Returns:
        The simplified tree, converted to ``nltk.ParentedTree``.
    """
    # Kept for backward compatibility: callers may rely on the input's root
    # label being normalised in place.
    ptree._label = 'S'
    tree = nltk.Tree.convert(ptree)

    if not collapse_root and isinstance(tree, nltk.Tree) and len(tree) == 1:
        nodes = [tree[0]]
    else:
        nodes = [tree]

    # Depth-first traversal using an explicit stack.
    while nodes:
        node = nodes.pop()
        if not isinstance(node, nltk.Tree):
            continue  # leaf token: nothing to collapse or descend into

        only_child = node[0] if len(node) == 1 else None
        # The length check guards against an empty subtree, for which
        # indexing its first child would raise IndexError.
        if (isinstance(only_child, nltk.Tree) and len(only_child) > 0
                and isinstance(only_child[0], nltk.Tree)):
            if node.label() != only_child.label():
                node.set_label(node.label() + '+' + only_child.label())
            node[0:] = list(only_child)
            # The node now owns its grandchildren and may again have a
            # single collapsible child, so evaluate it once more.
            nodes.append(node)
        else:
            nodes.extend(node)

    return nltk.ParentedTree.convert(tree)
Example #2
0
    def extract_arguments(self, ptree: nltk.ParentedTree, relation: Relation,
                          arg_pos: str):
        """Label the feature sequence of ``ptree`` and collect Arg1/Arg2 rows.

        Features are computed by ``get_features`` from the tree and the
        ``local_idx`` values of the relation's connective tokens.  The
        model matching ``arg_pos`` yields per-item marginals; each item is
        assigned its most probable class.

        Side effect: the root label of ``ptree`` is set to ``'S'``.

        Args:
            ptree: Parse tree the features are extracted from.
            relation: Relation whose ``conn.tokens`` identify the
                connective.
            arg_pos: ``'SS'`` to use ``self.ss_model`` (predicts Arg1 and
                Arg2) or ``'PS'`` to use ``self.ps_model`` (predicts Arg2
                only).
                NOTE(review): presumably "same sentence" / "previous
                sentence" -- confirm against the caller.

        Returns:
            ``(arg1, arg2, arg1_prob, arg2_prob)``.  ``arg1`` / ``arg2``
            are lists of positions in the feature sequence labelled
            ``'Arg1'`` / ``'Arg2'``; each probability is the mean of the
            winning marginals over those positions, or ``0.0`` when there
            are none.  For ``'PS'``, ``arg1`` is always ``[]`` with
            probability ``1.0``.

        Raises:
            NotImplementedError: If ``arg_pos`` is neither ``'SS'`` nor
                ``'PS'``.
        """
        # Fail fast on an unsupported position, before mutating the tree or
        # paying for feature extraction.
        if arg_pos == 'SS':
            model = self.ss_model
        elif arg_pos == 'PS':
            model = self.ps_model
        else:
            raise NotImplementedError('Unknown argument position')

        indices = [token.local_idx for token in relation.conn.tokens]
        ptree._label = 'S'
        x = get_features(ptree, indices)

        classes = np.array(model.classes_)
        marginals = model.predict_marginals_single(x)
        # Force a 2-D (items, classes) matrix so that an empty sequence
        # gives zero rows instead of a 1-D array that breaks max(axis=1).
        probs = np.array(
            [[p[c] for c in model.classes_] for p in marginals]
        ).reshape(-1, len(classes))
        probs_max = probs.max(axis=1)
        labels = classes[probs.argmax(axis=1)]

        if arg_pos == 'SS':
            arg1 = np.where(labels == 'Arg1')[0]
            arg1_prob = probs_max[arg1].mean() if len(arg1) else 0.0
            arg1 = arg1.tolist()
            if not arg1:
                logger.warning("Empty Arg1")
        else:
            # The PS model does not predict Arg1 here; report it as empty
            # with full confidence, as before.
            arg1 = []
            arg1_prob = 1.0

        arg2 = np.where(labels == 'Arg2')[0]
        arg2_prob = probs_max[arg2].mean() if len(arg2) else 0.0
        arg2 = arg2.tolist()
        if not arg2:
            logger.warning("Empty Arg2")

        return arg1, arg2, arg1_prob, arg2_prob