def simplify_tree(ptree: nltk.ParentedTree, collapse_root=False):
    """Collapse unary chains of non-terminals in a parse tree.

    The root label of *ptree* is overwritten with ``'S'`` in place, then
    the tree is converted to a plain ``nltk.Tree`` and that copy is edited.
    Whenever a node has exactly one child, that child is a tree, and the
    child's first child is also a tree, the child is spliced out: its
    children become the node's children. The node's label becomes
    ``"<node>+<child>"``, or stays as-is when both labels are equal.

    Args:
        ptree: Tree to simplify. Note that its root label is mutated.
        collapse_root: When false and the root has exactly one child,
            traversal starts at that child, so the root itself is never
            merged with it.

    Returns:
        The simplified tree, converted via ``nltk.ParentedTree.convert``.
    """
    ptree._label = 'S'
    tree = nltk.Tree.convert(ptree)

    skip_root = (
        not collapse_root
        and isinstance(tree, nltk.Tree)
        and len(tree) == 1
    )
    pending = [tree[0]] if skip_root else [tree]

    # Iterative depth-first walk using an explicit stack.
    while pending:
        current = pending.pop()
        if not isinstance(current, nltk.Tree):
            # Leaves (plain tokens) have nothing to collapse.
            continue

        is_unary_chain = (
            len(current) == 1
            and isinstance(current[0], nltk.Tree)
            and isinstance(current[0][0], nltk.Tree)
        )
        if not is_unary_chain:
            pending.extend(current)
            continue

        only_child = current[0]
        parent_label = current.label()
        child_label = only_child.label()
        if parent_label != child_label:
            merged_label = parent_label + '+' + child_label
        else:
            merged_label = parent_label
        current.set_label(merged_label)

        # Replace the single child with that child's own children.
        current[0:] = list(only_child)
        # The node has new children now, so it must be examined again.
        pending.append(current)

    return nltk.ParentedTree.convert(tree)
def extract_arguments(self, ptree: nltk.ParentedTree, relation: Relation, arg_pos: str):
    """Predict the Arg1/Arg2 positions for a relation's connective.

    The root label of *ptree* is overwritten with ``'S'`` in place before
    features are computed with ``get_features``. For ``'SS'`` the
    ``self.ss_model`` is used and both arguments are predicted; for
    ``'PS'`` the ``self.ps_model`` is used and only Arg2 is predicted.

    Args:
        ptree: Parse tree passed to ``get_features`` (its root label is
            mutated).
        relation: Relation whose ``conn.tokens`` provide the ``local_idx``
            values handed to ``get_features``.
        arg_pos: Either ``'SS'`` or ``'PS'``.

    Returns:
        A tuple ``(arg1, arg2, arg1_prob, arg2_prob)``. ``arg1`` and
        ``arg2`` are lists of positions in the model's output sequence
        whose most probable class is ``'Arg1'`` / ``'Arg2'``. Each
        probability is the mean of the winning marginals at those
        positions, or ``0.0`` when no position was selected. For ``'PS'``,
        ``arg1`` is always ``[]`` and ``arg1_prob`` is ``1.0``.

    Raises:
        NotImplementedError: If *arg_pos* is neither ``'SS'`` nor ``'PS'``.
    """
    conn_indices = [token.local_idx for token in relation.conn.tokens]
    ptree._label = 'S'
    features = get_features(ptree, conn_indices)

    if arg_pos == 'SS':
        model = self.ss_model
    elif arg_pos == 'PS':
        model = self.ps_model
    else:
        raise NotImplementedError('Unknown argument position')

    # One row per predicted item, one column per class (in classes_ order).
    marginals = model.predict_marginals_single(features)
    class_names = model.classes_
    probs = np.array([[item[name] for name in class_names] for item in marginals])
    best_prob = probs.max(axis=1)
    best_label = np.array(class_names)[probs.argmax(axis=1)]

    def select(label):
        """Return positions tagged *label* and their mean winning probability."""
        positions = np.where(best_label == label)[0]
        if len(positions):
            confidence = best_prob[positions].mean()
        else:
            confidence = 0.0
        return positions.tolist(), confidence

    predicts_arg1 = arg_pos == 'SS'
    if predicts_arg1:
        arg1, arg1_prob = select('Arg1')
    else:
        # For 'PS' no Arg1 positions are predicted here; probability is fixed.
        arg1, arg1_prob = [], 1.0
    arg2, arg2_prob = select('Arg2')

    if predicts_arg1 and not arg1:
        logger.warning("Empty Arg1")
    if not arg2:
        logger.warning("Empty Arg2")

    return arg1, arg2, arg1_prob, arg2_prob