def convert_tree(t):
    """Recursively convert the tree ``t`` into a binary ``ParseTree``.

    A node labelled ``"text"`` is turned into ``slice_text(t[0])``. For any
    other node, children that are ``Tree`` instances labelled ``"span"``,
    ``"rel2par"`` or ``"leaf"`` are treated as annotations and dropped; all
    remaining children are kept. The relation is read from the first
    ``"rel2par"`` subtree found under the children; as long as the relation
    found so far has ``"span"`` as its first element, later children are
    searched as well and may replace it.

    A node with exactly one kept child collapses to the conversion of that
    child. Otherwise the result is labelled
    ``rel2class[relation] + "[X][Y]"``, where ``X`` and ``Y`` are the first
    characters of the labels of the first two kept children (presumably the
    nuclearity initials -- confirm against the input format). More than two
    kept children are folded into a right-branching binary structure that
    reuses the same label.

    Side effect: when there are more than two kept children, ``_parent`` is
    reset to ``None`` on every kept child except the first.
    """
    if t.label() == "text":
        return slice_text(t[0])

    annotation_labels = ("span", "rel2par", "leaf")
    relation = None
    kids = []
    for node in t:
        node_is_tree = isinstance(node, Tree)
        # Keep everything except annotation subtrees.
        if not (node_is_tree and node.label() in annotation_labels):
            kids.append(node)
        # Keep looking for a relation while none is known or only a
        # placeholder "span" relation has been seen.
        if node_is_tree and (relation is None or relation[0] == "span"):
            for grandchild in node:
                if isinstance(grandchild, Tree) and grandchild.label() == "rel2par":
                    relation = grandchild
                    break

    if len(kids) == 1:
        return convert_tree(kids[0])

    relation_class = rel2class[relation[0].lower()]
    label_rel = relation_class + "[" + kids[0].label()[:1] + "][" + kids[1].label()[:1] + "]"

    if len(kids) <= 2:
        return ParseTree(label_rel, [convert_tree(kids[0]), convert_tree(kids[1])])

    # Detach the trailing children so they can be re-parented under the
    # temporary node built below.
    for item in kids[1:]:
        item._parent = None

    head = convert_tree(kids[0])
    second = convert_tree(kids[1])
    remainder = convert_tree(ParseTree("temp", kids[2:]))
    return ParseTree(label_rel, [head, ParseTree(label_rel, [second, remainder])])
def load_tree_from_file(filename, tokenize = False):
    """Load a tree from a ``.dis`` or ``.tree`` file.

    Args:
        filename: Path of the file to load. Files ending in ``.dis`` are
            read with ``rst_lib.load_tree``; files ending in ``.tree`` are
            read as text and parsed with ``ParseTree.parse``.
        tokenize: Only affects ``.tree`` files. When true, each leaf is
            split on single spaces into a list of tokens; otherwise the
            leaf is kept as one string.

    Returns:
        The tree produced by the matching loader.

    Raises:
        ValueError: If ``filename`` ends in neither ``.dis`` nor ``.tree``.
    """
    def preprocess_leaf(leaf):
        # Leaves are wrapped in ``_!`` ... ``!_`` markers; strip them.
        leaf = re.sub(r'_!(.+?)!_', r'\1', leaf)
        if tokenize:
            return leaf.split(' ')
        return leaf

    if filename.endswith('.dis'):
        return rst_lib.load_tree(filename)

    if filename.endswith('.tree'):
        # Read through a context manager so the handle is always closed.
        with open(filename) as tree_file:
            content = tree_file.read()
        return ParseTree.parse(content, leaf_pattern = r'_!.+?!_', parse_leaf = preprocess_leaf)

    # Fail with a clear message rather than an unbound-local error.
    raise ValueError(
        "Unsupported tree file extension (expected '.dis' or '.tree'): %r" % (filename,)
    )
def make_new_subtree(label, subtree1, subtree2, deepcopy = False):
    """Build a new two-child ``ParseTree`` labelled ``label``.

    Args:
        label: Label of the new parent node.
        subtree1: First child (a ``ParseTree`` or any other object).
        subtree2: Second child (a ``ParseTree`` or any other object).
        deepcopy: When true, each child is first passed through
            ``copy_subtree(child, True)`` and the copies are attached;
            otherwise the given objects are attached directly.

    Returns:
        ``ParseTree(label, [child1, child2])``.

    Side effect: any child that is a ``ParseTree`` has its ``_parent``
    reset to ``None`` before being attached; without ``deepcopy`` this
    mutates the caller's objects.
    """
    stumps = [subtree1, subtree2]
    if deepcopy:
        stumps = [copy_subtree(stump, True) for stump in stumps]

    for stump in stumps:
        if isinstance(stump, ParseTree):
            stump._parent = None

    return ParseTree(label, stumps)
def build_tree(self, doc):
    """Run sentence-level and then document-level parsing on ``doc``.

    Args:
        doc: Document object exposing ``edus``, ``sentences``, ``cuts``
            (one ``(start_edu, end_edu)`` pair per sentence) and, after
            parsing, ``discourse_tree``.

    Returns:
        ``[ParseTree("n/a", [edu])]`` when the document has exactly one
        EDU; otherwise ``doc.discourse_tree`` as it stands after
        ``self.multi_parser.parse_document(doc)`` has run.
    """
    # A single EDU is wrapped directly; neither parser is invoked.
    if len(doc.edus) == 1:
        return [ParseTree("n/a", [doc.edus[0]])]

    for i, sentence in enumerate(doc.sentences):
        start_edu, end_edu = doc.cuts[i]

        if self.verbose:
            # Single-argument print calls produce the same text under both
            # Python 2 and Python 3.
            print('sentence %d' % i)
            print('start_edu %s end_edu %s' % (start_edu, end_edu))

        self.intra_parser.parse_each_sentence(sentence)

    self.multi_parser.parse_document(doc)
    return doc.discourse_tree
def build_tree(self, doc):
    """Parse every sentence of ``doc`` and then the document as a whole.

    Args:
        doc: Document object exposing ``edus``, ``sentences``, ``cuts``
            (one ``(start_edu, end_edu)`` pair per sentence) and, after
            parsing, ``discourse_tree``.

    Returns:
        A one-element list holding ``ParseTree("n/a", [edu])`` when the
        document has exactly one EDU; otherwise ``doc.discourse_tree`` as
        left by ``self.multi_parser.parse_document(doc)``.
    """
    # Single-EDU documents skip both parsers.
    if len(doc.edus) == 1:
        lone_edu = doc.edus[0]
        return [ParseTree("n/a", [lone_edu])]

    sentence_count = len(doc.sentences)
    for idx in range(sentence_count):
        current = doc.sentences[idx]
        first_edu, last_edu = doc.cuts[idx]

        if self.verbose:
            print('sentence %d' % idx)
            print('start_edu', first_edu, 'end_edu', last_edu)

        self.intra_parser.parse_each_sentence(current)

    self.multi_parser.parse_document(doc)
    return doc.discourse_tree
def simplify_tree(tree, start):
    """Return a copy of ``tree`` whose leaves are replaced by numbered nodes.

    Args:
        tree: A ``ParseTree``, a leaf value, or a falsy value.
        start: Number of leaves that precede ``tree``; the first leaf
            under ``tree`` is numbered ``start + 1``.

    Returns:
        ``None`` for a falsy ``tree``. For anything that is not a
        ``ParseTree``, ``ParseTree('leaf', [str(start + 1)])``. For a
        ``ParseTree``, a deep copy whose children 0 and 1 are replaced by
        their simplified versions, with the second child's numbering
        offset by the leaf count of the simplified first child.

    Only children 0 and 1 are rewritten, so the input is presumably
    expected to be binary -- confirm against callers.
    """
    if not tree:
        return None

    if isinstance(tree, ParseTree):
        simplified = tree.__deepcopy__(None)
        left = simplify_tree(tree[0], start)
        # The right subtree's numbering continues after the left leaves.
        right = simplify_tree(tree[1], start + len(left.leaves()))
        simplified[0] = left
        simplified[1] = right
        return simplified

    return ParseTree('leaf', [str(start + 1)])