def constree(self):
    """Return the constituent tree for this graph, wrapped in treebank.Tree.

    The root entries are read from ``self.nodelist[0]['deps']``.  With
    exactly one root, the result of ``self._constree`` on that root is
    wrapped directly.  Otherwise every root is converted with
    ``self._constree`` and the results are grouped under a ``'TOP'`` node.
    """
    # Some depgraphs have several roots (for instance, 512th of Turkish).
    root_entries = self.nodelist[0]['deps']

    if len(root_entries) != 1:
        # TODO: check projectivity here also.
        subtrees = [self._constree(entry) for entry in root_entries]
        return treebank.Tree(tree.Tree('TOP', subtrees))

    only_root = root_entries[0]
    return treebank.Tree(self._constree(only_root))
def parsed(self):
    """Yield a tree for every short sentence in the NEGRA export file.

    Reads ``self.basedir + '/negra-corpus.export'``.  Everything before
    the first line starting with ``#BOS`` is skipped.  For each
    ``#BOS`` ... ``#EOS`` block:

    * ``num`` and ``origin`` are taken from fields 1 and 4 of the
      ``#BOS`` line (converted to int);
    * the lines up to the first one beginning with ``#`` are collected
      as ``sent``, dropping lines whose second field starts with ``$``
      (presumably punctuation tags -- confirm against the corpus docs);
    * the following lines up to ``#EOS`` are collected as ``parse``.

    Blocks that keep between 1 and 10 lines in ``sent`` are turned into
    ``treebank.Tree(build_tree(sent, parse), (num, origin))`` with a
    ``depset`` attribute set from ``tree_to_depset``; other blocks are
    skipped silently.

    Side effects: ``self.f`` is set to the file object (closed once the
    generator finishes or is closed); ``self.sent`` and ``self.parse``
    hold the data of the most recently yielded sentence.

    A file that contains no ``#BOS`` line yields nothing.  A block that
    is cut off before its ``#EOS`` line still raises ``IndexError``.
    """
    # The context manager guarantees the handle is released when the
    # generator is exhausted, closed, or garbage collected.
    with open(self.basedir + '/negra-corpus.export') as f:
        self.f = f

        # Go to the first sentence.  The EOF check matters: readline()
        # returns '' forever at end of file, so without it a file with
        # no '#BOS' line would loop endlessly.
        s = f.readline()
        while s != '' and not s.startswith('#BOS'):
            s = f.readline()

        while s != '':
            header = s.split()
            num, origin = int(header[1]), int(header[4])

            # Lines before the first '#'-prefixed line of the block.
            sent = []
            fields = f.readline().split()
            while fields[0][0] != '#':
                if not fields[1].startswith('$'):
                    sent.append(fields)
                fields = f.readline().split()

            # Remaining '#'-prefixed lines, up to the '#EOS' marker.
            parse = []
            while fields[0] != '#EOS':
                parse.append(fields)
                fields = f.readline().split()

            if 0 < len(sent) <= 10:
                self.sent = sent
                self.parse = parse
                t = build_tree(sent, parse)
                t2 = treebank.Tree(t, (num, origin))
                t2.depset = tree_to_depset(t)
                yield t2

            s = f.readline()
def string_to_bracketing(s):
    """Convert a parenthesised string into a bracketing.

    Every ``(`` in ``s`` gets an ``X`` label inserted after it, the result
    is wrapped in an outer ``((X ... ))``, parsed with
    ``tree.bracket_parse``, wrapped in ``treebank.Tree`` and handed to
    ``tree_to_bracketing``.

    Example input::

        string_to_bracketing('(DT NNP NN) (VBD (DT (VBZ (DT JJ NN))))')
    """
    labelled = s.replace('(', '(X ')
    wrapped = '((X ' + labelled + '))'
    parsed_tree = tree.bracket_parse(wrapped)
    return tree_to_bracketing(treebank.Tree(parsed_tree))
def parse(self, s):
    """Run ``self.dep_parse`` on ``s`` and wrap the resulting tree.

    ``self.dep_parse(s)`` must return a pair; its first element is wrapped
    in ``treebank.Tree`` and the second is passed through unchanged.

    Returns a ``(treebank.Tree, second_element)`` tuple.
    """
    raw_tree, second = self.dep_parse(s)
    wrapped_tree = treebank.Tree(raw_tree)
    return (wrapped_tree, second)