Esempio n. 1
0
 def constree(self):
     """Return the constituency tree of this dependency graph.

     The dependents listed under node 0 of ``self.nodelist`` are taken as
     the roots.  With exactly one root, the result is the tree built by
     ``self._constree`` for it.  Otherwise (some depgraphs have several
     roots, for instance the 512th of Turkish) the per-root trees are
     grouped under a new node labelled 'TOP'.  Either way the result is
     wrapped in ``treebank.Tree``.
     """
     top_level = self.nodelist[0]['deps']
     if len(top_level) != 1:
         # TODO: check projectivity here also.
         subtrees = [self._constree(address) for address in top_level]
         return treebank.Tree(tree.Tree('TOP', subtrees))
     return treebank.Tree(self._constree(top_level[0]))
Esempio n. 2
0
    def parsed(self):
        """Yield a tree for each short sentence of the NEGRA export file.

        Reads ``self.basedir + '/negra-corpus.export'`` one ``#BOS`` ...
        ``#EOS`` block at a time.  Within a block, the rows before the first
        row whose first field starts with '#' form the sentence; rows whose
        second field starts with '$' are dropped from it.  The remaining
        rows up to ``#EOS`` form the parse.  Only sentences with 1 to 10
        kept rows are yielded.

        Each yielded item is ``treebank.Tree(build_tree(sent, parse),
        (num, origin))``, where ``num`` and ``origin`` are fields 1 and 4 of
        the ``#BOS`` line, with a ``depset`` attribute set from
        ``tree_to_depset``.

        Side effects: ``self.f`` is set to the file object (which is closed
        once the generator finishes or is closed), and ``self.sent`` /
        ``self.parse`` hold the raw rows of the most recently yielded
        sentence.

        Raises IndexError if the file ends, or contains a blank line, in
        the middle of a sentence block.
        """
        # The with-block guarantees the file is closed when the generator is
        # exhausted, explicitly closed, or garbage-collected.
        with open(self.basedir + '/negra-corpus.export') as f:
            self.f = f

            # Go to the first sentence.  readline() returns '' at end of
            # file, so stop there instead of looping forever when the file
            # contains no '#BOS' line.
            s = f.readline()
            while s != '' and not s.startswith('#BOS'):
                s = f.readline()

            while s != '':
                # s is expected to be a '#BOS' line here.
                l = s.split()
                (num, origin) = (int(l[1]), int(l[4]))

                # Sentence rows: everything before the first '#'-initial
                # row, minus rows whose second field starts with '$'.
                sent = []
                l = f.readline().split()
                while l[0][0] != '#':
                    if not l[1].startswith('$'):
                        sent.append(l)
                    l = f.readline().split()

                # Parse rows: everything from there up to '#EOS'.
                parse = []
                while l[0] != '#EOS':
                    parse.append(l)
                    l = f.readline().split()

                if 0 < len(sent) <= 10:
                    self.sent = sent
                    self.parse = parse
                    t = build_tree(sent, parse)
                    t2 = treebank.Tree(t, (num, origin))
                    t2.depset = tree_to_depset(t)
                    yield t2

                s = f.readline()
Esempio n. 3
0
def string_to_bracketing(s):
    """Convert a string of unlabelled brackets into a bracketing.

    Every '(' in ``s`` is given the dummy label 'X', the result is wrapped
    in an outer '((X ' ... '))' pair, parsed with ``tree.bracket_parse``,
    wrapped in ``treebank.Tree`` and handed to ``tree_to_bracketing``.

    Example input:
        '(DT NNP NN) (VBD (DT (VBZ (DT JJ NN))))'
    """
    labelled = '((X %s))' % s.replace('(', '(X ')
    parsed_tree = tree.bracket_parse(labelled)
    return tree_to_bracketing(treebank.Tree(parsed_tree))
Esempio n. 4
0
 def parse(self, s):
     """Parse ``s`` with ``self.dep_parse`` and wrap the resulting tree.

     Returns a pair: the first element of the ``dep_parse`` result wrapped
     in ``treebank.Tree``, and the second element passed through unchanged
     (presumably a score or weight -- confirm against ``dep_parse``).
     """
     result = self.dep_parse(s)
     raw_tree, extra = result
     wrapped = treebank.Tree(raw_tree)
     return (wrapped, extra)