Ejemplo n.º 1
0
 def _tree_helper(t, antvalues):
     """Parse a tree string and attach antecedent values under nonterminal leaves.

     The string ``t`` is parsed with ``tree.str_to_tree``. Each frontier node
     whose label converts to a ``rule.Nonterminal`` receives, at child
     position 0, the entry of ``antvalues`` selected by that nonterminal's
     index. Returns the parsed (and modified) tree.
     """
     root = tree.str_to_tree(t)
     for leaf in root.frontier():
         symbol = rule.Nonterminal.from_str(leaf.label)
         if not isinstance(symbol, rule.Nonterminal):
             # Not a nonterminal placeholder: leave this leaf untouched.
             continue
         # The symbol's index is shifted down by one to address antvalues
         # (presumably getindex() is 1-based -- confirm against rule module).
         slot = symbol.getindex() - 1
         leaf.insert_child(0, antvalues[slot])
     return root
Ejemplo n.º 2
0
 def _tree_helper(t, antvalues):
     """Parse a tree string and attach antecedent values under variable leaves.

     The string ``t`` is parsed with ``tree.str_to_tree``. Each frontier node
     whose label, converted through ``sym.fromstring``, satisfies
     ``sym.isvar`` receives, at child position 0, the entry of ``antvalues``
     selected by that symbol's index. Returns the parsed (and modified) tree.
     """
     root = tree.str_to_tree(t)
     for leaf in root.frontier():
         symbol = sym.fromstring(leaf.label)
         if not sym.isvar(symbol):
             # Not a variable symbol: leave this leaf untouched.
             continue
         # The symbol's index is shifted down by one to address antvalues
         # (presumably sym.getindex() is 1-based -- confirm against sym module).
         slot = sym.getindex(symbol) - 1
         leaf.insert_child(0, antvalues[slot])
     return root
Ejemplo n.º 3
0
 def _tree_helper(t, antvalues):
     """Build a tree from ``t`` and substitute values below its variable leaves.

     ``t`` is parsed by ``tree.str_to_tree``. For every frontier node whose
     label maps (via ``sym.fromstring``) to a symbol accepted by
     ``sym.isvar``, the matching element of ``antvalues`` is inserted at
     child position 0. The resulting tree is returned.
     """
     parsed = tree.str_to_tree(t)
     for frontier_node in parsed.frontier():
         label_sym = sym.fromstring(frontier_node.label)
         if sym.isvar(label_sym):
             # antvalues is addressed with the symbol index minus one
             # (presumably the index is 1-based -- confirm against sym module).
             position = sym.getindex(label_sym) - 1
             replacement = antvalues[position]
             frontier_node.insert_child(0, replacement)
     return parsed
Ejemplo n.º 4
0
 def _tree_helper(t, antvalues):
     """Build a tree from ``t`` and substitute values below nonterminal leaves.

     ``t`` is parsed by ``tree.str_to_tree``. For every frontier node whose
     label converts (via ``rule.Nonterminal.from_str``) to a
     ``rule.Nonterminal`` instance, the matching element of ``antvalues`` is
     inserted at child position 0. The resulting tree is returned.
     """
     parsed = tree.str_to_tree(t)
     for frontier_node in parsed.frontier():
         nonterminal = rule.Nonterminal.from_str(frontier_node.label)
         if isinstance(nonterminal, rule.Nonterminal):
             # antvalues is addressed with the nonterminal index minus one
             # (presumably the index is 1-based -- confirm against rule module).
             position = nonterminal.getindex() - 1
             replacement = antvalues[position]
             frontier_node.insert_child(0, replacement)
     return parsed
Ejemplo n.º 5
0
 def __init__(self, file):
     """Load parse trees from ``file``, indexed by their frontier symbols.

     Every line of ``file`` is parsed with ``tree.str_to_tree``. Lines that
     yield ``None`` are reported through ``log.write`` and skipped. For the
     rest, the tuple of symbols built from the frontier node labels becomes
     the key under which the tree is stored in ``self.parses``; a later tree
     with the same key replaces an earlier one.
     """
     self.parses = {}
     for line in file:
         parsed = tree.str_to_tree(line)
         if parsed is not None:
             key = tuple(sym.fromstring(leaf.label) for leaf in parsed.frontier())
             self.parses[key] = parsed
         else:
             log.write("warning: couldn't read tree\n")
Ejemplo n.º 6
0
def raduparse(t,radu=True):
    """Parse the tree string ``t`` with ``tree.str_to_tree``.

    When ``radu`` is truthy the string is first passed through ``radu2ptb``
    (presumably a format conversion to PTB-style bracketing -- confirm against
    its definition); otherwise it is parsed as given.
    """
    source = radu2ptb(t) if radu else t
    return tree.str_to_tree(source)
Ejemplo n.º 7
0
def raduparse(t):
    """Pass ``t`` through ``radu2ptb`` and parse the result into a tree.

    ``radu2ptb`` is presumably a format conversion to PTB-style bracketing
    (confirm against its definition); parsing is done by ``tree.str_to_tree``.
    """
    return tree.str_to_tree(radu2ptb(t))
Ejemplo n.º 8
0
        if opts.tags:
            a.ftags = ftfile.readline().split()
            a.etags = etfile.readline().split()
            if len(a.ftags) != len(a.fwords):
                sys.stderr.write(
                    "warning: length mismatch between French words and tags (%d != %d)\n"
                    % (len(a.ftags), len(a.fwords)))
            if len(a.etags) != len(a.ewords):
                sys.stderr.write(
                    "warning: length mismatch between English words and tags (%d != %d)\n"
                    % (len(a.etags), len(a.ewords)))

        a.espans = None
        if opts.trees:
            if ebfile is not None:
                etree = tree.str_to_tree(ebfile.readline())
                if etree is None:
                    sys.stderr.write("warning, line %d: null tree" % a.lineno)
                    a.espans = {}
                elif etree.length != len(a.ewords):
                    sys.stderr.write(
                        "warning, line %d: length mismatch between English words and trees (%d != %d)\n"
                        % (a.lineno, len(a.ewords), etree.length))
                    sys.stderr.write(
                        "  start of English sentence: %s\n" %
                        " ".join([sym.tostring(x) for x in a.ewords[:5]]))
                    a.espans = {}
                else:
                    remove_req(etree)
                    a.espans = etree.spans()
                    for (span, labels) in a.espans.iteritems():
Ejemplo n.º 9
0
            a.write(log.file)
            a.write_visual(log.file)
            log.file.flush()

        if opts.tags:
            a.ftags = ftfile.readline().split()
            a.etags = etfile.readline().split()
            if len(a.ftags) != len(a.fwords):
                sys.stderr.write("warning: length mismatch between French words and tags (%d != %d)\n" % (len(a.ftags), len(a.fwords)))
            if len(a.etags) != len(a.ewords):
                sys.stderr.write("warning: length mismatch between English words and tags (%d != %d)\n" % (len(a.etags), len(a.ewords)))

        a.espans = None
        if opts.trees:
            if ebfile is not None:
                etree = tree.str_to_tree(ebfile.readline())
                if etree is None:
                    sys.stderr.write("warning, line %d: null tree" % a.lineno)
                    a.espans = {}
                elif etree.length != len(a.ewords):
                    sys.stderr.write("warning, line %d: length mismatch between English words and trees (%d != %d)\n" % (a.lineno, len(a.ewords), etree.length))
                    sys.stderr.write("  start of English sentence: %s\n" % " ".join([sym.tostring(x) for x in a.ewords[:5]]))
                    a.espans = {}
                else:
                    remove_req(etree)
                    a.espans = etree.spans()
                    for (span, labels) in a.espans.iteritems():
                        a.espans[span] = [sym.fromtag(x) for x in labels]

        # done reading all input lines
        if opts.discard_long_sentences and len(a.fwords) > opts.maxabslen:
Ejemplo n.º 10
0
# Matches a "-<digits>-BAR" suffix at the end of a label. Written as a raw
# string so that "\d" reaches the regex engine instead of being treated as an
# (invalid) string escape sequence.
relabeler2 = re.compile(r'-\d+-BAR$')
# Matches a single leading "@" at the start of a label.
relabeler3 = re.compile(r'^@')

def relabel(label):
    """Normalize a node label by applying three regex substitutions in order.

    First ``relabeler2`` matches are replaced with '-BAR', then ``relabeler1``
    matches are removed, then ``relabeler3`` matches are removed. The pattern
    behind ``relabeler1`` is defined elsewhere in the module. Returns the
    rewritten label string.
    """
    substitutions = (
        (relabeler2, '-BAR'),
        (relabeler1, ''),
        (relabeler3, ''),
    )
    for pattern, replacement in substitutions:
        label = pattern.sub(replacement, label)
    return label



if __name__ == "__main__":
    for l in sys.stdin:
        (rid, trline) = l.split(' ', 1)
        node=tree.str_to_tree(trline)
        #sys.stdout.write("%s\n" % node.__str__())
        begin=0
        n_bad_rewrites=0
        for n in node.preorder():
            if(len(n.children)):
                #if begin!=0:
                #    sys.stdout.write(" ### ")
                begin=1
                rewrite="%s(" % relabel(n.label)
                ch=[]
                for c in n.children:
                    ch.append(relabel(c.label))
                #sys.stdout.write("%s" % ' '.join(ch))
                rewrite+= "%s" % ' '.join(ch)
                rewrite+=")"
Ejemplo n.º 11
0
    for li, line in enumerate(sys.stdin):
        fields = line.rstrip().split('\t')
        try:
            fstr, estr, astr = fields[:3]
        except:
            log.write("bad line, skipped: %s\n" % line.rstrip('\r\n'))
            continue
        
        if len(fields) == 4:
            provenance = list(fields[3].split())
        else:
            provenance = []

        if opts.french_trees:
            try:
                ftree = prepare_tree(tree.str_to_tree(fstr))
                if ftree is None:
                    log.write("bad tree, skipped line\n")
                    continue

                #fspans = label_spans(ftree)
                fleaves = list(ftree.frontier())
                fwords = [leaf.label for leaf in fleaves]
                ftags = [leaf.parent.label for leaf in fleaves]
                fcb = crossing_brackets(ftree)
            except Exception as e:
                log.write("bad ftree, skipped: %s\n" % fstr)
                log.write("reason: %s\n" % e)
                continue
        else:
            fwords = fstr.split()