コード例 #1
0
 def create_forest(self, line):
     """Parse `line` into a forest and keep the result on `self.forest`.

     The text is handed to forest.forest_from_text with '@UNKNOWN@' as the
     only entry of `delete_words`.

     Returns True when parsing succeeds.  When forest.TreeFormatException
     is raised, `self.forest` is reset to None and False is returned.
     """
     unwanted = ['@UNKNOWN@']
     try:
         self.forest = forest.forest_from_text(line, delete_words=unwanted)
     except forest.TreeFormatException:
         # Malformed forest text: drop any forest and report failure.
         self.forest = None
         parsed_ok = False
     else:
         parsed_ok = True
     return parsed_ok
コード例 #2
0
ファイル: sbmt_decoder.py プロジェクト: jungikim/sbmt
 def create_forest(self, line):
     """Build `self.forest` from the forest text in `line`.

     forest.forest_from_text is called with delete_words=['@UNKNOWN@'].

     Returns:
         True if the text was parsed and stored on `self.forest`;
         False if forest.TreeFormatException was raised, in which case
         `self.forest` is set to None.
     """
     try:
         self.forest = forest.forest_from_text(
             line, delete_words=['@UNKNOWN@'])
     except forest.TreeFormatException:
         # Guard clause: unparsable input leaves no forest behind.
         self.forest = None
         return False
     return True
コード例 #3
0
# Positional arguments: args[0] names the forest file, args[1] the source
# file, and every remaining entry is taken as a reference file name.
srcfilename = args[1]
forestfilename = args[0]
reffilenames = args[2:]

srcfile = open(srcfilename)

# A forest file name of "-" means the forests are read from standard input.
if forestfilename == "-":
    forestfile = sys.stdin
else:
    forestfile = open(forestfilename)

# One open handle per reference file, in the order the names were given.
reffiles = []
for reffilename in reffilenames:
    reffiles.append(open(reffilename))

def output(f):
    """Format the result of `f.viterbi_deriv()` as a single report string.

    The returned string has the form
    ``hyp={{{<words>}}} derivation={{{<deriv>}}} <vector>`` where <words>
    are the items of the derivation's `english()` converted with
    sym.tostring and joined by single spaces, <deriv> is the derivation
    itself formatted with %s, and <vector> is the derivation's `vector()`.
    """
    best = f.viterbi_deriv()
    feature_vector = best.vector()
    english_words = [sym.tostring(word) for word in best.english()]
    hypothesis = " ".join(english_words)
    return "hyp={{{%s}}} derivation={{{%s}}} %s" % (hypothesis, best, feature_vector)

# Read the source file, the forest file and all reference files in lockstep:
# each iteration gets one source line, one forest line and a tuple holding one
# line from every reference file.
# NOTE(review): izip stops at the shortest input, and izip(*reffiles) with no
# reference files yields nothing, so the loop body never runs in that case.
for srcline, forestline, reflines in itertools.izip(srcfile, forestfile, itertools.izip(*reffiles)):
    # Parse this line's forest text.
    f = forest.forest_from_text(forestline)

    # the oracle needs to know how long all the French spans are
    for item in f.bottomup():
        for ded in item.deds:
            # replace rule's French side with correct number of French words
            # we don't even bother to use the right number of variables
            # The placeholder symbol '<foreign-word>' is repeated
            # int(ded.dcost['foreign-length']) times; the rule's lhs and
            # English side (ded.rule.e) are carried over unchanged.
            ded.rule = rule.Rule(ded.rule.lhs,
                                 rule.Phrase([sym.fromstring('<foreign-word>')]*int(ded.dcost['foreign-length'])),
                                 ded.rule.e)

    # `weights` is defined outside this view.
    f.reweight(weights)
    # Print the report string built by output() for the reweighted forest.
    print "1-best %s" % output(f)

    # Wrap the whitespace-split source line in an sgml.Sentence and also store
    # the same token list on its `fwords` attribute.
    s = sgml.Sentence(srcline.split())
    s.fwords = srcline.split()