Example #1
0
            bleu, hyp, fv, edgelist = forest.compute_oracle(weights, 0, 1, store_oracle=True)
            ##print >> logs, forest.root.oracle_edge
            bleu = forest.bleu.rescore(hyp)
            mscore = weights.dot(fv)
            print  >> logs, "moracle\tscore=%.4lf\tbleu+1=%.4lf\tlenratio=%.2lf" % \
                  (mscore, forest.bleu.fscore(), forest.bleu.ratio()) #, hyp) # don't output oracle
            
            myoraclebleus += forest.bleu.copy()
            myscores += mscore

        if FLAGS.out:
            if FLAGS.suffix is not None:
                forest.dump(open("%d.%s" % (i+FLAGS.startid, FLAGS.suffix), "wt"))
            else:
                forest.dump()

        if i % 10 == 0:
            if old_edges <> 0:
                print >> logs, "%d forests pruned, avg new size: %.1lf %.1lf (survival ratio: %4.1lf%% %4.1lf%%)" % \
                            (i, total_nodes/i, total_edges/i, \
                             total_nodes*100./old_nodes, total_edges*100./old_edges)
                
    print >> logs,  "overall 1-best oracle bleu = %s  score = %.4lf" \
          % (onebestbleus.score_ratio_str(), onebestscores/i)
    
    if FLAGS.oracle:
        print >> logs,  "overall my     oracle bleu = %s  score = %.4lf" \
              % (myoraclebleus.score_ratio_str(), myscores/i)


Example #2
0
def main():
    
    weights = Model.cmdline_model()
    lm = Ngram.cmdline_ngram()
    
    LMState.init(lm, weights)

    decoder = Decoder()

    tot_bleu = Bleu()
    tot_score = 0.
    tot_time = 0.
    tot_len = tot_fnodes = tot_fedges = 0
    tot_steps = tot_states = tot_edges = tot_stacks = 0

    for i, forest in enumerate(Forest.load("-", is_tforest=True, lm=lm), 1):

        t = time.time()
        
        best, final_items = decoder.beam_search(forest, b=FLAGS.beam)
        score, trans, fv = best.score, best.trans(), best.get_fvector()

        t = time.time() - t
        tot_time += t

        tot_score += score
        forest.bleu.rescore(trans)
        tot_bleu += forest.bleu

        fnodes, fedges = forest.size()

        tot_len += len(forest.sent)
        tot_fnodes += fnodes
        tot_fedges += fedges
        tot_steps += decoder.max_step
        tot_states += decoder.num_states
        tot_edges += decoder.num_edges
        tot_stacks += decoder.num_stacks

        print >> logs, ("sent %d, b %d\tscore %.4f\tbleu+1 %s" + \
              "\ttime %.3f\tsentlen %-3d fnodes %-4d fedges %-5d\tstep %d  states %d  edges %d stacks %d") % \
              (i, FLAGS.beam, score, 
               forest.bleu.score_ratio_str(), t, len(forest.sent), fnodes, fedges,
               decoder.max_step, decoder.num_states, decoder.num_edges, decoder.num_stacks)

        if FLAGS.k > 1 or FLAGS.forest:
           lmforest = best.toforest(forest)

        if FLAGS.forest:
            lmforest.dump()

        if FLAGS.k > 1:
           lmforest.lazykbest(FLAGS.k, weights=weights)
           klist = lmforest.root.klist

           if not FLAGS.mert:
               for j, (sc, tr, fv) in enumerate(klist, 1):
                   print >> logs, "k=%d score=%.4f fv=%s\n%s" % (j, sc, fv, tr)

        else:
            klist = [(best.score, best.trans(), best.get_fvector())]
        
        if FLAGS.mert: # <score>... <hyp> ...
            print >> logs, '<sent No="%d">' % i
            print >> logs, "<Chinese>%s</Chinese>" % " ".join(forest.cased_sent)

            for sc, tr, fv in klist:
                print >> logs, "<score>%.3lf</score>" % sc
                print >> logs, "<hyp>%s</hyp>" % tr
                print >> logs, "<cost>%s</cost>" % fv

            print >> logs, "</sent>"

        if not FLAGS.forest:
            print trans

    print >> logs, "avg %d sentences, first pass score: %.4f, bleu: %s" % \
          (i, decoder.firstpassscore/i, decoder.firstpassbleus.score_ratio_str())
                                                                            
    print >> logs, ("avg %d sentences, b %d\tscore %.4lf\tbleu %s\ttime %.3f" + \
          "\tsentlen %.1f fnodes %.1f fedges %.1f\tstep %.1f states %.1f edges %.1f stacks %.1f") % \
          (i, FLAGS.beam, tot_score/i, tot_bleu.score_ratio_str(), tot_time/i,
           tot_len/i, tot_fnodes/i, tot_fedges/i,
           tot_steps/i, tot_states/i, tot_edges/i, tot_stacks/i)

    print >> logs, LMState.cachehits, LMState.cachemiss