Example #1
def test_score():
    cand = "中华人民共和国"
    ref = "中华人民共和国公民"
    bleu = Bleu(N_SIZE)
    bleu.add_inst(cand, ref)
    s = bleu.get_score()
    print('score: {}'.format(s))
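The tests in this example and in Examples #2, #16 and #24 rely on two names defined elsewhere in their test module. A minimal, hypothetical scaffolding is sketched below; the import mirrors the one shown in Example #25, and N_SIZE = 4 is only an assumption about the maximum n-gram order:

# Hypothetical module-level scaffolding for these tests;
# N_SIZE = 4 is an assumption, not part of the original snippets.
from bleu import Bleu   # project-local BLEU implementation, imported this way in Example #25

N_SIZE = 4

With this in place the test functions can be called directly, e.g. test_score().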
Example #2
def test_add_inst():
    cand = '13'
    ref = '13'
    bleu = Bleu(N_SIZE)
    bleu.add_inst(cand, ref)
    match_ngram = bleu.match_ngram
    candi_ngram = bleu.candi_ngram
    print('match_ngram: {}'.format(match_ngram))
    print('candi_ngram: {}'.format(candi_ngram))
Example #3
def test_score():
    # init all argument
    data = read_json()

    rouge_eval = RougeL()
    bleu_eval = Bleu()
    for idx, (ref_key, cand_key) in enumerate(data):
        ref_sent = data[idx][ref_key]
        cand_sent = data[idx][cand_key]

        rouge_eval.add_inst(cand_sent, ref_sent)
        bleu_eval.add_inst(cand_sent, ref_sent)

    bleu_score = bleu_eval.get_score()
    rouge_score = rouge_eval.get_score()
    print('bleu score: {}, rouge score: {}'.format(bleu_score, rouge_score))
Example #4
def eval_captions(gt_captions, res_captions):
	"""
		gt_captions = ground truth captions; 5 per image
		res_captions = captions generated by the model to be evaluated
	"""
	print('ground truth captions')
	print(gt_captions)

	print('RES CAPTIONS')
	print(res_captions)

	scorers = [
		(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
		(Meteor(),"METEOR"),
		(Rouge(), "ROUGE_L"),
		(Cider(), "CIDEr"), 
	]

	res = []
	
	for scorer, method in scorers:
		print('computing %s score...' % (scorer.method()))
		score, scores = scorer.compute_score(gt_captions, res_captions)
		if type(method) == list:
			for sc, scs, m in zip(score, scores, method):
				print("%s: %0.3f"%(m, sc))
				res.append((m, sc))
		else:
				print("%s: %0.3f"%(method, score))
				res.append((method, score))

	return res
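A hedged usage sketch for eval_captions: the pycocoevalcap-style scorers it wraps expect both arguments to be dicts mapping an image id to a list of caption strings (the docstring above notes 5 references per image; the ids and captions below are invented for illustration):

# Hypothetical inputs; image ids and captions are made up for illustration only.
gt_captions = {
    'img_0': ['a man riding a bike down a dirt road',
              'a person rides a bicycle on a country road'],
    'img_1': ['two dogs playing in the snow',
              'a pair of dogs run through the snow'],
}
res_captions = {
    'img_0': ['a man rides a bike on a road'],
    'img_1': ['two dogs play in the snow'],
}

results = eval_captions(gt_captions, res_captions)   # list of (metric, score) pairs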
Example #5
def main():
  from ngram import Ngram
  from model import Model
  from forest import Forest
  
  flags.DEFINE_integer("beam", 100, "beam size", short_name="b")
  flags.DEFINE_integer("debuglevel", 0, "debug level")
  flags.DEFINE_boolean("mert", True, "output mert-friendly info (<hyp><cost>)")
  flags.DEFINE_boolean("cube", True, "using cube pruning to speedup")
  flags.DEFINE_integer("kbest", 1, "kbest output", short_name="k")
  flags.DEFINE_integer("ratio", 3, "the maximum items (pop from PQ): ratio*b", short_name="r")
  

  argv = FLAGS(sys.argv)
  [outfile] = argv[1:]
  weights = Model.cmdline_model()
  lm = Ngram.cmdline_ngram()
  

  false_decoder = CYKDecoder(weights, lm)
  out = utility.getfile(outfile, 1)
  old_bleu = Bleu()
  new_bleu = Bleu()
  
  for i, forest in enumerate(Forest.load("-", is_tforest=True, lm=lm), 1):
    
    oracle_forest, oracle_item = oracle_extracter(forest, weights, false_decoder, 100, 2, extract=100)
    print >>sys.stderr, "processed sent %s " % i
    oracle_forest.dump(out)
    bleu, hyp, fv, edgelist = forest.compute_oracle(weights, 0.0, 1)

    forest.bleu.rescore(hyp)
    old_bleu += forest.bleu
    forest.bleu.rescore(oracle_item[0].full_derivation)
    new_bleu += forest.bleu

    bad_bleu, _, _, _ = oracle_forest.compute_oracle(weights, 0.0, -1)
    #for i in range(min(len(oracle_item), 5)):
     # print >>sys.stderr, "Oracle Trans: %s %s %s" %(oracle_item[i].full_derivation, oracle_item[i].score, str(oracle_item[i].score[2]))
     # print >>sys.stderr, "Oracle BLEU Score: %s"% (forest.bleu.rescore(oracle_item[i].full_derivation))
    print >>sys.stderr, "Oracle BLEU Score: %s"% (forest.bleu.rescore(oracle_item[0].full_derivation))
    print >>sys.stderr, "Worst new Oracle BLEU Score: %s"% (bad_bleu)
    print >>sys.stderr, "Old Oracle BLEU Score: %s"% (bleu)
    
    print >>sys.stderr, "Running Oracle BLEU Score: %s"% (new_bleu.compute_score())
    print >>sys.stderr, "Running Old Oracle BLEU Score: %s"% (old_bleu.compute_score())
Example #6
    def evaluate(self):
        cap = open(r'results.txt')
        cap_ = []
        for line in cap:
            line = line.split(' ')
            line[len(line)-1] = '.'
            del line[0]
            print(line)
            cap_.append(line)
        gts = {}
        res = {}
        f = open("cap_flickr30k.json")
        captions = json.load(f)
        f1 = open("dic_flickr30k.json")
        dics = json.load(f1)
        dics = dics['images']
        pos = 0
        for i in range(0, len(dics), 1):
            if dics[i]['split'] == 'test':
                caption_1 = []
                caption_2 = []
                caption_1.append(captions[i][0]['caption'])
                res[dics[i]['id']] = caption_1
                caption_2.append(cap_[pos])
                caption_2.append(cap_[pos])
                gts[dics[i]['id']] = caption_2
                pos = pos + 1

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Cider(), "CIDEr")
        ]

        # =================================================
        # Compute scores
        # =================================================
        eval = {}
        for scorer, method in scorers:
            print ('computing %s score...'%(scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    print ("%s: %0.3f"%(m, sc))
            else:
                self.setEval(score, method)
                print ("%s: %0.3f"%(method, score))
Example #7
def compute_bleu_rouge(pred_dict, ref_dict, bleu_order=4):
    """
    Compute bleu and rouge scores.
    """
    assert set(pred_dict.keys()) == set(ref_dict.keys()), \
        "missing keys: {}".format(
            set(ref_dict.keys()) - set(pred_dict.keys()))
    scores = {}
    bleu_scores, _ = Bleu(bleu_order).compute_score(ref_dict, pred_dict)
    for i, bleu_score in enumerate(bleu_scores):
        bleu_score *= 100
        scores['Bleu-%d' % (i + 1)] = bleu_score
    return scores
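A hedged usage sketch for compute_bleu_rouge: pred_dict maps each sample id to a single-element list holding the predicted answer, ref_dict maps the same ids to a list of reference answers, and the two key sets must match:

# Hypothetical data; the ids and answer strings are invented for illustration only.
pred_dict = {'q1': ['paris is the capital of france']}
ref_dict = {'q1': ['the capital of france is paris']}

print(compute_bleu_rouge(pred_dict, ref_dict))
# expected shape: {'Bleu-1': ..., 'Bleu-2': ..., 'Bleu-3': ..., 'Bleu-4': ...}, values scaled to percent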
    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Set up scorers
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        '''
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]
        '''

        scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"])]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalImgs()
Example #9
def score(ref, hypo):
    scorers = [
        (Bleu(4),["Bleu_1","Bleu_2","Bleu_3","Bleu_4"]),
        (Meteor(),"METEOR"),
        (Rouge(),"ROUGE_L"),
        (Cider(),"CIDEr")
    ]
    final_scores = {}
    for scorer,method in scorers:
        score,scores = scorer.compute_score(ref,hypo)
        if type(score)==list:
            for m,s in zip(method,score):
                final_scores[m] = s
        else:
            final_scores[method] = score

    return final_scores
Example #10
def cal_avg_B4(custom_gts, custom_res):
    # input: tested sentences, and (top_N - 1) corresponding 'gt' sentences
    # return the BLEU-4 score for each sentence
    # calculate BLEU scores in the traditional way
    gts = tokenizer.tokenize(custom_gts)
    res = tokenizer.tokenize(custom_res)
    print('setting up scorers...')
    scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"])]
    imgToEval = {}
    for scorer, method in scorers:
        print('computing %s score...'%(scorer.method()))
        if type(method) == list:
            score, scores, subgraph_training_bleu = scorer.compute_score(gts, res)
            for sc, scs, m in zip(score, scores, method):
                setImgToEvalImgs(scs, list(gts.keys()), m, imgToEval)
                print("%s: %0.3f"%(m, sc))
    B_4s = [imgToEval[sen_id]['Bleu_4'] for sen_id in custom_gts.keys()]
    return B_4s
def score(ref, hypo):
    """
    ref, dictionary of reference sentences (id, sentence)
    hypo, dictionary of hypothesis sentences (id, sentence)
    score, dictionary of scores
    """
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Rouge(), "ROUGE_L"),
    ]
    final_scores = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref, hypo)
        if type(score) == list:
            for m, s in zip(method, score):
                final_scores[m] = s
        else:
            final_scores[method] = score
    return final_scores
    def __init__(self, alpha=0.5):
        self.simple_meteor = SimpleMeteor(alpha=alpha, beta=0.16)
        self.tri_bleu = Bleu(3)
        self.four_bleu = Bleu(4, beta=0.13)
        self.p = Preprocessor()
class MeteorBleu:
    """ Prints features for all versions of Meteor and BLEU for every
		input sentence
	"""
    def __init__(self, alpha=0.5):
        self.simple_meteor = SimpleMeteor(alpha=alpha, beta=0.16)
        self.tri_bleu = Bleu(3)
        self.four_bleu = Bleu(4, beta=0.13)
        self.p = Preprocessor()

    def features(self, tokline, posline):
        """ The workhouse function
			Takes lists of tokens and postags for [h1, h2, ref]
			Returns feature values for h1, h2, h1-h2
		"""

        features = []

        # Simple Meteor
        h1p, h2p, refp = self.p.preprocess(tokline,
                                           stem=False,
                                           lowercase=False)
        h1score = self.simple_meteor.score(h1p, refp)
        h2score = self.simple_meteor.score(h2p, refp)
        h1_h2 = h1score - h2score
        features += [h1score, h2score, h1_h2]

        # Simple Meteor lowercase
        h1p, h2p, refp = self.p.preprocess(tokline, stem=False, lowercase=True)
        h1score = self.simple_meteor.score(h1p, refp)
        h2score = self.simple_meteor.score(h2p, refp)
        h1_h2 = h1score - h2score
        features += [h1score, h2score, h1_h2]

        # Simple Meteor lowercase, stemmed
        h1p, h2p, refp = self.p.preprocess(tokline, stem=True, lowercase=True)
        h1score = self.simple_meteor.score(h1p, refp)
        h2score = self.simple_meteor.score(h2p, refp)
        h1_h2 = h1score - h2score
        features += [h1score, h2score, h1_h2]

        # Simple Meteor referencing sequence of postags, lowercase, stemmed
        h1p, h2p, refp = self.p.preprocess(tokline, stem=True, lowercase=True)
        h1pos, h2pos, refpos = self.p.preprocess(posline)
        h1score = self.simple_meteor.score(h1p,
                                           refp,
                                           postags=True,
                                           hpos=h1pos,
                                           refpos=refpos)
        h2score = self.simple_meteor.score(h2p,
                                           refp,
                                           postags=True,
                                           hpos=h2pos,
                                           refpos=refpos)
        h1_h2 = h1score - h2score
        features += [h1score, h2score, h1_h2]

        # trigram BLEU, lowercased, stemmed
        h1p, h2p, refp = self.p.preprocess(tokline, stem=True, lowercase=True)
        h1score = self.tri_bleu.score(h1p, refp)
        h2score = self.tri_bleu.score(h2p, refp)
        h1_h2 = h1score - h2score
        features += [h1score, h2score, h1_h2]

        # postag-smoothed 4-gram BLEU
        h1p, h2p, refp = self.p.preprocess(tokline,
                                           stem=False,
                                           lowercase=False)
        h1pos, h2pos, refpos = self.p.preprocess(posline)
        h1score = self.four_bleu.score(h1p,
                                       refp,
                                       postag=True,
                                       hpos=h1pos,
                                       refpos=refpos)
        h2score = self.four_bleu.score(h2p,
                                       refp,
                                       postag=True,
                                       hpos=h2pos,
                                       refpos=refpos)
        h1_h2 = h1score - h2score
        features += [h1score, h2score, h1_h2]

        # postag-smoothed 4-gram BLEU, lowercased
        h1p, h2p, refp = self.p.preprocess(tokline, stem=False, lowercase=True)
        h1pos, h2pos, refpos = self.p.preprocess(posline)
        h1score = self.four_bleu.score(h1p,
                                       refp,
                                       postag=True,
                                       hpos=h1pos,
                                       refpos=refpos)
        h2score = self.four_bleu.score(h2p,
                                       refp,
                                       postag=True,
                                       hpos=h2pos,
                                       refpos=refpos)
        h1_h2 = h1score - h2score
        features += [h1score, h2score, h1_h2]

        # postag-smoothed 4-gram BLEU, lowercased, stemmed
        h1p, h2p, refp = self.p.preprocess(tokline, stem=True, lowercase=True)
        h1pos, h2pos, refpos = self.p.preprocess(posline)
        h1score = self.four_bleu.score(h1p,
                                       refp,
                                       postag=True,
                                       hpos=h1pos,
                                       refpos=refpos)
        h2score = self.four_bleu.score(h2p,
                                       refp,
                                       postag=True,
                                       hpos=h2pos,
                                       refpos=refpos)
        h1_h2 = h1score - h2score
        features += [h1score, h2score, h1_h2]

        # postag-smoothed 4-gram BLEU, lowercased, stemmed, weighted
        w = [10, 5, 2, 1]
        h1p, h2p, refp = self.p.preprocess(tokline, stem=True, lowercase=True)
        h1pos, h2pos, refpos = self.p.preprocess(posline)
        h1score = self.four_bleu.score(h1p,
                                       refp,
                                       postag=True,
                                       hpos=h1pos,
                                       refpos=refpos,
                                       wts=w)
        h2score = self.four_bleu.score(h2p,
                                       refp,
                                       postag=True,
                                       hpos=h2pos,
                                       refpos=refpos,
                                       wts=w)
        h1_h2 = h1score - h2score
        features += [h1score, h2score, h1_h2]

        return features

    def evaluate(self, h1score, h2score):
        """ Scores hypothesis sentences based on scores
			Prints output
		"""
        if h1score > h2score:
            print -1
        elif h1score == h2score:
            print 0
        else:
            print 1
Example #14
                         help="print result for each sentence", default=False)
    optparser.add_option("", "--defaultnbest", dest="defaultnbest", help="default nbests", metavar="FILE", default=None)

    (opts, args) = optparser.parse_args()

    if opts.weights is not None:
        weights = get_weights(opts.weights)
    else:
        weights = Vector("lm1=2 gt_prob=1")

    extra_feats = None # prep_features(args)        

    decoder = LocalDecoder() #BUDecoder(opts.k, extra_feats, check_feats=False)
    decoder.set_feats(extra_feats)
    
    all_pp = Bleu()  # Parseval(), now BLEU
    decode_time, parseval_time = 0, 0
    sum_score = 0
    
    if opts.defaultnbest:
        defaultnbests = defaultdict(lambda : [])
        for line in open(opts.defaultnbest):
            defaultnbests[int(line.split()[0])].append(line.strip())
                      
    for i, forest in enumerate(decoder.load("-")):

        if forest is None:
            print >> logs, "forest %d is empty" % (i+1)
            if opts.defaultnbest:
                for line in defaultnbests[i][:opts.k]:
                    print line
Example #15
    from ngram import Ngram # defines --lm and --order    

    argv = FLAGS(sys.argv)

    if FLAGS.prob is None and FLAGS.ratio is None:
        print >> logs, "Error: must specify pruning threshold by -p or ratio by -r" + str(FLAGS)
        sys.exit(1)

    weights = Model.cmdline_model()
    lm = Ngram.cmdline_ngram() # if FLAGS.lm is None then returns None
    if lm:
        weights["lm1"] = weights["lm"] * FLAGS.lmratio
    
    onebestscores = 0
    onebestbleus = Bleu()
    myscores = 0
    myoraclebleus = Bleu()    
    
    total_nodes = total_edges = old_nodes = old_edges = 0
    
    for i, forest in enumerate(Forest.load("-", lm=lm), 1):
        if forest is None:
            print
            continue
        
        prune(forest, weights, FLAGS.prob, FLAGS.ratio)

        score, hyp, fv = forest.root.bestres
        
        forest.bleu.rescore(hyp)
Example #16
def test_count_bp():
    cand = '我是中国人'
    ref = '重视啊啊啊啊我啊啊我了'
    bleu = Bleu(N_SIZE)
    bp = bleu.count_bp(cand, ref)
    print('BP: {}'.format(bp))
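count_bp computes BLEU's brevity penalty for a candidate/reference pair. For reference, here is a minimal stand-alone sketch of the standard formula, assuming character-level lengths for the Chinese test strings (the project's own count_bp may differ in details):

import math

def brevity_penalty(cand, ref):
    # Standard BLEU brevity penalty: 1 when the candidate is at least as long
    # as the reference, exp(1 - r/c) when it is shorter.
    c, r = len(cand), len(ref)
    if c == 0:
        return 0.0
    return 1.0 if c >= r else math.exp(1 - float(r) / c)

print('BP: {}'.format(brevity_penalty('我是中国人', '重视啊啊啊啊我啊啊我了')))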
Example #17
    bylinefile = open(opts.byline)
    reffiles = [open(f) for f in args] ## the remainder of the input is assumed to be refs


##    print >> logs, "rules file %s" % rulefile
##    print >> logs, "source file %s" % srcfile
##    print >> logs, "byline file %s" % bylinefile
##    print >> logs, "re files %s" % " ".join(map(str, reffiles))

    # lhuang: n-gram order = 4
    theoracle = oracle.Oracle(4, variant="ibm")

    hopebleus = collections.defaultdict(lambda : Bleu())
    hopescores = collections.defaultdict(lambda : [])
    
    onebestbleus = Bleu()
    onebestscores = []
    
    for i, (srcline, byline, forestline) in \
            enumerate(itertools.izip(srcfile, bylinefile, forestfile)):

        reflines = [f.readline() for f in reffiles]

        rules = read_rules(opts.rules)
        if opts.extrarules:
            rules = read_rules(opts.extrarules, rules)

        if forestline.strip() == "": ## empty forest (pure byline)
            forestline = "(0<gt_prob:0> )"
        f = forest_from_text(forestline)
        
Example #18
def main():
    
    weights = Model.cmdline_model()
    lm = Ngram.cmdline_ngram()
    
    LMState.init(lm, weights)

    decoder = Decoder()

    tot_bleu = Bleu()
    tot_score = 0.
    tot_time = 0.
    tot_len = tot_fnodes = tot_fedges = 0
    tot_steps = tot_states = tot_edges = tot_stacks = 0

    for i, forest in enumerate(Forest.load("-", is_tforest=True, lm=lm), 1):

        t = time.time()
        
        best, final_items = decoder.beam_search(forest, b=FLAGS.beam)
        score, trans, fv = best.score, best.trans(), best.get_fvector()

        t = time.time() - t
        tot_time += t

        tot_score += score
        forest.bleu.rescore(trans)
        tot_bleu += forest.bleu

        fnodes, fedges = forest.size()

        tot_len += len(forest.sent)
        tot_fnodes += fnodes
        tot_fedges += fedges
        tot_steps += decoder.max_step
        tot_states += decoder.num_states
        tot_edges += decoder.num_edges
        tot_stacks += decoder.num_stacks

        print >> logs, ("sent %d, b %d\tscore %.4f\tbleu+1 %s" + \
              "\ttime %.3f\tsentlen %-3d fnodes %-4d fedges %-5d\tstep %d  states %d  edges %d stacks %d") % \
              (i, FLAGS.beam, score, 
               forest.bleu.score_ratio_str(), t, len(forest.sent), fnodes, fedges,
               decoder.max_step, decoder.num_states, decoder.num_edges, decoder.num_stacks)

        if FLAGS.k > 1 or FLAGS.forest:
           lmforest = best.toforest(forest)

        if FLAGS.forest:
            lmforest.dump()

        if FLAGS.k > 1:
           lmforest.lazykbest(FLAGS.k, weights=weights)
           klist = lmforest.root.klist

           if not FLAGS.mert:
               for j, (sc, tr, fv) in enumerate(klist, 1):
                   print >> logs, "k=%d score=%.4f fv=%s\n%s" % (j, sc, fv, tr)

        else:
            klist = [(best.score, best.trans(), best.get_fvector())]
        
        if FLAGS.mert: # <score>... <hyp> ...
            print >> logs, '<sent No="%d">' % i
            print >> logs, "<Chinese>%s</Chinese>" % " ".join(forest.cased_sent)

            for sc, tr, fv in klist:
                print >> logs, "<score>%.3lf</score>" % sc
                print >> logs, "<hyp>%s</hyp>" % tr
                print >> logs, "<cost>%s</cost>" % fv

            print >> logs, "</sent>"

        if not FLAGS.forest:
            print trans

    print >> logs, "avg %d sentences, first pass score: %.4f, bleu: %s" % \
          (i, decoder.firstpassscore/i, decoder.firstpassbleus.score_ratio_str())
                                                                            
    print >> logs, ("avg %d sentences, b %d\tscore %.4lf\tbleu %s\ttime %.3f" + \
          "\tsentlen %.1f fnodes %.1f fedges %.1f\tstep %.1f states %.1f edges %.1f stacks %.1f") % \
          (i, FLAGS.beam, tot_score/i, tot_bleu.score_ratio_str(), tot_time/i,
           tot_len/i, tot_fnodes/i, tot_fedges/i,
           tot_steps/i, tot_states/i, tot_edges/i, tot_stacks/i)

    print >> logs, LMState.cachehits, LMState.cachemiss
Example #19
    best_devscore = -1

    print >> logs, "starting perceptron at", time.ctime()
    for it in xrange(opts.iterations):

        print >> logs, "iteration %d" % (it+1), "= = " * 20
        print >> logs, "hope weight on modelcost = %lf" % opts.hope

        iterstart = time.time()

        if opts.shuffle:
            ## TODO: randomize
            pass

        parseval = Bleu()
        num_updates = 0

        avgtime = 0
        decoder.reset()
        
        if not preloaded:
            trainforests = decoder.load(opts.trainfile)
            

        for i, forest in enumerate(trainforests):

            decoder.do_oracle(forest, weights)
            
            print >> logs, "  iteration %d, example %d" % (it+1, i+1), "-" * 5, "oracle = %.4lf" % forest.oracle_bleu_score, 
            updated, pp, deltafv = one_example(forest, weights)
def init_scorer(cached_tokens):
    global CiderD_scorer
    CiderD_scorer = CiderD_scorer or CiderD(df=cached_tokens)
    global Bleu_scorer
    Bleu_scorer = Bleu_scorer or Bleu(4)
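The lazy initialization in init_scorer only works if both globals already exist at module level; a minimal sketch of the assumed surrounding state:

# Assumed module-level state (not shown in the snippet): both scorers start
# unset and are built once, on the first call to init_scorer.
CiderD_scorer = None
Bleu_scorer = None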
	def __init__(self, alpha=0.5):
		self.simple_meteor = SimpleMeteor(alpha=alpha, beta=0.16)
		self.tri_bleu = Bleu(3)
		self.four_bleu = Bleu(4, beta=0.13)
		self.p = Preprocessor()
class MeteorBleu:
	""" Prints features for all versions of Meteor and BLEU for every
		input sentence
	"""

	def __init__(self, alpha=0.5):
		self.simple_meteor = SimpleMeteor(alpha=alpha, beta=0.16)
		self.tri_bleu = Bleu(3)
		self.four_bleu = Bleu(4, beta=0.13)
		self.p = Preprocessor()

	def features(self, tokline, posline):
		""" The workhouse function
			Takes lists of tokens and postags for [h1, h2, ref]
			Returns feature values for h1, h2, h1-h2
		"""

		features = []

		# Simple Meteor
		h1p, h2p, refp = self.p.preprocess(tokline, stem=False, lowercase=False)
		h1score = self.simple_meteor.score(h1p, refp)
		h2score = self.simple_meteor.score(h2p, refp)
		h1_h2 = h1score - h2score
		features += [h1score, h2score, h1_h2]

		# Simple Meteor lowercase
		h1p, h2p, refp = self.p.preprocess(tokline, stem=False, lowercase=True)
		h1score = self.simple_meteor.score(h1p, refp)
		h2score = self.simple_meteor.score(h2p, refp)
		h1_h2 = h1score - h2score
		features += [h1score, h2score, h1_h2]

		# Simple Meteor lowercase, stemmed
		h1p, h2p, refp = self.p.preprocess(tokline, stem=True, lowercase=True)
		h1score = self.simple_meteor.score(h1p, refp)
		h2score = self.simple_meteor.score(h2p, refp)
		h1_h2 = h1score - h2score
		features += [h1score, h2score, h1_h2]

		# Simple Meteor referencing sequence of postags, lowercase, stemmed
		h1p, h2p, refp = self.p.preprocess(tokline, stem=True, lowercase=True)
		h1pos, h2pos, refpos = self.p.preprocess(posline)
		h1score = self.simple_meteor.score(
				h1p, refp, postags=True, hpos=h1pos, refpos=refpos)
		h2score = self.simple_meteor.score(
				h2p, refp, postags=True, hpos=h2pos, refpos=refpos)
		h1_h2 = h1score - h2score
		features += [h1score, h2score, h1_h2]

		# trigram BLEU, lowercased, stemmed
		h1p, h2p, refp = self.p.preprocess(tokline, stem=True, lowercase=True)
		h1score = self.tri_bleu.score(h1p, refp)
		h2score = self.tri_bleu.score(h2p, refp)
		h1_h2 = h1score - h2score
		features += [h1score, h2score, h1_h2]

		# postag-smoothed 4-gram BLEU
		h1p, h2p, refp = self.p.preprocess(tokline, stem=False, lowercase=False)
		h1pos, h2pos, refpos = self.p.preprocess(posline)
		h1score = self.four_bleu.score(
				h1p, refp, postag=True, hpos=h1pos, refpos=refpos)
		h2score = self.four_bleu.score(
				h2p, refp, postag=True, hpos=h2pos, refpos=refpos)
		h1_h2 = h1score - h2score
		features += [h1score, h2score, h1_h2]
		
		# postag-smoothed 4-gram BLEU, lowercased
		h1p, h2p, refp = self.p.preprocess(tokline, stem=False, lowercase=True)
		h1pos, h2pos, refpos = self.p.preprocess(posline)
		h1score = self.four_bleu.score(
				h1p, refp, postag=True, hpos=h1pos, refpos=refpos)
		h2score = self.four_bleu.score(
				h2p, refp, postag=True, hpos=h2pos, refpos=refpos)
		h1_h2 = h1score - h2score
		features += [h1score, h2score, h1_h2]

		# postag-smoothed 4-gram BLEU, lowercased, stemmed
		h1p, h2p, refp = self.p.preprocess(tokline, stem=True, lowercase=True)
		h1pos, h2pos, refpos = self.p.preprocess(posline)
		h1score = self.four_bleu.score(
				h1p, refp, postag=True, hpos=h1pos, refpos=refpos)
		h2score = self.four_bleu.score(
				h2p, refp, postag=True, hpos=h2pos, refpos=refpos)
		h1_h2 = h1score - h2score
		features += [h1score, h2score, h1_h2]
		
		# postag-smoothed 4-gram BLEU, lowercased, stemmed, weighted
		w = [10,5,2,1]
		h1p, h2p, refp = self.p.preprocess(tokline, stem=True, lowercase=True)
		h1pos, h2pos, refpos = self.p.preprocess(posline)
		h1score = self.four_bleu.score(
				h1p, refp, postag=True, hpos=h1pos, refpos=refpos, wts=w)
		h2score = self.four_bleu.score(
				h2p, refp, postag=True, hpos=h2pos, refpos=refpos, wts=w)
		h1_h2 = h1score - h2score
		features += [h1score, h2score, h1_h2]
		
		return features

	def evaluate(self, h1score, h2score):
		""" Scores hypothesis sentences based on scores
			Prints output
		"""
		if h1score > h2score:
			print -1
		elif h1score == h2score:
			print 0
		else:
			print 1
Example #23
    def __init__(self):
        self.scorers = [(Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"])]
Example #24
def test_score():
    cand = "中华人民共和国"
    ref = "中华人民共和国公民"
    bleu = Bleu(N_SIZE)
    s = bleu.score(cand, ref)
    print('score: {}'.format(s))
Example #25
from rouge import Rouge
import argparse
import logging
from ReadingComprehension.IterativeReattentionAligner.e2e_encoder import MnemicReader as e2e_MnemicReader
import cProfile, pstats, io
from utils import *
from InformationRetrieval.AttentionRM.modules import AttentionRM
from EndToEndModel.modules import EndToEndModel
from nltk.translate.bleu_score import sentence_bleu
import re
import pickle
from CSMrouge import RRRouge
from bleu import Bleu

stoplist = set(['.',',', '...', '..'])
bleu_obj = Bleu(4)

def add_arguments(parser):
    parser.add_argument("train_file", help="File that contains training data")
    parser.add_argument("dev_file", help="File that contains dev data")
    parser.add_argument("embedding_file", help="File that contains pre-trained embeddings")
    parser.add_argument('--dicts_dir', type=str, default=None, help='Directory containing the word dictionaries')
    parser.add_argument('--seed', type=int, default=6, help='Random seed for the experiment')
    parser.add_argument('--epochs', type=int, default=20, help='Train data iterations')
    parser.add_argument('--train_batch_size', type=int, default=32, help='Batch size for training')
    parser.add_argument('--dev_batch_size', type=int, default=32, help='Batch size for dev')
    parser.add_argument('--hidden_size', type=int, default=100, help='Hidden size for LSTM')
    parser.add_argument('--num_layers', type=int, default=1, help='Number of layers for LSTM')
    parser.add_argument('--char_emb_size', type=int, default=50, help='Embedding size for characters')
    parser.add_argument('--pos_emb_size', type=int, default=50, help='Embedding size for pos tags')
    parser.add_argument('--ner_emb_size', type=int, default=50, help='Embedding size for ner')