Esempio n. 1
0
    def score_article(self,test,ref):
      """Score a whole article by pairing test and reference sentences by position.

      Each sentence in `ref` is cooked on its own (as a single-reference list),
      then the i-th sentence of `test` is cooked against the i-th cooked
      reference. The cooked test sentences are handed to `bleu.score_cooked`
      and its result is returned unchanged.

      The n-gram order comes from `self.options['bleu_ngrams']`.
      `test` must not contain more sentences than `ref`; otherwise the
      positional lookup raises IndexError.
      """
      ngram_order = self.options['bleu_ngrams']

      cooked_refs = [bleu.cook_refs([sentence],ngram_order) for sentence in ref]

      # Positional pairing: test sentence i is compared with reference i only.
      cooked_tests = [
        bleu.cook_test(sentence,cooked_refs[position],ngram_order)
        for position,sentence in enumerate(test)
      ]

      return bleu.score_cooked(cooked_tests,ngram_order)
Esempio n. 2
0
    def score_article(self, test, ref):
        """Return `bleu.score_cooked` over sentence pairs aligned by index.

        Every reference sentence is cooked individually via `bleu.cook_refs`
        (wrapped in a one-element list). Test sentence number i is then cooked
        against cooked reference number i, and the collected results are
        scored together.

        The n-gram order is read from ``self.options["bleu_ngrams"]``.
        An IndexError is raised if `test` has more sentences than `ref`.
        """
        max_order = self.options["bleu_ngrams"]
        reference_stats = [bleu.cook_refs([ref_sentence], max_order) for ref_sentence in ref]

        test_stats = []
        for index, test_sentence in enumerate(test):
            # Look up the reference that sits at the same position as this sentence.
            aligned_reference = reference_stats[index]
            test_stats.append(bleu.cook_test(test_sentence, aligned_reference, max_order))

        return bleu.score_cooked(test_stats, max_order)
Esempio n. 3
0
    def eval_sents(self,translist,targetlist):
      """Rank reference sentences for every test sentence by a symmetric BLEU-style score.

      For each sentence in `translist`, every sentence in `targetlist` that
      shares at least one n-gram of the highest order
      (`self.options['bleu_ngrams']`) is scored twice: once with the test
      sentence as hypothesis and once with the roles of the two lengths
      swapped. The harmonic mean of both scores is kept.

      Returns a dict mapping the index of each test sentence to a list of
      `(meanscore, reference index, per-order clipped match counts)` tuples,
      sorted by descending score and cut to `self.options['maxalternatives']`
      entries. Test sentences without any candidate map to an empty list.
      """
      order = self.options['bleu_ngrams']
      top = order - 1

      def directional_bleu(hits, guesses, own_len, other_len):
        # Mean log n-gram precision plus brevity penalty (mirrors bleu.py).
        # NOTE(review): math.log raises ValueError on a zero count; presumably a
        # shared top-order n-gram implies shared lower-order ones - confirm
        # against bleu.count_ngrams.
        log_score = 0.0
        for hit, guess in zip(hits, guesses):
          log_score += math.log(hit)-math.log(guess)
        log_score /= order
        log_score += min(0,1-float(other_len)/own_len)
        return math.exp(log_score)

      # Cook every reference once; the key set allows cheap set intersections below.
      references = []
      for ref_id,ref_sent in enumerate(targetlist):
        ref_lens, ref_counts = bleu.cook_refs([ref_sent],order)
        references.append((ref_id, ref_lens, ref_counts, set(ref_counts)))

      scoredict = {}
      for test_id,test_sent in enumerate(translist):
        tokens = bleu.normalize(test_sent)
        test_len = len(tokens)
        test_guess = [max(test_len-k+1,0) for k in range(1,order+1)]
        test_counts = bleu.count_ngrams(tokens, order)

        # Bucket the test n-grams by length so the highest order can act as a
        # cheap pre-filter before the lower orders are compared.
        by_order = {idx: set() for idx in range(order)}
        for ngram in test_counts:
          by_order[len(ngram)-1].add(ngram)

        candidates = []
        for ref_id, ref_lens, ref_counts, ref_ngrams in references:
          top_hits = by_order[top] & ref_ngrams
          if not top_hits:
            continue

          ref_len = ref_lens[0]

          # Clipped match counts per n-gram order; a fresh list per reference
          # because it ends up inside the result tuple.
          correct = [0]*order
          for idx in range(order):
            shared = top_hits if idx == top else by_order[idx] & ref_ngrams
            for ngram in shared:
              correct[idx] += min(ref_counts[ngram], test_counts[ngram])

          forward = directional_bleu(correct, test_guess, test_len, ref_len)

          if forward > 0:
            # Same computation in the reverse direction: the reference length
            # now supplies the guess counts.
            ref_guess = [max(ref_len-k+1,0) for k in range(1,order+1)]
            backward = directional_bleu(correct, ref_guess, ref_len, test_len)

            harmonic = (2*forward*backward)/(forward+backward)
            candidates.append((harmonic,ref_id,correct))

        candidates.sort(key=itemgetter(0),reverse=True)
        scoredict[test_id] = candidates[:self.options['maxalternatives']]

      return scoredict
Esempio n. 4
0
    def eval_sents(self, translist, targetlist):
        """Find, for each test sentence, the best-matching reference sentences.

        A reference is only considered for a test sentence when the two share
        at least one n-gram of the maximum order
        (``self.options["bleu_ngrams"]``). For such pairs a BLEU-style score is
        computed in both directions (test length vs. reference length as the
        basis for the guess counts and the brevity penalty), and the harmonic
        mean of the two is stored.

        Returns:
            dict: test sentence index -> list of
            ``(meanscore, reference index, clipped match counts per order)``
            tuples, best score first, limited to
            ``self.options["maxalternatives"]`` items. The list is empty when
            no reference qualifies.
        """
        max_n = self.options["bleu_ngrams"]
        orders = range(1, max_n + 1)

        def clipped_matches(ngrams, hyp_counts, ref_max_counts):
            """Sum the per-n-gram match counts, clipped to the reference maximum."""
            total = 0
            for ngram in ngrams:
                total += min(ref_max_counts[ngram], hyp_counts[ngram])
            return total

        def one_way_score(matches, attempts, hyp_len, ref_len):
            """Return exp(mean log precision + brevity penalty), as in bleu.py."""
            # NOTE(review): a zero in `matches` or `attempts` makes math.log raise
            # ValueError; presumably excluded by the top-order pre-filter - confirm.
            acc = 0.0
            for k in range(max_n):
                acc += math.log(matches[k]) - math.log(attempts[k])
            acc /= max_n
            acc += min(0, 1 - float(ref_len) / hyp_len)
            return math.exp(acc)

        # One cooked entry per reference, plus its n-gram keys as a set.
        cooked_refs = []
        for ref_id, ref_sent in enumerate(targetlist):
            lengths, max_counts = bleu.cook_refs([ref_sent], max_n)
            cooked_refs.append((ref_id, lengths, max_counts, set(max_counts)))

        results = {}
        for test_id, test_sent in enumerate(translist):
            words = bleu.normalize(test_sent)
            n_words = len(words)
            attempts = [max(n_words - k + 1, 0) for k in orders]
            hyp_counts = bleu.count_ngrams(words, max_n)

            # Group n-grams by length (key = length - 1); the longest ones serve
            # as a quick filter so unrelated references are skipped early.
            buckets = {slot: set() for slot in range(max_n)}
            for ngram in hyp_counts:
                buckets[len(ngram) - 1].add(ngram)

            ranked = []
            for ref_id, lengths, max_counts, ref_keys in cooked_refs:
                longest_common = buckets[max_n - 1].intersection(ref_keys)
                if not longest_common:
                    continue

                ref_len = lengths[0]

                # Built fresh for each reference: the list is stored in the result.
                matches = [0] * max_n
                matches[max_n - 1] += clipped_matches(longest_common, hyp_counts, max_counts)
                for slot in range(max_n - 1):
                    common = buckets[slot].intersection(ref_keys)
                    matches[slot] += clipped_matches(common, hyp_counts, max_counts)

                score = one_way_score(matches, attempts, n_words, ref_len)
                if not score > 0:
                    continue

                # Reverse direction: guess counts derive from the reference length.
                reverse_attempts = [max(ref_len - k + 1, 0) for k in orders]
                reverse_score = one_way_score(matches, reverse_attempts, ref_len, n_words)

                mean = (2 * score * reverse_score) / (score + reverse_score)
                ranked.append((mean, ref_id, matches))

            best_first = sorted(ranked, key=itemgetter(0), reverse=True)
            results[test_id] = best_first[: self.options["maxalternatives"]]

        return results