def test_degenerate_statistics(statistics, offset, expected_score):
    """Check floor-smoothed BLEU against an exact expected value.

    ``statistics[0]`` must expose ``common`` and ``total``; ``statistics[1]``
    and ``statistics[2]`` are forwarded as the third and fourth positional
    arguments of ``BLEU.compute_bleu``. ``offset`` is passed through as
    ``smooth_value`` together with ``smooth_method='floor'``.
    """
    ngram_stats = statistics[0]
    third_arg = statistics[1]
    fourth_arg = statistics[2]

    result = BLEU.compute_bleu(
        ngram_stats.common,
        ngram_stats.total,
        third_arg,
        fourth_arg,
        smooth_method='floor',
        smooth_value=offset,
    )

    # The reported score is divided by 100 so it is on the same scale as
    # ``expected_score``; the comparison is intentionally exact.
    score = result.score / 100
    assert score == expected_score
                def compute_bleu(meters):
                    """Return the BLEU score for the aggregated meters, rounded to 2 decimals.

                    Reads the ``sum`` attribute of the ``_bleu_counts``,
                    ``_bleu_totals``, ``_bleu_sys_len`` and ``_bleu_ref_len``
                    entries of ``meters`` and passes them to
                    ``BLEU.compute_bleu`` with "exp" smoothing.
                    """
                    import inspect
                    from sacrebleu.metrics import BLEU

                    # The smoothing keyword is picked by inspecting the callee's
                    # signature: "smooth_method" when it is accepted, otherwise
                    # "smooth".
                    accepted_args = inspect.getfullargspec(BLEU.compute_bleu).args
                    smooth_kwarg = (
                        "smooth_method" if "smooth_method" in accepted_args
                        else "smooth"
                    )

                    result = BLEU.compute_bleu(
                        correct=meters["_bleu_counts"].sum,
                        total=meters["_bleu_totals"].sum,
                        sys_len=meters["_bleu_sys_len"].sum,
                        ref_len=meters["_bleu_ref_len"].sum,
                        **{smooth_kwarg: "exp"})
                    return round(result.score, 2)
Exemple #3
0
 def score_corpus_multiprocess(
         self, hypothesis: List[str], references: List[List[str]],
         score='score'
 ) -> float:
     """Compute corpus-level BLEU, optionally spread over a process pool.

     When ``self.n_workers == 1`` the whole corpus is scored with a single
     ``corpus_score`` call. Otherwise the corpus is split by ``self._batch``
     into ``self.n_workers`` batches that are scored in a
     ``ProcessPoolExecutor``; the per-batch n-gram counts, totals and
     lengths are summed and BLEU is recomputed from the aggregated
     statistics with ``smooth_method='exp'``.

     Args:
         hypothesis: Passed as the first argument of ``corpus_score``.
         references: Passed as the second argument of ``corpus_score``;
             ``len(references)`` is used as ``num_refs``.
         score: Name of the result attribute to return, either ``'score'``
             or ``'bp'``.

     Returns:
         The selected attribute of the resulting BLEU score object.

     Raises:
         ValueError: If ``score`` is not ``'score'`` or ``'bp'``.
     """
     # Validate the selector before doing any work, so a typo fails fast with
     # a clear message instead of an opaque "'NoneType' object is not
     # callable" after the whole corpus has been scored.
     supported = ('score', 'bp')
     if score not in supported:
         raise ValueError(
             f"unsupported score selector {score!r}; "
             f"expected one of {supported}"
         )

     tokenizer = get_optional_dict(self.extra_args, 'tokenizer', 'none')
     args = get_default_args(tokenize=tokenizer, num_refs=len(references))
     scorer = BLEU(args)

     if self.n_workers == 1:
         corpus_score = scorer.corpus_score(
             hypothesis, references, use_effective_order=False
         )
         return getattr(corpus_score, score)

     batches = list(
         self._batch(hypothesis, references, n_batches=self.n_workers)
     )
     ref_len, sys_len = 0, 0
     correct = [0] * BLEU.NGRAM_ORDER
     total = [0] * BLEU.NGRAM_ORDER
     with ProcessPoolExecutor(max_workers=self.n_workers) as executor:
         futures = [
             executor.submit(
                 scorer.corpus_score, batch[0], batch[1],
                 use_effective_order=False
             )
             for batch in batches
         ]
         progress = as_completed(futures)
         if self.verbose:
             # as_completed() has no length, so give tqdm the total itself.
             progress = tqdm(progress, total=len(futures))
         for future in progress:
             partial = future.result()
             ref_len += partial.ref_len
             sys_len += partial.sys_len
             for n in range(BLEU.NGRAM_ORDER):
                 correct[n] += partial.counts[n]
                 total[n] += partial.totals[n]

     # BLEU is recomputed from the summed statistics rather than by
     # combining the per-batch scores.
     corpus_score = scorer.compute_bleu(
         correct, total, sys_len, ref_len, smooth_method='exp'
     )
     return getattr(corpus_score, score)
def test_scoring(statistics, expected_score):
    """Check that BLEU computed from raw statistics is within EPSILON of the expected value.

    ``statistics[0]`` must expose ``common`` and ``total``; ``statistics[1]``
    and ``statistics[2]`` are forwarded as the third and fourth positional
    arguments of ``BLEU.compute_bleu``.
    """
    ngram_stats = statistics[0]
    result = BLEU.compute_bleu(
        ngram_stats.common,
        ngram_stats.total,
        statistics[1],
        statistics[2],
    )

    # The reported score is divided by 100 so it is on the same scale as
    # ``expected_score``.
    score = result.score / 100
    assert abs(score - expected_score) < EPSILON