def test_degenerate_statistics(statistics, offset, expected_score):
    """Check BLEU on degenerate statistics using ``floor`` smoothing.

    ``statistics[0]`` supplies the ``common`` and ``total`` n-gram statistics,
    while ``statistics[1]`` and ``statistics[2]`` are forwarded as the third
    and fourth positional arguments of ``BLEU.compute_bleu``. ``offset`` is
    passed as ``smooth_value``.

    The returned ``score`` is divided by 100 and must equal
    ``expected_score`` exactly (no tolerance is applied).
    """
    ngram_stats = statistics[0]
    result = BLEU.compute_bleu(
        ngram_stats.common,
        ngram_stats.total,
        statistics[1],
        statistics[2],
        smooth_method='floor',
        smooth_value=offset,
    )
    scaled = result.score / 100
    assert scaled == expected_score
def compute_bleu(meters):
    """Compute a BLEU score from the aggregated statistics held in ``meters``.

    Args:
        meters: Mapping with the keys ``_bleu_counts``, ``_bleu_totals``,
            ``_bleu_sys_len`` and ``_bleu_ref_len``; the ``.sum`` attribute
            of each entry is forwarded to ``BLEU.compute_bleu``.

    Returns:
        The ``score`` of the result, rounded to two decimal places.
    """
    import inspect

    from sacrebleu.metrics import BLEU

    # The smoothing keyword is chosen from the signature of the installed
    # ``BLEU.compute_bleu``: ``smooth_method`` when present, else ``smooth``.
    accepted_args = inspect.getfullargspec(BLEU.compute_bleu).args
    smooth_key = "smooth_method" if "smooth_method" in accepted_args else "smooth"

    stats = {
        "correct": meters["_bleu_counts"].sum,
        "total": meters["_bleu_totals"].sum,
        "sys_len": meters["_bleu_sys_len"].sum,
        "ref_len": meters["_bleu_ref_len"].sum,
    }
    result = BLEU.compute_bleu(**stats, **{smooth_key: "exp"})
    return round(result.score, 2)
def score_corpus_multiprocess(
    self, hypothesis: List[str], references: List[List[str]], score='score'
) -> float:
    """Compute corpus-level BLEU, optionally fanning out over worker processes.

    With ``self.n_workers == 1`` the scorer's ``corpus_score`` is called
    directly. Otherwise the inputs are split with ``self._batch`` into
    ``self.n_workers`` batches, each batch is scored in a process pool, and
    the per-batch n-gram counts, totals and lengths are summed before a
    single ``compute_bleu`` call with ``smooth_method='exp'``.

    Args:
        hypothesis: Hypothesis strings to score.
        references: Reference lists; ``len(references)`` is passed to
            ``get_default_args`` as ``num_refs``.
        score: Which attribute of the result to return: ``'score'`` or
            ``'bp'``. Any other value makes the lookup yield ``None`` and the
            final call fail with ``TypeError``.

    Returns:
        The selected attribute of the BLEU result.
    """
    tokenizer = get_optional_dict(self.extra_args, 'tokenizer', 'none')
    scorer = BLEU(get_default_args(tokenize=tokenizer, num_refs=len(references)))

    if self.n_workers == 1:
        corpus_score = scorer.corpus_score(
            hypothesis, references, use_effective_order=False
        )
    else:
        chunks = list(
            self._batch(hypothesis, references, n_batches=self.n_workers)
        )
        order = BLEU.NGRAM_ORDER
        total_ref_len = total_sys_len = 0
        matched = [0] * order
        possible = [0] * order
        with ProcessPoolExecutor(max_workers=self.n_workers) as pool:
            pending = [
                pool.submit(
                    scorer.corpus_score,
                    chunk[0],
                    chunk[1],
                    use_effective_order=False,
                )
                for chunk in chunks
            ]
            finished = as_completed(pending)
            if self.verbose:
                finished = tqdm(finished)
            # Accumulate the raw statistics of every batch so that BLEU is
            # computed once over the sums rather than averaged per batch.
            for done in finished:
                partial = done.result()
                total_ref_len += partial.ref_len
                total_sys_len += partial.sys_len
                for idx in range(order):
                    matched[idx] += partial.counts[idx]
                    possible[idx] += partial.totals[idx]
            corpus_score = scorer.compute_bleu(
                matched,
                possible,
                total_sys_len,
                total_ref_len,
                smooth_method='exp',
            )

    extractors = {
        'score': lambda result: result.score,
        'bp': lambda result: result.bp,
    }
    extract = extractors.get(score)
    return extract(corpus_score)
def test_scoring(statistics, expected_score):
    """Check that BLEU computed from raw statistics is close to the expectation.

    ``statistics[0]`` supplies the ``common`` and ``total`` n-gram statistics,
    while ``statistics[1]`` and ``statistics[2]`` are forwarded as the third
    and fourth positional arguments of ``BLEU.compute_bleu``.

    The returned ``score`` is divided by 100 and must differ from
    ``expected_score`` by strictly less than ``EPSILON``.
    """
    ngram_stats = statistics[0]
    result = BLEU.compute_bleu(
        ngram_stats.common,
        ngram_stats.total,
        statistics[1],
        statistics[2],
    )
    deviation = abs(result.score / 100 - expected_score)
    assert deviation < EPSILON