def raw_corpus_bleu(hypotheses: Iterable[str],
                    references: Iterable[str],
                    offset: Optional[float] = 0.01) -> float:
    """
    Thin wrapper around ``sacrebleu.raw_corpus_bleu`` that rescales the score.

    :param hypotheses: Hypotheses stream.
    :param references: Reference stream; handed to sacreBLEU as the only
        entry of its list of reference streams.
    :param offset: Smoothing constant, forwarded to sacreBLEU as ``smooth_floor``.
    :return: sacreBLEU's reported score divided by 100 (BLEU as a float
        between 0 and 1).
    """
    # NOTE(review): tokenization/smoothing behaviour is decided inside
    # sacreBLEU's raw_corpus_bleu, not here -- confirm against the installed version.
    bleu_result = sacrebleu.raw_corpus_bleu(hypotheses, [references], smooth_floor=offset)
    return bleu_result.score / 100.0
def raw_corpus_bleu(hypotheses: Iterable[str],
                    references: Iterable[str],
                    offset: Optional[float] = 0.01) -> float:
    """
    Thin wrapper around ``sacrebleu.raw_corpus_bleu`` that rescales the score.

    :param hypotheses: Hypotheses stream.
    :param references: Reference stream; handed to sacreBLEU as the only
        entry of its list of reference streams.
    :param offset: Smoothing constant, forwarded to sacreBLEU as ``smooth_floor``.
    :return: sacreBLEU's reported score divided by 100 (BLEU as a float
        between 0 and 1).
    """
    # NOTE(review): tokenization/smoothing behaviour is decided inside
    # sacreBLEU's raw_corpus_bleu, not here -- confirm against the installed version.
    bleu_result = sacrebleu.raw_corpus_bleu(hypotheses, [references], smooth_floor=offset)
    return bleu_result.score / 100
def test_statistics(hypothesis, reference, expected_stat):
    """Check that the counts/totals reported by sacreBLEU match ``expected_stat``."""
    bleu_result = sacrebleu.raw_corpus_bleu(hypothesis, reference, .01)
    # Wrap the raw n-gram counts and totals so they compare against the expectation.
    observed_stat = Statistics(bleu_result.counts, bleu_result.totals)
    assert observed_stat == expected_stat
def test_effective_order(hypotheses, references, expected_bleu):
    """Check that the rescaled sacreBLEU score is within EPSILON of ``expected_bleu``."""
    bleu_result = sacrebleu.raw_corpus_bleu(hypotheses, [references], .01)
    # The score is divided by 100 before comparison with the expected value.
    scaled_score = bleu_result.score / 100
    assert abs(scaled_score - expected_bleu) < EPSILON
def test_degenerate_uneven(hypotheses, references):
    """Check that scoring these inputs raises EOFError with a message mentioning 'stream'."""
    expecting_eof = pytest.raises(EOFError, match=r'.*stream.*')
    with expecting_eof:
        sacrebleu.raw_corpus_bleu(hypotheses, references)
def test_offset(hypothesis, reference, expected_with_offset, expected_without_offset):
    """Check the rescaled score with a smoothing value of 0.0 and then of 0.1."""
    # Order matters: the zero-offset case is scored and asserted first.
    cases = (
        (0.0, expected_without_offset),
        (0.1, expected_with_offset),
    )
    for smoothing, expected in cases:
        score = sacrebleu.raw_corpus_bleu(hypothesis, reference, smoothing).score / 100
        assert abs(expected - score) < EPSILON
def test_offset(hypothesis, reference, expected_with_offset, expected_without_offset):
    """Check the rescaled score with a smoothing value of 0.0 and then of 0.1."""
    # Order matters: the zero-offset case is scored and asserted first.
    cases = (
        (0.0, expected_without_offset),
        (0.1, expected_with_offset),
    )
    for smoothing, expected in cases:
        score = sacrebleu.raw_corpus_bleu(hypothesis, reference, smoothing).score / 100
        assert abs(expected - score) < EPSILON
def test_statistics(hypothesis, reference, expected_stat):
    """Check that the counts/totals reported by sacreBLEU match ``expected_stat``."""
    bleu_result = sacrebleu.raw_corpus_bleu(hypothesis, reference, .01)
    # Wrap the raw n-gram counts and totals so they compare against the expectation.
    observed_stat = Statistics(bleu_result.counts, bleu_result.totals)
    assert observed_stat == expected_stat
def test_effective_order(hypotheses, references, expected_bleu):
    """Check that the rescaled sacreBLEU score is within EPSILON of ``expected_bleu``."""
    bleu_result = sacrebleu.raw_corpus_bleu(hypotheses, [references], .01)
    # The score is divided by 100 before comparison with the expected value.
    scaled_score = bleu_result.score / 100
    assert abs(scaled_score - expected_bleu) < EPSILON
def test_degenerate_uneven(hypotheses, references):
    """Check that scoring these inputs raises EOFError with a message mentioning 'stream'."""
    expecting_eof = pytest.raises(EOFError, match=r'.*stream.*')
    with expecting_eof:
        sacrebleu.raw_corpus_bleu(hypotheses, references)