def test_accumulation_macro_bleu():
    """Macro-averaged BLEU accumulated over four single-sample updates
    equals the arithmetic mean of the per-sentence BLEU scores."""
    bleu = Bleu(ngram=4, smooth="smooth2")

    # (candidate, references) pairs fed one sample at a time.
    samples = [
        (corpus.cand_1, corpus.references_1),
        (corpus.cand_2a, corpus.references_2),
        (corpus.cand_2b, corpus.references_2),
        (corpus.cand_3, corpus.references_2),
    ]
    for cand, refs in samples:
        bleu.update(([cand], [refs]))

    expected = sum(bleu._sentence_bleu(refs, cand) for cand, refs in samples)
    assert bleu.compute() == expected / len(samples)
def _test(candidates, references, average, smooth="no_smooth", smooth_nltk_fn=None, ngram_range=8):
    """Compare the Bleu metric against the corresponding NLTK score.

    For every n-gram order in [1, ngram_range) the metric is checked both via
    its internal helper (``_sentence_bleu`` for macro, ``_corpus_bleu`` for
    micro) and through the public ``update``/``compute`` API, against
    nltk's ``sentence_bleu`` / ``corpus_bleu`` with uniform weights.
    """
    for order in range(1, ngram_range):
        uniform_weights = tuple(1 / order for _ in range(order))
        bleu = Bleu(ngram=order, average=average, smooth=smooth)

        if average == "macro":
            # nltk emits warnings for zero-count n-grams; silence them here.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                reference = sentence_bleu(
                    references[0], candidates[0], weights=uniform_weights, smoothing_function=smooth_nltk_fn
                )
            assert pytest.approx(reference) == bleu._sentence_bleu(references[0], candidates[0])
        elif average == "micro":
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                reference = corpus_bleu(
                    references, candidates, weights=uniform_weights, smoothing_function=smooth_nltk_fn
                )
            assert pytest.approx(reference) == bleu._corpus_bleu(references, candidates)

        # The public API must agree with the nltk reference as well.
        bleu.update((candidates, references))
        assert pytest.approx(reference) == bleu.compute()
def test_bleu_batch_macro():
    """A single batched update (batch size 3) matches the mean of the
    per-sentence NLTK BLEU scores, and so does per-sample accumulation."""
    bleu = Bleu(ngram=4)

    # Batch size 3
    hypotheses = [corpus.cand_1, corpus.cand_2a, corpus.cand_2b]
    refs = [corpus.references_1, corpus.references_2, corpus.references_2]
    bleu.update((hypotheses, refs))

    # nltk may warn about zero-count n-grams; suppress while building the reference.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        reference_bleu_score = sum(sentence_bleu(r, h) for h, r in zip(hypotheses, refs)) / 3
    assert pytest.approx(bleu.compute()) == reference_bleu_score

    # Accumulate the same samples one at a time; the macro average is unchanged.
    total = 0
    for hyp, ref in zip(hypotheses, refs):
        total += bleu._sentence_bleu(ref, hyp)
        bleu.update(([hyp], [ref]))
    assert pytest.approx(total / len(refs)) == reference_bleu_score
    assert pytest.approx(bleu.compute()) == reference_bleu_score