def test__modified_precision(self): """ Examples from the original BLEU paper http://www.aclweb.org/anthology/P02-1040.pdf """ # Example 1: the "the*" example. # Reference sentences. ref1 = 'the cat is on the mat'.split() ref2 = 'there is a cat on the mat'.split() # Hypothesis sentence(s). hyp1 = 'the the the the the the the'.split() references = [ref1, ref2] # Testing modified unigram precision. assert (_modified_precision(references, hyp1, n=1) == 0.2857142857142857) # Testing modified bigram precision. assert(_modified_precision(references, hyp1, n=2) == 0.0) # Example 2: the "of the" example. # Reference sentences ref1 = str('It is a guide to action that ensures that the military ' 'will forever heed Party commands.').split() ref2 = str('It is the guiding principle which guarantees the military ' 'forces always being under the command of the Party.').split() ref3 = str('It is the practical guide for the army always to heed ' 'the directions of the party').split() # Hypothesis sentence(s). hyp1 = 'of the'.split() references = [ref1, ref2, ref3] # Testing modified unigram precision. assert (_modified_precision(references, hyp1, n=1) == 1.0) # Testing modified bigram precision. assert(_modified_precision(references, hyp1, n=2) == 1.0) # Example 3: Proper MT outputs. hyp1 = str('It is a guide to action which ensures that the military ' 'always obeys the commands of the party').split() hyp2 = str('It is to insure the troops forever hearing the activity ' 'guidebook that party direct').split() references = [ref1, ref2, ref3] # Unigram precision. assert (_modified_precision(references, hyp1, n=1) == 0.9444444444444444) assert (_modified_precision(references, hyp2, n=1) == 0.5714285714285714) # Bigram precision assert (_modified_precision(references, hyp1, n=2) == 0.5882352941176471) assert (_modified_precision(references, hyp2, n=2) == 0.07692307692307693)
def test__modified_precision(self): """ Examples from the original BLEU paper http://www.aclweb.org/anthology/P02-1040.pdf """ # Example 1: the "the*" example. # Reference sentences. ref1 = 'the cat is on the mat'.split() ref2 = 'there is a cat on the mat'.split() # Hypothesis sentence(s). hyp1 = 'the the the the the the the'.split() references = [ref1, ref2] # Testing modified unigram precision. hyp1_unigram_precision = _modified_precision(references, hyp1, n=1) assert (round(hyp1_unigram_precision, 4) == 0.2857) # With assertAlmostEqual at 4 place precision. self.assertAlmostEqual(hyp1_unigram_precision, 0.28571428, places=4) # Testing modified bigram precision. assert(_modified_precision(references, hyp1, n=2) == 0.0) # Example 2: the "of the" example. # Reference sentences ref1 = str('It is a guide to action that ensures that the military ' 'will forever heed Party commands').split() ref2 = str('It is the guiding principle which guarantees the military ' 'forces always being under the command of the Party').split() ref3 = str('It is the practical guide for the army always to heed ' 'the directions of the party').split() # Hypothesis sentence(s). hyp1 = 'of the'.split() references = [ref1, ref2, ref3] # Testing modified unigram precision. assert (_modified_precision(references, hyp1, n=1) == 1.0) # Testing modified bigram precision. assert(_modified_precision(references, hyp1, n=2) == 1.0) # Example 3: Proper MT outputs. hyp1 = str('It is a guide to action which ensures that the military ' 'always obeys the commands of the party').split() hyp2 = str('It is to insure the troops forever hearing the activity ' 'guidebook that party direct').split() references = [ref1, ref2, ref3] # Unigram precision. hyp1_unigram_precision = _modified_precision(references, hyp1, n=1) hyp2_unigram_precision = _modified_precision(references, hyp2, n=1) # Test unigram precision with assertAlmostEqual at 4 place precision. self.assertAlmostEqual(hyp1_unigram_precision, 0.94444444, places=4) self.assertAlmostEqual(hyp2_unigram_precision, 0.57142857, places=4) # Test unigram precision with rounding. assert (round(hyp1_unigram_precision, 4) == 0.9444) assert (round(hyp2_unigram_precision, 4) == 0.5714) # Bigram precision hyp1_bigram_precision = _modified_precision(references, hyp1, n=2) hyp2_bigram_precision = _modified_precision(references, hyp2, n=2) # Test bigram precision with assertAlmostEqual at 4 place precision. self.assertAlmostEqual(hyp1_bigram_precision, 0.58823529, places=4) self.assertAlmostEqual(hyp2_bigram_precision, 0.07692307, places=4) # Test bigram precision with rounding. assert (round(hyp1_bigram_precision, 4) == 0.5882) assert (round(hyp2_bigram_precision, 4) == 0.0769)
def test__modified_precision(self): """ Examples from the original BLEU paper http://www.aclweb.org/anthology/P02-1040.pdf """ # Example 1: the "the*" example. # Reference sentences. ref1 = "the cat is on the mat".split() ref2 = "there is a cat on the mat".split() # Hypothesis sentence(s). hyp1 = "the the the the the the the".split() references = [ref1, ref2] # Testing modified unigram precision. hyp1_unigram_precision = _modified_precision(references, hyp1, n=1) assert round(hyp1_unigram_precision, 4) == 0.2857 # Testing modified bigram precision. assert _modified_precision(references, hyp1, n=2) == 0.0 # Example 2: the "of the" example. # Reference sentences ref1 = str("It is a guide to action that ensures that the military " "will forever heed Party commands").split() ref2 = str( "It is the guiding principle which guarantees the military " "forces always being under the command of the Party" ).split() ref3 = str("It is the practical guide for the army always to heed " "the directions of the party").split() # Hypothesis sentence(s). hyp1 = "of the".split() references = [ref1, ref2, ref3] # Testing modified unigram precision. assert _modified_precision(references, hyp1, n=1) == 1.0 # Testing modified bigram precision. assert _modified_precision(references, hyp1, n=2) == 1.0 # Example 3: Proper MT outputs. hyp1 = str( "It is a guide to action which ensures that the military " "always obeys the commands of the party" ).split() hyp2 = str("It is to insure the troops forever hearing the activity " "guidebook that party direct").split() references = [ref1, ref2, ref3] # Unigram precision. hyp1_unigram_precision = _modified_precision(references, hyp1, n=1) hyp2_unigram_precision = _modified_precision(references, hyp2, n=1) # Test unigram precision without rounding. assert hyp1_unigram_precision == 0.9444444444444444 assert hyp2_unigram_precision == 0.5714285714285714 # Test unigram precision with rounding. assert round(hyp1_unigram_precision, 4) == 0.9444 assert round(hyp2_unigram_precision, 4) == 0.5714 # Bigram precision hyp1_bigram_precision = _modified_precision(references, hyp1, n=2) hyp2_bigram_precision = _modified_precision(references, hyp2, n=2) # Test bigram precision without rounding. assert hyp1_bigram_precision == 0.5882352941176471 assert hyp2_bigram_precision == 0.07692307692307693 # Test bigram precision with rounding. assert round(hyp1_bigram_precision, 4) == 0.5882 assert round(hyp2_bigram_precision, 4) == 0.0769