def test_accumulate(self):
    # Runs one accumulate_counts pass of a fresh ModelOne over TrivialCorpus,
    # seeded with UniformTranslation, and checks that every word pair listed
    # below scores .2 in the table that comes back.
    model = ModelOne()
    table = model.accumulate_counts(TrivialCorpus(), UniformTranslation())

    # Use this test if you're doing p(f|e)
    checked_pairs = (
        ("the", "das"),
        ("the", "haus"),
        ("house", "haus"),
        # None presumably stands in for the NULL source word -- confirm
        # against the model implementation.
        (None, "haus"),
    )
    for first_word, second_word in checked_pairs:
        self.assertAlmostEqual(table.score(first_word, second_word), .2)
def test_score(self):
    # Checks translate_score for one sentence pair after a single EM
    # iteration over TrivialCorpus.
    model = ModelOne()
    model.em(TrivialCorpus(), 1)

    # After one iteration of EM, all the translation scores are .2
    # (the same value test_accumulate checks). So the overall translation
    # probability is
    #
    #   p(f|e) = 1 / (l_e + 1) ^ l_f  *  \prod_j^l_f \sum_i^l_e t(f_j | e_i)
    #
    #          = 1 / (4 + 1) ^ 4  *  (5 * .2) ^ 4
    #          = 1 / 625
    #
    # There are 5 English words counting the "NULL".
    # The expected value below is that probability as a base-2 log.

    # NOTE(review): build_lm is handed an empty list and 0 -- presumably so
    # the language model adds nothing to the score; confirm against build_lm.
    model.build_lm([], 0)

    english_sentence = "the house is small".split()
    german_sentence = "das haus ist klein".split()
    observed = model.translate_score(english_sentence, german_sentence)

    self.assertAlmostEqual(observed, -1.0 * log(625., 2))