Beispiel #1
0
    def test_accumulate(self):
        # Run one count-accumulation pass of ModelOne over TrivialCorpus,
        # starting from UniformTranslation, and check the resulting scores.
        #
        # Use this test if you're doing p(f|e)
        model = ModelOne()
        table = model.accumulate_counts(TrivialCorpus(), UniformTranslation())

        # Argument pairs handed to table.score(); each one is expected to
        # come out (almost) equal to .2.
        word_pairs = (
            ("the", "das"),
            ("the", "haus"),
            ("house", "haus"),
            (None, "haus"),
        )
        for first, second in word_pairs:
            self.assertAlmostEqual(table.score(first, second), .2)
Beispiel #2
0
    def test_accumulate(self):
        # Verifies the scores returned by ModelOne.accumulate_counts when it
        # is given TrivialCorpus and a UniformTranslation starting point.
        #
        # Use this test if you're doing p(f|e)
        learner = ModelOne()
        scores = learner.accumulate_counts(TrivialCorpus(),
                                           UniformTranslation())

        # All four lookups below should yield the same value.
        expected = .2
        check = self.assertAlmostEqual

        check(scores.score("the", "das"), expected)
        check(scores.score("the", "haus"), expected)
        check(scores.score("house", "haus"), expected)
        # None is also passed as the first argument in one lookup.
        check(scores.score(None, "haus"), expected)
Beispiel #3
0
    def test_score(self):
        # Train for a single EM iteration on the trivial corpus.
        model = ModelOne()
        model.em(TrivialCorpus(), 1)

        # After one iteration of EM, all the translation scores are .2
        # (see the accumulate test).  The overall translation probability
        # is therefore
        #
        # p(f|e) = 1 / (l_e + 1) ^ l_f *
        #               \prod_j^l_f \sum_i^l_e t(f_j | e_i)
        #
        #        = 1 / (4 + 1) ^ 4 * (5 * .2) ^ 4
        #        = 1 / 625
        #
        # There are 5 English words counting the "NULL"

        # NOTE(review): the language model is built from an empty list,
        # presumably so it does not affect the score -- confirm.
        model.build_lm([], 0)

        english_words = "the house is small".split()
        german_words = "das haus ist klein".split()
        actual = model.translate_score(english_words, german_words)

        # Expected score is the negated log(625., 2), matching 1 / 625 above.
        expected = -1.0 * log(625., 2)
        self.assertAlmostEqual(actual, expected)
Beispiel #4
0
    def test_score(self):
        # One EM iteration over TrivialCorpus, then score a sentence pair.
        aligner = ModelOne()
        aligner.em(TrivialCorpus(), 1)

        # Derivation of the expected value:
        #
        # After one iteration of EM, all the translation scores are .2,
        # so the overall translation probability is
        #
        # p(f|e) = 1 / (l_e + 1) ^ l_f *
        #               \prod_j^l_f \sum_i^l_e t(f_j | e_i)
        #
        #        = 1 / (4 + 1) ^ 4 * (5 * .2) ^ 4
        #        = 1 / 625
        #
        # (5 English words once "NULL" is counted.)

        # The language model is built from an empty list before scoring.
        aligner.build_lm([], 0)

        observed = aligner.translate_score(
            "the house is small".split(),
            "das haus ist klein".split(),
        )
        # The assertion compares against the negated log(625., 2).
        self.assertAlmostEqual(observed, -1.0 * log(625., 2))