Exemple #1
0
    def test_cond_prob_1gram_no_addone(self):
        """Unigram model without add-one yields plain relative frequencies."""
        ngram = InterpolatedNGram(1, self.sents, gamma=1.0, addone=False)

        # For n=1 the model is expected to act like an unsmoothed n-gram, so
        # a word absent from the corpus ('salame') must get probability 0.
        expected = (
            ('pescado', 1 / 12.0),
            ('come', 2 / 12.0),
            ('salame', 0.0),
        )
        for word, want in expected:
            got = ngram.cond_prob(word)
            self.assertAlmostEqual(got, want, msg=word)
    def test_cond_prob_1gram_no_addone(self):
        """Check order-1 cond_prob values when addone is switched off."""
        unigram = InterpolatedNGram(1, self.sents, gamma=1.0, addone=False)

        # Expected to behave just like an unsmoothed n-gram: each value is
        # written as count / 12.0, and the unseen word maps to zero.
        expected = {
            'pescado': 1 / 12.0,
            'come': 2 / 12.0,
            'salame': 0.0,
        }
        for word in expected:
            self.assertAlmostEqual(
                unigram.cond_prob(word),
                expected[word],
                msg=word,
            )
    def test_cond_prob_2gram_no_addone(self):
        """Check bigram cond_prob values of the interpolated model, no add-one.

        Each expected value has the form ``l1 * a + (1 - l1) * b`` with
        ``l1 = c1 / (c1 + gamma)``; presumably ``a`` is the bigram estimate
        and ``b`` the unigram estimate (confirm against the model).
        """
        gamma = 1.0
        model = InterpolatedNGram(2, self.sents, gamma, addone=False)

        c1 = 2.0  # count for 'come'
        l1 = c1 / (c1 + gamma)

        probs = {
            ('pescado', 'come'): l1 * 0.5 + (1.0 - l1) * 1 / 12.0,
            ('salmón', 'come'): l1 * 0.5 + (1.0 - l1) * 1 / 12.0,
            ('salame', 'come'): 0.0,
        }
        for (token, prev), p in probs.items():
            # The expected values are computed floats, so compare with a
            # tolerance: exact equality would depend on the model evaluating
            # the formula in precisely the same order as this test does.
            self.assertAlmostEqual(model.cond_prob(token, [prev]), p,
                                   msg=token)
    def test_cond_prob_2gram_no_addone(self):
        """Check bigram cond_prob values of the interpolated model, no add-one."""
        gamma = 1.0
        ngram = InterpolatedNGram(2, self.sents, gamma, addone=False)

        count = 2.0  # count for 'come' (and '.')
        # Weight on the first term of each expected value (presumably the
        # bigram estimate); the remainder goes to the second term.
        weight = count / (count + gamma)
        rest = 1.0 - weight

        expected = (
            (('pescado', 'come'), weight * 0.5 + rest * 1 / 12.0),
            (('salmón', 'come'), weight * 0.5 + rest * 1 / 12.0),
            (('salame', 'come'), 0.0),
            (('</s>', '.'), weight * 1.0 + rest * 2 / 12.0),
        )
        for (word, prev), want in expected:
            got = ngram.cond_prob(word, [prev])
            self.assertAlmostEqual(got, want, msg=word)
Exemple #5
0
    def test_cond_prob_2gram_no_addone(self):
        """Check bigram cond_prob values of the interpolated model, no add-one.

        The second term of each expected value is divided by ``self.total``,
        which is set on the test case outside this method.
        """
        gamma = 1.0
        ngram = InterpolatedNGram(2, self.sents, gamma, addone=False)

        count = 2.0  # count for 'come' (and '.')
        # Weight on the first term of each expected value (presumably the
        # bigram estimate); the remainder goes to the second term.
        weight = count / (count + gamma)
        rest = 1.0 - weight

        expected = (
            (('pescado', 'come'), weight * 0.5 + rest * 1 / self.total),
            (('salmón', 'come'), weight * 0.5 + rest * 1 / self.total),
            (('salame', 'come'), 0.0),
            (('</s>', '.'), weight * 1.0 + rest * 2 / self.total),
        )
        for (word, prev), want in expected:
            # The context is passed as a one-element tuple.
            got = ngram.cond_prob(word, (prev, ))
            self.assertAlmostEqual(got, want, msg=word)