def test_cond_prob_1gram_no_addone(self):
    """Check unigram probabilities when add-one smoothing is disabled.

    An order-1 interpolated model built with ``addone=False`` is expected
    to return the same values as an unsmoothed n-gram model.
    """
    unigram = InterpolatedNGram(1, self.sents, gamma=1.0, addone=False)

    # Expected relative frequencies; 12.0 is presumably the total token
    # count of the fixture corpus (set up outside this method).
    expected = {
        'pescado': 1 / 12.0,
        'come': 2 / 12.0,
        'salame': 0.0,
    }

    for word in expected:
        self.assertAlmostEqual(
            unigram.cond_prob(word),
            expected[word],
            msg=word,
        )
def test_cond_prob_1gram_no_addone(self):
    """Verify the order-1 model without add-one smoothing.

    With ``addone=False`` the model should behave just like an unsmoothed
    n-gram model, so each expected value is written as a plain ratio.
    """
    lm = InterpolatedNGram(1, self.sents, gamma=1.0, addone=False)

    # (token, expected probability) pairs, checked in this order.
    # NOTE(review): 12.0 looks like the corpus token total -- confirm
    # against the fixture defined in setUp.
    cases = [
        ('pescado', 1 / 12.0),
        ('come', 2 / 12.0),
        ('salame', 0.0),
    ]

    for word, want in cases:
        got = lm.cond_prob(word)
        self.assertAlmostEqual(got, want, msg=word)
def test_cond_prob_2gram_no_addone(self):
    """Check bigram conditional probabilities with add-one disabled.

    The expected value for each (token, previous token) pair is computed
    by hand as ``l1 * 0.5 + (1 - l1) * 1 / 12.0``, where
    ``l1 = c1 / (c1 + gamma)`` and ``c1`` is the count of the conditioning
    token 'come'.
    """
    gamma = 1.0
    model = InterpolatedNGram(2, self.sents, gamma, addone=False)

    c1 = 2.0  # count for 'come'
    l1 = c1 / (c1 + gamma)  # interpolation weight of the first term

    probs = {
        ('pescado', 'come'): l1 * 0.5 + (1.0 - l1) * 1 / 12.0,
        ('salmón', 'come'): l1 * 0.5 + (1.0 - l1) * 1 / 12.0,
        ('salame', 'come'): 0.0,
    }

    for (token, prev), p in probs.items():
        # The expected values are built from float arithmetic, so compare
        # with a tolerance: exact equality can fail on rounding alone if
        # the model combines the same terms in a different order.
        self.assertAlmostEqual(model.cond_prob(token, [prev]), p, msg=token)
def test_cond_prob_2gram_no_addone(self):
    """Verify order-2 conditional probabilities without add-one smoothing.

    Each expected value combines two hand-computed terms: one weighted by
    ``l1 = c1 / (c1 + gamma)`` and one weighted by ``1 - l1``.
    """
    gamma = 1.0
    bigram = InterpolatedNGram(2, self.sents, gamma, addone=False)

    c1 = 2.0  # count for 'come' (and '.')
    l1 = c1 / (c1 + gamma)
    rest = 1.0 - l1  # weight left over for the second term

    # (token, previous token, expected probability), checked in order.
    # NOTE(review): 12.0 is presumably the fixture's total token count.
    cases = [
        ('pescado', 'come', l1 * 0.5 + rest * 1 / 12.0),
        ('salmón', 'come', l1 * 0.5 + rest * 1 / 12.0),
        ('salame', 'come', 0.0),
        ('</s>', '.', l1 * 1.0 + rest * 2 / 12.0),
    ]

    for token, prev, want in cases:
        got = bigram.cond_prob(token, [prev])
        self.assertAlmostEqual(got, want, msg=token)
def test_cond_prob_2gram_no_addone(self):
    """Verify order-2 conditional probabilities without add-one smoothing.

    Expected values are built from ``l1 = c1 / (c1 + gamma)`` and the
    fixture attribute ``self.total``; the previous token is passed to
    ``cond_prob`` as a one-element tuple.
    """
    gamma = 1.0
    bigram = InterpolatedNGram(2, self.sents, gamma, addone=False)

    c1 = 2.0  # count for 'come' (and '.')
    l1 = c1 / (c1 + gamma)
    rest = 1.0 - l1  # weight left over for the second term

    # (token, previous token, expected probability), checked in order.
    cases = [
        ('pescado', 'come', l1 * 0.5 + rest * 1 / self.total),
        ('salmón', 'come', l1 * 0.5 + rest * 1 / self.total),
        ('salame', 'come', 0.0),
        ('</s>', '.', l1 * 1.0 + rest * 2 / self.total),
    ]

    for token, prev, want in cases:
        context = (prev, )
        got = bigram.cond_prob(token, context)
        self.assertAlmostEqual(got, want, msg=token)