def test_sent_prob_3and4gram(self):
    """Check sentence probabilities of 3-gram and 4-gram models.

    Both models are built from ``self.sents3`` and are expected to give
    the same probability for every sentence in the table below.
    """
    trigram_model = NGram(3, self.sents3)
    fourgram_model = NGram(4, self.sents3)

    # (sentence, expected probability) pairs, checked in this order.
    expected_probs = (
        ('el gato come pescado y ronca .', 0.0),  # 'ronca' unseen
        ('la la la', 0.0),  # 'la' after 'la' unseen
        # The probability is determined by the opening word, i.e. whether
        # the sentence starts with 'la' or 'el'.
        ('el gato come pescado y duerme . ', 0.5),
        ('la gata come pescado y duerme . ', 0.5),
    )

    for sentence, expected in expected_probs:
        # Tokenize separately for each model so neither call sees a list
        # that the other one may have touched.
        self.assertAlmostEqual(
            trigram_model.sent_prob(sentence.split()), expected, msg=sentence)
        self.assertAlmostEqual(
            fourgram_model.sent_prob(sentence.split()), expected, msg=sentence)
def test_sent_prob_1gram(self):
    """Check sentence probabilities of a unigram model built on ``self.sents``."""
    unigram_model = NGram(1, self.sents)

    # 'come', '.' and '</s>' have prob 1/6, the rest have 1/12.
    p_frequent = 1 / 6.0
    p_rare = 1 / 12.0

    # (sentence, expected probability) pairs, checked in this order.
    expected_probs = (
        ('el gato come pescado .', p_frequent**3 * p_rare**3),
        ('la gata come salmón .', p_frequent**3 * p_rare**3),
        ('el gato come salame .', 0.0),  # 'salame' unseen
        ('la la la', p_frequent**1 * p_rare**3),
    )

    for sentence, expected in expected_probs:
        actual = unigram_model.sent_prob(sentence.split())
        self.assertAlmostEqual(actual, expected, msg=sentence)
def test_sent_prob_3gram(self):
    """Check sentence probabilities of a trigram model built on ``self.sents``."""
    trigram_model = NGram(3, self.sents)

    # After '<s>, <s>': 'el' and 'la' have prob 0.5.
    # Expected values are written as products of per-trigram factors.
    expected_probs = (
        ('el gato come pescado .', 0.5 * 1 * 1 * 1 * 1),
        ('la gata come salmón .', 0.5 * 1 * 1 * 1 * 1),
        # prob('gato come salmon') = 0
        ('el gato come salmón .', 0.5 * 1 * 1 * 0 * 1 * 1),
        ('el gato come salame .', 0.0),  # 'salame' unseen
        ('la la la', 0.0),  # 'la' after 'la' unseen
    )

    for sentence, expected in expected_probs:
        actual = trigram_model.sent_prob(sentence.split())
        self.assertAlmostEqual(actual, expected, msg=sentence)
def test_sent_prob_2gram(self):
    """Check sentence probabilities of a bigram model built on ``self.sents``."""
    bigram_model = NGram(2, self.sents)

    # After '<s>': 'el' and 'la' have prob 0.5.
    # After 'come': 'pescado' and 'salmón' have prob 0.5.
    two_way_choices = 0.5 * 0.5

    # (sentence, expected probability) pairs, checked in this order.
    expected_probs = (
        ('el gato come pescado .', two_way_choices),
        ('la gata come salmón .', two_way_choices),
        ('el gato come salmón .', two_way_choices),
        ('la gata come pescado .', two_way_choices),
        ('el gato come salame .', 0.0),  # 'salame' unseen
        ('la la la', 0.0),  # 'la' after 'la' unseen
    )

    for sentence, expected in expected_probs:
        actual = bigram_model.sent_prob(sentence.split())
        self.assertAlmostEqual(actual, expected, msg=sentence)