def test_count_2gram(self):
    """Verify count() for every 0-, 1- and 2-tuple in the expected table.

    The model is built as InterpolatedNGram(2, self.sents, gamma=1.0).
    Each key of the table is passed to count() and the result is compared
    with the stored value; the key itself is used as the failure message.
    """
    model = InterpolatedNGram(2, self.sents, gamma=1.0)

    expected = {
        # empty tuple
        (): 12,
        # 1-tuples
        ('el',): 1,
        ('gato',): 1,
        ('come',): 2,
        ('pescado',): 1,
        ('.',): 2,
        ('</s>',): 2,
        ('la',): 1,
        ('gata',): 1,
        ('salmón',): 1,
        # 2-tuples
        ('<s>', 'el'): 1,
        ('el', 'gato'): 1,
        ('gato', 'come'): 1,
        ('come', 'pescado'): 1,
        ('pescado', '.'): 1,
        ('.', '</s>'): 2,
        ('<s>', 'la'): 1,
        ('la', 'gata'): 1,
        ('gata', 'come'): 1,
        ('come', 'salmón'): 1,
        ('salmón', '.'): 1,
    }

    for gram in expected:
        self.assertEqual(model.count(gram), expected[gram], gram)
def test_count_2gram(self):
    """Compare count() with a table of expected values.

    Uses InterpolatedNGram(2, self.sents, gamma=1.0). The table holds the
    empty tuple, single-token tuples and token-pair tuples; a mismatch is
    reported with the offending tuple as the assertion message.
    """
    bigram_model = InterpolatedNGram(2, self.sents, gamma=1.0)

    expected_counts = {
        (): 12,

        ('el',): 1,
        ('gato',): 1,
        ('come',): 2,
        ('pescado',): 1,
        ('.',): 2,
        ('</s>',): 2,
        ('la',): 1,
        ('gata',): 1,
        ('salmón',): 1,

        ('<s>', 'el'): 1,
        ('el', 'gato'): 1,
        ('gato', 'come'): 1,
        ('come', 'pescado'): 1,
        ('pescado', '.'): 1,
        ('.', '</s>'): 2,
        ('<s>', 'la'): 1,
        ('la', 'gata'): 1,
        ('gata', 'come'): 1,
        ('come', 'salmón'): 1,
        ('salmón', '.'): 1,
    }

    for tokens, want in expected_counts.items():
        got = bigram_model.count(tokens)
        self.assertEqual(got, want, tokens)
def test_held_out(self):
    """Verify count() when the model is built without a gamma argument.

    The model is InterpolatedNGram(1, self.sents). Each key of the expected
    table is passed to count() and compared with the stored value; the key
    is used as the failure message.
    """
    unigram_model = InterpolatedNGram(1, self.sents)

    # only first sentence (second sentence is held-out data)
    expected = {
        (): 6,
        ('el',): 1,
        ('gato',): 1,
        ('come',): 1,
        ('pescado',): 1,
        ('.',): 1,
        ('</s>',): 1,
    }

    for gram in expected:
        self.assertEqual(unigram_model.count(gram), expected[gram], gram)
def test_held_out(self):
    """Compare count() with expected values for a model built without gamma.

    Constructs InterpolatedNGram(1, self.sents) and asserts, for every tuple
    in the table, that count() returns the listed number. The tuple is passed
    as the assertion message.
    """
    held_out_model = InterpolatedNGram(1, self.sents)

    # only first sentence (second sentence is held-out data)
    expected_counts = {
        (): 6,
        ('el',): 1,
        ('gato',): 1,
        ('come',): 1,
        ('pescado',): 1,
        ('.',): 1,
        ('</s>',): 1,
    }

    for tokens, want in expected_counts.items():
        got = held_out_model.count(tokens)
        self.assertEqual(got, want, tokens)
def test_count_1gram(self):
    """Verify count() for the empty tuple and every listed 1-tuple.

    The model is built as InterpolatedNGram(1, self.sents, gamma=1.0).
    Each key of the table is passed to count() and the result is compared
    with the stored value; the key itself is used as the failure message.
    """
    unigram_model = InterpolatedNGram(1, self.sents, gamma=1.0)

    expected = {
        # empty tuple
        (): 12,
        # 1-tuples
        ('el',): 1,
        ('gato',): 1,
        ('come',): 2,
        ('pescado',): 1,
        ('.',): 2,
        ('</s>',): 2,
        ('la',): 1,
        ('gata',): 1,
        ('salmón',): 1,
    }

    for gram in expected:
        self.assertEqual(unigram_model.count(gram), expected[gram], gram)
def test_count_1gram(self):
    """Compare count() with a table of expected values.

    Uses InterpolatedNGram(1, self.sents, gamma=1.0). The table holds the
    empty tuple and single-token tuples; a mismatch is reported with the
    offending tuple as the assertion message.
    """
    one_gram_model = InterpolatedNGram(1, self.sents, gamma=1.0)

    expected_counts = {
        (): 12,

        ('el',): 1,
        ('gato',): 1,
        ('come',): 2,
        ('pescado',): 1,
        ('.',): 2,
        ('</s>',): 2,
        ('la',): 1,
        ('gata',): 1,
        ('salmón',): 1,
    }

    for tokens, want in expected_counts.items():
        got = one_gram_model.count(tokens)
        self.assertEqual(got, want, tokens)