def test_count_3gram(self):
    """Check the counts reported by a 3-gram model built from ``self.sents``.

    Both the two-token tuples and the three-token tuples listed below are
    looked up through ``NGram.count`` and compared with the expected value.
    """
    model = NGram(3, self.sents)

    # Kept as an ordered list of (gram, expected count) pairs so the
    # lookups happen in a fixed, readable order.
    expected = [
        # two-token grams
        (('<s>', 'el'), 1),
        (('el', 'gato'), 1),
        (('gato', 'come'), 1),
        (('come', 'pescado'), 1),
        (('pescado', '.'), 1),
        (('<s>', '<s>'), 2),
        (('<s>', 'la'), 1),
        (('la', 'gata'), 1),
        (('gata', 'come'), 1),
        (('come', 'salmón'), 1),
        (('salmón', '.'), 1),
        # three-token grams
        (('<s>', '<s>', 'el'), 1),
        (('<s>', 'el', 'gato'), 1),
        (('el', 'gato', 'come'), 1),
        (('gato', 'come', 'pescado'), 1),
        (('come', 'pescado', '.'), 1),
        (('pescado', '.', '</s>'), 1),
        (('<s>', '<s>', 'la'), 1),
        (('<s>', 'la', 'gata'), 1),
        (('la', 'gata', 'come'), 1),
        (('gata', 'come', 'salmón'), 1),
        (('come', 'salmón', '.'), 1),
        (('salmón', '.', '</s>'), 1),
    ]

    for gram, want in expected:
        self.assertEqual(model.count(gram), want)
def test_count_2gram(self):
    """Check the counts reported by a 2-gram model built from ``self.sents``.

    One-token tuples are verified first, then two-token tuples, each via
    ``NGram.count``.
    """
    model = NGram(2, self.sents)

    unigram_expectations = (
        (('<s>',), 2),
        (('el',), 1),
        (('gato',), 1),
        (('come',), 2),
        (('pescado',), 1),
        (('.',), 2),
        (('la',), 1),
        (('gata',), 1),
        (('salmón',), 1),
    )
    bigram_expectations = (
        (('<s>', 'el'), 1),
        (('el', 'gato'), 1),
        (('gato', 'come'), 1),
        (('come', 'pescado'), 1),
        (('pescado', '.'), 1),
        (('.', '</s>'), 2),
        (('<s>', 'la'), 1),
        (('la', 'gata'), 1),
        (('gata', 'come'), 1),
        (('come', 'salmón'), 1),
        (('salmón', '.'), 1),
    )

    for gram, want in unigram_expectations + bigram_expectations:
        self.assertEqual(model.count(gram), want)
def test_count_3gram_unary_sentence(self):
    """Check 3-gram counts on a corpus that includes a one-token sentence.

    The corpus is built locally (instead of using ``self.sents``) so that
    it can contain the single-word sentence ``'unaria'``.

    This test has its own name on purpose: it used to be a second
    ``test_count_3gram`` definition, which rebinds the name and keeps the
    3-gram test on ``self.sents`` from ever being collected.
    """
    sents = [
        'el gato come pescado .'.split(),
        'la gata come salmón .'.split(),
        'unaria'.split(),  # sentence with a single token
    ]
    ngram = NGram(3, sents)

    counts = {
        # two-token grams
        ('<s>', 'el'): 1,
        ('el', 'gato'): 1,
        ('gato', 'come'): 1,
        ('come', 'pescado'): 1,
        ('pescado', '.'): 1,
        ('<s>', 'la'): 1,
        ('la', 'gata'): 1,
        ('gata', 'come'): 1,
        ('come', 'salmón'): 1,
        ('salmón', '.'): 1,
        ('<s>', 'unaria'): 1,
        # three-token grams
        ('<s>', 'el', 'gato'): 1,
        ('el', 'gato', 'come'): 1,
        ('gato', 'come', 'pescado'): 1,
        ('come', 'pescado', '.'): 1,
        ('<s>', 'la', 'gata'): 1,
        ('la', 'gata', 'come'): 1,
        ('gata', 'come', 'salmón'): 1,
        ('come', 'salmón', '.'): 1,
        ('salmón', '.', '</s>'): 1,
        ('<s>', 'unaria', '</s>'): 1,
    }

    for gram, c in counts.items():
        self.assertEqual(ngram.count(gram), c)
def test_count_1gram(self):
    """Check the counts reported by a 1-gram model built from ``self.sents``.

    The empty tuple is looked up first, followed by each one-token tuple,
    all through ``NGram.count``.
    """
    model = NGram(1, self.sents)

    expected = [
        ((), 12),  # the empty gram
        (('el',), 1),
        (('gato',), 1),
        (('come',), 2),
        (('pescado',), 1),
        (('.',), 2),
        (('</s>',), 2),
        (('la',), 1),
        (('gata',), 1),
        (('salmón',), 1),
    ]

    for gram, want in expected:
        self.assertEqual(model.count(gram), want)