def test_count_2gram(self):
        # Build an n=2 AddOneNGram over the fixture sentences, then compare
        # count() for every expected tuple and V() against fixed values.
        bigram_model = AddOneNGram(2, self.sents)

        # Expected counts for single-token tuples.
        single_token_counts = {
            ('<s>', ): 2,
            ('el', ): 1,
            ('gato', ): 1,
            ('come', ): 2,
            ('pescado', ): 1,
            ('.', ): 2,
            ('la', ): 1,
            ('gata', ): 1,
            ('salmón', ): 1,
        }
        # Expected counts for two-token tuples.
        token_pair_counts = {
            ('<s>', 'el'): 1,
            ('el', 'gato'): 1,
            ('gato', 'come'): 1,
            ('come', 'pescado'): 1,
            ('pescado', '.'): 1,
            ('.', '</s>'): 2,
            ('<s>', 'la'): 1,
            ('la', 'gata'): 1,
            ('gata', 'come'): 1,
            ('come', 'salmón'): 1,
            ('salmón', '.'): 1,
        }

        # Single-token tuples are checked first, then pairs; the tuple itself
        # is passed as the assertion message so a failure names the n-gram.
        for expected_table in (single_token_counts, token_pair_counts):
            for ngram, expected_count in expected_table.items():
                self.assertEqual(
                    bigram_model.count(ngram), expected_count, ngram)

        # size of the vocabulary
        self.assertEqual(bigram_model.V(), 9)
Beispiel #2
0
 def test_count_3_addone_ngram(self):
     # Build an n=3 AddOneNGram over each fixture corpus and check V().
     # Expected vocabulary sizes: 8 for self.sents2 and 9 for self.sents.
     expected_vocab_sizes = (
         (self.sents2, 8),
         (self.sents, 9),
     )
     for corpus, vocab_size in expected_vocab_sizes:
         trigram_model = AddOneNGram(3, corpus)
         self.assertEqual(trigram_model.V(), vocab_size)