Exemple #1
0
    def test_count_2gram(self):
        # Build the model from the fixture sentences with first argument 2
        # and gamma fixed explicitly to 1.0.
        model = InterpolatedNGram(2, self.sents, gamma=1.0)

        # (gram, expected count) pairs, checked in the order listed:
        # the empty tuple first, then every 1-tuple, then every 2-tuple.
        expected_counts = [
            ((), 12),
            (('el',), 1),
            (('gato',), 1),
            (('come',), 2),
            (('pescado',), 1),
            (('.',), 2),
            (('</s>',), 2),
            (('la',), 1),
            (('gata',), 1),
            (('salmón',), 1),
            (('<s>', 'el'), 1),
            (('el', 'gato'), 1),
            (('gato', 'come'), 1),
            (('come', 'pescado'), 1),
            (('pescado', '.'), 1),
            (('.', '</s>'), 2),
            (('<s>', 'la'), 1),
            (('la', 'gata'), 1),
            (('gata', 'come'), 1),
            (('come', 'salmón'), 1),
            (('salmón', '.'), 1),
        ]
        for gram, expected in expected_counts:
            # The gram is passed as the failure message so a mismatch
            # identifies the offending entry.
            self.assertEqual(model.count(gram), expected, gram)
    def test_count_2gram(self):
        model = InterpolatedNGram(2, self.sents, gamma=1.0)

        # Expected counts for the empty tuple and for every 1-tuple.
        short_grams = {
            (): 12,
            ('el',): 1,
            ('gato',): 1,
            ('come',): 2,
            ('pescado',): 1,
            ('.',): 2,
            ('</s>',): 2,
            ('la',): 1,
            ('gata',): 1,
            ('salmón',): 1,
        }
        # Expected counts for every 2-tuple.
        pair_grams = {
            ('<s>', 'el'): 1,
            ('el', 'gato'): 1,
            ('gato', 'come'): 1,
            ('come', 'pescado'): 1,
            ('pescado', '.'): 1,
            ('.', '</s>'): 2,
            ('<s>', 'la'): 1,
            ('la', 'gata'): 1,
            ('gata', 'come'): 1,
            ('come', 'salmón'): 1,
            ('salmón', '.'): 1,
        }

        # Check the shorter grams first, then the pairs; the gram itself is
        # the failure message so a mismatch names the offending entry.
        for table in (short_grams, pair_grams):
            for gram, expected in table.items():
                self.assertEqual(model.count(gram), expected, gram)
Exemple #3
0
    def test_held_out(self):
        # No gamma is passed here, unlike the count tests.
        ngram = InterpolatedNGram(1, self.sents)

        # The expected counts cover the first sentence only: the second
        # sentence is set aside as held-out data.
        expected_counts = [
            ((), 6),
            (('el',), 1),
            (('gato',), 1),
            (('come',), 1),
            (('pescado',), 1),
            (('.',), 1),
            (('</s>',), 1),
        ]
        for gram, expected in expected_counts:
            self.assertEqual(ngram.count(gram), expected, gram)
    def test_held_out(self):
        ngram = InterpolatedNGram(1, self.sents)

        # Only the first sentence contributes to these counts; the second
        # sentence is reserved as held-out data.
        first_sentence_counts = {
            (): 6,
            ('el',): 1,
            ('gato',): 1,
            ('come',): 1,
            ('pescado',): 1,
            ('.',): 1,
            ('</s>',): 1,
        }
        for gram in first_sentence_counts:
            observed = ngram.count(gram)
            # The gram doubles as the failure message.
            self.assertEqual(observed, first_sentence_counts[gram], gram)
Exemple #5
0
    def test_count_1gram(self):
        # Build the model from the fixture sentences with first argument 1
        # and gamma fixed explicitly to 1.0.
        ngram = InterpolatedNGram(1, self.sents, gamma=1.0)

        # (gram, expected count) pairs: the empty tuple, then each 1-tuple.
        expected_counts = [
            ((), 12),
            (('el',), 1),
            (('gato',), 1),
            (('come',), 2),
            (('pescado',), 1),
            (('.',), 2),
            (('</s>',), 2),
            (('la',), 1),
            (('gata',), 1),
            (('salmón',), 1),
        ]
        for gram, expected in expected_counts:
            self.assertEqual(ngram.count(gram), expected, gram)
    def test_count_1gram(self):
        ngram = InterpolatedNGram(1, self.sents, gamma=1.0)

        # Expected count for the empty tuple and for every 1-tuple.
        expected_by_gram = {
            (): 12,
            ('el',): 1,
            ('gato',): 1,
            ('come',): 2,
            ('pescado',): 1,
            ('.',): 2,
            ('</s>',): 2,
            ('la',): 1,
            ('gata',): 1,
            ('salmón',): 1,
        }
        for gram in expected_by_gram:
            observed = ngram.count(gram)
            # The gram doubles as the failure message.
            self.assertEqual(observed, expected_by_gram[gram], gram)