def test_generate_token(self):
    """Check which tokens a generator built from ``NGram(2, self.sents)`` emits.

    The expected continuations depend on the ``self.sents`` fixture, which is
    defined outside this method.
    """
    ngram = NGram(2, self.sents)
    generator = NGramGenerator(ngram)

    # Repeat the checks many times; presumably generate_token samples
    # randomly, so a single draw would not be convincing -- TODO confirm.
    for _ in range(100):
        # after 'el' always comes 'gato':
        token = generator.generate_token(('el',))
        self.assertEqual(token, 'gato')

        # after 'come' may come 'pescado' or 'salmón'
        token = generator.generate_token(('come',))
        # assertIn reports the unexpected token when the check fails.
        self.assertIn(token, ['pescado', 'salmón'])
def test_generate_token(self):
    """Verify the next-token choices of a generator over ``NGram(2, self.sents)``.

    Expected tokens come from the ``self.sents`` fixture (not visible here).
    """
    # NOTE(review): a definition with this same name and the same checks
    # appears directly before this one; if both live in the same class, this
    # later definition replaces the earlier one -- confirm and drop one.
    ngram = NGram(2, self.sents)
    generator = NGramGenerator(ngram)

    # The assertions are repeated 100 times; presumably token generation is
    # randomized and the repetition guards against lucky draws -- verify.
    for _ in range(100):
        # after 'el' always comes 'gato':
        token = generator.generate_token(('el',))
        self.assertEqual(token, 'gato')

        # after 'come' may come 'pescado' or 'salmón'
        token = generator.generate_token(('come',))
        # assertIn shows the offending token in the failure message.
        self.assertIn(token, ['pescado', 'salmón'])
def test_generate_token_3and4gram(self):
    """Check generated tokens for ``NGram(3, ...)`` and ``NGram(4, ...)`` models.

    Both models are built from the ``self.sents3`` fixture, which is defined
    outside this method; the expected continuations depend on its contents.
    """
    ngram3 = NGram(3, self.sents3)
    ngram4 = NGram(4, self.sents3)
    generator3 = NGramGenerator(ngram3)
    generator4 = NGramGenerator(ngram4)

    # Repeat the checks many times; presumably generate_token samples
    # randomly, so a single draw would not be convincing -- TODO confirm.
    for _ in range(100):
        # after 'come pescado' always comes 'y'
        token = generator3.generate_token(('come', 'pescado'))
        self.assertEqual(token, 'y')

        # after 'come pescado y' always comes 'duerme'
        token = generator4.generate_token(('come', 'pescado', 'y'))
        self.assertEqual(token, 'duerme')

        # a sentence may start with 'el' or 'la'; the context passed here
        # consists only of '<s>' markers (two for the 3-gram model, three
        # for the 4-gram model)
        token = generator3.generate_token(('<s>', '<s>'))
        self.assertIn(token, ['el', 'la'])

        token = generator4.generate_token(('<s>', '<s>', '<s>'))
        self.assertIn(token, ['el', 'la'])