def test_text_generator_generate_sentence_proper_beginning(self):
    """
    Checks that class creates correct sentence from a context '<END>'
    without '<END>' in the beginning

    The generated sentence is expected to start with 'my'
    and to finish with '<END>'.
    """
    # Every sentence is closed by its own '<END>' token. Adjacent string
    # literals without a comma would silently fuse into one token
    # (e.g. 'music<END>'), so each element is separated explicitly.
    corpus = ('my', 'favourite', 'subject', 'is', 'maths', '<END>',
              'his', 'favourite', 'thing', 'is', 'music', '<END>',
              'i', 'have', 'a', 'favourite', 'film', '<END>',
              'my', 'family', 'likes', 'avatar', '<END>',
              'my', 'favourite', 'subject', 'is', 'music', '<END>')

    storage = WordStorage()
    storage.update(corpus)

    encoded = encode_text(storage, corpus)

    trie = NGramTrie(2, encoded)

    context = (storage.get_id('<END>'), )

    # In this corpus '<END>' is followed by 'my' twice and by 'his' / 'i'
    # once each. NOTE(review): presumably the generator picks the most
    # frequent continuation, which is why 'my' is expected - confirm.
    first_generated = storage.get_id('my')
    last_generated = storage.get_id('<END>')

    generator = NGramTextGenerator(storage, trie)
    actual = generator._generate_sentence(context)

    # The '<END>' taken from the context must not open the sentence.
    self.assertNotEqual(storage.get_id('<END>'), actual[0])
    self.assertEqual(first_generated, actual[0])
    self.assertEqual(last_generated, actual[-1])
def test_ngram_text_generator_duplicates_words(self):
    """
    Checks the sentence length for a corpus that only alternates
    the words 'stop' and 'it' before a single '<END>'
    """
    tokens = ('stop', 'it') * 3 + ('<END>', )

    vocabulary = WordStorage()
    vocabulary.update(tokens)

    bigram_trie = NGramTrie(2, encode_text(vocabulary, tokens))

    seed = (vocabulary.get_id('stop'), )

    text_generator = NGramTextGenerator(vocabulary, bigram_trie)
    sentence = text_generator._generate_sentence(seed)

    # context ids + 20 generated ids + 1 (presumably the closing '<END>')
    expected_length = 20 + len(seed) + 1
    self.assertEqual(expected_length, len(sentence))
def test_ngram_text_generator_identical_words(self):
    """
    Checks the sentence length for a corpus that repeats
    one and the same word before a single '<END>'
    """
    tokens = ('deadline', ) * 5 + ('<END>', )

    vocabulary = WordStorage()
    vocabulary.update(tokens)

    trigram_trie = NGramTrie(3, encode_text(vocabulary, tokens))

    seed = tuple(vocabulary.get_id(word)
                 for word in ('deadline', 'deadline'))

    text_generator = NGramTextGenerator(vocabulary, trigram_trie)
    sentence = text_generator._generate_sentence(seed)

    # the trailing +1 stands for <END>
    expected_length = 20 + len(seed) + 1
    self.assertEqual(expected_length, len(sentence))
def test_ngram_text_generator_generate_sentence_properly(self):
    """
    Simple case: the sentence generated from the context 'i'
    must end with '<END>'
    """
    tokens = (
        'i', 'have', 'a', 'cat', '<END>',
        'his', 'name', 'is', 'bruno', '<END>',
    )

    vocabulary = WordStorage()
    vocabulary.update(tokens)

    bigram_trie = NGramTrie(2, encode_text(vocabulary, tokens))

    seed = (vocabulary.get_id('i'), )
    end_id = vocabulary.get_id('<END>')

    text_generator = NGramTextGenerator(vocabulary, bigram_trie)
    sentence = text_generator._generate_sentence(seed)

    self.assertEqual(sentence[-1], end_id)
def test_length_of_sentence(self):
    """
    Checks that the generated sentence is not longer than
    the context plus 20 generated words plus '<END>'
    """
    tokens = (
        'i', 'have', 'a', 'cat',
        'his', 'name', 'is', 'bruno',
        'i', 'have', 'a', 'dog', 'too',
        'his', 'name', 'is', 'rex',
        'her', 'name', 'is', 'rex', 'too',
        'he', 'funny', '<END>',
    )

    vocabulary = WordStorage()
    vocabulary.update(tokens)

    bigram_trie = NGramTrie(2, encode_text(vocabulary, tokens))

    seed = (vocabulary.get_id('cat'), )

    text_generator = NGramTextGenerator(vocabulary, bigram_trie)
    sentence_length = len(text_generator._generate_sentence(seed))

    # upper bound: no more than 20 generated words + end
    longest_allowed = len(seed) + 21
    self.assertLessEqual(sentence_length, longest_allowed)
def test_ngram_text_generator_generate_sentence_no_end(self):
    """
    Checks that the sentence still ends with '<END>' when the corpus
    contains '<END>' only as its very last token
    """
    tokens = (
        'i', 'have', 'a', 'cat',
        'his', 'name', 'is', 'bruno',
        'i', 'have', 'a', 'dog', 'too',
        'his', 'name', 'is', 'rex',
        'her', 'name', 'is', 'rex', 'too',
        '<END>',
    )

    vocabulary = WordStorage()
    vocabulary.update(tokens)

    bigram_trie = NGramTrie(2, encode_text(vocabulary, tokens))

    seed = (vocabulary.get_id('cat'), )

    text_generator = NGramTextGenerator(vocabulary, bigram_trie)
    sentence = text_generator._generate_sentence(seed)

    # decode the last id back to a word to compare it with the marker
    final_word = vocabulary.get_word(sentence[-1])
    self.assertEqual('<END>', final_word)
def test_ngram_text_generator_generate_sentence_ideal(self):
    """
    Checks that the word right after the context 'i' is 'have'
    and that the sentence ends with '<END>'
    """
    tokens = (
        'i', 'have', 'a', 'cat', '<END>',
        'his', 'name', 'is', 'bruno', '<END>',
        'i', 'have', 'a', 'dog', 'too', '<END>',
        'his', 'name', 'is', 'rex', '<END>',
        'her', 'name', 'is', 'rex', 'too', '<END>',
    )

    vocabulary = WordStorage()
    vocabulary.update(tokens)

    bigram_trie = NGramTrie(2, encode_text(vocabulary, tokens))

    seed = (vocabulary.get_id('i'), )
    have_id = vocabulary.get_id('have')
    end_id = vocabulary.get_id('<END>')

    text_generator = NGramTextGenerator(vocabulary, bigram_trie)
    sentence = text_generator._generate_sentence(seed)

    # index 0 holds the one-word context, index 1 the first generated word
    self.assertEqual(sentence[1], have_id)
    self.assertEqual(sentence[-1], end_id)
def test_text_generator_generate_sentence_proper_number_of_end(self):
    """
    Checks that the generated sentence contains exactly one '<END>'
    """
    tokens = (
        'i', 'have', 'a', 'cat', '<END>',
        'his', 'name', 'is', 'bruno', '<END>',
        'i', 'have', 'a', 'dog', 'too', '<END>',
        'his', 'name', 'is', 'rex', '<END>',
        'there', 'are', 'a', 'cat', 'outside', '<END>',
        'here', 'is', 'a', 'cat', 'outside', '<END>',
    )

    vocabulary = WordStorage()
    vocabulary.update(tokens)

    trigram_trie = NGramTrie(3, encode_text(vocabulary, tokens))

    # NOTE(review): the context holds three ids while the trie is built
    # with size 3 - confirm the generator accepts a context of this length.
    seed = tuple(vocabulary.get_id(word)
                 for word in ('a', 'is', '<END>'))

    text_generator = NGramTextGenerator(vocabulary, trigram_trie)
    sentence = text_generator._generate_sentence(seed)

    end_occurrences = sentence.count(vocabulary.get_id('<END>'))
    self.assertEqual(1, end_occurrences)
def test_ngram_text_generator_end_at_the_beginning(self):
    """
    Checks that a sentence generated from the context '<END>' has
    '<END>' only at its end and nowhere else
    """
    tokens = (
        'i', 'like', 'to', 'read', '<END>',
        'he', 'likes', 'to', 'read', 'too',
        'i', 'like', 'a', 'book', 'called', '"Harry Potter"', '<END>',
        'he', 'likes', 'another', 'book', '<END>',
        'he', 'does', 'not', 'tell', 'me', 'name', '<END>',
    )

    vocabulary = WordStorage()
    vocabulary.update(tokens)

    bigram_trie = NGramTrie(2, encode_text(vocabulary, tokens))

    seed = (vocabulary.get_id('<END>'), )
    end_id = vocabulary.get_id('<END>')

    text_generator = NGramTextGenerator(vocabulary, bigram_trie)
    sentence = text_generator._generate_sentence(seed)

    self.assertEqual(end_id, sentence[-1])
    # a single occurrence in total means the only '<END>' is the last one
    end_occurrences = sentence.count(vocabulary.get_id('<END>'))
    self.assertEqual(1, end_occurrences)
def test_text_generator_generate_sentence_includes_context(self):
    """
    Checks that the generated sentence starts with the given context
    when the context does not contain '<END>'
    """
    tokens = (
        'i', 'have', 'a', 'cat', 'and', 'a', 'dog', '<END>',
        'his', 'name', 'is', 'bruno', '<END>',
        'i', 'have', 'a', 'dog', 'too', '<END>',
        'his', 'name', 'is', 'rex', '<END>',
        'there', 'are', 'a', 'cat', 'and', 'a', 'bear', 'outside', '<END>',
        'here', 'is', 'a', 'cat', 'outside', '<END>',
    )

    vocabulary = WordStorage()
    vocabulary.update(tokens)

    trigram_trie = NGramTrie(3, encode_text(vocabulary, tokens))

    text_generator = NGramTextGenerator(vocabulary, trigram_trie)

    seed = tuple(vocabulary.get_id(word) for word in ('a', 'cat'))
    sentence = text_generator._generate_sentence(seed)

    # the leading slice of the sentence must reproduce the context as is
    sentence_start = sentence[:len(seed)]
    self.assertEqual(seed, sentence_start)