def test_text_generator_no_context(self):
    """
    Verify that the back-off generator still produces output when the
    supplied context ('cat', 'dogs') never occurs in the corpus
    """
    words = (
        'cat', 'has', 'paws', '<END>',
        'dogs', 'have', 'noses', '<END>',
        'cat', 'has', 'whiskers', '<END>',
    )
    word_storage = WordStorage()
    word_storage.update(words)
    encoded_words = encode_text(word_storage, words)

    # Tries of three different sizes; the trigram one is passed first
    trigram_trie = NGramTrie(3, encoded_words)
    bigram_trie = NGramTrie(2, encoded_words)
    fourgram_trie = NGramTrie(4, encoded_words)

    unseen_context = tuple(
        word_storage.get_id(word) for word in ('cat', 'dogs')
    )

    backoff = BackOffGenerator(
        word_storage, trigram_trie, bigram_trie, fourgram_trie
    )
    generated = backoff.generate_text(unseen_context, 3)

    # Passes only if every element of the generated sequence is truthy
    self.assertTrue(all(generated))
def realize_backoff_generator(text):
    """
    Build a back-off generator from the given text and return decoded output

    The text is stored and encoded, a bigram and a trigram trie are built
    from the encoded form, and generation starts from the context
    ('if', 'you') with 3 as the second argument of generate_text.

    :param text: corpus handed to WordStorage.update and encode_text
    :return: whatever decode_text returns for the generated sequence
    """
    word_storage = WordStorage()
    word_storage.update(text)
    encoded = encode_text(word_storage, text)

    bigram_trie = NGramTrie(2, encoded)
    trigram_trie = NGramTrie(3, encoded)

    start_context = tuple(
        word_storage.get_id(word) for word in ('if', 'you')
    )

    # The trigram trie goes first, the bigram trie is the extra one
    generator = BackOffGenerator(word_storage, trigram_trie, bigram_trie)
    return decode_text(
        word_storage, generator.generate_text(start_context, 3)
    )
""" Lab 4 implementation starter """ from lab_4.main import BackOffGenerator, encode_text, WordStorage, decode_text, tokenize_by_sentence from lab_4.ngrams.ngram_trie import NGramTrie if __name__ == '__main__': with open('lab_3/Frank_Baum.txt', 'r', encoding='utf-8') as file_frank: corpus = tokenize_by_sentence(file_frank.read()) storage = WordStorage() storage.update(corpus) encoded = encode_text(storage, corpus) trie = NGramTrie(3, encoded) four = NGramTrie(4, encoded) context = ( storage.get_id('when'), storage.get_id('the'), ) generator = BackOffGenerator(storage, four, trie) generated_text = generator.generate_text(context, 5) RESULT = decode_text(storage, generated_text) # DO NOT REMOVE NEXT LINE - KEEP IT INTENTIONALLY LAST assert RESULT, 'Encoding not working'
# --- Likelihood-based generation: 4-gram trie, three-word context ---
trie = NGramTrie(4, encoded)
context = tuple(storage.get_id(word) for word in ('i', 'have', 'a'))

generator_likelihood = LikelihoodBasedTextGenerator(storage, trie)
generated_text = generator_likelihood.generate_text(context, 3)
decoded_gen_text = decode_text(storage, generated_text)

print('Likelihood generator generates sentences:')
print(*decoded_gen_text, sep='. ', end='.\n')

# --- Back-off generation: trigram trie first, bigram trie as the extra ---
# `trie` and `context` are rebound here for the second generator
two = NGramTrie(2, encoded)
trie = NGramTrie(3, encoded)
context = tuple(storage.get_id(word) for word in ('i', 'have'))

generator_backoff = BackOffGenerator(storage, trie, two)
actual = generator_backoff.generate_text(context, 3)
RESULT = decode_text(storage, actual)

print('Backoff generator generates sentences:')
print(*RESULT, sep='. ', end='.\n')

# The decoded back-off output must match these three sentences exactly
assert RESULT == (
    'I have a colourful dog',
    'I havent a cat too',
    'They have beautiful dogs',
), 'Text generator does not work'
""" Lab 4 starter """ from lab_4.main import BackOffGenerator, encode_text, WordStorage, decode_text from lab_4.ngrams.ngram_trie import NGramTrie if __name__ == '__main__': corpus = ('i', 'have', 'a', 'cat', '<END>', 'his', 'name', 'is', 'bruno', '<END>', 'i', 'have', 'a', 'dog', 'too', '<END>', 'his', 'name', 'is', 'rex', '<END>', 'her', 'name', 'is', 'rex', 'too', '<END>') storage = WordStorage() storage.update(corpus) encoded_text = encode_text(storage, corpus) trie = NGramTrie(3, encoded_text) four = NGramTrie(4, encoded_text) context = ( storage.get_id('his'), storage.get_id('name'), storage.get_id('is'), ) generator = BackOffGenerator(storage, trie, four) text = generator.generate_text(context, 3) actual = decode_text(storage, text) RESULT = ('His name is bruno', 'I have a cat', 'His name is bruno') assert RESULT == actual, 'Not work'