"""
Lab 4 starter: text generation with a size-2 n-gram trie
"""

import lab_4
from lab_4.main import (
    NGramTextGenerator,
    WordStorage,
    encode_text,
    tokenize_by_sentence,
)
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    raw_text = 'I have a dog.\nHis name is Rex'
    tokens = tokenize_by_sentence(raw_text)

    # Fill the storage with the tokens before encoding them.
    vocabulary = WordStorage()
    vocabulary.update(tokens)
    print(vocabulary)

    token_ids = encode_text(vocabulary, tokens)
    print(token_ids)

    bigram_trie = NGramTrie(2, token_ids)
    print(bigram_trie)

    text_generator = NGramTextGenerator(vocabulary, bigram_trie)

    # Single-id context; the trie above is built with size 2.
    seed = (vocabulary.get_id('a'),)
    print(seed)

    RESULT = text_generator.generate_text(seed, 3)
    print(RESULT)

    # Fails when the generator returns an empty (falsy) result.
    assert RESULT, 'Not working'
"""
Lab 4
"""

from lab_4.main import (
    NGramTextGenerator,
    WordStorage,
    encode_text,
    tokenize_by_sentence,
)
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    source_text = ('I have a cat. His name is Bruno. I have a dog. '
                   'Her name is Rex. Her name is Rex too.')
    tokens = tokenize_by_sentence(source_text)

    # Fill the storage with the tokens, then encode them with it.
    vocabulary = WordStorage()
    vocabulary.update(tokens)
    token_ids = encode_text(vocabulary, tokens)

    bigram_trie = NGramTrie(2, token_ids)

    # Single-id context; the trie above is built with size 2.
    seed = (vocabulary.get_id('i'),)
    text_generator = NGramTextGenerator(vocabulary, bigram_trie)

    RESULT = text_generator.generate_text(seed, 4)
    print(RESULT)

    # Fails when the generator returns an empty (falsy) result.
    assert RESULT, "Not working"
"""
Text generator
"""

from lab_4.ngrams.ngram_trie import NGramTrie
from lab_4.main import tokenize_by_sentence
from lab_4.main import WordStorage
from lab_4.main import encode_text
from lab_4.main import NGramTextGenerator

if __name__ == '__main__':
    text = 'I have a cat. His name is Bruno. I have a dog too. ' \
           'His name is Rex. Her name is Rex too'
    # Turn the raw text into the token corpus used by the steps below.
    corpus = tokenize_by_sentence(text)

    # Fill the storage with the corpus, then encode the corpus with it.
    word_storage = WordStorage()
    word_storage.update(corpus)

    encoded_text = encode_text(word_storage, corpus)

    # Trie built with size 2 over the encoded corpus.
    n_gram_trie = NGramTrie(2, encoded_text)

    n_gram_text_generator = NGramTextGenerator(word_storage, n_gram_trie)

    # NOTE(review): the context holds two ids while the trie size is 2 -
    # confirm the generator accepts a context of this length.
    context = (word_storage.get_id('i'), word_storage.get_id('have'))

    text_generated = n_gram_text_generator.generate_text(context, 2)

    # NOTE(review): in the visible code `output_text` is never appended to and
    # `word` is only assigned - presumably the loop continues past this point.
    output_text = []
    for word_id in text_generated:
        word = word_storage.get_word(word_id)
"""
Lab 4 starter
"""

from lab_4.main import (
    BackOffGenerator,
    WordStorage,
    decode_text,
    encode_text,
)
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Pre-tokenized corpus, one sentence per row.
    tokens = (
        'i', 'have', 'a', 'cat', '<END>',
        'his', 'name', 'is', 'bruno', '<END>',
        'i', 'have', 'a', 'dog', 'too', '<END>',
        'his', 'name', 'is', 'rex', '<END>',
        'her', 'name', 'is', 'rex', 'too', '<END>',
    )

    vocabulary = WordStorage()
    vocabulary.update(tokens)
    token_ids = encode_text(vocabulary, tokens)

    # Two tries of different sizes are handed to the back-off generator.
    trigram_trie = NGramTrie(3, token_ids)
    fourgram_trie = NGramTrie(4, token_ids)

    seed = tuple(vocabulary.get_id(word) for word in ('his', 'name', 'is'))

    backoff_generator = BackOffGenerator(vocabulary, trigram_trie, fourgram_trie)
    generated_ids = backoff_generator.generate_text(seed, 3)
    actual = decode_text(vocabulary, generated_ids)

    # In this script RESULT holds the expected sentences.
    RESULT = ('His name is bruno', 'I have a cat', 'His name is bruno')
    assert RESULT == actual, 'Not work'
"""
Text generation implementation starter
"""

from lab_4.main import (
    BackOffGenerator,
    LikelihoodBasedTextGenerator,
    WordStorage,
    decode_text,
    encode_text,
)
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Pre-tokenized corpus, one sentence per row.
    corpus = (
        'i', 'have', 'a', 'colourful', 'dog', '<END>',
        'i', 'have', 'colourful', 'pets', 'too', '<END>',
        'they', 'have', 'beautiful', 'dogs', '<END>',
        'i', 'havent', 'a', 'cat', '<END>',
        'i', 'havent', 'a', 'cat', 'too', '<END>',
        'we', 'havent', 'a', 'cat', 'too', '<END>',
    )

    storage = WordStorage()
    storage.update(corpus)
    encoded = encode_text(storage, corpus)

    trie = NGramTrie(4, encoded)

    # Three-id context; the trie above is built with size 4.
    context = tuple(storage.get_id(word) for word in ('i', 'have', 'a'))

    generator_likelihood = LikelihoodBasedTextGenerator(storage, trie)
    generated_text = generator_likelihood.generate_text(context, 3)
    decoded_gen_text = decode_text(storage, generated_text)

    print('Likelihood generator generates sentences:')
    # Sentences are printed on one line, separated by '. ' and closed by '.'.
    print(*decoded_gen_text, sep='. ', end='.\n')
class WordStorageTest(unittest.TestCase):
    """
    Check WordStorage class functionality.

    All tests should pass for score 4 or above.
    """

    def test_word_storage_correct_instance_creation(self):
        """
        A new word storage starts with an empty storage
        """
        word_storage = WordStorage()
        self.assertEqual(word_storage.storage, {})

    # ---------------------------- _put_word ----------------------------

    def test_word_storage_put_word_ideal(self):
        """
        A new word is added to storage and its id is returned
        """
        word_storage = WordStorage()
        word = 'word'
        actual = word_storage._put_word(word)
        self.assertIn(word, word_storage.storage)
        expected = word_storage.get_id(word)
        self.assertEqual(expected, actual)

    def test_word_storage_put_word_none(self):
        """
        None raises ValueError and is not added to storage
        """
        word_storage = WordStorage()
        self.assertRaises(ValueError, word_storage._put_word, None)
        self.assertEqual(word_storage.storage, {})

    def test_word_storage_put_word_not_str(self):
        """
        A non-string word raises ValueError and is not added to storage
        """
        word_storage = WordStorage()
        self.assertRaises(ValueError, word_storage._put_word, 123)
        self.assertEqual(word_storage.storage, {})

    def test_word_storage_put_word_existing(self):
        """
        An existing word keeps its id and storage is left unchanged
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        actual = word_storage._put_word('word')
        self.assertEqual(word_storage.storage, {'word': 1})
        self.assertEqual(1, actual)

    # ----------------------------- get_id ------------------------------

    def test_word_storage_get_id_ideal(self):
        """
        get_id returns the id of a stored word
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertEqual(1, word_storage.get_id('word'))

    def test_word_storage_get_id_none(self):
        """
        get_id raises ValueError for None
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(ValueError, word_storage.get_id, None)

    def test_word_storage_get_id_not_str(self):
        """
        get_id raises ValueError for a non-string word
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(ValueError, word_storage.get_id, 123)

    def test_word_storage_get_id_not_in_storage(self):
        """
        get_id raises KeyError for a word that is not in storage
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(KeyError, word_storage.get_id, 'word2')

    # ----------------------------- update ------------------------------

    def test_word_storage_update_ideal(self):
        """
        update stores every distinct token of the corpus
        """
        word_storage = WordStorage()
        corpus = ('i', 'have', 'a', 'cat', '<END>',
                  'his', 'name', 'is', 'bruno', '<END>')
        word_storage.update(corpus)
        # Ten tokens, but '<END>' occurs twice.
        self.assertEqual(len(word_storage.storage), 9)

    def test_word_storage_update_duplicates(self):
        """
        update stores repeated tokens only once
        """
        word_storage = WordStorage()
        sentences = ('i', 'have', 'a', 'cat', '<END>',
                     'i', 'have', 'a', 'cat', '<END>')
        word_storage.update(sentences)
        self.assertEqual(len(word_storage.storage), 5)

    def test_word_storage_update_empty(self):
        """
        update with an empty corpus leaves storage empty
        """
        word_storage = WordStorage()
        word_storage.update(())
        self.assertEqual(word_storage.storage, {})

    def test_word_storage_update_none(self):
        """
        update raises ValueError for None and leaves storage empty
        """
        word_storage = WordStorage()
        self.assertRaises(ValueError, word_storage.update, None)
        self.assertEqual(word_storage.storage, {})

    def test_word_storage_update_not_tuple(self):
        """
        update raises ValueError for a non-tuple corpus and leaves storage empty
        """
        word_storage = WordStorage()
        sentences = ['i', 'have', 'a', 'cat', '<END>']
        self.assertRaises(ValueError, word_storage.update, sentences)
        self.assertEqual(word_storage.storage, {})

    # The side effect is `_put_word` bound to a separate WordStorage instance
    # created when this class body is executed, so this test only checks that
    # update calls `_put_word`, not what ends up in `word_storage.storage`.
    @patch('lab_4.main.WordStorage._put_word', side_effect=WordStorage()._put_word)
    def test_word_storage_update_calls_required_function(self, mock):
        """
        update calls the _put_word method
        """
        word_storage = WordStorage()
        sentences = ('i', 'have', 'a', 'cat', '<END>')
        word_storage.update(sentences)
        self.assertTrue(mock.called)

    # ---------------------------- get_word -----------------------------

    def test_word_storage_get_word_ideal(self):
        """
        get_word returns the word stored under the given id
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertEqual('word', word_storage.get_word(1))

    def test_word_storage_get_word_none(self):
        """
        get_word raises ValueError for None
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(ValueError, word_storage.get_word, None)

    def test_word_storage_get_word_incorrect_num(self):  # new test
        """
        get_word raises ValueError for float, negative and zero ids
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        for bad_input in (2.8, -3, 0):
            # subTest reports every failing input instead of stopping at the first.
            with self.subTest(bad_input=bad_input):
                self.assertRaises(ValueError, word_storage.get_word, bad_input)

    def test_word_storage_get_word_not_num(self):
        """
        get_word raises ValueError for a non-numeric id
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(ValueError, word_storage.get_word, 'word2')

    def test_word_storage_get_word_not_in_storage(self):
        """
        get_word raises KeyError for an id that is not in storage
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(KeyError, word_storage.get_word, 123)
"""
Lab 4 starter: tokenize a text, fill the word storage and encode the corpus
"""

from lab_4.main import tokenize_by_sentence
from lab_4.main import WordStorage
from lab_4.main import encode_text

if __name__ == '__main__':
    text = 'She is happy. He is happy.'
    corpus = tokenize_by_sentence(text)

    word_storage = WordStorage()
    word_storage.update(corpus)

    # Kept as a smoke check: encoding must run without raising.
    encoded_text = encode_text(word_storage, corpus)

    EXPECTED = "('she', 'is', 'happy', '<END>', 'he', 'is', 'happy', '<END>')"

    # Compare the actual tokenization with the expected one; the previous
    # version compared a hard-coded string with itself and could never fail.
    RESULT = str(corpus)
    print(RESULT)

    assert RESULT == EXPECTED, 'Something went wrong'
"""
Lab 4 implementation start file
"""

from lab_4.main import LikelihoodBasedTextGenerator, encode_text, WordStorage, decode_text
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Pre-tokenized corpus; '<END>' closes each run of words.
    corpus = ('i', 'have', 'a', 'cat', '<END>',
              'his', 'name', 'is', 'bruno', '<END>',
              'i', 'have', 'a', 'dog', 'too', '<END>',
              'his', 'name', 'is', 'rex', '<END>',
              'her', 'name', 'is', 'rex', 'too', '<END>')

    # Fill the storage with the corpus, then encode the corpus with it.
    storage = WordStorage()
    storage.update(corpus)

    encoded = encode_text(storage, corpus)

    # Trie built with size 3 over the encoded corpus.
    trie = NGramTrie(3, encoded)

    # NOTE(review): `end` is not used anywhere in the visible part of the script.
    end = storage.get_id('<END>')

    # Two-id context looked up in the storage.
    context = (
        storage.get_id('name'),
        storage.get_id('is'),
    )

    generator = LikelihoodBasedTextGenerator(storage, trie)

    to_decode = generator.generate_text(context, 2)

    # NOTE(review): EXPECTED and RESULT are never compared in the visible code -
    # confirm that an assert follows.
    EXPECTED = ('Name is rex', 'Her name is rex')
    RESULT = decode_text(storage, to_decode)
"""
Lab 4 starter: text generation with a size-2 n-gram trie
"""

from lab_4.main import (
    NGramTextGenerator,
    WordStorage,
    encode_text,
    tokenize_by_sentence,
)
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    source_text = ("This is a dog. It likes running. This is a cat. "
                   "It likes sleeping. Everyone likes sleeping too.")
    tokens = tokenize_by_sentence(source_text)

    # Fill the storage with the tokens, then encode them with it.
    vocabulary = WordStorage()
    vocabulary.update(tokens)
    token_ids = encode_text(vocabulary, tokens)

    bigram_trie = NGramTrie(2, token_ids)

    # Single-id context; the trie above is built with size 2.
    seed = (vocabulary.get_id('likes'),)
    generator = NGramTextGenerator(vocabulary, bigram_trie)

    RESULT = generator.generate_text(seed, 4)
    print(RESULT)

    # Fails when the generator returns an empty (falsy) result.
    assert RESULT, "Someting went worng.."
TEXT = '''You sit here, dear. All right. Morning! Morning! Well, what have you got? Well, there is egg and bacon; egg sausage and bacon; egg and spam; egg bacon and spam; egg bacon sausage and spam; spam bacon sausage and spam; spam egg spam bacon and spam; spam sausage spam bacon spam tomato and spam; spam egg and spam; spam baked beans spam... Spam! Lovely spam! Lovely spam! ...or Lobster Thermidor au Crevette with a Mornay sauce served in a Provencale manner with shallots and aubergines garnished with truffle pate, brandy and with a fried egg on top and spam. Have you got anything without spam? Well, there's spam egg sausage and spam, that's not got much spam in it. I do not want ANY spam! Why cannot she have egg bacon spam and sausage? THAT'S got spam in it! Has not got as much spam in it as spam egg sausage and spam, has it? Could you do the egg bacon spam and sausage without the spam then? Urgghh! What do you mean 'Urgghh'? I do not like spam! Lovely spam! Wonderful spam! Shut up! Bloody Vikings! You cannot have egg bacon spam and sausage without the spam. I do not like spam! Sshh, dear, do not cause a fuss. I will have your spam. I love it. I am having spam beaked beans spam and spam! Lovely spam! Wonderful spam! Shut up! Baked beans are off. Well could I have her spam instead of the baked beans then?''' corpus = tokenize_by_sentence(TEXT) storage = WordStorage() storage.update(corpus) encoded_text = encode_text(storage, corpus) n_gram_trie = NGramTrie(3, encoded_text) generator = LikelihoodBasedTextGenerator(storage, n_gram_trie) context = (storage.get_id('bloody'), storage.get_id('vikings')) generated_text = generator.generate_text(context, 5) decoded_text = decode_text(storage, generated_text) IS_WORKING = True for sentence in decoded_text:
"""
Text generation starter: n-gram generator over a two-sentence text
"""

import unittest

from lab_4.main import tokenize_by_sentence
from lab_4.main import WordStorage
from lab_4.main import encode_text
from lab_4.main import NGramTextGenerator
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    text = 'I have a cat.\nHis name is Bruno'
    corpus = tokenize_by_sentence(text)

    # Fill the storage with the corpus, then encode the corpus with it.
    storage = WordStorage()
    storage.update(corpus)
    encoded = encode_text(storage, corpus)

    # Trie built with size 2; the context below holds a single id.
    trie = NGramTrie(2, encoded)
    generator = NGramTextGenerator(storage, trie)
    context = (storage.get_id('bruno'),)

    actual = generator.generate_text(context, 3)

    # Expected sequence of word ids for this text and context.
    RESULT = (9, 5, 6, 7, 8, 9, 5, 6, 7, 8, 9, 5)
    # DO NOT REMOVE NEXT LINE - KEEP IT INTENTIONALLY LAST
    assert RESULT == actual, 'Generated text does not match the expected ids'
Text generator """ from lab_4.ngrams.ngram_trie import NGramTrie from lab_4.main import WordStorage, BackOffGenerator from lab_4.main import encode_text, decode_text if __name__ == '__main__': corpus = ('there', 'are', 'a', 'lot', 'of', 'flowers', '<END>', 'there', 'are', 'some', 'dogs', 'outside', '<END>', 'this', 'is', 'my', 'dog', '<END>', 'there', 'is', 'a', 'cat', '<END>', 'there', 'is', 'a', 'cat', 'outside', '<END>', 'here', 'is', 'a', 'cat', 'outside', '<END>') storage = WordStorage() storage.update(corpus) encoded = encode_text(storage, corpus) trie = NGramTrie(3, encoded) four = NGramTrie(4, encoded) context = ( storage.get_id('there'), storage.get_id('are'), storage.get_id('cat'), storage.get_id('outside'), ) generator = BackOffGenerator(storage, trie, four)