예제 #1
0
import lab_4
from lab_4.main import WordStorage,  tokenize_by_sentence,encode_text, NGramTextGenerator
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Demo run: tokenize a short text, register its tokens in a WordStorage,
    # encode it, build an NGramTrie and generate text from a one-word context.
    # Every intermediate object is printed for inspection.
    raw_text = 'I have a dog.\nHis name is Rex'
    tokens = tokenize_by_sentence(raw_text)

    vocabulary = WordStorage()
    vocabulary.update(tokens)
    print(vocabulary)

    encoded_tokens = encode_text(vocabulary, tokens)
    print(encoded_tokens)

    ngram_trie = NGramTrie(2, encoded_tokens)
    print(ngram_trie)

    text_generator = NGramTextGenerator(vocabulary, ngram_trie)
    start_context = (vocabulary.get_id('a'),)
    print(start_context)

    RESULT = text_generator.generate_text(start_context, 3)
    print(RESULT)
    # Smoke check only: the generator must return something truthy.
    assert RESULT, 'Not working'
예제 #2
0
"""
Lab 4
"""

from lab_4.main import WordStorage, tokenize_by_sentence, encode_text, NGramTextGenerator
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Demo run: tokenize, store, encode, build the trie, then generate text
    # starting from the id of the word 'i'.
    source_text = ('I have a cat. His name is Bruno. I have a dog. '
                   'Her name is Rex. Her name is Rex too.')
    tokens = tokenize_by_sentence(source_text)

    vocabulary = WordStorage()
    vocabulary.update(tokens)

    encoded_tokens = encode_text(vocabulary, tokens)
    ngram_trie = NGramTrie(2, encoded_tokens)

    start_context = (vocabulary.get_id('i'),)
    text_generator = NGramTextGenerator(vocabulary, ngram_trie)

    RESULT = text_generator.generate_text(start_context, 4)
    print(RESULT)

    # Smoke check only: the generator must return something truthy.
    assert RESULT, "Not working"
예제 #3
0
"""
Text generator
"""

from lab_4.ngrams.ngram_trie import NGramTrie
from lab_4.main import tokenize_by_sentence
from lab_4.main import WordStorage
from lab_4.main import encode_text
from lab_4.main import NGramTextGenerator

if __name__ == '__main__':
    # Demo run: tokenize a short text, register its tokens in a WordStorage,
    # encode the corpus, build an NGramTrie and ask NGramTextGenerator for
    # generated ids starting from a context of word ids.
    text = 'I have a cat. His name is Bruno. I have a dog too. ' \
           'His name is Rex. Her name is Rex too'
    corpus = tokenize_by_sentence(text)

    word_storage = WordStorage()
    word_storage.update(corpus)

    encoded_text = encode_text(word_storage, corpus)

    n_gram_trie = NGramTrie(2, encoded_text)

    n_gram_text_generator = NGramTextGenerator(word_storage, n_gram_trie)

    # NOTE(review): the trie is created with 2 but the context carries two
    # word ids - confirm NGramTextGenerator accepts a context of this length.
    context = (word_storage.get_id('i'), word_storage.get_id('have'))

    text_generated = n_gram_text_generator.generate_text(context, 2)
    output_text = []

    # Map every generated id back to its word.
    for word_id in text_generated:
        word = word_storage.get_word(word_id)
        # NOTE(review): `word` is never appended to `output_text` here; the
        # rest of the loop body appears to be missing - confirm against the
        # full script.
"""
Lab 4 starter
"""
from lab_4.main import BackOffGenerator, encode_text, WordStorage, decode_text
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Demo run for BackOffGenerator: encode a pre-tokenized corpus, build two
    # NGramTrie objects (created with 3 and 4), generate text from a
    # three-word context and compare the decoded output with the expectation.
    corpus = (
        'i', 'have', 'a', 'cat', '<END>',
        'his', 'name', 'is', 'bruno', '<END>',
        'i', 'have', 'a', 'dog', 'too', '<END>',
        'his', 'name', 'is', 'rex', '<END>',
        'her', 'name', 'is', 'rex', 'too', '<END>',
    )

    storage = WordStorage()
    storage.update(corpus)
    encoded_corpus = encode_text(storage, corpus)

    main_trie = NGramTrie(3, encoded_corpus)
    extra_trie = NGramTrie(4, encoded_corpus)

    # Ids are looked up in the same left-to-right order as the words appear.
    start_context = tuple(
        storage.get_id(context_word)
        for context_word in ('his', 'name', 'is')
    )
    backoff_generator = BackOffGenerator(storage, main_trie, extra_trie)

    generated_ids = backoff_generator.generate_text(start_context, 3)
    actual = decode_text(storage, generated_ids)

    RESULT = ('His name is bruno', 'I have a cat', 'His name is bruno')
    assert RESULT == actual, 'Not work'
예제 #5
0
"""
Text generation implementation starter
"""

from lab_4.main import WordStorage, encode_text, LikelihoodBasedTextGenerator, decode_text, BackOffGenerator
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Demo run for LikelihoodBasedTextGenerator: encode a pre-tokenized
    # corpus, build an NGramTrie created with 4, generate text from a
    # three-word context and print the decoded sentences.
    corpus = (
        'i', 'have', 'a', 'colourful', 'dog', '<END>',
        'i', 'have', 'colourful', 'pets', 'too', '<END>',
        'they', 'have', 'beautiful', 'dogs', '<END>',
        'i', 'havent', 'a', 'cat', '<END>',
        'i', 'havent', 'a', 'cat', 'too', '<END>',
        'we', 'havent', 'a', 'cat', 'too', '<END>',
    )

    storage = WordStorage()
    storage.update(corpus)

    encoded_corpus = encode_text(storage, corpus)
    ngram_trie = NGramTrie(4, encoded_corpus)

    # Ids are looked up in the same left-to-right order as the words appear.
    start_context = tuple(
        storage.get_id(context_word)
        for context_word in ('i', 'have', 'a')
    )

    likelihood_generator = LikelihoodBasedTextGenerator(storage, ngram_trie)
    generated_ids = likelihood_generator.generate_text(start_context, 3)
    sentences = decode_text(storage, generated_ids)

    print('Likelihood generator generates sentences:')
    # Sentences are separated by '. ' and the line is closed with a full stop.
    print(*sentences, sep='. ', end='.\n')
class WordStorageTest(unittest.TestCase):
    """
    Checks WordStorage class functionality.
        All tests should pass for score 4 or above
    """
    def test_word_storage_correct_instance_creation(self):
        """
        a new word storage instance starts with an empty storage
        """
        word_storage = WordStorage()
        expected = {}
        self.assertEqual(word_storage.storage, expected)

# --------------------------------------------------------

    def test_word_storage_put_word_ideal(self):
        """
        word is added to storage and its id is returned
        """
        word_storage = WordStorage()
        word = 'word'
        actual = word_storage._put_word(word)
        # assertIn reports both the word and the storage content on failure
        self.assertIn(word, word_storage.storage)
        expected = word_storage.get_id(word)
        self.assertEqual(expected, actual)

    def test_word_storage_put_word_none(self):
        """
        none raises ValueError and is not added to storage
        """
        word_storage = WordStorage()
        letter = None
        self.assertRaises(ValueError, word_storage._put_word, letter)
        self.assertEqual(word_storage.storage, {})

    def test_word_storage_put_word_not_str(self):
        """
        non string word raises ValueError and is not added to storage
        """
        word_storage = WordStorage()
        letter = 123
        self.assertRaises(ValueError, word_storage._put_word, letter)
        self.assertEqual(word_storage.storage, {})

    def test_word_storage_put_word_existing(self):
        """
        existing word keeps its id and storage stays unchanged
        """
        word_storage = WordStorage()
        word = 'word'
        word_storage.storage = {'word': 1}
        expected = 1
        actual = word_storage._put_word(word)
        self.assertEqual(word_storage.storage, {'word': 1})
        self.assertEqual(expected, actual)

# -----------------------------------------------------------------

    def test_word_storage_get_id_ideal(self):
        """
        ideal case for get_id
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        expected = 1
        actual = word_storage.get_id('word')
        self.assertEqual(expected, actual)

    def test_word_storage_get_id_none(self):
        """
        get_id raises ValueError for none
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(ValueError, word_storage.get_id, None)

    def test_word_storage_get_id_not_str(self):
        """
        get_id raises ValueError for a non string word
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(ValueError, word_storage.get_id, 123)

    def test_word_storage_get_id_not_in_storage(self):
        """
        get_id raises KeyError for a word missing from storage
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(KeyError, word_storage.get_id, 'word2')

# -----------------------------------------------------------

    def test_word_storage_update_ideal(self):
        """
        ideal case for update: every distinct token is stored once
        """
        word_storage = WordStorage()
        corpus = ('i', 'have', 'a', 'cat', '<END>', 'his', 'name', 'is',
                  'bruno', '<END>')
        word_storage.update(corpus)
        # ten tokens, but '<END>' occurs twice
        self.assertEqual(len(word_storage.storage), 9)

    def test_word_storage_update_duplicates(self):
        """
        repeated tokens are stored only once by update
        """
        word_storage = WordStorage()
        sentences = ('i', 'have', 'a', 'cat', '<END>', 'i', 'have', 'a', 'cat',
                     '<END>')
        word_storage.update(sentences)
        self.assertEqual(len(word_storage.storage), 5)

    def test_word_storage_update_empty(self):
        """
        update with an empty tuple leaves storage empty
        """
        word_storage = WordStorage()
        sentences = ()
        word_storage.update(sentences)
        self.assertEqual(word_storage.storage, {})

    def test_word_storage_update_none(self):
        """
        update raises ValueError for none and leaves storage empty
        """
        word_storage = WordStorage()
        self.assertRaises(ValueError, word_storage.update, None)
        self.assertEqual(word_storage.storage, {})

    def test_word_storage_update_not_tuple(self):
        """
        update raises ValueError for a list and leaves storage empty
        """
        word_storage = WordStorage()
        sentences = ['i', 'have', 'a', 'cat', '<END>']
        self.assertRaises(ValueError, word_storage.update, sentences)
        self.assertEqual(word_storage.storage, {})

    @patch('lab_4.main.WordStorage._put_word',
           side_effect=WordStorage()._put_word)
    def test_word_storage_update_calls_required_function(self, mock):
        """
        update delegates to the _put_word method
        """
        word_storage = WordStorage()
        sentences = ('i', 'have', 'a', 'cat', '<END>')
        word_storage.update(sentences)
        self.assertTrue(mock.called)

# ------------------------------------------------------------------------------------

    def test_word_storage_get_word_ideal(self):
        """
        ideal case for get_word
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        expected = 'word'
        actual = word_storage.get_word(1)
        self.assertEqual(expected, actual)

    def test_word_storage_get_word_none(self):
        """
        get_word raises ValueError for none
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(ValueError, word_storage.get_word, None)

    def test_word_storage_get_word_incorrect_num(self):  # new test
        """
        get_word raises ValueError for a float, a negative and a zero id
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}

        bad_inputs = (2.8, -3, 0)
        for bad_input in bad_inputs:
            # subTest names the failing input and keeps checking the others
            with self.subTest(bad_input=bad_input):
                self.assertRaises(ValueError, word_storage.get_word, bad_input)

    def test_word_storage_get_word_not_num(self):
        """
        get_word raises ValueError for a string id
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(ValueError, word_storage.get_word, 'word2')

    def test_word_storage_get_word_not_in_storage(self):
        """
        get_word raises KeyError for an id missing from storage
        """
        word_storage = WordStorage()
        word_storage.storage = {'word': 1}
        self.assertRaises(KeyError, word_storage.get_word, 123)
예제 #7
0
from lab_4.main import tokenize_by_sentence
from lab_4.main import WordStorage
from lab_4.main import encode_text

if __name__ == '__main__':
    # Demo run: tokenize two short sentences, register and encode them, then
    # check the token sequence produced by the tokenizer.
    text = 'She is happy. He is happy.'
    corpus = tokenize_by_sentence(text)

    word_storage = WordStorage()
    word_storage.update(corpus)

    # Encoding is executed so that a failure inside encode_text surfaces here,
    # even though its result is not part of the checked value.
    encoded_text = encode_text(word_storage, corpus)

    EXPECTED = "('she', 'is', 'happy', '<END>', 'he', 'is', 'happy', '<END>')"
    # RESULT is derived from the real tokenizer output so that the assertion
    # can actually fail when tokenization goes wrong.
    # NOTE(review): assumes tokenize_by_sentence returns a tuple of tokens,
    # as the expected literal implies - confirm against lab_4.main.
    RESULT = str(corpus)
    print(RESULT)
    assert RESULT == EXPECTED, 'Something went wrong'
예제 #8
0
"""
Lab 4 implementation start file
"""

from lab_4.main import LikelihoodBasedTextGenerator, encode_text, WordStorage, decode_text
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Demo run for LikelihoodBasedTextGenerator: register and encode a
    # pre-tokenized corpus, build an NGramTrie created with 3, generate text
    # from a two-word context and decode the generated ids.
    corpus = ('i', 'have', 'a', 'cat', '<END>', 'his', 'name', 'is', 'bruno',
              '<END>', 'i', 'have', 'a', 'dog', 'too', '<END>', 'his', 'name',
              'is', 'rex', '<END>', 'her', 'name', 'is', 'rex', 'too', '<END>')

    storage = WordStorage()
    storage.update(corpus)

    encoded = encode_text(storage, corpus)

    trie = NGramTrie(3, encoded)

    # NOTE(review): `end` is not used anywhere in the visible part of the script.
    end = storage.get_id('<END>')
    context = (
        storage.get_id('name'),
        storage.get_id('is'),
    )

    generator = LikelihoodBasedTextGenerator(storage, trie)

    to_decode = generator.generate_text(context, 2)

    # NOTE(review): EXPECTED and RESULT are never compared in the visible
    # code; a closing assert presumably follows - confirm against the full script.
    EXPECTED = ('Name is rex', 'Her name is rex')
    RESULT = decode_text(storage, to_decode)
예제 #9
0
from lab_4.ngrams.ngram_trie import NGramTrie
from lab_4.main import tokenize_by_sentence
from lab_4.main import WordStorage
from lab_4.main import encode_text
from lab_4.main import NGramTextGenerator

if __name__ == '__main__':
    # Demo run: tokenize a short text, register its tokens in a WordStorage,
    # encode it, build an NGramTrie and generate text starting from the id of
    # the word 'likes'.
    text = ("This is a dog. It likes running. This is a cat. "
            "It likes sleeping. Everyone likes sleeping too.")
    text_in_tokens = tokenize_by_sentence(text)

    word_storage = WordStorage()
    word_storage.update(text_in_tokens)

    encoded_text = encode_text(word_storage, text_in_tokens)

    n_gram_trie = NGramTrie(2, encoded_text)
    context = (word_storage.get_id('likes'),)
    text_generator = NGramTextGenerator(word_storage, n_gram_trie)

    RESULT = text_generator.generate_text(context, 4)

    print(RESULT)

    # Smoke check only: the generator must return something truthy.
    assert RESULT, "Something went wrong.."
예제 #10
0
    TEXT = '''You sit here, dear. All right. Morning! Morning! Well, what have you got? Well, there is egg and
    bacon; egg sausage and bacon; egg and spam; egg bacon and spam; egg bacon sausage and spam; spam bacon sausage and
    spam; spam egg spam bacon and spam; spam sausage spam bacon spam tomato and spam; spam egg and spam; spam baked
    beans spam... Spam! Lovely spam! Lovely spam! ...or Lobster Thermidor au Crevette with a Mornay sauce served in a
    Provencale manner with shallots and aubergines garnished with truffle pate, brandy and with a fried egg on top and
    spam. Have you got anything without spam? Well, there's spam egg sausage and spam, that's not got much spam in it.
    I do not want ANY spam! Why cannot she have egg bacon spam and sausage? THAT'S got spam in it! Has not got as much
    spam in it as spam egg sausage and spam, has it? Could you do the egg bacon spam and sausage without the spam then?
    Urgghh! What do you mean 'Urgghh'? I do not like spam! Lovely spam! Wonderful spam! Shut up! Bloody Vikings! You
    cannot have egg bacon spam and sausage without the spam. I do not like spam! Sshh, dear, do not cause a fuss.
    I will have your spam. I love it. I am having spam beaked beans spam and spam! Lovely spam! Wonderful spam! 
    Shut up! Baked beans are off. Well could I have her spam instead of the baked beans then?'''

    corpus = tokenize_by_sentence(TEXT)

    storage = WordStorage()
    storage.update(corpus)

    encoded_text = encode_text(storage, corpus)

    n_gram_trie = NGramTrie(3, encoded_text)

    generator = LikelihoodBasedTextGenerator(storage, n_gram_trie)

    context = (storage.get_id('bloody'), storage.get_id('vikings'))
    generated_text = generator.generate_text(context, 5)

    decoded_text = decode_text(storage, generated_text)

    IS_WORKING = True
    for sentence in decoded_text:
예제 #11
0
"""
Concordance implementation starter
"""

import unittest
from lab_4.main import tokenize_by_sentence
from lab_4.main import WordStorage
from lab_4.main import encode_text
from lab_4.main import NGramTextGenerator
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Demo run: tokenize a two-sentence text, register and encode it, build an
    # NGramTrie and compare the ids generated from the context 'bruno' with
    # the expected id sequence.
    source_text = 'I have a cat.\nHis name is Bruno'
    tokens = tokenize_by_sentence(source_text)

    word_storage = WordStorage()
    word_storage.update(tokens)

    encoded_tokens = encode_text(word_storage, tokens)
    ngram_trie = NGramTrie(2, encoded_tokens)
    text_generator = NGramTextGenerator(word_storage, ngram_trie)

    start_context = (word_storage.get_id('bruno'),)
    # The '<END>' lookup is kept: it fails early if the marker was not stored.
    end_id = word_storage.get_id('<END>')
    actual = text_generator.generate_text(start_context, 3)

    RESULT = (9, 5, 6, 7, 8, 9, 5, 6, 7, 8, 9, 5)
    # DO NOT REMOVE NEXT LINE - KEEP IT INTENTIONALLY LAST
    assert RESULT == actual, ''
예제 #12
0
"""
Text generator
"""

from lab_4.ngrams.ngram_trie import NGramTrie
from lab_4.main import WordStorage, BackOffGenerator
from lab_4.main import encode_text, decode_text

if __name__ == '__main__':
    # Demo run for BackOffGenerator: register and encode a pre-tokenized
    # corpus, build two NGramTrie objects (created with 3 and 4) and construct
    # the generator from them.

    corpus = ('there', 'are', 'a', 'lot', 'of', 'flowers', '<END>', 'there',
              'are', 'some', 'dogs', 'outside', '<END>', 'this', 'is', 'my',
              'dog', '<END>', 'there', 'is', 'a', 'cat', '<END>', 'there',
              'is', 'a', 'cat', 'outside', '<END>', 'here', 'is', 'a', 'cat',
              'outside', '<END>')

    storage = WordStorage()
    storage.update(corpus)

    encoded = encode_text(storage, corpus)

    trie = NGramTrie(3, encoded)
    four = NGramTrie(4, encoded)

    # NOTE(review): the context holds four word ids while the tries are
    # created with 3 and 4 - confirm BackOffGenerator accepts this length.
    context = (
        storage.get_id('there'),
        storage.get_id('are'),
        storage.get_id('cat'),
        storage.get_id('outside'),
    )

    # NOTE(review): `context` is not passed to the generator in the visible
    # code; the generation call presumably follows - confirm against the full script.
    generator = BackOffGenerator(storage, trie, four)