コード例 #1
0
    def test_decode_text_incorrect_storage(self):
        """
        Checks that decode_text raises ValueError for every invalid
            storage argument: empty tuple, empty list, int, None
            and a class object
        """
        words = (
            'i', 'have', 'a', 'cat', '<END>',
            'his', 'name', 'is', 'bruno', '<END>',
            'i', 'have', 'a', 'dog', 'too', '<END>',
            'his', 'name', 'is', 'rex', '<END>',
            'her', 'name', 'is', 'rex', 'too', '<END>',
        )

        word_storage = WordStorage()
        word_storage.update(words)

        trie = NGramTrie(3, encode_text(word_storage, words))
        context = tuple(word_storage.get_id(word) for word in ('name', 'is'))

        # a valid encoded text is generated first, so only the storage
        # argument passed to decode_text is wrong
        text_generator = LikelihoodBasedTextGenerator(word_storage, trie)
        generated = text_generator.generate_text(context, 2)

        for invalid_storage in ((), [], 123, None, NGramTrie):
            with self.assertRaises(ValueError):
                decode_text(invalid_storage, generated)
コード例 #2
0
    def test_decode_text_ideal_conditions(self):
        """
        Checks the form of every decoded sentence: no '<END>' inside,
            the first character is upper-case, the last one is a letter
        """
        words = (
            'i', 'have', 'a', 'cat', '<END>',
            'his', 'name', 'is', 'bruno', '<END>',
            'i', 'have', 'a', 'dog', 'too', '<END>',
            'his', 'name', 'is', 'rex', '<END>',
            'her', 'name', 'is', 'rex', 'too', '<END>',
        )

        word_storage = WordStorage()
        word_storage.update(words)

        trie = NGramTrie(3, encode_text(word_storage, words))
        context = tuple(word_storage.get_id(word) for word in ('name', 'is'))

        text_generator = LikelihoodBasedTextGenerator(word_storage, trie)
        decoded = decode_text(word_storage,
                              text_generator.generate_text(context, 2))

        for sentence in decoded:
            self.assertNotIn('<END>', sentence)
            self.assertTrue(sentence[0].isupper())
            self.assertTrue(sentence[-1].isalpha())
    def test_end(self):
        """
        Checks that the decoded result does not contain the end marker:
            a single generated sentence is expected to decode to 'A cat'
        """
        words = (
            'i', 'have', 'a', 'cat', '<END>',
            'his', 'name', 'is', 'bruno', '<END>',
            'i', 'have', 'a', 'cat', '<END>',
            'his', 'name', 'is', 'rex', '<END>',
            'her', 'name', 'is', 'cat', '<END>',
        )

        word_storage = WordStorage()
        word_storage.update(words)

        trie = NGramTrie(3, encode_text(word_storage, words))
        context = tuple(word_storage.get_id(word) for word in ('a', 'cat'))

        text_generator = LikelihoodBasedTextGenerator(word_storage, trie)
        generated = text_generator.generate_text(context, 1)

        expected = ('A cat', )
        actual = decode_text(word_storage, generated)
        self.assertEqual(expected, actual)
コード例 #4
0
    def test_decode_text_ideal(self):
        """
        Checks that the generated sequence ends with the '<END>' id and
            that decode_text turns it into the expected sentences
        """
        corpus = ('i', 'have', 'a', 'cat', '<END>', 'his', 'name', 'is',
                  'bruno', '<END>', 'i', 'have', 'a', 'dog', 'too', '<END>',
                  'his', 'name', 'is', 'rex', '<END>', 'her', 'name', 'is',
                  'rex', 'too', '<END>')

        storage = WordStorage()
        storage.update(corpus)

        encoded = encode_text(storage, corpus)

        trie = NGramTrie(3, encoded)

        context = (
            storage.get_id('name'),
            storage.get_id('is'),
        )
        end = storage.get_id('<END>')

        generator = LikelihoodBasedTextGenerator(storage, trie)

        to_decode = generator.generate_text(context, 2)
        # the raw generated sequence must still be terminated by the end id
        self.assertEqual(to_decode[-1], end)

        expected = ('Name is rex', 'Her name is rex')
        actual = decode_text(storage, to_decode)
        self.assertEqual(expected, actual)
コード例 #5
0
ファイル: start.py プロジェクト: katearb/2020-2-level-labs
def realize_likelihood_generator(text):
    """
    Generates text with a likelihood-based generator and decodes it

    The ('i', 'shall') context is encoded, and the generated text is
    decoded, with a storage built from text; the generation itself uses
    the word storage and n-gram trie of the model loaded from
    'lab_4/likelihood_model.json'.
    """
    text_storage = WordStorage()
    text_storage.update(text)
    start_ids = tuple(text_storage.get_id(word) for word in ('i', 'shall'))

    # NOTE(review): the context ids come from the storage built from text,
    # while the generator works on the loaded model's storage - presumably
    # both storages assign the same ids to the same words; confirm
    saved_model = load_model('lab_4/likelihood_model.json')
    text_generator = LikelihoodBasedTextGenerator(saved_model.word_storage,
                                                  saved_model.n_gram_trie)
    generated = text_generator.generate_text(start_ids, 3)

    return decode_text(text_storage, generated)
コード例 #6
0
    def test_decode_text_upper_first_letter(self):
        """
        Checks that the first decoded sentence, taken without its
            first character, is not entirely in upper case
        """
        words = (
            'first', 'sentence', 'here', '<END>',
            'second', 'sentence', 'here', '<END>',
            'third', 'sentence', 'here', '<END>',
        )

        word_storage = WordStorage()
        word_storage.update(words)

        trie = NGramTrie(3, encode_text(word_storage, words))
        context = tuple(word_storage.get_id(word)
                        for word in ('first', 'sentence'))

        text_generator = LikelihoodBasedTextGenerator(word_storage, trie)
        sentences = decode_text(word_storage,
                                text_generator.generate_text(context, 1))

        sentence_tail = sentences[0][1:]
        self.assertFalse(sentence_tail.isupper())
コード例 #7
0
"""

from lab_4.main import LikelihoodBasedTextGenerator, encode_text, WordStorage, decode_text
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Demo run: encode a small corpus, generate text from the
    # ('name', 'is') context and compare the decoded sentences
    # with the expected ones.
    words = (
        'i', 'have', 'a', 'cat', '<END>',
        'his', 'name', 'is', 'bruno', '<END>',
        'i', 'have', 'a', 'dog', 'too', '<END>',
        'his', 'name', 'is', 'rex', '<END>',
        'her', 'name', 'is', 'rex', 'too', '<END>',
    )

    word_storage = WordStorage()
    word_storage.update(words)

    trigram_trie = NGramTrie(3, encode_text(word_storage, words))
    start_ids = tuple(word_storage.get_id(word) for word in ('name', 'is'))
    # looked up, but not used again in the visible part of the script
    end = word_storage.get_id('<END>')

    text_generator = LikelihoodBasedTextGenerator(word_storage, trigram_trie)
    generated_ids = text_generator.generate_text(start_ids, 2)

    EXPECTED = ('Name is rex', 'Her name is rex')
    RESULT = decode_text(word_storage, generated_ids)
    assert RESULT == EXPECTED, 'Encoding not working'
コード例 #8
0
from lab_4.main import tokenize_by_sentence
from lab_4.main import WordStorage, LikelihoodBasedTextGenerator
from lab_4.main import encode_text, decode_text
from lab_4.main import NGramTextGenerator

if __name__ == '__main__':
    # NGramTrie is used below but is not among this script's own imports;
    # importing it here keeps the script runnable on its own.
    from lab_4.ngrams.ngram_trie import NGramTrie

    # Demo run: tokenize and encode a short text, build a trigram trie and
    # generate text from the ('name', 'is') context with both generators.
    TEXT = 'I have a cat. His name is Bruno. I have a dog too. His name is Rex. Her name is Rex too.'
    tokenized_text = tokenize_by_sentence(TEXT)

    word_storage = WordStorage()
    word_storage.update(tokenized_text)

    encoded = encode_text(word_storage, tokenized_text)

    trie = NGramTrie(3, encoded)
    context = (
        word_storage.get_id('name'),
        word_storage.get_id('is'),
    )

    # output of the plain n-gram generator is produced but not checked below
    generator = NGramTextGenerator(word_storage, trie)
    generated_text = generator.generate_text(context, 2)

    # only the likelihood-based output is decoded and asserted
    gen_likelihood = LikelihoodBasedTextGenerator(word_storage, trie)
    gen_text = gen_likelihood.generate_text(context, 2)
    decoded_text = decode_text(word_storage, gen_text)

    RESULT = decoded_text

    assert RESULT == ('Name is rex', 'Her name is rex'), "Not working"
コード例 #9
0
           The school library where Oleg studies is good. It is a large clean room. 
           There are four big windows in it. The walls are light blue. There are a lot of shelves full of books. 
           You can find books on literature, physics, history, chemistry, geography, biology and other subjects. 
           There are books in English, too. On the walls you can see pictures of some great writers and poets.
           On the table near the window you can always see beautiful spring and autumn flowers.
           Oleg likes to go to the library. He can always find there something new, something he needs.
           '''
    corpus = tokenize_by_sentence(TEXT)
    print(f'TOKENIZE_BY_SENTENCE RESULT: {corpus}\n')

    word_storage = WordStorage()
    word_storage.update(corpus)
    encoded_text = encode_text(word_storage, corpus)
    print(f'ENCODE_TEXT RESULT: {encoded_text}\n')

    trie = NGramTrie(4, encoded_text)
    context = (word_storage.get_id('the'), word_storage.get_id('walls'),
               word_storage.get_id('are'))
    likelihood_generator = LikelihoodBasedTextGenerator(word_storage, trie)
    generated_text = likelihood_generator.generate_text(context, 3)
    print(f'ENCODED_GENERATED_TEXT: {generated_text}\n')

    decoded_text = decode_text(word_storage, generated_text)
    print(f'DECODED_GENERATED_TEXT: {decoded_text}')

    RESULT = decoded_text
    assert RESULT == (
        'The walls are light blue', 'There are books in english too',
        'On the walls you can see pictures of some great writers and poets'
    ), 'Generator not working'
コード例 #10
0
              'havent', 'a', 'cat', 'too', '<END>', 'we', 'havent', 'a', 'cat',
              'too', '<END>')

    storage = WordStorage()
    storage.update(corpus)

    encoded = encode_text(storage, corpus)

    trie = NGramTrie(4, encoded)

    context = (storage.get_id('i'), storage.get_id('have'),
               storage.get_id('a'))

    generator_likelihood = LikelihoodBasedTextGenerator(storage, trie)

    generated_text = generator_likelihood.generate_text(context, 3)
    decoded_gen_text = decode_text(storage, generated_text)
    print('Likelihood generator generates sentences:')
    print(*decoded_gen_text, sep='. ', end='.\n')

    two = NGramTrie(2, encoded)
    trie = NGramTrie(3, encoded)

    context = (
        storage.get_id('i'),
        storage.get_id('have'),
    )

    generator_backoff = BackOffGenerator(storage, trie, two)

    actual = generator_backoff.generate_text(context, 3)
コード例 #11
0
ファイル: start.py プロジェクト: ashirka/2020-2-level-labs
"Lab 4 implementation starter"

from lab_4.main import WordStorage, encode_text, LikelihoodBasedTextGenerator, decode_text
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Demo run: encode a small corpus, generate text from the
    # ('i', 'have') context, print the decoded sentences and check them.
    words = (
        'i', 'have', 'a', 'cat', '<END>',
        'his', 'name', 'is', 'bruno', '<END>',
        'i', 'have', 'a', 'dog', 'too', '<END>',
        'his', 'name', 'is', 'rex', '<END>',
        'her', 'name', 'is', 'rex', 'too', '<END>',
    )

    word_storage = WordStorage()
    word_storage.update(words)

    trigram_trie = NGramTrie(3, encode_text(word_storage, words))
    start_ids = tuple(word_storage.get_id(word) for word in ('i', 'have'))

    text_generator = LikelihoodBasedTextGenerator(word_storage, trigram_trie)
    generated_ids = text_generator.generate_text(start_ids, 3)

    RESULT = decode_text(word_storage, generated_ids)
    print(RESULT)

    # DO NOT REMOVE NEXT LINE - KEEP IT INTENTIONALLY LAST
    assert RESULT == ('I have a cat', 'His name is rex',
                      'Her name is rex'), 'Encoding not working'