def test_text_generator_no_context(self):
        """
        Checks that BackOffGenerator still produces text when the starting
        context does not occur in the corpus.

        Both context words ('cat', 'dogs') are in the storage, but they are
        never adjacent in the corpus, so no trigram of the corpus starts
        with this pair.
        """

        corpus = ('cat', 'has', 'paws', '<END>', 'dogs', 'have', 'noses',
                  '<END>', 'cat', 'has', 'whiskers', '<END>')
        storage = WordStorage()
        storage.update(corpus)

        encoded = encode_text(storage, corpus)

        # The 3-gram trie is passed first; the 2- and 4-gram tries are the
        # additional tries handed to the generator.
        trie = NGramTrie(3, encoded)
        two = NGramTrie(2, encoded)
        four = NGramTrie(4, encoded)

        context = (
            storage.get_id('cat'),
            storage.get_id('dogs'),
        )

        generator = BackOffGenerator(storage, trie, two, four)

        actual = generator.generate_text(context, 3)
        # all() is True for an empty iterable, so an empty result would pass
        # the check below vacuously; rule that out first.
        self.assertTrue(actual)
        self.assertTrue(all(actual))
예제 #2
0
def realize_backoff_generator(text):
    """
    Generate text from *text* with a BackOffGenerator and return it decoded.

    The corpus is stored and encoded, a 2-gram and a 3-gram trie are built
    from the encoded corpus, and generation starts from the fixed context
    ('if', 'you') with 3 passed as the amount to generate.

    :param text: corpus handed to WordStorage.update and encode_text
    :return: the result of decode_text for the generated sequence
    """
    storage = WordStorage()
    storage.update(text)
    encoded = encode_text(storage, text)

    bigram_trie = NGramTrie(2, encoded)
    trigram_trie = NGramTrie(3, encoded)

    # Ids are looked up in the same order as the words are listed.
    start_context = tuple(storage.get_id(word) for word in ('if', 'you'))

    generator = BackOffGenerator(storage, trigram_trie, bigram_trie)
    return decode_text(storage, generator.generate_text(start_context, 3))
예제 #3
0
"""
Lab 4 implementation starter
"""

from lab_4.main import BackOffGenerator, encode_text, WordStorage, decode_text, tokenize_by_sentence
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Load the source text and tokenize it.
    with open('lab_3/Frank_Baum.txt', 'r', encoding='utf-8') as book:
        tokens = tokenize_by_sentence(book.read())

    # Register the tokens in the storage, then encode the text with it.
    word_storage = WordStorage()
    word_storage.update(tokens)
    encoded_tokens = encode_text(word_storage, tokens)

    trigram_trie = NGramTrie(3, encoded_tokens)
    fourgram_trie = NGramTrie(4, encoded_tokens)

    # Starting context: the ids of 'when' and 'the', in that order.
    start_context = tuple(
        word_storage.get_id(word) for word in ('when', 'the')
    )

    # The 4-gram trie is passed first, the 3-gram trie as the extra one.
    backoff = BackOffGenerator(word_storage, fourgram_trie, trigram_trie)
    RESULT = decode_text(word_storage,
                         backoff.generate_text(start_context, 5))

    # DO NOT REMOVE NEXT LINE - KEEP IT INTENTIONALLY LAST
    assert RESULT, 'Encoding not working'
예제 #4
0
    # NOTE(review): `storage` and `encoded` are defined above this fragment
    # (not visible here).

    # --- Likelihood-based generation: 4-gram trie, three-word context ---
    trie = NGramTrie(4, encoded)

    context = (storage.get_id('i'), storage.get_id('have'),
               storage.get_id('a'))

    generator_likelihood = LikelihoodBasedTextGenerator(storage, trie)

    generated_text = generator_likelihood.generate_text(context, 3)
    decoded_gen_text = decode_text(storage, generated_text)
    print('Likelihood generator generates sentences:')
    # Items are joined with '. ' and the output is closed with a final '.'
    print(*decoded_gen_text, sep='. ', end='.\n')

    # --- Back-off generation: 3-gram trie plus a 2-gram trie ---
    # `trie` and `context` are rebound here; the 4-gram versions above are
    # no longer used past this point.
    two = NGramTrie(2, encoded)
    trie = NGramTrie(3, encoded)

    context = (
        storage.get_id('i'),
        storage.get_id('have'),
    )

    generator_backoff = BackOffGenerator(storage, trie, two)

    actual = generator_backoff.generate_text(context, 3)
    RESULT = decode_text(storage, actual)
    print('Backoff generator generates sentences:')
    print(*RESULT, sep='. ', end='.\n')

    # NOTE(review): the expected tuple depends on the corpus, which is not
    # visible in this fragment - confirm against the actual input text.
    assert RESULT == (
        'I have a colourful dog', 'I havent a cat too',
        'They have beautiful dogs'), 'Text generator does not work'
"""
Lab 4 starter
"""
from lab_4.main import BackOffGenerator, encode_text, WordStorage, decode_text
from lab_4.ngrams.ngram_trie import NGramTrie

if __name__ == '__main__':
    # Toy corpus, one sentence per row; '<END>' closes each sentence.
    corpus = (
        'i', 'have', 'a', 'cat', '<END>',
        'his', 'name', 'is', 'bruno', '<END>',
        'i', 'have', 'a', 'dog', 'too', '<END>',
        'his', 'name', 'is', 'rex', '<END>',
        'her', 'name', 'is', 'rex', 'too', '<END>',
    )

    word_storage = WordStorage()
    word_storage.update(corpus)
    encoded_corpus = encode_text(word_storage, corpus)

    trigram_trie = NGramTrie(3, encoded_corpus)
    fourgram_trie = NGramTrie(4, encoded_corpus)

    # Starting context: the ids of 'his', 'name', 'is', in that order.
    start_context = tuple(
        word_storage.get_id(word) for word in ('his', 'name', 'is')
    )

    # The 3-gram trie is passed first, the 4-gram trie as the extra one.
    backoff = BackOffGenerator(word_storage, trigram_trie, fourgram_trie)
    actual = decode_text(word_storage,
                         backoff.generate_text(start_context, 3))

    RESULT = ('His name is bruno', 'I have a cat', 'His name is bruno')
    assert RESULT == actual, 'Not work'