Exemple #1
0
    def test_generate_next_word_complex(self):
        """
        Checks next-word generation for a two-word context.

        The generator is built from tri-, bi- and four-gram tries over a
        five-sentence corpus; for the context 'name is' it is expected to
        return the id of 'rex'.
        """
        words = (
            'i', 'have', 'a', 'cat', '<END>',
            'his', 'name', 'is', 'bruno', '<END>',
            'i', 'have', 'a', 'dog', 'too', '<END>',
            'his', 'name', 'is', 'rex', '<END>',
            'her', 'name', 'is', 'rex', 'too', '<END>',
        )

        word_storage = WordStorage()
        word_storage.update(words)
        encoded_words = encode_text(word_storage, words)

        # Tries are created in the order of sizes 3, 2, 4.
        tri_grams, bi_grams, four_grams = (
            NGramTrie(size, encoded_words) for size in (3, 2, 4)
        )

        rex_id = word_storage.get_id('rex')
        name_is = tuple(word_storage.get_id(word) for word in ('name', 'is'))

        back_off = BackOffGenerator(
            word_storage, tri_grams, bi_grams, four_grams
        )

        self.assertEqual(rex_id, back_off._generate_next_word(name_is))
Exemple #2
0
def main():
    """
    Demo run: predict the word that follows 'name is'.

    Builds bi- and tri-gram tries from a short sample text, asks a
    BackOffGenerator for the next word after the context 'name is',
    prints the expected and the actual word, then saves the generator to
    'model.txt' and loads it back (the loaded object is not used).

    :return: the result of comparing the predicted word with 'rex'
    """
    text = ('I have a cat. His name is Bruno. '
            'I have a dog too. His name is Rex. '
            'Her name is Rex too.')

    sentences = tokenize_by_sentence(text)

    word_storage = WordStorage()
    word_storage.update(sentences)
    encoded_sentences = encode_text(word_storage, sentences)

    # Tries are created in the order of sizes 2, 3.
    bi_grams, tri_grams = (
        NGramTrie(size, encoded_sentences) for size in (2, 3)
    )

    context = tuple(word_storage.get_id(word) for word in ('name', 'is'))
    back_off = BackOffGenerator(word_storage, tri_grams, bi_grams)

    expected = 'rex'
    predicted_id = back_off._generate_next_word(context)
    actual = word_storage.get_word(predicted_id)

    print(f'TEXT:\n{text}')
    print(f'\nEXPECTED WORD AFTER name is IS {expected}')
    print(f'ACTUAL WORD AFTER name is IS {actual}')

    model_path = 'model.txt'
    save_model(back_off, model_path)
    load_model(model_path)

    return actual == expected
Exemple #3
0
    def test_generate_next_word_short_context(self):
        """
        Checks next-word generation for a one-word context on a tiny corpus.

        The corpus is just 'bye', '<END>'; for the context '<END>' the
        generator is expected to return the id of 'bye'.
        """
        words = ('bye', '<END>')

        word_storage = WordStorage()
        word_storage.update(words)
        encoded_words = encode_text(word_storage, words)

        # Tries are created in the order of sizes 4, 3, 2.
        four_grams, tri_grams, bi_grams = (
            NGramTrie(size, encoded_words) for size in (4, 3, 2)
        )

        bye_id = word_storage.get_id('bye')
        end_context = (word_storage.get_id('<END>'),)

        back_off = BackOffGenerator(
            word_storage, bi_grams, four_grams, tri_grams
        )

        self.assertEqual(bye_id, back_off._generate_next_word(end_context))
Exemple #4
0
    def test_generate_next_word_no_context(self):
        """
        Checks next-word generation when the context has no continuation.

        'bye' is the last token of the corpus, so nothing follows it.
        The generator is expected to return the id of '<END>', which
        occurs four times - more often than any other token in the corpus
        (presumably the generator falls back to the most frequent word;
        confirm against BackOffGenerator).
        """
        # NOTE: 'watch' and 'with' must be separate items. Without the comma
        # Python joins adjacent string literals into the single token
        # 'watchwith'.
        corpus = ('i', 'watch', 'a', 'horror', 'movie', '<END>',
                  'would', 'you', 'like', 'to', 'watch', 'with', 'me', '<END>',
                  'i', 'do', 'not', 'like', 'such', 'films', '<END>',
                  'i', 'like', 'to', 'watch', 'drama', 'movies', '<END>',
                  'bye')

        storage = WordStorage()
        storage.update(corpus)

        encoded = encode_text(storage, corpus)

        four = NGramTrie(4, encoded)
        trie = NGramTrie(3, encoded)
        two = NGramTrie(2, encoded)

        expected_word = storage.get_id('<END>')
        context = (storage.get_id('bye'),)

        generator = BackOffGenerator(storage, two, four, trie)

        actual = generator._generate_next_word(context)
        self.assertEqual(expected_word, actual)