コード例 #1
0
    def test_text_generator_generate_sentence_proper_beginning(self):
        """
        Checks that a sentence generated from the context ('<END>',) does not
        start with '<END>': it must begin with 'my' and finish with '<END>'
        """
        # Every token needs its own trailing comma: Python silently joins
        # adjacent string literals, so 'music' '<END>' would turn into the
        # single bogus token 'music<END>' and drop a sentence boundary.
        corpus = ('my', 'favourite', 'subject', 'is', 'maths', '<END>', 'his',
                  'favourite', 'thing', 'is', 'music',
                  '<END>', 'i', 'have', 'a', 'favourite', 'film', '<END>',
                  'my', 'family', 'likes', 'avatar', '<END>', 'my',
                  'favourite', 'subject', 'is', 'music', '<END>')

        storage = WordStorage()
        storage.update(corpus)
        encoded = encode_text(storage, corpus)
        trie = NGramTrie(2, encoded)

        end_id = storage.get_id('<END>')
        context = (end_id, )

        # In the corpus 'my' follows '<END>' twice, 'his' and 'i' once each.
        first_generated = storage.get_id('my')
        last_generated = end_id

        generator = NGramTextGenerator(storage, trie)
        actual = generator._generate_sentence(context)

        # The leading '<END>' taken from the context must not be kept.
        self.assertNotEqual(end_id, actual[0])

        self.assertEqual(first_generated, actual[0])
        self.assertEqual(last_generated, actual[-1])
コード例 #2
0
    def test_ngram_text_generator_duplicates_words(self):
        """
        Corpus of repeating 'stop', 'it' pairs: the sentence length must be
        exactly 20 + len(context) + 1
        """
        words = ('stop', 'it', 'stop', 'it', 'stop', 'it', '<END>')

        word_storage = WordStorage()
        word_storage.update(words)
        bigram_trie = NGramTrie(2, encode_text(word_storage, words))
        start_context = (word_storage.get_id('stop'), )

        text_generator = NGramTextGenerator(word_storage, bigram_trie)
        sentence = text_generator._generate_sentence(start_context)

        expected_length = 20 + len(start_context) + 1
        self.assertEqual(expected_length, len(sentence))
コード例 #3
0
    def test_ngram_text_generator_identical_words(self):
        """
        Corpus made of one repeated word: the sentence length must be
        exactly 20 + len(context) + 1
        """
        words = ('deadline', ) * 5 + ('<END>', )

        word_storage = WordStorage()
        word_storage.update(words)
        trigram_trie = NGramTrie(3, encode_text(word_storage, words))
        start_context = (word_storage.get_id('deadline'),
                         word_storage.get_id('deadline'))

        text_generator = NGramTextGenerator(word_storage, trigram_trie)
        sentence = text_generator._generate_sentence(start_context)

        # the trailing +1 accounts for <END>
        expected_length = 20 + len(start_context) + 1
        self.assertEqual(expected_length, len(sentence))
    def test_ngram_text_generator_generate_sentence_properly(self):
        """
        simple case: a sentence generated from the context ('i',) must
        finish with '<END>'
        """
        words = ('i', 'have', 'a', 'cat', '<END>',
                 'his', 'name', 'is', 'bruno', '<END>')

        storage = WordStorage()
        storage.update(words)
        bigram_trie = NGramTrie(2, encode_text(storage, words))
        start_context = (storage.get_id('i'), )

        end_id = storage.get_id('<END>')

        text_generator = NGramTextGenerator(storage, bigram_trie)
        sentence = text_generator._generate_sentence(start_context)

        last_id = sentence[-1]
        self.assertEqual(last_id, end_id)
    def test_length_of_sentence(self):
        """
        generated sentence is not longer than the context plus
        20 words plus the end token
        """
        words = ('i', 'have', 'a', 'cat', 'his', 'name', 'is', 'bruno',
                 'i', 'have', 'a', 'dog', 'too', 'his', 'name', 'is', 'rex',
                 'her', 'name', 'is', 'rex', 'too', 'he', 'funny', '<END>')

        storage = WordStorage()
        storage.update(words)
        bigram_trie = NGramTrie(2, encode_text(storage, words))
        start_context = (storage.get_id('cat'), )

        text_generator = NGramTextGenerator(storage, bigram_trie)
        sentence = text_generator._generate_sentence(start_context)

        # upper bound: the context itself, at most 20 words and the end token
        sentence_length = len(sentence)
        longest_allowed = len(start_context) + 21
        self.assertLessEqual(sentence_length, longest_allowed)
    def test_ngram_text_generator_generate_sentence_no_end(self):
        """
        corpus with a single '<END>' at its very end:
        the last word of the sentence must still be '<END>'
        """
        words = ('i', 'have', 'a', 'cat', 'his', 'name', 'is', 'bruno',
                 'i', 'have', 'a', 'dog', 'too', 'his', 'name', 'is', 'rex',
                 'her', 'name', 'is', 'rex', 'too', '<END>')

        storage = WordStorage()
        storage.update(words)
        bigram_trie = NGramTrie(2, encode_text(storage, words))
        start_context = (storage.get_id('cat'), )

        text_generator = NGramTextGenerator(storage, bigram_trie)
        sentence = text_generator._generate_sentence(start_context)

        # compare words rather than ids: decode the last generated id
        last_word = storage.get_word(sentence[-1])
        self.assertEqual('<END>', last_word)
    def test_ngram_text_generator_generate_sentence_ideal(self):
        """
        the word right after the context ('i',) is 'have'
        and the sentence finishes with '<END>'
        """
        words = ('i', 'have', 'a', 'cat', '<END>',
                 'his', 'name', 'is', 'bruno', '<END>',
                 'i', 'have', 'a', 'dog', 'too', '<END>',
                 'his', 'name', 'is', 'rex', '<END>',
                 'her', 'name', 'is', 'rex', 'too', '<END>')

        word_storage = WordStorage()
        word_storage.update(words)
        bigram_trie = NGramTrie(2, encode_text(word_storage, words))
        start_context = (word_storage.get_id('i'), )

        expected_first = word_storage.get_id('have')
        expected_last = word_storage.get_id('<END>')

        text_generator = NGramTextGenerator(word_storage, bigram_trie)
        sentence = text_generator._generate_sentence(start_context)

        # index 0 is the context word, so the first generated id is at 1
        self.assertEqual(sentence[1], expected_first)
        self.assertEqual(sentence[-1], expected_last)
コード例 #8
0
    def test_text_generator_generate_sentence_proper_number_of_end(self):
        """
        Checks that the generated sentence contains '<END>' exactly once
        """
        words = ('i', 'have', 'a', 'cat', '<END>',
                 'his', 'name', 'is', 'bruno', '<END>',
                 'i', 'have', 'a', 'dog', 'too', '<END>',
                 'his', 'name', 'is', 'rex', '<END>',
                 'there', 'are', 'a', 'cat', 'outside', '<END>',
                 'here', 'is', 'a', 'cat', 'outside', '<END>')

        word_storage = WordStorage()
        word_storage.update(words)
        trigram_trie = NGramTrie(3, encode_text(word_storage, words))

        # NOTE(review): the context holds three ids while the trie is built
        # with n-gram size 3 - confirm the generator accepts such a context.
        start_context = (word_storage.get_id('a'),
                         word_storage.get_id('is'),
                         word_storage.get_id('<END>'))

        text_generator = NGramTextGenerator(word_storage, trigram_trie)
        sentence = text_generator._generate_sentence(start_context)

        end_occurrences = sentence.count(word_storage.get_id('<END>'))
        self.assertEqual(1, end_occurrences)
コード例 #9
0
    def test_ngram_text_generator_end_at_the_beginning(self):
        """
        context is ('<END>',): the sentence must contain '<END>' only once,
        as its last element
        """
        words = ('i', 'like', 'to', 'read', '<END>',
                 'he', 'likes', 'to', 'read', 'too',
                 'i', 'like', 'a', 'book', 'called', '"Harry Potter"', '<END>',
                 'he', 'likes', 'another', 'book', '<END>',
                 'he', 'does', 'not', 'tell', 'me', 'name', '<END>')

        word_storage = WordStorage()
        word_storage.update(words)
        bigram_trie = NGramTrie(2, encode_text(word_storage, words))
        start_context = (word_storage.get_id('<END>'), )

        expected_last = word_storage.get_id('<END>')

        text_generator = NGramTextGenerator(word_storage, bigram_trie)
        sentence = text_generator._generate_sentence(start_context)

        self.assertEqual(expected_last, sentence[-1])
        end_occurrences = sentence.count(word_storage.get_id('<END>'))
        self.assertEqual(1, end_occurrences)
コード例 #10
0
    def test_text_generator_generate_sentence_includes_context(self):
        """
        Checks that the generated sentence begins with the given context
        when the context has no '<END>' in it
        """
        words = ('i', 'have', 'a', 'cat', 'and', 'a', 'dog', '<END>',
                 'his', 'name', 'is', 'bruno', '<END>',
                 'i', 'have', 'a', 'dog', 'too', '<END>',
                 'his', 'name', 'is', 'rex', '<END>',
                 'there', 'are', 'a', 'cat', 'and', 'a', 'bear', 'outside',
                 '<END>',
                 'here', 'is', 'a', 'cat', 'outside', '<END>')

        word_storage = WordStorage()
        word_storage.update(words)
        trigram_trie = NGramTrie(3, encode_text(word_storage, words))
        text_generator = NGramTextGenerator(word_storage, trigram_trie)

        start_context = (word_storage.get_id('a'), word_storage.get_id('cat'))
        sentence = text_generator._generate_sentence(start_context)

        # the leading slice of the sentence must reproduce the context
        sentence_head = sentence[:len(start_context)]
        self.assertEqual(start_context, sentence_head)