Exemple #1
0
 def test_get_most_frequent_gram_bad_inputs(self):
     """
     Checks that get_most_frequent_gram returns an empty tuple
     for each of the invalid contexts listed below.

     Every bad input is checked inside its own subTest, so a failure
     reports the offending value and the remaining inputs still run.
     """
     corpus = ('i', 'have', 'a', 'cat', '<END>', 'his', 'name', 'is',
               'bruno', '<END>')
     word_storage = WordStorage()
     word_storage.update(corpus)
     encoded = encode_text(word_storage, corpus)
     ngram = NGramTrie(3, encoded)
     generator = NGramTextGenerator(word_storage, ngram)

     expected = ()
     # Empty containers, None, numbers and a bool: none is a usable context.
     bad_inputs = [[], {}, (), None, 9, 9.34, True]
     for bad_input in bad_inputs:
         with self.subTest(bad_input=bad_input):
             self.assertEqual(expected,
                              generator.get_most_frequent_gram(bad_input))
Exemple #2
0
    def test_get_most_frequent_gram_ideal(self):
        """
        Verifies that, for a context present in the corpus, the method
        returns the expected n-gram: the context followed by 'a'
        """
        tokens = (
            'i', 'have', 'a', 'cat', '<END>',
            'i', 'have', 'a', 'bruno', 'cat', '<END>',
        )
        storage = WordStorage()
        storage.update(tokens)
        trie = NGramTrie(3, encode_text(storage, tokens))
        text_generator = NGramTextGenerator(storage, trie)

        # Ids are looked up in the same order as the words are listed.
        prefix = tuple(storage.get_id(word) for word in ('i', 'have'))
        wanted = tuple(storage.get_id(word) for word in ('i', 'have', 'a'))

        found = text_generator.get_most_frequent_gram(prefix)
        self.assertEqual(wanted, found)
Exemple #3
0
    def test_get_most_frequent_gram_no_such_context(self):
        """
        Verifies that an empty tuple is returned when the given
        context does not occur among the corpus n-grams
        """
        tokens = (
            'i', 'have', 'a', 'cat', '<END>',
            'his', 'name', 'is', 'bruno', '<END>',
        )
        storage = WordStorage()
        storage.update(tokens)
        trie = NGramTrie(3, encode_text(storage, tokens))
        text_generator = NGramTextGenerator(storage, trie)

        # Both words are known to the storage, but 'i' is never
        # followed by 'name', so no n-gram starts with this context.
        missing_context = tuple(
            storage.get_id(word) for word in ('i', 'name')
        )

        found = text_generator.get_most_frequent_gram(missing_context)
        self.assertEqual((), found)