def test_get_most_frequent_gram_bad_inputs(self):
    """
    Checks that get_most_frequent_gram returns an empty tuple
    for every invalid context value
    """
    corpus = ('i', 'have', 'a', 'cat', '<END>',
              'his', 'name', 'is', 'bruno', '<END>')

    word_storage = WordStorage()
    word_storage.update(corpus)
    encoded = encode_text(word_storage, corpus)
    ngram = NGramTrie(3, encoded)
    generator = NGramTextGenerator(word_storage, ngram)

    expected = ()
    bad_inputs = [[], {}, (), None, 9, 9.34, True]
    for bad_input in bad_inputs:
        # subTest lets the loop continue after a failure and reports
        # which particular input produced the wrong result
        with self.subTest(bad_input=bad_input):
            actual = generator.get_most_frequent_gram(bad_input)
            self.assertEqual(expected, actual)
def test_get_most_frequent_gram_ideal(self):
    """
    Checks that for the context ('i', 'have') the generator
    returns the trigram ('i', 'have', 'a')
    """
    tokens = ('i', 'have', 'a', 'cat', '<END>',
              'i', 'have', 'a', 'bruno', 'cat', '<END>')

    storage = WordStorage()
    storage.update(tokens)
    trie = NGramTrie(3, encode_text(storage, tokens))
    text_generator = NGramTextGenerator(storage, trie)

    # the context is a pair of word ids, the expected n-gram a triple of word ids
    context = tuple(storage.get_id(word) for word in ('i', 'have'))
    expected = tuple(storage.get_id(word) for word in ('i', 'have', 'a'))

    self.assertEqual(expected, text_generator.get_most_frequent_gram(context))
def test_get_most_frequent_gram_no_such_context(self):
    """
    Checks that an empty tuple is returned when the context
    does not occur in the corpus
    """
    tokens = ('i', 'have', 'a', 'cat', '<END>',
              'his', 'name', 'is', 'bruno', '<END>')

    storage = WordStorage()
    storage.update(tokens)
    trie = NGramTrie(3, encode_text(storage, tokens))
    text_generator = NGramTextGenerator(storage, trie)

    # 'i' is never followed by 'name' in the corpus, so no n-gram has this context
    missing_context = tuple(storage.get_id(word) for word in ('i', 'name'))

    result = text_generator.get_most_frequent_gram(missing_context)
    self.assertEqual((), result)