Example #1
0
 def test_next_word_is(self):
     '''Test next_word_probability and probability_from_count for the word "is"'''
     next_words = next_word_probability(TEST_TEXT1, 'is')
     # assertEqual's third positional parameter is the failure message, not a
     # value to compare, so only the expected and actual values are passed.
     self.assertEqual(2, len(next_words))
     self.assertEqual(2, next_words['a'])
     self.assertEqual(1, next_words['not'])
     # Feed the first result through probability_from_count and check the
     # values stored under the same keys.
     next_words = probability_from_count(next_words)
     self.assertEqual(TWO_THIRDS, next_words['a'])
     self.assertEqual(ONE_THIRD, next_words['not'])
Example #2
0
 def test_next_word_a(self):
     '''Check the followers of "a" before and after probability_from_count'''
     followers = next_word_probability(TEST_TEXT1, 'a')
     # Exactly two entries are expected, with these values per key.
     self.assertEqual(2, len(followers))
     for key, expected in (('test', 2), ('mess', 1)):
         self.assertEqual(expected, followers[key])

     # The same keys are then checked on the probability_from_count result.
     probabilities = probability_from_count(followers)
     for key, expected in (('test', TWO_THIRDS), ('mess', ONE_THIRD)):
         self.assertEqual(expected, probabilities[key])
Example #3
0
def later_words_probabilities(sample, word, distance):
    '''
    Build a nested estimate of the words that may follow @word in @sample.

    @param sample: a sample of text to draw from
    @param word: a word occurring before a corrupted sequence
    @param distance: how many words later to estimate (i.e. 1 for the next word, 2 for the word
        after that)
    @returns: a list whose first element is the probability_from_count() result for the words
        following @word; when @distance is greater than 1 a second element is appended: a dict
        mapping each of those following words to the list returned by this function for that
        word at distance - 1
    '''
    # Given a word, collect the relative probabilities of possible following words
    # from @sample.
    next_words = next_word_probability(sample, word)
    next_words = probability_from_count(next_words)
    node = [next_words]
    # For each distance beyond 1, recurse on every candidate following word so the
    # caller can combine the levels, weighting by relative probability.
    if distance > 1:
        # Iterate the mapping directly: dict.iterkeys() exists only on Python 2,
        # while plain iteration yields the keys on both Python 2 and Python 3.
        children = {}
        for a_word in next_words:
            children[a_word] = later_words_probabilities(sample, a_word, distance - 1)
        node.append(children)

    return node