Ejemplo n.º 1
0
    def test_calculate_log_probabilities_empty_frequencies(self):
        """Check calculate_log_probabilities on a trie with no frequencies.

        With an empty frequency table the call is expected to return 1 and
        leave ``n_gram_log_probabilities`` as an empty dict.
        """
        trie = NGramTrie(2)
        trie.n_gram_frequencies = {}

        status = trie.calculate_log_probabilities()

        no_probabilities = {}
        self.assertEqual(trie.n_gram_log_probabilities, no_probabilities)
        self.assertEqual(1, status)
Ejemplo n.º 2
0
    def test_calculate_log_probabilities_one_bi_gram(self):
        """Check calculate_log_probabilities with a single stored bi-gram.

        The only bi-gram is expected to get log-probability 0.0 and the call
        is expected to return 0.
        """
        only_bi_gram = (1, 2)
        trie = NGramTrie(2)
        trie.n_gram_frequencies = {only_bi_gram: 10}

        status = trie.calculate_log_probabilities()

        self.assertEqual(trie.n_gram_log_probabilities[only_bi_gram], 0.0)
        self.assertEqual(0, status)
Ejemplo n.º 3
0
 def test_top_n_grams_more(self):
     """Check top_n_grams when asked for far more n-grams than are stored.

     All three stored bi-grams are expected back as a tuple ordered from
     the highest frequency to the lowest.
     """
     trie = NGramTrie(2)
     trie.n_gram_frequencies = {(1, 2): 100, (2, 3): 123, (3, 4): 12345}
     requested = 2000000  # deliberately larger than the number of stored n-grams

     ranked = trie.top_n_grams(requested)

     by_descending_frequency = ((3, 4), (2, 3), (1, 2))
     self.assertEqual(by_descending_frequency, ranked)
Ejemplo n.º 4
0
    def test_calculate_log_probabilities_ideal(self):
        """Check calculate_log_probabilities on a small, well-formed table.

        Bi-grams (1, 2) and (1, 3) share the first element 1, whose counts
        total 10 + 2 = 12, so their expected log-probabilities are
        log(10 / 12) and log(2 / 12). Bi-gram (2, 5) is the only one starting
        with 2, so its expected log-probability is log(5 / 5) == 0.0.
        The call itself is expected to return 0.
        """
        ngram = NGramTrie(2)
        ngram.n_gram_frequencies = {(1, 2): 10, (1, 3): 2, (2, 5): 5}
        expected_log_probs = {
            (1, 2): math.log(10 / 12),
            (1, 3): math.log(2 / 12),
            (2, 5): math.log(5 / 5),
        }

        actual = ngram.calculate_log_probabilities()

        # Floats are compared approximately: an implementation that computes
        # log(a) - log(b) instead of log(a / b) is equally correct but may
        # differ in the last bits.
        for n_gram, expected in expected_log_probs.items():
            with self.subTest(n_gram=n_gram):
                self.assertAlmostEqual(
                    ngram.n_gram_log_probabilities[n_gram], expected
                )
        self.assertEqual(0, actual)