Esempio n. 1
0
 def test_spam_probability(self):
     """Check nb.spam_probability on a message scored against a tiny model.

     The model is built from two labelled messages (the second tuple item
     is presumably the is-spam flag) and the score for the sample message
     is expected to be approximately 0.9.
     """
     labelled_messages = [("foo bar", 1), ("bar baz", 0)]
     # the two 1s are presumably the spam / non-spam message totals
     probabilities = nb.word_probabilities(
         nb.count_words(labelled_messages), 1, 1)
     score = nb.spam_probability(
         probabilities, "there is no foo bar like bar bar")
     self.assertAlmostEqual(score, 0.9)
Esempio n. 2
0
    def train(self, training_set):
        """Compute and store word probabilities from labelled messages.

        training_set is iterated as (message, is_spam) pairs and must also
        support len(). The value returned by nb.word_probabilities is kept
        on self.word_probs.
        """
        # tally the classes: any truthy label counts as spam, the rest do not
        spam_total = sum(1 for _, flagged in training_set if flagged)
        non_spam_total = len(training_set) - spam_total

        # feed the per-word counts and class totals into the probability
        # step; self.k is passed through as the fourth argument
        # (presumably the smoothing constant -- confirm against nb)
        tallies = nb.count_words(training_set)
        self.word_probs = nb.word_probabilities(
            tallies, spam_total, non_spam_total, self.k)
Esempio n. 3
0
 def test_word_probabilities(self):
     """Check the probability triples produced for a two-message training set.

     Each expected entry is a (word, value, value) tuple; the two values
     are presumably the spam and non-spam probabilities for that word.
     Only the content of the result is verified, not its ordering.
     """
     training_set = [("foo bar", 1), ("bar baz", 0)]
     counts = nb.count_words(training_set)
     actual = nb.word_probabilities(counts, 1, 1)
     expected = [("baz", 0.25, 0.75), ("foo", 0.75, 0.25), ("bar", 0.75, 0.75)]
     # Compare sorted copies: the expected triples are listed in neither
     # insertion nor alphabetical order, so the original ordering presumably
     # mirrored one particular dict/set iteration order. That order is not
     # stable across interpreters or hash seeds, so it must not be asserted.
     self.assertEqual(sorted(actual), sorted(expected))