Exemple #1
0
 def test_bag_of_words_2_vec_mn(self):
     # Builds the vocabulary from every loaded document, then checks the
     # vector produced by bag_of_words_2_vec_mn for the first document
     # against a fixed expected vector.
     # The second value returned by load_documents is not needed here.
     documents, _ = bayes.load_documents()
     vocab = bayes.create_vocabulary(documents)
     first_document = documents[0]
     actual = bayes.bag_of_words_2_vec_mn(vocab, first_document)
     self.assertEqual(actual, [
         0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
         0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1,
     ])
Exemple #2
0
 def test_train_nbo(self):
     # Trains on one set-of-words vector per loaded document and checks
     # that the third value returned by train_nbo is (almost) 0.5.
     documents, classifications = bayes.load_documents()
     vocabulary = bayes.create_vocabulary(documents)
     # List of lists: one feature vector for each document, in order.
     train_matrix = [
         bayes.set_of_words_2_vec(vocabulary, doc) for doc in documents
     ]
     # Naming note: the reference call was written as
     #   p0Vect, p1Vect, pAbusive = trainNBO(trainMatrix, trainCategory)
     # so the argument names used at the call site and the names used
     # for the same data here (e.g. trainCategory vs. classifications)
     # do not match, and suggest different types.
     # Only the last returned value is asserted on; the first two are ignored.
     _, _, p_abusive = bayes.train_nbo(train_matrix, classifications)
     self.assertAlmostEqual(p_abusive, 0.5)