def test_classify_nb(self):  # pylint: disable=missing-docstring
    # Train on a two-document corpus (one document labelled 1, one labelled
    # 0), then check bayes.classify_nb on each training word and on a word
    # that is not in the vocabulary.
    documents = [
        ['stupid'],
        ['smart'],
    ]
    classifications = [1, 0]
    vocabulary = bayes.create_vocabulary(documents)

    # One vector per document (a list of lists, e.g. [[...], ..., [...]]).
    trainmat = [bayes.set_of_words_2_vec(vocabulary, document)
                for document in documents]
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("trainmat:  %s \n" % (trainmat,))
    # NOTE(review): this expectation depends on the word order that
    # create_vocabulary returns -- confirm that order is deterministic.
    self.assertEqual(trainmat, [[1, 0], [0, 1]])

    p0_vec, p1_vec, p_abusive = bayes.train_nbo(trainmat, classifications)

    # (words in the document to classify, expected class label)
    cases = (
        (['smart'], 0),
        (['stupid'], 1),
        (['intelligent'], 0),  # word absent from the training documents
    )
    for words, expected in cases:
        this_doc = array(bayes.set_of_words_2_vec(vocabulary, words))
        print("this_doc:  %s \n" % (this_doc,))
        result = bayes.classify_nb(this_doc, p0_vec, p1_vec, p_abusive)
        print("result:  %s \n" % (result,))
        self.assertEqual(result, expected)
def test_train_nbo(self):
    # Build the training matrix from the documents returned by
    # bayes.load_documents() and check the third value returned by
    # bayes.train_nbo.
    documents, classifications = bayes.load_documents()
    vocabulary = bayes.create_vocabulary(documents)

    # List of lists, e.g. [[...], ..., [...]]: one vector per document.
    trainmat = [bayes.set_of_words_2_vec(vocabulary, doc) for doc in documents]

    # Naming note: the call this was adapted from read
    #     p0Vect, p1Vect, pAbusive = trainNBO(trainMatrix, trainCategory)
    # The names passed to the function imply different types than the names
    # the function receives (compare sending trainCategory with receiving
    # classifications); from the program's self-referential perspective
    # there is not even a hint of shared meaning between those two names.
    _p0_vec, _p1_vec, p_abusive = bayes.train_nbo(trainmat, classifications)
    self.assertAlmostEqual(p_abusive, 0.5)