def test_predict_prediction_and_score(self):
    # Train on the full sports fixture, then check both values returned by
    # predict() for the "a very close game" document.
    model = Multinomial()
    model.train(self.sports_labels, self.sports_data)
    prediction, score = model.predict(self.a_very_close_game)

    # Correct Prediction Output
    # Assert on the first (only) prediction, exactly as the other predict
    # tests in this file do. Comparing the whole returned sequence with a
    # scalar only works while it holds a single element.
    self.assertEqual(prediction[0], 0)

    # Correct Score Output
    np.testing.assert_array_almost_equal(
        score[0], self.correct_a_very_close_game_score)
def test_train_model_params(self):
    # Training on the full sports fixture must reproduce every expected
    # model parameter (compared with numpy's "almost equal" tolerance).
    assert_close = np.testing.assert_array_almost_equal

    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Correct Model
    assert_close(classifier.priors, self.correct_priors)
    assert_close(classifier.label_counts, self.correct_label_count)
    assert_close(classifier.empty_likelihoods,
                 self.correct_empty_likelihoods)
    assert_close(classifier.likelihoods, self.correct_likelihoods)
def test_update_dictionary_shorten_dictionary(self):
    # Train with the full dictionary, switch the model to the shortened
    # dictionary, then predict on the shortened test document.
    assert_close = np.testing.assert_array_almost_equal

    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)
    classifier.update_dictionary(self.sports_map, self.sports_map_shortened)
    predicted, scores = classifier.predict(self.a_very_close_game_short)

    # Correct Model Parameter Updates
    # Priors and label counts are checked against the same fixtures as a
    # plain training run; only the likelihood fixtures are the
    # "shortened" variants.
    assert_close(classifier.priors, self.correct_priors)
    assert_close(classifier.label_counts, self.correct_label_count)
    assert_close(classifier.empty_likelihoods,
                 self.correct_shortened_empty_likelihoods)
    assert_close(classifier.likelihoods,
                 self.correct_shortened_likelihoods)

    # Correct Scores
    assert_close(scores[0], self.correct_shortened_a_very_close_game_score)

    # Correct Prediction
    self.assertEqual(predicted[0], 0)
def test_update_add_more_training_data(self):
    # Train on the first four samples only, then feed the sample at
    # index 4 through update(). The resulting model is checked against
    # the regular "correct_*" fixtures.
    assert_close = np.testing.assert_array_almost_equal

    classifier = Multinomial()
    classifier.train(self.sports_labels[0:4], self.sports_data[0:4])

    # Re-wrap the single label / document in arrays before updating.
    extra_labels = np.array([self.sports_labels[4]])
    extra_docs = np.array([self.sports_data[4]])
    classifier.update(extra_labels, extra_docs)

    predicted, scores = classifier.predict(self.a_very_close_game)

    # Correct Model Parameter Updates
    assert_close(classifier.priors, self.correct_priors)
    assert_close(classifier.label_counts, self.correct_label_count)
    assert_close(classifier.empty_likelihoods,
                 self.correct_empty_likelihoods)
    assert_close(classifier.likelihoods, self.correct_likelihoods)

    # Correct New Scores
    assert_close(scores[0], self.correct_a_very_close_game_score)

    # Correct Prediction
    self.assertEqual(predicted[0], 0)
def test_update_dictionary_new_map_wrong_values(self):
    # update_dictionary() is expected to raise TypeError when the new map
    # contains an entry whose value is a string.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # NOTE(review): this edits the fixture object in place; confirm the
    # fixture is rebuilt for every test so the extra entry cannot leak.
    self.sports_map_shortened["test"] = "fun"

    with self.assertRaises(TypeError):
        classifier.update_dictionary(
            self.sports_map, self.sports_map_shortened)
def test_update_dictionary_new_map_not_dict(self):
    # update_dictionary() is expected to raise TypeError when the new map
    # is passed as a list instead of the original mapping object.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Built outside the assertion so an error while converting the
    # fixture is not mistaken for the expected TypeError.
    new_map_as_list = list(self.sports_map_shortened)

    with self.assertRaises(TypeError):
        classifier.update_dictionary(self.sports_map, new_map_as_list)
def test_update_dictionary_old_map_not_same_size_as_training_data(self):
    # update_dictionary() is expected to raise ValueError once the old map
    # has been given an additional entry after training.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # NOTE(review): this edits the fixture object in place; confirm the
    # fixture is rebuilt for every test so the extra entry cannot leak.
    self.sports_map["test"] = 15

    with self.assertRaises(ValueError):
        classifier.update_dictionary(
            self.sports_map, self.sports_map_shortened)
def test_update_dictionary_old_map_wrong_keys(self):
    # update_dictionary() is expected to raise TypeError when the old map
    # contains the integer key 0.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # NOTE(review): this edits the fixture object in place; confirm the
    # fixture is rebuilt for every test so the extra key cannot leak.
    self.sports_map[0] = 1

    with self.assertRaises(TypeError):
        classifier.update_dictionary(
            self.sports_map, self.sports_map_shortened)
def test__update_new_train_data_not_same_dim_as_training_data(self):
    # update() is expected to raise ValueError when the new documents keep
    # only the first two columns of the data the model was trained on.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Sliced outside the assertion so a slicing error cannot satisfy it.
    truncated_docs = self.sports_data[:, 0:2]

    with self.assertRaises(ValueError):
        classifier.update(self.sports_labels, truncated_docs)
def test_update_new_train_data_not_numpy_array(self):
    # update() is expected to raise TypeError when the new documents are
    # handed over as a plain list.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Converted outside the assertion so a conversion error cannot
    # satisfy it.
    docs_as_list = list(self.sports_data)

    with self.assertRaises(TypeError):
        classifier.update(self.sports_labels, docs_as_list)
def test_update_number_of_labels_and_docs_differ(self):
    # update() is expected to raise ValueError when only the first four
    # labels are supplied together with the complete document set.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Sliced outside the assertion so a slicing error cannot satisfy it.
    too_few_labels = self.sports_labels[0:4]

    with self.assertRaises(ValueError):
        classifier.update(too_few_labels, self.sports_data)
def test_predict_test_data_not_same_dim_as_training_data(self):
    # predict() is expected to raise ValueError when the test document
    # keeps only its first two columns.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Sliced outside the assertion so a slicing error cannot satisfy it.
    truncated_doc = self.a_very_close_game[:, 0:2]

    with self.assertRaises(ValueError):
        classifier.predict(truncated_doc)
def test_predict_test_data_not_2D(self):
    # predict() is expected to raise ValueError when given a single row
    # taken out of the test fixture rather than the fixture itself.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Built outside the assertion so an indexing error cannot satisfy it.
    single_row = np.array(self.a_very_close_game[0])

    with self.assertRaises(ValueError):
        classifier.predict(single_row)
def test_predict_test_data_not_numpy_array(self):
    # predict() is expected to raise TypeError when the test document is
    # handed over as a plain list.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Converted outside the assertion so a conversion error cannot
    # satisfy it.
    doc_as_list = list(self.a_very_close_game)

    with self.assertRaises(TypeError):
        classifier.predict(doc_as_list)
def test_update_dictionary_new_map_is_empty(self):
    # update_dictionary() is expected to raise ValueError when the new map
    # is an empty dict.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    empty_map = {}

    with self.assertRaises(ValueError):
        classifier.update_dictionary(self.sports_map, empty_map)
def test_update_new_train_data_not_2D(self):
    # update() is expected to raise ValueError when the new documents
    # carry one axis more than the training fixture.
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # np.newaxis inserts an extra axis after the first one. Done outside
    # the assertion so an indexing error cannot satisfy it.
    docs_with_extra_axis = self.sports_data[:, np.newaxis]

    with self.assertRaises(ValueError):
        classifier.update(self.sports_labels, docs_with_extra_axis)
# Report the accuracy of the dictionary-based model. `matches` and `labels`
# are computed earlier in the script, outside this excerpt.
print("- Dictionary")
print("Accuracy: " + str(matches / len(labels)))

################
# Vector Model #
################
# Pull the vectorised form of the wine data out of the loaded data set.
wine_vector_data = wine_data["vectors"]
# NOTE(review): index_map is not used in the visible part of the script.
index_map = wine_vector_data["word_map"]
reviews_as_vector = wine_vector_data["vectors"]
labels_as_vectors = wine_vector_data["labels"]

# Train the vector model and predict on the very same reviews it was
# trained on, so the figure printed below is a training-set accuracy.
model_vect = VectMultinomial()
model_vect.train(labels_as_vectors, reviews_as_vector)
vect_predictions, vect_scores = model_vect.predict(reviews_as_vector)

# Number of predictions equal to their label, divided by the length of the
# labels' first axis.
accuracy = np.sum(
    vect_predictions == labels_as_vectors) / labels_as_vectors.shape[0]

print("- Vector")
print("Accuracy: " + str(accuracy))

#####################
# Spam Data Example #
#####################
print("\nSpam Data Example")
# NOTE(review): pickle.load can execute arbitrary code while
# deserialising; only load pickle files from a trusted source.
with open('./sample_data/kaggle/spam/sms_spam.pkl', 'rb') as f:
    spam_data = pickle.load(f)