def test_update_dictionary_correct_shorten_dictionary(self):
    """Check the model after its dictionary is replaced by a shorter one.

    Trains on the full sports fixtures, calls ``update_dictionary`` with
    ``self.shortened_dictionary`` and compares the resulting parameters,
    the scores for ``self.a_very_close_game`` and the predicted label
    against the expected fixture values.
    """
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)
    classifier.update_dictionary(self.shortened_dictionary)
    prediction, score = classifier.predict(self.a_very_close_game)

    # Correct Model Parameter Updates
    self.assertEqual(classifier.priors, self.correct_priors)
    self.assertEqual(classifier.label_counts, self.correct_label_count)
    self.assertEqual(
        classifier.empty_likelihoods,
        self.correct_shortened_empty_likelihoods,
    )
    self.assertDictEqual(
        classifier.likelihoods,
        self.correct_shortened_likelihoods,
    )

    # Correct Scores
    self.assertIsNotNone(score)
    for label in ("sport", "not sport"):
        self.assertAlmostEqual(
            score[0][label],
            self.correct_shortened_a_very_close_game_score[label],
        )

    # Correct Prediction
    self.assertEqual(prediction[0], "sport")
def test_train_model_params(self):
    """Check the parameters of a model trained on the sports fixtures.

    Compares ``priors``, ``label_counts``, ``empty_likelihoods`` and
    ``likelihoods`` with the expected fixture values.
    """
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Correct Model
    self.assertEqual(classifier.priors, self.correct_priors)
    self.assertEqual(classifier.label_counts, self.correct_label_count)
    self.assertEqual(
        classifier.empty_likelihoods,
        self.correct_empty_likelihoods,
    )
    self.assertDictEqual(classifier.likelihoods, self.correct_likelihoods)
def test_predict_prediction_and_score(self):
    """Check the label and scores ``predict`` returns for the sample game.

    Trains on the sports fixtures, predicts ``self.a_very_close_game`` and
    compares the first prediction and its per-label scores with the
    expected fixture values.
    """
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)
    prediction, score = classifier.predict(self.a_very_close_game)

    # Correct Prediction Output
    self.assertEqual(prediction[0], "sport")

    # Correct Score Output
    self.assertIsNotNone(score)
    for label in ("sport", "not sport"):
        self.assertAlmostEqual(
            score[0][label],
            self.correct_a_very_close_game_score[label],
        )
def test_update_add_more_training_data(self):
    """Check that train-then-update matches the expected full-data model.

    Trains on the first four sports examples, adds the fifth through
    ``update`` and then compares parameters, scores and prediction with
    the same fixture values used for a model trained on all the data.
    """
    classifier = Multinomial()
    classifier.train(self.sports_labels[:4], self.sports_data[:4])

    # Feed the held-out fifth example as a one-element batch.
    held_out_label = self.sports_labels[4]
    held_out_document = self.sports_data[4]
    classifier.update([held_out_label], [held_out_document])

    prediction, score = classifier.predict(self.a_very_close_game)

    # Correct Model
    self.assertEqual(classifier.priors, self.correct_priors)
    self.assertEqual(classifier.label_counts, self.correct_label_count)
    self.assertEqual(
        classifier.empty_likelihoods,
        self.correct_empty_likelihoods,
    )
    self.assertDictEqual(classifier.likelihoods, self.correct_likelihoods)

    # Correct Score Output
    self.assertIsNotNone(score)
    for label in ("sport", "not sport"):
        self.assertAlmostEqual(
            score[0][label],
            self.correct_a_very_close_game_score[label],
        )

    # Correct Prediction Output
    self.assertEqual(prediction[0], "sport")
def test_update_dictionary_new_dictionary_is_empty(self):
    """``update_dictionary`` must raise ``TypeError`` when passed ``{}``.

    NOTE(review): ``{}`` is an empty ``dict`` literal, not an empty ``set``;
    confirm whether ``set()`` was the intended input for this case.
    """
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    empty_dictionary = {}
    with self.assertRaises(TypeError):
        classifier.update_dictionary(empty_dictionary)
def test_update_dictionary_new_dictionary_does_not_contain_strings(self):
    """``update_dictionary`` must raise ``TypeError`` for a set of ints."""
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    non_string_dictionary = {0}
    with self.assertRaises(TypeError):
        classifier.update_dictionary(non_string_dictionary)
def test_update_dictionary_new_dictionary_is_not_set(self):
    """``update_dictionary`` must raise ``TypeError`` when given a list."""
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Built outside the ``with`` block so only the call under test is guarded.
    dictionary_as_list = list(self.extended_dictionary)
    with self.assertRaises(TypeError):
        classifier.update_dictionary(dictionary_as_list)
def test_update_number_of_labels_and_docs_differ(self):
    """``update`` must raise ``ValueError`` for mismatched input lengths.

    Passes only the first four labels together with the full document list.
    """
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    too_few_labels = self.sports_labels[:4]
    with self.assertRaises(ValueError):
        classifier.update(too_few_labels, self.sports_data)
def test_update_training_data_does_not_contains_lists_of_strs(self):
    """``update`` must raise ``TypeError`` when documents hold non-strings.

    Every training document is replaced by ``[0]``, so the outer container
    is still a list of lists and only the element type is wrong.
    """
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Build a real list: a bare ``map`` object is an iterator in Python 3,
    # which would make this the same scenario as the "data is not in a
    # list" test instead of exercising the element-type check.
    non_string_documents = [[0] for _ in self.sports_data]
    with self.assertRaises(TypeError):
        classifier.update(self.sports_labels, non_string_documents)
def test_update_training_data_is_not_in_a_list(self):
    """``update`` must raise ``TypeError`` when the data is a ``set``."""
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    data_as_set = set()
    with self.assertRaises(TypeError):
        classifier.update(self.sports_labels, data_as_set)
def test_update_labels_are_not_str_or_int(self):
    """``update`` must raise ``TypeError`` when every label is ``None``."""
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # One ``None`` per original label keeps the label/document counts equal.
    none_labels = [None for _ in self.sports_labels]
    with self.assertRaises(TypeError):
        classifier.update(none_labels, self.sports_data)
def test_predict_test_data_does_not_contains_lists(self):
    """``predict`` must raise ``TypeError`` when documents are not lists.

    Every test document is replaced by an empty ``set``, so the outer
    container is still a list and only the element type is wrong.
    """
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    # Build a real list: a bare ``map`` object is an iterator in Python 3,
    # which would make this the same scenario as the "test data is not in
    # a list" test instead of exercising the per-document type check.
    non_list_documents = [set() for _ in self.a_very_close_game]
    with self.assertRaises(TypeError):
        classifier.predict(non_list_documents)
def test_predict_test_data_is_not_in_a_list(self):
    """``predict`` must raise ``TypeError`` when the data is a ``set``."""
    classifier = Multinomial()
    classifier.train(self.sports_labels, self.sports_data)

    data_as_set = set()
    with self.assertRaises(TypeError):
        classifier.predict(data_as_set)
# Load the pickled wine sample data set.
# NOTE(review): pickle executes arbitrary code on load; this is only safe
# while the file is a trusted, locally generated fixture.
with open('./sample_data/nltk/wine/wine_data.pkl', 'rb') as f:
    wine_data = pickle.load(f)

####################
# Dictionary Model #
####################
wine_bow_data = wine_data["bagofwords"]
labels = wine_bow_data["labels"]
raw_data = wine_bow_data["raw_data"]
raw_data_top_removed = wine_bow_data["raw_data_top_removed"]

# Train on the "top removed" documents, then predict the raw documents.
model_dict = DictMultinomial()
model_dict.train(labels, raw_data_top_removed)
dict_predictions, dict_scores = model_dict.predict(raw_data)

# Count the positions where the predicted label equals the given label.
matches = sum(
    1 for i in range(len(labels)) if dict_predictions[i] == labels[i]
)

print("- Dictionary")
print("Accuracy: " + str(matches / len(labels)))

################
# Vector Model #
################
wine_vector_data = wine_data["vectors"]