Esempio n. 1
0
    def test_predict_prediction_and_score(self):
        """Predict on a trained model and check the label and its score.

        Trains on the full sports fixture, predicts on the
        ``a_very_close_game`` fixture, and compares the first prediction
        and the first score entry with the expected fixture values.
        """
        model = Multinomial()
        model.train(self.sports_labels, self.sports_data)
        prediction, score = model.predict(self.a_very_close_game)

        # Correct Prediction Output
        # Index the first entry, as the sibling update tests do for the same
        # predict() call, instead of comparing the whole returned container
        # against a scalar.
        self.assertEqual(prediction[0], 0)

        # Correct Score Output
        np.testing.assert_array_almost_equal(
            score[0], self.correct_a_very_close_game_score)
Esempio n. 2
0
    def test_train_model_params(self):
        """Training on the sports fixture yields the expected parameters."""
        classifier = Multinomial()
        classifier.train(self.sports_labels, self.sports_data)

        # Correct Model: compare each learned attribute with its fixture.
        assert_close = np.testing.assert_array_almost_equal
        assert_close(classifier.priors, self.correct_priors)
        assert_close(classifier.label_counts, self.correct_label_count)
        assert_close(classifier.empty_likelihoods,
                     self.correct_empty_likelihoods)
        assert_close(classifier.likelihoods, self.correct_likelihoods)
Esempio n. 3
0
    def test_update_dictionary_shorten_dictionary(self):
        """Shortening the dictionary updates likelihoods and predictions.

        After training, the dictionary is swapped from ``sports_map`` to
        ``sports_map_shortened``; the model parameters, the score, and the
        prediction for the shortened sample are then checked.
        """
        classifier = Multinomial()
        classifier.train(self.sports_labels, self.sports_data)
        classifier.update_dictionary(self.sports_map,
                                     self.sports_map_shortened)
        predicted, scores = classifier.predict(self.a_very_close_game_short)

        assert_close = np.testing.assert_array_almost_equal

        # Correct Model Parameter Updates
        assert_close(classifier.priors, self.correct_priors)
        assert_close(classifier.label_counts, self.correct_label_count)
        assert_close(classifier.empty_likelihoods,
                     self.correct_shortened_empty_likelihoods)
        assert_close(classifier.likelihoods,
                     self.correct_shortened_likelihoods)

        # Correct Scores
        assert_close(scores[0],
                     self.correct_shortened_a_very_close_game_score)

        # Correct Prediction
        self.assertEqual(predicted[0], 0)
Esempio n. 4
0
    def test_update_add_more_training_data(self):
        """Train on the first four samples, then add the fifth via update.

        The resulting parameters, score, and prediction are compared with
        the same fixtures the full-training tests use.
        """
        classifier = Multinomial()
        classifier.train(self.sports_labels[0:4], self.sports_data[0:4])

        # Wrap the held-out sample in arrays before passing it to update().
        extra_labels = np.array([self.sports_labels[4]])
        extra_data = np.array([self.sports_data[4]])
        classifier.update(extra_labels, extra_data)

        predicted, scores = classifier.predict(self.a_very_close_game)

        assert_close = np.testing.assert_array_almost_equal

        # Correct Model Parameter Updates
        assert_close(classifier.priors, self.correct_priors)
        assert_close(classifier.label_counts, self.correct_label_count)
        assert_close(classifier.empty_likelihoods,
                     self.correct_empty_likelihoods)
        assert_close(classifier.likelihoods, self.correct_likelihoods)

        # Correct New Scores
        assert_close(scores[0], self.correct_a_very_close_game_score)

        # Correct Prediction
        self.assertEqual(predicted[0], 0)
Esempio n. 5
0
 def test_update_dictionary_new_map_not_dict(self):
     """update_dictionary raises TypeError when the new map is a list."""
     classifier = Multinomial()
     classifier.train(self.sports_labels, self.sports_data)

     # Build the invalid argument up front so only the call is guarded.
     new_map_as_list = list(self.sports_map_shortened)
     with self.assertRaises(TypeError):
         classifier.update_dictionary(self.sports_map, new_map_as_list)
Esempio n. 6
0
 def test_update_dictionary_old_map_not_same_size_as_training_data(self):
     """update_dictionary raises ValueError for an old map with an extra key.

     An additional ``"test"`` entry is added to a copy of ``sports_map``
     before it is passed as the old map.
     """
     model = Multinomial()
     model.train(self.sports_labels, self.sports_data)

     # Mutate a copy rather than the fixture itself, so the extra key
     # cannot leak into other tests if the fixture object is shared.
     oversized_map = self.sports_map.copy()
     oversized_map["test"] = 15

     self.assertRaises(ValueError, model.update_dictionary, oversized_map,
                       self.sports_map_shortened)
Esempio n. 7
0
 def test_update_dictionary_old_map_wrong_keys(self):
     """update_dictionary raises TypeError for an old map with an int key.

     An integer key ``0`` is added to a copy of ``sports_map`` before it
     is passed as the old map.
     """
     model = Multinomial()
     model.train(self.sports_labels, self.sports_data)

     # Mutate a copy rather than the fixture itself, so the bad key
     # cannot leak into other tests if the fixture object is shared.
     bad_key_map = self.sports_map.copy()
     bad_key_map[0] = 1

     self.assertRaises(TypeError, model.update_dictionary, bad_key_map,
                       self.sports_map_shortened)
Esempio n. 8
0
 def test_update_called_before_training(self):
     """update raises ValueError on a model that was never trained."""
     untrained = Multinomial()
     with self.assertRaises(ValueError):
         untrained.update(self.sports_labels, self.sports_data)
Esempio n. 9
0
 def test_update_new_train_data_not_2D(self):
     """update raises ValueError when the new data has an extra axis."""
     classifier = Multinomial()
     classifier.train(self.sports_labels, self.sports_data)

     # Insert a new axis so the data is no longer two-dimensional.
     data_with_extra_axis = self.sports_data[:, np.newaxis]
     with self.assertRaises(ValueError):
         classifier.update(self.sports_labels, data_with_extra_axis)
Esempio n. 10
0
 def test__update_new_train_data_not_same_dim_as_training_data(self):
     """update raises ValueError when the new data has fewer columns."""
     classifier = Multinomial()
     classifier.train(self.sports_labels, self.sports_data)

     # Keep only the first two columns of the training data.
     truncated_data = self.sports_data[:, 0:2]
     with self.assertRaises(ValueError):
         classifier.update(self.sports_labels, truncated_data)
Esempio n. 11
0
 def test_update_dictionary_called_before_training(self):
     """update_dictionary raises ValueError on an untrained model."""
     untrained = Multinomial()
     with self.assertRaises(ValueError):
         untrained.update_dictionary(self.sports_map, self.sports_map_extra)
Esempio n. 12
0
 def test_train_labels_not_1D(self):
     """train raises ValueError when the labels carry an extra axis."""
     classifier = Multinomial()

     # Insert a new axis so the labels are no longer one-dimensional.
     labels_with_extra_axis = self.sports_labels[:, np.newaxis]
     with self.assertRaises(ValueError):
         classifier.train(labels_with_extra_axis, self.sports_data)
Esempio n. 13
0
 def test_predict_called_before_training(self):
     """predict raises ValueError on a model that was never trained."""
     untrained = Multinomial()
     with self.assertRaises(ValueError):
         untrained.predict(self.a_very_close_game)
Esempio n. 14
0
 def test_update_dictionary_new_map_wrong_values(self):
     """update_dictionary raises TypeError for a new map with a str value.

     A ``"test": "fun"`` entry is added to a copy of
     ``sports_map_shortened`` before it is passed as the new map.
     """
     model = Multinomial()
     model.train(self.sports_labels, self.sports_data)

     # Mutate a copy rather than the fixture itself, so the bad value
     # cannot leak into other tests if the fixture object is shared.
     bad_value_map = self.sports_map_shortened.copy()
     bad_value_map["test"] = "fun"

     self.assertRaises(TypeError, model.update_dictionary, self.sports_map,
                       bad_value_map)
Esempio n. 15
0
 def test_predict_test_data_not_same_dim_as_training_data(self):
     """predict raises ValueError when the sample has fewer columns."""
     classifier = Multinomial()
     classifier.train(self.sports_labels, self.sports_data)

     # Keep only the first two columns of the sample.
     truncated_sample = self.a_very_close_game[:, 0:2]
     with self.assertRaises(ValueError):
         classifier.predict(truncated_sample)
Esempio n. 16
0
 def test_predict_test_data_not_2D(self):
     """predict raises ValueError when given only the first sample row."""
     classifier = Multinomial()
     classifier.train(self.sports_labels, self.sports_data)

     # Taking element 0 drops the leading axis of the sample.
     first_row = np.array(self.a_very_close_game[0])
     with self.assertRaises(ValueError):
         classifier.predict(first_row)
Esempio n. 17
0
 def test_predict_test_data_not_numpy_array(self):
     """predict raises TypeError when the sample is passed as a list."""
     classifier = Multinomial()
     classifier.train(self.sports_labels, self.sports_data)

     sample_as_list = list(self.a_very_close_game)
     with self.assertRaises(TypeError):
         classifier.predict(sample_as_list)
Esempio n. 18
0
 def test_train_training_data_not_numpy_array(self):
     """train raises TypeError when the data is passed as a list."""
     classifier = Multinomial()

     data_as_list = list(self.sports_data)
     with self.assertRaises(TypeError):
         classifier.train(self.sports_labels, data_as_list)
Esempio n. 19
0
 def test_train_training_data_not_2D(self):
     """train raises ValueError when the data has an extra axis."""
     classifier = Multinomial()

     # Insert a new axis so the data is no longer two-dimensional.
     data_with_extra_axis = self.sports_data[:, np.newaxis]
     with self.assertRaises(ValueError):
         classifier.train(self.sports_labels, data_with_extra_axis)
Esempio n. 20
0
 def test_update_dictionary_new_map_is_empty(self):
     """update_dictionary raises ValueError when the new map is empty."""
     classifier = Multinomial()
     classifier.train(self.sports_labels, self.sports_data)

     empty_map = {}
     with self.assertRaises(ValueError):
         classifier.update_dictionary(self.sports_map, empty_map)
Esempio n. 21
0
 def test_update_number_of_labels_and_docs_differ(self):
     """update raises ValueError when labels and data differ in length."""
     classifier = Multinomial()
     classifier.train(self.sports_labels, self.sports_data)

     # Only the first four labels, paired with the full data set.
     too_few_labels = self.sports_labels[0:4]
     with self.assertRaises(ValueError):
         classifier.update(too_few_labels, self.sports_data)
Esempio n. 22
0
 def test_update_new_train_data_not_numpy_array(self):
     """update raises TypeError when the new data is passed as a list."""
     classifier = Multinomial()
     classifier.train(self.sports_labels, self.sports_data)

     data_as_list = list(self.sports_data)
     with self.assertRaises(TypeError):
         classifier.update(self.sports_labels, data_as_list)
Esempio n. 23
0
            matches += 1

    print("- Dictionary")
    print("Accuracy: " + str(matches / len(labels)))

    ################
    # Vector Model #
    ################

    # Select the "vectors" entry of wine_data and unpack its fields.
    wine_vector_data = wine_data["vectors"]

    # NOTE(review): index_map is not used in the visible part of this
    # section — confirm whether later code needs it.
    index_map = wine_vector_data["word_map"]
    reviews_as_vector = wine_vector_data["vectors"]
    labels_as_vectors = wine_vector_data["labels"]

    # Train and then predict on the same reviews, so the figure printed
    # below is accuracy on the training data, not on held-out data.
    model_vect = VectMultinomial()
    model_vect.train(labels_as_vectors, reviews_as_vector)
    vect_predictions, vect_scores = model_vect.predict(reviews_as_vector)

    # Fraction of predictions that equal their label.
    accuracy = np.sum(
        vect_predictions == labels_as_vectors) / labels_as_vectors.shape[0]
    print("- Vector")
    print("Accuracy: " + str(accuracy))

#####################
# Spam Data Example #
#####################

print("\nSpam Data Example")

with open('./sample_data/kaggle/spam/sms_spam.pkl', 'rb') as f: