Esempio n. 1
0
    def test_predict_prediction_and_score(self):
        """Check the label and score returned by ``predict`` after training.

        A fresh ``Multinomial`` model is trained on the sports fixture and
        asked to predict ``self.a_very_close_game``. The first prediction
        and the first score entry are compared with the expected values.
        """
        model = Multinomial()
        model.train(self.sports_labels, self.sports_data)
        prediction, score = model.predict(self.a_very_close_game)

        # Correct Prediction Output
        # Compare the first entry (as is done for ``score`` below) instead
        # of relying on the truthiness of ``container == 0``.
        self.assertEqual(prediction[0], 0)

        # Correct Score Output
        np.testing.assert_array_almost_equal(
            score[0], self.correct_a_very_close_game_score)
Esempio n. 2
0
    def test_update_dictionary_shorten_dictionary(self):
        """Verify model state and output after ``update_dictionary``.

        A model trained on the sports fixture is handed ``self.sports_map``
        and ``self.sports_map_shortened`` through ``update_dictionary``. Its
        parameters, the score for ``self.a_very_close_game_short`` and the
        resulting prediction are then compared with the expected fixtures.
        """
        classifier = Multinomial()
        classifier.train(self.sports_labels, self.sports_data)
        classifier.update_dictionary(
            self.sports_map, self.sports_map_shortened)
        predicted, scores = classifier.predict(self.a_very_close_game_short)
        assert_close = np.testing.assert_array_almost_equal

        # Model parameters after the dictionary update.
        assert_close(classifier.priors, self.correct_priors)
        assert_close(classifier.label_counts, self.correct_label_count)
        assert_close(classifier.empty_likelihoods,
                     self.correct_shortened_empty_likelihoods)
        assert_close(classifier.likelihoods,
                     self.correct_shortened_likelihoods)

        # Score entry for the first sample.
        assert_close(scores[0],
                     self.correct_shortened_a_very_close_game_score)

        # Predicted label for the first sample.
        self.assertEqual(predicted[0], 0)
Esempio n. 3
0
    def test_update_add_more_training_data(self):
        """Verify that ``update`` folds an extra sample into a trained model.

        The model is trained on the first four sports samples and then
        updated with the fifth one, wrapped in one-element arrays. The
        resulting parameters, score and prediction are compared with the
        expected fixtures.
        """
        classifier = Multinomial()
        classifier.train(self.sports_labels[0:4], self.sports_data[0:4])

        # Wrap the held-back fifth sample into one-element arrays.
        extra_labels = np.array([self.sports_labels[4]])
        extra_data = np.array([self.sports_data[4]])
        classifier.update(extra_labels, extra_data)

        predicted, scores = classifier.predict(self.a_very_close_game)
        assert_close = np.testing.assert_array_almost_equal

        # Model parameters after the incremental update.
        assert_close(classifier.priors, self.correct_priors)
        assert_close(classifier.label_counts, self.correct_label_count)
        assert_close(classifier.empty_likelihoods,
                     self.correct_empty_likelihoods)
        assert_close(classifier.likelihoods, self.correct_likelihoods)

        # Score entry for the first sample.
        assert_close(scores[0], self.correct_a_very_close_game_score)

        # Predicted label for the first sample.
        self.assertEqual(predicted[0], 0)
    print("- Dictionary")
    print("Accuracy: " + str(matches / len(labels)))

    ################
    # Vector Model #
    ################

    wine_vector_data = wine_data["vectors"]

    index_map = wine_vector_data["word_map"]
    reviews_as_vector = wine_vector_data["vectors"]
    labels_as_vectors = wine_vector_data["labels"]

    model_vect = VectMultinomial()
    model_vect.train(labels_as_vectors, reviews_as_vector)
    vect_predictions, vect_scores = model_vect.predict(reviews_as_vector)

    accuracy = np.sum(
        vect_predictions == labels_as_vectors) / labels_as_vectors.shape[0]
    print("- Vector")
    print("Accuracy: " + str(accuracy))

#####################
# Spam Data Example #
#####################

# Print a section header for the spam example.
print("\nSpam Data Example")

# Load the pickled spam sample object; the path is relative to the current
# working directory, so the script must be started from the matching folder.
# NOTE(review): the structure of the unpickled object is not visible here.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only use a trusted copy of this file.
with open('./sample_data/kaggle/spam/sms_spam.pkl', 'rb') as f:
    spam_data = pickle.load(f)