예제 #1
0
def testProcessingPipeline(filename):
    """Run every saved classifier over the ingredient data in *filename*.

    The ingredient list is obtained through ``getJSONData`` and
    ``getIngredientList``, transformed with both saved vectorizers
    (``tfidf.pickle`` and ``bag_of_words.pickle``), and each resulting
    feature set is passed to each of the three saved models.

    Args:
        filename: Passed unchanged to ``getJSONData``.

    Returns:
        A dict keyed by ``(model_name, feature_name)`` holding the value
        returned by that model's ``predict`` call. ``model_name`` is one of
        ``"adaboost"``, ``"randomfor"``, ``"bagging"``; ``feature_name`` is
        ``"bag_of_words"`` or ``"tfidf"``. Previously the predictions were
        computed and discarded.
    """
    json_data = getJSONData(filename)
    ingredient_list = getIngredientList(json_data)

    # NOTE(review): load_classifier presumably unpickles the named file;
    # only point it at pickles from a trusted source.
    load = ClassificationUtils.load_classifier
    tfidf = load("tfidf.pickle")
    bag_of_words = load("bag_of_words.pickle")
    models = [
        ("adaboost", load("ada_idf_cook.pickle")),
        ("randomfor", load("rf_idf_cook.pickle")),
        ("bagging", load("bagging_idf_cook.pickle")),
    ]

    test_data_tfidf = tfidf.transform(ingredient_list)
    test_data_bag = bag_of_words.transform(ingredient_list)
    # Bag-of-words first, then TF-IDF, for every model.
    features = [
        ("bag_of_words", test_data_bag),
        ("tfidf", test_data_tfidf),
    ]

    predictions = {}
    for model_name, model in models:
        for feature_name, matrix in features:
            predictions[(model_name, feature_name)] = model.predict(matrix)
    return predictions
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 25 18:18:43 2015
Test Data for the Kaggle Email Spam Test Competition
@author: Rupak Chakraborty
"""

import numpy as np
import ClassificationUtils

# Input file and the number of feature columns allocated per row.
test_filename = "test.csv"
num_features = 100

# Read the file once to size the feature matrix. The context manager closes
# the handle even if the read raises (the original never closed it).
with open(test_filename, "r") as test_file:
    test_data = test_file.read()

# One matrix row is allocated per whitespace-separated token in the file.
# NOTE(review): this equals the record count only if records contain no
# embedded whitespace - confirm against the actual test.csv layout.
test_data = test_data.split()
test_set = np.zeros((len(test_data), num_features))
# NOTE(review): presumably fills test_set in place from the file - confirm
# against ClassificationUtils.populateNumpyData.
ClassificationUtils.populateNumpyData(test_filename, test_set)

# Load the previously saved models.
# NOTE(review): load_classifier presumably unpickles these files; only load
# pickles that come from a trusted source.
svm = ClassificationUtils.load_classifier("svm_email.pickle")
rf = ClassificationUtils.load_classifier("rf_email.pickle")
bnb = ClassificationUtils.load_classifier("bnb_email.pickle")
gnb = ClassificationUtils.load_classifier("gnb_email.pickle")
mnb = ClassificationUtils.load_classifier("mnb_email.pickle")

# Run each model over the same feature matrix.
svm_predict = svm.predict(test_set)
rf_predict = rf.predict(test_set)
bnb_predict = bnb.predict(test_set)
gnb_predict = gnb.predict(test_set)
mnb_predict = mnb.predict(test_set)