def MNB_test(x, y):
    """Cross-validate, fit and score a ``MultinomialNB`` classifier.

    The data is split with ``pp.split_data_set``; the classifier is passed
    to ``crossvalidation`` together with the full ``x``/``y``, then fitted
    on the training split and used to predict the test split.

    Args:
        x: Samples handed to ``pp.split_data_set`` and ``crossvalidation``
            (presumably the feature matrix -- confirm against the caller).
        y: Targets aligned with ``x``.

    Returns:
        None. Results are reported on standard output only.
    """
    # Single-argument print(...) behaves identically to the print statement
    # on Python 2 and is also valid on Python 3.
    print('MNB test')
    train_x, train_y, test_x, test_y = pp.split_data_set(x, y)

    clf = MultinomialNB()
    # Cross-validation sees the whole data set; the fit below uses only
    # the training split.
    crossvalidation(clf, x, y)

    clf.fit(train_x, train_y)
    rst = clf.predict(test_x)
    print(calculate_RMSE(rst, test_y))
    # NOTE(review): assumes pp.print_mean_median writes its own output, as
    # its name and the bare calls in random_forest_test / svm_test suggest.
    # Wrapping it in print would additionally emit its return value.
    pp.print_mean_median(rst)
def MLR_test(x, y):
    """Cross-validate, fit and score a multinomial ``LogisticRegression``.

    The classifier is built with ``solver='lbfgs'``,
    ``multi_class='multinomial'`` and ``C=4``. It is passed to
    ``crossvalidation`` with the full ``x``/``y``, then fitted on the
    training split from ``pp.split_data_set`` and used to predict the
    test split.

    Args:
        x: Samples handed to ``pp.split_data_set`` and ``crossvalidation``
            (presumably the feature matrix -- confirm against the caller).
        y: Targets aligned with ``x``.

    Returns:
        None. Results are reported on standard output only.
    """
    # Single-argument print(...) behaves identically to the print statement
    # on Python 2 and is also valid on Python 3.
    print('MLR test')
    train_x, train_y, test_x, test_y = pp.split_data_set(x, y)
    clf = LogisticRegression(solver='lbfgs', multi_class='multinomial', C=4)
    # Cross-validation sees the whole data set; the fit below uses only
    # the training split.
    crossvalidation(clf, x, y)

    clf.fit(train_x, train_y)
    rst = clf.predict(test_x)
    print(calculate_RMSE(rst, test_y))
    # NOTE(review): assumes pp.print_mean_median writes its own output, as
    # its name and the bare calls in random_forest_test / svm_test suggest.
    # Wrapping it in print would additionally emit its return value.
    pp.print_mean_median(rst)
def tree_test(x, y):
    """Cross-validate, fit and score a ``tree.DecisionTreeClassifier``.

    The classifier is passed to ``crossvalidation`` with the full
    ``x``/``y``, then fitted on the training split from
    ``pp.split_data_set``. The ``calculate_RMSE`` result is printed twice:
    first for predictions on the training split, then for predictions on
    the test split.

    Args:
        x: Samples handed to ``pp.split_data_set`` and ``crossvalidation``
            (presumably the feature matrix -- confirm against the caller).
        y: Targets aligned with ``x``.

    Returns:
        None. Results are reported on standard output only.
    """
    # Single-argument print(...) behaves identically to the print statement
    # on Python 2 and is also valid on Python 3.
    print('DecisionTree')
    train_x, train_y, test_x, test_y = pp.split_data_set(x, y)
    clf = tree.DecisionTreeClassifier()
    # Cross-validation sees the whole data set; the fit below uses only
    # the training split.
    crossvalidation(clf, x, y)

    clf.fit(train_x, train_y)
    rst = clf.predict(test_x)
    train_rst = clf.predict(train_x)

    # Training-split score first, then test-split score.
    print(calculate_RMSE(train_rst, train_y))
    print(calculate_RMSE(rst, test_y))
    # NOTE(review): assumes pp.print_mean_median writes its own output, as
    # its name and the bare calls in random_forest_test / svm_test suggest.
    # Wrapping it in print would additionally emit its return value.
    pp.print_mean_median(rst)
def random_forest_test(x, y):
    print 'RandomForest', 
    train_x, train_y, test_x, test_y = pp.split_data_set(x, y)

    clf = RandomForestClassifier()

    crossvalidation(clf, x, y)

    clf.fit(train_x, train_y)
    rst = clf.predict(test_x)

    print rst
    print test_y
    print calculate_RMSE(rst, test_y)
    print 'mean rst: ', 
    pp.print_mean_median(rst)
def svm_test(x, y):
    """Cross-validate, fit and score a linear-kernel ``svm.SVC``.

    The classifier is configured explicitly (``C=0.001``, linear kernel; see
    the keyword table below). It is passed to ``crossvalidation`` with the
    full ``x``/``y``, fitted on the training split from
    ``pp.split_data_set`` and used to predict the test split. The
    ``calculate_RMSE`` result is printed and the predictions are handed to
    ``pp.print_mean_median``.

    Args:
        x: Samples handed to ``pp.split_data_set`` and ``crossvalidation``.
        y: Targets aligned with ``x``.

    Returns:
        None.
    """
    train_x, train_y, test_x, test_y = pp.split_data_set(x, y)

    # Full SVC configuration, spelled out one option per line.
    svc_options = dict(
        C=0.001,
        cache_size=200,
        class_weight=None,
        coef0=0.0,
        decision_function_shape=None,
        degree=3,
        gamma='auto',
        kernel='linear',
        max_iter=-1,
        probability=False,
        random_state=None,
        shrinking=True,
        tol=0.001,
        verbose=False,
    )
    classifier = svm.SVC(**svc_options)

    print('fitting: ')
    crossvalidation(classifier, x, y)
    classifier.fit(train_x, train_y)

    print('predicting: ')
    predictions = classifier.predict(test_x)
    print(calculate_RMSE(predictions, test_y))
    pp.print_mean_median(predictions)