def spam():
    train, target = load_spambase()

    normalize_columns = [55, 56]
    normalize(train, normalize_columns)
    train = append_new_column(train, 1.0, 0)

    # 10 fold cross validation
    train_size = len(train)
    k = 10
    test_index_generator = cross_validation.k_fold_cross_validation(train_size, k)
    fold = 0
    train_accuracy = 0
    test_accuracy = 0
    train_mse = 0
    test_mse = 0

    for start, end in test_index_generator:
        train_left = train[range(0, start)]
        train_right = train[range(end, train_size)]
        k_fold_train = np.vstack((train_left, train_right))
        test = train[range(start, end)]

        target_left = target[range(0, start)]
        target_right = target[range(end, train_size)]
        train_target = np.append(target_left, target_right)
        test_target = target[range(start, end)]

        cf = LinearRegression()
        cf = cf.fit(k_fold_train, train_target)

        print '=============Train Data Result============'
        predict_train = cf.predict(k_fold_train)
        cm = confusion_matrix(train_target, predict_train)
        print "confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
        er, acc, fpr, tpr = confusion_matrix_analysis(cm)
        print 'Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr, tpr)
        train_accuracy += acc
        print "mse: ", mse(predict_train, train_target), " rmse: ", rmse(predict_train, train_target), " mae: ", mae(
            predict_train,
            train_target)
        train_mse += mse(predict_train, train_target)

        print '=============Test Data Result============'
        predict_test = cf.predict(test)
        cm = confusion_matrix(test_target, predict_test)
        print "confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
        er, acc, fpr, tpr = confusion_matrix_analysis(cm)
        print 'Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr, tpr)
        test_accuracy += acc
        fold += 1
        print "mse: ", mse(predict_test, test_target), " rmse: ", rmse(predict_test, test_target), " mae: ", mae(
            predict_test,
            test_target)
        test_mse += mse(predict_test, test_target)

    print "Average train acc: %f, average test acc: %f" % (train_accuracy / fold, test_accuracy / fold)
    print "Average train mse: %f, average test mse: %f" % (train_mse / fold, test_mse / fold)
def spam():
    train, target = load_spambase()

    normalize_columns = [55, 56]
    normalize(train, normalize_columns)
    train = append_new_column(train, 1.0, 0)

    # 10 fold cross validation
    train_size = len(train)
    k = 10
    test_index_generator = cross_validation.k_fold_cross_validation(
        train_size, k)
    fold = 0
    train_accuracy = 0
    test_accuracy = 0
    train_mse = 0
    test_mse = 0

    for start, end in test_index_generator:
        train_left = train[range(0, start)]
        train_right = train[range(end, train_size)]
        k_fold_train = np.vstack((train_left, train_right))
        test = train[range(start, end)]

        target_left = target[range(0, start)]
        target_right = target[range(end, train_size)]
        train_target = np.append(target_left, target_right)
        test_target = target[range(start, end)]

        cf = LinearRegression()
        cf = cf.fit(k_fold_train, train_target)

        print '=============Train Data Result============'
        predict_train = cf.predict(k_fold_train)
        cm = confusion_matrix(train_target, predict_train)

        er, acc, fpr, tpr = confusion_matrix_analysis(cm)
        train_accuracy += acc
        print "mse: ", mse(predict_train, train_target), " rmse: ", rmse(
            predict_train,
            train_target), " mae: ", mae(predict_train, train_target)
        train_mse += mse(predict_train, train_target)

        print '=============Test Data Result============'
        predict_test = cf.predict(test)
        cm = confusion_matrix(test_target, predict_test)

        er, acc, fpr, tpr = confusion_matrix_analysis(cm)
        test_accuracy += acc
        fold += 1
        print "mse: ", mse(predict_test, test_target), " rmse: ", rmse(
            predict_test, test_target), " mae: ", mae(predict_test,
                                                      test_target)
        test_mse += mse(predict_test, test_target)

    print "Average train mse: %f, average test mse: %f" % (
        1.0 * train_mse / fold, 1.0 * test_mse / fold)
def housing():
    train, train_target, test, test_target = load_boston_house()

    normalize_columns = [0, 1, 2, 6, 7, 9, 10, 11, 12]
    normalize(train, normalize_columns)
    normalize(test, normalize_columns)
    train = append_new_column(train, 1.0, 0)
    test = append_new_column(test, 1.0, 0)

    lr = LinearRegression()
    lr.fit(train, train_target)

    print '=============Train Data Result============'
    predict = lr.predict(train)
    print "mse: ", mse(predict, train_target), " rmse: ", rmse(predict, train_target), " mae: ", mae(predict,
                                                                                                     train_target)
    print '=============Test Data Result============'
    predict = lr.predict(test)
    print "mse: ", mse(predict, test_target), " rmse: ", rmse(predict, test_target), " mae: ", mae(predict, test_target)
# Example #4
# 0
def housing():
    train, train_target, test, test_target = load_boston_house()

    normalize_columns = [0, 1, 2, 6, 7, 9, 10, 11, 12]
    normalize(train, normalize_columns)
    normalize(test, normalize_columns)
    train = append_new_column(train, 1.0, 0)
    test = append_new_column(test, 1.0, 0)

    lr = LinearRegression()
    lr.fit(train, train_target)

    print '=============Train Data Result============'
    predict = lr.predict(train)
    print "mse: ", mse(predict, train_target), " rmse: ", rmse(
        predict, train_target), " mae: ", mae(predict, train_target)
    print '=============Test Data Result============'
    predict = lr.predict(test)
    print "mse: ", mse(predict, test_target), " rmse: ", rmse(
        predict, test_target), " mae: ", mae(predict, test_target)
def ridged_logistic_regression():
    train, train_target, test, test_target = load_polluted_spambase()

    print "Train data: %s, Train Label: %s" % (train.shape, train_target.shape)
    print "Test data: %s, Test Label: %s" % (test.shape, test_target.shape)

    scaler = normalize(train)
    scaler.scale_test(test)
    train = append_new_column(train, 1.0, 0)
    test = append_new_column(test, 1.0, 0)

    cf = RidgedLogisticRegression()
    cf.fit(train, train_target)
    predict_values = cf.predict(test)
    predict_classes = cf.convert_to_binary(predict_values)
    cm = confusion_matrix(test_target, predict_classes)
    print "confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
    er, acc, fpr, tpr = confusion_matrix_analysis(cm)
    print 'Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr, tpr)
# Example #6
# 0
def housing():
    train, train_target, test, test_target = load_boston_house()

    scaler = normalize(train)
    scaler.scale_test(test)

    train = append_new_column(train, 1.0, 0)
    test = append_new_column(test, 1.0, 0)

    lr = StochasticGradientDescendingRegression()
    # lr = GradientDescendingRegression()
    lr.fit(train, train_target, 0.0001, 500)

    print '---------------Stochastic Gradient----------'
    print '=============Train Data Result============'
    predict = lr.predict(train)
    print "mse: ", mse(predict, train_target), " rmse: ", rmse(predict, train_target), " mae: ", mae(predict,
                                                                                                     train_target)
    print '=============Test Data Result============'
    predict = lr.predict(test)
    print "mse: ", mse(predict, test_target), " rmse: ", rmse(predict, test_target), " mae: ", mae(predict, test_target)

    print '------------- Normal Equation--------------'
    lr = LinearRegression()
    lr.fit(train, train_target)
    print '=============Train Data Result============'
    predict = lr.predict(train)
    print "mse: ", mse(predict, train_target), " rmse: ", rmse(predict, train_target), " mae: ", mae(predict,
                                                                                                     train_target)
    print '=============Test Data Result============'
    predict = lr.predict(test)
    print "mse: ", mse(predict, test_target), " rmse: ", rmse(predict, test_target), " mae: ", mae(predict, test_target)

    print '---------------Regression Tree-----------'
    lr = RegressionTree()
    lr.fit(train, train_target, 2, 0)
    print '=============Train Data Result============'
    predict = lr.predict(train)
    print "mse: ", mse(predict, train_target), " rmse: ", rmse(predict, train_target), " mae: ", mae(predict,
                                                                                                     train_target)
    print '=============Test Data Result============'
    predict = lr.predict(test)
    print "mse: ", mse(predict, test_target), " rmse: ", rmse(predict, test_target), " mae: ", mae(predict, test_target)
# Example #7
# 0
def spam(step, loop, converge):
    train, target = load_spambase()

    train, test, train_target, test_target = cross_validation.train_test_shuffle_split(train, target, len(train) / 10)
    scaler = normalize(train)
    train = append_new_column(train, 1.0, 0)
    scaler.scale_test(test)
    test = append_new_column(test, 1.0, 0)

    print '\n============== Logistic Regression - Stochastic Gradient Descending==============='
    spam_logistic(train, test, train_target, test_target, step, loop, converge)

    print '\n============== Linear Regression - Stochastic Gradient Descending ==============='
    spam_linear(train, test, train_target, test_target, step, loop, converge)

    print '\n============== Linear Regression - Normal Equation==============='
    spam_normal_equation(train, test, train_target, test_target)

    print '\n============== Decision Tree ===================================='
    spam_decision_tree(train, test, train_target, test_target)