def spam():
    train, target = load_spambase()

    normalize_columns = [55, 56]
    normalize(train, normalize_columns)
    train = append_new_column(train, 1.0, 0)

    # 10 fold cross validation
    train_size = len(train)
    k = 10
    test_index_generator = cross_validation.k_fold_cross_validation(train_size, k)
    fold = 0
    train_accuracy = 0
    test_accuracy = 0
    train_mse = 0
    test_mse = 0

    for start, end in test_index_generator:
        train_left = train[range(0, start)]
        train_right = train[range(end, train_size)]
        k_fold_train = np.vstack((train_left, train_right))
        test = train[range(start, end)]

        target_left = target[range(0, start)]
        target_right = target[range(end, train_size)]
        train_target = np.append(target_left, target_right)
        test_target = target[range(start, end)]

        cf = LinearRegression()
        cf = cf.fit(k_fold_train, train_target)

        print '=============Train Data Result============'
        predict_train = cf.predict(k_fold_train)
        cm = confusion_matrix(train_target, predict_train)
        print "confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
        er, acc, fpr, tpr = confusion_matrix_analysis(cm)
        print 'Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr, tpr)
        train_accuracy += acc
        print "mse: ", mse(predict_train, train_target), " rmse: ", rmse(predict_train, train_target), " mae: ", mae(
            predict_train,
            train_target)
        train_mse += mse(predict_train, train_target)

        print '=============Test Data Result============'
        predict_test = cf.predict(test)
        cm = confusion_matrix(test_target, predict_test)
        print "confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
        er, acc, fpr, tpr = confusion_matrix_analysis(cm)
        print 'Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr, tpr)
        test_accuracy += acc
        fold += 1
        print "mse: ", mse(predict_test, test_target), " rmse: ", rmse(predict_test, test_target), " mae: ", mae(
            predict_test,
            test_target)
        test_mse += mse(predict_test, test_target)

    print "Average train acc: %f, average test acc: %f" % (train_accuracy / fold, test_accuracy / fold)
    print "Average train mse: %f, average test mse: %f" % (train_mse / fold, test_mse / fold)
def spam():
    train, target = load_spambase()

    normalize_columns = [55, 56]
    normalize(train, normalize_columns)
    train = append_new_column(train, 1.0, 0)

    # 10 fold cross validation
    train_size = len(train)
    k = 10
    test_index_generator = cross_validation.k_fold_cross_validation(
        train_size, k)
    fold = 0
    train_accuracy = 0
    test_accuracy = 0
    train_mse = 0
    test_mse = 0

    for start, end in test_index_generator:
        train_left = train[range(0, start)]
        train_right = train[range(end, train_size)]
        k_fold_train = np.vstack((train_left, train_right))
        test = train[range(start, end)]

        target_left = target[range(0, start)]
        target_right = target[range(end, train_size)]
        train_target = np.append(target_left, target_right)
        test_target = target[range(start, end)]

        cf = LinearRegression()
        cf = cf.fit(k_fold_train, train_target)

        print '=============Train Data Result============'
        predict_train = cf.predict(k_fold_train)
        cm = confusion_matrix(train_target, predict_train)

        er, acc, fpr, tpr = confusion_matrix_analysis(cm)
        train_accuracy += acc
        print "mse: ", mse(predict_train, train_target), " rmse: ", rmse(
            predict_train,
            train_target), " mae: ", mae(predict_train, train_target)
        train_mse += mse(predict_train, train_target)

        print '=============Test Data Result============'
        predict_test = cf.predict(test)
        cm = confusion_matrix(test_target, predict_test)

        er, acc, fpr, tpr = confusion_matrix_analysis(cm)
        test_accuracy += acc
        fold += 1
        print "mse: ", mse(predict_test, test_target), " rmse: ", rmse(
            predict_test, test_target), " mae: ", mae(predict_test,
                                                      test_target)
        test_mse += mse(predict_test, test_target)

    print "Average train mse: %f, average test mse: %f" % (
        1.0 * train_mse / fold, 1.0 * test_mse / fold)
def decision_tree_all_data():
    train, target = load_spambase()
    cf = tree.DecisionTree()
    cf = cf.fit(train, target, 5)
    print_tree(cf.root)
    predicts = cf.predict(train)
    cm = confusion_matrix(target,predicts)
    print "confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
    er, acc, fpr, tpr = confusion_matrix_analysis(cm)
    print 'Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr, tpr)
Exemple #4
0
def decision_tree_all_data():
    train, target = load_spambase()
    cf = tree.DecisionTree()
    cf = cf.fit(train, target, 5)
    print_tree(cf.root)
    predicts = cf.predict(train)
    cm = confusion_matrix(target, predicts)
    print "confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (
        cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
    er, acc, fpr, tpr = confusion_matrix_analysis(cm)
    print 'Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr,
                                                              tpr)
Exemple #5
0
def decision_tree():
    train, target = load_spambase()

    # 10 fold cross validation
    train_size = len(train)
    k = 10
    test_index_generator = cross_validation.k_fold_cross_validation(
        train_size, k)
    fold = 0
    train_accuracy = 0
    test_accuracy = 0
    train_mse = 0
    test_mse = 0

    for start, end in test_index_generator:
        train_left = train[range(0, start)]
        train_right = train[range(end, train_size)]
        k_fold_train = np.vstack((train_left, train_right))
        test = train[range(start, end)]

        target_left = target[range(0, start)]
        target_right = target[range(end, train_size)]
        train_target = np.append(target_left, target_right)
        test_target = target[range(start, end)]

        cf = tree.DecisionTree()
        cf = cf.fit(k_fold_train, train_target, 5)
        print "=========Tree=============="
        print_tree(cf.root)

        print '=============Train Data Result============'
        predict_train = cf.predict(k_fold_train)
        cm = confusion_matrix(train_target, predict_train)
        print "confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (
            cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
        er, acc, fpr, tpr = confusion_matrix_analysis(cm)
        print 'Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr,
                                                                  tpr)
        train_accuracy += acc
        print "mse: ", mse(predict_train, train_target), " rmse: ", rmse(
            predict_train,
            train_target), " mae: ", mae(predict_train, train_target)
        train_mse += mse(predict_train, train_target)

        print '=============Test Data Result============'
        predict_test = cf.predict(test)
        cm = confusion_matrix(test_target, predict_test)
        print "confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (
            cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
        er, acc, fpr, tpr = confusion_matrix_analysis(cm)
        print 'Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr,
                                                                  tpr)
        test_accuracy += acc
        print "mse: ", mse(predict_test, test_target), " rmse: ", rmse(
            predict_test, test_target), " mae: ", mae(predict_test,
                                                      test_target)
        test_mse += mse(predict_test, test_target)

        fold += 1

    print "Average train acc: %f, average test acc: %f" % (
        train_accuracy / fold, test_accuracy / fold)
    print "Average train mse: %f, average test mse: %f" % (train_mse / fold,
                                                           test_mse / fold)