def naive_bayes_with_lda():
    """Evaluate Gaussian Naive Bayes on LDA-projected polluted spambase data.

    Loads the train/test split via ``load_polluted_spambase()``, fits an
    ``LDA`` projection on the training split, applies it to both splits,
    trains ``GaussianNaiveBayes`` on the projected data and prints the
    confusion matrix, the derived rates and the elapsed time.

    The timer starts after the data has been loaded, so the reported run
    time excludes loading. Nothing is returned; all results are printed.
    """

    def _show_shapes(train_x, train_y, test_x, test_y):
        # Report the dimensions of both splits and their label vectors.
        print("Train data: %s, Train Label: %s" % (train_x.shape, train_y.shape))
        print("Test data: %s, Test Label: %s" % (test_x.shape, test_y.shape))

    features, labels, held_out, held_out_labels = load_polluted_spambase()
    _show_shapes(features, labels, held_out, held_out_labels)

    began = timeit.default_timer()

    # NOTE(review): if LDA is scikit-learn's discriminant analysis, a
    # two-class problem yields at most one component -- confirm that
    # n_components=100 is intended.
    projector = LDA(n_components=100)
    features_lda = projector.fit_transform(features, labels)
    held_out_lda = projector.transform(held_out)

    print(projector)
    _show_shapes(features_lda, labels, held_out_lda, held_out_labels)

    model = GaussianNaiveBayes()
    model.fit(features_lda, labels)
    predicted = model.predict_class(model.predict(held_out_lda))

    matrix = confusion_matrix(held_out_labels, predicted)
    cells = (matrix[0, 0], matrix[0, 1], matrix[1, 0], matrix[1, 1])
    print("confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % cells)
    rates = confusion_matrix_analysis(matrix)
    error_rate, accuracy, false_pos_rate, true_pos_rate = rates
    print("Error rate: %f, accuracy: %f, FPR: %f, TPR: %f" % (
        error_rate, accuracy, false_pos_rate, true_pos_rate))

    elapsed = timeit.default_timer() - began
    print("Total Run Time: %s secs" % elapsed)
def naive_bayes_with_lda():
    """Run the LDA + Gaussian Naive Bayes experiment on polluted spambase.

    Steps, in order: load the data with ``load_polluted_spambase()``,
    print the split shapes, fit ``LDA(n_components=100)`` on the training
    split and transform both splits, print the fitted transformer and the
    new shapes, train ``GaussianNaiveBayes`` and print the confusion
    matrix, the metrics from ``confusion_matrix_analysis`` and the elapsed
    time (measured from just after loading). Returns nothing.
    """
    train_fmt = "Train data: %s, Train Label: %s"
    test_fmt = "Test data: %s, Test Label: %s"

    x_train, y_train, x_test, y_test = load_polluted_spambase()

    print(train_fmt % (x_train.shape, y_train.shape))
    print(test_fmt % (x_test.shape, y_test.shape))

    t_begin = timeit.default_timer()

    # Fit the projection on training data only, then reuse it on test data.
    reducer = LDA(n_components=100)
    x_train_proj = reducer.fit_transform(x_train, y_train)
    x_test_proj = reducer.transform(x_test)

    print(reducer)
    print(train_fmt % (x_train_proj.shape, y_train.shape))
    print(test_fmt % (x_test_proj.shape, y_test.shape))

    classifier = GaussianNaiveBayes()
    classifier.fit(x_train_proj, y_train)
    scores = classifier.predict(x_test_proj)
    labels_hat = classifier.predict_class(scores)

    conf = confusion_matrix(y_test, labels_hat)
    print("confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (
        conf[0, 0], conf[0, 1], conf[1, 0], conf[1, 1]))
    err, accuracy, fp_rate, tp_rate = confusion_matrix_analysis(conf)
    print('Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (
        err, accuracy, fp_rate, tp_rate))

    t_end = timeit.default_timer()
    print("Total Run Time: %s secs" % (t_end - t_begin))
def polluted_spambase(T=100):
    """Run AdaBoost on the polluted spambase data and print the run time.

    Labels returned by ``load_polluted_spambase()`` are remapped so that
    ``0`` becomes ``-1.0`` and every other value becomes ``1.0`` before
    being handed to ``AdaBoost.boost``.

    Args:
        T: forwarded as the last argument of ``AdaBoost.boost``
            (presumably the number of boosting rounds -- confirm against
            the AdaBoost implementation).
    """
    features, raw_labels, held_out, raw_held_out_labels = load_polluted_spambase()

    # Recode the labels: 0 -> -1.0, anything else -> 1.0.
    signed_labels = np.array([-1.0 if v == 0 else 1.0 for v in raw_labels])
    signed_held_out = np.array(
        [-1.0 if v == 0 else 1.0 for v in raw_held_out_labels])

    print("Train data: %s, Train Label: %s" % (features.shape, signed_labels.shape))
    print("Test data: %s, Test Label: %s" % (held_out.shape, signed_held_out.shape))

    booster = AdaBoost()
    began = timeit.default_timer()
    booster.boost(features, signed_labels, held_out, signed_held_out, T)
    elapsed = timeit.default_timer() - began
    print("Total Run Time: %s secs" % elapsed)
def polluted_spambase(T=100):
    """Boost on polluted spambase with -1/+1 labels and report timing.

    Loads the split with ``load_polluted_spambase()``, converts both label
    vectors (``0`` -> ``-1.0``, otherwise ``1.0``), prints the shapes,
    then times a single ``AdaBoost().boost(...)`` call. Only the boosting
    call is timed. Returns nothing.

    Args:
        T: passed through unchanged as the final argument of ``boost``.
    """

    def _to_signed(labels):
        # Build a float array where label 0 maps to -1.0, all others to 1.0.
        converted = []
        for value in labels:
            if value == 0:
                converted.append(-1.0)
            else:
                converted.append(1.0)
        return np.array(converted)

    x_train, y_train, x_test, y_test = load_polluted_spambase()
    y_train = _to_signed(y_train)
    y_test = _to_signed(y_test)

    print("Train data: %s, Train Label: %s" % (x_train.shape, y_train.shape))
    print("Test data: %s, Test Label: %s" % (x_test.shape, y_test.shape))

    ensemble = AdaBoost()
    t_begin = timeit.default_timer()
    ensemble.boost(x_train, y_train, x_test, y_test, T)
    t_end = timeit.default_timer()
    print("Total Run Time: %s secs" % (t_end - t_begin))
def ridged_logistic_regression():
    """Train and evaluate RidgedLogisticRegression on polluted spambase.

    Loads the data, normalizes it, adds a constant ``1.0`` column to both
    splits via ``append_new_column(..., 1.0, 0)``, fits the classifier and
    prints the confusion matrix together with the rates returned by
    ``confusion_matrix_analysis``. Returns nothing.
    """
    x_train, y_train, x_test, y_test = load_polluted_spambase()

    print("Train data: %s, Train Label: %s" % (x_train.shape, y_train.shape))
    print("Test data: %s, Test Label: %s" % (x_test.shape, y_test.shape))

    # The return values of normalize()/scale_test() are not used for the
    # data itself, so both presumably scale their argument in place --
    # TODO confirm against the helper implementations.
    fitted_scaler = normalize(x_train)
    fitted_scaler.scale_test(x_test)

    # Add the constant column only after scaling, as in the original order.
    x_train_aug = append_new_column(x_train, 1.0, 0)
    x_test_aug = append_new_column(x_test, 1.0, 0)

    model = RidgedLogisticRegression()
    model.fit(x_train_aug, y_train)
    hard_labels = model.convert_to_binary(model.predict(x_test_aug))

    matrix = confusion_matrix(y_test, hard_labels)
    cells = (matrix[0, 0], matrix[0, 1], matrix[1, 0], matrix[1, 1])
    print("confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % cells)

    error_rate, accuracy, false_pos_rate, true_pos_rate = confusion_matrix_analysis(matrix)
    print('Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (
        error_rate, accuracy, false_pos_rate, true_pos_rate))
def ski_lasso(alpha, max_iter, threshold=0.42):
    """Fit a Lasso regressor on polluted spambase and score it as a classifier.

    The model is ``linear_model.Lasso(alpha=alpha, max_iter=max_iter)``.
    Its continuous predictions on the test split are turned into class
    labels with a cutoff, then the confusion matrix, the rates from
    ``confusion_matrix_analysis`` and the elapsed time are printed.
    The timer starts after the data has been loaded. Returns nothing.

    Args:
        alpha: forwarded to ``linear_model.Lasso`` as ``alpha``.
        max_iter: forwarded to ``linear_model.Lasso`` as ``max_iter``.
        threshold: decision cutoff; predictions ``<= threshold`` become
            class 0, all others class 1. Defaults to 0.42, the value that
            was previously hard-coded, so existing callers are unaffected.
    """
    train, train_target, test, test_target = load_polluted_spambase()

    print("Train data: %s, Train Label: %s" % (train.shape, train_target.shape))
    print("Test data: %s, Test Label: %s" % (test.shape, test_target.shape))

    start = timeit.default_timer()

    clf = linear_model.Lasso(alpha=alpha, max_iter=max_iter)
    print(clf)
    clf.fit(train, train_target)
    predicts = clf.predict(test)

    # Build a concrete list (not a lazy map object) so confusion_matrix
    # always receives a real sequence, whatever the interpreter version.
    predict_class = [0 if value <= threshold else 1 for value in predicts]

    cm = confusion_matrix(test_target, predict_class)
    print("confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (
        cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1]))
    er, acc, fpr, tpr = confusion_matrix_analysis(cm)
    print('Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr, tpr))

    stop = timeit.default_timer()
    print("Total Run Time: %s secs" % (stop - start))