def naive_bayes_with_lda():
    """Run Gaussian naive Bayes on LDA-projected polluted spambase data.

    Loads the train/test split with ``load_polluted_spambase``, fits an
    ``LDA`` transform on the training portion and applies it to both
    portions, trains ``GaussianNaiveBayes`` on the projected features and
    prints the data shapes (before and after projection), the fitted LDA
    object, the confusion-matrix cells, the derived rates and the elapsed
    time. Nothing is returned.

    NOTE(review): a function with this name is defined twice in this file;
    the later definition replaces the earlier one at import time.
    """
    features_train, labels_train, features_test, labels_test = (
        load_polluted_spambase())
    print("Train data: %s, Train Label: %s"
          % (features_train.shape, labels_train.shape))
    print("Test data: %s, Test Label: %s"
          % (features_test.shape, labels_test.shape))

    # The clock starts after loading, so the reported run time excludes the
    # loading step but includes the projection, training and evaluation.
    t_begin = timeit.default_timer()

    # NOTE(review): if LDA is scikit-learn's discriminant analysis, a
    # two-class target allows at most one component -- confirm that
    # n_components=100 is intended.
    projector = LDA(n_components=100)
    features_train = projector.fit_transform(features_train, labels_train)
    features_test = projector.transform(features_test)
    print(projector)
    print("Train data: %s, Train Label: %s"
          % (features_train.shape, labels_train.shape))
    print("Test data: %s, Test Label: %s"
          % (features_test.shape, labels_test.shape))

    model = GaussianNaiveBayes()
    model.fit(features_train, labels_train)
    scores = model.predict(features_test)
    predicted = model.predict_class(scores)

    matrix = confusion_matrix(labels_test, predicted)
    tn, fp = matrix[0, 0], matrix[0, 1]
    fn, tp = matrix[1, 0], matrix[1, 1]
    print("confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s"
          % (tn, fp, fn, tp))

    error_rate, accuracy, false_pos_rate, true_pos_rate = (
        confusion_matrix_analysis(matrix))
    print("Error rate: %f, accuracy: %f, FPR: %f, TPR: %f"
          % (error_rate, accuracy, false_pos_rate, true_pos_rate))

    t_end = timeit.default_timer()
    print("Total Run Time: %s secs" % (t_end - t_begin))
def naive_bayes_with_lda():
    """Classify polluted spambase with LDA followed by Gaussian naive Bayes.

    Steps, in order: load the split via ``load_polluted_spambase``; print
    the shapes; fit ``LDA`` on the training data and transform both sets;
    print the LDA object and the new shapes; fit ``GaussianNaiveBayes`` and
    predict on the test set; print the confusion matrix, the rates computed
    by ``confusion_matrix_analysis`` and the elapsed time. Returns nothing.

    NOTE(review): this is the second definition of this name in the file,
    so it overrides the earlier one.
    """
    x_tr, y_tr, x_te, y_te = load_polluted_spambase()
    print("Train data: %s, Train Label: %s" % (x_tr.shape, y_tr.shape))
    print("Test data: %s, Test Label: %s" % (x_te.shape, y_te.shape))

    # Timing covers everything from the projection onwards; data loading
    # happens before the timer is started.
    started = timeit.default_timer()

    # NOTE(review): with scikit-learn's LDA and a binary target, at most
    # n_classes - 1 = 1 component is available -- verify n_components=100.
    reducer = LDA(n_components=100)
    x_tr = reducer.fit_transform(x_tr, y_tr)
    x_te = reducer.transform(x_te)
    print(reducer)
    print("Train data: %s, Train Label: %s" % (x_tr.shape, y_tr.shape))
    print("Test data: %s, Test Label: %s" % (x_te.shape, y_te.shape))

    classifier = GaussianNaiveBayes()
    classifier.fit(x_tr, y_tr)
    predicted_labels = classifier.predict_class(classifier.predict(x_te))

    cm = confusion_matrix(y_te, predicted_labels)
    cells = (cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
    print("confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % cells)

    rates = confusion_matrix_analysis(cm)
    er, acc, fpr, tpr = rates
    print('Error rate: %f, accuracy: %f, FPR: %f, TPR: %f'
          % (er, acc, fpr, tpr))

    elapsed = timeit.default_timer() - started
    print("Total Run Time: %s secs" % elapsed)
def polluted_spambase(T=100):
    """Run ``AdaBoost.boost`` on the polluted spambase data and time it.

    The labels returned by ``load_polluted_spambase`` are remapped so that
    0 becomes -1.0 and every other value becomes 1.0 before being passed to
    the booster. The data shapes and the elapsed time of the ``boost`` call
    are printed. Returns nothing.

    :param T: forwarded unchanged as the last argument of ``boost.boost``
        (presumably the number of boosting rounds -- confirm in AdaBoost).

    NOTE(review): a function with this name is defined twice in this file;
    the later definition replaces the earlier one at import time.
    """

    def as_signed(labels):
        # Map label 0 to -1.0 and any other label to 1.0.
        return np.array([-1.0 if label == 0 else 1.0 for label in labels])

    features_train, labels_train, features_test, labels_test = (
        load_polluted_spambase())
    labels_train = as_signed(labels_train)
    labels_test = as_signed(labels_test)

    print("Train data: %s, Train Label: %s"
          % (features_train.shape, labels_train.shape))
    print("Test data: %s, Test Label: %s"
          % (features_test.shape, labels_test.shape))

    booster = AdaBoost()

    # Only the boost() call itself is timed.
    t_begin = timeit.default_timer()
    booster.boost(features_train, labels_train,
                  features_test, labels_test, T)
    t_end = timeit.default_timer()
    print("Total Run Time: %s secs" % (t_end - t_begin))
def polluted_spambase(T=100):
    """Time an ``AdaBoost`` run over the polluted spambase split.

    Loads the split with ``load_polluted_spambase``, converts both label
    vectors to -1.0 (where the label equals 0) or 1.0 (otherwise), prints
    the shapes, calls ``AdaBoost().boost`` with the train and test data and
    prints how long that call took. Returns nothing.

    :param T: passed through as the final argument of ``boost``
        (presumably the round count -- verify against AdaBoost).

    NOTE(review): this is the second definition of this name in the file,
    so it overrides the earlier one.
    """
    x_tr, y_tr, x_te, y_te = load_polluted_spambase()

    # Remap labels: 0 -> -1.0, anything else -> 1.0.
    y_tr = np.array([1.0 if value != 0 else -1.0 for value in y_tr])
    y_te = np.array([1.0 if value != 0 else -1.0 for value in y_te])

    print("Train data: %s, Train Label: %s" % (x_tr.shape, y_tr.shape))
    print("Test data: %s, Test Label: %s" % (x_te.shape, y_te.shape))

    ensemble = AdaBoost()

    # The timer brackets only the boosting call.
    started = timeit.default_timer()
    ensemble.boost(x_tr, y_tr, x_te, y_te, T)
    elapsed = timeit.default_timer() - started
    print("Total Run Time: %s secs" % elapsed)
def ridged_logistic_regression():
    """Evaluate ``RidgedLogisticRegression`` on the polluted spambase data.

    Loads the split with ``load_polluted_spambase``, normalizes it, adds a
    constant column to both feature matrices, fits the classifier on the
    training data and prints the shapes, the confusion-matrix cells and the
    rates computed by ``confusion_matrix_analysis``. Returns nothing.
    """
    x_train, y_train, x_test, y_test = load_polluted_spambase()
    print("Train data: %s, Train Label: %s" % (x_train.shape, y_train.shape))
    print("Test data: %s, Test Label: %s" % (x_test.shape, y_test.shape))

    # The results of normalize()/scale_test() are not reassigned to the
    # feature matrices, so both presumably modify their argument in place
    # -- TODO confirm against their definitions.
    fitted_scaler = normalize(x_train)
    fitted_scaler.scale_test(x_test)

    # Presumably inserts a constant 1.0 column at index 0 to act as the
    # bias term -- confirm against append_new_column.
    x_train = append_new_column(x_train, 1.0, 0)
    x_test = append_new_column(x_test, 1.0, 0)

    model = RidgedLogisticRegression()
    model.fit(x_train, y_train)
    scores = model.predict(x_test)
    predicted = model.convert_to_binary(scores)

    matrix = confusion_matrix(y_test, predicted)
    tn, fp = matrix[0, 0], matrix[0, 1]
    fn, tp = matrix[1, 0], matrix[1, 1]
    print("confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s"
          % (tn, fp, fn, tp))

    error_rate, accuracy, false_pos_rate, true_pos_rate = (
        confusion_matrix_analysis(matrix))
    print('Error rate: %f, accuracy: %f, FPR: %f, TPR: %f'
          % (error_rate, accuracy, false_pos_rate, true_pos_rate))
def ski_lasso(alpha, max_iter, threshold=0.42):
    """Fit a Lasso regressor on polluted spambase and score it as a classifier.

    Loads the split with ``load_polluted_spambase``, fits
    ``linear_model.Lasso`` on the training data, thresholds the continuous
    test predictions into 0/1 labels and prints the shapes, the estimator,
    the confusion-matrix cells, the rates computed by
    ``confusion_matrix_analysis`` and the elapsed time. Returns nothing.

    :param alpha: forwarded to ``linear_model.Lasso`` as ``alpha``.
    :param max_iter: forwarded to ``linear_model.Lasso`` as ``max_iter``.
    :param threshold: decision cut-off; a prediction ``<= threshold`` is
        labelled 0, anything above it is labelled 1. Defaults to 0.42, the
        value that was previously hard-coded, so existing callers see no
        change.
    """
    x_train, y_train, x_test, y_test = load_polluted_spambase()
    print("Train data: %s, Train Label: %s" % (x_train.shape, y_train.shape))
    print("Test data: %s, Test Label: %s" % (x_test.shape, y_test.shape))

    # Timing starts after loading and covers fitting, prediction and
    # the evaluation printout.
    started = timeit.default_timer()

    regressor = linear_model.Lasso(alpha=alpha, max_iter=max_iter)
    print(regressor)
    regressor.fit(x_train, y_train)
    scores = regressor.predict(x_test)

    # A list (rather than map()) keeps the labels a concrete sequence on
    # Python 3 as well, where map() would hand confusion_matrix an iterator.
    predicted = [0 if score <= threshold else 1 for score in scores]

    matrix = confusion_matrix(y_test, predicted)
    tn, fp = matrix[0, 0], matrix[0, 1]
    fn, tp = matrix[1, 0], matrix[1, 1]
    print("confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s"
          % (tn, fp, fn, tp))

    error_rate, accuracy, false_pos_rate, true_pos_rate = (
        confusion_matrix_analysis(matrix))
    print('Error rate: %f, accuracy: %f, FPR: %f, TPR: %f'
          % (error_rate, accuracy, false_pos_rate, true_pos_rate))

    elapsed = timeit.default_timer() - started
    print("Total Run Time: %s secs" % elapsed)