def learn_batch_performance_params(m_val):
    """Tune perceptron and winnow hyper-parameters on a D1/D2 split, then
    retrain on the full training file and report held-out accuracy.

    m_val indexes batch_performance_datasets, whose entries are
    (testing_file, training_file) pairs.

    Side effects: prints validation (D2) and test-set accuracies for the
    perceptron and winnow variants with and without margin.
    """
    testing_file = batch_performance_datasets[m_val][0]
    training_file = batch_performance_datasets[m_val][1]
    testing_set = file_read(testing_file)
    training_set = file_read(training_file)

    # NOTE(review): the *testing* set is what gets partitioned into
    # D1 (train) / D2 (validation) for the parameter search -- confirm this
    # is intentional and not meant to be training_set.
    partition = partition_data(testing_set)
    D1 = partition.training
    D2 = partition.testing

    # ---- Perceptron: pick (gamma, eta) on D2, retrain, score on test ----
    # get_params(0) -> no-margin variant, get_params(1) -> with-margin variant.
    perceptron_params = perceptron_learn_batch_performance_params(D1, D2)
    perceptron_params_without_margin = perceptron_params.get_params(0)
    perceptron_params_with_margin = perceptron_params.get_params(1)
    print('perceptron without margin acc(D2): ' + str(perceptron_params_without_margin.accuracy))
    print('perceptron with margin acc(D2): ' + str(perceptron_params_with_margin.accuracy))
    print('\n\nRunning perceptron on Test set...')
    perceptron_trained_without_margin = perceptron_train(
        batch_performance_n, training_set,
        perceptron_params_without_margin.gamma,
        perceptron_params_without_margin.eta)
    perceptron_trained_with_margin = perceptron_train(
        batch_performance_n, training_set,
        perceptron_params_with_margin.gamma,
        perceptron_params_with_margin.eta)
    perceptron_mistakes_without_margin = perceptron_test(
        testing_set,
        perceptron_trained_without_margin[0],
        perceptron_trained_without_margin[1])
    perceptron_mistakes_with_margin = perceptron_test(
        testing_set,
        perceptron_trained_with_margin[0],
        perceptron_trained_with_margin[1])
    # Accuracy = 1 - (final cumulative mistake count / #test examples).
    # FIX: message typo 'perecptron' -> 'perceptron'.
    print('perceptron without margin acc(Test): ' + str(
        1.0 - float(perceptron_mistakes_without_margin[len(perceptron_mistakes_without_margin) - 1]) / len(testing_set)))
    print('perceptron with margin acc(Test): ' + str(
        1.0 - float(perceptron_mistakes_with_margin[len(perceptron_mistakes_with_margin) - 1]) / len(testing_set)))

    # ---- Winnow: same protocol as the perceptron above ----
    winnow_params = winnow_learn_batch_performance_params(D1, D2)
    winnow_params_without_margin = winnow_params.get_params(0)
    winnow_params_with_margin = winnow_params.get_params(1)
    print('winnow without margin acc(D2): ' + str(winnow_params_without_margin.accuracy))
    print('winnow with margin acc(D2): ' + str(winnow_params_with_margin.accuracy))
    print('\n\nRunning winnow on Test set...')
    # BUG FIX: the no-margin model previously trained with the with-margin
    # eta (winnow_params_with_margin.eta); use the matching no-margin eta.
    winnow_trained_without_margin = winnow_train(
        batch_performance_n, training_set,
        winnow_params_without_margin.gamma,
        winnow_params_without_margin.eta)
    winnow_trained_with_margin = winnow_train(
        batch_performance_n, training_set,
        winnow_params_with_margin.gamma,
        winnow_params_with_margin.eta)
    winnow_mistakes_without_margin = winnow_test(testing_set, winnow_trained_without_margin)
    winnow_mistakes_with_margin = winnow_test(testing_set, winnow_trained_with_margin)
    print('winnow without margin acc(Test): ' + str(
        1.0 - float(winnow_mistakes_without_margin[len(winnow_mistakes_without_margin) - 1]) / len(testing_set)))
    print('winnow with margin acc(Test): ' + str(
        1.0 - float(winnow_mistakes_with_margin[len(winnow_mistakes_with_margin) - 1]) / len(testing_set)))
# Perceptron smoke tests: fit P.perceptron_train on small hand-built 2-D
# datasets and report self-test accuracy via P.perceptron_test.
import perceptron as P
import numpy as np

# Master switch for console output of the tests below.
print_stuff = True
if print_stuff:
    print("\n\n\nPerceptron tests:")

# Linearly separable sample: train and score on the same points.
Perc_X_train = np.array([[0, 1], [1, 0], [5, 4], [1, 1], [3, 3], [2, 4], [1, 6]])
Perc_Y_train = np.array([[1], [1], [-1], [1], [-1], [-1], [-1]])
[w, b] = P.perceptron_train(Perc_X_train, Perc_Y_train)
perc_test = P.perceptron_test(Perc_X_train, Perc_Y_train, w, b)
# NOTE(review): grouping of the three prints under this `if` is reconstructed
# from the unformatted source -- confirm against the original file.
if print_stuff:
    print("W from sample =", w, "B from sample =", b)
    print("Test on self form sample: ", perc_test)
    print("Testing for non-linearly seperable data")

# Non-linearly separable data: perceptron cannot reach 100% self-accuracy here.
Perc_stuff_X = np.array(
    [[1, 0], [7, 4], [9, 6], [2, 1], [4, 8], [0, 3], [13, 5], [6, 8], [7, 3],
     [3, 6], [2, 1], [8, 3], [10, 2], [3, 5], [5, 1], [1, 9], [10, 3], [4, 1],
     [6, 6], [2, 2]])
Perc_stuff_Y = np.array(
    [[1], [1], [-1], [1], [-1], [-1], [-1], [1], [1], [-1], [1], [-1], [-1],
     [-1], [1], [1], [-1], [1], [-1], [-1]])
[w, b] = P.perceptron_train(Perc_stuff_X, Perc_stuff_Y)
someTest = P.perceptron_test(Perc_stuff_X, Perc_stuff_Y, w, b)
if print_stuff:
    print("Non-linearly seperable data test w=", w, "b=", b, "Accuracy on self =", someTest)

# NOTE(review): this array literal is truncated at the end of this chunk --
# the remaining rows and the closing brackets continue past the visible source.
Perc_random_X = np.array([[1.84724509, 2.23182926], [1.22695894, 1.6611229],
                          [2.13212121, 4.63313796], [7.78081405, 4.11930532],
                          [7.28450063, 3.90368111], [1.29216053, 2.76912245],
                          [7.0384763, 2.80881342], [1.22081714, 3.80955021],
# NOTE: this chunk begins mid-script; C_3, X_2, plt, np and p (the perceptron
# module) are defined earlier in the original file.

# Plot the K=3 cluster centers over the X_2 samples.
print("C_3: \n", C_3)
plt.scatter(C_3[:, 0], C_3[:, 1], label='centers')
plt.scatter(X_2[:, 0], X_2[:, 1], label='samples')
plt.title('X_2, K=3')
# plt.savefig("k_means_results_3.png")  # Uncomment to save plot as file
plt.show()

# PERCEPTRON TESTING
# Hand-Tested Data: train and score on the same four points.
X = np.array([[1, 1], [1, -1], [-1, 1], [-1, -1]])
Y = np.array([[1], [-1], [-1], [-1]])
W = p.perceptron_train(X, Y)
# W is (weights, bias): W[0] holds the weight vector, W[1] the bias.
print("Hand-Tested Data W1: ", W[0][0], " W2: ", W[0][1], " b:", W[1][0])
test_acc = p.perceptron_test(X, Y, W[0], W[1])
# FIX: message typo 'Accurancy' -> 'Accuracy' (here and below).
print("Accuracy:", test_acc, "\n")

# Percepton Test Data
X = np.array([[0, 1], [1, 0], [5, 4], [1, 1], [3, 3], [2, 4], [1, 6]])
Y = np.array([[1], [1], [-1], [1], [-1], [-1], [-1]])
W = p.perceptron_train(X, Y)
# FIX: message typo 'Preceptron' -> 'Perceptron' (here and below).
print("Perceptron Test Data 1 W1: ", W[0][0], " W2: ", W[0][1], " b:", W[1][0])
test_acc = p.perceptron_test(X, Y, W[0], W[1])
print("Accuracy:", test_acc, "\n")

# Perceptron Test Data - Writeup
X = np.array([[-2, 1], [1, 1], [1.5, -0.5], [-2, -1], [-1, -1.5], [2, -2]])
Y = np.array([[1], [1], [1], [-1], [-1], [-1]])
W = p.perceptron_train(X, Y)
print("Perceptron Test Data 2 W1: ", W[0][0], " W2: ", W[0][1], " b:", W[1][0])
print("Clustering") #test C1 = clu.K_Means(cluX1, 3) #print(C1) #writeup C2 = clu.K_Means(cluX2, 2) print("\tK=2", C2) C3 = clu.K_Means(cluX2, 3) print("\tK=3", C3) #writeup CBetter1 = clu.K_Means_better(cluX2, 2) print("\tBetter K=2", CBetter1) CBetter2 = clu.K_Means_better(cluX2, 3) print("\tBetter K=3", CBetter2) print() print("Perceptron") #test W_B1 = per.perceptron_train(perX1, perY1) perAcc1 = per.perceptron_test(perX1, perY1, W_B1[0], W_B1[1]) #print("\tW",W_B1[0]) #print("\tB",W_B1[1]) #print("\tAccuracy", perAcc1) #writeup W_B2 = per.perceptron_train(perX2, perY2) perAcc2 = per.perceptron_test(perX2, perY2, W_B2[0], W_B2[1]) print("\tW", W_B2[0]) print("\tB", W_B2[1]) print("\tAccuracy", perAcc2)
def main():
    """Entry point: train a baseline tree, a decision tree, logistic
    regression and a perceptron on the adult dataset, optionally prune the
    tree, then print accuracy/precision/recall/F1 for each model and
    optionally plot the metrics.

    CLI options read from parse_args(): depth_plot, baseline_attribute,
    depth, rep (reduced-error pruning), csp (chi-square pruning), lr_top,
    lr_bot, plot.
    """
    args = parse_args()
    data = load_data('data/adult.data')
    # NOTE(review): 'adult.test2' looks like a variant/renamed test split --
    # confirm the filename is intentional.
    test_data = load_data('data/adult.test2')
    val_data = load_data('data/adult.val')
    if args.depth_plot:
        # Plot validation F1 versus maximum tree depth, then exit.
        print('Calculating f1-scores for different depths...')
        depths, scores = dt.tune_max_depth(data, val_data)
        plt.plot(depths, scores)
        plt.ylabel('F1-score')
        plt.xlabel('Maximum Depth')
        plt.show()
        quit()
    # Depth-1 tree on a (possibly forced) attribute serves as the baseline.
    baseline_tree = dt.build_decision_tree(
        data, max_depth=1, forced_attribute=args.baseline_attribute)
    print('Building decision tree...')
    dt_start = time.time()
    if args.depth is not None:
        tree = dt.build_decision_tree(data, max_depth=args.depth)
    else:
        tree = dt.build_decision_tree(data)
    print('Decision tree built in ' + str(time.time() - dt_start) + ' s.')
    baseline_metrics = compute_metrics(dt.decision_tree_classify, test_data,
                                       [baseline_tree])
    dt_metrics = compute_metrics(dt.decision_tree_classify, test_data, [tree])
    if args.rep:
        # Reduced-error pruning mutates `tree` in place.
        print('Pruning decision tree (reduced error)...')
        dtre_start = time.time()
        dt.reduced_error_prune(tree, val_data)
        print('Decision tree pruned (reduced error) in ' +
              str(time.time() - dtre_start) + ' s.')
        dtre_metrics = compute_metrics(dt.decision_tree_classify, test_data,
                                       [tree])
    elif args.csp:
        # Chi-square pruning also mutates `tree` in place.
        print('Pruning decision tree (chi-square)...')
        dtcs_start = time.time()
        dt.chi_square_prune(tree)
        print('Decision tree pruned (chi-square) in ' +
              str(time.time() - dtcs_start) + ' s.')
        dtcs_metrics = compute_metrics(dt.decision_tree_classify, test_data,
                                       [tree])
    y_train = get_labels(data)
    y_test = get_labels(test_data)
    # extract_features returns (X_train, X_test, feature_names).
    features = extract_features(data, test_data)
    X_train = features[0]
    X_test = features[1]
    feature_names = features[2]
    print('Building logistic regression model...')
    lr_start = time.time()
    lr_model = LogisticRegression(solver='sag').fit(X_train, y_train)
    print('Logistic regression model built in ' + str(time.time() - lr_start) +
          ' s.')
    if args.lr_top is not None:
        # get_lr_top_weights(...)[0] -> most positively weighted features.
        print('Top weighted features in logistic regression model: ' +
              str(get_lr_top_weights(lr_model, args.lr_top, feature_names)[0]))
    if args.lr_bot is not None:
        # get_lr_top_weights(...)[1] -> most negatively weighted features.
        print(
            'Top negatively weighted features in logistic regression model: ' +
            str(get_lr_top_weights(lr_model, args.lr_bot, feature_names)[1]))
    lr_pred = lr_model.predict(X_test)
    weights = perceptron.perceptron(X_train, y_train, 10)
    perceptron_pred = perceptron.perceptron_test(X_test, weights)
    # Metric tuples are (accuracy, precision, recall, f1); accuracy is the
    # fraction of test examples where prediction equals label.
    perceptron_metrics = [
        y_test[i] == perceptron_pred[i] for i in range(len(y_test))
    ].count(True) / len(test_data), precision_score(
        y_test, perceptron_pred), recall_score(y_test, perceptron_pred), f1_score(
            y_test, perceptron_pred)
    lr_metrics = [y_test[i] == lr_pred[i] for i in range(len(y_test))
                  ].count(True) / len(test_data), precision_score(
                      y_test, lr_pred), recall_score(y_test, lr_pred), f1_score(
                          y_test, lr_pred)
    print('Baseline:')
    print('Accuracy: ' + str(baseline_metrics[0]))
    print('Precision: ' + str(baseline_metrics[1]))
    print('Recall: ' + str(baseline_metrics[2]))
    print('F1 Score: ' + str(baseline_metrics[3]))
    print('\nDecision Tree:')
    print('Accuracy: ' + str(dt_metrics[0]))
    print('Precision: ' + str(dt_metrics[1]))
    print('Recall: ' + str(dt_metrics[2]))
    print('F1 Score: ' + str(dt_metrics[3]))
    if args.rep:
        print('\nDecision Tree (w/ reduced error pruning):')
        print('Accuracy: ' + str(dtre_metrics[0]))
        print('Precision: ' + str(dtre_metrics[1]))
        print('Recall: ' + str(dtre_metrics[2]))
        print('F1 Score: ' + str(dtre_metrics[3]))
    elif args.csp:
        print('\nDecision Tree (w/ chi-square pruning):')
        print('Accuracy: ' + str(dtcs_metrics[0]))
        print('Precision: ' + str(dtcs_metrics[1]))
        print('Recall: ' + str(dtcs_metrics[2]))
        print('F1 Score: ' + str(dtcs_metrics[3]))
    print('\nPerceptron:')
    print('Accuracy: ' + str(perceptron_metrics[0]))
    print('Precision: ' + str(perceptron_metrics[1]))
    print('Recall: ' + str(perceptron_metrics[2]))
    print('F1 Score: ' + str(perceptron_metrics[3]))
    print('\nLogistic Regression:')
    print('Accuracy: ' + str(lr_metrics[0]))
    print('Precision: ' + str(lr_metrics[1]))
    print('Recall: ' + str(lr_metrics[2]))
    print('F1 Score: ' + str(lr_metrics[3]))
    if args.plot:
        # Repackage each metric 4-tuple explicitly for plot_metrics.
        metrics_baseline = (baseline_metrics[0], baseline_metrics[1],
                            baseline_metrics[2], baseline_metrics[3])
        metrics_dt = (dt_metrics[0], dt_metrics[1], dt_metrics[2],
                      dt_metrics[3])
        metrics_perceptron = (perceptron_metrics[0], perceptron_metrics[1],
                              perceptron_metrics[2], perceptron_metrics[3])
        metrics_lr = (lr_metrics[0], lr_metrics[1], lr_metrics[2],
                      lr_metrics[3])
        # Pruned-tree metrics exist only when the matching flag was given.
        metrics_dtre, metrics_dtcs = None, None
        if args.rep:
            metrics_dtre = (dtre_metrics[0], dtre_metrics[1], dtre_metrics[2],
                            dtre_metrics[3])
        elif args.csp:
            metrics_dtcs = (dtcs_metrics[0], dtcs_metrics[1], dtcs_metrics[2],
                            dtcs_metrics[3])
        plot_metrics(metrics_baseline, metrics_dt, metrics_perceptron,
                     metrics_lr, metrics_dtre, metrics_dtcs)
# -----------------------------------------------------------------------------
# testPreceptron.py -- Sara Davis, 10/10/2018 (v1.0)
# Smoke test for perceptron_train / perceptron_test: fit a separating
# hyperplane on one hand-made dataset, then score it on a second one.
# -----------------------------------------------------------------------------
import numpy as np

from perceptron import perceptron_test, perceptron_train

# Training set: six 2-D points with +/-1 labels (linearly separable).
X = np.array([[-2, 1], [1, 1], [1.5, -0.5], [-2, -1], [-1, -1.5], [2, -2]])
Y = np.array([[1], [1], [1], [-1], [-1], [-1]])

# Held-out points used only for scoring the trained model.
X_test = np.array([[0, 0], [1, 1], [0, 1], [2, 2], [1, 0], [1, 2]])
Y_test = np.array([[-1], [1], [-1], [1], [-1], [1]])

# W packs the learned parameters: W[0] = weights, W[1] = bias.
W = perceptron_train(X, Y)
print(W)

test_acc = perceptron_test(X_test, Y_test, W[0], W[1])
print(test_acc)
# NOTE(review): this chunk begins mid-way through load_data(); the `def` line
# and the setup of X, x_str and y_str precede the visible region, so the
# indented lines below are the tail of that function body.
    Y = []
    # Each line of x_str is a comma-separated feature row.
    for item in x_str:
        temp = [float(x) for x in item.split(',')]
        X.append(temp)
    # Labels are optional; parse them only when present.
    if len(y_str) > 0:
        for item in y_str:
            temp = int(item)
            Y.append(temp)
    X = np.array(X)
    Y = np.array(Y)
    return X, Y

# Train and self-test the perceptron on data_1.
X, Y = load_data("data_1.txt")
w, b = p.perceptron_train(X, Y)
test_acc = p.perceptron_test(X, Y, w, b)
print("Perceptron:", test_acc)
# NOTE(review): trains on data_2 but then reloads data_1 for testing --
# looks like a deliberate cross-dataset check, but confirm it is intended.
X, Y = load_data("data_2.txt")
w, b = p.perceptron_train(X, Y)
X, Y = load_data("data_1.txt")
test_acc = p.perceptron_test(X, Y, w, b)
print("Perceptron:", test_acc)

# Gradient of f(x) = x^2 in one dimension, for the gradient-descent test.
def df_test1(x):
    return np.array([2 * x[0]])

x = gd.gradient_descent(df_test1, np.array([5.0]), 0.1)
print("Gradient Descent:", x)
def main():
    """Entry point: train a baseline tree, a decision tree, logistic
    regression, a hand-rolled perceptron, an sklearn perceptron and Gaussian
    naive Bayes on the adult dataset, then print accuracy/precision/recall/F1
    for each model.

    NOTE(review): this chunk ends right after the 'Cross Validation' banner;
    the function appears to continue past the visible source.
    """
    data = load_data('data/adult.data')
    baseline_tree = dt.build_decision_tree(data, max_depth=1)
    print('Building decision tree...')
    dt_start = time.time()
    tree = dt.build_decision_tree(data)
    print('Decision tree built in ' + str(time.time() - dt_start) + ' s.')
    # NOTE(review): the validation file doubles as the test split here --
    # confirm that is intended.
    test_data = load_data('data/adult.val')
    baseline_metrics = compute_metrics(dt.decision_tree_classify, test_data,
                                       [baseline_tree])
    dt_metrics = compute_metrics(dt.decision_tree_classify, test_data, [tree])
    y_train = get_labels(data)
    y_test = get_labels(test_data)
    # extract_features returns at least (X_train, X_test).
    features = extract_features(data, test_data)
    X_train = features[0]
    X_test = features[1]
    print('Building logistic regression model...')
    lr_start = time.time()
    lr_model = build_lr_model(X_train, y_train)
    print('Logistic regression model built in ' + str(time.time() - lr_start) +
          ' s.')
    lr_pred = lr_model.predict(X_test)
    # perceptron (hand-rolled, 6 iterations)
    weights = perceptron.perceptron(X_train, y_train, 6)
    perceptron_pred = perceptron.perceptron_test(X_test, weights)
    # skilearn model's perceptron
    perceptron_ski = build_perceptron_ski(X_train, y_train)
    y_percep_pred = perceptron_ski.predict(X_test)
    # Recorded run results kept from the original source (bare string, no-op).
    '''
    Result:
    Accuracy: 0.8032061912658928
    Precision: 0.5655369538587178
    Recall: 0.7202288091523661
    F1 Score: 0.6335773101555352
    '''
    # Gaussian Naive Bayes
    naive_bayes_model = build_naive_bayes(X_train, y_train)
    y_naive_bayes_pred = naive_bayes_model.predict(X_test)
    '''
    Result:
    Accuracy: 0.48473680977826916
    Precision: 0.3092619027626165
    Recall: 0.9576183047321893
    F1 Score: 0.4675341161536021
    '''
    print('Baseline:')
    print('Accuracy: ' + str(baseline_metrics[0]))
    print('Precision: ' + str(baseline_metrics[1]))
    print('Recall: ' + str(baseline_metrics[2]))
    print('F1 Score: ' + str(baseline_metrics[3]))
    print('\nDecision Tree:')
    print('Accuracy: ' + str(dt_metrics[0]))
    print('Precision: ' + str(dt_metrics[1]))
    print('Recall: ' + str(dt_metrics[2]))
    print('F1 Score: ' + str(dt_metrics[3]))
    # Accuracy below = fraction of predictions matching the test labels.
    print('\nLogistic Regression:')
    print('Accuracy: ' + str([y_test[i] == lr_pred[i] for i in
                              range(len(y_test))].count(True) / len(test_data)))
    print('Precision: ' + str(precision_score(y_test, lr_pred)))
    print('Recall: ' + str(recall_score(y_test, lr_pred)))
    print('F1 Score: ' + str(f1_score(y_test, lr_pred)))
    print('\nPerceptron Regression:')
    print('Accuracy: ' + str([y_test[i] == perceptron_pred[i] for i in
                              range(len(y_test))].count(True) / len(test_data)))
    print('Precision: ' + str(precision_score(y_test, perceptron_pred)))
    print('Recall: ' + str(recall_score(y_test, perceptron_pred)))
    print('F1 Score: ' + str(f1_score(y_test, perceptron_pred)))
    print('\nPerceptron Regression (ski):')
    print('Accuracy: ' + str([y_test[i] == y_percep_pred[i] for i in
                              range(len(y_test))].count(True) / len(test_data)))
    print('Precision: ' + str(precision_score(y_test, y_percep_pred)))
    print('Recall: ' + str(recall_score(y_test, y_percep_pred)))
    print('F1 Score: ' + str(f1_score(y_test, y_percep_pred)))
    print('\nNaive Bayes (ski):')
    print('Accuracy: ' + str([y_test[i] == y_naive_bayes_pred[i] for i in
                              range(len(y_test))].count(True) / len(test_data)))
    print('Precision: ' + str(precision_score(y_test, y_naive_bayes_pred)))
    print('Recall: ' + str(recall_score(y_test, y_naive_bayes_pred)))
    print('F1 Score: ' + str(f1_score(y_test, y_naive_bayes_pred)))
    print("\nCross Validation")
# Evaluate the perceptron algorithm on the corresponding test examples for each version by reading the parameter vectors # from the corresponding text files. with open('newsgroups_model_p1.txt', 'r') as f: wp1 = f.readlines() wp1 = np.asarray(wp1, dtype=np.float64) wp1 = np.reshape(wp1, (-1, 1)) with open('newsgroups_model_p2.txt', 'r') as f: wp2 = f.readlines() wp2 = np.asarray(wp2, dtype=np.float64) wp2 = np.reshape(wp2, (-1, 1)) pred1 = perceptron_test(wp1, tsdata1) pred2 = perceptron_test(wp2, tsdata2) # Report test accuracy. acc1 = np.mean(tslabels1 == pred1) print('Accuracy of perceptron on test dataset version 1: %0.3f%%.' % (acc1 * 100)) acc2 = np.mean(tslabels2 == pred2) print('Accuracy of perceptron on test dataset version 2: %0.3f%%.' % (acc2 * 100)) # Carry out same procedure for the average perceptron algorithm as that for the perceptron algorithm. aw1, aerror1 = aperceptron_train(trdata1, trlabels1, 10000)
# NOTE(review): this chunk begins mid-statement; the opening of this tuple
# unpacking -- "(feature_vector_list_training," -- precedes the visible source.
 is_spam_list_training, vocabulary_list) = create_feature_vectors.run('./output_data/training_set')

# Part 3/4: Train the data on the training set and return the last weight
# vector. Test the percent error when this weight is run on the validation set.
print('\n=========================================================================================')
print('Problem 4:')
(weight_vector, total_number_of_misclassifications,
 number_of_runs) = perceptron.perceptron_train(feature_vector_list_training, is_spam_list_training)
# Reuse the training vocabulary so validation features align column-for-column.
(feature_vector_list_validation, is_spam_list_validation,
 _) = create_feature_vectors.run('./output_data/validation_set', vocabulary_list)
training_set_error = perceptron.perceptron_test(
    weight_vector, feature_vector_list_training, is_spam_list_training)
validation_set_error = perceptron.perceptron_test(
    weight_vector, feature_vector_list_validation, is_spam_list_validation)
print('Total number of misclassifications: ' + str(total_number_of_misclassifications))
print('Training set error: ' + str(training_set_error))
print('Validation set error: ' + str(validation_set_error))

# Part 5: Find words in the vocabulary with the most positive and negative weights
print('\n=========================================================================================')
print('Problem 5:')
# Indices of weights sorted ascending; the last 15 are the largest weights.
sorted_weight_index_least_to_greatest = sorted(range(len(weight_vector)),
                                               key=lambda k: weight_vector[k])
top_most_positive_weights = [vocabulary_list[index]
                             for index in sorted_weight_index_least_to_greatest[-15:]]
# Reverse so the most positively weighted word comes first.
top_most_positive_weights = list(reversed(top_most_positive_weights))
# Load train/test matrices and labels from CSV; labels reshaped to columns.
XTrain = np.genfromtxt('XTrain.csv', delimiter=',')
yTrain = np.genfromtxt('yTrain.csv', delimiter=',')
yTrain = yTrain.reshape((yTrain.shape[0], 1))
XTest = np.genfromtxt('XTest.csv', delimiter=',')
yTest = np.genfromtxt('yTest.csv', delimiter=',')
yTest = yTest.reshape((yTest.shape[0], 1))
#get the number of features
d = XTrain.shape[1]
# n = number of training rows, m = number of test rows.
n = XTrain.shape[0]
m = XTest.shape[0]
#experiment 1, original perceptron
w0 = np.zeros((d, 1))
w = perceptron.perceptron_train(w0, XTrain, yTrain, 10)
rate1 = perceptron.perceptron_test(w, XTest, yTest)
print(rate1)
#result: error rate: 0.03833
#experiment 2, kernel perceptron
# Sweep kernel width sigma over six orders of magnitude.
sigmaList = [0.01, 0.1, 1, 10, 100, 1000]
for sigma in sigmaList:
    error_case = 0
    a0 = np.zeros((n, 1))
    a = perceptron.kernel_perceptron_train(a0, XTrain, yTrain, 2, sigma)
    # NOTE(review): the inner loop body is truncated at the end of this
    # chunk -- error counting with yHat presumably continues past the
    # visible source.
    for i in range(0, m):
        yHat = perceptron.kernel_perceptron_predict(a, XTrain, yTrain, XTest[i, :], sigma)
# Plot per-epoch training errors (the `error` list comes from earlier in the
# file) and save the figure instead of showing it.
plt.xlabel('Epochs')
plt.ylabel("Errors")
plt.plot(error, 'bo-', label='Total errors during epoch')
plt.legend()
plt.savefig('perceptron_train.png')
plt.close()
# Test the perceptron algorithm on the test data by reading the parameter
# vector from spam_model_p.txt (one weight per line, reshaped to a column).
with open('spam_model_p.txt', 'r') as f:
    w1 = f.readlines()
w1 = np.asarray(w1, dtype=np.float64)
w1 = np.reshape(w1, (-1, 1))
pred1 = perceptron_test(w1, data1)
# Report test accuracy.
acc1 = np.mean(labels1 == pred1)
print('\nAccuracy on test data: %0.2f%%.' % (acc1 * 100))
# Carry out same procedure for Average perceptron algorithm as done for the
# Vanilla perceptron algorithm (50 epochs).
print("\nAverage Perceptron algorithm: \n")
aw, aerror = aperceptron_train(data, labels, 50)
# NOTE(review): np.savetxt returns None, so `ap` is always None; the call is
# kept for its file-writing side effect.
ap = np.savetxt('spam_model_ap.txt', aw)
print("\nNumber of mistakes during each epoch:")