def learn_batch_performance_params(m_val):
    testing_file = batch_performance_datasets[m_val][0]
    training_file = batch_performance_datasets[m_val][1]
    testing_set = file_read(testing_file)
    training_set = file_read(training_file)

    # Hold out part of the data to tune parameters: train on D1, evaluate on D2.
    partition = partition_data(testing_set)
    D1 = partition.training
    D2 = partition.testing

    # Tune perceptron hyperparameters (with and without margin) on D1/D2.
    perceptron_params = perceptron_learn_batch_performance_params(D1, D2)
    perceptron_params_without_margin = perceptron_params.get_params(0)
    perceptron_params_with_margin = perceptron_params.get_params(1)
    print('perceptron without margin acc(D2): ' + str(perceptron_params_without_margin.accuracy))
    print('perceptron with margin acc(D2): ' + str(perceptron_params_with_margin.accuracy))

    print('\n\nRunning perceptron on Test set...')
    perceptron_trained_without_margin = perceptron_train(
        batch_performance_n, training_set,
        perceptron_params_without_margin.gamma, perceptron_params_without_margin.eta)
    perceptron_trained_with_margin = perceptron_train(
        batch_performance_n, training_set,
        perceptron_params_with_margin.gamma, perceptron_params_with_margin.eta)
    perceptron_mistakes_without_margin = perceptron_test(
        testing_set, perceptron_trained_without_margin[0], perceptron_trained_without_margin[1])
    perceptron_mistakes_with_margin = perceptron_test(
        testing_set, perceptron_trained_with_margin[0], perceptron_trained_with_margin[1])
    print('perceptron without margin acc(Test): ' + str(
        1.0 - float(perceptron_mistakes_without_margin[-1]) / len(testing_set)))
    print('perceptron with margin acc(Test): ' + str(
        1.0 - float(perceptron_mistakes_with_margin[-1]) / len(testing_set)))

    # Tune winnow hyperparameters the same way.
    winnow_params = winnow_learn_batch_performance_params(D1, D2)
    winnow_params_without_margin = winnow_params.get_params(0)
    winnow_params_with_margin = winnow_params.get_params(1)
    print('winnow without margin acc(D2): ' + str(winnow_params_without_margin.accuracy))
    print('winnow with margin acc(D2): ' + str(winnow_params_with_margin.accuracy))

    print('\n\nRunning winnow on Test set...')
    winnow_trained_without_margin = winnow_train(
        batch_performance_n, training_set,
        winnow_params_without_margin.gamma, winnow_params_without_margin.eta)
    winnow_trained_with_margin = winnow_train(
        batch_performance_n, training_set,
        winnow_params_with_margin.gamma, winnow_params_with_margin.eta)
    winnow_mistakes_without_margin = winnow_test(testing_set, winnow_trained_without_margin)
    winnow_mistakes_with_margin = winnow_test(testing_set, winnow_trained_with_margin)
    print('winnow without margin acc(Test): ' + str(
        1.0 - float(winnow_mistakes_without_margin[-1]) / len(testing_set)))
    print('winnow with margin acc(Test): ' + str(
        1.0 - float(winnow_mistakes_with_margin[-1]) / len(testing_set)))
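# winnow_train above is called with the dimension n, the training set, a margin
# gamma, and an update parameter eta, but its body is not part of this excerpt.
# A minimal sketch of a Winnow-with-margin variant matching that calling
# convention, assuming 0/1 features, labels in {-1, +1}, weights initialized to
# 1, and the classic threshold theta = n; not the assignment's actual code.

import numpy as np

def winnow_train_sketch(n, training_set, gamma, eta, max_epochs=20):
    # Hypothetical stand-in for winnow_train; assumes each example is a pair
    # (x, y) with x a 0/1 feature vector of length n and y in {-1, +1}.
    w = np.ones(n)
    theta = float(n)  # classic Winnow threshold
    for _ in range(max_epochs):
        for x, y in training_set:
            x = np.asarray(x, dtype=float)
            activation = np.dot(w, x) - theta
            # With margin gamma > 0, update on low-confidence predictions too
            if y * activation <= gamma:
                # Promote active-feature weights on positive examples
                # (multiply by eta > 1), demote them on negative examples
                w *= eta ** (y * x)
    return w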
import perceptron as P
import numpy as np

print_stuff = True
if print_stuff:
    print("\n\n\nPerceptron tests:")

Perc_X_train = np.array([[0, 1], [1, 0], [5, 4], [1, 1], [3, 3], [2, 4], [1, 6]])
Perc_Y_train = np.array([[1], [1], [-1], [1], [-1], [-1], [-1]])
[w, b] = P.perceptron_train(Perc_X_train, Perc_Y_train)
perc_test = P.perceptron_test(Perc_X_train, Perc_Y_train, w, b)
if print_stuff:
    print("W from sample =", w, "B from sample =", b)
    print("Test on self from sample: ", perc_test)
    print("Testing for non-linearly separable data")

Perc_stuff_X = np.array(
    [[1, 0], [7, 4], [9, 6], [2, 1], [4, 8], [0, 3], [13, 5], [6, 8], [7, 3], [3, 6],
     [2, 1], [8, 3], [10, 2], [3, 5], [5, 1], [1, 9], [10, 3], [4, 1], [6, 6], [2, 2]])
Perc_stuff_Y = np.array(
    [[1], [1], [-1], [1], [-1], [-1], [-1], [1], [1], [-1],
     [1], [-1], [-1], [-1], [1], [1], [-1], [1], [-1], [-1]])
[w, b] = P.perceptron_train(Perc_stuff_X, Perc_stuff_Y)
someTest = P.perceptron_test(Perc_stuff_X, Perc_stuff_Y, w, b)
if print_stuff:
    print("Non-linearly separable data test w=", w, "b=", b, "Accuracy on self =", someTest)

Perc_random_X = np.array([[1.84724509, 2.23182926], [1.22695894, 1.6611229],
                          [2.13212121, 4.63313796], [7.78081405, 4.11930532],
                          [7.28450063, 3.90368111], [1.29216053, 2.76912245],
                          [7.0384763, 2.80881342], [1.22081714, 3.80955021],
yTest = np.genfromtxt(os.path.join(data_dir, 'yTest.csv'), delimiter=',')

# Visualize the image
'''
idx = 0
datapoint = XTrain[idx, 1:]
plt.imshow(datapoint.reshape((28, 28), order='F'), cmap='gray')
plt.show()
'''

# TODO: Test perceptron_predict function, defined in perceptron.py
w = np.zeros(XTrain.shape[1])
yPred = perceptron.perceptron_predict(w, XTrain[0])

# TODO: Test perceptron_train function, defined in perceptron.py
w = np.zeros(XTrain.shape[1])
w = perceptron.perceptron_train(w, XTrain, yTrain, 10)

# TODO: Test RBF_kernel function, defined in perceptron.py
K = perceptron.RBF_kernel(XTrain[0:5], XTrain[0:5], 10.0)

# TODO: Test kernel_perceptron_predict function, defined in perceptron.py
a = np.zeros(XTrain.shape[0])
yPred = perceptron.kernel_perceptron_predict(a, XTrain, yTrain, XTrain[0], 10.0)

# TODO: Test kernel_perceptron_train function, defined in perceptron.py
a = np.zeros(XTrain.shape[0])
a0 = perceptron.kernel_perceptron_train(a, XTrain, yTrain, 5, 100)

# TODO: Run experiments outlined in HW4 PDF
w = np.zeros(XTrain.shape[1])
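# The snippet above exercises an RBF_kernel(X1, X2, sigma) helper whose
# implementation lives in perceptron.py and is not shown here. A minimal
# sketch of a Gaussian-kernel Gram matrix routine with that shape of
# interface, assuming the third argument is the bandwidth sigma in
# K(x, z) = exp(-||x - z||^2 / (2 * sigma^2)); the assignment may use a
# different convention.

import numpy as np

def rbf_kernel_sketch(X1, X2, sigma):
    # Squared distances via the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
    sq1 = np.sum(X1 ** 2, axis=1)[:, None]
    sq2 = np.sum(X2 ** 2, axis=1)[None, :]
    sq_dists = np.maximum(sq1 + sq2 - 2.0 * X1 @ X2.T, 0.0)
    return np.exp(-sq_dists / (2.0 * sigma ** 2))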
C_3 = km.K_Means_better(X_2, K_3)

# Visuals for debugging; uncomment the matplotlib header to use
print("C_3: \n", C_3)
plt.scatter(C_3[:, 0], C_3[:, 1], label='centers')
plt.scatter(X_2[:, 0], X_2[:, 1], label='samples')
plt.title('X_2, K=3')
# plt.savefig("k_means_results_3.png")  # Uncomment to save plot as file
plt.show()

# PERCEPTRON TESTING
# Hand-Tested Data
X = np.array([[1, 1], [1, -1], [-1, 1], [-1, -1]])
Y = np.array([[1], [-1], [-1], [-1]])
W = p.perceptron_train(X, Y)
print("Hand-Tested Data W1: ", W[0][0], " W2: ", W[0][1], " b:", W[1][0])
test_acc = p.perceptron_test(X, Y, W[0], W[1])
print("Accuracy:", test_acc, "\n")

# Perceptron Test Data
X = np.array([[0, 1], [1, 0], [5, 4], [1, 1], [3, 3], [2, 4], [1, 6]])
Y = np.array([[1], [1], [-1], [1], [-1], [-1], [-1]])
W = p.perceptron_train(X, Y)
print("Perceptron Test Data 1 W1: ", W[0][0], " W2: ", W[0][1], " b:", W[1][0])
test_acc = p.perceptron_test(X, Y, W[0], W[1])
print("Accuracy:", test_acc, "\n")

# Perceptron Test Data - Writeup
X = np.array([[-2, 1], [1, 1], [1.5, -0.5], [-2, -1], [-1, -1.5], [2, -2]])
Y = np.array([[1], [1], [1], [-1], [-1], [-1]])
def main():
    # params to tune
    bias = 0
    # epochs = 20

    # arrays containing train and test data
    train = np.array(parse_features('trainingimages', 5000))
    train_labels = np.array(parse_labels('traininglabels', 5000))
    test = np.array(parse_features('testimages', 1000))
    test_labels = np.array(parse_labels('testlabels', 1000))

    # initialize weight vectors: one row of 28*28 pixel weights plus a bias slot per digit
    w = np.zeros((10, 28 * 28 + 1), dtype=float)

    train_acc = []  # [0 for i in range(epochs)]

    # training
    # for j in range(epochs):
    epochs = 1
    while True:
        mistakes = 0
        alpha = 1.0 / (1.0 + epochs)  # decaying learning rate
        for i in range(5000):
            mistakes += P.perceptron_train(w, alpha, train[i], train_labels[i])
        train_acc.append((5000 - mistakes) / 5000.0)  # accuracy so far
        print(train_acc[epochs - 1], epochs)
        # stop training if not improving much
        # if epochs != 1 and train_acc[epochs-1] - train_acc[epochs-2] < 0.001:
        if epochs == 20:
            break
        epochs += 1
    # training accuracies are drawn at the end

    # testing
    accuracy = 0
    for i in range(1000):
        guess = P.perceptron_decision(w, test[i])
        if guess == test_labels[i]:
            accuracy += 1
    accuracy /= 1000.0
    print("test accuracy:")
    print(accuracy)

    # building confusion matrix
    confusion_counts = [[0 for i in range(10)] for j in range(10)]
    confusion_totals = [[0 for i in range(10)] for j in range(10)]
    confusion = [[0 for i in range(10)] for j in range(10)]
    for i in range(1000):
        digit = test_labels[i]
        for j in range(10):
            confusion_totals[digit][j] += 1  # the whole digit row += 1
        guess = P.perceptron_decision(w, test[i])
        confusion_counts[digit][guess] += 1
    for i in range(10):
        for j in range(10):
            rate = (confusion_counts[i][j] + 0.0) / confusion_totals[i][j]
            confusion[i][j] = "{0:.3f}".format(rate)
    for i in range(10):
        print(confusion[i])

    # draw training accuracies
    fig = plt.figure()
    plt.xlabel('num of epochs')
    plt.ylabel('training accuracy')
    plt.title('Training Curve')
    ax = fig.add_subplot(111)
    for i, j in zip(range(epochs), train_acc):
        ax.annotate(str("{0:.2f}".format(j)), xy=(i, j))
    plt.plot(range(epochs), train_acc)
    plt.savefig('training_curve_bias.png', bbox_inches='tight')
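# The digit classifier above leans on P.perceptron_train and
# P.perceptron_decision from the accompanying perceptron module, which is not
# shown. A minimal sketch of a multiclass perceptron consistent with how they
# are called (train returns 1 on a mistake and updates w in place, decision
# returns the arg-max digit); the trailing bias slot in the feature vector is
# an assumption.

import numpy as np

def perceptron_decision(w, x):
    # Pick the digit whose weight row scores highest; x is assumed to carry a
    # constant 1 in its last slot so the final weight acts as a bias.
    return int(np.argmax(w @ x))

def perceptron_train(w, alpha, x, label):
    # One online update: on a mistake, pull the true class's weights toward x
    # and push the predicted class's weights away, scaled by alpha.
    guess = perceptron_decision(w, x)
    if guess == label:
        return 0
    w[label] += alpha * x
    w[guess] -= alpha * x
    return 1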
        ptron = perceptron_learn(ptron, examples[i], true_classes[i], learn_rate)
    accuracy = perceptron_accuracy(ptron, examples, true_classes)
    print("Final accuracy: " + str(accuracy))
    return ptron

def plot_everything(ptron, examples):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    for example in examples:
        # Color each point by its own (r, g, b) coordinates
        ax.scatter(example[0], example[1], example[2], 'z', s=20,
                   c=[(example[0], example[1], example[2])])
    xx, yy = np.meshgrid(np.arange(0, 1.1, 0.1), np.arange(0, 1.1, 0.1))
    # ax + by + cz + d == 0 becomes z == (-d - ax - by)/c
    z = (-ptron[3] - ptron[0] * xx - ptron[1] * yy) / ptron[2]
    ax.plot_surface(xx, yy, z, alpha=0.2)
    plt.show()

import perceptron

ptron = perceptron.new_perceptron()
examples, classes = perceptron.create_data()
perceptron.plot_everything(ptron, examples)
perceptron.perceptron_train(ptron, examples, classes, 0.01, 100)
perceptron.plot_everything(ptron, examples)
    '--input_data_dir', type=str, default='../data/perceptron',
    help='Directory for the perceptron dataset.')
FLAGS, unparsed = parser.parse_known_args()

print('\033[1m' + '1. Perceptron Convergence:\n' + '\033[0m')

# Read the training data for Exercise 5.
Xtrain, ttrain = read_data(FLAGS.input_data_dir + "/perceptron.txt")

print('Exercise 5(a): \n')

# Train the perceptron algorithm on the training data.
w, weights, err = perceptron_train(Xtrain, ttrain, 10)

# Print the weight vector at the end of each epoch.
print("Weight vector at the end of each epoch:\n")
ep = []
for i in range(weights.shape[0] - 1):
    print("Epoch %d:" % (i + 1))
    print(weights[i + 1])
    ep = np.append(ep, i + 1)

# Plot of errors at different epochs.
plt.xlabel('Epoch')
plt.ylabel("Errors")
plt.plot(ep, err, 'bo-', label='Total errors during epoch')
print("Clustering") #test C1 = clu.K_Means(cluX1, 3) #print(C1) #writeup C2 = clu.K_Means(cluX2, 2) print("\tK=2", C2) C3 = clu.K_Means(cluX2, 3) print("\tK=3", C3) #writeup CBetter1 = clu.K_Means_better(cluX2, 2) print("\tBetter K=2", CBetter1) CBetter2 = clu.K_Means_better(cluX2, 3) print("\tBetter K=3", CBetter2) print() print("Perceptron") #test W_B1 = per.perceptron_train(perX1, perY1) perAcc1 = per.perceptron_test(perX1, perY1, W_B1[0], W_B1[1]) #print("\tW",W_B1[0]) #print("\tB",W_B1[1]) #print("\tAccuracy", perAcc1) #writeup W_B2 = per.perceptron_train(perX2, perY2) perAcc2 = per.perceptron_test(perX2, perY2, W_B2[0], W_B2[1]) print("\tW", W_B2[0]) print("\tB", W_B2[1]) print("\tAccuracy", perAcc2)
###############################################################################
# filename: testPerceptron.py
# author: Sara Davis
# date: 10/10/2018
# version: 1.0
# description: exercises perceptron_train and perceptron_test from perceptron.py
###############################################################################
import numpy as np
from perceptron import perceptron_train
from perceptron import perceptron_test

# X_test = np.array([[0, 1], [1, 0], [5, 4], [1, 1], [3, 3], [2, 4], [1, 6]])  # should be -2, -2, 6
# Y_test = np.array([[1], [1], [0], [1], [0], [0], [0]])
X_test = np.array([[0, 0], [1, 1], [0, 1], [2, 2], [1, 0], [1, 2]])
Y_test = np.array([[-1], [1], [-1], [1], [-1], [1]])
X = np.array([[-2, 1], [1, 1], [1.5, -.5], [-2, -1], [-1, -1.5], [2, -2]])
Y = np.array([[1], [1], [1], [-1], [-1], [-1]])

W = perceptron_train(X, Y)
print(W)
test_acc = perceptron_test(X_test, Y_test, W[0], W[1])
print(test_acc)
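# Several of the scripts here call perceptron_train(X, Y) and
# perceptron_test(X, Y, w, b) without including the module itself. A minimal
# sketch of a classic perceptron with that interface, assuming labels in
# {-1, +1} and a fixed epoch cap; the real assignment code may use a
# different stopping rule.

import numpy as np

def perceptron_train(X, Y, max_epochs=100):
    # Returns (w, b) after at most max_epochs passes over the data.
    w = np.zeros(X.shape[1])
    b = 0.0
    for _ in range(max_epochs):
        mistakes = 0
        for x, y in zip(X, Y.ravel()):
            # Update on any non-positive activation (mistake or tie)
            if y * (np.dot(w, x) + b) <= 0:
                w += y * x
                b += y
                mistakes += 1
        if mistakes == 0:  # one full clean pass: converged
            break
    return w, b

def perceptron_test(X, Y, w, b):
    # Fraction of samples where sign(w.x + b) matches the label.
    return float(np.mean(np.sign(X @ w + b) == Y.ravel()))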
    X = []
    Y = []
    for item in x_str:
        temp = [float(x) for x in item.split(',')]
        X.append(temp)
    if len(y_str) > 0:
        for item in y_str:
            temp = int(item)
            Y.append(temp)
    X = np.array(X)
    Y = np.array(Y)
    return X, Y

X, Y = load_data("data_1.txt")
w, b = p.perceptron_train(X, Y)
test_acc = p.perceptron_test(X, Y, w, b)
print("Perceptron:", test_acc)

# Train on data_2, then evaluate the learned (w, b) on data_1
X, Y = load_data("data_2.txt")
w, b = p.perceptron_train(X, Y)
X, Y = load_data("data_1.txt")
test_acc = p.perceptron_test(X, Y, w, b)
print("Perceptron:", test_acc)

def df_test1(x):
    # Gradient of f(x) = x^2
    return np.array([2 * x[0]])

x = gd.gradient_descent(df_test1, np.array([5.0]), 0.1)
print("Gradient Descent:", x)
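# The last call hands a gradient function, a starting point, and a step size
# to gd.gradient_descent, whose body is not included. A minimal sketch
# assuming plain fixed-step gradient descent run for a fixed number of
# iterations; the iteration count and stopping rule are guesses, not the
# module's actual code.

import numpy as np

def gradient_descent(grad_f, x0, eta, num_iter=1000):
    # Repeat x <- x - eta * grad_f(x). For df_test1 above (gradient of
    # f(x) = x^2), the iterates shrink geometrically toward the minimizer 0.
    x = np.asarray(x0, dtype=float).copy()
    for _ in range(num_iter):
        x = x - eta * grad_f(x)
    return x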
# Read the training and test data.
trdata1, trlabels1 = read_examples(FLAGS.input_data_dir + "/newsgroups_train1.txt")
trdata2, trlabels2 = read_examples(FLAGS.input_data_dir + "/newsgroups_train2.txt")
tsdata1, tslabels1 = read_examples(FLAGS.input_data_dir + "/newsgroups_test1.txt")
tsdata2, tslabels2 = read_examples(FLAGS.input_data_dir + "/newsgroups_test2.txt")

# Train the perceptron algorithm on both training datasets for 10,000 epochs.
w1, ws1, error1 = perceptron_train(trdata1, trlabels1, 10000)
w2, ws2, error2 = perceptron_train(trdata2, trlabels2, 10000)

# Save the returned parameter vectors from training in appropriate text files.
# (np.savetxt returns None, so there is nothing useful to assign.)
np.savetxt('newsgroups_model_p1.txt', w1)
np.savetxt('newsgroups_model_p2.txt', w2)

# Evaluate the perceptron algorithm on the corresponding test examples for each
# version by reading the parameter vectors back from the text files.
with open('newsgroups_model_p1.txt', 'r') as f:
    wp1 = f.readlines()
wp1 = np.asarray(wp1, dtype=np.float64)
# Part 1: Split training data into training and validation sets
split_training_data.run()

# Part 2: Transform each email in the training set into a feature vector
(feature_vector_list_training, is_spam_list_training, vocabulary_list) = \
    create_feature_vectors.run('./output_data/training_set')

# Part 3/4: Train on the training set and return the last weight vector, then
# test the percent error when this weight vector is run on the validation set
print('\n=========================================================================================')
print('Problem 4:')
(weight_vector, total_number_of_misclassifications, number_of_runs) = \
    perceptron.perceptron_train(feature_vector_list_training, is_spam_list_training)
(feature_vector_list_validation, is_spam_list_validation, _) = \
    create_feature_vectors.run('./output_data/validation_set', vocabulary_list)
training_set_error = perceptron.perceptron_test(
    weight_vector, feature_vector_list_training, is_spam_list_training)
validation_set_error = perceptron.perceptron_test(
    weight_vector, feature_vector_list_validation, is_spam_list_validation)
print('Total number of misclassifications: ' + str(total_number_of_misclassifications))
print('Training set error: ' + str(training_set_error))
print('Validation set error: ' + str(validation_set_error))

# Part 5: Find words in the vocabulary with the most positive and negative weights
# Read the training and test data.
trdata1, trlabels1 = read_examples(FLAGS.input_data_dir + "/newsgroups_train1.txt")
trdata2, trlabels2 = read_examples(FLAGS.input_data_dir + "/newsgroups_train2.txt")
tsdata1, tslabels1 = read_examples(FLAGS.input_data_dir + "/newsgroups_test1.txt")
tsdata2, tslabels2 = read_examples(FLAGS.input_data_dir + "/newsgroups_test2.txt")

# Train the perceptron algorithm on the training datasets for 10,000 epochs.
w1, _, _ = perceptron_train(trdata1, trlabels1, 10000)
w2, _, _ = perceptron_train(trdata2, trlabels2, 10000)

# Train and test the SVM algorithm on the newsgroups datasets.
svm_tra = svm.SVC(kernel='linear', C=5.0)
svm_tra.fit(trdata1, trlabels1)
pred = svm_tra.predict(tsdata1)
ac = np.mean(pred == tslabels1) * 100
print('Accuracy of linear svm on test dataset version 1: %0.3f%%.' % (ac))

svm_tra1 = svm.SVC(kernel='linear', C=5.0)
svm_tra1.fit(trdata2, trlabels2)
pred = svm_tra1.predict(tsdata2)
data, labels = read_examples(FLAGS.input_data_dir + '/spam_train_svm.txt')
data1, labels1 = read_examples(FLAGS.input_data_dir + '/spam_test_svm.txt')

print("SVM algorithm: \n")
# Use a distinct name so the classifier does not shadow the sklearn svm module.
svm_clf = svm.SVC(C=5, kernel='linear')
svm_clf.fit(data, labels)
svm_test = svm_clf.predict(data1)
accuracy = np.mean(labels1 == svm_test)
print("Accuracy of SVM with linear kernel: %0.2f%%." % (accuracy * 100))

print("\nPerceptron algorithm: \n")
# Train the perceptron algorithm on the training data.
w, _, error = perceptron_train(data, labels, 50)

# Save the returned parameter vector in spam_model_p.txt.
np.savetxt('spam_model_p.txt', w)

# Report the number of mistakes during each epoch and the total during training.
print("\nNumber of mistakes during each epoch:")
eph = 1
for i in error:
    print("Epoch %d: %d" % (eph, i))
    eph += 1
print("\nTotal number of mistakes: %d" % np.sum(error))
X_t, t_t = read_data(FLAGS.input_data_dir + '/optdigits.tes')
X_t_sc = scalling(X_t)

# Build ten one-vs-all label vectors: +1 for the target digit, -1 otherwise.
t10 = np.zeros((len(t), 10))
for i in range(10):
    for j in range(len(t)):
        if t[j] == i:
            t10[j, i] = 1
        else:
            t10[j, i] = -1

# Sweep the epoch count, holding out the first 1000 examples for validation.
accuracies = []
for j in range(1, 21):
    pred_ehp = []
    for i in range(10):
        w = perceptron_train(X_sc[1000:, :], t10[1000:, i], j)
        w = w / np.linalg.norm(w)
        pred = X_sc[:1000, :].dot(w)
        pred_ehp.append(pred)
    labels = np.argmax(pred_ehp, axis=0)
    ac = np.mean(labels == t[:1000]) * 100
    print(j, ac)
    accuracies.append(ac)
print('max accuracy at epoch:', np.argmax(accuracies) + 1)

# set_eph = 8  # manual override
set_eph = np.argmax(accuracies) + 1

st = timeit.default_timer()
pred_ehp = []
for i in range(10):
    w = perceptron_train(X_sc[1000:, :], t10[1000:, i], set_eph)