def run_part1_digit_2(k):
    print("Importing data...")
    (_, traindata, _, trainlabel) = feature_map_part1_1(train_raw_digit_dataset,
                                                        {' ': 0, '#': 1, '+': 1},
                                                        (28, 28), 10)
    (_, testdata, _, testlabel) = feature_map_part1_1(test_raw_digit_dataset,
                                                      {' ': 0, '#': 1, '+': 1},
                                                      (28, 28), 10)

    print("Testing")
    start_time = time.time()
    preds_test = []
    counter = 0
    for (example, _) in zip(testdata, testlabel):
        diffs = distance(example, traindata)
        # diffs = sin_distance(example, traindata)
        # diffs = manhattan_distance(example, traindata)
        idx = np.argsort(diffs)
        neighbor_rank = trainlabel[idx]
        neighbor_rank = neighbor_rank[0:k]
        # Majority vote: count how many of the k nearest neighbors carry each digit label.
        hist = [np.sum(neighbor_rank == i) for i in np.arange(0, 10)]
        preds_test.append(np.argmax(hist))
        counter += 1
        print(str(counter) + "/" + str(len(testlabel)), end='\r')
    sys.stdout.write("\033[K")

    preds_test = np.array(preds_test)
    print("Accuracy: " + str(np.sum(preds_test == testlabel) /
                             (len(testlabel) * 1.0) * 100) + "%")
    end_time = time.time()
    print('Testing finished in %.3f seconds' % (end_time - start_time))

    confusion_matrix = np.array([np.array([np.sum(preds_test[testlabel == i] == j)
                                           for j in np.arange(0, 10)])
                                 for i in np.arange(0, 10)])
    print('Confusion Matrix:')
    print('\n'.join([''.join(['{:6}'.format(item) for item in row])
                     for row in confusion_matrix]))
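# --- Hedged sketch (assumption): the `distance` and `manhattan_distance` helpers
# called by run_part1_digit_2 are defined elsewhere in this project and are not
# shown in this section. Based on how they are called (one test example against
# the full training matrix, returning one value per training row), minimal
# vectorized versions could look like the functions below. These are
# illustrative only, not necessarily the implementations used in the project.
def _euclidean_distance_sketch(example, traindata):
    # Squared Euclidean distance from `example` to every row of `traindata`;
    # the square root is omitted because it does not change the neighbor ranking.
    return np.sum((traindata - example) ** 2, axis=1)


def _manhattan_distance_sketch(example, traindata):
    # L1 (Manhattan) distance from `example` to every row of `traindata`.
    return np.sum(np.abs(traindata - example), axis=1)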
def run_part1_digit_ec4():
    print("Importing data...")
    (_, traindata, _, trainlabel) = feature_map_part1_1(train_raw_digit_dataset,
                                                        {' ': 0, '#': 1, '+': 1},
                                                        (28, 28), 10)
    (_, testdata, _, testlabel) = feature_map_part1_1(test_raw_digit_dataset,
                                                      {' ': 0, '#': 1, '+': 1},
                                                      (28, 28), 10)

    clf = svm.SVC()
    print("Training...")
    clf.fit(traindata, trainlabel)

    print("Testing")
    preds_test = clf.predict(testdata)
    print("\nAccuracy: " + str(np.sum(preds_test == testlabel) /
                                (len(testlabel) * 1.0) * 100) + "%")

    confusion_matrix = np.array([np.array([np.sum(preds_test[testlabel == i] == j)
                                           for j in np.arange(0, 10)])
                                 for i in np.arange(0, 10)])
    print('Confusion Matrix:')
    print('\n'.join([''.join(['{:6}'.format(item) for item in row])
                     for row in confusion_matrix]))
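# --- Hedged sketch (assumption): `feature_map_part1_1` is defined elsewhere in
# this project. Judging from its call sites (a raw ASCII dataset, a char->int
# map, an image shape, and a class count) and from how its result is unpacked
# (a 4-tuple whose second element is the feature matrix and whose fourth is the
# label vector), a plausible version is sketched below. The assumed raw-dataset
# layout, a pair of (list of ASCII image rows, list of integer labels), is a
# guess and may differ from the project's actual loader.
def _feature_map_sketch(raw_dataset, char_map, shape, num_classes):
    # `num_classes` is accepted only to mirror the real signature; the sketch
    # does not need it.
    rows, raw_labels = raw_dataset
    height, width = shape
    num_examples = len(raw_labels)
    data = np.zeros((num_examples, height * width))
    for n in np.arange(0, num_examples):
        # Take the block of ASCII rows for example n, map each character to its
        # numeric feature value, and flatten the image into one feature vector.
        block = rows[n * height:(n + 1) * height]
        pixels = [char_map.get(c, 0)
                  for line in block
                  for c in line.ljust(width)[:width]]
        data[n] = np.array(pixels)
    labels = np.array(raw_labels)
    # The first and third slots of the real return tuple are not used by the
    # callers shown in this section, so placeholders are returned here.
    return (rows, data, labels, labels)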
def run_extra_credit_1_audio(k):
    print("================AUDIO - BINARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part1_1(train_segmented_dataset,
                                        {' ': 1, '%': 0}, (25, 10), 2)
    test_dataset = feature_map_part1_1(test_segmented_dataset,
                                       {' ': 1, '%': 0}, (25, 10), 2)
    (model, _, examples, confusion_matrix) = run(k, train_dataset, test_dataset)
    print("=====================================================\n")
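# --- Hedged sketch (assumption): `run` is the shared classifier driver defined
# elsewhere in this project. From its callers it appears to take a Laplace
# smoothing constant and the train/test tuples produced by feature_map_part1_1,
# and to return (model, predictions, examples, confusion_matrix) with
# model = (priors, distributions). The sketch below shows only the core of a
# Laplace-smoothed naive Bayes fit/predict consistent with that interface; it
# omits the per-class example bookkeeping, reporting, and printing, and its
# argument list is simplified relative to the real driver.
def _naive_bayes_sketch(smooth_k, traindata, trainlabel, testdata,
                        num_classes, num_values):
    num_features = traindata.shape[1]
    priors = np.zeros(num_classes)
    # distributions[c, f, v] ~ P(feature f takes value v | class c), smoothed.
    distributions = np.zeros((num_classes, num_features, num_values))
    for c in np.arange(0, num_classes):
        members = traindata[trainlabel == c]
        priors[c] = len(members) / (len(trainlabel) * 1.0)
        for v in np.arange(0, num_values):
            distributions[c, :, v] = (np.sum(members == v, axis=0) + smooth_k) \
                / (len(members) + smooth_k * num_values)

    # Predict by maximizing log prior + summed log likelihoods over features.
    preds = []
    for example in testdata:
        log_post = np.log(priors) + np.array(
            [np.sum(np.log(distributions[c, np.arange(num_features),
                                         example.astype(int)]))
             for c in np.arange(0, num_classes)])
        preds.append(np.argmax(log_post))
    return (priors, distributions), np.array(preds)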
def run_part1_face_1(k):
    print("================FACE - BINARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part1_1(train_raw_face_dataset,
                                        {' ': 0, '#': 1}, (70, 60), 2)
    test_dataset = feature_map_part1_1(test_raw_face_dataset,
                                       {' ': 0, '#': 1}, (70, 60), 2)
    run(0.1, train_dataset, test_dataset)
    print("=====================================================\n")
def run_part1_digit_extra_1(k):
    print("================DIGIT - TERNARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part1_1(train_raw_digit_dataset,
                                        {' ': 0, '#': 1, '+': 2}, (28, 28), 10)
    test_dataset = feature_map_part1_1(test_raw_digit_dataset,
                                       {' ': 0, '#': 1, '+': 2}, (28, 28), 10)
    run(0.1, train_dataset, test_dataset)
    print("=====================================================\n")
def run_part1_digit_1(learning_rate, include_bias, random_init, rand_set, epoch_num):
    print("Importing data...")
    (_, traindata, _, trainlabel) = feature_map_part1_1(train_raw_digit_dataset,
                                                        {' ': 0, '#': 1, '+': 1},
                                                        (28, 28), 10)
    (_, testdata, _, testlabel) = feature_map_part1_1(test_raw_digit_dataset,
                                                      {' ': 0, '#': 1, '+': 1},
                                                      (28, 28), 10)

    # Weight and bias initialization: random by default, zeroed when requested.
    # w = np.zeros((10, 28 * 28))
    w = np.random.rand(10, len(traindata[0]))
    bias = np.random.rand(10)
    if (not random_init):
        w = np.zeros((10, len(traindata[0])))
        bias = np.zeros(10)
    if (not include_bias):
        bias = np.zeros(10)

    print("Training...")
    trainerr = []
    testerr = []
    for epoch in np.arange(0, epoch_num + 1):
        # Permute the dataset
        p_ind = np.arange(0, len(trainlabel))
        if rand_set:
            p_ind = np.random.permutation(p_ind)
        p_traindata = traindata[p_ind]
        p_trainlabel = trainlabel[p_ind]
        # print(traindata.shape)
        # print(p_traindata.shape)
        # print(trainlabel.shape)
        # print(p_trainlabel.shape)
        # print(p_ind.shape)
        # break

        # Epoch test: record train/test error rates before this epoch's updates
        preds_test = np.argmax(
            np.dot(w, testdata.transpose()) + bias.reshape(10, 1), 0)
        preds_train = np.argmax(
            np.dot(w, traindata.transpose()) + bias.reshape(10, 1), 0)
        trainerr.append(
            np.sum(preds_train != trainlabel) / (len(trainlabel) * 1.0))
        testerr.append(
            np.sum(preds_test != testlabel) / (len(testlabel) * 1.0))
        print("Epoch " + str(epoch) + ": " + str(trainerr[epoch] * 100) + "% " +
              str(testerr[epoch] * 100) + "%", end='\r')
        sys.stdout.write("\033[K")

        # Train for one epoch with a decaying learning rate
        alpha = learning_rate * 1000.0 / (1000.0 + epoch + 1)
        for (example, label) in zip(p_traindata, p_trainlabel):
            pred = np.argmax(np.dot(w, example) + bias)
            if (pred != label):
                w[label] += alpha * example
                w[pred] -= alpha * example
                if (include_bias):
                    bias[label] += alpha
                    bias[pred] -= alpha

    print("Testing")
    preds_test = np.argmax(
        np.dot(w, testdata.transpose()) + bias.reshape(10, 1), 0)
    print("Accuracy: " + str(np.sum(preds_test == testlabel) /
                             (len(testlabel) * 1.0) * 100) + "%")
    confusion_matrix = np.array([np.array([np.sum(preds_test[testlabel == i] == j)
                                           for j in np.arange(0, 10)])
                                 for i in np.arange(0, 10)])
    print('Confusion Matrix:')
    print('\n'.join([''.join(['{:6}'.format(item) for item in row])
                     for row in confusion_matrix]))

    # Plot learning curves
    fig1 = plt.figure(figsize=(10, 6))
    plt.plot(np.arange(0, epoch_num + 1), trainerr, label='Training Error')
    plt.plot(np.arange(0, epoch_num + 1), testerr, label='Testing Error')
    plt.xlabel('Epoch', fontsize=14)
    plt.ylabel('Error rate', fontsize=14)
    plt.xlim(0, epoch_num)
    plt.ylim(0, 1)
    plt.legend()
    plt.grid(True)
    plt.title('Learning Curves', fontsize=16)

    # EC 2: visualize each class's learned weight vector as a 28x28 image
    fig2, axes1 = plt.subplots(nrows=2, ncols=5, figsize=(10, 6))
    fig2.subplots_adjust(left=0.03, right=0.95, top=0.93, bottom=0.05,
                         wspace=0.30, hspace=0.05)
    for i in np.arange(0, 5):
        axstop = axes1[0][i]
        axsbot = axes1[1][i]
        imtop = axstop.imshow(np.reshape(w[i], (28, 28)),
                              interpolation='nearest', cmap="jet")
        imbot = axsbot.imshow(np.reshape(w[i + 5], (28, 28)),
                              interpolation='nearest', cmap="jet")
        axstop.set_title(str(i))
        axsbot.set_title(str(i + 5))
        axstop.set_axis_off()
        axsbot.set_axis_off()
        cbartop = plt.colorbar(imtop, ax=axstop, fraction=0.046, pad=0.04)
        cbartop.locator = ticker.MaxNLocator(nbins=5)
        cbartop.update_ticks()
        cbarbot = plt.colorbar(imbot, ax=axsbot, fraction=0.046, pad=0.04)
        cbarbot.locator = ticker.MaxNLocator(nbins=5)
        cbarbot.update_ticks()
    plt.suptitle('Weight Vectors Visualization', fontsize=16)
    plt.show()
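# --- Hedged sketch (not part of the original project): the accuracy and
# confusion-matrix block above is repeated across several runners in this file;
# a small helper like the one below could factor it out. It reproduces the same
# computation: overall accuracy in percent and a num_classes x num_classes
# matrix whose (i, j) entry counts true-label-i examples predicted as j.
def _accuracy_and_confusion_sketch(preds, labels, num_classes):
    accuracy = np.sum(preds == labels) / (len(labels) * 1.0) * 100
    confusion = np.array([[np.sum(preds[labels == i] == j)
                           for j in np.arange(0, num_classes)]
                          for i in np.arange(0, num_classes)])
    return accuracy, confusion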
def run_part1_digit_1(k):
    print("================DIGIT - BINARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part1_1(train_raw_digit_dataset,
                                        {' ': 0, '#': 1, '+': 1}, (28, 28), 10)
    test_dataset = feature_map_part1_1(test_raw_digit_dataset,
                                       {' ': 0, '#': 1, '+': 1}, (28, 28), 10)
    (model, _, examples, confusion_matrix) = run(0.1, train_dataset, test_dataset)

    # Find the four most confused class pairs, ignoring the diagonal.
    confusion_matrix_ndig = np.array(confusion_matrix)
    np.fill_diagonal(confusion_matrix_ndig, 0)
    confusion_pairs = largest_indices(confusion_matrix_ndig, 4)
    confusion_pairs = list(zip(confusion_pairs[0], confusion_pairs[1]))

    (priors, distributions) = model

    fig1, axes1 = plt.subplots(nrows=5, ncols=4, figsize=(6, 7.5))
    fig1.subplots_adjust(left=0.07, right=0.92, top=0.93, bottom=0.05,
                         wspace=0.05, hspace=0.05)
    for i in np.arange(0, 5):
        axs = axes1[i]
        ims = [axs[0].imshow(np.reshape(1 - examples[2 * i][0], (28, 28)),
                             interpolation='nearest', cmap="Greys"),
               axs[1].imshow(np.reshape(1 - examples[2 * i][1], (28, 28)),
                             interpolation='nearest', cmap="Greys"),
               axs[2].imshow(np.reshape(1 - examples[2 * i + 1][0], (28, 28)),
                             interpolation='nearest', cmap="Greys"),
               axs[3].imshow(np.reshape(1 - examples[2 * i + 1][1], (28, 28)),
                             interpolation='nearest', cmap="Greys")]
        for j in np.arange(0, 4):
            axs[j].set_axis_off()
    plt.suptitle(
        'Example Pairs with Lowest(left) and Highest(right) posterior probability',
        fontsize=12)

    fig2, axes2 = plt.subplots(nrows=4, ncols=3, figsize=(6, 8))
    fig2.subplots_adjust(left=0.05, right=0.92, top=0.95, bottom=0.05,
                         wspace=0.35, hspace=0.01)
    for pairi in np.arange(0, 4):
        axs = axes2[pairi]
        logp1 = np.log(
            np.array([d[1] for d in distributions[confusion_pairs[pairi][0]]]))
        logp2 = np.log(
            np.array([d[1] for d in distributions[confusion_pairs[pairi][1]]]))
        ims = [axs[0].imshow(np.reshape(logp1, (28, 28)),
                             interpolation='nearest', cmap='jet'),
               axs[1].imshow(np.reshape(logp2, (28, 28)),
                             interpolation='nearest', cmap='jet'),
               axs[2].imshow(np.reshape(logp1 - logp2, (28, 28)),
                             interpolation='nearest', cmap='jet')]
        for j in np.arange(0, 3):
            axs[j].set_axis_off()
            cbar = plt.colorbar(ims[j], ax=axs[j], fraction=0.046, pad=0.04)
            cbar.locator = ticker.MaxNLocator(nbins=5)
            cbar.update_ticks()
    plt.suptitle('Odds ratios', fontsize=16)
    plt.show()
    print("=====================================================\n")
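# --- Hedged sketch (assumption): `largest_indices` is defined elsewhere in this
# project. From its use above it returns the (row, column) index arrays of the
# n largest entries of a 2-D array; a standard implementation consistent with
# that usage is shown below.
def _largest_indices_sketch(array, n):
    flat = array.flatten()
    # Indices of the n largest values, ordered from largest to smallest.
    order = np.argsort(flat)[::-1][:n]
    # Convert flat indices back to (row, column) coordinate arrays.
    return np.unravel_index(order, array.shape)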