def run_knn_classifcation():
    """Sweep k over {1, 3, 5, 7, 9} for k-NN and plot classification rates.

    Classifies the validation set and the test set against the training
    data for each k, then plots both rate curves on a single figure.
    """
    train_inputs, train_targets = load_train()
    #train_inputs, train_targets = load_train_small()
    valid_inputs, valid_targets = load_valid()
    test_inputs, test_targets = load_test()

    k_arr = [1, 3, 5, 7, 9]
    performance = []       # validation classification rate, one entry per k
    performance_test = []  # test classification rate, one entry per k

    for k in k_arr:
        # Classification rate on the validation set for this k.
        predicted = run_knn(k, train_inputs, train_targets, valid_inputs)
        performance.append(evaluate(valid_targets, predicted))
        # Cheating now... (peeking at the test set during model selection)
        predicted = run_knn(k, train_inputs, train_targets, test_inputs)
        performance_test.append(evaluate(test_targets, predicted))

    # One curve per split, rate as a function of k.
    plt.plot(k_arr, performance, "o-", label='knn validation')
    plt.plot(k_arr, performance_test, "o-", label='knn test')
    plt.xlabel('k')
    plt.ylabel('classification rate')
    plt.legend()
    plt.show()
def init(): train_data, valid_data, test_data, train_label, valid_label, test_label = LoadData('digits.npz') print train_data[:,0:200].shape print valid_data.shape # train_data, train_label = load_train() # valid_data, valid_label = load_valid() # test_data, test_label = load_test() cl_rate_valid = [] index=[] for i in [1,3,5,7,9]: valid_labels_knn = run_knn(i, train_data[:,0:200], train_label[:,0:200], valid_data) num_correct_prediction = 0 num_total_points = 0 classification_rate = 0 count = 0 correct_count = 0 for valid_label_knn in valid_labels_knn: if valid_label_knn == valid_label[count]: correct_count = correct_count + 1 count = count + 1 # print 'the classification rate of validation would be :' # print "%.2f%%" % (float(correct_count) / float(count)*100) cl_rate_valid.append(float(correct_count) / float(count)) cl_rate_test = [] for i in [1,3,5,7,9]: test_labels_knn = run_knn(i, train_data[:,0:200], train_label[:,0:200], test_data) count = 0 correct_count = 0 for test_label_knn in test_labels_knn: if test_label_knn == test_label[count]: correct_count = correct_count + 1 count = count + 1 # print 'the classification rate of test would be :' # print "%.2f%%" % (float(correct_count) / float(count)*100) cl_rate_test.append(float(correct_count) / float(count)) index.append(i) length = len(cl_rate_test) for i in range(length): k = 2*i+1 print 'when k = %d, classification rate of valid is %.2f, of test is %.2f' % (k, cl_rate_valid[i], cl_rate_test[i]) plt.figure(1) plt.plot(index, cl_rate_test, marker='o', label='test_set') plt.plot(index,cl_rate_valid,marker='x',label='valid_set') legend = plt.legend() plt.grid() plt.xlabel('k') plt.ylabel('Classification Rate') plt.axis([1, 9, 0.7, 1]) plt.show()
def init(): train_data, train_label = load_train() valid_data, valid_label = load_valid() test_data, test_label = load_test() cl_rate_valid = [] index = [] for i in [1, 3, 5, 7, 9]: valid_labels_knn = run_knn(i, train_data, train_label, valid_data) num_correct_prediction = 0 num_total_points = 0 classification_rate = 0 count = 0 correct_count = 0 for valid_label_knn in valid_labels_knn: if valid_label_knn == valid_label[count]: correct_count = correct_count + 1 count = count + 1 # print 'the classification rate of validation would be :' # print "%.2f%%" % (float(correct_count) / float(count)*100) cl_rate_valid.append(float(correct_count) / float(count)) cl_rate_test = [] for i in [1, 3, 5, 7, 9]: test_labels_knn = run_knn(i, train_data, train_label, test_data) count = 0 correct_count = 0 for test_label_knn in test_labels_knn: if test_label_knn == test_label[count]: correct_count = correct_count + 1 count = count + 1 # print 'the classification rate of test would be :' # print "%.2f%%" % (float(correct_count) / float(count)*100) cl_rate_test.append(float(correct_count) / float(count)) index.append(i) length = len(cl_rate_test) for i in range(length): k = 2 * i + 1 print 'when k = %d, classification rate of valid is %.2f, of test is %.2f' % ( k, cl_rate_valid[i], cl_rate_test[i]) plt.figure(1) plt.plot(index, cl_rate_test, marker='o', label='test_set') plt.plot(index, cl_rate_valid, marker='x', label='valid_set') legend = plt.legend() plt.grid() plt.xlabel('k') plt.ylabel('Classification Rate') plt.axis([1, 9, 0.7, 1]) plt.show()
def knn():
    """Plot the k-NN classification rate on the validation set for odd k.

    The commented-out lines switch the evaluation to the test set.
    """
    train_data, train_labels = load_train()
    #for validation
    valid_data, valid_labels = load_valid()
    #for test
    #valid_data, valid_labels = load_test()

    values = [1, 3, 5, 7, 9]
    ratio = []  # classification rate for each k
    for k in values:
        prediction_labels = run_knn(k, train_data, train_labels, valid_data)
        # Count predictions agreeing with the true labels.
        hits = sum(1 for j in range(len(valid_labels))
                   if valid_labels[j] == prediction_labels[j])
        ratio.append(float(hits) / len(prediction_labels))

    plt.plot(values, ratio)
    #for validation
    plt.axis([1, 9, 0.81, 0.87])
    #for test
    #plt.axis([1, 9, 0.87, 0.95])
    plt.show()
def main(): train_data, train_labels = load_train() test, target = load_valid() print "VALIDATION" for i in [1, 3, 5, 7, 9]: labels = run_knn(i, train_data, train_labels, test) #plot_digits(test) print "K = ", i, " perc = ", get_perc(labels, target) test, target = load_test() print "TEST" for i in [1, 3, 5, 7, 9]: labels = run_knn(i, train_data, train_labels, test) #plot_digits(test) print "K = ", i, " perc = ", get_perc(labels, target)
def calClassificationRate(k, train_data, train_target, input_data, intput_target):
    """Return the fraction of k-NN predictions on input_data that match
    intput_target (the true labels)."""
    outputTarget = run_knn(k, train_data, train_target, input_data)
    # Tally positions where prediction and truth agree.
    hits = sum(1 for j in range(len(intput_target))
               if intput_target[j] == outputTarget[j])
    return float(hits) / float(len(intput_target))
def main(): train_data, valid_data, test_data, train_labels, valid_labels, test_labels = LoadData('digits.npz') print "VALIDATION" for i in [1, 3, 5, 7, 9]: start = time.time() labels = run_knn(i, train_data.T, train_labels, valid_data.T) print "Took: ", time.time() - start #plot_digits(test) print "K = ", i, " perc = ", 100 - get_perc(labels, valid_labels.T) print "TEST" for i in [1, 3, 5, 7, 9]: start = time.time() labels = run_knn(i, train_data.T, train_labels, test_data.T) print "Took: ", time.time() - start #plot_digits(test) print "K = ", i, " perc = ", 100 - get_perc(labels, test_labels.T)
def q2_5():
    """Report the validation classification error of 7-NN, then train the
    neural network with the assignment's fixed hyperparameters."""
    inputs_train, inputs_valid, inputs_test, target_train, target_valid, target_test = LoadData('digits.npz')
    k = 7  #found out that works the best on ass 1
    out_hyp = knn.run_knn(k, inputs_train.T, target_train.T, inputs_valid.T)
    # Reshape in place so element-wise comparison lines up with predictions.
    target_valid.shape = out_hyp.shape
    # Binary labels: |prediction - truth| is 1 on a mistake, 0 otherwise.
    errors = 0
    for idx in range(len(out_hyp)):
        errors += abs(out_hyp[idx] - target_valid[idx])
    class_err = (float(errors) / float(target_valid.size)) * 100
    print ("the class error for K-NN is :{}%".format(class_err))
    W1, W2, b1, b2, train_error, valid_error, train_MCE_arr, valid_MCE_arr = TrainNN(100, 0.01, 0.9, 1000, 2)
def main(): train_inputs, train_targets = load_train() valid_inputs, valid_targets = load_valid() test_inputs, test_targets = load_test() validation_rates = np.zeros((5)) k_values = np.array([1, 3, 5, 7, 9]) print "== Validation Performance ==" for i in range(5): k = k_values[i] run_labels = run_knn(k, train_inputs, train_targets, valid_inputs) correct_labels = 1-np.abs(run_labels - valid_targets) validation_rates[i] = 100.0*np.sum(correct_labels)/run_labels.size print "k = %d, rate=%f" % (k, validation_rates[i]) #make our plot pretty \{^,^}/ plt.plot(k_values, validation_rates) plt.title('Validation Rate vs k') plt.ylim([50,100]) plt.xlabel('Value of k') plt.ylabel('Validation Rate (%)') plt.grid(True) plt.show() print "== Test Performance ==" #compute test classification rate for k=3, 5, 7 for i in range(1,4): k = k_values[i] run_labels = run_knn(k, train_inputs, train_targets, test_inputs) correct_labels = 1-np.abs(run_labels - test_targets) validation_rate = 100.0*np.sum(correct_labels)/run_labels.size print "k = %d, rate=%f" % (k, validation_rate)
def main(): inputs_train, inputs_valid, inputs_test, target_train, target_valid, target_test = LoadData('digits.npz') k_values = np.array([1, 3, 5, 7, 9]) print "== Validation Performance ==" for i in range(5): k = k_values[i] run_labels_v = run_knn(k, inputs_train.T, target_train.T, inputs_valid.T) correct_labels_v = np.abs(run_labels_v - target_valid.T) validation_rate = (100.0*np.sum(correct_labels_v))/run_labels_v.size print "k = %d, rate=%f" % (k, validation_rate)
def compute_classification_rate(k_values, train_inputs, train_targets, inputs, targets):
    """Return a list holding, for each k in k_values, the fraction of
    k-NN predictions on `inputs` that equal `targets`."""
    rates = []
    n = len(targets)
    for k in k_values:
        labels = run_knn(k, train_inputs, train_targets, inputs)
        correct = sum(int(targets[j] == labels[j]) for j in range(n))
        rates.append(correct / float(n))
    return rates
def run_test2_1(k, in_train, out_train, ins, outs):
    """Classify `ins` with k-NN and score the predictions against `outs`.

    Label 1 is treated as the positive ("9") class and label 0 as the
    negative ("4") class. Returns a tuple:
    (precision4, recall4, precision9, recall9, accuracy, total_time_ms).
    """
    tstart = datetime.now()
    out_hyp = knn.run_knn(k, in_train, out_train, ins)
    tend = datetime.now()
    # Wall-clock classification time in whole milliseconds.
    total_time = int((tend - tstart).total_seconds() * 1000)

    tp9 = fp9 = tp4 = fp4 = 0
    tot_valid = outs.size
    # Number of positive ("9") ground-truth labels.
    pos = 0
    for v in outs:
        pos += v

    for i, h in enumerate(out_hyp):
        if h == 1:
            if h == outs[i]:
                tp9 += 1
            else:
                fp9 += 1
        elif h == outs[i]:
            tp4 += 1
        else:
            fp4 += 1

    precision9 = float(tp9) / float(tp9 + fp9)
    precision4 = float(tp4) / float(tp4 + fp4)
    recall9 = float(tp9) / float(pos)
    recall4 = float(tp4) / float(tot_valid - pos)
    accuracy = float(tp9 + tp4) / float(tot_valid)
    return precision4, recall4, precision9, \
        recall9, accuracy, total_time,
from run_knn import run_knn
import numpy as np
import matplotlib.pyplot as plt
import sys

if __name__ == '__main__':
    # Load training and validation splits from the files named on the
    # command line (argv[1] = train archive, argv[2] = validation archive).
    T = np.load(sys.argv[1])['train_inputs']
    V = np.load(sys.argv[2])['valid_inputs']
    L = np.load(sys.argv[1])['train_targets']
    S = np.load(sys.argv[2])['valid_targets']

    R = list()  # fraction of correct classifications, one entry per k
    k = [1, 3, 5, 7, 9]
    for e in k:
        result = run_knn(e, T, L, V)
        hits = sum(1 for j in range(len(result)) if result[j] == S[j])
        R.append(hits / float(len(result)))

    plt.plot(k, R)
    plt.ylabel('fraction of correct classifications')
    plt.xlabel('number of nearest neighbors')
    plt.show()
#loading the dataset
train_data, train_labels = utils.load_train()
#loading the validation set
valid_data, valid_labels = utils.load_valid()

# vector of each k
K = np.array([1, 3, 5, 7, 9])
# maps k -> mean prediction accuracy on the validation set
results = {}
for k in K:
    #prediction
    prediction = run_knn(k, train_data, train_labels, valid_data)
    #computing the precision
    results[k] = np.mean(prediction == valid_labels)

#plotting the result (same iteration order as the dict)
precisions = np.array(list(results.items()))
plt.plot(precisions[:, 0], precisions[:, 1], 'r-o')
plt.title('precision as a function of k')
plt.savefig("precisons_k.png")
from utils import *
from plot_digits import *
from run_knn import run_knn

#SCRIPT TO RUN KNN
# NOTE(review): this assigns the *test* split to train_data — possibly an
# experiment, but confirm load_train() was not intended here.
train_data, train_labels = load_test()
# Print arrays without truncation while inspecting the data.
np.set_printoptions(threshold='nan')
print(train_data)
print(train_labels)
print(train_data.shape)
print(train_labels.shape)
valid_data, valid_labels = load_valid()
#test_data, test_labels = load_test();
# Classify the validation set with k = 9 nearest neighbours.
valid1 = run_knn(9, train_data, train_labels, valid_data)
print valid_labels.shape
#print valid1
print valid1.shape
# Count predictions (column vectors) that match the true labels.
cl1 = 0
for i in range(0, valid1.shape[0]):
    if valid1[i, 0] == valid_labels[i, 0]:
        cl1 = cl1 + 1
# Classification rate = correct / total predictions.
cl1 = (cl1 * 1.0) / valid1.shape[0]
# valid3 = run_knn(3, train_data, train_labels, valid_data);
#
# cl3 = 0;
# for i in range(0,valid3.shape[0]):
#     if valid3[i,0] == valid_labels[i,0]:
import numpy as np import matplotlib.pyplot as plt import plot_digits (train_input, train_targets) = utils.load_train() (valid_inputs, valid_targets) = utils.load_valid() (test_inputs, test_targets) = utils.load_test() (valid_inputr, valid_inputc) = valid_inputs.shape (test_inputr, test_inputc) = test_inputs.shape k = np.zeros(5) classificationrate_valid = np.zeros(5) classificationrate_test = np.zeros(5) for i in range(5): k[i] = 2 * i + 1 valid_count = 0 test_count = 0 valid_p = run_knn(k[i], train_input, train_targets, valid_inputs) (row, col) = valid_p.shape for j in xrange(row): if valid_p[j][0] == valid_targets[j][0]: valid_count += 1 classificationrate_valid[i] = valid_count * 1.0 / float(valid_inputr) test_p = run_knn(k[i], train_input, train_targets, test_inputs) (row, col) = test_p.shape for j in xrange(row): if test_p[j][0] == test_targets[j][0]: test_count += 1 classificationrate_test[i] = test_count * 1.0 / float(test_inputr) print classificationrate_valid print classificationrate_test
# outputfile = 'model.npz'
# SaveModel(outputfile, W1, W2, b1, b2, train_error, valid_error)

if __name__ == '__main__':
    # NN training hyperparameters.
    num_hiddens = 10
    eps = 0.02
    momentum = 0.5
    num_epochs = 1000
    K = 10  # number of nearest neighbors

    W1, W2, b1, b2, train_error, valid_error = TrainNN(num_hiddens, eps, momentum, num_epochs)

    inputs_train, inputs_valid, inputs_test, target_train, target_valid, target_test = LoadData(
        'digits.npz')
    # K-NN baseline: predict validation/test labels from the training split.
    knn_valid_target = \
        run_knn(K, inputs_train.T, target_train.T, inputs_valid.T).squeeze()
    knn_test_target = \
        run_knn(K, inputs_train.T, target_train.T, inputs_test.T).squeeze()
    # Error rate = fraction of mismatched labels.
    knn_valid_error = 1 - np.mean(knn_valid_target == target_valid)
    knn_test_error = 1 - np.mean(knn_test_target == target_test)
    # Fix: report string said "valiation"; also drop the run of spaces the
    # in-string line continuation injected into the message.
    print("{:2d}-NN validation error={:4.2f}, test error={:4.2f}".format(
        K, knn_valid_error, knn_test_error))

    # representation
    DisplayErrorPlot(train_error, valid_error)
def count_rate(k, train_inputs, train_targets, valid_inputs, valid_targets):
    """Return the k-NN classification rate on valid_inputs: the fraction
    of predicted labels equal to valid_targets."""
    predictions = run_knn(k, train_inputs, train_targets, valid_inputs)
    matches = np.sum(predictions == valid_targets)
    return float(matches) / valid_targets.size
# NOTE(review): this chunk appears truncated — the preceding load of
# train_data/train_labels and the body of the test-set loop are not
# visible here.
valid_data, valid_labels = ut.load_valid()
test_data, test_labels = ut.load_test()

#Create empty arrays for accuracy values
validation_accuracies = []
test_accuracies = []

#List for k
k_values = [1, 3, 5, 7, 9]

#Validation Set
for k in k_values:
    correct_predictions = 0
    total_predictions = 0
    predicted_valid_labels = run_knn(k, train_data, train_labels, valid_data)
    #Iterate through the predicted labels and compare them to the true labels to determine validation accuracy
    for index, value in enumerate(predicted_valid_labels):
        if predicted_valid_labels[index] == valid_labels[index]:
            correct_predictions += 1
            total_predictions += 1
        else:
            total_predictions += 1
    # Accuracy as a percentage for this k.
    validation_accuracies.append(100*(float(correct_predictions) / total_predictions))

#Test Set
for k in k_values:
    correct_predictions = 0
    total_predictions = 0
# -*- coding: utf-8 -*- from utils import load_train, load_valid from run_knn import run_knn (train_inputs, train_targets) = load_train() (valid_inputs, valid_targets) = load_valid() for k in [1, 3, 5, 7, 9]: print run_knn(k, train_inputs, train_targets, valid_inputs)
training_data_set = np.load('mnist_train.npz') train_data = training_data_set['train_inputs'] train_label = training_data_set['train_targets'] valid_data_set = np.load('mnist_valid.npz') valid_data = valid_data_set['valid_inputs'] valid_label = valid_data_set['valid_targets'] test_set = np.load('mnist_test.npz') test_data = test_set['test_inputs'] test_label = test_set['test_targets'] k = 5 test_labels_knn = run_knn(k, train_data, train_label, test_data) print test_labels_knn num_correct_prediction = 0 num_total_points = 0 classification_rate = 0 count = 0 correct_count = 0 for test_label_knn in test_labels_knn: if test_label_knn == test_label[count]: correct_count = correct_count + 1 count = count + 1 print correct_count
# Evaluate k-NN classification rates on the validation and test splits.
valid = np.load("mnist_valid.npz")
# print(valid.files)
valid_data = valid['valid_inputs']
# print(valid_data.shape)
valid_labels = valid['valid_targets']

test = np.load("mnist_test.npz")
# print(test.files)
test_data = test['test_inputs']
# print(test_data.shape)
test_labels = test['test_targets']

classification_rate_validation = []
classification_rate_test = []
for k in [1, 3, 5, 7, 9]:
    vk_labels = run_knn(k, train_data, train_labels, valid_data)
    tk_labels = run_knn(k, train_data, train_labels, test_data)
    # float() guards against integer truncation: under Python 2 (which the
    # rest of this file targets), int/int division would always yield 0 here.
    classification_rate_validation.append(
        float(np.count_nonzero(vk_labels == valid_labels)) / len(valid_labels))
    classification_rate_test.append(
        float(np.count_nonzero(tk_labels == test_labels)) / len(test_labels))

print(classification_rate_validation)
print(classification_rate_test)

plt.scatter(np.array([1, 3, 5, 7, 9]), classification_rate_validation)
plt.scatter(np.array([1, 3, 5, 7, 9]), classification_rate_test)
plt.legend([
    'Classification rate of validation data',
    'Classification rate of test data'
])
def main(): print("initializing sensors...\n") # initialize load sensor hx = HX711(5, 6) hx.set_reading_format("LSB", "MSB") hx.set_reference_unit(-428.72) hx.reset() hx.tare() # initialize capacitive sensor cap = MPR121.MPR121() if not cap.begin(): print('Error initializing MPR121. Check your wiring!') sys.exit(1) last_touched = cap.touched() capacitive = 0 # initialize microphone card = 'sysdefault:CARD=Device' fs = 44100 num_ms = 132300 inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL, card) inp.setchannels(1) inp.setrate(fs) inp.setformat(alsaaudio.PCM_FORMAT_S16_LE) inp.setperiodsize(32) # initialize camera camera = PiCamera() # initialize neural net model # model = run_NN.initializeNN() while (True): try: val = raw_input("\nPRESS ANY KEY TO START ") print("start recording...") totalLen = 0 signal = [] while (totalLen < fs * 3): l, data = inp.read() if (l > 0): signal += list(np.fromstring(data, 'int16')) totalLen += l audioFeatures = extractFeatures.extractFeatures( fs, np.array(signal)) print("done recording...") print("capturing image...") timeStamp = strftime("%Y-%m-%d_%H:%M:%S", gmtime()) timeStamp = 'temp' imgName = IMAGE_DATA_PATH + timeStamp + '.jpg' camera.start_preview() camera.capture(imgName) camera.stop_preview() # get values from load sensor and capacitive sensor count = LOAD_SENSOR_TRIES loadValue = 0 print("sensing weight and capacitance...") while (count > 0): count -= 1 # load sensing loadValue += hx.get_weight(5) hx.power_down() hx.power_up() # capacitive sensing current_touched = cap.touched() for i in range(12): pin_bit = 1 << i if ((current_touched & pin_bit) and (last_touched & pin_bit)): capacitive = 1 last_touched = current_touched # wait time.sleep(0.1) # print results to STDOUT loadValue /= LOAD_SENSOR_TRIES print("Data Features:") print("\tweight = " + str(loadValue)) print("\tcapacitance = " + str(capacitive)) print("\tamplitude = " + str(audioFeatures[0])) print("\tnumber of peaks = " + str(audioFeatures[1])) 
print("\tcentroid = " + str(audioFeatures[2])) print("\tspectrum = " + str(audioFeatures[3])) ''' print("\tMel-Freq Cep Coeff = " + str(audioFeatures[4])) print("\tRolloff Point = " + str(audioFeatures[5])) print("\tMax Spectral Flux = " + str(audioFeatures[6])) print("\tAvg Spectral Flux = " + str(audioFeatures[7])) ''' plotAudio(fs, signal) print("predicting recycling category...") # format data to pass to classifiers args = [str(loadValue), str(capacitive)] + audioFeatures for i in xrange(0, len(args)): args[i] = str(args[i]) # run classifiers on data # 0 = compost # 1 = metal # 2 = plastic # if highest probability is less than 50% then predict trash svm_indicies, svm_preds = run_svm.run_svm(TRINITY_DATA_PATH, args) knn_indicies, knn_preds = run_knn.run_knn(TRINITY_DATA_PATH, args) rf_indicies, rf_preds = run_randomforest.run_randomforest( TRINITY_DATA_PATH, args) log_indicies, log_preds = run_log.run_log(TRINITY_DATA_PATH, args) # img_indicies, img_preds = run_NN.predict(model, imgName) # max_prob_NN = max(img_preds) # max_arg_NN = img_indicies[np.argmax(img_preds)] max_prob_NN = 0.0 max_arg_NN = 'trash' max_prob_svm = max(svm_preds) max_arg_svm = svm_indicies[np.argmax(svm_preds)] max_prob_knn = max(knn_preds) max_arg_knn = knn_indicies[np.argmax(knn_preds)] max_prob_rf = max(rf_preds) max_arg_rf = rf_indicies[np.argmax(rf_preds)] max_prob_log = max(log_preds) max_arg_log = log_indicies[np.argmax(log_preds)] if (max_prob_svm <= 0.45): max_arg_svm = 'trash' if (max_prob_knn <= 0.45): max_arg_knn = 'trash' if (max_prob_rf <= 0.45): max_arg_rf = 'trash' if (max_prob_log <= 0.45): max_arg_log = 'trash' # if (max_prob_NN <= 0.45): # max_arg_NN = 'trash' print("SVM prediction is..." + max_arg_svm) print("KNN prediction is..." + max_arg_knn) print("Random Forest prediction is..." + max_arg_rf) print("Logistic Regression prediction is..." 
+ max_arg_log) # print("NN prediction is..."+max_arg_NN) ''' if (max_arg_knn == 'trash'): final_pred = max_arg_NN if (max_arg_NN == 'trash'): final_pred = max_arg_knn if (max_arg_knn != 'trash' and max_arg_NN != 'trash'): if (max_prob_knn > max_prob_NN): final_pred = max_arg_knn else: final_pred = max_arg_NN print("FINAL PREDICTION IS... " + final_pred) # save data val = raw_input("should I save this data point? ") if (val == 'yes'): val = raw_input("what is the category of this point? ") addData(val,args, TRINITY_DATA_PATH) ''' except (KeyboardInterrupt, SystemExit): camera.close() GPIO.cleanup() sys.exit() '''
- Will show and save relevant plots """
# NOTE(review): this chunk begins with the tail of a docstring — the
# opening of that string (and any enclosing def) is above this view.
trainInputs, trainTargets = load_train()
smallInputs, smallTargets = load_train_small()
validInputs, validTargets = load_valid()
testInputs, testTargets = load_test()

# k values to sweep; classRates/classRatesT are overwritten index-by-index.
kList = [1, 3, 5, 7, 9]
classRates = range(0, len(kList))
classRatesT = range(0, len(kList))

listCount = 0
# Validation split: fraction of k-NN labels matching the targets.
for k in kList:
    correctCount = 0
    validLables = run_knn(k, trainInputs, trainTargets, validInputs)
    for i in xrange(len(validLables)):
        if validLables[i] == validTargets[i]:
            correctCount += 1
    classRates[listCount] = (correctCount / float(len(validLables)))
    listCount += 1

listCount = 0
# Test split: same computation against the test targets.
for k in kList:
    correctCount = 0
    validLables = run_knn(k, trainInputs, trainTargets, testInputs)
    for i in xrange(len(validLables)):
        if validLables[i] == testTargets[i]:
            correctCount += 1
    classRatesT[listCount] = (correctCount / float(len(validLables)))
    listCount += 1