def main(max, classifyMetric):
    """Run kNN once for every k in 1..max, writing each run to its own file.

    The output file for a given k is named
    ``path + "<k>_<X>_<Y>_<classifyMetric>_result_2.txt"``, where ``path``,
    ``X`` and ``Y`` are module-level names defined elsewhere in this file.

    Args:
        max: Largest k to evaluate (inclusive). Values below 1 do nothing.
        classifyMetric: Metric name; used in the file name and forwarded to
            ``knn.main``.
    """
    for k in range(1, max + 1):
        result_path = (path + str(k) + "_" + X + "_" + Y + "_" +
                       classifyMetric + "_result_2.txt")
        # The original ended with a bare `foutModel.close` (attribute access,
        # not a call), so the file was never explicitly closed or flushed.
        # The context manager closes it even if knn.main raises.
        with open(result_path, "w") as foutModel:
            knn.main(sys.argv[1], k, foutModel, classifyMetric)
def main():
    """Run the neural-network experiments listed in ``current_problem``.

    Each section trains with ``TrainNN`` and plots the returned error curves
    with ``DisplayErrorPlot``:

    * 2.1 / 2.2 - one run with the default hyper-parameters.
    * 2.3 - sweep over learning rate ``eps``, then over ``momentum``.
    * 2.4 - sweep over ``num_hiddens`` with test-set evaluation and timing.
    * 2.5 - delegate to ``knn.main()``.

    Takes no arguments and returns nothing. This is Python 2 code (print
    statements).
    """
    # Default hyper-parameters; individual sections override some of them.
    num_hiddens = 10
    eps = 0.1
    momentum = 0.0
    num_epochs = 1000
    # Sections to execute; edit this list to run a subset.
    current_problem = [2.1, 2.2, 2.3, 2.4, 2.5]
    print "Running problems: ", current_problem
    # 2.1 and 2.2
    # Runs only when BOTH 2.1 and 2.2 are selected.
    if 2.2 in current_problem and 2.1 in current_problem:
        W1, W2, b1, b2, train_error, valid_error, train_class_error, valid_class_error = TrainNN(num_hiddens, eps, momentum, num_epochs)
        DisplayErrorPlot(train_error, valid_error, mode='cross_entropy')
        DisplayErrorPlot(train_class_error, valid_class_error, mode='classification_error')
    # 2.3
    if 2.3 in current_problem:
        for eps in [0.5, 0.2, 0.01]:
            W1, W2, b1, b2, train_error, valid_error, train_class_error, valid_class_error = TrainNN(num_hiddens, eps, momentum, num_epochs)
            # iterate through different eps
            suffix = '_at_eps_' + str(eps)
            # '.' is swapped for ',' in the mode string passed to the plot.
            suffix = suffix.replace('.', ',')
            DisplayErrorPlot(train_error, valid_error, mode='cross_entropy' + suffix)
            DisplayErrorPlot(train_class_error, valid_class_error, mode='classification_error' + suffix)
        # The loop above rebinds eps; restore the default before the
        # momentum sweep.
        eps = 0.1
        for momentum in [0.9, 0.5, 0.0]:
            W1, W2, b1, b2, train_error, valid_error, train_class_error, valid_class_error = TrainNN(num_hiddens, eps, momentum, num_epochs)
            # iterate through different momentum
            suffix = '_at_momentum_' + str(momentum)
            suffix = suffix.replace('.', ',')
            DisplayErrorPlot(train_error, valid_error, mode='cross_entropy' + suffix)
            DisplayErrorPlot(train_class_error, valid_class_error, mode='classification_error' + suffix)
    # 2.4
    if 2.4 in current_problem:
        eps = 0.02
        momentum = 0.5
        for num_hiddens in [2, 5, 10, 30, 100]:
            start = time.time()
            # With run_test=True, TrainNN is unpacked into ten values: the
            # eight used above plus test_error and test_class_error.
            (
                W1,
                W2,
                b1,
                b2,
                train_error,
                valid_error,
                test_error,
                train_class_error,
                valid_class_error,
                test_class_error,
            ) = TrainNN(num_hiddens, eps, momentum, num_epochs, run_test=True)
            elapsed = time.time() - start
            print "Training time at hidden_unit=", num_hiddens, " is ", elapsed, "seconds"
            # iterate through different num_hiddens
            suffix = '_at_hidden_unit_' + str(num_hiddens)
            DisplayErrorPlot(train_error, valid_error, mode='cross_entropy' + suffix, test=test_error)
            DisplayErrorPlot(train_class_error, valid_class_error, mode='classification_error' + suffix, test=test_class_error)
    if 2.5 in current_problem:
        # Imported lazily so the knn module is only loaded when 2.5 runs.
        import knn
        knn.main()
def main(testfile=None):
    """Classify the rows of ``testfile`` with 3-NN, or run the default kNN job.

    Args:
        testfile: Path handed to ``rd.read``. When falsy, ``knn.main`` is
            called on the module-level ``train`` / ``tests`` data instead.

    Returns:
        A list with one ``knn.getResponse`` result per row read from
        ``testfile``, or ``None`` when no test file is given.
    """
    if not testfile:
        knn.main(train, tests)
        return None

    samples = rd.read(testfile)
    neighbour_count = 3
    # Index-based access is kept so the same rows are visited in the same
    # order as a plain range(len(...)) loop.
    return [
        knn.getResponse(
            knn.getNeighbors(train, samples[idx], neighbour_count))
        for idx in range(len(samples))
    ]
def main(i=0, j=62, testfile=None):
    """Load a train/test split and either classify ``testfile`` or run kNN.

    Args:
        i: First argument forwarded to ``rt.read2``.
        j: Second argument forwarded to ``rt.read2``.
        testfile: Optional path read with ``rd.read2``. When falsy,
            ``knn.main`` is run on the split returned by ``rt.read2``.

    Returns:
        A list of ``knn.getResponse`` results (one per row of ``testfile``),
        or ``None`` when no test file is given.
    """
    training_rows, held_out_rows = rt.read2(i, j)
    neighbour_count = 3

    if not testfile:
        knn.main(training_rows, held_out_rows)
        return None

    queries = rd.read2(testfile)

    def classify(row):
        # 3-nearest-neighbour vote for a single query row.
        nearest = knn.getNeighbors(training_rows, row, neighbour_count)
        return knn.getResponse(nearest)

    labels = []
    for position in range(len(queries)):
        labels.append(classify(queries[position]))
    return labels
def main(kInKnn):
    """Load the ex2data1 split, classify it with kNN and print the results.

    Prints the percentage of predictions equal to the test labels and the
    wall-clock time spent on loading plus prediction.

    Args:
        kInKnn: Number of neighbours, forwarded to ``knn.main``.
    """
    started = time.time()
    trainingX, trainingY, testX, testY = loadDataset("ex2data1train.csv",
                                                     "ex2data1test.csv")
    predicted = knn.main(trainingX, trainingY, testX, kInKnn)
    finished = time.time()

    # Count positions where the prediction equals the true label.
    hits = sum(1 for idx in range(len(predicted))
               if predicted[idx] == testY[idx])

    accuracy_pct = round(hits / float(len(testY)) * 100, 2)
    elapsed = round(finished - started, 1)
    print("accuracy: " + str(accuracy_pct) + "%")
    print("training time: " + str(elapsed) + " seconds")
def main(kInKnn):
    """Time a kNN run on the ex2data1 data set and report its accuracy.

    Args:
        kInKnn: The k passed through to ``knn.main``.

    Prints two lines: the accuracy as a percentage (two decimals) and the
    elapsed time in seconds (one decimal). The timer covers data loading as
    well as prediction.
    """
    t0 = time.time()
    dataset = loadDataset('ex2data1train.csv', 'ex2data1test.csv')
    [trainingX, trainingY, testX, testY] = dataset
    pred = knn.main(trainingX, trainingY, testX, kInKnn)
    t1 = time.time()

    # Positions at which the prediction agrees with the expected label.
    agreeing = [pos for pos in range(len(pred)) if pred[pos] == testY[pos]]
    correct = len(agreeing)

    percent = round(correct / float(len(testY)) * 100, 2)
    print('accuracy: ' + str(percent) + '%')
    seconds = round(t1 - t0, 1)
    print('training time: ' + str(seconds) + ' seconds')
def bagging(N):
    """Run N bootstrap trials of a random forest and kNN and combine the votes.

    Per trial, a bootstrap sample is drawn, both classifiers are trained and
    evaluated on the test set, and their accuracy and timing are printed.
    Predictions from all 2*N classifier runs are summed per test point; the
    combined label is 1 when that sum is at least N, else 0.

    Reads the module-level names RFData, knnData, TreeNum, isBagging and
    kInKnn. Prints results; has no return statement.

    Args:
        N: Number of bootstrap trials.
    """
    #training data
    xRF = RFData.x
    y = RFData.y
    #testing data
    xtRF = RFData.xt
    yt = RFData.yt
    # NOTE(review): knnData.main() is called twice; if it is expensive or
    # non-deterministic, one call should be cached - confirm.
    xKNN = knnData.main()[0]  #training X
    xtKNN = knnData.main()[2]  #test X
    # Running sum of predictions per test point across all classifier runs.
    countYPredict = []
    for i in range(len(yt)):
        countYPredict.append(0)
    for k in range(N):  # number of bootstrapping
        x_RF = []
        y_RF = []
        x_KNN = []
        y_KNN = []
        # bootstrapping
        # Draw 60% of the training-set size, with replacement.
        for i in range(int(len(xRF) * 0.6)):
            r = randint(0, len(xRF) - 1)
            # Copy all but the last column of the sampled RF row.
            tRF = []
            for j in range(len(xRF[0]) - 1):
                tRF.append(xRF[r][j])
            # for RF, data duplicates are not allowed
            if tRF not in x_RF:
                x_RF.append(tRF)
                y_RF.append(y[r])
            # NOTE(review): nesting reconstructed from collapsed source; the
            # kNN sample is assumed to keep duplicates - confirm.
            x_KNN.append(xKNN[r])
            y_KNN.append(y[r])
        # RF
        start = time.time()
        rf = RF(B=TreeNum, Bagging=isBagging)
        rf.train(x_RF, y_RF)
        pred = rf.predict(xtRF)
        end = time.time()
        count = 0
        for i in range(len(pred)):
            if pred[i] == yt[i]:
                count += 1
        print("RF, trial #" + str(k + 1) + ": ")
        print('\taccuracy: ' + str(round(count / float(len(yt)) * 100, 2)) + '%')
        print('\ttraining time: ' + str(round(end - start, 1)) + ' seconds')
        # Add the RF predictions to the vote tally.
        for i in range(len(pred)):
            countYPredict[i] = countYPredict[i] + pred[i]
        # KNN
        start = time.time()
        pred = knn.main(x_KNN, y_KNN, xtKNN, kInKnn)
        end = time.time()
        count = 0
        for i in range(len(pred)):
            if pred[i] == yt[i]:
                count += 1
        print("KNN, trial #" + str(k + 1) + ": ")
        print('\taccuracy: ' + str(round(count / float(len(yt)) * 100, 2)) + '%')
        print('\ttraining time: ' + str(round(end - start, 1)) + ' seconds')
        # Add the kNN predictions to the same tally.
        for i in range(len(pred)):
            countYPredict[i] = countYPredict[i] + pred[i]
    # 2*N runs voted in total, so a sum >= N is at least half of them.
    # Assumes predictions are 0/1 labels - TODO confirm.
    finalPredict = []
    for i in range(len(yt)):
        if countYPredict[i] >= N:
            finalPredict.append(1)
        else:
            finalPredict.append(0)
    count = 0
    for i in range(len(finalPredict)):
        if finalPredict[i] == yt[i]:
            count += 1
    print()
    print('After combining the classifiers by bagging: ')
    print('\taccuracy: ' + str(round(count / float(len(yt)) * 100, 2)) + '%')
while choice2 != 4: print('\nkNN Classification:') print( '1 -> Classify Dataset. (Splits Dataset into training and test set)' ) print( '2 -> Classify Testset. (Test kNN accuracy using Dataset as training set)' ) print('3 -> Classify New Emails.\n4 -> Back.') choice2 = int(input('Enter your choice: ')) if choice2 == 1: # Classify Dataset dataset_name = input( 'Enter the name of dataset folder to be classified: ') knn.main(dataset_name) elif choice2 == 2: # Classify Testset dataset_name = input( 'Enter the name of dataset folder to be used as training set: ' ) testset_name = input('Enter the name of test set folder: ') knn_classify.main(dataset_name, testset_name) elif choice2 == 3: # Classify New Emails dataset_name = input( 'Enter the name of dataset folder to be used as training set: ' ) reply = input(
def hello():
    """Return the result of ``main`` for the ``'details'`` field of the request JSON."""
    return main(request.json['details'])
def main(argv):
    """Combine per-datatype kNN scores into one ranked list.

    Expected arguments, after an optional ``-d`` / ``--debug`` flag::

        feature_dir outtype_file (datatype_file dist_metric weight)+

    For every datatype, ``knn.main`` is run on
    ``<feature_dir>/<datatype>.tmp`` and the returned ``[id, score]`` pairs
    are merged: scores of equal ids are summed, then divided by the number of
    datatypes, and the result is sorted by descending score.

    NOTE(review): the weights are parsed and validated but are not used in
    the averaging - confirm whether a weighted mean was intended.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        List of ``[id, score]`` entries sorted by descending averaged score.

    Raises:
        SystemExit: With status 2 on bad options, too few or incomplete
            arguments, or missing files.
    """
    try:
        opts, args = getopt.getopt(argv, 'd', ['debug'])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # The original tested `opt in ('-d')`, a substring test on the string
    # '-d', so the declared long option --debug never switched debug on.
    debug = any(opt in ('-d', '--debug') for opt, _ in opts)

    def debug_print(label, value):
        """Emit one debug line to stderr when debug mode is on."""
        if debug:
            print("*** DEBUG: " + sys.argv[0] + ": " + label + ":", value,
                  file=sys.stderr)

    def fail(*message):
        """Print an optional error to stderr, show usage, exit with status 2."""
        if message:
            print(*message, file=sys.stderr)
        usage()
        sys.exit(2)

    # Work on getopt's positional list rather than argv plus a hand-kept
    # offset: that is correct however the flags were given, and a short
    # argument list now yields usage instead of an IndexError.
    if len(args) < 5 or not args[4]:
        fail()

    feature_files_dir = args[0]
    if not os.path.isdir(feature_files_dir):
        fail("Feature directory does not exist: ", feature_files_dir)

    outtype_dataset_file = args[1]
    if not os.path.isfile(outtype_dataset_file):
        fail("Out type datafile does not exist: ", outtype_dataset_file)

    datatype_filenames = []
    datatypes = []
    dist_metrics = []
    weights = []
    # Remaining arguments come in (datatype_file, dist_metric, weight) triples.
    for i in range(2, len(args), 3):
        datatype_filename = args[i]
        if not os.path.isfile(datatype_filename):
            fail("Input datatype file does not exist: ", datatype_filename)
        if i + 1 >= len(args) or not args[i + 1]:
            fail("No distance metric provided for ", datatype_filename)
        if i + 2 >= len(args) or not args[i + 2]:
            fail("No weight provided for ", datatype_filename)
        datatype_filenames.append(datatype_filename)
        # Datatype name = base name of the file up to its last '-'.
        datatypes.append(
            os.path.basename(datatype_filename.rsplit('-', 1)[0]))
        dist_metrics.append(args[i + 1])
        weights.append(args[i + 2])

    debug_print("datatypes", datatypes)
    debug_print("dist_metrics", dist_metrics)
    debug_print("weights", weights)

    total_ids_scores = []
    for datatype_num, datatype in enumerate(datatypes):
        debug_print("datatype_num", datatype_num)
        features_file = feature_files_dir + "/" + datatype + ".tmp"
        if not os.path.isfile(features_file):
            print("Features file does not exist: ", features_file,
                  file=sys.stderr)
            sys.exit(2)
        # Pass the argument list directly; joining into one string and
        # calling .split() broke any path containing whitespace.
        ids_scores = knn.main([
            features_file,
            outtype_dataset_file,
            datatype_filenames[datatype_num],
            dist_metrics[datatype_num],
            "false",
        ])
        debug_print("ids_scores", ids_scores)
        total_ids_scores = total_ids_scores + ids_scores
        debug_print("total_ids_scores", total_ids_scores)

    # Merge entries with the same id by summing their scores. The first
    # occurrence of each id is kept and updated in place.
    new_total_ids_scores = []
    for id_score in total_ids_scores:
        debug_print("id_score", id_score)
        for merged in new_total_ids_scores:
            if merged[0] == id_score[0]:
                merged[1] = merged[1] + id_score[1]
                break
        else:
            new_total_ids_scores.append(id_score)
        debug_print("new_total_ids_scores", new_total_ids_scores)

    # Average over the number of datatypes, then rank best-first.
    for id_score in new_total_ids_scores:
        id_score[1] = id_score[1] / len(datatypes)
    new_total_ids_scores.sort(key=lambda entry: entry[1], reverse=True)
    debug_print("new_total_ids_score", new_total_ids_scores)
    return new_total_ids_scores
def bagging(N):
    """Combine a random forest and a kNN classifier over N bootstrap trials.

    Each trial draws a bootstrap sample, trains and evaluates both
    classifiers on the test set, and prints their accuracy and elapsed time.
    The predictions of all 2*N runs are summed per test point, and the final
    label is 1 when that sum is at least N, otherwise 0. The combined
    accuracy is printed at the end.

    Depends on the module-level names RFData, knnData, TreeNum, isBagging and
    kInKnn. Nothing is returned.

    Args:
        N: Number of bootstrap trials to run.
    """
    # training data
    xRF = RFData.x
    y = RFData.y
    # testing data
    xtRF = RFData.xt
    yt = RFData.yt
    # NOTE(review): knnData.main() runs twice here; consider calling it once
    # if it is costly or not deterministic - confirm.
    xKNN = knnData.main()[0]  # training X
    xtKNN = knnData.main()[2]  # test X
    # Per-test-point sum of predictions over every classifier run.
    countYPredict = []
    for i in range(len(yt)):
        countYPredict.append(0)
    for k in range(N):  # number of bootstrapping
        x_RF = []
        y_RF = []
        x_KNN = []
        y_KNN = []
        # bootstrapping
        # Sample 60% of the training-set size, with replacement.
        for i in range(int(len(xRF) * 0.6)):
            r = randint(0, len(xRF) - 1)
            # Take every column of the sampled RF row except the last.
            tRF = []
            for j in range(len(xRF[0]) - 1):
                tRF.append(xRF[r][j])
            # for RF, data duplicates are not allowed
            if tRF not in x_RF:
                x_RF.append(tRF)
                y_RF.append(y[r])
            # NOTE(review): nesting reconstructed from collapsed source; the
            # kNN sample is assumed to retain duplicates - confirm.
            x_KNN.append(xKNN[r])
            y_KNN.append(y[r])
        # RF
        start = time.time()
        rf = RF(B=TreeNum, Bagging=isBagging)
        rf.train(x_RF, y_RF)
        pred = rf.predict(xtRF)
        end = time.time()
        count = 0
        for i in range(len(pred)):
            if pred[i] == yt[i]:
                count += 1
        print("RF, trial #" + str(k + 1) + ": ")
        print("\taccuracy: " + str(round(count / float(len(yt)) * 100, 2)) + "%")
        print("\ttraining time: " + str(round(end - start, 1)) + " seconds")
        # Fold the RF predictions into the vote tally.
        for i in range(len(pred)):
            countYPredict[i] = countYPredict[i] + pred[i]
        # KNN
        start = time.time()
        pred = knn.main(x_KNN, y_KNN, xtKNN, kInKnn)
        end = time.time()
        count = 0
        for i in range(len(pred)):
            if pred[i] == yt[i]:
                count += 1
        print("KNN, trial #" + str(k + 1) + ": ")
        print("\taccuracy: " + str(round(count / float(len(yt)) * 100, 2)) + "%")
        print("\ttraining time: " + str(round(end - start, 1)) + " seconds")
        # Fold the kNN predictions into the same tally.
        for i in range(len(pred)):
            countYPredict[i] = countYPredict[i] + pred[i]
    # There were 2*N votes per point, so a sum >= N means at least half.
    # Assumes predictions are 0/1 labels - TODO confirm.
    finalPredict = []
    for i in range(len(yt)):
        if countYPredict[i] >= N:
            finalPredict.append(1)
        else:
            finalPredict.append(0)
    count = 0
    for i in range(len(finalPredict)):
        if finalPredict[i] == yt[i]:
            count += 1
    print()
    print("After combining the classifiers by bagging: ")
    print("\taccuracy: " + str(round(count / float(len(yt)) * 100, 2)) + "%")
# Driver script (Python 2): runs knn.main on ALL.dat / AML.dat for several
# values of k, then plots hard-coded sensitivity / specificity figures.
import knn
import matplotlib.pyplot as plt
from pylab import *

# Part 1: vary k while holding p at 0.5.
k_values = [1,5,10,15,30,50]
p = 0.5
for k in k_values:
    knn.main("ALL.dat", "AML.dat", k, p)
    print "\n"

# Disabled plot of accuracy against k, using hard-coded accuracies.
# plot(k_values, [0.93, 0.94, 0.83, 0.79, 0.61, 0.61])
# xlabel('k')
# ylabel('accuracy')
# savefig('question1.jpg')

print "\n\n---------------------------------\n\n"

# Part 2: vary p with k fixed at 30. The knn call is commented out, so this
# loop currently only prints blank lines.
p_values = [0, 0.05, 0.20, 0.50, 0.75, 0.95, 1.00]
results = []  # never filled or read in the visible code
k = 30
for p in p_values:
    #knn.main("ALL.dat", "AML.dat", k, p)
    print "\n"

fig, ax = plt.subplots()
# Hard-coded values, one per entry of p_values.
sensitivity = [1.0, 1.0, 1.0, 1.0, 0.77, 0.0, 0.0]
specificity = [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0]
to_plot_spec = [1 - x for x in specificity]
# NOTE(review): this puts sensitivity on x and 1 - specificity on y; a
# conventional ROC curve has the axes the other way round - confirm intent.
ax.plot(sensitivity, to_plot_spec)
print( '\tk - [OPTIONAL] number of neighbours for knn subsection \'a\'' ) if __name__ == '__main__': if len(sys.argv) <= 2: usage() sys.exit(1) mod = sys.argv[1] sub = sys.argv[2] ret = 0 if mod == 'knn': import knn ret = knn.main(sub, sys.argv[3:]) elif mod == 'perceptron': import perceptron ret = perceptron.main(sub) elif mod == 'svm': import svm ret = svm.main(sub) else: usage() sys.exit(1) if ret != 0: usage()
print('Invalid Input.') elif choice == 2: # Classify using kNN choice2 = 0 while choice2 != 4: print('\nkNN Classification:') print('1 -> Classify Dataset. (Splits Dataset into training and test set)') print('2 -> Classify Testset. (Test kNN accuracy using Dataset as training set)') print('3 -> Classify New Emails.\n4 -> Back.') choice2 = int(input('Enter your choice: ')) if choice2 == 1: # Classify Dataset dataset_name = input('Enter the name of dataset folder to be classified: ') knn.main(dataset_name) elif choice2 == 2: # Classify Testset dataset_name = input('Enter the name of dataset folder to be used as training set: ') testset_name = input('Enter the name of test set folder: ') knn_classify.main(dataset_name, testset_name) elif choice2 == 3: # Classify New Emails dataset_name = input('Enter the name of dataset folder to be used as training set: ') reply = input('Do you want get new unread emails from your email account? (y/n): ')[0].lower() if reply == 'y': usr = input('Email: ') pwd = getpass('Password: ')
def process_message(msg):
    """Return the chatbot's reply to one user message.

    The conversation position is kept in the module-level ``qno`` and the
    collected answers in the module-level list ``ans``; both are updated
    here. Steps 0-4 fill ``ans`` from ``nlp`` parsers and the ``ke`` engine;
    steps 11-15 collect delay details and end with ``knn.main``.

    Side effects: reconfigures ``label_link``, prints debug output, and
    calls ``sys.exit()`` when the message is a farewell.

    NOTE(review): block nesting was reconstructed from a collapsed source
    line; confirm the indentation of the marked spots against the original.

    Args:
        msg: Raw user text; lower-cased before matching.

    Returns:
        The reply string. One branch (step 4 without a departure fact)
        returns a one-element list instead.
    """
    msg = msg.lower()
    global qno
    global ans
    #NLTK functions
    # Reset the link label to the default site on every message.
    hlink = "nationalrail.co.uk"
    label_link.configure(text=hlink, fg="blue", cursor="hand2")
    label_link.bind("<Button-1>", openLink)
    hiList = [
        "hello", "hi", "hey", "heya", "hiya", "hai", "howdy", "ciao", "ni hao"
    ]
    biList = [
        "goodbye", "bye", "bi", "exit", "see you", "cya", "see ya", "byebye",
        "ciao", "bye bye"
    ]
    # "ciao" is in both lists; the greeting check runs first and wins.
    if msg.lower() in hiList:
        return "Hello to you too user!"
    elif msg.lower() in biList:
        # A farewell terminates the whole process.
        sys.exit()
    if msg.lower() == "good":
        return "That's very good"
    delayList = ["delay", "late", "delayed"]
    print(qno)
    k = False
    # NOTE(review): the loop variable w is unused and the test is whether the
    # WHOLE message equals a delay word; `w in msg` may have been intended.
    for w in delayList:
        if msg in delayList:
            k = True
    if k == True:
        # `and` binds tighter than `or`: qno == 0 or (4 < qno < 10).
        if qno == 0 or qno > 4 and qno < 10:
            # NOTE(review): nesting of the next four statements is assumed.
            # Set to 10 and then incremented, so the delay flow starts at 11.
            qno = 10
            qno += 1
            print()
            return "How long has your train been delayed by?"
    if qno == 11:
        # Keep digits only; int() raises ValueError if none remain.
        msg = re.sub(r'[^\d]', "", msg)
        msg = int(msg)
        ans[0] = msg
        qno += 1
        return "Where did your train depart from and arriving at?"
    elif qno == 12:
        x = nlp.findStation(msg)
        ke.startKE(x[1], x[0], "", "", "")
        facts = readfacts()
        # Advance only when the first fact has been filled in.
        if facts[0] != "null":
            qno += 1
        # Replace each station that matches s[0] with the paired value s[1].
        for s in stations:
            if x[1] == s[0]:
                x[1] = s[1]
            if x[0] == s[0]:
                x[0] = s[1]
        ans[1] = x[1]
        ans[2] = x[0]
        return "What time did you depart?"
    elif qno == 13:
        x = nlp.findTime(msg)
        # Reduce the parsed time to its digits and store it as an int.
        x = re.sub(r'[^\d]', "", x)
        ans[3] = int(x)
        qno += 1
        return "What was you orginal estimated arrival?"
    elif qno == 14:
        x = nlp.findTime(msg)
        x = re.sub(r'[^\d]', "", x)
        ans[4] = int(x)
        qno += 1
        return "OK 1 sec!"
    elif qno == 15:
        # Build the stationsDict keys by space-joining the stored values.
        s1 = " ".join(ans[1])
        s2 = " ".join(ans[2])
        x = knn.main(ans[4], ans[3], stationsDict[s1], stationsDict[s2],
                     ans[0])
        x = "The estimated time of arrival is " + x
        # Conversation finished: clear the facts and return to step 0.
        resetfacts()
        qno = 0
        return x
    if qno == 0:
        x = nlp.findStation(msg)
        facts = readfacts()
        print(facts)
        print("asdaf")
        ke.startKE(x[1], x[0], "", "", "")
        facts = readfacts()
        print(facts)
        # Each filled fact advances one step, so step 1 is skipped when both
        # are filled.
        if facts[0] != "null":
            qno += 1
            ans[0] = x[1]
        if facts[1] != "null":
            qno += 1
            ans[1] = x[0]
        # facts[5] is returned as the reply in steps 0-3.
        return facts[5]
    elif qno == 1:
        x = nlp.finddplace(msg)
        facts = readfacts()
        ke.startKE(x, ans[0], "", "", "")
        facts = readfacts()
        # NOTE(review): this tests facts[0] and stores into ans[0], although
        # step 0 used facts[1] / ans[1] for this value - confirm the indices.
        if facts[0] != "null":
            qno += 1
            ans[0] = x
        return facts[5]
    elif qno == 2:
        x = nlp.findDate(msg)
        ke.startKE(ans[0], ans[1], "", x, "")
        facts = readfacts()
        if facts[2] != "null":
            qno += 1
            ans[2] = x
        return facts[5]
    elif qno == 3:
        x = nlp.findTime(msg)
        ke.startKE(ans[0], ans[1], x, ans[2], "")
        facts = readfacts()
        if facts[3] != "null":
            qno += 1
            ans[3] = x
        return facts[5]
    elif qno == 4:
        x = nlp.checkDepart(msg)
        ke.startKE(ans[0], ans[1], ans[3], ans[2], x)
        facts = readfacts()
        if facts[4] != "null":
            qno += 1
            ans[4] = x
            # temp[0] is returned as the reply; temp[1] becomes the link text.
            temp = webScraper.main()
            hlink = temp[1]
            label_link.configure(text=hlink, fg="blue", cursor="hand2")
            return temp[0]
        # NOTE(review): returns a list here, unlike the string replies of the
        # other branches - confirm the caller accepts both.
        return ["arrival or departure?"]
    if msg == " ":
        return "Hello?"
    facts = readfacts()
    print(facts)
    # while "null" in facts:
    #ke.test(msg)
    # print("NUL IN FACT")
    return "Sorry! I don't understand!"
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) fgmask = fgbg.apply(frame) fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_OPEN, kernel) #fgmask = cv2.filter2D(fgmask,-1,kernel) #fgmask = cv2.dilate(fgmask,kernel,iterations = 1) fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_CLOSE, kernel2) res = cv2.bitwise_and(fgmask, mask) cv2.imshow("frame", res) k = cv2.waitKey(1) & 0xFF #if k == ord('a'): feature = cleaner.do(res) if feature != None: knn.main(float(feature[0]), float(feature[1])) if k == ord('q'): break time.sleep(0.2) cap.release() cv2.destroyAllWindows()
with open('recent.file', 'w') as myfile: myfile.write(feature_data) print(feature_data) while True: # mengcapture frame realtime (ret, frame) = cap.read() cv2.putText( frame, 'Prediksi: ' + prediction, (15, 60), cv2.FONT_HERSHEY_SIMPLEX,1, (0, 0, 255), 2) # window output cv2.imshow('Sistem Pendeteksi warna kematangan tomat', frame) color_histogram_of_test_image(frame) prediction = knn.main('training.csv', 'recent.file') if cv2.waitKey(1) & 0xFF == ord('q'): break cap.release() cv2.destroyAllWindows()