def recog():
    if flask.request.files.get("img"):
        img = flask.request.files["img"]
        img.save(os.path.join("static", img.filename))
        img = Image.open(open(os.path.join("static", img.filename), "rb"))
        y_hat = classify(img)
        return flask.jsonify({"class": y_hat[0], "prob": y_hat[1]}), 201
    return flask.jsonify({"status": "not an image file"})
async def recog():
    files = await quart.request.files
    if files.get("img", None):
        img_str = files["img"].read()
        img = Image.open(io.BytesIO(img_str))
        y_hat = classify(img)
        return quart.jsonify({"class": y_hat[0], "prob": y_hat[1]}), 201
    return quart.jsonify({"status": "not an image file"})
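# Illustrative sketch only: neither handler above shows how it is wired to an app.
# A conventional Flask registration would look like the commented lines below; the
# route path, host, and port are assumptions, and the Quart version would be wired
# analogously onto a quart.Quart app.
#
#     app = flask.Flask(__name__)
#     app.add_url_rule("/recog", view_func=recog, methods=["POST"])
#     app.run(host="0.0.0.0", port=5000)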
def _example_run(args):
    """Command to run the example for ECT-584.

    First runs the extract on the raw emails to produce a raw dataset. This
    dataset is then cleaned and the training data is staged to produce a
    pre_processed dataset. This is then processed to create test and training
    ARFF datasets. The training dataset is used to build a classification
    model using Weka's J48 implementation of the C4.5 classification
    algorithm. Once the model has been created, events are predicted for the
    test dataset; the predicted values are appended to the test dataset and
    saved to an Excel file. This file is then loaded and compared against the
    validation dataset to assess the accuracy of the model.
    """
    #calls main.py run_extract with relative file paths
    #run_extract(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'raw')),
    #            os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')),
    #            'extract', 'text', False)

    #calls main.py run_clean with relative file paths
    run_clean(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')),
              os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')),
              'extract', 'text')

    #calls main.py stage_train with relative file paths
    stage_train(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')),
                'extract_cleaned', 'text',
                os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'train')),
                't_data', 'excel',
                os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')))

    #calls main.py run_process with relative file paths
    run_process(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'stage')),
                os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'process')),
                'pre_processed', 'text')

    #calls main.py build_model with relative file paths
    build_model(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'process', 'train.arff')),
                os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'process', 'model.model')))

    #calls main.py classify with relative file paths
    classify(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'process', 'test')),
             os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'process', 'model.model')),
             os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'results', 'output.xls')),
             'event')

    #calls main.py validate with relative file paths
    validate(os.path.normpath(os.path.join(__file__, '..', '..', 'data', 'results')),
             'output')
def knn_tuning(data, label, k_range=range(1, 11)):
    from models import get_knn

    k_f1 = []
    for k in k_range:
        knn = get_knn(k=k)
        scores = classify(knn, data, label)
        k_f1.append(scores['f1'])
        print(f'k = {k} done! f1 = {k_f1[-1]}')
    print(k_f1)

    import matplotlib.pyplot as plt
    plt.figure()
    plt.plot(k_range, k_f1)
    plt.xlabel('Value of k for KNN')
    plt.ylabel('CrossValidation f1')
    plt.show()
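# Illustrative sketch only: the project's classify() is defined elsewhere. A generic
# cross-validated f1 score (which is what k_f1 above appears to collect) could be
# computed with scikit-learn as below; the function name, cv=5, and binary labels
# are assumptions, not taken from the original code.
def cross_val_f1_sketch(model, data, label):
    import numpy as np
    from sklearn.model_selection import cross_val_score
    # mean f1 across 5 stratified folds, returned in the same dict shape used above
    return {'f1': float(np.mean(cross_val_score(model, data, label, cv=5, scoring='f1')))}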
def upload():
    # askopenfile returns None if the dialog is cancelled, so guard before using .name
    selected = filedialog.askopenfile(initialdir='.', filetypes=[('JPEG images', '*.jpg *.jpeg')])
    if not selected:
        label['text'] = ''
        return
    file_path = selected.name

    # get fun fact
    label['text'] = classify(file_path)

    # display image
    im = Image.open(file_path)
    # im = im.resize((50, 50))
    photo = ImageTk.PhotoImage(im)
    canvas.delete('all')
    canvas.create_image((0, 0), image=photo, anchor="s")
    canvas.pack()
def forward_feature_selection(model, features_names, score_name='roc_auc'):
    current = []
    best_select = None
    for i in range(len(features_names) - len(current)):
        stage_best = None
        for f in features_names:
            if f not in current:
                current.append(f)
                print(current)
                data, label = get_data(current)
                scores = classify(model, data, label)
                print(f"try {current}, ", scores)
                if stage_best is None or stage_best[1] < scores[score_name]:
                    stage_best = (f, scores[score_name])
                current.pop()
        current.append(stage_best[0])
        if best_select is None or stage_best[1] > best_select[1]:
            best_select = (copy.deepcopy(current), stage_best[1])
        print(f"k = {i}, best_{score_name} = {best_select[1]}, ", best_select[0])
    return best_select
def login(image_URIs, confidence):
    #todo, only checking 3
    for i in range(len(image_URIs)):
        if i == 0 and not imgURItoFile("unknown", image_URIs[i]):
            print 'i=1 failed'
            return None
        if i == 1 and not imgURItoFile("unknown_left", image_URIs[i]):
            print 'i=2 failed'
            return None
        if i == 2 and not imgURItoFile("unknown_right", image_URIs[i]):
            print 'i=3 failed'
            return None

    #todo, make sure unknown is saved and recent. This may cause problems if multiple
    #people try and log in at once. Make separate, dedicated thread on server per login request?
    result = main.classify(KNOWN_IMAGE_DIR, confidence)

    #cleanup
    os.remove("unknown")
    os.remove("unknown_left")
    os.remove("unknown_right")
    return result
def main():
    # train_data = {
    #     'test/spam1' : 'spam',
    #     'test/spam2' : 'spam',
    #     'test/spam3' : 'spam',
    #     'test/spam4' : 'spam',
    #     'test/ham1' : 'ham',
    #     'test/ham2' : 'ham',
    #     'test/ham3' : 'ham',
    #     'test/ham4' : 'ham',
    # }

    # Setup
    train_data, test_data = parse_labels()
    stats(test_data)
    test_data = {
        'test/test0' : 'spam',
    }

    print datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Generating Counts"
    word_dict, spam_count, ham_count = create_word_counts(train_data)
    print datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Done."

    # Iterate through possible values of lambda for lambda smoothing.
    # la_set = [0.005, 0.1, 0.5, 1.0, 2.0]
    la_set = [0]
    for la in la_set:
        print datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Computing Probs for lambda={0}".format(la)
        word_probs = compute_probs(word_dict, spam_count, ham_count, la)
        spam_prior_prob = float(spam_count) / (spam_count + ham_count)
        ham_prior_prob = float(ham_count) / (spam_count + ham_count)
        print datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Done"
        TP, FP, TN, FN, problems = classify(test_data, word_probs, spam_prior_prob, ham_prior_prob)
        summarize_findings(la, TP, FP, TN, FN, problems)
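# Illustrative sketch only: compute_probs() is defined elsewhere in this project.
# One common form it might take is Bernoulli-style word-presence probabilities with
# additive (lambda) smoothing; the word_dict layout (word -> per-class document
# counts) and the treatment of spam_count/ham_count as document totals are
# assumptions, not taken from the original code.
def compute_probs_sketch(word_dict, spam_count, ham_count, la):
    probs = {}
    for word, (spam_docs_with_word, ham_docs_with_word) in word_dict.items():
        # P(word present | class) with lambda added to each of the two outcomes
        p_word_spam = float(spam_docs_with_word + la) / (spam_count + 2 * la)
        p_word_ham = float(ham_docs_with_word + la) / (ham_count + 2 * la)
        probs[word] = (p_word_spam, p_word_ham)
    return probs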
def c(params):
    # keep only the rows belonging to the two digits being compared
    f = (labels == params['first_digit']) | (labels == params['second_digit'])
    i = images[f.ravel()]
    # binary labels: True where the example is the first digit
    l = params['first_digit'] == labels[f.ravel()]
    # append a bias column of ones before classifying
    return classify(params['theta'],
                    np.concatenate((i, np.ones((i.shape[0], 1))), axis=1)), l, params['lambda']
            continue
        if command == "\n" or command == "\r":
            # if maxdatasize == len(datacollectionarray):
            #     print "Flushing collected data", datacollectionarray
            #
            #     preprocessed = icatest.preprocess_data_array(datacollectionarray)
            #     extracted = dataprocessing.find_signals(datacollectionarray)
            #     result = icatest.classify(extracted)
            #     print result
            #     datacollectionarray = []
            stripped = readbuffer.strip()
            if stripped != '':
                mappedvalue = int(stripped) / 4
                try:
                    mappedvalue = float(m(mappedvalue))
                except:
                    continue
                datacollectionarray.append(mappedvalue)
                print mappedvalue
            readbuffer = ""
            continue
        readbuffer += command
        if time.time() - start > sampletake_secoonds:
            print "Flushing collected data", len(datacollectionarray)
            extracted = dataprocessing.find_signals(datacollectionarray)
            result = main.classify(extracted)
            print result
            datacollectionarray = []
            break
    print datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Generating Counts"
    word_dict, total_doc_count, spam_doc_count, ham_doc_count = create_word_counts(train_data)
    print datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Done."

    la = 2.000
    print datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Computing Probs for lambda={0}".format(la)
    word_probs = compute_probs(word_dict, spam_doc_count, ham_doc_count, la)
    spam_prior_prob = float(spam_doc_count) / (spam_doc_count + ham_doc_count)
    ham_prior_prob = float(ham_doc_count) / (spam_doc_count + ham_doc_count)
    print datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Spam Prior Prob : {0}".format(spam_prior_prob)
    print datetime.datetime.now().strftime("%I:%M:%S") + " : " + " Ham Prior Prob : {0}".format(ham_prior_prob)

    best_mi = compute_mi(word_probs, spam_prior_prob, ham_prior_prob)
    top_200 = best_mi[0:200]
    print [word[0] for word in top_200]

    mi_word_probs = {}
    for (word, mi) in top_200:
        mi_word_probs[word] = word_probs[word]
    mi_word_probs["*"] = word_probs["*"]

    TP, FP, TN, FN = classify(test_data, mi_word_probs, spam_prior_prob, ham_prior_prob)
    summarize_findings(TP, FP, TN, FN)
    print "Done."

if __name__ == "__main__":
    main()
def processPic():
    picbase64 = request.get_json()["picbase64"]
    # strip everything up to and including the first comma, i.e. the
    # "data:image/...;base64," prefix of the data URL
    commaIndex = picbase64.find(",")
    pic = picbase64[(commaIndex + 1):]
    result = classify(pic)
    return result
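# Illustrative sketch only: a client posting a data-URL image to this endpoint.
# The route path "/processPic", host, port, and file name are assumptions, not
# taken from the project.
#
#     import base64, requests
#     with open("photo.jpg", "rb") as fh:
#         payload = "data:image/jpeg;base64," + base64.b64encode(fh.read()).decode()
#     requests.post("http://localhost:5000/processPic", json={"picbase64": payload})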