def predict_dir(caffemodel, deploy, file_list, IMAGE_SIZE=227, LAYER_NAME="my-fc8", mode=True):
    images = np.zeros((len(file_list), 3, IMAGE_SIZE, IMAGE_SIZE), dtype=np.float32)
    # read the real age of every file and preprocess the images
    real_ages = []
    for index, dicom_file in enumerate(file_list):
        print(dicom_file)
        real_age = info.getInfo(dicom_file)
        real_ages.append(real_age)
        images[index, :, :, :] = preprocess.process(dicom_file, IMAGE_SIZE=IMAGE_SIZE)
    if mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()
    net = caffe.Net(deploy, caffemodel, caffe.TEST)
    net.blobs['data'].reshape(len(file_list), 3, IMAGE_SIZE, IMAGE_SIZE)
    net.blobs['data'].data[...] = images
    output = net.forward()
    MAE_SUM = 0.0
    for index, result in enumerate(output[LAYER_NAME]):
        predict_age = result[0]
        real_age = real_ages[index]
        # accumulate the absolute error between prediction and ground truth
        MAE_SUM += abs(predict_age - real_age)
        print(abs(predict_age - real_age))
    print(MAE_SUM)
    return MAE_SUM / len(file_list)
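# A usage sketch for predict_dir above (all paths are hypothetical): gather
# the dicom files under a test directory and report the mean absolute error.
def example_predict_dir():
    test_files = []
    for root, dirs, files in os.walk("./test_dicoms"):
        for name in files:
            test_files.append(os.path.join(root, name))
    mae = predict_dir("age_net.caffemodel", "deploy.prototxt", test_files,
                      IMAGE_SIZE=227, LAYER_NAME="my-fc8", mode=True)
    print("MAE over %d files: %f" % (len(test_files), mae))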
def preproc():
    sym = txt.get()
    if sym != "":
        pre.process(sym)
        print("preprocess")
        tm.showinfo("Input", "Preprocess Successfully Finished")
    else:
        tm.showinfo("Input error", "Select Dataset")
def preprocess():
    sym = txt.get()
    res = ""
    message.configure(text=res)
    if sym != "":
        pre.process(sym)
        print("preprocess")
        tm.showinfo("Input", "Preprocess Successfully Finished")
    else:
        tm.showinfo("Input error", "Select Dataset")
def main():
    # load the trained model from disk; provide your filename here
    with open('models/LogRegression_thre1', 'rb') as f:
        model = pickle.load(f)
    process(filename='your_file_name.csv')
    datadf = pd.read_csv(FILENAME)
    datadf = datadf.drop(datadf.columns[[0]], axis=1)
    datadf = (datadf - datadf.mean()) / (datadf.max() - datadf.min())
    X = np.array(datadf)
    predictions = model.predict(X)
    # treat the predicted rating as accurate to within +/- 1:
    # e.g. a prediction of 7 means the true rating is likely between 6 and 8
    print(predictions)
def data_write_disk():
    test = P.process()
    # test[0], test[1] and test[2] hold the precision, recall and f1 results
    test = map(lambda t: ', '.join(str(x) for x in t), test)
    test = map(lambda t: t + "\n", test)
    with open("datasample.csv", "a") as f:
        f.writelines(test)
    GT.dump_call_bb_list()
def genModel(artist, song, model, embedDim, interval, distance):
    XTrain, yTrain, XPredict, yTest, mean, var = pp.process(artist, song, embedDim, interval, distance)
    yPredict = model.train(XTrain, yTrain, XPredict)
    # undo the normalisation applied during preprocessing
    yTest = yTest * var + mean
    yPredict = yPredict * var + mean
    yPredict[yPredict < 0] = 0  # clamp negative predictions to zero
    return yPredict, yTest
def generateHdf5_fromfilelist(source_list, target):
    h5_file = hy.File(target, 'w')
    file_list = []
    for source in source_list:
        for root, dirs, files in os.walk(source):
            for dicom_file in files:
                file_list.append(os.path.join(root, dicom_file))
    random.shuffle(file_list)
    # change IMAGE_SIZE to the input size your network expects
    IMAGE_SIZE = 224
    data = np.zeros((len(file_list), 3, IMAGE_SIZE, IMAGE_SIZE))
    labels = np.zeros(len(file_list), dtype=np.float32)
    for index, dicom_file in enumerate(file_list):
        age = info.getInfo(dicom_file)
        im = preprocess.process(dicom_file, IMAGE_SIZE=IMAGE_SIZE)
        data[index, :, :, :] = im
        labels[index] = age
        print(dicom_file, age)
    h5_file['data'] = data
    h5_file['label'] = labels
    print(labels)
    h5_file.close()
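# A usage sketch for the function above (paths are hypothetical). Note that
# caffe's HDF5Data layer expects a text file listing HDF5 files rather than
# the HDF5 file itself, so the generated file is usually registered in one:
def example_generate_hdf5():
    generateHdf5_fromfilelist(["./train_part1", "./train_part2"], "train.h5")
    with open("train_h5_list.txt", "w") as f:
        f.write(os.path.abspath("train.h5") + "\n")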
def dump_distribution():
    test = P.process()
    bl = GT.dump_call_bb_list()
    t = []
    for k, v in test.items():
        # if int(k) in bl:
        if True:
            t.append(v)
    # keep the five boolean fields of each entry
    t1 = list(map(lambda e: (e[0], e[1], e[2], e[3], e[4]), t))
    k = len(t1[0])
    # enumerate all 2^k possible boolean tuples and count occurrences
    kk = list(product(range(2), repeat=k))
    res = [0] * len(kk)
    for t11 in t1:
        i = kk.index(t11)
        res[i] += 1
    res = [float(i) / len(t1) for i in res]
    print("distribution:", res)
    print("length:", len(t1))
async def analyze():
    result = await request.body
    result = json.loads(result.decode("utf-8"))
    if result["tweets"]:
        before = time()
        result["vaildAccount"] = True
        merged_tweets = process(result["tweets"])
        result["year"], result["month"] = year_month(result["info"])
        result["monthGraph"] = month_graph(result["tweets"])
        result["hourGraph"] = hour_graph(merged_tweets)
        result["dayGraph"] = day_graph(merged_tweets)
        result["dayTimeGraph"] = day_time_graph(result["tweets"])
        result["region"], result["regionProba"] = predict_region(merged_tweets, xgb_region)
        result["country"], result["countryProba"] = predict_country(merged_tweets, xgb_country, tfidf_vectorizer)
        result["wordcloud"] = wordcloud(merged_tweets)
        result["sentimentGraph"] = sentiment_graph(merged_tweets, result["tweets"])
        result["hmuGraph"] = hmu_graph(result["tweets"])
        result["hashtagGraph"] = hashtag_graph(result["tweets"])
        result["logScale"] = log_scale(result["tweets"])
        result["mentions"] = mentions(result["tweets"])
        result["urls"] = urls(result["tweets"])
        result["took"] = f"{time() - before} seconds"
        print(result["took"])
        return jsonify(result)
    else:
        result["vaildAccount"] = False
        return jsonify(result)
def predict(net, comments, sequence_length=50, train_on_gpu=False):
    net.eval()
    # preprocess:
    cleaned_comments = prepros(comments)
    # process:
    features = process(cleaned_comments)
    feature_tensor = torch.from_numpy(features)
    feature_tensor = feature_tensor.type(torch.LongTensor)
    batch_size = feature_tensor.size(0)
    # initialize hidden state
    h = net.init_hidden(batch_size)
    if train_on_gpu:
        feature_tensor = feature_tensor.cuda()
    # get the output from the model
    output, h = net(feature_tensor, h)
    # convert output probabilities to prediction
    pred = torch.argmax(output, dim=1)
    return pred, output, cleaned_comments
def output_features_of_dataset(source, caffemodel, deploy_file, IMAGE_SIZE=227, gpu_mode=True,
                               LAST_LAYER_NAME="ip1", batch_size=240, save_file_path="./features.txt"):
    if gpu_mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()
    net = caffe.Net(deploy_file, caffemodel, caffe.TEST)
    samples = []
    for dir_name in os.listdir(source):
        one_person_dir = os.path.join(source, dir_name)
        # list the pictures of this person, not the top-level directory
        for file_name in os.listdir(one_person_dir):
            one_person_pic_path = os.path.join(one_person_dir, file_name)
            samples.append((dir_name, file_name, one_person_pic_path))
    data = np.zeros((batch_size, 3, IMAGE_SIZE, IMAGE_SIZE))
    with open(save_file_path, "w") as f:
        for index, sample in enumerate(samples):
            t = index % batch_size
            data[t, :, :, :] = preprocess.process(sample[2], IMAGE_SIZE)
            # flush a full batch through the network
            if t == batch_size - 1:
                net.blobs['data'].data[...] = data
                output = net.forward()
                features = output[LAST_LAYER_NAME]
                lines = [
                    "%s %s %s\n" % (s[0][0], s[0][1], " ".join(str(x) for x in s[1]))
                    for s in zip(samples[index - batch_size + 1:index + 1], features)
                ]
                f.writelines(lines)
def read_files(file):
    with open(file) as f:
        full_lines = f.read()
    file_result = json.loads(json.dumps(process(full_lines)))
    return file_result
def main_op():
    review_spirit = w.get('1.0', END)
    demo = process(review_spirit)
    demo1 = create_word_features(demo)
    demo2 = 'Review : ' + clf.classify(demo1)
    l2 = Label(bottom_frame, text=demo2)
    l2.pack()
def get_trainval_data(batch_size, train_percent, num_workers=1, data_dir='../data/', num_dir=1,
                      IM_SIZE=(160, 160), target_im_size=(128, 128), threshold=0.5,
                      transform=True, no_cut_select=0.4):
    wear_cut, no_wear_cut, _, _ = process(crop_size=IM_SIZE[0], data_dir=data_dir,
                                          num_dir=num_dir, threshold=threshold)
    images, labels = Imdb(wear_cut, no_wear_cut, no_cut_select=no_cut_select)
    # randomly split the samples into train and validation sets
    train_idx = random.sample(range(0, len(images)), int(len(images) * train_percent))
    mask = np.zeros(len(images), dtype=bool)
    mask[train_idx] = True
    train_images = np.asarray(images)[mask]
    train_labels = np.asarray(labels)[mask]
    val_images = np.asarray(images)[~mask]
    val_labels = np.asarray(labels)[~mask]
    train_dataset = DatasetTrainVal(train_images, train_labels, target_im_size=target_im_size, transform=transform)
    val_dataset = DatasetTrainVal(val_images, val_labels, target_im_size=target_im_size, transform=transform)
    train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                       drop_last=True, num_workers=num_workers)
    val_dataloader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True,
                                     drop_last=True, num_workers=num_workers)
    return train_dataloader, val_dataloader
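# DatasetTrainVal is defined elsewhere in this project; below is a minimal
# sketch of what such a Dataset could look like (the body is an assumption,
# not the project's actual implementation):
import torch
from torch.utils import data

class DatasetTrainValSketch(data.Dataset):
    def __init__(self, images, labels, target_im_size=(128, 128), transform=True):
        self.images = images              # array of HxWxC crops
        self.labels = labels
        self.target_im_size = target_im_size
        self.transform = transform        # whether to apply augmentation

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx]
        # resizing/augmentation to self.target_im_size would go here
        img = torch.from_numpy(img).float().permute(2, 0, 1)  # HWC -> CHW
        return img, int(self.labels[idx])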
def __collect(self, json_file_path, source_filenames_path, dump_path):
    function_tuples = []
    with open(json_file_path, 'r') as f1, open(source_filenames_path, 'r') as f2:
        for json_idx, json_tree_str in enumerate(tqdm(f1.readlines())):
            if json_idx > 50:  # TEST
                print('Force Early Stopping.')
                break
            json_tree = json.loads(json_tree_str)
            source_code_file = self.py150_source_dir + f2.readline()[:-1]  # remove '\n'
            if len(json_tree) == 0:
                continue
            try:
                with open(source_code_file, 'r') as f:
                    tree = ast.parse(f.read())
                function_dict = process(json_idx, json_tree, tree)
                for functionItem in function_dict.values():
                    if functionItem.node_idx >= 0:
                        token_seq = filter_tokens(functionItem.raw_token_seq)
                        function_tuples.append((functionItem.functionName,
                                                functionItem.json_idx,
                                                functionItem.node_idx,
                                                token_seq))
            except IOError:  # no FileNotFoundError in py2
                print("Early stopping of preprocessing")
                break
    with open(dump_path, 'wb') as f:
        pickle.dump(function_tuples, f)
def make(sourcefile, modulename):
    import cleaner, preprocess
    if not os.access(sourcefile, os.F_OK):
        raise IOError(sourcefile)
    basename = os.path.basename(sourcefile)
    preprocessed = "%s.c" % (modulename)
    cleaned = "%s_clean.c" % (modulename)
    pyfinal = "%s.py" % (modulename)
    if not os.access(pyfinal, os.F_OK):
        if not os.access(cleaned, os.F_OK):
            if not os.access(preprocessed, os.F_OK):
                # preprocess the file
                if preprocess.process(sourcefile, preprocessed) > 0:
                    return
                log.info('PREPROCESS - OK')
            # clean it
            if cleaner.clean(preprocessed, cleaned) > 0:
                return
            log.info('CLEAN - OK')
        # generate pyfinal
        if gen(cleaned, modulename) > 0:
            return
        log.info('PYFINAL - OK')
    __import__(modulename)
    import inspect
    nbClass = len(inspect.getmembers(sys.modules[modulename], inspect.isclass))
    # count all members, not just classes
    nbMembers = len(inspect.getmembers(sys.modules[modulename]))
    log.info("module %s has %d members for %d class" % (modulename, nbMembers, nbClass))
def main_op():
    review_spirit = w.get('1.0', END)
    demo = process(review_spirit)
    demo1 = creation_list_mots(demo)
    demo2 = 'sentiment est ' + clf.classify(demo1)  # French for "the sentiment is"
    l2 = Label(bottom_frame, text=demo2)
    l2.pack()
def test_one(self):
    # All will be removed, except the first column in both alignments
    msa_a = TabularMSA([Protein('DL-'), Protein('KL-'), Protein('DL-')])
    msa_b = TabularMSA([Protein('KT-'), Protein('DT-'), Protein('KT-')])
    contact_mtx = np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]])
    exp_contact_mtx = np.array([[1]])
    gap_threshold = 0.5
    num_mtx_a, bin_mtx_a, gappy_idxs_a, constant_idxs_a = preprocess.process(
        msa_a, gap_threshold, AA_TABLE)
    num_mtx_b, bin_mtx_b, gappy_idxs_b, constant_idxs_b = preprocess.process(
        msa_b, gap_threshold, AA_TABLE)
    proc_contact_mtx = preprocess.process_contact_mtx(
        contact_mtx, gappy_idxs_a, constant_idxs_a, gappy_idxs_b, constant_idxs_b)
    print(proc_contact_mtx)
    print(exp_contact_mtx)
    assert np.array_equal(proc_contact_mtx, exp_contact_mtx)
def save_valid_midis(url, directory="midis"):
    valid_instruments = ["piano", "harpsichord"]
    links = get_all_links(url)
    prev_midis = [get_first_notes(x) for x in get_midis_in_directory(directory)]
    os.mkdir("temp")
    for link in list(links):
        name = re.findall("/[^/]+", link)[-1][1:]
        urlretrieve(link, "temp.mid")
        midi = mido.MidiFile("temp.mid")
        if len(midi.tracks) > 3:
            continue
        # keep only files where every track is played by a valid instrument
        valid = all(
            any(instr in track.name.lower() for instr in valid_instruments)
            for track in midi.tracks[1:])
        if not valid:
            continue
        # skip files that duplicate an already-saved midi
        notes = get_first_notes(midi)
        for notes1 in prev_midis:
            if check_two_midis_similar(notes1, notes):
                valid = False
                break
        if not valid:
            continue
        prev_midis.append(notes)
        midi.save(filename="temp/" + name)
    process("temp")
    for file in os.listdir("temp"):
        shutil.copy("temp/" + file, directory + "/" + file)
    shutil.rmtree("temp")
    print("downloaded all")
def train(data, targets, filenames):
    targets = [val == "INFEC" for val in targets]  # Set INFEC as positive val

    # Choose training mode
    options = ["Cross validation", "Build and test model"]
    res = ui.prompt(options=options)
    mode = options[int(res)]

    # Choose ML algorithm
    options = ["Support Vector Machine", "Random Forest", "Decision Tree Classifier", "KNN"]
    res = ui.prompt("Choose a ML algorithm:", options)
    switch = {
        0: svm.SVC(C=100., random_state=0),
        1: RandomForestClassifier(n_estimators=50, max_depth=None, random_state=0),
        2: DecisionTreeClassifier(random_state=0),
        3: KNeighborsClassifier()
    }
    clf = switch.get(int(res))

    if mode == "Cross validation":
        model_evaluation(data, targets, clf)
    elif mode == "Build and test model":
        # Train model
        clf.fit(data, targets)

        # Get test dir
        while True:
            dirname = ui.prompt("Which directory are the test files in?")
            if os.path.isdir(dirname):
                break
            print("ERROR: Directory not found.")

        # Set up data/targets for test model
        print("\n************************************")
        print("*  PREPARING MODEL FOR EVALUATION  *")
        print("************************************")
        pageNames, y_true, filenames = pproc.process(dirname)
        y_true = [val == "INFEC" for val in y_true]  # Set INFEC as positive val
        test_data = ft.features(pageNames)
        y_pred = clf.predict(test_data)
        save_filenames(y_true, y_pred, filenames)

        conf_matrix = skm.confusion_matrix(y_true, y_pred)
        accuracy = skm.accuracy_score(y_true, y_pred)
        precision = skm.precision_score(y_true, y_pred, average=None)
        recall = skm.recall_score(y_true, y_pred, average=None)
        f1 = skm.f1_score(y_true, y_pred, average=None)
        print("\n{}".format(conf_matrix))
        print("Accuracy: {}".format(accuracy))
        print("Precision: {}".format(precision[1]))
        print("Recall: {}".format(recall[1]))
        print("F1: {}".format(f1[1]))
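# model_evaluation is defined elsewhere in this project; a minimal sketch of
# what its k-fold cross validation could look like (this body is an
# assumption, not the project's actual code):
from sklearn.model_selection import cross_val_score

def model_evaluation_sketch(data, targets, clf, folds=5):
    # cross_val_score refits clf on each fold and scores the held-out part
    scores = cross_val_score(clf, data, targets, cv=folds, scoring="f1")
    print("F1 per fold: {}".format(scores))
    print("Mean F1: {:.3f} (+/- {:.3f})".format(scores.mean(), scores.std()))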
def extract_names(cv_dir, word_limit):
    extracted_names = {}
    for file in os.listdir(cv_dir):
        # extract the raw text according to the file type
        if file.endswith('.pdf'):
            text = convert2txt.extract_text(cv_dir + file, '.pdf')
        elif file.endswith('.doc'):
            text = convert2txt.extract_text(cv_dir + file, '.doc')
        elif file.endswith('.docx'):
            text = convert2txt.extract_text(cv_dir + file, '.docx')
        elif file.endswith('.txt'):
            with open(cv_dir + file, encoding='utf-8') as f:
                text = f.read()
        else:
            continue
        # keep the first word_limit words, preprocess, and collect entities
        words = text.split()
        text = ' '.join(words[:word_limit])
        text = preprocess.process(text)
        nlp_text = nlp(text)
        extracted_names[file] = [e.text for e in nlp_text.ents]
    return extracted_names
def read_data(path, n_bin=3):
    prob_name = path.split('/')[-1]
    datafile = path + '/' + prob_name + '.data'
    # data = np.loadtxt(datafile, delimiter=',', dtype=str)
    data = parse_c45(prob_name, path)
    data = np.asarray(data.to_float())
    X = data[:, 1:-1]
    X = process(X, prob_name, n_bin)
    y = data[:, -1].astype(int)
    return X, y
def predict(self, image_path):
    '''
    Run the model and return the prediction result.
    :param input: evaluation sample, e.g. {"image_path": "image/172691.jpg"}
    :return: result returned to the system on success, e.g. {"label": "ZASSEOR"}
    '''
    pred = ''
    for n in range(len(self.model_list)):
        model = self.model_list[n].to(device)
        srcimg = cv2.imread(image_path, 1)
        img = cv2.cvtColor(srcimg, cv2.COLOR_BGR2GRAY)
        # segment the handwritten characters
        roi, roicon = process(img)  # [row_up, row_down], [(), (), ...()]
        if roi is None:
            return {"label": "None"}
        roi_imgs = get_roi_img(srcimg, roi, roicon, gap=2)
        if len(roi_imgs) == 0:
            return {"label": "None"}
        # split the roi images
        roi_imgs = roiimg_split(roi_imgs)
        # look for a space between the roi images
        big_indx = find_space(roicon)
        for i in range(len(roi_imgs)):
            roi_imgs[i] = torch.FloatTensor(
                cv2.resize(roi_imgs[i], (40, 56), cv2.INTER_LANCZOS4))
        inputs = torch.stack(roi_imgs, dim=0).permute(0, 3, 1, 2).to(device)
        output = model(inputs)
        if output.ndim == 0:
            return {"label": "None"}
        predict = torch.max(output, 1)[1]
        for i in range(predict.shape[0]):
            if predict[i] < 26:
                pred += chr(predict[i].item() + 65)  # A-Z
            elif predict[i] == 26:
                pred += '-'
            elif predict[i] == 27:
                pred += "'"
    if big_indx is not None:
        l_pred = list(pred)
        l_pred.insert(big_indx, ' ')
        pred = ''.join(l_pred)
    return {"label": pred}
def predict(caffemodel, deploy, dicom_file, IMAGE_SIZE=227, LAYER_NAME="my-fc8"):
    # read a dicom file and its ground-truth age
    age = info.getInfo(dicom_file)
    im = preprocess.process(dicom_file, IMAGE_SIZE=IMAGE_SIZE)
    caffe.set_mode_gpu()
    net = caffe.Net(deploy, caffemodel, caffe.TEST)
    net.blobs['data'].reshape(1, 3, IMAGE_SIZE, IMAGE_SIZE)
    net.blobs['data'].data[...] = im
    output = net.forward()
    predict_age = output[LAYER_NAME][0][0]
    # return age, predict_age
    print("%s predict: %s real: %s" % (dicom_file, predict_age, age))
def get_pred():
    global image
    image = get_img(master, w)
    image = np.asarray(image)[:, :, 0]
    image = preprocess.process(image)
    image = np.expand_dims(image, axis=0)
    image = np.expand_dims(image, axis=3)
    pred = classifier.predict(image)[0]
    pred = class_indices[np.argmax(pred)]
    print_pred(pred)
    w.delete('all')
    master.after(10000, get_pred)
def get_formatted_data(train=True):
    data = get_data() if train else get_test_data()
    data['text1_processed'] = data.text1.apply(lambda x: pp.process(x))
    data['text2_processed'] = data.text2.apply(lambda x: pp.process(x))
    ## get the tokens
    ## https://keras.io/preprocessing/text/#tokenizer
    ## https://blog.keras.io/using-pre-trained-word-embeddings-in-a-keras-model.html
    list1 = list(data.text1.values.astype(str))
    list2 = list(data.text2.values.astype(str))
    # Experiment - using the preprocessed text (with stopwords and
    # lemmatization) instead - probably not a good idea:
    # list1 = list(data.text1_processed.values.astype(str))
    # list2 = list(data.text2_processed.values.astype(str))
    all_questions = list1 + list2
    tokenizer.fit_on_texts(all_questions)
    maximum_length_of_question = 40
    sequences_text1 = tokenizer.texts_to_sequences(data.text1.values)
    sequences_text1 = sequence.pad_sequences(sequences_text1, maxlen=maximum_length_of_question)
    sequences_text2 = tokenizer.texts_to_sequences(data.text2.values.astype(str))
    sequences_text2 = sequence.pad_sequences(sequences_text2, maxlen=maximum_length_of_question)
    return [sequences_text1, sequences_text2]
def generate_siamese_lmdb(source, target, IMAGE_SIZE=227):
    env = lmdb.Environment(target, map_size=int(1e12), writemap=True)
    dataset = generate_siamese_dataset(source, totals=250000)
    _same = dataset[0]
    _diff = dataset[1]
    random.shuffle(_same)
    random.shuffle(_diff)
    # label same-person pairs 1 and different-person pairs 0
    for x in _same:
        x.append(1)
    for x in _diff:
        x.append(0)
    samples = []
    samples.extend(_same)
    samples.extend(_diff)
    random.shuffle(samples)
    with env.begin(write=True) as txn:
        datum = caffe.proto.caffe_pb2.Datum()
        dimension = 3
        # the two images of a pair are stacked along the channel axis
        datum.channels = 2 * dimension
        datum.height = IMAGE_SIZE
        datum.width = IMAGE_SIZE
        sample = np.zeros((2 * dimension, IMAGE_SIZE, IMAGE_SIZE))
        index = 0
        for one_sample in samples:
            print(index, one_sample)
            label = one_sample[-1]
            sample[:dimension, :, :] = preprocess.process(one_sample[0], IMAGE_SIZE)
            sample[dimension:, :, :] = preprocess.process(one_sample[1], IMAGE_SIZE)
            datum.data = sample.tobytes()
            datum.label = label
            str_id = "%08d" % index  # zero-pad so keys sort in insertion order
            txn.put(str_id, datum.SerializeToString())
            index = index + 1
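# A read-back sketch for sanity-checking the LMDB written above (an assumed
# helper, not part of the original code): fetch the first record and recover
# the stacked image pair.
def inspect_siamese_lmdb(target):
    env = lmdb.open(target, readonly=True)
    with env.begin() as txn:
        datum = caffe.proto.caffe_pb2.Datum()
        datum.ParseFromString(txn.get("%08d" % 0))
        # the buffer holds float64 pixels because sample was an np.zeros array
        pair = np.frombuffer(datum.data, dtype=np.float64).reshape(
            datum.channels, datum.height, datum.width)
        print("label:", datum.label, "pair shape:", pair.shape)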
def get_test_data(data_dir='../test/', num_dir=1, IM_SIZE=(512, 512), threshold=0.5):
    _, _, preprocessed_cutouts, original_img_shape = process(crop_size=IM_SIZE[0], data_dir=data_dir,
                                                             num_dir=num_dir, threshold=threshold)
    # preprocessed_cutouts contains a list of lists of cutouts per image
    prep_np = []
    for folder_no in range(len(preprocessed_cutouts)):
        for crop_no in range(len(preprocessed_cutouts[folder_no])):
            preprocessed_cutouts[folder_no][crop_no][0] = \
                preprocessed_cutouts[folder_no][crop_no][0].transpose([2, 0, 1])
        cutout_np = np.asarray([pr[0] for pr in preprocessed_cutouts[folder_no]])
        prep_np.append(cutout_np)
    return prep_np, preprocessed_cutouts, original_img_shape
def main():
    input_file = 'zh_wiki_00'
    output_file = 'zh_words'
    in_file = open(input_file, 'r')
    out_file = open(output_file, 'w+')
    for line in in_file.readlines():
        out = process(line)
        if len(out):
            out_file.write(str(out))
            out_file.write('\n')
    in_file.close()
    out_file.close()
def predict_dir(caffemodel, deploy, source_list, IMAGE_SIZE=227, LAYER_NAME="my-fc8", mode=True, BORDER_AGE=18):
    file_list = []
    correct_num = 0
    for source in source_list:
        for root, dirs, files in os.walk(source):
            for file in files:
                file_list.append(os.path.join(root, file))
    images = np.zeros((len(file_list), 3, IMAGE_SIZE, IMAGE_SIZE), dtype=np.float32)
    # read the real age list and preprocess every image
    real_ages = []
    for index, dicom_file in enumerate(file_list):
        print(dicom_file)
        real_age = info.getInfo(dicom_file)
        real_ages.append(real_age)
        images[index, :, :, :] = preprocess.process(dicom_file, IMAGE_SIZE=IMAGE_SIZE)
    if mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()
    net = caffe.Net(deploy, caffemodel, caffe.TEST)
    # run the network in batches of 10 and collect all predictions; merging
    # the forward() dicts would overwrite the results of earlier batches
    predictions = []
    for x in range(0, len(file_list), 10):
        batch = images[x:x + 10]
        net.blobs['data'].reshape(len(batch), 3, IMAGE_SIZE, IMAGE_SIZE)
        net.blobs['data'].data[...] = batch
        o = net.forward()
        predictions.extend(o[LAYER_NAME].copy())
    for index, result in enumerate(predictions):
        predict_age = result[0]
        real_age = real_ages[index]
        # the prediction counts as correct when it falls on the same side
        # of BORDER_AGE as the real age
        if (predict_age > BORDER_AGE and real_age > BORDER_AGE) or \
           (predict_age <= BORDER_AGE and real_age <= BORDER_AGE):
            correct_num = correct_num + 1
    return float(correct_num) / len(file_list)
def test_process_1(self):
    aln = TabularMSA([Protein('AL-'), Protein('VL-'), Protein('MLA')])
    gap_thr = 0.5
    exp_num = [[AA_TABLE['A']], [AA_TABLE['V']], [AA_TABLE['M']]]
    exp_bin = [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]]
    num_mtx, bin_mtx, gappy_idxs, constant_idxs = preprocess.process(aln, gap_thr, AA_TABLE)
    assert np.array_equal(exp_num, num_mtx)
    assert np.array_equal(exp_bin, bin_mtx)
    assert gappy_idxs == [2]
    assert constant_idxs == [1]
def post_img():
    result = {}
    try:
        with open('img.jpg', 'wb') as f:
            f.write(request.get_data())
        roi = preprocess.process('img.jpg')
        cv2.imwrite('cropped.jpg', roi)
        image_data = tf.gfile.FastGFile('cropped.jpg', 'rb').read()
        label_lines = [line.rstrip() for line in tf.gfile.GFile("/tf_files/retrained_labels.txt")]
        with tf.gfile.FastGFile("/tf_files/retrained_graph.pb", 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            _ = tf.import_graph_def(graph_def, name='')
        with tf.Session() as sess:
            softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
            predictions = sess.run(softmax_tensor, {'DecodeJpeg/contents:0': image_data})
            top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]
            result['error'] = False
            result['guesses'] = []
            for node_id in top_k:
                human_string = label_lines[node_id]
                score = predictions[0][node_id]
                result['guesses'].append({
                    'name': human_string,
                    'confidence': "%0.5f" % score
                })
    except Exception as e:
        result = {"error": True, "guesses": []}
        app.logger.warning(e)
    return json.dumps(result) + "\n"
def make(sourcefile, modulename, target=False):
    '''
    Using gccxml directly distorts ctypeslib performance, but on some
    libraries we don't have a choice.
    '''
    if not os.access(sourcefile, os.F_OK):
        raise IOError(sourcefile)
    basename = os.path.basename(sourcefile)
    preprocessed = "%s.c" % (modulename)
    cleaned = "%s_clean.c" % (modulename)
    xml = "%s.xml" % (modulename)
    pyfinal = "%s.py" % (modulename)
    if target:
        gen2(sourcefile, modulename, target)
        log.info('PYFINAL - OK')
    else:
        if not os.access(pyfinal, os.F_OK):
            if not os.access(cleaned, os.F_OK):
                if not os.access(preprocessed, os.F_OK):
                    # preprocess the file
                    if preprocess.process(sourcefile, preprocessed) > 0:
                        return
                    log.info('PREPROCESS - OK')
                # clean it
                if cleaner.clean(preprocessed, cleaned) > 0:
                    return
                log.info('CLEAN - OK')
            # generate pyfinal
            if gen(cleaned, modulename) > 0:
                return
            log.info('PYFINAL - OK')
    __import__(modulename)
    import inspect
    nbClass = len(inspect.getmembers(sys.modules[modulename], inspect.isclass))
    # count all members, not just classes
    nbMembers = len(inspect.getmembers(sys.modules[modulename]))
    log.info("module %s has %d members for %d class" % (modulename, nbMembers, nbClass))
def data():
    gd = GT.obtain_gd()
    test = P.process()
    return (gd, test)
def processText(docContent):
    tokens = process(docContent)
    # count how often each token occurs
    tokenFrequency = defaultdict(int)
    for token in tokens:
        tokenFrequency[token] += 1
    return set(tokens), tokenFrequency
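# A quick usage sketch (assumed, not from the original module): the same
# counting logic with a simple whitespace tokenizer standing in for process().
from collections import defaultdict

def process_text_example(doc_content):
    tokens = doc_content.lower().split()  # hypothetical stand-in tokenizer
    token_frequency = defaultdict(int)
    for token in tokens:
        token_frequency[token] += 1
    return set(tokens), token_frequency

# process_text_example("the cat sat on the mat")[1]["the"] == 2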
def process_file(segment, filename):
    with codecs.open(filename, 'r', 'utf-8') as f:
        for sentence in preprocess.process(f):
            yield segment(sentence)