def CigNet_prediction(self):
    # Scale the real table with the fitted scaler and rebuild the DataFrame.
    new_table_data = self.scaler.transform(self.real_table)
    real_table = pd.DataFrame(new_table_data,
                              index=self.real_table.index,
                              columns=self.real_table.columns)

    # Decision-function distances and class probabilities from the trained predictor.
    result = predict_decision(self.predictor, real_table)
    result2 = predict_proba(self.predictor, real_table)
    result = np.concatenate([result, result2], axis=1)
    result_df = pd.DataFrame(result, index=real_table.index)
    result_df.columns = ['distance', 'non-driver_prob', 'driver_prob']

    # Sweep the location of the null normal distribution until fewer than 65%
    # of genes pass the BH-adjusted q-value < 0.05 cutoff.
    stats = importr('stats')
    for l in [-2, -1.75, -1.5, -1.25, -1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 1]:
        result_df['p_value'] = 1 - norm.cdf(result_df['distance'], loc=l)
        result_df['q_value'] = stats.p_adjust(
            FloatVector(result_df["p_value"].tolist()), method='BH')
        if result_df[result_df['q_value'] < 0.05].shape[0] * 1. / result_df.shape[0] < 0.65:
            break

    # Keep only candidate genes present in the network and rank by distance.
    candidate_list = self.CellNet['from'].unique()
    result_df = result_df[result_df.index.isin(candidate_list)]
    result_df = result_df.sort_values(by='distance', ascending=False)
    result_df['Rank'] = result_df['distance'].rank(ascending=False)
    return result_df
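# A minimal, self-contained sketch of the p-value/q-value step above without
# rpy2: statsmodels' multipletests with method='fdr_bh' is the standard Python
# equivalent of R's p.adjust(..., method='BH'). The distances array and the
# loc=-0.5 null are made up purely for illustration.
import numpy as np
import pandas as pd
from scipy.stats import norm
from statsmodels.stats.multitest import multipletests

distances = np.array([2.1, 0.3, -1.4, 1.8, 0.0])
p_values = 1 - norm.cdf(distances, loc=-0.5)             # one-sided p-values under a shifted null
q_values = multipletests(p_values, method='fdr_bh')[1]   # Benjamini-Hochberg adjusted
print(pd.DataFrame({'distance': distances, 'p_value': p_values, 'q_value': q_values}))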
def hello():
    # Expect the review text in the POSTed form data.
    if 'review' not in request.form:
        return jsonify({'error': 'no review in body'}), 400
    review = request.form['review']
    # Probability that the review is positive, returned as an integer percentage.
    proba = predict_proba(review, model, tokenizer, max_length)[0, 0]
    return jsonify(ceil(proba * 100))
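# A hedged sketch of how the handler above might be wired into a Flask app.
# The route path, port, and the assumption that model, tokenizer and max_length
# are loaded at module level are illustrative, not confirmed by the original app.
from flask import Flask, request, jsonify
from math import ceil

app = Flask(__name__)
# Register the existing view function without a decorator.
app.add_url_rule('/predict', view_func=hello, methods=['POST'])

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)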
def predict(self):
    # Columns for active marks (H3K4me3, H3K27ac, H3K4me1, H3K79me2, RNA) and
    # suppressive marks (H3K27me3, H3K9me3), excluding 'single'-tagged columns.
    active_columns = [
        x for x in self.real_table.columns
        if (x.lower().find('h3k4me3') != -1 or x.lower().find('h3k27ac') != -1
            or x.lower().find('h3k4me1') != -1 or x.lower().find('h3k79me2') != -1
            or x.lower().find('rna') != -1) and x.find('single') == -1
    ]
    suppressive_columns = [
        x for x in self.real_table.columns
        if (x.lower().find('h3k27me3') != -1 or x.lower().find('h3k9me3') != -1)
        and x.find('single') == -1
    ]

    # Search feature paths and keep the highest-scoring one.
    paths = optimization(self.gene_meta_df, self.training_table, self.real_table,
                         active_columns, suppressive_columns, self.preknown)
    best_path = None
    best_score = None
    for key, value in paths.items():
        cur_score, cur_path = value
        if best_score is None or cur_score > best_score:
            best_score = cur_score
            best_path = cur_path

    # Train a logistic-regression predictor on the best feature set.
    train_df = self.training_table[best_path].copy()
    train_df = label_label(train_df, self.gene_meta_df)
    scaler = center_normalization(train_df.iloc[:, :-1])
    train_df.iloc[:, :-1] = preprocessing_table(scaler, train_df.iloc[:, :-1])
    predictor = predict_logisticregression(train_df.iloc[:, :-1],
                                           train_df.iloc[:, -1], c=.2)

    # Apply the same scaling to the real table and score it.
    real_table = self.real_table[best_path].copy()
    real_table = preprocessing_table(scaler, real_table)
    result = predict_decision(predictor, real_table)
    result2 = predict_proba(predictor, real_table)
    result = np.concatenate([result, result2], axis=1)
    df = pd.DataFrame(result, index=real_table.index)
    df.columns = ['distance', 'non-CIG_prob', 'CIG_prob']
    del df['non-CIG_prob']

    # Sweep the null location until fewer than 600 genes pass FDR < 0.05.
    for l in np.arange(-20, 1, 0.25):
        df['p_value'] = 1 - norm.cdf(df['distance'], loc=l)
        df['FDR'] = stats.p_adjust(FloatVector(df["p_value"].tolist()), method='BH')
        if df[df['FDR'] < 0.05].shape[0] < 600:
            break

    df = df.sort_values(by=['distance'], ascending=False)
    df['rank'] = range(1, df.shape[0] + 1)
    self.prediction = df
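# The predict_decision/predict_proba helpers are not shown in this fragment. A
# plausible sketch of what they do, assuming the predictor is a scikit-learn
# LogisticRegression, is below; the names and bodies are assumptions, labeled
# with a sketch_ prefix so they do not shadow the real helpers.
import numpy as np

def sketch_predict_decision(predictor, table):
    # Signed distance to the separating hyperplane, reshaped to (n_samples, 1)
    # so it can be concatenated column-wise with the probabilities.
    return predictor.decision_function(table).reshape(-1, 1)

def sketch_predict_proba(predictor, table):
    # Class probabilities, shape (n_samples, 2): [non-CIG, CIG].
    return predictor.predict_proba(table)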
def preknown_cost(meta_df, train_df, real_table, columns, preknown_list, verbose=False):
    # Train a logistic-regression predictor on the selected columns.
    cur_train_df = train_df[columns].copy()
    cur_train_df = label_label(cur_train_df, meta_df)
    scaler = center_normalization(cur_train_df.iloc[:, :-1])
    cur_train_df.iloc[:, :-1] = preprocessing_table(scaler, cur_train_df.iloc[:, :-1])
    predictor = predict_logisticregression(cur_train_df.iloc[:, :-1],
                                           cur_train_df.iloc[:, -1], c=.2)

    # Force the sign of H3K27me3 coefficients: negative for non-kurtosis
    # features, positive for kurtosis features.
    for i in range(len(predictor.coef_[0])):
        if columns[i].find('h3k27me3') != -1 and columns[i].find('kurtosis') == -1:
            predictor.coef_[0][i] = -1
        elif columns[i].find('h3k27me3') != -1 and columns[i].find('kurtosis') != -1:
            predictor.coef_[0][i] = 1

    # Score the real table and rank genes by decision distance.
    cur_real_table = real_table[columns].copy()
    cur_real_table = preprocessing_table(scaler, cur_real_table)
    result = predict_decision(predictor, cur_real_table)
    result2 = predict_proba(predictor, cur_real_table)
    result = np.concatenate([result, result2], axis=1)
    result_df = pd.DataFrame(result, index=cur_real_table.index)
    result_df.columns = ['distance', 'non-CIG_prob', 'CIG_prob']
    del result_df['non-CIG_prob']
    result_df = result_df.sort_values(by=['distance'], ascending=False)
    result_df['rank'] = range(1, result_df.shape[0] + 1)

    # Weighted hit score: pre-known genes earn more the higher they rank.
    cur_hit_df = result_df[result_df.index.isin(preknown_list)]
    if verbose:
        print(cur_hit_df)
    cur_hit = cur_hit_df[cur_hit_df['rank'] <= 10].shape[0] * 15
    cur_hit += cur_hit_df[(cur_hit_df['rank'] > 10) & (cur_hit_df['rank'] < 20)].shape[0] * 10
    cur_hit += cur_hit_df[(cur_hit_df['rank'] > 20) & (cur_hit_df['rank'] < 50)].shape[0] * 8
    cur_hit += cur_hit_df[(cur_hit_df['rank'] > 50) & (cur_hit_df['rank'] < 100)].shape[0] * 3
    cur_hit += cur_hit_df[(cur_hit_df['rank'] > 100) & (cur_hit_df['rank'] < 200)].shape[0] * 1
    cur_hit += cur_hit_df[(cur_hit_df['rank'] > 200) & (cur_hit_df['rank'] < 500)].shape[0] * 0.1
    if verbose:
        print(cur_hit, columns)
    return cur_hit
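# A hypothetical usage sketch: preknown_cost scores a candidate feature set by
# how highly pre-known genes rank, so a greedy forward search over columns could
# call it as below. This loop is illustrative only; it is not the repo's
# optimization() routine, and candidate_columns is a made-up argument.
def greedy_column_search(meta_df, train_df, real_table, candidate_columns, preknown_list):
    selected = []
    best_score = 0
    improved = True
    while improved and candidate_columns:
        improved = False
        # Try adding each remaining column and keep the one that helps most.
        for col in list(candidate_columns):
            score = preknown_cost(meta_df, train_df, real_table,
                                  selected + [col], preknown_list)
            if score > best_score:
                best_score = score
                best_col = col
                improved = True
        if improved:
            selected.append(best_col)
            candidate_columns.remove(best_col)
    return selected, best_score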
# Collect logits for the known and unknown test sets from each model in the
# ensemble, under both the 'running' (single-pass) and 'sample' (multi-try)
# strategies.
eval_data = {
    'known': {
        'eval/logits': [],
        'ensemble/logits': [],
    },
    'unknown': {
        'eval/logits': [],
        'ensemble/logits': [],
    }
}
criterion = CrossEntropyLoss().cuda()
for i in range(args.n_models):
    net = utils.load_model(os.path.join(args.models_dir, str(i), 'model'))
    net.eval()
    for s, mode, n in zip(['running', 'sample'], ['eval', 'ensemble'], [1, args.n_tries]):
        utils.set_strategy(net, strategy=s)
        _, labels, logits = utils.predict_proba(testloader_kn, net, ensembles=n,
                                                n_classes=args.n_classes,
                                                return_logits=True)
        eval_data['known']['{}/logits'.format(mode)].append(logits)
        eval_data['known']['labels'] = labels
        _, _, logits = utils.predict_proba(testloader_ukn, net, ensembles=n,
                                           n_classes=args.n_classes,
                                           return_logits=True)
        eval_data['unknown']['{}/logits'.format(mode)].append(logits)

# Stack the per-model logits into single arrays.
for d, t in product(['known', 'unknown'], ['eval', 'ensemble']):
    eval_data[d]['{}/logits'.format(t)] = np.squeeze(
        np.stack(eval_data[d]['{}/logits'.format(t)]))
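# A small follow-up sketch, not part of the original script: once the per-model
# logits are stacked, a common way to form ensemble probabilities is to apply a
# softmax per member and average. The (n_models, n_samples, n_classes) shape is
# an assumption about what np.stack produces here.
from scipy.special import softmax

known_logits = eval_data['known']['ensemble/logits']
ensemble_proba = softmax(known_logits, axis=-1).mean(axis=0)  # (n_samples, n_classes)
predictions = ensemble_proba.argmax(axis=-1)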
def model_prediction():
    model_path = 'networks/resnet-18_trained.t7'
    test_dir = 'Result/'
    cuda = True
    log_file = 'evalss_data'
    eval_rot = True
    eval_no_crop = True
    n_tries = 10
    seed = 42
    output_dir = 'Result'
    running = True

    print("loading model", model_path)
    # Fix all random seeds for reproducibility.
    torch.cuda.manual_seed_all(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    log_file = utils.uniquify(os.path.join(output_dir, log_file), sep='-')
    eval_data = test_dir
    net = utils.load_model(model_path, cuda)

    # Per-channel statistics of the input image (currently unused: the
    # dataloader below uses fixed normalization constants instead).
    image_array = cv2.imread(os.path.join(test_dir, 'data', 'image.png'))
    mean_of_image = np.mean(image_array, axis=(0, 1)) / 1000
    std_of_image = np.std(image_array, axis=(0, 1)) / 100

    # Load the data into PyTorch using a dataloader with fixed mean/std.
    dataloader = utils.get_dataloader(test_dir,
                                      [0.6000, 0.3946, 0.6041],
                                      [0.2124, 0.2335, 0.2360],
                                      eval_no_crop, eval_rot, batch_size=1)
    print(type(dataloader))

    # utils.predict_proba returns (proba, labels, logits, filenames); the original
    # script also stashed these in an eval_data['test'] dict, now omitted.
    if not running:
        # Monte-Carlo sampling strategy: n_tries stochastic forward passes.
        net.eval()
        utils.set_strategy(net, 'sample')
        have_do = utils.set_do_to_train(net)
        res = utils.predict_proba(dataloader, net, n_classes=5, return_logits=True,
                                  ensembles=n_tries, cuda=cuda)
        print('Result', res)
    else:
        # Running-statistics strategy: fewer passes unless dropout is present.
        net.eval()
        utils.set_strategy(net, 'running')
        have_do = utils.set_do_to_train(net)
        res = utils.predict_proba(dataloader, net, n_classes=5, return_logits=True,
                                  ensembles=n_tries if have_do else 3)
        print('type(eval_data):', type(eval_data))

    # Mean predicted probability per class over the ensemble of passes.
    prob_means_en = np.mean(res[0], axis=0)

    output_file_name = 'res_norotate'
    torch.save(res, os.path.join(output_dir, output_file_name))
    print("Created output file '{}'".format(output_file_name))
    torch.cuda.empty_cache()
    return prob_means_en
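# Hypothetical usage: model_prediction() returns mean per-class probabilities,
# so the predicted class is just the argmax. The entry point below is an
# illustration, not part of the original module.
if __name__ == '__main__':
    class_probs = model_prediction()
    predicted_class = int(np.argmax(class_probs))
    print('predicted class index:', predicted_class, 'probabilities:', class_probs)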
# Load the checkpoint and build dataloaders for the known and unknown test sets.
ckpt = torch.load(args.model)
log_file = utils.uniquify(os.path.join(os.path.dirname(args.model), args.log_file),
                          sep='-')
_, dataloader_known = utils.get_dataloader(data=args.data_kn, test_bs=args.test_bs,
                                           data_root=args.data_root)
_, dataloader_unknown = utils.get_dataloader(data=args.data_ukn, test_bs=args.test_bs,
                                             data_root=args.data_root,
                                             drop_last_test=True)

eval_data = {}
net = utils.load_model(args.model)
net.eval()
# Use the 'sample' strategy and keep dropout active for Monte-Carlo ensembling.
utils.set_strategy(net, 'sample')
have_do = utils.set_do_to_train(net)

# Ensemble predictions on the unknown (out-of-distribution) set.
res = utils.predict_proba(dataloader_unknown, net, n_classes=args.n_classes,
                          return_logits=True, ensembles=args.n_tries)
eval_data['unknown'] = {
    'ensemble/proba': res[0],
    'ensemble/logits': res[2],
    'ensemble/labels': res[1]
}

# Ensemble predictions on the known (in-distribution) set.
res = utils.predict_proba(dataloader_known, net, n_classes=args.n_classes,
                          return_logits=True, ensembles=args.n_tries)
eval_data['known'] = {
    'ensemble/proba': res[0],
    'ensemble/logits': res[2],
    'ensemble/labels': res[1]
}
net.eval()
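# A hedged follow-up sketch, not in the original script: one common way to use
# eval_data for open-set evaluation is to compare maximum softmax probability
# between the known and unknown splits with an ROC-AUC. The score choice and the
# (ensembles, n_samples, n_classes) shape of the proba arrays are assumptions.
import numpy as np
from sklearn.metrics import roc_auc_score

# Mean probability over the ensemble dimension, then max-prob confidence per sample.
conf_known = eval_data['known']['ensemble/proba'].mean(axis=0).max(axis=-1)
conf_unknown = eval_data['unknown']['ensemble/proba'].mean(axis=0).max(axis=-1)

labels = np.concatenate([np.ones_like(conf_known), np.zeros_like(conf_unknown)])
scores = np.concatenate([conf_known, conf_unknown])
print('known-vs-unknown ROC-AUC:', roc_auc_score(labels, scores))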