def get_aucs(db):
    # B = 10000
    B = 100000
    aucs = list()
    aucs0 = list()
    aucs1 = list()
    assert db in {"toy1", "toy2"}
    if db == "toy1":
        data_tr, data_te = load_toy1(n=10000)
    else:
        data_tr, data_te = load_toy2(n=10000)
    X_tr, Y_tr, Z_tr = data_tr
    X_te, Y_te, Z_te = data_te
    X = np.vstack([X_tr, X_te])
    Y = np.concatenate([Y_tr, Y_te])
    Z = np.concatenate([Z_tr, Z_te])
    cs = np.linspace(0, 1, 101)
    for c in cs:
        if db == "toy1":
            S = c * X[:, 0] + (1 - c) * X[:, 1]
        else:
            S = -c * X[:, 0] + (1 - c) * X[:, 1]
        # overall AUC, then per-group AUCs (Z is the group attribute)
        aucs.append(auc(S, Y, B=B))
        aucs0.append(auc(S[Z == 0], Y[Z == 0], B=B))
        aucs1.append(auc(S[Z == 1], Y[Z == 1], B=B))
    return np.array(aucs), np.array(aucs0), np.array(aucs1)
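# The `auc(S, Y, B=...)` helper called above is defined elsewhere. A minimal
# sketch, assuming it Monte Carlo-estimates AUC = P(score of a random positive
# > score of a random negative) from B sampled positive/negative pairs; only
# the call signature is taken from the code above, the body is an assumption:
import numpy as np

def auc(S, Y, B=100000):
    pos = S[Y == 1]
    neg = S[Y == 0]
    i = np.random.randint(0, len(pos), size=B)
    j = np.random.randint(0, len(neg), size=B)
    # count ties as half a win, as in the Mann-Whitney convention
    return np.mean((pos[i] > neg[j]) + 0.5 * (pos[i] == neg[j]))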
def validate(sess):
    gen = data_gen.gen_valid
    valid_outs = []
    valid_targets = []
    n_samples = 0  # renamed from `sum`, which shadowed the builtin
    for batch, i in gen():
        valid_fetches = [out_softmax]
        valid_feed_dict = {X_pl: batch['X'], t_pl: batch['t'], is_training_pl: False}
        valid_out = sess.run(fetches=valid_fetches, feed_dict=valid_feed_dict)[0]
        valid_targets.append(batch['t'])
        valid_outs.append(valid_out)
        n_samples += i
    valid_outs = np.concatenate(valid_outs, axis=0)[:n_samples]
    valid_targets = np.concatenate(valid_targets, axis=0)[:n_samples]
    # valid_outs_binomial = valid_outs[:, 0]
    valid_outs_binomial_rev = valid_outs[:, 1]  # probability of the positive class
    valid_preds = valid_outs_binomial_rev > 0.5
    valid_accs = np.mean(np.equal(valid_preds, valid_targets))
    valid_aucs = utils.auc(valid_targets, valid_outs_binomial_rev)
    print(" valid_accs, %.3f" % (valid_accs * 100))
    print(" valid_aucs, %.3f" % (valid_aucs * 100))
    sum_fetches = [val_summaries, global_step]
    sum_feed_dict = {valid_accs_pl: valid_accs, valid_aucs_pl: valid_aucs}
    summaries, i = sess.run(sum_fetches, sum_feed_dict)
    summary_writer.add_summary(summaries, i)
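# `utils.auc(valid_targets, valid_outs_binomial_rev)` takes labels first and
# positive-class probabilities second. A minimal sketch of such a wrapper,
# assuming it delegates to scikit-learn (the real utils module is not shown):
from sklearn.metrics import roc_auc_score

def auc(targets, scores):
    return roc_auc_score(targets, scores)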
def test_single(model, turn):
    output_list = []
    model.eval()
    for i in range(len(test_adj_list)):
        adj = torch.Tensor(test_adj_list[i]).cuda()
        feature = torch.Tensor(test_feature_list[i]).cuda()
        output = model(feature, adj)
        output_list.append(output)
    # each per-graph output must be a one-element tensor for torch.Tensor()
    # to collect the list into a single 1-D tensor
    output_list = torch.Tensor(output_list)
    labels = torch.Tensor(test_label_list)
    labels.unsqueeze_(0)
    output_list.unsqueeze_(0)
    print("accuracy:", accuracy(output_list.t(), labels.t()))
    print("auc:", auc(output_list.t(), labels.t()))
    # loss_test = F.binary_cross_entropy_with_logits(output_list, torch.Tensor(test_label_list))
    # print("single_test_loss:", loss_test)
    if phase1:
        save_pred("res_cheby/", turn, 1, output_list.t())
    elif phase3:
        save_pred("res_cheby/", turn, 3, output_list.t())
def test_hinge():
    output_list = []
    label = 0
    test_labels = []  # Cn_2 elements, ground truth for cos pairs in test
    model_hinge.eval()
    for i in range(len(test_adj_list)):
        adj = torch.Tensor(test_adj_list[i])
        feature = torch.Tensor(test_feature_list[i]).cuda()
        output = model_hinge(feature, adj)
        output.squeeze_(0)
        output_list.append(output)
    # pair each sample with the next pair_num samples, wrapping around
    for i in range(len(test_adj_list)):
        for j in range(i + 1, i + 1 + pair_num):
            j = j % len(test_adj_list)
            if test_label_list[i] == test_label_list[j]:
                test_labels.append([1])
            else:
                test_labels.append([-1])
    cos_list = get_cos_list(output_list, 0)
    test_labels = torch.Tensor(test_labels)
    # lossF = torch.nn.MarginRankingLoss(margin=0)
    # loss_test = lossF(cos_list, torch.Tensor([0]), test_labels)
    # print("test_loss:", loss_test)
    print("hinge_accuracy:", accuracy(cos_list, test_labels))
    print("hinge_auc:", auc(cos_list, test_labels))
    save_pred("res_cheby/", turn, 2, cos_list)
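# `get_cos_list` is not shown. A minimal sketch, assuming it computes cosine
# similarity for the same (i, j) pairs as the label loop above and relies on
# the same module-level `pair_num`; the pairing is inferred from the
# test_labels construction and is an assumption:
import torch
import torch.nn.functional as F

def get_cos_list(output_list, dim):
    cos = []
    n = len(output_list)
    for i in range(n):
        for j in range(i + 1, i + 1 + pair_num):
            j = j % n
            # scalar cosine similarity between the two embeddings
            cos.append(F.cosine_similarity(output_list[i], output_list[j], dim=dim).view(1))
    return torch.cat(cos).unsqueeze(1)  # shape (n * pair_num, 1), matching test_labels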
def get_ptws(alpha, y_val):
    B = 100000
    aucs = list()
    ptws = list()
    data_tr, data_te = load_toy2(n=10000)
    X_tr, Y_tr, Z_tr = data_tr
    X_te, Y_te, Z_te = data_te
    X = np.vstack([X_tr, X_te])
    Y = np.concatenate([Y_tr, Y_te])
    Z = np.concatenate([Z_tr, Z_te])
    cs = np.linspace(0, 1, 101)
    for c in cs:
        # this function loads toy2 only, so use the toy2 score
        S = -c * X[:, 0] + (1 - c) * X[:, 1]
        aucs.append(auc(S, Y, B=B))
        ptws.append(pointwise_tpr(S[Y == y_val], Z[Y == y_val], alpha))
    return np.array(aucs), np.array(ptws)
preds_up = []
dsc = np.zeros((num_test, 1))
recall = np.zeros_like(dsc)
tn = np.zeros_like(dsc)
prec = np.zeros_like(dsc)
thresh = 0.5
for i in range(num_test):
    gt = orig_gts[testIdx[i]]
    preds_up.append(cv2.resize(preds[i], (gt.shape[1], gt.shape[0]),
                               interpolation=cv2.INTER_NEAREST))
    dsc[i] = utils.check_preds(preds_up[i] > thresh, gt)
    recall[i], _, prec[i] = utils.auc(gt, preds_up[i] > thresh)

print('-' * 30)
print('At threshold =', thresh)
print('\n DSC \t\t{0:^.3f} \n Recall \t{1:^.3f} \n Precision\t{2:^.3f}'.format(
    np.sum(dsc) / num_test, np.sum(recall) / num_test, np.sum(prec) / num_test))

model.load_weights("weights.hdf5")
_, _, _, preds = model.predict(imgs_test)
# preds = model.predict(imgs_test)  # use this if the model is unet
preds_up = []
dsc = np.zeros((num_test, 1))
recall = np.zeros_like(dsc)
def keras_fit_generator_attention(img_rows=96, img_cols=96, n_imgs=10**4,
                                  batch_size=32, regenerate=True,
                                  model_type='unet', loss_type='dice',
                                  train=True, test=True):
    if regenerate:
        data_to_array(img_rows, img_cols)
        # preprocess_data()

    X_train, y_train, X_val, y_val = load_data()
    img_rows = X_train.shape[1]
    img_cols = X_train.shape[2]

    # ground-truth pyramid for deep supervision (strides 8, 4, 2, 1)
    gt1 = y_train[:, ::8, ::8, :]
    gt2 = y_train[:, ::4, ::4, :]
    gt3 = y_train[:, ::2, ::2, :]
    gt4 = y_train
    gt_train = [gt1, gt2, gt3, gt4]

    # choose loss function
    if loss_type == 'dice':
        loss_f = losses.dice_loss
    elif loss_type == 'tversky':
        loss_f = losses.tversky_loss
    elif loss_type == 'focal_tversky':
        loss_f = losses.focal_tversky
    else:
        print('wrong loss function type')
        return -1

    plot_type = 0
    epochs_num = 50
    model_name = model_type + '_' + loss_type
    filepath = '../data/weights/weights_' + model_type + '_' + loss_type + '.hdf5'  # create this folder before running
    result_text_path = '../data/results/results.txt'  # create this file before running
    result_images_path = '../data/results/images1/'  # create this folder before running

    # choose model
    if model_type == 'unet':
        sgd = SGD(lr=0.01, momentum=0.90)
        model = newmodels.unet(sgd, (256, 256, 1), loss_f)
        model_checkpoint = ModelCheckpoint(filepath, monitor='val_dsc', verbose=1,
                                           save_best_only=True, save_weights_only=True,
                                           mode='max')
    elif model_type == 'attn_unet':
        sgd = SGD(lr=0.01, momentum=0.90, decay=1e-6)
        model = newmodels.attn_unet(sgd, (256, 256, 1), loss_f)
        model_checkpoint = ModelCheckpoint(filepath, monitor='val_dsc', verbose=1,
                                           save_best_only=True, save_weights_only=True,
                                           mode='max')
    elif model_type == 'ds_mi_attn_unet':
        plot_type = 1
        y_train = gt_train  # deep supervision trains against the multi-scale targets
        sgd = SGD(lr=0.01, momentum=0.90, decay=1e-6)
        model = newmodels.attn_reg(sgd, (256, 256, 1), loss_f)
        model_checkpoint = ModelCheckpoint(filepath, monitor='val_final_dsc', verbose=1,
                                           save_best_only=True, save_weights_only=True,
                                           mode='max')
    else:
        print('wrong model type')
        return -1

    model.summary()
    c_backs = [model_checkpoint]
    c_backs.append(EarlyStopping(monitor='loss', min_delta=0.001, patience=5))
    model_name = (model_type + '_' + loss_type + '{}').format(int(time.time()))
    tb_call_back = TensorBoard(log_dir='./log_dir_5.25.1/{}'.format(model_name))
    c_backs.append(tb_call_back)

    if train:
        hist = model.fit(X_train, y_train, validation_split=0.15, shuffle=True,
                         epochs=epochs_num, batch_size=batch_size, verbose=True,
                         callbacks=c_backs)
        h = hist.history
        # utils.plot(h, epochs_num, batch_size, img_cols, plot_type, model_name=model_name)

    if test:
        X_val = np.load('../data/X_val.npy')
        y_val = np.load('../data/y_val.npy')
        num_test = X_val.shape[0]
        test_img_list = os.listdir('../data/test/')
        if model_type == 'ds_mi_attn_unet':
            _, _, _, preds = model.predict(X_val)
        else:
            preds = model.predict(X_val)  # use this if the model is not the multi-input ds unet

        preds_up = []
        dsc = np.zeros((num_test, 1))
        recall = np.zeros_like(dsc)
        tn = np.zeros_like(dsc)
        prec = np.zeros_like(dsc)
        thresh = 0.5

        # check the predictions from the trained model
        for i in range(num_test):
            gt = y_val[i]
            pred_up = cv2.resize(preds[i], (gt.shape[1], gt.shape[0]),
                                 interpolation=cv2.INTER_NEAREST)
            preds_up.append(pred_up)
            dsc[i] = utils.check_preds(pred_up > thresh, gt)
            recall[i], _, prec[i] = utils.auc(gt, pred_up > thresh)

        f = open(result_text_path, "a")
        f.write('\n')
        f.write('-' * 30)
        f.write('\nModel name: ')
        f.write(model_name)
        f.write('\n DSC \t\t{0:^.3f} \n Recall \t{1:^.3f} \n Precision\t{2:^.3f}'.format(
            np.sum(dsc) / num_test, np.sum(recall) / num_test, np.sum(prec) / num_test))
        f.write('\n')
        f.close()
        print('-' * 30)
        print('At threshold =', thresh)
        print('\n DSC \t\t{0:^.3f} \n Recall \t{1:^.3f} \n Precision\t{2:^.3f}'.format(
            np.sum(dsc) / num_test, np.sum(recall) / num_test, np.sum(prec) / num_test))

        # check the predictions with the best saved model from checkpoint
        model.load_weights(filepath)
        if model_type == 'ds_mi_attn_unet':
            _, _, _, preds = model.predict(X_val)
        else:
            preds = model.predict(X_val)  # use this if the model is not the multi-input ds unet

        preds_up = []
        dsc = np.zeros((num_test, 1))
        recall = np.zeros_like(dsc)
        tn = np.zeros_like(dsc)
        prec = np.zeros_like(dsc)
        for i in range(num_test):
            gt = y_val[i]
            pred_up = cv2.resize(preds[i], (gt.shape[1], gt.shape[0]),
                                 interpolation=cv2.INTER_NEAREST)
            preds_up.append(pred_up)
            dsc[i] = utils.check_preds(pred_up > thresh, gt)
            recall[i], _, prec[i] = utils.auc(gt, pred_up > thresh)

        print('-' * 30)
        print('USING HDF5 saved model at thresh=', thresh)
        print('\n DSC \t\t{0:^.3f} \n Recall \t{1:^.3f} \n Precision\t{2:^.3f}'.format(
            np.sum(dsc) / num_test, np.sum(recall) / num_test, np.sum(prec) / num_test))

        f = open(result_text_path, "a")
        f.write('\n')
        f.write('-' * 30)
        f.write('\nModel name: ')
        f.write(model_name)
        f.write('\nUSING HDF5 saved model')
        f.write('\n DSC \t\t{0:^.3f} \n Recall \t{1:^.3f} \n Precision\t{2:^.3f}'.format(
            np.sum(dsc) / num_test, np.sum(recall) / num_test, np.sum(prec) / num_test))
        f.write('\n')
        f.close()

        while True:
            idx = np.random.randint(0, num_test)
            if 3 < utils.avg_img(y_val[idx] > 0) < 8:
                # print(utils.avg_img(y_val[idx] > 0))
                break

        idxs = [94, 55, 69, 75, 52, 77]
        for idx in idxs:  # plot a test sample for each model
            gt_plot = y_val[idx]
            plt.figure(dpi=200)
            plt.subplot(121)
            plt.axis('off')
            plt.imshow(np.squeeze(gt_plot), cmap='gray')
            plt.title('Original Segmented Img {}'.format(idx))
            plt.subplot(122)
            plt.axis('off')
            plt.imshow(np.squeeze(preds_up[idx]), cmap='gray')
            plt.title('Mask {}'.format(idx))
            plt.savefig(result_images_path + str(idx) + '/' + model_name + 'ori-gt.png')
x_batch = X[idx]
out = predict(x_batch)
preds.append(out)

if num_batches * batch_size < n:
    # Computing rest
    rest = n - num_batches * batch_size
    idx = range(n - rest, n)
    x_batch = X[idx]
    out = predict(x_batch)
    preds.append(out)

# Making metadata
predictions = np.concatenate(preds, axis=0)
acc_eval = utils.accuracy(predictions, y)
all_accuracy.append(acc_eval)
auc_eval = utils.auc(predictions, y)
all_auc.append(auc_eval)
roc_eval_fpr, roc_eval_tpr, roc_eval_thresholds = utils.roc(predictions, y)
all_roc_fpr.append(roc_eval_fpr)
all_roc_tpr.append(roc_eval_tpr)
all_roc_thresholds.append(roc_eval_thresholds)

if Print:
    print(" validating: %s loss" % subset)
    print(" average evaluation accuracy (%s): %.5f" % (subset, acc_eval))
    print(" average evaluation AUC (%s): %.5f" % (subset, auc_eval))
    print()

print("Epoch %d of %d" % (epoch + 1, num_epochs))
if epoch in learning_rate_schedule:
    lr = np.float32(learning_rate_schedule[epoch])
f1_score = np.zeros_like(dsc)
thresh = 0.5

# check the predictions from the trained model
for i in range(num_test):
    # gt = orig_masks[testIdx[i]]
    name = img_list[testIdx[i]]
    gt = plt.imread(os.path.join(orig_dir, name.split('.')[0] + "_segmentation.png"))
    pred_up = cv2.resize(preds[i], (gt.shape[1], gt.shape[0]),
                         interpolation=cv2.INTER_NEAREST)
    dsc[i] = utils.check_preds(pred_up > thresh, gt)
    recall[i], spec[i], prec[i], iou[i] = utils.auc(gt, pred_up > thresh)

avg_dsc = np.sum(dsc) / num_test
avg_recall = np.sum(recall) / num_test
avg_precision = np.sum(prec) / num_test
f1_score = 2 * avg_recall * avg_precision / (avg_precision + avg_recall)
avg_iou = np.sum(iou) / num_test
avg_specificity = np.sum(spec) / num_test

print('-' * 30)
print('At threshold =', thresh)
print('\n DSC \t\t{0:^.3f} \n Recall \t{1:^.3f} \n Precision \t{2:^.3f}'
      ' \n Specificity \t{3:^.3f} \n IOU \t\t{4:^.3f} \n F1 \t\t{5:^.3f}'
      .format(avg_dsc, avg_recall, avg_precision, avg_specificity, avg_iou, f1_score))

# check the predictions with the best saved model from checkpoint
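# The segmentation snippets call utils.auc(gt, pred_mask) for per-image rates;
# here it unpacks four values (recall, specificity, precision, IoU), elsewhere
# three. A minimal sketch of the four-value variant from confusion-matrix
# counts (an assumption; the real utils module is not shown):
import numpy as np

def auc(gt, pred):
    gt = np.asarray(gt).astype(bool)
    pred = np.asarray(pred).astype(bool)
    tp = np.sum(gt & pred)
    tn = np.sum(~gt & ~pred)
    fp = np.sum(~gt & pred)
    fn = np.sum(gt & ~pred)
    eps = 1e-8  # guard against empty masks
    recall = tp / (tp + fn + eps)
    specificity = tn / (tn + fp + eps)
    precision = tp / (tp + fp + eps)
    iou = tp / (tp + fp + fn + eps)
    return recall, specificity, precision, iou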
torch.save(model.state_dict(), save_model_path)
save_predict_target_path = os.path.join(
    save_time_fold,
    preprocess_path.split('/')[-1] + '_times_' + str(train_times) + '_' +
    str(round(best_test, 4)) + '.txt')
predict_target = torch.cat((predicts, targets), dim=0).detach().cpu().numpy()
np.savetxt(save_predict_target_path, predict_target)

precision_score[train_times] = round(precision(predicts, targets), 4)
recall_score[train_times] = round(recall(predicts, targets), 4)
specificity_score[train_times] = round(specificity(predicts, targets), 4)
mcc_score[train_times] = round(mcc(predicts, targets), 4)
auc_score = round(auc(predicts, targets), 4)
aupr_score = round(aupr(predicts, targets), 4)

print('Epoch: {:04d}'.format(epoch + 1), 'Train_times:', train_times)
print("*****************test_score {:.4f} best_score {:.4f}****************"
      .format(test_score, best_test))
print("All Test Score:", acc_score)
print(args.dataset, " Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
print("acc score:", acc_score)
print("precision score:", precision_score)
print("recall score:", recall_score)
print("specificity score:", specificity_score)
print("mcc score:", mcc_score)
print("auc score:", auc_score)
print("aupr score:", aupr_score)
atp = np.sum(tp, axis=0)
tn = sta[:, 1]
atn = np.sum(tn, axis=0)
fp = sta[:, 2]
afp = np.sum(fp, axis=0)
fn = sta[:, 3]
afn = np.sum(fn, axis=0)

# aggregate TPR/FPR across cases, then pick the threshold maximising TPR - FPR
atpr = atp * 1.0 / (atp + afn)
afpr = afp * 1.0 / (afp + atn)
adpr = atpr - afpr

dsc = 2.0 * tp / (2.0 * tp + fp + fn)
dsc_adpr = dsc[:, np.argmax(adpr)]
mdsc_adpr = np.mean(dsc_adpr)
sdsc_adpr = np.std(dsc_adpr)
thdad = threshs[np.argmax(adpr)]
manual_auc_all = auc(afpr, atpr)
fnr_ad = 1 - atpr[np.argmax(adpr)]
fpr_ad = afpr[np.argmax(adpr)]

PAUCA += [manual_auc_all]
TTHM_ad += [thdad]
MMDSC_ad += [mdsc_adpr]
SMDSC_ad += [sdsc_adpr]
FPR += [fpr_ad]
FNR += [fnr_ad]

data = np.concatenate((np.array(rhos).reshape(1, -1),
                       np.array(PAUCA).reshape(1, -1),
                       np.array(MMDSC_ad).reshape(1, -1),
                       np.array(SMDSC_ad).reshape(1, -1),
                       np.array(TTHM_ad).reshape(1, -1),
                       np.array(FPR).reshape(1, -1),
                       np.array(FNR).reshape(1, -1)))
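# Here auc(afpr, atpr) integrates a manually assembled ROC curve rather than
# scoring predictions. A minimal sketch via the trapezoidal rule, assuming the
# semantics of sklearn.metrics.auc (x coordinates first, y second):
import numpy as np

def auc(x, y):
    order = np.argsort(x)  # the trapezoid rule needs monotone x
    return np.trapz(np.asarray(y)[order], np.asarray(x)[order])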
prec = np.zeros_like(dsc)
thresh = 0.5

# check the predictions from the trained model
for i in range(num_test):
    # gt = orig_masks[testIdx[i]]
    name = img_list[testIdx[i]]
    gt = plt.imread(os.path.join(orig_dir, name.split('.')[0] + "_segmentation.png"))
    pred_up = cv2.resize(preds[i], (gt.shape[1], gt.shape[0]),
                         interpolation=cv2.INTER_NEAREST)
    dsc[i] = utils.check_preds(pred_up > thresh, gt)
    recall[i], _, prec[i] = utils.auc(gt, pred_up > thresh)

print('-' * 30)
print('At threshold =', thresh)
print('\n DSC \t\t{0:^.3f} \n Recall \t{1:^.3f} \n Precision\t{2:^.3f}'.format(
    np.sum(dsc) / num_test, np.sum(recall) / num_test, np.sum(prec) / num_test))

# check the predictions with the best saved model from checkpoint
model.load_weights("weights.hdf5")
_, _, _, preds = model.predict(imgs_test)
# preds = model.predict(imgs_test)  # use this if the model is unet
preds_up = []
dsc = np.zeros((num_test, 1))
def predict(i, rand):  # 33000-36000
    from models.stack_transformer1d import Gene
    path = 'processed_phase3'
    gene_chip = torch.load(os.path.join(path, 'mask.high.torch'))[i:i + 1000]
    print('gene_chip', gene_chip.shape)
    input_data = torch.load(os.path.join(path, 'chr9.phase3.impute.high.hap.torch'))[i:i + 1000]
    print('input_data', input_data.shape)

    mask = Mask(gene_chip)
    mask.maf_cal = mask.maf_cal(input_data)  # replaces the method with its result
    mask.missing_rate = 0.1

    data_div = Data_Div()
    col = list(range(input_data.shape[1]))
    random.seed(1)
    data_div.study_panel = data_div.sampler(col, 0.05)
    data_div.reference_panel = list(set(col) - set(data_div.study_panel))
    val_data = input_data[:, data_div.study_panel]
    # torch.random.manual_seed(rand)
    # mask.gene_chip = mask.maf_random_mask(mask.maf_cal)
    print('mask number is: ', mask.gene_chip.sum())

    val_dataset = PreDataSet(val_data, mask)
    print('val_sampler is', len(val_dataset))

    gene = Gene()
    use_cuda = torch.cuda.is_available()
    print('cuda flag: ' + str(use_cuda))
    torch.cuda.empty_cache()
    model_save_path = './processed_phase3/model'
    model = torch.load(os.path.join(model_save_path, str(0) + '.best_model_wts'))
    model = model['state']
    gene.load_state_dict(model)
    gene = gene.cuda()

    result = []
    target = []
    for i, dataset in enumerate(val_dataset):
        encode_input = dataset['encode_input'].float().unsqueeze(0)
        # encode_target = dataset['encode_target'].float().unsqueeze(0)
        # encode_mask = dataset['encode_mask']
        print(i)
        if use_cuda:
            encode_input = encode_input.cuda()
            # encode_target = encode_target.cuda()
            # encode_mask = encode_mask.cuda()
        output_v = gene(encode_input)
        result.append(output_v.detach())
        # target.append(encode_target)
    result = torch.cat(result, 0).squeeze(1).transpose(1, 0).cpu()
    # target = torch.cat(target, 0).transpose(1, 0).cpu()

    maf_list = [0.005, 0.05, 0.5]
    # maf_list = [0, 0.000001, 0.005, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
    for i in range(len(maf_list) - 1):
        # stratify the masked positions by minor allele frequency bin
        maf_mask = (mask.maf_cal <= maf_list[i + 1]) & (mask.maf_cal > maf_list[i])
        print('maf_cal number is: ', maf_mask.sum())
        encode_mask = (mask.gene_chip & maf_mask).view(-1, 1)
        print('encode_mask is: ', sum(encode_mask))
        pre = result.masked_select(encode_mask)
        ground = val_data.masked_select(encode_mask)
        # print('correct rate is: ', correct_rate(pre, ground))
        print('r2 score is: ', r2_score(pre, ground))
        # print('pearson is: ', pearson(pre, ground)[0] ** 2)
        print('auc score: ', auc(pre.detach().numpy(), ground.detach().numpy()))
subset = "test" if subset == "test": xb_test, tb_test, _, ts_test = data.load_test(CVsplit) elif subset == "train": sys.exit(subset + ": not implemented yet") elif subset == "train_valid": sys.exit(subset + ": not implemented yet") else: sys.exit(subset + ": not implemented yet") t = np.vstack((tb_test, ts_test)) n = np.size(t, axis=0) import utils AUC = utils.auc(predictions, t) total_AUC += AUC predictions = predictions > prob predictions = (predictions - 1) * -1 hard_preds = predictions #>prob t = (t - 1) * -1 ## Below is for getting FPs # FP_list = [] # counter = 0 # for idx in range(len(tb_test)): # if predictions[idx]==0:#predictions[idx]: # FP_list.append(np.array([xb_test[idx]])) # print(idx) # FP_list = np.concatenate(FP_list, axis=0)
if subset == "test": xb_test, tb_test, _, ts_test = data.load_test(CVsplit) elif subset == "train": sys.exit(subset + ": not implemented yet") elif subset == "train_valid": sys.exit(subset + ": not implemented yet") else: sys.exit(subset + ": not implemented yet") t = np.vstack((tb_test, ts_test)) n = np.size(t, axis=0) import utils AUC = utils.auc(predictions, t) total_AUC += AUC predictions = predictions > prob predictions = (predictions - 1) * -1 hard_preds = predictions # >prob t = (t - 1) * -1 ## Below is for getting FPs # FP_list = [] # counter = 0 # for idx in range(len(tb_test)): # if predictions[idx]==0:#predictions[idx]: # FP_list.append(np.array([xb_test[idx]])) # print(idx) # FP_list = np.concatenate(FP_list, axis=0)
def deconvolve(
    filename,
    res_filename,
    smooth,
    mean_mode,
    title,
    xticks,
    ciu,
    cycles,
    aline,
    indiv_areas,
    print_res=True,
):
    """Handles deconvolution and result processing.

    Works as a control center for the package, handling all processes
    and output.

    Args:
        filename: Data file name without file extension
        res_filename: Identifier for result file names
        smooth: Moving average smoothing factor in format:
            [window size, interval]. No smoothing if left empty
        mean_mode: 'der' for second derivative
            'rel_max' for relative maxima
            [mean1, mean2, ...] for given means, where a mean is a float
            for an x-axis value or an int for an index
            [] to return unfitted data
        cycles: Number of optimisation iterations
        print_res: If True, writes plots and an error log to a folder one
            level above. If False, returns a dict with parameters for the
            deconvoluted peaks of each ATD
        aline (optional): If True, all data is aligned according to the
            smallest x value for a global maximum in the dataset

    Returns:
        If print_res is False:
            retdic: Dictionary with fitted parameters and errors
    """
    datadic = parse.handle_file(filename)
    if aline:  # align file if desired
        datadic = parse.aline(parse.handle_file(filename), filename)
    arrival_time = datadic[filename]
    av_error = []
    areas = []
    fwhms = []
    retdic = {}

    # Create results folder
    script_dir = os.path.abspath(os.path.join(__file__, "../.."))
    results_dir = os.path.join(script_dir, filename + res_filename + '/')
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)

    # Create error log file
    with open(results_dir + filename + res_filename + '_errorlog_' +
              str(cycles) + '.txt', 'w') as f:
        for key in sorted(datadic, key=utils.natural_keys):  # Loop over ATDs
            voltage = key
            if voltage == filename:  # Exclude arrival times
                continue
            print(voltage)
            intensities = list(smoother.smooth(datadic[voltage], smooth))
            norm_factor = 100 / max(intensities)  # Scale factor for normalisation
            means = utils.find_means(intensities, arrival_time, mean_mode)
            means.sort()
            print('Mean indices: ' + str(means))
            initial_sds = [0.01 for _ in range(len(means))]
            initial_heights = [intensities[i] for i in means]
            fitted_parameters_f, fit_f, fitted_parameters_r, fit_r = optimisation.run_opt_cycles(
                cycles, arrival_time, intensities, initial_sds,
                initial_heights, means)
            av_par, gausslist, min_er, error = analyse.list_of_gaus(
                arrival_time, intensities, fitted_parameters_f,
                fitted_parameters_r, norm_factor)
            av_error.append(min_er)  # For final average error calculation
            areacur = []
            fwhmcur = []
            total_area = utils.auc(intensities, arrival_time) * norm_factor
            erind = error.index(min_er)
            # For area under the curve plot
            for i in range(len(gausslist[erind]) - 2):
                areacur.append(((utils.auc(gausslist[erind][i], arrival_time) *
                                 norm_factor) / total_area) * 100)
            if len(datadic) == 2 or indiv_areas:
                utils.indiv_area_plot(areacur, filename, results_dir, title, voltage)
            # For full width at half maximum plot
            for i in range(len(gausslist[erind]) - 2):
                fwhmcur.append(utils.fwhm(gausslist[erind][i][2]))
            areas.append(areacur)
            fwhms.append(fwhmcur)
            if print_res:  # Create plots and error log
                f.write(key + '\n')
                f.write(str(error[0]) + ' forward error' + '\n')
                f.write(str(error[1]) + ' reverse error' + '\n')
                f.write(str(error[2]) + ' average error' + '\n')
                f.write('\n\n\n')
                f.write('Average gaussian parameters:\n\n')
                f.write(str(erind) + '\n\n')
                f.write('Intensity\tMean\tSd\n')
                for i in av_par:
                    f.write(str(i))
                    f.write('\n')
                f.write('\n\n\n\n')
                # utils.plot_things is a versatile plotting function
                utils.plot_things(arrival_time, [gausslist[erind]], filename,
                                  voltage, res_filename,
                                  title, ciu)
            # else:  # Make dict output with parameters and error values
            #     erlist = error
            #     parlist = fitted_parameters_f
            #     minind = erlist.index(min(erlist))
            #     retdic[voltage] = []
            #     retdic[voltage].append(means)
            #     retdic[voltage].append(parlist[minind])
            #     retdic[voltage].append(erlist[minind])
            #     retdic[voltage].append([fit_f, fit_r, fit_av][minind])

        if print_res:
            # Return results concerning the full CIU: area plot, FWHM plot
            analyse.results(f, av_error, areas, fwhms, datadic, results_dir,
                            filename, res_filename, title, xticks)
            print(av_error)
        else:
            return retdic
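# In this package "auc" means the literal area under the ATD curve, not ROC AUC:
# utils.auc(intensities, arrival_time) integrates intensity over arrival time.
# A minimal sketch, assuming trapezoidal integration (the real utils module is
# not shown):
import numpy as np

def auc(intensities, arrival_time):
    return np.trapz(intensities, arrival_time)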
def test_pixel2(X, noise_idx, outlier_method_l, opt):
    '''
    content_path = 'data/sherlock.txt' if content_lines is None else None
    #noise_path = 'data/news_noise1.txt' if noise_lines is not None else None
    noise_path = 'data/sherlock_noise3.txt' if noise_lines is None else None
    '''
    # words_ar, X, noise_idx = words.doc_word_embed_content_noise(content_path, noise_path, 'data/sherlock_whiten.txt', content_lines, noise_lines)
    noise_idx = noise_idx.unsqueeze(-1)
    print('** {} number of outliers {}'.format(X.size(0), len(noise_idx)))
    opt.n, opt.feat_dim = X.size(0), X.size(1)
    # percentage of points to remove
    opt.remove_p = 0.2
    # number of top directions for calculating tau0
    opt.n_top_dir = 1
    opt.n_iter = 1
    # use select_idx rather than the scores tau, since the taus are scores for
    # the points remaining after outlier removal
    tau1, select_idx1, n_removed1, tau0, select_idx0, n_removed0 = mean.compute_tau1_tau0(X, opt)

    all_idx = torch.zeros(X.size(0), device=utils.device)
    ones = torch.ones(noise_idx.size(0), device=utils.device)
    all_idx.scatter_add_(dim=0, index=noise_idx.squeeze(), src=ones)

    opt.baseline = 'tau0'  # alternatives: 'lof', 'knn', 'l2', 'isolation_forest', 'dbscan'
    scores_l = []
    for method in outlier_method_l:
        if method == 'iso forest':
            tau = baselines.isolation_forest(X)
        elif method == 'ell env':
            tau = baselines.ellenv(X)
        elif method == 'lof':
            tau = baselines.knn_dist_lof(X)
        elif method == 'dbscan':
            tau = baselines.dbscan(X)
        elif method == 'l2':
            tau = baselines.l2(X)
        elif method == 'knn':
            tau = baselines.knn_dist(X)
        elif method == 'tau2':
            select_idx2 = torch.LongTensor(list(range(len(X)))).to(utils.device)
            tau = mean.compute_tau2(X, select_idx2, opt)
        else:
            raise Exception('Outlier method {} not supported'.format(method))
        good_scores = tau[all_idx == 0]
        bad_scores = tau[all_idx == 1]
        auc = utils.auc(good_scores, bad_scores)
        scores_l.append(auc)

    if opt.n_iter > 1:
        all_idx = torch.LongTensor(range(len(X))).to(utils.device)
        zeros1 = torch.zeros(len(X), device=utils.device)
        zeros1[select_idx1] = 1
        outliers_idx1 = all_idx[zeros1 == 0]
        zeros0 = torch.zeros(len(X), device=utils.device)
        zeros0[select_idx0] = 1
        outliers_idx0 = all_idx[zeros0 == 0]
        if opt.baseline != 'tau0':
            outliers_idx0 = torch.topk(tau0, k=n_removed0, largest=True)[1]
    else:
        # should not be used if n_iter > 1
        outliers_idx0 = torch.topk(tau0, k=n_removed0, largest=True)[1]
        outliers_idx1 = torch.topk(tau1, k=n_removed1, largest=True)[1]

    # Distribution of true outliers with respect to the predicted scores.
    compute_auc_b = True
    if compute_auc_b:
        # mark the complement of noise_idx as inliers
        zeros = torch.zeros(len(tau1), device=utils.device)
        zeros[noise_idx] = 1
        inliers_tau1 = tau1[zeros == 0]  # this vs torch.index_select(tau1, dim=0, index=noise_idx)
        outliers_tau1 = tau1[zeros == 1]
        # utils.inlier_outlier_hist(inliers_tau1, outliers_tau1, 'tau1', high=40)
        tau1_auc = utils.auc(inliers_tau1, outliers_tau1)
        inliers_tau0 = tau0[zeros == 0]
        outliers_tau0 = tau0[zeros == 1]
        # utils.inlier_outlier_hist(inliers_tau0, outliers_tau0, opt.baseline, high=40)
        tau0_auc = utils.auc(inliers_tau0, outliers_tau0)
        print('tau1 size {}'.format(tau1.size(0)))

    outliers_idx0_exp = outliers_idx0.unsqueeze(0).expand(len(noise_idx), -1)
    outliers_idx1_exp = outliers_idx1.unsqueeze(0).expand(len(noise_idx), -1)
    assert len(outliers_idx0) == len(outliers_idx1)
    tau0_cor = noise_idx.eq(outliers_idx0_exp).sum()
    tau1_cor = noise_idx.eq(outliers_idx1_exp).sum()
    print('{}_cor {} out of {} tau1_cor {} out of {}'.format(
        opt.baseline, tau0_cor, len(outliers_idx0), tau1_cor, len(outliers_idx1)))
    # return tau0_cor.item() / len(outliers_idx0), tau1_cor.item() / len(outliers_idx0), tau0_auc, tau1_auc
    return [tau1_auc, tau0_auc] + scores_l
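# utils.auc(inlier_scores, outlier_scores) scores the separation between two
# groups. A minimal sketch, assuming it is the probability that a random
# outlier scores higher than a random inlier (the Mann-Whitney AUC); only the
# two-argument call is taken from the code above, the body is an assumption:
import torch

def auc(inlier_scores, outlier_scores):
    inl = inlier_scores.view(-1, 1)   # shape (n_in, 1)
    out = outlier_scores.view(1, -1)  # shape (1, n_out)
    # broadcast to all (inlier, outlier) pairs; ties count as half a win
    greater = (out > inl).float().mean()
    ties = (out == inl).float().mean()
    return (greater + 0.5 * ties).item()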