def get_aucs(db):
    """Compute overall and per-group AUCs for a sweep of linear scores.

    The train and test splits of the chosen toy dataset are pooled. For each
    weight ``c`` on a 101-point grid over [0, 1] the score
    ``S = w * X[:, 0] + (1 - c) * X[:, 1]`` is evaluated, where ``w = c`` for
    ``"toy1"`` and ``w = -c`` for ``"toy2"``.

    Args:
        db: Dataset name, either ``"toy1"`` or ``"toy2"``.

    Returns:
        Three arrays with one entry per grid point: the AUC over all
        samples, over the samples with ``Z == 0`` and over the samples with
        ``Z == 1``.

    Raises:
        AssertionError: If ``db`` is not one of the two supported names.
    """
    assert db in {"toy1", "toy2"}
    is_toy1 = db == "toy1"
    # Forwarded to ``auc`` as ``B`` (10000 was used previously).
    n_draws = 100000

    loader = load_toy1 if is_toy1 else load_toy2
    (X_tr, Y_tr, Z_tr), (X_te, Y_te, Z_te) = loader(n=10000)

    # Pool both splits; the sweep is evaluated on all samples at once.
    X = np.vstack([X_tr, X_te])
    Y = np.concatenate([Y_tr, Y_te])
    Z = np.concatenate([Z_tr, Z_te])
    in_group0 = Z == 0
    in_group1 = Z == 1

    overall, within0, within1 = [], [], []
    for c in np.linspace(0, 1, 101):
        first_weight = c if is_toy1 else -c
        S = first_weight * X[:, 0] + (1 - c) * X[:, 1]
        overall.append(auc(S, Y, B=n_draws))
        within0.append(auc(S[in_group0], Y[in_group0], B=n_draws))
        within1.append(auc(S[in_group1], Y[in_group1], B=n_draws))

    return np.array(overall), np.array(within0), np.array(within1)
Beispiel #2
0
 def validate(sess):
     """Run the validation generator through the network and log the metrics.

     Softmax outputs and targets are collected for every batch, truncated to
     the accumulated count yielded alongside the batches, and turned into an
     accuracy (class-1 score thresholded at 0.5) and an AUC. Both values are
     printed and written to the summary writer at the current global step.

     Args:
         sess: Session used to evaluate the graph tensors.
     """
     outs_per_batch = []
     targets_per_batch = []
     n_valid = 0
     # The second item yielded with each batch is accumulated and used to cut
     # the concatenated arrays -- presumably the count of real (non-padded)
     # samples in the batch; confirm against data_gen.
     for batch, n_in_batch in data_gen.gen_valid():
         feed = {X_pl: batch['X'], t_pl: batch['t'], is_training_pl: False}
         softmax_out = sess.run(fetches=[out_softmax], feed_dict=feed)[0]
         targets_per_batch.append(batch['t'])
         outs_per_batch.append(softmax_out)
         n_valid += n_in_batch

     all_outs = np.concatenate(outs_per_batch, axis=0)[:n_valid]
     all_targets = np.concatenate(targets_per_batch, axis=0)[:n_valid]

     # Column 1 of the softmax output is used as the positive-class score.
     positive_scores = all_outs[:, 1]
     predicted = positive_scores>0.5
     accuracy = np.mean(np.equal(predicted, all_targets))
     auc_value = utils.auc(all_targets, positive_scores)
     print(" valid_accs, %.3f" % (accuracy*100))
     print(" valid_aucs, %.3f" % (auc_value*100))

     # Feed the scalar metrics into the summary placeholders.
     summaries, step = sess.run(
         [val_summaries, global_step],
         {valid_accs_pl: accuracy, valid_aucs_pl: auc_value})
     summary_writer.add_summary(summaries, step)
Beispiel #3
0
 def validate(sess):
     """Evaluate the validation set and record accuracy and AUC summaries.

     Args:
         sess: Session in which the softmax output and the summary ops are
             run.
     """
     collected = []  # (softmax output, targets) per batch
     kept = 0  # running total of the counts yielded by the generator

     batches = data_gen.gen_valid
     for batch, count in batches():
         fetched = sess.run(
             fetches=[out_softmax],
             feed_dict={
                 X_pl: batch['X'],
                 t_pl: batch['t'],
                 is_training_pl: False
             })
         collected.append((fetched[0], batch['t']))
         kept += count

     # Only the first ``kept`` rows are retained after concatenation.
     outs = np.concatenate([out for out, _ in collected], axis=0)[:kept]
     targets = np.concatenate([t for _, t in collected], axis=0)[:kept]

     # Score of class index 1, thresholded at 0.5 for hard predictions.
     scores = outs[:, 1]
     acc = np.mean(np.equal(scores > 0.5, targets))
     auc_score = utils.auc(targets, scores)
     for label, value in ((" valid_accs, %.3f", acc),
                          (" valid_aucs, %.3f", auc_score)):
         print(label % (value * 100))

     metric_feed = {valid_accs_pl: acc, valid_aucs_pl: auc_score}
     summaries, step = sess.run([val_summaries, global_step], metric_feed)
     summary_writer.add_summary(summaries, step)
def test_single(model, turn):
    """Evaluate ``model`` on the module-level test graphs and save predictions.

    Each test sample (adjacency and feature matrix) is moved to the GPU and
    passed through the model. Accuracy and AUC against ``test_label_list``
    are printed, and the predictions are saved when ``phase1`` or ``phase3``
    is set.

    Args:
        model: Network called as ``model(feature, adj)``.
        turn: Identifier forwarded to ``save_pred``.
    """
    model.eval()

    per_sample = []
    for idx, raw_adj in enumerate(test_adj_list):
        adj = torch.Tensor(raw_adj).cuda()
        feature = torch.Tensor(test_feature_list[idx]).cuda()
        per_sample.append(model(feature, adj))

    outputs = torch.Tensor(per_sample)
    labels = torch.Tensor(test_label_list).unsqueeze_(0)
    outputs.unsqueeze_(0)

    # Add a leading axis and transpose it away again before scoring
    # (presumably yielding one column per set of values -- confirm shapes).
    print("accuracy:", accuracy(outputs.t(), labels.t()))
    print("auc:", auc(outputs.t(), labels.t()))

    # Predictions are stored under the id of the active phase, if any.
    if phase1:
        phase_id = 1
    elif phase3:
        phase_id = 3
    else:
        return
    save_pred("res_cheby/", turn, phase_id, outputs.t())
def test_hinge():
    """Evaluate ``model_hinge`` on pairs of test samples and save the result.

    Every test sample is embedded with ``model_hinge``. Each sample ``i`` is
    then paired with the next ``pair_num`` samples (wrapping around the end
    of the list); a pair is labelled 1 when both samples share a label and
    -1 otherwise. Accuracy and AUC of the values returned by
    ``get_cos_list`` against those labels are printed, and the values are
    saved with ``save_pred``.
    """
    model_hinge.eval()

    embeddings = []
    for idx, raw_adj in enumerate(test_adj_list):
        # NOTE(review): unlike ``feature``, ``adj`` is not moved to CUDA here
        # -- confirm this is intended.
        adj = torch.Tensor(raw_adj)
        feature = torch.Tensor(test_feature_list[idx]).cuda()
        embedding = model_hinge(feature, adj)
        embedding.squeeze_(0)
        embeddings.append(embedding)

    # Ground truth for the pairs: 1 for same label, -1 for different labels.
    n_samples = len(test_adj_list)
    pair_labels = []
    for first in range(n_samples):
        for offset in range(1, pair_num + 1):
            second = (first + offset) % n_samples
            same = test_label_list[first] == test_label_list[second]
            pair_labels.append([1] if same else [-1])

    cos_list = get_cos_list(embeddings, 0)
    pair_labels = torch.Tensor(pair_labels)

    print("hinge_accuracy:", accuracy(cos_list, pair_labels))
    print("hinge_auc:", auc(cos_list, pair_labels))

    save_pred("res_cheby/", turn, 2, cos_list)
def get_ptws(alpha, y_val, db="toy2"):
    """Compute AUCs and pointwise TPR values for a sweep of linear scores.

    The train and test splits of the chosen toy dataset are pooled. For each
    weight ``c`` on a 101-point grid over [0, 1] the score ``S`` is built
    (``c * X0 + (1 - c) * X1`` for toy1, ``-c * X0 + (1 - c) * X1`` for
    toy2), then the overall AUC and ``pointwise_tpr`` restricted to the
    samples with ``Y == y_val`` are recorded.

    Args:
        alpha: Forwarded unchanged to ``pointwise_tpr``.
        y_val: Label value selecting the samples passed to
            ``pointwise_tpr``.
        db: Dataset name, ``"toy1"`` or ``"toy2"``. Defaults to ``"toy2"``,
            the dataset this function previously always loaded. It used to
            be read from an undefined name inside the loop.

    Returns:
        Two arrays with one entry per grid point: the AUCs and the pointwise
        values.

    Raises:
        ValueError: If ``db`` is not a supported dataset name.
    """
    if db == "toy1":
        data_tr, data_te = load_toy1(n=10000)
    elif db == "toy2":
        data_tr, data_te = load_toy2(n=10000)
    else:
        raise ValueError("db must be 'toy1' or 'toy2', got {!r}".format(db))

    B = 100000  # forwarded to ``auc``
    aucs = []
    ptws = []

    X_tr, Y_tr, Z_tr = data_tr
    X_te, Y_te, Z_te = data_te
    X = np.vstack([X_tr, X_te])
    Y = np.concatenate([Y_tr, Y_te])
    Z = np.concatenate([Z_tr, Z_te])

    # The selection does not depend on ``c``, so compute it once.
    selected = Y == y_val
    Z_selected = Z[selected]

    for c in np.linspace(0, 1, 101):
        if db == "toy1":
            S = c * X[:, 0] + (1 - c) * X[:, 1]
        else:
            S = -c * X[:, 0] + (1 - c) * X[:, 1]
        aucs.append(auc(S, Y, B=B))
        ptws.append(pointwise_tpr(S[selected], Z_selected, alpha))

    return np.array(aucs), np.array(ptws)
Beispiel #7
0
# Per-image score buffers, one row per test image.
preds_up = []
dsc = np.zeros((num_test, 1))
recall = np.zeros_like(dsc)
tn = np.zeros_like(dsc)  # allocated but not filled in the code shown here
prec = np.zeros_like(dsc)

# Cut-off used to binarise the predictions.
thresh = 0.5

for i in range(num_test):
    gt = orig_gts[testIdx[i]]
    # Resize the prediction to the ground-truth size; nearest-neighbour
    # interpolation is used for the resize.
    preds_up.append(
        cv2.resize(preds[i], (gt.shape[1], gt.shape[0]),
                   interpolation=cv2.INTER_NEAREST))
    dsc[i] = utils.check_preds(preds_up[i] > thresh, gt)
    # utils.auc returns three values here; the middle one is discarded.
    recall[i], _, prec[i] = utils.auc(gt, preds_up[i] > thresh)

# Report the scores averaged over all test images.
print('-' * 30)
print('At threshold =', thresh)
print('\n DSC \t\t{0:^.3f} \n Recall \t{1:^.3f} \n Precision\t{2:^.3f}'.format(
    np.sum(dsc) / num_test,
    np.sum(recall) / num_test,
    np.sum(prec) / num_test))

# Load the weights stored on disk and predict again; only the last of the
# model's four outputs is kept.
model.load_weights("weights.hdf5")
_, _, _, preds = model.predict(imgs_test)
#preds = model.predict(imgs_test)   #use this if model is unet

# Reset the buffers for the second evaluation pass.
preds_up = []
dsc = np.zeros((num_test, 1))
recall = np.zeros_like(dsc)
Beispiel #8
0
def _format_scores(mean_dsc, mean_recall, mean_prec):
    """Render the mean scores in the layout shared by console and results file."""
    return '\n DSC \t\t{0:^.3f} \n Recall \t{1:^.3f} \n Precision\t{2:^.3f}'.format(
        mean_dsc, mean_recall, mean_prec)


def _predict_final_output(model, X_val, multi_output):
    """Return the model's predictions, keeping only the last of four outputs if needed."""
    if multi_output:
        _, _, _, preds = model.predict(X_val)
        return preds
    return model.predict(X_val)


def _score_predictions(preds, y_val, num_test, thresh):
    """Resize each prediction to its mask's size and score it at ``thresh``.

    Returns the resized predictions and the formatted mean DSC, recall and
    precision over the ``num_test`` samples.
    """
    preds_up = []
    dsc = np.zeros((num_test, 1))
    recall = np.zeros_like(dsc)
    prec = np.zeros_like(dsc)
    for i in range(num_test):
        gt = y_val[i]
        pred_up = cv2.resize(preds[i], (gt.shape[1], gt.shape[0]),
                             interpolation=cv2.INTER_NEAREST)
        preds_up.append(pred_up)
        dsc[i] = utils.check_preds(pred_up > thresh, gt)
        # utils.auc returns three values here; the middle one is not used.
        recall[i], _, prec[i] = utils.auc(gt, pred_up > thresh)
    report = _format_scores(np.sum(dsc) / num_test,
                            np.sum(recall) / num_test,
                            np.sum(prec) / num_test)
    return preds_up, report


def _append_to_results(path, text):
    """Append ``text`` to the results file, closing it even if the write fails."""
    with open(path, "a") as f:
        f.write(text)


def keras_fit_generator_attention(img_rows=96, img_cols=96, n_imgs=10**4, batch_size=32, regenerate=True, model_type = 'unet', loss_type='dice', train=True, test=True):
    """Train and/or evaluate one of the U-Net variants on the stored data.

    Args:
        img_rows, img_cols: Forwarded to ``data_to_array`` when
            ``regenerate`` is set. The models themselves are built for a
            fixed (256, 256, 1) input.
        n_imgs: Unused; kept for interface compatibility.
        batch_size: Batch size passed to ``model.fit``.
        regenerate: If true, rebuild the data arrays with ``data_to_array``
            before loading them.
        model_type: ``'unet'``, ``'attn_unet'`` or ``'ds_mi_attn_unet'``.
        loss_type: ``'dice'``, ``'tversky'`` or ``'focal_tversky'``.
        train: If true, fit the model.
        test: If true, evaluate on ``../data/X_val.npy`` /
            ``../data/y_val.npy``, first with the in-memory weights and then
            with the checkpointed weights. Scores are appended to the
            results file and sample plots are saved.

    Returns:
        -1 if ``loss_type`` or ``model_type`` is not recognised, otherwise
        ``None``.
    """
    if regenerate:
        data_to_array(img_rows, img_cols)

    X_train, y_train, X_val, y_val = load_data()

    # choose loss function
    if loss_type == 'dice':
        loss_f = losses.dice_loss
    elif loss_type == 'tversky':
        loss_f = losses.tversky_loss
    elif loss_type == 'focal_tversky':
        loss_f = losses.focal_tversky
    else:
        print('wrong loss function type')
        return -1

    epochs_num = 50
    filepath = '../data/weights/weights_'+model_type+'_'+loss_type+'.hdf5'  # this folder must exist before the run
    result_text_path = '../data/results/results.txt'
    result_images_path = '../data/results/images1/'

    # choose model; the checkpoint tracks the DSC metric named by ``monitor``
    multi_output = model_type == 'ds_mi_attn_unet'
    if model_type == 'unet':
        sgd = SGD(lr=0.01, momentum=0.90)
        model = newmodels.unet(sgd, (256,256,1), loss_f)
        monitor = 'val_dsc'
    elif model_type == 'attn_unet':
        sgd = SGD(lr=0.01, momentum=0.90, decay=1e-6)
        model = newmodels.attn_unet(sgd, (256,256,1), loss_f)
        monitor = 'val_dsc'
    elif multi_output:
        # One target per output: the masks subsampled by 8, 4, 2 and 1.
        y_train = [y_train[:,::8,::8,:], y_train[:,::4,::4,:],
                   y_train[:,::2,::2,:], y_train]
        sgd = SGD(lr=0.01, momentum=0.90, decay=1e-6)
        model = newmodels.attn_reg(sgd, (256,256,1), loss_f)
        monitor = 'val_final_dsc'
    else:
        print('wrong model type')
        return -1

    model.summary()

    # The timestamp keeps the log directories and result entries of
    # different runs apart.
    model_name = (model_type+'_'+loss_type+'{}').format(int(time.time()))
    c_backs = [
        ModelCheckpoint(filepath, monitor=monitor,
                        verbose=1, save_best_only=True,
                        save_weights_only=True, mode='max'),
        EarlyStopping(monitor='loss', min_delta=0.001, patience=5),
        TensorBoard(log_dir='./log_dir_5.25.1/{}'.format(model_name)),
    ]

    if train:
        model.fit(X_train, y_train, validation_split=0.15,
                  shuffle=True, epochs=epochs_num, batch_size=batch_size,
                  verbose=True, callbacks=c_backs)

    if not test:
        return None

    X_val = np.load('../data/X_val.npy')
    y_val = np.load('../data/y_val.npy')
    num_test = X_val.shape[0]
    thresh = 0.5
    separator = '-'*30

    # check the predictions from the model as it is in memory
    preds = _predict_final_output(model, X_val, multi_output)
    preds_up, report = _score_predictions(preds, y_val, num_test, thresh)
    _append_to_results(
        result_text_path,
        '\n' + separator + '\nModel name: ' + model_name + report + '\n')
    print(separator)
    print('At threshold =', thresh)
    print(report)

    # check the predictions with the best saved model from checkpoint
    model.load_weights(filepath)
    preds = _predict_final_output(model, X_val, multi_output)
    preds_up, report = _score_predictions(preds, y_val, num_test, thresh)
    print(separator)
    print('USING HDF5 saved model at thresh=', thresh)
    print(report)
    _append_to_results(
        result_text_path,
        '\n' + separator + '\nModel name: ' + model_name
        + '\nUSING HDF5 saved model' + report + '\n')

    # plot a fixed set of test samples (mask next to prediction)
    for idx in (94, 55, 69, 75, 52, 77):
        if idx >= num_test:
            # The fixed sample ids may not exist in a smaller validation set.
            continue
        out_dir = os.path.join(result_images_path, str(idx))
        os.makedirs(out_dir, exist_ok=True)

        fig = plt.figure(dpi=200)
        plt.subplot(121)
        plt.axis('off')
        plt.imshow(np.squeeze(y_val[idx]), cmap='gray')
        plt.title('Original Segmentated Img {}'.format(idx))
        plt.subplot(122)
        plt.axis('off')
        plt.imshow(np.squeeze(preds_up[idx]), cmap='gray')
        plt.title('Mask {}'.format(idx))

        # The old format string had no placeholder, so '.png' was dropped;
        # the extension is now spelled out.
        plt.savefig(os.path.join(out_dir, model_name + 'ori-gt-.png'))
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
    return None
Beispiel #9
0
				x_batch = X[idx]
				out = predict(x_batch)
				preds.append(out)
			if num_batches * batch_size < n:
				# Computing rest
				rest = n - num_batches * batch_size
				idx = range(n-rest, n)
				x_batch = X[idx]
				out = predict(x_batch)
				preds.append(out)
			# Making metadata
			predictions = np.concatenate(preds, axis = 0)
			acc_eval = utils.accuracy(predictions, y)
			all_accuracy.append(acc_eval)

			auc_eval = utils.auc(predictions, y)
			all_auc.append(auc_eval)

			roc_eval_fpr, roc_eval_tpr, roc_eval_thresholds = utils.roc(predictions, y)
			all_roc_fpr.append(roc_eval_fpr)
			all_roc_tpr.append(roc_eval_tpr)
			all_roc_thresholds.append(roc_eval_thresholds)
			if Print:
				print "  validating: %s loss" % subset
				print "  average evaluation accuracy (%s): %.5f" % (subset, acc_eval)
				print "  average evaluation AUC (%s): %.5f" % (subset, auc_eval)
				print
	print "Epoch %d of %d" % (epoch + 1, num_epochs)

	if epoch in learning_rate_schedule:
		lr = np.float32(learning_rate_schedule[epoch])
# Allocated like the other per-image buffers, but rebound to a single scalar
# once the averages are computed below.
f1_score = np.zeros_like(dsc)

# Cut-off used to binarise the predictions.
thresh = 0.5

# check the predictions from the trained model
for i in range(num_test):
    #gt = orig_masks[testIdx[i]]
    name = img_list[testIdx[i]]
    # The ground-truth mask is read from "<image stem>_segmentation.png"
    # in orig_dir.
    gt = plt.imread(
        os.path.join(orig_dir,
                     name.split('.')[0] + "_segmentation.png"))

    # Resize the prediction to the mask's size before scoring.
    pred_up = cv2.resize(preds[i], (gt.shape[1], gt.shape[0]),
                         interpolation=cv2.INTER_NEAREST)
    dsc[i] = utils.check_preds(pred_up > thresh, gt)
    # In this script utils.auc returns four values per image.
    recall[i], spec[i], prec[i], iou[i] = utils.auc(gt, pred_up > thresh)

# Average every per-image score over the test set.
avg_dsc = np.sum(dsc) / num_test
avg_recall = np.sum(recall) / num_test
avg_precision = np.sum(prec) / num_test
# F1 is derived from the averaged recall and precision, not averaged per image.
f1_score = 2 * avg_recall * avg_precision / (avg_precision + avg_recall)
avg_iou = np.sum(iou) / num_test
avg_specificity = np.sum(spec) / num_test
print('-' * 30)
print('At threshold =', thresh)
print(
    '\n DSC \t\t{0:^.3f} \n Recall \t{1:^.3f} \n Precision \t{2:^.3f} \n Specificity \t{3:^.3f} \n IOU \t\t{4:^.3f} \n F1 \t\t{5:^.3f}'
    .format(avg_dsc, avg_recall, avg_precision, avg_specificity, avg_iou,
            f1_score))

# check the predictions with the best saved model from checkpoint
Beispiel #11
0
            torch.save(model.state_dict(), save_model_path)
            save_predict_target_path = os.path.join(
                save_time_fold,
                preprocess_path.split('/')[-1] + '_times_' + str(train_times) +
                '_' + str(round(best_test, 4)) + '.txt')
            predict_target = torch.cat((predicts, targets),
                                       dim=0).detach().cpu().numpy()
            np.savetxt(save_predict_target_path, predict_target)

            precision_score[train_times] = round(precision(predicts, targets),
                                                 4)
            recall_score[train_times] = round(recall(predicts, targets), 4)
            specificity_score[train_times] = round(
                specificity(predicts, targets), 4)
            mcc_score[train_times] = round(mcc(predicts, targets), 4)
            auc_score = round(auc(predicts, targets), 4)
            aupr_score = round(aupr(predicts, targets), 4)
        print('Epoch: {:04d}'.format(epoch + 1), 'Train_times:', train_times)
        print(
            "*****************test_score {:.4f} best_socre {:.4f}****************"
            .format(test_score, best_test))
        print("All Test Score:", acc_score)
# Final report: dataset name, elapsed time since t_total, and the collected
# metric values.
print(args.dataset, " Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
print("acc Score:", acc_score)
print("precision Score:", precision_score)
print("recall score", recall_score)
print("specificity score", specificity_score)
print("mcc score", mcc_score)
# NOTE(review): auc_score and aupr_score appear to hold only the most recent
# value rather than one entry per training run -- confirm this is intended.
print("auc socre", auc_score)
print("aupr score", aupr_score)
	atp = np.sum(tp, axis = 0)
	tn = sta[:,1]
	atn = np.sum(tn, axis = 0)
	fp = sta[:,2]
	afp = np.sum(fp, axis = 0)
	fn = sta[:,3]
	afn = np.sum(fn, axis = 0)
	atpr = atp*1.0/(atp +afn)
	afpr = afp*1.0/(afp + atn)
	adpr = atpr - afpr
	dsc = 2.0*tp/(2.0*tp + fp + fn)
	dsc_adpr = dsc[:,np.argmax(adpr)]
	mdsc_adpr = np.mean(dsc_adpr)
	sdsc_adpr = np.std(dsc_adpr)		
	thdad = threshs[np.argmax(adpr)]
	manual_auc_all = auc(afpr, atpr)
	fnr_ad = 1-atpr[np.argmax(adpr)]
	fpr_ad = afpr[np.argmax(adpr)]

	PAUCA += [manual_auc_all] 
	TTHM_ad += [thdad]
	MMDSC_ad += [mdsc_adpr]
	SMDSC_ad += [sdsc_adpr]
	FPR += [fpr_ad]
	FNR += [fnr_ad]

data = np.concatenate((np.array(rhos).reshape(1,-1),np.array(PAUCA).reshape(1,-1),
					   np.array(MMDSC_ad).reshape(1,-1),np.array(SMDSC_ad).reshape(1,-1),
					   np.array(TTHM_ad).reshape(1,-1),np.array(FPR).reshape(1,-1),
					   np.array(FNR).reshape(1,-1)
						))
Beispiel #13
0
# Per-image precision buffer, shaped like the DSC buffer.
prec = np.zeros_like(dsc)

# Cut-off used to binarise the predictions.
thresh = 0.5

# check the predictions from the trained model
for i in range(num_test):
    #gt = orig_masks[testIdx[i]]
    name = img_list[testIdx[i]]
    # The ground-truth mask is read from "<image stem>_segmentation.png"
    # in orig_dir.
    gt = plt.imread(
        os.path.join(orig_dir,
                     name.split('.')[0] + "_segmentation.png"))

    # Resize the prediction to the mask's size before scoring.
    pred_up = cv2.resize(preds[i], (gt.shape[1], gt.shape[0]),
                         interpolation=cv2.INTER_NEAREST)
    dsc[i] = utils.check_preds(pred_up > thresh, gt)
    # utils.auc returns three values here; the middle one is discarded.
    recall[i], _, prec[i] = utils.auc(gt, pred_up > thresh)

# Report the scores averaged over all test images.
print('-' * 30)
print('At threshold =', thresh)
print('\n DSC \t\t{0:^.3f} \n Recall \t{1:^.3f} \n Precision\t{2:^.3f}'.format(
    np.sum(dsc) / num_test,
    np.sum(recall) / num_test,
    np.sum(prec) / num_test))

# check the predictions with the best saved model from checkpoint
model.load_weights("weights.hdf5")
# Only the last of the model's four outputs is kept.
_, _, _, preds = model.predict(imgs_test)
#preds = model.predict(imgs_test)   #use this if the model is unet

# Reset the buffers for the second evaluation pass.
preds_up = []
dsc = np.zeros((num_test, 1))
Beispiel #14
0
def predict(i, rand):
    """Run the saved ``Gene`` model on a 1000-row window and print its scores.

    Rows ``i`` to ``i + 1000`` of the stored mask and haplotype tensors are
    loaded, a study panel of columns is drawn with a fixed seed, and the
    checkpointed model is applied to every item of the resulting dataset.
    For each minor-allele-frequency bin the r2 score and AUC of the
    predictions at the masked positions are printed.

    Args:
        i: First row of the window to load.
        rand: Unused. It was the seed of the random masking that is
            currently disabled.

    Returns:
        None. All results are printed.
    """
    from models.stack_transformer1d import Gene

    path = 'processed_phase3'
    gene_chip = torch.load(os.path.join(path, 'mask.high.torch'))[i:i + 1000]
    print('gene_chip', gene_chip.shape)
    input_data = torch.load(os.path.join(path, 'chr9.phase3.impute.high.hap.torch'))[i:i + 1000]
    print('input_data', input_data.shape)

    mask = Mask(gene_chip)
    # The method is replaced by its own result; later code reads the values
    # from ``mask.maf_cal``.
    mask.maf_cal = mask.maf_cal(input_data)
    mask.missing_rate = 0.1

    data_div = Data_Div()
    col = list(range(input_data.shape[1]))
    random.seed(1)  # fixed seed so the same study panel is drawn every time
    data_div.study_panel = data_div.sampler(col, 0.05)
    data_div.reference_panel = list(set(col) - set(data_div.study_panel))

    val_data = input_data[:, data_div.study_panel]
    print('mask number is: ', mask.gene_chip.sum())

    val_dataset = PreDataSet(val_data, mask)
    print('val_sampler is', len(val_dataset))

    gene = Gene()
    use_cuda = torch.cuda.is_available()
    print('cuda flag:  ' + str(use_cuda))
    torch.cuda.empty_cache()
    model_save_path = './processed_phase3/model'

    # Without CUDA, remap the stored tensors to the CPU and keep the model
    # there. Previously the model was moved to the GPU unconditionally, which
    # fails on CPU-only hosts even though the inputs honoured ``use_cuda``.
    checkpoint = torch.load(
        os.path.join(model_save_path, str(0) + '.best_model_wts'),
        map_location=None if use_cuda else 'cpu')
    gene.load_state_dict(checkpoint['state'])
    if use_cuda:
        gene = gene.cuda()
    # NOTE(review): gene.eval() is never called. If Gene contains dropout or
    # batch-norm layers, confirm that running it in training mode is intended.

    result = []
    # No gradients are needed for prediction; the outputs were detached anyway.
    with torch.no_grad():
        for sample_idx, dataset in enumerate(val_dataset):
            encode_input = dataset['encode_input'].float().unsqueeze(0)
            print(sample_idx)

            if use_cuda:
                encode_input = encode_input.cuda()

            output_v = gene(encode_input)
            result.append(output_v.detach())
    result = torch.cat(result, 0).squeeze(1).transpose(1, 0).cpu()

    # Bin edges for the minor-allele frequency; each bin is (low, high].
    maf_list = [0.005, 0.05, 0.5]

    for low, high in zip(maf_list[:-1], maf_list[1:]):
        maf_mask = ((mask.maf_cal <= high) & (mask.maf_cal > low))
        print('maf_cal number is: ', maf_mask.sum())
        encode_mask = (mask.gene_chip & maf_mask).view(-1, 1)
        print('encode_mask is: ', sum(encode_mask))
        pre = result.masked_select(encode_mask)
        groud_s = val_data.masked_select(encode_mask)
        print('r2 score is:  ', r2_score(pre, groud_s))
        print('auc score: ', auc(pre.detach().numpy(), groud_s.detach().numpy()))
Beispiel #15
0
        subset = "test"

    if subset == "test":
        xb_test, tb_test, _, ts_test = data.load_test(CVsplit)
    elif subset == "train":
        sys.exit(subset + ": not implemented yet")
    elif subset == "train_valid":
        sys.exit(subset + ": not implemented yet")
    else:
        sys.exit(subset + ": not implemented yet")

    t = np.vstack((tb_test, ts_test))
    n = np.size(t, axis=0)

    import utils
    AUC = utils.auc(predictions, t)
    total_AUC += AUC

    predictions = predictions > prob
    predictions = (predictions - 1) * -1
    hard_preds = predictions  #>prob
    t = (t - 1) * -1

    ## Below is for getting FPs
    #    FP_list = []
    #    counter = 0
    #    for idx in range(len(tb_test)):
    #        if predictions[idx]==0:#predictions[idx]:
    #            FP_list.append(np.array([xb_test[idx]]))
    #        print(idx)
    #    FP_list = np.concatenate(FP_list, axis=0)
    if subset == "test":
        xb_test, tb_test, _, ts_test = data.load_test(CVsplit)
    elif subset == "train":
        sys.exit(subset + ": not implemented yet")
    elif subset == "train_valid":
        sys.exit(subset + ": not implemented yet")
    else:
        sys.exit(subset + ": not implemented yet")

    t = np.vstack((tb_test, ts_test))
    n = np.size(t, axis=0)

    import utils

    AUC = utils.auc(predictions, t)
    total_AUC += AUC

    predictions = predictions > prob
    predictions = (predictions - 1) * -1
    hard_preds = predictions  # >prob
    t = (t - 1) * -1

    ## Below is for getting FPs
    #    FP_list = []
    #    counter = 0
    #    for idx in range(len(tb_test)):
    #        if predictions[idx]==0:#predictions[idx]:
    #            FP_list.append(np.array([xb_test[idx]]))
    #        print(idx)
    #    FP_list = np.concatenate(FP_list, axis=0)
Beispiel #17
0
                x_batch = X[idx]
                out = predict(x_batch)
                preds.append(out)
            if num_batches * batch_size < n:
                # Computing rest
                rest = n - num_batches * batch_size
                idx = range(n - rest, n)
                x_batch = X[idx]
                out = predict(x_batch)
                preds.append(out)
            # Making metadata
            predictions = np.concatenate(preds, axis=0)
            acc_eval = utils.accuracy(predictions, y)
            all_accuracy.append(acc_eval)

            auc_eval = utils.auc(predictions, y)
            all_auc.append(auc_eval)

            roc_eval_fpr, roc_eval_tpr, roc_eval_thresholds = utils.roc(
                predictions, y)
            all_roc_fpr.append(roc_eval_fpr)
            all_roc_tpr.append(roc_eval_tpr)
            all_roc_thresholds.append(roc_eval_thresholds)
            if Print:
                print "  validating: %s loss" % subset
                print "  average evaluation accuracy (%s): %.5f" % (subset,
                                                                    acc_eval)
                print "  average evaluation AUC (%s): %.5f" % (subset,
                                                               auc_eval)
                print
    print "Epoch %d of %d" % (epoch + 1, num_epochs)
Beispiel #18
0
def deconvolve(
    filename,
    res_filename,
    smooth,
    mean_mode,
    title,
    xticks,
    ciu,
    cycles,
    aline,
    indiv_areas,
    print_res=True,
):
    """Handles deconvolution and result processing

    Works as a control center for the package, handling all processes and
    output.

    Args:
        filename: Data file name without file extension
        res_filename: Identifier for result file names
        smooth: Moving average smoothing factor in format:
            [window size, interval]. No smoothing if left empty
        mean_mode: 'der' for second derivative
                   'rel_max' for relative maxima
                   [mean1, mean2, ... ] for given means, where mean is float
                        for x-axis value, int for index
                   [] to return unfitted data
        title: Title forwarded to the plotting and result functions
        xticks: Forwarded to analyse.results
        ciu: Forwarded to utils.plot_things
        cycles: Number of optimisation iterations.
        aline: If True all data will be aligned according to the
            smallest x value for a global maximum in the dataset
        indiv_areas: If True an individual area plot is made for every ATD
            (this is also done when the data holds exactly two entries)
        print_res: If True writes plots and error log to a results folder
                one level above this file.
                   If False returns a dict instead (see Returns)

    Returns:
        If print_res is False:
           retdic: Result dictionary. It is currently always empty, because
               the code that filled it is disabled.
        Otherwise None."""
    datadic = parse.handle_file(filename)
    if aline:  # Align the already parsed data; no need to parse the file twice
        datadic = parse.aline(datadic, filename)
    arrival_time = datadic[filename]
    av_error = []
    areas = []
    fwhms = []
    retdic = {}
    # Create results folder
    script_dir = os.path.abspath(os.path.join(__file__, "../.."))
    results_dir = os.path.join(script_dir, filename + res_filename + '/')
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)
    # Create error log file
    with open(
            results_dir + filename + res_filename + '_errorlog_' +
            str(cycles) + '.txt', 'w') as f:
        for key in sorted(datadic, key=utils.natural_keys):  # Loop over ATDs
            voltage = key
            if voltage == filename:  # Exclude arrival times
                continue
            print(voltage)
            intensities = list(smoother.smooth(datadic[voltage], smooth))
            # Scale factor for normalisation. The float literal keeps integer
            # intensities from truncating the factor to 0 under Python 2.
            norm_factor = 100.0 / max(intensities)
            means = utils.find_means(intensities, arrival_time, mean_mode)
            means.sort()
            print('Mean indices: ' + str(means))
            initial_sds = [0.01 for _ in means]
            initial_heights = [intensities[i] for i in means]
            # The second and fourth return values are not used here.
            fitted_parameters_f, _, fitted_parameters_r, _ = optimisation.run_opt_cycles(
                cycles, arrival_time, intensities, initial_sds,
                initial_heights, means)
            av_par, gausslist, min_er, error = analyse.list_of_gaus(
                arrival_time, intensities, fitted_parameters_f,
                fitted_parameters_r, norm_factor)
            av_error.append(min_er)  # For final average error calculation
            total_area = utils.auc(intensities, arrival_time) * norm_factor
            erind = error.index(min_er)
            best_fit = gausslist[erind]
            # The last two entries of the fit are left out of the per-peak
            # statistics (presumably they are not single peaks -- confirm
            # against analyse.list_of_gaus).
            n_peaks = len(best_fit) - 2
            # For area under the curve plot: each peak's share of the total
            # area in percent
            areacur = [
                ((utils.auc(best_fit[i], arrival_time) * norm_factor) /
                 total_area) * 100 for i in range(n_peaks)
            ]
            if len(datadic) == 2 or indiv_areas:
                utils.indiv_area_plot(areacur, filename, results_dir, title,
                                      voltage)
            # For full width half maximum plot
            fwhmcur = [utils.fwhm(best_fit[i][2]) for i in range(n_peaks)]
            areas.append(areacur)
            fwhms.append(fwhmcur)
            if print_res:  # Create plots and error log
                f.write(key + '\n')
                f.write(str(error[0]) + ' forward error' + '\n')
                f.write(str(error[1]) + ' reverse error' + '\n')
                f.write(str(error[2]) + ' average error' + '\n')
                f.write('\n\n\n')
                f.write('Average gaussian parameters:\n\n')
                f.write(str(erind) + '\n\n')
                f.write('Intenity\tMean\tSd\n')
                for par in av_par:
                    f.write(str(par))
                    f.write('\n')
                f.write('\n\n\n\n')
                # utils.plot_things is a versatile plotting function
                utils.plot_things(arrival_time, [best_fit], filename,
                                  voltage, res_filename, title, ciu)
        if print_res:  # Results concerning full CIU: area plot, FWHM plot
            analyse.results(f, av_error, areas, fwhms, datadic, results_dir,
                            filename, res_filename, title, xticks)
            print(av_error)
        else:
            return retdic
def test_pixel2(X, noise_idx, outlier_method_l, opt):
    """Score outlier detectors against the known outlier rows of ``X``.

    Runs ``mean.compute_tau1_tau0`` plus every method named in
    ``outlier_method_l`` and rates each resulting score vector with
    ``utils.auc``, which is handed the scores of the non-noise rows and the
    scores of the rows listed in ``noise_idx``. Progress and the number of
    true outliers among the top-scored rows are printed.

    Args:
        X: tensor with at least two dimensions; ``X.size(0)`` is the number
            of points and ``X.size(1)`` the feature dimension.
        noise_idx: index tensor naming the rows of ``X`` that are true
            outliers. Presumably a 1-D long tensor on ``utils.device`` --
            TODO confirm against callers.
        outlier_method_l: iterable of method names; supported values are
            'iso forest', 'ell env', 'lof', 'dbscan', 'l2', 'knn', 'tau2'.
        opt: options object. It is mutated in place: ``n``, ``feat_dim``,
            ``remove_p`` (0.2), ``n_top_dir`` (1), ``n_iter`` (1) and
            ``baseline`` ('tau0') are overwritten.

    Returns:
        A list ``[tau1_auc, tau0_auc]`` followed by one AUC per entry of
        ``outlier_method_l``, in the same order.

    Raises:
        ValueError: if ``outlier_method_l`` contains an unsupported name.
    """
    # Column shape so noise_idx broadcasts against the expanded top-k
    # indices in the correctness count at the end.
    noise_idx = noise_idx.unsqueeze(-1)
    print('** {} number of outliers {}'.format(X.size(0), len(noise_idx)))

    opt.n, opt.feat_dim = X.size(0), X.size(1)
    # Percentage of points to remove.
    opt.remove_p = 0.2
    # Number of top directions used for calculating tau0.
    opt.n_top_dir = 1
    opt.n_iter = 1
    # Use select_idx rather than the tau scores when iterating: the taus are
    # scores for the points remaining after outlier removal.
    tau1, select_idx1, n_removed1, tau0, select_idx0, n_removed0 = mean.compute_tau1_tau0(
        X, opt)

    # Count how often each row is listed as noise. reshape(-1) keeps the
    # index 1-D even for a single outlier, where a bare squeeze() would
    # collapse it to a 0-d tensor.
    noise_count = torch.zeros(X.size(0), device=utils.device)
    ones = torch.ones(noise_idx.size(0), device=utils.device)
    noise_count.scatter_add_(dim=0, index=noise_idx.reshape(-1), src=ones)
    # Masks are loop-invariant, so build them once. "> 0" keeps rows that are
    # listed more than once in noise_idx in the outlier group.
    inlier_mask = noise_count == 0
    outlier_mask = noise_count > 0

    opt.baseline = 'tau0'
    scores_l = []

    for method in outlier_method_l:
        if method == 'iso forest':
            tau = baselines.isolation_forest(X)
        elif method == 'ell env':
            tau = baselines.ellenv(X)
        elif method == 'lof':
            tau = baselines.knn_dist_lof(X)
        elif method == 'dbscan':
            tau = baselines.dbscan(X)
        elif method == 'l2':
            tau = baselines.l2(X)
        elif method == 'knn':
            tau = baselines.knn_dist(X)
        elif method == 'tau2':
            # tau2 is computed over every row of X.
            select_idx2 = torch.arange(len(X), device=utils.device)
            tau = mean.compute_tau2(X, select_idx2, opt)
        else:
            raise ValueError('Outlier method {} not supported'.format(method))
        scores_l.append(utils.auc(tau[inlier_mask], tau[outlier_mask]))

    if opt.n_iter > 1:
        # NOTE(review): unreachable while opt.n_iter is hard-coded to 1 above.
        # tau1_auc / tau0_auc are not assigned on this path, so the final
        # return would fail if this branch were ever enabled.
        row_ids = torch.arange(len(X), device=utils.device)
        kept1 = torch.zeros(len(X), device=utils.device)
        kept1[select_idx1] = 1
        outliers_idx1 = row_ids[kept1 == 0]
        kept0 = torch.zeros(len(X), device=utils.device)
        kept0[select_idx0] = 1
        outliers_idx0 = row_ids[kept0 == 0]
        if opt.baseline != 'tau0':
            outliers_idx0 = torch.topk(tau0, k=n_removed0, largest=True)[1]
    else:
        # Top-k on the raw scores; should not be used if n_iter > 1.
        outliers_idx0 = torch.topk(tau0, k=n_removed0, largest=True)[1]
        outliers_idx1 = torch.topk(tau1, k=n_removed1, largest=True)[1]

        # Distribution of true outliers with respect to the predicted scores.
        zeros = torch.zeros(len(tau1), device=utils.device)
        zeros[noise_idx] = 1
        is_inlier = zeros == 0
        is_outlier = zeros == 1

        tau1_auc = utils.auc(tau1[is_inlier], tau1[is_outlier])
        tau0_auc = utils.auc(tau0[is_inlier], tau0[is_outlier])

    print('tau1 size {}'.format(tau1.size(0)))
    # One row of predicted indices per true outlier, so eq() marks where a
    # true outlier index appears among the predictions.
    outliers_idx0_exp = outliers_idx0.unsqueeze(0).expand(len(noise_idx), -1)
    outliers_idx1_exp = outliers_idx1.unsqueeze(0).expand(len(noise_idx), -1)
    assert len(outliers_idx0) == len(outliers_idx1)

    tau0_cor = noise_idx.eq(outliers_idx0_exp).sum()
    tau1_cor = noise_idx.eq(outliers_idx1_exp).sum()
    print('{}_cor {} out of {} tau1_cor {} out of {}'.format(
        opt.baseline, tau0_cor, len(outliers_idx0), tau1_cor,
        len(outliers_idx1)))

    return [tau1_auc, tau0_auc] + scores_l