def MI_Net_with_RC(dataset): """Train and evaluate on MI-Net with residual connection. Parameters ----------------- dataset : dict A dictionary contains all dataset information. We split train/test by keys. Returns ----------------- test_acc : float Testing accuracy of MI-Net with residual connection. """ # load data and convert type train_bags = dataset['train'] test_bags = dataset['test'] # convert bag to batch train_set = convertToBatch(train_bags) test_set = convertToBatch(test_bags) dimension = train_set[0][0].shape[1] # data: instance feature, n*d, n = number of training instance data_input = Input(shape=(dimension,), dtype='float32', name='input') # fully-connected fc1 = Dense(128, activation='relu', W_regularizer=l2(args.weight_decay))(data_input) fc2 = Dense(128, activation='relu', W_regularizer=l2(args.weight_decay))(fc1) fc3 = Dense(128, activation='relu', W_regularizer=l2(args.weight_decay))(fc2) # dropout dropout1 = Dropout(p=0.5)(fc1) dropout2 = Dropout(p=0.5)(fc2) dropout3 = Dropout(p=0.5)(fc3) # residual connection rc1 = RC_block(pooling_mode=args.pooling_mode, name='rc1')(dropout1) rc2 = RC_block(pooling_mode=args.pooling_mode, name='rc2')(dropout2) rc3 = RC_block(pooling_mode=args.pooling_mode, name='rc3')(dropout3) # score sum mg_sum = merge([rc1, rc2, rc3], mode='sum') out = Dense(1, activation='sigmoid', W_regularizer=l2(args.weight_decay))(mg_sum) model = Model(input=[data_input], output=[out]) sgd = SGD(lr=args.init_lr, decay=1e-4, momentum=args.momentum, nesterov=True) model.compile(loss=bag_loss, optimizer=sgd, metrics=[bag_accuracy]) # train model t1 = time.time() num_batch = len(train_set) for epoch in range(args.max_epoch): train_loss, train_acc = train_eval(model, train_set) test_loss, test_acc = test_eval(model, test_set) print 'epoch=', epoch, ' train_loss= {:.3f}'.format(train_loss), ' train_acc= {:.3f}'.format(train_acc), ' test_loss= {:.3f}'.format(test_loss), ' test_acc= {:.3f}'.format(test_acc) t2 = time.time() print 'run time:', 
(t2-t1) / 60, 'min' print 'test_acc={:.3f}'.format(test_acc) return test_acc
def mi_Net(dataset): """Train and evaluate on mi-Net. Parameters ----------------- dataset : dict A dictionary contains all dataset information. We split train/test by keys. Returns ----------------- test_acc : float Testing accuracy of mi-Net. """ # load data and convert type train_bags = dataset['train'] test_bags = dataset['test'] # convert bag to batch train_set = convertToBatch(train_bags) test_set = convertToBatch(test_bags) dimension = train_set[0][0].shape[1] # data: instance feature, n*d, n = number of training instance data_input = Input(shape=(dimension,), dtype='float32', name='input') # fully-connected fc1 = Dense(256, activation='relu', W_regularizer=l2(args.weight_decay))(data_input) fc2 = Dense(128, activation='relu', W_regularizer=l2(args.weight_decay))(fc1) fc3 = Dense(64, activation='relu', W_regularizer=l2(args.weight_decay))(fc2) # dropout dropout = Dropout(0.5)(fc3) # score pooling sp = Score_pooling(output_dim=1, W_regularizer=l2(args.weight_decay), pooling_mode=args.pooling_mode, name='sp')(dropout) model = Model(input=[data_input], output=[sp]) sgd = SGD(lr=args.init_lr, decay=1e-4, momentum=args.momentum, nesterov=True) model.compile(loss=bag_loss, optimizer=sgd, metrics=[bag_accuracy]) # train model t1 = time.time() num_batch = len(train_set) for epoch in range(args.max_epoch): train_loss, train_acc = train_eval(model, train_set) test_loss, test_acc = test_eval(model, test_set) print 'epoch=', epoch, ' train_loss= {:.3f}'.format(train_loss), ' train_acc= {:.3f}'.format(train_acc), ' test_loss={:.3f}'.format(test_loss), ' test_acc= {:.3f}'.format(test_acc) t2 = time.time() print 'run time:', (t2-t1) / 60.0, 'min' print 'test_acc={:.3f}'.format(test_acc) return test_acc
def deepLPI_unit(dataset, dataset_str, bagdict, lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len):
    """Populate *bagdict* with (features, labels) for every usable bag.

    For each bag whose sequence view and structure view agree on the number
    of instances, stores::

        bagdict[bag_name] = ([lncRNA_seq, mRNA_seq, lncRNA_struct, mRNA_struct], labels)

    and appends the bag name to ``bagname``.
    NOTE(review): ``bagname`` is not defined or passed in -- presumably a
    module-level global list; confirm.

    BUG FIX: the training loop previously stored labels taken from
    ``test_lncRNA_set`` alongside training features (copy/paste from the
    test loop); it now uses the matching training labels.

    Parameters
    ----------
    dataset, dataset_str : dict
        Sequence-view and structure-view bag collections (same key layout).
    bagdict : dict
        Mapping to populate in place (also returned).
    lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len : int
        Unused here; kept for interface compatibility with callers.

    Returns
    -------
    dict
        The populated *bagdict*.
    """
    # bag -> batch conversion for every collection that is actually used
    train_mRNA_set = convertToBatch(dataset['train_mRNA'])
    test_mRNA_set = convertToBatch(dataset['test_mRNA'])
    train_lncRNA_set = convertToBatch(dataset['train_lncRNA'])
    test_lncRNA_set = convertToBatch(dataset['test_lncRNA'])
    train_set = convertToBatch(dataset['train'])
    test_set = convertToBatch(dataset['test'])
    train_bags_nm = dataset['train_bags_nm']
    test_bags_nm = dataset['test_bags_nm']

    train_mRNA_set_str = convertToBatch(dataset_str['train_mRNA'])
    test_mRNA_set_str = convertToBatch(dataset_str['test_mRNA'])
    train_lncRNA_set_str = convertToBatch(dataset_str['train_lncRNA'])
    test_lncRNA_set_str = convertToBatch(dataset_str['test_lncRNA'])
    train_set_str = convertToBatch(dataset_str['train'])
    test_set_str = convertToBatch(dataset_str['test'])

    # test bags
    for ibatch, batch in enumerate(test_set):
        # skip bags whose sequence and structure views disagree in size
        if test_set[ibatch][0].shape[0] != test_set_str[ibatch][0].shape[0]:
            continue
        key = test_bags_nm[ibatch].encode('ascii', 'ignore').strip()
        bagdict[key] = ([test_lncRNA_set[ibatch][0], test_mRNA_set[ibatch][0],
                         test_lncRNA_set_str[ibatch][0], test_mRNA_set_str[ibatch][0]],
                        test_lncRNA_set[ibatch][1])
        bagname.append(key)

    # train bags
    for ibatch, batch in enumerate(train_set):
        if train_set[ibatch][0].shape[0] != train_set_str[ibatch][0].shape[0]:
            continue
        key = train_bags_nm[ibatch].encode('ascii', 'ignore').strip()
        # BUG FIX: labels now come from train_lncRNA_set, not test_lncRNA_set
        bagdict[key] = ([train_lncRNA_set[ibatch][0], train_mRNA_set[ibatch][0],
                         train_lncRNA_set_str[ibatch][0], train_mRNA_set_str[ibatch][0]],
                        train_lncRNA_set[ibatch][1])
        bagname.append(key)
    return bagdict
def deepLPI(dataset, dataset_str, lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len, pre_trained_weight):
    """Train deepLPI, load pre-trained weights, and evaluate on the test split.

    Fix: the original built a full Keras model via ``model_func`` and then
    immediately discarded it (``deepLPI_train`` constructs its own model);
    that wasted construction, the unused timing variables, and dead
    commented-out code were removed.

    Parameters
    ----------
    dataset : dict
        Sequence-view bags; test-side keys ('test', 'test_mRNA',
        'test_lncRNA', 'test_bags_nm', 'test_ins_nm') are used here, the
        train-side keys are consumed by deepLPI_train.
    dataset_str : dict
        Structure-view bags with the same key layout.
    lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len : int
        Input lengths forwarded to model construction/training.
    pre_trained_weight : str
        Path of the weight file loaded into the model before evaluation
        (overrides the freshly trained weights).

    Returns
    -------
    tuple
        (test_acc, mean AUC, mean AUPRC) over the (single) evaluation run.
    """
    # unpack the test-side data needed for evaluation
    test_bags_nm = dataset['test_bags_nm']
    test_ins_nm = dataset['test_ins_nm']

    # convert bag to batch
    test_mRNA_set = convertToBatch(dataset['test_mRNA'])
    test_lncRNA_set = convertToBatch(dataset['test_lncRNA'])
    test_set = convertToBatch(dataset['test'])
    test_mRNA_set_str = convertToBatch(dataset_str['test_mRNA'])
    test_lncRNA_set_str = convertToBatch(dataset_str['test_lncRNA'])
    test_set_str = convertToBatch(dataset_str['test'])

    # train, then overwrite the learned weights with the supplied checkpoint
    model = deepLPI_train(dataset, dataset_str, lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len)
    model.load_weights(pre_trained_weight)

    all_auc = []
    all_auprc = []
    test_loss, test_acc, test_auc, test_auprc = test_deepLPI(model, test_set, test_mRNA_set, test_lncRNA_set, test_set_str, test_mRNA_set_str, test_lncRNA_set_str, test_bags_nm, test_ins_nm)
    all_auc.append(test_auc)
    all_auprc.append(test_auprc)

    model.save('model_deepLPI_final.h5')
    return test_acc, np.mean(all_auc), np.mean(all_auprc)
def deepLPI_train(dataset, dataset_str, lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len):
    """Train the deepLPI model with CRF-refined labels.

    Each epoch: (1) predict per-instance scores for every training bag,
    (2) build isoform and lncRNA co-expression networks (WGCNA-style soft
    thresholding of correlation matrices), (3) run a CRF over the scores and
    networks to produce refined labels, (4) from epoch 1 onward replace the
    training labels with the CRF output, then (5) fit the model batch by
    batch. Returns the trained Keras model.

    NOTE(review): the nesting below was reconstructed from whitespace-mangled
    source; the epoch-level placement of the WGCNA/CRF/update steps follows
    the data flow (per-epoch accumulators, run_crf(epoch, ...)) -- confirm
    against the original file.
    """
    # unpack sequence-view bags, names, and instance names
    train_bags = dataset['train']
    test_bags = dataset['test']
    train_mRNA_bags = dataset['train_mRNA']
    test_mRNA_bags = dataset['test_mRNA']
    train_lncRNA_bags = dataset['train_lncRNA']
    test_lncRNA_bags = dataset['test_lncRNA']
    train_bags_nm = dataset['train_bags_nm']
    train_ins_nm = dataset['train_ins_nm']
    test_bags_nm = dataset['test_bags_nm']
    test_ins_nm = dataset['test_ins_nm']
    # unpack structure-view bags (same key layout)
    train_bags_str = dataset_str['train']
    test_bags_str = dataset_str['test']
    train_mRNA_bags_str = dataset_str['train_mRNA']
    test_mRNA_bags_str = dataset_str['test_mRNA']
    train_lncRNA_bags_str = dataset_str['train_lncRNA']
    test_lncRNA_bags_str = dataset_str['test_lncRNA']
    train_bags_nm_str = dataset_str['train_bags_nm']
    train_ins_nm_str = dataset_str['train_ins_nm']
    test_bags_nm_str = dataset_str['test_bags_nm']
    test_ins_nm_str = dataset_str['test_ins_nm']
    # convert bag to batch
    train_mRNA_set = convertToBatch(train_mRNA_bags)
    test_mRNA_set = convertToBatch(test_mRNA_bags)
    train_lncRNA_set = convertToBatch(train_lncRNA_bags)
    test_lncRNA_set = convertToBatch(test_lncRNA_bags)
    train_set = convertToBatch(train_bags)
    test_set = convertToBatch(test_bags)
    dimension = train_set[0][0].shape[0]
    train_mRNA_set_str = convertToBatch(train_mRNA_bags_str)
    test_mRNA_set_str = convertToBatch(test_mRNA_bags_str)
    train_lncRNA_set_str = convertToBatch(train_lncRNA_bags_str)
    test_lncRNA_set_str = convertToBatch(test_lncRNA_bags_str)
    train_set_str = convertToBatch(train_bags_str)
    test_set_str = convertToBatch(test_bags_str)
    dimension_str = train_set_str[0][0].shape[0]
    # build the model from the (project-level) model factory
    model = model_func(lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len)
    # train model
    t1 = time.time()
    num_batch = len(train_set)
    all_auc=[]
    all_auprc=[]
    # expression tables keyed by isoform / lncRNA name
    iso_expr_data_all=get_expr_data("./dataset/isoform_expression_data.txt")
    lnc_expr_data_all=get_expr_data("./dataset/lncRNA_expression_data.txt")
    # width of a zero expression row used when a lncRNA has no expression data
    lncRNA_feature_colum=188 #small dataset
    for epoch in range(args.max_epoch):
        #Training
        # per-epoch accumulators: model scores, CRF bag indices, labels,
        # and expression rows gathered per instance
        initial_score_all = np.array([])
        crf_bag_index=[]
        y_all=np.array([])
        lnc_expr_data=[]
        iso_expr_data=[]
        num_train_batch = len(train_set)
        train_loss = np.zeros((num_train_batch, 1), dtype=float)
        train_acc = np.zeros((num_train_batch, 1), dtype=float)
        for ibatch, batch in enumerate(train_mRNA_set):
            # skip bags whose sequence and structure views disagree in size
            if train_set[ibatch][0].shape[0]!=train_set_str[ibatch][0].shape[0]:
                continue
            y_all=np.hstack((y_all, train_mRNA_set[ibatch][1]))
            # current model scores for this bag (all four input views)
            initial_score_all_ = model.predict_on_batch([train_lncRNA_set[ibatch][0], train_mRNA_set[ibatch][0], train_lncRNA_set_str[ibatch][0], train_mRNA_set_str[ibatch][0]])
            initial_score_all = np.hstack((initial_score_all, np.transpose(initial_score_all_)[0]))
            i=0
            # one bag-index entry per instance in the bag
            for i in range(train_mRNA_set[ibatch][0].shape[0]):
                crf_bag_index.append(ibatch)
            ibag_name=train_bags_nm[ibatch].encode('ascii','ignore').strip()
            # NOTE(review): no-op -- str.replace returns a new string and the
            # result is discarded; likely meant ibag_name = ibag_name.replace(...)
            ibag_name.replace("'","-")
            # lncRNA name is the part of the bag name before the mRNA suffix;
            # names containing '-' keep their first two segments
            if len(ibag_name.split('-'))>2:
                lncRNA_name=ibag_name.split('-')[0]+'-'+ibag_name.split('-')[1]
            else:
                lncRNA_name=ibag_name.split('-')[0]
            for ins in train_ins_nm[ibatch]:
                if lncRNA_name in lnc_expr_data_all:
                    lnc_expr_data.append(lnc_expr_data_all[lncRNA_name])
                else:
                    # unknown lncRNA: pad with a zero expression row
                    lnc_expr_data.append([0] * lncRNA_feature_colum)
                iso_expr_data.append(iso_expr_data_all[ins.encode('ascii','ignore').strip()])  # (was: unicodedata.normalize("NFKD", ins))
        # NOTE(review): np.int is deprecated/removed in modern NumPy
        y_all=np.asarray(y_all, dtype=np.int)
        #WGCNA for isoform expression data
        iso_expr_data=np.asarray(iso_expr_data)
        co_exp_net=np.corrcoef(iso_expr_data)
        # Set nan to be zero (corrcoef yields NaN for constant rows)
        nan_where = np.isnan(co_exp_net)
        co_exp_net[nan_where] = 0
        # Diagnal to be zero (no self-edges)
        for ii in range(co_exp_net.shape[0]):
            co_exp_net[ii, ii] = 0
        # Apply soft threshold (|r|^6, WGCNA-style adjacency)
        co_exp_net = np.fabs(co_exp_net)
        co_exp_net = pow(co_exp_net, 6)
        co_exp_net_isoform=co_exp_net
        #WGCNA for lncRNA expression data
        lnc_expr_data=np.asarray(lnc_expr_data)
        lnc_co_exp_net=np.corrcoef(lnc_expr_data)
        # Set nan to be zero
        lnc_nan_where = np.isnan(lnc_co_exp_net)
        lnc_co_exp_net[lnc_nan_where] = 0
        # Diagnal to be zero
        for ii in range(lnc_co_exp_net.shape[0]):
            lnc_co_exp_net[ii, ii] = 0
        # Apply soft threshold
        lnc_co_exp_net = np.fabs(lnc_co_exp_net)
        lnc_co_exp_net = pow(lnc_co_exp_net, 6)
        co_exp_net_lncRNA=lnc_co_exp_net
        crf_bag_index=np.asarray(crf_bag_index)
        K_training_size=y_all.shape[0]
        K_testing_size=0
        # CRF parameters re-initialized every epoch before refinement
        theta = np.array([1.0, 1.0])
        new_label, theta, pos_prob_crf, unary_potential, pairwise_potential = run_crf(epoch, initial_score_all, y_all, crf_bag_index, co_exp_net_isoform, co_exp_net_lncRNA, K_training_size, K_testing_size, theta, sigma=0.1)
        # from the second epoch onward, replace training labels with the
        # CRF-refined ones (new_label is flat; re-split it bag by bag)
        if epoch > 0:
            s_index=0
            updated_train_label=[]
            for ibatch, batch in enumerate(train_mRNA_set):
                e_index=s_index+train_lncRNA_set[ibatch][1].shape[0]
                updated_train_label.append((train_lncRNA_set[ibatch][0], np.asarray(new_label[s_index:e_index])))
                s_index=e_index
            train_lncRNA_set=updated_train_label
        # fit the model one bag at a time on the (possibly refined) labels
        for ibatch, batch in enumerate(train_mRNA_set):
            if train_set[ibatch][0].shape[0]!=train_set_str[ibatch][0].shape[0] :
                continue
            if train_set[ibatch][0].shape[0]!=train_lncRNA_set[ibatch][1].shape[0]:
                continue
            result = model.train_on_batch([train_lncRNA_set[ibatch][0], train_mRNA_set[ibatch][0], train_lncRNA_set_str[ibatch][0], train_mRNA_set_str[ibatch][0]], train_lncRNA_set[ibatch][1])
            train_loss[ibatch] = result[0]
            train_acc[ibatch] = result[1]
        # per-epoch summary values (means are computed but not otherwise used)
        model, mean_train_loss, mean_train_acc = model, np.mean(train_loss), np.mean(train_acc)
    return model
def extract_data(dataset, dataset_str, lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len):
    """Unpack both dataset views and convert every bag collection to batches.

    Returns a flat 34-element tuple: the raw bag collections and name lists
    of the sequence view, the same for the structure view, then the batched
    sets and the instance-count dimension of each view. The length
    arguments are unused; they are kept for interface compatibility.
    """
    # both views share the same key layout; unpack in a fixed order
    _KEYS = ('train', 'test', 'train_mRNA', 'test_mRNA', 'train_lncRNA',
             'test_lncRNA', 'train_bags_nm', 'train_ins_nm', 'test_bags_nm',
             'test_ins_nm')
    (train_bags, test_bags, train_mRNA_bags, test_mRNA_bags,
     train_lncRNA_bags, test_lncRNA_bags, train_bags_nm, train_ins_nm,
     test_bags_nm, test_ins_nm) = [dataset[k] for k in _KEYS]
    (train_bags_str, test_bags_str, train_mRNA_bags_str, test_mRNA_bags_str,
     train_lncRNA_bags_str, test_lncRNA_bags_str, train_bags_nm_str,
     train_ins_nm_str, test_bags_nm_str, test_ins_nm_str) = [dataset_str[k] for k in _KEYS]

    # batch the sequence view
    train_mRNA_set = convertToBatch(train_mRNA_bags)
    test_mRNA_set = convertToBatch(test_mRNA_bags)
    train_lncRNA_set = convertToBatch(train_lncRNA_bags)
    test_lncRNA_set = convertToBatch(test_lncRNA_bags)
    train_set = convertToBatch(train_bags)
    test_set = convertToBatch(test_bags)
    dimension = train_set[0][0].shape[0]

    # batch the structure view
    train_mRNA_set_str = convertToBatch(train_mRNA_bags_str)
    test_mRNA_set_str = convertToBatch(test_mRNA_bags_str)
    train_lncRNA_set_str = convertToBatch(train_lncRNA_bags_str)
    test_lncRNA_set_str = convertToBatch(test_lncRNA_bags_str)
    train_set_str = convertToBatch(train_bags_str)
    test_set_str = convertToBatch(test_bags_str)
    dimension_str = train_set_str[0][0].shape[0]

    return (train_bags, test_bags, train_mRNA_bags, test_mRNA_bags,
            train_lncRNA_bags, test_lncRNA_bags, train_bags_nm, train_ins_nm,
            test_bags_nm, test_ins_nm, train_bags_str, test_bags_str,
            train_mRNA_bags_str, test_mRNA_bags_str, train_lncRNA_bags_str,
            test_lncRNA_bags_str, train_bags_nm_str, train_ins_nm_str,
            test_bags_nm_str, test_ins_nm_str, train_mRNA_set, test_mRNA_set,
            train_lncRNA_set, test_lncRNA_set, train_set, test_set, dimension,
            train_mRNA_set_str, test_mRNA_set_str, train_lncRNA_set_str,
            test_lncRNA_set_str, train_set_str, test_set_str, dimension_str)
def MI_Net_with_DS(dataset): """Train and evaluate on MI-Net with deep supervision. Parameters ----------------- dataset : dict A dictionary contains all dataset information. We split train/test by keys. Returns ----------------- test_acc : float Testing accuracy of MI-Net with deep supervision. """ # load data and convert type train_bags = dataset['train'] test_bags = dataset['test'] # convert bag to batch train_set = convertToBatch(train_bags) test_set = convertToBatch(test_bags) dimension = train_set[0][0].shape[1] weight = [1.0, 1.0, 1.0, 0.0] # data: instance feature, n*d, n = number of training instance data_input = Input(shape=(dimension, ), dtype='float32', name='input') # fully-connected fc1 = Dense(256, activation='relu', kernel_regularizer=l2(args.weight_decay))(data_input) fc2 = Dense(128, activation='relu', kernel_regularizer=l2(args.weight_decay))(fc1) fc3 = Dense(64, activation='relu', kernel_regularizer=l2(args.weight_decay))(fc2) # dropout dropout1 = Dropout(rate=0.5)(fc1) dropout2 = Dropout(rate=0.5)(fc2) dropout3 = Dropout(rate=0.5)(fc3) # features pooling fp1 = Feature_pooling(output_dim=1, kernel_regularizer=l2(args.weight_decay), pooling_mode=args.pooling_mode, name='fp1')(dropout1) fp2 = Feature_pooling(output_dim=1, kernel_regularizer=l2(args.weight_decay), pooling_mode=args.pooling_mode, name='fp2')(dropout2) fp3 = Feature_pooling(output_dim=1, kernel_regularizer=l2(args.weight_decay), pooling_mode=args.pooling_mode, name='fp3')(dropout3) # score average mg_ave = average([fp1, fp2, fp3], name='ave') model = Model(inputs=[data_input], outputs=[fp1, fp2, fp3, mg_ave]) sgd = SGD(lr=args.init_lr, decay=1e-4, momentum=args.momentum, nesterov=True) model.compile(loss={ 'fp1': bag_loss, 'fp2': bag_loss, 'fp3': bag_loss, 'ave': bag_loss }, loss_weights={ 'fp1': weight[0], 'fp2': weight[1], 'fp3': weight[2], 'ave': weight[3] }, optimizer=sgd, metrics=[bag_accuracy]) # train model t1 = time.time() num_batch = len(train_set) for epoch in 
range(args.max_epoch): train_loss, train_acc = train_eval(model, train_set) test_loss, test_acc = test_eval(model, test_set) print 'epoch=', epoch, ' train_loss= {:.3f}'.format( train_loss), ' train_acc= {:.3f}'.format( train_acc), ' test_loss={:.3f}'.format( test_loss), ' test_acc= {:.3f}'.format(test_acc) t2 = time.time() print 'run time:', (t2 - t1) / 60, 'min' print 'test_acc={:.3f}'.format(test_acc) return test_acc