def RNN_model(folds):
    """Train the mixed CNN on ten balanced resamplings of a fold and test each.

    The positive and negative training frames are shuffled once with a fixed
    seed.  Iteration ``t`` pairs every positive row with the ``t``-th
    consecutive slice of the shuffled negatives (slice length = number of
    positives), builds a model with ``mixallCNNmodel``, predicts the test
    set, thresholds column 1 of the prediction at 0.5 and then

    * appends ``"<t> <calculate_performance(...)>"`` to
      ``result/Revaluation.txt``;
    * writes the (true label, score) pairs, tab separated, to
      ``result/Rresult-<t>.txt``.

    Args:
        folds: Fold identifier.  It is interpolated into the
            ``N_data/train/<folds>/`` paths and forwarded to
            ``mixallCNNmodel``.

    Returns:
        None.  All results are written to disk.
    """
    pos_train, neg_train = dp.split_train(
        'N_data/train/' + str(folds) + '/train.fasta')

    # Test set.
    # NOTE(review): the test set is encoded with dp.phy_decode_all while the
    # training and validation sets below are encoded with dp.decode --
    # confirm that mixing the two encodings is intended.
    sequence, label = dp.get_data_test('N_data/test/test.fa')
    test_X, test_Y = dp.phy_decode_all(sequence, label)
    testX, testY, _ = dp.reshape(test_X, test_Y)
    del sequence, label

    # Validation set of this fold.
    sequence2, label2 = dp.get_data_val(
        'N_data/train/' + str(folds) + '/val.fasta')
    val_X, val_Y = dp.decode(sequence2, label2)
    valX, valY, _ = dp.reshape(val_X, val_Y)
    del sequence2, label2

    # Both shuffles use a fixed seed, so they give the same order on every
    # iteration; doing them once outside the loop changes nothing.
    pos_df = pos_train.sample(frac=1, random_state=1)
    neg_df = neg_train.sample(frac=1, random_state=1)
    n_pos = len(pos_df)

    iteration_times = 10
    for t in range(iteration_times):
        print("iteration_times: %d" % t)

        # t-th block of negatives, the same size as the positive set.  The
        # slice is shorter (possibly empty) once the negatives run out.
        n_df = neg_df[n_pos * t:n_pos * (t + 1)]
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df_all = pd.concat([pos_df, n_df])
        df_all = df_all.sample(frac=1, random_state=1)

        sequence1, label1 = dp.cut_train(df_all, 50)
        train_X, train_Y = dp.decode(sequence1, label1)
        # The third value returned by dp.reshape is handed to the network
        # builder unchanged.
        trainX, trainY, net_input = dp.reshape(train_X, train_Y)

        physical_all_model = mixallCNNmodel(
            trainX, trainY, valX, valY, net_input, folds, train_time=t)

        predict_weighted_merge = physical_all_model.predict(testX)
        scores = predict_weighted_merge[:, 1]
        # 1.0 where the score reaches 0.5, else 0.0, in the score's dtype.
        predict_classes = (scores >= 0.5).astype(scores.dtype)

        with open('result/Revaluation.txt', mode='a') as resFile:
            resFile.write(
                str(t) + " "
                + calculate_performance(
                    len(testY), testY[:, 1], predict_classes, scores)
                + '\r\n')

        result = pd.DataFrame(np.column_stack((testY[:, 1], scores)))
        result.to_csv(
            path_or_buf='result/Rresult' + '-' + str(t) + '.txt',
            index=False, header=None, sep='\t', quoting=csv.QUOTE_NONE)
def fig1():
    """Plot the ROC curves of seven saved networks and of their ensemble.

    The test sequences are encoded once per network, each saved model
    predicts its own encoding, and column 1 of every prediction is used as
    the score against column 1 of the one-hot test labels.  The ensemble
    prediction is the sum of the individual predictions weighted by
    ``normalization_softmax`` of the inverse ``val_loss`` values stored in
    the loss checkpoint files.  The figure is saved to ``./ROC.png``.

    Each model's prediction is computed once (``batch_size=2048``) and
    reused for the ensemble instead of running ``predict`` a second time.

    Returns:
        None.
    """
    # One row per network, in the order the ensemble weights are stacked:
    # (key, test-set encoder, saved model file, saved loss checkpoint).
    # NOTE(review): the alpha-turn model is loaded from 'model/5/...' while
    # every other model -- and the alpha-turn loss file -- comes from
    # 'model/8/...'.  This looks like a typo; confirm before changing.
    branches = [
        ('onehot', dp.decode,
         'model/8/model/5OnehotNetwork.h5',
         'model/8/loss/5Onehotloss.json'),
        ('O', dp.phy_decode_O,
         'model/8/model/5OtherNetwork.h5',
         'model/8/loss/5Otherloss.json'),
        ('P', dp.phy_decode_P,
         'model/8/model/5PhysicochemicalNetwork.h5',
         'model/8/loss/5Physicochemicalloss.json'),
        ('H', dp.phy_decode_H,
         'model/8/model/5HydrophobicityNetwork.h5',
         'model/8/loss/5Hydrophobicityloss.json'),
        ('C', dp.phy_decode_C,
         'model/8/model/5CompositionNetwork.h5',
         'model/8/loss/5Compositionloss.json'),
        ('B', dp.phy_decode_B,
         'model/8/model/5BetapropensityNetwork.h5',
         'model/8/loss/5Betapropensityloss.json'),
        ('A', dp.phy_decode_A,
         'model/5/model/5AlphaturnpropensityNetwork.h5',
         'model/8/loss/5Alphaturnpropensityloss.json'),
    ]
    # Curves are drawn, and so listed in the legend, in this order.
    curve_styles = [
        ('onehot', '#0000FF', 'One hot net(AUC=%0.2f)'),
        ('A', '#00BFFF', 'alpha propensity net(AUC=%0.2f)'),
        ('B', '#00FFFF', 'beta propensity net(AUC=%0.2f)'),
        ('C', '#00FF00', 'Composition net(AUC=%0.2f)'),
        ('H', '#6B8E23', 'Hydrophobicity net(AUC=%0.2f)'),
        ('P', '#B8860B', 'Phy-chemi properties net(AUC=%0.2f)'),
        ('O', '#FFA500', 'Other properties net(AUC=%0.2f)'),
    ]

    # ---- encode the test set once per network ----
    sequence, label = dp.get_data_test('N_data/test/test.fa')
    encoded = {}
    labels_by_key = {}
    for key, encode, _, _ in branches:
        raw_X, raw_Y = encode(sequence, label)
        encoded[key], labels_by_key[key], _ = dp.reshape(raw_X, raw_Y)
    del sequence, label
    print("Test data coding finished!")

    # Ground truth is taken from the one-hot encoding, as before.
    testY = labels_by_key['onehot']
    true_class = testY[:, 1]

    # ---- per-network predictions and ROC statistics ----
    probas = {}
    roc = {}
    for key, _, model_path, _ in branches:
        model = load_model(model_path)
        probas[key] = model.predict(encoded[key], batch_size=2048)
        fpr, tpr, _ = roc_curve(true_class, probas[key][:, 1])
        roc[key] = (fpr, tpr, auc(fpr, tpr))

    # ---- ensemble: softmax-normalised inverse validation loss ----
    monitor = 'val_loss'
    weights = []
    for _, _, _, loss_path in branches:
        with open(loss_path, 'r') as checkpoint_fp:
            weights.append(1 / float(json.load(checkpoint_fp)[monitor]))
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; dtype=float yields the same float64 array.
    weight_array = np.array(weights, dtype=float)
    weight_array = normalization_softmax(weight_array)

    predict_weighted_merge = 0
    for i, (key, _, _, _) in enumerate(branches):
        predict_weighted_merge += weight_array[i] * probas[key]

    ensemble_score = predict_weighted_merge[:, 1]
    # 1.0 where the score reaches 0.5, else 0.0, in the score's dtype.
    predict_classes = (ensemble_score >= 0.5).astype(ensemble_score.dtype)

    # NOTE(review): calculate_performance is called here with three
    # arguments and unpacked into five values, whereas the other callers in
    # this file pass four arguments and use the result as a string --
    # confirm which signature is current.
    fprE, tprE, roc_aucE, _, _ = calculate_performance(
        testY[:, 1].tolist(), predict_classes.tolist(), ensemble_score)

    # ---- draw the ROC figure ----
    plt.figure()
    for key, colour, legend in curve_styles:
        fpr, tpr, roc_auc = roc[key]
        plt.plot(fpr, tpr, color=colour, lw=2, linestyle='-',
                 label=legend % roc_auc)
    plt.plot(fprE, tprE, color='#FF0000', lw=2, linestyle='-',
             label='Ensemble net(AUC=%0.2f)' % roc_aucE)
    # plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='-.')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic Curve')
    plt.legend(loc="lower right")
    plt.savefig('./ROC.png')
    print('aa')
def run_model(folds):
    """Train the seven-network ensemble on ten balanced resamplings of a fold.

    The test set and the fold's validation set are encoded once with each of
    the seven encoders.  The positive and negative training frames are
    shuffled once with a fixed seed.  Iteration ``t`` then

    1. pairs every positive row with the ``t``-th consecutive slice of the
       shuffled negatives (slice length = number of positives);
    2. encodes that training set seven ways and calls the seven network
       builders;
    3. reads ``val_loss`` from ``model/<folds>/loss/<t><Name>loss.json`` for
       each network (presumably written by the builders -- verify) and turns
       the inverse losses into weights with ``normalization_softmax``;
    4. sums the weighted test predictions and thresholds column 1 at 0.5;
    5. appends ``"<t> <calculate_performance(...)>"`` to
       ``result/<folds>/evaluation.txt`` and writes the (true label, score)
       pairs, tab separated, to ``result/<folds>/result-<t>.txt``.

    Args:
        folds: Fold identifier.  It is interpolated into the data, model and
            result paths and forwarded to every network builder.

    Returns:
        None.  All results are written to disk.
    """
    # One row per network, in the order they are built and their ensemble
    # weights are stacked:
    # (sequence encoder, network builder, stem of the loss file name).
    branches = [
        (dp.decode, OnehotNetwork, 'Onehot'),
        (dp.phy_decode_O, OtherNetwork, 'Other'),
        (dp.phy_decode_P, PhysicochemicalNetwork, 'Physicochemical'),
        (dp.phy_decode_H, HydrophobicityNetwork, 'Hydrophobicity'),
        (dp.phy_decode_C, CompositionNetwork, 'Composition'),
        (dp.phy_decode_B, BetapropensityNetwork, 'Betapropensity'),
        (dp.phy_decode_A, AlphaturnpropensityNetwork, 'Alphaturnpropensity'),
    ]

    def encode_all(sequences, labels):
        """Return one dp.reshape(...) triple per branch, in branch order."""
        return [dp.reshape(*encode(sequences, labels))
                for encode, _, _ in branches]

    pos_train, neg_train = dp.split_train(
        'N_data/train/' + str(folds) + '/train.fasta')

    # Test set.
    sequence, label = dp.get_data_test('N_data/test/test.fa')
    test_sets = encode_all(sequence, label)
    del sequence, label
    print("Test data coding finished!")
    # Ground truth comes from the one-hot encoding (first branch).
    testY = test_sets[0][1]

    # Validation set of this fold.
    sequence2, label2 = dp.get_data_val(
        'N_data/train/' + str(folds) + '/val.fasta')
    val_sets = encode_all(sequence2, label2)
    del sequence2, label2
    print("Val data coding finished!")

    # Both shuffles use a fixed seed, so they give the same order on every
    # iteration; doing them once outside the loop changes nothing.
    pos_df = pos_train.sample(frac=1, random_state=1)
    neg_df = neg_train.sample(frac=1, random_state=1)
    n_pos = len(pos_df)

    iteration_times = 10  # number of resampling rounds
    for t in range(iteration_times):
        print("iteration_times: %d" % t)

        # t-th block of negatives, the same size as the positive set.  The
        # slice is shorter (possibly empty) once the negatives run out.
        n_df = neg_df[n_pos * t:n_pos * (t + 1)]
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df_all = pd.concat([pos_df, n_df])
        df_all = df_all.sample(frac=1, random_state=1)

        sequence1, label1 = dp.cut_train(df_all, 50)
        train_sets = encode_all(sequence1, label1)
        print("itreation %d times Train data coding finished!" % t)

        # Build one network per branch on its own encoding.  The third value
        # returned by dp.reshape is handed to the builder unchanged.
        models = []
        for branch, train_set, val_set in zip(branches, train_sets, val_sets):
            build_network = branch[1]
            trainX, trainY, net_input = train_set
            valX, valY, _ = val_set
            models.append(build_network(
                trainX, trainY, valX, valY, net_input, folds, train_time=t))
        print("itreation %d times training finished!" % t)

        # Ensemble weights: softmax-normalised inverse validation loss.
        monitor = 'val_loss'
        weights = []
        for _, _, stem in branches:
            loss_path = ('model/' + str(folds) + '/loss/' + str(t)
                         + stem + 'loss.json')
            with open(loss_path, 'r') as checkpoint_fp:
                weights.append(1 / float(json.load(checkpoint_fp)[monitor]))
        # np.float was only an alias of the builtin float and was removed in
        # NumPy 1.24; dtype=float yields the same float64 array.
        weight_array = np.array(weights, dtype=float)
        print("Loss chick point %d times finished!" % t)
        weight_array = normalization_softmax(weight_array)

        predict_weighted_merge = 0
        for i, (model, test_set) in enumerate(zip(models, test_sets)):
            predict_weighted_merge += weight_array[i] * model.predict(test_set[0])

        scores = predict_weighted_merge[:, 1]
        # 1.0 where the score reaches 0.5, else 0.0, in the score's dtype.
        predict_classes = (scores >= 0.5).astype(scores.dtype)

        with open('result/' + str(folds) + '/evaluation.txt', mode='a') as resFile:
            resFile.write(
                str(t) + " "
                + calculate_performance(
                    len(testY), testY[:, 1].tolist(),
                    predict_classes.tolist(), scores)
                + '\r\n')

        result = pd.DataFrame(np.column_stack((testY[:, 1], scores)))
        result.to_csv(
            path_or_buf='result/' + str(folds) + '/result' + '-' + str(t) + '.txt',
            index=False, header=None, sep='\t', quoting=csv.QUOTE_NONE)