def RNN_model(folds):
    """Build and evaluate the mixed CNN model on ten resamples of one fold.

    For each of ten iterations the full (shuffled) positive set is combined
    with the ``t``-th consecutive slice of the shuffled negative set, the
    slice being as long as the positive set.  A model is obtained from
    ``mixallCNNmodel`` for that sample, the test set is scored with it, one
    line is appended to ``result/Revaluation.txt`` and the per-sample
    (true label, score) pairs are written to ``result/Rresult-<t>.txt``.

    Args:
        folds: Fold identifier; used to build the ``N_data/train/<folds>/``
            paths and forwarded to ``mixallCNNmodel``.
    """
    pos_train, neg_train = dp.split_train('N_data/train/' + str(folds) + '/train.fasta')

    # Test set.
    # NOTE(review): the test set is encoded with dp.phy_decode_all, whereas
    # the validation and training sets below are encoded with dp.decode --
    # confirm that both encodings match what the model expects.
    sequence, label = dp.get_data_test('N_data/test/test.fa')
    test_X, test_Y = dp.phy_decode_all(sequence, label)
    testX, testY, _ = dp.reshape(test_X, test_Y)
    del sequence, label

    # Validation set for this fold.
    sequence2, label2 = dp.get_data_val('N_data/train/' + str(folds) + '/val.fasta')
    val_X, val_Y = dp.decode(sequence2, label2)
    valX, valY, _ = dp.reshape(val_X, val_Y)
    del sequence2, label2

    # Both shuffles use a fixed seed, so they give the same order on every
    # iteration; compute them once instead of once per iteration.
    pos_df = pos_train.sample(frac=1, random_state=1)
    neg_df = neg_train.sample(frac=1, random_state=1)
    n_pos = len(pos_df)

    iteration_times = 10
    for t in range(iteration_times):
        print("iteration_times: %d" % t)
        # t-th slice of negatives, as long as the positive set.  The slice is
        # shorter (possibly empty) once the negatives run out.
        n_df = neg_df[n_pos * t:n_pos * (t + 1)]

        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df_all = pd.concat([pos_df, n_df]).sample(frac=1, random_state=1)

        sequence1, label1 = dp.cut_train(df_all, 50)
        train_X, train_Y = dp.decode(sequence1, label1)
        trainX, trainY, net_input = dp.reshape(train_X, train_Y)

        physical_all_model = mixallCNNmodel(trainX, trainY, valX, valY, net_input, folds, train_time=t)

        predict_weighted_merge = physical_all_model.predict(testX)
        positive_scores = predict_weighted_merge[:, 1]
        # Hard 0/1 labels at a 0.5 cut-off, kept in the scores' dtype.
        predict_classes = (positive_scores >= 0.5).astype(positive_scores.dtype)

        # The with-block closes the file; no explicit close() is needed.
        with open('result/Revaluation.txt', mode='a') as resFile:
            resFile.write(str(t) + " " + calculate_performance(len(testY), testY[:, 1], predict_classes,
                                                               positive_scores) + '\r\n')

        result = pd.DataFrame(np.column_stack((testY[:, 1], positive_scores)))
        result.to_csv(path_or_buf='result/Rresult' + '-' + str(t) + '.txt', index=False, header=None, sep='\t',
                      quoting=csv.QUOTE_NONE)
# Example no. 2
# 0
def fig1():
    """Plot ROC curves of the seven sub-networks and their weighted ensemble.

    The test set is encoded once per feature encoding, each saved network
    scores its own encoding, and the scores are merged with weights obtained
    by passing the reciprocal of each network's recorded ``val_loss``
    through ``normalization_softmax``.  All eight ROC curves are drawn on
    one figure and saved to ``./ROC.png``.

    The ensemble curve is computed with ``roc_curve``/``auc`` exactly like
    the individual curves.  The earlier 3-argument, 5-value call to
    ``calculate_performance`` did not match the 4-argument, string-returning
    usage elsewhere in this file.
    """
    model_dir = 'model/8/model/'
    loss_dir = 'model/8/loss/'
    monitor = 'val_loss'

    # One row per sub-network, in the order the ensemble weights are applied:
    # (key, encoder, model file, loss file, curve colour, legend label).
    branches = [
        ('onehot', dp.decode, model_dir + '5OnehotNetwork.h5',
         loss_dir + '5Onehotloss.json', '#0000FF', 'One hot net'),
        ('O', dp.phy_decode_O, model_dir + '5OtherNetwork.h5',
         loss_dir + '5Otherloss.json', '#FFA500', 'Other properties net'),
        ('P', dp.phy_decode_P, model_dir + '5PhysicochemicalNetwork.h5',
         loss_dir + '5Physicochemicalloss.json', '#B8860B', 'Phy-chemi properties net'),
        ('H', dp.phy_decode_H, model_dir + '5HydrophobicityNetwork.h5',
         loss_dir + '5Hydrophobicityloss.json', '#6B8E23', 'Hydrophobicity net'),
        ('C', dp.phy_decode_C, model_dir + '5CompositionNetwork.h5',
         loss_dir + '5Compositionloss.json', '#00FF00', 'Composition net'),
        ('B', dp.phy_decode_B, model_dir + '5BetapropensityNetwork.h5',
         loss_dir + '5Betapropensityloss.json', '#00FFFF', 'beta propensity net'),
        # NOTE(review): this is the only network loaded from model/5 while its
        # loss file and every other network come from model/8 -- confirm that
        # the directory is intentional.
        ('A', dp.phy_decode_A, 'model/5/model/5AlphaturnpropensityNetwork.h5',
         loss_dir + '5Alphaturnpropensityloss.json', '#00BFFF', 'alpha propensity net'),
    ]
    # Order in which the individual curves are drawn and listed in the legend.
    plot_order = ['onehot', 'A', 'B', 'C', 'H', 'P', 'O']

    # Encode the test set once per branch; the true labels are taken from
    # the one-hot encoding.
    sequence, label = dp.get_data_test('N_data/test/test.fa')
    features = {}
    true_class = None
    for key, encoder, _model_path, _loss_path, _colour, _legend in branches:
        encoded_X, encoded_Y = encoder(sequence, label)
        branch_X, branch_Y, _ = dp.reshape(encoded_X, encoded_Y)
        features[key] = branch_X
        if key == 'onehot':
            true_class = branch_Y[:, 1]
    del sequence, label
    print("Test data coding finished!")

    # Score the test set once per network; the same predictions feed both the
    # individual curves and the ensemble.
    scores = {}
    inverse_losses = []
    for key, _encoder, model_path, loss_path, _colour, _legend in branches:
        scores[key] = load_model(model_path).predict(features[key], batch_size=2048)
        with open(loss_path, 'r') as checkpoint_fp:
            inverse_losses.append(1 / float(json.load(checkpoint_fp)[monitor]))

    # np.float64 replaces the np.float alias, which NumPy 1.24 removed.
    weight_array = normalization_softmax(np.array(inverse_losses, dtype=np.float64))

    predict_weighted_merge = 0
    for index, branch in enumerate(branches):
        predict_weighted_merge = predict_weighted_merge + weight_array[index] * scores[branch[0]]

    # ROC curves.
    styles = {branch[0]: (branch[4], branch[5]) for branch in branches}
    plt.figure()
    for key in plot_order:
        colour, legend = styles[key]
        fpr, tpr, _ = roc_curve(true_class, scores[key][:, 1])
        plt.plot(fpr,
                 tpr,
                 color=colour,
                 lw=2,
                 linestyle='-',
                 label='%s(AUC=%0.2f)' % (legend, auc(fpr, tpr)))

    fprE, tprE, _ = roc_curve(true_class, predict_weighted_merge[:, 1])
    plt.plot(fprE,
             tprE,
             color='#FF0000',
             lw=2,
             linestyle='-',
             label='Ensemble net(AUC=%0.2f)' % auc(fprE, tprE))
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic Curve')
    plt.legend(loc="lower right")
    plt.savefig('./ROC.png')
    print('aa')
def _encode_branches(encoders, sequence, label):
    """Encode one (sequence, label) set with every encoder in ``encoders``.

    Returns a list, in ``encoders`` order, holding the 3-tuple that
    ``dp.reshape`` returns for each encoding.
    """
    encoded = []
    for encoder in encoders:
        raw_X, raw_Y = encoder(sequence, label)
        encoded.append(dp.reshape(raw_X, raw_Y))
    return encoded


def run_model(folds):
    """Build the seven sub-networks on ten resamples and score their ensemble.

    For each of ten iterations the full (shuffled) positive set is combined
    with the ``t``-th consecutive slice of the shuffled negative set, the
    slice being as long as the positive set.  Each network builder is called
    on its own encoding of that sample.  The test-set predictions are merged
    with weights obtained by passing the reciprocal of each network's
    recorded ``val_loss`` through ``normalization_softmax``.  One line per
    iteration is appended to ``result/<folds>/evaluation.txt`` and the
    (true label, score) pairs go to ``result/<folds>/result-<t>.txt``.

    Args:
        folds: Fold identifier; used to build the data, loss and result
            paths and forwarded to every network builder.
    """
    # One row per sub-network, in the order the ensemble weights are applied:
    # (network builder, sequence encoder, stem of the loss-file name).
    branches = [
        (OnehotNetwork, dp.decode, 'Onehot'),
        (OtherNetwork, dp.phy_decode_O, 'Other'),
        (PhysicochemicalNetwork, dp.phy_decode_P, 'Physicochemical'),
        (HydrophobicityNetwork, dp.phy_decode_H, 'Hydrophobicity'),
        (CompositionNetwork, dp.phy_decode_C, 'Composition'),
        (BetapropensityNetwork, dp.phy_decode_B, 'Betapropensity'),
        (AlphaturnpropensityNetwork, dp.phy_decode_A, 'Alphaturnpropensity'),
    ]
    encoders = [encoder for _, encoder, _ in branches]

    pos_train, neg_train = dp.split_train('N_data/train/' + str(folds) + '/train.fasta')

    # Test set, encoded once per branch; the true labels are taken from the
    # one-hot encoding (first branch).
    sequence, label = dp.get_data_test('N_data/test/test.fa')
    test_sets = _encode_branches(encoders, sequence, label)
    testY = test_sets[0][1]
    del sequence, label
    print("Test data coding finished!")

    # Validation set for this fold, encoded once per branch.
    sequence2, label2 = dp.get_data_val('N_data/train/' + str(folds) + '/val.fasta')
    val_sets = _encode_branches(encoders, sequence2, label2)
    del sequence2, label2
    print("Val data coding finished!")

    # Both shuffles use a fixed seed, so they give the same order on every
    # iteration; compute them once instead of once per iteration.
    pos_df = pos_train.sample(frac=1, random_state=1)
    neg_df = neg_train.sample(frac=1, random_state=1)
    n_pos = len(pos_df)

    monitor = 'val_loss'
    loss_dir = 'model/' + str(folds) + '/loss/'

    iteration_times = 10  # number of resampling rounds
    for t in range(iteration_times):
        print("iteration_times: %d" % t)
        # t-th slice of negatives, as long as the positive set.  The slice is
        # shorter (possibly empty) once the negatives run out.
        n_df = neg_df[n_pos * t:n_pos * (t + 1)]

        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df_all = pd.concat([pos_df, n_df]).sample(frac=1, random_state=1)

        sequence1, label1 = dp.cut_train(df_all, 50)
        train_sets = _encode_branches(encoders, sequence1, label1)
        print("itreation %d times Train data coding finished!" % t)

        # Call each network builder on its own encoding, in branch order.
        models = []
        for (network, _, _), (trainX, trainY, net_input), (valX, valY, _) in zip(
                branches, train_sets, val_sets):
            models.append(network(trainX, trainY, valX, valY, net_input, folds, train_time=t))
        print("itreation %d times training finished!" % t)

        # Ensemble weights come from the per-network loss files for this
        # iteration: the reciprocal of the recorded val_loss.
        weights = []
        for _, _, loss_stem in branches:
            with open(loss_dir + str(t) + loss_stem + 'loss.json', 'r') as checkpoint_fp:
                weights.append(1 / float(json.load(checkpoint_fp)[monitor]))

        # np.float64 replaces the np.float alias, which NumPy 1.24 removed.
        weight_array = np.array(weights, dtype=np.float64)
        print("Loss chick point %d times finished!" % t)

        weight_array = normalization_softmax(weight_array)

        predict_weighted_merge = 0
        for index, (model, (testX, _, _)) in enumerate(zip(models, test_sets)):
            predict_weighted_merge = predict_weighted_merge + weight_array[index] * model.predict(testX)

        positive_scores = predict_weighted_merge[:, 1]
        # Hard 0/1 labels at a 0.5 cut-off, kept in the scores' dtype.
        predict_classes = (positive_scores >= 0.5).astype(positive_scores.dtype)

        # The with-block closes the file; no explicit close() is needed.
        with open('result/' + str(folds) + '/evaluation.txt', mode='a') as resFile:
            resFile.write(str(t) + " " + calculate_performance(len(testY), testY[:, 1].tolist(), predict_classes.tolist(),
                                                               positive_scores) + '\r\n')

        result = pd.DataFrame(np.column_stack((testY[:, 1], positive_scores)))
        result.to_csv(path_or_buf='result/' + str(folds) + '/result' + '-' + str(t) + '.txt', index=False, header=None, sep='\t',
                      quoting=csv.QUOTE_NONE)