Esempio n. 1
0
def pred_sep_model(model, X_test, y_test):
    """Evaluate a fitted two-input model on held-out data and print a report.

    ``X_test`` is passed to both inputs of ``model``.  ROC AUC is derived from
    column 0 of the one-hot labels and column 0 of the predicted scores; the
    remaining metrics come from ``utils.calculate_performace`` applied to the
    predicted classes.  The function prints one summary line and returns
    nothing.
    """
    scores = model.predict([X_test, X_test])

    # ROC curve on the first class column.
    onehot_truth = utils.to_categorical(y_test)
    fpr, tpr, _ = roc_curve(onehot_truth[:, 0], scores[:, 0])
    roc_auc = auc(fpr, tpr)

    # Threshold-free class decision, then the confusion-matrix metrics.
    predicted = utils.categorical_probas_to_classes(scores)
    (acc, precision, npv, sensitivity, specificity, mcc,
     f1) = utils.calculate_performace(len(predicted), predicted, y_test)

    report_values = (acc, precision, npv, sensitivity, specificity, mcc, f1,
                     roc_auc)
    print(
        'DeepPPI-sep:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,f1=%f,roc_auc=%f'
        % report_values)
Esempio n. 2
0
def train_sep_model(protein):
    """Train the two-input model from ``get_sep_model`` on a feature table.

    The first four columns of ``protein`` are read as float features and the
    fifth column as integer labels.  Rows are shuffled with NumPy's global
    random generator seeded to 1, the labels are one-hot encoded, and the
    feature matrix is supplied to both model inputs.  Returns the fitted
    model.
    """
    features = protein.iloc[:, :4].values.astype("float")
    labels = protein.iloc[:, 4].values.astype("int")

    # Reproducible shuffle: the same permutation is applied to rows and labels.
    np.random.seed(1)
    order = list(range(len(labels)))
    np.random.shuffle(order)
    features, labels = features[order], labels[order]

    model = get_sep_model()
    onehot_labels = utils.to_categorical(labels)
    # No validation split is used; the whole table is training data.
    model.fit([features, features],
              onehot_labels,
              nb_epoch=100,
              batch_size=32,
              verbose=0)
    return model
Esempio n. 3
0
    return meta_train_fusion, meta_test_fusion, pre_score


# Leave-one-out evaluation of the second-level (stacked) model.
X = train_data
y = train_label
loo = LeaveOneOut()
sepscores = []

# One-hot encoding of the full label vector.  It does not depend on the fold,
# so it is built once here rather than on every iteration.
y_sparse = utils.to_categorical(y)

# Per-fold outputs are gathered in lists and stacked once after the loop;
# repeatedly np.vstack-ing onto a growing array copies it on every fold.
fold_scores = []
fold_classes = []

for train, test in loo.split(X):
    X_train = X[train]
    y_train = y[train]
    X_test = X[test]
    y_test = y[test]
    meta_train, meta_test, y_predict_score = get_second_level(
        X_train, y_train, X_test, num_class)
    y_predict_class = utils.categorical_probas_to_classes(y_predict_score)
    fold_scores.append(y_predict_score)
    fold_classes.append(y_predict_class)

# One row per left-out sample, in the order the folds were produced.
y_score = np.vstack(fold_scores)
# Column of predicted class labels, stored as float.
y_class = np.vstack(fold_classes).astype(float)

# ROC/AUC on the first class column, then the confusion-matrix metrics.
fpr, tpr, _ = roc_curve(y_sparse[:, 0], y_score[:, 0])
roc_auc = auc(fpr, tpr)
acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
    len(y_class), y_class, y)
Esempio n. 4
0
# 10-fold stratified cross-validation of an AdaBoost classifier.
shu = scale(data)

X, y_ = get_shuffle(shu, label)
y = y_

sepscores = []  # one metric row per fold

cv_clf = AdaBoostClassifier()

skf = StratifiedKFold(n_splits=10)
# Seed rows that the per-fold one-hot labels / scores are stacked beneath
# (initialised once; the earlier duplicate initialisation was redundant).
ytest = np.ones((1, 2)) * 0.5
yscore = np.ones((1, 2)) * 0.5
for train, test in skf.split(X, y):
    y_train = utils.to_categorical(y[train])
    # The same estimator object is refitted on each fold's training rows.
    hist = cv_clf.fit(X[train], y[train])
    y_score = cv_clf.predict_proba(X[test])
    yscore = np.vstack((yscore, y_score))
    y_test = utils.to_categorical(y[test])
    ytest = np.vstack((ytest, y_test))
    # Per-fold ROC/AUC computed on the first class column.
    fpr, tpr, _ = roc_curve(y_test[:, 0], y_score[:, 0])
    roc_auc = auc(fpr, tpr)
    y_class = utils.categorical_probas_to_classes(y_score)
    y_test_tmp = y[test]
    acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
        len(y_class), y_class, y_test_tmp)
    sepscores.append(
        [acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc])
    print(
        'AdaBoost:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,f1=%f,roc_auc=%f'
Esempio n. 5
0
    pre_score = LR.predict_proba(meta_test)
    return meta_train, meta_test, pre_score


# 5-fold stratified cross-validation of the second-level (stacked) model.
skf = StratifiedKFold(n_splits=5)
sepscores = []
num_class = 2
# Seed rows that the per-fold one-hot labels / scores are stacked beneath.
ytest = np.ones((1, 2)) * 0.5
yscore = np.ones((1, 2)) * 0.5
for train, test in skf.split(train_data, train_label):
    # Stacked prediction for this fold's held-out rows.
    meta_train, meta_test, y_score = get_second_level(
        train_data[train], train_label[train], train_data[test], num_class)
    yscore = np.vstack((yscore, y_score))

    y_test_tmp = train_label[test]
    y_test = utils.to_categorical(y_test_tmp)
    ytest = np.vstack((ytest, y_test))

    # ROC/AUC on the first class column.
    fpr, tpr, _ = roc_curve(y_test[:, 0], y_score[:, 0])
    roc_auc = auc(fpr, tpr)

    y_class = utils.categorical_probas_to_classes(y_score)
    (acc, precision, npv, sensitivity, specificity, mcc,
     f1) = utils.calculate_performace(len(y_class), y_class, y_test_tmp)

    fold_row = [acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc]
    sepscores.append(fold_row)
    print(
        'RF:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,f1=%f,roc_auc=%f'
        % tuple(fold_row))

# Mean and spread of the accuracy column across folds.
scores = np.array(sepscores)
acc_mean = np.mean(scores, axis=0)[0] * 100
acc_std = np.std(scores, axis=0)[0] * 100
print("acc=%.2f%% (+/- %.2f%%)" % (acc_mean, acc_std))
Esempio n. 6
0
# 10-fold stratified cross-validation of the CNN model.
# Seed rows that the per-fold one-hot labels / scores are stacked beneath.
ytest = np.ones((1, 2)) * 0.5
yscore = np.ones((1, 2)) * 0.5
probas_cnn = []  # predicted score array of every fold
tprs_cnn = []  # per-fold TPR interpolated onto the common mean_fpr grid
sepscore_cnn = []  # one metric row per fold
skf = StratifiedKFold(n_splits=10)
for train, test in skf.split(X, y):
    # A new model is built for every fold.
    clf_cnn = get_CNN_model(input_dim, out_dim)
    # Each sample is reshaped to (1, input_dim) before it is fed to the model.
    X_train_cnn = np.reshape(X[train], (-1, 1, input_dim))
    X_test_cnn = np.reshape(X[test], (-1, 1, input_dim))
    # NOTE(review): `nb_epoch` is the Keras 1 spelling of `epochs` - confirm
    # against the installed Keras version.
    clf_list = clf_cnn.fit(X_train_cnn, to_categorical(y[train]), nb_epoch=50)
    y_cnn_probas = clf_cnn.predict(X_test_cnn)
    probas_cnn.append(y_cnn_probas)
    y_class = utils.categorical_probas_to_classes(y_cnn_probas)

    y_test = utils.to_categorical(y[test])  # one-hot labels of the held-out rows
    ytest = np.vstack((ytest, y_test))
    y_test_tmp = y[test]
    yscore = np.vstack((yscore, y_cnn_probas))

    acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
        len(y_class), y_class, y[test])
    # Common FPR grid (identical on every iteration) for averaging ROC curves.
    mean_fpr = np.linspace(0, 1, 100)
    # ROC is computed from the raw labels and the second score column.
    fpr, tpr, thresholds = roc_curve(y[test], y_cnn_probas[:, 1])
    tprs_cnn.append(interp(mean_fpr, fpr, tpr))
    # Force the interpolated curve to start at TPR 0.
    tprs_cnn[-1][0] = 0.0
    roc_auc = auc(fpr, tpr)
    sepscore_cnn.append(
        [acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc])

# Number of stacked label rows, including the seed row added before the loop.
row = ytest.shape[0]
Esempio n. 7
0
# 5-fold stratified cross-validation of a random forest on the selected features.
shu1 = shu[:, mask]
# shu1 is indexed with shape[2], so the masked selection is 3-D here; the
# reshape flattens it to (n_samples, shu1.shape[2]).
X = np.reshape(shu1, (shu1.shape[0], shu1.shape[2]))
y = label
#y_raw=np.mat(label_)
#y=np.transpose(y_raw)
#X_train_origin, X_test_origin, y_train, y_test = train_test_split(X, y,test_size=0.2)
cv_clf = RandomForestClassifier(n_estimators=500, max_depth=5)
skf = StratifiedKFold(n_splits=5)
sepscores = []  # one metric row per fold
# Seed rows that the per-fold one-hot labels / scores are stacked beneath.
ytest = np.ones((1, 2)) * 0.5
yscore = np.ones((1, 2)) * 0.5
for train, test in skf.split(X, y):
    # The value kept in X_train_enc is whatever fit() returns, not encoded data.
    X_train_enc = cv_clf.fit(X[train], y[train])
    y_score = cv_clf.predict_proba(X[test])
    yscore = np.vstack((yscore, y_score))
    y_test = utils.to_categorical(y[test])
    ytest = np.vstack((ytest, y_test))
    # Per-fold ROC/AUC computed on the first class column.
    fpr, tpr, _ = roc_curve(y_test[:, 0], y_score[:, 0])
    roc_auc = auc(fpr, tpr)
    y_class = utils.categorical_probas_to_classes(y_score)
    y_test_tmp = y[test]
    acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
        len(y_class), y_class, y_test_tmp)
    sepscores.append(
        [acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc])
    # The report tag names the estimator actually evaluated (a random forest);
    # it previously read "SVM", which mislabelled the log output.
    print(
        'RF:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,f1=%f,roc_auc=%f'
        % (acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc))
# Mean and spread of the accuracy column across folds.
scores = np.array(sepscores)
print("acc=%.2f%% (+/- %.2f%%)" %
      (np.mean(scores, axis=0)[0] * 100, np.std(scores, axis=0)[0] * 100))
        reduce_lr = ReduceLROnPlateau(monitor="val_loss",
                                      factor=0.1,
                                      patience=3,
                                      verbose=1)
        early_stopping = EarlyStopping(monitor="val_loss",
                                       min_delta=0,
                                       patience=5,
                                       verbose=1)

        print(model.summary())
        model.compile(
            loss="binary_crossentropy",
            optimizer="adam",
            metrics=["accuracy"],
        )
        y_train = utils.to_categorical(label[train_index])

        model.fit(
            dataset[train_index],
            y_train,
            validation_split=0.1,
            batch_size=batch_s,
            epochs=50,
            verbose=1,
            shuffle=True,
            callbacks=[reduce_lr, early_stopping],
        )

        # prediction probability
        y_test = utils.to_categorical(label[test_index])
        predictions = model.predict(dataset[test_index])
Esempio n. 9
0
    for i in range(k):
        print(i)
        # `i` is the index of the held-out fold.  It must keep the value given
        # by the loop: resetting it to 0 here made every iteration test on
        # fold 0 and train on the same remaining data.
        # test data
        X_fold_pos_test = X_pos[i * num_fold_pos:(i + 1) * num_fold_pos]
        X_fold_neg_test = X_neg[i * num_fold_neg:(i + 1) * num_fold_neg]

        # Labels of the test sequences: ones for positives, zeros for negatives
        Y_test = np.ones(len(X_fold_pos_test)).tolist()
        Y_test_neg = np.zeros(len(X_fold_neg_test)).tolist()

        X_test = np.vstack((X_fold_pos_test, X_fold_neg_test))
        # Each row is split at its midpoint into a left and a right half.
        X_test_left = X_test[:, 0:len(X_test[0]) // 2]
        X_test_right = X_test[:, len(X_test[0]) // 2:len(X_test[0])]
        Y_test.extend(Y_test_neg)
        Y_test = utils.to_categorical(Y_test)
        # train data: everything before and after the held-out slice

        X_fold_pos_before = X_pos[:i * num_fold_pos]
        X_fold_neg_before = X_neg[:i * num_fold_neg]

        X_fold_pos_after = X_pos[(i + 1) * num_fold_pos:]
        X_fold_neg_after = X_neg[(i + 1) * num_fold_neg:]

        X_train_pos = np.vstack(
            (np.array(X_fold_pos_before), np.array(X_fold_pos_after)))
        X_train_neg = np.vstack(
            (np.array(X_fold_neg_before), np.array(X_fold_neg_after)))

        X_train = np.vstack((np.array(X_train_pos), np.array(X_train_neg)))
        X_train_left = X_train[:, 0:len(X_train[0]) // 2]
Esempio n. 10
0
def classify(size, window, maxlen, train_fea_protein_AB, train_label):
    """Run 5-fold stratified cross-validation of the merged DBN model.

    For every fold the training rows are split again (90/10) into training and
    validation parts, a fresh ``merged_DBN`` model is compiled and fitted, the
    model is saved, and the held-out fold is scored.  Per-fold metrics are
    written to a CSV file, their mean/std summary is printed and written to a
    text file, and run time plus per-fold memory state are appended to a log.

    Args:
        size: Combined with ``maxlen`` to give the per-protein feature width
            (``size * maxlen``); also part of the run name.
        window: Only used to build the run name.
        maxlen: See ``size``; also part of the run name.
        train_fea_protein_AB: 2-D feature array.  Columns
            ``[0, size * maxlen)`` are fed to the first model input and
            columns ``[size * maxlen, 2 * size * maxlen)`` to the second.
        train_label: Class labels, one per row of ``train_fea_protein_AB``.

    Returns:
        None.  All results are printed or written to disk.
    """
    time_start_classify = time()
    sg = 'swissProt_size_' + str(size) + '_window_' + str(window)
    db = sg + '_maxlen_' + str(maxlen)
    #db_dir= 'dataset/11188/different size represented data/size_'+str(size)
    plot_dir = "plot/11188/"
    result_dir = "result/11188/performance/"
    model_dir = "model/dl/11188/"

    # NOTE(review): result_dir and 'runInfo/11188_val/' are written to below
    # but are not created here - presumably they already exist; confirm.
    mkdir(plot_dir + db)
    #mkdir(result_dir + db)
    mkdir(model_dir + db)

    sequence_len = size * maxlen

    Y = utils.to_categorical(train_label)
    skf = StratifiedKFold(n_splits=5, random_state=20181031, shuffle=True)

    scores = []  # one metric row per fold
    mem_cv = []  # one memory-state line per fold
    for i, (train_index, test_index) in enumerate(
            skf.split(train_fea_protein_AB, train_label)):
        print("================")

        print(test_index)
        print(train_index)
        # Hold out 10% of this fold's training rows for validation.
        X_train, X_val, y_train, y_val = train_test_split(
            train_fea_protein_AB[train_index],
            Y[train_index],
            random_state=20181031,
            test_size=0.1,
            shuffle=True)

        # Split every feature row into the two model inputs.
        X_train_left = X_train[:, 0:sequence_len]
        X_train_right = X_train[:, sequence_len:sequence_len * 2]

        X_validation_left = X_val[:, 0:sequence_len]
        X_validation_right = X_val[:, sequence_len:sequence_len * 2]

        X_test_left = train_fea_protein_AB[:, 0:sequence_len][test_index]
        X_test_right = train_fea_protein_AB[:, sequence_len:sequence_len *
                                            2][test_index]

        # turn to np.array
        X_train_left = np.array(X_train_left)
        X_train_right = np.array(X_train_right)

        X_test_left = np.array(X_test_left)
        X_test_right = np.array(X_test_right)

        X_validation_left = np.array(X_validation_left)
        X_validation_right = np.array(X_validation_right)
        # label
        y_test = Y[test_index]

        # feed data into model
        model = merged_DBN(sequence_len)
        sgd = SGD(lr=0.01, momentum=0.9, decay=0.001)
        model.compile(loss='categorical_crossentropy',
                      optimizer=sgd,
                      metrics=['precision'])
        hist = model.fit(
            [X_train_left, X_train_right],
            y_train,
            validation_data=([X_validation_left, X_validation_right], y_val),
            batch_size=128,
            nb_epoch=45,
            verbose=1)
        mem_cv.append('round ' + str(i) + ' ' + getMemorystate())
        train_validation__vis(hist, i, plot_dir, db)
        print('******   model created!  ******')
        model.save(model_dir + db + '/round_' + str(i) + '.h5')

        predictions_test = model.predict([X_test_left, X_test_right])

        # Threshold-free metrics use the second score column.
        auc_test = roc_auc_score(y_test[:, 1], predictions_test[:, 1])
        pr_test = average_precision_score(y_test[:, 1], predictions_test[:, 1])

        label_predict_test = utils.categorical_probas_to_classes(
            predictions_test)
        tp_test, fp_test, tn_test, fn_test, accuracy_test, precision_test, sensitivity_test, recall_test, specificity_test, MCC_test, f1_score_test, _, _, _ = utils.calculate_performace(
            len(label_predict_test), label_predict_test, y_test[:, 1])
        print(db + '    test:' + str(i))
        print('\ttp=%0.0f,fp=%0.0f,tn=%0.0f,fn=%0.0f' %
              (tp_test, fp_test, tn_test, fn_test))
        print('\tacc=%0.4f,pre=%0.4f,rec=%0.4f,sp=%0.4f,mcc=%0.4f,f1=%0.4f' %
              (accuracy_test, precision_test, recall_test, specificity_test,
               MCC_test, f1_score_test))
        print('\tauc=%0.4f,pr=%0.4f' % (auc_test, pr_test))
        scores.append([
            accuracy_test, precision_test, recall_test, specificity_test,
            MCC_test, f1_score_test, auc_test, pr_test
        ])

        # Release the backend graph before the next fold builds a new model.
        K.clear_session()
        tf.reset_default_graph()

    sc = pd.DataFrame(scores)
    sc.to_csv(result_dir + '5cv_' + db + '_scores.csv')
    scores_array = np.array(scores)
    print(db + '_5cv:')
    # The same summary lines are printed now and written to the result file.
    summary_lines = _cv_summary_lines(scores_array)
    for line in summary_lines:
        print(line)
    time_end_classify = time()
    elapsed = time_end_classify - time_start_classify

    # memory and time for classify
    print('Time of create db(' + db + '):', elapsed)
    with open('runInfo/11188_val/cv_mem_time.txt', 'a') as f:
        f.write('Time of cv(' + db + '):' + str(elapsed))
        f.write('\n')
        # One line per fold, however many folds were run.
        for mem_line in mem_cv:
            f.write(mem_line)
            f.write('\n')

    with open(result_dir + '5cv_' + db + '.txt', 'w') as f:
        for line in summary_lines:
            f.write(line)
            f.write('\n')
        # Blank line terminating the summary.
        f.write('\n')


def _cv_summary_lines(scores_array):
    """Return one ``name=mean% (+/- std%)`` line per metric column."""
    metric_names = ("accuracy", "precision", "recall", "specificity", "MCC",
                    "f1_score", "roc_auc", "roc_pr")
    means = np.mean(scores_array, axis=0)
    stds = np.std(scores_array, axis=0)
    return [
        "%s=%.2f%% (+/- %.2f%%)" % (name, means[col] * 100, stds[col] * 100)
        for col, name in enumerate(metric_names)
    ]
Esempio n. 11
0
                        print('dataset is loaded')
                        #swm = 'swissProt_size_'+str(size)+'_window_'+str(window)+'_maxlen_'+str(maxlen) 
                       
#                        #scaler
#                        scaler = StandardScaler().fit(train_fea_protein_AB)
#                        train_fea_protein_AB = scaler.transform(train_fea_protein_AB)
#                    
#                        db_dir= 'H:/dataset/11188/different size represented data/size_'+str(size)
#                        mkdir(db_dir)            
#                        # creat HDF5 file
#                        h5_file = h5py.File(db_dir + '/'+swm+'.h5','w')
#                        h5_file.create_dataset('trainset_x', data = train_fea_protein_AB)
#                        h5_file.create_dataset('trainset_y', data = train_label)
#                        h5_file.close()
#                                                 
                        Y = utils.to_categorical(train_label)  
                        skf = StratifiedKFold(n_splits = 5,random_state= 20181031,shuffle= True)
                      
                        scores = []  
                        i = 0
                      
#                        be = '_batch_size_'+str(batch_size)+'_nb_epoch_'+str(nb_epoch)
#                        model_dir = 'model/dl/11188/'
#                        result_dir = 'result/5cv/11188/'
#                        mkdir(result_dir)  
                        for (train_index, test_index) in skf.split(train_fea_protein_AB,train_label):
                            print("================")
                            print(test_index)
                            print(train_index)
                            
                            X_train_left = train_fea_protein_AB[train_index][:,0:sequence_len]
Esempio n. 12
0
        oof_train_, oof_test_ = get_oof(clf=clf_first,
                                        n_folds=10,
                                        X_train=X[train],
                                        y_train=y[train],
                                        X_test=X[test])
        newfeature_list.append(oof_train_)
        newtestdata_list.append(oof_test_)
    newfeature = reduce(lambda x, y: np.concatenate((x, y), axis=1),
                        newfeature_list)
    newtestdata = reduce(lambda x, y: np.concatenate((x, y), axis=1),
                         newtestdata_list)

    clf_second1 = SVC(kernel='rbf', C=8, gamma=0.0313, probability=True)
    clf_second1.fit(newfeature, y[train])
    y_score = clf_second1.predict_proba(newtestdata)  #clf.predict_proba
    y_test = utils.to_categorical(y[test])
    pred = clf_second1.predict(newtestdata)
    y_pred = utils.to_categorical(pred)
    print(pred.shape)
    print(y_pred.shape)
    yscore = np.vstack((yscore, y_score))
    print(ytest.shape)
    print(y_test.shape)
    ytest = np.vstack((ytest, y_test))
    ypred = np.vstack((ypred, y_pred))

    accuracy = metrics.accuracy_score(y[test], pred) * 100
    print(accuracy)
    #utils.plothistory(hist)
    #prediction probability
Esempio n. 13
0
# Estimators to compare; paired positionally with `names` in the loop below.
classifiers = [
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
    KNeighborsClassifier(n_neighbors=10, weights="distance", algorithm="auto"),
    DecisionTreeClassifier(criterion="entropy"),
    RandomForestClassifier(n_estimators=3000),
    AdaBoostClassifier(n_estimators=3000),
    SVC(probability=True, C=3.1748021039363996, gamma=0.00069053396600248786),
]

# Fit every classifier on the training split and score it on the test split.
# zip() stops at the shorter sequence, so `names` needs one entry per
# classifier for all of them to run.
for name, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)
    y_score = clf.predict_proba(X_test)

    # ROC curve on the first class column.
    y_temp = utils.to_categorical(y_test)
    fpr, tpr, _ = roc_curve(y_temp[:, 0], y_score[:, 0])

    # Persist the curve: first column is fpr, second is tpr, no header/index.
    tpr_fpr = pd.DataFrame([fpr, tpr]).T
    tpr_fpr.to_csv(name + '_tpr_fpr.csv', header=None, index=None)

    roc_auc = auc(fpr, tpr)
    y_class = utils.categorical_probas_to_classes(y_score)

    # f1 is returned here but is neither printed nor stored below.
    acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
        len(y_class), y_class, y_test)
    print((
        '%s:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,roc_auc=%f'
        % (name, acc, precision, npv, sensitivity, specificity, mcc, roc_auc)))
    cvscores.append(
        [acc, precision, npv, sensitivity, specificity, mcc, roc_auc])
Esempio n. 14
0
train_label = y

# Load the two per-protein feature matrices of the test set and join them
# column-wise, one row per pair.
data_test = sio.loadmat('Wnt_feature_end.mat')
test_proteinA = data_test.get('feature_A')
test_protein_A = np.array(test_proteinA)
test_proteinB = data_test.get('feature_B')
test_protein_B = np.array(test_proteinB)
test_protein = np.concatenate((test_protein_A, test_protein_B), axis=1)
test_protein = np.array(test_protein)
# Reuse the already-fitted scaler and feature mask on the test data.
test_protein = scaler.transform(test_protein)
test_dim = test_protein[:, mask]
# test_dim is indexed with shape[2], so the masked selection is 3-D here; the
# reshape flattens it to (n_samples, test_dim.shape[2]).
test_shu = np.reshape(test_dim, (test_dim.shape[0], test_dim.shape[2]))
[row1, column1] = np.shape(test_shu)
# Every test sample is labelled 1.
test_y_raw = np.ones(int(row1))

# Turn the flat label vector into an (n_samples, 1) column array.
test_y_ = np.mat(test_y_raw)
test_y = np.transpose(test_y_)
test_label = np.array(test_y)

# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load model files that come from a trusted source.
with open("model_gc4.pkl", "rb") as f:
    gc = pickle.load(f)
y_score = gc.predict_proba(test_shu)
y_test = utils.to_categorical(test_label)
y_class = utils.categorical_probas_to_classes(y_score)
y_test_tmp = test_label
accu = accuracy_score(y_test_tmp, y_class)
print(accu)
# NOTE(review): the labels contain only class 1, so metrics that need
# negatives (specificity, npv, mcc) may be undefined - check how
# utils.calculate_performace handles that.
acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
    len(y_class), y_class, y_test_tmp)
# Save the predicted classes for later use.
sio.savemat('yeast_Wnt_class.mat', {'yeast_Wnt_class': y_class})
Esempio n. 15
0
 # Build feature matrices for the training set and the c1/c2/c3 sets; the
 # last column of each table is excluded here and used as the label below.
 train_fea_protein_AB = get_each_dataset(dict_index2seq, model_wv.wv, train[:,0:-1],maxlen,size)
 print('train_fea_protein_AB')
 c1_fea_protein_AB = get_each_dataset(dict_index2seq, model_wv.wv, c1[:,0:-1],maxlen,size)
 print('c1_fea_protein_AB')
 c2_fea_protein_AB = get_each_dataset(dict_index2seq, model_wv.wv, c2[:,0:-1],maxlen,size)
 print('c2_fea_protein_AB')
 c3_fea_protein_AB = get_each_dataset(dict_index2seq, model_wv.wv, c3[:,0:-1],maxlen,size)
 print('c3_fea_protein_AB')

 # NOTE(review): the arguments are passed as (train, c2, c3, c1) while the
 # results are unpacked as (train, c1, c2, c3) - verify against save_data's
 # signature that each scaled set ends up under the right name.
 scalered_train_fea_protein_AB,scalered_c1_fea_protein_AB,scalered_c2_fea_protein_AB,scalered_c3_fea_protein_AB = save_data(train_fea_protein_AB, c2_fea_protein_AB,c3_fea_protein_AB, c1_fea_protein_AB)

 # train data and label: columns [0, sequence_len) form the left input and
 # columns [sequence_len, 2*sequence_len) the right input
 X_train_left = np.array(scalered_train_fea_protein_AB[:,0:sequence_len])
 X_train_right = np.array(scalered_train_fea_protein_AB[:,sequence_len:sequence_len*2])

 Y_train = utils.to_categorical(train[:,-1])

 # c1
 c1_test_left = np.array(scalered_c1_fea_protein_AB[:,0:sequence_len])
 c1_test_right = np.array(scalered_c1_fea_protein_AB[:,sequence_len:sequence_len*2])

 Y_c1 = utils.to_categorical(c1[:,-1])

 # c2
 c2_test_left = np.array(scalered_c2_fea_protein_AB[:,0:sequence_len])
 c2_test_right = np.array(scalered_c2_fea_protein_AB[:,sequence_len:sequence_len*2])

 Y_c2 = utils.to_categorical(c2[:,-1])

 # c3
 c3_test_left = np.array(scalered_c3_fea_protein_AB[:,0:sequence_len])
Esempio n. 16
0
# 10-fold stratified cross-validation of `model` on the scaled data.
label = np.append(label1, label2)
shu = scale(data)
X1 = shu
y = label

# Each sample is reshaped to (1, n1) before it is fed to the model.
X = np.reshape(X1, (-1, 1, n1))

sepscores = []

# Seed rows that the per-fold one-hot labels / scores are stacked beneath.
ytest = np.ones((1, 2)) * 0.5
yscore = np.ones((1, 2)) * 0.5

skf = StratifiedKFold(n_splits=10)

for train, test in skf.split(X, y):
    y_train = utils.to_categorical(y[train])  # one-hot labels of the training rows
    # NOTE(review): the same `model` object is reused on every fold and is not
    # rebuilt here; if fit() continues from the current weights, later folds
    # start from a model that has already seen their test rows - confirm.
    cv_clf = model
    hist = cv_clf.fit(X[train], y_train, epochs=19)

    y_score = cv_clf.predict(X[test])  # predicted scores for the held-out rows
    y_class = utils.categorical_probas_to_classes(y_score)

    y_test = utils.to_categorical(y[test])  # one-hot labels of the held-out rows
    ytest = np.vstack((ytest, y_test))
    y_test_tmp = y[test]
    yscore = np.vstack((yscore, y_score))

    acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
        len(y_class), y_class, y_test_tmp)
    # fpr, tpr, _ = roc_curve(y_test[:,0], y_score[:,0])
    # ROC is computed on the second class column.
    fpr, tpr, _ = roc_curve(y_test[:, 1], y_score[:, 1])
Esempio n. 17
0
    enetCV = ElasticNetCV(alphas=alpha,l1_ratio=0.1).fit(data,label)
    enet=ElasticNet(alpha=enetCV.alpha_, l1_ratio=0.1)
    enet.fit(data,label)
    mask = enet.coef_ != 0
    new_data = data[:,mask]
    return new_data,mask

# Training table: column 0 holds the labels, the remaining columns the features.
data_train = sio.loadmat(r'D:\ctd\T_EN_2(T.mat')
data=data_train.get('T')
row=data.shape[0]
column=data.shape[1]
shu=data[:,np.array(range(1,column))]
X=shu
y=data[:,0]
# Test table with the same layout: labels in column 0, features after it.
data_test= sio.loadmat(r'D:\ctd\T3_EN_2(T3.mat')
test_data=data_test.get('T3')
row1=test_data.shape[0]
column1=test_data.shape[1]
test_shu=test_data[:,np.array(range(1,column1))]
test_label=test_data[:,0]
sepscores = []
ytest=np.ones((1,2))*0.5
yscore=np.ones((1,2))*0.5
# Fit a random forest on the training table and score the test table.
cv_clf = RandomForestClassifier(n_estimators=500, criterion='gini', max_depth=10)
y_train=utils.to_categorical(y)
# The classifier is fitted on the raw labels `y`, not on the one-hot `y_train`.
hist=cv_clf.fit(shu,y)
y_score=cv_clf.predict_proba(test_shu)
y_test=utils.to_categorical(test_label)
y_class= utils.categorical_probas_to_classes(y_score)
y_test_tmp=test_label
acc, precision,npv, sensitivity, specificity, mcc,f1= utils.calculate_performace(len(y_class), y_class, y_test_tmp)