Beispiel #1
0
def start_fit(dataSet):
    """Train the DBN classifier on a shuffled hold-out split and print test metrics.

    Columns 0..147 of ``dataSet`` are used as features and column 148 as the
    label. Rows are shuffled with the global ``random`` state, split 80/20
    with ``train_test_split``, standardized with statistics fitted on the
    training part only, and fed to a ``KerasClassifier`` built from
    ``model_init``.

    Args:
        dataSet: 2-D array supporting NumPy-style fancy indexing.

    Returns:
        None. The metrics are printed to stdout.
    """
    # Local import so the ROC computation below does not depend on what the
    # surrounding module happens to import.
    from sklearn.metrics import auc, roc_curve

    index = list(range(len(dataSet)))
    random.shuffle(index)
    data = dataSet[index]
    # Slice the *shuffled* rows; the original sliced ``dataSet`` and silently
    # discarded the shuffle.
    X = data[:, 0:148]
    Y = data[:, 148]
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=0)
    # normalization (fit on the training part only to avoid test leakage)
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    dbn_model = KerasClassifier(model_init,
                                epochs=500,
                                batch_size=64,
                                verbose=0)
    dbn_model.fit(X_train, y_train)
    y_ped = dbn_model.predict(X_test)

    # ``roc_auc`` was printed but never computed (NameError). Derive it from
    # the positive-class probability.
    # NOTE(review): assumes binary 0/1 labels so that column 1 of
    # ``predict_proba`` is the positive class - confirm against model_init.
    y_score = dbn_model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)

    acc, precision, npv, sensitivity, specificity, mcc, f1 = calculate_performace(
        len(y_ped), y_ped, y_test)
    print(
        'DBN:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,roc_auc=%f'
        % (acc, precision, npv, sensitivity, specificity, mcc, roc_auc))
Beispiel #2
0
def pred_sep_model(model, X_test, y_test):
    """Score a two-input model on a test set and print its metrics.

    The same ``X_test`` array is passed to both model inputs. ROC AUC is
    computed from column 0 of the one-hot labels and of the predicted scores;
    the remaining metrics come from ``utils.calculate_performace`` applied to
    the predicted classes and the raw ``y_test`` labels.

    Args:
        model: object exposing ``predict`` that accepts a list of two inputs.
        X_test: test features, fed to both inputs.
        y_test: test labels, in the form accepted by ``utils.to_categorical``.

    Returns:
        None. The metrics are printed to stdout.
    """
    probabilities = model.predict([X_test, X_test])

    # ROC is evaluated on the first class column.
    one_hot_truth = utils.to_categorical(y_test)
    false_pos_rate, true_pos_rate, _ = roc_curve(one_hot_truth[:, 0],
                                                 probabilities[:, 0])
    roc_auc = auc(false_pos_rate, true_pos_rate)

    # Threshold-based metrics use hard class predictions and the raw labels.
    predicted_classes = utils.categorical_probas_to_classes(probabilities)
    performance = utils.calculate_performace(len(predicted_classes),
                                             predicted_classes, y_test)
    acc, precision, npv, sensitivity, specificity, mcc, f1 = performance

    report = (
        'DeepPPI-sep:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,f1=%f,roc_auc=%f'
        % (acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc))
    print(report)
Beispiel #3
0
def start_fit(dataset, label, title):
    """Train the two-branch model on a hold-out split and report test metrics.

    The samples are shuffled with ``get_shuffle``, split with
    ``train_test_split`` (default ratio), standardized with statistics fitted
    on the training part, and cut into two 1164-column halves, one per model
    input. After training, ROC and PR curves are drawn and the metrics are
    printed under ``title``.

    Args:
        dataset: samples convertible to a 2-D array with at least 2328 columns.
        label: class labels, one per sample.
        title: heading printed before the metric line.

    Returns:
        None. Results are plotted and printed.
    """
    half_width = 1164  # number of feature columns per model input

    # pre-processing
    samples, targets = get_shuffle(np.array(dataset),
                                   np.array(label),
                                   random_state=1)

    # hold-out split
    train_x, test_x, train_y, test_y = train_test_split(samples,
                                                        targets,
                                                        random_state=0)

    # normalization, fitted on the training part only
    scaler = StandardScaler().fit(train_x)
    train_x = scaler.transform(train_x)
    test_x = scaler.transform(test_x)

    train_inputs = [
        train_x[:, 0:half_width],
        train_x[:, half_width:2 * half_width],
    ]
    test_inputs = [
        test_x[:, 0:half_width],
        test_x[:, half_width:2 * half_width],
    ]

    model = get_model(dropout_value=0.2)
    train_y = np_utils.to_categorical(train_y)

    # fit
    model.fit(
        train_inputs,
        train_y,
        nb_epoch=30,
        batch_size=64,
        verbose=0,
    )

    # prediction probability; ROC is evaluated on the first class column
    probabilities = model.predict(test_inputs)
    test_y = np_utils.to_categorical(test_y)
    false_pos_rate, true_pos_rate, _ = roc_curve(test_y[:, 0],
                                                 probabilities[:, 0])
    roc_auc = auc(false_pos_rate, true_pos_rate)

    draw_roc(test_y, probabilities)
    draw_pr(test_y, probabilities)

    # collapse one-hot / probability rows back to class indices
    predicted_classes = categorical_probas_to_classes(probabilities)
    true_classes = categorical_probas_to_classes(test_y)
    performance = calculate_performace(len(predicted_classes),
                                       predicted_classes, true_classes)
    acc, precision, npv, sensitivity, specificity, mcc, f1 = performance

    print(title)
    report = (
        'DeepPPI:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,roc_auc=%f'
        % (acc, precision, npv, sensitivity, specificity, mcc, roc_auc))
    print(report)
    def calu_Accuracy(self):
        """Evaluate the saved model on the test split and log its metrics.

        Builds the network with ``self.res_Net50(self.x)``, loads
        ``train_weights.h5`` from ``self.log_dir``, predicts on
        ``self.mnist.test``, dumps the raw predictions to the file ``temp``,
        prints the accuracy, and writes a CSV-style report plus start/end
        timestamps to ``<self.save_path>/index2.txt``.

        Raises:
            RuntimeError: if the weights file does not exist.
        """
        weights_path = self.log_dir+'/'+'train_weights.h5'
        model = self.res_Net50(self.x)
        if not os.path.exists(weights_path):
            raise RuntimeError('load weights Error!')
        model.load_weights(weights_path)

        test_images = self.mnist.test.images
        test_labels = self.mnist.test.labels
        print("正在预测测试集样本")
        result = model.predict(test_images)

        # Dump one prediction row per line for offline inspection.
        with open('temp', 'w+') as file:
            for row in result:
                file.write(str(row)+'\n')

        # Accuracy: fraction of samples whose arg-max class matches the label.
        equal = np.equal(np.argmax(test_labels, axis=1),
                         np.argmax(result, axis=1))
        equal = np.where(equal, 1, 0)
        correct_rate = np.mean(equal)
        print("测试集合准确率为:{}".format(correct_rate))

        # The report file used to be opened and never closed; the context
        # manager guarantees it is flushed and released even if a metric
        # computation below raises.
        with open(self.save_path + '/index2.txt', 'w') as fw_perf:
            # NOTE(review): the seventh column is labelled 'ppv' but the value
            # written below is f1 - header left unchanged in case downstream
            # readers key on it; confirm and rename if safe.
            fw_perf.write('acc,precision,npv,sensitivity,specificity,mcc,'
                          'ppv,auc,pr\n')
            auc_ = roc_auc_score(test_labels, result)
            pr = average_precision_score(test_labels, result)
            y_class = utils.categorical_probas_to_classes(result)
            true_y = utils.categorical_probas_to_classes(test_labels)
            acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
                len(y_class), y_class, true_y)
            print("======================")
            print("======================")
            print(
                '\tacc=%0.4f,pre=%0.4f,npv=%0.4f,sn=%0.4f,sp=%0.4f,mcc=%0.4f,f1=%0.4f'
                % (acc, precision, npv, sensitivity, specificity, mcc, f1))
            print('\tauc=%0.4f,pr=%0.4f' % (auc_, pr))

            report_values = (acc, precision, npv, sensitivity, specificity,
                             mcc, f1, auc_, pr)
            fw_perf.write(','.join(str(value) for value in report_values)
                          + '\n')

            end = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
            print('start: %s' % self.start)
            print('end: %s' % end)
            fw_perf.write('start: %s' % self.start+'\n')
            fw_perf.write('end: %s' % end+'\n')
Beispiel #5
0
# 10-fold stratified cross-validation of `model` on (X, y).
# Relies on `X`, `y`, `model`, `ytest`, `yscore` and `sepscores` being defined
# earlier in the script (not visible in this excerpt).
skf = StratifiedKFold(n_splits=10)

for train, test in skf.split(X, y):
    y_train = utils.to_categorical(y[train])  # one-hot training labels
    # NOTE(review): every fold binds the same `model` object, so training
    # presumably continues from the previous fold's weights - confirm whether
    # a fresh model per fold was intended.
    cv_clf = model
    hist = cv_clf.fit(X[train], y_train, epochs=19)

    y_score = cv_clf.predict(X[test])  # predicted class probabilities
    y_class = utils.categorical_probas_to_classes(y_score)

    y_test = utils.to_categorical(y[test])  # one-hot test labels
    # Accumulate labels and scores of all folds by stacking rows.
    ytest = np.vstack((ytest, y_test))
    y_test_tmp = y[test]
    yscore = np.vstack((yscore, y_score))

    acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
        len(y_class), y_class, y_test_tmp)
    # ROC is evaluated on class column 1 (column 0 was used previously):
    # fpr, tpr, _ = roc_curve(y_test[:,0], y_score[:,0])
    fpr, tpr, _ = roc_curve(y_test[:, 1], y_score[:, 1])
    roc_auc = auc(fpr, tpr)
    sepscores.append(
        [acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc])

# Append the column-wise mean over all folds as a final summary row.
scores = np.array(sepscores)
result1 = np.mean(scores, axis=0)
H1 = result1.tolist()
sepscores.append(H1)
result = sepscores

# Drop row 0 of the stacked scores.
# NOTE(review): presumably row 0 is the placeholder the stack was seeded
# with - verify against the initialisation of `yscore`.
row = yscore.shape[0]
yscore = yscore[np.array(range(1, row)), :]
yscore_sum = pd.DataFrame(data=yscore)
    [X1_train, X2_train],
    y_train,
    nb_epoch=30,
    #validation_split=0.1,
    batch_size=64,
    verbose=2)
#plothistory(hist)
#prediction probability
# Evaluate the two-input model on the held-out test data.
# `model`, `X1_test`, `X2_test` and `y_test` come from earlier in the script.
y_score = model.predict([X1_test, X2_test])
y_test = np_utils.to_categorical(y_test)
# ROC AUC is computed on the first class column.
fpr, tpr, _ = roc_curve(y_test[:, 0], y_score[:, 0])
roc_auc = auc(fpr, tpr)

# From here on `y_score` / `y_test` hold class indices, not probabilities /
# one-hot rows.
y_score = categorical_probas_to_classes(y_score)
y_test = categorical_probas_to_classes(y_test)
acc, precision, npv, sensitivity, specificity, mcc, f1 = calculate_performace(
    len(y_score), y_score, y_test)
print((
    'DeepPPI-sep:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,roc_auc=%f'
    % (acc, precision, npv, sensitivity, specificity, mcc, roc_auc)))

#%%
# Next cell: train the model returned by get_con_model() on the unsplit
# feature matrix `X_train` and plot its training history.
model = get_con_model()
hist = model.fit(
    X_train,
    y_train,
    nb_epoch=30,
    batch_size=64,
    #validation_split=0.1,
    verbose=0)
plothistory(hist)
#prediction probability
        predictions_prob = model.predict(dataset[test_index])[:, 1]
        auc_ = roc_auc_score(label[test_index], predictions_prob)
        pr = average_precision_score(label[test_index], predictions_prob)

        y_class = utils.categorical_probas_to_classes(predictions)
        # true_y_C_C=utils.categorical_probas_to_classes(true_y_C)
        true_y = utils.categorical_probas_to_classes(y_test)
        (
            acc,
            precision,
            npv,
            sensitivity,
            specificity,
            mcc,
            f1,
        ) = utils.calculate_performace(len(y_class), y_class, true_y)
        print("======================")
        print("======================")
        print(
            "\tacc=%0.4f,pre=%0.4f,npv=%0.4f,sn=%0.4f,sp=%0.4f,mcc=%0.4f,f1=%0.4f"
            % (acc, precision, npv, sensitivity, specificity, mcc, f1))
        print("\tauc=%0.4f,pr=%0.4f" % (auc_, pr))

        fw_perf.write(
            str(acc) + "," + str(precision) + "," + str(npv) + "," +
            str(sensitivity) + "," + str(specificity) + "," + str(mcc) + "," +
            str(f1) + "," + str(auc_) + "," + str(pr) + "\n")

        scores.append([
            acc,
            precision,
Beispiel #8
0
def print_result(predictions_test, y_test):
    """Print confusion counts and summary metrics for test-set predictions.

    ROC AUC and average precision are computed on column 1 of both arrays;
    the remaining figures come from ``utils.calculate_performace`` applied to
    the predicted classes and column 1 of ``y_test``.

    Args:
        predictions_test: 2-D array of per-class scores.
        y_test: 2-D label array with the same column layout.

    Returns:
        None. The metrics are printed to stdout.
    """
    truth_column = y_test[:, 1]
    score_column = predictions_test[:, 1]
    auc_test = roc_auc_score(truth_column, score_column)
    pr_test = average_precision_score(truth_column, score_column)

    predicted_labels = utils.categorical_probas_to_classes(predictions_test)
    performance = utils.calculate_performace(len(predicted_labels),
                                             predicted_labels, y_test[:, 1])
    # 14-tuple: counts, then metrics, then three values that are not reported.
    (tp, fp, tn, fn, accuracy, precision, _sensitivity, recall, specificity,
     mcc, f1_score, _, _, _) = performance

    print('\ttp=%0.0f,fp=%0.0f,tn=%0.0f,fn=%0.0f' % (tp, fp, tn, fn))
    print('\tacc=%0.4f,pre=%0.4f,rec=%0.4f,sp=%0.4f,mcc=%0.4f,f1=%0.4f'
          % (accuracy, precision, recall, specificity, mcc, f1_score))
    print('\tauc=%0.4f,pr=%0.4f' % (auc_test, pr_test))
    print('========================')
Beispiel #9
0
def _cv_summary_lines(scores_array):
    """Return one ``name=mean% (+/- std%)`` line per metric column."""
    metric_names = ('accuracy', 'precision', 'recall', 'specificity', 'MCC',
                    'f1_score', 'roc_auc', 'roc_pr')
    # Reduce once instead of once per printed / written value.
    means = np.mean(scores_array, axis=0)
    stds = np.std(scores_array, axis=0)
    return [
        '%s=%.2f%% (+/- %.2f%%)' % (name, means[col] * 100, stds[col] * 100)
        for col, name in enumerate(metric_names)
    ]


def classify(size, window, maxlen, train_fea_protein_AB, train_label):
    """Run 5-fold cross-validation of the merged DBN and persist the results.

    For every fold, 10% of the training part is held out as a validation set,
    a fresh ``merged_DBN`` is compiled and trained, the model is saved under
    ``model/dl/11188/<db>/round_<i>.h5`` and test metrics are printed. After
    all folds, the per-fold scores are written to a CSV, a mean/std summary
    is printed and written to a text file, and the elapsed time plus per-fold
    memory state are appended to ``runInfo/11188_val/cv_mem_time.txt``.

    Args:
        size: embedding size; used in the run name and, multiplied by
            ``maxlen``, as the column width of each input half.
        window: window value, used only in the run name.
        maxlen: sequence length, see ``size``.
        train_fea_protein_AB: 2-D feature array; the first ``size * maxlen``
            columns feed the left input and the next ``size * maxlen`` the
            right one.
        train_label: class labels, one per row.

    Returns:
        None.
    """
    time_start_classify = time()
    sg = 'swissProt_size_' + str(size) + '_window_' + str(window)
    db = sg + '_maxlen_' + str(maxlen)
    plot_dir = "plot/11188/"
    result_dir = "result/11188/performance/"
    model_dir = "model/dl/11188/"

    mkdir(plot_dir + db)
    mkdir(model_dir + db)

    sequence_len = size * maxlen

    Y = utils.to_categorical(train_label)
    skf = StratifiedKFold(n_splits=5, random_state=20181031, shuffle=True)

    scores = []
    mem_cv = []
    folds = skf.split(train_fea_protein_AB, train_label)
    for i, (train_index, test_index) in enumerate(folds):
        print("================")

        print(test_index)
        print(train_index)
        # Carve a validation set out of this fold's training part.
        X_train, X_val, y_train, y_val = train_test_split(
            train_fea_protein_AB[train_index],
            Y[train_index],
            random_state=20181031,
            test_size=0.1,
            shuffle=True)

        # Split every feature matrix into its left / right input halves.
        X_train_left = np.array(X_train[:, 0:sequence_len])
        X_train_right = np.array(X_train[:, sequence_len:sequence_len * 2])

        X_validation_left = np.array(X_val[:, 0:sequence_len])
        X_validation_right = np.array(X_val[:, sequence_len:sequence_len * 2])

        X_test_left = np.array(
            train_fea_protein_AB[:, 0:sequence_len][test_index])
        X_test_right = np.array(
            train_fea_protein_AB[:, sequence_len:sequence_len * 2][test_index])

        y_test = Y[test_index]

        # feed data into model
        model = merged_DBN(sequence_len)
        sgd = SGD(lr=0.01, momentum=0.9, decay=0.001)
        model.compile(loss='categorical_crossentropy',
                      optimizer=sgd,
                      metrics=['precision'])
        hist = model.fit(
            [X_train_left, X_train_right],
            y_train,
            validation_data=([X_validation_left, X_validation_right], y_val),
            batch_size=128,
            nb_epoch=45,
            verbose=1)
        mem_cv.append('round ' + str(i) + ' ' + getMemorystate())
        train_validation__vis(hist, i, plot_dir, db)
        print('******   model created!  ******')
        model.save(model_dir + db + '/round_' + str(i) + '.h5')

        predictions_test = model.predict([X_test_left, X_test_right])

        # Ranking metrics are computed on class column 1.
        auc_test = roc_auc_score(y_test[:, 1], predictions_test[:, 1])
        pr_test = average_precision_score(y_test[:, 1], predictions_test[:, 1])

        label_predict_test = utils.categorical_probas_to_classes(
            predictions_test)
        (tp_test, fp_test, tn_test, fn_test, accuracy_test, precision_test,
         sensitivity_test, recall_test, specificity_test, MCC_test,
         f1_score_test, _, _, _) = utils.calculate_performace(
             len(label_predict_test), label_predict_test, y_test[:, 1])
        print(db + '    test:' + str(i))
        print('\ttp=%0.0f,fp=%0.0f,tn=%0.0f,fn=%0.0f' %
              (tp_test, fp_test, tn_test, fn_test))
        print('\tacc=%0.4f,pre=%0.4f,rec=%0.4f,sp=%0.4f,mcc=%0.4f,f1=%0.4f' %
              (accuracy_test, precision_test, recall_test, specificity_test,
               MCC_test, f1_score_test))
        print('\tauc=%0.4f,pr=%0.4f' % (auc_test, pr_test))
        scores.append([
            accuracy_test, precision_test, recall_test, specificity_test,
            MCC_test, f1_score_test, auc_test, pr_test
        ])

        # Release the backend graph before the next fold builds a new model.
        K.clear_session()
        tf.reset_default_graph()

    sc = pd.DataFrame(scores)
    sc.to_csv(result_dir + '5cv_' + db + '_scores.csv')
    scores_array = np.array(scores)

    # The same summary lines go to stdout and to the result file.
    summary_lines = _cv_summary_lines(scores_array)
    print(db + '_5cv:')
    for line in summary_lines:
        print(line)
    time_end_classify = time()

    # memory and time for classify
    print('Time of create db(' + db + '):',
          time_end_classify - time_start_classify)
    with open('runInfo/11188_val/cv_mem_time.txt', 'a') as f:
        f.write('Time of cv(' + db + '):' +
                str(time_end_classify - time_start_classify))
        f.write('\n')
        # One line per fold, however many folds were run.
        for mem_line in mem_cv:
            f.write(mem_line)
            f.write('\n')

    with open(result_dir + '5cv_' + db + '.txt', 'w') as f:
        f.write('\n'.join(summary_lines))
        # The report ends with a newline followed by one blank line.
        f.write('\n\n')
Beispiel #10
0
                                      nb_epoch = nb_epoch,
                                      verbose = 1)
                            
                            print('******   model created!  ******')
#                            mkdir(model_dir + swm+be+'/')
#                            mkdir(plot_dir + swm+be+'/')
#                            training_vis(hist,i,plot_dir,swm,be)
#                            model.save(model_dir + swm+be+'/round_'+str(i)+'.h5')
                    
                            predictions_test = model.predict([X_test_left, X_test_right]) 
                            
                            auc_test = roc_auc_score(y_test[:,1], predictions_test[:,1])
                            pr_test = average_precision_score(y_test[:,1], predictions_test[:,1])
                         
                            label_predict_test = utils.categorical_probas_to_classes(predictions_test)  
                            tp_test,fp_test,tn_test,fn_test,accuracy_test, precision_test, sensitivity_test,recall_test, specificity_test, MCC_test, f1_score_test,_,_,_= utils.calculate_performace(len(label_predict_test), label_predict_test, y_test[:,1])
                            print(' ===========  test:'+str(i))
                            print('\ttp=%0.0f,fp=%0.0f,tn=%0.0f,fn=%0.0f'%(tp_test,fp_test,tn_test,fn_test))
                            print('\tacc=%0.4f,pre=%0.4f,rec=%0.4f,sp=%0.4f,mcc=%0.4f,f1=%0.4f'
                                  % (accuracy_test, precision_test, recall_test, specificity_test, MCC_test, f1_score_test))
                            print('\tauc=%0.4f,pr=%0.4f'%(auc_test,pr_test))
                            scores.append([accuracy_test,precision_test, recall_test,specificity_test, MCC_test, f1_score_test, auc_test,pr_test]) 
                            
                            i=i+1
                            K.clear_session()
                            tf.reset_default_graph()
                        
                        sc= pd.DataFrame(scores)   
#                        sc.to_csv(result_dir+swm+be+'.csv')   
                        scores_array = np.array(scores)
                        print(("accuracy=%.2f%% (+/- %.2f%%)" % (np.mean(scores_array, axis=0)[0]*100,np.std(scores_array, axis=0)[0]*100)))
Beispiel #11
0
def start_fit(dataset, label, title):
    """Run 5-fold cross-validation of the two-branch model and plot ROC curves.

    The samples are shuffled with ``get_shuffle``, standardized, and cut into
    two 1164-column halves, one per model input. Each fold trains a fresh
    model from ``get_model``, plots its ROC curve and records its metrics.
    Finally the mean ROC curve is plotted and the mean and standard deviation
    of every metric are printed under ``title``.

    Args:
        dataset: samples convertible to a 2-D array with at least 2328 columns.
        label: class labels, one per sample.
        title: heading printed before the metric summary.

    Returns:
        None. Results are plotted and printed.
    """
    dataset = np.array(dataset)
    label = np.array(label)
    dataset, label = get_shuffle(dataset, label, random_state=1)

    # normalization
    # NOTE(review): the scaler is fitted on all samples before the folds are
    # split, so test-fold statistics influence the training features. Left
    # unchanged to keep reported numbers comparable - confirm whether
    # per-fold scaling is wanted.
    scaler = StandardScaler().fit(dataset)
    dataset = scaler.transform(dataset)

    X1_train = dataset[:, 0:1164]
    X2_train = dataset[:, 1164:2328]
    label = label.reshape(len(label), )
    y_train = np_utils.to_categorical(label)

    # define 5-fold cross validation test harness
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
    cvscores = []

    # Common FPR grid on which the per-fold TPR curves are accumulated.
    mean_fpr = np.linspace(0, 1, 100)
    mean_tpr = np.zeros_like(mean_fpr)
    lw = 2

    for i, (train, test) in enumerate(skf.split(dataset, label)):
        model = get_model(dropout_value=0.2)
        model.fit(
            [X1_train[train], X2_train[train]],
            y_train[train],
            epochs=30,
            batch_size=64,
            verbose=0,
        )
        # prediction probability
        y_probas = model.predict([X1_train[test], X2_train[test]])

        # ROC is evaluated on the first class column.
        fpr, tpr, _ = roc_curve(y_train[test][:, 0], y_probas[:, 0])
        roc_auc = auc(fpr, tpr)

        # np.interp replaces the bare ``interp`` name, which presumably came
        # from ``from scipy import interp`` - an alias of numpy.interp that
        # newer SciPy releases no longer ship.
        mean_tpr += np.interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0  # pin the curve to the origin
        plt.plot(fpr,
                 tpr,
                 lw=lw,
                 color=plt.cm.Set1(i / 10.),
                 label='ROC fold %d (area = %0.2f%%)' % (i, (roc_auc * 100)))

        y_class = categorical_probas_to_classes(y_probas)
        y_test = categorical_probas_to_classes(y_train[test])

        acc, precision, npv, sensitivity, specificity, mcc, f1 = calculate_performace(
            len(y_class), y_class, y_test)
        cvscores.append(
            [acc, precision, npv, sensitivity, specificity, mcc, roc_auc])

    plt.plot([0, 1], [0, 1], linestyle='--', lw=lw, color='k', label='Luck')

    # Average the accumulated curves and pin the end point to (1, 1).
    mean_tpr /= skf.get_n_splits(dataset, label)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr,
             mean_tpr,
             color='g',
             linestyle='--',
             label='Mean ROC (area = %0.2f%%)' % (mean_auc * 100),
             lw=lw)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc="lower right")
    plt.show()

    print(title)
    scores = np.array(cvscores)
    # Reduce once; the column order matches the rows appended to cvscores.
    means = np.mean(scores, axis=0)
    stds = np.std(scores, axis=0)
    metric_names = ('acc', 'precision', 'npv', 'sensitivity', 'specificity',
                    'mcc', 'roc_auc')
    for col, name in enumerate(metric_names):
        print("%s=%.2f%% (+/- %.2f%%)" %
              (name, means[col] * 100, stds[col] * 100))