ax.set_ylim(2.5, -0.5)  # fix the y-limits so the 3x3 matrix rows are not half-clipped (matplotlib 3.1 workaround)
for i in range(3):
    for j in range(3):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='white')
plt.show()

# In[101]:

# Neural network with keras tutorial
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense

# define the keras model (x_train, y_train and x_test are assumed to be prepared earlier, e.g. via loadtxt)
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# compile the keras model (binary_crossentropy matches the single sigmoid output)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# fit the keras model on the dataset
model.fit(x_train, y_train, epochs=150, batch_size=10)
# evaluate the keras model
_, accuracy = model.evaluate(x_train, y_train)
print('Training Accuracy: %.2f' % (accuracy * 100))

# make probability predictions with the model
predictions = model.predict(x_test)
# round probabilities to 0/1 class labels
rounded = (predictions > 0.5).astype(int)
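# A quick sanity check of the first few predictions (assumed shape: (n_samples, 1)):
for p, r in zip(predictions[:5], rounded[:5]):
    print('probability %.3f -> class %d' % (p[0], r[0]))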
Example #2
def voting(peptide_predict_file, nucleotide_predict_file, effector_train, noneffector_train):

    # count the sequences (FASTA headers) to be classified
    total = 0
    with open(peptide_predict_file) as f:
        for line in f:
            if line.startswith('>'):
                total += 1

    print('Total number of sequences to be classified:', total)
    
    import time
    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8

    import random
    import warnings
    import pandas
    import numpy as np
    from sklearn import svm
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
    from sklearn.feature_selection import SelectFromModel
    from sklearn.model_selection import train_test_split, cross_val_score
    from sklearn.preprocessing import StandardScaler
    from keras.models import Sequential
    from keras.layers import Dense
    from keras.utils import np_utils
    from imblearn.over_sampling import BorderlineSMOTE

    warnings.filterwarnings("ignore")
    np.random.seed(123)
    f = random.seed()  # random.seed() returns None, so the split below is unseeded

    # feature vectors of the sequences to be predicted
    featurevector = featureextraction(peptide_predict_file, nucleotide_predict_file, total)
    print(len(featurevector))

 
    #getting training data
    dataframe = pandas.read_csv(effector_train, header=None, sep=',')
    dataset = dataframe.values
    eff = dataset[:,0:1000].astype(float)

    dataframe = pandas.read_csv(noneffector_train, header=None, sep=',')
    dataset = dataframe.values
    noneff = dataset[:,0:1000].astype(float)


    
    # stack effectors (label 0) on top of non-effectors (label 1)
    X = np.vstack([eff, noneff])
    Y = np.concatenate([np.zeros(eff.shape[0]), np.ones(noneff.shape[0])])
    
    print('Resampling the unbalanced data...')
    X_resampled, Y_resampled = BorderlineSMOTE(kind='borderline-1').fit_resample(X, Y)

    # standardize features by removing the mean and scaling to unit variance
    scaler = StandardScaler().fit(X_resampled)
    X_resampled = scaler.transform(X_resampled)
    featurevector = scaler.transform(featurevector)


  
    # feature selection: keep the features the extra-trees model finds informative
    selector = SelectFromModel(ExtraTreesClassifier()).fit(X_resampled, Y_resampled)
    X_resampled = selector.transform(X_resampled)
    featurevector = selector.transform(featurevector)
    newshape = X_resampled.shape
    

    print("Training Classifiers...")
    #train and test set
    X_train, X_test, y_train, y_test = train_test_split(X_resampled, Y_resampled, test_size=0.15, random_state=f)
    y_t=y_train
    y_te=y_test
    y_train=np.ones((len(y_t),2))
    y_test=np.ones((len(y_te),2))
    for i in range(len(y_t)):
        if y_t[i]==0:
            y_train[i][1]=0
        if y_t[i]==1:
            y_train[i][0]=0
            
    for i in range(len(y_te)):
        if y_te[i]==0:
            y_test[i][1]=0
        if y_te[i]==1:
            y_test[i][0]=0    
    
    #ANN
    print("Training Artificial Neural Network...") 
    model = Sequential()
    model.add(Dense(newshape[1]+1, activation='relu', input_shape=(newshape[1],)))
    model.add(Dense(500, activation='relu'))
    #model.add(Dense(800, activation='relu'))
    #model.add(Dense(500, activation='relu'))
    model.add(Dense(250, activation='relu'))
    model.add(Dense(90, activation='relu'))
    # Add an output layer 
    model.add(Dense(2, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['binary_accuracy'])
    model.fit(X_train, y_train, epochs=1000, batch_size=25, verbose=0)
    score = model.evaluate(X_test, y_test, verbose=0)
    ANN = model.predict(featurevector)

    # restore the original 0/1 labels for the scikit-learn classifiers
    y_train = y_t
    y_test = y_te
            
    #SVM
    print("Training Support Vector Machine...") 
    clf1 = svm.SVC(decision_function_shape='ovr', kernel='linear', max_iter=1000)
    clf1.fit(X_train, y_train)
    y_pred=clf1.predict(X_test)
    results=cross_val_score(clf1, X_test, y_test, cv=10)
    SVM=clf1.predict(featurevector)

    #KNN
    print("Training k-Nearest Neighbor ...") 
    neigh = KNeighborsClassifier(n_neighbors=10)
    neigh.fit(X_train, y_train) 
    results=cross_val_score(neigh, X_test, y_test, cv=10)
    y_pred=neigh.predict(X_test)
    KNN=neigh.predict(featurevector)

    #Naive Bayes
    print("Training Naive Bayes...") 
    # BernoulliNB is used here: after standardization the features contain
    # negative values, which MultinomialNB does not accept
    clf = BernoulliNB()
    clf.fit(X_train, y_train)
    results = cross_val_score(clf, X_test, y_test, cv=10)
    y_pred = clf.predict(X_test)
    NB = clf.predict(featurevector)
     
    #RandomForest
    print("Training Random Forest...") 
    rf = RandomForestClassifier(random_state=0, min_samples_leaf=100)
    rf.fit(X_train, y_train)
    results=cross_val_score(rf, X_test, y_test, cv=10)
    y_pred=rf.predict(X_test)
    RF = rf.predict(featurevector)
    
    # majority vote across the five classifiers:
    # column 0 counts "effector" votes, column 1 counts "non-effector" votes
    vote_result = [[0, 0] for _ in range(len(SVM))]
    for i in range(len(ANN)):
        if round(ANN[i][0]) == 1.0:
            vote_result[i][0] += 1
        if round(ANN[i][1]) == 1.0:
            vote_result[i][1] += 1
        for pred in (SVM[i], KNN[i], NB[i], RF[i]):
            if pred == 0:
                vote_result[i][0] += 1
            else:
                vote_result[i][1] += 1

    print('-----------------------Results-----------------------')
    for i in range(len(ANN)):
        if vote_result[i][0]>=vote_result[i][1]:
            print('Sequence ',i+1,' is a probable Type 6 Effector')
        else:    
            print('Sequence ',i+1,' is not a Type 6 Effector')
    end_time = time.perf_counter()
    print('Execution time:', end_time - start_time)
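# A hypothetical invocation (the file names are placeholders, not from the
# original source): peptide and nucleotide FASTA files to classify, plus CSV
# feature matrices for the effector / non-effector training sets.
voting('candidates.faa', 'candidates.fna',
       'effector_features.csv', 'noneffector_features.csv')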
Example #3
# model.add(Dense(1, activation='sigmoid'))

# # fully connected neural network with dropout in input layer
# model = Sequential()
# model.add(Dropout(0.2, input_shape=(feature_size,)))
# model.add(Dense(120, activation='relu', kernel_constraint=maxnorm(3)))
# model.add(Dense(30, activation='relu', kernel_constraint=maxnorm(3)))
# model.add(Dense(1, activation='sigmoid'))
# # Compile model
# sgd = SGD(lr=0.1, momentum=0.9)

# fully connected neural network with dropout in hidden layer
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.constraints import maxnorm
from keras.utils import plot_model

model = Sequential()
model.add(
    Dense(500,
          input_dim=feature_size,
          activation='relu',
          kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(50, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(BatchNormalization())
# model.add(Dense(20, activation='relu', kernel_constraint=maxnorm(3)))
# model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
# Compile model
# sgd = SGD(lr=0.1, momentum=0.9)

# model summary
plot_model(model, to_file='model.png', show_shapes=True)  # writes an architecture diagram to model.png
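# The snippet stops before training; a minimal sketch of compiling and fitting
# this model (assumed: binary 0/1 labels; X, y and feature_size come from
# elided code and are not defined in this excerpt):
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=100, batch_size=32, verbose=0)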
Example #4
L2 = 0.000001

# # LSTM Model (Training)

# In[58]:

# LSTM model
from keras.backend import clear_session
from keras.models import Sequential
from keras.layers import LSTM, Bidirectional, Flatten, Dense
from keras.regularizers import l2

clear_session()
model = Sequential()
model.add(
    LSTM(64,
         return_sequences=True,
         input_shape=(N_TIME_STEPS, N_FEATURES),
         kernel_initializer='orthogonal',
         kernel_regularizer=l2(L2),
         recurrent_regularizer=l2(L2),
         bias_regularizer=l2(L2),
         name="LSTM_1"))
model.add(Bidirectional(LSTM(32, return_sequences=True)))
model.add(Flatten(name='Flatten'))
model.add(
    Dense(N_HIDDEN_UNITS,
          activation='relu',
          kernel_regularizer=l2(L2),
          bias_regularizer=l2(L2),
          name="Dense_1"))
model.add(
    Dense(N_CLASSES,
          activation='softmax'))
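# A minimal sketch of compiling this LSTM (assumed: one-hot labels with
# N_CLASSES columns; the N_* constants come from the surrounding, elided code):
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()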
Example #5
#principalDf['target']= y

#sns.pairplot(principalDf, hue='target', diag_kind='hist')

principalDf = principalDf.values
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

model = Sequential()
opt = keras.optimizers.Adam(lr=0.1)  # note: 0.1 is an unusually high learning rate for Adam

model.add(
    Dense(3, input_dim=4, kernel_initializer='he_normal', activation='relu'))
model.add(Dense(4, kernel_initializer='he_normal', activation='relu'))
#model.add(Dropout(.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

model.fit(principalDf, y, epochs=50)

#XGBoost Classifier
model = xgb.XGBClassifier(subsample=1.0,
                          min_child_weight=10,
                          learning_rate=0.1,
                          gamma=1.5,
                          booster='gbtree',
                          colsample_bytree=1.0)
model.fit(x_train, y_train)
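# A minimal check of the XGBoost classifier (assumed: x_test/y_test come from
# the same, elided split that produced x_train/y_train):
y_pred = model.predict(x_test)
print('XGBoost accuracy: %.3f' % accuracy_score(y_test, y_pred))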
Example #6
plt.xlabel('False Positive Rate')
plt.show()

#DEEP LEARNING

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import BatchNormalization  # the keras.layers.normalization path is obsolete

classifier = Sequential()

classifier.add(
    Dense(units=516,
          kernel_initializer='uniform',
          activation='relu',
          input_dim=17))

classifier.add(
    Dense(units=1024, kernel_initializer='uniform', activation='relu'))
classifier.add(BatchNormalization())

classifier.add(
    Dense(units=2048, kernel_initializer='uniform', activation='relu'))
classifier.add(Dropout(0.1))  # Dropout takes the rate positionally in keras 2; the old 'p' kwarg is invalid

classifier.add(
    Dense(units=1024, kernel_initializer='uniform', activation='relu'))

classifier.add(
    Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))  # assumed output layer: single sigmoid unit for a binary target
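# A minimal sketch of compiling and training the network (assumed: X_train and
# y_train with 17 input features and binary labels, not shown in this excerpt):
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
classifier.fit(X_train, y_train, batch_size=32, epochs=10, verbose=1)
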
y_pred = pd.DataFrame(pred)  # wrap the train-set predictions in a DataFrame
print(confusion_matrix(y_train, y_pred))  # confusion matrix on the training set
print(classification_report(y_train, y_pred))  # training accuracy ~100%

y_test_pred = rf.predict(pred_test)
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred))  # test accuracy ~92%

# Building a model with an Artificial Neural Network -------------------------

# Importing necessary models for implementation of ANN
from keras.models import Sequential
from keras.layers import Dense  #, Activation,Layer,Lambda
model = Sequential()
model.add(Dense(10, input_dim=18, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# compile the keras model (binary classification)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# fit keras model
model.fit(np.array(predictor), np.array(y_train), epochs=10, batch_size=10)
model_loss = pd.DataFrame(model.history.history)
model_loss.plot()

# evaluate keras model accuracy
_, accuracy = model.evaluate(pred_dt, y_train)
print('Accuracy: %.2f' % (accuracy * 100))
sm = svm.SVC(C=5, kernel='rbf', gamma=0.02)
sm.fit(X_train, y_train)
predict_sm = sm.predict(X_test)
print('accuracy using SVM:', accuracy_score(y_test, predict_sm))
### MLP
mlp_clf = MLPClassifier(solver='sgd', alpha=1e-4, hidden_layer_sizes=(10, 3),
                        learning_rate='adaptive', random_state=1, activation='tanh')
mlp_clf.fit(X_train, y_train)
predict_mlp = mlp_clf.predict(X_test)
print('accuracy using MLP:', accuracy_score(y_test, predict_mlp))

# predict_rf holds the random-forest predictions from earlier (elided) code
report = classification_report(y_test, predict_rf)
fpr, tpr, thresholds = roc_curve(y_test, predict_rf)
roc_auc = auc(fpr, tpr)
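# A minimal sketch of plotting the ROC curve computed above (assumes
# matplotlib.pyplot is available as plt, as earlier in this snippet):
plt.plot(fpr, tpr, label='Random Forest (AUC = %.3f)' % roc_auc)
plt.plot([0, 1], [0, 1], linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()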
## MLP model 2
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(30,)))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='tanh'))
model.add(Dense(1, activation='sigmoid'))  # single sigmoid output matches the binary_crossentropy loss below
model.summary()
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])
history = model.fit(X_train, y_train,
                    batch_size=500,
                    epochs=10,
                    verbose=1,
                    validation_data=(X_test, y_test))
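# A minimal follow-up: score the held-out split using only names this snippet defines
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss: %.4f, test accuracy: %.4f' % (score[0], score[1]))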