ax.set_ylim(2.5, -0.5)
for i in range(3):
    for j in range(3):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='white')
plt.show()

# In[101]:

# Neural network with keras tutorial
from keras.models import Sequential
from keras.layers import Dense

# define the keras model
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# compile the keras model; a single sigmoid output calls for binary
# cross-entropy (categorical_crossentropy expects one-hot targets)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit the keras model on the dataset
model.fit(x_train, y_train, epochs=150, batch_size=10)

# evaluate the keras model on the training data
_, accuracy = model.evaluate(x_train, y_train)
print('Train Accuracy: %.2f' % (accuracy * 100))

# make probability predictions with the model
predictions = model.predict(x_test)

# round predictions to hard 0/1 class labels
rounded = [round(x[0]) for x in predictions]
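# A minimal sketch of scoring the rounded predictions against the held-out
# labels; it assumes y_test comes from the same split that produced x_test.
from sklearn.metrics import accuracy_score

test_acc = accuracy_score(y_test, rounded)
print('Held-out accuracy: %.2f' % (test_acc * 100))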
def voting(peptide_predict_file, nucleotide_predict_file, effector_train, noneffector_train):
    # count the query sequences (FASTA headers begin with '>')
    total = 0
    with open(peptide_predict_file) as f:
        for line in f:
            if line.find('>') == 0:
                total = total + 1
    print('Total number of sequences to be classified: ', total)

    import time
    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8

    import warnings
    import numpy as np
    import pandas
    from keras.models import Sequential
    from keras.layers import Dense
    from sklearn import svm
    from sklearn.naive_bayes import GaussianNB
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
    from sklearn.feature_selection import SelectFromModel
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split, cross_val_score
    from sklearn.preprocessing import StandardScaler
    from imblearn.over_sampling import BorderlineSMOTE

    warnings.filterwarnings("ignore")
    np.random.seed(123)

    # feature vector of the sequences to be predicted
    # (featureextraction is defined elsewhere in this script)
    featurevector = np.asarray(featureextraction(peptide_predict_file, nucleotide_predict_file, total))
    print(len(featurevector))

    # training data: effectors are class 0, non-effectors are class 1
    dataframe = pandas.read_csv(effector_train, header=None, sep=',')
    eff = dataframe.values[:, 0:1000].astype(float)
    dataframe = pandas.read_csv(noneffector_train, header=None, sep=',')
    noneff = dataframe.values[:, 0:1000].astype(float)
    X = np.vstack((eff, noneff))
    Y = np.concatenate((np.zeros(eff.shape[0]), np.ones(noneff.shape[0])))

    print('Resampling the unbalanced data...')
    X_resampled, Y_resampled = BorderlineSMOTE(kind='borderline-1').fit_resample(X, Y)

    # standardize features by removing the mean and scaling to unit variance;
    # the query features go through the same scaler as the training data
    scaler = StandardScaler().fit(X_resampled)
    X_resampled = scaler.transform(X_resampled)
    featurevector = scaler.transform(featurevector)

    # supervised feature selection using ExtraTrees feature importances
    forest = ExtraTreesClassifier()
    forest.fit(X_resampled, Y_resampled)
    selector = SelectFromModel(forest, prefit=True)
    X_resampled = selector.transform(X_resampled)
    featurevector = selector.transform(featurevector)
    newshape = X_resampled.shape

    print("Training Classifiers...")
    # train/test split (unseeded, so the split differs between runs)
    X_train, X_test, y_train, y_test = train_test_split(X_resampled, Y_resampled, test_size=0.15)

    # one-hot encode the labels for the two-output ANN
    y_t = y_train
    y_te = y_test
    y_train = np.ones((len(y_t), 2))
    y_test = np.ones((len(y_te), 2))
    for i in range(len(y_t)):
        if y_t[i] == 0:
            y_train[i][1] = 0
        if y_t[i] == 1:
            y_train[i][0] = 0
    for i in range(len(y_te)):
        if y_te[i] == 0:
            y_test[i][1] = 0
        if y_te[i] == 1:
            y_test[i][0] = 0

    # ANN
    print("Training Artificial Neural Network...")
    model = Sequential()
    model.add(Dense(newshape[1] + 1, activation='relu', input_shape=(newshape[1],)))
    model.add(Dense(500, activation='relu'))
    # model.add(Dense(800, activation='relu'))
    # model.add(Dense(500, activation='relu'))
    model.add(Dense(250, activation='relu'))
    model.add(Dense(90, activation='relu'))
    # add an output layer: one unit per class
    model.add(Dense(2, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_accuracy'])
    model.fit(X_train, y_train, epochs=1000, batch_size=25, verbose=0)
    score = model.evaluate(X_test, y_test, verbose=0)
    ANN = model.predict(featurevector)

    # restore the original 0/1 labels for the scikit-learn classifiers
    y_train = y_t
    y_test = y_te

    # SVM
    print("Training Support Vector Machine...")
    clf1 = svm.SVC(decision_function_shape='ovr', kernel='linear', max_iter=1000)
    clf1.fit(X_train, y_train)
    results = cross_val_score(clf1, X_test, y_test, cv=10)
    y_pred = clf1.predict(X_test)
    print('  CV accuracy: %.3f  held-out accuracy: %.3f'
          % (results.mean(), accuracy_score(y_test, y_pred)))
    SVM = clf1.predict(featurevector)

    # KNN
    print("Training k-Nearest Neighbor...")
    neigh = KNeighborsClassifier(n_neighbors=10)
    neigh.fit(X_train, y_train)
    results = cross_val_score(neigh, X_test, y_test, cv=10)
    y_pred = neigh.predict(X_test)
    print('  CV accuracy: %.3f  held-out accuracy: %.3f'
          % (results.mean(), accuracy_score(y_test, y_pred)))
    KNN = neigh.predict(featurevector)

    # Naive Bayes (GaussianNB: the standardized features can be negative,
    # which MultinomialNB rejects)
    print("Training Naive Bayes...")
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    results = cross_val_score(clf, X_test, y_test, cv=10)
    y_pred = clf.predict(X_test)
    print('  CV accuracy: %.3f  held-out accuracy: %.3f'
          % (results.mean(), accuracy_score(y_test, y_pred)))
    NB = clf.predict(featurevector)

    # Random Forest
    print("Training Random Forest...")
    rf = RandomForestClassifier(random_state=0, min_samples_leaf=100)
    rf.fit(X_train, y_train)
    results = cross_val_score(rf, X_test, y_test, cv=10)
    y_pred = rf.predict(X_test)
    print('  CV accuracy: %.3f  held-out accuracy: %.3f'
          % (results.mean(), accuracy_score(y_test, y_pred)))
    RF = rf.predict(featurevector)

    # majority vote across the five classifiers
    # (column 0 = effector votes, column 1 = non-effector votes)
    vote_result = [[0 for x in range(2)] for y in range(len(SVM))]
    for i in range(len(ANN)):
        if round(ANN[i][0]) == 1.0:
            vote_result[i][0] = vote_result[i][0] + 1
        if round(ANN[i][1]) == 1.0:
            vote_result[i][1] = vote_result[i][1] + 1
        if SVM[i] == 0:
            vote_result[i][0] = vote_result[i][0] + 1
        if SVM[i] == 1:
            vote_result[i][1] = vote_result[i][1] + 1
        if KNN[i] == 0:
            vote_result[i][0] = vote_result[i][0] + 1
        if KNN[i] == 1:
            vote_result[i][1] = vote_result[i][1] + 1
        if NB[i] == 0:
            vote_result[i][0] = vote_result[i][0] + 1
        if NB[i] == 1:
            vote_result[i][1] = vote_result[i][1] + 1
        if RF[i] == 0:
            vote_result[i][0] = vote_result[i][0] + 1
        if RF[i] == 1:
            vote_result[i][1] = vote_result[i][1] + 1

    print('-----------------------Results-----------------------')
    for i in range(len(ANN)):
        if vote_result[i][0] >= vote_result[i][1]:
            print('Sequence ', i + 1, ' is a probable Type 6 Effector')
        else:
            print('Sequence ', i + 1, ' is not a Type 6 Effector')

    end_time = time.perf_counter()
    print('Execution time', (end_time - start_time))
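# A hypothetical invocation of the voting pipeline, kept commented out so the
# script stays runnable; the file names are placeholders for your own FASTA
# inputs and 1000-column training feature CSVs.
# voting('query_peptides.fasta', 'query_nucleotides.fasta',
#        'effector_features.csv', 'noneffector_features.csv')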
# model.add(Dense(1, activation='sigmoid'))

# # fully connected neural network with dropout in input layer
# model = Sequential()
# model.add(Dropout(0.2, input_shape=(feature_size,)))
# model.add(Dense(120, activation='relu', kernel_constraint=maxnorm(3)))
# model.add(Dense(30, activation='relu', kernel_constraint=maxnorm(3)))
# model.add(Dense(1, activation='sigmoid'))
# # Compile model
# sgd = SGD(lr=0.1, momentum=0.9)

# fully connected neural network with dropout in the hidden layers
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.constraints import maxnorm
from keras.utils import plot_model

model = Sequential()
model.add(Dense(500, input_dim=feature_size, activation='relu',
                kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(50, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(BatchNormalization())
# model.add(Dense(20, activation='relu', kernel_constraint=maxnorm(3)))
# model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
# Compile model
# sgd = SGD(lr=0.1, momentum=0.9)

# model summary (the source was truncated here; to_file is an assumed path)
plot_model(model, to_file='model.png', show_shapes=True)
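# A minimal sketch of compiling and fitting the dropout network above; the
# SGD settings mirror the commented-out choice, and the x_train/y_train names
# plus the epoch/batch values are assumptions, not part of the original cell.
from keras.optimizers import SGD

sgd = SGD(lr=0.1, momentum=0.9)
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.fit(x_train, y_train, epochs=100, batch_size=32, verbose=0)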
L2 = 0.000001

# # LSTM Model (Training)

# In[58]:

# LSTM model
from keras.backend import clear_session
from keras.models import Sequential
from keras.layers import Dense, Flatten, LSTM, Bidirectional
from keras.regularizers import l2

clear_session()
model = Sequential()
model.add(LSTM(64,
               return_sequences=True,
               input_shape=(N_TIME_STEPS, N_FEATURES),
               kernel_initializer='orthogonal',
               kernel_regularizer=l2(L2),
               recurrent_regularizer=l2(L2),
               bias_regularizer=l2(L2),
               name="LSTM_1"))
model.add(Bidirectional(LSTM(32, return_sequences=True)))
model.add(Flatten(name='Flatten'))
model.add(Dense(N_HIDDEN_UNITS,
                activation='relu',
                kernel_regularizer=l2(L2),
                bias_regularizer=l2(L2),
                name="Dense_1"))
# softmax output layer (the source was truncated here; the regularizers and
# the layer name are assumed to mirror the Dense_1 pattern)
model.add(Dense(N_CLASSES,
                activation='softmax',
                kernel_regularizer=l2(L2),
                bias_regularizer=l2(L2),
                name="Dense_2"))
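# A minimal sketch of compiling and training the LSTM; categorical
# cross-entropy matches the softmax output, and the X_train/y_train names
# (one-hot labels of shape (samples, N_CLASSES)) and epoch/batch values are
# assumptions.
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=50, batch_size=64,
                    validation_split=0.1, verbose=1)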
#principalDf['target']= y
#sns.pairplot(principalDf, hue='target', diag_kind='hist')
principalDf = principalDf.values

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

model = Sequential()
adam = keras.optimizers.Adam(lr=0.1)
model.add(Dense(3, input_dim=4, kernel_initializer='he_normal', activation='relu'))
model.add(Dense(4, kernel_initializer='he_normal', activation='relu'))
#model.add(Dropout(.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
model.fit(principalDf, y, epochs=50)

# XGBoost classifier
model = xgb.XGBClassifier(subsample=1.0,
                          min_child_weight=10,
                          learning_rate=0.1,
                          gamma=1.5,
                          booster='gbtree',
                          colsample_bytree=1.0)
model.fit(x_train, y_train)
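# A minimal sketch of scoring the XGBoost classifier on the held-out split;
# x_test/y_test are assumed to come from the same split as x_train/y_train.
xgb_pred = model.predict(x_test)
print('XGBoost accuracy:', accuracy_score(y_test, xgb_pred))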
plt.xlabel('False Positive Rate')
plt.show()

# DEEP LEARNING
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import BatchNormalization

classifier = Sequential()
classifier.add(Dense(units=516, kernel_initializer='uniform',
                     activation='relu', input_dim=17))
classifier.add(Dense(units=1024, kernel_initializer='uniform', activation='relu'))
classifier.add(BatchNormalization())
classifier.add(Dense(units=2048, kernel_initializer='uniform', activation='relu'))
classifier.add(Dropout(rate=0.1))
classifier.add(Dense(units=1024, kernel_initializer='uniform', activation='relu'))
# assumed sigmoid output layer for binary classification (the source was
# truncated at this point)
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
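# A minimal sketch of compiling and training the classifier above; the adam
# optimizer, batch size, epoch count, and X_train/y_train names are
# assumptions, not part of the original cell.
classifier.compile(optimizer='adam', loss='binary_crossentropy',
                   metrics=['accuracy'])
classifier.fit(X_train, y_train, batch_size=32, epochs=20, verbose=1)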
y_pred = pd.DataFrame(pred)  # convert to a dataframe so it can be appended
type(y_pred)
confusion_matrix(y_train, y_pred)  # confusion matrix
print(classification_report(y_train, y_pred))  # accuracy 100%

y_test_pred = rf.predict(pred_test)
confusion_matrix(y_test, y_test_pred)
print(classification_report(y_test, y_test_pred))  # accuracy 92%

# Building a model with an Artificial Neural Network --------------------
# importing the models needed to implement the ANN
from keras.models import Sequential
from keras.layers import Dense  #, Activation, Layer, Lambda

model = Sequential()
model.add(Dense(10, input_dim=18, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# compile the keras model (binary classification)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit the keras model
model.fit(np.array(predictor), np.array(y_train), epochs=10, batch_size=10)
model_loss = pd.DataFrame(model.history.history)
model_loss.plot()

# evaluate the keras model's accuracy on the training features
_, accuracy = model.evaluate(pred_dt, y_train)
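# A minimal sketch of checking the ANN on the held-out split as well; this
# assumes pred_test holds the same 18 engineered features used for training.
print('Train accuracy: %.2f' % (accuracy * 100))
_, test_accuracy = model.evaluate(np.array(pred_test), np.array(y_test))
print('Test accuracy: %.2f' % (test_accuracy * 100))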
sm = svm.SVC(C=5, kernel='rbf', gamma=0.02)
sm.fit(X_train, y_train)
predict_sm = sm.predict(X_test)
print('accuracy using svm:', accuracy_score(predict_sm, y_test))

### MLP
mlp_clf = MLPClassifier(solver='sgd',
                        alpha=1e-4,
                        hidden_layer_sizes=(10, 3),
                        learning_rate='adaptive',
                        random_state=1,
                        activation='tanh')
mlp_clf.fit(X_train, y_train)
predict_mlp = mlp_clf.predict(X_test)
print('accuracy using NN:', accuracy_score(predict_mlp, y_test))

# ROC for the random-forest predictions computed earlier (preditct_RF)
report = classification_report(y_test, preditct_RF)
fpr, tpr, thresholds = roc_curve(y_test, preditct_RF)
roc_auc = auc(fpr, tpr)

## MLP model 2
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(30,)))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='tanh'))
# single sigmoid output unit for the binary cross-entropy loss
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train,
                    batch_size=500,
                    epochs=10,
                    verbose=1,
                    validation_data=(X_test, y_test))
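# A minimal sketch of plotting the training curves captured in `history`;
# the metric key is 'acc' in older keras releases and 'accuracy' in newer
# ones, so it is looked up defensively.
import matplotlib.pyplot as plt

acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
plt.plot(history.history[acc_key], label='train')
plt.plot(history.history['val_' + acc_key], label='validation')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()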