import pandas as pd
from keras.models import Sequential
from keras.layers import Dense

# define the keras model
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# compile the keras model (binary_crossentropy matches the single sigmoid
# output; categorical_crossentropy expects one-hot multi-class targets)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit the keras model on the dataset
model.fit(x_train, y_train, epochs=150, batch_size=10)

# evaluate the keras model on the training data
_, accuracy = model.evaluate(x_train, y_train)
print('Train Accuracy: %.2f' % (accuracy * 100))

# make probability predictions with the model
predictions = model.predict(x_test)
# round probabilities to class labels
rounded = [round(x[0]) for x in predictions]
# make class predictions with the model (predict_classes was removed in
# recent Keras; threshold the probabilities instead)
predictions = (model.predict(x_test) > 0.5).astype(int)

# In[89]:

# Plot accuracy comparisons between scenario 1 (assumed features) and scenario 2 (features from analysis)
plot_data = pd.read_excel(
    r'C:\Users\Life\Desktop\GMU\CS-504\dataset\AccuracyResults.xlsx')
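# The plotting code for AccuracyResults.xlsx is not shown above; the sketch
# below is one plausible completion. The column names 'Model', 'Scenario1',
# and 'Scenario2' are hypothetical assumptions -- adjust them to match the
# actual spreadsheet.
import matplotlib.pyplot as plt

ax = plot_data.plot(x='Model', y=['Scenario1', 'Scenario2'], kind='bar')
ax.set_ylabel('Accuracy (%)')
ax.set_title('Scenario 1 (assumed features) vs Scenario 2 (features from analysis)')
plt.tight_layout()
plt.show()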
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

# fit the keras model, holding out a third of the data for validation
history = model.fit(X_train, y_train, validation_split=0.33, shuffle=True,
                    epochs=400, batch_size=10000)

# evaluate the keras model
_, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (accuracy * 100))

# list all metrics recorded in history
print(history.history.keys())

# summarize history for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
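# A 400-epoch run with a validation split invites early stopping. This is a
# sketch, not part of the original pipeline; the patience value is an
# arbitrary assumption.
from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=20,
                           restore_best_weights=True)
history = model.fit(X_train, y_train, validation_split=0.33, shuffle=True,
                    epochs=400, batch_size=10000, callbacks=[early_stop])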
def voting(peptide_predict_file, nucleotide_predict_file, effector_train, noneffector_train):
    # count the sequences (FASTA headers) to be classified
    total = 0
    with open(peptide_predict_file) as f:
        for line in f:
            if line.startswith('>'):
                total += 1
    print('Total number of sequences to be classified: ', total)

    import time
    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8

    import numpy as np
    np.random.seed(123)
    import pandas
    import warnings
    from sklearn import svm
    from sklearn.naive_bayes import GaussianNB
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
    from sklearn.feature_selection import SelectFromModel
    from sklearn.model_selection import train_test_split, cross_val_score
    from sklearn.preprocessing import StandardScaler
    from keras.models import Sequential
    from keras.layers import Dense
    from imblearn.over_sampling import BorderlineSMOTE
    warnings.filterwarnings("ignore")

    # getting feature vector of the sequences to be predicted
    featurevector = featureextraction(peptide_predict_file, nucleotide_predict_file, total)
    print(len(featurevector))

    # getting training data
    dataframe = pandas.read_csv(effector_train, header=None, sep=',')
    eff = dataframe.values[:, 0:1000].astype(float)
    dataframe = pandas.read_csv(noneffector_train, header=None, sep=',')
    noneff = dataframe.values[:, 0:1000].astype(float)

    # stack effectors (class 0) on top of non-effectors (class 1)
    X = np.vstack((eff, noneff))
    Y = np.concatenate((np.zeros(eff.shape[0]), np.ones(noneff.shape[0])))

    print('Resampling the unbalanced data...')
    # SMOTE(kind='borderline1') is the pre-0.4 imblearn API; BorderlineSMOTE
    # with fit_resample is the current equivalent
    X_resampled, Y_resampled = BorderlineSMOTE(kind='borderline-1').fit_resample(X, Y)

    # standardize features by removing the mean and scaling to unit variance;
    # apply the same scaler to the query features
    scaler = StandardScaler().fit(X_resampled)
    X_resampled = scaler.transform(X_resampled)
    featurevector = scaler.transform(featurevector)

    # tree-based feature selection (ExtraTreesClassifier no longer exposes
    # transform(); SelectFromModel replaces it)
    selector_model = ExtraTreesClassifier()
    selector_model.fit(X_resampled, Y_resampled)
    selector = SelectFromModel(selector_model, prefit=True)
    X_resampled = selector.transform(X_resampled)
    featurevector = selector.transform(featurevector)
    newshape = X_resampled.shape

    print("Training Classifiers...")
    # train and test split
    X_train, X_test, y_train, y_test = train_test_split(
        X_resampled, Y_resampled, test_size=0.15)

    # one-hot encode the labels for the neural network
    y_t = y_train
    y_te = y_test
    y_train = np.ones((len(y_t), 2))
    y_test = np.ones((len(y_te), 2))
    for i in range(len(y_t)):
        if y_t[i] == 0:
            y_train[i][1] = 0
        if y_t[i] == 1:
            y_train[i][0] = 0
    for i in range(len(y_te)):
        if y_te[i] == 0:
            y_test[i][1] = 0
        if y_te[i] == 1:
            y_test[i][0] = 0

    # ANN
    print("Training Artificial Neural Network...")
    model = Sequential()
    model.add(Dense(newshape[1] + 1, activation='relu', input_shape=(newshape[1],)))
    model.add(Dense(500, activation='relu'))
    # model.add(Dense(800, activation='relu'))
    # model.add(Dense(500, activation='relu'))
    model.add(Dense(250, activation='relu'))
    model.add(Dense(90, activation='relu'))
    # add an output layer
    model.add(Dense(2, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_accuracy'])
    model.fit(X_train, y_train, epochs=1000, batch_size=25, verbose=0)
    score = model.evaluate(X_test, y_test, verbose=0)
    ANN = model.predict(featurevector)

    # restore the integer labels for the scikit-learn models
    y_train = y_t
    y_test = y_te

    # SVM
    print("Training Support Vector Machine...")
    clf1 = svm.SVC(decision_function_shape='ovr', kernel='linear', max_iter=1000)
    clf1.fit(X_train, y_train)
    y_pred = clf1.predict(X_test)
    results = cross_val_score(clf1, X_test, y_test, cv=10)
    SVM = clf1.predict(featurevector)

    # KNN
    print("Training k-Nearest Neighbor...")
    neigh = KNeighborsClassifier(n_neighbors=10)
    neigh.fit(X_train, y_train)
    results = cross_val_score(neigh, X_test, y_test, cv=10)
    y_pred = neigh.predict(X_test)
    KNN = neigh.predict(featurevector)

    # Naive Bayes (GaussianNB: MultinomialNB rejects the negative values
    # produced by standardization)
    print("Training Naive Bayes...")
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    results = cross_val_score(clf, X_test, y_test, cv=10)
    y_pred = clf.predict(X_test)
    NB = clf.predict(featurevector)

    # Random Forest
    print("Training Random Forest...")
    rf = RandomForestClassifier(random_state=0, min_samples_leaf=100)
    rf.fit(X_train, y_train)
    results = cross_val_score(rf, X_test, y_test, cv=10)
    y_pred = rf.predict(X_test)
    RF = rf.predict(featurevector)  # was clf.predict, which re-used the Naive Bayes model

    # majority vote across the five classifiers
    vote_result = [[0, 0] for _ in range(len(SVM))]
    for i in range(len(ANN)):
        if round(ANN[i][0]) == 1.0:
            vote_result[i][0] += 1
        if round(ANN[i][1]) == 1.0:
            vote_result[i][1] += 1
        if SVM[i] == 0:
            vote_result[i][0] += 1
        if SVM[i] == 1:
            vote_result[i][1] += 1
        if KNN[i] == 0:
            vote_result[i][0] += 1
        if KNN[i] == 1:
            vote_result[i][1] += 1
        if NB[i] == 0:
            vote_result[i][0] += 1
        if NB[i] == 1:
            vote_result[i][1] += 1
        if RF[i] == 0:
            vote_result[i][0] += 1
        if RF[i] == 1:
            vote_result[i][1] += 1

    print('-----------------------Results-----------------------')
    for i in range(len(ANN)):
        if vote_result[i][0] >= vote_result[i][1]:
            print('Sequence ', i + 1, ' is a probable Type 6 Effector')
        else:
            print('Sequence ', i + 1, ' is not a Type 6 Effector')

    end_time = time.perf_counter()
    print('Execution time', (end_time - start_time))
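# The repeated if-blocks above implement a plain majority vote. Below is a
# compact sketch of the same tally; note it is a slight simplification, since
# it casts exactly one ANN vote (for the argmax class) instead of rounding
# each one-hot column independently.
import numpy as np

def majority_vote(ann, svm_pred, knn_pred, nb_pred, rf_pred):
    votes = []
    for i in range(len(ann)):
        tally = [0, 0]
        tally[int(np.argmax(ann[i]))] += 1    # ANN: argmax of the 2-column output
        for pred in (svm_pred, knn_pred, nb_pred, rf_pred):
            tally[int(pred[i])] += 1          # scikit-learn models predict 0 or 1
        votes.append(tally)
    return votes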
model = Sequential()
model.add(Dense(10, input_dim=18, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# compile keras model, binary classification
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit keras model
model.fit(np.array(predictor), np.array(y_train), epochs=10, batch_size=10)
model_loss = pd.DataFrame(model.history.history)
model_loss.plot()

# evaluate keras model accuracy on the training data
_, accuracy = model.evaluate(np.array(predictor), y_train)
print('Accuracy: %.2f' % (accuracy * 100))

# predict_classes was removed from Keras; threshold the probabilities instead
from sklearn.metrics import classification_report, confusion_matrix
train_pred = (model.predict(np.array(predictor)) > 0.5).astype(int)
print(classification_report(y_train, train_pred))
print(confusion_matrix(y_train, train_pred))
#[[286 1]

# Building KNN model -----------------------------------------------
from sklearn.neighbors import KNeighborsClassifier as KNC

knn = KNC(n_neighbors=7)
knn.fit(predictor, y_train.values.ravel())
y_train_pred = knn.predict(predictor)
knn.score(predictor, y_train)  # 81.86%

# check prediction accuracy of train data and classification error
print(confusion_matrix(y_train, y_train_pred))
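# knn.score(predictor, y_train) above measures accuracy on the same data the
# model was fit on, which flatters k-NN. A minimal sketch of a held-out
# evaluation instead (the split parameters are arbitrary assumptions):
from sklearn.model_selection import train_test_split

X_tr, X_val, y_tr, y_val = train_test_split(
    predictor, y_train.values.ravel(), test_size=0.2, random_state=42)
knn_holdout = KNC(n_neighbors=7)
knn_holdout.fit(X_tr, y_tr)
print('Held-out accuracy: %.2f%%' % (knn_holdout.score(X_val, y_val) * 100))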
#     'scores': str(accuracy)
# }
# print(str(json.dumps(result)))
# quit()

# ------------------------------------------------------
# NN Execute
# ------------------------------------------------------
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batchSize,
                    callbacks=callbacks_list)
scores = model.evaluate(x=X_test, y=y_test, batch_size=X_test.shape[1],
                        verbose=1, sample_weight=None, steps=None)
loss = scores[0]
metric = scores[1]

# ------------------------------------------------------
# Create a dataframe from prediction and test
# ------------------------------------------------------
from math import sqrt

y_pred = model.predict(X_test)
y_pred = y_pred.astype('int64')  # note: astype truncates toward zero rather than rounding
y_pred = pd.Series(y_pred.flatten().tolist())
y_test = pd.Series(y_test.tolist())
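# sqrt is imported above but never used; presumably an RMSE comparison of the
# prediction and test series was intended. A minimal sketch of that step:
rmse = sqrt(((y_pred - y_test) ** 2).mean())
print('RMSE: %.4f' % rmse)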
def _main():
    # firebase = pyrebase.initialize_app(config)
    # db = firebase.database()
    # all_users_r1 = db.child("cho").child("r-1").get().val()
    # all_users_r2 = db.child("cho").child("r-2").get().val()
    # # del all_users_r2['cho']
    # all_users = db.child("cho").child("users").get().val()
    # # all_users.update(all_users_t)
    # # a = {'hello': 'world'}
    # with open('r-2.pickle', 'wb') as handle:
    #     pickle.dump(all_users_r2, handle, protocol=pickle.HIGHEST_PROTOCOL)
    # for user in all_users:
    #     db.child("cho").child("users")
    #     user_node = db.child("cho").child("r-1").child(user)
    #     for attr in all_users[user]:
    #         user_node.child(attr).set(all_users[user][attr])
    #         print(attr)
    # child(user).set() .remove()

    with open('users.pickle', 'rb') as handle:
        all_users = pickle.load(handle)

    users_data = []
    targets = {'BA_S': 0, 'MIS_S': 1, 'MOS_S': 2, 'SES_S': 3}
    for user in all_users:
        data = get_user_data(all_users[user])
        data.update({'name': user})
        users_data.append(data)

    df = pd.DataFrame()
    excluded = pd.DataFrame()
    count = 0
    excludedPanelistsN = 1
    randomExcluded = 4
    for user_data in users_data:
        if user_data["name"] == "sameha" or user_data["name"] == "rna":
            continue
        count += 1
        # if count == randomExcluded:
        #     df = excluded
        # else:
        #     df = included
        try:
            # the four condition blocks were identical except for the label;
            # DataFrame.append was removed in pandas 2.0, so collect the
            # chunks and concat once per panelist
            all_chunks = []
            for condition in ('BA_S', 'MIS_S', 'MOS_S', 'SES_S'):
                for chunk in calculate_panelist_features(user_data, condition):
                    chunk.update({'tar': targets[condition], 'pi': user_data['name']})
                    all_chunks.append(chunk)
            # RE_S_features_chunks = calculate_panelist_features(user_data, 'RE_S')
            df = pd.concat([df, pd.DataFrame(all_chunks)], ignore_index=True)
        except Exception:
            print('err : ' + user_data["name"])

    # clip pathological sample-entropy values
    df['sampen'] = df['sampen'].apply(lambda x: x if (-100 < x < 100) else 0)

    dfFeatures = df.loc[:, df.columns.difference(['tar', 'pi'])].copy()
    # z-score each panelist's numeric features within that panelist
    # (x.dtype == np.number never matches; np.issubdtype is the correct test)
    dfNormalized = df.loc[:, df.columns.difference(['tar'])].copy().groupby(
        'pi').transform(lambda x: (x - x.mean()) / x.std()
                        if np.issubdtype(x.dtype, np.number) else x)
    # dfNormalized = dfFeatures.apply(lambda x: (x - x.mean()) / x.std(), axis=0)
    # dfNormalized = dfFeatures.apply(lambda x: (x - x.min()) / (x.max() - x.min()), axis=0)
    dfNormalized['tar'] = df['tar']
    dfNormalized['pi'] = df['pi']
    # dfNormalized = dfNormalized.dropna(axis=1)

    # hold out three panelists for testing
    # excluded = dfNormalized.loc[0:46].copy().dropna()
    excluded = dfNormalized.loc[dfNormalized['pi'].isin(['', 'ran', 'mah'])]
    # excluded.loc[excluded['tar'] == 0.0, 'tar'] = -1
    # excluded.loc[excluded['tar'] != -1.0, 'tar'] = 0
    # excluded.loc[excluded['tar'] == -1.0, 'tar'] = 1

    # included = dfNormalized.loc[47:len(df)].copy().dropna()
    included = dfNormalized.loc[~dfNormalized['pi'].isin(['', 'ran', 'mah'])]

    # full feature set (overridden by the narrower list just below)
    features_name = [
        'ApEN', 'SD1', 'SD2', 'cvnni', 'cvsd', 'hf', 'hfnu', 'lf',
        'lf_hf_ratio', 'lfnu', 'max_hr', 'mean_hr', 'mean_nni', 'median_nni',
        'min_hr', 'nni_20', 'nni_50', 'pnni_20', 'pnni_50', 'range_nni',
        'rmssd', 'sampen', 'sc_avg', 'scl_avg', 'scl_slope', 'scr_avg',
        'scr_max', 'scr_peak', 'sdnn', 'sdsd', 'skt_avg', 'skt_slope',
        'skt_std', 'std_hr', 'total_power', 'vlf', 'co_he'
    ]
    # a list rather than a tuple: .loc treats a tuple as a single label
    features_name = ['ApEN', 'SD1', 'SD2', 'hf', 'lf', 'lf_hf_ratio',
                     'mean_hr', 'pnni_20', 'pnni_50', 'rmssd', 'sampen',
                     'sc_avg', 'scl_avg', 'scl_slope', 'scr_avg', 'scr_max',
                     'scr_peak', 'sdnn', 'sdsd', 'skt_avg', 'skt_slope',
                     'skt_std', 'co_he']

    includedTargets = list(included['tar'].values.astype(int))
    excludedTargets = list(excluded['tar'].values.astype(int))
    includedFeatures = included.loc[:, features_name].copy().values
    excludedFeatures = excluded.loc[:, features_name].copy().values
    # includedFeatures = included.loc[:, included.columns.difference(['tar', 'pi'])].copy().values
    # excludedFeatures = excluded.loc[:, excluded.columns.difference(['tar', 'pi'])].copy().values

    regressors = {}
    # train one binary (one-vs-rest) model per target class
    for indexTarget in range(len(set(list(included['tar'].values)))):
        includedTargets = [1 if i == indexTarget else 0
                           for i in included['tar'].values.astype(int)]
        excludedTargets = [1 if i == indexTarget else 0
                           for i in excluded['tar'].values.astype(int)]
        # note: these reassignments override the features_name selection above
        includedFeatures = included.loc[:, included.columns.difference(
            ['tar', 'pi'])].copy().values
        excludedFeatures = excluded.loc[:, excluded.columns.difference(
            ['tar', 'pi'])].copy().values

        # kept under a distinct name so the post-loop importance plot does not
        # accidentally pick up the Keras model
        et_model = ExtraTreesClassifier(max_depth=1000)
        et_model.fit(includedFeatures, includedTargets)
        y_pred = et_model.predict_proba(excludedFeatures)
        predictions = [round(value[1]) for value in y_pred]
        accuracy = accuracy_score(excludedTargets, predictions)
        print('accuracy :' + str(accuracy))

        importances = et_model.feature_importances_
        std = np.std([tree.feature_importances_ for tree in et_model.estimators_],
                     axis=0)
        indices = np.argsort(importances)[::-1]

        # print the feature ranking
        print("Feature ranking:")
        for f in range(includedFeatures.shape[1]):
            print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))

        # plot the feature importances of the forest
        plt.figure()
        plt.title("Feature importances")
        plt.bar(range(includedFeatures.shape[1]), importances[indices],
                color="r", yerr=std[indices], align="center")
        plt.xticks(range(includedFeatures.shape[1]), indices)
        plt.xlim([-1, includedFeatures.shape[1]])
        plt.show()

        model = Sequential([
            Dense(200, activation='relu', input_shape=(len(includedFeatures[0]),)),
            Dense(150, activation='relu'),
            Dense(100, activation='relu'),
            Dense(20, activation='relu'),
            Dense(2, activation='softmax'),
        ])
        # from keras import optimizers
        # compile the model
        model.compile(
            optimizer=optimizers.Adam(learning_rate=0.1),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        # train the model
        model.fit(
            includedFeatures,
            to_categorical(includedTargets),
            epochs=200,
            batch_size=200,
        )
        loss, accuracy = model.evaluate(excludedFeatures,
                                        to_categorical(excludedTargets))
        # Sequential.predict_proba was removed from Keras; predict already
        # returns the softmax probabilities
        y_pred = model.predict(excludedFeatures)
        predictions = [round(value[1]) for value in y_pred]
        # accuracy = accuracy_score(excludedTargets, predictions)
        print('accuracy :' + str(accuracy))
        regressors[indexTarget] = {'model': model, 'accuracy': accuracy}

    excludedTargets = list(excluded['tar'].values.astype(int))
    dfTest = pd.DataFrame()
    dfTest['test'] = excludedTargets
    for indexTarget in range(len(regressors)):
        y_pred = regressors[indexTarget]['model'].predict(excludedFeatures)
        dfTest[indexTarget] = [val[1] for val in y_pred]

    # pick, per row, the class whose one-vs-rest model gave the highest probability
    finalTest = []
    for index, row in dfTest.iterrows():
        maxIndex = -1
        maxVal = -1
        for indexTarget in range(len(regressors)):
            if row[indexTarget] > maxVal:
                maxVal = row[indexTarget]
                maxIndex = indexTarget
        finalTest.append(maxIndex)

    accuracy = accuracy_score(excludedTargets, finalTest)
    print("final excluded Accuracy: %.2f%%" % (accuracy * 100.0))

    # plot the importances from the last ExtraTrees model (the original used
    # `model` here, which by this point refers to the Keras network)
    plt.bar(included.columns.difference(['tar', 'pi']), et_model.feature_importances_)
    plt.show()

    # single 4-way softmax network over all classes; restore the original
    # multi-class labels first (the loop overwrote them with binary ones)
    includedTargets = list(included['tar'].values.astype(int))
    model = Sequential([
        Dense(500, activation='relu', input_shape=(len(includedFeatures[0]),)),
        Dense(200, activation='relu'),
        Dense(100, activation='relu'),
        Dense(20, activation='relu'),
        Dense(4, activation='softmax'),
    ])
    # from keras import optimizers
    # compile the model
    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.0005),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    # train the model
    model.fit(
        includedFeatures,
        to_categorical(includedTargets),
        epochs=200,
        batch_size=200,
    )
    model.evaluate(excludedFeatures, to_categorical(excludedTargets))
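# The row-by-row max search that built finalTest above can be expressed with
# DataFrame.idxmax; a behavior-equivalent sketch over the same dfTest columns:
prob_cols = list(range(len(regressors)))
finalTest_vec = dfTest[prob_cols].idxmax(axis=1).tolist()
assert finalTest_vec == finalTest  # same argmax per row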