from keras.models import Sequential
from keras.layers import Dense

# define the keras model
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# compile the keras model
model.compile(loss='binary_crossentropy',  # binary loss matches the single sigmoid output
              optimizer='adam',
              metrics=['accuracy'])
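# optional: a quick sanity check of the layer stack and parameter counts before training
model.summary()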
# fit the keras model on the dataset
model.fit(x_train, y_train, epochs=150, batch_size=10)
# evaluate the keras model (note: this reuses the training data, so it reports
# train accuracy, not test accuracy)
_, accuracy = model.evaluate(x_train, y_train)
print('Train Accuracy: %.2f' % (accuracy * 100))
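# a minimal sketch of a proper held-out evaluation, assuming X and y hold the
# full feature matrix and labels before splitting (names not in the original):
from sklearn.model_selection import train_test_split

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)
model.fit(X_tr, y_tr, epochs=150, batch_size=10, verbose=0)
_, test_acc = model.evaluate(X_te, y_te, verbose=0)
print('Held-out Accuracy: %.2f' % (test_acc * 100))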

# make probability predictions with the model
predictions = model.predict(x_test)
# round predictions
rounded = [round(x[0]) for x in predictions]

# make class predictions with the model
# (Sequential.predict_classes was removed in TensorFlow 2.6; threshold the
# sigmoid probabilities instead)
predictions = (model.predict(x_test) > 0.5).astype('int32')


# Plot accuracy comparison between scenario 1 (assumed features) and scenario 2 (features from analysis)
plot_data = pd.read_excel(
    r'C:\Users\Life\Desktop\GMU\CS-504\dataset\AccuracyResults.xlsx')
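# a sketch of the comparison plot; the 'Scenario' and 'Accuracy' column names are
# assumptions about the spreadsheet layout, not taken from the original
import matplotlib.pyplot as plt

plot_data.plot(kind='bar', x='Scenario', y='Accuracy', legend=False)
plt.ylabel('Accuracy (%)')
plt.title('Scenario 1 (assumed features) vs Scenario 2 (features from analysis)')
plt.show()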
Example #2
# compile the keras model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

# fit the keras model on the dataset
history = model.fit(X_train,
                    y_train,
                    validation_split=0.33,
                    shuffle=True,
                    epochs=400,
                    batch_size=10000)
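# with a 33% validation split, early stopping is a common guard against the
# 400-epoch budget overfitting; a minimal sketch using Keras's standard callback:
from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
history = model.fit(X_train, y_train,
                    validation_split=0.33,
                    shuffle=True,
                    epochs=400,
                    batch_size=10000,
                    callbacks=[early_stop])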

# evaluate the keras model
_, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (accuracy * 100))

# list all data in history
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
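# (the snippet breaks off here; completing the loss plot to mirror the accuracy plot above)
plt.title('Model Loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()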
Example #3
def voting(peptide_predict_file, nucleotide_predict_file, effector_train, noneffector_train):

    # count the sequences to classify: one FASTA header line ('>') per sequence
    total = 0
    with open(peptide_predict_file) as f:
        for line in f:
            if line.startswith('>'):
                total += 1

    print('Total number of sequences to be classified: ', total)
    
    import time
    # time.clock() was removed in Python 3.8; perf_counter() is its replacement
    start_time = time.perf_counter()

    import random
    import warnings
    import pandas
    import numpy as np
    from sklearn import svm
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
    from sklearn.feature_selection import SelectFromModel
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split, cross_val_score
    from sklearn.metrics import accuracy_score
    from keras.models import Sequential
    from keras.layers import Dense
    # SMOTE(kind='borderline1') is the pre-0.4 imblearn API; BorderlineSMOTE replaces it
    from imblearn.over_sampling import BorderlineSMOTE

    warnings.filterwarnings("ignore")
    np.random.seed(123)
    # random.seed() returns None, so random_state=f below is equivalent to random_state=None
    f = random.seed()

    # get the feature vectors of the sequences to be predicted
    featurevector=featureextraction(peptide_predict_file, nucleotide_predict_file, total)
    print(len(featurevector))

 
    # load the training data
    dataframe = pandas.read_csv(effector_train, header=None, sep=',')
    dataset = dataframe.values
    eff = dataset[:,0:1000].astype(float)

    dataframe = pandas.read_csv(noneffector_train, header=None, sep=',')
    dataset = dataframe.values
    noneff = dataset[:,0:1000].astype(float)


    
    # stack effectors (label 0) on top of non-effectors (label 1)
    X = np.vstack((eff, noneff))
    Y = np.concatenate((np.zeros((eff.shape[0], 1)), np.ones((noneff.shape[0], 1))))
        
        
    
    print('Resampling the unbalanced data...')
    # fit_sample() was renamed fit_resample(); Y is flattened because imblearn
    # expects a 1-D label vector
    X_resampled, Y_resampled = BorderlineSMOTE().fit_resample(X, Y.ravel())

    # standardize features by removing the mean and scaling to unit variance
    # (note: the scaled matrix is never used below -- the classifiers train on the
    # unscaled resampled data, which MultinomialNB's non-negativity check requires)
    scaler = StandardScaler().fit(X_resampled)
    X = scaler.transform(X_resampled)


  
    # tree-based feature selection: keep only the features an ExtraTrees model
    # ranks as important (ExtraTreesClassifier lost fit_transform in scikit-learn
    # 0.19; SelectFromModel performs the transform)
    model = ExtraTreesClassifier()
    model.fit(X_resampled, Y_resampled)
    selector = SelectFromModel(model, prefit=True)
    X_resampled = selector.transform(X_resampled)
    featurevector = selector.transform(featurevector)
    newshape = X_resampled.shape
    

    print("Training Classifiers...")
    #train and test set
    X_train, X_test, y_train, y_test = train_test_split(X_resampled, Y_resampled, test_size=0.15, random_state=f)
    # manual one-hot encoding: class 0 -> [1, 0], class 1 -> [0, 1]
    y_t = y_train
    y_te = y_test
    y_train = np.ones((len(y_t), 2))
    y_test = np.ones((len(y_te), 2))
    for i in range(len(y_t)):
        if y_t[i]==0:
            y_train[i][1]=0
        if y_t[i]==1:
            y_train[i][0]=0
            
    for i in range(len(y_te)):
        if y_te[i]==0:
            y_test[i][1]=0
        if y_te[i]==1:
            y_test[i][0]=0    
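    # equivalent, using Keras's built-in one-hot helper (to_categorical(0) -> [1, 0]
    # and to_categorical(1) -> [0, 1], matching the loops above):
    from keras.utils import to_categorical
    y_train = to_categorical(y_t, num_classes=2)
    y_test = to_categorical(y_te, num_classes=2)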
    
    #ANN
    print("Training Artificial Neural Network...") 
    model = Sequential()
    model.add(Dense(newshape[1]+1, activation='relu', input_shape=(newshape[1],)))
    model.add(Dense(500, activation='relu'))
    #model.add(Dense(800, activation='relu'))
    #model.add(Dense(500, activation='relu'))
    model.add(Dense(250, activation='relu'))
    model.add(Dense(90, activation='relu'))
    # Add an output layer 
    model.add(Dense(2, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['binary_accuracy'])
    model.fit(X_train, y_train, epochs=1000, batch_size=25, verbose=0)
    score = model.evaluate(X_test, y_test, verbose=0)
    # predict class probabilities for the query sequences
    ANN = model.predict(featurevector)

    # restore the original integer labels for the scikit-learn classifiers
    y_train = y_t
    y_test = y_te
            
    #SVM
    print("Training Support Vector Machine...") 
    clf1 = svm.SVC(decision_function_shape='ovr', kernel='linear', max_iter=1000)
    clf1.fit(X_train, y_train)
    y_pred=clf1.predict(X_test)
    results=cross_val_score(clf1, X_test, y_test, cv=10)
    SVM=clf1.predict(featurevector)

    #KNN
    print("Training k-Nearest Neighbor ...") 
    neigh = KNeighborsClassifier(n_neighbors=10)
    neigh.fit(X_train, y_train) 
    results=cross_val_score(neigh, X_test, y_test, cv=10)
    y_pred=neigh.predict(X_test)
    KNN=neigh.predict(featurevector)

    #Naive Bayes
    print("Training Naive Bayes...")
    clf = MultinomialNB()
    clf.fit(X_train, y_train)
    results=cross_val_score(clf, X_test, y_test, cv=10)
    y_pred=clf.predict(X_test)
    NB=clf.predict(featurevector)
     
    #RandomForest
    print("Training Random Forest...")
    rf = RandomForestClassifier(random_state=0, min_samples_leaf=100)
    rf.fit(X_train, y_train)
    results=cross_val_score(rf, X_test, y_test, cv=10)
    y_pred=rf.predict(X_test)
    RF=rf.predict(featurevector)  # predict with the forest itself, not the NB model
    
    vote_result = [[0 for x in range(2)] for y in range(len(SVM))]
    for i in range(len(ANN)):
          if round(ANN[i][0])==1.0:
              vote_result[i][0]=vote_result[i][0]+1
          if round(ANN[i][1])==1.0:
              vote_result[i][1]=vote_result[i][1]+1
          if SVM[i]==0:
              vote_result[i][0]=vote_result[i][0]+1
          if SVM[i]==1:
              vote_result[i][1]=vote_result[i][1]+1
          if KNN[i]==0:
              vote_result[i][0]=vote_result[i][0]+1
          if KNN[i]==1:
              vote_result[i][1]=vote_result[i][1]+1
          if NB[i]==0:
              vote_result[i][0]=vote_result[i][0]+1
          if NB[i]==1:
              vote_result[i][1]=vote_result[i][1]+1
          if RF[i]==0:
              vote_result[i][0]=vote_result[i][0]+1
          if RF[i]==1:
              vote_result[i][1]=vote_result[i][1]+1    
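    # a vectorized sketch of the same tally (note: the loop above lets both ANN
    # output units vote when both round to 1, whereas argmax casts exactly one vote)
    ann_vote = np.argmax(ANN, axis=1)
    label_matrix = np.stack([ann_vote, np.ravel(SVM), np.ravel(KNN), np.ravel(NB), np.ravel(RF)])
    effector_votes = (label_matrix == 0).sum(axis=0)       # class-0 (effector) votes
    noneffector_votes = (label_matrix == 1).sum(axis=0)    # class-1 (non-effector) votes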

    print('-----------------------Results-----------------------')
    for i in range(len(ANN)):
        if vote_result[i][0]>=vote_result[i][1]:
            print('Sequence ',i+1,' is a probable Type 6 Effector')
        else:    
            print('Sequence ',i+1,' is not a Type 6 Effector')
    end_time = time.perf_counter()
    print('Execution time', (end_time - start_time))
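# a hypothetical invocation of voting(); the FASTA and CSV paths below are
# placeholders, not files from the original source
voting('candidate_peptides.fasta',
       'candidate_nucleotides.fasta',
       'effector_features.csv',
       'noneffector_features.csv')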
Example #4
model.add(Dense(10, input_dim=18, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# compile keras model, classification model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# fit keras model
model.fit(np.array(predictor), np.array(y_train), epochs=10, batch_size=10)
model_loss = pd.DataFrame(model.history.history)
model_loss.plot()

# evaluate keras model accuracy
_, accuracy = model.evaluate(pred_dt, y_train)
print('Accuracy: %.2f' % (accuracy * 100))
from sklearn.metrics import classification_report, confusion_matrix

# predict_classes was removed in TF 2.6; thresholding the sigmoid output is equivalent
train_pred = (model.predict(predictor) > 0.5).astype('int32')
print(classification_report(y_train, train_pred))
print(confusion_matrix(y_train, train_pred))
#[[286   1]

#Building KNN model -----------------------------------------------
from sklearn.neighbors import KNeighborsClassifier as KNC
knn = KNC(n_neighbors=7)
knn.fit(predictor, y_train.values.ravel())
y_train_pred = knn.predict(predictor)
knn.score(predictor, y_train)  # 81.86%

# check prediction accuracy of train data and classification error
print(confusion_matrix(y_train, y_train_pred))
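# the fixed n_neighbors=7 above is a guess; a short sketch of picking k by
# cross-validation on the same training data (standard scikit-learn API):
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(KNC(), {'n_neighbors': list(range(3, 16, 2))}, cv=5)
grid.fit(predictor, y_train.values.ravel())
print('best k:', grid.best_params_['n_neighbors'], 'cv accuracy:', grid.best_score_)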
Example #5
#          'scores': str(accuracy)
#          }
#    print(str(json.dumps(result)))
#    quit()

# ------------------------------------------------------
# NN Execute
# ------------------------------------------------------
history = model.fit(X_train,
                    y_train,
                    epochs=epochs,
                    batch_size=batchSize,
                    callbacks=callbacks_list)
scores = model.evaluate(x=X_test,
                        y=y_test,
                        # note: shape[1] is the feature count; X_test.shape[0]
                        # (the sample count) was more likely intended here
                        batch_size=X_test.shape[1],
                        verbose=1,
                        sample_weight=None,
                        steps=None)
loss = scores[0]
metric = scores[1]

# ------------------------------------------------------
# Create a dataframe from prediction and test
# ------------------------------------------------------
from math import sqrt

y_pred = model.predict(X_test)

y_pred = y_pred.astype('int64')
y_pred = pd.Series(y_pred.flatten().tolist())
y_test = pd.Series(y_test.tolist())
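
# ------------------------------------------------------
# the sqrt import above hints at an error metric; a minimal RMSE sketch over the
# prediction/test series (standard scikit-learn API)
# ------------------------------------------------------
from sklearn.metrics import mean_squared_error

rmse = sqrt(mean_squared_error(y_test, y_pred))
print('RMSE: %.4f' % rmse)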
Example #6
def _main():

    # firebase = pyrebase.initialize_app(config)

    # db = firebase.database()
    # all_users_r1= db.child("cho").child("r-1").get().val()
    # all_users_r2= db.child("cho").child("r-2").get().val()
    #
    # del all_users_r2['cho']
    # all_users=db.child("cho").child("users").get().val()
    # # all_users.update(all_users_t)
    #
    #
    # a = {'hello': 'world'}

    # with open('r-2.pickle', 'wb') as handle:
    #     pickle.dump(all_users_r2, handle, protocol=pickle.HIGHEST_PROTOCOL)
    # for user in all_users:
    #     db.child("cho").child("users")
    #     user_node = db.child("cho").child("r-1").child(user)
    #     for attr in all_users[user]:
    #         user_node.child(attr).set(all_users[user][attr])
    #         print(attr)

    # child(user).set() .remove()

    with open('users.pickle', 'rb') as handle:
        all_users = pickle.load(handle)

    users_data = []
    targets = {
        'BA_S': 0,
        'MIS_S': 1,
        'MOS_S': 2,
        'SES_S': 3
    }
    for user in all_users:
        data = get_user_data(all_users[user])
        data.update({'name': user})
        users_data.append(data)

    df = pd.DataFrame()
    excluded = pd.DataFrame()
    count = 0
    excludedPanelistsN = 1
    randomExcluded = 4
    for user_data in users_data:
        if user_data["name"] == "sameha" or user_data["name"] == "rna":
            continue
        count += 1
        # if count==randomExcluded:
        #     df=excluded
        # else:
        #     df=included
        try:
            scenarios = ('BA_S', 'MIS_S', 'MOS_S', 'SES_S')
            # compute every scenario's chunks first so a failing panelist contributes no rows
            chunks_by_scenario = {
                s: calculate_panelist_features(user_data, s) for s in scenarios
            }
            # RE_S_features_chunks = calculate_panelist_features(user_data, 'RE_S')

            # tag each chunk with its target id and panelist, then append;
            # DataFrame.append was removed in pandas 2.0, so concat per scenario instead
            for scenario in scenarios:
                for chunk in chunks_by_scenario[scenario]:
                    chunk.update({
                        'tar': targets[scenario],
                        'pi': user_data['name']
                    })
                df = pd.concat([df, pd.DataFrame(chunks_by_scenario[scenario])],
                               ignore_index=True)
        except Exception:
            print('err : ' + user_data["name"])

    # clip pathological sample-entropy values to 0
    df['sampen'] = df['sampen'].apply(lambda x: x if -100 < x < 100 else 0)

    dfFeatures = df.loc[:, df.columns.difference(['tar', 'pi'])].copy()
    # z-score each feature within panelist so between-subject baselines cancel out;
    # np.issubdtype is the reliable numeric-dtype test (x.dtype == np.number is always False)
    dfNormalized = df.loc[:, df.columns.difference(['tar'])].copy().groupby(
        'pi').transform(lambda x: (x - x.mean()) / x.std()
                        if np.issubdtype(x.dtype, np.number) else x)
    # dfNormalized = dfFeatures.apply(lambda x: x if x.isnumeric() else 0 , axis=0)
    # dfNormalized = dfFeatures.apply(lambda x: (x-x.mean())/x.std(), axis=0)
    # dfNormalized = dfFeatures.apply(lambda x: (x - x.min()) / (x.max() - x.min()), axis=0)
    # dfNormalized = dfFeatures
    # df.dropna(axis=0,inplace=True)
    # df.reset_index(inplace=True,drop=True)
    # df.drop(inplace=True, columns='index')
    # df['sampen']=df['sampen'].apply(lambda x: x if (x<100 and x>-100)  else 0 )
    # dfNormalized = df.copy()
    dfNormalized['tar'] = df['tar']
    dfNormalized['pi'] = df['pi']
    # dfNormalized = dfNormalized.dropna(axis=1)
    #
    # excluded = dfNormalized.loc[0:46].copy().dropna()
    # hold out three panelists (by 'pi' id) as an external test set
    excluded = dfNormalized.loc[dfNormalized['pi'].isin(['', 'ran', 'mah'])]
    # excluded.loc[excluded['tar'] == 0.0, 'tar'] = -1
    # excluded.loc[excluded['tar'] != -1.0, 'tar'] = 0
    # excluded.loc[excluded['tar'] == -1.0, 'tar'] = 1

    # included = dfNormalized.loc[47:len(df)].copy().dropna()
    included = dfNormalized.loc[~dfNormalized['pi'].isin(['', 'ran', 'mah'])]

    features_name = [
        'ApEN', 'SD1', 'SD2', 'cvnni', 'cvsd', 'hf', 'hfnu', 'lf',
        'lf_hf_ratio', 'lfnu', 'max_hr', 'mean_hr', 'mean_nni', 'median_nni',
        'min_hr', 'nni_20', 'nni_50', 'pnni_20', 'pnni_50', 'range_nni',
        'rmssd', 'sampen', 'sc_avg', 'scl_avg', 'scl_slope', 'scr_avg',
        'scr_max', 'scr_peak', 'sdnn', 'sdsd', 'skt_avg', 'skt_slope',
        'skt_std', 'std_hr', 'total_power', 'vlf', 'co_he'
    ]
    # the shorter tuple below supersedes the full list above
    features_name = ('ApEN', 'SD1', 'SD2', 'hf', 'lf', 'lf_hf_ratio',
                     'mean_hr', 'pnni_20', 'pnni_50', 'rmssd', 'sampen',
                     'sc_avg', 'scl_avg', 'scl_slope', 'scr_avg', 'scr_max',
                     'scr_peak', 'sdnn', 'sdsd', 'skt_avg', 'skt_slope',
                     'skt_std', 'co_he')

    includedTargets = list(included['tar'].values.astype(int))
    excludedTargets = list(excluded['tar'].values.astype(int))
    includedFeatures = included.loc[:, features_name].copy().values
    excludedFeatures = excluded.loc[:, features_name].copy().values

    # includedFeatures = included.loc[:, included.columns.difference(['tar', 'pi'])].copy().values
    # excludedFeatures = excluded.loc[:, excluded.columns.difference(['tar', 'pi'])].copy().values

    regressors = {}

    for indexTarget in range(len(set(list(included['tar'].values)))):
        # indexTarget=0
        includedTargets = [
            1 if i == indexTarget else 0
            for i in included['tar'].values.astype(int)
        ]
        excludedTargets = [
            1 if i == indexTarget else 0
            for i in excluded['tar'].values.astype(int)
        ]
        includedFeatures = included.loc[:, included.columns.difference(['tar', 'pi'])].copy().values
        excludedFeatures = excluded.loc[:, excluded.columns.difference(['tar', 'pi'])].copy().values

        # keep the forest under its own name so `model` can later hold the Keras net
        forest = ExtraTreesClassifier(max_depth=1000)
        forest.fit(includedFeatures, includedTargets)

        y_pred = forest.predict_proba(excludedFeatures)
        predictions = [round(value[1]) for value in y_pred]
        accuracy = accuracy_score(excludedTargets, predictions)
        print('accuracy :' + str(accuracy))

        importances = forest.feature_importances_
        std = np.std([tree.feature_importances_ for tree in forest.estimators_],
                     axis=0)
        indices = np.argsort(importances)[::-1]

        # Print the feature ranking
        print("Feature ranking:")

        for f in range(includedFeatures.shape[1]):
            print("%d. feature %d (%f)" %
                  (f + 1, indices[f], importances[indices[f]]))

        # Plot the feature importances of the forest
        plt.figure()
        plt.title("Feature importances")
        plt.bar(range(includedFeatures.shape[1]),
                importances[indices],
                color="r",
                yerr=std[indices],
                align="center")
        plt.xticks(range(includedFeatures.shape[1]), indices)
        plt.xlim([-1, includedFeatures.shape[1]])
        plt.show()

        model = Sequential([
            Dense(200,
                  activation='relu',
                  input_shape=(len(includedFeatures[0]), )),
            Dense(150, activation='relu'),
            Dense(100, activation='relu'),
            Dense(20, activation='relu'),
            Dense(2, activation='softmax'),
        ])
        from keras import optimizers  # needed for optimizers.Adam below

        # Compile the model.
        model.compile(
            optimizer=optimizers.Adam(learning_rate=0.1),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )

        # Train the model.
        model.fit(
            includedFeatures,
            to_categorical(includedTargets),
            epochs=200,
            batch_size=200,
        )
        accuracy = model.evaluate(excludedFeatures,
                                  to_categorical(excludedTargets))
        # Sequential.predict_proba was removed from Keras; predict already returns
        # the softmax probabilities
        y_pred = model.predict(excludedFeatures)
        predictions = [round(value[1]) for value in y_pred]
        # accuracy = accuracy_score(excludedTargets, predictions)
        print('accuracy :' + str(accuracy))
        regressors[indexTarget] = {'model': model, 'accuracy': accuracy}
    excludedTargets = list(excluded['tar'].values.astype(int))
    dfTest = pd.DataFrame()
    dfTest['test'] = excludedTargets
    for indexTarget in range(len(regressors)):
        # as above, predict replaces the removed predict_proba
        y_pred = regressors[indexTarget]['model'].predict(excludedFeatures)
        dfTest[indexTarget] = [val[1] for val in y_pred]

    finalTest = []
    for index, row in dfTest.iterrows():
        maxIndex = -1
        maxVal = -1
        for indexTarget in range(len(regressors)):
            if row[indexTarget] > maxVal:
                maxVal = row[indexTarget]
                maxIndex = indexTarget

        finalTest.append(maxIndex)

    accuracy = accuracy_score(excludedTargets, finalTest)
    print("final excluded Accuracy: %.2f%%" % (accuracy * 100.0))
    # importances come from the last ExtraTrees forest fit in the loop above
    plt.bar(included.columns.difference(['tar', 'pi']),
            forest.feature_importances_)
    plt.show()

    model = Sequential([
        Dense(500, activation='relu',
              input_shape=(len(includedFeatures[0]), )),
        Dense(200, activation='relu'),
        Dense(100, activation='relu'),
        Dense(20, activation='relu'),
        Dense(4, activation='softmax'),
    ])
    # from keras import optimizers

    # Compile the model.
    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.0005),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Train the model.
    model.fit(
        includedFeatures,
        to_categorical(includedTargets),
        epochs=200,
        batch_size=200,
    )
    loss, acc = model.evaluate(excludedFeatures, to_categorical(excludedTargets))
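    print('final included-model accuracy on excluded panelists: %.2f%%' % (acc * 100.0))
    # persisting the trained network with Keras's standard save API;
    # 'stress_model.keras' is a hypothetical filename, not from the original
    model.save('stress_model.keras')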