def lrcn_test(X, y):
    """Score the ten saved LRCN models on the given data and print the results.

    For k in 0..9 the model stored at ``../saved_models/LRCN/lrcn_<k>.h5`` is
    loaded (with the project's masked loss/accuracy as custom objects) and
    evaluated through ``ROC_PR.ROC`` and ``ROC_PR.PR`` with ``bccdc=True``.

    Args:
        X: Model inputs forwarded unchanged to the ``ROC_PR`` helpers.
        y: Targets forwarded unchanged to the ``ROC_PR`` helpers.

    Returns:
        None. Three lists (ROC scores, precision-recall values and
        recall-at-specificity values, one entry per model) are printed.
    """
    custom_objects = {
        'masked_loss_function': masked_loss_function,
        'masked_accuracy': masked_accuracy
    }
    roc_per_model, pr_per_model, sr_per_model = [], [], []

    for fold in range(10):
        model_path = '../saved_models/LRCN/lrcn_' + str(fold) + '.h5'
        net = keras.models.load_model(model_path,
                                      custom_objects=custom_objects)
        drug_scores = ROC_PR.ROC(net, X, y, ("LRCN" + "BO_delete"), True,
                                 bccdc=True)
        spec_recall, prec_recall = ROC_PR.PR(net, X, y, bccdc=True)
        roc_per_model.append(drug_scores)
        pr_per_model.append(prec_recall)
        sr_per_model.append(spec_recall)

    print(roc_per_model)
    print(pr_per_model)
    print(sr_per_model)
def wnd_test(X, y):
    """Score the ten saved wide-and-deep models and print the results.

    Models are read from ``../saved_models/WnD/WnD<k>.h5`` for k in 1..10
    and evaluated through ``ROC_PR.ROC`` and ``ROC_PR.PR`` with
    ``bccdc=True``.

    Args:
        X: Model inputs forwarded unchanged to the ``ROC_PR`` helpers.
        y: Targets forwarded unchanged to the ``ROC_PR`` helpers.

    Returns:
        None. Three lists (ROC scores, precision-recall values and
        recall-at-specificity values, one entry per model) are printed.
    """
    custom_objects = {
        'masked_loss_function': masked_loss_function,
        'masked_accuracy': masked_accuracy
    }
    roc_per_model, pr_per_model, sr_per_model = [], [], []

    # Saved files are numbered 1..10 (not 0..9).
    for model_no in range(1, 11):
        model_path = '../saved_models/WnD/WnD' + str(model_no) + '.h5'
        net = keras.models.load_model(model_path,
                                      custom_objects=custom_objects)
        drug_scores = ROC_PR.ROC(net, X, y, ("wide-n-deep" + "BO_delete"),
                                 True, bccdc=True)
        spec_recall, prec_recall = ROC_PR.PR(net, X, y, bccdc=True)
        roc_per_model.append(drug_scores)
        pr_per_model.append(prec_recall)
        sr_per_model.append(spec_recall)

    print(roc_per_model)
    print(pr_per_model)
    print(sr_per_model)
def run_one_fold(model):
    """Train ``model`` once on the module-level split and report its scores.

    Reads the module-level ``X_train``/``y_train``, ``X_val``/``y_val``,
    ``X_test``/``y_test`` and ``epochs``. After fitting, validation and test
    ROC scores plus per-drug ROC and PR metrics are printed and the model is
    saved as ``wnd_<random 17-char string>.h5``.

    Args:
        model: An uncompiled Keras model; it is compiled here with the
            masked loss and masked accuracy.

    Returns:
        The validation ROC score from ``ROC_PR.ROC_Score``.
    """
    model.compile(loss=masked_loss_function,
                  optimizer='Adam',
                  metrics=[masked_accuracy])
    model.fit(X_train,
              y_train,
              epochs=epochs,
              batch_size=128,
              verbose=2,
              validation_data=(X_val, y_val),
              callbacks=[MyCustomCallback()])

    val_score = ROC_PR.ROC_Score(model, X_val, y_val)
    test_score = ROC_PR.ROC_Score(model, X_test, y_test)
    per_drug = ROC_PR.ROC(model, X_test, y_test,
                          ("wide-n-deep" + "BO_delete"), True)
    spec_recall, prec_recall = ROC_PR.PR(model, X_test, y_test)

    print('area under ROC curve for val:', val_score)
    print('area under ROC curve for test:', test_score)
    print(per_drug)
    print("recall at 95 spec: ", spec_recall)
    print("precision recall: ", prec_recall)

    # A random suffix keeps successive runs from overwriting each other.
    suffix = get_random_string(17)
    print(suffix)
    model.save('wnd_' + suffix + '.h5')
    return val_score
def model_CNN_LSTM_random_data(FrameSize, X, X_train, X_test, y_train,
                               y_test, epoch, earlyStopping, name):
    """Build, train and score a 4-conv / 4-LSTM network with 12 outputs.

    Args:
        FrameSize: Only printed.
        X: Full input array; only its shape is printed.
        X_train, y_train: Training data.
        X_test, y_test: Data used both for validation during fitting and
            for the final ROC evaluation.
        epoch: Number of training epochs.
        earlyStopping: Keras callback added to the fit call.
        name: Suffix appended to ``"LRCN"`` for plot/ROC titles.

    Returns:
        Tuple ``(score, train_score)``: the result of ``ROC_PR.ROC`` on the
        test data and of ``ROC_PR.ROC_Score`` on the training data.
    """
    print(X.shape)
    print(FrameSize)

    drop_rate = 0.3311428861138142
    conv_specs = ((4, 6), (7, 4), (6, 6), (4, 4))  # (filters, kernel_size)
    lstm_specs = ((425, True), (189, True), (283, True), (333, False))

    model = Sequential()
    # Each convolutional stage is dropout -> conv -> max-pool.
    for n_filters, kernel in conv_specs:
        model.add(Dropout(drop_rate))
        model.add(
            Conv1D(filters=n_filters,
                   kernel_size=kernel,
                   activation='relu',
                   padding='same'))
        model.add(MaxPooling1D(pool_size=4, padding='same'))
    # Each recurrent stage is LSTM -> dropout; only the last LSTM collapses
    # the sequence dimension.
    for units, keep_sequence in lstm_specs:
        model.add(
            LSTM(units, return_sequences=keep_sequence,
                 recurrent_dropout=0.3))
        model.add(Dropout(drop_rate))
    model.add(Dense(331))
    model.add(Dropout(drop_rate))
    model.add(Dense(12, activation='sigmoid'))

    model.compile(loss=masked_loss_function,
                  optimizer='Adam',
                  metrics=[masked_accuracy])
    checkpoint = ModelCheckpoint('result/CNN256_LSTM128_64_2.h5',
                                 monitor='val_masked_accuracy',
                                 mode='max',
                                 save_best_only=True)
    history = model.fit(X_train,
                        y_train,
                        epochs=epoch,
                        batch_size=128,
                        verbose=2,
                        validation_data=(X_test, y_test),
                        callbacks=[earlyStopping, checkpoint])

    title = "LRCN" + name
    plot.plot(history, title)
    score = ROC_PR.ROC(model, X_test, y_test, title, True)
    train_score = ROC_PR.ROC_Score(model, X_train, y_train, limited=False)
    return score, train_score
def model_CNN_LSTM_time(FrameSize, X, X_train, X_test, y_train, y_test,
                        epoch, earlyStopping, name):
    """Build, train and score a time-distributed CNN followed by an LSTM.

    The 3-D train/test inputs are reshaped to 4-D by appending a trailing
    axis of size 1 so that the convolutions can be wrapped in
    ``TimeDistributed``.

    Args:
        FrameSize: Only printed.
        X: Full input array; only its shape is printed.
        X_train, y_train: Training data.
        X_test, y_test: Data used for validation and for the final ROC.
        epoch: Number of training epochs.
        earlyStopping: Keras callback added to the fit call.
        name: Suffix appended to ``"LRCN"`` for plot/ROC titles.

    Returns:
        Tuple ``(score, train_score)``: the result of ``ROC_PR.ROC`` on the
        test data and of ``ROC_PR.ROC_Score`` on the training data.
    """
    for item in (X.shape, X_train.shape, X_test.shape, FrameSize,
                 y_train.shape, y_test.shape):
        print(item)

    # Append a channel axis: (n, a, b) -> (n, a, b, 1).
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1],
                              X_train.shape[2], 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1],
                            X_test.shape[2], 1)

    model = Sequential()
    model.add(Dropout(0.1))
    for _ in range(2):
        model.add(
            TimeDistributed(
                Conv1D(filters=8,
                       kernel_size=3,
                       activation='relu',
                       padding='same')))
        model.add(TimeDistributed(MaxPooling1D(pool_size=3, padding='same')))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(518, return_sequences=False, recurrent_dropout=0.3))
    model.add(Dropout(0.1))
    model.add(Dense(64))
    model.add(Dropout(0.1))
    model.add(Dense(12, activation='sigmoid'))

    model.compile(loss=masked_loss_function,
                  optimizer='Adam',
                  metrics=[masked_accuracy])
    checkpoint = ModelCheckpoint('result/CNN256_LSTM128_64_2.h5',
                                 monitor='val_masked_accuracy',
                                 mode='max',
                                 save_best_only=True)
    history = model.fit(X_train,
                        y_train,
                        epochs=epoch,
                        batch_size=128,
                        verbose=2,
                        validation_data=(X_test, y_test),
                        callbacks=[earlyStopping, checkpoint])

    title = "LRCN" + name
    plot.plot(history, title)
    score = ROC_PR.ROC(model, X_test, y_test, title, True)
    train_score = ROC_PR.ROC_Score(model, X_train, y_train, limited=False)
    return score, train_score
def model_256_128_64_2_100Ep(FrameSize, X, X_train, X_test, y_train, y_test):
    """Train a two-layer LSTM softmax classifier for a fixed 100 epochs.

    Args:
        FrameSize: Sequence length used for the first layer's input shape.
        X: Full input; ``X[0].shape[1]`` supplies the per-step feature count.
        X_train, y_train: Training data.
        X_test, y_test: Data used for validation and for the final ROC.

    Returns:
        None. The training history is plotted and ``ROC_PR.ROC`` is run on
        the test data under the title ``"One_256_128_64_2_100Ep"``.
    """
    title = "One_256_128_64_2_100Ep"
    stack = [
        LSTM(256,
             input_shape=(FrameSize, X[0].shape[1]),
             return_sequences=True,
             recurrent_dropout=0.3),
        SpatialDropout1D(0.2),
        LSTM(128, return_sequences=False, recurrent_dropout=0.3),
        Dropout(0.2),
        Dense(64),
        Dropout(0.2),
        Dense(2, activation='softmax'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])
    history = model.fit(X_train,
                        y_train,
                        epochs=100,
                        batch_size=128,
                        shuffle=True,
                        verbose=2,
                        validation_data=(X_test, y_test))
    plot.plot(history, title)
    ROC_PR.ROC(model, X_test, y_test, title)
def model_CNN_LSTM_limited_1(FrameSize, X, X_train, X_test, y_train, y_test,
                             epoch, earlyStopping, name):
    """Build, train and score a 2-conv / 4-LSTM network with 7 outputs.

    Args:
        FrameSize: Only printed.
        X: Full input array; only its shape is printed.
        X_train, y_train: Training data.
        X_test, y_test: Data used for validation and for the final ROC.
        epoch: Number of training epochs.
        earlyStopping: Keras callback added to the fit call.
        name: Suffix appended to ``"CNN_LSTM_limited_1"`` for titles.

    Returns:
        The result of ``ROC_PR.ROC`` on the test data (``limited=True``).
    """
    print(X.shape)
    print(FrameSize)

    drop_rate = 0.43369355853937297
    model = Sequential()
    model.add(Dropout(drop_rate))
    # (filters, kernel_size, pool_size) for the two convolutional stages.
    for n_filters, kernel, pool in ((5, 4, 4), (7, 7, 7)):
        model.add(
            Conv1D(filters=n_filters,
                   kernel_size=kernel,
                   activation='relu',
                   padding='same'))
        model.add(MaxPooling1D(pool_size=pool, padding='same'))
    # Sequence-returning LSTMs are followed by spatial dropout.
    for units in (398, 106, 475):
        model.add(LSTM(units, return_sequences=True, recurrent_dropout=0.3))
        model.add(SpatialDropout1D(drop_rate))
    model.add(LSTM(264, return_sequences=False, recurrent_dropout=0.3))
    model.add(Dropout(drop_rate))
    for width in (352, 378):
        model.add(Dense(width))
        model.add(Dropout(drop_rate))
    model.add(Dense(7, activation='sigmoid'))

    model.compile(loss=masked_loss_function,
                  optimizer='Adam',
                  metrics=[masked_accuracy])
    checkpoint = ModelCheckpoint('result/CNN_LSTM_limited_1.h5',
                                 monitor='val_masked_accuracy',
                                 mode='max',
                                 save_best_only=True)
    history = model.fit(X_train,
                        y_train,
                        epochs=epoch,
                        batch_size=128,
                        verbose=2,
                        validation_data=(X_test, y_test),
                        callbacks=[earlyStopping, checkpoint])

    title = "CNN_LSTM_limited_1" + name
    plot.plot(history, title)
    return ROC_PR.ROC(model, X_test, y_test, title, True, limited=True)
def rf_kfold(X, y, i):
    """Run 10-fold cross-validation of a random forest and store the scores.

    The data is first shuffled by splitting it with ``train_test_split``
    (fixed ``random_state=42``) and concatenating the two parts again. It is
    then cut into ten contiguous folds; the last fold also takes the
    remainder when ``len(X)`` is not divisible by ten.

    Args:
        X: Feature array.
        y: Label array aligned with ``X``.
        i: Identifier passed to ``ROC_PR.ROC_ML`` and used in the output
            file name ``result/RFResult<i>.txt``.

    Returns:
        None. Per-fold accuracy is appended to the module-level ``res`` list
        and the per-fold ``ROC_PR.ROC_ML`` results are written one per line
        to the result file.
    """
    from sklearn.ensemble import RandomForestClassifier

    global res
    n_folds = 10

    # Shuffle deterministically by splitting and re-joining.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=42,
                                                        shuffle=True)
    X = np.append(X_train, X_test, axis=0)
    y = np.append(y_train, y_test, axis=0)

    fold_len = int(len(X) / n_folds)
    cvscores1 = []
    for i2 in range(n_folds):
        start = fold_len * i2
        # The final fold runs to the end so no trailing samples are dropped.
        stop = len(X) if i2 == n_folds - 1 else start + fold_len
        X_test, y_test = X[start:stop], y[start:stop]
        X_train = np.append(X[:start], X[stop:], axis=0)
        y_train = np.append(y[:start], y[stop:], axis=0)

        rf_model_linear = RandomForestClassifier(n_estimators=140,
                                                 min_samples_split=4,
                                                 bootstrap=False,
                                                 max_depth=50).fit(
                                                     X_train, y_train)
        score1 = ROC_PR.ROC_ML(rf_model_linear, X_test, y_test, "LR", i,
                               rf=True)
        accuracy = rf_model_linear.score(X_test, y_test)
        print(accuracy)
        res.append(accuracy)
        print("Area for 1")
        cvscores1.append(score1)

    # Context manager guarantees the file is flushed and closed.
    with open('result/RFResult' + str(i) + '.txt', 'w') as out_file:
        out_file.writelines(str(ele) + '\n' for ele in cvscores1)
def model_lrcn_simple(FrameSize, X, X_train, X_test, y_train, y_test, epoch,
                      earlyStopping, name, limited=False):
    """Build, train and score a small conv + two-LSTM network.

    Args:
        FrameSize: Only printed.
        X: Full input array; only its shape is printed.
        X_train, y_train: Training data.
        X_test, y_test: Data used for validation and for the final ROC.
        epoch: Number of training epochs.
        earlyStopping: Keras callback added to the fit call.
        name: Suffix appended to ``"LRCN"`` for plot/ROC titles.
        limited: When true the output layer has 7 units instead of 12, and
            the flag is forwarded to ``ROC_PR.ROC``.

    Returns:
        The result of ``ROC_PR.ROC`` on the test data.
    """
    print(X.shape)
    print(FrameSize)

    n_outputs = 7 if limited else 12
    stack = [
        Dropout(0.2),
        Conv1D(filters=5, kernel_size=5, activation='relu', padding='same'),
        MaxPooling1D(pool_size=3, padding='same'),
        LSTM(256, return_sequences=True, recurrent_dropout=0.3),
        SpatialDropout1D(0.2),
        LSTM(128, return_sequences=False, recurrent_dropout=0.3),
        Dropout(0.2),
        Dense(128),
        Dropout(0.2),
        Dense(n_outputs, activation='sigmoid'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss=masked_loss_function,
                  optimizer='Adam',
                  metrics=[masked_accuracy])
    checkpoint = ModelCheckpoint('result/CNN256_LSTM128_64_2.h5',
                                 monitor='val_masked_accuracy',
                                 mode='max',
                                 save_best_only=True)
    history = model.fit(X_train,
                        y_train,
                        epochs=epoch,
                        batch_size=128,
                        verbose=2,
                        validation_data=(X_test, y_test),
                        callbacks=[earlyStopping, checkpoint])

    title = "LRCN" + name
    plot.plot(history, title)
    return ROC_PR.ROC(model, X_test, y_test, title, True, limited=limited)
def lr_kfold(X, y, i):
    """Run 10-fold cross-validation of logistic regression and store scores.

    The data is first shuffled by splitting it with ``train_test_split``
    (fixed ``random_state=42``) and concatenating the two parts again. It is
    then cut into ten contiguous folds; the last fold also takes the
    remainder when ``len(X)`` is not divisible by ten.

    Args:
        X: Feature array.
        y: Label array aligned with ``X``.
        i: Identifier passed to ``ROC_PR.ROC_ML`` and used in the output
            file name ``result/LRResult<i>.txt``.

    Returns:
        None. Per-fold accuracy is appended to the module-level ``res`` list
        and the per-fold ``ROC_PR.ROC_ML`` results are written one per line
        to the result file.
    """
    from sklearn.linear_model import LogisticRegression

    global res
    n_folds = 10

    # Shuffle deterministically by splitting and re-joining.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=42,
                                                        shuffle=True)
    X = np.append(X_train, X_test, axis=0)
    y = np.append(y_train, y_test, axis=0)

    fold_len = int(len(X) / n_folds)
    cvscores1 = []
    for i2 in range(n_folds):
        start = fold_len * i2
        # The final fold runs to the end so no trailing samples are dropped.
        stop = len(X) if i2 == n_folds - 1 else start + fold_len
        X_test, y_test = X[start:stop], y[start:stop]
        X_train = np.append(X[:start], X[stop:], axis=0)
        y_train = np.append(y[:start], y[stop:], axis=0)

        lr_model_linear = LogisticRegression(C=1,
                                             penalty='l2',
                                             solver='newton-cg',
                                             max_iter=2677).fit(
                                                 X_train, y_train)
        score1 = ROC_PR.ROC_ML(lr_model_linear, X_test, y_test, "LR", i)
        accuracy = lr_model_linear.score(X_test, y_test)
        print(accuracy)
        res.append(accuracy)
        print("Area for 1")
        cvscores1.append(score1)

    # Context manager guarantees the file is flushed and closed.
    with open('result/LRResult' + str(i) + '.txt', 'w') as out_file:
        out_file.writelines(str(ele) + '\n' for ele in cvscores1)
def gbt_test(X, y):
    """Score the saved per-drug GBT models on the given data and print results.

    For each of five saved folds and each drug id in ``[0, 1, 2, 6, 8]`` the
    model pickled at ``../saved_models/GBT/gbt<drug>_<fold>.sav`` is loaded
    and evaluated with ``ROC_PR.ROC_ML`` on the samples whose label for that
    column is exactly 0 or 1.

    Args:
        X: 2-D NumPy array of features (``.tolist()`` is called on it).
        y: 2-D NumPy array of labels; column ``j`` is used for the ``j``-th
            entry of the drug list.

    Returns:
        None. Three nested lists (AUC, PR and SR; one inner list per fold,
        one value per drug) are printed.
    """
    drugs = [0, 1, 2, 6, 8]
    n_saved_folds = 5

    # NOTE(review): labels are taken from column j (position in `drugs`),
    # not column drugs[j] — presumably y holds only these five drugs;
    # confirm against the caller.
    # Filtering depends only on the drug, so do it once rather than per fold.
    rows = X.tolist()
    per_drug_data = []
    for j in range(len(drugs)):
        labelled = [(row, label)
                    for row, label in zip(rows, y[:, j].tolist())
                    if label == 0.0 or label == 1.0]
        per_drug_data.append((np.array([row for row, _ in labelled]),
                              np.array([label for _, label in labelled])))

    auc, pr, sr = [], [], []
    for i in range(n_saved_folds):
        a, p, s = [], [], []
        for j, drug in enumerate(drugs):
            X_drug, y_drug = per_drug_data[j]
            model_path = ('../saved_models/GBT/gbt' + str(drug) + '_' +
                          str(i) + '.sav')
            # NOTE: pickle executes arbitrary code on load; only use model
            # files produced by this project.
            with open(model_path, 'rb') as model_file:
                model = pickle.load(model_file)
            score_test, score_sr, score_pr = ROC_PR.ROC_ML(model,
                                                           X_drug,
                                                           y_drug,
                                                           "GBT",
                                                           0,
                                                           xgb=True)
            a.append(score_test)
            p.append(score_pr)
            s.append(score_sr)
        auc.append(a)
        pr.append(p)
        # Fixed: the original appended the AUC list `a` here, so the SR
        # output silently duplicated the AUC values.
        sr.append(s)

    print(auc)
    print(pr)
    print(sr)
def run_ELI5(model, X_train, X_test, X_val, y_train, y_test, y_val):
    """Print per-feature permutation importances for a trained model.

    Validation/test ROC scores and PR metrics are printed first. Then, for
    every feature column of the test set, that single column is shuffled
    once with ``eli5``'s ``get_score_importances`` and the resulting drop in
    ``ROC_PR.ROC_Score`` is recorded.

    Args:
        model: Trained model accepted by the ``ROC_PR`` helpers.
        X_train, y_train: Unused; kept for interface compatibility.
        X_test, y_test: Test data (converted to float arrays).
        X_val, y_val: Validation data (converted to float arrays).

    Returns:
        None. The list of score decreases (one per feature) is printed.
    """
    from eli5.permutation_importance import get_score_importances

    # `np.float` was only an alias for the builtin `float` and was removed in
    # NumPy 1.24; the builtin yields the same float64 dtype.
    X_test2 = np.array(X_test).astype(float)
    X_val2 = np.array(X_val).astype(float)
    y_test2 = np.array(y_test).astype(float)
    y_val2 = np.array(y_val).astype(float)

    val_score = ROC_PR.ROC_Score(model, X_val2, y_val2)
    test_score = ROC_PR.ROC_Score(model, X_test2, y_test2)
    spec_recall, prec_recall = ROC_PR.PR(model, X_test2, y_test2)
    print('area under ROC curve for val:', val_score)
    print('area under ROC curve for test:', test_score)
    print("recall at 95 spec: ", spec_recall)
    print("precision recall: ", prec_recall)

    def roc_scorer(features, targets):
        # Scoring callback in the (X, y) -> float form eli5 expects.
        return ROC_PR.ROC_Score(model, features, targets)

    feature_score = []
    for column in range(len(X_test2[0])):
        _, score_decreases = get_score_importances(
            roc_scorer,
            X_test2,
            y_test2,
            n_iter=1,
            columns_to_shuffle=[column])
        feature_importances = np.mean(score_decreases, axis=0)
        feature_score.append(feature_importances[0])
        print(column)
    print(feature_score)
def svm_kfold(X, y, i):
    """Run 10-fold cross-validation of a linear SVM and store the scores.

    The data is first shuffled by splitting it with ``train_test_split``
    (fixed ``random_state=42``) and concatenating the two parts again. It is
    then cut into ten contiguous folds; the last fold also takes the
    remainder when ``len(X)`` is not divisible by ten.

    Args:
        X: Feature array.
        y: Label array aligned with ``X``.
        i: Identifier used in the output file name
            ``result/SVMResult<i>.txt``.

    Returns:
        None. Per-fold accuracy is appended to the module-level ``res`` list
        and the per-fold ``ROC_PR.ROC_ML`` results are written one per line
        to the result file.
    """
    from sklearn.svm import SVC

    global res
    n_folds = 10

    # Shuffle deterministically by splitting and re-joining.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=42,
                                                        shuffle=True)
    X = np.append(X_train, X_test, axis=0)
    y = np.append(y_train, y_test, axis=0)

    fold_len = int(len(X) / n_folds)
    cvscores1 = []
    for i2 in range(n_folds):
        start = fold_len * i2
        # The final fold runs to the end so no trailing samples are dropped.
        stop = len(X) if i2 == n_folds - 1 else start + fold_len
        X_test, y_test = X[start:stop], y[start:stop]
        X_train = np.append(X[:start], X[stop:], axis=0)
        y_train = np.append(y[:start], y[stop:], axis=0)

        svm_model_linear = SVC(kernel='linear', C=0.1).fit(X_train, y_train)
        # NOTE(review): the fold index i2 (not i) is passed to ROC_ML here,
        # unlike in rf_kfold/lr_kfold — kept as-is; confirm it is intended.
        score1 = ROC_PR.ROC_ML(svm_model_linear, X_test, y_test, "SVM", i2)
        accuracy = svm_model_linear.score(X_test, y_test)
        print(accuracy)
        res.append(accuracy)
        print("Area for 1")
        cvscores1.append(score1)

    # Context manager guarantees the file is flushed and closed.
    with open('result/SVMResult' + str(i) + '.txt', 'w') as out_file:
        out_file.writelines(str(ele) + '\n' for ele in cvscores1)
def svm(X, y, i):
    """Fit a linear SVM on a 90/10 split and report its held-out scores.

    Args:
        X: Feature array.
        y: Label array aligned with ``X``.
        i: Identifier forwarded to ``ROC_PR.ROC_ML``.

    Returns:
        The value returned by ``ROC_PR.ROC_ML`` for the held-out 10 %.
        The held-out accuracy is also appended to the module-level ``res``.
    """
    from sklearn.svm import SVC

    global res
    train_X, test_X, train_y, test_y = train_test_split(X,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=42,
                                                        shuffle=True)
    classifier = SVC(kernel='linear', C=0.1).fit(train_X, train_y)
    roc_result = ROC_PR.ROC_ML(classifier, test_X, test_y, "SVM", i)
    held_out_accuracy = classifier.score(test_X, test_y)

    print(held_out_accuracy)
    print(roc_result)
    print("_______________________________")
    res.append(held_out_accuracy)
    return roc_result
def model_256_128_64_2BS(FrameSize, X, X_train, X_test, y_train, y_test,
                         epoch):
    """Train a two-layer LSTM with a 2-unit sigmoid head (binary CE loss).

    The best model by validation accuracy is checkpointed to
    ``result/One_256_128_64_2BS.h5``.

    Args:
        FrameSize: Sequence length used for the first layer's input shape.
        X: Full input; ``X[0].shape[1]`` supplies the per-step feature count.
        X_train, y_train: Training data.
        X_test, y_test: Data used for validation and for the final ROC.
        epoch: Number of training epochs.

    Returns:
        None. The training history is plotted and ``ROC_PR.ROC`` is run on
        the test data.
    """
    title = "One_256_128_64_2BS"
    stack = [
        LSTM(256,
             input_shape=(FrameSize, X[0].shape[1]),
             return_sequences=True,
             recurrent_dropout=0.3),
        SpatialDropout1D(0.2),
        LSTM(128, return_sequences=False, recurrent_dropout=0.3),
        Dropout(0.2),
        Dense(64),
        Dropout(0.2),
        Dense(2, activation='sigmoid'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss='binary_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])
    checkpoint = ModelCheckpoint('result/One_256_128_64_2BS.h5',
                                 monitor='val_accuracy',
                                 mode='max',
                                 save_best_only=True)
    history = model.fit(X_train,
                        y_train,
                        epochs=epoch,
                        batch_size=128,
                        verbose=2,
                        shuffle=True,
                        validation_data=(X_test, y_test),
                        callbacks=[checkpoint])
    plot.plot(history, title)
    ROC_PR.ROC(model, X_test, y_test, title)
def get_model_LR(C=1, penalty=1, solver=1, l1_ratio=1, max_iter=2):
    """Objective function: mean ROC score of logistic regression over drugs.

    The numeric arguments are decoded into scikit-learn settings:
    ``C`` and ``max_iter`` are exponents of ten, ``l1_ratio`` is divided by
    ten, ``penalty`` selects l1 (0), l2 (1), elasticnet (2) or none
    (anything else), and ``solver`` selects newton-cg (0), sag (1) or lbfgs
    (anything else) for the l2 penalty.

    Uses the module-level ``df_train`` (features) and ``labels`` (one label
    frame per drug). Rows whose label is not exactly 0 or 1 are dropped per
    drug before a 90/10 split.

    Returns:
        The sum of per-drug ``ROC_PR.ROC_ML`` results divided by the number
        of drugs.
    """
    from sklearn.linear_model import LogisticRegression

    C = 10 ** (int(C))
    penalty = int(penalty)
    solver = int(solver)
    l1_ratio = l1_ratio / 10
    max_iter = 10 ** max_iter
    print(max_iter)

    # Translate the integer codes into LogisticRegression keyword arguments.
    if penalty == 0:
        lr_kwargs = dict(C=C, penalty='l1', solver='liblinear',
                         max_iter=max_iter)
    elif penalty == 1:
        l2_solver = {0: 'newton-cg', 1: 'sag'}.get(solver, 'lbfgs')
        lr_kwargs = dict(C=C, penalty='l2', solver=l2_solver,
                         max_iter=max_iter)
    elif penalty == 2:
        lr_kwargs = dict(C=C, penalty='elasticnet', solver='saga',
                         max_iter=max_iter, l1_ratio=l1_ratio)
    else:
        lr_kwargs = dict(C=C, penalty='none', max_iter=max_iter)

    n_drugs = len(labels)
    all_scores = 0
    for drug_idx in range(0, n_drugs):
        all_rows = df_train.values.tolist()
        all_targets = labels[drug_idx].values.tolist()
        # Keep only samples with a definite 0/1 label for this drug.
        keep = [
            pos for pos, target in enumerate(all_targets)
            if target[0] == 0.0 or target[0] == 1.0
        ]
        X = [all_rows[pos] for pos in keep]
        y = [all_targets[pos] for pos in keep]

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.1,
                                                            random_state=42,
                                                            shuffle=True)
        lr_model_linear = LogisticRegression(**lr_kwargs).fit(X_train,
                                                              y_train)
        score1 = ROC_PR.ROC_ML(lr_model_linear, X_test, y_test, "LR", 0)
        print(drug_idx, flush=True)
        print(score1, flush=True)
        all_scores = all_scores + score1

    print(all_scores / n_drugs, flush=True)
    return all_scores / n_drugs
def get_model_SVM(kernel=0, degree=1, C=1, gamma=1):
    """Objective function: mean score of an SVM over all drugs.

    ``C`` and ``gamma`` are exponents of ten; ``kernel`` selects linear (0),
    polynomial (1) or RBF (anything else); ``degree`` is used only by the
    polynomial kernel.

    Uses the module-level ``df_train`` (features) and ``labels`` (one label
    frame per drug). Rows whose label is not exactly 0 or 1 are dropped per
    drug before a 90/10 split.

    Returns:
        The sum of per-drug scores divided by the number of drugs. A drug's
        score is the ``ROC_PR.ROC_ML`` result, or the plain accuracy when
        ``ROC_ML`` raises.
    """
    from sklearn.svm import SVC

    C = 10**(int(C))
    gamma = 10**(int(gamma))
    degree = int(degree)
    kernel = int(kernel)

    n_drugs = len(labels)
    all_scores = 0
    for i in range(0, n_drugs):
        all_rows = df_train.values.tolist()
        all_targets = labels[i].values.tolist()
        # Keep only samples with a definite 0/1 label for this drug (single
        # pass instead of repeated O(n) list deletions).
        keep = [
            pos for pos, target in enumerate(all_targets)
            if target[0] == 0.0 or target[0] == 1.0
        ]
        X = [all_rows[pos] for pos in keep]
        y = [all_targets[pos] for pos in keep]

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.1,
                                                            random_state=42,
                                                            shuffle=True)
        if kernel == 0:
            classifier = SVC(kernel='linear', C=C)
        elif kernel == 1:
            classifier = SVC(kernel='poly', C=C, degree=degree)
        else:
            classifier = SVC(kernel='rbf', C=C, gamma=gamma)
        svm_model_linear = classifier.fit(X_train, y_train)

        try:
            score1 = ROC_PR.ROC_ML(svm_model_linear, X_test, y_test, "SVM", 0)
        except Exception:
            # Best-effort fallback to accuracy. `Exception` (rather than a
            # bare except) lets Ctrl-C and SystemExit still stop the run.
            score1 = svm_model_linear.score(X_test, y_test)
        print(i, flush=True)
        print(score1, flush=True)
        all_scores = all_scores + score1

    print(all_scores / n_drugs, flush=True)
    return all_scores / n_drugs
def lr(X, y, i):
    """Fit L2 logistic regression on a 90/10 split and report its scores.

    Args:
        X: Feature array.
        y: Label array aligned with ``X``.
        i: Identifier forwarded to ``ROC_PR.ROC_ML``.

    Returns:
        The value returned by ``ROC_PR.ROC_ML`` for the held-out 10 %.
        The held-out accuracy is also appended to the module-level ``res``.
    """
    from sklearn.linear_model import LogisticRegression

    global res
    train_X, test_X, train_y, test_y = train_test_split(X,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=42,
                                                        shuffle=True)
    classifier = LogisticRegression(C=0.1, penalty='l2',
                                    solver='newton-cg').fit(train_X, train_y)
    roc_result = ROC_PR.ROC_ML(classifier, test_X, test_y, "LR", i)
    held_out_accuracy = classifier.score(test_X, test_y)

    print(held_out_accuracy)
    print(roc_result)
    print("_______________________________")
    res.append(held_out_accuracy)
    return roc_result
def run_single_fold(model):
    """Train ``model`` on the module-level split and return its test ROC.

    Reads the module-level ``X_train``/``y_train``, ``X_val``/``y_val``,
    ``X_test``/``y_test`` and ``epochs``. After fitting, test samples whose
    label is neither 0 nor 1 are removed together with their predictions
    before ``ROC_PR.ROC_maker`` is called.

    Args:
        model: An uncompiled Keras model; it is compiled here with the
            masked loss and masked accuracy.

    Returns:
        The value returned by ``ROC_PR.ROC_maker``.
    """
    # `np.float` was only an alias for the builtin `float` and was removed in
    # NumPy 1.24; the builtin yields the same float64 dtype.
    X_train2 = np.array(X_train).astype(float)
    X_test2 = np.array(X_test).astype(float)
    X_val2 = np.array(X_val).astype(float)
    y_train2 = np.array(y_train).astype(float)
    y_test2 = np.array(y_test).astype(float)
    y_val2 = np.array(y_val).astype(float)

    model.compile(loss=masked_loss_function,
                  optimizer='Adam',
                  metrics=[masked_accuracy])
    model.fit(X_train2,
              y_train2,
              epochs=epochs,
              batch_size=128,
              verbose=2,
              validation_data=(X_val2, y_val2))

    y_p = model.predict(X_test2)

    # Drop unlabeled samples (anything other than 0/1) from both the labels
    # and the predictions in one vectorized step. As before, np.delete
    # without an axis flattens the arrays when something is removed.
    unlabeled = np.flatnonzero((y_test2 != 0) & (y_test2 != 1))
    if unlabeled.size:
        y_test2 = np.delete(y_test2, unlabeled)
        y_p = np.delete(y_p, unlabeled)

    # Fixed: score against the filtered labels. The original passed the
    # unfiltered global `y_test`, which no longer lines up with `y_p`.
    score = ROC_PR.ROC_maker(y_test2, y_p, "asd")
    print('area under ROC curve for val:', score)
    return score
def model_CNN256_LSTM128_64_2(FrameSize, X, X_train, X_test, y_train, y_test,
                              epoch):
    """Train a conv + two-LSTM network with a 2-unit sigmoid head.

    The best model by training accuracy is checkpointed to
    ``result/CNN256_LSTM128_64_2.h5``.

    Args:
        FrameSize: Unused.
        X: Unused.
        X_train, y_train: Training data.
        X_test, y_test: Data used for validation and for the final ROC.
        epoch: Number of training epochs.

    Returns:
        None. The training history is plotted and ``ROC_PR.ROC`` is run on
        the test data.
    """
    title = "One_CNN256_LSTM128_64_2"
    stack = [
        Dropout(0.2),
        Conv1D(filters=5, kernel_size=3, activation='relu', padding='same'),
        MaxPooling1D(pool_size=3),
        LSTM(256, return_sequences=True, recurrent_dropout=0.3),
        SpatialDropout1D(0.2),
        LSTM(128, return_sequences=False, recurrent_dropout=0.3),
        Dropout(0.2),
        Dense(64),
        Dropout(0.2),
        Dense(2, activation='sigmoid'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])
    checkpoint = ModelCheckpoint('result/CNN256_LSTM128_64_2.h5',
                                 monitor='accuracy',
                                 mode='max',
                                 save_best_only=True)
    history = model.fit(X_train,
                        y_train,
                        epochs=epoch,
                        batch_size=128,
                        shuffle=True,
                        verbose=2,
                        validation_data=(X_test, y_test),
                        callbacks=[checkpoint])
    plot.plot(history, title)
    ROC_PR.ROC(model, X_test, y_test, title, False)
def model_256_128_64_2(FrameSize, X, X_train, X_test, y_train, y_test, epoch,
                       earlyStopping):
    """Train a two-layer LSTM with 12 sigmoid outputs and the masked loss.

    The best model by validation masked accuracy is checkpointed to
    ``result/256_128_64_2.h5``.

    Args:
        FrameSize: Sequence length used for the first layer's input shape.
        X: Full input; ``X[0].shape[1]`` supplies the per-step feature count.
        X_train, y_train: Training data.
        X_test, y_test: Data used for validation and for the final ROC.
        epoch: Number of training epochs.
        earlyStopping: Keras callback added to the fit call.

    Returns:
        None. The model summary is printed, the training history is plotted
        and ``ROC_PR.ROC`` is run on the test data.
    """
    title = "256_128_64_2"
    stack = [
        LSTM(256,
             input_shape=(FrameSize, X[0].shape[1]),
             return_sequences=True,
             recurrent_dropout=0.3),
        SpatialDropout1D(0.2),
        LSTM(128, return_sequences=False, recurrent_dropout=0.3),
        Dropout(0.2),
        Dense(64),
        Dropout(0.2),
        Dense(12, activation='sigmoid'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss=masked_loss_function,
                  optimizer='Adam',
                  metrics=[masked_accuracy])
    print(model.summary())

    checkpoint = ModelCheckpoint('result/256_128_64_2.h5',
                                 monitor='val_masked_accuracy',
                                 mode='max',
                                 save_best_only=True)
    history = model.fit(X_train,
                        y_train,
                        epochs=epoch,
                        batch_size=128,
                        shuffle=True,
                        verbose=2,
                        validation_data=(X_test, y_test),
                        callbacks=[earlyStopping, checkpoint])
    plot.plot(history, title)
    ROC_PR.ROC(model, X_test, y_test, title, True)
def performance_calculation(self, array1, array2, array3):
    """Compute binary-classification metrics, ignoring entries labelled -1.

    Args:
        array1: True labels; positions equal to -1 are removed from all
            three inputs before scoring.
        array2: Hard predictions, passed to ``confusion_matrix``.
        array3: Scores, passed to ``roc_curve`` and ``ROC_PR.SR_maker``.

    Returns:
        Tuple ``(roc_auc, se95spe, pr, sen, sps, roc_auc, f1_score)``;
        ``roc_auc`` appears twice. ``se95spe`` and ``pr`` are the two values
        returned by ``ROC_PR.SR_maker``.
    """
    from evaluations import ROC_PR

    # Collect the masked positions first, then remove them in one call.
    masked_positions = [
        pos for pos in range(len(array1)) if array1[pos] == -1
    ]
    if masked_positions:
        array1 = np.delete(array1, masked_positions)
        array2 = np.delete(array2, masked_positions, 0)
        array3 = np.delete(array3, masked_positions, 0)

    tn, fp, fn, tp = confusion_matrix(array1, array2).ravel()
    sen = tp / (tp + fn)
    sps = tn / (tn + fp)
    precision = tp / (tp + fp)
    f1_score = 2 * (precision * sen) / (precision + sen)

    fpr, tpr, _ = metrics.roc_curve(array1, array3)
    roc_auc = metrics.auc(fpr, tpr)
    se95spe, pr = ROC_PR.SR_maker(array1, array3)
    return roc_auc, se95spe, pr, sen, sps, roc_auc, f1_score
def original_score(df_train, labels):
    """Score the ten saved fold models on their respective held-out folds.

    The data returned by ``prepare_data`` is cut into ten contiguous folds
    (the last one also takes the remainder). For fold ``i`` the model
    returned by ``load_model(i)`` is scored with ``ROC_PR.ROC_Score`` on
    that fold.

    Args:
        df_train: Feature frame forwarded to ``prepare_data``.
        labels: Labels forwarded to ``prepare_data``.

    Returns:
        List of ten ROC scores, one per fold.
    """
    X, y, _ = prepare_data(df_train, labels)
    n_folds = 10
    fold_len = int(len(X) / n_folds)

    scores = []
    for i in range(n_folds):
        print("fold: " + str(i))
        start = fold_len * i
        # The final fold runs to the end so no trailing samples are dropped.
        stop = len(X) if i == n_folds - 1 else start + fold_len

        # Only the held-out fold is needed for scoring; the training and
        # validation splits the original built here were never used.
        model = load_model(i)
        # `np.float` was only an alias for the builtin `float` and was
        # removed in NumPy 1.24; the builtin yields the same float64 dtype.
        X_test2 = np.array(X[start:stop]).astype(float)
        y_test2 = np.array(y[start:stop]).astype(float)
        scores.append(ROC_PR.ROC_Score(model, X_test2, y_test2))
    return scores
def get_model_SVM_new(kernel=0, degree=1, C=1, gamma=1):
    """Objective function: train one SVM per drug and return mean val score.

    ``C`` and ``gamma`` are exponents of ten; ``kernel`` selects linear (0),
    polynomial (1) or RBF (anything else); ``degree`` is used only by the
    polynomial kernel.

    Reads the module-level ``X_train``/``y_train``, ``X_val``/``y_val`` and
    ``X_test``/``y_test`` NumPy arrays. For every label column, samples
    whose label is not exactly 0 or 1 are dropped from each split. Each
    fitted model is pickled to ``svm<column><random string>.sav``.

    Side effects:
        Appends this run's per-drug lists to the module-level
        ``rf_val_score``, ``rf_test_score`` and ``rf_sr_score``.

    Returns:
        The sum of per-drug validation scores divided by the number of
        label columns.
    """
    from sklearn.svm import SVC

    def _labelled_only(features, targets):
        # Return (rows, labels) as lists, keeping only 0/1-labelled samples.
        rows, kept = [], []
        for row, target in zip(features.tolist(), targets.tolist()):
            if target == 0.0 or target == 1.0:
                rows.append(row)
                kept.append(target)
        return rows, kept

    C = 10**(int(C))
    gamma = 10**(int(gamma))
    degree = int(degree)
    kernel = int(kernel)

    n_drugs = len(y_train[0])
    all_scores = 0
    res_test, res_val, res_sr, res_pr = [], [], [], []
    string_random = get_random_string(20)

    for i in range(0, n_drugs):
        X_train2, y_train2 = _labelled_only(X_train, y_train[:, i])
        X_test2, y_test2 = _labelled_only(X_test, y_test[:, i])
        X_val2, y_val2 = _labelled_only(X_val, y_val[:, i])

        if kernel == 0:
            classifier = SVC(kernel='linear', C=C)
        elif kernel == 1:
            classifier = SVC(kernel='poly', C=C, degree=degree)
        else:
            classifier = SVC(kernel='rbf', C=C, gamma=gamma)
        svm_model_linear = classifier.fit(X_train2, y_train2)

        score_val, _, _ = ROC_PR.ROC_ML(svm_model_linear, X_val2, y_val2,
                                        "LR", 0)
        score_test, score_sr, score_pr = ROC_PR.ROC_ML(svm_model_linear,
                                                       X_test2, y_test2,
                                                       "LR", 0)
        print(i, flush=True)
        res_test.append(score_test)
        res_val.append(score_val)
        res_sr.append(score_sr)
        res_pr.append(score_pr)
        all_scores = all_scores + score_val

        model_path = 'svm' + str(i) + string_random + '.sav'
        print(model_path)
        # Context manager guarantees the pickle is flushed and closed.
        with open(model_path, 'wb') as model_file:
            pickle.dump(svm_model_linear, model_file)

    mean_val_score = all_scores / n_drugs
    # The mean is stored as the last element of the validation list.
    res_val.append(mean_val_score)
    rf_val_score.append(res_val)
    rf_test_score.append(res_test)
    rf_sr_score.append(res_sr)

    print("val score", res_val)
    print("test score", res_test)
    print("recall at 95 spec: ", res_sr)
    print("precision recall: ", res_pr)
    print(mean_val_score, flush=True)
    print(string_random)
    return mean_val_score
def get_model_GBT(n_estimators=10, min_samples_split=2, max_depth=1,
                  random_state=0):
    """Objective function: train one XGBoost model per drug, return mean val.

    Hyperparameters are decoded as follows: ``n_estimators`` is multiplied
    by ten; ``max_depth`` becomes ``None`` above 15, otherwise ten times its
    integer part; a negative ``random_state`` becomes ``None``.

    Reads the module-level ``X_train``/``y_train``, ``X_val``/``y_val`` and
    ``X_test``/``y_test`` NumPy arrays. For every label column, samples
    whose label is not exactly 0 or 1 are dropped from each split. Each
    fitted model is pickled to ``gbt<column><random string>.sav``. If
    training, scoring or saving fails for a drug, all of its scores are
    recorded as 0.

    Side effects:
        Appends this run's per-drug lists to the module-level
        ``rf_val_score``, ``rf_test_score`` and ``rf_sr_score``.

    Returns:
        The sum of per-drug validation scores divided by the number of
        label columns.
    """
    import xgboost.sklearn as xgb

    def _labelled_only(features, targets):
        # Return (rows, labels) as lists, keeping only 0/1-labelled samples.
        rows, kept = [], []
        for row, target in zip(features.tolist(), targets.tolist()):
            if target == 0.0 or target == 1.0:
                rows.append(row)
                kept.append(target)
        return rows, kept

    n_estimators = 10 * int(n_estimators)
    min_samples_split = int(min_samples_split)
    random_state = None if random_state < 0 else int(random_state)
    max_depth = None if max_depth > 15 else 10 * int(max_depth)

    n_drugs = len(y_train[0])
    all_scores = 0
    res_test, res_val, res_sr, res_pr = [], [], [], []
    string_random = get_random_string(20)

    for i in range(0, n_drugs):
        X_train2, y_train2 = _labelled_only(X_train, y_train[:, i])
        X_test2, y_test2 = _labelled_only(X_test, y_test[:, i])
        X_val2, y_val2 = _labelled_only(X_val, y_val[:, i])

        print(n_estimators)
        print(min_samples_split)
        print(random_state)
        print(max_depth)
        try:
            # NOTE(review): `min_samples_split` looks like a scikit-learn
            # parameter rather than an XGBoost one — kept as-is; confirm.
            gbt_model = xgb.XGBModel(n_estimators=n_estimators,
                                     min_samples_split=min_samples_split,
                                     random_state=random_state,
                                     max_depth=max_depth).fit(
                                         np.array(X_train2),
                                         np.array(y_train2))
            score_val, _, _ = ROC_PR.ROC_ML(gbt_model,
                                            np.array(X_val2),
                                            np.array(y_val2),
                                            "GBT",
                                            0,
                                            xgb=True)
            score_test, score_sr, score_pr = ROC_PR.ROC_ML(
                gbt_model,
                np.array(X_test2),
                np.array(y_test2),
                "GBT",
                0,
                xgb=True)
            model_path = 'gbt' + str(i) + string_random + '.sav'
            print(model_path)
            with open(model_path, 'wb') as model_file:
                pickle.dump(gbt_model, model_file)
        except Exception as err:
            # Fixed: the original `except ():` matched no exception at all,
            # so this fallback could never run and any failure aborted the
            # whole search.
            print("errorrrrrr in GBT", flush=True)
            print(repr(err), flush=True)
            score_test, score_sr, score_pr, score_val = 0, 0, 0, 0

        print(i, flush=True)
        res_test.append(score_test)
        res_val.append(score_val)
        res_sr.append(score_sr)
        res_pr.append(score_pr)
        all_scores = all_scores + score_val

    mean_val_score = all_scores / n_drugs
    # The mean is stored as the last element of the validation list.
    res_val.append(mean_val_score)
    rf_val_score.append(res_val)
    rf_test_score.append(res_test)
    rf_sr_score.append(res_sr)

    print("val score", res_val)
    print("test score", res_test)
    print("recall at 95 spec: ", res_sr)
    print("precision recall: ", res_pr)
    print(mean_val_score, flush=True)
    print(string_random)
    return mean_val_score
def run_k_fold(model):
    """Fit ``model`` over ten contiguous folds and return the mean ROC score.

    On the first call (module-level ``check == 0``) the module-level train
    and test arrays are merged into the globals ``X`` and ``y`` and the
    originals are replaced by 0 to release them. Also reads the
    module-level ``epochs`` and ``limited``.

    Args:
        model: Keras model. The same instance is re-compiled and fitted
            again for every fold.

    Returns:
        Mean of the per-fold ``ROC_PR.ROC_Score`` values. When that mean
        exceeds 0.97 the model is also saved to
        ``LRCN_kfold_<random string>.h5``.
    """
    global X_train, X_test, y_train, y_test
    global X, y, check

    if check == 0:
        check = 1
        X = np.append(X_train, X_test, axis=0)
        y = np.append(y_train, y_test, axis=0)
        # Drop the references to the originals now that they are merged.
        X_train = 0
        X_test = 0
        y_train = 0
        y_test = 0

    n_folds = 10
    fold_len = int(len(X) / n_folds)
    cvscores = []
    scores_each_drug = []
    for i in range(n_folds):
        print("fold:" + str(i))
        start = fold_len * i
        # The final fold runs to the end so no trailing samples are dropped.
        stop = len(X) if i == n_folds - 1 else start + fold_len
        X_test_tmp, y_test_tmp = X[start:stop], y[start:stop]
        X_train_tmp = np.append(X[:start], X[stop:], axis=0)
        y_train_tmp = np.append(y[:start], y[stop:], axis=0)

        model.compile(loss=masked_loss_function,
                      optimizer='Adam',
                      metrics=[masked_accuracy])
        model.fit(X_train_tmp,
                  y_train_tmp,
                  epochs=epochs,
                  batch_size=128,
                  verbose=2,
                  validation_data=(X_test_tmp, y_test_tmp))

        # NOTE(review): the fold score is computed on the training part, not
        # the held-out part — kept as-is; confirm this is intended.
        score = ROC_PR.ROC_Score(model, X_train_tmp, y_train_tmp,
                                 limited=limited)
        print('area under ROC curve:', score)
        cvscores.append(score)
        scores_each_drug.append(
            ROC_PR.ROC(model, X_test_tmp, y_test_tmp,
                       ("LRCN" + "BO_delete" + str(i)), True))

    mean_score = np.mean(cvscores)
    print(mean_score)
    if mean_score > 0.97:
        # Fixed: Keras `Model.save` requires a target path; the original
        # bare `model.save()` raised TypeError exactly when a good model
        # was found. A random suffix avoids overwriting earlier runs.
        model.save('LRCN_kfold_' + get_random_string(17) + '.h5')
    print(scores_each_drug)
    return mean_score
def model_CNN256_LSTM128_64_2(FrameSize, X, X_train, X_test, y_train, y_test,
                              epoch, earlyStopping, name, dropout2_rate,
                              dense_1, filterCNN, kernelCNN, LSTM1, LSTM2,
                              recurrent_dropout, limited=False):
    """Build, train and score a configurable conv + two-LSTM network.

    Args:
        FrameSize: Only printed.
        X: Full input array; only its shape is printed.
        X_train, y_train: Training data.
        X_test, y_test: Data used for validation and for the final ROC.
        epoch: Number of training epochs.
        earlyStopping: Keras callback added to the fit call.
        name: Suffix appended to ``"LRCN"`` for plot/ROC titles.
        dropout2_rate: Rate used by every dropout layer.
        dense_1: Width of the hidden dense layer.
        filterCNN, kernelCNN: Filters and kernel size of the convolution.
        LSTM1, LSTM2: Unit counts of the two LSTM layers.
        recurrent_dropout: Recurrent dropout of both LSTM layers.
        limited: When true the output layer has 7 units instead of 12, and
            the flag is forwarded to ``ROC_PR.ROC``.

    Returns:
        The result of ``ROC_PR.ROC`` on the test data.
    """
    print(X.shape)
    print(FrameSize)

    n_outputs = 7 if limited else 12
    stack = [
        Dropout(dropout2_rate),
        Conv1D(filters=filterCNN,
               kernel_size=kernelCNN,
               activation='relu',
               padding='same'),
        MaxPooling1D(pool_size=3, padding='same'),
        LSTM(LSTM1,
             return_sequences=True,
             recurrent_dropout=recurrent_dropout),
        SpatialDropout1D(dropout2_rate),
        LSTM(LSTM2,
             return_sequences=False,
             recurrent_dropout=recurrent_dropout),
        Dropout(dropout2_rate),
        Dense(dense_1),
        Dropout(dropout2_rate),
        Dense(n_outputs, activation='sigmoid'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(loss=masked_loss_function,
                  optimizer='Adam',
                  metrics=[masked_accuracy])
    checkpoint = ModelCheckpoint('result/CNN256_LSTM128_64_2.h5',
                                 monitor='val_masked_accuracy',
                                 mode='max',
                                 save_best_only=True)
    history = model.fit(X_train,
                        y_train,
                        epochs=epoch,
                        batch_size=128,
                        verbose=2,
                        validation_data=(X_test, y_test),
                        callbacks=[earlyStopping, checkpoint])

    title = "LRCN" + name
    plot.plot(history, title)
    return ROC_PR.ROC(model, X_test, y_test, title, True, limited=limited)
def run_one_fold(model):
    """Fit ``model`` on the global train split and report ROC / PR metrics.

    Reads the module-level ``X_train``, ``X_val``, ``X_test``, ``y_train``,
    ``y_val``, ``y_test`` and ``epochs``. The model is saved to
    ``'LRCN<comp>_<fold_num>.h5'`` when no previously recorded entry in the
    global ``scores`` list is greater than this run's validation score.

    Args:
        model: Keras model; compiled and fitted in place.

    Returns:
        The value of ``ROC_PR.ROC_Score`` on the validation split.
    """
    global scores
    global fold_num
    global comp

    # np.float was removed from NumPy; float64 is the dtype it used to alias.
    X_train2, X_test2, X_val2, y_train2, y_test2, y_val2 = (
        np.array(arr).astype(np.float64)
        for arr in (X_train, X_test, X_val, y_train, y_test, y_val))

    model.compile(loss=masked_loss_function,
                  optimizer='Adam',
                  metrics=[masked_accuracy])
    model.fit(
        X_train2,
        y_train2,
        epochs=epochs,
        batch_size=128,
        verbose=2,
        validation_data=(X_val2, y_val2))

    score = ROC_PR.ROC_Score(model, X_val2, y_val2)
    score_test = ROC_PR.ROC_Score(model, X_test2, y_test2)
    score_for_each_drug = ROC_PR.ROC(model, X_test2, y_test2,
                                     ("LRCN" + "BO_delete"), True)
    spec_recall, prec_recall = ROC_PR.PR(model, X_test2, y_test2)

    print('area under ROC curve for val:', score)
    print('area under ROC curve for test:', score_test)
    print(score_for_each_drug)
    print("recall at 95 spec: ", spec_recall)
    print("precision recall: ", prec_recall)

    # This run is the best so far when no recorded score is strictly greater
    # (trivially true while `scores` is still empty).
    is_best = not any(previous > score for previous in scores)
    if is_best and scores:
        print(0)
    print(scores)
    print(score)
    if is_best:
        model.save('LRCN' + str(comp) + '_' + str(fold_num) + '.h5')
        # NOTE(review): only scores that triggered a save are recorded here;
        # confirm that non-best scores are not meant to be appended as well.
        scores.append(score)

    # Earlier feature-attribution experiments (LIME, SHAP DeepExplainer,
    # DeepLIFT) were tried at this point and left disabled. The eli5
    # permutation-importance variant below was marked as working:
    #
    # def score(X_test, y_test):
    #     return ROC_PR.ROC_Score(model, X_test, y_test)
    #
    # from eli5.permutation_importance import get_score_importances
    #
    # feature_score = []
    # for i in range(0, len(X_test2[0])):
    #     base_score, score_decreases = get_score_importances(
    #         score, X_test2, y_test2, n_iter=1, columns_to_shuffle=[i])
    #     feature_importances = np.mean(score_decreases, axis=0)
    #     feature_score.append(feature_importances[0])
    #     print(feature_score)

    return score
def decrease_score(model, score, X_test, y_test):
    """Return how far ``score`` exceeds the model's ROC score on the given data.

    Args:
        model: model handed to ``ROC_PR.ROC_Score``.
        score: reference score to compare against.
        X_test: features; converted to a float64 array before scoring.
        y_test: labels; converted to a float64 array before scoring.

    Returns:
        ``score`` minus the freshly computed ``ROC_PR.ROC_Score`` value
        (positive when the new score is lower than ``score``).
    """
    # np.float was removed from NumPy; float64 is the dtype it used to alias.
    X_test2 = np.array(X_test).astype(np.float64)
    y_test2 = np.array(y_test).astype(np.float64)
    new_score = ROC_PR.ROC_Score(model, X_test2, y_test2)
    return score - new_score
def score(X_test, y_test):
    """Return ``ROC_PR.ROC_Score`` for the given features and labels.

    NOTE(review): ``model`` is neither a parameter nor a local, so it is
    presumably looked up at module level when this is called - confirm it is
    defined there.
    """
    roc_score = ROC_PR.ROC_Score(model, X_test, y_test)
    return roc_score