def run_one_fold(model):
    """Train ``model`` on the module-level splits, report metrics and save it.

    The model is compiled with ``masked_loss_function`` / ``masked_accuracy``
    and the Adam optimizer, fitted on ``X_train``/``y_train`` with
    ``X_val``/``y_val`` as validation data, then evaluated through the
    ``ROC_PR`` helpers on the validation and test splits. The fitted model is
    written to ``wnd_<random 17-char tag>.h5`` in the working directory.

    Args:
        model: object exposing ``compile``, ``fit`` and ``save``
            (presumably a Keras model -- confirm against the caller).

    Returns:
        The value ``ROC_PR.ROC_Score`` yields for the validation split.
    """
    model.compile(
        loss=masked_loss_function,
        optimizer='Adam',
        metrics=[masked_accuracy],
    )

    # ``shuffle`` is deliberately not passed, so fit() uses its own default.
    fit_options = {
        'epochs': epochs,
        'batch_size': 128,
        'verbose': 2,
        'validation_data': (X_val, y_val),
        'callbacks': [MyCustomCallback()],
    }
    # The fit result is bound but not used afterwards.
    fit_history = model.fit(X_train, y_train, **fit_options)

    # All metrics are computed before anything is printed.
    val_auc = ROC_PR.ROC_Score(model, X_val, y_val)
    test_auc = ROC_PR.ROC_Score(model, X_test, y_test)
    per_drug_auc = ROC_PR.ROC(
        model, X_test, y_test, ("wide-n-deep" + "BO_delete"), True)
    recall_at_spec, precision_recall = ROC_PR.PR(model, X_test, y_test)

    print('area under ROC curve for val:', val_auc)
    print('area under ROC curve for test:', test_auc)
    print(per_drug_auc)
    print("recall at 95 spec: ", recall_at_spec)
    print("precision recall: ", precision_recall)

    # The random tag is printed so the saved file can be matched to this run.
    run_tag = get_random_string(17)
    print(run_tag)
    model.save('wnd_' + run_tag + '.h5')

    return val_auc
def get_model_SVM_new(kernel=0, degree=1, C=1, gamma=1):
    """Fit one SVC per label column and return the mean validation score.

    Reads the module-level arrays ``X_train``/``X_val``/``X_test`` and
    ``y_train``/``y_val``/``y_test``. For every column of ``y_train`` the
    rows whose label is neither 0 nor 1 are dropped from each split, an SVC
    is fitted on the remaining training rows, scored with ``ROC_PR.ROC_ML``
    on the validation and test rows, and pickled to
    ``svm<column><random 20-char tag>.sav`` in the working directory.

    Side effects: appends this run's per-column score lists to the
    module-level ``rf_val_score``, ``rf_test_score`` and ``rf_sr_score``
    lists, and prints progress to stdout.

    Args:
        kernel: truncated to int; 0 -> linear, 1 -> polynomial,
            anything else -> RBF.
        degree: truncated to int; only used by the polynomial kernel.
        C: exponent; the SVC regularisation parameter is ``10 ** int(C)``.
        gamma: exponent; the RBF ``gamma`` is ``10 ** int(gamma)``.

    Returns:
        Sum of the per-column validation scores divided by the number of
        label columns.
    """
    from sklearn.svm import SVC

    C = 10 ** int(C)
    gamma = 10 ** int(gamma)
    degree = int(degree)
    kernel = int(kernel)

    def _labelled(features, labels):
        # Keep only the rows labelled exactly 0 or 1; any other value
        # (NaN included) fails both comparisons and is dropped. One boolean
        # mask replaces the row-by-row list deletion.
        keep = (labels == 0.0) | (labels == 1.0)
        return features[keep].tolist(), labels[keep].tolist()

    n_labels = len(y_train[0])
    all_scores = 0
    res_test, res_val, res_sr, res_pr = [], [], [], []
    string_random = get_random_string(20)

    for i in range(n_labels):
        X_train2, y_train2 = _labelled(X_train, y_train[:, i])
        X_test2, y_test2 = _labelled(X_test, y_test[:, i])
        X_val2, y_val2 = _labelled(X_val, y_val[:, i])

        if kernel == 0:
            classifier = SVC(kernel='linear', C=C)
        elif kernel == 1:
            classifier = SVC(kernel='poly', C=C, degree=degree)
        else:
            classifier = SVC(kernel='rbf', C=C, gamma=gamma)
        svm_model_linear = classifier.fit(X_train2, y_train2)

        # NOTE(review): the tag handed to ROC_ML is "LR" although this is an
        # SVM; left unchanged -- confirm whether ROC_ML depends on it.
        score_val, _, _ = ROC_PR.ROC_ML(
            svm_model_linear, X_val2, y_val2, "LR", 0)
        score_test, score_sr, score_pr = ROC_PR.ROC_ML(
            svm_model_linear, X_test2, y_test2, "LR", 0)
        print(i, flush=True)

        res_test.append(score_test)
        res_val.append(score_val)
        res_sr.append(score_sr)
        res_pr.append(score_pr)
        all_scores = all_scores + score_val

        model_path = 'svm' + str(i) + string_random + '.sav'
        print(model_path)
        # Context manager so the file is flushed and closed right away.
        with open(model_path, 'wb') as model_file:
            pickle.dump(svm_model_linear, model_file)

    mean_val_score = all_scores / n_labels
    # The mean is stored as the last element of the validation list.
    res_val.append(mean_val_score)
    rf_val_score.append(res_val)
    rf_test_score.append(res_test)
    rf_sr_score.append(res_sr)

    print("val score", res_val)
    print("test score", res_test)
    print("recall at 95 spec: ", res_sr)
    print("precision recall: ", res_pr)
    print(mean_val_score, flush=True)
    print(string_random)
    return mean_val_score
def get_model_GBT(n_estimators=10, min_samples_split=2, max_depth=1, random_state=0):
    """Fit one XGBoost model per label column; return the mean validation score.

    Reads the module-level arrays ``X_train``/``X_val``/``X_test`` and
    ``y_train``/``y_val``/``y_test``. For every column of ``y_train`` the
    rows whose label is neither 0 nor 1 are dropped from each split, an
    ``xgboost.sklearn.XGBModel`` is fitted, scored with ``ROC_PR.ROC_ML``
    and pickled to ``gbt<column><random 20-char tag>.sav``. If anything in
    that sequence raises, the error is printed and the column is scored 0
    on every metric.

    Side effects: appends this run's per-column score lists to the
    module-level ``rf_val_score``, ``rf_test_score`` and ``rf_sr_score``
    lists, and prints progress to stdout.

    Args:
        n_estimators: truncated to int and multiplied by 10.
        min_samples_split: truncated to int and forwarded to ``XGBModel``.
            NOTE(review): this looks like a scikit-learn forest parameter;
            confirm that XGBoost honours it.
        max_depth: values above 15 mean "no limit" (``None``); otherwise
            truncated to int and multiplied by 10.
        random_state: negative means ``None``; otherwise truncated to int.

    Returns:
        Sum of the per-column validation scores divided by the number of
        label columns.
    """
    import xgboost.sklearn as xgb

    n_estimators = 10 * int(n_estimators)
    min_samples_split = int(min_samples_split)
    random_state = None if random_state < 0 else int(random_state)
    max_depth = None if max_depth > 15 else 10 * int(max_depth)

    def _labelled(features, labels):
        # Keep only the rows labelled exactly 0 or 1; any other value
        # (NaN included) fails both comparisons and is dropped. One boolean
        # mask replaces the row-by-row list deletion.
        keep = (labels == 0.0) | (labels == 1.0)
        return features[keep].tolist(), labels[keep].tolist()

    n_labels = len(y_train[0])
    all_scores = 0
    res_test, res_val, res_sr, res_pr = [], [], [], []
    string_random = get_random_string(20)

    for i in range(n_labels):
        X_train2, y_train2 = _labelled(X_train, y_train[:, i])
        X_test2, y_test2 = _labelled(X_test, y_test[:, i])
        X_val2, y_val2 = _labelled(X_val, y_val[:, i])

        print(n_estimators)
        print(min_samples_split)
        print(random_state)
        print(max_depth)

        try:
            gbt_model = xgb.XGBModel(
                n_estimators=n_estimators,
                min_samples_split=min_samples_split,
                random_state=random_state,
                max_depth=max_depth,
            ).fit(np.array(X_train2), np.array(y_train2))
            score_val, _, _ = ROC_PR.ROC_ML(
                gbt_model, np.array(X_val2), np.array(y_val2),
                "GBT", 0, xgb=True)
            score_test, score_sr, score_pr = ROC_PR.ROC_ML(
                gbt_model, np.array(X_test2), np.array(y_test2),
                "GBT", 0, xgb=True)

            model_path = 'gbt' + str(i) + string_random + '.sav'
            print(model_path)
            # Context manager so the file is flushed and closed right away.
            with open(model_path, 'wb') as model_file:
                pickle.dump(gbt_model, model_file)
        except Exception as exc:
            # An empty-tuple ``except ():`` matches no exception at all, so
            # the zero-score fallback could never run. Catch Exception so a
            # failing column is reported and scored 0 instead of aborting
            # the whole run.
            print("errorrrrrr in GBT", repr(exc), flush=True)
            score_test, score_sr, score_pr, score_val = 0, 0, 0, 0

        print(i, flush=True)
        res_test.append(score_test)
        res_val.append(score_val)
        res_sr.append(score_sr)
        res_pr.append(score_pr)
        all_scores = all_scores + score_val

    mean_val_score = all_scores / n_labels
    # The mean is stored as the last element of the validation list.
    res_val.append(mean_val_score)
    rf_val_score.append(res_val)
    rf_test_score.append(res_test)
    rf_sr_score.append(res_sr)

    print("val score", res_val)
    print("test score", res_test)
    print("recall at 95 spec: ", res_sr)
    print("precision recall: ", res_pr)
    print(mean_val_score, flush=True)
    print(string_random)
    return mean_val_score
def get_model_RF(n_estimators=10, min_samples_split=2, max_depth=1, bootstrap=0):
    """Fit one random forest per label column; return the mean validation score.

    Reads the module-level arrays ``X_train``/``X_val``/``X_test`` and
    ``y_train``/``y_val``/``y_test``. For every column of ``y_train`` the
    rows whose label is neither 0 nor 1 are dropped from each split, a
    ``RandomForestClassifier`` is fitted on the remaining training rows,
    scored with ``ROC_PR.ROC_ML`` on the validation and test rows, and
    pickled to ``rf<column><random 20-char tag>.sav``.

    Side effects: appends this run's per-column score lists to the
    module-level ``rf_val_score``, ``rf_test_score`` and ``rf_sr_score``
    lists, and prints progress to stdout.

    Args:
        n_estimators: truncated to int and multiplied by 10.
        min_samples_split: truncated to int.
        max_depth: values above 15 mean "no limit" (``None``); otherwise
            truncated to int and multiplied by 10.
        bootstrap: numeric switch; negative disables bootstrapping, any
            other value enables it.

    Returns:
        Sum of the per-column validation scores divided by the number of
        label columns.
    """
    from sklearn.ensemble import RandomForestClassifier

    n_estimators = 10 * int(n_estimators)
    min_samples_split = int(min_samples_split)
    bootstrap = not (bootstrap < 0)
    max_depth = None if max_depth > 15 else 10 * int(max_depth)

    def _labelled(features, labels):
        # Keep only the rows labelled exactly 0 or 1; any other value
        # (NaN included) fails both comparisons and is dropped. One boolean
        # mask replaces the row-by-row list deletion.
        keep = (labels == 0.0) | (labels == 1.0)
        return features[keep].tolist(), labels[keep].tolist()

    n_labels = len(y_train[0])
    all_scores = 0
    res_test, res_val, res_sr, res_pr = [], [], [], []
    string_random = get_random_string(20)

    for i in range(n_labels):
        X_train2, y_train2 = _labelled(X_train, y_train[:, i])
        X_test2, y_test2 = _labelled(X_test, y_test[:, i])
        X_val2, y_val2 = _labelled(X_val, y_val[:, i])

        rf_model = RandomForestClassifier(
            n_estimators=n_estimators,
            min_samples_split=min_samples_split,
            bootstrap=bootstrap,
            max_depth=max_depth,
        ).fit(X_train2, y_train2)

        score_val, _, _ = ROC_PR.ROC_ML(
            rf_model, X_val2, y_val2, "RF", 0, rf=True)
        score_test, score_sr, score_pr = ROC_PR.ROC_ML(
            rf_model, X_test2, y_test2, "RF", 0, rf=True)
        print(i, flush=True)

        res_test.append(score_test)
        res_val.append(score_val)
        res_sr.append(score_sr)
        res_pr.append(score_pr)
        all_scores = all_scores + score_val

        model_path = 'rf' + str(i) + string_random + '.sav'
        print(model_path)
        # Context manager so the file is flushed and closed right away.
        with open(model_path, 'wb') as model_file:
            pickle.dump(rf_model, model_file)

    mean_val_score = all_scores / n_labels
    # The mean is stored as the last element of the validation list.
    res_val.append(mean_val_score)
    rf_val_score.append(res_val)
    rf_test_score.append(res_test)
    rf_sr_score.append(res_sr)

    print("val score", res_val)
    print("test score", res_test)
    print("recall at 95 spec: ", res_sr)
    print("precision recall: ", res_pr)
    print(mean_val_score, flush=True)
    print(string_random)
    return mean_val_score
def get_model_LR_new(C=1, penalty=1, solver=1, l1_ratio=1, max_iter=2):
    """Fit one logistic regression per label column; return the mean validation score.

    Reads the module-level arrays ``X_train``/``X_val``/``X_test`` and
    ``y_train``/``y_val``/``y_test``. For every column of ``y_train`` the
    rows whose label is neither 0 nor 1 are dropped from each split, a
    ``LogisticRegression`` is fitted on the remaining training rows, scored
    with ``ROC_PR.ROC_ML`` on the validation and test rows, and pickled to
    ``lr<column><random 20-char tag>.sav``.

    Side effects: appends this run's per-column score lists to the
    module-level ``rf_val_score``, ``rf_test_score`` and ``rf_sr_score``
    lists, and prints progress to stdout.

    Args:
        C: exponent; the inverse regularisation strength is ``10 ** int(C)``.
        penalty: truncated to int; 0 -> l1 (liblinear), 1 -> l2 (solver
            picked by ``solver``), 2 -> elasticnet (saga), anything else ->
            the string ``'none'``.
        solver: truncated to int; only used when ``penalty == 1``:
            0 -> newton-cg, 1 -> sag, anything else -> lbfgs.
        l1_ratio: divided by 10 before use; only used for elasticnet.
        max_iter: exponent; the iteration cap is ``10 ** max_iter``.

    Returns:
        Sum of the per-column validation scores divided by the number of
        label columns.
    """
    from sklearn.linear_model import LogisticRegression

    C = 10 ** int(C)
    penalty = int(penalty)
    solver = int(solver)
    l1_ratio = l1_ratio / 10
    # NOTE(review): unlike the other arguments this one is not truncated to
    # int, so a fractional exponent yields a float max_iter -- confirm that
    # the installed scikit-learn accepts that.
    max_iter = 10 ** max_iter
    print(max_iter)

    # The estimator configuration does not depend on the label column, so it
    # is chosen once instead of on every loop iteration.
    if penalty == 0:
        estimator_options = {'penalty': 'l1', 'solver': 'liblinear'}
    elif penalty == 1:
        if solver == 0:
            l2_solver = 'newton-cg'
        elif solver == 1:
            l2_solver = 'sag'
        else:
            l2_solver = 'lbfgs'
        estimator_options = {'penalty': 'l2', 'solver': l2_solver}
    elif penalty == 2:
        estimator_options = {
            'penalty': 'elasticnet',
            'solver': 'saga',
            'l1_ratio': l1_ratio,
        }
    else:
        # NOTE(review): the literal string 'none' is what this code has
        # always passed; newer scikit-learn releases may expect None
        # instead -- confirm against the installed version.
        estimator_options = {'penalty': 'none'}

    def _labelled(features, labels):
        # Keep only the rows labelled exactly 0 or 1; any other value
        # (NaN included) fails both comparisons and is dropped. One boolean
        # mask replaces the row-by-row list deletion.
        keep = (labels == 0.0) | (labels == 1.0)
        return features[keep].tolist(), labels[keep].tolist()

    n_labels = len(y_train[0])
    all_scores = 0
    res_test, res_val, res_sr, res_pr = [], [], [], []
    string_random = get_random_string(20)

    for i in range(n_labels):
        X_train2, y_train2 = _labelled(X_train, y_train[:, i])
        X_test2, y_test2 = _labelled(X_test, y_test[:, i])
        X_val2, y_val2 = _labelled(X_val, y_val[:, i])

        lr_model_linear = LogisticRegression(
            C=C, max_iter=max_iter, **estimator_options
        ).fit(X_train2, y_train2)

        score_val, _, _ = ROC_PR.ROC_ML(
            lr_model_linear, X_val2, y_val2, "LR", 0)
        score_test, score_sr, score_pr = ROC_PR.ROC_ML(
            lr_model_linear, X_test2, y_test2, "LR", 0)
        print(i, flush=True)

        res_test.append(score_test)
        res_val.append(score_val)
        res_sr.append(score_sr)
        res_pr.append(score_pr)
        all_scores = all_scores + score_val

        model_path = 'lr' + str(i) + string_random + '.sav'
        print(model_path)
        # Context manager so the file is flushed and closed right away.
        with open(model_path, 'wb') as model_file:
            pickle.dump(lr_model_linear, model_file)

    mean_val_score = all_scores / n_labels
    # The mean is stored as the last element of the validation list.
    res_val.append(mean_val_score)
    rf_val_score.append(res_val)
    rf_test_score.append(res_test)
    rf_sr_score.append(res_sr)

    print("val score", res_val)
    print("test score", res_test)
    print("recall at 95 spec: ", res_sr)
    print("precision recall: ", res_pr)
    print(mean_val_score, flush=True)
    print(string_random)
    return mean_val_score