def NeuralNetworkModel(splitData, X_train, X_test, y_train, y_test):
    """Tune an MLPClassifier via grid search, score it, and log/save it.

    When ``splitData`` is falsy, no held-out evaluation is done and the
    validation metrics are reported as zeros.
    """
    search = GridSearchCV(
        MLPClassifier(alpha=1e-4, max_iter=1000),
        param_grid={
            'hidden_layer_sizes': [(4, 6), (5, 7), (8, 10)],
            'activation': ['tanh', 'relu'],
            'learning_rate': ['constant', 'invscaling'],
        },
        scoring=['roc_auc', 'f1', 'accuracy'],
        # multi-metric scoring requires naming which metric drives refitting
        refit='roc_auc',
    )
    search.fit(X_train, y_train.ravel())
    best = search.best_estimator_

    if splitData:
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, best.predict(X_test))
    else:
        val_acc = val_pre = val_recall = val_auc = val_f1 = 0

    acc, pre, recall, auc, f1 = getMetrics(y_train, best.predict(X_train))
    logAndSave(name_of_model="NeuralNetworkGS",
               clf=best,
               metrics=(acc, pre, recall, auc, f1),
               val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def LogisticRegressionModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search a class-weighted logistic regression and log/save it.

    Validation metrics are zeros when ``splitData`` is falsy.
    """
    estimator = LogisticRegression(solver='liblinear', multi_class='ovr',
                                   class_weight={0: 0.7, 1: 1.5})
    param_grid = {
        'penalty': ['l1', 'l2'],
        'C': [0.01, .09, 1, 5, 25, 50, 100],
    }
    search = GridSearchCV(estimator, param_grid=param_grid,
                          scoring=['roc_auc', 'f1', 'accuracy'],
                          refit='roc_auc')
    search.fit(X_train, y_train.ravel())
    best_clf = search.best_estimator_

    if splitData:
        test_preds = best_clf.predict(X_test)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, test_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    train_preds = best_clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train, train_preds)
    logAndSave(name_of_model="LogisticRegressionGS", clf=best_clf,
               metrics=(acc, pre, recall, auc, f1),
               val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def AdaBoostModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search an SVC-based AdaBoost (SAMME) ensemble and log/save it.

    Validation metrics are zeros when ``splitData`` is falsy.
    """
    booster = AdaBoostClassifier(base_estimator=SVC(), algorithm='SAMME')
    # Inner SVC hyperparameters are addressed through GridSearchCV's
    # 'base_estimator__' nested-parameter prefix.
    grid = {
        'base_estimator__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
        'base_estimator__C': [x / 10 for x in range(1, 11)],
        'base_estimator__degree': list(range(3, 5)),
    }
    search = GridSearchCV(booster, param_grid=grid,
                          scoring=['roc_auc', 'f1', 'accuracy'],
                          refit='roc_auc')
    search.fit(X_train, y_train.ravel())
    best = search.best_estimator_

    if splitData:
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, best.predict(X_test))
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    # Train predictions are reshaped to a column vector — presumably the
    # layout getMetrics expects here; NOTE(review): confirm, since sibling
    # models pass the flat prediction array instead.
    train_preds = best.predict(X_train).reshape(-1, 1)
    acc, pre, recall, auc, f1 = getMetrics(y_train, train_preds)
    logAndSave(name_of_model="AdaBoostGS", clf=best,
               metrics=(acc, pre, recall, auc, f1),
               val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def LogisticRegressionModel(splitData, X_train, X_test, y_train, y_test):
    """Fit a fixed-hyperparameter L1 logistic regression and log/save it.

    Prints held-out metrics and computes validation scores only when
    ``splitData`` is truthy; otherwise validation metrics are zeros.
    """
    model = LogisticRegression(penalty='l1', solver='liblinear',
                               multi_class='ovr',
                               class_weight={0: 0.7, 1: 1.5})
    model.fit(X_train, y_train.ravel())

    if splitData:
        preds = model.predict(X_test)
        printMetrics(y_test, preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    acc, pre, recall, auc, f1 = getMetrics(y_train, model.predict(X_train))
    logAndSave(name_of_model="LogisticRegression", clf=model,
               metrics=(acc, pre, recall, auc, f1),
               val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def XGBClassifierModelV2(X_train, X_test, y_train, y_test):
    """Grid-search a multi-class XGBoost classifier and log/save it.

    Unlike the binary variants, this always evaluates on the test split
    and scores with weighted multi-class metrics.
    """
    multi_class = True
    search = GridSearchCV(
        xgb.XGBClassifier(objective="multi:softmax", eval_metric="mlogloss"),
        param_grid={
            'learning_rate': [x / 10 for x in range(1, 5)],
            'max_depth': list(range(10, 21, 1)),
        },
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0,
    )
    # NOTE(review): labels are passed without .ravel() here, unlike the
    # sibling binary models — presumably y_train is already 1-D; confirm.
    search.fit(X_train, y_train)
    best = search.best_estimator_

    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, best.predict(X_test), multi_class=multi_class)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, best.predict(X_train), multi_class=multi_class)

    logAndSave(name_of_model="XGBClassifierModelV2GS", clf=best,
               metrics=(acc, pre, recall, auc, f1),
               val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def XGBClassifierModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search a binary XGBoost classifier and log/save the best model.

    Parameters mirror the sibling *GS* models: when ``splitData`` is
    falsy, validation metrics are reported as zeros instead of being
    computed on (X_test, y_test).
    """
    clf = xgb.XGBClassifier(objective="binary:logistic", eval_metric="auc")
    grid_values = {
        'learning_rate': [x / 10 for x in range(1, 11)],
        'max_depth': list(range(10, 21, 1)),
        # BUG FIX: key was 'gamma ' (trailing space). GridSearchCV checks
        # grid keys against estimator.get_params() and raised
        # "Invalid parameter" at fit time, so this function always crashed.
        'gamma': [x / 10 for x in range(1, 11)],
    }
    grid_clf_acc = GridSearchCV(clf, param_grid=grid_values,
                                scoring=['roc_auc', 'f1', 'accuracy'],
                                refit='roc_auc')
    grid_clf_acc.fit(X_train, y_train.ravel())
    clf = grid_clf_acc.best_estimator_

    if splitData:
        y_preds = clf.predict(X_test)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    y_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSave(name_of_model="XGBClassifierGS", clf=clf,
               metrics=metrics, val_metrics=val_metrics)
def AdaBoostModel(splitData, X_train, X_test, y_train, y_test):
    """Fit a fixed SVC-based AdaBoost (SAMME) ensemble and log/save it.

    Prints held-out metrics only when ``splitData`` is truthy; otherwise
    validation metrics are zeros.
    """
    ensemble = AdaBoostClassifier(base_estimator=SVC(),
                                  n_estimators=100,
                                  algorithm='SAMME')
    ensemble.fit(X_train, y_train.ravel())

    if splitData:
        test_preds = ensemble.predict(X_test)
        printMetrics(y_test, test_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, test_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    # Train predictions reshaped to a column vector — presumably what
    # getMetrics expects here; NOTE(review): siblings pass flat arrays.
    train_preds = ensemble.predict(X_train).reshape(-1, 1)
    acc, pre, recall, auc, f1 = getMetrics(y_train, train_preds)
    logAndSave(name_of_model="AdaBoost", clf=ensemble,
               metrics=(acc, pre, recall, auc, f1),
               val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def RandomForestModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search a random forest and log/save the best estimator.

    Validation metrics are zeros when ``splitData`` is falsy.
    """
    forest = RandomForestClassifier(n_estimators=100, max_depth=11)
    grid = {
        'n_estimators': list(range(100, 501, 50)),
        'criterion': ['gini', 'entropy'],
        'max_depth': list(range(10, 21, 1)),
    }
    search = GridSearchCV(forest, param_grid=grid,
                          scoring=['roc_auc', 'f1', 'accuracy'],
                          refit='roc_auc')
    search.fit(X_train, y_train.ravel())
    best = search.best_estimator_

    if splitData:
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, best.predict(X_test))
    else:
        val_acc = val_pre = val_recall = val_auc = val_f1 = 0

    acc, pre, recall, auc, f1 = getMetrics(y_train, best.predict(X_train))
    logAndSave(name_of_model="RandomForestClassifierGS", clf=best,
               metrics=(acc, pre, recall, auc, f1),
               val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def XGBClassifierModel(splitData, X_train, X_test, y_train, y_test):
    """Fit a default binary XGBoost classifier and log/save it.

    Prints held-out metrics only when ``splitData`` is truthy; otherwise
    validation metrics are zeros.
    """
    model = xgb.XGBClassifier(objective="binary:logistic", eval_metric="auc")
    model.fit(X_train, y_train.ravel())

    if splitData:
        preds = model.predict(X_test)
        printMetrics(y_test, preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    acc, pre, recall, auc, f1 = getMetrics(y_train, model.predict(X_train))
    logAndSave(name_of_model="XGBClassifier", clf=model,
               metrics=(acc, pre, recall, auc, f1),
               val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
def RandomForestModel(splitData, X_train, X_test, y_train, y_test):
    """Fit a depth-limited random forest and log/save it.

    Prints held-out metrics only when ``splitData`` is truthy; otherwise
    validation metrics are zeros.
    """
    model = RandomForestClassifier(max_depth=14)
    model.fit(X_train, y_train.ravel())

    if splitData:
        preds = model.predict(X_test)
        printMetrics(y_test, preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    acc, pre, recall, auc, f1 = getMetrics(y_train, model.predict(X_train))
    logAndSave(name_of_model="RandomForestClassifier", clf=model,
               metrics=(acc, pre, recall, auc, f1),
               val_metrics=(val_acc, val_pre, val_recall, val_auc, val_f1))
min_samples_leaf=9, min_samples_split=18, n_estimators=100)), StackingEstimator( estimator=MLPClassifier(alpha=0.001, learning_rate_init=1.0)), StackingEstimator( estimator=GradientBoostingClassifier(learning_rate=0.5, max_depth=4, max_features=0.6500000000000001, min_samples_leaf=9, min_samples_split=19, n_estimators=100, subsample=0.9500000000000001)), BernoulliNB(alpha=10.0, fit_prior=False)) # Fix random state for all the steps in exported pipeline set_param_recursive(exported_pipeline.steps, 'random_state', 101) exported_pipeline.fit(X_train, y_train) y_preds = exported_pipeline.predict(X_train) acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds) y_preds = exported_pipeline.predict(X_test) val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(y_test, y_preds) val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1) metrics = (acc, pre, recall, auc, f1) logAndSave(name_of_model="TPOT_Classifier", clf=exported_pipeline, metrics=metrics, val_metrics=val_metrics)