def NuSVRRegressor(X_train, X_test, y_train, y_test):
    """Fit one NuSVR per target column, report test/train metrics, and save.

    y_train/y_test are expected to have two target columns; each gets its
    own independently-fitted NuSVR model.
    """
    target_a = y_train[:, 0]
    target_b = y_train[:, 1]

    model_a = NuSVR()
    model_a.fit(X_train, target_a)
    model_b = NuSVR()
    model_b.fit(X_train, target_b)

    # Validation metrics on the held-out split: stack per-target 1-D
    # predictions into an (n, 2) array.
    test_pred = np.column_stack(
        (model_a.predict(X=X_test), model_b.predict(X=X_test)))
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    # Training-set metrics for the fit quality.
    train_pred = np.column_stack(
        (model_a.predict(X=X_train), model_b.predict(X=X_train)))
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    logSave(nameOfModel="NuSVRRegressor", reg=[model_a, model_b],
            metrics=metrics, val_metrics=val_metrics)
def XGBClassifierModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search an XGBoost binary classifier and log train/val metrics.

    When splitData is falsy, no held-out evaluation is done and the
    validation metrics are reported as zeros.
    """
    clf = xgb.XGBClassifier(objective="binary:logistic", eval_metric="auc")
    grid_values = {
        'learning_rate': [x / 10 for x in range(1, 11)],
        'max_depth': list(range(10, 21, 1)),
        # BUG FIX: key was 'gamma ' (trailing space) — GridSearchCV rejects
        # parameter names that do not exactly match an estimator parameter.
        'gamma': [x / 10 for x in range(1, 11)]
    }
    grid_clf_acc = GridSearchCV(clf, param_grid=grid_values,
                                scoring=['roc_auc', 'f1', 'accuracy'],
                                refit='roc_auc')
    grid_clf_acc.fit(X_train, y_train.ravel())
    clf = grid_clf_acc.best_estimator_
    if splitData:
        y_preds = clf.predict(X_test)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        # No held-out split available: zero out the validation slots.
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0
    y_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSave(name_of_model="XGBClassifierGS", clf=clf, metrics=metrics,
               val_metrics=val_metrics)
def XGBClassifierModelV2(X_train, X_test, y_train, y_test):
    """Grid-search a multi-class XGBoost classifier and log train/val metrics."""
    multi_class = True
    base = xgb.XGBClassifier(objective="multi:softmax", eval_metric="mlogloss")
    search_space = {
        'learning_rate': [step / 10 for step in range(1, 5)],
        'max_depth': list(range(10, 21, 1))
    }
    searcher = GridSearchCV(
        base,
        param_grid=search_space,
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0)
    searcher.fit(X_train, y_train)
    clf = searcher.best_estimator_
    # Held-out metrics first, then training-set metrics.
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, clf.predict(X_test), multi_class=multi_class)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, clf.predict(X_train), multi_class=multi_class)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSave(name_of_model="XGBClassifierModelV2GS", clf=clf,
               metrics=metrics, val_metrics=val_metrics)
def LinearSVRRegressor(X_train, X_test, y_train, y_test):
    """Fit one LinearSVR per target column, report test/train metrics, save."""
    target_a = y_train[:, 0]
    target_b = y_train[:, 1]

    model_a = LinearSVR(epsilon=0.001, max_iter=5000, C=3,
                        loss='squared_epsilon_insensitive')
    model_a.fit(X_train, target_a)
    model_b = LinearSVR(epsilon=0.001, max_iter=5000, C=3,
                        loss='squared_epsilon_insensitive')
    model_b.fit(X_train, target_b)

    # Held-out evaluation: per-target predictions stacked into (n, 2).
    test_pred = np.column_stack(
        (model_a.predict(X=X_test), model_b.predict(X=X_test)))
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    # Training-set evaluation.
    train_pred = np.column_stack(
        (model_a.predict(X=X_train), model_b.predict(X=X_train)))
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    logSave(nameOfModel="LinearSVRRegressor", reg=[model_a, model_b],
            metrics=metrics, val_metrics=val_metrics)
def ExtraTreeGS(X_train, X_test, y_train, y_test):
    """Grid-search an ExtraTreeRegressor; log metrics and the best params."""
    search_space = {
        'criterion': ["mse", "mae"],
        'max_depth': list(range(20, 25))
    }
    searcher = GridSearchCV(
        ExtraTreeRegressor(),
        param_grid=search_space,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    searcher.fit(X_train, y_train)
    reg = searcher.best_estimator_
    reg.fit(X_train, y_train)  # re-fit the winner on the full training set

    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    best_params: dict = searcher.best_params_
    saveBestParams(nameOfModel="ExtraTreeGS", best_params=best_params)
    logSave(nameOfModel="ExtraTreeGS", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def AdaBoost(X_train, X_test, y_train, y_test):
    """Fit one AdaBoost(LinearSVR) per target column and log metrics."""
    target_a = y_train[:, 0]
    target_b = y_train[:, 1]

    model_a = AdaBoostRegressor(base_estimator=LinearSVR(),
                                loss='exponential', n_estimators=5)
    model_a.fit(X_train, target_a)
    model_b = AdaBoostRegressor(base_estimator=LinearSVR(),
                                loss='exponential', n_estimators=5)
    model_b.fit(X_train, target_b)

    # Held-out metrics.
    test_pred = np.column_stack(
        (model_a.predict(X=X_test), model_b.predict(X=X_test)))
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    # Training-set metrics.
    train_pred = np.column_stack(
        (model_a.predict(X=X_train), model_b.predict(X=X_train)))
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    logSave(nameOfModel="AdaBoost", reg=[model_a, model_b],
            metrics=metrics, val_metrics=val_metrics)
def LogisticRegressionModelV2(X_train, X_test, y_train, y_test):
    """Grid-search C for a balanced multinomial logistic regression; log metrics.

    NOTE(review): another function with this same name appears later in the
    file; at import time the later definition shadows this one — confirm which
    is intended to be callable.
    """
    multi_class = True
    base = LogisticRegression(penalty='l2',
                              solver='lbfgs',
                              multi_class='multinomial',
                              max_iter=700,
                              class_weight='balanced')
    search_space = {'C': [0.01, .09, 1, 5, 25, 50, 100, 1000]}
    searcher = GridSearchCV(
        base,
        param_grid=search_space,
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0)
    searcher.fit(X_train, y_train)
    clf = searcher.best_estimator_
    # Validation metrics, then training metrics.
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, clf.predict(X_test), multi_class=multi_class)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, clf.predict(X_train), multi_class=multi_class)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSaveV2(name_of_model="LogisticRegressionModelV2GS", clf=clf,
                 metrics=metrics, val_metrics=val_metrics)
def AdaBoostModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search an AdaBoost(SVC, SAMME) classifier and log metrics.

    When splitData is falsy the validation metrics are reported as zeros.
    """
    booster = AdaBoostClassifier(base_estimator=SVC(), algorithm='SAMME')
    search_space = {
        'base_estimator__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
        'base_estimator__C': [c / 10 for c in range(1, 11)],
        'base_estimator__degree': list(range(3, 5))
    }
    searcher = GridSearchCV(booster, param_grid=search_space,
                            scoring=['roc_auc', 'f1', 'accuracy'],
                            refit='roc_auc')
    searcher.fit(X_train, y_train.ravel())
    clf = searcher.best_estimator_
    if splitData:
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, clf.predict(X_test))
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0
    train_preds = clf.predict(X_train).reshape(-1, 1)
    acc, pre, recall, auc, f1 = getMetrics(y_train, train_preds)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSave(name_of_model="AdaBoostGS", clf=clf, metrics=metrics,
               val_metrics=val_metrics)
def NeuralNetGS(X_train, X_test, y_train, y_test):
    """Grid-search an MLPRegressor architecture; log metrics and best params."""
    search_space = {
        'hidden_layer_sizes': [(8, 16, 32, 64, 128, 64, 32, 64, 16, 8),
                               (8, 16, 32, 64, 32, 16, 8),
                               (8, 16, 32, 16, 8)],
        'solver': ['adam'],
        'learning_rate': ['constant', 'invscaling']
    }
    searcher = GridSearchCV(
        MLPRegressor(),
        param_grid=search_space,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    searcher.fit(X_train, y_train)
    reg = searcher.best_estimator_
    reg.fit(X_train, y_train)  # re-fit the winner on the full training set

    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    best_params: dict = searcher.best_params_
    saveBestParams(nameOfModel="NeuralNetGS", best_params=best_params)
    logSave(nameOfModel="NeuralNetGS", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def GradientBoosting(X_train, X_test, y_train, y_test):
    """Fit one huber-loss GradientBoostingRegressor per target column; log metrics."""
    target_a = y_train[:, 0]
    target_b = y_train[:, 1]

    model_a = GradientBoostingRegressor(loss='huber')
    model_a.fit(X_train, target_a)
    model_b = GradientBoostingRegressor(loss='huber')
    model_b.fit(X_train, target_b)

    # Held-out metrics.
    test_pred = np.column_stack(
        (model_a.predict(X=X_test), model_b.predict(X=X_test)))
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    # Training-set metrics.
    train_pred = np.column_stack(
        (model_a.predict(X=X_train), model_b.predict(X=X_train)))
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    logSave(nameOfModel="GradientBoosting", reg=[model_a, model_b],
            metrics=metrics, val_metrics=val_metrics)
def NeuralNetworkModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search a small MLP classifier and log train/val metrics.

    When splitData is falsy the validation metrics are reported as zeros.
    """
    net = MLPClassifier(alpha=1e-4, max_iter=1000)
    search_space = {
        'hidden_layer_sizes': [(4, 6), (5, 7), (8, 10)],
        'activation': ['tanh', 'relu'],
        'learning_rate': ['constant', 'invscaling']
    }
    searcher = GridSearchCV(net, param_grid=search_space,
                            scoring=['roc_auc', 'f1', 'accuracy'],
                            refit='roc_auc')
    searcher.fit(X_train, y_train.ravel())
    clf = searcher.best_estimator_
    if splitData:
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, clf.predict(X_test))
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0
    acc, pre, recall, auc, f1 = getMetrics(y_train, clf.predict(X_train))
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSave(name_of_model="NeuralNetworkGS", clf=clf, metrics=metrics,
               val_metrics=val_metrics)
def XgBoost(X_train, X_test, y_train, y_test):
    """Fit one XGBRegressor per target column, report test/train metrics, save."""
    target_a = y_train[:, 0]
    target_b = y_train[:, 1]

    model_a = xg.XGBRegressor(objective='reg:squarederror')
    model_a.fit(X=X_train, y=target_a)
    model_b = xg.XGBRegressor(objective='reg:squarederror')
    model_b.fit(X=X_train, y=target_b)

    # Held-out metrics.
    test_pred = np.column_stack(
        (model_a.predict(X_test), model_b.predict(X_test)))
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    # Training-set metrics.
    train_pred = np.column_stack(
        (model_a.predict(X_train), model_b.predict(X_train)))
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    logSave(nameOfModel="XgBoost", reg=[model_a, model_b],
            metrics=metrics, val_metrics=val_metrics)
def LogisticRegressionModelV2(X_train, X_test, y_train, y_test):
    """Fit a plain multinomial logistic regression and log train/val metrics.

    NOTE(review): duplicates the name of an earlier grid-search variant in
    this file; this later definition is the one visible after import.
    """
    multi_class = True
    clf = LogisticRegression(penalty='l2',
                             solver='lbfgs',
                             multi_class='multinomial',
                             max_iter=700)
    clf.fit(X_train, y_train)
    # Validation metrics, then training metrics.
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, clf.predict(X_test), multi_class=multi_class)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, clf.predict(X_train), multi_class=multi_class)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSaveV2(name_of_model="LogisticRegressionModelV2", clf=clf,
                 metrics=metrics, val_metrics=val_metrics)
def LogisticRegressionModel(splitData, X_train, X_test, y_train, y_test):
    """Fit an L1 logistic regression with fixed class weights; log metrics.

    When splitData is falsy the validation metrics are reported as zeros.
    """
    clf = LogisticRegression(penalty='l1',
                             solver='liblinear',
                             multi_class='ovr',
                             class_weight={0: 0.7, 1: 1.5})
    clf.fit(X_train, y_train.ravel())
    if splitData:
        test_preds = clf.predict(X_test)
        printMetrics(y_test, test_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, test_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0
    acc, pre, recall, auc, f1 = getMetrics(y_train, clf.predict(X_train))
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSave(name_of_model="LogisticRegression", clf=clf, metrics=metrics,
               val_metrics=val_metrics)
def LarsRegressorGS(X_train, X_test, y_train, y_test):
    """Grid-search a Lars regressor; log metrics and the best params."""
    search_space = {
        'n_nonzero_coefs': list(range(100, 500, 100)),
    }
    searcher = GridSearchCV(
        Lars(),
        param_grid=search_space,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    searcher.fit(X_train, y_train)
    reg = searcher.best_estimator_
    reg.fit(X_train, y_train)  # re-fit the winner on the full training set

    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    best_params: dict = searcher.best_params_
    saveBestParams(nameOfModel="LarsRegressorGS", best_params=best_params)
    logSave(nameOfModel="LarsRegressorGS", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def RidgeRegressorGS(X_train, X_test, y_train, y_test):
    """Grid-search Ridge alpha/solver; log metrics and the best params."""
    search_space = {
        'alpha': list(range(1, 3)) + [value * 0.01 for value in range(1, 3)],
        'solver': ['svd', 'cholesky', 'saga']
    }
    searcher = GridSearchCV(
        Ridge(),
        param_grid=search_space,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    searcher.fit(X_train, y_train)
    reg = searcher.best_estimator_
    reg.fit(X_train, y_train)  # re-fit the winner on the full training set

    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    best_params: dict = searcher.best_params_
    saveBestParams(nameOfModel="RidgeRegressorGS", best_params=best_params)
    logSave(nameOfModel="RidgeRegressorGS", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def LogisticRegressionModel(splitData, X_train, X_test, y_train, y_test):
    """Grid-search penalty/C for a weighted logistic regression; log metrics.

    When splitData is falsy the validation metrics are reported as zeros.
    NOTE(review): shadows the non-grid-search definition earlier in the file.
    """
    base = LogisticRegression(solver='liblinear',
                              multi_class='ovr',
                              class_weight={0: 0.7, 1: 1.5})
    search_space = {
        'penalty': ['l1', 'l2'],
        'C': [0.01, .09, 1, 5, 25, 50, 100]
    }
    searcher = GridSearchCV(base, param_grid=search_space,
                            scoring=['roc_auc', 'f1', 'accuracy'],
                            refit='roc_auc')
    searcher.fit(X_train, y_train.ravel())
    clf = searcher.best_estimator_
    if splitData:
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, clf.predict(X_test))
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0
    acc, pre, recall, auc, f1 = getMetrics(y_train, clf.predict(X_train))
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSave(name_of_model="LogisticRegressionGS", clf=clf, metrics=metrics,
               val_metrics=val_metrics)
def AdaBoostGS(X_train, X_test, y_train, y_test):
    """Grid-search two AdaBoost(LinearSVR) regressors — one per target
    column — then log test/train metrics, best params, and the models."""
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = AdaBoostRegressor(base_estimator=LinearSVR(), n_estimators=3)
    reg2 = AdaBoostRegressor(base_estimator=LinearSVR(), n_estimators=3)
    grid_values = {
        'base_estimator__epsilon': [value * 0.1 for value in range(0, 2)],
        'base_estimator__C': list(range(1, 2)),
        'base_estimator__loss':
            ['epsilon_insensitive', 'squared_epsilon_insensitive']
    }
    grid_reg1 = GridSearchCV(
        reg1, param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2', n_jobs=-1, cv=2, verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2, param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2', n_jobs=-1, cv=2, verbose=100)
    grid_reg2.fit(X_train, y_train2)
    # BUG FIX: was grid_reg1.best_estimator_ — target 2's tuned model was
    # discarded and target 1's estimator was reused for both columns.
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    # Held-out metrics.
    y_pred = np.column_stack((reg1.predict(X=X_test), reg2.predict(X=X_test)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    # Training-set metrics.
    y_pred = np.column_stack((reg1.predict(X=X_train), reg2.predict(X=X_train)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)
    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    # Pair each hyper-parameter's winners: {param: [target1, target2]}.
    best_params = {key: [best_params1[key], best_params2[key]]
                   for key in best_params1}
    saveBestParams(nameOfModel="AdaBoostGS", best_params=best_params)
    logSave(nameOfModel="AdaBoostGS", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
def SGD_GS(X_train, X_test, y_train, y_test):
    """Grid-search two SGDRegressors — one per target column — then log
    test/train metrics, best params, and the models."""
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = SGDRegressor()
    reg2 = SGDRegressor()
    grid_values = {
        'alpha': [value * 0.001 for value in range(1, 3)],
        'loss': ['squared_loss', 'huber'],
        'penalty': ['l2', 'l1'],
        'l1_ratio': [value * 0.1 for value in range(0, 3)]
    }
    grid_reg1 = GridSearchCV(
        reg1, param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2', n_jobs=-1, cv=2, verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2, param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2', n_jobs=-1, cv=2, verbose=100)
    grid_reg2.fit(X_train, y_train2)
    # BUG FIX: was grid_reg1.best_estimator_ — target 2's tuned model was
    # discarded and target 1's estimator was reused for both columns.
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    # Held-out metrics.
    y_pred = np.column_stack((reg1.predict(X=X_test), reg2.predict(X=X_test)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    # Training-set metrics.
    y_pred = np.column_stack((reg1.predict(X=X_train), reg2.predict(X=X_train)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)
    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    # Pair each hyper-parameter's winners: {param: [target1, target2]}.
    best_params = {key: [best_params1[key], best_params2[key]]
                   for key in best_params1}
    saveBestParams(nameOfModel="SGD_GS", best_params=best_params)
    logSave(nameOfModel="SGD_GS", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
def GradientBoostingGS(X_train, X_test, y_train, y_test):
    """Grid-search two GradientBoostingRegressors — one per target column —
    then log test/train metrics, best params, and the models."""
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = GradientBoostingRegressor()
    reg2 = GradientBoostingRegressor()
    grid_values = {
        'loss': ['ls', 'huber'],
        'learning_rate': [value * 0.1 for value in range(1, 3)],
        'criterion': ["mse", "mae"],
        'alpha': [0.25, 0.5, 0.75, 0.9],
    }
    grid_reg1 = GridSearchCV(
        reg1, param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2', n_jobs=-1, cv=2, verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2, param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2', n_jobs=-1, cv=2, verbose=100)
    grid_reg2.fit(X_train, y_train2)
    # BUG FIX: was grid_reg1.best_estimator_ — target 2's tuned model was
    # discarded and target 1's estimator was reused for both columns.
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    # Held-out metrics.
    y_pred = np.column_stack((reg1.predict(X=X_test), reg2.predict(X=X_test)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    # Training-set metrics.
    y_pred = np.column_stack((reg1.predict(X=X_train), reg2.predict(X=X_train)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)
    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    # Pair each hyper-parameter's winners: {param: [target1, target2]}.
    best_params = {key: [best_params1[key], best_params2[key]]
                   for key in best_params1}
    saveBestParams(nameOfModel="GradientBoostingGS", best_params=best_params)
    logSave(nameOfModel="GradientBoostingGS", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
def NuSVRRegressorGS(X_train, X_test, y_train, y_test):
    """Grid-search two NuSVR regressors — one per target column — then log
    test/train metrics, best params, and the models."""
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = NuSVR()
    reg2 = NuSVR()
    grid_values = {
        'nu': [value * 0.1 for value in range(1, 3)],
        'C': list(range(1, 3)),
        'kernel': ['poly', 'rbf'],
        'degree': list(range(1, 3))
    }
    grid_reg1 = GridSearchCV(
        reg1, param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2', n_jobs=-1, cv=2, verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2, param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2', n_jobs=-1, cv=2, verbose=100)
    grid_reg2.fit(X_train, y_train2)
    # BUG FIX: was grid_reg1.best_estimator_ — target 2's tuned model was
    # discarded and target 1's estimator was reused for both columns.
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    # Held-out metrics.
    y_pred = np.column_stack((reg1.predict(X=X_test), reg2.predict(X=X_test)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    # Training-set metrics.
    y_pred = np.column_stack((reg1.predict(X=X_train), reg2.predict(X=X_train)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)
    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    # Pair each hyper-parameter's winners: {param: [target1, target2]}.
    best_params = {key: [best_params1[key], best_params2[key]]
                   for key in best_params1}
    saveBestParams(nameOfModel="NuSVRRegressorGS", best_params=best_params)
    logSave(nameOfModel="NuSVRRegressorGS", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
def XgBoostGS(X_train, X_test, y_train, y_test):
    """Grid-search two XGBRegressors — one per target column — then log
    test/train metrics, best params, and the models."""
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = xg.XGBRegressor(objective='reg:squarederror')
    reg2 = xg.XGBRegressor(objective='reg:squarederror')
    grid_values = {
        'learning_rate': [x / 10 for x in range(1, 5)],
        'max_depth': list(range(11, 15))
    }
    grid_reg1 = GridSearchCV(
        reg1, param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2', n_jobs=-1, cv=2, verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2, param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2', n_jobs=-1, cv=2, verbose=100)
    grid_reg2.fit(X_train, y_train2)
    # BUG FIX: was grid_reg1.best_estimator_ — target 2's tuned model was
    # discarded and target 1's estimator was reused for both columns.
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    # Held-out metrics.
    y_pred = np.column_stack((reg1.predict(X_test), reg2.predict(X_test)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    # Training-set metrics.
    y_pred = np.column_stack((reg1.predict(X_train), reg2.predict(X_train)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)
    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    # Pair each hyper-parameter's winners: {param: [target1, target2]}.
    best_params = {key: [best_params1[key], best_params2[key]]
                   for key in best_params1}
    saveBestParams(nameOfModel="XgBoostGS", best_params=best_params)
    logSave(nameOfModel="XgBoostGS", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
def LassoRegressor(X_train, X_test, y_train, y_test):
    """Fit Lasso(alpha=0.01), report test/train metrics, and save the model."""
    reg = Lasso(alpha=0.01)
    reg.fit(X_train, y_train)
    # Held-out metrics.
    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)
    # Training-set metrics.
    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)
    logSave(nameOfModel="LassoRegressor", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def ElasticNetRegressor(X_train, X_test, y_train, y_test):
    """Fit ElasticNet(alpha=10, l1_ratio=0.2), report metrics, save the model."""
    reg = ElasticNet(alpha=10, l1_ratio=0.2)
    reg.fit(X_train, y_train)
    # Held-out metrics.
    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)
    # Training-set metrics.
    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)
    logSave(nameOfModel="ElasticNetRegressor", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def NeuralNet(X_train, X_test, y_train, y_test):
    """Fit a fixed-architecture MLPRegressor, report metrics, save the model."""
    reg = MLPRegressor(hidden_layer_sizes=(32, 64, 128, 256, 128, 64))
    reg.fit(X_train, y_train)
    # Held-out metrics.
    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)
    # Training-set metrics.
    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)
    logSave(nameOfModel="NeuralNet", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def RidgeRegressor(X_train, X_test, y_train, y_test):
    """Fit a default Ridge regressor, report metrics, and save the model."""
    reg = Ridge()
    reg.fit(X_train, y_train)
    # Held-out metrics.
    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)
    # Training-set metrics.
    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)
    logSave(nameOfModel="RidgeRegressor", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def AdaBoostModelV2(X_train, X_test, y_train, y_test):
    """Fit AdaBoost over a RandomForest base (multi-class) and log metrics."""
    multi_class = True
    clf = AdaBoostClassifier(base_estimator=RandomForestClassifier(),
                             n_estimators=200,
                             algorithm='SAMME')
    clf.fit(X_train, y_train)
    # Validation metrics, then training metrics.
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, clf.predict(X_test), multi_class=multi_class)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, clf.predict(X_train), multi_class=multi_class)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSaveV2(name_of_model="AdaBoostModelV2", clf=clf, metrics=metrics,
                 val_metrics=val_metrics)
def DecisionTree(X_train, X_test, y_train, y_test):
    """Fit a default DecisionTreeRegressor, report metrics, save the model."""
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    # Held-out metrics.
    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)
    # Training-set metrics.
    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)
    logSave(nameOfModel="DecisionTree", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def AdaBoostModel(splitData, X_train, X_test, y_train, y_test):
    """Fit AdaBoost over an SVC base (SAMME) and log train/val metrics.

    When splitData is falsy the validation metrics are reported as zeros.
    NOTE(review): duplicates the name of an earlier grid-search variant;
    this later definition is the one visible after import.
    """
    clf = AdaBoostClassifier(base_estimator=SVC(),
                             n_estimators=100,
                             algorithm='SAMME')
    clf.fit(X_train, y_train.ravel())
    if splitData:
        test_preds = clf.predict(X_test)
        printMetrics(y_test, test_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, test_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0
    train_preds = clf.predict(X_train).reshape(-1, 1)
    acc, pre, recall, auc, f1 = getMetrics(y_train, train_preds)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSave(name_of_model="AdaBoost", clf=clf, metrics=metrics,
               val_metrics=val_metrics)
def RandomForestModelV2(X_train, X_test, y_train, y_test):
    """Grid-search a multi-class RandomForest and log train/val metrics."""
    multi_class = True
    search_space = {
        'n_estimators': list(range(100, 501, 50)),
        'criterion': ['gini', 'entropy'],
        'max_depth': list(range(10, 21, 1))
    }
    searcher = GridSearchCV(
        RandomForestClassifier(),
        param_grid=search_space,
        scoring=['roc_auc_ovr_weighted', 'f1_weighted', 'accuracy'],
        refit='f1_weighted',
        n_jobs=2,
        verbose=0)
    searcher.fit(X_train, y_train)
    clf = searcher.best_estimator_
    # Validation metrics, then training metrics.
    val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
        y_test, clf.predict(X_test), multi_class=multi_class)
    acc, pre, recall, auc, f1 = getMetrics(
        y_train, clf.predict(X_train), multi_class=multi_class)
    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    logAndSaveV2(name_of_model="RandomForestModelV2GS", clf=clf,
                 metrics=metrics, val_metrics=val_metrics)