def LarsRegressorGS(X_train, X_test, y_train, y_test):
    """Tune a ``Lars`` regressor with a grid search and record its scores.

    A 2-fold ``GridSearchCV`` over ``n_nonzero_coefs`` selects the candidate
    with the best ``r2``. The winning estimator is scored on the test split
    and on the training split, and the outcome is handed to
    ``saveBestParams`` and ``logSave``.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training targets.
        y_test: Held-out targets.

    Returns:
        None.
    """
    search = GridSearchCV(
        Lars(),
        param_grid={'n_nonzero_coefs': [100, 200, 300, 400]},
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100,
    )
    search.fit(X_train, y_train)

    # With refit='r2' the search has already refit the best estimator on the
    # full training data; the explicit fit below repeats that step.
    model = search.best_estimator_
    model.fit(X_train, y_train)

    # Held-out scores are printed first, then collected.
    test_pred = model.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    # Training scores are collected first, then printed.
    train_pred = model.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    saveBestParams(nameOfModel="LarsRegressorGS",
                   best_params=search.best_params_)
    logSave(nameOfModel="LarsRegressorGS", reg=model,
            metrics=metrics, val_metrics=val_metrics)
def RidgeRegressorGS(X_train, X_test, y_train, y_test):
    """Tune a ``Ridge`` regressor with a grid search and record its scores.

    A 2-fold ``GridSearchCV`` over ``alpha`` and ``solver`` selects the
    candidate with the best ``r2``. The winning estimator is scored on the
    test split and on the training split, and the outcome is handed to
    ``saveBestParams`` and ``logSave``.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training targets.
        y_test: Held-out targets.

    Returns:
        None.
    """
    candidate_params = {
        # Two integer strengths followed by two small fractional ones.
        'alpha': [1, 2, 0.01, 0.02],
        'solver': ['svd', 'cholesky', 'saga'],
    }
    search = GridSearchCV(
        Ridge(),
        param_grid=candidate_params,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100,
    )
    search.fit(X_train, y_train)

    # With refit='r2' the search has already refit the best estimator on the
    # full training data; the explicit fit below repeats that step.
    model = search.best_estimator_
    model.fit(X_train, y_train)

    # Held-out scores are printed first, then collected.
    test_pred = model.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    # Training scores are collected first, then printed.
    train_pred = model.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    saveBestParams(nameOfModel="RidgeRegressorGS",
                   best_params=search.best_params_)
    logSave(nameOfModel="RidgeRegressorGS", reg=model,
            metrics=metrics, val_metrics=val_metrics)
def NeuralNetGS(X_train, X_test, y_train, y_test):
    """Tune an ``MLPRegressor`` with a grid search and record its scores.

    A 2-fold ``GridSearchCV`` over three hidden-layer layouts and two
    learning-rate schedules (``adam`` solver only) selects the candidate
    with the best ``r2``. The winning estimator is scored on the test split
    and on the training split, and the outcome is handed to
    ``saveBestParams`` and ``logSave``.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training targets.
        y_test: Held-out targets.

    Returns:
        None.
    """
    layer_layouts = [
        (8, 16, 32, 64, 128, 64, 32, 64, 16, 8),
        (8, 16, 32, 64, 32, 16, 8),
        (8, 16, 32, 16, 8),
    ]
    candidate_params = {
        'hidden_layer_sizes': layer_layouts,
        'solver': ['adam'],
        'learning_rate': ['constant', 'invscaling'],
    }
    search = GridSearchCV(
        MLPRegressor(),
        param_grid=candidate_params,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100,
    )
    search.fit(X_train, y_train)

    # With refit='r2' the search has already refit the best estimator on the
    # full training data; the explicit fit below trains it once more.
    model = search.best_estimator_
    model.fit(X_train, y_train)

    # Held-out scores are printed first, then collected.
    test_pred = model.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    # Training scores are collected first, then printed.
    train_pred = model.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    saveBestParams(nameOfModel="NeuralNetGS",
                   best_params=search.best_params_)
    logSave(nameOfModel="NeuralNetGS", reg=model,
            metrics=metrics, val_metrics=val_metrics)
def ExtraTreeGS(X_train, X_test, y_train, y_test):
    """Tune an ``ExtraTreeRegressor`` with a grid search and record its scores.

    A 2-fold ``GridSearchCV`` over ``criterion`` and ``max_depth`` selects
    the candidate with the best ``r2``. The winning estimator is scored on
    the test split and on the training split, and the outcome is handed to
    ``saveBestParams`` and ``logSave``.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training targets.
        y_test: Held-out targets.

    Returns:
        None.
    """
    candidate_params = {
        # NOTE(review): newer scikit-learn releases spell these
        # "squared_error" / "absolute_error" -- confirm against the
        # pinned version before upgrading.
        'criterion': ["mse", "mae"],
        'max_depth': [20, 21, 22, 23, 24],
    }
    search = GridSearchCV(
        ExtraTreeRegressor(),
        param_grid=candidate_params,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100,
    )
    search.fit(X_train, y_train)

    # With refit='r2' the search has already refit the best estimator on the
    # full training data; the explicit fit below trains it once more.
    model = search.best_estimator_
    model.fit(X_train, y_train)

    # Held-out scores are printed first, then collected.
    test_pred = model.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    # Training scores are collected first, then printed.
    train_pred = model.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    saveBestParams(nameOfModel="ExtraTreeGS",
                   best_params=search.best_params_)
    logSave(nameOfModel="ExtraTreeGS", reg=model,
            metrics=metrics, val_metrics=val_metrics)
def AdaBoostGS(X_train, X_test, y_train, y_test):
    """Grid-search one ``AdaBoostRegressor(LinearSVR)`` per target column.

    The first two columns of ``y_train`` are tuned independently, each with
    its own 2-fold ``GridSearchCV`` that selects on ``r2``. The two resulting
    models are scored together on the test and training splits, and the
    models, metrics and per-target best parameters are handed to
    ``saveBestParams`` and ``logSave``.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training targets; indexed as ``y_train[:, 0]`` and
            ``y_train[:, 1]``, so it must support 2-D NumPy-style indexing
            with at least two columns.
        y_test: Held-out targets, compared against the two stacked
            prediction columns.

    Returns:
        None.
    """
    # Slice both targets up front so a malformed y_train fails before any
    # (expensive) search starts.
    targets = (y_train[:, 0], y_train[:, 1])

    # NOTE(review): newer scikit-learn releases rename ``base_estimator`` to
    # ``estimator`` (which also changes the ``base_estimator__*`` grid keys)
    # -- confirm against the pinned version before upgrading.
    grid_values = {
        'base_estimator__epsilon': [value * 0.1 for value in range(0, 2)],
        'base_estimator__C': list(range(1, 2)),
        'base_estimator__loss': ['epsilon_insensitive',
                                 'squared_epsilon_insensitive'],
    }

    searches = []
    models = []
    for target in targets:
        search = GridSearchCV(
            AdaBoostRegressor(base_estimator=LinearSVR(), n_estimators=3),
            param_grid=grid_values,
            scoring=['neg_mean_squared_error', 'neg_mean_absolute_error',
                     'r2'],
            refit='r2',
            n_jobs=-1,
            cv=2,
            verbose=100,
        )
        search.fit(X_train, target)
        # Take the estimator from THIS target's search. Reusing another
        # search's best_estimator_ would make both models the same object,
        # leaving it fitted on the last target only.
        model = search.best_estimator_
        # refit='r2' already fitted it on the full training data; the
        # explicit fit is kept so the training procedure is unchanged.
        model.fit(X_train, target)
        searches.append(search)
        models.append(model)

    # One prediction column per target, side by side.
    y_pred = np.hstack(
        [model.predict(X=X_test).reshape(-1, 1) for model in models])
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = np.hstack(
        [model.predict(X=X_train).reshape(-1, 1) for model in models])
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    # Store each tuned parameter as [value for target 0, value for target 1].
    best_params1, best_params2 = (s.best_params_ for s in searches)
    best_params = {
        key: [value, best_params2[key]]
        for key, value in best_params1.items()
    }
    saveBestParams(nameOfModel="AdaBoostGS", best_params=best_params)
    logSave(nameOfModel="AdaBoostGS", reg=models,
            metrics=metrics, val_metrics=val_metrics)
def NuSVRRegressorGS(X_train, X_test, y_train, y_test):
    """Grid-search one ``NuSVR`` regressor per target column.

    The first two columns of ``y_train`` are tuned independently, each with
    its own 2-fold ``GridSearchCV`` that selects on ``r2``. The two resulting
    models are scored together on the test and training splits, and the
    models, metrics and per-target best parameters are handed to
    ``saveBestParams`` and ``logSave``.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training targets; indexed as ``y_train[:, 0]`` and
            ``y_train[:, 1]``, so it must support 2-D NumPy-style indexing
            with at least two columns.
        y_test: Held-out targets, compared against the two stacked
            prediction columns.

    Returns:
        None.
    """
    # Slice both targets up front so a malformed y_train fails before any
    # (expensive) search starts.
    targets = (y_train[:, 0], y_train[:, 1])

    grid_values = {
        'nu': [value * 0.1 for value in range(1, 3)],
        'C': list(range(1, 3)),
        'kernel': ['poly', 'rbf'],
        'degree': list(range(1, 3)),
    }

    searches = []
    models = []
    for target in targets:
        search = GridSearchCV(
            NuSVR(),
            param_grid=grid_values,
            scoring=['neg_mean_squared_error', 'neg_mean_absolute_error',
                     'r2'],
            refit='r2',
            n_jobs=-1,
            cv=2,
            verbose=100,
        )
        search.fit(X_train, target)
        # Take the estimator from THIS target's search. Reusing another
        # search's best_estimator_ would make both models the same object,
        # leaving it fitted on the last target only.
        model = search.best_estimator_
        # refit='r2' already fitted it on the full training data; the
        # explicit fit is kept so the training procedure is unchanged.
        model.fit(X_train, target)
        searches.append(search)
        models.append(model)

    # One prediction column per target, side by side.
    y_pred = np.hstack(
        [model.predict(X=X_test).reshape(-1, 1) for model in models])
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = np.hstack(
        [model.predict(X=X_train).reshape(-1, 1) for model in models])
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    # Store each tuned parameter as [value for target 0, value for target 1].
    best_params1, best_params2 = (s.best_params_ for s in searches)
    best_params = {
        key: [value, best_params2[key]]
        for key, value in best_params1.items()
    }
    saveBestParams(nameOfModel="NuSVRRegressorGS", best_params=best_params)
    logSave(nameOfModel="NuSVRRegressorGS", reg=models,
            metrics=metrics, val_metrics=val_metrics)
def SGD_GS(X_train, X_test, y_train, y_test):
    """Grid-search one ``SGDRegressor`` per target column.

    The first two columns of ``y_train`` are tuned independently, each with
    its own 2-fold ``GridSearchCV`` that selects on ``r2``. The two resulting
    models are scored together on the test and training splits, and the
    models, metrics and per-target best parameters are handed to
    ``saveBestParams`` and ``logSave``.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training targets; indexed as ``y_train[:, 0]`` and
            ``y_train[:, 1]``, so it must support 2-D NumPy-style indexing
            with at least two columns.
        y_test: Held-out targets, compared against the two stacked
            prediction columns.

    Returns:
        None.
    """
    # Slice both targets up front so a malformed y_train fails before any
    # (expensive) search starts.
    targets = (y_train[:, 0], y_train[:, 1])

    # NOTE(review): newer scikit-learn releases spell 'squared_loss' as
    # 'squared_error' -- confirm against the pinned version before upgrading.
    # NOTE(review): l1_ratio is documented as applying to the 'elasticnet'
    # penalty, which is not in this grid -- verify the extra candidates are
    # intended.
    grid_values = {
        'alpha': [value * 0.001 for value in range(1, 3)],
        'loss': ['squared_loss', 'huber'],
        'penalty': ['l2', 'l1'],
        'l1_ratio': [value * 0.1 for value in range(0, 3)],
    }

    searches = []
    models = []
    for target in targets:
        search = GridSearchCV(
            SGDRegressor(),
            param_grid=grid_values,
            scoring=['neg_mean_squared_error', 'neg_mean_absolute_error',
                     'r2'],
            refit='r2',
            n_jobs=-1,
            cv=2,
            verbose=100,
        )
        search.fit(X_train, target)
        # Take the estimator from THIS target's search. Reusing another
        # search's best_estimator_ would make both models the same object,
        # leaving it fitted on the last target only.
        model = search.best_estimator_
        # refit='r2' already fitted it on the full training data; the
        # explicit fit is kept so the training procedure is unchanged.
        model.fit(X_train, target)
        searches.append(search)
        models.append(model)

    # One prediction column per target, side by side.
    y_pred = np.hstack(
        [model.predict(X=X_test).reshape(-1, 1) for model in models])
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = np.hstack(
        [model.predict(X=X_train).reshape(-1, 1) for model in models])
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    # Store each tuned parameter as [value for target 0, value for target 1].
    best_params1, best_params2 = (s.best_params_ for s in searches)
    best_params = {
        key: [value, best_params2[key]]
        for key, value in best_params1.items()
    }
    saveBestParams(nameOfModel="SGD_GS", best_params=best_params)
    logSave(nameOfModel="SGD_GS", reg=models,
            metrics=metrics, val_metrics=val_metrics)
def GradientBoostingGS(X_train, X_test, y_train, y_test):
    """Grid-search one ``GradientBoostingRegressor`` per target column.

    The first two columns of ``y_train`` are tuned independently, each with
    its own 2-fold ``GridSearchCV`` that selects on ``r2``. The two resulting
    models are scored together on the test and training splits, and the
    models, metrics and per-target best parameters are handed to
    ``saveBestParams`` and ``logSave``.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training targets; indexed as ``y_train[:, 0]`` and
            ``y_train[:, 1]``, so it must support 2-D NumPy-style indexing
            with at least two columns.
        y_test: Held-out targets, compared against the two stacked
            prediction columns.

    Returns:
        None.
    """
    # Slice both targets up front so a malformed y_train fails before any
    # (expensive) search starts.
    targets = (y_train[:, 0], y_train[:, 1])

    # NOTE(review): newer scikit-learn releases renamed the 'ls' loss and the
    # 'mse'/'mae' criteria -- confirm against the pinned version before
    # upgrading.
    grid_values = {
        'loss': ['ls', 'huber'],
        'learning_rate': [value * 0.1 for value in range(1, 3)],
        'criterion': ["mse", "mae"],
        'alpha': [0.25, 0.5, 0.75, 0.9],
    }

    searches = []
    models = []
    for target in targets:
        search = GridSearchCV(
            GradientBoostingRegressor(),
            param_grid=grid_values,
            scoring=['neg_mean_squared_error', 'neg_mean_absolute_error',
                     'r2'],
            refit='r2',
            n_jobs=-1,
            cv=2,
            verbose=100,
        )
        search.fit(X_train, target)
        # Take the estimator from THIS target's search. Reusing another
        # search's best_estimator_ would make both models the same object,
        # leaving it fitted on the last target only.
        model = search.best_estimator_
        # refit='r2' already fitted it on the full training data; the
        # explicit fit is kept so the training procedure is unchanged.
        model.fit(X_train, target)
        searches.append(search)
        models.append(model)

    # One prediction column per target, side by side.
    y_pred = np.hstack(
        [model.predict(X=X_test).reshape(-1, 1) for model in models])
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = np.hstack(
        [model.predict(X=X_train).reshape(-1, 1) for model in models])
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    # Store each tuned parameter as [value for target 0, value for target 1].
    best_params1, best_params2 = (s.best_params_ for s in searches)
    best_params = {
        key: [value, best_params2[key]]
        for key, value in best_params1.items()
    }
    saveBestParams(nameOfModel="GradientBoostingGS", best_params=best_params)
    logSave(nameOfModel="GradientBoostingGS", reg=models,
            metrics=metrics, val_metrics=val_metrics)
def XgBoostGS(X_train, X_test, y_train, y_test):
    """Grid-search one ``xg.XGBRegressor`` per target column.

    The first two columns of ``y_train`` are tuned independently, each with
    its own 2-fold ``GridSearchCV`` that selects on ``r2``. The two resulting
    models are scored together on the test and training splits, and the
    models, metrics and per-target best parameters are handed to
    ``saveBestParams`` and ``logSave``.

    Args:
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training targets; indexed as ``y_train[:, 0]`` and
            ``y_train[:, 1]``, so it must support 2-D NumPy-style indexing
            with at least two columns.
        y_test: Held-out targets, compared against the two stacked
            prediction columns.

    Returns:
        None.
    """
    # Slice both targets up front so a malformed y_train fails before any
    # (expensive) search starts.
    targets = (y_train[:, 0], y_train[:, 1])

    grid_values = {
        'learning_rate': [x / 10 for x in range(1, 5)],
        'max_depth': list(range(11, 15)),
    }

    searches = []
    models = []
    for target in targets:
        search = GridSearchCV(
            xg.XGBRegressor(objective='reg:squarederror'),
            param_grid=grid_values,
            scoring=['neg_mean_squared_error', 'neg_mean_absolute_error',
                     'r2'],
            refit='r2',
            n_jobs=-1,
            cv=2,
            verbose=100,
        )
        search.fit(X_train, target)
        # Take the estimator from THIS target's search. Reusing another
        # search's best_estimator_ would make both models the same object,
        # leaving it fitted on the last target only.
        model = search.best_estimator_
        # refit='r2' already fitted it on the full training data; the
        # explicit fit is kept so the training procedure is unchanged.
        model.fit(X_train, target)
        searches.append(search)
        models.append(model)

    # One prediction column per target, side by side.
    y_pred = np.hstack(
        [model.predict(X_test).reshape(-1, 1) for model in models])
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = np.hstack(
        [model.predict(X_train).reshape(-1, 1) for model in models])
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    # Store each tuned parameter as [value for target 0, value for target 1].
    best_params1, best_params2 = (s.best_params_ for s in searches)
    best_params = {
        key: [value, best_params2[key]]
        for key, value in best_params1.items()
    }
    saveBestParams(nameOfModel="XgBoostGS", best_params=best_params)
    logSave(nameOfModel="XgBoostGS", reg=models,
            metrics=metrics, val_metrics=val_metrics)