# Imports for the model functions below. The metric/logging helpers
# (printMetrics, getMetrics, logSave) are imported from the project's
# Utility module by the AutoKeras script at the bottom of this file;
# saveBestParams and logAndSave are assumed to live in the same module.
import numpy as np
import xgboost as xg
import xgboost as xgb  # the classifier functions below use the xgb alias

from sklearn.ensemble import (AdaBoostClassifier, AdaBoostRegressor,
                              GradientBoostingRegressor,
                              RandomForestClassifier)
from sklearn.linear_model import (ElasticNet, Lars, Lasso,
                                  LogisticRegression, Ridge, SGDRegressor)
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVC, LinearSVR, NuSVR
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor

from Utility import (getMetrics, logAndSave, logSave, printMetrics,
                     saveBestParams)


def LarsRegressorGS(X_train, X_test, y_train, y_test):
    reg = Lars()
    grid_values = {
        'n_nonzero_coefs': list(range(100, 500, 100)),
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="LarsRegressorGS", best_params=best_params)
    logSave(nameOfModel="LarsRegressorGS", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def LinearSVRRegressor(X_train, X_test, y_train, y_test):
    # The two target columns are fitted by two independent regressors.
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = LinearSVR(epsilon=0.001, max_iter=5000, C=3,
                     loss='squared_epsilon_insensitive')
    reg1.fit(X_train, y_train1)
    reg2 = LinearSVR(epsilon=0.001, max_iter=5000, C=3,
                     loss='squared_epsilon_insensitive')
    reg2.fit(X_train, y_train2)

    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="LinearSVRRegressor", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
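# A minimal alternative sketch (not part of the original pipeline): the
# two-target pattern above can also be expressed with sklearn's
# MultiOutputRegressor, which fits one clone of the base estimator per
# output column and stacks the predictions itself.
def LinearSVRRegressorMultiOutputSketch(X_train, X_test, y_train, y_test):
    from sklearn.multioutput import MultiOutputRegressor

    reg = MultiOutputRegressor(
        LinearSVR(epsilon=0.001, max_iter=5000, C=3,
                  loss='squared_epsilon_insensitive'))
    reg.fit(X_train, y_train)        # y_train has shape (n_samples, 2)
    y_pred = reg.predict(X_test)     # predictions come back as (n_samples, 2)
    printMetrics(y_true=y_test, y_pred=y_pred)
    return reg, y_pred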
def GradientBoosting(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = GradientBoostingRegressor(loss='huber')
    reg1.fit(X_train, y_train1)
    reg2 = GradientBoostingRegressor(loss='huber')
    reg2.fit(X_train, y_train2)

    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="GradientBoosting", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
def RidgeRegressorGS(X_train, X_test, y_train, y_test):
    reg = Ridge()
    grid_values = {
        'alpha': list(range(1, 3)) + [value * 0.01 for value in range(1, 3)],
        'solver': ['svd', 'cholesky', 'saga']
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="RidgeRegressorGS", best_params=best_params)
    logSave(nameOfModel="RidgeRegressorGS", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def XgBoost(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = xg.XGBRegressor(objective='reg:squarederror')
    reg1.fit(X=X_train, y=y_train1)
    reg2 = xg.XGBRegressor(objective='reg:squarederror')
    reg2.fit(X=X_train, y=y_train2)

    y_pred1 = reg1.predict(X_test)
    y_pred2 = reg2.predict(X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred1 = reg1.predict(X_train)
    y_pred2 = reg2.predict(X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="XgBoost", reg=[reg1, reg2], metrics=metrics,
            val_metrics=val_metrics)
def AdaBoost(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = AdaBoostRegressor(base_estimator=LinearSVR(),
                             loss='exponential', n_estimators=5)
    reg1.fit(X_train, y_train1)
    reg2 = AdaBoostRegressor(base_estimator=LinearSVR(),
                             loss='exponential', n_estimators=5)
    reg2.fit(X_train, y_train2)

    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="AdaBoost", reg=[reg1, reg2], metrics=metrics,
            val_metrics=val_metrics)
def LogisticRegressionModel(splitData, X_train, X_test, y_train, y_test):
    clf = LogisticRegression(penalty='l1',
                             solver='liblinear',
                             multi_class='ovr',
                             class_weight={0: 0.7, 1: 1.5})
    clf.fit(X_train, y_train.ravel())

    if splitData:
        y_preds = clf.predict(X_test)
        printMetrics(y_test, y_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    y_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)

    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tf1-" + str(f1) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\tval_f1-" + str(val_f1) + "\n")
    logAndSave(name_of_model="LogisticRegression", clf=clf, metrics=metrics,
               val_metrics=val_metrics)
def NeuralNetGS(X_train, X_test, y_train, y_test):
    reg = MLPRegressor()
    grid_values = {
        'hidden_layer_sizes': [(8, 16, 32, 64, 128, 64, 32, 64, 16, 8),
                               (8, 16, 32, 64, 32, 16, 8),
                               (8, 16, 32, 16, 8)],
        'solver': ['adam'],
        'learning_rate': ['constant', 'invscaling']
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="NeuralNetGS", best_params=best_params)
    logSave(nameOfModel="NeuralNetGS", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def ExtraTreeGS(X_train, X_test, y_train, y_test):
    reg = ExtraTreeRegressor()
    grid_values = {
        'criterion': ["mse", "mae"],
        'max_depth': list(range(20, 25))
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="ExtraTreeGS", best_params=best_params)
    logSave(nameOfModel="ExtraTreeGS", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def NuSVRRegressor(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = NuSVR()
    reg1.fit(X_train, y_train1)
    reg2 = NuSVR()
    reg2.fit(X_train, y_train2)

    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="NuSVRRegressor", reg=[reg1, reg2], metrics=metrics,
            val_metrics=val_metrics)
def AdaBoostGS(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = AdaBoostRegressor(base_estimator=LinearSVR(), n_estimators=3)
    reg2 = AdaBoostRegressor(base_estimator=LinearSVR(), n_estimators=3)
    grid_values = {
        'base_estimator__epsilon': [value * 0.1 for value in range(0, 2)],
        'base_estimator__C': list(range(1, 2)),
        'base_estimator__loss': ['epsilon_insensitive',
                                 'squared_epsilon_insensitive']
    }
    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)

    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)

    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="AdaBoostGS", best_params=best_params)
    logSave(nameOfModel="AdaBoostGS", reg=[reg1, reg2], metrics=metrics,
            val_metrics=val_metrics)
def SGD_GS(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = SGDRegressor()
    reg2 = SGDRegressor()
    grid_values = {
        'alpha': [value * 0.001 for value in range(1, 3)],
        'loss': ['squared_loss', 'huber'],
        'penalty': ['l2', 'l1'],
        'l1_ratio': [value * 0.1 for value in range(0, 3)]
    }
    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)

    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)

    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="SGD_GS", best_params=best_params)
    logSave(nameOfModel="SGD_GS", reg=[reg1, reg2], metrics=metrics,
            val_metrics=val_metrics)
def GradientBoostingGS(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = GradientBoostingRegressor()
    reg2 = GradientBoostingRegressor()
    grid_values = {
        'loss': ['ls', 'huber'],
        'learning_rate': [value * 0.1 for value in range(1, 3)],
        'criterion': ["mse", "mae"],
        'alpha': [0.25, 0.5, 0.75, 0.9],
    }
    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)

    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)

    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="GradientBoostingGS", best_params=best_params)
    logSave(nameOfModel="GradientBoostingGS", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
def NuSVRRegressorGS(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = NuSVR()
    reg2 = NuSVR()
    grid_values = {
        'nu': [value * 0.1 for value in range(1, 3)],
        'C': list(range(1, 3)),
        'kernel': ['poly', 'rbf'],
        'degree': list(range(1, 3))
    }
    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)

    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)

    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="NuSVRRegressorGS", best_params=best_params)
    logSave(nameOfModel="NuSVRRegressorGS", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
def XgBoostGS(X_train, X_test, y_train, y_test):
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = xg.XGBRegressor(objective='reg:squarederror')
    reg2 = xg.XGBRegressor(objective='reg:squarederror')
    grid_values = {
        'learning_rate': [x / 10 for x in range(1, 5)],
        'max_depth': list(range(11, 15))
    }
    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)

    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)

    y_pred1 = reg1.predict(X_test)
    y_pred2 = reg2.predict(X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred1 = reg1.predict(X_train)
    y_pred2 = reg2.predict(X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="XgBoostGS", best_params=best_params)
    logSave(nameOfModel="XgBoostGS", reg=[reg1, reg2], metrics=metrics,
            val_metrics=val_metrics)
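# The five grid-search functions above all merge their two per-target
# best_params_ dicts with the same loop. A small helper like this one (a
# refactoring sketch, not present in the original code) captures the pattern:
def mergeBestParams(best_params1: dict, best_params2: dict) -> dict:
    """Pair the two per-target grid-search results key by key."""
    return {key: [best_params1[key], best_params2[key]]
            for key in best_params1}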
def LassoRegressor(X_train, X_test, y_train, y_test):
    reg = Lasso(alpha=0.01)
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="LassoRegressor", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def NeuralNet(X_train, X_test, y_train, y_test):
    reg = MLPRegressor(hidden_layer_sizes=(32, 64, 128, 256, 128, 64))
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="NeuralNet", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def RidgeRegressor(X_train, X_test, y_train, y_test):
    reg = Ridge()
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="RidgeRegressor", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def ElasticNetRegressor(X_train, X_test, y_train, y_test):
    reg = ElasticNet(alpha=10, l1_ratio=0.2)
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="ElasticNetRegressor", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def DecisionTree(X_train, X_test, y_train, y_test):
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="DecisionTree", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
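# A minimal driver sketch (not in the original file) showing how the
# regressor functions above are meant to be called. getPlantsPropulsionData
# is the loader the AutoKeras script below imports from Utility; passing
# makePolynomialFeatures=True mirrors that script.
def runRegressorsSketch():
    from Utility import getPlantsPropulsionData

    X_train, X_test, y_train, y_test = getPlantsPropulsionData(
        splitData=True, makePolynomialFeatures=True)
    RidgeRegressor(X_train, X_test, y_train, y_test)
    DecisionTree(X_train, X_test, y_train, y_test)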
def AdaBoostModel(splitData, X_train, X_test, y_train, y_test):
    svc = SVC()
    # SAMME is required here because plain SVC provides no predict_proba.
    clf = AdaBoostClassifier(base_estimator=svc, n_estimators=100,
                             algorithm='SAMME')
    clf.fit(X_train, y_train.ravel())

    if splitData:
        y_preds = clf.predict(X_test)
        printMetrics(y_test, y_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    y_preds = clf.predict(X_train).reshape(-1, 1)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)

    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\n")
    logAndSave(name_of_model="AdaBoost", clf=clf, metrics=metrics,
               val_metrics=val_metrics)
def XGBClassifierModel(splitData, X_train, X_test, y_train, y_test):
    clf = xgb.XGBClassifier(objective="binary:logistic", eval_metric="auc")
    clf.fit(X_train, y_train.ravel())

    if splitData:
        y_preds = clf.predict(X_test)
        printMetrics(y_test, y_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    y_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)

    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\n")
    logAndSave(name_of_model="XGBClassifier", clf=clf, metrics=metrics,
               val_metrics=val_metrics)
def RandomForestModel(splitData, X_train, X_test, y_train, y_test):
    clf = RandomForestClassifier(max_depth=14)
    clf.fit(X_train, y_train.ravel())

    if splitData:
        y_preds = clf.predict(X_test)
        printMetrics(y_test, y_preds)
        val_acc, val_pre, val_recall, val_auc, val_f1 = getMetrics(
            y_test, y_preds)
    else:
        val_acc, val_pre, val_recall, val_auc, val_f1 = 0, 0, 0, 0, 0

    y_preds = clf.predict(X_train)
    acc, pre, recall, auc, f1 = getMetrics(y_train, y_preds)

    val_metrics = (val_acc, val_pre, val_recall, val_auc, val_f1)
    metrics = (acc, pre, recall, auc, f1)
    # print("acc-" + str(acc) + "\tprecision-" + str(pre) + "\trecall-" + str(recall) + "\tauc-" + str(auc) + "\tf1-" + str(f1) + "\tval_accuracy-" + str(val_acc) + "\tval_precision-" + str(val_pre) + "\tval_recall-" + str(val_recall) + "\tval_auc-" + str(val_auc) + "\tval_f1-" + str(val_f1) + "\n")
    logAndSave(name_of_model="RandomForestClassifier", clf=clf,
               metrics=metrics, val_metrics=val_metrics)
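# A matching driver sketch for the classifier functions. The loader name
# getClassificationData is hypothetical (the project presumably exposes one
# in Utility, analogous to getPlantsPropulsionData); splitData mirrors the
# flag each classifier already takes.
def runClassifiersSketch(splitData=True):
    from Utility import getClassificationData  # hypothetical loader name

    X_train, X_test, y_train, y_test = getClassificationData(
        splitData=splitData)
    LogisticRegressionModel(splitData, X_train, X_test, y_train, y_test)
    RandomForestModel(splitData, X_train, X_test, y_train, y_test)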
import autokeras as ak

from Utility import getPlantsPropulsionData, getMetrics, printMetrics, logSave

X_train, X_test, y_train, y_test = getPlantsPropulsionData(
    splitData=True, makePolynomialFeatures=True)

reg = ak.StructuredDataRegressor(
    loss='mean_absolute_error',
    metrics=['mean_squared_error', 'mean_absolute_error'],
    objective='val_mean_absolute_error',
    overwrite=True,
    max_trials=10)
reg.fit(x=X_train, y=y_train, epochs=20, validation_data=(X_test, y_test))

y_preds = reg.predict(X_train)
printMetrics(y_true=y_train, y_pred=y_preds)
metrics = getMetrics(y_true=y_train, y_pred=y_preds)

y_preds = reg.predict(X_test)
printMetrics(y_true=y_test, y_pred=y_preds)
val_metrics = getMetrics(y_true=y_test, y_pred=y_preds)

logSave(nameOfModel="AutoKeras", reg=None, metrics=metrics,
        val_metrics=val_metrics)
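# A short follow-on sketch (not in the original script): the tuned AutoKeras
# pipeline can be exported as a plain Keras model for inspection or saving.
best_model = reg.export_model()
best_model.summary()
best_model.save("autokeras_best_model", save_format="tf")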
    'GT_Turbine_decay_state_coefficient'
], axis=1)
y1 = pd.DataFrame(data=y_train[:, 0], columns=[final_cols[-2]])
y2 = pd.DataFrame(data=y_train[:, 1], columns=[final_cols[-1]])

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
scaled_X = pd.DataFrame(data=X_train, columns=final_cols[:-2])

reg1 = NuSVR()
reg1.fit(X_train, y_train[:, 0])
reg2 = NuSVR()
reg2.fit(X_train, y_train[:, 1])

y_pred1 = reg1.predict(X=X_train)
y_pred2 = reg2.predict(X=X_train)
y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
printMetrics(y_true=y_train, y_pred=y_pred)
metrics = getMetrics(y_true=y_train, y_pred=y_pred)

# Partial-dependence plots, one figure per decay-state target.
fig1, ax1 = plt.subplots(figsize=(15, 15))
myplot1 = plot_partial_dependence(reg1, scaled_X, final_cols[:-2], ax=ax1,
                                  n_jobs=-1)
myplot1.plot()
fig1.savefig('GT_Compressor_decay_state_coefficient.png')

fig2, ax2 = plt.subplots(figsize=(15, 15))
myplot2 = plot_partial_dependence(reg2, scaled_X, final_cols[:-2], ax=ax2,
                                  n_jobs=-1)
myplot2.plot()
fig2.savefig('GT_Turbine_decay_state_coefficient.png')