import numpy as np
from sklearn import metrics, neighbors
from sklearn import model_selection as ms
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import ElasticNet, Lasso, LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR


def make_svr_pred(df, next_week, debug=0):
    """This method creates predictions using support vector regression."""
    #Tuned##
    # Wide space for randomized search.
    rand_space = {
        'estimator__kernel': ['linear', 'rbf', 'sigmoid'],
        'estimator__gamma': ['auto', 1e-10, 1e-6, 0.9],
        'estimator__epsilon': [1e-10, 1e-6, 0.1, 1],
        'estimator__C': [1e-2, 1, 10],
        'estimator__shrinking': [True, False],
        'estimator__max_iter': [-1, 1, 5, 10, 100, 1000]
    }
    # Narrower space for grid search.
    space = {
        'estimator__kernel': ['linear', 'rbf', 'sigmoid'],
        'estimator__gamma': ['auto'],
        'estimator__epsilon': [1e-10, 1e-9, 1e-8, 1e-7, 1e-6],
        'estimator__C': [4, 5, 6],
        'estimator__shrinking': [False],
        'estimator__max_iter': [19, 20, 21]
    }
    params_old = {
        'estimator__kernel': 'rbf',
        'estimator__gamma': 'auto',
        'estimator__epsilon': 1e-8,
        'estimator__C': 5,
        'estimator__shrinking': False,
        'estimator__max_iter': 20
    }
    params = {
        'estimator__kernel': 'linear',
        'estimator__gamma': 'auto',
        'estimator__epsilon': 1e-10,
        'estimator__C': 1e-2,
        'n_jobs': -1,
        'estimator__shrinking': False,
        'estimator__max_iter': -1
    }

    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_svr = MultiOutputRegressor(SVR())
    multi_svr.set_params(**params)
    #best_random = random_search(multi_svr, rand_space, next_week, 100, 3, X_train, Y_train)
    #best_random = grid_search(multi_svr, space, next_week, 3, X_train, Y_train)
    multi_svr.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_svr.predict(next_week[X_train.columns])

    if debug:
        y_pred_untrain = multi_svr.predict(X_train)
        print(next_week)
        print("Score: ", multi_svr.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        # cross_val_score expects (estimator, X, y); the original call passed
        # (Y_train, y_pred_untrain), which does not measure anything useful.
        print(
            "CV: ",
            ms.cross_val_score(multi_svr, X_train, Y_train, cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week
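# Every make_*_pred function here calls a process_data(df, next_week) helper
# that is defined elsewhere in the project and not shown in this file. The
# definition below is an ASSUMED minimal stand-in, inferred only from how its
# return values are used (the target columns are taken to be the df columns
# that next_week lacks); the real helper may clean, scale, or split the data
# differently.
def process_data(df, next_week, test_size=0.2):
    """Assumed stand-in: split df into train/test features and targets."""
    target_cols = [col for col in df.columns if col not in next_week.columns]
    X = df.drop(columns=target_cols)
    Y = df[target_cols]
    return ms.train_test_split(X, Y, test_size=test_size)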
def make_lr_pred(df, next_week, debug=0):
    """This method creates predictions using linear regression."""
    #Tuned
    space = {
        'estimator__fit_intercept': [True, False],
        'estimator__normalize': [True, False]
    }
    params = {'estimator__fit_intercept': True, 'estimator__normalize': False}

    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_lr = MultiOutputRegressor(LinearRegression())
    #best_random = grid_search(multi_lr, space, next_week, 10, X_train, Y_train)
    multi_lr.set_params(**params)
    multi_lr.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_lr.predict(next_week[X_train.columns])

    if debug:
        y_pred_untrain = multi_lr.predict(X_train)
        print(next_week)
        print("Score: ", multi_lr.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_lr, X_train, Y_train, cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week
def make_lasso_pred(df, next_week, debug=0):
    """This method makes predictions using lasso regression."""
    #Tuned##
    rand_space = {
        'estimator__alpha': [900, 1000, 1100],
        'estimator__normalize': [True, False],
        'estimator__fit_intercept': [True, False],
        'estimator__positive': [True, False],
        'estimator__max_iter': [10000, 50000, 100000]
    }
    space = {
        'estimator__alpha': [3, 4, 5],
        'estimator__normalize': [True],
        'estimator__fit_intercept': [True],
        'estimator__positive': [False],
        'estimator__max_iter': [1]
    }
    params_old = {
        'estimator__alpha': 3,
        'estimator__normalize': True,
        'estimator__fit_intercept': True,
        'estimator__positive': False,
        'estimator__max_iter': 1
    }
    params = {
        'estimator__alpha': 10,
        'estimator__normalize': False,
        'estimator__fit_intercept': True,
        'n_jobs': -1,
        'estimator__positive': False,
        'estimator__max_iter': 750
        #'estimator__max_iter': 10
    }

    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_lasso = MultiOutputRegressor(Lasso())
    multi_lasso.set_params(**params)
    #best_random = random_search(multi_lasso, rand_space, next_week, 50, 3, X_train, Y_train)
    #best_random = grid_search(multi_lasso, space, next_week, 3, X_train, Y_train)
    multi_lasso.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_lasso.predict(
        next_week[X_train.columns])

    if debug:
        y_pred_untrain = multi_lasso.predict(X_train)
        print(next_week)
        print("Score: ", multi_lasso.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_lasso, X_train, Y_train, cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week
def make_elastic_pred(df, next_week, debug=0):
    """This method creates predictions using elastic net regression."""
    #Tuned##
    rand_space = {
        'estimator__alpha': [1e-1],
        'estimator__l1_ratio': [0.7, 0.8],
        'estimator__fit_intercept': [True],
        'estimator__normalize': [True],
        'estimator__precompute': [False],
        'estimator__positive': [True],
        'estimator__max_iter': [11000, 12000, 13000],
        'estimator__selection': ['random']
    }
    space = {
        'estimator__alpha': [1e-5, 1e-1, 1, 10],
        'estimator__l1_ratio': [0, 0.25, 0.5, 0.75, 1],
        'estimator__fit_intercept': [True, False],
        'estimator__normalize': [True, False],
        'estimator__precompute': [True, False],
        'estimator__positive': [True, False],
        'estimator__max_iter': [10, 100, 1000, 10000],
        'estimator__selection': ['cyclic', 'random']
    }
    params_old = {
        'estimator__alpha': 0.1,
        'estimator__l1_ratio': 0.7,
        'estimator__fit_intercept': True,
        'estimator__normalize': True,
        'estimator__precompute': False,
        'estimator__positive': True,
        'estimator__max_iter': 11000,
        'estimator__selection': 'random'
    }
    params = {
        'estimator__alpha': 10,
        'estimator__l1_ratio': 1,
        'estimator__fit_intercept': True,
        'estimator__normalize': False,
        'estimator__precompute': True,
        'n_jobs': -1,
        'estimator__positive': True,
        #'estimator__max_iter': 10,
        'estimator__max_iter': 500,
        'estimator__selection': 'random'
    }

    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_en = MultiOutputRegressor(ElasticNet())
    multi_en.set_params(**params)
    #best_random = random_search(multi_en, rand_space, next_week, 100, 3, X_train, Y_train)
    #best_random = grid_search(multi_en, rand_space, next_week, 3, X_train, Y_train)
    multi_en.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_en.predict(next_week[X_train.columns])

    if debug:
        y_pred_untrain = multi_en.predict(X_train)
        print(next_week)
        print("Score: ", multi_en.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_en, X_train, Y_train, cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week
def make_gb_pred(df, next_week, debug=0):
    """This method creates predictions using gradient boosting regression."""
    #Tuned##
    rand_space = {
        'estimator__alpha': [1e-6, 1e-5, 1e-4],
        'estimator__learning_rate': [0.4, 0.5, 0.6],
        'estimator__loss': ['ls', 'lad', 'huber', 'quantile'],
        'estimator__n_estimators': [500, 1000, 1500],
        'estimator__max_leaf_nodes': [50, 100, 200],
        'estimator__min_samples_split': [4, 5, 6],
        'estimator__min_samples_leaf': [5, 10, 50],
        'estimator__min_weight_fraction_leaf': [0.4, 0.5],
        'estimator__max_depth': [5, 10, 50],
        'estimator__max_features': ['auto', 'sqrt', None, 1, 5]
    }
    space = {
        'estimator__alpha': [0.6],
        'estimator__learning_rate': [0.5],
        'estimator__loss': ['ls'],
        'estimator__n_estimators': [1000],
        'estimator__max_leaf_nodes': [36, 37, 38, 39],
        'estimator__min_samples_split': [4],
        'estimator__min_samples_leaf': [10],
        'estimator__min_weight_fraction_leaf': [0.5],
        'estimator__max_depth': [14],
        'estimator__max_features': [1]
    }
    params_old = {
        'estimator__alpha': 0.6,
        'estimator__learning_rate': 0.5,
        'estimator__loss': 'ls',
        'estimator__n_estimators': 1000,
        'estimator__max_leaf_nodes': 38,
        'estimator__min_samples_split': 4,
        'estimator__min_samples_leaf': 10,
        'estimator__min_weight_fraction_leaf': 0.5,
        'estimator__max_depth': 14,
        'estimator__max_features': 1
    }
    params = {
        'estimator__learning_rate': 0.9,
        'estimator__loss': 'ls',
        'estimator__n_estimators': 1,
        'estimator__max_leaf_nodes': 50,
        'estimator__min_samples_split': 10,
        'estimator__min_samples_leaf': 5,
        'estimator__min_weight_fraction_leaf': 0.2,
        'n_jobs': -1,
        'estimator__max_depth': 10,
        'estimator__max_features': 5
    }

    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_gbr = MultiOutputRegressor(GradientBoostingRegressor())
    #best_random = random_search(multi_gbr, rand_space, next_week, 200, 3, X_train, Y_train)
    #best_random = grid_search(multi_gbr, space, next_week, 3, X_train, Y_train)
    multi_gbr.set_params(**params)
    multi_gbr.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_gbr.predict(next_week[X_train.columns])

    if debug:
        y_pred_untrain = multi_gbr.predict(X_train)
        print(next_week.to_string())
        print("Score: ", multi_gbr.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_gbr, X_train, Y_train, cv=3,
                               scoring='neg_mean_squared_error'))
    return next_week
def make_knn_pred(df, next_week, debug=0):
    """This method creates predictions using k-nearest neighbors."""
    #Tuned##
    rand_space = {
        'estimator__n_neighbors': [5, 10, 15],
        'estimator__weights': ['uniform', 'distance'],
        'estimator__algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        'estimator__leaf_size': [50, 100, 150, 200],
        'estimator__p': [1, 2, 3]
    }
    space = {
        'estimator__n_neighbors': [14, 15, 16],
        'estimator__weights': ['distance'],
        'estimator__algorithm': ['auto', 'brute'],
        'estimator__leaf_size': [50, 90, 100, 110, 150],
        'estimator__p': [1]
    }
    params_old = {
        'estimator__n_neighbors': 15,
        'estimator__weights': 'distance',
        'estimator__algorithm': 'brute',
        'estimator__leaf_size': 50,
        'estimator__p': 1
    }
    params = {
        'estimator__n_neighbors': 10,
        'estimator__weights': 'uniform',
        'estimator__algorithm': 'auto',
        'estimator__leaf_size': 1,
        'estimator__n_jobs': -1,
        'n_jobs': -1,
        'estimator__p': 1
    }

    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_knn = MultiOutputRegressor(neighbors.KNeighborsRegressor())
    multi_knn.set_params(**params)
    #best_random = random_search(multi_knn, rand_space, next_week, 100, 3, X_train, Y_train)
    #best_random = grid_search(multi_knn, space, next_week, 3, X_train, Y_train)
    try:
        multi_knn.fit(X_train, Y_train)
        next_week[Y_train.columns] = multi_knn.predict(
            next_week[X_train.columns])
    except ValueError:
        # Fallback for small datasets where n_neighbors exceeds the number
        # of samples: use every other row as a neighbor instead.
        params = {
            'estimator__n_neighbors': len(df.index) - 1,
            #'verbose': 0,
            'estimator__weights': 'distance',
            'estimator__algorithm': 'brute',
            'estimator__leaf_size': 50,
            'estimator__p': 1
        }
        multi_knn.set_params(**params)
        multi_knn.fit(X_train, Y_train)
        next_week[Y_train.columns] = multi_knn.predict(
            next_week[X_train.columns])

    if debug:
        y_pred_untrain = multi_knn.predict(X_train)
        print(next_week)
        print("Score: ", multi_knn.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_knn, X_train, Y_train, cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week
def make_rf_pred(df, next_week, debug=0):
    """This method creates predictions using random forest."""
    #Tuned##
    params_old = {
        'estimator__bootstrap': True,
        'estimator__max_depth': 5,
        'estimator__max_features': 'sqrt',
        'estimator__random_state': 4,
        'estimator__min_samples_leaf': 9,
        'estimator__min_samples_split': 20,
        'estimator__n_estimators': 800
    }
    params = {
        'estimator__bootstrap': False,
        'estimator__max_depth': 3,
        'estimator__max_features': 'sqrt',
        'estimator__random_state': 4,
        'estimator__min_samples_leaf': 1,
        'estimator__min_samples_split': 2,
        'estimator__n_jobs': -1,
        'n_jobs': -1,
        'estimator__n_estimators': 200
    }
    rand_space = {
        'estimator__bootstrap': [True, False],
        'estimator__max_depth': [int(x) for x in np.linspace(10, 110, num=11)],
        'estimator__max_features': ['auto', 'sqrt'],
        'estimator__random_state': [4],
        'estimator__min_samples_leaf': [1, 2, 4, 8],  #132
        'estimator__min_samples_split': [2, 5, 10],  #396
        'estimator__n_estimators':
            [int(x) for x in np.linspace(200, 2000, num=10)]
    }  #3960
    space = {
        'estimator__bootstrap': [True],
        'estimator__max_depth': [5],
        'estimator__max_features': ['sqrt'],
        'estimator__random_state': [4],
        'estimator__min_samples_leaf': [9],  #132
        'estimator__min_samples_split': [15, 20, 25],  #396
        'estimator__n_estimators': [800]
    }  #3960

    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_rf = MultiOutputRegressor(RandomForestRegressor())
    multi_rf.set_params(**params)
    #best_random = random_search(multi_rf, rand_space, next_week, 100, 3, X_train, Y_train)
    #best_random = grid_search(multi_rf, space, next_week, 3, X_train, Y_train)
    multi_rf.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_rf.predict(next_week[X_train.columns])

    if debug:
        y_pred_untrain = multi_rf.predict(X_train)
        print(next_week.to_string())
        print("Score: ", multi_rf.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_rf, X_train, Y_train, cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week
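# The commented-out tuning calls in the functions above refer to
# random_search and grid_search helpers defined elsewhere in the project.
# The definitions below are ASSUMED stand-ins, inferred only from the call
# sites -- random_search(est, space, next_week, n_iter, cv, X, Y) and
# grid_search(est, space, next_week, cv, X, Y) -- and simply wrap the
# standard scikit-learn searchers; next_week is accepted only to match the
# observed signature and is unused here.
def random_search(estimator, space, next_week, n_iter, cv, X_train, Y_train):
    """Assumed stand-in: randomized hyperparameter search."""
    search = ms.RandomizedSearchCV(estimator, space, n_iter=n_iter, cv=cv,
                                   scoring='neg_mean_squared_error',
                                   n_jobs=-1)
    search.fit(X_train, Y_train)
    print("Best params: ", search.best_params_)
    return search.best_estimator_


def grid_search(estimator, space, next_week, cv, X_train, Y_train):
    """Assumed stand-in: exhaustive hyperparameter search."""
    search = ms.GridSearchCV(estimator, space, cv=cv,
                             scoring='neg_mean_squared_error', n_jobs=-1)
    search.fit(X_train, Y_train)
    print("Best params: ", search.best_params_)
    return search.best_estimator_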
# --- Standalone tuning script (separate snippet): sweep the number of
# gradient-boosting estimators and report the best-scoring counts. ---
import numpy as np
import datahelper  # project-local data loader
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split

x, y = datahelper.get_xy('data/', num_hours=3, error_minutes=15)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

gradient_boost = GradientBoostingRegressor(learning_rate=0.1)
model = MultiOutputRegressor(estimator=gradient_boost, n_jobs=-1)

estimators = np.arange(10, 1000, 10)
scores = dict()
for current_index, n in enumerate(estimators):
    model.set_params(
        estimator=GradientBoostingRegressor(n_estimators=int(n),
                                            learning_rate=0.1))
    model.fit(x_train, y_train)
    scores[current_index] = model.score(x_test, y_test)

sorted_by_scores = [(k, scores[k])
                    for k in sorted(scores, key=scores.get, reverse=True)]
print('Results of 5 estimators giving best results:\n')
for i in range(0, 5):
    index, score = sorted_by_scores[i]
    print("Number of estimators = ", estimators[index])

# Configure the model with the best-scoring count; the original code reused
# `index`, which by this point held the fifth-best entry from the loop.
best_index, best_score = sorted_by_scores[0]
model.set_params(estimator=GradientBoostingRegressor(
    n_estimators=int(estimators[best_index]), learning_rate=0.1))
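# A possible follow-up, not part of the original snippet: refit the chosen
# configuration and report held-out error using the metrics imported above.
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print("Test MSE: ", mean_squared_error(y_test, y_pred))
print("Test MAE: ", mean_absolute_error(y_test, y_pred))
print("Test R2:  ", r2_score(y_test, y_pred))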