def run_linear_regression(down_station, input_list, include_time, sample_size, network_type, nr_layers, nr_units):
    """Fit a ``linear_model.LinearRegression`` and plot its predictions.

    ``down_station``, ``input_list``, ``include_time``, ``sample_size`` and
    ``network_type`` are forwarded to ``data.construct``; ``nr_layers`` and
    ``nr_units`` are only passed on to ``util.get_result_dir``.

    The fitted estimator is written to the result directory through both
    ``util.save_linreg`` and ``util.save_sklearn_model``, its predictions
    for ``x_cv`` are plotted against ``y_cv``, and the elapsed wall-clock
    time is printed.
    """
    timer_start = time.time()

    out_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                  nr_units, sample_size)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    (y_train, x_train, y_cv, x_cv,
     _, _, _, _,
     train_y_max, train_y_min,
     _, _, _, _, _) = data.construct(down_station, input_list, include_time,
                                     sample_size, network_type)

    # Disabled experiments, kept for reference:
    #   poly = PolynomialFeatures(degree=2)
    #   x_train_poly = poly.fit_transform(x_train)
    #   x_cv_poly = poly.fit_transform(x_cv)
    #   regr = linear_model.ElasticNet(alpha=1e-3, tol=1e-9)
    #   regr = linear_model.LinearRegression(fit_intercept=False)
    estimator = linear_model.LinearRegression()
    estimator.fit(x_train, y_train)
    cv_prediction = estimator.predict(x_cv)

    util.save_linreg(estimator, out_dir)
    util.save_sklearn_model(estimator, out_dir)
    predict.plot_prediction(cv_prediction, out_dir, y_cv,
                            train_y_max, train_y_min)

    duration = time.time() - timer_start
    print(time.strftime("Fitting time : %H:%M:%S", time.gmtime(duration)))
def run_gpr(down_station, input_list, include_time, sample_size, network_type, include_diff, n_estimators, b):
    """Fit a bagged ``GaussianProcessRegressor`` and plot its predictions.

    ``n_estimators`` is the number of bagged regressors; each one is given
    a ``1.0 / n_estimators`` fraction of the samples (``max_samples``).
    ``n_estimators`` and ``b`` are also passed to ``util.get_result_dir``.
    ``include_diff`` is accepted but not used by this function.

    The fitted ensemble is saved with ``util.save_sklearn_model``, its
    predictions for ``x_cv`` are plotted against ``y_cv``, and the elapsed
    wall-clock time is printed.
    """
    timer_start = time.time()

    out_dir = util.get_result_dir(down_station, network_type, n_estimators,
                                  b, sample_size)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    (y_train, x_train, y_cv, x_cv,
     _, _, _, _,
     train_y_max, train_y_min,
     _, _, _, _, _) = data.construct(down_station, input_list, include_time,
                                     sample_size, network_type)

    # n_estimators = 50
    sample_fraction = 1.0 / n_estimators
    base_regressor = GaussianProcessRegressor(copy_X_train=False)
    ensemble = BaggingRegressor(base_regressor,
                                max_samples=sample_fraction,
                                n_estimators=n_estimators,
                                n_jobs=1)
    # Disabled alternative without bagging:
    #   gpr = GaussianProcessRegressor(copy_X_train=False)
    ensemble.fit(x_train, y_train)
    util.save_sklearn_model(ensemble, out_dir)

    cv_prediction = ensemble.predict(x_cv)
    predict.plot_prediction(cv_prediction, out_dir, y_cv,
                            train_y_max, train_y_min)

    duration = time.time() - timer_start
    print(time.strftime("Fitting time : %H:%M:%S", time.gmtime(duration)))
def run_prob(down_station, input_list, include_time, sample_size, network_type, nr_layers, nr_units):
    """Rebuild a model, load its lowest-cv weights and print its config.

    The model is created with ``model.create`` using input dimensions read
    from the shape of ``x_train`` (three axes for ``'bnn'``, two otherwise).
    Weights are then loaded by name from the file returned by
    ``util.model_file_name_lowest_cv``. The elapsed wall-clock time is
    printed at the end.
    """
    timer_start = time.time()

    out_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                  nr_units, sample_size)

    # Only x_train is needed here; the remaining 14 entries are discarded.
    (_, x_train,
     _, _, _, _, _, _, _, _, _, _, _, _, _) = data.construct(
         down_station, input_list, include_time, sample_size, network_type)

    if network_type != 'bnn':
        input_dim_2 = 0
        (_, input_dim) = x_train.shape
    else:
        (_, input_dim, input_dim_2) = x_train.shape

    net = model.create(out_dir, input_dim, nr_layers, nr_units,
                       network_type, input_dim_2)
    weights_path = util.model_file_name_lowest_cv(out_dir)
    net.load_weights(weights_path, by_name=True)
    print(net.get_config())

    duration = time.time() - timer_start
    print(time.strftime("Training time : %H:%M:%S", time.gmtime(duration)))
def run_sklearn(down_station, input_list, include_time, sample_size, network_type, nr_layers, nr_units):
    """Load a saved scikit-learn model, time its prediction and plot it.

    The model is loaded from the result directory with
    ``util.load_sklearn_model`` and asked to predict ``x_test``. The
    prediction time and throughput are printed, and the predictions are
    plotted against ``y_test`` with ``plot_prediction``.

    ``down_station``, ``input_list``, ``include_time``, ``sample_size`` and
    ``network_type`` are forwarded to ``data.construct``; ``nr_layers`` and
    ``nr_units`` are only passed on to ``util.get_result_dir``.

    To evaluate on the validation arrays instead, swap the active lines for
    the commented ones marked "uncomment for validation".
    """
    result_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                     nr_units, sample_size)

    # x_cv / y_cv are only referenced by the commented validation toggles.
    (_, _, y_cv, x_cv, y_test, x_test,
     _, _,
     train_y_max, train_y_min,
     _, _, _, _, _) = data.construct(down_station, input_list, include_time,
                                     sample_size, network_type)

    my_model = util.load_sklearn_model(result_dir)

    start_time = time.time()
    # y_pred = my_model.predict(x_cv)  # uncomment for validation
    y_pred = my_model.predict(x_test)  # uncomment for test
    elapsed_time = time.time() - start_time

    print(time.strftime("Prediction done : %H:%M:%S",
                        time.gmtime(elapsed_time)))

    # Two time.time() reads can return the same value on a coarse clock;
    # avoid a ZeroDivisionError and report an unbounded rate instead.
    if elapsed_time > 0:
        predictions_per_second = len(y_pred) / elapsed_time
    else:
        predictions_per_second = float("inf")
    print("{0} predictions in {1} seconds ({2} per second)".format(
        len(y_pred), elapsed_time, predictions_per_second))

    # plot_prediction(y_pred, result_dir, y_cv, train_y_max, train_y_min)  # uncomment for validation
    plot_prediction(y_pred, result_dir, y_test, train_y_max, train_y_min)  # uncomment for test
def run(down_station, input_list, include_time, sample_size, network_type, nr_layers, nr_units):
    """Plot training performance, load the saved model and predict.

    The model file is the one named by ``util.model_file_name`` for the
    result directory. The active code path predicts on ``x_test`` /
    ``y_test``; the commented blocks below are alternative data selections
    that can be swapped in by hand.
    """
    out_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                  nr_units, sample_size)
    util.plot_training_performance(out_dir)

    saved_model_path = util.model_file_name(out_dir)
    # saved_model_path = util.model_file_name_lowest_cv(out_dir)  # lowest cv model
    loaded_model = util.load_model(saved_model_path)

    # uncomment for DWS prediction
    # for specific dates, see internals of data.construct
    # (_, _, _, _, _, _, _, _, train_y_max, train_y_min, _, _, _, full_x, full_y) = data.construct(down_station, input_list, include_time, sample_size, network_type)
    # predict(loaded_model, out_dir, full_x, full_y, train_y_max, train_y_min)

    # uncomment for normal prediction
    # (_, _, y_cv, x_cv, _, _, _, _, train_y_max, train_y_min, _, _, _, full_x, full_y) = data.construct(down_station, input_list, include_time, sample_size, network_type)
    # predict(loaded_model, out_dir, x_cv, y_cv, train_y_max, train_y_min)

    # uncomment for test prediction
    (_, _, _, _,
     y_test, x_test,
     _, _,
     train_y_max, train_y_min,
     _, _, _, _, _) = data.construct(down_station, input_list, include_time,
                                     sample_size, network_type)
    predict(loaded_model, out_dir, x_test, y_test, train_y_max, train_y_min)
def run_svr(down_station, input_list, include_time, sample_size, network_type, include_diff, _C, _epsilon):
    """Fit a support-vector regressor and plot its predictions.

    When ``network_type`` is ``'svr'`` a ``BaggingRegressor`` of 12 ``SVR``
    estimators is used, each given a 1/12 fraction of the samples; for any
    other value a ``LinearSVR`` is fitted. ``_C`` and ``_epsilon`` are
    passed to the regressor and to ``util.get_result_dir``.
    ``include_diff`` is accepted but not used by this function.

    The fitted model is saved with ``util.save_sklearn_model``, its
    predictions for ``x_cv`` are plotted against ``y_cv``, and the elapsed
    wall-clock time is printed.
    """
    timer_start = time.time()

    out_dir = util.get_result_dir(down_station, network_type, _C, _epsilon,
                                  sample_size)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    (y_train, x_train, y_cv, x_cv,
     _, _, _, _,
     train_y_max, train_y_min,
     _, _, _, _, _) = data.construct(down_station, input_list, include_time,
                                     sample_size, network_type)

    # Disabled alternative:
    #   svr = GridSearchCV(SVR(kernel='rbf', gamma=0.1), cv=5,
    #                      param_grid={"C": [1e0, 1e1, 1e2, 1e3],
    #                                  "gamma": np.logspace(-2, 2, 5)})
    if network_type != 'svr':
        regressor = LinearSVR(C=_C, epsilon=_epsilon, verbose=1,
                              max_iter=20000)
    else:
        bag_count = 12
        base_svr = SVR(C=_C, epsilon=_epsilon, verbose=True, cache_size=768)
        regressor = BaggingRegressor(base_svr,
                                     max_samples=1.0 / bag_count,
                                     n_estimators=bag_count,
                                     n_jobs=-1)
        # Disabled alternative without bagging:
        #   svr = SVR(C=_C, epsilon=_epsilon, verbose=True, cache_size=1024)

    regressor.fit(x_train, y_train)
    util.save_sklearn_model(regressor, out_dir)

    cv_prediction = regressor.predict(x_cv)
    predict.plot_prediction(cv_prediction, out_dir, y_cv,
                            train_y_max, train_y_min)

    duration = time.time() - timer_start
    print(time.strftime("Fitting time : %H:%M:%S", time.gmtime(duration)))
def run(down_station, input_list, include_time, sample_size, network_type, nr_layers, nr_units, nr_epochs):
    """Create a model, train it, plot its performance and run prediction.

    Input dimensions handed to ``model.create`` are read from ``x_train``:

    * ``'bnn'``, ``'cnn'``, ``'rnn_lstm'``, ``'rnn_gru'``: ``x_train.shape``
      is unpacked into three values and the last two are used.
    * ``'multi_cnn'``: ``x_train`` is iterated and the middle value of each
      item's three-value shape is collected into a list.
    * anything else: ``x_train.shape`` is unpacked into two values and the
      second is used.

    After ``train`` finishes, ``util.plot_training_performance`` and
    ``predict.run`` are called, and the elapsed wall-clock time is printed.
    """
    timer_start = time.time()

    out_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                  nr_units, sample_size)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    (y_train, x_train, y_cv, x_cv,
     _, _, _, _, _, _, _, _, _, _, _) = data.construct(
         down_station, input_list, include_time, sample_size, network_type)

    input_dim_2 = 0
    if network_type in ('bnn', 'cnn', 'rnn_lstm', 'rnn_gru'):
        (_, input_dim, input_dim_2) = x_train.shape
    elif network_type == 'multi_cnn':
        # One entry per input array; each shape must unpack to 3 values.
        input_dim = [
            middle
            for (_, middle, _) in (part.shape for part in x_train)
        ]
    else:
        (_, input_dim) = x_train.shape

    net = model.create(out_dir, input_dim, nr_layers, nr_units,
                       network_type, input_dim_2)
    train(net, out_dir, y_train, x_train, y_cv, x_cv, nr_epochs)

    util.plot_training_performance(out_dir)
    predict.run(down_station, input_list, include_time, sample_size,
                network_type, nr_layers, nr_units)

    duration = time.time() - timer_start
    print(time.strftime("Training time : %H:%M:%S", time.gmtime(duration)))
def run_sgdreg(down_station, input_list, include_time, sample_size, network_type, _tol, _eta0):
    """Fit a ``linear_model.SGDRegressor`` and plot its predictions.

    ``_tol`` and ``_eta0`` are passed to the regressor as ``tol`` and
    ``eta0`` (with ``max_iter=100000``) and to ``util.get_result_dir``.

    Predictions for ``x_cv`` are plotted against ``y_cv`` and the elapsed
    wall-clock time is printed. The fitted model itself is not saved by
    this function.
    """
    timer_start = time.time()

    out_dir = util.get_result_dir(down_station, network_type, _tol, _eta0,
                                  sample_size)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    (y_train, x_train, y_cv, x_cv,
     _, _, _, _,
     train_y_max, train_y_min,
     _, _, _, _, _) = data.construct(down_station, input_list, include_time,
                                     sample_size, network_type)

    regressor = linear_model.SGDRegressor(max_iter=100000, tol=_tol,
                                          eta0=_eta0)
    regressor.fit(x_train, y_train)
    cv_prediction = regressor.predict(x_cv)
    predict.plot_prediction(cv_prediction, out_dir, y_cv,
                            train_y_max, train_y_min)

    duration = time.time() - timer_start
    print(time.strftime("Fitting time : %H:%M:%S", time.gmtime(duration)))
def run_prob(down_station, input_list, include_time, sample_size, network_type, nr_layers, nr_units):
    """Rebuild a model, load its lowest-cv weights and predict on x_cv.

    The model is created with ``model.create`` using input dimensions read
    from the shape of ``x_train`` (three axes for ``'bnn'``, two otherwise).
    Weights are loaded by name from the file returned by
    ``util.model_file_name_lowest_cv``, the model config is printed, and
    ``predict`` is called with ``x_cv`` / ``y_cv``.
    """
    out_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                  nr_units, sample_size)

    (_, x_train, y_cv, x_cv,
     _, _, _, _,
     train_y_max, train_y_min,
     _, _, _, _, _) = data.construct(down_station, input_list, include_time,
                                     sample_size, network_type)

    if network_type != 'bnn':
        input_dim_2 = 0
        (_, input_dim) = x_train.shape
    else:
        (_, input_dim, input_dim_2) = x_train.shape

    net = model.create(out_dir, input_dim, nr_layers, nr_units,
                       network_type, input_dim_2)
    weights_path = util.model_file_name_lowest_cv(out_dir)
    net.load_weights(weights_path, by_name=True)
    print(net.get_config())

    predict(net, out_dir, x_cv, y_cv, train_y_max, train_y_min)