def run_linear_regression(down_station, input_list, include_time, sample_size,
                          network_type, nr_layers, nr_units):
    """Fit a linear-regression model and plot its validation predictions.

    The data set is built by ``data.construct``.  The model is fitted on the
    training split, persisted into the result directory (via both
    ``util.save_linreg`` and ``util.save_sklearn_model``) and its predictions
    on the cross-validation split are handed to ``predict.plot_prediction``.
    The wall-clock time of the whole run is printed at the end.

    Args:
        down_station: Passed to ``util.get_result_dir`` and ``data.construct``.
        input_list: Passed through to ``data.construct``.
        include_time: Passed through to ``data.construct``.
        sample_size: Passed to ``util.get_result_dir`` and ``data.construct``.
        network_type: Passed to ``util.get_result_dir`` and ``data.construct``.
        nr_layers: Only used to derive the result directory.
        nr_units: Only used to derive the result directory.

    Raises:
        FileExistsError: If the result path exists but is not a directory.
    """
    start_time_run = time.time()

    result_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                     nr_units, sample_size)
    # exist_ok=True replaces the racy "exists() then makedirs()" sequence:
    # a concurrent run creating the same directory no longer makes this fail.
    os.makedirs(result_dir, exist_ok=True)

    (y_train, x_train, y_cv, x_cv, _, _, _, _, train_y_max, train_y_min, _, _,
     _, _, _) = data.construct(down_station, input_list, include_time,
                               sample_size, network_type)

    # Variants that were left disabled in the original code:
    #   * PolynomialFeatures(degree=2) expansion of x_train / x_cv
    #   * linear_model.ElasticNet(alpha=1e-3, tol=1e-9)
    #   * linear_model.LinearRegression(fit_intercept=False)
    regr = linear_model.LinearRegression()
    regr.fit(x_train, y_train)
    y_pred = regr.predict(x_cv)

    util.save_linreg(regr, result_dir)
    util.save_sklearn_model(regr, result_dir)
    predict.plot_prediction(y_pred, result_dir, y_cv, train_y_max, train_y_min)

    elapsed_time_run = time.time() - start_time_run
    print(
        time.strftime("Fitting time : %H:%M:%S",
                      time.gmtime(elapsed_time_run)))
def run_gpr(down_station, input_list, include_time, sample_size, network_type,
            include_diff, n_estimators, b):
    """Fit a bagged Gaussian-process regressor and plot validation predictions.

    A ``BaggingRegressor`` wraps ``GaussianProcessRegressor`` base estimators.
    ``max_samples`` is set to ``1.0 / n_estimators``, i.e. the fraction of the
    training samples drawn for each base estimator shrinks as the ensemble
    grows.  The fitted ensemble is saved with ``util.save_sklearn_model`` and
    its predictions on the cross-validation split are plotted.

    Args:
        down_station: Passed to ``util.get_result_dir`` and ``data.construct``.
        input_list: Passed through to ``data.construct``.
        include_time: Passed through to ``data.construct``.
        sample_size: Passed to ``util.get_result_dir`` and ``data.construct``.
        network_type: Passed to ``util.get_result_dir`` and ``data.construct``.
        include_diff: Not used by this function; kept so existing callers
            keep working.
        n_estimators: Number of bagged base estimators; also part of the
            result-directory name.
        b: Only used to derive the result directory.

    Raises:
        FileExistsError: If the result path exists but is not a directory.
        ZeroDivisionError: If ``n_estimators`` is 0.
    """
    start_time_run = time.time()

    result_dir = util.get_result_dir(down_station, network_type, n_estimators,
                                     b, sample_size)
    # exist_ok=True replaces the racy "exists() then makedirs()" sequence.
    os.makedirs(result_dir, exist_ok=True)

    (y_train, x_train, y_cv, x_cv, _, _, _, _, train_y_max, train_y_min, _, _,
     _, _, _) = data.construct(down_station, input_list, include_time,
                               sample_size, network_type)

    # Un-bagged alternative: GaussianProcessRegressor(copy_X_train=False).
    gpr = BaggingRegressor(GaussianProcessRegressor(copy_X_train=False),
                           max_samples=1.0 / n_estimators,
                           n_estimators=n_estimators,
                           n_jobs=1)

    gpr.fit(x_train, y_train)
    util.save_sklearn_model(gpr, result_dir)

    y_cv_pred = gpr.predict(x_cv)

    predict.plot_prediction(y_cv_pred, result_dir, y_cv, train_y_max,
                            train_y_min)

    elapsed_time_run = time.time() - start_time_run
    print(
        time.strftime("Fitting time : %H:%M:%S",
                      time.gmtime(elapsed_time_run)))
Example #3
0
def run_prob(down_station, input_list, include_time, sample_size, network_type,
             nr_layers, nr_units):
    """Recreate the network, load stored weights and print its configuration.

    The input dimensions are read from the shape of the training features
    returned by ``data.construct``.  The model is built with ``model.create``
    and its weights are loaded (``by_name=True``) from the file named by
    ``util.model_file_name_lowest_cv``.  The elapsed wall-clock time is
    printed last.

    Args:
        down_station: Passed to ``util.get_result_dir`` and ``data.construct``.
        input_list: Passed through to ``data.construct``.
        include_time: Passed through to ``data.construct``.
        sample_size: Passed to ``util.get_result_dir`` and ``data.construct``.
        network_type: ``'bnn'`` expects 3-dimensional training features; any
            other value expects 2-dimensional ones.
        nr_layers: Passed to ``util.get_result_dir`` and ``model.create``.
        nr_units: Passed to ``util.get_result_dir`` and ``model.create``.
    """
    started_at = time.time()

    out_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                  nr_units, sample_size)

    # Only the training features (2nd of the 15 returned values) are needed.
    (_, features, _, _, _, _, _, _, _, _, _, _, _, _,
     _) = data.construct(down_station, input_list, include_time, sample_size,
                         network_type)

    if network_type == 'bnn':
        _, n_inputs, n_inputs_2 = features.shape
    else:
        n_inputs_2 = 0
        _, n_inputs = features.shape

    net = model.create(out_dir, n_inputs, nr_layers, nr_units, network_type,
                       n_inputs_2)
    weights_file = util.model_file_name_lowest_cv(out_dir)
    net.load_weights(weights_file, by_name=True)
    print(net.get_config())

    runtime = time.gmtime(time.time() - started_at)
    print(time.strftime("Training time : %H:%M:%S", runtime))
Example #4
0
def run_sklearn(down_station, input_list, include_time, sample_size,
                network_type, nr_layers, nr_units):
    """Predict on the test split with a saved scikit-learn model and plot it.

    The model is loaded from the result directory with
    ``util.load_sklearn_model``.  Only the ``predict`` call is timed; the
    duration and the resulting throughput are printed before the predictions
    are passed to ``plot_prediction``.

    Args:
        down_station: Passed to ``util.get_result_dir`` and ``data.construct``.
        input_list: Passed through to ``data.construct``.
        include_time: Passed through to ``data.construct``.
        sample_size: Passed to ``util.get_result_dir`` and ``data.construct``.
        network_type: Passed to ``util.get_result_dir`` and ``data.construct``.
        nr_layers: Only used to derive the result directory.
        nr_units: Only used to derive the result directory.
    """
    result_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                     nr_units, sample_size)

    # To evaluate on the validation split instead, take the 3rd/4th values
    # (y_cv, x_cv) and use them in place of y_test / x_test below.
    (_, _, _, _, y_test, x_test, _, _, train_y_max, train_y_min, _, _, _,
     _, _) = data.construct(down_station, input_list, include_time,
                            sample_size, network_type)

    my_model = util.load_sklearn_model(result_dir)

    # perf_counter() is monotonic and high-resolution, so short predictions
    # are measured sensibly and the interval can never be negative.
    start_time = time.perf_counter()
    y_pred = my_model.predict(x_test)
    elapsed_time = time.perf_counter() - start_time
    print(
        time.strftime("Prediction done : %H:%M:%S", time.gmtime(elapsed_time)))

    # A prediction faster than the clock resolution yields elapsed_time == 0;
    # report an infinite rate instead of raising ZeroDivisionError.
    if elapsed_time > 0:
        per_second = len(y_pred) / elapsed_time
    else:
        per_second = float("inf")
    print("{0} predictions in {1} seconds ({2} per second)".format(
        len(y_pred), elapsed_time, per_second))

    plot_prediction(y_pred, result_dir, y_test, train_y_max, train_y_min)
Example #5
0
def run(down_station, input_list, include_time, sample_size, network_type,
        nr_layers, nr_units):
    """Plot the training history, then predict on the test split.

    Steps, in order: resolve the result directory, call
    ``util.plot_training_performance`` on it, load the model file named by
    ``util.model_file_name``, build the data set with ``data.construct`` and
    pass the test split to ``predict``.

    Args:
        down_station: Passed to ``util.get_result_dir`` and ``data.construct``.
        input_list: Passed through to ``data.construct``.
        include_time: Passed through to ``data.construct``.
        sample_size: Passed to ``util.get_result_dir`` and ``data.construct``.
        network_type: Passed to ``util.get_result_dir`` and ``data.construct``.
        nr_layers: Only used to derive the result directory.
        nr_units: Only used to derive the result directory.
    """
    out_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                  nr_units, sample_size)

    util.plot_training_performance(out_dir)

    # Alternative checkpoint: util.model_file_name_lowest_cv(out_dir).
    checkpoint = util.model_file_name(out_dir)
    net = util.load_model(checkpoint)

    # data.construct returns 15 values.  Other evaluation targets that were
    # used with this function:
    #   * normal prediction: y_cv, x_cv (3rd and 4th values)
    #   * DWS prediction:    full_x, full_y (last two values); for specific
    #                        dates, see the internals of data.construct
    # The active variant below evaluates the test split (5th and 6th values).
    (_, _, _, _, y_test, x_test, _, _, y_max, y_min, _, _, _, _,
     _) = data.construct(down_station, input_list, include_time, sample_size,
                         network_type)
    predict(net, out_dir, x_test, y_test, y_max, y_min)
def run_svr(down_station, input_list, include_time, sample_size, network_type,
            include_diff, _C, _epsilon):
    """Fit a support-vector regressor and plot its validation predictions.

    For ``network_type == 'svr'`` a ``BaggingRegressor`` of 12 ``SVR`` base
    estimators is used, each drawing a ``1/12`` fraction of the training
    samples; for any other ``network_type`` a single ``LinearSVR`` is fitted.
    The fitted model is saved with ``util.save_sklearn_model`` and its
    predictions on the cross-validation split are plotted.

    Args:
        down_station: Passed to ``util.get_result_dir`` and ``data.construct``.
        input_list: Passed through to ``data.construct``.
        include_time: Passed through to ``data.construct``.
        sample_size: Passed to ``util.get_result_dir`` and ``data.construct``.
        network_type: Selects the estimator (see above); also passed to
            ``util.get_result_dir`` and ``data.construct``.
        include_diff: Not used by this function; kept so existing callers
            keep working.
        _C: ``C`` hyper-parameter of the estimator; also part of the
            result-directory name.
        _epsilon: ``epsilon`` hyper-parameter of the estimator; also part of
            the result-directory name.

    Raises:
        FileExistsError: If the result path exists but is not a directory.
    """
    start_time_run = time.time()

    result_dir = util.get_result_dir(down_station, network_type, _C, _epsilon,
                                     sample_size)
    # exist_ok=True replaces the racy "exists() then makedirs()" sequence.
    os.makedirs(result_dir, exist_ok=True)

    (y_train, x_train, y_cv, x_cv, _, _, _, _, train_y_max, train_y_min, _, _,
     _, _, _) = data.construct(down_station, input_list, include_time,
                               sample_size, network_type)

    # Variants that were left disabled in the original code:
    #   * GridSearchCV(SVR(kernel='rbf', gamma=0.1), cv=5,
    #                  param_grid={"C": [1e0, 1e1, 1e2, 1e3],
    #                              "gamma": np.logspace(-2, 2, 5)})
    #   * un-bagged SVR(C=_C, epsilon=_epsilon, verbose=True, cache_size=1024)
    if network_type == 'svr':
        n_estimators = 12
        svr = BaggingRegressor(SVR(C=_C,
                                   epsilon=_epsilon,
                                   verbose=True,
                                   cache_size=768),
                               max_samples=1.0 / n_estimators,
                               n_estimators=n_estimators,
                               n_jobs=-1)
    else:
        svr = LinearSVR(C=_C, epsilon=_epsilon, verbose=1, max_iter=20000)
    svr.fit(x_train, y_train)
    util.save_sklearn_model(svr, result_dir)

    y_cv_pred = svr.predict(x_cv)

    predict.plot_prediction(y_cv_pred, result_dir, y_cv, train_y_max,
                            train_y_min)

    elapsed_time_run = time.time() - start_time_run
    print(
        time.strftime("Fitting time : %H:%M:%S",
                      time.gmtime(elapsed_time_run)))
Example #7
0
def run(down_station, input_list, include_time, sample_size, network_type,
        nr_layers, nr_units, nr_epochs):
    """Build and train a network, then plot its performance and predictions.

    The input dimensions handed to ``model.create`` are derived from the
    training features returned by ``data.construct``:

    * ``'bnn'``, ``'cnn'``, ``'rnn_lstm'``, ``'rnn_gru'``: the features are
      unpacked as a 3-dimensional array; the 2nd and 3rd extents are used.
    * ``'multi_cnn'``: the features are iterated and each item is unpacked as
      a 3-dimensional array; ``input_dim`` becomes the list of 2nd extents.
    * anything else: the features are unpacked as a 2-dimensional array.

    After ``train`` returns, ``util.plot_training_performance`` and
    ``predict.run`` are called and the elapsed wall-clock time is printed.

    Args:
        down_station: Passed to ``util.get_result_dir``, ``data.construct``
            and ``predict.run``.
        input_list: Passed to ``data.construct`` and ``predict.run``.
        include_time: Passed to ``data.construct`` and ``predict.run``.
        sample_size: Passed to ``util.get_result_dir``, ``data.construct``
            and ``predict.run``.
        network_type: Selects how input dimensions are derived (see above).
        nr_layers: Passed to ``model.create`` and ``predict.run``.
        nr_units: Passed to ``model.create`` and ``predict.run``.
        nr_epochs: Passed through to ``train``.

    Raises:
        FileExistsError: If the result path exists but is not a directory.
        ValueError: If the feature array does not have the number of
            dimensions expected for ``network_type``.
    """
    start_time_run = time.time()

    result_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                     nr_units, sample_size)
    # exist_ok=True replaces the racy "exists() then makedirs()" sequence.
    os.makedirs(result_dir, exist_ok=True)

    (y_train, x_train, y_cv, x_cv, _, _, _, _, _, _, _, _, _, _,
     _) = data.construct(down_station, input_list, include_time, sample_size,
                         network_type)

    input_dim_2 = 0
    if network_type in ('bnn', 'cnn', 'rnn_lstm', 'rnn_gru'):
        (_, input_dim, input_dim_2) = x_train.shape
    elif network_type == 'multi_cnn':
        # The 3-way unpack keeps the original strictness: every input must
        # be 3-dimensional, otherwise a ValueError is raised.
        input_dim = [dim for (_, dim, _) in (x_i.shape for x_i in x_train)]
    else:
        (_, input_dim) = x_train.shape

    my_model = model.create(result_dir, input_dim, nr_layers, nr_units,
                            network_type, input_dim_2)
    train(my_model, result_dir, y_train, x_train, y_cv, x_cv, nr_epochs)

    util.plot_training_performance(result_dir)
    predict.run(down_station, input_list, include_time, sample_size,
                network_type, nr_layers, nr_units)

    elapsed_time_run = time.time() - start_time_run
    print(
        time.strftime("Training time : %H:%M:%S",
                      time.gmtime(elapsed_time_run)))
def run_sgdreg(down_station, input_list, include_time, sample_size,
               network_type, _tol, _eta0):
    """Fit an ``SGDRegressor`` and plot its validation predictions.

    The regressor is created with ``max_iter=100000`` and the given ``tol`` /
    ``eta0``, fitted on the training split from ``data.construct`` and its
    predictions on the cross-validation split are handed to
    ``predict.plot_prediction``.  The elapsed wall-clock time is printed.

    Args:
        down_station: Passed to ``util.get_result_dir`` and ``data.construct``.
        input_list: Passed through to ``data.construct``.
        include_time: Passed through to ``data.construct``.
        sample_size: Passed to ``util.get_result_dir`` and ``data.construct``.
        network_type: Passed to ``util.get_result_dir`` and ``data.construct``.
        _tol: ``tol`` of the regressor; also part of the result-directory
            name.
        _eta0: ``eta0`` of the regressor; also part of the result-directory
            name.

    Raises:
        FileExistsError: If the result path exists but is not a directory.
    """
    start_time_run = time.time()

    result_dir = util.get_result_dir(down_station, network_type, _tol, _eta0,
                                     sample_size)
    # exist_ok=True replaces the racy "exists() then makedirs()" sequence.
    os.makedirs(result_dir, exist_ok=True)

    (y_train, x_train, y_cv, x_cv, _, _, _, _, train_y_max, train_y_min, _, _,
     _, _, _) = data.construct(down_station, input_list, include_time,
                               sample_size, network_type)

    sgdreg = linear_model.SGDRegressor(max_iter=100000, tol=_tol, eta0=_eta0)
    sgdreg.fit(x_train, y_train)
    # NOTE(review): the fitted model is not persisted here, whereas the other
    # scikit-learn runners call util.save_sklearn_model -- confirm whether
    # that omission is intentional.
    y_pred = sgdreg.predict(x_cv)

    predict.plot_prediction(y_pred, result_dir, y_cv, train_y_max, train_y_min)

    elapsed_time_run = time.time() - start_time_run
    print(
        time.strftime("Fitting time : %H:%M:%S",
                      time.gmtime(elapsed_time_run)))
Example #9
0
def run_prob(down_station, input_list, include_time, sample_size, network_type,
             nr_layers, nr_units):
    """Recreate the network, load stored weights and predict on validation data.

    The input dimensions are read from the shape of the training features
    returned by ``data.construct``.  The model is built with ``model.create``,
    its weights are loaded (``by_name=True``) from the file named by
    ``util.model_file_name_lowest_cv``, its configuration is printed and the
    cross-validation split is passed to ``predict``.

    Args:
        down_station: Passed to ``util.get_result_dir`` and ``data.construct``.
        input_list: Passed through to ``data.construct``.
        include_time: Passed through to ``data.construct``.
        sample_size: Passed to ``util.get_result_dir`` and ``data.construct``.
        network_type: ``'bnn'`` expects 3-dimensional training features; any
            other value expects 2-dimensional ones.
        nr_layers: Passed to ``util.get_result_dir`` and ``model.create``.
        nr_units: Passed to ``util.get_result_dir`` and ``model.create``.
    """
    out_dir = util.get_result_dir(down_station, network_type, nr_layers,
                                  nr_units, sample_size)

    # The training targets (1st value) are not needed here.
    (_, features, y_cv, x_cv, _, _, _, _, y_max, y_min, _, _, _, _,
     _) = data.construct(down_station, input_list, include_time, sample_size,
                         network_type)

    if network_type == 'bnn':
        _, n_inputs, n_inputs_2 = features.shape
    else:
        n_inputs_2 = 0
        _, n_inputs = features.shape

    net = model.create(out_dir, n_inputs, nr_layers, nr_units, network_type,
                       n_inputs_2)
    weights_file = util.model_file_name_lowest_cv(out_dir)
    net.load_weights(weights_file, by_name=True)
    print(net.get_config())

    predict(net, out_dir, x_cv, y_cv, y_max, y_min)