Beispiel #1
0
def ts_forecasting():
    """Train and score a linear-regression and an XGBoost forecaster.

    Command-line options come from ``input_cmd()``: ``load`` selects the
    dataset, ``steps`` is the forecast horizon and ``fcast`` selects the
    forecasting strategy (``"direct"`` or ``"recursive"``).  For each model
    the cross-validation, test and forecast MAPE are printed.

    Raises
    ------
    ValueError
        If ``args.fcast`` is neither ``"direct"`` nor ``"recursive"``.
    """
    args = input_cmd()

    # Validate the strategy up front so a typo fails with a clear message
    # instead of an unbound-variable error after the first model is trained.
    fcast_mode = args.fcast
    if fcast_mode not in ("direct", "recursive"):
        raise ValueError(f"Unknown forecast strategy: {fcast_mode!r}")
    # The printed forecast score is labelled with the strategy actually used.
    fcast_label = fcast_mode.capitalize()

    # get energy consumption data
    load = args.load
    f_steps = args.steps

    data = get_dataset(load_to_predict=load)

    c_target = data["energy"]
    t_target, f_target, _ = forecast_split(c_target, n_steps=f_steps)

    # ML methods
    features, target = get_features(t_target)
    # lag feature names are expected to look like "<prefix>_<lag>"
    lags = [int(f.split("_")[1]) for f in features if "lag" in f]
    forecaster = Forecaster(f_steps, lags=lags)

    print("Forecast with Linear Regression model")
    model, cv_score, test_score = linear_model(features, target)

    if fcast_mode == "direct":
        # the direct strategy is handed the training routine itself
        fcast_linear = forecaster.direct(t_target, linear_model)
    else:
        # the recursive strategy reuses the model fitted above
        fcast_linear = forecaster.recursive(t_target, model)

    fcast_score = mape(f_target, fcast_linear)
    print(f"""
Linear Regression scores
--------------
Cross-validation MAPE: {round(cv_score, 2)}%
Test MAPE: {round(test_score, 2)}%
{fcast_label} Forecast MAPE: {round(fcast_score, 2)}%
    """)

    print("Forecast with XGBoost model")
    model, cv_score, test_score = xgboost_model(features, target, max_evals=25)

    if fcast_mode == "direct":
        fcast_xgb = forecaster.direct(t_target, xgboost_model)
    else:
        fcast_xgb = forecaster.recursive(t_target, model)

    fcast_score = mape(f_target, fcast_xgb)
    print(f"""
XGBoost scores
--------------
Cross-validation MAPE: {round(cv_score, 2)}%
Test MAPE: {round(test_score, 2)}%
{fcast_label} Forecast MAPE: {round(fcast_score, 2)}%
    """)
def main():
    """Evaluate every predictor on every SKU and period.

    For each (model, SKU, period) combination the series is loaded up to
    the end of the period, split into train/test, fitted and forecast.
    A plot, the forecast with residuals and the MAPE/RMSE scores are saved.
    """
    results = init_results()
    for model_name, predictor in predictors.items():
        for sku in configuration.SKUS:
            for period_ind, period in enumerate(configuration.PERIODS):
                # <results dir><model>\<sku>\<period index>
                res_path = "\\".join([
                    configuration.FORECAST_RES_DIR + model_name,
                    sku,
                    str(period_ind),
                ])
                end_of_period = period[1]

                real_series = loader.load_test_sku(
                    sku,
                    base_dir=configuration.BASE_DIR,
                    end_of_period=end_of_period)
                train, test = train_test_split(real_series,
                                               configuration.N_PREDS)
                train = utils.remove_holidays(train)

                predictor.fit(train, configuration.N_PREDS)
                forecast = predictor.predict(configuration.N_PREDS)
                residuals = predictor.resid

                # the plot and the RMSE use max-scaled series,
                # the MAPE uses the raw ones
                forecast_scaled = utils.scale_by_max(forecast)
                test_scaled = utils.scale_by_max(test)
                save_plot(test_scaled, forecast_scaled, end_of_period,
                          res_path)
                save_forecast_resid(forecast, residuals, res_path)

                mape_score = utils.mape(y_true=test, y_pred=forecast)
                rmse_score = utils.rmse(y_true=test_scaled,
                                        y_pred=forecast_scaled)
                save_result(results, model_name, sku, period_ind, mape_score,
                            rmse_score, predictor.describe())
    def test_RNN(self):
        """Check that ``RNN`` fits a constant series with high accuracy."""
        network = RNN()

        # Build the data already normalized (divided by 1000).  The target
        # array itself is reused as the second and third network inputs.
        train_input_1 = np.array([[100, 100] for _ in range(100)]) / 1000
        train_target = np.array([[100] for _ in range(100)]) / 1000
        train_input_2 = train_input_3 = train_target

        test_input_1 = np.array([[101, 101] for _ in range(50)]) / 1000
        test_target = np.array([[101] for _ in range(50)]) / 1000
        test_input_2 = test_input_3 = test_target

        # number of training cycles
        epochs = 100

        # each epoch issues one full-batch train call per training row
        for _epoch in range(epochs):
            for _row in train_input_1:
                train_output = network.train(train_input_1, train_input_2,
                                             train_input_3, train_target)

        # evaluate on data the network has not seen
        test_output = network.test(test_input_1, test_input_2, test_input_3)

        # undo the normalization before scoring
        train_output *= 1000
        train_target *= 1000

        test_output *= 1000
        test_target *= 1000

        self.assertGreaterEqual(100 - mape(train_target, train_output), 99.00)
        self.assertGreaterEqual(100 - mape(test_target, test_output), 97.00)
    def test(testx, testy):
        """Forecast every test series with its own ARIMA(1, 1, 0) model.

        Parameters
        ----------
        testx : array whose first axis indexes the input series.
        testy : expected values, indexed the same way as ``testx``.

        Returns
        -------
        The MAPE returned by ``utils.mape`` over all forecasts, averaged
        along axis 0.
        """
        print("Test")
        stime = time.time()
        predlist = []
        for idx in range(testx.shape[0]):
            # Fit on the idx-th series so the forecast matches testy[idx];
            # fitting on testx[0] would score one model against every target.
            model = ARIMA(testx[idx], order=(1, 1, 0))
            model_fit = model.fit(disp=0)
            # only the point forecast is used; stderr and conf_int are ignored
            pred, stderr, conf_int = model_fit.forecast(config.out_seq_length)
            predlist.append(pred)
            if idx % 20 == 0:
                # progress line: index, elapsed seconds, MAPE of this series
                print("Test %d %d" % (idx, time.time() - stime),
                      utils.mape(pred, testy[idx]))

        etime = time.time()
        print("Test %d" % (etime - stime))
        predlist = np.stack(predlist, axis=0)
        mapeloss = utils.mape(predlist, testy)
        tloss = np.mean(mapeloss, axis=0)
        print("Test ", tloss)
        return tloss
    def test_FeedForward(self):
        """Check that ``FeedForward`` learns a constant line accurately."""
        network = FeedForward()

        # Inputs and training targets are normalized (divided by 1000).
        # The test targets stay in original units because only the network
        # outputs are rescaled before scoring.
        train_input = np.array([[100, 100] for _ in range(100)]) / 1000
        train_target = np.array([[100] for _ in range(100)]) / 1000
        test_input = np.array([[101, 101] for _ in range(50)]) / 1000
        test_target = np.array([[101] for _ in range(50)])

        # number of training cycles
        epochs = 100

        # each epoch issues one full-batch train call per training row
        for _epoch in range(epochs):
            for _row in train_input:
                train_output = network.train(train_input, train_target)

        # evaluate on data the network has not seen
        test_output = network.test(test_input)

        # undo the normalization before scoring
        train_output *= 1000
        train_target *= 1000

        test_output *= 1000

        # the network must reproduce the line with high accuracy
        self.assertGreaterEqual(100 - mape(train_target, train_output), 99.00)
        self.assertGreaterEqual(100 - mape(test_target, test_output), 97.00)
    def test_RNN_V2(self):
        """Check that ``RNN_V2`` learns a constant line accurately."""
        network = RNN_V2()

        # Inputs and training targets are normalized (divided by 1000).
        # The test targets stay in original units because only the network
        # outputs are rescaled before scoring.
        train_input = np.array([[100, 100] for _ in range(100)]) / 1000
        train_target = np.array([[100] for _ in range(100)]) / 1000
        test_input = np.array([[101, 101] for _ in range(50)]) / 1000
        test_target = np.array([[101] for _ in range(50)])

        # reshape to the 3d layout [inputs, timesteps, features]
        train_input = to_3d(train_input)
        test_input = to_3d(test_input)

        # a single call runs all training epochs
        train_output = network.train(train_input, train_target, epochs=100)

        # evaluate on data the network has not seen
        test_output = network.test(test_input)

        # undo the normalization before scoring
        train_output *= 1000
        train_target *= 1000

        test_output *= 1000

        # the network must reproduce the line with high accuracy
        self.assertGreaterEqual(100 - mape(train_target, train_output), 99.00)
        self.assertGreaterEqual(100 - mape(test_target, test_output), 97.00)
 def rank_model(self,
                fcst_model,
                act_st,
                fcst_st,
                test_type,
                test_st,
                rank_by='mae',
                error_by='mape'):
     """Rank models per id and period from their logged forecasts.

     Args:
         fcst_model: mapping from period to the collection of model names
             configured for it; log rows for other periods or models are
             dropped.
         act_st: start date used when filling the actual series.
         fcst_st: forecast start date; actuals are filled up to the day
             before and only log rows with ``dsr`` before it are used.
         test_type: ``'daily'`` keeps daily actuals; any other value
             converts them with ``TimeSeriesForecasting.daytomth``.
         test_st: earliest ``dsr`` (inclusive) of the log rows to evaluate.
         rank_by: error column used for ranking (``'mae'`` or ``'mape'``).
         error_by: error column copied into the ``error`` column.

     Returns:
         DataFrame with one row per (id, period, model) holding the mean
         ``mae`` and ``mape``, the dense ``rank`` within each (id, period)
         (1 = smallest error) and ``error``.
     """
     act_end = fcst_st + datetime.timedelta(days=-1)
     # Collect the per-id frames and concatenate once: DataFrame.append
     # copied the whole frame on every call and was removed in pandas 2.0.
     frames = []
     for i in self.df_act['id'].unique():
         df_i = self.df_act[self.df_act['id'] == i].copy()
         df_i = TimeSeriesForecasting.filldaily(df_i, act_st, act_end)
         if test_type != 'daily':
             df_i = TimeSeriesForecasting.daytomth(df_i)
         df_i['id'] = i
         frames.append(df_i[['id', 'ds', 'y']])
     # pd.concat rejects an empty list, so fall back to an empty frame
     df_act = (pd.concat(frames, ignore_index=True)
               if frames else pd.DataFrame())
     df_rank = self.df_fcstlog[(self.df_fcstlog['dsr'] >= test_st)
                               & (self.df_fcstlog['dsr'] < fcst_st)].copy()
     # select only in config file
     df_rank['val'] = df_rank['period'].map(fcst_model)
     df_rank = df_rank[df_rank['val'].notnull()].copy()
     df_rank['val'] = df_rank.apply(lambda x: x['model'] in x['val'],
                                    axis=1)
     df_rank = df_rank[df_rank['val'] == True].copy()
     # calculate error comparing with actual
     df_rank = pd.merge(df_rank,
                        df_act.rename(columns={'y': 'actual'}),
                        on=['id', 'ds'],
                        how='left')
     df_rank['mae'] = (df_rank['actual'] - df_rank['forecast']).abs()
     df_rank['mape'] = df_rank.apply(
         lambda x: mape(x['actual'], x['forecast']), axis=1)
     # rows without a matching actual contribute zero error
     df_rank[['mae', 'mape']] = df_rank[['mae', 'mape']].fillna(0)
     # ranking error
     df_rank = df_rank.groupby(['id', 'period', 'model'],
                               as_index=False).agg({
                                   'mae': 'mean',
                                   'mape': 'mean'
                               })
     df_rank['rank'] = df_rank.groupby(['id', 'period'
                                        ])[rank_by].rank(method='dense',
                                                         ascending=True)
     df_rank['error'] = df_rank[error_by]
     return df_rank
def baseline(df):
    """Score a naive forecast that repeats 2016 as the prediction for 2017.

    Values below 1 in the 2017 actuals are treated as missing and replaced
    by the last preceding value.  The number of days, the RMSE and the MAPE
    are printed; nothing is returned.

    Parameters
    ----------
    df : date-indexed series covering 2016 and 2017.  It is not modified.
    """
    # predict next year by just taking the values from last year
    predicted = df.loc['2016-01-01':'2016-12-31']
    actual = df.loc['2017-01-01':'2017-12-31']
    # mask() builds a new object, so the caller's data is never written to
    # (assigning into the .loc slice could modify ``df`` itself);
    # ffill() pads the gaps with the last datapoint.
    actual = actual.mask(actual < 1).ffill()
    # transform df to series
    predicted = pd.Series(predicted)
    actual = pd.Series(actual)
    print("Number of actual days:", len(actual))
    print('Number of predicted days:', len(predicted))
    rmse = sqrt(mean_squared_error(actual, predicted))
    print('For 12 Months RMSE: %.3f' % rmse)
    mape_value = mape(actual, predicted)
    print("For 12 Months MAPE :", mape_value)
    def test(testx, testy):
        """Forecast ``config.out_seq_length`` steps recursively and print the MAPE.

        At every step ``model`` (taken from the enclosing scope) predicts
        from ``testx``, then the window is shifted left by one column and
        the prediction becomes the last column.  ``testx`` is modified in
        place.
        """
        print("Test")
        started = time.time()
        forecasts = []
        for step in range(config.out_seq_length):
            print("Test %d %d" % (step, time.time() - started))
            step_pred = model.predict(testx)
            forecasts.append(step_pred)
            # drop the oldest column and feed the new prediction back in
            testx[:, :-1] = testx[:, 1:]
            testx[:, -1] = step_pred
        finished = time.time()
        print("Test %d" % (finished - started))

        # one column per forecast step
        stacked = np.stack(forecasts, axis=-1)
        mapeloss = utils.mape(stacked, testy)
        tloss = np.mean(mapeloss, axis=0)
        print("Test ", tloss)
Beispiel #10
0
def test_model(model, X_test, y_test):
    """
    Get the MAPE for a given model on a test dataset

    Parameters
    ----------
    model: a model implementing the standard scikit-learn interface
    X_test: pd.DataFrame holding the features of the test set
    y_test: pd.Series holding the test set target

    Returns
    -------
    test_score: the MAPE on the test dataset, as returned by ``mape``
    """

    predictions = model.predict(X_test)
    # true values first, predictions second
    return mape(y_test.values, predictions)
Beispiel #11
0
    def error(self, data, times=None, metric='mape'):
        """
        Model prediction error.

        data : sequence
            Observed values the simulated output is compared with.
        times : optional
            Time points handed to ``self.simulate``.
            Default: ``numpy.arange(len(data))``.
        metric : str
            Error metric to use. It can be "mape", "smape", "logaccratio", and
            "rmse". Default: mape.

        Raises ``ValueError`` for any other metric name.
        """
        if times is None:
            times = numpy.arange(len(data))
        # The simulation runs for every metric, including "rmse", which
        # only reads the stored cost.
        simulated = self.simulate(times)

        if metric == 'rmse':
            return numpy.sqrt(self.cost_)
        if metric == 'mape':
            return mape(simulated, data)
        if metric == 'smape':
            return smape(simulated, data)
        if metric == 'logaccratio':
            return logaccratio(simulated, data)
        raise ValueError("No such metric: {}".format(metric))
Beispiel #12
0
def main():
    """Fit ``Smooth_Predictor`` on daily sales and plot a 28-day forecast.

    The last 28 days are held out, forecast and plotted against the real
    values; the MAPE is printed with the predictor description.
    """
    np.seterr(all='raise')

    data = loader.load_product_class_data("rohliky.tsv", False)
    cutoff = pd.Timestamp('2017-10-01')
    data = data[data.index < cutoff]

    # daily totals as floats, with missing days filled with zero
    daily_totals = data.groupby(pd.Grouper(freq='D'))['product_count'].sum()
    series = daily_totals.fillna(0).astype('float')

    n_preds = 28
    predictor = Smooth_Predictor()
    predictor.fit(series[:-n_preds])
    res = predictor.predict(npred=n_preds)

    plt.figure(figsize=(12, 8))
    steps = list(range(n_preds))
    plt.plot(steps, res[-n_preds:].values, label='Result')
    plt.plot(steps, series.values[-n_preds:], label='Real')

    # after plotting, zero the forecast wherever the real value is zero
    held_out = series[-n_preds:]
    res[held_out.values == 0] = 0
    mape_score = utils.mape(held_out, res)

    plt.legend()
    plt.show()
    print(predictor.describe(), ", mape ", mape_score)
 def test_mape(self):
     """MAPE of a series compared with itself must be exactly zero."""
     constant = np.array([10] * 5)
     self.assertEqual(mape(constant, constant), 0.0)
Beispiel #14
0
def run_training(
    energy,
    T_val,
    LATENT_DIM_1,
    LATENT_DIM_2,
    BATCH_SIZE,
    LEARNING_RATE,
    ALPHA,
):
    """Train the model, keep the best epoch's weights and log the scores.

    The model is fitted with early stopping and per-epoch weight
    checkpoints.  The weights of the epoch with the lowest validation loss
    are reloaded and saved as ``bestmodel.json`` / ``bestmodel.h5``.  The
    test MAPE is computed, both metrics are logged to ``run`` and the
    saved model files are moved to ``./outputs/model``.
    """
    from utils import create_evaluation_df, mape

    train_inputs, valid_inputs, test_inputs, y_scaler = create_input(
        energy, T_val)

    # Build the model and the training callbacks
    model = get_model(LEARNING_RATE, T_val, ALPHA, LATENT_DIM_1, LATENT_DIM_2)
    stop_early = EarlyStopping(monitor="val_loss", min_delta=0, patience=5)
    checkpoint = ModelCheckpoint(
        "model_{epoch:02d}.h5",
        save_best_only=True,
        mode="min",
        period=1,
        save_weights_only=True,
    )

    # Fit on the training split, validating on the validation split
    history = model.fit(
        train_inputs["X"],
        train_inputs["target"],
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=(valid_inputs["X"], valid_inputs["target"]),
        callbacks=[stop_early, checkpoint, LogRunMetrics()],
        verbose=0,
    )

    # Reload the weights of the epoch with the smallest validation loss;
    # checkpoint files are numbered from 1, hence the +1.
    val_losses = np.array(history.history["val_loss"])
    best_epoch = np.argmin(val_losses) + 1
    validationLoss = np.min(val_losses)
    model.load_weights("model_{:02d}.h5".format(best_epoch))

    # Save the best model: architecture as JSON, weights as HDF5
    model_name = "bestmodel"
    architecture = model.to_json()
    with open("{}.json".format(model_name), "w") as json_file:
        json_file.write(architecture)
    model.save_weights("{}.h5".format(model_name))

    # Score the test split
    predictions = model.predict(test_inputs["X"])
    eval_df = create_evaluation_df(predictions, test_inputs, HORIZON, y_scaler)
    testMAPE = mape(eval_df["prediction"], eval_df["actual"])

    # The per-epoch checkpoints are no longer needed
    for checkpoint_file in glob("model_*.h5"):
        os.remove(checkpoint_file)

    # Log validation loss and test MAPE
    run.log("validationLoss", validationLoss)
    run.log("testMAPE", testMAPE)

    # create a ./outputs/model folder in the compute target
    # files saved in the "./outputs" folder are automatically uploaded into run history
    os.makedirs("./outputs/model", exist_ok=True)
    for saved_file in glob("bestmodel*"):
        shutil.move(saved_file, "./outputs/model")
Beispiel #15
0
# NOTE(review): plt.hold() only exists in older Matplotlib releases
# (deprecated in 2.0, removed in 3.0) -- confirm the target version.
plt.hold(True)
# Floor division keeps the slice bound an integer on Python 2 and 3.
plt.plot(x[n // 2:, :], yhat)
plt.savefig('./bases/results/polinomio_estimado')
plt.clf()

# print(...) with a single argument behaves the same on Python 2 and 3.
print('Letra A')
print('Polinomio encontrado: ')
print('y = {:3.3f} + {:3.3f}x {: 3.3f}x^2\n'.format(what[0][0], what[1][0],
                                                    what[2][0]))

# b) Compute the RMSE and MAPE of the fitted model on the second half of the
# data;
print('Letra B')
rmse = utils.rmse(y[n // 2:, :], yhat)
print('RMSE = ' + str(rmse) + '\n')
mape = utils.mape(y[n // 2:, :], yhat)
print('MAPE = ' + str(mape) + '\n')

# c) Estimate the model that best fits the data using all of the data.
# Report the parameters of the model found. Use the model-complexity
# criteria to help find the model. Compute the RMSE and MAPE of the fitted
# model on the data.
print('Letra C')
MAXDEGREE = 5
# NOTE(review): bare attribute access with no effect; plt.figure() may have
# been intended -- confirm before changing.
plt.Figure
plt.hold(True)
plt.grid(True)
plt.plot(x, y)
plt.title('Ajuste Polinomial')
plt.ylabel('y')
plt.xlabel('x')
Beispiel #16
0
                                              2 * y_train_pred[:, 1:])
# Keep only the first output column of the training predictions and map it
# back to the original target scale.
# NOTE(review): presumably column 0 is the predicted value and column 1 an
# uncertainty estimate (the bands use +/- 2 * column 1) -- confirm against
# the network definition.
y_train_pred = y_scaler.inverse_transform(y_train_pred[:, :1])

# Same treatment for the test split: targets back to the original scale,
# then the network output as a NumPy array.
y_test = y_scaler.inverse_transform(y_test)
y_test_pred = net(x_test).cpu().detach().numpy()
# The band limits must be computed before y_test_pred is overwritten with
# the rescaled first column below.
y_test_pred_min = y_scaler.inverse_transform(y_test_pred[:, :1] -
                                             2 * y_test_pred[:, 1:])
y_test_pred_max = y_scaler.inverse_transform(y_test_pred[:, :1] +
                                             2 * y_test_pred[:, 1:])
y_test_pred = y_scaler.inverse_transform(y_test_pred[:, :1])

# Training part: actual values, predictions and the shaded band.
plt.plot(y_train)
plt.plot(y_train_pred)
plt.fill_between(np.arange(y_train.shape[0]),
                 y_train_pred_min.squeeze(),
                 y_train_pred_max.squeeze(),
                 color='b',
                 alpha=.1)

# Test part, drawn right after the training samples on the x axis
# (positions y_train.shape[0] .. df.shape[0] - 1).
plt.plot(np.arange(y_train.shape[0], df.shape[0]), y_test)
plt.plot(np.arange(y_train.shape[0], df.shape[0]), y_test_pred)
plt.fill_between(np.arange(y_train.shape[0], df.shape[0]),
                 y_test_pred_min.squeeze(),
                 y_test_pred_max.squeeze(),
                 color='b',
                 alpha=.1)
plt.show()

# Test-set errors on the original scale.
print('TEST RMSE: {}'.format(rmse(y_test, y_test_pred[:, 0])))
print('TEST MAPE: {}'.format(mape(y_test, y_test_pred[:, 0])))
Beispiel #17
0
plt.xlabel('x')
# NOTE(review): plt.hold() only exists in older Matplotlib releases
# (deprecated in 2.0, removed in 3.0) -- confirm the target version.
plt.hold(True)
# Floor division keeps the slice bound an integer on Python 2 and 3.
plt.plot(x[n // 2:, :], yhat)
plt.savefig('./bases/results/polinomio_estimado')
plt.clf()

# print(...) with a single argument behaves the same on Python 2 and 3.
print('Letra A')
print('Polinomio encontrado: ')
print('y = {:3.3f} + {:3.3f}x {: 3.3f}x^2\n'.format(what[0][0], what[1][0], what[2][0]))

# b) Compute the RMSE and MAPE of the fitted model on the second half of the
# data;
print('Letra B')
rmse = utils.rmse(y[n // 2:, :], yhat)
print('RMSE = ' + str(rmse) + '\n')
mape = utils.mape(y[n // 2:, :], yhat)
print('MAPE = ' + str(mape) + '\n')

# c) Estimate the model that best fits the data using all of the data.
# Report the parameters of the model found. Use the model-complexity
# criteria to help find the model. Compute the RMSE and MAPE of the fitted
# model on the data.
print('Letra C')
MAXDEGREE = 5
# NOTE(review): bare attribute access with no effect; plt.figure() may have
# been intended -- confirm before changing.
plt.Figure
plt.hold(True)
plt.grid(True)
plt.plot(x, y)
plt.title('Ajuste Polinomial')
plt.ylabel('y')
plt.xlabel('x')
Beispiel #18
0
# standard deviation). Randomly select 75% of the data for training.
# Return the structure of the constructed tree.
nclasses = np.union1d(y, y).size
n = len(y)
randind = np.arange(0, n)
np.random.shuffle(randind)
# NumPy requires integer slice bounds; 0.75 * n is a float, so truncate it.
ind_train = randind[0:int(0.75 * n)]
ind_test = randind[int(0.75 * n):n]

tree = RegressionTree(nclasses)
tree.train(x[ind_train, :], y[ind_train], SDRMIN=0.1, NMIN=3)

g, pos = tree.gerar_grafo()
utils.draw_graph(g, pos)

# b) Use the remaining 25% of the data for evaluation. Report the MAPE and
# RMSE measures.

yhat = tree.estimate(x[ind_test, :])

rmse = utils.rmse(y[ind_test], yhat)
mape = utils.mape(y[ind_test], yhat)
# print(...) with a single argument behaves the same on Python 2 and 3.
print('RMSE encontrado: {:3.2f}\nMAPE encontrado: {:3.2f}'.format(rmse, mape))

plt.plot(y[ind_test])
# NOTE(review): plt.hold() only exists in older Matplotlib releases
# (deprecated in 2.0, removed in 3.0) -- confirm the target version.
plt.hold(True)
plt.plot(yhat)
plt.legend(['real', 'estimado'])
plt.show()

# c) Try to derive the decision rules from the constructed tree.
Beispiel #19
0
# Transform data: apply transform() to the third field of every record and
# keep the first two fields unchanged.
data = data.map(lambda x: (x[0], x[1], transform(x[2])))

# Split into train and test sets
train, test = utils.train_test_split(data)

# Convert both splits to labelled points (records exposing .label and
# .features, as used below)
train = utils.labelled_points(train)
test = utils.labelled_points(test)

# Regression (this is not least squares but SGD)
lrm = LinearRegressionWithSGD()
model = lrm.train(train)

# Test: MAPE over (label, prediction) pairs for each split
mape_train = utils.mape(
    train.map(lambda x: (x.label, model.predict(x.features))))
mape_test = utils.mape(
    test.map(lambda x: (x.label, model.predict(x.features))))

# Prediction for every record of the full data set
# NOTE(review): presumably x[1] is the actual value and x[2] the features --
# verify against the code that builds `data`.
actual_pred = data.map(lambda x: (x[0], x[1], model.predict(x[2])))

# Split into (first field, actual) and (first field, prediction) pairs
actual = actual_pred.map(lambda x: (x[0], x[1]))
prediction = actual_pred.map(lambda x: (x[0], x[2]))

# Denormalization of both series using the stored data_min / data_max
actual = utils.denormalization(sc, actual, data_min, data_max)
prediction = utils.denormalization(sc, prediction, data_min, data_max)

# JOIN
# Build the test feature matrix and target vector.
# NOTE(review): features are extracted with y_col='quantity' while the target
# is read from the 'sales' column -- confirm this is intended.
test_data_features = Features.feature_extraction(test_data, y_col='quantity')
X_test = test_data_features.toarray()
y_test = test_data['sales'].values


print("test data shape: {}".format(test_data.shape))

## Linear Regression

# Fit ordinary least squares on the training data and predict the test set.
ols = LinearRegression(fit_intercept=True)
ols.fit(X_train, y_train)
y_hat = ols.predict(X_test)
test_data["y_hat"] = y_hat
# NOTE(review): the metrics are called as (prediction, actual) -- verify the
# argument order expected by mae / rmse / mape.
test_mae = mae(y_hat, y_test)
test_rmse = rmse(y_hat, y_test)
test_mape = mape(y_hat, y_test)

print("--OLS--")
print("MAE - (test): {:.2f}".format(test_mae))
print("RMSE - (test): {:.2f}".format(test_rmse))
print("MAPE: - (test): {:.4f}".format(test_mape))

# Same metrics after summing sales and predictions per (time, region) group.
prod_errors = test_data[['region', 'time', 'sales', 'y_hat']].groupby(['time', "region"]).sum()
prod_mae = mae(prod_errors.y_hat, prod_errors.sales)
prod_rmse = rmse(prod_errors.y_hat, prod_errors.sales)
prod_mape = mape(prod_errors.y_hat, prod_errors.sales)
print("Region MAE - (test):  {:.2f}".format(prod_mae))
print("Region RMSE - (test):  {:.2f}".format(prod_rmse))
print("Region MAPE - (test):  {:.4f}".format(prod_mape))

Beispiel #21
0
# standard deviation). Randomly select 75% of the data for training.
# Return the structure of the constructed tree.
nclasses = np.union1d(y, y).size
n = len(y)
randind = np.arange(0, n)
np.random.shuffle(randind)
# NumPy requires integer slice bounds; 0.75 * n is a float, so truncate it.
ind_train = randind[0:int(0.75 * n)]
ind_test = randind[int(0.75 * n):n]

tree = RegressionTree(nclasses)
tree.train(x[ind_train, :], y[ind_train], SDRMIN=0.1, NMIN=3)

g, pos = tree.gerar_grafo()
utils.draw_graph(g, pos)

# b) Use the remaining 25% of the data for evaluation. Report the MAPE and
# RMSE measures.

yhat = tree.estimate(x[ind_test, :])

rmse = utils.rmse(y[ind_test], yhat)
mape = utils.mape(y[ind_test], yhat)
# print(...) with a single argument behaves the same on Python 2 and 3.
print('RMSE encontrado: {:3.2f}\nMAPE encontrado: {:3.2f}'.format(rmse,mape))

plt.plot(y[ind_test])
# NOTE(review): plt.hold() only exists in older Matplotlib releases
# (deprecated in 2.0, removed in 3.0) -- confirm the target version.
plt.hold(True)
plt.plot(yhat)
plt.legend(['real','estimado'])
plt.show()

# c) Try to derive the decision rules from the constructed tree.