Example #1
0
        def test_likelihoods_and_resulting_mean_forecasts(self):
            """Fit an RNNModel per likelihood and compare forecast means with series means.

            For every ``(likelihood, series, diff1, diff2)`` entry of
            ``self.lkl_series``, the mean of the sampled forecast must stay
            within ``diff1`` of the training-series mean on component 0 and
            within ``diff2`` on component 1.
            """

            def _component_means(ts):
                # all_values() is indexed as [:, component, :]; average
                # everything belonging to component 0 and to component 1.
                values = ts.all_values()
                return np.mean(values[:, 0, :]), np.mean(values[:, 1, :])

            for lkl, series, diff1, diff2 in self.lkl_series:
                model = RNNModel(input_chunk_length=5, likelihood=lkl)
                model.fit(series, epochs=50)
                forecast = model.predict(n=50, num_samples=50)

                series_mean_0, series_mean_1 = _component_means(series)
                forecast_mean_0, forecast_mean_1 = _component_means(forecast)

                gap_0 = abs(series_mean_0 - forecast_mean_0)
                message_0 = (
                    "The difference between the mean forecast and the mean series is larger "
                    "than expected on component 0 for distribution {}".format(lkl)
                )
                self.assertLess(gap_0, diff1, message_0)

                gap_1 = abs(series_mean_1 - forecast_mean_1)
                message_1 = (
                    "The difference between the mean forecast and the mean series is larger "
                    "than expected on component 1 for distribution {}".format(lkl)
                )
                self.assertLess(gap_1, diff2, message_1)
Example #2
0
def lstm():
    """Train one LSTM per company on its closing prices and store a one-step forecast.

    For each name in ``lstCompanies`` the documents of the matching ``db``
    collection are loaded into a DataFrame, converted into a business-day
    ``TimeSeries`` of the ``Close`` column, and used to fit a fresh
    ``RNNModel``. The first predicted value, rounded to two decimals, is
    inserted into ``db.prediction`` together with today's date.
    """
    for company in lstCompanies:
        records = list(db[company].find({}))
        frame = pd.DataFrame(records).drop('_id', axis=1)
        for column in ('Open', 'Close'):
            frame[column] = frame[column].astype('float')

        # 'B' = business-day frequency; missing dates are inserted and the
        # resulting gaps are filled by auto_fillna.
        closing_prices = TimeSeries.from_dataframe(frame,
                                                   'Date', ['Close'],
                                                   freq='B',
                                                   fill_missing_dates=True)
        closing_prices = auto_fillna(closing_prices)

        rnn_settings = {
            'model': 'LSTM',  # RNN module type
            'output_length': 1,  # one step is forecast per call
            'hidden_size': 25,
            'n_rnn_layers': 1,
            'input_length': 12,
            'batch_size': 16,
            'n_epochs': 200,
            'optimizer_kwargs': {'lr': 1e-3},
            'model_name': '{}_RNN'.format(company),
        }
        forecaster = RNNModel(**rnn_settings)
        forecaster.fit(closing_prices)

        # First value of the single forecast step.
        next_close = forecaster.predict(1).values()[0][0]
        prediction_doc = {
            "Date": datetime.datetime.today(),
            "Company": company,
            "Prediction": round(float(next_close), 2),
        }
        db.prediction.insert_one(prediction_doc)
Example #3
0
        def test_max_samples_per_ts(self):
            """Smoke test: fitting with ``max_samples_per_ts`` set must not raise."""
            series = linear_timeseries(start_value=0, end_value=1, length=50)

            settings = dict(
                input_chunk_length=20,
                output_chunk_length=2,
                n_epochs=2,
                batch_size=32,
            )
            RNNModel(**settings).fit(series, max_samples_per_ts=5)
Example #4
0
        def test_sample_smaller_than_batch_size(self):
            """Smoke test: training must not crash with fewer samples than ``batch_size``.

            The 50-step series is expected to yield fewer training samples than
            the configured batch size of 32 (the original note put the count at
            27; that figure is not re-derived here). The model should still
            train on those samples without raising.
            """
            short_series = linear_timeseries(start_value=0,
                                             end_value=1,
                                             length=50)

            settings = dict(
                input_chunk_length=20,
                output_chunk_length=2,
                n_epochs=2,
                batch_size=32,
            )
            RNNModel(**settings).fit(short_series)
Example #5
0
    def test_ensemble_models_denoising_multi_input(self):
        """Check a regression ensemble of torch and regression models on the denoising task.

        The ensemble is built from an RNNModel, a BlockRNNModel and two
        RegressionModels and handed to ``helper_test_models_accuracy`` together
        with the third and fourth series returned by ``denoising_input``.

        WARNING (from the original author): this test isn't numerically
        stable, changing ``self.RANDOM_SEED`` can lead to exploding
        coefficients.
        """
        horizon = 10
        _, _, ts_sum2, ts_cov2 = self.denoising_input()
        torch.manual_seed(self.RANDOM_SEED)

        # Both torch-based members share exactly the same configuration.
        torch_kwargs = dict(
            input_chunk_length=20,
            output_chunk_length=horizon,
            n_epochs=1,
            random_state=self.RANDOM_SEED,
        )
        members = [RNNModel(**torch_kwargs), BlockRNNModel(**torch_kwargs)]
        members.extend(
            RegressionModel(lags_past_covariates=[-1]) for _ in range(2))

        ensemble = RegressionEnsembleModel(members, horizon)
        self.helper_test_models_accuracy(ensemble, horizon, ts_sum2, ts_cov2,
                                         3)
Example #6
0
 def test_call_predict_global_models_multivariate_input_no_covariates(self):
     """Fit a naive ensemble of three global models on ``self.seq1`` and predict from it."""
     members = [
         RNNModel(12, n_epochs=1),
         TCNModel(10, 2, n_epochs=1),
         NBEATSModel(10, 2, n_epochs=1),
     ]
     ensemble = NaiveEnsembleModel(members)
     ensemble.fit(self.seq1)
     ensemble.predict(n=5, series=self.seq1)
Example #7
0
    def test_call_predict_global_models_univariate_input_no_covariates(self):
        """Predicting before fitting must raise; after fitting it must succeed."""
        members = [
            RNNModel(12, n_epochs=1),
            TCNModel(10, 2, n_epochs=1),
            NBEATSModel(10, 2, n_epochs=1),
        ]
        ensemble = NaiveEnsembleModel(members)

        # The ensemble has not been fitted yet, so predict() is expected to fail.
        self.assertRaises(Exception, ensemble.predict, 5)

        ensemble.fit(self.series1)
        ensemble.predict(5)
Example #8
0
def make_lstm_prediction():
    """Fit an LSTM on each ticker's daily change series and store a one-step forecast.

    For every ticker in ``lst_tickers_of_interest`` the price history is read
    from ``col_price_history``, converted to a business-day ``TimeSeries`` of
    ``DailyChangePct``, and used to train a fresh ``RNNModel``. The first
    predicted value is inserted into ``lstm_prediction_history``.
    """
    for ticker in lst_tickers_of_interest:
        history = list(col_price_history.find({'Ticker': ticker}))
        changes = pd.DataFrame(history)[["DailyChangePct",
                                         "Date"]].set_index('Date')
        changes.index = pd.to_datetime(changes.index)
        # Reverse the row order (presumably the stored order is newest-first
        # - TODO confirm against the collection).
        changes = changes.reindex(index=changes.index[::-1])

        # time_col=None: the DataFrame index is used as the time axis.
        daily_change = TimeSeries.from_dataframe(changes,
                                                 time_col=None,
                                                 value_cols='DailyChangePct',
                                                 freq='B',
                                                 fill_missing_dates=True)
        daily_change = auto_fillna(daily_change)

        rnn_settings = dict(
            model='LSTM',
            output_length=1,
            hidden_size=5,
            n_rnn_layers=1,
            input_length=6,
            batch_size=16,
            n_epochs=10,
            optimizer_kwargs={'lr': 1e-3},
            model_name=f'{ticker}_RNN',
            log_tensorboard=False,
        )
        forecaster = RNNModel(**rnn_settings)
        forecaster.fit(daily_change)

        # First value of the single forecast step.
        next_change = forecaster.predict(1).values()[0][0]
        record = {
            "Date": datetime.datetime.today(),
            "Ticker": ticker,
            "LSTM_prediction": float(next_change),
        }
        lstm_prediction_history.insert_one(record)
Example #9
0
 def test_call_predict_global_models_multivariate_input_with_covariates(self):
     """Fit a naive ensemble with covariates, then predict from truncated inputs."""
     members = [
         RNNModel(12, n_epochs=1),
         TCNModel(10, 2, n_epochs=1),
         NBEATSModel(10, 2, n_epochs=1),
     ]
     ensemble = NaiveEnsembleModel(members)
     ensemble.fit(self.seq1, self.cov1)

     # Keep the first 12 steps of each target and the first 14 steps of
     # each covariate series for the prediction call.
     short_targets = [target[:12] for target in self.seq1]
     short_covariates = [covariate[:14] for covariate in self.cov1]
     ensemble.predict(n=2,
                      series=short_targets,
                      past_covariates=short_covariates)
Example #10
0
        def test_stochastic_inputs(self):
            """A deterministic model forecasting a stochastic series must return varying samples.

            The model is trained on ``self.constant_ts`` and then asked twice
            to forecast a stochastic version of that series; the two forecasts
            are expected to differ.
            """
            model = RNNModel(input_chunk_length=5)
            model.fit(self.constant_ts, epochs=2)

            # Build a stochastic series: 100 normally distributed samples per
            # time step, centred on the values of the constant series.
            target_vals = self.constant_ts.values()
            stochastic_vals = np.random.normal(loc=target_vals,
                                               scale=1.0,
                                               size=(len(self.constant_ts),
                                                     100))
            # Insert an axis at position 1 -> (n_timesteps, 1, 100).
            stochastic_vals = np.expand_dims(stochastic_vals, axis=1)
            stochastic_series = TimeSeries.from_times_and_values(
                self.constant_ts.time_index, stochastic_vals)

            # A deterministic model forecasting a stochastic series
            # should return stochastic samples
            preds = [
                model.predict(series=stochastic_series, n=10) for _ in range(2)
            ]

            # Random samples should differ. np.all replaces np.alltrue, which
            # is deprecated and no longer available in NumPy 2.0.
            self.assertFalse(np.all(preds[0].values() == preds[1].values()))
Example #11
0
 def get_global_models(self, output_chunk_length=5):
     """Return a fresh ``[RNNModel, BlockRNNModel]`` pair sharing one configuration.

     ``output_chunk_length`` is forwarded to both constructors; the remaining
     settings (input chunk length 20, one epoch, random state 42) are fixed.
     """
     shared_kwargs = dict(
         input_chunk_length=20,
         output_chunk_length=output_chunk_length,
         n_epochs=1,
         random_state=42,
     )
     return [
         model_cls(**shared_kwargs) for model_cls in (RNNModel, BlockRNNModel)
     ]
Example #12
0
        def test_future_covariates(self):
            """Covariates should improve long-horizon forecasts and bound the reachable horizon.

            A TCNModel trained with covariates must reach a lower MAPE over 160
            steps than the same model trained without them. The test then
            checks how far a block model and a recurrent model can predict
            before a ValueError is raised.
            """

            def _new_tcn():
                # Identical configuration (and seed) for both TCN runs.
                return TCNModel(
                    input_chunk_length=50,
                    output_chunk_length=5,
                    n_epochs=20,
                    random_state=0,
                )

            long_horizon = 160

            # Baseline: target only.
            baseline = _new_tcn()
            baseline.fit(series=self.target_past)
            long_pred_no_cov = baseline.predict(n=long_horizon)

            # Same model, trained and queried with covariates.
            model = _new_tcn()
            model.fit(series=self.target_past,
                      past_covariates=self.covariates_past)
            long_pred_with_cov = model.predict(n=long_horizon,
                                               past_covariates=self.covariates)

            error_no_cov = mape(self.target_future, long_pred_no_cov)
            error_with_cov = mape(self.target_future, long_pred_with_cov)
            self.assertTrue(
                error_no_cov > error_with_cov,
                "Models with future covariates should produce better predictions.",
            )

            # Block models can predict up to output_chunk_length points beyond
            # the last available covariate ...
            model.predict(n=165, past_covariates=self.covariates)

            # ... but not more.
            with self.assertRaises(ValueError):
                model.predict(n=166, series=self.ts_pass_train)

            # Recurrent models can only predict time steps for which future
            # covariates are available.
            recurrent = RNNModel(12, n_epochs=1)
            recurrent.fit(series=self.target_past,
                          future_covariates=self.covariates_past)
            recurrent.predict(n=long_horizon,
                              future_covariates=self.covariates)
            with self.assertRaises(ValueError):
                recurrent.predict(n=161, future_covariates=self.covariates)
Example #13
0
])

# Iterate over every (model type, parameter, window) combination from
# `configurations`; progress is reported on stderr.
for model_type in configurations.model_types:
    print(f'Current Model: {model_type}', file=sys.stderr)

    for parameter in configurations.parameters:
        print(f'Current Parameter: {parameter.upper()}', file=sys.stderr)
        # Timestamp taken once per parameter (its use is outside this excerpt).
        start_time = time.time()

        for window_idx in range(configurations.n_windows):
            print(f'Current Window: {window_idx}', file=sys.stderr)

            # Create a fresh model for each window
            model = RNNModel(
                model=model_type,
                input_chunk_length=configurations.input_length,
                output_chunk_length=configurations.output_length,
                # batch_size must be <= input_length (bug fixed in Darts version 0.9.0)
                batch_size=configurations.input_length)

            # Read the pickled training series for this parameter/window
            train_series = read_pickle_file(
                f'{script_path}/time_series/time_series_{parameter}_win{window_idx}_train_'
                f'{endogenous_input}.pickle')

            # Read the pickled prediction series for this parameter/window
            pred_series = read_pickle_file(
                f'{script_path}/time_series/time_series_{parameter}_win{window_idx}_pred_'
                f'{endogenous_input}.pickle')

            print('Pre-train ...', file=sys.stderr)

            # Pre-train with (e.g. 80%) of relevant MEDIAN series (steady training set)
Example #14
0
# Report the number of entries in the validation series.
print(len(val))

# Plot the autocorrelation of the training series (m=32, up to lag 240).
plot_acf(train, m=32, max_lag=240, alpha=.05)

# Normalize the time series (note: we avoid fitting the transformer on the validation set)
transformer = Scaler()
train_transformed = transformer.fit_transform(train)
val_transformed = transformer.transform(val)
series_transformed = transformer.transform(eq_series)

# LSTM configuration; random_state is fixed to 42.
my_model = RNNModel(model='LSTM',
                    input_chunk_length=32,
                    output_chunk_length=1,
                    hidden_size=25,
                    n_rnn_layers=1,
                    dropout=0.4,
                    batch_size=16,
                    n_epochs=500,
                    optimizer_kwargs={'lr': 1e-3},
                    model_name='Eq_RNN',
                    log_tensorboard=True,
                    random_state=42)

# Train on the scaled training part, passing the scaled validation part as val_series.
my_model.fit(train_transformed, val_series=val_transformed, verbose=True)


def eval_model(model):
    """Plot a 96-step forecast of *model* against the scaled series.

    The figure title shows the MAPE computed from the forecast and
    ``val_transformed``.

    NOTE(review): the forecast is passed as the first argument to ``mape``;
    darts documents the order as (actual, predicted) - confirm this is intended.
    """
    forecast = model.predict(n=96)

    plt.figure(figsize=(8, 5))
    series_transformed.plot(label='actual')
    forecast.plot(label='forecast')

    score = mape(forecast, val_transformed)
    plt.title('MAPE: {:.2f}%'.format(score))
Example #15
0
])

# Iterate over every (model type, parameter, window) combination from
# `configurations`; progress is reported on stderr.
for model_type in configurations.model_types:
    print(f'Current Model: {model_type}', file=sys.stderr)

    for parameter in configurations.parameters:
        print(f'Current Parameter: {parameter.upper()}', file=sys.stderr)
        # Timestamp taken once per parameter (its use is outside this excerpt).
        start_time = time.time()

        for window_idx in range(configurations.n_windows):
            print(f'Current Window: {window_idx}', file=sys.stderr)

            # Create a fresh model for each window
            model = RNNModel(
                model=model_type,
                input_chunk_length=configurations.input_length,
                output_chunk_length=configurations.output_length,
                # batch_size must be <= input_length (bug fixed in Darts version 0.9.0)
                batch_size=configurations.input_length)

            # Read the pickled training series: two endogenous inputs
            # ("low" and "high") ...
            train_series_endo_low = read_pickle_file(
                f'{script_path}/time_series/time_series_{parameter}_win{window_idx}'
                f'_train_{endogenous_input_low}.pickle')

            train_series_endo_high = read_pickle_file(
                f'{script_path}/time_series/time_series_{parameter}_win{window_idx}'
                f'_train_{endogenous_input_high}.pickle')

            # ... and one exogenous input.
            train_series_exo = read_pickle_file(
                f'{script_path}/time_series/time_series_{parameter}_win{window_idx}_'
                f'train_{exogenous_input}.pickle')
Example #16
0
# Split the series at 2021-03-01 00:00 UTC into a training and a validation part.
train, val = df_series.split_before(pd.Timestamp("2021-03-01 00:00:00+00:00"))

# Normalize the time series (note: we avoid fitting the transformer on the validation set)
transformer = Scaler()
train_transformed = transformer.fit_transform(train)
val_transformed = transformer.transform(val)
series_transformed = transformer.transform(df_series)

# Define the LSTM Model parameters
my_model = RNNModel(
    model='LSTM',
    input_chunk_length=24,
    output_chunk_length=1,
    hidden_size=25,
    n_rnn_layers=1,
    dropout=0.2,
    batch_size=16,
    n_epochs=20,
    optimizer_kwargs={'lr': 1e-3},
    model_name='Forecast_LSTM_next_hour',
    log_tensorboard=True,
    random_state=42
)

# Either train a new model or load the best model from a checkpoint.
# The flag is tested for truthiness rather than compared with `== True`.
if train_new_model:
    my_model.fit(train_transformed, val_series=val_transformed, verbose=True)
else:
    my_model = RNNModel.load_from_checkpoint(model_name='Forecast_LSTM_next_hour', best=True)

# Evaluate Predictions 
def eval_model(model):
Example #17
0
 def test_input_models_global_models(self):
     """Constructing a naive ensemble from three global models must not raise."""
     global_models = [RNNModel(12), TCNModel(10, 2), NBEATSModel(10, 2)]
     NaiveEnsembleModel(global_models)
Example #18
0
 def test_input_models_mixed(self):
     """Combining NaiveDrift and Theta with an RNNModel in one ensemble must raise ValueError."""
     with self.assertRaises(ValueError):
         # Built inside the context so the whole expression stays guarded.
         mixed_models = [NaiveDrift(), Theta(), RNNModel(12)]
         NaiveEnsembleModel(mixed_models)
Example #19
0
# Filler instance created once up front (its use is outside this excerpt).
filler = MissingValuesFiller()

# One pass per RNN module type; progress banners go to stderr.
for model_type in model_types:
    print(
        f'\n##############################\nCurrent Model Type: {model_type}\n##############################\n',
        file=sys.stderr)

    # Create sub folder for each model type
    # (os.mkdir creates only the last path component, so the parent
    # directory must already exist)
    if not os.path.isdir(
            f'./data/{approach}/{n_chunks}_chunks/{style}/{model_type}'):
        os.mkdir(f'./data/{approach}/{n_chunks}_chunks/{style}/{model_type}')

    # Create model per model type (the same instance is used for every parameter below)
    model = RNNModel(
        model=model_type,
        input_chunk_length=input_length,
        output_chunk_length=output_length,
        batch_size=input_length
    )  # batch_size must be <= input_length (bug fixed in Darts version 0.9.0)

    for parameter in parameters:
        print(
            f'\n##############################\nCurrent Parameter: {parameter.upper()}\n'
            f'##############################\n',
            file=sys.stderr)

        # Timestamp taken once per parameter (its use is outside this excerpt).
        start_time = time.time()

        # Create sub folder for each parameter
        if not os.path.isdir(
                f'./data/{approach}/{n_chunks}_chunks/{style}/{model_type}/{parameter}'
        ):
Example #20
0
def get_lstm_model(dataset=None, plot=False, verbose=False):
    """Grid-search an LSTM ``RNNModel`` on a count series and fit the selected model.

    The series is split 80/20 into training and validation parts, scaled with
    a ``Scaler`` fitted on the training part only, and used for a grid search
    scored with ``mse``. The selected model is then fitted on the scaled
    training part.

    Args:
        dataset: Data accepted by ``pd.DataFrame.from_dict`` that provides the
            ``time_interval`` and ``count`` columns. When ``None`` the data is
            read from ``jeans_day.csv``.
        plot: When true, print R2, MAPE, MASE and MAE of a one-step backtest
            over the validation part and show a plot of the backtest against
            the actual series.
        verbose: Forwarded to ``RNNModel.gridsearch``.

    Returns:
        ``[lstm_model, params]``: the fitted model and the second element of
        the grid-search result (the selected hyper-parameters). The list is
        returned regardless of ``plot``; previously the function returned
        ``None`` implicitly when ``plot`` was true.
    """
    if dataset is None:
        df = pd.read_csv("jeans_day.csv")
    else:
        df = pd.DataFrame.from_dict(dataset)

    ts = TimeSeries.from_dataframe(df,
                                   time_col='time_interval',
                                   value_cols=['count'])

    train, val = ts.split_after(0.8)

    # Fit the scaler on the training part only, then apply it everywhere.
    scaler = Scaler()
    train_transformed = scaler.fit_transform(train)
    val_transformed = scaler.transform(val)
    ts_transformed = scaler.transform(ts)

    # Every combination of these values is a grid-search candidate.
    search_space = {
        'model': ["LSTM"],
        'hidden_size': [50, 75, 100],
        'n_rnn_layers': [1],
        'input_chunk_length': [14],
        'output_chunk_length': [1],
        'n_epochs': [100],
        'dropout': [0],
        'batch_size': [4, 6],
        'random_state': [0, 1],
        'loss_fn': [MSELoss()],
    }

    search_result = RNNModel.gridsearch(parameters=search_space,
                                        series=train_transformed,
                                        val_series=val_transformed,
                                        verbose=verbose,
                                        metric=mse)

    # Index instead of unpacking so that extra trailing elements in the
    # result are tolerated.
    lstm_model = search_result[0]
    params = search_result[1]

    lstm_model.fit(train_transformed, verbose=True)

    if plot:
        backtest = lstm_model.historical_forecasts(series=ts_transformed,
                                                   start=0.8,
                                                   forecast_horizon=1,
                                                   stride=1,
                                                   retrain=False,
                                                   verbose=False)
        print(val)
        print(backtest[1:])

        # Undo the scaling once and reuse the results for every metric; the
        # first backtest point is dropped as in the printout above.
        actual = scaler.inverse_transform(val_transformed)
        predicted = scaler.inverse_transform(backtest[1:])

        print("R2: {}".format(r2_score(actual, predicted, intersect=False)))
        print("MAPE: {}".format(mape(actual, predicted)))
        print("MASE: {}".format(mase(actual, predicted, train)))
        print("MAE: {}".format(mae(actual, predicted)))

        scaler.inverse_transform(backtest).plot(label='backtest')
        scaler.inverse_transform(ts_transformed).plot(label='actual')
        plt.title("H&M Daily, LSTM Model")
        plt.xlabel("Date")
        plt.ylabel("Count")
        plt.legend()
        plt.show()

    return [lstm_model, params]