def test_likelihoods_and_resulting_mean_forecasts(self):
    """Check that probabilistic forecasts stay close to the series mean.

    For every ``(likelihood, series, diff1, diff2)`` entry of
    ``self.lkl_series`` an ``RNNModel`` is trained with that likelihood,
    sampled forecasts are drawn, and the per-component mean of the forecast
    is compared against the per-component mean of the training series.
    ``diff1`` / ``diff2`` are the tolerated absolute gaps for components
    0 and 1 respectively.
    """

    def _get_avgs(series):
        # One overall mean per component (index 0 and index 1 on axis 1
        # of the array returned by ``all_values()``).
        return tuple(
            np.mean(series.all_values()[:, component, :]) for component in (0, 1)
        )

    for lkl, series, diff1, diff2 in self.lkl_series:
        model = RNNModel(input_chunk_length=5, likelihood=lkl)
        model.fit(series, epochs=50)
        forecast = model.predict(n=50, num_samples=50)

        series_means = _get_avgs(series)
        forecast_means = _get_avgs(forecast)

        gap_component_0 = abs(series_means[0] - forecast_means[0])
        self.assertLess(
            gap_component_0,
            diff1,
            "The difference between the mean forecast and the mean series is larger "
            "than expected on component 0 for distribution {}".format(lkl),
        )

        gap_component_1 = abs(series_means[1] - forecast_means[1])
        self.assertLess(
            gap_component_1,
            diff2,
            "The difference between the mean forecast and the mean series is larger "
            "than expected on component 1 for distribution {}".format(lkl),
        )
def lstm():
    """Train one LSTM per company and store its one-step-ahead prediction.

    For each entry of ``lstCompanies`` the matching collection is read from
    ``db``, the ``Close`` column is turned into a business-day series, an
    ``RNNModel`` is fitted on it, and the first predicted value (rounded to
    two decimals) is inserted into ``db.prediction``.
    """
    for company in lstCompanies:
        records = list(db[company].find({}))
        df = pd.DataFrame(records).drop('_id', axis=1)
        for column in ('Open', 'Close'):
            df[column] = df[column].astype('float')

        # 'B' = Business day
        series = TimeSeries.from_dataframe(
            df, 'Date', ['Close'], freq='B', fill_missing_dates=True)
        series = auto_fillna(series)

        rnn_settings = dict(
            # RNN module type given as a string ("RNN", "LSTM" or "GRU")
            model='LSTM',
            # Number of time steps output by the forecasting module
            output_length=1,
            # Size of the feature maps of each hidden RNN layer
            hidden_size=25,
            # Number of layers in the RNN module
            n_rnn_layers=1,
            # NOTE(review): the original comment described this as the
            # dimensionality of the input series; the name suggests the
            # input window length - confirm against the darts version in use.
            input_length=12,
            # Number of samples processed before the model parameters are updated
            batch_size=16,
            # Number of passes over the entire training dataset
            n_epochs=200,
            optimizer_kwargs={'lr': 1e-3},
            model_name='{}_RNN'.format(company),
        )
        model = RNNModel(**rnn_settings)
        model.fit(series)

        # First value of the single predicted time step.
        lstmPred = model.predict(1).values()[0][0]
        prediction_doc = {
            "Date": datetime.datetime.today(),
            "Company": company,
            "Prediction": round(float(lstmPred), 2)
        }
        db.prediction.insert_one(prediction_doc)
def test_max_samples_per_ts(self):
    """Fitting with ``max_samples_per_ts`` set must run without crashing."""
    series = linear_timeseries(start_value=0, end_value=1, length=50)
    rnn = RNNModel(
        input_chunk_length=20,
        output_chunk_length=2,
        n_epochs=2,
        batch_size=32,
    )
    # No assertion needed: the test passes as long as fit() does not raise.
    rnn.fit(series, max_samples_per_ts=5)
def test_sample_smaller_than_batch_size(self):
    """Training must not crash when there are fewer samples than ``batch_size``.

    The series has 50 timestamps and the model is configured with
    ``batch_size=32``. The original author's note counts 27 training samples
    (50 - 22 - 2 + 1), i.e. strictly fewer than one batch.
    NOTE(review): that arithmetic uses 22 while ``input_chunk_length`` is 20
    here - confirm the exact sample count; it is presumably still below 32.
    """
    series = linear_timeseries(start_value=0, end_value=1, length=50)
    rnn = RNNModel(
        input_chunk_length=20,
        output_chunk_length=2,
        n_epochs=2,
        batch_size=32,
    )
    # The test passes as long as fit() completes without raising.
    rnn.fit(series)
def test_ensemble_models_denoising_multi_input(self):
    """Check that a regression ensemble denoises the multi-input target.

    The ensemble combines an ``RNNModel``, a ``BlockRNNModel`` and two
    ``RegressionModel`` instances, and is evaluated through
    ``self.helper_test_models_accuracy`` on the third and fourth series
    returned by ``self.denoising_input()``.

    WARNING: this test isn't numerically stable; changing
    ``self.RANDOM_SEED`` can lead to exploding coefficients.
    """
    horizon = 10
    _, _, target, covariates = self.denoising_input()
    torch.manual_seed(self.RANDOM_SEED)

    # Both neural models share exactly the same hyper-parameters.
    neural_kwargs = dict(
        input_chunk_length=20,
        output_chunk_length=horizon,
        n_epochs=1,
        random_state=self.RANDOM_SEED,
    )
    ensemble_models = [
        model_cls(**neural_kwargs) for model_cls in (RNNModel, BlockRNNModel)
    ]
    ensemble_models += [
        RegressionModel(lags_past_covariates=[-1]) for _ in range(2)
    ]

    ensemble = RegressionEnsembleModel(ensemble_models, horizon)
    self.helper_test_models_accuracy(ensemble, horizon, target, covariates, 3)
def test_call_predict_global_models_multivariate_input_no_covariates(self):
    """Fit a naive ensemble on ``self.seq1`` and predict without covariates."""
    members = [
        RNNModel(12, n_epochs=1),
        TCNModel(10, 2, n_epochs=1),
        NBEATSModel(10, 2, n_epochs=1),
    ]
    naive_ensemble = NaiveEnsembleModel(members)
    naive_ensemble.fit(self.seq1)
    # Passes as long as predict() does not raise.
    naive_ensemble.predict(n=5, series=self.seq1)
def test_call_predict_global_models_univariate_input_no_covariates(self):
    """Predicting before fitting must raise; after fitting it must work."""
    members = [
        RNNModel(12, n_epochs=1),
        TCNModel(10, 2, n_epochs=1),
        NBEATSModel(10, 2, n_epochs=1),
    ]
    naive_ensemble = NaiveEnsembleModel(members)

    # The ensemble has not been fitted yet, so predict() is expected to fail.
    with self.assertRaises(Exception):
        naive_ensemble.predict(5)

    naive_ensemble.fit(self.series1)
    naive_ensemble.predict(5)
def make_lstm_prediction():
    """Fit an LSTM per ticker and persist its next-step prediction.

    For each ticker in ``lst_tickers_of_interest`` the ``DailyChangePct``
    history is loaded from ``col_price_history``, indexed by date, reversed,
    converted to a business-day series and used to train an ``RNNModel``.
    The first predicted value is written to ``lstm_prediction_history``.
    """
    for ticker in lst_tickers_of_interest:
        records = list(col_price_history.find({'Ticker': ticker}))
        history = pd.DataFrame(records)[["DailyChangePct", "Date"]]
        df_ticker = history.set_index('Date')
        df_ticker.index = pd.to_datetime(df_ticker.index)
        # Reverse the row order of the frame.
        df_ticker = df_ticker.reindex(index=df_ticker.index[::-1])

        series = TimeSeries.from_dataframe(
            df_ticker,
            time_col=None,
            value_cols='DailyChangePct',
            freq='B',
            fill_missing_dates=True,
        )
        series = auto_fillna(series)

        model = RNNModel(
            model='LSTM',
            output_length=1,
            hidden_size=5,
            n_rnn_layers=1,
            input_length=6,
            batch_size=16,
            n_epochs=10,
            optimizer_kwargs={'lr': 1e-3},
            model_name=f'{ticker}_RNN',
            log_tensorboard=False,
        )
        model.fit(series)

        # First value of the single predicted time step.
        lstm_prediction = model.predict(1).values()[0][0]
        prediction_doc = {
            "Date": datetime.datetime.today(),
            "Ticker": ticker,
            "LSTM_prediction": float(lstm_prediction)
        }
        lstm_prediction_history.insert_one(prediction_doc)
def test_call_predict_global_models_multivariate_input_with_covariates(self):
    """Fit a naive ensemble with covariates and predict on truncated inputs."""
    members = [
        RNNModel(12, n_epochs=1),
        TCNModel(10, 2, n_epochs=1),
        NBEATSModel(10, 2, n_epochs=1),
    ]
    naive_ensemble = NaiveEnsembleModel(members)
    naive_ensemble.fit(self.seq1, self.cov1)

    # Targets are cut to their first 12 points, covariates to their first 14.
    truncated_targets = [target[:12] for target in self.seq1]
    truncated_covariates = [covariate[:14] for covariate in self.cov1]

    naive_ensemble.predict(
        n=2,
        series=truncated_targets,
        past_covariates=truncated_covariates,
    )
def test_stochastic_inputs(self):
    """A deterministic model fed a stochastic series must yield varying forecasts.

    An ``RNNModel`` is trained on ``self.constant_ts``. A stochastic series
    with 100 samples per time step is then built by drawing normal noise
    (scale 1.0) around the constant values, and two forecasts are produced
    from it. The two forecasts are expected to differ.
    """
    model = RNNModel(input_chunk_length=5)
    model.fit(self.constant_ts, epochs=2)

    # Build a stochastic series: 100 noisy samples per time step, centred on
    # the constant target values.
    target_vals = self.constant_ts.values()
    stochastic_vals = np.random.normal(
        loc=target_vals, scale=1.0, size=(len(self.constant_ts), 100)
    )
    # Insert an axis at position 1, giving shape (len(constant_ts), 1, 100).
    stochastic_vals = np.expand_dims(stochastic_vals, axis=1)
    stochastic_series = TimeSeries.from_times_and_values(
        self.constant_ts.time_index, stochastic_vals
    )

    # A deterministic model forecasting a stochastic series
    # should return stochastic samples.
    preds = [model.predict(series=stochastic_series, n=10) for _ in range(2)]

    # Random samples should differ. ``np.all`` is used because the deprecated
    # ``np.alltrue`` alias was removed in NumPy 2.0.
    self.assertFalse(np.all(preds[0].values() == preds[1].values()))
def get_global_models(self, output_chunk_length=5):
    """Return a fresh ``RNNModel`` and ``BlockRNNModel`` with shared settings.

    Args:
        output_chunk_length: forwarded to both models as their
            ``output_chunk_length``.

    Returns:
        A two-element list ``[RNNModel, BlockRNNModel]``, each built with
        ``input_chunk_length=20``, ``n_epochs=1`` and ``random_state=42``.
    """
    shared_kwargs = dict(
        input_chunk_length=20,
        output_chunk_length=output_chunk_length,
        n_epochs=1,
        random_state=42,
    )
    return [model_cls(**shared_kwargs) for model_cls in (RNNModel, BlockRNNModel)]
def test_future_covariates(self):
    """Covariates should improve long-horizon forecasts and bound the horizon.

    Two identically configured ``TCNModel`` instances are trained, one
    without and one with covariates; the covariate-aware one must reach a
    lower MAPE on ``self.target_future``. The test then checks the horizon
    limits for the block model and for a recurrent ``RNNModel``.
    """

    def _new_tcn():
        # Same hyper-parameters and seed for both runs so that only the
        # presence of covariates differs.
        return TCNModel(
            input_chunk_length=50,
            output_chunk_length=5,
            n_epochs=20,
            random_state=0,
        )

    # Baseline: no covariates.
    tcn_plain = _new_tcn()
    tcn_plain.fit(series=self.target_past)
    long_pred_no_cov = tcn_plain.predict(n=160)

    # Same model, trained and queried with covariates.
    tcn_with_cov = _new_tcn()
    tcn_with_cov.fit(series=self.target_past, past_covariates=self.covariates_past)
    long_pred_with_cov = tcn_with_cov.predict(n=160, past_covariates=self.covariates)

    error_no_cov = mape(self.target_future, long_pred_no_cov)
    error_with_cov = mape(self.target_future, long_pred_with_cov)
    self.assertTrue(
        error_no_cov > error_with_cov,
        "Models with future covariates should produce better predictions.",
    )

    # Block models can predict up to output_chunk_length points beyond the
    # last covariate value ...
    tcn_with_cov.predict(n=165, past_covariates=self.covariates)

    # ... but not more.
    # NOTE(review): this call passes ``series=self.ts_pass_train`` and no
    # covariates, unlike the call above - confirm the ValueError is raised
    # for the intended reason.
    with self.assertRaises(ValueError):
        tcn_with_cov.predict(n=166, series=self.ts_pass_train)

    # Recurrent models can only predict time steps for which future
    # covariates are available.
    rnn = RNNModel(12, n_epochs=1)
    rnn.fit(series=self.target_past, future_covariates=self.covariates_past)
    rnn.predict(n=160, future_covariates=self.covariates)
    with self.assertRaises(ValueError):
        rnn.predict(n=161, future_covariates=self.covariates)
]) for model_type in configurations.model_types: print(f'Current Model: {model_type}', file=sys.stderr) for parameter in configurations.parameters: print(f'Current Parameter: {parameter.upper()}', file=sys.stderr) start_time = time.time() for window_idx in range(configurations.n_windows): print(f'Current Window: {window_idx}', file=sys.stderr) # Create model model = RNNModel( model=model_type, input_chunk_length=configurations.input_length, output_chunk_length=configurations.output_length, # batch_size must be <= input_length (bug fixed in Darts version 0.9.0) batch_size=configurations.input_length) # Read time-series input train_series = read_pickle_file( f'{script_path}/time_series/time_series_{parameter}_win{window_idx}_train_' f'{endogenous_input}.pickle') pred_series = read_pickle_file( f'{script_path}/time_series/time_series_{parameter}_win{window_idx}_pred_' f'{endogenous_input}.pickle') print('Pre-train ...', file=sys.stderr) # Pre-train with (e.g. 80%) of relevant MEDIAN series (steady training set)
print(len(val)) plot_acf(train, m=32, max_lag=240, alpha=.05) #Normalize the time series (note: we avoid filtering the transformer on the validation set) transformer = Scaler() train_transformed = transformer.fit_transform(train) val_transformed = transformer.transform(val) series_transformed = transformer.transform(eq_series) my_model = RNNModel(model='LSTM', input_chunk_length=32, output_chunk_length=1, hidden_size=25, n_rnn_layers=1, dropout=0.4, batch_size=16, n_epochs=500, optimizer_kwargs={'lr': 1e-3}, model_name='Eq_RNN', log_tensorboard=True, random_state=42) my_model.fit(train_transformed, val_series=val_transformed, verbose=True) def eval_model(model): pred_series = model.predict(n=96) plt.figure(figsize=(8, 5)) series_transformed.plot(label='actual') pred_series.plot(label='forecast') plt.title('MAPE: {:.2f}%'.format(mape(pred_series, val_transformed)))
]) for model_type in configurations.model_types: print(f'Current Model: {model_type}', file=sys.stderr) for parameter in configurations.parameters: print(f'Current Parameter: {parameter.upper()}', file=sys.stderr) start_time = time.time() for window_idx in range(configurations.n_windows): print(f'Current Window: {window_idx}', file=sys.stderr) # Create model model = RNNModel( model=model_type, input_chunk_length=configurations.input_length, output_chunk_length=configurations.output_length, # batch_size must be <= input_length (bug fixed in Darts version 0.9.0) batch_size=configurations.input_length) # Read time-series input train_series_endo_low = read_pickle_file( f'{script_path}/time_series/time_series_{parameter}_win{window_idx}' f'_train_{endogenous_input_low}.pickle') train_series_endo_high = read_pickle_file( f'{script_path}/time_series/time_series_{parameter}_win{window_idx}' f'_train_{endogenous_input_high}.pickle') train_series_exo = read_pickle_file( f'{script_path}/time_series/time_series_{parameter}_win{window_idx}_' f'train_{exogenous_input}.pickle')
train, val = df_series.split_before(pd.Timestamp("2021-03-01 00:00:00+00:00")) # Normalize the time series (note: we avoid fitting the transformer on the validation set) transformer = Scaler() train_transformed = transformer.fit_transform(train) val_transformed = transformer.transform(val) series_transformed = transformer.transform(df_series) # Define the LSTM Model parameters my_model = RNNModel( model='LSTM', input_chunk_length=24, output_chunk_length=1, hidden_size=25, n_rnn_layers=1, dropout=0.2, batch_size=16, n_epochs=20, optimizer_kwargs={'lr': 1e-3}, model_name='Forecast_LSTM_next_hour', log_tensorboard=True, random_state=42 ) # Either train a new model or load best model from checkpoint if train_new_model==True: my_model.fit(train_transformed, val_series=val_transformed, verbose=True) else: my_model = RNNModel.load_from_checkpoint(model_name='Forecast_LSTM_next_hour', best=True) # Evaluate Predictions def eval_model(model):
def test_input_models_global_models(self):
    """A naive ensemble made only of global models must be constructible."""
    global_models = [RNNModel(12), TCNModel(10, 2), NBEATSModel(10, 2)]
    # Passes as long as the constructor does not raise.
    NaiveEnsembleModel(global_models)
def test_input_models_mixed(self):
    """Mixing ``NaiveDrift``/``Theta`` with an ``RNNModel`` must raise ``ValueError``."""
    with self.assertRaises(ValueError):
        # Models are built inside the context manager, as in the original,
        # so a ValueError from any constructor is caught as well.
        mixed_models = [NaiveDrift(), Theta(), RNNModel(12)]
        NaiveEnsembleModel(mixed_models)
filler = MissingValuesFiller() for model_type in model_types: print( f'\n##############################\nCurrent Model Type: {model_type}\n##############################\n', file=sys.stderr) # Create sub folder for each model type if not os.path.isdir( f'./data/{approach}/{n_chunks}_chunks/{style}/{model_type}'): os.mkdir(f'./data/{approach}/{n_chunks}_chunks/{style}/{model_type}') # Create model per model type model = RNNModel( model=model_type, input_chunk_length=input_length, output_chunk_length=output_length, batch_size=input_length ) # batch_size must be <= input_length (bug fixed in Darts version 0.9.0) for parameter in parameters: print( f'\n##############################\nCurrent Parameter: {parameter.upper()}\n' f'##############################\n', file=sys.stderr) start_time = time.time() # Create sub folder for each parameter if not os.path.isdir( f'./data/{approach}/{n_chunks}_chunks/{style}/{model_type}/{parameter}' ):
def get_lstm_model(dataset=None, plot=False, verbose=False):
    """Grid-search, train and optionally evaluate an LSTM forecasting model.

    Args:
        dataset: dict-like data passed to ``pd.DataFrame.from_dict``. It must
            provide the ``time_interval`` and ``count`` columns. When
            ``None``, ``jeans_day.csv`` is read instead.
        plot: when true, a backtest over the last part of the series is run,
            R2/MAPE/MASE/MAE are printed and a plot is shown.
        verbose: forwarded to ``RNNModel.gridsearch``.

    Returns:
        ``[lstm_model, params]`` when ``plot`` is false. When ``plot`` is
        true the function only prints and plots, and returns ``None``.
    """
    if dataset is None:
        df = pd.read_csv("jeans_day.csv")
    else:
        df = pd.DataFrame.from_dict(dataset)

    ts = TimeSeries.from_dataframe(df,
                                   time_col='time_interval',
                                   value_cols=['count'])
    train, val = ts.split_after(0.8)

    # The scaler is fitted on the training split only and then applied to the
    # validation split and to the full series.
    scaler = Scaler()
    train_transformed = scaler.fit_transform(train)
    val_transformed = scaler.transform(val)
    ts_transformed = scaler.transform(ts)

    # Hyper-parameter grid; key order is kept identical to the original.
    params = {
        'model': ["LSTM"],
        'hidden_size': [50, 75, 100],
        'n_rnn_layers': [1],
        'input_chunk_length': [14],
        'output_chunk_length': [1],
        'n_epochs': [100],
        'dropout': [0],
        'batch_size': [4, 6],
        'random_state': [0, 1],
        'loss_fn': [MSELoss()],
    }

    search_result = RNNModel.gridsearch(parameters=params,
                                        series=train_transformed,
                                        val_series=val_transformed,
                                        verbose=verbose,
                                        metric=mse)
    # Indexing (rather than unpacking) tolerates result tuples that carry
    # extra trailing elements.
    lstm_model = search_result[0]
    params = search_result[1]
    lstm_model.fit(train_transformed, verbose=True)

    if not plot:
        return [lstm_model, params]

    backtest = lstm_model.historical_forecasts(series=ts_transformed,
                                               start=0.8,
                                               forecast_horizon=1,
                                               stride=1,
                                               retrain=False,
                                               verbose=False)
    print(val)
    print(backtest[1:])

    # Undo the scaling once and reuse the result for every metric instead of
    # repeating the same inverse transforms for each of them.
    actual = scaler.inverse_transform(val_transformed)
    predicted = scaler.inverse_transform(backtest[1:])

    print(f"R2: {r2_score(actual, predicted, intersect=False)}")
    print(f"MAPE: {mape(actual, predicted)}")
    print(f"MASE: {mase(actual, predicted, train)}")
    print(f"MAE: {mae(actual, predicted)}")

    scaler.inverse_transform(backtest).plot(label='backtest')
    scaler.inverse_transform(ts_transformed).plot(label='actual')
    plt.title("H&M Daily, LSTM Model")
    plt.xlabel("Date")
    plt.ylabel("Count")
    plt.legend()
    plt.show()