def lstm():
    """Fit one LSTM per company on closing prices and store a 1-step forecast.

    For every name in ``lstCompanies`` the whole MongoDB collection
    ``db[company]`` is loaded, its ``Close`` column is converted into a
    business-day ``TimeSeries`` (missing dates are inserted and then filled by
    ``auto_fillna``), a fresh ``RNNModel`` is trained on it and the next
    predicted value is inserted into ``db.prediction``.

    Companies whose collection is empty are skipped with a warning so that a
    single missing history does not abort the run for the remaining companies.

    Returns:
        None. The result is the documents written to ``db.prediction``.
    """
    import logging

    logger = logging.getLogger(__name__)

    for company in lstCompanies:
        records = list(db[company].find({}))
        if not records:
            # A frame built from no documents has no columns at all, so
            # dropping '_id' would raise KeyError and stop the whole loop.
            logger.warning(
                "No price history found for %s; skipping prediction", company)
            continue

        df = pd.DataFrame(records).drop('_id', axis=1)
        # Only 'Close' is handed to the model, so it is the only column that
        # has to be numeric.
        df['Close'] = df['Close'].astype('float')

        series = TimeSeries.from_dataframe(
            df,
            'Date',
            ['Close'],
            freq='B',  # 'B' = business day
            fill_missing_dates=True,
        )
        series = auto_fillna(series)

        model = RNNModel(
            model='LSTM',        # RNN module type: "RNN", "LSTM" or "GRU"
            output_length=1,     # number of time steps emitted per forecast
            hidden_size=25,      # feature-map size of each hidden RNN layer
            n_rnn_layers=1,      # number of stacked recurrent layers
            input_length=12,     # number of past time steps fed to the network
            batch_size=16,       # samples processed per optimizer step
            n_epochs=200,        # full passes over the training data
            optimizer_kwargs={'lr': 1e-3},
            model_name='{}_RNN'.format(company),
        )
        model.fit(series)

        # predict(1) yields a single-step series; take its only value.
        lstmPred = model.predict(1).values()[0][0]

        db.prediction.insert_one({
            "Date": datetime.datetime.today(),
            "Company": company,
            "Prediction": round(float(lstmPred), 2)
        })
def test_likelihoods_and_resulting_mean_forecasts(self):
    """Check that sampled forecasts stay close to the training series on average.

    Each entry of ``self.lkl_series`` supplies a likelihood, a series and one
    tolerance per component. A model is trained with that likelihood, 50
    samples of a 50-step forecast are drawn, and the per-component mean of the
    forecast must lie within the tolerance of the per-component mean of the
    series.
    """

    def _get_avgs(series):
        # Mean over the time and sample axes for component 0 and component 1.
        values = series.all_values()
        first_component = np.mean(values[:, 0, :])
        second_component = np.mean(values[:, 1, :])
        return first_component, second_component

    for lkl, series, diff1, diff2 in self.lkl_series:
        model = RNNModel(input_chunk_length=5, likelihood=lkl)
        model.fit(series, epochs=50)
        pred = model.predict(n=50, num_samples=50)

        avgs_orig = _get_avgs(series)
        avgs_pred = _get_avgs(pred)

        gap_component_0 = abs(avgs_orig[0] - avgs_pred[0])
        self.assertLess(
            gap_component_0,
            diff1,
            "The difference between the mean forecast and the mean series is larger "
            "than expected on component 0 for distribution {}".format(lkl),
        )

        gap_component_1 = abs(avgs_orig[1] - avgs_pred[1])
        self.assertLess(
            gap_component_1,
            diff2,
            "The difference between the mean forecast and the mean series is larger "
            "than expected on component 1 for distribution {}".format(lkl),
        )
def test_max_samples_per_ts(self):
    """
    Smoke test: fitting a TorchForecastingModel with ``max_samples_per_ts``
    set must complete without raising.
    """
    series = linear_timeseries(start_value=0, end_value=1, length=50)

    model_kwargs = {
        "input_chunk_length": 20,
        "output_chunk_length": 2,
        "n_epochs": 2,
        "batch_size": 32,
    }
    rnn = RNNModel(**model_kwargs)

    rnn.fit(series, max_samples_per_ts=5)
def test_sample_smaller_than_batch_size(self):
    """
    Smoke test: a TorchForecastingModel must still train when the number of
    available training samples is strictly lower than ``batch_size``.
    """
    # The series has 50 timestamps while batch_size is 32; the intent is that
    # the training dataset yields fewer than 32 samples and fitting still
    # succeeds.
    # NOTE(review): the earlier comment computed 50 - 22 - 2 + 1 = 27 samples,
    # but input_chunk_length is 20 here (same formula gives 29) - confirm the
    # exact count against the dataset class in use; either value is < 32.
    series = linear_timeseries(start_value=0, end_value=1, length=50)

    model_kwargs = {
        "input_chunk_length": 20,
        "output_chunk_length": 2,
        "n_epochs": 2,
        "batch_size": 32,
    }
    rnn = RNNModel(**model_kwargs)

    rnn.fit(series)
def make_lstm_prediction():
    """Fit one LSTM per ticker on daily percentage changes and store a forecast.

    For every ticker in ``lst_tickers_of_interest`` the matching documents are
    read from ``col_price_history``, the ``DailyChangePct`` column (indexed by
    ``Date``) is converted into a business-day ``TimeSeries`` with gaps filled
    by ``auto_fillna``, an ``RNNModel`` is trained on it and the one-step-ahead
    prediction is inserted into ``lstm_prediction_history``.

    Tickers with no stored price history are skipped with a warning so that
    one missing ticker does not abort the run for the others.

    Returns:
        None. The result is the documents written to
        ``lstm_prediction_history``.
    """
    import logging

    logger = logging.getLogger(__name__)

    # Hyper-parameters are the same for every ticker, so define them once.
    SEQ_LENGTH = 6
    HIDDEN_SIZE = 5
    OUTPUT_LEN = 1
    NUM_LAYERS = 1

    for ticker in lst_tickers_of_interest:
        records = list(col_price_history.find({'Ticker': ticker}))
        if not records:
            # A frame built from no documents has no columns, so selecting
            # "DailyChangePct"/"Date" would raise KeyError and stop the loop.
            logger.warning(
                "No price history found for %s; skipping prediction", ticker)
            continue

        df_ticker = pd.DataFrame(records)[["DailyChangePct", "Date"]].set_index('Date')
        df_ticker.index = pd.to_datetime(df_ticker.index)
        # Reverse the row order before building the series.
        # NOTE(review): presumably the collection returns newest-first rows -
        # confirm against how the history is written.
        df_ticker = df_ticker.reindex(index=df_ticker.index[::-1])

        series = TimeSeries.from_dataframe(
            df_ticker,
            time_col=None,  # use the DataFrame index as the time axis
            value_cols='DailyChangePct',
            freq='B',
            fill_missing_dates=True,
        )
        series = auto_fillna(series)

        model = RNNModel(
            model='LSTM',
            output_length=OUTPUT_LEN,
            hidden_size=HIDDEN_SIZE,
            n_rnn_layers=NUM_LAYERS,
            input_length=SEQ_LENGTH,
            batch_size=16,
            n_epochs=10,
            optimizer_kwargs={'lr': 1e-3},
            model_name=f'{ticker}_RNN',
            log_tensorboard=False,
        )
        model.fit(series)

        # predict(1) yields a single-step series; take its only value.
        lstm_prediction = model.predict(1).values()[0][0]

        lstm_prediction_history.insert_one({
            "Date": datetime.datetime.today(),
            "Ticker": ticker,
            "LSTM_prediction": float(lstm_prediction)
        })
def test_stochastic_inputs(self):
    """Check that forecasting from a stochastic input series gives varying output.

    A model is trained on ``self.constant_ts``. A stochastic series with 100
    samples per time step is then built by drawing normal noise (scale 1.0)
    around the constant values, and the model predicts 10 steps from it twice.
    The two predictions must not be element-wise identical.
    """
    model = RNNModel(input_chunk_length=5)
    model.fit(self.constant_ts, epochs=2)

    # Build a stochastic series: 100 noisy samples around each target value,
    # with a singleton component axis inserted at position 1.
    target_vals = self.constant_ts.values()
    stochastic_vals = np.random.normal(
        loc=target_vals, scale=1.0, size=(len(self.constant_ts), 100))
    stochastic_vals = np.expand_dims(stochastic_vals, axis=1)
    stochastic_series = TimeSeries.from_times_and_values(
        self.constant_ts.time_index, stochastic_vals)

    # A deterministic model forecasting a stochastic series
    # should return stochastic samples
    preds = [
        model.predict(series=stochastic_series, n=10) for _ in range(2)
    ]

    # Random samples should differ. np.all replaces np.alltrue, which is
    # deprecated and no longer exists in NumPy 2.0.
    self.assertFalse(np.all(preds[0].values() == preds[1].values()))
input_chunk_length=24, output_chunk_length=1, hidden_size=25, n_rnn_layers=1, dropout=0.2, batch_size=16, n_epochs=20, optimizer_kwargs={'lr': 1e-3}, model_name='Forecast_LSTM_next_hour', log_tensorboard=True, random_state=42 ) # Either train a new model or load best model from checkpoint if train_new_model==True: my_model.fit(train_transformed, val_series=val_transformed, verbose=True) else: my_model = RNNModel.load_from_checkpoint(model_name='Forecast_LSTM_next_hour', best=True) # Evaluate Predictions def eval_model(model): pred_series = model.predict(n=len(val_transformed)) plt.figure(figsize=(8,5)) val_transformed.plot(label='actual') pred_series.plot(label='forecast') plt.title('MAPE: {:.2f}%'.format(mape(pred_series, val_transformed))) plt.legend() plt.show() def backtest(model):
# batch_size must be <= input_length (bug fixed in Darts version 0.9.0) batch_size=configurations.input_length) # Read time-series input train_series = read_pickle_file( f'{script_path}/time_series/time_series_{parameter}_win{window_idx}_train_' f'{endogenous_input}.pickle') pred_series = read_pickle_file( f'{script_path}/time_series/time_series_{parameter}_win{window_idx}_pred_' f'{endogenous_input}.pickle') print('Pre-train ...', file=sys.stderr) # Pre-train with (e.g. 80%) of relevant MEDIAN series (steady training set) model.fit(series=list(train_series.values()), verbose=True) write_pickle_file( f'{script_path}/training/pre-trained_model_{model_type}_{parameter}_' f'win{window_idx}.pickle', model) confusion_matrix_chunks = pd.DataFrame(columns=[ 'CHUNK_ID', 'SCALING', 'PARAMETER', 'MODEL', 'ENDOGENOUS', 'EXOGENOUS', 'FIRST_FORECAST', 'ALARM_TYPE', 'FP', 'TP', 'FN', 'TN', 'N_HIGH_ALARMS', 'N_LOW_ALARMS', 'N_ITERATIONS' ]) print('Series forecasting ...', file=sys.stderr) # Iterate chunk IDs we want to predict for chunk_id in pred_series.keys():