Example #1
0
def lstm():
    """Fit one LSTM per company and store its one-step-ahead forecast.

    For every name in ``lstCompanies`` the collection ``db[company]`` is read
    into a DataFrame, the ``Close`` column is converted into a business-day
    ``TimeSeries`` (passed through ``auto_fillna``), an ``RNNModel`` is
    fitted on that series, and the single predicted value, rounded to two
    decimals, is inserted into ``db.prediction`` together with the current
    timestamp and the company name.
    """
    for company in lstCompanies:
        records = list(db[company].find({}))
        frame = pd.DataFrame(records).drop('_id', axis=1)
        for column in ('Open', 'Close'):
            frame[column] = frame[column].astype('float')

        # 'B' = business-day frequency; only the 'Close' column is modelled.
        close_series = auto_fillna(
            TimeSeries.from_dataframe(frame,
                                      'Date', ['Close'],
                                      freq='B',
                                      fill_missing_dates=True))

        forecaster = RNNModel(
            # RNN module type: "RNN", "LSTM" or "GRU".
            model='LSTM',
            # Number of time steps output by the forecasting module.
            output_length=1,
            # Size of the feature maps of each hidden RNN layer.
            hidden_size=25,
            # Number of layers in the RNN module.
            n_rnn_layers=1,
            # NOTE(review): presumably the number of past time steps fed to
            # the model -- confirm against the Darts version in use.
            input_length=12,
            # Samples processed before each update of the model parameters.
            batch_size=16,
            # Number of passes over the entire training dataset.
            n_epochs=200,
            optimizer_kwargs={'lr': 1e-3},
            model_name='{}_RNN'.format(company))
        forecaster.fit(close_series)

        # predict(1) yields a single step; take its scalar value.
        next_close = forecaster.predict(1).values()[0][0]
        document = {
            "Date": datetime.datetime.today(),
            "Company": company,
            "Prediction": round(float(next_close), 2)
        }
        db.prediction.insert_one(document)
Example #2
0
        def test_likelihoods_and_resulting_mean_forecasts(self):
            """
            For every (likelihood, series, diff1, diff2) entry of ``self.lkl_series``, fit an RNNModel with
            that likelihood and check that the mean of the sampled forecast stays within ``diff1`` (component 0)
            and ``diff2`` (component 1) of the mean of the training series.
            """
            def _component_means(ts):
                # Mean over all time steps and samples, separately for components 0 and 1
                samples = ts.all_values()
                return np.mean(samples[:, 0, :]), np.mean(samples[:, 1, :])

            for lkl, series, diff1, diff2 in self.lkl_series:
                model = RNNModel(input_chunk_length=5, likelihood=lkl)
                model.fit(series, epochs=50)
                pred = model.predict(n=50, num_samples=50)

                orig_mean_0, orig_mean_1 = _component_means(series)
                pred_mean_0, pred_mean_1 = _component_means(pred)

                gap_0 = abs(orig_mean_0 - pred_mean_0)
                self.assertLess(
                    gap_0,
                    diff1,
                    "The difference between the mean forecast and the mean series is larger "
                    "than expected on component 0 for distribution {}".format(
                        lkl),
                )

                gap_1 = abs(orig_mean_1 - pred_mean_1)
                self.assertLess(
                    gap_1,
                    diff2,
                    "The difference between the mean forecast and the mean series is larger "
                    "than expected on component 1 for distribution {}".format(
                        lkl),
                )
Example #3
0
        def test_max_samples_per_ts(self):
            """
            Smoke test: fitting an RNNModel while passing ``max_samples_per_ts`` to ``fit`` must not raise.
            """
            # 50-step linear ramp from 0 to 1 used as training data
            series = linear_timeseries(start_value=0, end_value=1, length=50)

            rnn = RNNModel(
                input_chunk_length=20,
                output_chunk_length=2,
                n_epochs=2,
                batch_size=32,
            )
            rnn.fit(series, max_samples_per_ts=5)
Example #4
0
        def test_sample_smaller_than_batch_size(self):
            """
            Smoke test: training must not crash when the series yields fewer training samples than ``batch_size``.
            """
            # A 50-step series with 20 input + 2 output steps per sample leaves fewer than 32 (batch_size)
            # training windows -- at most 50 - 20 - 2 + 1 = 29 assuming plain sliding windows.
            # NOTE(review): an earlier comment quoted 27 samples; confirm the exact count against the
            # training dataset actually used. Either way the model is expected to train without error.
            series = linear_timeseries(start_value=0, end_value=1, length=50)

            rnn = RNNModel(
                input_chunk_length=20,
                output_chunk_length=2,
                n_epochs=2,
                batch_size=32,
            )
            rnn.fit(series)
Example #5
0
def make_lstm_prediction():
    """Fit an LSTM per ticker and store its next predicted daily change.

    For each ticker in ``lst_tickers_of_interest`` the ``DailyChangePct``
    history is read from ``col_price_history``, indexed by date, reversed,
    converted into a business-day ``TimeSeries`` (passed through
    ``auto_fillna``) and used to fit an ``RNNModel``. The one-step forecast
    is inserted into ``lstm_prediction_history`` with the current timestamp.
    """
    # Hyperparameters shared by every ticker's model.
    seq_length = 6
    hidden_size = 5
    output_len = 1
    num_layers = 1

    for ticker in lst_tickers_of_interest:
        rows = list(col_price_history.find({'Ticker': ticker}))
        history = pd.DataFrame(rows)
        changes = history[["DailyChangePct", "Date"]].set_index('Date')
        changes.index = pd.to_datetime(changes.index)
        # Reverse the row order (presumably stored newest-first -- confirm).
        reversed_index = changes.index[::-1]
        changes = changes.reindex(index=reversed_index)

        # time_col=None: the DataFrame index supplies the timestamps.
        series = auto_fillna(
            TimeSeries.from_dataframe(changes,
                                      time_col=None,
                                      value_cols='DailyChangePct',
                                      freq='B',
                                      fill_missing_dates=True))

        forecaster = RNNModel(
            model='LSTM',
            output_length=output_len,
            hidden_size=hidden_size,
            n_rnn_layers=num_layers,
            input_length=seq_length,
            batch_size=16,
            n_epochs=10,
            optimizer_kwargs={'lr': 1e-3},
            model_name=f'{ticker}_RNN',
            log_tensorboard=False,
        )
        forecaster.fit(series)

        # predict(1) yields a single step; take its scalar value.
        next_change = forecaster.predict(1).values()[0][0]
        document = {
            "Date": datetime.datetime.today(),
            "Ticker": ticker,
            "LSTM_prediction": float(next_change)
        }
        lstm_prediction_history.insert_one(document)
Example #6
0
        def test_stochastic_inputs(self):
            """
            A model trained on a deterministic series, when asked to forecast from a stochastic (multi-sample)
            series, should produce forecasts that differ between two successive ``predict`` calls.
            """
            model = RNNModel(input_chunk_length=5)
            model.fit(self.constant_ts, epochs=2)

            # build a stochastic series: 100 normally distributed samples (scale 1.0) centred on the
            # values of the constant series
            target_vals = self.constant_ts.values()
            stochastic_vals = np.random.normal(loc=target_vals,
                                               scale=1.0,
                                               size=(len(self.constant_ts),
                                                     100))
            # insert an axis at position 1 so the array is 3-dimensional before building the series
            stochastic_vals = np.expand_dims(stochastic_vals, axis=1)
            stochastic_series = TimeSeries.from_times_and_values(
                self.constant_ts.time_index, stochastic_vals)

            # A deterministic model forecasting a stochastic series
            # should return stochastic samples
            preds = [
                model.predict(series=stochastic_series, n=10) for _ in range(2)
            ]

            # random samples should differ
            # (np.all replaces np.alltrue, which was deprecated and then removed in NumPy 2.0)
            self.assertFalse(
                np.all(preds[0].values() == preds[1].values()))
Example #7
0
    input_chunk_length=24,
    output_chunk_length=1,
    hidden_size=25,
    n_rnn_layers=1,
    dropout=0.2,
    batch_size=16,
    n_epochs=20,
    optimizer_kwargs={'lr': 1e-3},
    model_name='Forecast_LSTM_next_hour',
    log_tensorboard=True,
    random_state=42
)

# Either train a new model or load the best checkpoint stored under the
# model name 'Forecast_LSTM_next_hour'.
# A plain truthiness test replaces the `== True` comparison (PEP 8).
if train_new_model:
    my_model.fit(train_transformed, val_series=val_transformed, verbose=True)
else:
    my_model = RNNModel.load_from_checkpoint(model_name='Forecast_LSTM_next_hour', best=True)

# Evaluate Predictions 
def eval_model(model):
    """Plot the model's forecast against the validation series.

    Predicts as many steps as ``val_transformed`` contains, draws the
    validation series and the forecast on one figure, and puts the MAPE
    between the two in the title.

    Args:
        model: object exposing ``predict(n=...)`` whose result supports
            ``.plot(label=...)`` and can be passed to ``mape``.
    """
    horizon = len(val_transformed)
    forecast = model.predict(n=horizon)

    plt.figure(figsize=(8, 5))
    val_transformed.plot(label='actual')
    forecast.plot(label='forecast')

    error_pct = mape(forecast, val_transformed)
    plt.title('MAPE: {:.2f}%'.format(error_pct))
    plt.legend()
    plt.show()

def backtest(model):
Example #8
0
                # batch_size must be <= input_length (bug fixed in Darts version 0.9.0)
                batch_size=configurations.input_length)

            # Read time-series input
            train_series = read_pickle_file(
                f'{script_path}/time_series/time_series_{parameter}_win{window_idx}_train_'
                f'{endogenous_input}.pickle')

            pred_series = read_pickle_file(
                f'{script_path}/time_series/time_series_{parameter}_win{window_idx}_pred_'
                f'{endogenous_input}.pickle')

            print('Pre-train ...', file=sys.stderr)

            # Pre-train with (e.g. 80%) of relevant MEDIAN series (steady training set)
            model.fit(series=list(train_series.values()), verbose=True)

            write_pickle_file(
                f'{script_path}/training/pre-trained_model_{model_type}_{parameter}_'
                f'win{window_idx}.pickle', model)

            confusion_matrix_chunks = pd.DataFrame(columns=[
                'CHUNK_ID', 'SCALING', 'PARAMETER', 'MODEL', 'ENDOGENOUS',
                'EXOGENOUS', 'FIRST_FORECAST', 'ALARM_TYPE', 'FP', 'TP', 'FN',
                'TN', 'N_HIGH_ALARMS', 'N_LOW_ALARMS', 'N_ITERATIONS'
            ])

            print('Series forecasting ...', file=sys.stderr)

            # Iterate chunk IDs we want to predict
            for chunk_id in pred_series.keys():