def eval_tcn_model(serialized_model, dataset):
    """Score a pickled TCN model on the last 20% of ``dataset``.

    The dataset is split 80/20 into train/validation. The scaler is fitted on
    the training split only, so no validation statistics leak into the scaling
    and the scaling matches a model trained on a train-fitted scaler. One-step
    historical forecasts are produced from the 80% mark without retraining,
    mapped back to the original scale, and compared with the raw validation
    values.

    Args:
        serialized_model: Pickled model bytes; the unpickled object must
            provide ``historical_forecasts``.
        dataset: Mapping accepted by ``pd.DataFrame.from_dict`` that yields
            the columns ``time_interval`` and ``count``.

    Returns:
        dict with keys ``r2``, ``mase_score``, ``mae_score``, ``rmse_score``
        and ``mape_score``. ``mape_score`` is a message string instead of a
        number when MAPE cannot be computed.
    """
    # SECURITY: pickle.loads can execute arbitrary code embedded in the
    # payload. Only pass bytes that come from a trusted source.
    tcn_model = pickle.loads(serialized_model)

    df = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(df,
                                   time_col='time_interval',
                                   value_cols=['count'])
    train, val = ts.split_after(0.8)  # 80% train, 20% val

    # Fit on the training split only; the full series is merely transformed.
    scaler = Scaler()
    scaler.fit(train)
    ts_transformed = scaler.transform(ts)

    backtest = tcn_model.historical_forecasts(
        series=ts_transformed,
        start=0.8,
        forecast_horizon=1,
        stride=1,
        retrain=False,
    )
    # Bring the forecasts back to the original scale so they can be compared
    # with the untransformed train/val series directly.
    backtest = scaler.inverse_transform(backtest)
    # The first forecast is dropped so the forecasts line up with `val`.
    forecast = backtest[1:]

    scores = dict()
    scores['r2'] = r2_score(val, forecast)
    scores['mase_score'] = mase(val, forecast, train)
    scores['mae_score'] = mae(val, forecast)
    scores['rmse_score'] = np.sqrt(mse(val, forecast))
    try:
        scores['mape_score'] = mape(val, forecast)
    except Exception:
        # Best-effort metric: keep the fallback message, but no longer
        # swallow KeyboardInterrupt/SystemExit as a bare `except:` would.
        scores['mape_score'] = (
            "Could not be calculated (Zero value in time series)")
    return scores
def helper_generate_multivariate_case_data(self, season_length, n_repeat):
    """Build scaled multivariate series and covariates for test cases.

    The target stacks a sine wave with a repeating linear ramp, both using
    ``season_length`` as their period. The covariates are the "hour" datetime
    attribute of a 1000-step hourly index.

    Returns:
        Tuple ``(ts_scaled, ts_train_scaled, ts_val_scaled,
        covariates_scaled)``.
    """
    total_length = n_repeat * season_length

    # Sine component.
    sine_part = tg.sine_timeseries(
        value_frequency=1 / season_length,
        length=total_length,
        freq="h",
    )

    # Linear component: one ramp, then n_repeat - 1 more appended to it.
    linear_part = tg.linear_timeseries(
        0, 1,
        length=season_length,
        start=sine_part.end_time() + sine_part.freq,
    )
    for _ in range(n_repeat - 1):
        next_start = linear_part.end_time() + linear_part.freq
        ramp = tg.linear_timeseries(0, 1, length=season_length, start=next_start)
        linear_part = linear_part.append(ramp)
    # Re-index the ramps onto the sine wave's time index so both can be stacked.
    linear_part = TimeSeries.from_times_and_values(
        times=sine_part.time_index,
        values=linear_part.values(),
    )

    # Multivariate target series.
    ts = sine_part.stack(linear_part)

    # Train/validation split: the last 10 seasons form the validation set.
    val_length = 10 * season_length
    ts_train = ts[:-val_length]
    ts_val = ts[-val_length:]

    # Scaling: fitted on the training part, applied to everything else.
    target_scaler = Scaler()
    ts_train_scaled = target_scaler.fit_transform(ts_train)
    ts_val_scaled = target_scaler.transform(ts_val)
    ts_scaled = target_scaler.transform(ts)

    # Covariates long enough to serve as both past and future covariates.
    covariate_base = tg.sine_timeseries(
        value_frequency=1 / season_length,
        length=1000,
        freq=ts.freq,
    )
    covariates = tg.datetime_attribute_timeseries(covariate_base, attribute="hour")
    covariate_scaler = Scaler()
    covariates_scaled = covariate_scaler.fit_transform(covariates)

    return ts_scaled, ts_train_scaled, ts_val_scaled, covariates_scaled
def test_scaling(self):
    """Scaler must honour the wrapped scaler's constraints and invert cleanly."""
    self.series3 = self.series1[:1]

    minmax_scaler = Scaler(MinMaxScaler(feature_range=(0, 2)))
    standard_scaler = Scaler(StandardScaler())

    minmax_scaled = minmax_scaler.fit_transform(self.series1)
    standard_scaled = standard_scaler.fit_transform(self.series1)
    # series3 is transformed with the scaler already fitted on series1.
    head_scaled = standard_scaler.transform(self.series3)

    # Scaling constraints: [0, 2] range for min-max, zero mean / unit std
    # for standardisation.
    minmax_values = minmax_scaled.values().flatten()
    standard_values = standard_scaled.values().flatten()
    self.assertAlmostEqual(min(minmax_values), 0.)
    self.assertAlmostEqual(max(minmax_values), 2.)
    self.assertAlmostEqual(np.mean(standard_values), 0.)
    self.assertAlmostEqual(np.std(standard_values), 1.)

    # Inverse transform must recover the original values and width.
    full_recovered = standard_scaler.inverse_transform(standard_scaled)
    head_recovered = standard_scaler.inverse_transform(head_scaled)
    np.testing.assert_almost_equal(
        full_recovered.values().flatten(),
        self.series1.values().flatten(),
    )
    self.assertEqual(full_recovered.width, self.series1.width)
    self.assertEqual(head_recovered, full_recovered[:1])
df.set_index("time") df["time"] = pd.to_datetime(df["time"], utc=True) # Transform DataFrame to Time Series Object df_series = TimeSeries.from_dataframe(df[["time","price actual"]], time_col='time', value_cols="price actual") ### Train and Test Model ####################################################### # Train Test Split train, val = df_series.split_before(pd.Timestamp("2021-03-01 00:00:00+00:00")) # Normalize the time series (note: we avoid fitting the transformer on the validation set) transformer = Scaler() train_transformed = transformer.fit_transform(train) val_transformed = transformer.transform(val) series_transformed = transformer.transform(df_series) # Define the LSTM Model parameters my_model = RNNModel( model='LSTM', input_chunk_length=24, output_chunk_length=1, hidden_size=25, n_rnn_layers=1, dropout=0.2, batch_size=16, n_epochs=20, optimizer_kwargs={'lr': 1e-3}, model_name='Forecast_LSTM_next_hour', log_tensorboard=True,
def get_tcn_model(dataset=None, plot=False, verbose=False):
    """Grid-search and fit a TCN model on the 80% training split.

    Args:
        dataset: Mapping accepted by ``pd.DataFrame.from_dict``; when ``None``
            the data is read from ``jeans_day.csv``.
        plot: When true, print a 7-step forecast and backtest metrics, then
            show a plot; nothing is returned in that case.
        verbose: Forwarded to the grid search and to the backtest.

    Returns:
        ``[tcn_model, params]`` when ``plot`` is false, otherwise ``None``.
    """
    if dataset is not None:
        df = pd.DataFrame.from_dict(dataset)
    else:
        df = pd.read_csv("jeans_day.csv")

    ts = TimeSeries.from_dataframe(df,
                                   time_col='time_interval',
                                   value_cols=['count'])
    train, val = ts.split_after(0.8)  # 80% train, 20% val

    # The scaler is fitted on the training split only.
    scaler = Scaler()
    train_transformed = scaler.fit_transform(train)
    val_transformed = scaler.transform(val)
    ts_transformed = scaler.transform(ts)

    search_space = {
        'kernel_size': [4, 6],
        'num_filters': [10],
        'random_state': [0, 1],
        'input_chunk_length': [14],
        'output_chunk_length': [1],
        'dilation_base': [2, 3],
        'n_epochs': [100],
        'dropout': [0],
        'loss_fn': [MSELoss()],
        'weight_norm': [True],
    }

    search_result = TCNModel.gridsearch(parameters=search_space,
                                        series=train_transformed,
                                        val_series=val_transformed,
                                        verbose=verbose,
                                        metric=mse)
    # Indexed rather than unpacked, so extra trailing items are tolerated.
    tcn_model = search_result[0]
    params = search_result[1]
    tcn_model.fit(series=train_transformed)

    if not plot:
        return [tcn_model, params]

    backtest = tcn_model.historical_forecasts(series=ts_transformed,
                                              start=0.8,
                                              forecast_horizon=1,
                                              stride=1,
                                              retrain=False,
                                              verbose=verbose)
    # Map everything back to the original scale before scoring.
    val_actual = scaler.inverse_transform(val_transformed)
    backtest_actual = scaler.inverse_transform(backtest)
    train_actual = scaler.inverse_transform(train_transformed)

    print(scaler.inverse_transform(tcn_model.predict(7)))

    # The first forecast is dropped so the forecasts line up with the
    # validation series.
    forecast = backtest_actual[1:]
    print("R2: {}".format(r2_score(val_actual, forecast, intersect=False)))
    print("MAPE: {}".format(mape(val_actual, forecast)))
    print("MASE: {}".format(mase(val_actual, forecast, train_actual)))
    print("MAE: {}".format(mae(val_actual, forecast)))
    print("RMSE: {}".format(np.sqrt(mse(val_actual, forecast))))

    backtest_actual.plot(label='backtest')
    ts.plot(label='actual')
    plt.title("H&M Daily, TCN Model")
    plt.xlabel("Date")
    plt.ylabel("Count")
    plt.legend()
    plt.show()
def get_lstm_model(dataset=None, plot=False, verbose=False):
    """Grid-search and fit an LSTM model on the 80% training split.

    Args:
        dataset: Mapping accepted by ``pd.DataFrame.from_dict``; when ``None``
            the data is read from ``jeans_day.csv``.
        plot: When true, print backtest metrics and show a plot; nothing is
            returned in that case.
        verbose: Forwarded to the grid search.

    Returns:
        ``[lstm_model, params]`` when ``plot`` is false, otherwise ``None``.
    """
    if dataset is not None:
        df = pd.DataFrame.from_dict(dataset)
    else:
        df = pd.read_csv("jeans_day.csv")

    ts = TimeSeries.from_dataframe(df,
                                   time_col='time_interval',
                                   value_cols=['count'])
    train, val = ts.split_after(0.8)

    # The scaler is fitted on the training split only.
    scaler = Scaler()
    train_transformed = scaler.fit_transform(train)
    val_transformed = scaler.transform(val)
    ts_transformed = scaler.transform(ts)

    search_space = {
        'model': ["LSTM"],
        'hidden_size': [50, 75, 100],
        'n_rnn_layers': [1],
        'input_chunk_length': [14],
        'output_chunk_length': [1],
        'n_epochs': [100],
        'dropout': [0],
        'batch_size': [4, 6],
        'random_state': [0, 1],
        'loss_fn': [MSELoss()],
    }

    search_result = RNNModel.gridsearch(parameters=search_space,
                                        series=train_transformed,
                                        val_series=val_transformed,
                                        verbose=verbose,
                                        metric=mse)
    # Indexed rather than unpacked, so extra trailing items are tolerated.
    lstm_model = search_result[0]
    params = search_result[1]
    lstm_model.fit(train_transformed, verbose=True)

    if not plot:
        return [lstm_model, params]

    backtest = lstm_model.historical_forecasts(series=ts_transformed,
                                               start=0.8,
                                               forecast_horizon=1,
                                               stride=1,
                                               retrain=False,
                                               verbose=False)
    print(val)
    print(backtest[1:])

    # Score on the original scale; the first forecast is dropped so the
    # forecasts line up with the validation series.
    val_actual = scaler.inverse_transform(val_transformed)
    forecast_actual = scaler.inverse_transform(backtest[1:])

    print("R2: {}".format(r2_score(val_actual, forecast_actual, intersect=False)))
    print("MAPE: {}".format(mape(val_actual, forecast_actual)))
    print("MASE: {}".format(mase(val_actual, forecast_actual, train)))
    print("MAE: {}".format(mae(val_actual, forecast_actual)))

    scaler.inverse_transform(backtest).plot(label='backtest')
    scaler.inverse_transform(ts_transformed).plot(label='actual')
    plt.title("H&M Daily, LSTM Model")
    plt.xlabel("Date")
    plt.ylabel("Count")
    plt.legend()
    plt.show()