def eval_tcn_model(serialized_model, dataset):
    """Score a pickled forecasting model on the last 20% of ``dataset``.

    The series is built from the ``time_interval`` (time) and ``count``
    (value) columns of ``dataset``, scaled with a ``Scaler`` fitted on the
    whole series, and backtested with one-step-ahead forecasts starting at
    the 80% mark without retraining. Metrics are computed on the values
    mapped back to the original scale.

    Args:
        serialized_model: Pickled model exposing ``historical_forecasts``.
        dataset: Mapping accepted by ``pd.DataFrame.from_dict`` that yields
            ``time_interval`` and ``count`` columns.

    Returns:
        dict with keys ``r2``, ``mase_score``, ``mae_score``, ``rmse_score``
        and ``mape_score``. ``mape_score`` is an explanatory string when
        MAPE cannot be computed.
    """
    # SECURITY: pickle.loads executes arbitrary code embedded in the payload.
    # Only call this with bytes produced by a trusted source.
    tcn_model = pickle.loads(serialized_model)

    df = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(df,
                                   time_col='time_interval',
                                   value_cols=['count'])
    train, val = ts.split_after(0.8)  # 80% train, 20% val

    scaler = Scaler()
    ts = scaler.fit_transform(ts)
    val_transformed = scaler.transform(val)
    train_transformed = scaler.transform(train)

    backtest = tcn_model.historical_forecasts(
        series=ts,
        start=0.8,
        forecast_horizon=1,
        stride=1,
        retrain=False,
    )

    # Bring everything back to the original scale before scoring.
    val_actual = scaler.inverse_transform(val_transformed)
    backtest = scaler.inverse_transform(backtest)
    train_actual = scaler.inverse_transform(train_transformed)

    # The first backtest point is dropped, presumably so the forecast lines
    # up with ``val`` -- TODO confirm against the split/start semantics.
    forecast = backtest[1:]

    scores = {}
    scores['r2'] = r2_score(val_actual, forecast)
    scores['mase_score'] = mase(val_actual, forecast, train_actual)
    scores['mae_score'] = mae(val_actual, forecast)
    scores['rmse_score'] = np.sqrt(mse(val_actual, forecast))
    try:
        scores['mape_score'] = mape(val_actual, forecast)
    except ValueError:
        # darts raises ValueError when the actual series contains zeros.
        # Catching only that keeps KeyboardInterrupt/SystemExit and
        # unrelated failures from being silently swallowed.
        scores[
            'mape_score'] = "Could not be calculated (Zero value in time series)"
    return scores
def get_lstm_backtest(serialized_model, dataset):
    """Plot the one-step-ahead backtest of a pickled LSTM model.

    The series is built from the ``time_interval`` and ``count`` columns of
    ``dataset``, scaled, backtested from the 80% mark without retraining,
    and the forecast (minus its first point) is drawn in the original scale
    on the current matplotlib axes.

    Args:
        serialized_model: Pickled model exposing ``historical_forecasts``.
        dataset: Mapping accepted by ``pd.DataFrame.from_dict``.

    Returns:
        None. The only effect is the plotted line.
    """
    df = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(df,
                                   time_col='time_interval',
                                   value_cols=['count'])
    scaler = Scaler()
    ts = scaler.fit_transform(ts)

    # SECURITY: pickle.loads executes arbitrary code embedded in the payload.
    # Only call this with bytes produced by a trusted source.
    model = pickle.loads(serialized_model)
    backtest = model.historical_forecasts(series=ts,
                                          start=0.8,
                                          forecast_horizon=1,
                                          stride=1,
                                          retrain=False,
                                          verbose=False)
    # The scaled series is not needed past this point, so it is no longer
    # inverse-transformed into an unused local.
    backtest = scaler.inverse_transform(backtest[1:])
    backtest.plot(label='LSTM Model', lw=3, c='orange')
def get_tcn_backtest(serialized_model, dataset, topic):
    """Plot the one-step-ahead backtest of a pickled TCN model.

    The series is built from the ``time_interval`` and ``count`` columns of
    ``dataset``, scaled, backtested from the 80% mark without retraining,
    and the forecast (minus its first point) is drawn in the original scale
    on the current matplotlib axes. The title and axis labels of the current
    figure are set as well.

    Args:
        serialized_model: Pickled model exposing ``historical_forecasts``.
        dataset: Mapping accepted by ``pd.DataFrame.from_dict``.
        topic: Value formatted into the plot title (``"<topic> Daily"``).

    Returns:
        None. The only effects are on the current matplotlib figure.
    """
    df = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(df,
                                   time_col='time_interval',
                                   value_cols=['count'])
    scaler = Scaler()
    ts = scaler.fit_transform(ts)

    # SECURITY: pickle.loads executes arbitrary code embedded in the payload.
    # Only call this with bytes produced by a trusted source.
    model = pickle.loads(serialized_model)
    backtest = model.historical_forecasts(series=ts,
                                          start=0.8,
                                          forecast_horizon=1,
                                          stride=1,
                                          retrain=False,
                                          verbose=False)
    # The scaled series is not needed past this point, so it is no longer
    # inverse-transformed into an unused local.
    backtest = scaler.inverse_transform(backtest[1:])
    backtest.plot(label='TCN Model', lw=3, c='red')

    plt.title("{} Daily".format(topic))
    plt.xlabel("Date")
    plt.ylabel("Count")
def plot_tcn_predictions(serialized_model, dataset):
    """Refit a pickled model on ``dataset`` and plot a 7-step forecast.

    The model is unpickled, fitted on the scaled ``count`` series indexed by
    ``time_interval``, and its 7-step prediction is drawn in the original
    scale on the current matplotlib axes. Nothing is returned.
    """
    frame = pd.DataFrame.from_dict(dataset)
    # NOTE: unpickling runs code contained in the payload; the bytes are
    # assumed to come from a trusted source.
    forecaster = pickle.loads(serialized_model)

    series = TimeSeries.from_dataframe(frame,
                                       time_col='time_interval',
                                       value_cols=['count'])
    scaler = Scaler()
    scaled_series = scaler.fit_transform(series)
    forecaster.fit(series=scaled_series)

    # Predict a week ahead, then undo the scaling for display.
    scaled_forecast = forecaster.predict(7)
    forecast = scaler.inverse_transform(scaled_forecast)
    forecast.plot(label='TCN Prediction', lw=3, c='red')
def test_multi_ts_scaling(self):
    """Scaling a list of series scales each one and round-trips exactly.

    A min-max scaler with range (0, 2) and a standard scaler are each fitted
    on ``[self.series1, self.series2]``. Every transformed series must meet
    its scaler's constraints, and inverse-transforming must recover the
    original values.
    """
    minmax = Scaler(MinMaxScaler(feature_range=(0, 2)))
    standard = Scaler(StandardScaler())

    series_array = [self.series1, self.series2]
    minmax_scaled = minmax.fit_transform(series_array)
    standard_scaled = standard.fit_transform(series_array)

    # Each series must satisfy the constraints of both scalers.
    for idx, _ in enumerate(series_array):
        minmax_values = minmax_scaled[idx].values().flatten()
        self.assertAlmostEqual(min(minmax_values), 0.)
        self.assertAlmostEqual(max(minmax_values), 2.)

        standard_values = standard_scaled[idx].values().flatten()
        self.assertAlmostEqual(np.mean(standard_values), 0.)
        self.assertAlmostEqual(np.std(standard_values), 1.)

    minmax_recovered = minmax.inverse_transform(minmax_scaled)
    standard_recovered = standard.inverse_transform(standard_scaled)

    # Inverse transform must give back the untouched inputs.
    for idx, original in enumerate(series_array):
        expected = original.values().flatten()
        np.testing.assert_almost_equal(
            minmax_recovered[idx].values().flatten(), expected)
        np.testing.assert_almost_equal(
            standard_recovered[idx].values().flatten(), expected)
def get_tcn_predictions(model, dataset):
    """Fit ``model`` on ``dataset`` and return a 7-step forecast as a dataset.

    The ``count`` series (indexed by ``time_interval``) is scaled, the model
    is fitted on it, and the 7-step prediction is mapped back to the original
    scale. The forecast's JSON ``index`` and ``data`` entries are handed to
    ``to_dataset``; the result is logged at debug level and returned.
    """
    frame = pd.DataFrame.from_dict(dataset)
    series = TimeSeries.from_dataframe(frame,
                                       time_col='time_interval',
                                       value_cols=['count'])
    scaler = Scaler()
    scaled_series = scaler.fit_transform(series)
    model.fit(series=scaled_series)

    # Predict a week ahead, then undo the scaling.
    forecast = scaler.inverse_transform(model.predict(7))

    payload = json.loads(forecast.to_json())
    prediction_dataset = to_dataset(payload['index'], payload['data'])
    logging.debug(prediction_dataset)
    return prediction_dataset
def test_scaling(self):
    """A single series is scaled within bounds and recovered on inversion.

    Also checks that a scaler fitted on ``self.series1`` can transform and
    invert a one-element slice of it (stored as ``self.series3``).
    """
    self.series3 = self.series1[:1]

    minmax = Scaler(MinMaxScaler(feature_range=(0, 2)))
    standard = Scaler(StandardScaler())

    minmax_scaled = minmax.fit_transform(self.series1)
    standard_scaled = standard.fit_transform(self.series1)
    head_scaled = standard.transform(self.series3)

    # should comply with scaling constraints
    minmax_values = minmax_scaled.values().flatten()
    self.assertAlmostEqual(min(minmax_values), 0.)
    self.assertAlmostEqual(max(minmax_values), 2.)

    standard_values = standard_scaled.values().flatten()
    self.assertAlmostEqual(np.mean(standard_values), 0.)
    self.assertAlmostEqual(np.std(standard_values), 1.)

    # test inverse transform
    full_recovered = standard.inverse_transform(standard_scaled)
    head_recovered = standard.inverse_transform(head_scaled)

    np.testing.assert_almost_equal(full_recovered.values().flatten(),
                                   self.series1.values().flatten())
    self.assertEqual(full_recovered.width, self.series1.width)
    self.assertEqual(head_recovered, full_recovered[:1])
def eval_model(model):
    """Plot a 96-step forecast from ``model`` and title it with its MAPE.

    Reads ``series_transformed`` and ``val_transformed`` from the enclosing
    module scope. Draws on a new 8x5 matplotlib figure and returns nothing.
    """
    pred_series = model.predict(n=96)
    plt.figure(figsize=(8, 5))
    series_transformed.plot(label='actual')
    pred_series.plot(label='forecast')
    # darts metrics take the actual series first and the prediction second.
    # MAPE divides by the actual values, so the argument order matters.
    plt.title('MAPE: {:.2f}%'.format(mape(val_transformed, pred_series)))
    plt.legend()


# Evaluate the model as trained, then the best checkpoint saved for it.
eval_model(my_model)

best_model = RNNModel.load_from_checkpoint(model_name='Eq_RNN', best=True)
eval_model(best_model)

# Backtest from the given timestamp with a 12-step horizon, reusing the
# already trained model (no retraining).
backtest_series = my_model.historical_forecasts(series_transformed,
                                                start=pd.Timestamp('20021231'),
                                                forecast_horizon=12,
                                                retrain=False,
                                                verbose=True)

plt.figure(figsize=(8, 5))
series_transformed.plot(label='actual')
backtest_series.plot(label='backtest')
plt.legend()
plt.title('Backtest, starting Jan 2003, 12-months horizon')

# Report the backtest error on the original (un-scaled) values.
print('MAPE: {:.2f}%'.format(
    mape(transformer.inverse_transform(series_transformed),
         transformer.inverse_transform(backtest_series))))
def get_tcn_model(dataset=None, plot=False, verbose=False):
    """Grid-search and fit a TCN model on an 80/20 split of the data.

    Args:
        dataset: Mapping accepted by ``pd.DataFrame.from_dict``. When
            ``None``, ``jeans_day.csv`` is read instead.
        plot: When true, print evaluation metrics and show a backtest plot
            instead of returning the model.
        verbose: Forwarded to the grid search and to the backtest.

    Returns:
        ``[tcn_model, params]`` when ``plot`` is false, otherwise ``None``.
    """
    df = (pd.read_csv("jeans_day.csv")
          if dataset is None else pd.DataFrame.from_dict(dataset))
    ts = TimeSeries.from_dataframe(df,
                                   time_col='time_interval',
                                   value_cols=['count'])
    train, val = ts.split_after(0.8)  # 80% train, 20% val

    # The scaler is fitted on the training part only.
    scaler = Scaler()
    train_transformed = scaler.fit_transform(train)
    val_transformed = scaler.transform(val)
    ts_transformed = scaler.transform(ts)

    # Candidate values for each hyper-parameter explored by the search.
    grid = {
        'kernel_size': [4, 6],
        'num_filters': [10],
        'random_state': [0, 1],
        'input_chunk_length': [14],
        'output_chunk_length': [1],
        'dilation_base': [2, 3],
        'n_epochs': [100],
        'dropout': [0],
        'loss_fn': [MSELoss()],
        'weight_norm': [True],
    }

    search_result = TCNModel.gridsearch(parameters=grid,
                                        series=train_transformed,
                                        val_series=val_transformed,
                                        verbose=verbose,
                                        metric=mse)
    tcn_model = search_result[0]
    params = search_result[1]
    tcn_model.fit(series=train_transformed)

    if not plot:
        return [tcn_model, params]

    backtest = tcn_model.historical_forecasts(series=ts_transformed,
                                              start=0.8,
                                              forecast_horizon=1,
                                              stride=1,
                                              retrain=False,
                                              verbose=verbose)

    # Map everything back to the original scale for reporting.
    val = scaler.inverse_transform(val_transformed)
    backtest = scaler.inverse_transform(backtest)
    train = scaler.inverse_transform(train_transformed)

    print(scaler.inverse_transform(tcn_model.predict(7)))
    print("R2: {}".format(r2_score(val, backtest[1:], intersect=False)))
    print("MAPE: {}".format(mape(val, backtest[1:])))
    print("MASE: {}".format(mase(val, backtest[1:], train)))
    print("MAE: {}".format(mae(val, backtest[1:])))
    print("RMSE: {}".format(np.sqrt(mse(val, backtest[1:]))))

    backtest.plot(label='backtest')
    ts.plot(label='actual')
    plt.title("H&M Daily, TCN Model")
    plt.xlabel("Date")
    plt.ylabel("Count")
    plt.legend()
    plt.show()
def get_lstm_model(dataset=None, plot=False, verbose=False):
    """Grid-search and fit an LSTM model on an 80/20 split of the data.

    Args:
        dataset: Mapping accepted by ``pd.DataFrame.from_dict``. When
            ``None``, ``jeans_day.csv`` is read instead.
        plot: When true, print evaluation metrics and show a backtest plot
            instead of returning the model.
        verbose: Forwarded to the grid search.

    Returns:
        ``[lstm_model, params]`` when ``plot`` is false, otherwise ``None``.
    """
    df = (pd.read_csv("jeans_day.csv")
          if dataset is None else pd.DataFrame.from_dict(dataset))
    ts = TimeSeries.from_dataframe(df,
                                   time_col='time_interval',
                                   value_cols=['count'])
    train, val = ts.split_after(0.8)

    # The scaler is fitted on the training part only.
    scaler = Scaler()
    train_transformed = scaler.fit_transform(train)
    val_transformed = scaler.transform(val)
    ts_transformed = scaler.transform(ts)

    # Candidate values for each hyper-parameter explored by the search.
    grid = {
        'model': ["LSTM"],
        'hidden_size': [50, 75, 100],
        'n_rnn_layers': [1],
        'input_chunk_length': [14],
        'output_chunk_length': [1],
        'n_epochs': [100],
        'dropout': [0],
        'batch_size': [4, 6],
        'random_state': [0, 1],
        'loss_fn': [MSELoss()],
    }

    search_result = RNNModel.gridsearch(parameters=grid,
                                        series=train_transformed,
                                        val_series=val_transformed,
                                        verbose=verbose,
                                        metric=mse)
    lstm_model = search_result[0]
    params = search_result[1]
    lstm_model.fit(train_transformed, verbose=True)

    if not plot:
        return [lstm_model, params]

    backtest = lstm_model.historical_forecasts(series=ts_transformed,
                                               start=0.8,
                                               forecast_horizon=1,
                                               stride=1,
                                               retrain=False,
                                               verbose=False)
    print(val)
    print(backtest[1:])

    # Metrics are reported on the original scale.
    actual = scaler.inverse_transform(val_transformed)
    forecast = scaler.inverse_transform(backtest[1:])

    print("R2: {}".format(r2_score(actual, forecast, intersect=False)))
    print("MAPE: {}".format(mape(actual, forecast)))
    print("MASE: {}".format(mase(actual, forecast, train)))
    print("MAE: {}".format(mae(actual, forecast)))

    scaler.inverse_transform(backtest).plot(label='backtest')
    scaler.inverse_transform(ts_transformed).plot(label='actual')
    plt.title("H&M Daily, LSTM Model")
    plt.xlabel("Date")
    plt.ylabel("Count")
    plt.legend()
    plt.show()