# Common imports assumed by the snippets below.
import json
import logging
import pickle
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torch.nn import MSELoss

from darts import TimeSeries
from darts.dataprocessing.transformers import MissingValuesFiller, Scaler
from darts.metrics import mae, mape, mase, mse, r2_score
from darts.models import ExponentialSmoothing, RNNModel, TCNModel


def eval_lstm_model(serialized_model, dataset):
    lstm_model = pickle.loads(serialized_model)
    df = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(df, time_col='time_interval', value_cols=['count'])
    train, val = ts.split_after(0.8)  # 80% train, 20% val

    scaler = Scaler()
    ts = scaler.fit_transform(ts)
    val_transformed = scaler.transform(val)
    train_transformed = scaler.transform(train)

    backtest = lstm_model.historical_forecasts(
        series=ts,
        start=0.8,
        forecast_horizon=1,
        stride=1,
        retrain=False,
    )

    val_transformed = scaler.inverse_transform(val_transformed)
    backtest = scaler.inverse_transform(backtest)
    train_transformed = scaler.inverse_transform(train_transformed)

    scores = dict()
    scores['r2'] = r2_score(val_transformed, backtest[1:])
    scores['mase_score'] = mase(val_transformed, backtest[1:], train_transformed)
    scores['mae_score'] = mae(val_transformed, backtest[1:])
    scores['rmse_score'] = np.sqrt(mse(val_transformed, backtest[1:]))
    try:
        scores['mape_score'] = mape(val_transformed, backtest[1:])
    except ValueError:
        scores['mape_score'] = "Could not be calculated (zero value in time series)"
    return scores
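# Usage sketch for eval_lstm_model. The pickle path and the synthetic dataset
# below are assumptions for illustration, not taken from the original code.
with open('lstm_model.pickle', 'rb') as f:  # hypothetical path
    serialized_model = f.read()

dates = pd.date_range('2021-01-01', periods=100, freq='D')
dataset = {'time_interval': dates.strftime('%Y-%m-%d').tolist(),
           'count': (50 + 10 * np.sin(np.arange(100) / 7)).tolist()}

print(eval_lstm_model(serialized_model, dataset))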
def create_time_series(resampling_methods, chunk_ids, chunk_type, original_chunks,
                       parameter, window_idx, configs, mean=0, std=1):
    # Apply filler, as some time series have missing measurements, which would
    # lead to a ValueError in prediction
    filler = MissingValuesFiller()

    for resampling in resampling_methods:
        series_per_resampling = dict()
        pred_scalers = dict()

        for chunk_id in chunk_ids:
            # Copy the slice to avoid a SettingWithCopyWarning when adding columns
            current_chunk = original_chunks[
                original_chunks['CHUNK_ID_FILLED_TH'] == chunk_id].copy()

            # Scale chunk values if configured, and create filled time series
            if configs.scaling_method == 'standard':
                current_chunk[f'SCALED_{resampling}'] = apply_standard_scaling(
                    current_chunk[f'VITAL_PARAMTER_VALUE_{resampling}_RESAMPLING'], mean, std)

                series_per_resampling[chunk_id] = filler.transform(TimeSeries.from_dataframe(
                    df=current_chunk,
                    time_col='CHARTTIME',
                    value_cols=[f'SCALED_{resampling}'],
                    freq='H'))

            elif configs.scaling_method == 'min-max':
                # Darts' Scaler wraps sklearn's MinMaxScaler by default
                current_scaler = Scaler()

                series_per_resampling[chunk_id] = current_scaler.fit_transform(filler.transform(
                    TimeSeries.from_dataframe(
                        df=current_chunk,
                        time_col='CHARTTIME',
                        value_cols=[f'VITAL_PARAMTER_VALUE_{resampling}_RESAMPLING'],
                        freq='H')))

                if chunk_type == 'pred' and \
                        ((configs.with_exogenous_input and resampling != 'MEDIAN')
                         or not configs.with_exogenous_input):
                    pred_scalers[chunk_id] = current_scaler

            else:
                # Apply no scaling
                series_per_resampling[chunk_id] = filler.transform(TimeSeries.from_dataframe(
                    df=current_chunk,
                    time_col='CHARTTIME',
                    value_cols=[f'VITAL_PARAMTER_VALUE_{resampling}_RESAMPLING'],
                    freq='H'))

        # Save series dict
        path = get_script_path(configs)
        write_pickle_file(f'{path}/time_series/time_series_{parameter}_win{window_idx}_'
                          f'{chunk_type}_{resampling.capitalize()}.pickle',
                          series_per_resampling)

        # Save scaler dict if it was filled
        if pred_scalers:
            write_pickle_file(f'{path}/scalers/scalers_{parameter}_win{window_idx}_'
                              f'{resampling.capitalize()}.pickle', pred_scalers)
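# create_time_series relies on helpers not shown in this excerpt
# (apply_standard_scaling, get_script_path, write_pickle_file). A minimal
# sketch of write_pickle_file, under the plausible assumption that it simply
# pickles an object to a path:
import os

def write_pickle_file(path, obj):
    # Ensure the target directory exists, then serialize the object
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f)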
def test_scaling(self):
    self.series3 = self.series1[:1]
    transformer1 = Scaler(MinMaxScaler(feature_range=(0, 2)))
    transformer2 = Scaler(StandardScaler())

    series1_tr1 = transformer1.fit_transform(self.series1)
    series1_tr2 = transformer2.fit_transform(self.series1)
    series3_tr2 = transformer2.transform(self.series3)

    # should comply with scaling constraints
    self.assertAlmostEqual(min(series1_tr1.values().flatten()), 0.)
    self.assertAlmostEqual(max(series1_tr1.values().flatten()), 2.)
    self.assertAlmostEqual(np.mean(series1_tr2.values().flatten()), 0.)
    self.assertAlmostEqual(np.std(series1_tr2.values().flatten()), 1.)

    # test inverse transform
    series1_recovered = transformer2.inverse_transform(series1_tr2)
    series3_recovered = transformer2.inverse_transform(series3_tr2)
    np.testing.assert_almost_equal(series1_recovered.values().flatten(),
                                   self.series1.values().flatten())
    self.assertEqual(series1_recovered.width, self.series1.width)
    self.assertEqual(series3_recovered, series1_recovered[:1])
def plot_tcn_predictions(serialized_model, dataset):
    df = pd.DataFrame.from_dict(dataset)
    model = pickle.loads(serialized_model)
    ts = TimeSeries.from_dataframe(df, time_col='time_interval', value_cols=['count'])

    scaler = Scaler()
    ts = scaler.fit_transform(ts)
    model.fit(series=ts)
    prediction = scaler.inverse_transform(model.predict(7))  # predict a week ahead
    prediction.plot(label='TCN Prediction', lw=3, c='red')
def test_multi_ts_scaling(self):
    transformer1 = Scaler(MinMaxScaler(feature_range=(0, 2)))
    transformer2 = Scaler(StandardScaler())

    series_array = [self.series1, self.series2]
    series_array_tr1 = transformer1.fit_transform(series_array)
    series_array_tr2 = transformer2.fit_transform(series_array)

    for index in range(len(series_array)):
        self.assertAlmostEqual(min(series_array_tr1[index].values().flatten()), 0.)
        self.assertAlmostEqual(max(series_array_tr1[index].values().flatten()), 2.)
        self.assertAlmostEqual(np.mean(series_array_tr2[index].values().flatten()), 0.)
        self.assertAlmostEqual(np.std(series_array_tr2[index].values().flatten()), 1.)

    series_array_rec1 = transformer1.inverse_transform(series_array_tr1)
    series_array_rec2 = transformer2.inverse_transform(series_array_tr2)

    for index in range(len(series_array)):
        np.testing.assert_almost_equal(series_array_rec1[index].values().flatten(),
                                       series_array[index].values().flatten())
        np.testing.assert_almost_equal(series_array_rec2[index].values().flatten(),
                                       series_array[index].values().flatten())
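# The two tests above reference self.series1 and self.series2 without showing
# their construction. A minimal setUp sketch with synthetic fixtures (the
# original suite's fixtures may differ):
def setUp(self):
    times = pd.date_range('2000-01-01', periods=100, freq='D')
    self.series1 = TimeSeries.from_times_and_values(times, np.sin(np.arange(100)))
    self.series2 = TimeSeries.from_times_and_values(times, np.arange(100, dtype=float))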
def get_tcn_predictions(model, dataset):
    df = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(df, time_col='time_interval', value_cols=['count'])

    scaler = Scaler()
    ts = scaler.fit_transform(ts)
    model.fit(series=ts)
    prediction = scaler.inverse_transform(model.predict(7))  # predict a week ahead

    prediction_json = json.loads(prediction.to_json())
    dates = prediction_json['index']
    counts = prediction_json['data']
    prediction_dataset = to_dataset(dates, counts)
    logging.debug(prediction_dataset)
    return prediction_dataset
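# to_dataset is not shown in this excerpt. A plausible minimal version,
# assuming it rebuilds the dict-of-columns shape used elsewhere in this
# module (TimeSeries.to_json serializes with pandas' 'split' orientation,
# so each row in 'data' is a list of component values):
def to_dataset(dates, counts):
    return {'time_interval': dates,
            'count': [row[0] for row in counts]}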
def get_lstm_backtest(serialized_model, dataset):
    df = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(df, time_col='time_interval', value_cols=['count'])

    scaler = Scaler()
    ts = scaler.fit_transform(ts)
    model = pickle.loads(serialized_model)
    backtest = model.historical_forecasts(series=ts,
                                          start=0.8,
                                          forecast_horizon=1,
                                          stride=1,
                                          retrain=False,
                                          verbose=False)
    backtest = scaler.inverse_transform(backtest[1:])
    ts = scaler.inverse_transform(ts)
    backtest.plot(label='LSTM Model', lw=3, c='orange')
def get_tcn_backtest(serialized_model, dataset, topic):
    df = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(df, time_col='time_interval', value_cols=['count'])

    scaler = Scaler()
    ts = scaler.fit_transform(ts)
    model = pickle.loads(serialized_model)
    backtest = model.historical_forecasts(series=ts,
                                          start=0.8,
                                          forecast_horizon=1,
                                          stride=1,
                                          retrain=False,
                                          verbose=False)
    backtest = scaler.inverse_transform(backtest[1:])
    ts = scaler.inverse_transform(ts)
    backtest.plot(label='TCN Model', lw=3, c='red')
    plt.title("{} Daily".format(topic))
    plt.xlabel("Date")
    plt.ylabel("Count")
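# Sketch of combining the two backtest helpers above on a single figure
# (lstm_bytes, tcn_bytes, and the topic label are placeholders):
plt.figure(figsize=(10, 5))
get_lstm_backtest(lstm_bytes, dataset)
get_tcn_backtest(tcn_bytes, dataset, topic='Jeans')
plt.legend()
plt.show()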
df = df.reset_index()  # make sure "time" is a regular column, not the index
df["time"] = pd.to_datetime(df["time"], utc=True)

# Transform DataFrame to TimeSeries object
df_series = TimeSeries.from_dataframe(df[["time", "price actual"]],
                                      time_col='time',
                                      value_cols="price actual")

### Train and Test Model #######################################################

# Train/test split
train, val = df_series.split_before(pd.Timestamp("2021-03-01 00:00:00+00:00"))

# Normalize the time series (note: we avoid fitting the transformer on the validation set)
transformer = Scaler()
train_transformed = transformer.fit_transform(train)
val_transformed = transformer.transform(val)
series_transformed = transformer.transform(df_series)

# Define the LSTM model parameters
my_model = RNNModel(
    model='LSTM',
    input_chunk_length=24,
    output_chunk_length=1,
    hidden_size=25,
    n_rnn_layers=1,
    dropout=0.2,
    batch_size=16,
    n_epochs=20,
    optimizer_kwargs={'lr': 1e-3},
    model_name='Forecast_LSTM_next_hour',
)
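# A sketch of the training and evaluation steps that typically follow such a
# model definition (not part of the original excerpt):
my_model.fit(train_transformed, verbose=True)
pred = my_model.predict(n=len(val_transformed))
pred = transformer.inverse_transform(pred)
print("MAPE: {:.2f}%".format(mape(val, pred)))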
}

# Extract 80% of series for training
train_series = {
    chunk_id: relevant_series[chunk_id]
    for chunk_id in relevant_chunk_ids
    if chunk_id not in list(pred_series.keys())
}

# Define and fit scalers for training and prediction set
pred_scalers = dict()

# Normalize values
for chunk_id in train_series.keys():
    current_scaler = Scaler()
    train_series[chunk_id] = current_scaler.fit_transform(train_series[chunk_id])

for chunk_id in pred_series.keys():
    current_scaler = Scaler()
    pred_series[chunk_id] = current_scaler.fit_transform(pred_series[chunk_id])
    pred_scalers[chunk_id] = current_scaler

print(f'#Chunks for training: {len(train_series)}', file=sys.stderr)
print(f'#Chunks for prediction: {len(pred_series)}', file=sys.stderr)

# Save training dict as pickle file
train_series_f = open(
    f'./data/{approach}/{n_chunks}_chunks/{style}/{parameter}/{endogenous_input}/'
    f'01_train_series_scaled_window{window_idx}.pickle', 'wb')
pickle.dump(train_series, train_series_f)
def get_tcn_model(dataset=None, plot=False, verbose=False):
    if dataset is None:
        df = pd.read_csv("jeans_day.csv")
    else:
        df = pd.DataFrame.from_dict(dataset)

    ts = TimeSeries.from_dataframe(df, time_col='time_interval', value_cols=['count'])
    train, val = ts.split_after(0.8)  # 80% train, 20% val

    scaler = Scaler()
    train_transformed = scaler.fit_transform(train)
    val_transformed = scaler.transform(val)
    ts_transformed = scaler.transform(ts)

    params = dict()
    params['kernel_size'] = [4, 6]
    params['num_filters'] = [10]
    params['random_state'] = [0, 1]
    params['input_chunk_length'] = [14]
    params['output_chunk_length'] = [1]
    params['dilation_base'] = [2, 3]
    params['n_epochs'] = [100]
    params['dropout'] = [0]
    params['loss_fn'] = [MSELoss()]
    params['weight_norm'] = [True]

    tcn = TCNModel.gridsearch(parameters=params,
                              series=train_transformed,
                              val_series=val_transformed,
                              verbose=verbose,
                              metric=mse)
    params = tcn[1]
    tcn_model = tcn[0]
    tcn_model.fit(series=train_transformed)

    if plot:
        backtest = tcn_model.historical_forecasts(series=ts_transformed,
                                                  start=0.8,
                                                  forecast_horizon=1,
                                                  stride=1,
                                                  retrain=False,
                                                  verbose=verbose)
        val = scaler.inverse_transform(val_transformed)
        backtest = scaler.inverse_transform(backtest)
        train = scaler.inverse_transform(train_transformed)
        print(scaler.inverse_transform(tcn_model.predict(7)))
        print("R2: {}".format(r2_score(val, backtest[1:], intersect=False)))
        print("MAPE: {}".format(mape(val, backtest[1:])))
        print("MASE: {}".format(mase(val, backtest[1:], train)))
        print("MAE: {}".format(mae(val, backtest[1:])))
        print("RMSE: {}".format(np.sqrt(mse(val, backtest[1:]))))
        backtest.plot(label='backtest')
        ts.plot(label='actual')
        plt.title("H&M Daily, TCN Model")
        plt.xlabel("Date")
        plt.ylabel("Count")
        plt.legend()
        plt.show()
    else:
        return [tcn_model, params]
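# Example invocation (assumes a jeans_day.csv with 'time_interval' and
# 'count' columns is available, as the default branch implies):
tcn_model, best_params = get_tcn_model(plot=False, verbose=True)
print(best_params)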
ts_var = 'Memory_Used'
time_df = df.filter(['date', ts_var])

# Convert variable from object dtype to numeric dtype
time_df[ts_var] = pd.to_numeric(time_df[ts_var], errors='coerce')

# Generate time series using darts
series = TimeSeries.from_dataframe(time_df, 'date', ts_var, freq='S')

# Treat missing values
filler = MissingValuesFiller()
series = filler.transform(series)

# Scale the values
scaler = Scaler()
rescaled = scaler.fit_transform(series)

# Training and testing dataset
train, val = rescaled.split_after(pd.Timestamp('2020-01-23 19:41:50'))

# Exponential smoothing model
model = ExponentialSmoothing()
model.fit(train)
prediction = model.predict(len(val))

# Evaluation metrics
rescaled.plot(label='actual')
prediction.plot(label='forecast', lw=3)
plt.legend()
print("MAPE:", mape(val, prediction))  # darts metrics take the actual series first
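# Optional baseline comparison sketch: a naive seasonal model on the same
# split puts the MAPE above in context (K=1 repeats the last observed value):
from darts.models import NaiveSeasonal

naive = NaiveSeasonal(K=1)
naive.fit(train)
naive_pred = naive.predict(len(val))
print("Naive MAPE:", mape(val, naive_pred))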
    if chunk_id not in list(pred_series_endo_low.keys())
}

train_series_exo = {
    chunk_id: relevant_series_exo[chunk_id]
    for chunk_id in relevant_chunk_ids
    if chunk_id not in list(pred_series_exo.keys())
}

# Define and fit scalers for training and prediction set
pred_scalers_high, pred_scalers_low = dict(), dict()

# Normalize values
for chunk_id in train_series_endo_high.keys():
    current_scaler_endo_high = Scaler()
    train_series_endo_high[chunk_id] = current_scaler_endo_high.fit_transform(
        train_series_endo_high[chunk_id])

    current_scaler_endo_low = Scaler()
    train_series_endo_low[chunk_id] = current_scaler_endo_low.fit_transform(
        train_series_endo_low[chunk_id])

    current_scaler_exo = Scaler()
    train_series_exo[chunk_id] = current_scaler_exo.fit_transform(
        train_series_exo[chunk_id])

for chunk_id in pred_series_endo_high.keys():
    current_scaler_endo_high = Scaler()
    pred_series_endo_high[chunk_id] = current_scaler_endo_high.fit_transform(
        pred_series_endo_high[chunk_id])
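# Why the prediction scalers are kept (sketch): after forecasting a scaled
# chunk, the stored scaler maps the forecast back to the original unit.
# chunk_forecasts is a hypothetical dict of per-chunk predictions:
for chunk_id, forecast in chunk_forecasts.items():
    chunk_forecasts[chunk_id] = pred_scalers_high[chunk_id].inverse_transform(forecast)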
def get_lstm_model(dataset=None, plot=False, verbose=False):
    if dataset is None:
        df = pd.read_csv("jeans_day.csv")
    else:
        df = pd.DataFrame.from_dict(dataset)

    ts = TimeSeries.from_dataframe(df, time_col='time_interval', value_cols=['count'])
    train, val = ts.split_after(0.8)

    scaler = Scaler()
    train_transformed = scaler.fit_transform(train)
    val_transformed = scaler.transform(val)
    ts_transformed = scaler.transform(ts)

    params = dict()
    params['model'] = ["LSTM"]
    params['hidden_size'] = [50, 75, 100]
    params['n_rnn_layers'] = [1]
    params['input_chunk_length'] = [14]
    params['output_chunk_length'] = [1]
    params['n_epochs'] = [100]
    params['dropout'] = [0]
    params['batch_size'] = [4, 6]
    params['random_state'] = [0, 1]
    params['loss_fn'] = [MSELoss()]

    lstm = RNNModel.gridsearch(parameters=params,
                               series=train_transformed,
                               val_series=val_transformed,
                               verbose=verbose,
                               metric=mse)
    params = lstm[1]
    lstm_model = lstm[0]
    lstm_model.fit(train_transformed, verbose=True)

    if plot:
        backtest = lstm_model.historical_forecasts(series=ts_transformed,
                                                   start=0.8,
                                                   forecast_horizon=1,
                                                   stride=1,
                                                   retrain=False,
                                                   verbose=False)
        print(val)
        print(backtest[1:])
        print("R2: {}".format(r2_score(scaler.inverse_transform(val_transformed),
                                       scaler.inverse_transform(backtest[1:]),
                                       intersect=False)))
        print("MAPE: {}".format(mape(scaler.inverse_transform(val_transformed),
                                     scaler.inverse_transform(backtest[1:]))))
        print("MASE: {}".format(mase(scaler.inverse_transform(val_transformed),
                                     scaler.inverse_transform(backtest[1:]),
                                     train)))
        print("MAE: {}".format(mae(scaler.inverse_transform(val_transformed),
                                   scaler.inverse_transform(backtest[1:]))))
        scaler.inverse_transform(backtest).plot(label='backtest')
        scaler.inverse_transform(ts_transformed).plot(label='actual')
        plt.title("H&M Daily, LSTM Model")
        plt.xlabel("Date")
        plt.ylabel("Count")
        plt.legend()
        plt.show()
    else:
        return [lstm_model, params]
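# The serialized_model arguments used throughout this module can be produced
# by pickling a fitted model returned here (sketch; the file name is an
# assumption):
lstm_model, _ = get_lstm_model()
with open('lstm_model.pickle', 'wb') as f:
    pickle.dump(lstm_model, f)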