def test_lagged_variables(test_data, kwargs):
    """Check that lagged features line up with their unlagged source columns.

    Builds a dataset with lags of 1 and 2 for a real ("volume") and a
    categorical ("agency") variable, then verifies that rolling the lagged
    column back by its lag reproduces the original column.

    Fix: renamed local ``vars`` -> ``feature_names`` — ``vars`` shadows the
    Python builtin.
    """
    dataset = TimeSeriesDataSet(
        test_data.copy(),
        time_idx="time_idx",
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
        min_prediction_length=1,
        min_encoder_length=3,  # one more than max lag for validation
        time_varying_unknown_reals=["volume"],
        time_varying_unknown_categoricals=["agency"],
        lags={"volume": [1, 2], "agency": [1, 2]},
        add_encoder_length=False,
        **kwargs,
    )

    x_all, _ = next(iter(dataset.to_dataloader()))

    for name in ["volume", "agency"]:
        # pick the tensor and variable list matching the feature's dtype
        if name in dataset.reals:
            feature_names = dataset.reals
            x = x_all["encoder_cont"]
        else:
            feature_names = dataset.flat_categoricals
            x = x_all["encoder_cat"]
        target_idx = feature_names.index(name)
        for lag in [1, 2]:
            lag_idx = feature_names.index(f"{name}_lagged_by_{lag}")
            # first time step of the unlagged series ...
            target = x[..., target_idx][:, 0]
            # ... must equal the lagged series shifted back by `lag`
            lagged_target = torch.roll(x[..., lag_idx], -lag, dims=1)[:, 0]
            assert torch.isclose(target, lagged_target).all(), (
                "lagged target must be the same as non-lagged target"
            )
def test_new_group_ids(test_data, kwargs):
    """Test for new group ids in dataset"""
    # restrict training data to a single agency so other agencies are unseen
    train_agency = test_data["agency"].iloc[0]
    train_dataset = TimeSeriesDataSet(
        test_data[lambda x: x.agency == train_agency],
        time_idx="time_idx",
        target="volume",
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
        min_prediction_length=1,
        min_encoder_length=1,
        categorical_encoders={
            "agency": NaNLabelEncoder(add_nan=True),
            "sku": NaNLabelEncoder(add_nan=True),
        },
        **kwargs,
    )

    # sampling from the training dataset must work
    next(iter(train_dataset.to_dataloader()))

    # derive a dataset containing group ids never observed during training
    test_dataset = TimeSeriesDataSet.from_dataset(train_dataset, test_data)

    # iterating the full dataloader must not raise despite the new groups
    for _ in iter(test_dataset.to_dataloader()):
        pass
def test_categorical_target(test_data):
    """A categorical target must come out of the dataloader as a long tensor."""
    dataset = TimeSeriesDataSet(
        test_data,
        time_idx="time_idx",
        target="agency",  # categorical column used as target
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
        min_prediction_length=1,
        min_encoder_length=1,
    )
    _, y = next(iter(dataset.to_dataloader()))
    assert y[0].dtype is torch.long, "target must be of type long"
def test_encoder_normalizer_for_covariates(test_data):
    """An EncoderNormalizer as scaler for a covariate must work end-to-end."""
    dataset = TimeSeriesDataSet(
        test_data,
        time_idx="time_idx",
        target="volume",
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
        min_prediction_length=1,
        min_encoder_length=1,
        time_varying_known_reals=["price_regular"],
        scalers={"price_regular": EncoderNormalizer()},
    )
    # drawing one batch is sufficient to exercise the normalizer
    next(iter(dataset.to_dataloader()))
def test_multitarget(test_data, kwargs):
    """A dataset with a list of two targets must yield batches without error."""
    # duplicate the volume column so two identical targets are available
    data = test_data.copy()
    data["volume1"] = data["volume"]

    dataset = TimeSeriesDataSet(
        data,
        time_idx="time_idx",
        target=["volume", "volume1"],
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
        min_prediction_length=1,
        min_encoder_length=1,
        time_varying_known_reals=["price_regular"],
        scalers={"price_regular": EncoderNormalizer()},
        **kwargs,
    )
    next(iter(dataset.to_dataloader()))
def test_TimeSeriesDataSet(test_data, kwargs):
    """Smoke-test dataset construction and dataloader output for given kwargs."""
    # start from sensible defaults and overlay the parametrized kwargs
    params = dict(
        time_idx="time_idx",
        target="volume",
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
    )
    params.update(kwargs)

    if params.get("allow_missings", False):
        # drop half the rows (deterministically) to create gaps in the series
        np.random.seed(2)
        test_data = test_data.sample(frac=0.5)

    # create dataset and sample from it
    dataset = TimeSeriesDataSet(test_data, **params)
    batch = next(iter(dataset.to_dataloader(num_workers=0)))
    check_dataloader_output(dataset, batch)
), # use softplus with beta=1.0 and normalize by group add_relative_time_idx=True, # add as feature add_target_scales=True, # add as feature add_encoder_length=True, # add as feature ) # create validation set (predict=True) which means to predict the # last max_prediction_length points in time for each series validation = TimeSeriesDataSet.from_dataset(training, data, predict=True, stop_randomization=True) # create dataloaders for model batch_size = 128 train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0) val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0) #%% """ Training the Temporal Fusion Transformer with PyTorch Lightning """ import pytorch_lightning as pl from pytorch_lightning.callbacks import EarlyStopping, LearningRateLogger from pytorch_lightning.loggers import TensorBoardLogger from pytorch_forecasting.metrics import QuantileLoss from pytorch_forecasting.models import TemporalFusionTransformer