Example 1
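These snippets come from the pytorch-forecasting test suite, so they assume shared imports plus `test_data` and `kwargs` pytest fixtures that the listing does not show. A minimal stand-in, with column names inferred from the code below (the real fixtures differ), could look like this:

import numpy as np
import pandas as pd
import torch

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import EncoderNormalizer, NaNLabelEncoder


def make_test_data(n_timesteps=30):
    # hypothetical stand-in for the `test_data` fixture: one row per
    # (agency, sku, time_idx) combination with the columns used below
    rows = [
        dict(
            agency=agency,
            sku=sku,
            time_idx=t,
            volume=float(np.random.rand()),
            price_regular=1.0 + float(np.random.rand()),
        )
        for agency in ["Agency_1", "Agency_2"]
        for sku in ["SKU_1", "SKU_2"]
        for t in range(n_timesteps)
    ]
    return pd.DataFrame(rows)


test_data = make_test_data()
kwargs = dict(target="volume")  # parametrized in the real suite; drop "target" for tests that pass it explicitly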
def test_lagged_variables(test_data, kwargs):
    dataset = TimeSeriesDataSet(
        test_data.copy(),
        time_idx="time_idx",
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
        min_prediction_length=1,
        min_encoder_length=3,  # one more than max lag for validation
        time_varying_unknown_reals=["volume"],
        time_varying_unknown_categoricals=["agency"],
        lags={
            "volume": [1, 2],
            "agency": [1, 2]
        },
        add_encoder_length=False,
        **kwargs,
    )

    x_all, _ = next(iter(dataset.to_dataloader()))

    for name in ["volume", "agency"]:
        if name in dataset.reals:
            vars = dataset.reals
            x = x_all["encoder_cont"]
        else:
            vars = dataset.flat_categoricals
            x = x_all["encoder_cat"]
        target_idx = vars.index(name)
        for lag in [1, 2]:
            lag_idx = vars.index(f"{name}_lagged_by_{lag}")
            target = x[..., target_idx][:, 0]
            lagged_target = torch.roll(x[..., lag_idx], -lag, dims=1)[:, 0]
            assert torch.isclose(
                target, lagged_target
            ).all(), "lagged target must be the same as non-lagged target"
Example 2
def test_new_group_ids(test_data, kwargs):
    """Test for new group ids in dataset"""
    train_agency = test_data["agency"].iloc[0]
    train_dataset = TimeSeriesDataSet(
        test_data[lambda x: x.agency == train_agency],
        time_idx="time_idx",
        target="volume",
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
        min_prediction_length=1,
        min_encoder_length=1,
        categorical_encoders=dict(agency=NaNLabelEncoder(add_nan=True),
                                  sku=NaNLabelEncoder(add_nan=True)),
        **kwargs,
    )

    # test sampling from training dataset
    next(iter(train_dataset.to_dataloader()))

    # create test dataset with group ids that have not been observed before
    test_dataset = TimeSeriesDataSet.from_dataset(train_dataset, test_data)

    # check that we can iterate through dataset without error
    for _ in iter(test_dataset.to_dataloader()):
        pass
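The `add_nan=True` flag makes NaNLabelEncoder reserve an explicit "unknown" category, which is what allows `from_dataset` to encode agencies and SKUs that never appeared in training instead of raising an error.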
Example 3

def test_categorical_target(test_data):
    dataset = TimeSeriesDataSet(
        test_data,
        time_idx="time_idx",
        target="agency",
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
        min_prediction_length=1,
        min_encoder_length=1,
    )
    _, y = next(iter(dataset.to_dataloader()))
    assert y[0].dtype is torch.long, "target must be of type long"
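With a categorical target, the dataset label-encodes the target, so the returned target tensor has dtype `torch.long` (as the assertion checks), suitable for classification losses rather than regression.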
Example 4
def test_encoder_normalizer_for_covariates(test_data):
    dataset = TimeSeriesDataSet(
        test_data,
        time_idx="time_idx",
        target="volume",
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
        min_prediction_length=1,
        min_encoder_length=1,
        time_varying_known_reals=["price_regular"],
        scalers={"price_regular": EncoderNormalizer()},
    )
    next(iter(dataset.to_dataloader()))
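EncoderNormalizer fits its scaling parameters on each sample's encoder window at runtime, so decoder values of `price_regular` are normalized without using future information, avoiding lookahead leakage for covariates that must be re-scaled per sample.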
Example 5
def test_multitarget(test_data, kwargs):
    dataset = TimeSeriesDataSet(
        test_data.assign(volume1=lambda x: x.volume),
        time_idx="time_idx",
        target=["volume", "volume1"],
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
        min_prediction_length=1,
        min_encoder_length=1,
        time_varying_known_reals=["price_regular"],
        scalers={"price_regular": EncoderNormalizer()},
        **kwargs,
    )
    next(iter(dataset.to_dataloader()))
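Passing a list of targets makes the dataset multi-target: the target entry of each batch's `y` becomes a list with one tensor per target, and normalization is handled per target.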
Example 6
def test_TimeSeriesDataSet(test_data, kwargs):
    defaults = dict(
        time_idx="time_idx",
        target="volume",
        group_ids=["agency", "sku"],
        max_encoder_length=5,
        max_prediction_length=2,
    )
    defaults.update(kwargs)
    kwargs = defaults

    if kwargs.get("allow_missings", False):
        np.random.seed(2)
        test_data = test_data.sample(frac=0.5)

    # create dataset and sample from it
    dataset = TimeSeriesDataSet(test_data, **kwargs)
    check_dataloader_output(dataset, next(iter(dataset.to_dataloader(num_workers=0))))

#%%
# create training dataset
training = TimeSeriesDataSet(
    data,
    time_idx="time_idx",
    target="volume",
    group_ids=["agency", "sku"],
    # ...
    target_normalizer=GroupNormalizer(
        groups=["agency", "sku"], transformation="softplus"
    ),  # use softplus with beta=1.0 and normalize by group
    add_relative_time_idx=True,  # add as feature
    add_target_scales=True,  # add as feature
    add_encoder_length=True,  # add as feature
)

# create validation set (predict=True) which means to predict the
# last max_prediction_length points in time for each series
validation = TimeSeriesDataSet.from_dataset(training,
                                            data,
                                            predict=True,
                                            stop_randomization=True)
# create dataloaders for model
batch_size = 128
train_dataloader = training.to_dataloader(train=True,
                                          batch_size=batch_size,
                                          num_workers=0)
val_dataloader = validation.to_dataloader(train=False,
                                          batch_size=batch_size * 10,
                                          num_workers=0)
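
# the validation loader can use a much larger batch size because no
# gradients or backward buffers are kept during evaluation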

#%%
"""
Training the Temporal Fusion Transformer with PyTorch Lightning
"""

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateLogger
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_forecasting.models import TemporalFusionTransformer
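
The imports above set up the next step of the tutorial: building the Temporal Fusion Transformer from the dataset and fitting it with a Lightning Trainer. A minimal sketch of that continuation follows; the hyperparameters are illustrative, and the exact Trainer and fit signatures depend on the pytorch_lightning version (the LearningRateLogger / early_stop_callback API shown here matches the pre-1.0 releases this script imports from).

# callbacks: stop early on stagnating validation loss, log the learning rate
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, mode="min")
lr_logger = LearningRateLogger()
logger = TensorBoardLogger("lightning_logs")

trainer = pl.Trainer(
    max_epochs=30,
    gradient_clip_val=0.1,  # clip gradients to stabilize training
    early_stop_callback=early_stop_callback,
    callbacks=[lr_logger],
    logger=logger,
)

# build the model directly from the dataset so that embeddings and
# variable selection networks match the features defined above
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # QuantileLoss predicts 7 quantiles by default
    loss=QuantileLoss(),
)

trainer.fit(
    tft,
    train_dataloader=train_dataloader,
    val_dataloaders=val_dataloader,
)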