def test_EncoderNormalizer(kwargs):
    """Check that ``EncoderNormalizer`` round-trips data, or rejects invalid configs.

    ``coerce_positive`` together with ``log_scale`` is an invalid combination and
    must raise an ``AssertionError`` at construction time; every other combination
    must either produce only non-negative values (when coercing) or invert the
    transform exactly (up to float tolerance).
    """
    data = torch.rand(100)
    # Fill in defaults for every parameter the parametrization did not supply.
    merged = dict(
        method="standard",
        log_scale=False,
        coerce_positive=False,
        center=True,
        log_zero_value=0.0,
    )
    merged.update(kwargs)
    kwargs = merged

    if kwargs["coerce_positive"] and kwargs["log_scale"]:
        # Mutually exclusive options -> constructor itself must fail.
        with pytest.raises(AssertionError):
            EncoderNormalizer(**kwargs)
        return

    normalizer = EncoderNormalizer(**kwargs)
    if kwargs["coerce_positive"]:
        # Shift data below zero so the positivity coercion is actually exercised.
        data = data - 0.5
        roundtrip = normalizer.inverse_transform(normalizer.fit_transform(data))
        assert (roundtrip >= 0).all(), "Inverse transform should yield only positive values"
    else:
        roundtrip = normalizer.inverse_transform(normalizer.fit_transform(data))
        assert torch.isclose(
            roundtrip, data, atol=1e-5
        ).all(), "Inverse transform should reverse transform"
def dataloaders_fixed_window_without_covariates():
    """Build train/validation dataloaders on synthetic AR data without covariates.

    Returns a dict with keys ``train`` and ``val`` mapping to the respective
    dataloaders (batch size 4, no worker processes).
    """
    data = generate_ar_data(seasonality=10.0, timesteps=400, n_series=10)
    # First two series values are held out for validation.
    val_series = data.series.iloc[:2]

    encoder_length = 60
    prediction_length = 20

    training = TimeSeriesDataSet(
        data[lambda x: ~x.series.isin(val_series)],
        time_idx="time_idx",
        target="value",
        categorical_encoders={"series": NaNLabelEncoder().fit(data.series)},
        group_ids=["series"],
        static_categoricals=[],
        max_encoder_length=encoder_length,
        max_prediction_length=prediction_length,
        time_varying_unknown_reals=["value"],
        target_normalizer=EncoderNormalizer(),
    )

    # Validation set reuses the training dataset's parameters and encoders.
    validation = TimeSeriesDataSet.from_dataset(
        training,
        data[lambda x: x.series.isin(val_series)],
        stop_randomization=True,
    )

    batch_size = 4
    train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    return dict(train=train_dataloader, val=val_dataloader)
def test_encoder_normalizer_for_covariates(test_data):
    """Smoke test: an ``EncoderNormalizer`` scaler on a known real covariate
    must not break batch creation."""
    ts_dataset = TimeSeriesDataSet(
        test_data,
        time_idx="time_idx",
        target="volume",
        group_ids=["agency", "sku"],
        min_encoder_length=1,
        max_encoder_length=5,
        min_prediction_length=1,
        max_prediction_length=2,
        time_varying_known_reals=["price_regular"],
        scalers={"price_regular": EncoderNormalizer()},
    )
    # Drawing one batch is enough to exercise the scaler code path.
    next(iter(ts_dataset.to_dataloader()))
def test_multitarget(test_data, kwargs):
    """Smoke test: a dataset with two (duplicated) targets must yield a batch."""
    # Duplicate the volume column so two identical targets can be requested.
    multi_target_data = test_data.assign(volume1=lambda x: x.volume)
    ts_dataset = TimeSeriesDataSet(
        multi_target_data,
        time_idx="time_idx",
        target=["volume", "volume1"],
        group_ids=["agency", "sku"],
        min_encoder_length=1,
        max_encoder_length=5,
        min_prediction_length=1,
        max_prediction_length=2,
        time_varying_known_reals=["price_regular"],
        scalers={"price_regular": EncoderNormalizer()},
        **kwargs,
    )
    # Drawing one batch is enough to exercise multi-target collation.
    next(iter(ts_dataset.to_dataloader()))
def test_EncoderNormalizer(kwargs):
    """Round-trip ``fit_transform``/``inverse_transform`` for ``EncoderNormalizer``.

    Positivity-enforcing transformations must yield non-negative outputs; all
    other configurations must invert the transform up to float tolerance.
    """
    data = torch.rand(100)
    # Parametrized kwargs override the defaults.
    kwargs = {**dict(method="standard", center=True), **kwargs}
    normalizer = EncoderNormalizer(**kwargs)

    transformation = kwargs.get("transformation")
    if transformation in ("relu", "softplus"):
        # Shift data below zero so the positivity coercion is actually exercised.
        data = data - 0.5

    recovered = normalizer.inverse_transform(normalizer.fit_transform(data))
    if transformation in ("relu", "softplus", "log1p"):
        assert (recovered >= 0).all(), "Inverse transform should yield only positive values"
    else:
        assert torch.isclose(
            recovered, data, atol=1e-5
        ).all(), "Inverse transform should reverse transform"
def test_EncoderNormalizer(kwargs):
    """Round-trip ``fit_transform``/``inverse_transform`` for ``EncoderNormalizer``.

    The parametrization may supply its own ``data`` tensor; otherwise a random
    one is generated. Positivity-enforcing transformations must yield
    non-negative outputs; all other configurations must invert the transform
    up to float tolerance.
    """
    # Fill in defaults only for keys the parametrization did not supply.
    for key, default in (("method", "standard"), ("center", True), ("data", torch.rand(100))):
        kwargs.setdefault(key, default)
    data = kwargs.pop("data")

    normalizer = EncoderNormalizer(**kwargs)
    # fit_transform may return a non-tensor type, hence the as_tensor wrapping.
    recovered = normalizer.inverse_transform(torch.as_tensor(normalizer.fit_transform(data)))

    if kwargs.get("transformation") in ("relu", "softplus", "log1p"):
        assert (recovered >= 0).all(), "Inverse transform should yield only positive values"
    else:
        assert torch.isclose(
            recovered,
            torch.as_tensor(data),
            atol=1e-5,
        ).all(), "Inverse transform should reverse transform"
]), time_varying_known_reals=[ "time_idx", "price_regular", "price_actual", "discount", "discount_in_percent" ], time_varying_unknown_categoricals=[], time_varying_unknown_reals=[ "volume", "log_volume", "industry_volume", "soda_volume", "avg_max_temp" ], constant_fill_strategy={"volume": 0}, categorical_encoders={"sku": NaNLabelEncoder(add_nan=True)}, ), dict(static_categoricals=["agency", "sku"]), dict(randomize_length=True, min_encoder_length=2), dict(target_normalizer=EncoderNormalizer(), min_encoder_length=2), dict(target_normalizer=GroupNormalizer(transformation="log1p")), dict(target_normalizer=GroupNormalizer(groups=["agency", "sku"], transformation="softplus", center=False)), dict(target="agency"), # test multiple targets dict(target=["industry_volume", "volume"]), dict(target=["agency", "volume"]), dict(target=["agency", "volume"], min_encoder_length=1, min_prediction_length=1), dict(target=["agency", "volume"], weight="volume"), # test weights dict(target="volume", weight="volume"), ],
class TestTabularForecaster(TaskTester): task = TabularForecaster # TODO: Reduce number of required parameters task_kwargs = { "parameters": { "time_idx": "time_idx", "target": "value", "group_ids": ["series"], "weight": None, "max_encoder_length": 60, "min_encoder_length": 60, "min_prediction_idx": 0, "min_prediction_length": 20, "max_prediction_length": 20, "static_categoricals": [], "static_reals": [], "time_varying_known_categoricals": [], "time_varying_known_reals": [], "time_varying_unknown_categoricals": [], "time_varying_unknown_reals": ["value"], "variable_groups": {}, "constant_fill_strategy": {}, "allow_missing_timesteps": False, "lags": {}, "add_relative_time_idx": False, "add_target_scales": False, "add_encoder_length": False, "target_normalizer": EncoderNormalizer(), "categorical_encoders": { "series": NaNLabelEncoder(), "__group_id__series": NaNLabelEncoder() }, "scalers": {}, "randomize_length": None, "predict_mode": False, "data_sample": { "series": { 0: 0 }, "time_idx": { 0: 0 }, "value": { 0: 0.0 }, }, }, "backbone": "n_beats", "backbone_kwargs": { "widths": [32, 512], "backcast_loss_ratio": 0.1 }, } cli_command = "tabular_forecasting" is_testing = _TABULAR_TESTING is_available = _TABULAR_AVAILABLE # # TODO: Resolve JIT issues scriptable = False traceable = False @property def example_forward_input(self): return { "encoder_cat": torch.empty(2, 60, 0, dtype=torch.int64), "encoder_cont": torch.zeros(2, 60, 1), "encoder_target": torch.zeros(2, 60), "encoder_lengths": torch.tensor([60, 60]), "decoder_cat": torch.empty(2, 20, 0, dtype=torch.int64), "decoder_cont": torch.zeros(2, 20, 1), "decoder_target": torch.zeros(2, 20), "decoder_lengths": torch.tensor([20, 20]), "decoder_time_idx": torch.ones(2, 20).long(), "groups": torch.tensor([[0], [1]]), "target_scale": torch.zeros(2, 2), } def check_forward_output(self, output: Any): assert isinstance(output["prediction"], torch.Tensor) assert output["prediction"].shape == torch.Size([2, 20])
"music_fest", ]), ), dict(time_varying_known_reals=[ "time_idx", "price_regular", "discount_in_percent" ]), dict(time_varying_unknown_reals=[ "volume", "log_volume", "industry_volume", "soda_volume", "avg_max_temp" ]), dict(target_normalizer=GroupNormalizer( groups=["agency", "sku"], transformation="log1p", scale_by_group=True, )), dict(target_normalizer=EncoderNormalizer(), min_encoder_length=2), dict(randomize_length=True, min_encoder_length=2, min_prediction_length=1), dict(predict_mode=True), dict(add_target_scales=True), dict(add_encoder_length=True), dict(add_encoder_length=True), dict(add_relative_time_idx=True), dict(weight="volume"), dict( scalers=dict(time_idx=GroupNormalizer(), price_regular=StandardScaler()), categorical_encoders=dict(month=NaNLabelEncoder()), time_varying_known_categoricals=["month"], time_varying_known_reals=["time_idx", "price_regular"],
"football_gold_cup", "beer_capital", "music_fest", ] ), ), dict(time_varying_known_reals=["time_idx", "price_regular", "discount_in_percent"]), dict(time_varying_unknown_reals=["volume", "log_volume", "industry_volume", "soda_volume", "avg_max_temp"]), dict( target_normalizer=GroupNormalizer( groups=["agency", "sku"], transformation="log1p", scale_by_group=True, ) ), dict(target_normalizer=EncoderNormalizer(), min_encoder_length=2), dict(randomize_length=True, min_encoder_length=2, min_prediction_length=1), dict(predict_mode=True), dict(add_target_scales=True), dict(add_encoder_length=True), dict(add_encoder_length=True), dict(add_relative_time_idx=True), dict(weight="volume"), dict( scalers=dict(time_idx=GroupNormalizer(), price_regular=StandardScaler()), categorical_encoders=dict(month=NaNLabelEncoder()), time_varying_known_categoricals=["month"], time_varying_known_reals=["time_idx", "price_regular"], ), dict(dropout_categoricals=["month"], time_varying_known_categoricals=["month"]), dict(constant_fill_strategy=dict(volume=0.0), allow_missings=True),
def test_EncoderNormalizer_with_limited_history():
    """With ``max_length`` set, the center must be fit on only the trailing window."""
    series = torch.rand(100)
    fitted = EncoderNormalizer(max_length=[1, 2]).fit(series)
    # Center is fit on the last single observation only.
    assert fitted.center_ == series[-1]