def from_synthetic_ar_data(
    seasonality: float = 10.0,
    timesteps: int = 400,
    n_series: int = 100,
    max_encoder_length: int = 60,
    max_prediction_length: int = 20,
    batch_size: int = 4,
    num_workers: int = 0,
    seed: int = 42,
    **time_series_dataset_kwargs,
) -> TabularForecastingData:
    """Creates and loads a synthetic Auto-Regressive (AR) data set.

    Args:
        seasonality: Seasonality strength passed to ``generate_ar_data``.
        timesteps: Number of time steps generated per series.
        n_series: Number of independent series to generate.
        max_encoder_length: Maximum history window fed to the encoder.
        max_prediction_length: Forecast horizon; the last this many steps are
            held out of the training frame.
        batch_size: Batch size for the created dataloaders.
        num_workers: Number of dataloader workers.
        seed: Random seed for reproducible data generation (previously
            hard-coded to 42; the default preserves the old behavior).
        **time_series_dataset_kwargs: Extra keyword arguments forwarded to the
            underlying time-series dataset.

    Returns:
        A ``TabularForecastingData`` data module with train/validation splits.
    """
    data = generate_ar_data(seasonality=seasonality, timesteps=timesteps, n_series=n_series, seed=seed)
    # Derive a calendar date column from the integer time index (one day per step).
    data["date"] = pd.Timestamp("2020-01-01") + pd.to_timedelta(data.time_idx, "D")
    # Everything up to the cutoff is training data; the full frame is used for validation.
    training_cutoff = data["time_idx"].max() - max_prediction_length
    return TabularForecastingData.from_data_frame(
        time_idx="time_idx",
        target="value",
        categorical_encoders={"series": NaNLabelEncoder().fit(data.series)},
        group_ids=["series"],
        # only unknown variable is "value" - and N-Beats can also not take any additional variables
        time_varying_unknown_reals=["value"],
        max_encoder_length=max_encoder_length,
        max_prediction_length=max_prediction_length,
        train_data_frame=data[lambda x: x.time_idx <= training_cutoff],
        val_data_frame=data,
        batch_size=batch_size,
        num_workers=num_workers,
        **time_series_dataset_kwargs,
    )
def dataloaders_fixed_window_without_covariates():
    """Builds train/val dataloaders over synthetic AR data with a fixed window and no covariates."""
    data = generate_ar_data(seasonality=10.0, timesteps=400, n_series=10)
    # The series ids appearing in the first two rows are held out for validation.
    held_out_series = data.series.iloc[:2]
    encoder_length = 60
    horizon = 20
    training = TimeSeriesDataSet(
        data[lambda x: ~x.series.isin(held_out_series)],
        time_idx="time_idx",
        target="value",
        categorical_encoders={"series": NaNLabelEncoder().fit(data.series)},
        group_ids=["series"],
        static_categoricals=[],
        max_encoder_length=encoder_length,
        max_prediction_length=horizon,
        time_varying_unknown_reals=["value"],
        target_normalizer=EncoderNormalizer(),
    )
    # Validation reuses the training dataset's configuration on the held-out series.
    validation = TimeSeriesDataSet.from_dataset(
        training,
        data[lambda x: x.series.isin(held_out_series)],
        stop_randomization=True,
    )
    batch_size = 4
    return {
        "train": training.to_dataloader(train=True, batch_size=batch_size, num_workers=0),
        "val": validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0),
    }
def sample_data():
    """Generates a small synthetic AR frame and returns it with the training cutoff and horizon."""
    horizon = 20
    frame = generate_ar_data(seasonality=10.0, timesteps=100, n_series=2, seed=42)
    # Attach a calendar date derived from the integer time index (one day per step).
    frame["date"] = pd.Timestamp("2020-01-01") + pd.to_timedelta(frame.time_idx, "D")
    cutoff = frame["time_idx"].max() - horizon
    return frame, cutoff, horizon
import flash from flash.core.integrations.pytorch_forecasting import convert_predictions from flash.core.utilities.imports import example_requires from flash.tabular.forecasting import TabularForecaster, TabularForecastingData example_requires(["tabular", "matplotlib"]) import matplotlib.pyplot as plt # noqa: E402 import pandas as pd # noqa: E402 from pytorch_forecasting.data import NaNLabelEncoder # noqa: E402 from pytorch_forecasting.data.examples import generate_ar_data # noqa: E402 # Example based on this tutorial: https://pytorch-forecasting.readthedocs.io/en/latest/tutorials/ar.html # 1. Create the DataModule data = generate_ar_data(seasonality=10.0, timesteps=400, n_series=100, seed=42) data["date"] = pd.Timestamp("2020-01-01") + pd.to_timedelta(data.time_idx, "D") max_prediction_length = 20 training_cutoff = data["time_idx"].max() - max_prediction_length datamodule = TabularForecastingData.from_data_frame( time_idx="time_idx", target="value", categorical_encoders={"series": NaNLabelEncoder().fit(data.series)}, group_ids=["series"], # only unknown variable is "value" - and N-Beats can also not take any additional variables time_varying_unknown_reals=["value"], max_encoder_length=60, max_prediction_length=max_prediction_length,