Example #1
import pandas as pd
from pytorch_forecasting.data import NaNLabelEncoder
from pytorch_forecasting.data.examples import generate_ar_data

from flash.tabular.forecasting import TabularForecastingData


def from_synthetic_ar_data(
    seasonality: float = 10.0,
    timesteps: int = 400,
    n_series: int = 100,
    max_encoder_length: int = 60,
    max_prediction_length: int = 20,
    batch_size: int = 4,
    num_workers: int = 0,
    **time_series_dataset_kwargs,
) -> TabularForecastingData:
    """Creates and loads a synthetic Auto-Regressive (AR) data set."""
    data = generate_ar_data(seasonality=seasonality,
                            timesteps=timesteps,
                            n_series=n_series,
                            seed=42)
    data["date"] = pd.Timestamp("2020-01-01") + pd.to_timedelta(
        data.time_idx, "D")

    training_cutoff = data["time_idx"].max() - max_prediction_length

    return TabularForecastingData.from_data_frame(
        time_idx="time_idx",
        target="value",
        categorical_encoders={"series": NaNLabelEncoder().fit(data.series)},
        group_ids=["series"],
        # the only unknown variable is "value"; N-BEATS cannot take any additional variables
        time_varying_unknown_reals=["value"],
        max_encoder_length=max_encoder_length,
        max_prediction_length=max_prediction_length,
        train_data_frame=data[lambda x: x.time_idx <= training_cutoff],
        val_data_frame=data,
        batch_size=batch_size,
        num_workers=num_workers,
        **time_series_dataset_kwargs,
    )
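A minimal usage sketch for this helper. The backbone name and backbone_kwargs values are assumptions borrowed from the N-BEATS tutorial referenced in the last example, not part of this function.

# Usage sketch (assumed Flash N-BEATS setup; hyperparameter values are illustrative).
import flash
from flash.tabular.forecasting import TabularForecaster

datamodule = from_synthetic_ar_data(batch_size=32)
model = TabularForecaster(
    datamodule.parameters,
    backbone="n_beats",
    backbone_kwargs={"widths": [32, 512], "backcast_loss_ratio": 0.1},
)
trainer = flash.Trainer(max_epochs=1)
trainer.fit(model, datamodule=datamodule)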
Example #2
from pytorch_forecasting.data import EncoderNormalizer, NaNLabelEncoder, TimeSeriesDataSet
from pytorch_forecasting.data.examples import generate_ar_data


def dataloaders_fixed_window_without_covariates():
    data = generate_ar_data(seasonality=10.0, timesteps=400, n_series=10)
    validation = data.series.iloc[:2]

    max_encoder_length = 60
    max_prediction_length = 20

    training = TimeSeriesDataSet(
        data[lambda x: ~x.series.isin(validation)],
        time_idx="time_idx",
        target="value",
        categorical_encoders={"series": NaNLabelEncoder().fit(data.series)},
        group_ids=["series"],
        static_categoricals=[],
        max_encoder_length=max_encoder_length,
        max_prediction_length=max_prediction_length,
        time_varying_unknown_reals=["value"],
        target_normalizer=EncoderNormalizer(),
    )

    validation = TimeSeriesDataSet.from_dataset(
        training,
        data[lambda x: x.series.isin(validation)],
        stop_randomization=True,
    )
    batch_size = 4
    train_dataloader = training.to_dataloader(train=True,
                                              batch_size=batch_size,
                                              num_workers=0)
    val_dataloader = validation.to_dataloader(train=False,
                                              batch_size=batch_size,
                                              num_workers=0)

    return dict(train=train_dataloader, val=val_dataloader)
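A quick sketch of consuming the returned loaders; batches from pytorch_forecasting dataloaders are (x, y) pairs where x is a dict of encoder/decoder tensors and y is a (target, weight) tuple.

loaders = dataloaders_fixed_window_without_covariates()
x, y = next(iter(loaders["train"]))
print(x["encoder_cont"].shape)  # (batch_size, max_encoder_length, num_reals)
print(y[0].shape)               # (batch_size, max_prediction_length)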
Example #3
import pandas as pd
from pytorch_forecasting.data.examples import generate_ar_data


def sample_data():
    data = generate_ar_data(seasonality=10.0,
                            timesteps=100,
                            n_series=2,
                            seed=42)
    data["date"] = pd.Timestamp("2020-01-01") + pd.to_timedelta(
        data.time_idx, "D")
    max_prediction_length = 20
    training_cutoff = data["time_idx"].max() - max_prediction_length
    return data, training_cutoff, max_prediction_length
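One possible way to use this helper, splitting at the returned cutoff the same way the other examples on this page build their train and validation frames.

data, training_cutoff, max_prediction_length = sample_data()
train_df = data[data.time_idx <= training_cutoff]  # history up to the cutoff
val_df = data  # validation keeps the full frame, as in the examples above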
Example #4
import flash
from flash.core.integrations.pytorch_forecasting import convert_predictions
from flash.core.utilities.imports import example_requires
from flash.tabular.forecasting import TabularForecaster, TabularForecastingData

example_requires(["tabular", "matplotlib"])

import matplotlib.pyplot as plt  # noqa: E402
import pandas as pd  # noqa: E402
from pytorch_forecasting.data import NaNLabelEncoder  # noqa: E402
from pytorch_forecasting.data.examples import generate_ar_data  # noqa: E402

# Example based on this tutorial: https://pytorch-forecasting.readthedocs.io/en/latest/tutorials/ar.html
# 1. Create the DataModule
data = generate_ar_data(seasonality=10.0, timesteps=400, n_series=100, seed=42)
data["date"] = pd.Timestamp("2020-01-01") + pd.to_timedelta(data.time_idx, "D")

max_prediction_length = 20

training_cutoff = data["time_idx"].max() - max_prediction_length

datamodule = TabularForecastingData.from_data_frame(
    time_idx="time_idx",
    target="value",
    categorical_encoders={"series": NaNLabelEncoder().fit(data.series)},
    group_ids=["series"],
    # the only unknown variable is "value"; N-BEATS cannot take any additional variables
    time_varying_unknown_reals=["value"],
    max_encoder_length=60,
    max_prediction_length=max_prediction_length,