Example #1
def test_NormalDistributionLoss(center, transformation):
    mean = 1.0
    std = 0.1
    n = 100000
    target = NormalDistributionLoss.distribution_class(loc=mean,
                                                       scale=std).sample((n, ))
    normalizer = TorchNormalizer(center=center, transformation=transformation)
    if transformation in ["log", "log1p", "relu", "softplus"]:
        target = target.abs()
    target = normalizer.inverse_preprocess(target)

    normalized_target = normalizer.fit_transform(target).view(1, -1)
    target_scale = normalizer.get_parameters().unsqueeze(0)
    scale = torch.ones_like(normalized_target) * normalized_target.std()
    parameters = torch.stack(
        [normalized_target, scale],
        dim=-1,
    )
    loss = NormalDistributionLoss()
    rescaled_parameters = loss.rescale_parameters(parameters,
                                                  target_scale=target_scale,
                                                  encoder=normalizer)
    samples = loss.sample(rescaled_parameters, 1)
    assert torch.isclose(target.mean(), samples.mean(), atol=0.1, rtol=0.5)
    if center:  # if not centered, softplus distorts std too much for testing
        assert torch.isclose(target.std(), samples.std(), atol=0.1, rtol=0.7)
Example #3
    def from_dataset(
        cls,
        dataset: TimeSeriesDataSet,
        allowed_encoder_known_variable_names: List[str] = None,
        **kwargs,
    ):
        """
        Create model from dataset.

        Args:
            dataset: timeseries dataset
            allowed_encoder_known_variable_names: List of known variables that are allowed in encoder, defaults to all
            **kwargs: additional arguments such as hyperparameters for model (see ``__init__()``)

        Returns:
            DeepAR network
        """
        # assert fixed encoder and decoder length for the moment
        new_kwargs = {}
        if dataset.multi_target:
            new_kwargs.setdefault(
                "loss",
                MultiLoss([NormalDistributionLoss()] *
                          len(dataset.target_names)))
        new_kwargs.update(kwargs)
        assert not isinstance(dataset.target_normalizer, NaNLabelEncoder) and (
            not isinstance(dataset.target_normalizer, MultiNormalizer) or all([
                not isinstance(normalizer, NaNLabelEncoder)
                for normalizer in dataset.target_normalizer
            ])
        ), "target(s) should be continuous - categorical targets are not supported"  # todo: remove this restriction
        return super().from_dataset(
            dataset,
            allowed_encoder_known_variable_names=allowed_encoder_known_variable_names,
            **new_kwargs,
        )
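
A hedged usage sketch (not part of the original example): assuming `training` is an existing TimeSeriesDataSet, `from_dataset` infers the input structure from the dataset and forwards hyperparameters such as `hidden_size` to `__init__`; the values below are purely illustrative.

deepar = DeepAR.from_dataset(
    training,
    hidden_size=32,
    rnn_layers=2,
    dropout=0.1,
    loss=NormalDistributionLoss(),
)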
Example #4
def test_NormalDistributionLoss(center, transformation):
    mean = 1000.0
    std = 200.0
    n = 100000
    target = NormalDistributionLoss.distribution_class(loc=mean, scale=std).sample((n,))
    if transformation in ["log", "log1p", "relu", "softplus"]:
        target = target.abs()
    normalizer = TorchNormalizer(center=center, transformation=transformation)
    normalized_target = normalizer.fit_transform(target).view(1, -1)
    target_scale = normalizer.get_parameters().unsqueeze(0)
    scale = torch.ones_like(normalized_target) * normalized_target.std()
    parameters = torch.stack(
        [normalized_target, scale],
        dim=-1,
    )
    loss = NormalDistributionLoss()
    if transformation in ["logit", "log", "log1p", "softplus", "relu", "logit"]:
        with pytest.raises(AssertionError):
            rescaled_parameters = loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer)
    else:
        rescaled_parameters = loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer)
        samples = loss.sample(rescaled_parameters, 1)
        assert torch.isclose(torch.as_tensor(mean), samples.mean(), atol=0.1, rtol=0.2)
        if center:  # if not centered, softplus distorts std too much for testing
            assert torch.isclose(torch.as_tensor(std), samples.std(), atol=0.1, rtol=0.7)
def test_NormalDistributionLoss(log_scale, center, coerce_positive):
    mean = 1000.0
    std = 200.0
    n = 100000
    target = NormalDistributionLoss.distribution_class(loc=mean,
                                                       scale=std).sample_n(n)
    if log_scale or coerce_positive:
        target = target.abs()
    if log_scale and coerce_positive:
        return  # combination invalid for normalizer (tested somewhere else)
    normalizer = TorchNormalizer(log_scale=log_scale,
                                 center=center,
                                 coerce_positive=coerce_positive)
    normalized_target = normalizer.fit_transform(target).view(1, -1)
    target_scale = normalizer.get_parameters().unsqueeze(0)
    scale = torch.ones_like(normalized_target) * normalized_target.std()
    parameters = torch.stack(
        [normalized_target, scale],
        dim=-1,
    )
    loss = NormalDistributionLoss()
    if log_scale or coerce_positive:
        with pytest.raises(AssertionError):
            rescaled_parameters = loss.rescale_parameters(
                parameters, target_scale=target_scale, transformer=normalizer)
    else:
        rescaled_parameters = loss.rescale_parameters(
            parameters, target_scale=target_scale, transformer=normalizer)
        samples = loss.sample_n(rescaled_parameters, 1)
        assert torch.isclose(torch.as_tensor(mean),
                             samples.mean(),
                             atol=0.1,
                             rtol=0.2)
        if center:  # if not centered, softplus distorts std too much for testing
            assert torch.isclose(torch.as_tensor(std),
                                 samples.std(),
                                 atol=0.1,
                                 rtol=0.7)
Example #6
    def __init__(
        self,
        cell_type: str = "LSTM",
        hidden_size: int = 10,
        rnn_layers: int = 2,
        dropout: float = 0.1,
        static_categoricals: List[str] = [],
        static_reals: List[str] = [],
        time_varying_categoricals_encoder: List[str] = [],
        time_varying_categoricals_decoder: List[str] = [],
        categorical_groups: Dict[str, List[str]] = {},
        time_varying_reals_encoder: List[str] = [],
        time_varying_reals_decoder: List[str] = [],
        embedding_sizes: Dict[str, Tuple[int, int]] = {},
        embedding_paddings: List[str] = [],
        embedding_labels: Dict[str, np.ndarray] = {},
        x_reals: List[str] = [],
        x_categoricals: List[str] = [],
        n_validation_samples: int = None,
        n_plotting_samples: int = None,
        target: Union[str, List[str]] = None,
        loss: DistributionLoss = None,
        logging_metrics: nn.ModuleList = None,
        **kwargs,
    ):
        """
        DeepAR Network.

        The code is based on the article `DeepAR: Probabilistic forecasting with autoregressive recurrent networks
        <https://www.sciencedirect.com/science/article/pii/S0169207019301888>`_.

        Args:
            cell_type (str, optional): Recurrent cell type ["LSTM", "GRU"]. Defaults to "LSTM".
            hidden_size (int, optional): hidden recurrent size - the most important hyperparameter along with
                ``rnn_layers``. Defaults to 10.
            rnn_layers (int, optional): Number of RNN layers - important hyperparameter. Defaults to 2.
            dropout (float, optional): Dropout in RNN layers. Defaults to 0.1.
            static_categoricals: integer of positions of static categorical variables
            static_reals: integer of positions of static continuous variables
            time_varying_categoricals_encoder: integer of positions of categorical variables for encoder
            time_varying_categoricals_decoder: integer of positions of categorical variables for decoder
            time_varying_reals_encoder: integer of positions of continuous variables for encoder
            time_varying_reals_decoder: integer of positions of continuous variables for decoder
            categorical_groups: dictionary where values
                are list of categorical variables that are forming together a new categorical
                variable which is the key in the dictionary
            x_reals: order of continuous variables in tensor passed to forward function
            x_categoricals: order of categorical variables in tensor passed to forward function
            embedding_sizes: dictionary mapping (string) indices to tuple of number of categorical classes and
                embedding size
            embedding_paddings: list of indices for embeddings which transform the zero's embedding to a zero vector
            embedding_labels: dictionary mapping (string) indices to list of categorical labels
            n_validation_samples (int, optional): Number of samples to use for calculating validation metrics.
                Defaults to None, i.e. no sampling at validation stage and using "mean" of distribution for logging
                metrics calculation.
            n_plotting_samples (int, optional): Number of samples to generate for plotting predictions
                during training. Defaults to ``n_validation_samples`` if not None or 100 otherwise.
            target (str, optional): Target variable or list of target variables. Defaults to None.
            loss (DistributionLoss, optional): Distribution loss function. Keep in mind that each distribution
                loss function might have specific requirements for target normalization.
                Defaults to :py:class:`~pytorch_forecasting.metrics.NormalDistributionLoss`.
            logging_metrics (nn.ModuleList, optional): Metrics to log during training.
                Defaults to nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()]).
        """
        if loss is None:
            loss = NormalDistributionLoss()
        if logging_metrics is None:
            logging_metrics = nn.ModuleList(
                [SMAPE(), MAE(), RMSE(),
                 MAPE(), MASE()])
        if n_plotting_samples is None:
            if n_validation_samples is None:
                n_plotting_samples = 100
            else:
                n_plotting_samples = n_validation_samples
        self.save_hyperparameters()
        # store loss function separately as it is a module
        super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)

        self.embeddings = MultiEmbedding(
            embedding_sizes=embedding_sizes,
            embedding_paddings=embedding_paddings,
            categorical_groups=categorical_groups,
            x_categoricals=x_categoricals,
        )

        assert set(self.encoder_variables) - set(to_list(target)) == set(
            self.decoder_variables
        ), "Encoder and decoder variables have to be the same apart from target variable"
        for targeti in to_list(target):
            assert (
                targeti in time_varying_reals_encoder
            ), f"target {targeti} has to be real"  # todo: remove this restriction
        assert (isinstance(target, str) and isinstance(loss, DistributionLoss)) or (
            isinstance(target, (list, tuple)) and isinstance(loss, MultiLoss) and len(loss) == len(target)
        ), "number of targets should be equivalent to number of loss metrics"

        time_series_rnn = get_cell(cell_type)
        self.rnn = time_series_rnn(
            input_size=self.input_size,
            hidden_size=self.hparams.hidden_size,
            num_layers=self.hparams.rnn_layers,
            dropout=self.hparams.dropout if self.hparams.rnn_layers > 1 else 0,
            batch_first=True,
        )

        # add linear layers for argument projection
        if isinstance(loss, MultiLoss):  # multi target
            self.distribution_projector = nn.ModuleList([
                nn.Linear(self.hparams.hidden_size, len(args))
                for args in self.loss.distribution_arguments
            ])
        else:
            self.distribution_projector = nn.Linear(
                self.hparams.hidden_size,
                len(self.loss.distribution_arguments))
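
A hedged sketch of the multi-target case (not part of the original example; assumes `multi_target_dataset` is a TimeSeriesDataSet with two continuous targets): the number of losses in a `MultiLoss` has to match the number of targets, mirroring the assertion in `__init__` and the default that `from_dataset` sets.

deepar = DeepAR.from_dataset(
    multi_target_dataset,
    loss=MultiLoss([NormalDistributionLoss(), NormalDistributionLoss()]),
)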
Example #7
def test_predict_average(model, dataloaders_with_covariates):
    prediction = model.predict(dataloaders_with_covariates["val"],
                               fast_dev_run=True,
                               mode="prediction",
                               n_samples=100)
    assert prediction.ndim == 2, "expected averaging of samples"


def test_predict_samples(model, dataloaders_with_covariates):
    prediction = model.predict(dataloaders_with_covariates["val"],
                               fast_dev_run=True,
                               mode="samples",
                               n_samples=100)
    assert prediction.size()[-1] == 100, "expected raw samples"


@pytest.mark.parametrize(
    "loss", [NormalDistributionLoss(),
             MultivariateNormalDistributionLoss()])
def test_pickle(dataloaders_with_covariates, loss):
    dataset = dataloaders_with_covariates["train"].dataset
    model = DeepAR.from_dataset(dataset,
                                hidden_size=5,
                                learning_rate=0.15,
                                log_gradient_flow=True,
                                log_interval=1000,
                                loss=loss)
    pkl = pickle.dumps(model)
    pickle.loads(pkl)
Example #8
    gradient_clip_val=0.1,
    limit_train_batches=30,
    limit_val_batches=3,
    # fast_dev_run=True,
    # logger=logger,
    # profiler=True,
    callbacks=[lr_logger, early_stop_callback],
)


deepar = DeepAR.from_dataset(
    training,
    learning_rate=0.1,
    hidden_size=32,
    dropout=0.1,
    loss=NormalDistributionLoss(),
    log_interval=10,
    log_val_interval=3,
    # reduce_on_plateau_patience=3,
)
print(f"Number of parameters in network: {deepar.size()/1e3:.1f}k")

# # find optimal learning rate
# deepar.hparams.log_interval = -1
# deepar.hparams.log_val_interval = -1
# trainer.limit_train_batches = 1.0
# res = trainer.tuner.lr_find(
#     deepar, train_dataloader=train_dataloader, val_dataloaders=val_dataloader, min_lr=1e-5, max_lr=1e2
# )

# print(f"suggested learning rate: {res.suggestion()}")
class FullyConnectedForDistributionLossModel(BaseModel):  # we inherit the `from_dataset` method
    def __init__(self, input_size: int, output_size: int, hidden_size: int,
                 n_hidden_layers: int, **kwargs):
        # saves arguments in signature to `.hparams` attribute, mandatory call - do not skip this
        self.save_hyperparameters()
        # pass additional arguments to BaseModel.__init__, mandatory call - do not skip this
        super().__init__(**kwargs)
        self.network = FullyConnectedMultiOutputModule(
            input_size=self.hparams.input_size,
            output_size=self.hparams.output_size,
            hidden_size=self.hparams.hidden_size,
            n_hidden_layers=self.hparams.n_hidden_layers,
            n_outputs=2,  # <<<<<<<< we predict two outputs for mean and scale of the normal distribution
        )
        self.loss = NormalDistributionLoss()

    @classmethod
    def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs):
        new_kwargs = {
            "output_size": dataset.max_prediction_length,
            "input_size": dataset.max_encoder_length,
        }
        new_kwargs.update(
            kwargs
        )  # use to pass real hyperparameters and override defaults set by dataset
        # example for dataset validation
        assert dataset.max_prediction_length == dataset.min_prediction_length, "Decoder only supports a fixed length"
        assert dataset.min_encoder_length == dataset.max_encoder_length, "Encoder only supports a fixed length"
        assert (
            len(dataset.time_varying_known_categoricals) == 0
            and len(dataset.time_varying_known_reals) == 0
            and len(dataset.time_varying_unknown_categoricals) == 0
            and len(dataset.static_categoricals) == 0
            and len(dataset.static_reals) == 0
            and len(dataset.time_varying_unknown_reals) == 1
            and dataset.time_varying_unknown_reals[0] == dataset.target
        ), "Only covariate should be the target in 'time_varying_unknown_reals'"

        return super().from_dataset(dataset, **new_kwargs)

    def forward(self,
                x: Dict[str, torch.Tensor],
                n_samples: int = None) -> Dict[str, torch.Tensor]:
        # x is a batch generated based on the TimeSeriesDataset
        network_input = x["encoder_cont"].squeeze(-1)
        prediction = self.network(
            network_input)  # shape batch_size x n_decoder_steps x 2
        if (
                self.training or n_samples is None
        ):  # training is a PyTorch variable indicating if a module is being trained (tracing gradients) or evaluated
            assert n_samples is None, "We need to predict parameters when training"
            output_transformation = True
        else:
            # let's sample from our distribution - first we need to scale the parameters to real space
            scaled_parameters = self.transform_output(
                dict(
                    prediction=prediction,
                    target_scale=x["target_scale"],
                ))
            # and then sample from distribution
            prediction = self.loss.sample(scaled_parameters, n_samples)
            output_transformation = None  # predictions are already re-scaled
        return dict(prediction=prediction,
                    target_scale=x["target_scale"],
                    output_transformation=output_transformation)

    def transform_output(self, out: Dict[str, torch.Tensor]) -> torch.Tensor:
        # this is already implemented in pytorch forecasting but this code demonstrates the point
        # input is forward's output
        # depending on output, transform differently
        if out.get("output_transformation",
                   True) is None:  # samples are already rescaled
            out = out["prediction"]
        else:  # parameters need to be rescaled
            out = self.loss.rescale_parameters(
                out["prediction"],
                target_scale=out["target_scale"],
                encoder=self.output_transformer)
        return out
print("parameter predition shape: ", model(x)["prediction"].size())
model.eval()  # set model into eval mode for sampling
print("sample prediction shape: ",
      model(x, n_samples=200)["prediction"].size())

# %%
model.predict(dataloader, mode="quantiles", n_samples=100).shape

# %% [markdown]
# The returned quantiles here are determined by the quantiles defined in the loss function and can be modified by passing a list of quantiles at initialization.

# %%
model.loss.quantiles

# %%
NormalDistributionLoss(quantiles=[0.2, 0.8]).quantiles
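
# %% [markdown]
# As a hedged sketch (not part of the original notebook; it reuses the `model` and `dataloader`
# objects from the cells above): the quantiles reported by ``predict(mode="quantiles")`` come from
# the loss the model holds, so swapping in a loss initialized with a custom quantile list changes
# the last dimension of the returned tensor.

# %%
model.loss = NormalDistributionLoss(quantiles=[0.2, 0.8])
model.predict(dataloader, mode="quantiles", n_samples=100).shape  # last dimension = number of quantiles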

# %% [markdown]
# ## Adding custom plotting and interpretation
# %% [markdown]
# ### Log often whenever an example prediction vs actuals plot is created

# %%
import matplotlib.pyplot as plt


def plot_prediction(
    self,
    x: Dict[str, torch.Tensor],
    out: Dict[str, torch.Tensor],
    idx: int,
Example #11
    def __init__(
        self,
        n_lags=60,
        n_forecasts=20,
        batch_size=None,
        epochs=100,
        patience_early_stopping=10,
        early_stop=True,
        learning_rate=3e-2,
        auto_lr_find=False,
        num_workers=3,
        loss_func="normaldistributionloss",
        hidden_size=32,
        rnn_layers=2,
        dropout=0.1,
    ):
        """
        Args:
            n_lags: int, number of time units that condition the predictions, also known as the 'lookback period'.
                Should be between 1 and 10 times the prediction length. Can be seen as the equivalent of n_lags in NP.
            n_forecasts: int, number of time units that the model predicts.
            batch_size: int, batch size. If set to None, an automatic batch size will be chosen.
            epochs: int, number of epochs for training. Will be overwritten if EarlyStopping is applied.
            patience_early_stopping: int, patience parameter of the EarlyStopping callback.
            early_stop: bool, whether to use the EarlyStopping callback.
            learning_rate: float, learning rate for the model. Will be overwritten if auto_lr_find is used.
            auto_lr_find: bool, whether to use the automatic learning rate finder.
            num_workers: int, number of workers for the DataLoaders.
            loss_func: str, distribution loss function. Keep in mind that each distribution loss function might
                have specific requirements for target normalization. Defaults to NormalDistributionLoss.
            hidden_size: int, hidden recurrent size - the most important hyperparameter along with rnn_layers.
            rnn_layers: int, number of RNN layers - important hyperparameter.
            dropout: float, dropout in RNN layers, should be between 0 and 1.
        """

        self.batch_size = batch_size

        self.epochs = epochs
        self.patience_early_stopping = patience_early_stopping
        self.early_stop = early_stop
        self.learning_rate = learning_rate
        self.auto_lr_find = auto_lr_find
        if self.learning_rate is not None:
            self.auto_lr_find = False
        self.num_workers = num_workers

        self.context_length = n_lags
        self.prediction_length = n_forecasts

        self.hidden_size = hidden_size
        self.rnn_layers = rnn_layers
        self.dropout = dropout
        self.loss_func = loss_func

        self.fitted = False
        self.freq = None

        if isinstance(self.loss_func, str):
            if self.loss_func.lower() in ["huber", "smoothl1", "smoothl1loss"]:
                self.loss_func = torch.nn.SmoothL1Loss()
            elif self.loss_func.lower() in ["mae", "l1", "l1loss"]:
                self.loss_func = torch.nn.L1Loss()
            elif self.loss_func.lower() in ["mse", "mseloss", "l2", "l2loss"]:
                self.loss_func = torch.nn.MSELoss()
            elif self.loss_func.lower() in [
                    "normaldistloss", "ndl", "normaldistributionloss"
            ]:
                self.loss_func = NormalDistributionLoss()
            else:
                raise NotImplementedError(
                    "Loss function {} name not defined".format(self.loss_func))
        elif callable(self.loss_func):
            pass
        elif hasattr(torch.nn.modules.loss, self.loss_func.__class__.__name__):
            pass
        else:
            raise NotImplementedError("Loss function {} not found".format(
                self.loss_func))

        self.metrics = metrics.MetricsCollection(
            metrics=[
                metrics.LossMetric(torch.nn.SmoothL1Loss()),
                metrics.MAE(),
                metrics.MSE(),
            ],
            value_metrics=[
                # metrics.ValueMetric("Loss"),
            ],
        )

        self.val_metrics = metrics.MetricsCollection(
            [m.new() for m in self.metrics.batch_metrics])
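
A hedged instantiation sketch (the class name `DeepARWrapper` is hypothetical, since the snippet above only shows an `__init__`): the `loss_func` string is matched case-insensitively against the aliases above, so the calls below would resolve to NormalDistributionLoss and torch.nn.SmoothL1Loss respectively.

model_ndl = DeepARWrapper(n_lags=60, n_forecasts=20, loss_func="normaldistributionloss")
model_huber = DeepARWrapper(n_lags=60, n_forecasts=20, loss_func="huber")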