Example #1
    def __init__(self,
                 scaler=MinMaxScaler(feature_range=(0, 1)),
                 name="Scaler"):
        """
        Generic wrapper class for using scalers that implement `fit()`, `transform()` and
        `inverse_transform()` methods (typically from scikit-learn) on `TimeSeries`.

        Parameters
        ----------
        scaler
            The scaler to transform the data.
            It must provide the `fit()`, `transform()` and `inverse_transform()` methods.
            Default: `sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))`; this
            will scale all the values of a time series between 0 and 1.
        name
            A specific name for the scaler
        """
        super().__init__(name)

        if (not callable(getattr(scaler, "fit", None))
                or not callable(getattr(scaler, "transform", None))
                or not callable(getattr(scaler, "inverse_transform",
                                        None))):  # noqa W503
            raise_log(
                ValueError(
                    'The provided transformer object must have fit(), transform() and inverse_transform() methods'
                ), logger)

        self.transformer = scaler
        self.train_series = None
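
A minimal usage sketch for this wrapper, assuming darts' `Scaler` from `darts.dataprocessing.transformers` and the bundled `AirPassengersDataset`:

from darts.dataprocessing.transformers import Scaler
from darts.datasets import AirPassengersDataset

series = AirPassengersDataset().load()
scaler = Scaler()  # defaults to MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(series)        # values now lie in [0, 1]
restored = scaler.inverse_transform(scaled)  # back to the original scale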
Example #2
    def __init__(
        self,
        theta: int = 2,
        seasonality_period: Optional[int] = None,
        season_mode: SeasonalityMode = SeasonalityMode.MULTIPLICATIVE,
    ):
        """
        An implementation of the Theta method with configurable `theta` parameter. See [1]_.

        The training time series is de-seasonalized according to `seasonality_period`,
        or an inferred seasonality period.

        `season_mode` must be a ``SeasonalityMode`` Enum member.

        You can access the Enum with ``from darts import SeasonalityMode``.

        Parameters
        ----------
        theta
            Value of the theta parameter. Defaults to 2. Cannot be set to 0.
            If `theta = 1`, the Theta method reduces to simple exponential smoothing (SES).
        seasonality_period
            User-defined seasonality period. If not set, will be tentatively inferred from the training series upon
            calling :func:`fit()`.
        season_mode
            Type of seasonality.
            Either ``SeasonalityMode.MULTIPLICATIVE``, ``SeasonalityMode.ADDITIVE`` or ``SeasonalityMode.NONE``.
            Defaults to ``SeasonalityMode.MULTIPLICATIVE``.

        References
        ----------
        .. [1] `Unmasking the Theta method <https://robjhyndman.com/papers/Theta.pdf>`_
        """

        super().__init__()

        self.model = None
        self.coef = 1
        self.alpha = 1
        self.length = 0
        self.theta = theta
        self.is_seasonal = False
        self.seasonality = None
        self.seasonality_period = seasonality_period
        self.season_period = None
        self.season_mode = season_mode

        raise_if_not(
            season_mode in SeasonalityMode,
            f"Unknown value for season_mode: {season_mode}.",
            logger,
        )

        if self.theta == 0:
            raise_log(ValueError("The parameter theta cannot be equal to 0."), logger)
Example #3
def _create_from_cls_and_kwargs(cls, kws):
    try:
        return cls(**kws)
    except (TypeError, ValueError) as e:
        raise_log(
            ValueError(
                "Error when building the optimizer or learning rate scheduler; "
                "please check the provided class and arguments"
                "\nclass: {}"
                "\narguments (kwargs): {}"
                "\nerror:\n{}".format(cls, kws, e)),
            logger,
        )
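
A hypothetical call illustrating both paths: a valid kwargs dict builds the object, while a misspelled key lands in the `ValueError` branch above.

import torch

params = [torch.nn.Parameter(torch.zeros(3))]  # stand-in for a model's parameters
optimizer = _create_from_cls_and_kwargs(torch.optim.Adam, {"params": params, "lr": 1e-3})
# _create_from_cls_and_kwargs(torch.optim.Adam, {"params": params, "learning_rate": 1e-3})
# would raise the ValueError above, since Adam has no `learning_rate` argument.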
Example #4
    def test_raise_log(self):
        exception_was_raised = False
        with LogCapture() as lc:
            logger = get_logger(__name__)
            logger.handlers = []
            try:
                raise_log(Exception("test"), logger)
            except Exception:
                exception_was_raised = True

        # testing correct log message
        lc.check((__name__, "ERROR", "Exception: test"))

        # checking whether exception was properly raised
        self.assertTrue(exception_was_raised)
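
For context, a minimal sketch of a `raise_log` helper consistent with what this test checks (darts ships its own in `darts.logging`; this is an illustration, not the library's exact code):

import logging

def raise_log(exception: Exception, logger: logging.Logger) -> None:
    # Emit "<ExceptionType>: <message>" at ERROR level, then raise the exception.
    logger.error("%s: %s" % (type(exception).__name__, str(exception)))
    raise exception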
Example #5
    def __init__(self,
                 scaler=None,
                 name="Scaler",
                 n_jobs: int = 1,
                 verbose: bool = False):
        """
        Generic wrapper class for using scalers that implement `fit()`, `transform()` and
        `inverse_transform()` methods (typically from scikit-learn) on `TimeSeries`.

        Parameters
        ----------
        scaler
            The scaler to transform the data with. It must provide `fit()`, `transform()` and `inverse_transform()`
            methods.
            Default: `sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))`; this will scale all the values
            of a time series between 0 and 1.
            In case the `Scaler` is applied to multiple `TimeSeries` objects, a deep-copy of the chosen scaler
            will be instantiated, fitted, and stored, for each `TimeSeries`.
        name
            A specific name for the scaler
        n_jobs
            The number of jobs to run in parallel. Parallel jobs are created only when a `Sequence[TimeSeries]` is
            passed as input to a method, parallelising operations regarding different `TimeSeries`. Defaults to `1`
            (sequential). Setting the parameter to `-1` means using all the available processors.
            Note: for a small amount of data, the parallelisation overhead could end up increasing the total
            required amount of time.
        verbose
            Optionally, whether to print operations progress
        """

        super().__init__(name=name, n_jobs=n_jobs, verbose=verbose)

        if scaler is None:
            scaler = MinMaxScaler(feature_range=(0, 1))

        if (not callable(getattr(scaler, "fit", None))
                or not callable(getattr(scaler, "transform", None))
                or not callable(getattr(scaler, "inverse_transform",
                                        None))):  # noqa W503
            raise_log(
                ValueError(
                    'The provided transformer object must have fit(), transform() and inverse_transform() methods'
                ), logger)

        self.transformer = scaler
        self.transformer_instances = None
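
A sketch of the parallel path, assuming darts' `Scaler` and the `sine_timeseries` generator from `darts.utils.timeseries_generation`: passing a list of series triggers one deep-copied, independently fitted scaler per series, parallelised across `n_jobs` workers.

from darts.dataprocessing.transformers import Scaler
from darts.utils.timeseries_generation import sine_timeseries

series_list = [sine_timeseries(length=100, value_frequency=f) for f in (0.05, 0.1)]
scaler = Scaler(n_jobs=-1)                       # use all available processors
scaled_list = scaler.fit_transform(series_list)  # one fitted scaler per series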
Example #6
    def predict(
        self,
        n: int,
        series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
        covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None
    ) -> Union[TimeSeries, Sequence[TimeSeries]]:
        """ Forecasts values for a certain number of time steps after the end of the series.

        If `fit()` has been called with only one `TimeSeries` as argument, then the `series` argument of this function
        is optional, and it will simply produce a forecast for the next `n` time steps.

        If `fit()` has been called with `series` specified as a `Sequence[TimeSeries]`, the `series` argument must
        be specified.

        When the `series` argument is specified, this function will compute the forecasts for the next `n` time
        steps of the single series (or of each series in the sequence) given by `series`.

        If covariates were specified during training, they must also be specified here.

        Parameters
        ----------
        n
            Forecast horizon - the number of time steps after the end of the series for which to produce predictions.
        series
            The series whose future we want to predict
        covariates
            One or several covariate time series which can be fed as inputs to the model. They must match the
            covariates that have been used with the `fit()` function for training.

        Returns
        -------
        Union[TimeSeries, Sequence[TimeSeries]]
            If `series` is not specified, this function returns a single time series containing the `n`
            next points after the end of the training series.
            If `series` is specified and is a simple `TimeSeries`, this function returns the `n` next points
            after the end of `series`.
            If `series` is a sequence of several time series, this function returns a sequence where each element
            contains the corresponding `n` points forecasts.
        """
        if series is None and covariates is None:
            super().predict(n)
        if self._expect_covariates and covariates is None:
            raise_log(
                ValueError(
                    'The model has been trained with covariates. Some matching covariates '
                    'have to be provided to `predict()`.'))
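
A usage sketch for the covariate requirement. Note that the signature above is from an older darts API with a single `covariates` argument; recent versions split it into `past_covariates`/`future_covariates`, which is what this sketch (using `NBEATSModel` as an arbitrary global model) assumes:

from darts.models import NBEATSModel
from darts.utils.timeseries_generation import gaussian_timeseries, sine_timeseries

series = sine_timeseries(length=120)
covariates = gaussian_timeseries(length=120)

model = NBEATSModel(input_chunk_length=24, output_chunk_length=12, n_epochs=1)
model.fit(series, past_covariates=covariates)
# Covariates used during training must also be passed at prediction time,
# otherwise a ValueError like the one above is raised.
forecast = model.predict(n=12, past_covariates=covariates)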
Example #7
    def predict(self, n: int) -> TimeSeries:
        """ Forecasts values for `n` time steps after the end of the series.

        Parameters
        ----------
        n
            Forecast horizon - the number of time steps after the end of the series for which to produce predictions.

        Returns
        -------
        TimeSeries
            A time series containing the `n` next points after the end of the training series.
        """
        if not self._fit_called:
            raise_log(
                ValueError(
                    'The model must be fit before calling `predict()`. '
                    'For global models, if `predict()` is called without specifying a series, '
                    'the model must have been fit on a single training series.'
                ), logger)
Example #8
def remove_from_series(ts: TimeSeries, other: TimeSeries,
                       model: Union[SeasonalityMode, ModelMode]) -> TimeSeries:
    """
    Removes the TimeSeries `other` from the TimeSeries `ts` as specified by `model`.
    Use e.g. to remove an additive or multiplicative trend from a series.

    Parameters
    ----------
    ts
        The TimeSeries to be modified.
    other
        The TimeSeries to remove.
    model
        The type of model considered.
        Must be a member of the ``ModelMode`` or ``SeasonalityMode`` Enums
        (``from darts import ModelMode, SeasonalityMode``),
        either MULTIPLICATIVE or ADDITIVE.

    Returns
    -------
    TimeSeries
        A TimeSeries defined by removing `other` from `ts`.
    """

    ts._assert_univariate()
    raise_if_not(
        model in ModelMode or model in SeasonalityMode,
        f"Unknown value for model_mode: {model}.",
        logger,
    )

    if model.value == "multiplicative":
        new_ts = ts / other
    elif model.value == "additive":
        new_ts = ts - other
    else:
        raise_log(
            ValueError(
                "Invalid parameter; must be either ADDITIVE or MULTIPLICATIVE. Was: {}"
                .format(model)))
    return new_ts
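
A usage sketch, assuming `remove_from_series` lives in `darts.utils.statistics` and using darts' series generators to build a toy trend:

from darts import ModelMode
from darts.utils.statistics import remove_from_series
from darts.utils.timeseries_generation import linear_timeseries, sine_timeseries

trend = linear_timeseries(start_value=1.0, end_value=10.0, length=100)
series = sine_timeseries(length=100) + trend
# ADDITIVE mode subtracts `other` from `ts`; MULTIPLICATIVE would divide.
detrended = remove_from_series(series, trend, model=ModelMode.ADDITIVE)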
Example #9
    def __init__(self,
                 scaler=None,
                 name="Scaler",
                 n_jobs: int = 1,
                 verbose: bool = False):
        """Generic wrapper class for using scalers on time series.

        The underlying `scaler` has to implement the ``fit()``, ``transform()`` and
        ``inverse_transform()`` methods (typically from scikit-learn).

        When the scaler is applied on multivariate series, the scaling is done per-component.
        When the series are stochastic, the scaling is done across all samples (for each given component).
        The transformation is applied independently for each dimension (component) of the time series,
        effectively merging all samples of a component in order to compute the transform.

        Notes
        -----
        The scaler will not scale the series' static covariates. This has to be done either before constructing the
        series, or later on by extracting the covariates, transforming the values and then reapplying them to the
        series. For this, see the ``TimeSeries`` property ``TimeSeries.static_covariates`` and the method
        ``TimeSeries.with_static_covariates()``.

        Parameters
        ----------
        scaler
            The scaler to transform the data with. It must provide ``fit()``,
            ``transform()`` and ``inverse_transform()`` methods.
            Default: :class:`sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))`; this will scale all
            the values of a time series between 0 and 1.
        name
            A specific name for the scaler
        n_jobs
            The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is
            passed as input to a method, parallelising operations regarding different ``TimeSeries``. Defaults to `1`
            (sequential). Setting the parameter to `-1` means using all the available processors.
            Note: for a small amount of data, the parallelisation overhead could end up increasing the total
            required amount of time.
        verbose
            Optionally, whether to print operations progress

        Notes
        -----
        In case the :class:`Scaler` is applied to multiple ``TimeSeries`` objects, a deep-copy of the
        chosen scaler will be instantiated, fitted, and stored, for each ``TimeSeries``.

        Examples
        --------
        >>> from darts.datasets import AirPassengersDataset
        >>> from sklearn.preprocessing import MinMaxScaler
        >>> from darts.dataprocessing.transformers import Scaler
        >>> series = AirPassengersDataset().load()
        >>> scaler = MinMaxScaler(feature_range=(-1, 1))
        >>> transformer = Scaler(scaler)
        >>> series_transformed = transformer.fit_transform(series)
        >>> print(min(series_transformed.values()))
        [-1.]
        >>> print(max(series_transformed.values()))
        [1.]
        """

        super().__init__(name=name, n_jobs=n_jobs, verbose=verbose)

        if scaler is None:
            scaler = MinMaxScaler(feature_range=(0, 1))

        if (not callable(getattr(scaler, "fit", None))
                or not callable(getattr(scaler, "transform", None))
                or not callable(getattr(scaler, "inverse_transform", None))):
            raise_log(
                ValueError(
                    "The provided transformer object must have fit(), transform() and inverse_transform() methods"
                ),
                logger,
            )

        self.transformer = scaler
        self.transformer_instances = None
Example #10
def check_seasonality(ts: TimeSeries,
                      m: Optional[int] = None,
                      max_lag: int = 24,
                      alpha: float = 0.05):
    """
    Checks whether the TimeSeries `ts` is seasonal with period `m` or not.

    If `m` is None, we work under the assumption that there is a unique seasonality period, which is inferred
    from the Auto-correlation Function (ACF).

    Parameters
    ----------
    ts
        The time series to check for seasonality.
    m
        The seasonality period to check.
    max_lag
        The maximal lag allowed in the ACF.
    alpha
        The desired significance level (default 5%).

    Returns
    -------
    Tuple[bool, int]
        A tuple `(season, m)`, where `season` is a boolean indicating whether the series has seasonality or not
        and `m` is the seasonality period.
    """

    ts._assert_univariate()

    if m is not None and (m < 2 or not isinstance(m, int)):
        raise_log(ValueError("m must be an integer greater than 1."), logger)

    if m is not None and m > max_lag:
        raise_log(ValueError("max_lag must be greater than or equal to m."),
                  logger)

    n_unique = np.unique(ts.values()).shape[0]

    if n_unique == 1:  # Check for non-constant TimeSeries
        return False, 0

    r = acf(
        ts.values(), nlags=max_lag, fft=False
    )  # In case user wants to check for seasonality higher than 24 steps.

    # Finds local maxima of Auto-Correlation Function
    candidates = argrelmax(r)[0]

    if len(candidates) == 0:
        return False, 0

    if m is not None:
        # Check for local maximum when m is user defined.
        test = m not in candidates

        if test:
            return False, m

        candidates = [m]

    # Remove r[0], the auto-correlation at lag order 0, that introduces bias.
    r = r[1:]

    # The non-adjusted upper limit of the significance interval.
    band_upper = r.mean() + norm.ppf(1 - alpha / 2) * r.var()

    # Significance test, stops at first admissible value. The two '-1' below
    # compensate for the index change due to the restriction of the original r to r[1:].
    for candidate in candidates:
        stat = _bartlett_formula(r, candidate - 1, len(ts))
        if r[candidate - 1] > stat * band_upper:
            return True, candidate
    return False, 0
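
A usage sketch, assuming `check_seasonality` is the public helper in `darts.utils.statistics`:

from darts.datasets import AirPassengersDataset
from darts.utils.statistics import check_seasonality

series = AirPassengersDataset().load()
# Infer the period from the ACF (m=None); for this monthly dataset the
# expected result is (True, 12).
is_seasonal, period = check_seasonality(series, m=None, max_lag=36, alpha=0.05)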
Example #11
def extract_trend_and_seasonality(
    ts: TimeSeries,
    freq: int = None,
    model: Union[SeasonalityMode, ModelMode] = ModelMode.MULTIPLICATIVE,
    method: str = "naive",
    **kwargs,
) -> Tuple[TimeSeries, TimeSeries]:
    """
    Extracts trend and seasonality from a TimeSeries instance using `statsmodels.tsa`.

    Parameters
    ----------
    ts
        The series to decompose
    freq
        The seasonality period to use.
    model
        The type of decomposition to use.
        Must be ``from darts import ModelMode, SeasonalityMode`` Enum member.
        Either ``MULTIPLICATIVE`` or ``ADDITIVE``.
        Defaults to ``ModelMode.MULTIPLICATIVE``.
    method
        The method to be used to decompose the series.
        - "naive" : Seasonal decomposition using moving averages [1]_.
        - "STL" : Season-Trend decomposition using LOESS [2]_. Only compatible with ``ADDITIVE`` model type.
    kwargs
        Other keyword arguments are passed down to the decomposition method.

    Returns
    -------
    Tuple[TimeSeries, TimeSeries]
        A tuple of (trend, seasonal) time series.

    References
    ----------
    .. [1] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.seasonal_decompose.html
    .. [2] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.STL.html
    """

    ts._assert_univariate()
    raise_if_not(
        model in ModelMode or model in SeasonalityMode,
        f"Unknown value for model_mode: {model}.",
        logger,
    )
    raise_if_not(
        model is not SeasonalityMode.NONE,
        "The model must be either MULTIPLICATIVE or ADDITIVE.",
    )

    if method == "naive":

        decomp = seasonal_decompose(ts.pd_series(),
                                    period=freq,
                                    model=model.value,
                                    extrapolate_trend="freq")

    elif method == "STL":
        raise_if_not(
            model in [SeasonalityMode.ADDITIVE, ModelMode.ADDITIVE],
            f"Only ADDITIVE model is compatible with the STL method. Current model is {model}.",
            logger,
        )

        decomp = STL(
            endog=ts.pd_series(),
            period=freq,
            **kwargs,
        ).fit()

    else:
        raise_log(ValueError(f"Unknown value for method: {method}"), logger)

    season = TimeSeries.from_times_and_values(
        ts.time_index,
        decomp.seasonal,
        static_covariates=ts.static_covariates,
        hierarchy=ts.hierarchy,
    )
    trend = TimeSeries.from_times_and_values(
        ts.time_index,
        decomp.trend,
        static_covariates=ts.static_covariates,
        hierarchy=ts.hierarchy,
    )

    return trend, season
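
A usage sketch, assuming `extract_trend_and_seasonality` is the helper in `darts.utils.statistics`:

from darts import ModelMode
from darts.datasets import AirPassengersDataset
from darts.utils.statistics import extract_trend_and_seasonality

series = AirPassengersDataset().load()
trend, seasonality = extract_trend_and_seasonality(
    series, freq=12, model=ModelMode.MULTIPLICATIVE, method="naive"
)
# With method="STL", only the ADDITIVE model type is accepted, per the check above.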
Example #12
def _extend_time_index_until(
    time_index: Union[pd.DatetimeIndex, pd.RangeIndex],
    until: Optional[Union[int, str, pd.Timestamp]],
    add_length: int,
) -> pd.DatetimeIndex:

    if not add_length and not until:
        return time_index

    raise_if(
        bool(add_length) and bool(until),
        "set only one of add_length and until")

    end = time_index[-1]
    freq = time_index.freq

    if add_length:
        raise_if_not(
            add_length >= 0,
            f"Expected add_length, by which to extend the time series by, "
            f"to be positive, got {add_length}",
        )

        try:
            end += add_length * freq
        except pd.errors.OutOfBoundsDatetime:
            raise_log(
                ValueError(
                    f"the add operation between {end} and {add_length * freq} will overflow"
                ),
                logger,
            )
    else:
        datetime_index = isinstance(time_index, pd.DatetimeIndex)

        if datetime_index:
            raise_if_not(
                isinstance(until, (str, pd.Timestamp)),
                "Expected valid timestamp for TimeSeries, "
                "indexed by DatetimeIndex, "
                f"for parameter until, got {type(end)}",
                logger,
            )
        else:
            raise_if_not(
                isinstance(until, int),
                "Expected integer for TimeSeries, indexed by RangeIndex, "
                f"for parameter until, got {type(end)}",
                logger,
            )

        timestamp = pd.Timestamp(until) if datetime_index else until

        raise_if_not(
            timestamp > end,
            f"Expected until, {timestamp} to lie past end of time index {end}",
        )

        ahead = timestamp - end
        raise_if_not(
            (ahead % freq) == pd.Timedelta(0),
            f"End date must correspond with frequency {freq} of the time axis",
            logger,
        )

        end = timestamp

    new_time_index = pd.date_range(start=time_index[0], end=end, freq=freq)
    return new_time_index
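
Since this is a private helper, here is the equivalent pandas logic it implements for a `DatetimeIndex`, sketched directly with both extension modes:

import pandas as pd

idx = pd.date_range("2020-01-01", periods=5, freq="D")
freq = idx.freq

# add_length=3: extend the index by three steps of its own frequency.
extended = pd.date_range(start=idx[0], end=idx[-1] + 3 * freq, freq=freq)

# until="2020-01-10": extend up to a timestamp aligned with the frequency.
extended_until = pd.date_range(start=idx[0], end=pd.Timestamp("2020-01-10"), freq=freq)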
Example #13
    def __init__(
        self,
        input_chunk_length: int,
        output_chunk_length: int,
        loss_fn: nn.modules.loss._Loss = nn.MSELoss(),
        torch_metrics: Optional[Union[torchmetrics.Metric,
                                      torchmetrics.MetricCollection]] = None,
        likelihood: Optional[Likelihood] = None,
        optimizer_cls: torch.optim.Optimizer = torch.optim.Adam,
        optimizer_kwargs: Optional[Dict] = None,
        lr_scheduler_cls: Optional[
            torch.optim.lr_scheduler._LRScheduler] = None,
        lr_scheduler_kwargs: Optional[Dict] = None,
    ) -> None:
        """
        PyTorch Lightning-based Forecasting Module.

        This class is meant to be inherited to create a new PyTorch Lightning-based forecasting module.
        When subclassing this class, please make sure to add the following methods with the given signatures:
            - :func:`PLForecastingModule.__init__()`
            - :func:`PLForecastingModule.forward()`
            - :func:`PLForecastingModule._produce_train_output()`
            - :func:`PLForecastingModule._get_batch_prediction()`

        In the subclass `MyModel`'s :func:`__init__`, call ``super(MyModel, self).__init__(**kwargs)`` where
        ``kwargs`` are the parameters of :class:`PLForecastingModule`.

        Parameters
        ----------
        input_chunk_length
            Number of input past time steps per chunk.
        output_chunk_length
            Number of output time steps per chunk.
        loss_fn
            PyTorch loss function used for training.
            This parameter will be ignored for probabilistic models if the ``likelihood`` parameter is specified.
            Default: ``torch.nn.MSELoss()``.
        torch_metrics
            A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found
            at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``.
        likelihood
            One of Darts' :meth:`Likelihood <darts.utils.likelihood_models.Likelihood>` models to be used for
            probabilistic forecasts. Default: ``None``.
        optimizer_cls
            The PyTorch optimizer class to be used. Default: ``torch.optim.Adam``.
        optimizer_kwargs
            Optionally, some keyword arguments for the PyTorch optimizer (e.g., ``{'lr': 1e-3}``
            for specifying a learning rate). Otherwise the default values of the selected ``optimizer_cls``
            will be used. Default: ``None``.
        lr_scheduler_cls
            Optionally, the PyTorch learning rate scheduler class to be used. Specifying ``None`` corresponds
            to using a constant learning rate. Default: ``None``.
        lr_scheduler_kwargs
            Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.
        """
        super().__init__()

        # save hyper parameters for saving/loading
        # do not save type nn.Module params
        self.save_hyperparameters(ignore=["loss_fn", "torch_metrics"])

        raise_if(
            input_chunk_length is None or output_chunk_length is None,
            "Both `input_chunk_length` and `output_chunk_length` must be passed to `PLForecastingModule`",
            logger,
        )

        self.input_chunk_length = input_chunk_length
        self.output_chunk_length = output_chunk_length

        # define the loss function
        self.criterion = loss_fn
        # by default models are deterministic (i.e. not probabilistic)
        self.likelihood = likelihood

        # persist optimiser and LR scheduler parameters
        self.optimizer_cls = optimizer_cls
        self.optimizer_kwargs = dict() if optimizer_kwargs is None else optimizer_kwargs
        self.lr_scheduler_cls = lr_scheduler_cls
        self.lr_scheduler_kwargs = dict() if lr_scheduler_kwargs is None else lr_scheduler_kwargs

        if torch_metrics is None:
            torch_metrics = torchmetrics.MetricCollection([])
        elif isinstance(torch_metrics, torchmetrics.Metric):
            torch_metrics = torchmetrics.MetricCollection([torch_metrics])
        elif isinstance(torch_metrics, torchmetrics.MetricCollection):
            pass
        else:
            raise_log(
                AttributeError(
                    "`torch_metrics` only accepts type torchmetrics.Metric or torchmetrics.MetricCollection"
                ),
                logger,
            )
        self.train_metrics = torch_metrics.clone(prefix="train_")
        self.val_metrics = torch_metrics.clone(prefix="val_")

        # initialize prediction parameters
        self.pred_n: Optional[int] = None
        self.pred_num_samples: Optional[int] = None
        self.pred_roll_size: Optional[int] = None
        self.pred_batch_size: Optional[int] = None
        self.pred_n_jobs: Optional[int] = None
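
A usage sketch for the `torch_metrics` plumbing, assuming a torch-based darts model such as `NBEATSModel` forwards this parameter to the module above:

import torchmetrics
from darts.models import NBEATSModel

metrics = torchmetrics.MetricCollection(
    [torchmetrics.MeanAbsoluteError(), torchmetrics.MeanSquaredError()]
)
# A bare Metric would be wrapped into a MetricCollection by the code above;
# any other type raises the AttributeError.
model = NBEATSModel(input_chunk_length=24, output_chunk_length=12, torch_metrics=metrics)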
Example #14
    def __init__(
        self,
        num_layers: int,
        layer_width: int,
        nr_params: int,
        expansion_coefficient_dim: int,
        input_chunk_length: int,
        target_length: int,
        g_type: GTypes,
        batch_norm: bool,
        dropout: float,
        activation: str,
    ):
        """PyTorch module implementing the basic building block of the N-BEATS architecture.

        The blocks produce outputs of size (target_length, nr_params); i.e.
        "one vector per parameter". The parameters are predicted only for forecast outputs.
        Backcast outputs are in the original "domain".

        Parameters
        ----------
        num_layers
            The number of fully connected layers preceding the final forking layers.
        layer_width
            The number of neurons that make up each fully connected layer.
        nr_params
            The number of parameters of the likelihood (or 1 if no likelihood is used)
        expansion_coefficient_dim
            The dimensionality of the waveform generator parameters, also known as expansion coefficients.
            Used in the generic architecture and the trend module of the interpretable architecture, where it determines
            the degree of the polynomial basis.
        input_chunk_length
            The length of the input sequence fed to the model.
        target_length
            The length of the forecast of the model.
        g_type
            The type of function that is implemented by the waveform generator.
        batch_norm
            Whether to use batch norm
        dropout
            Dropout probability
        activation
            The activation function of encoder/decoder intermediate layer.

        Inputs
        ------
        x of shape `(batch_size, input_chunk_length)`
            Tensor containing the input sequence.

        Outputs
        -------
        x_hat of shape `(batch_size, input_chunk_length)`
            Tensor containing the 'backcast' of the block, which represents an approximation of `x`
            given the constraints of the functional space determined by `g`.
        y_hat of shape `(batch_size, target_length)`
            Tensor containing the forward forecast of the block.

        """
        super().__init__()

        self.num_layers = num_layers
        self.layer_width = layer_width
        self.target_length = target_length
        self.nr_params = nr_params
        self.g_type = g_type
        self.dropout = dropout
        self.batch_norm = batch_norm

        raise_if_not(activation in ACTIVATIONS,
                     f"'{activation}' is not in {ACTIVATIONS}")
        self.activation = getattr(nn, activation)()

        # fully connected stack before fork
        self.linear_layer_stack_list = [
            nn.Linear(input_chunk_length, layer_width)
        ]
        for _ in range(num_layers - 1):
            self.linear_layer_stack_list.append(
                nn.Linear(layer_width, layer_width))

            if self.batch_norm:
                self.linear_layer_stack_list.append(
                    nn.BatchNorm1d(num_features=self.layer_width))

            if self.dropout > 0:
                self.linear_layer_stack_list.append(
                    MonteCarloDropout(p=self.dropout))

        self.fc_stack = nn.ModuleList(self.linear_layer_stack_list)

        # Fully connected layer producing forecast/backcast expansion coefficients (waveform generator parameters).
        # The coefficients are emitted for each parameter of the likelihood.
        if g_type == _GType.SEASONALITY:
            self.backcast_linear_layer = nn.Linear(
                layer_width, 2 * int(input_chunk_length / 2 - 1) + 1)
            self.forecast_linear_layer = nn.Linear(
                layer_width, nr_params * (2 * int(target_length / 2 - 1) + 1))
        else:
            self.backcast_linear_layer = nn.Linear(layer_width,
                                                   expansion_coefficient_dim)
            self.forecast_linear_layer = nn.Linear(
                layer_width, nr_params * expansion_coefficient_dim)

        # waveform generator functions
        if g_type == _GType.GENERIC:
            self.backcast_g = nn.Linear(expansion_coefficient_dim,
                                        input_chunk_length)
            self.forecast_g = nn.Linear(expansion_coefficient_dim,
                                        target_length)
        elif g_type == _GType.TREND:
            self.backcast_g = _TrendGenerator(expansion_coefficient_dim,
                                              input_chunk_length)
            self.forecast_g = _TrendGenerator(expansion_coefficient_dim,
                                              target_length)
        elif g_type == _GType.SEASONALITY:
            self.backcast_g = _SeasonalityGenerator(input_chunk_length)
            self.forecast_g = _SeasonalityGenerator(target_length)
        else:
            raise_log(ValueError("g_type not supported"), logger)
Example #15
def mase(
    actual_series: Union[TimeSeries, Sequence[TimeSeries]],
    pred_series: Union[TimeSeries, Sequence[TimeSeries]],
    insample: Union[TimeSeries, Sequence[TimeSeries]],
    m: Optional[int] = 1,
    intersect: bool = True,
    *,
    reduction: Callable[[np.ndarray], float] = np.mean,
    inter_reduction: Callable[[np.ndarray], Union[float, np.ndarray]] = lambda x: x,
    n_jobs: int = 1,
    verbose: bool = False
) -> Union[float, np.ndarray]:
    """Mean Absolute Scaled Error (MASE).

    See `Mean absolute scaled error wikipedia page <https://en.wikipedia.org/wiki/Mean_absolute_scaled_error>`_
    for details about the MASE and how it is computed.

    If any of the series is stochastic (containing several samples), the median sample value is considered.

    Parameters
    ----------
    actual_series
        The (sequence of) actual series.
    pred_series
        The (sequence of) predicted series.
    insample
        The training series used to forecast `pred_series`.
        This series serves to compute the scale of the error obtained by a naive forecaster on the training data.
    m
        Optionally, the seasonality to use for differencing.
        `m=1` corresponds to the non-seasonal MASE, whereas `m>1` corresponds to seasonal MASE.
        If `m=None`, it will be tentatively inferred
        from the auto-correlation function (ACF). It will fall back to a value of 1 if this fails.
    intersect
        For time series that are overlapping in time without having the same time index, setting `True`
        will consider the values only over their common time interval (intersection in time).
    reduction
        Function taking as input a ``np.ndarray`` and returning a scalar value. This function is used to aggregate
        the metrics of different components in case of multivariate ``TimeSeries`` instances.
    inter_reduction
        Function taking as input a ``np.ndarray`` and returning either a scalar value or a ``np.ndarray``.
        This function can be used to aggregate the metrics of different series in case the metric is evaluated on a
        ``Sequence[TimeSeries]``. Defaults to the identity function, which returns the pairwise metrics for each pair
        of ``TimeSeries`` received in input. Example: ``inter_reduction=np.mean``, will return the average of the
        pairwise metrics.
    n_jobs
        The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is
        passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1`
        (sequential). Setting the parameter to `-1` means using all the available processors.
    verbose
        Optionally, whether to print operations progress

    Raises
    ------
    ValueError
        If the `insample` series is periodic (i.e., :math:`X_t = X_{t-m}`)

    Returns
    -------
    float
        The Mean Absolute Scaled Error (MASE)
    """

    def _multivariate_mase(
        actual_series: TimeSeries,
        pred_series: TimeSeries,
        insample: TimeSeries,
        m: int,
        intersect: bool,
        reduction: Callable[[np.ndarray], float],
    ):

        raise_if_not(
            actual_series.width == pred_series.width,
            "The two TimeSeries instances must have the same width.",
            logger,
        )
        raise_if_not(
            actual_series.width == insample.width,
            "The insample TimeSeries must have the same width as the other series.",
            logger,
        )
        raise_if_not(
            insample.end_time() + insample.freq == pred_series.start_time(),
            "The pred_series must be the forecast of the insample series",
            logger,
        )

        insample_ = (
            insample.quantile_timeseries(quantile=0.5)
            if insample.is_stochastic
            else insample
        )

        value_list = []
        for i in range(actual_series.width):
            # old implementation of mase on univariate TimeSeries
            if m is None:
                test_season, m = check_seasonality(insample)
                if not test_season:
                    warn(
                        "No seasonality found when computing MASE. Fixing the period to 1.",
                        UserWarning,
                    )
                    m = 1

            y_true, y_hat = _get_values_or_raise(
                actual_series.univariate_component(i),
                pred_series.univariate_component(i),
                intersect,
                remove_nan_union=False,
            )

            x_t = insample_.univariate_component(i).values()
            errors = np.abs(y_true - y_hat)
            scale = np.mean(np.abs(x_t[m:] - x_t[:-m]))
            raise_if_not(
                not np.isclose(scale, 0),
                "cannot use MASE with periodical signals",
                logger,
            )
            value_list.append(np.mean(errors / scale))

        return reduction(value_list)

    if isinstance(actual_series, TimeSeries):
        raise_if_not(
            isinstance(pred_series, TimeSeries),
            "Expecting pred_series to be TimeSeries",
        )
        raise_if_not(
            isinstance(insample, TimeSeries), "Expecting insample to be TimeSeries"
        )
        return _multivariate_mase(
            actual_series=actual_series,
            pred_series=pred_series,
            insample=insample,
            m=m,
            intersect=intersect,
            reduction=reduction,
        )

    elif isinstance(actual_series, Sequence) and isinstance(
        actual_series[0], TimeSeries
    ):

        raise_if_not(
            isinstance(pred_series, Sequence)
            and isinstance(pred_series[0], TimeSeries),
            "Expecting pred_series to be a Sequence[TimeSeries]",
        )
        raise_if_not(
            isinstance(insample, Sequence) and isinstance(insample[0], TimeSeries),
            "Expecting insample to be a Sequence[TimeSeries]",
        )
        raise_if_not(
            len(pred_series) == len(actual_series)
            and len(pred_series) == len(insample),
            "The TimeSeries sequences must have the same length.",
            logger,
        )

        raise_if_not(isinstance(n_jobs, int), "n_jobs must be an integer")
        raise_if_not(isinstance(verbose, bool), "verbose must be a bool")

        iterator = _build_tqdm_iterator(
            iterable=zip(actual_series, pred_series, insample),
            verbose=verbose,
            total=len(actual_series),
        )

        value_list = _parallel_apply(
            iterator=iterator,
            fn=_multivariate_mase,
            n_jobs=n_jobs,
            fn_args=dict(),
            fn_kwargs={"m": m, "intersect": intersect, "reduction": reduction},
        )
        return inter_reduction(value_list)
    else:
        raise_log(
            ValueError(
                "Input type not supported, only TimeSeries and Sequence[TimeSeries] are accepted."
            )
        )
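
A usage sketch with `darts.metrics.mase` and a seasonal naive baseline (`NaiveSeasonal` is assumed here purely as a convenient forecaster):

from darts.datasets import AirPassengersDataset
from darts.metrics import mase
from darts.models import NaiveSeasonal

series = AirPassengersDataset().load()
train, test = series[:-36], series[-36:]

model = NaiveSeasonal(K=12)
model.fit(train)
forecast = model.predict(len(test))

# `insample` must be the training series that produced the forecast;
# m=12 computes the seasonal MASE for monthly data.
print(mase(test, forecast, insample=train, m=12))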
Example #16
    def __init__(
        self,
        lags: Union[int, list] = None,
        lags_past_covariates: Union[int, List[int]] = None,
        lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
        output_chunk_length: int = 1,
        model=None,
    ):
        """Regression Model
        Can be used to fit any scikit-learn-like regressor class to predict the target time series from lagged values.

        Parameters
        ----------
        lags
            Lagged target values used to predict the next time step. If an integer is given, the last `lags` lags
            are used (from -1 backward). Otherwise a list of integers with lags is required (each lag must be < 0).
        lags_past_covariates
            Number of lagged past_covariates values used to predict the next time step. If an integer is given, the
            last `lags_past_covariates` lags are used (inclusive, starting from lag -1). Otherwise a list of integers
            with lags < 0 is required.
        lags_future_covariates
            Number of lagged future_covariates values used to predict the next time step. If a tuple (past, future) is
            given, the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
            `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
            of integers with lags is required.
        output_chunk_length
            Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
            horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
            be useful if the covariates don't extend far enough into the future.
        model
            Scikit-learn-like model with ``fit()`` and ``predict()`` methods. Also possible to use model that doesn't
            support multi-output regression for multivariate timeseries, in which case one regressor
            will be used per component in the multivariate series.
            If None, defaults to: ``sklearn.linear_model.LinearRegression(n_jobs=-1)``.
        """

        super().__init__()

        self.model = model
        self.lags = {}
        self.output_chunk_length = None
        self.input_dim = None

        # model checks
        if self.model is None:
            self.model = LinearRegression(n_jobs=-1)

        if not callable(getattr(self.model, "fit", None)):
            raise_log(
                Exception("Provided model object must have a fit() method"),
                logger)
        if not callable(getattr(self.model, "predict", None)):
            raise_log(
                Exception("Provided model object must have a predict() method"),
                logger)

        # check lags
        raise_if(
            (lags is None) and (lags_future_covariates is None)
            and (lags_past_covariates is None),
            "At least one of `lags`, `lags_future_covariates` or `lags_past_covariates` must be not None.",
        )

        lags_type_checks = [
            (lags, "lags"),
            (lags_past_covariates, "lags_past_covariates"),
        ]

        for _lags, lags_name in lags_type_checks:
            raise_if_not(
                isinstance(_lags, (int, list)) or _lags is None,
                f"`{lags_name}` must be of type int or list. Given: {type(_lags)}.",
            )
            raise_if(
                isinstance(_lags, bool),
                f"`{lags_name}` must be of type int or list, not bool.",
            )

        raise_if_not(
            isinstance(lags_future_covariates, (tuple, list))
            or lags_future_covariates is None,
            f"`lags_future_covariates` must be of type tuple or list. Given: {type(lags_future_covariates)}.",
        )

        if isinstance(lags_future_covariates, tuple):
            raise_if_not(
                len(lags_future_covariates) == 2
                and isinstance(lags_future_covariates[0], int)
                and isinstance(lags_future_covariates[1], int),
                "`lags_future_covariates` tuple must be of length 2, and must contain two integers",
            )
            raise_if(
                isinstance(lags_future_covariates[0], bool)
                or isinstance(lags_future_covariates[1], bool),
                "`lags_future_covariates` tuple must contain intergers, not bool",
            )

        # set lags
        if isinstance(lags, int):
            raise_if_not(lags > 0,
                         f"`lags` must be strictly positive. Given: {lags}.")
            # selecting last `lags` lags, starting from position 1 (skipping current, pos 0, the one we want to predict)
            self.lags["target"] = list(range(-lags, 0))
        elif isinstance(lags, list):
            for lag in lags:
                raise_if(
                    not isinstance(lag, int) or (lag >= 0),
                    f"Every element of `lags` must be a strictly negative integer. Given: {lags}.",
                )
            if lags:
                self.lags["target"] = sorted(lags)

        if isinstance(lags_past_covariates, int):
            raise_if_not(
                lags_past_covariates > 0,
                f"`lags_past_covariates` must be an integer > 0. Given: {lags_past_covariates}.",
            )
            self.lags["past"] = list(range(-lags_past_covariates, 0))
        elif isinstance(lags_past_covariates, list):
            for lag in lags_past_covariates:
                raise_if(
                    not isinstance(lag, int) or (lag >= 0),
                    f"Every element of `lags_covariates` must be an integer < 0. Given: {lags_past_covariates}.",
                )
            if lags_past_covariates:
                self.lags["past"] = sorted(lags_past_covariates)

        if isinstance(lags_future_covariates, tuple):
            raise_if_not(
                lags_future_covariates[0] >= 0
                and lags_future_covariates[1] >= 0,
                f"`lags_future_covariates` tuple must contain integers >= 0. Given: {lags_future_covariates}.",
            )
            if (lags_future_covariates[0] is not None
                    and lags_future_covariates[1] is not None):
                if not (lags_future_covariates[0] == 0
                        and lags_future_covariates[1] == 0):
                    self.lags["future"] = list(
                        range(-lags_future_covariates[0],
                              lags_future_covariates[1]))
        elif isinstance(lags_future_covariates, list):
            for lag in lags_future_covariates:
                raise_if(
                    not isinstance(lag, int) or isinstance(lag, bool),
                    f"Every element of `lags_future_covariates` must be an integer. Given: {lags_future_covariates}.",
                )
            if lags_future_covariates:
                self.lags["future"] = sorted(lags_future_covariates)

        # check and set output_chunk_length
        raise_if_not(
            isinstance(output_chunk_length, int) and output_chunk_length > 0,
            f"output_chunk_length must be an integer greater than 0. Given: {output_chunk_length}",
        )
        self.output_chunk_length = output_chunk_length
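
A construction sketch showing the three lag formats accepted above (a `Ridge` regressor is an arbitrary choice; any scikit-learn-like model works):

from sklearn.linear_model import Ridge
from darts.models import RegressionModel

model = RegressionModel(
    lags=12,                            # target lags [-12, ..., -1]
    lags_past_covariates=[-3, -2, -1],  # explicit list of negative lags
    lags_future_covariates=(0, 2),      # (past=0, future=2) -> future lags [0, 1]
    output_chunk_length=1,
    model=Ridge(),
)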