Example #1
0
def _index_tensor(x: Tensor, item: Any) -> Tensor:
    """
    Apply basic, NumPy-like indexing to ``x`` using ``slice_axis``.

    Parameters
    ----------
    x
        Tensor to index; only its ``slice_axis`` and ``squeeze`` methods are
        used.
    item
        A single index or a tuple of indices. Each entry may be an ``int``
        (selects one position, the axis is squeezed away afterwards), a slice
        without a step, or ``Ellipsis``. Entries placed after an ``Ellipsis``
        address axes counted from the end.

    Returns
    -------
    Tensor
        The indexed tensor.

    Raises
    ------
    RuntimeError
        If an entry is neither an int, a slice nor ``Ellipsis``.
    """
    if not isinstance(item, tuple):
        item = (item, )

    num_items = len(item)
    after_ellipsis = False
    dropped_axes: List[int] = []

    for position, index in enumerate(item):
        # once an ellipsis was seen, the remaining entries are right-aligned
        axis = position - num_items if after_ellipsis else position

        if isinstance(index, int):
            if index == -1:
                # `end=index + 1` would be 0 here, so slice up to the end
                begin, end = -1, None
            else:
                begin, end = index, index + 1
            x = x.slice_axis(axis=axis, begin=begin, end=end)
            dropped_axes.append(axis)
        elif index == slice(None):
            # full slice: nothing to do for this axis
            pass
        elif index == Ellipsis:
            after_ellipsis = True
        elif isinstance(index, slice):
            assert index.step is None
            begin = 0 if index.start is None else index.start
            x = x.slice_axis(axis=axis, begin=begin, end=index.stop)
        else:
            raise RuntimeError(f"invalid indexing item: {item}")

    # integer indices leave axes of size one behind; remove them in one go
    if dropped_axes:
        x = x.squeeze(axis=tuple(dropped_axes))
    return x
Example #2
0
    def _compute_edges(F, bin_centers: Tensor) -> Tensor:
        r"""
        Derive the bin edges from the bin centers.

        Interior edges are the midpoints between consecutive centers; the
        first and last edge are set to :math:`-10^{10}` and :math:`10^{10}`,
        respectively.

        Parameters
        ----------
        F
            Namespace providing ``zeros_like``, ``broadcast_add`` and
            ``concat``.
        bin_centers
            Tensor of shape `(*batch_shape, num_bins)`.

        Returns
        -------
        Tensor
            Tensor of shape `(*batch_shape, num_bins + 1)`.
        """

        # zeros with a single slot along the last axis, used as a template
        # for the two outer edges
        edge_template = F.zeros_like(
            bin_centers.slice_axis(axis=-1, begin=0, end=1)
        )
        lowest_edge = edge_template - 1.0e10
        highest_edge = edge_template + 1.0e10

        # midpoints of neighbouring centers
        upper_centers = bin_centers.slice_axis(axis=-1, begin=1, end=None)
        lower_centers = bin_centers.slice_axis(axis=-1, begin=0, end=-1)
        midpoints = F.broadcast_add(upper_centers, lower_centers) / 2.0

        return F.concat(lowest_edge, midpoints, highest_edge, dim=-1)
Example #3
0
    def get_issm_coeff(
        self,
        seasonal_indicators: Tensor  # (batch_size, time_length)
    ) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Combine the coefficients of the non-seasonal ISSM with those of all
        seasonal ISSMs.

        The non-seasonal model receives ``seasonal_indicators`` unchanged;
        the ``ix``-th seasonal model receives the slice ``[ix, ix + 1)`` of
        its last axis.

        Returns
        -------
        Tuple[Tensor, Tensor, Tensor]
            Emission and innovation coefficients concatenated along the last
            axis, and the transition coefficient assembled by
            ``_make_block_diagonal``, in the order
            ``(emission, transition, innovation)``.
        """
        F = getF(seasonal_indicators)

        # one (emission, transition, innovation) triple per component,
        # starting with the non-seasonal one
        per_model_coeffs = [
            self.nonseasonal_issm.get_issm_coeff(seasonal_indicators)
        ]
        for ix, issm in enumerate(self.seasonal_issms):
            indicator_ix = seasonal_indicators.slice_axis(
                axis=-1, begin=ix, end=ix + 1
            )
            per_model_coeffs.append(issm.get_issm_coeff(indicator_ix))

        # regroup: one sequence per coefficient kind
        emission_coeff_ls, transition_coeff_ls, innovation_coeff_ls = zip(
            *per_model_coeffs
        )

        # emission and innovation coefficients are simply stacked
        emission_coeff = F.concat(*emission_coeff_ls, dim=-1)
        innovation_coeff = F.concat(*innovation_coeff_ls, dim=-1)

        # the transition coefficient is block diagonal
        transition_coeff = _make_block_diagonal(transition_coeff_ls)

        return emission_coeff, transition_coeff, innovation_coeff
Example #4
0
def reconciliation_error(A: Tensor, samples: Tensor) -> float:
    r"""
    Computes the maximum relative reconciliation error among all the aggregated
    time series

    .. math::

                    \max_i \frac{|y_i - s_i|} {|y_i|},

    where :math:`i` refers to the aggregated time series index, :math:`y_i` is
    the (direct) forecast obtained for the :math:`i^{th}` time series
    and :math:`s_i` is its aggregated forecast obtained by summing the
    corresponding bottom-level forecasts. If :math:`y_i` is zero, then the
    absolute difference, :math:`|s_i|`, is used instead.

    This can be computed as follows given the constraint matrix A:

    .. math::

                    \max \frac{|A \times samples|} {|samples[:r]|},

    where :math:`r` is the number of aggregated time series.

    Parameters
    ----------
    A
        The constraint matrix A in the equation: Ay = 0 (y being the
        values/forecasts of all time series in the hierarchy).
    samples
        Samples. Shape: `(*batch_shape, target_dim)`.

    Returns
    -------
    Float
        Reconciliation error
    """

    # the first `A.shape[0]` entries of the last axis are the aggregated series
    num_agg_ts = A.shape[0]
    forecasts_agg_ts = samples.slice_axis(
        axis=-1, begin=0, end=num_agg_ts
    ).asnumpy()

    # |samples A^T|: absolute violation of the constraint Ay = 0
    abs_err = mx.nd.abs(mx.nd.dot(samples, A, transpose_b=True)).asnumpy()

    # `np.where` evaluates the quotient for every element, including those
    # with a zero forecast that are replaced by the absolute error anyway;
    # silence the spurious divide-by-zero / invalid-value warnings they cause.
    with np.errstate(divide="ignore", invalid="ignore"):
        rel_err = np.where(
            forecasts_agg_ts == 0,
            abs_err,
            abs_err / np.abs(forecasts_agg_ts),
        )

    return np.max(rel_err)
Example #5
0
    def hybrid_forward(
        self,
        F,
        feat_static_cat: Tensor,
        past_observed_values: Tensor,
        past_seasonal_indicators: Tensor,
        past_time_feat: Tensor,
        past_target: Tensor,
    ) -> Tensor:
        """
        Compute the weighted average of the negative log-likelihood over the
        last ``past_length`` time steps.

        An LDS is built by ``compute_lds`` from the static categorical
        features and the trailing ``past_length`` steps (axis 1) of the
        seasonal indicators and time features. The target is scored under it
        with the scale returned by ``self.scaler``. The negated
        log-likelihood is averaged along axis 1, weighted by the trailing
        observed-value indicators.

        Returns
        -------
        Tensor
            Result of ``weighted_average`` applied to the negative
            log-likelihood.
        """
        # all inputs are cut to their last `past_length` steps along axis 1
        context_start = -self.past_length

        seasonal_context = past_seasonal_indicators.slice_axis(
            axis=1, begin=context_start, end=None
        )
        time_feat_context = past_time_feat.slice_axis(
            axis=1, begin=context_start, end=None
        )
        lds, _ = self.compute_lds(
            F,
            feat_static_cat=feat_static_cat,
            seasonal_indicators=seasonal_context,
            time_feat=time_feat_context,
            length=self.past_length,
        )

        # the scale is computed on the full, uncut past target
        _, scale = self.scaler(past_target, past_observed_values)

        observed_context = past_observed_values.slice_axis(
            axis=1, begin=context_start, end=None
        )
        target_context = past_target.slice_axis(
            axis=1, begin=context_start, end=None
        )

        # the indicator handed to the LDS is reduced with `min` over the
        # last axis
        ll, _, _ = lds.log_prob(
            x=target_context,
            observed=observed_context.min(axis=-1, keepdims=False),
            scale=scale,
        )

        # NOTE(review): squeezing the last axis presumably requires it to
        # have size one -- confirm against the caller
        loss_weights = observed_context.squeeze(axis=-1)
        return weighted_average(F=F, x=-ll, axis=1, weights=loss_weights)
Example #6
0
def _make_2_block_diagonal(F, left: Tensor, right: Tensor) -> Tensor:
    """
    Creates the block diagonal matrix ``[[left, 0], [0, right]]`` for every
    batch entry and time step, where m and n are the sizes of the last two
    axes of left and right respectively.

    Parameters
    ----------
    F
    left
        Tensor of shape (batch_size, seq_length, m, m)
    right
        Tensor of shape (batch_size, seq_length, n, n)
    Returns
    -------
    Tensor
        Block diagonal matrix of shape (batch_size, seq_length, m+n, m+n)
    """
    # zero column taken from left: shape (batch_size, seq_length, m, 1)
    zero_column = left.slice_axis(axis=-1, begin=0, end=1).zeros_like()
    # zero row taken from right: shape (batch_size, seq_length, 1, n)
    zero_row = right.slice_axis(axis=-2, begin=0, end=1).zeros_like()

    # broadcasting both yields the upper right block:
    # shape (batch_size, seq_length, m, n)
    upper_right = F.broadcast_add(zero_column, zero_row)

    # lower left block: shape (batch_size, seq_length, n, m)
    lower_left = upper_right.swapaxes(2, 3)

    # assemble the two block rows, then stack them:
    # shape (batch_size, seq_length, m+n, m+n)
    top_row = F.concat(left, upper_right, dim=3)
    bottom_row = F.concat(lower_left, right, dim=3)

    return F.concat(top_row, bottom_row, dim=2)
Example #7
0
    def emission_coeff(
            self,
            seasonal_indicators: Tensor  # (batch_size, time_length)
    ) -> Tensor:
        """
        Return an all-ones emission coefficient, broadcast to the shape
        (batch_size, seq_length, obs_dim, latent_dim).

        ``seasonal_indicators`` only serves as a shape template here: the
        returned values are all ones.
        """
        F = getF(seasonal_indicators)
        latent_dim = self.latent_dim()

        # ones of shape (1, 1, 1, latent_dim), broadcast below
        ones_template = F.ones(shape=(1, 1, 1, latent_dim))

        # zeros shaped like the indicators with the last axis removed ...
        indicator_zeros = F.zeros_like(
            seasonal_indicators.slice_axis(
                axis=-1, begin=0, end=1
            ).squeeze(axis=-1)
        )
        # ... expanded to the target shape:
        # (batch_size, seq_length, obs_dim, latent_dim)
        shape_template = _broadcast_param(
            indicator_zeros,
            axes=[2, 3],
            sizes=[1, latent_dim],
        )

        return ones_template.broadcast_like(shape_template)
Example #8
0
    def transition_coeff(
            self,
            seasonal_indicators: Tensor  # (batch_size, time_length)
    ) -> Tensor:
        """
        Return an identity transition coefficient, broadcast to the shape
        (batch_size, seq_length, latent_dim, latent_dim).

        ``seasonal_indicators`` only serves as a shape template here: the
        returned values are those of the identity matrix.
        """
        F = getF(seasonal_indicators)
        latent_dim = self.latent_dim()

        # identity matrix with two leading singleton axes
        identity = F.eye(latent_dim)
        identity_template = identity.expand_dims(axis=0).expand_dims(axis=0)

        # zeros shaped like the indicators with the last axis removed ...
        indicator_zeros = F.zeros_like(
            seasonal_indicators.slice_axis(
                axis=-1, begin=0, end=1
            ).squeeze(axis=-1)
        )
        # ... expanded to the target shape:
        # (batch_size, seq_length, latent_dim, latent_dim)
        shape_template = _broadcast_param(
            indicator_zeros,
            axes=[2, 3],
            sizes=[latent_dim, latent_dim],
        )

        return identity_template.broadcast_like(shape_template)
Example #9
0
    def create_network_input(
            self,
            F,
            feat_static_cat: Tensor,  # (batch_size, num_features)
            past_time_feat: Tensor,  # (batch_size, num_features, history_length)
            past_target: Tensor,  # (batch_size, history_length, 1)
            past_observed_values: Tensor,  # (batch_size, history_length)
            future_time_feat: Optional[
                Tensor],  # (batch_size, num_features, prediction_length)
            future_target: Optional[Tensor],  # (batch_size, prediction_length)
    ) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Assemble the inputs for the transformer network.

        Scaled lags of the target, time features and repeated static features
        are concatenated along the last axis. When both ``future_time_feat``
        and ``future_target`` are given, the inputs cover the context and the
        prediction range, otherwise only the context range.

        All tensor arguments should have NTC layout.

        Returns
        -------
        inputs
            Network inputs (batch_size, sub_seq_len, input_dim)
        scale
            Scale of the target (batch_size, 1, *target_shape)
        static_feat
            Embedded categorical features concatenated with the log scale
            (batch_size, num_features + prod(target_shape))
        """

        # time features of the context range, i.e. the past time features
        # from `history_length - context_length` onwards along axis 1
        past_context_feat = past_time_feat.slice_axis(
            axis=1,
            begin=self.history_length - self.context_length,
            end=None,
        )

        if future_time_feat is not None and future_target is not None:
            time_feat = F.concat(past_context_feat, future_time_feat, dim=1)
            sequence = F.concat(past_target, future_target, dim=1)
            sequence_length = self.history_length + self.prediction_length
            subsequences_length = self.context_length + self.prediction_length
        else:
            time_feat = past_context_feat
            sequence = past_target
            sequence_length = self.history_length
            subsequences_length = self.context_length

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags = self.get_lagged_subsequences(
            F=F,
            sequence=sequence,
            sequence_length=sequence_length,
            indices=self.lags_seq,
            subsequences_length=subsequences_length,
        )

        # the scale is computed on the last `context_length` steps of the
        # past target; its shape is (batch_size, 1, *target_shape)
        target_context = past_target.slice_axis(
            axis=1, begin=-self.context_length, end=None
        )
        observed_context = past_observed_values.slice_axis(
            axis=1, begin=-self.context_length, end=None
        )
        _, scale = self.scaler(target_context, observed_context)

        embedded_cat = self.embedder(feat_static_cat)

        # besides the embedded features, the log scale is used as a static
        # feature as it can help prediction too
        if len(self.target_shape) == 0:
            log_scale = F.log(scale)
        else:
            log_scale = F.log(scale.squeeze(axis=1))

        # (batch_size, num_features + prod(target_shape))
        static_feat = F.concat(embedded_cat, log_scale, dim=1)

        # one copy of the static features per time step
        repeated_static_feat = static_feat.expand_dims(axis=1).repeat(
            axis=1, repeats=subsequences_length
        )

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))

        # flatten target and lag axes:
        # from (batch_size, sub_seq_len, *target_shape, num_lags)
        # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
        flat_lag_dim = len(self.lags_seq) * prod(self.target_shape)
        input_lags = F.reshape(
            data=lags_scaled,
            shape=(-1, subsequences_length, flat_lag_dim),
        )

        # (batch_size, sub_seq_len, input_dim)
        inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

        return inputs, scale, static_feat
Example #10
0
    def train_hybrid_forward(
        self,
        F,
        target_dimension_indicator: Tensor,
        past_time_feat: Tensor,
        past_target_cdf: Tensor,
        past_observed_values: Tensor,
        past_is_pad: Tensor,
        future_time_feat: Tensor,
        future_target_cdf: Tensor,
        future_observed_values: Tensor,
    ) -> Tuple[Tensor, ...]:
        """
        Computes the loss for training DeepVAR, all inputs tensors representing
        time series have NTC layout.

        As a side effect, the distribution used for the loss is stored in
        ``self.distribution``.

        Parameters
        ----------
        F
        target_dimension_indicator
            Indices of the target dimension (batch_size, target_dim)
        past_time_feat
            Dynamic features of past time series (batch_size, history_length,
            num_features)
        past_target_cdf
            Past marginal CDF transformed target values (batch_size,
            history_length, target_dim)
        past_observed_values
            Indicator whether or not the values were observed (batch_size,
            history_length, target_dim)
        past_is_pad
            Indicator whether the past target values have been padded
            (batch_size, history_length)
        future_time_feat
            Future time features (batch_size, prediction_length, num_features)
        future_target_cdf
            Future marginal CDF transformed target values (batch_size,
            prediction_length, target_dim)
        future_observed_values
            Indicator whether or not the future values were observed
            (batch_size, prediction_length, target_dim)

        Returns
        -------
        loss
            Weighted average over the time axis of the negative
            log-likelihoods
        likelihoods
            Negative log-likelihoods for each time step
            (batch_size, context + prediction_length, 1)
        distr_args
            Distribution arguments (context + prediction_length,
            number_of_arguments)
        """

        seq_len = self.context_length + self.prediction_length

        # unroll the decoder in "training mode", i.e. by providing future data
        # as well
        rnn_outputs, _, scale, lags_scaled, inputs = self.unroll_encoder(
            F=F,
            past_time_feat=past_time_feat,
            past_target_cdf=past_target_cdf,
            past_observed_values=past_observed_values,
            past_is_pad=past_is_pad,
            future_time_feat=future_time_feat,
            future_target_cdf=future_target_cdf,
            target_dimension_indicator=target_dimension_indicator,
        )

        # target sequence over context and prediction range
        # (batch_size, seq_len, target_dim)
        past_target_context = past_target_cdf.slice_axis(
            axis=1, begin=-self.context_length, end=None
        )
        target = F.concat(past_target_context, future_target_cdf, dim=1)

        distr, distr_args = self.distr(
            time_features=inputs,
            rnn_outputs=rnn_outputs,
            scale=scale,
            lags_scaled=lags_scaled,
            target_dimension_indicator=target_dimension_indicator,
            seq_len=seq_len,
        )

        # add a trailing axis so that all likelihoods share the same shape
        # (batch_size, subseq_length, 1)
        log_prob = distr.log_prob(target)
        likelihoods = -log_prob.expand_dims(axis=-1)

        assert_shape(likelihoods, (-1, seq_len, 1))

        # padded time steps count as unobserved
        not_padded = 1 - past_is_pad.expand_dims(axis=-1)
        past_observed_values = F.broadcast_minimum(
            past_observed_values, not_padded
        )

        # (batch_size, subseq_length, target_dim)
        past_observed_context = past_observed_values.slice_axis(
            axis=1, begin=-self.context_length, end=None
        )
        observed_values = F.concat(
            past_observed_context, future_observed_values, dim=1
        )

        # mask the loss at one time step if one or more observations is missing
        # in the target dimensions (batch_size, subseq_length, 1)
        loss_weights = observed_values.min(axis=-1, keepdims=True)

        assert_shape(loss_weights, (-1, seq_len, 1))

        loss = weighted_average(
            F=F, x=likelihoods, weights=loss_weights, axis=1
        )

        assert_shape(loss, (-1, -1, 1))

        self.distribution = distr

        return (loss, likelihoods) + distr_args
Example #11
0
    def unroll_encoder(
        self,
        F,
        past_time_feat: Tensor,
        past_target_cdf: Tensor,
        past_observed_values: Tensor,
        past_is_pad: Tensor,
        future_time_feat: Optional[Tensor],
        future_target_cdf: Optional[Tensor],
        target_dimension_indicator: Tensor,
    ) -> Tuple[Tensor, List[Tensor], Tensor, Tensor, Tensor]:
        """
        Unrolls the RNN encoder over past and, if present, future data.
        Returns outputs and state of the encoder, plus the scale of
        past_target_cdf, the scaled lags and the inputs that were fed to the
        encoder. All tensor arguments should have NTC layout.

        Parameters
        ----------
        F
        past_time_feat
            Past time features (batch_size, history_length, num_features)
        past_target_cdf
            Past marginal CDF transformed target values (batch_size,
            history_length, target_dim)
        past_observed_values
            Indicator whether or not the values were observed (batch_size,
            history_length, target_dim)
        past_is_pad
            Indicator whether the past target values have been padded
            (batch_size, history_length)
        future_time_feat
            Future time features (batch_size, prediction_length, num_features)
        future_target_cdf
            Future marginal CDF transformed target values (batch_size,
            prediction_length, target_dim)
        target_dimension_indicator
            Dimensionality of the time series (batch_size, target_dim)

        Returns
        -------
        outputs
            RNN outputs (batch_size, seq_len, num_cells)
        states
            RNN states. Nested list with (batch_size, num_cells) tensors with
            dimensions target_dim x num_layers x (batch_size, num_cells)
        scale
            Mean scales for the time series (batch_size, 1, target_dim)
        lags_scaled
            Scaled lags (batch_size, sub_seq_len, target_dim, num_lags)
        inputs
            inputs to the RNN

        """

        # padded time steps count as unobserved
        not_padded = 1 - past_is_pad.expand_dims(axis=-1)
        past_observed_values = F.broadcast_minimum(
            past_observed_values, not_padded
        )

        # time features of the last `context_length` past steps
        past_context_feat = past_time_feat.slice_axis(
            axis=1, begin=-self.context_length, end=None
        )

        if future_time_feat is not None and future_target_cdf is not None:
            # future data available: cover context and prediction range
            time_feat = F.concat(past_context_feat, future_time_feat, dim=1)
            sequence = F.concat(past_target_cdf, future_target_cdf, dim=1)
            sequence_length = self.history_length + self.prediction_length
            subsequences_length = self.context_length + self.prediction_length
        else:
            time_feat = past_context_feat
            sequence = past_target_cdf
            sequence_length = self.history_length
            subsequences_length = self.context_length

        # (batch_size, sub_seq_len, target_dim, num_lags)
        lags = self.get_lagged_subsequences(
            F=F,
            sequence=sequence,
            sequence_length=sequence_length,
            indices=self.lags_seq,
            subsequences_length=subsequences_length,
        )

        # the scale is computed on the last `context_length` steps of the
        # past target; its shape is (batch_size, 1, target_dim)
        target_context = past_target_cdf.slice_axis(
            axis=1, begin=-self.context_length, end=None
        )
        observed_context = past_observed_values.slice_axis(
            axis=1, begin=-self.context_length, end=None
        )
        _, scale = self.scaler(target_context, observed_context)

        outputs, states, lags_scaled, inputs = self.unroll(
            F=F,
            lags=lags,
            scale=scale,
            time_feat=time_feat,
            target_dimension_indicator=target_dimension_indicator,
            unroll_length=subsequences_length,
            begin_state=None,
        )

        return outputs, states, scale, lags_scaled, inputs
Example #12
0
    def hybrid_forward(
        self,
        F,
        feat_static_cat: Tensor,
        feat_static_real: Tensor,
        past_time_feat: Tensor,
        past_target: Tensor,
        past_observed_values: Tensor,
        past_is_pad: Optional[Tensor],
        future_time_feat: Tensor,
        future_target: Tensor,
        future_observed_values: Tensor,
    ) -> Tensor:
        """
        Computes the loss for training DeepAR, all inputs tensors representing
        time series have NTC layout.

        Parameters
        ----------
        F
        feat_static_cat : (batch_size, num_features)
        feat_static_real : (batch_size, num_features)
        past_time_feat : (batch_size, history_length, num_features)
        past_target : (batch_size, history_length, *target_shape)
        past_observed_values : (batch_size, history_length, *target_shape)
        past_is_pad : forwarded unchanged to ``self.distribution``
        future_time_feat : (batch_size, prediction_length, num_features)
        future_target : (batch_size, prediction_length, *target_shape)
        future_observed_values : (batch_size, prediction_length, *target_shape)

        Returns
        -------
        weighted_loss
            Weighted average of the loss along axis 1, plus the ``ar_loss`` /
            ``tar_loss`` regularization terms when ``self.alpha`` /
            ``self.beta`` are set.
        loss
            Per-step loss (batch_size, seq_len), set to zero wherever the
            loss weight is zero.
        """

        outputs = self.distribution(
            feat_static_cat=feat_static_cat,
            feat_static_real=feat_static_real,
            past_time_feat=past_time_feat,
            past_target=past_target,
            past_observed_values=past_observed_values,
            past_is_pad=past_is_pad,
            future_time_feat=future_time_feat,
            future_target=future_target,
            future_observed_values=future_observed_values,
            return_rnn_outputs=True,
        )
        # with return_rnn_outputs=True a (distr, rnn_outputs) pair is expected
        assert isinstance(outputs, tuple)
        distr, rnn_outputs = outputs

        # first time step of the context range within the history
        context_start = self.history_length - self.context_length

        # target over context and prediction range
        # (batch_size, seq_len, *target_shape)
        past_target_context = past_target.slice_axis(
            axis=1, begin=context_start, end=None
        )
        target = F.concat(past_target_context, future_target, dim=1)

        # (batch_size, seq_len)
        loss = distr.loss(target)

        # (batch_size, seq_len, *target_shape)
        past_observed_context = past_observed_values.slice_axis(
            axis=1, begin=context_start, end=self.history_length
        )
        observed_values = F.concat(
            past_observed_context, future_observed_values, dim=1
        )

        # mask the loss at one time step iff one or more observations is
        # missing in the target dimensions
        # (batch_size, seq_len)
        if len(self.target_shape) == 0:
            loss_weights = observed_values
        else:
            loss_weights = observed_values.min(axis=-1, keepdims=False)

        # the average is taken over the loss before it is masked below
        weighted_loss = weighted_average(
            F=F,
            x=loss,
            weights=loss_weights,
            axis=1,
            include_zeros_in_denominator=self.include_zeros_in_denominator,
        )

        # need to mask possible nans and -inf
        loss = F.where(condition=loss_weights, x=loss, y=F.zeros_like(loss))

        # rnn_outputs is already merged into a single tensor
        assert not isinstance(rnn_outputs, list)

        # it seems that the trainer only uses the first return value for
        # backward, so the regularization is only added to weighted_loss
        if self.alpha:
            weighted_loss = weighted_loss + self.ar_loss(rnn_outputs)
        if self.beta:
            weighted_loss = weighted_loss + self.tar_loss(rnn_outputs)

        return weighted_loss, loss
Example #13
0
    def unroll_encoder_default(
        self,
        F,
        feat_static_cat: Tensor,  # (batch_size, num_features)
        feat_static_real: Tensor,  # (batch_size, num_features)
        past_time_feat: Tensor,  # (batch_size, history_length, num_features)
        past_target: Tensor,  # (batch_size, history_length, *target_shape)
        past_observed_values:
        Tensor,  # (batch_size, history_length, *target_shape)
        past_is_pad: Tensor,
        future_observed_values: Optional[Tensor],
        future_time_feat: Optional[
            Tensor],  # (batch_size, prediction_length, num_features)
        future_target: Optional[
            Tensor],  # (batch_size, prediction_length, *target_shape)
    ) -> Tuple[Tensor, List, Tensor, Tensor, Tensor]:
        """
        Unrolls the LSTM encoder over past and, if present, future data.

        All tensor arguments should have NTC layout.

        Returns
        -------
        outputs
            Encoder outputs (batch_size, seq_len, num_cells)
        state
            Encoder state, list of (batch_size, num_cells) tensors
        scale
            Scale of past_target (batch_size, 1, *target_shape)
        static_feat
            Static features that were constructed and fed as input to the
            encoder (batch_size, num_features + prod(target_shape))
        sequence
            The past target, concatenated with the future target along
            axis 1 when future data is given
        """

        # first time step of the context range within the history
        context_start = self.history_length - self.context_length

        past_context_feat = past_time_feat.slice_axis(
            axis=1, begin=context_start, end=None
        )
        past_context_is_pad = past_is_pad.slice_axis(
            axis=1, begin=context_start, end=None
        )

        if future_time_feat is not None and future_target is not None:
            # future data available: cover context and prediction range
            time_feat = F.concat(past_context_feat, future_time_feat, dim=1)

            # the padding indicator is extended with zeros for the future
            is_padded_indicator = F.concat(
                past_context_is_pad,
                F.zeros_like(future_observed_values),
                dim=1,
            )

            sequence = F.concat(past_target, future_target, dim=1)
            sequence_length = self.history_length + self.prediction_length
            subsequences_length = self.context_length + self.prediction_length
        else:
            time_feat = past_context_feat
            is_padded_indicator = past_context_is_pad
            sequence = past_target
            sequence_length = self.history_length
            subsequences_length = self.context_length

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags = self.get_lagged_subsequences(
            F=F,
            sequence=sequence,
            sequence_length=sequence_length,
            indices=self.lags_seq,
            subsequences_length=subsequences_length,
        )

        # the scale is computed on the last `context_length` steps of the
        # past target; its shape is (batch_size, 1, *target_shape)
        target_context = past_target.slice_axis(
            axis=1, begin=-self.context_length, end=None
        )
        observed_context = past_observed_values.slice_axis(
            axis=1, begin=-self.context_length, end=None
        )
        _, scale = self.scaler(target_context, observed_context)

        # (batch_size, num_features)
        embedded_cat = self.embedder(feat_static_cat)

        # besides the embedded and real static features, the log scale is
        # used as it can help prediction too
        if len(self.target_shape) == 0:
            log_scale = F.log(scale)
        else:
            log_scale = F.log(scale.squeeze(axis=1))

        # (batch_size, num_features + prod(target_shape))
        static_feat = F.concat(
            embedded_cat, feat_static_real, log_scale, dim=1
        )

        # (batch_size, subsequences_length, num_features + 1)
        repeated_static_feat = static_feat.expand_dims(axis=1).repeat(
            axis=1, repeats=subsequences_length
        )

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))

        # flatten target and lag axes:
        # from (batch_size, sub_seq_len, *target_shape, num_lags)
        # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
        flat_lag_dim = len(self.lags_seq) * prod(self.target_shape)
        input_lags = F.reshape(
            data=lags_scaled,
            shape=(-1, subsequences_length, flat_lag_dim),
        )

        # (batch_size, sub_seq_len, input_dim)
        inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

        # the batch size is only read off NDArray inputs; otherwise 0 is used
        if isinstance(inputs, mx.nd.NDArray):
            batch_size = inputs.shape[0]
        else:
            batch_size = 0
        begin_state = self.rnn.begin_state(
            func=F.zeros,
            dtype=self.dtype,
            batch_size=batch_size,
        )
        state = begin_state

        # This is a dummy computation to avoid deferred initialization error
        # when past_is_pad is not used in the computation graph in default
        # unrolling mode. Both branches of `where` carry the initial state.
        guarded_state = []
        for bs, s in zip(begin_state, state):
            first_step_is_pad = is_padded_indicator.slice_axis(
                axis=1, begin=0, end=1
            ).repeat(repeats=self.num_cells, axis=1)
            guarded_state.append(F.where(first_step_is_pad, bs, s))
        state = guarded_state

        # unroll encoder
        outputs, state = self.rnn.unroll(
            inputs=inputs,
            length=subsequences_length,
            layout="NTC",
            merge_outputs=True,
            begin_state=state,
        )

        # outputs: (batch_size, seq_len, num_cells)
        # state: list of (batch_size, num_cells) tensors
        # scale: (batch_size, 1, *target_shape)
        # static_feat: (batch_size, num_features + prod(target_shape))
        return outputs, state, scale, static_feat, sequence
Example #14
0
    def unroll_encoder_imputation(
        self,
        F,
        feat_static_cat: Tensor,  # (batch_size, num_features)
        feat_static_real: Tensor,  # (batch_size, num_features)
        past_time_feat: Tensor,  # (batch_size, history_length, num_features)
        past_target: Tensor,  # (batch_size, history_length, *target_shape)
        past_observed_values: Tensor,  # (batch_size, history_length, *target_shape)
        past_is_pad: Tensor,  # (batch_size, history_length, *target_shape)
        future_observed_values: Optional[
            Tensor
        ],  # (batch_size, prediction_length, *target_shape)
        future_time_feat: Optional[
            Tensor
        ],  # (batch_size, prediction_length, num_features)
        future_target: Optional[
            Tensor
        ],  # (batch_size, prediction_length, *target_shape)
    ) -> Tuple[Tensor, List, Tensor, Tensor, Tensor]:
        """
        Unroll the RNN encoder in "imputation mode", in which imputed values
        are filled with samples from the DeepAR model.

        This method assembles the tensors covering the unrolled range, builds
        the static features and the initial RNN state, and delegates the
        step-wise unrolling to ``imputation_rnn_unroll``.

        Parameters
        ----------
        F
            MXNet API namespace used for all tensor operations.
        feat_static_cat
            Static categorical features, fed to ``self.embedder``.
        feat_static_real
            Static real features, concatenated to the embedded categoricals.
        past_time_feat
            Time features of the past range.
        past_target
            Target values of the past range.
        past_observed_values
            Observed-value indicator for ``past_target``.
        past_is_pad
            Padding indicator for the past range.
        future_observed_values
            Observed-value indicator for ``future_target``. It is only read
            when both ``future_time_feat`` and ``future_target`` are given.
            NOTE(review): it is not checked for ``None`` in that case -
            callers presumably always pass it together with the other
            ``future_*`` tensors; confirm.
        future_time_feat
            Time features of the future range, or ``None``.
        future_target
            Target values of the future range, or ``None``.

        Returns
        -------
        Tuple[Tensor, List, Tensor, Tensor, Tensor]
            ``(out, state, scale, static_feat, imputed_sequence)`` where

            - ``out`` is the concatenation along axis 1 of the per-step
              outputs, (batch_size, seq_len, num_cells),
            - ``state`` is a list of (batch_size, num_cells) tensors,
            - ``scale`` has shape (batch_size, 1, *target_shape),
            - ``static_feat`` has shape
              (batch_size, num_features + prod(target_shape)),
            - ``imputed_sequence`` is the sequence returned by
              ``imputation_rnn_unroll``.
        """
        context_begin = self.history_length - self.context_length

        def context_window(tensor):
            # Last `context_length` steps of a `history_length`-long tensor.
            return tensor.slice_axis(axis=1, begin=context_begin, end=None)

        # Start from the "past only" configuration ...
        time_feat = context_window(past_time_feat)
        is_padded_indicator = context_window(past_is_pad)
        target = context_window(past_target)
        target_observed_values = context_window(past_observed_values)
        sequence = past_target
        sequence_length = self.history_length
        subsequences_length = self.context_length

        # ... and append the future range when it is available.
        if future_time_feat is not None and future_target is not None:
            time_feat = F.concat(time_feat, future_time_feat, dim=1)
            # future positions are never marked as padding
            is_padded_indicator = F.concat(
                is_padded_indicator,
                F.zeros_like(future_observed_values),
                dim=1,
            )
            target = F.concat(target, future_target, dim=1)
            target_observed_values = F.concat(
                target_observed_values, future_observed_values, dim=1
            )
            sequence = F.concat(past_target, future_target, dim=1)
            sequence_length = self.history_length + self.prediction_length
            subsequences_length = self.context_length + self.prediction_length

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags = self.get_lagged_subsequences(
            F=F,
            sequence=sequence,
            sequence_length=sequence_length,
            indices=self.lags_seq,
            subsequences_length=subsequences_length,
        )

        # scale is computed on the last `context_length` units of the past
        # target; its shape is (batch_size, 1, *target_shape)
        scaler_window = dict(axis=1, begin=-self.context_length, end=None)
        _, scale = self.scaler(
            past_target.slice_axis(**scaler_window),
            past_observed_values.slice_axis(**scaler_window),
        )

        # (batch_size, num_features)
        embedded_cat = self.embedder(feat_static_cat)

        # in addition to the embedded features, the log scale is used as a
        # static feature because it can help prediction too
        if len(self.target_shape) == 0:
            log_scale = F.log(scale)
        else:
            log_scale = F.log(scale.squeeze(axis=1))

        # (batch_size, num_features + prod(target_shape))
        static_feat = F.concat(
            embedded_cat, feat_static_real, log_scale, dim=1
        )

        # (batch_size, subsequences_length, num_features + 1)
        expanded_static_feat = static_feat.expand_dims(axis=1)
        repeated_static_feat = expanded_static_feat.repeat(
            axis=1, repeats=subsequences_length
        )

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))

        # from (batch_size, sub_seq_len, *target_shape, num_lags)
        # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
        flat_lag_dim = len(self.lags_seq) * prod(self.target_shape)
        input_lags = F.reshape(
            data=lags_scaled,
            shape=(-1, subsequences_length, flat_lag_dim),
        )

        # (batch_size, sub_seq_len, input_dim)
        inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

        # The concrete batch size is only known for NDArray inputs; 0 is
        # passed otherwise.
        if isinstance(inputs, mx.nd.NDArray):
            batch_size = inputs.shape[0]
        else:
            batch_size = 0
        begin_state = self.rnn.begin_state(
            func=F.zeros,
            dtype=self.dtype,
            batch_size=batch_size,
        )

        outputs, state, imputed_sequence = self.imputation_rnn_unroll(
            F,
            begin_state=begin_state,
            sequence=sequence,
            sequence_length=sequence_length,
            subsequences_length=subsequences_length,
            scale=scale,
            target=target,
            target_observed_values=target_observed_values,
            time_feat=time_feat,
            repeated_static_feat=repeated_static_feat,
            is_padded_indicator=is_padded_indicator,
        )

        # merge the per-step outputs along the time axis
        out = F.concat(*outputs, dim=1)
        return out, state, scale, static_feat, imputed_sequence
Example #15
0
    def prepare_inputs_imputation_step(
        self,
        F,
        begin_state: List[Tensor],
        imputed_sequence: Tensor,
        sequence_length: int,
        subsequences_length: int,
        scale: Tensor,
        target: Tensor,
        target_observed_values: Tensor,
        time_feat: Tensor,
        repeated_static_feat: Tensor,
        is_padded_indicator: Tensor,
        state,
        i: int,
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
        """
        Prepare the inputs for unrolling step ``i`` in imputation mode.

        The network inputs are rebuilt from the current ``imputed_sequence``
        (lags, scaled and flattened, plus time and static features), the
        per-step indicators and target are cut out at position ``i``, the
        imputed sequence is split around the position being processed, and
        the RNN state is reset wherever the step is padding.

        Parameters
        ----------
        F
            MXNet API namespace used for all tensor operations.
        begin_state
            Initial RNN state, used to reset ``state`` on padded steps.
        imputed_sequence
            Sequence the lagged inputs are computed from.
        sequence_length
            Length of ``imputed_sequence``, passed to
            ``get_lagged_subsequences``.
        subsequences_length
            Number of steps being unrolled.
        scale
            Scale the lags are divided by.
        target
            Target values; sliced at step ``i``.
        target_observed_values
            Observed-value indicator; sliced at step ``i``.
        time_feat
            Time features, concatenated to the lag inputs.
        repeated_static_feat
            Static features repeated along the time axis.
        is_padded_indicator
            Padding indicator; sliced at step ``i``.
        state
            Current RNN state, iterated in parallel with ``begin_state``.
        i
            Index of the step being prepared.

        Returns
        -------
        tuple
            ``(inputs, is_pad, current_observed_indicator, current_target,
            pre_sequence, post_sequence, state)``. Note that ``state`` is a
            list with one entry per ``(begin_state, state)`` pair.
        """
        lagged = self.get_lagged_subsequences(
            F=F,
            sequence=imputed_sequence,
            sequence_length=sequence_length,
            indices=self.lags_seq,
            subsequences_length=subsequences_length,
        )

        # (batch_size, sub_seq_len, *target_shape, num_lags)
        scaled_lags = F.broadcast_div(lagged, scale.expand_dims(axis=-1))

        # from (batch_size, sub_seq_len, *target_shape, num_lags)
        # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
        flat_lag_dim = len(self.lags_seq) * prod(self.target_shape)
        input_lags = F.reshape(
            data=scaled_lags,
            shape=(-1, subsequences_length, flat_lag_dim),
        )

        # (batch_size, sub_seq_len, input_dim)
        inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

        def at_step(tensor):
            # Length-one window along axis 1 at the current step.
            return tensor.slice_axis(axis=1, begin=i, end=i + 1)

        is_pad = at_step(is_padded_indicator)
        current_observed_indicator = at_step(target_observed_values)
        current_target = at_step(target)

        # Position of step i counted from the end of the imputed sequence.
        offset = i - subsequences_length

        # everything before the current position ...
        pre_sequence = imputed_sequence.slice_axis(
            axis=1, begin=0, end=offset
        )
        # ... and everything after it.
        # NOTE(review): for i == subsequences_length - 1 the begin index is
        # 0, so this slice starts at the first element instead of being
        # empty - confirm that the caller accounts for the last step.
        post_sequence = imputed_sequence.slice_axis(
            axis=1, begin=offset + 1, end=None
        )

        def reset_if_padded(initial, current):
            # Take the begin state where the current step is padding.
            return F.where(
                is_pad.repeat(repeats=self.num_cells, axis=1),
                initial,
                current,
            )

        state = [
            reset_if_padded(initial, current)
            for initial, current in zip(begin_state, state)
        ]

        return (
            inputs,
            is_pad,
            current_observed_indicator,
            current_target,
            pre_sequence,
            post_sequence,
            state,
        )
Example #16
0
    def hybrid_forward(
        self,
        F,
        feat_static_cat: Tensor,
        past_observed_values: Tensor,
        past_seasonal_indicators: Tensor,
        past_time_feat: Tensor,
        past_target: Tensor,
        future_seasonal_indicators: Tensor,
        future_time_feat: Tensor,
    ) -> Tensor:
        """
        Draw sample paths for the prediction range.

        An LDS is built for the last ``past_length`` steps and evaluated on
        the past target to obtain the final mean and covariance. A second LDS
        for the prediction range is then built, seeded with those and with
        the LSTM state of the first pass, and sampled
        ``num_parallel_samples`` times.

        Parameters
        ----------
        F
            MXNet API namespace used for all tensor operations.
        feat_static_cat
            Static categorical features, forwarded to ``compute_lds``.
        past_observed_values
            Observed-value indicator for ``past_target``.
        past_seasonal_indicators
            Seasonal indicators of the past range.
        past_time_feat
            Time features of the past range.
        past_target
            Target values of the past range.
        future_seasonal_indicators
            Seasonal indicators of the prediction range.
        future_time_feat
            Time features of the prediction range.

        Returns
        -------
        Tensor
            Samples of shape
            (batch_size, num_samples, prediction_length, target_dim); the
            last axis is squeezed away when ``self.univariate`` is set.
        """

        def last_past_steps(tensor):
            # Last `past_length` steps along the time axis.
            return tensor.slice_axis(
                axis=1, begin=-self.past_length, end=None
            )

        lds, lstm_state = self.compute_lds(
            F,
            feat_static_cat=feat_static_cat,
            seasonal_indicators=last_past_steps(past_seasonal_indicators),
            time_feat=last_past_steps(past_time_feat),
            length=self.past_length,
        )

        # the scaler is given the full past tensors, not just the window
        _, scale = self.scaler(past_target, past_observed_values)

        observed_context = last_past_steps(past_observed_values)
        target_context = last_past_steps(past_target)

        # One indicator per time step: the minimum over the last axis.
        step_observed = observed_context.min(axis=-1, keepdims=False)

        _, final_mean, final_cov = lds.log_prob(
            x=target_context,
            observed=step_observed,
            scale=scale,
        )

        lds_prediction, _ = self.compute_lds(
            F,
            feat_static_cat=feat_static_cat,
            seasonal_indicators=future_seasonal_indicators,
            time_feat=future_time_feat,
            length=self.prediction_length,
            lstm_begin_state=lstm_state,
            prior_mean=final_mean,
            prior_cov=final_cov,
        )

        samples = lds_prediction.sample(
            num_samples=self.num_parallel_samples, scale=scale
        )

        # convert samples from
        # (num_samples, batch_size, prediction_length, target_dim)
        # to
        # (batch_size, num_samples, prediction_length, target_dim)
        batch_first = samples.transpose(axes=(1, 0, 2, 3))

        if not self.univariate:
            return batch_first
        # squeeze the last axis in the univariate case
        return batch_first.squeeze(axis=3)
Example #17
0
    def hybrid_forward(
        self, F, past_target: Tensor, past_observed_values: Tensor
    ) -> Tensor:
        """
        Compute the LSTNet output for the last ``context_length`` steps.

        The context window of `past_target` is scaled by ``self.scaler``
        (which also receives the matching window of `past_observed_values`,
        an indicator tensor with 0 or 1 values). The scaled context and the
        indicator are stacked as CNN input channels; the CNN features feed an
        RNN and the skip-RNN layer, whose outputs go through a dense layer
        and are added to the autoregressive highway output.

        Parameters
        ----------
        F
            MXNet API namespace used for all tensor operations.
        past_target
            Tensor of shape (batch_size, num_series, context_length)
        past_observed_values
            Tensor of shape (batch_size, num_series, context_length)

        Returns
        -------
        Tuple of two tensors
            ``(prediction, scale)``. The return annotation says ``Tensor``,
            but a 2-tuple is what the code returns. ``prediction`` has shape
            (batch_size, num_series, 1) if `horizon` was specified and
            (batch_size, num_series, prediction_length) if
            `prediction_length` was provided; it is passed through sigmoid or
            tanh when ``self.output_activation`` is not ``None``. ``scale``
            is the scale returned by ``self.scaler``.
        """

        def last_context(tensor):
            # Last `context_length` steps along the time axis (axis 2).
            return tensor.slice_axis(
                axis=2, begin=-self.context_length, end=None
            )

        context_target = last_context(past_target)
        context_observed = last_context(past_observed_values)

        scaled_context, scale = self.scaler(context_target, context_observed)

        # scaled values and observed indicator become two input channels
        cnn_inputs = F.concat(
            scaled_context.expand_dims(axis=1),
            context_observed.expand_dims(axis=1),
            dim=1,
        )
        conv_features = self.dropout(self.cnn(cnn_inputs))
        conv_features = F.squeeze(conv_features, axis=2)  # NCT

        rnn_seq = F.transpose(conv_features, axes=(2, 0, 1))  # TNC

        if F is not mx.ndarray:
            # No concrete batch size is available here; 0 is passed.
            rnn_begin_state = self.rnn.begin_state(
                func=F.zeros, dtype=self.dtype, batch_size=0
            )
        else:
            # Create the initial state inside the context of the input.
            if isinstance(rnn_seq, mx.gluon.tensor_types):
                ctx = rnn_seq.context
            else:
                ctx = rnn_seq[0].context
            with ctx:
                rnn_begin_state = self.rnn.begin_state(
                    func=F.zeros,
                    dtype=self.dtype,
                    batch_size=rnn_seq.shape[1],
                )

        rnn_out, _ = self.rnn.unroll(
            inputs=rnn_seq,
            length=min(self.conv_out, self.context_length),
            layout="TNC",
            merge_outputs=True,
            begin_state=rnn_begin_state,
        )

        # keep only the last time step of the RNN output
        last_step = F.slice_axis(rnn_out, axis=0, begin=-1, end=None)
        rnn_last = F.squeeze(last_step, axis=0)  # NC

        skip_out = self._skip_rnn_layer(F, conv_features)

        # make the dense output broadcastable for the final sum
        dense_in = F.concat(rnn_last, skip_out, dim=1)
        dense_out = self.fc(dense_in).expand_dims(
            axis=2
        )  # N x num_series x 1
        if self.prediction_length:
            dense_out = F.tile(
                dense_out, reps=(1, 1, self.prediction_length)
            )  # N x num_series x prediction_length

        highway_out = self._ar_highway(F, scaled_context, context_observed)
        out = dense_out + highway_out

        if self.output_activation is None:
            return out, scale

        # any activation name other than "sigmoid" falls through to tanh
        if self.output_activation == "sigmoid":
            activated = F.sigmoid(out)
        else:
            activated = F.tanh(out)
        return activated, scale