Example #1
        def _assemble_covariates(
            feat_dynamic_real: Tensor,
            feat_dynamic_cat: Tensor,
            feat_static_real: Tensor,
            feat_static_cat: Tensor,
            is_past: bool,
        ) -> Tensor:
            covariates = []
            if feat_dynamic_real.shape[-1] > 0:
                covariates.append(feat_dynamic_real)
            if feat_static_real.shape[-1] > 0:
                covariates.append(
                    feat_static_real.expand_dims(axis=1).repeat(
                        axis=1,
                        repeats=self.context_length
                        if is_past
                        else self.prediction_length,
                    )
                )
            if len(covariates) > 0:
                covariates = F.concat(*covariates, dim=-1)
                covariates = self.covar_proj(covariates)
            else:
                covariates = None

            categories = []
            if feat_dynamic_cat.shape[-1] > 0:
                categories.append(feat_dynamic_cat)
            if feat_static_cat.shape[-1] > 0:
                categories.append(
                    feat_static_cat.expand_dims(axis=1).repeat(
                        axis=1,
                        repeats=self.context_length
                        if is_past
                        else self.prediction_length,
                    )
                )
            if len(categories) > 0:
                categories = F.concat(*categories, dim=-1)
                embeddings = self.embedder(categories)
                embeddings = F.reshape(
                    embeddings, shape=(0, 0, -4, self.d_hidden, -1)
                ).sum(axis=-1)
                if covariates is not None:
                    covariates = covariates + embeddings
                else:
                    covariates = embeddings

            return covariates
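
The pattern to note above: static features of shape (batch, num_features) are tiled along the time axis with expand_dims(axis=1).repeat(...) so that they can be concatenated with dynamic features of shape (batch, T, num_features). A minimal numpy sketch of that tiling, with illustrative shapes that are not taken from the original model:

import numpy as np

batch, T, d_static, d_dynamic = 4, 10, 3, 5
feat_static = np.random.randn(batch, d_static)        # (batch, d_static)
feat_dynamic = np.random.randn(batch, T, d_dynamic)   # (batch, T, d_dynamic)

# expand_dims(axis=1) -> (batch, 1, d_static), then repeat along time -> (batch, T, d_static)
static_tiled = np.repeat(feat_static[:, None, :], repeats=T, axis=1)

covariates = np.concatenate([feat_dynamic, static_tiled], axis=-1)
assert covariates.shape == (batch, T, d_dynamic + d_static)
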
Example #2
def capacitance_tril(F, rank: Tensor, W: Tensor, D: Tensor) -> Tensor:
    r"""

    Parameters
    ----------
    F
    rank
    W : (..., dim, rank)
    D : (..., dim)

    Returns
    -------
        the lower Cholesky factor of the capacitance matrix
        :math:`I + W^T D^{-1} W`

    """
    # (..., dim, rank)
    Wt_D_inv_t = F.broadcast_div(W, D.expand_dims(axis=-1))

    # (..., rank, rank)
    K = F.linalg_gemm2(Wt_D_inv_t, W, transpose_a=True)

    # (..., rank, rank)
    Id = F.broadcast_mul(F.ones_like(K), F.eye(rank))

    # (..., rank, rank)
    return F.linalg.potrf(K + Id)
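
As a sanity check on what capacitance_tril returns: for a low-rank-plus-diagonal covariance W W^T + D, the factor L it computes satisfies L L^T = I + W^T D^{-1} W. A small numpy sketch with assumed sizes:

import numpy as np

dim, rank = 5, 2
np.random.seed(0)
W = np.random.randn(dim, rank)
D = np.random.rand(dim) + 0.1              # positive diagonal entries

C = np.eye(rank) + W.T @ (W / D[:, None])  # capacitance matrix I + W^T D^{-1} W
L = np.linalg.cholesky(C)                  # what capacitance_tril returns (lower factor)
assert np.allclose(L @ L.T, C)
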
Example #3
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F

        probs = self.bin_probs.swapaxes(0, 1)  # (num_bins, batch)
        zeros_batch_size = F.zeros_like(
            F.slice_axis(probs, axis=0, begin=0, end=1).squeeze(axis=0)
        )  # (batch_size,)

        level = level.expand_dims(axis=0)
        # cdf shape (batch_size, levels)
        zeros_cdf = F.broadcast_add(zeros_batch_size.expand_dims(axis=1),
                                    level.zeros_like())
        start_state = (zeros_cdf, zeros_cdf.astype("int32"))

        def step(p, state):
            cdf, idx = state
            cdf = F.broadcast_add(cdf, p.expand_dims(axis=1))
            idx = F.where(F.broadcast_greater(cdf, level), idx, idx + 1)
            return zeros_batch_size, (cdf, idx)

        _, states = F.contrib.foreach(step, probs, start_state)
        _, idx = states

        # expand centers to shape (batch, levels, num_bins)
        # so we can use pick with idx.shape = (batch, levels)
        centers_expanded = F.broadcast_add(
            self.bin_centers.expand_dims(axis=1),
            zeros_cdf.expand_dims(axis=-1),
        )
        a = centers_expanded.pick(idx, axis=-1)
        return a.swapaxes(0, 1)
Example #4
    def hybrid_forward(
            self,
            F,
            feat_static_cat: Tensor,  # (batch_size, 1)
            past_time_feat: Tensor,
            # (batch_size, history_length, num_features)
            past_target: Tensor,  # (batch_size, history_length)
    ) -> Tensor:
        """
        Parameters
        ----------
        F
            Function space
        feat_static_cat
            Shape: (batch_size, 1)
        past_time_feat
            Shape: (batch_size, history_length, num_features)
        past_target
            Shape: (batch_size, history_length)

        Returns
        -------
        Tensor
            A batch of negative log likelihoods.
        """

        fixed_effect, random_effect = self.compute_global_local(
            F, feat_static_cat, past_time_feat)

        loss = self.negative_normal_likelihood(F,
                                               past_target.expand_dims(axis=2),
                                               fixed_effect, random_effect)
        return loss
Example #5
def _expand_param(p: Tensor, num_samples: Optional[int] = None) -> Tensor:
    """
    Expand parameters by num_samples along the first dimension.
    """
    if num_samples is None:
        return p
    return p.expand_dims(axis=0).repeat(axis=0, repeats=num_samples)
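
A toy numpy equivalent of _expand_param, with an assumed parameter shape, showing how a sample axis is prepended:

import numpy as np

p = np.random.randn(3, 2)                        # (batch, dim)
num_samples = 7
expanded = np.repeat(p[None, ...], num_samples, axis=0)
assert expanded.shape == (num_samples, 3, 2)     # (num_samples, batch, dim)
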
Example #6
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F
        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)
        return F.broadcast_add(
            F.broadcast_mul(self.high - self.low, level), self.low
        )
Example #7
        def s(mu: Tensor, D: Tensor, W: Tensor) -> Tensor:
            F = getF(mu)

            samples_D = F.sample_normal(mu=F.zeros_like(mu),
                                        sigma=F.ones_like(mu),
                                        dtype=dtype)
            cov_D = D.sqrt() * samples_D

            # dummy only use to get the shape (..., rank, 1)
            dummy_tensor = F.linalg_gemm2(W,
                                          mu.expand_dims(axis=-1),
                                          transpose_a=True).squeeze(axis=-1)

            samples_W = F.sample_normal(
                mu=F.zeros_like(dummy_tensor),
                sigma=F.ones_like(dummy_tensor),
                dtype=dtype,
            )

            cov_W = F.linalg_gemm2(
                W, samples_W.expand_dims(axis=-1)).squeeze(axis=-1)

            samples = mu + cov_D + cov_W

            return samples
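
The helper above draws from N(mu, D + W W^T) via the reparameterization x = mu + sqrt(D) * eps_D + W eps_W with standard normal eps_D and eps_W. A numpy sketch, under assumed dimensions, checking that the empirical covariance of such samples matches D + W W^T:

import numpy as np

dim, rank, n = 4, 2, 200_000
np.random.seed(0)
mu = np.zeros(dim)
D = np.random.rand(dim) + 0.5
W = np.random.randn(dim, rank)

eps_D = np.random.randn(n, dim)
eps_W = np.random.randn(n, rank)
samples = mu + np.sqrt(D) * eps_D + eps_W @ W.T   # same decomposition as cov_D + cov_W above

emp_cov = np.cov(samples, rowvar=False)
assert np.allclose(emp_cov, np.diag(D) + W @ W.T, atol=0.1)
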
Example #8
    def quantile_internal(self,
                          x: Tensor,
                          axis: Optional[int] = None) -> Tensor:
        r"""
        Evaluates the quantile function at the quantile levels contained in `x`.

        Parameters
        ----------
        x
            Tensor of shape ``*gamma.shape`` if axis=None, otherwise with an
            additional axis at the specified position.
        axis
            Index of the axis containing the different quantile levels which
            are to be computed.

        Returns
        -------
        Tensor
            Quantiles tensor, of the same shape as x.
        """

        F = self.F

        # shapes of self
        # self.gamma: (*batch_shape)
        # self.knot_positions, self.b: (*batch_shape, num_pieces)

        # axis=None - passed at inference when num_samples is None
        # The shape of x is (*batch_shape).
        # The shapes of the parameters should be:
        # gamma: (*batch_shape), knot_positions, b: (*batch_shape, num_pieces)
        # They match the self. counterparts so no reshaping is needed

        # axis=0 - passed at inference when num_samples is not None
        # The shape of x is (num_samples, *batch_shape).
        # The shapes of the parameters should be:
        # gamma: (num_samples, *batch_shape), knot_positions, b: (num_samples, *batch_shape, num_pieces),
        # They do not match the self. counterparts and we need to expand the axis=0 to all of them.

        # axis=-2 - passed at training when we evaluate quantiles at knot_positions in order to compute a_tilde
        # The shape of x is shape(x) = shape(knot_positions) = (*batch_shape, num_pieces).
        # The shape of the parameters should be:
        # gamma: (*batch_shape, 1), knot_positions: (*batch_shape, 1, num_pieces), b: (*batch_shape, 1, num_pieces)
        # They do not match the self. counterparts and we need to expand axis=-1 for gamma and axis=-2 for the rest.

        if axis is not None:
            gamma = self.gamma.expand_dims(axis=axis if axis == 0 else -1)
            knot_positions = self.knot_positions.expand_dims(axis=axis)
            b = self.b.expand_dims(axis=axis)
        else:
            gamma, knot_positions, b = self.gamma, self.knot_positions, self.b

        x_minus_knots = F.broadcast_minus(x.expand_dims(axis=-1),
                                          knot_positions)

        quantile = F.broadcast_add(
            gamma, F.sum(F.broadcast_mul(b, F.relu(x_minus_knots)), axis=-1))

        return quantile
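
The quantile function evaluated above is the piecewise-linear spline q(alpha) = gamma + sum_k b_k * relu(alpha - knot_k). A toy numpy evaluation with made-up parameters:

import numpy as np

gamma = 1.0
knots = np.array([0.0, 0.25, 0.5, 0.75])   # knot positions (quantile levels)
b = np.array([2.0, 1.0, 0.5, 3.0])         # non-negative slopes of the pieces

def quantile(alpha):
    return gamma + np.sum(b * np.maximum(alpha - knots, 0.0), axis=-1)

assert quantile(0.0) == gamma               # level 0 maps to gamma
assert quantile(0.5) > quantile(0.25)       # monotonically increasing in the level
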
Example #9
def mahalanobis_distance(
    F, W: Tensor, D: Tensor, capacitance_tril: Tensor, x: Tensor
) -> Tensor:
    r"""
    Uses the Woodbury matrix identity

    .. math::
        (W W^T + D)^{-1} = D^{-1} - D^{-1} W C^{-1} W^T D^{-1},

    where :math:`C` is the capacitance matrix :math:`I + W^T D^{-1} W`, to compute the squared
    Mahalanobis distance :math:`x^T (W W^T + D)^{-1} x`.

    Parameters
    ----------
    F
    W
        (..., dim, rank)
    D
        (..., dim)
    capacitance_tril
        (..., rank, rank)
    x
        (..., dim)

    Returns
    -------
    Tensor
        The squared Mahalanobis distance :math:`x^T (W W^T + D)^{-1} x`,
        of shape (...).

    """
    xx = x.expand_dims(axis=-1)

    # (..., rank, 1)
    Wt_Dinv_x = F.linalg_gemm2(
        F.broadcast_div(W, D.expand_dims(axis=-1)), xx, transpose_a=True
    )

    # compute x^T D^-1 x, (...,)
    mahalanobis_D_inv = F.broadcast_div(x.square(), D).sum(axis=-1)

    # (..., rank)
    L_inv_Wt_Dinv_x = F.linalg_trsm(capacitance_tril, Wt_Dinv_x).squeeze(
        axis=-1
    )

    mahalanobis_L = L_inv_Wt_Dinv_x.square().sum(axis=-1).squeeze()

    return F.broadcast_minus(mahalanobis_D_inv, mahalanobis_L)
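
A numpy verification of the Woodbury identity this function relies on, (W W^T + D)^{-1} = D^{-1} - D^{-1} W C^{-1} W^T D^{-1} with C = I + W^T D^{-1} W, using assumed sizes:

import numpy as np

dim, rank = 6, 2
np.random.seed(0)
W = np.random.randn(dim, rank)
D = np.random.rand(dim) + 0.1
x = np.random.randn(dim)

Sigma = W @ W.T + np.diag(D)
direct = x @ np.linalg.solve(Sigma, x)      # x^T Sigma^{-1} x computed directly

Dinv_W = W / D[:, None]                     # D^{-1} W
C = np.eye(rank) + W.T @ Dinv_W             # capacitance matrix
woodbury = (x ** 2 / D).sum() - (x @ Dinv_W) @ np.linalg.solve(C, Dinv_W.T @ x)

assert np.allclose(direct, woodbury)
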
Example #10
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F
        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        condition = F.broadcast_greater(level, level.zeros_like() + 0.5)
        u = F.where(condition, -F.log(2.0 - 2.0 * level), F.log(2.0 * level))

        return F.broadcast_add(self.mu, F.broadcast_mul(self.b, u))
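
For reference, the Laplace quantile is q(p) = mu + b*log(2p) for p <= 1/2 and q(p) = mu - b*log(2 - 2p) for p > 1/2. A quick scipy cross-check with toy parameters:

import numpy as np
from scipy.stats import laplace

mu, b = 1.5, 2.0
levels = np.array([0.1, 0.5, 0.9])

u = np.where(levels > 0.5, -np.log(2.0 - 2.0 * levels), np.log(2.0 * levels))
assert np.allclose(mu + b * u, laplace.ppf(levels, loc=mu, scale=b))
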
Example #11
    def quantile(self, level: Tensor):
        F = self.F
        # we consider level to be an independent axis and so expand it
        # to shape (num_levels, 1, 1, ...)
        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        x_shifted = F.broadcast_div(F.power(1 - level, -self.xi) - 1, self.xi)
        x = F.broadcast_mul(x_shifted, self.beta)
        return x
Example #12
    def _assemble_inputs(F, target: Tensor, static_features: Tensor,
                         dynamic_features: Tensor) -> Tensor:
        """
        Assemble features from target, static features, and the dynamic
        features.

        Parameters
        ----------
        F
            A module that can either refer to the Symbol API or the NDArray
            API in MXNet.

        target
            target time series,
            shape (batch_size, sequence_length)

        static_features
            static features,
            shape (batch_size, num_static_features)

        dynamic_features
            dynamic_features,
            shape (batch_size, sequence_length, num_dynamic_features)

        Returns
        -------
        Tensor
            combined features,
            shape (batch_size, sequence_length,
                   num_static_features + num_dynamic_features + 1)

        """
        target = target.expand_dims(axis=-1)  # (N, T, 1)

        helper_ones = F.ones_like(target)  # Ones of (N, T, 1)
        tiled_static_features = F.batch_dot(
            helper_ones, static_features.expand_dims(1))  # (N, T, C)
        inputs = F.concat(target,
                          tiled_static_features,
                          dynamic_features,
                          dim=2)  # (N, T, C)
        return inputs
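
The batch_dot of helper_ones (N, T, 1) with static_features.expand_dims(1) (N, 1, C) is just a way of repeating the static features along the time axis. A numpy sketch with assumed sizes:

import numpy as np

N, T, C = 2, 5, 3
static = np.random.randn(N, C)
ones = np.ones((N, T, 1))

tiled = ones @ static[:, None, :]            # (N, T, 1) @ (N, 1, C) -> (N, T, C)
assert np.allclose(tiled, np.repeat(static[:, None, :], T, axis=1))
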
Example #13
def test_mixture(
    distr1: Distribution, distr2: Distribution, p: Tensor, serialize_fn
) -> None:
    # sample from component distributions, and select samples
    samples1 = distr1.sample(num_samples=NUM_SAMPLES_LARGE)
    samples2 = distr2.sample(num_samples=NUM_SAMPLES_LARGE)

    # TODO: for multivariate case, test should not sample elements from different components in the event_dim dimension
    rand = mx.nd.random.uniform(shape=(NUM_SAMPLES_LARGE, *p.shape))
    choice = (rand < p.expand_dims(axis=0)).broadcast_like(samples1)
    samples_ref = mx.nd.where(choice, samples1, samples2)

    # construct mixture distribution and sample from it

    mixture_probs = mx.nd.stack(p, 1.0 - p, axis=-1)

    mixture = MixtureDistribution(
        mixture_probs=mixture_probs, components=[distr1, distr2]
    )
    mixture = serialize_fn(mixture)

    samples_mix = mixture.sample(num_samples=NUM_SAMPLES_LARGE)

    # check that shapes are right

    assert (
        samples1.shape
        == samples2.shape
        == samples_mix.shape
        == samples_ref.shape
    )

    # check mean and stddev
    calc_mean = mixture.mean.asnumpy()
    calc_std = mixture.stddev.asnumpy()
    sample_mean = samples_mix.asnumpy().mean(axis=0)
    sample_std = samples_mix.asnumpy().std(axis=0)

    assert np.allclose(calc_mean, sample_mean, atol=1e-1)
    assert np.allclose(calc_std, sample_std, atol=2e-1)

    # check that histograms are close
    assert (
        diff(
            histogram(samples_mix.asnumpy()), histogram(samples_ref.asnumpy())
        )
        < 0.05
    )

    # can only calculate cdf for gaussians currently
    if isinstance(distr1, Gaussian) and isinstance(distr2, Gaussian):
        emp_cdf, edges = empirical_cdf(samples_mix.asnumpy())
        calc_cdf = mixture.cdf(mx.nd.array(edges)).asnumpy()
        assert np.allclose(calc_cdf[1:, :], emp_cdf, atol=1e-2)
Example #14
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F
        # we consider level to be an independent axis and so expand it
        # to shape (num_levels, 1, 1, ...)
        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        return F.broadcast_add(
            self.mu,
            F.broadcast_mul(self.sigma,
                            math.sqrt(2.0) * erfinv(F, 2.0 * level - 1.0)),
        )
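
This is the Gaussian inverse CDF mu + sigma * sqrt(2) * erfinv(2p - 1). A scipy cross-check with toy parameters:

import numpy as np
from scipy.special import erfinv
from scipy.stats import norm

mu, sigma = 0.5, 1.2
p = np.array([0.05, 0.5, 0.95])

q = mu + sigma * np.sqrt(2.0) * erfinv(2.0 * p - 1.0)
assert np.allclose(q, norm.ppf(p, loc=mu, scale=sigma))
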
Example #15
    def _cdf(self, x: Tensor) -> Tensor:
        r"""
        Computes the quantile level :math:`\alpha` such that
        :math:`q(\alpha) = x`.

        Parameters
        ----------
        x
            Tensor of shape gamma.shape

        Returns
        -------
        Tensor
            Tensor of shape gamma.shape
        """

        F = self.F
        gamma, b, knot_positions = self.gamma, self.b, self.knot_positions

        quantiles_at_knots = self.quantile(knot_positions, axis=-2)

        # Mask to nullify the terms corresponding to knots larger than l_0, which is the largest knot
        # (quantile level) such that the quantile at l_0, s(l_0) < x.
        # (..., num_pieces)
        mask = F.broadcast_lesser(quantiles_at_knots, x.expand_dims(axis=-1))

        slope_l0 = F.sum(b * mask, axis=-1, keepdims=False)

        # slope_l0 can be zero in which case a_tilde = 0.
        # The following circumvents an MXNet issue with the "where" operator,
        # which returns NaN whenever the non-selected branch evaluates to NaN,
        # even if the selected branch does not.
        slope_l0_nz = F.where(
            slope_l0 == F.zeros_like(slope_l0), F.ones_like(x), slope_l0
        )

        a_tilde = F.where(
            slope_l0 == F.zeros_like(slope_l0),
            F.zeros_like(x),
            (
                x
                - gamma
                + F.sum(b * knot_positions * mask, axis=-1, keepdims=False)
            )
            / slope_l0_nz,
        )

        return a_tilde
Example #16
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F

        # self.bin_probs.shape = (batch_shape, num_bins)
        probs = self.bin_probs.transpose()  # (num_bins, batch_shape.T)

        # (batch_shape)
        zeros_batch_size = F.zeros_like(
            F.slice_axis(self.bin_probs, axis=-1, begin=0, end=1).squeeze(
                axis=-1
            )
        )

        level = level.expand_dims(axis=0)

        # cdf shape (batch_size.T, levels)
        zeros_cdf = F.broadcast_add(
            zeros_batch_size.transpose().expand_dims(axis=-1),
            level.zeros_like(),
        )
        start_state = (zeros_cdf, zeros_cdf.astype("int32"))

        def step(p, state):
            cdf, idx = state
            cdf = F.broadcast_add(cdf, p.expand_dims(axis=-1))
            idx = F.where(F.broadcast_greater(cdf, level), idx, idx + 1)
            return zeros_batch_size, (cdf, idx)

        _, states = F.contrib.foreach(step, probs, start_state)
        _, idx = states

        # idx.shape = (batch.T, levels)
        # centers.shape = (batch, num_bins)
        #
        # expand centers to shape -> (levels, batch, num_bins)
        # so we can use pick with idx.T.shape = (levels, batch)
        #
        # zeros_cdf.shape (batch.T, levels)
        centers_expanded = F.broadcast_add(
            self.bin_centers.transpose().expand_dims(axis=-1),
            zeros_cdf.expand_dims(axis=0),
        ).transpose()

        # centers_expanded.shape = (levels, batch, num_bins)
        # idx.shape (batch.T, levels)
        a = centers_expanded.pick(idx.transpose(), axis=-1)
        return a
Example #17
def make_nd_diag(F, x: Tensor, d: int) -> Tensor:
    """
    Make a diagonal tensor, given the diagonal

    Parameters
    ----------
    F
        The function space to use.
    x
        Diagonal to use, shape :math:`(..., d)`.
    d
        Last dimension of `x`.

    Returns
    -------
    Tensor
        A tensor y of shape :math:`(..., d, d)` such that
        :math:`y[..., i, i] = x[..., i]`.
    """
    return F.broadcast_mul(F.eye(d), x.expand_dims(axis=-1))
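
A numpy equivalent of the broadcast trick in make_nd_diag, with an assumed batch shape:

import numpy as np

x = np.random.randn(3, 4)            # (..., d) with d = 4
y = np.eye(4) * x[..., None]         # (..., d, d), diagonal filled from the last axis of x
assert np.allclose(y[1], np.diag(x[1]))
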
Example #18
    def hybrid_forward(
            self,
            F,
            feat_static_cat: Tensor,  # (batch_size, num_features)
            past_time_feat: Tensor,
            # (batch_size, history_length, num_features)
            past_target: Tensor,  # (batch_size, history_length)
    ) -> Tensor:
        """
        Parameters
        ----------
        F
            Function space
        feat_static_cat
            Shape: (batch_size, num_features)
        past_time_feat
            Shape: (batch_size, history_length, num_features)
        past_target
            Shape: (batch_size, history_length)

        Returns
        -------
        Tensor
            A batch of negative log likelihoods.
        """
        _, target_scale = self.scaler(
            past_target,
            F.ones_like(past_target),  # TODO: pass the actual observed here
        )

        input_feat = self.assemble_features(F, feat_static_cat, past_time_feat)
        outputs = self.model(input_feat)

        distr = self.distr_output.distribution(
            self.proj_distr_args(outputs),
            scale=target_scale.expand_dims(axis=1).expand_dims(axis=2),
        )

        loss = distr.loss(past_target.expand_dims(axis=-1))

        return loss
Example #19
def test_mixture(
    distr1: Distribution, distr2: Distribution, p: Tensor
) -> None:

    # sample from component distributions, and select samples

    samples1 = distr1.sample(num_samples=NUM_SAMPLES)
    samples2 = distr2.sample(num_samples=NUM_SAMPLES)

    rand = mx.nd.random.uniform(shape=(NUM_SAMPLES, *p.shape))
    choice = (rand < p.expand_dims(axis=0)).broadcast_like(samples1)
    samples_ref = mx.nd.where(choice, samples1, samples2)

    # construct mixture distribution and sample from it

    mixture_probs = mx.nd.stack(p, 1.0 - p, axis=-1)

    mixture = MixtureDistribution(
        mixture_probs=mixture_probs, components=[distr1, distr2]
    )

    samples_mix = mixture.sample(num_samples=NUM_SAMPLES)

    # check that shapes are right

    assert (
        samples1.shape
        == samples2.shape
        == samples_mix.shape
        == samples_ref.shape
    )

    # check that histograms are close

    assert (
        diff(
            histogram(samples_mix.asnumpy()), histogram(samples_ref.asnumpy())
        )
        < 0.05
    )
Example #20
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F
        # we consider level to be an independent axis and so expand it
        # to shape (num_levels, 1, 1, ...)

        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        quantiles = F.broadcast_mul(self.value, level.ones_like())
        level = F.broadcast_mul(quantiles.ones_like(), level)

        minus_inf = -quantiles.ones_like() / 0.0
        quantiles = F.where(
            F.broadcast_logical_or(level != 0, F.contrib.isnan(quantiles)),
            quantiles,
            minus_inf,
        )

        nans = level.zeros_like() / 0.0
        quantiles = F.where(level != level, nans, quantiles)

        return quantiles
Example #21
    def exact_inference(self, x_train: Tensor, y_train: Tensor,
                        x_test: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Parameters
        ----------
        x_train
            Training set of features of shape (batch_size, context_length, num_features).
        y_train
            Training labels of shape (batch_size, context_length).
        x_test
            Test set of features of shape (batch_size, prediction_length, num_features).
        Returns
        -------
        Tuple
            Tensor
                Predictive GP samples of shape (batch_size, prediction_length, num_samples).
            Tensor
                Predictive mean of the GP of shape (batch_size, prediction_length).
            Tensor
                Predictive standard deviation of the GP of shape (batch_size, prediction_length).
        """
        assert (self.context_length
                is not None), "The value of `context_length` must be set."
        assert (self.prediction_length
                is not None), "The value of `prediction_length` must be set."
        # Compute Cholesky factorization of training kernel matrix
        l_train = self._compute_cholesky_gp(
            self.kernel.kernel_matrix(x_train, x_train), self.context_length)

        lower_tri_solve = self.F.linalg.trsm(
            l_train, self.kernel.kernel_matrix(x_train, x_test))
        predictive_mean = self.F.linalg.gemm2(
            lower_tri_solve,
            self.F.linalg.trsm(l_train, y_train.expand_dims(axis=-1)),
            transpose_a=True,
        ).squeeze(axis=-1)
        # Can rewrite the second term as
        # :math:`||L^{-1} K(x_train, x_test)||_2^2`
        # and only solve 1 equation
        predictive_covariance = self.kernel.kernel_matrix(
            x_test, x_test) - self.F.linalg.gemm2(
                lower_tri_solve, lower_tri_solve, transpose_a=True)
        # Extract diagonal entries of covariance matrix
        predictive_std = batch_diagonal(
            self.F,
            predictive_covariance,
            self.prediction_length,
            self.ctx,
            self.float_type,
        )
        # If self.sample_noise = True, predictive covariance has sigma^2 on the diagonal
        if self.sample_noise:
            predictive_std = self.F.broadcast_add(predictive_std,
                                                  self.sigma**2)
        predictive_std = self.F.sqrt(predictive_std).squeeze(axis=-1)
        # Compute sample from GP predictive distribution
        return (
            self.sample(predictive_mean, predictive_covariance),
            predictive_mean,
            predictive_std,
        )
Example #22
    def cdf(self, x: Tensor) -> Tensor:
        F = self.F
        x = x.expand_dims(axis=-1)
        # left_edges = self.bin_edges.slice_axis(axis=-1, begin=0, end=-1)
        mask = F.broadcast_lesser_equal(self.bin_centers, x)
        return F.broadcast_mul(self.bin_probs, mask).sum(axis=-1)
Example #23
    def train_hybrid_forward(
        self,
        F,
        target_dimension_indicator: Tensor,
        past_time_feat: Tensor,
        past_target_cdf: Tensor,
        past_observed_values: Tensor,
        past_is_pad: Tensor,
        future_time_feat: Tensor,
        future_target_cdf: Tensor,
        future_observed_values: Tensor,
    ) -> Tuple[Tensor, ...]:
        """
        Computes the loss for training DeepVAR. All input tensors representing
        time series have NTC layout.

        Parameters
        ----------
        F
        target_dimension_indicator
            Indices of the target dimension (batch_size, target_dim)
        past_time_feat
            Dynamic features of past time series (batch_size, history_length,
            num_features)
        past_target_cdf
            Past marginal CDF transformed target values (batch_size,
            history_length, target_dim)
        past_observed_values
            Indicator whether or not the values were observed (batch_size,
            history_length, target_dim)
        past_is_pad
            Indicator whether the past target values have been padded
            (batch_size, history_length)
        future_time_feat
            Future time features (batch_size, prediction_length, num_features)
        future_target_cdf
            Future marginal CDF transformed target values (batch_size,
            prediction_length, target_dim)
        future_observed_values
            Indicator whether or not the future values were observed
            (batch_size, prediction_length, target_dim)

        Returns
        -------
        distr
            Loss with shape (batch_size, 1)
        likelihoods
            Likelihoods for each time step
            (batch_size, context + prediction_length, 1)
        distr_args
            Distribution arguments (context + prediction_length,
            number_of_arguments)
        """

        seq_len = self.context_length + self.prediction_length

        # unroll the decoder in "training mode", i.e. by providing future data
        # as well
        rnn_outputs, _, scale, lags_scaled, inputs = self.unroll_encoder(
            F=F,
            past_time_feat=past_time_feat,
            past_target_cdf=past_target_cdf,
            past_observed_values=past_observed_values,
            past_is_pad=past_is_pad,
            future_time_feat=future_time_feat,
            future_target_cdf=future_target_cdf,
            target_dimension_indicator=target_dimension_indicator,
        )

        # put together target sequence
        # (batch_size, seq_len, target_dim)
        target = F.concat(
            past_target_cdf.slice_axis(axis=1,
                                       begin=-self.context_length,
                                       end=None),
            future_target_cdf,
            dim=1,
        )

        # assert_shape(target, (-1, seq_len, self.target_dim))

        distr, distr_args = self.distr(
            time_features=inputs,
            rnn_outputs=rnn_outputs,
            scale=scale,
            lags_scaled=lags_scaled,
            target_dimension_indicator=target_dimension_indicator,
            seq_len=self.context_length + self.prediction_length,
        )

        # we sum the last axis to have the same shape for all likelihoods
        # (batch_size, subseq_length, 1)
        likelihoods = -distr.log_prob(target).expand_dims(axis=-1)

        assert_shape(likelihoods, (-1, seq_len, 1))

        past_observed_values = F.broadcast_minimum(
            past_observed_values, 1 - past_is_pad.expand_dims(axis=-1))

        # (batch_size, subseq_length, target_dim)
        observed_values = F.concat(
            past_observed_values.slice_axis(axis=1,
                                            begin=-self.context_length,
                                            end=None),
            future_observed_values,
            dim=1,
        )

        # mask the loss at one time step if one or more observations is missing
        # in the target dimensions (batch_size, subseq_length, 1)
        loss_weights = observed_values.min(axis=-1, keepdims=True)

        assert_shape(loss_weights, (-1, seq_len, 1))

        loss = weighted_average(F=F,
                                x=likelihoods,
                                weights=loss_weights,
                                axis=1)

        assert_shape(loss, (-1, -1, 1))

        self.distribution = distr

        return (loss, likelihoods) + distr_args
Example #24
    def process_static_real(self, F, feature: Tensor) -> Tensor:
        return F.tile(feature.expand_dims(axis=1), reps=(1, self.T, 1))
Example #25
    def predict_hybrid_forward(
        self,
        F,
        target_dimension_indicator: Tensor,
        past_time_feat: Tensor,
        past_target_cdf: Tensor,
        past_observed_values: Tensor,
        past_is_pad: Tensor,
        future_time_feat: Tensor,
    ) -> Tensor:
        """
        Predicts samples given the trained DeepVAR model.
        All tensors should have NTC layout.
        Parameters
        ----------
        F
        target_dimension_indicator
            Indices of the target dimension (batch_size, target_dim)
        past_time_feat
            Dynamic features of past time series (batch_size, history_length,
            num_features)
        past_target_cdf
            Past marginal CDF transformed target values (batch_size,
            history_length, target_dim)
        past_observed_values
            Indicator whether or not the values were observed (batch_size,
            history_length, target_dim)
        past_is_pad
            Indicator whether the past target values have been padded
            (batch_size, history_length)
        future_time_feat
            Future time features (batch_size, prediction_length, num_features)

        Returns
        -------
        sample_paths : Tensor
            A tensor containing sampled paths (1, num_sample_paths,
            prediction_length, target_dim).

        """

        # mark padded data as unobserved
        # (batch_size, target_dim, seq_len)
        past_observed_values = F.broadcast_minimum(
            past_observed_values, 1 - past_is_pad.expand_dims(axis=-1))

        # unroll the decoder in "prediction mode", i.e. with past data only
        _, state, scale, _, inputs = self.unroll_encoder(
            F=F,
            past_time_feat=past_time_feat,
            past_target_cdf=past_target_cdf,
            past_observed_values=past_observed_values,
            past_is_pad=past_is_pad,
            future_time_feat=None,
            future_target_cdf=None,
            target_dimension_indicator=target_dimension_indicator,
        )

        return self.sampling_decoder(
            F=F,
            past_target_cdf=past_target_cdf,
            target_dimension_indicator=target_dimension_indicator,
            time_feat=future_time_feat,
            scale=scale,
            begin_states=state,
        )
Example #26
    def process_static_cat(self, F, feature: Tensor) -> Tensor:
        feature = self.embed_static(feature.astype(self.dtype))
        return F.tile(feature.expand_dims(axis=1), reps=(1, self.T, 1))
Example #27
def kalman_filter_step(
    F,
    target: Tensor,
    prior_mean: Tensor,
    prior_cov: Tensor,
    emission_coeff: Tensor,
    residual: Tensor,
    noise_std: Tensor,
    latent_dim: int,
    output_dim: int,
):
    """
    One step of the Kalman filter.

    This function computes the filtered state (mean and covariance) given the
    linear system coefficients, the prior state (mean and covariance), and the
    observations.

    Parameters
    ----------
    F
    target
        Observations of the system output, shape (batch_size, output_dim)
    prior_mean
        Prior mean of the latent state, shape (batch_size, latent_dim)
    prior_cov
        Prior covariance of the latent state, shape
        (batch_size, latent_dim, latent_dim)
    emission_coeff
        Emission coefficient, shape (batch_size, output_dim, latent_dim)
    residual
        Residual component, shape (batch_size, output_dim)
    noise_std
        Standard deviation of the output noise, shape (batch_size, output_dim)
    latent_dim
        Dimension of the latent state vector
    output_dim
        Dimension of the observed output vector

    Returns
    -------
    Tensor
        Filtered_mean, shape (batch_size, latent_dim)
    Tensor
        Filtered_covariance, shape (batch_size, latent_dim, latent_dim)
    Tensor
        Log probability, shape (batch_size, )
    """
    # output_mean: mean of the target (batch_size, obs_dim)
    output_mean = F.linalg_gemm2(
        emission_coeff, prior_mean.expand_dims(axis=-1)).squeeze(axis=-1)

    # noise covariance
    noise_cov = make_nd_diag(F=F, x=noise_std * noise_std, d=output_dim)

    S_hh_x_A_tr = F.linalg_gemm2(prior_cov, emission_coeff, transpose_b=True)

    # covariance of the target
    output_cov = F.linalg_gemm2(emission_coeff, S_hh_x_A_tr) + noise_cov

    # compute the Cholesky decomposition output_cov = LL^T
    L_output_cov = F.linalg_potrf(output_cov)

    # Compute Kalman gain matrix K:
    # K = S_hh X with X = A^T output_cov^{-1}
    # We have X = A^T output_cov^{-1} => X output_cov = A^T => X LL^T = A^T
    # We can thus obtain X by solving two linear systems involving L
    kalman_gain = F.linalg_trsm(
        L_output_cov,
        F.linalg_trsm(L_output_cov,
                      S_hh_x_A_tr,
                      rightside=True,
                      transpose=True),
        rightside=True,
    )

    # compute the error
    target_minus_residual = target - residual
    delta = target_minus_residual - output_mean

    # filtered estimates
    filtered_mean = prior_mean.expand_dims(axis=-1) + F.linalg_gemm2(
        kalman_gain, delta.expand_dims(axis=-1))
    filtered_mean = filtered_mean.squeeze(axis=-1)

    # Joseph's symmetrized update for covariance:
    ImKA = F.broadcast_sub(F.eye(latent_dim),
                           F.linalg_gemm2(kalman_gain, emission_coeff))

    filtered_cov = F.linalg_gemm2(
        ImKA, F.linalg_gemm2(
            prior_cov, ImKA, transpose_b=True)) + F.linalg_gemm2(
                kalman_gain,
                F.linalg_gemm2(noise_cov, kalman_gain, transpose_b=True))

    # likelihood term: (batch_size,)
    log_p = MultivariateGaussian(output_mean,
                                 L_output_cov).log_prob(target_minus_residual)

    return filtered_mean, filtered_cov, log_p
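
The Kalman gain K = S_hh A^T (A S_hh A^T + R)^{-1} is obtained above with two triangular solves against the Cholesky factor of the output covariance rather than an explicit matrix inverse. A small numpy/scipy check of that equivalence, with assumed dimensions:

import numpy as np
from scipy.linalg import cho_solve

latent_dim, output_dim = 3, 2
np.random.seed(0)
M = np.random.randn(latent_dim, latent_dim)
S = M @ M.T + np.eye(latent_dim)                  # prior covariance
A = np.random.randn(output_dim, latent_dim)       # emission coefficient
R = np.diag(np.random.rand(output_dim) + 0.1)     # observation noise covariance

output_cov = A @ S @ A.T + R
L = np.linalg.cholesky(output_cov)

gain_direct = S @ A.T @ np.linalg.inv(output_cov)
gain_chol = cho_solve((L, True), (S @ A.T).T).T   # solve output_cov K^T = A S, then transpose
assert np.allclose(gain_direct, gain_chol)
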
Example #28
def cumsum(F,
           x: Tensor,
           exclusive: bool = False,
           reverse: bool = False) -> Tensor:
    r"""
    Find cumulative sum on the last axis by multiplying with lower triangular
    ones-matrix:

    .. math::

       \operatorname{cumsum}(x) =
       \begin{cases}
         \operatorname{ltr\_ones} \times x
           & \text{for cumulative sum}\\
         x \times \operatorname{ltr\_ones}
           & \text{for cumulative sum in the reverse order}
       \end{cases}

    Also supports `exclusive` flag to start the cumsum with zero.
    For example, if :math:`x = [a, b, c]`, we have

    .. math::

       \operatorname{cumsum}(x) =
       \begin{cases}
         [a, a + b, a + b + c]
           & \text{if }\mathit{reverse = False, exclusive = False}\\
         [0, a, a + b]
           & \text{if }\mathit{reverse = False, exclusive = True}\\
         [a + b + c, b + c, c]
           & \text{if }\mathit{reverse = True, exclusive = False}\\
         [b + c, c, 0]
           & \text{if }\mathit{reverse = True, exclusive = True}\\
       \end{cases}

    Parameters
    ----------
    F
        The function space to use.
    x
        A tensor with shape :math:`(..., n)`.
    exclusive
        If `True`, the cumulative sum starts with zero.
    reverse
        If `True`, the cumulative sum is performed in the opposite direction.

    Returns
    -------
    Tensor:
        A modified tensor with identical shape and cumulative sums in the last
        axis.
    """

    # Create a new axis (for matrix multiplication) either at last location or
    # last-but-one location (for reverse mode)
    exp_dim = -2 if reverse else -1
    # (..., 1, n) if reverse is True and (..., n, 1) otherwise
    x = x.expand_dims(axis=exp_dim)

    # Ones_matrix (..., n, n)
    ones_matrix = F.linalg_gemm2(
        F.ones_like(x),
        F.ones_like(x),
        transpose_a=reverse,
        transpose_b=not reverse,
    )
    cumulative_sum = F.linalg_trmm(ones_matrix, x, rightside=reverse)

    if exclusive:
        cumulative_sum = cumulative_sum - x

    return cumulative_sum.squeeze(axis=exp_dim)
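
A toy numpy check of the matrix-multiplication trick used above: multiplying by a lower-triangular ones matrix yields running sums (and the reverse order when multiplying from the right):

import numpy as np

x = np.array([1.0, 2.0, 3.0])
ltr_ones = np.tril(np.ones((3, 3)))

assert np.allclose(ltr_ones @ x, np.cumsum(x))               # [1, 3, 6]
assert np.allclose(x @ ltr_ones, np.cumsum(x[::-1])[::-1])   # reverse: [6, 5, 3]
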
Example #29
def test_nan_mixture(
    distr_class,
    p: Tensor,
    x: Tensor,
    distr_params: Dict[str, Tensor],
    distr_params_grad: Dict[str, Tensor],
    serialize_fn,
) -> None:
    # sample from component distributions, and select samples
    distr = distr_class(**distr_params)

    samples = distr.sample(num_samples=NUM_SAMPLES_LARGE)

    rand = mx.nd.random.uniform(shape=(NUM_SAMPLES_LARGE, *p.shape))
    choice = (rand > p.expand_dims(axis=0)).broadcast_like(samples)
    samples_ref = mx.nd.where(choice, samples, samples.zeros_like())

    # construct NanMixture distribution and sample from it
    nan_mixture = NanMixture(nan_prob=p, distribution=distr)

    nan_mixture = serialize_fn(nan_mixture)

    samples_mix = nan_mixture.sample(num_samples=NUM_SAMPLES_LARGE)
    # check that shapes are right

    assert samples.shape == samples_mix.shape == samples_ref.shape

    # TODO check mean and stddev

    # check log_prob
    log_prob = nan_mixture.log_prob(x)

    log_prob_true = mx.nd.log(mx.nd.where(x != x, p, (1 - p) * distr.prob(x)))

    assert np.allclose(log_prob.asnumpy(), log_prob_true.asnumpy())

    for param in distr_params:
        distr_params[param].attach_grad()
    p.attach_grad()

    with mx.autograd.record():
        distr = distr_class(**distr_params)
        nan_mixture = NanMixture(nan_prob=p, distribution=distr)
        nll = -nan_mixture.log_prob(x)
    nll.backward()

    p_grad_true = mx.nd.where(x != x, -1 / p, 1 / (1 - p))
    # gradient is undefined for these cases:
    p_grad_true = mx.nd.where(
        mx.nd.logical_or(
            mx.nd.logical_and(x != x, p == 0),
            mx.nd.logical_and(x == x, p == 1),
        ),
        0.0 / p_grad_true.zeros_like(),
        p_grad_true,
    )

    assert np.allclose(p.grad.asnumpy(), p_grad_true.asnumpy())

    for param in distr_params:

        assert np.allclose(
            distr_params[param].grad.asnumpy(), distr_params_grad[param]
        )
Example #30
    def hybrid_forward(
        self,
        F,
        feat_static_cat: Tensor,
        past_target: Tensor,
        past_observed_values: Tensor,
        past_is_pad: Tensor,
        past_time_feat: Tensor,
        future_time_feat: Tensor,
        scale: Tensor,
    ) -> Tensor:

        embedded_cat = self.feature_embedder(feat_static_cat)
        static_feat = F.concat(embedded_cat, F.log(scale + 1.0), dim=1)

        past_target = past_target.astype("int32")

        def blow_up(u):
            """
            Expand to (batch_size x num_samples)
            """
            return F.repeat(u, repeats=self.num_samples, axis=0)

        def is_last_layer(i):
            return i + 1 == len(self.dilations)

        queues = []

        full_time_features = F.concat(past_time_feat, future_time_feat, dim=-1)

        future_observed_values = F.slice_axis(
            future_time_feat, begin=0, end=1, axis=1
        ).ones_like()

        full_observed = F.concat(
            F.expand_dims(past_observed_values, axis=1),
            future_observed_values,
            dim=-1,
        )

        repeated_static_feat = F.repeat(
            F.expand_dims(static_feat, axis=-1),
            repeats=self.pred_length + self.receptive_field,
            axis=-1,
        )

        full_features = F.concat(
            full_time_features, full_observed, repeated_static_feat, dim=1
        )

        feature_slice = F.slice_axis(
            full_features,
            begin=-self.pred_length - self.receptive_field + 1,
            end=None,
            axis=-1,
        )

        tmp = F.slice_axis(
            past_target, begin=-self.receptive_field, end=None, axis=-1
        )
        o = self.target_embed(tmp).swapaxes(1, 2)
        o = F.concat(
            o,
            F.slice_axis(
                feature_slice, begin=-self.receptive_field, end=None, axis=-1
            ),
            dim=1,
        )
        o = self.conv_project(o)

        for i, d in enumerate(self.dilations):
            sz = 1 if d == 2 ** (self.dilation_depth - 1) else d * 2
            _, o = self.residuals[i](o)
            if not is_last_layer(i):
                o_chunk = F.slice_axis(o, begin=-sz - 1, end=-1, axis=-1)
            else:
                o_chunk = o
            queues.append(blow_up(o_chunk))

        res = F.slice_axis(past_target, begin=-2, end=None, axis=-1)
        res = blow_up(res)

        for n in range(self.pred_length):
            queues_next = []
            o = self.target_embed(
                F.slice_axis(res, begin=-2, end=None, axis=-1)
            ).swapaxes(1, 2)
            b = F.slice_axis(
                full_features,
                begin=self.receptive_field + n - 1,
                end=self.receptive_field + n + 1,
                axis=-1,
            )
            b = blow_up(b)
            o = F.concat(o, b, dim=1)
            o = self.conv_project(o)

            skip_outs = []
            for i, d in enumerate(self.dilations):
                skip, o = self.residuals[i](o)
                skip_outs.append(skip)
                if not is_last_layer(i):
                    q = queues[i]
                    o = F.concat(q, o, num_args=2, dim=-1)
                    queues_next.append(
                        F.slice_axis(o, begin=1, end=None, axis=-1)
                    )
            queues = queues_next
            y = sum(skip_outs)
            y = self.output_act(y)
            y = self.conv1(y)
            y = self.output_act(y)
            unnormalized_outputs = self.conv2(y)
            if self.temperature > 0:
                probs = F.softmax(
                    unnormalized_outputs / self.temperature, axis=1
                )
                y = F.sample_multinomial(probs.swapaxes(1, 2))
            else:
                y = F.argmax(unnormalized_outputs, axis=1)
            y = y.astype("int32")
            res = F.concat(res, y, num_args=2, dim=-1)
        samples = F.slice_axis(res, begin=-self.pred_length, end=None, axis=-1)
        samples = samples.reshape(
            shape=(-1, self.num_samples, self.pred_length)
        )
        samples = self.post_transform(samples)
        samples = F.broadcast_mul(scale.expand_dims(axis=1), samples)
        return samples