Example #1
File: _network.py  Project: yifeim/gluon-ts
    def hybrid_forward(
            self,
            F,
            feat_static_cat: Tensor,  # (batch_size, 1)
            past_time_feat: Tensor,
            # (batch_size, history_length, num_features)
            past_target: Tensor,  # (batch_size, history_length)
    ) -> Tensor:
        """
        Parameters
        ----------
        F
            Function space
        feat_static_cat
            Shape: (batch_size, 1)
        past_time_feat
            Shape: (batch_size, history_length, num_features)
        past_target
            Shape: (batch_size, history_length)

        Returns
        -------
        Tensor
            A batch of negative log likelihoods.
        """

        fixed_effect, random_effect = self.compute_global_local(
            F, feat_static_cat, past_time_feat)

        loss = self.negative_normal_likelihood(F,
                                               past_target.expand_dims(axis=2),
                                               fixed_effect, random_effect)
        return loss
Example #2
def _expand_param(p: Tensor, num_samples: Optional[int] = None) -> Tensor:
    """
    Expand parameters by num_samples along the first dimension.
    """
    if num_samples is None:
        return p
    return p.expand_dims(axis=0).repeat(axis=0, repeats=num_samples)
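A minimal usage sketch for the helper above (not part of the original snippet), assuming plain MXNet NDArray inputs:

import mxnet as mx

# Expanding a (2, 2) parameter tensor to (3, 2, 2): one copy per sample.
p = mx.nd.array([[1.0, 2.0], [3.0, 4.0]])
expanded = _expand_param(p, num_samples=3)
assert expanded.shape == (3, 2, 2)
assert _expand_param(p) is p  # num_samples=None returns the input unchanged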
Example #3
        def s(mu: Tensor, D: Tensor, W: Tensor) -> Tensor:
            F = getF(mu)

            samples_D = F.sample_normal(mu=F.zeros_like(mu),
                                        sigma=F.ones_like(mu),
                                        dtype=dtype)
            cov_D = D.sqrt() * samples_D

            # dummy tensor, used only to obtain the shape (..., rank)
            # needed to sample samples_W below
            dummy_tensor = F.linalg_gemm2(W,
                                          mu.expand_dims(axis=-1),
                                          transpose_a=True).squeeze(axis=-1)

            samples_W = F.sample_normal(
                mu=F.zeros_like(dummy_tensor),
                sigma=F.ones_like(dummy_tensor),
                dtype=dtype,
            )

            cov_W = F.linalg_gemm2(
                W, samples_W.expand_dims(axis=-1)).squeeze(axis=-1)

            samples = mu + cov_D + cov_W

            return samples
Example #4
def capacitance_tril(F, rank: Tensor, W: Tensor, D: Tensor) -> Tensor:
    r"""

    Parameters
    ----------
    F
    rank
    W : (..., dim, rank)
    D : (..., dim)

    Returns
    -------
        the capacitance matrix :math:`I + W^T D^{-1} W`

    """
    # (..., dim, rank)
    Wt_D_inv_t = F.broadcast_div(W, D.expand_dims(axis=-1))

    # (..., rank, rank)
    K = F.linalg_gemm2(Wt_D_inv_t, W, transpose_a=True)

    # (..., rank, rank)
    Id = F.broadcast_mul(F.ones_like(K), F.eye(rank))

    # (..., rank, rank)
    return F.linalg.potrf(K + Id)
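A small usage sketch for capacitance_tril (not from the original source); the values and the choice dim=3, rank=2 are purely illustrative:

import mxnet as mx

W = mx.nd.array([[1.0, 0.0], [0.5, 1.0], [0.0, 2.0]])  # (dim, rank) = (3, 2)
D = mx.nd.array([2.0, 1.0, 0.5])                       # (dim,) = (3,)
L = capacitance_tril(mx.nd, rank=2, W=W, D=D)
print(L.shape)  # (2, 2): lower Cholesky factor of I + W^T D^{-1} W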
Example #5
    def quantile_losses(self, obs: Tensor, quantiles: Tensor,
                        levels: Tensor) -> Tensor:
        """
        Computes quantile losses for all the quantiles specified.

        Parameters
        ----------
        obs
            Ground truth observation. Shape: `(batch_size, seq_len, *event_shape)`
        quantiles
            Quantile values. Shape: `(batch_size, seq_len, *event_shape, num_quantiles)`
        levels
            Quantile levels. Shape: `(batch_size, seq_len, *event_shape, num_quantiles)`
        Returns
        -------
        Tensor
            Quantile losses of shape: `(batch_size, seq_len, *event_shape, num_quantiles)`

        """
        obs = obs.expand_dims(axis=-1)
        assert obs.shape[:-1] == quantiles.shape[:-1]
        assert obs.shape[:-1] == levels.shape[:-1]
        assert obs.shape[-1] == 1

        return self.F.where(
            obs >= quantiles,
            levels * (obs - quantiles),
            (1 - levels) * (quantiles - obs),
        )
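A standalone sketch of the same pinball-loss rule with illustrative values, using plain mx.nd in place of self.F:

import mxnet as mx

obs = mx.nd.array([[2.0, 2.0]])       # observation, broadcast against both quantiles
quantiles = mx.nd.array([[1.5, 2.5]])
levels = mx.nd.array([[0.1, 0.9]])
loss = mx.nd.where(
    obs >= quantiles,
    levels * (obs - quantiles),
    (1 - levels) * (quantiles - obs),
)
print(loss)  # [[0.05, 0.05]]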
Example #6
    def exact_inference(self, x_train: Tensor, y_train: Tensor,
                        x_test: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Parameters
        ----------
        x_train
            Training set of features of shape (batch_size, context_length, num_features).
        y_train
            Training labels of shape (batch_size, context_length).
        x_test
            Test set of features of shape (batch_size, prediction_length, num_features).
        Returns
        -------
        Tuple
            Tensor
                Predictive GP samples of shape (batch_size, prediction_length, num_samples).
            Tensor
                Predictive mean of the GP of shape (batch_size, prediction_length).
            Tensor
                Predictive standard deviation of the GP of shape (batch_size, prediction_length).
        """
        assert (self.context_length
                is not None), "The value of `context_length` must be set."
        assert (self.prediction_length
                is not None), "The value of `prediction_length` must be set."
        # Compute Cholesky factorization of training kernel matrix
        l_train = self._compute_cholesky_gp(
            self.kernel.kernel_matrix(x_train, x_train), self.context_length)

        lower_tri_solve = self.F.linalg.trsm(
            l_train, self.kernel.kernel_matrix(x_train, x_test))
        predictive_mean = self.F.linalg.gemm2(
            lower_tri_solve,
            self.F.linalg.trsm(l_train, y_train.expand_dims(axis=-1)),
            transpose_a=True,
        ).squeeze(axis=-1)
        # The second term can be rewritten as
        # :math:`||L^{-1} K(x_train, x_test)||_2^2`
        # so that only one triangular system needs to be solved
        predictive_covariance = self.kernel.kernel_matrix(
            x_test, x_test) - self.F.linalg.gemm2(
                lower_tri_solve, lower_tri_solve, transpose_a=True)
        # Extract diagonal entries of covariance matrix
        predictive_std = batch_diagonal(
            self.F,
            predictive_covariance,
            self.prediction_length,
            self.float_type,
        )
        # If self.sample_noise = True, predictive covariance has sigma^2 on the diagonal
        if self.sample_noise:
            predictive_std = self.F.broadcast_add(predictive_std,
                                                  self.sigma**2)
        predictive_std = self.F.sqrt(predictive_std).squeeze(axis=-1)
        # Compute sample from GP predictive distribution
        return (
            self.sample(predictive_mean, predictive_covariance),
            predictive_mean,
            predictive_std,
        )
Example #7
        def _assemble_covariates(
            feat_dynamic_real: Tensor,
            feat_dynamic_cat: Tensor,
            feat_static_real: Tensor,
            feat_static_cat: Tensor,
            is_past: bool,
        ) -> Tensor:
            covariates = []
            if feat_dynamic_real.shape[-1] > 0:
                covariates.append(feat_dynamic_real)
            if feat_static_real.shape[-1] > 0:
                covariates.append(
                    feat_static_real.expand_dims(axis=1).repeat(
                        axis=1,
                        repeats=self.context_length
                        if is_past else self.prediction_length,
                    ))
            if len(covariates) > 0:
                covariates = F.concat(*covariates, dim=-1)
                covariates = self.covar_proj(covariates)
            else:
                covariates = None

            categories = []
            if feat_dynamic_cat.shape[-1] > 0:
                categories.append(feat_dynamic_cat)
            if feat_static_cat.shape[-1] > 0:
                categories.append(
                    feat_static_cat.expand_dims(axis=1).repeat(
                        axis=1,
                        repeats=self.context_length
                        if is_past else self.prediction_length,
                    ))
            if len(categories) > 0:
                categories = F.concat(*categories, dim=-1)
                embeddings = self.embedder(categories)
                embeddings = F.reshape(embeddings,
                                       shape=(0, 0, -4, self.d_hidden,
                                              -1)).sum(axis=-1)
                if covariates is not None:
                    covariates = covariates + embeddings
                else:
                    covariates = embeddings
            else:
                pass

            return covariates
Example #8
    def quantile_internal(self,
                          x: Tensor,
                          axis: Optional[int] = None) -> Tensor:
        r"""
        Evaluates the quantile function at the quantile levels contained in `x`.

        Parameters
        ----------
        x
            Tensor of shape ``*gamma.shape`` if axis=None, or containing an
            additional axis on the specified position, otherwise.
        axis
            Index of the axis containing the different quantile levels which
            are to be computed.

        Returns
        -------
        Tensor
            Quantiles tensor, of the same shape as x.
        """

        F = self.F

        # shapes of self
        # self.gamma: (*batch_shape)
        # self.knot_positions, self.b: (*batch_shape, num_pieces)

        # axis=None - passed at inference when num_samples is None
        # The shape of x is (*batch_shape).
        # The shapes of the parameters should be:
        # gamma: (*batch_shape), knot_positions, b: (*batch_shape, num_pieces)
        # They match the self. counterparts so no reshaping is needed

        # axis=0 - passed at inference when num_samples is not None
        # The shape of x is (num_samples, *batch_shape).
        # The shapes of the parameters should be:
        # gamma: (num_samples, *batch_shape), knot_positions, b: (num_samples, *batch_shape, num_pieces),
        # They do not match the self. counterparts and we need to expand the axis=0 to all of them.

        # axis=-2 - passed at training when we evaluate quantiles at knot_positions in order to compute a_tilde
        # The shape of x is shape(x) = shape(knot_positions) = (*batch_shape, num_pieces).
        # The shapes of the parameters should be:
        # gamma: (*batch_shape, 1), knot_positions: (*batch_shape, 1, num_pieces), b: (*batch_shape, 1, num_pieces)
        # They do not match the self. counterparts and we need to expand axis=-1 for gamma and axis=-2 for the rest.

        if axis is not None:
            gamma = self.gamma.expand_dims(axis=axis if axis == 0 else -1)
            knot_positions = self.knot_positions.expand_dims(axis=axis)
            b = self.b.expand_dims(axis=axis)
        else:
            gamma, knot_positions, b = self.gamma, self.knot_positions, self.b

        x_minus_knots = F.broadcast_minus(x.expand_dims(axis=-1),
                                          knot_positions)

        quantile = F.broadcast_add(
            gamma, F.sum(F.broadcast_mul(b, F.relu(x_minus_knots)), axis=-1))

        return quantile
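A standalone numeric sketch of the piecewise-linear quantile formula used above, q(x) = gamma + sum_k b_k * relu(x - knot_k), with illustrative parameter values:

import mxnet as mx

gamma = mx.nd.array([0.0])
b = mx.nd.array([[1.0, 2.0]])      # slopes of the two pieces
knots = mx.nd.array([[0.0, 0.5]])  # knot positions (quantile levels)
x = mx.nd.array([0.75])            # quantile level to evaluate

x_minus_knots = mx.nd.broadcast_minus(x.expand_dims(axis=-1), knots)
q = mx.nd.broadcast_add(gamma, mx.nd.sum(b * mx.nd.relu(x_minus_knots), axis=-1))
print(q)  # 1.0 * 0.75 + 2.0 * 0.25 = 1.25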
Example #9
def mahalanobis_distance(
    F, W: Tensor, D: Tensor, capacitance_tril: Tensor, x: Tensor
) -> Tensor:
    r"""
    Uses the Woodbury matrix identity

    .. math::
        (W W^T + D)^{-1} = D^{-1} - D^{-1} W C^{-1} W^T D^{-1},

    where :math:`C` is the capacitance matrix :math:`I + W^T D^{-1} W`, to compute the squared
    Mahalanobis distance :math:`x^T (W W^T + D)^{-1} x`.

    Parameters
    ----------
    F
    W
        (..., dim, rank)
    D
        (..., dim)
    capacitance_tril
        (..., rank, rank)
    x
        (..., dim)

    Returns
    -------

    """
    xx = x.expand_dims(axis=-1)

    # (..., rank, 1)
    Wt_Dinv_x = F.linalg_gemm2(
        F.broadcast_div(W, D.expand_dims(axis=-1)), xx, transpose_a=True
    )

    # compute x^T D^-1 x, (...,)
    maholanobis_D_inv = F.broadcast_div(x.square(), D).sum(axis=-1)

    # (..., rank)
    L_inv_Wt_Dinv_x = F.linalg_trsm(capacitance_tril, Wt_Dinv_x).squeeze(
        axis=-1
    )

    maholanobis_L = L_inv_Wt_Dinv_x.square().sum(axis=-1).squeeze()

    return F.broadcast_minus(maholanobis_D_inv, maholanobis_L)
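A small sanity-check sketch (dim=2, rank=1, illustrative values), reusing capacitance_tril from Example #4:

import mxnet as mx

W = mx.nd.array([[1.0], [0.5]])  # (dim, rank)
D = mx.nd.array([2.0, 1.0])      # (dim,)
x = mx.nd.array([1.0, -1.0])     # (dim,)
L_cap = capacitance_tril(mx.nd, rank=1, W=W, D=D)
d2 = mahalanobis_distance(mx.nd, W=W, D=D, capacitance_tril=L_cap, x=x)
print(d2)  # squared Mahalanobis distance x^T (W W^T + D)^{-1} x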
Example #10
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F
        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        condition = F.broadcast_greater(level, level.zeros_like() + 0.5)
        u = F.where(condition, F.log(2.0 * level), -F.log(2.0 - 2.0 * level))

        return F.broadcast_add(self.mu, F.broadcast_mul(self.b, u))
Example #11
    def quantile(self, level: Tensor):
        F = self.F
        # we consider level to be an independent axis and so expand it
        # to shape (num_levels, 1, 1, ...)
        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        x_shifted = F.broadcast_div(F.power(1 - level, -self.xi) - 1, self.xi)
        x = F.broadcast_mul(x_shifted, self.beta)
        return x
Example #12
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F
        # we consider level to be an independent axis and so expand it
        # to shape (num_levels, 1, 1, ...)
        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        return F.broadcast_add(
            self.mu,
            F.broadcast_mul(self.sigma,
                            math.sqrt(2.0) * F.erfinv(2.0 * level - 1.0)),
        )
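A standalone check of the Gaussian quantile formula above, mu + sigma * sqrt(2) * erfinv(2 * level - 1), with illustrative values:

import math
import mxnet as mx

mu, sigma = mx.nd.array([1.0]), mx.nd.array([2.0])
level = mx.nd.array([0.975])
q = mx.nd.broadcast_add(
    mu,
    mx.nd.broadcast_mul(sigma, math.sqrt(2.0) * mx.nd.erfinv(2.0 * level - 1.0)),
)
print(q)  # approximately 1.0 + 2.0 * 1.96 = 4.92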
Example #13
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F

        # self.bin_probs.shape = (batch_shape, num_bins)
        probs = self.bin_probs.transpose()  # (num_bins, batch_shape.T)

        # (batch_shape)
        zeros_batch_size = F.zeros_like(
            F.slice_axis(self.bin_probs, axis=-1, begin=0, end=1).squeeze(
                axis=-1
            )
        )

        level = level.expand_dims(axis=0)

        # cdf shape (batch_size.T, levels)
        zeros_cdf = F.broadcast_add(
            zeros_batch_size.transpose().expand_dims(axis=-1),
            level.zeros_like(),
        )
        start_state = (zeros_cdf, zeros_cdf.astype("int32"))

        def step(p, state):
            cdf, idx = state
            cdf = F.broadcast_add(cdf, p.expand_dims(axis=-1))
            idx = F.where(F.broadcast_greater(cdf, level), idx, idx + 1)
            return zeros_batch_size, (cdf, idx)

        _, states = F.contrib.foreach(step, probs, start_state)
        _, idx = states

        # idx.shape = (batch.T, levels)
        # centers.shape = (batch, num_bins)
        #
        # expand centers to shape -> (levels, batch, num_bins)
        # so we can use pick with idx.T.shape = (levels, batch)
        #
        # zeros_cdf.shape (batch.T, levels)
        centers_expanded = F.broadcast_add(
            self.bin_centers.transpose().expand_dims(axis=-1),
            zeros_cdf.expand_dims(axis=0),
        ).transpose()

        # centers_expanded.shape = (levels, batch, num_bins)
        # idx.shape (batch.T, levels)
        a = centers_expanded.pick(idx.transpose(), axis=-1)
        return a
Example #14
def test_mixture(distr1: Distribution, distr2: Distribution, p: Tensor,
                 serialize_fn) -> None:
    # sample from component distributions, and select samples
    samples1 = distr1.sample(num_samples=NUM_SAMPLES_LARGE)
    samples2 = distr2.sample(num_samples=NUM_SAMPLES_LARGE)

    # TODO: for multivariate case, test should not sample elements from different components in the event_dim dimension
    rand = mx.nd.random.uniform(shape=(NUM_SAMPLES_LARGE, *p.shape))
    choice = (rand < p.expand_dims(axis=0)).broadcast_like(samples1)
    samples_ref = mx.nd.where(choice, samples1, samples2)

    # construct mixture distribution and sample from it

    mixture_probs = mx.nd.stack(p, 1.0 - p, axis=-1)

    mixture = MixtureDistribution(mixture_probs=mixture_probs,
                                  components=[distr1, distr2])
    mixture = serialize_fn(mixture)

    samples_mix = mixture.sample(num_samples=NUM_SAMPLES_LARGE)

    # check that shapes are right

    assert (samples1.shape == samples2.shape == samples_mix.shape ==
            samples_ref.shape)

    # check mean and stddev
    calc_mean = mixture.mean.asnumpy()
    calc_std = mixture.stddev.asnumpy()
    sample_mean = samples_mix.asnumpy().mean(axis=0)
    sample_std = samples_mix.asnumpy().std(axis=0)

    assert np.allclose(calc_mean, sample_mean, atol=1e-1)
    assert np.allclose(calc_std, sample_std, atol=2e-1)

    # check that histograms are close
    assert (diff(histogram(samples_mix.asnumpy()),
                 histogram(samples_ref.asnumpy())) < 0.05)

    # can only calculate the cdf for Gaussians currently
    if isinstance(distr1, Gaussian) and isinstance(distr2, Gaussian):
        emp_cdf, edges = empirical_cdf(samples_mix.asnumpy())
        calc_cdf = mixture.cdf(mx.nd.array(edges)).asnumpy()
        assert np.allclose(calc_cdf[1:, :], emp_cdf, atol=1e-2)
Example #15
    def cdf(self, x: Tensor) -> Tensor:
        r"""
        Computes the quantile level :math:`\alpha` such that
        :math:`q(\alpha) = x`.

        Parameters
        ----------
        x
            Tensor of shape gamma.shape

        Returns
        -------
        Tensor
            Tensor of shape gamma.shape
        """

        F = self.F
        gamma, b, knot_positions = self.gamma, self.b, self.knot_positions

        quantiles_at_knots = self.quantile_internal(knot_positions, axis=-2)

        # Mask to nullify the terms corresponding to knots larger than l_0,
        # where l_0 is the largest knot (quantile level) such that the
        # quantile at l_0, s(l_0), is smaller than x. Shape: (..., num_pieces)
        mask = F.broadcast_lesser(quantiles_at_knots, x.expand_dims(axis=-1))

        slope_l0 = F.sum(b * mask, axis=-1, keepdims=False)

        # slope_l0 can be zero in which case a_tilde = 0. The following is to
        # circumvent mxnet issue with "where" operator which returns nan even
        # if the statement you are interested in does not result in nan
        # (but the "else" statement evaluates to nan).
        slope_l0_nz = F.where(slope_l0 == F.zeros_like(slope_l0),
                              F.ones_like(x), slope_l0)

        a_tilde = F.where(
            slope_l0 == F.zeros_like(slope_l0),
            F.zeros_like(x),
            (x - gamma +
             F.sum(b * knot_positions * mask, axis=-1, keepdims=False)) /
            slope_l0_nz,
        )

        return F.broadcast_minimum(F.ones_like(a_tilde), a_tilde)
Example #16
    def _assemble_inputs(
        self,
        F,
        target: Tensor,
        static_features: Tensor,
        dynamic_features: Tensor,
    ) -> Tensor:
        """
        Assemble features from target, static features, and the dynamic
        features.

        Parameters
        ----------
        F
            A module that can either refer to the Symbol API or the NDArray
            API in MXNet.
        target
            target time series,
            shape (batch_size, sequence_length, 1)
        static_features
            static features,
            shape (batch_size, num_feat_static)
        dynamic_features
            dynamic_features,
            shape (batch_size, sequence_length, num_feat_dynamic)

        Returns
        -------
        Tensor
            combined features,
            shape (batch_size, sequence_length,
                   num_feat_static + num_feat_dynamic + 1)
        """

        helper_ones = F.ones_like(target)  # Ones of (N, T, 1)
        tiled_static_features = F.batch_dot(
            helper_ones, static_features.expand_dims(1))  # (N, T, C)
        inputs = F.concat(target,
                          tiled_static_features,
                          dynamic_features,
                          dim=2)  # (N, T, C)
        return inputs
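A minimal sketch of the batch_dot tiling trick used above to repeat static features along the time axis (illustrative shapes, plain mx.nd instead of F):

import mxnet as mx

target = mx.nd.ones((2, 4, 1))                  # (N, T, 1)
static = mx.nd.array([[1.0, 2.0], [3.0, 4.0]])  # (N, C)
helper_ones = mx.nd.ones_like(target)
tiled = mx.nd.batch_dot(helper_ones, static.expand_dims(1))  # (N, T, C)
print(tiled.shape)  # (2, 4, 2): each series' static features repeated T times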
Example #17
def make_nd_diag(F, x: Tensor, d: int) -> Tensor:
    """
    Make a diagonal tensor, given the diagonal

    Parameters
    ----------
    F
        The function space to use.
    x
        Diagonal to use, shape :math:`(..., d)`.
    d
        Last dimension of `x`.

    Returns
    -------
    Tensor
        A tensor y of shape :math:`(..., d, d)` such that
        :math:`y[..., i, i] = x[..., i]`.
    """
    return F.broadcast_mul(F.eye(d), x.expand_dims(axis=-1))
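A quick usage sketch for make_nd_diag (illustrative values, not from the original source):

import mxnet as mx

x = mx.nd.array([[1.0, 2.0, 3.0]])  # (batch, d) = (1, 3)
diag = make_nd_diag(mx.nd, x, d=3)
print(diag.shape)  # (1, 3, 3), with x[0] on the diagonal of diag[0]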
Example #18
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F
        # we consider level to be an independent axis and so expand it
        # to shape (num_levels, 1, 1, ...)

        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        quantiles = F.broadcast_mul(self.value, level.ones_like())
        level = F.broadcast_mul(quantiles.ones_like(), level)

        minus_inf = -quantiles.ones_like() / 0.0
        quantiles = F.where(
            F.broadcast_logical_or(level != 0, F.contrib.isnan(quantiles)),
            quantiles,
            minus_inf,
        )

        nans = level.zeros_like() / 0.0
        quantiles = F.where(level != level, nans, quantiles)

        return quantiles
Example #19
    def prepare_inputs_imputation_step(
        self,
        F,
        begin_state: List[Tensor],
        imputed_sequence: Tensor,
        sequence_length: int,
        subsequences_length: int,
        scale: Tensor,
        target: Tensor,
        target_observed_values: Tensor,
        time_feat: Tensor,
        repeated_static_feat: Tensor,
        is_padded_indicator: Tensor,
        state,
        i: int,
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
        """
        Prepares inputs for the next LSTM unrolling step at step i.
        """
        lags = self.get_lagged_subsequences(
            F=F,
            sequence=imputed_sequence,
            sequence_length=sequence_length,
            indices=self.lags_seq,
            subsequences_length=subsequences_length,
        )
        # (batch_size, sub_seq_len, *target_shape, num_lags)
        lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))
        # from (batch_size, sub_seq_len, *target_shape, num_lags)
        # to (batch_size, sub_seq_len, prod(target_shape) * num_lags)
        input_lags = F.reshape(
            data=lags_scaled,
            shape=(
                -1,
                subsequences_length,
                len(self.lags_seq) * prod(self.target_shape),
            ),
        )
        # (batch_size, sub_seq_len, input_dim)
        inputs = F.concat(input_lags, time_feat, repeated_static_feat, dim=-1)

        is_pad = is_padded_indicator.slice_axis(axis=1, begin=i, end=i + 1)

        current_observed_indicator = target_observed_values.slice_axis(axis=1,
                                                                       begin=i,
                                                                       end=i +
                                                                       1)

        current_target = target.slice_axis(axis=1, begin=i, end=i + 1)

        pre_sequence = imputed_sequence.slice_axis(axis=1,
                                                   begin=0,
                                                   end=-subsequences_length +
                                                   i)

        post_sequence = imputed_sequence.slice_axis(
            axis=1, begin=-subsequences_length + i + 1, end=None)
        # Reset the state to the begin state if the current target is padded
        state = [
            F.where(is_pad.repeat(repeats=self.num_cells, axis=1), bs, s)
            for bs, s in zip(begin_state, state)
        ]
        return (
            inputs,
            is_pad,
            current_observed_indicator,
            current_target,
            pre_sequence,
            post_sequence,
            state,
        )
Example #20
def kalman_filter_step(
    F,
    target: Tensor,
    prior_mean: Tensor,
    prior_cov: Tensor,
    emission_coeff: Tensor,
    residual: Tensor,
    noise_std: Tensor,
    latent_dim: int,
    output_dim: int,
):
    """
    One step of the Kalman filter.

    This function computes the filtered state (mean and covariance) given the
    linear system coefficients the prior state (mean and variance),
    as well as observations.

    Parameters
    ----------
    F
    target
        Observations of the system output, shape (batch_size, output_dim)
    prior_mean
        Prior mean of the latent state, shape (batch_size, latent_dim)
    prior_cov
        Prior covariance of the latent state, shape
        (batch_size, latent_dim, latent_dim)
    emission_coeff
        Emission coefficient, shape (batch_size, output_dim, latent_dim)
    residual
        Residual component, shape (batch_size, output_dim)
    noise_std
        Standard deviation of the output noise, shape (batch_size, output_dim)
    latent_dim
        Dimension of the latent state vector
    output_dim
        Dimension of the observation (output) vector

    Returns
    -------
    Tensor
        Filtered_mean, shape (batch_size, latent_dim)
    Tensor
        Filtered_covariance, shape (batch_size, latent_dim, latent_dim)
    Tensor
        Log probability, shape (batch_size, )
    """
    # output_mean: mean of the target (batch_size, obs_dim)
    output_mean = F.linalg_gemm2(
        emission_coeff, prior_mean.expand_dims(axis=-1)).squeeze(axis=-1)

    # noise covariance
    noise_cov = make_nd_diag(F=F, x=noise_std * noise_std, d=output_dim)

    S_hh_x_A_tr = F.linalg_gemm2(prior_cov, emission_coeff, transpose_b=True)

    # covariance of the target
    output_cov = F.linalg_gemm2(emission_coeff, S_hh_x_A_tr) + noise_cov

    # compute the Cholesky decomposition output_cov = LL^T
    L_output_cov = F.linalg_potrf(output_cov)

    # Compute Kalman gain matrix K:
    # K = S_hh X with X = A^T output_cov^{-1}
    # We have X = A^T output_cov^{-1} => X output_cov = A^T => X LL^T = A^T
    # We can thus obtain X by solving two linear systems involving L
    kalman_gain = F.linalg_trsm(
        L_output_cov,
        F.linalg_trsm(L_output_cov,
                      S_hh_x_A_tr,
                      rightside=True,
                      transpose=True),
        rightside=True,
    )

    # compute the error
    target_minus_residual = target - residual
    delta = target_minus_residual - output_mean

    # filtered estimates
    filtered_mean = prior_mean.expand_dims(axis=-1) + F.linalg_gemm2(
        kalman_gain, delta.expand_dims(axis=-1))
    filtered_mean = filtered_mean.squeeze(axis=-1)

    # Joseph's symmetrized update for covariance:
    ImKA = F.broadcast_sub(F.eye(latent_dim),
                           F.linalg_gemm2(kalman_gain, emission_coeff))

    filtered_cov = F.linalg_gemm2(
        ImKA, F.linalg_gemm2(
            prior_cov, ImKA, transpose_b=True)) + F.linalg_gemm2(
                kalman_gain,
                F.linalg_gemm2(noise_cov, kalman_gain, transpose_b=True))

    # likelihood term: (batch_size,)
    log_p = MultivariateGaussian(output_mean,
                                 L_output_cov).log_prob(target_minus_residual)

    return filtered_mean, filtered_cov, log_p
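A minimal, illustrative call with batch_size=1 and one-dimensional latent and output spaces (all values arbitrary); it assumes make_nd_diag (Example #17) and gluon-ts's MultivariateGaussian are in scope:

import mxnet as mx

filtered_mean, filtered_cov, log_p = kalman_filter_step(
    mx.nd,
    target=mx.nd.array([[1.0]]),           # (batch_size, output_dim)
    prior_mean=mx.nd.array([[0.0]]),       # (batch_size, latent_dim)
    prior_cov=mx.nd.array([[[1.0]]]),      # (batch_size, latent_dim, latent_dim)
    emission_coeff=mx.nd.array([[[1.0]]]),
    residual=mx.nd.array([[0.0]]),
    noise_std=mx.nd.array([[0.5]]),
    latent_dim=1,
    output_dim=1,
)
print(filtered_mean.shape, filtered_cov.shape, log_p.shape)  # (1, 1) (1, 1, 1) (1,)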
Example #21
    def unroll(
        self,
        F,
        lags: Tensor,
        scale: Tensor,
        time_feat: Tensor,
        target_dimension_indicator: Tensor,
        unroll_length: int,
        begin_state: Optional[List[Tensor]],
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
        """
        Prepares the input to the RNN and unrolls it the given number of time
        steps.

        Parameters
        ----------
        F
        lags
            Input lags (batch_size, sub_seq_len, target_dim, num_lags)
        scale
            Mean scale (batch_size, 1, target_dim)
        time_feat
            Additional time features
        target_dimension_indicator
            Indices of the target dimension (batch_size, target_dim)
        unroll_length
            length to unroll
        begin_state
            State to start the unrolling of the RNN

        Returns
        -------
        outputs
            RNN outputs (batch_size, seq_len, num_cells)
        states
            RNN states. Nested list with (batch_size, num_cells) tensors with
        dimensions target_dim x num_layers x (batch_size, num_cells)
        lags_scaled
            Scaled lags(batch_size, sub_seq_len, target_dim, num_lags)
        inputs
            inputs to the RNN
        """
        # (batch_size, sub_seq_len, target_dim, num_lags)
        lags_scaled = F.broadcast_div(lags, scale.expand_dims(axis=-1))

        assert_shape(
            lags_scaled,
            (-1, unroll_length, self.target_dim, len(self.lags_seq)),
        )

        input_lags = F.reshape(
            data=lags_scaled,
            shape=(-1, unroll_length, len(self.lags_seq) * self.target_dim),
        )

        # (batch_size, target_dim, embed_dim)
        index_embeddings = self.embed(target_dimension_indicator)
        assert_shape(index_embeddings, (-1, self.target_dim, self.embed_dim))

        # (batch_size, seq_len, target_dim * embed_dim)
        repeated_index_embeddings = (
            index_embeddings.expand_dims(axis=1)
            .repeat(axis=1, repeats=unroll_length)
            .reshape((-1, unroll_length, self.target_dim * self.embed_dim))
        )

        # (batch_size, sub_seq_len, input_dim)
        inputs = F.concat(
            input_lags, repeated_index_embeddings, time_feat, dim=-1
        )

        # unroll encoder
        outputs, state = self.rnn.unroll(
            inputs=inputs,
            length=unroll_length,
            layout="NTC",
            merge_outputs=True,
            begin_state=begin_state,
        )

        assert_shape(outputs, (-1, unroll_length, self.num_cells))
        for s in state:
            assert_shape(s, (-1, self.num_cells))

        assert_shape(
            lags_scaled,
            (-1, unroll_length, self.target_dim, len(self.lags_seq)),
        )

        return outputs, state, lags_scaled, inputs
Example #22
def cumsum(
    F, x: Tensor, exclusive: bool = False, reverse: bool = False
) -> Tensor:
    r"""
    Find cumulative sum on the last axis by multiplying with lower triangular
    ones-matrix:

    .. math::

       \operatorname{cumsum}(x) =
       \begin{cases}
         \operatorname{ltr\_ones} \times x
           & \text{for cumulative sum}\\
         x \times \operatorname{ltr\_ones}
           & \text{for cumulative sum in the reverse order}
       \end{cases}

    Also supports `exclusive` flag to start the cumsum with zero.
    For example, if :math:`x = [a, b, c]`, we have

    .. math::

       \operatorname{cumsum}(x) =
       \begin{cases}
         [a, a + b, a + b + c]
           & \text{if }\mathit{reverse = False, exclusive = False}\\
         [0, a, a + b]
           & \text{if }\mathit{reverse = False, exclusive = True}\\
         [a + b + c, b + c, c]
           & \text{if }\mathit{reverse = True, exclusive = False}\\
         [b + c, c, 0]
           & \text{if }\mathit{reverse = True, exclusive = True}\\
       \end{cases}

    Parameters
    ----------
    F
        The function space to use.
    x
        A tensor with shape :math:`(..., n)`.
    exclusive
        If `True`, the cumulative sum starts with zero.
    reverse
        If `True`, the cumulative sum is performed in the opposite direction.

    Returns
    -------
    Tensor:
        A modified tensor with identical shape and cumulative sums in the last
        axis.
    """

    # Create a new axis (for matrix multiplication) either at last location or
    # last-but-one location (for reverse mode)
    exp_dim = -2 if reverse else -1
    # (..., 1, n) if reverse is True and (..., n, 1) otherwise
    x = x.expand_dims(axis=exp_dim)

    # Ones_matrix (..., n, n)
    ones_matrix = F.linalg_gemm2(
        F.ones_like(x),
        F.ones_like(x),
        transpose_a=reverse,
        transpose_b=not reverse,
    )
    cumulative_sum = F.linalg_trmm(ones_matrix, x, rightside=reverse)

    if exclusive:
        cumulative_sum = cumulative_sum - x

    return cumulative_sum.squeeze(axis=exp_dim)
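A quick check of the four cases listed in the docstring (illustrative input, not from the original source):

import mxnet as mx

x = mx.nd.array([[1.0, 2.0, 3.0]])
print(cumsum(mx.nd, x))                                # [[1, 3, 6]]
print(cumsum(mx.nd, x, exclusive=True))                # [[0, 1, 3]]
print(cumsum(mx.nd, x, reverse=True))                  # [[6, 5, 3]]
print(cumsum(mx.nd, x, reverse=True, exclusive=True))  # [[5, 3, 0]]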
Example #23
    def reconcile_samples(self, samples: Tensor) -> Tensor:
        """
        Computes coherent samples by projecting unconstrained `samples` using the matrix `self.M`.

        Parameters
        ----------
        samples
            Unconstrained samples

        Returns
        -------
        Coherent samples
            Tensor, shape same as that of `samples`.

        """
        proj_matrix_shape = self.M.shape  # (num_ts, num_ts)

        num_iter_dims = len(self.seq_axis) if self.seq_axis else 0

        # Expand `M` depending on the shape of samples:
        # If seq_axis = None, during training the first axis is only `batch_size`, in which case `M` would be expanded
        # 3 times; during prediction it would be expanded 2 times since the first axis is
        # `batch_size x num_parallel_samples`.
        M_expanded = self.M
        for i in range(len(samples.shape[num_iter_dims:-1])):
            M_expanded = M_expanded.expand_dims(axis=0)

        # If seq_axis = None broadcast M to (num_samples, batch_size, seq_len, m, m) during training
        # and to (num_samples * batch_size, seq_len, m, m) during prediction
        # Else broadcast to the appropriate remaining dimension
        _shape = (list(samples.shape[:-1]) if not self.seq_axis else [
            samples.shape[i] for i in range(len(samples.shape[:-1]))
            if i not in self.seq_axis
        ])
        self.M_broadcast = mx.nd.broadcast_to(
            M_expanded,
            shape=_shape + list(proj_matrix_shape),
        )

        if self.seq_axis:
            # bring the axis to iterate in the beginning
            samples = mx.nd.moveaxis(samples, self.seq_axis,
                                     list(range(len(self.seq_axis))))

            out = []
            for idx in product(*[
                    range(x) for x in
                [samples.shape[d] for d in range(len(self.seq_axis))]
            ]):
                s = samples[idx]
                out.append(
                    mx.nd.linalg.gemm2(self.M_broadcast,
                                       s.expand_dims(-1)).squeeze(axis=-1))

            # put the axis in the correct order again
            out = mx.nd.concat(*out, dim=0).reshape(samples.shape)
            out = mx.nd.moveaxis(out, list(range(len(self.seq_axis))),
                                 self.seq_axis)
            return out
        else:
            return mx.nd.linalg.gemm2(self.M_broadcast,
                                      samples.expand_dims(-1)).squeeze(axis=-1)
Example #24
    def cdf(self, x: Tensor) -> Tensor:
        F = self.F
        x = x.expand_dims(axis=-1)
        # left_edges = self.bin_edges.slice_axis(axis=-1, begin=0, end=-1)
        mask = F.broadcast_lesser_equal(self.bin_centers, x)
        return F.broadcast_mul(self.bin_probs, mask).sum(axis=-1)
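A standalone sketch of the same masking rule with illustrative bins (plain mx.nd instead of self):

import mxnet as mx

bin_centers = mx.nd.array([0.0, 1.0, 2.0])
bin_probs = mx.nd.array([0.2, 0.5, 0.3])
x = mx.nd.array([1.5]).expand_dims(axis=-1)          # (1, 1)
mask = mx.nd.broadcast_lesser_equal(bin_centers, x)  # bins whose center is <= x
print(mx.nd.broadcast_mul(bin_probs, mask).sum(axis=-1))  # [0.7]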
Example #25
    def hybrid_forward(
        self,
        F,
        feat_static_cat: Tensor,
        past_target: Tensor,
        past_observed_values: Tensor,
        past_time_feat: Tensor,
        future_time_feat: Tensor,
        scale: Tensor,
    ) -> Tensor:
        """
        Computes prediction samples for the wavenet model.

        Parameters
        ----------
        F
        feat_static_cat
            Static categorical features: (batch_size, num_cat_features)
        past_target
            Past target: (batch_size, receptive_field)
        past_observed_values
            Observed value indicator for the past target: (batch_size,
            receptive_field)
        past_time_feat
            Past time features: (batch_size, num_time_features,
            receptive_field)
        future_time_feat
            Future time features: (batch_size, num_time_features, pred_length)
        scale
            scale of the time series: (batch_size, 1)

        Returns
        -------
        Tensor
            Prediction samples with shape (batch_size, num_samples,
            pred_length)
        """
        def blow_up(u):
            """
            Expand to (batch_size x num_samples)
            """
            return F.repeat(u, repeats=self.num_samples, axis=0)

        past_target = past_target.astype("int32")
        full_features = self.get_full_features(
            F,
            feat_static_cat=feat_static_cat,
            past_observed_values=past_observed_values,
            past_time_feat=past_time_feat,
            future_time_feat=future_time_feat,
            future_observed_values=None,
            scale=scale,
        )

        # To compute queues for the first step, we need features from
        # -self.pred_length - self.receptive_field + 1 to -self.pred_length + 1
        features_end_ix = (-self.pred_length +
                           1 if self.pred_length > 1 else None)
        queues = self.get_initial_conv_queues(
            F,
            past_target=F.slice_axis(past_target,
                                     begin=-self.receptive_field,
                                     end=None,
                                     axis=-1),
            features=F.slice_axis(
                full_features,
                begin=-self.pred_length - self.receptive_field + 1,
                end=features_end_ix,
                axis=-1,
            ),
        )
        queues = [blow_up(queue) for queue in queues]

        res = F.slice_axis(past_target, begin=-2, end=None, axis=-1)
        res = blow_up(res)
        for n in range(self.pred_length):
            # Generate one-step ahead predictions. The input consists of target
            # and features corresponding to the last two time steps.
            current_target = F.slice_axis(res, begin=-2, end=None, axis=-1)
            current_features = F.slice_axis(
                full_features,
                begin=self.receptive_field + n - 1,
                end=self.receptive_field + n + 1,
                axis=-1,
            )
            embedding = self.target_feature_embedding(
                F,
                target=current_target,
                features=blow_up(current_features),
            )

            # (batch_size, 1, num_bins) where 1 corresponds to the time axis.
            unnormalized_outputs, queues = self.base_net(
                F, embedding, one_step_prediction=True, queues=queues)
            if self.temperature > 0:
                # (batch_size, 1, num_bins) where 1 corresponds to the time
                # axis.
                probs = F.softmax(unnormalized_outputs / self.temperature,
                                  axis=-1)
                # (batch_size, 1)
                y = F.sample_multinomial(probs)
            else:
                # (batch_size, 1)
                y = F.argmax(unnormalized_outputs, axis=-1)
            y = y.astype("int32")
            res = F.concat(res, y, num_args=2, dim=-1)
        samples = F.slice_axis(res, begin=-self.pred_length, end=None, axis=-1)
        samples = samples.reshape(shape=(-1, self.num_samples,
                                         self.pred_length))
        samples = self.post_transform(samples)
        samples = F.broadcast_mul(scale.expand_dims(axis=1), samples)
        return samples
Example #26
    def unroll_encoder(
        self,
        F,
        past_time_feat: Tensor,
        past_target_cdf: Tensor,
        past_observed_values: Tensor,
        past_is_pad: Tensor,
        future_time_feat: Optional[Tensor],
        future_target_cdf: Optional[Tensor],
        target_dimension_indicator: Tensor,
    ) -> Tuple[Tensor, List[Tensor], Tensor, Tensor, Tensor]:
        """
        Unrolls the RNN encoder over past and, if present, future data.
        Returns outputs and state of the encoder, plus the scale of
        past_target_cdf and a vector of static features that was constructed
        and fed as input to the encoder. All tensor arguments should have NTC
        layout.

        Parameters
        ----------
        F
        past_time_feat
            Past time features (batch_size, history_length, num_features)
        past_target_cdf
            Past marginal CDF transformed target values (batch_size,
            history_length, target_dim)
        past_observed_values
            Indicator whether or not the values were observed (batch_size,
            history_length, target_dim)
        past_is_pad
            Indicator whether the past target values have been padded
            (batch_size, history_length)
        future_time_feat
            Future time features (batch_size, prediction_length, num_features)
        future_target_cdf
            Future marginal CDF transformed target values (batch_size,
            prediction_length, target_dim)
        target_dimension_indicator
            Dimensionality of the time series (batch_size, target_dim)

        Returns
        -------
        outputs
            RNN outputs (batch_size, seq_len, num_cells)
        states
            RNN states. Nested list with (batch_size, num_cells) tensors with
        dimensions target_dim x num_layers x (batch_size, num_cells)
        scale
            Mean scales for the time series (batch_size, 1, target_dim)
        lags_scaled
            Scaled lags(batch_size, sub_seq_len, target_dim, num_lags)
        inputs
            inputs to the RNN

        """

        past_observed_values = F.broadcast_minimum(
            past_observed_values, 1 - past_is_pad.expand_dims(axis=-1)
        )

        if future_time_feat is None or future_target_cdf is None:
            time_feat = past_time_feat.slice_axis(
                axis=1, begin=-self.context_length, end=None
            )
            sequence = past_target_cdf
            sequence_length = self.history_length
            subsequences_length = self.context_length
        else:
            time_feat = F.concat(
                past_time_feat.slice_axis(
                    axis=1, begin=-self.context_length, end=None
                ),
                future_time_feat,
                dim=1,
            )
            sequence = F.concat(past_target_cdf, future_target_cdf, dim=1)
            sequence_length = self.history_length + self.prediction_length
            subsequences_length = self.context_length + self.prediction_length

        # (batch_size, sub_seq_len, target_dim, num_lags)
        lags = self.get_lagged_subsequences(
            F=F,
            sequence=sequence,
            sequence_length=sequence_length,
            indices=self.lags_seq,
            subsequences_length=subsequences_length,
        )

        # scale is computed on the context length last units of the past target
        # scale shape is (batch_size, 1, target_dim)
        _, scale = self.scaler(
            past_target_cdf.slice_axis(
                axis=1, begin=-self.context_length, end=None
            ),
            past_observed_values.slice_axis(
                axis=1, begin=-self.context_length, end=None
            ),
        )

        outputs, states, lags_scaled, inputs = self.unroll(
            F=F,
            lags=lags,
            scale=scale,
            time_feat=time_feat,
            target_dimension_indicator=target_dimension_indicator,
            unroll_length=subsequences_length,
            begin_state=None,
        )

        return outputs, states, scale, lags_scaled, inputs
Example #27
    def _tensor_cdf_bisection(self,
                              level: Tensor,
                              tol=1e-6,
                              max_iter=120) -> Tensor:
        r"""
        Returns a Tensor of shape (len(level), *batch_size) with the corresponding quantiles.
        """
        F = self.F
        local_max_support_val = min(1e16, MAX_SUPPORT_VAL)

        try:
            support_lb, support_ub = self.support_min_max
            support_lb = F.broadcast_maximum(
                F.broadcast_minimum(
                    support_lb,
                    F.ones(self.batch_shape) * local_max_support_val,
                ),
                F.ones(self.batch_shape) * -local_max_support_val,
            )
            support_ub = F.broadcast_maximum(
                F.broadcast_minimum(
                    support_ub,
                    F.ones(self.batch_shape) * local_max_support_val,
                ),
                F.ones(self.batch_shape) * -local_max_support_val,
            )

            upper_bound = F.broadcast_like(support_lb.expand_dims(axis=0),
                                           level,
                                           lhs_axes=0,
                                           rhs_axes=0)
            lower_bound = F.broadcast_like(support_ub.expand_dims(axis=0),
                                           level,
                                           lhs_axes=0,
                                           rhs_axes=0)
        except NotImplementedError:
            # default to R if not defined
            upper_bound = (F.ones(
                (len(level), *self.batch_shape)) * local_max_support_val)
            lower_bound = (F.ones(
                (len(level), *self.batch_shape)) * -local_max_support_val)

        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        q = 0.5 * F.broadcast_add(upper_bound, lower_bound)
        val = self.cdf(q) - level

        cnt = 0
        while F.sum(F.abs(val) > tol) > 0 and cnt < max_iter:
            mask_g = F.greater(val, tol)
            mask_l = F.lesser(val, -tol)
            mask_done = F.lesser_equal(F.abs(val), tol)

            upper_bound = (F.broadcast_mul(q, mask_g) +
                           F.broadcast_mul(upper_bound, mask_l) +
                           F.broadcast_mul(q, mask_done))
            lower_bound = (F.broadcast_mul(q, mask_l) +
                           F.broadcast_mul(lower_bound, mask_g) +
                           F.broadcast_mul(q, mask_done))

            q = 0.5 * F.broadcast_add(upper_bound, lower_bound)
            val = self.cdf(q) - level
            cnt += 1
        return q
Example #28
    def train_hybrid_forward(
        self,
        F,
        target_dimension_indicator: Tensor,
        past_time_feat: Tensor,
        past_target_cdf: Tensor,
        past_observed_values: Tensor,
        past_is_pad: Tensor,
        future_time_feat: Tensor,
        future_target_cdf: Tensor,
        future_observed_values: Tensor,
    ) -> Tuple[Tensor, ...]:
        """
        Computes the loss for training DeepVAR, all inputs tensors representing
        time series have NTC layout.

        Parameters
        ----------
        F
        target_dimension_indicator
            Indices of the target dimension (batch_size, target_dim)
        past_time_feat
            Dynamic features of past time series (batch_size, history_length,
            num_features)
        past_target_cdf
            Past marginal CDF transformed target values (batch_size,
            history_length, target_dim)
        past_observed_values
            Indicator whether or not the values were observed (batch_size,
            history_length, target_dim)
        past_is_pad
            Indicator whether the past target values have been padded
            (batch_size, history_length)
        future_time_feat
            Future time features (batch_size, prediction_length, num_features)
        future_target_cdf
            Future marginal CDF transformed target values (batch_size,
            prediction_length, target_dim)
        future_observed_values
            Indicator whether or not the future values were observed
            (batch_size, prediction_length, target_dim)

        Returns
        -------
        distr
            Loss with shape (batch_size, 1)
        likelihoods
            Likelihoods for each time step
            (batch_size, context + prediction_length, 1)
        distr_args
            Distribution arguments (context + prediction_length,
            number_of_arguments)
        """

        seq_len = self.context_length + self.prediction_length

        # unroll the decoder in "training mode", i.e. by providing future data
        # as well
        rnn_outputs, _, scale, lags_scaled, inputs = self.unroll_encoder(
            F=F,
            past_time_feat=past_time_feat,
            past_target_cdf=past_target_cdf,
            past_observed_values=past_observed_values,
            past_is_pad=past_is_pad,
            future_time_feat=future_time_feat,
            future_target_cdf=future_target_cdf,
            target_dimension_indicator=target_dimension_indicator,
        )

        # put together target sequence
        # (batch_size, seq_len, target_dim)
        target = F.concat(
            past_target_cdf.slice_axis(
                axis=1, begin=-self.context_length, end=None
            ),
            future_target_cdf,
            dim=1,
        )

        # assert_shape(target, (-1, seq_len, self.target_dim))

        distr, distr_args = self.distr(
            time_features=inputs,
            rnn_outputs=rnn_outputs,
            scale=scale,
            lags_scaled=lags_scaled,
            target_dimension_indicator=target_dimension_indicator,
            seq_len=self.context_length + self.prediction_length,
        )

        # we sum the last axis to have the same shape for all likelihoods
        # (batch_size, subseq_length, 1)
        likelihoods = -distr.log_prob(target).expand_dims(axis=-1)

        assert_shape(likelihoods, (-1, seq_len, 1))

        past_observed_values = F.broadcast_minimum(
            past_observed_values, 1 - past_is_pad.expand_dims(axis=-1)
        )

        # (batch_size, subseq_length, target_dim)
        observed_values = F.concat(
            past_observed_values.slice_axis(
                axis=1, begin=-self.context_length, end=None
            ),
            future_observed_values,
            dim=1,
        )

        # mask the loss at one time step if one or more observations is missing
        # in the target dimensions (batch_size, subseq_length, 1)
        loss_weights = observed_values.min(axis=-1, keepdims=True)

        assert_shape(loss_weights, (-1, seq_len, 1))

        loss = weighted_average(
            F=F, x=likelihoods, weights=loss_weights, axis=1
        )

        assert_shape(loss, (-1, -1, 1))

        self.distribution = distr

        return (loss, likelihoods) + distr_args
Example #29
    def process_static_real(self, F, feature: Tensor) -> Tensor:
        return F.tile(feature.expand_dims(axis=1), reps=(1, self.T, 1))
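A minimal sketch of the same tiling, assuming an illustrative self.T = 4:

import mxnet as mx

feature = mx.nd.array([[1.0, 2.0]])  # (batch_size, num_feat_static)
T = 4
tiled = mx.nd.tile(feature.expand_dims(axis=1), reps=(1, T, 1))
print(tiled.shape)  # (1, 4, 2)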
Example #30
    def predict_hybrid_forward(
        self,
        F,
        target_dimension_indicator: Tensor,
        past_time_feat: Tensor,
        past_target_cdf: Tensor,
        past_observed_values: Tensor,
        past_is_pad: Tensor,
        future_time_feat: Tensor,
    ) -> Tensor:
        """
        Predicts samples given the trained DeepVAR model.
        All tensors should have NTC layout.
        Parameters
        ----------
        F
        target_dimension_indicator
            Indices of the target dimension (batch_size, target_dim)
        past_time_feat
            Dynamic features of past time series (batch_size, history_length,
            num_features)
        past_target_cdf
            Past marginal CDF transformed target values (batch_size,
            history_length, target_dim)
        past_observed_values
            Indicator whether or not the values were observed (batch_size,
            history_length, target_dim)
        past_is_pad
            Indicator whether the past target values have been padded
            (batch_size, history_length)
        future_time_feat
            Future time features (batch_size, prediction_length, num_features)

        Returns
        -------
        sample_paths : Tensor
            A tensor containing sampled paths (1, num_sample_paths,
            prediction_length, target_dim).

        """

        # mark padded data as unobserved
        # (batch_size, target_dim, seq_len)
        past_observed_values = F.broadcast_minimum(
            past_observed_values, 1 - past_is_pad.expand_dims(axis=-1)
        )

        # unroll the decoder in "prediction mode", i.e. with past data only
        _, state, scale, _, inputs = self.unroll_encoder(
            F=F,
            past_time_feat=past_time_feat,
            past_target_cdf=past_target_cdf,
            past_observed_values=past_observed_values,
            past_is_pad=past_is_pad,
            future_time_feat=None,
            future_target_cdf=None,
            target_dimension_indicator=target_dimension_indicator,
        )

        return self.sampling_decoder(
            F=F,
            past_target_cdf=past_target_cdf,
            target_dimension_indicator=target_dimension_indicator,
            time_feat=future_time_feat,
            scale=scale,
            begin_states=state,
        )