Example #1
def _index_tensor(x: Tensor, item: Any) -> Tensor:
    """Index a tensor with NumPy-style items (ints, slices, Ellipsis)."""
    squeeze: List[int] = []
    if not isinstance(item, tuple):
        item = (item, )

    saw_ellipsis = False

    for i, item_i in enumerate(item):
        axis = i - len(item) if saw_ellipsis else i
        if isinstance(item_i, int):
            if item_i != -1:
                x = x.slice_axis(axis=axis, begin=item_i, end=item_i + 1)
            else:
                x = x.slice_axis(axis=axis, begin=-1, end=None)
            squeeze.append(axis)
        elif item_i == slice(None):
            continue
        elif item_i == Ellipsis:
            saw_ellipsis = True
            continue
        elif isinstance(item_i, slice):
            assert item_i.step is None
            start = item_i.start if item_i.start is not None else 0
            x = x.slice_axis(axis=axis, begin=start, end=item_i.stop)
        else:
            raise RuntimeError(f"invalid indexing item: {item}")
    if len(squeeze):
        x = x.squeeze(axis=tuple(squeeze))
    return x
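
The helper reproduces NumPy-style basic indexing on top of `slice_axis`; a minimal NumPy sketch (hypothetical shapes) of the three item kinds handled above:

import numpy as np

x = np.arange(24).reshape(2, 3, 4)   # hypothetical (2, 3, 4) tensor
assert x[1].shape == (3, 4)          # integer item: slice and then squeeze the axis
assert x[:, 0:2].shape == (2, 2, 4)  # slice item: slice_axis with begin/stop
assert x[..., -1].shape == (2, 3)    # Ellipsis: remaining items index the trailing axes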
Example #2
 def __init__(
     self,
     alpha: Tensor,
     beta: Tensor,
     zero_probability: Tensor,
     one_probability: Tensor,
 ) -> None:
     F = getF(alpha)
     self.alpha = alpha
     self.beta = beta
     self.zero_probability = zero_probability
     self.one_probability = one_probability
     self.beta_probability = 1 - zero_probability - one_probability
     self.beta_distribution = Beta(alpha=alpha, beta=beta)
     mixture_probs = F.stack(zero_probability,
                             one_probability,
                             self.beta_probability,
                             axis=-1)
     super().__init__(
         components=[
             Deterministic(alpha.zeros_like()),
             Deterministic(alpha.ones_like()),
             self.beta_distribution,
         ],
         mixture_probs=mixture_probs,
     )
Example #3
    def _compute_edges(F, bin_centers: Tensor) -> Tensor:
        r"""
        Computes the edges of the bins based on the centers. The first and last edge are set to :math:`-10^{10}` and
        :math:`10^{10}`, respectively.

        Parameters
        ----------
        F
        bin_centers
            Tensor of shape `(*batch_shape, num_bins)`.

        Returns
        -------
        Tensor
            Tensor of shape `(*batch_shape, num_bins + 1)`.
        """

        low = (
            F.zeros_like(bin_centers.slice_axis(axis=-1, begin=0, end=1))
            - 1.0e10
        )
        high = (
            F.zeros_like(bin_centers.slice_axis(axis=-1, begin=0, end=1))
            + 1.0e10
        )

        means = (
            F.broadcast_add(
                bin_centers.slice_axis(axis=-1, begin=1, end=None),
                bin_centers.slice_axis(axis=-1, begin=0, end=-1),
            )
            / 2.0
        )

        return F.concat(low, means, high, dim=-1)
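
A minimal NumPy sketch of the same edge construction, assuming a 1-D `bin_centers`: midpoints between neighbouring centers, with -1e10 and 1e10 as the outer edges.

import numpy as np

bin_centers = np.array([0.0, 1.0, 3.0])
means = (bin_centers[1:] + bin_centers[:-1]) / 2.0     # midpoints between neighbouring centers
edges = np.concatenate([[-1.0e10], means, [1.0e10]])   # shape (num_bins + 1,)
# edges == [-1e10, 0.5, 2.0, 1e10]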
Example #4
    def fit(cls, F, samples: Tensor, rank: int = 0) -> Distribution:
        """
        Returns an instance of `LowrankMultivariateGaussian` after fitting
        parameters to the given data. Only the special case of `rank` = 0 is
        supported at the moment.

        Parameters
        ----------
        F
        samples
            Tensor of shape (num_samples, batch_size, seq_len, target_dim)
        rank
            Rank of W

        Returns
        -------
        Distribution instance of type `LowrankMultivariateGaussian`.

        """
        # TODO: Implement it for the general case: `rank` > 0
        assert rank == 0, "Fit is only implemented for the case rank = 0!"

        # Compute mean and variances
        mu = samples.mean(axis=0)
        var = F.square(samples - samples.mean(axis=0)).mean(axis=0)
        return cls(dim=samples.shape[-1], rank=rank, mu=mu, D=var)
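
For `rank == 0` the fit reduces to a per-coordinate sample mean and variance; a NumPy sketch with hypothetical dimensions:

import numpy as np

samples = np.random.randn(100, 4, 7, 3)     # (num_samples, batch_size, seq_len, target_dim)
mu = samples.mean(axis=0)                   # (batch_size, seq_len, target_dim)
var = np.square(samples - mu).mean(axis=0)  # diagonal D of the covariance, same shape as mu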
Example #5
        def s(mu: Tensor, D: Tensor, W: Tensor) -> Tensor:
            F = getF(mu)

            samples_D = F.sample_normal(mu=F.zeros_like(mu),
                                        sigma=F.ones_like(mu),
                                        dtype=dtype)
            cov_D = D.sqrt() * samples_D

            # dummy tensor, used only to obtain a tensor of shape (..., rank)
            dummy_tensor = F.linalg_gemm2(W,
                                          mu.expand_dims(axis=-1),
                                          transpose_a=True).squeeze(axis=-1)

            samples_W = F.sample_normal(
                mu=F.zeros_like(dummy_tensor),
                sigma=F.ones_like(dummy_tensor),
                dtype=dtype,
            )

            cov_W = F.linalg_gemm2(
                W, samples_W.expand_dims(axis=-1)).squeeze(axis=-1)

            samples = mu + cov_D + cov_W

            return samples
Example #6
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F
        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        condition = F.broadcast_greater(level, level.zeros_like() + 0.5)
        # Laplace quantile: u = log(2 * level) for level <= 0.5, -log(2 - 2 * level) otherwise
        u = F.where(condition, -F.log(2.0 - 2.0 * level), F.log(2.0 * level))

        return F.broadcast_add(self.mu, F.broadcast_mul(self.b, u))
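
A NumPy check of the piecewise expression against the closed-form Laplace quantile Q(p) = mu - b * sign(p - 0.5) * log(1 - 2 * |p - 0.5|), with hypothetical parameters:

import numpy as np

mu, b = 1.0, 2.0
level = np.array([0.1, 0.5, 0.9])
u = np.where(level > 0.5, -np.log(2.0 - 2.0 * level), np.log(2.0 * level))
closed_form = mu - b * np.sign(level - 0.5) * np.log(1.0 - 2.0 * np.abs(level - 0.5))
assert np.allclose(mu + b * u, closed_form)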
Example #7
    def compute_scale(self, F, data: Tensor,
                      observed_indicator: Tensor) -> Tensor:
        """
        Parameters
        ----------
        F
            A module that can either refer to the Symbol API or the NDArray
            API in MXNet.

        data
            tensor containing the data to be scaled.

        observed_indicator
            binary tensor with the same shape as ``data``, equal to 1 where
            data points are observed and 0 where data points are missing.

        Returns
        -------
        Tensor
            shape (N, T, C) or (N, C, T) scaled along the specified axis.

        """

        axis_zero = nd.prod(
            data == data.zeros_like(), self.axis, keepdims=True
        )  # flag series that are identically zero along the specified axis
        axis_zero = nd.broadcast_to(
            axis_zero, shape=data.shape)  # broadcast it to the shape of data

        min_val = nd.where(
            1 - observed_indicator,
            nd.broadcast_to(data.max(keepdims=True), shape=data.shape),
            data,
        ).min(
            axis=self.axis, keepdims=True
        )  # min along the specified axis, ignoring values marked missing by observed_indicator
        max_val = nd.where(
            1 - observed_indicator,
            nd.broadcast_to(data.min(keepdims=True), shape=data.shape),
            data,
        ).max(
            axis=self.axis, keepdims=True
        )  # max along the specified axis, ignoring values marked missing by observed_indicator

        scaled_data = (data - min_val) / (max_val - min_val)  # rescale to [0, 1]
        scaled_data = nd.where(
            axis_zero, scaled_data.zeros_like(), scaled_data
        )  # set NaNs to zero where the data is identically zero along the specified axis
        scaled_data = nd.where(
            scaled_data != scaled_data, scaled_data.ones_like(), scaled_data
        )  # set remaining NaNs to one; scaled_data != scaled_data locates the NaNs

        return nd.where(
            1 - observed_indicator, scaled_data.zeros_like(), scaled_data
        )  # finally, zero out the entries marked as missing by observed_indicator
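
A simplified NumPy sketch of the same min-max scaling with missing values (hypothetical 2-D data, axis=1); the NaN handling is collapsed into a single step here:

import numpy as np

data = np.array([[1.0, 5.0, 3.0], [0.0, 0.0, 0.0]])      # (N, T)
observed = np.array([[1.0, 0.0, 1.0], [1.0, 1.0, 1.0]])  # 1 = observed, 0 = missing

masked = np.where(observed == 0, data.max(), data)        # hide missing values from the min
min_val = masked.min(axis=1, keepdims=True)
masked = np.where(observed == 0, data.min(), data)        # hide missing values from the max
max_val = masked.max(axis=1, keepdims=True)

with np.errstate(invalid="ignore"):
    scaled = (data - min_val) / (max_val - min_val)       # NaN where max == min
scaled = np.where(np.isnan(scaled), 0.0, scaled)          # degenerate (all-zero) rows -> 0
scaled = np.where(observed == 0, 0.0, scaled)             # zero out missing entries
# scaled == [[0., 0., 1.], [0., 0., 0.]]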
Example #8
    def exact_inference(self, x_train: Tensor, y_train: Tensor,
                        x_test: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Parameters
        ----------
        x_train
            Training set of features of shape (batch_size, context_length, num_features).
        y_train
            Training labels of shape (batch_size, context_length).
        x_test
            Test set of features of shape (batch_size, prediction_length, num_features).
        Returns
        -------
        Tuple
            Tensor
                Predictive GP samples of shape (batch_size, prediction_length, num_samples).
            Tensor
                Predictive mean of the GP of shape (batch_size, prediction_length).
            Tensor
                Predictive standard deviation of the GP of shape (batch_size, prediction_length).
        """
        assert (self.context_length
                is not None), "The value of `context_length` must be set."
        assert (self.prediction_length
                is not None), "The value of `prediction_length` must be set."
        # Compute Cholesky factorization of training kernel matrix
        l_train = self._compute_cholesky_gp(
            self.kernel.kernel_matrix(x_train, x_train), self.context_length)

        lower_tri_solve = self.F.linalg.trsm(
            l_train, self.kernel.kernel_matrix(x_train, x_test))
        predictive_mean = self.F.linalg.gemm2(
            lower_tri_solve,
            self.F.linalg.trsm(l_train, y_train.expand_dims(axis=-1)),
            transpose_a=True,
        ).squeeze(axis=-1)
        # the second term can be rewritten as
        # :math:`||L^{-1} K(x_train, x_test)||_2^2`
        # so that only one triangular solve is needed
        predictive_covariance = self.kernel.kernel_matrix(
            x_test, x_test) - self.F.linalg.gemm2(
                lower_tri_solve, lower_tri_solve, transpose_a=True)
        # Extract diagonal entries of covariance matrix
        predictive_std = batch_diagonal(
            self.F,
            predictive_covariance,
            self.prediction_length,
            self.float_type,
        )
        # If self.sample_noise = True, predictive covariance has sigma^2 on the diagonal
        if self.sample_noise:
            predictive_std = self.F.broadcast_add(predictive_std,
                                                  self.sigma**2)
        predictive_std = self.F.sqrt(predictive_std).squeeze(axis=-1)
        # Compute sample from GP predictive distribution
        return (
            self.sample(predictive_mean, predictive_covariance),
            predictive_mean,
            predictive_std,
        )
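
A dense NumPy/SciPy sketch of the same Cholesky-based GP prediction for a single batch element, using a hypothetical RBF kernel in place of `self.kernel.kernel_matrix`:

import numpy as np
from scipy.linalg import cholesky, solve_triangular

def rbf(a, b):                                    # hypothetical kernel
    return np.exp(-0.5 * (a[:, None] - b[None, :]) ** 2)

x_train = np.linspace(0.0, 1.0, 20)
y_train = np.sin(2.0 * np.pi * x_train)
x_test = np.linspace(1.0, 1.5, 5)

L = cholesky(rbf(x_train, x_train) + 1e-6 * np.eye(20), lower=True)
v = solve_triangular(L, rbf(x_train, x_test), lower=True)   # L^{-1} K(train, test)
mean = v.T @ solve_triangular(L, y_train, lower=True)       # predictive mean
cov = rbf(x_test, x_test) - v.T @ v                         # predictive covariance
std = np.sqrt(np.clip(np.diag(cov), 0.0, None))             # predictive standard deviation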
Example #9
    def compute_scale(
        self, F, data: Tensor, observed_indicator: Tensor  # shapes (N, T, C)
    ) -> Tensor:
        # these will have shape (N, C)
        num_observed = F.sum(observed_indicator, axis=1)
        sum_observed = (data.abs() * observed_indicator).sum(axis=1)

        # first compute a global scale per-dimension
        total_observed = num_observed.sum(axis=0)
        denominator = F.maximum(total_observed, 1.0)
        default_scale = sum_observed.sum(axis=0) / denominator  # shape (C, )

        # then compute a per-item, per-dimension scale
        denominator = F.maximum(num_observed, 1.0)
        scale = sum_observed / denominator  # shape (N, C)

        # use per-batch scale when no element is observed
        # or when the sequence contains only zeros
        cond = F.broadcast_greater(sum_observed, F.zeros_like(sum_observed))
        scale = F.where(
            cond,
            scale,
            F.broadcast_mul(default_scale, F.ones_like(num_observed)),
        )

        return F.maximum(scale, self.scale_min)
Example #10
def lowrank_log_likelihood(rank: int, mu: Tensor, D: Tensor, W: Tensor,
                           x: Tensor) -> Tensor:

    F = getF(mu)

    dim = F.ones_like(mu).sum(axis=-1).max()

    dim_factor = dim * math.log(2 * math.pi)

    if W is not None:
        batch_capacitance_tril = capacitance_tril(F=F, rank=rank, W=W, D=D)

        log_det_factor = log_det(F=F,
                                 batch_D=D,
                                 batch_capacitance_tril=batch_capacitance_tril)

        mahalanobis_factor = mahalanobis_distance(
            F=F, W=W, D=D, capacitance_tril=batch_capacitance_tril, x=x - mu)
    else:
        log_det_factor = D.log().sum(axis=-1)
        x_centered = x - mu
        mahalanobis_factor = F.broadcast_div(x_centered.square(),
                                             D).sum(axis=-1)

    ll: Tensor = -0.5 * (F.broadcast_add(dim_factor, log_det_factor) +
                         mahalanobis_factor)

    return ll
Example #11
    def log_prob(self, x: Tensor) -> Tensor:
        F = self.F

        # mask NaNs in the data with ones to prevent NaN gradients
        x_non_nan = F.where(x != x, F.ones_like(x), x)

        # calculate likelihood for values which are not NaN
        non_nan_dist_log_likelihood = F.where(
            x != x,
            -x.ones_like() / 0.0,
            self.components[0].log_prob(x_non_nan),
        )

        log_mix_weights = F.log(self.mixture_probs)

        # stack log probabilities of components
        component_log_likelihood = F.stack(
            *[non_nan_dist_log_likelihood, self.components[1].log_prob(x)],
            axis=-1,
        )
        # compute mixture log probability by log-sum-exp
        summands = log_mix_weights + component_log_likelihood
        max_val = F.max_axis(summands, axis=-1, keepdims=True)

        sum_exp = F.sum(F.exp(F.broadcast_minus(summands, max_val)),
                        axis=-1,
                        keepdims=True)

        log_sum_exp = F.log(sum_exp) + max_val
        return log_sum_exp.squeeze(axis=-1)
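
The tail of the method is a standard log-sum-exp; a NumPy check that the max-subtraction keeps it finite where a direct computation underflows:

import numpy as np

summands = np.array([-1000.0, -1001.0, -999.0])   # log(w_k) + log p_k(x)
max_val = summands.max(axis=-1, keepdims=True)
stable = np.log(np.exp(summands - max_val).sum(axis=-1, keepdims=True)) + max_val
# np.log(np.exp(summands).sum()) would underflow to -inf here; the stable form stays finite
assert np.isfinite(stable).all()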
Example #12
    def hybrid_forward(
            self,
            F,
            feat_static_cat: Tensor,  # (batch_size, 1)
            past_time_feat: Tensor,
            # (batch_size, history_length, num_features)
            past_target: Tensor,  # (batch_size, history_length)
    ) -> Tensor:
        """
        Parameters
        ----------
        F
            Function space
        feat_static_cat
            Shape: (batch_size, 1)
        past_time_feat
            Shape: (batch_size, history_length, num_features)
        past_target
            Shape: (batch_size, history_length)

        Returns
        -------
        Tensor
            A batch of negative log likelihoods.
        """

        fixed_effect, random_effect = self.compute_global_local(
            F, feat_static_cat, past_time_feat)

        loss = self.negative_normal_likelihood(F,
                                               past_target.expand_dims(axis=2),
                                               fixed_effect, random_effect)
        return loss
Example #13
    def get_issm_coeff(
        self,
        seasonal_indicators: Tensor  # (batch_size, time_length)
    ) -> Tuple[Tensor, Tensor, Tensor]:
        F = getF(seasonal_indicators)
        emission_coeff_ls, transition_coeff_ls, innovation_coeff_ls = zip(
            self.nonseasonal_issm.get_issm_coeff(seasonal_indicators),
            *[
                issm.get_issm_coeff(
                    seasonal_indicators.slice_axis(axis=-1,
                                                   begin=ix,
                                                   end=ix + 1))
                for ix, issm in enumerate(self.seasonal_issms)
            ],
        )

        # stack emission and innovation coefficients
        emission_coeff = F.concat(*emission_coeff_ls, dim=-1)

        innovation_coeff = F.concat(*innovation_coeff_ls, dim=-1)

        # transition coefficient is block diagonal!
        transition_coeff = _make_block_diagonal(transition_coeff_ls)

        return emission_coeff, transition_coeff, innovation_coeff
Example #14
def _expand_param(p: Tensor, num_samples: Optional[int] = None) -> Tensor:
    """
    Expand parameters by num_samples along the first dimension.
    """
    if num_samples is None:
        return p
    return p.expand_dims(axis=0).repeat(axis=0, repeats=num_samples)
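
In NumPy terms the expansion is a repeat along a new leading axis:

import numpy as np

p = np.array([1.0, 2.0, 3.0])
expanded = np.repeat(p[None, ...], repeats=4, axis=0)  # shape (4, 3), rows are copies of p
assert expanded.shape == (4, 3)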
Example #15
def capacitance_tril(F, rank: Tensor, W: Tensor, D: Tensor) -> Tensor:
    r"""

    Parameters
    ----------
    F
    rank
    W : (..., dim, rank)
    D : (..., dim)

    Returns
    -------
        the lower Cholesky factor of the capacitance matrix :math:`I + W^T D^{-1} W`

    """
    # (..., dim, rank)
    Wt_D_inv_t = F.broadcast_div(W, D.expand_dims(axis=-1))

    # (..., rank, rank)
    K = F.linalg_gemm2(Wt_D_inv_t, W, transpose_a=True)

    # (..., rank, rank)
    Id = F.broadcast_mul(F.ones_like(K), F.eye(rank))

    # (..., rank, rank)
    return F.linalg.potrf(K + Id)
Example #16
    def log_prob(self, x: Tensor) -> Tensor:
        F = self.F
        alpha, beta = self.alpha, self.beta

        def gamma_log_prob(x, alpha, beta):
            return (
                alpha * F.log(beta)
                - F.gammaln(alpha)
                + (alpha - 1) * F.log(x)
                - beta * x
            )

        """
        The gamma_log_prob(x) above returns NaN for x <= 0. Wherever a NaN appears in either of the
        F.where() branches, F.where() returns NaN at that entry as well, due to its indicator-function
        multiplication: 1 * f(x) + nan * 0 = nan, since nan * 0 returns nan.
        Therefore, replacing gamma_log_prob(x) with gamma_log_prob(abs(x)) avoids NaN returns for
        x <= 0 without altering the value for x > 0.
        This is a known issue in PyTorch as well: https://github.com/pytorch/pytorch/issues/12986.
        """
        # mask zeros to prevent NaN gradients for x==0
        x_masked = F.where(x == 0, x.ones_like() * 0.5, x)

        return F.where(
            x > 0,
            gamma_log_prob(F.abs(x_masked), alpha, beta),
            -(10.0 ** 15) * F.ones_like(x),
        )
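
A tiny NumPy illustration of why the argument of `gamma_log_prob` is masked, following the note above (the forward values are selectable, but a NaN in the unselected branch would poison gradients computed via the indicator-multiplication form of `where`):

import numpy as np

# 1 * f(x) + nan * 0 = nan: the nan from the unselected branch does not cancel
assert np.isnan(1.0 * 0.7 + np.nan * 0.0)

x = np.array([-1.0, 0.0, 2.0])
x_masked = np.where(x == 0, 0.5, x)                       # avoid log(0)
vals = np.where(x > 0, np.log(np.abs(x_masked)), -1e15)   # abs() avoids log of negatives
assert not np.isnan(vals).any()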
Example #17
def weighted_average(
    F, x: Tensor, weights: Optional[Tensor] = None, axis: Optional[int] = None
) -> Tensor:
    """
    Computes the weighted average of a given tensor across a given axis, masking values associated with weight zero,
    meaning instead of `nan * 0 = nan` you will get `0 * 0 = 0`.

    Parameters
    ----------
    F
        The function space to use.
    x
        Input tensor, of which the average must be computed.
    weights
        Weights tensor, of the same shape as `x`.
    axis
        The axis along which to average `x`

    Returns
    -------
    Tensor:
        The tensor with values averaged along the specified `axis`.
    """
    if weights is not None:
        weighted_tensor = F.where(
            condition=weights, x=x * weights, y=F.zeros_like(x)
        )
        sum_weights = F.maximum(1.0, weights.sum(axis=axis))
        return weighted_tensor.sum(axis=axis) / sum_weights
    else:
        return x.mean(axis=axis)
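
A NumPy sketch of the masked average for a hypothetical 1-D input:

import numpy as np

x = np.array([np.nan, 2.0, 4.0])
weights = np.array([0.0, 1.0, 1.0])            # zero weight masks the nan entry
weighted = np.where(weights != 0, x * weights, 0.0)
avg = weighted.sum() / np.maximum(1.0, weights.sum())
assert avg == 3.0                              # the nan entry is never selected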
Example #18
    def quantile_losses(self, obs: Tensor, quantiles: Tensor,
                        levels: Tensor) -> Tensor:
        """
        Computes quantile losses for all the quantiles specified.

        Parameters
        ----------
        obs
            Ground truth observation. Shape: `(batch_size, seq_len, *event_shape)`
        quantiles
            Quantile values. Shape: `(batch_size, seq_len, *event_shape, num_quantiles)`
        levels
            Quantile levels. Shape: `(batch_size, seq_len, *event_shape, num_quantiles)`
        Returns
        -------
        Tensor
            Quantile losses of shape: `(batch_size, seq_len, *event_shape, num_quantiles)`

        """
        obs = obs.expand_dims(axis=-1)
        assert obs.shape[:-1] == quantiles.shape[:-1]
        assert obs.shape[:-1] == levels.shape[:-1]
        assert obs.shape[-1] == 1

        return self.F.where(
            obs >= quantiles,
            levels * (obs - quantiles),
            (1 - levels) * (quantiles - obs),
        )
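
The expression is the standard pinball (quantile) loss; a NumPy sketch for a single observation and a few hypothetical levels:

import numpy as np

obs = 10.0
quantiles = np.array([8.0, 10.0, 13.0])   # predicted quantiles
levels = np.array([0.1, 0.5, 0.9])        # corresponding quantile levels

loss = np.where(obs >= quantiles,
                levels * (obs - quantiles),
                (1 - levels) * (quantiles - obs))
# loss == [0.2, 0.0, 0.3]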
Example #19
        def _assemble_covariates(
            feat_dynamic_real: Tensor,
            feat_dynamic_cat: Tensor,
            feat_static_real: Tensor,
            feat_static_cat: Tensor,
            is_past: bool,
        ) -> Tensor:
            covariates = []
            if feat_dynamic_real.shape[-1] > 0:
                covariates.append(feat_dynamic_real)
            if feat_static_real.shape[-1] > 0:
                covariates.append(
                    feat_static_real.expand_dims(axis=1).repeat(
                        axis=1,
                        repeats=self.context_length
                        if is_past else self.prediction_length,
                    ))
            if len(covariates) > 0:
                covariates = F.concat(*covariates, dim=-1)
                covariates = self.covar_proj(covariates)
            else:
                covariates = None

            categories = []
            if feat_dynamic_cat.shape[-1] > 0:
                categories.append(feat_dynamic_cat)
            if feat_static_cat.shape[-1] > 0:
                categories.append(
                    feat_static_cat.expand_dims(axis=1).repeat(
                        axis=1,
                        repeats=self.context_length
                        if is_past else self.prediction_length,
                    ))
            if len(categories) > 0:
                categories = F.concat(*categories, dim=-1)
                embeddings = self.embedder(categories)
                embeddings = F.reshape(embeddings,
                                       shape=(0, 0, -4, self.d_hidden,
                                              -1)).sum(axis=-1)
                if covariates is not None:
                    covariates = covariates + embeddings
                else:
                    covariates = embeddings

            return covariates
Example #20
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F

        # self.bin_probs.shape = (batch_shape, num_bins)
        probs = self.bin_probs.transpose()  # (num_bins, batch_shape.T)

        # (batch_shape)
        zeros_batch_size = F.zeros_like(
            F.slice_axis(self.bin_probs, axis=-1, begin=0, end=1).squeeze(
                axis=-1
            )
        )

        level = level.expand_dims(axis=0)

        # cdf shape (batch_size.T, levels)
        zeros_cdf = F.broadcast_add(
            zeros_batch_size.transpose().expand_dims(axis=-1),
            level.zeros_like(),
        )
        start_state = (zeros_cdf, zeros_cdf.astype("int32"))

        def step(p, state):
            cdf, idx = state
            cdf = F.broadcast_add(cdf, p.expand_dims(axis=-1))
            idx = F.where(F.broadcast_greater(cdf, level), idx, idx + 1)
            return zeros_batch_size, (cdf, idx)

        _, states = F.contrib.foreach(step, probs, start_state)
        _, idx = states

        # idx.shape = (batch.T, levels)
        # centers.shape = (batch, num_bins)
        #
        # expand centers to shape -> (levels, batch, num_bins)
        # so we can use pick with idx.T.shape = (levels, batch)
        #
        # zeros_cdf.shape (batch.T, levels)
        centers_expanded = F.broadcast_add(
            self.bin_centers.transpose().expand_dims(axis=-1),
            zeros_cdf.expand_dims(axis=0),
        ).transpose()

        # centers_expanded.shape = (levels, batch, num_bins)
        # idx.shape (batch.T, levels)
        a = centers_expanded.pick(idx.transpose(), axis=-1)
        return a
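
The scan computes, for each level, the index of the first bin whose cumulative probability exceeds that level; a NumPy sketch of the same lookup (up to tie-breaking at exact bin boundaries) for a single batch element:

import numpy as np

bin_probs = np.array([0.2, 0.5, 0.3])
bin_centers = np.array([-1.0, 0.0, 2.0])
levels = np.array([0.1, 0.6, 0.95])

cdf = np.cumsum(bin_probs)           # [0.2, 0.7, 1.0]
idx = np.searchsorted(cdf, levels)   # first bin whose cumulative probability exceeds the level
quantiles = bin_centers[idx]         # [-1.0, 0.0, 2.0]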
Example #21
    def quantile_internal(self,
                          x: Tensor,
                          axis: Optional[int] = None) -> Tensor:
        r"""
        Evaluates the quantile function at the quantile levels contained in `x`.

        Parameters
        ----------
        x
            Tensor of shape ``*gamma.shape`` if axis=None, or containing an
            additional axis on the specified position, otherwise.
        axis
            Index of the axis containing the different quantile levels which
            are to be computed.

        Returns
        -------
        Tensor
            Quantiles tensor, of the same shape as x.
        """

        F = self.F

        # shapes of self
        # self.gamma: (*batch_shape)
        # self.knot_positions, self.b: (*batch_shape, num_pieces)

        # axis=None - passed at inference when num_samples is None
        # The shape of x is (*batch_shape).
        # The shapes of the parameters should be:
        # gamma: (*batch_shape), knot_positions, b: (*batch_shape, num_pieces)
        # They match the self. counterparts so no reshaping is needed

        # axis=0 - passed at inference when num_samples is not None
        # The shape of x is (num_samples, *batch_shape).
        # The shapes of the parameters should be:
        # gamma: (num_samples, *batch_shape), knot_positions, b: (num_samples, *batch_shape, num_pieces),
        # They do not match the self. counterparts and we need to expand the axis=0 to all of them.

        # axis=-2 - passed at training when we evaluate quantiles at knot_positions in order to compute a_tilde
        # The shape of x is shape(x) = shape(knot_positions) = (*batch_shape, num_pieces).
        # The shape of the parameters should be:
        # gamma: (*batch_shape, 1), knot_positions: (*batch_shape, 1, num_pieces), b: (*batch_shape, 1, num_pieces)
        # They do not match the self. counterparts and we need to expand axis=-1 for gamma and axis=-2 for the rest.

        if axis is not None:
            gamma = self.gamma.expand_dims(axis=axis if axis == 0 else -1)
            knot_positions = self.knot_positions.expand_dims(axis=axis)
            b = self.b.expand_dims(axis=axis)
        else:
            gamma, knot_positions, b = self.gamma, self.knot_positions, self.b

        x_minus_knots = F.broadcast_minus(x.expand_dims(axis=-1),
                                          knot_positions)

        quantile = F.broadcast_add(
            gamma, F.sum(F.broadcast_mul(b, F.relu(x_minus_knots)), axis=-1))

        return quantile
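
The quantile function is the linear spline gamma + sum_k b_k * relu(x - knot_k); a NumPy sketch for a single distribution with hypothetical parameters:

import numpy as np

gamma = 0.0
knot_positions = np.array([0.0, 0.25, 0.75])   # knots in quantile-level space
b = np.array([1.0, 2.0, 4.0])                  # slope increments after each knot

def quantile(x):
    return gamma + np.sum(b * np.maximum(x - knot_positions, 0.0), axis=-1)

assert np.isclose(quantile(0.5), 1.0 * 0.5 + 2.0 * 0.25)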
Example #22
 def s(mu: Tensor, b: Tensor) -> Tensor:
     ones = mu.ones_like()
     x = F.random.uniform(-0.5 * ones, 0.5 * ones, dtype=dtype)
     laplace_samples = mu - b * F.sign(x) * F.log(
         (1.0 - 2.0 * F.abs(x)).clip(1.0e-30, 1.0e30)
         # 1.0 - 2.0 * F.abs(x)
     )
     return laplace_samples
Example #23
def mahalanobis_distance(
    F, W: Tensor, D: Tensor, capacitance_tril: Tensor, x: Tensor
) -> Tensor:
    r"""
    Uses the Woodbury matrix identity

    .. math::
        (W W^T + D)^{-1} = D^{-1} - D^{-1} W C^{-1} W^T D^{-1},

    where :math:`C` is the capacitance matrix :math:`I + W^T D^{-1} W`, to compute the squared
    Mahalanobis distance :math:`x^T (W W^T + D)^{-1} x`.

    Parameters
    ----------
    F
    W
        (..., dim, rank)
    D
        (..., dim)
    capacitance_tril
        (..., rank, rank)
    x
        (..., dim)

    Returns
    -------
    Tensor
        The squared Mahalanobis distance :math:`x^T (W W^T + D)^{-1} x`, of shape (...).
    """
    xx = x.expand_dims(axis=-1)

    # (..., rank, 1)
    Wt_Dinv_x = F.linalg_gemm2(
        F.broadcast_div(W, D.expand_dims(axis=-1)), xx, transpose_a=True
    )

    # compute x^T D^-1 x, (...,)
    mahalanobis_D_inv = F.broadcast_div(x.square(), D).sum(axis=-1)

    # (..., rank)
    L_inv_Wt_Dinv_x = F.linalg_trsm(capacitance_tril, Wt_Dinv_x).squeeze(
        axis=-1
    )

    mahalanobis_L = L_inv_Wt_Dinv_x.square().sum(axis=-1).squeeze()

    return F.broadcast_minus(mahalanobis_D_inv, mahalanobis_L)
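
A NumPy check of the Woodbury route against a direct computation with the full covariance, for hypothetical small dimensions:

import numpy as np

rng = np.random.default_rng(0)
dim, rank = 5, 2
W = rng.normal(size=(dim, rank))
D = rng.uniform(1.0, 2.0, size=dim)                 # diagonal part of the covariance
x = rng.normal(size=dim)

# direct: x^T (W W^T + D)^{-1} x
direct = x @ np.linalg.solve(W @ W.T + np.diag(D), x)

# Woodbury route, as in capacitance_tril / mahalanobis_distance above
C = np.eye(rank) + W.T @ (W / D[:, None])           # capacitance matrix I + W^T D^{-1} W
L = np.linalg.cholesky(C)
Wt_Dinv_x = W.T @ (x / D)
z = np.linalg.solve(L, Wt_Dinv_x)                   # L^{-1} W^T D^{-1} x
woodbury = x @ (x / D) - z @ z

assert np.isclose(direct, woodbury)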
Example #24
 def __init__(self, alpha: Tensor, beta: Tensor,
              one_probability: Tensor) -> None:
     super().__init__(
         alpha=alpha,
         beta=beta,
         zero_probability=alpha.zeros_like(),
         one_probability=one_probability,
     )
Example #25
    def get_gp_params(
        self,
        F,
        past_target: Tensor,
        past_time_feat: Tensor,
        feat_static_cat: Tensor,
    ) -> Tuple:
        """
        This function returns the GP hyper-parameters for the model.

        Parameters
        ----------
        F
            A module that can either refer to the Symbol API or the NDArray
            API in MXNet.
        past_target
            Training time series values of shape (batch_size, context_length).
        past_time_feat
            Training features of shape (batch_size, context_length,
            num_features).
        feat_static_cat
            Time series indices of shape (batch_size, 1).

        Returns
        -------
        Tuple
            Tuple of kernel hyper-parameters of length num_hyperparams.
                Each is a Tensor of shape (batch_size, 1, 1).
            Model noise sigma.
                Tensor of shape (batch_size, 1, 1).
        """
        # shape (batch_size, num_hyperparams + 1)
        output = self.embedding(feat_static_cat.squeeze())
        kernel_args = self.proj_kernel_args(output)
        sigma = softplus(
            F,
            output.slice_axis(  # sigma is the last hyper-parameter
                axis=1,
                begin=self.num_hyperparams,
                end=self.num_hyperparams + 1,
            ),
        )
        if self.params_scaling:
            scalings = self.kernel_output.gp_params_scaling(
                F, past_target, past_time_feat)
            sigma = F.broadcast_mul(sigma, scalings[self.num_hyperparams])
            kernel_args = (F.broadcast_mul(kernel_arg, scaling)
                           for kernel_arg, scaling in zip(
                               kernel_args, scalings[0:self.num_hyperparams]))
        min_value = 1e-5
        max_value = 1e8
        kernel_args = (kernel_arg.clip(min_value,
                                       max_value).expand_dims(axis=2)
                       for kernel_arg in kernel_args)
        sigma = sigma.clip(min_value, max_value).expand_dims(axis=2)
        return kernel_args, sigma
Example #26
    def quantile(self, level: Tensor):
        F = self.F
        # we consider level to be an independent axis and so expand it
        # to shape (num_levels, 1, 1, ...)
        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        x_shifted = F.broadcast_div(F.power(1 - level, -self.xi) - 1, self.xi)
        x = F.broadcast_mul(x_shifted, self.beta)
        return x
Example #27
    def compute_scale(
        self,
        F,
        data: Tensor,
        observed_indicator: Tensor,  # shapes (N, T, C) or (N, C, T)
    ) -> Tensor:
        """
        Parameters
        ----------
        F
            A module that can either refer to the Symbol API or the NDArray
            API in MXNet.

        data
            tensor containing the data to be scaled.

        observed_indicator
            binary tensor with the same shape as ``data``, equal to 1 where
            data points are observed and 0 where data points are missing.

        Returns
        -------
        Tensor
            shape (N, C), computed according to the
            average absolute value over time of the observed values.
        """

        # these will have shape (N, C)
        num_observed = F.sum(observed_indicator, axis=self.axis)
        sum_observed = (data.abs() * observed_indicator).sum(axis=self.axis)

        # first compute a global scale per-dimension
        total_observed = num_observed.sum(axis=0)
        denominator = F.maximum(total_observed, 1.0)
        if self.default_scale is not None:
            default_scale = self.default_scale * F.ones_like(num_observed)
        else:
            # shape (C, )
            default_scale = sum_observed.sum(axis=0) / denominator

        # then compute a per-item, per-dimension scale
        denominator = F.maximum(num_observed, 1.0)
        scale = sum_observed / denominator  # shape (N, C)

        # use per-batch scale when no element is observed
        # or when the sequence contains only zeros
        cond = F.broadcast_greater(sum_observed, F.zeros_like(sum_observed))
        scale = F.where(
            cond,
            scale,
            F.broadcast_mul(default_scale, F.ones_like(num_observed)),
        )

        return F.maximum(scale, self.minimum_scale)
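
A NumPy sketch of the same mean-absolute scale with the per-batch fallback, for hypothetical (N, T) data and axis=1:

import numpy as np

data = np.array([[1.0, -3.0, 2.0], [0.0, 0.0, 0.0]])
observed = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 1.0]])

num_observed = observed.sum(axis=1)                    # (N,)
sum_observed = (np.abs(data) * observed).sum(axis=1)   # (N,)

default_scale = sum_observed.sum() / max(num_observed.sum(), 1.0)   # batch-level fallback
scale = sum_observed / np.maximum(num_observed, 1.0)                # per-item scale
scale = np.where(sum_observed > 0, scale, default_scale)            # fall back for all-zero items
scale = np.maximum(scale, 1e-10)                                    # minimum_scale
# scale == [2.0, 0.8]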
Example #28
    def log_intensity(self, x: Tensor) -> Tensor:
        r"""
        Logarithm of the intensity (a.k.a. hazard) function.

        The intensity is defined as :math:`\lambda(x) = p(x) / S(x)`.

        The intensity of the Weibull distribution is
        :math:`\lambda(x) = b * k * x^{k - 1}`.
        """
        log_x = x.clip(1e-10, np.inf).log()
        return self.rate.log() + self.shape.log() + (self.shape - 1) * log_x
Example #29
    def log_survival(self, x: Tensor) -> Tensor:
        r"""
        Logarithm of the survival function :math:`\log S(x) = \log(1 - CDF(x))`.

        We define :math:`z = (\log(x) - \mu) / \sigma` and obtain the survival
        function as :math:`S(x) = sigmoid(-z)`, or equivalently
        :math:`\log S(x) = -\log(1 + \exp(z))`.
        """
        log_x = x.clip(1e-20, np.inf).log()
        z = (log_x - self.mu) / self.sigma
        F = getF(x)
        return -F.Activation(z, "softrelu")
Example #30
    def quantile(self, level: Tensor) -> Tensor:
        F = self.F
        # we consider level to be an independent axis and so expand it
        # to shape (num_levels, 1, 1, ...)
        for _ in range(self.all_dim):
            level = level.expand_dims(axis=-1)

        return F.broadcast_add(
            self.mu,
            F.broadcast_mul(self.sigma,
                            math.sqrt(2.0) * F.erfinv(2.0 * level - 1.0)),
        )
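
The returned value is the usual Gaussian quantile mu + sigma * sqrt(2) * erfinv(2 * level - 1); a quick SciPy cross-check with hypothetical parameters:

import numpy as np
from scipy.special import erfinv
from scipy.stats import norm

mu, sigma = 1.0, 2.0
level = np.array([0.05, 0.5, 0.95])
q = mu + sigma * np.sqrt(2.0) * erfinv(2.0 * level - 1.0)
assert np.allclose(q, norm.ppf(level, loc=mu, scale=sigma))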