Example #1
    def distribution(
        self,
        feat_static_cat: Tensor,
        feat_static_real: Tensor,
        past_time_feat: Tensor,
        past_target: Tensor,
        past_observed_values: Tensor,
        future_time_feat: Tensor,
        future_target: Tensor,
        future_observed_values: Tensor,
        return_rnn_outputs: bool = False,
    ) -> Union[Distribution, Tuple[Distribution, Tensor]]:
        """

        Returns the distribution predicted by the model on the range of
        past_target and future_target.

        The distribution is obtained by unrolling the network with the true
        target; this is also the distribution that is minimized during
        training. It can be used for anomaly detection; see for instance
        examples/anomaly_detection.py.

        Input arguments are the same as for the hybrid_forward method.

        Returns
        -------
        Distribution
            a distribution object whose mean has shape:
            (batch_size, context_length + prediction_length).
        Tensor
            (optional) when return_rnn_outputs=True, rnn_outputs is also
            returned so that it can be used for regularization
        """
        # unroll the decoder in "training mode"
        # i.e. by providing future data as well
        F = getF(feat_static_cat)

        rnn_outputs, _, scale, _ = self.unroll_encoder(
            F=F,
            feat_static_cat=feat_static_cat,
            feat_static_real=feat_static_real,
            past_time_feat=past_time_feat,
            past_target=past_target,
            past_observed_values=past_observed_values,
            future_time_feat=future_time_feat,
            future_target=future_target,
        )

        distr_args = self.proj_distr_args(rnn_outputs)

        # return the output of rnn layers if return_rnn_outputs=True, so that it can be used for regularization later
        # assume no dropout for outputs, so can be directly used for activation regularization
        return (
            (
                self.distr_output.distribution(distr_args, scale=scale),
                rnn_outputs,
            )
            if return_rnn_outputs
            else self.distr_output.distribution(distr_args, scale=scale)
        )
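
A hypothetical usage sketch for the anomaly-detection use mentioned in the docstring; `net`, `batch`, and `target_slice` are placeholders, not names from the example above:

    # assumes `net` is a trained network exposing this `distribution` method
    # and `batch` holds the tensors listed in the signature above
    distr = net.distribution(**batch)

    # negative log-likelihood per time step over the context + prediction range;
    # `target_slice` is assumed to have shape
    # (batch_size, context_length + prediction_length)
    anomaly_score = -distr.log_prob(target_slice)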
Example #2
    def distribution(
        self,
        cond_mean: Tensor,
        interval_alpha_bias: Optional[Tensor] = None,
        size_alpha_bias: Optional[Tensor] = None,
    ) -> Tuple[Distribution, ...]:
        F = getF(cond_mean)

        cond_interval, cond_size = F.split(cond_mean, num_outputs=2, axis=-1)

        alpha_biases = [
            F.broadcast_mul(F.ones_like(cond_interval), bias)
            if bias is not None
            else None
            for bias in [interval_alpha_bias, size_alpha_bias]
        ]

        distr_params = zip(
            [self.interval_distr_output, self.size_distr_output],
            [cond_interval, cond_size],
            alpha_biases,
        )

        return tuple(
            (
                do.distribution(mean)
                if len(do.args_dim) == 1
                else do.distribution(
                    [mean, F.Activation(alpha_bias, "softrelu") + 1e-5]
                )
            )
            for ix, (do, mean, alpha_bias) in enumerate(distr_params)
        )
Example #3
    def get_issm_coeff(
        self,
        seasonal_indicators: Tensor  # (batch_size, time_length)
    ) -> Tuple[Tensor, Tensor, Tensor]:
        F = getF(seasonal_indicators)
        emission_coeff_ls, transition_coeff_ls, innovation_coeff_ls = zip(
            self.nonseasonal_issm.get_issm_coeff(seasonal_indicators),
            *[
                issm.get_issm_coeff(
                    seasonal_indicators.slice_axis(axis=-1,
                                                   begin=ix,
                                                   end=ix + 1))
                for ix, issm in enumerate(self.seasonal_issms)
            ],
        )

        # stack emission and innovation coefficients
        emission_coeff = F.concat(*emission_coeff_ls, dim=-1)

        innovation_coeff = F.concat(*innovation_coeff_ls, dim=-1)

        # transition coefficient is block diagonal!
        transition_coeff = _make_block_diagonal(transition_coeff_ls)

        return emission_coeff, transition_coeff, innovation_coeff
Example #4
 def __init__(
     self,
     amplitude: Tensor,
     length_scale: Tensor,
     frequency: Tensor,
     F=None,
 ) -> None:
     """
     Parameters
     ----------
     amplitude : Tensor
         Periodic kernel amplitude hyper-parameter of shape
         (batch_size, 1, 1).
     length_scale : Tensor
          Periodic kernel length scale hyper-parameter of shape
         (batch_size, 1, 1).
     frequency : Tensor
         Periodic kernel hyper-parameter of shape (batch_size, 1, 1).
     F : ModuleType
         A module that can either refer to the Symbol API or the NDArray
         API in MXNet.
     """
     self.F = F if F else getF(amplitude)
     self.amplitude = amplitude
     self.length_scale = length_scale
     self.frequency = frequency
Example #5
 def s(xi: Tensor, beta: Tensor) -> Tensor:
     F = getF(xi)
     sample_U = uniform.Uniform(F.zeros_like(xi),
                                F.ones_like(xi)).sample()
     boxcox = box_cox_transform.BoxCoxTransform(-xi, F.array([0]))
     sample_X = -1 * boxcox.f(1 - sample_U) * beta
     return sample_X
Example #6
    def log_survival(self, x: Tensor) -> Tensor:
        r"""
        Logarithm of the survival function :math:`\log S(x) = \log(1 - CDF(x))`.

        We define :math:`z = (\log(x) - \mu) / \sigma` and obtain the survival
        function as :math:`S(x) = sigmoid(-z)`, or equivalently
        :math:`\log S(x) = -\log(1 + \exp(z))`.
        """
        log_x = x.clip(1e-20, np.inf).log()
        z = (log_x - self.mu) / self.sigma
        F = getF(x)
        return -F.Activation(z, "softrelu")
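
For reference, a one-line derivation of the identity used above; MXNet's "softrelu" activation is the softplus :math:`\log(1 + e^{z})`:

    \log S(x) = \log\bigl(1 - \mathrm{sigmoid}(z)\bigr) = \log \mathrm{sigmoid}(-z) = -\log\bigl(1 + e^{z}\bigr)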
Example #7
    def log_intensity(self, x: Tensor) -> Tensor:
        r"""
        Logarithm of the intensity (a.k.a. hazard) function.

        The intensity is defined as :math:`\lambda(x) = p(x) / S(x)`.

        We define :math:`z = (\log(x) - \mu) / \sigma` and obtain the intensity
        as :math:`\lambda(x) = sigmoid(z) / (\sigma \cdot x)`, or equivalently
        :math:`\log \lambda(x) = z - \log(1 + \exp(z)) - \log(\sigma) - \log(x)`.
        """
        log_x = x.clip(1e-20, np.inf).log()
        z = (log_x - self.mu) / self.sigma
        F = getF(x)
        return z - self.sigma.log() - F.Activation(z, "softrelu") - log_x
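
A short derivation of the returned expression, assuming the log-logistic parameterization implied by the survival function above (:math:`x = e^{\mu + \sigma z}` with :math:`z` standard logistic):

    p(x) = \frac{\mathrm{sigmoid}(z)\,\mathrm{sigmoid}(-z)}{\sigma x}, \qquad
    S(x) = \mathrm{sigmoid}(-z), \qquad
    \lambda(x) = \frac{p(x)}{S(x)} = \frac{\mathrm{sigmoid}(z)}{\sigma x},

so that :math:`\log \lambda(x) = z - \log(1 + e^{z}) - \log\sigma - \log x`, which is what the code returns, with "softrelu" again computing :math:`\log(1 + e^{z})`.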
Example #8
    def emission_coeff(
        self, feature: Tensor  # (batch_size, time_length, 1)
    ) -> Tensor:
        F = getF(feature)

        _emission_coeff = F.ones(shape=(1, 1, 1, self.latent_dim()))

        # get the right shape: (batch_size, time_length, obs_dim, latent_dim)
        zeros = _broadcast_param(
            feature.squeeze(axis=2),
            axes=[2, 3],
            sizes=[1, self.latent_dim()],
        )

        return _emission_coeff.broadcast_like(zeros)
Example #9
    def log_intensity(self, y: Tensor) -> Tensor:
        r"""
        Logarithm of the intensity (a.k.a. hazard) function.

        The intensity is defined as :math:`\lambda(y) = p(y) / S(y)`.
        """
        F = getF(y)
        lp = 0.0
        x = y
        for t in self.transforms[::-1]:
            x = t.f_inv(y)
            ladj = t.log_abs_det_jac(x, y)
            lp -= sum_trailing_axes(F, ladj, self.event_dim - t.event_dim)
            y = x
        return self.base_distribution.log_intensity(x) + lp
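
A sketch of the change-of-variables identity the loop relies on, assuming each transform is monotonically increasing so that the survival function carries over unchanged. For a single transform :math:`y = t(x)`:

    S_Y(y) = S_X(x), \qquad
    p_Y(y) = p_X(x)\,\bigl|\det J_t(x)\bigr|^{-1}, \qquad
    \log \lambda_Y(y) = \log \lambda_X(x) - \log\bigl|\det J_t(x)\bigr|

Composing transforms accumulates the log-abs-det-Jacobian terms, which is what `lp` tracks in the loop above.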
Example #10
    def forwardshift(A):
        """
        Shift an array's content forward by one time step along the time axis
        (axis 1), keeping the shape identical by padding on the left with zeros.

        Parameters
        ----------
        A : nd.NDArray
            Shape (N, T, ...), the tensor in which the entries will be shifted
            forward by one
        """
        F = getF(A)
        A = F.Concat(F.zeros_like(F.slice_axis(A, axis=1, begin=0, end=1)),
                     A,
                     dim=1)
        return F.slice_axis(A, axis=1, begin=0, end=-1)
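
A minimal NDArray sketch of the same operations (the input values are made up for illustration; assumes only that mxnet is installed):

    import mxnet as mx

    # input of shape (N=1, T=3, 1)
    A = mx.nd.array([[[1.0], [2.0], [3.0]]])

    # pad on the left with zeros along the time axis, then drop the last step
    padded = mx.nd.Concat(
        mx.nd.zeros_like(mx.nd.slice_axis(A, axis=1, begin=0, end=1)), A, dim=1
    )
    shifted = mx.nd.slice_axis(padded, axis=1, begin=0, end=-1)
    # shifted[0, :, 0] is now [0., 1., 2.]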
Example #11
    def sample(
        self,
        num_samples=None,
        dtype=np.float32,
        lower_bound: Optional[Tensor] = None,
    ) -> Tensor:
        r"""
        Draw samples from the distribution.

        We generate samples as :math:`u \sim Uniform(0, 1), x = S^{-1}(u)`,
        where :math:`S^{-1}` is the inverse of the survival function
        :math:`S(x) = 1 - CDF(x)`.

        Parameters
        ----------
        num_samples
            Number of samples to generate.
        dtype
            Data type of the generated samples.
        lower_bound
            If None, generate samples as usual. If lower_bound is provided,
            all generated samples will be larger than the specified values.
            That is, we sample from `p(x | x > lower_bound)`.
            Shape: `(*batch_size)`

        Returns
        -------
        x
            Sampled inter-event times.
            Shape: `(num_samples, *batch_size)`
        """
        F = getF(self.mu)
        if num_samples is not None:
            sample_shape = (num_samples,) + self.batch_shape
        else:
            sample_shape = self.batch_shape
        u = F.uniform(0, 1, shape=sample_shape)
        # Make sure that the generated samples are larger than lower_bound.
        # This is easy to ensure with inverse-survival sampling: we simply
        # multiply `u ~ Uniform(0, 1)` by `S(lower_bound)` so that the
        # resulting samples satisfy `x > lower_bound`.
        with autograd.pause():
            if lower_bound is not None:
                survival = self.log_survival(lower_bound).exp()
                u = u * survival
            x = (self.mu + self.sigma * (F.log1p(-u) - F.log(u))).exp()
        return x
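
A brief check of the sampling line, consistent with the survival function used by this class (a sketch, not additional library code):

    u = S(x) = \mathrm{sigmoid}(-z) \;\Rightarrow\; z = \log(1 - u) - \log u, \qquad x = e^{\mu + \sigma z}

which matches `(self.mu + self.sigma * (F.log1p(-u) - F.log(u))).exp()`. Multiplying `u` by `S(lower_bound)` restricts `u` to `(0, S(lower_bound))`, and since `S` is decreasing, the resulting `x` satisfies `x > lower_bound`.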
Example #12
    def distribution(
        self,
        feat_static_cat: Tensor,
        feat_static_real: Tensor,
        past_time_feat: Tensor,
        past_target: Tensor,
        past_observed_values: Tensor,
        future_time_feat: Tensor,
        future_target: Tensor,
        future_observed_values: Tensor,
    ) -> Distribution:
        """

        Returns the distribution predicted by the model on the range of
        past_target and future_target.

        The distribution is obtained by unrolling the network with the true
        target; this is also the distribution that is minimized during
        training. It can be used for anomaly detection; see for instance
        examples/anomaly_detection.py.

        Input arguments are the same as for the hybrid_forward method.

        Returns
        -------
        Distribution
            a distribution object whose mean has shape:
            (batch_size, context_length + prediction_length).
        """
        # unroll the decoder in "training mode"
        # i.e. by providing future data as well
        F = getF(feat_static_cat)

        rnn_outputs, _, scale, _ = self.unroll_encoder(
            F=F,
            feat_static_cat=feat_static_cat,
            feat_static_real=feat_static_real,
            past_time_feat=past_time_feat,
            past_target=past_target,
            past_observed_values=past_observed_values,
            future_time_feat=future_time_feat,
            future_target=future_target,
        )

        distr_args = self.proj_distr_args(rnn_outputs)

        return self.distr_output.distribution(distr_args, scale=scale)
Example #13
def log_abs_det(A: Tensor) -> Tensor:
    """
    Logarithm of the absolute value of matrix `A`
    Parameters
    ----------
    A
        Tensor matrix from which to compute the log absolute value of its determinant

    Returns
    -------
        Tensor

    """
    F = getF(A)
    A_squared = F.linalg.gemm2(A, A, transpose_a=True)
    L = F.linalg.potrf(A_squared)
    return F.diag(L, axis1=-2, axis2=-1).abs().log().sum(-1)
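
Why the Cholesky route yields the log absolute determinant (a short check of the math):

    A^{\top} A = L L^{\top} \;\Rightarrow\; \det(A)^{2} = \det\bigl(A^{\top} A\bigr) = \prod_i L_{ii}^{2}
    \;\Rightarrow\; \log\lvert\det A\rvert = \sum_i \log\lvert L_{ii}\rvert

which is exactly the sum of log absolute diagonal entries returned above.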
Example #14
    def transition_coeff(
            self,
            feature: Tensor  # (batch_size, time_length, 1)
    ) -> Tensor:
        F = getF(feature)

        _transition_coeff = (F.eye(
            self.latent_dim()).expand_dims(axis=0).expand_dims(axis=0))

        # get the right shape: (batch_size, time_length, latent_dim, latent_dim)
        zeros = _broadcast_param(
            feature.squeeze(axis=2),
            axes=[2, 3],
            sizes=[self.latent_dim(), self.latent_dim()],
        )

        return _transition_coeff.broadcast_like(zeros)
Example #15
    def forwardshift(A):
        """
        Shift an array's content forward by one time step along the time axis
        (axis 1), keeping the shape identical by repeating the first element.

        Parameters
        ----------
        A : nd.NDArray
            Shape (N, T, ...), the tensor in which the entries will be shifted
            forward by one
        """
        F = getF(A)
        return F.Concat(
            F.slice_axis(A, axis=1, begin=0, end=1),
            F.slice_axis(A, axis=1, begin=0, end=-1),
            dim=1,
        )
Example #16
def _make_block_diagonal(blocks: List[Tensor]) -> Tensor:
    assert (len(blocks) >
            0), "You need at least one tensor to make a block-diagonal tensor"

    if len(blocks) == 1:
        return blocks[0]

    F = getF(blocks[0])

    # transition coefficient is block diagonal!
    block_diagonal = _make_2_block_diagonal(F, blocks[0], blocks[1])
    for i in range(2, len(blocks)):
        block_diagonal = _make_2_block_diagonal(F=F,
                                                left=block_diagonal,
                                                right=blocks[i])

    return block_diagonal
Example #17
    def emission_coeff(
            self,
            seasonal_indicators: Tensor  # (batch_size, time_length)
    ) -> Tensor:
        F = getF(seasonal_indicators)

        _emission_coeff = F.ones(shape=(1, 1, 1, self.latent_dim()))

        # get the right shape: (batch_size, seq_length, obs_dim, latent_dim)
        zeros = _broadcast_param(
            F.zeros_like(
                seasonal_indicators.slice_axis(axis=-1, begin=0,
                                               end=1).squeeze(axis=-1)),
            axes=[2, 3],
            sizes=[1, self.latent_dim()],
        )

        return _emission_coeff.broadcast_like(zeros)
Example #18
    def transition_coeff(
            self,
            seasonal_indicators: Tensor  # (batch_size, time_length)
    ) -> Tensor:
        F = getF(seasonal_indicators)

        _transition_coeff = (F.eye(
            self.latent_dim()).expand_dims(axis=0).expand_dims(axis=0))

        # get the right shape: (batch_size, seq_length, latent_dim, latent_dim)
        zeros = _broadcast_param(
            F.zeros_like(
                seasonal_indicators.slice_axis(axis=-1, begin=0,
                                               end=1).squeeze(axis=-1)),
            axes=[2, 3],
            sizes=[self.latent_dim(), self.latent_dim()],
        )

        return _transition_coeff.broadcast_like(zeros)
Example #19
    def distr(
        self,
        rnn_outputs: Tensor,
        time_features: Tensor,
        scale: Tensor,
        lags_scaled: Tensor,
        target_dimension_indicator: Tensor,
        seq_len: int,
    ):
        """
        Returns the distribution of GPVAR with respect to the RNN outputs.

        Parameters
        ----------
        rnn_outputs
            Outputs of the unrolled RNN (batch_size, seq_len, num_cells)
        time_features
            Dynamic time features (batch_size, seq_len, num_features)
        scale
            Mean scale for each time series (batch_size, 1, target_dim)
        lags_scaled
            Scaled lags used for RNN input
            (batch_size, seq_len, target_dim, num_lags)
        target_dimension_indicator
            Indices of the target dimension (batch_size, target_dim)
        seq_len
            Length of the sequences

        Returns
        -------
        distr
            Distribution instance
        distr_args
            Distribution arguments
        """
        F = getF(rnn_outputs)

        # (batch_size, target_dim, embed_dim)
        index_embeddings = self.embed(target_dimension_indicator)

        # broadcast to (batch_size, seq_len, target_dim, embed_dim)
        repeated_index_embeddings = index_embeddings.expand_dims(
            axis=1).repeat(axis=1, repeats=seq_len)

        # broadcast to (batch_size, seq_len, target_dim, num_features)
        time_features = time_features.expand_dims(axis=2).repeat(
            axis=2, repeats=self.target_dim_sample)

        # (batch_size, seq_len, target_dim, embed_dim + num_cells + num_inputs)
        distr_input = F.concat(rnn_outputs,
                               repeated_index_embeddings,
                               time_features,
                               dim=-1)

        # TODO 1 pass inputs in proj args
        distr_args = self.proj_dist_args(distr_input)

        # compute likelihood of target given the predicted parameters
        distr = self.distr_output.distribution(distr_args,
                                               scale=scale,
                                               dim=self.target_dim_sample)

        return distr, distr_args
Example #20
 def innovation_coeff(self, seasonal_indicators: Tensor) -> Tensor:
     F = getF(seasonal_indicators)
     # seasonal_indicators = F.modulo(seasonal_indicators - 1, self.latent_dim)
     return F.one_hot(seasonal_indicators,
                      depth=self.latent_dim()).squeeze(axis=2)
Example #21
 def emission_coeff(self, seasonal_indicators: Tensor) -> Tensor:
     F = getF(seasonal_indicators)
     return F.one_hot(seasonal_indicators, depth=self.latent_dim())
Example #22
 def __init__(self, xi: Tensor, beta: Tensor, F=None) -> None:
     self.xi = xi
     self.beta = beta
     self.F = F if F else getF(xi)  # assuming xi and beta of same type
Example #23
 def F(self):
     return getF(self.xi)
Example #24
 def innovation_coeff(self, feature: Tensor) -> Tensor:
     F = getF(feature)
     return F.one_hot(feature, depth=self.latent_dim()).squeeze(axis=2)
Example #25
 def __init__(
     self,
     sigma: Tensor,
     kernel: Kernel,
     prediction_length: Optional[int] = None,
     context_length: Optional[int] = None,
     num_samples: Optional[int] = None,
     float_type: DType = np.float64,
     jitter_method: str = "iter",
     max_iter_jitter: int = 10,
     neg_tol: float = -1e-8,
     diag_weight: float = 1e-6,
     increase_jitter: int = 10,
     sample_noise: bool = True,
     F=None,
 ) -> None:
     r"""
     Parameters
     ----------
     sigma
         Noise parameter of shape (batch_size, num_data_points, 1),
         where num_data_points is the number of rows in the Cholesky matrix.
     kernel
         Kernel object.
     prediction_length
         Prediction length.
     context_length
         Training length.
     num_samples
         The number of samples to be drawn.
     float_type
         Determines whether to use single or double precision.
     jitter_method
          Whether to make the matrix positive definite by iteratively adding
          jitter or by using an eigenvalue decomposition, depending on the
          problem size.
     max_iter_jitter
         Maximum number of iterations for jitter to iteratively make the matrix positive definite.
     neg_tol
          Parameter in the jitter methods used to eliminate matrices with
          diagonal elements smaller than this value when checking whether a
          matrix is positive definite.
     diag_weight
         Multiple of mean of diagonal entries to initialize the jitter.
     increase_jitter
          Factor by which the jitter is multiplied at each iteration.
     sample_noise
         Boolean to determine whether to add :math:`\sigma^2I` to the predictive covariance matrix.
     F
         A module that can either refer to the Symbol API or the NDArray
         API in MXNet.
     """
     assert (prediction_length is None or prediction_length > 0
             ), "The value of `prediction_length` should be > 0"
     assert (context_length is None or context_length > 0
             ), "The value of `context_length` should be > 0"
     assert (num_samples is None
             or num_samples > 0), "The value of `num_samples` should be > 0"
     self.sigma = sigma
     self.kernel = kernel
     self.prediction_length = prediction_length
     self.context_length = (context_length if context_length is not None
                            else prediction_length)
     self.num_samples = num_samples
     self.F = F if F else getF(sigma)
     self.float_type = float_type
     self.jitter_method = jitter_method
     self.max_iter_jitter = max_iter_jitter
     self.neg_tol = neg_tol
     self.diag_weight = diag_weight
     self.increase_jitter = increase_jitter
     self.sample_noise = sample_noise
Example #26
 def emission_coeff(self, feature: Tensor) -> Tensor:
     F = getF(feature)
     return F.one_hot(feature, depth=self.latent_dim())