Example #1
    def get_issm_coeff(
        self,
        seasonal_indicators: Tensor  # (batch_size, time_length)
    ) -> Tuple[Tensor, Tensor, Tensor]:
        F = getF(seasonal_indicators)
        emission_coeff_ls, transition_coeff_ls, innovation_coeff_ls = zip(
            self.nonseasonal_issm.get_issm_coeff(seasonal_indicators),
            *[
                issm.get_issm_coeff(
                    seasonal_indicators.slice_axis(axis=-1,
                                                   begin=ix,
                                                   end=ix + 1))
                for ix, issm in enumerate(self.seasonal_issms)
            ],
        )

        # stack emission and innovation coefficients
        emission_coeff = F.concat(*emission_coeff_ls, dim=-1)

        innovation_coeff = F.concat(*innovation_coeff_ls, dim=-1)

        # transition coefficient is block diagonal!
        transition_coeff = _make_block_diagonal(transition_coeff_ls)

        return emission_coeff, transition_coeff, innovation_coeff
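In this composite ISSM the latent state is the concatenation of the non-seasonal and seasonal component states, so the emission and innovation coefficients are concatenated along the latent axis, while the transition matrix is assembled block-diagonally (see `_make_block_diagonal` in Example #15 below).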
Example #2
def lowrank_log_likelihood(
    rank: int, mu: Tensor, D: Tensor, W: Tensor, x: Tensor
) -> Tensor:

    F = getF(mu)

    dim = F.ones_like(mu).sum(axis=-1).max()

    dim_factor = dim * math.log(2 * math.pi)

    batch_capacitance_tril = capacitance_tril(F=F, rank=rank, W=W, D=D)

    log_det_factor = log_det(
        F=F, batch_D=D, batch_capacitance_tril=batch_capacitance_tril
    )

    mahalanobis_factor = mahalanobis_distance(
        F=F, W=W, D=D, capacitance_tril=batch_capacitance_tril, x=x - mu
    )

    ll: Tensor = -0.5 * (
        F.broadcast_add(dim_factor, log_det_factor) + mahalanobis_factor
    )

    return ll
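A minimal call sketch, assuming the helpers `capacitance_tril`, `log_det`, and `mahalanobis_distance` used above are in scope (in GluonTS they sit next to this function in the low-rank multivariate Gaussian module); the shapes are illustrative:

import mxnet as mx

batch, dim, rank = 4, 3, 2
mu = mx.nd.zeros((batch, dim))                     # mean
D = mx.nd.ones((batch, dim))                       # positive diagonal term
W = mx.nd.random.normal(shape=(batch, dim, rank))  # low-rank factor
x = mx.nd.random.normal(shape=(batch, dim))        # observations

# The implied covariance is diag(D) + W W^T per batch element.
ll = lowrank_log_likelihood(rank=rank, mu=mu, D=D, W=W, x=x)
print(ll.shape)  # (batch,)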
Example #3
 def __init__(
     self, mu: Tensor, L: Tensor, F=None, float_type: DType = np.float32
 ) -> None:
     self.mu = mu
     self.F = F if F else getF(mu)
     self.L = L
     self.float_type = float_type
Example #4
        def s(mu: Tensor, D: Tensor, W: Tensor) -> Tensor:
            F = getF(mu)

            samples_D = F.sample_normal(
                mu=F.zeros_like(mu), sigma=F.ones_like(mu), dtype=dtype
            )
            cov_D = D.sqrt() * samples_D

            # dummy tensor, used only to obtain the shape (..., rank)
            dummy_tensor = F.linalg_gemm2(
                W, mu.expand_dims(axis=-1), transpose_a=True
            ).squeeze(axis=-1)

            samples_W = F.sample_normal(
                mu=F.zeros_like(dummy_tensor),
                sigma=F.ones_like(dummy_tensor),
                dtype=dtype,
            )

            cov_W = F.linalg_gemm2(W, samples_W.expand_dims(axis=-1)).squeeze(
                axis=-1
            )

            samples = mu + cov_D + cov_W

            return samples
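Since `samples_D` and `samples_W` are independent standard normals, the resulting samples have mean `mu` and covariance `diag(D) + W W^T`, i.e. exactly the low-rank parameterization evaluated by `lowrank_log_likelihood` in Example #2.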
Example #5
 def __init__(self,
              alpha: Tensor,
              F=None,
              float_type: DType = np.float32) -> None:
     self.alpha = alpha
     self.F = F if F else getF(alpha)
     self.float_type = float_type
Example #6
 def __init__(self, dim: int, rank: int, mu: Tensor, D: Tensor,
              W: Tensor) -> None:
     self.dim = dim
     self.rank = rank
     self.mu = mu
     self.D = D
     self.W = W
     self.F = getF(mu)
     self.Cov = None
Example #7
        def s(alpha: Tensor) -> Tensor:
            F = getF(alpha)
            samples_gamma = F.sample_gamma(alpha=alpha,
                                           beta=F.ones_like(alpha),
                                           dtype=dtype)
            sum_gamma = F.sum(samples_gamma, axis=-1, keepdims=True)
            samples_s = F.broadcast_div(samples_gamma, sum_gamma)

            return samples_s
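This is the standard Gamma-based construction of Dirichlet samples: if g_i ~ Gamma(alpha_i, 1) independently, then g / sum(g) ~ Dirichlet(alpha). A standalone sketch with the NDArray API (shapes illustrative):

import mxnet as mx

alpha = mx.nd.array([[1.0, 2.0, 3.0]])  # (batch, dim) concentration parameters
g = mx.nd.sample_gamma(alpha=alpha, beta=mx.nd.ones_like(alpha))
samples = mx.nd.broadcast_div(g, g.sum(axis=-1, keepdims=True))  # rows sum to 1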
Example #8
        def s(alpha: Tensor) -> Tensor:
            F = getF(alpha)
            samples_gamma = F.sample_gamma(alpha=alpha,
                                           beta=F.ones_like(alpha),
                                           dtype=dtype)
            sum_gamma = F.sum(samples_gamma, axis=-1, keepdims=True)
            samples_s = F.broadcast_div(samples_gamma, sum_gamma)

            cat_samples = F.sample_multinomial(samples_s, shape=n_trials)
            return F.sum(F.one_hot(cat_samples, dim), axis=-2)
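The last two lines turn the per-trial category indices returned by `sample_multinomial` into counts: `one_hot` maps each of the `n_trials` draws to a one-hot vector, and the sum over the trial axis gives one count vector per batch element. For instance:

import mxnet as mx

cat = mx.nd.array([0, 2, 2, 1, 0])                 # 5 trials over 3 categories
counts = mx.nd.one_hot(cat, depth=3).sum(axis=-2)  # [2. 1. 2.]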
Example #9
    def __init__(
        self, gamma: Tensor, slopes: Tensor, knot_spacings: Tensor, F=None
    ) -> None:
        self.F = F if F else getF(gamma)
        self.gamma = gamma

        # Since most of the calculations are easily expressed in the original parameters, we transform the
        # learned parameters back
        self.b, self.knot_positions = PiecewiseLinear._to_orig_params(
            self.F, slopes, knot_spacings
        )
Example #10
 def crps(self, y: Tensor) -> Tensor:
     # TODO: use event_shape
     F = getF(y)
     x = y
     scale = 1.0
     for t in self.transforms[::-1]:
         assert isinstance(
             t, AffineTransformation), "Not an AffineTransformation"
         x = t.f_inv(x)
         scale *= t.scale
     p = self.base_distribution.crps(x)
     return F.broadcast_mul(p, scale)
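This uses the scaling property of the CRPS: for a scale transformation with `s > 0`, the CRPS of `s * X` at `y` equals `s` times the CRPS of `X` at `y / s`, so the base distribution's CRPS is evaluated at the back-transformed point and multiplied by the accumulated scale.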
Example #11
 def __init__(
     self,
     dim: int,
     n_trials: int,
     alpha: Tensor,
     F=None,
     float_type: DType = np.float32,
 ) -> None:
     self.dim = dim
     self.n_trials = n_trials
     self.alpha = alpha
     self.F = F if F else getF(alpha)
     self.float_type = float_type
Example #12
    def distribution(
        self,
        feat_static_cat: Tensor,
        feat_static_real: Tensor,
        past_time_feat: Tensor,
        past_target: Tensor,
        past_observed_values: Tensor,
        future_time_feat: Tensor,
        future_target: Tensor,
        future_observed_values: Tensor,
    ) -> Tuple[Distribution, Distribution]:
        """

        Returns the distribution predicted by the model on the range of
        past_target and future_target.

        The distribution is obtained by unrolling the network with the true
        target; its negative log-likelihood is the quantity minimized during
        training. This can be used for anomaly detection, see for instance
        examples/anomaly_detection.py.

        Input arguments are the same as for the hybrid_forward method.

        Returns
        -------
        Tuple[Distribution, Distribution]
            a pair of distribution objects, one per projection head, whose
            means have shape:
            (batch_size, context_length + prediction_length).
        """
        # unroll the decoder in "training mode"
        # i.e. by providing future data as well
        F = getF(feat_static_cat)

        rnn_outputs, _, scale, _ = self.unroll_encoder(
            F=F,
            feat_static_cat=feat_static_cat,
            feat_static_real=feat_static_real,
            past_time_feat=past_time_feat,
            past_target=past_target,
            past_observed_values=past_observed_values,
            future_time_feat=future_time_feat,
            future_target=future_target,
        )
        distr_args_m = self.proj_distr_args_m(rnn_outputs)
        distr_args_q = self.proj_distr_args_q(rnn_outputs)
        return (
            self.distr_output_m.distribution(distr_args_m, scale=scale),
            self.distr_output_q.distribution(distr_args_q, scale=scale),
        )
Example #13
    def distribution(
        self,
        feat_static_cat: Tensor,
        past_time_feat: Tensor,
        past_target: Tensor,
        past_observed_values: Tensor,
        future_time_feat: Tensor,
        future_target: Tensor,
        future_observed_values: Tensor,
    ) -> Distribution:
        """

        Returns the distribution predicted by the model on the range of past_target and future_target.

        The distribution is obtained by unrolling the network with the true
        target; its negative log-likelihood is the quantity minimized during
        training. This can be used for anomaly detection, see for instance
        examples/anomaly_detection.py.

        Parameters
        ----------
        feat_static_cat
        past_time_feat
        past_target
        past_observed_values
        future_time_feat
        future_target
        future_observed_values

        Returns
        -------
        Distribution
            a distribution object whose mean has shape:
            (batch_size, context_length + prediction_length).
        """
        # unroll the decoder in "training mode", i.e. by providing future data as well
        F = getF(feat_static_cat)

        rnn_outputs, _, scale, _ = self.unroll_encoder(
            F=F,
            feat_static_cat=feat_static_cat,
            past_time_feat=past_time_feat,
            past_target=past_target,
            past_observed_values=past_observed_values,
            future_time_feat=future_time_feat,
            future_target=future_target,
        )

        distr_args = self.proj_distr_args(rnn_outputs)

        return self.distr_output.distribution(distr_args, scale=scale)
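A hedged sketch of the anomaly-detection use mentioned in the docstring; `net` and all input tensors are placeholders, and examples/anomaly_detection.py in the GluonTS repository is the authoritative version:

import mxnet as mx

# Hypothetical: net is a trained network exposing this distribution method,
# with inputs prepared exactly as for hybrid_forward.
distr = net.distribution(
    feat_static_cat=feat_static_cat,
    past_time_feat=past_time_feat,
    past_target=past_target,
    past_observed_values=past_observed_values,
    future_time_feat=future_time_feat,
    future_target=future_target,
    future_observed_values=future_observed_values,
)

# Score the last context_length + prediction_length observations; unusually
# low log-density marks anomaly candidates.
target = mx.nd.concat(
    past_target.slice_axis(axis=1, begin=-net.context_length, end=None),
    future_target,
    dim=1,
)
scores = -distr.log_prob(target)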
Example #14
    def __init__(
        self,
        emission_coeff: Tensor,
        transition_coeff: Tensor,
        innovation_coeff: Tensor,
        noise_std: Tensor,
        residuals: Tensor,
        prior_mean: Tensor,
        prior_cov: Tensor,
        latent_dim: int,
        output_dim: int,
        seq_length: int,
        F=None,
    ) -> None:
        self.latent_dim = latent_dim
        self.output_dim = output_dim
        self.seq_length = seq_length

        # Split coefficients along time axis for easy access
        # emission_coeff[t]: (batch_size, obs_dim, latent_dim)
        self.emission_coeff = emission_coeff.split(
            axis=1, num_outputs=self.seq_length, squeeze_axis=True
        )

        # innovation_coeff[t]: (batch_size, latent_dim)
        self.innovation_coeff = innovation_coeff.split(
            axis=1, num_outputs=self.seq_length, squeeze_axis=False
        )

        # transition_coeff[t]: (batch_size, latent_dim, latent_dim)
        self.transition_coeff = transition_coeff.split(
            axis=1, num_outputs=self.seq_length, squeeze_axis=True
        )

        # noise_std[t]: (batch_size, obs_dim)
        self.noise_std = noise_std.split(
            axis=1, num_outputs=self.seq_length, squeeze_axis=True
        )

        # residuals[t]: (batch_size, obs_dim)
        self.residuals = residuals.split(
            axis=1, num_outputs=self.seq_length, squeeze_axis=True
        )

        self.prior_mean = prior_mean
        self.prior_cov = prior_cov
        self.F = F if F else getF(noise_std)
Example #15
def _make_block_diagonal(blocks: List[Tensor]) -> Tensor:
    assert (len(blocks) >
            0), "You need at least one tensor to make a block-diagonal tensor"

    if len(blocks) == 1:
        return blocks[0]

    F = getF(blocks[0])

    # transition coefficient is block diagonal!
    block_diagonal = _make_2_block_diagonal(F, blocks[0], blocks[1])
    for i in range(2, len(blocks)):
        block_diagonal = _make_2_block_diagonal(F=F,
                                                left=block_diagonal,
                                                right=blocks[i])

    return block_diagonal
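For illustration: stacking a batch of 2x2 blocks with a batch of 3x3 blocks yields 5x5 block-diagonal matrices, with `_make_2_block_diagonal` (defined alongside this helper) doing the zero-padding for each pair:

import mxnet as mx

a = mx.nd.ones((1, 2, 2))
b = mx.nd.ones((1, 3, 3))
out = _make_block_diagonal([a, b])  # (1, 5, 5), zeros off the two blocks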
Example #16
    def crps(self, y: Tensor) -> Tensor:
        # TODO: use event_shape
        F = getF(y)

        x = y
        scale = 1.0
        for t in self.transforms[::-1]:
            assert isinstance(
                t, AffineTransformation), "Not an AffineTransformation"
            assert (t.scale is not None
                    and t.loc is None), "Not a scaling transformation"

            # accumulate the scale and invert transform by transform,
            # as in Example #10
            scale = scale * t.scale
            x = t.f_inv(x)

        # (..., 1)
        p = self.base_distribution.crps(x)

        return F.broadcast_mul(p, scale)
Example #17
    def emission_coeff(
            self,
            seasonal_indicators: Tensor  # (batch_size, time_length)
    ) -> Tensor:
        F = getF(seasonal_indicators)

        _emission_coeff = F.ones(shape=(1, 1, 1, self.latent_dim()))

        # get the right shape: (batch_size, seq_length, obs_dim, latent_dim)
        zeros = _broadcast_param(
            F.zeros_like(
                seasonal_indicators.slice_axis(axis=-1, begin=0,
                                               end=1).squeeze(axis=-1)),
            axes=[2, 3],
            sizes=[1, self.latent_dim()],
        )

        return _emission_coeff.broadcast_like(zeros)
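The `zeros_like`/`broadcast_like` combination is a shape trick: `zeros` inherits the dynamic `(batch_size, seq_length)` prefix from `seasonal_indicators`, and `_broadcast_param` appends the trailing `(1, latent_dim)` sizes, so the constant coefficient can be broadcast to its full shape without knowing batch size or sequence length at graph-construction time (which matters for the Symbol API).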
Example #18
 def __init__(self,
              amplitude: Tensor,
              length_scale: Tensor,
              F=None) -> None:
     """
     Parameters
     ----------
     amplitude : Tensor
         RBF kernel amplitude hyper-parameter of shape (batch_size, 1, 1).
     length_scale : Tensor
          RBF kernel length scale hyper-parameter of shape (batch_size, 1, 1).
     F : ModuleType
         A module that can either refer to the Symbol API or the NDArray
         API in MXNet.
     """
     self.F = F if F else getF(amplitude)
     self.amplitude = amplitude
     self.length_scale = length_scale
Example #19
    def transition_coeff(
            self,
            seasonal_indicators: Tensor  # (batch_size, time_length)
    ) -> Tensor:
        F = getF(seasonal_indicators)

        _transition_coeff = (F.eye(
            self.latent_dim()).expand_dims(axis=0).expand_dims(axis=0))

        # get the right shape: (batch_size, seq_length, latent_dim, latent_dim)
        zeros = _broadcast_param(
            F.zeros_like(
                seasonal_indicators.slice_axis(axis=-1, begin=0,
                                               end=1).squeeze(axis=-1)),
            axes=[2, 3],
            sizes=[self.latent_dim(), self.latent_dim()],
        )

        return _transition_coeff.broadcast_like(zeros)
Example #20
    def distr(
        self,
        rnn_outputs: Tensor,
        time_features: Tensor,
        scale: Tensor,
        lags_scaled: Tensor,
        target_dimension_indicator: Tensor,
        seq_len: int,
    ):
        """
        Returns the distribution of GPVAR with respect to the RNN outputs.

        Parameters
        ----------
        rnn_outputs
            Outputs of the unrolled RNN (batch_size, seq_len, num_cells)
        time_features
            Dynamic time features (batch_size, seq_len, num_features)
        scale
            Mean scale for each time series (batch_size, 1, target_dim)
        lags_scaled
            Scaled lags used for RNN input
            (batch_size, seq_len, target_dim, num_lags)
        target_dimension_indicator
            Indices of the target dimension (batch_size, target_dim)
        seq_len
            Length of the sequences

        Returns
        -------
        distr
            Distribution instance
        distr_args
            Distribution arguments
        """
        F = getF(rnn_outputs)

        # (batch_size, target_dim, embed_dim)
        index_embeddings = self.embed(target_dimension_indicator)

        # broadcast to (batch_size, seq_len, target_dim, embed_dim)
        repeated_index_embeddings = index_embeddings.expand_dims(
            axis=1
        ).repeat(axis=1, repeats=seq_len)

        # broadcast to (batch_size, seq_len, target_dim, num_features)
        time_features = time_features.expand_dims(axis=2).repeat(
            axis=2, repeats=self.target_dim_sample
        )

        # (batch_size, seq_len, target_dim, embed_dim + num_cells + num_inputs)
        distr_input = F.concat(
            rnn_outputs, repeated_index_embeddings, time_features, dim=-1
        )

        # TODO 1 pass inputs in proj args
        distr_args = self.proj_dist_args(distr_input)

        # compute likelihood of target given the predicted parameters
        distr = self.distr_output.distribution(
            distr_args, scale=scale, dim=self.target_dim_sample
        )

        return distr, distr_args
Example #21
 def __init__(self, mu: Tensor, L: Tensor, F=None) -> None:
     self.mu = mu
     self.F = F if F else getF(mu)
     self.L = L
Example #22
 def innovation_coeff(self, seasonal_indicators: Tensor) -> Tensor:
     F = getF(seasonal_indicators)
     # seasonal_indicators = F.modulo(seasonal_indicators - 1, self.latent_dim)
     return F.one_hot(seasonal_indicators,
                      depth=self.latent_dim()).squeeze(axis=2)
Example #23
 def emission_coeff(self, seasonal_indicators: Tensor) -> Tensor:
     F = getF(seasonal_indicators)
     return F.one_hot(seasonal_indicators, depth=self.latent_dim())
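Here the emission coefficient selects the latent component of the currently active season via a one-hot vector; a tiny NDArray illustration:

import mxnet as mx

ind = mx.nd.array([[0, 1, 2, 0]])    # (batch=1, time=4) season indices
coeff = mx.nd.one_hot(ind, depth=3)  # (1, 4, 3): one-hot per time step

The `__init__` that follows belongs to a different class (a Gaussian-process distribution object) that the example page groups under the same heading.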
 def __init__(
     self,
     sigma: Tensor,
     kernel: Kernel,
     prediction_length: Optional[int] = None,
     context_length: Optional[int] = None,
     num_samples: Optional[int] = None,
     ctx: mx.Context = mx.Context("cpu"),
     float_type: DType = np.float64,
     jitter_method: str = "iter",
     max_iter_jitter: int = 10,
     neg_tol: float = -1e-8,
     diag_weight: float = 1e-6,
     increase_jitter: int = 10,
     sample_noise: bool = True,
     F=None,
 ) -> None:
     r"""
     Parameters
     ----------
     sigma
         Noise parameter of shape (batch_size, num_data_points, 1),
         where num_data_points is the number of rows in the Cholesky matrix.
     kernel
         Kernel object.
     prediction_length
         Prediction length.
     context_length
         Training length.
     num_samples
         The number of samples to be drawn.
     ctx
         Determines whether to compute on the cpu or gpu.
     float_type
         Determines whether to use single or double precision.
     jitter_method
          Whether to use the iterative jitter method or an eigenvalue
          decomposition, depending on the problem size.
     max_iter_jitter
         Maximum number of iterations for jitter to iteratively make the matrix positive definite.
     neg_tol
          Parameter in the jitter methods used to eliminate matrices with
          diagonal elements smaller than this when checking whether a matrix
          is positive definite.
     diag_weight
         Multiple of mean of diagonal entries to initialize the jitter.
     increase_jitter
          Factor by which the jitter is multiplied at each iteration.
     sample_noise
         Boolean to determine whether to add :math:`\sigma^2I` to the predictive covariance matrix.
     F
         A module that can either refer to the Symbol API or the NDArray
         API in MXNet.
     """
     assert (prediction_length is None or prediction_length > 0
             ), "The value of `prediction_length` should be > 0"
     assert (context_length is None or context_length > 0
             ), "The value of `context_length` should be > 0"
     assert (num_samples is None
             or num_samples > 0), "The value of `num_samples` should be > 0"
     self.sigma = sigma
     self.kernel = kernel
     self.prediction_length = prediction_length
     self.context_length = (context_length if context_length is not None
                            else prediction_length)
     self.num_samples = num_samples
     self.F = F if F else getF(sigma)
     self.ctx = ctx
     self.float_type = float_type
     self.jitter_method = jitter_method
     self.max_iter_jitter = max_iter_jitter
     self.neg_tol = neg_tol
     self.diag_weight = diag_weight
     self.increase_jitter = increase_jitter
     self.sample_noise = sample_noise