Code example #1
def test_deterministic_l2(mu: float, hybridize: bool) -> None:
    """
    Test to check that maximizing the likelihood recovers the parameters.
    This test uses the Gaussian distribution with fixed variance and sample mean,
    which essentially reduces to deterministic L2.
    """
    # generate samples
    mus = mx.nd.zeros(NUM_SAMPLES) + mu

    deterministic_distr = Gaussian(mu=mus, sigma=0.1 * mx.nd.ones_like(mus))
    samples = deterministic_distr.sample()

    class GaussianFixedVarianceOutput(GaussianOutput):
        @classmethod
        def domain_map(cls, F, mu, sigma):
            sigma = 0.1 * F.ones_like(sigma)
            return mu.squeeze(axis=-1), sigma.squeeze(axis=-1)

    mu_hat, _ = maximum_likelihood_estimate_sgd(
        GaussianFixedVarianceOutput(),
        samples,
        init_biases=[3 * mu, 0.1],
        hybridize=hybridize,
        num_epochs=PositiveInt(1),
    )

    assert (np.abs(mu_hat - mu) <
            TOL * mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
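Why fixing sigma reduces the fit to deterministic L2: with constant variance, the Gaussian negative log-likelihood equals the squared error up to an affine transformation, so both objectives share the same minimizer (the sample mean). A minimal numpy sketch of that equivalence, not part of the test suite:

import numpy as np

def gaussian_nll(mu_hat, x, sigma=0.1):
    # NLL of N(mu_hat, sigma^2): 0.5 * SSE / sigma^2 plus a term constant in mu_hat
    return (0.5 * np.sum((x - mu_hat) ** 2) / sigma ** 2
            + x.size * np.log(sigma * np.sqrt(2 * np.pi)))

x = np.random.normal(loc=2.0, scale=0.1, size=1000)
mu_grid = np.linspace(1.5, 2.5, 101)
nll = [gaussian_nll(m, x) for m in mu_grid]
sse = [np.sum((x - m) ** 2) for m in mu_grid]
assert np.argmin(nll) == np.argmin(sse)  # same minimizer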
Code example #2
def test_gaussian_likelihood(mu: float, sigma: float, hybridize: bool):
    """
    Test to check that maximizing the likelihood recovers the parameters.
    """

    # generate samples
    mus = mx.nd.zeros((NUM_SAMPLES, )) + mu
    sigmas = mx.nd.zeros((NUM_SAMPLES, )) + sigma

    distr = Gaussian(mus, sigmas)
    samples = distr.sample()

    init_biases = [
        mu - START_TOL_MULTIPLE * TOL * mu,
        inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma),
    ]

    mu_hat, sigma_hat = maximum_likelihood_estimate_sgd(
        GaussianOutput(),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.001),
        num_epochs=PositiveInt(5),
    )

    assert (np.abs(mu_hat - mu) <
            TOL * mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert (np.abs(sigma_hat - sigma) < TOL * sigma
            ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"
Code example #3
File: model.py Project: artur-deluca/sac-mxnet
    def sample(self, mean, log_std):
        std = log_std.exp()

        distribution = Gaussian(mu=mean, sigma=std)
        sample = distribution.sample(
            dtype="float64"
        )  # for reparameterization trick (mu + std * N(0,1))
        sample_log_prob = distribution.log_prob(sample)

        return self.scale_and_bound(sample, sample_log_prob, mean)
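The comment above refers to the reparameterization trick: rather than treating sampling from N(mean, std^2) as an opaque operation, one draws eps ~ N(0, 1) and computes mean + std * eps, so gradients can propagate through mean and std. A minimal MXNet sketch of the same idea (illustrative only):

import mxnet as mx

mean = mx.nd.array([0.5])
std = mx.nd.array([0.2])

eps = mx.nd.random.normal(shape=mean.shape)  # the only source of randomness
sample = mean + std * eps                    # differentiable w.r.t. mean and std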
Code example #4
def test_box_cox_transform(
    lambdas: Tuple[float, float],
    mu_sigma: Tuple[float, float],
    hybridize: bool,
):
    """
    Test to check that maximizing the likelihood recovers the parameters.
    """
    # test instance
    lam_1, lam_2 = lambdas
    mu, sigma = mu_sigma

    # generate samples
    lambdas_1 = mx.nd.zeros((NUM_SAMPLES, )) + lam_1
    lambdas_2 = mx.nd.zeros((NUM_SAMPLES, )) + lam_2
    transform = InverseBoxCoxTransform(lambdas_1, lambdas_2)

    mus = mx.nd.zeros((NUM_SAMPLES, )) + mu
    sigmas = mx.nd.zeros((NUM_SAMPLES, )) + sigma
    gaussian_distr = Gaussian(mus, sigmas)

    # Here the base distribution is Gaussian, which is transformed to a
    # non-Gaussian one via the inverse Box-Cox transform.
    # Sampling from `trans_distr` gives non-Gaussian samples.
    trans_distr = TransformedDistribution(gaussian_distr, transform)

    # Given the non-Gaussian samples, recover the true parameters of the
    # Box-Cox transformation as well as those of the underlying Gaussian
    # distribution.
    samples = trans_distr.sample()

    init_biases = [
        mu - START_TOL_MULTIPLE * TOL * mu,
        inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma),
        lam_1 - START_TOL_MULTIPLE * TOL * lam_1,
        inv_softplus(lam_2 - START_TOL_MULTIPLE * TOL * lam_2),
    ]

    mu_hat, sigma_hat, lam_1_hat, lam_2_hat = maximum_likelihood_estimate_sgd(
        TransformedDistributionOutput(
            GaussianOutput(),
            InverseBoxCoxTransformOutput(lb_obs=lam_2, fix_lambda_2=True),
        ),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(18),
    )

    assert (np.abs(lam_1_hat - lam_1) < TOL * lam_1
            ), f"lam_1 did not match: lam_1 = {lam_1}, lam_1_hat = {lam_1_hat}"
    # lam_2 is held fixed (`fix_lambda_2=True` above), so it is not checked:
    # assert (
    #     np.abs(lam_2_hat - lam_2) < TOL * lam_2
    # ), f"lam_2 did not match: lam_2 = {lam_2}, lam_2_hat = {lam_2_hat}"

    assert np.abs(mu_hat - mu) < TOL * np.abs(
        mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert (np.abs(sigma_hat - sigma) < TOL * sigma
            ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"
Code example #5
def diff(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    # The enclosing definition was truncated in the snippet; this header is a
    # reconstruction of a mean-absolute-difference helper.
    return np.mean(np.abs(x - y))


NUM_SAMPLES = 1_000
NUM_SAMPLES_LARGE = 100_000


SHAPE = (2, 1, 3)


@pytest.mark.parametrize(
    "distr1, distr2, p",
    [
        (
            Gaussian(
                mu=mx.nd.zeros(shape=SHAPE),
                sigma=1e-3 + 0.2 * mx.nd.ones(shape=SHAPE),
            ),
            Gaussian(
                mu=mx.nd.ones(shape=SHAPE),
                sigma=1e-3 + 0.1 * mx.nd.ones(shape=SHAPE),
            ),
            0.2 * mx.nd.ones(shape=SHAPE),
        ),
        (
            StudentT(
                mu=mx.nd.ones(shape=SHAPE),
                sigma=1e-1 + mx.nd.zeros(shape=SHAPE),
                nu=mx.nd.zeros(shape=SHAPE) + 2.2,
            ),
            Gaussian(
                mu=-mx.nd.ones(shape=SHAPE),
Code example #6
File: lds.py Project: hw2312/dsi-capstone-m5
    def sample(self,
               num_samples: Optional[int] = None,
               scale: Optional[Tensor] = None) -> Tensor:
        r"""
        Generates samples from the LDS: p(z_1, z_2, \ldots, z_{`seq_length`}).

        Parameters
        ----------
        num_samples
            Number of samples to generate
        scale
            Scale of each sequence in x, shape (batch_size, output_dim)

        Returns
        -------
        Tensor
            Samples, shape (num_samples, batch_size, seq_length, output_dim)
        """
        F = self.F

        # Note on shapes: here we work with tensors of the following shape
        # in each time step t: (num_samples, batch_size, dim, dim),
        # where dim can be obs_dim or latent_dim or a constant 1 to facilitate
        # generalized matrix multiplication (gemm2)

        # Sample observation noise for all time steps
        # noise_std: (batch_size, seq_length, obs_dim, 1)
        noise_std = F.stack(*self.noise_std, axis=1).expand_dims(axis=-1)

        # samples_eps_obs[t]: (num_samples, batch_size, obs_dim, 1)
        samples_eps_obs = (
            Gaussian(noise_std.zeros_like(), noise_std)
            .sample(num_samples)
            .split(axis=-3, num_outputs=self.seq_length, squeeze_axis=True)
        )

        # Sample standard normal for all time steps
        # samples_std_normal[t]: (num_samples, batch_size, obs_dim, 1)
        samples_std_normal = (
            Gaussian(noise_std.zeros_like(), noise_std.ones_like())
            .sample(num_samples)
            .split(axis=-3, num_outputs=self.seq_length, squeeze_axis=True)
        )

        # Sample the prior state.
        # samples_lat_state: (num_samples, batch_size, latent_dim, 1)
        # The prior covariance can turn out to be slightly non-positive-definite
        # whenever there is excessive zero padding at the beginning of the
        # time series. We add a positive jitter to the diagonal to avoid
        # numerical issues. Note that `jitter_cholesky` adds jitter only if
        # the decomposition without it fails.
        state = MultivariateGaussian(
            self.prior_mean,
            jitter_cholesky(F,
                            self.prior_cov,
                            self.latent_dim,
                            float_type=np.float32),
        )
        samples_lat_state = state.sample(num_samples).expand_dims(axis=-1)

        samples_seq = []
        for t in range(self.seq_length):
            # Expand all coefficients to include samples in axis 0
            # emission_coeff_t: (num_samples, batch_size, obs_dim, latent_dim)
            # transition_coeff_t:
            #   (num_samples, batch_size, latent_dim, latent_dim)
            # innovation_coeff_t: (num_samples, batch_size, 1, latent_dim)
            emission_coeff_t, transition_coeff_t, innovation_coeff_t = [
                _broadcast_param(coeff, axes=[0], sizes=[num_samples])
                if num_samples is not None else coeff for coeff in [
                    self.emission_coeff[t],
                    self.transition_coeff[t],
                    self.innovation_coeff[t],
                ]
            ]

            # Expand residuals as well
            # residual_t: (num_samples, batch_size, obs_dim, 1)
            residual_t = (
                _broadcast_param(
                    self.residuals[t].expand_dims(axis=-1),
                    axes=[0],
                    sizes=[num_samples],
                )
                if num_samples is not None
                else self.residuals[t].expand_dims(axis=-1)
            )

            # (num_samples, batch_size, 1, obs_dim)
            samples_t = (
                F.linalg_gemm2(emission_coeff_t, samples_lat_state)
                + residual_t
                + samples_eps_obs[t]
            )
            samples_t = (
                samples_t.swapaxes(dim1=2, dim2=3)
                if num_samples is not None
                else samples_t.swapaxes(dim1=1, dim2=2)
            )
            samples_seq.append(samples_t)

            # sample next state: (num_samples, batch_size, latent_dim, 1)
            samples_lat_state = F.linalg_gemm2(
                transition_coeff_t, samples_lat_state
            ) + F.linalg_gemm2(
                innovation_coeff_t, samples_std_normal[t], transpose_a=True
            )

        # (num_samples, batch_size, seq_length, obs_dim)
        samples = F.concat(*samples_seq, dim=-2)
        return (
            samples
            if scale is None
            else F.broadcast_mul(
                samples,
                scale.expand_dims(axis=1).expand_dims(axis=0)
                if num_samples is not None
                else scale.expand_dims(axis=1),
            )
        )
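The loop above is the standard linear dynamical system recursion: an observation z_t = C_t l_t + r_t + eps_t is emitted from the latent state, and the state advances as l_{t+1} = A_t l_t + b_t nu_t. A minimal numpy sketch of the same recursion with time-invariant coefficients (all names illustrative):

import numpy as np

rng = np.random.default_rng(0)
latent_dim, obs_dim, seq_length = 2, 1, 5

A = np.eye(latent_dim)                       # transition coefficient
b = 0.1 * np.ones(latent_dim)                # innovation coefficient
C = rng.normal(size=(obs_dim, latent_dim))   # emission coefficient
noise_std = 0.05

l = rng.normal(size=latent_dim)              # sample of the prior latent state
samples = []
for t in range(seq_length):
    z = C @ l + noise_std * rng.normal(size=obs_dim)  # emit observation
    samples.append(z)
    l = A @ l + b * rng.normal()                      # advance latent state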
Code example #7
    StudentT,
    Uniform,
    TransformedDistribution,
    Dirichlet,
    DirichletMultinomial,
)
from gluonts.distribution.bijection import AffineTransformation
from gluonts.distribution.box_cox_transform import BoxCoxTransform


@pytest.mark.parametrize(
    "distr, expected_batch_shape, expected_event_shape",
    [
        (
            Gaussian(
                mu=mx.nd.zeros(shape=(3, 4, 5)),
                sigma=mx.nd.ones(shape=(3, 4, 5)),
            ),
            (3, 4, 5),
            (),
        ),
        (
            Gamma(
                alpha=mx.nd.ones(shape=(3, 4, 5)),
                beta=mx.nd.ones(shape=(3, 4, 5)),
            ),
            (3, 4, 5),
            (),
        ),
        (
            Beta(
                alpha=mx.nd.ones(shape=(3, 4, 5)),
Code example #8
    def sample(
        self, num_samples: Optional[int] = None, scale: Optional[Tensor] = None
    ) -> Tensor:
        r"""
        Generates samples from the LDS: p(z_1, z_2, \ldots, z_{`seq_length`}).

        Parameters
        ----------
        num_samples
            Number of samples to generate
        scale
            Scale of each sequence in x, shape (batch_size, output_dim)

        Returns
        -------
        Tensor
            Samples, shape (num_samples, batch_size, seq_length, output_dim)
        """
        F = self.F

        # Note on shapes: here we work with tensors of the following shape
        # in each time step t: (num_samples, batch_size, dim, dim),
        # where dim can be obs_dim or latent_dim or a constant 1 to facilitate
        # generalized matrix multiplication (gemm2)

        # Sample observation noise for all time steps
        # noise_std: (batch_size, seq_length, obs_dim, 1)
        noise_std = F.stack(*self.noise_std, axis=1).expand_dims(axis=-1)

        # samples_eps_obs[t]: (num_samples, batch_size, obs_dim, 1)
        samples_eps_obs = (
            Gaussian(noise_std.zeros_like(), noise_std)
            .sample(num_samples)
            .split(axis=2, num_outputs=self.seq_length, squeeze_axis=True)
        )

        # Sample standard normal for all time steps
        # samples_std_normal[t]: (num_samples, batch_size, obs_dim, 1)
        samples_std_normal = (
            Gaussian(noise_std.zeros_like(), noise_std.ones_like())
            .sample(num_samples)
            .split(axis=2, num_outputs=self.seq_length, squeeze_axis=True)
        )

        # Sample the prior state.
        # samples_lat_state: (num_samples, batch_size, latent_dim, 1)
        state = MultivariateGaussian(
            self.prior_mean, F.linalg_potrf(self.prior_cov)
        )
        samples_lat_state = state.sample(num_samples).expand_dims(axis=-1)

        samples_seq = []
        for t in range(self.seq_length):
            # Expand all coefficients to include samples in axis 0
            # emission_coeff_t: (num_samples, batch_size, obs_dim, latent_dim)
            # transition_coeff_t:
            #   (num_samples, batch_size, latent_dim, latent_dim)
            # innovation_coeff_t: (num_samples, batch_size, 1, latent_dim)
            emission_coeff_t, transition_coeff_t, innovation_coeff_t = [
                _broadcast_param(coeff, axes=[0], sizes=[num_samples])
                for coeff in [
                    self.emission_coeff[t],
                    self.transition_coeff[t],
                    self.innovation_coeff[t],
                ]
            ]

            # Expand residuals as well
            # residual_t: (num_samples, batch_size, obs_dim, 1)
            residual_t = _broadcast_param(
                self.residuals[t].expand_dims(axis=-1),
                axes=[0],
                sizes=[num_samples],
            )

            # (num_samples, batch_size, 1, obs_dim)
            samples_t = (
                F.linalg_gemm2(emission_coeff_t, samples_lat_state)
                + residual_t
                + samples_eps_obs[t]
            ).swapaxes(dim1=2, dim2=3)
            samples_seq.append(samples_t)

            # sample next state: (num_samples, batch_size, latent_dim, 1)
            samples_lat_state = F.linalg_gemm2(
                transition_coeff_t, samples_lat_state
            ) + F.linalg_gemm2(
                innovation_coeff_t, samples_std_normal[t], transpose_a=True
            )

        # (num_samples, batch_size, seq_length, obs_dim)
        samples = F.concat(*samples_seq, dim=2)
        return (
            samples
            if scale is None
            else F.broadcast_mul(samples, scale.expand_dims(axis=1))
        )
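Both LDS variants lean on the `_broadcast_param` helper to tile per-batch coefficients along a new leading sample axis. Its definition does not appear in these snippets; a plausible sketch under that assumption, built from the real `expand_dims` and `broadcast_axes` NDArray ops:

import mxnet as mx

def _broadcast_param(param, axes, sizes):
    # Hypothetical reconstruction: insert a new axis at each position and
    # broadcast it to the requested size, e.g. (batch, d1, d2) with axes=[0],
    # sizes=[num_samples] becomes (num_samples, batch, d1, d2).
    for axis, size in zip(axes, sizes):
        param = param.expand_dims(axis=axis).broadcast_axes(axis=axis, size=size)
    return param

coeff = mx.nd.ones((4, 3, 3))                        # (batch, latent, latent)
assert _broadcast_param(coeff, axes=[0], sizes=[7]).shape == (7, 4, 3, 3)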