def test_categorical_likelihood(num_cats: int, cat_probs: np.ndarray,
                                hybridize: bool):
    """
    Test to check that maximizing the likelihood recovers the parameters
    """
    cat_prob = mx.nd.array(cat_probs)
    # Broadcast the target probabilities to one row per sample.
    cat_probs = mx.nd.zeros((NUM_SAMPLES, num_cats)) + cat_prob

    distr = Categorical(cat_probs.log())
    samples = distr.sample()

    # Initialize the projection bias near a normalized perturbation of the true probabilities.
    cat_prob_init = mx.nd.random_uniform(1 - TOL, 1 + TOL, num_cats) * cat_prob
    cat_prob_init = cat_prob_init / cat_prob_init.sum()

    init_biases = [cat_prob_init]

    cat_log_prob_hat = maximum_likelihood_estimate_sgd(
        CategoricalOutput(num_cats),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.05),
        num_epochs=PositiveInt(25),
    )
    cat_prob_hat = np.exp(cat_log_prob_hat)

    prob_deviation = np.abs(cat_prob_hat - cat_prob.asnumpy()).flatten()
    tolerance = (TOL * cat_prob.asnumpy()).flatten()

    assert np.all(
        np.less(prob_deviation, tolerance)
    ), f"cat_prob did not match: cat_prob = {cat_prob}, cat_prob_hat = {cat_prob_hat}"
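For reference, `Categorical` in the test above is parameterized by log-probabilities, which is why the estimate is exponentiated before comparison. A minimal standalone sketch of the sampling step, assuming the same gluonts `Categorical` the test uses is importable from `gluonts.mx.distribution`:

import mxnet as mx
import numpy as np
from gluonts.mx.distribution import Categorical

probs = mx.nd.array([0.2, 0.3, 0.5])
# One row of log-probabilities per draw, mirroring the broadcast in the test above.
log_probs = mx.nd.zeros((1000, 3)) + probs.log()
draws = Categorical(log_probs).sample().asnumpy()

# The empirical frequencies should roughly recover `probs`.
freqs = np.bincount(draws.astype(int), minlength=3) / len(draws)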
Example 2
def test_nanmixture_categorical_inference() -> None:

    nmdo = NanMixtureOutput(CategoricalOutput(3))

    args_proj = nmdo.get_args_proj()
    args_proj.initialize()
    args_proj.hybridize()

    # Constant dummy input; the distribution parameters are learned by the projection's weights.
    input = mx.nd.ones((NUM_SAMPLES,))

    trainer = mx.gluon.Trainer(
        args_proj.collect_params(), "sgd", {"learning_rate": 0.000002}
    )

    mixture_samples = mx.nd.array(cat_samples)

    # Fit the projection by SGD on the NaN-mixture loss (negative log-likelihood).
    N = 3000
    t = tqdm(list(range(N)))
    for _ in t:
        with mx.autograd.record():
            distr_args = args_proj(input)
            d = nmdo.distribution(distr_args)
            loss = d.loss(mixture_samples)
        loss.backward()
        loss_value = loss.mean().asnumpy()
        t.set_postfix({"loss": loss_value})
        trainer.step(NUM_SAMPLES)

    distr_args = args_proj(input)
    d = nmdo.distribution(distr_args)

    cat_probs_hat = d.distribution.probs.asnumpy()
    nan_prob_hat = d.nan_prob.asnumpy()

    assert np.allclose(
        cat_probs, cat_probs_hat, atol=TOL
    ), f"categorical dist: cat_probs did not match: cat_probs = {cat_probs}, cat_probs_hat = {cat_probs_hat}"
    assert (
        np.abs(nan_prob - nan_prob_hat) < TOL
    ), f"categorical dist: nan_prob did not match: nan_prob = {nan_prob}, nan_prob_hat = {nan_prob_hat}"
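The loss minimized above is the negative log-likelihood of a NaN mixture: each observation is NaN with probability `nan_prob` and is otherwise drawn from the wrapped categorical. A plain-numpy sketch of that density for a single observation (hypothetical helper, not part of the test):

def nan_mixture_prob(x, nan_prob, cat_probs):
    # NaN is observed with probability nan_prob; otherwise category x is
    # drawn from the wrapped categorical with probabilities cat_probs.
    if np.isnan(x):
        return nan_prob
    return (1 - nan_prob) * cat_probs[int(x)]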
Example 3
    def __init__(
        self,
        num_marks: int,
        interval_length: float,
        time_distr_output: TPPDistributionOutput = distribution.WeibullOutput(),  # noqa: E501
        embedding_dim: int = 5,
        num_hidden_dimensions: int = 10,
        output_scale: Optional[Tensor] = None,
        apply_log_to_rnn_inputs: bool = True,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)

        self.num_marks = num_marks
        self.interval_length = interval_length
        self.rnn_hidden_size = num_hidden_dimensions
        self.output_scale = output_scale
        self.apply_log_to_rnn_inputs = apply_log_to_rnn_inputs

        with self.name_scope():
            self.embedding = mx.gluon.nn.Embedding(
                input_dim=num_marks, output_dim=embedding_dim
            )
            self.rnn = mx.gluon.rnn.GRU(
                num_hidden_dimensions,
                input_size=embedding_dim + 1,
                layout="NTC",
            )
            # Conditional distribution over the inter-arrival times
            self.time_distr_output = time_distr_output
            self.time_distr_args_proj = self.time_distr_output.get_args_proj()
            # Conditional distribution over the marks
            if num_marks > 1:
                self.mark_distr_output = CategoricalOutput(num_marks)
                self.mark_distr_args_proj = (
                    self.mark_distr_output.get_args_proj()
                )
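To make the wiring above concrete, here is a small standalone sketch of the shapes flowing through such an embedding and GRU (sizes are hypothetical, not taken from the class above; layout "NTC" means batch, time, channel):

import mxnet as mx

# Hypothetical sizes: 4 sequences, 7 events each, 3 marks, embedding_dim=5, 10 hidden units.
embedding = mx.gluon.nn.Embedding(input_dim=3, output_dim=5)
rnn = mx.gluon.rnn.GRU(10, input_size=5 + 1, layout="NTC")
embedding.initialize()
rnn.initialize()

marks = mx.nd.random.randint(0, 3, shape=(4, 7)).astype("float32")
ia_times = mx.nd.random.uniform(shape=(4, 7, 1))   # inter-arrival times

# Concatenate mark embeddings with the times along the channel axis,
# matching the GRU's input_size of embedding_dim + 1.
rnn_input = mx.nd.concat(embedding(marks), ia_times, dim=2)
rnn_output = rnn(rnn_input)                        # shape (4, 7, 10)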
Example 4


n_cat = 3
cat_probs = np.array([0.2, 0.3, 0.5])
cat_samples = np.random.choice(
    list(range(n_cat)), p=cat_probs, size=NUM_SAMPLES
)
cat_samples = np.where(
    np.random.uniform(size=(NUM_SAMPLES,)) > nan_prob, cat_samples, np.nan
)


@pytest.mark.parametrize(
    "distribution_output",
    [GaussianOutput(), StudentTOutput(), CategoricalOutput(num_cats=2)],
)
@pytest.mark.parametrize("serialize_fn", serialize_fn_list)
def test_nanmixture_output(distribution_output, serialize_fn) -> None:

    nmdo = NanMixtureOutput(distribution_output)

    args_proj = nmdo.get_args_proj()
    args_proj.initialize()

    input = mx.nd.ones(shape=(3, 2))

    distr_args = args_proj(input)

    d = nmdo.distribution(distr_args)
    d = serialize_fn(d)
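As a quick sanity check on the NaN masking used to build `cat_samples` above, the empirical NaN rate should sit close to `nan_prob` (illustrative tolerance, not part of the original test):

empirical_nan_rate = np.isnan(cat_samples).mean()
assert abs(empirical_nan_rate - nan_prob) < 0.05   # loose bound for sampling noise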
Example 5
    PiecewiseLinearOutput,
    PoissonOutput,
    StudentTOutput,
    UniformOutput,
    ZeroAndOneInflatedBetaOutput,
    ZeroInflatedBetaOutput,
    ZeroInflatedNegativeBinomialOutput,
    ZeroInflatedPoissonOutput,
)


@pytest.mark.parametrize(
    "distr_output",
    [
        BetaOutput(),
        CategoricalOutput(num_cats=3),
        DeterministicOutput(value=42.0),
        DirichletMultinomialOutput(dim=3, n_trials=5),
        DirichletOutput(dim=4),
        EmpiricalDistributionOutput(num_samples=10,
                                    distr_output=GaussianOutput()),
        GammaOutput(),
        GaussianOutput(),
        GenParetoOutput(),
        LaplaceOutput(),
        LogitNormalOutput(),
        LoglogisticOutput(),
        LowrankMultivariateGaussianOutput(dim=5, rank=2),
        MultivariateGaussianOutput(dim=4),
        NegativeBinomialOutput(),
        OneInflatedBetaOutput(),