def test_values(
    distr: PiecewiseLinear,
    target: List[float],
    expected_target_cdf: List[float],
    expected_target_crps: List[float],
):
    """Check ``distr.cdf`` and ``distr.crps`` against reference values.

    The targets are turned into a 1-D MXNet array and the expected values
    into 1-D NumPy arrays; the CDF and CRPS evaluated at the targets must
    be element-wise close to the expected ones. Afterwards the analytic
    CDF is compared with an empirical CDF built from 100,000 samples.

    Parameters
    ----------
    distr
        Distribution under test.
    target
        Points at which the CDF and CRPS are evaluated.
    expected_target_cdf
        Expected CDF values, one per target.
    expected_target_crps
        Expected CRPS values, one per target.
    """
    n_targets = len(target)
    target_nd = mx.nd.array(target).reshape(shape=(n_targets,))
    want_cdf = np.array(expected_target_cdf).reshape(
        (len(expected_target_cdf),)
    )
    want_crps = np.array(expected_target_crps).reshape(
        (len(expected_target_crps),)
    )

    got_cdf = distr.cdf(target_nd).asnumpy()
    assert all(np.isclose(got_cdf, want_cdf))

    got_crps = distr.crps(target_nd).asnumpy()
    assert all(np.isclose(got_crps, want_crps))

    # Cross-check the analytic CDF against one estimated from samples.
    draws = distr.sample(100_000).asnumpy()
    assert np.isfinite(draws).all()

    emp_cdf, edges = empirical_cdf(draws)
    analytic_cdf = distr.cdf(mx.nd.array(edges)).asnumpy()
    # The first row of the analytic values is dropped before comparing;
    # presumably emp_cdf has one row fewer than edges -- confirm against
    # empirical_cdf.
    assert np.allclose(analytic_cdf[1:, :], emp_cdf, atol=1e-2)
Exemple #2
0
def test_sampling(distr_class, params, serialize_fn) -> None:
    """Compare sample statistics of a distribution with its analytic values.

    The distribution is built from ``distr_class(**params)`` and passed
    through ``serialize_fn`` before use. The test checks the shape of a
    single draw and of a batch of 1,000,000 draws, that all samples are
    finite, and that the empirical mean and standard deviation are close
    to ``distr.mean`` and ``distr.stddev``. When the class is listed in
    ``DISTRIBUTIONS_WITH_CDF`` / ``DISTRIBUTIONS_WITH_QUANTILE_FUNCTION``
    the analytic CDF / quantile function is also compared with its
    empirical counterpart.
    """
    distr = serialize_fn(distr_class(**params))

    single_draw = distr.sample()
    assert single_draw.shape == (2,)

    n_draws = 1_000_000
    batch = distr.sample(n_draws)
    assert batch.shape == (n_draws, 2)

    # Work in float64: computing the std in float32 loses accuracy,
    # see https://github.com/numpy/numpy/issues/8869
    np_samples = batch.asnumpy().astype(np.float64)
    assert np.isfinite(np_samples).all()

    emp_mean = np_samples.mean(axis=0)
    assert np.allclose(emp_mean, distr.mean.asnumpy(), atol=1e-2, rtol=1e-2)

    emp_std = np_samples.std(axis=0)
    assert np.allclose(emp_std, distr.stddev.asnumpy(), atol=1e-1, rtol=1e-1)

    if distr_class in DISTRIBUTIONS_WITH_CDF:
        emp_cdf, edges = empirical_cdf(np_samples)
        analytic_cdf = distr.cdf(mx.nd.array(edges)).asnumpy()
        # First row of the analytic values is skipped before comparing.
        assert np.allclose(analytic_cdf[1:, :], emp_cdf, atol=1e-2)

    if distr_class in DISTRIBUTIONS_WITH_QUANTILE_FUNCTION:
        # 100 quantile levels, staying 1e-3 away from both 0 and 1.
        levels = np.linspace(1.0e-3, 1.0 - 1.0e-3, 100)
        emp_quantiles = np.percentile(np_samples, levels * 100, axis=0)
        analytic_quantiles = distr.quantile(mx.nd.array(levels)).asnumpy()
        assert np.allclose(analytic_quantiles, emp_quantiles, rtol=1e-1)
Exemple #3
0
def test_mixture(
    distr1: Distribution, distr2: Distribution, p: Tensor, serialize_fn
) -> None:
    """Check a two-component ``MixtureDistribution`` against a reference.

    Reference samples are produced by drawing from both components and
    picking, element-wise, the draw from ``distr1`` where a uniform random
    number is below ``p`` and the draw from ``distr2`` otherwise. The
    mixture (with probabilities ``p`` and ``1 - p``) is passed through
    ``serialize_fn`` and sampled; shapes, mean, standard deviation and
    histograms are then compared. For two Gaussian components the
    mixture CDF is additionally compared with the empirical CDF.
    """
    # Draw from each component, then select per element between them.
    samples1 = distr1.sample(num_samples=NUM_SAMPLES_LARGE)
    samples2 = distr2.sample(num_samples=NUM_SAMPLES_LARGE)

    # TODO: for multivariate case, test should not sample elements from
    # different components in the event_dim dimension
    uniform_draws = mx.nd.random.uniform(shape=(NUM_SAMPLES_LARGE, *p.shape))
    pick_first = (uniform_draws < p.expand_dims(axis=0)).broadcast_like(
        samples1
    )
    samples_ref = mx.nd.where(pick_first, samples1, samples2)

    # Build the mixture itself and draw from it.
    mixture = serialize_fn(
        MixtureDistribution(
            mixture_probs=mx.nd.stack(p, 1.0 - p, axis=-1),
            components=[distr1, distr2],
        )
    )
    samples_mix = mixture.sample(num_samples=NUM_SAMPLES_LARGE)

    # All four sample arrays must share one shape.
    all_shapes = {
        samples1.shape,
        samples2.shape,
        samples_mix.shape,
        samples_ref.shape,
    }
    assert len(all_shapes) == 1

    # Analytic vs. empirical first and second moments.
    calc_mean = mixture.mean.asnumpy()
    calc_std = mixture.stddev.asnumpy()
    mix_np = samples_mix.asnumpy()
    sample_mean = mix_np.mean(axis=0)
    sample_std = mix_np.std(axis=0)

    assert np.allclose(calc_mean, sample_mean, atol=1e-1)
    assert np.allclose(calc_std, sample_std, atol=2e-1)

    # Histograms of mixture samples and reference samples must be close.
    hist_mix = histogram(mix_np)
    hist_ref = histogram(samples_ref.asnumpy())
    assert diff(hist_mix, hist_ref) < 0.05

    # The CDF check is only performed when both components are Gaussian.
    if isinstance(distr1, Gaussian) and isinstance(distr2, Gaussian):
        emp_cdf, edges = empirical_cdf(mix_np)
        analytic_cdf = mixture.cdf(mx.nd.array(edges)).asnumpy()
        assert np.allclose(analytic_cdf[1:, :], emp_cdf, atol=1e-2)
Exemple #4
0
def test_sampling(distr_class, params) -> None:
    """Compare sample statistics of a distribution with its analytic values.

    Builds ``distr_class(**params)``, checks the shape of a single draw
    and of a batch of 100,000 draws, verifies that all samples are finite
    and that the empirical mean and standard deviation match
    ``distr.mean`` and ``distr.stddev`` within tolerance. When the class
    is listed in ``DISTRIBUTIONS_WITH_CDF`` the analytic CDF is also
    compared with the empirical CDF of the samples.
    """
    distr = distr_class(**params)
    samples = distr.sample()
    assert samples.shape == (2,)
    num_samples = 100_000
    samples = distr.sample(num_samples)
    assert samples.shape == (num_samples, 2)

    # Promote to float64 before reducing: NumPy accumulates mean/std in the
    # input precision, which is inaccurate for float32 input when reducing
    # over a long axis; see https://github.com/numpy/numpy/issues/8869
    np_samples = samples.asnumpy().astype(np.float64)

    assert np.isfinite(np_samples).all()

    assert np.allclose(
        np_samples.mean(axis=0), distr.mean.asnumpy(), atol=1e-2, rtol=1e-2
    )

    emp_std = np_samples.std(axis=0)
    assert np.allclose(emp_std, distr.stddev.asnumpy(), atol=1e-1, rtol=5e-2)

    if distr_class in DISTRIBUTIONS_WITH_CDF:
        emp_cdf, edges = empirical_cdf(np_samples)
        calc_cdf = distr.cdf(mx.nd.array(edges)).asnumpy()
        # First row of the analytic values is skipped before comparing.
        assert np.allclose(calc_cdf[1:, :], emp_cdf, atol=1e-2)