Example #1
0
def test_c2st_snl_on_linearGaussian(num_dim: int, prior_str: str, set_seed):
    """Run one round of SNL on the linear Gaussian task and validate it via c2st.

    Samples from the SNL posterior at an all-zeros observation are compared
    against reference samples of the true posterior. For the uniform prior, the
    probability reported outside of the prior support must additionally be zero.

    Args:
        num_dim: parameter dimension of the gaussian model
        prior_str: "gaussian" selects the Gaussian prior; any other value
            selects the box-uniform prior
        set_seed: fixture for manual seeding
    """
    num_samples = 500
    x_o = zeros((1, num_dim))

    # The likelihood mean is likelihood_shift + theta.
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)

    if prior_str != "gaussian":
        prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))
        target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
            x_o,
            likelihood_shift,
            likelihood_cov,
            prior=prior,
            num_samples=num_samples,
        )
    else:
        prior_mean, prior_cov = zeros(num_dim), eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
        reference_posterior = true_posterior_linear_gaussian_mvn_prior(
            x_o[0], likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        target_samples = reference_posterior.sample((num_samples,))

    def simulator(theta):
        return linear_gaussian(theta, likelihood_shift, likelihood_cov)

    # Star-unpacking is kept on purpose: whatever `prepare_for_sbi` returns is
    # forwarded positionally to the inference constructor.
    infer = SNL(
        *prepare_for_sbi(simulator, prior),
        mcmc_method="slice_np",
        show_progress_bars=False,
    )
    trained_posterior = infer(num_rounds=1, num_simulations_per_round=1000)
    posterior = trained_posterior.set_default_x(x_o)

    samples = posterior.sample(
        sample_shape=(num_samples,), mcmc_parameters={"thin": 3}
    )

    # Compare inferred and reference samples via c2st accuracy.
    check_c2st(samples, target_samples, alg=f"snle_a-{prior_str}-prior")

    # TODO: we do not have a test for SNL log_prob(). This is because the output
    # TODO: density is not normalized, so KLd does not make sense.
    if prior_str == "uniform":
        # The probability returned outside of the prior support must be zero.
        outside_prob = get_prob_outside_uniform_prior(posterior, num_dim)
        assert (
            outside_prob == 0.0
        ), "The posterior probability outside of the prior support is not zero"
def test_inference_with_restriction_estimator():
    """Check SNPE_C trained on simulations gathered with a `RestrictionEstimator`.

    The simulator returns NaN for every theta whose first coordinate is
    negative. Simulations are collected over two rounds (the second one drawn
    from the restricted prior), SNPE_C is trained on all of them, and the
    resulting posterior samples are compared to reference samples via c2st.
    """
    num_dim = 3
    num_samples = 500
    num_rounds = 2
    x_o = zeros(1, num_dim)

    # The likelihood mean is likelihood_shift + theta.
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)

    def linear_gaussian_nan(
        theta, likelihood_shift=likelihood_shift, likelihood_cov=likelihood_cov
    ):
        # Invalidate every simulation whose first parameter is negative.
        invalid = theta[:, 0] < 0.0
        sims = linear_gaussian(theta, likelihood_shift, likelihood_cov)
        sims[invalid] = float("nan")
        return sims

    prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))
    target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
        x_o,
        likelihood_shift=likelihood_shift,
        likelihood_cov=likelihood_cov,
        num_samples=num_samples,
        prior=prior,
    )

    simulator, prior = prepare_for_sbi(linear_gaussian_nan, prior)
    restriction_estimator = RestrictionEstimator(prior=prior)
    proposals = [prior]

    for round_idx in range(num_rounds):
        current_proposal = proposals[-1]
        theta, x = simulate_for_sbi(simulator, current_proposal, 1000)
        restriction_estimator.append_simulations(theta, x)
        # No training is run in the final round.
        is_last_round = round_idx >= num_rounds - 1
        if not is_last_round:
            restriction_estimator.train()
        proposals.append(restriction_estimator.restrict_prior())

    all_theta, all_x, _ = restriction_estimator.get_simulations()

    # Any method can be used in combination with the `RestrictionEstimator`.
    inference = SNPE_C(prior=prior)
    inference_with_data = inference.append_simulations(all_theta, all_x)
    posterior_estimator = inference_with_data.train()

    # Build the posterior and fix the default observation.
    direct_posterior = DirectPosterior(
        prior=prior, posterior_estimator=posterior_estimator
    )
    posterior = direct_posterior.set_default_x(x_o)

    samples = posterior.sample((num_samples,))

    # Compute the c2st and assert it is near chance level of 0.5.
    check_c2st(samples, target_samples, alg=f"{SNPE_C}")
Example #3
0
def test_inference_with_nan_simulator(
    method: type, exclude_invalid_x: bool, percent_nans: float, set_seed
):
    """Check inference when the simulator randomly emits NaN entries.

    Args:
        method: inference class; it is instantiated with the prior as its only
            argument
        exclude_invalid_x: forwarded to `train(exclude_invalid_x=...)`
        percent_nans: divided by the number of columns of `x` to obtain the
            per-entry probability of replacing a simulated value with NaN
        set_seed: fixture for manual seeding
    """
    num_dim = 3
    num_samples = 500
    num_simulations = 2000
    x_o = zeros(1, num_dim)

    # The likelihood mean is likelihood_shift + theta.
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)

    def linear_gaussian_nan(
        theta, likelihood_shift=likelihood_shift, likelihood_cov=likelihood_cov
    ):
        sims = linear_gaussian(theta, likelihood_shift, likelihood_cov)
        # Set nan randomly, entry by entry.
        nan_mask = torch.rand(sims.shape) < (percent_nans * 1.0 / sims.shape[1])
        sims[nan_mask] = float("nan")
        return sims

    prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))
    target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
        x_o,
        likelihood_shift=likelihood_shift,
        likelihood_cov=likelihood_cov,
        num_samples=num_samples,
        prior=prior,
    )

    simulator, prior = prepare_for_sbi(linear_gaussian_nan, prior)
    inference = method(prior)

    theta, x = simulate_for_sbi(simulator, prior, num_simulations)
    inference_with_data = inference.append_simulations(theta, x)
    inference_with_data.train(exclude_invalid_x=exclude_invalid_x)

    built_posterior = inference.build_posterior()
    posterior = built_posterior.set_default_x(x_o)

    samples = posterior.sample((num_samples,))

    # Compute the c2st and assert it is near chance level of 0.5.
    check_c2st(samples, target_samples, alg=f"{method}")
Example #4
0
def test_smcabc_inference_on_linear_gaussian(
    num_dim,
    prior_type: str,
    lra=False,
    sass=False,
    sass_expansion_degree=1,
    kde=False,
    kde_bandwidth="cv",
    transform=False,
    num_simulations=20000,
):
    """Run SMC-ABC on the linear Gaussian task and compare to the truth via c2st.

    Args:
        num_dim: parameter dimension of the gaussian model
        prior_type: "gaussian" or "uniform"; anything else raises `ValueError`
        lra: forwarded to the SMC call
        sass: forwarded to the SMC call
        sass_expansion_degree: forwarded to the SMC call
        kde: forwarded to the SMC call; when truthy, the returned object is
            sampled from and its `log_prob` is exercised
        kde_bandwidth: passed as `bandwidth` inside `kde_kwargs`
        transform: when truthy, `biject_to(prior.support)` is passed as the
            `transform` entry of `kde_kwargs`, otherwise `None`
        num_simulations: forwarded to the SMC call

    Raises:
        ValueError: if `prior_type` is neither "gaussian" nor "uniform".
    """
    num_samples = 1000
    x_o = zeros((1, num_dim))
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)

    if prior_type == "uniform":
        prior = BoxUniform(-ones(num_dim), ones(num_dim))
        target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
            x_o[0], likelihood_shift, likelihood_cov, prior, num_samples
        )
    elif prior_type == "gaussian":
        prior_mean, prior_cov = zeros(num_dim), eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
        reference_posterior = true_posterior_linear_gaussian_mvn_prior(
            x_o[0], likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        target_samples = reference_posterior.sample((num_samples,))
    else:
        raise ValueError("Wrong prior string.")

    def simulator(theta):
        return linear_gaussian(theta, likelihood_shift, likelihood_cov)

    infer = SMC(
        simulator, prior, simulation_batch_size=10000, algorithm_variant="C"
    )

    kde_transform = biject_to(prior.support) if transform else None
    kde_kwargs = {"bandwidth": kde_bandwidth, "transform": kde_transform}

    phat = infer(
        x_o,
        num_particles=1000,
        num_initial_pop=5000,
        epsilon_decay=0.5,
        num_simulations=num_simulations,
        distance_based_decay=True,
        return_summary=False,
        lra=lra,
        sass=sass,
        sass_fraction=0.5,
        sass_expansion_degree=sass_expansion_degree,
        kde=kde,
        kde_kwargs=kde_kwargs,
    )

    # With kde, `phat` is sampled from; otherwise it is used directly as samples.
    inferred_samples = phat.sample((num_samples,)) if kde else phat
    label = f"SMCABC-{prior_type}prior-lra{lra}-sass{sass}-kde{kde}-{kde_bandwidth}"
    check_c2st(inferred_samples, target_samples, alg=label)

    if kde:
        # Smoke-test sampling and density evaluation of the KDE result.
        probe = phat.sample((10,))
        phat.log_prob(probe)
Example #5
0
def test_c2st_snpe_on_linearGaussian(
    num_dim: int,
    prior_str: str,
    set_seed,
):
    """Test whether SNPE C infers well a simple example with available ground truth.

    Besides the c2st comparison, `log_prob()` is checked: via the KL divergence
    to the true posterior for the Gaussian prior, and via the out-of-support
    probability and the density normalization for the uniform prior.

    Args:
        num_dim: parameter dimension of the gaussian model
        prior_str: "gaussian" selects the Gaussian prior; any other value
            selects the box-uniform prior
        set_seed: fixture for manual seeding
    """
    num_samples = 1000
    x_o = zeros(1, num_dim)

    # The likelihood mean is likelihood_shift + theta.
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)

    if prior_str != "gaussian":
        prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))
        target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
            x_o,
            likelihood_shift,
            likelihood_cov,
            prior=prior,
            num_samples=num_samples,
        )
    else:
        prior_mean, prior_cov = zeros(num_dim), eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
        reference_posterior = true_posterior_linear_gaussian_mvn_prior(
            x_o[0], likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        target_samples = reference_posterior.sample((num_samples,))

    def _simulate(theta):
        return linear_gaussian(theta, likelihood_shift, likelihood_cov)

    simulator, prior = prepare_for_sbi(_simulate, prior)
    inference = SNPE_C(prior, show_progress_bars=False)

    theta, x = simulate_for_sbi(simulator, prior, 2000, simulation_batch_size=1000)
    inference_with_data = inference.append_simulations(theta, x)
    inference_with_data.train(training_batch_size=100)

    built_posterior = inference.build_posterior()
    posterior = built_posterior.set_default_x(x_o)
    samples = posterior.sample((num_samples,))

    # Compute the c2st and assert it is near chance level of 0.5.
    check_c2st(samples, target_samples, alg="snpe_c")

    # Checks for log_prob()
    if prior_str == "gaussian":
        # For the Gaussian prior, compare ground truth and posterior via the KLd.
        max_dkl = 0.15
        dkl = get_dkl_gaussian_prior(
            posterior, x_o[0], likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )

        assert (
            dkl < max_dkl
        ), f"D-KL={dkl} is more than 2 stds above the average performance."

    elif prior_str == "uniform":
        # The probability returned outside of the prior support must be zero.
        outside_prob = get_prob_outside_uniform_prior(posterior, num_dim)
        assert (
            outside_prob == 0.0
        ), "The posterior probability outside of the prior support is not zero"

        # Check whether normalization (i.e. scaling up the density due to leakage
        # into regions without prior support) scales up the density by the
        # correct factor.
        normalization = get_normalization_uniform_prior(posterior, prior, x_o)
        (
            posterior_likelihood_unnorm,
            posterior_likelihood_norm,
            acceptance_prob,
        ) = normalization

        # The acceptance probability should be *exactly* the ratio of the
        # unnormalized and the normalized likelihood. However, we allow for an
        # error margin of 1%, since the estimation of the acceptance probability
        # is random (based on rejection sampling).
        lower_bound = acceptance_prob * 0.99
        density_ratio = posterior_likelihood_unnorm / posterior_likelihood_norm
        upper_bound = acceptance_prob * 1.01
        assert (
            lower_bound < density_ratio < upper_bound
        ), "Normalizing the posterior density using the acceptance probability failed."
def test_c2st_sre_variants_on_linearGaussian(
    num_dim: int,
    num_trials: int,
    prior_str: str,
    method_str: str,
    set_seed,
):
    """Test c2st accuracy of inference with SRE on linear Gaussian model.

    Args:
        num_dim: parameter dimension of the gaussian model
        num_trials: number of rows of the all-zeros observation `x_o`; also
            selects the simulation budget (2500 for one trial, 35000 otherwise)
        prior_str: "gaussian" selects the Gaussian prior; any other value
            selects the box-uniform prior
        method_str: "sre" selects `SNRE_B`; any other value selects `AALR`
        set_seed: fixture for manual seeding
    """
    num_samples = 500
    x_o = zeros(num_trials, num_dim)
    num_simulations = 35000 if num_trials != 1 else 2500

    # `likelihood_mean` will be `likelihood_shift + theta`.
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.8 * eye(num_dim)

    if prior_str != "gaussian":
        prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))
    else:
        prior_mean, prior_cov = zeros(num_dim), eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)

    def _simulate(theta):
        return linear_gaussian(theta, likelihood_shift, likelihood_cov)

    simulator, prior = prepare_for_sbi(_simulate, prior)
    kwargs = {
        "prior": prior,
        "classifier": "resnet",
        "show_progress_bars": False,
    }

    inference_cls = SNRE_B if method_str == "sre" else AALR
    inference = inference_cls(**kwargs)

    # Should use default `num_atoms=10` for SRE; `num_atoms=2` for AALR
    theta, x = simulate_for_sbi(
        simulator, prior, num_simulations, simulation_batch_size=50
    )
    inference_with_data = inference.append_simulations(theta, x)
    inference_with_data.train()

    built_posterior = inference.build_posterior()
    posterior = built_posterior.set_default_x(x_o)

    samples = posterior.sample(
        sample_shape=(num_samples,),
        mcmc_method="slice_np_vectorized",
        mcmc_parameters={"thin": 3, "num_chains": 5},
    )

    # Draw reference samples from the ground-truth posterior.
    if prior_str != "gaussian":
        target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
            x_o, likelihood_shift, likelihood_cov, prior=prior, num_samples=num_samples
        )
    else:
        gt_posterior = true_posterior_linear_gaussian_mvn_prior(
            x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        target_samples = gt_posterior.sample((num_samples,))

    # Check performance based on c2st accuracy.
    label = f"sre-{prior_str}-{method_str}-{num_trials}trials"
    check_c2st(samples, target_samples, alg=label)

    map_ = posterior.map(num_init_samples=1_000, init_method="prior")

    # Checks for log_prob()
    if prior_str == "gaussian" and method_str == "aalr":
        # For the Gaussian prior, we compute the KLd between ground truth and
        # posterior. We can do this only if the classifier_loss was as described in
        # Hermans et al. 2020 ('aalr') since Durkan et al. 2020 version only allows
        # evaluation up to a constant.
        max_dkl = 0.15
        dkl = get_dkl_gaussian_prior(
            posterior, x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )

        assert (
            dkl < max_dkl
        ), f"KLd={dkl} is more than 2 stds above the average performance."

        # The MAP estimate must lie close to the mean of the true posterior.
        map_sq_error = ((map_ - gt_posterior.mean) ** 2).sum()
        assert map_sq_error < 0.5

    if prior_str == "uniform":
        # The probability returned outside of the prior support must be zero.
        outside_prob = get_prob_outside_uniform_prior(posterior, prior, num_dim)
        assert (
            outside_prob == 0.0
        ), "The posterior probability outside of the prior support is not zero"

        # The MAP estimate must lie close to the all-ones vector.
        map_sq_error = ((map_ - ones(num_dim)) ** 2).sum()
        assert map_sq_error < 0.5
Example #7
0
def test_c2st_and_map_snl_on_linearGaussian_different(num_dim: int, prior_str: str):
    """Test SNL on linear Gaussian, comparing to ground truth posterior via c2st.

    A MAF likelihood estimator is trained once; for every trial count in
    `trials_to_test` an `MCMCPosterior` is built on the likelihood-based
    potential, and both its samples and its MAP estimate are checked.

    Args:
        num_dim: parameter dimension of the gaussian model
        prior_str: one of "gaussian" or "uniform"

    Raises:
        ValueError: inside the trial loop, if `prior_str` is neither "gaussian"
            nor "uniform".
    """
    trials_to_test = [1]
    num_simulations = 3000
    num_samples = 500

    # The likelihood mean is likelihood_shift + theta.
    likelihood_shift = -1.0 * ones(num_dim)
    # Use increased cov to avoid too small posterior cov for many trials.
    likelihood_cov = 0.8 * eye(num_dim)

    if prior_str != "gaussian":
        prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))
    else:
        prior_mean, prior_cov = zeros(num_dim), eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)

    def _simulate(theta):
        return linear_gaussian(theta, likelihood_shift, likelihood_cov)

    simulator, prior = prepare_for_sbi(_simulate, prior)
    density_estimator = likelihood_nn("maf", num_transforms=3)
    inference = SNLE(density_estimator=density_estimator, show_progress_bars=False)

    theta, x = simulate_for_sbi(
        simulator, prior, num_simulations, simulation_batch_size=10000
    )
    inference_with_data = inference.append_simulations(theta, x)
    likelihood_estimator = inference_with_data.train()

    # Test inference amortized over trials.
    for num_trials in trials_to_test:
        x_o = zeros((num_trials, num_dim))

        if prior_str == "uniform":
            target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
                x_o,
                likelihood_shift,
                likelihood_cov,
                prior=prior,
                num_samples=num_samples,
            )
        elif prior_str == "gaussian":
            gt_posterior = true_posterior_linear_gaussian_mvn_prior(
                x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
            )
            target_samples = gt_posterior.sample((num_samples,))
        else:
            raise ValueError(f"Wrong prior_str: '{prior_str}'.")

        potential_fn, theta_transform = likelihood_estimator_based_potential(
            prior=prior, likelihood_estimator=likelihood_estimator, x_o=x_o
        )
        posterior = MCMCPosterior(
            proposal=prior,
            potential_fn=potential_fn,
            theta_transform=theta_transform,
            method="slice_np_vectorized",
            thin=5,
            num_chains=5,
        )

        samples = posterior.sample(sample_shape=(num_samples,))

        # Check performance based on c2st accuracy.
        label = f"snle_a-{prior_str}-prior-{num_trials}-trials"
        check_c2st(samples, target_samples, alg=label)

        map_ = posterior.map(
            num_init_samples=1_000,
            init_method="proposal",
            show_progress_bars=False,
        )

        # TODO: we do not have a test for SNL log_prob(). This is because the output
        # TODO: density is not normalized, so KLd does not make sense.
        if prior_str == "uniform":
            # The probability returned outside of the prior support must be zero.
            outside_prob = get_prob_outside_uniform_prior(posterior, prior, num_dim)
            assert (
                outside_prob == 0.0
            ), "The posterior probability outside of the prior support is not zero"

            # The MAP estimate must lie close to the all-ones vector.
            map_sq_error = ((map_ - ones(num_dim)) ** 2).sum()
            assert map_sq_error < 0.5
        else:
            # The MAP estimate must lie close to the mean of the true posterior.
            map_sq_error = ((map_ - gt_posterior.mean) ** 2).sum()
            assert map_sq_error < 0.5
def test_c2st_snl_on_linearGaussian_different_dims_and_trials(
    num_dim: int, prior_str: str, set_seed
):
    """Test SNL on linear Gaussian, comparing to ground truth posterior via c2st.

    The likelihood estimator is trained once and then reused for observations
    made of 1, 5 and 10 all-zeros trials; for each, the posterior samples and
    the MAP estimate are checked.

    Args:
        num_dim: parameter dimension of the gaussian model
        prior_str: "gaussian" selects the Gaussian prior; any other value
            selects the box-uniform prior
        set_seed: fixture for manual seeding
    """
    trials_to_test = [1, 5, 10]
    num_simulations = 7500
    num_samples = 500

    # The likelihood mean is likelihood_shift + theta.
    likelihood_shift = -1.0 * ones(num_dim)
    # Use increased cov to avoid too small posterior cov for many trials.
    likelihood_cov = 0.8 * eye(num_dim)

    if prior_str != "gaussian":
        prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))
    else:
        prior_mean, prior_cov = zeros(num_dim), eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)

    def _simulate(theta):
        return linear_gaussian(theta, likelihood_shift, likelihood_cov)

    simulator, prior = prepare_for_sbi(_simulate, prior)
    inference = SNL(prior, show_progress_bars=False)

    theta, x = simulate_for_sbi(
        simulator, prior, num_simulations, simulation_batch_size=50
    )
    inference_with_data = inference.append_simulations(theta, x)
    inference_with_data.train()

    # Test inference amortized over trials.
    for num_trials in trials_to_test:
        x_o = zeros((num_trials, num_dim))

        if prior_str != "gaussian":
            target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
                x_o,
                likelihood_shift,
                likelihood_cov,
                prior=prior,
                num_samples=num_samples,
            )
        else:
            gt_posterior = true_posterior_linear_gaussian_mvn_prior(
                x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
            )
            target_samples = gt_posterior.sample((num_samples,))

        built_posterior = inference.build_posterior(mcmc_method="slice_np_vectorized")
        posterior = built_posterior.set_default_x(x_o)

        mcmc_parameters = {"thin": 3, "num_chains": 2}
        samples = posterior.sample(
            sample_shape=(num_samples,), mcmc_parameters=mcmc_parameters
        )

        # Check performance based on c2st accuracy.
        label = f"snle_a-{prior_str}-prior-{num_trials}-trials"
        check_c2st(samples, target_samples, alg=label)

        map_ = posterior.map(num_init_samples=1_000, init_method="prior")

        # TODO: we do not have a test for SNL log_prob(). This is because the output
        # TODO: density is not normalized, so KLd does not make sense.
        if prior_str == "uniform":
            # The probability returned outside of the prior support must be zero.
            outside_prob = get_prob_outside_uniform_prior(posterior, prior, num_dim)
            assert (
                outside_prob == 0.0
            ), "The posterior probability outside of the prior support is not zero"

            # The MAP estimate must lie close to the all-ones vector.
            map_sq_error = ((map_ - ones(num_dim)) ** 2).sum()
            assert map_sq_error < 0.5
        else:
            # The MAP estimate must lie close to the mean of the true posterior.
            map_sq_error = ((map_ - gt_posterior.mean) ** 2).sum()
            assert map_sq_error < 0.5