def test_c2st_snl_on_linearGaussian(num_dim: int, prior_str: str, set_seed):
    """Test SNL on linear Gaussian, comparing to ground truth posterior via c2st.

    Args:
        num_dim: parameter dimension of the gaussian model
        prior_str: one of "gaussian" or "uniform"
        set_seed: fixture for manual seeding
    """
    x_o = zeros((1, num_dim))
    num_samples = 500

    # likelihood_mean will be likelihood_shift + theta.
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)

    if prior_str == "gaussian":
        prior_mean = zeros(num_dim)
        prior_cov = eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
        gt_posterior = true_posterior_linear_gaussian_mvn_prior(
            x_o[0], likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        target_samples = gt_posterior.sample((num_samples,))
    else:
        prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))
        target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
            x_o, likelihood_shift, likelihood_cov, prior=prior, num_samples=num_samples
        )

    simulator = lambda theta: linear_gaussian(theta, likelihood_shift, likelihood_cov)
    infer = SNL(
        *prepare_for_sbi(simulator, prior),
        mcmc_method="slice_np",
        show_progress_bars=False,
    )

    posterior = infer(num_rounds=1, num_simulations_per_round=1000).set_default_x(x_o)
    samples = posterior.sample(sample_shape=(num_samples,), mcmc_parameters={"thin": 3})

    # Check performance based on c2st accuracy.
    check_c2st(samples, target_samples, alg=f"snle_a-{prior_str}-prior")

    # TODO: we do not have a test for SNL log_prob(). This is because the output
    # density is not normalized, so KLd does not make sense.
    if prior_str == "uniform":
        # Check whether the returned probability outside of the support is zero.
        posterior_prob = get_prob_outside_uniform_prior(posterior, num_dim)
        assert (
            posterior_prob == 0.0
        ), "The posterior probability outside of the prior support is not zero"
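
# A minimal sketch of the metric that `check_c2st` asserts on: the classifier
# two-sample test (c2st) trains a discriminator on the two sample sets and
# reports its cross-validated accuracy. Accuracy near 0.5 (chance level) means
# the sets are statistically indistinguishable. This is an illustrative
# stand-in, not the implementation behind `check_c2st`; it assumes
# scikit-learn is available.
def c2st_accuracy_sketch(samples_p, samples_q, seed: int = 0) -> float:
    import numpy as np
    from sklearn.model_selection import cross_val_score
    from sklearn.neural_network import MLPClassifier

    # Stack both sample sets and label them 0 / 1.
    data = np.concatenate([samples_p.numpy(), samples_q.numpy()])
    labels = np.concatenate([np.zeros(len(samples_p)), np.ones(len(samples_q))])

    clf = MLPClassifier(hidden_layer_sizes=(64, 64), max_iter=500, random_state=seed)
    # Mean 5-fold accuracy; ~0.5 indicates matching distributions.
    return cross_val_score(clf, data, labels, cv=5, scoring="accuracy").mean()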
def test_inference_with_restriction_estimator():
    # likelihood_mean will be likelihood_shift + theta.
    num_dim = 3
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)
    x_o = zeros(1, num_dim)
    num_samples = 500

    def linear_gaussian_nan(
        theta, likelihood_shift=likelihood_shift, likelihood_cov=likelihood_cov
    ):
        condition = theta[:, 0] < 0.0
        x = linear_gaussian(theta, likelihood_shift, likelihood_cov)
        x[condition] = float("nan")
        return x

    prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))
    target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
        x_o,
        likelihood_shift=likelihood_shift,
        likelihood_cov=likelihood_cov,
        num_samples=num_samples,
        prior=prior,
    )

    simulator, prior = prepare_for_sbi(linear_gaussian_nan, prior)
    restriction_estimator = RestrictionEstimator(prior=prior)
    proposals = [prior]
    num_rounds = 2

    for r in range(num_rounds):
        theta, x = simulate_for_sbi(simulator, proposals[-1], 1000)
        restriction_estimator.append_simulations(theta, x)
        if r < num_rounds - 1:
            _ = restriction_estimator.train()
        proposals.append(restriction_estimator.restrict_prior())

    all_theta, all_x, _ = restriction_estimator.get_simulations()

    # Any method can be used in combination with the `RestrictionEstimator`.
    inference = SNPE_C(prior=prior)
    posterior_estimator = inference.append_simulations(all_theta, all_x).train()

    # Build posterior.
    posterior = DirectPosterior(
        prior=prior, posterior_estimator=posterior_estimator
    ).set_default_x(x_o)
    samples = posterior.sample((num_samples,))

    # Compute the c2st and assert it is near chance level of 0.5.
    check_c2st(samples, target_samples, alg=f"{SNPE_C}")
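
# Hypothetical sketch (not sbi's implementation) of the idea behind
# `RestrictionEstimator`: label each theta by whether its simulation came back
# valid, train a classifier on those labels, and reject proposal draws that
# the classifier predicts to be invalid. All names here are illustrative.
def restricted_sampling_sketch(prior, theta, x, num_samples: int):
    import torch
    from sklearn.linear_model import LogisticRegression

    # valid := the simulation produced no NaNs for this parameter.
    valid = ~torch.isnan(x).any(dim=1)
    clf = LogisticRegression().fit(theta.numpy(), valid.numpy())

    accepted = []
    num_accepted = 0
    while num_accepted < num_samples:
        candidates = prior.sample((num_samples,))
        keep = torch.as_tensor(clf.predict(candidates.numpy()), dtype=torch.bool)
        accepted.append(candidates[keep])
        num_accepted += int(keep.sum())
    return torch.cat(accepted)[:num_samples]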
def test_inference_with_nan_simulator(
    method: type, exclude_invalid_x: bool, percent_nans: float, set_seed
):
    # likelihood_mean will be likelihood_shift + theta.
    num_dim = 3
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)
    x_o = zeros(1, num_dim)
    num_samples = 500
    num_simulations = 2000

    def linear_gaussian_nan(
        theta, likelihood_shift=likelihood_shift, likelihood_cov=likelihood_cov
    ):
        x = linear_gaussian(theta, likelihood_shift, likelihood_cov)
        # Set NaNs randomly.
        x[torch.rand(x.shape) < (percent_nans * 1.0 / x.shape[1])] = float("nan")
        return x

    prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))
    target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
        x_o,
        likelihood_shift=likelihood_shift,
        likelihood_cov=likelihood_cov,
        num_samples=num_samples,
        prior=prior,
    )

    simulator, prior = prepare_for_sbi(linear_gaussian_nan, prior)
    inference = method(prior)

    theta, x = simulate_for_sbi(simulator, prior, num_simulations)
    _ = inference.append_simulations(theta, x).train(
        exclude_invalid_x=exclude_invalid_x
    )
    posterior = inference.build_posterior().set_default_x(x_o)

    samples = posterior.sample((num_samples,))

    # Compute the c2st and assert it is near chance level of 0.5.
    check_c2st(samples, target_samples, alg=f"{method}")
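
# With `exclude_invalid_x=True`, simulations with invalid outputs are dropped
# before training. A minimal sketch of that filtering step (illustrative; the
# actual handling lives inside `append_simulations` / `train`):
def drop_invalid_simulations(theta, x):
    import torch

    # A row is invalid if any entry is NaN or infinite.
    is_valid = torch.isfinite(x).all(dim=1)
    return theta[is_valid], x[is_valid]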
def test_smcabc_inference_on_linear_gaussian(
    num_dim,
    prior_type: str,
    lra=False,
    sass=False,
    sass_expansion_degree=1,
    kde=False,
    kde_bandwidth="cv",
    transform=False,
    num_simulations=20000,
):
    x_o = zeros((1, num_dim))
    num_samples = 1000

    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)

    if prior_type == "gaussian":
        prior_mean = zeros(num_dim)
        prior_cov = eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
        gt_posterior = true_posterior_linear_gaussian_mvn_prior(
            x_o[0], likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        target_samples = gt_posterior.sample((num_samples,))
    elif prior_type == "uniform":
        prior = BoxUniform(-ones(num_dim), ones(num_dim))
        target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
            x_o[0], likelihood_shift, likelihood_cov, prior, num_samples
        )
    else:
        raise ValueError("Wrong prior string.")

    def simulator(theta):
        return linear_gaussian(theta, likelihood_shift, likelihood_cov)

    infer = SMC(simulator, prior, simulation_batch_size=10000, algorithm_variant="C")

    phat = infer(
        x_o,
        num_particles=1000,
        num_initial_pop=5000,
        epsilon_decay=0.5,
        num_simulations=num_simulations,
        distance_based_decay=True,
        return_summary=False,
        lra=lra,
        sass=sass,
        sass_fraction=0.5,
        sass_expansion_degree=sass_expansion_degree,
        kde=kde,
        kde_kwargs=dict(
            bandwidth=kde_bandwidth,
            transform=biject_to(prior.support) if transform else None,
        ),
    )

    check_c2st(
        phat.sample((num_samples,)) if kde else phat,
        target_samples,
        alg=f"SMCABC-{prior_type}prior-lra{lra}-sass{sass}-kde{kde}-{kde_bandwidth}",
    )
    if kde:
        samples = phat.sample((10,))
        phat.log_prob(samples)
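
# For reference, the rejection-ABC primitive that SMC-ABC refines (with
# particle populations and a decaying epsilon): keep the parameters whose
# simulated data land within epsilon of the observation, here under a
# Euclidean distance. A minimal sketch, not sbi's SMC implementation.
def rejection_abc_sketch(simulator, prior, x_o, num_simulations: int, epsilon: float):
    import torch

    theta = prior.sample((num_simulations,))
    x = simulator(theta)
    # Distance of each simulation to the (single) observation; x_o broadcasts.
    distances = torch.norm(x - x_o, dim=1)
    return theta[distances < epsilon]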
def test_c2st_snpe_on_linearGaussian(
    num_dim: int,
    prior_str: str,
    set_seed,
):
    """Test whether SNPE-C infers a simple linear-Gaussian example with known ground truth.

    Args:
        num_dim: parameter dimension of the gaussian model
        prior_str: one of "gaussian" or "uniform"
        set_seed: fixture for manual seeding
    """
    x_o = zeros(1, num_dim)
    num_samples = 1000

    # likelihood_mean will be likelihood_shift + theta.
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)

    if prior_str == "gaussian":
        prior_mean = zeros(num_dim)
        prior_cov = eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
        gt_posterior = true_posterior_linear_gaussian_mvn_prior(
            x_o[0], likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        target_samples = gt_posterior.sample((num_samples,))
    else:
        prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))
        target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
            x_o, likelihood_shift, likelihood_cov, prior=prior, num_samples=num_samples
        )

    def simulator(theta):
        return linear_gaussian(theta, likelihood_shift, likelihood_cov)

    simulator, prior = prepare_for_sbi(simulator, prior)
    inference = SNPE_C(prior, show_progress_bars=False)

    theta, x = simulate_for_sbi(simulator, prior, 2000, simulation_batch_size=1000)
    _ = inference.append_simulations(theta, x).train(training_batch_size=100)
    posterior = inference.build_posterior().set_default_x(x_o)
    samples = posterior.sample((num_samples,))

    # Compute the c2st and assert it is near chance level of 0.5.
    check_c2st(samples, target_samples, alg="snpe_c")

    # Checks for log_prob()
    if prior_str == "gaussian":
        # For the Gaussian prior, we compute the KLd between ground truth and
        # posterior.
        dkl = get_dkl_gaussian_prior(
            posterior, x_o[0], likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        max_dkl = 0.15
        assert (
            dkl < max_dkl
        ), f"D-KL={dkl} is more than 2 stds above the average performance."
    elif prior_str == "uniform":
        # Check whether the returned probability outside of the support is zero.
        posterior_prob = get_prob_outside_uniform_prior(posterior, num_dim)
        assert (
            posterior_prob == 0.0
        ), "The posterior probability outside of the prior support is not zero"

        # Check whether normalization (i.e. scaling up the density due to
        # leakage into regions without prior support) scales up the density by
        # the correct factor.
        (
            posterior_likelihood_unnorm,
            posterior_likelihood_norm,
            acceptance_prob,
        ) = get_normalization_uniform_prior(posterior, prior, x_o)
        # The acceptance probability should be *exactly* the ratio of the
        # unnormalized and the normalized likelihood. However, we allow for an
        # error margin of 1%, since the estimation of the acceptance
        # probability is random (based on rejection sampling).
        assert (
            acceptance_prob * 0.99
            < posterior_likelihood_unnorm / posterior_likelihood_norm
            < acceptance_prob * 1.01
        ), "Normalizing the posterior density using the acceptance probability failed."
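
# The final assertion above relies on the identity that the leakage-corrected
# density is the raw density-estimator output divided by the acceptance
# probability: p_norm(theta | x_o) = p_unnorm(theta | x_o) / P(theta in
# support). A Monte Carlo sketch of estimating that acceptance probability;
# `flow_sampler` is a hypothetical callable drawing from the unrestricted
# density estimator, and `get_normalization_uniform_prior` is the helper
# actually used by the test.
def estimate_acceptance_prob(flow_sampler, prior, num_draws: int = 10_000) -> float:
    # Fraction of unrestricted draws that fall inside the prior support.
    draws = flow_sampler((num_draws,))
    inside = prior.support.check(draws)
    return inside.float().mean().item()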
def test_c2st_sre_variants_on_linearGaussian(
    num_dim: int,
    num_trials: int,
    prior_str: str,
    method_str: str,
    set_seed,
):
    """Test c2st accuracy of inference with SRE on linear Gaussian model.

    Args:
        num_dim: parameter dimension of the gaussian model
        num_trials: number of i.i.d. trials in the observation
        prior_str: one of "gaussian" or "uniform"
        method_str: one of "sre" or "aalr"
        set_seed: fixture for manual seeding
    """
    x_o = zeros(num_trials, num_dim)
    num_samples = 500
    num_simulations = 2500 if num_trials == 1 else 35000

    # `likelihood_mean` will be `likelihood_shift + theta`.
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.8 * eye(num_dim)

    if prior_str == "gaussian":
        prior_mean = zeros(num_dim)
        prior_cov = eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
    else:
        prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))

    def simulator(theta):
        return linear_gaussian(theta, likelihood_shift, likelihood_cov)

    simulator, prior = prepare_for_sbi(simulator, prior)
    kwargs = dict(
        prior=prior,
        classifier="resnet",
        show_progress_bars=False,
    )

    # Should use default `num_atoms=10` for SRE; `num_atoms=2` for AALR.
    inference = SNRE_B(**kwargs) if method_str == "sre" else AALR(**kwargs)

    theta, x = simulate_for_sbi(
        simulator, prior, num_simulations, simulation_batch_size=50
    )
    _ = inference.append_simulations(theta, x).train()
    posterior = inference.build_posterior().set_default_x(x_o)
    samples = posterior.sample(
        sample_shape=(num_samples,),
        mcmc_method="slice_np_vectorized",
        mcmc_parameters={"thin": 3, "num_chains": 5},
    )

    # Get posterior samples.
    if prior_str == "gaussian":
        gt_posterior = true_posterior_linear_gaussian_mvn_prior(
            x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        target_samples = gt_posterior.sample((num_samples,))
    else:
        target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
            x_o, likelihood_shift, likelihood_cov, prior=prior, num_samples=num_samples
        )

    # Check performance based on c2st accuracy.
    check_c2st(
        samples, target_samples, alg=f"sre-{prior_str}-{method_str}-{num_trials}trials"
    )

    map_ = posterior.map(num_init_samples=1_000, init_method="prior")

    # Checks for log_prob()
    if prior_str == "gaussian" and method_str == "aalr":
        # For the Gaussian prior, we compute the KLd between ground truth and
        # posterior. We can do this only if the classifier loss was as described
        # in Hermans et al. 2020 ('aalr'), since the Durkan et al. 2020 version
        # only allows evaluation up to a constant.
        dkl = get_dkl_gaussian_prior(
            posterior, x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        max_dkl = 0.15
        assert (
            dkl < max_dkl
        ), f"KLd={dkl} is more than 2 stds above the average performance."
        assert ((map_ - gt_posterior.mean) ** 2).sum() < 0.5

    if prior_str == "uniform":
        # Check whether the returned probability outside of the support is zero.
        posterior_prob = get_prob_outside_uniform_prior(posterior, prior, num_dim)
        assert (
            posterior_prob == 0.0
        ), "The posterior probability outside of the prior support is not zero"
        assert ((map_ - ones(num_dim)) ** 2).sum() < 0.5
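
# The KLd check above only makes sense for AALR because its log_prob is
# normalized. What `get_dkl_gaussian_prior` measures is, in essence, the Monte
# Carlo estimate D_KL(p || q) ~= mean_i [log p(theta_i) - log q(theta_i)] with
# theta_i ~ p. A minimal sketch of that estimate (not the helper itself):
def monte_carlo_dkl(gt_posterior, posterior, num_samples: int = 1000) -> float:
    theta = gt_posterior.sample((num_samples,))
    # Average log-density ratio under samples from the ground-truth posterior.
    log_p = gt_posterior.log_prob(theta)
    log_q = posterior.log_prob(theta)
    return (log_p - log_q).mean().item()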
def test_c2st_and_map_snl_on_linearGaussian_different(num_dim: int, prior_str: str):
    """Test SNL on linear Gaussian, comparing to ground truth posterior via c2st.

    Args:
        num_dim: parameter dimension of the gaussian model
        prior_str: one of "gaussian" or "uniform"
    """
    num_samples = 500
    num_simulations = 3000
    trials_to_test = [1]

    # likelihood_mean will be likelihood_shift + theta.
    likelihood_shift = -1.0 * ones(num_dim)
    # Use increased cov to avoid too small posterior cov for many trials.
    likelihood_cov = 0.8 * eye(num_dim)

    if prior_str == "gaussian":
        prior_mean = zeros(num_dim)
        prior_cov = eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
    else:
        prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))

    simulator, prior = prepare_for_sbi(
        lambda theta: linear_gaussian(theta, likelihood_shift, likelihood_cov), prior
    )
    density_estimator = likelihood_nn("maf", num_transforms=3)
    inference = SNLE(density_estimator=density_estimator, show_progress_bars=False)

    theta, x = simulate_for_sbi(
        simulator, prior, num_simulations, simulation_batch_size=10000
    )
    likelihood_estimator = inference.append_simulations(theta, x).train()

    # Test inference amortized over trials.
    for num_trials in trials_to_test:
        x_o = zeros((num_trials, num_dim))
        if prior_str == "gaussian":
            gt_posterior = true_posterior_linear_gaussian_mvn_prior(
                x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
            )
            target_samples = gt_posterior.sample((num_samples,))
        elif prior_str == "uniform":
            target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
                x_o,
                likelihood_shift,
                likelihood_cov,
                prior=prior,
                num_samples=num_samples,
            )
        else:
            raise ValueError(f"Wrong prior_str: '{prior_str}'.")

        potential_fn, theta_transform = likelihood_estimator_based_potential(
            prior=prior, likelihood_estimator=likelihood_estimator, x_o=x_o
        )
        posterior = MCMCPosterior(
            proposal=prior,
            potential_fn=potential_fn,
            theta_transform=theta_transform,
            method="slice_np_vectorized",
            thin=5,
            num_chains=5,
        )

        samples = posterior.sample(sample_shape=(num_samples,))

        # Check performance based on c2st accuracy.
        check_c2st(
            samples, target_samples, alg=f"snle_a-{prior_str}-prior-{num_trials}-trials"
        )

        map_ = posterior.map(
            num_init_samples=1_000, init_method="proposal", show_progress_bars=False
        )

        # TODO: we do not have a test for SNL log_prob(). This is because the
        # output density is not normalized, so KLd does not make sense.
        if prior_str == "uniform":
            # Check whether the returned probability outside of the support is zero.
            posterior_prob = get_prob_outside_uniform_prior(posterior, prior, num_dim)
            assert (
                posterior_prob == 0.0
            ), "The posterior probability outside of the prior support is not zero"

            assert ((map_ - ones(num_dim)) ** 2).sum() < 0.5
        else:
            assert ((map_ - gt_posterior.mean) ** 2).sum() < 0.5
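
# `likelihood_estimator_based_potential` wraps the learned likelihood into an
# unnormalized log-posterior for MCMC. Conceptually (a sketch, not sbi's
# internals), the potential sums the estimated log-likelihood over the i.i.d.
# trials in x_o and adds the log-prior; `likelihood_log_prob` is a
# hypothetical callable standing in for the trained density estimator.
def potential_sketch(likelihood_log_prob, prior, x_o):
    def potential(theta):
        # log p(x_o | theta) factorizes over trials for i.i.d. observations.
        log_likelihood = sum(likelihood_log_prob(x_i, theta) for x_i in x_o)
        return log_likelihood + prior.log_prob(theta)

    return potential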
def test_c2st_snl_on_linearGaussian_different_dims_and_trials(
    num_dim: int, prior_str: str, set_seed
):
    """Test SNL on linear Gaussian, comparing to ground truth posterior via c2st.

    Args:
        num_dim: parameter dimension of the gaussian model
        prior_str: one of "gaussian" or "uniform"
        set_seed: fixture for manual seeding
    """
    num_samples = 500
    num_simulations = 7500
    trials_to_test = [1, 5, 10]

    # likelihood_mean will be likelihood_shift + theta.
    likelihood_shift = -1.0 * ones(num_dim)
    # Use increased cov to avoid too small posterior cov for many trials.
    likelihood_cov = 0.8 * eye(num_dim)

    if prior_str == "gaussian":
        prior_mean = zeros(num_dim)
        prior_cov = eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
    else:
        prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim))

    simulator, prior = prepare_for_sbi(
        lambda theta: linear_gaussian(theta, likelihood_shift, likelihood_cov), prior
    )
    inference = SNL(prior, show_progress_bars=False)

    theta, x = simulate_for_sbi(
        simulator, prior, num_simulations, simulation_batch_size=50
    )
    _ = inference.append_simulations(theta, x).train()

    # Test inference amortized over trials.
    for num_trials in trials_to_test:
        x_o = zeros((num_trials, num_dim))
        if prior_str == "gaussian":
            gt_posterior = true_posterior_linear_gaussian_mvn_prior(
                x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
            )
            target_samples = gt_posterior.sample((num_samples,))
        else:
            target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
                x_o,
                likelihood_shift,
                likelihood_cov,
                prior=prior,
                num_samples=num_samples,
            )

        posterior = inference.build_posterior(
            mcmc_method="slice_np_vectorized"
        ).set_default_x(x_o)
        samples = posterior.sample(
            sample_shape=(num_samples,),
            mcmc_parameters={"thin": 3, "num_chains": 2},
        )

        # Check performance based on c2st accuracy.
        check_c2st(
            samples, target_samples, alg=f"snle_a-{prior_str}-prior-{num_trials}-trials"
        )

        map_ = posterior.map(num_init_samples=1_000, init_method="prior")

        # TODO: we do not have a test for SNL log_prob(). This is because the
        # output density is not normalized, so KLd does not make sense.
        if prior_str == "uniform":
            # Check whether the returned probability outside of the support is zero.
            posterior_prob = get_prob_outside_uniform_prior(posterior, prior, num_dim)
            assert (
                posterior_prob == 0.0
            ), "The posterior probability outside of the prior support is not zero"

            assert ((map_ - ones(num_dim)) ** 2).sum() < 0.5
        else:
            assert ((map_ - gt_posterior.mean) ** 2).sum() < 0.5
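
# Ground truth used above: with a Gaussian prior and the linear-Gaussian
# simulator x ~ N(theta + likelihood_shift, likelihood_cov), the posterior
# given num_trials i.i.d. observations is available in closed form via the
# standard conjugate update. A minimal sketch of what
# `true_posterior_linear_gaussian_mvn_prior` provides:
def conjugate_gaussian_posterior_sketch(
    x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
):
    import torch
    from torch.distributions import MultivariateNormal

    num_trials = x_o.shape[0]
    prior_precision = torch.inverse(prior_cov)
    likelihood_precision = torch.inverse(likelihood_cov)

    # Posterior precision accumulates one likelihood term per trial.
    post_precision = prior_precision + num_trials * likelihood_precision
    post_cov = torch.inverse(post_precision)
    # Each trial contributes (x_i - likelihood_shift) as an observation of theta.
    post_mean = post_cov @ (
        prior_precision @ prior_mean
        + likelihood_precision @ (x_o - likelihood_shift).sum(dim=0)
    )
    return MultivariateNormal(post_mean, post_cov)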