def sample_posterior_predictive(model, posterior_samples, baseball_dataset):
    """
    Generate samples from the posterior predictive distribution and log
    summary tables for both the training split (initial 45 at-bats) and
    the held-out season data.

    :param model: the Pyro model used to generate predictive samples.
    :param posterior_samples: dict of posterior samples keyed by site name.
    :param baseball_dataset: full dataset; split into train/test internally
        via ``train_test_split``.
    """
    train, test, player_names = train_test_split(baseball_dataset)
    at_bats = train[:, 0]
    at_bats_season = test[:, 0]
    # NOTE: removed a stray `logging.Formatter("%(message)s")` call here —
    # constructing a Formatter without attaching it to a Handler is a no-op.
    logging.info("\nPosterior Predictive:")
    logging.info("Hit Rate - Initial 45 At Bats")
    logging.info("-----------------------------")
    # set hits=None to convert it from observation node to sample node
    with ignore_experimental_warning():
        train_predict = predictive(model, posterior_samples, at_bats, None)
    train_summary = summary(train_predict, sites=["obs"],
                            player_names=player_names)["obs"]
    # Append the actually observed hits for side-by-side comparison.
    train_summary = train_summary.assign(ActualHits=baseball_dataset[["Hits"]].values)
    logging.info(train_summary)
    logging.info("\nHit Rate - Season Predictions")
    logging.info("-----------------------------")
    with ignore_experimental_warning():
        test_predict = predictive(model, posterior_samples, at_bats_season, None)
    test_summary = summary(test_predict, sites=["obs"],
                           player_names=player_names)["obs"]
    test_summary = test_summary.assign(ActualHits=baseball_dataset[["SeasonHits"]].values)
    logging.info(test_summary)
def test_predictive(num_samples, parallel):
    """Smoke-test ``predictive`` on a beta-bernoulli posterior: verify the
    shapes of the returned sites and that the predictive mean of ``obs``
    recovers the true probabilities within 10% relative tolerance."""
    model, data, true_probs = beta_bernoulli()
    init_params, potential_fn, transforms, _ = initialize_model(
        model, model_args=(data, ))
    kernel = NUTS(potential_fn=potential_fn, transforms=transforms)
    sampler = MCMC(kernel, 100, initial_params=init_params, warmup_steps=100)
    sampler.run(data)
    posterior = sampler.get_samples()
    # A UserWarning is expected (presumably about resampling) whenever
    # num_samples is given and differs from the 100 posterior draws.
    expect_warning = num_samples not in (None, 100)
    with ignore_experimental_warning(), \
            optional(pytest.warns(UserWarning), expect_warning):
        predictive_samples = predictive(model, posterior,
                                        num_samples=num_samples,
                                        return_sites=["beta", "obs"],
                                        parallel=parallel)
    # check shapes
    assert predictive_samples["beta"].shape == (100, 5)
    assert predictive_samples["obs"].shape == (100, 1000, 5)
    # check sample mean
    assert_close(predictive_samples["obs"].reshape([-1, 5]).mean(0),
                 true_probs, rtol=0.1)
def evaluate_log_posterior_density(model, posterior_samples, baseball_dataset):
    """
    Evaluate the log probability density of observing the unseen data
    (season hits) given a model and posterior distribution over the parameters.
    """
    _, test, player_names = train_test_split(baseball_dataset)
    at_bats_season, hits_season = test[:, 0], test[:, 1]
    with ignore_experimental_warning():
        trace = predictive(model, posterior_samples, at_bats_season,
                           hits_season, parallel=True, return_trace=True)
    # Use LogSumExp trick to evaluate $log(1/num_samples \sum_i p(new_data | \theta^{i})) $,
    # where $\theta^{i}$ are parameter samples from the model's posterior.
    trace.compute_log_prob()
    log_joint = 0.
    for site in trace.nodes.values():
        # Skip non-sample nodes and subsample sites entirely.
        if site["type"] != "sample" or site_is_subsample(site):
            continue
        # Sum all rightmost dimensions of the site's log-prob, keeping only
        # the leading dimension (the number of posterior samples).
        log_joint = log_joint + sum_rightmost(site["log_prob"], -1)
    num_draws = log_joint.shape[0]
    posterior_pred_density = torch.logsumexp(log_joint, dim=0) - math.log(num_draws)
    logging.info("\nLog posterior predictive density")
    logging.info("--------------------------------")
    logging.info("{:.4f}\n".format(posterior_pred_density))
def sample_posterior_predictive(model, posterior_samples, *args):
    """Return posterior predictive samples for ``model``, suppressing the
    experimental-API warning emitted by ``predictive``."""
    with ignore_experimental_warning():
        return predictive(model, posterior_samples, *args)