def evaluate_log_predictive_density(model, model_trace_posterior, baseball_dataset):
    """
    Log the log posterior predictive density of the held-out season data.

    The test split of ``baseball_dataset`` supplies the season at-bats
    (column 0) and season hits (column 1). Traces are produced by
    ``TracePredictive`` from ``model_trace_posterior``, using
    ``args.num_samples`` samples, and the per-trace log probabilities are
    averaged in log space.

    :param model: model forwarded to ``TracePredictive.run``.
    :param model_trace_posterior: posterior handed to ``TracePredictive``.
    :param baseball_dataset: dataset passed to ``train_test_split``.
    :return: ``None``; the density is reported through ``logging.info``.
    """
    # Only the test split is needed here; the other return values are ignored.
    _, held_out, _ = train_test_split(baseball_dataset)
    at_bats_season = held_out[:, 0]
    hits_season = held_out[:, 1]

    predictive = TracePredictive(
        conditioned_model,
        model_trace_posterior,
        num_samples=args.num_samples,
    )
    predictive.run(model, at_bats_season, hits_season)

    per_trace_log_prob = [trace.log_prob_sum() for trace in predictive.exec_traces]
    num_traces = len(per_trace_log_prob)

    # LogSumExp trick: $log(1/num_samples \sum_i p(new_data | \theta^{i}))$,
    # where $\theta^{i}$ are parameter samples from the model's posterior.
    # Subtracting log(num_traces) turns the log-sum into a log-mean.
    stacked_log_prob = torch.stack(per_trace_log_prob)
    log_density = log_sum_exp(stacked_log_prob) - math.log(num_traces)

    logging.info("\nLog posterior predictive density")
    logging.info("---------------------------------")
    logging.info("{:.4f}\n".format(log_density))
def sampling_prediction(self, svi, x_train, y_train, x_test, num_samples=1000):
    """
    Run inference on the training data and summarise predictive samples.

    ``svi.run`` is called on the training data, and its result is given to
    ``TracePredictive`` together with ``self.wrapped_model``. The predictive
    run on ``x_test`` (with ``None`` in place of targets) is reduced with
    ``get_marginal`` over the ``'prediction'`` and ``'obs'`` sites, and the
    mean and standard deviation of each site are extracted.

    :param svi: object whose ``run(x_train, y_train)`` yields the posterior.
    :param x_train: training inputs forwarded to ``svi.run``.
    :param y_train: training targets forwarded to ``svi.run``.
    :param x_test: inputs forwarded to ``TracePredictive.run``.
    :param num_samples: number of samples requested from ``TracePredictive``.
    :return: tuple ``(prediction mean, prediction std, obs mean, obs std)``.
    """
    site_names = ['prediction', 'obs']

    fitted_posterior = svi.run(x_train, y_train)
    predictive = TracePredictive(
        self.wrapped_model, fitted_posterior, num_samples=num_samples
    )
    predictive_samples = predictive.run(x_test, None)
    marginal = get_marginal(predictive_samples, site_names)

    def _mean_and_std(column):
        # Axis 1 of ``marginal`` selects the site. After the transpose,
        # ``describe`` is applied row by row (presumably one row per test
        # point -- confirm against ``get_marginal``).
        per_row = pd.DataFrame(marginal[:, column]).transpose()
        described = per_row.apply(pd.Series.describe, axis=1)
        return described[["mean", "std"]]

    stats_by_site = {
        site_names[column]: _mean_and_std(column)
        for column in range(marginal.shape[1])
    }

    prediction_stats = stats_by_site["prediction"]
    obs_stats = stats_by_site["obs"]
    return (
        prediction_stats["mean"],
        prediction_stats["std"],
        obs_stats["mean"],
        obs_stats["std"],
    )
for site, values in summary(posterior, sites).items(): print("Site: {}".format(site)) print(values, "\n") def wrapped_model(x_data, y_data): pyro.sample("prediction", dist.Delta(model(x_data, y_data))) # posterior predictive distribution we can get samples from trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000) post_pred = trace_pred.run(x_data, None) post_summary = summary(post_pred, sites= ['prediction', 'obs']) mu = post_summary["prediction"] y = post_summary["obs"] print("sample y data:") print(y.head(10)) predictions = pd.DataFrame({ "cont_africa": x_data[:, 0], "rugged": x_data[:, 1], "mu_mean": mu["mean"], "mu_perc_5": mu["5%"], "mu_perc_95": mu["95%"], "y_mean": y["mean"], "y_perc_5": y["5%"],
sites = ["a", "bA", "bR", "bAR", "sigma"] for site, values in summary(posterior, sites).items(): print("Site: {}".format(site)) print(values, "\n") def wrapped_model(is_cont_africa, ruggedness, log_gdp): pyro.sample("prediction", Delta(model(is_cont_africa, ruggedness, log_gdp))) # posterior predictive distribution we can get samples from trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000) post_pred = trace_pred.run(is_cont_africa, ruggedness, None) post_summary = summary(post_pred, sites=['prediction', 'obs']) mu = post_summary["prediction"] y = post_summary["obs"] print("sample y data:") print(y.head(10)) predictions = pd.DataFrame({ "cont_africa": x_data[:, 0], "rugged": x_data[:, 1], "mu_mean": mu["mean"], "mu_perc_5": mu["5%"], "mu_perc_95": mu["95%"], "y_mean": y["mean"], "y_perc_5": y["5%"],