def main(args):
    """Fit the fully pooled, not pooled, and partially pooled baseball
    batting-average models with NUTS, log posterior summaries, and run
    posterior-predictive evaluation for each.

    :param args: parsed CLI namespace providing `num_samples` and
        `warmup_steps`.
    """
    # `sep` as a keyword: positional `sep` for read_csv was deprecated and
    # removed in pandas 2.x; behavior is unchanged.
    baseball_dataset = pd.read_csv(DATA_URL, sep="\t")
    train, _, player_names = train_test_split(baseball_dataset)
    at_bats, hits = train[:, 0], train[:, 1]
    # A single NUTS kernel over `conditioned_model` is shared by every fit;
    # the concrete model is passed through MCMC.run() below.
    nuts_kernel = NUTS(conditioned_model, adapt_step_size=True)
    logging.info("Original Dataset:")
    logging.info(baseball_dataset)

    def run_and_report(model, title, site, site_label=None, transforms=None):
        """Fit `model`, log the posterior summary for `site` (optionally
        transformed), then run posterior-predictive sampling and scoring."""
        posterior = MCMC(nuts_kernel,
                         num_samples=args.num_samples,
                         warmup_steps=args.warmup_steps) \
            .run(model, at_bats, hits)
        header = "Model: {}".format(title)
        logging.info("\n" + header)
        logging.info("=" * len(header))  # underline sized to the header
        logging.info("\n{}:".format(site_label if site_label is not None else site))
        if transforms is None:
            site_summary = summary(posterior, sites=[site],
                                   player_names=player_names)
        else:
            site_summary = summary(posterior, sites=[site],
                                   player_names=player_names,
                                   transforms=transforms)
        logging.info(site_summary[site])
        posterior_predictive = TracePredictive(model, posterior,
                                               num_samples=args.num_samples)
        sample_posterior_predictive(posterior_predictive, baseball_dataset)
        evaluate_log_predictive_density(model, posterior, baseball_dataset)

    # (1) Full Pooling Model
    run_and_report(fully_pooled, "Fully Pooled", "phi")
    # (2) No Pooling Model
    run_and_report(not_pooled, "Not Pooled", "phi")
    # (3) Partially Pooled Model -- report Sigmoid(alpha) instead of phi.
    run_and_report(partially_pooled, "Partially Pooled", "alpha",
                   site_label="Sigmoid(alpha)",
                   transforms={"alpha": lambda x: 1. / (1 + np.exp(-x))})
def sampling_prediction(self, svi, x_train, y_train, x_test, num_samples = 1000):
    """Fit via SVI, draw posterior-predictive samples for `x_test`, and
    return per-point (mean, std) for the 'prediction' and 'obs' sites.

    :return: tuple (prediction mean, prediction std, obs mean, obs std),
        each a pandas Series indexed by test point.
    """
    fitted_posterior = svi.run(x_train, y_train)
    predictive = TracePredictive(self.wrapped_model, fitted_posterior,
                                 num_samples = num_samples)
    predictive_run = predictive.run(x_test, None)
    site_names = ['prediction', 'obs']
    marginal = get_marginal(predictive_run, site_names)

    def describe_column(column):
        # One row per test point after transpose; describe() across the
        # sample axis and keep only the summary stats we report.
        frame = pd.DataFrame(column).transpose()
        return frame.apply(pd.Series.describe, axis=1)[["mean", "std"]]

    site_stats = {site_names[idx]: describe_column(marginal[:, idx])
                  for idx in range(marginal.shape[1])}
    prediction_stats = site_stats["prediction"]
    obs_stats = site_stats["obs"]
    return (prediction_stats["mean"], prediction_stats["std"],
            obs_stats["mean"], obs_stats["std"])
def evaluate_log_predictive_density(model, model_trace_posterior, baseball_dataset,
                                    num_samples=None):
    """
    Evaluate the log probability density of observing the unseen data
    (season hits) given a model and empirical distribution over the parameters.

    :param model: model callable to condition on the held-out data.
    :param model_trace_posterior: posterior traces from a completed MCMC run.
    :param baseball_dataset: full dataset; the held-out split is taken here.
    :param num_samples: number of predictive samples; defaults to the
        module-level ``args.num_samples`` (preserves the original behavior,
        which read the global ``args`` directly).
    """
    _, test, player_names = train_test_split(baseball_dataset)
    at_bats_season, hits_season = test[:, 0], test[:, 1]
    if num_samples is None:
        # NOTE(review): falls back to the module-level `args` exactly as the
        # original did -- confirm `args` is defined at module scope (e.g. in
        # the __main__ block) before relying on the default.
        num_samples = args.num_samples
    test_eval = TracePredictive(conditioned_model, model_trace_posterior,
                                num_samples=num_samples)
    test_eval.run(model, at_bats_season, hits_season)
    trace_log_pdf = [tr.log_prob_sum() for tr in test_eval.exec_traces]
    # Use LogSumExp trick to evaluate $log(1/num_samples \sum_i p(new_data | \theta^{i})) $,
    # where $\theta^{i}$ are parameter samples from the model's posterior.
    posterior_pred_density = log_sum_exp(torch.stack(trace_log_pdf)) - math.log(len(trace_log_pdf))
    logging.info("\nLog posterior predictive density")
    logging.info("---------------------------------")
    logging.info("{:.4f}\n".format(posterior_pred_density))
# Script fragment: summarize SVI posterior sites, then build a posterior
# predictive distribution over the regression outputs.
# NOTE(review): this fragment is truncated -- the `predictions` DataFrame
# literal below is never closed in the visible source.
sites = ["a", "bA", "bR", "bAR", "sigma"]
# Print summary statistics for each latent site of interest.
for site, values in summary(posterior, sites).items():
    print("Site: {}".format(site))
    print(values, "\n")

def wrapped_model(x_data, y_data):
    # Record the model's output as a deterministic "prediction" sample site
    # so it appears in predictive traces alongside "obs".
    pyro.sample("prediction", dist.Delta(model(x_data, y_data)))

# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
post_pred = trace_pred.run(x_data, None)  # y=None: sample obs rather than condition
post_summary = summary(post_pred, sites=['prediction', 'obs'])
mu = post_summary["prediction"]  # predictive mean site stats
y = post_summary["obs"]          # sampled-observation site stats
print("sample y data:")
print(y.head(10))
# Assemble per-point predictive quantiles next to the input features.
predictions = pd.DataFrame({
    "cont_africa": x_data[:, 0],
    "rugged": x_data[:, 1],
    "mu_mean": mu["mean"],
    "mu_perc_5": mu["5%"],
    "mu_perc_95": mu["95%"],
# Script fragment (variant of the block above, GDP/ruggedness regression):
# run SVI, summarize posterior sites, then sample the posterior predictive.
# NOTE(review): this fragment is truncated -- the `predictions` DataFrame
# literal below is never closed in the visible source.
posterior = svi.run(log_gdp, is_cont_africa, ruggedness)
sites = ["a", "bA", "bR", "bAR", "sigma"]
# Print summary statistics for each latent site of interest.
for site, values in summary(posterior, sites).items():
    print("Site: {}".format(site))
    print(values, "\n")

def wrapped_model(is_cont_africa, ruggedness, log_gdp):
    # Record the model's output as a deterministic "prediction" sample site
    # so it appears in predictive traces alongside "obs".
    pyro.sample("prediction", Delta(model(is_cont_africa, ruggedness, log_gdp)))

# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
post_pred = trace_pred.run(is_cont_africa, ruggedness, None)  # log_gdp=None: sample obs
post_summary = summary(post_pred, sites=['prediction', 'obs'])
mu = post_summary["prediction"]  # predictive mean site stats
y = post_summary["obs"]          # sampled-observation site stats
print("sample y data:")
print(y.head(10))
# Assemble per-point predictive quantiles next to the input features.
# NOTE(review): `x_data` is not defined anywhere in this fragment -- the
# inputs here are named is_cont_africa/ruggedness; presumably stale code
# copied from the x_data variant above. Verify against the full file.
predictions = pd.DataFrame({
    "cont_africa": x_data[:, 0],
    "rugged": x_data[:, 1],
    "mu_mean": mu["mean"],
    "mu_perc_5": mu["5%"],
    "mu_perc_95": mu["95%"],
    "y_mean": y["mean"],
def main(args):
    """Fit four baseball batting-average models (fully pooled, not pooled,
    partially pooled, partially pooled with logit) using multi-chain NUTS,
    log posterior summaries, and run posterior-predictive evaluation.

    :param args: parsed CLI namespace providing `rng_seed`, `num_samples`,
        `warmup_steps`, and `num_chains`.
    """
    pyro.set_rng_seed(args.rng_seed)
    # `sep` as a keyword: positional `sep` for read_csv was deprecated and
    # removed in pandas 2.x; behavior is unchanged.
    baseball_dataset = pd.read_csv(DATA_URL, sep="\t")
    train, _, player_names = train_test_split(baseball_dataset)
    at_bats, hits = train[:, 0], train[:, 1]
    logging.info("Original Dataset:")
    logging.info(baseball_dataset)
    # Predictive sample count spans all chains.
    num_predictive_samples = args.num_samples * args.num_chains

    def run_and_report(model, title, site, site_label=None, transforms=None):
        """Fit `model` with NUTS/MCMC, log the posterior summary for `site`
        (optionally transformed), then run posterior-predictive checks."""
        nuts_kernel = NUTS(model)
        posterior = MCMC(nuts_kernel,
                         num_samples=args.num_samples,
                         warmup_steps=args.warmup_steps,
                         num_chains=args.num_chains).run(at_bats, hits)
        header = "Model: {}".format(title)
        logging.info("\n" + header)
        logging.info("=" * len(header))  # underline sized to the header
        logging.info("\n{}:".format(site_label if site_label is not None else site))
        if transforms is None:
            site_summary = summary(posterior, sites=[site],
                                   player_names=player_names)
        else:
            site_summary = summary(posterior, sites=[site],
                                   player_names=player_names,
                                   transforms=transforms)
        logging.info(site_summary[site])
        posterior_predictive = TracePredictive(model, posterior,
                                               num_samples=num_predictive_samples)
        sample_posterior_predictive(posterior_predictive, baseball_dataset)
        evaluate_log_predictive_density(posterior_predictive, baseball_dataset)

    # (1) Full Pooling Model
    run_and_report(fully_pooled, "Fully Pooled", "phi")
    # (2) No Pooling Model
    run_and_report(not_pooled, "Not Pooled", "phi")
    # (3) Partially Pooled Model
    # TODO: remove once https://github.com/uber/pyro/issues/1458 is resolved
    # (fixed "htps" typo in the URL above)
    if "CI" not in os.environ:
        run_and_report(partially_pooled, "Partially Pooled", "phi")
    # (4) Partially Pooled with Logit Model
    # NOTE(review): assumed to run unconditionally (outside the CI guard) --
    # the collapsed source makes the original indentation ambiguous; confirm
    # against upstream.
    run_and_report(partially_pooled_with_logit, "Partially Pooled with Logit",
                   "alpha",
                   site_label="Sigmoid(alpha)",
                   transforms={"alpha": lambda x: 1. / (1 + (-x).exp())})