def main(args):
    """Fit three pooling variants of the baseball model with NUTS and log the results.

    For each of the fully pooled, not pooled and partially pooled models this
    runs MCMC on the first two columns of the training split (at-bats and
    hits), logs a summary of one latent site, draws posterior predictive
    samples and logs the log predictive density.

    :param args: object exposing ``num_samples`` and ``warmup_steps``. Both are
        forwarded to ``MCMC``; ``num_samples`` is also used as the number of
        ``TracePredictive`` samples.
    """
    # ``sep`` is passed by keyword: pandas >= 2.0 makes every ``read_csv``
    # argument after the path keyword-only, so the positional form raises.
    baseball_dataset = pd.read_csv(DATA_URL, sep="\t")
    train, _, player_names = train_test_split(baseball_dataset)
    at_bats, hits = train[:, 0], train[:, 1]
    # A single kernel over ``conditioned_model`` is shared by all three runs;
    # the concrete model is supplied as the first argument of ``run``.
    nuts_kernel = NUTS(conditioned_model, adapt_step_size=True)
    logging.info("Original Dataset:")
    logging.info(baseball_dataset)

    # (1) Full Pooling Model
    posterior_fully_pooled = MCMC(
        nuts_kernel,
        num_samples=args.num_samples,
        warmup_steps=args.warmup_steps,
    ).run(fully_pooled, at_bats, hits)
    logging.info("\nModel: Fully Pooled")
    logging.info("===================")
    logging.info("\nphi:")
    logging.info(
        summary(posterior_fully_pooled,
                sites=["phi"],
                player_names=player_names)["phi"])
    posterior_predictive = TracePredictive(fully_pooled,
                                           posterior_fully_pooled,
                                           num_samples=args.num_samples)
    sample_posterior_predictive(posterior_predictive, baseball_dataset)
    evaluate_log_predictive_density(fully_pooled, posterior_fully_pooled,
                                    baseball_dataset)

    # (2) No Pooling Model
    posterior_not_pooled = MCMC(
        nuts_kernel,
        num_samples=args.num_samples,
        warmup_steps=args.warmup_steps,
    ).run(not_pooled, at_bats, hits)
    logging.info("\nModel: Not Pooled")
    logging.info("=================")
    logging.info("\nphi:")
    logging.info(
        summary(posterior_not_pooled, sites=["phi"],
                player_names=player_names)["phi"])
    posterior_predictive = TracePredictive(not_pooled,
                                           posterior_not_pooled,
                                           num_samples=args.num_samples)
    sample_posterior_predictive(posterior_predictive, baseball_dataset)
    evaluate_log_predictive_density(not_pooled, posterior_not_pooled,
                                    baseball_dataset)

    # (3) Partially Pooled Model
    posterior_partially_pooled = MCMC(
        nuts_kernel,
        num_samples=args.num_samples,
        warmup_steps=args.warmup_steps,
    ).run(partially_pooled, at_bats, hits)
    logging.info("\nModel: Partially Pooled")
    logging.info("=======================")
    logging.info("\nSigmoid(alpha):")
    # ``alpha`` is reported after a sigmoid transform, matching the
    # "Sigmoid(alpha)" header logged above.
    logging.info(
        summary(posterior_partially_pooled,
                sites=["alpha"],
                player_names=player_names,
                transforms={"alpha": lambda x: 1. /
                            (1 + np.exp(-x))})["alpha"])
    posterior_predictive = TracePredictive(partially_pooled,
                                           posterior_partially_pooled,
                                           num_samples=args.num_samples)
    sample_posterior_predictive(posterior_predictive, baseball_dataset)
    evaluate_log_predictive_density(partially_pooled,
                                    posterior_partially_pooled,
                                    baseball_dataset)
Ejemplo n.º 2
0
    def sampling_prediction(self, svi, x_train, y_train, x_test,
                            num_samples=1000):
        """Fit with ``svi`` and summarise posterior predictive draws at ``x_test``.

        Runs ``svi`` on the training data, wraps ``self.wrapped_model`` and the
        resulting posterior in a ``TracePredictive``, runs it on ``x_test``
        with ``None`` as the second argument, and reduces the marginals of the
        ``prediction`` and ``obs`` sites to their mean and standard deviation.

        :param svi: object whose ``run(x_train, y_train)`` returns the posterior
        :param x_train: training inputs passed to ``svi.run``
        :param y_train: training targets passed to ``svi.run``
        :param x_test: inputs passed to the predictive ``run``
        :param num_samples: number of samples given to ``TracePredictive``
        :return: tuple ``(prediction mean, prediction std, obs mean, obs std)``
        """
        site_names = ['prediction', 'obs']

        fitted_posterior = svi.run(x_train, y_train)
        predictive = TracePredictive(self.wrapped_model, fitted_posterior,
                                     num_samples=num_samples)
        predictive_traces = predictive.run(x_test, None)
        # NOTE(review): assumes get_marginal returns an array whose second
        # axis indexes the requested sites - confirm against its definition.
        draws = get_marginal(predictive_traces, site_names)

        def mean_and_std(site_draws):
            # Transpose so that ``describe`` is applied along each row.
            per_row = pd.DataFrame(site_draws).transpose()
            described = per_row.apply(pd.Series.describe, axis=1)
            return described[["mean", "std"]]

        stats_by_site = {}
        for column in range(draws.shape[1]):
            name = site_names[column]
            stats_by_site[name] = mean_and_std(draws[:, column])

        pred_stats, obs_stats = stats_by_site["prediction"], stats_by_site["obs"]
        return (pred_stats["mean"], pred_stats["std"],
                obs_stats["mean"], obs_stats["std"])
Ejemplo n.º 3
0
def evaluate_log_predictive_density(model, model_trace_posterior, baseball_dataset):
    """
    Log the log predictive density of the held-out split (season hits)
    under ``model``, averaged over the traces in ``model_trace_posterior``.

    The number of predictive samples is read from ``args.num_samples``;
    ``args`` is not a parameter and must exist at module scope.
    """
    _, held_out, _ = train_test_split(baseball_dataset)
    at_bats_season = held_out[:, 0]
    hits_season = held_out[:, 1]

    predictive = TracePredictive(conditioned_model,
                                 model_trace_posterior,
                                 num_samples=args.num_samples)
    predictive.run(model, at_bats_season, hits_season)

    per_trace_log_prob = [trace.log_prob_sum() for trace in predictive.exec_traces]
    num_traces = len(per_trace_log_prob)
    # LogSumExp trick for $log(1/num_samples \sum_i p(new_data | \theta^{i}))$,
    # with $\theta^{i}$ the parameter samples from the model's posterior.
    log_density = log_sum_exp(torch.stack(per_trace_log_prob)) - math.log(num_traces)

    logging.info("\nLog posterior predictive density")
    logging.info("---------------------------------")
    logging.info("{:.4f}\n".format(log_density))
Ejemplo n.º 4
0
def evaluate_log_predictive_density(model, model_trace_posterior, baseball_dataset):
    """
    Compute and log the log predictive density of the test split for ``model``.

    The test columns (at-bats, hits) are replayed through ``conditioned_model``
    using ``model_trace_posterior``; the per-trace log probabilities are then
    combined into a log-mean-exp. ``args.num_samples`` is taken from a
    module-level ``args`` object rather than from a parameter.
    """
    split = train_test_split(baseball_dataset)
    season = split[1]
    at_bats_season, hits_season = season[:, 0], season[:, 1]

    evaluator = TracePredictive(conditioned_model,
                                model_trace_posterior,
                                num_samples=args.num_samples)
    evaluator.run(model, at_bats_season, hits_season)

    log_probs = []
    for executed in evaluator.exec_traces:
        log_probs.append(executed.log_prob_sum())

    # log(1/N * sum_i p(new_data | theta_i)) evaluated as
    # logsumexp(log p_i) - log(N) for numerical stability.
    stacked = torch.stack(log_probs)
    normaliser = math.log(len(log_probs))
    posterior_pred_density = log_sum_exp(stacked) - normaliser

    logging.info("\nLog posterior predictive density")
    logging.info("---------------------------------")
    logging.info("{:.4f}\n".format(posterior_pred_density))
Ejemplo n.º 5
0
# Names of the sites whose posterior summaries are printed below.
sites = ["a", "bA", "bR", "bAR", "sigma"]

# Print the summary of each requested site, one site per pair of lines.
# `posterior` is expected to be defined earlier in the script.
for site, values in summary(posterior, sites).items():
    print("Site: {}".format(site))
    print(values, "\n")


def wrapped_model(x_data, y_data):
    """Run ``model`` and record its return value as the ``prediction`` site.

    The value returned by ``model(x_data, y_data)`` is wrapped in a ``Delta``
    distribution and sampled under the site name ``"prediction"``.
    """
    model_output = model(x_data, y_data)
    pyro.sample("prediction", dist.Delta(model_output))



# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model,
                             posterior,
                             num_samples=1000)
# `None` is passed where wrapped_model takes `y_data`
# (presumably so the outputs are sampled rather than observed - confirm).
post_pred = trace_pred.run(x_data, None)
# Summaries for the `prediction` site added by wrapped_model and the `obs` site.
post_summary = summary(post_pred, sites= ['prediction', 'obs'])
mu = post_summary["prediction"]
y = post_summary["obs"]

# Show the first ten rows of the `obs` summary.
print("sample y data:")
print(y.head(10))

predictions = pd.DataFrame({
    "cont_africa": x_data[:, 0],
    "rugged": x_data[:, 1],
    "mu_mean": mu["mean"],
    "mu_perc_5": mu["5%"],
    "mu_perc_95": mu["95%"],
Ejemplo n.º 6
0
# Run inference; the result is the posterior that is summarised below.
posterior = svi.run(log_gdp, is_cont_africa, ruggedness)

# Names of the sites whose posterior summaries are printed.
sites = ["a", "bA", "bR", "bAR", "sigma"]

# Print the summary of each requested site, one site per pair of lines.
for site, values in summary(posterior, sites).items():
    print("Site: {}".format(site))
    print(values, "\n")


def wrapped_model(is_cont_africa, ruggedness, log_gdp):
    """Run ``model`` and record its return value as the ``prediction`` site.

    The value returned by ``model(is_cont_africa, ruggedness, log_gdp)`` is
    wrapped in a ``Delta`` distribution and sampled as ``"prediction"``.
    """
    model_output = model(is_cont_africa, ruggedness, log_gdp)
    pyro.sample("prediction", Delta(model_output))


# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
# `None` is passed where wrapped_model takes `log_gdp`
# (presumably so the outputs are sampled rather than observed - confirm).
post_pred = trace_pred.run(is_cont_africa, ruggedness, None)
# Summaries for the `prediction` site added by wrapped_model and the `obs` site.
post_summary = summary(post_pred, sites=['prediction', 'obs'])
mu = post_summary["prediction"]
y = post_summary["obs"]

# Show the first ten rows of the `obs` summary.
print("sample y data:")
print(y.head(10))

predictions = pd.DataFrame({
    "cont_africa": x_data[:, 0],
    "rugged": x_data[:, 1],
    "mu_mean": mu["mean"],
    "mu_perc_5": mu["5%"],
    "mu_perc_95": mu["95%"],
    "y_mean": y["mean"],
Ejemplo n.º 7
0
def main(args):
    """Fit four pooling variants of the baseball model with NUTS and log the results.

    Each model gets its own ``NUTS`` kernel and ``MCMC`` run on the first two
    columns of the training split (at-bats and hits). After each run a summary
    of one latent site is logged, posterior predictive samples are drawn, and
    the log predictive density is evaluated. The partially pooled model is
    skipped when the ``CI`` environment variable is set.

    :param args: object exposing ``rng_seed``, ``num_samples``,
        ``warmup_steps`` and ``num_chains``.
    """
    pyro.set_rng_seed(args.rng_seed)
    # ``sep`` is passed by keyword: pandas >= 2.0 makes every ``read_csv``
    # argument after the path keyword-only, so the positional form raises.
    baseball_dataset = pd.read_csv(DATA_URL, sep="\t")
    train, _, player_names = train_test_split(baseball_dataset)
    at_bats, hits = train[:, 0], train[:, 1]
    logging.info("Original Dataset:")
    logging.info(baseball_dataset)
    # Predictive sample count scales with the number of chains.
    num_predictive_samples = args.num_samples * args.num_chains

    # (1) Full Pooling Model
    nuts_kernel = NUTS(fully_pooled)
    posterior_fully_pooled = MCMC(nuts_kernel,
                                  num_samples=args.num_samples,
                                  warmup_steps=args.warmup_steps,
                                  num_chains=args.num_chains).run(at_bats, hits)
    logging.info("\nModel: Fully Pooled")
    logging.info("===================")
    logging.info("\nphi:")
    logging.info(summary(posterior_fully_pooled, sites=["phi"], player_names=player_names)["phi"])
    posterior_predictive = TracePredictive(fully_pooled,
                                           posterior_fully_pooled,
                                           num_samples=num_predictive_samples)
    sample_posterior_predictive(posterior_predictive, baseball_dataset)
    evaluate_log_predictive_density(posterior_predictive, baseball_dataset)

    # (2) No Pooling Model
    nuts_kernel = NUTS(not_pooled)
    posterior_not_pooled = MCMC(nuts_kernel,
                                num_samples=args.num_samples,
                                warmup_steps=args.warmup_steps,
                                num_chains=args.num_chains).run(at_bats, hits)
    logging.info("\nModel: Not Pooled")
    logging.info("=================")
    logging.info("\nphi:")
    logging.info(summary(posterior_not_pooled, sites=["phi"], player_names=player_names)["phi"])
    posterior_predictive = TracePredictive(not_pooled,
                                           posterior_not_pooled,
                                           num_samples=num_predictive_samples)
    sample_posterior_predictive(posterior_predictive, baseball_dataset)
    evaluate_log_predictive_density(posterior_predictive, baseball_dataset)

    # (3) Partially Pooled Model
    # TODO: remove once https://github.com/uber/pyro/issues/1458 is resolved
    if "CI" not in os.environ:
        nuts_kernel = NUTS(partially_pooled)
        posterior_partially_pooled = MCMC(nuts_kernel,
                                          num_samples=args.num_samples,
                                          warmup_steps=args.warmup_steps,
                                          num_chains=args.num_chains).run(at_bats, hits)
        logging.info("\nModel: Partially Pooled")
        logging.info("=======================")
        logging.info("\nphi:")
        logging.info(summary(posterior_partially_pooled, sites=["phi"],
                             player_names=player_names)["phi"])
        posterior_predictive = TracePredictive(partially_pooled,
                                               posterior_partially_pooled,
                                               num_samples=num_predictive_samples)
        sample_posterior_predictive(posterior_predictive, baseball_dataset)
        evaluate_log_predictive_density(posterior_predictive, baseball_dataset)

    # (4) Partially Pooled with Logit Model
    nuts_kernel = NUTS(partially_pooled_with_logit)
    posterior_partially_pooled_with_logit = MCMC(nuts_kernel,
                                                 num_samples=args.num_samples,
                                                 warmup_steps=args.warmup_steps,
                                                 num_chains=args.num_chains).run(at_bats, hits)
    logging.info("\nModel: Partially Pooled with Logit")
    logging.info("==================================")
    logging.info("\nSigmoid(alpha):")
    # ``alpha`` is reported after a sigmoid transform, matching the
    # "Sigmoid(alpha)" header logged above.
    logging.info(summary(posterior_partially_pooled_with_logit,
                         sites=["alpha"],
                         player_names=player_names,
                         transforms={"alpha": lambda x: 1. / (1 + (-x).exp())})["alpha"])
    posterior_predictive = TracePredictive(partially_pooled_with_logit,
                                           posterior_partially_pooled_with_logit,
                                           num_samples=num_predictive_samples)
    sample_posterior_predictive(posterior_predictive, baseball_dataset)
    evaluate_log_predictive_density(posterior_predictive, baseball_dataset)