def trace_summary(svi, model, x_data, y_data):
    """Fit via SVI, then summarize and plot the posterior predictive.

    Draws 10k posterior-predictive samples, builds a per-datapoint table of
    mean/std/5%/95% statistics for the 'prediction' (mu) and 'obs' sites
    sorted by ascending x, prints it, and renders the mu and obs plots.
    """
    posterior = svi.run(x_data, y_data)
    wrapped_model = wrapped_model_fn(model)
    # Posterior predictive distribution we can draw samples from.
    predictive = TracePredictive(wrapped_model, posterior, num_samples=10000)
    samples = predictive.run(x_data, None)
    stats = summary(samples, sites=['prediction', 'obs'])
    mu, obs = stats["prediction"], stats["obs"]

    # Order every column by ascending x so the plots sweep left-to-right.
    x = x_data.cpu().numpy().ravel()
    order = np.argsort(x)
    columns = {
        "x_data": x[order],
        "y_data": y_data.cpu().numpy().ravel()[order],
        "mu_mean": mu["mean"][order],
        "mu_std": mu["std"][order],
        "mu_perc_5": mu["5%"][order],
        "mu_perc_95": mu["95%"][order],
        "obs_mean": obs["mean"][order],
        "obs_std": obs["std"][order],
        "obs_perc_5": obs["5%"][order],
        "obs_perc_95": obs["95%"][order],
    }
    df = pd.DataFrame(columns)
    print(df)

    plot_mu(df)
    plt.title('trace summary: mu')
    plot_obs(df)
    plt.title('trace summary: obs')
def trace_summary(svi, x_data, y_data):
    """Summarize and plot the posterior predictive after SVI inference.

    Runs `svi` on the data, draws 1000 posterior-predictive samples from the
    module-level `wrapped_model`, prints a per-datapoint summary table for
    the 'prediction' (mu) and 'obs' sites, and plots both.

    Bug fixed: the parameters were declared ``xdata``/``ydata`` while the
    body referenced ``x_data``/``y_data``, so the arguments were ignored and
    the function silently depended on module-level globals (or raised
    NameError). The parameters are renamed to match the body.
    """
    posterior = svi.run(x_data, y_data)
    # NOTE(review): `wrapped_model` is expected to exist at module scope
    # here; sibling variants build it via wrapped_model_fn(model) — confirm.
    # posterior predictive distribution we can get samples from
    trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
    post_pred = trace_pred.run(x_data, None)
    post_summary = summary(post_pred, sites=['prediction', 'obs'])
    mu = post_summary["prediction"]
    obs = post_summary["obs"]
    df = pd.DataFrame({
        # assumes y_data is at least 2-D and the target is column 0 — TODO confirm
        "y_data": y_data.cpu().numpy()[..., 0],
        "mu_mean": mu["mean"],
        "mu_std": mu["std"],
        "mu_perc_5": mu["5%"],
        "mu_perc_95": mu["95%"],
        "obs_mean": obs["mean"],
        "obs_std": obs["std"],
        "obs_perc_5": obs["5%"],
        "obs_perc_95": obs["95%"],
    })
    print(df)
    plot_mu(df)
    plt.title('trace summary: mu')
    plot_obs(df)
    plt.title('trace summary: obs')
def analysis(svi, data):
    """Print learned params, sample the posterior predictive, plot the fit.

    Returns the per-site summary dict for the 'prediction' and 'obs' sites.
    """
    # Dump the current variational parameters for inspection.
    for name, _ in pyro.get_param_store().items():
        print(name, pyro.param(name))

    posterior = svi.run(data)

    def wrapped_model(data):
        # Expose the model's return value as a deterministic 'prediction' site.
        pyro.sample("prediction", Delta(model(data)))

    predictive = TracePredictive(wrapped_model, posterior, num_samples=1000)
    traces = predictive.run(data)
    post_summary = summary(traces, sites=['prediction', 'obs'])
    mu = post_summary["prediction"]
    y = post_summary["obs"]

    print("sample Prediction (mu) data:")
    print(mu.head(10))
    print("sample Observation (y) data:")
    print(y.head(10))

    # Last column of `data` is the target; the rest are covariates.
    x_data = data[:, :-1]
    y_data = data[:, -1]
    plot_data(x_data, y_data, "predictions")
    plot_data(x_data, y['mean'], "predictions")
    return post_summary
def trace_summary(svi, model, x_data, y_data):
    """Posterior-predictive summary plots, rows ordered by the true target."""
    posterior = svi.run(x_data, y_data)
    wrapped_model = wrapped_model_fn(model)
    # Posterior predictive distribution we can draw samples from.
    predictive = TracePredictive(wrapped_model, posterior, num_samples=1000)
    samples = predictive.run(x_data, None)
    stats = summary(samples, sites=['prediction', 'obs'])
    pred, obs = stats["prediction"], stats["obs"]

    # Sort everything by the true target so the plots sweep low-to-high y.
    y = y_data.cpu().numpy().ravel()
    order = np.argsort(y)
    frame = {
        # NOTE(review): np.linspace(0, n, n) yields n points spanning [0, n];
        # if a plain positional index was intended, np.arange(n) is the usual
        # choice — confirm before changing (kept as-is to preserve behavior).
        "Index": np.linspace(0, np.size(y), np.size(y)),
        "y_data": y[order],
        "pred_mean": pred["mean"][order],
        "pred_std": pred["std"][order],
        "pred_perc_5": pred["5%"][order],
        "pred_perc_95": pred["95%"][order],
        "obs_mean": obs["mean"][order],
        "obs_std": obs["std"][order],
        "obs_perc_5": obs["5%"][order],
        "obs_perc_95": obs["95%"][order],
    }
    df = pd.DataFrame(frame)
    print(df)

    plot_pred(df)
    plt.title('trace summary: pred')
    plot_obs(df)
    plt.title('trace summary: obs')
def trace_summary(svi, model, x_data, y_data):
    """Summarize and plot the posterior predictive with 16%/84% bands.

    Fix: removed a leftover, unused ``optim = Adam({"lr": 0.03})`` (it was
    paired with a commented-out SVI construction) — the caller supplies a
    ready `svi`, so the optimizer was dead code.
    """
    posterior = svi.run(x_data, y_data)
    wrapped_model = wrapped_model_fn(model)
    # posterior predictive distribution we can get samples from
    # NOTE(review): `nsamples` is read from module scope — confirm it is set.
    trace_pred = TracePredictive(wrapped_model, posterior, num_samples=nsamples)
    post_pred = trace_pred.run(x_data, None)
    post_summary = summary(post_pred, sites=['prediction', 'obs'])
    pred = post_summary["prediction"]
    obs = post_summary["obs"]
    # Sort rows by the true target so the fan charts read left-to-right.
    idx = np.argsort(y_data.squeeze())
    df = pd.DataFrame({
        "y_test": y_data[idx].squeeze(),
        "pred_mean": pred["mean"][idx].values,
        "pred_std": pred["std"][idx].values,
        "pred_16%": pred["16%"][idx].values,
        "pred_84%": pred["84%"][idx].values,
        "obs_mean": obs["mean"][idx].values,
        "obs_std": obs["std"][idx].values,
        "obs_16%": obs["16%"][idx].values,
        "obs_84%": obs["84%"][idx].values,
    })
    print(df)
    plot_pred(df)
    plt.title('trace summary: pred')
    plot_obs(df)
    plt.title('trace summary: obs')
def ppd_samples(
        wrapped_model, svi_posterior, data, sites, num_samples=200):
    """Draw marginal samples from the posterior predictive.

    :param wrapped_model: wrapped model exposing a 'prediction' site
    :param svi_posterior: posterior returned by ``svi.run``
    :param data: dict holding the covariate features under the 'data' key
    :param sites: list of site names to take the marginal over
    :param num_samples: number of draws from the posterior
    :return: marginal samples for the requested sites
    """
    predictive = TracePredictive(
        wrapped_model, svi_posterior, num_samples=num_samples)
    traces = predictive.run(data['data'], None)
    return get_marginal(traces, sites)
def trace_summary(svi, model, x_data, y_data):
    """Plot posterior-predictive mean/std against the true targets.

    Fix: removed a leftover, unused ``optim = Adam({"lr": 0.03})`` (it was
    paired with a commented-out SVI construction) — the caller supplies a
    ready `svi`, so the optimizer was dead code.
    """
    posterior = svi.run(x_data, y_data)
    wrapped_model = wrapped_model_fn(model)
    # posterior predictive distribution we can get samples from
    # NOTE(review): `nsamples` and `experiment_id` come from module scope.
    trace_pred = TracePredictive(wrapped_model, posterior, num_samples=nsamples)
    post_pred = trace_pred.run(x_data, None)
    post_summary = summary2(post_pred, sites=['prediction', 'obs'])
    # NOTE(review): assumes summary2 returns an array with axis 0 = sample
    # and axis 1 = site in the order requested — confirm against summary2.
    pred = post_summary[:, 0, :]
    obs = post_summary[:, 1, :]
    pred_mean = pred.mean(axis=0)
    pred_std = pred.std(axis=0)
    obs_mean = obs.mean(axis=0)
    obs_std = obs.std(axis=0)
    plot_pred(y_data, pred_mean, pred_std, experiment_id)
    plot_obs(y_data, obs_mean, pred_mean, obs_std, pred_std, experiment_id)
        # (tail of summary(): descriptive statistics for one marginal site)
        describe = partial(pd.Series.describe,
                           percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
        site_stats[site_name] = marginal_site.apply(describe, axis=1) \
            [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats


def wrapped_model(x_data, y_data):
    # Expose the model's return value as a deterministic 'prediction' site.
    pyro.sample("prediction", Delta(model(x_data, y_data)))


# data[0] holds the covariates; the last column of data[1] is the target.
posterior = svi.run(data[0], data[1][:, -1])
# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=100)
post_pred = trace_pred.run(data[0], None)
post_summary = summary(post_pred, sites=['prediction', 'obs'])
mu = post_summary["prediction"]
y = post_summary["obs"]
# Prepend the ground-truth targets so the printout shows true vs sampled.
y.insert(0, 'true', data[1].cpu().numpy())
print("sample y data:")
print(y.head(10))
df = pd.DataFrame(y)
nx = df.reset_index()  # insert a first row in Dataframe for index
nx = nx.values  # Convert Dataframe to array
# Scatter truth (blue) and posterior mean (red) against the row index.
fig = plt.figure(dpi=100, figsize=(5, 4))
plt.scatter(nx[:, 0], nx[:, 1], c='b')
plt.scatter(nx[:, 0], nx[:, 2], c='r')
# plt.errorbar(nx[:,0],nx[:,2], yerr=nx[:,3], fmt='o', c='r')
        # (tail of summary(): descriptive statistics for one marginal site)
        describe = partial(pd.Series.describe,
                           percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
        site_stats[site_name] = marginal_site.apply(describe, axis=1) \
            [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats


def wrapped_model(x_data, y_data):
    # Expose the model's return value as a deterministic 'prediction' site.
    pyro.sample("prediction", Delta(model(x_data, y_data)))


posterior = svi.run(x_data, y_data)
# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
post_pred = trace_pred.run(x_data, None)
post_summary = summary(post_pred, sites=['prediction', 'obs'])
mu = post_summary["prediction"]
y = post_summary["obs"]
# Per-datapoint table: covariates, mu/obs credible bounds, and the truth.
# Column names follow the Pyro rugged/GDP regression tutorial.
predictions = pd.DataFrame({
    "cont_africa": x_data[:, 0],
    "rugged": x_data[:, 1],
    "mu_mean": mu["mean"],
    "mu_perc_5": mu["5%"],
    "mu_perc_95": mu["95%"],
    "y_mean": y["mean"],
    "y_perc_5": y["5%"],
    "y_perc_95": y["95%"],
    "true_gdp": y_data,
})
        # (continuation: percentile list for a pd.Series.describe partial)
                           percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
        site_stats[site_name] = marginal_site.apply(describe, axis=1) \
            [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats


def wrapped_model(x_data, y_data):
    # Expose the model's return value as a deterministic 'prediction' site.
    pyro.sample("prediction", Delta(model(x_data, y_data)))


# Break
# import Ipython; Ipython.embed()
# posterior predictive distribution we can get samples from
# NOTE(review): `posterior` is not assigned in this fragment — it must come
# from an earlier part of the script; confirm.
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
post_pred = trace_pred.run(data_train[0], None)  # inputing pca components?
post_summary = summary(post_pred, sites=['prediction', 'obs'])
meuw = post_summary["prediction"]
y = post_summary["obs"]
# Prepend the ground-truth targets to both summaries for side-by-side printing.
meuw.insert(0, 'true', data_train[1].cpu().numpy())
y.insert(0, 'true', data_train[1].cpu().numpy())
print("sample meuw data:")
print(meuw.head(10))
# What's the difference between meuw and y? Means are the same but sigma is very different.
print("sample y data:")
print(y.head(10))
df = pd.DataFrame(meuw)
nx = df.reset_index()  # insert a first row in Dataframe for index
nx = nx.values  # Convert Dataframe to array
def pyro_bayesian(regression_model, y_data):
    """Train a Bayesian regression with SVI and plot posterior histograms.

    Builds an AutoDiagonalNormal guide for the module-level `model`, trains
    it, summarizes the posterior predictive for the 'prediction' and 'obs'
    sites into a DataFrame, then histograms the sampled linear weights and
    the `factor` parameter.

    Fix: a duplicated ``pyro.enable_validation(True)`` call was removed, and
    a bare-string pseudo-comment was turned into a real comment. No behavior
    change otherwise.

    NOTE(review): `x_data`, `num_iterations`, `train`, and `wrapped_model`
    are read from module scope — confirm they are defined by the caller.
    """
    def summary(traces, sites):
        # Per-site descriptive statistics over the empirical marginal samples.
        marginal = get_marginal(traces, sites)
        site_stats = {}
        for i in range(marginal.shape[1]):
            site_name = sites[i]
            marginal_site = pd.DataFrame(marginal[:, i]).transpose()
            describe = partial(pd.Series.describe,
                               percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
            site_stats[site_name] = marginal_site.apply(describe, axis=1) \
                [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
        return site_stats

    # CI testing: this code targets the Pyro 0.3.0 API.
    assert pyro.__version__.startswith('0.3.0')
    pyro.enable_validation(True)
    pyro.set_rng_seed(1)

    from pyro.contrib.autoguide import AutoDiagonalNormal
    guide = AutoDiagonalNormal(model)
    optim = Adam({"lr": 0.03})
    svi = SVI(model, guide, optim, loss=Trace_ELBO(), num_samples=1000)
    train(svi, x_data, y_data, num_iterations, regression_model)

    # Dump the learned variational parameters for inspection.
    for name, value in pyro.get_param_store().items():
        print(name, pyro.param(name))

    # Pull raw sample arrays out of an EmpiricalMarginal.
    get_marginal = lambda traces, sites: EmpiricalMarginal(
        traces, sites)._get_samples_and_weights()[0].detach().cpu().numpy()

    posterior = svi.run(x_data, y_data, regression_model)
    # posterior predictive distribution we can get samples from
    trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
    post_pred = trace_pred.run(x_data, None, regression_model)
    post_summary = summary(post_pred, sites=['prediction', 'obs'])
    mu = post_summary["prediction"]
    y = post_summary["obs"]
    predictions = pd.DataFrame({
        "x0": x_data[:, 0],
        "x1": x_data[:, 1],
        "mu_mean": mu["mean"],
        "mu_perc_5": mu["5%"],
        "mu_perc_95": mu["95%"],
        "y_mean": y["mean"],
        "y_perc_5": y["5%"],
        "y_perc_95": y["95%"],
        "true_gdp": y_data,
    })
    # print("predictions=", predictions)

    # We need to prepend `module$$$` to all parameters of nn.Modules since
    # that is how they are stored in the ParamStore.
    weight = get_marginal(posterior, ['module$$$linear.weight']).squeeze(1).squeeze(1)
    factor = get_marginal(posterior, ['module$$$factor'])
    # x0, x1, x2"-home_page, x1*x2-factor
    print("weight shape=", weight.shape)
    print("factor shape=", factor.shape)
    fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 6), sharey=True)
    ax[0].hist(weight[:, 0])
    ax[1].hist(weight[:, 1])
    ax[2].hist(factor.squeeze(1))
    plt.show()
            # (tail of summary()'s per-site loop)
            site_name = sites[i]
            marginal_site = pd.DataFrame(marginal[:, i]).transpose()
            describe = partial(pd.Series.describe,
                               percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
            site_stats[site_name] = marginal_site.apply(describe, axis=1) \
                [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats


def wrapped_model(x_data, y_data):
    # Expose the model's return value as a deterministic 'prediction' site.
    pyro.sample("prediction", Delta(model(x_data, y_data)))


posterior = svi.run(x_test, y_test)
print(posterior)
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
# NOTE(review): unlike sibling variants this passes y_test (not None) to
# TracePredictive.run — confirm that conditioning on the targets is intended.
post_pred = trace_pred.run(x_test, y_test)
post_summary = summary(post_pred, sites=['prediction', 'obs'])
mu = post_summary["prediction"]
y = post_summary["obs"]
# The next five expressions are no-op REPL/notebook leftovers: they compute
# a value and discard it (kept byte-identical; nothing is printed or stored).
len(y)
mu[:5]
y_test
mu.head()
y.head()
# Draw 100 point predictions by sampling a regression model from the guide.
preds = []
for i in range(100):
    sampled_reg_model = guide(x_test)
    pred = sampled_reg_model(x_test).data.numpy().flatten()
    preds.append(pred)
# Dump each variational parameter, then its scalar mean, for inspection.
for name, value in pyro.get_param_store().items():
    print(name, pyro.param(name))
for name, value in pyro.get_param_store().items():
    print(name, pyro.param(name).cpu().detach().numpy().mean())

posterior = svi.run(Xtrain, Ytrain)
# Break
# pdb.set_trace()
# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=100)
post_pred = trace_pred.run(Xtrain, None)  # inputing pca components?
post_summary = summary(post_pred, sites=['prediction', 'obs'])
meuw = post_summary["prediction"]
y = post_summary["obs"]
# Prepend the ground-truth targets to both summaries for side-by-side printing.
meuw.insert(0, 'true', np.array(Ytrain.cpu()))
y.insert(0, 'true', np.array(Ytrain.cpu()))
print("sample meuw data:")
print(meuw.head(10))
# What's the difference between mu and y? Means are the same but sigma is very different.
print("sample y data:")
print(y.head(10))
df = pd.DataFrame(meuw)
nx = df.reset_index()  # insert a first row in Dataframe for index
nx = nx.values  # Convert Dataframe to array
        # (tail of a plotting loop from the preceding chunk: stop once the
        # axes/data run out)
        except IndexError:
            break

# Single shared legend for the figure, then save the posterior plots.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, loc='upper right')
plt.savefig("posterior_dists.png")

# sense check with standard package implementation of logistic regression
sklearn_model = LogisticRegression(solver="lbfgs")
sklearn_model.fit(X_train_scaled, y_train)

trace_pred = TracePredictive(predictive_model, svi_meanfield_posterior,
                             num_samples=1000)
X_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_test.values, dtype=torch.float32)
posterior_predictive = trace_pred.run(X_tensor, None)
sites = ["prediction", "obs"]
# Materialize the sampled values for each site as CPU tensors.
posterior_predictive_samples = \
    {site: EmpiricalMarginal(posterior_predictive, sites=site)
     .enumerate_support().detach().cpu()
     for site in sites}
# Compare ten test points against sklearn's point estimates.
subset = posterior_predictive_samples["prediction"][:, 10:20]
y_pred_sklearn = sklearn_model.predict(X_test_scaled)
subset_sklearn = sklearn_model.predict_proba(X_test_scaled)[10:20, 1]
fig, axs = plt.subplots(nrows=5, ncols=2, figsize=(12, 10),
                        sharex=True, sharey=True)
fig.suptitle("Posterior Predictive Distributions", fontsize=16)
            # (tail of summary()'s per-site loop)
            marginal_site = pd.DataFrame(marginal[:, i]).transpose()
            describe = partial(pd.Series.describe,
                               percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
            site_stats[site_name] = marginal_site.apply(describe, axis=1) \
                [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats


def wrapped_model(x_data, y_data):
    # Expose the model's return value as a deterministic 'prediction' site.
    pyro.sample("prediction", Delta(model(x_data, y_data)))


posterior = svi.run(x_test_t, y_test_t)
# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=10000)
post_pred = trace_pred.run(x_test_t, None)
post_summary = summary(post_pred, sites=['prediction', 'obs'])
mu = post_summary["prediction"]
y = post_summary["obs"]
# The bare expressions below are no-op REPL/notebook leftovers (value
# computed and discarded); kept byte-identical.
y.head(10)
# Undo the target standardization: `std`/`mean` presumably come from the
# earlier scaling step — TODO confirm.
mu_ = mu*std+mean
mu_['mean'][:10]
y_test[:10]*std+mean
X_train[:12]
y_train_u = y_train*std+mean
y_train_u.head(5)
# Rebinds mu_ from the full frame to just the de-standardized mean column.
mu_ = mu['mean']*std+mean
mu_[:5]
    # (end of summary(): hand back the per-site statistics dict)
    return site_stats


def wrapped_model(x_data, y_data):
    # Expose the model's return value as a deterministic 'prediction' site.
    pyro.sample("prediction", Delta(model(x_data, y_data)))


# data_train[0] holds the covariates; the last column of data_train[1] is the target.
posterior = svi.run(data_train[0], data_train[1][:, -1])
# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
post_pred = trace_pred.run(data_train[0], None)  # check Why data_train[0] ?
post_summary = summary(post_pred, sites=['prediction', 'obs'])
mu = post_summary["prediction"]
y = post_summary["obs"]
# Prepend the ground-truth targets so the printout shows true vs sampled.
y.insert(0, 'true', data_train[1].cpu().numpy())
print("sample y data:")
print(y.head(10))
df = pd.DataFrame(y)
nx = df.reset_index()  # insert a first row in Dataframe for index
nx = nx.values  # Convert Dataframe to array
# Scatter truth (blue) and posterior mean (red) against the row index.
fig = plt.figure(dpi=100, figsize=(5, 4))
plt.scatter(nx[:, 0], nx[:, 1], c='b')
plt.scatter(nx[:, 0], nx[:, 2], c='r')
# plt.errorbar(nx[:,0],nx[:,2], yerr=nx[:,3], fmt='o', c='r')