Example #1
0
def trace_summary(svi, model, x_data, y_data):
    """Summarise and plot the posterior predictive, ordered by ``x_data``.

    Runs ``svi`` on the data, draws 10000 posterior-predictive samples
    through ``TracePredictive`` and collects the ``summary`` statistics of
    the ``prediction`` and ``obs`` sites into a DataFrame whose rows follow
    the ascending order of the flattened ``x_data``.  The frame is printed
    and handed to ``plot_mu`` and ``plot_obs``.

    :param svi: object whose ``run(x_data, y_data)`` yields the posterior
    :param model: model handed to ``wrapped_model_fn``
    :param x_data: input tensor; flattened to 1-D before sorting
    :param y_data: target tensor; flattened and reordered like ``x_data``
    :return: None
    """
    svi_posterior = svi.run(x_data, y_data)
    predictive = TracePredictive(
        wrapped_model_fn(model), svi_posterior, num_samples=10000)
    site_stats = summary(
        predictive.run(x_data, None), sites=['prediction', 'obs'])
    mu_stats = site_stats["prediction"]
    obs_stats = site_stats["obs"]

    xs = x_data.cpu().numpy().ravel()
    order = np.argsort(xs)

    # (output column, statistics table, statistic) in output column order.
    layout = (
        ("mu_mean", mu_stats, "mean"),
        ("mu_std", mu_stats, "std"),
        ("mu_perc_5", mu_stats, "5%"),
        ("mu_perc_95", mu_stats, "95%"),
        ("obs_mean", obs_stats, "mean"),
        ("obs_std", obs_stats, "std"),
        ("obs_perc_5", obs_stats, "5%"),
        ("obs_perc_95", obs_stats, "95%"),
    )
    columns = {
        "x_data": xs[order],
        "y_data": y_data.cpu().numpy().ravel()[order],
    }
    for column, stats, key in layout:
        columns[column] = stats[key][order]
    frame = pd.DataFrame(columns)

    print(frame)

    plot_mu(frame)
    plt.title('trace summary: mu')
    plot_obs(frame)
    plt.title('trace summary: obs')
Example #2
0
def trace_summary(svi, xdata, ydata):
    """Print and plot posterior-predictive summary statistics.

    Runs ``svi`` on the data, draws 1000 posterior-predictive samples with
    ``TracePredictive`` and tabulates the ``summary`` statistics of the
    ``prediction`` and ``obs`` sites next to the observed targets.  The
    table is printed and passed to ``plot_mu`` and ``plot_obs``.

    ``wrapped_model`` is read from the enclosing scope.

    :param svi: object whose ``run(xdata, ydata)`` yields the posterior
    :param xdata: input tensor fed to the model
    :param ydata: target tensor; its first element along the last axis is
        used as the ``y_data`` column
    :return: None
    """
    # The body previously referred to ``x_data`` / ``y_data``, which are not
    # the names of this function's parameters.
    posterior = svi.run(xdata, ydata)

    # posterior predictive distribution we can get samples from
    trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
    post_pred = trace_pred.run(xdata, None)
    post_summary = summary(post_pred, sites=['prediction', 'obs'])
    mu = post_summary["prediction"]
    obs = post_summary["obs"]

    df = pd.DataFrame({
        "y_data": ydata.cpu().numpy()[..., 0],
        "mu_mean": mu["mean"],
        "mu_std": mu["std"],
        "mu_perc_5": mu["5%"],
        "mu_perc_95": mu["95%"],
        "obs_mean": obs["mean"],
        "obs_std": obs["std"],
        "obs_perc_5": obs["5%"],
        "obs_perc_95": obs["95%"],
    })

    print(df)

    plot_mu(df)
    plt.title('trace summary: mu')
    plot_obs(df)
    plt.title('trace summary: obs')
Example #3
0
def analysis(svi, data):
    """Print learned parameters, summarise the posterior predictive, plot.

    Every entry of the Pyro param store is printed, ``svi`` is run on
    ``data`` and 1000 posterior-predictive samples are summarised for the
    ``prediction`` and ``obs`` sites.  The first ten rows of each summary
    are printed, then ``plot_data`` is called once with the last column of
    ``data`` and once with the mean of the ``obs`` summary.

    ``model`` is read from the enclosing scope.

    :param svi: object whose ``run(data)`` yields the posterior
    :param data: 2-D indexable; all but the last column are passed to
        ``plot_data`` as x values, the last column as y values
    :return: the per-site summary returned by ``summary``
    """
    for param_name, _ in pyro.get_param_store().items():
        print(param_name, pyro.param(param_name))

    svi_posterior = svi.run(data)

    def wrapped_model(data):
        # Record the model output at a "prediction" sample site.
        model_output = model(data)
        pyro.sample("prediction", Delta(model_output))

    predictive = TracePredictive(
        wrapped_model, svi_posterior, num_samples=1000)
    post_summary = summary(
        predictive.run(data), sites=['prediction', 'obs'])
    mu = post_summary["prediction"]
    y = post_summary["obs"]

    for banner, table in (("sample Prediction (mu) data:", mu),
                          ("sample Observation (y) data:", y)):
        print(banner)
        print(table.head(10))

    features = data[:, :-1]
    targets = data[:, -1]
    plot_data(features, targets, "predictions")
    plot_data(features, y['mean'], "predictions")
    return post_summary
Example #4
0
def trace_summary(svi, model, x_data, y_data):
    """Tabulate and plot the posterior predictive, ordered by target value.

    Runs ``svi`` on the data, draws 1000 posterior-predictive samples with
    ``TracePredictive`` and collects the ``summary`` statistics of the
    ``prediction`` and ``obs`` sites into a DataFrame whose rows follow the
    ascending order of the flattened ``y_data``.  The frame is printed and
    passed to ``plot_pred`` and ``plot_obs``.

    :param svi: object whose ``run(x_data, y_data)`` yields the posterior
    :param model: model handed to ``wrapped_model_fn``
    :param x_data: input tensor fed to the model
    :param y_data: target tensor; flattened to 1-D and used as sort key
    :return: None
    """
    posterior = svi.run(x_data, y_data)
    wrapped_model = wrapped_model_fn(model)

    # posterior predictive distribution we can get samples from
    trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
    post_pred = trace_pred.run(x_data, None)
    post_summary = summary(post_pred, sites=['prediction', 'obs'])
    pred = post_summary["prediction"]
    obs = post_summary["obs"]

    y = y_data.cpu().numpy().ravel()
    idx = np.argsort(y)

    df = pd.DataFrame({
        # Integer row positions 0..n-1.  np.linspace(0, n, n) was used here
        # before and produced non-integer values running up to n.
        "Index": np.arange(np.size(y)),
        "y_data": y[idx],
        "pred_mean": pred["mean"][idx],
        "pred_std": pred["std"][idx],
        "pred_perc_5": pred["5%"][idx],
        "pred_perc_95": pred["95%"][idx],
        "obs_mean": obs["mean"][idx],
        "obs_std": obs["std"][idx],
        "obs_perc_5": obs["5%"][idx],
        "obs_perc_95": obs["95%"][idx],
    })

    print(df)

    plot_pred(df)
    plt.title('trace summary: pred')
    plot_obs(df)
    plt.title('trace summary: obs')
Example #5
0
def trace_summary(svi, model, x_data, y_data):
    """Tabulate and plot the posterior predictive, ordered by target value.

    Runs ``svi`` on the data, draws ``nsamples`` posterior-predictive
    samples with ``TracePredictive`` and collects the mean, standard
    deviation and the "16%" / "84%" columns of the ``prediction`` and
    ``obs`` summaries into a DataFrame sorted by ``y_data``.  The frame is
    printed and passed to ``plot_pred`` and ``plot_obs``.

    ``nsamples`` is read from the enclosing scope.

    :param svi: object whose ``run(x_data, y_data)`` yields the posterior
    :param model: model handed to ``wrapped_model_fn``
    :param x_data: inputs fed to the model
    :param y_data: targets; squeezed and used as sort key
        (NOTE(review): must be acceptable to ``np.argsort`` - confirm the
        caller passes an array rather than a GPU tensor)
    :return: None
    """
    # An optimizer used to be constructed here but was never used; ``svi``
    # arrives already configured.
    posterior = svi.run(x_data, y_data)
    wrapped_model = wrapped_model_fn(model)

    # posterior predictive distribution we can get samples from
    trace_pred = TracePredictive(wrapped_model,
                                 posterior,
                                 num_samples=nsamples)
    post_pred = trace_pred.run(x_data, None)
    post_summary = summary(post_pred, sites=['prediction', 'obs'])
    pred = post_summary["prediction"]
    obs = post_summary["obs"]

    idx = np.argsort(y_data.squeeze())

    df = pd.DataFrame({
        "y_test": y_data[idx].squeeze(),
        "pred_mean": pred["mean"][idx].values,
        "pred_std": pred["std"][idx].values,
        "pred_16%": pred["16%"][idx].values,
        "pred_84%": pred["84%"][idx].values,
        "obs_mean": obs["mean"][idx].values,
        "obs_std": obs["std"][idx].values,
        "obs_16%": obs["16%"][idx].values,
        "obs_84%": obs["84%"][idx].values,
    })

    print(df)

    plot_pred(df)
    plt.title('trace summary: pred')
    plot_obs(df)
    plt.title('trace summary: obs')
def ppd_samples(
        wrapped_model,
        svi_posterior,
        data,
        sites,
        num_samples=200):
    """
    Draw posterior-predictive samples and return their marginal.

    :param wrapped_model: wrapped model with prediction site
    :param svi_posterior: posterior from svi.run
    :param data: mapping whose ``'data'`` entry is fed to the predictive run
    :param sites: list of sites to take marginal over
    :param num_samples: number of samples from posterior
    :return: result of ``get_marginal`` for the requested sites
    """
    predictive = TracePredictive(
        wrapped_model, svi_posterior, num_samples=num_samples)
    # None is passed where the observed targets would otherwise go.
    predictive_traces = predictive.run(data['data'], None)
    return get_marginal(predictive_traces, sites)
Example #7
0
def trace_summary(svi, model, x_data, y_data):
    """Plot mean and spread of the posterior-predictive samples.

    Runs ``svi`` on the data, draws ``nsamples`` posterior-predictive
    samples with ``TracePredictive`` and reduces the ``summary2`` output
    for the ``prediction`` and ``obs`` sites to a mean and a standard
    deviation along axis 0.  Both are passed to ``plot_pred`` and
    ``plot_obs`` together with ``y_data``.

    ``nsamples`` and ``experiment_id`` are read from the enclosing scope.

    :param svi: object whose ``run(x_data, y_data)`` yields the posterior
    :param model: model handed to ``wrapped_model_fn``
    :param x_data: inputs fed to the model
    :param y_data: targets forwarded to the plotting helpers
    :return: None
    """
    # An optimizer used to be constructed here but was never used; ``svi``
    # arrives already configured.
    posterior = svi.run(x_data, y_data)
    wrapped_model = wrapped_model_fn(model)

    # posterior predictive distribution we can get samples from
    trace_pred = TracePredictive(wrapped_model,
                                 posterior,
                                 num_samples=nsamples)
    post_pred = trace_pred.run(x_data, None)
    post_summary = summary2(post_pred, sites=['prediction', 'obs'])
    # Index 0 / 1 on the middle axis follow the order of ``sites`` above
    # (presumably the layout is (sample, site, point) - confirm in summary2).
    pred = post_summary[:, 0, :]
    obs = post_summary[:, 1, :]

    pred_mean = pred.mean(axis=0)
    pred_std = pred.std(axis=0)
    obs_mean = obs.mean(axis=0)
    obs_std = obs.std(axis=0)

    plot_pred(y_data, pred_mean, pred_std, experiment_id)
    plot_obs(y_data, obs_mean, pred_mean, obs_std, pred_std, experiment_id)
Example #8
0
        describe = partial(pd.Series.describe,
                           percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
        site_stats[site_name] = marginal_site.apply(describe, axis=1) \
            [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats


def wrapped_model(x_data, y_data):
    """Run ``model`` and record its output at a ``"prediction"`` site.

    The value returned by ``model(x_data, y_data)`` is wrapped in a
    ``Delta`` distribution and sampled under the name "prediction".
    """
    model_output = model(x_data, y_data)
    pyro.sample("prediction", Delta(model_output))


# Run SVI on the inputs data[0] and the last column of data[1].
posterior = svi.run(data[0], data[1][:, -1])

# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=100)
# None is passed where the observed targets would otherwise go.
post_pred = trace_pred.run(data[0], None)
post_summary = summary(post_pred, sites=['prediction', 'obs'])
mu = post_summary["prediction"]
y = post_summary["obs"]
# Put the observed targets in front of the summary columns.
# NOTE(review): svi.run received data[1][:, -1] but all of data[1] is
# inserted here - confirm both have one value per row.
y.insert(0, 'true', data[1].cpu().numpy())

print("sample y data:")
print(y.head(10))

df = pd.DataFrame(y)
nx = df.reset_index()  # move the index into a leading column
nx = nx.values  # convert the DataFrame to an array
fig = plt.figure(dpi=100, figsize=(5, 4))
# Column 0 is the former index, column 1 is 'true'; column 2 is the first
# summary column (presumably "mean" - confirm against summary()).
plt.scatter(nx[:, 0], nx[:, 1], c='b')
plt.scatter(nx[:, 0], nx[:, 2], c='r')
#plt.errorbar(nx[:,0],nx[:,2], yerr=nx[:,3], fmt='o', c='r')
Example #9
0
        describe = partial(pd.Series.describe,
                           percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
        site_stats[site_name] = marginal_site.apply(describe, axis=1) \
            [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats


def wrapped_model(x_data, y_data):
    """Run ``model`` and record its output at a ``"prediction"`` site.

    The value returned by ``model(x_data, y_data)`` is wrapped in a
    ``Delta`` distribution and sampled under the name "prediction".
    """
    model_output = model(x_data, y_data)
    pyro.sample("prediction", Delta(model_output))


# Run SVI on the full data set to obtain the posterior.
posterior = svi.run(x_data, y_data)

# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
# None is passed where the observed targets would otherwise go.
post_pred = trace_pred.run(x_data, None)
post_summary = summary(post_pred, sites=['prediction', 'obs'])
mu = post_summary["prediction"]
y = post_summary["obs"]
# One row per data point: the first two input columns, the mean and the
# "5%" / "95%" summary columns for both sites, and the observed target.
predictions = pd.DataFrame({
    "cont_africa": x_data[:, 0],
    "rugged": x_data[:, 1],
    "mu_mean": mu["mean"],
    "mu_perc_5": mu["5%"],
    "mu_perc_95": mu["95%"],
    "y_mean": y["mean"],
    "y_perc_5": y["5%"],
    "y_perc_95": y["95%"],
    "true_gdp": y_data,
})
Example #10
0
                           percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
        site_stats[site_name] = marginal_site.apply(describe, axis=1) \
            [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats


def wrapped_model(x_data, y_data):
    """Run ``model`` and record its output at a ``"prediction"`` site.

    The value returned by ``model(x_data, y_data)`` is wrapped in a
    ``Delta`` distribution and sampled under the name "prediction".
    """
    model_output = model(x_data, y_data)
    pyro.sample("prediction", Delta(model_output))


# Break
# import Ipython; Ipython.embed()

# NOTE(review): `posterior` is not assigned in the visible part of this
# script - it must already exist when this point is reached.
# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
post_pred = trace_pred.run(data_train[0], None)  # inputting PCA components?
post_summary = summary(post_pred, sites=['prediction', 'obs'])
meuw = post_summary["prediction"]
y = post_summary["obs"]
# Put the training targets in front of both summary tables.
meuw.insert(0, 'true', data_train[1].cpu().numpy())
y.insert(0, 'true', data_train[1].cpu().numpy())

print("sample meuw data:")
print(meuw.head(10))
# Open question: what is the difference between meuw and y? The means are
# the same but sigma is very different.
print("sample y data:")
print(y.head(10))

df = pd.DataFrame(meuw)
nx = df.reset_index()  # move the index into a leading column
nx = nx.values  # convert the DataFrame to an array
def pyro_bayesian(regression_model, y_data):
    """Fit the regression with SVI and plot marginals of its parameters.

    Builds an ``AutoDiagonalNormal`` guide for ``model``, trains it with
    ``train``, prints every entry of the Pyro param store, summarises the
    posterior predictive for the ``prediction`` and ``obs`` sites, and
    finally shows histograms of the two linear weights and of ``factor``.

    ``model``, ``x_data``, ``num_iterations``, ``train`` and
    ``wrapped_model`` are read from the enclosing scope.

    :param regression_model: forwarded to ``train``, ``svi.run`` and the
        posterior-predictive run
    :param y_data: observed targets
    :return: None
    """
    def get_marginal(traces, sites):
        # Samples of the empirical marginal over `sites` as a NumPy array.
        return EmpiricalMarginal(
            traces, sites)._get_samples_and_weights()[0].detach().cpu().numpy()

    def summary(traces, sites):
        # Per-site table of mean, std and selected percentiles.
        marginal = get_marginal(traces, sites)
        describe = partial(pd.Series.describe,
                           percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
        site_stats = {}
        for i in range(marginal.shape[1]):
            site_name = sites[i]
            marginal_site = pd.DataFrame(marginal[:, i]).transpose()
            site_stats[site_name] = marginal_site.apply(describe, axis=1) \
                [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
        return site_stats

    # CI testing
    assert pyro.__version__.startswith('0.3.0')
    pyro.enable_validation(True)
    pyro.set_rng_seed(1)

    from pyro.contrib.autoguide import AutoDiagonalNormal
    guide = AutoDiagonalNormal(model)

    optim = Adam({"lr": 0.03})
    svi = SVI(model, guide, optim, loss=Trace_ELBO(), num_samples=1000)

    train(svi, x_data, y_data, num_iterations, regression_model)

    for name, _ in pyro.get_param_store().items():
        print(name, pyro.param(name))

    posterior = svi.run(x_data, y_data, regression_model)

    # posterior predictive distribution we can get samples from
    trace_pred = TracePredictive(wrapped_model, posterior, num_samples=1000)
    post_pred = trace_pred.run(x_data, None, regression_model)
    post_summary = summary(post_pred, sites=['prediction', 'obs'])
    mu = post_summary["prediction"]
    y = post_summary["obs"]
    # NOTE(review): this frame is built but not used further in this function.
    predictions = pd.DataFrame({
        "x0": x_data[:, 0],
        "x1": x_data[:, 1],
        "mu_mean": mu["mean"],
        "mu_perc_5": mu["5%"],
        "mu_perc_95": mu["95%"],
        "y_mean": y["mean"],
        "y_perc_5": y["5%"],
        "y_perc_95": y["95%"],
        "true_gdp": y_data,
    })
    # print("predictions=", predictions)

    # We need to prepend `module$$$` to all parameters of nn.Modules since
    # that is how they are stored in the ParamStore.
    weight = get_marginal(posterior,
                          ['module$$$linear.weight']).squeeze(1).squeeze(1)
    factor = get_marginal(posterior, ['module$$$factor'])

    # x0, x1, x2"-home_page, x1*x2-factor
    print("weight shape=", weight.shape)
    print("factor shape=", factor.shape)

    fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 6), sharey=True)
    ax[0].hist(weight[:, 0])
    ax[1].hist(weight[:, 1])
    ax[2].hist(factor.squeeze(1))
    plt.show()
Example #12
0
        site_name = sites[i]
        marginal_site = pd.DataFrame(marginal[:, i]).transpose()
        describe = partial(pd.Series.describe, percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
        site_stats[site_name] = marginal_site.apply(describe, axis=1) \
            [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats

def wrapped_model(x_data, y_data):
    """Run ``model`` and record its output at a ``"prediction"`` site.

    The value returned by ``model(x_data, y_data)`` is wrapped in a
    ``Delta`` distribution and sampled under the name "prediction".
    """
    model_output = model(x_data, y_data)
    pyro.sample("prediction", Delta(model_output))

# Run SVI on the test split and print the resulting posterior object.
posterior = svi.run(x_test, y_test)
print(posterior)
# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model,
                             posterior,
                             num_samples=1000)
# NOTE(review): y_test is passed here, not None - confirm that running the
# predictive with the observed targets is intended.
post_pred = trace_pred.run(x_test, y_test)
post_summary = summary(post_pred, sites= ['prediction', 'obs'])
mu = post_summary["prediction"]
y = post_summary["obs"]
# The bare expressions below only display a value in an interactive
# session; as plain script statements their results are discarded.
len(y)
mu[:5]
y_test
mu.head()
y.head()
# Collect 100 prediction vectors, each from a fresh call to the guide.
preds = []
for i in range(100):
    # guide(x_test) returns a callable that is then applied to x_test.
    sampled_reg_model = guide(x_test)
    pred = sampled_reg_model(x_test).data.numpy().flatten()
    preds.append(pred)

Example #13
0

# Print every entry of the Pyro param store.
for name, value in pyro.get_param_store().items():
    print(name, pyro.param(name))

# Print the mean of each parameter as a NumPy scalar.
for name, value in pyro.get_param_store().items():
    print(name, pyro.param(name).cpu().detach().numpy().mean())

# Run SVI on the training data to obtain the posterior.
posterior = svi.run(Xtrain, Ytrain)

# Break
#pdb.set_trace()

# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model, posterior, num_samples=100)
post_pred = trace_pred.run(Xtrain, None)  # inputting PCA components?
post_summary = summary(post_pred, sites=['prediction', 'obs'])
meuw = post_summary["prediction"]
y = post_summary["obs"]
# Put the training targets in front of both summary tables.
meuw.insert(0, 'true', np.array(Ytrain.cpu()))
y.insert(0, 'true', np.array(Ytrain.cpu()))

print("sample meuw data:")
print(meuw.head(10))
# Open question: what is the difference between mu and y? The means are
# the same but sigma is very different.
print("sample y data:")
print(y.head(10))

df = pd.DataFrame(meuw)
nx = df.reset_index()  # move the index into a leading column
nx = nx.values  # convert the DataFrame to an array
Example #14
0
    except IndexError:
        break
# Reuse the legend entries of `ax` for one figure-level legend, then save.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, loc='upper right')
plt.savefig("posterior_dists.png")

# sense check with standard package implementation of logistic regression
sklearn_model = LogisticRegression(solver="lbfgs")
sklearn_model.fit(X_train_scaled, y_train)

# Posterior predictive over the scaled test features; None is passed where
# the observed targets would otherwise go.
trace_pred = TracePredictive(predictive_model,
                             svi_meanfield_posterior,
                             num_samples=1000)
X_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
# NOTE(review): y_tensor is not used in the visible part of this script.
y_tensor = torch.tensor(y_test.values, dtype=torch.float32)
posterior_predictive = trace_pred.run(X_tensor, None)
sites = ["prediction", "obs"]
# Per-site support of the empirical marginal, detached and moved to CPU.
posterior_predictive_samples = \
    {site: EmpiricalMarginal(posterior_predictive, sites=site)
     .enumerate_support().detach().cpu() for site in sites}

# Columns 10..19 of the "prediction" samples next to column 1 of sklearn's
# predict_proba for rows 10..19 (presumably the same ten test points -
# confirm that the second axis of the samples indexes test rows).
subset = posterior_predictive_samples["prediction"][:, 10:20]
y_pred_sklearn = sklearn_model.predict(X_test_scaled)
subset_sklearn = sklearn_model.predict_proba(X_test_scaled)[10:20, 1]

# 5 x 2 grid of axes sharing both x and y.
fig, axs = plt.subplots(nrows=5,
                        ncols=2,
                        figsize=(12, 10),
                        sharex=True,
                        sharey=True)
fig.suptitle("Posterior Predictive Distributions", fontsize=16)
Example #15
0
        marginal_site = pd.DataFrame(marginal[:, i]).transpose()
        describe = partial(pd.Series.describe, percentiles=[.05, 0.25, 0.5, 0.75, 0.95])
        site_stats[site_name] = marginal_site.apply(describe, axis=1) \
            [["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
    return site_stats

def wrapped_model(x_data, y_data):
    """Run ``model`` and record its output at a ``"prediction"`` site.

    The value returned by ``model(x_data, y_data)`` is wrapped in a
    ``Delta`` distribution and sampled under the name "prediction".
    """
    model_output = model(x_data, y_data)
    pyro.sample("prediction", Delta(model_output))

# Run SVI on the test tensors to obtain the posterior.
posterior = svi.run(x_test_t, y_test_t)

# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model,
                             posterior,
                             num_samples=10000)
# None is passed where the observed targets would otherwise go.
post_pred = trace_pred.run(x_test_t, None)

post_summary = summary(post_pred, sites= ['prediction', 'obs'])
mu = post_summary["prediction"]

y = post_summary["obs"]
# The bare expressions in this script only display a value in an
# interactive session; as plain statements their results are discarded.
y.head(10)
# Apply `* std + mean` to every column of the prediction summary
# (presumably undoing a standardisation of the target - confirm).
# NOTE(review): this also shifts the "std" column by `mean`.
mu_ = mu*std+mean
mu_['mean'][:10]
y_test[:10]*std+mean
X_train[:12]

y_train_u = y_train*std+mean
y_train_u.head(5)
# Rebinds mu_ to the transformed "mean" column only.
mu_ = mu['mean']*std+mean
mu_[:5]
    return site_stats

def wrapped_model(x_data, y_data):
    """Run ``model`` and record its output at a ``"prediction"`` site.

    The value returned by ``model(x_data, y_data)`` is wrapped in a
    ``Delta`` distribution and sampled under the name "prediction".
    """
    model_output = model(x_data, y_data)
    pyro.sample("prediction", Delta(model_output))




# Run SVI on the training inputs and the last column of the training targets.
posterior = svi.run(data_train[0], data_train[1][:,-1])


# posterior predictive distribution we can get samples from
trace_pred = TracePredictive(wrapped_model,
                             posterior,
                             num_samples=1000)
post_pred = trace_pred.run(data_train[0], None)  # TODO: check why data_train[0]
post_summary = summary(post_pred, sites= ['prediction', 'obs'])
mu = post_summary["prediction"]
y = post_summary["obs"]
# Put the training targets in front of the summary columns.
# NOTE(review): svi.run received data_train[1][:,-1] but all of
# data_train[1] is inserted here - confirm both have one value per row.
y.insert(0, 'true', data_train[1].cpu().numpy())

print("sample y data:")
print(y.head(10))

df = pd.DataFrame(y) 
nx = df.reset_index()  # move the index into a leading column
nx = nx.values  # convert the DataFrame to an array
fig = plt.figure(dpi=100, figsize=(5, 4))
# Column 0 is the former index, column 1 is 'true'; column 2 is the first
# summary column (presumably "mean" - confirm against summary()).
plt.scatter(nx[:,0],nx[:,1], c='b') 
plt.scatter(nx[:,0],nx[:,2], c='r') 
#plt.errorbar(nx[:,0],nx[:,2], yerr=nx[:,3], fmt='o', c='r')