def get_metrics(self, kind=('mse', 'mae', 'loo', 'waic'), sample_size=5000):
        if self.predicted is None:
            self._predict_in_sample(sample_size=sample_size, use_median=False)

        records = {}
        for kind_ in kind:
            if kind_.lower() == 'mse':
                records['mse'] = np.mean(
                    np.square(self.response - self.predicted))
            elif kind_.lower() == 'mae':
                records['mae'] = np.mean(np.abs(self.response -
                                                self.predicted))
            elif kind_.lower() == 'waic':
                records['waic'] = pm.waic(self.trace, self.model).WAIC
            elif kind_.lower() == 'loo':
                records['loo'] = pm.loo(self.trace, self.model).LOO
            else:
                raise ValueError(f'{kind_} is not supported.')

        table_content = [['Metrics', 'Value']]
        for key, value in records.items():
            value = str(round(value, 4))
            table_content.append([key, value])

        header = 'Model Fitting Metrics Report'
        BaseModel.pretty_print(header, table_content, table_len=50)
Example #2
def posteriorChecks(model, trace):
    """
        Performs various posterior checks
        Posterior Predictive Checks:
            - Simulates replicating data under the fitted model and then comparing these to the observed data
            - This checks for systematic discrepancies between real and simulated data

        Widely-applicable Information Criterion (WAIC):
            - Fully Bayesian criterion for estimating out-of-sample expectation, using the computed log pointwise posterior predictive density (LPPD) and correcting for the effective number of parameters to adjust for overfitting.
            - This is primarilly for the comparison between different models

        Leave-one-out Cross-validation (LOO):
            - Estimate of the out-of-sample predictive fit. In cross-validation, the data are repeatedly partitioned into training and holdout sets, iteratively fitting the model with the former and evaluating the fit with the holdout data. PyMC's implementation of LOO is using Pareto-smoothed importance sampling, it provides and estimate of point-wise out-of-sample prediction accuracy

        Note: out-of-sample is data not used for the fit (ie: making a prediction)

    """
    # Posterior Predictive Checks -- Generates 500 toy samples of size 100
    # This is essentially toy MC
    model_ppc = pm.sample_ppc(trace, samples=500, model=model)

    # Widely-applicable Information Criterion (WAIC)
    model_waic = pm.waic(trace[len(trace) - 100:], model, progressbar=True)

    # Leave-one-out Cross-validation (LOO)
    model_loo = pm.loo(trace[len(trace) - 100:], model, progressbar=True)

    # ppc = pm.sample_ppc(trace[nBurn:], samples=500, model=model)
    # print(np.asarray(ppc['L'].shape), ppc.keys())
    # _, axppc = plt.subplots(figsize=(12, 6))
    # axppc.hist([n.mean() for n in ppc['L']], bins=19, alpha=0.5)
    # axppc.set(title='Posterior predictive for L', xlabel='L(x)', ylabel='Frequency');
    # plt.show()

    # df_comp_WAIC = pm.compare(models = [model, modelBasic], traces = [trace[nBurn:], traceBasic[nBurn:]])
    # df_comp_WAIC.head()
    # pm.compareplot(df_comp_WAIC)
    # df_comp_LOO = pm.compare(models = [model, modelBasic], traces = [trace[nBurn:], traceBasic[nBurn:]], ic='LOO')
    # df_comp_LOO.head()
    # pm.compareplot(df_comp_LOO)

    # LOO results (pm.compare output):
    #        LOO  pLOO   dLOO  weight      SE   dSE  warning
    # 0  61479.8  5.68   0.00    0.94  798.63   0.0        1
    # 1  61502.0  4.95  22.12    0.06  798.47  10.2        1

    return model_ppc, model_waic, model_loo
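
The WAIC quantities described in the docstring above can be reproduced by hand from the pointwise log-likelihoods, which makes the LPPD and effective-parameter correction concrete. A minimal sketch, assuming `log_lik` is an (n_samples, n_observations) array of pointwise log-likelihoods extracted from the trace (a hypothetical input, for illustration only):

import numpy as np
from scipy.special import logsumexp

def waic_from_loglik(log_lik):
    # LPPD: log pointwise predictive density, averaged over posterior draws
    lppd = np.sum(logsumexp(log_lik, axis=0) - np.log(log_lik.shape[0]))
    # p_WAIC: effective number of parameters, the per-point variance of the
    # log-likelihood across posterior draws
    p_waic = np.sum(np.var(log_lik, axis=0, ddof=1))
    # WAIC on the deviance scale; smaller is better
    return -2.0 * (lppd - p_waic)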
Example #3
def run(n=1000):
    if n == "short":
        n = 50
    with schools:
        tr = sample(n)
        loo(tr)
Example #4
plt.plot(x_1s[0][idx], y_p_post[idx], label='Polynomial order {}'.format(order))

plt.scatter(x_1s[0], y_1s)
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16, rotation=0)
plt.legend()
plt.savefig('img605.png')


print(pm.dic(trace=trace_l, model=model_l))
print(pm.dic(trace=trace_p, model=model_p))

waic_l = pm.waic(trace=trace_l, model=model_l)
waic_p = pm.waic(trace=trace_p, model=model_p)
loo_l = pm.loo(trace=trace_l, model=model_l)
loo_p = pm.loo(trace=trace_p, model=model_p)

plt.figure()
plt.subplot(121)
for idx, ic in enumerate((waic_l, waic_p)):
    plt.errorbar(ic[0], idx, xerr=ic[1], fmt='bo')
plt.title('WAIC')
plt.yticks([0, 1], ['linear', 'quadratic'])
plt.ylim(-1, 2)

plt.subplot(122)
for idx, ic in enumerate((loo_l, loo_p)):
    plt.errorbar(ic[0], idx, xerr=ic[1], fmt='go')
plt.title('LOO')
plt.yticks([0, 1], ['linear', 'quadratic'])
Example #5
# https://github.com/pymc-devs/pymc3/blob/master/pymc3/examples/gelman_schools.py

import numpy as np
import matplotlib.pyplot as plt
from pymc3 import Model, Normal, HalfNormal, HalfCauchy, sample, traceplot, loo


J = 8
y = np.array([28,  8, -3,  7, -1,  1, 18, 12])
sigma = np.array([15, 10, 16, 11,  9, 11, 10, 18])


# Schools model defined at https://raw.githubusercontent.com/wiki/stan-dev/rstan/8schools.stan
with Model() as schools:
    print('building model...')
    eta = Normal('eta', 0, 1, shape=J)
    mu = Normal('mu', 0, sd=1e6)
    tau = HalfCauchy('tau', 25) # not in original model
    theta = mu + tau*eta
    obs = Normal('obs', theta, sd=sigma, observed=y)
    
    
with schools:
    print('sampling...')
    tr = sample(1000)
    l = loo(tr) # -29.6821436703
    print('LOO estimate {}'.format(l))
    
traceplot(tr)

Example #6
    def loo(self):
        return pm.loo(self.trace, self.model)
Example #7
                       pr_sigma_transient_len=14,
                       pr_median_lambda = 1/8,
                       pr_sigma_lambda = 1.0)]
if rerun:
    traces = []
    models = []
    model = cov19.SIR_with_change_points(new_cases_obs = np.diff(cases_obs),
                                        change_points_list = change_points,
                                        date_begin_simulation = date_begin_sim,
                                        num_days_sim = num_days_sim,
                                        diff_data_sim = diff_data_sim,
                                        N = 83e6,
                                        priors_dict=None,
                                        weekends_modulated=True,
                                        weekend_modulation_type = 'abs_sine')
    models.append(model)
    traces.append(pm.sample(model=model, init='advi', draws=4000, tune=1000, cores = 12))
    pickle.dump([models, traces], open(path_save_pickled + 'b.pickle', 'wb'))

else:
    models, traces = pickle.load(open(path_save_pickled + 'b.pickle', 'rb'))
exec(open('figures_org.py').read())
create_figure_distributions(models[0], traces[0],
                            additional_insets = None, xlim_lambda = (0, 0.53), color = 'tab:red',
                            num_changepoints=1, xlim_tbegin=7, save_to = path_to_save + 'distribution.1b')
create_figure_timeseries(traces[0], 'tab:red',
                         plot_red_axis=True, save_to=path_to_save + 'time.1b', add_more_later = False)
loo = [pm.loo(e, scale='deviance', pointwise=True) for e in traces]
for e in reversed(loo):
    print("lo: %.2f %.2f %.2f" % (e['loo'], e['loo_se'], e['p_loo']))
    plt.ylabel('predicted output variable')
    plt.title(out_str + ' (%i samples)' % len(X_disc))
    plt.savefig('%s/%s_r2scatter_partial_corr_disc_10000_draws.png' % (OUT_DIR, output_name), dpi=150)
    plt.savefig('%s/%s_r2scatter_partial_corr_disc_10000_draws.pdf' % (OUT_DIR, output_name), dpi=150)

    plt.figure()
    plt.hist([it.mean() for it in Y_ppc_insample.T], bins=19, alpha=0.35,
        label='predicted output')
    plt.hist(y_disc, bins=19, alpha=0.5, label='original output')
    plt.legend(loc='upper right')
    plt.title('Posterior predictive check: predictive distribution', fontsize=10)
    plt.savefig('%s/%s_ppc_partial_corr_disc_10000_draws.png' % (OUT_DIR, output_name), dpi=150)
    plt.savefig('%s/%s_ppc_partial_corr_disc_10000_draws.pdf' % (OUT_DIR, output_name), dpi=150)


    loo_res = pm.loo(hierarchical_trace, hierarchical_model, progressbar=True, pointwise=True)
    print('LOO point-wise deviance: mean=%.2f+/-%.2f' % (np.mean(loo_res[4]), np.std(loo_res[4])))

    pd.DataFrame(Y_ppc_insample).to_csv('%s/%s_Y_ppc_insample_partial_corr_disc_10000_draws.csv' % (OUT_DIR, output_name))
    pd.DataFrame(y_pred_insample).to_csv('%s/%s_y_pred_insample_partial_corr_disc_10000_draws.csv' % (OUT_DIR, output_name))       
    pd.DataFrame(loo_res).to_csv('%s/%s_loo_res_partial_corr_disc_10000_draws.csv' % (OUT_DIR, output_name))
    joblib.dump([ppc_insample], os.path.join(OUT_DIR, output_name + '_ppc_insample_partial_corr_disc_10000_draws_dump'), compress=9)


    # female ppc

    female_Y_ppc_insample = Y_ppc_insample.T[female]
    female_y_pred_insample = female_Y_ppc_insample.mean(axis=1)
    ppc_insample = r2_score(y_disc[female], female_y_pred_insample)
    out_str = 'PPC in sample R^2: %2.6f' % (ppc_insample)
    print(out_str)
Example #9
    def get_loo(self):
        return pm.loo(trace=self.trace_, model=self.model)
Example #10
# Modified from
# https://github.com/pymc-devs/pymc3/blob/master/pymc3/examples/gelman_schools.py

import numpy as np
import matplotlib.pyplot as plt
from pymc3 import Model, Normal, HalfNormal, HalfCauchy, sample, traceplot, loo

J = 8
y = np.array([28, 8, -3, 7, -1, 1, 18, 12])
sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18])

# Schools model defined at https://raw.githubusercontent.com/wiki/stan-dev/rstan/8schools.stan
with Model() as schools:
    print('building model...')
    eta = Normal('eta', 0, 1, shape=J)
    mu = Normal('mu', 0, sd=1e6)
    tau = HalfCauchy('tau', 25)  # original model uses U[0,infty]
    theta = mu + tau * eta
    obs = Normal('obs', theta, sd=sigma, observed=y)

with schools:
    print('sampling...')
    tr = sample(1000)
    l = loo(tr)  # -29.6821436703
    print('LOO estimate {}'.format(l))

traceplot(tr)
Example #11
def run(n=1000): 
    if n == "short":
        n = 50
    with schools:
        tr = sample(n)
        l = loo(tr)
Example #12

# Making the slope conditional
with pm.Model() as m5:
    α = pm.Normal('α', 0, 0.1, shape=2)
    β = pm.Normal('β', 0, 0.3, shape=2)
    σ = pm.Exponential('σ', 1)
    μ = α[dfinal.cont_africa.values] + β[dfinal.cont_africa.values] * (dfinal.rugged_s.values - rbar)
    log_gdp_s_i = pm.Normal('log_gdp_s_i', μ, σ, observed=dfinal.log_gdp_s.values)
    trace_m5 = pm.sample()
pm.summary(trace_m5, alpha=0.11).round(decimals=2)
m5.name = 'm5'
pm.compare({m3: trace_m3, m4: trace_m4, m5: trace_m5}, ic='LOO')
waic_list = pm.waic(trace_m5, model=m5, pointwise=True)

loo_list = pm.loo(trace_m5, model=m5, pointwise=True)

pl.plot(waic_list.WAIC_i, marker='.', ls='', color='k');
pl.plot(loo_list.LOO_i, marker='s', ls='', markeredgecolor='r');


dfinal.head()
# Plotting the interaction

%matplotlib inline
_, axs = pl.subplots(ncols=2, figsize=(8,4))
ttls = ['Non-African', 'African']
df_m5 = pm.trace_to_dataframe(trace_m5)
for i, (axi, ttl) in enumerate(zip(axs, ttls)):
    axs[i].scatter(dfinal[dfinal.cont_africa==i].rugged_s, dfinal[dfinal.cont_africa==i].log_gdp_s)
    m5_μ = link(df_m5.filter(regex=f'{i}', axis=1), x)
plt.show()

Example #13

with pm.Model() as model_BF_1:
    theta = pm.Beta('theta', 8, 4)
    y = pm.Bernoulli('y', theta, observed=y)

    trace_BF_1 = pm.sample(5000)
chain_BF_1 = trace_BF_1[500:]
pm.traceplot(chain_BF_1)
plt.show()

# the smaller the better
waic_0 = pm.waic(chain_BF_0, model_BF_0)
waic_1 = pm.waic(chain_BF_1, model_BF_1)

loo_0 = pm.loo(chain_BF_0, model_BF_0)
loo_1 = pm.loo(chain_BF_1, model_BF_1)

plt.figure(figsize=(8, 4))
plt.subplot(121)
for idx, ic in enumerate((waic_0, waic_1)):
    plt.errorbar(ic[0], idx, xerr=ic[1], fmt='bo')
plt.title('WAIC')
plt.yticks([0, 1], ['model_0', 'model_1'])
plt.ylim(-1, 2)

plt.subplot(122)
for idx, ic in enumerate((loo_0, loo_1)):
    plt.errorbar(ic[0], idx, xerr=ic[1], fmt='go')
plt.title('LOO')
plt.yticks([0, 1], ['model_0', 'model_1'])
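
The WAIC/LOO point estimates plotted above can also be converted into relative model weights, analogous to the `weight` column that pm.compare reports. A minimal sketch, assuming `waic_0` and `waic_1` hold deviance-scale point estimates as in the plotting loop:

import numpy as np

# Pseudo-BMA weights from the two deviance-scale WAIC values
elpds = -0.5 * np.array([waic_0[0], waic_1[0]])  # back to the elpd scale
w = np.exp(elpds - elpds.max())                  # subtract the max for numerical stability
weights = w / w.sum()                            # relative support for model_0 vs model_1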
Example #14
az.summary(trace)
#pm.gelman_rubin(trace)
with m6_11:
    az.plot_trace(trace)
plt.show()
az.plot_autocorr(trace)
plt.show()
az.plot_density(trace)
plt.show()
az.plot_forest(trace)
plt.show()

# might need to multiply by -2 to compare with McElreath
with m6_11:
    print(pm.waic(trace))
    print(pm.loo(trace))
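
# To line up with McElreath's deviance-scale numbers directly, ArviZ can report
# both criteria on the deviance scale (-2 * elpd). A minimal sketch, assuming an
# ArviZ version that accepts the `scale` argument:
with m6_11:
    print(az.waic(trace, scale='deviance'))
    print(az.loo(trace, scale='deviance'))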


#m6_13 = pm.Model()
with pm.Model() as m6_13:
    alpha = pm.Uniform('alpha', 0, 5)
    bm = pm.Uniform('bm', -10, 10)
    log_sigma = pm.Uniform('log_sigma', -10, 10)
    mu = alpha + bm*d['lmass']
    y_obs = pm.Normal('y_obs', mu=mu, sigma=np.exp(log_sigma), observed=d['kcal.per.g'])
    trace = pm.sample(2000, return_inferencedata=True, chains=2)

with m6_13:
    print(pm.summary(trace))
    print(pm.waic(trace))
    print(pm.loo(trace))