Esempi in Python per compare, esempi in Python per arviz.compare

Esempio n. 1

0

Mostra file

"""
Compare Plot
============

_thumb: .5, .5
"""
import matplotlib.pyplot as plt

import arviz as az

az.style.use("arviz-darkgrid")

# (display label, dataset name handed to az.load_arviz_data), listed in the
# order in which the models are inserted into the comparison dict.
dataset_names = (
    ("Centered 8 schools", "centered_eight"),
    ("Non-centered 8 schools", "non_centered_eight"),
)
fitted_models = {
    label: az.load_arviz_data(dataset) for label, dataset in dataset_names
}

# Build the comparison table and draw it.
model_compare = az.compare(fitted_models)
az.plot_compare(model_compare, figsize=(12, 4))

plt.show()

Esempio n. 2

0

Mostra file

def iqr(x, a=0):
    """Return the interquartile range of ``x`` along axis ``a``.

    The result is the 75th percentile minus the 25th percentile, both
    computed with ``np.percentile`` along the requested axis.
    """
    upper_quartile, lower_quartile = np.percentile(x, [75, 25], axis=a)
    return np.subtract(upper_quartile, lower_quartile)


# Test statistic on the observed data: interquartile range of cs_exp.
T_obs = iqr(cs_exp)
# One panel per entry of ppc_mm (presumably one posterior predictive sample
# per fitted model -- confirm against the code that builds ppc_mm).
for idx, d_sim in enumerate(ppc_mm):
    # Same statistic on the simulated data: first 100 entries of d_sim['y'],
    # transposed, with the IQR taken along axis 1.
    T_sim = iqr(d_sim['y'][:100].T, 1)
    # Fraction of simulated statistics that are at least as large as the
    # observed one.
    p_value = np.mean(T_sim >= T_obs)
    az.plot_kde(T_sim, ax=ax[idx])
    # Dashed vertical line marking the observed statistic.
    ax[idx].axvline(T_obs, 0, 1, color='k', ls='--')
    ax[idx].set_title(f'K = {clusters[idx]} \n p-value {p_value:.2f}')
    ax[idx].set_yticks([])

# %%
# Compare all fits, keyed by the matching entry of `clusters`, using the
# 'BB-pseudo-BMA' weighting method.
comp = az.compare(dict(zip(clusters, traces)), method='BB-pseudo-BMA')
comp

# %%
az.plot_compare(comp)


# %%
def stick_breaking_truncated(α, H, K):
    """
    Truncated stick-breaking process view of a DP
    
    Parameters
    ----------
    α : float
        concentration parameter

Esempio n. 3

0

Mostra file

    sstot = np.sum((y - ybar)**2)
    r2 = ssreg / sstot

    plt.plot(x_n, ffit, label=f'order {i}, $R^2$= {r2:.2f}')

# Finish the figure started above: legend in location code 2, axis labels,
# and an unrotated y label.
plt.legend(loc=2)
plt.xlabel('x')
plt.ylabel('y', rotation=0)

# %%
# WAIC of the first model on its own; the bare name displays it in a notebook.
waic_1 = az.waic(trace_1)
waic_1
# %%
# Comparison table for both models using the 'BB-pseudo-BMA' weighting method.
cmp_df = az.compare({
    'model_1': trace_1,
    'model_p': trace_p
},
                    method='BB-pseudo-BMA')
cmp_df

# %%
az.plot_compare(cmp_df)

# %%
# Weighted posterior predictive sampling from both models; with w = .5 the
# two weights are equal.
w = .5
y_lp = pm.sample_posterior_predictive_w([trace_1, trace_p],
                                        samples=1000,
                                        models=[model_1, model_p],
                                        weights=[w, 1 - w])

# New figure; only the axes object is kept.
_, ax = plt.subplots(figsize=(10, 6))

Esempio n. 4

0

Mostra file

 def compare_models(self, traces=(), labels=()):
     """Compare several fitted models with ``az.compare``.

     Each trace is converted with ``self.get_trace_stats`` and stored under
     the label found at the same position in ``labels``.

     Parameters
     ----------
     traces : sequence
         Fitted traces, one per model.
     labels : sequence
         Model names, paired positionally with ``traces``.

     Returns
     -------
     The value returned by ``az.compare`` for the ``{label: stats}`` dict.

     Notes
     -----
     ``zip`` stops at the shorter of the two inputs, so surplus traces or
     labels are ignored.
     """
     # Immutable tuple defaults replace the shared mutable ``[]`` defaults.
     return az.compare({
         lab: self.get_trace_stats(trace)
         for lab, trace in zip(labels, traces)
     })

Esempio n. 5

0

Mostra file

File: load_results.py Progetto: wj2/swap_errors

    this_dset['tend'] = this_dset['tbeg'] + this_dset['twindow']

    wa_az = []
    wa_prm = []
    # for which_sess in these_sess:
    dset_info = {**this_dset}
    folds = hlp.folder_hierarchy(dset_info) 

    fits_az = {p:None for p in these_models}
    for mod in these_models:
        with open(SAVE_DIR+folds+'/arviz_fit_%s_model.pkl'%mod, 'rb') as f:
            fits_az[mod] = pkl.load(f)
        # with open(SAVE_DIR+folds+'/fitted_params_%s_model.pkl'%mod, 'rb') as f:
        #     fits_prm[mod] = pkl.load(open(SAVE_DIR+folds+'/fitted_params_%s_model.pkl'%mod, 'rb'))
        
    comp = az.compare(fits_az)
    # wa_prm.append(np.exp(fits_az['hybrid_error_precue'].posterior['logits'].mean())/(1+np.exp(fits_az['hybrid_error_precue'].posterior['logits'].mean())))
    
    wa_az.append(comp)
    
    loos.append(wa_az)
    # az_fits.append(wa_prm)

# Number of parameter combinations. Computed once instead of materialising
# the full Cartesian product separately for each of the three lists below.
n_combos = len(list(itt.product(*var_v)))
# loos[i] is a one-element list whose only entry is the az.compare table
# built for combination i.
comparisons = [loos[i][0] for i in range(n_combos)]

# Per combination: one array with the 'loo' / 'se' / 'warning' column value
# of every model, in the order given by these_models.
vals = [np.array([table['loo'][m] for m in these_models]) for table in comparisons]
errs = [np.array([table['se'][m] for m in these_models]) for table in comparisons]
warn = [np.array([table['warning'][m] for m in these_models]) for table in comparisons]


#%%

row_labs = var_k[1:]

Esempio n. 6

0

Mostra file

File: plot_compare.py Progetto: znob/arviz

"""
Compare Plot
============

_thumb: .5, .5
"""
import arviz as az

az.style.use('arviz-darkgrid')

# Load the two example fits first, then compare them under readable labels.
centered_fit = az.load_arviz_data('centered_eight')
non_centered_fit = az.load_arviz_data('non_centered_eight')

model_compare = az.compare({
    'Centered 8 schools': centered_fit,
    'Non-centered 8 schools': non_centered_fit,
})
az.plot_compare(model_compare, figsize=(12, 4))

Esempio n. 7

0

Mostra file

import pymc3 as pm

az.style.use('arviz-darkgrid')

# Data of the Eight Schools Model
# J is used as the shape of the per-group parameters; y is the observed
# data and sigma the standard deviation passed to the likelihood.
J = 8
y = np.array([28., 8., -3., 7., -1., 1., 18., 12.])
sigma = np.array([15., 10., 16., 11., 9., 11., 10., 18.])

# Centered parameterisation: theta is drawn directly from Normal(mu, tau).
with pm.Model('Centered Eight Schools') as centered_eight:
    mu = pm.Normal('mu', mu=0, sd=5)
    tau = pm.HalfCauchy('tau', beta=5)
    theta = pm.Normal('theta', mu=mu, sd=tau, shape=J)
    obs = pm.Normal('obs', mu=theta, sd=sigma, observed=y)
    centered_eight_trace = pm.sample()

# Non-centered parameterisation: theta is built deterministically as
# mu + tau * theta_tilde from a standard-normal theta_tilde.
with pm.Model('Non-Centered Eight Schools') as non_centered:
    mu = pm.Normal('mu', mu=0, sd=5)
    tau = pm.HalfCauchy('tau', beta=5)
    theta_tilde = pm.Normal('theta_t', mu=0, sd=1, shape=J)
    theta = pm.Deterministic('theta', mu + tau * theta_tilde)
    obs = pm.Normal('obs', mu=theta, sd=sigma, observed=y)
    non_centered_eight_trace = pm.sample()

# NOTE(review): the dict keys here are the model objects themselves, not
# strings -- confirm this matches the az.compare signature of the installed
# ArviZ version.
model_compare = az.compare({
    centered_eight: centered_eight_trace,
    non_centered: non_centered_eight_trace
})

# NOTE(review): `az.compareplot` looks like an older spelling of
# `az.plot_compare` -- confirm it exists in the installed ArviZ version.
az.compareplot(model_compare, figsize=(12, 4))

Esempio n. 8

0

Mostra file

    fit=f,
    posterior_predictive='y_tilde',
    observed_data=['y'],
    log_likelihood='log_lik',
    coords={
        'predictors': pred,
        'regions': regions
    },
    dims={
        'mu': ['predictors'],
        'tau': ['predictors'],
        'b': ['predictors', 'regions']
    }) for f, pred in zip([fit, fit_wait], [predictors, predictors_wait]))
# Compare the two fits using the 'loo' information criterion. The result is
# not assigned, so it is only shown when run as a notebook cell.
az.compare(
    {
        'waiting_list_excluded': inference_data,
        'waiting_list_included': inference_data_wait
    },
    ic='loo')

# This comparison is problematic as the model with waiting lists included is quite sensitive to single observations, making approximate leave-one-out cross-validation less reliable. Since there doesn't seem to be a very big difference between the two models' scores, from now on we proceed with the simpler model without the waiting list predictor.

# The next cell compares the observed data with the model's predictions

# In[189]:

# Posterior predictive check pairing observed 'y' with predicted 'y_tilde'.
az.plot_ppc(inference_data,
            data_pairs={'y': 'y_tilde'},
            figsize=[12, 7],
            textsize=12)
# Restrict the x axis of the current axes to [-0.5, 0.4].
ax = plt.gca()
lims = ax.set_xlim(-0.5, 0.4)

Esempio n. 9

0

Mostra file

File: run_model_comparison.py Progetto: victor-m-p/BayesWorkflow

                           sigma=0.5)

# more coords
# Both entries receive the same two labels.
coords["param"] = ["alpha", "beta"]
coords["param_bis"] = ["alpha", "beta"]

# Build the covariation model from the training data with the extended coords.
m_covariation = fm.covariation(t=t_train,
                               idx=idx_train,
                               y=y_train,
                               coords=coords,
                               dims=dims,
                               sigma=0.5)

## load the idata
# Inference data previously saved as netCDF, one file per model.
idata_pooled = az.from_netcdf("../models_python/idata_pooled_generic.nc")
idata_intercept = az.from_netcdf("../models_python/idata_intercept_generic.nc")
idata_covariation = az.from_netcdf(
    "../models_python/idata_covariation_generic.nc")

## model comparison
# az.compare is called with its default settings on the three loaded fits.
loo_overview = az.compare({
    "m_pooled": idata_pooled,
    "m_intercept": idata_intercept,
    "m_covariation": idata_covariation
})

## export it
# Write the comparison table to an image file.
dfi.export(obj=loo_overview, filename="../plots_python/loo_comparison.png")

# intervals: https://docs.pymc.io/notebooks/posterior_predictive.html

Esempio n. 10

0

Mostra file

File: hierarchical_optimization_server.py Progetto: Begri/Bayesian-Measurement-and-Verification

                  sigma=sigma,
                  observed=log_electricity,
                  dims='obs_id')

# Variational fit (full-rank ADVI) instead of MCMC sampling; 1000 draws are
# then taken from the fitted approximation.
with complete_pooling_2:
    convergence_check = CheckParametersConvergence(tolerance=0.01)
    approx = pm.fit(n=50000,
                    method='fullrank_advi',
                    callbacks=[convergence_check])
    complete_pooling_trace = approx.sample(1000)

# Comparison table for the three pooling variants.
traces_by_model = {
    'partial_pooling': partial_pooling_trace,
    'no_pooling': no_pooling_trace,
    'complete_pooling': complete_pooling_trace,
}
df_comp_loo = az.compare(traces_by_model)

# Mean CV(RMSE) and mean coverage per model, in the same order as `models`.
cvrmse_list = [
    np.mean(scores)
    for scores in (partial_pool_cvrmse_list,
                   complete_pool_cvrmse_list,
                   nopool_cvrmse_list)
]
coverage_list = [
    np.mean(scores)
    for scores in (partial_pool_coverage_list,
                   complete_pool_coverage_list,
                   nopool_coverage_list)
]
models = ['partial_pooling', 'complete_pooling', 'no_pooling']

Esempio n. 11

0

Mostra file

File: pymc3_models.py Progetto: OriolAbril/calaix_de_sastre

    trace_exp = pm.sample(2000, chains=4, cores=4, tune=2000, target_accept=.9)
    idata_exp = az.from_pymc3(trace_exp, dims=dims)

print("\n\n\n###    stored pointwise log likelihood data    ###\n")
print(idata_exp.log_likelihood)

# cross validation
# Keep handles on the original log_likelihood groups; every section below
# overwrites sample_stats["log_likelihood"] with a different view of them
# before calling az.loo / az.compare.
# NOTE(review): presumably this ArviZ version reads the pointwise log
# likelihood from sample_stats -- confirm.
log_lik_exp = idata_exp.log_likelihood
log_lik_pow = idata_pow.log_likelihood

print("\n\nLeave one *observation* out cross validation (whole model)")
# Concatenate the two observed variables along a new "condition" dimension.
condition_dim = xr.DataArray(["compatible", "incompatible"], name="condition")
idata_exp.sample_stats["log_likelihood"] = xr.concat((log_lik_exp.y_obs_comp, log_lik_exp.y_obs_incomp), dim=condition_dim)
idata_pow.sample_stats["log_likelihood"] = xr.concat((log_lik_pow.y_obs_comp, log_lik_pow.y_obs_incomp), dim=condition_dim)
print(az.loo(idata_exp), "\n")
print(az.compare({"exp": idata_exp, "pow": idata_pow}))

print("\n\nLeave one *subject* out cross validation (whole model)")
# Sum the log likelihood over the variables of the group.
idata_exp.sample_stats["log_likelihood"] = log_lik_exp.to_array().sum("variable")
idata_pow.sample_stats["log_likelihood"] = log_lik_pow.to_array().sum("variable")
print(az.loo(idata_exp), "\n")
print(az.compare({"exp": idata_exp, "pow": idata_pow}))

print("\n\nLeave one observation out cross validation (y_obs_comp only)")
# Use only the y_obs_comp variable.
idata_exp.sample_stats["log_likelihood"] = log_lik_exp.y_obs_comp
idata_pow.sample_stats["log_likelihood"] = log_lik_pow.y_obs_comp
print(az.loo(idata_exp), "\n")
print(az.compare({"exp": idata_exp, "pow": idata_pow}))

print("\n\nLeave one observation out cross validation (y_obs_incomp only)")
# Use only the y_obs_incomp variable.
idata_exp.sample_stats["log_likelihood"] = log_lik_exp.y_obs_incomp

Esempio n. 12

0

Mostra file

File: 10_counting_and_classification.py Progetto: brown5628/statistical-rethinking

    trace_10_2 = pm.sample(1000, tune=1000)

# Model with intercept `a`, a coefficient `bp` on prosoc_left, and an
# interaction coefficient `bpC` that applies when `condition` is non-zero.
with pm.Model() as model_10_3:
    a = pm.Normal("a", 0, 10)
    bp = pm.Normal("bp", 0, 10)
    bpC = pm.Normal("bpC", 0, 10)
    # Inverse-logit of the linear predictor gives the success probability.
    p = pm.math.invlogit(a + (bp + bpC * d.condition) * d.prosoc_left)
    # Binomial with n = 1 on the observed pulled_left column.
    pulled_left = pm.Binomial("pulled_left", 1, p, observed=d.pulled_left)

    trace_10_3 = pm.sample(1000, tune=1000)

# %%
# Comparison table for the three models fitted so far.
comp_df = az.compare({
    "m10.1": trace_10_1,
    "m10.2": trace_10_2,
    "m10.3": trace_10_3
})

comp_df

# %%
az.plot_compare(comp_df)

# %%
# NOTE(review): `credible_interval` may have been renamed in newer ArviZ
# releases -- confirm against the installed version.
az.summary(trace_10_3, credible_interval=0.89, round_to=2)

# %%
# Exponentiate 0.61 (shown as the cell output in a notebook).
np.exp(0.61)

# %%

Esempio n. 13

0

Mostra file

az.waic(model0)

az.loo(model0)

As you can see, both WAIC and LOO return similar values. ArviZ also comes equipped with the `compare(.)` function, which is more convenient than calling `loo(.)` or `waic(.)` separately for each model.

az.loo(model0)

## The compare function

This function takes a dictionary of names (keys) and models (values) as input and returns a DataFrame ordered (row-wise) from best to worst model.



cmp = az.compare({"m0":model0, "m1":model1,})
cmp

We have many columns, so let's check out their meaning one by one:

0) The index contains the names of the models, taken from the keys of the dictionary passed to `compare(.)`.

1) **rank**, the ranking of the models, starting from 0 (best model) up to the number of models minus one.

2) **waic**, the values of WAIC/LOO. The DataFrame is always sorted from best WAIC/LOO to worst. 

3) **p_waic**, the value of the penalization term. We can roughly think of this value as the estimated effective number of parameters (but do not take that too seriously).

4) **d_waic**, the relative difference between the value of WAIC/LOO for the top-ranked model and the value of WAIC/LOO for each model. For this reason we will always get a value of 0 for the first model.

5) **weight**, the weights assigned to each model. These weights can be loosely interpreted as the probability of each model (among the compared models) given the data. See model averaging section for more details.

Esempio n. 14

0

Mostra file

def compare_models(df,
                   models: dict,
                   extra_model_args: list = None,
                   parallel=False,
                   plotose=False,
                   **kwargs):
    """
    Fit every model in ``models`` on ``df`` and compare the fits with ArviZ.

    Parameters
    ----------
    df :
        Data handed to ``split_train_predict`` as first argument for every
        model.
    models : dict
        Maps a display name to the model passed to ``split_train_predict``.
    extra_model_args : list of dict, optional
        One dict of extra keyword arguments per model, in the same order as
        ``models``. Defaults to one empty dict per model.
    parallel : bool
        If True, the models are fitted through ``Parallel``/``delayed`` with
        ``num_chains=1``; otherwise they are fitted one after another.
    plotose : bool
        If True, plot a posterior predictive check per model and print WAIC,
        divergence counts and an effect size.
    **kwargs
        In the parallel path all keyword arguments are forwarded to
        ``split_train_predict``. In the serial path only ``kwargs['y']`` is
        forwarded, so ``y`` is required there.

    Returns
    -------
    The comparison table returned by ``az.compare``; it is also plotted with
    ``az.plot_compare``.

    Example
    -------
    compare_models(df,
                   models={'Hier':bayes.Numpyro.model_hier,
                           'Hier+covariance':bayes.Numpyro.model_hier_covar,
                           'Twostep Exponential':bayes.TwoStep.model_twostep,
                           'Twostep Gamma':bayes.TwoStep.model_twostep,
                          },
                   extra_model_args=[{}, {}, {'prior':'Exponential'}, {'prior':'Gamma'}],
                   y='y')
    """
    import warnings

    # TODO save all model args in BayesWindow in self
    # Calculate
    # One independent empty kwargs dict per model when none are supplied.
    extra_model_args = extra_model_args or [{} for _ in models]
    if parallel:
        traces = Parallel(n_jobs=min(os.cpu_count(), len(models)))(
            delayed(split_train_predict)(
                df, model, num_chains=1, **kwargs, **extra_model_arg)
            for model, extra_model_arg in zip(models.values(),
                                              extra_model_args))
    else:
        # NOTE(review): unlike the parallel path, only `y` is forwarded here
        # and the remaining kwargs are dropped -- confirm this is intended.
        traces = [
            split_train_predict(df, model, y=kwargs['y'], **extra_model_arg)
            for model, extra_model_arg in zip(tqdm(models.values()),
                                              extra_model_args)
        ]

    # Pair every model name with its fitted trace (order follows `models`).
    traces_dict = dict(zip(models, traces))

    # Plot
    if plotose:
        for trace_name, trace in traces_dict.items():

            # Plot PPC
            az.plot_ppc(
                trace,
                # flatten=[treatment],
                # flatten_pp=data_cols[2],
                mean=False,
                # num_pp_samples=1000,
                # kind='cumulative'
            )
            plt.title(trace_name)
            plt.show()
            r2(trace)
            # Weird that r2=1
            # Waic
            try:
                print('======= WAIC (higher is better): =========')
                print(az.waic(trace, pointwise=True))
                print(az.waic(trace, var_name='y'))
            except TypeError as err:
                # Best effort: report the failure instead of hiding it.
                warnings.warn(
                    f'WAIC could not be computed for {trace_name}: {err}')

        try:
            for trace_name, trace in traces_dict.items():
                # Print diagnostics and effect size
                print(
                    f"n(Divergences) = {trace.sample_stats.diverging.sum(['chain', 'draw']).values}"
                )
                try:
                    slope = trace.posterior['v_mu'].sel({
                        'v_mu_dim_0': 1
                    }).mean(['chain']).values
                except Exception:
                    # Fall back to 'b' when 'v_mu' cannot be selected.
                    slope = trace.posterior['b'].mean(['chain']).values
                print(
                    f'Effect size={(slope.mean() / slope.std()).round(2)}  == {trace_name}'
                )
        except Exception as err:
            # Diagnostics are optional; a failure stops them for the
            # remaining models but must not prevent the comparison below.
            warnings.warn(f'Diagnostics could not be printed: {err}')

    model_compare = az.compare(traces_dict)  # , var_name='y')
    az.plot_compare(model_compare, textsize=12, show=True)
    return model_compare