""" Compare Plot ============ _thumb: .5, .5 """ import matplotlib.pyplot as plt import arviz as az az.style.use("arviz-darkgrid") model_compare = az.compare({ "Centered 8 schools": az.load_arviz_data("centered_eight"), "Non-centered 8 schools": az.load_arviz_data("non_centered_eight"), }) az.plot_compare(model_compare, figsize=(12, 4)) plt.show()
def iqr(x, a=0): return np.subtract(*np.percentile(x, [75, 25], axis=a)) T_obs = iqr(cs_exp) for idx, d_sim in enumerate(ppc_mm): T_sim = iqr(d_sim['y'][:100].T, 1) p_value = np.mean(T_sim >= T_obs) az.plot_kde(T_sim, ax=ax[idx]) ax[idx].axvline(T_obs, 0, 1, color='k', ls='--') ax[idx].set_title(f'K = {clusters[idx]} \n p-value {p_value:.2f}') ax[idx].set_yticks([]) # %% comp = az.compare(dict(zip(clusters, traces)), method='BB-pseudo-BMA') comp # %% az.plot_compare(comp) # %% def stick_breaking_truncated(α, H, K): """ Truncated stick-breaking process view of a DP Parameters ---------- α : float concentration parameter
sstot = np.sum((y - ybar)**2) r2 = ssreg / sstot plt.plot(x_n, ffit, label=f'order {i}, $R^2$= {r2:.2f}') plt.legend(loc=2) plt.xlabel('x') plt.ylabel('y', rotation=0) # %% waic_1 = az.waic(trace_1) waic_1 # %% cmp_df = az.compare({ 'model_1': trace_1, 'model_p': trace_p }, method='BB-pseudo-BMA') cmp_df # %% az.plot_compare(cmp_df) # %% w = .5 y_lp = pm.sample_posterior_predictive_w([trace_1, trace_p], samples=1000, models=[model_1, model_p], weights=[w, 1 - w]) _, ax = plt.subplots(figsize=(10, 6))
def compare_models(self, traces=None, labels=None):
    """Compare several traces with ``az.compare``.

    Parameters
    ----------
    traces : sequence, optional
        Traces to compare. Each one is passed through
        ``self.get_trace_stats`` before being handed to ``az.compare``.
        Omitting it is the same as passing an empty sequence.
    labels : sequence, optional
        Names used as keys of the comparison dict, matched to ``traces``
        by position. Omitting it is the same as passing an empty sequence.

    Returns
    -------
    Whatever ``az.compare`` returns for the ``{label: stats}`` dict.

    Notes
    -----
    ``zip`` stops at the shorter of the two sequences, so surplus traces or
    labels are ignored; a repeated label keeps only its last trace.
    """
    # None sentinels instead of mutable ``[]`` defaults, which would be
    # shared between calls.
    traces = () if traces is None else traces
    labels = () if labels is None else labels
    return az.compare({
        lab: self.get_trace_stats(trace)
        for lab, trace in zip(labels, traces)
    })
this_dset['tend'] = this_dset['tbeg'] + this_dset['twindow'] wa_az = [] wa_prm = [] # for which_sess in these_sess: dset_info = {**this_dset} folds = hlp.folder_hierarchy(dset_info) fits_az = {p:None for p in these_models} for mod in these_models: with open(SAVE_DIR+folds+'/arviz_fit_%s_model.pkl'%mod, 'rb') as f: fits_az[mod] = pkl.load(f) # with open(SAVE_DIR+folds+'/fitted_params_%s_model.pkl'%mod, 'rb') as f: # fits_prm[mod] = pkl.load(open(SAVE_DIR+folds+'/fitted_params_%s_model.pkl'%mod, 'rb')) comp = az.compare(fits_az) # wa_prm.append(np.exp(fits_az['hybrid_error_precue'].posterior['logits'].mean())/(1+np.exp(fits_az['hybrid_error_precue'].posterior['logits'].mean()))) wa_az.append(comp) loos.append(wa_az) # az_fits.append(wa_prm) vals = [np.array([loos[i][0]['loo'][m] for m in these_models]) for i in range(len(list(itt.product(*var_v))))] errs = [np.array([loos[i][0]['se'][m] for m in these_models]) for i in range(len(list(itt.product(*var_v))))] warn = [np.array([loos[i][0]['warning'][m] for m in these_models]) for i in range(len(list(itt.product(*var_v))))] #%% row_labs = var_k[1:]
""" Compare Plot ============ _thumb: .5, .5 """ import arviz as az az.style.use('arviz-darkgrid') model_compare = az.compare({ 'Centered 8 schools': az.load_arviz_data('centered_eight'), 'Non-centered 8 schools': az.load_arviz_data('non_centered_eight') }) az.plot_compare(model_compare, figsize=(12, 4))
import pymc3 as pm

az.style.use('arviz-darkgrid')

# Data of the Eight Schools Model
J = 8
y = np.array([28., 8., -3., 7., -1., 1., 18., 12.])
sigma = np.array([15., 10., 16., 11., 9., 11., 10., 18.])

# Centered parameterisation: theta is drawn directly around mu with scale tau.
with pm.Model('Centered Eight Schools') as centered_eight:
    mu = pm.Normal('mu', mu=0, sd=5)
    tau = pm.HalfCauchy('tau', beta=5)
    theta = pm.Normal('theta', mu=mu, sd=tau, shape=J)
    obs = pm.Normal('obs', mu=theta, sd=sigma, observed=y)
    centered_eight_trace = pm.sample()

# Non-centered parameterisation: theta = mu + tau * theta_t, with theta_t
# a standard normal.
with pm.Model('Non-Centered Eight Schools') as non_centered:
    mu = pm.Normal('mu', mu=0, sd=5)
    tau = pm.HalfCauchy('tau', beta=5)
    theta_tilde = pm.Normal('theta_t', mu=0, sd=1, shape=J)
    theta = pm.Deterministic('theta', mu + tau * theta_tilde)
    obs = pm.Normal('obs', mu=theta, sd=sigma, observed=y)
    non_centered_eight_trace = pm.sample()

# az.compare expects {name: fit}; keying by the pm.Model objects would put
# model objects, not readable names, in the comparison index.
model_compare = az.compare({
    'Centered Eight Schools': centered_eight_trace,
    'Non-Centered Eight Schools': non_centered_eight_trace,
})

# `az.compareplot` was renamed to `az.plot_compare`.
az.plot_compare(model_compare, figsize=(12, 4))
fit=f, posterior_predictive='y_tilde', observed_data=['y'], log_likelihood='log_lik', coords={ 'predictors': pred, 'regions': regions }, dims={ 'mu': ['predictors'], 'tau': ['predictors'], 'b': ['predictors', 'regions'] }) for f, pred in zip([fit, fit_wait], [predictors, predictors_wait])) az.compare( { 'waiting_list_excluded': inference_data, 'waiting_list_included': inference_data_wait }, ic='loo') # This comparison is problematic as the model with waiting lists included is quite sensitive to single observations, making approximate leave-one-out cross-validation less reliable. Since there doesn't seem to be a very big difference between the two models' scores, from now on we proceed with the simpler model without the waiting list predictor. # The next cell compares the observed data with the model's predictions # In[189]: az.plot_ppc(inference_data, data_pairs={'y': 'y_tilde'}, figsize=[12, 7], textsize=12) ax = plt.gca() lims = ax.set_xlim(-0.5, 0.4)
sigma=0.5) # more coords coords["param"] = ["alpha", "beta"] coords["param_bis"] = ["alpha", "beta"] m_covariation = fm.covariation(t=t_train, idx=idx_train, y=y_train, coords=coords, dims=dims, sigma=0.5) ## load the idata idata_pooled = az.from_netcdf("../models_python/idata_pooled_generic.nc") idata_intercept = az.from_netcdf("../models_python/idata_intercept_generic.nc") idata_covariation = az.from_netcdf( "../models_python/idata_covariation_generic.nc") ## model comparison loo_overview = az.compare({ "m_pooled": idata_pooled, "m_intercept": idata_intercept, "m_covariation": idata_covariation }) ## export it dfi.export(obj=loo_overview, filename="../plots_python/loo_comparison.png") # intervals: https://docs.pymc.io/notebooks/posterior_predictive.html
sigma=sigma, observed=log_electricity, dims='obs_id') # Fitting without sampling with complete_pooling_2: approx = pm.fit(n=50000, method='fullrank_advi', callbacks=[CheckParametersConvergence(tolerance=0.01)]) complete_pooling_trace = approx.sample(1000) #Compare the LOO of the 3 complete models df_comp_loo = az.compare({ 'partial_pooling': partial_pooling_trace, 'no_pooling': no_pooling_trace, 'complete_pooling': complete_pooling_trace }) #Export Results cvrmse_list = [ np.mean(partial_pool_cvrmse_list), np.mean(complete_pool_cvrmse_list), np.mean(nopool_cvrmse_list) ] coverage_list = [ np.mean(partial_pool_coverage_list), np.mean(complete_pool_coverage_list), np.mean(nopool_coverage_list) ] models = ['partial_pooling', 'complete_pooling', 'no_pooling']
trace_exp = pm.sample(2000, chains=4, cores=4, tune=2000, target_accept=.9) idata_exp = az.from_pymc3(trace_exp, dims=dims) print("\n\n\n### stored pointwise log likelihood data ###\n") print(idata_exp.log_likelihood) # cross validation log_lik_exp = idata_exp.log_likelihood log_lik_pow = idata_pow.log_likelihood print("\n\nLeave one *observation* out cross validation (whole model)") condition_dim = xr.DataArray(["compatible", "incompatible"], name="condition") idata_exp.sample_stats["log_likelihood"] = xr.concat((log_lik_exp.y_obs_comp, log_lik_exp.y_obs_incomp), dim=condition_dim) idata_pow.sample_stats["log_likelihood"] = xr.concat((log_lik_pow.y_obs_comp, log_lik_pow.y_obs_incomp), dim=condition_dim) print(az.loo(idata_exp), "\n") print(az.compare({"exp": idata_exp, "pow": idata_pow})) print("\n\nLeave one *subject* out cross validation (whole model)") idata_exp.sample_stats["log_likelihood"] = log_lik_exp.to_array().sum("variable") idata_pow.sample_stats["log_likelihood"] = log_lik_pow.to_array().sum("variable") print(az.loo(idata_exp), "\n") print(az.compare({"exp": idata_exp, "pow": idata_pow})) print("\n\nLeave one observation out cross validation (y_obs_comp only)") idata_exp.sample_stats["log_likelihood"] = log_lik_exp.y_obs_comp idata_pow.sample_stats["log_likelihood"] = log_lik_pow.y_obs_comp print(az.loo(idata_exp), "\n") print(az.compare({"exp": idata_exp, "pow": idata_pow})) print("\n\nLeave one observation out cross validation (y_obs_incomp only)") idata_exp.sample_stats["log_likelihood"] = log_lik_exp.y_obs_incomp
trace_10_2 = pm.sample(1000, tune=1000) with pm.Model() as model_10_3: a = pm.Normal("a", 0, 10) bp = pm.Normal("bp", 0, 10) bpC = pm.Normal("bpC", 0, 10) p = pm.math.invlogit(a + (bp + bpC * d.condition) * d.prosoc_left) pulled_left = pm.Binomial("pulled_left", 1, p, observed=d.pulled_left) trace_10_3 = pm.sample(1000, tune=1000) # %% comp_df = az.compare({ "m10.1": trace_10_1, "m10.2": trace_10_2, "m10.3": trace_10_3 }) comp_df # %% az.plot_compare(comp_df) # %% az.summary(trace_10_3, credible_interval=0.89, round_to=2) # %% np.exp(0.61) # %%
az.waic(model0)

az.loo(model0)

As you can see, both WAIC and LOO return similar values. ArviZ comes equipped with the `compare(.)` function, which is more convenient than calling `loo(.)` or `waic(.)` separately for each model.

az.loo(model0)

## The compare function

This function takes a dictionary of names (keys) and models (values) as input and returns a DataFrame ordered (row-wise) from best to worst model.

cmp = az.compare({"m0":model0, "m1":model1,})
cmp

We have many columns, so let's check out their meaning one by one:

0) The index lists the names of the models, taken from the keys of the dictionary passed to `compare(.)`.

1) **rank**, the ranking of the models, from 0 (best model) to the number of models minus one.

2) **waic**, the values of WAIC/LOO. The DataFrame is always sorted from best WAIC/LOO to worst.

3) **p_waic**, the value of the penalization term. We can roughly think of this value as the estimated effective number of parameters (but do not take that too seriously).

4) **d_waic**, the relative difference between the value of WAIC/LOO for the top-ranked model and the value of WAIC/LOO for each model. For this reason, we will always get a value of 0 for the first model.

5) **weight**, the weights assigned to each model. These weights can be loosely interpreted as the probability of each model (among the compared models) given the data. See the model averaging section for more details.
def compare_models(df, models: dict,
                   extra_model_args: list = None,
                   parallel=False,
                   plotose=False,
                   **kwargs):
    """
    Fit every model in ``models`` on ``df`` and compare the fits with ``az.compare``.

    Parameters
    ----------
    df :
        Data passed to ``split_train_predict`` for every model.
    models : dict
        Maps a display name to the model handed to ``split_train_predict``.
    extra_model_args : list of dict, optional
        One dict of extra keyword arguments per model, in the same order as
        ``models``. When omitted or empty, every model gets ``{}``.
    parallel : bool
        If True, fit through ``Parallel``/``delayed`` with ``num_chains=1``
        and forward all ``**kwargs``. If False, fit one model after another.
    plotose : bool
        If True, for every fit: show a posterior-predictive plot, call
        ``r2`` and print WAIC.
    **kwargs
        kwargs are forwarded to split_train_predict->fit_numpyro in parallel
        mode. The sequential path forwards only ``y`` and requires it.

    Returns
    -------
    The result of ``az.compare`` on the ``{name: trace}`` dict. The
    comparison is also plotted with ``az.plot_compare(..., show=True)``.

    Raises
    ------
    KeyError
        In sequential mode, when ``y`` is not passed as a keyword argument.

    Example
    -------
    compare_models(df,
                   models={'Hier': bayes.Numpyro.model_hier,
                           'Hier+covariance': bayes.Numpyro.model_hier_covar,
                           'Twostep Exponential': bayes.TwoStep.model_twostep,
                           'Twostep Gamma': bayes.TwoStep.model_twostep,
                           },
                   extra_model_args=[{}, {},
                                     {'prior': 'Exponential'},
                                     {'prior': 'Gamma'}],
                   y=...)
    """
    import warnings

    # TODO save all model args in BayesWindow in self
    # Calculate
    if not extra_model_args:
        extra_model_args = [{} for _ in models]

    if parallel:
        n_jobs = min(os.cpu_count(), len(models))
        traces = Parallel(n_jobs=n_jobs)(
            delayed(split_train_predict)(
                df, model, num_chains=1, **kwargs, **extra_model_arg)
            for model, extra_model_arg in zip(models.values(), extra_model_args))
    else:
        # NOTE(review): unlike the parallel branch, only `y` is forwarded here
        # and the rest of **kwargs is dropped -- confirm this is intended.
        traces = [
            split_train_predict(df, model, y=kwargs['y'], **extra_model_arg)
            for model, extra_model_arg in zip(tqdm(models.values()),
                                              extra_model_args)
        ]

    # Pair every model name with its fitted trace (positional match).
    traces_dict = dict(zip(models, traces))

    # Plot
    if plotose:
        for trace_name, trace in traces_dict.items():
            # Plot PPC
            az.plot_ppc(
                trace,
                # flatten=[treatment],
                # flatten_pp=data_cols[2],
                mean=False,
                # num_pp_samples=1000,
                # kind='cumulative'
            )
            plt.title(trace_name)
            plt.show()
            r2(trace)
            # Weird that r2=1

            # Waic
            try:
                print('======= WAIC (higher is better): =========')
                print(az.waic(trace, pointwise=True))
                print(az.waic(trace, var_name='y'))
            except TypeError:
                pass

    # Diagnostics are best-effort: a failure must not prevent the comparison
    # below, but it is reported instead of being silently discarded.
    try:
        for trace_name, trace in traces_dict.items():
            # Print diagnostics and effect size
            print(
                f"n(Divergences) = {trace.sample_stats.diverging.sum(['chain', 'draw']).values}"
            )
            try:
                slope = trace.posterior['v_mu'].sel({
                    'v_mu_dim_0': 1
                }).mean(['chain']).values
            except Exception:
                # No usable 'v_mu' in this posterior: fall back to 'b'.
                slope = trace.posterior['b'].mean(['chain']).values
            print(
                f'Effect size={(slope.mean() / slope.std()).round(2)} == {trace_name}'
            )
    except Exception as err:
        warnings.warn(f'Skipping remaining trace diagnostics: {err!r}')

    model_compare = az.compare(traces_dict)  # , var_name='y')
    az.plot_compare(model_compare, textsize=12, show=True)
    return model_compare