def _run_convergence_checks(self, trace):
    """Inspect a sampled trace for convergence problems and record warnings.

    With a single chain, cross-chain diagnostics cannot be computed, so a
    single informational warning is recorded and the method returns early.
    Otherwise Gelman-Rubin (R-hat) and effective-sample-size diagnostics are
    computed, cached on ``self``, and translated into ``SamplerWarning``
    objects of graded severity.

    Parameters
    ----------
    trace : MultiTrace
        The sampled trace to diagnose.
    """
    if trace.nchains == 1:
        msg = ("Only one chain was sampled, this makes it impossible to "
               "run some convergence checks")
        self._add_warnings([SamplerWarning(
            WarningType.BAD_PARAMS, msg, 'info', None, None, None)])
        return

    # Imported lazily here, mirroring the original, to avoid a circular
    # import at module load time.
    from pymc3 import diagnostics

    # Cache the raw diagnostics on the instance as well as using them below.
    self._effective_n = effective_n = diagnostics.effective_n(trace)
    self._gelman_rubin = gelman_rubin = diagnostics.gelman_rubin(trace)

    found = []

    # Severity escalates with the worst R-hat value; only the first
    # matching tier fires (mirrors the original if/elif chain).
    worst_rhat = max(stat.max() for stat in gelman_rubin.values())
    rhat_tiers = (
        (1.4, 'error',
         "The gelman-rubin statistic is larger than 1.4 for some "
         "parameters. The sampler did not converge."),
        (1.2, 'warn',
         "The gelman-rubin statistic is larger than 1.2 for some "
         "parameters."),
        (1.05, 'info',
         "The gelman-rubin statistic is larger than 1.05 for some "
         "parameters. This indicates slight problems during "
         "sampling."),
    )
    for threshold, level, msg in rhat_tiers:
        if worst_rhat > threshold:
            found.append(SamplerWarning(
                WarningType.CONVERGENCE, msg, level,
                None, None, gelman_rubin))
            break

    worst_eff = min(stat.min() for stat in effective_n.values())
    n_samples = len(trace) * trace.nchains
    # Absolute floor first (only meaningful with enough total draws),
    # then relative-efficiency tiers.
    if worst_eff < 200 and n_samples >= 500:
        msg = ("The estimated number of effective samples is smaller than "
               "200 for some parameters.")
        found.append(SamplerWarning(
            WarningType.CONVERGENCE, msg, 'error', None, None, effective_n))
    elif worst_eff / n_samples < 0.1:
        msg = ("The number of effective samples is smaller than "
               "10% for some parameters.")
        found.append(SamplerWarning(
            WarningType.CONVERGENCE, msg, 'warn', None, None, effective_n))
    elif worst_eff / n_samples < 0.25:
        msg = ("The number of effective samples is smaller than "
               "25% for some parameters.")
        found.append(SamplerWarning(
            WarningType.CONVERGENCE, msg, 'info', None, None, effective_n))

    self._add_warnings(found)
def _run_convergence_checks(self, trace):
    """Inspect a sampled trace for convergence problems and record warnings.

    NOTE(review): another definition of this method appears earlier in this
    file; at class-creation time the later definition wins — confirm which
    variant is intended and remove the other.

    With only one chain, cross-chain diagnostics are impossible, so a
    single informational warning is recorded. Otherwise Gelman-Rubin
    (R-hat) and effective-sample-size diagnostics are computed, cached on
    ``self``, and converted into ``SamplerWarning`` objects.

    Parameters
    ----------
    trace : MultiTrace
        The sampled trace to diagnose.
    """
    if trace.nchains == 1:
        msg = ("Only one chain was sampled, this makes it impossible to "
               "run some convergence checks")
        single_chain = SamplerWarning(
            WarningType.BAD_PARAMS, msg, 'info', None, None, None)
        self._add_warnings([single_chain])
        return

    # Lazy import, as in the original, to sidestep circular imports.
    from pymc3 import diagnostics

    self._effective_n = effective_n = diagnostics.effective_n(trace)
    self._gelman_rubin = gelman_rubin = diagnostics.gelman_rubin(trace)

    collected = []

    def flag(msg, level, extra):
        # Small local helper: every warning below differs only in its
        # message, severity level, and attached diagnostic dict.
        collected.append(SamplerWarning(
            WarningType.CONVERGENCE, msg, level, None, None, extra))

    rhat_max = max(val.max() for val in gelman_rubin.values())
    if rhat_max > 1.4:
        flag("The gelman-rubin statistic is larger than 1.4 for some "
             "parameters. The sampler did not converge.",
             'error', gelman_rubin)
    elif rhat_max > 1.2:
        flag("The gelman-rubin statistic is larger than 1.2 for some "
             "parameters.",
             'warn', gelman_rubin)
    elif rhat_max > 1.05:
        flag("The gelman-rubin statistic is larger than 1.05 for some "
             "parameters. This indicates slight problems during "
             "sampling.",
             'info', gelman_rubin)

    eff_min = min(val.min() for val in effective_n.values())
    n_samples = len(trace) * trace.nchains
    if eff_min < 200 and n_samples >= 500:
        flag("The estimated number of effective samples is smaller than "
             "200 for some parameters.",
             'error', effective_n)
    elif eff_min / n_samples < 0.25:
        flag("The number of effective samples is smaller than "
             "25% for some parameters.",
             'warn', effective_n)

    self._add_warnings(collected)
def loo(trace, model=None, reff=None, progressbar=False):
    """Calculate leave-one-out (LOO) cross-validation for a fitted model.

    Estimates out-of-sample predictive fit following Vehtari et al. (2015),
    using Pareto-smoothed importance sampling (PSIS) to stabilize the
    importance weights.

    Parameters
    ----------
    trace : result of MCMC run
    model : PyMC Model
        Optional model. Default None, taken from context.
    reff : float
        Relative MCMC efficiency, `effective_n / N`, i.e. the number of
        effective samples divided by the number of actual samples.
        Computed from `trace` by default.
    progressbar : bool
        Whether to display a progress bar while evaluating the pointwise
        log-likelihood.

    Returns
    -------
    df_loo : pandas.DataFrame
        Estimate and standard error of `elpd_loo`, `p_loo`, and `looic`.
    pointwise : dict
        Pointwise values of `elpd_loo`, `p_loo`, `looic`, and the Pareto
        shape estimates `ks`.
    """
    model = modelcontext(model)

    if reff is None:
        if trace.nchains == 1:
            # A single chain gives no cross-chain ESS estimate; assume
            # perfect efficiency, as the original does.
            reff = 1.
        else:
            ess = effective_n(trace)
            mean_ess = pmstat.dict2pd(ess, 'eff').mean()
            total_draws = len(trace) * trace.nchains
            reff = mean_ess / total_draws

    # Pointwise log-likelihood matrix: (draws, observations).
    log_py = pmstat._log_post_trace(trace, model, progressbar=progressbar)
    if log_py.size == 0:
        raise ValueError('The model does not contain observed values.')

    shape_str = ' by '.join(map(str, log_py.shape))
    print(f'Computed from {shape_str} log-likelihood matrix')

    # Pareto-smoothed importance weights (log scale) and shape diagnostics.
    lw, ks = pmstat._psislw(-log_py, reff)
    lw += log_py

    def _se(pointwise_vals):
        # Standard error of a sum of n pointwise terms: sqrt(n * var).
        return (len(pointwise_vals) * np.var(pointwise_vals)) ** 0.5

    elpd_loo_i = logsumexp(lw, axis=0)
    elpd_loo = elpd_loo_i.sum()
    elpd_loo_se = _se(elpd_loo_i)

    # LOO information criterion is elpd on the deviance scale.
    loo_lppd_i = - 2 * elpd_loo_i
    loo_lppd = loo_lppd_i.sum()
    loo_lppd_se = _se(loo_lppd_i)

    # Effective number of parameters: lppd minus elpd_loo, pointwise.
    lppd_i = logsumexp(log_py, axis=0, b=1. / log_py.shape[0])
    p_loo_i = lppd_i - elpd_loo_i
    p_loo = p_loo_i.sum()
    p_loo_se = _se(p_loo_i)

    df_loo = pd.DataFrame(
        dict(Estimate=[elpd_loo, p_loo, loo_lppd],
             SE=[elpd_loo_se, p_loo_se, loo_lppd_se]),
        index=['elpd_loo', 'p_loo', 'looic'])
    pointwise = dict(elpd_loo=elpd_loo_i, p_loo=p_loo_i,
                     looic=loo_lppd_i, ks=ks)
    return df_loo, pointwise