Example #1
    def _run_convergence_checks(self, trace):
        if trace.nchains == 1:
            msg = ("Only one chain was sampled, this makes it impossible to "
                   "run some convergence checks")
            warn = SamplerWarning(WarningType.BAD_PARAMS, msg, 'info',
                                  None, None, None)
            self._add_warnings([warn])
            return

        from pymc3 import diagnostics

        self._effective_n = effective_n = diagnostics.effective_n(trace)
        self._gelman_rubin = gelman_rubin = diagnostics.gelman_rubin(trace)

        warnings = []
        # Largest Gelman-Rubin statistic (R-hat) across all model variables.
        rhat_max = max(val.max() for val in gelman_rubin.values())
        if rhat_max > 1.4:
            msg = ("The gelman-rubin statistic is larger than 1.4 for some "
                   "parameters. The sampler did not converge.")
            warn = SamplerWarning(
                WarningType.CONVERGENCE, msg, 'error', None, None, gelman_rubin)
            warnings.append(warn)
        elif rhat_max > 1.2:
            msg = ("The gelman-rubin statistic is larger than 1.2 for some "
                   "parameters.")
            warn = SamplerWarning(
                WarningType.CONVERGENCE, msg, 'warn', None, None, gelman_rubin)
            warnings.append(warn)
        elif rhat_max > 1.05:
            msg = ("The gelman-rubin statistic is larger than 1.05 for some "
                   "parameters. This indicates slight problems during "
                   "sampling.")
            warn = SamplerWarning(
                WarningType.CONVERGENCE, msg, 'info', None, None, gelman_rubin)
            warnings.append(warn)

        # Smallest effective sample size across all model variables.
        eff_min = min(val.min() for val in effective_n.values())
        n_samples = len(trace) * trace.nchains
        if eff_min < 200 and n_samples >= 500:
            msg = ("The estimated number of effective samples is smaller than "
                   "200 for some parameters.")
            warn = SamplerWarning(
                WarningType.CONVERGENCE, msg, 'error', None, None, effective_n)
            warnings.append(warn)
        elif eff_min / n_samples < 0.1:
            msg = ("The number of effective samples is smaller than "
                   "10% for some parameters.")
            warn = SamplerWarning(
                WarningType.CONVERGENCE, msg, 'warn', None, None, effective_n)
            warnings.append(warn)
        elif eff_min / n_samples < 0.25:
            msg = ("The number of effective samples is smaller than "
                   "25% for some parameters.")
            warn = SamplerWarning(
                WarningType.CONVERGENCE, msg, 'info', None, None, effective_n)
            warnings.append(warn)

        self._add_warnings(warnings)
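The SamplerWarning constructor calls above take six positional arguments. A
minimal sketch of the supporting definitions, assuming the field layout
implied by those calls (the real definitions live in pymc3.backends.report,
which also defines more WarningType members than shown here):

import enum
from collections import namedtuple

class WarningType(enum.Enum):
    BAD_PARAMS = 1
    CONVERGENCE = 2

SamplerWarning = namedtuple(
    'SamplerWarning', 'kind, message, level, step, exec_info, extra')

# Example: build a convergence warning like the ones appended above.
warn = SamplerWarning(WarningType.CONVERGENCE, 'example message', 'warn',
                      None, None, None)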
Example #2
    def _run_convergence_checks(self, trace):
        if trace.nchains == 1:
            msg = ("Only one chain was sampled, this makes it impossible to "
                   "run some convergence checks")
            warn = SamplerWarning(WarningType.BAD_PARAMS, msg, 'info',
                                  None, None, None)
            self._add_warnings([warn])
            return

        from pymc3 import diagnostics

        self._effective_n = effective_n = diagnostics.effective_n(trace)
        self._gelman_rubin = gelman_rubin = diagnostics.gelman_rubin(trace)

        warnings = []
        # Largest Gelman-Rubin statistic (R-hat) across all model variables.
        rhat_max = max(val.max() for val in gelman_rubin.values())
        if rhat_max > 1.4:
            msg = ("The gelman-rubin statistic is larger than 1.4 for some "
                   "parameters. The sampler did not converge.")
            warn = SamplerWarning(
                WarningType.CONVERGENCE, msg, 'error', None, None, gelman_rubin)
            warnings.append(warn)
        elif rhat_max > 1.2:
            msg = ("The gelman-rubin statistic is larger than 1.2 for some "
                   "parameters.")
            warn = SamplerWarning(
                WarningType.CONVERGENCE, msg, 'warn', None, None, gelman_rubin)
            warnings.append(warn)
        elif rhat_max > 1.05:
            msg = ("The gelman-rubin statistic is larger than 1.05 for some "
                   "parameters. This indicates slight problems during "
                   "sampling.")
            warn = SamplerWarning(
                WarningType.CONVERGENCE, msg, 'info', None, None, gelman_rubin)
            warnings.append(warn)

        # Smallest effective sample size across all model variables.
        eff_min = min(val.min() for val in effective_n.values())
        n_samples = len(trace) * trace.nchains
        if eff_min < 200 and n_samples >= 500:
            msg = ("The estimated number of effective samples is smaller than "
                   "200 for some parameters.")
            warn = SamplerWarning(
                WarningType.CONVERGENCE, msg, 'error', None, None, effective_n)
            warnings.append(warn)
        elif eff_min / n_samples < 0.25:
            msg = ("The number of effective samples is smaller than "
                   "25% for some parameters.")
            warn = SamplerWarning(
                WarningType.CONVERGENCE, msg, 'warn', None, None, effective_n)
            warnings.append(warn)

        self._add_warnings(warnings)
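Both examples collapse the per-variable diagnostic arrays to a single scalar
before comparing against the thresholds. A toy illustration with hypothetical
values (gelman_rubin() and effective_n() each return a dict mapping variable
names to arrays, one entry per element of each variable):

import numpy as np

gelman_rubin = {'mu': np.array([1.01]), 'beta': np.array([1.25, 1.03])}
effective_n = {'mu': np.array([850.]), 'beta': np.array([120., 900.])}

rhat_max = max(val.max() for val in gelman_rubin.values())
eff_min = min(val.min() for val in effective_n.values())
print(rhat_max)  # 1.25 -> falls in the 'warn' band (1.2 < rhat <= 1.4)
print(eff_min)   # 120.0 -> 'error' band once n_samples >= 500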
Example #3
# Imports assumed from the surrounding module (pymc3 3.x era; pmstat is
# pymc3.stats, which provides the private helpers used below).
import numpy as np
import pandas as pd
from scipy.special import logsumexp

from pymc3.model import modelcontext
from pymc3.diagnostics import effective_n
import pymc3.stats as pmstat


def loo(trace, model=None, reff=None, progressbar=False):
    """Calculates leave-one-out (LOO) cross-validation for out of sample
    predictive model fit, following Vehtari et al. (2015). Cross-validation is
    computed using Pareto-smoothed importance sampling (PSIS).

    Parameters
    ----------
    trace : result of MCMC run
    model : PyMC Model
        Optional model. Default None, taken from context.
    reff : float
        relative MCMC efficiency, `effective_n / N` i.e. number of effective
        samples divided by the number of actual samples. Computed from trace by
        default.
    progressbar: bool
        Whether or not to display a progress bar in the command line. The
        bar shows the percentage of completion, the evaluation speed, and
        the estimated time to completion

    Returns
    -------
    df_loo: pandas.DataFrame 
        Estimation and standard error of `elpd_loo`, `p_loo`, and `looic`
    pointwise: dict
        point-wise value of `elpd_loo`, `p_loo`, `looic` and pareto shape `k`
    """
    model = modelcontext(model)

    if reff is None:
        if trace.nchains == 1:
            reff = 1.
        else:
            # Average relative efficiency across all variables:
            # mean effective sample size divided by total draws.
            eff = effective_n(trace)
            eff_ave = pmstat.dict2pd(eff, 'eff').mean()
            samples = len(trace) * trace.nchains
            reff = eff_ave / samples

    log_py = pmstat._log_post_trace(trace, model, progressbar=progressbar)
    if log_py.size == 0:
        raise ValueError('The model does not contain observed values.')

    shape_str = ' by '.join(map(str, log_py.shape))
    print('Computed from ' + shape_str + ' log-likelihood matrix')

    # Pareto-smoothed importance sampling: smooth the raw importance
    # weights and return the Pareto shape estimate k for each observation.
    lw, ks = pmstat._psislw(-log_py, reff)
    lw += log_py

    # Pointwise expected log predictive density (elpd) and its standard error.
    elpd_loo_i = logsumexp(lw, axis=0)
    elpd_loo = elpd_loo_i.sum()
    elpd_loo_se = (len(elpd_loo_i) * np.var(elpd_loo_i)) ** 0.5

    # LOO information criterion, on the deviance scale.
    loo_lppd_i = -2 * elpd_loo_i
    loo_lppd = loo_lppd_i.sum()
    loo_lppd_se = (len(loo_lppd_i) * np.var(loo_lppd_i)) ** 0.5

    # Effective number of parameters: pointwise lppd minus elpd_loo.
    lppd_i = logsumexp(log_py, axis=0, b=1. / log_py.shape[0])
    p_loo_i = lppd_i - elpd_loo_i
    p_loo = p_loo_i.sum()
    p_loo_se = (len(p_loo_i) * np.var(p_loo_i)) ** 0.5

    df_loo = (pd.DataFrame(dict(Estimate=[elpd_loo, p_loo, loo_lppd],
                                SE=[elpd_loo_se, p_loo_se, loo_lppd_se]))
                .rename(index={0: 'elpd_loo',
                               1: 'p_loo',
                               2: 'looic'}))
    pointwise = dict(elpd_loo=elpd_loo_i,
                     p_loo=p_loo_i,
                     looic=loo_lppd_i,
                     ks=ks)
    return df_loo, pointwise
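A hypothetical end-to-end usage sketch for the loo() function above (the
model, data, and sampler settings are illustrative, using the pymc3 3.x API):

import numpy as np
import pymc3 as pm

y = np.random.normal(1., 2., size=100)
with pm.Model() as model:
    mu = pm.Normal('mu', mu=0., sd=10.)
    sigma = pm.HalfNormal('sigma', sd=10.)
    pm.Normal('y_obs', mu=mu, sd=sigma, observed=y)
    trace = pm.sample(1000, tune=1000, chains=2)

df_loo, pointwise = loo(trace, model=model)
print(df_loo)                  # elpd_loo, p_loo, looic with standard errors
print(pointwise['ks'].max())   # Pareto shape k; values above ~0.7 are unreliable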