Beispiel #1
0
    def test_ovewrite_model_coords_dims(self):
        """Check coords and dims from model object can be partially overwritten."""
        dim1 = ["a", "b"]
        new_dim1 = ["c", "d"]
        coords = {"dim1": dim1, "dim2": ["c1", "c2"]}
        x_data = np.arange(4).reshape((2, 2))
        y = x_data + np.random.normal(size=(2, 2))
        with pm.Model(coords=coords):
            x = pm.ConstantData("x", x_data, dims=("dim1", "dim2"))
            beta = pm.Normal("beta", 0, 1, dims="dim1")
            _ = pm.Normal("obs", x * beta, 1, observed=y, dims=("dim1", "dim2"))
            trace = pm.sample(100, tune=100, return_inferencedata=False)
            idata1 = to_inference_data(trace)
            idata2 = to_inference_data(trace, coords={"dim1": new_dim1}, dims={"beta": ["dim2"]})

        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
        fails1 = check_multiple_attrs(test_dict, idata1)
        assert not fails1
        fails2 = check_multiple_attrs(test_dict, idata2)
        assert not fails2
        assert "dim1" in list(idata1.posterior.beta.dims)
        assert "dim2" in list(idata2.posterior.beta.dims)
        assert np.all(idata1.constant_data.x.dim1.values == np.array(dim1))
        assert np.all(idata1.constant_data.x.dim2.values == np.array(["c1", "c2"]))
        assert np.all(idata2.constant_data.x.dim1.values == np.array(new_dim1))
        assert np.all(idata2.constant_data.x.dim2.values == np.array(["c1", "c2"]))
Beispiel #2
0
    def test_no_trace(self):
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            idata = pm.sample(100, tune=100)
            prior = pm.sample_prior_predictive(return_inferencedata=False)
            posterior_predictive = pm.sample_posterior_predictive(
                idata, return_inferencedata=False)

        # Only prior
        inference_data = to_inference_data(prior=prior, model=model)
        test_dict = {"prior": ["beta"], "prior_predictive": ["obs"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
        # Only posterior_predictive
        inference_data = to_inference_data(
            posterior_predictive=posterior_predictive, model=model)
        test_dict = {"posterior_predictive": ["obs"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
        # Prior and posterior_predictive but no trace
        inference_data = to_inference_data(
            prior=prior,
            posterior_predictive=posterior_predictive,
            model=model)
        test_dict = {
            "prior": ["beta"],
            "prior_predictive": ["obs"],
            "posterior_predictive": ["obs"],
        }
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
    def test_autodetect_coords_from_model(self, use_context):
        pd = pytest.importorskip("pandas")
        df_data = pd.DataFrame(columns=["date"]).set_index("date")
        dates = pd.date_range(start="2020-05-01", end="2020-05-20")
        for city, mu in {"Berlin": 15, "San Marino": 18, "Paris": 16}.items():
            df_data[city] = np.random.normal(loc=mu, size=len(dates))
        df_data.index = dates
        df_data.index.name = "date"

        coords = {"date": df_data.index, "city": df_data.columns}
        with pm.Model(coords=coords) as model:
            europe_mean = pm.Normal("europe_mean_temp", mu=15.0, sigma=3.0)
            city_offset = pm.Normal("city_offset",
                                    mu=0.0,
                                    sigma=3.0,
                                    dims="city")
            city_temperature = pm.Deterministic("city_temperature",
                                                europe_mean + city_offset,
                                                dims="city")

            data_dims = ("date", "city")
            data = pm.ConstantData("data", df_data, dims=data_dims)
            _ = pm.Normal("likelihood",
                          mu=city_temperature,
                          sigma=0.5,
                          observed=data,
                          dims=data_dims)

            trace = pm.sample(
                return_inferencedata=False,
                compute_convergence_checks=False,
                cores=1,
                chains=1,
                tune=20,
                draws=30,
                step=pm.Metropolis(),
            )
            if use_context:
                idata = to_inference_data(trace=trace)
        if not use_context:
            idata = to_inference_data(trace=trace, model=model)

        assert "city" in list(idata.posterior.dims)
        assert "city" in list(idata.observed_data.dims)
        assert "date" in list(idata.observed_data.dims)

        np.testing.assert_array_equal(idata.posterior.coords["city"],
                                      coords["city"])
        np.testing.assert_array_equal(idata.observed_data.coords["date"],
                                      coords["date"])
        np.testing.assert_array_equal(idata.observed_data.coords["city"],
                                      coords["city"])
Beispiel #4
0
 def test_neff(self):
     if hasattr(self, "min_n_eff"):
         with self.model:
             idata = to_inference_data(self.trace[self.burn:])
         n_eff = az.ess(idata)
         for var in n_eff:
             npt.assert_array_less(self.min_n_eff, n_eff[var])
Beispiel #5
0
    def test_predictions_constant_data(self):
        with pm.Model():
            x = pm.ConstantData("x", [1.0, 2.0, 3.0])
            y = pm.MutableData("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            trace = pm.sample(100, tune=100, return_inferencedata=False)
            inference_data = to_inference_data(trace)

        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails

        with pm.Model():
            x = pm.MutableData("x", [1.0, 2.0])
            y = pm.ConstantData("y", [1.0, 2.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            predictive_trace = pm.sample_posterior_predictive(
                inference_data, return_inferencedata=False
            )
            assert set(predictive_trace.keys()) == {"obs"}
            # this should be four chains of 100 samples
            # assert predictive_trace["obs"].shape == (400, 2)
            # but the shape seems to vary between pymc versions
            inference_data = predictions_to_inference_data(predictive_trace, posterior_trace=trace)
        test_dict = {"posterior": ["beta"], "~observed_data": ""}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails, "Posterior data not copied over as expected."
        test_dict = {"predictions": ["obs"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails, "Predictions not instantiated as expected."
        test_dict = {"predictions_constant_data": ["x"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails, "Predictions constant data not instantiated as expected."
Beispiel #6
0
def test_plot_posterior_predictive_glm_non_defaults(inferencedata):
    with pm.Model() as model:
        pm.Normal("x")
        pm.Normal("Intercept")
    trace = point_list_to_multitrace([{
        "x": np.array([1]),
        "Intercept": np.array([1])
    }], model)
    if inferencedata:
        trace = to_inference_data(trace=trace, model=model)
    _, ax = plt.subplots()
    plot_posterior_predictive_glm(trace,
                                  samples=1,
                                  lm=lambda x, _: x,
                                  eval=np.linspace(0, 1, 10),
                                  lw=0.3,
                                  c="b")
    lines = ax.get_lines()
    expected_xvalues = np.linspace(0, 1, 10)
    expected_yvalues = np.linspace(0, 1, 10)
    for line in lines:
        x_axis, y_axis = line.get_data()
        np.testing.assert_array_equal(x_axis, expected_xvalues)
        np.testing.assert_array_equal(y_axis, expected_yvalues)
        assert line.get_lw() == 0.3
        assert line.get_c() == "b"
Beispiel #7
0
    def test_constant_data(self, use_context):
        """Test constant_data group behaviour."""
        with pm.Model() as model:
            x = pm.ConstantData("x", [1.0, 2.0, 3.0])
            y = pm.MutableData("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            trace = pm.sample(100, chains=2, tune=100, return_inferencedata=False)
            if use_context:
                inference_data = to_inference_data(trace=trace)

        if not use_context:
            inference_data = to_inference_data(trace=trace, model=model)
        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
        assert inference_data.log_likelihood["obs"].shape == (2, 100, 3)
Beispiel #8
0
    def test_save_warmup_issue_1208_after_3_9(self):
        with pm.Model():
            pm.Uniform("u1")
            pm.Normal("n1")
            trace = pm.sample(
                tune=100,
                draws=200,
                chains=2,
                cores=1,
                step=pm.Metropolis(),
                discard_tuned_samples=False,
                return_inferencedata=False,
            )
            assert isinstance(trace, pm.backends.base.MultiTrace)
            assert len(trace) == 300

            # from original trace, warmup draws should be separated out
            idata = to_inference_data(trace, save_warmup=True)
            test_dict = {
                "posterior": ["u1", "n1"],
                "sample_stats": ["~tune", "accept"],
                "warmup_posterior": ["u1", "n1"],
                "warmup_sample_stats": ["~tune", "accept"],
            }
            fails = check_multiple_attrs(test_dict, idata)
            assert not fails
            assert idata.posterior.dims["chain"] == 2
            assert idata.posterior.dims["draw"] == 200

            # manually sliced trace triggers the same warning as <=3.8
            with pytest.warns(UserWarning, match="Warmup samples"):
                idata = to_inference_data(trace[-30:], save_warmup=True)
            test_dict = {
                "posterior": ["u1", "n1"],
                "sample_stats": ["~tune", "accept"],
                "~warmup_posterior": [],
                "~warmup_sample_stats": [],
            }
            fails = check_multiple_attrs(test_dict, idata)
            assert not fails
            assert idata.posterior.dims["chain"] == 2
            assert idata.posterior.dims["draw"] == 30
Beispiel #9
0
def _compute_convergence_checks(idata, draws, model, trace):
    if draws < 100:
        warnings.warn(
            "The number of samples is too small to check convergence reliably.",
            stacklevel=2,
        )
    else:
        if idata is None:
            idata = to_inference_data(trace, log_likelihood=False)
        trace.report._run_convergence_checks(idata, model)
    trace.report._log_summary()
Beispiel #10
0
    def test_priors_separation(self, use_context):
        """Test model is enough to get prior, prior predictive and observed_data."""
        with pm.Model() as model:
            x = pm.Data("x", [1.0, 2.0, 3.0])
            y = pm.Data("y", [1.0, 2.0, 3.0])
            beta = pm.Normal("beta", 0, 1)
            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
            prior = pm.sample_prior_predictive(return_inferencedata=False)

        test_dict = {
            "prior": ["beta", "~obs"],
            "observed_data": ["obs"],
            "prior_predictive": ["obs"],
        }
        if use_context:
            with model:
                inference_data = to_inference_data(prior=prior)
        else:
            inference_data = to_inference_data(prior=prior, model=model)
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
Beispiel #11
0
def _save_sample_stats(
    sample_settings,
    sample_stats,
    chains,
    trace,
    return_inferencedata,
    _t_sampling,
    idata_kwargs,
    model,
):
    sample_settings_dict = sample_settings[0]
    sample_settings_dict["_t_sampling"] = _t_sampling
    sample_stats_dict = sample_stats[0]

    if chains > 1:
        # Collect the stat values from each chain in a single list
        for stat in sample_stats[0].keys():
            value_list = []
            for chain_sample_stats in sample_stats:
                value_list.append(chain_sample_stats[stat])
            sample_stats_dict[stat] = value_list

    if not return_inferencedata:
        for stat, value in sample_stats_dict.items():
            setattr(trace.report, stat, value)
        for stat, value in sample_settings_dict.items():
            setattr(trace.report, stat, value)
        idata = None
    else:
        for stat, value in sample_stats_dict.items():
            if chains > 1:
                # Different chains might have more iteration steps, leading to a
                # non-square `sample_stats` dataset, we cast as `object` to avoid
                # numpy ragged array deprecation warning
                sample_stats_dict[stat] = np.array(value, dtype=object)
            else:
                sample_stats_dict[stat] = np.array(value)

        sample_stats = dict_to_dataset(
            sample_stats_dict,
            attrs=sample_settings_dict,
            library=pymc,
        )

        ikwargs = dict(model=model)
        if idata_kwargs is not None:
            ikwargs.update(idata_kwargs)
        idata = to_inference_data(trace, **ikwargs)
        idata = InferenceData(**idata, sample_stats=sample_stats)

    return sample_stats, idata
Beispiel #12
0
    def get_inference_data(self, data, eight_schools_params):
        with data.model:
            prior = pm.sample_prior_predictive(return_inferencedata=False)
            posterior_predictive = pm.sample_posterior_predictive(
                data.obj, return_inferencedata=False
            )

        return to_inference_data(
            trace=data.obj,
            prior=prior,
            posterior_predictive=posterior_predictive,
            coords={"school": np.arange(eight_schools_params["J"])},
            dims={"theta": ["school"], "eta": ["school"]},
            model=data.model,
        )
Beispiel #13
0
    def test_posterior_predictive_warning(self, data, eight_schools_params, caplog):
        with data.model:
            posterior_predictive = pm.sample_posterior_predictive(
                data.obj, 370, return_inferencedata=False, keep_size=False
            )
            with pytest.warns(UserWarning, match="shape of variables"):
                inference_data = to_inference_data(
                    trace=data.obj,
                    posterior_predictive=posterior_predictive,
                    coords={"school": np.arange(eight_schools_params["J"])},
                    dims={"theta": ["school"], "eta": ["school"]},
                )

        shape = inference_data.posterior_predictive.obs.shape
        assert np.all([obs_s == s for obs_s, s in zip(shape, (1, 370, eight_schools_params["J"]))])
Beispiel #14
0
    def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools_params):
        with data.model:
            posterior_predictive = pm.sample_posterior_predictive(
                data.obj, keep_size=True, return_inferencedata=False
            )
            inference_data = to_inference_data(
                trace=data.obj,
                posterior_predictive=posterior_predictive,
                coords={"school": np.arange(eight_schools_params["J"])},
                dims={"theta": ["school"], "eta": ["school"]},
            )

        shape = inference_data.posterior_predictive.obs.shape
        assert np.all(
            [obs_s == s for obs_s, s in zip(shape, (chains, draws, eight_schools_params["J"]))]
        )
Beispiel #15
0
    def get_predictions_inference_data(
        self, data, eight_schools_params, inplace
    ) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
        with data.model:
            prior = pm.sample_prior_predictive(return_inferencedata=False)
            posterior_predictive = pm.sample_posterior_predictive(
                data.obj, keep_size=True, return_inferencedata=False
            )

            idata = to_inference_data(
                trace=data.obj,
                prior=prior,
                coords={"school": np.arange(eight_schools_params["J"])},
                dims={"theta": ["school"], "eta": ["school"]},
            )
            assert isinstance(idata, InferenceData)
            extended = predictions_to_inference_data(
                posterior_predictive, idata_orig=idata, inplace=inplace
            )
            assert isinstance(extended, InferenceData)
            assert (id(idata) == id(extended)) == inplace
        return (extended, posterior_predictive)
Beispiel #16
0
    def test_posterior_predictive_warning(self, data, eight_schools_params,
                                          caplog):
        with data.model:
            posterior_predictive = pm.sample_posterior_predictive(
                data.obj, 370, return_inferencedata=False)
            inference_data = to_inference_data(
                trace=data.obj,
                posterior_predictive=posterior_predictive,
                coords={"school": np.arange(eight_schools_params["J"])},
                dims={
                    "theta": ["school"],
                    "eta": ["school"]
                },
            )

        records = caplog.records
        shape = inference_data.posterior_predictive.obs.shape
        assert np.all([
            obs_s == s
            for obs_s, s in zip(shape, (1, 370, eight_schools_params["J"]))
        ])
        assert len(records) == 1
        assert records[0].levelname == "WARNING"
Beispiel #17
0
def sample_smc(
    draws=2000,
    kernel=IMH,
    *,
    start=None,
    model=None,
    random_seed=None,
    chains=None,
    cores=None,
    compute_convergence_checks=True,
    return_inferencedata=True,
    idata_kwargs=None,
    progressbar=True,
    **kernel_kwargs,
):
    r"""
    Sequential Monte Carlo based sampling.

    Parameters
    ----------
    draws: int
        The number of samples to draw from the posterior (i.e. last stage). And also the number of
        independent chains. Defaults to 2000.
    kernel: SMC Kernel used. Defaults to pm.smc.IMH (Independent Metropolis Hastings)
    start: dict, or array of dict
        Starting point in parameter space. It should be a list of dict with length `chains`.
        When None (default) the starting point is sampled from the prior distribution.
    model: Model (optional if in ``with`` context)).
    random_seed: int
        random seed
    chains : int
        The number of chains to sample. Running independent chains is important for some
        convergence statistics. If ``None`` (default), then set to either ``cores`` or 2, whichever
        is larger.
    cores : int
        The number of chains to run in parallel. If ``None``, set to the number of CPUs in the
        system.
    compute_convergence_checks : bool
        Whether to compute sampler statistics like Gelman-Rubin and ``effective_n``.
        Defaults to ``True``.
    return_inferencedata : bool, default=True
        Whether to return the trace as an :class:`arviz:arviz.InferenceData` (True) object or a `MultiTrace` (False)
        Defaults to ``True``.
    idata_kwargs : dict, optional
        Keyword arguments for :func:`pymc.to_inference_data`
    progressbar : bool, optional default=True
        Whether or not to display a progress bar in the command line.
    **kernel_kwargs: keyword arguments passed to the SMC kernel.
        The default IMH kernel takes the following keywords:
            threshold: float
                Determines the change of beta from stage to stage, i.e. indirectly the number of stages,
                the higher the value of `threshold` the higher the number of stages. Defaults to 0.5.
                It should be between 0 and 1.
            n_steps: int
                The number of steps of each Markov Chain. If ``tune_steps == True`` ``n_steps`` will be used
                for the first stage and for the others it will be determined automatically based on the
                acceptance rate and `p_acc_rate`, the max number of steps is ``n_steps``.
            tune_steps: bool
                Whether to compute the number of steps automatically or not. Defaults to True
            p_acc_rate: float
                Used to compute ``n_steps`` when ``tune_steps == True``. The higher the value of
                ``p_acc_rate`` the higher the number of steps computed automatically. Defaults to 0.85.
                It should be between 0 and 1.
        Keyword arguments for other kernels should be checked in the respective docstrings

    Notes
    -----
    SMC works by moving through successive stages. At each stage the inverse temperature
    :math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta` = 0
    we have the prior distribution and when :math:`\beta` =1 we have the posterior distribution.
    So in more general terms we are always computing samples from a tempered posterior that we can
    write as:

    .. math::

        p(\theta \mid y)_{\beta} = p(y \mid \theta)^{\beta} p(\theta)

    A summary of the algorithm is:

     1. Initialize :math:`\beta` at zero and stage at zero.
     2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the
        tempered posterior is the prior).
     3. Increase :math:`\beta` in order to make the effective sample size equals some predefined
        value (we use :math:`Nt`, where :math:`t` is 0.5 by default).
     4. Compute a set of N importance weights W. The weights are computed as the ratio of the
        likelihoods of a sample at stage i+1 and stage i.
     5. Obtain :math:`S_{w}` by re-sampling according to W.
     6. Use W to compute the mean and covariance for the proposal distribution, a MVNormal.
     7. For stages other than 0 use the acceptance rate from the previous stage to estimate
        `n_steps`.
     8. Run N independent Metropolis-Hastings (IMH) chains (each one of length `n_steps`),
        starting each one from a different sample in :math:`S_{w}`. Samples are IMH as the proposal
        mean is the of the previous posterior stage and not the current point in parameter space.
     9. Repeat from step 3 until :math:`\beta \ge 1`.
     10. The final result is a collection of N samples from the posterior.


    References
    ----------
    .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013),
        Bayesian inversion for finite fault earthquake source models I- Theory and algorithm.
        Geophysical Journal International, 2013, 194(3), pp.1701-1726,
        `link <https://gji.oxfordjournals.org/content/194/3/1701.full>`__

    .. [Ching2007] Ching, J. and Chen, Y. (2007).
        Transitional Markov Chain Monte Carlo Method for Bayesian Model Updating, Model Class
        Selection, and Model Averaging. J. Eng. Mech., 10.1061/(ASCE)0733-9399(2007)133:7(816),
        816-832. `link <http://ascelibrary.org/doi/abs/10.1061/%28ASCE%290733-9399
        %282007%29133:7%28816%29>`__
    """

    if isinstance(kernel, str) and kernel.lower() in ("abc", "metropolis"):
        warnings.warn(
            f'The kernel string argument "{kernel}" in sample_smc has been deprecated. '
            f"It is no longer needed to distinguish between `abc` and `metropolis`",
            FutureWarning,
            stacklevel=2,
        )
        kernel = IMH

    if kernel_kwargs.pop("save_sim_data", None) is not None:
        warnings.warn(
            "save_sim_data has been deprecated. Use pm.sample_posterior_predictive "
            "to obtain the same type of samples.",
            FutureWarning,
            stacklevel=2,
        )

    if kernel_kwargs.pop("save_log_pseudolikelihood", None) is not None:
        warnings.warn(
            "save_log_pseudolikelihood has been deprecated. This information is "
            "now saved as log_likelihood in models with Simulator distributions.",
            FutureWarning,
            stacklevel=2,
        )

    parallel = kernel_kwargs.pop("parallel", None)
    if parallel is not None:
        warnings.warn(
            "The argument parallel is deprecated, use the argument cores instead.",
            FutureWarning,
            stacklevel=2,
        )
        if parallel is False:
            cores = 1

    if cores is None:
        cores = _cpu_count()

    if chains is None:
        chains = max(2, cores)
    else:
        cores = min(chains, cores)

    if random_seed == -1:
        raise FutureWarning(
            f"random_seed should be a non-negative integer or None, got: {random_seed}"
            "This will raise a ValueError in the Future")
        random_seed = None
    if isinstance(random_seed, int) or random_seed is None:
        rng = np.random.default_rng(seed=random_seed)
        random_seed = list(rng.integers(2**30, size=chains))
    elif isinstance(random_seed, Iterable):
        if len(random_seed) != chains:
            raise ValueError(
                f"Length of seeds ({len(seeds)}) must match number of chains {chains}"
            )
    else:
        raise TypeError(
            "Invalid value for `random_seed`. Must be tuple, list, int or None"
        )

    model = modelcontext(model)

    _log = logging.getLogger("pymc")
    _log.info("Initializing SMC sampler...")
    _log.info(f"Sampling {chains} chain{'s' if chains > 1 else ''} "
              f"in {cores} job{'s' if cores > 1 else ''}")

    params = (
        draws,
        kernel,
        start,
        model,
    )

    t1 = time.time()
    if cores > 1:
        pbar = progress_bar((), total=100, display=progressbar)
        pbar.update(0)
        pbars = [pbar] + [None] * (chains - 1)

        pool = mp.Pool(cores)

        # "manually" (de)serialize params before/after multiprocessing
        params = tuple(cloudpickle.dumps(p) for p in params)
        kernel_kwargs = {
            key: cloudpickle.dumps(value)
            for key, value in kernel_kwargs.items()
        }
        results = _starmap_with_kwargs(
            pool,
            _sample_smc_int,
            [(*params, random_seed[chain], chain, pbars[chain])
             for chain in range(chains)],
            repeat(kernel_kwargs),
        )
        results = tuple(cloudpickle.loads(r) for r in results)
        pool.close()
        pool.join()

    else:
        results = []
        pbar = progress_bar((), total=100 * chains, display=progressbar)
        pbar.update(0)
        for chain in range(chains):
            pbar.offset = 100 * chain
            pbar.base_comment = f"Chain: {chain+1}/{chains}"
            results.append(
                _sample_smc_int(*params, random_seed[chain], chain, pbar,
                                **kernel_kwargs))

    (
        traces,
        sample_stats,
        sample_settings,
    ) = zip(*results)

    trace = MultiTrace(traces)
    idata = None

    # Save sample_stats
    _t_sampling = time.time() - t1
    sample_settings_dict = sample_settings[0]
    sample_settings_dict["_t_sampling"] = _t_sampling

    sample_stats_dict = sample_stats[0]
    if chains > 1:
        # Collect the stat values from each chain in a single list
        for stat in sample_stats[0].keys():
            value_list = []
            for chain_sample_stats in sample_stats:
                value_list.append(chain_sample_stats[stat])
            sample_stats_dict[stat] = value_list

    if not return_inferencedata:
        for stat, value in sample_stats_dict.items():
            setattr(trace.report, stat, value)
        for stat, value in sample_settings_dict.items():
            setattr(trace.report, stat, value)
    else:
        for stat, value in sample_stats_dict.items():
            if chains > 1:
                # Different chains might have more iteration steps, leading to a
                # non-square `sample_stats` dataset, we cast as `object` to avoid
                # numpy ragged array deprecation warning
                sample_stats_dict[stat] = np.array(value, dtype=object)
            else:
                sample_stats_dict[stat] = np.array(value)

        sample_stats = dict_to_dataset(
            sample_stats_dict,
            attrs=sample_settings_dict,
            library=pymc,
        )

        ikwargs = dict(model=model)
        if idata_kwargs is not None:
            ikwargs.update(idata_kwargs)
        idata = to_inference_data(trace, **ikwargs)
        idata = InferenceData(**idata, sample_stats=sample_stats)

    if compute_convergence_checks:
        if draws < 100:
            warnings.warn(
                "The number of samples is too small to check convergence reliably.",
                stacklevel=2,
            )
        else:
            if idata is None:
                idata = to_inference_data(trace, log_likelihood=False)
            trace.report._run_convergence_checks(idata, model)
    trace.report._log_summary()

    return idata if return_inferencedata else trace
Beispiel #18
0
 def test_Rhat(self):
     with self.model:
         idata = to_inference_data(self.trace[self.burn:])
     rhat = az.rhat(idata)
     for var in rhat:
         npt.assert_allclose(rhat[var], 1, rtol=0.01)