Example #1
def test_concat_group(copy, inplace, sequence):
    idata1 = from_dict(
        posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)}
    )
    if copy and inplace:
        original_idata1_posterior_id = id(idata1.posterior)
    idata2 = from_dict(prior={"C": np.random.randn(2, 10, 2), "D": np.random.randn(2, 10, 5, 2)})
    idata3 = from_dict(observed_data={"E": np.random.randn(100), "F": np.random.randn(2, 100)})
    # basic case
    assert concat(idata1, idata2, copy=True, inplace=False) is not None
    if sequence:
        new_idata = concat((idata1, idata2, idata3), copy=copy, inplace=inplace)
    else:
        new_idata = concat(idata1, idata2, idata3, copy=copy, inplace=inplace)
    if inplace:
        assert new_idata is None
        new_idata = idata1
    assert new_idata is not None
    test_dict = {"posterior": ["A", "B"], "prior": ["C", "D"], "observed_data": ["E", "F"]}
    fails = check_multiple_attrs(test_dict, new_idata)
    assert not fails
    if copy:
        if inplace:
            assert id(new_idata.posterior) == original_idata1_posterior_id
        else:
            assert id(new_idata.posterior) != id(idata1.posterior)
        assert id(new_idata.prior) != id(idata2.prior)
        assert id(new_idata.observed_data) != id(idata3.observed_data)
    else:
        assert id(new_idata.posterior) == id(idata1.posterior)
        assert id(new_idata.prior) == id(idata2.prior)
        assert id(new_idata.observed_data) == id(idata3.observed_data)
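A minimal sketch of the behavior exercised above (all names are illustrative): when the inputs hold disjoint groups, arviz.concat merges them into a single InferenceData, and copy=True stores copies of the group datasets rather than references to the originals.

import numpy as np
import arviz as az

idata_post = az.from_dict(posterior={"A": np.random.randn(2, 10, 2)})
idata_prior = az.from_dict(prior={"C": np.random.randn(2, 10, 2)})

# copy=True, inplace=False: a new object holding copies of both groups
combined = az.concat(idata_post, idata_prior, copy=True, inplace=False)
print(combined.groups())  # ['posterior', 'prior']
print(combined.posterior is idata_post.posterior)  # False: the group was copied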
Example #2
def test_inference_concat_keeps_all_fields():
    """From failures observed in issue #907"""
    idata1 = from_dict(posterior={"A": [1, 2, 3, 4]}, sample_stats={"B": [2, 3, 4, 5]})
    idata2 = from_dict(prior={"C": [1, 2, 3, 4]}, observed_data={"D": [2, 3, 4, 5]})

    idata_c1 = concat(idata1, idata2)
    idata_c2 = concat(idata2, idata1)

    test_dict = {"posterior": ["A"], "sample_stats": ["B"], "prior": ["C"], "observed_data": ["D"]}

    fails_c1 = check_multiple_attrs(test_dict, idata_c1)
    assert not fails_c1
    fails_c2 = check_multiple_attrs(test_dict, idata_c2)
    assert not fails_c2
Example #3
    def to_inference_object(self) -> az.InferenceData:
        """Convert fitted Stan model into ``arviz`` InferenceData object.

        :returns: ``arviz`` InferenceData object with selected values
        :rtype: az.InferenceData
        """
        if self.fit is None:
            raise ValueError("Model has not been fit!")

        # if already Inference, just return
        if isinstance(self.fit, az.InferenceData):
            return self.fit

        if not self.specified:
            raise ValueError("Model has not been specified!")

        inference = single_feature_fit_to_inference(
            fit=self.fit,
            params=self.params,
            coords=self.coords,
            dims=self.dims,
            posterior_predictive=self.posterior_predictive,
            log_likelihood=self.log_likelihood,
            **self.specifications)

        if self.include_observed_data:
            obs = az.from_dict(observed_data={"observed": self.dat["y"]},
                               coords={"tbl_sample": self.sample_names},
                               dims={"observed": ["tbl_sample"]})
            inference = az.concat(inference, obs)
        return inference
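The observed-data step above follows a common pattern: build a one-group InferenceData with from_dict and fold it into an existing object with concat. A minimal, self-contained sketch of that pattern (names and shapes are illustrative):

import numpy as np
import arviz as az

inference = az.from_dict(posterior={"beta": np.random.randn(4, 100, 3)})
obs = az.from_dict(
    observed_data={"observed": np.random.randn(20)},
    coords={"tbl_sample": list(range(20))},
    dims={"observed": ["tbl_sample"]},
)
inference = az.concat(inference, obs)  # adds the observed_data group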
Example #4
def test_concat_dim(dim, copy, inplace, sequence, reset_dim):
    idata1 = from_dict(
        posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)},
        observed_data={"C": np.random.randn(100), "D": np.random.randn(2, 100)},
    )
    if inplace:
        original_idata1_id = id(idata1)
    idata2 = from_dict(
        posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)},
        observed_data={"C": np.random.randn(100), "D": np.random.randn(2, 100)},
    )
    idata3 = from_dict(
        posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)},
        observed_data={"C": np.random.randn(100), "D": np.random.randn(2, 100)},
    )
    # basic case
    assert (
        concat(idata1, idata2, dim=dim, copy=copy, inplace=False, reset_dim=reset_dim) is not None
    )
    if sequence:
        new_idata = concat(
            (idata1, idata2, idata3), copy=copy, dim=dim, inplace=inplace, reset_dim=reset_dim
        )
    else:
        new_idata = concat(
            idata1, idata2, idata3, dim=dim, copy=copy, inplace=inplace, reset_dim=reset_dim
        )
    if inplace:
        assert new_idata is None
        new_idata = idata1
    assert new_idata is not None
    test_dict = {"posterior": ["A", "B"], "observed_data": ["C", "D"]}
    fails = check_multiple_attrs(test_dict, new_idata)
    assert not fails
    if inplace:
        assert id(new_idata) == original_idata1_id
    else:
        assert id(new_idata) != id(idata1)
    assert getattr(new_idata.posterior, dim).size == (6 if dim == "chain" else 30)
    if reset_dim:
        assert np.all(
            getattr(new_idata.posterior, dim).values
            == (np.arange(6) if dim == "chain" else np.arange(30))
        )
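The size and coordinate assertions above capture how dim and reset_dim interact. A minimal sketch with two fits of 2 chains and 10 draws each (names are illustrative):

import numpy as np
import arviz as az

idata_a = az.from_dict(posterior={"A": np.random.randn(2, 10)})
idata_b = az.from_dict(posterior={"A": np.random.randn(2, 10)})

# Concatenating along "chain" gives 2 + 2 = 4 chains; reset_dim=True
# relabels the chain coordinate as 0..3 instead of repeating 0..1 twice.
stacked = az.concat(idata_a, idata_b, dim="chain", reset_dim=True)
print(stacked.posterior.chain.values)  # [0 1 2 3]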
Example #5
def test_concat_edgecases(copy, inplace, sequence):
    idata = from_dict(posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)})
    empty = concat()
    assert empty is not None
    if sequence:
        new_idata = concat([idata], copy=copy, inplace=inplace)
    else:
        new_idata = concat(idata, copy=copy, inplace=inplace)
    if inplace:
        assert new_idata is None
        new_idata = idata
    else:
        assert new_idata is not None
    test_dict = {"posterior": ["A", "B"]}
    fails = check_multiple_attrs(test_dict, new_idata)
    assert not fails
    if copy and not inplace:
        assert id(new_idata.posterior) != id(idata.posterior)
    else:
        assert id(new_idata.posterior) == id(idata.posterior)
Example #6
def concatenate_inferences(
    inf_list: List[az.InferenceData],
    coords: dict,
    concatenation_name: str = "feature"
) -> az.InferenceData:
    """Concatenates multiple single feature fits into one object.

    :param inf_list: List of InferenceData objects for each feature
    :type inf_list: List[az.InferenceData]

    :param coords: Coordinates containing concatenation name labels
    :type coords: dict

    :param concatenation_name: Name of feature dimension used when
        concatenating, defaults to "feature"
    :type concatenation_name: str

    :returns: Combined InferenceData object
    :rtype: az.InferenceData
    """
    group_list = []
    group_list.append([x.posterior for x in inf_list])
    group_list.append([x.sample_stats for x in inf_list])
    if "log_likelihood" in inf_list[0].groups():
        group_list.append([x.log_likelihood for x in inf_list])
    if "posterior_predictive" in inf_list[0].groups():
        group_list.append([x.posterior_predictive for x in inf_list])

    po_ds = xr.concat(group_list[0], concatenation_name)
    ss_ds = xr.concat(group_list[1], concatenation_name)
    group_dict = {"posterior": po_ds, "sample_stats": ss_ds}

    if "log_likelihood" in inf_list[0].groups():
        ll_ds = xr.concat(group_list[2], concatenation_name)
        group_dict["log_likelihood"] = ll_ds
    if "posterior_predictive" in inf_list[0].groups():
        pp_ds = xr.concat(group_list[3], concatenation_name)
        group_dict["posterior_predictive"] = pp_ds

    all_group_inferences = []
    for group in group_dict:
        # Set concatenation dim coords
        group_ds = group_dict[group].assign_coords(
            {concatenation_name: coords[concatenation_name]}
        )

        group_inf = az.InferenceData(**{group: group_ds})  # hacky
        all_group_inferences.append(group_inf)

    return az.concat(*all_group_inferences)
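concatenate_inferences leans on xr.concat to stack the per-feature datasets along a new named dimension before wrapping them back into InferenceData objects. A self-contained sketch of that core step (variable names are illustrative):

import numpy as np
import xarray as xr
import arviz as az

fit_a = az.from_dict(posterior={"beta": np.random.randn(2, 50)})
fit_b = az.from_dict(posterior={"beta": np.random.randn(2, 50)})

# Stack the posterior datasets along a new "feature" dimension,
# label the coordinate, and rebuild a single InferenceData.
post = xr.concat([fit_a.posterior, fit_b.posterior], "feature")
post = post.assign_coords(feature=["featA", "featB"])
combined = az.InferenceData(posterior=post)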
Example #7
def test_concat_bad():
    with pytest.raises(TypeError):
        concat("hello", "hello")
    idata = from_dict(posterior={
        "A": np.random.randn(2, 10, 2),
        "B": np.random.randn(2, 10, 5, 2)
    })
    with pytest.raises(TypeError):
        concat(idata, np.array([1, 2, 3, 4, 5]))
    with pytest.raises(NotImplementedError):
        concat(idata, idata)
Example #8
File: diagnostics.py Project: salilab/hmc
def get_inference_data(*hmcs, **kwargs):
    """Build an Arviz `InferenceData` instance from 1 or more chains."""
    varnames = kwargs.get("varnames", None)
    if varnames is None:
        varnames = hmcs[0].opt_vars.get_names()

    datasets = []
    for hmc in hmcs:
        posterior = dict(
            zip(varnames, map(np.array, zip(*hmc.sample_saver.get_values())))
        )
        dataset = az.from_dict(
            posterior=posterior, sample_stats=hmc.stats.get_samples()
        )
        datasets.append(dataset)

    dataset = az.concat(*datasets, dim="chain")

    return dataset
Example #9
def merge_inferences(inf_list,
                     log_likelihood,
                     posterior_predictive,
                     coords,
                     concatenation_name='features'):
    group_list = []
    group_list.append(dask.persist(*[x.posterior for x in inf_list]))
    group_list.append(dask.persist(*[x.sample_stats for x in inf_list]))
    if log_likelihood is not None:
        group_list.append(dask.persist(*[x.log_likelihood for x in inf_list]))
    if posterior_predictive is not None:
        group_list.append(
            dask.persist(*[x.posterior_predictive for x in inf_list]))

    group_list = dask.compute(*group_list)
    po_ds = xr.concat(group_list[0], concatenation_name)
    ss_ds = xr.concat(group_list[1], concatenation_name)
    group_dict = {"posterior": po_ds, "sample_stats": ss_ds}

    if log_likelihood is not None:
        ll_ds = xr.concat(group_list[2], concatenation_name)
        group_dict["log_likelihood"] = ll_ds
    if posterior_predictive is not None:
        pp_ds = xr.concat(group_list[3], concatenation_name)
        group_dict["posterior_predictive"] = pp_ds

    all_group_inferences = []
    for group in group_dict:
        # Set concatenation dim coords
        group_ds = group_dict[group].assign_coords(
            {concatenation_name: coords[concatenation_name]})

        group_inf = az.InferenceData(**{group: group_ds})  # hacky
        all_group_inferences.append(group_inf)

    return az.concat(*all_group_inferences)
Example #10
results = []
n_chains = 50

model_palms = mod.CompositionalAnalysis(
    data[data.obs["site"].isin(["left palm", "right palm"])],
    "site",
    baseline_index=None,
)

for n in range(n_chains):
    result_temp = model_palms.sample_hmc(num_results=int(20000), n_burnin=5000)

    results.append(result_temp)

#%%
res_all = az.concat(results, dim="chain")

print(res_all.posterior)

#%%
az.to_netcdf(res_all, write_path + "/multi_chain_50_len20000_all")

#%%

acc_probs = pd.DataFrame(
    pd.concat([r.effect_df.loc[:, "Inclusion probability"] for r in results]))

acc_probs["chain_no"] = np.concatenate(
    [np.repeat(i + 1, 21) for i in range(n_chains)])

acc_probs.index = acc_probs.index.droplevel(0)
Example #11
    def to_inference_object(
        self,
        combine_individual_fits: bool = True,
    ) -> az.InferenceData:
        """Convert fitted Stan model into ``arviz`` InferenceData object.

        :param combine_individual_fits: Whether to combine the results of
            parallelized feature fits, defaults to True
        :type combine_individual_fits: bool

        :returns: ``arviz`` InferenceData object with selected values
        :rtype: az.InferenceData
        """
        if self.fit is None:
            raise ValueError("Model has not been fit!")

        # if already Inference, just return
        if isinstance(self.fit, az.InferenceData):
            return self.fit
        # if sequence of Inferences, concatenate if specified
        if isinstance(self.fit, (list, tuple)):
            if isinstance(self.fit[0], az.InferenceData):
                if combine_individual_fits:
                    cat_name = self.specifications["concatenation_name"]
                    return concatenate_inferences(
                        self.fit,
                        coords=self.specifications["coords"],
                        concatenation_name=cat_name
                    )
                else:
                    return self.fit

        args = {
            k: self.specifications.get(k)
            for k in ["params", "coords", "dims", "posterior_predictive",
                      "log_likelihood"]
        }
        if isinstance(self.fit, CmdStanMCMC):
            fit_to_inference = single_fit_to_inference
            args["alr_params"] = self.specifications["alr_params"]
        elif isinstance(self.fit, Sequence):
            fit_to_inference = multiple_fits_to_inference
            if combine_individual_fits:
                args["concatenation_name"] = self.specifications.get(
                    "concatenation_name", "feature"
                )
                args["concatenate"] = True
            else:
                args["concatenate"] = False
            # TODO: Check that dims and concatenation_name match

            if self.specifications.get("alr_params") is not None:
                warnings.warn("ALR to CLR not performed on parallel models.",
                              UserWarning)
        else:
            raise ValueError("Unrecognized fit type!")

        inference = fit_to_inference(self.fit, **args)
        if self.specifications["include_observed_data"]:
            # Can't include observed data in individual fits
            include_obs_fail = (
                not combine_individual_fits
                and self.parallelize_across == "features"
            )
            if include_obs_fail:
                warnings.warn(
                    "Cannot include observed data in un-concatenated"
                    "fits!"
                )
            else:
                obs = az.from_dict(
                    observed_data={"observed": self.dat["y"]},
                    coords={
                        "tbl_sample": self.sample_names,
                        "feature": self.feature_names
                    },
                    dims={"observed": ["tbl_sample", "feature"]}
                )
                inference = az.concat(inference, obs)
        return inference
Example #12
def predict(
    mi: MaudInput,
    output_dir: str,
    idata_train: az.InferenceData,
) -> az.InferenceData:
    """Call CmdStanModel.sample for out of sample predictions.

    :param mi: a MaudInput object
    :param output_dir: directory where output will be saved
    :param idata_train: InferenceData object with posterior draws
    """
    model = cmdstanpy.CmdStanModel(
        stan_file=os.path.join(HERE, STAN_PROGRAM_RELATIVE_PATH_PREDICT),
        cpp_options=mi.config.cpp_options,
        stanc_options=mi.config.stanc_options,
    )
    set_up_output_dir(output_dir, mi)
    kinetic_parameters = [
        "keq",
        "km",
        "kcat",
        "dissociation_constant",
        "transfer_constant",
        "kcat_phos",
        "ki",
    ]
    posterior = idata_train.get("posterior")
    sample_stats = idata_train.get("sample_stats")
    assert posterior is not None
    assert sample_stats is not None
    chains = sample_stats["chain"]
    draws = sample_stats["draw"]
    dims = {
        "conc": ["experiment", "mic"],
        "conc_enzyme": ["experiment", "enzyme"],
        "flux": ["experiment", "reaction"],
    }
    for chain in chains:
        for draw in draws:
            inits = {
                par: (
                    posterior[par]
                    .sel(chain=chain, draw=draw)
                    .to_series()
                    .values
                )
                for par in kinetic_parameters
                if par in posterior.keys()
            }
            sample_args: dict = {
                "data": os.path.join(output_dir, "input_data_test.json"),
                "inits": inits,
                "output_dir": output_dir,
                "iter_warmup": 0,
                "iter_sampling": 1,
                "fixed_param": True,
                "show_progress": False,
            }
            if mi.config.cmdstanpy_config_predict is not None:
                sample_args = {
                    **sample_args,
                    **mi.config.cmdstanpy_config_predict,
                }
            mcmc_draw = model.sample(**sample_args)
            idata_draw = az.from_cmdstan(
                mcmc_draw.runset.csv_files,
                coords={
                    "experiment": [
                        e.id for e in mi.measurements.experiments if e.is_test
                    ],
                    "mic": [m.id for m in mi.kinetic_model.mics],
                    "enzyme": [e.id for e in mi.kinetic_model.enzymes],
                    "reaction": [r.id for r in mi.kinetic_model.reactions],
                },
                dims=dims,
            ).assign_coords(
                coords={"chain": [chain], "draw": [draw]},
                groups="posterior_groups",
            )
            if draw == 0:
                idata_chain = idata_draw.copy()
            else:
                idata_chain = az.concat(
                    [idata_chain, idata_draw], dim="draw", reset_dim=False
                )
        if chain == 0:
            out = idata_chain.copy()
        else:
            out = az.concat([out, idata_chain], dim="chain", reset_dim=False)
    return out
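The nested loop above grows the result incrementally: draws are appended within each chain, then chains are appended together, with reset_dim=False preserving the original chain/draw labels. A minimal sketch of the same accumulation pattern (shapes and names are illustrative):

import numpy as np
import arviz as az

per_chain = []
for chain in range(2):
    idata_chain = None
    for draw in range(3):
        # one single-chain, single-draw fit, relabelled to its true position
        idata_draw = az.from_dict(
            posterior={"flux": np.random.randn(1, 1, 4)}
        ).assign_coords(
            coords={"chain": [chain], "draw": [draw]},
            groups="posterior_groups",
        )
        if idata_chain is None:
            idata_chain = idata_draw
        else:
            idata_chain = az.concat(
                [idata_chain, idata_draw], dim="draw", reset_dim=False
            )
    per_chain.append(idata_chain)

out = az.concat(per_chain, dim="chain", reset_dim=False)
print(out.posterior.sizes)  # 2 chains, 3 draws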
Example #13
def bayes_dummy_model_ref_std(uniform,
                              max_allowed_specificMB=None,
                              gd=None,
                              sampler='nuts', ys=np.arange(1979, 2019, 1),
                              gd_mb=None,
                              h=None, w=None, use_two_msm=True, nosigma=False,
                              nosigmastd=False, first_ppc=True,
                              pd_calib_opt=None,
                              pd_geodetic_comp=None, random_seed=42,
                              y0=None, y1=None):
    # test
    slope_pfs = []
    slope_melt_fs = []
    for y in ys:
        slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=y)
        slope_pfs.append(slope_pf.mean())
        slope_melt_fs.append(slope_melt_f.mean())
    with pm.Model() as model_T:
        if uniform:
            melt_f = pm.Uniform("melt_f", lower=10, upper=1000)
            pf = pm.Uniform('pf', lower=0.1, upper=10)
        else:
            pf = pm.TruncatedNormal('pf', mu=pd_calib_opt['pf_opt'][
                pd_calib_opt.reg == 11.0].dropna().mean(),
                                    sigma=pd_calib_opt['pf_opt'][
                                        pd_calib_opt.reg == 11.0].dropna().std(),
                                    lower=0.5, upper=10)
            melt_f = pm.TruncatedNormal('melt_f',
                                        mu=pd_calib_opt['melt_f_opt_pf'][
                                            pd_calib_opt.reg == 11.0].dropna().mean(),
                                        sigma=pd_calib_opt['melt_f_opt_pf'][
                                            pd_calib_opt.reg == 11.0].dropna().std(),
                                        lower=10, upper=1000)

        ##
        if use_two_msm:
            # should not use the stuff before 2000
            aet_slope_melt_fs_two = pm.Data('aet_slope_melt_fs_two',
                                            [np.array(slope_melt_fs)[
                                                 (ys >= 2000) & (
                                                             ys <= 2009)].mean(),
                                             np.array(slope_melt_fs)[
                                                 ys >= 2010].mean()])
            aet_slope_pfs_two = pm.Data('aet_slope_pfs_two',
                                        ([np.array(slope_pfs)[(ys >= 2000) & (
                                                    ys <= 2009)].mean(),
                                          np.array(slope_pfs)[
                                              ys >= 2010].mean()]))
        else:
            aet_slope_melt_fs_two = pm.Data('aet_slope_melt_fs_two',
                                            [np.array(slope_melt_fs)[
                                                 ys >= 2000].mean()])
            aet_slope_pfs_two = pm.Data('aet_slope_pfs_two',
                                        [np.array(slope_pfs)[
                                             ys >= 2000].mean()])
        aet_mbs_two = aet_slope_pfs_two * pf + aet_slope_melt_fs_two * melt_f
        # make a deterministic out of it to save it also in the traces
        mb_mod = pm.Deterministic('mb_mod', aet_mbs_two)

        # std
        # need to put slope_melt_fs and slope_pfs into []???
        aet_slope_melt_fs = pm.Data('aet_slope_melt_fs',
                                    slope_melt_fs)  # pd.DataFrame(slope_melt_fs, columns=['slope_melt_fs'])['slope_melt_fs'])
        aet_slope_pfs = pm.Data('aet_slope_pfs',
                                slope_pfs)  # pd.DataFrame(slope_pfs, columns=['slope_pfs'])['slope_pfs'])
        aet_mbs = aet_slope_pfs * pf + aet_slope_melt_fs * melt_f
        mod_std = pm.Deterministic('mod_std', aet_mbs.std())

        if use_two_msm:
            sigma = pm.Data('sigma', pd_geodetic_comp.loc[gd.rgi_id][
                ['err_dmdtda_2000_2010', 'err_dmdtda_2010_2020']].values * 1000)
            observed = pm.Data('observed', pd_geodetic_comp.loc[gd.rgi_id][
                ['dmdtda_2000_2010', 'dmdtda_2010_2020']].values * 1000)
            if not nosigma:
                geodetic_massbal = pm.Normal('geodetic_massbal',
                                             mu=mb_mod,
                                             sigma=sigma,  # standard devia
                                             observed=observed)  # likelihood
            else:
                geodetic_massbal = pm.Normal('geodetic_massbal',
                                             mu=mb_mod,
                                             observed=observed)  # likelihood
            # diff_geodetic_massbal = pm.Deterministic("diff_geodetic_massbal",
            #                                      geodetic_massbal - observed)
        else:
            # sigma and observed need to have dim 1 (not zero), --> [value]
            sigma = pm.Data('sigma', [
                pd_geodetic_comp.loc[gd.rgi_id]['err_dmdtda'] * 1000])
            observed = pm.Data('observed', [
                pd_geodetic_comp.loc[gd.rgi_id]['dmdtda'] * 1000])
            if not nosigma:
                # likelihood
                geodetic_massbal = pm.TruncatedNormal('geodetic_massbal',
                                                      mu=mb_mod,
                                                      sigma=sigma,
                                                      # standard devia
                                                      observed=observed,
                                                      lower=max_allowed_specificMB)
            else:
                geodetic_massbal = pm.TruncatedNormal('geodetic_massbal',
                                                      mu=mb_mod,
                                                      observed=observed,
                                                      lower=max_allowed_specificMB)  # likelihood

            # constrained already by using TruncatedNormal geodetic massbalance ...
            # pot_max_melt = pm.Potential('pot_max_melt', aet.switch(
            #    geodetic_massbal < max_allowed_specificMB, -np.inf, 0))
        diff_geodetic_massbal = pm.Deterministic("diff_geodetic_massbal",
                                                 geodetic_massbal - observed)

        # pot_max_melt = pm.Potential('pot_max_melt', aet.switch(geodetic_massbal < max_allowed_specificMB, -np.inf, 0) )

        # std
        # sigma = pm.Data('sigma', 100) # how large are the uncertainties of the direct glaciological method !!!
        ref_df = gd.get_ref_mb_data(y0=y0, y1=y1)
        sigma_std = aet.constant((ref_df[
                                      'ANNUAL_BALANCE'].values / 10).std())  # how large are the uncertainties of the direct glaciological method !!!
        observed_std = aet.constant(ref_df['ANNUAL_BALANCE'].values.std())

        # std should always be above zero
        if nosigmastd:
            glaciological_std = pm.TruncatedNormal('glaciological_std',
                                                   mu=mod_std,
                                                   # sigma=sigma_std,
                                                   observed=observed_std,
                                                   lower=0.001)  # likelihood
        else:
            glaciological_std = pm.TruncatedNormal('glaciological_std',
                                                   mu=mod_std, sigma=sigma_std,
                                                   observed=observed_std,
                                                   lower=0.001)  # likelihood

        quot_std = pm.Deterministic("quot_std",
                                    glaciological_std / observed_std)
        # pot_std = pm.Potential('pot_std', aet.switch(mod_std <= 0, -np.inf, 0) )
        prior = pm.sample_prior_predictive(random_seed=random_seed,
                                           samples=1000)  # , keep_size = True)

    with model_T:
        # sampling
        if sampler == 'nuts':
            trace = pm.sample(25000, chains=3, tune=25000, target_accept=0.99,
                              compute_convergence_checks=True,
                              return_inferencedata=True)
        #                 #start={'pf':2.5, 'melt_f': 200})
        elif sampler == 'jax':
            import pymc3.sampling_jax
            trace = pm.sampling_jax.sample_numpyro_nuts(20000, chains=4,
                                                        tune=20000,
                                                        target_accept=0.98)  # , compute_convergence_checks= True)

        burned_trace = trace.sel(draw=slice(5000, None))
        burned_trace.posterior['draw'] = np.arange(0, len(burned_trace.posterior.draw))
        burned_trace.log_likelihood['draw'] = np.arange(0, len(burned_trace.posterior.draw))
        burned_trace.sample_stats['draw'] = np.arange(0, len(burned_trace.posterior.draw))
        
        if first_ppc:
            print(az.summary(burned_trace.posterior))
            ppc = pm.sample_posterior_predictive(burned_trace,
                                                 random_seed=random_seed,
                                                 var_names=['geodetic_massbal',
                                                            'glaciological_std',
                                                            'pf', 'melt_f',
                                                            'mb_mod',
                                                            'diff_geodetic_massbal',
                                                            'quot_std'],
                                                 keep_size=True)
            az.concat(burned_trace, az.from_dict(posterior_predictive=ppc,
                                                 prior=prior), inplace=True)
    with model_T:
        slope_pf_new = []
        slope_melt_f_new = []
        for y in ys:
            slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=y)
            slope_pf_new.append(slope_pf.mean())
            slope_melt_f_new.append(slope_melt_f.mean())
        pm.set_data(new_data={'aet_slope_melt_fs_two': slope_melt_f_new,
                              'aet_slope_pfs_two': slope_pf_new,
                              'observed': np.empty(len(ys)),
                              'sigma': np.empty(len(ys))})
        ppc_new = pm.sample_posterior_predictive(burned_trace,
                                                 random_seed=random_seed,
                                                 var_names=['geodetic_massbal',
                                                            'pf', 'melt_f',
                                                            'mb_mod',
                                                            'diff_geodetic_massbal'],
                                                 keep_size=True)
    predict_data = az.from_dict(posterior_predictive=ppc_new)
    return burned_trace, model_T, predict_data
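The az.concat(..., inplace=True) calls above mutate the first argument instead of returning a new object. A minimal sketch (names are illustrative):

import numpy as np
import arviz as az

trace = az.from_dict(posterior={"pf": np.random.randn(3, 100)})
extra = az.from_dict(posterior_predictive={"mb": np.random.randn(3, 100)})

result = az.concat(trace, extra, inplace=True)
print(result)          # None: an in-place concat returns nothing
print(trace.groups())  # trace now also holds posterior_predictive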
Example #14
    def __init__(self,
                 *inference_data,
                 universe_save=None,
                 npix=2**5,
                 fast_open=False):
        """FIXME! briefly describe function

        :param inference_data:
        :param universe_save:
        :param npix:
        :returns: we love
        :rtype:

        """

        # keep the first fit and fold any additional ones in along
        # the chain dimension
        self._posterior = inference_data[0]

        for idata in inference_data[1:]:

            self._posterior = av.concat(self._posterior,
                                        idata,
                                        dim="chain")

        self._npix = npix

        try:
            ang_sep = self._posterior.posterior.ang_sep.stack(
                sample=("chain", "draw")).values

            self._do_contour = True

        except:

            self._do_contour = False

        self._beta1 = self._posterior.posterior.beta1.stack(
            sample=("chain", "draw")).values

        # set the number of samples.. flattened over chain

        self._n_samples = self._beta1.shape[-1]

        self._beta2 = self._posterior.posterior.beta2.stack(
            sample=("chain", "draw")).values

        self._omega1 = self._posterior.posterior.omega.stack(
            sample=("chain", "draw")).values[0]

        self._omega2 = self._posterior.posterior.omega.stack(
            sample=("chain", "draw")).values[1]

        try:

            self._amplitude = self._posterior.posterior.amplitude.stack(
                sample=("chain", "draw")).values

        except:

            self._amplitude = np.ones(self._n_samples)

        self._background = self._posterior.posterior.bkg.stack(
            sample=("chain", "draw")).values

        self._scale = self._posterior.posterior.scale.stack(
            sample=("chain", "draw")).values

        if self._scale.shape[0] == 2:

            self._multi_scale = True

        else:

            self._multi_scale = False

        try:

            self._dt = self._posterior.posterior.dt.stack(
                sample=("chain", "draw")).values

            self._grb_theta = self._posterior.posterior.grb_theta.stack(
                sample=("chain", "draw")).values
            self._grb_phi = self._posterior.posterior.grb_phi.stack(
                sample=("chain", "draw")).values

            self._is_dt_fit = True

            self._n_dets = self._background.shape[0]

        except:

            self._is_dt_fit = False

            self._dt = None
            self._grb_theta = None
            self._grb_phi = None

            self._n_dets = 1

        self._use_bw = False

        try:

            self._bw1 = self._posterior.posterior.bw1.stack(
                sample=("chain", "draw")).values

            self._bw2 = self._posterior.posterior.bw2.stack(
                sample=("chain", "draw")).values

            self._multi_bw = True

        except:

            try:

                self._bw = self._posterior.posterior.bw.stack(
                    sample=("chain", "draw")).values

                if self._bw.shape[0] == 2:

                    self._multi_bw = True

                else:

                    self._multi_bw = False

            except:

                self._bw = self._posterior.posterior.bw_out.stack(
                    sample=("chain", "draw")).values

                self._use_bw = False
                self._multi_bw = True

        self.grb_color = "k"
        self._grb_style = "lrtb"

        self._has_universe = False

        if universe_save is not None:

            self._universe = Universe.from_save_file(universe_save)

            self._has_universe = True

        if self._is_dt_fit and self._n_dets > 2 and (not fast_open):

            self._build_moc_map()

        elif self._do_contour:

            self._build_moc_map()
Example #15
def bayes_dummy_model_ref(uniform,
                          max_allowed_specificMB=None, gd=None,
                          sampler='nuts',
                          ys=None, gd_mb=None, h=None, w=None, use_two_msm=True,
                          nosigma=False, pd_calib_opt=None,
                          random_seed=4, y0=None, y1=None):
    # if use_two_msm:
    slope_pfs = []
    slope_melt_fs = []
    for y in ys:
        slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=y)
        slope_pfs.append(slope_pf.mean())
        slope_melt_fs.append(slope_melt_f.mean())
    with pm.Model() as model_T:
        if uniform:
            melt_f = pm.Uniform("melt_f", lower=10, upper=1000)
            pf = pm.Uniform('pf', lower=0.1, upper=10)
        else:
            pf = pm.TruncatedNormal('pf', mu=pd_calib_opt['pf_opt'][
                pd_calib_opt.reg == 11.0].dropna().mean(),
                                    sigma=pd_calib_opt['pf_opt'][
                                        pd_calib_opt.reg == 11.0].dropna().std(),
                                    lower=0.5, upper=10)
            melt_f = pm.TruncatedNormal('melt_f',
                                        mu=pd_calib_opt['melt_f_opt_pf'][
                                            pd_calib_opt.reg == 11.0].dropna().mean(),
                                        sigma=pd_calib_opt['melt_f_opt_pf'][
                                            pd_calib_opt.reg == 11.0].dropna().std(),
                                        lower=1, upper=1000)
            # need to put slope_melt_fs and slope_pfs into [], otherwise it does not work for jax
        aet_slope_melt_fs = pm.Data('aet_slope_melt_fs',
                                    slope_melt_fs)  # pd.DataFrame(slope_melt_fs, columns=['slope_melt_fs'])['slope_melt_fs'])
        aet_slope_pfs = pm.Data('aet_slope_pfs',
                                slope_pfs)  # pd.DataFrame(slope_pfs, columns=['slope_pfs'])['slope_pfs'])
        aet_mbs = aet_slope_pfs * pf + aet_slope_melt_fs * melt_f
        mb_mod = pm.Deterministic('mb_mod', aet_mbs)
    with model_T:
        ref_df = gd.get_ref_mb_data(y0=y0, y1=y1)
        # sigma = pm.Data('sigma', 100) # how large are the uncertainties of the direct glaciological method !!!
        sigma = pm.Data('sigma',
                        100)  # np.abs(ref_df['ANNUAL_BALANCE'].values/10)) # how large are the uncertainties of the direct glaciological method !!!
        observed = pm.Data('observed', ref_df['ANNUAL_BALANCE'].values)
        if nosigma:
            geodetic_massbal = pm.TruncatedNormal('geodetic_massbal',
                                                  mu=mb_mod,  # sigma=sigma,
                                                  observed=observed,
                                                  lower=max_allowed_specificMB)
        else:
            geodetic_massbal = pm.TruncatedNormal('geodetic_massbal',
                                                  mu=mb_mod, sigma=sigma,
                                                  observed=observed,
                                                  lower=max_allowed_specificMB)  # likelihood

        diff_geodetic_massbal = pm.Deterministic("diff_geodetic_massbal",
                                                 geodetic_massbal - observed)
        # pot_max_melt = pm.Potential('pot_max_melt', aet.switch(geodetic_massbal < max_allowed_specificMB, -np.inf, 0) )
        prior = pm.sample_prior_predictive(random_seed=random_seed,
                                           samples=1000)  # , keep_size = True)
        if sampler == 'nuts':
            trace = pm.sample(10000, chains=4, tune=10000, target_accept=0.98,
                              compute_convergence_checks=True,
                              return_inferencedata=True)
        #                 #start={'pf':2.5, 'melt_f': 200})
        elif sampler == 'jax':
            import pymc3.sampling_jax
            trace = pm.sampling_jax.sample_numpyro_nuts(20000, chains=4,
                                                        tune=20000,
                                                        target_accept=0.98)  # , compute_convergence_checks= True)

    with model_T:
        burned_trace = trace.sel(draw=slice(5000, None))
        az.summary(burned_trace.posterior)
        ppc = pm.sample_posterior_predictive(burned_trace,
                                             random_seed=random_seed,
                                             var_names=['geodetic_massbal',
                                                        'pf', 'melt_f',
                                                        'mb_mod',
                                                        'diff_geodetic_massbal'],
                                             keep_size=True)
        az.concat(burned_trace,
                  az.from_dict(posterior_predictive=ppc, prior=prior),
                  inplace=True)

    # with model_T:
    #     slope_pf_new = []
    #     slope_melt_f_new = []
    #     for y in ys:
    #             slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h = h, w =w, ys = y)
    #             slope_pf_new.append(slope_pf.mean())
    #             slope_melt_f_new.append(slope_melt_f.mean())
    #     if nosigma:
    #         pm.set_data(new_data={'aet_slope_melt_fs': slope_melt_f_new, 'aet_slope_pfs':slope_pf_new,
    #                 'observed':np.empty(len(ys))}) # , 'sigma':np.empty(len(ys))})
    ##    else:
    #        pm.set_data(new_data={'aet_slope_melt_fs': slope_melt_f_new, 'aet_slope_pfs':slope_pf_new,
    #                'observed':np.empty(len(ys)), 'sigma':np.empty(len(ys))})
    ##   ppc_new = pm.sample_posterior_predictive(burned_trace, random_seed=random_seed,
    #                               var_names=['geodetic_massbal', 'pf', 'melt_f', 'mb_mod','diff_geodetic_massbal'],
    #                               keep_size = True)
    # predict_data = az.from_dict(posterior_predictive=ppc_new)
    return burned_trace, model_T  # , predict_data
    # idata_kwargs={"density_dist_obs": False}
Example #16
def predictions_to_inference_data(
    predictions,
    posterior_trace: Optional["MultiTrace"] = None,
    model: Optional["Model"] = None,
    coords: Optional[CoordSpec] = None,
    dims: Optional[DimSpec] = None,
    idata_orig: Optional[InferenceData] = None,
    inplace: bool = False,
) -> InferenceData:
    """Translate out-of-sample predictions into ``InferenceData``.

    Parameters
    ----------
    predictions: Dict[str, np.ndarray]
        The predictions are the return value of :func:`~pymc.sample_posterior_predictive`,
        a dictionary of strings (variable names) to numpy ndarrays (draws).
        Requires the arrays to follow the convention ``chain, draw, *shape``.
    posterior_trace: MultiTrace
        This should be a trace that has been thinned appropriately for
        ``pymc.sample_posterior_predictive``. Specifically, any variable whose shape is
        a deterministic function of the shape of any predictor (explanatory, independent, etc.)
        variables must be *removed* from this trace.
    model: Model
        The pymc model. It can be omitted if within a model context.
    coords: Dict[str, array-like[Any]]
        Coordinates for the variables.  Map from coordinate names to coordinate values.
    dims: Dict[str, array-like[str]]
        Map from variable name to ordered set of coordinate names.
    idata_orig: InferenceData, optional
        If supplied, then modify this inference data in place, adding ``predictions`` and
        (if available) ``predictions_constant_data`` groups. If this is not supplied, make a
        fresh InferenceData.
    inplace: boolean, optional
        If idata_orig is supplied and inplace is True, merge the predictions into idata_orig,
        rather than returning a fresh InferenceData object.

    Returns
    -------
    InferenceData:
        Either a fresh ``InferenceData`` or the modified ``idata_orig``.
    """
    if inplace and not idata_orig:
        raise ValueError("Do not pass True for inplace unless passing"
                         "an existing InferenceData as idata_orig")
    converter = InferenceDataConverter(
        trace=posterior_trace,
        predictions=predictions,
        model=model,
        coords=coords,
        dims=dims,
        log_likelihood=False,
    )
    if hasattr(idata_orig, "posterior"):
        converter.nchains = idata_orig.posterior.dims["chain"]
        converter.ndraws = idata_orig.posterior.dims["draw"]
    else:
        aelem = next(iter(predictions.values()))
        converter.nchains, converter.ndraws = aelem.shape[:2]
    new_idata = converter.to_inference_data()
    if idata_orig is None:
        return new_idata
    elif inplace:
        concat([idata_orig, new_idata], dim=None, inplace=True)
        return idata_orig
    else:
        # if we are not returning in place, then merge the old groups into the new inference
        # data and return that.
        concat([new_idata, idata_orig], dim=None, copy=True, inplace=True)
        return new_idata
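The two concat calls at the end pass dim=None, which merges the groups of the arguments rather than stacking along a dimension; with inplace=True the first object in the sequence absorbs the others. A minimal sketch (names are illustrative):

import numpy as np
import arviz as az

idata_orig = az.from_dict(posterior={"mu": np.random.randn(2, 50)})
new_idata = az.from_dict(predictions={"y": np.random.randn(2, 50, 3)})

az.concat([idata_orig, new_idata], dim=None, inplace=True)
print(idata_orig.groups())  # now includes 'predictions'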
Example #17
def test_concat_bad():
    with pytest.raises(TypeError):
        concat("hello", "hello")
    idata = from_dict(posterior={
        "A": np.random.randn(2, 10, 2),
        "B": np.random.randn(2, 10, 5, 2)
    })
    idata2 = from_dict(posterior={"A": np.random.randn(2, 10, 2)})
    idata3 = from_dict(prior={"A": np.random.randn(2, 10, 2)})
    with pytest.raises(TypeError):
        concat(idata, np.array([1, 2, 3, 4, 5]))
    with pytest.raises(TypeError):
        concat(idata, idata, dim=None)
    with pytest.raises(TypeError):
        concat(idata, idata2, dim="chain")
    with pytest.raises(TypeError):
        concat(idata2, idata, dim="chain")
    with pytest.raises(TypeError):
        concat(idata, idata3, dim="chain")
    with pytest.raises(TypeError):
        concat(idata3, idata, dim="chain")
Example #18
def bayes_dummy_model_better_OLD(uniform,
                                 max_allowed_specificMB=None,
                                 gd=None, sampler='nuts',
                                 ys=np.arange(2000, 2019, 1),
                                 gd_mb=None, h=None, w=None, use_two_msm=True,
                                 nosigma=False, model=None, pd_calib_opt=None,
                                 first_ppc=True, pd_geodetic_comp=None,
                                 random_seed=42, y0=None, y1=None):
    if use_two_msm:
        slope_pfs = []
        slope_melt_fs = []
        for y in ys:
            slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=y)
            slope_pfs.append(slope_pf.mean())
            slope_melt_fs.append(slope_melt_f.mean())
    else:
        slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=ys)
    if model is None:
        model_T = pm.Model()
    else:
        model_T = model
    with model_T:
        if uniform:
            melt_f = pm.Uniform("melt_f", lower=10, upper=1000)
            pf = pm.Uniform('pf', lower=0.1, upper=10)
        else:
            if model is None:
                pf = pm.TruncatedNormal('pf', mu=pd_calib_opt['pf_opt'][
                    pd_calib_opt.reg == 11.0].dropna().mean(),
                                        sigma=pd_calib_opt['pf_opt'][
                                            pd_calib_opt.reg == 11.0].dropna().std(),
                                        lower=0.5, upper=10)
                melt_f = pm.TruncatedNormal('melt_f',
                                            mu=pd_calib_opt['melt_f_opt_pf'][
                                                pd_calib_opt.reg == 11.0].dropna().mean(),
                                            sigma=pd_calib_opt['melt_f_opt_pf'][
                                                pd_calib_opt.reg == 11.0].dropna().std(),
                                            lower=1, upper=1000)
            else:
                pass  # melt_f = melt_f

            # slopes have to be defined as theano constants

        # aet_slope_pf_0 = aet.constant(np.array(slope_pfs)[ys<=2010].mean())
        # aet_slope_pf_1 = aet.constant(np.array(slope_pfs)[ys>2010].mean())
        # aet_slope_melt_f_0 = aet.constant(np.array(slope_melt_fs)[ys<=2010].mean())
        # aet_slope_melt_f_1 = aet.constant(np.array(slope_melt_fs)[ys>2010].mean())
        if use_two_msm:
            aet_slope_melt_fs = pm.Data('aet_slope_melt_fs', [
                np.array(slope_melt_fs)[ys <= 2010].mean(),
                np.array(slope_melt_fs)[ys > 2010].mean()])
            aet_slope_pfs = pm.Data('aet_slope_pfs', (
                [np.array(slope_pfs)[ys <= 2010].mean(),
                 np.array(slope_pfs)[ys > 2010].mean()]))
        else:
            aet_slope_melt_fs = pm.Data('aet_slope_melt_fs',
                                        [np.array(slope_melt_f).mean()])
            aet_slope_pfs = pm.Data('aet_slope_pfs',
                                    [np.array(slope_pf).mean()])
        # aet_mbs = [aet_slope_pf_0, aet_slope_pf_1] *pf + aet_slope_melt_fs*melt_f

        # aet_mb_0 = aet_slope_pf_0 *pf + aet_slope_melt_f_0*melt_f
        # aet_mb_1 = aet_slope_pf_1 *pf + aet_slope_melt_f_1*melt_f
        if model is None:
            aet_mbs = aet_slope_pfs * pf + aet_slope_melt_fs * melt_f
        else:
            aet_mbs = aet_slope_pfs * model.pf + aet_slope_melt_fs * model.melt_f
        # aet_mbs = aet.as_tensor_variable([aet_mb_0, aet_mb_1])
        # aet_slope_melt_fs = aet.vector(np.array([np.array(slope_melt_fs)[ys<=2010].mean(), np.array(slope_melt_fs)[ys>2010].mean()]))

        # this is not the new simple theano compatible
        # mass balance function that depends on pf and melt_f
        # aet_mbs = [aet_slope_pf_0, aet_slope_pf_1] *pf + aet_slope_melt_fs*melt_f

        # make a deterministic out of it to save it also in the traces
        mb_mod = pm.Deterministic('mb_mod', aet_mbs)
    with model_T:
        if use_two_msm:
            sigma = pm.Data('sigma', pd_geodetic_comp.loc[gd.rgi_id][
                ['err_dmdtda_2000_2010', 'err_dmdtda_2010_2020']].values * 1000)
            observed = pm.Data('observed', pd_geodetic_comp.loc[gd.rgi_id][
                ['dmdtda_2000_2010', 'dmdtda_2010_2020']].values * 1000)
            if not nosigma:

                geodetic_massbal = pm.Normal('geodetic_massbal',
                                             mu=mb_mod,
                                             sigma=sigma,  # standard devia
                                             observed=observed)  # likelihood
            else:
                geodetic_massbal = pm.Normal('geodetic_massbal',
                                             mu=mb_mod,
                                             observed=observed)  # likelihood

            diff_geodetic_massbal = pm.Deterministic("diff_geodetic_massbal",
                                                     geodetic_massbal - observed)
        else:
            # sigma and observed need to have dim 1 (not zero), --> [value]
            sigma = pm.Data('sigma', [
                pd_geodetic_comp.loc[gd.rgi_id]['err_dmdtda'] * 1000])
            observed = pm.Data('observed', [
                pd_geodetic_comp.loc[gd.rgi_id]['dmdtda'] * 1000])
            geodetic_massbal = pm.TruncatedNormal('geodetic_massbal',
                                                  mu=mb_mod,
                                                  sigma=sigma,  # standard devia
                                                  observed=observed,
                                                  lower=max_allowed_specificMB)  # likelihood
            diff_geodetic_massbal = pm.Deterministic("diff_geodetic_massbal",
                                                     geodetic_massbal - observed)
        # constrained already by using TruncatedNormal geodetic massbalance ...
        # pot_max_melt = pm.Potential('pot_max_melt', aet.switch(
        #    geodetic_massbal < max_allowed_specificMB, -np.inf, 0))

        # also compute this difference just to be sure ...
        prior = pm.sample_prior_predictive(random_seed=random_seed,
                                           samples=1000)  # , keep_size = True)
    with model_T:
        if sampler == 'nuts':
            trace = pm.sample(20000, chains=4, tune=20000, target_accept=0.98,
                              compute_convergence_checks=True,
                              return_inferencedata=True)
        #                 #start={'pf':2.5, 'melt_f': 200})
        elif sampler == 'jax':
            import pymc3.sampling_jax
            trace = pm.sampling_jax.sample_numpyro_nuts(20000, chains=4,
                                                        tune=20000,
                                                        target_accept=0.98)  # , compute_convergence_checks= True)

    with model_T:
        burned_trace = trace.sel(draw=slice(5000, None))

        # trace = pm.sample(10000, chains=4, tune=10000, target_accept = 0.98)
        # need high target_accept to have no divergences, effective sample number
        #  and # We have stored the paths of all our variables, or "traces", in the trace variable.,
        # these paths are the routes the unknown parameters (here just 'n') have taken thus far.
        # Inference using the first few thousand points is a bad idea, as they are unrelated to the
        # final distribution we are interested in.
        # Thus is it a good idea to discard those samples before using the samples for inference.
        # We call this period before converge the burn-in period.
        # burned_trace = trace[1000:]
        # if arviz dataset
        if first_ppc:
            # TODO: then sometimes a problem occurs that a warning is raised
            #  about more chains (1000) than draws (2) ... why ???
            ppc = pm.sample_posterior_predictive(burned_trace,
                                                 random_seed=random_seed,
                                                 var_names=['geodetic_massbal',
                                                            'pf', 'melt_f',
                                                            'mb_mod',
                                                            'diff_geodetic_massbal'],
                                                 keep_size=True)
            az.concat(burned_trace,
                      az.from_dict(posterior_predictive=ppc, prior=prior),
                      inplace=True)

    ys_ref = gd.get_ref_mb_data(y0=y0, y1=y1).index.values
    with model_T:
        slope_pf_new = []
        slope_melt_f_new = []
        for y in ys_ref:
            slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=y)
            slope_pf_new.append(slope_pf.mean())
            slope_melt_f_new.append(slope_melt_f.mean())
        pm.set_data(new_data={'aet_slope_melt_fs': slope_melt_f_new,
                              'aet_slope_pfs': slope_pf_new,
                              'observed': np.empty(len(ys_ref)),
                              'sigma': np.empty(len(ys_ref))})
        ppc_new = pm.sample_posterior_predictive(burned_trace,
                                                 random_seed=random_seed,
                                                 var_names=['geodetic_massbal',
                                                            'pf', 'melt_f',
                                                            'mb_mod',
                                                            'diff_geodetic_massbal'],
                                                 keep_size=True)
    predict_data = az.from_dict(posterior_predictive=ppc_new)
    return burned_trace, model_T, predict_data