def test_concat_group(copy, inplace, sequence):
    idata1 = from_dict(
        posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)}
    )
    if copy and inplace:
        original_idata1_posterior_id = id(idata1.posterior)
    idata2 = from_dict(prior={"C": np.random.randn(2, 10, 2), "D": np.random.randn(2, 10, 5, 2)})
    idata3 = from_dict(observed_data={"E": np.random.randn(100), "F": np.random.randn(2, 100)})
    # basic case
    assert concat(idata1, idata2, copy=True, inplace=False) is not None
    if sequence:
        new_idata = concat((idata1, idata2, idata3), copy=copy, inplace=inplace)
    else:
        new_idata = concat(idata1, idata2, idata3, copy=copy, inplace=inplace)
    if inplace:
        assert new_idata is None
        new_idata = idata1
    assert new_idata is not None
    test_dict = {"posterior": ["A", "B"], "prior": ["C", "D"], "observed_data": ["E", "F"]}
    fails = check_multiple_attrs(test_dict, new_idata)
    assert not fails
    if copy:
        if inplace:
            assert id(new_idata.posterior) == original_idata1_posterior_id
        else:
            assert id(new_idata.posterior) != id(idata1.posterior)
        assert id(new_idata.prior) != id(idata2.prior)
        assert id(new_idata.observed_data) != id(idata3.observed_data)
    else:
        assert id(new_idata.posterior) == id(idata1.posterior)
        assert id(new_idata.prior) == id(idata2.prior)
        assert id(new_idata.observed_data) == id(idata3.observed_data)
def test_inference_concat_keeps_all_fields():
    """From failures observed in issue #907"""
    idata1 = from_dict(posterior={"A": [1, 2, 3, 4]}, sample_stats={"B": [2, 3, 4, 5]})
    idata2 = from_dict(prior={"C": [1, 2, 3, 4]}, observed_data={"D": [2, 3, 4, 5]})

    idata_c1 = concat(idata1, idata2)
    idata_c2 = concat(idata2, idata1)

    test_dict = {"posterior": ["A"], "sample_stats": ["B"], "prior": ["C"], "observed_data": ["D"]}

    fails_c1 = check_multiple_attrs(test_dict, idata_c1)
    assert not fails_c1
    fails_c2 = check_multiple_attrs(test_dict, idata_c2)
    assert not fails_c2
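A minimal standalone sketch of the behaviour these two tests exercise: calling `az.concat` without a `dim` argument merges the distinct groups of several InferenceData objects into one object. The toy array shapes below are illustrative assumptions, not taken from the original test suite.

# Sketch only: group-wise merge with az.concat (no `dim` given).
import numpy as np
import arviz as az

idata_post = az.from_dict(posterior={"A": np.random.randn(2, 10, 3)})
idata_prior = az.from_dict(prior={"C": np.random.randn(2, 10, 3)})

combined = az.concat(idata_post, idata_prior)  # new object holding both groups
print(combined.groups())  # expect "posterior" and "prior" among the groups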
def to_inference_object(self) -> az.InferenceData:
    """Convert fitted Stan model into ``arviz`` InferenceData object.

    :returns: ``arviz`` InferenceData object with selected values
    :rtype: az.InferenceData
    """
    if self.fit is None:
        raise ValueError("Model has not been fit!")

    # if already Inference, just return
    if isinstance(self.fit, az.InferenceData):
        return self.fit

    if not self.specified:
        raise ValueError("Model has not been specified!")

    inference = single_feature_fit_to_inference(
        fit=self.fit,
        params=self.params,
        coords=self.coords,
        dims=self.dims,
        posterior_predictive=self.posterior_predictive,
        log_likelihood=self.log_likelihood,
        **self.specifications
    )

    if self.include_observed_data:
        obs = az.from_dict(
            observed_data={"observed": self.dat["y"]},
            coords={"tbl_sample": self.sample_names},
            dims={"observed": ["tbl_sample"]}
        )
        inference = az.concat(inference, obs)
    return inference
def test_concat_dim(dim, copy, inplace, sequence, reset_dim):
    idata1 = from_dict(
        posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)},
        observed_data={"C": np.random.randn(100), "D": np.random.randn(2, 100)},
    )
    if inplace:
        original_idata1_id = id(idata1)
    idata2 = from_dict(
        posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)},
        observed_data={"C": np.random.randn(100), "D": np.random.randn(2, 100)},
    )
    idata3 = from_dict(
        posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)},
        observed_data={"C": np.random.randn(100), "D": np.random.randn(2, 100)},
    )
    # basic case
    assert (
        concat(idata1, idata2, dim=dim, copy=copy, inplace=False, reset_dim=reset_dim)
        is not None
    )
    if sequence:
        new_idata = concat(
            (idata1, idata2, idata3), copy=copy, dim=dim, inplace=inplace, reset_dim=reset_dim
        )
    else:
        new_idata = concat(
            idata1, idata2, idata3, dim=dim, copy=copy, inplace=inplace, reset_dim=reset_dim
        )
    if inplace:
        assert new_idata is None
        new_idata = idata1
    assert new_idata is not None
    test_dict = {"posterior": ["A", "B"], "observed_data": ["C", "D"]}
    fails = check_multiple_attrs(test_dict, new_idata)
    assert not fails
    if inplace:
        assert id(new_idata) == original_idata1_id
    else:
        assert id(new_idata) != id(idata1)
    # parenthesize the conditional so the size check applies in both cases
    assert getattr(new_idata.posterior, dim).size == (6 if dim == "chain" else 30)
    if reset_dim:
        assert np.all(
            getattr(new_idata.posterior, dim).values
            == (np.arange(6) if dim == "chain" else np.arange(30))
        )
def test_concat_edgecases(copy, inplace, sequence):
    idata = from_dict(posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)})
    empty = concat()
    assert empty is not None
    if sequence:
        new_idata = concat([idata], copy=copy, inplace=inplace)
    else:
        new_idata = concat(idata, copy=copy, inplace=inplace)
    if inplace:
        assert new_idata is None
        new_idata = idata
    else:
        assert new_idata is not None
    test_dict = {"posterior": ["A", "B"]}
    fails = check_multiple_attrs(test_dict, new_idata)
    assert not fails
    if copy and not inplace:
        assert id(new_idata.posterior) != id(idata.posterior)
    else:
        assert id(new_idata.posterior) == id(idata.posterior)
def concatenate_inferences(
    inf_list: List[az.InferenceData],
    coords: dict,
    concatenation_name: str = "feature"
) -> az.InferenceData:
    """Concatenates multiple single feature fits into one object.

    :param inf_list: List of InferenceData objects for each feature
    :type inf_list: List[az.InferenceData]

    :param coords: Coordinates containing concatenation name labels
    :type coords: dict

    :param concatenation_name: Name of feature dimension used when
        concatenating, defaults to "feature"
    :type concatenation_name: str

    :returns: Combined InferenceData object
    :rtype: az.InferenceData
    """
    group_list = []
    group_list.append([x.posterior for x in inf_list])
    group_list.append([x.sample_stats for x in inf_list])
    if "log_likelihood" in inf_list[0].groups():
        group_list.append([x.log_likelihood for x in inf_list])
    if "posterior_predictive" in inf_list[0].groups():
        group_list.append([x.posterior_predictive for x in inf_list])

    po_ds = xr.concat(group_list[0], concatenation_name)
    ss_ds = xr.concat(group_list[1], concatenation_name)
    group_dict = {"posterior": po_ds, "sample_stats": ss_ds}

    if "log_likelihood" in inf_list[0].groups():
        ll_ds = xr.concat(group_list[2], concatenation_name)
        group_dict["log_likelihood"] = ll_ds
    if "posterior_predictive" in inf_list[0].groups():
        pp_ds = xr.concat(group_list[3], concatenation_name)
        group_dict["posterior_predictive"] = pp_ds

    all_group_inferences = []
    for group in group_dict:
        # Set concatenation dim coords
        group_ds = group_dict[group].assign_coords(
            {concatenation_name: coords[concatenation_name]}
        )
        group_inf = az.InferenceData(**{group: group_ds})  # hacky
        all_group_inferences.append(group_inf)

    return az.concat(*all_group_inferences)
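A hedged usage sketch for the function above: three toy single-feature fits (shapes and labels are assumptions made up for illustration) are stacked along a new "feature" dimension and returned as one InferenceData.

# Hypothetical usage of concatenate_inferences with toy data.
import numpy as np
import arviz as az

toy_fits = [
    az.from_dict(posterior={"beta": np.random.randn(2, 50)},
                 sample_stats={"lp": np.random.randn(2, 50)})
    for _ in range(3)
]
combined = concatenate_inferences(toy_fits, coords={"feature": ["f1", "f2", "f3"]})
print(combined.posterior.dims)  # expect a "feature" dimension of size 3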
def test_concat_bad():
    with pytest.raises(TypeError):
        concat("hello", "hello")
    idata = from_dict(posterior={
        "A": np.random.randn(2, 10, 2),
        "B": np.random.randn(2, 10, 5, 2)
    })
    with pytest.raises(TypeError):
        concat(idata, np.array([1, 2, 3, 4, 5]))
    with pytest.raises(NotImplementedError):
        concat(idata, idata)
def get_inference_data(*hmcs, **kwargs):
    """Build an Arviz `InferenceData` instance from 1 or more chains."""
    varnames = kwargs.get("varnames", None)
    if varnames is None:
        varnames = hmcs[0].opt_vars.get_names()

    datasets = []
    for hmc in hmcs:
        posterior = dict(
            zip(varnames, map(np.array, zip(*hmc.sample_saver.get_values())))
        )
        dataset = az.from_dict(
            posterior=posterior, sample_stats=hmc.stats.get_samples()
        )
        datasets.append(dataset)
    dataset = az.concat(*datasets, dim="chain")
    return dataset
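A minimal sketch of the per-chain concatenation used above, on toy data (the variable name and shapes are assumptions): each single-chain InferenceData has arrays of shape (1, draws), and `az.concat` with `dim="chain"` stacks them into one multi-chain object.

# Sketch only: stacking single-chain InferenceData objects along "chain".
import numpy as np
import arviz as az

chain_a = az.from_dict(posterior={"x": np.random.randn(1, 100)})
chain_b = az.from_dict(posterior={"x": np.random.randn(1, 100)})
combined = az.concat(chain_a, chain_b, dim="chain")
print(combined.posterior.sizes["chain"])  # 2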
def merge_inferences(inf_list, log_likelihood, posterior_predictive, coords,
                     concatenation_name='features'):
    group_list = []
    group_list.append(dask.persist(*[x.posterior for x in inf_list]))
    group_list.append(dask.persist(*[x.sample_stats for x in inf_list]))
    if log_likelihood is not None:
        group_list.append(dask.persist(*[x.log_likelihood for x in inf_list]))
    if posterior_predictive is not None:
        group_list.append(
            dask.persist(*[x.posterior_predictive for x in inf_list]))

    group_list = dask.compute(*group_list)

    po_ds = xr.concat(group_list[0], concatenation_name)
    ss_ds = xr.concat(group_list[1], concatenation_name)
    group_dict = {"posterior": po_ds, "sample_stats": ss_ds}

    if log_likelihood is not None:
        ll_ds = xr.concat(group_list[2], concatenation_name)
        group_dict["log_likelihood"] = ll_ds
    if posterior_predictive is not None:
        pp_ds = xr.concat(group_list[3], concatenation_name)
        group_dict["posterior_predictive"] = pp_ds

    all_group_inferences = []
    for group in group_dict:
        # Set concatenation dim coords
        group_ds = group_dict[group].assign_coords(
            {concatenation_name: coords[concatenation_name]})
        group_inf = az.InferenceData(**{group: group_ds})  # hacky
        all_group_inferences.append(group_inf)

    return az.concat(*all_group_inferences)
results = []
n_chains = 50

model_palms = mod.CompositionalAnalysis(
    data[data.obs["site"].isin(["left palm", "right palm"])],
    "site", baseline_index=None)

for n in range(n_chains):
    result_temp = model_palms.sample_hmc(num_results=int(20000), n_burnin=5000)
    results.append(result_temp)

#%%
res_all = az.concat(results, dim="chain")
print(res_all.posterior)

#%%
az.to_netcdf(res_all, write_path + "/multi_chain_50_len20000_all")

#%%
acc_probs = pd.DataFrame(
    pd.concat([r.effect_df.loc[:, "Inclusion probability"] for r in results]))
acc_probs["chain_no"] = np.concatenate(
    [np.repeat(i + 1, 21) for i in range(n_chains)])
acc_probs.index = acc_probs.index.droplevel(0)
def to_inference_object(
    self,
    combine_individual_fits: bool = True,
) -> az.InferenceData:
    """Convert fitted Stan model into ``arviz`` InferenceData object.

    :param combine_individual_fits: Whether to combine the results of
        parallelized feature fits, defaults to True
    :type combine_individual_fits: bool

    :returns: ``arviz`` InferenceData object with selected values
    :rtype: az.InferenceData
    """
    if self.fit is None:
        raise ValueError("Model has not been fit!")

    # if already Inference, just return
    if isinstance(self.fit, az.InferenceData):
        return self.fit
    # if sequence of Inferences, concatenate if specified
    if isinstance(self.fit, list) or isinstance(self.fit, tuple):
        if isinstance(self.fit[0], az.InferenceData):
            if combine_individual_fits:
                cat_name = self.specifications["concatenation_name"]
                return concatenate_inferences(
                    self.fit,
                    coords=self.specifications["coords"],
                    concatenation_name=cat_name
                )
            else:
                return self.fit

    args = {
        k: self.specifications.get(k)
        for k in ["params", "coords", "dims", "posterior_predictive",
                  "log_likelihood"]
    }
    if isinstance(self.fit, CmdStanMCMC):
        fit_to_inference = single_fit_to_inference
        args["alr_params"] = self.specifications["alr_params"]
    elif isinstance(self.fit, Sequence):
        fit_to_inference = multiple_fits_to_inference
        if combine_individual_fits:
            args["concatenation_name"] = self.specifications.get(
                "concatenation_name", "feature"
            )
            args["concatenate"] = True
        else:
            args["concatenate"] = False
        # TODO: Check that dims and concatenation_name match
        if self.specifications.get("alr_params") is not None:
            warnings.warn("ALR to CLR not performed on parallel models.",
                          UserWarning)
    else:
        raise ValueError("Unrecognized fit type!")

    inference = fit_to_inference(self.fit, **args)
    if self.specifications["include_observed_data"]:
        # Can't include observed data in individual fits
        include_obs_fail = (
            not combine_individual_fits
            and self.parallelize_across == "features"
        )
        if include_obs_fail:
            warnings.warn(
                "Cannot include observed data in un-concatenated fits!"
            )
        else:
            obs = az.from_dict(
                observed_data={"observed": self.dat["y"]},
                coords={
                    "tbl_sample": self.sample_names,
                    "feature": self.feature_names
                },
                dims={"observed": ["tbl_sample", "feature"]}
            )
            inference = az.concat(inference, obs)
    return inference
def predict(
    mi: MaudInput,
    output_dir: str,
    idata_train: az.InferenceData,
) -> az.InferenceData:
    """Call CmdStanModel.sample for out of sample predictions.

    :param mi: a MaudInput object
    :param output_dir: directory where output will be saved
    :param idata_train: InferenceData object with posterior draws
    """
    model = cmdstanpy.CmdStanModel(
        stan_file=os.path.join(HERE, STAN_PROGRAM_RELATIVE_PATH_PREDICT),
        cpp_options=mi.config.cpp_options,
        stanc_options=mi.config.stanc_options,
    )
    set_up_output_dir(output_dir, mi)
    kinetic_parameters = [
        "keq",
        "km",
        "kcat",
        "dissociation_constant",
        "transfer_constant",
        "kcat_phos",
        "ki",
    ]
    posterior = idata_train.get("posterior")
    sample_stats = idata_train.get("sample_stats")
    assert posterior is not None
    assert sample_stats is not None
    chains = sample_stats["chain"]
    draws = sample_stats["draw"]
    dims = {
        "conc": ["experiment", "mic"],
        "conc_enzyme": ["experiment", "enzyme"],
        "flux": ["experiment", "reaction"],
    }
    for chain in chains:
        for draw in draws:
            inits = {
                par: (
                    posterior[par]
                    .sel(chain=chain, draw=draw)
                    .to_series()
                    .values
                )
                for par in kinetic_parameters
                if par in posterior.keys()
            }
            sample_args: dict = {
                "data": os.path.join(output_dir, "input_data_test.json"),
                "inits": inits,
                "output_dir": output_dir,
                "iter_warmup": 0,
                "iter_sampling": 1,
                "fixed_param": True,
                "show_progress": False,
            }
            if mi.config.cmdstanpy_config_predict is not None:
                sample_args = {
                    **sample_args,
                    **mi.config.cmdstanpy_config_predict,
                }
            mcmc_draw = model.sample(**sample_args)
            idata_draw = az.from_cmdstan(
                mcmc_draw.runset.csv_files,
                coords={
                    "experiment": [
                        e.id for e in mi.measurements.experiments if e.is_test
                    ],
                    "mic": [m.id for m in mi.kinetic_model.mics],
                    "enzyme": [e.id for e in mi.kinetic_model.enzymes],
                    "reaction": [r.id for r in mi.kinetic_model.reactions],
                },
                dims=dims,
            ).assign_coords(
                coords={"chain": [chain], "draw": [draw]},
                groups="posterior_groups",
            )
            if draw == 0:
                idata_chain = idata_draw.copy()
            else:
                idata_chain = az.concat(
                    [idata_chain, idata_draw], dim="draw", reset_dim=False
                )
        if chain == 0:
            out = idata_chain.copy()
        else:
            out = az.concat([out, idata_chain], dim="chain", reset_dim=False)
    return out
def bayes_dummy_model_ref_std(uniform, max_allowed_specificMB=None, gd=None,
                              sampler='nuts', ys=np.arange(1979, 2019, 1),
                              gd_mb=None, h=None, w=None, use_two_msm=True,
                              nosigma=False, nosigmastd=False, first_ppc=True,
                              pd_calib_opt=None, pd_geodetic_comp=None,
                              random_seed=42, y0=None, y1=None):
    # test
    slope_pfs = []
    slope_melt_fs = []
    for y in ys:
        slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=y)
        slope_pfs.append(slope_pf.mean())
        slope_melt_fs.append(slope_melt_f.mean())

    with pm.Model() as model_T:
        if uniform:
            melt_f = pm.Uniform("melt_f", lower=10, upper=1000)
            pf = pm.Uniform('pf', lower=0.1, upper=10)
        else:
            pf = pm.TruncatedNormal(
                'pf',
                mu=pd_calib_opt['pf_opt'][pd_calib_opt.reg == 11.0].dropna().mean(),
                sigma=pd_calib_opt['pf_opt'][pd_calib_opt.reg == 11.0].dropna().std(),
                lower=0.5, upper=10)
            melt_f = pm.TruncatedNormal(
                'melt_f',
                mu=pd_calib_opt['melt_f_opt_pf'][pd_calib_opt.reg == 11.0].dropna().mean(),
                sigma=pd_calib_opt['melt_f_opt_pf'][pd_calib_opt.reg == 11.0].dropna().std(),
                lower=10, upper=1000)

        if use_two_msm:
            # should not use the stuff before 2000
            aet_slope_melt_fs_two = pm.Data(
                'aet_slope_melt_fs_two',
                [np.array(slope_melt_fs)[(ys >= 2000) & (ys <= 2009)].mean(),
                 np.array(slope_melt_fs)[ys >= 2010].mean()])
            aet_slope_pfs_two = pm.Data(
                'aet_slope_pfs_two',
                [np.array(slope_pfs)[(ys >= 2000) & (ys <= 2009)].mean(),
                 np.array(slope_pfs)[ys >= 2010].mean()])
        else:
            aet_slope_melt_fs_two = pm.Data(
                'aet_slope_melt_fs_two',
                [np.array(slope_melt_fs)[ys >= 2000].mean()])
            aet_slope_pfs_two = pm.Data(
                'aet_slope_pfs_two',
                [np.array(slope_pfs)[ys >= 2000].mean()])
        aet_mbs_two = aet_slope_pfs_two * pf + aet_slope_melt_fs_two * melt_f
        # make a deterministic out of it to save it also in the traces
        mb_mod = pm.Deterministic('mb_mod', aet_mbs_two)

        # std
        # need to put slope_melt_fs and slope_pfs into []???
        aet_slope_melt_fs = pm.Data('aet_slope_melt_fs', slope_melt_fs)
        # pd.DataFrame(slope_melt_fs, columns=['slope_melt_fs'])['slope_melt_fs'])
        aet_slope_pfs = pm.Data('aet_slope_pfs', slope_pfs)
        # pd.DataFrame(slope_pfs, columns=['slope_pfs'])['slope_pfs'])
        aet_mbs = aet_slope_pfs * pf + aet_slope_melt_fs * melt_f
        mod_std = pm.Deterministic('mod_std', aet_mbs.std())

        if use_two_msm:
            sigma = pm.Data('sigma', pd_geodetic_comp.loc[gd.rgi_id][
                ['err_dmdtda_2000_2010', 'err_dmdtda_2010_2020']].values * 1000)
            observed = pm.Data('observed', pd_geodetic_comp.loc[gd.rgi_id][
                ['dmdtda_2000_2010', 'dmdtda_2010_2020']].values * 1000)
            if nosigma == False:
                geodetic_massbal = pm.Normal('geodetic_massbal', mu=mb_mod,
                                             sigma=sigma,  # standard deviation
                                             observed=observed)  # likelihood
            else:
                geodetic_massbal = pm.Normal('geodetic_massbal', mu=mb_mod,
                                             observed=observed)  # likelihood
            # diff_geodetic_massbal = pm.Deterministic("diff_geodetic_massbal",
            #                                          geodetic_massbal - observed)
        else:
            # sigma and observed need to have dim 1 (not zero), --> [value]
            sigma = pm.Data('sigma', [
                pd_geodetic_comp.loc[gd.rgi_id]['err_dmdtda'] * 1000])
            observed = pm.Data('observed', [
                pd_geodetic_comp.loc[gd.rgi_id]['dmdtda'] * 1000])
            if nosigma == False:
                # likelihood
                geodetic_massbal = pm.TruncatedNormal('geodetic_massbal',
                                                      mu=mb_mod,
                                                      sigma=sigma,  # standard deviation
                                                      observed=observed,
                                                      lower=max_allowed_specificMB)
            else:
                geodetic_massbal = pm.TruncatedNormal('geodetic_massbal',
                                                      mu=mb_mod,
                                                      observed=observed,
                                                      lower=max_allowed_specificMB)  # likelihood
            # constrained already by using TruncatedNormal geodetic massbalance ...
            # pot_max_melt = pm.Potential('pot_max_melt', aet.switch(
            #     geodetic_massbal < max_allowed_specificMB, -np.inf, 0))
        diff_geodetic_massbal = pm.Deterministic("diff_geodetic_massbal",
                                                 geodetic_massbal - observed)
        # pot_max_melt = pm.Potential('pot_max_melt', aet.switch(geodetic_massbal < max_allowed_specificMB, -np.inf, 0))

        # std
        # sigma = pm.Data('sigma', 100)  # how large are the uncertainties of the direct glaciological method !!!
        ref_df = gd.get_ref_mb_data(y0=y0, y1=y1)
        sigma_std = aet.constant((ref_df['ANNUAL_BALANCE'].values / 10).std())
        # how large are the uncertainties of the direct glaciological method !!!
        observed_std = aet.constant(ref_df['ANNUAL_BALANCE'].values.std())
        # std should always be above zero
        if nosigmastd:
            glaciological_std = pm.TruncatedNormal('glaciological_std',
                                                   mu=mod_std,
                                                   # sigma=sigma_std,
                                                   observed=observed_std,
                                                   lower=0.001)  # likelihood
        else:
            glaciological_std = pm.TruncatedNormal('glaciological_std',
                                                   mu=mod_std,
                                                   sigma=sigma_std,
                                                   observed=observed_std,
                                                   lower=0.001)  # likelihood
        quot_std = pm.Deterministic("quot_std", glaciological_std / observed_std)
        # pot_std = pm.Potential('pot_std', aet.switch(mod_std <= 0, -np.inf, 0))
        prior = pm.sample_prior_predictive(random_seed=random_seed,
                                           samples=1000)  # , keep_size = True)

    with model_T:
        # sampling
        if sampler == 'nuts':
            trace = pm.sample(25000, chains=3, tune=25000, target_accept=0.99,
                              compute_convergence_checks=True,
                              return_inferencedata=True)
            # start={'pf': 2.5, 'melt_f': 200})
        elif sampler == 'jax':
            import pymc3.sampling_jax
            trace = pm.sampling_jax.sample_numpyro_nuts(
                20000, chains=4, tune=20000, target_accept=0.98)
            # , compute_convergence_checks=True)
        burned_trace = trace.sel(draw=slice(5000, None))
        burned_trace.posterior['draw'] = np.arange(0, len(burned_trace.posterior.draw))
        burned_trace.log_likelihood['draw'] = np.arange(0, len(burned_trace.posterior.draw))
        burned_trace.sample_stats['draw'] = np.arange(0, len(burned_trace.posterior.draw))
        if first_ppc:
            print(az.summary(burned_trace.posterior))
            ppc = pm.sample_posterior_predictive(
                burned_trace, random_seed=random_seed,
                var_names=['geodetic_massbal', 'glaciological_std', 'pf',
                           'melt_f', 'mb_mod', 'diff_geodetic_massbal',
                           'quot_std'],
                keep_size=True)
            az.concat(burned_trace,
                      az.from_dict(posterior_predictive=ppc, prior=prior),
                      inplace=True)

    with model_T:
        slope_pf_new = []
        slope_melt_f_new = []
        for y in ys:
            slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=y)
            slope_pf_new.append(slope_pf.mean())
            slope_melt_f_new.append(slope_melt_f.mean())
        pm.set_data(new_data={'aet_slope_melt_fs_two': slope_melt_f_new,
                              'aet_slope_pfs_two': slope_pf_new,
                              'observed': np.empty(len(ys)),
                              'sigma': np.empty(len(ys))})
        ppc_new = pm.sample_posterior_predictive(
            burned_trace, random_seed=random_seed,
            var_names=['geodetic_massbal', 'pf', 'melt_f', 'mb_mod',
                       'diff_geodetic_massbal'],
            keep_size=True)
        predict_data = az.from_dict(posterior_predictive=ppc_new)
    return burned_trace, model_T, predict_data
def __init__(self, *inference_data, universe_save=None, npix=2**5,
             fast_open=False):
    """FIXME! briefly describe function

    :param inference_data:
    :param universe_save:
    :param npix:
    :returns:
    :rtype:
    """
    if len(inference_data) == 1:
        self._posterior = inference_data[0]
    else:
        self._posterior = inference_data[0]
        for idata in inference_data[1:]:
            self._posterior = av.concat(self._posterior, idata, dim="chain")

    self._npix = npix

    try:
        ang_sep = self._posterior.posterior.ang_sep.stack(
            sample=("chain", "draw")).values
        self._do_contour = True
    except:
        self._do_contour = False

    self._beta1 = self._posterior.posterior.beta1.stack(
        sample=("chain", "draw")).values

    # set the number of samples.. flattened over chain
    self._n_samples = self._beta1.shape[-1]

    self._beta2 = self._posterior.posterior.beta2.stack(
        sample=("chain", "draw")).values
    self._omega1 = self._posterior.posterior.omega.stack(
        sample=("chain", "draw")).values[0]
    self._omega2 = self._posterior.posterior.omega.stack(
        sample=("chain", "draw")).values[1]

    try:
        self._amplitude = self._posterior.posterior.amplitude.stack(
            sample=("chain", "draw")).values
    except:
        self._amplitude = np.ones(self._n_samples)

    self._background = self._posterior.posterior.bkg.stack(
        sample=("chain", "draw")).values
    self._scale = self._posterior.posterior.scale.stack(
        sample=("chain", "draw")).values

    if self._scale.shape[0] == 2:
        self._multi_scale = True
    else:
        self._multi_scale = False

    try:
        self._dt = self._posterior.posterior.dt.stack(
            sample=("chain", "draw")).values
        self._grb_theta = self._posterior.posterior.grb_theta.stack(
            sample=("chain", "draw")).values
        self._grb_phi = self._posterior.posterior.grb_phi.stack(
            sample=("chain", "draw")).values
        self._is_dt_fit = True
        self._n_dets = self._background.shape[0]
    except:
        self._is_dt_fit = False
        self._dt = None
        self._grb_theta = None
        self._grb_phi = None
        self._n_dets = 1

    self._use_bw = False
    try:
        self._bw1 = self._posterior.posterior.bw1.stack(
            sample=("chain", "draw")).values
        self._bw2 = self._posterior.posterior.bw2.stack(
            sample=("chain", "draw")).values
        self._multi_bw = True
    except:
        try:
            self._bw = self._posterior.posterior.bw.stack(
                sample=("chain", "draw")).values
            if self._bw.shape[0] == 2:
                self._multi_bw = True
            else:
                self._multi_bw = False
        except:
            self._bw = self._posterior.posterior.bw_out.stack(
                sample=("chain", "draw")).values
            self._use_bw = False
            self._multi_bw = True

    self.grb_color = "k"
    self._grb_style = "lrtb"

    self._has_universe = False
    if universe_save is not None:
        self._universe = Universe.from_save_file(universe_save)
        self._has_universe = True

    if self._is_dt_fit and self._n_dets > 2 and (not fast_open):
        self._build_moc_map()
    elif self._do_contour:
        self._build_moc_map()
def bayes_dummy_model_ref(uniform, max_allowed_specificMB=None, gd=None,
                          sampler='nuts', ys=None, gd_mb=None, h=None, w=None,
                          use_two_msm=True, nosigma=False, pd_calib_opt=None,
                          random_seed=4, y0=None, y1=None):
    # if use_two_msm:
    slope_pfs = []
    slope_melt_fs = []
    for y in ys:
        slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=y)
        slope_pfs.append(slope_pf.mean())
        slope_melt_fs.append(slope_melt_f.mean())

    with pm.Model() as model_T:
        if uniform:
            melt_f = pm.Uniform("melt_f", lower=10, upper=1000)
            pf = pm.Uniform('pf', lower=0.1, upper=10)
        else:
            pf = pm.TruncatedNormal(
                'pf',
                mu=pd_calib_opt['pf_opt'][pd_calib_opt.reg == 11.0].dropna().mean(),
                sigma=pd_calib_opt['pf_opt'][pd_calib_opt.reg == 11.0].dropna().std(),
                lower=0.5, upper=10)
            melt_f = pm.TruncatedNormal(
                'melt_f',
                mu=pd_calib_opt['melt_f_opt_pf'][pd_calib_opt.reg == 11.0].dropna().mean(),
                sigma=pd_calib_opt['melt_f_opt_pf'][pd_calib_opt.reg == 11.0].dropna().std(),
                lower=1, upper=1000)
        # need to put slope_melt_fs and slope_pfs into [], otherwise it does not work for jax
        aet_slope_melt_fs = pm.Data('aet_slope_melt_fs', slope_melt_fs)
        # pd.DataFrame(slope_melt_fs, columns=['slope_melt_fs'])['slope_melt_fs'])
        aet_slope_pfs = pm.Data('aet_slope_pfs', slope_pfs)
        # pd.DataFrame(slope_pfs, columns=['slope_pfs'])['slope_pfs'])
        aet_mbs = aet_slope_pfs * pf + aet_slope_melt_fs * melt_f
        mb_mod = pm.Deterministic('mb_mod', aet_mbs)

    with model_T:
        ref_df = gd.get_ref_mb_data(y0=y0, y1=y1)
        # sigma = pm.Data('sigma', 100)  # how large are the uncertainties of the direct glaciological method !!!
        sigma = pm.Data('sigma', 100)
        # np.abs(ref_df['ANNUAL_BALANCE'].values/10))  # how large are the uncertainties of the direct glaciological method !!!
        observed = pm.Data('observed', ref_df['ANNUAL_BALANCE'].values)
        if nosigma:
            geodetic_massbal = pm.TruncatedNormal('geodetic_massbal',
                                                  mu=mb_mod,
                                                  # sigma=sigma,
                                                  observed=observed,
                                                  lower=max_allowed_specificMB)
        else:
            geodetic_massbal = pm.TruncatedNormal('geodetic_massbal',
                                                  mu=mb_mod,
                                                  sigma=sigma,
                                                  observed=observed,
                                                  lower=max_allowed_specificMB)  # likelihood
        diff_geodetic_massbal = pm.Deterministic("diff_geodetic_massbal",
                                                 geodetic_massbal - observed)
        # pot_max_melt = pm.Potential('pot_max_melt', aet.switch(geodetic_massbal < max_allowed_specificMB, -np.inf, 0))
        prior = pm.sample_prior_predictive(random_seed=random_seed,
                                           samples=1000)  # , keep_size = True)
        if sampler == 'nuts':
            trace = pm.sample(10000, chains=4, tune=10000, target_accept=0.98,
                              compute_convergence_checks=True,
                              return_inferencedata=True)
            # start={'pf': 2.5, 'melt_f': 200})
        elif sampler == 'jax':
            import pymc3.sampling_jax
            trace = pm.sampling_jax.sample_numpyro_nuts(
                20000, chains=4, tune=20000, target_accept=0.98)
            # , compute_convergence_checks=True)

    with model_T:
        burned_trace = trace.sel(draw=slice(5000, None))
        az.summary(burned_trace.posterior)
        ppc = pm.sample_posterior_predictive(
            burned_trace, random_seed=random_seed,
            var_names=['geodetic_massbal', 'pf', 'melt_f', 'mb_mod',
                       'diff_geodetic_massbal'],
            keep_size=True)
        az.concat(burned_trace,
                  az.from_dict(posterior_predictive=ppc, prior=prior),
                  inplace=True)

    # with model_T:
    #     slope_pf_new = []
    #     slope_melt_f_new = []
    #     for y in ys:
    #         slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=y)
    #         slope_pf_new.append(slope_pf.mean())
    #         slope_melt_f_new.append(slope_melt_f.mean())
    #     if nosigma:
    #         pm.set_data(new_data={'aet_slope_melt_fs': slope_melt_f_new,
    #                               'aet_slope_pfs': slope_pf_new,
    #                               'observed': np.empty(len(ys))})  # , 'sigma': np.empty(len(ys))})
    #     else:
    #         pm.set_data(new_data={'aet_slope_melt_fs': slope_melt_f_new,
    #                               'aet_slope_pfs': slope_pf_new,
    #                               'observed': np.empty(len(ys)),
    #                               'sigma': np.empty(len(ys))})
    #     ppc_new = pm.sample_posterior_predictive(
    #         burned_trace, random_seed=random_seed,
    #         var_names=['geodetic_massbal', 'pf', 'melt_f', 'mb_mod',
    #                    'diff_geodetic_massbal'],
    #         keep_size=True)
    #     predict_data = az.from_dict(posterior_predictive=ppc_new)
    return burned_trace, model_T  # , predict_data
    # idata_kwargs={"density_dist_obs": False}
def predictions_to_inference_data(
    predictions,
    posterior_trace: Optional["MultiTrace"] = None,
    model: Optional["Model"] = None,
    coords: Optional[CoordSpec] = None,
    dims: Optional[DimSpec] = None,
    idata_orig: Optional[InferenceData] = None,
    inplace: bool = False,
) -> InferenceData:
    """Translate out-of-sample predictions into ``InferenceData``.

    Parameters
    ----------
    predictions: Dict[str, np.ndarray]
        The predictions are the return value of
        :func:`~pymc.sample_posterior_predictive`, a dictionary of strings
        (variable names) to numpy ndarrays (draws). Requires the arrays to
        follow the convention ``chain, draw, *shape``.
    posterior_trace: MultiTrace
        This should be a trace that has been thinned appropriately for
        ``pymc.sample_posterior_predictive``. Specifically, any variable
        whose shape is a deterministic function of the shape of any
        predictor (explanatory, independent, etc.) variables must be
        *removed* from this trace.
    model: Model
        The pymc model. It can be omitted if within a model context.
    coords: Dict[str, array-like[Any]]
        Coordinates for the variables. Map from coordinate names to
        coordinate values.
    dims: Dict[str, array-like[str]]
        Map from variable name to ordered set of coordinate names.
    idata_orig: InferenceData, optional
        If supplied, then modify this inference data in place, adding
        ``predictions`` and (if available) ``predictions_constant_data``
        groups. If this is not supplied, make a fresh InferenceData.
    inplace: boolean, optional
        If idata_orig is supplied and inplace is True, merge the predictions
        into idata_orig, rather than returning a fresh InferenceData object.

    Returns
    -------
    InferenceData:
        May be modified ``idata_orig``.
    """
    if inplace and not idata_orig:
        raise ValueError(
            "Do not pass True for inplace unless passing "
            "an existing InferenceData as idata_orig"
        )
    converter = InferenceDataConverter(
        trace=posterior_trace,
        predictions=predictions,
        model=model,
        coords=coords,
        dims=dims,
        log_likelihood=False,
    )
    if hasattr(idata_orig, "posterior"):
        converter.nchains = idata_orig.posterior.dims["chain"]
        converter.ndraws = idata_orig.posterior.dims["draw"]
    else:
        aelem = next(iter(predictions.values()))
        converter.nchains, converter.ndraws = aelem.shape[:2]
    new_idata = converter.to_inference_data()
    if idata_orig is None:
        return new_idata
    elif inplace:
        concat([idata_orig, new_idata], dim=None, inplace=True)
        return idata_orig
    else:
        # if we are not returning in place, then merge the old groups into
        # the new inference data and return that.
        concat([new_idata, idata_orig], dim=None, copy=True, inplace=True)
        return new_idata
def test_concat_bad():
    with pytest.raises(TypeError):
        concat("hello", "hello")
    idata = from_dict(posterior={
        "A": np.random.randn(2, 10, 2),
        "B": np.random.randn(2, 10, 5, 2)
    })
    idata2 = from_dict(posterior={"A": np.random.randn(2, 10, 2)})
    idata3 = from_dict(prior={"A": np.random.randn(2, 10, 2)})
    with pytest.raises(TypeError):
        concat(idata, np.array([1, 2, 3, 4, 5]))
    with pytest.raises(TypeError):
        concat(idata, idata, dim=None)
    with pytest.raises(TypeError):
        concat(idata, idata2, dim="chain")
    with pytest.raises(TypeError):
        concat(idata2, idata, dim="chain")
    with pytest.raises(TypeError):
        concat(idata, idata3, dim="chain")
    with pytest.raises(TypeError):
        concat(idata3, idata, dim="chain")
def bayes_dummy_model_better_OLD(uniform, max_allowed_specificMB=None, gd=None,
                                 sampler='nuts', ys=np.arange(2000, 2019, 1),
                                 gd_mb=None, h=None, w=None, use_two_msm=True,
                                 nosigma=False, model=None, pd_calib_opt=None,
                                 first_ppc=True, pd_geodetic_comp=None,
                                 random_seed=42, y0=None, y1=None):
    if use_two_msm:
        slope_pfs = []
        slope_melt_fs = []
        for y in ys:
            slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=y)
            slope_pfs.append(slope_pf.mean())
            slope_melt_fs.append(slope_melt_f.mean())
    else:
        slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=ys)

    if model == None:
        model_T = pm.Model()
    else:
        model_T = model

    with model_T:
        if uniform == True:
            melt_f = pm.Uniform("melt_f", lower=10, upper=1000)
            pf = pm.Uniform('pf', lower=0.1, upper=10)
        else:
            if model == None:
                pf = pm.TruncatedNormal(
                    'pf',
                    mu=pd_calib_opt['pf_opt'][pd_calib_opt.reg == 11.0].dropna().mean(),
                    sigma=pd_calib_opt['pf_opt'][pd_calib_opt.reg == 11.0].dropna().std(),
                    lower=0.5, upper=10)
                melt_f = pm.TruncatedNormal(
                    'melt_f',
                    mu=pd_calib_opt['melt_f_opt_pf'][pd_calib_opt.reg == 11.0].dropna().mean(),
                    sigma=pd_calib_opt['melt_f_opt_pf'][pd_calib_opt.reg == 11.0].dropna().std(),
                    lower=1, upper=1000)
            else:
                pass  # melt_f = melt_f

        # slopes have to be defined as theano constants
        # aet_slope_pf_0 = aet.constant(np.array(slope_pfs)[ys<=2010].mean())
        # aet_slope_pf_1 = aet.constant(np.array(slope_pfs)[ys>2010].mean())
        # aet_slope_melt_f_0 = aet.constant(np.array(slope_melt_fs)[ys<=2010].mean())
        # aet_slope_melt_f_1 = aet.constant(np.array(slope_melt_fs)[ys>2010].mean())
        if use_two_msm:
            aet_slope_melt_fs = pm.Data('aet_slope_melt_fs', [
                np.array(slope_melt_fs)[ys <= 2010].mean(),
                np.array(slope_melt_fs)[ys > 2010].mean()])
            aet_slope_pfs = pm.Data('aet_slope_pfs', (
                [np.array(slope_pfs)[ys <= 2010].mean(),
                 np.array(slope_pfs)[ys > 2010].mean()]))
        else:
            aet_slope_melt_fs = pm.Data('aet_slope_melt_fs',
                                        [np.array(slope_melt_f).mean()])
            aet_slope_pfs = pm.Data('aet_slope_pfs',
                                    [np.array(slope_pf).mean()])
        # aet_mbs = [aet_slope_pf_0, aet_slope_pf_1] * pf + aet_slope_melt_fs * melt_f
        # aet_mb_0 = aet_slope_pf_0 * pf + aet_slope_melt_f_0 * melt_f
        # aet_mb_1 = aet_slope_pf_1 * pf + aet_slope_melt_f_1 * melt_f
        if model == None:
            aet_mbs = aet_slope_pfs * pf + aet_slope_melt_fs * melt_f
        else:
            aet_mbs = aet_slope_pfs * model.pf + aet_slope_melt_fs * model.melt_f
        # aet_mbs = aet.as_tensor_variable([aet_mb_0, aet_mb_1])
        # aet_slope_melt_fs = aet.vector(np.array([np.array(slope_melt_fs)[ys<=2010].mean(),
        #                                          np.array(slope_melt_fs)[ys>2010].mean()]))
        # this is not the new simple theano compatible
        # mass balance function that depends on pf and melt_f
        # aet_mbs = [aet_slope_pf_0, aet_slope_pf_1] * pf + aet_slope_melt_fs * melt_f

        # make a deterministic out of it to save it also in the traces
        mb_mod = pm.Deterministic('mb_mod', aet_mbs)

    with model_T:
        if use_two_msm:
            sigma = pm.Data('sigma', pd_geodetic_comp.loc[gd.rgi_id][
                ['err_dmdtda_2000_2010', 'err_dmdtda_2010_2020']].values * 1000)
            observed = pm.Data('observed', pd_geodetic_comp.loc[gd.rgi_id][
                ['dmdtda_2000_2010', 'dmdtda_2010_2020']].values * 1000)
            if nosigma == False:
                geodetic_massbal = pm.Normal('geodetic_massbal', mu=mb_mod,
                                             sigma=sigma,  # standard deviation
                                             observed=observed)  # likelihood
            else:
                geodetic_massbal = pm.Normal('geodetic_massbal', mu=mb_mod,
                                             observed=observed)  # likelihood
            diff_geodetic_massbal = pm.Deterministic("diff_geodetic_massbal",
                                                     geodetic_massbal - observed)
        else:
            # sigma and observed need to have dim 1 (not zero), --> [value]
            sigma = pm.Data('sigma', [
                pd_geodetic_comp.loc[gd.rgi_id]['err_dmdtda'] * 1000])
            observed = pm.Data('observed', [
                pd_geodetic_comp.loc[gd.rgi_id]['dmdtda'] * 1000])
            geodetic_massbal = pm.TruncatedNormal('geodetic_massbal',
                                                  mu=mb_mod,
                                                  sigma=sigma,  # standard deviation
                                                  observed=observed,
                                                  lower=max_allowed_specificMB)  # likelihood
            diff_geodetic_massbal = pm.Deterministic("diff_geodetic_massbal",
                                                     geodetic_massbal - observed)
            # constrained already by using TruncatedNormal geodetic massbalance ...
            # pot_max_melt = pm.Potential('pot_max_melt', aet.switch(
            #     geodetic_massbal < max_allowed_specificMB, -np.inf, 0))
        # also compute this difference just to be sure ...
        prior = pm.sample_prior_predictive(random_seed=random_seed,
                                           samples=1000)  # , keep_size = True)

    with model_T:
        if sampler == 'nuts':
            trace = pm.sample(20000, chains=4, tune=20000, target_accept=0.98,
                              compute_convergence_checks=True,
                              return_inferencedata=True)
            # start={'pf': 2.5, 'melt_f': 200})
        elif sampler == 'jax':
            import pymc3.sampling_jax
            trace = pm.sampling_jax.sample_numpyro_nuts(
                20000, chains=4, tune=20000, target_accept=0.98)
            # , compute_convergence_checks=True)

    with model_T:
        burned_trace = trace.sel(draw=slice(5000, None))
        # trace = pm.sample(10000, chains=4, tune=10000, target_accept=0.98)
        # need high target_accept to have no divergences and a sufficient effective sample number
        # We have stored the paths of all our variables, or "traces", in the trace variable;
        # these paths are the routes the unknown parameters (here just 'n') have taken thus far.
        # Inference using the first few thousand points is a bad idea, as they are unrelated to the
        # final distribution we are interested in.
        # Thus it is a good idea to discard those samples before using the samples for inference.
        # We call this period before convergence the burn-in period.
        # burned_trace = trace[1000:]  # if arviz dataset
        if first_ppc:
            # TODO: then sometimes a problem occurs that a warning is raised
            # about more chains (1000) than draws (2) ... why???
            ppc = pm.sample_posterior_predictive(
                burned_trace, random_seed=random_seed,
                var_names=['geodetic_massbal', 'pf', 'melt_f', 'mb_mod',
                           'diff_geodetic_massbal'],
                keep_size=True)
            az.concat(burned_trace,
                      az.from_dict(posterior_predictive=ppc, prior=prior),
                      inplace=True)

    ys_ref = gd.get_ref_mb_data(y0=y0, y1=y1).index.values
    with model_T:
        slope_pf_new = []
        slope_melt_f_new = []
        for y in ys_ref:
            slope_pf, slope_melt_f = get_slope_pf_melt_f(gd_mb, h=h, w=w, ys=y)
            slope_pf_new.append(slope_pf.mean())
            slope_melt_f_new.append(slope_melt_f.mean())
        pm.set_data(new_data={'aet_slope_melt_fs': slope_melt_f_new,
                              'aet_slope_pfs': slope_pf_new,
                              'observed': np.empty(len(ys_ref)),
                              'sigma': np.empty(len(ys_ref))})
        ppc_new = pm.sample_posterior_predictive(
            burned_trace, random_seed=random_seed,
            var_names=['geodetic_massbal', 'pf', 'melt_f', 'mb_mod',
                       'diff_geodetic_massbal'],
            keep_size=True)
        predict_data = az.from_dict(posterior_predictive=ppc_new)
    return burned_trace, model_T, predict_data