def _single_feature_to_inf(fit: CmdStanMCMC,
                           coords: dict,
                           dims: dict,
                           vars_to_drop: Sequence[str],
                           posterior_predictive: str = None,
                           log_likelihood: str = None) -> az.InferenceData:
    """Convert single feature fit to InferenceData.

    :param fit: Single feature fit with CmdStanPy
    :type fit: cmdstanpy.CmdStanMCMC

    :param coords: Coordinates to use for annotating Inference dims
    :type coords: dict

    :param dims: Dimensions of parameters in fitted model
    :type dims: dict

    :param vars_to_drop: Variables to drop from the posterior group
    :type vars_to_drop: Sequence[str]

    :param posterior_predictive: Name of variable holding PP values
    :type posterior_predictive: str

    :param log_likelihood: Name of variable holding LL values
    :type log_likelihood: str

    :returns: InferenceData object of single feature
    :rtype: az.InferenceData
    """
    feat_inf = az.from_cmdstanpy(
        posterior=fit,
        posterior_predictive=posterior_predictive,
        log_likelihood=log_likelihood,
        coords=coords,
        dims=dims)
    feat_inf.posterior = _drop_data(feat_inf.posterior, vars_to_drop)
    return feat_inf
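The _drop_data helper is called above but not defined in this snippet; a minimal sketch consistent with how it is used (the name comes from the call, the body is an assumption):

def _drop_data(dataset, vars_to_drop):
    # Remove the named variables from an xarray Dataset, ignoring any
    # names that are not present.
    return dataset.drop_vars(list(vars_to_drop), errors="ignore")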
def standard_validate_arviz(fit, ll_label, pp_label, observed, variables):
    inferred = az.from_cmdstanpy(
        fit,
        log_likelihood=ll_label,
        posterior_predictive=pp_label,
        observed_data={'y': observed})
    print("Displaying posterior plots.")
    param_posterior_arviz_plots(inferred, variables)
    print("Validating inference run.")
    _ = run_validate_arviz(inferred)
    print("Validating parameter sampling.")
    param_validate_arviz(inferred, variables)
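param_posterior_arviz_plots and the two validators are project helpers that are not shown; a hedged sketch of what the plotting helper might look like (only the name is from the source, the body is an assumption):

def param_posterior_arviz_plots(inferred, variables):
    # Trace and posterior-density plots for the selected variables
    az.plot_trace(inferred, var_names=variables)
    az.plot_posterior(inferred, var_names=variables)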
def get_inference_data4(self, data, eight_schools_params):
    """minimal arguments: no coords, dims, or predictive groups."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=None,
        prior=data.obj,
        prior_predictive=None,
        observed_data={"y": eight_schools_params["y"]},
        coords=None,
        dims=None,
    )
def full_fit_to_inference(
    fit: CmdStanMCMC,
    params: Sequence[str],
    coords: dict,
    dims: dict,
    alr_params: Sequence[str] = None,
    posterior_predictive: str = None,
    log_likelihood: str = None,
) -> az.InferenceData:
    """Convert fitted Stan model into inference object.

    :param fit: Fitted model
    :type fit: CmdStanMCMC

    :param params: Posterior fitted parameters to include
    :type params: Sequence[str]

    :param coords: Mapping of entries in dims to labels
    :type coords: dict

    :param dims: Dimensions of parameters in the model
    :type dims: dict

    :param alr_params: Parameters to convert from ALR to CLR
    :type alr_params: Sequence[str], optional

    :param posterior_predictive: Name of posterior predictive values from
        Stan model to include in ``arviz`` InferenceData object
    :type posterior_predictive: str, optional

    :param log_likelihood: Name of log likelihood values from Stan model
        to include in ``arviz`` InferenceData object
    :type log_likelihood: str, optional

    :returns: ``arviz`` InferenceData object with selected values
    :rtype: az.InferenceData
    """
    if log_likelihood is not None and log_likelihood not in dims:
        raise KeyError("Must include dimensions for log-likelihood!")
    if posterior_predictive is not None and posterior_predictive not in dims:
        raise KeyError("Must include dimensions for posterior predictive!")

    inference = az.from_cmdstanpy(
        fit,
        coords=coords,
        log_likelihood=log_likelihood,
        posterior_predictive=posterior_predictive,
        dims=dims
    )

    vars_to_drop = set(inference.posterior.data_vars).difference(params)
    inference.posterior = _drop_data(inference.posterior, vars_to_drop)
    return inference
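A hypothetical call, assuming a model with a "beta" parameter indexed by covariate and feature dims (every name here is illustrative, not from the source):

inference = full_fit_to_inference(
    fit=mcmc_fit,  # a CmdStanMCMC object
    params=["beta"],
    coords={"covariate": ["intercept", "treatment"],
            "feature": feature_names},
    dims={"beta": ["covariate", "feature"],
          "log_lhood": ["tbl_sample", "feature"],
          "y_predict": ["tbl_sample", "feature"]},
    posterior_predictive="y_predict",
    log_likelihood="log_lhood",
)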
def get_inference_data3(self, data, eight_schools_params):
    """multiple vars as lists."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=["y_hat", "log_lik"],
        prior=data.obj,
        prior_predictive=["y_hat", "log_lik"],
        observed_data={"y": eight_schools_params["y"]},
        coords={"school": np.arange(eight_schools_params["J"])},
        dims={
            "theta": ["school"],
            "y": ["school"],
            "y_hat": ["school"],
            "eta": ["school"],
        },
    )
def get_inference_data5(self, data, eight_schools_params):
    """dtypes inferred from the Stan program code."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=None,
        prior=data.obj,
        prior_predictive=None,
        log_likelihood="log_lik",
        observed_data={"y": eight_schools_params["y"]},
        coords=None,
        dims=None,
        dtypes=data.model.code(),
    )
def get_inference_data4(self, data, eight_schools_params):
    """explicit dtypes; log likelihood disabled."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=None,
        prior=data.obj,
        prior_predictive=None,
        log_likelihood=False,
        observed_data={"y": eight_schools_params["y"]},
        coords=None,
        dims=None,
        dtypes={"eta": int, "theta": int},
    )
def single_feature_fit_to_inference(
    fit: CmdStanMCMC,
    params: Sequence[str],
    coords: dict,
    dims: dict,
    posterior_predictive: str = None,
    log_likelihood: str = None,
) -> az.InferenceData:
    """Convert single feature fit to InferenceData.

    :param fit: Single feature fit with CmdStanPy
    :type fit: cmdstanpy.CmdStanMCMC

    :param params: Posterior fitted parameters to include
    :type params: Sequence[str]

    :param coords: Coordinates to use for annotating Inference dims
    :type coords: dict

    :param dims: Dimensions of parameters in fitted model
    :type dims: dict

    :param posterior_predictive: Name of variable holding PP values
    :type posterior_predictive: str

    :param log_likelihood: Name of variable holding LL values
    :type log_likelihood: str

    :returns: InferenceData object of single feature
    :rtype: az.InferenceData
    """
    _coords = coords.copy()
    _coords.pop("feature", None)

    # Rebuild the dim lists rather than calling v.remove("feature") on a
    # shallow copy, which would mutate the caller's dims in-place.
    _dims = {k: [d for d in v if d != "feature"] for k, v in dims.items()}

    feat_inf = az.from_cmdstanpy(
        posterior=fit,
        posterior_predictive=posterior_predictive,
        log_likelihood=log_likelihood,
        coords=_coords,
        dims=_dims
    )
    vars_to_drop = set(feat_inf.posterior.data_vars).difference(params)
    feat_inf.posterior = _drop_data(feat_inf.posterior, vars_to_drop)
    return feat_inf
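A hypothetical single-feature call (names illustrative): the "feature" entries are stripped internally because a per-feature fit no longer carries that dimension:

feat_inf = single_feature_fit_to_inference(
    fit=feature_fit,  # CmdStanMCMC for one feature
    params=["beta_var"],
    coords={"covariate": covariate_names, "feature": feature_names},
    dims={"beta_var": ["covariate", "feature"]},
)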
def get_inference_data(self, data, eight_schools_params):
    """vars as str."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive="y_hat",
        prior=data.obj,
        prior_predictive="y_hat",
        observed_data={"y": eight_schools_params["y"]},
        log_likelihood="log_lik",
        coords={"school": np.arange(eight_schools_params["J"])},
        dims={
            "theta": ["school"],
            "y": ["school"],
            "log_lik": ["school"],
            "y_hat": ["school"],
            "eta": ["school"],
        },
    )
def generate_samples(
    study_name: str,
    measurements: pd.DataFrame,
    model_configurations: List[ModelConfiguration],
) -> None:
    """Run cmdstanpy.CmdStanModel.sample, do diagnostics and save results.

    :param study_name: label used to name the output files
    :param measurements: data to fit the models against
    :param model_configurations: configurations of the models to fit
    """
    infds = {}
    for model_config in model_configurations:
        fit_name = f"{study_name}-{model_config.name}"
        print(f"Fitting model {fit_name}...")
        loo_file = os.path.join(LOO_DIR, f"loo_{fit_name}.pkl")
        infd_file = os.path.join(INFD_DIR, f"infd_{fit_name}.ncdf")
        json_file = os.path.join(JSON_DIR, f"input_data_{fit_name}.json")
        stan_input = model_config.stan_input_function(measurements)
        print(f"Writing input data to {json_file}")
        jsondump(json_file, stan_input)
        model = CmdStanModel(
            model_name=fit_name, stan_file=model_config.stan_file
        )
        print(f"Writing csv files to {SAMPLES_DIR}...")
        mcmc = model.sample(
            data=stan_input,
            output_dir=SAMPLES_DIR,
            **model_config.sample_kwargs,
        )
        print(mcmc.diagnose().replace("\n\n", "\n"))
        infd = az.from_cmdstanpy(
            mcmc, **model_config.infd_kwargs_function(measurements)
        )
        print(az.summary(infd))
        infds[fit_name] = infd
        print(f"Writing inference data to {infd_file}")
        infd.to_netcdf(infd_file)
        print(f"Writing psis-loo results to {loo_file}\n")
        az.loo(infd, pointwise=True).to_pickle(loo_file)
    if len(infds) > 1:
        comparison = az.compare(infds)
        print(f"Loo comparison:\n{comparison}")
        comparison.to_csv(os.path.join(LOO_DIR, "loo_comparison.csv"))
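jsondump is a project helper that is not shown here; a minimal sketch consistent with its use above (the numpy handling is an assumption):

import json

import numpy as np

def jsondump(path: str, data: dict) -> None:
    # Serialize a Stan input dictionary to JSON, converting numpy
    # arrays to plain lists so they are JSON-serializable.
    def default(obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        raise TypeError(f"{type(obj)} is not JSON serializable")

    with open(path, "w") as f:
        json.dump(data, f, default=default)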
def get_inference_data3(self, data, eight_schools_params):
    """multiple vars as lists."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=["y_hat", "log_lik"],
        prior=data.obj,
        prior_predictive=["y_hat", "log_lik"],
        observed_data={"y": eight_schools_params["y"]},
        coords={
            "school": np.arange(eight_schools_params["J"]),
            "half school": ["a", "b", "c", "d"],
            "extra_dim": ["x", "y"],
        },
        dims={
            "eta": ["extra_dim", "half school"],
            "y": ["school"],
            "y_hat": ["school"],
            "theta": ["school"],
            "log_lik": ["log_lik_dim"],
        },
        dtypes=data.model,
    )
def get_inference_data_warmup_true_is_false(self, data, eight_schools_params):
    """vars as str."""
    return from_cmdstanpy(
        posterior=data.obj_warmup,
        posterior_predictive="y_hat",
        predictions="y_hat",
        prior=data.obj_warmup,
        prior_predictive="y_hat",
        observed_data={"y": eight_schools_params["y"]},
        constant_data={"y": eight_schools_params["y"]},
        predictions_constant_data={"y": eight_schools_params["y"]},
        log_likelihood="log_lik",
        coords={"school": np.arange(eight_schools_params["J"])},
        dims={
            "eta": ["extra_dim", "half school"],
            "y": ["school"],
            "log_lik": ["school"],
            "y_hat": ["school"],
            "theta": ["school"],
        },
        save_warmup=False,
    )
def get_inference_data2(self, data, eight_schools_params):
    """vars as lists."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=["y_hat"],
        predictions=["y_hat", "log_lik"],
        prior=data.obj,
        prior_predictive=["y_hat"],
        observed_data={"y": eight_schools_params["y"]},
        constant_data=eight_schools_params,
        predictions_constant_data=eight_schools_params,
        log_likelihood=["log_lik", "y_hat"],
        coords={
            "school": np.arange(eight_schools_params["J"]),
            "log_lik_dim": np.arange(eight_schools_params["J"]),
        },
        dims={
            "theta": ["school"],
            "y": ["school"],
            "y_hat": ["school"],
            "eta": ["school"],
            "log_lik": ["log_lik_dim"],
        },
    )
repo = Repo("./", search_parent_directories=True) # repo_rootdir holds the absolute path to the top-level of our repo repo_rootdir = repo.working_tree_dir sm_gaussF = cmdstanpy.CmdStanModel( stan_file=f"{repo_rootdir}/code/stan/test_gaussF.stan", compile=True,) # stan needs to know how many data points to generate, # so pick a representative promoter stan_data = dict( a=22.35, b=-17.3, c=12.5, z=-1.59, ) stan_output = sm_gaussF.sample( data=stan_data, fixed_param=True, iter_sampling=1, ) # Convert to ArviZ InferenceData object stan_output = az.from_cmdstanpy( stan_output, posterior_predictive=["output"] ) # uncomment for running live in ipython, stan_output.posterior_predictive.output
# Define the data dictionary
data_dict = {
    'J': d['replicate'].max(),
    'N_yield': len(d),
    'N_calib': len(calib),
    'idx': d['replicate'].values.astype(int),
    'calib_conc': calib['carbon_conc_mM'].values.astype(float),
    'calib_rel_areas': calib['rel_area_phosphate'].values.astype(float),
    'optical_density': d['od_600nm'].values.astype(float),
    'yield_rel_areas': d['rel_area_phosphate'].values.astype(float)
}

# Sample the inferential model
samps = model.sample(data=data_dict)
samps = az.from_cmdstanpy(samps)
samps = samps.posterior.to_dataframe().reset_index()

# Tidy low-level parameters; copy so drop_duplicates does not warn
# about operating on a slice
_samps = samps[[
    'yield_inter_dim_0', 'yield_slope_dim_0', 'yield_inter', 'yield_slope'
]].copy()
_samps.drop_duplicates(inplace=True)
pairs = [['yield_inter_dim_0', 'yield_inter'],
         ['yield_slope_dim_0', 'yield_slope']]
dfs = []
for p in pairs:
    _params = _samps[p]
    _df = pd.DataFrame([])
    _df['value'] = _params[p[1]]
    _df['parameter'] = p[1]
    dfs.append(_df)
# maximum likelihood estimation
optim = sm.optimize(data=mdl_data).optimized_params_pd
optim[optim.columns[~optim.columns.str.startswith("lp")]]

# variational inference
vb = sm.variational(data=mdl_data)
vb.variational_sample.columns = vb.variational_params_dict.keys()
vb_name = vb.variational_params_pd.columns[
    ~vb.variational_params_pd.columns.str.startswith(("lp", "log_"))]
vb.variational_params_pd[vb_name]
vb.variational_sample[vb_name]

# Markov chain Monte Carlo
fit = sm.sample(
    data=mdl_data, show_progress=True, chains=4,
    iter_sampling=50000, iter_warmup=10000, thin=5)

fit.draws().shape  # iterations, chains, parameters
fit.summary().loc[vb_name]  # pandas DataFrame
print(fit.diagnose())

posterior = fit.stan_variables()
az_trace = az.from_cmdstanpy(fit)
az.summary(az_trace).loc[vb_name]  # pandas DataFrame
az.plot_trace(az_trace)
# maximum likelihood estimation
optim_modif = sm_modif.optimize(data=mdl_data).optimized_params_pd
optim_modif[optim_modif.columns[~optim_modif.columns.str.startswith("lp")]]

# variational inference
vb_modif = sm_modif.variational(data=mdl_data)
vb_modif.variational_sample.columns = vb_modif.variational_params_dict.keys()
vb_name = vb_modif.variational_params_pd.columns[
    ~vb_modif.variational_params_pd.columns.str.startswith(("lp", "log_"))]
vb_modif.variational_params_pd[vb_name]
vb_modif.variational_sample[vb_name]

# Markov chain Monte Carlo
fit_modif = sm_modif.sample(
    data=mdl_data, show_progress=True, chains=4,
    iter_sampling=50000, iter_warmup=10000, thin=5)

fit_modif.draws().shape  # iterations, chains, parameters
fit_modif.summary().loc[vb_name]  # pandas DataFrame
print(fit_modif.diagnose())

posterior = {k: fit_modif.stan_variable(k) for k in var_name}
az_trace = az.from_cmdstanpy(fit_modif)
az.summary(az_trace).loc[vb_name]  # pandas DataFrame
az.plot_trace(az_trace, var_names=var_name)
# ---- data ---- #
eight_school_data = {
    "J": 8,
    "y": np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0]),
    "sigma": np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0]),
}

# ---- model ---- #
stan_file = model_dir / "schools.stan"
stan_model = CmdStanModel(stan_file=stan_file)
stan_model.compile()

# ---- fitting ---- #
stan_fit = stan_model.sample(data=eight_school_data)

# ---- results ---- #
cmdstanpy_data = az.from_cmdstanpy(
    posterior=stan_fit,
    posterior_predictive="y_hat",
    observed_data={"y": eight_school_data["y"]},
    log_likelihood="log_lik",
    coords={"school": np.arange(eight_school_data["J"])},
    dims={
        "theta": ["school"],
        "y": ["school"],
        "log_lik": ["school"],
        "y_hat": ["school"],
        "theta_tilde": ["school"],
    },
)
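Once converted, the InferenceData groups can be inspected directly; "mu" and "tau" below assume the usual eight-schools parameterization:

print(cmdstanpy_data)  # lists posterior, posterior_predictive, log_likelihood, ...
az.summary(cmdstanpy_data, var_names=["mu", "tau"])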
sm_ols = CmdStanModel(stan_file=modelfile_ols)
fit_ols = sm_ols.sample(
    data=model_data_dict, show_progress=True, chains=4,
    iter_sampling=50000, iter_warmup=10000, thin=5)

fit_ols.draws().shape  # iterations, chains, parameters
fit_ols.summary().loc[var_name]  # pandas DataFrame
print(fit_ols.diagnose())

posterior_ols = {k: fit_ols.stan_variable(k) for k in var_name}
az_trace_ols = az.from_cmdstanpy(fit_ols)
az.summary(az_trace_ols).loc[var_name]  # pandas DataFrame
az.plot_trace(az_trace_ols, var_names=var_name)

gd = sns.jointplot(
    x=posterior_ols["b0"],
    y=posterior_ols["b1"],
    marginal_kws={"kde": True, "kde_kws": {"cut": 1}},
)
gd.plot_joint(sns.kdeplot, zorder=2, n_levels=10, cmap="gray_r")
gd.fig.suptitle("Posterior joint distribution (OLS)", y=1.02)
repo_rootdir = repo.working_tree_dir

# first load data using module util
df_unreg, = load_FISH_by_promoter(("unreg",))
# pull out one specific promoter for convenience for prior pred check & SBC
df_UV5 = df_unreg[df_unreg["experiment"] == "UV5"]

sm = cmdstanpy.CmdStanModel(
    stan_file=f"{repo_rootdir}/code/stan/constit_post_inf.stan",
    compile=True,
)

all_samples = {}
for gene in df_unreg['experiment'].unique():
    temp_df = df_unreg[df_unreg['experiment'] == gene]
    stan_data = dict(
        N=len(temp_df),
        mRNA_counts=temp_df["mRNA_cell"].values.astype(int),
        ppc=0  # if you produce ppc samples, the InferenceData obj is HUGE
    )
    with be_quiet_stan():
        posterior_samples = sm.sample(data=stan_data, chains=6)
    all_samples[gene] = az.from_cmdstanpy(
        posterior_samples, posterior_predictive=["mRNA_counts_ppc"])
    print(f"For promoter {gene}...")
    check_all_diagnostics(all_samples[gene])

# pickle the samples. ~20 separate netcdfs, only for use together? No thanks
outfile = open(f"{repo_rootdir}/data/mcmc_samples/constit_post_inf.pkl", 'wb')
pickle.dump(all_samples, outfile)
outfile.close()
def single_fit_to_inference(
    fit: CmdStanMCMC,
    params: Sequence[str],
    coords: dict,
    dims: dict,
    alr_params: Sequence[str] = None,
    posterior_predictive: str = None,
    log_likelihood: str = None,
) -> az.InferenceData:
    """Convert fitted Stan model into inference object.

    :param fit: Fitted model
    :type fit: CmdStanMCMC

    :param params: Posterior fitted parameters to include
    :type params: Sequence[str]

    :param coords: Mapping of entries in dims to labels
    :type coords: dict

    :param dims: Dimensions of parameters in the model
    :type dims: dict

    :param alr_params: Parameters to convert from ALR to CLR (this will
        be ignored if the model has been parallelized across features)
    :type alr_params: Sequence[str], optional

    :param posterior_predictive: Name of posterior predictive values from
        Stan model to include in ``arviz`` InferenceData object
    :type posterior_predictive: str, optional

    :param log_likelihood: Name of log likelihood values from Stan model
        to include in ``arviz`` InferenceData object
    :type log_likelihood: str, optional

    :returns: ``arviz`` InferenceData object with selected values
    :rtype: az.InferenceData
    """
    # guard against alr_params=None so the dict comprehension and the
    # conversion loop below are safe to run
    if alr_params is None:
        alr_params = []

    # remove alr params so initial dim fitting works
    new_dims = {k: v for k, v in dims.items() if k not in alr_params}

    if log_likelihood is not None and log_likelihood not in dims:
        raise KeyError("Must include dimensions for log-likelihood!")
    if posterior_predictive is not None and posterior_predictive not in dims:
        raise KeyError("Must include dimensions for posterior predictive!")

    inference = az.from_cmdstanpy(
        fit,
        coords=coords,
        log_likelihood=log_likelihood,
        posterior_predictive=posterior_predictive,
        dims=new_dims)

    vars_to_drop = set(inference.posterior.data_vars).difference(params)
    inference.posterior = _drop_data(inference.posterior, vars_to_drop)

    # Convert each param in ALR coordinates to CLR coordinates
    for param in alr_params:
        # Want to run on each chain independently
        all_chain_clr_coords = []
        all_chain_alr_coords = np.split(fit.stan_variable(param),
                                        fit.chains, axis=0)
        for i, chain_alr_coords in enumerate(all_chain_alr_coords):
            # arviz 0.11.2 seems to flatten for some reason even though
            # the PR was specifically supposed to do the opposite.
            # Not sure what's going on but just going to go through cmdstanpy.
            chain_clr_coords = convert_beta_coordinates(chain_alr_coords)
            all_chain_clr_coords.append(chain_clr_coords)
        all_chain_clr_coords = np.array(all_chain_clr_coords)

        tmp_dims = ["chain", "draw"] + dims[param]
        mcmc_coords = {
            "chain": np.arange(fit.chains),
            "draw": np.arange(fit.num_draws_sampling)
        }
        # restrict param DataArray to only required dims/coords
        tmp_coords = {k: coords[k] for k in dims[param]}
        param_da = xr.DataArray(
            all_chain_clr_coords,
            dims=tmp_dims,
            coords={**tmp_coords, **mcmc_coords})
        inference.posterior[param] = param_da

        # TODO: Clean this up
        all_dims = list(inference.posterior.dims)
        dims_to_drop = []
        for dim in all_dims:
            if re.match(f"{param}_dim_\\d", dim):
                dims_to_drop.append(dim)
        inference.posterior = inference.posterior.drop_dims(dims_to_drop)
    return inference
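convert_beta_coordinates is not shown; a hedged sketch of the standard ALR-to-CLR transform it presumably performs (the shape convention and the position of the reference column are assumptions):

import numpy as np

def convert_beta_coordinates(alr_coords: np.ndarray) -> np.ndarray:
    # ALR coordinates are log-ratios against an implicit reference feature;
    # appending a zero column restores that reference, and subtracting the
    # mean over features re-centers the log-ratios, which is the CLR transform.
    zeros = np.zeros(alr_coords.shape[:-1] + (1,))
    full = np.concatenate([zeros, alr_coords], axis=-1)
    return full - full.mean(axis=-1, keepdims=True)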
    iter_sampling=200,
    chains=4)

# save to arviz
var_name = ["slack_comments", "github_commits"]
idata_stan = az.from_cmdstanpy(  # DOES NOT WORK, problems everywhere, idk why
    posterior=posterior,
    prior=prior,
    posterior_predictive=[i + "_hat" for i in var_name],
    prior_predictive=[i + "_hat" for i in var_name],
    observed_data=var_name,
    constant_data=["time_since_joined"],
    log_likelihood={i: "log_likelihood_" + i for i in var_name},
    predictions=[i + "_pred" for i in var_name],
    predictions_constant_data=["time_since_joined_pred"],
    coords={
        "developer": names,
        "candidate developer": candidate_devs
    },
    dims={
        "slack_comments": ["developer"],
        "github_commits": ["developer"],
        "slack_comments_hat": ["developer"],
        "github_commits_hat": ["developer"],
        "time_since_joined": ["developer"],
        "slack_comments_pred": ["candidate developer"],
        "github_commits_pred": ["candidate developer"],
        "time_since_joined_pred": ["candidate developer"],
    })
def _batch_func(counts: np.ndarray,
                replicates: np.ndarray,
                batches: np.ndarray,
                depth: np.ndarray,
                mc_samples: int = 1000,
                chains: int = 4,
                disp_scale: float = 1,
                sigma_scale: float = 1,
                reference_loc: float = -5,
                reference_scale: float = 5) -> az.InferenceData:
    replicate_encoder = LabelEncoder()
    replicate_encoder.fit(replicates)
    replicate_ids = replicate_encoder.transform(replicates)

    # identify reference replicates - these will be the
    # first sample for each replicate group
    ref_ids, lookup = np.zeros(len(replicate_ids)), {}
    for i, c in enumerate(replicate_ids):
        if c not in lookup:
            lookup[c] = i
    for i, c in enumerate(replicate_ids):
        ref_ids[i] = lookup[c]

    batch_encoder = LabelEncoder()
    batch_encoder.fit(batches)
    batch_ids = batch_encoder.transform(batches)

    batch_ids = batch_ids.astype(np.int64) + 1
    ref_ids = ref_ids.astype(np.int64) + 1

    code = os.path.join(os.path.dirname(__file__),
                        'assets/batch_pln_single.stan')
    sm = CmdStanModel(stan_file=code)
    dat = {
        'N': counts.shape[0],
        'R': int(max(ref_ids) + 1),
        'B': int(max(batch_ids) + 1),
        'depth': list(np.log(depth)),
        'y': list(map(int, counts.astype(np.int64))),
        'ref_ids': list(map(int, ref_ids)),
        'batch_ids': list(map(int, batch_ids)),
        'sigma_scale': sigma_scale,
        'disp_scale': disp_scale,
        'reference_loc': reference_loc,
        'reference_scale': reference_scale
    }
    with tempfile.TemporaryDirectory() as temp_dir_name:
        data_path = os.path.join(temp_dir_name, 'data.json')
        with open(data_path, 'w') as f:
            json.dump(dat, f)
        # Obtain an initial guess with MLE
        # guess = sm.optimize(data=data_path, inits=0)
        # see https://mattocci27.github.io/assets/poilog.html
        # for recommended parameters for poisson log normal
        fit = sm.sample(
            data=data_path,
            iter_sampling=mc_samples,
            # inits=guess.optimized_params_dict,
            chains=chains,
            iter_warmup=1000,
            adapt_delta=0.9,
            max_treedepth=20)
        fit.diagnose()
        inf = az.from_cmdstanpy(
            fit,
            posterior_predictive='y_predict',
            log_likelihood='log_lhood',
        )
        return inf
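A hypothetical invocation with synthetic inputs, just to show the expected shapes (all values illustrative; it still compiles and samples the packaged Stan model):

counts = np.random.poisson(5, size=12)
replicates = np.repeat(['r1', 'r2', 'r3'], 4)
batches = np.tile(['b1', 'b2'], 6)
depth = np.full(12, 1000.0)
inf = _batch_func(counts, replicates, batches, depth, mc_samples=500)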
@classmethod
def from_cmdstanpy(cls, fit):
    inference_data = az.from_cmdstanpy(fit)
    return cls(inference_data)
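Hypothetical usage, assuming the enclosing class wraps an InferenceData (the class name is illustrative):

results = StanResults.from_cmdstanpy(fit)  # fit: a CmdStanMCMC object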
from pathlib import Path
from typing import Dict

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
from cmdstanpy import CmdStanMCMC, CmdStanModel

model_dir = Path("stan_code")
data_dir = Path("data")

# ---- data ---- #
discharge_data: Dict = {"T": len(Q), "Q": Q, "P": precip}

# ---- model ---- #
stan_file = model_dir / "abcmodel.stan"
stan_model = CmdStanModel(stan_file=stan_file)
stan_model.compile()

# ---- fit parameters ---- #
abcmodel_fit: CmdStanMCMC = stan_model.sample(
    data=discharge_data,
    chains=4,
    cores=1,
    seed=1111,
    show_progress=True,
)

# ---- get simulations ---- #
posterior = az.from_cmdstanpy(
    posterior=abcmodel_fit,  # was bern_fit, which is undefined here
    posterior_predictive="y",
)
df_unreg, = load_FISH_by_promoter(("unreg",))
# pull out one specific promoter for convenience for prior pred check & SBC
df_UV5 = df_unreg[df_unreg["experiment"] == "UV5"]

# ############################################################################
# PRIOR PREDICTIVE CHECK
# ############################################################################
sm_prior_pred = cmdstanpy.CmdStanModel(
    stan_file=f"{repo_rootdir}/code/stan/constit_prior_pred.stan",
    compile=True,
)

# stan needs to know how many data points to generate,
# so pick a representative promoter
data_prior_pred = dict(N=len(df_UV5))

prior_pred_samples = sm_prior_pred.sample(
    data=data_prior_pred,
    fixed_param=True,
    iter_sampling=1000,
)

# Convert to ArviZ InferenceData object
prior_pred_samples = az.from_cmdstanpy(
    posterior=prior_pred_samples,  # this line b/c of arviz bug, PR#979
    prior=prior_pred_samples,
    prior_predictive=['mRNA_counts'])

prior_pred_samples.to_netcdf(
    f"{repo_rootdir}/data/stan_samples/constit_prior_pred.nc")
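The saved samples can be reloaded later without rerunning Stan (path reused from above):

prior_pred_samples = az.from_netcdf(
    f"{repo_rootdir}/data/stan_samples/constit_prior_pred.nc")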
percs = [(2.5, 97.5), (12.5, 87.5), (25, 75), (37.5, 62.5), (47.5, 52.5)]
perc_labels = [95, 75, 50, 25, 5]

# %%
# Iterate through each carbon source and strain and perform the inference
samples, summary, fits = [], [], []
for g, d in data.groupby(['strain', 'carbon_source', 'medium_base']):
    data_dict = {
        'J': d['replicate'].max(),
        'N': len(d),
        'idx': d['replicate'].values.astype(int),
        'time': d['elapsed_time_hr'].values.astype(float),
        'OD': d['od_600nm'].values.astype(float)
    }
    _samples = model.sample(data=data_dict)
    _samples = az.from_cmdstanpy(_samples)
    _samples = _samples.posterior.to_dataframe().reset_index()

    # Get the low level parameters; copy so the rename below doesn't warn
    # about operating on a slice
    params = _samples[['lam_dim_0', 'OD_init', 'lam']].copy()
    params.rename(columns={'lam_dim_0': 'level'}, inplace=True)
    params['level'] = [f'replicate {i+1}' for i in params['level'].values]
    params = params.melt('level', var_name='parameter')

    # hyper_params
    hyper_params = _samples[[
        'OD_init_mu', 'lam_mu', 'OD_init_sigma', 'lam_sigma', 'sigma'
    ]].copy()
    hyper_params['level'] = 'hyperparameter'
    hyper_params = hyper_params.melt('level', var_name='parameter')
    hyper_params.loc[hyper_params['parameter'] == 'lam_mu',
bern_fit: CmdStanMCMC = bernoulli_model.sample(
    data=bernoulli_data,
    chains=4,
    cores=1,
    seed=1111,
    show_progress=True,
)

# ---- results ---- #
"""samples = multi-dimensional array
all draws from all chains arranged as dimensions: (draws, chains, columns).
"""
posterior = az.from_cmdstanpy(
    posterior=bern_fit,
    posterior_predictive="y",
    # observed_data={"y": np.array(bernoulli_data["y"])},
)

# POSTERIOR PREDICTIVE CHECKS #
bernoulli_ppc = CmdStanModel(stan_file=model_dir / "bernoulli_ppc.stan")

# fit the model to the data
bern_fit = bernoulli_ppc.sample(data=bernoulli_data)

# PRIOR PREDICTIVE CHECKS #
# https://cmdstanpy.readthedocs.io/en/latest/sample.html
# generate data - fixed_param=True
datagen_stan = model_dir / "bernoulli.stan"