Example #1
def _single_feature_to_inf(
    fit: CmdStanMCMC,
    coords: dict,
    dims: dict,
    vars_to_drop: Sequence[str],
    posterior_predictive: str = None,
    log_likelihood: str = None,
) -> az.InferenceData:
    """Convert single feature fit to InferenceData.

    :param fit: Single feature fit with CmdStanPy
    :type fit: cmdstanpy.CmdStanMCMC

    :param coords: Coordinates to use for annotating Inference dims
    :type coords: dict

    :param dims: Dimensions of parameters in fitted model
    :type dims: dict

    :param vars_to_drop: Names of variables to drop from the posterior group
    :type vars_to_drop: Sequence[str]
    :param posterior_predictive: Name of variable holding PP values
    :type posterior_predictive: str

    :param log_likelihood: Name of variable holding LL values
    :type log_likelihood: str

    :returns: InferenceData object of single feature
    :rtype: az.InferenceData
    """
    feat_inf = az.from_cmdstanpy(posterior=fit,
                                 posterior_predictive=posterior_predictive,
                                 log_likelihood=log_likelihood,
                                 coords=coords,
                                 dims=dims)
    feat_inf.posterior = _drop_data(feat_inf.posterior, vars_to_drop)
    return feat_inf
Example #2
def standard_validate_arviz(fit, ll_label, pp_label, observed, variables):
    inferred = az.from_cmdstanpy(fit,
                                 log_likelihood=ll_label,
                                 posterior_predictive=pp_label,
                                 observed_data={'y': observed})
    print("Displaying posterior plots.")
    param_posterior_arviz_plots(inferred, variables)
    print("Validating inference run.")
    _ = run_validate_arviz(inferred)
    print("Validating parameter sampling.")
    param_validate_arviz(inferred, variables)
Example #3
def get_inference_data4(self, data, eight_schools_params):
    """multiple vars as lists."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=None,
        prior=data.obj,
        prior_predictive=None,
        observed_data={"y": eight_schools_params["y"]},
        coords=None,
        dims=None,
    )
Example #4
def full_fit_to_inference(
    fit: CmdStanMCMC,
    params: Sequence[str],
    coords: dict,
    dims: dict,
    alr_params: Sequence[str] = None,
    posterior_predictive: str = None,
    log_likelihood: str = None,
) -> az.InferenceData:
    """Convert fitted Stan model into inference object.

    :param fit: Fitted model
    :type fit: CmdStanMCMC

    :param params: Posterior fitted parameters to include
    :type params: Sequence[str]

    :param coords: Mapping of entries in dims to labels
    :type coords: dict

    :param dims: Dimensions of parameters in the model
    :type dims: dict

    :param alr_params: Parameters to convert from ALR to CLR
    :type alr_params: Sequence[str], optional

    :param posterior_predictive: Name of posterior predictive values from
        Stan model to include in ``arviz`` InferenceData object
    :type posterior_predictive: str, optional

    :param log_likelihood: Name of log likelihood values from Stan model
        to include in ``arviz`` InferenceData object
    :type log_likelihood: str, optional

    :returns: ``arviz`` InferenceData object with selected values
    :rtype: az.InferenceData
    """
    if log_likelihood is not None and log_likelihood not in dims:
        raise KeyError("Must include dimensions for log-likelihood!")
    if posterior_predictive is not None and posterior_predictive not in dims:
        raise KeyError("Must include dimensions for posterior predictive!")

    inference = az.from_cmdstanpy(
        fit,
        coords=coords,
        log_likelihood=log_likelihood,
        posterior_predictive=posterior_predictive,
        dims=dims
    )

    vars_to_drop = set(inference.posterior.data_vars).difference(params)
    inference.posterior = _drop_data(inference.posterior, vars_to_drop)

    return inference
Example #5
def get_inference_data3(self, data, eight_schools_params):
    """multiple vars as lists."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=["y_hat", "log_lik"],
        prior=data.obj,
        prior_predictive=["y_hat", "log_lik"],
        observed_data={"y": eight_schools_params["y"]},
        coords={"school": np.arange(eight_schools_params["J"])},
        dims={"theta": ["school"], "y": ["school"], "y_hat": ["school"], "eta": ["school"]},
    )
Example #6
def get_inference_data5(self, data, eight_schools_params):
    """multiple vars as lists."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=None,
        prior=data.obj,
        prior_predictive=None,
        log_likelihood="log_lik",
        observed_data={"y": eight_schools_params["y"]},
        coords=None,
        dims=None,
        dtypes=data.model.code(),
    )
Example #7
def get_inference_data4(self, data, eight_schools_params):
    """multiple vars as lists."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=None,
        prior=data.obj,
        prior_predictive=None,
        log_likelihood=False,
        observed_data={"y": eight_schools_params["y"]},
        coords=None,
        dims=None,
        dtypes={"eta": int, "theta": int},
    )
Example #8
def single_feature_fit_to_inference(
    fit: CmdStanMCMC,
    params: Sequence[str],
    coords: dict,
    dims: dict,
    posterior_predictive: str = None,
    log_likelihood: str = None,
) -> az.InferenceData:
    """Convert single feature fit to InferenceData.

    :param fit: Single feature fit with CmdStanPy
    :type fit: cmdstanpy.CmdStanMCMC

    :param params: Posterior fitted parameters to include
    :type params: Sequence[str]

    :param coords: Coordinates to use for annotating Inference dims
    :type coords: dict

    :param dims: Dimensions of parameters in fitted model
    :type dims: dict

    :param posterior_predictive: Name of variable holding PP values
    :type posterior_predictive: str

    :param log_likelihood: Name of variable holding LL values
    :type log_likelihood: str

    :returns: InferenceData object of single feature
    :rtype: az.InferenceData
    """
    _coords = coords.copy()
    if "feature" in coords:
        _coords.pop("feature")

    # copy the per-parameter dim lists as well; a shallow dims.copy() would
    # let the "feature" removal mutate the caller's lists
    _dims = {k: list(v) for k, v in dims.items()}
    for v in _dims.values():
        if "feature" in v:
            v.remove("feature")

    feat_inf = az.from_cmdstanpy(
        posterior=fit,
        posterior_predictive=posterior_predictive,
        log_likelihood=log_likelihood,
        coords=_coords,
        dims=_dims
    )
    vars_to_drop = set(feat_inf.posterior.data_vars).difference(params)
    feat_inf.posterior = _drop_data(feat_inf.posterior, vars_to_drop)
    return feat_inf
Example #9
def get_inference_data(self, data, eight_schools_params):
    """vars as str."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive="y_hat",
        prior=data.obj,
        prior_predictive="y_hat",
        observed_data={"y": eight_schools_params["y"]},
        log_likelihood="log_lik",
        coords={"school": np.arange(eight_schools_params["J"])},
        dims={
            "theta": ["school"],
            "y": ["school"],
            "log_lik": ["school"],
            "y_hat": ["school"],
            "eta": ["school"],
        },
    )
Example #10
def generate_samples(
    study_name: str,
    measurements: pd.DataFrame,
    model_configurations: List[ModelConfiguration],
) -> None:
    """Run cmdstanpy.CmdStanModel.sample, do diagnostics and save results.

    :param study_name: a string
    """
    infds = {}
    for model_config in model_configurations:
        fit_name = f"{study_name}-{model_config.name}"
        print(f"Fitting model {fit_name}...")
        loo_file = os.path.join(LOO_DIR, f"loo_{fit_name}.pkl")
        infd_file = os.path.join(INFD_DIR, f"infd_{fit_name}.ncdf")
        json_file = os.path.join(JSON_DIR, f"input_data_{fit_name}.json")
        stan_input = model_config.stan_input_function(measurements)
        print(f"Writing input data to {json_file}")
        jsondump(json_file, stan_input)
        model = CmdStanModel(
            model_name=fit_name, stan_file=model_config.stan_file
        )
        print(f"Writing csv files to {SAMPLES_DIR}...")
        mcmc = model.sample(
            data=stan_input,
            output_dir=SAMPLES_DIR,
            **model_config.sample_kwargs,
        )
        print(mcmc.diagnose().replace("\n\n", "\n"))
        infd = az.from_cmdstanpy(
            mcmc, **model_config.infd_kwargs_function(measurements)
        )
        print(az.summary(infd))
        infds[fit_name] = infd
        print(f"Writing inference data to {infd_file}")
        infd.to_netcdf(infd_file)
        print(f"Writing psis-loo results to {loo_file}\n")
        az.loo(infd, pointwise=True).to_pickle(loo_file)
    if len(infds) > 1:
        comparison = az.compare(infds)
        print(f"Loo comparison:\n{comparison}")
        comparison.to_csv(os.path.join(LOO_DIR, "loo_comparison.csv"))
Example #11
def get_inference_data3(self, data, eight_schools_params):
    """multiple vars as lists."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=["y_hat", "log_lik"],
        prior=data.obj,
        prior_predictive=["y_hat", "log_lik"],
        observed_data={"y": eight_schools_params["y"]},
        coords={
            "school": np.arange(eight_schools_params["J"]),
            "half school": ["a", "b", "c", "d"],
            "extra_dim": ["x", "y"],
        },
        dims={
            "eta": ["extra_dim", "half school"],
            "y": ["school"],
            "y_hat": ["school"],
            "theta": ["school"],
            "log_lik": ["log_lik_dim"],
        },
        dtypes=data.model,
    )
Example #12
def get_inference_data_warmup_true_is_false(self, data, eight_schools_params):
    """vars as str."""
    return from_cmdstanpy(
        posterior=data.obj_warmup,
        posterior_predictive="y_hat",
        predictions="y_hat",
        prior=data.obj_warmup,
        prior_predictive="y_hat",
        observed_data={"y": eight_schools_params["y"]},
        constant_data={"y": eight_schools_params["y"]},
        predictions_constant_data={"y": eight_schools_params["y"]},
        log_likelihood="log_lik",
        coords={"school": np.arange(eight_schools_params["J"])},
        dims={
            "eta": ["extra_dim", "half school"],
            "y": ["school"],
            "log_lik": ["school"],
            "y_hat": ["school"],
            "theta": ["school"],
        },
        save_warmup=False,
    )
Example #13
def get_inference_data2(self, data, eight_schools_params):
    """vars as lists."""
    return from_cmdstanpy(
        posterior=data.obj,
        posterior_predictive=["y_hat"],
        predictions=["y_hat", "log_lik"],
        prior=data.obj,
        prior_predictive=["y_hat"],
        observed_data={"y": eight_schools_params["y"]},
        constant_data=eight_schools_params,
        predictions_constant_data=eight_schools_params,
        log_likelihood=["log_lik", "y_hat"],
        coords={
            "school": np.arange(eight_schools_params["J"]),
            "log_lik_dim": np.arange(eight_schools_params["J"]),
        },
        dims={
            "theta": ["school"],
            "y": ["school"],
            "y_hat": ["school"],
            "eta": ["school"],
            "log_lik": ["log_lik_dim"],
        },
    )
Example #14
repo = Repo("./", search_parent_directories=True)
# repo_rootdir holds the absolute path to the top-level of our repo                 
repo_rootdir = repo.working_tree_dir

sm_gaussF = cmdstanpy.CmdStanModel(
    stan_file=f"{repo_rootdir}/code/stan/test_gaussF.stan",
    compile=True,
)

# input parameters for a single fixed-param draw of the model
stan_data = dict(
    a=22.35,
    b=-17.3,
    c=12.5,
    z=-1.59,
    )

stan_output = sm_gaussF.sample(
    data=stan_data,
    fixed_param=True,
    iter_sampling=1,
)

# Convert to ArviZ InferenceData object
stan_output = az.from_cmdstanpy(
    stan_output, 
    posterior_predictive=["output"]
)

# inspect the posterior predictive draws (e.g. when running live in IPython)
stan_output.posterior_predictive.output
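Example #15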
    # Define the data dictionary
    data_dict = {
        'J': d['replicate'].max(),
        'N_yield': len(d),
        'N_calib': len(calib),
        'idx': d['replicate'].values.astype(int),
        'calib_conc': calib['carbon_conc_mM'].values.astype(float),
        'calib_rel_areas': calib['rel_area_phosphate'].values.astype(float),
        'optical_density': d['od_600nm'].values.astype(float),
        'yield_rel_areas': d['rel_area_phosphate'].values.astype(float)
    }

    # Sample the inferential model
    samps = model.sample(data=data_dict)
    samps = az.from_cmdstanpy(samps)
    samps = samps.posterior.to_dataframe().reset_index()

    # Tidy low-level parameters; copy the selection so drop_duplicates
    # does not act on a view of `samps`
    _samps = samps[[
        'yield_inter_dim_0', 'yield_slope_dim_0', 'yield_inter', 'yield_slope'
    ]].copy()
    _samps.drop_duplicates(inplace=True)
    pairs = [['yield_inter_dim_0', 'yield_inter'],
             ['yield_slope_dim_0', 'yield_slope']]
    dfs = []
    for p in pairs:
        _params = _samps[p]
        _df = pd.DataFrame([])
        _df['value'] = _params[p[1]]
        _df['parameter'] = p[1]
Example #16
# maximum likelihood estimation
optim = sm.optimize(data=mdl_data).optimized_params_pd
optim[optim.columns[~optim.columns.str.startswith("lp")]]

# variational inference
vb = sm.variational(data=mdl_data)
vb.variational_sample.columns = vb.variational_params_dict.keys()
vb_name = vb.variational_params_pd.columns[
    ~vb.variational_params_pd.columns.str.startswith(("lp", "log_"))]
vb.variational_params_pd[vb_name]
vb.variational_sample[vb_name]

# Markov chain Monte Carlo
fit = sm.sample(data=mdl_data,
                show_progress=True,
                chains=4,
                iter_sampling=50000,
                iter_warmup=10000,
                thin=5)

fit.draws().shape  # iterations, chains, parameters
fit.summary().loc[vb_name]  # pandas DataFrame
print(fit.diagnose())

posterior = fit.stan_variables()

az_trace = az.from_cmdstanpy(fit)
az.summary(az_trace).loc[vb_name]  # pandas DataFrame
az.plot_trace(az_trace)
Example #17
# maximum likelihood estimation
optim_modif = sm_modif.optimize(data=mdl_data).optimized_params_pd
optim_modif[optim_modif.columns[~optim_modif.columns.str.startswith("lp")]]

# variational inference
vb_modif = sm_modif.variational(data=mdl_data)
vb_modif.variational_sample.columns = vb_modif.variational_params_dict.keys()
vb_name = vb_modif.variational_params_pd.columns[
    ~vb_modif.variational_params_pd.columns.str.startswith(("lp", "log_"))]
vb_modif.variational_params_pd[vb_name]
vb_modif.variational_sample[vb_name]

# Markov chain Monte Carlo
fit_modif = sm_modif.sample(data=mdl_data,
                            show_progress=True,
                            chains=4,
                            iter_sampling=50000,
                            iter_warmup=10000,
                            thin=5)

fit_modif.draws().shape  # iterations, chains, parameters
fit_modif.summary().loc[vb_name]  # pandas DataFrame
print(fit_modif.diagnose())

posterior = {k: fit_modif.stan_variable(k) for k in var_name}

az_trace = az.from_cmdstanpy(fit_modif)
az.summary(az_trace).loc[vb_name]  # pandas DataFrame
az.plot_trace(az_trace, var_names=var_name)
Example #18
# ---- data ---- #
eight_school_data = {
    "J": 8,
    "y": np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0]),
    "sigma": np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0]),
}

# ---- model ---- #
stan_file = model_dir / "schools.stan"
stan_model = CmdStanModel(stan_file=stan_file)
stan_model.compile()

# ---- fitting ---- #
stan_fit = stan_model.sample(data=eight_school_data)

# ---- results ---- #
cmdstanpy_data = az.from_cmdstanpy(
    posterior=stan_fit,
    posterior_predictive="y_hat",
    observed_data={"y": eight_school_data["y"]},
    log_likelihood="log_lik",
    coords={"school": np.arange(eight_school_data["J"])},
    dims={
        "theta": ["school"],
        "y": ["school"],
        "log_lik": ["school"],
        "y_hat": ["school"],
        "theta_tilde": ["school"],
    },
)
Example #19
sm_ols = CmdStanModel(stan_file=modelfile_ols)
fit_ols = sm_ols.sample(data=model_data_dict,
                        show_progress=True,
                        chains=4,
                        iter_sampling=50000,
                        iter_warmup=10000,
                        thin=5)

fit_ols.draws().shape  # iterations, chains, parameters
fit_ols.summary().loc[var_name]  # pandas DataFrame
print(fit_ols.diagnose())

posterior_ols = {k: fit_ols.stan_variable(k) for k in var_name}

az_trace_ols = az.from_cmdstanpy(fit_ols)
az.summary(az_trace_ols).loc[var_name]  # pandas DataFrame
az.plot_trace(az_trace_ols, var_names=var_name)

gd = sns.jointplot(
    x=posterior_ols["b0"],
    y=posterior_ols["b1"],
    marginal_kws={
        "kde": True,
        "kde_kws": {
            "cut": 1
        }
    },
)
gd.plot_joint(sns.kdeplot, zorder=2, n_levels=10, cmap="gray_r")
gd.fig.suptitle("Posterior joint distribution (OLS)", y=1.02)
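Example #20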
repo_rootdir = repo.working_tree_dir

# first load data using module util
df_unreg, = load_FISH_by_promoter(("unreg", ))
# pull out one specific promoter for convenience for prior pred check & SBC
df_UV5 = df_unreg[df_unreg["experiment"] == "UV5"]

sm = cmdstanpy.CmdStanModel(
    stan_file=f"{repo_rootdir}/code/stan/constit_post_inf.stan",
    compile=True,
)

all_samples = {}
for gene in df_unreg['experiment'].unique():
    temp_df = df_unreg[df_unreg['experiment'] == gene]
    stan_data = dict(
        N=len(temp_df),
        mRNA_counts=temp_df["mRNA_cell"].values.astype(int),
        ppc=0  # if you produce ppc samples, the InferenceData obj is HUGE
    )
    with be_quiet_stan():
        posterior_samples = sm.sample(data=stan_data, chains=6)
    all_samples[gene] = az.from_cmdstanpy(
        posterior_samples, posterior_predictive=["mRNA_counts_ppc"])
    print(f"For promoter {gene}...")
    check_all_diagnostics(all_samples[gene])

# pickle the samples. ~20 separate netcdfs, only for use together? No thanks
with open(f"{repo_rootdir}/data/mcmc_samples/constit_post_inf.pkl", 'wb') as outfile:
    pickle.dump(all_samples, outfile)
Example #21
def single_fit_to_inference(
    fit: CmdStanMCMC,
    params: Sequence[str],
    coords: dict,
    dims: dict,
    alr_params: Sequence[str] = None,
    posterior_predictive: str = None,
    log_likelihood: str = None,
) -> az.InferenceData:
    """Convert fitted Stan model into inference object.

    :param fit: Fitted model
    :type fit: CmdStanMCMC

    :param params: Posterior fitted parameters to include
    :type params: Sequence[str]

    :param coords: Mapping of entries in dims to labels
    :type coords: dict

    :param dims: Dimensions of parameters in the model
    :type dims: dict

    :param alr_params: Parameters to convert from ALR to CLR (this will
        be ignored if the model has been parallelized across features)
    :type alr_params: Sequence[str], optional

    :param posterior_predictive: Name of posterior predictive values from
        Stan model to include in ``arviz`` InferenceData object
    :type posterior_predictive: str, optional

    :param log_likelihood: Name of log likelihood values from Stan model
        to include in ``arviz`` InferenceData object
    :type log_likelihood: str, optional

    :returns: ``arviz`` InferenceData object with selected values
    :rtype: az.InferenceData
    """
    # remove alr params so initial dim fitting works
    if alr_params is None:
        alr_params = []
    new_dims = {k: v for k, v in dims.items() if k not in alr_params}

    if log_likelihood is not None and log_likelihood not in dims:
        raise KeyError("Must include dimensions for log-likelihood!")
    if posterior_predictive is not None and posterior_predictive not in dims:
        raise KeyError("Must include dimensions for posterior predictive!")

    inference = az.from_cmdstanpy(fit,
                                  coords=coords,
                                  log_likelihood=log_likelihood,
                                  posterior_predictive=posterior_predictive,
                                  dims=new_dims)

    vars_to_drop = set(inference.posterior.data_vars).difference(params)
    inference.posterior = _drop_data(inference.posterior, vars_to_drop)

    # Convert each param in ALR coordinates to CLR coordinates
    for param in alr_params:
        # Want to run on each chain independently
        all_chain_clr_coords = []
        all_chain_alr_coords = np.split(fit.stan_variable(param),
                                        fit.chains,
                                        axis=0)
        for i, chain_alr_coords in enumerate(all_chain_alr_coords):
            # arviz 0.11.2 seems to flatten for some reason even though
            # the PR was specifically supposed to do the opposite.
            # Not sure what's going on but just going to go through cmdstanpy.
            chain_clr_coords = convert_beta_coordinates(chain_alr_coords)
            all_chain_clr_coords.append(chain_clr_coords)
        all_chain_clr_coords = np.array(all_chain_clr_coords)

        tmp_dims = ["chain", "draw"] + dims[param]
        mcmc_coords = {
            "chain": np.arange(fit.chains),
            "draw": np.arange(fit.num_draws_sampling)
        }
        # restrict param DataArray to only required dims/coords
        tmp_coords = {k: coords[k] for k in dims[param]}
        param_da = xr.DataArray(all_chain_clr_coords,
                                dims=tmp_dims,
                                coords={
                                    **tmp_coords,
                                    **mcmc_coords
                                })
        inference.posterior[param] = param_da

        # TODO: Clean this up
        all_dims = list(inference.posterior.dims)
        dims_to_drop = []
        for dim in all_dims:
            if re.match(f"{param}_dim_\\d", dim):
                dims_to_drop.append(dim)
        inference.posterior = inference.posterior.drop_dims(dims_to_drop)
    return inference
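Example #22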
                                iter_sampling=200,
                                chains=4)

# save to arviz
var_name = ["slack_comments", "github_commits"]
idata_stan = az.from_cmdstanpy(  # DOES NOT WORK, problems everywhere, idk why
    posterior=posterior,
    prior=prior,
    posterior_predictive=[i + "_hat" for i in var_name],
    prior_predictive=[i + "_hat" for i in var_name],
    observed_data=var_name,
    constant_data=["time_since_joined"],
    log_likelihood={i: "log_likelihood_" + i
                    for i in var_name},
    predictions=[i + "_pred" for i in var_name],
    predictions_constant_data=["time_since_joined_pred"],
    coords={
        "developer": names,
        "candidate developer": candidate_devs
    },
    dims={
        "slack_comments": ["developer"],
        "github_commits": ["developer"],
        "slack_comments_hat": ["developer"],
        "github_commits_hat": ["developer"],
        "time_since_joined": ["developer"],
        "slack_comments_pred": ["candidate developer"],
        "github_commits_pred": ["candidate developer"],
        "time_since_joined_pred": ["candidate developer"],
    })
Example #23
def _batch_func(counts: np.array,
                replicates: np.array,
                batches: np.array,
                depth: int,
                mc_samples: int = 1000,
                chains: int = 4,
                disp_scale: float = 1,
                sigma_scale: float = 1,
                reference_loc: float = -5,
                reference_scale: float = 5) -> az.InferenceData:

    replicate_encoder = LabelEncoder()
    replicate_encoder.fit(replicates)
    replicate_ids = replicate_encoder.transform(replicates)
    # identify reference replicates - these will be the
    # first sample for each replicate group
    ref_ids, lookup = np.zeros(len(replicate_ids)), {}
    for i, c in enumerate(replicate_ids):
        if c not in lookup:
            lookup[c] = i
    for i, c in enumerate(replicate_ids):
        ref_ids[i] = lookup[c]
    batch_encoder = LabelEncoder()
    batch_encoder.fit(batches)
    batch_ids = batch_encoder.transform(batches)

    batch_ids = batch_ids.astype(np.int64) + 1
    ref_ids = ref_ids.astype(np.int64) + 1
    code = os.path.join(os.path.dirname(__file__),
                        'assets/batch_pln_single.stan')
    sm = CmdStanModel(stan_file=code)
    dat = {
        'N': counts.shape[0],
        'R': int(max(ref_ids) + 1),
        'B': int(max(batch_ids) + 1),
        'depth': list(np.log(depth)),
        'y': list(map(int, counts.astype(np.int64))),
        'ref_ids': list(map(int, ref_ids)),
        'batch_ids': list(map(int, batch_ids)),
        'sigma_scale': sigma_scale,
        'disp_scale': disp_scale,
        'reference_loc': reference_loc,
        'reference_scale': reference_scale
    }
    with tempfile.TemporaryDirectory() as temp_dir_name:
        data_path = os.path.join(temp_dir_name, 'data.json')
        with open(data_path, 'w') as f:
            json.dump(dat, f)
        # Obtain an initial guess with MLE
        # guess = sm.optimize(data=data_path, inits=0)
        # see https://mattocci27.github.io/assets/poilog.html
        # for recommended parameters for poisson log normal
        fit = sm.sample(
            data=data_path,
            iter_sampling=mc_samples,
            # inits=guess.optimized_params_dict,
            chains=chains,
            iter_warmup=1000,
            adapt_delta=0.9,
            max_treedepth=20)
        print(fit.diagnose())
        inf = az.from_cmdstanpy(
            fit,
            posterior_predictive='y_predict',
            log_likelihood='log_lhood',
        )
        return inf
Example #24
@classmethod
def from_cmdstanpy(cls, fit):
    # wrap the arviz InferenceData in this class
    inference_data = az.from_cmdstanpy(fit)
    return cls(inference_data)
Example #25
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from typing import Dict
import arviz as az
from cmdstanpy import CmdStanModel, CmdStanMCMC

model_dir = Path("stan_code")
data_dir = Path("data")

# ---- data ---- #
discharge_data: Dict = {"T": len(Q), "Q": Q, "P": precip}

# ---- model ---- #
stan_file = model_dir / "abcmodel.stan"
stan_model = CmdStanModel(stan_file=stan_file)
stan_model.compile()

# ---- fit parameters ---- #
abcmodel_fit: CmdStanMCMC = stan_model.sample(
    data=discharge_data,
    chains=4,
    cores=1,
    seed=1111,
    show_progress=True,
)

# ---- get simulations ---- #
posterior = az.from_cmdstanpy(
    posterior=bern_fit,
    posterior_predictive="y",
)
Example #26
df_unreg, = load_FISH_by_promoter(("unreg", ))
# pull out one specific promoter for convenience for prior pred check & SBC
df_UV5 = df_unreg[df_unreg["experiment"] == "UV5"]

# ############################################################################
# PRIOR PREDICTIVE CHECK
# ############################################################################

sm_prior_pred = cmdstanpy.CmdStanModel(
    stan_file=f"{repo_rootdir}/code/stan/constit_prior_pred.stan",
    compile=True,
)

# stan needs to know how many data points to generate,
# so pick a representative promoter
data_prior_pred = dict(N=len(df_UV5))

prior_pred_samples = sm_prior_pred.sample(
    data=data_prior_pred,
    fixed_param=True,
    iter_sampling=1000,
)

# Convert to ArviZ InferenceData object
prior_pred_samples = az.from_cmdstanpy(
    posterior=prior_pred_samples,  # this line b/c of arviz bug, PR#979
    prior=prior_pred_samples,
    prior_predictive=['mRNA_counts'])

prior_pred_samples.to_netcdf(
    f"{repo_rootdir}/data/stan_samples/constit_prior_pred.nc")
percs = [(2.5, 97.5), (12.5, 87.5), (25, 75), (37.5, 62.5), (47.5, 52.5)]
perc_labels = [95, 75, 50, 25, 5]
# %%

# Iterate through each carbon source and strain and perform the inference
samples, summary, fits = [], [], []
for g, d in data.groupby(['strain', 'carbon_source', 'medium_base']):
    data_dict = {
        'J': d['replicate'].max(),
        'N': len(d),
        'idx': d['replicate'].values.astype(int),
        'time': d['elapsed_time_hr'].values.astype(float),
        'OD': d['od_600nm'].values.astype(float)
    }
    _samples = model.sample(data=data_dict)
    _samples = az.from_cmdstanpy(_samples)
    _samples = _samples.posterior.to_dataframe().reset_index()

    # Get the low level parameters (copy to avoid modifying a view of _samples)
    params = _samples[['lam_dim_0', 'OD_init', 'lam']].copy()
    params.rename(columns={'lam_dim_0': 'level'}, inplace=True)
    params['level'] = [f'replicate {i+1}' for i in params['level'].values]
    params = params.melt('level', var_name='parameter')

    # hyperparameters (copy to avoid modifying a view of _samples)
    hyper_params = _samples[[
        'OD_init_mu', 'lam_mu', 'OD_init_sigma', 'lam_sigma', 'sigma'
    ]].copy()
    hyper_params['level'] = 'hyperparameter'
    hyper_params = hyper_params.melt('level', var_name='parameter')
    hyper_params.loc[hyper_params['parameter'] == 'lam_mu',
Example #28
bern_fit: CmdStanMCMC = bernoulli_model.sample(
    data=bernoulli_data,
    chains=4,
    cores=1,
    seed=1111,
    show_progress=True,
)

# ---- results ---- #
"""samples = multi-dimensional array
    all draws from all chains arranged as dimensions:
    (draws, chains, columns).
"""
posterior = az.from_cmdstanpy(
    posterior=bern_fit,
    posterior_predictive="y",
    # observed_data={"y": np.array(bernoulli_data["y"])},
)

# POSTERIOR PREDICTIVE CHECKS

bernoulli_ppc = CmdStanModel(stan_file=model_dir / "bernoulli_ppc.stan")

# fit the model to the data
bern_fit = bernoulli_ppc.sample(data=bernoulli_data)

# PRIOR PREDICTIVE CHECKS
# https://cmdstanpy.readthedocs.io/en/latest/sample.html
# generate data - fixed_param=True
datagen_stan = model_dir / "bernoulli.stan"