def load_infd_fit(fit, mi: MaudInput) -> az.InferenceData:
    """Get an arviz InferenceData object from out-of-sample tmp generated csvs."""
    sc = mi.stan_coords
    # Start from all stan_coords attributes, then add/override the derived ones.
    coords = dict(sc.__dict__)
    coords["reactions"] = sc.reactions
    coords["kms"] = join_list_of_strings(sc.km_enzs, sc.km_mics)
    coords["kis"] = join_list_of_strings(sc.ci_enzs, sc.ci_mics)
    coords["diss_ts"] = join_list_of_strings(sc.ai_enzs, sc.ai_mics)
    coords["diss_rs"] = join_list_of_strings(sc.aa_enzs, sc.aa_mics)
    # Per-edge quantities all share the same two dimensions.
    dims = {
        var: ["experiments", "edges"]
        for var in ("saturation", "allostery", "phosphorylation", "reversibility")
    }
    dims.update({
        "flux": ["experiments", "reactions"],
        "conc": ["experiments", "mics"],
        "conc_enzyme": ["experiments", "enzymes"],
        "conc_unbalanced": ["experiments", "unbalanced_mics"],
        "conc_phos": ["experiments", "phos_enzs"],
    })
    return az.from_cmdstan(fit, coords=coords, dims=dims, save_warmup=True)
def sample(
    stan_file: str,
    input_json: str,
    coords: dict,
    dims: dict,
    sample_kwargs: dict,
    cpp_options: Optional[dict],
    stanc_options: Optional[dict],
) -> InferenceData:
    """Run cmdstanpy.CmdStanModel.sample and return an InferenceData."""
    with open(input_json, "r") as f:
        stan_input = json.load(f)
    # Stan indexes from 1; shift to 0-based indices for the arviz coords.
    for ix_name in ("ix_train", "ix_test"):
        coords[ix_name] = [i - 1 for i in stan_input[ix_name]]
    model = CmdStanModel(
        stan_file=stan_file,
        cpp_options=cpp_options,
        stanc_options=stanc_options,
    )
    mcmc = model.sample(data=input_json, **sample_kwargs)
    return az.from_cmdstan(
        posterior=mcmc.runset.csv_files,
        log_likelihood="llik",
        posterior_predictive="yrep",
        observed_data=input_json,
        coords=coords,
        dims=dims,
    )
def load_infd(csvs: List[str], mi: MaudInput) -> az.InferenceData:
    """Get an arviz InferenceData object from Maud csvs."""
    Numba.disable_numba()
    sc = mi.stan_coords
    # Start from all stan_coords attributes, then add/override the derived ones.
    coords = dict(sc.__dict__)
    coords.update(
        reactions=sc.reactions,
        kms=join_list_of_strings(sc.km_enzs, sc.km_mics),
        kis=join_list_of_strings(sc.ci_enzs, sc.ci_mics),
        diss_ts=join_list_of_strings(sc.ai_enzs, sc.ai_mics),
        diss_rs=join_list_of_strings(sc.aa_enzs, sc.aa_mics),
        yconcs=join_list_of_strings(sc.yconc_exps, sc.yconc_mics),
        yfluxs=join_list_of_strings(sc.yflux_exps, sc.yflux_rxns),
        yenzs=join_list_of_strings(sc.yenz_exps, sc.yenz_enzs),
    )
    dims = {
        "flux": ["experiments", "reactions"],
        "conc": ["experiments", "mics"],
        "conc_enzyme": ["experiments", "enzymes"],
        "conc_unbalanced": ["experiments", "unbalanced_mics"],
        "conc_phos": ["experiments", "phos_enzs"],
        "drain": ["experiments", "drains"],
        "diss_t": ["diss_ts"],
        "diss_r": ["diss_rs"],
        "transfer_constant": ["allosteric_enzymes"],
        "dgf": ["metabolites"],
        "dgrs": ["edges"],
        "keq": ["edges"],
        "kcat": ["enzymes"],
        "kcat_phos": ["phos_enzs"],
        "km": ["kms"],
        "ki": ["kis"],
    }
    # Simulated measurements and their log likelihoods share per-measurement dims.
    for short, dim in (("conc", "yconcs"), ("flux", "yfluxs"), ("enz", "yenzs")):
        dims[f"y{short}_sim"] = [dim]
        dims[f"log_lik_{short}"] = [dim]
    # Per-edge quantities all share the same two dimensions.
    for var in ("saturation", "allostery", "phosphorylation", "reversibility"):
        dims[var] = ["experiments", "edges"]
    return az.from_cmdstan(csvs, coords=coords, dims=dims, save_warmup=True)
def get_infd(csvs, stan_codes, tecrdb):
    """Load CmdStan csvs into an InferenceData with compound and measurement coords."""
    compound_ids = list(stan_codes["compound_id"].keys())
    measurement_ids = list(tecrdb.index)
    # dgf is per compound; the rest are per measurement.
    dims = {"dgf": ["compound"]}
    for var in ("dgr_prime", "log_lik_dgr", "kpr_rep"):
        dims[var] = ["measurement"]
    return az.from_cmdstan(
        csvs,
        coords={"compound": compound_ids, "measurement": measurement_ids},
        dims=dims,
        log_likelihood="log_lik_dgr",
    )
def get_experiment_table_from_sim(sim_dir: str) -> pd.DataFrame:
    """Get a table of simulated measurements.

    The output should be compatible with maud, so that it is possible to
    overwrite the experiments file.

    :param sim_dir: directory containing a "user_input" folder and a
        "samples" folder with the simulation csv and input_data.json
    :return: DataFrame with columns measurement_type, target_id,
        experiment_id, measurement and error_scale
    """
    ui_path = os.path.join(sim_dir, "user_input")
    sim_csv_path = os.path.join(sim_dir, "samples")
    csv_file = os.path.join(
        sim_csv_path,
        next(filter(lambda f: f.endswith(".csv"), os.listdir(sim_csv_path))),
    )
    with open(os.path.join(sim_csv_path, "input_data.json"), "r") as f:
        stan_input = json.load(f)
    mi = load_maud_input_from_toml(ui_path)
    infd = az.from_cmdstan(csv_file)
    # Stan works with 1-based integer codes; invert the code maps to recover ids.
    code_to_exp = {v: k for k, v in mi.stan_codes.experiment_codes.items()}
    code_to_mic = {v: k for k, v in mi.stan_codes.mic_codes.items()}
    code_to_rxn = {v: k for k, v in mi.stan_codes.reaction_codes.items()}
    # Materialize the map objects so the DataFrame columns are explicit lists.
    conc_sim = pd.DataFrame({
        "measurement_type": "mic",
        "target_id": list(map(code_to_mic.get, stan_input["mic_ix_yconc"])),
        "experiment_id": list(map(code_to_exp.get, stan_input["experiment_yconc"])),
        "measurement": infd.posterior["yconc_sim"].to_series().values,
        "error_scale": stan_input["sigma_conc"],
    })
    flux_sim = pd.DataFrame({
        # BUGFIX: was "mic", but these rows are flux measurements (reaction
        # targets, sigma_flux errors) — "mic" would mislabel them in the
        # experiments file.
        "measurement_type": "flux",
        "target_id": list(map(code_to_rxn.get, stan_input["reaction_yflux"])),
        "experiment_id": list(map(code_to_exp.get, stan_input["experiment_yflux"])),
        "measurement": infd.posterior["yflux_sim"].to_series().values,
        "error_scale": stan_input["sigma_flux"],
    })
    # Enzyme measurements are not simulated: carry them over from the
    # original experiments file unchanged.
    enz_og = pd.read_csv(mi.config.experiments_file).loc[
        lambda df: df["measurement_type"] == "enz"
    ]
    return pd.concat([conc_sim, flux_sim, enz_og], ignore_index=True)
def load_infd(csvs: List[str], mi: MaudInput) -> az.InferenceData:
    """Get an arviz InferenceData object from Maud csvs."""
    codes = mi.stan_codes
    coords = {
        "enzyme_name": list(codes.enzyme_codes.keys()),
        "mic_name": list(codes.mic_codes.keys()),
        "reaction": list(codes.reaction_codes.keys()),
        "metabolite": list(codes.metabolite_codes.keys()),
        "experiment": list(codes.experiment_codes.keys()),
        # km prior ids carry a 3-character prefix; strip it to get the bare id.
        "km_id": [p.id[3:] for p in mi.priors.km_priors],
    }
    dims = {
        "formation_energy": ["metabolite"],
        "kcat": ["enzyme_name"],
        "km": ["km_id"],
    }
    # Per-experiment variables all pair "experiment" with their own coord.
    for var, per in (
        ("enzyme", "enzyme_name"),
        ("conc", "mic_name"),
        ("flux", "reaction"),
    ):
        dims[var] = ["experiment", per]
    return az.from_cmdstan(csvs, coords=coords, dims=dims)
ca_cases = pd.read_csv('../data/CA_covid_data/statewide_cases.csv') daily_cases = ca_cases.groupby( 'date').newcountconfirmed.sum().to_frame().reset_index() daily_cases.date = pd.to_datetime(daily_cases.date) init_prop = [36e6, 4000, 12000, 20000] init = np.zeros(30) init[0] = init_prop[0] init[1:5] = np.repeat(init_prop[1] / 4, 4) init[5:17] = np.repeat(0.2 * init_prop[2] / 12, 12) init[17:29] = np.repeat(0.8 * init_prop[2] / 12, 12) init[29] = init_prop[3] t_eval = np.arange(0, 180) inference_data = az.from_cmdstan('../results/outputs/*.csv') chains = [i for i in range(18)] samples = [i for i in range(20000)] incidence = [] for i in range(5000): chain = np.random.choice(chains) sample = np.random.choice(samples) beta_start = inference_data.posterior.data_vars['beta_start'][chain, sample].data beta_end = inference_data.posterior.data_vars['beta_end'][chain, sample].data k = inference_data.posterior.data_vars['k'][chain, sample].data seir = TimeVaryingSLAPIR(t_eval=t_eval, beta_start=beta_start, beta_end=beta_end, k=k,
def get_inference_data(self, output, **kwargs):
    """Convert cmdstan output into an InferenceData via arviz from_cmdstan."""
    call_kwargs = dict(kwargs, output=output)
    return from_cmdstan(**call_kwargs)
import sys import pandas as pd import numpy as np import scipy as sp import scipy.stats as ss import arviz as az from subprocess import Popen, PIPE import glob # glob string posterior_glob = glob.glob(sys.argv[1] + '/trace-[0-9]*') cmdstan_data = az.from_cmdstan(posterior=posterior_glob) func_dict = { "q2.5": lambda x: np.percentile(x, 2.5), "q25": lambda x: np.percentile(x, 25), "median": lambda x: np.percentile(x, 50), "q75": lambda x: np.percentile(x, 75), "q97.5": lambda x: np.percentile(x, 97.5) } # include mean and hpd stats = az.summary( cmdstan_data, credible_interval=0.95 ).loc[:, ['mean', 'hpd_2.5%', 'hpd_97.5%', 'ess_bulk', 'ess_tail', 'r_hat' ]].reset_index().rename(columns={ 'index': 'var',
def get_inference_data(self, posterior, **kwargs):
    """Build an InferenceData from cmdstan posterior csvs via arviz from_cmdstan."""
    forwarded = dict(kwargs)
    forwarded["posterior"] = posterior
    return from_cmdstan(**forwarded)
def predict(
    mi: MaudInput,
    output_dir: str,
    idata_train: az.InferenceData,
) -> az.InferenceData:
    """Call CmdStanModel.sample for out of sample predictions.

    Runs the prediction Stan program once per posterior draw in fixed_param
    mode, initialising the kinetic parameters from that draw, then
    concatenates the per-draw results back into one InferenceData.

    :param mi: a MaudInput object
    :param output_dir: directory where output will be saved
    :param idata_train: InferenceData object with posterior draws
    """
    model = cmdstanpy.CmdStanModel(
        stan_file=os.path.join(HERE, STAN_PROGRAM_RELATIVE_PATH_PREDICT),
        cpp_options=mi.config.cpp_options,
        stanc_options=mi.config.stanc_options,
    )
    set_up_output_dir(output_dir, mi)
    kinetic_parameters = [
        "keq",
        "km",
        "kcat",
        "dissociation_constant",
        "transfer_constant",
        "kcat_phos",
        "ki",
    ]
    posterior = idata_train.get("posterior")
    sample_stats = idata_train.get("sample_stats")
    assert posterior is not None
    assert sample_stats is not None
    chains = sample_stats["chain"]
    draws = sample_stats["draw"]
    dims = {
        "conc": ["experiment", "mic"],
        "conc_enzyme": ["experiment", "enzyme"],
        "flux": ["experiment", "reaction"],
    }
    # BUGFIX: the accumulators were previously initialised only when the
    # draw/chain *coordinate value* equalled 0, which raises
    # UnboundLocalError for posteriors whose coords don't start at 0
    # (e.g. a sliced or warmup-trimmed InferenceData). Use None sentinels
    # so the first iteration always initialises them.
    out = None
    for chain in chains:
        idata_chain = None
        for draw in draws:
            # Initialise each kinetic parameter from this posterior draw.
            inits = {
                par: (
                    posterior[par]
                    .sel(chain=chain, draw=draw)
                    .to_series()
                    .values
                )
                for par in kinetic_parameters
                if par in posterior.keys()
            }
            sample_args: dict = {
                "data": os.path.join(output_dir, "input_data_test.json"),
                "inits": inits,
                "output_dir": output_dir,
                "iter_warmup": 0,
                "iter_sampling": 1,
                "fixed_param": True,
                "show_progress": False,
            }
            if mi.config.cmdstanpy_config_predict is not None:
                sample_args = {
                    **sample_args,
                    **mi.config.cmdstanpy_config_predict,
                }
            mcmc_draw = model.sample(**sample_args)
            idata_draw = az.from_cmdstan(
                mcmc_draw.runset.csv_files,
                coords={
                    "experiment": [
                        e.id for e in mi.measurements.experiments if e.is_test
                    ],
                    "mic": [m.id for m in mi.kinetic_model.mics],
                    "enzyme": [e.id for e in mi.kinetic_model.enzymes],
                    "reaction": [r.id for r in mi.kinetic_model.reactions],
                },
                dims=dims,
            ).assign_coords(
                coords={"chain": [chain], "draw": [draw]},
                groups="posterior_groups",
            )
            if idata_chain is None:
                idata_chain = idata_draw.copy()
            else:
                idata_chain = az.concat(
                    [idata_chain, idata_draw], dim="draw", reset_dim=False
                )
        if out is None:
            out = idata_chain.copy()
        else:
            out = az.concat([out, idata_chain], dim="chain", reset_dim=False)
    return out
def get_idata(csvs: List[str], mi: MaudInput, mode: str) -> az.InferenceData:
    """Get an arviz InferenceData object from Maud csvs.

    :param csvs: list of paths to Stan sample csv files
    :param mi: a MaudInput object
    :param mode: "train" to use training experiments, anything else for test
    """
    Numba.disable_numba()
    experiments = (
        [e.id for e in mi.measurements.experiments if e.is_train]
        if mode == "train"
        else [e.id for e in mi.measurements.experiments if e.is_test]
    )
    coords = {
        "enzymes": [e.id for e in mi.kinetic_model.enzymes],
        "experiments": experiments,
        "reactions": [r.id for r in mi.kinetic_model.reactions],
        "drains": [r.id for r in mi.kinetic_model.drains],
        "metabolites": [m.id for m in mi.kinetic_model.metabolites],
        "mics": [m.id for m in mi.kinetic_model.mics],
        "edges": [e.id for e in mi.kinetic_model.edges],
        "unbalanced_mics": [
            m.id for m in mi.kinetic_model.mics if not m.balanced
        ],
        # The optional kinetic-model components may be None; fall back to [].
        "phosphorylations": [p.id for p in mi.kinetic_model.phosphorylations]
        if mi.kinetic_model.phosphorylations is not None
        else [],
        "phosphorylation_modifying_enzymes": [
            pme.id
            for pme in mi.kinetic_model.phosphorylation_modifying_enzymes
        ]
        if mi.kinetic_model.phosphorylation_modifying_enzymes is not None
        else [],
        "allosteries": [p.id for p in mi.kinetic_model.allosteries]
        if mi.kinetic_model.allosteries is not None
        else [],
        "allosteric_enzymes": [
            e.id for e in mi.kinetic_model.allosteric_enzymes
        ]
        if mi.kinetic_model.allosteric_enzymes is not None
        else [],
        "competitive_inhibitions": [
            p.id for p in mi.kinetic_model.competitive_inhibitions
        ]
        if mi.kinetic_model.competitive_inhibitions is not None
        else [],
        "kms": mi.stan_variable_set.km.ids[0],
        "kis": mi.stan_variable_set.ki.ids[0],
        "dissociation_constants": (
            mi.stan_variable_set.dissociation_constant.ids[0]
        ),
        "yconcs": join_str_cols(
            mi.measurements.yconc[[
                "experiment_id", "target_id"
            ]].loc[lambda df: df["experiment_id"].isin(experiments)],
            sep=ID_SEPARATOR,
        ).to_list(),
        "yfluxs": join_str_cols(
            mi.measurements.yflux[[
                "experiment_id", "target_id"
            ]].loc[lambda df: df["experiment_id"].isin(experiments)],
            sep=ID_SEPARATOR,
        ).to_list(),
        # BUGFIX: this key was "yenz", which did not match the "yenzs"
        # dimension used for yenz_sim and log_lik_enz below, so those
        # variables got default integer coordinates instead of ids.
        "yenzs": join_str_cols(
            mi.measurements.yenz[[
                "experiment_id", "target_id"
            ]].loc[lambda df: df["experiment_id"].isin(experiments)],
            sep=ID_SEPARATOR,
        ).to_list(),
    }
    return az.from_cmdstan(
        csvs,
        coords=coords,
        dims={
            "flux": ["experiments", "reactions"],
            "conc": ["experiments", "mics"],
            "conc_enzyme": ["experiments", "enzymes"],
            "conc_unbalanced": ["experiments", "unbalanced_mics"],
            "conc_pme": ["experiments", "phosphorylation_modifying_enzymes"],
            "drain": ["experiments", "drains"],
            "dissociation_constant": ["allosteries"],
            "transfer_constant": ["allosteric_enzymes"],
            "dgf": ["metabolites"],
            "dgrs": ["experiments", "edges"],
            "keq": ["experiments", "edges"],
            "kcat": ["enzymes"],
            "kcat_pme": ["phosphorylation_modifying_enzymes"],
            "km": ["kms"],
            "ki": ["kis"],
            "psi": ["experiments"],
            "yconc_sim": ["yconcs"],
            "yflux_sim": ["yfluxs"],
            "yenz_sim": ["yenzs"],
            "log_lik_conc": ["yconcs"],
            "log_lik_flux": ["yfluxs"],
            "log_lik_enz": ["yenzs"],
            "saturation": ["experiments", "edges"],
            "allostery": ["experiments", "edges"],
            "phosphorylation": ["experiments", "edges"],
            "reversibility": ["experiments", "edges"],
        },
        save_warmup=True,
    )