def sample(data_path, output_dir):
    """Generate MCMC samples given a user input directory.

    A fresh directory whose name starts with "maud_output" is created inside
    output_dir. The user input at data_path is copied into it at
    new_dir/user_input, sampling.sample writes draws to new_dir/samples, and
    the results of cmdstanpy's diagnose and summary methods are printed.
    An arviz InferenceData netcdf file is also written to the new directory.

    Returns the path of the newly created output directory.
    """
    mi = load_maud_input(data_path, mode="sample")
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_path = os.path.join(
        output_dir, f"maud_output-{mi.config.name}-{timestamp}"
    )
    samples_dir = os.path.join(output_path, "samples")
    input_copy_dir = os.path.join(output_path, "user_input")
    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(samples_dir)
    print(f"Copying user input from {data_path} to {input_copy_dir}")
    shutil.copytree(data_path, input_copy_dir)
    fit = sampling.sample(mi, samples_dir)
    print(fit.diagnose())
    print(fit.summary())
    inference_data = load_infd(fit.runset.csv_files, mi)
    inference_data.to_netcdf(os.path.join(output_path, "infd.nc"))
    return output_path
def generate_inits(data_path, chain, draw, warmup):
    """Generate template for init definitions.

    :params data_path: a path to a maud output folder with both samples
        and user_input folders
    :params chain: the sampling chain of the stan sampler you want to export
    :params draw: the sampling draw of the sampling chain you want to export
        from the start of the sampling or warmup phase
    :params warmup: indicator variable of if it is for the warmup or
        sampling phase
    """
    samples_dir = os.path.join(data_path, "samples")
    csvs = [
        os.path.join(samples_dir, filename)
        for filename in os.listdir(samples_dir)
        if filename.endswith(".csv")
    ]
    mi = load_maud_input(os.path.join(data_path, "user_input"), mode="sample")
    infd = load_infd(csvs, mi)
    destination = os.path.join(data_path, "generated_inits.csv")
    print("Creating init")
    inits = get_inits_from_draw(infd, mi, chain, draw, warmup)
    print(f"Saving inits to: {destination}")
    inits.to_csv(destination)
    return "Successfully generated prior template"
def simulate(data_path, output_dir, n):
    """Generate draws from the prior mean.

    Creates a timestamped "maud_output_sim" directory in output_dir, copies
    the user input there, runs sampling.simulate, writes an InferenceData
    netcdf file and prints chain/draw-averaged simulated quantities.

    Returns the path of the newly created output directory.
    """
    mi = load_maud_input(data_path=data_path, mode="sample")
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_path = os.path.join(
        output_dir, f"maud_output_sim-{mi.config.name}-{timestamp}"
    )
    samples_dir = os.path.join(output_path, "samples")
    input_copy_dir = os.path.join(output_path, "user_input")
    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(samples_dir)
    print(f"Copying user input from {data_path} to {input_copy_dir}")
    shutil.copytree(data_path, input_copy_dir)
    fit = sampling.simulate(mi, samples_dir, n)
    infd = load_infd(fit.runset.csv_files, mi)
    infd.to_netcdf(os.path.join(output_path, "infd.nc"))

    def show_posterior_mean(varname):
        # Print the chain/draw-averaged values of one simulated variable.
        print(infd.posterior[varname].mean(dim=["chain", "draw"]).to_series())

    print("\nSimulated concentrations and fluxes:")
    for varname in ["conc", "flux", "conc_enzyme"]:
        show_posterior_mean(varname)
    print("\nSimulated measurements:")
    for varname in ["yconc_sim", "yflux_sim"]:
        show_posterior_mean(varname)
    print("\nSimulated log likelihoods:")
    for varname in ["log_lik_conc", "log_lik_flux"]:
        show_posterior_mean(varname)
    return output_path
def test_linear(input_dirname):
    """Test that the linear model works.

    Runs a simulation study for the model in input_dirname and asserts that
    every non-zero true parameter value lies inside the posterior 95% CI.
    """
    input_dir_path = os.path.join(HERE, input_dirname)
    mi_in = load_maud_input_from_toml(input_dir_path)
    with open(os.path.join(input_dir_path, TRUE_PARAMS_FILENAME), "r") as f:
        true_params = json.load(f)
    study = run_simulation_study(mi_in, true_params)
    infd = load_infd(study.samples.runset.csv_files, study.mi)
    for param_name, param_vals in true_params.items():
        # Parameters whose true values are all falsy are skipped entirely.
        if not any(param_vals):
            continue
        non_sample_dims = [
            dim
            for dim in infd.posterior[param_name].dims
            if dim not in ["chain", "draw"]
        ]
        # 2.5%/97.5% posterior quantiles alongside the true values.
        quantiles = (
            infd.posterior[param_name]
            .to_series()
            .unstack(non_sample_dims)
            .quantile([0.025, 0.975])
            .T.assign(true=np.array(param_vals).ravel())
        )
        quantiles.columns = ["low", "high", "true"]
        for coord, row in quantiles.iterrows():
            msg = (
                f"True value for {param_name} outside 95% CI at coord {str(coord)}!\n"
                f"\tTrue value: {str(row['true'])}\n"
                f"\t2.5% posterior quantile: {str(row['low'])}\n"
                f"\t97.5% posterior quantile: {str(row['high'])}\n"
            )
            assert row["low"] <= row["true"] <= row["high"], msg
def return_dict_of_infd(csvs, mi):
    """Return dict of chain with associated infd object.

    Keys are chain labels extracted from each csv path: the text between the
    final "-" and the next ".", e.g. ".../model-1.csv" -> "1".

    :params csvs: a list of csv file paths
    :params mi: a MaudInput object
    """
    # Plain str.split replaces re.split on literal separators: the original
    # pattern "\." was not a raw string, which is an invalid escape sequence
    # and raises a SyntaxWarning on recent CPython versions.
    return {
        chain.split("-")[-1].split(".")[0]: load_infd(chain, mi)
        for chain in csvs
    }
def main():
    """Run the script.

    Reads a Maud output directory, picks one posterior draw, renders flux
    expressions for every enzyme and drain from template objects
    (presumably jinja2 — they expose .render), assembles the balanced-mic
    ODE right-hand sides, and writes the result to the --yaml_output file.
    """
    parser = argparse.ArgumentParser(description=HELP_MSG)
    parser.add_argument("maud_output_dir", type=str, nargs=1,
                        help="A path to Maud output directory")
    parser.add_argument("--chain", default=0,
                        help="Chain number to export parameter values for")
    parser.add_argument("--draw", default=0,
                        help="Draw number to export parameter values for")
    parser.add_argument("--warmup", default=0,
                        help="If draw is in warmup phase or not")
    parser.add_argument("--yaml_output", default="output.yaml",
                        help="Output of constructed yaml")
    parser.add_argument("--selected_experiment", default=None,
                        help="Experiment parameters exported")
    args = parser.parse_args()
    maud_output_dir = args.maud_output_dir[0]
    chain = int(args.chain)
    draw = int(args.draw)
    # NOTE(review): argparse delivers CLI values as strings, so
    # "--warmup 0" yields bool("0") == True; only the default 0 is falsy.
    # Confirm whether a type=int conversion was intended here.
    warmup = bool(args.warmup)
    yaml_output = os.path.join(HERE, args.yaml_output)
    csvs = get_csvs(maud_output_dir)
    mi = load_maud_input(os.path.join(maud_output_dir, "user_input"),
                         mode="sample")
    infd = load_infd(csvs, mi)
    # NOTE(review): args.selected_experiment is parsed but never read; this
    # local is hard-coded to None so the first experiment is always used.
    selected_experiment = None
    if selected_experiment is None:
        selected_experiment = list(mi.stan_coords.experiments)[0]
    # Defining Stoichiometry
    S = get_stoichiometry(mi)
    # Selecting a set of parameters from a previous run
    par_values = get_inits_from_draw(infd, mi, chain, draw, warmup)
    # par_input accumulates [name, value] pairs for the yaml template.
    par_input = []
    # Selecting measurements: balanced mic concentrations default to 0.001
    # when no measurement exists for the selected experiment.
    conc_measurements = mi.measurements.yconc
    balanced_conc_values = conc_measurements.loc[selected_experiment]
    balanced_mic_values = {
        mic.id: balanced_conc_values.loc[mic.id]["measurement"]
        if mic.id in balanced_conc_values.index
        else 0.001
        for mic in mi.kinetic_model.mics
        if mic.balanced
    }
    # Experiment specific parameters
    exp_values = par_values[par_values["experiment_id"] == selected_experiment]
    conc_values = exp_values[exp_values["parameter_name"] == "conc_unbalanced"]
    enz_values = exp_values[exp_values["parameter_name"] == "conc_enzyme"]
    drain_values = exp_values[exp_values["parameter_name"] == "drain"]
    # Unbalanced mic concentrations ("m<id>"), enzyme concentrations
    # ("e<id>") and drain fluxes ("r<id>") become template parameters.
    for mic in mi.kinetic_model.mics:
        if mic.balanced is False:
            par_input.append([
                f"m{mic.id}",
                list(conc_values[conc_values["mic_id"] == mic.id]["value"])[0],
            ])
    for rxn in mi.kinetic_model.reactions:
        for enz in rxn.enzymes:
            par_input.append([
                f"e{enz.id}",
                list(
                    enz_values[enz_values["enzyme_id"] == enz.id]["value"])[0],
            ])
    for rxn in mi.kinetic_model.reactions:
        if rxn.reaction_mechanism == "drain":
            par_input.append([
                f"r{rxn.id}",
                list(drain_values[drain_values["drain_id"] == rxn.id]["value"])
                [0],
            ])
    # Metabolite gibbs energies
    dgfs = par_values[par_values["parameter_name"] == "dgf"]
    flux_dict = {}
    for rxn in mi.kinetic_model.reactions:
        # calculating the gibbs energy of reaction
        # accounting for water
        if rxn.reaction_mechanism == "reversible_modular_rate_law":
            tmp_dg = 0
            for mic_id, stoic in rxn.stoichiometry.items():
                met_id = next(
                    filter(lambda mic: mic.id == mic_id,
                           mi.kinetic_model.mics)).metabolite_id
                met_dgf = list(
                    dgfs[dgfs["metabolite_id"] == met_id]["value"])[0]
                tmp_dg += stoic * met_dgf
            if rxn.water_stoichiometry:
                # -157.6: presumably the formation energy of water in
                # kJ/mol — TODO confirm against the Maud source of truth.
                tmp_dg += rxn.water_stoichiometry * -157.6
            # Keq = exp(dG / (-R*T)) with R = 0.008314 kJ/mol/K, T = 298.15 K.
            tmp_Keq = np.exp(tmp_dg / (-0.008314 * 298.15))
        for enz in rxn.enzymes:
            # Kinetic parameters for this enzyme, named per template
            # conventions (km_, kcat_, ki_, aa_, ai_, transfer_constant_).
            tmp_enz_pars = par_values[par_values["enzyme_id"] == enz.id]
            tmp_kms = tmp_enz_pars.loc[tmp_enz_pars["parameter_name"] == "km"]
            par_input += [[
                f"km_{row['enzyme_id']}_{row['mic_id']}", row["value"]
            ] for _, row in tmp_kms.iterrows()]
            tmp_kcats = tmp_enz_pars.loc[tmp_enz_pars["parameter_name"]
                                         == "kcat"]
            par_input += [[f"kcat_{row['enzyme_id']}", row["value"]]
                          for _, row in tmp_kcats.iterrows()]
            tmp_kis = tmp_enz_pars.loc[tmp_enz_pars["parameter_name"] == "ki"]
            par_input += [[
                f"ki_{row['enzyme_id']}_{row['mic_id']}", row["value"]
            ] for _, row in tmp_kis.iterrows()]
            tmp_aas = tmp_enz_pars.loc[tmp_enz_pars["parameter_name"]
                                       == "diss_r"]
            par_input += [[
                f"aa_{row['enzyme_id']}_{row['mic_id']}", row["value"]
            ] for _, row in tmp_aas.iterrows()]
            tmp_ais = tmp_enz_pars.loc[tmp_enz_pars["parameter_name"]
                                       == "diss_t"]
            par_input += [[
                f"ai_{row['enzyme_id']}_{row['mic_id']}", row["value"]
            ] for _, row in tmp_ais.iterrows()]
            tmp_transfer_constants = tmp_enz_pars.loc[
                tmp_enz_pars["parameter_name"] == "transfer_constant"]
            par_input += [[
                f"transfer_constant_{row['enzyme_id']}", row["value"]
            ] for _, row in tmp_transfer_constants.iterrows()]
            # Substrates have negative stoichiometry, products positive;
            # the "m" prefix is stripped (mic[1:]) when looking stoichiometry
            # back up by mic id.
            substrate_list = [
                f"m{mic}" for mic, stoic in rxn.stoichiometry.items()
                if stoic < 0
            ]
            product_list = [
                f"m{mic}" for mic, stoic in rxn.stoichiometry.items()
                if stoic > 0
            ]
            mic_list = [f"m{mic}" for mic, _ in rxn.stoichiometry.items()]
            # (mic, km name, |stoichiometry|) triples for the templates.
            substrate_entry = list(
                zip(
                    substrate_list,
                    [f"km_{enz.id}_{mic[1:]}" for mic in substrate_list],
                    [
                        np.abs(rxn.stoichiometry[mic[1:]])
                        for mic in substrate_list
                    ],
                ))
            product_entry = list(
                zip(
                    product_list,
                    [f"km_{enz.id}_{mic[1:]}" for mic in product_list],
                    [
                        np.abs(rxn.stoichiometry[mic[1:]])
                        for mic in product_list
                    ],
                ))
            haldane_entry = list(
                zip(
                    [f"km_{enz.id}_{mic[1:]}" for mic in mic_list],
                    [rxn.stoichiometry[mic[1:]] for mic in mic_list],
                ))
            # Modifier terms: competitive inhibition plus allosteric
            # activation/inhibition pairs of (mic name, parameter name).
            competitive_entry = []
            allosteric_inhibitors = []
            allosteric_activators = []
            for mod in enz.modifiers["competitive_inhibitor"]:
                competitive_entry.append(
                    [f"m{mod.mic_id}", f"ki_{enz.id}_{mod.mic_id}"])
            for mod in enz.modifiers["allosteric_activator"]:
                allosteric_activators.append(
                    [f"m{mod.mic_id}", f"aa_{enz.id}_{mod.mic_id}"])
            for mod in enz.modifiers["allosteric_inhibitor"]:
                allosteric_inhibitors.append(
                    [f"m{mod.mic_id}", f"ai_{enz.id}_{mod.mic_id}"])
            # Render the modular rate law pieces: Tr (numerator), Dr
            # (denominator), then regulation terms below.
            if rxn.reaction_mechanism == "reversible_modular_rate_law":
                Trf = Template_T_met.render(met_array=substrate_entry)
                Trr = Template_T_met.render(met_array=product_entry)
                Hal = Template_Haldane.render(Km_array=haldane_entry,
                                              Keq=tmp_Keq)
                Tr = Template_Tr.render(enz=f"e{enz.id}",
                                        kcat=f"kcat_{enz.id}",
                                        Trf=Trf,
                                        Trr=Trr,
                                        Hal=Hal)
                Dr = Template_Dr.render(sub_array=substrate_entry,
                                        prod_array=product_entry)
            elif rxn.reaction_mechanism == "irreversible_modular_rate_law":
                Trf = Template_T_met.render(met_array=substrate_entry)
                Tr = Template_Tr_irr.render(enz=f"e{enz.id}",
                                            kcat=f"kcat_{enz.id}",
                                            Trf=Trf)
                Dr = Template_Dr_irr.render(sub_array=substrate_entry)
            # Empty modifier lists collapse to the neutral element of the
            # expression they multiply/add into ("0" or "1").
            Drreg = Template_Drreg.render(met_array=competitive_entry)
            if competitive_entry == []:
                Drreg = "0"
            Allo_Act = Template_Allo_Act_Inh.render(
                met_array=allosteric_activators)
            Allo_Inh = Template_Allo_Act_Inh.render(
                met_array=allosteric_inhibitors)
            if allosteric_activators == []:
                Allo_Act = "1"
            if allosteric_inhibitors == []:
                Allo_Inh = "1"
            if any([allosteric_inhibitors, allosteric_activators]):
                Allo = Template_Allo.render(
                    L0=f"transfer_constant_{enz.id}",
                    Dr=Dr,
                    Drreg=Drreg,
                    Allo_Inh=Allo_Inh,
                    Allo_Act=Allo_Act,
                    Subunits=enz.subunits,
                )
            else:
                Allo = "1"
            flux = Template_flux.render(Tr=Tr, Dr=Dr, Drreg=Drreg, Allo=Allo)
            flux_dict[enz.id] = flux
        if rxn.reaction_mechanism == "drain":
            substrate_list = [
                f"m{mic}" for mic, stoic in rxn.stoichiometry.items()
                if stoic < 0
            ]
            # A drain with no substrates renders with the constant 1.
            if substrate_list == []:
                substrate_list = [1]
            flux = Template_drain.render(drain=f"r{rxn.id}",
                                         sub_array=substrate_list)
            flux_dict[rxn.id] = flux
    # Assemble each balanced mic's ODE as a "+"-joined sum of
    # stoichiometry * flux terms over the model edges.
    system_odes = {}
    for mic in mi.kinetic_model.mics:
        if mic.balanced is True:
            tmp_met_ode = ""
            first = 0
            for edge in mi.stan_coords.edges:
                if S.loc[mic.id, edge] != 0:
                    if first == 0:
                        first += 1
                        tmp_met_ode += \
                            f"({S.loc[mic.id, edge]}*{flux_dict[edge]})"
                    else:
                        tmp_met_ode += \
                            f"+({S.loc[mic.id, edge]}*{flux_dict[edge]})"
            system_odes[mic.id] = tmp_met_ode
    ode_input = [[
        f"m{mic.id}", system_odes[mic.id], balanced_mic_values[mic.id]
    ] for mic in mi.kinetic_model.mics if mic.balanced is True]
    yaml_input = Template_yaml.render(parameters=par_input, odes=ode_input)
    with open(yaml_output, "w") as file:
        file.writelines(yaml_input)
def plot_posteriors(maud_output_dir, output_dir):
    """Plot posterior distributions of Maud model.

    Saves one violin plot per analysed variable and one pairplot per enzyme
    into output_dir. Both arguments are expected to be pathlib.Path objects
    (they are combined with the "/" operator).
    """
    # Collecting information from draws and maud input
    csvs = list(Path(maud_output_dir / "samples").rglob("*.csv"))
    mi = io.load_maud_input(data_path=maud_output_dir / "user_input",
                            mode="sample")
    parameter_coords = get_parameter_coords(mi.stan_coords)
    infd = load_infd(csvs, mi)
    list_of_model_variables = list(infd.posterior.variables.keys())
    # dims[2:] drops the leading chain/draw dimensions, leaving only the
    # variable's own coordinate dimensions.
    var_to_dims = {
        var: list(infd.posterior[var].dims[2:])
        for var in VARIABLES_TO_ANALYSE if var in list_of_model_variables
    }
    var_to_draws = {
        var: infd.posterior[var].to_dataframe().reset_index()
        for var in VARIABLES_TO_ANALYSE if var in list_of_model_variables
    }
    enzyme_dims = {
        par: get_dims_enz(par, parameter_coords, var_to_dims)
        for par in ENZYME_GROUP if par in list_of_model_variables
    }
    priors = mi.priors
    confidence_intervals = dict()
    measurements = dict()
    # Retrieving priors with confidence intervals (CIs); only parameters
    # with a matching "priors_<id>" attribute on mi.priors are handled.
    for par in parameter_coords:
        if par.id in list_of_model_variables:
            if f"priors_{par.id}" in dir(priors):
                par_dataframe = pd.DataFrame.from_dict(par.coords)
                if par.linking_list is None:
                    # Rename stan-coord columns to infd coordinate names.
                    coords_rename = {
                        scs: infd_coord
                        for scs, infd_coord in zip(list(par.coords.keys()),
                                                   par.infd_coord_list)
                    }
                    par_dataframe = par_dataframe.rename(
                        columns=(coords_rename))
                else:
                    par_dataframe[par.infd_coord_list[0]] = list(
                        par.linking_list.values())[0]
                par_dataframe["parameter_name"] = par.id
                p = getattr(priors, f"priors_{par.id}")
                if isinstance(p, IndPrior1d):
                    lower_ci, upper_ci = get_ci_1d(p)
                    par_dataframe["lower_ci"] = lower_ci
                    par_dataframe["upper_ci"] = upper_ci
                    confidence_intervals[par.id] = par_dataframe
                elif isinstance(p, IndPrior2d):
                    # 2d priors: merge long-format location and scale tables
                    # onto the coordinate frame, then compute +/- 2 sigma
                    # intervals (on the log scale for log-scale variables).
                    location_df = (p.location.unstack().reset_index().rename(
                        columns=({
                            0: "location"
                        })))
                    scale_df = (p.scale.unstack().reset_index().rename(
                        columns=({
                            0: "scale"
                        })))
                    par_dataframe = par_dataframe.merge(
                        location_df,
                        left_on=par.infd_coord_list,
                        right_on=list(par.coords.keys()),
                    )
                    par_dataframe = par_dataframe.drop(
                        list(par.coords.keys()), axis=1)
                    par_dataframe = par_dataframe.merge(
                        scale_df,
                        left_on=par.infd_coord_list,
                        right_on=list(par.coords.keys()),
                    )
                    par_dataframe = par_dataframe.drop(list(par.coords.keys()),
                                                       axis=1)
                    if par.id in LOG_SCALE_VARIABLES:
                        par_dataframe["lower_ci"] = par_dataframe.apply(
                            lambda x: np.exp(
                                np.log(x["location"]) - 2 * x["scale"]),
                            axis=1,
                        )
                        par_dataframe["upper_ci"] = par_dataframe.apply(
                            lambda x: np.exp(
                                np.log(x["location"]) + 2 * x["scale"]),
                            axis=1,
                        )
                    else:
                        par_dataframe["lower_ci"] = par_dataframe.apply(
                            lambda x: x["location"] - 2 * x["scale"], axis=1)
                        par_dataframe["upper_ci"] = par_dataframe.apply(
                            lambda x: x["location"] + 2 * x["scale"], axis=1)
                    confidence_intervals[par.id] = par_dataframe
    # Retrieving mean of measurement, reshaped to match the plot columns.
    for measurement_id, measurement_type in zip(
        ["yconc", "yflux", "yenz"], ["conc", "flux", "conc_enzyme"]):
        rename_columns = {
            "conc": "mics",
            "flux": "reactions",
            "conc_enzyme": "enzymes"
        }
        tmp_measurements = getattr(mi.measurements,
                                   measurement_id).reset_index()
        tmp_measurements = tmp_measurements.rename(
            columns=({
                "experiment_id": "experiments",
                "target_id": measurement_type
            }))
        tmp_measurements = tmp_measurements.rename(columns=(rename_columns))
        measurements[measurement_type] = tmp_measurements
    # Plotting violin plots from parameter distributions
    for var in list(var_to_dims.keys()):
        dims = var_to_dims[var]
        draws = var_to_draws[var]
        plot = plot_violin_plots(
            var,
            dims,
            draws,
            LOG_SCALE_VARIABLES,
            UNITS,
            confidence_intervals,
            measurements,
        )
        plot.save(
            filename=output_dir / f"{var}_posterior.png",
            verbose=False,
            dpi=300,
        )
    # plotting pairplots of enzyme parameters: one column per
    # (parameter, coordinate) pair, values on the log scale.
    for enz in mi.stan_coords.enzymes:
        enz_par_df = pd.DataFrame()
        for par, par_df in enzyme_dims.items():
            par_draws = var_to_draws[par]
            enz_dims = par_df[par_df["enzyme_id"] == enz]["par_id"].to_list()
            if len(enz_dims) > 0:
                for par_ind in enz_dims:
                    tmp_enz_par_df = pd.DataFrame()
                    tmp_enz_par_df = par_draws.loc[par_draws[
                        var_to_dims[par][0]] == par_ind].copy()
                    enz_par_df[par + "-" + par_ind] = np.log(
                        tmp_enz_par_df[par].to_list())
        # NOTE(review): the pairplot is saved even when enz_par_df is empty
        # — confirm whether enzymes without parameters should be skipped.
        sns.pairplot(enz_par_df)
        plt.savefig(output_dir / f"{enz}_pairplot.png")