def test_linear(input_dirname):
    """Test that the linear model works.

    Runs a simulation study from the input directory and asserts that every
    true parameter value falls inside the central 95% posterior interval.
    """
    input_dir_path = os.path.join(HERE, input_dirname)
    mi_in = load_maud_input_from_toml(input_dir_path)
    with open(os.path.join(input_dir_path, TRUE_PARAMS_FILENAME), "r") as f:
        true_params = json.load(f)
    study = run_simulation_study(mi_in, true_params)
    infd = load_infd(study.samples.runset.csv_files, study.mi)
    for param_name, param_vals in true_params.items():
        # Skip parameters whose true values are all zero/empty.
        if not any(param_vals):
            continue
        # Coordinate dimensions, i.e. everything except the MCMC dimensions.
        coord_dims = [
            d for d in infd.posterior[param_name].dims
            if d not in ["chain", "draw"]
        ]
        interval_table = (
            infd.posterior[param_name]
            .to_series()
            .unstack(coord_dims)
            .quantile([0.025, 0.975])
            .T.assign(true=np.array(param_vals).ravel())
        )
        interval_table.columns = ["low", "high", "true"]
        for i, row in interval_table.iterrows():
            msg = (
                f"True value for {param_name} outside 95% CI at coord {str(i)}!\n"
                f"\tTrue value: {str(row['true'])}\n"
                f"\t2.5% posterior quantile: {str(row['low'])}\n"
                f"\t97.5% posterior quantile: {str(row['high'])}\n"
            )
            assert row["true"] >= row["low"] and row["true"] <= row["high"], msg
def main(path_to_output_dir: Path):
    """Run maudit: this is the main entrypoint.

    :param path_to_output_dir: Path to a directory that was created by maud
        to store output files.
    """
    ui_path = os.path.join(path_to_output_dir, "user_input")
    samples_dir = os.path.join(path_to_output_dir, "samples")
    csvs = [
        os.path.join(samples_dir, f)
        for f in os.listdir(samples_dir)
        if f.endswith("csv")
    ]
    typer.echo(f"Reading data from {path_to_output_dir}")
    typer.echo(f"Found csv files: {csvs}")
    mi = load_maud_input_from_toml(ui_path)
    infd_dict = return_dict_of_infd(csvs, mi)
    # Extract lp and step_size series, with and without warmup draws.
    lp_draws = return_pd_var(infd_dict, "lp")
    step_size_draws = return_pd_var(infd_dict, "step_size")
    lp_draws_warmup = return_pd_var(infd_dict, "lp", True)
    step_size_draws_warmup = return_pd_var(infd_dict, "step_size", True)
    # Plot each series and save the figure next to the working directory.
    plot_var_time_series(lp_draws, "lp").save(filename='lp_time_series.png')
    plot_var_time_series(step_size_draws, "step_size").save(
        filename='step_size_time_series.png'
    )
    plot_var_time_series(lp_draws_warmup, "lp").save(
        filename='lp_time_series_warmup.png'
    )
    plot_var_time_series(step_size_draws_warmup, "step_size").save(
        filename='step_size_time_series_warmup.png'
    )
def sample(data_path, output_dir):
    """Generate MCMC samples given a user input directory.

    This function creates a new directory in output_dir with a name starting
    with "maud_output". It first copies the directory at data_path into the
    new directory at new_dir/user_input, then runs the sampling.sample
    function to write samples in new_dir/samples. Finally it prints the
    results of cmdstanpy's diagnose and summary methods.
    """
    mi = load_maud_input_from_toml(data_path)
    now = datetime.now().strftime("%Y%m%d%H%M%S")
    # Timestamped output directory so repeated runs never collide.
    output_path = os.path.join(output_dir, f"maud_output-{mi.config.name}-{now}")
    samples_path = os.path.join(output_path, "samples")
    ui_dir = os.path.join(output_path, "user_input")
    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(samples_path)
    print(f"Copying user input from {data_path} to {ui_dir}")
    shutil.copytree(data_path, ui_dir)
    stanfit = sampling.sample(mi, samples_path)
    print(stanfit.diagnose())
    print(stanfit.summary())
    return output_path
def test_get_input_data():
    """Test that the function get_input_data behaves as expected.

    Compares the stan input dictionary built from the linear toml input
    against the precomputed dictionary stored in linear.json.
    """
    input_path = os.path.join(data_path, "linear")
    mi = io.load_maud_input_from_toml(input_path)
    # Fix: use a context manager so the file handle is closed deterministically
    # (the original `json.load(open(...))` leaked the open file).
    with open(os.path.join(input_path, "linear.json"), "r") as f:
        expected = json.load(f)
    actual = sampling.get_input_data(mi)
    assert actual.keys() == expected.keys()
    for k in actual.keys():
        assert_equal(
            actual[k], expected[k], err_msg=f"{k} is different from expected."
        )
def get_experiment_table_from_sim(sim_dir: str) -> pd.DataFrame:
    """Get a table of simulated measurements.

    The output should be compatible with maud, so that it is possible to
    overwrite the experiments file.

    :param sim_dir: path to a maud output directory containing "user_input"
        and "samples" subdirectories.
    :return: DataFrame with one row per measurement (simulated concentrations
        and fluxes plus the original enzyme measurements).
    """
    ui_path = os.path.join(sim_dir, "user_input")
    sim_csv_path = os.path.join(sim_dir, "samples")
    # Take the first csv file found in the samples directory.
    csv_file = os.path.join(
        sim_csv_path,
        next(filter(lambda f: f.endswith(".csv"), os.listdir(sim_csv_path))),
    )
    with open(os.path.join(sim_csv_path, "input_data.json"), "r") as f:
        stan_input = json.load(f)
    mi = load_maud_input_from_toml(ui_path)
    infd = az.from_cmdstan(csv_file)
    # Invert the stan code maps so integer codes translate back to ids.
    code_to_exp = {v: k for k, v in mi.stan_codes.experiment_codes.items()}
    code_to_mic = {v: k for k, v in mi.stan_codes.mic_codes.items()}
    code_to_rxn = {v: k for k, v in mi.stan_codes.reaction_codes.items()}
    conc_sim = pd.DataFrame({
        "measurement_type": "mic",
        "target_id": map(code_to_mic.get, stan_input["mic_ix_yconc"]),
        "experiment_id": map(code_to_exp.get, stan_input["experiment_yconc"]),
        "measurement": infd.posterior["yconc_sim"].to_series().values,
        "error_scale": stan_input["sigma_conc"],
    })
    flux_sim = pd.DataFrame({
        # Fix: these rows are flux measurements (reaction targets, yflux
        # draws) — the original labeled them "mic", a copy-paste error from
        # the conc_sim frame above.
        "measurement_type": "flux",
        "target_id": map(code_to_rxn.get, stan_input["reaction_yflux"]),
        "experiment_id": map(code_to_exp.get, stan_input["experiment_yflux"]),
        "measurement": infd.posterior["yflux_sim"].to_series().values,
        "error_scale": stan_input["sigma_flux"],
    })
    # Keep the original (non-simulated) enzyme measurement rows.
    enz_og = pd.read_csv(mi.config.experiments_file
                         ).loc[lambda df: df["measurement_type"] == "enz"]
    return pd.concat([conc_sim, flux_sim, enz_og], ignore_index=True)
def test_load_maud_input_from_toml():
    """Test that the function load_maud_input_from_toml behaves as expected."""
    expected_stan_codes = {
        "metabolite_codes": {"M1": 1, "M2": 2},
        "mic_codes": {"M1_e": 1, "M2_e": 2, "M1_c": 3, "M2_c": 4},
        "balanced_mic_codes": {"M1_c": 3, "M2_c": 4},
        "unbalanced_mic_codes": {"M1_e": 1, "M2_e": 2},
        "reaction_codes": {"r1": 1, "r2": 2, "r3": 3},
        "experiment_codes": {"condition_1": 1, "condition_2": 2},
        "enzyme_codes": {"r1": 1, "r2": 2, "r3": 3},
        "phos_enz_codes": {},
        "drain_codes": {},
    }
    mi = io.load_maud_input_from_toml(os.path.join(data_path, "linear"))
    # r1 should convert external M1 into cytosolic M1.
    r1_stoichiometry = mi.kinetic_model.reactions["r1"].stoichiometry
    assert r1_stoichiometry == {"M1_e": -1, "M1_c": 1}
    # There should be a kcat prior for enzyme r1.
    assert "r1" in map(lambda p: p.enzyme_id, mi.priors.kcat_priors)
    exp = [e for e in mi.experiments.experiments if e.id == "condition_1"][0]
    assert exp.measurements["mic"]["M1_c"].target_type == "mic"
    assert mi.stan_codes.__dict__ == expected_stan_codes
def simulate(data_path, output_dir, n):
    """Generate draws from the prior mean."""
    mi = load_maud_input_from_toml(data_path)
    now = datetime.now().strftime("%Y%m%d%H%M%S")
    # Timestamped output directory so repeated runs never collide.
    output_path = os.path.join(
        output_dir, f"maud_output_sim-{mi.config.name}-{now}"
    )
    samples_path = os.path.join(output_path, "samples")
    ui_dir = os.path.join(output_path, "user_input")
    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(samples_path)
    print(f"Copying user input from {data_path} to {ui_dir}")
    shutil.copytree(data_path, ui_dir)
    stanfit = sampling.simulate(mi, samples_path, n)
    # Print each group of simulated quantities under its own heading.
    for heading, params in [
        ("\nSimulated concentrations and fluxes:", ["conc", "flux"]),
        ("\nSimulated measurements:", ["yconc_sim", "yflux_sim"]),
        ("\nSimulated log likelihoods:", ["log_lik_conc", "log_lik_flux"]),
    ]:
        print(heading)
        print(stanfit.draws_pd(params=params).T)
    return output_path
def main():
    """Run the script.

    Simulates measurements from one copy of the input, overwrites the
    experiments file of a second copy with the simulated values, then runs
    maud's sampler against the doctored input.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    now = datetime.now().strftime("%Y%m%d%H%M%S")
    input_path = os.path.join(here, INPUT_DATA)
    input_dirname = os.path.split(input_path)[-1]
    sim_study_folder = os.path.join(here, f"sim_study-{input_dirname}-{now}")
    # make the output directory
    os.mkdir(sim_study_folder)
    # copy the input directory to output/sim_input and output/sample_input
    sim_input_dir, sample_input_dir = (
        shutil.copytree(input_path, os.path.join(sim_study_folder, dirname))
        for dirname in ["sim_input", "sample_input"]
    )
    # simulate some measurements
    sim_dir = simulate(sim_input_dir, output_dir=sim_study_folder, n=1)
    # overwrite the measurements in the sample input based on the simulation
    new_experiments = get_experiment_table_from_sim(sim_dir)
    csv_target = load_maud_input_from_toml(
        sample_input_dir).config.experiments_file
    # Fix: index=False — the default to_csv call writes pandas' integer index
    # as a spurious unnamed first column, which the experiments file that maud
    # re-reads does not contain.
    new_experiments.to_csv(csv_target, index=False)
    # run maud sample against the doctored input
    sample(sample_input_dir, output_dir=sim_study_folder)
def test_linear():
    """Tests linear model.

    tests from code generation to sampling of the linear model by computing
    50 samples after 50 warmups and checking if the sampled mean is within
    the 94% CI of a precomputed set.

    NOTE(review): the assertions below compare the posterior *mean* of each
    column against the precomputed 3%/97% control interval (the original
    docstring said "median").
    """
    expected = {
        # marginal 3% and 97% intervals from a control run. Use the following
        # code to generate a dictionary like this from a CmdStanMCMC object
        # called 'fit':
        #
        # draws = fit.draws_pd()
        # expected = dict(zip(
        #     draws.columns, map(list, draws.quantile([0.03, 0.97]).T.values)
        # ))
        "lp__": [-29.316072, -16.076300999999997],
        "accept_stat__": [0.96072929, 0.99988951],
        "stepsize__": [0.133839, 0.133839],
        "treedepth__": [5.0, 5.0],
        "n_leapfrog__": [31.0, 31.0],
        "divergent__": [0.0, 0.0],
        "energy__": [26.507623, 44.448424],
        "formation_energy_z[1]": [-1.6088815, 1.7438237],
        "formation_energy_z[2]": [-1.9428797, 1.8600326],
        "kcat[1]": [0.7354878399999999, 3.3920119],
        "kcat[2]": [0.7169074799999999, 3.5383422],
        "kcat[3]": [0.5103703599999999, 2.3251939000000004],
        "km[1]": [0.29829006999999996, 2.1484104000000004],
        "km[2]": [0.39007882, 3.3307122000000002],
        "km[3]": [0.30081803999999995, 2.1872641],
        "km[4]": [0.30032217, 2.956041],
        "km[5]": [0.35870995999999994, 2.4696105],
        "km[6]": [0.35748458, 3.1736108000000005],
        "enzyme[1,1]": [0.90327701, 1.1091029000000001],
        "enzyme[2,1]": [1.3650513999999998, 1.6433149],
        "enzyme[1,2]": [0.9169324400000001, 1.0900779],
        "enzyme[2,2]": [0.45675294, 0.55617683],
        "enzyme[1,3]": [0.9258319500000001, 1.087983],
        "enzyme[2,3]": [1.3852606, 1.6246428],
        "conc_unbalanced[1,1]": [1.8437724, 2.2289229],
        "conc_unbalanced[2,1]": [1.83488, 2.1967661],
        "conc_unbalanced[1,2]": [0.92532526, 1.0731987],
        "conc_unbalanced[2,2]": [0.9100531199999999, 1.0846023999999999],
        "ki[1]": [0.35249536, 2.0479711000000003],
        "dissociation_constant_t[1]": [0.39240253999999997, 2.9900479000000004],
        "dissociation_constant_r[1]": [0.3673301, 2.4169647000000003],
        "transfer_constant[1]": [0.26424406999999994, 2.6506271],
        "transfer_constant[2]": [0.27446419000000005, 2.8339571],
        "formation_energy[1]": [-1.0804408, -0.9128092999999999],
        "formation_energy[2]": [-2.0971405, -1.9069949],
        "conc[1,1]": [1.8437724, 2.2289229],
        "conc[2,1]": [1.83488, 2.1967661],
        "conc[1,2]": [0.92532526, 1.0731987],
        "conc[2,2]": [0.9100531199999999, 1.0846023999999999],
        "conc[1,3]": [1.4225884000000002, 1.8135276],
        "conc[2,3]": [1.5866924, 1.9812213],
        "conc[1,4]": [1.2548785, 1.6608011999999999],
        "conc[2,4]": [1.0856881999999999, 1.3590284],
        "flux[1,1]": [0.058571784999999994, 0.26232753],
        "flux[2,1]": [0.044527502, 0.2225318],
        "flux[1,2]": [0.058571784999999994, 0.26232753],
        "flux[2,2]": [0.044527502, 0.2225318],
        "flux[1,3]": [0.058571781999999996, 0.26232753],
        "flux[2,3]": [0.044527502, 0.2225318],
        "keq[1]": [1.0, 1.0],
        "keq[2]": [1.4224362, 1.5845171],
        "keq[3]": [1.0, 1.0],
        "log_lik_flux[1]": [-1.2944431, 1.3392327],
        "log_lik_flux[2]": [0.014465301999999961, 1.376049],
        "log_lik_conc[1]": [-0.73380201, 0.85230665],
        "log_lik_conc[2]": [-0.55109339, 1.0460312],
        "log_lik_conc[3]": [-0.9655723700000002, 1.3825698],
        "log_lik_conc[4]": [0.5349423899999999, 2.07629],
        "log_lik_conc[5]": [-0.014904273000000003, 0.7953745],
        "log_lik_conc[6]": [-0.50145479, 1.1199048],
        "log_lik_conc[7]": [-0.87171379, 1.3808676000000002],
        "log_lik_conc[8]": [-0.07020213300000022, 2.0754574999999997],
    }
    linear_input = os.path.join(data_path, "linear")
    # Write samples to a throwaway directory; removed at the end of the test.
    temp_directory = tempfile.mkdtemp(dir=data_path)
    mi = load_maud_input_from_toml(linear_input)
    # Short single-chain run: 50 warmup + 50 sampling iterations.
    mi.config.cmdstanpy_config.update({
        "chains": 1, "iter_sampling": 50, "iter_warmup": 50, "save_warmup": False
    })
    fit = sample(mi, output_dir=temp_directory)
    samples_test = fit.draws_pd()
    # Check that each output column (other than the diagnostic ones) is
    # statistically similar to its matching control column.
    test_mean = samples_test.mean()
    # Columns ending in "__" are sampler diagnostics, not model parameters.
    cols = [c for c in expected.keys() if not c.endswith("__")]
    for col in cols:
        assert col in samples_test.columns, col + " is not present in test"
        assert test_mean[col] >= expected[col][0], col + " is too low."
        assert test_mean[col] <= expected[col][1], col + " is too high."
    # Delete temporary directory
    shutil.rmtree(temp_directory)