Example #1
def test_linear(input_dirname):
    """Test that the linear model works."""

    input_dir_path = os.path.join(HERE, input_dirname)
    mi_in = load_maud_input_from_toml(input_dir_path)
    true_params_path = os.path.join(input_dir_path, TRUE_PARAMS_FILENAME)
    with open(true_params_path, "r") as f:
        true_params = json.load(f)
    study = run_simulation_study(mi_in, true_params)
    infd = load_infd(study.samples.runset.csv_files, study.mi)
    for param_name, param_vals in true_params.items():
        if any(param_vals):
            dimnames = [
                d for d in infd.posterior[param_name].dims if d not in ["chain", "draw"]
            ]
            q = (
                infd.posterior[param_name]
                .to_series()
                .unstack(dimnames)
                .quantile([0.025, 0.975])
                .T.assign(true=np.array(param_vals).ravel())
            )
            q.columns = ["low", "high", "true"]
            for i, row in q.iterrows():
                msg = (
                    f"True value for {param_name} outside 95% CI at coord {str(i)}!\n"
                    f"\tTrue value: {str(row['true'])}\n"
                    f"\t2.5% posterior quantile: {str(row['low'])}\n"
                    f"\t97.5% posterior quantile: {str(row['high'])}\n"
                )
                assert row["true"] >= row["low"] and row["true"] <= row["high"], msg
Example #2
def main(path_to_output_dir: Path):
    """Run maudit: this is the main entrypoint.

    :param path_to_output_dir: Path to a directory that was created by maud to
    store output files.

    """
    ui_path = os.path.join(path_to_output_dir, "user_input")
    samples_dir = os.path.join(path_to_output_dir, "samples")
    csvs = [
        os.path.join(samples_dir, f) for f in os.listdir(samples_dir)
        if f.endswith(".csv")
    ]
    typer.echo(f"Reading data from {path_to_output_dir}")
    typer.echo(f"Found csv files: {csvs}")
    mi = load_maud_input_from_toml(ui_path)
    infd_dict = return_dict_of_infd(csvs, mi)
    lp_pd = return_pd_var(infd_dict, "lp")
    step_size_pd = return_pd_var(infd_dict, "step_size")
    lp_pd_w = return_pd_var(infd_dict, "lp", True)
    step_size_pd_w = return_pd_var(infd_dict, "step_size", True)

    lp_plot = plot_var_time_series(lp_pd, "lp")
    lp_plot.save(filename='lp_time_series.png')
    step_size_plot = plot_var_time_series(step_size_pd, "step_size")
    step_size_plot.save(filename='step_size_time_series.png')
    lp_plot_w = plot_var_time_series(lp_pd_w, "lp")
    lp_plot_w.save(filename='lp_time_series_warmup.png')
    step_size_plot_w = plot_var_time_series(step_size_pd_w, "step_size")
    step_size_plot_w.save(filename='step_size_time_series_warmup.png')
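Because main takes a typed Path argument and reports progress with typer.echo, it is presumably exposed as a command-line entrypoint. A minimal sketch of how that wiring might look, assuming the script is run directly and registered with typer.run (the guard below is an assumption, not part of the original excerpt):

if __name__ == "__main__":
    # typer.run builds a CLI whose single positional argument maps to path_to_output_dir.
    typer.run(main)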
Example #3
def sample(data_path, output_dir):
    """Generate MCMC samples given a user input directory.

    This function creates a new directory in output_dir with a name starting
    with "maud_output". It first copies the directory at data_path into this
    new directory at new_dir/user_input, then runs the sampling.sample
    function to write samples to new_dir/samples. Finally it prints the
    results of cmdstanpy's diagnose and summary methods.

    """
    mi = load_maud_input_from_toml(data_path)
    now = datetime.now().strftime("%Y%m%d%H%M%S")
    output_name = f"maud_output-{mi.config.name}-{now}"
    output_path = os.path.join(output_dir, output_name)
    samples_path = os.path.join(output_path, "samples")
    ui_dir = os.path.join(output_path, "user_input")
    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(samples_path)
    print(f"Copying user input from {data_path} to {ui_dir}")
    shutil.copytree(data_path, ui_dir)
    stanfit = sampling.sample(mi, samples_path)
    print(stanfit.diagnose())
    print(stanfit.summary())
    return output_path
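For orientation, a hypothetical direct call to sample (the paths are placeholders; in practice the function is normally reached through Maud's command line rather than called directly):

# Hypothetical usage: sample from a user input directory and report the output location.
output_path = sample("path/to/user_input", output_dir=".")
print(f"Output written to {output_path}")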
Example #4
def test_get_input_data():
    """Test that the function get_input_data behaves as expected."""
    input_path = os.path.join(data_path, "linear")
    mi = io.load_maud_input_from_toml(input_path)
    with open(os.path.join(input_path, "linear.json"), "r") as f:
        expected = json.load(f)
    actual = sampling.get_input_data(mi)
    assert actual.keys() == expected.keys()
    for k in actual.keys():
        assert_equal(actual[k],
                     expected[k],
                     err_msg=f"{k} is different from expected.")
Example #5
def get_experiment_table_from_sim(sim_dir: str) -> pd.DataFrame:
    """Get a table of simulated measurements.

    The output should be compatible with maud, so that it is possible to
    overwrite the experiments file.

    """
    ui_path = os.path.join(sim_dir, "user_input")
    sim_csv_path = os.path.join(sim_dir, "samples")
    csv_file = os.path.join(
        sim_csv_path,
        next(filter(lambda f: f.endswith(".csv"), os.listdir(sim_csv_path))),
    )
    with open(os.path.join(sim_csv_path, "input_data.json"), "r") as f:
        stan_input = json.load(f)
    mi = load_maud_input_from_toml(ui_path)
    infd = az.from_cmdstan(csv_file)
    code_to_exp = {v: k for k, v in mi.stan_codes.experiment_codes.items()}
    code_to_mic = {v: k for k, v in mi.stan_codes.mic_codes.items()}
    code_to_rxn = {v: k for k, v in mi.stan_codes.reaction_codes.items()}
    conc_sim = pd.DataFrame({
        "measurement_type": "mic",
        "target_id": map(code_to_mic.get, stan_input["mic_ix_yconc"]),
        "experiment_id": map(code_to_exp.get, stan_input["experiment_yconc"]),
        "measurement": infd.posterior["yconc_sim"].to_series().values,
        "error_scale": stan_input["sigma_conc"],
    })
    flux_sim = pd.DataFrame({
        # these are reaction flux measurements, so the type is "flux", not "mic"
        "measurement_type": "flux",
        "target_id": map(code_to_rxn.get, stan_input["reaction_yflux"]),
        "experiment_id": map(code_to_exp.get, stan_input["experiment_yflux"]),
        "measurement": infd.posterior["yflux_sim"].to_series().values,
        "error_scale": stan_input["sigma_flux"],
    })
    enz_og = pd.read_csv(mi.config.experiments_file).loc[
        lambda df: df["measurement_type"] == "enz"
    ]
    return pd.concat([conc_sim, flux_sim, enz_og], ignore_index=True)
Example #6
def test_load_maud_input_from_toml():
    """Test that the function load_maud_input_from_toml behaves as expected."""
    expected_stan_codes = {
        "metabolite_codes": {
            "M1": 1,
            "M2": 2
        },
        "mic_codes": {
            "M1_e": 1,
            "M2_e": 2,
            "M1_c": 3,
            "M2_c": 4
        },
        "balanced_mic_codes": {
            "M1_c": 3,
            "M2_c": 4
        },
        "unbalanced_mic_codes": {
            "M1_e": 1,
            "M2_e": 2
        },
        "reaction_codes": {
            "r1": 1,
            "r2": 2,
            "r3": 3
        },
        "experiment_codes": {
            "condition_1": 1,
            "condition_2": 2
        },
        "enzyme_codes": {
            "r1": 1,
            "r2": 2,
            "r3": 3
        },
        "phos_enz_codes": {},
        "drain_codes": {},
    }
    mi = io.load_maud_input_from_toml(os.path.join(data_path, "linear"))
    assert mi.kinetic_model.reactions["r1"].stoichiometry == {
        "M1_e": -1,
        "M1_c": 1
    }
    assert "r1" in map(lambda p: p.enzyme_id, mi.priors.kcat_priors)
    exp = [e for e in mi.experiments.experiments if e.id == "condition_1"][0]
    assert exp.measurements["mic"]["M1_c"].target_type == "mic"
    assert mi.stan_codes.__dict__ == expected_stan_codes
Example #7
def simulate(data_path, output_dir, n):
    """Generate draws from the prior mean."""

    mi = load_maud_input_from_toml(data_path)
    now = datetime.now().strftime("%Y%m%d%H%M%S")
    output_name = f"maud_output_sim-{mi.config.name}-{now}"
    output_path = os.path.join(output_dir, output_name)
    samples_path = os.path.join(output_path, "samples")
    ui_dir = os.path.join(output_path, "user_input")
    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(samples_path)
    print(f"Copying user input from {data_path} to {ui_dir}")
    shutil.copytree(data_path, ui_dir)
    stanfit = sampling.simulate(mi, samples_path, n)
    print("\nSimulated concentrations and fluxes:")
    print(stanfit.draws_pd(params=["conc", "flux"]).T)
    print("\nSimulated measurements:")
    print(stanfit.draws_pd(params=["yconc_sim", "yflux_sim"]).T)
    print("\nSimulated log likelihoods:")
    print(stanfit.draws_pd(params=["log_lik_conc", "log_lik_flux"]).T)
    return output_path
Example #8
def main():
    """Run the script."""
    here = os.path.dirname(os.path.abspath(__file__))
    now = datetime.now().strftime("%Y%m%d%H%M%S")
    input_path = os.path.join(here, INPUT_DATA)
    input_dirname = os.path.split(input_path)[-1]
    sim_study_folder = os.path.join(here, f"sim_study-{input_dirname}-{now}")
    # make the output directory
    os.mkdir(sim_study_folder)
    # copy the input directory to output/sim_input and output/sample_input
    sim_input_dir, sample_input_dir = (
        shutil.copytree(input_path, os.path.join(sim_study_folder, dirname))
        for dirname in ["sim_input", "sample_input"]
    )
    # simulate some measurements
    sim_dir = simulate(sim_input_dir, output_dir=sim_study_folder, n=1)
    # overwrite the measurements in the sample input based on the simulation
    new_experiments = get_experiment_table_from_sim(sim_dir)
    csv_target = load_maud_input_from_toml(
        sample_input_dir).config.experiments_file
    new_experiments.to_csv(csv_target)
    # run maud sample against the doctored input
    sample(sample_input_dir, output_dir=sim_study_folder)
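A minimal sketch of how this simulation-study script might be executed, assuming it is run as a standalone module (the guard is an assumption, not shown in the original excerpt):

if __name__ == "__main__":
    # Run the full simulation study: simulate, overwrite measurements, then sample.
    main()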
Example #9
def test_linear():
    """Tests linear model.

    tests from code generation to sampling of the linear model by computing 50
    samples after 50 warmups and checking if the sampled median is within the
    94% CI of a precomputed set.
    """
    expected = {
        # marginal 3% and 97% intervals from a control run. Use the following
        # code to generate a dictionary like this from a CmdStanMCMC object
        # called 'fit':
        #
        # draws = fit.draws_pd()
        # expected = dict(zip(
        #     draws.columns, map(list, draws.quantile([0.03, 0.97]).T.values)
        # ))
        "lp__": [-29.316072, -16.076300999999997],
        "accept_stat__": [0.96072929, 0.99988951],
        "stepsize__": [0.133839, 0.133839],
        "treedepth__": [5.0, 5.0],
        "n_leapfrog__": [31.0, 31.0],
        "divergent__": [0.0, 0.0],
        "energy__": [26.507623, 44.448424],
        "formation_energy_z[1]": [-1.6088815, 1.7438237],
        "formation_energy_z[2]": [-1.9428797, 1.8600326],
        "kcat[1]": [0.7354878399999999, 3.3920119],
        "kcat[2]": [0.7169074799999999, 3.5383422],
        "kcat[3]": [0.5103703599999999, 2.3251939000000004],
        "km[1]": [0.29829006999999996, 2.1484104000000004],
        "km[2]": [0.39007882, 3.3307122000000002],
        "km[3]": [0.30081803999999995, 2.1872641],
        "km[4]": [0.30032217, 2.956041],
        "km[5]": [0.35870995999999994, 2.4696105],
        "km[6]": [0.35748458, 3.1736108000000005],
        "enzyme[1,1]": [0.90327701, 1.1091029000000001],
        "enzyme[2,1]": [1.3650513999999998, 1.6433149],
        "enzyme[1,2]": [0.9169324400000001, 1.0900779],
        "enzyme[2,2]": [0.45675294, 0.55617683],
        "enzyme[1,3]": [0.9258319500000001, 1.087983],
        "enzyme[2,3]": [1.3852606, 1.6246428],
        "conc_unbalanced[1,1]": [1.8437724, 2.2289229],
        "conc_unbalanced[2,1]": [1.83488, 2.1967661],
        "conc_unbalanced[1,2]": [0.92532526, 1.0731987],
        "conc_unbalanced[2,2]": [0.9100531199999999, 1.0846023999999999],
        "ki[1]": [0.35249536, 2.0479711000000003],
        "dissociation_constant_t[1]":
        [0.39240253999999997, 2.9900479000000004],
        "dissociation_constant_r[1]": [0.3673301, 2.4169647000000003],
        "transfer_constant[1]": [0.26424406999999994, 2.6506271],
        "transfer_constant[2]": [0.27446419000000005, 2.8339571],
        "formation_energy[1]": [-1.0804408, -0.9128092999999999],
        "formation_energy[2]": [-2.0971405, -1.9069949],
        "conc[1,1]": [1.8437724, 2.2289229],
        "conc[2,1]": [1.83488, 2.1967661],
        "conc[1,2]": [0.92532526, 1.0731987],
        "conc[2,2]": [0.9100531199999999, 1.0846023999999999],
        "conc[1,3]": [1.4225884000000002, 1.8135276],
        "conc[2,3]": [1.5866924, 1.9812213],
        "conc[1,4]": [1.2548785, 1.6608011999999999],
        "conc[2,4]": [1.0856881999999999, 1.3590284],
        "flux[1,1]": [0.058571784999999994, 0.26232753],
        "flux[2,1]": [0.044527502, 0.2225318],
        "flux[1,2]": [0.058571784999999994, 0.26232753],
        "flux[2,2]": [0.044527502, 0.2225318],
        "flux[1,3]": [0.058571781999999996, 0.26232753],
        "flux[2,3]": [0.044527502, 0.2225318],
        "keq[1]": [1.0, 1.0],
        "keq[2]": [1.4224362, 1.5845171],
        "keq[3]": [1.0, 1.0],
        "log_lik_flux[1]": [-1.2944431, 1.3392327],
        "log_lik_flux[2]": [0.014465301999999961, 1.376049],
        "log_lik_conc[1]": [-0.73380201, 0.85230665],
        "log_lik_conc[2]": [-0.55109339, 1.0460312],
        "log_lik_conc[3]": [-0.9655723700000002, 1.3825698],
        "log_lik_conc[4]": [0.5349423899999999, 2.07629],
        "log_lik_conc[5]": [-0.014904273000000003, 0.7953745],
        "log_lik_conc[6]": [-0.50145479, 1.1199048],
        "log_lik_conc[7]": [-0.87171379, 1.3808676000000002],
        "log_lik_conc[8]": [-0.07020213300000022, 2.0754574999999997],
    }

    linear_input = os.path.join(data_path, "linear")
    temp_directory = tempfile.mkdtemp(dir=data_path)
    mi = load_maud_input_from_toml(linear_input)
    mi.config.cmdstanpy_config.update({
        "chains": 1,
        "iter_sampling": 50,
        "iter_warmup": 50,
        "save_warmup": False
    })
    fit = sample(mi, output_dir=temp_directory)
    samples_test = fit.draws_pd()
    # Check that the mean of each output column (other than the diagnostic
    # ones) lies within its matching control interval.
    test_mean = samples_test.mean()
    cols = [c for c in expected.keys() if not c.endswith("__")]
    for col in cols:
        assert col in samples_test.columns, col + " is not present in test"
        assert test_mean[col] >= expected[col][0], col + " is too low."
        assert test_mean[col] <= expected[col][1], col + " is too high."
    # Delete temporary directory
    shutil.rmtree(temp_directory)