Ejemplo n.º 1
0
def sample(data_path, output_dir):
    """Generate MCMC samples given a user input directory.

    Creates a new directory inside output_dir whose name starts with
    "maud_output", copies the directory at data_path into
    new_dir/user_input, then runs sampling.sample so that the draws are
    written to new_dir/samples. Finally prints the results of cmdstanpy's
    diagnose and summary methods and saves an InferenceData netcdf file.

    """
    mi = load_maud_input(data_path, mode="sample")
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_path = os.path.join(
        output_dir, f"maud_output-{mi.config.name}-{timestamp}"
    )
    samples_dir = os.path.join(output_path, "samples")
    user_input_dir = os.path.join(output_path, "user_input")
    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(samples_dir)
    print(f"Copying user input from {data_path} to {user_input_dir}")
    shutil.copytree(data_path, user_input_dir)
    stanfit = sampling.sample(mi, samples_dir)
    print(stanfit.diagnose())
    print(stanfit.summary())
    infd = load_infd(stanfit.runset.csv_files, mi)
    infd.to_netcdf(os.path.join(output_path, "infd.nc"))
    return output_path
Ejemplo n.º 2
0
def generate_inits(data_path, chain, draw, warmup):
    """Generate template for init definitions.

    :params data_path: a path to a maud output folder with both samples
    and user_input folders
    :params chain: the sampling chain of the stan sampler you want to
    export
    :params draw: the sampling draw of the sampling chain you want to
    export from the start of the sampling or warmup phase
    :params warmup: indicator variable of if it is for the warmup
    or sampling phase
    """
    samples_dir = os.path.join(data_path, "samples")
    csvs = [
        os.path.join(samples_dir, fname)
        for fname in os.listdir(samples_dir)
        if fname.endswith(".csv")
    ]
    mi = load_maud_input(os.path.join(data_path, "user_input"), mode="sample")
    infd = load_infd(csvs, mi)
    output_path = os.path.join(data_path, "generated_inits.csv")
    print("Creating init")
    inits = get_inits_from_draw(infd, mi, chain, draw, warmup)
    print(f"Saving inits to: {output_path}")
    inits.to_csv(output_path)
    return "Successfully generated prior template"
Ejemplo n.º 3
0
def simulate(data_path, output_dir, n):
    """Generate draws from the prior mean.

    Creates a new "maud_output_sim..." directory in output_dir, copies the
    user input there, runs sampling.simulate writing csvs to its samples
    subdirectory, saves an InferenceData netcdf file and prints the
    simulated quantities.
    """
    mi = load_maud_input(data_path=data_path, mode="sample")
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_path = os.path.join(
        output_dir, f"maud_output_sim-{mi.config.name}-{timestamp}"
    )
    samples_dir = os.path.join(output_path, "samples")
    user_input_dir = os.path.join(output_path, "user_input")
    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(samples_dir)
    print(f"Copying user input from {data_path} to {user_input_dir}")
    shutil.copytree(data_path, user_input_dir)
    stanfit = sampling.simulate(mi, samples_dir, n)
    infd = load_infd(stanfit.runset.csv_files, mi)
    infd.to_netcdf(os.path.join(output_path, "infd.nc"))

    def posterior_mean(var):
        # Average the simulated draws over both sampling dimensions.
        return infd.posterior[var].mean(dim=["chain", "draw"]).to_series()

    print("\nSimulated concentrations and fluxes:")
    print(posterior_mean("conc"))
    print(posterior_mean("flux"))
    print(posterior_mean("conc_enzyme"))
    print("\nSimulated measurements:")
    print(posterior_mean("yconc_sim"))
    print(posterior_mean("yflux_sim"))
    print("\nSimulated log likelihoods:")
    print(posterior_mean("log_lik_conc"))
    print(posterior_mean("log_lik_flux"))
    return output_path
Ejemplo n.º 4
0
def test_linear(input_dirname):
    """Test that the linear model works."""
    input_dir = os.path.join(HERE, input_dirname)
    mi_in = load_maud_input_from_toml(input_dir)
    with open(os.path.join(input_dir, TRUE_PARAMS_FILENAME), "r") as f:
        true_params = json.load(f)
    study = run_simulation_study(mi_in, true_params)
    infd = load_infd(study.samples.runset.csv_files, study.mi)
    for param_name, param_vals in true_params.items():
        # Skip parameters whose true values are all falsy.
        if not any(param_vals):
            continue
        posterior = infd.posterior[param_name]
        dimnames = [d for d in posterior.dims if d not in ["chain", "draw"]]
        # 95% central posterior interval per coordinate, alongside the
        # true value that generated the data.
        quantiles = (
            posterior
            .to_series()
            .unstack(dimnames)
            .quantile([0.025, 0.975])
            .T.assign(true=np.array(param_vals).ravel())
        )
        quantiles.columns = ["low", "high", "true"]
        for coord, row in quantiles.iterrows():
            msg = (
                f"True value for {param_name} outside 95% CI at coord {str(coord)}!\n"
                f"\tTrue value: {str(row['true'])}\n"
                f"\t2.5% posterior quantile: {str(row['low'])}\n"
                f"\t97.5% posterior quantile: {str(row['high'])}\n"
            )
            assert row["low"] <= row["true"] <= row["high"], msg
Ejemplo n.º 5
0
def return_dict_of_infd(csvs, mi):
    """Return dict of chain with associated infd object.

    The key for each csv path is the text between its last "-" and the
    following ".", e.g. ".../output-3.csv" -> "3".

    :params csvs: a list of csv file paths
    :params mi: a MaudInput object
    """
    # Raw strings for the regex patterns: "\." in a plain string is an
    # invalid escape sequence (SyntaxWarning from Python 3.12).
    return {
        re.split(r"\.",
                 re.split(r"-", chain)[-1])[0]: load_infd(chain, mi)
        for chain in csvs
    }
Ejemplo n.º 6
0
def main():
    """Run the script.

    Parses the command line, loads draws and user input from a Maud
    output directory, picks one draw's parameter values, renders the
    kinetic model's fluxes and ODEs with the jinja templates and writes
    the result to a yaml file.
    """
    parser = argparse.ArgumentParser(description=HELP_MSG)
    parser.add_argument("maud_output_dir",
                        type=str,
                        nargs=1,
                        help="A path to Maud output directory")
    parser.add_argument("--chain",
                        default=0,
                        help="Chain number to export parameter values for")
    parser.add_argument("--draw",
                        default=0,
                        help="Draw number to export parameter values for")
    parser.add_argument("--warmup",
                        default=0,
                        help="If draw is in warmup phase or not")
    parser.add_argument("--yaml_output",
                        default="output.yaml",
                        help="Output of constructed yaml")
    parser.add_argument("--selected_experiment",
                        default=None,
                        help="Experiment parameters exported")
    args = parser.parse_args()
    maud_output_dir = args.maud_output_dir[0]
    chain = int(args.chain)
    draw = int(args.draw)
    # Cast via int first: command-line values arrive as strings, so
    # bool("0") would be True and "--warmup 0" would be misread.
    warmup = bool(int(args.warmup))
    yaml_output = os.path.join(HERE, args.yaml_output)
    csvs = get_csvs(maud_output_dir)
    mi = load_maud_input(os.path.join(maud_output_dir, "user_input"),
                         mode="sample")
    infd = load_infd(csvs, mi)
    # Use the experiment requested on the command line, falling back to
    # the first experiment in the model. (Previously the parsed
    # --selected_experiment value was discarded: the variable was
    # hard-coded to None here.)
    selected_experiment = args.selected_experiment
    if selected_experiment is None:
        selected_experiment = list(mi.stan_coords.experiments)[0]

    # Defining Stoichiometry
    S = get_stoichiometry(mi)

    # Selecting a set of parameters from a previous run
    par_values = get_inits_from_draw(infd, mi, chain, draw, warmup)
    par_input = []

    # Selecting measurements; unmeasured balanced mics default to 0.001.
    conc_measurements = mi.measurements.yconc
    balanced_conc_values = conc_measurements.loc[selected_experiment]
    balanced_mic_values = {
        mic.id: balanced_conc_values.loc[mic.id]["measurement"]
        if mic.id in balanced_conc_values.index else 0.001
        for mic in mi.kinetic_model.mics if mic.balanced
    }

    # Experiment specific parameters
    exp_values = par_values[par_values["experiment_id"] == selected_experiment]
    conc_values = exp_values[exp_values["parameter_name"] == "conc_unbalanced"]
    enz_values = exp_values[exp_values["parameter_name"] == "conc_enzyme"]
    drain_values = exp_values[exp_values["parameter_name"] == "drain"]
    # Unbalanced metabolite concentrations ("m..." entries).
    for mic in mi.kinetic_model.mics:
        if mic.balanced is False:
            par_input.append([
                f"m{mic.id}",
                list(conc_values[conc_values["mic_id"] == mic.id]["value"])[0],
            ])
    # Enzyme concentrations ("e..." entries).
    for rxn in mi.kinetic_model.reactions:
        for enz in rxn.enzymes:
            par_input.append([
                f"e{enz.id}",
                list(
                    enz_values[enz_values["enzyme_id"] == enz.id]["value"])[0],
            ])
    # Drain fluxes ("r..." entries).
    for rxn in mi.kinetic_model.reactions:
        if rxn.reaction_mechanism == "drain":
            par_input.append([
                f"r{rxn.id}",
                list(drain_values[drain_values["drain_id"] == rxn.id]["value"])
                [0],
            ])

    # Metabolite gibbs energies
    dgfs = par_values[par_values["parameter_name"] == "dgf"]

    flux_dict = {}

    for rxn in mi.kinetic_model.reactions:
        # calculating the gibbs energy of reaction
        # accounting for water
        if rxn.reaction_mechanism == "reversible_modular_rate_law":
            tmp_dg = 0
            for mic_id, stoic in rxn.stoichiometry.items():
                met_id = next(
                    filter(lambda mic: mic.id == mic_id,
                           mi.kinetic_model.mics)).metabolite_id
                met_dgf = list(
                    dgfs[dgfs["metabolite_id"] == met_id]["value"])[0]
                tmp_dg += stoic * met_dgf
            if rxn.water_stoichiometry:
                # -157.6 is presumably the formation energy of water
                # (kJ/mol) — TODO confirm against Maud's thermo constants.
                tmp_dg += rxn.water_stoichiometry * -157.6
            # Keq = exp(dG / -RT), R = 0.008314 kJ/mol/K, T = 298.15 K.
            tmp_Keq = np.exp(tmp_dg / (-0.008314 * 298.15))

        for enz in rxn.enzymes:
            # Collect this enzyme's kinetic parameters from the draw.
            tmp_enz_pars = par_values[par_values["enzyme_id"] == enz.id]
            tmp_kms = tmp_enz_pars.loc[tmp_enz_pars["parameter_name"] == "km"]
            par_input += [[
                f"km_{row['enzyme_id']}_{row['mic_id']}", row["value"]
            ] for _, row in tmp_kms.iterrows()]
            tmp_kcats = tmp_enz_pars.loc[tmp_enz_pars["parameter_name"] ==
                                         "kcat"]
            par_input += [[f"kcat_{row['enzyme_id']}", row["value"]]
                          for _, row in tmp_kcats.iterrows()]
            tmp_kis = tmp_enz_pars.loc[tmp_enz_pars["parameter_name"] == "ki"]
            par_input += [[
                f"ki_{row['enzyme_id']}_{row['mic_id']}", row["value"]
            ] for _, row in tmp_kis.iterrows()]
            tmp_aas = tmp_enz_pars.loc[tmp_enz_pars["parameter_name"] ==
                                       "diss_r"]
            par_input += [[
                f"aa_{row['enzyme_id']}_{row['mic_id']}", row["value"]
            ] for _, row in tmp_aas.iterrows()]
            tmp_ais = tmp_enz_pars.loc[tmp_enz_pars["parameter_name"] ==
                                       "diss_t"]
            par_input += [[
                f"ai_{row['enzyme_id']}_{row['mic_id']}", row["value"]
            ] for _, row in tmp_ais.iterrows()]
            tmp_transfer_constants = tmp_enz_pars.loc[
                tmp_enz_pars["parameter_name"] == "transfer_constant"]
            par_input += [[
                f"transfer_constant_{row['enzyme_id']}", row["value"]
            ] for _, row in tmp_transfer_constants.iterrows()]

            # Substrates have negative stoichiometry, products positive.
            substrate_list = [
                f"m{mic}" for mic, stoic in rxn.stoichiometry.items()
                if stoic < 0
            ]
            product_list = [
                f"m{mic}" for mic, stoic in rxn.stoichiometry.items()
                if stoic > 0
            ]
            mic_list = [f"m{mic}" for mic, _ in rxn.stoichiometry.items()]

            # (metabolite, km name, |stoichiometry|) triples for templates.
            substrate_entry = list(
                zip(
                    substrate_list,
                    [f"km_{enz.id}_{mic[1:]}" for mic in substrate_list],
                    [
                        np.abs(rxn.stoichiometry[mic[1:]])
                        for mic in substrate_list
                    ],
                ))

            product_entry = list(
                zip(
                    product_list,
                    [f"km_{enz.id}_{mic[1:]}" for mic in product_list],
                    [
                        np.abs(rxn.stoichiometry[mic[1:]])
                        for mic in product_list
                    ],
                ))

            haldane_entry = list(
                zip(
                    [f"km_{enz.id}_{mic[1:]}" for mic in mic_list],
                    [rxn.stoichiometry[mic[1:]] for mic in mic_list],
                ))

            competitive_entry = []
            allosteric_inhibitors = []
            allosteric_activators = []

            for mod in enz.modifiers["competitive_inhibitor"]:
                competitive_entry.append(
                    [f"m{mod.mic_id}", f"ki_{enz.id}_{mod.mic_id}"])
            for mod in enz.modifiers["allosteric_activator"]:
                allosteric_activators.append(
                    [f"m{mod.mic_id}", f"aa_{enz.id}_{mod.mic_id}"])
            for mod in enz.modifiers["allosteric_inhibitor"]:
                allosteric_inhibitors.append(
                    [f"m{mod.mic_id}", f"ai_{enz.id}_{mod.mic_id}"])

            # NOTE(review): Tr and Dr are only assigned for the two
            # modular-rate-law mechanisms below; any other enzymatic
            # mechanism would raise NameError at the flux render —
            # confirm those are the only mechanisms with enzymes.
            if rxn.reaction_mechanism == "reversible_modular_rate_law":
                Trf = Template_T_met.render(met_array=substrate_entry)
                Trr = Template_T_met.render(met_array=product_entry)
                Hal = Template_Haldane.render(Km_array=haldane_entry,
                                              Keq=tmp_Keq)
                Tr = Template_Tr.render(enz=f"e{enz.id}",
                                        kcat=f"kcat_{enz.id}",
                                        Trf=Trf,
                                        Trr=Trr,
                                        Hal=Hal)
                Dr = Template_Dr.render(sub_array=substrate_entry,
                                        prod_array=product_entry)

            elif rxn.reaction_mechanism == "irreversible_modular_rate_law":
                Trf = Template_T_met.render(met_array=substrate_entry)
                Tr = Template_Tr_irr.render(enz=f"e{enz.id}",
                                            kcat=f"kcat_{enz.id}",
                                            Trf=Trf)
                Dr = Template_Dr_irr.render(sub_array=substrate_entry)

            Drreg = Template_Drreg.render(met_array=competitive_entry)
            if competitive_entry == []:
                Drreg = "0"
            Allo_Act = Template_Allo_Act_Inh.render(
                met_array=allosteric_activators)
            Allo_Inh = Template_Allo_Act_Inh.render(
                met_array=allosteric_inhibitors)
            if allosteric_activators == []:
                Allo_Act = "1"
            if allosteric_inhibitors == []:
                Allo_Inh = "1"
            if any([allosteric_inhibitors, allosteric_activators]):
                Allo = Template_Allo.render(
                    L0=f"transfer_constant_{enz.id}",
                    Dr=Dr,
                    Drreg=Drreg,
                    Allo_Inh=Allo_Inh,
                    Allo_Act=Allo_Act,
                    Subunits=enz.subunits,
                )
            else:
                Allo = "1"
            flux = Template_flux.render(Tr=Tr, Dr=Dr, Drreg=Drreg, Allo=Allo)
            flux_dict[enz.id] = flux
        if rxn.reaction_mechanism == "drain":
            substrate_list = [
                f"m{mic}" for mic, stoic in rxn.stoichiometry.items()
                if stoic < 0
            ]
            if substrate_list == []:
                substrate_list = [1]
            flux = Template_drain.render(drain=f"r{rxn.id}",
                                         sub_array=substrate_list)
            flux_dict[rxn.id] = flux

    # Assemble each balanced mic's ODE as a sum of stoichiometry-weighted
    # fluxes over the edges that touch it.
    system_odes = {}
    for mic in mi.kinetic_model.mics:
        if mic.balanced is True:
            tmp_met_ode = ""
            first = 0
            for edge in mi.stan_coords.edges:
                if S.loc[mic.id, edge] != 0:
                    if first == 0:
                        first += 1
                        tmp_met_ode += f"({S.loc[mic.id, edge]}*{flux_dict[edge]})"
                    else:
                        tmp_met_ode += f"+({S.loc[mic.id, edge]}*{flux_dict[edge]})"
            system_odes[mic.id] = tmp_met_ode
    ode_input = [[
        f"m{mic.id}", system_odes[mic.id], balanced_mic_values[mic.id]
    ] for mic in mi.kinetic_model.mics if mic.balanced is True]
    yaml_input = Template_yaml.render(parameters=par_input, odes=ode_input)
    with open(yaml_output, "w") as file:
        file.writelines(yaml_input)
Ejemplo n.º 7
0
def plot_posteriors(maud_output_dir, output_dir):
    """Plot posterior distributions of Maud model.

    Saves one violin plot per analysed variable and one pairplot per
    enzyme as png files in output_dir.

    :params maud_output_dir: path to a Maud output directory containing
        "samples" and "user_input" subdirectories. Must support the "/"
        operator (e.g. a pathlib.Path).
    :params output_dir: directory the png files are written to; must also
        support "/".
    """
    # Collecting information from draws and maud input
    csvs = list(Path(maud_output_dir / "samples").rglob("*.csv"))
    mi = io.load_maud_input(data_path=maud_output_dir / "user_input",
                            mode="sample")
    parameter_coords = get_parameter_coords(mi.stan_coords)
    infd = load_infd(csvs, mi)
    list_of_model_variables = list(infd.posterior.variables.keys())
    # Non-sampling dims per analysed variable (dims[2:] skips the leading
    # "chain" and "draw" dimensions of each posterior variable).
    var_to_dims = {
        var: list(infd.posterior[var].dims[2:])
        for var in VARIABLES_TO_ANALYSE if var in list_of_model_variables
    }
    # Long-form draws per variable: one row per (chain, draw, coord).
    var_to_draws = {
        var: infd.posterior[var].to_dataframe().reset_index()
        for var in VARIABLES_TO_ANALYSE if var in list_of_model_variables
    }
    enzyme_dims = {
        par: get_dims_enz(par, parameter_coords, var_to_dims)
        for par in ENZYME_GROUP if par in list_of_model_variables
    }
    priors = mi.priors
    confidence_intervals = dict()
    measurements = dict()
    # Retrieving priors with confidence intervals (CIs)
    for par in parameter_coords:
        if par.id in list_of_model_variables:
            # Only parameters that actually have a prior attribute.
            if f"priors_{par.id}" in dir(priors):
                par_dataframe = pd.DataFrame.from_dict(par.coords)
                if par.linking_list is None:
                    # Rename the stan-coord columns to the infd coordinate
                    # names so later merges line up.
                    coords_rename = {
                        scs: infd_coord
                        for scs, infd_coord in zip(list(par.coords.keys()),
                                                   par.infd_coord_list)
                    }
                    par_dataframe = par_dataframe.rename(
                        columns=(coords_rename))
                else:
                    par_dataframe[par.infd_coord_list[0]] = list(
                        par.linking_list.values())[0]
                par_dataframe["parameter_name"] = par.id
                p = getattr(priors, f"priors_{par.id}")
                if isinstance(p, IndPrior1d):
                    lower_ci, upper_ci = get_ci_1d(p)
                    par_dataframe["lower_ci"] = lower_ci
                    par_dataframe["upper_ci"] = upper_ci
                    confidence_intervals[par.id] = par_dataframe
                elif isinstance(p, IndPrior2d):
                    # Flatten the 2d location/scale tables to long form so
                    # they can be merged onto the coordinate dataframe.
                    location_df = (p.location.unstack().reset_index().rename(
                        columns=({
                            0: "location"
                        })))
                    scale_df = (p.scale.unstack().reset_index().rename(
                        columns=({
                            0: "scale"
                        })))
                    par_dataframe = par_dataframe.merge(
                        location_df,
                        left_on=par.infd_coord_list,
                        right_on=list(par.coords.keys()),
                    )
                    # Drop the duplicate join columns brought in by merge.
                    par_dataframe = par_dataframe.drop(list(par.coords.keys()),
                                                       axis=1)
                    par_dataframe = par_dataframe.merge(
                        scale_df,
                        left_on=par.infd_coord_list,
                        right_on=list(par.coords.keys()),
                    )
                    par_dataframe = par_dataframe.drop(list(par.coords.keys()),
                                                       axis=1)
                    if par.id in LOG_SCALE_VARIABLES:
                        # Log-scale parameters: CI is location */÷ exp(2*scale),
                        # i.e. +/- 2 sd on the log scale.
                        par_dataframe["lower_ci"] = par_dataframe.apply(
                            lambda x: np.exp(
                                np.log(x["location"]) - 2 * x["scale"]),
                            axis=1,
                        )
                        par_dataframe["upper_ci"] = par_dataframe.apply(
                            lambda x: np.exp(
                                np.log(x["location"]) + 2 * x["scale"]),
                            axis=1,
                        )
                    else:
                        # Linear-scale parameters: CI is location +/- 2*scale.
                        par_dataframe["lower_ci"] = par_dataframe.apply(
                            lambda x: x["location"] - 2 * x["scale"], axis=1)
                        par_dataframe["upper_ci"] = par_dataframe.apply(
                            lambda x: x["location"] + 2 * x["scale"], axis=1)
                    confidence_intervals[par.id] = par_dataframe
    # Retrieving mean of measurement, renaming columns to match the
    # coordinate names used in the plots.
    for measurement_id, measurement_type in zip(
        ["yconc", "yflux", "yenz"], ["conc", "flux", "conc_enzyme"]):
        rename_columns = {
            "conc": "mics",
            "flux": "reactions",
            "conc_enzyme": "enzymes"
        }
        tmp_measurements = getattr(mi.measurements,
                                   measurement_id).reset_index()
        tmp_measurements = tmp_measurements.rename(
            columns=({
                "experiment_id": "experiments",
                "target_id": measurement_type
            }))
        tmp_measurements = tmp_measurements.rename(columns=(rename_columns))
        measurements[measurement_type] = tmp_measurements
    # Plotting violin plots from parameter distributions
    for var in list(var_to_dims.keys()):
        dims = var_to_dims[var]
        draws = var_to_draws[var]
        plot = plot_violin_plots(
            var,
            dims,
            draws,
            LOG_SCALE_VARIABLES,
            UNITS,
            confidence_intervals,
            measurements,
        )
        plot.save(
            filename=output_dir / f"{var}_posterior.png",
            verbose=False,
            dpi=300,
        )
    # plotting pairplots of enzyme parameters
    for enz in mi.stan_coords.enzymes:
        enz_par_df = pd.DataFrame()
        for par, par_df in enzyme_dims.items():
            par_draws = var_to_draws[par]
            enz_dims = par_df[par_df["enzyme_id"] == enz]["par_id"].to_list()
            if len(enz_dims) > 0:
                for par_ind in enz_dims:
                    tmp_enz_par_df = pd.DataFrame()
                    # Draws for this particular parameter coordinate only.
                    tmp_enz_par_df = par_draws.loc[par_draws[
                        var_to_dims[par][0]] == par_ind].copy()
                    # Pairplots are drawn on the log scale.
                    enz_par_df[par + "-" + par_ind] = np.log(
                        tmp_enz_par_df[par].to_list())
        sns.pairplot(enz_par_df)
        plt.savefig(output_dir / f"{enz}_pairplot.png")