def do_sample(data_path, output_dir):
    """Run MCMC sampling for the Maud input directory at data_path.

    A new directory called "maud_output-<config name>-<timestamp>" is created
    inside output_dir. The directory at data_path is copied to
    <new dir>/user_input, sample is run so that its samples are written to
    <new dir>/samples, and the results of cmdstanpy's diagnose and summary
    methods are printed. The draws are finally saved to <new dir>/idata.nc.

    :param data_path: directory containing the Maud input
    :param output_dir: directory in which the new output directory is created
    :return: path of the newly created output directory
    """
    mi = load_maud_input(data_path)
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_path = os.path.join(
        output_dir, f"maud_output-{mi.config.name}-{timestamp}"
    )
    samples_path = os.path.join(output_path, "samples")
    user_input_copy = os.path.join(output_path, "user_input")

    print("Creating output directory: " + output_path)
    # The parent has to exist before its "samples" child can be made.
    for new_dir in (output_path, samples_path):
        os.mkdir(new_dir)

    print(f"Copying user input from {data_path} to {user_input_copy}")
    shutil.copytree(data_path, user_input_copy)

    fit = sample(mi, samples_path)
    for report in (fit.diagnose, fit.summary):
        print(report())

    inference_data = get_idata(fit.runset.csv_files, mi, "train")
    inference_data.to_netcdf(os.path.join(output_path, "idata.nc"))
    return output_path
def do_predict(data_path: str):
    """Run the predict step on top of a Maud output folder at data_path.

    The folder at data_path must contain the file idata.nc and the directory
    user_input; both are loaded and handed to predict. A new directory called
    "maud-predict_output-<config name>-<timestamp>" is created inside
    data_path, together with a test_samples subdirectory, and the result of
    predict is saved there as idata_predict.nc.

    :param data_path: path to an existing Maud output folder
    """
    trained_idata = az.from_netcdf(os.path.join(data_path, "idata.nc"))
    mi = load_maud_input(os.path.join(data_path, "user_input"))

    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_path = os.path.join(
        data_path, f"maud-predict_output-{mi.config.name}-{timestamp}"
    )
    test_samples_path = os.path.join(output_path, "test_samples")

    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(test_samples_path)

    # NOTE(review): predict receives output_path, not test_samples_path, so
    # nothing in this function writes into test_samples - confirm intended.
    predicted_idata = predict(mi, output_path, trained_idata)

    # delete attrs hack to make netcdf save work:
    # https://github.com/arviz-devs/arviz/issues/1554
    predicted_idata.sample_stats.attrs = {}  # type: ignore
    predicted_idata.posterior.attrs = {}  # type: ignore

    predicted_idata.to_netcdf(os.path.join(output_path, "idata_predict.nc"))
def _print_simulated_means(idata, heading, var_names, as_table):
    """Print heading, then the chain/draw mean of each posterior variable.

    :param idata: object whose posterior group holds the variables
    :param heading: text printed before the values
    :param var_names: names of the posterior variables to print, in order
    :param as_table: if True, unstack and transpose each series before
        printing; otherwise print the series as it is
    """
    print(heading)
    for var_name in var_names:
        means = idata.posterior[var_name].mean(dim=["chain", "draw"]).to_series()
        print(means.unstack().T if as_table else means)


def do_simulate(data_path, output_dir, n):
    """Generate draws from the initial values.

    A new directory called "maud_output_sim-<config name>-<timestamp>" is
    created inside output_dir. The directory at data_path is copied to
    <new dir>/user_input, simulate is run with <new dir>/samples as its
    output location, the draws are saved to <new dir>/idata.nc and the mean
    over chains and draws of a fixed set of posterior variables is printed.

    :param data_path: directory containing the Maud input
    :param output_dir: existing directory in which the new output directory
        is created
    :param n: passed unchanged to simulate (presumably the number of draws;
        confirm against simulate)
    :return: path of the newly created output directory
    """
    mi = load_maud_input(data_path=data_path)
    now = datetime.now().strftime("%Y%m%d%H%M%S")
    output_name = f"maud_output_sim-{mi.config.name}-{now}"
    output_path = os.path.join(output_dir, output_name)
    samples_path = os.path.join(output_path, "samples")
    ui_dir = os.path.join(output_path, "user_input")
    print("Creating output directory: " + output_path)
    os.mkdir(output_path)
    os.mkdir(samples_path)
    print(f"Copying user input from {data_path} to {ui_dir}")
    # Actually perform the copy announced above, so that the output
    # directory carries the input that produced it.
    shutil.copytree(data_path, ui_dir)
    stanfit = simulate(mi, samples_path, n)
    idata = get_idata(stanfit.runset.csv_files, mi, "train")
    idata.to_netcdf(os.path.join(output_path, "idata.nc"))

    # (heading, posterior variable names, print as unstacked table?)
    sections = [
        ("\n\nSimulated concentrations:", ["conc"], True),
        ("\n\nSimulated fluxes:", ["flux"], True),
        ("\n\nSimulated enzyme concentrations:", ["conc_enzyme"], True),
        ("\n\nSimulated reaction delta Gs:", ["dgrs"], False),
        ("\n\nSimulated measurements:", ["yconc_sim", "yflux_sim"], False),
        (
            "\n\nSimulated log likelihoods:",
            ["log_lik_conc", "log_lik_flux"],
            False,
        ),
        ("\n\nSimulated allostery terms:", ["allostery"], True),
        ("\n\nSimulated reversibility terms:", ["reversibility"], True),
        ("\n\nSimulated saturation terms:", ["saturation"], True),
    ]
    # The phosphorylation section is only printed when the kinetic model
    # has phosphorylations.
    if mi.kinetic_model.phosphorylations is not None:
        sections.append(
            ("\n\nSimulated phosphorylation terms:", ["phosphorylation"], True)
        )
    sections.append(("\n\nSimulated membrane potential:", ["psi"], False))

    for heading, var_names, as_table in sections:
        _print_simulated_means(idata, heading, var_names, as_table)
    return output_path
def test_load_maud_input():
    """Check what load_maud_input produces for the input at LINEAR_PATH.

    Covers the stoichiometry of reaction r1, the presence of the r1_r1 kcat
    prior, the ids of selected Stan variables, and the training Stan input,
    which is compared with the JSON file at EXPECTED_STAN_INPUT_PATH.
    """
    mi = load_maud_input(data_path=LINEAR_PATH)

    # Kinetic model and priors.
    reaction_r1 = next(
        rxn for rxn in mi.kinetic_model.reactions if rxn.id == "r1"
    )
    assert reaction_r1.stoichiometry == {"M1_e": -1, "M1_c": 1}
    assert "r1_r1" in mi.priors.kcat.location.index

    # Ids attached to a selection of Stan variables.
    expected_var_ids = {
        "dgf": [["M1", "M2"]],
        "conc_unbalanced": [["condition1", "condition2"], ["M1_e", "M2_e"]],
        "conc_pme": [["condition1", "condition2"], []],
        "dissociation_constant": [["r1_M2_c", "r2_M1_c"]],
    }
    for var_name, expected_ids in expected_var_ids.items():
        assert var_name in mi.stan_variable_set.__dataclass_fields__
        assert getattr(mi.stan_variable_set, var_name).ids == expected_ids

    # Training Stan input against the stored reference.
    actual_stan_input = mi.stan_input_train.stan_input_dict
    with open(EXPECTED_STAN_INPUT_PATH, "r") as f:
        expected_stan_input = json.load(f)
    assert set(actual_stan_input.keys()) == set(expected_stan_input.keys())
    for key, value in actual_stan_input.items():
        # JSON has no array type, so arrays are compared as nested lists.
        if isinstance(value, np.ndarray):
            actual = value.tolist()
        else:
            actual = value
        assert_equal(
            actual,
            expected_stan_input[key],
            err_msg=f"{key} different from expected.",
        )