def load_mcmc_tables(calib_dirpath: str):
    mcmc_tables = []
    for db_path in _find_db_paths(calib_dirpath):
        db = Database(db_path)
        mcmc_tables.append(db.query("mcmc_run"))

    return mcmc_tables
def load_derived_output_tables(calib_dirpath: str):
    derived_output_tables = []
    for db_path in _find_db_paths(calib_dirpath):
        db = Database(db_path)
        derived_output_tables.append(db.query("derived_outputs"))

    return derived_output_tables
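# A minimal usage sketch (not part of the original module): the directory path below is
# hypothetical and assumes a local calibration run folder containing one or more chain .db files.
example_calib_dir = "data/outputs/calibrate/example_app/example_region/run-1"
if os.path.isdir(example_calib_dir):
    mcmc_tables = load_mcmc_tables(example_calib_dir)
    derived_output_tables = load_derived_output_tables(example_calib_dir)
    print(f"Loaded {len(mcmc_tables)} MCMC chains and {len(derived_output_tables)} derived output tables")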
def plot_timeseries_with_uncertainty_for_powerbi(
    region_name: str, powerbi_db_path: str, output_dir: str
):
    """
    Works on the PowerBI version of the output database.
    Assumes a COVID model.
    TODO: Unify PowerBI and local version
    """
    os.makedirs(output_dir, exist_ok=True)
    plot_config = load_plot_config(region_name)
    db = Database(powerbi_db_path)
    uncertainty_df = db.query("uncertainty")
    outputs = uncertainty_df["type"].unique().tolist()
    quantile_vals = uncertainty_df["quantile"].unique().tolist()
    for output_name in outputs:
        this_output_dir = os.path.join(output_dir, output_name)
        os.makedirs(this_output_dir, exist_ok=True)
        plotter = FilePlotter(this_output_dir, plot_config["translations"])
        mask = uncertainty_df["type"] == output_name
        output_df = uncertainty_df[mask]
        scenarios = output_df.Scenario.unique().tolist()
        for scenario in scenarios:
            mask = output_df["Scenario"] == scenario
            scenario_df = output_df[mask]
            quantiles = {}
            for q in quantile_vals:
                mask = scenario_df["quantile"] == q
                quantiles[q] = scenario_df[mask]["value"].tolist()

            times = scenario_df.time.unique()
            logger.info("Plotting uncertainty for output %s, scenario %s", output_name, scenario)
            plots.plot_timeseries_with_uncertainty_for_powerbi(
                plotter, output_name, scenario, quantiles, times, plot_config
            )
def test_calibrate_autumn_mcmc(temp_data_dir):
    # Import autumn stuff inside function so we can mock out the database.
    priors = [
        {
            "param_name": "ice_cream_sales",
            "distribution": "uniform",
            "distri_params": [1, 5],
        }
    ]
    target_outputs = [
        {
            "output_key": "shark_attacks",
            "years": [2000, 2001, 2002, 2003, 2004],
            "values": [3, 6, 9, 12, 15],
            "loglikelihood_distri": "poisson",
        }
    ]
    multipliers = {}
    params = {
        "default": {"start_time": 2000},
        "scenario_start_time": 2000,
        "scenarios": {},
    }
    calib = Calibration(
        "sharks",
        _build_mock_model,
        params,
        priors,
        target_outputs,
        multipliers,
        1,
        1,
    )
    calib.run_fitting_algorithm(
        run_mode=CalibrationMode.AUTUMN_MCMC,
        n_iterations=50,
        n_burned=10,
        n_chains=1,
        available_time=1e6,
    )
    app_dir = os.path.join(temp_data_dir, "outputs", "calibrate", "sharks", "main")
    run_dir = os.path.join(app_dir, os.listdir(app_dir)[0])
    db_fname = [fname for fname in os.listdir(run_dir) if fname.endswith(".db")][0]
    out_db_path = os.path.join(run_dir, db_fname)
    assert os.path.exists(out_db_path)

    out_db = Database(out_db_path)
    assert set(out_db.engine.table_names()) == {"outputs", "derived_outputs", "mcmc_run"}

    mcmc_runs = out_db.query("mcmc_run")
    max_idx = mcmc_runs.loglikelihood.idxmax()
    best_run = mcmc_runs.iloc[max_idx]
    ice_cream_sales_mle = best_run.ice_cream_sales
    # This value is reproducible because the random seed is fixed.
    assert 2.9 < ice_cream_sales_mle < 3.1
def load_output_tables(calib_dirpath: str):
    output_tables = []
    for db_path in find_db_paths(calib_dirpath):
        db = Database(db_path)
        df = db.query("outputs")
        output_tables.append(df)

    return output_tables
def preprocess_demography(input_db: Database):
    loc_df = read_location_df()
    pop_df = read_population_df(loc_df)
    birth_df = read_crude_birth_df(loc_df)
    death_df = read_death_df(loc_df)
    expect_df = read_life_expectancy_df(loc_df)
    input_db.dump_df("countries", loc_df)
    input_db.dump_df("population", pop_df)
    input_db.dump_df("birth_rates", birth_df)
    input_db.dump_df("deaths", death_df)
    input_db.dump_df("life_expectancy", expect_df)
    return loc_df
def preprocess_mobility(input_db: Database, country_df):
    """
    Read Google Mobility data from CSV into input database
    """
    mob_df = pd.read_csv(MOBILITY_CSV_PATH)
    dhhs_cluster_mobility = reshape_to_clusters(mob_df)

    # Drop all sub-region 2 data, too detailed.
    major_region_mask = mob_df["sub_region_2"].isnull() & mob_df["metro_area"].isnull()
    davao_mask = mob_df.metro_area == "Davao City Metropolitan Area"
    mob_df = mob_df[major_region_mask | davao_mask].copy()

    # These two regions are the same
    mob_df.loc[(mob_df.sub_region_1 == "National Capital Region"), "sub_region_1"] = "Metro Manila"
    mob_df.loc[(mob_df.metro_area == "Davao City Metropolitan Area"), "sub_region_1"] = "Davao City"
    mob_df.loc[
        (mob_df.sub_region_1 == "Federal Territory of Kuala Lumpur"), "sub_region_1"
    ] = "Kuala Lumpur"
    mob_df = mob_df.append(dhhs_cluster_mobility)

    # Drop all rows that have NA values in 1 or more mobility columns.
    mob_cols = [c for c in mob_df.columns if c.endswith(MOBILITY_SUFFIX)]
    mask = False
    for c in mob_cols:
        mask = mask | mob_df[c].isnull()

    mob_df = mob_df[~mask].copy()
    for c in mob_cols:
        # Convert percent values to decimal: 1.0 being no change.
        mob_df[c] = mob_df[c].apply(lambda x: 1 + x / 100)

    # Drop unused columns, rename kept columns
    cols_to_keep = [*mob_cols, "country_region", "sub_region_1", "date"]
    cols_to_drop = [c for c in mob_df.columns if c not in cols_to_keep]
    mob_df = mob_df.drop(columns=cols_to_drop)
    mob_col_rename = {c: c.replace(MOBILITY_SUFFIX, "") for c in mob_cols}
    mob_df.rename(columns={**mob_col_rename, "sub_region_1": "region"}, inplace=True)

    # Convert countries to ISO3
    countries = mob_df["country_region"].unique().tolist()
    iso3s = {c: get_iso3(c, country_df) for c in countries}
    iso3_series = mob_df["country_region"].apply(lambda c: iso3s[c])
    mob_df.insert(0, "iso3", iso3_series)
    mob_df = mob_df.drop(columns=["country_region"])
    mob_df = mob_df.sort_values(["iso3", "region", "date"])
    input_db.dump_df("mobility", mob_df)
def run_full_models_for_mcmc(
    burn_in: int, src_db_path: str, dest_db_path: str, build_model, params: dict
):
    """
    Run the full baseline model and all scenarios for all accepted MCMC runs in src db.
    """
    src_db = Database(src_db_path)
    dest_db = Database(dest_db_path)
    logger.info("Copying mcmc_run table to %s", dest_db_path)
    mcmc_run_df = src_db.query("mcmc_run")

    # Apply burn in and save to destination
    burned_runs_str = ", ".join(mcmc_run_df[:burn_in].idx)
    logger.info("Burned MCMC runs %s", burned_runs_str)
    mcmc_run_df = mcmc_run_df[burn_in:]
    dest_db.dump_df("mcmc_run", mcmc_run_df)

    mcmc_runs = list(mcmc_run_df.T.to_dict().values())
    for mcmc_run in mcmc_runs:
        meta = {k: v for k, v in mcmc_run.items() if k in META_COLS}
        if not meta["accept"]:
            logger.info("Ignoring non-accepted MCMC run %s", meta["idx"])
            continue

        logger.info("Running full model for MCMC run %s", meta["idx"])
        param_updates = {k: v for k, v in mcmc_run.items() if k not in META_COLS}
        run_idx = meta["idx"].split("_")[-1]

        def update_func(ps: dict):
            return update_params(ps, param_updates)

        with Timer("Running model scenarios"):
            num_scenarios = 1 + len(params["scenarios"].keys())
            scenarios = []
            for scenario_idx in range(num_scenarios):
                scenario = Scenario(build_model, scenario_idx, params)
                scenarios.append(scenario)

            # Run the baseline scenario.
            baseline_scenario = scenarios[0]
            baseline_scenario.run(update_func=update_func)
            baseline_model = baseline_scenario.model

            # Run all the other scenarios
            for scenario in scenarios[1:]:
                scenario.run(base_model=baseline_model, update_func=update_func)

        with Timer("Saving model outputs to the database"):
            models = [s.model for s in scenarios]
            store_run_models(models, dest_db_path, run_idx=run_idx)

    logger.info("Finished running full models for all accepted MCMC runs.")
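# Hedged usage sketch (not in the source): how this function might be invoked after a calibration.
# The paths below are placeholders, and `build_model` / `app_params` stand in for whatever the
# calling application provides.
#
# run_full_models_for_mcmc(
#     burn_in=50,
#     src_db_path="calibration_outputs/mcmc_chain_0.db",
#     dest_db_path="full_run_outputs/full_models.db",
#     build_model=build_model,
#     params=app_params,
# )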
def preprocess_social_mixing(input_db: Database, country_df):
    for location in LOCATIONS:
        for sheet_number, header_arg in SHEET_NUMBERS:
            sheet_name = f"MUestimates_{location}_{sheet_number}.xlsx"
            sheet_path = os.path.join(MIXING_DIRPATH, sheet_name)
            xl = pd.ExcelFile(sheet_path)
            sheet_names = xl.sheet_names
            iso3s = [get_iso3(n, country_df) for n in sheet_names]
            for idx, sheet_name in enumerate(sheet_names):
                iso3 = iso3s[idx]
                mix_df = pd.read_excel(xl, header=header_arg, sheet_name=sheet_name)
                if sheet_number == "2":
                    renames = {n - 1: f"X{n}" for n in range(1, 17)}
                    mix_df.rename(columns=renames, inplace=True)

                mix_df.insert(0, "location", [location for _ in range(len(mix_df))])
                mix_df.insert(0, "iso3", [iso3 for _ in range(len(mix_df))])
                input_db.dump_df("social_mixing", mix_df)
def collect_map_estimate(calib_dirpath: str):
    """
    Read all MCMC outputs found in mcmc_db_folder and print the MAP parameter values.
    :return: dict of parameters
    """
    mcmc_tables = []
    db_paths = [
        os.path.join(calib_dirpath, f)
        for f in os.listdir(calib_dirpath)
        if f.endswith(".db") and not f.startswith("mcmc_percentiles")
    ]
    for db_path in db_paths:
        db = Database(db_path)
        mcmc_tables.append(db.query("mcmc_run").sort_values(by="loglikelihood", ascending=False))

    print("Maximum loglikelihood for each chain:")
    print([mcmc_tables[i]["loglikelihood"].iloc[0] for i in range(len(mcmc_tables))])
    print()

    print("Chains' lengths:")
    print([len(mcmc_tables[i]["loglikelihood"]) for i in range(len(mcmc_tables))])
    print()

    best_chain_index = np.argmax(
        [mcmc_tables[i]["loglikelihood"].iloc[0] for i in range(len(mcmc_tables))]
    )
    non_param_cols = ["idx", "Scenario", "loglikelihood", "accept"]
    param_list = [c for c in mcmc_tables[0].columns if c not in non_param_cols]
    map_estimates = {}
    for param in param_list:
        map_estimates[param] = mcmc_tables[best_chain_index][param].iloc[0]

    return map_estimates, best_chain_index
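# Hedged usage sketch (not in the source): report the MAP estimate found across all chains in a
# hypothetical calibration directory.
#
# map_estimates, best_chain_index = collect_map_estimate("data/outputs/calibrate/example/run-1")
# print(f"Best chain: {best_chain_index}")
# print(f"MAP estimates: {map_estimates}")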
def run_mcmc_plots():
    app_dirname, app_dirpath = selectors.app()
    calib_dirname, calib_dirpath = selectors.calibration_run(app_dirpath)
    if not calib_dirname:
        st.write("No calibration folder found")
        return

    # Load MCMC tables
    mcmc_tables = []
    db_paths = [
        os.path.join(calib_dirpath, f) for f in os.listdir(calib_dirpath) if f.endswith(".db")
    ]
    for db_path in db_paths:
        db = Database(db_path)
        mcmc_tables.append(db.query("mcmc_run"))

    plotter = StreamlitPlotter({})
    plot_type = st.sidebar.selectbox("Select plot type", list(PLOT_FUNCS.keys()))
    plot_func = PLOT_FUNCS[plot_type]
    plot_func(plotter, mcmc_tables)
def build_input_database(force: bool = False, rebuild: bool = False):
    """
    Builds the input database from scratch.
    If force is True, build the database from scratch, ignore any previous hashes and write a new hash.
    If force is False, do not build if the database already exists, and crash if the built
    database hash does not match the saved hash.
    If rebuild is True, then we force rebuild the database, but we don't write a new hash.

    Returns a Database, representing the input database.
    """
    if os.path.exists(input_db_path) and not (force or rebuild):
        input_db = Database(input_db_path)
    else:
        logger.info("Building a new database.")
        input_db = Database(input_db_path)
        with Timer("Deleting all existing data."):
            input_db.delete_everything()

        with Timer("Ingesting COVID AU data."):
            preprocess_covid_au(input_db)

        with Timer("Ingesting COVID PHL data."):
            preprocess_covid_phl(input_db)

        with Timer("Ingesting Our World in Data data."):
            preprocess_our_world_in_data(input_db)

        with Timer("Ingesting demography data."):
            country_df = preprocess_demography(input_db)

        with Timer("Ingesting social mixing data."):
            preprocess_social_mixing(input_db, country_df)

        with Timer("Ingesting mobility data."):
            preprocess_mobility(input_db, country_df)

    current_db_hash = input_db.get_hash()
    if force:
        # Write the file hash
        write_file_hash(current_db_hash, input_db_hash_path)
    else:
        # Read the file hash and compare
        saved_db_hash = read_file_hash(input_db_hash_path)
        is_hash_mismatch = current_db_hash != saved_db_hash
        if rebuild and is_hash_mismatch:
            msg = "Input database does not match canonical version."
            raise ValueError(msg)
        elif is_hash_mismatch:
            logger.info("Hash mismatch, try rebuilding database...")
            build_input_database(rebuild=True)

    return input_db
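# Usage sketch (assumed, not from the source): the call patterns implied by the two flags above.
#
# input_db = build_input_database()              # reuse the existing DB, verify it against the saved hash
# input_db = build_input_database(force=True)    # rebuild from scratch and write a new hash
# input_db = build_input_database(rebuild=True)  # rebuild, then crash if it no longer matches the saved hash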
def plot_uncertainty(targets: dict, powerbi_db_path: str, output_dir: str):
    """
    Works on the PowerBI version of the output database.
    Assumes a COVID model.
    """
    os.makedirs(output_dir, exist_ok=True)
    db = Database(powerbi_db_path)
    uncertainty_df = db.query("uncertainty")
    outputs = uncertainty_df["type"].unique().tolist()
    for output_name in outputs:
        this_output_dir = os.path.join(output_dir, output_name)
        os.makedirs(this_output_dir, exist_ok=True)
        plotter = FilePlotter(this_output_dir, targets)
        scenario_idxs = uncertainty_df["scenario"].unique().tolist()
        for scenario_idx in scenario_idxs:
            logger.info("Plotting uncertainty for output %s, scenario %s", output_name, scenario_idx)
            if scenario_idx == 0:
                # Just plot the baseline scenario for the full time period.
                plot_scenario_idxs = [0]
                x_low = 0
            else:
                # Plot the baseline compared to the scenario, but only for the time period
                # where the scenario is active.
                plot_scenario_idxs = [0, scenario_idx]
                mask = uncertainty_df["scenario"] == scenario_idx
                x_low = uncertainty_df[mask]["time"].min()

            plots.plot_timeseries_with_uncertainty(
                plotter,
                uncertainty_df,
                output_name,
                plot_scenario_idxs,
                targets,
                x_low=x_low,
            )
def test_plot_uncertainty(tmp_path):
    """
    Ensure uncertainty plotting code works.
    """
    output_dir = tmp_path
    powerbi_db_path = os.path.join(tmp_path, "powerbi.db")
    targets = {
        "incidence": {
            "output_key": "incidence",
            "title": "incidence",
            "times": [],
            "values": [],
            "quantiles": [0.25, 0.5, 0.75],
        },
        "foo": {
            "output_key": "foo",
            "title": "foo",
            "times": [],
            "values": [],
            "quantiles": [0.25, 0.5, 0.75],
        },
    }
    funcs = [lambda t: 2 * t + random.random(), lambda t: t ** 3 + random.random()]

    # Build data for plotting
    do_df, mcmc_df, _ = build_synthetic_calibration(targets, funcs, chains=2, runs=20, times=20)
    unc_df = calculate_mcmc_uncertainty(mcmc_df, do_df, targets)

    # Create database for plotting
    db = Database(powerbi_db_path)
    db.dump_df("mcmc_run", mcmc_df)
    db.dump_df("derived_outputs", do_df)
    db.dump_df("uncertainty", unc_df)

    # Create plots
    plot_uncertainty(targets, powerbi_db_path, output_dir)

    # Check plots
    expected_foo_path = os.path.join(tmp_path, "foo", "uncertainty-foo-0.png")
    expected_incidence_path = os.path.join(tmp_path, "incidence", "uncertainty-incidence-0.png")
    assert os.path.exists(expected_foo_path)
    assert os.path.exists(expected_incidence_path)
import os

from autumn import constants
from autumn.demography.social_mixing import get_all_prem_countries
from autumn.db import Database
from apps.covid_19.john_hopkins import (
    get_all_jh_countries,
    read_john_hopkins_data_from_csv,
    plot_jh_data,
)

INPUT_DB_PATH = os.path.join(constants.DATA_PATH, "inputs.db")
input_database = Database(database_name=INPUT_DB_PATH)

prem_country_list = get_all_prem_countries()  # N=152
jh_country_list = get_all_jh_countries()  # N=180
intercept_country_list = list(set(prem_country_list) & set(jh_country_list))  # N=126

all_data = {}
for i, country in enumerate(intercept_country_list):
    all_data[country] = read_john_hopkins_data_from_csv(country=country)

# plot_jh_data(all_data)

# Print list of countries with more than 1000 cases
countries_1000 = []
for country, n_cases in all_data.items():
    if sum(n_cases) >= 1000:
        countries_1000.append(country)

print(countries_1000)
def preprocess_social_mixing(input_db: Database, country_df):
    for location in LOCATIONS:
        for sheet_number, header_arg in SHEET_NUMBERS:
            sheet_name = f"MUestimates_{location}_{sheet_number}.xlsx"
            sheet_path = os.path.join(MIXING_DIRPATH, sheet_name)
            xl = pd.ExcelFile(sheet_path)
            sheet_names = xl.sheet_names
            iso3s = [get_iso3(n, country_df) for n in sheet_names]
            for idx, sheet_name in enumerate(sheet_names):
                iso3 = iso3s[idx]
                mix_df = pd.read_excel(xl, header=header_arg, sheet_name=sheet_name)
                if sheet_number == "2":
                    renames = {n - 1: f"X{n}" for n in range(1, 17)}
                    mix_df.rename(columns=renames, inplace=True)

                mix_df.insert(0, "location", [location for _ in range(len(mix_df))])
                mix_df.insert(0, "iso3", [iso3 for _ in range(len(mix_df))])
                input_db.dump_df("social_mixing", mix_df)

    # Next gen social mixing
    original_mm = input_db.query("social_mixing")
    df = pd.read_csv(os.path.join(MIXING_DIRPATH, "synthetic_contacts_2020.csv"))
    df = df[df.setting == "overall"]
    df.drop(columns="setting", inplace=True)
    df.replace(
        {
            "0 to 4": "00 to 04",
            "5 to 9": "05 to 09",
            "all": "all_locations",
            "others": "other_locations",
        },
        inplace=True,
    )
    # The contactor is in j (columns) and the contactee is in i (rows)
    df = df.pivot_table(
        index=["iso3c", "location_contact", "age_cotactee"],
        columns="age_contactor",
        values="mean_number_of_contacts",
    )
    df = df.reset_index()
    df.drop(columns="age_cotactee", inplace=True)

    cols = list(df.columns[2:])
    new_col = ["X" + str(x) for x in range(1, len(cols) + 1)]
    replace_col = dict(zip(cols, new_col))
    df.rename(columns=replace_col, inplace=True)
    df.rename(columns={"iso3c": "iso3", "location_contact": "location"}, inplace=True)

    iso3_diff = set(original_mm.iso3).difference(df.iso3)
    iso3_mask = original_mm.iso3.isin(iso3_diff)
    df = df.append(original_mm[iso3_mask], ignore_index=True)

    input_db.dump_df("social_mixing_2020", df)
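# Illustrative sketch (not in the source) of the pivot orientation used above: rows hold the
# contactee age group, columns hold the contactor age group. The column names and values here
# are made up for illustration only.
#
#   demo = pd.DataFrame({
#       "age_contactee": ["00 to 04", "00 to 04", "05 to 09", "05 to 09"],
#       "age_contactor": ["00 to 04", "05 to 09", "00 to 04", "05 to 09"],
#       "mean_number_of_contacts": [1.2, 0.4, 0.5, 2.1],
#   })
#   print(demo.pivot_table(index="age_contactee", columns="age_contactor",
#                          values="mean_number_of_contacts"))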
def preprocess_covid_au(input_db: Database):
    df = pd.read_csv(COVID_AU_CSV_PATH)
    input_db.dump_df("covid_au", df)
    df = pd.read_csv(COVID_LGA_CSV_PATH)
    df = reshape_to_clusters(df)
    input_db.dump_df("covid_dhhs_test", df)
def preprocess_our_world_in_data(input_db: Database):
    df = pd.read_csv(OUR_WORLD_IN_DATA_CSV_PATH)
    # Replace the one strange value for test numbers in Malaysia
    df.loc[(df.iso_code == "MYS") & (df.new_tests > 1e5), "new_tests"] = np.nan
    input_db.dump_df("owid", df)
def build_model(params, update_params={}):
    external_params = deepcopy(params)
    external_params.update(update_params)
    model_parameters = {
        "contact_rate": external_params["contact_rate"],
        "contact_rate_recovered": external_params["contact_rate"]
        * external_params["rr_transmission_recovered"],
        "contact_rate_late_latent": external_params["contact_rate"]
        * external_params["rr_transmission_late_latent"],
        "recovery": external_params["self_recovery_rate"],
        "infect_death": external_params["tb_mortality_rate"],
        **external_params,
    }
    stratify_by = external_params["stratify_by"]
    derived_output_types = external_params["derived_outputs"]

    input_database = Database(database_name=INPUT_DB_PATH)
    n_iter = (
        int(
            round(
                (external_params["end_time"] - external_params["start_time"])
                / external_params["time_step"]
            )
        )
        + 1
    )
    integration_times = numpy.linspace(
        external_params["start_time"], external_params["end_time"], n_iter
    ).tolist()

    model_parameters.update(
        change_parameter_unit(provide_aggregated_latency_parameters(), 365.251)
    )

    # Sequentially add groups of flows
    flows = add_standard_infection_flows([])
    flows = add_standard_latency_flows(flows)
    flows = add_standard_natural_history_flows(flows)

    # Compartments
    compartments = ["susceptible", "early_latent", "late_latent", "infectious", "recovered"]

    # Define model (birth approach options: replace_deaths, add_crude_birth_rate)
    init_pop = {"infectious": 1000, "late_latent": 1000000}
    tb_model = StratifiedModel(
        integration_times,
        compartments,
        init_pop,
        model_parameters,
        flows,
        birth_approach="replace_deaths",
        starting_population=external_params["start_population"],
        output_connections={},
        derived_output_functions={},
        death_output_categories=((), ("age_0",)),
    )

    # Add crude birth rate from UN estimates
    tb_model = add_birth_rate_functions(tb_model, input_database, "MNG")

    # Add case detection process to basic model
    tb_model.add_transition_flow(
        {
            "type": "standard_flows",
            "parameter": "case_detection",
            "origin": "infectious",
            "to": "recovered",
        }
    )

    # Add IPT as a customised flow
    def ipt_flow_func(model, n_flow, _time, _compartment_values):
        """
        Work out the number of detected individuals from the relevant active TB compartments
        (with regard to the origin latent compartment of n_flow) multiplied by the proportion
        of the relevant infected contacts that is from this latent compartment.
        """
        dict_flows = model.transition_flows_dict
        origin_comp_name = dict_flows["origin"][n_flow]
        components_latent_comp = find_name_components(origin_comp_name)

        # Find compulsory tags to be found in relevant infectious compartments
        tags = []
        for component in components_latent_comp:
            if "location_" in component or "strain_" in component:
                tags.append(component)

        # Loop through all relevant infectious compartments
        total_tb_detected = 0.0
        for comp_ind in model.infectious_indices["all_strains"]:
            active_components = find_name_components(model.compartment_names[comp_ind])
            if all(elem in active_components for elem in tags):
                infectious_pop = _compartment_values[comp_ind]
                detection_indices = [
                    index
                    for index, val in dict_flows["parameter"].items()
                    if "case_detection" in val
                ]
                flow_index = [
                    index
                    for index in detection_indices
                    if dict_flows["origin"][index] == model.compartment_names[comp_ind]
                ][0]
                param_name = dict_flows["parameter"][flow_index]
                detection_tx_rate = model.get_parameter_value(param_name, _time)
                tsr = mongolia_tsr(_time) + external_params["reduction_negative_tx_outcome"] * (
                    1.0 - mongolia_tsr(_time)
                )
                if "strain_mdr" in model.compartment_names[comp_ind]:
                    tsr = external_params["mdr_tsr"] * external_params["prop_mdr_detected_as_mdr"]
                if tsr > 0.0:
                    total_tb_detected += infectious_pop * detection_tx_rate / tsr

        # List all latent compartments relevant to the relevant infectious population
        relevant_latent_compartments_indices = [
            i
            for i, comp_name in enumerate(model.compartment_names)
            if find_stem(comp_name) == "early_latent" and all(elem in comp_name for elem in tags)
        ]
        total_relevant_latent_size = sum(
            _compartment_values[i] for i in relevant_latent_compartments_indices
        )
        current_latent_size = _compartment_values[model.compartment_names.index(origin_comp_name)]
        prop_of_relevant_latent = (
            current_latent_size / total_relevant_latent_size
            if total_relevant_latent_size > 0.0
            else 0.0
        )

        return total_tb_detected * prop_of_relevant_latent

    tb_model.add_transition_flow(
        {
            "type": "customised_flows",
            "parameter": "ipt_rate",
            "origin": "early_latent",
            "to": "recovered",
            "function": ipt_flow_func,
        }
    )

    # Add ACF flow
    tb_model.add_transition_flow(
        {
            "type": "standard_flows",
            "parameter": "acf_rate",
            "origin": "infectious",
            "to": "recovered",
        }
    )

    # Load time-variant case detection rate
    cdr_scaleup_overall = build_mongolia_timevariant_cdr(external_params["cdr_multiplier"])

    # Targeted TB prevalence proportions by organ
    prop_smearpos = 0.25
    prop_smearneg = 0.40
    prop_extrapul = 0.35

    # Disease duration by organ
    overall_duration = prop_smearpos * 1.6 + 5.3 * (1 - prop_smearpos)
    disease_duration = {
        "smearpos": 1.6,
        "smearneg": 5.3,
        "extrapul": 5.3,
        "overall": overall_duration,
    }

    # Work out the CDR for smear-positive TB
    def cdr_smearpos(time):
        # Had to replace external_params['diagnostic_sensitivity_smearneg'] with its hard-coded
        # value 0.7 to avoid cdr_smearpos being affected when increasing
        # diagnostic_sensitivity_smearneg in interventions (e.g. Xpert)
        # return (cdr_scaleup_overall(time) /
        #     (prop_smearpos + prop_smearneg * external_params['diagnostic_sensitivity_smearneg'] +
        #      prop_extrapul * external_params['diagnostic_sensitivity_extrapul']))
        return cdr_scaleup_overall(time) / (
            prop_smearpos
            + prop_smearneg * 0.7
            + prop_extrapul * external_params["diagnostic_sensitivity_extrapul"]
        )

    def cdr_smearneg(time):
        return cdr_smearpos(time) * external_params["diagnostic_sensitivity_smearneg"]

    def cdr_extrapul(time):
        return cdr_smearpos(time) * external_params["diagnostic_sensitivity_extrapul"]

    cdr_by_organ = {
        "smearpos": cdr_smearpos,
        "smearneg": cdr_smearneg,
        "extrapul": cdr_extrapul,
        "overall": cdr_scaleup_overall,
    }
    detect_rate_by_organ = {}
    for organ in ["smearpos", "smearneg", "extrapul", "overall"]:
        prop_to_rate = convert_competing_proportion_to_rate(1.0 / disease_duration[organ])
        detect_rate_by_organ[organ] = return_function_of_function(cdr_by_organ[organ], prop_to_rate)

    # Load time-variant treatment success rate
    mongolia_tsr = build_mongolia_timevariant_tsr()

    # Create a treatment success rate function adjusted for the treatment support intervention
    tsr_function = lambda t: mongolia_tsr(t) + external_params["reduction_negative_tx_outcome"] * (
        1.0 - mongolia_tsr(t)
    )

    # TB control recovery rate (detection and treatment) set for overall if not organ-specific,
    # smearpos otherwise
    if "organ" not in stratify_by:
        tb_control_recovery_rate = lambda t: tsr_function(t) * detect_rate_by_organ["overall"](t)
    else:
        tb_control_recovery_rate = lambda t: tsr_function(t) * detect_rate_by_organ["smearpos"](t)

    # Initialise ipt_rate function assuming coverage of 1.0 before age stratification
    ipt_rate_function = (
        lambda t: 1.0
        * external_params["yield_contact_ct_tstpos_per_detected_tb"]
        * external_params["ipt_efficacy"]
    )

    # Initialise acf_rate function
    acf_rate_function = (
        lambda t: external_params["acf_coverage"]
        * external_params["acf_sensitivity"]
        * (
            mongolia_tsr(t)
            + external_params["reduction_negative_tx_outcome"] * (1.0 - mongolia_tsr(t))
        )
    )

    # Assign newly created functions to model parameters
    tb_model.adaptation_functions["case_detection"] = tb_control_recovery_rate
    tb_model.parameters["case_detection"] = "case_detection"
    tb_model.adaptation_functions["ipt_rate"] = ipt_rate_function
    tb_model.parameters["ipt_rate"] = "ipt_rate"
    tb_model.adaptation_functions["acf_rate"] = acf_rate_function
    tb_model.parameters["acf_rate"] = "acf_rate"

    if "strain" in stratify_by:
        mdr_adjustment = (
            external_params["prop_mdr_detected_as_mdr"] * external_params["mdr_tsr"] / 0.9
        )  # /0.9 for last DS TSR
        tb_model.stratify(
            "strain",
            ["ds", "mdr"],
            ["early_latent", "late_latent", "infectious"],
            verbose=False,
            requested_proportions={"mdr": 0.0},
            adjustment_requests={
                "contact_rate": {"ds": 1.0, "mdr": 1.0},
                "case_detection": {"mdr": mdr_adjustment},
                "ipt_rate": {
                    "ds": 1.0,  # external_params['ds_ipt_switch'],
                    "mdr": external_params["mdr_ipt_switch"],
                },
            },
            infectiousness_adjustments={
                "ds": 1.0,
                "mdr": external_params["mdr_infectiousness_multiplier"],
            },
        )
        tb_model.add_transition_flow(
            {
                "type": "standard_flows",
                "parameter": "dr_amplification",
                "origin": "infectiousXstrain_ds",
                "to": "infectiousXstrain_mdr",
                "implement": len(tb_model.all_stratifications),
            }
        )
        dr_amplification_rate = (
            lambda t: detect_rate_by_organ["overall"](t)
            * (1.0 - mongolia_tsr(t))
            * (1.0 - external_params["reduction_negative_tx_outcome"])
            * external_params["dr_amplification_prop_among_nonsuccess"]
        )
        tb_model.adaptation_functions["dr_amplification"] = dr_amplification_rate
        tb_model.parameters["dr_amplification"] = "dr_amplification"

    if "age" in stratify_by:
        age_breakpoints = [0, 5, 15, 60]
        age_infectiousness = get_parameter_dict_from_function(
            logistic_scaling_function(10.0), age_breakpoints
        )
        age_params = get_adapted_age_parameters(age_breakpoints)
        age_params.update(split_age_parameter(age_breakpoints, "contact_rate"))

        # Adjustment of latency parameters
        for param in ["early_progression", "late_progression"]:
            for age_break in age_breakpoints:
                if age_break > 5:
                    age_params[param][str(age_break) + "W"] *= external_params[
                        "adult_latency_adjustment"
                    ]

        pop_morts = get_pop_mortality_functions(
            input_database, age_breakpoints, country_iso_code="MNG"
        )
        age_params["universal_death_rate"] = {}
        for age_break in age_breakpoints:
            tb_model.time_variants["universal_death_rateXage_" + str(age_break)] = pop_morts[
                age_break
            ]
            tb_model.parameters[
                "universal_death_rateXage_" + str(age_break)
            ] = "universal_death_rateXage_" + str(age_break)
            age_params["universal_death_rate"][
                str(age_break) + "W"
            ] = "universal_death_rateXage_" + str(age_break)
        tb_model.parameters["universal_death_rateX"] = 0.0

        # Age-specific IPT
        ipt_by_age = {"ipt_rate": {}}
        for age_break in age_breakpoints:
            ipt_by_age["ipt_rate"][str(age_break)] = external_params[
                "ipt_age_" + str(age_break) + "_ct_coverage"
            ]
        age_params.update(ipt_by_age)

        # Add BCG effect without stratification, assuming constant 100% coverage
        bcg_wane = create_sloping_step_function(15.0, 0.3, 30.0, 1.0)
        age_bcg_efficacy_dict = get_parameter_dict_from_function(
            lambda value: bcg_wane(value), age_breakpoints
        )
        age_params.update({"contact_rate": age_bcg_efficacy_dict})

        tb_model.stratify(
            "age",
            deepcopy(age_breakpoints),
            [],
            {},
            adjustment_requests=age_params,
            infectiousness_adjustments=age_infectiousness,
            verbose=False,
        )

        # Patch for IPT to overwrite parameters when ds_ipt has been turned off
        # while we still need some coverage at baseline
        if external_params["ds_ipt_switch"] == 0.0 and external_params["mdr_ipt_switch"] == 1.0:
            tb_model.parameters["ipt_rateXstrain_dsXage_0"] = 0.17
            for age_break in [5, 15, 60]:
                tb_model.parameters["ipt_rateXstrain_dsXage_" + str(age_break)] = 0.0

    if "organ" in stratify_by:
        props_smear = {
            "smearpos": external_params["prop_smearpos"],
            "smearneg": 1.0 - (external_params["prop_smearpos"] + 0.20),
            "extrapul": 0.20,
        }
        mortality_adjustments = {"smearpos": 1.0, "smearneg": 0.064, "extrapul": 0.064}
        recovery_adjustments = {"smearpos": 1.0, "smearneg": 0.56, "extrapul": 0.56}

        # Work out the detection rate adjustment by organ status
        adjustment_smearneg = (
            detect_rate_by_organ["smearneg"](2015.0) / detect_rate_by_organ["smearpos"](2015.0)
            if detect_rate_by_organ["smearpos"](2015.0) > 0.0
            else 1.0
        )
        adjustment_extrapul = (
            detect_rate_by_organ["extrapul"](2015.0) / detect_rate_by_organ["smearpos"](2015.0)
            if detect_rate_by_organ["smearpos"](2015.0) > 0.0
            else 1.0
        )

        tb_model.stratify(
            "organ",
            ["smearpos", "smearneg", "extrapul"],
            ["infectious"],
            infectiousness_adjustments={"smearpos": 1.0, "smearneg": 0.25, "extrapul": 0.0},
            verbose=False,
            requested_proportions=props_smear,
            adjustment_requests={
                "recovery": recovery_adjustments,
                "infect_death": mortality_adjustments,
                "case_detection": {
                    "smearpos": 1.0,
                    "smearneg": adjustment_smearneg,
                    "extrapul": adjustment_extrapul,
                },
                "early_progression": props_smear,
                "late_progression": props_smear,
            },
        )

    if "location" in stratify_by:
        props_location = {
            "rural_province": 0.48,
            "urban_nonger": 0.368,
            "urban_ger": 0.15,
            "prison": 0.002,
        }
        raw_relative_risks_loc = {"rural_province": 1.0}
        for stratum in ["urban_nonger", "urban_ger", "prison"]:
            raw_relative_risks_loc[stratum] = external_params["rr_transmission_" + stratum]
        scaled_relative_risks_loc = scale_relative_risks_for_equivalence(
            props_location, raw_relative_risks_loc
        )

        # Dummy matrix for mixing by location
        location_mixing = numpy.array(
            [
                0.899, 0.05, 0.05, 0.001,
                0.049, 0.7, 0.25, 0.001,
                0.049, 0.25, 0.7, 0.001,
                0.1, 0.1, 0.1, 0.7,
            ]
        ).reshape((4, 4))
        # Adjusted such that heterogeneous mixing yields similar overall burden as homogeneous
        location_mixing *= 3.0

        location_adjustments = {}
        for beta_type in ["", "_late_latent", "_recovered"]:
            location_adjustments["contact_rate" + beta_type] = scaled_relative_risks_loc

        location_adjustments["acf_rate"] = {}
        for stratum in ["rural_province", "urban_nonger", "urban_ger", "prison"]:
            location_adjustments["acf_rate"][stratum] = external_params[
                "acf_" + stratum + "_switch"
            ]

        tb_model.stratify(
            "location",
            ["rural_province", "urban_nonger", "urban_ger", "prison"],
            [],
            requested_proportions=props_location,
            verbose=False,
            entry_proportions=props_location,
            adjustment_requests=location_adjustments,
            mixing_matrix=location_mixing,
        )

    # tb_model.transition_flows.to_csv("transitions.csv")
    # tb_model.death_flows.to_csv("deaths.csv")

    # Create some customised derived outputs
    if "notifications" in derived_output_types:

        def notification_function_builder(stratum):
            """
            Example of stratum: "Xage_0Xstrain_mdr"
            """

            def calculate_notifications(model, time):
                total_notifications = 0.0
                dict_flows = model.transition_flows_dict
                comp_ind = model.compartment_names.index("infectious" + stratum)
                infectious_pop = model.compartment_values[comp_ind]
                detection_indices = [
                    index
                    for index, val in dict_flows["parameter"].items()
                    if "case_detection" in val
                ]
                flow_index = [
                    index
                    for index in detection_indices
                    if dict_flows["origin"][index] == model.compartment_names[comp_ind]
                ][0]
                param_name = dict_flows["parameter"][flow_index]
                detection_tx_rate = model.get_parameter_value(param_name, time)
                tsr = mongolia_tsr(time) + external_params["reduction_negative_tx_outcome"] * (
                    1.0 - mongolia_tsr(time)
                )
                if "strain_mdr" in model.compartment_names[comp_ind]:
                    tsr = external_params["mdr_tsr"] * external_params["prop_mdr_detected_as_mdr"]
                if tsr > 0.0:
                    total_notifications += infectious_pop * detection_tx_rate / tsr

                return total_notifications

            return calculate_notifications

        for compartment in tb_model.compartment_names:
            if "infectious" in compartment:
                stratum = compartment.split("infectious")[1]
                tb_model.derived_output_functions[
                    "notifications" + stratum
                ] = notification_function_builder(stratum)
                # tb_model.derived_output_functions['popsize_treatment_support' + stratum] = notification_function_builder(stratum)

    if "incidence" in derived_output_types:
        # Add output_connections for all stratum-specific incidence outputs
        incidence_output_conns = create_output_connections_for_incidence_by_stratum(
            tb_model.compartment_names
        )
        tb_model.output_connections.update(incidence_output_conns)

        # Create a 'combined incidence' derived output
        early_names = [k for k in incidence_output_conns.keys() if k.startswith("incidence_early")]
        for early_name in early_names:
            rootname = early_name[15:]
            late_name = f"incidence_late{rootname}"
            combined_name = f"incidence{rootname}"

            def add_combined_incidence(model, time, e=early_name, l=late_name):
                time_idx = model.times.index(time)
                early_incidence = model.derived_outputs[e][time_idx]
                late_incidence = model.derived_outputs[l][time_idx]
                return early_incidence + late_incidence

            tb_model.derived_output_functions[combined_name] = add_combined_incidence

    if "mortality" in derived_output_types:
        # Prepare death outputs for all strata
        tb_model.death_output_categories = list_all_strata_for_mortality(
            tb_model.compartment_names
        )

    ############################################
    # Population sizes for costing
    ############################################
    if "popsizes" in derived_output_types:
        # Number of detected individuals by strain:
        def detected_popsize_function_builder(tag):
            """
            Example of tag: "strain_mdr" or "organ_smearpos"
            """

            def calculate_nb_detected(model, time):
                nb_treated = 0.0
                for key, value in model.derived_outputs.items():
                    if "notifications" in key and tag in key:
                        this_time_index = model.times.index(time)
                        nb_treated += value[this_time_index]
                return nb_treated

            return calculate_nb_detected

        for tag in [
            "strain_mdr",
            "strain_ds",
            "organ_smearpos",
            "organ_smearneg",
            "organ_extrapul",
        ]:
            tb_model.derived_output_functions[
                "popsizeXnb_detectedX" + tag
            ] = detected_popsize_function_builder(tag)

        # ACF popsize: number of people screened
        def popsize_acf(model, time):
            if external_params["acf_coverage"] == 0.0:
                return 0.0
            pop_urban_ger = sum(
                [
                    model.compartment_values[i]
                    for i, c_name in enumerate(model.compartment_names)
                    if "location_urban_ger" in c_name
                ]
            )
            return external_params["acf_coverage"] * pop_urban_ger

        tb_model.derived_output_functions["popsizeXnb_screened_acf"] = popsize_acf

    return tb_model
def build_model(params: dict, update_params={}):
    """
    Build the master function to run the TB model for the Republic of the Marshall Islands

    :param update_params: dict
        Any parameters that need to be updated for the current run
    :return: StratifiedModel
        The final model with all parameters and stratifications
    """
    input_database = Database(database_name=INPUT_DB_PATH)

    # Define compartments and initial conditions.
    compartments = [
        Compartment.SUSCEPTIBLE,
        Compartment.EARLY_LATENT,
        Compartment.LATE_LATENT,
        Compartment.EARLY_INFECTIOUS,
        Compartment.ON_TREATMENT,
        Compartment.RECOVERED,
        # Compartment.LTBI_TREATED,
    ]
    init_pop = {Compartment.EARLY_INFECTIOUS: 10, Compartment.LATE_LATENT: 100}

    model_parameters = params
    model_parameters.update(update_params)

    # Update partial immunity/susceptibility parameters
    model_parameters = update_transmission_parameters(
        model_parameters,
        [Compartment.RECOVERED, Compartment.LATE_LATENT, Compartment.LTBI_TREATED],
    )

    # Set integration times
    integration_times = get_model_times_from_inputs(
        model_parameters["start_time"], model_parameters["end_time"], model_parameters["time_step"]
    )

    # Sequentially add groups of flows to flows list
    flows = add_standard_infection_flows([])
    flows = add_standard_latency_flows(flows)
    flows = add_standard_natural_history_flows(flows)
    # flows = add_latency_progression(flows)
    flows = add_case_detection(flows, compartments)
    flows = add_treatment_flows(flows)
    # flows = add_acf(flows, compartments)
    # flows = add_acf_ltbi(flows)

    # Make sure incidence and notifications are tracked during integration
    out_connections = {}
    out_connections.update(
        create_request_stratified_incidence(
            model_parameters["incidence_stratification"], model_parameters["all_stratifications"]
        )
    )
    out_connections.update(
        create_request_stratified_notifications(
            model_parameters["notification_stratifications"],
            model_parameters["all_stratifications"],
        )
    )

    # Define model
    tb_model = StratifiedModel(
        integration_times,
        compartments,
        init_pop,
        model_parameters,
        flows,
        birth_approach="add_crude_birth_rate",
        starting_population=model_parameters["start_population"],
        output_connections=out_connections,
        death_output_categories=list_all_strata_for_mortality(compartments),
    )

    # Add crude birth rate from UN estimates
    # (using Federated States of Micronesia as a proxy as no data for RMI)
    tb_model = add_birth_rate_functions(tb_model, input_database, "FSM")

    # Find raw case detection rate with multiplier, which is 1 by default,
    # and adjust for differences by organ status
    cdr_scaleup_raw = build_scale_up_function(
        model_parameters["cdr"], model_parameters["cdr_multiplier"]
    )
    detect_rate_by_organ = find_organ_specific_cdr(
        cdr_scaleup_raw,
        model_parameters,
        model_parameters["all_stratifications"]["organ"],
        target_organ_props=model_parameters["target_organ_props"],
    )

    # Find base case detection rate and time-variant treatment completion function
    base_detection_rate = detect_rate_by_organ[
        "smearpos" if "organ" in model_parameters["stratify_by"] else "overall"
    ]
    treatment_success_rate = (
        lambda time: build_scale_up_function(model_parameters["tsr"])(time)
        / model_parameters["treatment_duration"]
    )
    treatment_nonsuccess_rate = (
        lambda time: (1.0 - build_scale_up_function(model_parameters["tsr"])(time))
        / model_parameters["treatment_duration"]
    )

    # Set acf screening rate using proportion of population reached and duration of intervention
    # acf_screening_rate = -numpy.log(1 - 0.9) / 0.5
    # acf_rate_over_time = progressive_step_function_maker(
    #     2018.2, 2018.7, acf_screening_rate, scaling_time_fraction=0.3
    # )

    # Initialise acf_rate function
    # acf_rate_function = (
    #     lambda t: model_parameters["acf_coverage"]
    #     * (acf_rate_over_time(t))
    #     * model_parameters["acf_sensitivity"]
    # )
    # acf_ltbi_rate_function = (
    #     lambda t: model_parameters["acf_coverage"]
    #     * (acf_rate_over_time(t))
    #     * model_parameters["acf_ltbi_sensitivity"]
    #     * model_parameters["acf_ltbi_efficacy"]
    # )

    # Assign newly created functions to model parameters
    add_time_variant_parameter_to_model(
        tb_model, "case_detection", base_detection_rate, len(model_parameters["stratify_by"])
    )
    add_time_variant_parameter_to_model(
        tb_model, "treatment_success", treatment_success_rate, len(model_parameters["stratify_by"])
    )
    add_time_variant_parameter_to_model(
        tb_model,
        "treatment_nonsuccess",
        treatment_nonsuccess_rate,
        len(model_parameters["stratify_by"]),
    )
    # add_time_variant_parameter_to_model(
    #     tb_model, 'acf_rate', acf_rate_function, len(model_parameters['stratify_by']))
    # add_time_variant_parameter_to_model(
    #     tb_model, 'acf_ltbi_rate', acf_ltbi_rate_function, len(model_parameters['stratify_by']))

    # Stratification processes
    if "age" in model_parameters["stratify_by"]:
        age_specific_latency_parameters = manually_create_age_specific_latency_parameters(
            model_parameters
        )
        tb_model = stratify_by_age(
            tb_model,
            age_specific_latency_parameters,
            input_database,
            model_parameters["all_stratifications"]["age"],
        )
    if "diabetes" in model_parameters["stratify_by"]:
        tb_model = stratify_by_diabetes(
            tb_model,
            model_parameters,
            model_parameters["all_stratifications"]["diabetes"],
            model_parameters["diabetes_target_props"],
            age_specific_prevalence=False,
        )
    if "organ" in model_parameters["stratify_by"]:
        tb_model = stratify_by_organ(
            tb_model,
            model_parameters,
            detect_rate_by_organ,
            model_parameters["all_stratifications"]["organ"],
        )
    if "location" in model_parameters["stratify_by"]:
        tb_model = stratify_by_location(
            tb_model, model_parameters, model_parameters["all_stratifications"]["location"]
        )

    # Capture reported prevalence in Majuro assuming over-reporting (needed for calibration)
    def calculate_reported_majuro_prevalence(model, time):
        true_prev = 0.0
        pop_majuro = 0.0
        for i, compartment in enumerate(model.compartment_names):
            if "majuro" in compartment:
                pop_majuro += model.compartment_values[i]
                if "infectious" in compartment:
                    true_prev += model.compartment_values[i]
        return (
            1.0e5
            * true_prev
            / pop_majuro
            * (1.0 + model_parameters["over_reporting_prevalence_proportion"])
        )

    tb_model.derived_output_functions.update(
        {"reported_majuro_prevalence": calculate_reported_majuro_prevalence}
    )

    return tb_model
def preprocess_covid_phl(input_db: Database):
    df = pd.read_csv(COVID_PHL_CSV_PATH)
    df = create_region_aggregates(df)
    input_db.dump_df("covid_phl", df)
def test_unpivot_outputs(tmp_path):
    """
    Verify that unpivot_outputs works.
    """
    out_db_path = os.path.join(tmp_path, "out.db")
    mock_model = get_mock_model(
        times=[2000, 2001, 2002, 2003, 2004, 2005],
        outputs=[
            [300.0, 300.0, 300.0, 33.0, 33.0, 33.0, 93.0, 39.0],
            [271.0, 300.0, 271.0, 62.0, 33.0, 62.0, 93.0, 69.0],
            [246.0, 300.0, 246.0, 88.0, 33.0, 88.0, 93.0, 89.0],
            [222.0, 300.0, 222.0, 111.0, 33.0, 111.0, 39.0, 119.0],
            [201.0, 300.0, 201.0, 132.0, 33.0, 132.0, 39.0, 139.0],
            [182.0, 300.0, 182.0, 151.0, 33.0, 151.0, 39.0, 159.0],
        ],
    )
    store_run_models([mock_model], out_db_path)
    out_db = Database(out_db_path)
    outputs_df = out_db.query("outputs")
    unpivoted_df = unpivot_outputs(outputs_df)
    expected_columns = ["idx", "Scenario", "times", "value", "age", "compartment", "mood"]
    expected_data = [
        ["run_0", "S_0", 2000, 300.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2001, 271.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2002, 246.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2003, 222.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2004, 201.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2005, 182.0, "age_old", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2000, 300.0, "age_old", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2001, 300.0, "age_old", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2002, 300.0, "age_old", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2003, 300.0, "age_old", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2004, 300.0, "age_old", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2005, 300.0, "age_old", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2000, 300.0, "age_young", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2001, 271.0, "age_young", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2002, 246.0, "age_young", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2003, 222.0, "age_young", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2004, 201.0, "age_young", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2005, 182.0, "age_young", "susceptible", "mood_happy"],
        ["run_0", "S_0", 2000, 33.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2001, 62.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2002, 88.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2003, 111.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2004, 132.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2005, 151.0, "age_young", "susceptible", "mood_sad"],
        ["run_0", "S_0", 2000, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2001, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2002, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2003, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2004, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2005, 33.0, "age_old", "infectious", "mood_happy"],
        ["run_0", "S_0", 2000, 33.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2001, 62.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2002, 88.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2003, 111.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2004, 132.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2005, 151.0, "age_old", "infectious", "mood_sad"],
        ["run_0", "S_0", 2000, 93.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2001, 93.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2002, 93.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2003, 39.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2004, 39.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2005, 39.0, "age_young", "infectious", "mood_happy"],
        ["run_0", "S_0", 2000, 39.0, "age_young", "infectious", "mood_sad"],
        ["run_0", "S_0", 2001, 69.0, "age_young", "infectious", "mood_sad"],
        ["run_0", "S_0", 2002, 89.0, "age_young", "infectious", "mood_sad"],
        ["run_0", "S_0", 2003, 119.0, "age_young", "infectious", "mood_sad"],
        ["run_0", "S_0", 2004, 139.0, "age_young", "infectious", "mood_sad"],
        ["run_0", "S_0", 2005, 159.0, "age_young", "infectious", "mood_sad"],
    ]
    expected_df = pd.DataFrame(expected_data, columns=expected_columns)
    assert_frame_equal(expected_df, unpivoted_df)
def test_plot_post_calibration(tmp_path):
    plot_dir = tmp_path
    mcmc_dir_path = os.path.join(tmp_path, "mcmc")
    os.makedirs(mcmc_dir_path)
    targets = {
        "incidence": {
            "output_key": "incidence",
            "title": "incidence",
            "times": [],
            "values": [],
            "quantiles": [0.25, 0.5, 0.75],
        },
        "foo": {
            "output_key": "foo",
            "title": "foo",
            "times": [],
            "values": [],
            "quantiles": [0.25, 0.5, 0.75],
        },
    }
    # A dummy prior to pass posterior checks
    priors = [
        {
            "param_name": "contact_rate",
            "distribution": "uniform",
            "distri_params": [0.01, 0.03],
        }
    ]
    funcs = [lambda t: 2 * t + random.random(), lambda t: t ** 3 + random.random()]

    # Build data for plotting
    do_df, mcmc_df, params_df = build_synthetic_calibration(
        targets, funcs, chains=2, runs=20, times=20
    )
    chains = set(mcmc_df["chain"].tolist())

    # Create databases for plotting
    for chain in chains:
        db_path = os.path.join(mcmc_dir_path, f"chain-{chain}.db")
        db = Database(db_path)
        db.dump_df("mcmc_run", mcmc_df[mcmc_df["chain"] == chain])
        db.dump_df("mcmc_params", params_df[params_df["chain"] == chain])
        db.dump_df("derived_outputs", do_df[do_df["chain"] == chain])

    # Create plots
    plot_post_calibration(targets, mcmc_dir_path, plot_dir, priors)

    # Check plots - do a super basic check
    expected_files = [
        "burn-in.png",
        "loglikelihood-traces.png",
        "acceptance_ratio.png",
        "params-traces",
        "calibration-fit",
        "params-vs-loglikelihood",
        "posteriors",
    ]
    for fname in expected_files:
        p = os.path.join(plot_dir, fname)
        assert os.path.exists(p)
        if os.path.isdir(p):
            assert len(os.listdir(p)) > 0
def test_collate_outputs(tmp_path):
    """
    Test the collation of multiple calibration output databases into a single file.
    """
    # Setup database tables
    mcmc_run_cols = ["idx", "Scenario", "ice_cream_sales", "loglikelihood", "accept"]
    mcmc_run_1 = [
        ["run_0", "S_0", 1, -1, 1],
        ["run_1", "S_0", 2, -2, 1],
        ["run_2", "S_0", 3, -3, 0],
        ["run_3", "S_0", 4, -4, 1],
    ]
    mcmc_run_2 = [
        ["run_0", "S_0", 11, -11, 1],
        ["run_1", "S_0", 12, -12, 0],
        ["run_2", "S_0", 13, -13, 1],
        ["run_3", "S_0", 14, -14, 1],
    ]
    derived_outputs_cols = ["idx", "Scenario", "times", "shark_attacks"]
    derived_outputs_1 = [
        ["run_0", "S_0", 2000, 3],
        ["run_0", "S_0", 2001, 6],
        ["run_0", "S_0", 2002, 10],
        ["run_1", "S_0", 2000, 4],
        ["run_1", "S_0", 2001, 7],
        ["run_1", "S_0", 2002, 11],
        ["run_2", "S_0", 2000, 2],
        ["run_2", "S_0", 2001, 5],
        ["run_2", "S_0", 2002, 9],
        ["run_3", "S_0", 2000, 1],
        ["run_3", "S_0", 2001, 2],
        ["run_3", "S_0", 2002, 3],
    ]
    derived_outputs_2 = [
        ["run_0", "S_0", 2000, 3.1],
        ["run_0", "S_0", 2001, 6.1],
        ["run_0", "S_0", 2002, 10.1],
        ["run_1", "S_0", 2000, 4.1],
        ["run_1", "S_0", 2001, 7.1],
        ["run_1", "S_0", 2002, 11.1],
        ["run_2", "S_0", 2000, 2.1],
        ["run_2", "S_0", 2001, 5.1],
        ["run_2", "S_0", 2002, 9.1],
        ["run_3", "S_0", 2000, 1.1],
        ["run_3", "S_0", 2001, 2.1],
        ["run_3", "S_0", 2002, 3.1],
    ]
    outputs_cols = ["idx", "Scenario", "times", "happy", "sad"]
    outputs_1 = [
        ["run_0", "S_0", 2000, 11, 11],
        ["run_0", "S_0", 2001, 12, 21],
        ["run_0", "S_0", 2002, 13, 31],
        ["run_1", "S_0", 2000, 21, 12],
        ["run_1", "S_0", 2001, 22, 22],
        ["run_1", "S_0", 2002, 23, 32],
        ["run_2", "S_0", 2000, 31, 13],
        ["run_2", "S_0", 2001, 32, 23],
        ["run_2", "S_0", 2002, 33, 33],
        ["run_3", "S_0", 2000, 41, 14],
        ["run_3", "S_0", 2001, 42, 24],
        ["run_3", "S_0", 2002, 43, 34],
    ]
    outputs_2 = [
        ["run_0", "S_0", 2000, 111, 211],
        ["run_0", "S_0", 2001, 112, 221],
        ["run_0", "S_0", 2002, 113, 231],
        ["run_1", "S_0", 2000, 121, 212],
        ["run_1", "S_0", 2001, 122, 222],
        ["run_1", "S_0", 2002, 123, 232],
        ["run_2", "S_0", 2000, 131, 213],
        ["run_2", "S_0", 2001, 132, 223],
        ["run_2", "S_0", 2002, 133, 233],
        ["run_3", "S_0", 2000, 141, 214],
        ["run_3", "S_0", 2001, 142, 224],
        ["run_3", "S_0", 2002, 143, 234],
    ]
    # Create dataframes to save to db
    mcmc_run_1_df = pd.DataFrame(mcmc_run_1, columns=mcmc_run_cols)
    mcmc_run_2_df = pd.DataFrame(mcmc_run_2, columns=mcmc_run_cols)
    derived_ouputs_1_df = pd.DataFrame(derived_outputs_1, columns=derived_outputs_cols)
    derived_ouputs_2_df = pd.DataFrame(derived_outputs_2, columns=derived_outputs_cols)
    outputs_1_df = pd.DataFrame(outputs_1, columns=outputs_cols)
    outputs_2_df = pd.DataFrame(outputs_2, columns=outputs_cols)

    # Connect to test databases
    target_db_path = os.path.join(tmp_path, "target.db")
    db_1_path = os.path.join(tmp_path, "src-1.db")
    db_2_path = os.path.join(tmp_path, "src-2.db")
    src_db_paths = [db_1_path, db_2_path]
    target_db = Database(target_db_path)
    src_1_db = Database(db_1_path)
    src_2_db = Database(db_2_path)

    # Save test data to databases
    mcmc_run_1_df.to_sql("mcmc_run", con=src_1_db.engine, index=False)
    mcmc_run_2_df.to_sql("mcmc_run", con=src_2_db.engine, index=False)
    derived_ouputs_1_df.to_sql("derived_outputs", con=src_1_db.engine, index=False)
    derived_ouputs_2_df.to_sql("derived_outputs", con=src_2_db.engine, index=False)
    outputs_1_df.to_sql("outputs", con=src_1_db.engine, index=False)
    outputs_2_df.to_sql("outputs", con=src_2_db.engine, index=False)

    collate_outputs(src_db_paths, target_db_path, num_runs=2)

    expected_mcmc_runs = [
        ["run_0", "S_0", 2, -2, 1],
        ["run_1", "S_0", 4, -4, 1],
        ["run_2", "S_0", 13, -13, 1],
        ["run_3", "S_0", 14, -14, 1],
    ]
    expected_derived_ouputs = [
        ["run_0", "S_0", 2000, 4],
        ["run_0", "S_0", 2001, 7],
        ["run_0", "S_0", 2002, 11],
        ["run_1", "S_0", 2000, 1],
        ["run_1", "S_0", 2001, 2],
        ["run_1", "S_0", 2002, 3],
        ["run_2", "S_0", 2000, 2.1],
        ["run_2", "S_0", 2001, 5.1],
        ["run_2", "S_0", 2002, 9.1],
        ["run_3", "S_0", 2000, 1.1],
        ["run_3", "S_0", 2001, 2.1],
        ["run_3", "S_0", 2002, 3.1],
    ]
    expected_outputs = [
        ["run_0", "S_0", 2000, 21, 12],
        ["run_0", "S_0", 2001, 22, 22],
        ["run_0", "S_0", 2002, 23, 32],
        ["run_1", "S_0", 2000, 41, 14],
        ["run_1", "S_0", 2001, 42, 24],
        ["run_1", "S_0", 2002, 43, 34],
        ["run_2", "S_0", 2000, 131, 213],
        ["run_2", "S_0", 2001, 132, 223],
        ["run_2", "S_0", 2002, 133, 233],
        ["run_3", "S_0", 2000, 141, 214],
        ["run_3", "S_0", 2001, 142, 224],
        ["run_3", "S_0", 2002, 143, 234],
    ]
    expected_mcmc_run_df = pd.DataFrame(expected_mcmc_runs, columns=mcmc_run_cols)
    expected_derived_ouputs_df = pd.DataFrame(expected_derived_ouputs, columns=derived_outputs_cols)
    expected_outputs_df = pd.DataFrame(expected_outputs, columns=outputs_cols)

    # Extract the outputs
    mcmc_df = target_db.query("mcmc_run")
    derived_outputs_df = target_db.query("derived_outputs")
    outputs_df = target_db.query("outputs")

    # Check that the outputs are correct
    assert_frame_equal(expected_mcmc_run_df, mcmc_df)
    assert_frame_equal(expected_derived_ouputs_df, derived_outputs_df)
    assert_frame_equal(expected_outputs_df, outputs_df)
def test_create_power_bi_outputs(tmp_path):
    """
    Ensure that PowerBI outputs are correctly created from a model output database.
    """
    # Prepare models
    models = [
        get_mock_model(
            times=[2000, 2001, 2002, 2003, 2004, 2005],
            outputs=[
                [1, 2, 3, 4, 5, 6, 7, 8],
                [11, 12, 13, 14, 15, 16, 17, 18],
                [21, 22, 23, 24, 25, 26, 27, 28],
                [31, 32, 33, 34, 35, 36, 37, 38],
                [41, 42, 43, 44, 45, 46, 47, 48],
                [5, 4, 3, 2, 1, 0, -1, -2],
            ],
            derived_outputs={
                "times": [2000, 2001, 2002, 2003, 2004, 2005],
                "snacks": [1, 2, 3, 4, 5, 6],
            },
        ),
        get_mock_model(
            times=[2000, 2001, 2002, 2003, 2004, 2005],
            outputs=[
                [51, 52, 53, 54, 55, 56, 57, 58],
                [61, 62, 63, 64, 65, 66, 67, 68],
                [71, 72, 73, 74, 75, 76, 77, 78],
                [81, 82, 83, 94, 95, 96, 97, 98],
                [91, 92, 93, 84, 85, 86, 87, 88],
                [5, 4, 3, 2, 1, 0, -1, -2],
            ],
            derived_outputs={
                "times": [2000, 2001, 2002, 2003, 2004, 2005],
                "snacks": [7, 8, 9, 10, 11, 12],
            },
        ),
    ]
    mcmc_run_df = pd.DataFrame.from_dict(
        {
            "contact_rate": [5, 10, 6, 4],
            "loglikelihood": [-1, -3, -2, -0.5],
            "accept": [1, 0, 0, 1],
        }
    )
    db_path = os.path.join(tmp_path, "out.db")
    powerbi_db_path = os.path.join(tmp_path, "pbi.db")

    # Store the models
    store_run_models(models, db_path)
    store_database(mcmc_run_df, db_path, "mcmc_run", scenario=0, run_idx=1)
    src_db = Database(db_path)
    mcmc_run_src = src_db.query("mcmc_run")
    derived_outputs_src = src_db.query("derived_outputs")

    # Create Power BI outputs
    create_power_bi_outputs(db_path, powerbi_db_path)

    # Query Power BI outputs
    pbi_db = Database(powerbi_db_path)
    table_0 = pbi_db.query("pbi_scenario_0")
    table_1 = pbi_db.query("pbi_scenario_1")
    mcmc_run_dest = pbi_db.query("mcmc_run")
    derived_outputs_dest = pbi_db.query("derived_outputs")

    # Validate derived_outputs copied over
    assert_frame_equal(derived_outputs_src, derived_outputs_dest)

    # Validate MCMC run copied over
    assert_frame_equal(mcmc_run_src, mcmc_run_dest)

    def get_expected_df(model, scenario):
        outputs_df = pd.DataFrame(model.outputs, columns=model.compartment_names)
        outputs_df.insert(0, "times", model.times)
        outputs_df.insert(0, "Scenario", scenario)
        outputs_df.insert(0, "idx", "run_0")
        return unpivot_outputs(outputs_df)

    # Validate Power BI outputs transformed correctly
    expected_df = get_expected_df(models[0], "S_0")
    assert_frame_equal(expected_df, table_0)
    expected_df = get_expected_df(models[1], "S_1")
    assert_frame_equal(expected_df, table_1)