def test_update_params__deep_array_request__expect_updated():
    old_params = {
        "foo": 1,
        "bar": {
            "baz": 2,
            "boop": [7, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    update_request = {"bar.boop(0)": 1}
    expected_new_params = {
        "foo": 1,
        "bar": {
            "baz": 2,
            "boop": [1, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    actual_new_params = update_params(old_params, update_request)
    assert actual_new_params == expected_new_params

def test_update_params__multiple_shallow_requests__expect_updated():
    old_params = {
        "foo": 1,
        "bank": 10,
        "bar": {
            "baz": 2,
            "boop": [7, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    update_request = {"foo": 2, "bank": 3}
    expected_new_params = {
        "foo": 2,
        "bank": 3,
        "bar": {
            "baz": 2,
            "boop": [7, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    actual_new_params = update_params(old_params, update_request)
    assert actual_new_params == expected_new_params

def test_update_params__no_request__expect_no_change():
    old_params = {
        "foo": 1,
        "bar": {
            "baz": 2,
            "boop": [7, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    update_request = {}
    expected_new_params = {
        "foo": 1,
        "bar": {
            "baz": 2,
            "boop": [7, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    actual_new_params = update_params(old_params, update_request)
    assert actual_new_params == expected_new_params

def test_update_params__dict_in_array_request__expect_updated():
    old_params = {
        "foo": [{"a": 1}, {"a": 2}, {"a": 3}],
    }
    update_request = {"foo(1).a": 4}
    expected_new_params = {
        "foo": [{"a": 1}, {"a": 4}, {"a": 3}],
    }
    actual_new_params = update_params(old_params, update_request)
    assert actual_new_params == expected_new_params

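# The tests above exercise a dotted-path update syntax where "bar.boop(0)" addresses index 0
# of the list at old_params["bar"]["boop"], and "foo(1).a" addresses key "a" of the dict at
# index 1 of old_params["foo"]. The project's real update_params lives elsewhere in the repo;
# the sketch below is only an illustration of that key syntax, written to satisfy these tests,
# and is not the actual implementation. The name _sketch_update_params is hypothetical.
import copy
import re


def _sketch_update_params(old_params: dict, update_request: dict) -> dict:
    """Illustrative only: apply dotted-path updates like "bar.boop(0)" or "foo(1).a"."""
    new_params = copy.deepcopy(old_params)
    for key, value in update_request.items():
        # Split "foo(1).a" into ["foo", 1, "a"]: dict keys and list indices.
        tokens = []
        for part in key.split("."):
            match = re.match(r"^(\w+)\((\d+)\)$", part)
            if match:
                tokens.extend([match.group(1), int(match.group(2))])
            else:
                tokens.append(part)

        # Walk down to the container holding the final key/index, then assign.
        target = new_params
        for token in tokens[:-1]:
            target = target[token]

        target[tokens[-1]] = value

    return new_params
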
def run_model_with_params(self, proposed_params: dict):
    """
    Run the model with a set of params.
    """
    logger.info(f"Running iteration {self.run_num}...")

    # Update default parameters to use calibration params.
    param_updates = {"time.end": self.end_time}
    for i, param_name in enumerate(self.param_list):
        param_updates[param_name] = proposed_params[i]

    params = copy.deepcopy(self.model_parameters)
    update_func = lambda ps: update_params(ps, param_updates)
    scenario = Scenario(self.model_builder, 0, params)
    scenario.run(update_func=update_func, derived_outputs_whitelist=self.derived_outputs_to_plot)
    self.latest_scenario = scenario
    return scenario

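# Hedged usage sketch for run_model_with_params: assuming a calibration object whose
# param_list is ["contact_rate", "sojourn.icu_early"] (hypothetical parameter names),
# a proposal vector is mapped positionally onto those dotted parameter paths:
#
#   proposed_params = [0.05, 12.5]
#   param_updates == {
#       "time.end": self.end_time,
#       "contact_rate": 0.05,
#       "sojourn.icu_early": 12.5,
#   }
#
# which update_params then applies to a deep copy of the default model parameters
# before the baseline Scenario is built and run.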
def run_full_model_for_chain(
    run_id: str, src_db_path: str, chain_id: int, burn_in: int, sample_size: int, quiet: bool
):
    """
    Run the full model (all time steps, all scenarios) for a subset of accepted calibration runs.

    It works like this:
        - We start off with a calibration chain of length C.
        - We apply "burn-in" by throwing away the first B iterations of the chain,
          leaving us with C - B iterations.
        - We then sample runs from the chain using a "sample size" parameter S,
          calculating the sample step N = floor((C - B) / S).
        - Once we know N, we start from the end of the chain, working backwards,
          and select every Nth run:
            - if a run is accepted, then we select it
            - if a run is not accepted, we select the first accepted run that precedes it

    Once we've sampled all the runs we need, we re-run them in full, including all their scenarios.
    """
    set_logging_config(not quiet, chain_id)
    msg = "Running full models for chain %s with burn-in of %s and sample size of %s."
    logger.info(msg, chain_id, burn_in, sample_size)
    try:
        app_region = get_app_region(run_id)
        msg = f"Running the {app_region.app_name} {app_region.region_name} model"
        logger.info(msg)
        dest_db_path = os.path.join(FULL_RUN_DATA_DIR, f"chain-{chain_id}")
        src_db = get_database(src_db_path)
        dest_db = get_database(dest_db_path)

        # Burn in MCMC parameter history and copy it across so it can be used in
        # visualizations downstream. Don't apply sampling to it - we want to see
        # the whole parameter space that was explored.
        mcmc_params_df = src_db.query(Table.PARAMS)
        burn_mask = mcmc_params_df["run"] >= burn_in
        dest_db.dump_df(Table.PARAMS, mcmc_params_df[burn_mask])

        # Add some extra columns to MCMC run history to track sampling.
        mcmc_run_df = src_db.query(Table.MCMC)
        num_runs = len(mcmc_run_df)
        msg = f"Tried to burn {burn_in} runs with sample size {sample_size}, but there are only {num_runs}"
        assert num_runs > (burn_in + sample_size), msg

        # Sampled column tells us whether a run will be sampled.
        sampled = []
        sample_step = max(1, (num_runs - burn_in) // sample_size)
        logger.info("Using a sample step of %s", sample_step)
        for idx, mcmc_run in mcmc_run_df.iterrows():
            should_sample = 1 if (num_runs - idx - 1) % sample_step == 0 else 0
            sampled.append(should_sample)

        mcmc_run_df["sampled"] = sampled

        # Parent column tells us which accepted run precedes this run.
        parents = []
        i_row = 0  # FIXME: This is a temporary patch.
        for _, mcmc_run in mcmc_run_df.iterrows():
            if mcmc_run["accept"] or i_row == 0:
                parent = int(mcmc_run["run"])

            parents.append(parent)
            i_row += 1

        mcmc_run_df["parent"] = parents

        # Burn in MCMC run history.
        burn_mask = mcmc_run_df["run"] >= burn_in
        burned_runs_str = ", ".join([str(i) for i in mcmc_run_df[~burn_mask].run])
        mcmc_run_df = mcmc_run_df[burn_mask].copy()
        num_remaining = len(mcmc_run_df)
        logger.info(
            "Burned %s of %s MCMC runs leaving %s remaining.", burn_in, num_runs, num_remaining
        )
        logger.info("Burned MCMC runs %s", burned_runs_str)
        dest_db.dump_df(Table.MCMC, mcmc_run_df)

        # Figure out which model runs to actually re-run.
        sampled_run_ids = mcmc_run_df[mcmc_run_df["sampled"] == 1].parent.unique().tolist()

        # Also include the MLE run.
        mle_df = db.process.find_mle_run(mcmc_run_df)
        mle_run_id = mle_df["run"].iloc[0]
        logger.info("Including MLE run %s", mle_run_id)
        sampled_run_ids.append(mle_run_id)

        sampled_run_ids = sorted(list(set(sampled_run_ids)))
        logger.info(
            "Running full model for %s sampled runs %s", len(sampled_run_ids), sampled_run_ids
        )
        outputs = []
        derived_outputs = []
        for sampled_run_id in sampled_run_ids:
            try:
                mcmc_run = mcmc_run_df.loc[mcmc_run_df["run"] == sampled_run_id].iloc[0]
            except IndexError:
                # This happens when we try to sample a parent run that has been burned;
                # we log this and ignore it.
                logger.warning("Skipping (probably) burned parent run id %s", sampled_run_id)
                continue

            run_id = mcmc_run["run"]
            chain_id = mcmc_run["chain"]
            assert mcmc_run["accept"]
            logger.info("Running full model for MCMC run %s", run_id)
            param_updates = db.load.load_mcmc_params(dest_db, run_id)
            update_func = lambda ps: update_params(ps, param_updates)
            with Timer("Running model scenarios"):
                scenarios = app_region.build_and_run_scenarios(update_func=update_func)

            run_id = int(run_id)
            chain_id = int(chain_id)
            with Timer("Processing model outputs"):
                processed_outputs = app_region.process_scenario_outputs(scenarios, run_id, chain_id)
                outputs.append(processed_outputs[Table.OUTPUTS])
                derived_outputs.append(processed_outputs[Table.DERIVED])

        with Timer("Saving model outputs to the database"):
            final_outputs = {}
            final_outputs[Table.OUTPUTS] = pd.concat(outputs, copy=False, ignore_index=True)
            final_outputs[Table.DERIVED] = pd.concat(derived_outputs, copy=False, ignore_index=True)
            db.store.save_model_outputs(dest_db, **final_outputs)

    except Exception:
        logger.exception("Full model run for chain %s failed", chain_id)
        raise

    logger.info("Finished running full models for chain %s.", chain_id)
    return chain_id

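# A worked example of the sampling arithmetic in run_full_model_for_chain above.
# The chain length, burn-in and sample size are made-up numbers (not from any real run),
# and _example_sampling_arithmetic is not part of the project's API.
def _example_sampling_arithmetic():
    """Illustrative only: mirrors the sample-step calculation with hypothetical C, B, S."""
    num_runs, burn_in, sample_size = 1000, 200, 100  # hypothetical C, B, S
    sample_step = max(1, (num_runs - burn_in) // sample_size)  # N = floor((C - B) / S) = 8
    # Working backwards from the end of the chain, every Nth index is flagged for sampling;
    # each flagged run is then resolved to its "parent" accepted run before being re-run.
    sampled_indices = [idx for idx in range(num_runs) if (num_runs - idx - 1) % sample_step == 0]
    assert sample_step == 8
    assert sampled_indices[-3:] == [983, 991, 999]
    return sampled_indices
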
def run(
    self,
    base_model=None,
    update_func=None,
    _hack_in_scenario_params: dict = None,
    derived_outputs_whitelist: Optional[List[str]] = None,
):
    """
    Run the scenario model simulation.
    If a base model is provided, then run the scenario from the scenario start time.
    If a parameter update function is provided, it will be used to update params before the model is run.
    """
    with Timer(f"Running scenario: {self.name}"):
        params = None
        if not base_model:
            # This model is the baseline model
            assert self.is_baseline, "Can only run base model if Scenario idx is 0"
            params = self.params["default"]
            if update_func:
                # Apply extra parameter updates
                params = update_func(params)

            self.model = self.model_builder(params)
        else:
            # This is a scenario model, based off the baseline model
            assert not self.is_baseline, "Can only run scenario model if Scenario idx is > 0"
            # Construct scenario params by merging scenario-specific params into default params
            params = self.params["scenarios"][self.idx]
            start_time = params["time"]["start"]
            if update_func:
                # Apply extra parameter updates
                params = update_func(params)

            if _hack_in_scenario_params:
                # Hack in scenario params for mixing optimization project.
                # TODO: Refactor code so that scenario params are applied *after* calibration update.
                params = update_params(params, _hack_in_scenario_params)

            # Ensure start time cannot be overwritten for a scenario
            params["time"]["start"] = start_time

            base_times = base_model.times
            base_outputs = base_model.outputs

            # Find the time step from which we will start the scenario
            start_index = get_scenario_start_index(base_times, params["time"]["start"])
            start_time = base_times[start_index]
            init_compartments = base_outputs[start_index, :]

            # Create the new scenario model using the scenario-specific params,
            # ensuring the initial conditions are the same for the given start time.
            self.model = self.model_builder(params)
            if type(self.model) is CompartmentalModel:
                self.model.initial_population = init_compartments
            else:
                self.model.compartment_values = init_compartments

        if type(self.model) is CompartmentalModel:
            if derived_outputs_whitelist:
                self.model.set_derived_outputs_whitelist(derived_outputs_whitelist)

            self.model.run()
        else:
            self.model.run_model(IntegrationType.SOLVE_IVP)

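# Hedged usage sketch for Scenario.run. The Scenario constructor call is inferred from
# run_model_with_params above, and the params layout follows the "default" / "scenarios"
# keys referenced inside run itself; build_model and calibration_update are hypothetical
# placeholders, not names defined in this repo.
#
#   baseline = Scenario(build_model, 0, params)        # idx 0 -> baseline model
#   baseline.run(update_func=calibration_update)
#
#   scenario_1 = Scenario(build_model, 1, params)      # idx > 0 -> scenario model
#   scenario_1.run(base_model=baseline.model, update_func=calibration_update)
#
# The scenario run copies the baseline compartment values at the scenario start time,
# so the baseline must be run before any dependent scenario.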