Example #1
def test_update_params__deep_array_request__expect_updated():
    old_params = {
        "foo": 1,
        "bar": {
            "baz": 2,
            "boop": [7, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    update_request = {"bar.boop(0)": 1}
    expected_new_params = {
        "foo": 1,
        "bar": {
            "baz": 2,
            "boop": [1, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    actual_new_params = update_params(old_params, update_request)
    assert actual_new_params == expected_new_params
Example #2
def test_update_params__multiple_shallow_requests__expect_updated():
    old_params = {
        "foo": 1,
        "bank": 10,
        "bar": {
            "baz": 2,
            "boop": [7, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    update_request = {"foo": 2, "bank": 3}
    expected_new_params = {
        "foo": 2,
        "bank": 3,
        "bar": {
            "baz": 2,
            "boop": [7, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    actual_new_params = update_params(old_params, update_request)
    assert actual_new_params == expected_new_params
Example #3
def test_update_params__no_request__expect_no_change():
    old_params = {
        "foo": 1,
        "bar": {
            "baz": 2,
            "boop": [7, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    update_request = {}
    expected_new_params = {
        "foo": 1,
        "bar": {
            "baz": 2,
            "boop": [7, 8, 9],
            "bing": {
                "bonk": 3,
            },
        },
        "boop": [4, 5, 6],
    }
    actual_new_params = update_params(old_params, update_request)
    assert actual_new_params == expected_new_params
Example #4
def test_update_params__dict_in_array_request__expect_updated():
    old_params = {
        "foo": [{"a": 1}, {"a": 2}, {"a": 3}],
    }
    update_request = {"foo(1).a": 4}
    expected_new_params = {
        "foo": [{"a": 1}, {"a": 4}, {"a": 3}],
    }
    actual_new_params = update_params(old_params, update_request)
    assert actual_new_params == expected_new_params
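
The tests above exercise `update_params` with dotted paths and parenthesised array indices ("bar.boop(0)", "foo(1).a"). The project's actual implementation is not shown here; the snippet below is only a minimal sketch of the path-resolution logic that would satisfy these four tests.

import copy
import re

def update_params(params: dict, updates: dict) -> dict:
    """Return a copy of `params` with each dotted-path update applied."""
    new_params = copy.deepcopy(params)
    for path, value in updates.items():
        # Split e.g. "bar.boop(0)" into the key sequence ["bar", "boop", 0].
        keys = []
        for part in path.split("."):
            match = re.match(r"^(\w+)\((\d+)\)$", part)
            if match:
                keys.extend([match.group(1), int(match.group(2))])
            else:
                keys.append(part)

        # Walk down to the parent container, then set the final key or index.
        target = new_params
        for key in keys[:-1]:
            target = target[key]

        target[keys[-1]] = value

    return new_params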
Example #5
    def run_model_with_params(self, proposed_params: dict):
        """
        Run the model with a set of params.
        """
        logger.info(f"Running iteration {self.run_num}...")
        # Update default parameters to use calibration params.
        param_updates = {"time.end": self.end_time}
        for i, param_name in enumerate(self.param_list):
            param_updates[param_name] = proposed_params[i]

        params = copy.deepcopy(self.model_parameters)
        update_func = lambda ps: update_params(ps, param_updates)
        scenario = Scenario(self.model_builder, 0, params)
        scenario.run(update_func=update_func,
                     derived_outputs_whitelist=self.derived_outputs_to_plot)
        self.latest_scenario = scenario
        return scenario
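
For concreteness, here is a small standalone illustration (names and values invented) of the update request this method builds: the keys use the same dotted-path format that `update_params` accepts, and `proposed_params` is treated as positionally indexable, as the loop above implies.

param_list = ["contact_rate", "time.step"]  # hypothetical calibrated parameter names
proposed_params = [0.05, 1.0]               # proposed values, paired by position
end_time = 365

param_updates = {"time.end": end_time}
for i, param_name in enumerate(param_list):
    param_updates[param_name] = proposed_params[i]

print(param_updates)
# {'time.end': 365, 'contact_rate': 0.05, 'time.step': 1.0}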
Example #6
def run_full_model_for_chain(
    run_id: str, src_db_path: str, chain_id: int, burn_in: int, sample_size: int, quiet: bool
):
    """
    Run the full model (all time steps, all scenarios) for a subset of accepted calibration runs.
    It works like this:
        - We start off with a calibration chain of length C
        - We apply "burn in" by throwing away the first B iterations of the chain, leaving us with C - B iterations
        - We then sample runs from the chain using a "sample size" parameter S by calculating N = floor((C - B) / S)
        - Once we know N, we start from the end of the chain, working backwards, and select every Nth run:
            - if a run is accepted, we select it
            - if a run is not accepted, we select the first accepted run that precedes it

    Once we've sampled all the runs we need, then we re-run them in full, including all their scenarios.
    """
    set_logging_config(not quiet, chain_id)
    msg = "Running full models for chain %s with burn-in of %s and sample size of %s."
    logger.info(msg, chain_id, burn_in, sample_size)
    try:
        app_region = get_app_region(run_id)
        msg = f"Running the {app_region.app_name} {app_region.region_name} model"
        logger.info(msg)

        dest_db_path = os.path.join(FULL_RUN_DATA_DIR, f"chain-{chain_id}")
        src_db = get_database(src_db_path)
        dest_db = get_database(dest_db_path)

        # Burn in MCMC parameter history and copy it across so it can be used in visualizations downstream.
        # Don't apply sampling to it - we want to see the whole parameter space that was explored.
        mcmc_params_df = src_db.query(Table.PARAMS)
        burn_mask = mcmc_params_df["run"] >= burn_in
        dest_db.dump_df(Table.PARAMS, mcmc_params_df[burn_mask])

        # Add some extra columns to MCMC run history to track sampling.
        mcmc_run_df = src_db.query(Table.MCMC)
        num_runs = len(mcmc_run_df)
        msg = f"Tried to burn {burn_in} runs with sample size {sample_size}, but there are only {num_runs}"
        assert num_runs > (burn_in + sample_size), msg

        # Sampled column tells us whether a run will be sampled.
        sampled = []
        sample_step = max(1, (num_runs - burn_in) // sample_size)
        logger.info("Using a sample step of %s", sample_step)
        for idx, mcmc_run in mcmc_run_df.iterrows():
            should_sample = 1 if (num_runs - idx - 1) % sample_step == 0 else 0
            sampled.append(should_sample)

        mcmc_run_df["sampled"] = sampled

        # Parent column tells us which accepted run precedes this run
        parents = []
        i_row = 0  # FIXME: This is a temporary patch.
        for _, mcmc_run in mcmc_run_df.iterrows():
            if mcmc_run["accept"] or i_row == 0:
                parent = int(mcmc_run["run"])

            parents.append(parent)
            i_row += 1

        mcmc_run_df["parent"] = parents

        # Burn in MCMC run history.
        burn_mask = mcmc_run_df["run"] >= burn_in
        burned_runs_str = ", ".join([str(i) for i in mcmc_run_df[~burn_mask].run])
        mcmc_run_df = mcmc_run_df[burn_mask].copy()
        num_remaining = len(mcmc_run_df)
        logger.info(
            "Burned %s of %s MCMC runs leaving %s remaining.", burn_in, num_runs, num_remaining
        )

        logger.info("Burned MCMC runs %s", burned_runs_str)
        dest_db.dump_df(Table.MCMC, mcmc_run_df)

        # Figure out which model runs to actually re-run.
        sampled_run_ids = mcmc_run_df[mcmc_run_df["sampled"] == 1].parent.unique().tolist()

        # Also include the MLE
        mle_df = db.process.find_mle_run(mcmc_run_df)
        mle_run_id = mle_df["run"].iloc[0]
        logger.info("Including MLE run %s", mle_run_id)
        sampled_run_ids.append(mle_run_id)
        sampled_run_ids = sorted(list(set(sampled_run_ids)))
        logger.info(
            "Running full model for %s sampled runs %s", len(sampled_run_ids), sampled_run_ids
        )

        outputs = []
        derived_outputs = []
        for sampled_run_id in sampled_run_ids:
            try:
                mcmc_run = mcmc_run_df.loc[mcmc_run_df["run"] == sampled_run_id].iloc[0]
            except IndexError:
                # This happens when we try to sample a parent run that has been burned; log it and ignore it.
                logger.warning("Skipping (probably) burned parent run id %s", sampled_run_id)
                continue

            run_id = mcmc_run["run"]
            chain_id = mcmc_run["chain"]
            assert mcmc_run["accept"]
            logger.info("Running full model for MCMC run %s", run_id)
            param_updates = db.load.load_mcmc_params(dest_db, run_id)
            update_func = lambda ps: update_params(ps, param_updates)
            with Timer("Running model scenarios"):
                scenarios = app_region.build_and_run_scenarios(update_func=update_func)

            run_id = int(run_id)
            chain_id = int(chain_id)

            with Timer("Processing model outputs"):
                processed_outputs = app_region.process_scenario_outputs(scenarios, run_id, chain_id)
                outputs.append(processed_outputs[Table.OUTPUTS])
                derived_outputs.append(processed_outputs[Table.DERIVED])

        with Timer("Saving model outputs to the database"):
            final_outputs = {}
            final_outputs[Table.OUTPUTS] = pd.concat(outputs, copy=False, ignore_index=True)
            final_outputs[Table.DERIVED] = pd.concat(derived_outputs, copy=False, ignore_index=True)
            db.store.save_model_outputs(dest_db, **final_outputs)

    except Exception:
        logger.exception("Full model run for chain %s failed", chain_id)
        raise

    logger.info("Finished running full models for chain %s.", chain_id)
    return chain_id
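
To make the sampling arithmetic concrete, here is a standalone sketch with invented numbers (not taken from any real run) showing how the sample step N = floor((C - B) / S) and the backwards every-Nth selection fall out of the chain length, burn-in and sample size.

num_runs = 1000    # C: chain length
burn_in = 200      # B: runs discarded from the start of the chain
sample_size = 100  # S: requested number of sampled runs

# Same formula as in the function above, clamped to at least 1.
sample_step = max(1, (num_runs - burn_in) // sample_size)  # 8

# Walking backwards from the end of the chain, every Nth index is flagged.
sampled = [1 if (num_runs - idx - 1) % sample_step == 0 else 0 for idx in range(num_runs)]

print(sample_step)   # 8
print(sum(sampled))  # 125 indices flagged, before the burn-in mask is applied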
Example #7
    def run(
        self,
        base_model=None,
        update_func=None,
        _hack_in_scenario_params: Optional[dict] = None,
        derived_outputs_whitelist: Optional[List[str]] = None,
    ):
        """
        Run the scenario model simulation.
        If a base model is provided, then run the scenario from the scenario start time.
        If a parameter update function is provided, it will be used to update params before the model is run.
        """
        with Timer(f"Running scenario: {self.name}"):
            params = None
            if not base_model:
                # This model is the baseline model
                assert self.is_baseline, "Can only run base model if Scenario idx is 0"
                params = self.params["default"]
                if update_func:
                    # Apply extra parameter updates
                    params = update_func(params)

                self.model = self.model_builder(params)
            else:
                # This is a scenario model, based off the baseline model
                assert not self.is_baseline, "Can only run scenario model if Scenario idx is > 0"

                # Construct scenario params by merging scenario-specific params into default params
                params = self.params["scenarios"][self.idx]
                start_time = params["time"]["start"]
                if update_func:
                    # Apply extra parameter updates
                    params = update_func(params)

                if _hack_in_scenario_params:
                    # Hack in scenario params for mixing optimization project.
                    # TODO: Refactor code so that scenario params are applied *after* calibration update.
                    params = update_params(params, _hack_in_scenario_params)

                # Ensure start time cannot be overwritten for a scenario
                params["time"]["start"] = start_time

                base_times = base_model.times
                base_outputs = base_model.outputs

                # Find the time step from which we will start the scenario
                start_index = get_scenario_start_index(base_times,
                                                       params["time"]["start"])
                start_time = base_times[start_index]
                init_compartments = base_outputs[start_index, :]

                # Create the new scenario model using the scenario-specific params,
                # ensuring the initial conditions are the same for the given start time.
                self.model = self.model_builder(params)
                if type(self.model) is CompartmentalModel:
                    self.model.initial_population = init_compartments
                else:
                    self.model.compartment_values = init_compartments

            if type(self.model) is CompartmentalModel:
                if derived_outputs_whitelist:
                    self.model.set_derived_outputs_whitelist(
                        derived_outputs_whitelist)

                self.model.run()
            else:
                self.model.run_model(IntegrationType.SOLVE_IVP)
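
A minimal usage sketch of the calling pattern this method assumes, using the constructor signature seen in Example #5; `model_builder` and `params` here are placeholders, not real objects from this codebase. The baseline scenario runs first, and each subsequent scenario is seeded from the baseline's outputs at its own start time.

baseline = Scenario(model_builder, 0, params)
baseline.run()  # idx 0: the baseline, run from the default start time

scenario = Scenario(model_builder, 1, params)
# idx > 0: starts at params["scenarios"][1]["time"]["start"], with initial
# compartment values copied from the baseline's outputs at that time.
scenario.run(base_model=baseline.model)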