def get_settings(model_version_id=None, settings_json=None):
    if model_version_id is not None:
        return settings.settings_from_model_version_id(model_version_id,
                                                       "dismod-at-dev")
    elif settings_json is not None:
        return settings.load_settings(settings_json)
    else:
        return settings.load_settings(BASE_CASE)
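A minimal usage sketch of the fallback order above (model version ID first, then an explicit settings JSON, then the BASE_CASE default); the model version ID and parameter_json values are hypothetical:

# Hypothetical model version ID; resolves settings from the database.
s = get_settings(model_version_id=12345)
# An explicit settings JSON (e.g. a dict from json.load) takes second priority.
s = get_settings(settings_json=parameter_json)
# With neither argument, the BASE_CASE defaults are loaded.
s = get_settings()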
Example #2
def test_override_priors():
    np.random.seed(0)
    settings = load_settings(BASE_CASE)
    alchemy = Alchemy(settings)

    prior = alchemy.get_smoothing_grid(rate=settings.rate[0])
    draws = np.abs(np.random.normal(loc=1e-3, scale=1e-7,
                                    size=(len(prior.ages), len(prior.times), 100)))

    alchemy.override_priors(
        rate_grid=prior,
        update_dict={
            'value': draws,
            'dage': draws,
            'dtime': draws,
            'ages': prior.ages,
            'times': prior.times
        },
        new_prior_distribution=None
    )
    for (a, age), (t, time) in zip(enumerate(prior.ages), enumerate(prior.times)):
        assert prior.value[age, time].mean == draws[a, t, :].mean()
    for (a, age), (t, time) in zip(enumerate(prior.ages[:-1]), enumerate(prior.times)):
        assert prior.dage[age, time].mean == draws[a, t, :].mean()
    for (a, age), (t, time) in zip(enumerate(prior.ages), enumerate(prior.times[:-1])):
        assert prior.dtime[age, time].mean == draws[a, t, :].mean()
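Note that zip over the two enumerations above stops at the shorter sequence, so each assertion loop only walks the age/time diagonal of the grid. A sketch that checks every cell instead, using itertools.product on the same prior and draws objects:

from itertools import product

# The cross product covers every (age, time) cell, not just the diagonal.
for (a, age), (t, time) in product(enumerate(prior.ages), enumerate(prior.times)):
    assert prior.value[age, time].mean == draws[a, t, :].mean()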
Example #3
def main():
    """
    Grabs the inputs for a specific model version ID, sets up the folder
    structure, and pickles the inputs object plus writes the settings json
    for use later on.

    If you're doing a drill, only get input data for the locations
    (parent and children) that the drill will use.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    LOG.info(
        f"Configuring inputs for model version ID {args.model_version_id}.")
    LOG.debug(f"Arguments: {args}.")

    context = Context(model_version_id=args.model_version_id,
                      make=args.make,
                      configure_application=args.configure)
    parameter_json = settings_json_from_model_version_id(
        model_version_id=args.model_version_id,
        conn_def=context.model_connection)
    settings = load_settings(settings_json=parameter_json)

    inputs = MeasurementInputsFromSettings(settings=settings)
    inputs.get_raw_inputs()
    inputs.configure_inputs_for_dismod(settings=settings)

    context.write_inputs(inputs=inputs, settings=parameter_json)
Example #4
def test_data_cv_by_integrand_update(data_cv_by_integrand, num, measure, value):
    settings = BASE_CASE.copy()
    settings.update(data_cv_by_integrand)
    s = load_settings(settings)
    assert type(s.data_cv_by_integrand) == FormList
    assert s.data_cv_by_integrand[num].integrand_measure_id == measure
    assert s.data_cv_by_integrand[num].value == value
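The test above takes data_cv_by_integrand, num, measure, and value as arguments, which suggests a @pytest.mark.parametrize decorator (or parametrized fixtures) was stripped when these examples were collected. A sketch of how it might be declared; the parameter set is borrowed from Example #11 and is therefore an assumption:

import pytest

@pytest.mark.parametrize("data_cv_by_integrand,num,measure,value", [
    # Hypothetical case: a single integrand-specific CV override.
    ({"data_cv_by_integrand": [{"integrand_measure_id": 5, "value": 0.5}]},
     0, 5, 0.5),
])
def test_data_cv_by_integrand_update(data_cv_by_integrand, num, measure, value):
    ...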
Example #5
def test_location_drill_start_end(ihme):
    these_settings = deepcopy(BASE_CASE)

    model_settings = these_settings["model"]

    tree = LocationDAG(these_settings['location_set_version_id'],
                       these_settings['gbd_round_id'])
    region_ids = tree.parent_children(1)
    parent_test_loc = choice(region_ids)
    test_children = list(tree.parent_children(parent_test_loc))
    num_test_children = randint(2, len(test_children))

    children_test_locs = sample(test_children, num_test_children)
    num_descendants = 0
    for child in children_test_locs:
        num_descendants += len(tree.descendants(child))

    model_settings['drill_location_end'] = children_test_locs
    model_settings['drill_location_start'] = parent_test_loc
    these_settings['model'] = model_settings
    s = load_settings(these_settings)
    mi = MeasurementInputsFromSettings(settings=s)

    # demographics.location_id should be set to all descendants of each
    # location in drill_location_end, plus the drill_location_end locations
    # themselves, plus the drill_location_start location
    assert len(mi.demographics.location_id) == (num_descendants +
                                                len(children_test_locs) + 1)
    assert len(mi.demographics.drill_locations) == (len(children_test_locs) +
                                                    1)
Example #6
def test_get_prior_avgint_grid():
    prior_avgint_grid = get_prior_avgint_grid(
        settings=load_settings(BASE_CASE),
        integrands=['pini', 'iota', 'chi'],
        sexes=[1, 2],
        locations=[1])
    assert type(prior_avgint_grid) == pd.DataFrame
    assert sorted(prior_avgint_grid.integrand_id.unique()) == [0, 2, 7]
Example #7
def configure_inputs(model_version_id: int,
                     make: bool,
                     configure: bool,
                     midpoint: bool = False,
                     test_dir: Optional[str] = None,
                     json_file: Optional[str] = None) -> None:
    """
    Grabs the inputs for a specific model version ID, sets up the folder
    structure, and pickles the inputs object plus writes the settings json
    for use later on. Also uploads CSMR to the database attached to the model version,
    if applicable.

    Optionally use a json file for settings instead of a model version ID's json file.

    Parameters
    ----------
    model_version_id
        The model version ID to configure inputs for
    make
        Whether or not to make the directory structure for the model version ID
    configure
        Whether to configure the application for the IHME cluster; if not,
        the test_dir is used for the directory tree instead.
    test_dir
        A test directory to use rather than the directory specified by the
        model version context in the IHME file system.
    json_file
        An optional filepath pointing to a different json than is attached to the
        model_version_id. Will use this instead for settings.
    """
    LOG.info(f"Configuring inputs for model version ID {model_version_id}.")

    context = Context(model_version_id=model_version_id,
                      make=make,
                      configure_application=configure,
                      root_directory=test_dir)
    if json_file:
        LOG.info(f"Reading settings from file: {json_file}")
        with open(json_file, 'r') as f:  # avoid shadowing the json_file argument
            parameter_json = json.load(f)
    else:
        parameter_json = settings_json_from_model_version_id(
            model_version_id=model_version_id,
            conn_def=context.model_connection)

    settings = load_settings(settings_json=parameter_json)

    inputs = MeasurementInputsFromSettings(settings=settings)
    inputs.get_raw_inputs()
    inputs.configure_inputs_for_dismod(settings=settings, midpoint=midpoint)

    if not inputs.csmr.raw.empty:
        LOG.info("Uploading CSMR to t3 table.")
        inputs.csmr.attach_to_model_version_in_db(
            model_version_id=model_version_id,
            conn_def=context.model_connection)

    context.write_inputs(inputs=inputs, settings=parameter_json)
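A hedged usage sketch of configure_inputs; the model version ID and test directory are hypothetical values:

# Configure inputs against a scratch directory rather than the IHME
# file system (hypothetical model version ID and path).
configure_inputs(
    model_version_id=12345,
    make=True,        # build the directory structure
    configure=False,  # skip IHME-cluster configuration
    test_dir='/tmp/cascade_test',
)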
Example #8
def test_apply_min_cv_to_value():
    settings = load_settings(BASE_CASE)
    alchemy = Alchemy(settings)

    prior = alchemy.get_smoothing_grid(rate=settings.rate[0]).value
    # Apply a ridiculously large coefficient of variation
    alchemy.apply_min_cv_to_prior_grid(prior_grid=prior, min_cv=1e6)
    for (a, age), (t, time) in zip(enumerate(prior.ages), enumerate(prior.times)):
        assert prior[age, time].standard_deviation == prior[age, time].mean * 1e6
Example #9
def test_data_cv_by_integrand_update(data_cv_by_integrand, num, measure,
                                     value):
    settings = BASE_CASE.copy()
    settings.update(data_cv_by_integrand)
    s = load_settings(settings)
    assert type(s.data_cv_by_integrand) == FormList
    assert s.data_cv_by_integrand[num].integrand_measure_id == measure
    assert s.data_cv_by_integrand[num].value == value
    assert s.model.is_field_unset('midpoint_approximation')
Example #10
def test_min_cv_from_settings_TEMP(settings):
    new = load_settings(settings)
    cv = min_cv_from_settings(settings=new)
    assert cv['most_detailed']['chi'] == 0.3
    assert cv['most_detailed']['iota'] == 0.2
    assert cv['most_detailed']['omega'] == 0.1
    assert cv['level1']['omega'] == 1.0
    assert cv['level1']['chi'] == 10.0
    assert cv['dummy']['omega'] == 0.1
    assert cv['dummy']['iota'] == 0.1
    assert cv['dummy']['chi'] == 0.1
Example #11
def test_data_cv_from_settings_by_integrand():
    settings = BASE_CASE.copy()
    settings.update(
        {"data_cv_by_integrand": [{
            "integrand_measure_id": 5,
            "value": 0.5
        }]})
    s = load_settings(settings)
    cv = data_cv_from_settings(settings=s)
    assert cv['prevalence'] == 0.5
    assert cv['iota'] == 0.2
Example #12
def test_estimate_prior_grid():
    np.random.seed(0)
    settings = load_settings(BASE_CASE)
    alchemy = Alchemy(settings)

    prior = alchemy.get_smoothing_grid(rate=settings.rate[0])
    draws = np.abs(np.random.normal(loc=1e-3, scale=1e-7,
                                    size=(len(prior.ages), len(prior.times), 100)))
    estimate_grid_from_draws(
        ages=prior.ages, times=prior.times, draws=draws, grid_priors=prior.value
    )
    for (a, age), (t, time) in zip(enumerate(prior.ages), enumerate(prior.times)):
        assert prior.value[age, time].mean == draws[a, t, :].mean()
Example #13
def test_construct_integrand_table_from_settings():
    settings = BASE_CASE.copy()
    settings.update(
        {"data_cv_by_integrand": [{
            "integrand_measure_id": 5,
            "value": 0.5
        }]})
    s = load_settings(settings)
    cv = MeasurementInputs.data_cv_from_settings(settings=s)
    df = construct_integrand_table(data_cv_from_settings=cv)
    changed = df.loc[df.integrand_name == 'prevalence']
    unchanged = df.loc[df.integrand_name != 'prevalence']
    assert all(changed.minimum_meas_cv == 0.5)
    assert all(unchanged.minimum_meas_cv == 0.2)
Example #14
def test_data_cv_from_settings_by_integrand():
    settings = BASE_CASE.copy()
    settings.update(
        {"data_cv_by_integrand": [{
            "integrand_measure_id": 5,
            "value": 0.5
        }]})
    s = load_settings(settings)
    cv = MeasurementInputs.data_cv_from_settings(settings=s)
    for k, v in cv.items():
        if k == 'prevalence':
            assert v == 0.5
        else:
            assert v == 0.1
Example #15
def test_get_prior_avgint_grid():

    settings = load_settings(BASE_CASE)
    alchemy = Alchemy(settings)

    rates = ['pini', 'iota', 'chi']

    grids = integrand_grids(alchemy=alchemy, integrands=rates)

    df = get_prior_avgint_grid(
        grids=grids,
        sexes=[1, 2],
        locations=[1]
    )
    assert type(df) == pd.DataFrame
    assert sorted(df['integrand_id'].unique()) == [0, 2, 7]
    assert all(df.location_id == 1)
Example #16
def test_format_rate_grid_for_ihme(mi):
    settings = load_settings(BASE_CASE)
    alchemy = Alchemy(settings)
    d = DismodFiller(
        path='none',
        settings_configuration=settings,
        measurement_inputs=mi,
        grid_alchemy=alchemy,
        parent_location_id=70,
        sex_id=2
    )
    grid = format_rate_grid_for_ihme(
        rates=d.parent_child_model['rate'],
        gbd_round_id=6,
        location_id=70,
        sex_id=2
    )
    assert all(grid.columns == ['location_id', 'year_id', 'age_group_id',
                                'sex_id', 'measure_id', 'mean', 'upper', 'lower'])
Example #17
    def read_inputs(self) -> (MeasurementInputs, Alchemy, SettingsConfig):
        """
        Read the inputs from disk.
        """
        with open(self.inputs_file, "rb") as f:
            LOG.info(f"Reading input obj from {self.inputs_file}.")
            inputs = dill.load(f)
        with open(self.settings_file) as f:
            settings_json = json.load(f)
        settings = load_settings(settings_json=settings_json)
        alchemy = Alchemy(settings=settings)

        # For some reason, pickling causes a KeyError in FormList when
        # trying to access CovariateSpecs. Re-creating the covariate specs
        # for the inputs works around this; ideally the workaround goes away
        # once we figure out why pickling makes it error.
        inputs.covariate_specs = CovariateSpecs(
            country_covariates=settings.country_covariate,
            study_covariates=settings.study_covariate)
        return inputs, alchemy, settings
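Because read_inputs references self.inputs_file and self.settings_file, it appears to be a method on the Context object used elsewhere in these examples; a usage sketch under that assumption, with a hypothetical model version ID:

# Assumes read_inputs is a Context method and that the inputs were
# previously written with context.write_inputs (hypothetical ID).
context = Context(model_version_id=12345, make=False, configure_application=False)
inputs, alchemy, settings = context.read_inputs()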
Example #18
def test_no_drill(ihme):
    these_settings = deepcopy(BASE_CASE)

    model_settings = these_settings["model"]

    tree = LocationDAG(these_settings['location_set_version_id'],
                       these_settings['gbd_round_id'])
    num_descendants = len(tree.descendants(1))

    model_settings.pop('drill_location_end')
    model_settings.pop('drill_location_start')

    these_settings['model'] = model_settings
    s = load_settings(these_settings)
    mi = MeasurementInputsFromSettings(settings=s)

    # since we haven't set either drill_location_start or
    # drill_location_end, demographics.location_id should be set
    # to the entire hierarchy
    assert len(mi.demographics.location_id) == num_descendants + 1
    assert len(mi.demographics.drill_locations) == num_descendants + 1
Example #19
def test_location_drill_start_only(ihme):
    these_settings = deepcopy(BASE_CASE)

    model_settings = these_settings["model"]

    tree = LocationDAG(these_settings['location_set_version_id'],
                       these_settings['gbd_round_id'])
    region_ids = tree.parent_children(1)
    test_loc = choice(region_ids)
    num_descendants = len(tree.descendants(test_loc))
    num_mr_locs = len(tree.parent_children(test_loc))

    model_settings.pop("drill_location_end")
    model_settings['drill_location_start'] = test_loc
    these_settings["model"] = model_settings
    s = load_settings(these_settings)
    mi = MeasurementInputsFromSettings(settings=s)

    # with drill_location_end unset, demographics.location_id should
    # be set to all descendants of the test loc, plus the test loc itself
    assert len(mi.demographics.location_id) == num_descendants + 1
    assert len(mi.demographics.drill_locations) == num_mr_locs
Example #20
def main():

    args = ARG_LIST.parse_args(sys.argv[1:])
    logging.basicConfig(level=LEVELS[args.log_level])

    from cascade_at.settings.settings import load_settings

    with open(args.json_file) as f:
        settings_json = json.load(f)
    settings = load_settings(settings_json=settings_json)

    if 0:  # disabled branch: run the full configure-inputs/dismod_db/run pipeline
        from cascade_at.executor.configure_inputs import configure_inputs
        global context, inputs
        context, inputs = configure_inputs(
            model_version_id=args.model_version_id,
            make=False,
            configure=False,
            test_dir=args.test_dir,
            json_file=args.json_file,
        )

        inputs2 = all_locations(inputs, settings)

        for d in inputs, inputs2:
            print()
            for integrand in sorted(d.dismod_data.measure.unique()):
                print(
                    integrand,
                    len(d.dismod_data[d.dismod_data.measure == integrand]),
                    'locations',
                    len(d.dismod_data.loc[d.dismod_data.measure ==
                                          integrand].location_id.unique()))

        if 1:  # dump inputs and inputs2, then copy inputs2.p over inputs.p
            import shutil
            import dill
            with open(
                    f'/tmp/cascade_dir/data/{args.model_version_id}/inputs/inputs1.p',
                    'wb') as stream:
                dill.dump(inputs, stream)
            with open(
                    f'/tmp/cascade_dir/data/{args.model_version_id}/inputs/inputs2.p',
                    'wb') as stream:
                dill.dump(inputs2, stream)
            shutil.copy2(
                f'/tmp/cascade_dir/data/{args.model_version_id}/inputs/inputs2.p',
                f'/tmp/cascade_dir/data/{args.model_version_id}/inputs/inputs.p'
            )

        from cascade_at.executor.dismod_db import dismod_db
        # It seems that dismod_db gets mtall/mtspecific from inputs.p for just
        # the parent and the parent's children, and that the entire set of
        # locations is in inputs.p for mtall and mtspecific.
        dismod_db(model_version_id=args.model_version_id,
                  parent_location_id=inputs.drill_location_start,
                  fill=True,
                  test_dir=args.test_dir,
                  save_fit=False,
                  save_prior=False)

        from cascade_at.executor.run import run
        run(model_version_id=args.model_version_id,
            jobmon=False,
            make=False,
            skip_configure=True,
            json_file=args.json_file,
            test_dir=args.test_dir,
            execute_dag=False)

    else:

        import dill

        with open(
                f'/tmp/cascade_dir/data/{args.model_version_id}/inputs/inputs1.p',
                'rb') as stream:
            inputs = dill.load(stream)
        global covariate_reference, data, asdr, csmr

        cov_ref = CovariateReference(inputs)
        covariate_reference = reduce(
            lambda x, y: pd.merge(x, y),
            [cov_ref.configure_for_dismod(c) for c in inputs.covariate_data])

        data = inputs.data.configure_for_dismod(
            relabel_incidence=settings.model.relabel_incidence)
        data = inputs.add_covariates_to_data(data)

        asdr = inputs.asdr.configure_for_dismod()
        csmr = inputs.csmr.configure_for_dismod()

        if __debug__:
            asdr_grps = asdr.groupby(['sex_id', 'location_id'])
            csmr_grps = csmr.groupby(['sex_id', 'location_id'])
            import numpy as np
            assert np.all(asdr_grps.count() == csmr_grps.count())
Example #21
def test_data_cv_from_settings():
    settings = BASE_CASE.copy()
    s = load_settings(settings)
    cv = data_cv_from_settings(settings=s)
    assert cv['iota'] == 0.2
Example #22
def settings():
    settings = load_settings(BASE_CASE)
    return settings
Example #23
def alchemy():
    return Alchemy(load_settings(BASE_CASE))
Example #24
def settings():
    return load_settings(BASE_CASE)
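These no-argument functions look like pytest fixtures whose @pytest.fixture decorators were stripped when the examples were collected; a minimal sketch of how such a fixture would be declared and consumed, assuming pytest:

import pytest

@pytest.fixture
def settings():
    return load_settings(BASE_CASE)

def test_uses_settings(settings):
    # pytest injects the fixture's return value by parameter name.
    assert settings is not None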
Example #25
def test_data_cv_from_settings():
    settings = BASE_CASE.copy()
    s = load_settings(settings)
    cv = MeasurementInputs.data_cv_from_settings(settings=s)
    for k, v in cv.items():
        assert v == 0.1
Example #26
def test_midpoint_approx_update(midpoint_approximation):
    settings = BASE_CASE.copy()
    settings['model'].update(midpoint_approximation)
    sett = load_settings(settings)
    assert sett.model.midpoint_approximation == [6, 9]
Example #27
def modified_settings():
    s = deepcopy(BASE_CASE)
    s['model']['constrain_omega'] = 0
    return load_settings(s)
Example #28
def run(model_version_id: int,
        jobmon: bool = True,
        make: bool = True,
        n_sim: int = 10,
        n_pool: int = 10,
        addl_workflow_args: Optional[str] = None,
        skip_configure: bool = False,
        json_file: Optional[str] = None,
        test_dir: Optional[str] = None,
        execute_dag: bool = True) -> None:
    """
    Runs the whole cascade or drill for a model version (whichever one is specified
    in the model version settings).

    Creates a cascade command and a bunch of cascade operations based
    on the model version settings. More information on this structure
    is in :ref:`executor`.

    Parameters
    ----------
    model_version_id
        The model version to run
    jobmon
        Whether or not to use Jobmon. If not using Jobmon, executes
        the commands in sequence in this session.
    make
        Whether or not to make the directory structure for the databases, inputs, and outputs.
    n_sim
        Number of simulations to do going down the cascade
    addl_workflow_args
        Additional workflow args to add to the jobmon workflow name
        so that it is unique, e.g. when you're testing.
    skip_configure
        Skip configuring the inputs, e.g. when they have already been
        configured.
    """
    LOG.info(f"Starting model for {model_version_id}.")

    context = Context(model_version_id=model_version_id,
                      make=make,
                      configure_application=not skip_configure,
                      root_directory=test_dir)
    context.update_status(status='Submitted')

    if json_file:
        with open(json_file) as fn:
            LOG.info(f"Reading settings from {json_file}")
            parameter_json = json.loads(fn.read())
        settings = load_settings(parameter_json)
        # Save the json file as it is used throughout the cascade
        LOG.info(f"Replacing {context.settings_file}")
        context.write_inputs(settings=parameter_json)
    else:
        settings = settings_from_model_version_id(
            model_version_id=model_version_id,
            conn_def=context.model_connection)
    dag = LocationDAG(location_set_version_id=settings.location_set_version_id,
                      gbd_round_id=settings.gbd_round_id)

    if settings.model.drill == 'drill':
        cascade_command = Drill(
            model_version_id=model_version_id,
            drill_parent_location_id=settings.model.drill_location_start,
            drill_sex=settings.model.drill_sex,
            n_sim=n_sim,
            n_pool=n_pool,
            skip_configure=skip_configure,
        )
    elif settings.model.drill == 'cascade':

        location_start = None
        sex = None

        if isinstance(settings.model.drill_location_start, int):
            location_start = settings.model.drill_location_start
        if isinstance(settings.model.drill_sex, int):
            sex = settings.model.drill_sex

        cascade_command = TraditionalCascade(
            model_version_id=model_version_id,
            split_sex=settings.model.split_sex == 'most_detailed',
            dag=dag,
            n_sim=n_sim,
            n_pool=n_pool,
            location_start=location_start,
            sex=sex,
            skip_configure=skip_configure,
        )
    else:
        raise NotImplementedError(
            f"The drill/cascade setting {settings.model.drill} is not implemented."
        )

    dag_cmds_path = (context.inputs_dir / 'dag_commands.txt')
    LOG.info(f"Writing cascade dag commands to {dag_cmds_path}.")
    dag_cmds_path.write_text('\n'.join(cascade_command.get_commands()))

    if not execute_dag:
        return

    if jobmon:
        LOG.info("Configuring jobmon.")
        wf = jobmon_workflow_from_cascade_command(
            cc=cascade_command,
            context=context,
            addl_workflow_args=addl_workflow_args)
        wf_run = wf.run(seconds_until_timeout=60 * 60 * 24 * 3, resume=True)
        if wf_run.status != 'D':
            context.update_status(status='Failed')
            raise RuntimeError("Jobmon workflow failed.")
    else:
        LOG.info("Running without jobmon.")
        for c in cascade_command.get_commands():
            LOG.info(f"Running {c}")
            process = subprocess.run(c,
                                     shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if process.returncode:
                context.update_status(status='Failed')
                raise RuntimeError(f"Command {c} failed with error"
                                   f"{process.stderr.decode()}")
        # Note: process holds the result of the last command in the loop.
        if process.stderr:
            print(process.stderr.decode())
        if process.stdout:
            print(process.stdout.decode())

    context.update_status(status='Complete')
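A usage sketch for run, with Jobmon disabled so the cascade commands execute sequentially in this session; the model version ID is hypothetical:

# Run the whole cascade (or drill, per the model version settings)
# without Jobmon (hypothetical model version ID).
run(
    model_version_id=12345,
    jobmon=False,  # execute commands in sequence in this session
    make=True,     # build the directory structure for databases/inputs/outputs
)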