Exemple #1
0
    def __init__(self,
                 gbd_round_id: int,
                 location_set_version_id: Optional[int] = None):
        """
        Collect the demographic IDs needed by the shared functions and,
        when a location set version is given, build a location hierarchy
        DAG to enumerate locations.

        Age group and sex IDs are read from the 'epi' team demographics
        (sex ID 3 is appended to the sex list); year IDs are read from the
        'cod' team demographics.

        Parameters
        ----------
        gbd_round_id
            The GBD round
        location_set_version_id
            The location set version to use (right now EpiViz-AT is passing
            dismod location set versions, but this will eventually switch
            to the cause of death hierarchy that is more extensive).
            When this is falsy (e.g. None), both location lists are left
            empty.
        """
        epi_demographics = db_queries.get_demographics(
            gbd_team='epi', gbd_round_id=gbd_round_id)
        self.age_group_id = epi_demographics['age_group_id']
        self.sex_id = epi_demographics['sex_id'] + [3]

        self.year_id = db_queries.get_demographics(
            gbd_team='cod', gbd_round_id=gbd_round_id)['year_id']

        # No hierarchy requested: leave both location lists empty.
        if not location_set_version_id:
            self.location_id = []
            self.drill_locations = []
            return

        hierarchy = LocationDAG(
            location_set_version_id=location_set_version_id,
            gbd_round_id=gbd_round_id)
        # Two independent lists, each holding every node of the hierarchy.
        self.location_id = list(hierarchy.dag.nodes)
        self.drill_locations = list(hierarchy.dag.nodes)
Exemple #2
0
def test_location_drill_start_end(ihme):
    """
    Drill with both a start location and an explicit list of end locations.

    A random child of location 1 is used as the drill start, and a random
    sample (at least two) of that location's children is used as the
    drill end.
    """
    settings_dict = deepcopy(BASE_CASE)
    model_block = settings_dict["model"]

    hierarchy = LocationDAG(settings_dict['location_set_version_id'],
                            settings_dict['gbd_round_id'])

    drill_start = choice(hierarchy.parent_children(1))
    candidate_ends = list(hierarchy.parent_children(drill_start))
    how_many = randint(2, len(candidate_ends))
    drill_ends = sample(candidate_ends, how_many)

    # Total number of locations strictly below the chosen end locations.
    n_below_ends = sum(len(hierarchy.descendants(loc)) for loc in drill_ends)

    model_block['drill_location_end'] = drill_ends
    model_block['drill_location_start'] = drill_start
    settings_dict['model'] = model_block

    inputs = MeasurementInputsFromSettings(settings=load_settings(settings_dict))

    # demographics.location_id should be set to all descendants of each
    # location in drill_location_end, plus the drill_location_end locations
    # themselves, plus the drill_location_start location
    expected_all = n_below_ends + len(drill_ends) + 1
    assert len(inputs.demographics.location_id) == expected_all

    # drill_locations holds only the end locations plus the start location
    expected_drill = len(drill_ends) + 1
    assert len(inputs.demographics.drill_locations) == expected_drill
Exemple #3
0
def test_no_drill(ihme):
    """
    With neither drill_location_start nor drill_location_end configured,
    the demographics should cover location 1 and all of its descendants.
    """
    settings_dict = deepcopy(BASE_CASE)
    model_block = settings_dict["model"]

    hierarchy = LocationDAG(settings_dict['location_set_version_id'],
                            settings_dict['gbd_round_id'])
    # Location 1 itself plus everything below it.
    expected_count = len(hierarchy.descendants(1)) + 1

    # Remove both drill settings (raises KeyError if either is missing).
    for drill_key in ('drill_location_end', 'drill_location_start'):
        model_block.pop(drill_key)
    settings_dict['model'] = model_block

    inputs = MeasurementInputsFromSettings(settings=load_settings(settings_dict))

    # since we haven't set either drill_location_start or
    # drill_location_end, demographics.location_id should be set
    # to the entire hierarchy
    assert len(inputs.demographics.location_id) == expected_count
    assert len(inputs.demographics.drill_locations) == expected_count
Exemple #4
0
def test_location_drill_start_only(ihme):
    """
    Drill with only a start location: a random child of location 1 is used
    as drill_location_start and drill_location_end is removed.
    """
    settings_dict = deepcopy(BASE_CASE)
    model_block = settings_dict["model"]

    hierarchy = LocationDAG(settings_dict['location_set_version_id'],
                            settings_dict['gbd_round_id'])
    drill_start = choice(hierarchy.parent_children(1))
    n_below_start = len(hierarchy.descendants(drill_start))
    n_parent_children = len(hierarchy.parent_children(drill_start))

    model_block.pop("drill_location_end")
    model_block['drill_location_start'] = drill_start
    settings_dict["model"] = model_block

    inputs = MeasurementInputsFromSettings(settings=load_settings(settings_dict))
    demographics = inputs.demographics

    # with drill_location_end unset, demographics.location_id should
    # be set to all descendants of the test loc, plus the test loc itself
    assert len(demographics.location_id) == n_below_start + 1
    # drill_locations should match what parent_children returns for the
    # start location
    assert len(demographics.drill_locations) == n_parent_children
Exemple #5
0
    def __init__(self,
                 gbd_round_id: int,
                 location_set_version_id: Optional[int] = None):
        """
        Demographic groups needed for shared functions.

        Age group and sex IDs are taken from the 'epi' team demographics
        (with sex ID 3 appended), year IDs from the 'cod' team
        demographics. Location lists are filled from a location hierarchy
        DAG when a location set version is supplied, and are empty
        otherwise.

        Parameters
        ----------
        gbd_round_id
            The GBD round
        location_set_version_id
            Location set version used to build the hierarchy; a falsy
            value (e.g. None) skips building it.
        """
        by_team = {
            team: db_queries.get_demographics(gbd_team=team,
                                              gbd_round_id=gbd_round_id)
            for team in ('epi', 'cod')
        }
        self.age_group_id = by_team['epi']['age_group_id']
        self.sex_id = by_team['epi']['sex_id'] + [3]
        self.year_id = by_team['cod']['year_id']

        all_locations = []
        mortality_locations = []
        if location_set_version_id:
            hierarchy = LocationDAG(
                location_set_version_id=location_set_version_id,
                gbd_round_id=gbd_round_id)
            # Every node of the hierarchy, as two independent lists.
            all_locations = list(hierarchy.dag.nodes)
            mortality_locations = list(hierarchy.dag.nodes)
        self.location_id = all_locations
        self.mortality_rate_location_id = mortality_locations
    def __init__(self, model_version_id: int,
                 gbd_round_id: int, decomp_step_id: int,
                 conn_def: str,
                 country_covariate_id: List[int],
                 csmr_cause_id: int, crosswalk_version_id: int,
                 csmr_process_version_id: Optional[int] = None,
                 location_set_version_id: Optional[int] = None,
                 drill_location_start: Optional[int] = None,
                 drill_location_end: Optional[List[int]] = None):
        """
        The class that constructs all of the measurement inputs. Pulls ASDR,
        CSMR, crosswalk versions, and country covariates, and puts them into
        one data frame that then formats itself for the dismod database.
        Performs covariate value interpolation if age and year ranges
        don't match up with GBD age and year ranges.

        This constructor only records the identifiers, builds the
        demographics and location hierarchy, restricts the demographics to
        the drill locations (if any), and initializes every input holder
        to None.

        Parameters
        ----------
        model_version_id
            the model version ID
        gbd_round_id
            the GBD round ID
        decomp_step_id
            the decomp step ID
        conn_def
            connection definition from .odbc file (e.g. 'epi') to connect
            to the IHME databases
        country_covariate_id
            list of covariate IDs
        csmr_cause_id
            cause to pull CSMR from
        crosswalk_version_id
            crosswalk version to use
        csmr_process_version_id
            process version ID for CSMR
        location_set_version_id
            can be None; if it is None, the best location_set_version_id
            for the estimation hierarchy of this GBD round is looked up
        drill_location_start
            which location ID to drill from as the parent
        drill_location_end
            which immediate children of the drill_location_start parent
            to include in the drill

        Attributes
        ----------
        self.decomp_step : str
            the decomp step in string form
        self.demographics : cascade_at.inputs.demographics.Demographics
            a demographics object that specifies the age group, sex,
            location, and year IDs to grab
        self.integrand_map : Dict[int, int]
            dictionary mapping from GBD measure IDs to DisMod IDs
            (not assigned in this constructor)
        self.asdr : cascade_at.inputs.asdr.ASDR
            all-cause mortality input object
        self.csmr : cascade_at.inputs.csmr.CSMR
            cause-specific mortality input object from cause csmr_cause_id
        self.data : cascade_at.inputs.data.CrosswalkVersion
            crosswalk version data from IHME database
        self.covariate_data : List[cascade_at.inputs.covariate_data.CovariateData]
            list of covariate data objects that contains the raw covariate
            data mapped to IDs
        self.location_dag : cascade_at.inputs.locations.LocationDAG
            DAG of locations to be used
        self.population : cascade_at.inputs.population.Population
            population object that is used for covariate weighting
        self.data_eta : Dict[str, float]
            dictionary of eta value to be applied to each measure
        self.density : Dict[str, str]
            dictionary of density to be applied to each measure
        self.nu : Dict[str, float]
            dictionary of nu value to be applied to each measure
        self.dismod_data : pd.DataFrame
            resulting dismod data formatted to be used in the dismod
            database

        Examples
        --------
        >>> from cascade_at.settings.base_case import BASE_CASE
        >>> from cascade_at.settings.settings import load_settings
        >>>
        >>> settings = load_settings(BASE_CASE)
        >>> covariate_id = [i.country_covariate_id for i in settings.country_covariate]
        >>>
        >>> i = MeasurementInputs(
        ...    model_version_id=settings.model.model_version_id,
        ...    gbd_round_id=settings.gbd_round_id,
        ...    decomp_step_id=settings.model.decomp_step_id,
        ...    csmr_process_version_id=None,
        ...    csmr_cause_id = settings.model.add_csmr_cause,
        ...    crosswalk_version_id=settings.model.crosswalk_version_id,
        ...    country_covariate_id=covariate_id,
        ...    conn_def='epi',
        ...    location_set_version_id=settings.location_set_version_id
        ... )
        >>> i.get_raw_inputs()
        >>> i.configure_inputs_for_dismod(settings)
        """
        for message in (
            f"Initializing input object for model version ID {model_version_id}.",
            f"GBD Round ID {gbd_round_id}.",
            f"Pulling from connection {conn_def}.",
        ):
            LOG.info(message)

        # Identifiers handed in by the caller.
        self.model_version_id = model_version_id
        self.gbd_round_id = gbd_round_id
        self.decomp_step_id = decomp_step_id
        self.csmr_process_version_id = csmr_process_version_id
        self.csmr_cause_id = csmr_cause_id
        self.crosswalk_version_id = crosswalk_version_id
        self.country_covariate_id = country_covariate_id
        self.conn_def = conn_def
        self.drill_location_start = drill_location_start
        self.drill_location_end = drill_location_end
        self.decomp_step = ds.decomp_step_from_decomp_step_id(self.decomp_step_id)

        # Use the caller's location set version when one was given,
        # otherwise look one up for this GBD round.
        if location_set_version_id is not None:
            self.location_set_version_id = location_set_version_id
        else:
            self.location_set_version_id = get_location_set_version_id(
                gbd_round_id=self.gbd_round_id)

        self.demographics = Demographics(
            gbd_round_id=self.gbd_round_id,
            location_set_version_id=self.location_set_version_id)
        self.location_dag = LocationDAG(
            location_set_version_id=self.location_set_version_id,
            gbd_round_id=self.gbd_round_id)

        # Need to subset the locations to only those needed for the drill.
        # The first value returned is the set of locations to pull data
        # for, including all descendants; the second is the set of
        # locations that are just parent-children in the drill.
        locations_with_descendants, parent_child_locations = locations_by_drill(
            drill_location_start=self.drill_location_start,
            drill_location_end=self.drill_location_end,
            dag=self.location_dag)
        if locations_with_descendants:
            self.demographics.location_id = locations_with_descendants
            self.demographics.drill_locations = parent_child_locations

        self.exclude_outliers = True

        # Raw input holders: all start out as None.
        self.asdr = self.csmr = self.population = None
        self.data = self.covariates = self.age_groups = None

        self.data_eta = self.density = self.nu = None
        self.measures_to_exclude = None

        self.dismod_data = self.covariate_data = None
        self.country_covariate_data = self.covariate_specs = None
        self.omega = None
Exemple #7
0
def dag(ihme):
    """Build a LocationDAG for location set version 544 in GBD round 6."""
    return LocationDAG(location_set_version_id=544, gbd_round_id=6)
def dag(df):
    """Build a LocationDAG directly from the given data frame."""
    location_dag = LocationDAG(df=df)
    return location_dag
Exemple #9
0
def run(model_version_id: int,
        jobmon: bool = True,
        make: bool = True,
        n_sim: int = 10,
        addl_workflow_args: Optional[str] = None,
        skip_configure: bool = False) -> None:
    """
    Runs the whole cascade or drill for a model version (which one is specified
    in the model version settings).

    The model status is set to 'Submitted' at the start, to 'Failed' if the
    workflow or any command fails, and to 'Complete' at the end.

    Parameters
    ----------
    model_version_id
        The model version to run
    jobmon
        Whether or not to use Jobmon. If not using Jobmon, executes
        the commands in sequence in this session.
    make
        Whether or not to make the directory structure for the databases, inputs, and outputs.
    n_sim
        Number of simulations to do going down the cascade
    addl_workflow_args
        Passed through to the jobmon workflow builder
    skip_configure
        Passed through to the traditional cascade command

    Raises
    ------
    NotImplementedError
        If the drill setting is neither 'drill' nor 'cascade'.
    RuntimeError
        If the jobmon workflow reports an error, or a command run without
        jobmon exits with a non-zero return code.
    """
    LOG.info(f"Starting model for {model_version_id}.")

    context = Context(model_version_id=model_version_id,
                      make=make,
                      configure_application=True)
    context.update_status(status='Submitted')

    settings = settings_from_model_version_id(
        model_version_id=model_version_id, conn_def=context.model_connection)
    dag = LocationDAG(location_set_version_id=settings.location_set_version_id,
                      gbd_round_id=settings.gbd_round_id)

    if settings.model.drill == 'drill':
        cascade_command = Drill(
            model_version_id=model_version_id,
            drill_parent_location_id=settings.model.drill_location_start,
            drill_sex=settings.model.drill_sex)
    elif settings.model.drill == 'cascade':

        location_start = None
        sex = None

        # Only integer settings are forwarded; anything else is treated
        # as "not set" and passed on as None.
        if isinstance(settings.model.drill_location_start, int):
            location_start = settings.model.drill_location_start
        if isinstance(settings.model.drill_sex, int):
            sex = settings.model.drill_sex

        cascade_command = TraditionalCascade(
            model_version_id=model_version_id,
            split_sex=settings.model.split_sex == 'most_detailed',
            dag=dag,
            n_sim=n_sim,
            # Use the validated value computed above rather than the raw
            # setting, consistent with how ``sex`` is handled.
            location_start=location_start,
            sex=sex,
            skip_configure=skip_configure)
    else:
        raise NotImplementedError(
            f"The drill/cascade setting {settings.model.drill} is not implemented."
        )

    if jobmon:
        LOG.info("Configuring jobmon.")
        wf = jobmon_workflow_from_cascade_command(
            cc=cascade_command,
            context=context,
            addl_workflow_args=addl_workflow_args)
        error = wf.run()
        if error:
            context.update_status(status='Failed')
            raise RuntimeError("Jobmon workflow failed.")
    else:
        LOG.info("Running without jobmon.")
        for c in cascade_command.get_commands():
            LOG.info(f"Running {c}.")
            process = subprocess.run(c,
                                     shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if process.returncode:
                context.update_status(status='Failed')
                # Separator added so stderr is not glued onto "error".
                raise RuntimeError(f"Command {c} failed with error: "
                                   f"{process.stderr.decode()}")

    context.update_status(status='Complete')
Exemple #10
0
    def __init__(self,
                 model_version_id,
                 gbd_round_id,
                 decomp_step_id,
                 csmr_process_version_id,
                 csmr_cause_id,
                 crosswalk_version_id,
                 country_covariate_id,
                 conn_def,
                 location_set_version_id=None,
                 drill=None):
        """
        The class that constructs all of the measurement inputs. Pulls ASDR, CSMR, crosswalk versions,
        and country covariates, and puts them into one data frame that then formats itself
        for the dismod database. Performs covariate value interpolation if age and year ranges
        don't match up with GBD age and year ranges.

        This constructor only records the identifiers, builds the demographics and
        location hierarchy, restricts the demographic locations when drilling, and
        initializes every input holder to None.

        Parameters:
            model_version_id: (int) the model version ID
            gbd_round_id: (int) the GBD round ID
            decomp_step_id: (int) the decomp step ID
            csmr_process_version_id: (int) process version ID for CSMR
            csmr_cause_id: (int) cause to pull CSMR from
            crosswalk_version_id: (int) crosswalk version to use
            country_covariate_id: (list of int) list of covariate IDs
            conn_def: (str) connection definition from .odbc file (e.g. 'epi')
            location_set_version_id: (int) can be None, if it's none, get the
                best location_set_version_id for estimation hierarchy of this GBD round.
            drill: (int) optional, which location ID to drill from as the parent. When
                truthy, the demographic locations become this location followed by its
                descendants in the location DAG.

        Attributes:
            self.decomp_step: (str) the decomp step in string form
            self.demographics: (cascade_at.inputs.demographics.Demographics) a demographics object
                that specifies the age group, sex, location, and year IDs to grab
            self.integrand_map: (dict) dictionary mapping from GBD measure IDs to DisMod IDs
                (not assigned in this constructor)
            self.asdr: (cascade_at.inputs.asdr.ASDR) all-cause mortality input object
            self.csmr: (cascade_at.inputs.csmr.CSMR) cause-specific mortality input object from cause
                csmr_cause_id
            self.data: (cascade_at.inputs.data.CrosswalkVersion) crosswalk version data from IHME database
            self.covariate_data: (List[cascade_at.inputs.covariate_data.CovariateData]) list of covariate
                data objects that contains the raw covariate data mapped to IDs
            self.location_dag: (cascade_at.inputs.locations.LocationDAG) DAG of locations to be used
            self.population: (cascade_at.inputs.population.Population) population object that is used
                for covariate weighting
            self.data_eta: (Dict[str, float]): dictionary of eta value to be applied to each measure
            self.density: (Dict[str, str]): dictionary of density to be applied to each measure
            self.nu: (Dict[str, float]): dictionary of nu value to be applied to each measure
            self.dismod_data: (pd.DataFrame) resulting dismod data formatted to be used in the dismod database

        Usage:
        >>> from cascade_at.settings.base_case import BASE_CASE
        >>> from cascade_at.settings.settings import load_settings

        >>> settings = load_settings(BASE_CASE)
        >>> covariate_ids = [i.country_covariate_id for i in settings.country_covariate]

        >>> i = MeasurementInputs(model_version_id=settings.model.model_version_id,
        ...            gbd_round_id=settings.gbd_round_id,
        ...            decomp_step_id=settings.model.decomp_step_id,
        ...            csmr_process_version_id=None,
        ...            csmr_cause_id = settings.model.add_csmr_cause,
        ...            crosswalk_version_id=settings.model.crosswalk_version_id,
        ...            country_covariate_id=covariate_ids,
        ...            conn_def='epi',
        ...            location_set_version_id=settings.location_set_version_id)
        >>> i.get_raw_inputs()
        >>> i.configure_inputs_for_dismod()
        """
        for message in (
            f"Initializing input object for model version ID {model_version_id}.",
            f"GBD Round ID {gbd_round_id}.",
            f"Pulling from connection {conn_def}.",
        ):
            LOG.info(message)

        # Identifiers handed in by the caller.
        self.model_version_id = model_version_id
        self.gbd_round_id = gbd_round_id
        self.decomp_step_id = decomp_step_id
        self.csmr_process_version_id = csmr_process_version_id
        self.csmr_cause_id = csmr_cause_id
        self.crosswalk_version_id = crosswalk_version_id
        self.country_covariate_id = country_covariate_id
        self.conn_def = conn_def
        self.decomp_step = ds.decomp_step_from_decomp_step_id(self.decomp_step_id)
        self.demographics = Demographics(gbd_round_id=self.gbd_round_id)

        # Use the caller's location set version when one was given,
        # otherwise look one up for this GBD round.
        if location_set_version_id is not None:
            self.location_set_version_id = location_set_version_id
        else:
            self.location_set_version_id = get_location_set_version_id(
                gbd_round_id=self.gbd_round_id)
        self.location_dag = LocationDAG(
            location_set_version_id=self.location_set_version_id,
            gbd_round_id=self.gbd_round_id)

        if drill:
            LOG.info(
                f"This is a DRILL model, so only going to pull data associated with "
                f"drill location start {drill} and its descendants.")
            # The drill location itself comes first, then its descendants.
            self.demographics.location_id = [
                drill, *self.location_dag.descendants(location_id=drill)]

        self.exclude_outliers = True

        # Raw input holders: all start out as None.
        self.asdr = self.csmr = self.population = None
        self.data = self.covariates = self.age_groups = None

        self.data_eta = self.density = self.nu = None
        self.measures_to_exclude = None

        self.dismod_data = self.covariate_data = None
        self.country_covariate_data = self.covariate_specs = None
        self.omega = None
def l_dag(df):
    """Build a LocationDAG from the given data frame, rooted at location 1."""
    location_dag = LocationDAG(df=df, root=1)
    return location_dag
Exemple #12
0
def test_dag_no_root(df):
    """A LocationDAG built from a data frame without a root must raise LocationDAGError."""
    dag_kwargs = {'df': df}
    with pytest.raises(LocationDAGError):
        LocationDAG(**dag_kwargs)
Exemple #13
0
def test_dag_error_missing_args():
    """Passing only location_set_version_id must raise LocationDAGError."""
    dag_kwargs = {'location_set_version_id': 0}
    with pytest.raises(LocationDAGError):
        LocationDAG(**dag_kwargs)
Exemple #14
0
def test_dag_error_noargs():
    """Constructing a LocationDAG with no arguments must raise LocationDAGError."""
    dag_kwargs = {}
    with pytest.raises(LocationDAGError):
        LocationDAG(**dag_kwargs)
Exemple #15
0
def test_dag_from_df(df):
    """
    A LocationDAG built from the data frame with root 1 should have 2 and 3
    below location 1, 4 and 5 below location 2, and all four as
    descendants of location 1.
    """
    location_dag = LocationDAG(df=df, root=1)

    expected_children = {1: {2, 3}, 2: {4, 5}}
    for parent, children in expected_children.items():
        assert set(location_dag.dag.successors(parent)) == children

    assert set(location_dag.descendants(1)) == {2, 3, 4, 5}
Exemple #16
0
def run(model_version_id: int,
        jobmon: bool = True,
        make: bool = True,
        n_sim: int = 10,
        n_pool: int = 10,
        addl_workflow_args: Optional[str] = None,
        skip_configure: bool = False,
        json_file: Optional[str] = None,
        test_dir: Optional[str] = None,
        execute_dag: bool = True) -> None:
    """
    Runs the whole cascade or drill for a model version (whichever one is specified
    in the model version settings).

    Creates a cascade command and a bunch of cascade operations based
    on the model version settings. More information on this structure
    is in :ref:`executor`.

    The list of commands is always written to ``dag_commands.txt`` in the
    context's inputs directory before anything is executed.

    Parameters
    ----------
    model_version_id
        The model version to run
    jobmon
        Whether or not to use Jobmon. If not using Jobmon, executes
        the commands in sequence in this session.
    make
        Whether or not to make the directory structure for the databases, inputs, and outputs.
    n_sim
        Number of simulations to do going down the cascade
    n_pool
        Passed through to the drill / cascade command
    addl_workflow_args
        Additional workflow args to add to the jobmon workflow name
        so that it is unique if you're testing
    skip_configure
        Skip configuring the inputs. The application is configured only
        when this is False, and the flag is also passed to the
        drill / cascade command.
    json_file
        Optional path to a JSON settings file. When given, settings are
        loaded from it (instead of from the model version ID) and written
        to the context's inputs.
    test_dir
        Passed to the context as its root directory
    execute_dag
        If False, return after writing the commands file without running
        anything; the status is then left at 'Submitted'.

    Raises
    ------
    NotImplementedError
        If the drill setting is neither 'drill' nor 'cascade'.
    RuntimeError
        If the jobmon workflow does not finish with status 'D', or a
        command run without jobmon exits with a non-zero return code.
    """
    LOG.info(f"Starting model for {model_version_id}.")

    context = Context(model_version_id=model_version_id,
                      make=make,
                      configure_application=not skip_configure,
                      root_directory=test_dir)
    context.update_status(status='Submitted')

    if json_file:
        with open(json_file) as fn:
            LOG.info(f"Reading settings from {json_file}")
            parameter_json = json.loads(fn.read())
        settings = load_settings(parameter_json)
        # Save the json file as it is used throughout the cascade
        LOG.info(f"Replacing {context.settings_file}")
        context.write_inputs(settings=parameter_json)
    else:
        settings = settings_from_model_version_id(
            model_version_id=model_version_id,
            conn_def=context.model_connection)
    dag = LocationDAG(location_set_version_id=settings.location_set_version_id,
                      gbd_round_id=settings.gbd_round_id)

    if settings.model.drill == 'drill':
        cascade_command = Drill(
            model_version_id=model_version_id,
            drill_parent_location_id=settings.model.drill_location_start,
            drill_sex=settings.model.drill_sex,
            n_sim=n_sim,
            n_pool=n_pool,
            skip_configure=skip_configure,
        )
    elif settings.model.drill == 'cascade':

        location_start = None
        sex = None

        # Only integer settings are forwarded; anything else is treated
        # as "not set" and passed on as None.
        if isinstance(settings.model.drill_location_start, int):
            location_start = settings.model.drill_location_start
        if isinstance(settings.model.drill_sex, int):
            sex = settings.model.drill_sex

        cascade_command = TraditionalCascade(
            model_version_id=model_version_id,
            split_sex=settings.model.split_sex == 'most_detailed',
            dag=dag,
            n_sim=n_sim,
            n_pool=n_pool,
            # Use the validated value computed above rather than the raw
            # setting, consistent with how ``sex`` is handled.
            location_start=location_start,
            sex=sex,
            skip_configure=skip_configure,
        )
    else:
        raise NotImplementedError(
            f"The drill/cascade setting {settings.model.drill} is not implemented."
        )

    dag_cmds_path = (context.inputs_dir / 'dag_commands.txt')
    LOG.info(f"Writing cascade dag commands to {dag_cmds_path}.")
    dag_cmds_path.write_text('\n'.join(cascade_command.get_commands()))

    if not execute_dag:
        return

    if jobmon:
        LOG.info("Configuring jobmon.")
        wf = jobmon_workflow_from_cascade_command(
            cc=cascade_command,
            context=context,
            addl_workflow_args=addl_workflow_args)
        wf_run = wf.run(seconds_until_timeout=60 * 60 * 24 * 3, resume=True)
        if wf_run.status != 'D':
            context.update_status(status='Failed')
            raise RuntimeError("Jobmon workflow failed.")
    else:
        LOG.info("Running without jobmon.")
        for c in cascade_command.get_commands():
            LOG.info(f"Running {c}")
            process = subprocess.run(c,
                                     shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if process.returncode:
                context.update_status(status='Failed')
                # Separator added so stderr is not glued onto "error".
                raise RuntimeError(f"Command {c} failed with error: "
                                   f"{process.stderr.decode()}")
            # Echo the output of every command. This lives inside the loop
            # so that earlier commands' output is not dropped and so that
            # ``process`` is never referenced when there are no commands.
            if process.stderr:
                print(process.stderr.decode())
            if process.stdout:
                print(process.stdout.decode())

    context.update_status(status='Complete')