Example #1
0
def _get_cause_risk_pairs(gbd_round_id):
    """
    Returns most-detailed cause-risk pairs for the scalars pipeline.

    Args:
        gbd_round_id (int): gbd round id.

    Returns:
        pd.DataFrame:  Most detailed cause-risk pairs.

    Raises:
        RuntimeError:
            If the sets of directly-modeled and calculated PAFs are not
            mutually exclusive.
    """
    if gbd_round_id == 4:
        return _get_cause_risk_pairs_gbd2016()

    engine = db.db_engine("fbd-dev-read", database="forecasting")
    session = sessionmaker(bind=engine)()

    cause_hierarchy_version_id = get_hierarchy_version_id(
            session, entity_type="cause", entity_set_id=CAUSE_SET_ID,
            gbd_round_id=gbd_round_id)
    causes = get_strategy_set(session, strategy_id=CAUSE_STRATEGY_ID,
                              hierarchy_id=cause_hierarchy_version_id)

    rei_hierarchy_version_id = get_hierarchy_version_id(
            session, entity_type="risk", entity_set_id=REI_SET_ID,
            gbd_round_id=gbd_round_id)
    risks = get_strategy_set(session, strategy_id=REI_STRATEGY_ID,
                             hierarchy_id=rei_hierarchy_version_id)

    cr_hierarchy_version_id = get_hierarchy_version_id(
            session, entity_type="cause_risk_pair",
            entity_set_id=CAUSE_RISK_SET_ID, gbd_round_id=gbd_round_id)
    calculated_paf_set = get_strategy_set(
            session, strategy_id=CAUSE_RISK_STRATEGY_ID,
            hierarchy_id=cr_hierarchy_version_id)
    directly_modeled_paf_set = get_directly_modeled_pafs(gbd_round_id)
    crs = pd.concat([calculated_paf_set, directly_modeled_paf_set])
    if crs.duplicated().any():
        err_msg = ("The sets of directly-modeled and calculated PAFs are not "
                   "mutually exclusive")
        LOGGER.error(err_msg)
        raise RuntimeError(err_msg)

    cause_risk_pairs = (
        crs[["cause_id", "rei_id"]]
        .merge(causes[["cause_id", "acause"]], on="cause_id")
        .merge(risks[["rei_id", "rei"]], on="rei_id"))

    session.close()
    engine.dispose()

    return cause_risk_pairs
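# A minimal, self-contained sketch (hypothetical data, not pulled from the
# forecasting database) of why the pd.concat + duplicated() check above detects
# overlap: a cause-risk pair present in both the calculated and the
# directly-modeled sets appears twice in the concatenated frame.
import pandas as pd

_calculated = pd.DataFrame({"cause_id": [493, 495], "rei_id": [102, 102]})
_directly_modeled = pd.DataFrame({"cause_id": [495], "rei_id": [102]})
_crs = pd.concat([_calculated, _directly_modeled])
assert _crs.duplicated().any()  # (495, 102) occurs in both sets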
Example #2
0
def get_vaccine_reis(gbd_round_id):
    """Returns the list of risks that are interventions, e.g. vaccines, such as
    dtp3.

    Args:
        gbd_round_id (int):
            Numeric ID for the GBD round.
    Returns:
        tuple:
            The reis of the risks that are interventions.
    """
    if gbd_round_id == 4:
        return "dtp3", "measles", "rota", "pcv", "hib"

    engine = db.db_engine("fbd-dev-read", database="forecasting")
    session = sessionmaker(bind=engine)()

    rei_hierarchy_version_id = get_hierarchy_version_id(
        session,
        entity_type="risk",
        entity_set_id=REI_SET_ID,
        gbd_round_id=gbd_round_id)

    return tuple(
        get_strategy_set(session, strategy_id=REI_INTERVENTION_ID,
                         hierarchy_id=rei_hierarchy_version_id)["rei"])
Example #3
0
def get_maybe_negative_paf_pairs(gbd_round_id):
    """Get cause-risk pairs that *can* have negative PAFs, because they *can*
    be protective"""
    if gbd_round_id == 4:
        # Unfortunately these have to be hard-coded because we don't have
        # strategy sets for GBD 2016.
        return pd.DataFrame({
            "acause": ["cvd_ihd", "cvd_stroke_isch", "diabetes", "neo_breast",
                       "neuro_parkinsons"],
            "rei": ["drugs_alcohol", "drugs_alcohol", "drugs_alcohol",
                    "metab_bmi", "smoking_direct_prev"],
            "cause_id": [493, 495, 587, 429, 544],
            "rei_id": [102, 102, 102, 108, 166]
            })

    engine = db.db_engine("fbd-dev-write", database="forecasting")
    session = sessionmaker(bind=engine)()

    cause_risk_hierarchy_version_id = get_hierarchy_version_id(
        session, entity_type="cause_risk_pair",
        entity_set_id=CAUSE_RISK_SET_ID, gbd_round_id=gbd_round_id)

    maybe_negative_paf_set = get_strategy_set(
        session, strategy_id=CAUSE_RISK_MAYBE_NEGATIVE_PAF_SET_ID,
        hierarchy_id=cause_risk_hierarchy_version_id)

    # Set only has cause_ids and rei_ids, so get acauses
    acause_cause_id_map = _acauses(
        maybe_negative_paf_set["cause_id"].unique())
    maybe_negative_paf_set_with_acause = maybe_negative_paf_set.merge(
        acause_cause_id_map, how="left")

    # Ensure that all cause-ids have acauses
    all_causes_have_acauses = (
        maybe_negative_paf_set_with_acause["acause"].notnull().all())
    acause_err_msg = "Some causes don't have acauses"
    assert all_causes_have_acauses, acause_err_msg

    # ... and get reis.
    rei_rei_id_map = _reis(
        maybe_negative_paf_set_with_acause["rei_id"].unique())
    maybe_negative_paf_set_with_rei = (
        maybe_negative_paf_set_with_acause.merge(rei_rei_id_map, how="left"))

    # Ensure that all rei-ids have reis
    all_rei_ids_have_reis = (
        maybe_negative_paf_set_with_rei["rei"].notnull().all())
    rei_err_msg = "Some rei-ids don't have reis"
    assert all_rei_ids_have_reis, rei_err_msg

    session.close()
    engine.dispose()

    return maybe_negative_paf_set_with_rei
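# Hedged usage sketch (variable names and draws are illustrative): pairs
# returned by get_maybe_negative_paf_pairs are allowed to keep negative PAF
# draws, while any other pair's draws could be clipped at zero. gbd_round_id=4
# uses the hard-coded branch above, so this snippet needs no database access.
import numpy as np

maybe_negative = get_maybe_negative_paf_pairs(gbd_round_id=4)
allowed = set(zip(maybe_negative["cause_id"], maybe_negative["rei_id"]))
paf_draws = np.array([-0.02, 0.10, 0.35])  # made-up draws for one pair
if (429, 108) not in allowed:  # neo_breast / metab_bmi is in the round-4 set
    paf_draws = paf_draws.clip(min=0)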
Example #4
0
def _get_y_hat(acause, input_version, agg_version, measure, period, draws,
               gbd_round_id):
    """Gets expected value of cause specific mortality or yld rates.

    For modeled causes, if the data is split by sex, then it is assumed that it
    is in log rate space. If the data is not split by sex, then it is assumed
    that it is in normal rate space.

    For aggregate causes, it is assumed that the data is not split by sex and
    is saved in log rate space.

    The resulting y_hat is in log rate space.

    :param str acause: name of the target acause to aggregate to.
    :param str input_version: name of the mortality or yld version the
        aggregate is based on.
    :param str agg_version: name of the aggregate version.
    :return xarray.DataArray: The expected value of the cause specific
        mortality or yld rate.
    """
    # read GK modeled-level (most-detailed) causes from database
    engine = db.db_engine(NAME, database=DATABASE)
    session = sessionmaker(bind=engine)()

    gk_causes = get_strategy_set(session, FATAL_GK_STRATEGY_ID,
                                 CAUSE_HIERARCHY_ID)["acause"].values

    if acause in gk_causes:
        logger.info("{} is a modeled cause.".format(acause))
        y_hat = _get_modeled_y_hat(acause, input_version, measure, period,
                                   gbd_round_id, draws)

    else:
        logger.info("{} is an aggregated cause.".format(acause))
        y_hat = _get_aggregated_y_hat(acause, agg_version, measure, period,
                                      gbd_round_id)

    if isinstance(y_hat, xr.Dataset):
        if len(y_hat.data_vars) == 1:
            y_hat.rename({list(y_hat.data_vars.keys())[0]: "value"},
                         inplace=True)
            return y_hat["value"]
        logger.info("Using __xarray_dataarray_variable__, "
                    "but other data_vars are present! (probably just acause)")
        y_hat.rename({"__xarray_dataarray_variable__": "value"}, inplace=True)
    else:
        y_hat.name = "value"
    return y_hat
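# Tiny toy sketch of the Dataset -> DataArray normalization above: a
# single-variable Dataset is collapsed into a DataArray named "value"
# (variable, dimension, and values are made up; this uses the non-inplace
# rename API rather than the inplace call in the function).
import xarray as xr

_ds = xr.Dataset({"mx": xr.DataArray([0.1, 0.2], dims="age_group_id")})
_da = _ds[list(_ds.data_vars)[0]].rename("value")
assert _da.name == "value"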
Example #5
0
def get_directly_modeled_pafs(gbd_round_id):
    """Get cause-risk pairs that have directly-modeled PAFs"""
    engine = db.db_engine("fbd-dev-write", database="forecasting")
    session = sessionmaker(bind=engine)()

    if gbd_round_id == 4:
        gbd_round_id = 5   # use gbd2017 data

    cause_risk_hierarchy_version_id = get_hierarchy_version_id(
        session, entity_type="cause_risk_pair",
        entity_set_id=CAUSE_RISK_SET_ID, gbd_round_id=gbd_round_id)

    directly_modeled_paf_set = get_strategy_set(
        session, strategy_id=CAUSE_RISK_DIRECTLY_MODELED_SET_ID,
        hierarchy_id=cause_risk_hierarchy_version_id)

    # Set only has cause_ids and rei_ids, so get acauses
    acause_cause_id_map = _acauses(
        directly_modeled_paf_set["cause_id"].unique())
    directly_modeled_paf_set_with_acause = directly_modeled_paf_set.merge(
        acause_cause_id_map, how="left")

    # Ensure that all cause-ids have acauses
    all_causes_have_acauses = (
        directly_modeled_paf_set_with_acause["acause"].notnull().all())
    acause_err_msg = "Some causes don't have acauses"
    assert all_causes_have_acauses, acause_err_msg

    # ... and get reis.
    rei_rei_id_map = _reis(
        directly_modeled_paf_set_with_acause["rei_id"].unique())
    directly_modeled_paf_set_with_rei = (
        directly_modeled_paf_set_with_acause.merge(rei_rei_id_map, how="left"))

    # Ensure that all rei-ids have reis
    all_rei_ids_have_reis = (
        directly_modeled_paf_set_with_rei["rei"].notnull().all())
    rei_err_msg = "Some rei-ids don't have reis"
    assert all_rei_ids_have_reis, rei_err_msg

    session.close()
    engine.dispose()

    return directly_modeled_paf_set_with_rei
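# Toy sketch (hypothetical ids) of the left-merge null check used above: a
# cause_id that is missing from the acause map stays NaN after the merge, so
# notnull().all() is False and the assertion would fire.
import pandas as pd

_pairs = pd.DataFrame({"cause_id": [493, 999], "rei_id": [102, 102]})
_acause_map = pd.DataFrame({"cause_id": [493], "acause": ["cvd_ihd"]})
_merged = _pairs.merge(_acause_map, how="left")
assert not _merged["acause"].notnull().all()  # 999 has no acause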
Example #6
0
def main(version, years, draws, oos, subnational, dryrun=False):
    """
    Run all cause-specific cod models with the current modeling framework.
    This file must be run from the `scripts` directory of fbd_cod.

    Args:
        version (str): version name to use for the current mortality run.
        years (fbd_core.argparse.YearRange): years to load and model, e.g.
            1990:2017:2040.
        draws (int): how many draws to run through the pipeline.
        oos (bool): whether to hold a time series out of sample for validation
            and comparison.
        subnational (bool): whether or not to include the 93 subnational csu
            locations in the model.
        dryrun (bool): whether to pass the --dryrun flag through to the
            cause-specific models.
    """
    # NOTE: This won't work if you install fbd_cod and attempt to run
    # this script from the command line, but it never did.
    file_dir = os.path.dirname(os.path.realpath(__file__))
    execf_ = os.path.join(file_dir, "run_cod_model.py")

    make_run_log_file(version)
    years_str = str(years)
    threads = 30

    qsub_template = (
        "qsub -b y -l m_mem_free={memory}G -l fthread={threads} -q all.q -now no -P proj_forecasting "
        "-N {acause}_{sex}_{version}{dryrun} "
        "{python} {exec_file} -v "
        "-c {acause} -s {sex} --version {version} --years {years} --draws {"
        "draws} "
        #"--spline {sdi_interaction} {oos} {subnat} {dryrun}")
        "{sdi_interaction} {oos} {subnat} {dryrun}")

    # create a db connection to get the strategy set for fatal GK causes
    engine = db.db_engine("fbd-dev-read", database="forecasting")
    session = sessionmaker(bind=engine)()
    cols_to_keep = ["acause", "male", "female"]
    fatal_gk_causes = get_strategy_set(session, 18, 303)[cols_to_keep]

    for _, row in fatal_gk_causes.iterrows():
        for sex_id in settings.SEX_DICT.keys():
            sex_name = settings.SEX_DICT[sex_id]
            if row[sex_name] != 1:
                continue
            acause = row["acause"]
            if acause in settings.INTERACTION_CAUSES:
                sdi_interaction = "--sdi-interaction"
            else:
                sdi_interaction = ""
            oos_arg = "--oos" if oos else ""
            subnat_arg = "--subnational" if subnational else ""
            dryrun_arg = "--dryrun" if dryrun else ""
            if (acause.startswith("ckd")) or (acause == "nutrition_pem"):
                memory = 500 if draws == 1000 else 75
            else:
                memory = 400 if draws == 1000 else 75

            qsub = qsub_template.format(memory=memory,
                                        threads=threads,
                                        acause=acause,
                                        sex=sex_id,
                                        years=years_str,
                                        version=version,
                                        exec_file=execf_,
                                        draws=draws,
                                        python=settings.PYTHON_EXEC,
                                        sdi_interaction=sdi_interaction,
                                        oos=oos_arg,
                                        subnat=subnat_arg,
                                        dryrun=dryrun_arg)
            print(qsub)
            os.popen(qsub)
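# Self-contained rendering sketch of the qsub command built in main() above:
# the template string below is copied from main(), and every value filled in is
# a placeholder, not a real submission.
_qsub_template = (
    "qsub -b y -l m_mem_free={memory}G -l fthread={threads} -q all.q -now no -P proj_forecasting "
    "-N {acause}_{sex}_{version}{dryrun} "
    "{python} {exec_file} -v "
    "-c {acause} -s {sex} --version {version} --years {years} --draws {draws} "
    "{sdi_interaction} {oos} {subnat} {dryrun}")
print(_qsub_template.format(
    memory=75, threads=30, acause="cvd_ihd", sex=1, version="example_version",
    years="1990:2017:2040", exec_file="run_cod_model.py", python="python",
    draws=100, sdi_interaction="", oos="--oos", subnat="", dryrun=""))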
Example #7
0
def _get_aggregated_y_hat(acause, version, measure, period, gbd_round_id):
    """Gets expected value of cause specific mortality rates.

    For aggregate causes, it is assumed that the data is not split by sex and
    is saved in log rate space.

    When the children are added to form the aggregated acause result, the
    summation happens in normal space. Therefore, we must exponentiate the
    children's rates, add them up, and log them to get an aggregated
    y_hat in log rate space.

    The resulting y_hat is in log rate space.

    :param str acause: name of the target acause to aggregate to.
    :param str version: name of the aggregation version.
    :return xarray.DataArray: The expected value of the cause specific
        mortality rate.
    """
    # connect to db and read in cause hierarchy
    engine = db.db_engine(NAME, database=DATABASE)
    session = sessionmaker(bind=engine)()
    all_causes = get_hierarchy(session, "cause", CAUSE_HIERARCHY_ID)[[
        "acause", "cause_id", "parent_id"
    ]]
    # subset to just fatal causes
    cause_strategy_set = get_strategy_set(session, FATAL_GK_STRATEGY_ID,
                                          CAUSE_HIERARCHY_ID)
    cause_hierarchy = get_hierarchy(session, "cause", CAUSE_HIERARCHY_ID)
    cause_tree, node_map = subset_fatal.make_hierarchy_tree(
        cause_hierarchy, 294, "cause_id")
    fatal_subset = subset_fatal.include_up_hierarchy(
        cause_tree, node_map, cause_strategy_set["cause_id"].values)
    fatal_causes = all_causes[all_causes.cause_id.isin(fatal_subset)]

    cause_id = fatal_causes[fatal_causes.acause == acause].cause_id.values[0]
    children = fatal_causes.query(
        "parent_id == {}".format(cause_id))["acause"].values
    logger.info("y_hat is a sum of children: {}".format(children))

    # Create a list of child acause files which are not external causes and
    # check to make sure all the ones we want to sum up are actually present.
    potential_child_files = [
        FBDPath("/{gri}/{p}/{m}/{v}/{c}_hat.nc".format(gri=gbd_round_id,
                                                       p=period,
                                                       m=measure,
                                                       v=version,
                                                       c=child),
                root_dir="scratch") for child in children
        if child not in ("_all", "_none")
    ]
    child_files = [
        str(child_file) for child_file in potential_child_files
        if child_file.exists()
    ]
    if len(potential_child_files) != len(child_files):
        logger.error("You are missing files, bud. {} vs {}".format(
            potential_child_files, child_files))
        raise Exception("Missing y_hat files!")
    logger.debug("Summing these files: {}".format(child_files))

    exp_y_hat_sum = None
    for child_file in child_files:
        logger.info("Adding {}".format(child_file))
        exp_y_hat = xr.ufuncs.exp(
            xr.open_dataarray(child_file, drop_variables=["measure", "cov"]))
        if exp_y_hat_sum is None:
            exp_y_hat_sum = exp_y_hat
        else:
            exp_y_hat_broadcasted = xr.broadcast(exp_y_hat_sum, exp_y_hat)
            exp_y_hat_broadcasted = [
                data.fillna(0.) for data in exp_y_hat_broadcasted
            ]
            exp_y_hat_sum = sum(exp_y_hat_broadcasted)
    y_hat = xr.ufuncs.log(exp_y_hat_sum)
    y_hat.coords["acause"] = acause
    return y_hat
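# Tiny numeric sketch of the aggregation math above: children's log rates are
# exponentiated, summed in normal space, and logged again to obtain the
# parent's log rate (values are made up).
import numpy as np

_child_log_rates = [np.log(0.001), np.log(0.002)]
_parent_log_rate = np.log(sum(np.exp(r) for r in _child_log_rates))
assert np.isclose(_parent_log_rate, np.log(0.003))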
    smoothing = {
        0: ["location_id", "sex_id", "age_group_id"],
        1: ["location_id", "sex_id", "age_group_id"],
        2: ["region_id", "sex_id", "age_group_id"],
        3: ["super_region_id", "sex_id", "age_group_id"],
        "modeled": ["super_region_id", "sex_id", "age_group_id"]
    }

    # create a session to read in the relevant causes
    engine = db.db_engine(NAME, DATABASE)
    session = sessionmaker(bind=engine)()
    all_causes = get_hierarchy(session, "cause", CAUSE_HIERARCHY_ID)[[
        "acause", "cause_id", "parent_id", "level"
    ]]

    cause_strategy_set = get_strategy_set(session, GK_STRATEGY_SET_ID,
                                          CAUSE_HIERARCHY_ID)
    cause_hierarchy = get_hierarchy(session, "cause", CAUSE_HIERARCHY_ID)
    cause_tree, node_map = subset_fatal.make_hierarchy_tree(
        cause_hierarchy, 294, "cause_id")
    fatal_subset = subset_fatal.include_up_hierarchy(
        cause_tree, node_map, cause_strategy_set["cause_id"].values)
    fatal_causes = all_causes[all_causes.cause_id.isin(fatal_subset)]
    modeled_causes = fatal_causes.query("cause_id not in parent_id.values")
    aggregate_causes = fatal_causes.query("cause_id in parent_id.values")

    # Grab causes that are not modeled. There should be no level 4 causes.
    assert len(aggregate_causes.query("level == 4")) == 0

    hold_jids = ["1"]
    arima_jids = ["1"]
    for level in ["modeled", 3, 2, 1, 0]: