Esempio n. 1
0
def load_lbwsg_paf(key: str, location: str):
    """Load and format the LBWSG population attributable fraction for ``location``.

    Reads pre-staged PAF draws from HDF, restricts to percent-metric YLL rows
    (all LBWSG risk is about mortality), trims each cause's rows to the age
    groups its restrictions allow, normalizes, and returns the data reshaped,
    validated, and hierarchically sorted for simulation use.
    """
    data_path = paths.lbwsg_data_path('population_attributable_fraction', location)
    data: pd.DataFrame = pd.read_hdf(data_path)
    data['rei_id'] = risk_factors.low_birth_weight_and_short_gestation.gbd_id

    # Keep percent-metric rows only; all lbwsg risk is about mortality, so
    # only the YLL measure is relevant.
    data = data[data.metric_id == vi_globals.METRICS['Percent']]
    data = data[data.measure_id.isin([vi_globals.MEASURES['YLLs']])]

    cause_by_id = {c.gbd_id: c for c in causes}
    # We filter paf age groups by cause level restrictions.
    filtered_pieces = []
    for (cause_id, measure_id), group in data.groupby(['cause_id', 'measure_id']):
        measure_name = 'yll' if measure_id == vi_globals.MEASURES['YLLs'] else 'yld'
        filtered = utilities.filter_data_by_restrictions(
            group, cause_by_id[cause_id], measure_name, utility_data.get_age_group_ids())
        filtered_pieces.append(filtered)
    data = pd.concat(filtered_pieces, ignore_index=True)

    data = utilities.convert_affected_entity(data, 'cause_id')
    yll_rows = data['measure_id'] == vi_globals.MEASURES['YLLs']
    data.loc[yll_rows, 'affected_measure'] = 'excess_mortality_rate'
    data = (data
            .groupby(['affected_entity', 'affected_measure'])
            .apply(utilities.normalize, fill_value=0)
            .reset_index(drop=True))
    keep_columns = (vi_globals.DEMOGRAPHIC_COLUMNS
                    + ['affected_entity', 'affected_measure']
                    + vi_globals.DRAW_COLUMNS)
    data = data.filter(keep_columns)

    data = utilities.reshape(data)
    data = utilities.scrub_gbd_conventions(data, location)
    validation.validate_for_simulation(data, risk_factors.low_birth_weight_and_short_gestation,
                                       'population_attributable_fraction', location)
    for interval_column in ('age', 'year'):
        data = utilities.split_interval(data, interval_column=interval_column,
                                        split_column_prefix=interval_column)
    return utilities.sort_hierarchical_data(data)
Esempio n. 2
0
def load_lbwsg_relative_risk(key: str, location: str):
    """Load and format LBWSG relative risk data for ``location``.

    Reads pre-staged RR draws from HDF, collapses the per-cause data to a
    single 'all' affected entity (the draws are identical across causes, so
    diarrheal diseases is used as the representative), marks everything as
    affecting excess mortality, normalizes, snaps the TMREL category's draws
    to exactly 1.0, and returns the data reshaped, validated, and
    hierarchically sorted for simulation use.
    """
    path = paths.lbwsg_data_path('relative_risk', location)
    data = pd.read_hdf(path)  # type: pd.DataFrame
    data['rei_id'] = risk_factors.low_birth_weight_and_short_gestation.gbd_id
    data = utilities.convert_affected_entity(data, 'cause_id')
    # RRs for all causes are the same, so keep one representative cause and
    # relabel it as applying to everything.
    data = data[data.affected_entity == 'diarrheal_diseases']
    data['affected_entity'] = 'all'
    # All lbwsg risk is about mortality.
    data.loc[:, 'affected_measure'] = 'excess_mortality_rate'
    data = data.filter(vi_globals.DEMOGRAPHIC_COLUMNS
                       + ['affected_entity', 'affected_measure', 'parameter']
                       + vi_globals.DRAW_COLUMNS)
    data = (
        data
            .groupby(['affected_entity', 'parameter'])
            .apply(utilities.normalize, fill_value=1)
            .reset_index(drop=True)
    )

    # Snap TMREL-category draws that are numerically ~1 to exactly 1.0 so the
    # reference category carries no residual risk after normalization.
    tmrel_cat = utility_data.get_tmrel_category(risk_factors.low_birth_weight_and_short_gestation)
    tmrel_mask = data.parameter == tmrel_cat
    data.loc[tmrel_mask, vi_globals.DRAW_COLUMNS] = (
        data
            .loc[tmrel_mask, vi_globals.DRAW_COLUMNS]
            .mask(np.isclose(data.loc[tmrel_mask, vi_globals.DRAW_COLUMNS], 1.0), 1.0)
    )

    data = utilities.reshape(data)
    data = utilities.scrub_gbd_conventions(data, location)
    validation.validate_for_simulation(data, risk_factors.low_birth_weight_and_short_gestation,
                                       'relative_risk', location)
    data = utilities.split_interval(data, interval_column='age', split_column_prefix='age')
    data = utilities.split_interval(data, interval_column='year', split_column_prefix='year')
    return utilities.sort_hierarchical_data(data)
Esempio n. 3
0
def process_relative_risk(data: pd.DataFrame,
                          key: str,
                          entity: Union[RiskFactor, AlternativeRiskFactor],
                          location: str,
                          gbd_round_id: int,
                          age_group_ids: List[int] = None,
                          whitelist_sids: bool = False) -> pd.DataFrame:
    """Clean and reshape raw relative risk draws for ``entity``.

    Mirrors the vivarium_inputs processing pipeline: tags rows with the risk's
    rei_id, restricts to most-detailed causes, drops YLL-only causes
    (optionally keeping SIDS when ``whitelist_sids`` is True), derives
    ``affected_measure`` from the morbidity/mortality flags, normalizes age
    and year groups, snaps the TMREL category's draws to exactly 1.0 for
    categorical risks, and validates/reshapes into the GBD artifact format.
    """
    # from vivarium_gbd_access.gbd.get_relative_risk
    data['rei_id'] = entity.gbd_id

    # from vivarium_inputs.extract.extract_relative_risk
    data = vi_utils.filter_to_most_detailed_causes(data)

    # from vivarium_inputs.core.get_relative_risk
    # YLL-only causes are not modeled; SIDS can be whitelisted through.
    yll_only_causes = set([
        c.gbd_id for c in causes if c.restrictions.yll_only and
        (c != causes.sudden_infant_death_syndrome if whitelist_sids else True)
    ])
    data = data[~data.cause_id.isin(yll_only_causes)]

    data = vi_utils.convert_affected_entity(data, 'cause_id')
    # Anything with a morbidity flag affects incidence; mortality-only rows
    # affect excess mortality.
    morbidity = data.morbidity == 1
    mortality = data.mortality == 1
    data.loc[morbidity & mortality, 'affected_measure'] = 'incidence_rate'
    data.loc[morbidity & ~mortality, 'affected_measure'] = 'incidence_rate'
    data.loc[~morbidity & mortality,
             'affected_measure'] = 'excess_mortality_rate'
    data = filter_relative_risk_to_cause_restrictions(data)

    data = data.filter(vi_globals.DEMOGRAPHIC_COLUMNS +
                       ['affected_entity', 'affected_measure', 'parameter'] +
                       vi_globals.DRAW_COLUMNS)
    data = (data.groupby(['affected_entity', 'parameter']).apply(
        normalize_age_and_years,
        fill_value=1,
        gbd_round_id=gbd_round_id,
        age_group_ids=age_group_ids).reset_index(drop=True))

    if entity.distribution in [
            'dichotomous', 'ordered_polytomous', 'unordered_polytomous'
    ]:
        # Snap TMREL-category draws that are numerically ~1 to exactly 1.0 so
        # the reference category carries no residual risk.
        tmrel_cat = utility_data.get_tmrel_category(entity)
        tmrel_mask = data.parameter == tmrel_cat
        data.loc[tmrel_mask, vi_globals.DRAW_COLUMNS] = (
            data.loc[tmrel_mask, vi_globals.DRAW_COLUMNS].mask(
                np.isclose(data.loc[tmrel_mask, vi_globals.DRAW_COLUMNS], 1.0),
                1.0))

    data = validate_and_reshape_gbd_data(data, entity, key, location,
                                         gbd_round_id, age_group_ids)
    return data
Esempio n. 4
0
def load_ikf_paf(key: str, location: str) -> pd.DataFrame:
    """Load and format the impaired kidney function PAF for ``location``.

    Extracts PAF and relative risk draws from GBD (upstream validation
    disabled), drops YLL-only causes, keeps only PAF rows backed by a
    relative risk, restricts age groups by cause-level restrictions,
    normalizes, and returns validated, hierarchically sorted data.
    """
    key = EntityKey(key)
    entity = get_entity(key)

    value_cols = vi_globals.DRAW_COLUMNS
    location_id = utility_data.get_location_id(location)

    data = extract.extract_data(entity, 'population_attributable_fraction', location_id, validate=False)
    relative_risk = extract.extract_data(entity, 'relative_risk', location_id, validate=False)

    # YLL-only causes are not modeled; drop them from both datasets.
    yll_only_causes = set([c.gbd_id for c in causes if c.restrictions.yll_only])
    data = data[~data.cause_id.isin(yll_only_causes)]
    relative_risk = relative_risk[~relative_risk.cause_id.isin(yll_only_causes)]

    # Keep only PAF rows that have corresponding relative risk data.
    data = (data.groupby('cause_id', as_index=False)
            .apply(core.filter_by_relative_risk, relative_risk)
            .reset_index(drop=True))

    causes_map = {c.gbd_id: c for c in causes}
    temp = []
    # We filter paf age groups by cause level restrictions.
    for (c_id, measure), df in data.groupby(['cause_id', 'measure_id']):
        cause = causes_map[c_id]
        measure = 'yll' if measure == vi_globals.MEASURES['YLLs'] else 'yld'
        df = utilities.filter_data_by_restrictions(df, cause, measure, utility_data.get_age_group_ids())
        temp.append(df)
    data = pd.concat(temp, ignore_index=True)

    data = utilities.convert_affected_entity(data, 'cause_id')
    # YLL PAFs affect mortality; YLD PAFs affect incidence.
    data.loc[data['measure_id'] == vi_globals.MEASURES['YLLs'], 'affected_measure'] = 'excess_mortality_rate'
    data.loc[data['measure_id'] == vi_globals.MEASURES['YLDs'], 'affected_measure'] = 'incidence_rate'
    data = (data.groupby(['affected_entity', 'affected_measure'])
            .apply(utilities.normalize, fill_value=0)
            .reset_index(drop=True))
    data = data.filter(vi_globals.DEMOGRAPHIC_COLUMNS + ['affected_entity', 'affected_measure']
                       + vi_globals.DRAW_COLUMNS)

    data = utilities.reshape(data, value_cols=value_cols)
    data = utilities.scrub_gbd_conventions(data, location)
    sim_validation.validate_for_simulation(data, entity, key.measure, location)
    data = utilities.split_interval(data, interval_column='age', split_column_prefix='age')
    data = utilities.split_interval(data, interval_column='year', split_column_prefix='year')
    return utilities.sort_hierarchical_data(data)
Esempio n. 5
0
def load_ikf_relative_risk(key: str, location: str) -> pd.DataFrame:
    """Load and format impaired kidney function relative risk data.

    Extracts RR draws from GBD (upstream validation disabled), drops
    YLL-only causes, keeps only the causes this project models, derives the
    affected measure from the morbidity/mortality flags, normalizes, snaps
    the TMREL category's draws to exactly 1.0, and returns validated,
    hierarchically sorted data.
    """
    entity_key = EntityKey(key)
    entity = get_entity(entity_key)

    draw_columns = vi_globals.DRAW_COLUMNS
    location_id = utility_data.get_location_id(location)

    data = extract.extract_data(entity, 'relative_risk', location_id, validate=False)
    # YLL-only causes are not modeled.
    yll_only = set(c.gbd_id for c in causes if c.restrictions.yll_only)
    data = data[~data.cause_id.isin(yll_only)]

    data = utilities.convert_affected_entity(data, 'cause_id')
    data = data[data['affected_entity'].isin(project_globals.DISEASE_MODELS)]
    has_morbidity = data.morbidity == 1
    has_mortality = data.mortality == 1
    data.loc[has_morbidity & has_mortality, 'affected_measure'] = 'incidence_rate'
    data.loc[has_morbidity & ~has_mortality, 'affected_measure'] = 'incidence_rate'
    data.loc[~has_morbidity & has_mortality, 'affected_measure'] = 'excess_mortality_rate'
    data = core.filter_relative_risk_to_cause_restrictions(data)

    data = (data
            .groupby(['affected_entity', 'parameter'])
            .apply(utilities.normalize, fill_value=1)
            .reset_index(drop=True))
    keep_columns = (vi_globals.DEMOGRAPHIC_COLUMNS
                    + ['affected_entity', 'affected_measure', 'parameter']
                    + draw_columns)
    data = data.filter(keep_columns)

    # Snap TMREL-category draws that are numerically ~1 to exactly 1.0.
    tmrel_cat = utility_data.get_tmrel_category(entity)
    in_tmrel = data.parameter == tmrel_cat
    tmrel_draws = data.loc[in_tmrel, draw_columns]
    data.loc[in_tmrel, draw_columns] = tmrel_draws.mask(np.isclose(tmrel_draws, 1.0), 1.0)

    data = utilities.reshape(data, value_cols=draw_columns)
    data = utilities.scrub_gbd_conventions(data, location)
    sim_validation.validate_for_simulation(data, entity, entity_key.measure, location)
    for interval_column in ('age', 'year'):
        data = utilities.split_interval(data, interval_column=interval_column,
                                        split_column_prefix=interval_column)
    return utilities.sort_hierarchical_data(data)
Esempio n. 6
0
def get_relative_risk(entity: RiskFactor, location_id: int) -> pd.DataFrame:
    """Extract and format GBD relative risk draws for a risk factor.

    Drops YLL-only causes, derives ``affected_measure`` from the
    morbidity/mortality flags, normalizes over affected-entity/parameter
    groups, and (for categorical distributions) snaps the TMREL category's
    draws to exactly 1.0.
    """
    data = extract.extract_data(entity, "relative_risk", location_id)

    # FIXME: we don't currently support yll-only causes, and their RR data is
    #  sometimes very messed up (mort = morb = 1, e.g. aortic aneurysm in the
    #  RR data for high systolic bp), so drop them entirely - 2/8/19 K.W.
    yll_only = set(c.gbd_id for c in causes if c.restrictions.yll_only)
    data = data[~data.cause_id.isin(yll_only)]

    data = utilities.convert_affected_entity(data, "cause_id")
    is_morbidity = data.morbidity == 1
    is_mortality = data.mortality == 1
    data.loc[is_morbidity & is_mortality, "affected_measure"] = "incidence_rate"
    data.loc[is_morbidity & ~is_mortality, "affected_measure"] = "incidence_rate"
    data.loc[~is_morbidity & is_mortality, "affected_measure"] = "excess_mortality_rate"
    data = filter_relative_risk_to_cause_restrictions(data)

    keep_columns = (
        DEMOGRAPHIC_COLUMNS
        + ["affected_entity", "affected_measure", "parameter"]
        + DRAW_COLUMNS
    )
    data = data.filter(keep_columns)
    data = (
        data.groupby(["affected_entity", "parameter"])
        .apply(utilities.normalize, fill_value=1)
        .reset_index(drop=True)
    )

    categorical_distributions = ["dichotomous", "ordered_polytomous", "unordered_polytomous"]
    if entity.distribution in categorical_distributions:
        # Snap TMREL-category draws that are numerically ~1 to exactly 1.0.
        tmrel_cat = utility_data.get_tmrel_category(entity)
        in_tmrel = data.parameter == tmrel_cat
        tmrel_draws = data.loc[in_tmrel, DRAW_COLUMNS]
        data.loc[in_tmrel, DRAW_COLUMNS] = tmrel_draws.mask(
            np.isclose(tmrel_draws, 1.0), 1.0
        )

    return data
Esempio n. 7
0
def get_population_attributable_fraction(
    entity: Union[RiskFactor, Etiology], location_id: int
) -> pd.DataFrame:
    """Extract and format GBD population attributable fraction draws.

    Risk factors: PAF rows are kept only where corresponding relative risk
    data exists and only for age groups allowed by each cause's restrictions.
    Etiologies: PAFs are pulled for the single cause that lists the entity as
    an etiology, and negative draws are clipped to zero with a warning.
    Either way, the affected measure is derived from the row's measure_id
    (YLL -> excess mortality, YLD -> incidence) and the data is normalized
    and trimmed to demographic + draw columns.
    """
    causes_map = {c.gbd_id: c for c in causes}
    if entity.kind == "risk_factor":
        data = extract.extract_data(entity, "population_attributable_fraction", location_id)
        relative_risk = extract.extract_data(entity, "relative_risk", location_id)

        # FIXME: we don't currently support yll-only causes so I'm dropping them because the data in some cases is
        #  very messed up, with mort = morb = 1 (e.g., aortic aneurysm in the RR data for high systolic bp) -
        #  2/8/19 K.W.
        yll_only_causes = set([c.gbd_id for c in causes if c.restrictions.yll_only])
        data = data[~data.cause_id.isin(yll_only_causes)]
        relative_risk = relative_risk[~relative_risk.cause_id.isin(yll_only_causes)]

        # Keep only PAF rows backed by relative risk data.
        data = (
            data.groupby("cause_id", as_index=False)
            .apply(filter_by_relative_risk, relative_risk)
            .reset_index(drop=True)
        )

        temp = []
        # We filter paf age groups by cause level restrictions.
        for (c_id, measure), df in data.groupby(["cause_id", "measure_id"]):
            cause = causes_map[c_id]
            measure = "yll" if measure == MEASURES["YLLs"] else "yld"
            df = utilities.filter_data_by_restrictions(
                df, cause, measure, utility_data.get_age_group_ids()
            )
            temp.append(df)
        data = pd.concat(temp, ignore_index=True)

    else:  # etiology
        data = extract.extract_data(
            entity, "etiology_population_attributable_fraction", location_id
        )
        # The cause this etiology belongs to; 'inner' presumably selects the
        # intersection of yll/yld restrictions — confirm in
        # filter_data_by_restrictions.
        cause = [c for c in causes if entity in c.etiologies][0]
        data = utilities.filter_data_by_restrictions(
            data, cause, "inner", utility_data.get_age_group_ids()
        )
        # Negative PAFs are invalid; clip them to zero and warn.
        if np.any(data[DRAW_COLUMNS] < 0):
            logger.warning(
                f"{entity.name.capitalize()} has negative values for paf. These will be replaced with 0."
            )
            other_cols = [c for c in data.columns if c not in DRAW_COLUMNS]
            data.set_index(other_cols, inplace=True)
            data = data.where(data[DRAW_COLUMNS] > 0, 0).reset_index()

    data = utilities.convert_affected_entity(data, "cause_id")
    # YLL PAFs affect mortality; YLD PAFs affect incidence.
    data.loc[
        data["measure_id"] == MEASURES["YLLs"], "affected_measure"
    ] = "excess_mortality_rate"
    data.loc[data["measure_id"] == MEASURES["YLDs"], "affected_measure"] = "incidence_rate"
    data = (
        data.groupby(["affected_entity", "affected_measure"])
        .apply(utilities.normalize, fill_value=0)
        .reset_index(drop=True)
    )
    data = data.filter(
        DEMOGRAPHIC_COLUMNS + ["affected_entity", "affected_measure"] + DRAW_COLUMNS
    )
    return data
def write_sbp_data(artifact, location):
    """Write all high systolic blood pressure data for ``location`` to ``artifact``.

    Writes the simple measures (restrictions, distribution, tmred, ...)
    straight through the loader, then builds and writes the PAF and relative
    risk datasets restricted and renamed to the project's affected entities.
    CKD relative risk is read from a pre-computed per-unit HDF file and
    appended to the GBD relative risks.
    """
    load = get_load(location)
    # Maps GBD cause names to the sub-cause names this project models.
    affected_entity_map = {
        'ischemic_heart_disease': 'acute_myocardial_infarction',
        'ischemic_stroke': 'acute_ischemic_stroke',
        'intracerebral_hemorrhage': 'acute_intracerebral_hemorrhage',
        'subarachnoid_hemorrhage': 'acute_subarachnoid_hemorrhage',
        'chronic_kidney_disease': 'chronic_kidney_disease'
    }

    prefix = 'risk_factor.high_systolic_blood_pressure.'
    measures = [
        "restrictions", "distribution", "tmred", "exposure",
        "exposure_standard_deviation", "relative_risk_scalar",
        "exposure_distribution_weights"
    ]
    for m in measures:
        key = prefix + m
        artifact.write(key, load(key))

    sbp = risk_factors.high_systolic_blood_pressure

    # --- Population attributable fraction --------------------------------
    data = gbd.get_paf(sbp.gbd_id, utility_data.get_location_id(location))
    data = data[data.metric_id == globals.METRICS['Percent']]
    data = data[data.measure_id == globals.MEASURES['YLDs']]
    data = utilities.convert_affected_entity(data, 'cause_id')
    data.loc[data['measure_id'] == globals.MEASURES['YLDs'],
             'affected_measure'] = 'incidence_rate'
    data = (data.groupby(['affected_entity', 'affected_measure'])
            .apply(utilities.normalize, fill_value=0)
            .reset_index(drop=True))
    data = data.loc[data.affected_entity.isin(affected_entity_map.keys())]
    data.affected_entity.replace(to_replace=affected_entity_map, inplace=True)
    data = data.filter(globals.DEMOGRAPHIC_COLUMNS +
                       ['affected_entity', 'affected_measure'] +
                       globals.DRAW_COLUMNS)
    data = utilities.reshape(data)
    data = utilities.scrub_gbd_conventions(data, location)
    data = split_interval(data, interval_column='age', split_column_prefix='age')
    data = split_interval(data, interval_column='year', split_column_prefix='year')
    data = utilities.sort_hierarchical_data(data)

    key = prefix + 'population_attributable_fraction'
    artifact.write(key, data)

    # --- Relative risk ----------------------------------------------------
    data = gbd.get_relative_risk(sbp.gbd_id,
                                 utility_data.get_location_id(location))
    data = utilities.convert_affected_entity(data, 'cause_id')
    morbidity = data.morbidity == 1
    mortality = data.mortality == 1
    data.loc[morbidity & mortality, 'affected_measure'] = 'incidence_rate'
    data.loc[morbidity & ~mortality, 'affected_measure'] = 'incidence_rate'
    # BUGFIX: was 'excess_mortality'; every other loader in this codebase
    # labels mortality-only rows 'excess_mortality_rate', which is the
    # measure name downstream consumers key on.
    data.loc[~morbidity & mortality, 'affected_measure'] = 'excess_mortality_rate'

    data = data.loc[data.affected_entity.isin(affected_entity_map.keys())]
    data = core.filter_relative_risk_to_cause_restrictions(data)
    data.affected_entity.replace(to_replace=affected_entity_map, inplace=True)
    data = data.filter(globals.DEMOGRAPHIC_COLUMNS +
                       ['affected_entity', 'affected_measure', 'parameter'] +
                       globals.DRAW_COLUMNS)
    data = (data.groupby(['affected_entity', 'parameter'])
            .apply(utilities.normalize, fill_value=1)
            .reset_index(drop=True))
    data = utilities.reshape(data)
    data = utilities.scrub_gbd_conventions(data, location)
    # NOTE(review): unlike the PAF branch, sorting happens before the interval
    # split here — presumably intentional for the concat with ckd_rr below,
    # but worth confirming.
    data = utilities.sort_hierarchical_data(data)
    data = split_interval(data, interval_column='age', split_column_prefix='age')
    data = split_interval(data, interval_column='year', split_column_prefix='year')

    # Append pre-computed per-unit CKD relative risk draws.
    loc = location.lower().replace(' ', '_')
    ckd_rr = pd.read_hdf(
        f'/share/costeffectiveness/artifacts/vivarium_csu_hypertension_sdc/ckd_rr/{loc}.hdf'
    )
    ckd_rr = ckd_rr.reset_index()
    ckd_rr['parameter'] = 'per unit'
    ckd_rr['affected_entity'] = 'chronic_kidney_disease'
    ckd_rr['affected_measure'] = 'incidence_rate'
    ckd_rr = ckd_rr.set_index([
        'location', 'sex', 'age_start', 'year_start', 'affected_entity',
        'affected_measure', 'parameter', 'age_end', 'year_end'
    ])
    data = pd.concat([data, ckd_rr])
    key = prefix + 'relative_risk'
    artifact.write(key, data)