Example #1
def process_mmr_draws(version=dw.MMR_VERS):
    '''process maternal mortality ratio draws'''

    data_dir = dw.INPUT_DATA_DIR + 'mmr' + '/' + str(version)

    print("pulling MMR")
    df = pd.read_feather(data_dir + '/' + '47' + '.feather')

    # merge births
    births = load_age_disagg_births()
    df = df.merge(births, how='left')

    # create age groups and aggregate
    df_10_24 = df.loc[df.age_group_id.isin([7, 8, 9])].copy()
    df.loc[:, 'age_group_id'] = 169
    df_10_24.loc[:, 'age_group_id'] = 159
    df = pd.concat([df, df_10_24], ignore_index=True)
    df = agg.age_sex_aggregate(df,
                               group_cols=dw.MMR_GROUP_COLS,
                               denominator='births')

    # aggregate to global and make goalkeepers units/age-groups
    df_global = agg.aggregate_locations_to_global(df,
                                                  dw.MMR_GROUP_COLS,
                                                  denominator='births')
    df_global_gk = df_global.copy(deep=True)
    # want per 1,000 live births for goalkeepers, so divide the per-100,000 rate by 100
    df_global_gk.loc[:, dw.DRAW_COLS] = df_global_gk.loc[:, dw.DRAW_COLS] / 100
    # no 10-24 age group for sdgs
    df_global = df_global[df_global.age_group_id != 159]
    df_global.loc[:, 'units'] = 'sdg'
    df_global_gk.loc[:, 'units'] = 'goalkeepers'
    df_global = pd.concat([df_global, df_global_gk], ignore_index=True)

    # output
    df = df[df.age_group_id != 159]  # no 10-24 age group for sdgs
    df = df[dw.MMR_GROUP_COLS + dw.DRAW_COLS]
    df_global = df_global[dw.MMR_GROUP_COLS + dw.DRAW_COLS + ['units']]

    df.reset_index(drop=True, inplace=True)
    df.to_feather(data_dir + '/' + '1033' + '.feather')
    df_global.reset_index(drop=True, inplace=True)
    df_global.to_feather(data_dir + '/' + '1033' + '_global' + '.feather')

    return df_global
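
A minimal usage sketch (not part of the source): call the function with its default version and split the returned global frame on the 'units' column it adds.

df_global = process_mmr_draws()  # defaults to version=dw.MMR_VERS
# the global frame carries both unit conventions side by side
df_sdg = df_global[df_global['units'] == 'sdg']
df_gk = df_global[df_global['units'] == 'goalkeepers']  # scaled to per 1,000 live births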
Example #2
def process_uhc_intervention_draws(version=dw.COV_VERS):
    '''process uhc intervention covariate draws'''
    data_dir = dw.INPUT_DATA_DIR + 'covariate' + '/' + str(version)
    component_ids = dw.UHC_INTERVENTION_COMP_IDS

    # first rename components that don't need further prep
    rename_ids = [str(rid) for rid in component_ids if rid not in [206, 209]]

    for rid in rename_ids:
        if path.isfile(data_dir + '/' + rid + '.feather'):
            print('renaming ' + rid)
            rename(data_dir + '/' + rid + '.feather',
                   data_dir + '/' + rid + '_prepped' + '.feather')

    dfs = []
    for component_id in [
            206, 209
    ]:  # these are the interventions that require aggregation
        print("pulling {c}".format(c=component_id))
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')
        dfs.append(df)

    print('concatenating')
    df = pd.concat(dfs, ignore_index=True)

    # merge populations
    db_pops = qry.get_pops()
    df = df.merge(db_pops, how='left')

    # age/sex aggregate
    df.loc[df.indicator_component_id == 209, 'sex_id'] = 3
    df.loc[df.indicator_component_id == 209, 'age_group_id'] = 29  # 15+
    df.loc[df.indicator_component_id == 206, 'age_group_id'] = 24  # 15-49
    df = agg.age_sex_aggregate(df,
                               group_cols=dw.COV_GROUP_COLS,
                               denominator='population')

    # output
    df = df[dw.COV_GROUP_COLS + dw.DRAW_COLS]

    for component_id in [206, 209]:
        print("outputting " + str(component_id))
        df_id = df[df.indicator_component_id == component_id]
        df_id.reset_index(drop=True, inplace=True)
        df_id.to_feather(data_dir + '/' + str(component_id) + '_prepped' +
                         '.feather')
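
process_uhc_intervention_draws writes every UHC intervention component back out as <id>_prepped.feather, either by renaming the input file in place or by aggregating it first. A small sketch of how that convention could be checked afterwards; the loop and print are mine, only the path layout comes from the source.

from os import path

data_dir = dw.INPUT_DATA_DIR + 'covariate' + '/' + str(dw.COV_VERS)
for cid in dw.UHC_INTERVENTION_COMP_IDS:
    prepped = data_dir + '/' + str(cid) + '_prepped' + '.feather'
    print(prepped, 'exists' if path.isfile(prepped) else 'missing')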
Example #3
def process_dismod_draws(past_future, version=dw.DISMOD_VERS):
    '''process dismod draws'''

    if past_future == 'past':
        index_cols = dw.DISMOD_GROUP_COLS
        data_dir = dw.INPUT_DATA_DIR + 'dismod' + '/' + str(version)
        db_pops = qry.get_pops()
    elif past_future == 'future':
        index_cols = ['indicator_component_id'] + INDEX_COLS_FUTURE
        data_dir = dw.FORECAST_DATA_DIR + 'dismod' + '/' + str(version)
        db_pops = load_population()
    else:
        raise ValueError(
            'The past_future arg must be set to "past" or "future".')

    component_ids = [14, 17, 242, 245]  # no child sex abuse (pulled later)
    dfs = []

    for component_id in component_ids:
        print("pulling {c}".format(c=component_id))
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')
        df.loc[:, 'indicator_component_id'] = component_id
        dfs.append(df)

    print("concatenating")
    df = pd.concat(dfs, ignore_index=True)

    df = df[index_cols + dw.DRAW_COLS]

    # COLLAPSE SEX/AGES
    df = df.merge(db_pops, how='left')
    df_sex_split = df[df.indicator_component_id.isin([14, 17])]
    df.loc[df['indicator_component_id'].isin([14, 17]),
           'sex_id'] = 3  # physical and sexual violence sex aggregation
    df = agg.age_sex_aggregate(df, group_cols=index_cols)
    df = pd.concat([df, df_sex_split], ignore_index=True)

    # AGE STANDARDIZE
    df_age_stand_all_age = df.loc[df['indicator_component_id'].isin([14, 17])]
    df_age_stand_15_plus = df.loc[df['indicator_component_id'].isin(
        [242, 245])]  # intimate partner and non-intimate partner violence

    # global
    df_aa_global = agg.aggregate_locations_to_global(df_age_stand_all_age,
                                                     index_cols,
                                                     age_standardized=True,
                                                     age_group_years_start=0,
                                                     age_group_years_end=125,
                                                     age_group_id=27)

    df_15_plus_global = agg.aggregate_locations_to_global(
        df_age_stand_15_plus,
        index_cols,
        age_standardized=True,
        age_group_years_start=15,
        age_group_years_end=125,
        age_group_id=29)

    # national/subnational
    df_age_stand_all_age = agg.age_standardize(df_age_stand_all_age,
                                               index_cols, 0, 125, 27)
    df_age_stand_15_plus = agg.age_standardize(df_age_stand_15_plus,
                                               index_cols, 15, 125, 29)

    # concat
    print("concatenating")
    df = pd.concat([df_age_stand_all_age, df_age_stand_15_plus],
                   ignore_index=True)
    df_global = pd.concat([df_aa_global, df_15_plus_global], ignore_index=True)

    # output
    df = df[index_cols + dw.DRAW_COLS]

    file_dict = dict(zip(component_ids, ['1094', '1095', '1047', '1098']))

    for component_id in file_dict.keys():

        path = data_dir + '/' + file_dict[component_id] + '.feather'
        global_path = data_dir + '/' + file_dict[
            component_id] + '_global' + '.feather'

        # sdg
        print('outputting ' + file_dict[component_id])
        df_id = df[df.indicator_component_id == component_id]
        df_id.reset_index(drop=True, inplace=True)
        df_id.to_feather(path)

        # global
        print('outputting ' + file_dict[component_id] + ' global')
        df_id_global = df_global[df_global.indicator_component_id ==
                                 component_id]
        df_id_global.reset_index(drop=True, inplace=True)
        df_id_global.to_feather(global_path)

    #############################################
    # child sex abuse
    index_cols = [c for c in index_cols if c != 'indicator_component_id']

    df_csa = pd.read_feather(data_dir + '/' + '1064_age_disagg.feather')
    df_csa = df_csa.merge(db_pops, how='left')

    # aggregation and output
    df_csa.loc[:, 'age_group_id'] = 202
    df_csa = agg.age_sex_aggregate(df_csa, group_cols=index_cols)

    df_csa_global = df_csa.copy(deep=True)

    df_csa = df_csa[index_cols + dw.DRAW_COLS]
    df_csa.reset_index(drop=True, inplace=True)
    print('outputting 1064')
    df_csa.to_feather(data_dir + '/' + '1064.feather')

    df_csa_global = agg.aggregate_locations_to_global(df_csa_global,
                                                      index_cols)
    df_csa_global = df_csa_global[index_cols + dw.DRAW_COLS]
    df_csa_global.reset_index(drop=True, inplace=True)
    print('outputting 1064 global')
    df_csa_global.to_feather(data_dir + '/' + '1064_global.feather')

    return df
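
A usage sketch for the past/future switch, assuming INDEX_COLS_FUTURE and load_population (referenced but not shown above) are defined at module level:

df_past = process_dismod_draws('past')      # reads dw.INPUT_DATA_DIR, pops from qry.get_pops()
df_future = process_dismod_draws('future')  # reads dw.FORECAST_DATA_DIR, pops from load_population()

try:
    process_dismod_draws('neither')
except ValueError as err:
    print(err)  # The past_future arg must be set to "past" or "future".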
Example #4
def process_codcorrect_draws(version=dw.CC_VERS):
    '''process codcorrect draws'''

    index_cols = dw.CC_GROUP_COLS
    component_ids = (dw.CC_ALL_AGE_COMPONENT_IDS +
                     dw.CC_THIRTY_SEVENTY_COMPONENT_IDS +
                     dw.CONF_DIS_COMPONENT_IDS)
    data_dir = dw.INPUT_DATA_DIR + 'codcorrect' + '/' + str(version)

    dfs = []
    for component_id in component_ids:
        print("pulling {c}".format(c=component_id))
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')
        df.loc[:, 'indicator_component_id'] = component_id
        dfs.append(df)

    print("concatenating")
    df = pd.concat(dfs, ignore_index=True)

    # convert to numbers
    db_pops = qry.get_pops()
    df = df.merge(db_pops, how='left')

    # keep sex split for certain indicators (ncds, road mort, poisoning, homicide)
    df_keep_sex_split = df.loc[df['indicator_component_id'].isin(
        dw.CC_THIRTY_SEVENTY_COMPONENT_IDS + dw.CC_ALL_AGE_COMPONENT_IDS
    ), :]  # these age_groups get standardized later

    # collapse sex (and age for conflict and disaster mort)
    df['sex_id'] = 3
    df.loc[df['indicator_component_id'].isin(dw.CONF_DIS_COMPONENT_IDS),
           'age_group_id'] = 22
    df = agg.age_sex_aggregate(df, group_cols=index_cols)

    # make sure it looks like we expect
    assert set(df.loc[df['cause_id'].isin(dw.CONF_DIS_CAUSES)].age_group_id) == set([22]), \
        'unexpected age group ids found'
    assert set(df.loc[~df['cause_id'].isin(dw.CONF_DIS_CAUSES)].age_group_id) == \
        set(list(range(2, 21)) + list(range(30, 33)) + [235]), \
        'unexpected age group ids found'
    assert set(df.sex_id) == set([3]), 'unexpected sex ids found'

    # concat sex-split data before age-standardizing
    df = pd.concat([df, df_keep_sex_split], axis=0)

    # prepare for age-standardization
    # all age-standardized except for conflict and disaster mort
    df_conf_dis = df.loc[df['indicator_component_id'].isin(
        dw.CONF_DIS_COMPONENT_IDS)]
    df_ncds = df.loc[df['indicator_component_id'].isin(
        dw.CC_THIRTY_SEVENTY_COMPONENT_IDS)]
    df_all_ages = df.loc[df['indicator_component_id'].isin(
        dw.CC_ALL_AGE_COMPONENT_IDS)]

    # global aggregation
    df_cd_global = agg.aggregate_locations_to_global(df_conf_dis, index_cols)

    df_ncds_global = agg.aggregate_locations_to_global(
        df_ncds,
        index_cols,
        age_standardized=True,
        age_group_years_start=30,
        age_group_years_end=70,
        age_group_id=214)

    df_aa_global = agg.aggregate_locations_to_global(df_all_ages,
                                                     index_cols,
                                                     age_standardized=True,
                                                     age_group_years_start=0,
                                                     age_group_years_end=125,
                                                     age_group_id=27)

    # age standardize
    df_ncds = agg.age_standardize(df_ncds, index_cols, 30, 70, 214)
    df_all_ages = agg.age_standardize(df_all_ages, index_cols, 0, 125, 27)

    # concat all
    print('concatenating')
    df = pd.concat([df_ncds, df_all_ages, df_conf_dis], axis=0)
    df_global = pd.concat([df_ncds_global, df_aa_global, df_cd_global], axis=0)

    # output
    df = df[index_cols + dw.DRAW_COLS]
    df_global = df_global[index_cols + dw.DRAW_COLS]

    file_dict = dw.CC_FILE_DICT
    for component_id in file_dict.keys():

        path = data_dir + '/' + file_dict[component_id] + '.feather'
        global_path = data_dir + '/' + file_dict[
            component_id] + '_global' + '.feather'

        # sdg
        print('outputting ' + file_dict[component_id])
        df_id = df[df.indicator_component_id == component_id]
        df_id.reset_index(drop=True, inplace=True)
        df_id.to_feather(path)

        # global
        print('outputting ' + file_dict[component_id] + ' global')
        df_id_global = df_global[df_global.indicator_component_id ==
                                 component_id]
        df_id_global.reset_index(drop=True, inplace=True)
        df_id_global.to_feather(global_path)
Example #5
def process_risk_exposure_draws(past_future, version=dw.RISK_EXPOSURE_VERS):
    '''process risk exposure draws'''

    if past_future == 'past':
        index_cols = dw.RISK_EXPOSURE_GROUP_COLS
        data_dir = dw.INPUT_DATA_DIR + 'risk_exposure' + '/' + str(version)
        db_pops = qry.get_pops()
    elif past_future == 'future':
        index_cols = ['indicator_component_id'] + INDEX_COLS_FUTURE
        data_dir = dw.FORECAST_DATA_DIR + 'risk_exposure' + '/' + str(version)
        db_pops = load_population()
    else:
        raise ValueError(
            'The past_future arg must be set to "past" or "future".')

    component_ids = dw.RISK_EXPOSURE_COMPONENT_IDS

    dfs = []
    for component_id in component_ids:
        print("pulling {c}".format(c=component_id))
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')
        df.loc[:, 'indicator_component_id'] = component_id
        dfs.append(df)

    print("concatenating")
    df = pd.concat(dfs, ignore_index=True)

    # collapse sex/ages
    df = df.merge(db_pops, how='left')

    # set age-groups for aggregation now so it doesn't have to be done twice (for sex_split)
    df.loc[df.indicator_component_id.isin([35, 41]),
           'age_group_id'] = 1  # Malnutrition
    df.loc[df.indicator_component_id == 44,
           'age_group_id'] = 5  # Child Overweight

    # keep these for later
    df_sex_split = df[~df.indicator_component_id.isin([5, 227])]
    df_smoking = df[df.indicator_component_id == 227].copy()
    df_smoking_sex_split = df_smoking.copy(deep=True)

    df = df[df.indicator_component_id != 227]  # remove smoking from main df

    # age/sex aggregate
    df['sex_id'] = 3  # changes everything but Mean PM2.5 which is already aggregated
    print("concatenating")  # concat sex-split data
    df = pd.concat([df, df_sex_split], ignore_index=True)
    df = agg.age_sex_aggregate(df,
                               group_cols=index_cols,
                               denominator='population')

    # sex aggregate smoking data before age-standardizing
    df_smoking['sex_id'] = 3
    df_smoking = agg.age_sex_aggregate(df_smoking,
                                       group_cols=index_cols,
                                       denominator='population')
    print("concatenating")
    df_smoking = pd.concat([df_smoking, df_smoking_sex_split], axis=0)

    # aggregate all but smoking to global
    df_global = agg.aggregate_locations_to_global(df, index_cols)

    # aggregate smoking to global and age-standardize global and non-global
    df_smoking_global = agg.aggregate_locations_to_global(
        df_smoking,
        index_cols,
        age_standardized=True,
        age_group_years_start=10,
        age_group_years_end=125,
        age_group_id=194)
    # df_smoking_global['units'] = 'sdg'
    df_smoking = agg.age_standardize(df_smoking, index_cols, 10, 125, 194)

    # concat smoking
    df = pd.concat([df, df_smoking], axis=0)
    df_global = pd.concat([df_global, df_smoking_global], axis=0)

    # output
    df = df[index_cols + dw.DRAW_COLS]
    df_global = df_global[index_cols + dw.DRAW_COLS]

    file_dict = dw.RE_FILE_DICT
    for component_id in file_dict.keys():

        path = data_dir + '/' + str(file_dict[component_id]) + '.feather'
        global_path = data_dir + '/' + str(
            file_dict[component_id]) + '_global' + '.feather'

        # sdg
        print('outputting ' + str(file_dict[component_id]))
        df_id = df[df.indicator_component_id == component_id]
        df_id.reset_index(drop=True, inplace=True)
        df_id.to_feather(path)

        # global
        print('outputting ' + str(file_dict[component_id]) + ' global')
        df_id_global = df_global[df_global.indicator_component_id ==
                                 component_id]
        df_id_global.reset_index(drop=True, inplace=True)
        df_id_global.to_feather(global_path)

    return df_global
Example #6
def process_covariate_draws(version=dw.COV_VERS):
    '''process non-uhc covariate draws'''
    data_dir = dw.INPUT_DATA_DIR + 'covariate' + '/' + str(version)
    component_ids = dw.NON_UHC_COV_COMPONENT_IDS

    dfs = []
    for component_id in component_ids:  # read in all components
        print("pulling {c}".format(c=component_id))
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')
        dfs.append(df)

    print('concatenating')
    df = pd.concat(dfs, ignore_index=True)

    # merge populations
    db_pops = qry.get_pops()

    # create adol birth age group
    db_pops_adol_birth = db_pops[db_pops.age_group_id.isin([7, 8])].copy()
    db_pops_adol_birth['age_group_id'] = 162
    pop_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']
    db_pops_adol_birth = db_pops_adol_birth.groupby(
        pop_cols, as_index=False)['population'].sum()
    db_pops = db_pops.append(db_pops_adol_birth, ignore_index=True)
    df = df.merge(db_pops, how='left')

    # aggregate met need
    df_met_need_15_plus = df[df.indicator_component_id == 179].copy()
    df_met_need_15_24 = df_met_need_15_plus[
        df_met_need_15_plus.age_group_id.isin([8, 9])].copy()
    df_met_need_15_plus['age_group_id'] = 24  # 15-49
    df_met_need_15_24['age_group_id'] = 149  # 15-24
    df_met_need = df_met_need_15_plus.append(df_met_need_15_24,
                                             ignore_index=True)
    df_met_need = agg.age_sex_aggregate(df_met_need,
                                        group_cols=dw.COV_GROUP_COLS,
                                        denominator='population')

    df = df[df.indicator_component_id != 179]
    df = df.append(df_met_need, ignore_index=True)

    # global aggregation (hrh components aggregated later)
    df_no_hrh = df[~df.indicator_component_id.isin([1457, 1460, 1463, 1556])]

    df_global = agg.aggregate_locations_to_global(df_no_hrh,
                                                  dw.COV_GROUP_COLS,
                                                  denominator='population')

    # output
    df = df[dw.COV_GROUP_COLS + dw.DRAW_COLS]
    df_global = df_global[dw.COV_GROUP_COLS + dw.DRAW_COLS]

    file_dict = dw.COV_FILE_DICT
    for component_id in file_dict.keys():
        if file_dict[component_id] == component_id:
            path = data_dir + '/' + str(component_id) + '_prepped' + '.feather'
            global_path = data_dir + '/' + str(
                component_id) + '_global' + '.feather'
        else:
            path = data_dir + '/' + str(file_dict[component_id]) + '.feather'
            global_path = data_dir + '/' + str(
                file_dict[component_id]) + '_global' + '.feather'

        print('outputting ' + str(file_dict[component_id]))
        df_id = df[(df.indicator_component_id == component_id)
                   & (df.age_group_id != 149)]
        df_id.reset_index(drop=True, inplace=True)
        df_id.to_feather(path)

        if component_id not in [1457, 1460, 1463, 1556]:  # save global dfs
            print('outputting ' + str(file_dict[component_id]) + ' global')
            df_id_global = df_global[df_global.indicator_component_id ==
                                     component_id]
            df_id_global.reset_index(drop=True, inplace=True)
            df_id_global.to_feather(global_path)
Example #7
def process_demo_draws(past_future, version=dw.DEMO_VERS):
    '''process demographics draws'''

    if past_future == 'past':
        data_dir = dw.INPUT_DATA_DIR + 'demographics' + '/' + str(version)
    elif past_future == 'future':
        data_dir = dw.FORECAST_DATA_DIR + 'demographics' + '/' + str(version)
    else:
        raise ValueError(
            'The past_future arg must be set to "past" or "future".')

    component_ids = dw.DEMO_COMPONENT_IDS
    dfs = []

    for component_id in component_ids:
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')

        dfs.append(df)

    print('concatenating')
    df = pd.concat(dfs, ignore_index=True)

    # temporary for gk
    locs = qry.get_sdg_reporting_locations(level_3=True)
    df = df[df.location_id.isin(locs.location_id)]

    # get live births
    births = load_births()
    if past_future == 'past':
        births = births[births.scenario == 0]
        births.drop('scenario', inplace=True, axis=1)

    df = df.merge(births, how='left')

    # Keep sex-split
    df_sex_split = df.copy(deep=True)

    # aggregate sexes
    df['sex_id'] = 3
    df = agg.age_sex_aggregate(df, dw.DEMO_GROUP_COLS, denominator='births')
    df = df.append(df_sex_split, ignore_index=True)

    # global aggregation
    df_global = agg.aggregate_locations_to_global(df,
                                                  dw.DEMO_GROUP_COLS,
                                                  denominator='births')

    # output
    df = df[dw.DEMO_GROUP_COLS + dw.DRAW_COLS]
    df_global = df_global[dw.DEMO_GROUP_COLS + dw.DRAW_COLS]

    for component_id in component_ids:
        if component_id == 56:
            ind_id = '1040'
        else:
            ind_id = '1041'

        print("outputting " + ind_id)
        df_id = df[df.indicator_component_id == component_id]
        df_id.reset_index(drop=True, inplace=True)
        df_id.to_feather(data_dir + '/' + ind_id + '.feather')

        df_id_global = df_global[df_global.indicator_component_id ==
                                 component_id]
        df_id_global.reset_index(drop=True, inplace=True)
        df_id_global.to_feather(data_dir + '/' + ind_id + '_' + 'global' +
                                '.feather')
Example #8
def process_burdenator_draws(past_future, version=dw.BURDENATOR_VERS):
    '''process burdenator risk burden draws'''
    if past_future == 'past':
        index_cols = dw.RISK_BURDEN_GROUP_COLS
        data_dir = dw.INPUT_DATA_DIR + 'risk_burden' + '/' + str(version)
        db_pops = qry.get_pops()
    elif past_future == 'future':
        index_cols = ['indicator_component_id'] + INDEX_COLS_FUTURE
        data_dir = dw.FORECAST_DATA_DIR + 'risk_burden' + '/' + str(version)
        db_pops = load_population()

    else:
        raise ValueError(
            'The past_future arg must be set to "past" or "future".')

    dfs = []

    component_ids = dw.RISK_BURDEN_COMPONENT_IDS + dw.RISK_BURDEN_DALY_COMPONENT_IDS
    for component_id in component_ids:
        print("pulling " + str(component_id))
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')
        df.loc[:, 'indicator_component_id'] = component_id
        dfs.append(df)

    df = pd.concat(dfs, ignore_index=True)

    # aggregate to both sexes but keep sex-split data as well
    df = df.merge(db_pops, how='left')
    df_sex_split = df.copy(deep=True)
    df['sex_id'] = 3
    df = agg.age_sex_aggregate(df, group_cols=index_cols)
    df = pd.concat([df, df_sex_split], axis=0, ignore_index=True)

    # global
    df_global = agg.aggregate_locations_to_global(df,
                                                  index_cols,
                                                  age_standardized=True,
                                                  age_group_years_start=0,
                                                  age_group_years_end=125,
                                                  age_group_id=27)

    # age-standardize
    df = agg.age_standardize(df, index_cols, 0, 125, 27)

    # output
    df = df[index_cols + dw.DRAW_COLS]
    df_global = df_global[index_cols + dw.DRAW_COLS]

    file_dict = dw.RB_INPUT_FILE_DICT
    for component_id in file_dict.keys():

        path = data_dir + '/' + str(file_dict[component_id]) + '.feather'
        global_path = data_dir + '/' + str(
            file_dict[component_id]) + '_global' + '.feather'

        # sdg
        print('outputting ' + str(file_dict[component_id]))
        df_id = df[df.indicator_component_id == component_id]
        df_id.reset_index(drop=True, inplace=True)
        df_id.to_feather(path)

        # global
        print('outputting ' + str(file_dict[component_id]) + ' global')
        df_id_global = df_global[df_global.indicator_component_id ==
                                 component_id]
        df_id_global.reset_index(drop=True, inplace=True)
        df_id_global.to_feather(global_path)

    return df
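
The functions above follow broadly the same pattern: pull per-component feather files, aggregate over age, sex, and location, and write the prepped draws next to the inputs (typically <id>.feather plus <id>_global.feather). A hypothetical driver for a full 'past' refresh; the ordering is my own and nothing in the source requires one.

def main():
    process_mmr_draws()
    process_uhc_intervention_draws()
    process_covariate_draws()
    process_codcorrect_draws()
    process_demo_draws('past')
    process_dismod_draws('past')
    process_risk_exposure_draws('past')
    process_burdenator_draws('past')


if __name__ == '__main__':
    main()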