def cause_age_sex_agg(death_df, true_paf_df, amen_paf_df, draw_parameters, uhc_version_dir):
    '''
    Aggregate PAFs over cause and sex, age-standardize, and collapse deaths.

    Both PAF frames are multiplied by deaths (count space), summed within
    `specs.ID_COLS + ['age_group_id']`, tagged with sex_id 3,
    age-standardized, and finally divided by the equally-processed deaths
    to return to PAF space.

    Returns the tuple (death_df, true_paf_df, amen_paf_df).
    '''
    detailed_cols = specs.ID_COLS + ['age_group_id', 'sex_id', 'cause_id']
    by_age_cols = specs.ID_COLS + ['age_group_id']

    # PAF * deaths -> attributable counts
    attributable = [
        misc.draw_math([death_df, paf_df], detailed_cols, specs.DRAW_COLS, '*')
        for paf_df in (true_paf_df, amen_paf_df)
    ]

    # sum over cause and sex (everything not in the groupby key)
    frames = [
        frame.groupby(by_age_cols, as_index=False)[specs.DRAW_COLS].sum()
        for frame in attributable + [death_df]
    ]

    # every frame is now both-sex, so label it accordingly
    for frame in frames:
        frame['sex_id'] = 3

    # age-standardize; counts=False even though these are counts, because
    # we do not want them converted to rates (yet)
    frames = [
        misc.age_standardize(frame, specs.ID_COLS, specs.DRAW_COLS,
                             draw_parameters, uhc_version_dir, counts=False)
        for frame in frames
    ]
    true_paf_df, amen_paf_df, death_df = frames

    # attributable counts / deaths -> back to PAF space
    true_paf_df = misc.draw_math([true_paf_df, death_df],
                                 specs.ID_COLS, specs.DRAW_COLS, '/')
    amen_paf_df = misc.draw_math([amen_paf_df, death_df],
                                 specs.ID_COLS, specs.DRAW_COLS, '/')

    return death_df, true_paf_df, amen_paf_df
def delete_risk(df, paf_df, index_cols, draw_cols):
    '''
    Remove effect of local risk exposure.

    Multiplies the draws in `df` by the locally unattributable fraction
    (1 - PAF) via `misc.draw_math`, matching on `index_cols`.

    Parameters:
        df: draws to be risk-deleted.
        paf_df: PAF draws; left untouched by this function.
        index_cols: columns handed to `misc.draw_math` as the index.
        draw_cols: draw columns to operate on.

    Returns the frame produced by `misc.draw_math`.
    '''
    # Work on a copy: overwriting the caller's frame would leave it holding
    # 1 - PAF after the call (and flip it back on a second call).
    unattributable_df = paf_df.copy()
    unattributable_df[draw_cols] = 1 - unattributable_df[draw_cols]
    return misc.draw_math([df, unattributable_df], index_cols, draw_cols, '*')
def draw_divide(num_df, denom_df):
    '''
    Get the weight value for each uhc_id.

    Divides the draws of `num_df` by those of `denom_df` (via
    `misc.draw_math` on `specs.ID_COLS`) and re-attaches the numerator's
    `uhc_id` column to the result.
    '''
    # echo the uhc_id(s) being processed so a failing loop is easy to locate
    print(pd.unique(num_df['uhc_id']))

    weight_df = misc.draw_math([num_df, denom_df],
                               specs.ID_COLS, specs.DRAW_COLS, '/')

    # NOTE(review): uhc_id is copied back positionally, which presumes the
    # result has the same length and row order as num_df -- confirm against
    # misc.draw_math.
    uhc_id_values = num_df['uhc_id'].tolist()
    weight_df['uhc_id'] = uhc_id_values
    return weight_df
def calc_counterfactual_burden():
    '''
    For a given service/population cell, get the risk-adjusted death rate
    for relevant indicators.

    Command-line entry point: reads --uhc_id, --uhc_version and
    --value_type, loads the efficacy tier plus coverage and burden draws,
    computes the health gain weight, and writes draws and a summary.
    '''
    parser = argparse.ArgumentParser()
    cli_arguments = (
        ('--uhc_id', 'Indicates tracer-service_population', int),
        ('--uhc_version', 'Version number for run.', int),
        ('--value_type', 'What are we storing,', str),
    )
    for flag, help_text, arg_type in cli_arguments:
        parser.add_argument(flag, help=help_text, type=arg_type)
    args = parser.parse_args()

    # get tracers and service pop
    uhc_version_dir = FILEPATH

    # efficacy is derived from the tier of the single row matching uhc_id
    uhc_df = pd.read_excel(FILEPATH)
    uhc_df = uhc_df.query("uhc_id == {}".format(args.uhc_id))
    efficacy_tier = uhc_df['efficacy_tier'].values.item()
    efficacy = 1. - 0.2 * efficacy_tier + 0.1

    # read coverage and observed burden draws
    coverage_df = pd.read_hdf(FILEPATH)
    burden_df = pd.read_hdf(FILEPATH)

    # The calculation:
    #   counterfactual 0 is the burden we'd see if not for the intervention
    #       counterfactual0 = observed / (1 - coverage * efficacy)
    #   counterfactual 1 is the burden we'd see with 100% coverage
    #       counterfactual1 = counterfactual0 * (1 - 1 * efficacy)
    #   the health gain weight is the difference
    #       health gain weight = counterfactual0 - counterfactual1
    has_rows = len(coverage_df) + len(burden_df) != 0
    if has_rows:
        coverage_df[specs.DRAW_COLS] = (
            1 - coverage_df[specs.DRAW_COLS] * efficacy)
        burden_df = misc.draw_math([burden_df, coverage_df],
                                   specs.ID_COLS, specs.DRAW_COLS, '/')
        counterfactual0 = burden_df[specs.DRAW_COLS]
        counterfactual1 = counterfactual0 * (1 - efficacy)
        burden_df[specs.DRAW_COLS] = counterfactual0 - counterfactual1
        burden_df['efficacy'] = efficacy
        summary_df = misc.summarize(burden_df, specs.DRAW_COLS)
    else:
        # nothing to compute: emit an empty summary with the expected columns
        summary_df = pd.DataFrame(
            columns=specs.ID_COLS + ['mean', 'lower', 'upper'])

    # store
    burden_df.to_hdf(FILEPATH)
    summary_df.to_csv(FILEPATH)
def fetch_ratio_draws(draw_parameters, uhc_version_dir, uhc_id, **kwargs):
    '''
    Fetch draws of the ratio between measure 1 and the requested measure.

    required kwargs:
        gbd_id ([int]) = ids for `get_draws` call
        gbd_id_type ([str]) = types associated w/ each id.
        measure_id (int) = What is the measure of the indicator.

    Draws are fetched twice through `fetch_outputs_draws`: once with the
    kwargs as given and once with measure_id forced to 1. The second set
    is divided by the first.
    '''
    # draws for the measure the caller asked for
    denominator_df = fetch_outputs_draws(
        draw_parameters, uhc_version_dir, uhc_id, **kwargs)

    # same request, but with measure_id overridden to 1
    death_kwargs = dict(kwargs, measure_id=1)
    numerator_df = fetch_outputs_draws(
        draw_parameters, uhc_version_dir, uhc_id, **death_kwargs)

    # get ratio
    ratio_df = misc.draw_math([numerator_df, denominator_df],
                              specs.ID_COLS, specs.DRAW_COLS, '/')
    keep_cols = specs.ID_COLS + specs.DRAW_COLS
    return ratio_df[keep_cols]
def add_global_risk(df, gpaf_df, index_cols, draw_cols):
    '''
    Add effect of global risk exposure.

    The global PAF draws are expanded onto the location-years present in
    `df`, averaged within location, turned into the unattributable
    fraction (1 - PAF), and `df` is divided by that fraction.

    `index_cols` must equal `specs.ID_COLS`.
    '''
    assert index_cols == specs.ID_COLS, 'Assumes location and year as index'

    location_years = df[index_cols]

    # expand by location-year: drop the PAF's own location and merge on
    # whatever columns remain in common with the location-year index
    expanded_df = gpaf_df.drop('location_id', axis=1).merge(location_years)

    # average over year within each location
    # NOTE(review): this step selects specs.DRAW_COLS while the rest of the
    # function uses draw_cols -- confirm callers always pass the same list.
    location_mean_df = expanded_df.groupby(
        'location_id', as_index=False)[specs.DRAW_COLS].mean()

    # add the year column back on
    unattributable_df = location_mean_df.merge(location_years)

    # get global unattributable
    unattributable_df[draw_cols] = 1 - unattributable_df[draw_cols]

    return misc.draw_math([df, unattributable_df], index_cols, draw_cols, '/')
def fetch_mmr_draws(draw_parameters, uhc_version_dir, uhc_id, **kwargs):
    '''
    Fetch draws of maternal deaths divided by births.

    required kwargs:
        gbd_id ([int]) = ids for `get_draws` call
        gbd_id_type ([str]) = types associated w/ each id.
        measure_id (int) = What is the measure of the indicator.
    '''
    # NOTE: the age/sex parameters have to be overridden here (they are set
    # up for all-ages because of met need).
    # NOTE(review): this writes into the caller's draw_parameters dict, so
    # the override remains visible after the call -- confirm that is intended.
    draw_parameters.update({
        'age_group_id': range(7, 16),
        'sex_id': [2],
    })

    # load age-standardized draws for maternal deaths and births
    maternal_death_df = fetch_outputs_draws(
        draw_parameters, uhc_version_dir, uhc_id, **kwargs)
    births_df = fetch_asfr_draws(draw_parameters, uhc_version_dir)

    # calc age-standardized MMR
    mmr_df = misc.draw_math([maternal_death_df, births_df],
                            specs.ID_COLS, specs.DRAW_COLS, '/')
    output_cols = specs.ID_COLS + specs.DRAW_COLS
    return mmr_df[output_cols]
def produce_uhc():
    '''
    Build the UHC service coverage aggregate from the per-uhc_id outputs.

    Command-line entry point. Steps:
      1. health gain weight fraction = counterfactual burden of each
         uhc_id / sum over all uhc_ids;
      2. write the unadjusted weights;
      3. band the weights within location-year and replace each weight
         draw by its band mean;
      4. combine coverage across uhc_ids as a weighted sum and as a plain
         mean;
      5. summarize and store weights, weighted values and average values.
    '''
    parser = argparse.ArgumentParser()
    cli_arguments = (
        ('--uhc_id', '0 for UHC service coverage aggregate.', int),
        ('--uhc_version', 'Version number for run.', int),
        ('--value_type', 'What are we storing,', str),
    )
    for flag, help_text, arg_type in cli_arguments:
        parser.add_argument(flag, help=help_text, type=arg_type)
    args = parser.parse_args()

    # get service_proxys and service pop
    uhc_version_dir = FILEPATH

    # retrieve the IDs we need
    uhc_ids = get_uhc_ids()

    # calculate health gain weight fraction (health gain / sum of health gains)
    count_dfs = uhc_io.compile_dfs(
        'counterfactual_burden', uhc_ids, uhc_version_dir)
    total_df = pd.concat(count_dfs)
    total_df = total_df.groupby(
        specs.ID_COLS, as_index=False)[specs.DRAW_COLS].sum()
    weight_df = pd.concat(
        [draw_divide(count_df, total_df) for count_df in count_dfs])
    weight_df['mean_weight'] = weight_df[specs.DRAW_COLS].mean(axis=1)

    # output the unadjusted weights (summarize gets a copy of the frame)
    summary_unadjusted_weights = misc.summarize(
        weight_df.copy(), specs.DRAW_COLS)
    summary_unadjusted_weights.to_csv(FILEPATH)
    weight_df.to_csv(FILEPATH)

    # adjust the weights -- take mean weight of indicator in a specified
    # number of bands within a country and year
    num_bands = 3
    band_labels = range(1, num_bands + 1)
    location_year = ['location_id', 'year_id']

    # zero mean weights become NaN before the quantile cut ...
    weight_df['mean_weight'] = weight_df['mean_weight'].replace(0, np.nan)
    weight_df['weight_band'] = weight_df.groupby(
        location_year)['mean_weight'].transform(
            lambda weights: pd.qcut(weights, num_bands, labels=band_labels))
    # ... and rows left without a band are assigned band 0
    weight_df['weight_band'] = weight_df['weight_band'].replace(np.nan, 0)

    weight_df[specs.DRAW_COLS] = weight_df.groupby(
        location_year + ['weight_band'])[specs.DRAW_COLS].transform('mean')
    weight_df = weight_df.drop(['weight_band', 'mean_weight'], axis=1)

    # apply weight
    cov_dfs = uhc_io.compile_dfs('coverage', uhc_ids, uhc_version_dir)
    cov_df = pd.concat(cov_dfs)
    uhcw_df = misc.draw_math([cov_df, weight_df],
                             specs.ID_COLS + ['uhc_id'],
                             specs.DRAW_COLS, '*')
    uhcw_df = uhcw_df.groupby(
        specs.ID_COLS, as_index=False)[specs.DRAW_COLS].sum()
    uhca_df = cov_df.groupby(
        specs.ID_COLS, as_index=False)[specs.DRAW_COLS].mean()

    # summarize and store...
    # (each frame is summarized before its draws are written)

    # WEIGHTS
    summaryw_df = misc.summarize(weight_df, specs.DRAW_COLS)
    weight_df.to_hdf(FILEPATH)
    summaryw_df.to_csv(FILEPATH)

    # WEIGHTED VALUES
    summaryuhcw_df = misc.summarize(uhcw_df, specs.DRAW_COLS)
    uhcw_df.to_hdf(FILEPATH)
    summaryuhcw_df.to_csv(FILEPATH)

    # AVERAGE VALUES
    summaryuhca_df = misc.summarize(uhca_df, specs.DRAW_COLS)
    uhca_df.to_hdf(FILEPATH)
    summaryuhca_df.to_csv(FILEPATH)
def fetch_art_draws(draw_parameters, uhc_version_dir, uhc_id, **kwargs):
    '''
    Fetch location-year ART coverage draws.

    Age- and sex-specific coverage is loaded per location in a worker
    pool, converted to counts of people covered using COMO prevalence and
    population, summed over age and sex, and divided by the number of
    people with HIV/AIDS to return to coverage space.

    Parameters:
        draw_parameters: dict; 'location_id', 'year_id' and 'age_group_id'
            are read here, and 'to_check' is set on it before
            `param_check` is called.
        uhc_version_dir, uhc_id: accepted for a uniform fetcher signature;
            not used in this function.
        **kwargs: forwarded to `get_draws`.

    Returns a frame with `specs.ID_COLS + specs.DRAW_COLS`.
    '''
    # load age- and sex-specific data
    _fetch_art_draws_loc = functools.partial(
        fetch_art_draws_loc, age_group_id=draw_parameters['age_group_id'])
    pool = Pool(30)
    try:
        dfs = pool.map(_fetch_art_draws_loc, draw_parameters['location_id'])
    except BaseException:
        # do not leave 30 workers behind when a location fails
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    df = pd.concat(dfs)

    # coverage is by sex, age, loc, year when we read it in. we need to take
    # a few steps to get rid of that level of specificity: we need coverage
    # by location and year, NOT by loc, year, age, and sex.

    # load prevalence and aggregate (the call is echoed first for debugging;
    # the message text is kept exactly as-is)
    print(""" prev_df = get_draws( source='como', gbd_round_id=GBD_ROUND, version_id={id}, num_workers=30, metric_id=3, location_id=draw_parameters['location_id'], year_id=draw_parameters['year_id'], age_group_id=draw_parameters['age_group_id'], sex_id=[1, 2], decomp_step=DECOMP_STEP, **kwargs {kwargs} ) """.format(id=COMO_VERSION_ID, kwargs=kwargs))
    prev_df = get_draws(source='como',
                        gbd_round_id=GBD_ROUND,
                        version_id=COMO_VERSION_ID,
                        num_workers=30,
                        metric_id=3,
                        location_id=draw_parameters['location_id'],
                        year_id=draw_parameters['year_id'],
                        age_group_id=draw_parameters['age_group_id'],
                        sex_id=[1, 2],
                        decomp_step=DECOMP_STEP,
                        **kwargs)
    demographic_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']
    prev_df = prev_df[demographic_cols + specs.DRAW_COLS]
    pop_df = pd.read_hdf(FILEPATH)
    prev_df = prev_df.merge(pop_df[demographic_cols + ['population']])

    # multiply prevalence (proportion) by population to get number of people
    # with HIV/AIDS; the transposes broadcast population across draw columns
    prev_df[specs.DRAW_COLS] = (
        prev_df[specs.DRAW_COLS].values.transpose()
        * prev_df['population'].values
    ).transpose()

    # multiply number of people with HIV/AIDS by coverage to get number of
    # people covered
    df = misc.draw_math([df, prev_df], demographic_cols,
                        specs.DRAW_COLS, '*')

    # get number of people covered and number of people with HIV/AIDS for
    # each year and loc (sum up by sex and by age)
    df = df.groupby(specs.ID_COLS, as_index=False)[specs.DRAW_COLS].sum()
    prev_df = prev_df.groupby(
        specs.ID_COLS, as_index=False)[specs.DRAW_COLS].sum()

    # now divide number of people covered by number of people with HIV/AIDS
    # to get back into coverage space
    df = misc.draw_math([df, prev_df], specs.ID_COLS, specs.DRAW_COLS, '/')

    draw_parameters['to_check'] = ['location_id', 'year_id']
    param_check(df, draw_parameters)
    return df[specs.ID_COLS + specs.DRAW_COLS]