def get_envelope_draws(meid, loc):
    """Pull DisMod envelope draws for one modelable entity / location and
    rescale each draw to its ratio against the per-row draw mean.

    Args:
        meid: modelable_entity_id to pull draws for.
        loc: location_id(s) passed through to get_draws.

    Returns:
        DataFrame with 1000 'env_*' ratio columns (draw / row mean); rows
        where the ratio is NaN (e.g. 0/0) are filled with 1.

    NOTE(review): relies on module-level globals yids, sids, agids defined
    elsewhere in the file.
    """
    # NOTE(review): {GBD ROUND ID} is an unfilled template placeholder --
    # this line is a syntax error until a real round id is substituted.
    df = get_draws('modelable_entity_id', meid, 'dismod', location_ids=loc,
                   year_ids=yids, sex_ids=sids, age_group_ids=agids,
                   gbd_round_id={GBD ROUND ID})
    df = df.drop(['modelable_entity_id', 'model_version_id', 'measure_id'],
                 axis=1)
    # rename draw_N -> env_N so these can later be merged next to other draws
    renames = {'draw_%s' % d: 'env_%s' % d for d in range(1000)}
    df.rename(columns=renames, inplace=True)
    draws = [col for col in list(df) if col.startswith('env')]
    # row-wise mean across all 1000 draws
    df['mean'] = df[draws].mean(axis=1)
    # convert each draw to a ratio of the row mean
    for col in df[draws]:
        df[col] = df[col]/df['mean']
    # 0/0 produces NaN; treat those rows as ratio 1
    # NOTE(review): x/0 for x != 0 yields inf, which fillna does NOT catch
    # -- presumably the envelope is never exactly zero; confirm.
    df.fillna(value=1, inplace=True)
    df= df.drop(['mean'], axis=1)
    return df
# Top-level driver: iterate every (modelable entity, location, sex) combo
# and pull epi draws, normalizing the terminal age group.
# NOTE(review): this fragment appears truncated -- the loop body continues
# past what is visible here (`incidence`/`emr` are built but not yet used,
# and `count` is never incremented in the visible portion, so the progress
# print always shows loop 0 unless it is bumped later).
locations = stroke_fns.get_locations()
sexes = [1, 2]
# total iterations: locations x 2 sexes x 2 modelable entities
loops = (len(locations) * 2) * 2
all_df = pd.DataFrame()
#all_df_list = []
count = 0
for me, mv in izip(modelable_entity_ids, models):
    for geo in locations:
        for sex in sexes:
            print 'On loop %s of %s' % (count, loops)
            draws = get_draws('modelable_entity_id', me, 'epi',
                              location_ids=geo, year_ids=year, sex_ids=sex,
                              gbd_round_id=4)
            d_ages = draws.age_group_id.unique()
            # normalize the terminal age group: some models report 95+ as
            # age_group_id 235, others as 33 -- coerce everything to 33
            if 235 in d_ages:
                draws = draws[draws.age_group_id.isin(ages2)]
                draws[['age_group_id']] = draws[['age_group_id'
                                                 ]].replace(to_replace=235,
                                                            value=33)
            elif 33 in d_ages:
                draws = draws[draws.age_group_id.isin(ages1)]
            # pull out incidence and EMR into separate dfs
            incidence = draws[draws.measure_id == 6]
            emr = draws[draws.measure_id == 9]
def run_yld_compile(yld_tmp, yld_dir, yld_version, root_dir, location, ar,
                    n_draws):
    """Compile all-cause YLD draws for one location into long-format rates.

    Pulls COMO YLD rate draws (cause 294 = all causes), converts to counts
    with population, collapses young ages into under-1 (28), aggregates
    both sexes into sex_id 3, returns to rates, then melts to one row per
    draw and writes a per-location CSV plus a summary.

    Args:
        yld_tmp: directory for the per-location draw CSV.
        yld_dir: directory passed to calc_summary for summary output.
        yld_version: COMO version; 0 means "use the current best".
        root_dir: root containing PATH/pop.csv population file.
        location: location_id to process.
        ar: if truthy, pull every year 1990-2016 (annual results);
            otherwise only the reporting years.
        n_draws: number of draws to request (resampled).
    """
    if ar:
        years = range(1990, 2017)
    else:
        years = [1990, 1995, 2000, 2005, 2010, 2016]
    pops = pd.read_csv('{root_dir}/PATH/pop.csv'.format(root_dir=root_dir))
    index_cols = ['location_id', 'age_group_id', 'sex_id', 'year_id']
    # version 0 is a sentinel for "no explicit version" -- omit the kwarg
    if yld_version == 0:
        yld_draws = get_draws("cause_id", source="como", gbd_id=294,
                              measure_ids=3, location_ids=location,
                              year_ids=years, sex_ids=[1, 2],
                              n_draws=n_draws, resample=True)
    else:
        yld_draws = get_draws("cause_id", source="como", gbd_id=294,
                              measure_ids=3, location_ids=location,
                              year_ids=years, sex_ids=[1, 2],
                              version=yld_version, n_draws=n_draws,
                              resample=True)
    draw_cols = [col for col in yld_draws.columns if 'draw' in col]
    yld_draws = yld_draws[index_cols + draw_cols]
    # rates -> counts so that age/sex aggregation is a simple sum
    yld_draws = yld_draws.merge(pops, on=index_cols)
    yld_draws = yld_draws.set_index(index_cols)
    yld_draws[draw_cols] = yld_draws[draw_cols].multiply(
        yld_draws['population'], axis='index')
    yld_draws.drop('population', axis=1, inplace=True)
    yld_draws = yld_draws.reset_index()
    # collapse early neonatal/late neonatal/post neonatal (2-4) and birth
    # (164) into the under-1 aggregate (28)
    yld_draws.loc[yld_draws['age_group_id'].isin(range(2, 5) + [164]),
                  'age_group_id'] = 28
    ages = range(5, 21) + range(30, 33) + [28, 235]
    yld_draws = yld_draws.loc[yld_draws['age_group_id'].isin(ages)]
    yld_draws = yld_draws.groupby(index_cols).sum().reset_index()
    # aggregate sexes into both-sex rows (sex_id 3)
    sex_agg = yld_draws.groupby(['age_group_id', 'location_id',
                                 'year_id']).sum().reset_index()
    sex_agg['sex_id'] = 3
    yld_draws = yld_draws.append(sex_agg)
    # counts -> rates again using the (now including sex_id 3) populations
    yld_draws = yld_draws.merge(pops, on=index_cols)
    yld_draws = yld_draws.set_index(index_cols)
    yld_draws[draw_cols] = yld_draws[draw_cols].divide(
        yld_draws['population'], axis='index')
    yld_draws.drop('population', axis=1, inplace=True)
    yld_draws = yld_draws.reset_index()
    # strip the 'draw_' prefix so the melt below can identify draw columns
    # purely by their numeric names
    yld_draws.rename(columns=(lambda x: x.replace('draw_', '')
                              if x in draw_cols else x), inplace=True)
    new_draws = [col for col in yld_draws.columns if col.isdigit()]
    # wide -> long: one row per (index, draw)
    yld_draws = pd.melt(yld_draws, id_vars=index_cols, value_vars=new_draws,
                        var_name='draw', value_name='yld_rate')
    csv_draws = yld_draws.set_index('location_id')
    csv_draws.to_csv('{yld_tmp}/{location}_draws.csv'.format(
        yld_tmp=yld_tmp, location=location))
    summ_cols = ['yld_rate']
    calc_summary(yld_draws, summ_cols, yld_dir, location)
def process_location_cc_draws(location_id, test=False):
    """Pull mortality numbers, limiting to desired ages by cause.

    Gets all years >1990 and ages for the location id as mortality numbers
    from get_draws, validates the result shape, age-standardizes, and
    writes the output.

    Args:
        location_id: location to process.
        test: if True, pull only 2016 and skip the year-coverage asserts.

    Returns:
        The processed DataFrame (also written via write_output).
    """
    dfs = []
    # each entry is [cause_id list, age_group_id list to request for them]
    cause_age_sets = [[
        dw.CC_ALL_AGE_CAUSE_IDS, range(2, 21) + range(30, 33) + [235]
    ], [dw.CC_THIRTY_SEVENTY_CAUSE_IDS, range(11, 19)],
        [dw.PRE_1990_CAUSES, [22]]]
    if test:
        years = [2016]
    else:
        # empty list: presumably get_draws returns all years -- confirm
        years = []
    for causes, ages in cause_age_sets:
        # NOTE(review): gbd_ids is never used below
        gbd_ids = {'cause_ids': causes}
        df = get_draws(gbd_id_field=['cause_id'] * len(causes),
                       gbd_id=causes,
                       source='codcorrect',
                       version=dw.CC_VERS,
                       location_ids=[location_id],
                       year_ids=years,
                       age_group_ids=ages,
                       sex_ids=[3],
                       measure_ids=1)
        dfs.append(df)
    df = pd.concat(dfs, ignore_index=True)
    # keep relevant years: 1990+ generally, 1980+ for pre-1990 causes
    df = df.ix[(df['year_id'] >= 1990) |
               ((df['cause_id'].isin(dw.PRE_1990_CAUSES)) &
                (df['year_id'] >= 1980))]
    # make sure index variables are ints
    for idvar in dw.CC_GROUP_COLS:
        df[idvar] = df[idvar].astype(int)
    # make sure it looks like we expect
    assert set(df.ix[df['cause_id'].isin(
        dw.PRE_1990_CAUSES)].age_group_id) == set([22]), \
        'unexpected age group ids found'
    assert set(df.ix[~df['cause_id'].isin(
        dw.PRE_1990_CAUSES)].age_group_id) == \
        set(range(2, 21) + range(30, 33) + [235]), \
        'unexpected age group ids found'
    assert set(df.sex_id) == set([3]), \
        'unexpected sex ids found'
    if not test:
        assert set(df.ix[df['cause_id'].isin(
            dw.PRE_1990_CAUSES)].year_id) == \
            set(range(1980, 2017)), \
            'unexpected year ids found'
        assert set(df.ix[
            ~df['cause_id'].isin(dw.PRE_1990_CAUSES)
        ].year_id) == \
            set(range(1990, 2017)), \
            'unexpected year ids found'
    assert set(df.location_id) == set([location_id]), \
        'unexpected location ids found'
    # age standardize
    df = age_standardize(df, 'codcorrect')
    # write the output
    df = df[dw.CC_GROUP_COLS + dw.DRAW_COLS]
    write_output(df, 'codcorrect', location_id)
    return df
def process_location_como_draws(location_id, measure_id, test=False):
    """Pull incidence (or prevalence) rates, merging with population to
    make cases.

    Gets all years, ages, and sexes for the location id as rates from
    get_draws (or interpolate outside test mode), combines into both-sexes
    cases, returns to rates, and age-standardizes.

    Args:
        location_id: location to process.
        measure_id: 6 for incidence, 5 for prevalence.
        test: if True, pull only 2016 directly instead of interpolating
            1990-2016.

    Raises:
        ValueError: for any other measure_id.
    """
    db_pops = qry.get_pops()
    if measure_id == 6:
        causes = dw.COMO_INC_CAUSE_IDS
    elif measure_id == 5:
        causes = dw.COMO_PREV_CAUSE_IDS
    else:
        raise ValueError("bad measure_id: {}".format(measure_id))
    dfs = []
    if test:
        years = [2016]
    else:
        years = []
    for cause_id in causes:
        print("pulling {c}".format(c=cause_id))
        if test:
            df = get_draws(gbd_id_field='cause_id',
                           gbd_id=cause_id,
                           source='como',
                           version=dw.COMO_VERS,
                           location_ids=[location_id],
                           year_ids=years,
                           age_group_ids=[],
                           sex_ids=[1, 2],
                           measure_ids=[measure_id])
        else:
            # interpolate to get every year in 1990-2016
            df = interpolate(gbd_id_field='cause_id',
                             gbd_id=cause_id,
                             source='como',
                             version=dw.COMO_VERS,
                             reporting_year_start=1990,
                             reporting_year_end=2016,
                             location_ids=[location_id],
                             age_group_ids=[],
                             sex_ids=[1, 2],
                             measure_ids=[measure_id])
        # these pull in as rates
        df['metric_id'] = 3
        # make sure it looks like we expect
        assert set(df.age_group_id) == set(range(2, 21) + range(30, 33) +
                                           [235]), \
            'unexpected age group ids found'
        assert set(df.sex_id) == set([1, 2]), \
            'unexpected sex ids found'
        if not test:
            assert set(df.year_id) == set(range(1990, 2017)), \
                'unexpected year ids found'
        assert set(df.location_id) == set([location_id]), \
            'unexpected location ids found'
        # compile
        dfs.append(df[dw.COMO_GROUP_COLS + dw.DRAW_COLS])
    df = pd.concat(dfs, ignore_index=True)
    # merge with pops to transform to cases
    df = df.merge(db_pops, how='left')
    assert df.population.notnull().values.all(
    ), 'merge with populations failed'
    # concatenate the metadata with the draw cols times the pop
    # this multiplies each draw column by the population column
    df = pd.concat([
        df[dw.COMO_GROUP_COLS],
        df[dw.DRAW_COLS].apply(lambda x: x * df['population']),
        df['population']
    ], axis=1)
    # now its numbers (this line is for readability)
    df['metric_id'] = 1
    # aggregate sexes
    df['sex_id'] = 3
    # collapse sexes together
    df = df.groupby(dw.COMO_GROUP_COLS,
                    as_index=False)[dw.DRAW_COLS + ['population']].sum()
    # back to rates using the sex-aggregated population
    df = pd.concat([
        df[dw.COMO_GROUP_COLS],
        df[dw.DRAW_COLS].apply(lambda x: x / df['population'])
    ], axis=1)
    df['metric_id'] = 3
    # AGE STANDARDIZE
    print("age standardizing")
    wgts = custom_age_weights(0, 125)
    df = df.merge(wgts, on=['age_group_id'], how='left')
    assert df.age_group_weight_value.notnull().values.all(), \
        'merge w wgts failed'
    df = pd.concat([
        df[dw.COMO_GROUP_COLS],
        df[dw.DRAW_COLS].apply(lambda x: x * df['age_group_weight_value'])
    ], axis=1)
    # 27 = age-standardized
    df['age_group_id'] = 27
    df = df.groupby(dw.COMO_GROUP_COLS, as_index=False)[dw.DRAW_COLS].sum()
    df = df[dw.COMO_GROUP_COLS + dw.DRAW_COLS]
    write_output(df, 'como', location_id)
    return df
def process_location_risk_exposure_draws(location_id, test=False):
    """Return yearly age standardized estimates of each rei_id.

    Pulls exposure draws for every risk in dw.RISK_EXPOSURE_REI_IDS plus
    the malnutrition set, restricts to the main GBD ages and sexes 1/2,
    collapses sexes via population-weighted cases, and age-standardizes.

    Arguments:
        location_id: the location_id to process
        test: if True, pull 2016 only instead of interpolating 1990-2016.

    Returns:
        pandas dataframe like so: [ID_COLS] : [dw.DRAW_COLS]
    """
    dfs = []
    # version_df = pd.DataFrame()
    risks = set(dw.RISK_EXPOSURE_REI_IDS).union(
        set(dw.RISK_EXPOSURE_REI_IDS_MALN))
    if test:
        years = [2016]
    else:
        years = []  # empty list: all available years -- TODO confirm
    for rei_id in risks:
        print("pulling {r}".format(r=rei_id))
        # rei 166 (presumably smoking, given the age-7+ filter below) is
        # pulled directly rather than interpolated -- confirm rationale
        if test or rei_id == 166:
            df = get_draws(gbd_id_field='rei_id', gbd_id=rei_id,
                           source='risk', location_ids=[location_id],
                           year_ids=years, age_group_ids=[], sex_ids=[],
                           draw_type='exposure')
        elif not test and rei_id == 86:
            # rei 86 is pulled with measure 19 (see measure_id stamp below)
            df = interpolate(gbd_id_field='rei_id', gbd_id=rei_id,
                             source='risk', reporting_year_start=1990,
                             reporting_year_end=2016,
                             location_ids=[location_id], age_group_ids=[],
                             sex_ids=[], measure_ids=19,
                             draw_type='exposure')
        else:
            df = interpolate(gbd_id_field='rei_id', gbd_id=rei_id,
                             source='risk', reporting_year_start=1990,
                             reporting_year_end=2016,
                             location_ids=[location_id], age_group_ids=[],
                             sex_ids=[], draw_type='exposure')
        # remove any other ages besides main gbd ages, and keep sexes 1/2.
        # BUG FIX: original query was
        #   '(age_group_id >= 2 & age_group_id <= 20) or
        #    age_group_id in [30, 31, 32, 235] and sex_id in [1, 2]'
        # Two precedence problems: `and` binds tighter than `or`, so the
        # sex filter only applied to the 30/31/32/235 branch; and `&`
        # binds tighter than comparisons, so the first clause was
        # effectively `age_group_id >= (2 & age_group_id) <= 20`, which is
        # true for nearly all rows. Parenthesized form below applies the
        # intended age window AND the sex restriction to every row.
        df = df.query(
            '((age_group_id >= 2 and age_group_id <= 20) '
            'or age_group_id in [30, 31, 32, 235]) and sex_id in [1, 2]')
        df = df.query('year_id >= 1990')
        if rei_id == 166:
            # only keep 10+ for smoking
            df = df.query('age_group_id >= 7')
            df = df.query('parameter=="cat1"')
        # set the rei_id because it isnt in the get_draws pull
        df['rei_id'] = rei_id
        # these are prevalence rates
        df['metric_id'] = 3
        if rei_id == 86:
            df['measure_id'] = 19
        else:
            df['measure_id'] = 5
        dfs.append(df[dw.RISK_EXPOSURE_GROUP_COLS + dw.DRAW_COLS])
    df = pd.concat(dfs, ignore_index=True)

    # COLLAPSE SEX
    print("collapsing sex")
    df = df.merge(qry.get_pops(), how='left')
    assert df.population.notnull().values.all(), 'merge with pops fail'
    # overriding the sex variable for collapsing; rei 167 is kept as
    # females only (sex_id 2) -- presumably a female-specific risk, confirm
    df['sex_id'] = df.rei_id.apply(lambda x: 2 if x == 167 else 3)
    # for stunting and wasting (where we only have under-5), keep only
    # under-5 and aggregate ages
    df.ix[df['rei_id'].isin(dw.RISK_EXPOSURE_REI_IDS_MALN),
          'age_group_id'] = 1
    # make all ages for PM 2.5
    df.ix[df['rei_id'] == 86, 'age_group_id'] = 22
    # rates -> cases so the sex collapse is a plain sum
    df = pd.concat([
        df[dw.RISK_EXPOSURE_GROUP_COLS],
        df[dw.DRAW_COLS].apply(lambda x: x * df['population'])
    ], axis=1)
    # so unnecessary programmatically but good for documentation -
    # these are now prev cases
    df['metric_id'] = 1
    # now that its in cases it is possible to collapse sex
    df = df.groupby(dw.RISK_EXPOSURE_GROUP_COLS, as_index=False).sum()

    # RETURN TO RATES
    print("returning to rates")
    df = df.merge(qry.get_pops(), how='left')
    assert df.population.notnull().values.all(), 'merge with pops fail'
    df = pd.concat([
        df[dw.RISK_EXPOSURE_GROUP_COLS],
        df[dw.DRAW_COLS].apply(lambda x: x / df['population'])
    ], axis=1)
    df['metric_id'] = 3

    # AGE STANDARDIZE
    print("age standardizing")
    wgts = custom_age_weights(10, 125)  # FOR SMOKING ONLY
    df = df.merge(wgts, on=['age_group_id'], how='left')
    assert df.age_group_weight_value.notnull().values.all(), \
        'merge w wgts failed'
    df = pd.concat([
        df[dw.RISK_EXPOSURE_GROUP_COLS],
        df[dw.DRAW_COLS].apply(lambda x: x * df['age_group_weight_value'])
    ], axis=1)
    # 27 = age-standardized
    df['age_group_id'] = 27
    df = df.groupby(dw.RISK_EXPOSURE_GROUP_COLS,
                    as_index=False)[dw.DRAW_COLS].sum()
    df = df[dw.RISK_EXPOSURE_GROUP_COLS + dw.DRAW_COLS]
    write_output(df, 'risk_exposure', location_id)
    return df
def process_location_risk_burden_draws(location_id, test=False):
    '''
    Given a list of rei_ids, use gopher to get attributable burden draws
    and save to out directory.

    Deaths (measure 1) for RISK_BURDEN_REI_IDS, DALYs (measure 2) for
    RISK_BURDEN_DALY_REI_IDS; results are sex-aggregated, converted to
    rates, interpolated to annual years, and age-standardized.

    NOTE(review): the `test` parameter is accepted but never used here.
    '''
    dfs = []
    for rei_id in dw.RISK_BURDEN_REI_IDS + dw.RISK_BURDEN_DALY_REI_IDS:
        print(rei_id)
        # choose the measure based on which list the rei came from
        if rei_id in dw.RISK_BURDEN_REI_IDS:
            measure_id = 1
        elif rei_id in dw.RISK_BURDEN_DALY_REI_IDS:
            measure_id = 2
        else:
            raise ValueError("no measure found")
        print('Getting draws')
        # cause 294 = all causes; pull burden attributable to this rei
        df = get_draws(gbd_id_field=['cause_id', 'rei_id'],
                       gbd_id=[294, rei_id],
                       source='burdenator',
                       version=dw.BURDENATOR_VERS,
                       location_ids=location_id,
                       year_ids=[],
                       age_group_ids=[],
                       sex_ids=[],
                       num_workers=3,
                       n_draws=1000,
                       resample=True)
        # keep years we want
        df = df.query('measure_id == {}'.format(measure_id))
        df = df.query('metric_id == 1')
        df = df.query('age_group_id in {} and sex_id in [1, 2]'.format(
            range(2, 21) + range(30, 33) + [235]))
        # quinquennial years plus 2016; interpolation below fills the rest
        df = df.query('year_id in {}'.format(range(1990, 2011, 5) + [2016]))
        # aggregate to both sexes
        df['sex_id'] = 3
        df = df.groupby(dw.RISK_BURDEN_GROUP_COLS,
                        as_index=False)[dw.DRAW_COLS].sum()
        # counts -> rates with both-sex populations
        pops = qry.get_pops(both_sexes=True)
        df = df.merge(pops, how='left',
                      on=['location_id', 'age_group_id', 'sex_id',
                          'year_id'])
        df = pd.concat([
            df[dw.RISK_BURDEN_GROUP_COLS],
            df[dw.DRAW_COLS].apply(lambda x: x / df['population'])
        ], axis=1)
        df['metric_id'] = 3
        # keep the right columns
        df = df[dw.RISK_BURDEN_GROUP_COLS + dw.DRAW_COLS]
        # interpolate years
        print('Interpolating')
        df = custom_interpolate(df)
        # age-standardize
        age_weights = qry.get_age_weights(4)
        df = df.merge(age_weights)
        df = pd.concat([
            df[dw.RISK_BURDEN_GROUP_COLS],
            df[dw.DRAW_COLS].apply(
                lambda x: x * df['age_group_weight_value'])
        ], axis=1)
        # 27 = age-standardized
        df['age_group_id'] = 27
        df = df.groupby(dw.RISK_BURDEN_GROUP_COLS,
                        as_index=False)[dw.DRAW_COLS].sum()
        dfs.append(df)
    df = pd.concat(dfs)
    write_output(df, 'risk_burden', location_id)
    return df
def draw_deaths(cause_id, location_id, out_path, send_Slack, slack, channel):
    """
    draw_deaths:
        For a given location and Cause grab death estimates at the
        draw-level. Collapse it mean, upper, and lower.

    Writes codcorrect_{cause}_{location}.csv to out_path; Slack-notifies
    on pull failure or empty data when send_Slack == "YES".
    """
    # year IDs
    YEAR_IDS = range(1990, 2017)
    # All ages that could be used
    ALL_AGES = range(2, 22) + [28] + range(30, 33) + [235]
    # Early and late ages of DisMod
    DISMOD_AGES = [2, 3, 4, 30, 31, 32, 235]
    # DisMod old ages
    OLD_DISMOD_AGES = [30, 31, 32, 235]
    # DisMod young ages
    YOUNG_DISMOD_AGES = [2, 3, 4]
    # Ages
    AGES = range(2, 5) + range(30, 34)
    # old ages
    OLD_AGES = range(30, 34)
    # young ages
    YOUNG_AGES = range(2, 5)
    # necessary columns
    group_cols = ['cause_id', 'sex_id', 'age_group_id', 'location_id']
    measure_cols = ['mean_death', 'upper_death', 'lower_death']
    # Draw columns
    draw_cols = (['draw_%s' % i for i in range(0, 1000)])
    # NOTE(review): hf_death_cols is never used in this function
    hf_death_cols = (['hf_deaths_%s' % i for i in range(0, 1000)])
    # split <1 age group into early neonatal, late neonatal, and post
    # neonatal
    young_age_groups = pd.DataFrame({'age_group_id':YOUNG_DISMOD_AGES,
                                     'temp_id':1})
    # split 80+ age group into its component old age groups
    old_age_groups = pd.DataFrame({'age_group_id':OLD_DISMOD_AGES,
                                   'temp_id':1})
    # Query draws for the cause/location combo
    print cause_id, location_id
    try:
        DF = get_draws('cause_id', cause_id, 'codcorrect',
                       location_ids=location_id,
                       age_group_ids=ALL_AGES,
                       gbd_round_id=4,
                       #status="best",
                       status="latest",
                       #version_id=64,
                       sexes=[1,2],
                       location_set_id=35,
                       measure_ids=1,
                       numworkers=5)
        print DF['output_version_id'].unique(), " ", DF.shape
        DF = DF.query('year_id in {}'.format(YEAR_IDS))
        DF = DF.query('output_version_id == {}'.format(64))
        DF.fillna(0, inplace=True)
    # NOTE(review): bare except -- swallows everything; if the pull fails,
    # DF is never bound and every later reference raises NameError.
    except:
        if send_Slack == "YES":
            message = ("get_draws query FAILED for location_id={location_id} "
                       "and cause_id={cause_id}").format(\
                           location_id=location_id, cause_id=cause_id)
            slack.chat.post_message(channel, message)
            print message
    if send_Slack == "YES" and not len(DF):
        # NOTE(review): `+` binds before `.format`, so only the second
        # fragment is formatted and the literal "{location_id}" is left
        # unexpanded in the posted message.
        message = "Missing data for get_draws for location_id={location_id}" +\
            " and cause_id={cause_id}".format(location_id=location_id,
                                              cause_id=cause_id)
        slack.chat.post_message(channel, message)
    # Compute 25 and 75 percentiles of the distribution for each row
    stats = DF[draw_cols].transpose().describe(
        percentiles=[.25, .75]).transpose()[['mean', '25%', '75%']]
    stats.rename(
        columns={'mean':'mean_death', '25%': 'lower_death',
                 '75%': 'upper_death'}, inplace=True)
    # add these percentiles to the dataset
    DF['mean_death'] = stats['mean_death']
    DF['lower_death'] = stats['lower_death']
    DF['upper_death'] = stats['upper_death']
    # To make this script robust filter by what ever the age groups are
    # available if age group ID 235 is available go with that otherwise
    # find another way to get/make an 80+ age group
    if pd.Series(DISMOD_AGES).isin(DF.age_group_id.unique()).all():
        ages = range(2,21) + [30, 31, 32, 235]
        DF = DF.query('age_group_id in {}'.format(ages))
        print "DISMOD_AGES"
    elif pd.Series(YOUNG_DISMOD_AGES).isin(DF.age_group_id.unique()).all() and \
            not pd.Series(OLD_DISMOD_AGES).isin(DF.age_group_id.unique()).all():
        ages = range(2, 21) + range(30, 34)
        DF = DF.query('age_group_id in {}'.format(ages))
        # treat 80+ (33) as the terminal group 235
        DF['age_group_id'].replace(to_replace=33, value=235, inplace=True)
        print "YOUNG_DISMOD_AGES"
    elif pd.Series(OLD_DISMOD_AGES).isin(DF.age_group_id.unique()).all() and \
            not pd.Series(YOUNG_DISMOD_AGES).isin(DF.age_group_id.unique()).all():
        ages = range(5, 21) + [28] + [30, 31, 32, 235]
        DF = DF.query('age_group_id in {}'.format(ages))
        print "OLD_DISMOD_AGES"
    elif pd.Series(AGES).isin(DF.age_group_id.unique()).all():
        ages = range(2, 21) + range(30, 34)
        DF = DF.query('age_group_id in {}'.format(ages))
        print "AGES"
    elif pd.Series(OLD_AGES).isin(DF.age_group_id.unique()).all() and \
            not pd.Series(YOUNG_AGES).isin(DF.age_group_id.unique()).all():
        ages = range(5, 21) + [28] + range(30, 34)
        DF = DF.query('age_group_id in {}'.format(ages))
        print "OLD_AGES"
    elif pd.Series(YOUNG_AGES).isin(DF.age_group_id.unique()).all() and \
            not pd.Series(OLD_AGES).isin(DF.age_group_id.unique()).all():
        ages = range(2, 22)
        DF = DF.query('age_group_id in {}'.format(ages))
        print "YOUNG_AGES"
    else:
        ages = range(5, 22) + [28]
        DF = DF.query('age_group_id in {}'.format(ages))
        # NOTE(review): bare string expression is a no-op; presumably this
        # was meant to be `print "A lot is missing"`.
        "A lot is missing"
    # split out neonatal age groups if needed
    if pd.Series([28]).isin(DF.age_group_id.unique()).all():
        temp = DF.query('age_group_id == 28').copy()
        DF = DF.query('age_group_id != 28')
        # Make a temporary id to merge with age groups DataFrame
        temp.drop('age_group_id', axis=1, inplace=True)
        temp['temp_id'] = 1
        # cross-join: one copy of each <1 row per neonatal age group
        temp = temp.merge(young_age_groups, on='temp_id', how='inner')
        temp.drop('temp_id', axis=1, inplace=True)
        # Append the new df w/ age groups to the original
        # (excluding the <1 age composite).
        DF = DF.append(temp)
    # split out 80+ age groups if needed
    if pd.Series([21]).isin(DF.age_group_id.unique()).all():
        temp = DF.query('age_group_id == 21').copy()
        DF = DF.query('age_group_id != 21')
        # Make a temporary id to merge with age groups DataFrame
        temp.drop('age_group_id', axis=1, inplace=True)
        temp['temp_id'] = 1
        # cross-join: one copy of each 80+ row per old age group
        temp = temp.merge(old_age_groups, on='temp_id', how='inner')
        temp.drop('temp_id', axis=1, inplace=True)
        # Append the new df w/ age groups to the original
        # (excluding the 80+ age composite).
        DF = DF.append(temp)
    # The columns that are added up
    DF = DF.groupby(group_cols)[measure_cols].sum().reset_index()
    # fill in any missing data
    DF = make_square_matrix(DF)
    # Save it to a CSV on the cluster to be read back into the main script.
    DF.to_csv('{out_path}/codcorrect_{cause}_{location}.csv'.format(\
        out_path=out_path, cause=cause_id, location=location_id),
        index=False, encoding='utf-8')
# Top-level driver: pull cause-specific mortality (measure 15) draws for
# ischemic and hemorrhagic stroke per location, normalizing the terminal
# age group (235 -> 33) as above.
# NOTE(review): this fragment is truncated -- the final `if` body and the
# rest of the loop continue past what is visible here, and the four
# all_*_list accumulators are filled later.
all_cols = keep_cols + index_cols
ages1 = range(2,21) + [30,31,32,33]
ages2 = range(2,21) + [30,31,32,235]
# ages1 = [22,27]
# ages2 = [22,27]
all_acute_isch_list = []
all_acute_hem_list = []
all_chronic_isch_list = []
all_chronic_hem_list = []
count = 0
location_count = len(locations)
for geo in locations:
    # get acute isch
    csmr_isch = get_draws('modelable_entity_id', isch_me, 'epi',
                          location_ids=geo, year_ids=year, sex_ids=[1,2],
                          gbd_round_id=4)
    # measure 15 = cause-specific mortality rate
    csmr_isch = csmr_isch[csmr_isch['measure_id']==15]
    isch_ages = csmr_isch.age_group_id.unique()
    if 235 in isch_ages:
        csmr_isch = csmr_isch[csmr_isch.age_group_id.isin(ages2)]
        csmr_isch[['age_group_id']] = csmr_isch[['age_group_id']].replace(
            to_replace=235,value=33)
    elif 33 in isch_ages:
        csmr_isch = csmr_isch[csmr_isch.age_group_id.isin(ages1)]
    csmr_isch = csmr_isch[all_cols]
    #get acute hem
    csmr_hem = get_draws('modelable_entity_id', hem_me, 'epi',
                         location_ids=geo, year_ids=year, sex_ids=[1,2],
                         gbd_round_id=4)
    csmr_hem = csmr_hem[csmr_hem['measure_id']==15]
    hem_ages = csmr_hem.age_group_id.unique()
    if 235 in hem_ages:
def get_envelope(self): """get the envelope""" print "get envelope" # columns draw_cols = (['draw_%s' % i for i in range(0, 1000)]) env_cols = (['env_prev_%s' % i for i in range(0, 1000)]) hf_cols = (['hf_prev_%s' % i for i in range(0, 1000)]) # get overall prevalence of heart failure hf = get_draws('modelable_entity_id', 2412, 'dismod', sex_ids=[1,2], status="best", measure_ids=5, location_ids=self.location_id, gbd_round_id=4, num_workers=5) # drop unneeded columns hf = hf[self.group_cols + draw_cols] # drop unneeded age groups hf = hf.query('age_group_id in {}'.format((self.AGE_GROUPS_IDS))) # delete the prevalence due to Chagas chagas = get_draws('modelable_entity_id', 2413, 'dismod', sex_ids=[1,2], status="best", measure_ids=5, location_ids=self.location_id, gbd_round_id=4, num_workers=5) # drop unneeded columns chagas = chagas[self.group_cols + draw_cols] # drop unneeded age groups chagas = chagas.query('age_group_id in {}'.format((self.AGE_GROUPS_IDS))) # make sure the size of HF and Chagas is the same if len(hf) != len(chagas) and self.send_Slack == "YES": message = "Chagas and HF envelople have different number of rows for {location_id}"\ .format(location_id=location_id) self.slack.chat.post_message(self.channel, message) #assert len(hf) == len(chagas), "matrices are not the same size." 
# rename HF prev draws for i in xrange(1000): hf.rename(columns={'draw_'+str(i):'env_prev_'+str(i)}, inplace=True) # merge the HF-chagas prev draws and the HF prev draws hf = hf.merge(chagas, on=self.group_cols, how='inner') # subtract prevalence of HF due to Chagas from total HF prevalence for i in xrange(1000): hf['nonchagas_prev_'+str(i)] = hf['env_prev_'+str(i)] - hf['draw_'+str(i)] hf.rename(columns={'nonchagas_prev_'+str(i):'hf_prev_'+str(i)}, inplace=True) # drop unneeded columns hf = hf[self.group_cols + hf_cols] hf[hf < 0] = 0 # Make sure the Matrix is square assert_env_df_is_square(hf, self.send_Slack, self.slack, self.channel, self.location_id) # Make sure there aren't any duplicates if hf[self.group_cols].duplicated().any() and self.send_Slack == "YES": message = "The Chagas deleted HF envelope has duplicates for {location_id}"\ .format(location_id=location_id) self.slack.chat.post_message(self.channel, message) #assert not hf[self.index_cols].duplicated().any(), 'duplicates introduced in custom cause generation' self.envelope = hf