def get_measures_interpolate(me_id, locs, sexes, ages, inc_id, rms_id, emr_id,
                             year_start, year_end):
    """Interpolate incidence, remission and EMR results for one entity.

    ``interpolate.interpolate`` is called once per measure ID with otherwise
    identical arguments. Each result is restricted to rows whose ``year_id``
    is strictly below ``year_end``, stripped of the model bookkeeping columns
    and indexed by location / year / age group / sex.

    Args:
        me_id: Forwarded as ``gbd_id`` (``gbd_id_type`` is fixed to
            ``'modelable_entity_id'``).
        locs: Forwarded as ``location_id``.
        sexes: Forwarded as ``sex_id``.
        ages: Forwarded as ``age_group_id``.
        inc_id: ``measure_id`` used for the ``'incidence'`` entry.
        rms_id: ``measure_id`` used for the ``'remission'`` entry.
        emr_id: ``measure_id`` used for the ``'emr'`` entry.
        year_start: Forwarded as ``reporting_year_start``.
        year_end: Forwarded as ``reporting_year_end``; rows with
            ``year_id >= year_end`` are removed from every result.

    Returns:
        dict: ``{'incidence': ..., 'remission': ..., 'emr': ...}`` where each
        value is the processed frame for the corresponding measure.
    """
    drop_cols = ['measure_id', 'metric_id', 'model_version_id',
                 'modelable_entity_id']
    index_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']

    def _fetch(measure_id):
        """Interpolate one measure and return it filtered and indexed."""
        result = interpolate.interpolate(
            gbd_id_type='modelable_entity_id',
            gbd_id=me_id,
            measure_id=measure_id,
            location_id=locs,
            sex_id=sexes,
            age_group_id=ages,
            reporting_year_start=year_start,
            reporting_year_end=year_end,
            status='best',
            source='epi',
            gbd_round_id=help.GBD_ROUND)
        # Chain non-in-place calls: the boolean filter yields a derived
        # frame, and mutating that with inplace=True is the pattern pandas
        # reports via SettingWithCopyWarning.
        return (result.loc[result['year_id'] < year_end]
                .drop(drop_cols, axis=1)
                .set_index(index_cols))

    return {
        'incidence': _fetch(inc_id),
        'remission': _fetch(rms_id),
        'emr': _fetch(emr_id),
    }
def year_chunk_interp(start_year, end_year, locid, measid, meid):
    """Interpolate one measure for one location over a span of years.

    Args:
        start_year: Forwarded as ``reporting_year_start``.
        end_year: Forwarded as ``reporting_year_end``.
        locid: Forwarded as ``location_id``.
        measid: Measure ID; forwarded wrapped in a one-element list.
        meid: Forwarded as ``gbd_id`` (modelable entity).

    Returns:
        The frame returned by ``interpolate``. Unless ``start_year`` is 1980,
        rows whose ``year_id`` equals ``start_year`` are removed.
    """
    query = {
        'gbd_id_type': 'modelable_entity_id',
        'gbd_id': meid,
        'source': 'epi',
        'measure_id': [measid],
        'location_id': locid,
        'reporting_year_start': start_year,
        'reporting_year_end': end_year,
        'status': 'best',
    }
    chunk = interpolate(**query)

    # Only a chunk starting at 1980 keeps its first year.
    # NOTE(review): presumably the start year of later chunks is already
    # covered by the preceding chunk - confirm against the caller.
    if start_year == 1980:
        return chunk
    return chunk[chunk.year_id != start_year]
def year_chunk_interp(start_year, end_year, locid, meid):
    """Interpolate measure 18 for one location over a span of years.

    Args:
        start_year: Forwarded as ``reporting_year_start``.
        end_year: Forwarded as ``reporting_year_end``.
        locid: Forwarded as ``location_id``.
        meid: Forwarded as ``gbd_id`` (modelable entity).

    Returns:
        A frame holding only the draw columns, indexed by
        ``location_id`` / ``year_id`` / ``age_group_id`` / ``sex_id`` with
        ``year_id`` cast to ``int``. Unless ``start_year`` is 1990, rows whose
        ``year_id`` equals ``start_year`` are removed.
    """
    index_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']

    df = interpolate(gbd_id_type='modelable_entity_id', gbd_id=meid,
                     source='epi', measure_id=18, location_id=locid,
                     reporting_year_start=start_year,
                     reporting_year_end=end_year, status='best')

    # Only a chunk starting at 1990 keeps its first year.
    # NOTE(review): presumably the start year of later chunks is already
    # covered by the preceding chunk - confirm against the caller.
    if start_year != 1990:
        df = df[df.year_id != start_year]

    # `draws` is resolved from the enclosing module; it supplies the draw
    # column labels that are kept alongside the key columns.
    df = df[index_cols + list(draws)]

    # assign() and a non-in-place set_index() build new frames instead of
    # writing into a selection of another frame, which is the pattern pandas
    # reports via SettingWithCopyWarning.
    df = df.assign(year_id=df['year_id'].astype(int))
    return df.set_index(index_cols)
if __name__ == '__main__':
    # Script entry point: interpolate one entity/measure/sex from 1980 up to
    # the year derived from the GBD round, then write the result to an HDF
    # file in `outdir`.
    (gbd_id, measure_id, gbd_round_id,
     sex_id, outdir, decomp_step) = parse_arguments()

    # Create the output directory on first use.
    if not os.path.exists(outdir):
        makedirs_safely(outdir)

    # The value looked up for the round is used as the last reporting year.
    end_year = int(gbd_round_from_gbd_round_id(gbd_round_id))

    interp_kwargs = {
        'gbd_id': gbd_id,
        'gbd_id_type': 'modelable_entity_id',
        'source': 'epi',
        'measure_id': measure_id,
        'reporting_year_start': 1980,
        'reporting_year_end': end_year,
        'sex_id': sex_id,
        'gbd_round_id': gbd_round_id,
        'decomp_step': decomp_step,
        'num_workers': 30,
    }
    df = interpolate(**interp_kwargs)

    # Store every "*_id" column as int64 before writing.
    for col in [name for name in df.columns if name.endswith('_id')]:
        df[col] = df[col].astype('int64')

    out_file = os.path.join(outdir, 'interp_{}_{}.h5'.format(gbd_id, sex_id))
    df.to_hdf(
        out_file,
        key='draws',
        mode='w',
        format='table',
        data_columns=['location_id', 'year_id', 'age_group_id', 'sex_id'])
me_id = int(me_id)

# get list of locations
locations = maternal_fns.get_locations()

# Define the year span once and derive the list of years from it, so the
# interpolation window and the per-year loop below cannot drift apart.
start_year = 1980
end_year = 2017
yearlist = list(range(start_year, end_year + 1))

# call central function to interpolate
logger.info("Calling interpolate")
interp_df = interpolate(gbd_id_type='modelable_entity_id',
                        gbd_id=me_id,
                        source='epi',
                        reporting_year_start=start_year,
                        reporting_year_end=end_year,
                        measure_id=18,
                        age_group_id=list(range(7, 16)),
                        sex_id=2,
                        num_workers=45)

draw_cols = ["draw_{}".format(x) for x in range(0, 1000)]
data_cols = ['measure_id', 'location_id', 'year_id', 'age_group_id', 'sex_id']

# Keep only the identifier and draw columns and cast the identifiers to
# int64 in one step. astype() returns a new frame, so nothing is assigned
# into a column selection of another frame (SettingWithCopyWarning pattern).
interp_df = interp_df[data_cols + draw_cols].astype(
    {col: np.int64 for col in data_cols})

# save each of the files
for year in yearlist:
    # Pass the year as a logging argument so formatting is left to the logger.
    logger.info('saving interpolated draws for year %s', year)
# get list of locations locations = maternal_fns.get_locations() start_year = 1980 end_year = 2019 yearlist = list(range(1980, end_year + 1)) # call central function to interpolate logger.info("Calling interpolate the first time.") interpolate(gbd_id_type='modelable_entity_id', gbd_id=2519, source='epi', reporting_year_start=1980, reporting_year_end=2019, measure_id=18, age_group_id=7, location_id=101, sex_id=2, num_workers=45, decomp_step=decomp.decomp_step_from_decomp_step_id(decomp_step_id), gbd_round_id=maternal_fns.GBD_ROUND_ID) logger.info("Calling interpolate a second time.") interp_df = interpolate( gbd_id_type='modelable_entity_id', gbd_id=me_id, source='epi', reporting_year_start=start_year, reporting_year_end=end_year, measure_id=18, age_group_id=list(range(7, 16)), sex_id=2,
def get_measures_interpolate(ecode, locs, sexes, ages, inc_id, rms_id, emr_id,
                             year_start, year_end, decomp):
    """Interpolate incidence, remission and EMR results for one e-code.

    The modelable entity is looked up from ``ecode`` with ``help.get_me``.
    ``interpolate.interpolate`` is then called once per measure ID with
    otherwise identical arguments. Each result is restricted to rows whose
    ``year_id`` is strictly below ``year_end``, stripped of the model
    bookkeeping columns and indexed by location / year / age group / sex.
    The EMR frame is finally reindexed to the incidence frame's index.

    Args:
        ecode: Passed to ``help.get_me`` to obtain the modelable entity ID.
        locs: Forwarded as ``location_id``.
        sexes: Forwarded as ``sex_id``.
        ages: Forwarded as ``age_group_id``.
        inc_id: ``measure_id`` for the ``'incidence'`` entry (cast to int).
        rms_id: ``measure_id`` for the ``'remission'`` entry (cast to int).
        emr_id: ``measure_id`` for the ``'emr'`` entry (cast to int).
        year_start: Forwarded as ``reporting_year_start``.
        year_end: Forwarded as ``reporting_year_end``; rows with
            ``year_id >= year_end`` are removed from every result.
        decomp: Forwarded as ``decomp_step``.

    Returns:
        dict: ``{'incidence': ..., 'remission': ..., 'emr': ...}`` where each
        value is the processed frame for the corresponding measure.
    """
    me_id = help.get_me(ecode)

    # The returned versions are not used below; the call itself is retained.
    # NOTE(review): presumably this serves as a check that a best model
    # exists for the entity - confirm, or remove the lookup.
    db.get_best_model_versions(entity="modelable_entity", ids=me_id,
                               status="best", decomp_step=decomp,
                               gbd_round_id=help.GBD_ROUND)

    me_id = int(me_id)
    inc_id = int(inc_id)
    rms_id = int(rms_id)
    emr_id = int(emr_id)

    n_workers = 30
    drop_cols = ['measure_id', 'metric_id', 'model_version_id',
                 'modelable_entity_id']
    index_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']

    def _fetch(measure_id):
        """Interpolate one measure and return it filtered and indexed."""
        result = interpolate.interpolate(
            gbd_id_type='modelable_entity_id',
            gbd_id=me_id,
            measure_id=measure_id,
            location_id=locs,
            sex_id=sexes,
            age_group_id=ages,
            reporting_year_start=year_start,
            reporting_year_end=year_end,
            status='best',
            source='epi',
            gbd_round_id=help.GBD_ROUND,
            decomp_step=decomp,
            # All three measures now request the same worker count; the EMR
            # call previously omitted num_workers.
            # NOTE(review): assumes num_workers only affects parallelism,
            # not the returned values - confirm.
            num_workers=n_workers,
        )
        # Chain non-in-place calls: the boolean filter yields a derived
        # frame, and mutating that with inplace=True is the pattern pandas
        # reports via SettingWithCopyWarning.
        return (result.loc[result['year_id'] < year_end]
                .drop(drop_cols, axis=1)
                .set_index(index_cols))

    inc = _fetch(inc_id)
    rms = _fetch(rms_id)
    # Conform EMR to the incidence index.
    emr = _fetch(emr_id).reindex(inc.index)

    return {'incidence': inc, 'remission': rms, 'emr': emr}