def save_mortality(ecode, year_id, sex_id, locs, ages, decomp, version):
    """Pull CODEm mortality draws for an ecode's cause, convert death counts
    to rates, and save them as a NetCDF for shock processing.

    Args:
        ecode: injury e-code whose underlying cause_id is looked up.
        year_id: year to pull draws for.
        sex_id: sex to pull draws for.
        locs: location_ids to pull draws for.
        ages: age_group_ids to pull draws for.
        decomp: decomp step, passed to get_draws and used in the output path.
        version: run version string (trailing whitespace is stripped).
    """
    cause_id = help.get_cause(ecode)
    draws = gd.get_draws(
        gbd_id_type="cause_id",
        gbd_id=cause_id,
        location_id=locs,
        year_id=year_id,
        sex_id=sex_id,
        age_group_id=ages,
        status="best",
        source="codem",
        gbd_round_id=help.GBD_ROUND,
        decomp_step=decomp
    )
    # CODEm draws are death counts; divide by population to get rates.
    draws[help.drawcols()] = draws[help.drawcols()].divide(draws['pop'], axis=0)
    draws.drop(['pop', 'envelope', 'cause_id', 'sex_name', 'measure_id',
                'metric_id'], axis=1, inplace=True)
    draws.set_index(['location_id', 'year_id', 'sex_id', 'age_group_id'],
                    inplace=True)
    mort = etl.df_to_xr(draws, wide_dim_name='draw', fill_value=np.nan)
    filename = "mort_{}_{}.nc".format(str(year_id), str(sex_id))
    version = version.rstrip()
    folder = os.path.join(paths.DATA_DIR, decomp,
                          inj_info.ECODE_PARENT[ecode], version,
                          'mortality_for_shocks')
    # BUG FIX: the old handler tested e.errno against os.errno.EEXIST, but
    # os.errno does not exist on Python 3 (AttributeError). makedirs with
    # exist_ok=True is the race-safe equivalent of "create unless it exists".
    os.makedirs(folder, exist_ok=True)
    filepath = os.path.join(folder, filename)
    mort.to_netcdf(filepath)
def save_mortality(ecode, year_id, sex_id, locs, ages, version):
    """Pull CODEm mortality draws for an ecode's cause, convert death counts
    to rates, and save them as a NetCDF for shock processing.

    Args:
        ecode: injury e-code whose underlying cause_id is looked up.
        year_id: year to pull draws for.
        sex_id: sex to pull draws for.
        locs: location_ids to pull draws for.
        ages: age_group_ids to pull draws for.
        version: run version (unused here beyond the sanitized output path).
    """
    cause_id = help.get_cause(ecode)
    draws = gd.get_draws(
        gbd_id_type="cause_id",
        gbd_id=cause_id,
        location_id=locs,
        year_id=year_id,
        sex_id=sex_id,
        age_group_id=ages,
        status="best",
        source="codem",
        gbd_round_id=help.GBD_ROUND
    )
    # CODEm draws are death counts; divide by population to get rates.
    draws[help.drawcols()] = draws[help.drawcols()].divide(draws['pop'], axis=0)
    draws.drop(['pop', 'envelope', 'cause_id', 'sex_name', 'measure_id',
                'metric_id'], axis=1, inplace=True)
    draws.set_index(['location_id', 'year_id', 'sex_id', 'age_group_id'],
                    inplace=True)
    mort = etl.df_to_xr(draws, wide_dim_name='draw', fill_value=np.nan)
    # NOTE(review): paths were sanitized to "FILEPATH" before publication;
    # the .format args are kept so the original template still applies.
    filename = "FILEPATH.nc".format(str(year_id), str(sex_id))
    folder = os.path.join("FILEPATH")
    # BUG FIX: os.errno does not exist on Python 3; makedirs(exist_ok=True)
    # is the race-safe equivalent of the old EEXIST-swallowing handler.
    os.makedirs(folder, exist_ok=True)
    filepath = os.path.join(folder, filename)
    print("Writing mortality")
    mort.to_netcdf(filepath)
def smr(ncode):
    """Build draws of the standardized mortality ratio (SMR) for one N-code.

    N48 has no SMR data of its own, so it borrows N9's rows. Draws are
    sampled from a normal distribution centered on the reported SMR with a
    standard error derived from the 95% UI, floored at 1, and returned as an
    xarray indexed by (ncode, age_group_id) with a 'draw' dimension.
    """
    table = pd.read_csv(
        os.path.join(paths.INPUT_DIR, 'FILEPATH.csv')).drop('name', axis=1)
    # N48 borrows N9's SMR inputs; everything else uses its own rows.
    source_ncode = "N9" if ncode == "N48" else ncode
    table = table.loc[table["ncode"] == source_ncode]
    # Standard error from the 95% uncertainty interval: width / (2 * 1.96).
    table["se"] = (table["UL"] - table["LL"]) / 3.92
    table["ncode"] = ncode
    # Random draws align by position, so the index must be 0..n-1.
    table.reset_index(drop=True, inplace=True)
    np.random.seed(659177)
    table[help.drawcols()] = pd.DataFrame(
        np.random.normal(table['SMR'], table['se'],
                         size=(1000, len(table))).T)
    table.drop(['SMR', 'UL', 'LL', 'se'], inplace=True, axis=1)
    table = help.convert_to_age_group_id(table, collapsed_0=False)
    table.set_index(['ncode', 'age_group_id'], inplace=True)
    # An SMR below 1 would imply injury lowers mortality; floor at 1.
    table[table < 1] = 1
    return etl.df_to_xr(table, wide_dim_name='draw', fill_value=np.nan)
def spinal_split(df, ecode, ncode, year_id, decomp, version):
    """Split spinal-cord draws into the four lesion-level outcomes (a-d) and
    write each split to its own upload directory as an HDF file.

    Args:
        df: draws to split; multiplied row-wise by each split's proportions.
        ecode: e-code, used to find the parent cause and output path.
        ncode: spinal N-code being split; output dirs are ncode + 'a'..'d'.
        year_id: year, used in the output filename.
        decomp: decomp step, used in the output path.
        version: run version string (trailing whitespace is stripped).
    """
    from pathlib import Path
    # BUG FIX: the folder was a plain str but is used with the pathlib '/'
    # operator below, which raises TypeError; wrap it in Path.
    spinal_split_folder = Path("FILEPATH")
    drawdict = {'prop_' + d: d for d in help.drawcols()}
    parent = inj_info.ECODE_PARENT[ecode]
    filename = f"36_{year_id}.h5"
    for s in ['a', 'b', 'c', 'd']:
        # Load proportion draws for this split and align column names.
        split_prop = pd.read_csv(spinal_split_folder / f"prop_{s}.csv")
        split_prop.rename(columns=drawdict, inplace=True)
        split_prop.drop('acause', axis=1, inplace=True)
        # Scale the draws by the first (only) row of proportions.
        result = df * split_prop.loc[0]
        result.reset_index(inplace=True)
        version = version.rstrip()
        # BUG FIX: '/' binds tighter than '+', so `... / ncode + s` evaluated
        # to Path + str (TypeError). The severity letter must be appended to
        # the ncode BEFORE joining: (ncode + s).
        out_dir = (paths.DATA_DIR / decomp / parent / str(version) / 'upload'
                   / ecode / (ncode + s))
        # BUG FIX: os.errno does not exist on Python 3; exist_ok replaces the
        # old EEXIST-swallowing handler.
        os.makedirs(out_dir, exist_ok=True)
        result.to_hdf(
            out_dir / filename, 'draws', mode='w', format='table',
            data_columns=['location_id', 'year_id', 'sex_id', 'age_group_id'])
def spinal_split(df, ecode, ncode, year_id, version):
    """Split spinal-cord draws into the four lesion-level outcomes (a-d) and
    write each split to its own output directory as an HDF file.

    Args:
        df: draws to split; multiplied row-wise by each split's proportions.
        ecode: e-code, used to look up the parent cause.
        ncode: spinal N-code being split.
        year_id: year, used in the (sanitized) output filename template.
        version: run version, used in the (sanitized) output path.
    """
    spinal_split_folder = 'FILEPATH'
    drawdict = {'prop_' + d: d for d in help.drawcols()}
    parent = inj_info.ECODE_PARENT[ecode]
    # NOTE(review): the filename was sanitized; the .format arg is kept so
    # the original placeholder template still applies.
    filename = 'FILEPATH.h5'.format(year_id)
    for s in ['a', 'b', 'c', 'd']:
        # load proportion draws
        split_prop = pd.read_csv(
            os.path.join(spinal_split_folder, 'FILEPATH.csv'))
        split_prop.rename(columns=drawdict, inplace=True)
        split_prop.drop('acause', axis=1, inplace=True)
        # split the ncode
        result = df * split_prop.loc[0]
        result.reset_index(inplace=True)
        # save
        out_dir = os.path.join("FILEPATH")
        # BUG FIX: `except OSError, e:` is Python-2-only syntax (a
        # SyntaxError on Python 3) and os.errno does not exist on Python 3.
        # makedirs(exist_ok=True) replaces the whole EEXIST dance.
        os.makedirs(out_dir, exist_ok=True)
        result.to_hdf(
            os.path.join(out_dir, filename), 'draws', mode='w',
            format='table',
            data_columns=['location_id', 'year_id', 'sex_id', 'age_group_id'])
def create_lt_grid(platform, ages):
    """Create a grid of N-codes that should have 100% long-term probability.

    Only N-codes that are long-term for this platform but NOT short-term
    are included — i.e. codes that are exclusively long-term. Every draw
    column is set to 1 (probability 1.0 of a long-term outcome).

    Args:
        platform: platform ('inpatient'/'outpatient' — presumably; confirm
            against inj_info.get_lt_ncodes) used to select N-codes.
        ages: iterable of ages crossed with the N-codes.

    Returns:
        DataFrame with columns ncode, age_gr, platform plus one column per
        draw, all draw values equal to 1.
    """
    ncodes = [
        x for x in inj_info.get_lt_ncodes(platform)
        if x not in inj_info.ST_NCODES
    ]
    # Cross every exclusively-long-term N-code with every age.
    grid = pd.DataFrame(help.expandgrid(ncodes, ages))
    grid.columns = ["ncode", "age_gr"]
    grid["platform"] = platform
    # All draws get probability 1.0.
    for draw in help.drawcols():
        grid[draw] = 1
    return grid
def create_lt_grid(platform, ages):
    """Creates a grid of N-codes that should have 100% long-term probabilities."""
    # Keep only codes that are long-term for this platform and never
    # short-term, i.e. exclusively long-term outcomes.
    exclusively_long_term = [
        code for code in inj_info.get_lt_ncodes(platform)
        if code not in inj_info.ST_NCODES
    ]
    grid = pd.DataFrame(help.expandgrid(exclusively_long_term, ages))
    grid.columns = ["ncode", "age_gr"]
    grid["platform"] = platform
    # Probability 1.0 for every draw column.
    for col in help.drawcols():
        grid[col] = 1
    return grid
def main(ecode, year_id, sex_id, version):
    """Compute adjusted short-term incidence for one ecode/year/sex and
    write the results, printing total wall-clock time."""
    start_time = time.time()

    # Pull incidence, remission, and EMR draws for this ecode's modelable
    # entity and lift each measure into an xarray over draws.
    me_id = help.get_me(ecode)
    m_draws = get_measures(ecode, me_id, year_id, sex_id, version)
    x_inc = etl.df_to_xr(m_draws['incidence'],
                         wide_dim_name='draw', fill_value=np.nan)
    x_rem = etl.df_to_xr(m_draws['remission'],
                         wide_dim_name='draw', fill_value=np.nan)
    x_emr = etl.df_to_xr(m_draws['emr'],
                         wide_dim_name='draw', fill_value=np.nan)

    # Adjust incidence by the outpatient coverage covariate and save.
    otp_cov = outpatient_cov(me_id, help.drawcols())
    adjusted_inc = calculate_measures.short_term_incidence_unsplit(
        x_inc, x_rem, x_emr, otp_cov)
    write_results(adjusted_inc, ecode, version, year_id, sex_id)

    elapsed = time.time() - start_time
    print("Total time was {} seconds".format(elapsed))
def main(ecode, ncode, platform, year, decomp, version, flat_version):
    """Run the DisMod ODE for one ecode/ncode/platform/year over every
    location and both sexes, then write the combined prevalence results.

    Args:
        ecode: e-code being modeled.
        ncode: N-code being modeled.
        platform: inpatient/outpatient platform tag for file naming.
        year: year_id to run.
        decomp: decomp step, used in input/output paths.
        version: run version string (trailing whitespace is stripped).
        flat_version: version of the flat-file inputs holding rate_in files.
    """
    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    dm_settings = os.path.join(paths.SHARE_DIR, 'dismod_settings')
    version = version.rstrip()
    dm_dir = os.path.join(paths.DATA_DIR, decomp,
                          inj_info.ECODE_PARENT[ecode], str(version),
                          "dismod_ode", ecode)
    metaloc = db.get_location_metadata(location_set_id=35,
                                       gbd_round_id=help.GBD_ROUND)
    filepath = write_path(ecode, ncode, platform, year, decomp, version)
    locations = help.ihme_loc_id_dict(metaloc, dems['location_id'])
    alldata = []
    # Shared DisMod ODE input files.
    value_in = os.path.join(dm_dir, "value_in",
                            "value_in_{}_{}.csv".format(ncode, platform))
    draw_in = os.path.join(dm_settings, "draw_in.csv")
    plain_in = os.path.join(dm_settings, "plain_in.csv")
    effect_in = os.path.join(dm_settings, "effect_in.csv")
    v_in = pd.read_csv(value_in)
    initime = help.start_timer()
    for locn in locations:
        for sex in [1, 2]:
            start = help.start_timer()
            # eta_incidence == 0 means incidence is identically zero, so we
            # build an all-zero draw frame instead of running the ODE.
            if float(v_in.loc[v_in['name'] == 'eta_incidence',
                              'value'][0]) == 0:
                result = pd.DataFrame({'age_group_id': dems['age_group_id']})
                result = result.assign(**{d: 0 for d in help.drawcols()})
                result = help.convert_from_age_group_id(result)
            else:
                data_in = os.path.join(
                    dm_dir, "data_in", locations[locn], str(year), str(sex),
                    ecode, "data_in_{}_{}.csv".format(ncode, platform))
                # Rate-in file depends on whether the ncode carries excess
                # mortality.
                if ncode in inj_info.EMR_NCODES:
                    rate_in_name = "rate_in_emr.csv"
                else:
                    rate_in_name = "rate_in_no_emr.csv"
                rate_in = os.path.join(paths.DATA_DIR, 'flats',
                                       str(flat_version), 'rate_in',
                                       str(year), str(sex), locations[locn],
                                       rate_in_name)
                draw_out_dir = os.path.join(dm_dir, "prev_results",
                                            locations[locn], str(year),
                                            str(sex))
                draw_out = os.path.join(
                    draw_out_dir,
                    "prevalence_{}_{}.csv".format(ncode, platform))
                # BUG FIX: os.errno.EEXIST raised AttributeError on
                # Python 3; makedirs(exist_ok=True) is the race-safe
                # equivalent of the old handler.
                os.makedirs(draw_out_dir, exist_ok=True)
                result = run_model_injuries(draw_in, data_in, value_in,
                                            plain_in, rate_in, effect_in,
                                            draw_out, 1000)
            # Attach identifying columns and collect.
            result['location_id'] = locn
            result['platform'] = platform
            result['year_id'] = year
            result['sex_id'] = sex
            alldata.append(result)
            help.end_timer(start)
            sys.stdout.flush()  # write to log file
    final = pd.concat(alldata)
    write_results(final, ecode, ncode, platform, year, decomp, version)
# Lift untreated and treated disability-weight frames into xarrays over draws.
u_dw = etl.df_to_xr(untreat_dw, wide_dim_name='draw', fill_value=np.nan)
t_dw = etl.df_to_xr(treated_dw, wide_dim_name='draw', fill_value=np.nan)
dems = db.get_demographics(gbd_team='epi', gbd_round_id=help.GBD_ROUND)
# Get the percent treated in each country-year, and multiply by dws to get total dw
p_t = calculate_measures.pct_treated()
dw = t_dw * p_t + u_dw * (1 - p_t)
# Load in split proportions for spinal cord injuries and find weighted average disability weight among the 4 splits
# Map each (possibly lettered) ncode to its 3-character parent, e.g. N33a -> N33.
n_parent = pd.Series(index=treated_dw.index,
                     data=[n[0:3] for n in treated_dw.index],
                     name='ncode_parent')
# NOTE(review): path sanitized to 'FILEPATH' before publication; the
# read_csv below presumably joined this folder with a per-split filename
# originally — confirm against the unsanitized source.
spinal_split_folder = 'FILEPATH'
# Map the input's 'prop_draw_i' columns onto the standard draw column names.
drawdict = {'prop_' + d: d for d in help.drawcols()}
split_props_list = []
for s in ['a', 'b', 'c', 'd']:
    # load proportion draws
    split_prop = pd.read_csv(os.path.join('FILEPATH.csv'))
    split_prop.rename(columns=drawdict, inplace=True)
    split_prop.drop('acause', axis=1, inplace=True)
    # Duplicate the single proportion row for both spinal ncodes, relabeling
    # index 0 to e.g. 'N33a' so each split keeps its own row.
    for n in ['N33', 'N34']:
        split_props_list.append(split_prop.rename({0: n + s}))
split_props = pd.concat(split_props_list)
split_props.index.rename('ncode', inplace=True)
# Non-spinal ncodes (no letter suffix, so len < 4) get weight 1, except the
# spinal parents N33/N34 themselves, which are replaced by their splits (0).
other_ncodes = pd.DataFrame(index=[n for n in treated_dw.index if len(n) < 4],
                            columns=help.drawcols(), data=1)
other_ncodes.loc[['N33', 'N34']] = 0
def lognormal():
    """Generate lognormal draws of short-term injury durations.

    Reads a spreadsheet of treated durations (mean/SE/UI by ncode, separately
    for inpatient and outpatient) plus untreated multipliers, fits a
    method-of-moments lognormal to each treated duration, draws 1000 samples,
    applies the multipliers to obtain untreated durations, caps everything at
    one year, and writes both treated and untreated CSVs.

    NOTE(review): file paths were sanitized to 'FILEPATH' before publication.
    """
    # load file
    filepath = 'FILEPATH.xlsx'
    raw = pd.read_excel(filepath, sheet_name='short-term durations',
                        header=None, skiprows=9, index_col=0)
    # subset to the right data
    # Columns 2-5: inpatient mean/se/ll/ul; 6-9: outpatient; 10-12: multipliers.
    inpatient = raw[[2, 3, 4, 5]]
    outpatient = raw[[6, 7, 8, 9]]
    mults = raw.reset_index()[[0, 10, 11, 12]]
    inpatient.rename(columns={
        2: 'mean', 3: 'se', 4: 'll', 5: 'ul'
    }, inplace=True)
    outpatient.rename(columns={
        6: 'mean', 7: 'se', 8: 'll', 9: 'ul'
    }, inplace=True)
    mults.rename(columns={
        0: 'ncode', 10: 'mean', 11: 'll', 12: 'ul'
    }, inplace=True)
    treated = pd.concat([inpatient, outpatient],
                        keys=['inpatient', 'outpatient'],
                        names=['platform', 'ncode'])
    # create SE where it doesn't already exist
    # SE from the 95% uncertainty interval: width / (2 * 1.96).
    treated['se'] = treated['se'].fillna(
        (treated['ul'] - treated['ll']) / 3.92)
    mults['se'] = (mults['ul'] - mults['ll']) / 3.92
    # convert to years
    treated = treated / 365.25
    # Method-of-moments lognormal parameters from mean and SE.
    treated['mu'] = np.log(
        (treated['mean']**2) / np.sqrt(treated['se']**2 + treated['mean']**2))
    treated['sig'] = np.sqrt(np.log(1 + (treated['se'] / treated['mean'])**2))
    # make draws
    treated.reset_index(
        inplace=True)  # need to reset index to make it line up with draws
    np.random.seed(81112)
    # Draw order matters for reproducibility: lognormal durations first,
    # then normal multipliers, under the same seed.
    treated[help.drawcols()] = pd.DataFrame(
        np.random.lognormal(treated['mu'], treated['sig'],
                            size=(1000, len(treated))).T)
    mults[help.drawcols()] = pd.DataFrame(
        np.random.normal(mults['mean'], mults['se'],
                         size=(1000, len(mults))).T)
    # format
    treated.drop(['mean', 'se', 'll', 'ul', 'mu', 'sig'], axis=1,
                 inplace=True)
    treated.set_index(['ncode', 'platform'], inplace=True)
    mults.drop(['mean', 'se', 'll', 'ul'], axis=1, inplace=True)
    mults.set_index(['ncode'], inplace=True)
    # make untreated
    mults[mults < 0] = 0  # multipliers can't be negative
    treated[treated > 1] = 1  # short term, so no longer than one year
    untreated = treated * mults
    untreated[untreated > 1] = 1
    outdir = 'FILEPATH'
    treated.sort_index().to_csv(os.path.join(outdir, 'FILEPATH.csv'))
    untreated.sort_index().to_csv(os.path.join(outdir, 'FILEPATH.csv'))
# Lift untreated and treated disability-weight frames into xarrays over draws.
u_dw = etl.df_to_xr(untreat_dw, wide_dim_name='draw', fill_value=np.nan)
t_dw = etl.df_to_xr(treated_dw, wide_dim_name='draw', fill_value=np.nan)
dems = db.get_demographics(gbd_team='epi', gbd_round_id=help.GBD_ROUND)
decomp = 'step1'
# Percent treated per country-year; blend treated/untreated DWs accordingly.
p_t = calculate_measures.pct_treated(decomp)
dw = t_dw * p_t + u_dw * (1 - p_t)
# Map each (possibly lettered) ncode to its 3-character parent, e.g. N33a -> N33.
n_parent = pd.Series(index=treated_dw.index,
                     data=[n[0:3] for n in treated_dw.index],
                     name='ncode_parent')
spinal_split_folder = "FILEPATH"
# Map the input's 'prop_draw_i' columns onto the standard draw column names.
drawdict = {'prop_' + d: d for d in help.drawcols()}
split_props_list = []
for s in ['a', 'b', 'c', 'd']:
    split_prop = pd.read_csv(
        os.path.join(spinal_split_folder, 'prop_' + s + '.csv'))
    split_prop.rename(columns=drawdict, inplace=True)
    split_prop.drop('acause', axis=1, inplace=True)
    # Duplicate the single proportion row for both spinal ncodes, relabeling
    # index 0 to e.g. 'N33a' so each split keeps its own row.
    for n in ['N33', 'N34']:
        split_props_list.append(split_prop.rename({0: n + s}))
split_props = pd.concat(split_props_list)
split_props.index.rename('ncode', inplace=True)
# Non-spinal ncodes (no letter suffix, so len < 4) get weight 1, except the
# spinal parents N33/N34 themselves, which are replaced by their splits (0).
other_ncodes = pd.DataFrame(index=[n for n in treated_dw.index if len(n) < 4],
                            columns=help.drawcols(), data=1)
other_ncodes.loc[['N33', 'N34']] = 0
other_ncodes.index.rename('ncode', inplace=True)
# BUG FIX: DataFrame.append was deprecated and removed in pandas 2.0;
# pd.concat([a, b]) is the exact equivalent.
weight = etl.df_to_xr(pd.concat([split_props, other_ncodes]),
                      wide_dim_name='draw', fill_value=np.nan)
def main(ecode, ncode, platform, year, version, flat_version):
    """Run the DisMod ODE for one ecode/ncode/platform/year over every
    location and both sexes, then write the combined prevalence results.

    Args:
        ecode: e-code being modeled.
        ncode: N-code being modeled.
        platform: inpatient/outpatient platform tag for file naming.
        year: year_id to run.
        version: run version, used in (sanitized) paths.
        flat_version: version of the flat-file inputs holding rate_in files.
    """
    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    dm_settings = os.path.join("FILEPATH")
    dm_dir = os.path.join("FILEPATH")
    metaloc = db.get_location_metadata(location_set_id=35,
                                       gbd_round_id=help.GBD_ROUND)
    locations = help.ihme_loc_id_dict(metaloc, dems['location_id'])
    alldata = []
    # Shared DisMod ODE input files (paths sanitized to "FILEPATH").
    value_in = os.path.join("FILEPATH.csv".format(ncode, platform))
    draw_in = os.path.join(dm_settings, "FILEPATH.csv")
    plain_in = os.path.join(dm_settings, "FILEPATH.csv")
    effect_in = os.path.join(dm_settings, "FILEPATH.csv")
    v_in = pd.read_csv(value_in)
    num_locs = len(locations)
    loc_pos = 0
    initime = help.start_timer()
    for locn in locations:
        loc_pos = loc_pos + 1
        for sex in [1, 2]:
            print("Running DisMod ODE for location {} year {} sex {}".format(
                locations[locn], year, sex))
            start = help.start_timer()
            # eta_incidence == 0 means incidence is identically zero, so we
            # build an all-zero draw frame instead of running the ODE.
            if float(v_in.loc[v_in['name'] == 'eta_incidence',
                              'value'][0]) == 0:
                print('eta incidence is 0, so all incidence should be 0 and '
                      'we\'ll just make an all 0 df instead of '
                      'running the ODE')
                result = pd.DataFrame({'age_group_id': dems['age_group_id']})
                result = result.assign(**{d: 0 for d in help.drawcols()})
                result = help.convert_from_age_group_id(result)
            else:
                data_in = os.path.join("FILEPATH.csv".format(ncode, platform))
                # create the rate in filepath based on whether it has excess
                # mortality or not
                if ncode in inj_info.EMR_NCODES:
                    rate_in_name = "FILEPATH.csv"
                else:
                    rate_in_name = "FILEPATH.csv"
                rate_in = os.path.join("FILEPATH")
                draw_out_dir = os.path.join("FILEPATH")
                draw_out = os.path.join("FILEPATH.csv".format(ncode, platform))
                # BUG FIX: os.errno.EEXIST raised AttributeError on Python 3;
                # makedirs(exist_ok=True) is the race-safe equivalent of the
                # old handler.
                os.makedirs(draw_out_dir, exist_ok=True)
                result = run_model_injuries(draw_in, data_in, value_in,
                                            plain_in, rate_in, effect_in,
                                            draw_out, 1000)
            # format the results so that we have the identifying columns
            result['location_id'] = locn
            result['platform'] = platform
            result['year_id'] = year
            result['sex_id'] = sex
            alldata.append(result)
            help.end_timer(start)
            sys.stdout.flush()  # write to log file
            # NOTE(review): the original was collapsed to one line, so the
            # exact nesting of this progress report is ambiguous; it follows
            # the flush directly in the flat text, so it is kept per-sex here.
            total_time = (time.time() - initime) / 60.
            print('Completed {} of {} locations in {} minutes. Will take {} '
                  'more minutes at this rate'.format(
                      loc_pos, num_locs, total_time,
                      (total_time / loc_pos) * (num_locs - loc_pos)))
            sys.stdout.flush()  # write to log file
    # concatenate all of the data together
    final = pd.concat(alldata)
    write_results(final, ecode, ncode, platform, year, version)
    print('Finished!')
def lognormal():
    """Generate lognormal draws of short-term injury durations.

    Reads a spreadsheet of treated durations (mean/SE/UI by ncode, for both
    inpatient and outpatient) plus untreated multipliers, fits a
    method-of-moments lognormal to each treated duration, draws 1000 samples,
    applies the multipliers to get untreated durations, caps everything at
    one year, and writes both treated and untreated CSVs.
    """
    source = "FILEPATH"
    sheet = pd.read_excel(source, sheet_name='short-term durations',
                          header=None, skiprows=9, index_col=0)

    # Carve the sheet into its three column groups: inpatient durations,
    # outpatient durations, and untreated multipliers.
    inp = sheet[[2, 3, 4, 5]].rename(
        columns={2: 'mean', 3: 'se', 4: 'll', 5: 'ul'})
    otp = sheet[[6, 7, 8, 9]].rename(
        columns={6: 'mean', 7: 'se', 8: 'll', 9: 'ul'})
    multipliers = sheet.reset_index()[[0, 10, 11, 12]].rename(
        columns={0: 'ncode', 10: 'mean', 11: 'll', 12: 'ul'})

    durations = pd.concat([inp, otp], keys=['inpatient', 'outpatient'],
                          names=['platform', 'ncode'])

    # Fill missing SEs from the 95% UI: width / (2 * 1.96).
    durations['se'] = durations['se'].fillna(
        (durations['ul'] - durations['ll']) / 3.92)
    multipliers['se'] = (multipliers['ul'] - multipliers['ll']) / 3.92

    # Days -> years, then method-of-moments lognormal parameters.
    durations = durations / 365.25
    durations['mu'] = np.log(
        (durations['mean']**2)
        / np.sqrt(durations['se']**2 + durations['mean']**2))
    durations['sig'] = np.sqrt(
        np.log(1 + (durations['se'] / durations['mean'])**2))

    # Positional index so the draw matrices line up row-for-row.
    durations.reset_index(inplace=True)
    np.random.seed(81112)
    # Draw order matters for reproducibility: durations first, then
    # multipliers, under the same seed.
    durations[help.drawcols()] = pd.DataFrame(
        np.random.lognormal(durations['mu'], durations['sig'],
                            size=(1000, len(durations))).T)
    multipliers[help.drawcols()] = pd.DataFrame(
        np.random.normal(multipliers['mean'], multipliers['se'],
                         size=(1000, len(multipliers))).T)

    # Keep only the draw columns, indexed for output.
    durations.drop(['mean', 'se', 'll', 'ul', 'mu', 'sig'], axis=1,
                   inplace=True)
    durations.set_index(['ncode', 'platform'], inplace=True)
    multipliers.drop(['mean', 'se', 'll', 'ul'], axis=1, inplace=True)
    multipliers.set_index(['ncode'], inplace=True)

    # Multipliers can't be negative; durations are short-term, so cap at
    # one year both before and after applying the multipliers.
    multipliers[multipliers < 0] = 0
    durations[durations > 1] = 1
    untreated = durations * multipliers
    untreated[untreated > 1] = 1

    outdir = "FILEPATH"
    durations.sort_index().to_csv(
        os.path.join(outdir, 'durs_treated_test_log.csv'))
    untreated.sort_index().to_csv(
        os.path.join(outdir, 'durs_untreated_test_log.csv'))