def load_forecast_data(key: EntityKey, location: str):
    """Load forecast draws for ``key`` in ``location`` as sorted hierarchical data.

    Shigellosis incidence ships with a single draw, which is replicated out to
    the project draw count; every other key carries per-draw rows that are
    pivoted wide.  Sources with only 100 draws are tiled up to 1000 before the
    standard normalize/reshape/scrub/split post-processing.
    """
    location_id = extract.get_location_id(location)
    forecast_path = paths.forecast_data_path(key)
    data = extract.load_forecast_from_xarray(forecast_path, location_id)
    # Restrict to the project's forecasting scenario; the column is then redundant.
    data = data[data.scenario == project_globals.FORECASTING_SCENARIO].drop(columns='scenario')

    demographic_index = ['location_id', 'age_group_id', 'sex_id', 'year_id']
    if key == EntityKey('etiology.shigellosis.incidence'):
        # Only one draw for incidence
        single_draw = data.set_index(demographic_index).value
        data = pd.concat(project_globals.NUM_DRAWS * [single_draw], axis=1)
    else:
        data = data.set_index(demographic_index + ['draw']).unstack()
        if len(data.columns) == 100:
            # Not 1000 draws for everything
            data = pd.concat([data] * 10, axis=1)
    data.columns = pd.Index([f'draw_{i}' for i in range(1000)])
    data = data.reset_index()

    data = standardize.normalize(data)
    data = utilities.reshape(data)
    data = utilities.scrub_gbd_conventions(data, location)
    data = utilities.split_interval(data, interval_column='age', split_column_prefix='age')
    data = utilities.split_interval(data, interval_column='year', split_column_prefix='year')
    return utilities.sort_hierarchical_data(data)
def _load_diarrhea_sequela_disability_weight(sequela, location_id: int):
    """Pull GBD 2016 disability-weight draws for one diarrheal-disease sequela.

    Filters the auxiliary disability-weight data down to the sequela's health
    state, zeroes weights outside the diarrheal-diseases restrictions, and
    returns the reshaped draw columns.
    """
    logger.info(f'Loading disability weight for {sequela.name} from GBD 2016.')
    weights = extract.get_auxiliary_data('disability_weight', 'sequela', 'all', location_id)
    # Keep only the rows for this sequela's health state.
    weights = weights.loc[weights.healthstate_id == sequela.healthstate.gbd_id, :]
    weights = standardize.normalize(weights)
    weights = utilities.clear_disability_weight_outside_restrictions(
        weights, causes.diarrheal_diseases, 0.0, utility_data.get_age_group_ids())
    weights = weights.filter(vi_globals.DEMOGRAPHIC_COLUMNS + vi_globals.DRAW_COLUMNS)
    return utilities.reshape(weights)
def _load_prevalence(entity, location_id: int, entity_type: str):
    """Pull GBD 2016 prevalence draws for ``entity`` at ``location_id``.

    Restricts COMO draws to the prevalence measure and the diarrheal-diseases
    YLD restrictions, pins the data to the 2016 round, and returns the
    reshaped draw columns.
    """
    logger.info(f'Loading prevalence for {entity.name} from GBD 2016.')
    prevalence = extract.get_como_draws(entity.gbd_id, location_id, entity_type)
    prevalence = prevalence[prevalence.measure_id == vi_globals.MEASURES['Prevalence']]
    prevalence = utilities.filter_data_by_restrictions(
        prevalence, causes.diarrheal_diseases, 'yld', utility_data.get_age_group_ids())
    # Use latest GBD results for all data
    prevalence = prevalence[prevalence.year_id == 2016].drop(columns='year_id')
    prevalence = standardize.normalize(prevalence, fill_value=0)
    prevalence = prevalence.filter(vi_globals.DEMOGRAPHIC_COLUMNS + vi_globals.DRAW_COLUMNS)
    return utilities.reshape(prevalence)
def _get_raw_demographic_dimensions(location: str):
    """Build the full demographic grid for ``location``.

    Produces one row per (location, sex, age group, year) combination over the
    project's year range, then normalizes and reshapes it into the standard
    format.
    """
    # Insertion order fixes both the product order and the index names.
    dimensions = {
        'location_id': [extract.get_location_id(location)],
        'sex_id': [vi_globals.SEXES['Male'], vi_globals.SEXES['Female']],
        'age_group_id': utility_data.get_age_group_ids(),
        'year_id': range(project_globals.MIN_YEAR, project_globals.MAX_YEAR + 1),
    }
    grid = pd.MultiIndex.from_product(
        dimensions.values(), names=list(dimensions)).to_frame(index=False)
    grid = standardize.normalize(grid)
    return utilities.reshape(grid)
def load_shigella_remission_rate(key: EntityKey, location: str):
    """Load the diarrheal-diseases remission rate for ``location``.

    Pulls DisMod draws for diarrheal diseases, restricts them to the remission
    measure, the YLD restrictions, and the 2016 round, then runs the standard
    normalize/reshape/scrub/split post-processing and returns sorted
    hierarchical data.
    """
    location_id = extract.get_location_id(location)
    remission = extract.get_modelable_entity_draws(
        causes.diarrheal_diseases.dismod_id, location_id)
    remission = remission[remission.measure_id == vi_globals.MEASURES['Remission rate']]
    remission = utilities.filter_data_by_restrictions(
        remission, causes.diarrheal_diseases, 'yld', utility_data.get_age_group_ids())
    # Use latest GBD results for all data
    remission = remission[remission.year_id == 2016].drop(columns='year_id')
    remission = standardize.normalize(remission, fill_value=0)
    remission = remission.filter(vi_globals.DEMOGRAPHIC_COLUMNS + vi_globals.DRAW_COLUMNS)
    remission = utilities.reshape(remission)
    remission = utilities.scrub_gbd_conventions(remission, location)
    remission = utilities.split_interval(remission, interval_column='age', split_column_prefix='age')
    remission = utilities.split_interval(remission, interval_column='year', split_column_prefix='year')
    return utilities.sort_hierarchical_data(remission)
def load_live_births_by_year(key: EntityKey, location: str):
    """Compute forecasted live births per year for ``location``.

    Multiplies age-specific fertility rates by population structure draw-wise,
    sums the products over age and sex, and returns the yearly totals as
    sorted hierarchical data.
    """
    location_id = extract.get_location_id(location)
    index_columns = ['location_id', 'age_group_id', 'sex_id', 'year_id', 'draw']
    draw_labels = pd.Index([f'draw_{i}' for i in range(1000)])

    asfr_key = EntityKey('covariate.age_specific_fertility_rate.estimate')
    asfr = extract.load_forecast_from_xarray(
        paths.forecast_data_path(asfr_key), location_id)
    scenario_mask = asfr.scenario == project_globals.FORECASTING_SCENARIO
    year_mask = asfr.year_id >= project_globals.MIN_YEAR
    asfr = asfr[scenario_mask & year_mask].drop(columns='scenario')
    asfr = asfr.set_index(index_columns).unstack()
    asfr.columns = draw_labels

    pop_key = EntityKey(project_globals.POPULATION_STRUCTURE)
    population = extract.load_forecast_from_xarray(
        paths.forecast_data_path(pop_key), location_id)
    population = population[
        population.scenario == project_globals.FORECASTING_SCENARIO].drop(columns='scenario')
    population = population.set_index(index_columns).unstack()
    population.columns = draw_labels
    # Align population to the fertility index (also trims years before MIN_YEAR).
    population = population.loc[asfr.index]

    live_births = (asfr * population).reset_index()
    live_births = (live_births
                   .drop(columns=['sex_id', 'age_group_id'])
                   .groupby(['location_id', 'year_id'])
                   .sum()
                   .reset_index())

    data = standardize.normalize(live_births)
    data = utilities.reshape(data)
    data = utilities.scrub_gbd_conventions(data, location)
    data = utilities.split_interval(data, interval_column='year', split_column_prefix='year')
    return utilities.sort_hierarchical_data(data)