Example #1
0
def load_forecast_data(key: EntityKey, location: str):
    """Load forecast draws for ``key`` at ``location`` and return them in
    standard hierarchical format with 1000 ``draw_i`` columns.

    Keeps only the reference forecasting scenario, widens draws into
    columns, pads short draw sets out to 1000, then applies the usual
    normalize / reshape / scrub / interval-split pipeline.
    """
    loc_id = extract.get_location_id(location)
    raw = extract.load_forecast_from_xarray(paths.forecast_data_path(key),
                                            loc_id)
    raw = raw[raw.scenario == project_globals.FORECASTING_SCENARIO].drop(
        columns='scenario')

    index_cols = ['location_id', 'age_group_id', 'sex_id', 'year_id']
    if key == EntityKey('etiology.shigellosis.incidence'):
        # Only one draw for incidence: tile the single draw out to the
        # configured draw count.
        single_draw = raw.set_index(index_cols).value
        wide = pd.concat([single_draw] * project_globals.NUM_DRAWS, axis=1)
    else:
        wide = raw.set_index(index_cols + ['draw']).unstack()

    if len(wide.columns) == 100:  # Not 1000 draws for everything
        wide = pd.concat([wide] * 10, axis=1)
    wide.columns = pd.Index([f'draw_{i}' for i in range(1000)])

    result = standardize.normalize(wide.reset_index())
    result = utilities.reshape(result)
    result = utilities.scrub_gbd_conventions(result, location)
    for interval in ('age', 'year'):
        result = utilities.split_interval(result,
                                          interval_column=interval,
                                          split_column_prefix=interval)
    return utilities.sort_hierarchical_data(result)
Example #2
0
def _load_diarrhea_sequela_disability_weight(sequela, location_id: int):
    """Pull GBD 2016 disability-weight draws for a diarrhea sequela.

    Filters the auxiliary disability-weight data down to the sequela's
    health state, normalizes it, and zeroes out weights that fall outside
    the diarrheal-disease restrictions.
    """
    logger.info(f'Loading disability weight for {sequela.name} from GBD 2016.')
    raw = extract.get_auxiliary_data('disability_weight', 'sequela', 'all',
                                     location_id)
    raw = raw.loc[raw.healthstate_id == sequela.healthstate.gbd_id, :]
    normalized = standardize.normalize(raw)
    # Set weights to 0.0 outside the cause's age/sex restrictions.
    restricted = utilities.clear_disability_weight_outside_restrictions(
        normalized, causes.diarrheal_diseases, 0.0,
        utility_data.get_age_group_ids())
    keep_columns = vi_globals.DEMOGRAPHIC_COLUMNS + vi_globals.DRAW_COLUMNS
    return utilities.reshape(restricted.filter(keep_columns))
Example #3
0
def _load_prevalence(entity, location_id: int, entity_type: str):
    """Pull 2016 prevalence draws for ``entity`` from the COMO results.

    Restricts to the prevalence measure and the cause's YLD age groups,
    pins to the 2016 results, and returns reshaped draw-level data.
    """
    logger.info(f'Loading prevalence for {entity.name} from GBD 2016.')
    draws = extract.get_como_draws(entity.gbd_id, location_id, entity_type)
    draws = draws[draws.measure_id == vi_globals.MEASURES['Prevalence']]
    draws = utilities.filter_data_by_restrictions(
        draws, causes.diarrheal_diseases, 'yld',
        utility_data.get_age_group_ids())
    # Use latest GBD results for all data
    draws = draws[draws.year_id == 2016].drop(columns='year_id')
    draws = standardize.normalize(draws, fill_value=0)
    keep_columns = vi_globals.DEMOGRAPHIC_COLUMNS + vi_globals.DRAW_COLUMNS
    return utilities.reshape(draws.filter(keep_columns))
Example #4
0
def _get_raw_demographic_dimensions(location: str):
    """Build the full demographic grid (location x sex x age x year) for
    ``location`` as a normalized, reshaped frame."""
    dimensions = {
        'location_id': [extract.get_location_id(location)],
        'sex_id': [vi_globals.SEXES['Male'], vi_globals.SEXES['Female']],
        'age_group_id': utility_data.get_age_group_ids(),
        'year_id': range(project_globals.MIN_YEAR,
                         project_globals.MAX_YEAR + 1),
    }
    # Cartesian product of every dimension, one row per combination.
    grid = pd.MultiIndex.from_product(
        dimensions.values(), names=list(dimensions)).to_frame(index=False)
    return utilities.reshape(standardize.normalize(grid))
Example #5
0
def load_shigella_remission_rate(key: EntityKey, location: str):
    """Load the diarrheal-disease remission rate draws for ``location`` in
    standard hierarchical format.

    Uses the diarrheal-diseases DisMod model, restricted to the remission
    measure, YLD age groups, and the 2016 results.
    """
    loc_id = extract.get_location_id(location)
    draws = extract.get_modelable_entity_draws(
        causes.diarrheal_diseases.dismod_id, loc_id)
    draws = draws[draws.measure_id == vi_globals.MEASURES['Remission rate']]
    draws = utilities.filter_data_by_restrictions(
        draws, causes.diarrheal_diseases, 'yld',
        utility_data.get_age_group_ids())
    # Use latest GBD results for all data
    draws = draws[draws.year_id == 2016].drop(columns='year_id')
    draws = standardize.normalize(draws, fill_value=0)
    draws = draws.filter(vi_globals.DEMOGRAPHIC_COLUMNS +
                         vi_globals.DRAW_COLUMNS)

    shaped = utilities.reshape(draws)
    shaped = utilities.scrub_gbd_conventions(shaped, location)
    for interval in ('age', 'year'):
        shaped = utilities.split_interval(shaped,
                                          interval_column=interval,
                                          split_column_prefix=interval)
    return utilities.sort_hierarchical_data(shaped)
Example #6
0
def load_live_births_by_year(key: EntityKey, location: str):
    """Compute forecast live births by year for ``location``.

    Live births = age-specific fertility rate * population, summed over
    age and sex, returned in standard hierarchical format with draw
    columns and a split year interval.
    """
    loc_id = extract.get_location_id(location)

    def _wide_forecast(entity_key, min_year=None):
        # Load a forecast, keep the reference scenario (optionally from
        # min_year on), and pivot draws into draw_0..draw_999 columns.
        frame = extract.load_forecast_from_xarray(
            paths.forecast_data_path(entity_key), loc_id)
        mask = frame.scenario == project_globals.FORECASTING_SCENARIO
        if min_year is not None:
            mask &= frame.year_id >= min_year
        frame = frame[mask].drop(columns='scenario')
        frame = frame.set_index(
            ['location_id', 'age_group_id', 'sex_id', 'year_id',
             'draw']).unstack()
        frame.columns = pd.Index([f'draw_{i}' for i in range(1000)])
        return frame

    asfr = _wide_forecast(
        EntityKey('covariate.age_specific_fertility_rate.estimate'),
        min_year=project_globals.MIN_YEAR)
    population = _wide_forecast(EntityKey(project_globals.POPULATION_STRUCTURE))
    # Align population rows to the (already year-filtered) fertility index.
    population = population.loc[asfr.index]

    live_births = (asfr * population).reset_index()
    live_births = (live_births
                   .drop(columns=['sex_id', 'age_group_id'])
                   .groupby(['location_id', 'year_id'])
                   .sum()
                   .reset_index())

    result = standardize.normalize(live_births)
    result = utilities.reshape(result)
    result = utilities.scrub_gbd_conventions(result, location)
    result = utilities.split_interval(result,
                                      interval_column='year',
                                      split_column_prefix='year')
    return utilities.sort_hierarchical_data(result)