def _agg_age_std_ages(self):
    age_tree = agetree(age.AGE_STANDARDIZED)

    # make the source and sink; fill_square ensures every leaf age group is
    # present before weighting
    source = self.gen_draw_source()
    source.add_transform(
        fill_square,
        index_cols=[
            col for col in self.dimensions.index_names
            if col != "age_group_id"
        ],
        square_col="age_group_id",
        square_col_vals=[node.id for node in age_tree.leaves()])
    sink = self.gen_draw_sink()

    # construct aggregator obj: weighted sum of the age-specific draws using
    # the standard age weights
    operator = WtdSum(
        index_cols=[
            col for col in self.dimensions.index_names
            if col != "age_group_id"
        ],
        value_cols=self.dimensions.data_list(),
        weight_df=self.std_age_weights,
        weight_name="age_group_weight_value",
        merge_cols=["age_group_id"])
    aggregator = AggSynchronous(
        draw_source=source,
        draw_sink=sink,
        index_cols=[
            col for col in self.dimensions.index_names
            if col != "age_group_id"
        ],
        aggregate_col="age_group_id",
        operator=operator)

    # run the tree
    aggregator.run(age_tree)
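# Hedged sketch (not part of the source): the core of the age-standardized
# aggregation above is a weighted sum of age-specific rates using standard age
# weights merged on age_group_id. The data and weight values below are
# hypothetical; only the column names mirror the function.
import pandas as pd

rates = pd.DataFrame({
    "location_id": [1, 1, 1],
    "age_group_id": [10, 11, 12],
    "draw_0": [0.010, 0.020, 0.030],
})
weights = pd.DataFrame({
    "age_group_id": [10, 11, 12],
    "age_group_weight_value": [0.5, 0.3, 0.2],
})
merged = rates.merge(weights, on="age_group_id")
merged["draw_0"] *= merged["age_group_weight_value"]
age_standardized = (
    merged.drop(columns=["age_group_id", "age_group_weight_value"])
          .groupby("location_id")
          .sum()
          .reset_index()
)
# one age-standardized row per location: 0.010*0.5 + 0.020*0.3 + 0.030*0.2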
def _agg_pop_wtd_ages_birth(self, age_group_id):
    age_tree = agetree(age_group_id)
    age_tree.add_node(age.BIRTH, {}, age_tree.root.id)

    # make the source and sink; aggregate in count space, then convert the
    # result back to rates
    source = self.gen_draw_source()
    source.add_transform(convert_to_counts, self.population,
                         self.dimensions.data_list())
    sink = self.gen_draw_sink()
    sink.add_transform(convert_to_rates, self.population,
                       self.dimensions.data_list())

    # construct aggregator obj
    operator = Sum(
        index_cols=[
            col for col in self.dimensions.index_names
            if col != "age_group_id"
        ],
        value_cols=self.dimensions.data_list())
    aggregator = AggSynchronous(
        draw_source=source,
        draw_sink=sink,
        index_cols=[
            col for col in self.dimensions.index_names
            if col != "age_group_id"
        ],
        aggregate_col="age_group_id",
        operator=operator)
    aggregator.run(age_tree)
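# Hedged sketch (not part of the source): the count-space round trip assumed by
# convert_to_counts / convert_to_rates above is, conceptually, rate * population
# on the way in, a plain sum over the child age groups, and count / population
# on the way out. Values are hypothetical.
import pandas as pd

children = pd.DataFrame({
    "age_group_id": [2, 3],
    "population": [1000.0, 2000.0],
    "draw_0": [0.02, 0.01],                     # age-specific rates
})
children["draw_0"] *= children["population"]    # rates -> counts
totals = children[["population", "draw_0"]].sum()   # sum the children
parent_rate = totals["draw_0"] / totals["population"]  # counts -> rate
# parent_rate == (0.02 * 1000 + 0.01 * 2000) / 3000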
def _compute_age_aggregates(
        data: pd.DataFrame,
        gbd_round_id: int,
        groupby_cols: List[str] = Columns.INDEX
) -> pd.DataFrame:
    """Takes a dataframe in count space, calculates all aggregated ages from
    gbd.constants.GBD_COMPARE_AGES plus ALL_AGES, and returns the aggregates
    as a new dataframe.

    Arguments:
        data (pd.DataFrame): dataframe containing indices and draws to create
            age aggregates with.
        gbd_round_id (int): ID of the GBD round used to build the age trees.
        groupby_cols (List[str]): index columns to group by when summing
            children into their parent age group.

    Returns:
        A new dataframe containing only the aggregated age groups.
    """
    compare_ages = list(
        set(gbd.GBD_COMPARE_AGES).union(set(Ages.END_OF_ROUND_AGE_GROUPS))
    )
    if gbd.age.ALL_AGES not in compare_ages:
        compare_ages.append(gbd.age.ALL_AGES)

    # drop any pre-existing aggregate rows so they are not double counted
    data = data[~data[Columns.AGE_GROUP_ID].isin(compare_ages)]

    # create age trees
    age_trees = []
    for age_group in compare_ages:
        tree = agetree(age_group_id=age_group, gbd_round_id=gbd_round_id)
        age_trees.append(tree)

    agg_ages = []
    # for each tree, identify the child age groups and groupby-sum the
    # children to produce the parent estimates.
    for atree in age_trees:
        child_ids = list(map(lambda x: x.id, atree.root.children))
        child_data = data[data[Columns.AGE_GROUP_ID].isin(child_ids)].copy()
        child_data[Columns.AGE_GROUP_ID] = atree.root.id
        child_data = child_data.groupby(groupby_cols).sum().reset_index()
        agg_ages.append(child_data)

    aggregated_ages: pd.DataFrame = pd.concat(agg_ages).reset_index(drop=True)
    return aggregated_ages
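# Hedged usage sketch (not part of the source): each aggregate is produced by
# relabelling the child rows with the parent age_group_id and summing in count
# space. The parent/child IDs here are hard-coded stand-ins for what agetree
# would return.
import pandas as pd

detailed = pd.DataFrame({
    "location_id": [1, 1],
    "age_group_id": [30, 31],   # pretend these are children of aggregate 21
    "deaths": [5.0, 7.0],
})
children = detailed.copy()
children["age_group_id"] = 21   # relabel children with the parent ID
aggregate = (
    children.groupby(["location_id", "age_group_id"]).sum().reset_index()
)
# one row: location 1, age group 21, deaths 12.0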
def generate_aggregated_ages(df, index_columns, database='gbd'):
    """Takes a dataframe in count space, calculates all aggregated ages, and
    adds them to the dataframe.

    Arguments:
        df (pd.DataFrame): dataframe containing indices and draws to create
            age aggregates with.
        index_columns (str[]): list of strings representing the data indices
            to aggregate over.
        database (str): destination database; aggregate age group 21 is not
            computed when the data are destined for the 'cod' database.

    Returns:
        The original dataframe with the aggregated age groups appended.
    """
    compare_ages = GBD.GBD_COMPARE_AGES
    # remove 28 from our list of GBD compare ages; we don't compute under one.
    if 28 in compare_ages:
        compare_ages.remove(28)
    if 22 not in compare_ages:
        compare_ages.append(22)
    if (database == 'cod') and (21 in compare_ages):
        compare_ages.remove(21)

    # drop any pre-existing aggregate rows so they are not double counted
    df = df[~df['age_group_id'].isin(compare_ages)]

    # create age trees
    age_trees = []
    for age_group in compare_ages:
        tree = agetree(age_group_id=age_group, gbd_round_id=GBD.GBD_ROUND_ID)
        age_trees.append(tree)

    # for each tree, relabel the children with the parent ID and sum
    agg_ages = []
    for atree in age_trees:
        child_ids = list(map(lambda x: x.id, atree.root.children))
        temp = df[df['age_group_id'].isin(child_ids)].copy(deep=True)
        temp['age_group_id'] = atree.root.id
        temp = temp.groupby(index_columns).sum().reset_index()
        agg_ages.append(temp)

    agg_ages_df = pd.concat(agg_ages)
    df = pd.concat([df, agg_ages_df]).reset_index(drop=True)
    return df
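# Hedged sketch (not part of the source): unlike _compute_age_aggregates, this
# function returns the detailed rows with the aggregates appended rather than
# the aggregates alone. Hypothetical frames illustrate that final concat step.
import pandas as pd

detailed_rows = pd.DataFrame({"age_group_id": [30, 31], "deaths": [5.0, 7.0]})
aggregated_rows = pd.DataFrame({"age_group_id": [21], "deaths": [12.0]})
combined = pd.concat([detailed_rows, aggregated_rows]).reset_index(drop=True)
# combined holds both the detailed age groups and the aggregate row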
def get_age_group_map(
        gbd_round_id: int, age_group_ids: List[int]
) -> Dict[int, List[int]]:
    """Gets dictionary of age group ID to age group IDs in the aggregate.

    Replaces detailed age groups [2, 3, 4] with aggregate age group 28 since
    life tables are not produced for age groups [2, 3, 4].

    Sorts age groups by age start since probability of death calculation
    relies on age groups being in sorted order.

    Most-detailed age groups are returned in the same format as aggregate age
    groups in order to provide the probability of death calculation with a
    consistent data structure. E.g.:

        {
            6: [6],                  # Most detailed
            21: [30, 31, 32, 235]    # Aggregate
        }

    Args:
        gbd_round_id: ID of the GBD round with which to build age trees.
        age_group_ids: IDs of age groups, both aggregate and most detailed.

    Returns:
        Dictionary of age group ID to age group IDs that comprise the
        aggregate.
    """
    age_groups_with_starts = ezfuncs.query(
        queries.GET_AGE_STARTS,
        conn_def="cod",
        parameters={"age_group_ids": age_group_ids})

    age_group_map: Dict[int, List[int]] = {}
    for age_group_id in age_group_ids:
        tree = dbtrees.agetree(age_group_id, gbd_round_id)
        detailed_ids = [node.id for node in tree.root.children]
        by_age_start = _sort_age_group_ids(detailed_ids, age_groups_with_starts)
        under_one_replaced = _replace_under_one(by_age_start)
        age_group_map[age_group_id] = under_one_replaced
    return age_group_map
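# Hedged sketch (not part of the source): simplified single-argument stand-ins
# for the _sort_age_group_ids and _replace_under_one helpers used above. The
# age starts are hard-coded approximations here; the real code reads them from
# the cod database, and the real helper signatures differ.
from typing import Dict, List

AGE_STARTS: Dict[int, float] = {2: 0.0, 3: 0.02, 4: 0.08, 5: 1.0}

def sort_age_group_ids(ids: List[int]) -> List[int]:
    # order children by age start so downstream math sees them sorted
    return sorted(ids, key=lambda age_id: AGE_STARTS[age_id])

def replace_under_one(ids: List[int]) -> List[int]:
    # collapse detailed under-one groups [2, 3, 4] into aggregate group 28
    under_one = {2, 3, 4}
    kept = [age_id for age_id in ids if age_id not in under_one]
    return [28] + kept if under_one & set(ids) else kept

print(replace_under_one(sort_age_group_ids([4, 2, 5, 3])))  # [28, 5]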
def __init__(
        self,
        cause_id,
        year_id,
        out_dir,
        cod_process_v,
        decomp_step,
        gbd_round_id,
        location_set_ids=mmr_constants.AGGREGATE_LOCATION_SET_IDS):
    self.cause_id = cause_id
    self.year_id = year_id
    self.out_dir = out_dir
    self.cod_process_v = cod_process_v
    self.decomp_step = decomp_step
    self.gbd_round_id = gbd_round_id

    # maternal estimates: females only (sex_id 2), age groups 7-15
    self.sex_id = [2]
    self.age_group_ids = list(range(7, 16))

    self.location_set_ids = location_set_ids
    self.location_ids = self.get_location_ids()

    # map each aggregate age group to the leaf age groups of its age tree
    self.aggregated_age_group_ids: Dict[int, List[int]] = {
        ag_id: [_id.id for _id in dbtrees.agetree(ag_id).leaves()]
        for ag_id in mmr_constants.AGGREGATE_AGE_GROUP_IDS
    }

    self.draw_cols = ['draw_{}'.format(i) for i in range(1000)]
    self.index_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']
    self.live_birth_col = mmr_constants.Columns.LIVE_BIRTH_VALUE_COL
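# Hedged sketch (not part of the source): the shape of aggregated_age_group_ids
# built above, i.e. a map from each aggregate age group ID to the leaf age
# group IDs of its age tree. The entry below uses commonly cited GBD IDs
# (24 = 15-49 years, built from the five-year groups 8-14) purely for
# illustration; the real keys come from mmr_constants.AGGREGATE_AGE_GROUP_IDS.
from typing import Dict, List

example_aggregated_age_group_ids: Dict[int, List[int]] = {
    24: [8, 9, 10, 11, 12, 13, 14],
}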
def _agg_pop_wtd_ages(self, age_group_id):
    age_tree = agetree(age_group_id)

    # make the source and sink; fill_square ensures every leaf age group is
    # present before weighting
    source = self.gen_draw_source()
    source.add_transform(
        fill_square,
        index_cols=[
            col for col in self.dimensions.index_names
            if col != "age_group_id"
        ],
        square_col="age_group_id",
        square_col_vals=[node.id for node in age_tree.leaves()])
    sink = self.gen_draw_sink()

    # construct aggregator obj: population-weighted sum over the age tree
    operator = WtdSum(
        index_cols=[
            col for col in self.dimensions.index_names
            if col != "age_group_id"
        ],
        value_cols=self.dimensions.data_list(),
        weight_df=self.population,
        weight_name="population",
        merge_cols=["location_id", "year_id", "age_group_id", "sex_id"])
    aggregator = AggSynchronous(
        draw_source=source,
        draw_sink=sink,
        index_cols=[
            col for col in self.dimensions.index_names
            if col != "age_group_id"
        ],
        aggregate_col="age_group_id",
        operator=operator)
    aggregator.run(age_tree)
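# Hedged sketch (not part of the source): the fill_square transform applied
# above is conceptually a reindex that guarantees every leaf age group is
# present for every other index combination before the weighted sum, filling
# the gaps with zeros. Data are hypothetical.
import pandas as pd

df = pd.DataFrame({
    "location_id": [1, 1],
    "age_group_id": [10, 11],   # leaf 12 is missing for location 1
    "draw_0": [0.1, 0.2],
})
leaves = [10, 11, 12]
full_index = pd.MultiIndex.from_product(
    [df["location_id"].unique(), leaves],
    names=["location_id", "age_group_id"],
)
square = (
    df.set_index(["location_id", "age_group_id"])
      .reindex(full_index, fill_value=0.0)
      .reset_index()
)
# square now has one row per (location_id, leaf age group), zeros where missing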
def new_population(self, location_set_id, agg_loc_sets=[]):
    dim = self.nonfatal_dimensions.get_simulation_dimensions(self.measure_id)
    df = get_population(
        age_group_id=(dim.index_dim.get_level("age_group_id") + [164]),
        location_id=dbtrees.loctree(
            location_set_id=location_set_id,
            gbd_round_id=self.gbd_round_id).node_ids,
        sex_id=dim.index_dim.get_level("sex_id"),
        year_id=dim.index_dim.get_level("year_id"))
    index_cols = ["location_id", "year_id", "age_group_id", "sex_id"]
    data_cols = ["population"]

    # stage the population in an in-memory source/sink pair
    io_mock = {}
    source = DrawSource({"draw_dict": io_mock, "name": "tmp"}, mem_read_func)
    sink = DrawSink({"draw_dict": io_mock, "name": "tmp"}, mem_write_func)
    sink.push(df[index_cols + data_cols])

    # location aggregation for each requested aggregate location set
    for set_id in agg_loc_sets:
        loc_tree = dbtrees.loctree(
            location_set_id=set_id,
            gbd_round_id=self.gbd_round_id)
        operator = Sum(
            index_cols=[col for col in index_cols if col != "location_id"],
            value_cols=data_cols)
        aggregator = AggSynchronous(
            draw_source=source,
            draw_sink=sink,
            index_cols=[col for col in index_cols if col != "location_id"],
            aggregate_col="location_id",
            operator=operator)
        aggregator.run(loc_tree)

    # age aggregation for each GBD compare age group
    for age_group_id in ComoSummaries._gbd_compare_age_group_list:
        age_tree = dbtrees.agetree(age_group_id)
        operator = Sum(
            index_cols=[col for col in index_cols if col != "age_group_id"],
            value_cols=data_cols)
        aggregator = AggSynchronous(
            draw_source=source,
            draw_sink=sink,
            index_cols=[col for col in index_cols if col != "age_group_id"],
            aggregate_col="age_group_id",
            operator=operator)
        aggregator.run(age_tree)

    # sex aggregation (both sexes)
    sex_tree = dbtrees.sextree()
    operator = Sum(
        index_cols=[col for col in index_cols if col != "sex_id"],
        value_cols=data_cols)
    aggregator = AggSynchronous(
        draw_source=source,
        draw_sink=sink,
        index_cols=[col for col in index_cols if col != "sex_id"],
        aggregate_col="sex_id",
        operator=operator)
    aggregator.run(sex_tree)

    # write the aggregated population to the como directory
    df = source.content()
    df.to_hdf(
        "{}/info/population.h5".format(self.como_dir),
        'draws',
        mode='w',
        format='table',
        data_columns=["location_id", "year_id", "age_group_id", "sex_id"])
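# Hedged sketch (not part of the source): the sex aggregation step above
# reduces to a groupby sum over the non-sex index columns, with the result
# labelled with the both-sex ID (3 in GBD convention). Data are hypothetical.
import pandas as pd

pop = pd.DataFrame({
    "location_id": [1, 1],
    "year_id": [2020, 2020],
    "age_group_id": [10, 10],
    "sex_id": [1, 2],
    "population": [500.0, 520.0],
})
both_sexes = (
    pop.groupby(["location_id", "year_id", "age_group_id"], as_index=False)
       ["population"].sum()
       .assign(sex_id=3)
)
pop = pd.concat([pop, both_sexes], ignore_index=True)
# pop now contains male, female, and both-sex rows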