Beispiel #1
0
def summarize_pct_change(
        tool_name: str,
        parent_dir: str,
        gbd_round_id: int,
        location_id: int,
        measure_id: int,
        year_start_id: int,
        year_end_id: int
):
    """Summarize and save percent change between two years.

    Draws for ``year_start_id`` and ``year_end_id`` are loaded with
    ``_read_scaled_draws``, passed through ``prep_summarize_gbd``, stacked,
    and handed to ``pct_change``. Rows containing NaN are dropped before the
    change is summarized; the summaries are tagged with ``measure_id``,
    cleaned of inf/NaN values and written out with
    ``_save_pct_change_summaries``. Nothing is returned.
    """
    year_ids = (year_start_id, year_end_id)

    # Load both years first, then prep both (start year before end year).
    raw_draws = [
        _read_scaled_draws(tool_name, parent_dir, location_id, year_id,
                           measure_id)
        for year_id in year_ids
    ]
    prepped_draws = [
        prep_summarize_gbd(draws, tool_name, parent_dir, gbd_round_id,
                           location_id, year_id)
        for draws, year_id in zip(raw_draws, year_ids)
    ]
    # The unprepped frames are not needed any more.
    del raw_draws

    stacked = pd.concat(prepped_draws).reset_index(drop=True)
    pct_df = pct_change(stacked, year_start_id, year_end_id,
                        Columns.YEAR_ID, Columns.DRAWS,
                        change_type='pct_change')
    pct_df = pct_df.dropna()

    logging.info("Generating pct change summaries")
    summaries = _generate_summaries_pct_change(pct_df)
    # Last bit of formatting: tag the summaries with the measure.
    summaries[Columns.MEASURE_ID] = measure_id
    # Division by 0 can produce +/-inf: map those to NaN, then map every
    # NaN to 0.
    summaries = summaries.replace([np.inf, -np.inf], np.nan)
    summaries = summaries.fillna(0)

    logging.info("Saving summaries.")
    _save_pct_change_summaries(
        summaries,
        parent_dir=parent_dir,
        location_id=location_id,
        year_start_id=year_start_id,
        year_end_id=year_end_id,
        measure_id=measure_id,
    )
Beispiel #2
0
 def calculate_pct_change(self, mmr_draws):
     """Compute percent change for every year pair in ``self.year_tuples``.

     For each ``(start_year, end_year)`` pair, the rows of ``mmr_draws``
     whose ``year_id`` is one of the two years are deep-copied and passed
     to ``pct_change`` with ``self.draw_cols`` as the data columns. The
     per-pair results are concatenated and returned.
     """
     logger.info('calculating percent change')

     def _change_for_pair(start_year, end_year):
         # Work on a copy restricted to the two years of interest.
         in_pair = mmr_draws.year_id.isin([start_year, end_year])
         pair_draws = mmr_draws.loc[in_pair, :].copy(deep=True)
         return pct_change(pair_draws, start_year, end_year,
                           time_col='year_id', data_cols=self.draw_cols)

     return pd.concat([
         _change_for_pair(start_year, end_year)
         for start_year, end_year in self.year_tuples
     ])
Beispiel #3
0
 def compute_percent_change(self, year_start, year_end):
     """Return percent change between ``year_start`` and ``year_end``.

     Pulls the rows for the two years from the draw source produced by
     ``self.gen_draw_source()``, runs ``pct_change`` over the columns
     listed by ``self.dimensions.data_list()``, and replaces any NaN in
     the result with 0.
     """
     year_filter = {"year_id": [year_start, year_end]}
     draws = self.gen_draw_source().content(filters=year_filter)
     pct_df = pct_change(
         df=draws,
         start_year=year_start,
         end_year=year_end,
         time_col="year_id",
         data_cols=self.dimensions.data_list(),
     )
     pct_df.fillna(0, inplace=True)
     return pct_df
Beispiel #4
0
def summarize_loc_rei(source,
                      location_id,
                      rei_id,
                      year_id,
                      change_intervals,
                      gbd_round_id,
                      pop,
                      aw):
    '''Aggregate age and sex, then calc mean/ui for single and multi year
    for one location risk pair.

    Args:
        source: object whose ``content(filters=...)`` method returns a
            DataFrame for one location/year/rei.
        location_id: location used as a filter on ``source``.
        rei_id: rei used as a filter on ``source``.
        year_id: iterable of years to summarize.
        change_intervals: iterable of ``(start_year, end_year)`` pairs to
            compute percent change for. May be ``None`` or empty.
        gbd_round_id: not used by this function; kept for interface
            compatibility.
        pop: passed to ``combine_sexes_indf`` and ``combine_ages``.
        aw: passed to ``combine_ages``.

    Returns:
        A ``(single, multi)`` tuple of DataFrames: per-year summaries and
        percent-change summaries, each with the summary column renamed to
        ``val``. When ``year_id`` is empty ``single`` is an empty frame with
        the expected columns; when ``change_intervals`` is ``None``/empty
        ``multi`` is an empty frame with the expected columns.

    Raises:
        ValueError: from ``pd.concat`` when change intervals were requested
            but none of their years appear in ``year_id``.
    '''
    single_cols = [
        'location_id', 'year_id', 'age_group_id', 'sex_id',
        'measure_id', 'metric_id', 'rei_id', 'mean', 'lower',
        'upper']
    multi_cols = [
        'location_id', 'year_start_id', 'year_end_id',
        'age_group_id', 'sex_id', 'measure_id', 'rei_id',
        'metric_id', 'pct_change_means', 'lower', 'upper']

    # Normalise None/empty to an empty list so the later loop is safe.
    change_intervals = list(change_intervals) if change_intervals else []
    change_years = set(itertools.chain.from_iterable(change_intervals))

    single_frames = []
    multi_frames = []
    for year in year_id:
        df = source.content(filters={'location_id': location_id,
                                     'year_id': year,
                                     'rei_id': rei_id})
        # Drop stray "Unnamed..." columns from the loaded frame.
        df.drop(df.columns[df.columns.str.contains('^Unnamed')],
                axis=1, inplace=True)
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent.
        both_sex = combine_sexes_indf(df, pop)
        df = pd.concat([df, both_sex])
        age_agg = combine_ages(df, pop, aw,
                               gbd_compare_ags=True)
        df = pd.concat([df, age_agg])
        draw_cols = [c for c in df if c.startswith('draw_')]
        single_frames.append(get_summary(df, draw_cols))
        if year in change_years:
            multi_frames.append(df)

    if single_frames:
        single = pd.concat(single_frames, sort=True)[single_cols]
    else:
        # pd.concat([]) raises; nothing requested means nothing to report.
        single = pd.DataFrame(columns=single_cols)
    single = single.rename(columns={'mean': 'val'})

    if change_intervals:
        multi_yrs = pd.concat(multi_frames, sort=True)
        # The draw columns of the stacked frame do not change per interval.
        draw_cols = [c for c in multi_yrs if c.startswith('draw_')]
        change_summaries = []
        for ci in change_intervals:
            chg_df = pct_change(multi_yrs, ci[0], ci[1], 'year_id',
                                draw_cols)
            chg_draw_cols = [c for c in chg_df if c.startswith('draw_')]
            change_summaries.append(get_summary(chg_df, chg_draw_cols))
        multi = pd.concat(change_summaries, sort=True)[multi_cols]
    else:
        # No change intervals requested: return an empty, well-formed frame
        # instead of failing inside pd.concat.
        multi = pd.DataFrame(columns=multi_cols)
    multi = multi.rename(columns={'pct_change_means': 'val'})

    return single, multi
Beispiel #5
0
 def get_data_frame(self):
     """Compute percent change between ``start_year`` and ``end_year``.

     Reads ``self.data_frame`` and passes it to ``pct_change`` with
     ``self.data_columns`` as the data columns and every entry of
     ``self.index_columns`` except ``'year_id'`` as the index columns.

     Returns:
         Whatever ``pct_change`` returns for the configured years.

     Note:
         ``self.log_and_raise`` is called when ``start_year`` is not
         strictly before ``end_year``; it is presumably expected to raise
         (confirm against its definition).
     """
     logger.info("BEGIN compute pct change")
     # Check to make sure years are correct
     logger.debug("  check year information")
     if self.start_year >= self.end_year:
         msg = "Start year ({}) must come before end year ({})".format(
             self.start_year, self.end_year)
         self.log_and_raise(msg)
     logger.debug("  read data")
     data = self.data_frame
     logger.debug("  calculate pct change")
     # Remove 'year_id' and duplicates while keeping the declared column
     # order. Going through set() made the order depend on string hashing,
     # which can differ from run to run.
     index_cols = []
     for col in self.index_columns:
         if col != 'year_id' and col not in index_cols:
             index_cols.append(col)
     pct_data_df = pct_change(data,
                              self.start_year,
                              self.end_year,
                              time_col='year_id',
                              data_cols=self.data_columns,
                              change_type='pct_change',
                              index_cols=index_cols)
     logger.info("END compute pct change")
     return pct_data_df
Beispiel #6
0
def main_summarize_gbd(pop_data, index_columns, data_columns, years,
                       location_id, measure_id):
    """Execute all the steps needed to summarize for the gbd db. This includes
    calculating pct-change.

    Args:
        pop_data: population DataFrame merged onto the draws on
            location_id / year_id / sex_id / age_group_id; must provide a
            ``pop`` column.
        index_columns: list of index column names; not mutated.
        data_columns: draw/data column names passed to the helpers.
        years: years passed to ``read_gbd_draw_files`` and
            ``save_gbd_summaries``.
        location_id: location to summarize.
        measure_id: string of the form ``"<int>"`` or ``"change_<int>"``;
            the ``change_`` prefix requests percent-change summaries.

    Returns:
        ``1`` on success. On failure the caught exception instance is
        returned (after being logged) rather than raised.

    Note:
        ``parent_dir`` and ``age_weights`` are not parameters; they are
        resolved from the enclosing (module) scope.
    """
    logger = logging.getLogger('summary.main_summarize_gbd')
    change_prefix = "change_"
    try:
        # Strip the prefix explicitly. str.lstrip("change_") removes any
        # leading run of those *characters*, so malformed ids such as "e1"
        # or "change1" used to be silently parsed as measure 1.
        change = measure_id.startswith(change_prefix)
        if change:
            measure_id = measure_id[len(change_prefix):]
        measure_id = int(measure_id)

        # NOTE(review): progress messages go through the root logger
        # (logging.info) while failures use the named logger; kept as-is to
        # avoid changing logging behaviour.
        logging.info("Reading in draw files for GBD, for measure {}"
                     .format(measure_id))
        draws = read_gbd_draw_files(parent_dir, location_id, years, measure_id)

        logging.info("Generating both-sexes")
        draws = generate_both_sexes(draws, index_columns)

        logging.info("Generating aggregated-ages")
        draws = generate_aggregated_ages(draws, index_columns, database='gbd')

        logging.info("Merging population on")
        draws = pd.merge(draws, pop_data, on=['location_id', 'year_id',
                                              'sex_id', 'age_group_id'],
                         how='left')
        # Rows without a population match get a population of 0.
        draws['pop'] = draws['pop'].fillna(0)

        logging.info("Generating age-standardized rates")
        draws = generate_asr(draws, index_columns, 'pop', data_columns,
                             age_weights, 'gbd')

        logging.info("Generating rates")
        index_columns = index_columns + ['metric_id']
        draws = generate_gbd_rates(draws, index_columns, data_columns, 'pop')
        draws.drop('pop', axis=1, inplace=True)

        logging.info("Generating cause fractions")
        draws = generate_gbd_cause_fractions(draws)

        if change:
            logging.info("Generating pct-change")
            # Hard-coded start year -> end years to compare.
            change_dict = {1990: [2007, 2017], 2007: [2017]}
            change_list = []
            for start, ends in change_dict.items():
                for end in ends:
                    change_df = pct_change(draws, start, end, 'year_id',
                                           data_columns,
                                           change_type='pct_change')
                    change_df.dropna(inplace=True)
                    change_list.append(change_df)
            draws = pd.concat(change_list).reset_index(drop=True)
            # Percent-change rows are keyed by a start/end year pair
            # instead of a single year_id.
            index_columns = ['location_id', 'year_start_id', 'year_end_id',
                             'age_group_id', 'sex_id', 'cause_id', 'metric_id']

        logging.info("Generating summaries for GBD")
        data_summaries = generate_gbd_summaries(draws, index_columns,
                                                data_columns, change)

        # Release the draws before formatting/saving.
        del draws
        gc.collect()
        logging.info("Formatting final GBD df")
        data_summaries = format_df(data_summaries, 'gbd', measure_id)

        logging.info("Saving GBD summaries")
        save_gbd_summaries(data_summaries, location_id, years, measure_id,
                           change)
        rc = 1
    except Exception as e:
        logger.exception("Summarizing GBD failed: {}".format(e))
        rc = e
    return rc