예제 #1
0
def summarize_loc_rei(source,
                      location_id,
                      rei_id,
                      year_id,
                      change_intervals,
                      gbd_round_id,
                      pop,
                      aw):
    '''Summarize draws for one location/risk pair.

    For every year in ``year_id`` the draws are read from ``source``, a
    both-sex aggregate and an age aggregate are appended, and the draws are
    collapsed with ``get_summary``. Years that appear in any change interval
    are additionally kept at draw level so that ``pct_change`` can be
    computed between the interval's start and end year.

    Args:
        source: object exposing ``content(filters=...)`` that returns a
            DataFrame of draws.
        location_id: location to filter on.
        rei_id: risk to filter on.
        year_id: iterable of years to summarize.
        change_intervals: iterable of ``(start_year, end_year)`` pairs, or a
            falsy value (``None`` / empty) when no percent change is wanted.
        gbd_round_id: unused by this function; kept for interface
            compatibility.
        pop: passed through to ``combine_sexes_indf`` and ``combine_ages``.
        aw: passed through to ``combine_ages``.

    Returns:
        ``(single, multi)``: the single-year summary (point estimate in
        ``val``) and the percent-change summary. ``multi`` is an empty frame
        with the expected columns when ``change_intervals`` is falsy.

    Raises:
        ValueError: if change intervals are given but none of their years
            are present in ``year_id``.
    '''
    # A falsy value (None or empty) means "no percent-change output".
    change_intervals = change_intervals or []
    change_years = set(itertools.chain.from_iterable(change_intervals))

    multi_yrs = []
    single = []
    for year in year_id:
        df = source.content(filters={'location_id': location_id,
                                     'year_id': year,
                                     'rei_id': rei_id})
        # Drop stray index columns (named "Unnamed: N") left by CSV round-trips.
        df.drop(df.columns[df.columns.str.contains('^Unnamed')],
                axis=1, inplace=True)
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent row-wise stack.
        both_sex = combine_sexes_indf(df, pop)
        df = pd.concat([df, both_sex])
        age_agg = combine_ages(df, pop, aw,
                               gbd_compare_ags=True)
        df = pd.concat([df, age_agg])
        draw_cols = [c for c in df if c.startswith('draw_')]
        single.append(get_summary(df, draw_cols))
        if year in change_years:
            multi_yrs.append(df)

    single = pd.concat(single, sort=True)
    single = single[[
        'location_id', 'year_id', 'age_group_id', 'sex_id',
        'measure_id', 'metric_id', 'rei_id', 'mean', 'lower',
        'upper']]
    single = single.rename(columns={'mean': 'val'})

    if not change_intervals:
        # Nothing to compare: return an empty frame with the output schema
        # instead of failing inside pd.concat([]).
        multi = pd.DataFrame(columns=[
            'location_id', 'year_start_id', 'year_end_id',
            'age_group_id', 'sex_id', 'measure_id', 'rei_id',
            'metric_id', 'val', 'lower', 'upper'])
        return single, multi

    if not multi_yrs:
        raise ValueError(
            'none of the years in change_intervals are present in year_id')

    multi_yrs = pd.concat(multi_yrs, sort=True)
    # The draw columns of the stacked frame do not change between intervals.
    stacked_draw_cols = [c for c in multi_yrs if c.startswith('draw_')]
    multi = []
    for ci in change_intervals:
        chg_df = pct_change(multi_yrs, ci[0], ci[1], 'year_id',
                            stacked_draw_cols)
        draw_cols = [c for c in chg_df if c.startswith('draw_')]
        multi.append(get_summary(chg_df, draw_cols))
    multi = pd.concat(multi, sort=True)
    multi = multi[[
        'location_id', 'year_start_id', 'year_end_id',
        'age_group_id', 'sex_id', 'measure_id', 'rei_id',
        'metric_id', 'pct_change_means', 'lower', 'upper']]
    multi = multi.rename(columns={'pct_change_means': 'val'})

    return single, multi
예제 #2
0
def _calc_mean_upper_lower(df: pd.DataFrame,
                           draw_cols: List[str]) -> pd.DataFrame:
    """Summarize ``draw_cols`` and relabel the result with HALE column names.

    The summary comes from ``cm_summarize.get_summary``; its mean, lower and
    upper columns are renamed to their HALE counterparts and the median
    column is dropped.
    """
    summary = cm_summarize.get_summary(df, draw_cols)
    hale_names = {
        columns.MEAN: columns.HALE_MEAN,
        columns.LOWER: columns.HALE_LOWER,
        columns.UPPER: columns.HALE_UPPER,
    }
    relabelled = summary.rename(columns=hale_names)
    return relabelled.drop(columns=columns.MEDIAN)
예제 #3
0
 def summarize_draws(self, arc_draws):
     """Summarize percent-change draws, reporting ``pct_change_means`` as ``val``.

     The draw columns are collapsed with ``get_summary``; the ``median``,
     ``index`` and ``pct_change_means`` columns of that summary are dropped
     and ``pct_change_means`` is merged back in from ``arc_draws`` on the
     index columns before being renamed to ``val``.
     """
     # NOTE: self.index_cols is modified in place (year_id is swapped for the
     # start/end year pair), so the change persists on the instance.
     self.index_cols.extend(['year_start_id', 'year_end_id'])
     self.index_cols.remove('year_id')

     pct_of_means = arc_draws[self.index_cols + ['pct_change_means']]

     draw_summary = get_summary(arc_draws, self.draw_cols)
     draw_summary.drop(columns=['median', 'index', 'pct_change_means'],
                       inplace=True)

     merged = draw_summary.merge(pct_of_means, on=self.index_cols)
     merged.rename(columns={'pct_change_means': 'val'}, inplace=True)
     return merged
def summarize_draws(df, index_cols):
    """Collapse the ``draw_*`` columns of ``df`` to mean/upper/lower.

    ``index_cols`` is accepted but not used. The result is restricted to a
    fixed set of identifier columns followed by ``mean``, ``upper`` and
    ``lower``.
    """
    draw_names = list(df.filter(like='draw_').columns)
    summary = get_summary(df, draw_names).reset_index()
    # reset_index() leaves the old index in an 'index' column; neither it nor
    # the median is part of the output.
    for unwanted in ('index', 'median'):
        del summary[unwanted]
    return summary[[
        'measure_id', 'year_id', 'location_id', 'sex_id', 'age_group_id',
        'cause_id', 'rei_id', 'star_id', 'metric_id', 'mean', 'upper', 'lower',
    ]]
예제 #5
0
def compute_estimates(df, point_estimate="mean"):
    """Summarize the draw columns of ``df``.

    Every column whose name contains ``"draw_"`` is passed to
    ``get_summary``. With ``point_estimate="mean"`` the median column is
    removed from the summary; with ``point_estimate=None`` both the median
    and the mean are removed. Any other value raises ``ValueError``.
    """
    draw_names = [name for name in df.columns if "draw_" in name]
    summary = get_summary(df, data_cols=draw_names)

    if point_estimate == "mean":
        unwanted = ["median"]
    elif point_estimate is None:
        unwanted = ["median", "mean"]
    else:
        raise ValueError("point_estimate must be one of ['mean', None]")

    summary.drop(columns=unwanted, inplace=True)
    return summary
예제 #6
0
    def get_data_frame(self):
        """Validate ``self.in_df`` and return its draw summary.

        The ``draw_*`` columns are summarized with ``get_summary``. When the
        summary carries a ``pct_change_means`` column, that value replaces
        ``mean``. Only ``self.write_out_columns`` are returned.
        """
        logger.info("BEGIN compute summaries")

        self.validate_measure_and_metric(self.in_df, "incoming dataframe")
        logger.debug("validated")

        draw_columns = self.in_df.filter(like='draw_').columns
        summary = get_summary(self.in_df, draw_columns).reset_index()
        # reset_index() leaves the old index in an 'index' column; neither it
        # nor the median is written out.
        for unwanted in ('index', 'median'):
            del summary[unwanted]

        if 'pct_change_means' in summary:
            logger.info("replacing mean of pct change distribution with pct "
                        "change of means")
            summary['mean'] = summary['pct_change_means']

        return summary[self.write_out_columns]
예제 #7
0
# NOTE(review): this is the body of a function whose `def` line is not
# visible here; `data` and `input_keep_cols` are defined outside this view.
data = data[input_keep_cols]
# Format columns: integer year and draw number, plus constant sex, age-group
# and estimate-stage IDs on every row
data['year_id'] = data['year'].astype('int64')
data['sex_id'] = 3
data['age_group_id'] = 1
data['estimate_stage_id'] = 3
data['sim'] = data['sim'].astype('int64')
# Reshape draws wide: one row per index combination, one column per `sim`
# value holding `mort`
index_cols = ['location_id', 'ihme_loc_id', 'year_id', 'year',  'sex_id',
              'age_group_id', 'estimate_stage_id']
data = data.pivot_table(values="mort", index=index_cols, columns="sim")
data = data.reset_index()
# Rename integer sim columns 0..999 to draw_0..draw_999 (presumably 1000
# draws are expected; other column labels are left unchanged)
data = data.rename(columns={x: 'draw_{}'.format(x) for x in range(1000)})
# Get the summary statistics over every column whose name contains 'draw'
draw_cols = [col for col in data.columns if 'draw' in col]
data = get_summary(data, draw_cols)
# Format for upload: keep the index columns and the mean/lower/upper summary
keep_cols = index_cols + ['mean', 'lower', 'upper']
return data[keep_cols]


# Parse command-line arguments; both IDs are required integers.
parser = argparse.ArgumentParser()
parser.add_argument('--version_id', type=int, required=True, action='store',
                    help='The version_id to run')
# The help text previously repeated the version_id description.
parser.add_argument('--location_id', type=int, required=True, action='store',
                    help='The location_id to run')
args = parser.parse_args()
version_id = args.version_id
location_id = args.location_id
예제 #8
0
 def summarize_draws(self, mmr_draws):
     """Summarize the MMR draws, reporting the mean as ``val``.

     ``get_summary`` is applied over ``self.draw_cols``; the median column
     is removed and ``mean`` is renamed to ``val``.
     """
     logger.info("Summarizing MMR draws")
     mmr_summary = get_summary(mmr_draws, self.draw_cols)
     mmr_summary.drop(columns='median', inplace=True)
     mmr_summary.rename(columns={'mean': 'val'}, inplace=True)
     return mmr_summary
예제 #9
0
# Load this location's input file
data = pd.read_csv("{}/{}.csv".format(input_dir, location_id))

# Melt the pys* columns long: one row per (index, sim, age_group)
index_cols = ['ihme_loc_id', 'year', 'sex']
data_cols = [
    'pys1', 'pys2', 'pys3', 'pys4',
    'pysenn', 'pyslnn', 'pyspnn', 'pyspna', 'pyspnb',
]
data = pd.melt(
    data[index_cols + ['sim'] + data_cols],
    id_vars=(index_cols + ['sim']),
    value_vars=data_cols,
    var_name="age_group",
    value_name='draw',
)

# Spread the sims back out into wide draw columns, then order the rows
data = reshape_wide(data, index_cols + ['age_group'], ['draw'], 'sim')
data = data.sort_values(index_cols + ['age_group']).reset_index(drop=True)
# Strip the "pys" prefix so age_group holds only the suffix (e.g. "enn")
data['age_group'] = data['age_group'].map(lambda x: x.replace("pys", ""))

# Collapse the draw columns to point estimates
draw_cols = ['draw_{}'.format(x) for x in range(1000)]
summary_data = get_summary(
    data, data.filter(like='draw_').columns
).reset_index(drop=True)

# Add ID columns and keep only the output columns
summary_data['location_id'] = location_id
summary_data['year_id'] = summary_data['year'].astype('int64')
for sex_label, sex_code in (("male", 1), ("female", 2)):
    summary_data.loc[(summary_data['sex'] == sex_label), 'sex_id'] = sex_code
summary_data['sex_id'] = summary_data['sex_id'].astype('int64')
summary_data = summary_data[[
    'location_id', 'ihme_loc_id', 'year_id', 'sex_id',
    'age_group', 'mean', 'lower', 'upper',
]]

# Write the summary out
summary_data.to_csv(output_file, index=False)