Esempio n. 1
0
def make_summaries(data):
    """
    Reshape simulation-level mortality values wide and summarize them.

    This function expects a DataFrame with the following
    columns:
        location_id
        ihme_loc_id
        year
        sim
        mort

    The caller's DataFrame is left untouched: all formatting happens on a
    copy of the required columns.

    Returns the index columns (location_id, ihme_loc_id, year_id, year,
    sex_id, age_group_id, estimate_stage_id) together with the 'mean',
    'lower' and 'upper' columns taken from the result of
    ``sm.get_estimates``.
    """
    # Input
    input_keep_cols = ['location_id', 'ihme_loc_id', 'year', 'sim', 'mort']
    # Take an explicit copy so the column assignments below act on our own
    # frame instead of on a selection of the caller's data (which triggers
    # pandas' SettingWithCopyWarning).
    data = data[input_keep_cols].copy()
    # Format columns
    data['year_id'] = data['year'].astype('int64')
    data['sex_id'] = 3
    data['age_group_id'] = 1
    data['estimate_stage_id'] = 3
    data['sim'] = data['sim'].astype('int64')
    # Reshape draws wide
    index_cols = [
        'location_id', 'ihme_loc_id', 'year_id', 'year', 'sex_id',
        'age_group_id', 'estimate_stage_id'
    ]
    data = data.pivot_table(values="mort", index=index_cols, columns="sim")
    data = data.reset_index()
    # Only sim values 0..999 are renamed to draw_<sim>; any other sim value
    # keeps its integer column label.
    # NOTE(review): presumably exactly 1000 zero-based sims are expected --
    # confirm against the caller.
    data = data.rename(columns={x: 'draw_{}'.format(x) for x in range(1000)})
    # Get the summary statistics
    data = sm.get_estimates(data)
    # Format for upload
    keep_cols = index_cols + ['mean', 'lower', 'upper']
    return data[keep_cols]
Esempio n. 2
0
 def summarize(self, arc_draws):
     """Summarize change draws and report 'pct_change_means' as 'val'.

     Side effect: ``self.index_cols`` is switched from a single 'year_id'
     column to the 'year_start_id' / 'year_end_id' pair. The switch is
     idempotent, so calling this method more than once neither duplicates
     the year-range columns nor fails when 'year_id' has already been
     removed.

     Returns the result of ``get_estimates`` on the index and draw columns,
     merged on the index columns with the 'pct_change_means' column of
     ``arc_draws``, which is renamed to 'val'.
     """
     # Add the year-range columns only if they are not there yet.
     for year_col in ('year_start_id', 'year_end_id'):
         if year_col not in self.index_cols:
             self.index_cols.append(year_col)
     # list.remove raises ValueError on a missing item, so guard it.
     if 'year_id' in self.index_cols:
         self.index_cols.remove('year_id')
     summaries_mean = arc_draws[self.index_cols + ['pct_change_means']]
     summaries = get_estimates(arc_draws[self.index_cols + self.draw_cols])
     summaries = summaries.merge(summaries_mean, on=self.index_cols)
     summaries.rename(columns={'pct_change_means': 'val'}, inplace=True)
     return summaries
Esempio n. 3
0
def summarize_draws(df, index_cols):
    """Collapse the draws in ``df`` and return a fixed set of columns.

    The frame is passed through ``sm.get_estimates``; its index is turned
    into a column and discarded along with the 'median' column, and the
    remaining columns are returned in a fixed order ending with
    mean / upper / lower.

    ``index_cols`` is accepted for interface compatibility but is not used.
    """
    estimates = sm.get_estimates(df).reset_index()
    # Both columns must exist; a missing one raises KeyError.
    for unwanted in ('index', 'median'):
        del estimates[unwanted]
    output_cols = [
        'measure_id', 'year_id', 'location_id', 'sex_id', 'age_group_id',
        'cause_id', 'rei_id', 'metric_id', 'mean', 'upper', 'lower'
    ]
    return estimates[output_cols]
Esempio n. 4
0
    def get_data_frame(self):
        """Validate ``self.in_df``, summarize it and return the output columns.

        The summary comes from ``sm.get_estimates``; the reset index and the
        'median' column are discarded. When a 'pct_change_means' column is
        present it overwrites 'mean'. The result is restricted to
        ``self.write_out_columns``.
        """
        logger.info("BEGIN compute summaries")

        self.validate_measure_and_metric(self.in_df, "incoming dataframe")
        logger.debug("validated")

        estimates = sm.get_estimates(self.in_df).reset_index()
        # Both columns must exist; a missing one raises KeyError.
        for unwanted in ('index', 'median'):
            del estimates[unwanted]

        if 'pct_change_means' in estimates.columns:
            logger.info("replacing mean of pct change distribution with pct "
                        "change of means")
            estimates['mean'] = estimates['pct_change_means']

        return estimates[self.write_out_columns]
Esempio n. 5
0
 def summarize(self, mmr_draws):
     """Summarize ``mmr_draws`` with ``get_estimates``.

     The 'median' column is removed and 'mean' is renamed to 'val'; both
     changes are applied in place to the frame returned by
     ``get_estimates``, which is then returned.
     """
     logger.info("Summarizing MMR draws")
     estimates = get_estimates(mmr_draws)
     estimates.drop(['median'], axis=1, inplace=True)
     estimates.rename(columns={'mean': 'val'}, inplace=True)
     return estimates
Esempio n. 6
0
    # standardize all inputs by transforming everything to rate space
    df = define_metric(df, source)
    if 1 in df.metric_id.unique():
        df.loc[df.metric_id == 1] = transform_metric(df.loc[df.metric_id == 1],
                                                     to_id=3,
                                                     from_id=1)

    # find index (non draw) columns
    # Drop whichever of the helper columns are actually present. Checking
    # explicitly (instead of try/except around a single drop call) avoids
    # swallowing unrelated errors, and also removes one of the columns when
    # the other is absent, so it cannot leak into index_cols.
    extra_cols = [col for col in ('envelope', 'pop') if col in df.columns]
    if extra_cols:
        df.drop(extra_cols, axis=1, inplace=True)
    draw_cols = list(df.filter(like='draw').columns)
    # Everything that is neither a draw column nor year_id is an index column.
    index_cols = list(set(df.columns) - set(draw_cols + ['year_id']))

    # calculate pct_change
    if change_type == 'pct_change_num':  # drop any 2's. transform only 3's.
        df = transform_metric(df[df.metric_id == 3], to_id=1, from_id=3)
    # Both variants are passed to pct_change under the same name.
    if change_type in ['pct_change_rate', 'pct_change_num']:
        change_type = 'pct_change'
    change_df = pct_change(df, start_year, end_year, change_type, index_cols)

    # summarize
    summ_df = get_estimates(change_df)
    # Report the pct change of the means in place of the summarized 'mean'.
    summ_df.drop('mean', axis=1, inplace=True)
    summ_df.rename(columns={'pct_change_means': 'mean'}, inplace=True)

    # stream results to sys.stdout for get_pct_change.ado to read in
    # Use a dct because stata is faster at reading those
    to_dct(df=summ_df, fname=sys.stdout, include_header=True)