Esempio n. 1
0
 def split(self):
     """Split the input draws by the split proportions and push the result.

     Input draws are read from ``self._epi_draw_source`` restricted by
     ``self.demo_filters``; proportions are read from
     ``self._ss_draw_source`` restricted by ``self.ss_filters`` overlaid
     with ``self.demo_filters``. The two are combined with ``merge_split``
     and the result is handed to ``self.pusher.push`` with ``append=False``.
     """
     # get input draws
     draws = self._epi_draw_source.content(filters=self.demo_filters.copy())
     # get split props. Build the combined filters on a copy: updating
     # self.ss_filters directly would leave the demographic filters stored
     # on the instance after this call returns.
     filters = self.ss_filters.copy()
     filters.update(self.demo_filters)
     gprops = self._ss_draw_source.content(filters=filters)
     splits = merge_split(draws,
                          gprops,
                          group_cols=self.dimensions.index_names,
                          value_cols=self.dimensions.data_list())
     # the split output is identified by the child meid
     splits = splits.assign(modelable_entity_id=splits['child_meid'])
     # keep only the index columns, the meid and the data columns
     splits = splits[self.dimensions.index_names + ["modelable_entity_id"] +
                     self.dimensions.data_list()]
     # missing values are written out as 0
     splits = splits.fillna(0)
     self.pusher.push(splits, append=False)
Esempio n. 2
0
def _parallel_merge_split(meid_cause_map, interp_files, output_dir, tmpdir,
                          location_id):
    try:
        epi_draw = []
        for f in interp_files:
            epi_draw.append(
                pd.read_hdf(f,
                            'draws',
                            where=["location_id=={}".format(location_id)]))
        epi_draws = pd.concat(epi_draw)

        cd = pd.read_hdf(os.path.join(tmpdir.name, 'source_cause_draws.h5'),
                         'draws',
                         where=['location_id=={}'.format(location_id)])

        draw_cols = [col for col in cd.columns if 'draw_' in col]
        epi_draws = epi_draws[epi_draws['age_group_id'].isin(
            cd['age_group_id'].unique())]

        # these columns are not needed and cause maths.merge_split to break
        drop_cols = ['measure_id', 'model_version_id', 'metric_id']
        cd.drop(drop_cols, axis=1, inplace=True, errors='ignore')
        epi_draws.drop(drop_cols, axis=1, inplace=True, errors='ignore')

        cout = merge_split(
            cd, epi_draws,
            ['year_id', 'age_group_id', 'sex_id', 'location_id'], draw_cols)

        cout = cout.merge(cd[[
            'year_id', 'age_group_id', 'sex_id', 'location_id', 'envelope'
        ]],
                          how='left')
        cout['cause_id'] = cout['modelable_entity_id']
        cout['cause_id'] = cout['cause_id'].replace(meid_cause_map)
        cout['measure_id'] = 1
        for cid in cout.cause_id.unique():
            cid_dir = '{}/{}'.format(output_dir, int(cid))
            cid_dir = cid_dir.replace("\r", "")
            if not os.path.exists(cid_dir):
                makedirs_safely(cid_dir)
            fn = '{}/death_{}.csv'.format(cid_dir, location_id)
            cout.query('cause_id=={}'.format(cid)).to_csv(fn, index=False)
        return location_id, 0
    except Exception:
        tb_str = traceback.format_exc()
        return location_id, tb_str
Esempio n. 3
0
def filet(source_meid, target_prop_map, location_id, split_meas_ids,
          prop_meas_id, gbd_round_id, mvid_map, source_mvid, decomp_step,
          n_draws, downsample):
    """
    Splits the draws for source_meid to the target meids given in
    target_prop_map by the proportions estimated in the proportion meids,
    for the specified location_id. Each measure in split_meas_ids is split
    independently against the same set of proportion draws.

    Arguments:
        source_meid (int): meid for the draws to be split.

        target_prop_map (dict): dictionary whose keys are the target meids and
            whose values are the meids for the corresponding proportion models.

        location_id (int): location_id to operate on.

        split_meas_ids (list of ints): The measure_ids from source_meid to be
            split.

        prop_meas_id (int): The measure_id that identifies the proportion in
            prop_meids to use for the split.

        gbd_round_id (int): the gbd_round_id for models being split.

        mvid_map (dict or None): maps each proportion meid (a value of
            target_prop_map) to the model version id to pull for it. When
            None, version_id=None is passed to get_draws.

        source_mvid (int or None): source model version id, passed to
            get_draws as version_id.

        decomp_step (str): Decomposition step. Allowed values are None,
            'iterative', 'step1', 'step2', 'step3', 'step4', and 'step5'
            depending on the value of gbd_round_id.

        n_draws (Optional[int]): forwarded to get_draws for the proportions.

        downsample (Optional[bool]): forwarded to get_draws for the
            proportions.

    Returns:
        A DataFrame containing the draws for the target meids, with columns
        location_id, year_id, age_group_id, sex_id, measure_id,
        modelable_entity_id (the target meid) and the draw columns.

    Raises:
        ValueError: if the proportion and source draws have a different
            number of draw columns. pandas.concat also raises ValueError
            when there is nothing to concatenate (no proportions, or no
            measure produced a split).
    """
    # These columns are not needed and break maths.merge_split
    drop_cols = ['modelable_entity_id', 'model_version_id', 'metric_id']

    props = []
    for prop_meid in target_prop_map.values():
        version_id = None if mvid_map is None else mvid_map[prop_meid]
        props.append(
            get_draws(gbd_id_type='modelable_entity_id',
                      gbd_id=prop_meid,
                      source='epi',
                      measure_id=prop_meas_id,
                      location_id=location_id,
                      version_id=version_id,
                      gbd_round_id=gbd_round_id,
                      decomp_step=decomp_step,
                      n_draws=n_draws,
                      downsample=downsample))
    props = pd.concat(props).reset_index(drop=True)

    # label every proportion row with the target meid it produces
    prop_to_target = {v: k for k, v in target_prop_map.items()}
    props['target_modelable_entity_id'] = (
        props.modelable_entity_id.replace(prop_to_target))
    props_drawcols = [col for col in props.columns if 'draw_' in col]

    force_scale = len(target_prop_map) > 1

    splits = []
    for measure_id in split_meas_ids:
        # NOTE(review): the source draws are fetched without n_draws /
        # downsample while the proportions use them; a mismatch in draw
        # column counts is rejected below. Confirm whether the source should
        # be downsampled too.
        source = get_draws(gbd_id_type='modelable_entity_id',
                           gbd_id=[source_meid],
                           source='epi',
                           measure_id=measure_id,
                           location_id=location_id,
                           version_id=source_mvid,
                           gbd_round_id=gbd_round_id,
                           decomp_step=decomp_step)
        source_drawcols = [col for col in source.columns if 'draw_' in col]

        # Restrict the proportions to this measure's demographics on a
        # per-measure copy, so one measure's age/sex coverage does not
        # shrink the proportions used for the measures that follow.
        in_source = (props.age_group_id.isin(source.age_group_id.unique())
                     & props.sex_id.isin(source.sex_id.unique()))
        measure_props = props[in_source].assign(measure_id=measure_id)

        source.drop(drop_cols, axis=1, inplace=True, errors='ignore')

        if len(source) == 0 or len(measure_props) == 0:
            # nothing to split for this measure
            continue

        if len(props_drawcols) != len(source_drawcols):
            raise ValueError(
                "props and source drawcols are different lengths")

        splits.append(
            merge_split(source,
                        measure_props, [
                            'year_id', 'age_group_id', 'sex_id',
                            'location_id', 'measure_id'
                        ],
                        props_drawcols,
                        force_scale=force_scale))

    splits = pd.concat(splits)
    out_cols = [
        'location_id', 'year_id', 'age_group_id', 'sex_id', 'measure_id',
        'target_modelable_entity_id'
    ] + props_drawcols
    return splits[out_cols].rename(
        columns={'target_modelable_entity_id': 'modelable_entity_id'})