Python SquareImport Examples, job_utils.draws.SquareImport Python Examples

Example #1

0

Show file

File: excess.py Project: cheth-rowe/ihmexp

def redist_excess(excess_id, redist_map, copy_map, year_id, out_dir):
    """Redistribute excess draws and write one HDF5 file per target ME.

    Runs ``RedistExcess`` for the requested year, imports incidence draws
    (measure_id 6) for every source ME listed in ``copy_map``, and writes
    the redistributed draws (with the copied incidence draws appended where
    applicable) to ``<out_dir>/<me_id>/<year>.h5``.

    Args:
        excess_id: forwarded unchanged to ``RedistExcess``.
        redist_map: mapping whose values are the target ME ids to save;
            also forwarded to ``RedistExcess``.
        copy_map: mapping of source ME id -> target ME id whose incidence
            draws should be copied over.
        year_id: sequence of year ids; the first element names the file.
        out_dir: root output directory; one sub-directory per ME id is
            assumed to exist already (it is not created here).
    """
    # Work on a private copy: assigning into the class-level default dict
    # would leak this call's year/measure/sex into every later user of
    # RedistExcess.default_idx_dmnsns.
    dim = copy.deepcopy(RedistExcess.default_idx_dmnsns)
    dim["year_id"] = year_id
    dim["measure_id"] = [5]
    dim["sex_id"] = [2]

    # run redistribution
    redistributer = RedistExcess(excess_id, redist_map, idx_dmnsns=dim)
    redistributer.redistribute_excess()

    # copy dimensions of redistributer instance then update measure_id
    # so we can copy over incidence draws as directed in copy_map
    dim_inc = copy.deepcopy(redistributer.idx_dmnsns)
    dim_inc["measure_id"] = [6]  # incidence
    inc = draws.SquareImport(idx_dmnsns=dim_inc)

    # keyed by *target* ME id so the save loop can look draws up directly
    inc_dict = {}
    for src_me_id, trg_me_id in copy_map.items():
        inc_dict[trg_me_id] = inc.import_square(
            gopher_what={"modelable_entity_id": src_me_id}, source="epi")

    # save to disk
    fname = str(year_id[0]) + ".h5"
    # a real list, not a dict view, so it is valid on Python 2 and 3
    data_columns = list(dim.keys())
    for me_id in redist_map.values():
        out_df = redistributer.me_dict[me_id].reset_index()
        # inc_dict's keys are exactly copy_map's values
        if me_id in inc_dict:
            inc_df = inc_dict[me_id].reset_index()
            out_df = pd.concat([out_df, inc_df])
        out_df.to_hdf(os.path.join(out_dir, str(me_id), fname),
                      key="draws",
                      format="table",
                      data_columns=data_columns)

Example #2

0

Show file

    def import_sex_prop(self, modelable_entity_id, sex_id):
        """Import sex-proportion draws and broadcast them over measure_id.

        Draws are imported for a single ``sex_id`` with measure_id 18 and
        then left-merged onto the base importer's index (minus ``sex_id``),
        so the same values are repeated for every measure in that index.
        Index combinations with no imported draws are filled with 0.

        Args:
            modelable_entity_id: ME id to import from the "epi" source.
            sex_id: the single sex id to import.

        Returns:
            DataFrame indexed by the base importer's dimensions without
            ``sex_id``, holding the importer's draw columns.
        """
        # desired index shape (contains prev and incidence in index, no sex_id)
        broadcast_idx_dmnsns = self.base_importer.idx_dmnsns.copy()
        broadcast_idx_dmnsns.pop("sex_id")
        # Materialise keys/values as lists: on Python 3 dict views support
        # neither .remove() nor "+" concatenation, both of which are needed.
        index_names = list(broadcast_idx_dmnsns.keys())
        broadcast_idx = pd.MultiIndex.from_product(
            list(broadcast_idx_dmnsns.values()),
            names=index_names)
        broadcast_df = pd.DataFrame(index=broadcast_idx)

        # get import shape
        gopher_idx_dmnsns = broadcast_idx_dmnsns.copy()
        gopher_idx_dmnsns["sex_id"] = [sex_id]
        gopher_idx_dmnsns["measure_id"] = [18]

        # import using specified dimensions
        importer = draws.SquareImport(idx_dmnsns=gopher_idx_dmnsns)
        tmp_df = importer.import_square(
            {"modelable_entity_id": modelable_entity_id},
            source="epi")

        # broadcast to preferred shape: merge on everything except
        # measure_id so each imported row fans out across all measures
        broadcast_over = list(index_names)
        broadcast_over.remove("measure_id")
        tmp_df = tmp_df.reset_index()[broadcast_over + importer.draw_cols]
        df = pd.merge(broadcast_df.reset_index(), tmp_df, on=broadcast_over,
                      how="left").set_index(index_names)
        df.fillna(value=0, inplace=True)
        return df

Example #3

0

Show file

    def import_couples(self, modelable_entity_id):
        """Import sex_id 2 draws and broadcast them over every sex_id.

        Draws are imported from the "dismod" source for ``sex_id == 2`` only
        and then left-merged onto the base importer's full index, so the
        same values are repeated for each sex in that index. Index
        combinations with no imported draws are filled with 0.

        Args:
            modelable_entity_id: ME id to import.

        Returns:
            DataFrame indexed by the base importer's dimensions, holding the
            importer's draw columns.
        """
        # desired index shape (contains prev and incidence in index and male)
        broadcast_idx_dmnsns = self.base_importer.idx_dmnsns.copy()
        broadcast_df = self.base_importer.get_index_df()
        # Materialise the keys as a list: on Python 3 a dict view supports
        # neither .remove() nor "+" concatenation, both of which are needed.
        index_names = list(broadcast_idx_dmnsns.keys())

        # get import shape
        gopher_idx_dmnsns = broadcast_idx_dmnsns.copy()
        gopher_idx_dmnsns["sex_id"] = [2]

        # import using specified dimensions
        importer = draws.SquareImport(idx_dmnsns=gopher_idx_dmnsns)
        tmp_df = importer.import_square(
            {"modelable_entity_ids": [modelable_entity_id]}, source="dismod")

        # broadcast to preferred shape: merge on everything except sex_id
        # so each imported row fans out across all sexes in the base index
        broadcast_over = list(index_names)
        broadcast_over.remove("sex_id")
        tmp_df = tmp_df.reset_index()[broadcast_over + importer.draw_cols]
        df = pd.merge(broadcast_df.reset_index(),
                      tmp_df,
                      on=broadcast_over,
                      how="left").set_index(index_names)
        df.fillna(value=0, inplace=True)
        return df

Example #4

0

Show file

    def __init__(self, me_map, **kwargs):
        """Initialise the parent importer and load every source ME's draws.

        Each mapper in ``me_map`` may carry a "srcs" dict of
        ``source key -> ME id``. Every listed ME is imported from the "epi"
        source and stored in ``self.me_dict`` keyed by ME id. The
        "profound_prop" source is imported through a separate
        ``SquareImport`` whose measure_id is [18] (proportion); all other
        sources use this instance's own ``import_square``.

        Args:
            me_map: mapping of mapper name -> mapper dict.
            **kwargs: forwarded unchanged to the parent constructor.
        """
        super(Split, self).__init__(**kwargs)

        # draws per modelable entity, key=me_id
        self.me_map = me_map
        self.me_dict = {}

        # import every input and create a dictionary of dataframes
        for mapper in me_map.values():
            for src_key, me_id in mapper.get("srcs", {}).items():
                request = {"modelable_entity_id": me_id}

                if src_key != "profound_prop":
                    print("Inside else, retrieving meid {}".format(me_id))
                    self.me_dict[me_id] = self.import_square(
                        gopher_what=request, source="epi")
                    continue

                print("Inside prop, retrieving meid {}".format(me_id))
                # proportions live under a different measure_id, so they
                # need an importer built on their own dimensions
                prop_dims = copy.deepcopy(self.idx_dmnsns)
                prop_dims["measure_id"] = [18]  # proportion
                prop_importer = draws.SquareImport(idx_dmnsns=prop_dims)
                self.me_dict[me_id] = prop_importer.import_square(
                    gopher_what=request, source="epi")

Example #5

0

Show file

    def __init__(self, male_prop_id, female_prop_id, exp_id, env_id, year_id):
        """Build the base importer and load all inputs for the given year.

        Args:
            male_prop_id: ME id passed to ``import_sex_prop`` with sex_id 1.
            female_prop_id: ME id passed to ``import_sex_prop`` with
                sex_id 2.
            exp_id: ME id passed to ``import_exposure``.
            env_id: ME id passed to ``import_couples``.
            year_id: value stored under "year_id" in the base dimensions.
        """
        # Local import keeps this block self-contained if the module does
        # not already import copy.
        import copy

        # base dimensions importer
        # Deep-copy the defaults: assigning into the class-level dict would
        # change the defaults seen by every other SquareImport user.
        dim = copy.deepcopy(draws.SquareImport.default_idx_dmnsns)
        dim["year_id"] = year_id
        dim["measure_id"] = [5]
        self.base_importer = draws.SquareImport(idx_dmnsns=dim)

        # import and scale sex proportions
        self.male_prop = self.import_sex_prop(male_prop_id, 1)
        self.female_prop = self.import_sex_prop(female_prop_id, 2)
        self.scale_props()

        # import exposure and env
        self.exposure = self.import_exposure(exp_id)
        self.envelope = self.import_couples(env_id)

Example #6

0

Show file

    def __init__(self, me_map, cause_name, **kwargs):
        """Initialise the parent importer and load all source draws.

        Every ``source key -> ME id`` pair under each mapper's "srcs" is
        imported from the "epi" source and filed by key:

        * "tot"  -> ``self.csmr_dict`` (imported with measure_id [15]);
          skipped entirely for the "Other" mapper.
        * "sqzd" -> ``self.sqz_dict``
        * anything else -> ``self.hrtf_dict``

        Args:
            me_map: mapping of mapper name -> mapper dict.
            cause_name: stored on the instance as ``self.cause_name``.
            **kwargs: forwarded unchanged to the parent constructor.
        """
        super(HrtFailImpCong, self).__init__(**kwargs)

        # store data by me in these dicts: key=me_id, val=dataframe
        self.me_map = me_map
        self.cause_name = cause_name
        self.csmr_dict = {}
        self.hrtf_dict = {}
        self.sqz_dict = {}
        self.out_dict = {}

        # import_square() fetches draws for one me_id at a time and shapes
        # the dataframe for downstream manipulation (per the original
        # author's note it is what calls the get_draws shared function).
        for mapper_key, mapper in me_map.items():
            for key, me_id in mapper.get("srcs", {}).items():
                is_total = key == "tot"
                if is_total and mapper_key == "Other":
                    # nothing is imported for the total of "Other"
                    continue

                request = {"modelable_entity_id": me_id}

                if is_total:
                    # import_square builds its multiindex from the
                    # dimensions the importer was created with, so csmr
                    # (a different measure_id) cannot go through
                    # self.import_square; use a dedicated SquareImport
                    # carrying the csmr dimensions instead.
                    dim_csmr = copy.deepcopy(self.idx_dmnsns)
                    # 15=cause-specific mortality rate (csmr)
                    dim_csmr["measure_id"] = [15]
                    csmr = draws.SquareImport(idx_dmnsns=dim_csmr)
                    self.csmr_dict[me_id] = csmr.import_square(
                        gopher_what=request, source="epi")
                    continue

                store = self.sqz_dict if key == "sqzd" else self.hrtf_dict
                store[me_id] = self.import_square(
                    gopher_what=request, source="epi")

Example #7

0

Show file

    def calc_heart_failure(self):
        """Split heart-failure severities across sub-groups by CSMR share.

        Steps, all at draw level:

        1. Import CSMR draws (measure_id 15) for every non-"bundle" source
           of each mapper whose "type" is "sub_group".
        2. Turn each CSMR frame into its proportion of the summed CSMR and
           re-label it as measure_id 5 so it aligns with ``self.me_dict``.
        3. For each sub-group and severity, store
           ``heart failure severity * CSMR proportion`` under the
           sub-group's target ME id.
        4. Store ``squeezed - sum(severities)``, floored at 0, under the
           sub-group's "none" target ME id.
        5. Copy the "Other" squeezed target into its "none" target.

        Results are written into ``self.me_dict``; nothing is returned.

        Raises:
            AssertionError: if a sanity check on the proportions, the
                severity products or the residual fails.
        """
        # A real list of index level names; dict views are not accepted
        # everywhere a list is (groupby(level=...), set_index) on Python 3.
        idx_names = list(self.idx_dmnsns.keys())

        # grab dimensions used in this class instantiation and modify them
        # to get CSMR; since code is parallelized by year, year_id should
        # already be reduced to one year
        dim_hf = copy.deepcopy(self.idx_dmnsns)
        dim_hf["measure_id"] = [15]  # cause-specific mortality rate (csmr)

        # import_square builds its multiindex from the dimensions the
        # importer was created with, so csmr (a different measure_id)
        # cannot go through self.import_square; use a dedicated
        # SquareImport carrying the csmr dimensions instead.
        heart_failure = draws.SquareImport(idx_dmnsns=dim_hf)

        # get draws for csmr measure_id and src me_ids
        csmr_dict = {}
        csmr_prop_dict = {}
        for sub_group, mapper in self.me_map.items():
            if mapper["type"] == "sub_group":
                inputs = mapper.get("srcs", {})
                for src_key, me_id in inputs.items():
                    if src_key != "bundle":
                        csmr_dict[me_id] = heart_failure.import_square(
                            gopher_what={"modelable_entity_ids": [me_id]},
                            source="dismod")

        # aggregate csmr subgroups (measure_id = 15)
        csmr_sub_causes = pd.DataFrame()
        for me_id in csmr_dict.keys():
            csmr_sub_causes = pd.concat([csmr_sub_causes, csmr_dict[me_id]])
        # collapse on keys
        sigma_csmr_sub_causes = csmr_sub_causes.groupby(
            level=idx_names).sum()

        # calc proportions for csmr me_ids
        for me_id in csmr_dict.keys():
            # csmr_dict[me_id] is not changed in this process, no copy is
            # necessary
            prop = csmr_dict[me_id] / sigma_csmr_sub_causes
            prop.fillna(0, inplace=True)
            assert (prop[self.draw_cols] > 1.0).any().any() == False
            assert (prop[self.draw_cols] < 0.0).any().any() == False
            assert prop.isnull().values.any() == False

            # reindex prop df so that measure_id is 5 instead of 15, that
            # way it can be joined with the other dataframes (everything
            # else stays the same, only the measure_id changes)
            prop = prop.reset_index()
            prop.loc[:, 'measure_id'] = 5
            prop = prop.set_index(idx_names)
            prop = prop[self.draw_cols]
            prop = pd.concat([self.index_df, prop], axis=1)
            csmr_prop_dict[me_id] = prop

        # incorporate heart failure severities
        for sub_group, mapper in self.me_map.items():
            if mapper["type"] == "sub_group":

                for severity in ["mild", "moderate", "severe"]:
                    split_trg_id = self.me_map[sub_group]['trgs'][severity]
                    hf_src_id = self.me_map['heart failure']['srcs'][severity]
                    csmr_id = self.me_map[sub_group]['srcs']['tot']

                    # At draw level, multiply heart failure severity X CSMR
                    # proportion -> save to the target_modelable_entity_id
                    self.me_dict[split_trg_id] = (
                        self.me_dict[hf_src_id] * csmr_prop_dict[csmr_id])
                    # Spot-check the first value of the product. The
                    # original compared against an undefined name
                    # (csmr_df); the factor actually used above is the
                    # CSMR proportion frame.
                    assert self.me_dict[split_trg_id]['draw_0'][0] == (
                        self.me_dict[hf_src_id]['draw_0'][0]
                        * csmr_prop_dict[csmr_id]['draw_0'][0])

                # aggregate congenital heart failure severity groups
                severity_df = pd.DataFrame()
                for severity in ["mild", "moderate", "severe"]:
                    split_trg_id = self.me_map[sub_group]['trgs'][severity]
                    severity_df = pd.concat(
                        [severity_df, self.me_dict[split_trg_id]])
                # collapse on keys
                sigma_severity_df = severity_df.groupby(
                    level=idx_names).sum()

                # calc non heart failure portion and save to the
                # target_modelable_entity_id
                sqzd_id = self.me_map[sub_group]['trgs']['sqzd']
                if str(sqzd_id) == '10937':
                    sqzd_df = self.calc_vsd_asd_asymptomatic()
                else:
                    sqzd_df = self.me_dict[sqzd_id].copy()
                residual = sqzd_df - sigma_severity_df
                residual[residual < 0] = 0
                assert (residual < 0.0).any().any() == False
                assert residual.isnull().values.any() == False
                # Spot-check the first value against the floored
                # difference; comparing with the raw difference would fail
                # whenever that value had just been clipped to 0.
                first_raw = (sqzd_df['draw_0'][0]
                             - sigma_severity_df['draw_0'][0])
                assert residual['draw_0'][0] == max(first_raw, 0)
                non_hf_trg_id = self.me_map[sub_group]['trgs']['none']
                self.me_dict[non_hf_trg_id] = residual

        # copy cong_heart sqzd other into "15756 Congenital heart disease
        # due to other congenital cardiovascular anomalies before ID
        # severity split". no other calculations are necessary at this
        # point in time.
        other_none_id = self.me_map["Other"]['trgs']['none']
        other_sqzd_id = self.me_map["Other"]['trgs']['sqzd']
        self.me_dict[other_none_id] = self.me_dict[other_sqzd_id].copy()