コード例 #1
0
ファイル: input.py プロジェクト: zhouxm4/ihme-modeling
    def get_full_envelope(self):
        '''
        (self) -> None

        Swap the age-restricted envelope for the all-ages envelope.

        For models with age restrictions, the all-ages group should use the
        envelope for all ages. For each aggregated data frame (location-year,
        country-year, region-year, super-region-year and global-year) the
        existing 'envelope' column is dropped and replaced by the 'mean'
        column returned by get_envelope for age_group_id=22, matched on
        location_id and year.

        The merge is an inner join, so aggregate rows without a matching
        envelope row are dropped from self.agg_dfs[df_type].
        '''
        # Parenthesised form prints the same text on Python 2 and is valid
        # syntax on Python 3.
        print("We're definitely pulling the full envelope.")
        for df_type in [
                'age_location', 'age_country', 'age_region',
                'age_super_region', 'global'
        ]:
            agg_df = self.agg_dfs[df_type]
            agg_df.drop('envelope', inplace=True, axis=1)
            # Only the distinct ids are needed for the query; the frame holds
            # one row per demographic cell, so the raw columns repeat heavily.
            locs = agg_df.location_id.unique().tolist()
            years = agg_df.year.unique().tolist()
            # get the envelope, then merge in with the aggregated dfs
            env_df = get_envelope(age_group_id=22,
                                  gbd_round_id=self.gbd_round_id,
                                  location_id=locs,
                                  sex_id=self.sex_id,
                                  year_id=years)
            # Rename on a fresh frame instead of in place on a column slice.
            env_df = env_df[['location_id', 'year_id', 'mean']].rename(
                columns={'mean': 'envelope', 'year_id': 'year'})
            self.agg_dfs[df_type] = agg_df.merge(
                env_df, on=['location_id', 'year'])
コード例 #2
0
ファイル: data.py プロジェクト: zhouxm4/ihme-modeling
    def get_data(self, id_template_df):
        """Build the mortality-rate input data indexed by ``self._data_key``.

        The demographic ids found in ``id_template_df`` are used to pull the
        envelope (with_hiv=1, with_shock=0) and the population. The envelope
        mean/lower/upper are divided by population, rows are filtered through
        ``self.drop_zeros_nulls``, and the result is merged back onto
        ``id_template_df`` before the standard-error helpers are applied.

        Raises:
            NoNonZeroValues: if no rows remain after filtering and merging.
        """
        demographic_cols = ["location_id", "year_id", "age_group_id", "sex_id"]
        value_cols = ["mean", "lower", "upper"]

        demographics = {
            "age_group_id": id_template_df.age_group_id.tolist(),
            "location_id": id_template_df.location_id.tolist(),
            "year_id": id_template_df.year_id.tolist(),
            "sex_id": id_template_df.sex_id.tolist(),
        }

        # deaths and population for the same demographic cells
        deaths = get_envelope(with_hiv=1, with_shock=0, **demographics)
        population = get_population(**demographics)
        rates = deaths.merge(population, on=demographic_cols)

        # counts -> rates, one value column at a time
        for value_col in value_cols:
            rates[value_col] = rates[value_col] / rates["population"]

        rates = self.drop_zeros_nulls(rates, *value_cols)

        # bring in the columns of the template (input_data_key)
        rates = rates.merge(id_template_df, on=demographic_cols)
        if rates.empty:
            raise NoNonZeroValues

        # standard error from the uncertainty interval, then aggregate it
        with_se = self.calc_se_from_ui(rates, *value_cols)
        aggregated = self.calc_aggregate_se(
            with_se, self._data_key, "mean", "se")
        return aggregated.set_index(self._data_key)
コード例 #3
0
ファイル: WriteResults.py プロジェクト: cheth-rowe/ihmexp
 def get_full_envelope(self):
     """
     Replace the age-restricted envelope with the all-ages envelope.

     For models with age restrictions, the all-ages group should use envelope
     for all ages. For every data frame type in self.age_aggregate_dfs the
     existing 'envelope' column is dropped and replaced by the 'mean' column
     that get_envelope returns for the 'all_age_group_id' model parameter,
     matched on location_id and year_id.

     The merge is an inner join, so aggregate rows without a matching
     envelope row are dropped from self.agg_dfs[df_type].
     """
     # The two literals are concatenated, so the first must end in a space.
     logger.info("Pulling in the full envelope in order to have a correct "
                 "envelope for age-restricted causes.")
     params = self.model_metadata.model_parameters
     for df_type in self.age_aggregate_dfs:
         agg_df = self.agg_dfs[df_type]
         agg_df.drop('envelope', inplace=True, axis=1)
         # Query with distinct ids only; the raw columns repeat per row.
         locs = agg_df.location_id.unique().tolist()
         years = agg_df.year_id.unique().tolist()
         # get the envelope, then merge in with the aggregated dfs
         env_df = get_envelope(
             age_group_id=params['all_age_group_id'],
             gbd_round_id=params['gbd_round_id'],
             location_id=locs,
             sex_id=params['sex_id'],
             year_id=years,
             decomp_step=params['decomp_step'],
             run_id=params['env_run_id'])
         # Rename on a fresh frame instead of in place on a column slice.
         env_df = env_df[['location_id', 'year_id', 'mean']].rename(
             columns={'mean': 'envelope'})
         self.agg_dfs[df_type] = agg_df.merge(
             env_df, on=['location_id', 'year_id'])
コード例 #4
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def mortQuery(sex, start_year, start_age, end_age, location_set_version_id,
              gbd_round, db_connection):
    '''
    Strings indicating model parameters -> Pandas Data Frame

    Given a set of model parameters will query from the mortality database and
    return a pandas data frame. The data frame contains the base variables
    used in the CODEm process.

    The envelope and population are pulled for the locations returned by
    locQuery, the ages between start_age and end_age (inclusive) and the
    years start_year..gbd_round (inclusive), then inner-merged on the
    demographic ids. The result has the columns renamed to 'age', 'year',
    'sex', 'envelope' and 'pop'.
    '''
    loc_df = locQuery(location_set_version_id, db_connection)
    loc_list = loc_df.location_id.values.tolist()
    age_df = createAgeDF(db_connection)
    age_restrict = "all_ages >= {0} & all_ages <= {1}".format(start_age,
                                                              end_age)
    age_list = age_df.query(age_restrict).all_ages.values.tolist()
    # Materialise the years: on Python 3 a bare range() is a lazy object,
    # not a list. On Python 2 this is a no-op copy.
    year_list = list(range(start_year, gbd_round + 1))
    # NOTE(review): other callers in this code base pass location_set_id=35;
    # confirm that `location_set` is the keyword this get_envelope expects.
    env = get_envelope(age_group_id=age_list,
                       sex_id=sex,
                       year_id=year_list,
                       location_set=35,
                       location_id=loc_list)
    pop = get_population(age_group_id=age_list,
                         sex_id=sex,
                         year_id=year_list,
                         location_set=35,
                         location_id=loc_list)

    df = pd.merge(env, pop, on=['age_group_id', 'location_id', 'year_id',
                                'sex_id'])
    # Both inputs carry a run_id column, hence the _x/_y suffixes.
    df.drop(['upper', 'lower', 'run_id_x', 'run_id_y'], axis=1, inplace=True)
    df.rename(columns={'age_group_id': 'age', 'year_id': 'year',
                       'sex_id': 'sex', 'mean': 'envelope',
                       'population': 'pop'}, inplace=True)
    return df
コード例 #5
0
ファイル: mortality_inputs.py プロジェクト: cheth-rowe/ihmexp
def _get_envelope_summary(version: MachineParameters) -> pd.DataFrame:
    """
    Query the envelope for a version and trim it to the summary columns.

    Unpacks the demographic and run settings stored on ``version``, calls
    get_envelope with shocks and HIV both switched off, renames the mean
    column to the envelope column and keeps only the demographic index plus
    that envelope column.

    Arguments:
        version (MachineParameters): object exposing all_age_group_ids,
            location_ids, year_ids, all_sex_ids, decomp_step, gbd_round_id
            and envelope_version_id.

    Return:
        pd.DataFrame
    """
    cols = constants.Columns
    envelope = get_envelope(
        age_group_id=version.all_age_group_ids,
        location_id=version.location_ids,
        year_id=version.year_ids,
        sex_id=version.all_sex_ids,
        decomp_step=version.decomp_step,
        gbd_round_id=version.gbd_round_id,
        run_id=version.envelope_version_id,
        with_shock=0,
        with_hiv=0,
    )
    renamed = envelope.rename(columns={cols.MEAN: cols.ENVELOPE})
    return renamed[cols.DEMOGRAPHIC_INDEX + [cols.ENVELOPE]]
コード例 #6
0
ファイル: envelope.py プロジェクト: cheth-rowe/ihmexp
    def __init__(self,
                 gbd_round_id: int = gbd.GBD_ROUND_ID,
                 decomp_step: str = gbd.decomp_step.ONE):
        """Store the round/step and look up the matching envelope run id.

        get_envelope is called with only the round and decomp step; the
        single value in its ``run_id`` column is kept as ``self._run_id``
        (``.item()`` raises if that column does not hold exactly one value).
        """
        self.gbd_round_id: int = gbd_round_id
        self.decomp_step: str = decomp_step

        envelope = get_envelope(
            gbd_round_id=self.gbd_round_id,
            decomp_step=self.decomp_step,
        )
        self._run_id: int = envelope.run_id.item()
コード例 #7
0
def get_all_cause_mortality():
    """Pull the envelope as rates and write it to all_cause_mortality.csv.

    Demographics come from db.get_demographics for the "epi" team; the
    envelope is requested with with_hiv=1 and rates=1. A 'std' column is
    derived from the uncertainty interval, 'run_id' is dropped, and the
    result is written under out_dir_rate/02_temp/03_data.

    Relies on the module-level names ``ds``, ``gbd_round_id`` and
    ``out_dir_rate``.
    """
    # NOTE(review): demographics use a literal round 6 while the envelope
    # uses the module-level gbd_round_id; confirm these are meant to agree.
    dems = db.get_demographics(gbd_team="epi", gbd_round_id=6)
    mortality = db.get_envelope(
        age_group_id=dems["age_group_id"],
        location_id=dems["location_id"],
        year_id=dems["year_id"],
        sex_id=dems["sex_id"],
        with_hiv=1,
        rates=1,
        decomp_step=ds,
        gbd_round_id=gbd_round_id,
    )
    # standard error: width of the interval divided by 3.92 (2 * 1.96)
    interval_width = mortality["upper"] - mortality["lower"]
    mortality["std"] = interval_width / 3.92
    mortality = mortality.drop(['run_id'], axis=1)

    out_path = os.path.join(
        out_dir_rate, "02_temp/03_data", "all_cause_mortality.csv")
    mortality.to_csv(out_path, index=False)
コード例 #8
0
def pull_mort_vers(gbd_round_id):
    """Return the run_id of the with-shock, with-HIV envelope for a round.

    A single demographic cell (location 1, sex 1, age group 10, the year
    given by gbdr(gbd_round_id)) is queried so that the result's 'run_id'
    column holds exactly one value, which is returned.
    """
    query = dict(
        gbd_round_id=gbd_round_id,
        with_shock=1,
        with_hiv=1,
        location_id=1,
        sex_id=1,
        age_group_id=10,
        year_id=gbdr(gbd_round_id),
    )
    return get_envelope(**query)['run_id'].item()
コード例 #9
0
def get_env():
    """Return the envelope for years 1980-2029 with 'mean' renamed.

    The decomp step and round are read from utils.get_gbd_parameter. Age
    group, location and sex are all requested as -1 within location set 8.
    The 'mean' column of the result is renamed to 'mean_env'.
    """
    decomp = utils.get_gbd_parameter('current_decomp_step')
    round_id = utils.get_gbd_parameter('current_gbd_round')
    envelope = get_envelope(
        age_group_id=-1,
        location_id=-1,
        location_set_id=8,
        year_id=list(range(1980, 2030)),
        sex_id=-1,
        decomp_step=decomp,
        gbd_round_id=round_id,
    )
    return envelope.rename(columns={"mean": "mean_env"})
コード例 #10
0
def load_mortality_envelope(location_id_list, age_group_list, year_list):
    """Return all-cause death rates for sexes 1 and 2.

    The envelope and population are pulled for the requested locations,
    years and age groups, inner-merged on the demographic ids, and the
    envelope mean is divided by population. Only the id columns and
    'death_rate' are returned.
    """
    id_cols = ['location_id', 'year_id', 'sex_id', 'age_group_id']
    shared = dict(
        location_id=location_id_list,
        year_id=year_list,
        age_group_id=age_group_list,
    )

    deaths = get_envelope(sex_id=[1, 2], **shared)
    deaths = deaths.rename(columns={'mean': 'envelope'})
    population = get_population(sex_id=[1, 2], **shared)

    merged = deaths.merge(population, on=id_cols)
    merged['death_rate'] = merged['envelope'] / merged['population']
    return merged[id_cols + ['death_rate']]
コード例 #11
0
ファイル: emr_tb.py プロジェクト: zhouxm4/ihme-modeling
def combined_get_model_results(prev_filepath=None, inc_filepath=None, model_version_id=152216):
    """Assemble the prevalence- and incidence-based inputs for the EMR calculation.

    Prevalence and incidence come either from the given Excel files or, when
    a path is not supplied, from get_cov_adjusted_data(model_version_id)
    filtered to measure_id 5 (prevalence) or 6 (incidence). Both are
    restricted to the hard-coded location list and to outlier_type_id == 0.
    They are then merged with the custom CSMR file, and for incidence also
    with the all-cause mortality rate from get_envelope and the output of
    get_emr_pred.

    Args:
        prev_filepath: optional path to an Excel file of prevalence data.
        inc_filepath: optional path to an Excel file of incidence data.
        model_version_id: model version passed to get_cov_adjusted_data and
            get_emr_pred.

    Returns:
        Tuple ``(merge_prev, merge_inc)`` of pandas DataFrames.
    """
    age_ids = [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,30,31,32,33]
    year_ids = [1990,1995,2000,2005,2010,2016]
    sex_ids = [1,2]
    location_id = [6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 33, 34,
    35, 36, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63, 66,
    67, 68, 69, 71, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 97,
    98, 99, 101, 102, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 121, 122, 123, 125,
    126, 127, 128, 129, 130, 131, 132, 133, 135, 136, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
    151, 152, 153, 154, 155, 156, 157, 160, 161, 162, 163, 164, 165, 168, 169, 170, 171, 172, 173, 175, 176, 177,
    178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202,
    203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 298, 305, 349, 351, 354, 376,
    385, 422, 433, 434, 435, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537,
    538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558,
    559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 4636, 4643, 4644, 4645, 4646,
    4647, 4648, 4650, 4651, 4652, 4653, 4655, 4656, 4657, 4658, 4659, 4660, 4661, 4662, 4663, 4664, 4665, 4666,
    4667, 4668, 4669, 4670, 4671, 4672, 4673, 4674, 4753, 4756, 4757, 4758, 4760, 4761, 4762, 4765, 4768, 4770,
    4771, 4772, 4773, 4775, 4940, 4944, 35424, 35425, 35426, 35427, 35428, 35429, 35430, 35431, 35432, 35433,
    35434, 35435, 35436, 35437, 35438, 35439, 35440, 35441, 35442, 35443, 35444, 35445, 35446, 35447, 35448,
    35449, 35450, 35451, 35452, 35453, 35454, 35455, 35456, 35457, 35458, 35459, 35460, 35461, 35462, 35463,
    35464, 35465, 35466, 35467, 35468, 35469, 35470, 44643, 44644, 44645, 44646, 44647, 44648, 44649, 44650,
    44651, 44652, 44653, 44654, 44655, 44656, 44657, 44658, 44659, 44660, 44661, 44662, 44663, 44664, 44665,
    44666, 44667, 44668, 44669, 44670, 44671, 44672, 44673, 44674, 44675, 44676, 44677, 44678, 44679, 44680,
    44681, 44682, 44683, 44684, 44685, 44686, 44687, 44688, 44689, 44690, 44691, 44692, 44693, 44694, 44695,
    44696, 44697, 44698, 44699, 44700, 44701, 44702, 44703, 44704, 44705, 44706, 44707, 44708, 44709, 44710,
    44711, 44712, 44713, 44714, 44715, 44716, 44717, 44718, 44719, 44720, 44721, 44722, 44723, 44724, 44725,
    44726, 44727, 44728, 44729, 44730, 44731, 44732, 44733, 44734, 44735, 44736, 44737, 44738, 44739, 44740,
    44741, 44742, 44743, 44744, 44745, 44746, 44747, 44748, 44749, 44750, 44751, 44752, 44753, 44754, 44755,
    44756, 44757, 44758, 44759, 44760, 44761, 44762, 44763, 44764, 44765, 44766, 44767, 44768, 44769, 44770,
    44771, 44772,  44773, 44774, 44775, 44776, 44777, 44778, 44779, 44780, 44781, 44782, 44783, 44784,
    44785, 44786, 44787, 44788, 44789, 44790, 44791, 44792]

    # The covariate-adjusted data serves both measures; query it at most once.
    all_covariate_adjusted_data = None

    #get incidence and prevalence data
    if (prev_filepath):
        # str.format matches the {} placeholder; the % operator raised
        # TypeError here because the string has no %-style specifier.
        print("Using file {} for prev".format(prev_filepath))
        prev = pd.read_excel(prev_filepath)
    else:
        print("querying epi database for prev...")
        all_covariate_adjusted_data = get_cov_adjusted_data(model_version_id)
        prev = all_covariate_adjusted_data.loc[all_covariate_adjusted_data['measure_id']==5]

    if (inc_filepath):
        print("Using file {} for inc".format(inc_filepath))
        inc = pd.read_excel(inc_filepath)
    else:
        print("querying epi database for inc...")
        if all_covariate_adjusted_data is None:
            all_covariate_adjusted_data = get_cov_adjusted_data(model_version_id)
        inc = all_covariate_adjusted_data.loc[all_covariate_adjusted_data['measure_id']==6].copy()

    # Standard errors below are the 95% interval width divided by 2 * 1.96.
    prev = prev.loc[prev['location_id'].isin(location_id)]
    prev = prev.loc[prev['outlier_type_id'] == 0]
    prev['prev_se'] = (prev["upper"] - prev["lower"]) / (2*1.96)
    prev = prev.rename(columns={'mean':'prev_mean', 'lower':'prev_lower', 'upper':'prev_upper'})
    prev = adj_data_template(df=prev)

    inc = inc.loc[inc['location_id'].isin(location_id)]
    inc = all_fourplus_locs(inc)
    inc = inc.loc[inc['outlier_type_id'] == 0]
    inc['inc_se'] = (inc["upper"] - inc["lower"]) / (2*1.96)
    inc = inc.rename(columns={'mean':'inc_mean', 'lower':'inc_lower', 'upper':'inc_upper'})
    inc = adj_data_template(df=inc)

    #load custom (HIV-neg + HIV-pos) csmr
    print("loading custom csmr data...")
    csmr = pd.read_csv("FILEPATH")
    csmr['csmr_se'] = (csmr["upper"] - csmr["lower"]) / (2*1.96)
    csmr = csmr.rename(columns={'mean':'csmr_mean', 'lower':'csmr_lower', 'upper':'csmr_upper'})
    csmr = csmr[['age_group_id', 'location_id', 'year_id', 'sex_id', 'csmr_mean', 'csmr_se', 'csmr_lower', 'csmr_upper']].copy()

    #get acmr data
    print("querying get_envelope for acmr...")
    acmr = get_envelope(age_group_id=age_ids, location_id=location_id, year_id=year_ids, sex_id=sex_ids, gbd_round_id=4, status='best', rates=1)
    acmr['acmr_se'] = (acmr["upper"] - acmr["lower"]) / (2*1.96)
    acmr = acmr.rename(columns={'mean':'acmr_mean', 'lower':'acmr_lower', 'upper':'acmr_upper'})

    #get emr-predicted data
    print('pulling global emr-pred numbers...')
    emrpred = get_emr_pred(model_version_id)

    #merge data required for incidence-based emr calculation
    print('merging dataframes...')
    merge_inc = pd.merge(left=inc, right=csmr, on=['age_group_id', 'sex_id', 'year_id', 'location_id'], how='inner')
    merge_inc = pd.merge(left=merge_inc, right=acmr, on=['age_group_id', 'sex_id', 'year_id', 'location_id'], how='inner')
    merge_inc = pd.merge(left=merge_inc, right=emrpred, on=['age_group_id', 'sex_id', 'year_id'], how='inner')
    # emr-pred is only global (location_id=1) and is not merged on location,
    # so the left-hand location survives as location_id_x; restore it.
    merge_inc['location_id']=merge_inc['location_id_x']

    #remission should equal 2. upper and lower bounds 1.8-2.2
    merge_inc['rem_mean'] = 2
    # (2.2 - 1.8) / (2 * 1.96)
    merge_inc['rem_se'] = .1020408

    #merge data required for prevalence-based emr calculation
    merge_prev = pd.merge(left=prev, right=csmr, on=['age_group_id', 'sex_id', 'year_id', 'location_id'], how='inner')

    return (merge_prev, merge_inc)
コード例 #12
0
ファイル: get_mort_pop.py プロジェクト: zhouxm4/ihme-modeling
def get_mortality(dems, shock):
    """Return the all-cause mortality envelope as rates, HIV included.

    ``dems`` supplies the age_group_id, location_id, year_id and sex_id
    entries; ``shock`` is forwarded unchanged as ``with_shock``.
    """
    print("Getting mortality")
    envelope_query = {
        "age_group_id": dems["age_group_id"],
        "location_id": dems["location_id"],
        "year_id": dems['year_id'],
        "sex_id": dems['sex_id'],
        "with_hiv": 1,
        "with_shock": shock,
        "rates": 1,
        "gbd_round_id": help.GBD_ROUND,
    }
    return db.get_envelope(**envelope_query)
コード例 #13
0
ファイル: get_mort_pop.py プロジェクト: zhouxm4/ihme-modeling
def get_best_version():
    """Return the run_id found in row 0 of the with-HIV, no-shock envelope.

    The envelope is queried for help.GBD_ROUND and help.LAST_YEAR only.
    """
    envelope = db.get_envelope(
        gbd_round_id=help.GBD_ROUND,
        year_id=help.LAST_YEAR,
        with_hiv=True,
        with_shock=False,
    )
    run_id = envelope.loc[0, 'run_id']
    return run_id
コード例 #14
0
def combined_get_model_results(gbd_id=None,
                                location_id='all',
                                prev_filepath=None,
                                inc_filepath=None,
                                model_version_id=263738):
    """Assemble the prevalence- and incidence-based inputs for the EMR calculation.

    Prevalence (measure_id 5) and incidence (measure_id 6) come either from
    the given Excel files or from get_model_results. Both are left-merged
    with the custom CSMR file; incidence is additionally left-merged with
    the all-cause mortality rate from get_envelope and with the output of
    get_emr_pred.

    Args:
        gbd_id: id forwarded to get_model_results.
        location_id: location filter forwarded to get_model_results and
            get_envelope. Defaults to 'all'.
        prev_filepath: optional path to an Excel file of prevalence data.
        inc_filepath: optional path to an Excel file of incidence data.
        model_version_id: model version passed to get_emr_pred.

    Returns:
        Tuple ``(merge_prev, merge_inc)`` of pandas DataFrames.
    """
    age_ids = [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,30,31,32,235]
    year_ids = [1990,1995,2000,2005,2010,2017]
    sex_ids = [1,2]
    demographic_cols = ['age_group_id', 'sex_id', 'year_id', 'location_id']

    #get incidence and prevalence data
    if (prev_filepath):
        print("Using file for prev")
        prev = pd.read_excel(prev_filepath)
    else:
        print("querying get_model_results for prev...")
        # location_id is the caller's argument; it used to be ignored in
        # favour of a hard-coded 'all'.
        prev = get_model_results('epi',
                                gbd_id=gbd_id,
                                measure_id=5,
                                location_id=location_id,
                                year_id=year_ids,
                                age_group_id=age_ids,
                                sex_id=sex_ids,
                                status='best',
                                gbd_round_id=4)

    if (inc_filepath):
        print("Using file for inc")
        inc = pd.read_excel(inc_filepath)
    else:
        print("querying get_model_results for inc...")
        inc = get_model_results('epi',
                                gbd_id=gbd_id,
                                measure_id=6,
                                location_id=location_id,
                                year_id=year_ids,
                                age_group_id=age_ids,
                                sex_id=sex_ids,
                                status='best',
                                gbd_round_id=4)

    # The inputs already carry a standard_error column; only rename it.
    prev = prev.rename(columns={'mean':'prev_mean',
                                'lower':'prev_lower',
                                'upper':'prev_upper',
                                'standard_error':'prev_se'})
    inc = inc.rename(columns={'mean':'inc_mean',
                            'lower':'inc_lower',
                            'upper':'inc_upper',
                            'standard_error':'inc_se'})

    #load custom (HIV-neg + HIV-pos) csmr
    print("loading custom csmr data...")
    csmr = pd.read_csv("FILEPATH")
    csmr = csmr.rename(columns={'mean':'csmr_mean',
                                'lower':'csmr_lower',
                                'upper':'csmr_upper',
                                'standard_error':'csmr_se'})
    csmr = csmr[['age_group_id',
                'location_id',
                'year_id',
                'sex_id',
                'csmr_mean',
                'csmr_se',
                'csmr_lower',
                'csmr_upper']].copy()

    #get acmr data
    print("querying get_envelope for acmr...")
    # NOTE(review): the envelope is pulled for gbd_round_id=5 while the model
    # results above use gbd_round_id=4; confirm this is intentional.
    acmr = get_envelope(age_group_id=age_ids,
                        location_id=location_id,
                        year_id=year_ids,
                        sex_id=sex_ids,
                        gbd_round_id=5,
                        with_shock=1,
                        with_hiv=1,
                        rates=1)
    # standard error from the 95% interval width
    acmr['acmr_se'] = (acmr["upper"] - acmr["lower"]) / (2*1.96)
    acmr = acmr.rename(columns={'mean':'acmr_mean',
                                'lower':'acmr_lower',
                                'upper':'acmr_upper'})

    #get emr-predicted data
    emrpred = get_emr_pred(model_version_id)

    #merge data required for incidence-based emr calculation
    merge_inc = pd.merge(left=inc,
                        right=csmr,
                        on=demographic_cols,
                        how='left')

    merge_inc = pd.merge(left=merge_inc,
                        right=acmr,
                        on=demographic_cols,
                        how='left')

    # emr-pred is not merged on location, so a location_id column on both
    # sides comes back suffixed; the rename below restores the left one.
    merge_inc = pd.merge(left=merge_inc,
                        right=emrpred,
                        on=['age_group_id', 'sex_id', 'year_id'],
                        how='left')

    #remission should equal 2. upper and lower bounds 1.8-2.2
    merge_inc['rem_mean'] = 2
    # (2.2 - 1.8) / (2 * 1.96)
    merge_inc['rem_se'] = .1020408
    merge_inc = merge_inc.rename(columns={'location_id_x':'location_id'})

    #merge data required for prevalence-based emr calculation
    merge_prev = pd.merge(left=prev,
                        right=csmr,
                        on=demographic_cols,
                        how='left')
    return (merge_prev, merge_inc)
コード例 #15
0
# Index both frames on index_cols (in place) so that the join below aligns
# them by index.
env.set_index(index_cols, inplace=True)
env.sort_index(inplace=True)

prop.set_index(index_cols, inplace=True)
prop.sort_index(inplace=True)

# subset to columns that need to be adjusted
# Rows with adj == 0 get a proportion of 1, so the multiplication below
# leaves their draws unchanged.
final = env.join(prop)
final.loc[final['adj'] == 0, 'prop'] = 1

# Scale every draw column row-wise by the proportion.
final[draw_cols] = final[draw_cols].multiply(final['prop'], axis="index")

# pull in mortality draws
# Only the 'mean' column of the envelope is kept (plus the index columns);
# sex_id=2, age groups 7-15 and years 1980-2019 are requested.
mort = get_envelope(
    location_id=locs, location_set_id=25,
    age_group_id=list(range(7, 16)), sex_id=2,
    year_id=list(range(1980, 2020)),
    decomp_step=decomp.decomp_step_from_decomp_step_id(decomp_step_id),
    gbd_round_id=maternal_fns.GBD_ROUND_ID)
mort = mort[index_cols + ['mean']]
mort.set_index(index_cols, inplace=True)

# Left join on the index: rows of `final` without an envelope row get NaN
# for 'mean', and therefore NaN draws after the multiplication.
final = final.join(mort)
final[draw_cols] = final[draw_cols].multiply(final['mean'], axis="index")

# Tag every row with constant cause and measure ids.
final['cause_id'] = 366
final['measure_id'] = 1

# Turn the index columns back into regular columns before export.
final.reset_index(inplace=True)

logger.info("Exporting to %s" % out_dir)
final.to_hdf('%s/late_corrected_maternal_envelope.h5' % out_dir, key='draws',
コード例 #16
0
# Index both frames on index_cols (in place) so that the join below aligns
# them by index.
env.set_index(index_cols, inplace=True)
env.sort_index(inplace=True)

prop.set_index(index_cols, inplace=True)
prop.sort_index(inplace=True)

# subset to columns that need to be adjusted
# Rows with adj == 0 get a proportion of 1, so the multiplication below
# leaves their draws unchanged.
final = env.join(prop)
final.loc[final['adj'] == 0, 'prop'] = 1

# Scale every draw column row-wise by the proportion.
final[draw_cols] = final[draw_cols].multiply(final['prop'], axis="index")

# pull in mortality draws
# Only the 'mean' column of the envelope is kept (plus the index columns);
# sex_id=2, age groups 7-15 and years 1980-2017 are requested.
mort = get_envelope(location_id=locs,
                    location_set_id=25,
                    age_group_id=list(range(7, 16)),
                    sex_id=2,
                    year_id=list(range(1980, 2018)))
mort = mort[index_cols + ['mean']]
mort.set_index(index_cols, inplace=True)

# Left join on the index: rows of `final` without an envelope row get NaN
# for 'mean', and therefore NaN draws after the multiplication.
final = final.join(mort)
final[draw_cols] = final[draw_cols].multiply(final['mean'], axis="index")

# Tag every row with constant cause and measure ids.
final['cause_id'] = 366
final['measure_id'] = 1

# Turn the index columns back into regular columns before export.
final.reset_index(inplace=True)

logger.info("Exporting to %s" % out_dir)
final.to_hdf('%s/FILEPATH.h5' % out_dir,
コード例 #17
0
ファイル: demographics.py プロジェクト: cheth-rowe/ihmexp
def get_mortality_data(sex, start_year, start_age, end_age,
                       location_set_version_id, gbd_round_id, gbd_round,
                       decomp_step_id, db_connection, env_run_id, pop_run_id,
                       standard_location_set_version_id):
    """
    Model parameters -> pandas DataFrame of envelope, population and weight.

    Pulls the envelope and population for the model's locations, the ages
    between start_age and end_age (inclusive) and the years
    start_year..gbd_round (inclusive), inner-merges them on the demographic
    ids and passes the result through population_weights.

    The returned frame has the columns 'age', 'location_id', 'year', 'sex',
    'envelope', 'pop' and 'weight'.
    """
    merge_keys = ['age_group_id', 'location_id', 'year_id', 'sex_id']

    def year_span():
        # a fresh list for every consumer, first model year to gbd_round
        return list(range(start_year, gbd_round + 1))

    logger.info("Querying mortality and population.")
    loc_df = get_location_info(
        location_set_version_id=location_set_version_id,
        standard_location_set_version_id=standard_location_set_version_id,
        db_connection=db_connection)
    loc_list = loc_df.location_id.values.tolist()

    ages = create_age_df(db_connection)
    age_filter = "all_ages >= {0} & all_ages <= {1}".format(
        start_age, end_age)
    age_list = ages.query(age_filter).all_ages.values.tolist()

    envelope = get_envelope(
        age_group_id=age_list,
        sex_id=sex,
        year_id=year_span(),
        location_set_id=35,
        location_id=loc_list,
        gbd_round_id=gbd_round_id,
        decomp_step=decomp_step_from_decomp_step_id(decomp_step_id),
        run_id=env_run_id)

    population = get_population(
        age_group_id=age_list,
        sex_id=sex,
        year_id=year_span(),
        location_set_id=35,
        location_id=loc_list,
        gbd_round_id=gbd_round_id,
        decomp_step=decomp_step_from_decomp_step_id(decomp_step_id),
        run_id=pop_run_id)

    merged = envelope.merge(population, on=merge_keys)
    # both inputs carry run_id, hence the _x/_y suffixes after the merge
    merged = merged.drop(
        ['upper', 'lower', 'run_id_x', 'run_id_y'], axis=1)

    weighted = population_weights(
        merged,
        loc_df,
        age_group_id=age_list,
        sex_id=sex,
        year_id=year_span(),
        decomp_step_id=decomp_step_id,
        gbd_round_id=gbd_round_id,
        pop_run_id=pop_run_id,
        location_set_version_id=location_set_version_id,
        standard_location_set_version_id=standard_location_set_version_id)

    keep_cols = ['age_group_id', 'location_id', 'year_id', 'sex_id', 'mean',
                 'population', 'weight']
    new_names = {
        'age_group_id': 'age',
        'year_id': 'year',
        'sex_id': 'sex',
        'mean': 'envelope',
        'population': 'pop',
    }
    return weighted[keep_cols].rename(columns=new_names)
コード例 #18
0
# This retrieves mortality rates from IHME's database.
# Run this first in order to get access to db_queries.
# source /ihme/code/central_comp/miniconda/bin/activate gbd_env
from db_queries import get_envelope, get_age_metadata

gbd_round = 5

# NOTE(review): `population` holds the get_envelope result without rates=1,
# not the output of a population query; confirm the name matches the intent.
population = get_envelope(
    age_group_id="all",
    location_id="all",
    sex_id=1,
    gbd_round_id=gbd_round,
)
mortality_rate = get_envelope(
    age_group_id="all",
    location_id="all",
    sex_id=1,
    gbd_round_id=gbd_round,
    rates=1,
)
ages = get_age_metadata(age_group_set_id=12, gbd_round_id=gbd_round)

# Each table goes to its own key in mortality.h5 and to <key>.csv, in the
# order mortality, population, ages.
for table_name, table in (
        ("mortality", mortality_rate),
        ("population", population),
        ("ages", ages),
):
    table.to_hdf("mortality.h5", table_name, format="table", append=False)
    table.to_csv(table_name + ".csv", index=False)