Example #1
0
def main(version):
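    # Compute ratios for every IM-ratio ecode across all mortality years
    # (1990+) and sexes, summarize them by region, and write the combined
    # result for this version.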
    start = help.start_timer()
    # pull identifiers
    ratio_ecodes = inj_info.IM_RATIO_ECODES
    dems = db.get_demographics(gbd_team='cod', gbd_round_id=help.GBD_ROUND)
    sexes = dems['sex_id']
    mortyears = [x for x in dems['year_id'] if x >= 1990]
    regmap = get_region_map(dems['location_id'])
    # iterate over year sex and ecode to get the ratios all in one data frame
    final_list = []
    for ecode in ratio_ecodes:
        year_arr_list = []
        for year in mortyears:
            sex_arr_list = []
            for sex in sexes:
                print('{}, {}, sex {}'.format(ecode, year, sex))
                sys.stdout.flush()  # write to log file
                ratios = compute_ratio(ecode,
                                       str(versions.get_best_version(ecode)),
                                       year, sex)
                sex_arr_list.append(ratios)
            combined_sexes = xr.concat(sex_arr_list, 'sex_id')
            year_arr_list.append(combined_sexes)
        combined_years = xr.concat(year_arr_list, 'year_id')
        print('Summarize {}'.format(ecode))
        summarized = summarize(combined_years, regmap)
        final_list.append(summarized)
    final_ratios = xr.concat(final_list, pd.Index(ratio_ecodes, name='ecode'))

    print('Write results')
    write_results(final_ratios, version)

    help.end_timer(start)
Example #2
0
def main(decomp, version):
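    # Loop over IM-ratio ecodes, years (1990+), and sexes to compute ratios
    # for the given decomp step, summarize them by region, and write the
    # combined result.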
    start = help.start_timer()

    ratio_ecodes = inj_info.IM_RATIO_ECODES
    dems = db.get_demographics(gbd_team='cod', gbd_round_id=help.GBD_ROUND)
    sexes = dems['sex_id']
    mortyears = [x for x in dems['year_id'] if x >= 1990]

    regmap = get_region_map(dems['location_id'])
    final_list = []
    for ecode in ratio_ecodes:
        year_arr_list = []
        for year in mortyears:
            sex_arr_list = []
            for sex in sexes:
                ratios = compute_ratio(ecode, decomp,
                                       str(versions.get_best_version(ecode)),
                                       year, sex)
                sex_arr_list.append(ratios)
            combined_sexes = xr.concat(sex_arr_list, 'sex_id')
            year_arr_list.append(combined_sexes)
        combined_years = xr.concat(year_arr_list, 'year_id')
        print('Summarize {}'.format(ecode))
        summarized = summarize(combined_years, regmap)
        final_list.append(summarized)
    final_ratios = xr.concat(final_list, pd.Index(ratio_ecodes, name='ecode'))

    write_results(final_ratios, decomp, version)

    help.end_timer(start)
Example #3
0
def main(ecode, ncode, version):
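    # For one ecode/ncode pair, loop over years: write short-term split
    # incidence; for short-term ncodes also write short-term prevalence and
    # YLDs; for long-term ncodes subtract the fake long-term draw from the
    # ODE output, apply expert adjustments, and write the long-term results.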
    start = help.start_timer()

    if ncode in inj_info.ST_NCODES:
        print("Getting durations, percent treated, and disability weights...")
        if ecode in inj_info.SHOCK_ECODES:
            year_set = 'full'
        else:
            year_set = 'all'
        pct_treated = calculate_measures.pct_treated(
            year_id=year_set)  # defaults to 10% min treated, 75 haqi cap
        durations = calculate_measures.get_durations(pct_treated, ncode=ncode)
        dws = load_measures.disability_weights_st().loc[{'ncode': ncode}]

    if ecode in inj_info.SHOCK_ECODES:
        dems = db.get_demographics(gbd_team='cod', gbd_round_id=help.GBD_ROUND)
        years = [y for y in dems['year_id'] if y >= 1990]
    else:
        dems = db.get_demographics(gbd_team='epi', gbd_round_id=help.GBD_ROUND)
        years = dems['year_id']

    for year in years:
        print("----------------------------------")
        print("Working on {}".format(year))
        sys.stdout.flush()
        print(
            "(1): Get short-term split incidence & write collapsed incidence results."
        )
        incidence = load_measures.short_term_incidence_split(
            ecode, version, ncode, year)
        write_results(incidence, version, ecode, ncode, year, measure_id=6)

        if ncode in inj_info.ST_NCODES:  # only run st prev and ylds on short term ncodes

            # prevalence - also used for the fake long-term draw if this is a long-term ncode
            print("(2): Get short-term prevalence & write results.")
            st_prevalence = calculate_measures.compute_prevalence(
                incidence, durations)
            write_results(st_prevalence,
                          version,
                          ecode,
                          ncode,
                          year,
                          measure_id=35)

            # ylds
            print("(3): Get YLDs & write results.")
            ylds = calculate_measures.short_term_ylds(st_prevalence, dws)
            write_results(ylds, version, ecode, ncode, year, measure_id=3)

        if ncode in inj_info.LT_NCODES:
            raw_lt = read_ode(ecode, ncode, year, version)

            if ncode in inj_info.ST_NCODES:
                # get the fake long-term draw
                fake_lt = get_fake_long_term(ncode, year, st_prevalence)
                fake_lt = fake_lt.loc[{'ncode': ncode}].drop('ncode')

                if 'ecode' in fake_lt.coords:
                    fake_lt = fake_lt.drop('ecode')

                real_lt = raw_lt - fake_lt

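                # the subtraction can go negative; clamp those values to zero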
                real_lt.values[real_lt.values < 0] = 0

            else:
                real_lt = raw_lt.copy()

            # Expert adjustments
            # 1. Do not allow outpatient long-term results for certain ncodes
            # 2. Zero out under-1 long-term prevalence of shocks
            # 3. Adjust implausible long-term animal-contact results

            if ncode in ["N48", "N26", "N11", "N19", "N43", "N25", "N23"]:
                real_lt.loc[{'platform': ['outpatient']}] = 0
            if ecode in inj_info.SHOCK_ECODES:
                real_lt.loc[{'age_group_id': [2, 3, 4]}] = 0

            if ecode == "inj_animal_nonven" or ecode == "inj_animal_venom":
                real_lt = animal_adjustment(real_lt, ecode, ncode)

            write_results(real_lt, version, ecode, ncode, year, measure_id=36)
            sys.stdout.flush()
    print('All done!')

    help.end_timer(start)
Example #4
0
def main(ecode, ncode, platform, year, decomp, version, flat_version):
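    # Run the DisMod ODE for one ecode/ncode/platform/year across every
    # location and sex; when eta_incidence is 0 an all-zero frame is built
    # instead of running the ODE. Results are concatenated and written out.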
    toc = time.time()

    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    dm_settings = os.path.join(paths.SHARE_DIR, 'dismod_settings')
    version = version.rstrip()
    dm_dir = os.path.join(paths.DATA_DIR, decomp, inj_info.ECODE_PARENT[ecode],
                          str(version), "dismod_ode", ecode)
    metaloc = db.get_location_metadata(location_set_id=35,
                                       gbd_round_id=help.GBD_ROUND)

    filepath = write_path(ecode, ncode, platform, year, decomp, version)
    locations = help.ihme_loc_id_dict(metaloc, dems['location_id'])

    alldata = []
    value_in = os.path.join(dm_dir, "value_in",
                            "value_in_{}_{}.csv".format(ncode, platform))
    draw_in = os.path.join(dm_settings, "draw_in.csv")
    plain_in = os.path.join(dm_settings, "plain_in.csv")
    effect_in = os.path.join(dm_settings, "effect_in.csv")

    v_in = pd.read_csv(value_in)

    num_locs = len(locations)
    loc_pos = 0
    initime = help.start_timer()
    for locn in locations:
        loc_pos = loc_pos + 1

        for sex in [1, 2]:

            start = help.start_timer()

            if float(v_in.loc[v_in['name'] == 'eta_incidence',
                              'value'].iloc[0]) == 0:
                result = pd.DataFrame({'age_group_id': dems['age_group_id']})
                result = result.assign(**{d: 0 for d in help.drawcols()})
                result = help.convert_from_age_group_id(result)
            else:
                data_in = os.path.join(
                    dm_dir, "data_in", locations[locn], str(year), str(sex),
                    ecode, "data_in_{}_{}.csv".format(ncode, platform))

                if ncode in inj_info.EMR_NCODES:
                    rate_in_name = "rate_in_emr.csv"
                else:
                    rate_in_name = "rate_in_no_emr.csv"
                rate_in = os.path.join(paths.DATA_DIR, 'flats',
                                       str(flat_version), 'rate_in', str(year),
                                       str(sex), locations[locn], rate_in_name)

                draw_out_dir = os.path.join(dm_dir,
                                            "prev_results", locations[locn],
                                            str(year), str(sex))
                draw_out = os.path.join(
                    draw_out_dir,
                    "prevalence_{}_{}.csv".format(ncode, platform))
                # exist_ok handles a directory created by a parallel job and
                # avoids the unsupported os.errno attribute
                os.makedirs(draw_out_dir, exist_ok=True)

                result = run_model_injuries(draw_in, data_in, value_in,
                                            plain_in, rate_in, effect_in,
                                            draw_out, 1000)

            result['location_id'] = locn
            result['platform'] = platform

            result['year_id'] = year
            result['sex_id'] = sex

            alldata.append(result)
            help.end_timer(start)
            sys.stdout.flush()  # write to log file
        total_time = (time.time() - initime) / 60.

    final = pd.concat(alldata)

    write_results(final, ecode, ncode, platform, year, decomp, version)
    tic = time.time()
Example #5
0
def main(ecode, ncode, platform, version):
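    # Build the DisMod ODE inputs for one ecode/ncode/platform: for every
    # year and sex, summarize long-term incidence (and excess mortality for
    # EMR ncodes) into data-in files, then write the value-in file.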
    
    start = help.start_timer()
    
    parent = inj_info.ECODE_PARENT[ecode]
    flat_version = versions.get_env(parent, version)
    
    # get demographics
    print("1. Getting demographic, location, and long-term probabilities...")
    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    metaloc = db.get_location_metadata(location_set_id=35, gbd_round_id=help.GBD_ROUND)
    locations = help.ihme_loc_id_dict(metaloc, dems['location_id'])
    
    # get long-term probabilities that will be used and long-term standardized-mortality ratios
    lt_probs = calculate_measures.long_term_probs_combined(ncode=ncode)
    smr = load_measures.smr(ncode)
    
    # define DisMod ODE input directory
    dm_out_dir = os.path.join("FILEPATH")
    
    # make the sub-directory for data in files:
    folder = os.path.join("FILEPATH")
    os.makedirs(folder, exist_ok=True)  # exist_ok handles a directory created by a parallel job
    
    print("2. Looping through years and sexes to make rate-in and data-in files.")

    value_data = []

    for year in dems["year_id"]:
        for sex in dems["sex_id"]:
            measures = {}
            print('Working on year {} sex {}'.format(year, sex))

            incidence = calculate_measures.long_term_incidence(ecode, version, ncode, platform, year, sex, lt_probs)
            inc_mean = incidence.mean(dim='draw')
            # if the value is less than one in a trillion, set it to 0; otherwise
            # DisMod can have an overflow issue where it sets prevalence to 100%
            inc_summary = xr.merge([inc_mean.where(inc_mean > .000000000001, 0).rename('meas_value'),
                                    incidence.std(dim='draw').rename('meas_stdev')])
            measures['incidence'] = inc_summary
            if ncode in inj_info.EMR_NCODES:
                emr = calculate_measures.emr(smr, year, sex, flat_version)
                emr_summary = xr.merge([emr.mean(dim='draw').rename('meas_value'),
                                        emr.std(dim='draw').rename('meas_stdev')])
                measures['mtexcess'] = emr_summary
            
            print('Making data in')
            data = make_data_in(measures, ecode, version, ncode, platform, locations, year, sex)

            value_data.append(data)

            sys.stdout.flush()
                        
    print("Finished making data in files.")
    print("4. Now making the value-in file with the saved data from data in process...")
    
    make_value_in(value_data, ecode, ncode, platform, dm_out_dir)
    
    help.end_timer(start)
Example #6
0
def main(ecode, year_id, sex_id, platform, version):
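    # Split short-term incidence into ncodes using the E-N matrix. For parent
    # ecodes, split both parent and children and rescale the children to sum
    # to the parent; otherwise split the single ecode directly. Each ncode
    # group is written to the same file.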
    start = help.start_timer()

    dems = db.get_demographics(gbd_team='epi', gbd_round_id=help.GBD_ROUND)
    income = get_income_map(dems['location_id'])

    # if this is a parent e-code, we now want to use the parent incidence to scale the child incidence
    # and we don't want to save the parent incidence at all.
    if ecode in inj_info.PARENT_ECODES:
        print(
            "This is a parent e-code, so now we are going to split and scale the children."
        )
        # get parent info
        parent_inc = load_measures.short_term_incidence_unsplit(
            ecode, version, year_id, sex_id, platform)
        parent_mat = load_measures.en_matrix(ecode, sex_id, platform)

        # get child info
        children_inc = []
        children_mat = []

        for child in inj_info.ECODE_CHILD[ecode]:
            child_inc = load_measures.short_term_incidence_unsplit(
                child, version, year_id, sex_id, platform)
            child_mat = load_measures.en_matrix(child, sex_id, platform)
            children_inc.append(child_inc)
            children_mat.append(child_mat)
        child_inc = xr.concat(
            children_inc, pd.Index(inj_info.ECODE_CHILD[ecode], name='ecode'))
        child_mat = xr.concat(
            children_mat, pd.Index(inj_info.ECODE_CHILD[ecode], name='ecode'))

        # split into ncodes, scale the children, and save
        prep_file(ecode, year_id, sex_id, platform, version)
        mode = 'w'
        for n in inj_info.get_ncodes(platform):
            parent_n_mat = parent_mat.loc[{'ncode': [n]}]
            child_n_mat = child_mat.loc[{'ncode': [n]}]
            parent_split_inc = split_ncodes(parent_inc, parent_n_mat, income)
            child_split_inc = split_ncodes(child_inc, child_n_mat, income)

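            # rescale the children so they sum to the parent incidence for this ncode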
            scaled_child = child_split_inc * (parent_split_inc /
                                              child_split_inc.sum(dim='ecode'))
            scaled_child = scaled_child.fillna(0)
            write_results(scaled_child,
                          ecode,
                          year_id,
                          sex_id,
                          platform,
                          version,
                          mode=mode,
                          group=n)
            mode = 'a'  # after first time, change to 'a' so it appends other ncodes to the same file
    else:  # non-parent ecode
        print(
            "This is a single e-code so we are just splitting it, no scaling.")
        inc = load_measures.short_term_incidence_unsplit(
            ecode, version, year_id, sex_id, platform)
        matx = load_measures.en_matrix(ecode, sex_id, platform)
        prep_file(ecode, year_id, sex_id, platform, version)
        mode = 'w'
        for n in inj_info.get_ncodes(platform):
            print(n)
            n_matx = matx.loc[{'ncode': [n]}]
            split_inc = split_ncodes(inc, n_matx, income)
            write_results(split_inc,
                          ecode,
                          year_id,
                          sex_id,
                          platform,
                          version,
                          mode=mode,
                          group=n)
            mode = 'a'  # after first time, change to 'a' so it appends other ncodes to the same file
    help.end_timer(start)
Example #7
0
def main(ecode, ncode, platform, version):
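    # Long-term prevalence progression for one ecode/ncode/platform: build
    # long-term incidence (and EMR where applicable) by sex, interpolate ages
    # using single-year and group populations, then step prevalence forward
    # year by year, saving results from 1990 onward.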
    start = help.start_timer()

    parent = inj_info.ECODE_PARENT[ecode]
    flat_version = versions.get_env(parent, version)

    # need the cod demographics because they include every year used in the
    # year-by-year progression below
    dems = db.get_demographics(gbd_team="cod", gbd_round_id=help.GBD_ROUND)

    # get dfs used for long-term incidence and EMR
    lt_probs = calculate_measures.long_term_probs_combined(ncode,
                                                           year_id='full')

    print "Working on {}".format(ncode)
    if ncode in inj_info.EMR_NCODES:
        smr = load_measures.smr(ncode)

    sy_pop = load_measures.population(flat_version, single_year=True)
    grp_pop = load_measures.population(flat_version)

    prev = xr.DataArray([0], dims='ncode', coords=[[ncode]])
    for year in dems['year_id']:
        print(year)
        inc_list = []
        emr_list = []
        print('Getting incidence and emr if applicable')
        sys.stdout.flush()  # write to log
        for sex in dems['sex_id']:
            sex_inc = calculate_measures.long_term_incidence(
                ecode, version, ncode, platform, year, sex, lt_probs)
            inc_list.append(sex_inc)
            if ncode in inj_info.EMR_NCODES:
                sex_emr = calculate_measures.emr(smr, year, sex, flat_version)
                emr_list.append(sex_emr)

        incidence = xr.concat(inc_list, dim='sex_id')

        print('Interpolating')
        sys.stdout.flush()  # write to log
        inc_interp = interpolate_ages(incidence,
                                      sy_pop.loc[{'year_id': [year]}],
                                      grp_pop.loc[{'year_id': [year]}])
        if ncode in inj_info.EMR_NCODES:
            emr = xr.concat(emr_list, dim='sex_id')
            emr_interp = interpolate_ages(emr,
                                          sy_pop.loc[{'year_id': [year]}],
                                          grp_pop.loc[{'year_id': [year]}])
        else:
            emr_interp = xr.DataArray([0], dims='year_id', coords=[[year]])

        print('Running ODE/incrementing process')
        sys.stdout.flush()  # write to log
        # progress half year and save, for 1990 and on
        if year >= 1990:
            year_result = progress_half_year(prev, inc_interp, emr_interp,
                                             sy_pop.loc[{'year_id': [year]}],
                                             grp_pop.loc[{'year_id': [year]}])
            write_results(year_result, ecode, ncode, platform, year, version)
        # then progress full year and increment, for all years but the last
        if year != help.LAST_YEAR:
            prev = progress_one_year(prev, inc_interp, emr_interp)

    help.end_timer(start)
Example #8
0
def main(ecode, ncode, platform, year, version, flat_version):
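    # Run the DisMod ODE for one ecode/ncode/platform/year across every
    # location and sex (skipping the ODE when eta_incidence is 0), report
    # per-location progress, and write the concatenated results.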
    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    dm_settings = os.path.join("FILEPATH")
    dm_dir = os.path.join("FILEPATH")
    metaloc = db.get_location_metadata(location_set_id=35, gbd_round_id=help.GBD_ROUND)

    locations = help.ihme_loc_id_dict(metaloc, dems['location_id'])
    
    alldata = []
    
    value_in = os.path.join("FILEPATH.csv".format(ncode, platform))
    draw_in = os.path.join(dm_settings, "FILEPATH.csv")
    plain_in = os.path.join(dm_settings, "FILEPATH.csv")
    effect_in = os.path.join(dm_settings, "FILEPATH.csv")
    v_in = pd.read_csv(value_in)
    
    num_locs = len(locations)
    loc_pos = 0
    initime = help.start_timer()
    for locn in locations:
        loc_pos = loc_pos + 1

        for sex in [1,2]:

            print("Running DisMod ODE for location {} year {} sex {}".format(locations[locn], year, sex))

            start = help.start_timer()
            
            if float(v_in.loc[v_in['name'] == 'eta_incidence',
                              'value'].iloc[0]) == 0:
                print('eta incidence is 0, so all incidence should be 0 and we\'ll just make an all 0 df instead of '
                      'running the ODE')
                result = pd.DataFrame({'age_group_id': dems['age_group_id']})
                result = result.assign(**{d: 0 for d in help.drawcols()})
                result = help.convert_from_age_group_id(result)
            else:
                data_in = os.path.join("FILEPATH.csv".format(ncode, platform))
                
                # create the rate in filepath based on whether it has excess mortality or not
                if ncode in inj_info.EMR_NCODES:
                    rate_in_name = "FILEPATH.csv"
                else:
                    rate_in_name = "FILEPATH.csv"
                rate_in = os.path.join("FILEPATH")
                
                draw_out_dir = os.path.join("FILEPATH")
                draw_out = os.path.join("FILEPATH.csv".format(ncode, platform))
                # exist_ok handles a directory created by a parallel job and
                # avoids the unsupported os.errno attribute
                os.makedirs(draw_out_dir, exist_ok=True)
                
                result = run_model_injuries(draw_in, data_in, value_in, plain_in, rate_in, effect_in, draw_out, 1000)
                
            # format the results so that we have the identifying columns
            result['location_id'] = locn
            result['platform'] = platform
            
            result['year_id'] = year
            result['sex_id'] = sex
            
            alldata.append(result)
            help.end_timer(start)
            sys.stdout.flush()  # write to log file
        total_time = (time.time() - initime)/60.
        print('Completed {} of {} locations in {} minutes. Will take {} more minutes at this rate'.format(
            loc_pos, num_locs, total_time, (total_time/loc_pos)*(num_locs-loc_pos)))
        sys.stdout.flush()  # write to log file
        
    # concatenate all of the data together
    final = pd.concat(alldata)
    write_results(final, ecode, ncode, platform, year, version)
    print('Finished!')
Example #9
0
def main(ecode, ncode, platform, decomp, version):
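    # Long-term prevalence progression for one ecode/ncode/platform under a
    # given decomp step: build long-term incidence (and EMR where applicable)
    # by sex, interpolate ages, and step prevalence forward year by year,
    # writing results from 1990 onward.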
    start = help.start_timer()

    parent = inj_info.ECODE_PARENT[ecode]
    flat_version = versions.get_env(parent, version)

    dems = db.get_demographics(gbd_team="cod", gbd_round_id=help.GBD_ROUND)

    lt_probs = calculate_measures.long_term_probs_combined(ncode=ncode,
                                                           decomp=decomp,
                                                           year_id='full')

    if ncode in inj_info.EMR_NCODES:
        smr = load_measures.smr(ncode)

    sy_pop = load_measures.population(flat_version, single_year=True)
    grp_pop = load_measures.population(flat_version)

    prev = xr.DataArray([0], dims='ncode', coords=[[ncode]])
    for year in dems['year_id']:
        inc_list = []
        emr_list = []
        for sex in dems['sex_id']:
            sex_inc = calculate_measures.long_term_incidence(
                ecode, decomp, version, ncode, platform, year, sex, lt_probs)
            inc_list.append(sex_inc)
            if ncode in inj_info.EMR_NCODES:
                sex_emr = calculate_measures.emr(smr, year, sex, flat_version)
                emr_list.append(sex_emr)

        incidence = xr.concat(inc_list, dim='sex_id')

        inc_interp = interpolate_ages(incidence,
                                      sy_pop.loc[{'year_id': [year]}],
                                      grp_pop.loc[{'year_id': [year]}])
        if ncode in inj_info.EMR_NCODES:
            emr = xr.concat(emr_list, dim='sex_id')
            emr_interp = interpolate_ages(emr,
                                          sy_pop.loc[{'year_id': [year]}],
                                          grp_pop.loc[{'year_id': [year]}])
        else:
            emr_interp = xr.DataArray([0], dims='year_id', coords=[[year]])

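        # progress half a year and save, for 1990 and on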
        if year >= 1990:
            year_result = progress_half_year(prev, inc_interp, emr_interp,
                                             sy_pop.loc[{'year_id': [year]}],
                                             grp_pop.loc[{'year_id': [year]}])
            write_results(year_result, ecode, ncode, platform, year, decomp,
                          version)

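        # then progress a full year and increment, for all years but the last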
        if year != help.LAST_YEAR:
            prev = progress_one_year(prev, inc_interp, emr_interp)

    help.end_timer(start)