Ejemplo n.º 1
0
def main():
    """Command-line entry point: tweak (or clone) a disease model and re-post it.

    Expects exactly one positional argument, the integer id of the disease
    model to fetch.  Optional flags set the summary-plot y maximum, the
    condition, and the notes of the fetched model.  With ``--clone`` the
    model's ``id`` param is dropped before posting, so the specified model is
    left unchanged.  The value returned by ``dismod3.post_disease_model`` is
    printed at the end.

    Any command-line error (wrong argument count, non-integer model id,
    non-numeric ``--ymax``) is reported through ``parser.error``, which prints
    the usage message and exits with status 2.
    """
    usage = 'usage: %prog [options] disease_model_id'
    parser = optparse.OptionParser(usage)

    # flags for parameters to modify
    # type='float' makes optparse reject a non-numeric --ymax up front with a
    # usage error, before any model is fetched
    parser.add_option('-y', '--ymax', type='float',
                      help='set the maximum y value for summary plots')
    parser.add_option('-t', '--condition',
                      help='set the condition of the model')
    parser.add_option('-n', '--notes',
                      help='set the notes of the model')

    # boolean flags
    parser.add_option('-c', '--clone',
                      action='store_true', dest='clone',
                      help='create a clone of the model (leave specified model unchanged)')

    (opts, args) = parser.parse_args()

    # check that args are correct; parser.error() never returns (it exits)
    if len(args) != 1:
        parser.error('incorrect number of arguments')
    try:
        model_id = int(args[0])
    except ValueError:
        parser.error('disease_model_id must be an integer')

    # fetch requested model
    dm = dismod3.get_disease_model(model_id)

    # change values specified
    # (compare against None so that an explicit "--ymax 0" is still applied)
    if opts.ymax is not None:
        dm.set_ymax(opts.ymax)

    # TODO: get condition to actually change
    if opts.condition:
        dm.set_condition(opts.condition)
    if opts.notes:
        dm.set_notes(opts.notes)

    # clone if requested
    if opts.clone:
        # dismod_data_server creates new model if it doesn't find an id;
        # tolerate a model that has no 'id' param to begin with
        dm.params.pop('id', None)

    # post results to dismod_data_server
    url = dismod3.post_disease_model(dm)

    # announce url to view results
    print(url)
Ejemplo n.º 2
0
Archivo: hep_c.py Proyecto: flaxter/gbd
def hep_c_fit(regions, prediction_years, data_year_start=-inf, data_year_end=inf, egypt_flag=False):
    """Fit a prevalence model for the given regions and prediction years.

    Fetches disease model 8788, adjusts its expert priors and the study-level
    'bias' covariate, restricts the model's data, fits a single negative
    binomial rate model with MCMC (followed by a MAP fit), stores and plots
    the fit under every generated key, posts the model, and returns it.

    Parameters:
      regions -- list of region names used both to select data and to
        generate the keys to predict for
      prediction_years -- list of years to generate keys for
      data_year_start, data_year_end -- a datum is kept only when its
        [year_start, year_end] interval overlaps this range
      egypt_flag -- when true, use only data whose country code is "EGY"
        (ignoring regions and years); otherwise "EGY" data is excluded
    """
    banner = "\n***************************\nfitting %s for %s (using data from years %f to %f)"
    print(banner % (regions, prediction_years, data_year_start, data_year_end))

    ## load model to fit
    model_id = 8788
    dismod3.disease_json.create_disease_model_dir(model_id)
    dm = dismod3.fetch_disease_model(model_id)

    ## adjust the expert priors
    # TODO: construct examples of adjusting other covariates
    # (other entries seen under global_priors: increasing, unimodal,
    #  level_bounds, y_maximum, note, level_value, decreasing,
    #  parameter_age_mesh, heterogeneity, smoothness)
    expert_priors = dm.params["global_priors"]
    expert_priors["heterogeneity"]["prevalence"] = "Very"
    expert_priors["smoothness"]["prevalence"]["amount"] = "Slightly"

    # include a study-level covariate for 'bias'
    # TODO: construct additional examples of adjusting covariates
    study_bias = dm.get_covariates()["Study_level"]["bias"]
    study_bias["rate"]["value"] = 1

    ## select relevant prevalence data
    # TODO: streamline data selection functions
    def is_egyptian(datum):
        return datum["country_iso3_code"] == "EGY"

    def is_regional(datum):
        # checks are evaluated in this order and short-circuit on the first failure
        return (
            dismod3.utils.clean(datum["gbd_region"]) in regions
            and float(datum["year_end"]) >= data_year_start
            and float(datum["year_start"]) <= data_year_end
            and datum["country_iso3_code"] != "EGY"
        )

    wanted = is_egyptian if egypt_flag else is_regional
    dm.data = [datum for datum in dm.data if wanted(datum)]

    ## create, fit, and save rate model
    dm.vars = {}

    keys = dismod3.utils.gbd_keys(type_list=["prevalence"], region_list=regions, year_list=prediction_years)
    # TODO: consider how to do this for models that use the complete disease model
    # TODO: consider adding hierarchical similarity priors for the male and female models
    first_key = keys[0]  # looks like 'prevalence+asia_south+1990+male'
    dm.vars[first_key] = neg_binom_model.setup(dm, first_key, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)

    # make map object so we can compute AIC and BIC
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    # every key shares the one model that was fit for the first key
    fitted_vars = dm.vars[first_key]
    for key in keys:
        # save the results in the disease model
        dm.vars[key] = fitted_vars
        neg_binom_model.store_mcmc_fit(dm, key, fitted_vars)

        # check autocorrelation to confirm chain has mixed
        test_model.summarize_acorr(fitted_vars["rate_stoch"].trace())

        # generate plots of results
        dismod3.tile_plot_disease_model(dm, [key], defaults={"ymax": 0.15, "alpha": 0.5})
        posterior_png = "dm-%d-posterior-%s.%f.png" % (dm.id, key, random())
        dm.savefig(posterior_png)

    # summarize fit quality graphically, as well as parameter posteriors
    dismod3.plotting.plot_posterior_predicted_checks(dm, first_key)
    check_png = "dm-%d-check-%s.%f.png" % (dm.id, first_key, random())
    dm.savefig(check_png)

    dismod3.post_disease_model(dm)
    return dm
Ejemplo n.º 3
0
Archivo: hep_c.py Proyecto: flaxter/gbd
            pop_1 = neg_binom_model.population_by_age[("EGY", str(y), s)]

            key = "prevalence+north_africa_middle_east+%d+%s" % (y, s)
            prev_0 = neg_binom_model.calc_rate_trace(dm_na_me, key, dm_na_me.vars[key])
            pop_0 = neg_binom_model.regional_population(key)

            # generate population weighted average
            prev = (prev_0 * (pop_0 - pop_1) + prev_1 * pop_1) / pop_0
            neg_binom_model.store_mcmc_fit(dm_na_me, key, None, prev)

            # generate plots of results
            dismod3.tile_plot_disease_model(dm_na_me, [key], defaults={"ymax": 0.15, "alpha": 0.5})
            dm_na_me.savefig("dm-%d-posterior-na_me_w_egypt.%f.png" % (dm_na_me.id, random()))

            # save results
            dismod3.post_disease_model(dm_na_me)

    dm = hep_c_fit(
        "caribbean latin_america_tropical latin_america_andean latin_america_central latin_america_southern".split(),
        [1990, 2005],
    )
    dm = hep_c_fit(
        "sub-saharan_africa_central sub-saharan_africa_southern sub-saharan_africa_west".split(), [1990, 2005]
    )

    for (
        r
    ) in "europe_eastern europe_central asia_central asia_east asia_south asia_southeast australasia oceania sub-saharan_africa_east asia_pacific_high_income".split():
        dm = hep_c_fit([r], [1990, 2005])

    for r in "north_america_high_income europe_western ".split():
Ejemplo n.º 4
0
def add_region_effect_prior(id, val):
    """Post a stub disease model whose only param is a region-effects prior.

    Builds a DiseaseJson with empty params and data and the given id, stores
    `val` under params['region_effects'], and posts it with
    dismod3.post_disease_model.
    """
    stub = {'params': {}, 'data': [], 'id': id}
    model = dismod3.disease_json.DiseaseJson(json.dumps(stub))
    model.params['region_effects'] = val
    dismod3.post_disease_model(model)
Ejemplo n.º 5
0
def add_sex_effect_prior(id, type, mean, lower_ci, upper_ci):
    """Post a stub disease model whose only param is a sex-effect prior.

    Builds a DiseaseJson with empty params and data and the given id, stores
    the mean and confidence bounds under params['sex_effect_<type>'], prints
    the model's JSON, and posts it with dismod3.post_disease_model.
    """
    stub = {'params': {}, 'data': [], 'id': id}
    model = dismod3.disease_json.DiseaseJson(json.dumps(stub))

    param_name = 'sex_effect_%s' % type
    model.params[param_name] = {'mean': mean, 'upper_ci': upper_ci, 'lower_ci': lower_ci}

    print(model.to_json())
    dismod3.post_disease_model(model)
Ejemplo n.º 6
0
Archivo: fitit.py Proyecto: flaxter/gbd
    dm.data = [d for d in dm.data if \
               d['gbd_region'] == k.replace('prevalence data+', '')]

    dm.set_priors('prevalence data', ' smooth 10\n zero 0 15\n zero 99 100\n confidence 1000 .0001\n')

    import dismod3.beta_binomial_model as model
    print 'Processing %s (%d data points)' % (k, len(dm.data))
    model.fit(dm, 'map')
    model.fit(dm, 'mcmc')
    model.fit(dm, 'map')
    model.fit(dm, 'mcmc')
    model.fit(dm, 'map')

else:
    keys = dm.params['priors'].keys()
    for k in keys:
        dm.set_priors(k, ' smooth 10\n zero 0 15\n zero 99 100\n confidence 1000 .0001\n')

    import dismod3.multiregion_model as model
    print 'Processing all regions (%d data points)' % len(dm.data)
    model.fit(dm, 'map')
    model.fit(dm, 'mcmc')
    model.fit(dm, 'map')
    model.fit(dm, 'mcmc')
    model.fit(dm, 'map')
    
print dismod3.post_disease_model(dm)


Ejemplo n.º 7
0
        'Year Start': d['year_start'],
        'Year End': d['year_end'],
        'Self Reported': d.get('self-reported', '')
        }
    return c

# Write every datum to simulated_data.tsv as one tab-separated row, preceded
# by a header row.  The `with` block closes the file even if building or
# writing a row raises, which the explicit close() call did not guarantee.
with open('simulated_data.tsv', 'w') as f_file:
    csv_f = csv.writer(f_file, dialect=csv.excel_tab)

    # column order is the sorted column names of the first datum
    col_names = sorted(data_dict_for_csv(data[0]).keys())

    csv_f.writerow(col_names)
    for d in data:
        dd = data_dict_for_csv(d)
        csv_f.writerow([dd[c] for c in col_names])

# Upload a new disease model which knows the ground truth (but needs to
# have the data from the csv loaded separately).

# the data rows are not sent along with this model
dm.data = []
# drop the 'id' param and reset the model id to -1
# NOTE(review): presumably this makes the data server create a new model
# instead of updating an existing one -- confirm against the server code
dm.params.pop('id')
dm.id = -1
# record the true value for every key in `truth`
for key in truth:
    dm.set_truth(key, truth[key])

# post the model and print what post_disease_model returns
url = dismod3.post_disease_model(dm)
print url

Ejemplo n.º 8
0
def _log_fit_status(id, opts, status):
    """Write `status` to the job status file for the fit described by `opts`.

    Does nothing unless opts.log is set.  A fit with a type but without a
    complete region/sex/year is logged as 'empirical_priors'; a fit with a
    complete region/sex/year and no type is logged as 'posterior'.  Any other
    combination is not logged.
    """
    if not opts.log:
        return

    fully_specified = opts.region and opts.sex and opts.year
    if opts.type and not fully_specified:
        dismod3.log_job_status(id, 'empirical_priors', opts.type, status)
    elif fully_specified and not opts.type:
        dismod3.log_job_status(id, 'posterior', '%s--%s--%s' % (opts.region, opts.sex, opts.year), status)


def _post_with_retries(dm, max_tries=4, max_wait=30):
    """Post `dm` to the data server, retrying after a random pause on failure.

    Only TwillAssertionError and URLError are retried.  After `max_tries`
    failed attempts the last error is re-raised.  Returns whatever
    dismod3.post_disease_model returns.
    """
    import random
    import time
    from twill.errors import TwillAssertionError
    from urllib2 import URLError

    # must be a tuple: an except clause never matches against a list
    retryable_errors = (TwillAssertionError, URLError)

    for attempt in range(1, max_tries + 1):
        try:
            return dismod3.post_disease_model(dm)
        except retryable_errors:
            if attempt == max_tries:
                raise
            # wait a random time so that concurrent jobs do not retry in lockstep
            time.sleep(random.random() * max_wait)


def fit(id, opts):
    """Fit the disease model with the given id and post the results.

    Parameters:
      id -- integer id of the disease model to fetch and fit
      opts -- options object with attributes region, sex, year, type and log;
        an unset region/sex/year means "all" (dismod3.gbd_regions,
        dismod3.gbd_sexes, dismod3.gbd_years respectively)

    If opts.type is set, the empirical prior for that type is fit with
    dismod3.neg_binom_model.  Otherwise all-cause mortality data is merged
    into the model and a consistent fit of all parameters is found with
    dismod3.gbd_disease_model (MAP followed by MCMC).  Before posting, every
    dict-valued param is pruned down to the keys that were fit.  When
    opts.log is set, the job status is logged as 'Running' at the start and
    'Completed' at the end.
    """
    fit_str = '(%d) %s %s %s' % (id, opts.region or '', opts.sex or '', opts.year or '')
    #tweet('fitting disease model %s' % fit_str)
    sys.stdout.flush()

    # update job status file
    _log_fit_status(id, opts, 'Running')

    dm = dismod3.get_disease_model(id)
    fit_str = '%s %s' % (dm.params['condition'], fit_str)

    sex_list = [opts.sex] if opts.sex else dismod3.gbd_sexes
    year_list = [opts.year] if opts.year else dismod3.gbd_years
    region_list = [opts.region] if opts.region else dismod3.gbd_regions
    keys = gbd_keys(region_list=region_list, year_list=year_list, sex_list=sex_list)

    job_dir = dismod3.settings.JOB_WORKING_DIR % id

    # fit empirical priors, if type is specified
    if opts.type:
        fit_str += ' emp prior for %s' % opts.type
        import dismod3.neg_binom_model as model

        model.fit_emp_prior(dm, opts.type,
                            dbname='%s/empirical_priors/pickle/dm-%d-emp_prior-%s.pickle' % (job_dir, id, opts.type))

    # if type is not specified, find consistent fit of all parameters
    else:
        import dismod3.gbd_disease_model as model

        # get the all-cause mortality data, and merge it into the model
        mort = dismod3.get_disease_model('all-cause_mortality')
        dm.data += mort.data

        # fit individually, if sex, year, and region are specified
        if opts.sex and opts.year and opts.region:
            dm.params['estimate_type'] = 'fit individually'

        # fit the model
        model.fit(dm, method='map', keys=keys, verbose=1)
        model.fit(dm, method='mcmc', keys=keys, iter=10000, thin=5, burn=5000, verbose=1,
                  dbname='%s/posterior/pickle/dm-%d-posterior-%s-%s-%s.pickle'
                  % (job_dir, id, opts.region, opts.sex, opts.year))

    # remove all keys that have not been changed by running this model
    fitted_keys = set(keys)
    for value in dm.params.values():
        if isinstance(value, dict):
            # iterate over a snapshot of the keys, since entries are removed on the way
            for sub_key in list(value):
                if sub_key not in fitted_keys:
                    value.pop(sub_key)

    # post results to dismod_data_server
    # "dumb" error handling, in case post fails (sleep random time, try again, stop after 4 tries)
    url = _post_with_retries(dm, max_tries=4, max_wait=30)

    # announce completion, and url to view results
    #tweet('%s fit complete %s' % (fit_str, url))
    sys.stdout.flush()

    # update job status file
    _log_fit_status(id, opts, 'Completed')