Esempio n. 1
0
def fit_posterior(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model

    Loads the disease model, sets up the negative-binomial rate model for
    the first prevalence key matching region/sex/year, draws MCMC samples,
    fits a MAP object, stores the MCMC fit in the disease model and then
    saves the disease model with ``dm.save``.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    Results
    -------
    dm : the object returned by dismod3.load_disease_model(id), with its
      ``vars``, ``mcmc`` and ``map`` attributes set by this function

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    # NOTE: reporting job status to the server (dismod3.log_job_status) is
    # currently disabled in this function, both before and after the fit.

    dm = dismod3.load_disease_model(id)
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year],
                                  sex_list=[sex], type_list=['prevalence'])

    # fit the model; only the first matching key is modelled
    import dismod3.neg_binom_model as model
    k0 = keys[0]
    dm.vars = {}
    dm.vars[k0] = model.setup(dm, k0, dm.data)
    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()
    model.store_mcmc_fit(dm, k0, dm.vars[k0])

    # save results (do this last, because it removes things from the
    # disease model that plotting functions, etc, might need).
    # The key list built above is reused here; it was previously rebuilt
    # with exactly the same arguments.
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (id, region, sex, year), keys_to_save=keys)

    return dm
Esempio n. 2
0
def prior_vals(dm, type):
    """ Estimate the prior distribution on param_age_mesh for a particular type

    Selects the rows of dm.data for the requested type, resets the
    empirical prior, fits an initial estimate, and then sets up and fits a
    rate model with a fixed empirical-prior dictionary.

    Parameters
    ----------
    dm : DiseaseJson
    type : str, one of 'prevalence', 'incidence', 'remission', 'excess-mortality'

    Results
    -------
    vars : the object returned by dismod3.neg_binom_model.setup, after a
      MAP fit and a single MCMC sample have been run on it

    """
    import random
    import dismod3.neg_binom_model as model

    # NOTE(review): this keeps only rows whose 'ignore' value equals 1 (the
    # condition used to be spelled `not d.get('ignore') != 1`, which means
    # the same thing).  If the intent was to drop ignored rows the test
    # should be `!= 1` instead -- confirm before changing.
    selected = []
    for datum in dm.data:
        matches_type = type in clean(datum['data_type'])
        if matches_type and datum.get('ignore') == 1:
            selected.append(datum)

    dm.clear_empirical_prior()
    dm.fit_initial_estimate(type, selected)

    # with 8 or more rows, continue with a reproducible random subset of 8
    if len(selected) >= 8:
        random.seed(12345)
        selected = random.sample(selected, 8)

    region_covs, study_covs = model.regional_covariates('none', dm.get_covariates())
    age_mesh = dm.get_estimate_age_mesh()
    emp_prior = {
        'alpha': pl.zeros(len(region_covs)),
        'beta': pl.zeros(len(study_covs)),
        'gamma': -10 * pl.ones(len(age_mesh)),
        'sigma_alpha': [1.],
        'sigma_beta': [1.],
        'sigma_gamma': [1.],
        'delta': 100.,
        'sigma_delta': 1.,
    }

    stochs = model.setup(dm, key=type, data_list=selected, emp_prior=emp_prior)

    map_fit = mc.MAP(stochs)
    map_fit.fit(method='fmin_powell', tol=.1, iterlim=100)
    sampler = mc.MCMC(stochs)
    sampler.sample(1)
    return stochs
Esempio n. 3
0
def prior_vals(dm, type):
    """ Estimate the prior distribution on param_age_mesh for a particular type

    Filters dm.data down to the requested type, clears the empirical prior,
    fits an initial estimate on the filtered rows, then builds a rate model
    from a fixed empirical-prior dictionary and runs a MAP fit followed by
    one MCMC sample.

    Parameters
    ----------
    dm : DiseaseJson
    type : str, one of 'prevalence', 'incidence', 'remission', 'excess-mortality'

    Results
    -------
    vars : the object returned by dismod3.neg_binom_model.setup

    """
    import random
    import dismod3.neg_binom_model as model

    # NOTE(review): only rows with ignore == 1 pass this filter (this is
    # what the earlier `not d.get('ignore') != 1` evaluated to).  That looks
    # inverted if 'ignore' marks rows to skip -- verify the intent.
    rows = [
        row for row in dm.data
        if type in clean(row['data_type']) and row.get('ignore') == 1
    ]

    dm.clear_empirical_prior()
    dm.fit_initial_estimate(type, rows)

    # cap the data at 8 rows, chosen with a fixed seed
    if len(rows) >= 8:
        random.seed(12345)
        rows = random.sample(rows, 8)

    covariates = dm.get_covariates()
    X_region, X_study = model.regional_covariates('none', covariates)
    mesh = dm.get_estimate_age_mesh()

    prior = dict(
        alpha=pl.zeros(len(X_region)),
        beta=pl.zeros(len(X_study)),
        gamma=-10 * pl.ones(len(mesh)),
        sigma_alpha=[1.],
        sigma_beta=[1.],
        sigma_gamma=[1.],
        delta=100.,
        sigma_delta=1.,
    )

    fitted = model.setup(dm, key=type, data_list=rows, emp_prior=prior)

    mc.MAP(fitted).fit(method='fmin_powell', tol=.1, iterlim=100)
    mc.MCMC(fitted).sample(1)
    return fitted
Esempio n. 4
0
File: hep_c.py Progetto: flaxter/gbd
def hep_c_fit(regions, prediction_years, data_year_start=-inf, data_year_end=inf, egypt_flag=False):
    """ Fit prevalence for regions and years specified

    Fetches disease model 8788, adjusts its expert priors and the
    study-level 'bias' covariate, restricts dm.data, fits a negative
    binomial rate model with MCMC and MAP, stores and plots the results for
    every key, and posts the disease model back with
    dismod3.post_disease_model.

    Parameters
    ----------
    regions : collection of str
      Passed as region_list to dismod3.utils.gbd_keys; a data row is kept
      when its clean()-ed 'gbd_region' is in this collection
    prediction_years : list
      Passed as year_list to dismod3.utils.gbd_keys
    data_year_start, data_year_end : float
      A data row is kept when float(year_end) >= data_year_start and
      float(year_start) <= data_year_end
    egypt_flag : bool
      If true, keep exactly the rows with country_iso3_code 'EGY' (the
      region and year filters are not applied); otherwise rows with that
      code are excluded

    Results
    -------
    dm : the fetched disease model, with ``vars``, ``mcmc`` and ``map`` set
    """
    # Single parenthesised argument: prints the same text under Python 2
    # (print statement) and Python 3 (print function).
    print("\n***************************\nfitting %s for %s (using data from years %f to %f)" % (
        regions,
        prediction_years,
        data_year_start,
        data_year_end,
    ))

    ## load model to fit
    model_id = 8788
    dismod3.disease_json.create_disease_model_dir(model_id)
    dm = dismod3.fetch_disease_model(model_id)

    ## adjust the expert priors
    dm.params["global_priors"]["heterogeneity"]["prevalence"] = "Very"
    dm.params["global_priors"]["smoothness"]["prevalence"]["amount"] = "Slightly"
    # TODO: construct examples of adjusting other covariates
    # ipdb> dm.params['global_priors'].keys()
    # [u'increasing', u'unimodal', u'level_bounds', u'y_maximum', u'note', u'level_value', u'decreasing', u'parameter_age_mesh', u'heterogeneity', u'smoothness']
    # ipdb> dm.params['global_priors']['smoothness']['prevalence']
    # {u'age_start': 0, u'amount': u'Moderately', u'age_end': 100}

    # include a study-level covariate for 'bias'
    covariates_dict = dm.get_covariates()
    covariates_dict["Study_level"]["bias"]["rate"]["value"] = 1
    # TODO: construct additional examples of adjusting covariates

    ## select relevant prevalence data
    # TODO: streamline data selection functions
    if egypt_flag:
        dm.data = [d for d in dm.data if d["country_iso3_code"] == "EGY"]
    else:
        dm.data = [
            d
            for d in dm.data
            if dismod3.utils.clean(d["gbd_region"]) in regions
            and float(d["year_end"]) >= data_year_start
            and float(d["year_start"]) <= data_year_end
            and d["country_iso3_code"] != "EGY"
        ]

    ## create, fit, and save rate model
    dm.vars = {}

    keys = dismod3.utils.gbd_keys(type_list=["prevalence"], region_list=regions, year_list=prediction_years)
    # TODO: consider how to do this for models that use the complete disease model
    # TODO: consider adding hierarchical similarity priors for the male and female models
    k0 = keys[0]  # looks like k0='prevalence+asia_south+1990+male'
    dm.vars[k0] = neg_binom_model.setup(dm, k0, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)

    # make map object so we can compute AIC and BIC
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    for k in keys:
        # every key shares the single model that was fit for k0
        dm.vars[k] = dm.vars[k0]

        # save the results in the disease model
        neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])

        # check autocorrelation to confirm chain has mixed
        test_model.summarize_acorr(dm.vars[k]["rate_stoch"].trace())

        # generate plots of results; random() is part of the file name
        dismod3.tile_plot_disease_model(dm, [k], defaults={"ymax": 0.15, "alpha": 0.5})
        dm.savefig("dm-%d-posterior-%s.%f.png" % (dm.id, k, random()))

    # summarize fit quality graphically, as well as parameter posteriors
    dismod3.plotting.plot_posterior_predicted_checks(dm, k0)
    dm.savefig("dm-%d-check-%s.%f.png" % (dm.id, k0, random()))
    dismod3.post_disease_model(dm)
    return dm