def fit_posterior(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model

    Loads the disease model, fits a negative-binomial prevalence model for
    the single (region, year, sex) key by MCMC followed by a MAP fit, stores
    the MCMC fit in the disease model, and saves the result to a JSON file.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    Returns
    -------
    dm : the object returned by dismod3.load_disease_model(id), with its
      ``vars``, ``mcmc`` and ``map`` attributes set by this function

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    # NOTE: reporting the 'Running' job status to the server via
    # dismod3.log_job_status is currently disabled in this function
    dm = dismod3.load_disease_model(id)

    # only prevalence is fitted here, for exactly one region/year/sex
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year],
                                  sex_list=[sex], type_list=['prevalence'])

    # fit the model
    import dismod3.neg_binom_model as model

    k0 = keys[0]
    dm.vars = {}
    dm.vars[k0] = model.setup(dm, k0, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)

    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    model.store_mcmc_fit(dm, k0, dm.vars[k0])

    # NOTE: reporting the 'Completed' job status to the server is likewise
    # disabled

    # save results (do this last, because it removes things from the disease
    # model that plotting functions, etc., might need); the key list is the
    # same one that was used for fitting, so it is reused rather than rebuilt
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (id, region, sex, year),
            keys_to_save=keys)

    return dm
def prior_vals(dm, type):
    """ Estimate the prior distribution on param_age_mesh for a particular type

    Parameters
    ----------
    dm : DiseaseJson
    type : str, one of 'prevalence', 'incidence', 'remission', 'excess-mortality'

    Returns
    -------
    vars : dict of stochastics generated by dismod3.neg_binom_model.setup
    """
    import random
    import dismod3.neg_binom_model as model

    # Keep rows of the requested type that are NOT flagged ignore == 1.
    # The previous test, `not d.get('ignore') != 1`, parses as
    # `not (d.get('ignore') != 1)` and therefore kept only the ignored rows.
    data = [d for d in dm.data
            if clean(d['data_type']).find(type) != -1
            and d.get('ignore') != 1]

    dm.clear_empirical_prior()
    dm.fit_initial_estimate(type, data)

    # with 8 or more rows, fit to a reproducible random subset of exactly 8
    if len(data) >= 8:
        random.seed(12345)
        data = random.sample(data, 8)

    X_region, X_study = model.regional_covariates('none', dm.get_covariates())
    est_mesh = dm.get_estimate_age_mesh()

    # fixed empirical-prior values handed to model.setup: zero covariate
    # effects, gamma of -10 at every estimate-age-mesh point, unit sigmas
    prior_dict = dict(alpha=pl.zeros(len(X_region)),
                      beta=pl.zeros(len(X_study)),
                      gamma=-10 * pl.ones(len(est_mesh)),
                      sigma_alpha=[1.],
                      sigma_beta=[1.],
                      sigma_gamma=[1.],
                      delta=100.,
                      sigma_delta=1.)

    model_vars = model.setup(dm, key=type, data_list=data, emp_prior=prior_dict)

    # coarse MAP fit, followed by a single MCMC sample
    mc.MAP(model_vars).fit(method='fmin_powell', tol=.1, iterlim=100)
    mc.MCMC(model_vars).sample(1)

    return model_vars
def prior_vals(dm, type):
    """ Estimate the prior distribution on param_age_mesh for a particular type

    Parameters
    ----------
    dm : DiseaseJson
    type : str, one of 'prevalence', 'incidence', 'remission', 'excess-mortality'

    Returns
    -------
    vars : dict of stochastics generated by dismod3.neg_binom_model.setup
    """
    import random
    import dismod3.neg_binom_model as model

    # Select rows of the requested type, excluding those flagged ignore == 1.
    # (`not d.get('ignore') != 1` binds as `not (... != 1)`, which selected
    # only the rows that were supposed to be ignored.)
    data = [d for d in dm.data
            if clean(d['data_type']).find(type) != -1
            and d.get('ignore') != 1]

    dm.clear_empirical_prior()
    dm.fit_initial_estimate(type, data)

    # with 8 or more rows, fit to a reproducible random subset of exactly 8
    if len(data) >= 8:
        random.seed(12345)
        data = random.sample(data, 8)

    X_region, X_study = model.regional_covariates('none', dm.get_covariates())
    est_mesh = dm.get_estimate_age_mesh()

    # fixed empirical-prior values handed to model.setup: zero covariate
    # effects, gamma of -10 at every estimate-age-mesh point, unit sigmas
    prior_dict = dict(alpha=pl.zeros(len(X_region)),
                      beta=pl.zeros(len(X_study)),
                      gamma=-10 * pl.ones(len(est_mesh)),
                      sigma_alpha=[1.],
                      sigma_beta=[1.],
                      sigma_gamma=[1.],
                      delta=100.,
                      sigma_delta=1.)

    model_vars = model.setup(dm, key=type, data_list=data, emp_prior=prior_dict)

    # coarse MAP fit, followed by a single MCMC sample
    mc.MAP(model_vars).fit(method='fmin_powell', tol=.1, iterlim=100)
    mc.MCMC(model_vars).sample(1)

    return model_vars
def hep_c_fit(regions, prediction_years, data_year_start=-inf, data_year_end=inf,
              egypt_flag=False, model_id=8788):
    """ Fit prevalence for regions and years specified

    Parameters
    ----------
    regions : list of str
      region names; data rows are kept when clean(d['gbd_region']) is in
      this list, and the list is also passed to gbd_keys as region_list
    prediction_years : list
      passed to gbd_keys as year_list
    data_year_start, data_year_end : float
      keep only data rows whose [year_start, year_end] interval overlaps
      this range (not applied when egypt_flag is set)
    egypt_flag : bool
      if True, use only rows with country_iso3_code 'EGY'; otherwise rows
      with that code are excluded
    model_id : int
      id of the disease model to fetch and fit (default 8788, the value
      that was previously hard-coded)

    Returns
    -------
    dm : the fitted disease model, after it has been posted back with
      dismod3.post_disease_model
    """
    # single-argument parenthesised form prints the same text under the
    # Python 2 print statement and the Python 3 print function
    print("\n***************************\nfitting %s for %s (using data from years %f to %f)" % (
        regions,
        prediction_years,
        data_year_start,
        data_year_end,
    ))

    ## load model to fit
    dismod3.disease_json.create_disease_model_dir(model_id)
    dm = dismod3.fetch_disease_model(model_id)

    ## adjust the expert priors
    dm.params["global_priors"]["heterogeneity"]["prevalence"] = "Very"
    dm.params["global_priors"]["smoothness"]["prevalence"]["amount"] = "Slightly"
    # TODO: construct examples of adjusting other covariates
    # ipdb> dm.params['global_priors'].keys()
    # [u'increasing', u'unimodal', u'level_bounds', u'y_maximum', u'note', u'level_value', u'decreasing', u'parameter_age_mesh', u'heterogeneity', u'smoothness']
    # ipdb> dm.params['global_priors']['smoothness']['prevalence']
    # {u'age_start': 0, u'amount': u'Moderately', u'age_end': 100}

    # include a study-level covariate for 'bias'
    # NOTE(review): this relies on get_covariates() returning a dict that the
    # model itself reads later, not a copy -- confirm
    covariates_dict = dm.get_covariates()
    covariates_dict["Study_level"]["bias"]["rate"]["value"] = 1
    # TODO: construct additional examples of adjusting covariates

    ## select relevant prevalence data
    # TODO: streamline data selection functions
    if egypt_flag:
        dm.data = [d for d in dm.data if d["country_iso3_code"] == "EGY"]
    else:
        dm.data = [
            d
            for d in dm.data
            if dismod3.utils.clean(d["gbd_region"]) in regions
            and float(d["year_end"]) >= data_year_start
            and float(d["year_start"]) <= data_year_end
            and d["country_iso3_code"] != "EGY"
        ]

    ## create, fit, and save rate model
    dm.vars = {}

    keys = dismod3.utils.gbd_keys(type_list=["prevalence"], region_list=regions, year_list=prediction_years)
    # TODO: consider how to do this for models that use the complete disease model
    # TODO: consider adding hierarchical similarity priors for the male and female models
    k0 = keys[0]  # looks like k0='prevalence+asia_south+1990+male'
    dm.vars[k0] = neg_binom_model.setup(dm, k0, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)

    # make map object so we can compute AIC and BIC
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    for k in keys:
        # save the results in the disease model; every key shares the single
        # model that was fitted for k0
        dm.vars[k] = dm.vars[k0]
        neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])

        # check autocorrelation to confirm chain has mixed
        test_model.summarize_acorr(dm.vars[k]["rate_stoch"].trace())

        # generate plots of results; the random() suffix makes each saved
        # file name differ between runs
        dismod3.tile_plot_disease_model(dm, [k], defaults={"ymax": 0.15, "alpha": 0.5})
        dm.savefig("dm-%d-posterior-%s.%f.png" % (dm.id, k, random()))

    # summarize fit quality graphically, as well as parameter posteriors
    dismod3.plotting.plot_posterior_predicted_checks(dm, k0)
    dm.savefig("dm-%d-check-%s.%f.png" % (dm.id, k0, random()))

    dismod3.post_disease_model(dm)

    return dm