def test_save_country_level_posterior():
    """ Test exporting country level posterior output

    Loads the ``tests/dismoditis.json`` fixture, fits one region/year/sex
    (a MAP pass followed by a short MCMC run), empties and recreates the
    output directory, then calls ``save_country_level_posterior`` and
    ``zip_country_level_posterior_files``.  There are no assertions; the
    test passes when none of these steps raises.
    """
    import os
    from shutil import rmtree

    # load model to test fitting; the context manager closes the fixture
    # file instead of leaving the open handle to the garbage collector
    with open('tests/dismoditis.json') as fixture:
        dm = DiseaseJson(fixture.read())

    # fit posterior where there is data
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['asia_southeast'],
                                  year_list=[1990],
                                  sex_list=['male'])
    # first generate decent initial conditions
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    # then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=100, thin=1,
                          burn=0, verbose=1, dbname='/dev/null')

    # make a rate_type_list
    rate_type_list = ['incidence', 'prevalence', 'remission', 'excess-mortality']

    # job working directory
    job_wd = dismod3.settings.JOB_WORKING_DIR % dm.id

    # directory to save the file (named out_dir so the builtin dir() is
    # not shadowed); start from an empty directory on every run
    out_dir = job_wd + '/posterior/country_level_posterior_dm-' + str(dm.id) + '/'
    if os.path.exists(out_dir):
        rmtree(out_dir)
    os.makedirs(out_dir)

    # save country level posterior in csv file
    from fit_posterior import save_country_level_posterior
    save_country_level_posterior(dm, 'asia_southeast', '1990', 'male', rate_type_list)

    # zip the csv file
    from upload_fits import zip_country_level_posterior_files
    zip_country_level_posterior_files(dm.id)
def fit_without_confrontation(id, region, sex, year): """ Fit posterior of specified region/sex/year for specified model without trying to integrate conflicting sources of data Parameters ---------- id : int The model id number for the job to fit region : str From dismod3.settings.gbd_regions, but clean()-ed sex : str, from dismod3.settings.gbd_sexes year : str, from dismod3.settings.gbd_years """ ## load model dm = dismod3.load_disease_model(id) ## separate out prevalence and relative-risk data prev_data = [ d for d in dm.data if dm.relevant_to(d, 'prevalence', region, year, sex) ] rr_data = [ d for d in dm.data if dm.relevant_to(d, 'relative-risk', region, year, sex) ] dm.data = [d for d in dm.data if not d in prev_data and not d in rr_data] ### setup the generic disease model (without prevalence data) import dismod3.gbd_disease_model as model keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex]) dm.calc_effective_sample_size(dm.data) dm.vars = model.setup(dm, keys) ## override the birth prevalence prior, based on the withheld prevalence data logit_C_0 = dm.vars[dismod3.utils.gbd_key_for('bins', region, year, sex)]['initial']['logit_C_0'] assert len(prev_data) == 1, 'should be a single prevalance datum' d = prev_data[0] mu_logit_C_0 = mc.logit(dm.value_per_1(d) + dismod3.settings.NEARLY_ZERO) lb, ub = dm.bounds_per_1(d) sigma_logit_C_0 = (mc.logit(ub + dismod3.settings.NEARLY_ZERO) - mc.logit(lb + dismod3.settings.NEARLY_ZERO)) / (2 * 1.96) print 'mu_C_0_pri:', mc.invlogit(mu_logit_C_0) print 'ui_C_0_pri:', lb, ub # override the excess-mortality, based on the relative-risk data mu_rr = 1.01 * np.ones(dismod3.settings.MAX_AGE) sigma_rr = .01 * np.ones(dismod3.settings.MAX_AGE) for d in rr_data: mu_rr[d['age_start']:(d['age_end'] + 1)] = dm.value_per_1(d) sigma_rr[d['age_start']:(d['age_end'] + 1)] = dm.se_per_1(d) print 'mu_rr:', mu_rr.round(2) #print 'sigma_rr:', sigma_rr.round(2) log_f = 
dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['age_coeffs'] log_f_mesh = log_f.parents['gamma_mesh'] param_mesh = log_f.parents['param_mesh'] m_all = dm.vars[dismod3.utils.gbd_key_for('all-cause_mortality', region, year, sex)] mu_log_f = np.log((mu_rr - 1) * m_all) sigma_log_f = 1 / ((mu_rr - 1) * m_all) * sigma_rr * m_all print 'mu_log_f:', mu_log_f.round(2)[param_mesh] print 'sigma_log_f:', sigma_log_f.round(2)[param_mesh] ### fit the model using Monte Carlo simulation (shoehorned into the MCMC framework of PyMC) dm.mcmc = mc.MCMC(dm.vars) dm.mcmc.use_step_method(SampleFromNormal, logit_C_0, mu=mu_logit_C_0, tau=sigma_logit_C_0**-2) dm.mcmc.use_step_method(SampleFromNormal, log_f_mesh, mu=mu_log_f[param_mesh], tau=sigma_log_f[param_mesh]**-2) for stoch in dm.mcmc.stochastics: dm.mcmc.use_step_method(mc.NoStepper, stoch) dm.mcmc.sample(1000, verbose=dismod3.settings.ON_SGE) #print 'mu_C_0_post:', mc.invlogit(logit_C_0.stats()['mean']).round(2) #print 'ui_C_0_post:', mc.invlogit(logit_C_0.stats()['95% HPD interval']).round(2) #print 'mu_rr_post:', dm.vars[dismod3.utils.gbd_key_for('relative-risk', region, year, sex)]['rate_stoch'].stats()['mean'].round(2) print 'mu_log_f_mesh_post:', log_f_mesh.stats()['mean'].round(2) print 'mu_f_post:', dm.vars[dismod3.utils.gbd_key_for( 'excess-mortality', region, year, sex)]['rate_stoch'].stats()['mean'].round(2) for k in keys: t, r, y, s = dismod3.utils.type_region_year_sex_from_key(k) if t in [ 'incidence', 'prevalence', 'remission', 'excess-mortality', 'mortality', 'prevalence_x_excess-mortality' ]: dismod3.neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k]) elif t in ['relative-risk', 'duration', 'incidence_x_duration']: dismod3.normal_model.store_mcmc_fit(dm, k, dm.vars[k]) from fit_posterior import save_country_level_posterior if str(year) == '2005': # also generate 2010 estimates save_country_level_posterior(dm, region, 2010, sex, ['prevalence', 'remission']) save_country_level_posterior( dm, 
region, year, sex, ['prevalence', 'remission'] ) #'prevalence incidence remission excess-mortality duration mortality relative-risk'.split()) # save results (do this last, because it removes things from the disease model that plotting function, etc, might need keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex]) dm.save('dm-%d-posterior-%s-%s-%s.json' % (dm.id, region, sex, year), keys_to_save=keys) return dm
def fit_without_confrontation(id, region, sex, year): """ Fit posterior of specified region/sex/year for specified model without trying to integrate conflicting sources of data Parameters ---------- id : int The model id number for the job to fit region : str From dismod3.settings.gbd_regions, but clean()-ed sex : str, from dismod3.settings.gbd_sexes year : str, from dismod3.settings.gbd_years """ ## load model dm = dismod3.load_disease_model(id) ## separate out prevalence and relative-risk data prev_data = [d for d in dm.data if dm.relevant_to(d, 'prevalence', region, year, sex)] rr_data = [d for d in dm.data if dm.relevant_to(d, 'relative-risk', region, year, sex)] dm.data = [d for d in dm.data if not d in prev_data and not d in rr_data] ### setup the generic disease model (without prevalence data) import dismod3.gbd_disease_model as model keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex]) dm.calc_effective_sample_size(dm.data) dm.vars = model.setup(dm, keys) ## override the birth prevalence prior, based on the withheld prevalence data logit_C_0 = dm.vars[dismod3.utils.gbd_key_for('bins', region, year, sex)]['initial']['logit_C_0'] assert len(prev_data) == 1, 'should be a single prevalance datum' d = prev_data[0] mu_logit_C_0 = mc.logit(dm.value_per_1(d)+dismod3.settings.NEARLY_ZERO) lb, ub = dm.bounds_per_1(d) sigma_logit_C_0 = (mc.logit(ub+dismod3.settings.NEARLY_ZERO) - mc.logit(lb+dismod3.settings.NEARLY_ZERO)) / (2 * 1.96) print 'mu_C_0_pri:', mc.invlogit(mu_logit_C_0) print 'ui_C_0_pri:', lb, ub # override the excess-mortality, based on the relative-risk data mu_rr = 1.01*np.ones(dismod3.settings.MAX_AGE) sigma_rr = .01*np.ones(dismod3.settings.MAX_AGE) for d in rr_data: mu_rr[d['age_start']:(d['age_end']+1)] = dm.value_per_1(d) sigma_rr[d['age_start']:(d['age_end']+1)] = dm.se_per_1(d) print 'mu_rr:', mu_rr.round(2) #print 'sigma_rr:', sigma_rr.round(2) log_f = dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, 
year, sex)]['age_coeffs'] log_f_mesh = log_f.parents['gamma_mesh'] param_mesh = log_f.parents['param_mesh'] m_all = dm.vars[dismod3.utils.gbd_key_for('all-cause_mortality', region, year, sex)] mu_log_f = np.log((mu_rr-1) * m_all) sigma_log_f = 1 / ((mu_rr-1) * m_all) * sigma_rr * m_all print 'mu_log_f:', mu_log_f.round(2)[param_mesh] print 'sigma_log_f:', sigma_log_f.round(2)[param_mesh] ### fit the model using Monte Carlo simulation (shoehorned into the MCMC framework of PyMC) dm.mcmc = mc.MCMC(dm.vars) dm.mcmc.use_step_method(SampleFromNormal, logit_C_0, mu=mu_logit_C_0, tau=sigma_logit_C_0**-2) dm.mcmc.use_step_method(SampleFromNormal, log_f_mesh, mu=mu_log_f[param_mesh], tau=sigma_log_f[param_mesh]**-2) for stoch in dm.mcmc.stochastics: dm.mcmc.use_step_method(mc.NoStepper, stoch) dm.mcmc.sample(1000, verbose=dismod3.settings.ON_SGE) #print 'mu_C_0_post:', mc.invlogit(logit_C_0.stats()['mean']).round(2) #print 'ui_C_0_post:', mc.invlogit(logit_C_0.stats()['95% HPD interval']).round(2) #print 'mu_rr_post:', dm.vars[dismod3.utils.gbd_key_for('relative-risk', region, year, sex)]['rate_stoch'].stats()['mean'].round(2) print 'mu_log_f_mesh_post:', log_f_mesh.stats()['mean'].round(2) print 'mu_f_post:', dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['rate_stoch'].stats()['mean'].round(2) for k in keys: t,r,y,s = dismod3.utils.type_region_year_sex_from_key(k) if t in ['incidence', 'prevalence', 'remission', 'excess-mortality', 'mortality', 'prevalence_x_excess-mortality']: dismod3.neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k]) elif t in ['relative-risk', 'duration', 'incidence_x_duration']: dismod3.normal_model.store_mcmc_fit(dm, k, dm.vars[k]) from fit_posterior import save_country_level_posterior if str(year) == '2005': # also generate 2010 estimates save_country_level_posterior(dm, region, 2010, sex, ['prevalence', 'remission']) save_country_level_posterior(dm, region, year, sex, ['prevalence', 'remission']) #'prevalence incidence 
remission excess-mortality duration mortality relative-risk'.split()) # save results (do this last, because it removes things from the disease model that plotting function, etc, might need keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex]) dm.save('dm-%d-posterior-%s-%s-%s.json' % (dm.id, region, sex, year), keys_to_save=keys) return dm