def test_mesh_refinement():
    """ Compare fit for coarse and fine age mesh

    Fits the prevalence empirical prior to ``tests/single_low_noise.json``
    twice -- once with a 20-year parameter age mesh and once with a 5-year
    mesh -- and asserts that the two fitted curves agree to within 5%
    (relative difference) at every 20th entry.
    """
    from dismod3 import neg_binom_model

    def fit_with_mesh(age_mesh):
        # Load a fresh copy of the model so the two fits share no state;
        # the context manager closes the file handle as soon as it is read.
        with open('tests/single_low_noise.json') as f:
            dm = DiseaseJson(f.read())
        dm.set_param_age_mesh(age_mesh)
        # Pass the database name by keyword: the other caller of
        # fit_emp_prior in this file passes iter/thin/burn/dbname by keyword,
        # so a bare third positional argument would presumably bind to the
        # wrong parameter.  NOTE(review): confirm against the signature of
        # neg_binom_model.fit_emp_prior.
        neg_binom_model.fit_emp_prior(dm, 'prevalence', dbname='/dev/null')
        return dm

    # load model and fit it
    dm1 = fit_with_mesh(arange(0, 101, 20))

    # load another copy and fit it with a finer age mesh
    dm2 = fit_with_mesh(arange(0, 101, 5))

    # compare fits
    key = dismod3.utils.gbd_key_for('prevalence', 'asia_southeast', 1990, 'male')
    p1 = dm1.get_mcmc('emp_prior_mean', key)
    p2 = dm2.get_mcmc('emp_prior_mean', key)

    # Only every 20th entry is compared; presumably these are the ages that
    # are knots of both meshes -- TODO confirm that the curve is indexed by
    # single year of age.
    coarse = p1[::20]
    fine = p2[::20]
    print(coarse)
    print(fine)

    assert np.all(abs(coarse / fine - 1.) < .05), \
        'Coarse and fine age mesh fits should agree to within 5%'
def fit_simulated_disease(n=300, cv=2.):
    """ Test fit for simulated disease data with noise and missingness

    Loads the gold-standard simulation from
    ``tests/simulation_gold_standard.json``, adds multiplicative noise to the
    positive non-mortality data, keeps a random sample of ``n`` of those rows
    (plus every all-cause mortality row), fits the empirical priors and the
    posterior, and reports how well the posterior matches both the noisy
    data and the underlying truth.

    Parameters
    ----------
    n : int
      number of non-mortality data rows to sample for fitting
    cv : float
      coefficient of variation of the added noise, in percent (the standard
      error of each noised value is ``cv / 100 * value``)

    Results
    -------
    dm : the fitted DiseaseJson, with ``dm.data`` set to the sampled noisy
      data and ``dm.all_data`` set to the full list of non-mortality rows

    Side effects: prints fit summaries, appends one line
    ``median_are,mean_coverage`` to ``score_<n>_<cv>.txt`` in the current
    directory, and calls ``generate_figure(dm, n, cv)``.

    Raises ValueError (from ``random.sample``) if ``n`` is larger than the
    number of non-mortality rows available.
    """
    # load model to test fitting; close the file as soon as it has been read
    with open('tests/simulation_gold_standard.json') as f:
        dm = DiseaseJson(f.read())

    # adjust any priors and covariates as desired
    dm.set_param_age_mesh(arange(0, 101, 2))
    # NOTE(review): the heterogeneity priors are keyed with 'excess_mortality'
    # (underscore) while the empirical-prior loop below uses
    # 'excess-mortality' (hyphen); both spellings are kept exactly as found --
    # confirm that each is the one its consumer expects.
    for rate in 'incidence prevalence remission excess_mortality'.split():
        dm.params['global_priors']['heterogeneity'][rate] = 'Very'
    dm.params['covariates']['Country_level']['LDI_id']['rate']['value'] = 0

    # filter and noise up data
    mort_data = []
    all_data = []
    for d in dm.data:
        d['truth'] = d['value']
        d['age_weights'] = array([1.])
        if d['data_type'] == 'all-cause mortality data':
            # mortality rows are never noised or subsampled
            mort_data.append(d)
        else:
            # only positive values are noised: se would be zero otherwise and
            # se**-2 below would divide by zero
            if d['value'] > 0:
                se = (cv / 100.) * d['value']
                # draw the noisy value from a normal truncated to [0, inf);
                # the second argument se**-2 is presumably a precision
                # (assumes mc is PyMC 2) -- TODO confirm
                Y_i = mc.rtruncnorm(d['truth'], se**-2, 0, np.inf)
                d['value'] = Y_i
                d['standard_error'] = se
                d['effective_sample_size'] = Y_i * (1-Y_i) / se**2
            # NOTE(review): the original source had lost its indentation;
            # this append is placed so that zero-valued rows are kept
            # (un-noised).  Confirm it was not meant to sit inside the
            # ``if d['value'] > 0`` branch.
            all_data.append(d)
    sampled_data = random.sample(all_data, n) + mort_data
    dm.data = sampled_data

    # fit empirical priors and compare fit to data
    # (for quick development runs use iter=1000, thin=1, burn=0)
    from dismod3 import neg_binom_model
    for rate_type in 'prevalence incidence remission excess-mortality'.split():
        neg_binom_model.fit_emp_prior(dm, rate_type, iter=30000, thin=15, burn=15000, dbname='/dev/null')
    check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['north_america_high_income'],
                                  year_list=[1990], sex_list=['male'])
    # first generate decent initial conditions
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    # then sample the posterior via MCMC
    # (for quick development runs use iter=1000, thin=1, burn=0)
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=30000, thin=15, burn=15000,
                          verbose=1, dbname='/dev/null')

    print('error compared to the noisy data (coefficient of variation = %.2f)' % cv)
    check_posterior_fits(dm)

    # swap every non-mortality row back to its true value, remembering the
    # noisy value so it can be restored afterwards
    dm.data = all_data
    for d in dm.data:
        if d['data_type'] != 'all-cause mortality data':
            d['noisy_data'] = d['value']
            d['value'] = d['truth']

    print('error compared to the truth')
    are, coverage = check_posterior_fits(dm)
    median_are = median(are)
    mean_coverage = mean(coverage)

    print('')
    print('Median Absolute Relative Error of Posterior Predictions: %s' % (median_are,))
    print('Pct coverage: %s' % (100*mean_coverage,))

    # append this run's score; the context manager closes the file even if
    # the write fails
    with open('score_%d_%f.txt' % (n, cv), 'a') as f:
        f.write('%10.10f,%10.10f\n' % (median_are, mean_coverage))

    # restore the noisy values on the sampled rows before plotting
    dm.all_data = all_data
    dm.data = sampled_data
    for d in dm.data:
        if d['data_type'] != 'all-cause mortality data':
            d['value'] = d['noisy_data']

    generate_figure(dm, n, cv)

    return dm