forked from aflaxman/gbd
/
hep_c.py
127 lines (98 loc) · 5.44 KB
/
hep_c.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
""" Script to fit Hep C prevalence data by GBD region for 1990 and 2005
(with Egypt fit separately and merged into North Africa / Middle East)
"""
# matplotlib backend setup
import matplotlib
matplotlib.use("AGG")
from pylab import *
import pymc as mc
from dismod3.disease_json import DiseaseJson
from dismod3 import neg_binom_model
neg_binom_model.countries_for['egypt'] = ['EGY'] # HACK: to treat egypt as its own region
import dismod3.utils
import test_model
import dismod3
def hep_c_fit(regions, prediction_years, data_year_start=-inf, data_year_end=inf, egypt_flag=False,
              model_id=8788, mcmc_iter=50000, mcmc_burn=25000, mcmc_thin=50):
    """ Fit prevalence for regions and years specified

    Parameters
    ----------
    regions : list of str
      region names; data rows are kept when the cleaned value of
      their 'gbd_region' field is in this list, and the list is also
      used to generate the prediction keys
    prediction_years : list
      years to generate prediction keys for
    data_year_start, data_year_end : float, optional
      only data rows with year_end >= data_year_start and
      year_start <= data_year_end are used (not applied when
      egypt_flag is set)
    egypt_flag : bool, optional
      if true, use only rows with country_iso3_code 'EGY';
      otherwise rows with that code are excluded
    model_id : int, optional
      id of the disease model to fetch (default is the value that
      was previously hard-coded)
    mcmc_iter, mcmc_burn, mcmc_thin : int, optional
      passed to MCMC.sample as iter, burn, and thin

    Results
    -------
    Returns the disease model, after the fit has been stored in it
    for every prediction key, plots have been saved, and the model
    has been posted with dismod3.post_disease_model.

    Notes
    -----
    A single model is set up (for the first prediction key) and the
    same model variables are stored under every prediction key.
    """
    # a single parenthesised expression prints identically under the
    # Python 2 print statement and also parses under Python 3
    print('\n***************************\nfitting %s for %s (using data from years %f to %f)' % (regions, prediction_years, data_year_start, data_year_end))

    ## load model to fit
    dismod3.disease_json.create_disease_model_dir(model_id)
    dm = dismod3.fetch_disease_model(model_id)

    ## adjust the expert priors
    dm.params['global_priors']['heterogeneity']['prevalence'] = 'Very'
    dm.params['global_priors']['smoothness']['prevalence']['amount'] = 'Slightly'
    # TODO: construct examples of adjusting other covariates
    # for reference, from an ipdb session:
    #   dm.params['global_priors'].keys()
    #   [u'increasing', u'unimodal', u'level_bounds', u'y_maximum', u'note', u'level_value', u'decreasing', u'parameter_age_mesh', u'heterogeneity', u'smoothness']
    #   dm.params['global_priors']['smoothness']['prevalence']
    #   {u'age_start': 0, u'amount': u'Moderately', u'age_end': 100}

    # include a study-level covariate for 'bias'
    covariates_dict = dm.get_covariates()
    covariates_dict['Study_level']['bias']['rate']['value'] = 1
    # TODO: construct additional examples of adjusting covariates

    ## select relevant prevalence data
    # TODO: streamline data selection functions
    if egypt_flag:
        # NOTE(review): the data_year_start/data_year_end window is not
        # applied in this branch -- confirm that this is intended
        dm.data = [d for d in dm.data if d['country_iso3_code'] == 'EGY']
    else:
        dm.data = [d for d in dm.data if
                   dismod3.utils.clean(d['gbd_region']) in regions
                   and float(d['year_end']) >= data_year_start
                   and float(d['year_start']) <= data_year_end
                   and d['country_iso3_code'] != 'EGY']

    ## create, fit, and save rate model
    dm.vars = {}
    keys = dismod3.utils.gbd_keys(type_list=['prevalence'],
                                  region_list=regions,
                                  year_list=prediction_years)
    # TODO: consider how to do this for models that use the complete disease model
    # TODO: consider adding hierarchical similarity priors for the male and female models

    k0 = keys[0]  # looks like k0='prevalence+asia_south+1990+male'
    dm.vars[k0] = neg_binom_model.setup(dm, k0, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=mcmc_iter, burn=mcmc_burn, thin=mcmc_thin, verbose=1)

    # make map object so we can compute AIC and BIC
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    for k in keys:
        # save the results in the disease model; every key shares the
        # variables of the one model that was fit
        dm.vars[k] = dm.vars[k0]
        neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])

        # check autocorrelation to confirm chain has mixed
        test_model.summarize_acorr(dm.vars[k]['rate_stoch'].trace())

        # generate plots of results; the random() suffix gives each
        # saved figure a distinct file name
        dismod3.tile_plot_disease_model(dm, [k], defaults={'ymax':.15, 'alpha': .5})
        dm.savefig('dm-%d-posterior-%s.%f.png' % (dm.id, k, random()))

    # summarize fit quality graphically, as well as parameter posteriors
    dismod3.plotting.plot_posterior_predicted_checks(dm, k0)
    dm.savefig('dm-%d-check-%s.%f.png' % (dm.id, k0, random()))

    dismod3.post_disease_model(dm)
    return dm
if __name__ == '__main__':
    # region groups, written as whitespace-separated names
    latin_america_regions = 'caribbean latin_america_tropical latin_america_andean latin_america_central latin_america_southern'.split()
    africa_regions = 'sub-saharan_africa_central sub-saharan_africa_southern sub-saharan_africa_west'.split()
    single_fit_regions = 'europe_eastern europe_central asia_central asia_east asia_south asia_southeast australasia oceania sub-saharan_africa_east asia_pacific_high_income'.split()
    split_period_regions = 'north_america_high_income europe_western '.split()

    # egypt is fit on its own, and the rest of its region without it
    dm_egypt = hep_c_fit(['egypt'], [1990, 2005], egypt_flag=True)
    dm_na_me = hep_c_fit(['north_africa_middle_east'], [1990, 2005])

    # combine prevalence curves for egypt and rest of north africa
    for year in (1990, 2005):
        for sex in ('male', 'female'):
            egypt_key = 'prevalence+egypt+%d+%s' % (year, sex)
            prev_1 = neg_binom_model.calc_rate_trace(dm_egypt, egypt_key, dm_egypt.vars[egypt_key])
            pop_1 = neg_binom_model.population_by_age[('EGY', str(year), sex)]

            key = 'prevalence+north_africa_middle_east+%d+%s' % (year, sex)
            prev_0 = neg_binom_model.calc_rate_trace(dm_na_me, key, dm_na_me.vars[key])
            pop_0 = neg_binom_model.regional_population(key)

            # generate population weighted average: the region without
            # egypt is weighted by (pop_0 - pop_1), egypt by pop_1
            pop_rest = pop_0 - pop_1
            weighted_sum = prev_0 * pop_rest + prev_1 * pop_1
            prev = weighted_sum / pop_0
            neg_binom_model.store_mcmc_fit(dm_na_me, key, None, prev)

            # generate plots of results
            dismod3.tile_plot_disease_model(dm_na_me, [key], defaults={'ymax':.15, 'alpha': .5})
            dm_na_me.savefig('dm-%d-posterior-na_me_w_egypt.%f.png' % (dm_na_me.id, random()))

    # save results
    dismod3.post_disease_model(dm_na_me)

    # each of these groups of regions is fit together as one model
    for region_group in (latin_america_regions, africa_regions):
        dm = hep_c_fit(region_group, [1990, 2005])

    # each of these regions gets a model of its own
    for r in single_fit_regions:
        dm = hep_c_fit([r], [1990, 2005])

    # these regions get separate fits for each prediction year, with the
    # data window split at 1997
    for r in split_period_regions:
        dm = hep_c_fit([r], [1990], data_year_end=1997)
        dm = hep_c_fit([r], [2005], data_year_start=1997)