forked from aflaxman/gbd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fit_all_prevonly.py
112 lines (90 loc) · 3.69 KB
/
fit_all_prevonly.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/python2.5
""" Fit all model parameters on cluster using empirical bayes.
Example
-------
$ python fit_all.py 4222 # submit jobs to cluster to estimate empirical priors followed by posteriors for model #4222
"""
import optparse
import os
import subprocess
from shutil import rmtree
import dismod3
from dismod3.utils import clean, gbd_keys, type_region_year_sex_from_key
def fit_all(id):
    """ Enqueue all cluster jobs necessary to fit the specified model.

    Submits (via SGE ``qsub``) one empirical-prior job, one posterior job
    per region/sex/year combination (held until the prior jobs finish),
    and a final upload job (held until all posterior jobs finish).

    Parameters
    ----------
    id : int
        The model id number for the job to fit

    Example
    -------
    >>> import fit_all
    >>> fit_all.fit_all(2552)
    """
    # TODO: store all disease information in this dir already, so fetching is not necessary

    # download the disease model json and store it in the working dir
    print('downloading disease model')
    dismod3.disease_json.create_disease_model_dir(id)
    dm = dismod3.fetch_disease_model(id)

    # get the all-cause mortality data, and merge it into the model
    mort = dismod3.fetch_disease_model('all-cause_mortality')
    dm.data += mort.data
    dm.save()

    # fit empirical priors (by pooling data from all regions)
    # NOTE: renamed from `dir` to avoid shadowing the builtin
    work_dir = dismod3.settings.JOB_WORKING_DIR % id  # TODO: refactor into a function
    emp_names = []
    for t in ['prevalence']:
        o = '%s/empirical_priors/stdout/%s' % (work_dir, t)
        e = '%s/empirical_priors/stderr/%s' % (work_dir, t)
        name_str = '%s-%d' % (t[0], id)
        emp_names.append(name_str)
        # NOTE(review): shell=True with interpolated values; all inputs here
        # come from trusted settings/constants, but keep it that way.
        call_str = 'qsub -cwd -o %s -e %s ' % (o, e) \
                   + '-N %s ' % name_str \
                   + 'run_on_cluster.sh fit_emp_prior.py %d -t %s' % (id, t)
        subprocess.call(call_str, shell=True)

    # directory to save the country level posterior csv files;
    # clear any stale results from a previous run before recreating it
    temp_dir = work_dir + '/posterior/country_level_posterior_dm-' + str(id) + '/'
    if os.path.exists(temp_dir):
        rmtree(temp_dir)
    os.makedirs(temp_dir)

    # fit each region/year/sex individually for this model, holding each
    # job until every empirical-prior job has completed
    hold_str = '-hold_jid %s ' % ','.join(emp_names)
    post_names = []
    for ii, r in enumerate(dismod3.gbd_regions):
        for s in dismod3.gbd_sexes:
            for y in dismod3.gbd_years:
                k = '%s+%s+%s' % (clean(r), s, y)
                o = '%s/posterior/stdout/%s' % (work_dir, k)
                e = '%s/posterior/stderr/%s' % (work_dir, k)
                # short unique job name: region initial + index, sex initial,
                # last digit of year, model id
                name_str = '%s%d%s%s%d' % (r[0], ii + 1, s[0], str(y)[-1], id)
                post_names.append(name_str)
                call_str = 'qsub -cwd -o %s -e %s ' % (o, e) \
                           + hold_str \
                           + '-N %s ' % name_str \
                           + 'run_on_cluster.sh fit_posterior_prevonly.py %d -r %s -s %s -y %s' % (id, clean(r), s, y)
                subprocess.call(call_str, shell=True)

    # after all posteriors have finished running, upload disease model json
    hold_str = '-hold_jid %s ' % ','.join(post_names)
    o = '%s/upload.stdout' % work_dir
    e = '%s/upload.stderr' % work_dir
    call_str = 'qsub -cwd -o %s -e %s ' % (o, e) \
               + hold_str \
               + '-N upld-%s ' % id \
               + 'run_on_cluster.sh upload_fits.py %d' % id
    subprocess.call(call_str, shell=True)
def main():
    """ Command-line entry point.

    Expects exactly one positional argument, the integer disease model id,
    and submits all fitting jobs for it via fit_all().  Exits with a usage
    error (via parser.error) on bad arguments.
    """
    usage = 'usage: %prog [options] disease_model_id'
    parser = optparse.OptionParser(usage)
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('incorrect number of arguments')

    try:
        # renamed from `id` to avoid shadowing the builtin
        model_id = int(args[0])
    except ValueError:
        parser.error('disease_model_id must be an integer')

    # NOTE(review): ON_SGE safety check deliberately disabled; re-enable to
    # prevent accidental local runs of qsub submission.
    # if not dismod3.settings.ON_SGE:
    #     parser.error('dismod3.settings.ON_SGE must be true to fit_all automatically')

    fit_all(model_id)
# Script entry point: only submit jobs when run directly, not on import.
if __name__ == '__main__':
    main()