def main(): usage = 'usage: %prog [options] disease_model_id' parser = optparse.OptionParser(usage) # flags for parameters to modify parser.add_option('-y', '--ymax', help='set the maximum y value for summary plots') parser.add_option('-t', '--condition', help='set the condition of the model') parser.add_option('-n', '--notes', help='set the notes of the model') # boolean flags parser.add_option('-c', '--clone', action='store_true', dest='clone', help='create a clone of the model (leave specified model unchanged)') (opts, args) = parser.parse_args() # check that args are correct if len(args) == 1: try: id = int(args[0]) except ValueError: parser.error('disease_model_id must be an integer') return else: parser.error('incorrect number of arguments') return # fetch requested model dm = dismod3.get_disease_model(id) # change values specified if opts.ymax: dm.set_ymax(float(opts.ymax)) # TODO: get condition to actually change if opts.condition: dm.set_condition(opts.condition) if opts.notes: dm.set_notes(opts.notes) # clone if requested if opts.clone: dm.params.pop('id') # dismod_data_server creates new model if it doesn't find an id # post results to dismod_data_server url = dismod3.post_disease_model(dm) # announce url to view results print url
def generate_disease_data(condition, cov): """ Generate csv files with gold-standard disease data, and somewhat good, somewhat dense disease data, as might be expected from a condition that is carefully studied in the literature """ age_len = dismod3.MAX_AGE ages = np.arange(age_len, dtype='float') # incidence rate i0 = .005 + .02 * mc.invlogit((ages - 44) / 3) #i0 = np.maximum(0., .001 * (-.125 + np.ones_like(ages) + (ages / age_len)**2.)) # remission rate #r = 0. * ages r = .1 * np.ones_like(ages) # excess-mortality rate #f_init = .085 * (ages / 100) ** 2.5 SMR = 3. * np.ones_like(ages) - ages / age_len # all-cause mortality-rate mort = dismod3.get_disease_model('all-cause_mortality') #age_intervals = [[a, a+9] for a in range(0, dismod3.MAX_AGE-4, 10)] + [[0, 100] for ii in range(1)] age_intervals = [[a, a] for a in range(0, dismod3.MAX_AGE, 1)] # TODO: take age structure from real data sparse_intervals = dict([[ region, random.sample(age_intervals, (ii**3 * len(age_intervals)) / len(countries_for)**3 / 1) ] for ii, region in enumerate(countries_for)]) dense_intervals = dict( [[region, random.sample(age_intervals, len(age_intervals) / 2)] for ii, region in enumerate(countries_for)]) gold_data = [] noisy_data = [] for ii, region in enumerate(sorted(countries_for)): if region == 'world': continue print region sys.stdout.flush() # introduce unexplained regional variation #i = i0 * (1 + float(ii) / 21) # or not i = i0 for year in [1990, 2005]: for sex in ['male', 'female']: param_type = 'all-cause_mortality' key = dismod3.gbd_key_for(param_type, region, year, sex) m_all_cause = mort.mortality(key, mort.data) # calculate excess-mortality rate from smr f = (SMR - 1.) * m_all_cause ## compartmental model (bins S, C, D, M) import scipy.linalg from dismod3 import NEARLY_ZERO from dismod3.utils import trim SCDM = np.zeros([4, age_len]) p = np.zeros(age_len) m = np.zeros(age_len) SCDM[0, 0] = 1. SCDM[1, 0] = 0. SCDM[2, 0] = 0. SCDM[3, 0] = 0. 
p[0] = SCDM[1, 0] / (SCDM[0, 0] + SCDM[1, 0] + NEARLY_ZERO) m[0] = trim(m_all_cause[0] - f[0] * p[0], NEARLY_ZERO, 1 - NEARLY_ZERO) for a in range(age_len - 1): A = [[-i[a] - m[a], r[a], 0., 0.], [i[a], -r[a] - m[a] - f[a], 0., 0.], [m[a], m[a], 0., 0.], [0., f[a], 0., 0.]] SCDM[:, a + 1] = np.dot(scipy.linalg.expm(A), SCDM[:, a]) p[a + 1] = SCDM[1, a + 1] / (SCDM[0, a + 1] + SCDM[1, a + 1] + NEARLY_ZERO) m[a + 1] = m_all_cause[a + 1] - f[a + 1] * p[a + 1] # duration = E[time in bin C] hazard = r + m + f pr_not_exit = np.exp(-hazard) X = np.empty(len(hazard)) X[-1] = 1 / hazard[-1] for ii in reversed(range(len(X) - 1)): X[ii] = (pr_not_exit[ii] * (X[ii + 1] + 1)) + (1 / hazard[ii] * (1 - pr_not_exit[ii]) - pr_not_exit[ii]) country = countries_for[region][0] params = dict(age_intervals=age_intervals, condition=condition, gbd_region=region, country=country, year=year, sex=sex, effective_sample_size=1000) params['age_intervals'] = [[0, 99]] generate_and_append_data(gold_data, 'prevalence data', p, **params) generate_and_append_data(gold_data, 'incidence data', i, **params) generate_and_append_data(gold_data, 'excess-mortality data', f, **params) generate_and_append_data(gold_data, 'remission data', r, **params) generate_and_append_data(gold_data, 'duration data', X, **params) # TODO: use this approach to age standardize all gold data, and then change it to get iX as a direct sum params['age_intervals'] = [[0, 99]] iX = i * X * (1 - p) * regional_population(key) generate_and_append_data(gold_data, 'incidence_x_duration', iX, **params) params['effective_sample_size'] = 1000 params['cov'] = 0. 
params['age_intervals'] = age_intervals generate_and_append_data(noisy_data, 'prevalence data', p, **params) generate_and_append_data(noisy_data, 'excess-mortality data', f, **params) generate_and_append_data(noisy_data, 'remission data', r, **params) generate_and_append_data(noisy_data, 'incidence data', i, **params) col_names = sorted(data_dict_for_csv(gold_data[0]).keys()) f_file = open(OUTPUT_PATH + '%s_gold.tsv' % condition, 'w') csv_f = csv.writer(f_file, dialect='excel-tab') csv_f.writerow(col_names) for d in gold_data: dd = data_dict_for_csv(d) csv_f.writerow([dd[c] for c in col_names]) f_file.close() f_name = OUTPUT_PATH + '%s_data.tsv' % condition f_file = open(f_name, 'w') csv_f = csv.writer(f_file, dialect='excel-tab') csv_f.writerow(col_names) for d in noisy_data: dd = data_dict_for_csv(d) csv_f.writerow([dd[c] for c in col_names]) f_file.close() # upload data file from dismod3.disease_json import dismod_server_login, twc, DISMOD_BASE_URL dismod_server_login() twc.go(DISMOD_BASE_URL + 'dismod/data/upload/') twc.formvalue(1, 'tab_separated_values', open(f_name).read()) # TODO: find or set the model number for this model, set the # expert priors and covariates, merge the covariate data into the # model, and add the "ground truth" to the disease json try: url = twc.submit() except Exception, e: print e
def fit(id): """ Download model, conduct fit, and upload results Parameters ---------- id : int The model id number for the job to fit Commandline Version: [omak] dismod4.abie ] test/parameter.sh [omak] dismod4.abie ] example/simulate.py 5 1 100 [omak] dismod4.abie ] /tmp/dismod4_csv test/parameter.csv measure_in.csv sfun_in.csv dismod4_csv: Attempt to overwrite the existing file sfun_in.csv [omak] dismod4.abie ] /tmp/dismod4_csv test/parameter.csv measure_in.csv sfun_in.csv sfun_out.csv measure_out.csv """ dm = dismod3.get_disease_model(id) mort = dismod3.fetch_disease_model('all-cause_mortality') dm.data += mort.data ## convert model to csv file column_names = 'time_lower,time_upper,age_lower,age_upper,likelihood_name,likelihood_sigma,likelihood_beta,value,integrand'.split(',') data_list = [] # add all the model data to the data list for d in dm.data: row = {} row['time_lower'] = d['year_start'] row['time_upper'] = d['year_end'] # TODO: determine if this should be +1 row['age_lower'] = d['age_start']+1. row['age_upper'] = d['age_end']+1. # TODO: determine if this should be +1 row['likelihood_name'] = 'gaussian' row['likelihood_sigma'] = .0001 # TODO: use more accurate sigma row['likelihood_beta'] = 1. 
row['value'] = d['value'] / float(d.get('units', 1.)) for dm3_type, dm4_type in [['remission data', 'remission'], ['excess-mortality data', 'excess'], ['incidence data', 'incidence'], ['mrr data', 'risk'], ['prevalence data', 'prevalence'], ['all-cause mortality data', 'all_cause'], ]: if d['data_type'] == dm3_type: row['integrand'] = dm4_type data_list.append(row) break # add the time/age/regions that we want to predict to the data list as well age_mesh = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] index_dict = {} for year in [1990, 2005]: for age in age_mesh: for type in ['remission', 'excess', 'incidence', 'risk', 'prevalence']: row = {} row['time_lower'] = year row['time_upper'] = year row['age_lower'] = age row['age_upper'] = age row['likelihood_name'] = 'gaussian' row['likelihood_sigma'] = inf row['likelihood_beta'] = 1. row['value'] = 0. row['integrand'] = type index_dict[(type, year, age)] = len(data_list) data_list.append(row) # save the csv file import csv fname = dismod3.settings.JOB_WORKING_DIR % id + '/measure_in.csv' try: f = open(fname, 'w') csv.writer(f).writerow(column_names) csv.DictWriter(f, column_names).writerows(data_list) f.close() except IOError, e: print 'Warning: could not create data csv. Maybe it exists already?\n%s' % e
def hep_c_fit(regions, prediction_years, data_year_start=-inf, data_year_end=inf, egypt_flag=False): """ Fit prevalence for regions and years specified """ print '\n***************************\nfitting %s for %s (using data from years %f to %f)' % (regions, prediction_years, data_year_start, data_year_end) ## load model to fit #dm = DiseaseJson(file('tests/hep_c.json').read()) dm = dismod3.get_disease_model(8021) ## adjust the expert priors dm.params['global_priors']['heterogeneity']['prevalence'] = 'Very' dm.params['global_priors']['smoothness']['prevalence']['amount'] = 'Slightly' # TODO: construct examples of adjusting other covariates # ipdb> dm.params['global_priors'].keys() # [u'increasing', u'unimodal', u'level_bounds', u'y_maximum', u'note', u'level_value', u'decreasing', u'parameter_age_mesh', u'heterogeneity', u'smoothness'] #ipdb> dm.params['global_priors']['smoothness']['prevalence'] #{u'age_start': 0, u'amount': u'Moderately', u'age_end': 100} # include a study-level covariate for 'bias' covariates_dict = dm.get_covariates() covariates_dict['Study_level']['bias']['rate']['value'] = 1 # TODO: construct additional examples of adjusting covariates ## select relevant prevalence data # TODO: streamline data selection functions if egypt_flag: dm.data = [d for d in dm.data if d['country_iso3_code'] == 'EGY'] else: dm.data = [d for d in dm.data if dismod3.utils.clean(d['gbd_region']) in regions and float(d['year_end']) >= data_year_start and float(d['year_start']) <= data_year_end and d['country_iso3_code'] != 'EGY'] ## create, fit, and save rate model dm.vars = {} keys = dismod3.utils.gbd_keys(type_list=['prevalence'], region_list=regions, year_list=prediction_years) # TODO: consider how to do this for models that use the complete disease model # TODO: consider adding hierarchical similarity priors for the male and female models k0 = keys[0] # looks like k0='prevalence+asia_south+1990+male' dm.vars[k0] = neg_binom_model.setup(dm, k0, dm.data) dm.mcmc = 
mc.MCMC(dm.vars) dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1) # make map object so we can compute AIC and BIC dm.map = mc.MAP(dm.vars) dm.map.fit() for k in keys: # save the results in the disease model dm.vars[k] = dm.vars[k0] if egypt_flag: neg_binom_model.countries_for['egypt'] = ['EGY'] # HACK: to treat egypt as its own region neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k]) # check autocorrelation to confirm chain has mixed test_model.summarize_acorr(dm.vars[k]['rate_stoch'].trace()) # generate plots of results dismod3.tile_plot_disease_model(dm, [k], defaults={'ymax':.15, 'alpha': .5}) dm.savefig('dm-%d-posterior-%s.%f.png' % (dm.id, k, random())) # summarize fit quality graphically, as well as parameter posteriors dismod3.plotting.plot_posterior_predicted_checks(dm, k0) dm.savefig('dm-%d-check-%s.%f.png' % (dm.id, k0, random())) dismod3.post_disease_model(dm) return dm
def generate_disease_data(condition, cov): """ Generate csv files with gold-standard disease data, and somewhat good, somewhat dense disease data, as might be expected from a condition that is carefully studied in the literature """ age_len = dismod3.MAX_AGE ages = np.arange(age_len, dtype='float') # incidence rate i0 = .005 + .02 * mc.invlogit((ages - 44) / 3) #i0 = np.maximum(0., .001 * (-.125 + np.ones_like(ages) + (ages / age_len)**2.)) # remission rate #r = 0. * ages r = .1 * np.ones_like(ages) # excess-mortality rate #f_init = .085 * (ages / 100) ** 2.5 SMR = 3. * np.ones_like(ages) - ages / age_len # all-cause mortality-rate mort = dismod3.get_disease_model('all-cause_mortality') #age_intervals = [[a, a+9] for a in range(0, dismod3.MAX_AGE-4, 10)] + [[0, 100] for ii in range(1)] age_intervals = [[a, a] for a in range(0, dismod3.MAX_AGE, 1)] # TODO: take age structure from real data sparse_intervals = dict([[region, random.sample(age_intervals, (ii**3 * len(age_intervals)) / len(countries_for)**3 / 1)] for ii, region in enumerate(countries_for)]) dense_intervals = dict([[region, random.sample(age_intervals, len(age_intervals)/2)] for ii, region in enumerate(countries_for)]) gold_data = [] noisy_data = [] for ii, region in enumerate(sorted(countries_for)): if region == 'world': continue print region sys.stdout.flush() # introduce unexplained regional variation #i = i0 * (1 + float(ii) / 21) # or not i = i0 for year in [1990, 2005]: for sex in ['male', 'female']: param_type = 'all-cause_mortality' key = dismod3.gbd_key_for(param_type, region, year, sex) m_all_cause = mort.mortality(key, mort.data) # calculate excess-mortality rate from smr f = (SMR - 1.) * m_all_cause ## compartmental model (bins S, C, D, M) import scipy.linalg from dismod3 import NEARLY_ZERO from dismod3.utils import trim SCDM = np.zeros([4, age_len]) p = np.zeros(age_len) m = np.zeros(age_len) SCDM[0,0] = 1. SCDM[1,0] = 0. SCDM[2,0] = 0. SCDM[3,0] = 0. 
p[0] = SCDM[1,0] / (SCDM[0,0] + SCDM[1,0] + NEARLY_ZERO) m[0] = trim(m_all_cause[0] - f[0] * p[0], NEARLY_ZERO, 1-NEARLY_ZERO) for a in range(age_len - 1): A = [[-i[a]-m[a], r[a] , 0., 0.], [ i[a] , -r[a]-m[a]-f[a], 0., 0.], [ m[a], m[a] , 0., 0.], [ 0., f[a], 0., 0.]] SCDM[:,a+1] = np.dot(scipy.linalg.expm(A), SCDM[:,a]) p[a+1] = SCDM[1,a+1] / (SCDM[0,a+1] + SCDM[1,a+1] + NEARLY_ZERO) m[a+1] = m_all_cause[a+1] - f[a+1] * p[a+1] # duration = E[time in bin C] hazard = r + m + f pr_not_exit = np.exp(-hazard) X = np.empty(len(hazard)) X[-1] = 1 / hazard[-1] for ii in reversed(range(len(X)-1)): X[ii] = (pr_not_exit[ii] * (X[ii+1] + 1)) + (1 / hazard[ii] * (1 - pr_not_exit[ii]) - pr_not_exit[ii]) country = countries_for[region][0] params = dict(age_intervals=age_intervals, condition=condition, gbd_region=region, country=country, year=year, sex=sex, effective_sample_size=1000) params['age_intervals'] = [[0,99]] generate_and_append_data(gold_data, 'prevalence data', p, **params) generate_and_append_data(gold_data, 'incidence data', i, **params) generate_and_append_data(gold_data, 'excess-mortality data', f, **params) generate_and_append_data(gold_data, 'remission data', r, **params) generate_and_append_data(gold_data, 'duration data', X, **params) # TODO: use this approach to age standardize all gold data, and then change it to get iX as a direct sum params['age_intervals'] = [[0,99]] iX = i * X * (1-p) * regional_population(key) generate_and_append_data(gold_data, 'incidence_x_duration', iX, **params) params['effective_sample_size'] = 1000 params['cov'] = 0. 
params['age_intervals'] = age_intervals generate_and_append_data(noisy_data, 'prevalence data', p, **params) generate_and_append_data(noisy_data, 'excess-mortality data', f, **params) generate_and_append_data(noisy_data, 'remission data', r, **params) generate_and_append_data(noisy_data, 'incidence data', i, **params) col_names = sorted(data_dict_for_csv(gold_data[0]).keys()) f_file = open(OUTPUT_PATH + '%s_gold.tsv' % condition, 'w') csv_f = csv.writer(f_file, dialect='excel-tab') csv_f.writerow(col_names) for d in gold_data: dd = data_dict_for_csv(d) csv_f.writerow([dd[c] for c in col_names]) f_file.close() f_name = OUTPUT_PATH + '%s_data.tsv' % condition f_file = open(f_name, 'w') csv_f = csv.writer(f_file, dialect='excel-tab') csv_f.writerow(col_names) for d in noisy_data: dd = data_dict_for_csv(d) csv_f.writerow([dd[c] for c in col_names]) f_file.close() # upload data file from dismod3.disease_json import dismod_server_login, twc, DISMOD_BASE_URL dismod_server_login() twc.go(DISMOD_BASE_URL + 'dismod/data/upload/') twc.formvalue(1, 'tab_separated_values', open(f_name).read()) # TODO: find or set the model number for this model, set the # expert priors and covariates, merge the covariate data into the # model, and add the "ground truth" to the disease json try: url = twc.submit() except Exception, e: print e
#!/usr/local/bin/python2.5 import dismod3 import sys dm = dismod3.get_disease_model(894) if len(sys.argv) == 2: k = dm.params['priors'].keys()[int(sys.argv[1])] dm.data = [d for d in dm.data if \ d['gbd_region'] == k.replace('prevalence data+', '')] dm.set_priors('prevalence data', ' smooth 10\n zero 0 15\n zero 99 100\n confidence 1000 .0001\n') import dismod3.beta_binomial_model as model print 'Processing %s (%d data points)' % (k, len(dm.data)) model.fit(dm, 'map') model.fit(dm, 'mcmc') model.fit(dm, 'map') model.fit(dm, 'mcmc') model.fit(dm, 'map') else: keys = dm.params['priors'].keys() for k in keys: dm.set_priors(k, ' smooth 10\n zero 0 15\n zero 99 100\n confidence 1000 .0001\n') import dismod3.multiregion_model as model print 'Processing all regions (%d data points)' % len(dm.data)
def fit_continuous_spm(id): """ Fit continuous single parameter model Parameters ---------- id : int The model id number for the job to fit Example ------- >>> import fit_continuous_spm >>> fit_continuous_spm.fit_continuous_spm(4773) """ dm = dismod3.get_disease_model(id) ## convert model to csv file column_names = ['region', 'country', 'year', 'age', 'y', 'se', 'x0', 'x1', 'w0'] data_list = [] # add all the model data to the data list param_type = 'continuous single parameter' for d in dm.filter_data(data_type=param_type): row = {} row['region'] = dismod3.utils.clean(d['gbd_region']) row['country'] = d['country_iso3_code'] row['year'] = round(.5 * (d['year_start'] + d['year_end']), -1) row['age'] = round(.5 * (d['age_start'] + d['age_end']), -1) row['y'] = d['parameter_value'] * float(d['units']) row['se'] = d['standard_error'] * float(d['units']) row['x0'] = 1. row['x1'] = .1 * (row['year']-1997.) row['w0'] = .1 * (row['year']-1997.) data_list.append(row) # add the time/age/regions that we want to predict to the data list as well prediction_regions = dismod3.gbd_regions # FIXME: now i just take a few regions, for fast testing age_mesh = [0, 20, 40, 60, 80, 100] index_dict = {} for r in prediction_regions: for y in [1990, 2005]: for a in age_mesh: row = {} row['region'] = dismod3.utils.clean(r) row['country'] = row['region'] + '_all' row['year'] = y row['age'] = a row['y'] = pl.nan row['se'] = pl.inf row['x0'] = 1. row['x1'] = .1 * (row['year']-1997.) row['w0'] = .1 * (row['year']-1997.) index_dict[(dismod3.utils.clean(r),y,a)] = len(data_list) data_list.append(row) # save the csv file import csv fname = dismod3.settings.JOB_WORKING_DIR % id + '/data.csv' try: f = open(fname, 'w') csv.writer(f).writerow(column_names) csv.DictWriter(f, column_names).writerows(data_list) f.close() except IOError, e: print 'Warning: could not create data csv. Maybe it exists already?\n%s' % e
truth, range(a0, a1 + 1), np.ones(a1 + 1 - a0) / float(a1 + 1 - a0)) d['value'] = p0 if p0 == 0.: d['standard_error'] = .000001 elif p0 < 1.: d['standard_error'] = p0 * (1 - p0) / np.sqrt(1000) else: d['standard_error'] = p0 * .05 data.append(d) data = [] mort = dismod3.get_disease_model('all-cause_mortality') age_intervals = [[a, a + 4] for a in range(0, dismod3.MAX_AGE - 4, 5)] for sex in ['male', 'female']: for year in [1990, 2005]: for region, country in \ [['Asia, Southeast', 'THA'], ['Asia, East', 'CHN'], ['Asia, South', 'IND'], ['Europe, Central', 'ALB']]: offset = 0. if sex == 'male': offset += .5 if year == 2005: offset += .5
'id': len(data)} p0 = dismod3.utils.rate_for_range(truth, range(a0, a1 + 1), np.ones(a1 + 1 - a0) / float(a1 + 1 - a0)) d['value'] = p0 if p0 == 0.: d['standard_error'] = .000001 elif p0 < 1.: d['standard_error'] = p0 * (1-p0) / np.sqrt(1000) else: d['standard_error'] = p0 * .05 data.append(d) data = [] mort = dismod3.get_disease_model('all-cause_mortality') age_intervals = [[a, a+4] for a in range(0, dismod3.MAX_AGE-4, 5)] for sex in ['male', 'female']: for year in [1990, 2005]: for region, country in \ [['Asia, Southeast', 'THA'], ['Asia, East', 'CHN'], ['Asia, South', 'IND'], ['Europe, Central', 'ALB']]: offset = 0. if sex == 'male': offset += .5 if year == 2005: offset += .5
def _reset_output_dir(path, subdirs):
    """ Recreate path from scratch with one empty subdirectory per name """
    if os.path.exists(path):
        rmtree(path)
    os.mkdir(path)
    for name in subdirs:
        os.mkdir('%s/%s' % (path, name))


def daemon_loop():
    """ Poll the job queue forever and launch a fit process for each job

    Each pass fetches the job queue, removes every job from it, and --
    depending on the model's run_status 'estimate_type' -- launches one
    fit command per region/sex/year ('posterior') or per parameter type
    ('empirical priors') via dismod3.settings.GBD_FIT_STR.  Output of
    each command goes to stdout/ and stderr/ files under the job
    working directory.  The loop then sleeps for
    dismod3.settings.SLEEP_SECS and never returns.
    """
    on_sge = dismod3.settings.ON_SGE
    while True:
        try:
            job_queue = dismod3.get_job_queue()
        except Exception:
            # best effort: treat a failed fetch as an empty queue and
            # retry on the next pass.  Catching Exception (not a bare
            # except) lets KeyboardInterrupt/SystemExit stop the daemon.
            job_queue = []

        for param_id in job_queue:
            log('processing job %d' % param_id)
            job_params = dismod3.remove_from_job_queue(param_id)
            dm_id = int(job_params['dm_id'])
            dm = dismod3.get_disease_model(dm_id)

            # make a working directory for the id
            job_dir = dismod3.settings.JOB_WORKING_DIR % dm_id
            if not os.path.exists(job_dir):
                os.makedirs(job_dir)

            run_status = dm.params.get('run_status', {})
            estimate_type = run_status.get('estimate_type', 'fit all individually')

            if estimate_type.find('posterior') != -1:
                # fit each region/year/sex individually for this model
                regions_to_fit = run_status.get('regions_to_fit', [])
                # an empty list means nothing to fit; guard the [0]
                # lookup so it cannot raise and kill the daemon
                if regions_to_fit and regions_to_fit[0] == 'all_regions':
                    regions_to_fit = dismod3.gbd_regions

                d = '%s/posterior' % job_dir
                _reset_output_dir(d, ['stdout', 'stderr'])
                dismod3.init_job_log(dm_id, 'posterior', param_id)

                for r in regions_to_fit:
                    for s in dismod3.gbd_sexes:
                        for y in dismod3.gbd_years:
                            # fit only one region, for the time being...
                            # TODO: make region selection a user-settable option from the gui
                            #if clean(r) != 'asia_southeast':
                            #    continue
                            k = '%s+%s+%s' % (clean(r), s, y)
                            o = '%s/stdout/%s' % (d, k)
                            e = '%s/stderr/%s' % (d, k)
                            fit_args = '-l -r %s -s %s -y %s' % (clean(r), s, y)
                            # GBD_FIT_STR takes its fields in a different
                            # order on and off the SGE cluster
                            if on_sge:
                                call_str = dismod3.settings.GBD_FIT_STR % (o, e, fit_args, dm_id)
                            else:
                                call_str = dismod3.settings.GBD_FIT_STR % (fit_args, dm_id, o, e)
                            # NOTE(review): shell command built from model
                            # parameters -- relies on clean() for safe names
                            subprocess.call(call_str, shell=True)
                            # NOTE(review): assumed to pause after every
                            # launch -- confirm against the original layout
                            time.sleep(1.)

            elif estimate_type.find('empirical priors') != -1:
                # fit empirical priors (by pooling data from all regions)
                d = '%s/empirical_priors' % job_dir
                _reset_output_dir(d, ['stdout', 'stderr'])
                dismod3.init_job_log(dm_id, 'empirical_priors', param_id)

                for t in ['excess-mortality', 'remission', 'incidence', 'prevalence']:
                    o = '%s/stdout/%s' % (d, t)
                    e = '%s/stderr/%s' % (d, t)
                    fit_args = '-l -t %s' % t
                    if on_sge:
                        call_str = dismod3.settings.GBD_FIT_STR % (o, e, fit_args, dm_id)
                    else:
                        call_str = dismod3.settings.GBD_FIT_STR % (fit_args, dm_id, o, e)
                    subprocess.call(call_str, shell=True)

            else:
                log('unrecognized estimate type: %s' % estimate_type)

        time.sleep(dismod3.settings.SLEEP_SECS)
# check that args are correct if len(args) == 1: try: id = int(args[0]) except ValueError: parser.error('disease_model_id must be an integer') exit() else: parser.error('incorrect number of arguments') exit() # fetch requested model dm = dismod3.get_disease_model(id) # define ground truth age_len = dismod3.MAX_AGE ages = np.arange(age_len, dtype='float') print 'defining model transition parameters' truth = {} # all-cause mortality-rate m = np.array( [ 0.03266595, 0.01114646, 0.00450302, 0.00226896, 0.00143311, 0.00109108, 0.00094584, 0.00087981, 0.00083913, 0.0008073 , 0.00078515, 0.00077967, 0.00079993, 0.00085375, 0.00094349,
def _log_fit_status(dm_id, opts, status):
    """ Record status in the job log when opts.log is set

    A type-only run is logged under 'empirical_priors'; a run with
    region, sex and year (and no type) is logged under 'posterior'.
    Any other combination of options is not logged.
    """
    if not opts.log:
        return
    fully_specified = opts.region and opts.sex and opts.year
    if opts.type and not fully_specified:
        dismod3.log_job_status(dm_id, 'empirical_priors', opts.type, status)
    elif fully_specified and not opts.type:
        dismod3.log_job_status(dm_id, 'posterior',
                               '%s--%s--%s' % (opts.region, opts.sex, opts.year), status)


def fit(id, opts):
    """ Fit one disease model and post the results to the data server

    With opts.type set, the empirical prior for that parameter type is
    fitted.  Otherwise all parameters are fitted consistently (map, then
    mcmc) for the selected region/sex/year keys, after merging in the
    all-cause mortality data.  Dict-valued model parameters are then
    pruned to the fitted keys and the model is posted, retrying up to
    four times when the post fails.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    opts : options object
      reads .region, .sex, .year, .type and .log; an unset region, sex
      or year means "all" (dismod3.gbd_regions / gbd_sexes / gbd_years)
    """
    fit_str = '(%d) %s %s %s' % (id, opts.region or '', opts.sex or '', opts.year or '')
    #tweet('fitting disease model %s' % fit_str)
    sys.stdout.flush()

    # update job status file
    _log_fit_status(id, opts, 'Running')

    dm = dismod3.get_disease_model(id)
    fit_str = '%s %s' % (dm.params['condition'], fit_str)

    if opts.sex:
        sex_list = [opts.sex]
    else:
        sex_list = dismod3.gbd_sexes
    if opts.year:
        year_list = [opts.year]
    else:
        year_list = dismod3.gbd_years
    if opts.region:
        region_list = [opts.region]
    else:
        region_list = dismod3.gbd_regions
    keys = gbd_keys(region_list=region_list, year_list=year_list, sex_list=sex_list)

    job_dir = dismod3.settings.JOB_WORKING_DIR % id

    # fit empirical priors, if type is specified
    if opts.type:
        fit_str += ' emp prior for %s' % opts.type
        #print 'beginning ', fit_str
        import dismod3.neg_binom_model as model
        model.fit_emp_prior(dm, opts.type,
                            dbname='%s/empirical_priors/pickle/dm-%d-emp_prior-%s.pickle' % (job_dir, id, opts.type))

    # if type is not specified, find consistent fit of all parameters
    else:
        import dismod3.gbd_disease_model as model

        # get the all-cause mortality data, and merge it into the model
        mort = dismod3.get_disease_model('all-cause_mortality')
        dm.data += mort.data

        # fit individually, if sex, year, and region are specified
        if opts.sex and opts.year and opts.region:
            dm.params['estimate_type'] = 'fit individually'

        # fit the model
        #print 'beginning ', fit_str
        model.fit(dm, method='map', keys=keys, verbose=1)
        model.fit(dm, method='mcmc', keys=keys, iter=10000, thin=5, burn=5000, verbose=1,
                  dbname='%s/posterior/pickle/dm-%d-posterior-%s-%s-%s.pickle' % (job_dir, id, opts.region, opts.sex, opts.year))
        #model.fit(dm, method='mcmc', keys=keys, iter=1, thin=1, burn=0, verbose=1)

    # remove all keys that have not been changed by running this model
    # NOTE(review): assumed to run for both branches above -- confirm
    # against the original layout
    for k in dm.params.keys():
        if type(dm.params[k]) == dict:
            # iterate over a copy of the keys, since entries are popped
            for j in list(dm.params[k].keys()):
                if not j in keys:
                    dm.params[k].pop(j)

    # post results to dismod_data_server
    # "dumb" error handling, in case post fails (sleep a random time, try
    # again, stop after 4 tries)
    from twill.errors import TwillAssertionError
    from urllib2 import URLError
    import random

    # must be a tuple: an except clause never matches against a list,
    # so with a list the first failure would propagate with no retry
    possible_exceptions = (TwillAssertionError, URLError)
    max_tries = 4
    for attempt in range(max_tries):
        try:
            url = dismod3.post_disease_model(dm)
            break
        except possible_exceptions:
            if attempt == max_tries - 1:
                # out of retries: let the caller see the failure
                raise
            time.sleep(random.random()*30)

    # form url to view results
    #if opts.sex and opts.year and opts.region:
    #    url += '/%s/%s/%s' % (opts.region, opts.year, opts.sex)
    #elif opts.region:
    #    url += '/%s' % opts.region

    # announce completion, and url to view results
    #tweet('%s fit complete %s' % (fit_str, url))
    sys.stdout.flush()

    # update job status file
    _log_fit_status(id, opts, 'Completed')
def daemon_loop(): on_sge = dismod3.settings.ON_SGE while True: try: job_queue = dismod3.get_job_queue() except: job_queue = [] for param_id in job_queue: #tweet('processing job %d' % id) log('processing job %d' % param_id) job_params = dismod3.remove_from_job_queue(param_id) id = int(job_params['dm_id']) dm = dismod3.get_disease_model(id) # make a working directory for the id dir = dismod3.settings.JOB_WORKING_DIR % id if os.path.exists(dir): dismod3.disease_json.random_rename(dir) os.makedirs(dir) estimate_type = dm.params.get('run_status', {}).get('estimate_type', 'fit all individually') # sort the regions so that the data rich regions are fit first #data_hash = GBDDataHash(dm.data) #sorted_regions = sorted(dismod3.gbd_regions, reverse=True, #key=lambda r: len(data_hash.get(region=r))) if estimate_type == 'Fit continuous single parameter model': #dismod3.disease_json.create_disease_model_dir(id) o = '%s/continuous_spm.stdout' % dir e = '%s/continuous_spm.stderr' % dir if on_sge: print o print e call_str = 'qsub -cwd -o %s -e %s ' % (o, e) \ + 'run_on_cluster.sh /home/OUTPOST/abie/gbd_dev/gbd/fit_continuous_spm.py %d' % id else: call_str = 'python -u /home/abie/gbd/fit_continuous_spm.py %d 2>%s |tee %s' % (id, e, o) subprocess.call(call_str, shell=True) continue if estimate_type.find('posterior') != -1: #fit each region/year/sex individually for this model regions_to_fit = dm.params.get('run_status', {}).get('regions_to_fit', []) if regions_to_fit[0] == 'all_regions': regions_to_fit = dismod3.gbd_regions d = '%s/posterior' % dir if os.path.exists(d): rmtree(d) os.mkdir(d) os.mkdir('%s/stdout' % d) os.mkdir('%s/stderr' % d) os.mkdir('%s/pickle' % d) dismod3.init_job_log(id, 'posterior', param_id) for r in regions_to_fit: for s in dismod3.gbd_sexes: for y in dismod3.gbd_years: # fit only one region, for the time being... 
# TODO: make region selection a user-settable option from the gui #if clean(r) != 'asia_southeast': # continue k = '%s+%s+%s' % (clean(r), s, y) o = '%s/stdout/%s' % (d, k) e = '%s/stderr/%s' % (d, k) if on_sge: call_str = dismod3.settings.GBD_FIT_STR % (o, e, '-l -r %s -s %s -y %s' % (clean(r), s, y), id) subprocess.call(call_str, shell=True) else: call_str = dismod3.settings.GBD_FIT_STR % ('-l -r %s -s %s -y %s' % (clean(r), s, y), id, o, e) subprocess.call(call_str, shell=True) #time.sleep(1.) elif estimate_type.find('empirical priors') != -1: # fit empirical priors (by pooling data from all regions d = '%s/empirical_priors' % dir if os.path.exists(d): rmtree(d) os.mkdir(d) os.mkdir('%s/stdout' % d) os.mkdir('%s/stderr' % d) os.mkdir('%s/pickle' % d) dismod3.init_job_log(id, 'empirical_priors', param_id) for t in ['excess-mortality', 'remission', 'incidence', 'prevalence']: o = '%s/stdout/%s' % (d, t) e = '%s/stderr/%s' % (d, t) if on_sge: subprocess.call(dismod3.settings.GBD_FIT_STR % (o, e, '-l -t %s' % t, id), shell=True) else: subprocess.call(dismod3.settings.GBD_FIT_STR % ('-l -t %s' % t, id, o, e), shell=True) else: #tweet('unrecognized estimate type: %s' % estimate_type) log('unrecognized estimate type: %s' % estimate_type) time.sleep(dismod3.settings.SLEEP_SECS)