def main(): argv = sys.argv assert len(argv) == 2, 'usage: python generate_map_output.py model_id' # download the requested model id = int(argv[1]) dm = dismod3.fetch_disease_model(id) # generate a list of dicts of requested data map_output_list = generate_map_output(dm) # dump list of dicts as csv (in memory, not on disk, hence the StringIO) str_io = StringIO.StringIO() csv_f = csv.DictWriter(str_io, output_fields) csv_f.writerows(map_output_list) # print out csv file str_io.seek(0) print str_io.read()
def hep_c_fit(regions, prediction_years, data_year_start=-inf, data_year_end=inf, egypt_flag=False):
    """ Fit prevalence for regions and years specified

    Parameters
    ----------
    regions : list
      GBD region names (in dismod3.utils.clean form) whose data to pool
    prediction_years : list
      years to generate predictions for
    data_year_start, data_year_end : float, optional
      keep only data whose [year_start, year_end] interval overlaps
      [data_year_start, data_year_end]; defaults keep everything
    egypt_flag : bool, optional
      if True fit using only Egyptian data; otherwise Egyptian data is
      excluded entirely (Egypt is treated as a special case for hep C)

    Returns
    -------
    dm : the fitted disease model object (also posted back to the server)
    """
    print "\n***************************\nfitting %s for %s (using data from years %f to %f)" % (regions, prediction_years, data_year_start, data_year_end,)

    ## load model to fit
    # dm = DiseaseJson(file('tests/hep_c.json').read())
    # NOTE(review): model id is hard-coded — presumably the canonical hep C model; confirm
    id = 8788
    dismod3.disease_json.create_disease_model_dir(id)
    dm = dismod3.fetch_disease_model(id)

    ## adjust the expert priors
    dm.params["global_priors"]["heterogeneity"]["prevalence"] = "Very"
    dm.params["global_priors"]["smoothness"]["prevalence"]["amount"] = "Slightly"
    # TODO: construct examples of adjusting other covariates
    # ipdb> dm.params['global_priors'].keys()
    # [u'increasing', u'unimodal', u'level_bounds', u'y_maximum', u'note', u'level_value', u'decreasing', u'parameter_age_mesh', u'heterogeneity', u'smoothness']
    # ipdb> dm.params['global_priors']['smoothness']['prevalence']
    # {u'age_start': 0, u'amount': u'Moderately', u'age_end': 100}

    # include a study-level covariate for 'bias'
    covariates_dict = dm.get_covariates()
    covariates_dict["Study_level"]["bias"]["rate"]["value"] = 1
    # TODO: construct additional examples of adjusting covariates

    ## select relevant prevalence data
    # TODO: streamline data selection functions
    if egypt_flag:
        dm.data = [d for d in dm.data if d["country_iso3_code"] == "EGY"]
    else:
        # keep rows in the requested regions whose year interval overlaps the
        # requested window, excluding Egypt (handled by egypt_flag instead)
        dm.data = [d for d in dm.data
                   if dismod3.utils.clean(d["gbd_region"]) in regions
                   and float(d["year_end"]) >= data_year_start
                   and float(d["year_start"]) <= data_year_end
                   and d["country_iso3_code"] != "EGY"]

    ## create, fit, and save rate model
    dm.vars = {}
    keys = dismod3.utils.gbd_keys(type_list=["prevalence"], region_list=regions, year_list=prediction_years)
    # TODO: consider how to do this for models that use the complete disease model
    # TODO: consider adding hierarchical similarity priors for the male and female models
    k0 = keys[0]  # looks like k0='prevalence+asia_south+1990+male'

    # set up the negative-binomial rate model for the first key only; the
    # single fitted model is shared across all keys in the loop below
    dm.vars[k0] = neg_binom_model.setup(dm, k0, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)

    # make map object so we can compute AIC and BIC
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    for k in keys:
        # save the results in the disease model
        # NOTE(review): every key is aliased to the k0 fit (same object, not a copy)
        dm.vars[k] = dm.vars[k0]
        neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])

        # check autocorrelation to confirm chain has mixed
        test_model.summarize_acorr(dm.vars[k]["rate_stoch"].trace())

        # generate plots of results; random() suffix keeps repeated runs
        # from overwriting each other's figures
        dismod3.tile_plot_disease_model(dm, [k], defaults={"ymax": 0.15, "alpha": 0.5})
        dm.savefig("dm-%d-posterior-%s.%f.png" % (dm.id, k, random()))

    # summarize fit quality graphically, as well as parameter posteriors
    dismod3.plotting.plot_posterior_predicted_checks(dm, k0)
    dm.savefig("dm-%d-check-%s.%f.png" % (dm.id, k0, random()))

    dismod3.post_disease_model(dm)
    return dm
def fit(id): """ Download model, conduct fit, and upload results Parameters ---------- id : int The model id number for the job to fit Commandline Version: [omak] dismod4.abie ] test/parameter.sh [omak] dismod4.abie ] example/simulate.py 5 1 100 [omak] dismod4.abie ] /tmp/dismod4_csv test/parameter.csv measure_in.csv sfun_in.csv dismod4_csv: Attempt to overwrite the existing file sfun_in.csv [omak] dismod4.abie ] /tmp/dismod4_csv test/parameter.csv measure_in.csv sfun_in.csv sfun_out.csv measure_out.csv """ dm = dismod3.get_disease_model(id) mort = dismod3.fetch_disease_model('all-cause_mortality') dm.data += mort.data ## convert model to csv file column_names = 'time_lower,time_upper,age_lower,age_upper,likelihood_name,likelihood_sigma,likelihood_beta,value,integrand'.split(',') data_list = [] # add all the model data to the data list for d in dm.data: row = {} row['time_lower'] = d['year_start'] row['time_upper'] = d['year_end'] # TODO: determine if this should be +1 row['age_lower'] = d['age_start']+1. row['age_upper'] = d['age_end']+1. # TODO: determine if this should be +1 row['likelihood_name'] = 'gaussian' row['likelihood_sigma'] = .0001 # TODO: use more accurate sigma row['likelihood_beta'] = 1. 
row['value'] = d['value'] / float(d.get('units', 1.)) for dm3_type, dm4_type in [['remission data', 'remission'], ['excess-mortality data', 'excess'], ['incidence data', 'incidence'], ['mrr data', 'risk'], ['prevalence data', 'prevalence'], ['all-cause mortality data', 'all_cause'], ]: if d['data_type'] == dm3_type: row['integrand'] = dm4_type data_list.append(row) break # add the time/age/regions that we want to predict to the data list as well age_mesh = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] index_dict = {} for year in [1990, 2005]: for age in age_mesh: for type in ['remission', 'excess', 'incidence', 'risk', 'prevalence']: row = {} row['time_lower'] = year row['time_upper'] = year row['age_lower'] = age row['age_upper'] = age row['likelihood_name'] = 'gaussian' row['likelihood_sigma'] = inf row['likelihood_beta'] = 1. row['value'] = 0. row['integrand'] = type index_dict[(type, year, age)] = len(data_list) data_list.append(row) # save the csv file import csv fname = dismod3.settings.JOB_WORKING_DIR % id + '/measure_in.csv' try: f = open(fname, 'w') csv.writer(f).writerow(column_names) csv.DictWriter(f, column_names).writerows(data_list) f.close() except IOError, e: print 'Warning: could not create data csv. Maybe it exists already?\n%s' % e
def fit_all(id): """ Enqueues all jobs necessary to fit specified model to the cluster Parameters ---------- id : int The model id number for the job to fit Example ------- >>> import fit_all >>> fit_all.fit_all(2552) """ # TODO: store all disease information in this dir already, so fetching is not necessary # download the disease model json and store it in the working dir print 'downloading disease model' dismod3.disease_json.create_disease_model_dir(id) dm = dismod3.fetch_disease_model(id) # get the all-cause mortality data, and merge it into the model mort = dismod3.fetch_disease_model('all-cause_mortality') dm.data += mort.data dm.save() # fit empirical priors (by pooling data from all regions) dir = dismod3.settings.JOB_WORKING_DIR % id # TODO: refactor into a function emp_names = [] for t in ['prevalence']: o = '%s/empirical_priors/stdout/%s' % (dir, t) e = '%s/empirical_priors/stderr/%s' % (dir, t) name_str = '%s-%d' %(t[0], id) emp_names.append(name_str) call_str = 'qsub -cwd -o %s -e %s ' % (o, e) \ + '-N %s ' % name_str \ + 'run_on_cluster.sh fit_emp_prior.py %d -t %s' % (id, t) subprocess.call(call_str, shell=True) # directory to save the country level posterior csv files temp_dir = dir + '/posterior/country_level_posterior_dm-' + str(id) + '/' if os.path.exists(temp_dir): rmtree(temp_dir) os.makedirs(temp_dir) #fit each region/year/sex individually for this model hold_str = '-hold_jid %s ' % ','.join(emp_names) post_names = [] for ii, r in enumerate(dismod3.gbd_regions): for s in dismod3.gbd_sexes: for y in dismod3.gbd_years: k = '%s+%s+%s' % (clean(r), s, y) o = '%s/posterior/stdout/%s' % (dir, k) e = '%s/posterior/stderr/%s' % (dir, k) name_str = '%s%d%s%s%d' % (r[0], ii+1, s[0], str(y)[-1], id) post_names.append(name_str) call_str = 'qsub -cwd -o %s -e %s ' % (o,e) \ + hold_str \ + '-N %s ' % name_str \ + 'run_on_cluster.sh fit_posterior_prevonly.py %d -r %s -s %s -y %s' % (id, clean(r), s, y) subprocess.call(call_str, shell=True) # after all 
posteriors have finished running, upload disease model json hold_str = '-hold_jid %s ' % ','.join(post_names) o = '%s/upload.stdout' % dir e = '%s/upload.stderr' % dir call_str = 'qsub -cwd -o %s -e %s ' % (o,e) \ + hold_str \ + '-N upld-%s ' % id \ + 'run_on_cluster.sh upload_fits.py %d' % id subprocess.call(call_str, shell=True)