def predict(type, dm, d):
    """ Predict the value of a single data row from a fitted disease model

    Parameters
    ----------
    type : str
      the kind of stored estimate to read with ``dm.get_mcmc``; it is
      also used to label the result, which is saved in ``d`` under the
      key ``'estimate <type>'``
    dm : disease model
      the object that provides ``get_mcmc(type, key)``
    d : dict
      one row of data; once its keys have been cleaned it must provide
      'parameter', 'region', 'year_start', 'sex', 'age_start',
      'age_end', and 'country_iso3_code'

    Results
    -------
    est : the value returned by ``dismod3.utils.rate_for_range`` for
      the ages age_start..age_end (inclusive), or -99 if the model has
      no estimate stored for this row

    Notes
    -----
    ``d`` is modified in place: a cleaned alias is added for every key,
    and the estimate is stored in it.
    """
    # iterate over a snapshot of the keys, because the dict is modified in the loop
    for k in list(d.keys()):
        d[dismod3.utils.clean(k)] = d[k]

    t = d['parameter'].replace(' data', '').replace(' ', '-')
    r = d['region']
    y = int(d['year_start'])
    s = d['sex']
    key = dismod3.gbd_key_for(t, r, y, s)

    a0 = int(d['age_start'])
    a1 = int(d['age_end'])
    est_by_age = dm.get_mcmc(type, key)

    # len() is used (not truthiness) so that array-valued estimates work too
    if len(est_by_age) == 0:
        return -99

    ages = range(a0, a1 + 1)
    c = d['country_iso3_code']
    if t == 'incidence_x_duration':
        # weights of one: these rows are not population-normalized
        pop = np.ones(len(ages), dtype=float)
    else:
        # build a float array explicitly; dividing a plain list of integer
        # counts by their integer sum would floor every weight to zero
        pop = np.array([population_by_age[(c, str(y), s)][a] for a in ages], dtype=float)
        pop /= np.sum(pop)  # normalize the pop weights to sum to 1

    est = dismod3.utils.rate_for_range(est_by_age, ages, pop)
    d['estimate %s' % type] = est

    return est
def fit_emp_prior(dm, param_type, prior_str=None):
    """ Generate an empirical prior distribution for a single disease parameter

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      The object containing all the data, (hyper)-priors, and additional
      information (like input and output age-mesh).
    param_type : str, one of 'incidence', 'prevalence', 'remission', 'excess-mortality'
      The disease parameter to work with
    prior_str : str, optional
      The (hyper)-prior for this disease parameter; see
      utils.generate_prior_potentials for format

    Notes
    -----
    The results of this fit are stored in the disease model's params
    hash for use when fitting multiple parameter types together.  The
    same MAP value and 95% interval (mu +/- 1.96 * se) are stored for
    every region/year/sex key of this parameter type.

    Example
    -------
    >>> import dismod3
    >>> import dismod3.beta_binomial_model as model
    >>> dm = dismod3.get_disease_model(1)
    >>> model.fit_emp_prior(dm, 'incidence', 'zero 0 4, smooth 25')
    >>> assert dm.params.has_key('emp_prior')
    >>> assert dm.params['emp_prior'].has_key('incidence')
    >>> dismod3.post_disease_model(dm)
    """
    if prior_str:
        dm.set_priors(param_type, prior_str)

    # remove the old PyMC model, if it exists
    if hasattr(dm, 'vars'):
        delattr(dm, 'vars')
    if hasattr(dm, 'map'):
        delattr(dm, 'map')
    dm.set_empirical_prior(param_type, {})

    # fit the model
    fit(dm, method='map', param_type=param_type)

    # save the results in the param_hash
    mu = dm.vars['rate_stoch'].value
    dispersion = dm.vars['dispersion'].value
    se = mu * (1 - mu) * np.sqrt(dispersion)
    dm.set_empirical_prior(param_type, {'mu': list(mu),
                                        'se': list(se),
                                        'dispersion': float(dispersion)})

    for r in dismod3.gbd_regions:
        for y in dismod3.gbd_years:
            for s in dismod3.gbd_sexes:
                key = dismod3.gbd_key_for(param_type, r, y, s)
                dm.set_map(key, mu)
                # symmetric 95% interval; the upper bound previously used
                # 19.6 instead of 1.96, making it ten times too wide
                dm.set_mcmc('lower_ui', key, mu - 1.96 * se)
                dm.set_mcmc('upper_ui', key, mu + 1.96 * se)
def to_djson(self, region='*'):
    """ Return a dismod_dataset json corresponding to this model object

    See ``dismod_data_json.html`` for details.

    Parameters
    ----------
    region : str
      a string for the regions to load posteriors for; '*' loads the
      params of every region, any other value loads only the params
      whose region contains this string, plus the params that have an
      empty region

    Results
    -------
    dj : dismod3.disease_json.DiseaseJson
      built from the params, the data, and the id of this model

    Notes
    -----
    Params whose json cannot be parsed are skipped.

    Example
    -------
    >> dm = DiseaseModel.objects.get(id=1)
    >> dm.to_djson(region='none')
    """
    param_dict = {}

    if region != '*':
        param_filter = self.params.filter(region__contains=region)
    else:
        param_filter = self.params.all()

    for p in param_filter:
        # parse first, so that a bad row neither aborts the export nor
        # leaves a half-initialized entry behind
        try:
            val = json.loads(p.json)
        except ValueError:
            # skip bad json, it sometimes happens, for unknown reasons (HTTP glitches?)
            continue

        if p.type and p.region and p.sex and p.year:
            # region/year/sex-specific params are grouped under their key
            gbd_key = dismod3.gbd_key_for(p.type, p.region, p.year, p.sex)
            param_dict.setdefault(p.key, {})[gbd_key] = val
        else:
            param_dict[p.key] = val

    # include params for all regions as well, if params were filtered above
    if region != '*':
        for p in self.params.filter(region=''):
            if p.key in param_dict:
                continue
            try:
                param_dict[p.key] = json.loads(p.json)
            except ValueError:
                # skip bad json, it sometimes happens, for unknown reasons (HTTP glitches?)
                pass

    param_dict.update(id=self.id,
                      condition=self.condition,
                      sex=self.sex,
                      region=self.region,
                      year=self.year)

    from dismod3.disease_json import DiseaseJson
    dj = DiseaseJson(json.dumps({'params': param_dict,
                                 'data': [d.params for d in self.data.all()],
                                 'id': self.id}))
    return dj
def setup(dm, keys):
    """ Build the PyMC variables for every region/year/sex submodel requested

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    keys : container of str
      a region/year/sex triple is set up only when its 'prevalence' key
      is found in this container

    Results
    -------
    vars : dict of PyMC stochs
      the union of the dictionaries returned by ``submodel.setup`` for
      each triple that was set up
    """
    rate_types = ('incidence', 'remission', 'excess-mortality')
    all_stochs = {}

    for region in dismod3.gbd_regions:
        for year in dismod3.gbd_years:
            for sex in dismod3.gbd_sexes:
                # key template with a '%s' placeholder for the parameter type
                key_template = dismod3.gbd_key_for('%s', region, year, sex)

                if key_template % 'prevalence' in keys:
                    dm.set_units(key_template % 'prevalence', '(per person)')
                    dm.set_units(key_template % 'duration', '(years)')
                    for rate_type in rate_types:
                        dm.set_units(key_template % rate_type, '(per person-year)')

                    triple_data = [d for d in dm.data
                                   if relevant_to(d, 'all', region, year, sex)]
                    all_stochs.update(submodel.setup(dm, key_template, triple_data))

    return all_stochs
def fit_emp_prior(dm, param_type, iter=30000, thin=20, burn=10000, dbname='/dev/null'): """ Generate an empirical prior distribution for a single disease parameter Parameters ---------- dm : dismod3.DiseaseModel The object containing all the data, (hyper)-priors, and additional information (like input and output age-mesh). param_type : str, one of 'incidence', 'prevalence', 'remission', 'excess-mortality' The disease parameter to work with Notes ----- The results of this fit are stored in the disease model's params hash for use when fitting multiple paramter types together Example ------- $ python2.5 gbd_fit.py 231 -t incidence """ data = [d for d in dm.data if clean(d['data_type']).find(param_type) != -1 and d.get('ignore') != -1] dm.calc_effective_sample_size(data) lower_bound_data = [] if param_type == 'excess-mortality': lower_bound_data = [d for d in dm.data if d['data_type'] == 'cause-specific mortality data'] dm.calc_effective_sample_size(lower_bound_data) dm.clear_empirical_prior() dm.fit_initial_estimate(param_type, data) dm.vars = setup(dm, param_type, data, lower_bound_data=lower_bound_data) # don't do anything if there is no data for this parameter type if len(dm.vars['data']) == 0: return debug('i: %s' % ', '.join(['%.2f' % x for x in dm.vars['rate_stoch'].value[::10]])) sys.stdout.flush() # fit the model #dm.na = mc.NormApprox(dm.vars) #dm.na.fit(method='fmin_powell', verbose=1) #dm.na.sample(1000, verbose=1) log_dispersion = dm.vars.pop('log_dispersion') # remove the dispersion term while finding initial values for MCMC dm.map = mc.MAP(dm.vars) dm.vars.update(log_dispersion=log_dispersion) try: dm.map.fit(method='fmin_powell', iterlim=500, verbose=1) except KeyboardInterrupt: debug('User halted optimization routine before optimal value found') sys.stdout.flush() # make pymc warnings go to stdout mc.warnings.warn = sys.stdout.write dm.mcmc = mc.MCMC(dm.vars, db='pickle', dbname=dbname) dm.mcmc.use_step_method(mc.Metropolis, dm.vars['log_dispersion'], 
proposal_sd=dm.vars['dispersion_step_sd']) dm.mcmc.use_step_method(mc.AdaptiveMetropolis, dm.vars['age_coeffs_mesh'], cov=dm.vars['age_coeffs_mesh_step_cov'], verbose=0) dm.mcmc.sample(iter=iter, burn=burn, thin=thin, verbose=1) dm.mcmc.db.commit() dm.vars['region_coeffs'].value = dm.vars['region_coeffs'].stats()['mean'] dm.vars['study_coeffs'].value = dm.vars['study_coeffs'].stats()['mean'] dm.vars['age_coeffs_mesh'].value = dm.vars['age_coeffs_mesh'].stats()['mean'] dm.vars['log_dispersion'].value = dm.vars['log_dispersion'].stats()['mean'] alpha = dm.vars['region_coeffs'].stats()['mean'] beta = dm.vars['study_coeffs'].stats()['mean'] gamma_mesh = dm.vars['age_coeffs_mesh'].stats()['mean'] debug('a: %s' % ', '.join(['%.2f' % x for x in alpha])) debug('b: %s' % ', '.join(['%.2f' % x for x in beta])) debug('g: %s' % ', '.join(['%.2f' % x for x in gamma_mesh])) debug('d: %.2f' % dm.vars['dispersion'].stats()['mean']) debug('m: %s' % ', '.join(['%.2f' % x for x in dm.vars['rate_stoch'].stats()['mean'][::10]])) covariates_dict = dm.get_covariates() X = covariates(data[0], covariates_dict) debug('p: %s' % ', '.join(['%.2f' % x for x in predict_rate(X, alpha, beta, gamma_mesh, dm.vars['bounds_func'], dm.get_param_age_mesh())])) # save the results in the param_hash prior_vals = dict( alpha=list(dm.vars['region_coeffs'].stats()['mean']), beta=list(dm.vars['study_coeffs'].stats()['mean']), gamma=list(dm.vars['age_coeffs'].stats()['mean']), delta=float(dm.vars['dispersion'].stats()['mean'])) prior_vals.update( sigma_alpha=list(dm.vars['region_coeffs'].stats()['standard deviation']), sigma_beta=list(dm.vars['study_coeffs'].stats()['standard deviation']), sigma_gamma=list(dm.vars['age_coeffs'].stats()['standard deviation']), sigma_delta=float(dm.vars['dispersion'].stats()['standard deviation'])) # save the goodness-of-fit statistics for the empirical prior prior_vals.update( aic=dm.map.AIC, bic=dm.map.BIC, dic=dm.mcmc.dic() ) dm.set_empirical_prior(param_type, prior_vals) 
dispersion = prior_vals['delta'] median_sample_size = np.median([values_from(dm, d)[3] for d in dm.vars['data']] + [1000]) debug('median effective sample size: %.1f' % median_sample_size) param_mesh = dm.get_param_age_mesh() age_mesh = dm.get_estimate_age_mesh() import random trace = zip(dm.vars['region_coeffs'].trace(), dm.vars['study_coeffs'].trace(), dm.vars['age_coeffs'].trace())[::5] for r in dismod3.gbd_regions: print 'predicting rates for %s' % r for y in dismod3.gbd_years: for s in dismod3.gbd_sexes: key = dismod3.gbd_key_for(param_type, r, y, s) rate_trace = [] for a, b, g in trace: rate_trace.append(predict_region_rate(key, alpha=a, beta=b, gamma=g, covariates_dict=covariates_dict, bounds_func=dm.vars['bounds_func'], ages=dm.get_estimate_age_mesh())) mu = dismod3.utils.interpolate(param_mesh, np.mean(rate_trace, axis=0)[param_mesh], age_mesh) dm.set_initial_value(key, mu) dm.set_mcmc('emp_prior_mean', key, mu) # similar to saving upper_ui and lower_ui in function store_mcmc_fit below rate_trace = np.sort(rate_trace, axis=0) dm.set_mcmc('emp_prior_upper_ui', key, dismod3.utils.interpolate(param_mesh, rate_trace[.975 * len(rate_trace), :][param_mesh], age_mesh)) dm.set_mcmc('emp_prior_lower_ui', key, dismod3.utils.interpolate(param_mesh, rate_trace[.025 * len(rate_trace), :][param_mesh], age_mesh))
def generate_disease_data(condition, cov): """ Generate csv files with gold-standard disease data, and somewhat good, somewhat dense disease data, as might be expected from a condition that is carefully studied in the literature """ age_len = dismod3.MAX_AGE ages = np.arange(age_len, dtype='float') # incidence rate i0 = .005 + .02 * mc.invlogit((ages - 44) / 3) #i0 = np.maximum(0., .001 * (-.125 + np.ones_like(ages) + (ages / age_len)**2.)) # remission rate #r = 0. * ages r = .1 * np.ones_like(ages) # excess-mortality rate #f_init = .085 * (ages / 100) ** 2.5 SMR = 3. * np.ones_like(ages) - ages / age_len # all-cause mortality-rate mort = dismod3.get_disease_model('all-cause_mortality') #age_intervals = [[a, a+9] for a in range(0, dismod3.MAX_AGE-4, 10)] + [[0, 100] for ii in range(1)] age_intervals = [[a, a] for a in range(0, dismod3.MAX_AGE, 1)] # TODO: take age structure from real data sparse_intervals = dict([[ region, random.sample(age_intervals, (ii**3 * len(age_intervals)) / len(countries_for)**3 / 1) ] for ii, region in enumerate(countries_for)]) dense_intervals = dict( [[region, random.sample(age_intervals, len(age_intervals) / 2)] for ii, region in enumerate(countries_for)]) gold_data = [] noisy_data = [] for ii, region in enumerate(sorted(countries_for)): if region == 'world': continue print region sys.stdout.flush() # introduce unexplained regional variation #i = i0 * (1 + float(ii) / 21) # or not i = i0 for year in [1990, 2005]: for sex in ['male', 'female']: param_type = 'all-cause_mortality' key = dismod3.gbd_key_for(param_type, region, year, sex) m_all_cause = mort.mortality(key, mort.data) # calculate excess-mortality rate from smr f = (SMR - 1.) * m_all_cause ## compartmental model (bins S, C, D, M) import scipy.linalg from dismod3 import NEARLY_ZERO from dismod3.utils import trim SCDM = np.zeros([4, age_len]) p = np.zeros(age_len) m = np.zeros(age_len) SCDM[0, 0] = 1. SCDM[1, 0] = 0. SCDM[2, 0] = 0. SCDM[3, 0] = 0. 
p[0] = SCDM[1, 0] / (SCDM[0, 0] + SCDM[1, 0] + NEARLY_ZERO) m[0] = trim(m_all_cause[0] - f[0] * p[0], NEARLY_ZERO, 1 - NEARLY_ZERO) for a in range(age_len - 1): A = [[-i[a] - m[a], r[a], 0., 0.], [i[a], -r[a] - m[a] - f[a], 0., 0.], [m[a], m[a], 0., 0.], [0., f[a], 0., 0.]] SCDM[:, a + 1] = np.dot(scipy.linalg.expm(A), SCDM[:, a]) p[a + 1] = SCDM[1, a + 1] / (SCDM[0, a + 1] + SCDM[1, a + 1] + NEARLY_ZERO) m[a + 1] = m_all_cause[a + 1] - f[a + 1] * p[a + 1] # duration = E[time in bin C] hazard = r + m + f pr_not_exit = np.exp(-hazard) X = np.empty(len(hazard)) X[-1] = 1 / hazard[-1] for ii in reversed(range(len(X) - 1)): X[ii] = (pr_not_exit[ii] * (X[ii + 1] + 1)) + (1 / hazard[ii] * (1 - pr_not_exit[ii]) - pr_not_exit[ii]) country = countries_for[region][0] params = dict(age_intervals=age_intervals, condition=condition, gbd_region=region, country=country, year=year, sex=sex, effective_sample_size=1000) params['age_intervals'] = [[0, 99]] generate_and_append_data(gold_data, 'prevalence data', p, **params) generate_and_append_data(gold_data, 'incidence data', i, **params) generate_and_append_data(gold_data, 'excess-mortality data', f, **params) generate_and_append_data(gold_data, 'remission data', r, **params) generate_and_append_data(gold_data, 'duration data', X, **params) # TODO: use this approach to age standardize all gold data, and then change it to get iX as a direct sum params['age_intervals'] = [[0, 99]] iX = i * X * (1 - p) * regional_population(key) generate_and_append_data(gold_data, 'incidence_x_duration', iX, **params) params['effective_sample_size'] = 1000 params['cov'] = 0. 
params['age_intervals'] = age_intervals generate_and_append_data(noisy_data, 'prevalence data', p, **params) generate_and_append_data(noisy_data, 'excess-mortality data', f, **params) generate_and_append_data(noisy_data, 'remission data', r, **params) generate_and_append_data(noisy_data, 'incidence data', i, **params) col_names = sorted(data_dict_for_csv(gold_data[0]).keys()) f_file = open(OUTPUT_PATH + '%s_gold.tsv' % condition, 'w') csv_f = csv.writer(f_file, dialect='excel-tab') csv_f.writerow(col_names) for d in gold_data: dd = data_dict_for_csv(d) csv_f.writerow([dd[c] for c in col_names]) f_file.close() f_name = OUTPUT_PATH + '%s_data.tsv' % condition f_file = open(f_name, 'w') csv_f = csv.writer(f_file, dialect='excel-tab') csv_f.writerow(col_names) for d in noisy_data: dd = data_dict_for_csv(d) csv_f.writerow([dd[c] for c in col_names]) f_file.close() # upload data file from dismod3.disease_json import dismod_server_login, twc, DISMOD_BASE_URL dismod_server_login() twc.go(DISMOD_BASE_URL + 'dismod/data/upload/') twc.formvalue(1, 'tab_separated_values', open(f_name).read()) # TODO: find or set the model number for this model, set the # expert priors and covariates, merge the covariate data into the # model, and add the "ground truth" to the disease json try: url = twc.submit() except Exception, e: print e
# Store the values found in measure_out as the "truth" of the disease model,
# for one region/year/sex and each parameter type, and save a plot of each.
# NOTE(review): id, dm, age_mesh, measure_out, and index_dict are all defined
# outside of this fragment.
r = 'asia_southeast'
for year in [1990]:
    for sex in ['male']:
        # each pair holds the dismod3 name and the dismod4 name of a parameter type
        for dm3_type, dm4_type in [['remission', 'remission'],
                                   ['excess-mortality', 'excess'],
                                   ['incidence', 'incidence'],
                                   ['mrr', 'risk'],
                                   ['prevalence', 'prevalence'],
                                   ]:
            # the curve starts with the point (0, 0), followed by one point per mesh age
            x = [0]
            y = [0]
            for age in age_mesh:
                x.append(age)
                y.append(measure_out.model[index_dict[(dm4_type, year, age)]])

            key = dismod3.gbd_key_for(dm3_type, r, year, sex)
            # interpolate the mesh values onto the estimate age mesh of the model
            est = dismod3.utils.interpolate(x, y, dm.get_estimate_age_mesh())
            dm.set_truth(key, est)

            dismod3.tile_plot_disease_model(dm, [key], defaults={})
            try:
                pl.savefig(dismod3.settings.JOB_WORKING_DIR % id + '/dm-%d-posterior-%s-%s-%s.png' % (id, dm3_type, sex, year))   # TODO: refactor naming into its own function
            except IOError, e:
                # a plot that cannot be saved is reported, but does not stop the run
                print 'Warning: could not create png.  Maybe it exists already?\n%s' % e

# save results (do this last, because it removes things from the disease model that plotting function, etc, might need
dismod3.try_posting_disease_model(dm, ntries=5)
print
print '********************'
print 'computation complete'
def fit_emp_prior(dm, param_type): """ Generate an empirical prior distribution for a single disease parameter Parameters ---------- dm : dismod3.DiseaseModel The object containing all the data, (hyper)-priors, and additional information (like input and output age-mesh). param_type : str, one of 'incidence', 'prevalence', 'remission', 'excess-mortality' The disease parameter to work with Notes ----- The results of this fit are stored in the disease model's params hash for use when fitting multiple paramter types together Example ------- $ python2.5 gbd_fit.py 175 -t incidence -p 'zero 0 4, zero 41 100, smooth 25' # takes 7m to run """ data = [d for d in dm.data if clean(d['data_type']).find(param_type) != -1] # don't do anything if there is no data for this parameter type if len(data) == 0: return dm.fit_initial_estimate(param_type, data) dm.vars = setup(dm, param_type, data) # fit the model dm.map = mc.MAP(dm.vars) try: dm.map.fit(method='fmin_powell', iterlim=500, tol=.00001, verbose=1) except KeyboardInterrupt: print 'User halted optimization routine before optimal value found' # save the results in the param_hash dm.clear_empirical_prior() prior_vals = dict( alpha=list(dm.vars['region_coeffs'].value), beta=list(dm.vars['study_coeffs'].value), gamma=list(dm.vars['age_coeffs'].value), sigma=float(dm.vars['dispersion'].value)) dm.set_empirical_prior(param_type, prior_vals) dispersion = prior_vals['sigma'] for r in dismod3.gbd_regions: for y in dismod3.gbd_years: for s in dismod3.gbd_sexes: key = dismod3.gbd_key_for(param_type, r, y, s) logit_mu = predict_logit_rate(regional_covariates(key), **prior_vals) mu = mc.invlogit(logit_mu) dm.set_initial_value(key, mu) dm.set_mcmc('emp_prior_mean', key, mu) dm.set_mcmc('emp_prior_lower_ui', key, mc.invlogit(logit_mu - 1.96*dispersion)) dm.set_mcmc('emp_prior_upper_ui', key, mc.invlogit(logit_mu + 1.96*dispersion)) key = dismod3.gbd_key_for(param_type, 'world', 1997, 'total') logit_mu = 
predict_logit_rate(regional_covariates(key), **prior_vals) mu = mc.invlogit(logit_mu) dm.set_initial_value(key, mu) dm.set_mcmc('emp_prior_mean', key, mu) dm.set_mcmc('emp_prior_lower_ui', key, mc.invlogit(logit_mu - 1.96*dispersion)) dm.set_mcmc('emp_prior_upper_ui', key, mc.invlogit(logit_mu + 1.96*dispersion))
def generate_disease_data(condition, cov): """ Generate csv files with gold-standard disease data, and somewhat good, somewhat dense disease data, as might be expected from a condition that is carefully studied in the literature """ age_len = dismod3.MAX_AGE ages = np.arange(age_len, dtype='float') # incidence rate i0 = .005 + .02 * mc.invlogit((ages - 44) / 3) #i0 = np.maximum(0., .001 * (-.125 + np.ones_like(ages) + (ages / age_len)**2.)) # remission rate #r = 0. * ages r = .1 * np.ones_like(ages) # excess-mortality rate #f_init = .085 * (ages / 100) ** 2.5 SMR = 3. * np.ones_like(ages) - ages / age_len # all-cause mortality-rate mort = dismod3.get_disease_model('all-cause_mortality') #age_intervals = [[a, a+9] for a in range(0, dismod3.MAX_AGE-4, 10)] + [[0, 100] for ii in range(1)] age_intervals = [[a, a] for a in range(0, dismod3.MAX_AGE, 1)] # TODO: take age structure from real data sparse_intervals = dict([[region, random.sample(age_intervals, (ii**3 * len(age_intervals)) / len(countries_for)**3 / 1)] for ii, region in enumerate(countries_for)]) dense_intervals = dict([[region, random.sample(age_intervals, len(age_intervals)/2)] for ii, region in enumerate(countries_for)]) gold_data = [] noisy_data = [] for ii, region in enumerate(sorted(countries_for)): if region == 'world': continue print region sys.stdout.flush() # introduce unexplained regional variation #i = i0 * (1 + float(ii) / 21) # or not i = i0 for year in [1990, 2005]: for sex in ['male', 'female']: param_type = 'all-cause_mortality' key = dismod3.gbd_key_for(param_type, region, year, sex) m_all_cause = mort.mortality(key, mort.data) # calculate excess-mortality rate from smr f = (SMR - 1.) * m_all_cause ## compartmental model (bins S, C, D, M) import scipy.linalg from dismod3 import NEARLY_ZERO from dismod3.utils import trim SCDM = np.zeros([4, age_len]) p = np.zeros(age_len) m = np.zeros(age_len) SCDM[0,0] = 1. SCDM[1,0] = 0. SCDM[2,0] = 0. SCDM[3,0] = 0. 
p[0] = SCDM[1,0] / (SCDM[0,0] + SCDM[1,0] + NEARLY_ZERO) m[0] = trim(m_all_cause[0] - f[0] * p[0], NEARLY_ZERO, 1-NEARLY_ZERO) for a in range(age_len - 1): A = [[-i[a]-m[a], r[a] , 0., 0.], [ i[a] , -r[a]-m[a]-f[a], 0., 0.], [ m[a], m[a] , 0., 0.], [ 0., f[a], 0., 0.]] SCDM[:,a+1] = np.dot(scipy.linalg.expm(A), SCDM[:,a]) p[a+1] = SCDM[1,a+1] / (SCDM[0,a+1] + SCDM[1,a+1] + NEARLY_ZERO) m[a+1] = m_all_cause[a+1] - f[a+1] * p[a+1] # duration = E[time in bin C] hazard = r + m + f pr_not_exit = np.exp(-hazard) X = np.empty(len(hazard)) X[-1] = 1 / hazard[-1] for ii in reversed(range(len(X)-1)): X[ii] = (pr_not_exit[ii] * (X[ii+1] + 1)) + (1 / hazard[ii] * (1 - pr_not_exit[ii]) - pr_not_exit[ii]) country = countries_for[region][0] params = dict(age_intervals=age_intervals, condition=condition, gbd_region=region, country=country, year=year, sex=sex, effective_sample_size=1000) params['age_intervals'] = [[0,99]] generate_and_append_data(gold_data, 'prevalence data', p, **params) generate_and_append_data(gold_data, 'incidence data', i, **params) generate_and_append_data(gold_data, 'excess-mortality data', f, **params) generate_and_append_data(gold_data, 'remission data', r, **params) generate_and_append_data(gold_data, 'duration data', X, **params) # TODO: use this approach to age standardize all gold data, and then change it to get iX as a direct sum params['age_intervals'] = [[0,99]] iX = i * X * (1-p) * regional_population(key) generate_and_append_data(gold_data, 'incidence_x_duration', iX, **params) params['effective_sample_size'] = 1000 params['cov'] = 0. 
params['age_intervals'] = age_intervals generate_and_append_data(noisy_data, 'prevalence data', p, **params) generate_and_append_data(noisy_data, 'excess-mortality data', f, **params) generate_and_append_data(noisy_data, 'remission data', r, **params) generate_and_append_data(noisy_data, 'incidence data', i, **params) col_names = sorted(data_dict_for_csv(gold_data[0]).keys()) f_file = open(OUTPUT_PATH + '%s_gold.tsv' % condition, 'w') csv_f = csv.writer(f_file, dialect='excel-tab') csv_f.writerow(col_names) for d in gold_data: dd = data_dict_for_csv(d) csv_f.writerow([dd[c] for c in col_names]) f_file.close() f_name = OUTPUT_PATH + '%s_data.tsv' % condition f_file = open(f_name, 'w') csv_f = csv.writer(f_file, dialect='excel-tab') csv_f.writerow(col_names) for d in noisy_data: dd = data_dict_for_csv(d) csv_f.writerow([dd[c] for c in col_names]) f_file.close() # upload data file from dismod3.disease_json import dismod_server_login, twc, DISMOD_BASE_URL dismod_server_login() twc.go(DISMOD_BASE_URL + 'dismod/data/upload/') twc.formvalue(1, 'tab_separated_values', open(f_name).read()) # TODO: find or set the model number for this model, set the # expert priors and covariates, merge the covariate data into the # model, and add the "ground truth" to the disease json try: url = twc.submit() except Exception, e: print e
# Store the predicted mean and 95% HPD interval of each prediction region in
# the disease model, for the years 1990 and 2005, and save a plot of each.
# NOTE(review): prediction_regions, age_mesh, param_predicted_stats,
# index_dict, param_type, dm, and id are all defined outside of this fragment.
for r in prediction_regions:
    r = dismod3.utils.clean(r)
    for t in [1990, 2005]:
        # x holds the mesh ages; y, yl, and yu hold the mean, lower, and upper values at each age
        x = []
        y = []
        yl = []
        yu = []
        for a in age_mesh:
            x.append(a)
            y.append(param_predicted_stats['mean'][index_dict[(r, t, a)]])
            yl.append(param_predicted_stats['95% HPD interval'][index_dict[(r, t, a)],0])
            yu.append(param_predicted_stats['95% HPD interval'][index_dict[(r, t, a)],1])
        print r, t, zip(x,y)

        # the results are stored under the key for sex 'all'
        key = dismod3.gbd_key_for(param_type, r, t, 'all')
        # interpolate each curve onto the estimate age mesh of the model
        est = dismod3.utils.interpolate(x, y, dm.get_estimate_age_mesh())
        dm.set_mcmc('mean', key, est)

        est = dismod3.utils.interpolate(x, yl, dm.get_estimate_age_mesh())
        dm.set_mcmc('lower_ui', key, est)

        est = dismod3.utils.interpolate(x, yu, dm.get_estimate_age_mesh())
        dm.set_mcmc('upper_ui', key, est)

        dismod3.tile_plot_disease_model(dm, [key], defaults={})
        try:
            pl.savefig(dismod3.settings.JOB_WORKING_DIR % id + '/dm-%d-posterior-%s-%s-%s.png' % (id, dismod3.utils.clean(r), 'all', t))   # TODO: refactor naming into its own function
        except IOError, e:
            # a plot that cannot be saved is reported, but does not stop the run
            print 'Warning: could not create png.  Maybe it exists already?\n%s' % e
0.00924804, 0.01004529, 0.01089158, 0.01178793, 0.01274115, 0.0137633 , 0.01487031, 0.01608018, 0.01740874, 0.01886325, 0.02044349, 0.02214463, 0.02396039, 0.02589065, 0.0279525 , 0.03017836, 0.03261135, 0.03530052, 0.03828981, 0.04160153, 0.04523777, 0.04918468, 0.05341633, 0.05790466, 0.06263516, 0.06760523, 0.07281963, 0.07828758, 0.08401736, 0.09000903, 0.09625542, 0.10274424, 0.10945923, 0.11638187, 0.1234935 , 0.13077522, 0.13820759, 0.14577067, 0.15344416, 0.16120755, 0.16904026, 0.17692176, 0.18483165, 0.19274966, 0.20065553, 0.20852876, 0.2163489 , 0.22409584, 0.23174999, 0.23929245, 0.2467051 ]) for region in dismod3.gbd_regions: for year in dismod3.gbd_years: for sex in dismod3.gbd_sexes: key = dismod3.gbd_key_for('%s', region, year, sex) if clean(region) == 'north_america_high_income': regional_offset = 0. else: regional_offset = -.5 time_offset = (int(year)-1997)/10. if clean(sex) == 'male': sex_offset = .1 else: sex_offset = 0. # incidence rate i = mc.invlogit(mc.logit(.012 * mc.invlogit((ages - 44) / 3)) + regional_offset + time_offset + sex_offset)