def regional_population(key):
    """ Return the summed population-by-age vector for a gbd key's region.

    Decodes the region, year and sex from `key`, then accumulates the
    country-level population vectors for every country in that region.
    """
    _, region, year, sex = type_region_year_sex_from_key(key)
    totals = np.zeros(MAX_AGE)
    for iso3 in countries_for[clean(region)]:
        totals = totals + population_by_age[(iso3, year, sex)]
    return totals
def country_covariates(key, iso3, covariates_dict, derived_covariate):
    """ Form the covariates for a gbd key, specialized to one country.

    Parameters
    ----------
    key : str
      gbd key encoding (type, region, year, sex)
    iso3 : str
      country code used to look up derived covariate values
    covariates_dict : dict
      per-level ('Study_level'/'Country_level') covariate specification
    derived_covariate : dict
      maps covariate name -> {'iso3+year+sex': value}

    Results are memoized in the module-level covariate_hash, keyed on
    (key, iso3).
    """
    if (key, iso3) not in covariate_hash:
        t, r, y, s = type_region_year_sex_from_key(key)
        d = {'gbd_region': r, 'year_start': y, 'year_end': y, 'sex': s}
        for level in ['Study_level', 'Country_level']:
            for k in covariates_dict[level]:
                if k == 'none':
                    continue
                # only covariates flagged for use in the rate model
                if covariates_dict[level][k]['rate']['value']:
                    d[clean(k)] = covariates_dict[level][k]['value']['value']
                    if level == 'Country_level':
                        lookup = '%s+%s+%s' % (iso3, y, s)
                        if k not in derived_covariate:
                            # BUG FIX: messages previously interpolated the gbd
                            # key where the missing covariate name belongs
                            debug('WARNING: derived covariate %s not found' % k)
                            d[clean(k)] = 0.
                        elif lookup not in derived_covariate[k]:
                            debug('WARNING: derived covariate %s not found for (%s, %s, %s)' % (k, iso3, y, s))
                            d[clean(k)] = 0.
                        else:
                            d[clean(k)] = derived_covariate[k][lookup]
                    else:
                        # study-level covariates: coerce to float, empty -> 0.
                        d[clean(k)] = float(d[clean(k)] or 0.)
        covariate_hash[(key, iso3)] = covariates(d, covariates_dict)
    return covariate_hash[(key, iso3)]
def predict_region_rate(key, alpha, beta, gamma, covariates_dict, bounds_func, ages):
    """ Predict the population-weighted mean rate over all countries
    in the region encoded by `key`.

    Missing (iso3, year, sex) population entries fall back to a weight
    of 1., so every country still contributes to the average.
    """
    t, r, y, s = type_region_year_sex_from_key(key)
    weighted_rate = np.zeros(len(gamma))
    total_weight = np.zeros(len(gamma))
    # NOTE(review): regional_population indexes countries_for with clean(r),
    # this function uses r directly — confirm both keys are valid
    for iso3 in countries_for[r]:
        weight = population_by_age.get((iso3, y, s), 1.)
        country_rate = predict_country_rate(key, iso3, alpha, beta, gamma,
                                            covariates_dict, bounds_func, ages)
        weighted_rate += country_rate * weight
        total_weight += weight
    return weighted_rate / total_weight
def regional_covariates(key):
    """ form the covariates for a gbd key"""
    t, r, y, s = type_region_year_sex_from_key(key)
    return covariates({'gbd_region': r,
                       'year_start': y,
                       'year_end': y,
                       'sex': s})
def country_covariates(key, iso3, covariates_dict):
    """ form the covariates for a gbd key"""
    cache_key = (key, iso3)
    if cache_key not in covariate_hash:
        t, r, y, s = type_region_year_sex_from_key(key)
        d = {'parameter': t, 'gbd_region': r,
             'year_start': y, 'year_end': y, 'sex': s}
        for level in ['Study_level', 'Country_level']:
            for name in covariates_dict[level]:
                if name == 'none':
                    continue
                spec = covariates_dict[level][name]
                value = spec['value']['value']
                if value == 'Country Specific Value':
                    # substitute this country's default, 0. when absent
                    value = spec['defaults'].get(iso3, 0.)
                else:
                    # coerce to float, treating empty values as 0.
                    value = float(value or 0.)
                d[clean(name)] = value
        covariate_hash[cache_key] = covariates(d, covariates_dict)
    return covariate_hash[cache_key]
def regional_covariates(key, covariates_dict, derived_covariate):
    """ form the covariates for a gbd key"""
    if key not in covariate_hash:
        t, r, y, s = type_region_year_sex_from_key(key)
        d = {'gbd_region': r, 'year_start': y, 'year_end': y, 'sex': s}
        for level in ['Study_level', 'Country_level']:
            for name in covariates_dict[level]:
                if name == 'none':
                    continue
                spec = covariates_dict[level][name]
                # skip covariates not flagged for use in the rate model
                if not spec['rate']['value']:
                    continue
                value = spec['value']['value']
                if value == 'Country Specific Value':
                    # delegate to the regional averaging helper
                    value = regional_average(derived_covariate, name, r, y, s)
                else:
                    # coerce to float, treating empty values as 0.
                    value = float(value or 0.)
                d[clean(name)] = value
        covariate_hash[key] = covariates(d, covariates_dict)
    return covariate_hash[key]
def regional_covariates(key, covariates_dict):
    """ Form the covariates for a gbd key.

    Memoized in the module-level covariate_hash, keyed on `key`.
    """
    if key not in covariate_hash:
        t, r, y, s = type_region_year_sex_from_key(key)
        d = {'parameter': t, 'gbd_region': r, 'year_start': y, 'year_end': y, 'sex': s}
        for level in ['Study_level', 'Country_level']:
            for k in covariates_dict[level]:
                if k == 'none':
                    continue
                d[clean(k)] = covariates_dict[level][k]['value']['value']
                if d[clean(k)] == 'Country Specific Value':
                    # FIXME: this could be returning bogus answers
                    d[clean(k)] = regional_average(covariates_dict[level][k]['defaults'], r)
                else:
                    # BUG FIX: was '==' (a no-op comparison), so non-specific
                    # values were never coerced to float as in sibling functions
                    d[clean(k)] = float(d[clean(k)] or 0.)
        covariate_hash[key] = covariates(d, covariates_dict)
    return covariate_hash[key]
def fit(dm, method='map', keys=gbd_keys(), iter=50000, burn=25000, thin=1, verbose=1, dbname='model.pickle'):
    """ Generate an estimate of the generic disease model parameters
    using maximum a posteriori liklihood (MAP) or Markov-chain Monte
    Carlo (MCMC)

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)

    method : string, optional
      the parameter estimation method: 'map', 'norm_approx', or 'mcmc'

    keys : list, optional
      a list of gbd keys for the parameters to fit; it can speed up
      computation to holding some parameters constant while allowing
      others to vary (NOTE: gbd_keys() is evaluated once, at def time)

    iter : int, optional
    burn : int, optional
    thin : int, optional
      parameters for the MCMC, which control how long it takes, and
      how accurate it is

    dbname : string, optional
      filename for the pickle database backing the MCMC traces

    Side effects: attaches dm.vars (and dm.map / dm.na / dm.mcmc
    depending on method) and stores fitted values on dm via
    dm.set_map / the *_model.store_mcmc_fit helpers.
    """
    # build the model variables once; reused on subsequent calls (warm start)
    if not hasattr(dm, 'vars'):
        print 'initializing model vars... ',
        dm.calc_effective_sample_size(dm.data)
        dm.vars = setup(dm, keys)
        print 'finished'

    if method == 'map':
        print 'initializing MAP object... ',
        map_method = 'fmin_powell'
        #map_method = 'fmin_l_bfgs_b'

        # warm up with staged sub-fits over overlapping groups of keys
        # (matched by substring) before fitting everything jointly below
        mc.MAP([dm.vars[k] for k in keys if k.find('incidence') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose)
        mc.MAP([dm.vars[k] for k in keys if k.find('remission') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose)
        # NOTE(review): k.find('m') != -1 matches any key containing 'm',
        # which is much broader than the mortality-related keys the other
        # substrings suggest — confirm this is intentional
        mc.MAP([dm.vars[k] for k in keys if k.find('excess-mortality') != -1 or k.find('m') != -1 or k.find('mortality') != -1 or k.find('relative-risk') != -1 or k.find('bins') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose)
        mc.MAP([dm.vars[k] for k in keys if k.find('incidence') != -1 or k.find('bins') != -1 or k.find('prevalence') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose)
        mc.MAP([dm.vars[k] for k in keys if k.find('excess-mortality') != -1 or k.find('m') != -1 or k.find('mortality') != -1 or k.find('relative-risk') != -1 or k.find('bins') != -1 or k.find('prevalence') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose)

        # final joint MAP fit over all model variables
        dm.map = mc.MAP(dm.vars)
        print 'finished'
        try:
            dm.map.fit(method=map_method, iterlim=500, tol=.001, verbose=verbose)
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass

        # record the point estimates; keys without a rate_stoch are skipped
        for k in keys:
            try:
                val = dm.vars[k]['rate_stoch'].value
                dm.set_map(k, val)
            except KeyError:
                pass

    if method == 'norm_approx':
        dm.na = mc.NormApprox(dm.vars, eps=.0001)

        try:
            dm.na.fit(method='fmin_powell', iterlim=500, tol=.00001, verbose=verbose)
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass

        for k in keys:
            if dm.vars[k].has_key('rate_stoch'):
                dm.set_map(k, dm.vars[k]['rate_stoch'].value)

        # draw samples from the normal approximation and store the fit
        try:
            dm.na.sample(1000, verbose=verbose)
            for k in keys:
                # TODO: rename 'rate_stoch' to something more appropriate
                if dm.vars[k].has_key('rate_stoch'):
                    rate_model.store_mcmc_fit(dm, k, dm.vars[k])
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass

    elif method == 'mcmc':
        # make pymc warnings go to stdout
        import sys
        mc.warnings.warn = sys.stdout.write

        dm.mcmc = mc.MCMC(dm.vars, db='pickle', dbname=dbname)

        # register custom step methods where the setup provided tuning info
        for k in keys:
            if 'dispersion_step_sd' in dm.vars[k]:
                dm.mcmc.use_step_method(mc.Metropolis, dm.vars[k]['log_dispersion'], proposal_sd=dm.vars[k]['dispersion_step_sd'])
            if 'age_coeffs_mesh_step_cov' in dm.vars[k]:
                dm.mcmc.use_step_method(mc.AdaptiveMetropolis, dm.vars[k]['age_coeffs_mesh'], cov=dm.vars[k]['age_coeffs_mesh_step_cov'], verbose=0)

        try:
            dm.mcmc.sample(iter=iter, thin=thin, burn=burn, verbose=verbose)
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass
        dm.mcmc.db.commit()

        # store traces, dispatching on the data type encoded in the key
        for k in keys:
            t,r,y,s = type_region_year_sex_from_key(k)
            if t in ['incidence', 'prevalence', 'remission', 'excess-mortality', 'mortality', 'prevalence_x_excess-mortality']:
                import neg_binom_model
                neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])
            elif t in ['relative-risk', 'duration', 'incidence_x_duration']:
                import normal_model
                normal_model.store_mcmc_fit(dm, k, dm.vars[k])