def fit(dm, method='map', keys=gbd_keys(), iter=50000, burn=25000, thin=1, verbose=1, dbname='model.pickle'): """ Generate an estimate of the generic disease model parameters using maximum a posteriori liklihood (MAP) or Markov-chain Monte Carlo (MCMC) Parameters ---------- dm : dismod3.DiseaseModel the object containing all the data, priors, and additional information (like input and output age-mesh) method : string, optional the parameter estimation method, either 'map' or 'mcmc' keys : list, optional a list of gbd keys for the parameters to fit; it can speed up computation to holding some parameters constant while allowing others to vary iter : int, optional burn : int, optional thin : int, optional parameters for the MCMC, which control how long it takes, and how accurate it is """ if not hasattr(dm, 'vars'): print 'initializing model vars... ', dm.calc_effective_sample_size(dm.data) dm.vars = setup(dm, keys) print 'finished' if method == 'map': print 'initializing MAP object... ', map_method = 'fmin_powell' #map_method = 'fmin_l_bfgs_b' mc.MAP([dm.vars[k] for k in keys if k.find('incidence') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose) mc.MAP([dm.vars[k] for k in keys if k.find('remission') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose) mc.MAP([dm.vars[k] for k in keys if k.find('excess-mortality') != -1 or k.find('m') != -1 or k.find('mortality') != -1 or k.find('relative-risk') != -1 or k.find('bins') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose) mc.MAP([dm.vars[k] for k in keys if k.find('incidence') != -1 or k.find('bins') != -1 or k.find('prevalence') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose) mc.MAP([dm.vars[k] for k in keys if k.find('excess-mortality') != -1 or k.find('m') != -1 or k.find('mortality') != -1 or k.find('relative-risk') != -1 or k.find('bins') != -1 or k.find('prevalence') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose) 
dm.map = mc.MAP(dm.vars) print 'finished' try: dm.map.fit(method=map_method, iterlim=500, tol=.001, verbose=verbose) except KeyboardInterrupt: # if user cancels with cntl-c, save current values for "warm-start" pass for k in keys: try: val = dm.vars[k]['rate_stoch'].value dm.set_map(k, val) except KeyError: pass if method == 'norm_approx': dm.na = mc.NormApprox(dm.vars, eps=.0001) try: dm.na.fit(method='fmin_powell', iterlim=500, tol=.00001, verbose=verbose) except KeyboardInterrupt: # if user cancels with cntl-c, save current values for "warm-start" pass for k in keys: if dm.vars[k].has_key('rate_stoch'): dm.set_map(k, dm.vars[k]['rate_stoch'].value) try: dm.na.sample(1000, verbose=verbose) for k in keys: # TODO: rename 'rate_stoch' to something more appropriate if dm.vars[k].has_key('rate_stoch'): rate_model.store_mcmc_fit(dm, k, dm.vars[k]) except KeyboardInterrupt: # if user cancels with cntl-c, save current values for "warm-start" pass elif method == 'mcmc': # make pymc warnings go to stdout import sys mc.warnings.warn = sys.stdout.write dm.mcmc = mc.MCMC(dm.vars, db='pickle', dbname=dbname) for k in keys: if 'dispersion_step_sd' in dm.vars[k]: dm.mcmc.use_step_method(mc.Metropolis, dm.vars[k]['log_dispersion'], proposal_sd=dm.vars[k]['dispersion_step_sd']) if 'age_coeffs_mesh_step_cov' in dm.vars[k]: dm.mcmc.use_step_method(mc.AdaptiveMetropolis, dm.vars[k]['age_coeffs_mesh'], cov=dm.vars[k]['age_coeffs_mesh_step_cov'], verbose=0) try: dm.mcmc.sample(iter=iter, thin=thin, burn=burn, verbose=verbose) except KeyboardInterrupt: # if user cancels with cntl-c, save current values for "warm-start" pass dm.mcmc.db.commit() for k in keys: t,r,y,s = type_region_year_sex_from_key(k) if t in ['incidence', 'prevalence', 'remission', 'excess-mortality', 'mortality', 'prevalence_x_excess-mortality']: import neg_binom_model neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k]) elif t in ['relative-risk', 'duration', 'incidence_x_duration']: import normal_model 
normal_model.store_mcmc_fit(dm, k, dm.vars[k])
def fit(dm, method='map'):
    """ Estimate the generic disease model parameters, either by
    maximum a posteriori likelihood (MAP) or by Markov-chain Monte
    Carlo (MCMC)

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)

    method : string, optional
      the parameter estimation method, either 'map' or 'mcmc'; any
      other value only initializes dm.vars (if needed)

    Notes
    -----
    Nothing is returned; dm is updated in place.  dm.vars, dm.map and
    dm.mcmc are created on first use and reused on later calls.

    NOTE(review): an earlier ``fit`` with a longer signature is defined
    in this file; this definition rebinds the name -- confirm that is
    intended.

    Example
    -------
    >>> import dismod3
    >>> import dismod3.generic_disease_model as model
    >>> dm = dismod3.get_disease_model(1)
    >>> model.fit(dm, method='map')
    >>> model.fit(dm, method='mcmc')
    """
    max_initial_data = 25

    if not hasattr(dm, 'vars'):
        # seed each of these rates with an initial estimate from its data
        for rate_type in ['incidence', 'remission', 'excess-mortality']:
            matching = [row for row in dm.data
                        if rate_type in clean(row['data_type'])]

            # when there is a lot of data, a random subset is enough
            # and speeds things up
            if len(matching) > max_initial_data:
                subset = random.sample(matching, max_initial_data)
            else:
                subset = matching
            dm.fit_initial_estimate(rate_type, subset)

            dm.set_units(rate_type, '(per person-year)')

        dm.set_units('prevalence', '(per person)')
        dm.set_units('duration', '(years)')

        dm.vars = setup(dm)

    if method == 'map':
        if not hasattr(dm, 'map'):
            dm.map = mc.MAP(dm.vars)

        try:
            dm.map.fit(method='fmin_powell', iterlim=500, tol=.001, verbose=1)
        except KeyboardInterrupt:
            # a cntl-c from the user stops the fit early; the current
            # values are still saved below for a "warm-start"
            pass

        for raw_type in dismod3.settings.output_data_types:
            data_type = clean(raw_type)
            map_value = dm.vars[data_type]['rate_stoch'].value
            dm.set_map(data_type, map_value)
            # a better initial value may save time in the future
            dm.set_initial_value(data_type, map_value)

    elif method == 'mcmc':
        if not hasattr(dm, 'mcmc'):
            dm.mcmc = mc.MCMC(dm.vars)
            # step-method assignment is assumed to belong to first-time
            # setup of the sampler -- TODO confirm against the original
            # indentation
            for var_key in dm.vars:
                stochs = dm.vars[var_key].get('logit_p_stochs', [])
                if len(stochs) > 0:
                    dm.mcmc.use_step_method(mc.AdaptiveMetropolis, stochs)

        try:
            dm.mcmc.sample(iter=60*1000, burn=10*1000, thin=50, verbose=1)
        except KeyboardInterrupt:
            # a cntl-c from the user stops sampling early; whatever was
            # sampled is still stored below
            pass

        for raw_type in dismod3.settings.output_data_types:
            data_type = clean(raw_type)
            rate_model.store_mcmc_fit(dm, data_type, dm.vars[data_type])