Example #1
0
def fit(dm, method='map', keys=gbd_keys(), iter=50000, burn=25000, thin=1, verbose=1,
        dbname='model.pickle'):
    """ Generate an estimate of the generic disease model parameters
    using maximum a posteriori liklihood (MAP) or Markov-chain Monte
    Carlo (MCMC)

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)

    method : string, optional
      the parameter estimation method, either 'map' or 'mcmc'

    keys : list, optional
      a list of gbd keys for the parameters to fit; it can speed up
      computation to holding some parameters constant while allowing
      others to vary

    iter : int, optional
    burn : int, optional
    thin : int, optional
      parameters for the MCMC, which control how long it takes, and
      how accurate it is
    """
    if not hasattr(dm, 'vars'):
        print 'initializing model vars... ',
        dm.calc_effective_sample_size(dm.data)
        dm.vars = setup(dm, keys)
        print 'finished'

    if method == 'map':
        print 'initializing MAP object... ',
        map_method = 'fmin_powell'
        #map_method = 'fmin_l_bfgs_b'

        mc.MAP([dm.vars[k] for k in keys if k.find('incidence') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose)
        mc.MAP([dm.vars[k] for k in keys if k.find('remission') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose)
        mc.MAP([dm.vars[k] for k in keys if
                k.find('excess-mortality') != -1 or
                k.find('m') != -1 or
                k.find('mortality') != -1 or
                k.find('relative-risk') != -1 or
                k.find('bins') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose)
        mc.MAP([dm.vars[k] for k in keys if
                k.find('incidence') != -1 or
                k.find('bins') != -1 or
                k.find('prevalence') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose)
        mc.MAP([dm.vars[k] for k in keys if
                k.find('excess-mortality') != -1 or
                k.find('m') != -1 or
                k.find('mortality') != -1 or
                k.find('relative-risk') != -1 or
                k.find('bins') != -1 or
                k.find('prevalence') != -1]).fit(method=map_method, iterlim=500, tol=.01, verbose=verbose)

        dm.map = mc.MAP(dm.vars)
        print 'finished'

        try:
            dm.map.fit(method=map_method, iterlim=500, tol=.001, verbose=verbose)
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass
        
        for k in keys:
            try:
                val = dm.vars[k]['rate_stoch'].value
                dm.set_map(k, val)
            except KeyError:
                pass

    if method == 'norm_approx':
        dm.na = mc.NormApprox(dm.vars, eps=.0001)

        try:
            dm.na.fit(method='fmin_powell', iterlim=500, tol=.00001, verbose=verbose)
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass

        for k in keys:
            if dm.vars[k].has_key('rate_stoch'):
                dm.set_map(k, dm.vars[k]['rate_stoch'].value)

        try:
            dm.na.sample(1000, verbose=verbose)
            for k in keys:
                # TODO: rename 'rate_stoch' to something more appropriate
                if dm.vars[k].has_key('rate_stoch'):
                    rate_model.store_mcmc_fit(dm, k, dm.vars[k])
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass

                        
    elif method == 'mcmc':
        # make pymc warnings go to stdout
        import sys
        mc.warnings.warn = sys.stdout.write
        
        dm.mcmc = mc.MCMC(dm.vars, db='pickle', dbname=dbname)
        for k in keys:
            if 'dispersion_step_sd' in dm.vars[k]:
                dm.mcmc.use_step_method(mc.Metropolis, dm.vars[k]['log_dispersion'],
                                        proposal_sd=dm.vars[k]['dispersion_step_sd'])
            if 'age_coeffs_mesh_step_cov' in dm.vars[k]:
                dm.mcmc.use_step_method(mc.AdaptiveMetropolis, dm.vars[k]['age_coeffs_mesh'],
                                        cov=dm.vars[k]['age_coeffs_mesh_step_cov'], verbose=0)

        try:
            dm.mcmc.sample(iter=iter, thin=thin, burn=burn, verbose=verbose)
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass
        dm.mcmc.db.commit()

        for k in keys:
            t,r,y,s = type_region_year_sex_from_key(k)
            
            if t in ['incidence', 'prevalence', 'remission', 'excess-mortality', 'mortality', 'prevalence_x_excess-mortality']:
                import neg_binom_model
                neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])
            elif t in ['relative-risk', 'duration', 'incidence_x_duration']:
                import normal_model
                normal_model.store_mcmc_fit(dm, k, dm.vars[k])
Example #2
0
def fit(dm, method='map', iter=60*1000, burn=10*1000, thin=50, verbose=1):
    """ Generate an estimate of the generic disease model parameters
    using maximum a posteriori likelihood (MAP) or Markov-chain Monte
    Carlo (MCMC)

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)

    method : string, optional
      the parameter estimation method, either 'map' or 'mcmc'; any
      other value only initializes dm.vars (when it is missing) and
      fits nothing

    iter : int, optional
    burn : int, optional
    thin : int, optional
      parameters passed to the MCMC sampler; only used when method is
      'mcmc'

    verbose : int, optional
      verbosity level passed through to the PyMC fit and sample calls

    Results
    -------
    Nothing is returned; the results are stored on dm:

    * dm.vars is created with setup(dm) if dm has no 'vars' attribute
      yet, after initial estimates and units have been set
    * 'map' creates dm.map if needed, then stores the fitted value of
      every output data type with dm.set_map and dm.set_initial_value
    * 'mcmc' creates dm.mcmc if needed, then stores the samples of
      every output data type with rate_model.store_mcmc_fit

    Example
    -------
    >>> import dismod3
    >>> import dismod3.generic_disease_model as model
    >>> dm = dismod3.get_disease_model(1)
    >>> model.fit(dm, method='map')
    >>> model.fit(dm, method='mcmc')
    """
    if not hasattr(dm, 'vars'):
        # largest number of data points used for one initial estimate
        max_initial_data = 25

        for param_type in ['incidence', 'remission', 'excess-mortality']:
            # find initial values for these rates
            data = [d for d in dm.data if param_type in clean(d['data_type'])]

            # use a random subset of the data if there is a lot of it,
            # to speed things up
            if len(data) > max_initial_data:
                data = random.sample(data, max_initial_data)
            dm.fit_initial_estimate(param_type, data)

            dm.set_units(param_type, '(per person-year)')

        dm.set_units('prevalence', '(per person)')
        dm.set_units('duration', '(years)')

        dm.vars = setup(dm)

    if method == 'map':
        # an existing MAP object is reused, so a repeated call continues
        # from the current values
        if not hasattr(dm, 'map'):
            dm.map = mc.MAP(dm.vars)

        try:
            dm.map.fit(method='fmin_powell', iterlim=500, tol=.001, verbose=verbose)
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass

        for t in dismod3.settings.output_data_types:
            t = clean(t)
            val = dm.vars[t]['rate_stoch'].value
            dm.set_map(t, val)
            dm.set_initial_value(t, val)  # better initial value may save time in the future
    elif method == 'mcmc':
        if not hasattr(dm, 'mcmc'):
            dm.mcmc = mc.MCMC(dm.vars)
            for key in dm.vars:
                # groups of logit_p stochs are sampled with AdaptiveMetropolis
                stochs = dm.vars[key].get('logit_p_stochs', [])
                if len(stochs) > 0:
                    dm.mcmc.use_step_method(mc.AdaptiveMetropolis, stochs)

        try:
            dm.mcmc.sample(iter=iter, burn=burn, thin=thin, verbose=verbose)
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass
        for t in dismod3.settings.output_data_types:
            t = clean(t)
            rate_model.store_mcmc_fit(dm, t, dm.vars[t])