Ejemplo n.º 1
0
def initialize_model():
    """Load disease model 19807 and prepare it for a fresh fit.

    The function overrides the global priors on excess mortality and
    relative risk, sets the rate value of every study-level and
    country-level covariate to 0, gives the sex and time effects on
    prevalence a mean of 1 with a confidence interval of
    exp(+/- 1.96 * level), sets the region effect standard deviation to
    ``level``, and finally clears any stored fit, the empirical prior and
    the cached covariate hash of ``dismod3.neg_binom_model``.

    Returns
    -------
    dm
      The object returned by ``dismod3.load_disease_model``, after the
      modifications described above.
    """
    ### @export 'load model'
    dm = dismod3.load_disease_model(19807)

    ### @export 'initialize model data'
    global_priors = dm.params['global_priors']
    global_priors['level_bounds']['excess_mortality'] = {'lower': .1,
                                                         'upper': 100.}
    global_priors['increasing']['excess_mortality'] = {'age_start': 0,
                                                       'age_end': 0}
    global_priors['level_bounds']['relative_risk'] = {'lower': 0.,
                                                      'upper': 10000.}

    # switch off every covariate, study level first, then country level
    covariates = dm.params['covariates']
    for cv_level in ('Study_level', 'Country_level'):
        for cv_params in covariates[cv_level].values():
            cv_params['rate']['value'] = 0

    # sex and time effects share the same interval around a mean of 1
    level = .001
    upper_ci = pl.exp(level * 1.96)
    lower_ci = pl.exp(-level * 1.96)
    dm.params['sex_effect_prevalence'] = {'mean': 1,
                                          'upper_ci': upper_ci,
                                          'lower_ci': lower_ci}
    dm.params['time_effect_prevalence'] = {'mean': 1,
                                           'upper_ci': upper_ci,
                                           'lower_ci': lower_ci}
    dm.params['region_effect_prevalence'] = {'std': level}

    # drop results of any earlier fit
    dm.clear_fit()
    dm.clear_empirical_prior()
    dismod3.neg_binom_model.covariate_hash = {}
    return dm
Ejemplo n.º 2
0
def download_model(id):
    """ Make sure model `id` is stored in its job working directory

    If ``dismod3.data.ModelData.load`` succeeds on the directory, nothing
    else is done.  Otherwise the model is fetched with
    ``dismod3.load_disease_model``, converted from its JSON form (falling
    back to ``old_cov_data`` when the regular conversion raises) and saved
    into the directory.

    Parameters
    ----------
    id : int
      The model id number to copy
    """
    job_dir = dismod3.settings.JOB_WORKING_DIR % id  # TODO: refactor into a function

    try:
        dismod3.data.ModelData.load(job_dir)
        print('model already on j drive in %s' % job_dir)
        return
    except (IOError, AssertionError):
        print('downloading disease model')

    dm = dismod3.load_disease_model(id)

    import simplejson as json
    try:
        model = dismod3.data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    except Exception as e:
        # any failure of the regular conversion triggers the legacy path
        print(e)
        print('attempting to use old covariate format')
        import old_cov_data
        model = old_cov_data.from_gbd_jsons(json.loads(dm.to_json()))

    model.save(job_dir)
    print('loaded data from json, saved in new format for next time in %s' % job_dir)
Ejemplo n.º 3
0
def initialize_model():
    """Load disease model 16391 and set it up for a prevalence fit.

    Sets the parameter age mesh to ``pl.arange(0,101,10)``, installs the
    expert priors on prevalence (smoothness 'Moderately', heterogeneity
    'Slightly', level value, level bounds, increasing and decreasing age
    ranges), sets the study-level 'bias' covariate rate value to 1 and all
    country-level covariate rate values to 0, keeps only the data rows that
    ``dm.relevant_to`` accepts for prevalence, and clears earlier fits.

    ``region`` is not defined inside this function; it is read from the
    enclosing (module) scope.

    Returns
    -------
    dm
      The configured disease model.
    """
    ### @export 'load model'
    dm = dismod3.load_disease_model(16391)

    ### @export 'set expert priors'
    dm.set_param_age_mesh(pl.arange(0,101,10))

    priors = dm.params['global_priors']
    priors['smoothness']['prevalence']['amount'] = 'Moderately'
    priors['heterogeneity']['prevalence'] = 'Slightly'
    priors['level_value']['prevalence'] = {'value': 0., 'age_before': 0, 'age_after': 100}
    priors['level_bounds']['prevalence'] = {'lower': 0., 'upper': .1}
    priors['increasing']['prevalence'] = {'age_start': 0, 'age_end': 0}
    priors['decreasing']['prevalence'] = {'age_start': 100, 'age_end': 100}

    dm.params['covariates']['Study_level']['bias']['rate']['value'] = 1
    country_covariates = dm.params['covariates']['Country_level']
    for name in country_covariates:
        country_covariates[name]['rate']['value'] = 0

    ### @export 'initialize model data'
    prevalence_rows = []
    for datum in dm.data:
        if dm.relevant_to(datum, 'prevalence', region, 'all', 'all'):
            prevalence_rows.append(datum)
    dm.data = prevalence_rows

    # fit model
    dm.clear_fit()
    dm.clear_empirical_prior()
    dismod3.neg_binom_model.covariate_hash = {}
    return dm
Ejemplo n.º 4
0
def initialize_model():
    """Load disease model 16391 and configure its prevalence priors.

    Uses ``pl.arange(0, 101, 10)`` as the parameter age mesh, sets both the
    smoothness amount and the heterogeneity of prevalence to 'Slightly',
    installs the level value, level bounds and increasing/decreasing age
    ranges for prevalence, sets the study-level 'bias' covariate rate value
    to 1 and every country-level covariate rate value to 0, then restricts
    ``dm.data`` to the rows ``dm.relevant_to`` accepts for prevalence.

    ``region`` and ``year`` are not defined here; both are read from the
    enclosing (module) scope.

    Returns
    -------
    dm
      The configured disease model, with earlier fit results cleared.
    """
    ### @export 'load model'
    dm = dismod3.load_disease_model(16391)

    ### @export 'set expert priors'
    dm.set_param_age_mesh(pl.arange(0, 101, 10))

    gp = dm.params['global_priors']
    gp['smoothness']['prevalence']['amount'] = 'Slightly'
    gp['heterogeneity']['prevalence'] = 'Slightly'
    gp['level_value']['prevalence'] = {'value': 0.,
                                       'age_before': 0,
                                       'age_after': 100}
    gp['level_bounds']['prevalence'] = {'lower': 0., 'upper': .1}
    gp['increasing']['prevalence'] = {'age_start': 0, 'age_end': 0}
    gp['decreasing']['prevalence'] = {'age_start': 100, 'age_end': 100}

    dm.params['covariates']['Study_level']['bias']['rate']['value'] = 1
    for country_cv in dm.params['covariates']['Country_level'].values():
        country_cv['rate']['value'] = 0

    ### @export 'initialize model data'
    def is_relevant(datum):
        # prevalence rows for the module-level region and year, any sex
        return dm.relevant_to(datum, 'prevalence', region, year, 'all')

    dm.data = [datum for datum in dm.data if is_relevant(datum)]

    # fit model
    dm.clear_fit()
    dm.clear_empirical_prior()
    dismod3.neg_binom_model.covariate_hash = {}
    return dm
Ejemplo n.º 5
0
def validate_prior_similarity():
    """Fit the prevalence data model of model 20928 under three
    heterogeneity settings and plot diagnostics for each.

    The input data is first restricted to rows whose area lies in the
    hierarchy subtree below 'sub-saharan_africa_central' (or is 'all'),
    that pass the year test below, and whose sex is 'male' or 'total'.
    Rows with area 'all' are then relabelled with the chosen area.  For
    each of 'Slightly', 'Moderately' and 'Very' the regional model is set
    up, the data model is built and fitted, and a convergence plot plus a
    one-type plot are drawn.  ``pl.show()`` is called once at the end.

    Returns
    -------
    dm
      The disease model; ``dm.vars`` holds the variables of the last fit.
    """
    # NOTE: an alternative configuration used by the original author was
    # model 20945 with t = 'i' and area 'europe_eastern' (male, 2005).
    dm = dismod3.load_disease_model(20928)
    dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    t = 'p'
    area, sex, year = 'sub-saharan_africa_central', 'male', 2005

    # select data that is about areas in this region, recent years, and sex of male or total only
    model = dm.model
    subtree = nx.traversal.bfs_tree(model.hierarchy, area)

    def is_relevant(row):
        if not (row['area'] in subtree or row['area'] == 'all'):
            return False
        if not ((year == 2005 and row['year_end'] >= 1997) or row['year_start'] <= 1997):
            return False
        return row['sex'] in [sex, 'total']

    relevant_rows = [label for label, row in model.input_data.T.iteritems()
                     if is_relevant(row)]
    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with area
    is_all_area = model.input_data['area'] == 'all'
    model.input_data['area'][is_all_area] = area

    heterogeneity_levels = 'Slightly Moderately Very'.split()
    for het in heterogeneity_levels:
        type_params = dm.model.parameters[t]
        type_params['parameter_age_mesh'] = [0, 15, 20, 25, 35, 45, 55, 65, 75, 100]
        type_params['heterogeneity'] = het
        setup_regional_model(dm, area, sex, year)

        dm.vars = {}
        rate_vars = data_model.data_model(
            t, dm.model, t,
            root_area=area, root_sex=sex, root_year=year,
            mu_age=None,
            mu_age_parent=dm.emp_priors[t, 'mu'],
            sigma_age_parent=dm.emp_priors[t, 'sigma'],
            rate_type='log_normal' if t == 'rr' else 'neg_binom')
        dm.vars[t] = rate_vars

        fit_model.fit_data_model(rate_vars, iter=10050, burn=5000, thin=50, tune_interval=100)

        # one convergence figure and one rate figure per setting
        graphics.plot_convergence_diag(rate_vars)
        pl.title(het)

        graphics.plot_one_type(dm.model, rate_vars, dm.emp_priors, t)
        pl.title(het)

    pl.show()
    return dm
Ejemplo n.º 6
0
def initialize_model():
    """Load disease model 19271 and rebuild the standard errors of its data.

    Only rows accepted by ``dm.relevant_to`` are kept.  For each kept row
    the standard error is recomputed as
    ``float(sd_1enadj or parameter_value_old) / 10000 / sqrt(effective_sample_size)``
    and the 'effective_sample_size' entry is removed from the row.

    ``type``, ``region``, ``year`` and ``sex`` are not defined in this
    function; they are read from the enclosing (module) scope.
    NOTE(review): if the module does not define ``type``, the builtin is
    passed instead - confirm against the caller.

    Returns
    -------
    dm
      The disease model with filtered data and earlier fits cleared.
    """
    ### @export 'load model'
    dm = dismod3.load_disease_model(19271)

    ### @export 'initialize model data'
    kept_rows = []
    for row in dm.data:
        if dm.relevant_to(row, type, region, year, sex):
            kept_rows.append(row)
    dm.data = kept_rows

    for row in dm.data:
        spread = float(row['sd_1enadj'] or row['parameter_value_old']) / 10000.
        row['standard_error'] = spread / pl.sqrt(row['effective_sample_size'])
        row.pop('effective_sample_size')

    # fit model
    dm.clear_fit()
    dm.clear_empirical_prior()
    dismod3.neg_binom_model.covariate_hash = {}
    return dm
Ejemplo n.º 7
0
def fit_emp_prior(id, param_type):
    """ Fit empirical prior of specified type for specified model

    After the fit, one tile plot is saved per sex/year combination, one
    effect plot per effect name, and one posterior predictive check plot.
    The model is then saved to a JSON file and posted with
    ``dismod3.try_posting_disease_model``.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    param_type : str, one of incidence, prevalence, remission, excess-mortality
      The disease parameter to generate empirical priors for

    Returns
    -------
    dm
      The fitted disease model

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    """
    # load disease model
    dm = dismod3.load_disease_model(id)

    import dismod3.neg_binom_model as nb_model
    job_dir = dismod3.settings.JOB_WORKING_DIR % id
    pickle_path = '%s/empirical_priors/pickle/dm-%d-emp_prior-%s.pickle' % (job_dir, id, param_type)
    nb_model.fit_emp_prior(dm, param_type, dbname=pickle_path)

    # generate empirical prior plots
    # NOTE(review): subplot is not referenced below; the import is kept so
    # that import-time behavior stays unchanged.
    from pylab import subplot
    for sex in dismod3.settings.gbd_sexes:
        for year in dismod3.settings.gbd_years:
            keys = dismod3.utils.gbd_keys(region_list=['all'],
                                          year_list=[year],
                                          sex_list=[sex],
                                          type_list=[param_type])
            dismod3.tile_plot_disease_model(dm, keys, defaults={})
            plot_name = 'dm-%d-emp_prior-%s-%s-%s.png' % (id, param_type, sex, year)
            dm.savefig(plot_name)

    # TODO: put this in a separate script, which runs after all empirical priors are computed
    for effect in ('alpha', 'beta', 'gamma', 'delta'):
        dismod3.plotting.plot_empirical_prior_effects([dm], effect)
        dm.savefig('dm-%d-emp-prior-%s-%s.png' % (id, param_type, effect))

    # summarize fit quality graphically, as well as parameter posteriors;
    # `keys` still holds the key list of the last sex/year combination
    first_key = keys[0]
    dm.vars = {first_key: dm.vars}   # hack to make posterior predictions plot
    dismod3.plotting.plot_posterior_predicted_checks(dm, first_key)
    dm.savefig('dm-%d-emp-prior-check-%s.png' % (dm.id, param_type))
    dm.vars = dm.vars[first_key]   # undo hack to make posterior predictions plot

    # save results (do this last, because it removes things from the
    # disease model that plotting function, etc, might need)
    dm.save('dm-%d-prior-%s.json' % (id, param_type))
    dismod3.try_posting_disease_model(dm, ntries=5)

    return dm
Ejemplo n.º 8
0
def find_fnrfx(model, disease, data_type, country, sex, year):
    '''Add fixed and random effects from GBD as priors to new model.

    Three groups of priors are written into ``model.parameters[data_type]``:

    * random effects read from the posterior ``re-*.csv`` file, stored as
      'Constant' priors (best effort: any failure is ignored),
    * one prior per node in ``vars['sigma_alpha']``, taken from the
      'new_alpha' empirical prior of the disease model when available and
      otherwise set to a fixed 'TruncatedNormal' default,
    * fixed effects read from the posterior ``fe-*.csv`` file, stored as
      'Constant' priors (failures here propagate to the caller).

    Parameters
    ----------
    model : object with a ``parameters`` mapping, modified in place
    disease : model id, used in the output path and to load the model
    data_type : key into ``model.parameters`` and ``dummy.vars``
    country, sex, year : select the posterior csv files to read

    Returns
    -------
    None
    '''
    # create dummy model to get appropriate Model.vars fields
    dummy = load_new_model(disease, country, sex)
    dummy.vars += dismod3.ism.age_specific_rate(dummy, data_type)
    rate_vars = dummy.vars[data_type]

    # save random effects
    try:
        emp_re = pandas.read_csv(
            '/home/j/Project/dismod/output/dm-%s/posterior/re-%s-%s+%s+%s.csv'
            % (disease, data_type, geo_info(country, disease), sex, year),
            index_col=0)
        for col in emp_re.index:
            model.parameters[data_type]['random_effects'][col] = dict(
                dist='Constant',
                mu=emp_re.ix[col, 'mu_coeff'],
                sigma=emp_re.ix[col, 'sigma_coeff'])
    except Exception:
        # Random effects are optional, so ordinary failures are ignored.
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed.
        pass

    # also save empirical prior on sigma_alpha, the dispersion of the random effects
    dm = dismod3.load_disease_model(disease)
    for n in rate_vars['sigma_alpha']:
        try:
            dm_na = dm.get_empirical_prior(full_name[data_type])['new_alpha']
            model.parameters[data_type]['random_effects'][n.__name__] = dict(
                dist=dm_na[n.__name__]['dist'],
                mu=dm_na[n.__name__]['mu'],
                sigma=dm_na[n.__name__]['sigma'],
                lower=dm_na[n.__name__]['lower'],
                upper=dm_na[n.__name__]['upper'])
        except Exception:
            # No usable empirical prior for this node: use the default.
            # NOTE(review): .03**-2 is about 1111, which looks like a
            # precision rather than a standard deviation - confirm.
            model.parameters[data_type]['random_effects'][n.__name__] = dict(
                dist='TruncatedNormal',
                mu=.05,
                sigma=.03**-2,
                lower=0.01,
                upper=0.5)

    # save fixed effects
    emp_fe = pandas.read_csv(
        '/home/j/Project/dismod/output/dm-%s/posterior/fe-%s-%s+%s+%s.csv' %
        (disease, data_type, geo_info(country, disease), sex, year),
        index_col=0)
    # zip() is kept so that the loop still stops at the shorter of the
    # beta list and the column list; the beta node itself is not used
    for _beta, col in zip(rate_vars['beta'], rate_vars['X'].columns):
        model.parameters[data_type]['fixed_effects'][col] = dict(
            dist='Constant',
            mu=emp_fe.ix[col, 'mu_coeff'],
            sigma=emp_fe.ix[col, 'sigma_coeff'])
Ejemplo n.º 9
0
Archivo: data.py Proyecto: peterhm/gbd
def fetch_disease_model_if_necessary(id, dir_name):
    """Return the model stored in `dir_name`, fetching it first if needed.

    ``ModelData.load(dir_name)`` is tried first.  When it raises IOError
    or AssertionError, the model is loaded with
    ``dismod3.load_disease_model(id)``, converted from its JSON form and
    saved into `dir_name` for next time.

    Parameters
    ----------
    id : int
      The model id number to fetch when the directory cannot be loaded
    dir_name : str
      Directory holding (or receiving) the saved model

    Returns
    -------
    model
      The loaded ``ModelData`` object

    Raises
    ------
    OSError
      If `dir_name` cannot be created and does not already exist
    """
    try:
        model = ModelData.load(dir_name)
        print('loaded data from new format from %s' % dir_name)
    except (IOError, AssertionError):
        import os
        # The load can fail although the directory is already there (for
        # example when its contents are incomplete); makedirs would then
        # raise and block the fallback forever.  Only re-raise when the
        # directory really is missing afterwards.
        try:
            os.makedirs(dir_name)
        except OSError:
            if not os.path.isdir(dir_name):
                raise
        # this import also binds the name `dismod3` used on the next line
        import dismod3.disease_json
        dm = dismod3.load_disease_model(id)
        import simplejson as json
        model = ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        model.save(dir_name)
        print('loaded data from json, saved in new format for next time in %s' % dir_name)
    print('model has %d rows of input data' % len(model.input_data.index))
    return model
Ejemplo n.º 10
0
def fetch_disease_model_if_necessary(id, dir_name):
    """Load the model saved in `dir_name`, downloading it when that fails.

    The saved copy is read with ``ModelData.load``.  If that raises
    IOError or AssertionError the model is obtained through
    ``dismod3.load_disease_model(id)``, converted with
    ``ModelData.from_gbd_jsons`` and written to `dir_name`.

    Parameters
    ----------
    id : int
      Model id used for the fallback download
    dir_name : str
      Directory the model is loaded from or saved to

    Returns
    -------
    model
      The ``ModelData`` instance that was loaded or created

    Raises
    ------
    OSError
      If `dir_name` does not exist and cannot be created
    """
    try:
        model = ModelData.load(dir_name)
        print('loaded data from new format from %s' % dir_name)
    except (IOError, AssertionError):
        import os
        # A failed load does not imply a missing directory.  Create it
        # only when absent, and tolerate it appearing concurrently.
        if not os.path.isdir(dir_name):
            try:
                os.makedirs(dir_name)
            except OSError:
                if not os.path.isdir(dir_name):
                    raise
        # besides loading the submodule, this binds `dismod3` locally
        import dismod3.disease_json
        dm = dismod3.load_disease_model(id)
        import simplejson as json
        model = ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        model.save(dir_name)
        print('loaded data from json, saved in new format for next time in %s' % dir_name)
    print('model has %d rows of input data' % len(model.input_data.index))
    return model
Ejemplo n.º 11
0
def main():
    """Parse the command line and fit the posterior for one disease model.

    Exactly one positional argument is expected: the integer disease model
    id.  The on/off options are given as text and count as enabled only
    when their value equals 'true' (compared case-insensitively).

    Returns
    -------
    dm
      The value returned by ``fit_posterior`` for the loaded model.
    """
    import optparse

    parser = optparse.OptionParser('usage: %prog [options] disease_model_id')

    # (short flag, long flag, default, help text) of every option
    option_table = (
        ('-s', '--sex', 'male',
         'only estimate given sex (valid settings ``male``, ``female``, ``all``)'),
        ('-y', '--year', '2005',
         'only estimate given year (valid settings ``1990``, ``2005``, ``2010``)'),
        ('-r', '--region', 'australasia',
         'only estimate given GBD Region'),
        ('-f', '--fast', 'False',
         'use MAP only'),
        ('-i', '--inconsistent', 'False',
         'use inconsistent model for posteriors'),
        ('-t', '--types', 'p i r',
         'with rate types to fit (only used if inconsistent=true)'),
        ('-z', '--zerore', 'true',
         'enforce zero constraint on random effects'),
        ('-o', '--onlyposterior', 'False',
         'skip empirical prior phase'),
    )
    for short_flag, long_flag, default, help_text in option_table:
        parser.add_option(short_flag, long_flag, default=default, help=help_text)

    options, args = parser.parse_args()

    if len(args) != 1:
        parser.error('incorrect number of arguments')

    try:
        model_id = int(args[0])
    except ValueError:
        parser.error('disease_model_id must be an integer')

    dm = dismod3.load_disease_model(model_id)

    # set model id to passed-in id (should not be necessary)
    dm.id = model_id
    assert model_id == dm.id, 'model id should equal parameter id'

    def enabled(text):
        # option values arrive as strings; only 'true' switches a flag on
        return text.lower() == 'true'

    return fit_posterior(dm, options.region, options.sex, options.year,
                         fast_fit=enabled(options.fast),
                         inconsistent_fit=enabled(options.inconsistent),
                         params_to_fit=options.types.split(),
                         posteriors_only=enabled(options.onlyposterior),
                         zero_re=enabled(options.zerore))
Ejemplo n.º 12
0
def fit_posterior(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model

    Only the prevalence key of the requested region/sex/year is fitted:
    the negative binomial model is set up on all of ``dm.data``, sampled
    with MCMC, fitted with MAP, stored on the model, and the model is
    saved to a JSON file restricted to that key.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    Returns
    -------
    dm
      The fitted disease model

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    def prevalence_keys():
        # key list for prevalence in the requested region/year/sex
        return dismod3.utils.gbd_keys(region_list=[region],
                                      year_list=[year],
                                      sex_list=[sex],
                                      type_list=['prevalence'])

    dm = dismod3.load_disease_model(id)
    keys = prevalence_keys()

    # fit the model
    job_dir = dismod3.settings.JOB_WORKING_DIR % id  # not used further in this function
    import dismod3.neg_binom_model as nb_model
    key = keys[0]
    dm.vars = {}
    dm.vars[key] = nb_model.setup(dm, key, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)

    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    nb_model.store_mcmc_fit(dm, key, dm.vars[key])

    # save results (do this last, because it removes things from the
    # disease model that plotting function, etc, might need)
    keys = prevalence_keys()
    output_name = 'dm-%d-posterior-%s-%s-%s.json' % (id, region, sex, year)
    dm.save(output_name, keys_to_save=keys)

    return dm
Ejemplo n.º 13
0
def upload_fits(id):
    """ Load a disease model and post it with up to five tries

    Parameters
    ----------
    id : int
      The model id number

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    >>> import upload_fits
    >>> upload_fits.upload_fits(2552)
    """
    # per the original author's note, loading merges together the results
    # from all fits
    merged_model = dismod3.load_disease_model(id)
    dismod3.try_posting_disease_model(merged_model, ntries=5)
Ejemplo n.º 14
0
def find_fnrfx(model, disease, data_type, country, sex, year):
    '''Add fixed and random effects from GBD as priors to new model.

    Writes into ``model.parameters[data_type]``:

    * 'random_effects' entries from the posterior ``re-*.csv`` file as
      'Constant' priors; this step is best effort and ordinary errors are
      ignored,
    * a 'random_effects' entry for every node in ``vars['sigma_alpha']``,
      copied from the 'new_alpha' empirical prior when it can be read and
      otherwise filled with a 'TruncatedNormal' default,
    * 'fixed_effects' entries from the posterior ``fe-*.csv`` file as
      'Constant' priors; errors in this step are raised.

    Parameters
    ----------
    model : object with a ``parameters`` mapping, updated in place
    disease : model id, used for the csv paths and to load the model
    data_type : key into ``model.parameters`` and ``dummy.vars``
    country, sex, year : identify which posterior csv files to read

    Returns
    -------
    None
    '''
    # create dummy model to get appropriate Model.vars fields
    dummy = load_new_model(disease, country, sex)
    dummy.vars += dismod3.ism.age_specific_rate(dummy, data_type)
    rate_vars = dummy.vars[data_type]

    # save random effects
    try:
        re_path = '/home/j/Project/dismod/output/dm-%s/posterior/re-%s-%s+%s+%s.csv'%(disease, data_type, geo_info(country,disease), sex, year)
        emp_re = pandas.read_csv(re_path, index_col=0)
        for col in emp_re.index:
            model.parameters[data_type]['random_effects'][col] = dict(dist='Constant',
                                                                      mu=emp_re.ix[col, 'mu_coeff'],
                                                                      sigma=emp_re.ix[col, 'sigma_coeff'])
    except Exception:
        # Optional input: skip it on ordinary errors.  Catching Exception
        # rather than everything keeps Ctrl-C and SystemExit working.
        pass

    # also save empirical prior on sigma_alpha, the dispersion of the random effects
    dm = dismod3.load_disease_model(disease)
    for n in rate_vars['sigma_alpha']:
        node_name = n.__name__
        try:
            dm_na = dm.get_empirical_prior(full_name[data_type])['new_alpha']
            model.parameters[data_type]['random_effects'][node_name] = dict(dist=dm_na[node_name]['dist'],
                                                                            mu=dm_na[node_name]['mu'],
                                                                            sigma=dm_na[node_name]['sigma'],
                                                                            lower=dm_na[node_name]['lower'],
                                                                            upper=dm_na[node_name]['upper'])
        except Exception:
            # Fall back to a fixed default when the empirical prior is
            # missing or incomplete.
            # NOTE(review): .03**-2 evaluates to roughly 1111; check
            # whether a standard deviation of .03 was intended.
            model.parameters[data_type]['random_effects'][node_name] = dict(dist='TruncatedNormal',
                                                                            mu=.05,
                                                                            sigma=.03**-2,
                                                                            lower=0.01,
                                                                            upper=0.5)

    # save fixed effects
    fe_path = '/home/j/Project/dismod/output/dm-%s/posterior/fe-%s-%s+%s+%s.csv'%(disease, data_type, geo_info(country,disease), sex, year)
    emp_fe = pandas.read_csv(fe_path, index_col=0)
    # zip() bounds the loop by the shorter of the two sequences, exactly
    # as before; the beta node is not needed in the body
    for _beta, col in zip(rate_vars['beta'], rate_vars['X'].columns):
        model.parameters[data_type]['fixed_effects'][col] = dict(dist='Constant',
                                                                 mu=emp_fe.ix[col, 'mu_coeff'],
                                                                 sigma=emp_fe.ix[col, 'sigma_coeff'])
Ejemplo n.º 15
0
def initialize_model():
    """Return disease model 19807 with priors reset for a new fit.

    Steps, in order: override the level bounds and increasing range of
    excess mortality and the level bounds of relative risk; set the rate
    value of all study-level and country-level covariates to 0; set the
    sex and time effect priors on prevalence to mean 1 with interval
    exp(+/- 1.96 * level) and the region effect std to ``level``; clear
    the fit, the empirical prior and the covariate hash.

    Returns
    -------
    dm
      The modified disease model.
    """
    def unit_effect_prior(level):
        # a new dict on every call, so the two effects never share state
        return dict(mean=1,
                    upper_ci=pl.exp(level * 1.96),
                    lower_ci=pl.exp(-level * 1.96))

    ### @export 'load model'
    dm = dismod3.load_disease_model(19807)

    ### @export 'initialize model data'
    level_bounds = dm.params['global_priors']['level_bounds']
    level_bounds['excess_mortality'] = dict(lower=.1, upper=100.)
    dm.params['global_priors']['increasing']['excess_mortality'] = dict(
        age_start=0, age_end=0)
    level_bounds['relative_risk'] = dict(lower=0., upper=10000.)

    for group in ('Study_level', 'Country_level'):
        group_covariates = dm.params['covariates'][group]
        for name in group_covariates:
            group_covariates[name]['rate']['value'] = 0

    level = .001
    dm.params['sex_effect_prevalence'] = unit_effect_prior(level)
    dm.params['time_effect_prevalence'] = unit_effect_prior(level)
    dm.params['region_effect_prevalence'] = dict(std=level)

    dm.clear_fit()
    dm.clear_empirical_prior()
    dismod3.neg_binom_model.covariate_hash = {}
    return dm
Ejemplo n.º 16
0
def upload_fits(id):
    """ Plot empirical prior effects and build tables for a fitted model

    The model is loaded, one effect plot per effect name is saved, and
    ``dismod3.table.make_tables`` is called.  A failure in either step is
    reported on stdout and does not stop the function.
    NOTE(review): despite the function name, nothing is posted to a
    server in this version.

    Parameters
    ----------
    id : int
      The model id number

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    >>> import upload_fits
    >>> upload_fits.upload_fits(2552)
    """
    # load disease model; per the original note this merges together
    # results from all fits
    dm = dismod3.load_disease_model(id)

    # working directory of the job (dta export into it is disabled:
    # dm_to_dta(dm, '%s/regional_predictions' % dir))
    job_dir = dismod3.settings.JOB_WORKING_DIR % id  # TODO: refactor into a function

    # plot empirical priors (in a separate script, to run after all empirical priors are computed)
    for effect in ('alpha', 'beta', 'gamma', 'delta'):
        try:
            dismod3.plotting.plot_empirical_prior_effects([dm], effect)
            dm.savefig('dm-%d-emp-prior-%s.png' % (id, effect))
        except Exception:
            print('failed to plot %s' % effect)

    # save table output
    try:
        dismod3.table.make_tables(dm)
    except Exception as e:
        print('Failed to make table')
        print(e)
Ejemplo n.º 17
0
def upload_fits(id):
    """ Produce effect plots and tables from the merged fits of a model

    Loads the model, saves a plot of the empirical prior effects for each
    of alpha, beta, gamma and delta, then generates the tables.  Plot and
    table errors are printed and otherwise ignored.
    NOTE(review): the name suggests an upload, but this version contains
    no call that posts the model.

    Parameters
    ----------
    id : int
      The model id number

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    >>> import upload_fits
    >>> upload_fits.upload_fits(2552)
    """
    def plot_effect(model, effect_name):
        # one figure per effect; failures are reported, not raised
        try:
            dismod3.plotting.plot_empirical_prior_effects([model], effect_name)
            model.savefig('dm-%d-emp-prior-%s.png' % (id, effect_name))
        except Exception:
            print('failed to plot %s' % effect_name)

    # load disease model
    dm = dismod3.load_disease_model(id)  # this merges together results from all fits

    # save dta output (currently disabled; only the directory is computed)
    work_dir = dismod3.settings.JOB_WORKING_DIR % id  # TODO: refactor into a function
    #dm_to_dta(dm, '%s/regional_predictions' % dir)

    # plot empirical priors (in a separate script, to run after all empirical priors are computed)
    for effect in ['alpha', 'beta', 'gamma', 'delta']:
        plot_effect(dm, effect)

    # save table output
    try:
        dismod3.table.make_tables(dm)
    except Exception as e:
        print('Failed to make table')
        print(e)
Ejemplo n.º 18
0
def fit_without_confrontation(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model
    without trying to integrate conflicting sources of data

    The prevalence and relative-risk data for the region/year/sex are
    withheld from the generic disease model.  The single prevalence datum
    is turned into a normal distribution for ``logit_C_0`` and the
    relative-risk data into a normal distribution for the excess-mortality
    mesh; both are then sampled through the MCMC object.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    Returns
    -------
    dm
      The disease model with the stored fits

    Raises
    ------
    AssertionError
      If the number of relevant prevalence data rows is not exactly one
    """

    ## load model
    dm = dismod3.load_disease_model(id)

    ## separate out prevalence and relative-risk data
    prev_data = [
        d for d in dm.data
        if dm.relevant_to(d, 'prevalence', region, year, sex)
    ]
    rr_data = [
        d for d in dm.data
        if dm.relevant_to(d, 'relative-risk', region, year, sex)
    ]
    # everything that is neither withheld prevalence nor relative-risk data
    dm.data = [d for d in dm.data if not d in prev_data and not d in rr_data]

    ### setup the generic disease model (without prevalence data)
    import dismod3.gbd_disease_model as model
    keys = dismod3.utils.gbd_keys(region_list=[region],
                                  year_list=[year],
                                  sex_list=[sex])
    dm.calc_effective_sample_size(dm.data)
    dm.vars = model.setup(dm, keys)

    ## override the birth prevalence prior, based on the withheld prevalence data
    logit_C_0 = dm.vars[dismod3.utils.gbd_key_for('bins', region, year,
                                                  sex)]['initial']['logit_C_0']
    assert len(prev_data) == 1, 'should be a single prevalance datum'
    d = prev_data[0]

    # mean on the logit scale; NEARLY_ZERO is added before taking the logit
    mu_logit_C_0 = mc.logit(dm.value_per_1(d) + dismod3.settings.NEARLY_ZERO)
    lb, ub = dm.bounds_per_1(d)
    # width of the logit-scale interval divided by 2 * 1.96, i.e. the
    # bounds are treated as lying 1.96 standard deviations from the center
    sigma_logit_C_0 = (mc.logit(ub + dismod3.settings.NEARLY_ZERO) -
                       mc.logit(lb + dismod3.settings.NEARLY_ZERO)) / (2 *
                                                                       1.96)
    print 'mu_C_0_pri:', mc.invlogit(mu_logit_C_0)
    print 'ui_C_0_pri:', lb, ub

    # override the excess-mortality, based on the relative-risk data
    # defaults for ages without data: relative risk 1.01, std .01
    mu_rr = 1.01 * np.ones(dismod3.settings.MAX_AGE)
    sigma_rr = .01 * np.ones(dismod3.settings.MAX_AGE)
    for d in rr_data:
        # each datum fills its age range, age_end inclusive
        mu_rr[d['age_start']:(d['age_end'] + 1)] = dm.value_per_1(d)
        sigma_rr[d['age_start']:(d['age_end'] + 1)] = dm.se_per_1(d)
    print 'mu_rr:', mu_rr.round(2)
    #print 'sigma_rr:', sigma_rr.round(2)

    log_f = dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year,
                                              sex)]['age_coeffs']
    log_f_mesh = log_f.parents['gamma_mesh']
    param_mesh = log_f.parents['param_mesh']

    m_all = dm.vars[dismod3.utils.gbd_key_for('all-cause_mortality', region,
                                              year, sex)]
    # excess mortality expressed as (rr - 1) * m_all, on the log scale
    mu_log_f = np.log((mu_rr - 1) * m_all)
    sigma_log_f = 1 / ((mu_rr - 1) * m_all) * sigma_rr * m_all
    print 'mu_log_f:', mu_log_f.round(2)[param_mesh]
    print 'sigma_log_f:', sigma_log_f.round(2)[param_mesh]

    ### fit the model using Monte Carlo simulation (shoehorned into the MCMC framework of PyMC)
    dm.mcmc = mc.MCMC(dm.vars)
    # SampleFromNormal is defined outside this function; tau is passed as
    # sigma ** -2
    dm.mcmc.use_step_method(SampleFromNormal,
                            logit_C_0,
                            mu=mu_logit_C_0,
                            tau=sigma_logit_C_0**-2)
    dm.mcmc.use_step_method(SampleFromNormal,
                            log_f_mesh,
                            mu=mu_log_f[param_mesh],
                            tau=sigma_log_f[param_mesh]**-2)
    # NOTE(review): NoStepper is registered for every stochastic in
    # dm.mcmc.stochastics, which presumably includes the two handled
    # above - confirm how PyMC combines the step methods.
    for stoch in dm.mcmc.stochastics:
        dm.mcmc.use_step_method(mc.NoStepper, stoch)
    dm.mcmc.sample(1000, verbose=dismod3.settings.ON_SGE)

    #print 'mu_C_0_post:', mc.invlogit(logit_C_0.stats()['mean']).round(2)
    #print 'ui_C_0_post:', mc.invlogit(logit_C_0.stats()['95% HPD interval']).round(2)
    #print 'mu_rr_post:', dm.vars[dismod3.utils.gbd_key_for('relative-risk', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)
    print 'mu_log_f_mesh_post:', log_f_mesh.stats()['mean'].round(2)
    print 'mu_f_post:', dm.vars[dismod3.utils.gbd_key_for(
        'excess-mortality', region, year,
        sex)]['rate_stoch'].stats()['mean'].round(2)

    # store the fit of every key with the module matching its rate type;
    # keys of any other type are skipped
    for k in keys:
        t, r, y, s = dismod3.utils.type_region_year_sex_from_key(k)

        if t in [
                'incidence', 'prevalence', 'remission', 'excess-mortality',
                'mortality', 'prevalence_x_excess-mortality'
        ]:
            dismod3.neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])

        elif t in ['relative-risk', 'duration', 'incidence_x_duration']:
            dismod3.normal_model.store_mcmc_fit(dm, k, dm.vars[k])

    from fit_posterior import save_country_level_posterior
    if str(year) == '2005':  # also generate 2010 estimates
        save_country_level_posterior(dm, region, 2010, sex,
                                     ['prevalence', 'remission'])
    # only prevalence and remission are saved at country level; the
    # trailing comment below lists the full set of types as an alternative
    save_country_level_posterior(
        dm, region, year, sex, ['prevalence', 'remission']
    )  #'prevalence incidence remission excess-mortality duration mortality relative-risk'.split())

    # save results (do this last, because it removes things from the
    # disease model that plotting function, etc, might need)
    keys = dismod3.utils.gbd_keys(region_list=[region],
                                  year_list=[year],
                                  sex_list=[sex])
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (dm.id, region, sex, year),
            keys_to_save=keys)

    return dm
Ejemplo n.º 19
0
def fit_emp_prior(
    id,
    param_type,
    fast_fit=False,
    generate_emp_priors=True,
    zero_re=True,
    alt_prior=False,
    global_heterogeneity="Slightly",
):
    """ Fit empirical prior of specified type for specified model

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    param_type : str, one of incidence, prevalence, remission, excess-mortality, prevalence_x_excess-mortality
      The disease parameter to generate empirical priors for
    fast_fit : bool
      If true, fit with a very short chain (iter=101, burn=0, thin=1)
      instead of the full one (iter=50000, burn=10000, thin=40)
    generate_emp_priors : bool
      If true, predict and store ``emp_prior_mean`` / ``emp_prior_std``
      for every GBD region, sex and year
    zero_re : bool
      Passed through to ``ism.age_specific_rate``
    alt_prior : bool
      Passed through to ``covariate_model.predict_for``
    global_heterogeneity : str
      Value written into every parameter's ``heterogeneity`` setting
      before the global fit

    Returns
    -------
    dm : the disease model, both when there is no data of the requested
      type (nothing is fitted) and after a completed fit

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    """

    job_dir = dismod3.settings.JOB_WORKING_DIR % id

    ## load the model from disk or from web
    import simplejson as json
    import data

    reload(data)

    dm = dismod3.load_disease_model(id)

    try:
        model = data.ModelData.load(job_dir)
        print("loaded data from new format from %s" % job_dir)
    except (IOError, AssertionError):
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        # NOTE: saving is currently disabled (model.save(job_dir) is not
        # called), so the message below overstates what happens
        print("loaded data from json, saved in new format for next time in %s" % job_dir)

    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith("x_"):
            model.input_data[col] = model.input_data[col].fillna(0.0)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    # set all heterogeneity priors to the requested level for the global fit
    for param_key in model.parameters:
        if "heterogeneity" in model.parameters[param_key]:
            model.parameters[param_key]["heterogeneity"] = global_heterogeneity

    # short code used by the model for this parameter type
    t = {
        "incidence": "i",
        "prevalence": "p",
        "remission": "r",
        "excess-mortality": "f",
        "prevalence_x_excess-mortality": "pf",
    }[param_type]
    model.input_data = model.get_data(t)
    if len(model.input_data) == 0:
        print("No data for type %s, exiting" % param_type)
        return dm

    ### For testing, computation can be sped up at this point by, e.g.:
    ##  - reducing the number of knots:
    ##      model.parameters[t]['parameter_age_mesh'] = [0, 10, 20, 40, 60, 100]
    ##  - changing the smoothing (Slightly, Moderately, or Very):
    ##      model.parameters[t]['smoothness'] = dict(age_start=0, age_end=100, amount='Very')
    ##  - reducing the data size:
    ##      model.input_data = model.input_data[:100]
    ##  - skipping the empirical prior predictions:
    ##      generate_emp_priors = False

    print("fitting %s" % t)
    model.vars += ism.age_specific_rate(
        model,
        t,
        reference_area="all",
        reference_sex="total",
        reference_year="all",
        mu_age=None,
        mu_age_parent=None,
        sigma_age_parent=None,
        rate_type="log_normal" if t == "rr" else "neg_binom",
        zero_re=zero_re,
    )
    # for backwards compatibility, should be removed eventually
    dm.model = model
    dm.vars = model.vars[t]
    rate_vars = dm.vars

    if fast_fit:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(model, t, iter=101, burn=0, thin=1, tune_interval=100)
    else:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(
            model, t, iter=50000, burn=10000, thin=40, tune_interval=1000, verbose=True
        )

    # attach posterior predictive summaries and residuals to the data table
    stats = dm.vars["p_pred"].stats(batches=5)
    dm.vars["data"]["mu_pred"] = stats["mean"]
    dm.vars["data"]["sigma_pred"] = stats["standard deviation"]

    stats = dm.vars["pi"].stats(batches=5)
    dm.vars["data"]["mc_error"] = stats["mc error"]

    dm.vars["data"]["residual"] = dm.vars["data"]["value"] - dm.vars["data"]["mu_pred"]
    dm.vars["data"]["abs_residual"] = pl.absolute(dm.vars["data"]["residual"])

    graphics.plot_fit(model, data_types=[t], ylab=["PY"], plot_config=(1, 1), fig_size=(8, 8))
    if generate_emp_priors:
        # the level bounds do not depend on region/sex/year, so look them up once
        if t in model.parameters and "level_bounds" in model.parameters[t]:
            lower = model.parameters[t]["level_bounds"]["lower"]
            upper = model.parameters[t]["level_bounds"]["upper"]
        else:
            lower = 0
            upper = pl.inf

        for region in dismod3.settings.gbd_regions:
            a = dismod3.utils.clean(region)
            print("generating empirical prior for %s" % a)
            for s in dismod3.settings.gbd_sexes:
                for y in dismod3.settings.gbd_years:
                    key = dismod3.utils.gbd_key_for(param_type, a, y, s)

                    emp_priors = covariate_model.predict_for(
                        model,
                        model.parameters[t],
                        "all",
                        "total",
                        "all",
                        a,
                        dismod3.utils.clean(s),
                        int(y),
                        alt_prior,
                        rate_vars,
                        lower,
                        upper,
                    )
                    dm.set_mcmc("emp_prior_mean", key, emp_priors.mean(0))

                    if "eta" in rate_vars:
                        _, n_ages = emp_priors.shape  # N samples, for A age groups
                        # repeat exp(eta) once per age group so the delta
                        # matrix matches the shape of the prediction matrix
                        exp_eta = pl.exp(rate_vars["eta"].trace())
                        delta_trace = pl.transpose([exp_eta] * n_ages)
                        emp_prior_std = pl.sqrt(emp_priors.var(0) + (emp_priors ** 2 / delta_trace).mean(0))
                    else:
                        emp_prior_std = emp_priors.std(0)
                    dm.set_mcmc("emp_prior_std", key, emp_prior_std)

                    pl.plot(
                        model.parameters["ages"],
                        dm.get_mcmc("emp_prior_mean", key),
                        color="grey",
                        label=a,
                        zorder=-10,
                        alpha=0.5,
                    )
    pl.savefig(job_dir + "/prior-%s.png" % param_type)

    store_effect_coefficients(dm, rate_vars, param_type)

    # graphics.plot_one_ppc(rate_vars, t)
    # pl.savefig(job_dir + '/prior-%s-ppc.png'%param_type)

    graphics.plot_acorr(model)
    pl.savefig(job_dir + "/prior-%s-convergence.png" % param_type)
    graphics.plot_trace(model)
    pl.savefig(job_dir + "/prior-%s-trace.png" % param_type)

    graphics.plot_one_effects(model, t)
    pl.savefig(job_dir + "/prior-%s-effects.png" % param_type)

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    try:
        dm.save("dm-%d-prior-%s.json" % (id, param_type))
    except IOError as e:
        # saving is best-effort: report the problem but still hand back the model
        print(e)

    return dm
Ejemplo n.º 20
0
def validate_prior_similarity():
    """ Refit one region/sex/year at each heterogeneity level and plot the results

    Loads model 20928, keeps only the prevalence ('p') input rows that
    pass the area/year/sex filter below, and then, for each of the
    heterogeneity settings Slightly, Moderately and Very, sets up the
    regional model, fits a data model and draws convergence and
    rate-type plots titled with the setting.  Shows the figures and
    returns the disease model.
    """
    # alternative configuration used previously:
    #   model 20945, t = 'i', area/sex/year = 'europe_eastern', 'male', 2005
    dm = dismod3.load_disease_model(20928)
    dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    t = 'p'
    area, sex, year = 'sub-saharan_africa_central', 'male', 2005

    model = dm.model
    subtree = nx.traversal.bfs_tree(model.hierarchy, area)

    # keep rows for areas in this region (or 'all'), matching years, and
    # sex equal to the requested sex or 'total'
    # NOTE(review): for year == 2005 a row failing year_end >= 1997 can
    # still pass through the year_start <= 1997 alternative -- confirm
    # this is the intended year filter
    relevant_rows = []
    for row_id, row in model.input_data.T.iteritems():
        if row['area'] not in subtree and row['area'] != 'all':
            continue
        if not ((year == 2005 and row['year_end'] >= 1997) or row['year_start'] <= 1997):
            continue
        if row['sex'] not in [sex, 'total']:
            continue
        relevant_rows.append(row_id)
    model.input_data = model.input_data.ix[relevant_rows]

    # rows recorded for area 'all' are relabelled as belonging to this area
    area_column = model.input_data['area']
    area_column[area_column == 'all'] = area

    for het in 'Slightly Moderately Very'.split():
        params = dm.model.parameters[t]
        params['parameter_age_mesh'] = [0, 15, 20, 25, 35, 45, 55, 65, 75, 100]
        params['heterogeneity'] = het
        setup_regional_model(dm, area, sex, year)

        dm.vars = {}
        rate_vars = data_model.data_model(
            t, dm.model, t,
            root_area=area, root_sex=sex, root_year=year,
            mu_age=None,
            mu_age_parent=dm.emp_priors[t, 'mu'],
            sigma_age_parent=dm.emp_priors[t, 'sigma'],
            rate_type='log_normal' if t == 'rr' else 'neg_binom')
        dm.vars[t] = rate_vars

        fit_model.fit_data_model(rate_vars, iter=10050, burn=5000,
                                 thin=50, tune_interval=100)

        # one convergence figure and one rate figure per setting
        graphics.plot_convergence_diag(rate_vars)
        pl.title(het)

        graphics.plot_one_type(dm.model, rate_vars, dm.emp_priors, t)
        pl.title(het)

    pl.show()
    return dm
Ejemplo n.º 21
0
# Plot the mean over draws of the prevalence rows of Y, one line per
# country (Iso3 code).  Y, pandas and weighted_age are expected to be
# defined before this script fragment runs.
pl.figure()
for iso in list(pl.unique(Y["Iso3"])):
    pl.plot(Y[(Y["Iso3"] == iso) & (Y["Rate type"] == "prevalence")].filter(like="Draw").mean(1).__array__(), label=iso)

# drawing a single dummy point with semilogy switches the y axis to log scale
pl.semilogy([1], [1])

# overlay the age-wise aggregate of all prevalence rows
Z = Y[Y["Rate type"] == "prevalence"].groupby("Age").apply(weighted_age)
pl.plot(Z.mean(1).__array__(), color="red", linewidth=3, alpha=0.5, label="Inconsistent NA/ME")

pl.legend()
pl.axis([-5, 130, 1e-6, 2])


import dismod3

dm = dismod3.load_disease_model(19807)
import fit_posterior

# fit the posterior for one region/sex/year, then read the per-country
# csv from the working directory
# NOTE(review): presumably this csv is written by the fit above -- confirm
fit_posterior.fit_posterior(dm, "north_africa_middle_east", "male", "2005", map_only=True)
X = pandas.read_csv(
    "/var/tmp/dismod_working/test/dm-19807/posterior/dm-19807-north_africa_middle_east-male-2005.csv", index_col=None
)
pl.figure()
for iso in list(pl.unique(X["Iso3"])):
    pl.plot(X[(X["Iso3"] == iso)].filter(like="Draw").mean(1).__array__(), label=iso)

pl.semilogy([1], [1])


Z = X.groupby("Age").apply(weighted_age)
# use pl.plot like the rest of the script; a bare plot() only exists in a
# pylab interactive namespace
pl.plot(Z.mean(1).__array__(), color="red", linewidth=3, alpha=0.5, label="Inconsistent NA/ME")
Ejemplo n.º 22
0
# Plot the mean over draws of the prevalence rows of Y, one line per
# country (Iso3 code).  Y, pandas and weighted_age are expected to be
# defined before this script fragment runs.
for iso in list(pl.unique(Y['Iso3'])):
    pl.plot(Y[(Y['Iso3']==iso)&(Y['Rate type']=='prevalence')].filter(like='Draw').mean(1).__array__(), label=iso)

# drawing a single dummy point with semilogy switches the y axis to log scale
pl.semilogy([1],[1])

# overlay the age-wise aggregate of all prevalence rows
Z = Y[Y['Rate type'] == 'prevalence'].groupby('Age').apply(weighted_age)
pl.plot(Z.mean(1).__array__(), color='red', linewidth=3, alpha=.5, label='Inconsistent NA/ME')

pl.legend()
pl.axis([-5,130,1e-6,2])




import dismod3
dm = dismod3.load_disease_model(19807)
import fit_posterior
# fit the posterior for one region/sex/year, then read the per-country
# csv from the working directory
# NOTE(review): presumably this csv is written by the fit above -- confirm
fit_posterior.fit_posterior(dm, 'north_africa_middle_east', 'male', '2005', map_only=True)
X = pandas.read_csv('/var/tmp/dismod_working/test/dm-19807/posterior/dm-19807-north_africa_middle_east-male-2005.csv', index_col=None)
pl.figure()
for iso in list(pl.unique(X['Iso3'])):
    pl.plot(X[(X['Iso3']==iso)].filter(like='Draw').mean(1).__array__(), label=iso)

pl.semilogy([1],[1])


# use pl.plot like the rest of the script; a bare plot() only exists in a
# pylab interactive namespace
Z = X.groupby('Age').apply(weighted_age)
pl.plot(Z.mean(1).__array__(), color='red', linewidth=3, alpha=.5, label='Inconsistent NA/ME')

# posterior mean of the regional prevalence stochastic from the fitted model
pl.plot(dm.vars['prevalence+north_africa_middle_east+2005+male']['rate_stoch'].stats()['mean'], color='red', linewidth=3, alpha=.5, label='Mean of Consistent NA/ME')
Ejemplo n.º 23
0
def fit_all(id, consistent_empirical_prior=True, consistent_posterior=True,
            posteriors_only=False, posterior_types='p i r', fast=False,
            zero_re=True,
            alt_prior=True,
            global_heterogeneity='Slightly'):
    """ Enqueues all jobs necessary to fit specified model
    to the cluster

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    consistent_empirical_prior : bool
      If true, run a single ``fit_world.py`` job for the empirical
      priors; otherwise run one ``fit_emp_prior.py`` job per rate type
    consistent_posterior : bool
      If false, posterior jobs get ``--inconsistent=True`` and
      ``--types`` set to ``posterior_types``
    posteriors_only : bool
      If true, skip the empirical prior jobs, do not hold the posterior
      jobs on them, and pass ``--onlyposterior=True``
    posterior_types : str
      Value for the ``--types`` option of inconsistent posterior jobs
    fast, zero_re, alt_prior, global_heterogeneity
      Forwarded to the launched scripts as command line options

    Returns
    -------
    dm : the disease model loaded for ``id``

    Example
    -------
    >>> import fit_all
    >>> fit_all.fit_all(2552)
    """
    job_dir = dismod3.settings.JOB_WORKING_DIR % id  # TODO: refactor into a function

    try:
        model = data.ModelData.load(job_dir)
        print('loaded data from new format from %s' % job_dir)

        # if we make it here, this model has already been run, so clean out the stdout/stderr dirs to make room for fresh messages
        call_str = 'rm -rf %s/empirical_priors/stdout/* %s/empirical_priors/stderr/* %s/posterior/stdout/* %s/posterior/stderr/* %s/json/dm-*-*.json' % (job_dir, job_dir, job_dir, job_dir, job_dir)
        print(call_str)
        subprocess.call(call_str, shell=True)

        # now load just the model, all previous fits are deleted
        dm = dismod3.load_disease_model(id)

    except (IOError, AssertionError):
        print('downloading disease model')
        dm = dismod3.load_disease_model(id)

        import simplejson as json
        try:
            model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        except Exception as e:
            print(e)
            print('attempting to use old covariate format')
            import old_cov_data
            model = old_cov_data.from_gbd_jsons(json.loads(dm.to_json()))

        model.save(job_dir)
        print('loaded data from json, saved in new format for next time in %s' % job_dir)

    def options(fast, zero_re, alt_prior, global_heterogeneity):
        # command line options shared by the empirical prior scripts
        call_str = ''
        call_str += ' --fast=%s'%fast
        call_str += ' --zerore=%s'%zero_re
        call_str += ' --altprior=%s'%alt_prior
        call_str += ' --globalheterogeneity=%s'%global_heterogeneity
        return call_str

    def launcher(phase, log_name, job_name, hold=''):
        # command prefix: a qsub submission when on SGE, a plain python run otherwise
        if not dismod3.settings.ON_SGE:
            return 'python '
        o = '%s/%s/stdout/%s' % (job_dir, phase, log_name)
        e = '%s/%s/stderr/%s' % (job_dir, phase, log_name)
        return 'qsub -cwd -o %s -e %s %s-N %s run_on_cluster.sh ' % (o, e, hold, job_name)

    option_str = options(fast, zero_re, alt_prior, global_heterogeneity)

    # record how and when this model was enqueued; the with-block closes
    # the file even if a write fails
    import time
    with open('%s/empirical_priors/stdout/%d_running.txt' % (job_dir, id), 'w') as f:
        f.write('./run_on_cluster.sh fit_all.py --priorconsistent=%s --posteriorconsistent=%s %s\n' % (consistent_empirical_prior, consistent_posterior, option_str))
        f.write('Enqueued model %d on cluster at %s' % (id, time.strftime('%c')))

    # NOTE: all commands below are run through the shell; the values
    # interpolated into them are assumed to come from trusted settings

    # fit empirical priors (by pooling data from all regions)
    emp_names = []

    if not posteriors_only:
        if consistent_empirical_prior:
            prior_jobs = [('all', 'fit_world.py %d' % id)]
        else:
            prior_jobs = [(t, 'fit_emp_prior.py %d -t %s' % (id, t))
                          for t in ['excess-mortality', 'remission', 'incidence', 'prevalence']]

        for t, script_call in prior_jobs:
            name_str = '%s-%d' %(t[0], id)
            emp_names.append(name_str)
            call_str = launcher('empirical_priors', 'dismod_log_%s' % t, name_str)
            call_str += script_call
            call_str += option_str
            subprocess.call(call_str, shell=True)

    #fit each region/year/sex individually for this model
    if posteriors_only:
        hold_str = ''
    else:
        # posterior jobs wait for all empirical prior jobs
        hold_str = '-hold_jid %s ' % ','.join(emp_names)
    post_names = []
    for ii, r in enumerate(dismod3.gbd_regions):
        for s in dismod3.gbd_sexes:
            for y in dismod3.gbd_years:
                k = '%s+%s+%s' % (dismod3.utils.clean(r), dismod3.utils.clean(s), y)
                name_str = '%s%d%s%s%d' % (r[0], ii+1, s[0], str(y)[-1], id)
                post_names.append(name_str)

                call_str = launcher('posterior', 'dismod_log_%s' % k, name_str, hold_str)
                call_str += 'fit_posterior.py %d -r %s -s %s -y %s' % (id, dismod3.utils.clean(r), dismod3.utils.clean(s), y)

                if not consistent_posterior:
                    call_str += ' --inconsistent=True --types="%s"' % posterior_types

                if posteriors_only:
                    call_str += ' --onlyposterior=True'

                if fast:
                    call_str += ' --fast=true'

                call_str += ' --zerore=%s'%zero_re

                subprocess.call(call_str, shell=True)

    # after all posteriors have finished running, upload disease model json
    hold_str = '-hold_jid %s ' % ','.join(post_names)
    call_str = launcher('empirical_priors', '%d_upload.txt' % id, 'upld-%s' % id, hold_str)
    call_str += 'upload_fits.py %d' % id
    subprocess.call(call_str, shell=True)

    return dm
Ejemplo n.º 24
0
import dismod3
import book_graphics
reload(book_graphics)

# container for results and the sampler settings used by this script
results = dict()
n_pred = 10000
iter, burn, thin = 20000, 10000, 10

# set font
book_graphics.set_font()

### @export 'data'
# TODO: migrate data into a csv, load with pandas
dm = dismod3.load_disease_model(15630)
dm.calc_effective_sample_size(dm.data)

# keep male prevalence rows with age_start in [15, 20), age_end of 99
# and an effective sample size above 1
some_data = []
for d in dm.data:
    if d['data_type'] != 'prevalence data' or d['sex'] != 'male':
        continue
    if not 15 <= d['age_start'] < 20 or d['age_end'] != 99:
        continue
    if d['effective_sample_size'] > 1:
        some_data.append(d)

# summaries of the selected rows
regions = []
year_starts = []
year_ends = []
cy = []
for s in some_data:
    regions.append(s['region'])
    year_starts.append(s['year_start'])
    year_ends.append(s['year_end'])
    cy.append('%s-%d'%(s['region'], s['year_start']))
countries = pl.unique(regions)
min_year = min(year_starts)
max_year = max(year_ends)

# effective sample sizes and values from dm.value_per_1, as arrays
n = pl.array([s['effective_sample_size'] for s in some_data])
r = pl.array([dm.value_per_1(s) for s in some_data])
Ejemplo n.º 25
0
# Script fragment: count how often each (age_start, age_end) pair occurs
# in the prevalence data of one disease model, then start a figure for it.
import pylab as pl
import pymc as mc

import dismod3
import book_graphics
reload(book_graphics)

# set font
book_graphics.set_font()

results = {}

### @export 'data'
#dm = dismod3.load_disease_model(15596)  # epilepsy
dm = dismod3.load_disease_model(16240)  # af

data = dm.filter_data('prevalence+all+all+all')

# hist[a_start, a_end] counts the rows with that age interval; the 101x101
# grid assumes integer ages from 0 to 100 -- TODO confirm against the data
hist = pl.zeros((101,101))
for d in data:
    hist[d['age_start'], d['age_end']] += 1

# largest count of any single age interval, and total number of rows
most_freq_cnt = hist.max()
rows_total = len(data)

### @export 'scatter-prevalence-age-groups'

pl.figure(**book_graphics.half_page_params)
#pl.subplot(1,2,2)
# NOTE(review): this snippet ends at the loop header below; the loop body
# is not part of the fragment
for a_0 in range(101):
Ejemplo n.º 26
0
def measure_fit(id, condition):
    """
    Determine the RMSE, MAE, and Coverage of the fit stored in model specified by id

    Parameters
    ----------
    id : int
      The model id number to download
    condition : str
      Prefix of the gold-standard file ``<condition>_gold.tsv`` in OUTPUT_PATH

    Results
    -------
    Prints, for each parameter type, the RMSE and the median absolute
    error (labelled MAE) of the absolute and percent relative errors,
    and the percent coverage of the uncertainty interval; returns the
    median absolute percent relative error for incidence_x_duration.
    """

    print('downloading model %d' % id)
    sys.stdout.flush()
    dm = dismod3.load_disease_model(id)

    # load gold-standard data, closing the file as soon as it is read
    with open(OUTPUT_PATH + '%s_gold.tsv' % condition) as gold_file:
        gold_data = list(csv.DictReader(gold_file, dialect='excel-tab'))

    # one list of per-row results for each parameter type and each metric
    param_types = ['incidence', 'prevalence', 'remission', 'duration',
                   'incidence_x_duration', 'excess mortality']
    abs_err = dict((t, []) for t in param_types)
    rel_err = dict((t, []) for t in param_types)
    coverage = dict((t, []) for t in param_types)

    for d in gold_data:
        est = predict('mean', dm, d)
        lb = predict('lower_ui', dm, d)
        ub = predict('upper_ui', dm, d)
        if est < 0:
            # rows with a negative estimate are left out of all metrics
            continue
        val = float(d['Truth'])
        err = val - est

        t = d['Parameter'].replace(' data', '')
        abs_err[t].append(err)
        if val > 0.:
            # relative error is undefined when the true value is zero
            rel_err[t].append(100 * err / val)
        coverage[t].append(lb <= val <= ub)

    for k in abs_err:
        print('%s abs RMSE = %f' % (k, np.sqrt(np.mean(
            np.array(abs_err[k])**2))))
        print('%s abs  MAE = %f' % (k, np.median(np.abs(abs_err[k]))))
    print('')

    for k in rel_err:
        print('%s rel pct RMSE = %f' % (
            k, np.sqrt(np.mean(np.array(rel_err[k])**2))))
        print('%s rel pct  MAE = %f' % (k, np.median(np.abs(rel_err[k]))))
    print('')

    for k in coverage:
        print('%s coverage = %f' % (k, np.sum(coverage[k]) * 100. /
                                    len(coverage[k])))
    print('')

    k = 'incidence_x_duration'
    print('%s rel pct MAE =\t%f' % (k, np.median(np.abs(rel_err[k]))))

    # NOTE: code that rewrote the gold tsv with an 'Estimate Value' column
    # used to follow the return below; it could never run and was dropped
    return np.median(np.abs(rel_err[k]))
Ejemplo n.º 27
0
def fit_posterior(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    Returns
    -------
    dm : the fitted disease model

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    #print 'updating job status on server'
    #dismod3.log_job_status(id, 'posterior', '%s--%s--%s' % (region, sex, year), 'Running')

    dm = dismod3.load_disease_model(id)
    #dm.data = []  # for testing, remove all data
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])

    # fit the model
    job_dir = dismod3.settings.JOB_WORKING_DIR % id
    import dismod3.gbd_disease_model as model
    model.fit(dm, method='map', keys=keys, verbose=1)     ## first generate decent initial conditions
    ## then sample the posterior via MCMC
    model.fit(dm, method='mcmc', keys=keys, iter=50000, thin=25, burn=25000, verbose=1,
              dbname='%s/posterior/pickle/dm-%d-posterior-%s-%s-%s.pickle' % (job_dir, id, region, sex, year))

    # generate plots of results: one tile plot of everything, then one per output type
    dismod3.tile_plot_disease_model(dm, keys, defaults={})
    dm.savefig('dm-%d-posterior-%s.png' % (id, '+'.join(['all', region, sex, year])))  # TODO: refactor naming into its own function (disease_json.save_image perhaps)
    for param_type in dismod3.settings.output_data_types:
        keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex], type_list=[param_type])
        dismod3.tile_plot_disease_model(dm, keys, defaults={})
        dm.savefig('dm-%d-posterior-%s-%s-%s-%s.png' % (id, dismod3.utils.clean(param_type), region, sex, year))   # TODO: refactor naming into its own function


    # summarize fit quality graphically, as well as parameter posteriors
    # (only for keys that have data attached)
    for k in dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex]):
        if dm.vars[k].get('data'):
            dismod3.plotting.plot_posterior_predicted_checks(dm, k)
            dm.savefig('dm-%d-check-%s.png' % (dm.id, k))


    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (id, region, sex, year), keys_to_save=keys)

    # save country level posterior
    save_country_level_posterior(dm, region, year, sex)

    # update job status file
    #print 'updating job status on server'
    #dismod3.log_job_status(id, 'posterior',
    #                       '%s--%s--%s' % (region, sex, year), 'Completed')
    return dm
Ejemplo n.º 28
0
def main():
    """ Parse the command line, load the requested model and fit its posterior

    Expects exactly one positional argument, the integer disease model
    id; exits through the option parser's error handler otherwise.
    Returns the disease model produced by fit_posterior.
    """
    import optparse

    parser = optparse.OptionParser('usage: %prog [options] disease_model_id')

    # (short flag, long flag, default, help), registered in this order
    option_table = (
        ('-s', '--sex', 'male',
         'only estimate given sex (valid settings ``male``, ``female``, ``all``)'),
        ('-y', '--year', '2005',
         'only estimate given year (valid settings ``1990``, ``2005``, ``2010``)'),
        ('-r', '--region', 'australasia',
         'only estimate given GBD Region'),
        ('-f', '--fast', 'False',
         'use MAP only'),
        ('-i', '--inconsistent', 'False',
         'use inconsistent model for posteriors'),
        ('-t', '--types', 'p i r',
         'with rate types to fit (only used if inconsistent=true)'),
        ('-z', '--zerore', 'true',
         'enforce zero constraint on random effects'),
        ('-o', '--onlyposterior', 'False',
         'skip empirical prior phase'),
    )
    for short_flag, long_flag, default_value, help_text in option_table:
        parser.add_option(short_flag, long_flag,
                          default=default_value, help=help_text)

    options, args = parser.parse_args()

    if len(args) != 1:
        parser.error('incorrect number of arguments')

    try:
        id = int(args[0])
    except ValueError:
        parser.error('disease_model_id must be an integer')

    dm = dismod3.load_disease_model(id)

    # set model id to passed-in id (should not be necessary)
    dm.id = id
    assert id == dm.id, 'model id should equal parameter id'

    def is_true(text):
        # boolean options arrive as strings; only 'true' (any case) enables them
        return text.lower() == 'true'

    dm = fit_posterior(dm,
                       options.region,
                       options.sex,
                       options.year,
                       fast_fit=is_true(options.fast),
                       inconsistent_fit=is_true(options.inconsistent),
                       params_to_fit=options.types.split(),
                       posteriors_only=is_true(options.onlyposterior),
                       zero_re=is_true(options.zerore))

    return dm
Ejemplo n.º 29
0
def fit_emp_prior(id,
                  param_type,
                  fast_fit=False,
                  generate_emp_priors=True,
                  zero_re=True,
                  alt_prior=False,
                  global_heterogeneity='Slightly'):
    """ Fit empirical prior of specified type for specified model

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    param_type : str, one of incidence, prevalence, remission, excess-mortality, prevalence_x_excess-mortality
      The disease parameter to generate empirical priors for

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    """

    dir = dismod3.settings.JOB_WORKING_DIR % id

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    dm = dismod3.load_disease_model(id)

    try:
        model = data.ModelData.load(dir)
        print 'loaded data from new format from %s' % dir
    except (IOError, AssertionError):
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        #model.save(dir)
        print 'loaded data from json, saved in new format for next time in %s' % dir

    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith('x_'):
            model.input_data[col] = model.input_data[col].fillna(0.)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    # set all heterogeneity priors to Slightly for the global fit
    for t in model.parameters:
        if 'heterogeneity' in model.parameters[t]:
            model.parameters[t]['heterogeneity'] = global_heterogeneity

    t = {
        'incidence': 'i',
        'prevalence': 'p',
        'remission': 'r',
        'excess-mortality': 'f',
        'prevalence_x_excess-mortality': 'pf'
    }[param_type]
    model.input_data = model.get_data(t)
    if len(model.input_data) == 0:
        print 'No data for type %s, exiting' % param_type
        return dm

    ### For testing:
    ## speed up computation by reducing number of knots
    ## model.parameters[t]['parameter_age_mesh'] = [0, 10, 20, 40, 60, 100]

    ## smooth Slightly, Moderately, or Very
    ## model.parameters[t]['smoothness'] = dict(age_start=0, age_end=100, amount='Very')

    ## speed up computation be reducing data size
    ## predict_area = 'super-region_0'
    ## predict_year=2005
    ## predict_sex='total'
    ## subtree = nx.traversal.bfs_tree(model.hierarchy, predict_area)
    ## relevant_rows = [i for i, r in model.input_data.T.iteritems() \
    ##                      if (r['area'] in subtree or r['area'] == 'all')\
    ##                      and (r['year_end'] >= 1997) \
    ##                      and r['sex'] in [predict_sex, 'total']]
    ## model.input_data = model.input_data.ix[relevant_rows]

    # testing changes
    #model.input_data['effective_sample_size'] = pl.minimum(1.e3, model.input_data['effective_sample_size'])
    #missing_ess = pl.isnan(model.input_data['effective_sample_size'])
    #model.input_data['effective_sample_size'][missing_ess] = 1.
    #model.input_data['z_overdisperse'] = 1.
    #print model.describe(t)
    #model.input_data = model.input_data[model.input_data['area'].map(lambda x: x in nx.bfs_tree(model.hierarchy, 'super-region_5'))]
    #model.input_data = model.input_data = model.input_data.drop(['x_LDI_id_Updated_7July2011'], axis=1)
    #model.input_data = model.input_data.filter([model.input_data['x_nottroponinuse'] == 0.]
    #model.input_data = model.input_data[:100]

    ## speed up output by not making predictions for empirical priors
    #generate_emp_priors = False

    print 'fitting', t
    model.vars += ism.age_specific_rate(model,
                                        t,
                                        reference_area='all',
                                        reference_sex='total',
                                        reference_year='all',
                                        mu_age=None,
                                        mu_age_parent=None,
                                        sigma_age_parent=None,
                                        rate_type=(t == 'rr') and 'log_normal'
                                        or 'neg_binom',
                                        zero_re=zero_re)
    # for backwards compatibility, should be removed eventually
    dm.model = model
    dm.vars = model.vars[t]
    vars = dm.vars

    if fast_fit:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(model,
                                              t,
                                              iter=101,
                                              burn=0,
                                              thin=1,
                                              tune_interval=100)
    else:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(model,
                                              t,
                                              iter=50000,
                                              burn=10000,
                                              thin=40,
                                              tune_interval=1000,
                                              verbose=True)

    stats = dm.vars['p_pred'].stats(batches=5)
    dm.vars['data']['mu_pred'] = stats['mean']
    dm.vars['data']['sigma_pred'] = stats['standard deviation']

    stats = dm.vars['pi'].stats(batches=5)
    dm.vars['data']['mc_error'] = stats['mc error']

    dm.vars['data'][
        'residual'] = dm.vars['data']['value'] - dm.vars['data']['mu_pred']
    dm.vars['data']['abs_residual'] = pl.absolute(dm.vars['data']['residual'])

    graphics.plot_fit(model,
                      data_types=[t],
                      ylab=['PY'],
                      plot_config=(1, 1),
                      fig_size=(8, 8))
    if generate_emp_priors:
        for a in [
                dismod3.utils.clean(a) for a in dismod3.settings.gbd_regions
        ]:
            print 'generating empirical prior for %s' % a
            for s in dismod3.settings.gbd_sexes:
                for y in dismod3.settings.gbd_years:
                    key = dismod3.utils.gbd_key_for(param_type, a, y, s)
                    if t in model.parameters and 'level_bounds' in model.parameters[
                            t]:
                        lower = model.parameters[t]['level_bounds']['lower']
                        upper = model.parameters[t]['level_bounds']['upper']
                    else:
                        lower = 0
                        upper = pl.inf

                    emp_priors = covariate_model.predict_for(
                        model, model.parameters[t], 'all', 'total', 'all', a,
                        dismod3.utils.clean(s), int(y), alt_prior, vars, lower,
                        upper)
                    dm.set_mcmc('emp_prior_mean', key, emp_priors.mean(0))

                    if 'eta' in vars:
                        N, A = emp_priors.shape  # N samples, for A age groups
                        delta_trace = pl.transpose([
                            pl.exp(vars['eta'].trace()) for _ in range(A)
                        ])  # shape delta matrix to match prediction matrix
                        emp_prior_std = pl.sqrt(
                            emp_priors.var(0) +
                            (emp_priors**2 / delta_trace).mean(0))
                    else:
                        emp_prior_std = emp_priors.std(0)
                    dm.set_mcmc('emp_prior_std', key, emp_prior_std)

                    pl.plot(model.parameters['ages'],
                            dm.get_mcmc('emp_prior_mean', key),
                            color='grey',
                            label=a,
                            zorder=-10,
                            alpha=.5)
    pl.savefig(dir + '/prior-%s.png' % param_type)

    store_effect_coefficients(dm, vars, param_type)

    #graphics.plot_one_ppc(vars, t)
    #pl.savefig(dir + '/prior-%s-ppc.png'%param_type)

    graphics.plot_acorr(model)
    pl.savefig(dir + '/prior-%s-convergence.png' % param_type)
    graphics.plot_trace(model)
    pl.savefig(dir + '/prior-%s-trace.png' % param_type)

    graphics.plot_one_effects(model, t)
    pl.savefig(dir + '/prior-%s-effects.png' % param_type)

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    try:
        dm.save('dm-%d-prior-%s.json' % (id, param_type))
    except IOError, e:
        print e
Ejemplo n.º 30
0
def measure_fit(id, condition):
    """
    Determine the RMSE, MAE, and Coverage of the fit stored in model specified by id
    """

    print 'downloading model %d' % id
    sys.stdout.flush()
    dm = dismod3.load_disease_model(id)

    #print 'loading gold-standard data'
    gold_data = [d for d in csv.DictReader(open(OUTPUT_PATH + '%s_gold.tsv' % condition), dialect='excel-tab')]


    #print 'comparing values'
    abs_err = dict(incidence=[], prevalence=[], remission=[], duration=[], incidence_x_duration=[])
    rel_err = dict(incidence=[], prevalence=[], remission=[], duration=[], incidence_x_duration=[])
    coverage = dict(incidence=[], prevalence=[], remission=[], duration=[], incidence_x_duration=[])

    for metric in [abs_err, rel_err, coverage]:
        metric['excess mortality'] = []

    for d in gold_data:
        est = predict('mean', dm, d)
        lb = predict('lower_ui', dm, d)
        ub = predict('upper_ui', dm, d)
        if est < 0:
            continue
        val = float(d['Truth'])
        err = val - est


        #if d['Age Start'] <= 50:
        #    continue

        t = d['Parameter'].replace(' data', '')
        abs_err[t].append(err)
        if val > 0.:
            rel_err[t].append(100 * err / val)
        coverage[t].append(val >= lb and val <= ub)
    
    for k in abs_err:
        print '%s abs RMSE = %f' % (k, np.sqrt(np.mean(np.array(abs_err[k])**2)))
        print '%s abs  MAE = %f' % (k, np.median(np.abs(abs_err[k])))
    print
    
    for k in rel_err:
        print '%s rel pct RMSE = %f' % (k, np.sqrt(np.mean(np.array(rel_err[k])**2)))
        print '%s rel pct  MAE = %f' % (k, np.median(np.abs(rel_err[k])))
    print

    for k in coverage:
        print '%s coverage = %f' % (k, np.sum(coverage[k]) * 100. / len(coverage[k]))
    print

    k = 'incidence_x_duration'
    print '%s rel pct MAE =\t%f' % (k, np.median(np.abs(rel_err[k])))
    return np.median(np.abs(rel_err[k]))

    # add estimate value as a column in the gold data tsv, for looking
    # in more detail with a spreadsheet or different code
    col_names = sorted(set(gold_data[0].keys()) | set(['Estimate Value']))
    f_file = open(OUTPUT_PATH + '%s_gold.tsv' % condition, 'w')
    csv_f = csv.writer(f_file, dialect='excel-tab')
    csv_f.writerow(col_names)
    csv_f = csv.DictWriter(f_file, col_names, dialect='excel-tab')
    for d in gold_data:
        csv_f.writerow(d)
    f_file.close()
Ejemplo n.º 31
0
    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.mu['abs_err'].mean(),
                                                                         pl.median(pl.absolute(model.mu['rel_err'].dropna())),
                                                                         model.mu['covered?'].mean())
    print

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'mu')
    data_simulation.finalize_results(model)

    print model.results

    return model


if __name__ == '__main__':
    # stand-alone run: simulate, fit, and store results for one
    # hard-coded region/sex/year of one hard-coded model
    region, sex, year = 'north_america_high_income', 'male', 1990
    model_id = 24842

    import fit_posterior
    import upload_fits
    import data
    import simplejson as json

    ## load the model from disk or from web
    dm = dismod3.load_disease_model(model_id)
    dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    # keep the merged csv table under its own name, so the module-level
    # name `data` keeps referring to the imported module
    merged_fits = upload_fits.merge_data_csvs(model_id)
    dm.model.input_data['mu_pred'] = merged_fits['mu_pred']

    simulate_data(dm, region, sex, year)
    fit_simulated(dm, region, sex, year)
    store_results(dm, region, sex, year)
Ejemplo n.º 32
0
import pylab as pl
import pymc as mc

import dismod3
import book_graphics

# re-import book_graphics so that edits to it are picked up when this
# script is re-run in the same interpreter session
reload(book_graphics)

# set font
book_graphics.set_font()

# module-level container; nothing is stored in it in the lines shown here
results = {}

### @export 'data'
#dm = dismod3.load_disease_model(15596)  # epilepsy
dm = dismod3.load_disease_model(16240)  # af

# select the rows matching the key 'prevalence+all+all+all'
data = dm.filter_data('prevalence+all+all+all')

# hist[a_0, a_1] counts the data rows with age_start == a_0 and
# age_end == a_1
# NOTE(review): assumes both ages are integers in 0..100 -- confirm
hist = pl.zeros((101, 101))
for d in data:
    hist[d['age_start'], d['age_end']] += 1

# count for the most common (age_start, age_end) pair, and number of rows
most_freq_cnt = hist.max()
rows_total = len(data)

### @export 'scatter-prevalence-age-groups'

# figure sized from the keyword arguments in book_graphics.half_page_params
pl.figure(**book_graphics.half_page_params)
#pl.subplot(1,2,2)
for a_0 in range(101):
Ejemplo n.º 33
0
def fit_world(id, fast_fit=False, zero_re=True, alt_prior=False, global_heterogeneity='Slightly'):
    """ Fit consistent for all data in world

    Parameters
    ----------
    id : int
      The model id number for the job to fit

    Example
    -------
    >>> import fit_world
    >>> dm = fit_world.dismod3.load_disease_model(1234)
    >>> fit_world.fit_world(dm)
    """

    dir = dismod3.settings.JOB_WORKING_DIR % id

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    try:
        model = data.ModelData.load(dir)
        print 'loaded data from new format from %s' % dir
        dm = dismod3.load_disease_model(id)
    except (IOError, AssertionError):
        dm = dismod3.load_disease_model(id)
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        try:
            model.save(dir)
            print 'loaded data from json, saved in new format for next time in %s' % dir
        except IOError:
            print 'loaded data from json, failed to save in new format'


    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith('x_'):
            model.input_data[col] = model.input_data[col].fillna(0.)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    # set all heterogeneity priors to Slightly for the global fit
    for t in model.parameters:
        if 'heterogeneity' in model.parameters[t]:
            model.parameters[t]['heterogeneity'] = global_heterogeneity

    ### For testing:
    ## speed up computation by reducing number of knots
    ## for t in 'irf':
    ##     model.parameters[t]['parameter_age_mesh'] = [0, 100]
    model.vars += dismod3.ism.consistent(model,
                                         reference_area='all',
                                         reference_sex='total',
                                         reference_year='all',
                                         priors={},
                                         zero_re=zero_re)

    ## fit model to data
    if fast_fit:
        dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, 105, 0, 1, 100)
    else:
        dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, iter=50000, burn=10000, thin=40, tune_interval=1000, verbose=True)

    dm.model = model

    # borrow strength to inform sigma_alpha between rate types post-hoc
    types_with_re = ['rr', 'f', 'i', 'm', 'smr', 'p', 'r', 'pf', 'm_with', 'X']
    ## first calculate sigma_alpha_bar from posterior draws from each alpha
    alpha_vals = []
    for type in types_with_re:
        if 'alpha' in model.vars[type]:
            for alpha_i in model.vars[type]['alpha']:
                alpha_vals += [a for a in alpha_i.trace() if a != 0]  # remove zeros because areas with no siblings are included for convenience but are pinned to zero
    ## then blend sigma_alpha_i and sigma_alpha_bar for each sigma_alpha_i
    if len(alpha_vals) > 0:
        sigma_alpha_bar = pl.std(alpha_vals)
        for type in types_with_re:
            if 'sigma_alpha' in model.vars[type]:
                for sigma_alpha_i in model.vars[type]['sigma_alpha']:
                    cur_val = sigma_alpha_i.trace()
                    sigma_alpha_i.trace._trace[0] = (cur_val + sigma_alpha_bar) * pl.ones_like(sigma_alpha_i.trace._trace[0])


    for t in 'p i r f rr pf m_with'.split():
        param_type = dict(i='incidence', r='remission', f='excess-mortality', p='prevalence', rr='relative-risk', pf='prevalence_x_excess-mortality', m_with='mortality')[t]
        #graphics.plot_one_type(model, model.vars[t], {}, t)
        for a in [dismod3.utils.clean(a) for a in dismod3.settings.gbd_regions]:
            print 'generating empirical prior for %s' % a
            for s in dismod3.settings.gbd_sexes:
                for y in dismod3.settings.gbd_years:
                    key = dismod3.utils.gbd_key_for(param_type, a, y, s)
                    if t in model.parameters and 'level_bounds' in model.parameters[t]:
                        lower=model.parameters[t]['level_bounds']['lower']
                        upper=model.parameters[t]['level_bounds']['upper']
                    else:
                        lower=0
                        upper=pl.inf
                        
                    emp_priors = covariate_model.predict_for(model,
                                                             model.parameters.get(t, {}),
                                                             'all', 'total', 'all',
                                                             a, dismod3.utils.clean(s), int(y),
                                                             alt_prior,
                                                             model.vars[t], lower, upper)
                    dm.set_mcmc('emp_prior_mean', key, emp_priors.mean(0))
                    if 'eta' in model.vars[t]:
                        N,A = emp_priors.shape  # N samples, for A age groups
                        delta_trace = pl.transpose([pl.exp(model.vars[t]['eta'].trace()) for _ in range(A)])  # shape delta matrix to match prediction matrix
                        emp_prior_std = pl.sqrt(emp_priors.var(0) + (emp_priors**2 / delta_trace).mean(0))
                    else:
                        emp_prior_std = emp_priors.std(0)
                    dm.set_mcmc('emp_prior_std', key, emp_prior_std)


        from fit_emp_prior import store_effect_coefficients
        store_effect_coefficients(dm, model.vars[t], param_type)

    
        if 'p_pred' in model.vars[t]:
            graphics.plot_one_ppc(model, t)
            pl.savefig(dir + '/prior-%s-ppc.png'%param_type)

        if 'p_pred' in model.vars[t] or 'lb' in model.vars[t]:
            graphics.plot_one_effects(model, t)
            pl.savefig(dir + '/prior-%s-effects.png'%param_type)


    for t in 'i r f p rr pf X m_with smr'.split():
        fname = dir + '/empirical_priors/data-%s.csv'%t
        print 'saving tables for', t, 'to', fname
        if 'data' in model.vars[t] and 'p_pred' in model.vars[t]:
            stats = model.vars[t]['p_pred'].stats(batches=5)
            model.vars[t]['data']['mu_pred'] = stats['mean']
            model.vars[t]['data']['sigma_pred'] = stats['standard deviation']

            stats = model.vars[t]['pi'].stats(batches=5)
            model.vars[t]['data']['mc_error'] = stats['mc error']

            model.vars[t]['data']['residual'] = model.vars[t]['data']['value'] - model.vars[t]['data']['mu_pred']
            model.vars[t]['data']['abs_residual'] = pl.absolute(model.vars[t]['data']['residual'])
            #if 'delta' in model.vars[t]:
            #    model.vars[t]['data']['logp'] = [mc.negative_binomial_like(n*p_obs, n*p_pred, n*p_pred*d) for n, p_obs, p_pred, d \
            #                                  in zip(model.vars[t]['data']['effective_sample_size'],
            #                                         model.vars[t]['data']['value'],
            #                                         model.vars[t]['data']['mu_pred'],
            #                                         pl.atleast_1d(model.vars[t]['delta'].stats()['mean']))]
            model.vars[t]['data'].to_csv(fname)


    graphics.plot_fit(model)
    pl.savefig(dir + '/prior.png')

    graphics.plot_acorr(model)
    pl.savefig(dir + '/prior-convergence.png')

    graphics.plot_trace(model)
    pl.savefig(dir + '/prior-trace.png')
    
    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    try:
        dm.save('dm-%d-prior-%s.json' % (dm.id, 'all'))
    except IOError, e:
        print e
Ejemplo n.º 34
0
import book_graphics

# re-import book_graphics so that edits to it are picked up when this
# script is re-run in the same interpreter session
reload(book_graphics)

# module-level container; nothing is stored in it in the lines shown here
results = {}

### @export 'data'
# region/sex/year selection; not used within the lines shown here
region = 'north_america_high_income'
sex = 'female'
year = '2005'

# two heterogeneity settings, one per loop pass below
# NOTE(review): presumably indexed by ii further down -- the visible part
# of the loop does not use it yet
heterogeneity = ['Slightly', 'Very']

for ii in range(2):
    # load model
    # (reloaded on each pass, so each pass starts from a fresh model)
    dm = dismod3.load_disease_model(16370)

    # set expert priors and other model parameters
    dm.set_param_age_mesh([0, 15, 20, 25, 30, 35, 40, 45, 50, 100])

    # incidence: level-value prior before age 15 and after age 50,
    # smoothness prior starting at age 15
    dm.params['global_priors']['level_value']['incidence']['age_before'] = 15
    dm.params['global_priors']['level_value']['incidence']['age_after'] = 50
    dm.params['global_priors']['smoothness']['incidence']['age_start'] = 15

    # remission: level-value prior before age 40, upper level bound of 10
    dm.params['global_priors']['level_value']['remission']['age_before'] = 40
    dm.params['global_priors']['level_bounds']['remission']['upper'] = 10.

    # excess mortality: age_before of 101 lies past the last mesh point (100)
    dm.params['global_priors']['level_value']['excess_mortality'][
        'age_before'] = 101

    # prevalence: level-value prior before age 15
    dm.params['global_priors']['level_value']['prevalence']['age_before'] = 15
Ejemplo n.º 35
0
def fit_without_confrontation(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model
    without trying to integrate conflicting sources of data

    The prevalence and relative-risk data are withheld from the model
    and used instead to build normal distributions from which the birth
    prevalence (logit_C_0) and the excess-mortality mesh values are
    sampled.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    Returns
    -------
    The disease model, after its results have been stored and saved

    Raises
    ------
    AssertionError
      If there is not exactly one relevant prevalence datum
    """

    ## load model
    dm = dismod3.load_disease_model(id)


    ## separate out prevalence and relative-risk data
    prev_data = [d for d in dm.data if dm.relevant_to(d, 'prevalence', region, year, sex)]
    rr_data = [d for d in dm.data if dm.relevant_to(d, 'relative-risk', region, year, sex)]
    # everything that is neither of the above stays in the model
    dm.data = [d for d in dm.data if not d in prev_data and not d in rr_data]


    ### setup the generic disease model (without prevalence data)
    import dismod3.gbd_disease_model as model
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])
    dm.calc_effective_sample_size(dm.data)
    dm.vars = model.setup(dm, keys)


    ## override the birth prevalence prior, based on the withheld prevalence data
    logit_C_0 = dm.vars[dismod3.utils.gbd_key_for('bins', region, year, sex)]['initial']['logit_C_0']
    assert len(prev_data) == 1, 'should be a single prevalance datum'
    d = prev_data[0]

    # NEARLY_ZERO is added before every logit below
    # NOTE(review): presumably to keep logit finite at a value of 0 -- confirm
    mu_logit_C_0 = mc.logit(dm.value_per_1(d)+dismod3.settings.NEARLY_ZERO)
    lb, ub = dm.bounds_per_1(d)
    # width of the interval on the logit scale, divided by 2 * 1.96
    sigma_logit_C_0 = (mc.logit(ub+dismod3.settings.NEARLY_ZERO) - mc.logit(lb+dismod3.settings.NEARLY_ZERO)) / (2 * 1.96)
    print 'mu_C_0_pri:', mc.invlogit(mu_logit_C_0)
    print 'ui_C_0_pri:', lb, ub

    # override the excess-mortality, based on the relative-risk data
    # ages not covered by any relative-risk datum keep the defaults
    # mu_rr = 1.01 and sigma_rr = .01
    mu_rr = 1.01*np.ones(dismod3.settings.MAX_AGE)
    sigma_rr = .01*np.ones(dismod3.settings.MAX_AGE)
    for d in rr_data:
        # the slice includes age_end itself
        mu_rr[d['age_start']:(d['age_end']+1)] = dm.value_per_1(d)
        sigma_rr[d['age_start']:(d['age_end']+1)] = dm.se_per_1(d)
    print 'mu_rr:', mu_rr.round(2)
    #print 'sigma_rr:', sigma_rr.round(2)

    log_f = dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['age_coeffs']
    log_f_mesh = log_f.parents['gamma_mesh']
    param_mesh = log_f.parents['param_mesh']
    
    # NOTE(review): the arithmetic below assumes m_all is array-like with
    # one entry per age -- confirm what model.setup stores under this key
    m_all = dm.vars[dismod3.utils.gbd_key_for('all-cause_mortality', region, year, sex)]
    # mean of log f, taking f = (rr - 1) * m_all
    mu_log_f = np.log((mu_rr-1) * m_all)
    # NOTE(review): looks like a delta-method standard error for log f
    # (sd of f divided by f) -- confirm
    sigma_log_f = 1 / ((mu_rr-1) * m_all) * sigma_rr * m_all
    print 'mu_log_f:', mu_log_f.round(2)[param_mesh]
    print 'sigma_log_f:', sigma_log_f.round(2)[param_mesh]
    
    ### fit the model using Monte Carlo simulation (shoehorned into the MCMC framework of PyMC)
    dm.mcmc = mc.MCMC(dm.vars)
    # the two overridden nodes get SampleFromNormal step methods
    # (tau is the precision, 1 / sigma**2) ...
    dm.mcmc.use_step_method(SampleFromNormal, logit_C_0, mu=mu_logit_C_0, tau=sigma_logit_C_0**-2)
    dm.mcmc.use_step_method(SampleFromNormal, log_f_mesh, mu=mu_log_f[param_mesh], tau=sigma_log_f[param_mesh]**-2)
    # ... and NoStepper is then registered for every stochastic
    # NOTE(review): the order of these registrations may matter to PyMC --
    # do not reorder without checking
    for stoch in dm.mcmc.stochastics:
        dm.mcmc.use_step_method(mc.NoStepper, stoch)
    dm.mcmc.sample(1000, verbose=dismod3.settings.ON_SGE)

    #print 'mu_C_0_post:', mc.invlogit(logit_C_0.stats()['mean']).round(2)
    #print 'ui_C_0_post:', mc.invlogit(logit_C_0.stats()['95% HPD interval']).round(2)
    #print 'mu_rr_post:', dm.vars[dismod3.utils.gbd_key_for('relative-risk', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)
    print 'mu_log_f_mesh_post:', log_f_mesh.stats()['mean'].round(2)
    print 'mu_f_post:', dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)


    # store the fit for each key with the store function that matches its type
    for k in keys:
        t,r,y,s = dismod3.utils.type_region_year_sex_from_key(k)

        if t in ['incidence', 'prevalence', 'remission', 'excess-mortality', 'mortality', 'prevalence_x_excess-mortality']:
            dismod3.neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])

        elif t in ['relative-risk', 'duration', 'incidence_x_duration']:
            dismod3.normal_model.store_mcmc_fit(dm, k, dm.vars[k])

    # imported here rather than at module level
    from fit_posterior import save_country_level_posterior
    if str(year) == '2005':  # also generate 2010 estimates
        save_country_level_posterior(dm, region, 2010, sex, ['prevalence', 'remission'])
    save_country_level_posterior(dm, region, year, sex, ['prevalence', 'remission'])  #'prevalence incidence remission excess-mortality duration mortality relative-risk'.split())


    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    # the key list is rebuilt with the same arguments as above
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (dm.id, region, sex, year), keys_to_save=keys)

    return dm
Ejemplo n.º 36
0
sys.path += ['..']


import pylab as pl
import pymc as mc

import dismod3
import book_graphics
reload(book_graphics)

results = {}
models = {}

for ii in range(2):
    ### @export 'load model'
    dm = dismod3.load_disease_model(16391)

    ### @export 'set expert priors'
    dm.params['global_priors']['level_value']['incidence'] = dict(value=0., age_before=1., age_after=99)

    dm.params['global_priors']['smoothness']['prevalence']['amount'] = 'Slightly'
    dm.params['global_priors']['heterogeneity']['prevalence'] = 'Slightly'
    dm.params['global_priors']['level_value']['prevalence'] = dict(value=0., age_before=0, age_after=100)
    dm.params['global_priors']['level_bounds']['prevalence'] = dict(lower=0., upper =.05)
    dm.params['global_priors']['increasing']['prevalence'] = dict(age_start=0, age_end=0)
    dm.params['global_priors']['decreasing']['prevalence'] = dict(age_start=100, age_end=100)
    dm.params['sex_effect_prevalence'] = dict(mean=1, upper_ci=1.0001, lower_ci=.9999)
    dm.params['time_effect_prevalence'] = dict(mean=1, upper_ci=1.0001, lower_ci=.9999)
    dm.params['region_effect_prevalence'] = dict(std=.0001)
    dm.params['covariates']['Study_level']['bias']['rate']['value'] = 0
    for cv in dm.params['covariates']['Country_level']:
Ejemplo n.º 37
0
        model.mu['abs_err'].mean(),
        pl.median(pl.absolute(
            model.mu['rel_err'].dropna())), model.mu['covered?'].mean())
    print

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'mu')
    data_simulation.finalize_results(model)

    print model.results

    return model


if __name__ == '__main__':
    # stand-alone run: simulate, fit, and store results for one
    # hard-coded region/sex/year of one hard-coded model
    region, sex, year = 'north_america_high_income', 'male', 1990
    model_id = 24842

    import fit_posterior
    import upload_fits
    import data
    import simplejson as json

    ## load the model from disk or from web
    dm = dismod3.load_disease_model(model_id)
    dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    # keep the merged csv table under its own name, so the module-level
    # name `data` keeps referring to the imported module
    merged_fits = upload_fits.merge_data_csvs(model_id)
    dm.model.input_data['mu_pred'] = merged_fits['mu_pred']

    simulate_data(dm, region, sex, year)
    fit_simulated(dm, region, sex, year)
    store_results(dm, region, sex, year)