Example #1
0
def test_save_country_level_posterior():
    """ Test exporting country level posterior output

    Smoke test with no assertions; it passes if nothing raises.  Steps:

    1. load the model stored in 'tests/dismoditis.json'
    2. fit it for asia_southeast / 1990 / male, first with method='map'
       and then with a short method='mcmc' run
    3. recreate an empty output directory under the job working directory
    4. call save_country_level_posterior and then
       zip_country_level_posterior_files for the model
    """
    import os
    from shutil import rmtree

    # load model to test fitting; the with-block closes the file handle as
    # soon as the JSON text has been read instead of leaving it open
    with open('tests/dismoditis.json') as model_file:
        dm = DiseaseJson(model_file.read())

    # fit posterior where there is data
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['asia_southeast'],
                                  year_list=[1990], sex_list=['male'])
    ## first generate decent initial conditions
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    ## then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=100, thin=1,
                          burn=0, verbose=1, dbname='/dev/null')

    # make a rate_type_list
    rate_type_list = ['incidence', 'prevalence', 'remission', 'excess-mortality']

    # job working directory
    job_wd = dismod3.settings.JOB_WORKING_DIR % dm.id

    # directory to save the file; start from an empty directory so output
    # left behind by an earlier run cannot leak into this one
    # (named out_dir rather than dir, to avoid shadowing the builtin)
    out_dir = job_wd + '/posterior/country_level_posterior_dm-' + str(dm.id) + '/'
    if os.path.exists(out_dir):
        rmtree(out_dir)
    os.makedirs(out_dir)

    # save country level posterior in csv file
    from fit_posterior import save_country_level_posterior
    save_country_level_posterior(dm, 'asia_southeast', '1990', 'male', rate_type_list)

    # zip the csv file
    from upload_fits import zip_country_level_posterior_files
    zip_country_level_posterior_files(dm.id)
Example #2
0
def fit_without_confrontation(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model
    without trying to integrate conflicting sources of data

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years
    """

    ## load model
    dm = dismod3.load_disease_model(id)

    ## separate out prevalence and relative-risk data
    prev_data = [
        d for d in dm.data
        if dm.relevant_to(d, 'prevalence', region, year, sex)
    ]
    rr_data = [
        d for d in dm.data
        if dm.relevant_to(d, 'relative-risk', region, year, sex)
    ]
    dm.data = [d for d in dm.data if not d in prev_data and not d in rr_data]

    ### setup the generic disease model (without prevalence data)
    import dismod3.gbd_disease_model as model
    keys = dismod3.utils.gbd_keys(region_list=[region],
                                  year_list=[year],
                                  sex_list=[sex])
    dm.calc_effective_sample_size(dm.data)
    dm.vars = model.setup(dm, keys)

    ## override the birth prevalence prior, based on the withheld prevalence data
    logit_C_0 = dm.vars[dismod3.utils.gbd_key_for('bins', region, year,
                                                  sex)]['initial']['logit_C_0']
    assert len(prev_data) == 1, 'should be a single prevalance datum'
    d = prev_data[0]

    mu_logit_C_0 = mc.logit(dm.value_per_1(d) + dismod3.settings.NEARLY_ZERO)
    lb, ub = dm.bounds_per_1(d)
    sigma_logit_C_0 = (mc.logit(ub + dismod3.settings.NEARLY_ZERO) -
                       mc.logit(lb + dismod3.settings.NEARLY_ZERO)) / (2 *
                                                                       1.96)
    print 'mu_C_0_pri:', mc.invlogit(mu_logit_C_0)
    print 'ui_C_0_pri:', lb, ub

    # override the excess-mortality, based on the relative-risk data
    mu_rr = 1.01 * np.ones(dismod3.settings.MAX_AGE)
    sigma_rr = .01 * np.ones(dismod3.settings.MAX_AGE)
    for d in rr_data:
        mu_rr[d['age_start']:(d['age_end'] + 1)] = dm.value_per_1(d)
        sigma_rr[d['age_start']:(d['age_end'] + 1)] = dm.se_per_1(d)
    print 'mu_rr:', mu_rr.round(2)
    #print 'sigma_rr:', sigma_rr.round(2)

    log_f = dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year,
                                              sex)]['age_coeffs']
    log_f_mesh = log_f.parents['gamma_mesh']
    param_mesh = log_f.parents['param_mesh']

    m_all = dm.vars[dismod3.utils.gbd_key_for('all-cause_mortality', region,
                                              year, sex)]
    mu_log_f = np.log((mu_rr - 1) * m_all)
    sigma_log_f = 1 / ((mu_rr - 1) * m_all) * sigma_rr * m_all
    print 'mu_log_f:', mu_log_f.round(2)[param_mesh]
    print 'sigma_log_f:', sigma_log_f.round(2)[param_mesh]

    ### fit the model using Monte Carlo simulation (shoehorned into the MCMC framework of PyMC)
    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.use_step_method(SampleFromNormal,
                            logit_C_0,
                            mu=mu_logit_C_0,
                            tau=sigma_logit_C_0**-2)
    dm.mcmc.use_step_method(SampleFromNormal,
                            log_f_mesh,
                            mu=mu_log_f[param_mesh],
                            tau=sigma_log_f[param_mesh]**-2)
    for stoch in dm.mcmc.stochastics:
        dm.mcmc.use_step_method(mc.NoStepper, stoch)
    dm.mcmc.sample(1000, verbose=dismod3.settings.ON_SGE)

    #print 'mu_C_0_post:', mc.invlogit(logit_C_0.stats()['mean']).round(2)
    #print 'ui_C_0_post:', mc.invlogit(logit_C_0.stats()['95% HPD interval']).round(2)
    #print 'mu_rr_post:', dm.vars[dismod3.utils.gbd_key_for('relative-risk', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)
    print 'mu_log_f_mesh_post:', log_f_mesh.stats()['mean'].round(2)
    print 'mu_f_post:', dm.vars[dismod3.utils.gbd_key_for(
        'excess-mortality', region, year,
        sex)]['rate_stoch'].stats()['mean'].round(2)

    for k in keys:
        t, r, y, s = dismod3.utils.type_region_year_sex_from_key(k)

        if t in [
                'incidence', 'prevalence', 'remission', 'excess-mortality',
                'mortality', 'prevalence_x_excess-mortality'
        ]:
            dismod3.neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])

        elif t in ['relative-risk', 'duration', 'incidence_x_duration']:
            dismod3.normal_model.store_mcmc_fit(dm, k, dm.vars[k])

    from fit_posterior import save_country_level_posterior
    if str(year) == '2005':  # also generate 2010 estimates
        save_country_level_posterior(dm, region, 2010, sex,
                                     ['prevalence', 'remission'])
    save_country_level_posterior(
        dm, region, year, sex, ['prevalence', 'remission']
    )  #'prevalence incidence remission excess-mortality duration mortality relative-risk'.split())

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    keys = dismod3.utils.gbd_keys(region_list=[region],
                                  year_list=[year],
                                  sex_list=[sex])
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (dm.id, region, sex, year),
            keys_to_save=keys)

    return dm
def fit_without_confrontation(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model
    without trying to integrate conflicting sources of data

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years
    """

    ## load model
    dm = dismod3.load_disease_model(id)


    ## separate out prevalence and relative-risk data
    prev_data = [d for d in dm.data if dm.relevant_to(d, 'prevalence', region, year, sex)]
    rr_data = [d for d in dm.data if dm.relevant_to(d, 'relative-risk', region, year, sex)]
    dm.data = [d for d in dm.data if not d in prev_data and not d in rr_data]


    ### setup the generic disease model (without prevalence data)
    import dismod3.gbd_disease_model as model
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])
    dm.calc_effective_sample_size(dm.data)
    dm.vars = model.setup(dm, keys)


    ## override the birth prevalence prior, based on the withheld prevalence data
    logit_C_0 = dm.vars[dismod3.utils.gbd_key_for('bins', region, year, sex)]['initial']['logit_C_0']
    assert len(prev_data) == 1, 'should be a single prevalance datum'
    d = prev_data[0]

    mu_logit_C_0 = mc.logit(dm.value_per_1(d)+dismod3.settings.NEARLY_ZERO)
    lb, ub = dm.bounds_per_1(d)
    sigma_logit_C_0 = (mc.logit(ub+dismod3.settings.NEARLY_ZERO) - mc.logit(lb+dismod3.settings.NEARLY_ZERO)) / (2 * 1.96)
    print 'mu_C_0_pri:', mc.invlogit(mu_logit_C_0)
    print 'ui_C_0_pri:', lb, ub

    # override the excess-mortality, based on the relative-risk data
    mu_rr = 1.01*np.ones(dismod3.settings.MAX_AGE)
    sigma_rr = .01*np.ones(dismod3.settings.MAX_AGE)
    for d in rr_data:
        mu_rr[d['age_start']:(d['age_end']+1)] = dm.value_per_1(d)
        sigma_rr[d['age_start']:(d['age_end']+1)] = dm.se_per_1(d)
    print 'mu_rr:', mu_rr.round(2)
    #print 'sigma_rr:', sigma_rr.round(2)

    log_f = dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['age_coeffs']
    log_f_mesh = log_f.parents['gamma_mesh']
    param_mesh = log_f.parents['param_mesh']
    
    m_all = dm.vars[dismod3.utils.gbd_key_for('all-cause_mortality', region, year, sex)]
    mu_log_f = np.log((mu_rr-1) * m_all)
    sigma_log_f = 1 / ((mu_rr-1) * m_all) * sigma_rr * m_all
    print 'mu_log_f:', mu_log_f.round(2)[param_mesh]
    print 'sigma_log_f:', sigma_log_f.round(2)[param_mesh]
    
    ### fit the model using Monte Carlo simulation (shoehorned into the MCMC framework of PyMC)
    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.use_step_method(SampleFromNormal, logit_C_0, mu=mu_logit_C_0, tau=sigma_logit_C_0**-2)
    dm.mcmc.use_step_method(SampleFromNormal, log_f_mesh, mu=mu_log_f[param_mesh], tau=sigma_log_f[param_mesh]**-2)
    for stoch in dm.mcmc.stochastics:
        dm.mcmc.use_step_method(mc.NoStepper, stoch)
    dm.mcmc.sample(1000, verbose=dismod3.settings.ON_SGE)

    #print 'mu_C_0_post:', mc.invlogit(logit_C_0.stats()['mean']).round(2)
    #print 'ui_C_0_post:', mc.invlogit(logit_C_0.stats()['95% HPD interval']).round(2)
    #print 'mu_rr_post:', dm.vars[dismod3.utils.gbd_key_for('relative-risk', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)
    print 'mu_log_f_mesh_post:', log_f_mesh.stats()['mean'].round(2)
    print 'mu_f_post:', dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)


    for k in keys:
        t,r,y,s = dismod3.utils.type_region_year_sex_from_key(k)

        if t in ['incidence', 'prevalence', 'remission', 'excess-mortality', 'mortality', 'prevalence_x_excess-mortality']:
            dismod3.neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])

        elif t in ['relative-risk', 'duration', 'incidence_x_duration']:
            dismod3.normal_model.store_mcmc_fit(dm, k, dm.vars[k])

    from fit_posterior import save_country_level_posterior
    if str(year) == '2005':  # also generate 2010 estimates
        save_country_level_posterior(dm, region, 2010, sex, ['prevalence', 'remission'])
    save_country_level_posterior(dm, region, year, sex, ['prevalence', 'remission'])  #'prevalence incidence remission excess-mortality duration mortality relative-risk'.split())


    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (dm.id, region, sex, year), keys_to_save=keys)

    return dm