Beispiel #1
0
def test_simulated_disease():
    """ Fit noised-up simulated disease data and score the posterior

    The posterior is checked twice: first against the noisy data that
    was fit, then against the original values stored under 'truth'.

    Returns the fitted disease model.
    """

    # load model to test fitting
    dm = DiseaseJson(file('tests/test_disease_1.json').read())

    # relative size of the standard error used when adding noise
    cov = .5

    # keep only north_america_high_income rows; all-cause mortality rows
    # pass through unchanged, all other rows are noised up and moved to
    # the age interval [age_start-5, age_start+4]
    kept = []
    for row in dm.data:
        row['truth'] = row['value']
        if dismod3.utils.clean(row['gbd_region']) != 'north_america_high_income':
            continue

        if row['data_type'] != 'all-cause mortality data':
            se = (cov * row['value'])
            row['value'] = mc.rtruncnorm(row['truth'], se**-2, 0, np.inf)
            row['age_start'] -= 5
            row['age_end'] = row['age_start']+9

            # uniform weights over the ages in the interval
            weights = np.ones(row['age_end']-row['age_start']+1)
            row['age_weights'] = weights / float(len(weights))

            row['standard_error'] = se

        kept.append(row)

    dm.data = kept

    # fit empirical priors and compare fit to data
    from dismod3 import neg_binom_model
    for rate_type in ['prevalence', 'incidence', 'remission', 'excess-mortality']:
        neg_binom_model.fit_emp_prior(dm, rate_type, '/dev/null')
        check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['north_america_high_income'],
                                  year_list=[1990], sex_list=['male'])
    # MAP first to generate decent initial conditions, then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000, verbose=1, dbname='/dev/null')

    print 'error compared to the noisy data (coefficient of variation = %.2f)' % cov
    check_posterior_fits(dm)

    # put the true values back, as single-age data points
    # NOTE(review): this loop also shifts the all-cause mortality rows,
    # which were not shifted when the noise was added -- confirm intended
    for row in dm.data:
        row['value'] = row['truth']
        row['age_start'] += 5
        row['age_end'] = row['age_start']
        weights = np.ones(row['age_end']-row['age_start']+1)
        row['age_weights'] = weights / float(len(weights))

    print 'error compared to the truth'
    check_posterior_fits(dm)

    return dm
Beispiel #2
0
def test_triangle_pattern():
    """ Test fit to data with a triangle-shaped age pattern

    The data values are .01*min(a, 100-a) at ages a = 10, 30, ..., 90,
    so they rise up to age 50 and fall afterwards.
    """

    # load model to test fitting
    dm = DiseaseJson(file('tests/single_low_noise.json').read())

    # replace the last data row with single-age copies carrying the pattern
    import copy
    row = dm.data.pop()
    for age in range(10, 100, 20):
        row = copy.copy(row)
        level = .01*min(age, 100-age)
        row['age_start'] = age
        row['age_end'] = age
        row['parameter_value'] = level
        row['value'] = level
        dm.data.append(row)

    # fit the prevalence empirical prior and compare it to the data
    from dismod3 import neg_binom_model
    neg_binom_model.fit_emp_prior(dm, 'prevalence', '/dev/null')
    check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['asia_southeast'],
                                  year_list=[1990], sex_list=['male'])
    # MAP first to generate decent initial conditions, then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000, verbose=1, dbname='/dev/null')

    # compare fit to data
    check_posterior_fits(dm)
Beispiel #3
0
def test_dismoditis_wo_prevalence():
    """ Test fit of the dismoditis example with its prevalence data removed"""

    # load model to test fitting
    dm = DiseaseJson(file('tests/dismoditis.json').read())

    # keep only the rows that are not prevalence data
    dm.data = [row for row in dm.data if row['parameter'] != 'prevalence data']

    # fit each empirical prior and compare it to the data
    for rate_type in ['incidence', 'excess-mortality']:
        neg_binom_model.fit_emp_prior(dm, rate_type, '/dev/null')
        check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['asia_southeast'],
                                  year_list=[1990], sex_list=['male'])
    # no MAP step here: the posterior is sampled via MCMC directly
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000, verbose=1, dbname='/dev/null')

    # compare fit to data
    check_posterior_fits(dm)
Beispiel #4
0
def test_hep_c():
    """ Test fit for subset of hep_c data

    data is filtered to include only prevalence with
    region == 'europe_western' and sex == 'all'
    """

    # load model to test fitting
    dm = DiseaseJson(file('tests/hep_c_europe_western.json').read())

    # fit empirical priors
    neg_binom_model.fit_emp_prior(dm, 'prevalence', '/dev/null')

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['europe_western'],
                                  year_list=[1990], sex_list=['male'])
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)     ## first generate decent initial conditions
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000, verbose=1, dbname='/dev/null')     ## then sample the posterior via MCMC

    # check that prevalence is smooth near age zero
    prediction = dm.get_mcmc('mean', 'prevalence+europe_western+1990+male')
    print prediction
    return dm
    assert prediction[100] < .1, 'prediction should not shoot up in oldest ages'
Beispiel #5
0
def test_increasing_prior():
    """ Check that the prevalence empirical prior is non-decreasing in age

    The data follow the linear pattern .01*a at ages a = 10, 20, ..., 90.
    """

    # load model to test fitting
    dm = DiseaseJson(file('tests/single_low_noise.json').read())

    # NOTE(review): the 'increasing' prior is adjusted for 'incidence' while
    # the fit and the assertion below use 'prevalence' -- confirm intended
    dm.params['global_priors']['increasing']['incidence']['age_end'] = 100

    # replace the last data row with single-age copies carrying the pattern
    import copy
    row = dm.data.pop()
    for age in range(10, 100, 10):
        row = copy.copy(row)
        level = .01*age
        row['age_start'] = age
        row['age_end'] = age
        row['parameter_value'] = level
        row['value'] = level
        dm.data.append(row)

    # fit empirical priors
    from dismod3 import neg_binom_model
    neg_binom_model.fit_emp_prior(dm, 'prevalence', '/dev/null')

    # compare fit to data
    check_emp_prior_fits(dm)

    # successive differences of the empirical prior mean must all be >= 0
    key = dismod3.utils.gbd_key_for('prevalence', 'asia_southeast', 1990, 'male')
    emp_prior_mean = dm.get_mcmc('emp_prior_mean', key)
    assert np.all(np.diff(emp_prior_mean) >= 0), 'expert prior says increasing'
Beispiel #6
0
def test_single_rate():
    """ Test the prevalence empirical prior fit for a single low-noise data point"""

    # read the model description and build the model to test fitting
    json_str = file('tests/single_low_noise.json').read()
    dm = DiseaseJson(json_str)

    # fit the empirical prior, then compare the fit to the data
    neg_binom_model.fit_emp_prior(dm, 'prevalence', '/dev/null')
    check_emp_prior_fits(dm)
Beispiel #7
0
def fit_emp_prior(id, param_type):
    """ Fit the empirical prior of one parameter type for one model

    Loads the model, fits the prior, saves plots of the result, then
    saves the model to a json file and tries to post it.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    param_type : str, one of incidence, prevalence, remission, excess-mortality
      The disease parameter to generate empirical priors for

    Returns
    -------
    dm : the disease model that was loaded and fit

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    """
    # load disease model
    dm = dismod3.load_disease_model(id)

    # fit the prior, storing the results in a pickle in the job's working directory
    import dismod3.neg_binom_model as nb_model
    job_dir = dismod3.settings.JOB_WORKING_DIR % id
    pickle_name = '%s/empirical_priors/pickle/dm-%d-emp_prior-%s.pickle' % (job_dir, id, param_type)
    nb_model.fit_emp_prior(dm, param_type, dbname=pickle_name)

    # generate empirical prior plots, one figure per sex and year
    from pylab import subplot
    for sex in dismod3.settings.gbd_sexes:
        for year in dismod3.settings.gbd_years:
            keys = dismod3.utils.gbd_keys(region_list=['all'], year_list=[year],
                                          sex_list=[sex], type_list=[param_type])
            dismod3.tile_plot_disease_model(dm, keys, defaults={})
            dm.savefig('dm-%d-emp_prior-%s-%s-%s.png' % (id, param_type, sex, year))

    # TODO: put this in a separate script, which runs after all empirical priors are computed
    for effect in ['alpha', 'beta', 'gamma', 'delta']:
        dismod3.plotting.plot_empirical_prior_effects([dm], effect)
        dm.savefig('dm-%d-emp-prior-%s-%s.png' % (id, param_type, effect))

    # summarize fit quality graphically, as well as parameter posteriors;
    # the key comes from the last sex/year combination plotted above
    k0 = keys[0]
    dm.vars = {k0: dm.vars}   # hack: nest vars under the key so the posterior predictions plot
    dismod3.plotting.plot_posterior_predicted_checks(dm, k0)
    dm.savefig('dm-%d-emp-prior-check-%s.png' % (dm.id, param_type))
    dm.vars = dm.vars[k0]   # undo the hack

    # save results (do this last, because it removes things from the
    # disease model that the plotting functions, etc, might need)
    dm.save('dm-%d-prior-%s.json' % (id, param_type))
    dismod3.try_posting_disease_model(dm, ntries=5)

    return dm
Beispiel #8
0
def fit_model(dm, region, year, sex):
    """ Fit all empirical priors of dm, then the posterior for one region/year/sex

    The model dm is modified in place; nothing is returned.
    """

    # fit empirical priors, one rate type at a time
    for rate_type in ['prevalence', 'incidence', 'remission', 'excess-mortality']:
        neg_binom_model.fit_emp_prior(dm, rate_type, '/dev/null')

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=[region],
                                  year_list=[year],
                                  sex_list=[sex])

    # MAP first to generate decent initial conditions, then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000, verbose=1, dbname='/dev/null')
Beispiel #9
0
def test_mesh_refinement():
    """ Compare fit for coarse and fine age mesh"""

    # load model and fit it
    dm1 = DiseaseJson(file('tests/single_low_noise.json').read())
    dm1.set_param_age_mesh(arange(0,101,20))
    from dismod3 import neg_binom_model
    neg_binom_model.fit_emp_prior(dm1, 'prevalence', '/dev/null')

    # load another copy and fit it with a finer age mesh
    dm2 = DiseaseJson(file('tests/single_low_noise.json').read())
    dm2.set_param_age_mesh(arange(0,101,5))
    from dismod3 import neg_binom_model
    neg_binom_model.fit_emp_prior(dm2, 'prevalence', '/dev/null')

    # compare fits
    p1 = dm1.get_mcmc('emp_prior_mean', dismod3.utils.gbd_key_for('prevalence', 'asia_southeast', 1990, 'male'))
    p2 = dm2.get_mcmc('emp_prior_mean', dismod3.utils.gbd_key_for('prevalence', 'asia_southeast', 1990, 'male'))
    print p1[::20]
    print p2[::20]
    assert np.all(abs(p1[::20] / p2[::20] - 1.) < .05), 'Prediction should be closer to data'
Beispiel #10
0
def test_dismoditis():
    """ Test fit of the dismoditis example with small standard errors on all data"""

    # load model to test fitting, and give every row the same standard error
    dm = DiseaseJson(file('tests/dismoditis.json').read())
    for row in dm.data:
        row['standard_error'] = .01

    # fit each empirical prior and compare it to the data
    for rate_type in ['prevalence', 'incidence', 'excess-mortality']:
        neg_binom_model.fit_emp_prior(dm, rate_type, '/dev/null')
        check_emp_prior_fits(dm)

    # fit posterior where there is no data
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['north_america_high_income'],
                                  year_list=[1990], sex_list=['male'])
    # MAP first to generate decent initial conditions, then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000, verbose=1, dbname='/dev/null')
    check_posterior_fits(dm)

    # check that prevalence is smooth near age zero: the step between
    # the first two entries of the posterior mean must be small
    prediction = dm.get_mcmc('mean', 'prevalence+north_america_high_income+1990+male')
    first_step = prediction[1]-prediction[0]
    assert first_step < .01, 'prediction should be smooth near zero'
Beispiel #11
0
def test_linear_pattern():
    """ Test the empirical prior fit to data that increase linearly with age

    The data values are .01*a at ages a = 10, 30, ..., 90.
    """

    # load model to test fitting
    dm = DiseaseJson(file('tests/single_low_noise.json').read())

    # replace the last data row with single-age copies carrying the pattern
    import copy
    row = dm.data.pop()
    for age in range(10, 100, 20):
        row = copy.copy(row)
        level = .01*age
        row['age_start'] = age
        row['age_end'] = age
        row['parameter_value'] = level
        row['value'] = level
        dm.data.append(row)

    # fit the prevalence empirical prior, then compare the fit to the data
    from dismod3 import neg_binom_model
    neg_binom_model.fit_emp_prior(dm, 'prevalence', '/dev/null')
    check_emp_prior_fits(dm)
Beispiel #12
0
def fit_simulated_disease(n=300, cv=2.):
    """ Test fit for simulated disease data with noise and missingness

    Parameters
    ----------
    n : int
      The number of non-mortality data rows to sample for fitting
      (all all-cause mortality rows are always included)
    cv : float
      The coefficient of variation of the added noise, in percent;
      the standard error of each row is (cv / 100.) * value

    Results
    -------
    Appends the median absolute relative error and the mean coverage of
    the posterior predictions (scored against the truth) as one line to
    the file score_<n>_<cv>.txt, calls generate_figure, and returns the
    fitted disease model.
    """

    # load model to test fitting
    dm = DiseaseJson(file('tests/simulation_gold_standard.json').read())

    # adjust any priors and covariates as desired
    # NOTE(review): 'excess_mortality' is spelled with an underscore here
    # and with a hyphen in the fit_emp_prior calls below -- confirm both
    # spellings are what the respective lookups expect
    dm.set_param_age_mesh(arange(0,101,2))
    for rate_type in 'incidence prevalence remission excess_mortality'.split():
        dm.params['global_priors']['heterogeneity'][rate_type] = 'Very'
    dm.params['covariates']['Country_level']['LDI_id']['rate']['value'] = 0

    # filter and noise up data
    mort_data = []
    all_data = []
    for d in dm.data:
        d['truth'] = d['value']
        d['age_weights'] = array([1.])
        if d['data_type'] == 'all-cause mortality data':
            mort_data.append(d)
        else:
            # rows with value 0 are kept as they are
            if d['value'] > 0:
                se = (cv / 100.) * d['value']
                Y_i = mc.rtruncnorm(d['truth'], se**-2, 0, np.inf)
                d['value'] = Y_i
                d['standard_error'] = se
                d['effective_sample_size'] = Y_i * (1-Y_i) / se**2

            all_data.append(d)

    # missingness: fit only a random sample of the non-mortality rows
    sampled_data = random.sample(all_data, n) + mort_data
    dm.data = sampled_data

    # fit empirical priors and compare fit to data
    from dismod3 import neg_binom_model
    for rate_type in 'prevalence incidence remission excess-mortality'.split():
        neg_binom_model.fit_emp_prior(dm, rate_type, iter=30000, thin=15, burn=15000, dbname='/dev/null')
        check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['north_america_high_income'],
                                  year_list=[1990], sex_list=['male'])
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)     ## first generate decent initial conditions
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=30000, thin=15, burn=15000, verbose=1, dbname='/dev/null')     ## then sample the posterior via MCMC

    print 'error compared to the noisy data (coefficient of variation = %.2f)' % cv
    check_posterior_fits(dm)

    # score against the truth on all non-mortality rows, remembering the
    # noisy values so that they can be restored afterwards
    dm.data = all_data
    for d in dm.data:
        if d['data_type'] != 'all-cause mortality data':
            d['noisy_data'] = d['value']
            d['value'] = d['truth']

    print 'error compared to the truth'
    are, coverage = check_posterior_fits(dm)
    median_are = median(are)
    mean_coverage = mean(coverage)
    print
    print 'Median Absolute Relative Error of Posterior Predictions:', median_are
    print 'Pct coverage:', 100*mean_coverage

    # append the scores; the with-block closes the file even if the write fails
    with open('score_%d_%f.txt' % (n, cv), 'a') as f:
        f.write('%10.10f,%10.10f\n' % (median_are, mean_coverage))

    # restore the noisy values on the rows that were actually fit
    dm.all_data = all_data
    dm.data = sampled_data
    for d in dm.data:
        if d['data_type'] != 'all-cause mortality data':
            d['value'] = d['noisy_data']

    generate_figure(dm, n, cv)

    return dm
Beispiel #13
0
def fit(id, opts):
    """ Fit empirical priors or the posterior of a disease model, and post the results

    Parameters
    ----------
    id : int
      The model id number of the disease model to fit
    opts : object with attributes type, region, sex, year, and log
      If opts.type is set, the empirical prior for that type is fit;
      otherwise all parameters are fit consistently for the selected
      regions, sexes, and years (each defaulting to all GBD values
      when unset).  If opts.log is set, the job status file is updated
      before and after fitting.
    """
    # human-readable description of the job (announcements are currently disabled)
    fit_str = '(%d) %s %s %s' % (id, opts.region or '', opts.sex or '', opts.year or '')
    sys.stdout.flush()

    # update job status file
    _log_fit_status(id, opts, 'Running')

    dm = dismod3.get_disease_model(id)
    fit_str = '%s %s' % (dm.params['condition'], fit_str)

    sex_list = [opts.sex] if opts.sex else dismod3.gbd_sexes
    year_list = [opts.year] if opts.year else dismod3.gbd_years
    region_list = [opts.region] if opts.region else dismod3.gbd_regions
    keys = gbd_keys(region_list=region_list, year_list=year_list, sex_list=sex_list)

    job_dir = dismod3.settings.JOB_WORKING_DIR % id

    # fit empirical priors, if type is specified
    if opts.type:
        fit_str += ' emp prior for %s' % opts.type
        import dismod3.neg_binom_model as model

        model.fit_emp_prior(dm, opts.type, dbname='%s/empirical_priors/pickle/dm-%d-emp_prior-%s.pickle' % (job_dir, id, opts.type))

    # if type is not specified, find consistent fit of all parameters
    else:
        import dismod3.gbd_disease_model as model

        # get the all-cause mortality data, and merge it into the model
        mort = dismod3.get_disease_model('all-cause_mortality')
        dm.data += mort.data

        # fit individually, if sex, year, and region are specified
        if opts.sex and opts.year and opts.region:
            dm.params['estimate_type'] = 'fit individually'

        # fit the model: MAP for initial values, then MCMC
        model.fit(dm, method='map', keys=keys, verbose=1)
        model.fit(dm, method='mcmc', keys=keys, iter=10000, thin=5, burn=5000, verbose=1,
                  dbname='%s/posterior/pickle/dm-%d-posterior-%s-%s-%s.pickle' % (job_dir, id, opts.region, opts.sex, opts.year))

    # remove all keys that have not been changed by running this model;
    # iterate over a copy of the keys because entries are removed in the loop
    fitted_keys = set(keys)
    for k in list(dm.params.keys()):
        if isinstance(dm.params[k], dict):
            for j in list(dm.params[k].keys()):
                if j not in fitted_keys:
                    dm.params[k].pop(j)

    # post results to dismod_data_server
    _post_with_retries(dm)
    sys.stdout.flush()

    # update job status file
    _log_fit_status(id, opts, 'Completed')


def _log_fit_status(id, opts, status):
    """ Record status in the job status file for the stage that opts selects, if any"""
    if not opts.log:
        return

    region_sex_year = opts.region and opts.sex and opts.year
    if opts.type and not region_sex_year:
        dismod3.log_job_status(id, 'empirical_priors', opts.type, status)
    elif region_sex_year and not opts.type:
        dismod3.log_job_status(id, 'posterior', '%s--%s--%s' % (opts.region, opts.sex, opts.year), status)


def _post_with_retries(dm, ntries=4):
    """ Post dm to the data server, pausing a random time and retrying when a post fails"""
    from twill.errors import TwillAssertionError
    from urllib2 import URLError
    import random

    # this must be a tuple: an except clause never matches against a list,
    # so with a list the retries below would never happen
    possible_exceptions = (TwillAssertionError, URLError)

    for attempt in range(ntries - 1):
        try:
            return dismod3.post_disease_model(dm)
        except possible_exceptions:
            time.sleep(random.random()*30)

    # last try: let any exception propagate to the caller
    return dismod3.post_disease_model(dm)