Exemple #1
0
def test_simulated_disease():
    """ Test fit for simulated disease data

    Loads the model from tests/test_disease_1.json, keeps only the rows
    whose cleaned gbd_region is 'north_america_high_income', and perturbs
    every row that is not all-cause mortality data: the value is replaced
    by a draw from mc.rtruncnorm centred on the true value, and the age
    interval is moved to start 5 years earlier and cover 10 ages with
    uniform age weights.  Empirical priors and then the posterior are fit,
    and the fit is checked first against the noisy data and then against
    the restored true values.

    Returns the fitted DiseaseJson object.
    """
    from dismod3 import gbd_disease_model
    from dismod3 import neg_binom_model

    mortality_type = 'all-cause mortality data'
    noise_cv = .5  # coefficient of variation of the added noise

    # load model to test fitting; the with-block closes the file handle
    with open('tests/test_disease_1.json') as f:
        dm = DiseaseJson(f.read())

    # filter and noise up data
    data = []
    for d in dm.data:
        d['truth'] = d['value']
        if dismod3.utils.clean(d['gbd_region']) != 'north_america_high_income':
            continue

        if d['data_type'] != mortality_type:
            # NOTE(review): a value of exactly 0 gives se == 0, for which
            # se**-2 is undefined; presumably the test data has no such rows.
            se = noise_cv * d['value']
            d['value'] = mc.rtruncnorm(d['truth'], se**-2, 0, np.inf)

            # widen the age interval and weight every age in it equally
            d['age_start'] -= 5
            d['age_end'] = d['age_start'] + 9
            d['age_weights'] = np.ones(d['age_end'] - d['age_start'] + 1)
            d['age_weights'] /= float(len(d['age_weights']))

            d['standard_error'] = se

        data.append(d)

    dm.data = data

    # fit empirical priors and compare fit to data
    for rate_type in 'prevalence incidence remission excess-mortality'.split():
        neg_binom_model.fit_emp_prior(dm, rate_type, '/dev/null')
        check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    keys = dismod3.utils.gbd_keys(region_list=['north_america_high_income'],
                                  year_list=[1990], sex_list=['male'])
    # first generate decent initial conditions
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    # then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5,
                          burn=5000, verbose=1, dbname='/dev/null')

    print('error compared to the noisy data (coefficient of variation = %.2f)' % noise_cv)
    check_posterior_fits(dm)

    # put the true values back before the second comparison
    for d in dm.data:
        d['value'] = d['truth']
        if d['data_type'] == mortality_type:
            # mortality rows were never shifted above, so their age
            # interval and weights must not be "un-shifted" here
            continue
        d['age_start'] += 5
        d['age_end'] = d['age_start']
        d['age_weights'] = np.ones(d['age_end'] - d['age_start'] + 1)
        d['age_weights'] /= float(len(d['age_weights']))

    print('error compared to the truth')
    check_posterior_fits(dm)

    return dm
Exemple #2
0
def test_dismoditis_wo_prevalence():
    """ Test fit for simple example with all prevalence data removed

    Loads the model from tests/dismoditis.json, drops every row whose
    'parameter' is 'prevalence data', fits empirical priors for incidence
    and excess-mortality, then samples the posterior by MCMC for the
    asia_southeast / 1990 / male keys and checks the posterior fit.
    """
    from dismod3 import gbd_disease_model

    # load model to test fitting; the with-block closes the file handle
    with open('tests/dismoditis.json') as f:
        dm = DiseaseJson(f.read())

    # remove all prevalence data
    dm.data = [d for d in dm.data if d['parameter'] != 'prevalence data']

    # fit empirical priors, checking the fit after each one
    for rate_type in ('incidence', 'excess-mortality'):
        neg_binom_model.fit_emp_prior(dm, rate_type, '/dev/null')
        check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    keys = dismod3.utils.gbd_keys(region_list=['asia_southeast'],
                                  year_list=[1990], sex_list=['male'])
    # The MAP step that would first generate initial conditions is disabled
    # in this test; only the MCMC fit runs.
    #gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5,
                          burn=5000, verbose=1, dbname='/dev/null')

    # compare fit to data
    check_posterior_fits(dm)
Exemple #3
0
def fit_simulated_disease(n=300, cv=2.):
    """ Test fit for simulated disease data with noise and missingness

    Parameters
    ----------
    n : int
      number of non-mortality rows drawn with random.sample for fitting
    cv : float
      coefficient of variation of the added noise, in percent
      (the standard error of each row is cv/100 times its true value)

    Results
    -------
    Returns the fitted DiseaseJson object.  On return dm.data holds the
    sampled noisy rows plus the mortality rows, and dm.all_data holds
    every non-mortality row.

    Side effects: appends one "median ARE, mean coverage" line to
    score_<n>_<cv>.txt and calls generate_figure(dm, n, cv).
    """
    from dismod3 import gbd_disease_model
    from dismod3 import neg_binom_model

    mortality_type = 'all-cause mortality data'

    # load model to test fitting; the with-block closes the file handle
    with open('tests/simulation_gold_standard.json') as f:
        dm = DiseaseJson(f.read())

    # adjust any priors and covariates as desired
    dm.set_param_age_mesh(arange(0, 101, 2))
    for prior_type in 'incidence prevalence remission excess_mortality'.split():
        dm.params['global_priors']['heterogeneity'][prior_type] = 'Very'
    # does not depend on the rate type, so it is set once instead of per loop pass
    dm.params['covariates']['Country_level']['LDI_id']['rate']['value'] = 0

    # filter and noise up data
    mort_data = []
    all_data = []  # every row that is not all-cause mortality data
    for d in dm.data:
        d['truth'] = d['value']
        d['age_weights'] = array([1.])
        if d['data_type'] == mortality_type:
            mort_data.append(d)
            continue

        # rows with a non-positive value are kept but left without noise
        if d['value'] > 0:
            se = (cv / 100.) * d['value']
            noisy_value = mc.rtruncnorm(d['truth'], se**-2, 0, np.inf)
            d['value'] = noisy_value
            d['standard_error'] = se
            d['effective_sample_size'] = noisy_value * (1 - noisy_value) / se**2

        all_data.append(d)

    sampled_data = random.sample(all_data, n) + mort_data
    dm.data = sampled_data

    # fit empirical priors and compare fit to data
    # (for a quick development run use iter=1000, thin=1, burn=0)
    for rate_type in 'prevalence incidence remission excess-mortality'.split():
        neg_binom_model.fit_emp_prior(dm, rate_type, iter=30000, thin=15,
                                      burn=15000, dbname='/dev/null')
        check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    keys = dismod3.utils.gbd_keys(region_list=['north_america_high_income'],
                                  year_list=[1990], sex_list=['male'])
    # first generate decent initial conditions
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    # then sample the posterior via MCMC
    # (for a quick development run use iter=1000, thin=1, burn=0)
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=30000, thin=15,
                          burn=15000, verbose=1, dbname='/dev/null')

    print('error compared to the noisy data (coefficient of variation = %.2f)' % cv)
    check_posterior_fits(dm)

    # score against the truth on every non-mortality row, sampled or not;
    # the noisy value is stashed so it can be put back afterwards
    dm.data = all_data
    for d in dm.data:
        if d['data_type'] != mortality_type:
            d['noisy_data'] = d['value']
            d['value'] = d['truth']

    print('error compared to the truth')
    are, coverage = check_posterior_fits(dm)
    print('')
    print('Median Absolute Relative Error of Posterior Predictions: %s' % (median(are),))
    print('Pct coverage: %s' % (100*mean(coverage),))

    # append the scores; the with-block closes the file even if write fails
    with open('score_%d_%f.txt' % (n, cv), 'a') as f:
        f.write('%10.10f,%10.10f\n' % (median(are), mean(coverage)))

    # restore the noisy values on the rows that were used for fitting
    dm.all_data = all_data
    dm.data = sampled_data
    for d in dm.data:
        if d['data_type'] != mortality_type:
            d['value'] = d['noisy_data']

    generate_figure(dm, n, cv)

    return dm