Example #1
0
def test_covariate_model_dispersion():
    """Smoke test for the dispersion covariate model.

    Simulates negative-binomial rate data whose dispersion depends on a
    single covariate, wires the mean-covariate, dispersion-covariate and
    negative-binomial likelihood nodes together, and draws two MCMC samples
    to check that the model can be built and stepped.
    """
    num_rows = 100

    # true parameter values behind the simulated data
    pi_true = .1
    zeta_true = -.2
    eta_true = pl.log(50)
    delta_true = 50 + pl.exp(eta_true)

    model = data.ModelData()
    model.hierarchy, model.output_template = data_simulation.small_output()

    # draw the covariate first and the rates second, so the random stream is
    # consumed in the same order on every run
    Z = mc.rcategorical([.5, 5.], num_rows)
    ess = 10000.*pl.ones(num_rows)
    p = mc.rnegative_binomial(pi_true*ess, delta_true*pl.exp(Z*zeta_true)) / ess

    model.input_data = pandas.DataFrame(dict(value=p, z_0=Z))
    # every row describes the same area, sex and year
    for column, constant in (('area', 'all'),
                             ('sex', 'total'),
                             ('year_start', 2000),
                             ('year_end', 2000)):
        model.input_data[column] = constant

    # create model and priors
    nodes = dict(mu=mc.Uninformative('mu_test', value=pi_true))
    mean_nodes = covariate_model.mean_covariate_model(
        'test', nodes['mu'], model.input_data, {}, model, 'all', 'total', 'all')
    nodes.update(mean_nodes)
    dispersion_nodes = covariate_model.dispersion_covariate_model(
        'test', model.input_data, .1, 10.)
    nodes.update(dispersion_nodes)
    likelihood_nodes = rate_model.neg_binom_model(
        'test', nodes['pi'], nodes['delta'], p, ess)
    nodes.update(likelihood_nodes)

    # fit model (two steps are enough for a smoke test)
    sampler = mc.MCMC(nodes)
    sampler.sample(2)
def resample(data):
    """Replace the observed values in `data` with simulated ones.

    The simulated value of each row is a negative-binomial draw with expected
    rate ``mu_pred + 1e-6``, divided by the row's effective sample size.

    Before sampling, missing uncertainty is filled in: standard errors that
    are NaN or non-positive are derived from the confidence interval, and
    missing effective sample sizes are derived from the standard error.  Rows
    whose effective sample size is still NaN or is non-positive get a sample
    size of 1.0 and a warning is printed.  Zero is treated as invalid too,
    because the sample size is used as a divisor below.

    `data` is modified in place and also returned.  Empty input is returned
    untouched.
    """
    if len(data) == 0:
        return data

    delta_true = .1
    # the small offset keeps the expected rate strictly positive
    p = data['mu_pred']+1.e-6


    # TODO: abstract this block of code into rate_model.py; it is also called in data_model.py
    ## ensure that all data has uncertainty quantified appropriately
    # NOTE(review): the masked writes below go through data[col][mask]; if
    # `data` is a pandas DataFrame this relies on data[col] not being a copy
    # -- confirm against the pandas version in use
    # first replace all missing se from ci
    missing_se = pl.isnan(data['standard_error']) | (data['standard_error'] <= 0)
    data['standard_error'][missing_se] = (data['upper_ci'][missing_se] - data['lower_ci'][missing_se]) / (2*1.96)

    # then replace all missing ess with se
    missing_ess = pl.isnan(data['effective_sample_size'])
    data['effective_sample_size'][missing_ess] = data['value'][missing_ess]*(1-data['value'][missing_ess])/data['standard_error'][missing_ess]**2

    # warn about data that doesn't have effective sample size quantified, or
    # where it is non-positive, and fall back to a sample size of one
    invalid_ess = pl.isnan(data['effective_sample_size']) | (data['effective_sample_size'] <= 0)
    num_invalid = sum(invalid_ess)
    if num_invalid > 0:
        print('WARNING: %d rows of data has invalid quantification of uncertainty.' % num_invalid)
        data['effective_sample_size'][invalid_ess] = 1.0

    n = data['effective_sample_size']

    data['true'] = p
    data['value'] = (1.0 * mc.rnegative_binomial(n*p, delta_true*n*p)) / n

    # uncomment below to test the effect of having very wrong data
    #data['value'] = 0.
    #data['effective_sample_size'] = 1.e6

    return data
Example #3
0
def test_covariate_model_dispersion():
    """Smoke test: dispersion covariate model on simulated data.

    Negative-binomial rates are simulated with a dispersion that depends on
    one covariate; the mean-covariate, dispersion-covariate and likelihood
    nodes are then assembled and sampled for two MCMC steps.
    """
    n_obs = 100

    # parameter values used for the simulation
    pi_true = .1
    zeta_true = -.2
    eta_true = np.log(50)
    delta_true = 50 + np.exp(eta_true)

    model = dismod_mr.data.ModelData()
    model.hierarchy, model.output_template = dismod_mr.testing.data_simulation.small_output()

    # covariate draw comes before the rate draw (fixed random-stream order)
    Z = mc.rcategorical([.5, 5.], n_obs)
    ess = 10000.*np.ones(n_obs)
    p = mc.rnegative_binomial(pi_true*ess, delta_true*np.exp(Z*zeta_true)) / ess

    model.input_data = pd.DataFrame(dict(value=p, z_0=Z))
    # constant descriptive columns shared by every row
    for column, constant in (('area', 'all'),
                             ('sex', 'total'),
                             ('year_start', 2000),
                             ('year_end', 2000)):
        model.input_data[column] = constant

    # create model and priors
    covariates = dismod_mr.model.covariates
    variables = {'mu': mc.Uninformative('mu_test', value=pi_true)}

    mean_nodes = covariates.mean_covariate_model('test', variables['mu'], model.input_data, {},
                                                 model, 'all', 'total', 'all')
    variables.update(mean_nodes)

    dispersion_nodes = covariates.dispersion_covariate_model('test', model.input_data, .1, 10.)
    variables.update(dispersion_nodes)

    likelihood_nodes = dismod_mr.model.likelihood.neg_binom('test', variables['pi'], variables['delta'], p, ess)
    variables.update(likelihood_nodes)

    # fit model (two steps only)
    sampler = mc.MCMC(variables)
    sampler.sample(2)
Example #4
0
 def predictions(value=value, N=N, S=data_sample, mu=rates, delta=delta):
     """Simulate rates for the sampled rows; all other rows are zero.

     Draws negative-binomial counts for the rows selected by `S`, divides
     them by the matching entries of `N`, and scatters the result into a
     zero vector with one entry per row of vars['data'].  `value` is
     accepted but not used in the body.
     """
     sampled_rates = mc.rnegative_binomial(N[S]*mu, delta)/N[S]
     full = pl.zeros(len(vars['data']))
     full[S] = sampled_rates
     return full
Example #5
0
def simulate_age_group_data(N=50, delta_true=150, pi_true=true_rate_function):
    """ generate simulated data

    Builds a simple model with N rows, assigns each row a random age group
    and effective sample size, computes the row's true rate as a weighted
    average of the age-specific rate `pi_true` over the age group, and
    samples an observed value from a negative binomial distribution with
    dispersion `delta_true`.

    The first rows (up to ten) are pinned to the ten-year age groups centred
    on 5, 15, ..., 95.  The resulting table is printed and the model is
    returned, with `ages` and `pi_age_true` attached.
    """
    # start with a simple model with N rows of data
    model = data_simulation.simple_model(N)

    # record the true age-specific rates
    model.ages = pl.arange(0, 101, 1)
    model.pi_age_true = pi_true(model.ages)

    # choose age groups randomly
    age_width = mc.runiform(1, 100, size=N)
    age_mid = mc.runiform(age_width / 2, 100 - age_width / 2, size=N)
    # pin the leading rows to fixed ten-year groups; the slice is limited to
    # N so that fewer than ten rows can be simulated as well
    n_fixed = min(10, N)
    age_width[:n_fixed] = 10
    age_mid[:n_fixed] = pl.arange(5, 105, 10)[:n_fixed]

    age_start = pl.array(age_mid - age_width / 2, dtype=int)
    age_end = pl.array(age_mid + age_width / 2, dtype=int)

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end

    # choose effective sample size uniformly at random
    n = mc.runiform(100, 10000, size=N)
    model.input_data['effective_sample_size'] = n

    # integrate true age-specific rate across age groups to find true group
    # rate; each row gets its own exponential age-weight pattern
    true_rates = []
    weight_strings = []
    for i in range(N):
        beta = mc.rnormal(0., .025**-2)

        age_weights = pl.exp(beta * model.ages)
        sum_pi_wt = pl.cumsum(model.pi_age_true * age_weights)
        sum_wt = pl.cumsum(age_weights)

        # only row i's interval is needed for row i
        lo, hi = age_start[i], age_end[i]
        true_rates.append((sum_pi_wt[hi] - sum_pi_wt[lo]) / (sum_wt[hi] - sum_wt[lo]))
        weight_strings.append(';'.join(['%.4f' % w for w in age_weights[lo:(hi + 1)]]))

    # assign whole columns at once instead of writing cell by cell
    model.input_data['true'] = true_rates
    model.input_data['age_weights'] = weight_strings

    # sample observed rate values from negative binomial distribution
    model.input_data['value'] = mc.rnegative_binomial(
        n * model.input_data['true'], delta_true) / n

    print(model.input_data.drop(['standard_error', 'upper_ci', 'lower_ci'],
                                axis=1))
    return model
Example #6
0
def simulate_age_group_data(N=50, delta_true=150, pi_true=true_rate_function):
    """ generate simulated data

    Creates a simple model with N rows.  Every row receives a random age
    group, a random effective sample size, a true rate (the weighted average
    of the age-specific rate `pi_true` over the age group, using a
    row-specific exponential age-weight pattern) and an observed value drawn
    from a negative binomial distribution with dispersion `delta_true`.

    Up to the first ten rows are fixed to ten-year age groups centred on
    5, 15, ..., 95.  The simulated table is printed and the model returned.
    """
    # start with a simple model with N rows of data
    model = data_simulation.simple_model(N)


    # record the true age-specific rates
    model.ages = pl.arange(0, 101, 1)
    model.pi_age_true = pi_true(model.ages)


    # choose age groups randomly
    age_width = mc.runiform(1, 100, size=N)
    age_mid = mc.runiform(age_width/2, 100-age_width/2, size=N)
    # fixed ten-year groups for the leading rows; limited to N rows so that
    # N < 10 does not fail on a length mismatch
    num_fixed = min(10, N)
    age_width[:num_fixed] = 10
    age_mid[:num_fixed] = pl.arange(5, 105, 10)[:num_fixed]

    age_start = pl.array(age_mid - age_width/2, dtype=int)
    age_end = pl.array(age_mid + age_width/2, dtype=int)

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end


    # choose effective sample size uniformly at random
    n = mc.runiform(100, 10000, size=N)
    model.input_data['effective_sample_size'] = n


    # integrate true age-specific rate across age groups to find true group rate
    group_rate = []
    group_weights = []
    for i in range(N):
        beta = mc.rnormal(0., .025**-2)

        age_weights = pl.exp(beta*model.ages)
        sum_pi_wt = pl.cumsum(model.pi_age_true*age_weights)
        sum_wt = pl.cumsum(age_weights)

        # evaluate the interval average for row i only
        first, last = age_start[i], age_end[i]
        group_rate.append((sum_pi_wt[last] - sum_pi_wt[first]) / (sum_wt[last] - sum_wt[first]))
        group_weights.append(';'.join(['%.4f'%w for w in age_weights[first:(last+1)]]))

    # whole-column assignment replaces the per-cell writes
    model.input_data['true'] = group_rate
    model.input_data['age_weights'] = group_weights

    # sample observed rate values from negative binomial distribution
    model.input_data['value'] = mc.rnegative_binomial(n*model.input_data['true'], delta_true) / n

    print(model.input_data.drop(['standard_error', 'upper_ci', 'lower_ci'], axis=1))
    return model
Example #7
0
def test_neg_binom_model_sim(N=16):
    """Smoke test: fit a log-normal rate model to negative-binomial data.

    Simulates N observed rates (negative-binomial counts divided by
    log-normally distributed sample sizes), builds a log-normal rate model
    with a constant mean, and takes a single MCMC step.
    """
    # simulate negative binomial data
    pi_true = .01
    delta_true = 50

    sample_sizes = pl.array(pl.exp(mc.rnormal(10, 1**-2, size=N)), dtype=int)
    counts = pl.array(mc.rnegative_binomial(sample_sizes*pi_true, delta_true, size=N), dtype=float)
    observed_rates = counts/sample_sizes

    # create the log-normal model and its priors
    nodes = {}
    nodes['mu_age'] = mc.Uniform('mu_age', 0., 1000., value=.01)
    nodes['sigma'] = mc.Uniform('sigma', 0., 10000., value=1000.)
    # the same mean applies to every observation
    nodes['mu_interval'] = mc.Lambda('mu_interval', lambda mu=nodes['mu_age']: mu*pl.ones(N))
    likelihood_nodes = rate_model.log_normal_model(
        'sim', nodes['mu_interval'], nodes['sigma'], observed_rates, 1./pl.sqrt(sample_sizes))
    nodes.update(likelihood_nodes)

    # fit the model (a single step is enough for a smoke test)
    sampler = mc.MCMC(nodes)
    sampler.sample(1)
Example #8
0
def resample(data):
    """Overwrite the observed values in `data` with simulated draws.

    Each row's new value is a negative-binomial draw around the expected
    rate ``mu_pred + 1e-6``, divided by the row's effective sample size.

    Uncertainty is completed first: NaN or non-positive standard errors are
    rebuilt from the confidence interval, and missing effective sample sizes
    are rebuilt from the standard error.  Any effective sample size that is
    still NaN, or that is non-positive, is set to 1.0 with a printed warning.
    Zero counts as invalid because the sample size is used as a divisor.

    `data` is changed in place and returned; empty input is returned as is.
    """
    if len(data) == 0:
        return data

    delta_true = .1
    # offset keeps the expected rate strictly positive
    p = data['mu_pred'] + 1.e-6

    # TODO: abstract this block of code into rate_model.py; it is also called in data_model.py
    ## ensure that all data has uncertainty quantified appropriately
    # NOTE(review): these masked writes use data[col][mask]; with a pandas
    # DataFrame that depends on data[col] not being a copy -- confirm
    # first replace all missing se from ci
    missing_se = pl.isnan(
        data['standard_error']) | (data['standard_error'] <= 0)
    data['standard_error'][missing_se] = (
        data['upper_ci'][missing_se] - data['lower_ci'][missing_se]) / (2 *
                                                                        1.96)

    # then replace all missing ess with se
    missing_ess = pl.isnan(data['effective_sample_size'])
    data['effective_sample_size'][missing_ess] = data['value'][missing_ess] * (
        1 -
        data['value'][missing_ess]) / data['standard_error'][missing_ess]**2

    # warn about rows whose effective sample size is unquantified or
    # non-positive, and give them a sample size of one
    invalid_ess = pl.isnan(
        data['effective_sample_size']) | (data['effective_sample_size'] <= 0)
    invalid_count = sum(invalid_ess)
    if invalid_count > 0:
        print('WARNING: %d rows of data has invalid quantification of uncertainty.'
              % invalid_count)
        data['effective_sample_size'][invalid_ess] = 1.0

    n = data['effective_sample_size']

    data['true'] = p
    data['value'] = (1.0 *
                     mc.rnegative_binomial(n * p, delta_true * n * p)) / n

    # uncomment below to test the effect of having very wrong data
    #data['value'] = 0.
    #data['effective_sample_size'] = 1.e6

    return data
Example #9
0
def generate_data(N, delta_true, pi_true, heterogeneity, bias, sigma_prior):
    """Simulate N rows of age-interval rate data and attach empirical priors.

    The true rate of a row is the unweighted average of `pi_true` over the
    row's age interval (or the point value when the interval is a single
    age).  Observed values are negative-binomial draws divided by the
    effective sample size and multiplied by exp(`bias`).  The returned model
    also carries the age grid (`a`), the true age pattern (`pi_age_true`),
    `delta_true`, and empirical priors with mean `pi_age_true` and standard
    deviation `sigma_prior * pi_age_true`.
    """
    ages = pl.arange(0, 101, 1)
    pi_age_true = pi_true(ages)

    model = data_simulation.simple_model(N)
    rate_params = model.parameters['p']
    rate_params['parameter_age_mesh'] = range(0, 101, 10)
    rate_params['smoothness'] = {'amount': 'Moderately'}
    rate_params['heterogeneity'] = heterogeneity

    # random age intervals with age_start <= age_end <= 100
    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    # interval averages via differences of cumulative sums (uniform weights)
    age_weights = pl.ones_like(ages)
    cum_rate = pl.cumsum(pi_age_true*age_weights)
    cum_weight = pl.cumsum(age_weights)
    p = (cum_rate[age_end] - cum_rate[age_start]) / (cum_weight[age_end] - cum_weight[age_start])

    # single-age intervals would be 0/0 above; use the point value instead
    single_age = (age_start == age_end)
    if pl.any(single_age):
        p[single_age] = pi_age_true[age_start[single_age]]

    n = mc.runiform(10000, 100000, size=N)

    for column, values in (('age_start', age_start),
                           ('age_end', age_end),
                           ('effective_sample_size', n),
                           ('true', p)):
        model.input_data[column] = values
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n * pl.exp(bias)

    model.emp_priors = {
        ('p', 'mu'): pi_age_true,
        ('p', 'sigma'): sigma_prior*pi_age_true,
    }

    # keep the simulation truth on the model for later comparison
    model.a = ages
    model.pi_age_true = pi_age_true
    model.delta_true = delta_true

    return model
Example #10
0
 def p_pred(pi=pi, delta=delta, n=n_nonzero):
     """Draw predicted rates: negative-binomial counts over sample size.

     A tiny offset is added to both the expected count and the divisor.
     """
     offset = 1.0e-9
     counts = mc.rnegative_binomial(pi * n + offset, delta)
     sample_size = pl.array(n + offset, dtype=float)
     return counts / sample_size
Example #11
0
 def p_pred(pi=pi, delta=delta, n=n_nonzero):
     """Return simulated rates: negative-binomial draws divided by `n`.

     Both the expected count and the divisor are shifted by 1e-9.
     """
     tiny = 1.e-9
     simulated_counts = mc.rnegative_binomial(pi * n + tiny, delta)
     divisor = pl.array(n + tiny, dtype=float)
     return simulated_counts / divisor
Example #12
0
 def pred(pi=pi, delta=delta):
     """Draw a predicted rate for a sample of size `n_pred`.

     `n_pred` is read from the enclosing scope.
     """
     simulated_count = mc.rnegative_binomial(pi * n_pred, delta)
     return simulated_count / float(n_pred)
Example #13
0
import pylab as pl
import pymc as mc

import dismod3
import book_graphics
# re-execute book_graphics (presumably so edits made during an interactive
# session are picked up -- confirm)
reload(book_graphics)

# set font
book_graphics.set_font()

# parameters of the simulated example
n_small = 500
pi_true = .025
delta_true = 5.

# simulate 16 studies: integer sample sizes n, negative-binomial counts k with
# expected value n*pi_true, and observed rates r = k/n
# NOTE(review): the second argument of mc.rnormal is presumably a precision
# (hence the **-2) -- confirm against the PyMC version in use
n = pl.array(pl.exp(mc.rnormal(10, 1**-2, size=16)), dtype=int)
k = pl.array(mc.rnegative_binomial(n * pi_true, delta_true), dtype=float)
r = k / n

# sampler settings and plotting limits (not used within the lines shown here)
# NOTE(review): `iter` shadows the builtin of the same name at module level
iter = 20000
burn = 10000
thin = 10
results = {}
xmax = .07

### @export 'distribution-comparison'
pl.figure(**book_graphics.quarter_page_params)

# main axes, plus the grid of values on [0, 4 * n_small * pi_true) in steps of 0.1
ax = pl.axes([.1, .3, .85, .65])
x = pl.arange(0, n_small * pi_true * 4, .1)

# plot binomial distribution
Example #14
0
def validate_consistent_re(N=500, delta_true=.15, sigma_true=[.1,.1,.1,.1,.1], 
                           true=dict(i=quadratic, f=constant, r=constant)):
    """Simulate data from a consistent model with area random effects, refit
    a consistent model to it, and report how well the truth is recovered.

    Steps:
      1. build a consistent model whose 'i', 'r' and 'f' age patterns are set
         from the functions in `true` (evaluated at the knots);
      2. simulate N rows of randomly typed ('i', 'r', 'f', 'p') age-interval
         data, shift each row by the simulated random effects of the areas on
         the path from 'all' to the row's area, and sample observed values
         from a negative binomial distribution with dispersion `delta_true`;
      3. fit a fresh consistent model, show diagnostic plots (pl.show()
         blocks until the windows are closed), and compute quality metrics
         for the data predictions, delta, alpha, sigma and mu.

    `sigma_true` is passed to alpha_true_sim and used as the truth for the
    random-effect standard deviations.  The default arguments are mutable
    objects shared between calls; this function does not modify them.

    Returns the model with the fit, the metric tables and `results` attached.
    """
    types = pl.array(['i', 'r', 'f', 'p'])

    ## generate simulated data
    model = data_simulation.simple_model(N)
    model.input_data['effective_sample_size'] = 1.
    model.input_data['value'] = 0.
    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    # the simulation model: set the log-rate at each knot to the truth
    sim = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    for t in 'irf':
        for i, k_i in enumerate(sim[t]['knots']):
            sim[t]['gamma'][i].value = pl.log(true[t](k_i))

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    # each row gets one of the data types with equal probability
    data_type = types[mc.rcategorical(pl.ones(len(types), dtype=float) / float(len(types)), size=N)]


    a = pl.arange(101)
    age_weights = pl.ones_like(a)
    sum_wt = pl.cumsum(age_weights)

    p = pl.zeros(N)
    for t in types:
        mu_t = sim[t]['mu_age'].value
        sum_mu_wt = pl.cumsum(mu_t*age_weights)
    
        # interval average via differences of cumulative sums
        p_t = (sum_mu_wt[age_end] - sum_mu_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

        # correct cases where age_start == age_end
        i = age_start == age_end
        if pl.any(i):
            p_t[i] = mu_t[age_start[i]]

        # copy part into p
        p[data_type==t] = p_t[data_type==t]


    # add covariate shifts
    import dismod3
    import simplejson as json
    gbd_model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.hierarchy = gbd_model.hierarchy

    from validate_covariates import alpha_true_sim
    area_list = pl.array(['all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT', 'IRN', 'IRQ', 'JOR', 'SYR'])
    alpha = {}
    for t in types:
        alpha[t] = alpha_true_sim(model, area_list, sigma_true)
    print(json.dumps(alpha, indent=2))

    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]
    
    # shift every row by the random effects along the path 'all' -> area;
    # membership must be tested against alpha[t] (keyed by area), not against
    # alpha itself (keyed by data type), or no shift is ever applied
    for i, area in model.input_data['area'].iteritems():
        t = data_type[i]
        p[i] = p[i] * pl.exp(pl.sum([alpha[t][node] for node in nx.shortest_path(model.hierarchy, 'all', area) if node in alpha[t]]))

    n = mc.runiform(100, 10000, size=N)

    model.input_data['data_type'] = data_type
    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true) / n

    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    #model.map, model.mcmc = fit_model.fit_consistent_model(model.vars, iter=101, burn=0, thin=1, tune_interval=100)
    model.map, model.mcmc = fit_model.fit_consistent_model(model.vars, iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_convergence_diag(model.vars)

    graphics.plot_fit(model, model.vars, {}, {})
    for i, t in enumerate('i r f p rr pf'.split()):
        pl.subplot(2, 3, i+1)
        # thick white line under a thin red one gives the truth an outline
        pl.plot(range(101), sim[t]['mu_age'].value, 'w-', label='Truth', linewidth=2)
        pl.plot(range(101), sim[t]['mu_age'].value, 'r-', label='Truth', linewidth=1)

    pl.show()

    model.input_data['mu_pred'] = 0.
    model.input_data['sigma_pred'] = 0.
    # NOTE(review): chained indexing; relies on model.input_data[col] not
    # being a copy -- confirm against the pandas version in use
    for t in types:
        model.input_data['mu_pred'][data_type==t] = model.vars[t]['p_pred'].stats()['mean']
        model.input_data['sigma_pred'][data_type==t] = model.vars[t]['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true for t in types if t != 'rr']))
    model.delta['mu_pred'] = [pl.exp(model.vars[t]['eta'].trace()).mean() for t in types if t != 'rr']
    model.delta['sigma_pred'] = [pl.exp(model.vars[t]['eta'].trace()).std() for t in types if t != 'rr']
    data_simulation.add_quality_metrics(model.delta)

    model.alpha = pandas.DataFrame()
    model.sigma = pandas.DataFrame()
    for t in types:
        alpha_t = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
        alpha_t['true'] = pandas.Series(dict(alpha[t]))
        alpha_t['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars[t]['alpha']], index=model.vars[t]['U'].columns)
        alpha_t['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars[t]['alpha']], index=model.vars[t]['U'].columns)
        alpha_t['type'] = t
        model.alpha = model.alpha.append(alpha_t.dropna(), ignore_index=True)

        sigma_t = pandas.DataFrame(dict(true=sigma_true))
        sigma_t['mu_pred'] = [n.stats()['mean'] for n in model.vars[t]['sigma_alpha']]
        sigma_t['sigma_pred'] = [n.stats()['standard deviation'] for n in model.vars[t]['sigma_alpha']]
        model.sigma = model.sigma.append(sigma_t.dropna(), ignore_index=True)

    data_simulation.add_quality_metrics(model.alpha)
    data_simulation.add_quality_metrics(model.sigma)


    print('delta')
    print(model.delta)

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean()))

    model.mu = pandas.DataFrame()
    for t in types:
        model.mu = model.mu.append(pandas.DataFrame(dict(true=sim[t]['mu_age'].value,
                                                         mu_pred=model.vars[t]['mu_age'].stats()['mean'],
                                                         sigma_pred=model.vars[t]['mu_age'].stats()['standard deviation'])),
                                   ignore_index=True)
    data_simulation.add_quality_metrics(model.mu)
    print('\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.mu['abs_err'].mean(),
                                                                         pl.median(pl.absolute(model.mu['rel_err'].dropna())),
                                                                         model.mu['covered?'].mean()))
    print('')


    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print(model.results)

    return model
Example #15
0
 def predictions(value=value, N=N, mu_i=rates, delta=delta, Z=Z, eta=0.):
     """Simulate rates with a covariate-dependent dispersion.

     Counts are negative-binomial draws with expected value N*mu_i and
     dispersion delta + eta*Z; the result is the counts divided by N.
     `value` is accepted but not used in the body.
     """
     expected_counts = N*mu_i
     dispersion = delta + eta*Z
     return mc.rnegative_binomial(expected_counts, dispersion)/N
def validate_age_integrating_model_sim(N=500,
                                       delta_true=.15,
                                       pi_true=quadratic):
    """Simulate age-interval data, fit the data model, and score the fit.

    N rows with random age intervals are simulated from the age pattern
    `pi_true` with negative-binomial noise controlled by `delta_true`.  The
    data model for 'p' is then fitted, diagnostic plots are shown
    (pl.show() blocks until they are closed), and quality metrics for the
    data predictions, delta and the age pattern are computed and printed.

    Returns the model with the fit, the metric tables and `results` attached.
    """
    ## generate simulated data
    ages = pl.arange(0, 101, 1)
    pi_age_true = pi_true(ages)

    model = data_simulation.simple_model(N)
    #model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    #model.parameters['p']['smoothness'] = dict(amount='Very')

    # random age intervals with age_start <= age_end <= 100
    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    # interval averages via differences of cumulative sums (uniform weights)
    age_weights = pl.ones_like(ages)
    cum_rate = pl.cumsum(pi_age_true * age_weights)
    cum_weight = pl.cumsum(age_weights)
    p = (cum_rate[age_end] - cum_rate[age_start]) / (cum_weight[age_end] -
                                                     cum_weight[age_start])

    # single-age intervals would be 0/0 above; use the point value instead
    single_age = (age_start == age_end)
    if pl.any(single_age):
        p[single_age] = pi_age_true[age_start[single_age]]

    n = mc.runiform(100, 10000, size=N)

    for column, values in (('age_start', age_start),
                           ('age_end', age_end),
                           ('effective_sample_size', n),
                           ('true', p)):
        model.input_data[column] = values
    model.input_data['value'] = mc.rnegative_binomial(n * p,
                                                      delta_true * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total',
                                            'all', None, None, None)
    fitted = model.vars['p']
    model.map, model.mcmc = fit_model.fit_data_model(fitted,
                                                     iter=10000,
                                                     burn=5000,
                                                     thin=25,
                                                     tune_interval=100)

    # diagnostic plots, with the true age pattern overlaid on the last one
    graphics.plot_one_ppc(fitted, 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, fitted, {}, 'p')
    pl.plot(ages, pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    # posterior predictive summary for each data row
    pred_stats = fitted['p_pred'].stats()
    model.input_data['mu_pred'] = pred_stats['mean']
    model.input_data['sigma_pred'] = pred_stats['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    # delta is exp(eta), summarised over the trace
    delta_draws = pl.exp(fitted['eta'].trace())
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = delta_draws.mean()
    model.delta['sigma_pred'] = delta_draws.std()
    data_simulation.add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)

    data_bias = model.input_data['abs_err'].mean()
    data_mare = pl.median(pl.absolute(model.input_data['rel_err'].dropna()))
    data_coverage = model.input_data['covered?'].mean()
    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        data_bias, data_mare, data_coverage))

    # age-pattern estimates against the truth
    mu_stats = fitted['mu_age'].stats()
    model.mu = pandas.DataFrame(
        dict(true=pi_age_true,
             mu_pred=mu_stats['mean'],
             sigma_pred=mu_stats['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    for table_name in ('delta', 'mu', 'input_data'):
        data_simulation.add_to_results(model, table_name)
    model.results = pandas.DataFrame(model.results,
                                     columns='param bias mae mare pc'.split())

    print(model.results)

    return model
Example #17
0
def validate_covariate_model_dispersion(N=1000, delta_true=.15, pi_true=.01, zeta_true=[.5, -.5, 0.]):
    """Simulate data whose dispersion depends on covariates, fit the data
    model, and report how well the dispersion effects are recovered.

    N rows share the constant true rate `pi_true`.  Each row gets one binary
    covariate per entry of `zeta_true` (columns z_0, z_1, ...), and its
    dispersion is delta_true * exp(Z . zeta_true).  Observed values are
    negative-binomial draws divided by the effective sample size.

    After fitting, diagnostic plots are shown (pl.show() blocks until they
    are closed) and quality metrics for the data predictions, zeta and delta
    are computed and printed.

    `zeta_true` defaults to a list shared between calls; it is only read
    here.  Returns the model with the fit, the metric tables and `results`
    attached.
    """
    ## generate simulated data
    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    # binary covariates (as floats) and the row-specific dispersion they imply
    Z = mc.rbernoulli(.5, size=(N, len(zeta_true))) * 1.0
    delta = delta_true * pl.exp(pl.dot(Z, zeta_true))
    for i in range(len(zeta_true)):
        model.input_data['z_%d'%i] = Z[:,i]

    model.input_data['true'] = pi_true

    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta*n*p) / n


    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=5, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()


    # posterior predictive summary for each data row
    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)


    # dispersion effect estimates against the truth
    model.zeta = pandas.DataFrame(index=model.vars['p']['Z'].columns)
    model.zeta['true'] = zeta_true
    
    model.zeta['mu_pred'] = model.vars['p']['zeta'].stats()['mean']
    model.zeta['sigma_pred'] = model.vars['p']['zeta'].stats()['standard deviation']
    add_quality_metrics(model.zeta)

    print('\nzeta')
    print(model.zeta)
    
    # delta is exp(eta), summarised over the trace
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean()))
    print('effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.zeta['abs_err'].dropna())),
                                                           model.zeta.dropna()['covered?'].mean()))


    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'delta')
    add_to_results(model, 'input_data')
    add_to_results(model, 'zeta')
    model.results = pandas.DataFrame(model.results, columns='param bias mae mare pc'.split())

    return model
Example #18
0
# upper end of each age group, truncated to an integer age
# (age_mid, age_width and age_start are defined before this excerpt)
age_end = pl.array(age_mid + age_width / 2, dtype=int)

model.input_data['age_start'] = age_start
model.input_data['age_end'] = age_end

# choose effective sample size uniformly at random
n = mc.runiform(100, 10000, size=N)
model.input_data['effective_sample_size'] = n

# find true rate, with covariate
# (the age-specific rate at age_start, scaled by exp(x_cov * beta_true))
p = model.pi_age_true[age_start] * pl.exp(
    model.input_data['x_cov'] * beta_true)

# sample observed rate values from negative binomial distribution
model.input_data['true'] = p
model.input_data['value'] = mc.rnegative_binomial(n * p, delta_true) / n

# print model.input_data.drop(['standard_error', 'upper_ci', 'lower_ci'], axis=1)

# Create age-group model
## Spline model to represent age-specific rate
# (knots every 20 years from 0 to 100; the result is merged into model.vars with +=)
model.vars += dismod3.age_pattern.spline(name='sim',
                                         ages=model.ages,
                                         knots=pl.arange(0, 101, 20),
                                         smoothing=pl.inf,
                                         interpolation_method='linear')

## Midpoint model to represent age-group data
model.vars += dismod3.age_group.midpoint_approx(
    name='sim',
    ages=model.ages,
Example #19
0
def validate_age_pattern_model_sim(N=500, delta_true=.15, pi_true=quadratic):
    """Simulate single-age data, fit the data model, and score the fit.

    Each of the N rows is observed at one random age (age_start equals
    age_end), with true rate `pi_true` at that age and negative-binomial
    noise controlled by `delta_true`.  The data model for 'p' is fitted,
    diagnostic plots are shown (pl.show() blocks until they are closed), and
    quality metrics for the data predictions, delta and the age pattern are
    computed and printed.

    Returns the model with the fit, the metric tables and `results` attached.
    """
    ## generate simulated data
    ages = pl.arange(0, 101, 1)
    pi_age_true = pi_true(ages)

    model = data_simulation.simple_model(N)
    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)

    # one random age per row; the true rate is the age pattern at that age
    age_list = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    p = pi_age_true[age_list]
    n = mc.runiform(100, 10000, size=N)

    for column, values in (('age_start', age_list),
                           ('age_end', age_list),
                           ('effective_sample_size', n),
                           ('true', p)):
        model.input_data[column] = values
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    fitted = model.vars['p']
    model.map, model.mcmc = fit_model.fit_data_model(fitted, iter=10000, burn=5000, thin=25, tune_interval=100)

    # diagnostic plots, with the true age pattern overlaid on the last one
    graphics.plot_one_ppc(fitted, 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, fitted, {}, 'p')
    pl.plot(ages, pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    # posterior predictive summary for each data row
    pred_stats = fitted['p_pred'].stats()
    model.input_data['mu_pred'] = pred_stats['mean']
    model.input_data['sigma_pred'] = pred_stats['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    # delta is exp(eta), summarised over the trace
    delta_draws = pl.exp(fitted['eta'].trace())
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = delta_draws.mean()
    model.delta['sigma_pred'] = delta_draws.std()
    data_simulation.add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)

    data_bias = model.input_data['abs_err'].mean()
    data_mare = pl.median(pl.absolute(model.input_data['rel_err'].dropna()))
    data_coverage = model.input_data['covered?'].mean()
    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (data_bias, data_mare, data_coverage))

    # age-pattern estimates against the truth
    mu_stats = fitted['mu_age'].stats()
    model.mu = pandas.DataFrame(dict(true=pi_age_true,
                                     mu_pred=mu_stats['mean'],
                                     sigma_pred=mu_stats['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    for table_name in ('delta', 'mu', 'input_data'):
        data_simulation.add_to_results(model, table_name)
    model.results = pandas.DataFrame(model.results, columns='param bias mae mare pc'.split())

    print(model.results)

    return model
Example #20
0
def validate_covariate_model_fe(N=100,
                                delta_true=3,
                                pi_true=.01,
                                beta_true=[.5, -.5, 0.],
                                replicate=0):
    """Simulate data with fixed effects, fit the data model, and score the fit.

    Covariates x_0, x_1, ... are drawn with mc.rnormal, the true rate of each
    row is pi_true * exp(X . beta_true), and the observed value is a
    negative-binomial count divided by the effective sample size.  The 'p'
    data model is then fit and its predictions, the estimated betas, and the
    estimated delta (exp of the 'eta' trace) are compared with the truth.
    Diagnostic plots are shown with pl.show().

    Parameters
    ----------
    N : int
        number of simulated rows of input data
    delta_true : float
        second argument passed to mc.rnegative_binomial when simulating
        (presumably the over-dispersion parameter; confirm against PyMC)
    pi_true : float
        baseline rate shared by all rows before covariate effects
    beta_true : list of float
        true fixed-effect coefficients, one per simulated covariate; the
        list is only read, never modified
    replicate : int
        offset added to the base random seed, so that each replicate is
        reproducible

    Returns
    -------
    model : the data.ModelData instance, with input_data, beta, delta and
        results (a pandas.DataFrame) attributes filled in
    """
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    ## generate simulated data
    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    # add fixed effect to simulated data
    X = mc.rnormal(0., 1.**-2, size=(N, len(beta_true)))
    Y_true = pl.dot(X, beta_true)

    for i in range(len(beta_true)):
        model.input_data['x_%d' % i] = X[:, i]
    model.input_data['true'] = pi_true * pl.exp(Y_true)

    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n * p, delta_true) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total',
                                            'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'],
                                                     iter=10000,
                                                     burn=5000,
                                                     thin=5,
                                                     tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats(
    )['standard deviation']
    add_quality_metrics(model.input_data)

    # Build the column of true betas as a standalone Series and assign it in
    # one step; writing through model.beta['true'][label] is chained indexing
    # and is not guaranteed to reach the DataFrame.
    model.beta = pandas.DataFrame(index=model.vars['p']['X'].columns)
    beta_col = pandas.Series(0., index=model.beta.index)
    for i, beta_i in enumerate(beta_true):
        beta_col['x_%d' % i] = beta_i
    model.beta['true'] = beta_col

    beta_nodes = model.vars['p']['beta']
    model.beta['mu_pred'] = [node.stats()['mean'] for node in beta_nodes]
    model.beta['sigma_pred'] = [
        node.stats()['standard deviation'] for node in beta_nodes
    ]
    add_quality_metrics(model.beta)

    print('\nbeta')
    print(model.beta)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'beta')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    delta_trace = pl.exp(model.vars['p']['eta'].trace())
    model.delta['mu_pred'] = delta_trace.mean()
    model.delta['sigma_pred'] = delta_trace.std()
    add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)
    add_to_results(model, 'delta')

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean()))
    print('effect prediction MAE: %.3f, coverage: %.2f' % (
        pl.median(pl.absolute(model.beta['abs_err'].dropna())),
        model.beta.dropna()['covered?'].mean()))
    # 'beta' was already added to the results above; adding it a second time
    # would score the same parameter twice.
    add_to_results(model, 'input_data')

    model.results = pandas.DataFrame(model.results)
    return model
Example #21
0
def validate_covariate_model_dispersion(N=1000,
                                        delta_true=.15,
                                        pi_true=.01,
                                        zeta_true=[.5, -.5, 0.]):
    """Simulate data with dispersion covariates, fit the data model, score it.

    Binary covariates z_0, z_1, ... are drawn with mc.rbernoulli and scale
    the value delta_true by exp(Z . zeta_true) for each row.  Observed values
    are negative-binomial counts divided by the effective sample size.  The
    'p' data model is fit and its predictions, zeta estimates and delta
    estimate (exp of the 'eta' trace) are compared with the simulated truth.
    Diagnostic plots are shown with pl.show().

    Parameters
    ----------
    N : int
        number of simulated rows of input data
    delta_true : float
        baseline value multiplied by the covariate effect before it is
        passed to mc.rnegative_binomial
    pi_true : float
        true rate, identical for every row
    zeta_true : list of float
        true dispersion-covariate coefficients; only read, never modified

    Returns
    -------
    model : the data.ModelData instance, with input_data, zeta, delta and
        results (a pandas.DataFrame) attributes filled in
    """
    ## generate simulated data
    # age grid and constant age pattern; not used further in this function
    a = pl.arange(0, 100, 1)
    pi_age_true = pi_true * pl.ones_like(a)

    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    # one 0/1 covariate column per zeta coefficient
    Z = 1.0 * mc.rbernoulli(.5, size=(N, len(zeta_true)))
    log_scale = pl.dot(Z, zeta_true)
    delta = delta_true * pl.exp(log_scale)
    for j, z_col in enumerate(Z.T):
        model.input_data['z_%d' % j] = z_col

    model.input_data['true'] = pi_true

    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    simulated_counts = mc.rnegative_binomial(n * p, delta * n * p)
    model.input_data['value'] = simulated_counts / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total',
                                            'all', None, None, None)
    fitted = fit_model.fit_data_model(model.vars['p'],
                                      iter=10000,
                                      burn=5000,
                                      thin=5,
                                      tune_interval=100)
    model.map, model.mcmc = fitted

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    # posterior predictive summary for every row of input data
    p_pred_stats = model.vars['p']['p_pred'].stats()
    model.input_data['mu_pred'] = p_pred_stats['mean']
    model.input_data['sigma_pred'] = p_pred_stats['standard deviation']
    add_quality_metrics(model.input_data)

    # dispersion-covariate coefficients: truth against posterior summary
    zeta_stats = model.vars['p']['zeta'].stats()
    model.zeta = pandas.DataFrame(index=model.vars['p']['Z'].columns)
    model.zeta['true'] = zeta_true
    model.zeta['mu_pred'] = zeta_stats['mean']
    model.zeta['sigma_pred'] = zeta_stats['standard deviation']
    add_quality_metrics(model.zeta)

    print('\nzeta')
    print(model.zeta)

    # delta is summarized from the exponentiated trace of 'eta'
    delta_trace = pl.exp(model.vars['p']['eta'].trace())
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = delta_trace.mean()
    model.delta['sigma_pred'] = delta_trace.std()
    add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)

    data_bias = model.input_data['abs_err'].mean()
    data_mare = pl.median(pl.absolute(model.input_data['rel_err'].dropna()))
    data_coverage = model.input_data['covered?'].mean()
    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        data_bias, data_mare, data_coverage))

    effect_mae = pl.median(pl.absolute(model.zeta['abs_err'].dropna()))
    effect_coverage = model.zeta.dropna()['covered?'].mean()
    print('effect prediction MAE: %.3f, coverage: %.2f' % (
        effect_mae, effect_coverage))

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    for table_name in ['delta', 'input_data', 'zeta']:
        add_to_results(model, table_name)
    result_columns = 'param bias mae mare pc'.split()
    model.results = pandas.DataFrame(model.results, columns=result_columns)

    return model
Example #22
0
def validate_covariate_model_re(N=500,
                                delta_true=.15,
                                pi_true=.01,
                                sigma_true=[.1, .1, .1, .1, .1],
                                ess=1000):
    """Simulate data with area random effects, fit the data model, score it.

    Areas are sampled uniformly from a subset of the GBD hierarchy, each
    row's true rate is pi_true * exp(sum of the alpha values on the path from
    'all' to the row's area), and observed values are negative-binomial
    counts divided by the effective sample size.  The 'p' data model is fit
    and its predictions, alpha, sigma_alpha and delta estimates are compared
    with the simulated truth.  Diagnostic plots are shown with pl.show().

    Parameters
    ----------
    N : int
        number of simulated rows of input data
    delta_true : float
        factor multiplying n*p in the second argument of
        mc.rnegative_binomial
    pi_true : float
        baseline rate before area effects
    sigma_true : list of float
        passed to alpha_true_sim and used as the truth for sigma_alpha;
        only read, never modified
    ess : number
        effective sample size assigned to every row

    Returns
    -------
    model : the data.ModelData instance, with input_data, alpha, sigma,
        delta and results (a pandas.DataFrame) attributes filled in
    """
    ## set simulation parameters
    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(
        json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.parameters['p'][
        'heterogeneity'] = 'Slightly'  # ensure heterogeneity is slightly

    # candidate areas: each child of 'all' (sr), each of its children (r),
    # and the first five successors (in sorted order) of every r
    area_list = []
    for sr in sorted(model.hierarchy.successors('all')):
        area_list.append(sr)
        for r in sorted(model.hierarchy.successors(sr)):
            area_list.append(r)
            area_list += sorted(model.hierarchy.successors(r))[:5]
    area_list = pl.array(area_list)

    ## generate simulation data
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    alpha = alpha_true_sim(model, area_list, sigma_true)

    # choose observed prevalence values
    model.input_data['effective_sample_size'] = ess

    uniform_weights = pl.ones(len(area_list)) / float(len(area_list))
    model.input_data['area'] = area_list[mc.rcategorical(uniform_weights, N)]

    # Compute every row's true rate first and assign the column once; the
    # per-row form model.input_data['true'][i] = ... is chained indexing and
    # is not guaranteed to write into the DataFrame.
    true_rate = []
    for area in model.input_data['area']:
        path = nx.shortest_path(model.hierarchy, 'all', area)
        area_effect = pl.sum([alpha[node] for node in path if node in alpha])
        true_rate.append(pi_true * pl.exp(area_effect))
    model.input_data['true'] = true_rate

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n * p,
                                                      delta_true * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total',
                                            'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'],
                                                     iter=20000,
                                                     burn=10000,
                                                     thin=10,
                                                     tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats(
    )['standard deviation']
    add_quality_metrics(model.input_data)

    # one row per node of the hierarchy; nodes without a true or estimated
    # effect are left as NaN by the index alignment of the Series below
    alpha_nodes = model.vars['p']['alpha']
    alpha_names = model.vars['p']['U'].columns
    model.alpha = pandas.DataFrame(
        index=[node
               for node in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series(
        [node.stats()['mean'] for node in alpha_nodes], index=alpha_names)
    model.alpha['sigma_pred'] = pandas.Series(
        [node.stats()['standard deviation'] for node in alpha_nodes],
        index=alpha_names)
    add_quality_metrics(model.alpha)

    print('\nalpha')
    print(model.alpha.dropna())

    sigma_nodes = model.vars['p']['sigma_alpha']
    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [node.stats()['mean'] for node in sigma_nodes]
    model.sigma['sigma_pred'] = [
        node.stats()['standard deviation'] for node in sigma_nodes
    ]
    add_quality_metrics(model.sigma)

    print('sigma_alpha')
    print(model.sigma)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'sigma')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    delta_trace = pl.exp(model.vars['p']['eta'].trace())
    model.delta['mu_pred'] = delta_trace.mean()
    model.delta['sigma_pred'] = delta_trace.std()
    add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)
    add_to_results(model, 'delta')

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean()))
    print('effect prediction MAE: %.3f, coverage: %.2f' % (
        pl.median(pl.absolute(model.alpha['abs_err'].dropna())),
        model.alpha.dropna()['covered?'].mean()))
    add_to_results(model, 'input_data')
    add_to_results(model, 'alpha')

    model.results = pandas.DataFrame(model.results)
    return model
Example #23
0
def validate_covariate_model_fe(N=100, delta_true=3, pi_true=.01, beta_true=[.5, -.5, 0.], replicate=0):
    """Simulate data with fixed effects, fit the data model, and score the fit.

    Covariates x_0, x_1, ... are drawn with mc.rnormal, the true rate of each
    row is pi_true * exp(X . beta_true), and the observed value is a
    negative-binomial count divided by the effective sample size.  The 'p'
    data model is then fit and its predictions, the estimated betas, and the
    estimated delta (exp of the 'eta' trace) are compared with the truth.
    Diagnostic plots are shown with pl.show().

    Parameters
    ----------
    N : int
        number of simulated rows of input data
    delta_true : float
        second argument passed to mc.rnegative_binomial when simulating
        (presumably the over-dispersion parameter; confirm against PyMC)
    pi_true : float
        baseline rate shared by all rows before covariate effects
    beta_true : list of float
        true fixed-effect coefficients, one per simulated covariate; the
        list is only read, never modified
    replicate : int
        offset added to the base random seed, so that each replicate is
        reproducible

    Returns
    -------
    model : the data.ModelData instance, with input_data, beta, delta and
        results (a pandas.DataFrame) attributes filled in
    """
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    ## generate simulated data
    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    # add fixed effect to simulated data
    X = mc.rnormal(0., 1.**-2, size=(N,len(beta_true)))
    Y_true = pl.dot(X, beta_true)

    for i in range(len(beta_true)):
        model.input_data['x_%d'%i] = X[:,i]
    model.input_data['true'] = pi_true * pl.exp(Y_true)

    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=5, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)

    # Build the column of true betas as a standalone Series and assign it in
    # one step; writing through model.beta['true'][label] is chained indexing
    # and is not guaranteed to reach the DataFrame.
    model.beta = pandas.DataFrame(index=model.vars['p']['X'].columns)
    beta_col = pandas.Series(0., index=model.beta.index)
    for i, beta_i in enumerate(beta_true):
        beta_col['x_%d'%i] = beta_i
    model.beta['true'] = beta_col

    beta_nodes = model.vars['p']['beta']
    model.beta['mu_pred'] = [node.stats()['mean'] for node in beta_nodes]
    model.beta['sigma_pred'] = [node.stats()['standard deviation'] for node in beta_nodes]
    add_quality_metrics(model.beta)

    print('\nbeta')
    print(model.beta)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'beta')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    delta_trace = pl.exp(model.vars['p']['eta'].trace())
    model.delta['mu_pred'] = delta_trace.mean()
    model.delta['sigma_pred'] = delta_trace.std()
    add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)
    add_to_results(model, 'delta')

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean()))
    print('effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.beta['abs_err'].dropna())),
                                                           model.beta.dropna()['covered?'].mean()))
    # 'beta' was already added to the results above; adding it a second time
    # would score the same parameter twice.
    add_to_results(model, 'input_data')

    model.results = pandas.DataFrame(model.results)
    return model
Example #24
0
# number of independent simulation replicates to run
replicates = 1000

# accumulators holding two empty lists each; nothing is appended to them in
# the part of the script shown here
# NOTE(review): presumably one list per model being compared -- confirm
residuals = [[], []]
coverage = [[], []]

### @export 'neg-binom-sim-study'
# simulation truth: the rate pi_true and the negative-binomial parameter
# delta_true; n_pred is the sample size used for the predictive draw in pred
pi_true = .025
delta_true = 5.
n_pred = 1.e9

for i in range(replicates):
    print '\nsimulation replicate %d' % i
    ## generate simulated data
    # 16 sample sizes: exp of mc.rnormal(10, 1**-2) draws, truncated to int
    n = pl.array(pl.exp(mc.rnormal(10, 1**-2, size=16)), dtype=int)
    # simulated counts for each sample size, and the resulting observed rates
    k = pl.array(mc.rnegative_binomial(n*pi_true, delta_true), dtype=float)
    r = k/n


    ## setup negative binomial model
    # pi is constrained to [0, 1]; delta is given an uninformative prior
    pi = mc.Uniform('pi', lower=0, upper=1, value=.5)
    delta = mc.Uninformative('delta', value=100.)

    # likelihood term: the counts r*n are evaluated against pi*n and delta
    @mc.potential
    def obs(pi=pi, delta=delta):
        return mc.negative_binomial_like(r*n, pi*n, delta)

    # predictive draw: sample a count at sample size n_pred and convert it
    # back to a rate by dividing by n_pred
    @mc.deterministic
    def pred(pi=pi, delta=delta):
        return mc.rnegative_binomial(pi*n_pred, delta) / float(n_pred)
def validate_ai_re(N=500, delta_true=.15, sigma_true=[.1,.1,.1,.1,.1], pi_true=quadratic, smoothness='Moderately', heterogeneity='Slightly'):
    """Simulate age-interval data with area random effects, fit, and score.

    Each row gets a random age interval and area.  Its true rate is the
    average of pi_true over the interval times exp(sum of the alpha values on
    the path from 'all' to the row's area); the observed value is a
    negative-binomial count divided by the effective sample size.  The 'p'
    data model is fit for 'north_africa_middle_east' and its predictions,
    age pattern, alpha, sigma_alpha and delta estimates are compared with
    the simulated truth.  Diagnostic plots are shown with pl.show().

    Parameters
    ----------
    N : int
        number of simulated rows of input data
    delta_true : float
        factor multiplying n*p in the second argument of
        mc.rnegative_binomial
    sigma_true : list of float
        passed to alpha_true_sim and used as the truth for sigma_alpha;
        only read, never modified
    pi_true : callable
        maps the array of ages 0..100 to the true age-specific rate
    smoothness : str
        stored as model.parameters['p']['smoothness']['amount']
    heterogeneity : str
        stored as model.parameters['p']['heterogeneity']

    Returns
    -------
    model : the simulated model, with input_data, delta, alpha, sigma, mu
        and results attributes filled in
    """
    ## generate simulated data
    a = pl.arange(0, 101, 1)
    pi_age_true = pi_true(a)

    import dismod3
    import simplejson as json
    # the GBD model is loaded only to borrow its area hierarchy
    gbd_model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))

    model = data_simulation.simple_model(N)
    model.hierarchy = gbd_model.hierarchy

    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    model.parameters['p']['smoothness'] = dict(amount=smoothness)
    model.parameters['p']['heterogeneity'] = heterogeneity

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    # average rate over each age interval, from differences of cumulative sums
    age_weights = pl.ones_like(a)
    sum_pi_wt = pl.cumsum(pi_age_true*age_weights)
    sum_wt = pl.cumsum(age_weights*1.)
    p_age = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

    # correct cases where age_start == age_end (the ratio above is 0/0 there)
    single_age = age_start == age_end
    if pl.any(single_age):
        p_age[single_age] = pi_age_true[age_start[single_age]]

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, size=N)

    from validate_covariates import alpha_true_sim
    area_list = pl.array(['all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT', 'IRN', 'IRQ', 'JOR', 'SYR'])
    alpha = alpha_true_sim(model, area_list, sigma_true)
    print(alpha)

    # placeholder, overwritten below; created here so the column keeps the
    # position it had before 'area' is assigned
    model.input_data['true'] = pl.nan

    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]

    # Compute every row's true rate first and assign the column once; the
    # per-row form model.input_data['true'][i] = ... is chained indexing and
    # is not guaranteed to write into the DataFrame.  Distinct loop names
    # also keep the age array `a` intact for plotting.
    true_rate = []
    for row, area in model.input_data['area'].iteritems():
        path = nx.shortest_path(model.hierarchy, 'all', area)
        area_effect = pl.sum([alpha[node] for node in path if node in alpha])
        true_rate.append(p_age[row] * pl.exp(area_effect))
    model.input_data['true'] = true_rate
    p = model.input_data['true']

    n = model.input_data['effective_sample_size']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'north_africa_middle_east', 'total', 'all', None, None, None)
    # for a quick smoke test, iter=1005, burn=500, thin=5 can be used instead
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(a, pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    delta_trace = pl.exp(model.vars['p']['eta'].trace())
    model.delta['mu_pred'] = delta_trace.mean()
    model.delta['sigma_pred'] = delta_trace.std()
    data_simulation.add_quality_metrics(model.delta)

    # one row per hierarchy node; rows lacking a true or estimated effect are
    # dropped before scoring
    alpha_nodes = model.vars['p']['alpha']
    alpha_names = model.vars['p']['U'].columns
    model.alpha = pandas.DataFrame(index=[node for node in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series([node.stats()['mean'] for node in alpha_nodes], index=alpha_names)
    model.alpha['sigma_pred'] = pandas.Series([node.stats()['standard deviation'] for node in alpha_nodes], index=alpha_names)
    model.alpha = model.alpha.dropna()
    data_simulation.add_quality_metrics(model.alpha)

    sigma_nodes = model.vars['p']['sigma_alpha']
    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [node.stats()['mean'] for node in sigma_nodes]
    model.sigma['sigma_pred'] = [node.stats()['standard deviation'] for node in sigma_nodes]
    data_simulation.add_quality_metrics(model.sigma)

    print('delta')
    print(model.delta)

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean()))

    model.mu = pandas.DataFrame(dict(true=pi_age_true,
                                     mu_pred=model.vars['p']['mu_age'].stats()['mean'],
                                     sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print(model.results)

    return model
Example #26
0
 def pred(pi=pi, delta=delta):
     """Draw one count at sample size n_pred and return it as a rate."""
     # the count is sampled with pi*n_pred and delta as the two arguments
     simulated_count = mc.rnegative_binomial(pi*n_pred, delta)
     return simulated_count / float(n_pred)
Example #27
0
# record the age interval of every simulated row (age_start and age_end are
# defined earlier in the script, outside this excerpt)
model.input_data['age_start'] = age_start
model.input_data['age_end'] = age_end


# choose effective sample size uniformly at random
n = mc.runiform(100, 10000, size=N)
model.input_data['effective_sample_size'] = n


# find true rate, with covariate
# (the true age pattern is looked up at each row's starting age and scaled by
# exp(x_cov * beta_true))
p = model.pi_age_true[age_start] * pl.exp(model.input_data['x_cov']*beta_true)


# sample observed rate values from negative binomial distribution
# (counts are drawn with n*p and delta_true, then divided by n to give rates)
model.input_data['true'] = p
model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true) / n

# print model.input_data.drop(['standard_error', 'upper_ci', 'lower_ci'], axis=1)






# Create age-group model
## Spline model to represent age-specific rate
# knots are placed every 20 years from age 0 to 100 and joined linearly
# NOTE(review): smoothing=pl.inf presumably means no smoothing penalty --
# confirm against dismod3.age_pattern.spline
model.vars += dismod3.age_pattern.spline(name='sim', ages=model.ages,
                                         knots=pl.arange(0,101,20),
                                         smoothing=pl.inf,
                                         interpolation_method='linear')
def validate_consistent_model_sim(N=500,
                                  delta_true=.5,
                                  true=dict(i=quadratic,
                                            f=constant,
                                            r=constant)):
    """Simulate data from a consistent model, refit it, and score the fit.

    A consistent model is built once to generate the truth: the 'gamma'
    values of the i, r and f age patterns are set at their knots to the log
    of the functions in `true`.  Each simulated row is assigned a random
    data type (i, r, f or p) and age interval; its true value is the average
    of that type's mu_age over the interval, and its observed value is a
    negative-binomial count divided by the effective sample size.  A second
    consistent model is then fit to the simulated data and compared with the
    truth.  Diagnostic plots are shown with pl.show().

    Parameters
    ----------
    N : int
        number of simulated rows of input data
    delta_true : float
        factor multiplying n*p in the second argument of
        mc.rnegative_binomial; also used as the truth for every type's delta
    true : dict
        maps 'i', 'r' and 'f' to callables giving the true rate at a knot
        age; only read, never modified

    Returns
    -------
    model : the simulated model, with input_data, delta, mu and results
        attributes filled in
    """
    types = pl.array(['i', 'r', 'f', 'p'])

    ## generate simulated data
    model = data_simulation.simple_model(N)
    model.input_data['effective_sample_size'] = 1.
    model.input_data['value'] = 0.

    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    sim = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    for t in 'irf':
        for k, knot_age in enumerate(sim[t]['knots']):
            sim[t]['gamma'][k].value = pl.log(true[t](knot_age))

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    data_type = types[mc.rcategorical(pl.ones(len(types), dtype=float) /
                                      float(len(types)),
                                      size=N)]

    a = pl.arange(101)
    age_weights = pl.ones_like(a)
    sum_wt = pl.cumsum(age_weights)

    # rows whose interval is a single age; the cumulative-sum ratio below is
    # 0/0 for them and is replaced by the point value
    single_age = age_start == age_end

    p = pl.zeros(N)
    for t in types:
        mu_t = sim[t]['mu_age'].value
        sum_mu_wt = pl.cumsum(mu_t * age_weights)

        p_t = (sum_mu_wt[age_end] - sum_mu_wt[age_start]) / (sum_wt[age_end] -
                                                             sum_wt[age_start])

        # correct cases where age_start == age_end
        if pl.any(single_age):
            p_t[single_age] = mu_t[age_start[single_age]]

        # copy part into p
        rows_t = data_type == t
        p[rows_t] = p_t[rows_t]
    n = mc.runiform(100, 10000, size=N)

    model.input_data['data_type'] = data_type
    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n * p,
                                                      delta_true * n * p) / n

    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars = consistent_model.consistent_model(model, 'all', 'total',
                                                   'all', {})
    model.map, model.mcmc = fit_model.fit_consistent_model(model.vars,
                                                           iter=10000,
                                                           burn=5000,
                                                           thin=25,
                                                           tune_interval=100)

    graphics.plot_convergence_diag(model.vars)

    graphics.plot_fit(model, model.vars, {}, {})
    # overlay the truth on each panel: a wide white line under a thin red one
    for panel, t in enumerate('i r f p rr pf'.split()):
        pl.subplot(2, 3, panel + 1)
        pl.plot(a, sim[t]['mu_age'].value, 'w-', label='Truth', linewidth=2)
        pl.plot(a, sim[t]['mu_age'].value, 'r-', label='Truth', linewidth=1)

    #graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    #pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    # Collect the predictions of each data type from that type's own p_pred
    # node.  Both the mean and the standard deviation must come from
    # model.vars[t]; reading the standard deviation from model.vars['p'] for
    # every type pairs rows with the wrong predictions.  Filling plain arrays
    # and assigning each column once also avoids chained assignment into the
    # DataFrame.
    mu_pred = pl.zeros(len(data_type))
    sigma_pred = pl.zeros(len(data_type))
    for t in types:
        rows_t = data_type == t
        pred_stats = model.vars[t]['p_pred'].stats()
        mu_pred[rows_t] = pred_stats['mean']
        sigma_pred[rows_t] = pred_stats['standard deviation']
    model.input_data['mu_pred'] = mu_pred
    model.input_data['sigma_pred'] = sigma_pred
    data_simulation.add_quality_metrics(model.input_data)

    # one delta per data type, summarized from the exponentiated 'eta' trace
    delta_traces = [pl.exp(model.vars[t]['eta'].trace()) for t in types]
    model.delta = pandas.DataFrame(dict(true=[delta_true for t in types]))
    model.delta['mu_pred'] = [trace.mean() for trace in delta_traces]
    model.delta['sigma_pred'] = [trace.std() for trace in delta_traces]
    data_simulation.add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean()))

    # stack the age patterns of all types into one table
    mu_frames = []
    for t in types:
        mu_stats = model.vars[t]['mu_age'].stats()
        mu_frames.append(pandas.DataFrame(
            dict(true=sim[t]['mu_age'].value,
                 mu_pred=mu_stats['mean'],
                 sigma_pred=mu_stats['standard deviation'])))
    model.mu = pandas.concat(mu_frames, ignore_index=True)
    data_simulation.add_quality_metrics(model.mu)
    print('\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.mu['abs_err'].mean(),
        pl.median(pl.absolute(
            model.mu['rel_err'].dropna())), model.mu['covered?'].mean()))
    print('')

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.finalize_results(model)

    print(model.results)

    return model
Example #29
0
def validate_covariate_model_re(N=500, delta_true=.15, pi_true=.01, sigma_true=[.1,.1,.1,.1,.1], ess=1000):
    """Simulate data with area random effects, fit the data model, score it.

    Areas are sampled uniformly from a subset of the GBD hierarchy, each
    row's true rate is pi_true * exp(sum of the alpha values on the path from
    'all' to the row's area), and observed values are negative-binomial
    counts divided by the effective sample size.  The 'p' data model is fit
    and its predictions, alpha, sigma_alpha and delta estimates are compared
    with the simulated truth.  Diagnostic plots are shown with pl.show().

    Parameters
    ----------
    N : int
        number of simulated rows of input data
    delta_true : float
        factor multiplying n*p in the second argument of
        mc.rnegative_binomial
    pi_true : float
        baseline rate before area effects
    sigma_true : list of float
        passed to alpha_true_sim and used as the truth for sigma_alpha;
        only read, never modified
    ess : number
        effective sample size assigned to every row

    Returns
    -------
    model : the data.ModelData instance, with input_data, alpha, sigma,
        delta and results (a pandas.DataFrame) attributes filled in
    """
    ## set simulation parameters
    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.parameters['p']['heterogeneity'] = 'Slightly'  # ensure heterogeneity is slightly

    # candidate areas: each child of 'all' (sr), each of its children (r),
    # and the first five successors (in sorted order) of every r
    area_list = []
    for sr in sorted(model.hierarchy.successors('all')):
        area_list.append(sr)
        for r in sorted(model.hierarchy.successors(sr)):
            area_list.append(r)
            area_list += sorted(model.hierarchy.successors(r))[:5]
    area_list = pl.array(area_list)

    ## generate simulation data
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    alpha = alpha_true_sim(model, area_list, sigma_true)

    # choose observed prevalence values
    model.input_data['effective_sample_size'] = ess

    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]

    # Compute every row's true rate first and assign the column once; the
    # per-row form model.input_data['true'][i] = ... is chained indexing and
    # is not guaranteed to write into the DataFrame.
    true_rate = []
    for area in model.input_data['area']:
        path = nx.shortest_path(model.hierarchy, 'all', area)
        area_effect = pl.sum([alpha[node] for node in path if node in alpha])
        true_rate.append(pi_true * pl.exp(area_effect))
    model.input_data['true'] = true_rate

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=20000, burn=10000, thin=10, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)

    # one row per node of the hierarchy; nodes without a true or estimated
    # effect are left as NaN by the index alignment of the Series below
    alpha_nodes = model.vars['p']['alpha']
    alpha_names = model.vars['p']['U'].columns
    model.alpha = pandas.DataFrame(index=[node for node in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series([node.stats()['mean'] for node in alpha_nodes], index=alpha_names)
    model.alpha['sigma_pred'] = pandas.Series([node.stats()['standard deviation'] for node in alpha_nodes], index=alpha_names)
    add_quality_metrics(model.alpha)

    print('\nalpha')
    print(model.alpha.dropna())

    sigma_nodes = model.vars['p']['sigma_alpha']
    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [node.stats()['mean'] for node in sigma_nodes]
    model.sigma['sigma_pred'] = [node.stats()['standard deviation'] for node in sigma_nodes]
    add_quality_metrics(model.sigma)

    print('sigma_alpha')
    print(model.sigma)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'sigma')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    delta_trace = pl.exp(model.vars['p']['eta'].trace())
    model.delta['mu_pred'] = delta_trace.mean()
    model.delta['sigma_pred'] = delta_trace.std()
    add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)
    add_to_results(model, 'delta')

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean()))
    print('effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.alpha['abs_err'].dropna())),
                                                          model.alpha.dropna()['covered?'].mean()))
    add_to_results(model, 'input_data')
    add_to_results(model, 'alpha')

    model.results = pandas.DataFrame(model.results)
    return model
def validate_ai_re(N=500,
                   delta_true=.15,
                   sigma_true=[.1, .1, .1, .1, .1],
                   pi_true=quadratic,
                   smoothness='Moderately',
                   heterogeneity='Slightly'):
    """Simulate age-interval data with area effects, fit it, and score the fit.

    The function draws N rows with random age intervals, areas and effective
    sample sizes, computes a "true" rate for each row from ``pi_true`` and the
    simulated area effects, draws noisy observations from a negative binomial
    distribution, fits ``data_model.data_model`` to them, and compares the
    posterior summaries with the simulated truth.

    Parameters
    ----------
    N : number of rows to simulate.
    delta_true : dispersion used in the negative binomial draw; it is also
        stored as the 'true' value compared against ``exp(eta)``.
    sigma_true : values handed to ``alpha_true_sim`` and stored as the 'true'
        column of ``model.sigma``.
    pi_true : callable evaluated on the ages 0..100 to give the true age
        pattern.
    smoothness : stored as ``model.parameters['p']['smoothness']['amount']``.
    heterogeneity : stored as ``model.parameters['p']['heterogeneity']``.

    Returns
    -------
    The model object, with ``input_data``, ``delta``, ``alpha``, ``sigma``,
    ``mu`` and ``results`` attributes filled in.

    Side effects: prints summaries to stdout, draws diagnostic plots and
    calls ``pl.show()``.
    """
    # NOTE(review): the sigma_true default is a list shared between calls; this
    # function does not mutate it, but confirm alpha_true_sim does not either.

    ## generate simulated data
    ages = pl.arange(0, 101, 1)
    pi_age_true = pi_true(ages)

    import dismod3
    import simplejson as json
    # a default-constructed DiseaseJson is loaded only to borrow its area
    # hierarchy; the model object itself is replaced just below
    model = data.ModelData.from_gbd_jsons(
        json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    gbd_hierarchy = model.hierarchy

    model = data_simulation.simple_model(N)
    model.hierarchy = gbd_hierarchy

    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    model.parameters['p']['smoothness'] = dict(amount=smoothness)
    model.parameters['p']['heterogeneity'] = heterogeneity

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    # differences of cumulative sums give the weighted mean of pi_age_true
    # over the ages age_start+1 .. age_end
    age_weights = pl.ones_like(ages)
    sum_pi_wt = pl.cumsum(pi_age_true * age_weights)
    sum_wt = pl.cumsum(age_weights * 1.)
    p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] -
                                                       sum_wt[age_start])

    # correct cases where age_start == age_end (the ratio above is 0/0 there)
    point_age = age_start == age_end
    if pl.any(point_age):
        p[point_age] = pi_age_true[age_start[point_age]]

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, size=N)

    from validate_covariates import alpha_true_sim
    area_list = pl.array([
        'all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT',
        'IRN', 'IRQ', 'JOR', 'SYR'
    ])
    alpha = alpha_true_sim(model, area_list, sigma_true)
    print(alpha)

    # placeholder; creating the column here keeps 'true' ahead of 'area' in
    # the column order
    model.input_data['true'] = pl.nan

    model.input_data['area'] = area_list[mc.rcategorical(
        pl.ones(len(area_list)) / float(len(area_list)), N)]

    # The true rate of a row is its age-interval rate scaled by exp of the sum
    # of alpha over the hierarchy path from 'all' to the row's area.  That sum
    # depends only on the area, so compute it once per distinct area, and
    # assign the finished column in one step instead of writing element by
    # element through chained indexing.
    effect_by_area = {}
    true_values = []
    for row, area in model.input_data['area'].iteritems():
        if area not in effect_by_area:
            path = nx.shortest_path(model.hierarchy, 'all', area)
            effect_by_area[area] = pl.sum(
                [alpha[node] for node in path if node in alpha])
        true_values.append(p[row] * pl.exp(effect_by_area[area]))
    model.input_data['true'] = pl.array(true_values, dtype=float)
    p = model.input_data['true']

    ess = model.input_data['effective_sample_size']
    model.input_data['value'] = mc.rnegative_binomial(
        ess * p, delta_true * ess * p) / ess

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    # NOTE(review): the positional arguments after model look like
    # data type, area, sex, year and three optional priors -- confirm against
    # data_model.data_model
    model.vars['p'] = data_model.data_model('p', model, 'p',
                                            'north_africa_middle_east',
                                            'total', 'all', None, None, None)
    # for a quick run, use iter=1005, burn=500, thin=5 instead
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'],
                                                     iter=10000,
                                                     burn=5000,
                                                     thin=25,
                                                     tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(range(101), pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    ## summarize the posterior next to the simulated truth
    p_pred_stats = model.vars['p']['p_pred'].stats()
    model.input_data['mu_pred'] = p_pred_stats['mean']
    model.input_data['sigma_pred'] = p_pred_stats['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    # delta is compared on the natural scale, i.e. exp of the eta trace
    delta_trace = pl.exp(model.vars['p']['eta'].trace())
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = delta_trace.mean()
    model.delta['sigma_pred'] = delta_trace.std()
    data_simulation.add_quality_metrics(model.delta)

    # one row per hierarchy node; nodes lacking either a simulated or an
    # estimated effect are dropped before scoring
    effect_names = model.vars['p']['U'].columns
    model.alpha = pandas.DataFrame(
        index=[node
               for node in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series(
        [node.stats()['mean'] for node in model.vars['p']['alpha']],
        index=effect_names)
    model.alpha['sigma_pred'] = pandas.Series(
        [node.stats()['standard deviation']
         for node in model.vars['p']['alpha']],
        index=effect_names)
    model.alpha = model.alpha.dropna()
    data_simulation.add_quality_metrics(model.alpha)

    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [
        node.stats()['mean'] for node in model.vars['p']['sigma_alpha']
    ]
    model.sigma['sigma_pred'] = [
        node.stats()['standard deviation']
        for node in model.vars['p']['sigma_alpha']
    ]
    data_simulation.add_quality_metrics(model.sigma)

    print('delta')
    print(model.delta)

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean()))

    mu_age_stats = model.vars['p']['mu_age'].stats()
    model.mu = pandas.DataFrame(
        dict(true=pi_age_true,
             mu_pred=mu_age_stats['mean'],
             sigma_pred=mu_age_stats['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print(model.results)

    return model