Example #1
0
def validate_fit_data_model():
    vars = data_model.data_model('validation', model, 'p',
                                 root_area='all', root_sex='total', root_year='all',
                                 mu_age=None, mu_age_parent=None)

    m = fit_model.fit_data_model(vars)

    return m
def validate_prior_similarity():
    #dm = dismod3.load_disease_model(20945)
    #dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    #t = 'i'
    #area, sex, year = 'europe_eastern', 'male', 2005

    dm = dismod3.load_disease_model(20928)
    dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    t = 'p'
    area, sex, year = 'sub-saharan_africa_central', 'male', 2005

    # select data that is about areas in this region, recent years, and sex of male or total only
    model = dm.model
    subtree = nx.traversal.bfs_tree(model.hierarchy, area)
    relevant_rows = [i for i, r in model.input_data.T.iteritems() \
                         if (r['area'] in subtree or r['area'] == 'all')\
                         and ((year == 2005 and r['year_end'] >= 1997) or r['year_start'] <= 1997) \
                         and r['sex'] in [sex, 'total']]
    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with area
    model.input_data['area'][model.input_data['area'] == 'all'] = area


    for het in 'Slightly Moderately Very'.split():
        dm.model.parameters[t]['parameter_age_mesh'] = [0, 15, 20, 25, 35, 45, 55, 65, 75, 100]
        dm.model.parameters[t]['heterogeneity'] = het
        setup_regional_model(dm, area, sex, year)

        dm.vars = {}
        dm.vars[t] = data_model.data_model(t, dm.model, t,
                                           root_area=area, root_sex=sex, root_year=year,
                                           mu_age=None,
                                           mu_age_parent=dm.emp_priors[t, 'mu'],
                                           sigma_age_parent=dm.emp_priors[t, 'sigma'],
                                           rate_type=(t == 'rr') and 'log_normal' or 'neg_binom')

        fit_model.fit_data_model(dm.vars[t], iter=10050, burn=5000, thin=50, tune_interval=100)

        #2graphics.plot_one_effects(dm.vars[t], t, dm.model.hierarchy)
        #pl.title(het)

        graphics.plot_convergence_diag(dm.vars[t])
        pl.title(het)

        #graphics.plot_one_ppc(dm.vars[t], t)
        #pl.title(het)

        graphics.plot_one_type(dm.model, dm.vars[t], dm.emp_priors, t)
        pl.title(het)

    pl.show()
    return dm
Example #3
0
def validate_fit_data_model():
    vars = data_model.data_model('validation',
                                 model,
                                 'p',
                                 root_area='all',
                                 root_sex='total',
                                 root_year='all',
                                 mu_age=None,
                                 mu_age_parent=None)

    m = fit_model.fit_data_model(vars)

    return m
Example #4
0
def fit(model):
    emp_priors = model.emp_priors

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, emp_priors['p', 'mu'], emp_priors['p', 'sigma'])
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=5000, burn=2000, thin=25, tune_interval=100)
    #model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=101, burn=0, thin=1, tune_interval=100)

    #graphics.plot_one_ppc(model.vars['p'], 'p')
    #graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], emp_priors, 'p')
    pl.plot(model.a, model.pi_age_true, 'b--', linewidth=3, alpha=.5, label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')
    pl.title('Heterogeneity %s'%model.parameters['p']['heterogeneity'])

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[model.delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame(dict(true=model.pi_age_true,
                                     mu_pred=model.vars['p']['mu_age'].stats()['mean'],
                                     sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.finalize_results(model)

    print model.results
Example #5
0
def test_data_model_sim():
    # generate simulated data
    n = 50
    sigma_true = .025

    # start with truth
    a = pl.arange(0, 100, 1)
    pi_age_true = .0001 * (a * (100. - a) + 100.)

    # choose age intervals to measure
    age_start = pl.array(mc.runiform(0, 100, n), dtype=int)
    age_start.sort()  # sort to make it easy to discard the edges when testing
    age_end = pl.array(mc.runiform(age_start+1, pl.minimum(age_start+10,100)), dtype=int)

    # find truth for the integral across the age intervals
    import scipy.integrate
    pi_interval_true = [scipy.integrate.trapz(pi_age_true[a_0i:(a_1i+1)]) / (a_1i - a_0i) 
                        for a_0i, a_1i in zip(age_start, age_end)]

    # generate covariates that add explained variation
    X = mc.rnormal(0., 1.**2, size=(n,3))
    beta_true = [-.1, .1, .2]
    Y_true = pl.dot(X, beta_true)

    # calculate the true value of the rate in each interval
    pi_true = pi_interval_true*pl.exp(Y_true)

    # simulate the noisy measurement of the rate in each interval
    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    # store the simulated data in a pandas DataFrame
    data = pandas.DataFrame(dict(value=p, age_start=age_start, age_end=age_end,
                                 x_0=X[:,0], x_1=X[:,1], x_2=X[:,2]))
    data['effective_sample_size'] = pl.maximum(p*(1-p)/sigma_true**2, 1.)

    data['standard_error'] = pl.nan
    data['upper_ci'] = pl.nan
    data['lower_ci'] = pl.nan

    data['year_start'] = 2005.  # TODO: make these vary
    data['year_end'] = 2005.
    data['sex'] = 'total'
    data['area'] = 'all'

    # generate a moderately complicated hierarchy graph for the model
    hierarchy = nx.DiGraph()
    hierarchy.add_node('all')
    hierarchy.add_edge('all', 'super-region-1', weight=.1)
    hierarchy.add_edge('super-region-1', 'NAHI', weight=.1)
    hierarchy.add_edge('NAHI', 'CAN', weight=.1)
    hierarchy.add_edge('NAHI', 'USA', weight=.1)
    output_template=pandas.DataFrame(dict(year=[1990, 1990, 2005, 2005, 2010, 2010]*2,
                                          sex=['male', 'female']*3*2,
                                          x_0=[.5]*6*2,
                                          x_1=[0.]*6*2,
                                          x_2=[.5]*6*2,
                                          pop=[50.]*6*2,
                                          area=['CAN']*6 + ['USA']*6))
    

    # create model and priors
    vars = data_model.data_model('test', data, hierarchy, 'all')


    # fit model
    mc.MAP(vars).fit(method='fmin_powell', verbose=1)
    m = mc.MCMC(vars)
    m.use_step_method(mc.AdaptiveMetropolis, [m.gamma_bar, m.gamma, m.beta])
    m.sample(30000, 15000, 15)

    # check estimates
    pi_usa = data_model.predict_for(output_template, hierarchy, 'all', 'USA', 'male', 1990, vars)
    assert pl.allclose(pi_usa.mean(), (m.mu_age.trace()*pl.exp(.05)).mean(), rtol=.1)

    # check convergence
    print 'gamma mc error:', m.gamma_bar.stats()['mc error'].round(2), m.gamma.stats()['mc error'].round(2)


    # plot results
    for a_0i, a_1i, p_i in zip(age_start, age_end, p):
        pl.plot([a_0i, a_1i], [p_i,p_i], 'rs-', mew=1, mec='w', ms=4)
    pl.plot(a, pi_age_true, 'g-', linewidth=2)
    pl.plot(pl.arange(101), m.mu_age.stats()['mean'], 'k-', drawstyle='steps-post', linewidth=3)
    pl.plot(pl.arange(101), m.mu_age.stats()['95% HPD interval'], 'k', linestyle='steps-post:')
    pl.plot(pl.arange(101), pi_usa.mean(0), 'r-', linewidth=2, drawstyle='steps-post')
    pl.savefig('age_integrating_sim.png')

    # compare estimate to ground truth (skip endpoints, because they are extra hard to get right)
    assert pl.allclose(m.pi.stats()['mean'][10:-10], pi_true[10:-10], rtol=.2)
    lb, ub = m.pi.stats()['95% HPD interval'].T
    assert pl.mean((lb <= pi_true)[10:-10] & (pi_true <= ub)[10:-10]) > .75
Example #6
0
def validate_age_pattern_model_sim(N=500, delta_true=.15, pi_true=quadratic):
    ## generate simulated data
    a = pl.arange(0, 101, 1)
    pi_age_true = pi_true(a)

    model = data_simulation.simple_model(N)
    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)

    age_list = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    p = pi_age_true[age_list]
    n = mc.runiform(100, 10000, size=N)

    model.input_data['age_start'] = age_list
    model.input_data['age_end'] = age_list
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(a, pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame(dict(true=pi_age_true,
                                     mu_pred=model.vars['p']['mu_age'].stats()['mean'],
                                     sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    model.results = pandas.DataFrame(model.results, columns='param bias mae mare pc'.split())

    print model.results

    return model
Example #7
0
def validate_covariate_model_fe(N=100,
                                delta_true=3,
                                pi_true=.01,
                                beta_true=[.5, -.5, 0.],
                                replicate=0):
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    ## generate simulated data
    a = pl.arange(0, 100, 1)
    pi_age_true = pi_true * pl.ones_like(a)

    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    # add fixed effect to simulated data
    X = mc.rnormal(0., 1.**-2, size=(N, len(beta_true)))
    Y_true = pl.dot(X, beta_true)

    for i in range(len(beta_true)):
        model.input_data['x_%d' % i] = X[:, i]
    model.input_data['true'] = pi_true * pl.exp(Y_true)

    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n * p, delta_true) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total',
                                            'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'],
                                                     iter=10000,
                                                     burn=5000,
                                                     thin=5,
                                                     tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats(
    )['standard deviation']
    add_quality_metrics(model.input_data)

    model.beta = pandas.DataFrame(index=model.vars['p']['X'].columns)
    model.beta['true'] = 0.
    for i in range(len(beta_true)):
        model.beta['true']['x_%d' % i] = beta_true[i]

    model.beta['mu_pred'] = [
        n.stats()['mean'] for n in model.vars['p']['beta']
    ]
    model.beta['sigma_pred'] = [
        n.stats()['standard deviation'] for n in model.vars['p']['beta']
    ]
    add_quality_metrics(model.beta)

    print '\nbeta'
    print model.beta

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'beta')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)

    print 'delta'
    print model.delta
    add_to_results(model, 'delta')

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean())
    print 'effect prediction MAE: %.3f, coverage: %.2f' % (
        pl.median(pl.absolute(model.beta['abs_err'].dropna())),
        model.beta.dropna()['covered?'].mean())
    add_to_results(model, 'input_data')
    add_to_results(model, 'beta')

    model.results = pandas.DataFrame(model.results)
    return model
Example #8
0
def validate_covariate_model_dispersion(N=1000, delta_true=.15, pi_true=.01, zeta_true=[.5, -.5, 0.]):
    ## generate simulated data
    a = pl.arange(0, 100, 1)
    pi_age_true = pi_true * pl.ones_like(a)

    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    Z = mc.rbernoulli(.5, size=(N, len(zeta_true))) * 1.0
    delta = delta_true * pl.exp(pl.dot(Z, zeta_true))
    for i in range(len(zeta_true)):
        model.input_data['z_%d'%i] = Z[:,i]

    model.input_data['true'] = pi_true

    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta*n*p) / n


    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=5, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()


    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)


    model.zeta = pandas.DataFrame(index=model.vars['p']['Z'].columns)
    model.zeta['true'] = zeta_true
    
    model.zeta['mu_pred'] = model.vars['p']['zeta'].stats()['mean']
    model.zeta['sigma_pred'] = model.vars['p']['zeta'].stats()['standard deviation']
    add_quality_metrics(model.zeta)

    print '\nzeta'
    print model.zeta
    
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean())
    print 'effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.zeta['abs_err'].dropna())),
                                                           model.zeta.dropna()['covered?'].mean())


    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'delta')
    add_to_results(model, 'input_data')
    add_to_results(model, 'zeta')
    model.results = pandas.DataFrame(model.results, columns='param bias mae mare pc'.split())

    return model
def validate_ai_re(N=500, delta_true=.15, sigma_true=[.1,.1,.1,.1,.1], pi_true=quadratic, smoothness='Moderately', heterogeneity='Slightly'):
    ## generate simulated data
    a = pl.arange(0, 101, 1)
    pi_age_true = pi_true(a)


    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    gbd_hierarchy = model.hierarchy

    model = data_simulation.simple_model(N)
    model.hierarchy = gbd_hierarchy

    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    model.parameters['p']['smoothness'] = dict(amount=smoothness)
    model.parameters['p']['heterogeneity'] = heterogeneity

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    age_weights = pl.ones_like(a)
    sum_pi_wt = pl.cumsum(pi_age_true*age_weights)
    sum_wt = pl.cumsum(age_weights*1.)
    p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

    # correct cases where age_start == age_end
    i = age_start == age_end
    if pl.any(i):
        p[i] = pi_age_true[age_start[i]]

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, size=N)


    from validate_covariates import alpha_true_sim
    area_list = pl.array(['all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT', 'IRN', 'IRQ', 'JOR', 'SYR'])
    alpha = alpha_true_sim(model, area_list, sigma_true)
    print alpha

    model.input_data['true'] = pl.nan

    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]
    
    for i, a in model.input_data['area'].iteritems():
        model.input_data['true'][i] = p[i] * pl.exp(pl.sum([alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a) if n in alpha]))
    p = model.input_data['true']

    n = model.input_data['effective_sample_size']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'north_africa_middle_east', 'total', 'all', None, None, None)
    #model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=1005, burn=500, thin=5, tune_interval=100)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(range(101), pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    model.alpha = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha = model.alpha.dropna()
    data_simulation.add_quality_metrics(model.alpha)

    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['sigma_alpha']]
    model.sigma['sigma_pred']=[n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']]
    data_simulation.add_quality_metrics(model.sigma)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame(dict(true=pi_age_true,
                                     mu_pred=model.vars['p']['mu_age'].stats()['mean'],
                                     sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print model.results

    return model
Example #10
0
def validate_prior_similarity():
    #dm = dismod3.load_disease_model(20945)
    #dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    #t = 'i'
    #area, sex, year = 'europe_eastern', 'male', 2005

    dm = dismod3.load_disease_model(20928)
    dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    t = 'p'
    area, sex, year = 'sub-saharan_africa_central', 'male', 2005

    # select data that is about areas in this region, recent years, and sex of male or total only
    model = dm.model
    subtree = nx.traversal.bfs_tree(model.hierarchy, area)
    relevant_rows = [i for i, r in model.input_data.T.iteritems() \
                         if (r['area'] in subtree or r['area'] == 'all')\
                         and ((year == 2005 and r['year_end'] >= 1997) or r['year_start'] <= 1997) \
                         and r['sex'] in [sex, 'total']]
    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with area
    model.input_data['area'][model.input_data['area'] == 'all'] = area

    for het in 'Slightly Moderately Very'.split():
        dm.model.parameters[t]['parameter_age_mesh'] = [
            0, 15, 20, 25, 35, 45, 55, 65, 75, 100
        ]
        dm.model.parameters[t]['heterogeneity'] = het
        setup_regional_model(dm, area, sex, year)

        dm.vars = {}
        dm.vars[t] = data_model.data_model(
            t,
            dm.model,
            t,
            root_area=area,
            root_sex=sex,
            root_year=year,
            mu_age=None,
            mu_age_parent=dm.emp_priors[t, 'mu'],
            sigma_age_parent=dm.emp_priors[t, 'sigma'],
            rate_type=(t == 'rr') and 'log_normal' or 'neg_binom')

        fit_model.fit_data_model(dm.vars[t],
                                 iter=10050,
                                 burn=5000,
                                 thin=50,
                                 tune_interval=100)

        #2graphics.plot_one_effects(dm.vars[t], t, dm.model.hierarchy)
        #pl.title(het)

        graphics.plot_convergence_diag(dm.vars[t])
        pl.title(het)

        #graphics.plot_one_ppc(dm.vars[t], t)
        #pl.title(het)

        graphics.plot_one_type(dm.model, dm.vars[t], dm.emp_priors, t)
        pl.title(het)

    pl.show()
    return dm
Example #11
0
#!/usr/bin/python3
import tensorflow as tf
import numpy as np
import scipy.ndimage
from tf_DNN import DNN
from tf_RNN import RNN
from tf_CNN import CNN
from data_model import data_model

theData = data_model()

# theModel = DNN(theData, h_layers = [512,256,128])
# theModel.train(max_epochs=100, batch_size=5000, l_rate=1e-03)

theModel = RNN(theData, 28, rnn_size=128)
theModel.train(max_epochs=100, batch_size=5000, l_rate=1e-03)

# theModel = CNN(theData)
# theModel.train(max_epochs=100, batch_size=5000, l_rate=1e-03)

theModel.save_dir = './model_trained'
theModel.restore_model()
dat = np.vectorize(lambda x: 255 - x)(np.ndarray.flatten(
    scipy.ndimage.imread("test1.png", flatten=True)))
theModel.test_input([dat])
model.parameters['p']['fixed_effects']['x_CODcorrected_Cirrhosis_ASDR'] = dict(
    dist='TruncatedNormal', mu=0, sigma=.01, lower=-1., upper=1.)
model.parameters['p']['fixed_effects'][
    'x_IHME_alcohol_liters_pc_25July11'] = dict(dist='TruncatedNormal',
                                                mu=0,
                                                sigma=.01,
                                                lower=-1.,
                                                upper=1.)
# create model for global prevalence
root_area = 'all'
t = 'p'
vars = data_model.data_model(t,
                             model,
                             t,
                             root_area='all',
                             root_sex='total',
                             root_year='all',
                             mu_age=None,
                             mu_age_parent=None,
                             sigma_age_parent=None)

m = fit_model.fit_data_model(vars,
                             iter=4000,
                             burn=2000,
                             thin=20,
                             tune_interval=100)

# generate estimates for all leaves of the hierarchy
est = {}
for n in model.hierarchy:
    if len(model.hierarchy.successors(n)) == 0:
Example #13
0
def test_data_model_sim():
    # generate simulated data
    n = 50
    sigma_true = .025

    # start with truth
    a = pl.arange(0, 100, 1)
    pi_age_true = .0001 * (a * (100. - a) + 100.)

    # choose age intervals to measure
    age_start = pl.array(mc.runiform(0, 100, n), dtype=int)
    age_start.sort()  # sort to make it easy to discard the edges when testing
    age_end = pl.array(mc.runiform(age_start + 1,
                                   pl.minimum(age_start + 10, 100)),
                       dtype=int)

    # find truth for the integral across the age intervals
    import scipy.integrate
    pi_interval_true = [
        scipy.integrate.trapz(pi_age_true[a_0i:(a_1i + 1)]) / (a_1i - a_0i)
        for a_0i, a_1i in zip(age_start, age_end)
    ]

    # generate covariates that add explained variation
    X = mc.rnormal(0., 1.**2, size=(n, 3))
    beta_true = [-.1, .1, .2]
    Y_true = pl.dot(X, beta_true)

    # calculate the true value of the rate in each interval
    pi_true = pi_interval_true * pl.exp(Y_true)

    # simulate the noisy measurement of the rate in each interval
    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    # store the simulated data in a pandas DataFrame
    data = pandas.DataFrame(
        dict(value=p,
             age_start=age_start,
             age_end=age_end,
             x_0=X[:, 0],
             x_1=X[:, 1],
             x_2=X[:, 2]))
    data['effective_sample_size'] = pl.maximum(p * (1 - p) / sigma_true**2, 1.)

    data['standard_error'] = pl.nan
    data['upper_ci'] = pl.nan
    data['lower_ci'] = pl.nan

    data['year_start'] = 2005.  # TODO: make these vary
    data['year_end'] = 2005.
    data['sex'] = 'total'
    data['area'] = 'all'

    # generate a moderately complicated hierarchy graph for the model
    hierarchy = nx.DiGraph()
    hierarchy.add_node('all')
    hierarchy.add_edge('all', 'super-region-1', weight=.1)
    hierarchy.add_edge('super-region-1', 'NAHI', weight=.1)
    hierarchy.add_edge('NAHI', 'CAN', weight=.1)
    hierarchy.add_edge('NAHI', 'USA', weight=.1)
    output_template = pandas.DataFrame(
        dict(year=[1990, 1990, 2005, 2005, 2010, 2010] * 2,
             sex=['male', 'female'] * 3 * 2,
             x_0=[.5] * 6 * 2,
             x_1=[0.] * 6 * 2,
             x_2=[.5] * 6 * 2,
             pop=[50.] * 6 * 2,
             area=['CAN'] * 6 + ['USA'] * 6))

    # create model and priors
    vars = data_model.data_model('test', data, hierarchy, 'all')

    # fit model
    mc.MAP(vars).fit(method='fmin_powell', verbose=1)
    m = mc.MCMC(vars)
    m.use_step_method(mc.AdaptiveMetropolis, [m.gamma_bar, m.gamma, m.beta])
    m.sample(30000, 15000, 15)

    # check estimates
    pi_usa = data_model.predict_for(output_template, hierarchy, 'all', 'USA',
                                    'male', 1990, vars)
    assert pl.allclose(pi_usa.mean(), (m.mu_age.trace() * pl.exp(.05)).mean(),
                       rtol=.1)

    # check convergence
    print 'gamma mc error:', m.gamma_bar.stats()['mc error'].round(
        2), m.gamma.stats()['mc error'].round(2)

    # plot results
    for a_0i, a_1i, p_i in zip(age_start, age_end, p):
        pl.plot([a_0i, a_1i], [p_i, p_i], 'rs-', mew=1, mec='w', ms=4)
    pl.plot(a, pi_age_true, 'g-', linewidth=2)
    pl.plot(pl.arange(101),
            m.mu_age.stats()['mean'],
            'k-',
            drawstyle='steps-post',
            linewidth=3)
    pl.plot(pl.arange(101),
            m.mu_age.stats()['95% HPD interval'],
            'k',
            linestyle='steps-post:')
    pl.plot(pl.arange(101),
            pi_usa.mean(0),
            'r-',
            linewidth=2,
            drawstyle='steps-post')
    pl.savefig('age_integrating_sim.png')

    # compare estimate to ground truth (skip endpoints, because they are extra hard to get right)
    assert pl.allclose(m.pi.stats()['mean'][10:-10], pi_true[10:-10], rtol=.2)
    lb, ub = m.pi.stats()['95% HPD interval'].T
    assert pl.mean((lb <= pi_true)[10:-10] & (pi_true <= ub)[10:-10]) > .75
Example #14
0
def validate_covariate_model_fe(N=100, delta_true=3, pi_true=.01, beta_true=[.5, -.5, 0.], replicate=0):
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)
    
    ## generate simulated data
    a = pl.arange(0, 100, 1)
    pi_age_true = pi_true * pl.ones_like(a)

    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    # add fixed effect to simulated data
    X = mc.rnormal(0., 1.**-2, size=(N,len(beta_true)))
    Y_true = pl.dot(X, beta_true)

    for i in range(len(beta_true)):
        model.input_data['x_%d'%i] = X[:,i]
    model.input_data['true'] = pi_true * pl.exp(Y_true)

    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true) / n


    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=5, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)


    model.beta = pandas.DataFrame(index=model.vars['p']['X'].columns)
    model.beta['true'] = 0.
    for i in range(len(beta_true)):
        model.beta['true']['x_%d'%i] = beta_true[i]
    
    model.beta['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['beta']]
    model.beta['sigma_pred'] = [n.stats()['standard deviation'] for n in model.vars['p']['beta']]
    add_quality_metrics(model.beta)

    print '\nbeta'
    print model.beta
    
    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'beta')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)

    print 'delta'
    print model.delta
    add_to_results(model, 'delta')

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean())
    print 'effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.beta['abs_err'].dropna())),
                                                           model.beta.dropna()['covered?'].mean())
    add_to_results(model, 'input_data')
    add_to_results(model, 'beta')

    model.results = pandas.DataFrame(model.results)
    return model
Example #15
0
sys.path.insert(0, currentdir)
sys.path.insert(0, currentdir + "/components")

import data_model as dm

from right_panel import right_panel
from left_panel import left_panel
from mid_panel import mid_panel

# region Read in global data
import os, sys, inspect

currentdir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
data_reader = dm.data_model(parentdir + "/data/raw")
# endregion

# region Setup app and layout/frontend
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.COSMO])
# CERULEAN, COSMO, CYBORG, DARKLY, FLATLY, JOURNAL, LITERA, LUMEN, LUX, MATERIA, MINTY, PULSE, SANDSTONE, SIMPLEX, SKETCHY, SLATE, SOLAR, SPACELAB, SUPERHERO, UNITED, YETI
server = app.server
country_panel = right_panel(data_reader)
global_panel = left_panel(data_reader)
map_panel = mid_panel(data_reader)
alt.themes.enable("ggplot2")
app.title = "Covid-19 Data Portal"
dashboard_heading = ("Covid-19 Data Portal")
app.layout = dbc.Container([
    dbc.Row(
        dbc.Col(html.Div([html.H1(dashboard_heading)]), className="heading")),
def validate_ai_re(N=500,
                   delta_true=.15,
                   sigma_true=[.1, .1, .1, .1, .1],
                   pi_true=quadratic,
                   smoothness='Moderately',
                   heterogeneity='Slightly'):
    ## generate simulated data
    a = pl.arange(0, 101, 1)
    pi_age_true = pi_true(a)

    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(
        json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    gbd_hierarchy = model.hierarchy

    model = data_simulation.simple_model(N)
    model.hierarchy = gbd_hierarchy

    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    model.parameters['p']['smoothness'] = dict(amount=smoothness)
    model.parameters['p']['heterogeneity'] = heterogeneity

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    age_weights = pl.ones_like(a)
    sum_pi_wt = pl.cumsum(pi_age_true * age_weights)
    sum_wt = pl.cumsum(age_weights * 1.)
    p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] -
                                                       sum_wt[age_start])

    # correct cases where age_start == age_end
    i = age_start == age_end
    if pl.any(i):
        p[i] = pi_age_true[age_start[i]]

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, size=N)

    from validate_covariates import alpha_true_sim
    area_list = pl.array([
        'all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT',
        'IRN', 'IRQ', 'JOR', 'SYR'
    ])
    alpha = alpha_true_sim(model, area_list, sigma_true)
    print alpha

    model.input_data['true'] = pl.nan

    model.input_data['area'] = area_list[mc.rcategorical(
        pl.ones(len(area_list)) / float(len(area_list)), N)]

    for i, a in model.input_data['area'].iteritems():
        model.input_data['true'][i] = p[i] * pl.exp(
            pl.sum([
                alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a)
                if n in alpha
            ]))
    p = model.input_data['true']

    n = model.input_data['effective_sample_size']
    model.input_data['value'] = mc.rnegative_binomial(n * p,
                                                      delta_true * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p',
                                            'north_africa_middle_east',
                                            'total', 'all', None, None, None)
    #model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=1005, burn=500, thin=5, tune_interval=100)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'],
                                                     iter=10000,
                                                     burn=5000,
                                                     thin=25,
                                                     tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(range(101), pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats(
    )['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    model.alpha = pandas.DataFrame(
        index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series(
        [n.stats()['mean'] for n in model.vars['p']['alpha']],
        index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series(
        [n.stats()['standard deviation'] for n in model.vars['p']['alpha']],
        index=model.vars['p']['U'].columns)
    model.alpha = model.alpha.dropna()
    data_simulation.add_quality_metrics(model.alpha)

    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [
        n.stats()['mean'] for n in model.vars['p']['sigma_alpha']
    ]
    model.sigma['sigma_pred'] = [
        n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']
    ]
    data_simulation.add_quality_metrics(model.sigma)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame(
        dict(true=pi_age_true,
             mu_pred=model.vars['p']['mu_age'].stats()['mean'],
             sigma_pred=model.vars['p']['mu_age'].stats()
             ['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print model.results

    return model
Example #17
0
def validate_covariate_model_re(N=500,
                                delta_true=.15,
                                pi_true=.01,
                                sigma_true=[.1, .1, .1, .1, .1],
                                ess=1000):
    ## set simulation parameters
    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(
        json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.parameters['p'][
        'heterogeneity'] = 'Slightly'  # ensure heterogeneity is slightly

    area_list = []
    for sr in sorted(model.hierarchy.successors('all')):
        area_list.append(sr)
        for r in sorted(model.hierarchy.successors(sr)):
            area_list.append(r)
            area_list += sorted(model.hierarchy.successors(r))[:5]
    area_list = pl.array(area_list)

    ## generate simulation data
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    alpha = alpha_true_sim(model, area_list, sigma_true)

    # choose observed prevalence values
    model.input_data['effective_sample_size'] = ess

    model.input_data['area'] = area_list[mc.rcategorical(
        pl.ones(len(area_list)) / float(len(area_list)), N)]

    model.input_data['true'] = pl.nan
    for i, a in model.input_data['area'].iteritems():
        model.input_data['true'][i] = pi_true * pl.exp(
            pl.sum([
                alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a)
                if n in alpha
            ]))

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n * p,
                                                      delta_true * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total',
                                            'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'],
                                                     iter=20000,
                                                     burn=10000,
                                                     thin=10,
                                                     tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats(
    )['standard deviation']
    add_quality_metrics(model.input_data)

    model.alpha = pandas.DataFrame(
        index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series(
        [n.stats()['mean'] for n in model.vars['p']['alpha']],
        index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series(
        [n.stats()['standard deviation'] for n in model.vars['p']['alpha']],
        index=model.vars['p']['U'].columns)
    add_quality_metrics(model.alpha)

    print '\nalpha'
    print model.alpha.dropna()

    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [
        n.stats()['mean'] for n in model.vars['p']['sigma_alpha']
    ]
    model.sigma['sigma_pred'] = [
        n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']
    ]
    add_quality_metrics(model.sigma)

    print 'sigma_alpha'
    print model.sigma

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'sigma')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)

    print 'delta'
    print model.delta
    add_to_results(model, 'delta')

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean())
    print 'effect prediction MAE: %.3f, coverage: %.2f' % (
        pl.median(pl.absolute(model.alpha['abs_err'].dropna())),
        model.alpha.dropna()['covered?'].mean())
    add_to_results(model, 'input_data')
    add_to_results(model, 'alpha')

    model.results = pandas.DataFrame(model.results)
    return model
import graphics

reload(data_model)
reload(covariate_model)

# load the model from disk, and adjust the data and parameters for this example
model = data.ModelData.from_gbd_json('/var/tmp/dismod_working/test/dm-19807/json/dm-19807.json')

model.parameters['p']['parameter_age_mesh'] = range(0,101,20)
model.parameters['p']['fixed_effects']['x_CODcorrected_Cirrhosis_ASDR'] = dict(dist='TruncatedNormal', mu=0, sigma=.01, lower=-1., upper=1.)
model.parameters['p']['fixed_effects']['x_IHME_alcohol_liters_pc_25July11'] = dict(dist='TruncatedNormal', mu=0, sigma=.01, lower=-1., upper=1.)
# create model for global prevalence
root_area = 'all'
t = 'p'
vars = data_model.data_model(t, model, t,
                             root_area='all', root_sex='total', root_year='all',
                             mu_age=None, mu_age_parent=None, sigma_age_parent=None)

m = fit_model.fit_data_model(vars, iter=4000, burn=2000, thin=20, tune_interval=100)


# generate estimates for all leaves of the hierarchy
est = {}
for n in model.hierarchy:
    if len(model.hierarchy.successors(n)) == 0:
        est[n] = pl.median(covariate_model.predict_for(model,
                                                       'all', 'total', 'all',
                                                       n, 'male', 2005, 0., vars, 0., 1.), axis=0)

graphics.plot_one_type(model, vars, {}, 'p')
Example #19
0
def validate_covariate_model_dispersion(N=1000,
                                        delta_true=.15,
                                        pi_true=.01,
                                        zeta_true=[.5, -.5, 0.]):
    ## generate simulated data
    a = pl.arange(0, 100, 1)
    pi_age_true = pi_true * pl.ones_like(a)

    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    Z = mc.rbernoulli(.5, size=(N, len(zeta_true))) * 1.0
    delta = delta_true * pl.exp(pl.dot(Z, zeta_true))
    for i in range(len(zeta_true)):
        model.input_data['z_%d' % i] = Z[:, i]

    model.input_data['true'] = pi_true

    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n * p, delta * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total',
                                            'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'],
                                                     iter=10000,
                                                     burn=5000,
                                                     thin=5,
                                                     tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats(
    )['standard deviation']
    add_quality_metrics(model.input_data)

    model.zeta = pandas.DataFrame(index=model.vars['p']['Z'].columns)
    model.zeta['true'] = zeta_true

    model.zeta['mu_pred'] = model.vars['p']['zeta'].stats()['mean']
    model.zeta['sigma_pred'] = model.vars['p']['zeta'].stats(
    )['standard deviation']
    add_quality_metrics(model.zeta)

    print '\nzeta'
    print model.zeta

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean())
    print 'effect prediction MAE: %.3f, coverage: %.2f' % (
        pl.median(pl.absolute(model.zeta['abs_err'].dropna())),
        model.zeta.dropna()['covered?'].mean())

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'delta')
    add_to_results(model, 'input_data')
    add_to_results(model, 'zeta')
    model.results = pandas.DataFrame(model.results,
                                     columns='param bias mae mare pc'.split())

    return model
def validate_age_integrating_model_sim(N=500,
                                       delta_true=.15,
                                       pi_true=quadratic):
    ## generate simulated data
    a = pl.arange(0, 101, 1)
    pi_age_true = pi_true(a)

    model = data_simulation.simple_model(N)
    #model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    #model.parameters['p']['smoothness'] = dict(amount='Very')

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    age_weights = pl.ones_like(a)
    sum_pi_wt = pl.cumsum(pi_age_true * age_weights)
    sum_wt = pl.cumsum(age_weights)
    p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] -
                                                       sum_wt[age_start])

    # correct cases where age_start == age_end
    i = age_start == age_end
    if pl.any(i):
        p[i] = pi_age_true[age_start[i]]

    n = mc.runiform(100, 10000, size=N)

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n * p,
                                                      delta_true * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total',
                                            'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'],
                                                     iter=10000,
                                                     burn=5000,
                                                     thin=25,
                                                     tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(a, pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats(
    )['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame(
        dict(true=pi_age_true,
             mu_pred=model.vars['p']['mu_age'].stats()['mean'],
             sigma_pred=model.vars['p']['mu_age'].stats()
             ['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    model.results = pandas.DataFrame(model.results,
                                     columns='param bias mae mare pc'.split())

    print model.results

    return model
Example #21
0
def validate_covariate_model_re(N=500, delta_true=.15, pi_true=.01, sigma_true = [.1,.1,.1,.1,.1], ess=1000):
    ## set simulation parameters
    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.parameters['p']['heterogeneity'] = 'Slightly'  # ensure heterogeneity is slightly

    area_list = []
    for sr in sorted(model.hierarchy.successors('all')):
        area_list.append(sr)
        for r in sorted(model.hierarchy.successors(sr)):
            area_list.append(r)
            area_list += sorted(model.hierarchy.successors(r))[:5]
    area_list = pl.array(area_list)


    ## generate simulation data
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    alpha = alpha_true_sim(model, area_list, sigma_true)

    # choose observed prevalence values
    model.input_data['effective_sample_size'] = ess

    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]

    model.input_data['true'] = pl.nan
    for i, a in model.input_data['area'].iteritems():
        model.input_data['true'][i] = pi_true * pl.exp(pl.sum([alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a) if n in alpha]))

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n



    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=20000, burn=10000, thin=10, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)


    model.alpha = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    add_quality_metrics(model.alpha)

    print '\nalpha'
    print model.alpha.dropna()


    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['sigma_alpha']]
    model.sigma['sigma_pred']=[n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']]
    add_quality_metrics(model.sigma)

    print 'sigma_alpha'
    print model.sigma

    
    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'sigma')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)

    print 'delta'
    print model.delta
    add_to_results(model, 'delta')

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean())
    print 'effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.alpha['abs_err'].dropna())),
                                                          model.alpha.dropna()['covered?'].mean())
    add_to_results(model, 'input_data')
    add_to_results(model, 'alpha')

    model.results = pandas.DataFrame(model.results)
    return model