def validate_fit_data_model():
    """Build a data model for global prevalence and fit it.

    Uses the module-level ``model`` object, with no parent empirical
    prior (``mu_age_parent=None``) and the all/total/all root node.

    Returns the fitted model object produced by ``fit_model.fit_data_model``.
    """
    prevalence_vars = data_model.data_model('validation', model, 'p',
                                            root_area='all',
                                            root_sex='total',
                                            root_year='all',
                                            mu_age=None,
                                            mu_age_parent=None)
    return fit_model.fit_data_model(prevalence_vars)
def validate_prior_similarity():
    """Explore sensitivity of a regional fit to the heterogeneity prior.

    Loads disease model 20928, restricts its input data to one
    region/sex/year, then refits prevalence ('p') once for each
    heterogeneity level ('Slightly', 'Moderately', 'Very'), plotting
    convergence diagnostics and the fitted rate for each.

    Returns the disease-model object; ``dm.vars`` holds the last fit.
    """
    #dm = dismod3.load_disease_model(20945)
    #dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    #t = 'i'
    #area, sex, year = 'europe_eastern', 'male', 2005

    dm = dismod3.load_disease_model(20928)
    dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    t = 'p'
    area, sex, year = 'sub-saharan_africa_central', 'male', 2005

    # select data that is about areas in this region, recent years, and sex of male or total only
    model = dm.model
    subtree = nx.traversal.bfs_tree(model.hierarchy, area)
    relevant_rows = [i for i, r in model.input_data.T.iteritems() \
                     if (r['area'] in subtree or r['area'] == 'all')\
                     and ((year == 2005 and r['year_end'] >= 1997) or r['year_start'] <= 1997) \
                     and r['sex'] in [sex, 'total']]
    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with area
    # NOTE(review): pandas chained indexing; relies on old-pandas in-place
    # semantics — verify if the pandas version is ever upgraded.
    model.input_data['area'][model.input_data['area'] == 'all'] = area

    for het in 'Slightly Moderately Very'.split():
        # coarse age mesh, refit from scratch for each heterogeneity setting
        dm.model.parameters[t]['parameter_age_mesh'] = [0, 15, 20, 25, 35, 45, 55, 65, 75, 100]
        dm.model.parameters[t]['heterogeneity'] = het
        setup_regional_model(dm, area, sex, year)

        dm.vars = {}
        dm.vars[t] = data_model.data_model(t, dm.model, t,
                                           root_area=area, root_sex=sex, root_year=year,
                                           mu_age=None,
                                           mu_age_parent=dm.emp_priors[t, 'mu'],
                                           sigma_age_parent=dm.emp_priors[t, 'sigma'],
                                           rate_type=(t == 'rr') and 'log_normal' or 'neg_binom')

        fit_model.fit_data_model(dm.vars[t], iter=10050, burn=5000, thin=50, tune_interval=100)

        #2graphics.plot_one_effects(dm.vars[t], t, dm.model.hierarchy)
        #pl.title(het)

        graphics.plot_convergence_diag(dm.vars[t])
        pl.title(het)

        #graphics.plot_one_ppc(dm.vars[t], t)
        #pl.title(het)

        graphics.plot_one_type(dm.model, dm.vars[t], dm.emp_priors, t)
        pl.title(het)

        pl.show()

    return dm
def fit(model):
    """Fit the prevalence ('p') data model for a simulated model and report fit quality.

    Expects ``model`` to carry simulation ground truth (``model.a``,
    ``model.pi_age_true``, ``model.delta_true``) and empirical priors in
    ``model.emp_priors``.  Fits by MAP+MCMC, plots the fit against truth,
    then records bias/MARE/coverage metrics for the data predictions,
    the dispersion parameter delta, and the age pattern mu.

    Side effects: populates ``model.vars``, ``model.map``, ``model.mcmc``,
    ``model.delta``, ``model.mu``, ``model.results``; shows plots; prints
    summary statistics.
    """
    emp_priors = model.emp_priors

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all',
                                            None, emp_priors['p', 'mu'], emp_priors['p', 'sigma'])
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=5000, burn=2000, thin=25, tune_interval=100)
    #model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=101, burn=0, thin=1, tune_interval=100)

    #graphics.plot_one_ppc(model.vars['p'], 'p')
    #graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], emp_priors, 'p')
    pl.plot(model.a, model.pi_age_true, 'b--', linewidth=3, alpha=.5, label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')
    pl.title('Heterogeneity %s'%model.parameters['p']['heterogeneity'])
    pl.show()

    # posterior predictive summaries for each data row
    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    # delta (over-dispersion) is modeled on the log scale as eta
    model.delta = pandas.DataFrame(dict(true=[model.delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                                        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                        model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame(dict(true=model.pi_age_true,
                                     mu_pred=model.vars['p']['mu_age'].stats()['mean'],
                                     sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.finalize_results(model)

    print model.results
def test_data_model_sim():
    """End-to-end simulation test of the age-integrating data model.

    Simulates noisy interval-averaged rate data with three covariates,
    fits with MAP then MCMC, and asserts that (a) USA predictions match
    the expected covariate-shifted age pattern, and (b) the posterior
    mean and 95% HPD interval recover the simulated truth away from the
    age endpoints.  Saves a diagnostic plot to age_integrating_sim.png.
    """
    # generate simulated data
    n = 50
    sigma_true = .025

    # start with truth: a smooth quadratic-ish age pattern
    a = pl.arange(0, 100, 1)
    pi_age_true = .0001 * (a * (100. - a) + 100.)

    # choose age intervals to measure
    age_start = pl.array(mc.runiform(0, 100, n), dtype=int)
    age_start.sort()  # sort to make it easy to discard the edges when testing
    age_end = pl.array(mc.runiform(age_start+1, pl.minimum(age_start+10,100)), dtype=int)

    # find truth for the integral across the age intervals
    import scipy.integrate
    pi_interval_true = [scipy.integrate.trapz(pi_age_true[a_0i:(a_1i+1)]) / (a_1i - a_0i)
                        for a_0i, a_1i in zip(age_start, age_end)]

    # generate covariates that add explained variation
    X = mc.rnormal(0., 1.**2, size=(n,3))
    beta_true = [-.1, .1, .2]
    Y_true = pl.dot(X, beta_true)

    # calculate the true value of the rate in each interval
    pi_true = pi_interval_true*pl.exp(Y_true)

    # simulate the noisy measurement of the rate in each interval
    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    # store the simulated data in a pandas DataFrame
    data = pandas.DataFrame(dict(value=p, age_start=age_start, age_end=age_end,
                                 x_0=X[:,0], x_1=X[:,1], x_2=X[:,2]))
    data['effective_sample_size'] = pl.maximum(p*(1-p)/sigma_true**2, 1.)

    data['standard_error'] = pl.nan
    data['upper_ci'] = pl.nan
    data['lower_ci'] = pl.nan

    data['year_start'] = 2005.  # TODO: make these vary
    data['year_end'] = 2005.
    data['sex'] = 'total'
    data['area'] = 'all'

    # generate a moderately complicated hierarchy graph for the model
    hierarchy = nx.DiGraph()
    hierarchy.add_node('all')
    hierarchy.add_edge('all', 'super-region-1', weight=.1)
    hierarchy.add_edge('super-region-1', 'NAHI', weight=.1)
    hierarchy.add_edge('NAHI', 'CAN', weight=.1)
    hierarchy.add_edge('NAHI', 'USA', weight=.1)

    output_template = pandas.DataFrame(dict(year=[1990, 1990, 2005, 2005, 2010, 2010]*2,
                                            sex=['male', 'female']*3*2,
                                            x_0=[.5]*6*2,
                                            x_1=[0.]*6*2,
                                            x_2=[.5]*6*2,
                                            pop=[50.]*6*2,
                                            area=['CAN']*6 + ['USA']*6))

    # create model and priors
    vars = data_model.data_model('test', data, hierarchy, 'all')

    # fit model: MAP to initialize, then MCMC with adaptive Metropolis
    mc.MAP(vars).fit(method='fmin_powell', verbose=1)
    m = mc.MCMC(vars)
    m.use_step_method(mc.AdaptiveMetropolis, [m.gamma_bar, m.gamma, m.beta])
    m.sample(30000, 15000, 15)

    # check estimates
    pi_usa = data_model.predict_for(output_template, hierarchy, 'all', 'USA', 'male', 1990, vars)
    assert pl.allclose(pi_usa.mean(), (m.mu_age.trace()*pl.exp(.05)).mean(), rtol=.1)

    # check convergence
    print 'gamma mc error:', m.gamma_bar.stats()['mc error'].round(2), m.gamma.stats()['mc error'].round(2)

    # plot results
    for a_0i, a_1i, p_i in zip(age_start, age_end, p):
        pl.plot([a_0i, a_1i], [p_i,p_i], 'rs-', mew=1, mec='w', ms=4)
    pl.plot(a, pi_age_true, 'g-', linewidth=2)
    pl.plot(pl.arange(101), m.mu_age.stats()['mean'], 'k-', drawstyle='steps-post', linewidth=3)
    pl.plot(pl.arange(101), m.mu_age.stats()['95% HPD interval'], 'k', linestyle='steps-post:')
    pl.plot(pl.arange(101), pi_usa.mean(0), 'r-', linewidth=2, drawstyle='steps-post')
    pl.savefig('age_integrating_sim.png')

    # compare estimate to ground truth (skip endpoints, because they are extra hard to get right)
    assert pl.allclose(m.pi.stats()['mean'][10:-10], pi_true[10:-10], rtol=.2)
    lb, ub = m.pi.stats()['95% HPD interval'].T
    assert pl.mean((lb <= pi_true)[10:-10] & (pi_true <= ub)[10:-10]) > .75
def validate_age_pattern_model_sim(N=500, delta_true=.15, pi_true=quadratic):
    """Validate recovery of an age pattern from point-age simulated data.

    Simulates N negative-binomial observations at single ages drawn from
    ``pi_true(a)``, fits the 'p' data model, plots diagnostics, and
    records quality metrics for the data predictions, delta, and mu_age.

    :param N: number of simulated data rows
    :param delta_true: true over-dispersion used to generate the data
    :param pi_true: callable mapping age array to true rate
    :returns: the model object with ``.results`` DataFrame attached
    """
    ## generate simulated data
    a = pl.arange(0, 101, 1)
    pi_age_true = pi_true(a)

    model = data_simulation.simple_model(N)
    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)

    # each row measures the rate at a single integer age
    age_list = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    p = pi_age_true[age_list]
    n = mc.runiform(100, 10000, size=N)

    model.input_data['age_start'] = age_list
    model.input_data['age_end'] = age_list
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(a, pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')
    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    # delta is modeled on the log scale as eta
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                                        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                        model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame(dict(true=pi_age_true,
                                     mu_pred=model.vars['p']['mu_age'].stats()['mean'],
                                     sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    # accumulate metrics, then convert to a DataFrame with fixed column order
    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    model.results = pandas.DataFrame(model.results, columns='param bias mae mare pc'.split())
    print model.results

    return model
def validate_covariate_model_fe(N=100, delta_true=3, pi_true=.01, beta_true=[.5, -.5, 0.], replicate=0):
    """Validate fixed-effect covariate estimation on simulated data.

    Simulates N observations of a constant rate ``pi_true`` shifted by
    fixed effects ``beta_true`` on normally distributed covariates, fits
    the 'p' data model, and records quality metrics for beta, delta,
    and the data predictions.

    :param replicate: offsets the random seed for repeated runs
    :returns: the model object with ``.results`` DataFrame attached
    """
    # NOTE(review): beta_true has a mutable (list) default; harmless here
    # since it is never mutated, but fragile if that ever changes.

    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    ## generate simulated data
    a = pl.arange(0, 100, 1)
    pi_age_true = pi_true * pl.ones_like(a)

    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    # add fixed effect to simulated data
    X = mc.rnormal(0., 1.**-2, size=(N, len(beta_true)))
    Y_true = pl.dot(X, beta_true)

    for i in range(len(beta_true)):
        model.input_data['x_%d' % i] = X[:, i]
    model.input_data['true'] = pi_true * pl.exp(Y_true)
    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n * p, delta_true) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=5, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)

    model.beta = pandas.DataFrame(index=model.vars['p']['X'].columns)
    model.beta['true'] = 0.
    for i in range(len(beta_true)):
        model.beta['true']['x_%d' % i] = beta_true[i]

    model.beta['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['beta']]
    model.beta['sigma_pred'] = [n.stats()['standard deviation'] for n in model.vars['p']['beta']]
    add_quality_metrics(model.beta)

    print '\nbeta'
    print model.beta

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'beta')

    # delta is modeled on the log scale as eta
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)

    print 'delta'
    print model.delta
    add_to_results(model, 'delta')

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                                        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                        model.input_data['covered?'].mean())
    print 'effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.beta['abs_err'].dropna())),
                                                           model.beta.dropna()['covered?'].mean())

    add_to_results(model, 'input_data')
    # NOTE(review): 'beta' was already added above; this appends it a
    # second time — confirm whether the duplicate row is intended.
    add_to_results(model, 'beta')

    model.results = pandas.DataFrame(model.results)

    return model
def validate_covariate_model_dispersion(N=1000, delta_true=.15, pi_true=.01, zeta_true=[.5, -.5, 0.]):
    """Validate estimation of covariate effects on over-dispersion (zeta).

    Simulates N observations of a constant rate whose negative-binomial
    dispersion varies with Bernoulli covariates via ``zeta_true``, fits
    the 'p' data model, and records quality metrics for zeta, delta,
    and the data predictions.

    :returns: the model object with ``.results`` DataFrame attached
    """
    # NOTE(review): zeta_true has a mutable (list) default; harmless here
    # since it is never mutated.

    ## generate simulated data
    a = pl.arange(0, 100, 1)
    pi_age_true = pi_true * pl.ones_like(a)

    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    # dispersion covariates are coin flips; delta varies per row
    Z = mc.rbernoulli(.5, size=(N, len(zeta_true))) * 1.0
    delta = delta_true * pl.exp(pl.dot(Z, zeta_true))
    for i in range(len(zeta_true)):
        model.input_data['z_%d'%i] = Z[:,i]

    model.input_data['true'] = pi_true
    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta*n*p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=5, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)

    model.zeta = pandas.DataFrame(index=model.vars['p']['Z'].columns)
    model.zeta['true'] = zeta_true
    model.zeta['mu_pred'] = model.vars['p']['zeta'].stats()['mean']
    model.zeta['sigma_pred'] = model.vars['p']['zeta'].stats()['standard deviation']
    add_quality_metrics(model.zeta)

    print '\nzeta'
    print model.zeta

    # delta is modeled on the log scale as eta
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                                        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                        model.input_data['covered?'].mean())
    print 'effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.zeta['abs_err'].dropna())),
                                                           model.zeta.dropna()['covered?'].mean())

    # accumulate metrics, then convert to a DataFrame with fixed column order
    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'delta')
    add_to_results(model, 'input_data')
    add_to_results(model, 'zeta')

    model.results = pandas.DataFrame(model.results, columns='param bias mae mare pc'.split())

    return model
def validate_ai_re(N=500, delta_true=.15, sigma_true=[.1,.1,.1,.1,.1], pi_true=quadratic, smoothness='Moderately', heterogeneity='Slightly'):
    """Validate age-integrating model with hierarchical random effects.

    Simulates N interval-averaged observations of ``pi_true(a)`` shifted
    by area random effects (drawn along the GBD hierarchy for the
    north_africa_middle_east subtree), fits the 'p' data model rooted at
    that region, and records quality metrics for the data predictions,
    delta, the random effects alpha, their standard deviations sigma,
    and the age pattern mu.

    :returns: the model object with ``.results`` attached
    """
    # NOTE(review): sigma_true has a mutable (list) default; harmless
    # here since it is never mutated.

    ## generate simulated data
    a = pl.arange(0, 101, 1)
    pi_age_true = pi_true(a)

    # borrow the real GBD area hierarchy for the simulation
    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    gbd_hierarchy = model.hierarchy

    model = data_simulation.simple_model(N)
    model.hierarchy = gbd_hierarchy

    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    model.parameters['p']['smoothness'] = dict(amount=smoothness)
    model.parameters['p']['heterogeneity'] = heterogeneity

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    # interval average of the true rate via cumulative sums
    age_weights = pl.ones_like(a)
    sum_pi_wt = pl.cumsum(pi_age_true*age_weights)
    sum_wt = pl.cumsum(age_weights*1.)
    p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

    # correct cases where age_start == age_end
    i = age_start == age_end
    if pl.any(i):
        p[i] = pi_age_true[age_start[i]]

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, size=N)

    # simulate area random effects and assign each row a random area
    from validate_covariates import alpha_true_sim
    area_list = pl.array(['all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT', 'IRN', 'IRQ', 'JOR', 'SYR'])
    alpha = alpha_true_sim(model, area_list, sigma_true)
    print alpha

    model.input_data['true'] = pl.nan
    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]

    # each row's true rate is shifted by the sum of alphas on its root path
    for i, a in model.input_data['area'].iteritems():
        model.input_data['true'][i] = p[i] * pl.exp(pl.sum([alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a) if n in alpha]))
    p = model.input_data['true']

    n = model.input_data['effective_sample_size']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'north_africa_middle_east', 'total', 'all', None, None, None)
    #model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=1005, burn=500, thin=5, tune_interval=100)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(range(101), pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')
    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    # delta is modeled on the log scale as eta
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    # random effects: compare estimated alpha to simulated truth
    model.alpha = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha = model.alpha.dropna()
    data_simulation.add_quality_metrics(model.alpha)

    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['sigma_alpha']]
    model.sigma['sigma_pred'] = [n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']]
    data_simulation.add_quality_metrics(model.sigma)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                                        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                        model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame(dict(true=pi_age_true,
                                     mu_pred=model.vars['p']['mu_age'].stats()['mean'],
                                     sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print model.results

    return model
def validate_prior_similarity():
    """Explore sensitivity of a regional fit to the heterogeneity prior.

    Loads disease model 20928, restricts its input data to one
    region/sex/year, then refits prevalence ('p') once for each
    heterogeneity level ('Slightly', 'Moderately', 'Very'), plotting
    convergence diagnostics and the fitted rate for each.

    Returns the disease-model object; ``dm.vars`` holds the last fit.
    """
    #dm = dismod3.load_disease_model(20945)
    #dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    #t = 'i'
    #area, sex, year = 'europe_eastern', 'male', 2005

    dm = dismod3.load_disease_model(20928)
    dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    t = 'p'
    area, sex, year = 'sub-saharan_africa_central', 'male', 2005

    # select data that is about areas in this region, recent years, and sex of male or total only
    model = dm.model
    subtree = nx.traversal.bfs_tree(model.hierarchy, area)
    relevant_rows = [i for i, r in model.input_data.T.iteritems() \
                     if (r['area'] in subtree or r['area'] == 'all')\
                     and ((year == 2005 and r['year_end'] >= 1997) or r['year_start'] <= 1997) \
                     and r['sex'] in [sex, 'total']]
    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with area
    # NOTE(review): pandas chained indexing; relies on old-pandas in-place
    # semantics — verify if the pandas version is ever upgraded.
    model.input_data['area'][model.input_data['area'] == 'all'] = area

    for het in 'Slightly Moderately Very'.split():
        # coarse age mesh, refit from scratch for each heterogeneity setting
        dm.model.parameters[t]['parameter_age_mesh'] = [0, 15, 20, 25, 35, 45, 55, 65, 75, 100]
        dm.model.parameters[t]['heterogeneity'] = het
        setup_regional_model(dm, area, sex, year)

        dm.vars = {}
        dm.vars[t] = data_model.data_model(t, dm.model, t,
                                           root_area=area, root_sex=sex, root_year=year,
                                           mu_age=None,
                                           mu_age_parent=dm.emp_priors[t, 'mu'],
                                           sigma_age_parent=dm.emp_priors[t, 'sigma'],
                                           rate_type=(t == 'rr') and 'log_normal' or 'neg_binom')

        fit_model.fit_data_model(dm.vars[t], iter=10050, burn=5000, thin=50, tune_interval=100)

        #2graphics.plot_one_effects(dm.vars[t], t, dm.model.hierarchy)
        #pl.title(het)

        graphics.plot_convergence_diag(dm.vars[t])
        pl.title(het)

        #graphics.plot_one_ppc(dm.vars[t], t)
        #pl.title(het)

        graphics.plot_one_type(dm.model, dm.vars[t], dm.emp_priors, t)
        pl.title(het)

        pl.show()

    return dm
#!/usr/bin/python3 import tensorflow as tf import numpy as np import scipy.ndimage from tf_DNN import DNN from tf_RNN import RNN from tf_CNN import CNN from data_model import data_model theData = data_model() # theModel = DNN(theData, h_layers = [512,256,128]) # theModel.train(max_epochs=100, batch_size=5000, l_rate=1e-03) theModel = RNN(theData, 28, rnn_size=128) theModel.train(max_epochs=100, batch_size=5000, l_rate=1e-03) # theModel = CNN(theData) # theModel.train(max_epochs=100, batch_size=5000, l_rate=1e-03) theModel.save_dir = './model_trained' theModel.restore_model() dat = np.vectorize(lambda x: 255 - x)(np.ndarray.flatten( scipy.ndimage.imread("test1.png", flatten=True))) theModel.test_input([dat])
model.parameters['p']['fixed_effects']['x_CODcorrected_Cirrhosis_ASDR'] = dict( dist='TruncatedNormal', mu=0, sigma=.01, lower=-1., upper=1.) model.parameters['p']['fixed_effects'][ 'x_IHME_alcohol_liters_pc_25July11'] = dict(dist='TruncatedNormal', mu=0, sigma=.01, lower=-1., upper=1.) # create model for global prevalence root_area = 'all' t = 'p' vars = data_model.data_model(t, model, t, root_area='all', root_sex='total', root_year='all', mu_age=None, mu_age_parent=None, sigma_age_parent=None) m = fit_model.fit_data_model(vars, iter=4000, burn=2000, thin=20, tune_interval=100) # generate estimates for all leaves of the hierarchy est = {} for n in model.hierarchy: if len(model.hierarchy.successors(n)) == 0:
def test_data_model_sim():
    """End-to-end simulation test of the age-integrating data model.

    Simulates noisy interval-averaged rate data with three covariates,
    fits with MAP then MCMC, and asserts that (a) USA predictions match
    the expected covariate-shifted age pattern, and (b) the posterior
    mean and 95% HPD interval recover the simulated truth away from the
    age endpoints.  Saves a diagnostic plot to age_integrating_sim.png.
    """
    # generate simulated data
    n = 50
    sigma_true = .025

    # start with truth: a smooth quadratic-ish age pattern
    a = pl.arange(0, 100, 1)
    pi_age_true = .0001 * (a * (100. - a) + 100.)

    # choose age intervals to measure
    age_start = pl.array(mc.runiform(0, 100, n), dtype=int)
    age_start.sort()  # sort to make it easy to discard the edges when testing
    age_end = pl.array(mc.runiform(age_start + 1, pl.minimum(age_start + 10, 100)), dtype=int)

    # find truth for the integral across the age intervals
    import scipy.integrate
    pi_interval_true = [
        scipy.integrate.trapz(pi_age_true[a_0i:(a_1i + 1)]) / (a_1i - a_0i)
        for a_0i, a_1i in zip(age_start, age_end)
    ]

    # generate covariates that add explained variation
    X = mc.rnormal(0., 1.**2, size=(n, 3))
    beta_true = [-.1, .1, .2]
    Y_true = pl.dot(X, beta_true)

    # calculate the true value of the rate in each interval
    pi_true = pi_interval_true * pl.exp(Y_true)

    # simulate the noisy measurement of the rate in each interval
    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    # store the simulated data in a pandas DataFrame
    data = pandas.DataFrame(
        dict(value=p,
             age_start=age_start,
             age_end=age_end,
             x_0=X[:, 0],
             x_1=X[:, 1],
             x_2=X[:, 2]))
    data['effective_sample_size'] = pl.maximum(p * (1 - p) / sigma_true**2, 1.)

    data['standard_error'] = pl.nan
    data['upper_ci'] = pl.nan
    data['lower_ci'] = pl.nan

    data['year_start'] = 2005.  # TODO: make these vary
    data['year_end'] = 2005.
    data['sex'] = 'total'
    data['area'] = 'all'

    # generate a moderately complicated hierarchy graph for the model
    hierarchy = nx.DiGraph()
    hierarchy.add_node('all')
    hierarchy.add_edge('all', 'super-region-1', weight=.1)
    hierarchy.add_edge('super-region-1', 'NAHI', weight=.1)
    hierarchy.add_edge('NAHI', 'CAN', weight=.1)
    hierarchy.add_edge('NAHI', 'USA', weight=.1)

    output_template = pandas.DataFrame(
        dict(year=[1990, 1990, 2005, 2005, 2010, 2010] * 2,
             sex=['male', 'female'] * 3 * 2,
             x_0=[.5] * 6 * 2,
             x_1=[0.] * 6 * 2,
             x_2=[.5] * 6 * 2,
             pop=[50.] * 6 * 2,
             area=['CAN'] * 6 + ['USA'] * 6))

    # create model and priors
    vars = data_model.data_model('test', data, hierarchy, 'all')

    # fit model: MAP to initialize, then MCMC with adaptive Metropolis
    mc.MAP(vars).fit(method='fmin_powell', verbose=1)
    m = mc.MCMC(vars)
    m.use_step_method(mc.AdaptiveMetropolis, [m.gamma_bar, m.gamma, m.beta])
    m.sample(30000, 15000, 15)

    # check estimates
    pi_usa = data_model.predict_for(output_template, hierarchy, 'all', 'USA', 'male', 1990, vars)
    assert pl.allclose(pi_usa.mean(), (m.mu_age.trace() * pl.exp(.05)).mean(), rtol=.1)

    # check convergence
    print 'gamma mc error:', m.gamma_bar.stats()['mc error'].round(2), m.gamma.stats()['mc error'].round(2)

    # plot results
    for a_0i, a_1i, p_i in zip(age_start, age_end, p):
        pl.plot([a_0i, a_1i], [p_i, p_i], 'rs-', mew=1, mec='w', ms=4)
    pl.plot(a, pi_age_true, 'g-', linewidth=2)
    pl.plot(pl.arange(101), m.mu_age.stats()['mean'], 'k-', drawstyle='steps-post', linewidth=3)
    pl.plot(pl.arange(101), m.mu_age.stats()['95% HPD interval'], 'k', linestyle='steps-post:')
    pl.plot(pl.arange(101), pi_usa.mean(0), 'r-', linewidth=2, drawstyle='steps-post')
    pl.savefig('age_integrating_sim.png')

    # compare estimate to ground truth (skip endpoints, because they are extra hard to get right)
    assert pl.allclose(m.pi.stats()['mean'][10:-10], pi_true[10:-10], rtol=.2)
    lb, ub = m.pi.stats()['95% HPD interval'].T
    assert pl.mean((lb <= pi_true)[10:-10] & (pi_true <= ub)[10:-10]) > .75
def validate_covariate_model_fe(N=100, delta_true=3, pi_true=.01, beta_true=[.5, -.5, 0.], replicate=0): # set random seed for reproducibility mc.np.random.seed(1234567 + replicate) ## generate simulated data a = pl.arange(0, 100, 1) pi_age_true = pi_true * pl.ones_like(a) model = data.ModelData() model.parameters['p']['parameter_age_mesh'] = [0, 100] model.input_data = pandas.DataFrame(index=range(N)) initialize_input_data(model.input_data) # add fixed effect to simulated data X = mc.rnormal(0., 1.**-2, size=(N,len(beta_true))) Y_true = pl.dot(X, beta_true) for i in range(len(beta_true)): model.input_data['x_%d'%i] = X[:,i] model.input_data['true'] = pi_true * pl.exp(Y_true) model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N) n = model.input_data['effective_sample_size'] p = model.input_data['true'] model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true) / n ## Then fit the model and compare the estimates to the truth model.vars = {} model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None) model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=5, tune_interval=100) graphics.plot_one_ppc(model.vars['p'], 'p') graphics.plot_convergence_diag(model.vars) pl.show() model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean'] model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation'] add_quality_metrics(model.input_data) model.beta = pandas.DataFrame(index=model.vars['p']['X'].columns) model.beta['true'] = 0. 
for i in range(len(beta_true)): model.beta['true']['x_%d'%i] = beta_true[i] model.beta['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['beta']] model.beta['sigma_pred'] = [n.stats()['standard deviation'] for n in model.vars['p']['beta']] add_quality_metrics(model.beta) print '\nbeta' print model.beta model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[]) add_to_results(model, 'beta') model.delta = pandas.DataFrame(dict(true=[delta_true])) model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean() model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std() add_quality_metrics(model.delta) print 'delta' print model.delta add_to_results(model, 'delta') print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(), pl.median(pl.absolute(model.input_data['rel_err'].dropna())), model.input_data['covered?'].mean()) print 'effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.beta['abs_err'].dropna())), model.beta.dropna()['covered?'].mean()) add_to_results(model, 'input_data') add_to_results(model, 'beta') model.results = pandas.DataFrame(model.results) return model
sys.path.insert(0, currentdir) sys.path.insert(0, currentdir + "/components") import data_model as dm from right_panel import right_panel from left_panel import left_panel from mid_panel import mid_panel # region Read in global data import os, sys, inspect currentdir = os.path.dirname( os.path.abspath(inspect.getfile(inspect.currentframe()))) parentdir = os.path.dirname(os.path.dirname(currentdir)) data_reader = dm.data_model(parentdir + "/data/raw") # endregion # region Setup app and layout/frontend app = dash.Dash(__name__, external_stylesheets=[dbc.themes.COSMO]) # CERULEAN, COSMO, CYBORG, DARKLY, FLATLY, JOURNAL, LITERA, LUMEN, LUX, MATERIA, MINTY, PULSE, SANDSTONE, SIMPLEX, SKETCHY, SLATE, SOLAR, SPACELAB, SUPERHERO, UNITED, YETI server = app.server country_panel = right_panel(data_reader) global_panel = left_panel(data_reader) map_panel = mid_panel(data_reader) alt.themes.enable("ggplot2") app.title = "Covid-19 Data Portal" dashboard_heading = ("Covid-19 Data Portal") app.layout = dbc.Container([ dbc.Row( dbc.Col(html.Div([html.H1(dashboard_heading)]), className="heading")),
def validate_ai_re(N=500, delta_true=.15, sigma_true=None, pi_true=quadratic, smoothness='Moderately', heterogeneity='Slightly'):
    """Validate the age-integrating model with area random effects on simulated data.

    Simulates N prevalence observations with random age intervals drawn from
    the true age pattern ``pi_true``, multiplies each row's true rate by the
    product of simulated area random effects along the GBD hierarchy path,
    adds negative-binomial noise with over-dispersion ``delta_true``, fits
    the 'p' data model, and records quality metrics.

    :Parameters:
      - `N` : int, number of simulated rows
      - `delta_true` : float, true negative-binomial over-dispersion
      - `sigma_true` : list of 5 floats, true random-effect std dev per
        hierarchy level; None (default) means [.1, .1, .1, .1, .1]
      - `pi_true` : callable age-array -> rate-array, true age pattern
      - `smoothness`, `heterogeneity` : str, prior settings for rate type 'p'

    :Results: the fitted model object, with ``model.results`` filled in
    """
    # BUGFIX(review): the default was a shared mutable list; use a None
    # sentinel so repeated calls cannot mutate one another's default.
    if sigma_true is None:
        sigma_true = [.1, .1, .1, .1, .1]

    ## generate simulated data
    a = pl.arange(0, 101, 1)
    pi_age_true = pi_true(a)

    # borrow the real GBD area hierarchy, but attach it to a simple simulated model
    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    gbd_hierarchy = model.hierarchy

    model = data_simulation.simple_model(N)
    model.hierarchy = gbd_hierarchy

    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    model.parameters['p']['smoothness'] = dict(amount=smoothness)
    model.parameters['p']['heterogeneity'] = heterogeneity

    # random [age_start, age_end] interval for each observation
    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    # interval-average the true age pattern via cumulative sums
    age_weights = pl.ones_like(a)
    sum_pi_wt = pl.cumsum(pi_age_true * age_weights)
    sum_wt = pl.cumsum(age_weights * 1.)
    p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

    # correct cases where age_start == age_end
    i = age_start == age_end
    if pl.any(i):
        p[i] = pi_age_true[age_start[i]]

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, size=N)

    from validate_covariates import alpha_true_sim
    area_list = pl.array(['all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT', 'IRN', 'IRQ', 'JOR', 'SYR'])
    alpha = alpha_true_sim(model, area_list, sigma_true)
    print(alpha)

    # assign each row an area uniformly at random, then scale its true rate by
    # exp(sum of random effects on the path from 'all' down to that area)
    model.input_data['true'] = pl.nan
    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]

    # BUGFIX(review): loop variable renamed from `a` to `area` so it no
    # longer clobbers the age grid `a` defined above.
    # NOTE(review): chained assignment below relies on old-pandas semantics
    # (writes through to the frame) -- confirm before upgrading pandas.
    for i, area in model.input_data['area'].iteritems():
        model.input_data['true'][i] = p[i] * pl.exp(pl.sum([alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', area) if n in alpha]))
    p = model.input_data['true']

    # negative-binomial observation noise
    n = model.input_data['effective_sample_size']
    model.input_data['value'] = mc.rnegative_binomial(n * p, delta_true * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'north_africa_middle_east', 'total', 'all', None, None, None)
    #model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=1005, burn=500, thin=5, tune_interval=100)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(range(101), pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')
    pl.show()

    # record predictions and quality metrics: data, delta, alpha, sigma, mu
    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    model.alpha = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha = model.alpha.dropna()
    data_simulation.add_quality_metrics(model.alpha)

    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['sigma_alpha']]
    model.sigma['sigma_pred'] = [n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']]
    data_simulation.add_quality_metrics(model.sigma)

    print('delta')
    print(model.delta)

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(), pl.median(pl.absolute(model.input_data['rel_err'].dropna())), model.input_data['covered?'].mean()))

    model.mu = pandas.DataFrame(dict(true=pi_age_true,
                                     mu_pred=model.vars['p']['mu_age'].stats()['mean'],
                                     sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print(model.results)

    return model
def validate_covariate_model_re(N=500, delta_true=.15, pi_true=.01, sigma_true=None, ess=1000):
    """Validate recovery of area random effects (alpha, sigma_alpha) from simulation.

    Simulates N observations at constant true prevalence ``pi_true``, scaled
    per-row by simulated random effects along the GBD hierarchy, with
    negative-binomial noise of over-dispersion ``delta_true``; fits the 'p'
    model and tabulates quality metrics for alpha, sigma, delta, and the data.

    :Parameters:
      - `N` : int, number of simulated rows
      - `delta_true` : float, true over-dispersion
      - `pi_true` : float, constant true prevalence level
      - `sigma_true` : list of 5 floats, true random-effect std dev per
        hierarchy level; None (default) means [.1, .1, .1, .1, .1]
      - `ess` : effective sample size for every row

    :Results: the fitted model, with ``model.results`` DataFrame attached
    """
    # BUGFIX(review): default was a shared mutable list; use a None sentinel.
    if sigma_true is None:
        sigma_true = [.1, .1, .1, .1, .1]

    ## set simulation parameters
    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.parameters['p']['heterogeneity'] = 'Slightly'  # ensure heterogeneity is slightly

    # super-regions, their regions, and up to 5 countries per region
    area_list = []
    for sr in sorted(model.hierarchy.successors('all')):
        area_list.append(sr)
        for r in sorted(model.hierarchy.successors(sr)):
            area_list.append(r)
            area_list += sorted(model.hierarchy.successors(r))[:5]
    area_list = pl.array(area_list)

    ## generate simulation data
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    alpha = alpha_true_sim(model, area_list, sigma_true)

    # choose observed prevalence values
    model.input_data['effective_sample_size'] = ess
    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]
    model.input_data['true'] = pl.nan
    # NOTE(review): chained assignment relies on old-pandas write-through
    # semantics -- confirm before upgrading pandas.
    for i, a in model.input_data['area'].iteritems():
        model.input_data['true'][i] = pi_true * pl.exp(pl.sum([alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a) if n in alpha]))

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n * p, delta_true * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=20000, burn=10000, thin=10, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)

    model.alpha = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    add_quality_metrics(model.alpha)
    print('\nalpha')
    print(model.alpha.dropna())

    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['sigma_alpha']]
    model.sigma['sigma_pred'] = [n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']]
    add_quality_metrics(model.sigma)
    print('sigma_alpha')
    print(model.sigma)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'sigma')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)
    print('delta')
    print(model.delta)
    add_to_results(model, 'delta')

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(), pl.median(pl.absolute(model.input_data['rel_err'].dropna())), model.input_data['covered?'].mean()))
    print('effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.alpha['abs_err'].dropna())), model.alpha.dropna()['covered?'].mean()))

    add_to_results(model, 'input_data')
    add_to_results(model, 'alpha')
    model.results = pandas.DataFrame(model.results)

    return model
import graphics reload(data_model) reload(covariate_model) # load the model from disk, and adjust the data and parameters for this example model = data.ModelData.from_gbd_json('/var/tmp/dismod_working/test/dm-19807/json/dm-19807.json') model.parameters['p']['parameter_age_mesh'] = range(0,101,20) model.parameters['p']['fixed_effects']['x_CODcorrected_Cirrhosis_ASDR'] = dict(dist='TruncatedNormal', mu=0, sigma=.01, lower=-1., upper=1.) model.parameters['p']['fixed_effects']['x_IHME_alcohol_liters_pc_25July11'] = dict(dist='TruncatedNormal', mu=0, sigma=.01, lower=-1., upper=1.) # create model for global prevalence root_area = 'all' t = 'p' vars = data_model.data_model(t, model, t, root_area='all', root_sex='total', root_year='all', mu_age=None, mu_age_parent=None, sigma_age_parent=None) m = fit_model.fit_data_model(vars, iter=4000, burn=2000, thin=20, tune_interval=100) # generate estimates for all leaves of the hierarchy est = {} for n in model.hierarchy: if len(model.hierarchy.successors(n)) == 0: est[n] = pl.median(covariate_model.predict_for(model, 'all', 'total', 'all', n, 'male', 2005, 0., vars, 0., 1.), axis=0) graphics.plot_one_type(model, vars, {}, 'p')
def validate_covariate_model_dispersion(N=1000, delta_true=.15, pi_true=.01, zeta_true=None):
    """Validate recovery of dispersion covariates (zeta) from simulated data.

    Simulates N observations at constant true prevalence ``pi_true`` whose
    negative-binomial over-dispersion varies per row with Bernoulli covariates
    Z through delta = delta_true * exp(Z . zeta_true); fits the 'p' model and
    tabulates quality metrics for zeta, delta, and the data predictions.

    :Parameters:
      - `N` : int, number of simulated rows
      - `delta_true` : float, baseline over-dispersion
      - `pi_true` : float, constant true prevalence level
      - `zeta_true` : list of floats, true dispersion-covariate effects;
        None (default) means [.5, -.5, 0.]

    :Results: the fitted model, with ``model.results`` DataFrame attached
    """
    # BUGFIX(review): default was a shared mutable list; use a None sentinel.
    if zeta_true is None:
        zeta_true = [.5, -.5, 0.]

    ## generate simulated data
    a = pl.arange(0, 100, 1)
    pi_age_true = pi_true * pl.ones_like(a)

    model = data.ModelData()
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    # Bernoulli(.5) dispersion covariates drive row-specific over-dispersion
    Z = mc.rbernoulli(.5, size=(N, len(zeta_true))) * 1.0
    delta = delta_true * pl.exp(pl.dot(Z, zeta_true))
    for i in range(len(zeta_true)):
        model.input_data['z_%d' % i] = Z[:, i]

    model.input_data['true'] = pi_true
    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, N)

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n * p, delta * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=5, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)

    model.zeta = pandas.DataFrame(index=model.vars['p']['Z'].columns)
    model.zeta['true'] = zeta_true
    model.zeta['mu_pred'] = model.vars['p']['zeta'].stats()['mean']
    model.zeta['sigma_pred'] = model.vars['p']['zeta'].stats()['standard deviation']
    add_quality_metrics(model.zeta)
    print('\nzeta')
    print(model.zeta)

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)
    print('delta')
    print(model.delta)

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(), pl.median(pl.absolute(model.input_data['rel_err'].dropna())), model.input_data['covered?'].mean()))
    print('effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.zeta['abs_err'].dropna())), model.zeta.dropna()['covered?'].mean()))

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'delta')
    add_to_results(model, 'input_data')
    add_to_results(model, 'zeta')
    model.results = pandas.DataFrame(model.results, columns='param bias mae mare pc'.split())

    return model
def validate_age_integrating_model_sim(N=500, delta_true=.15, pi_true=quadratic):
    """Validate the age-integrating data model on simulated interval data.

    Draws N observations with random age intervals from the true age pattern
    ``pi_true``, adds negative-binomial noise with over-dispersion
    ``delta_true``, fits the 'p' data model, and records quality metrics for
    delta, the age pattern mu, and the data predictions.  Returns the model.
    """
    ## generate simulated data
    ages = pl.arange(0, 101, 1)
    pi_age_true = pi_true(ages)

    model = data_simulation.simple_model(N)
    #model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    #model.parameters['p']['smoothness'] = dict(amount='Very')

    # a random [age_start, age_end] interval for every observation
    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    # average of the true curve over each interval, via cumulative sums
    age_weights = pl.ones_like(ages)
    cum_pi = pl.cumsum(pi_age_true * age_weights)
    cum_wt = pl.cumsum(age_weights)
    p = (cum_pi[age_end] - cum_pi[age_start]) / (cum_wt[age_end] - cum_wt[age_start])

    # degenerate intervals (age_start == age_end) take the point value
    degenerate = age_start == age_end
    if pl.any(degenerate):
        p[degenerate] = pi_age_true[age_start[degenerate]]

    n = mc.runiform(100, 10000, size=N)

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n * p, delta_true * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(ages, pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')
    pl.show()

    pred_stats = model.vars['p']['p_pred'].stats()
    model.input_data['mu_pred'] = pred_stats['mean']
    model.input_data['sigma_pred'] = pred_stats['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    exp_eta = pl.exp(model.vars['p']['eta'].trace())
    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = exp_eta.mean()
    model.delta['sigma_pred'] = exp_eta.std()
    data_simulation.add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)
    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(), pl.median(pl.absolute(model.input_data['rel_err'].dropna())), model.input_data['covered?'].mean()))

    mu_stats = model.vars['p']['mu_age'].stats()
    model.mu = pandas.DataFrame(dict(true=pi_age_true,
                                     mu_pred=mu_stats['mean'],
                                     sigma_pred=mu_stats['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    model.results = pandas.DataFrame(model.results, columns='param bias mae mare pc'.split())
    print(model.results)

    return model
# NOTE(review): this redefines validate_covariate_model_re (an earlier
# definition exists in this file); the later definition wins at import time.
def validate_covariate_model_re(N=500, delta_true=.15, pi_true=.01, sigma_true=None, ess=1000):
    """Validate recovery of area random effects from simulated prevalence data.

    Simulates N rows at constant true level ``pi_true`` scaled by random
    effects along the GBD hierarchy, adds negative-binomial noise with
    over-dispersion ``delta_true``, fits the 'p' model, and tabulates quality
    metrics for alpha, sigma_alpha, delta, and the data predictions.

    `sigma_true` is the list of 5 true random-effect standard deviations per
    hierarchy level; None (default) means [.1, .1, .1, .1, .1].  Returns the
    fitted model with a ``results`` DataFrame attached.
    """
    # BUGFIX(review): default was a shared mutable list; use a None sentinel.
    if sigma_true is None:
        sigma_true = [.1, .1, .1, .1, .1]

    ## set simulation parameters
    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.parameters['p']['heterogeneity'] = 'Slightly'  # ensure heterogeneity is slightly

    # super-regions, their regions, and up to 5 countries per region
    area_list = []
    for sr in sorted(model.hierarchy.successors('all')):
        area_list.append(sr)
        for r in sorted(model.hierarchy.successors(sr)):
            area_list.append(r)
            area_list += sorted(model.hierarchy.successors(r))[:5]
    area_list = pl.array(area_list)

    ## generate simulation data
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    alpha = alpha_true_sim(model, area_list, sigma_true)

    # choose observed prevalence values
    model.input_data['effective_sample_size'] = ess
    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]
    model.input_data['true'] = pl.nan
    # NOTE(review): chained assignment relies on old-pandas write-through
    # semantics -- confirm before upgrading pandas.
    for i, a in model.input_data['area'].iteritems():
        model.input_data['true'][i] = pi_true * pl.exp(pl.sum([alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a) if n in alpha]))

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n * p, delta_true * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=20000, burn=10000, thin=10, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)

    model.alpha = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    add_quality_metrics(model.alpha)
    print('\nalpha')
    print(model.alpha.dropna())

    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['sigma_alpha']]
    model.sigma['sigma_pred'] = [n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']]
    add_quality_metrics(model.sigma)
    print('sigma_alpha')
    print(model.sigma)

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'sigma')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)
    print('delta')
    print(model.delta)
    add_to_results(model, 'delta')

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(), pl.median(pl.absolute(model.input_data['rel_err'].dropna())), model.input_data['covered?'].mean()))
    print('effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.alpha['abs_err'].dropna())), model.alpha.dropna()['covered?'].mean()))

    add_to_results(model, 'input_data')
    add_to_results(model, 'alpha')
    model.results = pandas.DataFrame(model.results)

    return model