def validate_fit_consistent_model():
    """Smoke test: build a consistent model with empty priors and fit it.

    Uses the module-level `model`; the model is built for
    (area='all', sex='total', year='all').  Returns whatever
    fit_model.fit_consistent_model produces.
    """
    model_vars = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    fitted = fit_model.fit_consistent_model(model_vars)
    return fitted
def fit_simulated(dm, area, sex, year):
    """Fit dm.vars with MAP+MCMC and store posterior predictions on dm.

    Side effects: sets dm.map, dm.mcmc, and dm.posteriors (a dict keyed by
    rate type).  `area`, `sex`, `year` are used as both the reference and
    the prediction coordinates.
    """
    # full-length chain; for a quick smoke test use iter=101, burn=0, thin=1
    dm.map, dm.mcmc = fit_model.fit_consistent_model(
        dm.vars, iter=10000, burn=5000, thin=25, tune_interval=100)

    results = {}
    for rate_type in ['i', 'r', 'f', 'p', 'rr', 'pf']:
        results[rate_type] = covariate_model.predict_for(
            dm.model, area, sex, year,
            area, sex, year,
            1., dm.vars[rate_type], 0., pl.inf)
    dm.posteriors = results
# Restrict the input data to the subtree rooted at `root_area`, fit a
# consistent model there, and plot posterior estimates.
# NOTE(review): `root_area` and `model` are assumed to be defined earlier
# in the script -- confirm against surrounding code.
subtree = nx.traversal.bfs_tree(model.hierarchy, root_area)
# keep only rows inside the subtree, with year_end >= 1997, sex male/total,
# and data_type restricted to 'pf' or 'm'
relevant_rows = [i for i, r in model.input_data.T.iteritems()
                 if r['area'] in subtree
                 and r['year_end'] >= 1997
                 and r['sex'] in ['male', 'total']
                 and r['data_type'] in ['pf', 'm']]
model.input_data = model.input_data.ix[relevant_rows]

## create and fit consistent model at gbd region level
vars = consistent_model.consistent_model(model, root_area=root_area, root_sex='male', root_year=2005, priors={})
# short chain (1030 iterations) -- presumably sized for testing, not production runs
posterior_model = fit_model.fit_consistent_model(vars, iter=1030, burn=500, thin=5, tune_interval=100)

## generate estimates
predict_area = root_area
posteriors = {}
for t in 'i r f p rr pf'.split():
    # posterior median across draws of the predicted age pattern for each rate type
    posteriors[t] = pl.median(covariate_model.predict_for(
        model.output_template, model.hierarchy,
        root_area, 'male', 2005,
        predict_area, 'male', 2005, vars[t]), axis=0)

graphics.all_plots(model, vars, {}, posteriors)
# Fit a consistent model for the latin_america_central GBD region and plot
# posterior estimates for (latin_america_central, male, 2005).
root_area = 'latin_america_central'
subtree = nx.traversal.bfs_tree(model.hierarchy, root_area)
# keep only rows inside the subtree, with year_end >= 1997 and sex male/total
relevant_rows = [i for i, r in model.input_data.T.iteritems()
                 if r['area'] in subtree
                 and r['year_end'] >= 1997
                 and r['sex'] in ['male', 'total']]
model.input_data = model.input_data.ix[relevant_rows]

## create and fit consistent model at gbd region level
vars = consistent_model.consistent_model(model, root_area=root_area, root_sex='male', root_year=2005, priors={})
# moderate-length chain (3003 iterations)
posterior_model = fit_model.fit_consistent_model(vars, iter=3003, burn=1500, thin=10, tune_interval=100)

## generate estimates for latin_america_central, male, 2005
predict_area = root_area
posteriors = {}
for t in 'i r f p rr pf'.split():
    # posterior median across draws of the predicted age pattern for each rate type
    posteriors[t] = pl.median(covariate_model.predict_for(
        model.output_template, model.hierarchy,
        root_area, 'male', 2005,
        predict_area, 'male', 2005, vars[t]), axis=0)

graphics.all_plots(model, vars, {}, posteriors)
model.parameters['r']['level_value'] = dict(age_before=100, age_after=100, value=0.) # no covariates model.input_data = model.input_data.drop([col for col in model.input_data.columns if col.startswith('x_')], axis=1) # create model for (europe_western, male, 2005) root_area = 'europe_western' subtree = nx.traversal.bfs_tree(model.hierarchy, root_area) relevant_rows = [i for i, r in model.input_data.T.iteritems() \ if r['area'] in subtree \ and r['year_end'] >= 1997 \ and r['sex'] in ['male', 'total'] \ and r['data_type'] in ['pf', 'm']] model.input_data = model.input_data.ix[relevant_rows] ## create and fit consistent model at gbd region level vars = consistent_model.consistent_model(model, root_area=root_area, root_sex='male', root_year=2005, priors={}) posterior_model = fit_model.fit_consistent_model(vars, iter=1030, burn=500, thin=5, tune_interval=100) ## generate estimates predict_area = root_area posteriors = {} for t in 'i r f p rr pf'.split(): posteriors[t] = pl.median(covariate_model.predict_for(model.output_template, model.hierarchy, root_area, 'male', 2005, predict_area, 'male', 2005, vars[t]), axis=0) graphics.all_plots(model, vars, {}, posteriors)
# create model for (europe_western, male, 2005) root_area = 'europe_western' subtree = nx.traversal.bfs_tree(model.hierarchy, root_area) relevant_rows = [i for i, r in model.input_data.T.iteritems() \ if r['area'] in subtree \ and r['year_end'] >= 1997 \ and r['sex'] in ['male', 'total']] model.input_data = model.input_data.ix[relevant_rows] ## create and fit consistent model at gbd region level vars = consistent_model.consistent_model(model, root_area=root_area, root_sex='male', root_year=2005, priors={}) posterior_model = fit_model.fit_consistent_model(vars, iter=101, burn=0, thin=1) ## generate estimates predict_area = root_area posteriors = {} for t in 'i r f p rr pf'.split(): posteriors[t] = pl.median(covariate_model.predict_for( model.output_template, model.hierarchy, root_area, 'male', 2005, predict_area, 'male', 2005, vars[t]), axis=0) graphics.all_plots(model, vars, {}, posteriors)
# create model for (latin_america_central, male, 2005) root_area = "latin_america_central" subtree = nx.traversal.bfs_tree(model.hierarchy, root_area) relevant_rows = [ i for i, r in model.input_data.T.iteritems() if r["area"] in subtree and r["year_end"] >= 1997 and r["sex"] in ["male", "total"] ] model.input_data = model.input_data.ix[relevant_rows] ## create and fit consistent model at gbd region level vars = consistent_model.consistent_model(model, root_area=root_area, root_sex="male", root_year=2005, priors={}) posterior_model = fit_model.fit_consistent_model(vars, iter=3003, burn=1500, thin=10, tune_interval=100) ## generate estimates for latin_america_central, male, 2005 predict_area = root_area posteriors = {} for t in "i r f p rr pf".split(): posteriors[t] = pl.median( covariate_model.predict_for( model.output_template, model.hierarchy, root_area, "male", 2005, predict_area, "male", 2005, vars[t] ), axis=0, ) graphics.all_plots(model, vars, {}, posteriors)
# Pin the remission rate prior to value 12 across all ages and widen its
# level bounds (NOTE(review): exact level_value/level_bounds semantics come
# from the priors code; confirm).
model.parameters['r']['level_value'] = dict(age_before=100, age_after=100, value=12.)
model.parameters['r']['level_bounds'] = dict(lower=0., upper=1000.)

# no covariates: drop every study-level covariate column ('x_*')
model.input_data = model.input_data.drop([col for col in model.input_data.columns if col.startswith('x_')], axis=1)

# create model for (europe_western, male, 2005)
root_area = 'europe_western'
subtree = nx.traversal.bfs_tree(model.hierarchy, root_area)
# keep only rows inside the subtree, with year_end >= 1997 and sex male/total
relevant_rows = [i for i, r in model.input_data.T.iteritems()
                 if r['area'] in subtree
                 and r['year_end'] >= 1997
                 and r['sex'] in ['male', 'total']]
model.input_data = model.input_data.ix[relevant_rows]

## create and fit consistent model at gbd region level
vars = consistent_model.consistent_model(model, root_area=root_area, root_sex='male', root_year=2005, priors={})
# very short chain (101 iterations, no burn-in) -- smoke-test settings
posterior_model = fit_model.fit_consistent_model(vars, iter=101, burn=0, thin=1)

## generate estimates
predict_area = root_area
posteriors = {}
for t in 'i r f p rr pf'.split():
    # posterior median across draws of the predicted age pattern for each rate type
    posteriors[t] = pl.median(covariate_model.predict_for(model.output_template, model.hierarchy,
                                                          root_area, 'male', 2005,
                                                          predict_area, 'male', 2005, vars[t]), axis=0)

graphics.all_plots(model, vars, {}, posteriors)
def validate_consistent_re(N=500, delta_true=.15, sigma_true=[.1,.1,.1,.1,.1], true=dict(i=quadratic, f=constant, r=constant)): types = pl.array(['i', 'r', 'f', 'p']) ## generate simulated data model = data_simulation.simple_model(N) model.input_data['effective_sample_size'] = 1. model.input_data['value'] = 0. # coarse knot spacing for fast testing for t in types: model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20) sim = consistent_model.consistent_model(model, 'all', 'total', 'all', {}) for t in 'irf': for i, k_i in enumerate(sim[t]['knots']): sim[t]['gamma'][i].value = pl.log(true[t](k_i)) age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int) age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int) data_type = types[mc.rcategorical(pl.ones(len(types), dtype=float) / float(len(types)), size=N)] a = pl.arange(101) age_weights = pl.ones_like(a) sum_wt = pl.cumsum(age_weights) p = pl.zeros(N) for t in types: mu_t = sim[t]['mu_age'].value sum_mu_wt = pl.cumsum(mu_t*age_weights) p_t = (sum_mu_wt[age_end] - sum_mu_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start]) # correct cases where age_start == age_end i = age_start == age_end if pl.any(i): p_t[i] = mu_t[age_start[i]] # copy part into p p[data_type==t] = p_t[data_type==t] # add covariate shifts import dismod3 import simplejson as json gbd_model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json())) model.hierarchy = gbd_model.hierarchy from validate_covariates import alpha_true_sim area_list = pl.array(['all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT', 'IRN', 'IRQ', 'JOR', 'SYR']) alpha = {} for t in types: alpha[t] = alpha_true_sim(model, area_list, sigma_true) print json.dumps(alpha, indent=2) model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)] for i, a in model.input_data['area'].iteritems(): t = data_type[i] p[i] = p[i] * pl.exp(pl.sum([alpha[t][n] for n in 
nx.shortest_path(model.hierarchy, 'all', a) if n in alpha])) n = mc.runiform(100, 10000, size=N) model.input_data['data_type'] = data_type model.input_data['age_start'] = age_start model.input_data['age_end'] = age_end model.input_data['effective_sample_size'] = n model.input_data['true'] = p model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true) / n # coarse knot spacing for fast testing for t in types: model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20) ## Then fit the model and compare the estimates to the truth model.vars = {} model.vars = consistent_model.consistent_model(model, 'all', 'total', 'all', {}) #model.map, model.mcmc = fit_model.fit_consistent_model(model.vars, iter=101, burn=0, thin=1, tune_interval=100) model.map, model.mcmc = fit_model.fit_consistent_model(model.vars, iter=10000, burn=5000, thin=25, tune_interval=100) graphics.plot_convergence_diag(model.vars) graphics.plot_fit(model, model.vars, {}, {}) for i, t in enumerate('i r f p rr pf'.split()): pl.subplot(2, 3, i+1) pl.plot(range(101), sim[t]['mu_age'].value, 'w-', label='Truth', linewidth=2) pl.plot(range(101), sim[t]['mu_age'].value, 'r-', label='Truth', linewidth=1) pl.show() model.input_data['mu_pred'] = 0. model.input_data['sigma_pred'] = 0. 
for t in types: model.input_data['mu_pred'][data_type==t] = model.vars[t]['p_pred'].stats()['mean'] model.input_data['sigma_pred'][data_type==t] = model.vars[t]['p_pred'].stats()['standard deviation'] data_simulation.add_quality_metrics(model.input_data) model.delta = pandas.DataFrame(dict(true=[delta_true for t in types if t != 'rr'])) model.delta['mu_pred'] = [pl.exp(model.vars[t]['eta'].trace()).mean() for t in types if t != 'rr'] model.delta['sigma_pred'] = [pl.exp(model.vars[t]['eta'].trace()).std() for t in types if t != 'rr'] data_simulation.add_quality_metrics(model.delta) model.alpha = pandas.DataFrame() model.sigma = pandas.DataFrame() for t in types: alpha_t = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)]) alpha_t['true'] = pandas.Series(dict(alpha[t])) alpha_t['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars[t]['alpha']], index=model.vars[t]['U'].columns) alpha_t['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars[t]['alpha']], index=model.vars[t]['U'].columns) alpha_t['type'] = t model.alpha = model.alpha.append(alpha_t.dropna(), ignore_index=True) sigma_t = pandas.DataFrame(dict(true=sigma_true)) sigma_t['mu_pred'] = [n.stats()['mean'] for n in model.vars[t]['sigma_alpha']] sigma_t['sigma_pred'] = [n.stats()['standard deviation'] for n in model.vars[t]['sigma_alpha']] model.sigma = model.sigma.append(sigma_t.dropna(), ignore_index=True) data_simulation.add_quality_metrics(model.alpha) data_simulation.add_quality_metrics(model.sigma) print 'delta' print model.delta print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(), pl.median(pl.absolute(model.input_data['rel_err'].dropna())), model.input_data['covered?'].mean()) model.mu = pandas.DataFrame() for t in types: model.mu = model.mu.append(pandas.DataFrame(dict(true=sim[t]['mu_age'].value, mu_pred=model.vars[t]['mu_age'].stats()['mean'], 
sigma_pred=model.vars[t]['mu_age'].stats()['standard deviation'])), ignore_index=True) data_simulation.add_quality_metrics(model.mu) print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.mu['abs_err'].mean(), pl.median(pl.absolute(model.mu['rel_err'].dropna())), model.mu['covered?'].mean()) print data_simulation.initialize_results(model) data_simulation.add_to_results(model, 'delta') data_simulation.add_to_results(model, 'mu') data_simulation.add_to_results(model, 'input_data') data_simulation.add_to_results(model, 'alpha') data_simulation.add_to_results(model, 'sigma') data_simulation.finalize_results(model) print model.results return model
def validate_consistent_model_sim(N=500, delta_true=.5, true=dict(i=quadratic, f=constant, r=constant)): types = pl.array(['i', 'r', 'f', 'p']) ## generate simulated data model = data_simulation.simple_model(N) model.input_data['effective_sample_size'] = 1. model.input_data['value'] = 0. for t in types: model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20) sim = consistent_model.consistent_model(model, 'all', 'total', 'all', {}) for t in 'irf': for i, k_i in enumerate(sim[t]['knots']): sim[t]['gamma'][i].value = pl.log(true[t](k_i)) age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int) age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int) data_type = types[mc.rcategorical(pl.ones(len(types), dtype=float) / float(len(types)), size=N)] a = pl.arange(101) age_weights = pl.ones_like(a) sum_wt = pl.cumsum(age_weights) p = pl.zeros(N) for t in types: mu_t = sim[t]['mu_age'].value sum_mu_wt = pl.cumsum(mu_t * age_weights) p_t = (sum_mu_wt[age_end] - sum_mu_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start]) # correct cases where age_start == age_end i = age_start == age_end if pl.any(i): p_t[i] = mu_t[age_start[i]] # copy part into p p[data_type == t] = p_t[data_type == t] n = mc.runiform(100, 10000, size=N) model.input_data['data_type'] = data_type model.input_data['age_start'] = age_start model.input_data['age_end'] = age_end model.input_data['effective_sample_size'] = n model.input_data['true'] = p model.input_data['value'] = mc.rnegative_binomial(n * p, delta_true * n * p) / n # coarse knot spacing for fast testing for t in types: model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20) ## Then fit the model and compare the estimates to the truth model.vars = {} model.vars = consistent_model.consistent_model(model, 'all', 'total', 'all', {}) model.map, model.mcmc = fit_model.fit_consistent_model(model.vars, iter=10000, burn=5000, thin=25, tune_interval=100) graphics.plot_convergence_diag(model.vars) graphics.plot_fit(model, 
model.vars, {}, {}) for i, t in enumerate('i r f p rr pf'.split()): pl.subplot(2, 3, i + 1) pl.plot(a, sim[t]['mu_age'].value, 'w-', label='Truth', linewidth=2) pl.plot(a, sim[t]['mu_age'].value, 'r-', label='Truth', linewidth=1) #graphics.plot_one_type(model, model.vars['p'], {}, 'p') #pl.legend(fancybox=True, shadow=True, loc='upper left') pl.show() model.input_data['mu_pred'] = 0. model.input_data['sigma_pred'] = 0. for t in types: model.input_data['mu_pred'][ data_type == t] = model.vars[t]['p_pred'].stats()['mean'] model.input_data['sigma_pred'][data_type == t] = model.vars['p'][ 'p_pred'].stats()['standard deviation'] data_simulation.add_quality_metrics(model.input_data) model.delta = pandas.DataFrame( dict(true=[delta_true for t in types if t != 'rr'])) model.delta['mu_pred'] = [ pl.exp(model.vars[t]['eta'].trace()).mean() for t in types if t != 'rr' ] model.delta['sigma_pred'] = [ pl.exp(model.vars[t]['eta'].trace()).std() for t in types if t != 'rr' ] data_simulation.add_quality_metrics(model.delta) print 'delta' print model.delta print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % ( model.input_data['abs_err'].mean(), pl.median(pl.absolute(model.input_data['rel_err'].dropna())), model.input_data['covered?'].mean()) model.mu = pandas.DataFrame() for t in types: model.mu = model.mu.append(pandas.DataFrame( dict(true=sim[t]['mu_age'].value, mu_pred=model.vars[t]['mu_age'].stats()['mean'], sigma_pred=model.vars[t]['mu_age'].stats() ['standard deviation'])), ignore_index=True) data_simulation.add_quality_metrics(model.mu) print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % ( model.mu['abs_err'].mean(), pl.median(pl.absolute( model.mu['rel_err'].dropna())), model.mu['covered?'].mean()) print data_simulation.initialize_results(model) data_simulation.add_to_results(model, 'delta') data_simulation.add_to_results(model, 'mu') data_simulation.add_to_results(model, 'input_data') data_simulation.finalize_results(model) print 
model.results return model