def setup(dm, key, data_list, rate_stoch):
    """ Generate the PyMC variables for a log-normal model of
    a function of age

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)
    data_list : list of data dicts
      the observed data to use in the log-normal likelihood function;
      each dict is read for 'id', 'age_start', 'age_end' and
      (optionally) 'age_weights'
    rate_stoch : pymc.Stochastic
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      log-normal model.  vars['rate_stoch'] is the rate_stoch that
      was passed in and vars['observed_rates'] is the list of
      observed stochastics, one per element of data_list.
      generate_prior_potentials is handed this dict as well and
      presumably adds the prior potentials to it -- confirm against
      that helper.
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    vars['rate_stoch'] = rate_stoch

    # set up priors and observed data
    prior_str = dm.get_priors(key)
    generate_prior_potentials(vars, prior_str, est_mesh)

    vars['observed_rates'] = []
    for d in data_list:
        age_indices = indices_for_range(est_mesh, d['age_start'], d['age_end'])
        # fall back to uniform weights over the age range when the
        # datum does not carry its own weights
        age_weights = d.get('age_weights', np.ones(len(age_indices)) / len(age_indices))

        # standard error on the log scale, derived from the width of
        # the bounds (presumably a 95% interval, hence 2 * 1.96)
        lb, ub = dm.bounds_per_1(d)
        se = (np.log(ub) - np.log(lb)) / (2. * 1.96)
        # a bound of 0 makes the difference of logs NaN or infinite;
        # an infinite se would give tau == 0, which is not a valid
        # precision, so every non-finite or non-positive se falls
        # back to 1.
        if not np.isfinite(se) or se <= 0.:
            se = 1.

        log_value = np.log(dm.value_per_1(d))
        print('data %d: log(value) = %f, se = %f' % (d['id'], log_value, se))

        # all loop-dependent quantities are bound as default
        # arguments, so each stochastic keeps the values of its own
        # datum
        @mc.observed
        @mc.stochastic(name='obs_%d' % d['id'])
        def obs(f=vars['rate_stoch'],
                age_indices=age_indices,
                age_weights=age_weights,
                value=log_value,
                tau=se**-2, data=d):
            f_i = rate_for_range(f, age_indices, age_weights)
            return mc.normal_like(value, np.log(f_i), tau)
        vars['observed_rates'].append(obs)

    return vars
def setup(dm, key, data_list=None, rate_stoch=None, emp_prior=None, lower_bound_data=None):
    """ Generate the PyMC variables for a negative-binomial model of
    a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)
    data_list : list of data dicts, optional
      the observed data to use in the negative binomial likelihood
      function (defaults to no data)
    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).
      This is used to link rate stochs into a larger model,
      for example.
    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)

      It is only used when it has all of the keys 'alpha', 'beta'
      and 'gamma'; in that case 'sigma_alpha', 'sigma_beta' and
      'sigma_gamma' are read as well.
    lower_bound_data : list of data dicts, optional
      data that is only used as a lower bound on the rate: it
      contributes to the likelihood only where the predicted count is
      below the observed one (defaults to no data)

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      rate model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the rate model
      into more complicated models, like the generic disease model.

    Raises
    ------
    ValueError
      if the gaps in the estimation age mesh are not all equal to 1
    """
    # the optional containers default to None rather than to a shared
    # mutable object; normalize them before use
    if data_list is None:
        data_list = []
    if emp_prior is None:
        emp_prior = {}
    if lower_bound_data is None:
        lower_bound_data = []

    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    param_mesh = dm.get_param_age_mesh()

    if np.any(np.diff(est_mesh) != 1):
        raise ValueError('ERROR: Gaps in estimation age mesh must all equal 1')

    # calculate effective sample size for all data and lower bound data
    dm.calc_effective_sample_size(data_list)
    dm.calc_effective_sample_size(lower_bound_data)

    # generate regional covariates
    covariate_dict = dm.get_covariates()
    X_region, X_study = regional_covariates(key, covariate_dict)

    # use confidence prior from prior_str
    mu_delta = 100.
    sigma_delta = 1.
    from dismod3.settings import PRIOR_SEP_STR
    for line in dm.get_priors(key).split(PRIOR_SEP_STR):
        prior = line.strip().split()
        if not prior:
            continue
        if prior[0] == 'heterogeneity':
            mu_delta = float(prior[1])
            sigma_delta = float(prior[2])

    # use the empirical prior mean if it is available
    if all(name in emp_prior for name in ('alpha', 'beta', 'gamma')):
        # with an empirical prior the covariate coefficients are
        # fixed arrays, not stochastics
        # NOTE(review): mu_alpha, sigma_alpha and sigma_beta are
        # assigned in this branch but not used afterwards
        mu_alpha = np.array(emp_prior['alpha'])
        sigma_alpha = np.maximum(.1, emp_prior['sigma_alpha'])
        alpha = np.array(emp_prior['alpha'])
        vars.update(region_coeffs=alpha)

        beta = np.array(emp_prior['beta'])
        sigma_beta = np.maximum(.1, emp_prior['sigma_beta'])
        vars.update(study_coeffs=beta)

        mu_gamma = np.array(emp_prior['gamma'])
        sigma_gamma = np.maximum(.1, emp_prior['sigma_gamma'])

        # leave mu_delta and sigma_delta as they were set in the expert prior
    else:
        import dismod3.regional_similarity_matrices as similarity_matrices
        n = len(X_region)
        mu_alpha = np.zeros(n)
        sigma_alpha = .01
        C_alpha = similarity_matrices.regions_nested_in_superregions(n, sigma_alpha)
        # alternative covariance: similarity_matrices.all_related_equally(n, sigma_alpha)
        alpha = mc.MvNormalCov('region_coeffs_%s' % key, mu=mu_alpha, C=C_alpha, value=mu_alpha)
        vars.update(region_coeffs=alpha)

        mu_beta = np.zeros(len(X_study))
        sigma_beta = .1
        beta = mc.Normal('study_coeffs_%s' % key, mu=mu_beta, tau=sigma_beta**-2., value=mu_beta)
        vars.update(study_coeffs=beta)

        mu_gamma = -5.*np.ones(len(est_mesh))
        sigma_gamma = 10.*np.ones(len(est_mesh))

    # a heterogeneity prior with mean 0 switches the dispersion
    # parameter (and with it the data likelihood below) off
    if mu_delta != 0.:
        log_delta = mc.Uninformative('log_dispersion_%s' % key, value=np.log(mu_delta-1))
        delta = mc.Lambda('dispersion_%s' % key, lambda x=log_delta: 1. + np.exp(x))

        @mc.potential(name='potential_dispersion_%s' % key)
        def delta_pot(delta=delta, mu=mu_delta, tau=sigma_delta**-2):
            return mc.normal_like(delta, mu, tau)

        vars.update(dispersion=delta,
                    log_dispersion=log_delta,
                    dispersion_potential=delta_pot,
                    dispersion_step_sd=.1*sigma_delta)

    # a single sigma_gamma value applies to every age in the estimation mesh
    if len(sigma_gamma) == 1:
        sigma_gamma = sigma_gamma[0]*np.ones(len(est_mesh))

    # create variable for interpolated rate;
    # also create variable for age-specific rate function, if it does not yet exist
    if rate_stoch:
        # if the rate_stoch already exists, for example prevalence in the generic model,
        # we use it to back-calculate mu and eventually gamma
        mu = rate_stoch

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(mu=mu, Xa=X_region, Xb=X_study, alpha=alpha, beta=beta):
            # the 1.e-8 offset keeps the log finite when mu is 0
            return np.log(1.e-8 + mu) - np.dot(alpha, Xa) - np.dot(beta, Xb)

        @mc.potential(name='age_coeffs_potential_%s' % key)
        def gamma_potential(gamma=gamma, mu_gamma=mu_gamma,
                            tau_gamma=1./sigma_gamma[param_mesh]**2,
                            param_mesh=param_mesh):
            return mc.normal_like(gamma[param_mesh], mu_gamma[param_mesh], tau_gamma)

        vars.update(rate_stoch=mu, age_coeffs=gamma, age_coeffs_potential=gamma_potential)
    else:
        # if the rate_stoch does not yet exists, we make gamma a stoch, and use it to calculate mu
        # for computational efficiency, gamma is a linearly interpolated version of gamma_mesh
        initial_gamma = mu_gamma

        # Earlier experiments, kept for reference: a linear age
        # pattern for remission (param_mesh = [0., 100.]) because there
        # is not sufficient data for a more complicated fit, and the
        # full mesh (param_mesh = est_mesh), which slows things down a
        # lot.
        gamma_mesh = mc.Normal('age_coeffs_mesh_%s' % key,
                               mu=mu_gamma[param_mesh],
                               tau=sigma_gamma[param_mesh]**-2,
                               value=initial_gamma[param_mesh])

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(gamma_mesh=gamma_mesh, param_mesh=param_mesh, est_mesh=est_mesh):
            return interpolate(param_mesh, gamma_mesh, est_mesh)

        @mc.deterministic(name=key)
        def mu(Xa=X_region, Xb=X_study, alpha=alpha, beta=beta, gamma=gamma):
            return predict_rate([Xa, Xb], alpha, beta, gamma, lambda f, age: f, est_mesh)

        # Create a guess at the covariance matrix for MCMC proposals to update gamma_mesh
        from pymc.gp.cov_funs import matern
        a = np.atleast_2d(param_mesh).T
        C = matern.euclidean(a, a, diff_degree=2, amp=1.**2, scale=10.)

        vars.update(age_coeffs_mesh=gamma_mesh, age_coeffs=gamma, rate_stoch=mu,
                    age_coeffs_mesh_step_cov=.005*np.array(C))

        # adjust value of gamma_mesh based on priors, if necessary
        # TODO: implement more adjustments, currently only adjusted based on at_least priors
        for line in dm.get_priors(key).split(PRIOR_SEP_STR):
            prior = line.strip().split()
            if not prior:
                continue
            if prior[0] == 'at_least':
                # raise the initial value wherever the initial rate is below the bound
                delta_gamma = np.log(np.maximum(mu.value, float(prior[1]))) - np.log(mu.value)
                gamma_mesh.value = gamma_mesh.value + delta_gamma[param_mesh]

    # create potentials for priors
    # (the likelihoods below read vars['bounds_func'], which is not
    # set in this function -- presumably this call stores it; confirm
    # against generate_prior_potentials)
    generate_prior_potentials(vars, dm.get_priors(key), est_mesh)

    # create effect coefficients to explain overdispersion
    eta = mc.Laplace('eta_%s' % key, mu=0., tau=1., value=0.)
    vars['eta'] = eta

    # create observed stochastics for data
    vars['data'] = []

    if mu_delta != 0.:
        value = []
        N = []
        Xa = []
        Xb = []
        ai = []
        aw = []
        Z = []  # overdispersion-explaining covariates

        for d in data_list:
            try:
                age_indices, age_weights, Y_i, N_i = values_from(dm, d)
            except ValueError:
                debug('WARNING: could not calculate likelihood for data %d' % d['id'])
                continue

            d_covariates = covariates(d, covariate_dict)

            value.append(Y_i*N_i)
            N.append(N_i)
            Xa.append(d_covariates[0])
            Xb.append(d_covariates[1])
            ai.append(age_indices)
            aw.append(age_weights)
            Z.append(float(d.get('bias', 0.)))

            vars['data'].append(d)

        N = np.array(N)
        Z = np.array(Z)

        vars['effective_sample_size'] = list(N)

        if len(vars['data']) > 0:
            @mc.deterministic(name='rate_%s' % key)
            def rates(N=N,
                      Xa=Xa, Xb=Xb,
                      alpha=alpha, beta=beta, gamma=gamma,
                      bounds_func=vars['bounds_func'],
                      age_indices=ai,
                      age_weights=aw):
                # calculate study-specific rate function
                shifts = np.exp(np.dot(Xa, alpha) + np.dot(Xb, np.atleast_1d(beta)))
                exp_gamma = np.exp(gamma)
                # TODO: try vectorizing this loop to increase speed
                mu_i = [np.dot(weights, bounds_func(s_i * exp_gamma[ages], ages))
                        for s_i, ages, weights in zip(shifts, age_indices, age_weights)]
                return mu_i
            vars['expected_rates'] = rates

            # NOTE(review): eta is fixed at 0. in the two functions
            # below instead of being linked to the eta stochastic
            # created above -- presumably deliberate; confirm
            @mc.observed
            @mc.stochastic(name='data_%s' % key)
            def obs(value=value,
                    N=N,
                    mu_i=rates,
                    delta=delta,
                    Z=Z,
                    eta=0.):
                logp = mc.negative_binomial_like(value, N*mu_i, delta + eta*Z)
                return logp
            vars['observed_counts'] = obs

            @mc.deterministic(name='predicted_data_%s' % key)
            def predictions(value=value,
                            N=N,
                            mu_i=rates,
                            delta=delta,
                            Z=Z,
                            eta=0.):
                return mc.rnegative_binomial(N*mu_i, delta + eta*Z)/N
            vars['predicted_rates'] = predictions

            debug('likelihood of %s contains %d rates' % (key, len(vars['data'])))

    # now do the same thing for the lower bound data
    # TODO: refactor to remove duplicated code
    vars['lower_bound_data'] = []
    value = []
    N = []
    Xa = []
    Xb = []
    ai = []
    aw = []
    for d in lower_bound_data:
        try:
            age_indices, age_weights, Y_i, N_i = values_from(dm, d)
        except ValueError:
            debug('WARNING: could not calculate likelihood for data %d' % d['id'])
            continue

        d_covariates = covariates(d, covariate_dict)

        value.append(Y_i*N_i)
        N.append(N_i)
        Xa.append(d_covariates[0])
        Xb.append(d_covariates[1])
        ai.append(age_indices)
        aw.append(age_weights)

        vars['lower_bound_data'].append(d)

    N = np.array(N)
    value = np.array(value)

    if len(vars['lower_bound_data']) > 0:
        if mu_delta == 0.:
            # the dispersion stochastic only exists when mu_delta != 0.;
            # without it the lower-bound likelihood cannot be built, so
            # skip it, just as the data likelihood is skipped above
            debug('WARNING: dispersion is disabled for %s, skipping %d lowerbounds'
                  % (key, len(vars['lower_bound_data'])))
        else:
            @mc.observed
            @mc.stochastic(name='lower_bound_data_%s' % key)
            def obs_lb(value=value, N=N,
                       Xa=Xa, Xb=Xb,
                       alpha=alpha, beta=beta, gamma=gamma,
                       bounds_func=vars['bounds_func'],
                       delta=delta,
                       age_indices=ai,
                       age_weights=aw):
                # calculate study-specific rate function
                shifts = np.exp(np.dot(Xa, alpha) + np.dot(Xb, np.atleast_1d(beta)))
                exp_gamma = np.exp(gamma)
                # TODO: try vectorizing this loop to increase speed
                mu_i = [np.dot(weights, bounds_func(s_i * exp_gamma[ages], ages))
                        for s_i, ages, weights in zip(shifts, age_indices, age_weights)]

                # only data whose predicted count is below the observed
                # count contributes to the log-probability
                rate_param = mu_i*N
                violated_bounds = np.nonzero(rate_param < value)
                logp = mc.negative_binomial_like(value[violated_bounds], rate_param[violated_bounds], delta)
                return logp
            vars['observed_lower_bounds'] = obs_lb

            debug('likelihood of %s contains %d lowerbounds' % (key, len(vars['lower_bound_data'])))

    return vars
def setup(dm, key, data_list, rate_stoch=None, emp_prior=None):
    """ Generate the PyMC variables for a beta binomial model of
    a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)
    data_list : list of data dicts
      the observed data to use in the beta-binomial likelihood function
    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).
      This is used to link beta-binomial stochs into a larger model,
      for example.
    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)

      The keys read here are 'mu' (together with 'se') and
      'dispersion'.

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      beta binomial model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the beta-binomial model
      into more complicated models, like the generic disease model.

    Raises
    ------
    ValueError
      if the gaps in the estimation age mesh are not all equal to 1,
      or if a datum's age range is not inside the estimation age mesh

    Details
    -------
    The beta binomial model parameters are the following:
      * the mean age-specific rate function
      * dispersion of this mean
      * the p_i value for each data observation that has a standard
        error (data observations that do not have standard errors
        recorded are fit as observations of the beta r.v., while
        observations with standard errors recorded have a latent
        variable for the beta, and an observed binomial r.v.).
    """
    # the default is None rather than a shared mutable dict
    if emp_prior is None:
        emp_prior = {}

    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    if np.any(np.diff(est_mesh) != 1):
        raise ValueError("ERROR: Gaps in estimation age mesh must all equal 1")

    # set up age-specific rate function, if it does not yet exist
    if not rate_stoch:
        param_mesh = dm.get_param_age_mesh()

        if "mu" in emp_prior:
            initial_value = emp_prior["mu"]
        else:
            initial_value = dm.get_initial_value(key)

        # find the logit of the initial values, which is a little bit
        # of work because initial values are sampled from the est_mesh,
        # but the logit_initial_values are needed on the param_mesh
        logit_initial_value = mc.logit(interpolate(est_mesh, initial_value, param_mesh))

        logit_rate = mc.Normal(
            "logit(%s)" % key, mu=-5.0 * np.ones(len(param_mesh)), tau=1.0e-2, value=logit_initial_value
        )
        vars["logit_rate"] = logit_rate

        @mc.deterministic(name=key)
        def rate_stoch(logit_rate=logit_rate):
            return interpolate(param_mesh, mc.invlogit(logit_rate), est_mesh)

    if "mu" in emp_prior:
        @mc.potential(name="empirical_prior_%s" % key)
        def emp_prior_potential(f=rate_stoch, mu=emp_prior["mu"], tau=1.0 / np.array(emp_prior["se"]) ** 2):
            return mc.normal_like(f, mu, tau)

        vars["empirical_prior"] = emp_prior_potential

    vars["rate_stoch"] = rate_stoch

    # create stochastic variable for over-dispersion "random effect"
    mu_od = emp_prior.get("dispersion", 0.001)
    dispersion = mc.Gamma("dispersion_%s" % key, alpha=10.0, beta=10.0 / mu_od)
    vars["dispersion"] = dispersion

    # beta-distribution parameters derived from the rate and the dispersion
    @mc.deterministic(name="alpha_%s" % key)
    def alpha(rate=rate_stoch, dispersion=dispersion):
        return rate / dispersion ** 2

    @mc.deterministic(name="beta_%s" % key)
    def beta(rate=rate_stoch, dispersion=dispersion):
        return (1.0 - rate) / dispersion ** 2

    vars["alpha"] = alpha
    vars["beta"] = beta

    # create potentials for priors
    vars["priors"] = generate_prior_potentials(dm.get_priors(key), est_mesh, rate_stoch, dispersion)

    # create latent and observed stochastics for data
    vars["data"] = data_list
    vars["ab"] = []
    vars["latent_p"] = []
    vars["observations"] = []

    for d in data_list:
        # set up observed stochs for all relevant data
        data_id = d["id"]

        if d["value"] == MISSING:
            print("WARNING: data %d missing value" % data_id)
            continue

        # ensure all rate data is valid
        d_val = dm.value_per_1(d)
        d_se = dm.se_per_1(d)

        if d_val < 0 or d_val > 1:
            print("WARNING: data %d not in range [0,1]" % data_id)
            continue

        if d["age_start"] < est_mesh[0] or d["age_end"] > est_mesh[-1]:
            raise ValueError(
                "Data %d is outside of estimation range---([%d, %d] is not inside [%d, %d])"
                % (d["id"], d["age_start"], d["age_end"], est_mesh[0], est_mesh[-1])
            )

        age_indices = indices_for_range(est_mesh, d["age_start"], d["age_end"])
        # fall back to uniform weights over the age range when the
        # datum does not carry its own weights, as the log-normal and
        # normal models do
        age_weights = d.get("age_weights", np.ones(len(age_indices)) / len(age_indices))

        @mc.deterministic(name="a_%d^%s" % (data_id, key))
        def a_i(alpha=alpha, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(alpha, age_indices, age_weights)

        @mc.deterministic(name="b_%d^%s" % (data_id, key))
        def b_i(beta=beta, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(beta, age_indices, age_weights)

        vars["ab"] += [a_i, b_i]

        if d_se > 0:
            # if the data has a standard error, model it as a realization
            # of a beta binomial r.v.
            latent_p_i = mc.Beta(
                "latent_p_%d^%s" % (data_id, key),
                alpha=a_i,
                beta=b_i,
                value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO),
            )
            vars["latent_p"].append(latent_p_i)

            # binomial sample size whose variance p * (1 - p) / n
            # equals d_se ** 2, and the matching success count
            denominator = d_val * (1 - d_val) / d_se ** 2.0
            numerator = d_val * denominator
            obs_binomial = mc.Binomial(
                "data_%d^%s" % (data_id, key), value=numerator, n=denominator, p=latent_p_i, observed=True
            )
            vars["observations"].append(obs_binomial)
        else:
            # if the data is a point estimate with no uncertainty
            # recorded, model it as a realization of a beta r.v.
            obs_p_i = mc.Beta(
                "latent_p_%d" % data_id,
                value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO),
                alpha=a_i,
                beta=b_i,
                observed=True,
            )
            vars["observations"].append(obs_p_i)

    return vars
def setup(dm, key, data_list, rate_stoch=None, emp_prior=None):
    """ Generate the PyMC variables for a logit-normal model of
    a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)
    data_list : list of data dicts
      the observed data to use in the logit-normal likelihood function
    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).
      This is used to link rate stochs into a larger model,
      for example.
    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)

      It is only used when its keys are exactly 'alpha', 'beta',
      'gamma' and 'sigma'.

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      rate model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the rate model
      into more complicated models, like the generic disease model.

    Raises
    ------
    ValueError
      if the gaps in the estimation age mesh are not all equal to 1
    """
    # the default is None rather than a shared mutable dict
    if emp_prior is None:
        emp_prior = {}

    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    param_mesh = dm.get_param_age_mesh()

    if np.any(np.diff(est_mesh) != 1):
        raise ValueError('ERROR: Gaps in estimation age mesh must all equal 1')

    # generate regional covariates
    X_region, X_study = regional_covariates(key)

    # use the empirical prior mean if it is available
    if set(emp_prior.keys()) == set(['alpha', 'beta', 'gamma', 'sigma']):
        mu_alpha = np.array(emp_prior['alpha'])
        sigma_alpha = .01

        # with an empirical prior the study coefficients are a fixed
        # array, not a stochastic
        beta = np.array(emp_prior['beta'])

        mu_gamma = np.array(emp_prior['gamma'])
        sigma_gamma = emp_prior['sigma']

        mu_sigma = .01
        conf_sigma = 1000.
    else:
        mu_alpha = np.zeros(len(X_region))
        sigma_alpha = 1.

        mu_beta = np.zeros(len(X_study))
        sigma_beta = .01
        beta = mc.Normal('study_coeffs_%s' % key, mu=mu_beta, tau=1/sigma_beta**2, value=mu_beta)
        vars.update(study_coeffs=beta)

        mu_gamma = -5.*np.ones(len(est_mesh))
        sigma_gamma = 1.

        mu_sigma = .1
        conf_sigma = 10.

    alpha = mc.Normal('region_coeffs_%s' % key, mu=mu_alpha, tau=1/sigma_alpha**2, value=mu_alpha)
    vars.update(region_coeffs=alpha)

    # the dispersion is sampled on the log scale
    log_sigma = mc.Uninformative('log(dispersion_%s)' % key, value=np.log(mu_sigma))

    @mc.deterministic(name='dispersion_%s' % key)
    def sigma(log_sigma=log_sigma):
        return np.exp(log_sigma)

    # TODO: replace this potential in the generate_prior_potentials function if confidence is set
    @mc.potential(name='dispersion_potential_%s' % key)
    def sigma_potential(sigma=sigma, alpha=conf_sigma, beta=conf_sigma/mu_sigma):
        return mc.gamma_like(sigma, alpha, beta)

    vars.update(log_dispersion=log_sigma, dispersion=sigma, dispersion_potential=sigma_potential)

    # create variable for interpolated logit rate;
    # also create variable for age-specific rate function, if it does not yet exist
    if rate_stoch:
        # if the rate_stoch already exists, for example prevalence in the generic model,
        # we use it to back-calculate mu and eventually gamma
        @mc.deterministic(name='logit_%s' % key)
        def mu(invlogit_mu=rate_stoch):
            return mc.logit(invlogit_mu)

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(mu=mu, Xa=X_region, Xb=X_study, alpha=alpha, beta=beta):
            return mu - np.dot(alpha, Xa) - np.dot(beta, Xb)

        @mc.potential(name='age_coeffs_potential_%s' % key)
        def gamma_potential(gamma=gamma, mu_gamma=mu_gamma, tau_gamma=1./sigma_gamma**2, param_mesh=param_mesh):
            return mc.normal_like(gamma[param_mesh], mu_gamma[param_mesh], tau_gamma)

        vars.update(rate_stoch=rate_stoch, logit_rate_stoch=mu, age_coeffs=gamma,
                    age_coeffs_potential=gamma_potential)
    else:
        # if the rate_stoch does not yet exists, we make gamma a stoch, and use it to calculate mu
        # for computational efficiency, gamma is a linearly interpolated version of gamma_mesh
        initial_gamma = mu_gamma
        gamma_mesh = mc.Normal('age_coeffs_mesh_%s' % key,
                               mu=mu_gamma[param_mesh],
                               tau=1/sigma_gamma**2,
                               value=initial_gamma[param_mesh])

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(gamma_mesh=gamma_mesh, param_mesh=param_mesh, est_mesh=est_mesh):
            return interpolate(param_mesh, gamma_mesh, est_mesh)

        @mc.deterministic(name='logit_%s' % key)
        def mu(Xa=X_region, Xb=X_study, alpha=alpha, beta=beta, gamma=gamma):
            return np.dot(alpha, Xa) + np.dot(beta, Xb) + gamma

        @mc.deterministic(name=key)
        def rate_stoch(mu=mu):
            return mc.invlogit(mu)

        vars.update(age_coeffs_mesh=gamma_mesh, age_coeffs=gamma, logit_rate_stoch=mu, rate_stoch=rate_stoch)

    # create potentials for priors
    vars['priors'] = generate_prior_potentials(dm.get_priors(key), est_mesh, rate_stoch)

    # create observed stochastics for data
    vars['data'] = data_list
    vars['observed_rates'] = []

    # smallest positive value and largest positive standard error in
    # the data (each with a floor), handed to values_from below
    data_vals = [dm.value_per_1(d) for d in data_list]
    data_ses = [dm.se_per_1(d) for d in data_list]
    min_val = min([1.e-9] + [v for v in data_vals if v > 0])  # TODO: assess validity of this minimum value
    max_se = max([.000001] + [s for s in data_ses if s > 0])  # TODO: assess validity of this maximum std err

    for d in data_list:
        try:
            age_indices, age_weights, logit_val, logit_se = values_from(dm, d, min_val, max_se)
        except ValueError:
            # data that values_from rejects is left out of the likelihood
            continue

        @mc.observed
        @mc.stochastic(name='data_%d' % d['id'])
        def obs(value=logit_val, logit_se=logit_se,
                X=covariates(d),
                alpha=alpha, beta=beta, gamma=gamma, sigma=sigma,
                age_indices=age_indices,
                age_weights=age_weights):
            # calculate study-specific rate function
            mu = predict_logit_rate(X, alpha, beta, gamma)
            mu_i = rate_for_range(mu, age_indices, age_weights)

            # the model dispersion and the datum's own standard error
            # add in variance
            tau_i = 1. / (sigma**2 + logit_se**2)
            logp = mc.normal_like(x=value, mu=mu_i, tau=tau_i)
            return logp

        vars['observed_rates'].append(obs)

    return vars
def setup(dm, key, data_list, rate_stoch):
    """ Generate the PyMC variables for a normal model of
    a function of age

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)
    data_list : list of data dicts
      the observed data to use in the normal likelihood function;
      every datum that is not skipped must have a nonzero standard
      error
    rate_stoch : pymc.Stochastic
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      normal model.  vars['rate_stoch'] is the rate_stoch that was
      passed in and vars['observed_rates'] is the list of observed
      stochastics, one per datum that was not skipped.
      generate_prior_potentials is handed this dict as well and
      presumably adds the prior potentials to it -- confirm against
      that helper.

    Raises
    ------
    ValueError
      if the gaps in the estimation age mesh are not all equal to 1,
      if a datum's age range is not inside the estimation age mesh,
      or if a datum has a standard error of 0
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    if np.any(np.diff(est_mesh) != 1):
        raise ValueError('ERROR: Gaps in estimation age mesh must all equal 1')

    vars['rate_stoch'] = rate_stoch

    # set up priors and observed data
    prior_str = dm.get_priors(key)
    generate_prior_potentials(vars, prior_str, est_mesh)

    vars['observed_rates'] = []
    for d in data_list:
        # set up observed stochs for all relevant data
        data_id = d['id']

        if d['value'] == MISSING:
            print('WARNING: data %d missing value' % data_id)
            continue

        # ensure all rate data is valid
        d_val = dm.value_per_1(d)
        d_se = dm.se_per_1(d)

        if d['age_start'] < est_mesh[0] or d['age_end'] > est_mesh[-1]:
            raise ValueError(
                'Data %d is outside of estimation range---([%d, %d] is not inside [%d, %d])'
                % (d['id'], d['age_start'], d['age_end'], est_mesh[0], est_mesh[-1]))

        age_indices = indices_for_range(est_mesh, d['age_start'], d['age_end'])
        # fall back to uniform weights over the age range when the
        # datum does not carry its own weights
        age_weights = d.get('age_weights', np.ones(len(age_indices)) / len(age_indices))

        # data must have standard error to use normal model
        # (a zero standard error would make the precision below infinite)
        if d_se == 0:
            raise ValueError('Data %d has invalid standard error' % d['id'])

        # all loop-dependent quantities are bound as default
        # arguments, so each stochastic keeps the values of its own
        # datum
        @mc.observed
        @mc.stochastic(name='obs_%d' % data_id)
        def obs(f=rate_stoch,
                age_indices=age_indices,
                age_weights=age_weights,
                value=d_val,
                tau=1./(d_se)**2):
            f_i = rate_for_range(f, age_indices, age_weights)
            return mc.normal_like(value, f_i, tau)
        vars['observed_rates'].append(obs)

    return vars