def setup(dm, key, data_list, rate_stoch):
    """ Generate the PyMC variables for a log-normal model of
    a function of age

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)
    data_list : list of data dicts
      the observed data to use in the log-normal likelihood function
    rate_stoch : pymc.Stochastic
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      log-normal model.  vars['rate_stoch'] is of particular
      relevance, for details see the beta_binomial_model.
      vars['observed_rates'] holds one observed stochastic per datum.
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    vars['rate_stoch'] = rate_stoch

    # set up priors and observed data
    prior_str = dm.get_priors(key)
    generate_prior_potentials(vars, prior_str, est_mesh)

    vars['observed_rates'] = []
    for d in data_list:
        age_indices = indices_for_range(est_mesh, d['age_start'], d['age_end'])
        # fall back to uniform weights over the age range when none are stored
        age_weights = d.get('age_weights', np.ones(len(age_indices)) / len(age_indices))

        # standard error in log space, taken from the width of the
        # (lb, ub) interval divided by 2 * 1.96
        lb, ub = dm.bounds_per_1(d)
        se = (np.log(ub) - np.log(lb)) / (2. * 1.96)
        if np.isnan(se) or se <= 0.:
            # unusable interval: fall back to a unit standard error
            se = 1.

        # the log of the observed value is needed for both the progress
        # message and the likelihood, so compute it once
        log_value = np.log(dm.value_per_1(d))

        # parenthesised single-argument form prints identically on
        # Python 2 and is valid syntax on Python 3
        print('data %d: log(value) = %f, se = %f' % (d['id'], log_value, se))

        # per-datum quantities are bound as default arguments so that each
        # observed node keeps its own values (no late-binding closure)
        @mc.observed
        @mc.stochastic(name='obs_%d' % d['id'])
        def obs(f=vars['rate_stoch'],
                age_indices=age_indices,
                age_weights=age_weights,
                value=log_value,
                tau=se**-2, data=d):
            f_i = rate_for_range(f, age_indices, age_weights)
            return mc.normal_like(value, np.log(f_i), tau)
        vars['observed_rates'].append(obs)

    return vars
def values_from(dm, d, min_val=1.e-5, max_se=.1):
    """ Extract the logit-scale value and standard error from a piece of data

    Parameters
    ----------
    dm : disease model
      supplies the estimation age mesh and the per-1 value and
      standard error of the datum
    d : data dict
      must provide 'age_start', 'age_end' and 'id'; 'age_weights' is
      optional and defaults to a vector of ones
    min_val : float, optional
      controls the stand-in for exact 0 and 1, since logit cannot
      model them: 0 becomes min_val / 10 and 1 becomes 1 - min_val / 10
    max_se : float, optional
      the standard error to use for data with missing or zero standard error

    Results
    -------
    age_indices, age_weights, logit_val, logit_se : tuple
      index and weight vectors for the age range, followed by the
      logit of the value and the standard error scaled by
      1/p + 1/(1-p)

    Raises
    ------
    ValueError
      if the per-1 value is below 0 or above 1
    """
    mesh = dm.get_estimate_age_mesh()

    # index vector and weight vector covering the datum's age range
    age_indices = indices_for_range(mesh, d['age_start'], d['age_end'])
    if 'age_weights' in d:
        age_weights = d['age_weights']
    else:
        age_weights = np.ones(len(age_indices))

    # reject anything outside the unit interval
    rate = dm.value_per_1(d)
    if rate > 1 or rate < 0:
        debug('WARNING: data %d not in range (0,1)' % d['id'])
        raise ValueError

    # nudge the endpoints inward so the logit is finite
    # TODO: determine if this is an acceptable way to deal with zero
    if rate == 0.:
        rate = min_val / 10.
    elif rate == 1.:
        rate = 1. - min_val / 10.

    logit_val = mc.logit(rate)

    # TODO: determine if this is an acceptable way to deal with missing
    std_err = dm.se_per_1(d)
    if std_err == MISSING or std_err == 0.:
        std_err = max_se

    # scale the standard error by 1/p + 1/(1-p)
    scale = 1/rate + 1/(1-rate)
    logit_se = scale * std_err

    return age_indices, age_weights, logit_val, logit_se
def values_from(dm, d):
    """ Extract the normalized values from a piece of data

    Parameters
    ----------
    dm : disease model
      supplies the estimation age mesh and the per-1 value of the datum
    d : data dict
      must provide 'age_start', 'age_end', 'id' and
      'effective_sample_size'; 'age_weights' is optional and defaults
      to uniform weights that sum to one

    Results
    -------
    age_indices, age_weights, Y_i, N_i : tuple
      index and weight vectors for the age range, the per-1 value,
      and the effective sample size floored at 1

    Raises
    ------
    ValueError
      if the per-1 value is negative
    """
    mesh = dm.get_estimate_age_mesh()

    # index vector and weight vector covering the datum's age range
    age_indices = indices_for_range(mesh, d['age_start'], d['age_end'])
    if 'age_weights' in d:
        age_weights = d['age_weights']
    else:
        count = len(age_indices)
        age_weights = np.ones(count)/count

    # only negative rates are rejected here
    # TODO: allow Y_i > 1, extract effective sample size appropriately in this case
    rate = dm.value_per_1(d)
    if rate < 0:
        debug('WARNING: data %d < 0' % d['id'])
        raise ValueError

    # never let the effective sample size drop below one
    sample_size = max(1., d['effective_sample_size'])

    return age_indices, age_weights, rate, sample_size
def setup(dm, key="%s", data_list=None):
    """ Generate the PyMC variables for a generic disease model

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str, optional
      a string for modifying the names of the stochs in this model,
      must contain a single %s that will be substituted
    data_list : list of data dicts, optional
      the observed data to use in the rate stoch likelihood functions;
      None is treated as an empty list

    Results
    -------
    vars : dict of PyMC stochs
      returns a dictionary of all the relevant PyMC objects for the
      generic disease model.
    """
    if data_list is None:
        # the default used to be iterated directly, raising TypeError
        data_list = []

    vars = {}

    # setup all-cause mortality
    param_type = "all-cause_mortality"
    data = [d for d in data_list if d["data_type"] == "all-cause mortality data"]
    m_all_cause = dm.mortality(key % param_type, data)

    # make covariate vectors and estimation vectors to know dimensions of these objects
    covariate_dict = dm.get_covariates()
    derived_covariate = dm.get_derived_covariate_values()
    X_region, X_study = rate_model.regional_covariates(key, covariate_dict, derived_covariate)
    est_mesh = dm.get_estimate_age_mesh()

    def weak_prior(**extra):
        # fresh weakly informative prior dict sized to the covariates and mesh;
        # delta is filled in from the global prior dict in neg_binom setup
        # unless it is passed in explicitly through `extra`
        prior = dict(
            alpha=np.zeros(len(X_region)),
            beta=np.zeros(len(X_study)),
            gamma=-5 * np.ones(len(est_mesh)),
            sigma_alpha=[1.0],
            sigma_beta=[1.0],
            sigma_gamma=[10.0],
        )
        prior.update(extra)
        return prior

    # update age_weights on non-incidence/prevalence data to reflect
    # prior prevalence distribution, if available
    prior_prev = dm.get_mcmc("emp_prior_mean", key % "prevalence")
    if len(prior_prev) > 0:
        prior_prev = np.asarray(prior_prev, dtype=float)
        # walk every datum: `data` holds only the all-cause mortality rows,
        # which have already been consumed above, so looping over it would
        # make this re-weighting a no-op for the models built below
        for d in data_list:
            if d["data_type"].startswith(("incidence", "prevalence")):
                continue
            age_indices = indices_for_range(est_mesh, d["age_start"], d["age_end"])
            weights = prior_prev[age_indices]
            total = weights.sum()
            if total > 0:
                # age weights must sum to 1 (optimization of inner loop removed check on this);
                # a zero total would produce NaN weights, so such data keep their old weights
                d["age_weights"] = weights / total

    # create negative binomial models for incidence, remission, and
    # excess-mortality (which are all treated as "free" parameters)
    for param_type in ["incidence", "remission", "excess-mortality"]:
        data = [d for d in data_list if d["data_type"] == "%s data" % param_type]
        lower_bound_data = []  # TODO: include lower bound data when appropriate (this has not come up yet)

        # use empirical priors for the type/region/year/sex if available
        prior_dict = dm.get_empirical_prior(param_type)
        if prior_dict == {}:
            # otherwise use weakly informative priors (updated in place, as before)
            prior_dict.update(weak_prior())

        vars[key % param_type] = rate_model.setup(
            dm, key % param_type, data, emp_prior=prior_dict, lower_bound_data=lower_bound_data
        )

    # create nicer names for the rate stochastic from each neg-binom rate model
    i = vars[key % "incidence"]["rate_stoch"]
    r = vars[key % "remission"]["rate_stoch"]
    f = vars[key % "excess-mortality"]["rate_stoch"]

    # initial fraction of population with the condition;
    # represent C_0 in logit space to allow unconstrained posterior maximization
    logit_C_0 = mc.Normal("logit_%s" % (key % "C_0"), -5.0, 10.0 ** -2, value=-5.0)

    @mc.deterministic(name=key % "C_0")
    def C_0(logit_C_0=logit_C_0):
        return mc.invlogit(logit_C_0)

    # initial fraction population with and without condition
    @mc.deterministic(name=key % "S_0")
    def SC_0(C_0=C_0):
        return np.array([1.0 - C_0, C_0]).ravel()

    vars[key % "bins"] = {"initial": [SC_0, C_0, logit_C_0]}

    # iterative solution to difference equations to obtain bin sizes for all ages
    import scipy.linalg

    @mc.deterministic(name=key % "bins")
    def SCpm(SC_0=SC_0, i=i, r=r, f=f, m_all_cause=m_all_cause, age_mesh=dm.get_param_age_mesh()):
        # rows of the result: S (without condition), C (with condition),
        # p (prevalence) and m (mortality without condition), one column per
        # age_mesh entry
        # NOTE(review): the rates are indexed by the age values in age_mesh,
        # which presumes index == age on the estimation mesh -- confirm
        SC = np.zeros([2, len(age_mesh)])
        p = np.zeros(len(age_mesh))
        m = np.zeros(len(age_mesh))

        SC[:, 0] = SC_0
        p[0] = SC_0[1] / (SC_0[0] + SC_0[1])
        # trim m[0] to avoid numerical instability
        m[0] = trim(
            m_all_cause[age_mesh[0]] - f[age_mesh[0]] * p[0],
            0.1 * m_all_cause[age_mesh[0]],
            1 - NEARLY_ZERO,
        )

        for ii, a in enumerate(age_mesh[:-1]):
            # transition-rate matrix scaled by the width of the age interval
            A = np.array([[-i[a] - m[ii], r[a]],
                          [i[a], -r[a] - m[ii] - f[a]]]) * (age_mesh[ii + 1] - age_mesh[ii])

            SC[:, ii + 1] = np.dot(scipy.linalg.expm(A), SC[:, ii])

            p[ii + 1] = trim(SC[1, ii + 1] / (SC[0, ii + 1] + SC[1, ii + 1]), NEARLY_ZERO, 1 - NEARLY_ZERO)
            m[ii + 1] = trim(
                m_all_cause[age_mesh[ii + 1]] - f[age_mesh[ii + 1]] * p[ii + 1],
                0.1 * m_all_cause[age_mesh[ii + 1]],
                1 - NEARLY_ZERO,
            )

        bins = np.zeros([4, len(age_mesh)])
        bins[0:2, :] = SC
        bins[2, :] = p
        bins[3, :] = m
        return bins

    vars[key % "bins"]["age > 0"] = [SCpm]

    # prevalence = # with condition / (# with condition + # without)
    @mc.deterministic(name=key % "p")
    def p(SCpm=SCpm, param_mesh=dm.get_param_age_mesh(), est_mesh=dm.get_estimate_age_mesh()):
        return dismod3.utils.interpolate(param_mesh, SCpm[2, :], est_mesh)

    data = [d for d in data_list if d["data_type"] == "prevalence data"]
    prior_dict = dm.get_empirical_prior("prevalence")
    if prior_dict == {}:
        prior_dict.update(weak_prior())

    vars[key % "prevalence"] = rate_model.setup(dm, key % "prevalence", data, p, emp_prior=prior_dict)
    # replace perfectly consistent p with version including level-bound priors
    p = vars[key % "prevalence"]["rate_stoch"]

    # make a blank prior dict, to avoid weirdness; the same dict object is
    # handed to both derived-rate models below, as it was before
    blank_prior_dict = weak_prior(delta=100.0, sigma_delta=1.0)

    # cause-specific-mortality is a lower bound on p*f
    @mc.deterministic(name=key % "pf")
    def pf(p=p, f=f):
        return (p + NEARLY_ZERO) * f

    data = [d for d in data_list if d["data_type"] == "prevalence x excess-mortality data"]
    lower_bound_data = [d for d in data_list if d["data_type"] == "cause-specific mortality data"]
    vars[key % "prevalence_x_excess-mortality"] = rate_model.setup(
        dm, key % "pf", rate_stoch=pf, data_list=data, lower_bound_data=lower_bound_data, emp_prior=blank_prior_dict
    )

    # m = m_all_cause - f * p
    @mc.deterministic(name=key % "m")
    def m(SCpm=SCpm, param_mesh=dm.get_param_age_mesh(), est_mesh=dm.get_estimate_age_mesh()):
        return dismod3.utils.interpolate(param_mesh, SCpm[3, :], est_mesh)

    vars[key % "m"] = m

    # m_with = m + f
    @mc.deterministic(name=key % "m_with")
    def m_with(m=m, f=f):
        return m + f

    data = [d for d in data_list if d["data_type"] == "mortality data"]
    # TODO: test this
    # prior_dict = dm.get_empirical_prior('excess-mortality')  # TODO: make separate prior for with-condition mortality
    vars[key % "mortality"] = rate_model.setup(dm, key % "m_with", data, m_with, emp_prior=blank_prior_dict)

    # mortality rate ratio = mortality with condition / mortality without
    @mc.deterministic(name=key % "RR")
    def RR(m=m, m_with=m_with):
        # small offset keeps the ratio finite when m is zero
        return m_with / (m + 0.0001)

    data = [d for d in data_list if d["data_type"] == "relative-risk data"]
    vars[key % "relative-risk"] = log_normal_model.setup(dm, key % "relative-risk", data, RR)

    # standardized mortality rate ratio = mortality with condition / all-cause mortality
    @mc.deterministic(name=key % "SMR")
    def SMR(m_with=m_with, m_all_cause=m_all_cause):
        return m_with / (m_all_cause + 0.0001)

    data = [d for d in data_list if d["data_type"] == "smr data"]
    vars[key % "smr"] = log_normal_model.setup(dm, key % "smr", data, SMR)

    # duration = E[time in bin C]
    @mc.deterministic(name=key % "X")
    def X(r=r, m=m, f=f):
        hazard = r + m + f
        pr_not_exit = np.exp(-hazard)
        duration = np.empty(len(hazard))
        # the last age is the base case; earlier ages are filled by
        # backward recursion on the following age
        duration[-1] = 1 / hazard[-1]
        for a in reversed(range(len(duration) - 1)):
            duration[a] = (pr_not_exit[a] * (duration[a + 1] + 1)
                           + 1 / hazard[a] * (1 - pr_not_exit[a])
                           - pr_not_exit[a])
        return duration

    data = [d for d in data_list if d["data_type"] == "duration data"]
    vars[key % "duration"] = normal_model.setup(dm, key % "duration", data, X)

    # YLD[a] = disability weight * i[a] * X[a] * regional_population[a]
    @mc.deterministic(name=key % "i*X")
    def iX(i=i, X=X, p=p, pop=rate_model.regional_population(key)):
        # prevalent cases at the first age are added on top of incident cases
        birth_yld = np.zeros_like(p)
        birth_yld[0] = p[0] * pop[0]

        return i * X * (1 - p) * pop + birth_yld

    vars[key % "incidence_x_duration"] = {"rate_stoch": iX}

    return vars
def setup(dm, key, data_list, rate_stoch=None, emp_prior={}): """ Generate the PyMC variables for a beta binomial model of a single rate function Parameters ---------- dm : dismod3.DiseaseModel the object containing all the data, priors, and additional information (like input and output age-mesh) key : str the name of the key for everything about this model (priors, initial values, estimations) data_list : list of data dicts the observed data to use in the beta-binomial liklihood function rate_stoch : pymc.Stochastic, optional a PyMC stochastic (or deterministic) object, with len(rate_stoch.value) == len(dm.get_estimation_age_mesh()). This is used to link beta-binomial stochs into a larger model, for example. emp_prior : dict, optional the empirical prior dictionary, retrieved from the disease model if appropriate by:: >>> t, r, y, s = type_region_year_sex_from_key(key) >>> emp_prior = dm.get_empirical_prior(t) Results ------- vars : dict Return a dictionary of all the relevant PyMC objects for the beta binomial model. vars['rate_stoch'] is of particular relevance; this is what is used to link the beta-binomial model into more complicated models, like the generic disease model. Details ------- The beta binomial model parameters are the following: * the mean age-specific rate function * dispersion of this mean * the p_i value for each data observation that has a standard error (data observations that do not have standard errors recorded are fit as observations of the beta r.v., while observations with standard errors recorded have a latent variable for the beta, and an observed binomial r.v.). 
""" vars = {} est_mesh = dm.get_estimate_age_mesh() if np.any(np.diff(est_mesh) != 1): raise ValueError, "ERROR: Gaps in estimation age mesh must all equal 1" # set up age-specific rate function, if it does not yet exist if not rate_stoch: param_mesh = dm.get_param_age_mesh() if emp_prior.has_key("mu"): initial_value = emp_prior["mu"] else: initial_value = dm.get_initial_value(key) # find the logit of the initial values, which is a little bit # of work because initial values are sampled from the est_mesh, # but the logit_initial_values are needed on the param_mesh logit_initial_value = mc.logit(interpolate(est_mesh, initial_value, param_mesh)) logit_rate = mc.Normal( "logit(%s)" % key, mu=-5.0 * np.ones(len(param_mesh)), tau=1.0e-2, value=logit_initial_value ) # logit_rate = [mc.Normal('logit(%s)_%d' % (key, a), mu=-5., tau=1.e-2) for a in param_mesh] vars["logit_rate"] = logit_rate @mc.deterministic(name=key) def rate_stoch(logit_rate=logit_rate): return interpolate(param_mesh, mc.invlogit(logit_rate), est_mesh) if emp_prior.has_key("mu"): @mc.potential(name="empirical_prior_%s" % key) def emp_prior_potential(f=rate_stoch, mu=emp_prior["mu"], tau=1.0 / np.array(emp_prior["se"]) ** 2): return mc.normal_like(f, mu, tau) vars["empirical_prior"] = emp_prior_potential vars["rate_stoch"] = rate_stoch # create stochastic variable for over-dispersion "random effect" mu_od = emp_prior.get("dispersion", 0.001) dispersion = mc.Gamma("dispersion_%s" % key, alpha=10.0, beta=10.0 / mu_od) vars["dispersion"] = dispersion @mc.deterministic(name="alpha_%s" % key) def alpha(rate=rate_stoch, dispersion=dispersion): return rate / dispersion ** 2 @mc.deterministic(name="beta_%s" % key) def beta(rate=rate_stoch, dispersion=dispersion): return (1.0 - rate) / dispersion ** 2 vars["alpha"] = alpha vars["beta"] = beta # create potentials for priors vars["priors"] = generate_prior_potentials(dm.get_priors(key), est_mesh, rate_stoch, dispersion) # create latent and observed stochastics for 
data vars["data"] = data_list vars["ab"] = [] vars["latent_p"] = [] vars["observations"] = [] for d in data_list: # set up observed stochs for all relevant data id = d["id"] if d["value"] == MISSING: print "WARNING: data %d missing value" % id continue # ensure all rate data is valid d_val = dm.value_per_1(d) d_se = dm.se_per_1(d) if d_val < 0 or d_val > 1: print "WARNING: data %d not in range [0,1]" % id continue if d["age_start"] < est_mesh[0] or d["age_end"] > est_mesh[-1]: raise ValueError, "Data %d is outside of estimation range---([%d, %d] is not inside [%d, %d])" % ( d["id"], d["age_start"], d["age_end"], est_mesh[0], est_mesh[-1], ) age_indices = indices_for_range(est_mesh, d["age_start"], d["age_end"]) age_weights = d["age_weights"] @mc.deterministic(name="a_%d^%s" % (id, key)) def a_i(alpha=alpha, age_indices=age_indices, age_weights=age_weights): return rate_for_range(alpha, age_indices, age_weights) @mc.deterministic(name="b_%d^%s" % (id, key)) def b_i(beta=beta, age_indices=age_indices, age_weights=age_weights): return rate_for_range(beta, age_indices, age_weights) vars["ab"] += [a_i, b_i] if d_se > 0: # if the data has a standard error, model it as a realization # of a beta binomial r.v. latent_p_i = mc.Beta( "latent_p_%d^%s" % (id, key), alpha=a_i, beta=b_i, value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO) ) vars["latent_p"].append(latent_p_i) denominator = d_val * (1 - d_val) / d_se ** 2.0 numerator = d_val * denominator obs_binomial = mc.Binomial( "data_%d^%s" % (id, key), value=numerator, n=denominator, p=latent_p_i, observed=True ) vars["observations"].append(obs_binomial) else: # if the data is a point estimate with no uncertainty # recorded, model it as a realization of a beta r.v. obs_p_i = mc.Beta( "latent_p_%d" % id, value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO), alpha=a_i, beta=b_i, observed=True ) vars["observations"].append(obs_p_i) return vars
def setup(dm, key='%s', data_list=None):
    """ Generate the PyMC variables for a generic disease model

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
    key : str, optional
      a string for modifying the names of the stochs in this model,
      must contain a single %s that will be substituted
    data_list : list of data dicts, optional
      the observed data to use in the rate stoch likelihood functions;
      None is treated as an empty list

    Results
    -------
    vars : dict of PyMC stochs
      returns a dictionary of all the relevant PyMC objects for the
      generic disease model.
    """
    if data_list is None:
        # the default used to be iterated directly, raising TypeError
        data_list = []

    vars = {}

    param_type = 'all-cause_mortality'
    data = [d for d in data_list if d['data_type'] == 'all-cause mortality data']
    m_all_cause = dm.mortality(key % param_type, data)

    covariate_dict = dm.get_covariates()
    X_region, X_study = rate_model.regional_covariates(key, covariate_dict)
    est_mesh = dm.get_estimate_age_mesh()

    def weak_prior(**extra):
        # fresh weakly informative prior dict sized to the covariates and mesh;
        # delta is filled in from the global prior dict in neg_binom setup
        # unless it is passed in explicitly through `extra`
        prior = dict(alpha=np.zeros(len(X_region)),
                     beta=np.zeros(len(X_study)),
                     gamma=-5*np.ones(len(est_mesh)),
                     sigma_alpha=[1.],
                     sigma_beta=[1.],
                     sigma_gamma=[10.])
        prior.update(extra)
        return prior

    # update age_weights on non-incidence/prevalence data to reflect
    # prior prevalence distribution, if available
    prior_prev = dm.get_mcmc('emp_prior_mean', key % 'prevalence')
    if len(prior_prev) > 0:
        prior_prev = np.asarray(prior_prev, dtype=float)
        # walk every datum: `data` holds only the all-cause mortality rows,
        # which have already been consumed above, so looping over it would
        # make this re-weighting a no-op for the models built below
        for d in data_list:
            if d['data_type'].startswith(('incidence', 'prevalence')):
                continue
            age_indices = indices_for_range(est_mesh, d['age_start'], d['age_end'])
            weights = prior_prev[age_indices]
            total = weights.sum()
            if total > 0:
                # age weights must sum to 1 (optimization of inner loop removed check on this);
                # a zero total would produce NaN weights, so such data keep their old weights
                d['age_weights'] = weights / total

    for param_type in ['incidence', 'remission', 'excess-mortality']:
        data = [d for d in data_list if d['data_type'] == '%s data' % param_type]
        lower_bound_data = []  # TODO: include lower bound data when appropriate

        prior_dict = dm.get_empirical_prior(param_type)
        if prior_dict == {}:
            # no empirical prior available: fill in weak defaults in place
            prior_dict.update(weak_prior())

        vars[key % param_type] = rate_model.setup(dm, key % param_type, data,
                                                  emp_prior=prior_dict,
                                                  lower_bound_data=lower_bound_data)

    i = vars[key % 'incidence']['rate_stoch']
    r = vars[key % 'remission']['rate_stoch']
    f = vars[key % 'excess-mortality']['rate_stoch']

    # Initial population with condition
    logit_C_0 = mc.Normal('logit_%s' % (key % 'C_0'), -5., 10.**-2, value=-5.)

    @mc.deterministic(name=key % 'C_0')
    def C_0(logit_C_0=logit_C_0):
        return mc.invlogit(logit_C_0)

    # Initial population with and without condition
    @mc.deterministic(name=key % 'S_0')
    def SC_0(C_0=C_0):
        return np.array([1. - C_0, C_0]).ravel()

    vars[key % 'bins'] = {'initial': [SC_0, C_0, logit_C_0]}

    # iterative solution to difference equations to obtain bin sizes for all ages
    import scipy.linalg

    @mc.deterministic(name=key % 'bins')
    def SCpm(SC_0=SC_0, i=i, r=r, f=f, m_all_cause=m_all_cause, age_mesh=dm.get_param_age_mesh()):
        # rows of the result: S (without condition), C (with condition),
        # p (prevalence) and m (mortality without condition), one column per
        # age_mesh entry
        # NOTE(review): the rates are indexed by the age values in age_mesh,
        # which presumes index == age on the estimation mesh -- confirm
        SC = np.zeros([2, len(age_mesh)])
        p = np.zeros(len(age_mesh))
        m = np.zeros(len(age_mesh))

        SC[:, 0] = SC_0
        p[0] = SC_0[1] / (SC_0[0] + SC_0[1])
        m[0] = trim(m_all_cause[age_mesh[0]] - f[age_mesh[0]] * p[0],
                    .1*m_all_cause[age_mesh[0]], 1-NEARLY_ZERO)

        for ii, a in enumerate(age_mesh[:-1]):
            # transition-rate matrix scaled by the width of the age interval
            A = np.array([[-i[a]-m[ii], r[a]],
                          [i[a], -r[a]-m[ii]-f[a]]]) * (age_mesh[ii+1] - age_mesh[ii])

            SC[:, ii+1] = np.dot(scipy.linalg.expm(A), SC[:, ii])

            p[ii+1] = trim(SC[1, ii+1] / (SC[0, ii+1] + SC[1, ii+1]), NEARLY_ZERO, 1-NEARLY_ZERO)
            m[ii+1] = trim(m_all_cause[age_mesh[ii+1]] - f[age_mesh[ii+1]] * p[ii+1],
                           .1*m_all_cause[age_mesh[ii+1]], 1-NEARLY_ZERO)

        bins = np.zeros([4, len(age_mesh)])
        bins[0:2, :] = SC
        bins[2, :] = p
        bins[3, :] = m
        return bins

    vars[key % 'bins']['age > 0'] = [SCpm]

    # prevalence = # with condition / (# with condition + # without)
    @mc.deterministic(name=key % 'p')
    def p(SCpm=SCpm, param_mesh=dm.get_param_age_mesh(), est_mesh=dm.get_estimate_age_mesh()):
        return dismod3.utils.interpolate(param_mesh, SCpm[2, :], est_mesh)

    data = [d for d in data_list if d['data_type'] == 'prevalence data']
    prior_dict = dm.get_empirical_prior('prevalence')
    if prior_dict == {}:
        prior_dict.update(weak_prior())

    vars[key % 'prevalence'] = rate_model.setup(dm, key % 'prevalence', data, p, emp_prior=prior_dict)
    # replace perfectly consistent p with version including level-bound priors
    p = vars[key % 'prevalence']['rate_stoch']

    # make a blank prior dict, to avoid weirdness; the same dict object is
    # handed to both derived-rate models below, as it was before
    blank_prior_dict = weak_prior(delta=100., sigma_delta=1.)

    # cause-specific-mortality is a lower bound on p*f
    @mc.deterministic(name=key % 'pf')
    def pf(p=p, f=f):
        return (p+NEARLY_ZERO)*f

    # TODO: add a 'with-condition population mortality rate date' type
    # data = [d for d in data_list if d['data_type'] == 'with-condition population mortality rate data']
    data = []
    lower_bound_data = [d for d in data_list if d['data_type'] == 'cause-specific mortality data']
    vars[key % 'prevalence_x_excess-mortality'] = rate_model.setup(dm, key % 'pf', rate_stoch=pf,
                                                                   data_list=data,
                                                                   lower_bound_data=lower_bound_data,
                                                                   emp_prior=blank_prior_dict)

    # m = m_all_cause - f * p
    @mc.deterministic(name=key % 'm')
    def m(SCpm=SCpm, param_mesh=dm.get_param_age_mesh(), est_mesh=dm.get_estimate_age_mesh()):
        return dismod3.utils.interpolate(param_mesh, SCpm[3, :], est_mesh)

    vars[key % 'm'] = m

    # m_with = m + f
    @mc.deterministic(name=key % 'm_with')
    def m_with(m=m, f=f):
        return m + f

    data = [d for d in data_list if d['data_type'] == 'mortality data']
    # TODO: test this
    #prior_dict = dm.get_empirical_prior('excess-mortality')  # TODO: make separate prior for with-condition mortality
    vars[key % 'mortality'] = rate_model.setup(dm, key % 'm_with', data, m_with, emp_prior=blank_prior_dict)

    # mortality rate ratio = mortality with condition / mortality without
    @mc.deterministic(name=key % 'RR')
    def RR(m=m, m_with=m_with):
        # small offset keeps the ratio finite when m is zero
        return m_with / (m + .0001)

    data = [d for d in data_list if d['data_type'] == 'relative-risk data']
    vars[key % 'relative-risk'] = log_normal_model.setup(dm, key % 'relative-risk', data, RR)

    # standardized mortality rate ratio = mortality with condition / all-cause mortality
    @mc.deterministic(name=key % 'SMR')
    def SMR(m_with=m_with, m_all_cause=m_all_cause):
        return m_with / (m_all_cause + .0001)

    data = [d for d in data_list if d['data_type'] == 'smr data']
    vars[key % 'smr'] = log_normal_model.setup(dm, key % 'smr', data, SMR)

    # duration = E[time in bin C]
    @mc.deterministic(name=key % 'X')
    def X(r=r, m=m, f=f):
        hazard = r + m + f
        pr_not_exit = np.exp(-hazard)
        duration = np.empty(len(hazard))
        # the last age is the base case; earlier ages are filled by
        # backward recursion on the following age
        duration[-1] = 1 / hazard[-1]
        for a in reversed(range(len(duration)-1)):
            duration[a] = (pr_not_exit[a] * (duration[a+1] + 1)
                           + 1 / hazard[a] * (1 - pr_not_exit[a])
                           - pr_not_exit[a])
        return duration

    data = [d for d in data_list if d['data_type'] == 'duration data']
    vars[key % 'duration'] = normal_model.setup(dm, key % 'duration', data, X)

    # YLD[a] = disability weight * i[a] * X[a] * regional_population[a]
    @mc.deterministic(name=key % 'i*X')
    def iX(i=i, X=X, p=p, pop=rate_model.regional_population(key)):
        return i * X * (1-p) * pop

    vars[key % 'incidence_x_duration'] = {'rate_stoch': iX}

    return vars
def setup(dm, key, data_list, rate_stoch): """ Generate the PyMC variables for a normal model of a function of age Parameters ---------- dm : dismod3.DiseaseModel the object containing all the data, priors, and additional information (like input and output age-mesh) key : str the name of the key for everything about this model (priors, initial values, estimations) data_list : list of data dicts the observed data to use in the beta-binomial liklihood function rate_stoch : pymc.Stochastic a PyMC stochastic (or deterministic) object, with len(rate_stoch.value) == len(dm.get_estimation_age_mesh()). Results ------- vars : dict Return a dictionary of all the relevant PyMC objects for the normal model. vars['rate_stoch'] is of particular relevance, for details see the beta_binomial_model """ vars = {} est_mesh = dm.get_estimate_age_mesh() if np.any(np.diff(est_mesh) != 1): raise ValueError, 'ERROR: Gaps in estimation age mesh must all equal 1' vars['rate_stoch'] = rate_stoch # set up priors and observed data prior_str = dm.get_priors(key) generate_prior_potentials(vars, prior_str, est_mesh) vars['observed_rates'] = [] for d in data_list: # set up observed stochs for all relevant data id = d['id'] if d['value'] == MISSING: print 'WARNING: data %d missing value' % id continue # ensure all rate data is valid d_val = dm.value_per_1(d) d_se = dm.se_per_1(d) if d['age_start'] < est_mesh[0] or d['age_end'] > est_mesh[-1]: raise ValueError, 'Data %d is outside of estimation range---([%d, %d] is not inside [%d, %d])' \ % (d['id'], d['age_start'], d['age_end'], est_mesh[0], est_mesh[-1]) age_indices = indices_for_range(est_mesh, d['age_start'], d['age_end']) age_weights = d.get('age_weights', np.ones(len(age_indices)) / len(age_indices)) # data must have standard error to use normal model if d_se == 0: raise ValueError, 'Data %d has invalid standard error' % d['id'] @mc.observed @mc.stochastic(name='obs_%d' % id) def obs(f=rate_stoch, age_indices=age_indices, 
age_weights=age_weights, value=d_val, tau=1./(d_se)**2): f_i = rate_for_range(f, age_indices, age_weights) return mc.normal_like(value, f_i, tau) vars['observed_rates'].append(obs) return vars