def rate_stoch(logit_rate=logit_rate):
    return interpolate(param_mesh, mc.invlogit(logit_rate), est_mesh)

def gamma(gamma_mesh=gamma_mesh, param_mesh=param_mesh, est_mesh=est_mesh):
    return interpolate(param_mesh, gamma_mesh, est_mesh)
def setup(dm, key, data_list, rate_stoch=None, emp_prior={}):
    """ Generate the PyMC variables for a beta-binomial model of a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)

    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the beta-binomial likelihood function

    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).
      This is used to link beta-binomial stochs into a larger model,
      for example.

    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

        >>> t, r, y, s = type_region_year_sex_from_key(key)
        >>> emp_prior = dm.get_empirical_prior(t)

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      beta-binomial model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the beta-binomial model
      into more complicated models, like the generic disease model.

    Details
    -------
    The beta-binomial model parameters are the following:
      * the mean age-specific rate function
      * dispersion of this mean
      * the p_i value for each data observation that has a standard
        error (data observations that do not have standard errors
        recorded are fit as observations of the beta r.v., while
        observations with standard errors recorded have a latent
        variable for the beta, and an observed binomial r.v.)
    """
    vars = {}

    est_mesh = dm.get_estimate_age_mesh()
    if np.any(np.diff(est_mesh) != 1):
        raise ValueError("ERROR: Gaps in estimation age mesh must all equal 1")

    # set up age-specific rate function, if it does not yet exist
    if not rate_stoch:
        param_mesh = dm.get_param_age_mesh()

        if "mu" in emp_prior:
            initial_value = emp_prior["mu"]
        else:
            initial_value = dm.get_initial_value(key)

        # find the logit of the initial values, which is a little bit
        # of work because initial values are sampled from the est_mesh,
        # but the logit_initial_values are needed on the param_mesh
        logit_initial_value = mc.logit(interpolate(est_mesh, initial_value, param_mesh))

        logit_rate = mc.Normal(
            "logit(%s)" % key,
            mu=-5.0 * np.ones(len(param_mesh)),
            tau=1.0e-2,
            value=logit_initial_value,
        )
        # logit_rate = [mc.Normal('logit(%s)_%d' % (key, a), mu=-5., tau=1.e-2) for a in param_mesh]
        vars["logit_rate"] = logit_rate

        @mc.deterministic(name=key)
        def rate_stoch(logit_rate=logit_rate):
            return interpolate(param_mesh, mc.invlogit(logit_rate), est_mesh)

    if "mu" in emp_prior:
        @mc.potential(name="empirical_prior_%s" % key)
        def emp_prior_potential(f=rate_stoch, mu=emp_prior["mu"], tau=1.0 / np.array(emp_prior["se"]) ** 2):
            return mc.normal_like(f, mu, tau)
        vars["empirical_prior"] = emp_prior_potential

    vars["rate_stoch"] = rate_stoch

    # create stochastic variable for over-dispersion "random effect"
    mu_od = emp_prior.get("dispersion", 0.001)
    dispersion = mc.Gamma("dispersion_%s" % key, alpha=10.0, beta=10.0 / mu_od)
    vars["dispersion"] = dispersion

    @mc.deterministic(name="alpha_%s" % key)
    def alpha(rate=rate_stoch, dispersion=dispersion):
        return rate / dispersion ** 2

    @mc.deterministic(name="beta_%s" % key)
    def beta(rate=rate_stoch, dispersion=dispersion):
        return (1.0 - rate) / dispersion ** 2

    vars["alpha"] = alpha
    vars["beta"] = beta

    # create potentials for priors
    vars["priors"] = generate_prior_potentials(dm.get_priors(key), est_mesh,
                                               rate_stoch, dispersion)
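    # Note on the parameterization used above and in the data loop below: the
    # per-observation beta distribution has mean equal to the age-specific rate,
    # since alpha = rate / dispersion**2 and beta = (1 - rate) / dispersion**2
    # give alpha / (alpha + beta) = rate, with the dispersion term controlling
    # its spread.  For data rows that carry a standard error, the observation is
    # treated as a binomial count with an effective sample size recovered from
    # the standard error of a proportion, se = sqrt(p * (1 - p) / n), i.e.
    #     n = p * (1 - p) / se**2   and   x = p * n,
    # which is the (denominator, numerator) pair computed below.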
    # create latent and observed stochastics for data
    vars["data"] = data_list
    vars["ab"] = []
    vars["latent_p"] = []
    vars["observations"] = []
    for d in data_list:
        # set up observed stochs for all relevant data
        id = d["id"]

        if d["value"] == MISSING:
            print("WARNING: data %d missing value" % id)
            continue

        # ensure all rate data is valid
        d_val = dm.value_per_1(d)
        d_se = dm.se_per_1(d)
        if d_val < 0 or d_val > 1:
            print("WARNING: data %d not in range [0, 1]" % id)
            continue

        if d["age_start"] < est_mesh[0] or d["age_end"] > est_mesh[-1]:
            raise ValueError(
                "Data %d is outside of estimation range---([%d, %d] is not inside [%d, %d])"
                % (d["id"], d["age_start"], d["age_end"], est_mesh[0], est_mesh[-1])
            )

        age_indices = indices_for_range(est_mesh, d["age_start"], d["age_end"])
        age_weights = d["age_weights"]

        @mc.deterministic(name="a_%d^%s" % (id, key))
        def a_i(alpha=alpha, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(alpha, age_indices, age_weights)

        @mc.deterministic(name="b_%d^%s" % (id, key))
        def b_i(beta=beta, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(beta, age_indices, age_weights)
        vars["ab"] += [a_i, b_i]

        if d_se > 0:
            # if the data has a standard error, model it as a realization
            # of a beta binomial r.v.
            latent_p_i = mc.Beta(
                "latent_p_%d^%s" % (id, key),
                alpha=a_i,
                beta=b_i,
                value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO),
            )
            vars["latent_p"].append(latent_p_i)

            denominator = d_val * (1 - d_val) / d_se ** 2.0
            numerator = d_val * denominator
            obs_binomial = mc.Binomial(
                "data_%d^%s" % (id, key), value=numerator, n=denominator, p=latent_p_i, observed=True
            )
            vars["observations"].append(obs_binomial)
        else:
            # if the data is a point estimate with no uncertainty
            # recorded, model it as a realization of a beta r.v.
            obs_p_i = mc.Beta(
                "latent_p_%d" % id,
                value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO),
                alpha=a_i,
                beta=b_i,
                observed=True,
            )
            vars["observations"].append(obs_p_i)

    return vars
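
# A minimal usage sketch (kept as a comment; it is not part of the module API).
# Assumptions: `dm` is a dismod3.DiseaseModel already loaded with data,
# `data_list` is a list of data dicts as described in the setup() docstring,
# and the key string is only a hypothetical example.  The vars returned by
# setup() can be handed directly to a PyMC sampler:
#
#     import numpy as np
#     import pymc as mc
#
#     key = "prevalence+north_america_high_income+1990+male"   # hypothetical key
#     vars = setup(dm, key, data_list)
#
#     mcmc = mc.MCMC(vars)
#     mcmc.sample(iter=10000, burn=5000, thin=5)
#
#     # posterior mean of the age-specific rate on the estimation age mesh
#     rate_est = np.mean(vars["rate_stoch"].trace(), axis=0)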