Beispiel #1
0
 def rate_stoch(logit_rate=logit_rate):
     """Return the rate on ``est_mesh`` computed from ``logit_rate``.

     The inverse logit of ``logit_rate`` is taken first; the result is
     then handed to ``interpolate`` together with ``param_mesh`` (source
     mesh argument) and ``est_mesh`` (target mesh argument), both of
     which come from the enclosing scope.
     """
     rate_on_param_mesh = mc.invlogit(logit_rate)
     rate_on_est_mesh = interpolate(param_mesh, rate_on_param_mesh, est_mesh)
     return rate_on_est_mesh
Beispiel #2
0
 def gamma(gamma_mesh=gamma_mesh, param_mesh=param_mesh, est_mesh=est_mesh):
     """Return ``gamma_mesh`` passed through ``interpolate``.

     ``interpolate`` is called with ``param_mesh`` as its first argument,
     the ``gamma_mesh`` values as its second, and ``est_mesh`` as its
     third.
     """
     gamma_on_est_mesh = interpolate(param_mesh, gamma_mesh, est_mesh)
     return gamma_on_est_mesh
Beispiel #3
0
def setup(dm, key, data_list, rate_stoch=None, emp_prior={}):
    """ Generate the PyMC variables for a beta binomial model of
    a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
      
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the beta-binomial liklihood function

    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimation_age_mesh()).
      This is used to link beta-binomial stochs into a larger model,
      for example.

    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)
      

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      beta binomial model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the beta-binomial model
      into more complicated models, like the generic disease model.

    Details
    -------
    The beta binomial model parameters are the following:
      * the mean age-specific rate function
      * dispersion of this mean
      * the p_i value for each data observation that has a standard
        error (data observations that do not have standard errors
        recorded are fit as observations of the beta r.v., while
        observations with standard errors recorded have a latent
        variable for the beta, and an observed binomial r.v.).
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    if np.any(np.diff(est_mesh) != 1):
        raise ValueError, "ERROR: Gaps in estimation age mesh must all equal 1"

    # set up age-specific rate function, if it does not yet exist
    if not rate_stoch:
        param_mesh = dm.get_param_age_mesh()

        if emp_prior.has_key("mu"):
            initial_value = emp_prior["mu"]
        else:
            initial_value = dm.get_initial_value(key)

        # find the logit of the initial values, which is a little bit
        # of work because initial values are sampled from the est_mesh,
        # but the logit_initial_values are needed on the param_mesh
        logit_initial_value = mc.logit(interpolate(est_mesh, initial_value, param_mesh))

        logit_rate = mc.Normal(
            "logit(%s)" % key, mu=-5.0 * np.ones(len(param_mesh)), tau=1.0e-2, value=logit_initial_value
        )
        # logit_rate = [mc.Normal('logit(%s)_%d' % (key, a), mu=-5., tau=1.e-2) for a in param_mesh]
        vars["logit_rate"] = logit_rate

        @mc.deterministic(name=key)
        def rate_stoch(logit_rate=logit_rate):
            return interpolate(param_mesh, mc.invlogit(logit_rate), est_mesh)

    if emp_prior.has_key("mu"):

        @mc.potential(name="empirical_prior_%s" % key)
        def emp_prior_potential(f=rate_stoch, mu=emp_prior["mu"], tau=1.0 / np.array(emp_prior["se"]) ** 2):
            return mc.normal_like(f, mu, tau)

        vars["empirical_prior"] = emp_prior_potential

    vars["rate_stoch"] = rate_stoch

    # create stochastic variable for over-dispersion "random effect"
    mu_od = emp_prior.get("dispersion", 0.001)
    dispersion = mc.Gamma("dispersion_%s" % key, alpha=10.0, beta=10.0 / mu_od)
    vars["dispersion"] = dispersion

    @mc.deterministic(name="alpha_%s" % key)
    def alpha(rate=rate_stoch, dispersion=dispersion):
        return rate / dispersion ** 2

    @mc.deterministic(name="beta_%s" % key)
    def beta(rate=rate_stoch, dispersion=dispersion):
        return (1.0 - rate) / dispersion ** 2

    vars["alpha"] = alpha
    vars["beta"] = beta

    # create potentials for priors
    vars["priors"] = generate_prior_potentials(dm.get_priors(key), est_mesh, rate_stoch, dispersion)

    # create latent and observed stochastics for data
    vars["data"] = data_list
    vars["ab"] = []
    vars["latent_p"] = []
    vars["observations"] = []

    for d in data_list:
        # set up observed stochs for all relevant data
        id = d["id"]

        if d["value"] == MISSING:
            print "WARNING: data %d missing value" % id
            continue

        # ensure all rate data is valid
        d_val = dm.value_per_1(d)
        d_se = dm.se_per_1(d)

        if d_val < 0 or d_val > 1:
            print "WARNING: data %d not in range [0,1]" % id
            continue

        if d["age_start"] < est_mesh[0] or d["age_end"] > est_mesh[-1]:
            raise ValueError, "Data %d is outside of estimation range---([%d, %d] is not inside [%d, %d])" % (
                d["id"],
                d["age_start"],
                d["age_end"],
                est_mesh[0],
                est_mesh[-1],
            )

        age_indices = indices_for_range(est_mesh, d["age_start"], d["age_end"])
        age_weights = d["age_weights"]

        @mc.deterministic(name="a_%d^%s" % (id, key))
        def a_i(alpha=alpha, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(alpha, age_indices, age_weights)

        @mc.deterministic(name="b_%d^%s" % (id, key))
        def b_i(beta=beta, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(beta, age_indices, age_weights)

        vars["ab"] += [a_i, b_i]

        if d_se > 0:
            # if the data has a standard error, model it as a realization
            # of a beta binomial r.v.
            latent_p_i = mc.Beta(
                "latent_p_%d^%s" % (id, key), alpha=a_i, beta=b_i, value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO)
            )
            vars["latent_p"].append(latent_p_i)

            denominator = d_val * (1 - d_val) / d_se ** 2.0
            numerator = d_val * denominator
            obs_binomial = mc.Binomial(
                "data_%d^%s" % (id, key), value=numerator, n=denominator, p=latent_p_i, observed=True
            )
            vars["observations"].append(obs_binomial)
        else:
            # if the data is a point estimate with no uncertainty
            # recorded, model it as a realization of a beta r.v.
            obs_p_i = mc.Beta(
                "latent_p_%d" % id, value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO), alpha=a_i, beta=b_i, observed=True
            )
            vars["observations"].append(obs_p_i)

    return vars