Ejemplo n.º 1
0
def setup(dm, key, data_list, rate_stoch):
    """ Generate the PyMC variables for a log-normal model of
    a function of age

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)

    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the log-normal likelihood function;
      each dict is read for 'id', 'age_start', 'age_end' and,
      optionally, 'age_weights'

    rate_stoch : pymc.Stochastic
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      log-normal model.  vars['rate_stoch'] is the rate_stoch that was
      passed in, and vars['observed_rates'] is a list with one observed
      stochastic per element of data_list.
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    vars['rate_stoch'] = rate_stoch

    # set up priors and observed data
    prior_str = dm.get_priors(key)
    generate_prior_potentials(vars, prior_str, est_mesh)

    vars['observed_rates'] = []
    for d in data_list:
        age_indices = indices_for_range(est_mesh, d['age_start'], d['age_end'])
        # uniform weights over the age range unless the datum brings its own
        age_weights = d.get('age_weights', np.ones(len(age_indices)) / len(age_indices))

        # standard error in log space, taken from the width of the
        # interval [lb, ub] treated as a 95% interval (2 * 1.96 se wide)
        lb, ub = dm.bounds_per_1(d)
        se = (np.log(ub) - np.log(lb)) / (2. * 1.96)
        if np.isnan(se) or se <= 0.:
            # unusable interval: fall back to a unit standard error
            se = 1.

        # compute the log of the observed value once, for both the
        # progress message and the likelihood
        log_value = np.log(dm.value_per_1(d))
        # single-argument call form prints the same text on Python 2
        # and is also valid Python 3
        print('data %d: log(value) = %f, se = %f' % (d['id'], log_value, se))

        # all per-datum quantities are bound as default arguments, so
        # each obs keeps the values of its own loop iteration
        @mc.observed
        @mc.stochastic(name='obs_%d' % d['id'])
        def obs(f=vars['rate_stoch'],
                age_indices=age_indices,
                age_weights=age_weights,
                value=log_value,
                tau=se**-2, data=d):
            f_i = rate_for_range(f, age_indices, age_weights)
            return mc.normal_like(value, np.log(f_i), tau)
        vars['observed_rates'].append(obs)

    return vars
Ejemplo n.º 2
0
def values_from(dm, d, min_val=1.e-5, max_se=.1):
    """ Pull the logit-space value and standard error out of one datum

    Parameters
    ----------
    dm : disease model
      supplies the estimation age mesh and the per-1 value and
      standard error of the datum

    d : data dict
      read for 'age_start', 'age_end', 'id' and, optionally,
      'age_weights'

    min_val : float, optional
      sets the size of the offset that moves a value of exactly zero
      or one into the open interval, since logit cannot model true
      zero; the offset applied is min_val / 10

    max_se : float, optional
      the standard error to use for data with missing or zero standard error

    Results
    -------
    age_indices, age_weights, logit_val, logit_se : tuple
      the index and weight vectors for the datum's age range, followed
      by the value and the standard error transformed to logit space

    Raises
    ------
    ValueError
      if the per-1 value is less than 0 or greater than 1
    """
    mesh = dm.get_estimate_age_mesh()

    # index vector and weight vector for the age range; the fallback
    # weights are all ones (they are not normalized here)
    age_indices = indices_for_range(mesh, d['age_start'], d['age_end'])
    fallback_weights = np.ones(len(age_indices))
    age_weights = d.get('age_weights', fallback_weights)

    # reject rates outside the unit interval
    rate = dm.value_per_1(d)
    if rate > 1 or rate < 0:
        debug('WARNING: data %d not in range (0,1)' % d['id'])
        raise ValueError

    # move the endpoints slightly inside the interval
    # TODO: determine if this is an acceptible way to deal with zero
    if rate == 0.:
        rate = min_val / 10.
    elif rate == 1.:
        rate = 1. - min_val / 10.

    logit_val = mc.logit(rate)

    # TODO: determine if this is an acceptible way to deal with missing
    se = dm.se_per_1(d)
    if se == MISSING or se == 0.:
        se = max_se

    # scale the standard error by 1/p + 1/(1-p), the derivative of
    # log(p / (1 - p)) at the (possibly adjusted) rate
    slope = 1/rate + 1/(1-rate)
    logit_se = slope * se

    return age_indices, age_weights, logit_val, logit_se
Ejemplo n.º 3
0
def values_from(dm, d):
    """ Pull the rate and effective sample size out of one datum

    Parameters
    ----------
    dm : disease model
      supplies the estimation age mesh and the per-1 value of the datum

    d : data dict
      read for 'age_start', 'age_end', 'id', 'effective_sample_size'
      and, optionally, 'age_weights'

    Results
    -------
    age_indices, age_weights, Y_i, N_i : tuple
      the index and weight vectors for the datum's age range, the
      per-1 value, and the effective sample size (never less than 1)

    Raises
    ------
    ValueError
      if the per-1 value is negative
    """
    mesh = dm.get_estimate_age_mesh()

    # index vector for the age range, and uniform weights summing to 1
    # unless the datum carries its own
    age_indices = indices_for_range(mesh, d['age_start'], d['age_end'])
    n_ages = len(age_indices)
    uniform_weights = np.ones(n_ages)/n_ages
    age_weights = d.get('age_weights', uniform_weights)

    # only negative rates are rejected
    # TODO: allow Y_i > 1, extract effective sample size appropriately in this case
    rate = dm.value_per_1(d)
    if rate < 0:
        debug('WARNING: data %d < 0' % d['id'])
        raise ValueError

    # floor the effective sample size at one
    sample_size = max(1., d['effective_sample_size'])
    return age_indices, age_weights, rate, sample_size
Ejemplo n.º 4
0
def setup(dm, key="%s", data_list=None):
    """ Generate the PyMC variables for a generic disease model

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)

    key : str, optional
      a string for modifying the names of the stochs in this model,
      must contain a single %s that will be substituted

    data_list : list of data dicts, optional
      the observed data to use in the rate stoch likelihood functions;
      each dict is selected by its 'data_type' entry.  None (the
      default) is treated as an empty list.

    Results
    -------
    vars : dict of PyMC stochs
      returns a dictionary of all the relevant PyMC objects for the
      generic disease model.  Entries are stored under key % name for
      name in 'incidence', 'remission', 'excess-mortality', 'bins',
      'prevalence', 'prevalence_x_excess-mortality', 'm', 'mortality',
      'relative-risk', 'smr', 'duration' and 'incidence_x_duration'.
    """
    # the declared default is None, which cannot be iterated by the
    # filters below; treat it as "no data"
    if data_list is None:
        data_list = []

    vars = {}

    # setup all-cause mortality
    param_type = "all-cause_mortality"
    data = [d for d in data_list if d["data_type"] == "all-cause mortality data"]
    m_all_cause = dm.mortality(key % param_type, data)

    # make covariate vectors and estimation vectors to know dimensions of these objects
    covariate_dict = dm.get_covariates()
    derived_covariate = dm.get_derived_covariate_values()
    X_region, X_study = rate_model.regional_covariates(key, covariate_dict, derived_covariate)
    est_mesh = dm.get_estimate_age_mesh()

    # update age_weights on non-incidence/prevalence data to reflect
    # prior prevalence distribution, if available
    # NOTE(review): `data` at this point holds only the all-cause
    # mortality data selected above, so the incidence/prevalence filter
    # below can never match; the loop may have been meant to run over
    # data_list.  Behavior is left unchanged pending confirmation.
    prior_prev = dm.get_mcmc("emp_prior_mean", key % "prevalence")
    if len(prior_prev) > 0:
        for d in data:
            if d["data_type"].startswith("incidence") or d["data_type"].startswith("prevalence"):
                continue
            age_indices = indices_for_range(est_mesh, d["age_start"], d["age_end"])
            d["age_weights"] = prior_prev[age_indices]
            d["age_weights"] /= sum(
                d["age_weights"]
            )  # age weights must sum to 1 (optimization of inner loop removed check on this)

    # create negative binomial models for incidence, remission, and
    # excess-mortality (which are all treated as "free" parameters)
    for param_type in ["incidence", "remission", "excess-mortality"]:
        data = [d for d in data_list if d["data_type"] == "%s data" % param_type]

        lower_bound_data = []  # TODO: include lower bound data when appropriate (this has not come up yet)

        prior_dict = dm.get_empirical_prior(
            param_type
        )  # use empirical priors for the type/region/year/sex if available
        if prior_dict == {}:  # otherwise use weakly informative priors
            # note: this fills in the dict object returned by dm in place
            prior_dict.update(
                alpha=np.zeros(len(X_region)),
                beta=np.zeros(len(X_study)),
                gamma=-5 * np.ones(len(est_mesh)),
                sigma_alpha=[1.0],
                sigma_beta=[1.0],
                sigma_gamma=[10.0],
                # delta is filled in from the global prior dict in neg_binom setup
            )
        vars[key % param_type] = rate_model.setup(
            dm, key % param_type, data, emp_prior=prior_dict, lower_bound_data=lower_bound_data
        )

    # create nicer names for the rate stochastic from each neg-binom rate model
    i = vars[key % "incidence"]["rate_stoch"]
    r = vars[key % "remission"]["rate_stoch"]
    f = vars[key % "excess-mortality"]["rate_stoch"]

    # initial fraction of population with the condition
    logit_C_0 = mc.Normal(
        "logit_%s" % (key % "C_0"), -5.0, 10.0 ** -2, value=-5.0
    )  # represent C_0 in logit space to allow unconstrained posterior maximization

    @mc.deterministic(name=key % "C_0")
    def C_0(logit_C_0=logit_C_0):
        return mc.invlogit(logit_C_0)

    # initial fraction population with and without condition
    @mc.deterministic(name=key % "S_0")
    def SC_0(C_0=C_0):
        return np.array([1.0 - C_0, C_0]).ravel()

    vars[key % "bins"] = {"initial": [SC_0, C_0, logit_C_0]}

    # iterative solution to difference equations to obtain bin sizes for all ages
    import scipy.linalg

    @mc.deterministic(name=key % "bins")
    def SCpm(SC_0=SC_0, i=i, r=r, f=f, m_all_cause=m_all_cause, age_mesh=dm.get_param_age_mesh()):
        # SC holds the two bin sizes, p the prevalence and m the
        # mortality term, each with one entry per point of age_mesh;
        # the rate vectors are indexed by the age values themselves
        SC = np.zeros([2, len(age_mesh)])
        p = np.zeros(len(age_mesh))
        m = np.zeros(len(age_mesh))

        SC[:, 0] = SC_0
        p[0] = SC_0[1] / (SC_0[0] + SC_0[1])
        m[0] = trim(
            m_all_cause[age_mesh[0]] - f[age_mesh[0]] * p[0], 0.1 * m_all_cause[age_mesh[0]], 1 - NEARLY_ZERO
        )  # trim m[0] to avoid numerical instability

        for ii, a in enumerate(age_mesh[:-1]):
            # transition-rate matrix for the two bins, scaled by the
            # width of the age interval
            A = np.array([[-i[a] - m[ii], r[a]], [i[a], -r[a] - m[ii] - f[a]]]) * (age_mesh[ii + 1] - age_mesh[ii])

            # advance the bins to the next mesh point with the matrix exponential
            SC[:, ii + 1] = np.dot(scipy.linalg.expm(A), SC[:, ii])

            p[ii + 1] = trim(SC[1, ii + 1] / (SC[0, ii + 1] + SC[1, ii + 1]), NEARLY_ZERO, 1 - NEARLY_ZERO)
            m[ii + 1] = trim(
                m_all_cause[age_mesh[ii + 1]] - f[age_mesh[ii + 1]] * p[ii + 1],
                0.1 * m_all_cause[age_mesh[ii + 1]],
                1 - NEARLY_ZERO,
            )

        # pack everything into one array: rows 0-1 are SC, row 2 is p, row 3 is m
        SCpm = np.zeros([4, len(age_mesh)])
        SCpm[0:2, :] = SC
        SCpm[2, :] = p
        SCpm[3, :] = m
        return SCpm

    vars[key % "bins"]["age > 0"] = [SCpm]

    # prevalence = # with condition / (# with condition + # without)
    @mc.deterministic(name=key % "p")
    def p(SCpm=SCpm, param_mesh=dm.get_param_age_mesh(), est_mesh=dm.get_estimate_age_mesh()):
        return dismod3.utils.interpolate(param_mesh, SCpm[2, :], est_mesh)

    data = [d for d in data_list if d["data_type"] == "prevalence data"]
    prior_dict = dm.get_empirical_prior("prevalence")
    if prior_dict == {}:
        prior_dict.update(
            alpha=np.zeros(len(X_region)),
            beta=np.zeros(len(X_study)),
            gamma=-5 * np.ones(len(est_mesh)),
            sigma_alpha=[1.0],
            sigma_beta=[1.0],
            sigma_gamma=[10.0],
            # delta is filled in from the global prior dict in neg_binom setup
        )

    vars[key % "prevalence"] = rate_model.setup(dm, key % "prevalence", data, p, emp_prior=prior_dict)
    p = vars[key % "prevalence"][
        "rate_stoch"
    ]  # replace perfectly consistent p with version including level-bound priors

    # make a blank prior dict, to avoid weirdness
    blank_prior_dict = dict(
        alpha=np.zeros(len(X_region)),
        beta=np.zeros(len(X_study)),
        gamma=-5 * np.ones(len(est_mesh)),
        sigma_alpha=[1.0],
        sigma_beta=[1.0],
        sigma_gamma=[10.0],
        delta=100.0,
        sigma_delta=1.0,
    )
    # cause-specific-mortality is a lower bound on p*f
    @mc.deterministic(name=key % "pf")
    def pf(p=p, f=f):
        return (p + NEARLY_ZERO) * f

    data = [d for d in data_list if d["data_type"] == "prevalence x excess-mortality data"]
    lower_bound_data = [d for d in data_list if d["data_type"] == "cause-specific mortality data"]

    vars[key % "prevalence_x_excess-mortality"] = rate_model.setup(
        dm, key % "pf", rate_stoch=pf, data_list=data, lower_bound_data=lower_bound_data, emp_prior=blank_prior_dict
    )

    # m = m_all_cause - f * p
    @mc.deterministic(name=key % "m")
    def m(SCpm=SCpm, param_mesh=dm.get_param_age_mesh(), est_mesh=dm.get_estimate_age_mesh()):
        return dismod3.utils.interpolate(param_mesh, SCpm[3, :], est_mesh)

    vars[key % "m"] = m

    # m_with = m + f
    @mc.deterministic(name=key % "m_with")
    def m_with(m=m, f=f):
        return m + f

    data = [d for d in data_list if d["data_type"] == "mortality data"]
    # TODO: test this
    # prior_dict = dm.get_empirical_prior('excess-mortality')  # TODO:  make separate prior for with-condition mortality
    vars[key % "mortality"] = rate_model.setup(dm, key % "m_with", data, m_with, emp_prior=blank_prior_dict)

    # mortality rate ratio = mortality with condition / mortality without
    # (the small constant keeps the denominator away from zero)
    @mc.deterministic(name=key % "RR")
    def RR(m=m, m_with=m_with):
        return m_with / (m + 0.0001)

    data = [d for d in data_list if d["data_type"] == "relative-risk data"]
    vars[key % "relative-risk"] = log_normal_model.setup(dm, key % "relative-risk", data, RR)

    # standardized mortality rate ratio = mortality with condition / all-cause mortality
    @mc.deterministic(name=key % "SMR")
    def SMR(m_with=m_with, m_all_cause=m_all_cause):
        return m_with / (m_all_cause + 0.0001)

    data = [d for d in data_list if d["data_type"] == "smr data"]
    vars[key % "smr"] = log_normal_model.setup(dm, key % "smr", data, SMR)

    # duration = E[time in bin C]
    @mc.deterministic(name=key % "X")
    def X(r=r, m=m, f=f):
        hazard = r + m + f
        pr_not_exit = np.exp(-hazard)
        X = np.empty(len(hazard))
        # the last age is seeded with 1 / hazard, then the recursion
        # runs backward from the second-to-last age to the first
        X[-1] = 1 / hazard[-1]
        for i in reversed(range(len(X) - 1)):
            X[i] = pr_not_exit[i] * (X[i + 1] + 1) + 1 / hazard[i] * (1 - pr_not_exit[i]) - pr_not_exit[i]
        return X

    data = [d for d in data_list if d["data_type"] == "duration data"]
    vars[key % "duration"] = normal_model.setup(dm, key % "duration", data, X)

    # YLD[a] = disability weight * i[a] * X[a] * regional_population[a]
    @mc.deterministic(name=key % "i*X")
    def iX(i=i, X=X, p=p, pop=rate_model.regional_population(key)):
        # extra term at the first age only: p[0] * pop[0]
        birth_yld = np.zeros_like(p)
        birth_yld[0] = p[0] * pop[0]

        return i * X * (1 - p) * pop + birth_yld

    vars[key % "incidence_x_duration"] = {"rate_stoch": iX}

    return vars
Ejemplo n.º 5
0
def setup(dm, key, data_list, rate_stoch=None, emp_prior={}):
    """ Generate the PyMC variables for a beta binomial model of
    a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)

    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the beta-binomial likelihood function;
      each dict is read for 'id', 'value', 'age_start', 'age_end' and,
      optionally, 'age_weights' (uniform weights are used when absent)

    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).
      This is used to link beta-binomial stochs into a larger model,
      for example.  When None, a new rate function is created.

    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)

      The keys read here are 'mu', 'se' and 'dispersion'.  The dict is
      only read, never modified.

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      beta binomial model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the beta-binomial model
      into more complicated models, like the generic disease model.

    Raises
    ------
    ValueError
      if the estimation age mesh has a gap other than 1, or if a datum
      lies outside the estimation age range

    Details
    -------
    The beta binomial model parameters are the following:
      * the mean age-specific rate function
      * dispersion of this mean
      * the p_i value for each data observation that has a standard
        error (data observations that do not have standard errors
        recorded are fit as observations of the beta r.v., while
        observations with standard errors recorded have a latent
        variable for the beta, and an observed binomial r.v.).
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    if np.any(np.diff(est_mesh) != 1):
        raise ValueError("ERROR: Gaps in estimation age mesh must all equal 1")

    # set up age-specific rate function, if it does not yet exist
    # (compare against None explicitly rather than relying on the
    # truthiness of a PyMC object)
    if rate_stoch is None:
        param_mesh = dm.get_param_age_mesh()

        if "mu" in emp_prior:
            initial_value = emp_prior["mu"]
        else:
            initial_value = dm.get_initial_value(key)

        # find the logit of the initial values, which is a little bit
        # of work because initial values are sampled from the est_mesh,
        # but the logit_initial_values are needed on the param_mesh
        logit_initial_value = mc.logit(interpolate(est_mesh, initial_value, param_mesh))

        logit_rate = mc.Normal(
            "logit(%s)" % key, mu=-5.0 * np.ones(len(param_mesh)), tau=1.0e-2, value=logit_initial_value
        )
        # logit_rate = [mc.Normal('logit(%s)_%d' % (key, a), mu=-5., tau=1.e-2) for a in param_mesh]
        vars["logit_rate"] = logit_rate

        @mc.deterministic(name=key)
        def rate_stoch(logit_rate=logit_rate):
            return interpolate(param_mesh, mc.invlogit(logit_rate), est_mesh)

    if "mu" in emp_prior:

        @mc.potential(name="empirical_prior_%s" % key)
        def emp_prior_potential(f=rate_stoch, mu=emp_prior["mu"], tau=1.0 / np.array(emp_prior["se"]) ** 2):
            return mc.normal_like(f, mu, tau)

        vars["empirical_prior"] = emp_prior_potential

    vars["rate_stoch"] = rate_stoch

    # create stochastic variable for over-dispersion "random effect"
    mu_od = emp_prior.get("dispersion", 0.001)
    dispersion = mc.Gamma("dispersion_%s" % key, alpha=10.0, beta=10.0 / mu_od)
    vars["dispersion"] = dispersion

    # beta-distribution parameters as functions of age:
    # alpha = rate / dispersion^2 and beta = (1 - rate) / dispersion^2
    @mc.deterministic(name="alpha_%s" % key)
    def alpha(rate=rate_stoch, dispersion=dispersion):
        return rate / dispersion ** 2

    @mc.deterministic(name="beta_%s" % key)
    def beta(rate=rate_stoch, dispersion=dispersion):
        return (1.0 - rate) / dispersion ** 2

    vars["alpha"] = alpha
    vars["beta"] = beta

    # create potentials for priors
    vars["priors"] = generate_prior_potentials(dm.get_priors(key), est_mesh, rate_stoch, dispersion)

    # create latent and observed stochastics for data
    vars["data"] = data_list
    vars["ab"] = []
    vars["latent_p"] = []
    vars["observations"] = []

    for d in data_list:
        # set up observed stochs for all relevant data
        data_id = d["id"]

        if d["value"] == MISSING:
            # single-argument call form prints the same text on
            # Python 2 and is also valid Python 3
            print("WARNING: data %d missing value" % data_id)
            continue

        # ensure all rate data is valid
        d_val = dm.value_per_1(d)
        d_se = dm.se_per_1(d)

        if d_val < 0 or d_val > 1:
            print("WARNING: data %d not in range [0,1]" % data_id)
            continue

        if d["age_start"] < est_mesh[0] or d["age_end"] > est_mesh[-1]:
            raise ValueError(
                "Data %d is outside of estimation range---([%d, %d] is not inside [%d, %d])"
                % (
                    d["id"],
                    d["age_start"],
                    d["age_end"],
                    est_mesh[0],
                    est_mesh[-1],
                )
            )

        age_indices = indices_for_range(est_mesh, d["age_start"], d["age_end"])
        # fall back to uniform weights when the datum carries none,
        # instead of failing with a KeyError
        age_weights = d.get("age_weights", np.ones(len(age_indices)) / len(age_indices))

        # per-datum values are bound as default arguments, so each
        # a_i / b_i keeps the values of its own loop iteration
        @mc.deterministic(name="a_%d^%s" % (data_id, key))
        def a_i(alpha=alpha, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(alpha, age_indices, age_weights)

        @mc.deterministic(name="b_%d^%s" % (data_id, key))
        def b_i(beta=beta, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(beta, age_indices, age_weights)

        vars["ab"] += [a_i, b_i]

        if d_se > 0:
            # if the data has a standard error, model it as a realization
            # of a beta binomial r.v.
            latent_p_i = mc.Beta(
                "latent_p_%d^%s" % (data_id, key),
                alpha=a_i,
                beta=b_i,
                value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO),
            )
            vars["latent_p"].append(latent_p_i)

            # binomial sample size n = p (1 - p) / se^2 and success
            # count p * n, so that the binomial has the recorded
            # value and standard error
            denominator = d_val * (1 - d_val) / d_se ** 2.0
            numerator = d_val * denominator
            obs_binomial = mc.Binomial(
                "data_%d^%s" % (data_id, key), value=numerator, n=denominator, p=latent_p_i, observed=True
            )
            vars["observations"].append(obs_binomial)
        else:
            # if the data is a point estimate with no uncertainty
            # recorded, model it as a realization of a beta r.v.
            obs_p_i = mc.Beta(
                "latent_p_%d" % data_id,
                value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO),
                alpha=a_i,
                beta=b_i,
                observed=True,
            )
            vars["observations"].append(obs_p_i)

    return vars
Ejemplo n.º 6
0
def setup(dm, key='%s', data_list=None):
    """ Generate the PyMC variables for a generic disease model

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)

    key : str, optional
      a string for modifying the names of the stochs in this model,
      must contain a single %s that will be substituted

    data_list : list of data dicts, optional
      the observed data to use in the rate stoch likelihood functions;
      each dict is selected by its 'data_type' entry.  None (the
      default) is treated as an empty list.

    Results
    -------
    vars : dict of PyMC stochs
      returns a dictionary of all the relevant PyMC objects for the
      generic disease model.  Entries are stored under key % name for
      name in 'incidence', 'remission', 'excess-mortality', 'bins',
      'prevalence', 'prevalence_x_excess-mortality', 'm', 'mortality',
      'relative-risk', 'smr', 'duration' and 'incidence_x_duration'.
    """
    # the declared default is None, which cannot be iterated by the
    # filters below; treat it as "no data"
    if data_list is None:
        data_list = []

    vars = {}


    # setup all-cause mortality
    param_type = 'all-cause_mortality'
    data = [d for d in data_list if d['data_type'] == 'all-cause mortality data']
    m_all_cause = dm.mortality(key % param_type, data)

    # covariate vectors and estimation mesh, used for the dimensions of the priors
    covariate_dict = dm.get_covariates()
    X_region, X_study = rate_model.regional_covariates(key, covariate_dict)
    est_mesh = dm.get_estimate_age_mesh()

    # update age_weights on non-incidence/prevalence data to reflect
    # prior prevalence distribution, if available
    # NOTE(review): `data` at this point holds only the all-cause
    # mortality data selected above, so the incidence/prevalence filter
    # below can never match; the loop may have been meant to run over
    # data_list.  Behavior is left unchanged pending confirmation.
    prior_prev = dm.get_mcmc('emp_prior_mean', key % 'prevalence')
    if len(prior_prev) > 0:
        for d in data:
            if d['data_type'].startswith('incidence') or d['data_type'].startswith('prevalence'):
                continue
            age_indices = indices_for_range(est_mesh, d['age_start'], d['age_end'])
            d['age_weights'] = prior_prev[age_indices]
            d['age_weights'] /= sum(d['age_weights']) # age weights must sum to 1 (optimization of inner loop removed check on this)


    # one rate model each for incidence, remission, and excess-mortality
    for param_type in ['incidence', 'remission', 'excess-mortality']:
        data = [d for d in data_list if d['data_type'] == '%s data' % param_type]

        lower_bound_data = []
        # TODO: include lower bound data when appropriate

        # use the empirical prior if there is one, otherwise fill the
        # (empty) dict returned by dm in place with default values
        prior_dict = dm.get_empirical_prior(param_type)
        if prior_dict == {}:
            prior_dict.update(alpha=np.zeros(len(X_region)),
                              beta=np.zeros(len(X_study)),
                              gamma=-5*np.ones(len(est_mesh)),
                              sigma_alpha=[1.],
                              sigma_beta=[1.],
                              sigma_gamma=[10.],
                              # delta is filled in from the global prior dict in neg_binom setup
                              )
        vars[key % param_type] = rate_model.setup(dm, key % param_type, data,
                                                  emp_prior=prior_dict, lower_bound_data=lower_bound_data)

    # short names for the rate stochastic of each rate model
    i = vars[key % 'incidence']['rate_stoch']
    r = vars[key % 'remission']['rate_stoch']
    f = vars[key % 'excess-mortality']['rate_stoch']

    # Initial population with condition, represented in logit space
    logit_C_0 = mc.Normal('logit_%s' % (key % 'C_0'), -5., 10.**-2, value=-5.)
    @mc.deterministic(name=key % 'C_0')
    def C_0(logit_C_0=logit_C_0):
        return mc.invlogit(logit_C_0)

    # Initial population without and with condition, as a length-2 vector
    @mc.deterministic(name=key % 'S_0')
    def SC_0(C_0=C_0):
        return np.array([1. - C_0, C_0]).ravel()
    vars[key % 'bins'] = {'initial': [SC_0, C_0, logit_C_0]}


    # iterative solution to difference equations to obtain bin sizes for all ages
    import scipy.linalg
    @mc.deterministic(name=key % 'bins')
    def SCpm(SC_0=SC_0, i=i, r=r, f=f, m_all_cause=m_all_cause, age_mesh=dm.get_param_age_mesh()):
        # SC holds the two bin sizes, p the prevalence and m the
        # mortality term, each with one entry per point of age_mesh;
        # the rate vectors are indexed by the age values themselves
        SC = np.zeros([2, len(age_mesh)])
        p = np.zeros(len(age_mesh))
        m = np.zeros(len(age_mesh))

        SC[:,0] = SC_0
        p[0] = SC_0[1] / (SC_0[0] + SC_0[1])
        # m[0] is trimmed to the range [.1 * m_all_cause, 1 - NEARLY_ZERO]
        m[0] = trim(m_all_cause[age_mesh[0]] - f[age_mesh[0]] * p[0], .1*m_all_cause[age_mesh[0]], 1-NEARLY_ZERO)

        for ii, a in enumerate(age_mesh[:-1]):
            # transition-rate matrix for the two bins, scaled by the
            # width of the age interval
            A = np.array([[-i[a]-m[ii],  r[a]          ],
                          [ i[a]     , -r[a]-m[ii]-f[a]]]) * (age_mesh[ii+1] - age_mesh[ii])

            # advance the bins to the next mesh point with the matrix exponential
            SC[:,ii+1] = np.dot(scipy.linalg.expm(A), SC[:,ii])

            p[ii+1] = trim(SC[1,ii+1] / (SC[0,ii+1] + SC[1,ii+1]), NEARLY_ZERO, 1-NEARLY_ZERO)
            m[ii+1] = trim(m_all_cause[age_mesh[ii+1]] - f[age_mesh[ii+1]] * p[ii+1], .1*m_all_cause[age_mesh[ii+1]], 1-NEARLY_ZERO)

        # pack everything into one array: rows 0-1 are SC, row 2 is p, row 3 is m
        SCpm = np.zeros([4, len(age_mesh)])
        SCpm[0:2,:] = SC
        SCpm[2,:] = p
        SCpm[3,:] = m
        return SCpm

    vars[key % 'bins']['age > 0'] = [SCpm]


    # prevalence = # with condition / (# with condition + # without)
    @mc.deterministic(name=key % 'p')
    def p(SCpm=SCpm, param_mesh=dm.get_param_age_mesh(), est_mesh=dm.get_estimate_age_mesh()):
        return dismod3.utils.interpolate(param_mesh, SCpm[2,:], est_mesh)
    data = [d for d in data_list if d['data_type'] == 'prevalence data']
    prior_dict = dm.get_empirical_prior('prevalence')
    if prior_dict == {}:
        prior_dict.update(alpha=np.zeros(len(X_region)),
                          beta=np.zeros(len(X_study)),
                          gamma=-5*np.ones(len(est_mesh)),
                          sigma_alpha=[1.],
                          sigma_beta=[1.],
                          sigma_gamma=[10.],
                          # delta is filled in from the global prior dict in neg_binom setup
                          )

    vars[key % 'prevalence'] = rate_model.setup(dm, key % 'prevalence', data, p, emp_prior=prior_dict)
    p = vars[key % 'prevalence']['rate_stoch']  # replace perfectly consistent p with version including level-bound priors

    # make a blank prior dict, to avoid weirdness
    blank_prior_dict = dict(alpha=np.zeros(len(X_region)),
                            beta=np.zeros(len(X_study)),
                            gamma=-5*np.ones(len(est_mesh)),
                            sigma_alpha=[1.],
                            sigma_beta=[1.],
                            sigma_gamma=[10.],
                            delta=100.,
                            sigma_delta=1.
                            )
    # cause-specific-mortality is a lower bound on p*f
    @mc.deterministic(name=key % 'pf')
    def pf(p=p, f=f):
        return (p+NEARLY_ZERO)*f
    # TODO: add a 'with-condition population mortality rate date' type
    # data = [d for d in data_list if d['data_type'] == 'with-condition population mortality rate data']
    data = []
    lower_bound_data = [d for d in data_list if d['data_type'] == 'cause-specific mortality data']
    vars[key % 'prevalence_x_excess-mortality'] = rate_model.setup(dm, key % 'pf', rate_stoch=pf, data_list=data, lower_bound_data=lower_bound_data, emp_prior=blank_prior_dict)


    # m = m_all_cause - f * p
    @mc.deterministic(name=key % 'm')
    def m(SCpm=SCpm, param_mesh=dm.get_param_age_mesh(), est_mesh=dm.get_estimate_age_mesh()):
        return dismod3.utils.interpolate(param_mesh,  SCpm[3,:], est_mesh)
    vars[key % 'm'] = m

    # m_with = m + f
    @mc.deterministic(name=key % 'm_with')
    def m_with(m=m, f=f):
        return m + f
    data = [d for d in data_list if d['data_type'] == 'mortality data']
    # TODO: test this
    #prior_dict = dm.get_empirical_prior('excess-mortality')  # TODO:  make separate prior for with-condition mortality
    vars[key % 'mortality'] = rate_model.setup(dm, key % 'm_with', data, m_with, emp_prior=blank_prior_dict)

    # mortality rate ratio = mortality with condition / mortality without
    # (the small constant keeps the denominator away from zero)
    @mc.deterministic(name=key % 'RR')
    def RR(m=m, m_with=m_with):
        return m_with / (m + .0001)
    data = [d for d in data_list if d['data_type'] == 'relative-risk data']
    vars[key % 'relative-risk'] = log_normal_model.setup(dm, key % 'relative-risk', data, RR)

    # standardized mortality rate ratio = mortality with condition / all-cause mortality
    @mc.deterministic(name=key % 'SMR')
    def SMR(m_with=m_with, m_all_cause=m_all_cause):
        return m_with / (m_all_cause + .0001)
    data = [d for d in data_list if d['data_type'] == 'smr data']
    vars[key % 'smr'] = log_normal_model.setup(dm, key % 'smr', data, SMR)

    # duration = E[time in bin C]
    @mc.deterministic(name=key % 'X')
    def X(r=r, m=m, f=f):
        hazard = r + m + f
        pr_not_exit = np.exp(-hazard)
        X = np.empty(len(hazard))
        # the last age is seeded with 1 / hazard, then the recursion
        # runs backward from the second-to-last age to the first
        X[-1] = 1 / hazard[-1]
        for i in reversed(range(len(X)-1)):
            X[i] = pr_not_exit[i] * (X[i+1] + 1) + 1 / hazard[i] * (1 - pr_not_exit[i]) - pr_not_exit[i]
        return X
    data = [d for d in data_list if d['data_type'] == 'duration data']
    vars[key % 'duration'] = normal_model.setup(dm, key % 'duration', data, X)

    # YLD[a] = disability weight * i[a] * X[a] * regional_population[a]
    @mc.deterministic(name=key % 'i*X')
    def iX(i=i, X=X, p=p, pop=rate_model.regional_population(key)):
        return i * X * (1-p) * pop 
    vars[key % 'incidence_x_duration'] = {'rate_stoch': iX}

    return vars
Ejemplo n.º 7
0
def setup(dm, key, data_list, rate_stoch):
    """ Generate the PyMC variables for a normal model of
    a function of age

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)

    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the normal likelihood function; each
      dict is read for 'id', 'value', 'age_start', 'age_end' and,
      optionally, 'age_weights'

    rate_stoch : pymc.Stochastic
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      normal model.  vars['rate_stoch'] is the rate_stoch that was
      passed in, and vars['observed_rates'] is a list with one observed
      stochastic per datum that has a value.

    Raises
    ------
    ValueError
      if the estimation age mesh has a gap other than 1, if a datum
      lies outside the estimation age range, or if a datum has a
      standard error of zero
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    if np.any(np.diff(est_mesh) != 1):
        raise ValueError('ERROR: Gaps in estimation age mesh must all equal 1')

    vars['rate_stoch'] = rate_stoch

    # set up priors and observed data
    prior_str = dm.get_priors(key)
    generate_prior_potentials(vars, prior_str, est_mesh)

    vars['observed_rates'] = []
    for d in data_list:
        # set up observed stochs for all relevant data
        data_id = d['id']

        if d['value'] == MISSING:
            # single-argument call form prints the same text on
            # Python 2 and is also valid Python 3
            print('WARNING: data %d missing value' % data_id)
            continue

        # ensure all rate data is valid
        d_val = dm.value_per_1(d)
        d_se = dm.se_per_1(d)

        if d['age_start'] < est_mesh[0] or d['age_end'] > est_mesh[-1]:
            raise ValueError(
                'Data %d is outside of estimation range---([%d, %d] is not inside [%d, %d])'
                % (d['id'], d['age_start'], d['age_end'], est_mesh[0], est_mesh[-1]))

        age_indices = indices_for_range(est_mesh, d['age_start'], d['age_end'])
        # uniform weights over the age range unless the datum brings its own
        age_weights = d.get('age_weights', np.ones(len(age_indices)) / len(age_indices))

        # data must have standard error to use normal model
        # NOTE(review): only an exact zero is rejected; if se_per_1 can
        # return a negative "missing" sentinel it would pass this check
        # and be squared into tau below -- confirm against dm
        if d_se == 0:
            raise ValueError('Data %d has invalid standard error' % d['id'])

        # per-datum values are bound as default arguments, so each obs
        # keeps the values of its own loop iteration
        @mc.observed
        @mc.stochastic(name='obs_%d' % data_id)
        def obs(f=rate_stoch,
                age_indices=age_indices,
                age_weights=age_weights,
                value=d_val,
                tau=1./(d_se)**2):
            f_i = rate_for_range(f, age_indices, age_weights)
            return mc.normal_like(value, f_i, tau)
        vars['observed_rates'].append(obs)

    return vars