def plot_funnel(pi_true, sigma_str):
    """ Draw a funnel plot of simulated rates against study size

    Study sizes are simulated as ``exp`` of 10000 draws from
    ``mc.rnormal(10, 2**-2)``; the simulated rates are the current value
    of the 'p_pred' entry returned by ``rate_model.offset_log_normal``.
    The observed values from the module-level ``schiz`` table (columns
    'r' and 'n') are overlaid on a log-scaled y axis.

    :Parameters:
      - `pi_true` : float, rate used for every simulated study; also marked with a vertical line
      - `sigma_str` : str, parsed with ``float`` for the model and shown verbatim in the panel title

    :Results:
      - Returns None; draws into the current pylab axes
    """
    study_size = pl.exp(mc.rnormal(10, 2**-2, size=10000))
    ones = pl.ones_like(study_size)
    sigma_vec = float(sigma_str) * ones
    rate_vec = pi_true * ones
    rate_se = pl.sqrt(rate_vec * (1 - rate_vec) / study_size)

    funnel_vars = rate_model.offset_log_normal('funnel', rate_vec, sigma_vec,
                                               rate_vec, rate_se)
    simulated_rate = funnel_vars['p_pred'].value

    # mark the true rate: a dashed black line drawn over a wider white one
    line_bottom = .1 * study_size.min()
    line_top = 10 * study_size.max()
    for width, dash, colour, layer in ((2, '-', 'w', 9),
                                       (1, '--', 'black', 10)):
        pl.vlines([pi_true], line_bottom, line_top,
                  linewidth=width, linestyle=dash, color=colour, zorder=layer)

    pl.plot(simulated_rate, study_size, 'ko', mew=0, alpha=.25)

    # semilogy also switches the y axis to a log scale
    pl.semilogy(schiz['r'], schiz['n'], 'ks',
                mew=1, mec='white', ms=4, label='Observed values')

    pl.xlabel('Rate (per PY)')
    pl.ylabel('Study size (PY)')
    pl.xticks([0, .005, .01])
    pl.axis([-.0001, .0101, 50., 15000000])
    pl.title(r'$\sigma = %s$' % sigma_str)
# Example #2
0
def age_specific_rate(model,
                      data_type,
                      reference_area='all',
                      reference_sex='total',
                      reference_year='all',
                      mu_age=None,
                      mu_age_parent=None,
                      sigma_age_parent=None,
                      rate_type='neg_binom',
                      lower_bound=None,
                      interpolation_method='linear',
                      include_covariates=True,
                      zero_re=False):
    """ Generate PyMC objects for model of epidemiological age-interval data

    :Parameters:
      - `model` : data.ModelData
      - `data_type` : str, one of 'i', 'r', 'f', 'p', or 'pf'
      - `reference_area, reference_sex, reference_year` : the node of the model to fit consistently
      - `mu_age` : pymc.Node, will be used as the age pattern, set to None if not needed
      - `mu_age_parent` : pymc.Node, will be used as the age pattern of the parent of the root area, set to None if not needed
      - `sigma_age_parent` : pymc.Node, will be used as the standard deviation of the age pattern, set to None if not needed
      - `rate_type` : str, optional. One of 'neg_binom', 'log_normal', 'normal', 'binom', 'beta_binom', 'poisson', or 'offset_log_normal'; only consulted when there is at least one row of data
      - `lower_bound` : optional, key passed to `model.get_data` to fetch lower-bound data; when given and that data is non-empty, a lower-bound likelihood is added under the 'lb' key
      - `interpolation_method` : str, optional, one of 'linear', 'nearest', 'zero', 'slinear', 'quadratic', or 'cubic'
      - `include_covariates` : boolean, when False 'pi' is simply 'mu_interval'
      - `zero_re` : boolean, change one stoch from each set of siblings in area hierarchy to a 'sum to zero' deterministic

    :Results:
      - Returns a data.ModelVars whose `data_type` entry holds the PyMC
        objects built here, including 'pi', the covariate adjusted
        predicted values for each row of data (when there are rows of data)

    :Notes:
      - the 'standard_error' and 'effective_sample_size' columns of the
        data (and of the lower-bound data) are filled in and clipped in place
      - raises Exception when there is data and `rate_type` is not one of
        the values listed above
      - if `mu_age_parent` or `sigma_age_parent` contains nan, both are
        ignored (with a warning)

    """
    # TODO: expose (and document) interface for alternative rate_type as well as other options,
    # record reference values in the model
    name = data_type
    import data  # NOTE: the local name `data` is rebound to the data table below
    result = data.ModelVars()

    # `is not None` rather than `!= None`: comparing a numpy array to None
    # elementwise makes the `and` raise on arrays with more than one element
    if (mu_age_parent is not None and pl.any(pl.isnan(mu_age_parent))) \
           or (sigma_age_parent is not None and pl.any(pl.isnan(sigma_age_parent))):
        mu_age_parent = None
        sigma_age_parent = None
        print('WARNING: nan found in parent mu/sigma.  Ignoring')

    ages = pl.array(model.parameters['ages'])
    data = model.get_data(data_type)
    if lower_bound:
        lb_data = model.get_data(lower_bound)
    parameters = model.parameters.get(data_type, {})

    vars = dismod3.data.ModelVars()
    vars += dict(data=data)

    if 'parameter_age_mesh' in parameters:
        knots = pl.array(parameters['parameter_age_mesh'])
    else:
        knots = pl.arange(ages[0], ages[-1] + 1, 5)

    smoothing_dict = {
        'No Prior': pl.inf,
        'Slightly': .5,
        'Moderately': .05,
        'Very': .005
    }
    if 'smoothness' in parameters:
        smoothing = smoothing_dict[parameters['smoothness']['amount']]
    else:
        smoothing = 0.

    if mu_age is None:
        vars.update(
            age_pattern.age_pattern(name,
                                    ages=ages,
                                    knots=knots,
                                    smoothing=smoothing,
                                    interpolation_method=interpolation_method))
    else:
        vars.update(dict(mu_age=mu_age, ages=ages))

    vars.update(
        expert_prior_model.level_constraints(name, parameters, vars['mu_age'],
                                             ages))
    vars.update(
        expert_prior_model.derivative_constraints(name, parameters,
                                                  vars['mu_age'], ages))

    if mu_age_parent is not None:
        # setup a hierarchical prior on the similarity between the
        # consistent estimate here and (inconsistent) estimate for its
        # parent in the areas hierarchy
        #weight_dict = {'Unusable': 10., 'Slightly': 10., 'Moderately': 1., 'Very': .1}
        #weight = weight_dict[parameters['heterogeneity']]
        vars.update(
            similarity_prior_model.similar('parent_similarity_%s' % name,
                                           vars['mu_age'], mu_age_parent,
                                           sigma_age_parent, 0.))

        # also use this as the initial value for the age pattern, if it is not already specified
        if mu_age is None:
            if isinstance(mu_age_parent, mc.Node):  # TODO: test this code
                initial_mu = mu_age_parent.value
            else:
                initial_mu = mu_age_parent

            # initial_mu is indexed by age offset from the first age
            for i, k_i in enumerate(knots):
                vars['gamma'][i].value = (pl.log(
                    initial_mu[k_i - ages[0]])).clip(-12, 6)

    age_weights = pl.ones_like(
        vars['mu_age'].value
    )  # TODO: use age pattern appropriate to the rate type
    if len(data) > 0:
        vars.update(
            age_integrating_model.age_standardize_approx(
                name, age_weights, vars['mu_age'], data['age_start'],
                data['age_end'], ages))

        # uncomment the following to effectively remove all effects
        #if 'random_effects' in parameters:
        #    for i in range(5):
        #        effect = 'sigma_alpha_%s_%d' % (name, i)
        #        parameters['random_effects'][effect] = dict(dist='TruncatedNormal', mu=.0001, sigma=.00001, lower=.00009, upper=.00011)
        #if 'fixed_effects' in parameters:
        #    for effect in ['x_sex', 'x_LDI_id_Updated_7July2011']:
        #        parameters['fixed_effects'][effect] = dict(dist='normal', mu=.0001, sigma=.00001)

        if include_covariates:
            vars.update(
                covariate_model.mean_covariate_model(name,
                                                     vars['mu_interval'],
                                                     data,
                                                     parameters,
                                                     model,
                                                     reference_area,
                                                     reference_sex,
                                                     reference_year,
                                                     zero_re=zero_re))
        else:
            vars.update({'pi': vars['mu_interval']})

        ## ensure that all data has uncertainty quantified appropriately
        # first replace all missing se from ci
        missing_se = pl.isnan(
            data['standard_error']) | (data['standard_error'] < 0)
        data['standard_error'][missing_se] = (data['upper_ci'][missing_se] -
                                              data['lower_ci'][missing_se]) / (
                                                  2 * 1.96)

        # then replace all missing ess with value*(1-value)/se**2
        missing_ess = pl.isnan(data['effective_sample_size'])
        data['effective_sample_size'][missing_ess] = data['value'][
            missing_ess] * (1 - data['value'][missing_ess]
                            ) / data['standard_error'][missing_ess]**2

        if rate_type == 'neg_binom':

            # warn about data whose effective sample size is still missing
            # or is negative, and set its ess to zero
            missing_ess = pl.isnan(data['effective_sample_size']) | (
                data['effective_sample_size'] < 0)
            n_missing = sum(missing_ess)
            if n_missing > 0:
                print('WARNING: %d rows of %s data has invalid quantification of uncertainty.' % (
                    n_missing, name))
                data['effective_sample_size'][missing_ess] = 0.0

            # warn and change data where ess is unreasonably huge
            large_ess = data['effective_sample_size'] >= 1.e10
            n_large = sum(large_ess)
            if n_large > 0:
                print('WARNING: %d rows of %s data have effective sample size exceeding 10 billion.' % (
                    n_large, name))
                data['effective_sample_size'][large_ess] = 1.e10

            if 'heterogeneity' in parameters:
                lower_dict = {'Slightly': 9., 'Moderately': 3., 'Very': 1.}
                lower = lower_dict[parameters['heterogeneity']]
            else:
                lower = 1.

            # special case, treat pf data as poisson
            if data_type == 'pf':
                lower = 1.e12

            vars.update(
                covariate_model.dispersion_covariate_model(
                    name, data, lower, lower * 9.))

            vars.update(
                rate_model.neg_binom_model(name, vars['pi'], vars['delta'],
                                           data['value'],
                                           data['effective_sample_size']))
        elif rate_type == 'log_normal':

            # warn about data that still has no standard error, and give
            # it a very large one instead
            missing = pl.isnan(
                data['standard_error']) | (data['standard_error'] < 0)
            n_missing = sum(missing)
            if n_missing > 0:
                print('WARNING: %d rows of %s data has no quantification of uncertainty.' % (
                    n_missing, name))
                data['standard_error'][missing] = 1.e6

            # TODO: allow options for alternative priors for sigma
            vars['sigma'] = mc.Uniform('sigma_%s' % name,
                                       lower=.0001,
                                       upper=1.,
                                       value=.01)
            #vars['sigma'] = mc.Exponential('sigma_%s'%name, beta=100., value=.01)
            vars.update(
                rate_model.log_normal_model(name, vars['pi'], vars['sigma'],
                                            data['value'],
                                            data['standard_error']))
        elif rate_type == 'normal':

            # warn about data that still has no standard error, and give
            # it a very large one instead
            missing = pl.isnan(
                data['standard_error']) | (data['standard_error'] < 0)
            n_missing = sum(missing)
            if n_missing > 0:
                print('WARNING: %d rows of %s data has no quantification of uncertainty.' % (
                    n_missing, name))
                data['standard_error'][missing] = 1.e6

            vars['sigma'] = mc.Uniform('sigma_%s' % name,
                                       lower=.0001,
                                       upper=.1,
                                       value=.01)
            vars.update(
                rate_model.normal_model(name, vars['pi'], vars['sigma'],
                                        data['value'], data['standard_error']))
        elif rate_type == 'binom':
            missing_ess = pl.isnan(data['effective_sample_size']) | (
                data['effective_sample_size'] < 0)
            n_missing = sum(missing_ess)
            if n_missing > 0:
                print('WARNING: %d rows of %s data has invalid quantification of uncertainty.' % (
                    n_missing, name))
                data['effective_sample_size'][missing_ess] = 0.0
            vars += rate_model.binom(name, vars['pi'], data['value'],
                                     data['effective_sample_size'])
        elif rate_type == 'beta_binom':
            vars += rate_model.beta_binom(name, vars['pi'], data['value'],
                                          data['effective_sample_size'])
        elif rate_type == 'poisson':
            missing_ess = pl.isnan(data['effective_sample_size']) | (
                data['effective_sample_size'] < 0)
            n_missing = sum(missing_ess)
            if n_missing > 0:
                print('WARNING: %d rows of %s data has invalid quantification of uncertainty.' % (
                    n_missing, name))
                data['effective_sample_size'][missing_ess] = 0.0

            vars += rate_model.poisson(name, vars['pi'], data['value'],
                                       data['effective_sample_size'])
        elif rate_type == 'offset_log_normal':
            vars['sigma'] = mc.Uniform('sigma_%s' % name,
                                       lower=.0001,
                                       upper=10.,
                                       value=.01)
            vars += rate_model.offset_log_normal(name, vars['pi'],
                                                 vars['sigma'], data['value'],
                                                 data['standard_error'])
        else:
            raise Exception('rate_model "%s" not implemented' % rate_type)
    else:
        # no rows of data: there is no interval mean to adjust, so the
        # covariate model is built with an empty list in its place
        if include_covariates:
            vars.update(
                covariate_model.mean_covariate_model(name, [],
                                                     data,
                                                     parameters,
                                                     model,
                                                     reference_area,
                                                     reference_sex,
                                                     reference_year,
                                                     zero_re=zero_re))
    if include_covariates:
        vars.update(
            expert_prior_model.covariate_level_constraints(
                name, model, vars, ages))

    if lower_bound and len(lb_data) > 0:
        vars['lb'] = age_integrating_model.age_standardize_approx(
            'lb_%s' % name, age_weights, vars['mu_age'], lb_data['age_start'],
            lb_data['age_end'], ages)

        if include_covariates:

            vars['lb'].update(
                covariate_model.mean_covariate_model('lb_%s' % name,
                                                     vars['lb']['mu_interval'],
                                                     lb_data,
                                                     parameters,
                                                     model,
                                                     reference_area,
                                                     reference_sex,
                                                     reference_year,
                                                     zero_re=zero_re))
        else:
            vars['lb'].update({'pi': vars['lb']['mu_interval']})

        vars['lb'].update(
            covariate_model.dispersion_covariate_model(
                'lb_%s' % name, lb_data, 1e12, 1e13)  # treat like poisson
        )

        ## ensure that all data has uncertainty quantified appropriately
        # first replace all missing se from ci
        missing_se = pl.isnan(
            lb_data['standard_error']) | (lb_data['standard_error'] <= 0)
        lb_data['standard_error'][missing_se] = (
            lb_data['upper_ci'][missing_se] -
            lb_data['lower_ci'][missing_se]) / (2 * 1.96)

        # then replace all missing ess with value*(1-value)/se**2
        missing_ess = pl.isnan(lb_data['effective_sample_size'])
        lb_data['effective_sample_size'][missing_ess] = lb_data['value'][
            missing_ess] * (1 - lb_data['value'][missing_ess]
                            ) / lb_data['standard_error'][missing_ess]**2

        # warn about lb_data whose effective sample size is still missing
        # or non-positive, and set its ess to one
        missing_ess = pl.isnan(lb_data['effective_sample_size']) | (
            lb_data['effective_sample_size'] <= 0)
        n_missing = sum(missing_ess)
        if n_missing > 0:
            print('WARNING: %d rows of %s lower bound data has no quantification of uncertainty.' % (
                n_missing, name))
            lb_data['effective_sample_size'][missing_ess] = 1.0

        vars['lb'].update(
            rate_model.neg_binom_lower_bound_model(
                'lb_%s' % name, vars['lb']['pi'], vars['lb']['delta'],
                lb_data['value'], lb_data['effective_sample_size']))

    result[data_type] = vars
    return result
# Example #3
0
def age_specific_rate(
    model,
    data_type,
    reference_area="all",
    reference_sex="total",
    reference_year="all",
    mu_age=None,
    mu_age_parent=None,
    sigma_age_parent=None,
    rate_type="neg_binom",
    lower_bound=None,
    interpolation_method="linear",
    include_covariates=True,
    zero_re=False,
):
    """ Generate PyMC objects for model of epidemiological age-interval data

    :Parameters:
      - `model` : data.ModelData
      - `data_type` : str, one of 'i', 'r', 'f', 'p', or 'pf'
      - `reference_area, reference_sex, reference_year` : the node of the model to fit consistently
      - `mu_age` : pymc.Node, will be used as the age pattern, set to None if not needed
      - `mu_age_parent` : pymc.Node, will be used as the age pattern of the parent of the root area, set to None if not needed
      - `sigma_age_parent` : pymc.Node, will be used as the standard deviation of the age pattern, set to None if not needed
      - `rate_type` : str, optional. One of 'neg_binom', 'log_normal', 'normal', 'binom', 'beta_binom', 'poisson', or 'offset_log_normal'; only consulted when there is at least one row of data
      - `lower_bound` : optional, key passed to `model.get_data` to fetch lower-bound data; when given and that data is non-empty, a lower-bound likelihood is added under the 'lb' key
      - `interpolation_method` : str, optional, one of 'linear', 'nearest', 'zero', 'slinear', 'quadratic', or 'cubic'
      - `include_covariates` : boolean, when False 'pi' is simply 'mu_interval'
      - `zero_re` : boolean, change one stoch from each set of siblings in area hierarchy to a 'sum to zero' deterministic

    :Results:
      - Returns a data.ModelVars whose `data_type` entry holds the PyMC
        objects built here, including 'pi', the covariate adjusted
        predicted values for each row of data (when there are rows of data)

    :Notes:
      - the 'standard_error' and 'effective_sample_size' columns of the
        data (and of the lower-bound data) are filled in and clipped in place
      - raises Exception when there is data and `rate_type` is not one of
        the values listed above
      - if `mu_age_parent` or `sigma_age_parent` contains nan, both are
        ignored (with a warning)

    """
    # TODO: expose (and document) interface for alternative rate_type as well as other options,
    # record reference values in the model
    name = data_type
    import data  # NOTE: the local name `data` is rebound to the data table below

    result = data.ModelVars()

    # `is not None` rather than `!= None`: comparing a numpy array to None
    # elementwise makes the `and` raise on arrays with more than one element
    if (mu_age_parent is not None and pl.any(pl.isnan(mu_age_parent))) or (
        sigma_age_parent is not None and pl.any(pl.isnan(sigma_age_parent))
    ):
        mu_age_parent = None
        sigma_age_parent = None
        print("WARNING: nan found in parent mu/sigma.  Ignoring")

    ages = pl.array(model.parameters["ages"])
    data = model.get_data(data_type)
    if lower_bound:
        lb_data = model.get_data(lower_bound)
    parameters = model.parameters.get(data_type, {})

    vars = dismod3.data.ModelVars()
    vars += dict(data=data)

    if "parameter_age_mesh" in parameters:
        knots = pl.array(parameters["parameter_age_mesh"])
    else:
        knots = pl.arange(ages[0], ages[-1] + 1, 5)

    smoothing_dict = {"No Prior": pl.inf, "Slightly": 0.5, "Moderately": 0.05, "Very": 0.005}
    if "smoothness" in parameters:
        smoothing = smoothing_dict[parameters["smoothness"]["amount"]]
    else:
        smoothing = 0.0

    if mu_age is None:
        vars.update(
            age_pattern.age_pattern(
                name, ages=ages, knots=knots, smoothing=smoothing, interpolation_method=interpolation_method
            )
        )
    else:
        vars.update(dict(mu_age=mu_age, ages=ages))

    vars.update(expert_prior_model.level_constraints(name, parameters, vars["mu_age"], ages))
    vars.update(expert_prior_model.derivative_constraints(name, parameters, vars["mu_age"], ages))

    if mu_age_parent is not None:
        # setup a hierarchical prior on the similarity between the
        # consistent estimate here and (inconsistent) estimate for its
        # parent in the areas hierarchy
        # weight_dict = {'Unusable': 10., 'Slightly': 10., 'Moderately': 1., 'Very': .1}
        # weight = weight_dict[parameters['heterogeneity']]
        vars.update(
            similarity_prior_model.similar(
                "parent_similarity_%s" % name, vars["mu_age"], mu_age_parent, sigma_age_parent, 0.0
            )
        )

        # also use this as the initial value for the age pattern, if it is not already specified
        if mu_age is None:
            if isinstance(mu_age_parent, mc.Node):  # TODO: test this code
                initial_mu = mu_age_parent.value
            else:
                initial_mu = mu_age_parent

            # initial_mu is indexed by age offset from the first age
            for i, k_i in enumerate(knots):
                vars["gamma"][i].value = (pl.log(initial_mu[k_i - ages[0]])).clip(-12, 6)

    age_weights = pl.ones_like(vars["mu_age"].value)  # TODO: use age pattern appropriate to the rate type
    if len(data) > 0:
        vars.update(
            age_integrating_model.age_standardize_approx(
                name, age_weights, vars["mu_age"], data["age_start"], data["age_end"], ages
            )
        )

        # uncomment the following to effectively remove all effects
        # if 'random_effects' in parameters:
        #    for i in range(5):
        #        effect = 'sigma_alpha_%s_%d' % (name, i)
        #        parameters['random_effects'][effect] = dict(dist='TruncatedNormal', mu=.0001, sigma=.00001, lower=.00009, upper=.00011)
        # if 'fixed_effects' in parameters:
        #    for effect in ['x_sex', 'x_LDI_id_Updated_7July2011']:
        #        parameters['fixed_effects'][effect] = dict(dist='normal', mu=.0001, sigma=.00001)

        if include_covariates:
            vars.update(
                covariate_model.mean_covariate_model(
                    name,
                    vars["mu_interval"],
                    data,
                    parameters,
                    model,
                    reference_area,
                    reference_sex,
                    reference_year,
                    zero_re=zero_re,
                )
            )
        else:
            vars.update({"pi": vars["mu_interval"]})

        ## ensure that all data has uncertainty quantified appropriately
        # first replace all missing se from ci
        missing_se = pl.isnan(data["standard_error"]) | (data["standard_error"] < 0)
        data["standard_error"][missing_se] = (data["upper_ci"][missing_se] - data["lower_ci"][missing_se]) / (2 * 1.96)

        # then replace all missing ess with value*(1-value)/se**2
        missing_ess = pl.isnan(data["effective_sample_size"])
        data["effective_sample_size"][missing_ess] = (
            data["value"][missing_ess] * (1 - data["value"][missing_ess]) / data["standard_error"][missing_ess] ** 2
        )

        if rate_type == "neg_binom":

            # warn about data whose effective sample size is still missing
            # or is negative, and set its ess to zero
            missing_ess = pl.isnan(data["effective_sample_size"]) | (data["effective_sample_size"] < 0)
            n_missing = sum(missing_ess)
            if n_missing > 0:
                print(
                    "WARNING: %d rows of %s data has invalid quantification of uncertainty."
                    % (n_missing, name)
                )
                data["effective_sample_size"][missing_ess] = 0.0

            # warn and change data where ess is unreasonably huge
            large_ess = data["effective_sample_size"] >= 1.0e10
            n_large = sum(large_ess)
            if n_large > 0:
                print(
                    "WARNING: %d rows of %s data have effective sample size exceeding 10 billion."
                    % (n_large, name)
                )
                data["effective_sample_size"][large_ess] = 1.0e10

            if "heterogeneity" in parameters:
                lower_dict = {"Slightly": 9.0, "Moderately": 3.0, "Very": 1.0}
                lower = lower_dict[parameters["heterogeneity"]]
            else:
                lower = 1.0

            # special case, treat pf data as poisson
            if data_type == "pf":
                lower = 1.0e12

            vars.update(covariate_model.dispersion_covariate_model(name, data, lower, lower * 9.0))

            vars.update(
                rate_model.neg_binom_model(
                    name, vars["pi"], vars["delta"], data["value"], data["effective_sample_size"]
                )
            )
        elif rate_type == "log_normal":

            # warn about data that still has no standard error, and give
            # it a very large one instead
            missing = pl.isnan(data["standard_error"]) | (data["standard_error"] < 0)
            n_missing = sum(missing)
            if n_missing > 0:
                print("WARNING: %d rows of %s data has no quantification of uncertainty." % (n_missing, name))
                data["standard_error"][missing] = 1.0e6

            # TODO: allow options for alternative priors for sigma
            vars["sigma"] = mc.Uniform("sigma_%s" % name, lower=0.0001, upper=1.0, value=0.01)
            # vars['sigma'] = mc.Exponential('sigma_%s'%name, beta=100., value=.01)
            vars.update(
                rate_model.log_normal_model(name, vars["pi"], vars["sigma"], data["value"], data["standard_error"])
            )
        elif rate_type == "normal":

            # warn about data that still has no standard error, and give
            # it a very large one instead
            missing = pl.isnan(data["standard_error"]) | (data["standard_error"] < 0)
            n_missing = sum(missing)
            if n_missing > 0:
                print("WARNING: %d rows of %s data has no quantification of uncertainty." % (n_missing, name))
                data["standard_error"][missing] = 1.0e6

            vars["sigma"] = mc.Uniform("sigma_%s" % name, lower=0.0001, upper=0.1, value=0.01)
            vars.update(rate_model.normal_model(name, vars["pi"], vars["sigma"], data["value"], data["standard_error"]))
        elif rate_type == "binom":
            missing_ess = pl.isnan(data["effective_sample_size"]) | (data["effective_sample_size"] < 0)
            n_missing = sum(missing_ess)
            if n_missing > 0:
                print(
                    "WARNING: %d rows of %s data has invalid quantification of uncertainty."
                    % (n_missing, name)
                )
                data["effective_sample_size"][missing_ess] = 0.0
            vars += rate_model.binom(name, vars["pi"], data["value"], data["effective_sample_size"])
        elif rate_type == "beta_binom":
            vars += rate_model.beta_binom(name, vars["pi"], data["value"], data["effective_sample_size"])
        elif rate_type == "poisson":
            missing_ess = pl.isnan(data["effective_sample_size"]) | (data["effective_sample_size"] < 0)
            n_missing = sum(missing_ess)
            if n_missing > 0:
                print(
                    "WARNING: %d rows of %s data has invalid quantification of uncertainty."
                    % (n_missing, name)
                )
                data["effective_sample_size"][missing_ess] = 0.0

            vars += rate_model.poisson(name, vars["pi"], data["value"], data["effective_sample_size"])
        elif rate_type == "offset_log_normal":
            vars["sigma"] = mc.Uniform("sigma_%s" % name, lower=0.0001, upper=10.0, value=0.01)
            vars += rate_model.offset_log_normal(name, vars["pi"], vars["sigma"], data["value"], data["standard_error"])
        else:
            raise Exception('rate_model "%s" not implemented' % rate_type)
    else:
        # no rows of data: there is no interval mean to adjust, so the
        # covariate model is built with an empty list in its place
        if include_covariates:
            vars.update(
                covariate_model.mean_covariate_model(
                    name, [], data, parameters, model, reference_area, reference_sex, reference_year, zero_re=zero_re
                )
            )
    if include_covariates:
        vars.update(expert_prior_model.covariate_level_constraints(name, model, vars, ages))

    if lower_bound and len(lb_data) > 0:
        vars["lb"] = age_integrating_model.age_standardize_approx(
            "lb_%s" % name, age_weights, vars["mu_age"], lb_data["age_start"], lb_data["age_end"], ages
        )

        if include_covariates:

            vars["lb"].update(
                covariate_model.mean_covariate_model(
                    "lb_%s" % name,
                    vars["lb"]["mu_interval"],
                    lb_data,
                    parameters,
                    model,
                    reference_area,
                    reference_sex,
                    reference_year,
                    zero_re=zero_re,
                )
            )
        else:
            vars["lb"].update({"pi": vars["lb"]["mu_interval"]})

        vars["lb"].update(
            covariate_model.dispersion_covariate_model("lb_%s" % name, lb_data, 1e12, 1e13)  # treat like poisson
        )

        ## ensure that all data has uncertainty quantified appropriately
        # first replace all missing se from ci
        missing_se = pl.isnan(lb_data["standard_error"]) | (lb_data["standard_error"] <= 0)
        lb_data["standard_error"][missing_se] = (lb_data["upper_ci"][missing_se] - lb_data["lower_ci"][missing_se]) / (
            2 * 1.96
        )

        # then replace all missing ess with value*(1-value)/se**2
        missing_ess = pl.isnan(lb_data["effective_sample_size"])
        lb_data["effective_sample_size"][missing_ess] = (
            lb_data["value"][missing_ess]
            * (1 - lb_data["value"][missing_ess])
            / lb_data["standard_error"][missing_ess] ** 2
        )

        # warn about lb_data whose effective sample size is still missing
        # or non-positive, and set its ess to one
        missing_ess = pl.isnan(lb_data["effective_sample_size"]) | (lb_data["effective_sample_size"] <= 0)
        n_missing = sum(missing_ess)
        if n_missing > 0:
            print(
                "WARNING: %d rows of %s lower bound data has no quantification of uncertainty."
                % (n_missing, name)
            )
            lb_data["effective_sample_size"][missing_ess] = 1.0

        vars["lb"].update(
            rate_model.neg_binom_lower_bound_model(
                "lb_%s" % name,
                vars["lb"]["pi"],
                vars["lb"]["delta"],
                lb_data["value"],
                lb_data["effective_sample_size"],
            )
        )

    result[data_type] = vars
    return result

# --- top row: two simulated funnel plots, one per value of sigma ---
pl.figure(figsize=(11, 8.5), dpi=120)
pl.subplots_adjust(wspace=.4)
for panel, sigma_label in enumerate(['0.5', '0.1']):
    pl.subplot(2, 2, panel + 1)
    plot_funnel(.004, sigma_label)

# --- bottom row: fit the offset log-normal model to the observed data ---
pl.subplot(2, 1, 2)
r = pl.array(schiz['r'])
n = pl.array(schiz['n'])

pi = mc.Uniform('pi', 0, 1, value=.001)
sigma = mc.Uniform('sigma', 0, 100, value=.0001)
oln = rate_model.offset_log_normal('funnel',
                                   pi*pl.ones_like(n),
                                   sigma*pl.ones_like(n),
                                   r,
                                   pl.sqrt(r*(1-r)/n))

mcmc = mc.MCMC([pi, sigma, oln])
mcmc.sample(20000, 10000, 10, verbose=False, progress_bar=False)

# rank of each observation when ordered by rate; used as its x position
sorted_indices = r.argsort().argsort()

# one row of the trace per retained sample, one column per observation
p_pred_trace = oln['p_pred'].trace()
jitter = mc.rnormal(0, .1**-2, len(p_pred_trace))
for s_i, p_pred_draws in zip(sorted_indices, p_pred_trace.T):
    pl.plot(s_i+jitter, p_pred_draws, 'ko', mew=0, alpha=.25, zorder=-99)

# observed rates with 1.96 * binomial standard error bars: a thick white
# pass first, then the black pass on top of it
ci_half_width = 1.96*pl.sqrt(r*(1-r)/n)
for marker_fmt, bar_style in (('ws', dict(elinewidth=3, capsize=0)),
                              ('ks', dict())):
    pl.errorbar(sorted_indices, r, yerr=ci_half_width, fmt=marker_fmt,
                mew=1, mec='white', ms=5, **bar_style)

pl.xticks([])
pl.ylabel('Rate (per PY)')
pl.axis([-.5, 15.5, -.0001, .0121])