コード例 #1
0
def fit_posterior(dm,
                  region,
                  sex,
                  year,
                  fast_fit=False,
                  inconsistent_fit=False,
                  params_to_fit=['p', 'r', 'i'],
                  zero_re=True,
                  posteriors_only=False):
    """ Fit posterior of specified region/sex/year for specified model

    Loads the model data for ``dm``, installs the empirical priors stored
    on ``dm`` for the requested area, keeps only the input rows relevant
    to the area/sex/year, fits the model (consistently or one rate at a
    time), generates posterior predictions and tries to save a plot.

    Parameters
    ----------
    dm : DiseaseJson
      Disease model object; ``dm.id``, ``dm.to_json()``, ``dm.get_mcmc()``
      and ``dm.get_empirical_prior()`` are used here
    region : str
      From dismod3.settings.gbd_regions (dismod3.utils.clean is applied here)
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years
      Only 1990, 2005 and 2010 are implemented; any other year trips an
      assertion while selecting the relevant rows

    fast_fit : bool, if true run a short chain and don't try for
      convergence (fast for testing)
    inconsistent_fit : bool, if true fit parameters separately
    params_to_fit : list of params to fit, if not fitting all consistently
      (only iterated over, never modified)

    zero_re : bool, if true, enforce constraint that sibling area REs sum to zero
    posteriors_only : bool, if true use data from 1997-2007 for 2005 and
      from 2007 on for 2010 (m_all rows for 2010 are kept from 1997 on)

    Returns
    -------
    None in the code visible here; the ``posteriors`` dict is built but
    not returned.

    Example
    -------
    >>> import dismod3, fit_posterior
    >>> dm = dismod3.load_disease_model(2552)
    >>> fit_posterior.fit_posterior(dm, 'asia_east', 'male', '2005')
    """
    job_dir = dismod3.settings.JOB_WORKING_DIR % dm.id

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    try:
        model = data.ModelData.load(job_dir)
        print('loaded data from new format from %s' % job_dir)
    except (IOError, AssertionError):
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        # NOTE(review): saving is disabled, so the message below claims a
        # save that does not happen
        #model.save(job_dir)
        print('loaded data from json, saved in new format for next time in %s' % job_dir)

    # TODO: check for missing covariates, and have them fixed, instead of filling them with zeros

    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith('x_'):
            model.input_data[col] = model.input_data[col].fillna(0.)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    predict_area = dismod3.utils.clean(region)
    predict_sex = dismod3.utils.clean(sex)
    predict_year = int(year)

    ## load emp_priors dict from dm.params
    param_type = dict(i='incidence',
                      p='prevalence',
                      r='remission',
                      f='excess-mortality',
                      rr='relative-risk',
                      pf='prevalence_x_excess-mortality',
                      m_with='mortality')
    emp_priors = {}
    for t in 'i r p f'.split():

        # uncomment below to not use empirical prior for rate with zero data
        # if pl.all(model.input_data['data_type'] != t):
        #     continue

        # the key is built from the un-cleaned year and sex, as passed in
        #key = dismod3.utils.gbd_key_for(param_type[t], model.hierarchy.predecessors(predict_area)[0], year, sex)
        key = dismod3.utils.gbd_key_for(param_type[t], predict_area, year, sex)
        mu = dm.get_mcmc('emp_prior_mean', key)
        #mu = dm.get_mcmc('emp_prior_median', key)
        sigma = dm.get_mcmc('emp_prior_std', key)

        # only use the empirical prior when both curves have 101 entries
        if len(mu) == 101 and len(sigma) == 101:
            emp_priors[t, 'mu'] = mu

            # TODO: determine best way to propagate prior on function
            emp_priors[t, 'sigma'] = sigma

            # ALT 1: scale so that the joint probability is not a
            # function of the length of the age function
            # emp_priors[t, 'sigma'] = sigma * pl.sqrt(len(sigma))

        ## update model.parameters['random_effects'] if there is information in the disease model
        # empirical 'new_alpha' values form the base; expert priors override them
        expert_priors = model.parameters[t].get('random_effects', {})
        model.parameters[t]['random_effects'] = dm.get_empirical_prior(
            param_type[t]).get('new_alpha', {})
        model.parameters[t]['random_effects'].update(expert_priors)

        # shift random effects to make REs for observed children of predict area have mean zero
        # NOTE(review): neighbors() is called before the
        # "predict_area in model.hierarchy" assertion further down
        re_priors = model.parameters[t]['random_effects']
        observed_children = [area
                             for area in model.hierarchy.neighbors(predict_area)
                             if area in re_priors]
        # skip the shift when no child has a prior: the mean of an empty
        # list is NaN (with a warning) and there would be nothing to shift
        if observed_children:
            re_mean = pl.mean([re_priors[area]['mu']
                               for area in observed_children])
            for area in observed_children:
                re_priors[area]['mu'] -= re_mean

        ## update model.parameters['fixed_effects'] if there is information in the disease model
        # NOTE(review): expert_fe_priors is never used, so unlike the
        # random effects above the empirical 'new_beta' values overwrite
        # the expert fixed-effect priors -- confirm this is intended
        expert_fe_priors = model.parameters[t].get('fixed_effects', {})
        model.parameters[t]['fixed_effects'].update(
            dm.get_empirical_prior(param_type[t]).get('new_beta', {}))

    ## create model and priors for region/sex/year
    # select data that is about areas in this region, recent years, and sex of male or total only
    assert predict_area in model.hierarchy, 'region %s not found in area hierarchy' % predict_area
    subtree = nx.traversal.bfs_tree(model.hierarchy, predict_area)

    def is_relevant(r):
        """ Return True if input row r should be used for this area/sex/year """
        # area must be inside the predicted subtree, or the catch-all 'all'
        if (r['area'] not in subtree) and r['area'] != 'all':
            return False

        # year window depends on the prediction year and posteriors_only
        if predict_year == 1990:
            if r['year_start'] > 1997:
                return False
        elif predict_year == 2005:
            if posteriors_only:
                if r['year_end'] < 1997 or r['year_start'] > 2007:
                    return False
            else:
                if r['year_end'] < 1997:
                    return False
        elif predict_year == 2010:
            if posteriors_only:
                if r['data_type'] == 'm_all':
                    # include m_all data from 2005, since 2010 is not loaded
                    if r['year_end'] < 1997:
                        return False
                else:
                    if r['year_end'] < 2007:
                        return False
            else:
                if r['year_end'] < 1997:
                    return False
        else:
            assert 0, 'Predictions for year %d not yet implemented' % predict_year

        if r['sex'] not in [predict_sex, 'total']:
            return False

        return True

    # previous row-selection rule, kept only to cross-check is_relevant below
    old_relevant_rows = [
        i for i, r in model.input_data.T.iteritems()
        if (r['area'] in subtree or r['area'] == 'all')
        and ((predict_year >= 1997 and r['year_end'] >= 1997) or
             (predict_year <= 1997 and r['year_start'] <= 1997))
        and r['sex'] in [predict_sex, 'total']
    ]

    relevant_rows = model.input_data.index[model.input_data.apply(is_relevant,
                                                                  axis=1)]

    if predict_year == 1990:
        assert pl.all(
            relevant_rows == old_relevant_rows
        ), "relevant rows should be the same in new and old implementation for 1990"

    if not posteriors_only:
        assert pl.all(
            relevant_rows == old_relevant_rows
        ), "relevant rows should be the same in new and old implementation when posteriors_only is False"

    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with predict_area
    model.input_data['area'][model.input_data['area'] == 'all'] = predict_area

    if inconsistent_fit:
        # generate fits for requested parameters inconsistently
        for t in params_to_fit:
            model.vars += ism.age_specific_rate(
                model,
                t,
                reference_area=predict_area,
                reference_sex=predict_sex,
                reference_year=predict_year,
                mu_age=None,
                mu_age_parent=emp_priors.get((t, 'mu')),
                sigma_age_parent=emp_priors.get((t, 'sigma')),
                rate_type='log_normal' if t == 'rr' else 'neg_binom',
                zero_re=zero_re)
            if fast_fit:
                dismod3.fit.fit_asr(model,
                                    t,
                                    iter=101,
                                    burn=0,
                                    thin=1,
                                    tune_interval=100)
            else:
                # NOTE(review): iter/burn/thin are not defined in this
                # function; unless the module defines them, `iter` is the
                # builtin and `burn`/`thin` raise NameError -- confirm
                dismod3.fit.fit_asr(model,
                                    t,
                                    iter=iter,
                                    burn=burn,
                                    thin=thin,
                                    tune_interval=100)

    else:
        model.vars += ism.consistent(model,
                                     reference_area=predict_area,
                                     reference_sex=predict_sex,
                                     reference_year=predict_year,
                                     priors=emp_priors,
                                     zero_re=zero_re)

        ## fit model to data
        if fast_fit:
            # presumably (iter, burn, thin, tune_interval) -- verify
            # against dismod3.fit.fit_consistent
            dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, 105, 0, 1, 100)
        else:
            # NOTE(review): same undefined iter/burn/thin concern as above
            dm.map, dm.mcmc = dismod3.fit.fit_consistent(model,
                                                         iter=iter,
                                                         burn=burn,
                                                         thin=thin,
                                                         tune_interval=100,
                                                         verbose=True)

    # generate estimates
    posteriors = {}
    for t in 'i r f p rr pf m_with X'.split():
        if t in model.vars:
            # use the configured level bounds when present, else [0, inf)
            if t in model.parameters and 'level_bounds' in model.parameters[t]:
                lower = model.parameters[t]['level_bounds']['lower']
                upper = model.parameters[t]['level_bounds']['upper']
            else:
                lower = 0
                upper = pl.inf
            posteriors[t] = covariate_model.predict_for(
                model,
                model.parameters.get(t, {}),
                predict_area,
                predict_sex,
                predict_year,
                predict_area,
                predict_sex,
                predict_year,
                True,  # population weighted averages
                model.vars[t],
                lower,
                upper)

    # plotting is best-effort: any failure is reported and otherwise ignored
    try:
        # NOTE(review): no local `vars` exists in this function, so this
        # passes a module-level `vars` or the builtin -- confirm intent
        graphics.plot_fit(model, vars, emp_priors, {})
        pl.savefig(job_dir + '/image/posterior-%s+%s+%s.png' %
                   (predict_area, predict_sex, predict_year))
    except Exception as e:
        print('Error generating output graphics')
        print(e)
コード例 #2
0
def fit_emp_prior(id,
                  param_type,
                  fast_fit=False,
                  generate_emp_priors=True,
                  zero_re=True,
                  alt_prior=False,
                  global_heterogeneity='Slightly'):
    """ Fit empirical prior of specified type for specified model

    Loads the disease model, fits a single age-specific rate model to all
    data of the requested type, stores prediction summaries and (when
    requested) empirical priors on the disease model, writes diagnostic
    plots to the job working directory and saves the disease model.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    param_type : str, one of incidence, prevalence, remission, excess-mortality, prevalence_x_excess-mortality
      The disease parameter to generate empirical priors for
      (any other value raises KeyError)
    fast_fit : bool
      If true, run a short chain (iter=101, burn=0, thin=1) instead of
      the full one (iter=50000, burn=10000, thin=40)
    generate_emp_priors : bool
      If true, predict and store 'emp_prior_mean' and 'emp_prior_std'
      for every region/sex/year in dismod3.settings
    zero_re : bool
      Passed through to ism.age_specific_rate
    alt_prior : bool
      Passed positionally to covariate_model.predict_for; its meaning
      is defined there
    global_heterogeneity : str
      Value written over every existing 'heterogeneity' setting in
      model.parameters before fitting

    Returns
    -------
    dm : the disease model loaded with dismod3.load_disease_model(id)
      Returned both when there is no data of the requested type (nothing
      is fit) and after a completed fit.

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    """

    job_dir = dismod3.settings.JOB_WORKING_DIR % id

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    dm = dismod3.load_disease_model(id)

    try:
        model = data.ModelData.load(job_dir)
        print('loaded data from new format from %s' % job_dir)
    except (IOError, AssertionError):
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        # NOTE(review): saving is disabled, so the message below claims a
        # save that does not happen
        #model.save(job_dir)
        print('loaded data from json, saved in new format for next time in %s' % job_dir)

    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith('x_'):
            model.input_data[col] = model.input_data[col].fillna(0.)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    # set all heterogeneity priors to global_heterogeneity (default
    # 'Slightly') for the global fit
    for t in model.parameters:
        if 'heterogeneity' in model.parameters[t]:
            model.parameters[t]['heterogeneity'] = global_heterogeneity

    # translate the long parameter name into the short data type code
    t = {
        'incidence': 'i',
        'prevalence': 'p',
        'remission': 'r',
        'excess-mortality': 'f',
        'prevalence_x_excess-mortality': 'pf'
    }[param_type]
    model.input_data = model.get_data(t)
    if len(model.input_data) == 0:
        print('No data for type %s, exiting' % param_type)
        return dm

    ### For testing:
    ## speed up computation by reducing number of knots
    ## model.parameters[t]['parameter_age_mesh'] = [0, 10, 20, 40, 60, 100]

    ## smooth Slightly, Moderately, or Very
    ## model.parameters[t]['smoothness'] = dict(age_start=0, age_end=100, amount='Very')

    ## speed up computation be reducing data size
    ## predict_area = 'super-region_0'
    ## predict_year=2005
    ## predict_sex='total'
    ## subtree = nx.traversal.bfs_tree(model.hierarchy, predict_area)
    ## relevant_rows = [i for i, r in model.input_data.T.iteritems() \
    ##                      if (r['area'] in subtree or r['area'] == 'all')\
    ##                      and (r['year_end'] >= 1997) \
    ##                      and r['sex'] in [predict_sex, 'total']]
    ## model.input_data = model.input_data.ix[relevant_rows]

    # testing changes
    #model.input_data['effective_sample_size'] = pl.minimum(1.e3, model.input_data['effective_sample_size'])
    #missing_ess = pl.isnan(model.input_data['effective_sample_size'])
    #model.input_data['effective_sample_size'][missing_ess] = 1.
    #model.input_data['z_overdisperse'] = 1.
    #print model.describe(t)
    #model.input_data = model.input_data[model.input_data['area'].map(lambda x: x in nx.bfs_tree(model.hierarchy, 'super-region_5'))]
    #model.input_data = model.input_data = model.input_data.drop(['x_LDI_id_Updated_7July2011'], axis=1)
    #model.input_data = model.input_data.filter([model.input_data['x_nottroponinuse'] == 0.]
    #model.input_data = model.input_data[:100]

    ## speed up output by not making predictions for empirical priors
    #generate_emp_priors = False

    print('fitting %s' % t)
    model.vars += ism.age_specific_rate(
        model,
        t,
        reference_area='all',
        reference_sex='total',
        reference_year='all',
        mu_age=None,
        mu_age_parent=None,
        sigma_age_parent=None,
        rate_type='log_normal' if t == 'rr' else 'neg_binom',
        zero_re=zero_re)
    # for backwards compatibility, should be removed eventually
    dm.model = model
    dm.vars = model.vars[t]
    rate_vars = dm.vars

    if fast_fit:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(model,
                                              t,
                                              iter=101,
                                              burn=0,
                                              thin=1,
                                              tune_interval=100)
    else:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(model,
                                              t,
                                              iter=50000,
                                              burn=10000,
                                              thin=40,
                                              tune_interval=1000,
                                              verbose=True)

    # attach prediction summaries and residuals to the fitted data table
    stats = dm.vars['p_pred'].stats(batches=5)
    dm.vars['data']['mu_pred'] = stats['mean']
    dm.vars['data']['sigma_pred'] = stats['standard deviation']

    stats = dm.vars['pi'].stats(batches=5)
    dm.vars['data']['mc_error'] = stats['mc error']

    dm.vars['data'][
        'residual'] = dm.vars['data']['value'] - dm.vars['data']['mu_pred']
    dm.vars['data']['abs_residual'] = pl.absolute(dm.vars['data']['residual'])

    graphics.plot_fit(model,
                      data_types=[t],
                      ylab=['PY'],
                      plot_config=(1, 1),
                      fig_size=(8, 8))
    if generate_emp_priors:
        # the level bounds do not depend on area/sex/year, so look them
        # up once instead of once per prediction
        if t in model.parameters and 'level_bounds' in model.parameters[t]:
            lower = model.parameters[t]['level_bounds']['lower']
            upper = model.parameters[t]['level_bounds']['upper']
        else:
            lower = 0
            upper = pl.inf

        areas = [dismod3.utils.clean(r) for r in dismod3.settings.gbd_regions]
        for a in areas:
            print('generating empirical prior for %s' % a)
            for s in dismod3.settings.gbd_sexes:
                for y in dismod3.settings.gbd_years:
                    key = dismod3.utils.gbd_key_for(param_type, a, y, s)

                    emp_priors = covariate_model.predict_for(
                        model, model.parameters[t], 'all', 'total', 'all', a,
                        dismod3.utils.clean(s), int(y), alt_prior, rate_vars,
                        lower, upper)
                    dm.set_mcmc('emp_prior_mean', key, emp_priors.mean(0))

                    if 'eta' in rate_vars:
                        # widen the std with an extra term built from the
                        # trace of exp(eta)
                        N, A = emp_priors.shape  # N samples, for A age groups
                        delta_trace = pl.transpose([
                            pl.exp(rate_vars['eta'].trace()) for _ in range(A)
                        ])  # shape delta matrix to match prediction matrix
                        emp_prior_std = pl.sqrt(
                            emp_priors.var(0) +
                            (emp_priors**2 / delta_trace).mean(0))
                    else:
                        emp_prior_std = emp_priors.std(0)
                    dm.set_mcmc('emp_prior_std', key, emp_prior_std)

                    # overlay this prior on the fit figure
                    pl.plot(model.parameters['ages'],
                            dm.get_mcmc('emp_prior_mean', key),
                            color='grey',
                            label=a,
                            zorder=-10,
                            alpha=.5)
    pl.savefig(job_dir + '/prior-%s.png' % param_type)

    store_effect_coefficients(dm, rate_vars, param_type)

    #graphics.plot_one_ppc(rate_vars, t)
    #pl.savefig(job_dir + '/prior-%s-ppc.png'%param_type)

    graphics.plot_acorr(model)
    pl.savefig(job_dir + '/prior-%s-convergence.png' % param_type)
    graphics.plot_trace(model)
    pl.savefig(job_dir + '/prior-%s-trace.png' % param_type)

    graphics.plot_one_effects(model, t)
    pl.savefig(job_dir + '/prior-%s-effects.png' % param_type)

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    try:
        dm.save('dm-%d-prior-%s.json' % (id, param_type))
    except IOError as e:
        # a failed save is reported but does not abort the job
        print(e)

    # return the model on success too, matching the early "no data" exit
    return dm
コード例 #3
0
ファイル: fit_emp_prior.py プロジェクト: aflaxman/gbd
def fit_emp_prior(
    id,
    param_type,
    fast_fit=False,
    generate_emp_priors=True,
    zero_re=True,
    alt_prior=False,
    global_heterogeneity="Slightly",
):
    """ Fit empirical prior of specified type for specified model

    Loads the disease model, fits a single age-specific rate model to all
    data of the requested type, stores prediction summaries and (when
    requested) empirical priors on the disease model, writes diagnostic
    plots to the job working directory and saves the disease model.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    param_type : str, one of incidence, prevalence, remission, excess-mortality, prevalence_x_excess-mortality
      The disease parameter to generate empirical priors for
      (any other value raises KeyError)
    fast_fit : bool
      If true, run a short chain (iter=101, burn=0, thin=1) instead of
      the full one (iter=50000, burn=10000, thin=40)
    generate_emp_priors : bool
      If true, predict and store 'emp_prior_mean' and 'emp_prior_std'
      for every region/sex/year in dismod3.settings
    zero_re : bool
      Passed through to ism.age_specific_rate
    alt_prior : bool
      Passed positionally to covariate_model.predict_for; its meaning
      is defined there
    global_heterogeneity : str
      Value written over every existing 'heterogeneity' setting in
      model.parameters before fitting

    Returns
    -------
    dm : the disease model loaded with dismod3.load_disease_model(id)
      Returned both when there is no data of the requested type (nothing
      is fit) and after a completed fit.

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    """

    job_dir = dismod3.settings.JOB_WORKING_DIR % id

    ## load the model from disk or from web
    import simplejson as json
    import data

    reload(data)

    dm = dismod3.load_disease_model(id)

    try:
        model = data.ModelData.load(job_dir)
        print("loaded data from new format from %s" % job_dir)
    except (IOError, AssertionError):
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        # NOTE(review): saving is disabled, so the message below claims a
        # save that does not happen
        # model.save(job_dir)
        print("loaded data from json, saved in new format for next time in %s" % job_dir)

    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith("x_"):
            model.input_data[col] = model.input_data[col].fillna(0.0)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    # set all heterogeneity priors to global_heterogeneity (default
    # "Slightly") for the global fit
    for t in model.parameters:
        if "heterogeneity" in model.parameters[t]:
            model.parameters[t]["heterogeneity"] = global_heterogeneity

    # translate the long parameter name into the short data type code
    t = {
        "incidence": "i",
        "prevalence": "p",
        "remission": "r",
        "excess-mortality": "f",
        "prevalence_x_excess-mortality": "pf",
    }[param_type]
    model.input_data = model.get_data(t)
    if len(model.input_data) == 0:
        print("No data for type %s, exiting" % param_type)
        return dm

    ### For testing:
    ## speed up computation by reducing number of knots
    ## model.parameters[t]['parameter_age_mesh'] = [0, 10, 20, 40, 60, 100]

    ## smooth Slightly, Moderately, or Very
    ## model.parameters[t]['smoothness'] = dict(age_start=0, age_end=100, amount='Very')

    ## speed up computation be reducing data size
    ## predict_area = 'super-region_0'
    ## predict_year=2005
    ## predict_sex='total'
    ## subtree = nx.traversal.bfs_tree(model.hierarchy, predict_area)
    ## relevant_rows = [i for i, r in model.input_data.T.iteritems() \
    ##                      if (r['area'] in subtree or r['area'] == 'all')\
    ##                      and (r['year_end'] >= 1997) \
    ##                      and r['sex'] in [predict_sex, 'total']]
    ## model.input_data = model.input_data.ix[relevant_rows]

    # testing changes
    # model.input_data['effective_sample_size'] = pl.minimum(1.e3, model.input_data['effective_sample_size'])
    # missing_ess = pl.isnan(model.input_data['effective_sample_size'])
    # model.input_data['effective_sample_size'][missing_ess] = 1.
    # model.input_data['z_overdisperse'] = 1.
    # print model.describe(t)
    # model.input_data = model.input_data[model.input_data['area'].map(lambda x: x in nx.bfs_tree(model.hierarchy, 'super-region_5'))]
    # model.input_data = model.input_data = model.input_data.drop(['x_LDI_id_Updated_7July2011'], axis=1)
    # model.input_data = model.input_data.filter([model.input_data['x_nottroponinuse'] == 0.]
    # model.input_data = model.input_data[:100]

    ## speed up output by not making predictions for empirical priors
    # generate_emp_priors = False

    print("fitting %s" % t)
    model.vars += ism.age_specific_rate(
        model,
        t,
        reference_area="all",
        reference_sex="total",
        reference_year="all",
        mu_age=None,
        mu_age_parent=None,
        sigma_age_parent=None,
        rate_type="log_normal" if t == "rr" else "neg_binom",
        zero_re=zero_re,
    )
    # for backwards compatibility, should be removed eventually
    dm.model = model
    dm.vars = model.vars[t]
    rate_vars = dm.vars

    if fast_fit:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(model, t, iter=101, burn=0, thin=1, tune_interval=100)
    else:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(
            model, t, iter=50000, burn=10000, thin=40, tune_interval=1000, verbose=True
        )

    # attach prediction summaries and residuals to the fitted data table
    stats = dm.vars["p_pred"].stats(batches=5)
    dm.vars["data"]["mu_pred"] = stats["mean"]
    dm.vars["data"]["sigma_pred"] = stats["standard deviation"]

    stats = dm.vars["pi"].stats(batches=5)
    dm.vars["data"]["mc_error"] = stats["mc error"]

    dm.vars["data"]["residual"] = dm.vars["data"]["value"] - dm.vars["data"]["mu_pred"]
    dm.vars["data"]["abs_residual"] = pl.absolute(dm.vars["data"]["residual"])

    graphics.plot_fit(model, data_types=[t], ylab=["PY"], plot_config=(1, 1), fig_size=(8, 8))
    if generate_emp_priors:
        # the level bounds do not depend on area/sex/year, so look them
        # up once instead of once per prediction
        if t in model.parameters and "level_bounds" in model.parameters[t]:
            lower = model.parameters[t]["level_bounds"]["lower"]
            upper = model.parameters[t]["level_bounds"]["upper"]
        else:
            lower = 0
            upper = pl.inf

        areas = [dismod3.utils.clean(r) for r in dismod3.settings.gbd_regions]
        for a in areas:
            print("generating empirical prior for %s" % a)
            for s in dismod3.settings.gbd_sexes:
                for y in dismod3.settings.gbd_years:
                    key = dismod3.utils.gbd_key_for(param_type, a, y, s)

                    emp_priors = covariate_model.predict_for(
                        model,
                        model.parameters[t],
                        "all",
                        "total",
                        "all",
                        a,
                        dismod3.utils.clean(s),
                        int(y),
                        alt_prior,
                        rate_vars,
                        lower,
                        upper,
                    )
                    dm.set_mcmc("emp_prior_mean", key, emp_priors.mean(0))

                    if "eta" in rate_vars:
                        # widen the std with an extra term built from the
                        # trace of exp(eta)
                        N, A = emp_priors.shape  # N samples, for A age groups
                        delta_trace = pl.transpose(
                            [pl.exp(rate_vars["eta"].trace()) for _ in range(A)]
                        )  # shape delta matrix to match prediction matrix
                        emp_prior_std = pl.sqrt(emp_priors.var(0) + (emp_priors ** 2 / delta_trace).mean(0))
                    else:
                        emp_prior_std = emp_priors.std(0)
                    dm.set_mcmc("emp_prior_std", key, emp_prior_std)

                    # overlay this prior on the fit figure
                    pl.plot(
                        model.parameters["ages"],
                        dm.get_mcmc("emp_prior_mean", key),
                        color="grey",
                        label=a,
                        zorder=-10,
                        alpha=0.5,
                    )
    pl.savefig(job_dir + "/prior-%s.png" % param_type)

    store_effect_coefficients(dm, rate_vars, param_type)

    # graphics.plot_one_ppc(rate_vars, t)
    # pl.savefig(job_dir + '/prior-%s-ppc.png'%param_type)

    graphics.plot_acorr(model)
    pl.savefig(job_dir + "/prior-%s-convergence.png" % param_type)
    graphics.plot_trace(model)
    pl.savefig(job_dir + "/prior-%s-trace.png" % param_type)

    graphics.plot_one_effects(model, t)
    pl.savefig(job_dir + "/prior-%s-effects.png" % param_type)

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    try:
        dm.save("dm-%d-prior-%s.json" % (id, param_type))
    except IOError as e:
        # a failed save is reported but does not abort the job
        print(e)

    # return the model on success too, matching the early "no data" exit
    return dm
コード例 #4
0
ファイル: fit_posterior.py プロジェクト: aflaxman/gbd
def fit_posterior(dm, region, sex, year, fast_fit=False, 
                  inconsistent_fit=False, params_to_fit=['p', 'r', 'i'], zero_re=True,
                  posteriors_only=False):
    """ Fit posterior of specified region/sex/year for specified model

    Parameters
    ----------
    dm : DiseaseJson
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    fast_fit : sample 101 draws from posterior, don't try for convergence (fast for testing)
    inconsistent_fit : fit parameters  separately
    params_to_fit : list of params to fit, if not fitting all consistently

    zero_re : bool, if true, enforce constraint that sibling area REs sum to zero
    posteriors_only : bool, if tru use data from 1997-2007 for 2005 and from 2007 on for 2010

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    dir = dismod3.settings.JOB_WORKING_DIR % dm.id

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    try:
        model = data.ModelData.load(dir)
        print 'loaded data from new format from %s' % dir
    except (IOError, AssertionError):
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        #model.save(dir)
        print 'loaded data from json, saved in new format for next time in %s' % dir

    # TODO: check for missing covariates, and have them fixed, instead of filling them with zeros

    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith('x_'):
            model.input_data[col] = model.input_data[col].fillna(0.)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    predict_area = dismod3.utils.clean(region)
    predict_sex = dismod3.utils.clean(sex)
    predict_year = int(year)

    ## load emp_priors dict from dm.params
    param_type = dict(i='incidence', p='prevalence', r='remission', f='excess-mortality', rr='relative-risk', pf='prevalence_x_excess-mortality', m_with='mortality')
    emp_priors = {}
    for t in 'i r p f'.split():

        # uncomment below to not use empirical prior for rate with zero data
        # if pl.all(model.input_data['data_type'] != t):
        #     continue

        #key = dismod3.utils.gbd_key_for(param_type[t], model.hierarchy.predecessors(predict_area)[0], year, sex)
        key = dismod3.utils.gbd_key_for(param_type[t], predict_area, year, sex)
        mu = dm.get_mcmc('emp_prior_mean', key)
        #mu = dm.get_mcmc('emp_prior_median', key)
        sigma = dm.get_mcmc('emp_prior_std', key)
        
        if len(mu) == 101 and len(sigma) == 101:
            emp_priors[t, 'mu'] = mu

            # TODO: determine best way to propagate prior on function
            emp_priors[t, 'sigma'] = sigma
            
            # ALT 1: scale so that the joint probability is not a
            # function of the length of the age function
            # emp_priors[t, 'sigma'] = sigma * pl.sqrt(len(sigma))

        ## update model.parameters['random_effects'] if there is information in the disease model
        expert_priors = model.parameters[t].get('random_effects', {})
        model.parameters[t]['random_effects'] = dm.get_empirical_prior(param_type[t]).get('new_alpha', {})
        model.parameters[t]['random_effects'].update(expert_priors)

        # shift random effects to make REs for observed children of predict area have mean zero
        re_mean = pl.mean([model.parameters[t]['random_effects'][area]['mu'] \
                           for area in model.hierarchy.neighbors(predict_area) \
                           if area in model.parameters[t]['random_effects']])
        for area in model.hierarchy.neighbors(predict_area):
            if area in model.parameters[t]['random_effects']:
                model.parameters[t]['random_effects'][area]['mu'] -= re_mean
            

        ## update model.parameters['fixed_effects'] if there is information in the disease model
        expert_fe_priors = model.parameters[t].get('fixed_effects', {})
        model.parameters[t]['fixed_effects'].update(dm.get_empirical_prior(param_type[t]).get('new_beta', {}))


    ## create model and priors for region/sex/year
    # select data that is about areas in this region, recent years, and sex of male or total only
    assert predict_area in model.hierarchy, 'region %s not found in area hierarchy' % predict_area
    subtree = nx.traversal.bfs_tree(model.hierarchy, predict_area)

    def is_relevant(r):
        if (r['area'] not in subtree) and r['area'] != 'all':
            return False


        if predict_year == 1990:
            if r['year_start'] > 1997:
                return False
        elif predict_year == 2005:
            if posteriors_only:
                if r['year_end'] < 1997 or r['year_start'] > 2007:
                    return False
            else:
                if r['year_end'] < 1997:
                    return False
        elif predict_year == 2010:
            if posteriors_only:
                if r['data_type'] == 'm_all':
                    # include m_all data from 2005, since 2010 is not loaded
                    if r['year_end'] < 1997:
                        return False
                else:
                    if r['year_end'] < 2007:
                        return False
            else:
                if r['year_end'] < 1997:
                    return False
        else:
            assert 0, 'Predictions for year %d not yet implemented' % predict_year

        if r['sex'] not in [predict_sex, 'total']:
            return False

        return True
    
    old_relevant_rows = [i for i, r in model.input_data.T.iteritems() \
                         if (r['area'] in subtree or r['area'] == 'all')\
                         and ((predict_year >= 1997 and r['year_end'] >= 1997) or
                              (predict_year <= 1997 and r['year_start'] <= 1997)) \
                         and r['sex'] in [predict_sex, 'total']]

    relevant_rows = model.input_data.index[model.input_data.apply(is_relevant, axis=1)]

    if predict_year == 1990:
        assert pl.all(relevant_rows == old_relevant_rows), "relevant rows should be the same in new and old implementation for 1990"

    if not posteriors_only:
        assert pl.all(relevant_rows == old_relevant_rows), "relevant rows should be the same in new and old implementation when posteriors_only is False"
    
    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with predict_area
    model.input_data['area'][model.input_data['area'] == 'all'] = predict_area

    if inconsistent_fit:
        # generate fits for requested parameters inconsistently
        for t in params_to_fit:
            model.vars += ism.age_specific_rate(model, t,
                                            reference_area=predict_area, reference_sex=predict_sex, reference_year=predict_year,
                                            mu_age=None,
                                            mu_age_parent=emp_priors.get((t, 'mu')),
                                            sigma_age_parent=emp_priors.get((t, 'sigma')),
                                            rate_type=(t == 'rr') and 'log_normal' or 'neg_binom',
                                            zero_re=zero_re)
            if fast_fit:
                dismod3.fit.fit_asr(model, t, iter=101, burn=0, thin=1, tune_interval=100)
            else:
                dismod3.fit.fit_asr(model, t, iter=iter, burn=burn, thin=thin, tune_interval=100)

    else:
        model.vars += ism.consistent(model,
                                     reference_area=predict_area, reference_sex=predict_sex, reference_year=predict_year,
                                     priors=emp_priors, zero_re=zero_re)

        ## fit model to data
        if fast_fit:
            dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, 105, 0, 1, 100)
        else:
            dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, iter=iter, burn=burn, thin=thin, tune_interval=100, verbose=True)


    # generate estimates
    posteriors = {}
    for t in 'i r f p rr pf m_with X'.split():
        if t in model.vars:
            if t in model.parameters and 'level_bounds' in model.parameters[t]:
                lower=model.parameters[t]['level_bounds']['lower']
                upper=model.parameters[t]['level_bounds']['upper']
            else:
                lower=0
                upper=pl.inf
            posteriors[t] = covariate_model.predict_for(model,
                                                        model.parameters.get(t, {}),
                                                        predict_area, predict_sex, predict_year,
                                                        predict_area, predict_sex, predict_year,
                                                        True,  # population weighted averages
                                                        model.vars[t], lower, upper)
    try:
        graphics.plot_fit(model, vars, emp_priors, {})
        pl.savefig(dir + '/image/posterior-%s+%s+%s.png'%(predict_area, predict_sex, predict_year))
    except Exception, e:
        print 'Error generating output graphics'
        print e
コード例 #5
0
def fit_world(id, fast_fit=False, zero_re=True, alt_prior=False, global_heterogeneity='Slightly'):
    """ Fit consistent for all data in world

    Loads the model for job `id`, fits the consistent model with
    reference area/sex/year 'all'/'total'/'all', stores an empirical
    prior mean and standard deviation in the disease model for every
    GBD region/sex/year, writes plots and per-type data tables under the
    job working directory, and finally saves the disease model.
    Nothing is returned.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    fast_fit : bool, if true call fit_consistent(model, 105, 0, 1, 100)
      instead of the long chain (iter=50000, burn=10000, thin=40)
    zero_re : bool, passed through to dismod3.ism.consistent
    alt_prior : bool, passed through to covariate_model.predict_for when
      the empirical priors are generated
    global_heterogeneity : str, value that overwrites every
      'heterogeneity' entry found in model.parameters before fitting

    Example
    -------
    >>> import fit_world
    >>> fit_world.fit_world(1234)
    """

    # named job_dir (not dir) so the builtin is not shadowed
    job_dir = dismod3.settings.JOB_WORKING_DIR % id

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    try:
        model = data.ModelData.load(job_dir)
        print('loaded data from new format from %s' % job_dir)
        dm = dismod3.load_disease_model(id)
    except (IOError, AssertionError):
        dm = dismod3.load_disease_model(id)
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        try:
            model.save(job_dir)
            print('loaded data from json, saved in new format for next time in %s' % job_dir)
        except IOError:
            print('loaded data from json, failed to save in new format')


    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith('x_'):
            model.input_data[col] = model.input_data[col].fillna(0.)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    # set all heterogeneity priors to global_heterogeneity (default
    # 'Slightly') for the global fit
    for t in model.parameters:
        if 'heterogeneity' in model.parameters[t]:
            model.parameters[t]['heterogeneity'] = global_heterogeneity

    ### For testing:
    ## speed up computation by reducing number of knots
    ## for t in 'irf':
    ##     model.parameters[t]['parameter_age_mesh'] = [0, 100]
    model.vars += dismod3.ism.consistent(model,
                                         reference_area='all',
                                         reference_sex='total',
                                         reference_year='all',
                                         priors={},
                                         zero_re=zero_re)

    ## fit model to data
    if fast_fit:
        dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, 105, 0, 1, 100)
    else:
        dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, iter=50000, burn=10000, thin=40, tune_interval=1000, verbose=True)

    dm.model = model

    # borrow strength to inform sigma_alpha between rate types post-hoc
    types_with_re = ['rr', 'f', 'i', 'm', 'smr', 'p', 'r', 'pf', 'm_with', 'X']
    ## first calculate sigma_alpha_bar from posterior draws from each alpha
    alpha_vals = []
    for rate_type in types_with_re:
        if 'alpha' in model.vars[rate_type]:
            for alpha_i in model.vars[rate_type]['alpha']:
                alpha_vals += [a for a in alpha_i.trace() if a != 0]  # remove zeros because areas with no siblings are included for convenience but are pinned to zero
    ## then blend sigma_alpha_i and sigma_alpha_bar for each sigma_alpha_i
    # NOTE(review): the "blend" stored in the trace is the sum
    # cur_val + sigma_alpha_bar, not an average -- confirm this is intended
    if alpha_vals:
        sigma_alpha_bar = pl.std(alpha_vals)
        for rate_type in types_with_re:
            if 'sigma_alpha' in model.vars[rate_type]:
                for sigma_alpha_i in model.vars[rate_type]['sigma_alpha']:
                    cur_val = sigma_alpha_i.trace()
                    sigma_alpha_i.trace._trace[0] = (cur_val + sigma_alpha_bar) * pl.ones_like(sigma_alpha_i.trace._trace[0])


    # loop-invariant setup for the empirical prior generation below
    from fit_emp_prior import store_effect_coefficients
    param_type_of = dict(i='incidence', r='remission', f='excess-mortality', p='prevalence', rr='relative-risk', pf='prevalence_x_excess-mortality', m_with='mortality')
    regions = [dismod3.utils.clean(r) for r in dismod3.settings.gbd_regions]

    for t in 'p i r f rr pf m_with'.split():
        param_type = param_type_of[t]
        #graphics.plot_one_type(model, model.vars[t], {}, t)

        # the level bounds depend only on the rate type, so look them up once
        if t in model.parameters and 'level_bounds' in model.parameters[t]:
            lower = model.parameters[t]['level_bounds']['lower']
            upper = model.parameters[t]['level_bounds']['upper']
        else:
            lower = 0
            upper = pl.inf

        for a in regions:
            print('generating empirical prior for %s' % a)
            for s in dismod3.settings.gbd_sexes:
                clean_sex = dismod3.utils.clean(s)
                for y in dismod3.settings.gbd_years:
                    # the key uses the raw sex label; the prediction uses the clean()-ed one
                    key = dismod3.utils.gbd_key_for(param_type, a, y, s)

                    emp_priors = covariate_model.predict_for(model,
                                                             model.parameters.get(t, {}),
                                                             'all', 'total', 'all',
                                                             a, clean_sex, int(y),
                                                             alt_prior,
                                                             model.vars[t], lower, upper)
                    dm.set_mcmc('emp_prior_mean', key, emp_priors.mean(0))
                    if 'eta' in model.vars[t]:
                        n_draws, n_ages = emp_priors.shape  # N samples, for A age groups
                        delta_trace = pl.transpose([pl.exp(model.vars[t]['eta'].trace()) for _ in range(n_ages)])  # shape delta matrix to match prediction matrix
                        # add the over-dispersion term mean(pred**2 / delta) to the variance across draws
                        emp_prior_std = pl.sqrt(emp_priors.var(0) + (emp_priors**2 / delta_trace).mean(0))
                    else:
                        emp_prior_std = emp_priors.std(0)
                    dm.set_mcmc('emp_prior_std', key, emp_prior_std)


        store_effect_coefficients(dm, model.vars[t], param_type)


        if 'p_pred' in model.vars[t]:
            graphics.plot_one_ppc(model, t)
            pl.savefig(job_dir + '/prior-%s-ppc.png'%param_type)

        if 'p_pred' in model.vars[t] or 'lb' in model.vars[t]:
            graphics.plot_one_effects(model, t)
            pl.savefig(job_dir + '/prior-%s-effects.png'%param_type)


    for t in 'i r f p rr pf X m_with smr'.split():
        fname = job_dir + '/empirical_priors/data-%s.csv'%t
        # same output as the statement form "print a, b, c, d" (space separated)
        print('saving tables for %s to %s' % (t, fname))
        if 'data' in model.vars[t] and 'p_pred' in model.vars[t]:
            stats = model.vars[t]['p_pred'].stats(batches=5)
            model.vars[t]['data']['mu_pred'] = stats['mean']
            model.vars[t]['data']['sigma_pred'] = stats['standard deviation']

            stats = model.vars[t]['pi'].stats(batches=5)
            model.vars[t]['data']['mc_error'] = stats['mc error']

            model.vars[t]['data']['residual'] = model.vars[t]['data']['value'] - model.vars[t]['data']['mu_pred']
            model.vars[t]['data']['abs_residual'] = pl.absolute(model.vars[t]['data']['residual'])
            #if 'delta' in model.vars[t]:
            #    model.vars[t]['data']['logp'] = [mc.negative_binomial_like(n*p_obs, n*p_pred, n*p_pred*d) for n, p_obs, p_pred, d \
            #                                  in zip(model.vars[t]['data']['effective_sample_size'],
            #                                         model.vars[t]['data']['value'],
            #                                         model.vars[t]['data']['mu_pred'],
            #                                         pl.atleast_1d(model.vars[t]['delta'].stats()['mean']))]
            model.vars[t]['data'].to_csv(fname)


    graphics.plot_fit(model)
    pl.savefig(job_dir + '/prior.png')

    graphics.plot_acorr(model)
    pl.savefig(job_dir + '/prior-convergence.png')

    graphics.plot_trace(model)
    pl.savefig(job_dir + '/prior-trace.png')

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    try:
        dm.save('dm-%d-prior-%s.json' % (dm.id, 'all'))
    except IOError as e:
        # best effort: report the failure but do not abort the job
        print(e)
コード例 #6
0
def validate_consistent_re(N=500, delta_true=.15, sigma_true=[.1,.1,.1,.1,.1],
                           true=dict(i=quadratic, f=constant, r=constant)):
    """ Simulate data with area random effects, fit the consistent
    model to it, and compare the estimates to the simulated truth

    Parameters
    ----------
    N : int, number of simulated rows of input data
    delta_true : float, second argument of mc.rnegative_binomial when
      the observed values are drawn; also recorded as the true delta
    sigma_true : list of floats, passed to alpha_true_sim and recorded as
      the true value for each sigma_alpha (the default list is shared
      between calls; this function does not modify it)
    true : dict, maps 'i', 'r', and 'f' to a function of age whose
      log sets the gamma value at each knot of the simulation model

    Results
    -------
    Returns the model, with input_data, delta, alpha, sigma, mu, and
    results attributes holding the comparison of estimate and truth
    """
    types = pl.array(['i', 'r', 'f', 'p'])

    ## generate simulated data
    model = data_simulation.simple_model(N)
    model.input_data['effective_sample_size'] = 1.
    model.input_data['value'] = 0.
    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    sim = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    for t in 'irf':
        for i, k_i in enumerate(sim[t]['knots']):
            sim[t]['gamma'][i].value = pl.log(true[t](k_i))

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    data_type = types[mc.rcategorical(pl.ones(len(types), dtype=float) / float(len(types)), size=N)]


    a = pl.arange(101)
    age_weights = pl.ones_like(a)
    sum_wt = pl.cumsum(age_weights)

    p = pl.zeros(N)
    for t in types:
        mu_t = sim[t]['mu_age'].value
        sum_mu_wt = pl.cumsum(mu_t*age_weights)

        # age-weighted average of mu_t over each row's age interval
        p_t = (sum_mu_wt[age_end] - sum_mu_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

        # correct cases where age_start == age_end
        i = age_start == age_end
        if pl.any(i):
            p_t[i] = mu_t[age_start[i]]

        # copy part into p
        p[data_type==t] = p_t[data_type==t]


    # add covariate shifts
    import dismod3
    import simplejson as json
    gbd_model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.hierarchy = gbd_model.hierarchy

    from validate_covariates import alpha_true_sim
    area_list = pl.array(['all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT', 'IRN', 'IRQ', 'JOR', 'SYR'])
    alpha = {}
    for t in types:
        alpha[t] = alpha_true_sim(model, area_list, sigma_true)
    print(json.dumps(alpha, indent=2))

    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]

    for i, area in model.input_data['area'].iteritems():
        t = data_type[i]
        # alpha is keyed by rate type first, then by area, so membership
        # must be tested against alpha[t]; testing against alpha itself
        # never matches an area name and silently drops every effect
        path_to_area = nx.shortest_path(model.hierarchy, 'all', area)
        p[i] = p[i] * pl.exp(pl.sum([alpha[t][node] for node in path_to_area if node in alpha[t]]))

    n = mc.runiform(100, 10000, size=N)

    model.input_data['data_type'] = data_type
    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true) / n

    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    #model.map, model.mcmc = fit_model.fit_consistent_model(model.vars, iter=101, burn=0, thin=1, tune_interval=100)
    model.map, model.mcmc = fit_model.fit_consistent_model(model.vars, iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_convergence_diag(model.vars)

    graphics.plot_fit(model, model.vars, {}, {})
    for i, t in enumerate('i r f p rr pf'.split()):
        pl.subplot(2, 3, i+1)
        # thin red line drawn over a thicker white one so the truth stands out
        pl.plot(range(101), sim[t]['mu_age'].value, 'w-', label='Truth', linewidth=2)
        pl.plot(range(101), sim[t]['mu_age'].value, 'r-', label='Truth', linewidth=1)

    pl.show()

    model.input_data['mu_pred'] = 0.
    model.input_data['sigma_pred'] = 0.
    for t in types:
        pred_stats = model.vars[t]['p_pred'].stats()
        model.input_data['mu_pred'][data_type==t] = pred_stats['mean']
        model.input_data['sigma_pred'][data_type==t] = pred_stats['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true for t in types if t != 'rr']))
    model.delta['mu_pred'] = [pl.exp(model.vars[t]['eta'].trace()).mean() for t in types if t != 'rr']
    model.delta['sigma_pred'] = [pl.exp(model.vars[t]['eta'].trace()).std() for t in types if t != 'rr']
    data_simulation.add_quality_metrics(model.delta)

    model.alpha = pandas.DataFrame()
    model.sigma = pandas.DataFrame()
    for t in types:
        alpha_t = pandas.DataFrame(index=[node for node in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
        alpha_t['true'] = pandas.Series(dict(alpha[t]))
        alpha_t['mu_pred'] = pandas.Series([re.stats()['mean'] for re in model.vars[t]['alpha']], index=model.vars[t]['U'].columns)
        alpha_t['sigma_pred'] = pandas.Series([re.stats()['standard deviation'] for re in model.vars[t]['alpha']], index=model.vars[t]['U'].columns)
        alpha_t['type'] = t
        # dropna keeps only rows that have both a true and a predicted value
        model.alpha = model.alpha.append(alpha_t.dropna(), ignore_index=True)

        sigma_t = pandas.DataFrame(dict(true=sigma_true))
        sigma_t['mu_pred'] = [s.stats()['mean'] for s in model.vars[t]['sigma_alpha']]
        sigma_t['sigma_pred'] = [s.stats()['standard deviation'] for s in model.vars[t]['sigma_alpha']]
        model.sigma = model.sigma.append(sigma_t.dropna(), ignore_index=True)

    data_simulation.add_quality_metrics(model.alpha)
    data_simulation.add_quality_metrics(model.sigma)


    print('delta')
    print(model.delta)

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean()))

    model.mu = pandas.DataFrame()
    for t in types:
        mu_stats = model.vars[t]['mu_age'].stats()
        model.mu = model.mu.append(pandas.DataFrame(dict(true=sim[t]['mu_age'].value,
                                                         mu_pred=mu_stats['mean'],
                                                         sigma_pred=mu_stats['standard deviation'])),
                                   ignore_index=True)
    data_simulation.add_quality_metrics(model.mu)
    print('\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.mu['abs_err'].mean(),
                                                                         pl.median(pl.absolute(model.mu['rel_err'].dropna())),
                                                                         model.mu['covered?'].mean()))
    print('')


    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print(model.results)

    return model
コード例 #7
0
def validate_consistent_model_sim(N=500,
                                  delta_true=.5,
                                  true=dict(i=quadratic,
                                            f=constant,
                                            r=constant)):
    """ Simulate data from known age-specific rates, fit the consistent
    model to it, and compare the estimates to the simulated truth

    Parameters
    ----------
    N : int, number of simulated rows of input data
    delta_true : float, scaled by n*p to give the second argument of
      mc.rnegative_binomial when the observed values are drawn; also
      recorded as the true delta
    true : dict, maps 'i', 'r', and 'f' to a function of age whose
      log sets the gamma value at each knot of the simulation model

    Results
    -------
    Returns the model, with input_data, delta, mu, and results
    attributes holding the comparison of estimate and truth
    """
    types = pl.array(['i', 'r', 'f', 'p'])

    ## generate simulated data
    model = data_simulation.simple_model(N)
    model.input_data['effective_sample_size'] = 1.
    model.input_data['value'] = 0.

    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    sim = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    for t in 'irf':
        for i, k_i in enumerate(sim[t]['knots']):
            sim[t]['gamma'][i].value = pl.log(true[t](k_i))

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    data_type = types[mc.rcategorical(pl.ones(len(types), dtype=float) /
                                      float(len(types)),
                                      size=N)]

    a = pl.arange(101)
    age_weights = pl.ones_like(a)
    sum_wt = pl.cumsum(age_weights)

    p = pl.zeros(N)
    for t in types:
        mu_t = sim[t]['mu_age'].value
        sum_mu_wt = pl.cumsum(mu_t * age_weights)

        # age-weighted average of mu_t over each row's age interval
        p_t = (sum_mu_wt[age_end] - sum_mu_wt[age_start]) / (sum_wt[age_end] -
                                                             sum_wt[age_start])

        # correct cases where age_start == age_end
        i = age_start == age_end
        if pl.any(i):
            p_t[i] = mu_t[age_start[i]]

        # copy part into p
        p[data_type == t] = p_t[data_type == t]
    n = mc.runiform(100, 10000, size=N)

    model.input_data['data_type'] = data_type
    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n * p,
                                                      delta_true * n * p) / n

    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars = consistent_model.consistent_model(model, 'all', 'total',
                                                   'all', {})
    model.map, model.mcmc = fit_model.fit_consistent_model(model.vars,
                                                           iter=10000,
                                                           burn=5000,
                                                           thin=25,
                                                           tune_interval=100)

    graphics.plot_convergence_diag(model.vars)

    graphics.plot_fit(model, model.vars, {}, {})
    for i, t in enumerate('i r f p rr pf'.split()):
        pl.subplot(2, 3, i + 1)
        # thin red line drawn over a thicker white one so the truth stands out
        pl.plot(a, sim[t]['mu_age'].value, 'w-', label='Truth', linewidth=2)
        pl.plot(a, sim[t]['mu_age'].value, 'r-', label='Truth', linewidth=1)

    #graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    #pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    model.input_data['mu_pred'] = 0.
    model.input_data['sigma_pred'] = 0.
    for t in types:
        # both summaries must come from the rows' own rate type t; taking
        # sigma_pred from model.vars['p'] would give every type the
        # prevalence uncertainty
        pred_stats = model.vars[t]['p_pred'].stats()
        model.input_data['mu_pred'][data_type == t] = pred_stats['mean']
        model.input_data['sigma_pred'][
            data_type == t] = pred_stats['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(
        dict(true=[delta_true for t in types if t != 'rr']))
    model.delta['mu_pred'] = [
        pl.exp(model.vars[t]['eta'].trace()).mean() for t in types if t != 'rr'
    ]
    model.delta['sigma_pred'] = [
        pl.exp(model.vars[t]['eta'].trace()).std() for t in types if t != 'rr'
    ]
    data_simulation.add_quality_metrics(model.delta)

    print('delta')
    print(model.delta)

    print('\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean()))

    model.mu = pandas.DataFrame()
    for t in types:
        mu_stats = model.vars[t]['mu_age'].stats()
        model.mu = model.mu.append(pandas.DataFrame(
            dict(true=sim[t]['mu_age'].value,
                 mu_pred=mu_stats['mean'],
                 sigma_pred=mu_stats['standard deviation'])),
                                   ignore_index=True)
    data_simulation.add_quality_metrics(model.mu)
    print('\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.mu['abs_err'].mean(),
        pl.median(pl.absolute(
            model.mu['rel_err'].dropna())), model.mu['covered?'].mean()))
    print('')

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.finalize_results(model)

    print(model.results)

    return model