Example #1
0
def check_emp_prior_fits(dm):
    """Compare empirical-prior predictions with every observed datum of dm.

    For each entry of ``dm.vars['data']`` the empirical prior of that
    datum's rate type is turned into a country-level rate prediction over
    ages 0..100, collapsed to the datum's age range with
    ``dismod3.utils.rate_for_range``, and compared with the observed value
    ``dm.value_per_1(d)``.  One line per datum and the median error are
    printed, framed by the name of the calling function.

    Parameters
    ----------
    dm : disease model object
        Must provide ``vars['data']``, ``get_empirical_prior``,
        ``get_covariates`` and ``value_per_1``.

    Returns
    -------
    list
        Absolute relative error of each prediction, in percent, in the
        order of ``dm.vars['data']``.
    """
    are = []  # absolute relative errors, in percent

    # label the output with the name of the calling function
    print('********************* %s' % (inspect.stack()[1][3],))

    for d in dm.vars['data']:
        data_type = d['data_type'].replace(' data', '')
        prior = dm.get_empirical_prior(data_type)

        # data starting before 1997 uses the 1990 key, everything else 2005
        year = 1990 if d['year_start'] < 1997 else 2005
        key = dismod3.utils.gbd_key_for(data_type, d['gbd_region'], year, d['sex'])

        prediction = neg_binom_model.predict_country_rate(
            key,
            d['country_iso3_code'],
            prior['alpha'], prior['beta'], prior['gamma'],
            dm.get_covariates(),
            lambda f, age: f,  # pass-through: returns its first argument unchanged
            arange(101))
        data_prediction = dismod3.utils.rate_for_range(
            prediction,
            arange(d['age_start'], d['age_end'] + 1),
            d['age_weights'])

        # test distance of predicted data value from observed data value
        # NOTE(review): an observed value of 0 makes this division fail or
        # yield inf -- confirm whether such rows can occur.
        observed = dm.value_per_1(d)
        are.append(abs(100 * (data_prediction / observed - 1.)))
        print('%s %s %s %s %s' % (data_type, d['age_start'], observed,
                                  data_prediction, are[-1]))

    print('median absolue relative error: %s' % (median(are),))
    print('*********************\n\n\n\n\n')
    return are
Example #2
0
def save_country_level_posterior(dm, region, year, sex, rate_type_list):
    """ Save country level posterior in a csv file, and put the file in the
    directory job_working_directory/posterior/country_level_posterior_dm-'id'

    For every country of the region and every rate type, one row per age
    is written holding the prediction averaged over the ``age_coeffs``
    trace.  Writing is best-effort: any error is reported on stdout and
    the function returns normally.

    Parameters:
    -----------
      dm : DiseaseJson object
        disease model
      region : str
      year : str
        1990 or 2005
      sex : str
        male or female
      rate_type_list : list
        list of rate types
    """
    import csv

    # names used in the 'Rate type' column when they differ from the key
    output_names = {'mortality': 'with-condition mortality',
                    'relative-risk': 'rr mortality'}

    # get covariate dict from dm
    covariates_dict = dm.get_covariates()

    # directory (inside the job working directory) and name of the output file
    job_wd = dismod3.settings.JOB_WORKING_DIR % dm.id
    out_dir = job_wd + '/posterior/country_level_posterior_dm-' + str(dm.id) + '/'
    filename = 'dm-%s-%s-%s-%s.csv' % (str(dm.id), region, sex, year)

    try:
        # the with-block closes the file even if a later step fails
        with open(out_dir + filename, 'w') as f_file:
            csv_f = csv.writer(f_file)
            print('writing csv file %s' % filename)

            # write header
            csv_f.writerow(['Iso3', 'Rate type', 'Age', 'Value'])

            # loop over countries and rate_types
            for iso3 in countries_for[region]:
                for rate_type in rate_type_list:
                    # the key is built from the original rate type name
                    key = '%s+%s+%s+%s' % (rate_type, region, year, sex)
                    rate_label = output_names.get(rate_type, rate_type)

                    # get coeffs from dm.vars
                    model_vars = dm.vars[key]
                    alpha = model_vars['region_coeffs']
                    beta = model_vars['study_coeffs']
                    gamma_trace = model_vars['age_coeffs'].trace()
                    sample_size = len(gamma_trace)

                    # accumulate the per-age sum over all draws
                    value_list = [0] * dismod3.MAX_AGE
                    for gamma in gamma_trace:
                        # NOTE(review): other call sites of
                        # predict_country_rate pass the key before the iso3
                        # code -- confirm the argument order expected here.
                        value_trace = nbm.predict_country_rate(
                            iso3, key, alpha, beta, gamma,
                            covariates_dict,
                            model_vars['bounds_func'],
                            dm.get_estimate_age_mesh())

                        for i in range(dismod3.MAX_AGE):
                            value_list[i] += value_trace[i]

                    # one row per age with the mean over the draws
                    for i, value in enumerate(value_list):
                        csv_f.writerow([iso3, rate_label, str(i), value / sample_size])
    except Exception as e:
        # best-effort output: report the cause instead of hiding it, while
        # letting KeyboardInterrupt / SystemExit propagate
        print("couldn't write file %s (%s: %s)" % (filename, type(e).__name__, e))
Example #3
0
def save_country_level_posterior(dm, region, year, sex, rate_type_list):
    """ Save country level posterior in a csv file, and put the file in the
    directory job_working_directory/posterior/

    For every country of the region and every rate type, one row per age
    is written.  'Value' is the median of the posterior draws and
    'Lower UI' / 'Upper UI' are the draws at the 2.5% and 97.5% positions
    of the sorted sample.

    Parameters:
    -----------
      dm : DiseaseJson object
        disease model
      region : str
      year : str
        1990 or 2005
      sex : str
        male or female
      rate_type_list : list
        list of rate types
    """
    import csv

    # get covariate dict from dm
    covariates_dict = dm.get_covariates()
    derived_covariate = dm.get_derived_covariate_values()

    # directory (inside the job working directory) and name of the output file
    job_wd = dismod3.settings.JOB_WORKING_DIR % dm.id
    out_dir = job_wd + '/posterior/'
    filename = 'dm-%s-%s-%s-%s.csv' % (str(dm.id), region, sex, year)

    # the with-block closes the file even if a later step raises
    with open(out_dir + filename, 'w') as f_file:
        csv_f = csv.writer(f_file)
        print('writing csv file %s' % filename)

        # write header
        csv_f.writerow(['Iso3', 'Rate type', 'Age', 'Value', 'Lower UI', 'Upper UI'])

        # loop over countries and rate_types
        for iso3 in countries_for[region]:
            for rate_type in rate_type_list:
                # the key is built from the original rate type name
                key = '%s+%s+%s+%s' % (rate_type, region, year, dismod3.utils.clean(sex))

                # modify rate type names
                if rate_type == 'mortality':
                    rate_type = 'm_with'

                # get dm.vars by the key
                model_vars = dm.vars[key]
                if rate_type == 'duration':
                    # duration is read directly from the trace of its rate;
                    # the sample size must come from this trace, not from
                    # whatever rate type was processed before
                    rate_trace = model_vars['rate_stoch'].trace()
                    sample_size = len(rate_trace)

                    # one column per draw
                    value_list = np.zeros((dismod3.MAX_AGE, sample_size))
                    for i, value_trace in enumerate(rate_trace):
                        value_list[:, i] = value_trace
                else:
                    # get coeffs from dm.vars
                    alpha = model_vars['region_coeffs']
                    beta = model_vars['study_coeffs']
                    gamma_trace = model_vars['age_coeffs'].trace()
                    sample_size = len(gamma_trace)

                    # one column per draw
                    value_list = np.zeros((dismod3.MAX_AGE, sample_size))
                    for i, gamma in enumerate(gamma_trace):
                        value_list[:, i] = nbm.predict_country_rate(
                            key, iso3, alpha, beta, gamma,
                            covariates_dict, derived_covariate,
                            model_vars['bounds_func'],
                            range(101))

                # sort the draws of every age once; array indices must be
                # integers, so the fractional positions are truncated
                sorted_values = np.sort(value_list, axis=1)
                median_col = int(.5 * sample_size)
                lower_col = int(.025 * sample_size)
                upper_col = int(.975 * sample_size)

                if rate_type == 'prevalence':
                    # progress output: covariates and the median of row 5
                    print('%s %s %s %s' % (
                        key, iso3,
                        nbm.country_covariates(key, iso3, covariates_dict, derived_covariate)[1],
                        sorted_values[5, median_col]))

                # write a row per age: median, lower and upper bound
                for age in range(dismod3.MAX_AGE):
                    csv_f.writerow([iso3, rate_type, str(age)]
                                   + list(sorted_values[age, [median_col, lower_col, upper_col]]))