Exemple #1
0
def main():
    """ Modify (or clone) a disease model from the command line

    Parses the command line with optparse, fetches the disease model
    whose integer id is the single positional argument, applies any
    of --ymax / --condition / --notes that were given, posts the model
    back with dismod3.post_disease_model, and prints the url that the
    post returns.

    With --clone, the 'id' entry is dropped from dm.params before
    posting, so the specified model is left unchanged.
    """
    usage = 'usage: %prog [options] disease_model_id'
    parser = optparse.OptionParser(usage)

    # flags for parameters to modify
    # type='float' lets optparse reject a non-numeric value with a usage
    # error instead of an uncaught ValueError further down
    parser.add_option('-y', '--ymax', type='float',
                      help='set the maximum y value for summary plots')
    parser.add_option('-t', '--condition',
                      help='set the condition of the model')
    parser.add_option('-n', '--notes',
                      help='set the notes of the model')

    # boolean flags
    parser.add_option('-c', '--clone',
                      action='store_true', dest='clone',
                      help='create a clone of the model (leave specified model unchanged)')

    (opts, args) = parser.parse_args()

    # check that args are correct; parser.error() exits the process, the
    # returns only guard against a parser whose error() has been overridden
    if len(args) != 1:
        parser.error('incorrect number of arguments')
        return
    try:
        model_id = int(args[0])
    except ValueError:
        parser.error('disease_model_id must be an integer')
        return

    # fetch requested model
    dm = dismod3.get_disease_model(model_id)

    # change values specified
    if opts.ymax is not None:
        dm.set_ymax(opts.ymax)

    # TODO: get condition to actually change
    if opts.condition:
        dm.set_condition(opts.condition)
    if opts.notes:
        dm.set_notes(opts.notes)

    # clone if requested
    if opts.clone:
        # dismod_data_server creates new model if it doesn't find an id;
        # the default avoids a KeyError when the model carries no id
        dm.params.pop('id', None)

    # post results to dismod_data_server
    url = dismod3.post_disease_model(dm)

    # announce url to view results
    print(url)
Exemple #2
0
def _write_tsv(path, col_names, records):
    """ Write records to a tab-separated file with a header row

    Parameters
    ----------
    path : str
      file to create (opened with mode 'w')
    col_names : list of str
      header row; also the keys pulled from each record, in order
    records : iterable of dict
      one dict per output row
    """
    # the context manager closes the file even if a record lacks a column
    with open(path, 'w') as f_file:
        csv_f = csv.writer(f_file, dialect='excel-tab')
        csv_f.writerow(col_names)
        for dd in records:
            csv_f.writerow([dd[c] for c in col_names])


def generate_disease_data(condition, cov):
    """ Generate tsv files with gold-standard disease data,
    and somewhat good, somewhat dense disease data, as might be expected from a
    condition that is carefully studied in the literature

    For every region in countries_for (except 'world'), for years 1990
    and 2005 and both sexes, age-specific rates are pushed through a
    four-bin compartmental model to obtain prevalence and duration.
    The results are written to OUTPUT_PATH + '<condition>_gold.tsv' and
    OUTPUT_PATH + '<condition>_data.tsv', and the latter is submitted
    through the upload form at DISMOD_BASE_URL + 'dismod/data/upload/'.

    Parameters
    ----------
    condition : str
      name stored with every generated row and used in the file names
    cov : unused
      NOTE(review): not read anywhere in this function; the rows for
      the data file are generated with params['cov'] = 0. -- confirm
      whether the argument was meant to be used there
    """
    # imported once here instead of on every pass of the innermost loop
    import scipy.linalg
    from dismod3 import NEARLY_ZERO
    from dismod3.utils import trim

    age_len = dismod3.MAX_AGE
    ages = np.arange(age_len, dtype='float')

    # incidence rate
    i0 = .005 + .02 * mc.invlogit((ages - 44) / 3)
    #i0 = np.maximum(0., .001 * (-.125 + np.ones_like(ages) + (ages / age_len)**2.))

    # remission rate
    #r = 0. * ages
    r = .1 * np.ones_like(ages)

    # excess-mortality rate is derived below from this mortality ratio
    #f_init = .085 * (ages / 100) ** 2.5
    SMR = 3. * np.ones_like(ages) - ages / age_len

    # all-cause mortality-rate
    mort = dismod3.get_disease_model('all-cause_mortality')

    #age_intervals = [[a, a+9] for a in range(0, dismod3.MAX_AGE-4, 10)] + [[0, 100] for ii in range(1)]
    age_intervals = [[a, a] for a in range(0, dismod3.MAX_AGE, 1)]

    # TODO:  take age structure from real data
    # NOTE(review): neither dict is read below; the random.sample calls are
    # kept because they advance the shared random state.  Floor division
    # (//) gives the same integer sample sizes as the Python 2 int '/'.
    n_regions = len(countries_for)
    sparse_intervals = dict(
        (region, random.sample(age_intervals,
                               (ii**3 * len(age_intervals)) // n_regions**3))
        for ii, region in enumerate(countries_for))
    dense_intervals = dict(
        (region, random.sample(age_intervals, len(age_intervals) // 2))
        for region in countries_for)

    gold_data = []
    noisy_data = []

    for region in sorted(countries_for):
        if region == 'world':
            continue

        print(region)
        sys.stdout.flush()

        # no unexplained regional variation is introduced: every region
        # uses the same incidence curve
        i = i0

        for year in [1990, 2005]:
            for sex in ['male', 'female']:

                param_type = 'all-cause_mortality'
                key = dismod3.gbd_key_for(param_type, region, year, sex)
                m_all_cause = mort.mortality(key, mort.data)

                # calculate excess-mortality rate from smr
                f = (SMR - 1.) * m_all_cause

                ## compartmental model (bins S, C, D, M)
                SCDM = np.zeros([4, age_len])
                p = np.zeros(age_len)
                m = np.zeros(age_len)

                # the whole cohort starts in the first bin (S)
                SCDM[:, 0] = [1., 0., 0., 0.]

                p[0] = SCDM[1, 0] / (SCDM[0, 0] + SCDM[1, 0] + NEARLY_ZERO)
                m[0] = trim(m_all_cause[0] - f[0] * p[0],
                            NEARLY_ZERO, 1 - NEARLY_ZERO)

                # step the bins forward one age at a time with the matrix
                # exponential of that age's transition rates
                for a in range(age_len - 1):
                    A = [[-i[a] - m[a], r[a], 0., 0.],
                         [i[a], -r[a] - m[a] - f[a], 0., 0.],
                         [m[a], m[a], 0., 0.],
                         [0., f[a], 0., 0.]]

                    SCDM[:, a + 1] = np.dot(scipy.linalg.expm(A), SCDM[:, a])

                    p[a + 1] = SCDM[1, a + 1] / (SCDM[0, a + 1] +
                                                 SCDM[1, a + 1] + NEARLY_ZERO)
                    m[a + 1] = m_all_cause[a + 1] - f[a + 1] * p[a + 1]

                # duration = E[time in bin C], filled in by a backward
                # recursion from the oldest age
                hazard = r + m + f
                pr_not_exit = np.exp(-hazard)
                X = np.empty(len(hazard))
                X[-1] = 1 / hazard[-1]
                for a in reversed(range(len(X) - 1)):
                    X[a] = (pr_not_exit[a] * (X[a + 1] + 1)) + \
                        (1 / hazard[a] * (1 - pr_not_exit[a]) - pr_not_exit[a])

                country = countries_for[region][0]
                params = dict(age_intervals=age_intervals,
                              condition=condition,
                              gbd_region=region,
                              country=country,
                              year=year,
                              sex=sex,
                              effective_sample_size=1000)

                # gold data: a single all-ages interval per rate type
                params['age_intervals'] = [[0, 99]]
                generate_and_append_data(gold_data, 'prevalence data', p,
                                         **params)
                generate_and_append_data(gold_data, 'incidence data', i,
                                         **params)
                generate_and_append_data(gold_data, 'excess-mortality data', f,
                                         **params)
                generate_and_append_data(gold_data, 'remission data', r,
                                         **params)
                generate_and_append_data(gold_data, 'duration data', X,
                                         **params)

                # TODO: use this approach to age standardize all gold data, and then change it to get iX as a direct sum
                params['age_intervals'] = [[0, 99]]
                iX = i * X * (1 - p) * regional_population(key)
                generate_and_append_data(gold_data, 'incidence_x_duration', iX,
                                         **params)

                # data-file rows: one row per single-year age interval
                params['effective_sample_size'] = 1000
                params['cov'] = 0.
                params['age_intervals'] = age_intervals
                generate_and_append_data(noisy_data, 'prevalence data', p,
                                         **params)
                generate_and_append_data(noisy_data, 'excess-mortality data',
                                         f, **params)
                generate_and_append_data(noisy_data, 'remission data', r,
                                         **params)
                generate_and_append_data(noisy_data, 'incidence data', i,
                                         **params)

    # both files share the column set of the first gold row
    col_names = sorted(data_dict_for_csv(gold_data[0]).keys())

    _write_tsv(OUTPUT_PATH + '%s_gold.tsv' % condition, col_names,
               (data_dict_for_csv(d) for d in gold_data))

    f_name = OUTPUT_PATH + '%s_data.tsv' % condition
    _write_tsv(f_name, col_names,
               (data_dict_for_csv(d) for d in noisy_data))

    # upload data file
    from dismod3.disease_json import dismod_server_login, twc, DISMOD_BASE_URL
    dismod_server_login()
    twc.go(DISMOD_BASE_URL + 'dismod/data/upload/')
    with open(f_name) as f_file:
        twc.formvalue(1, 'tab_separated_values', f_file.read())

    # TODO: find or set the model number for this model, set the
    # expert priors and covariates, merge the covariate data into the
    # model, and add the "ground truth" to the disease json

    # best effort: a failed submit is reported but does not raise
    try:
        twc.submit()
    except Exception as e:
        print(e)
Exemple #3
0
def fit(id):
    """ Download model, conduct fit, and upload results

    The code visible here performs the export step: it merges the
    all-cause mortality data into the model's data, converts the rows
    to the measure_in.csv column layout, appends one placeholder row
    per (year, age, integrand) to be predicted, and writes the file to
    the job working directory for this id.

    Parameters
    ----------
    id : int
      The model id number for the job to fit

    Commandline Version:

    [omak] dismod4.abie ] test/parameter.sh
    [omak] dismod4.abie ] example/simulate.py 5 1 100
    [omak] dismod4.abie ] /tmp/dismod4_csv test/parameter.csv measure_in.csv sfun_in.csv
    dismod4_csv: Attempt to overwrite the existing file
    sfun_in.csv
    [omak] dismod4.abie ] /tmp/dismod4_csv test/parameter.csv measure_in.csv sfun_in.csv sfun_out.csv measure_out.csv

    """
    import csv

    dm = dismod3.get_disease_model(id)
    mort = dismod3.fetch_disease_model('all-cause_mortality')
    dm.data += mort.data

    ## convert model to csv file
    column_names = ['time_lower', 'time_upper', 'age_lower', 'age_upper',
                    'likelihood_name', 'likelihood_sigma', 'likelihood_beta',
                    'value', 'integrand']

    # dismod3 data_type -> integrand name written to the csv
    integrand_for = {
        'remission data': 'remission',
        'excess-mortality data': 'excess',
        'incidence data': 'incidence',
        'mrr data': 'risk',
        'prevalence data': 'prevalence',
        'all-cause mortality data': 'all_cause',
    }

    # add all the model data to the data list
    data_list = []
    for d in dm.data:
        integrand = integrand_for.get(d['data_type'])
        if integrand is None:
            # data types with no corresponding integrand are not exported
            continue

        data_list.append({
            'time_lower': d['year_start'],
            'time_upper': d['year_end'],  # TODO: determine if this should be +1
            'age_lower': d['age_start'] + 1.,
            'age_upper': d['age_end'] + 1.,  # TODO: determine if this should be +1
            'likelihood_name': 'gaussian',
            'likelihood_sigma': .0001,  # TODO: use more accurate sigma
            'likelihood_beta': 1.,
            'value': d['value'] / float(d.get('units', 1.)),
            'integrand': integrand,
        })

    # add the time/age/regions that we want to predict to the data list as well
    age_mesh = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    # position of each prediction row in data_list
    # NOTE(review): not read in the code visible here -- confirm it is still needed
    index_dict = {}
    for year in [1990, 2005]:
        for age in age_mesh:
            for integrand in ['remission', 'excess', 'incidence', 'risk', 'prevalence']:
                index_dict[(integrand, year, age)] = len(data_list)
                data_list.append({
                    'time_lower': year,
                    'time_upper': year,
                    'age_lower': age,
                    'age_upper': age,
                    'likelihood_name': 'gaussian',
                    'likelihood_sigma': inf,  # infinite sigma on a placeholder value of 0
                    'likelihood_beta': 1.,
                    'value': 0.,
                    'integrand': integrand,
                })

    # save the csv file
    fname = dismod3.settings.JOB_WORKING_DIR % id + '/measure_in.csv'

    try:
        # the context manager closes the file even if a write fails
        with open(fname, 'w') as f:
            csv.writer(f).writerow(column_names)
            csv.DictWriter(f, column_names).writerows(data_list)
    except IOError as e:
        print('Warning: could not create data csv.  Maybe it exists already?\n%s' % e)
Exemple #4
0
def hep_c_fit(regions, prediction_years, data_year_start=-inf, data_year_end=inf, egypt_flag=False):
    """ Fit prevalence for regions and years specified

    Loads disease model 8021, adjusts its prevalence priors and the
    study-level 'bias' covariate, restricts the data, fits one
    negative-binomial rate model by MCMC followed by MAP, stores that
    single fit under every prediction key, saves plots, posts the
    model, and returns it.

    Parameters
    ----------
    regions : list
      regions whose data are kept (when egypt_flag is False) and for
      which prediction keys are generated
    prediction_years : list
      years for which prediction keys are generated
    data_year_start, data_year_end : float
      keep only data whose [year_start, year_end] overlaps this range
    egypt_flag : bool
      if True, keep only data with country_iso3_code 'EGY' (the region
      and year filters are then not applied); if False, 'EGY' data are
      excluded
    """
    banner = '\n***************************\nfitting %s for %s (using data from years %f to %f)'
    print(banner % (regions, prediction_years, data_year_start, data_year_end))

    ## load model to fit
    dm = dismod3.get_disease_model(8021)

    ## adjust the expert priors
    # other entries of dm.params['global_priors'] seen in a debugger session:
    #   increasing, unimodal, level_bounds, y_maximum, note, level_value,
    #   decreasing, parameter_age_mesh, heterogeneity, smoothness
    # and global_priors['smoothness']['prevalence'] looked like
    #   {'age_start': 0, 'amount': 'Moderately', 'age_end': 100}
    # TODO: construct examples of adjusting other covariates
    global_priors = dm.params['global_priors']
    global_priors['heterogeneity']['prevalence'] = 'Very'
    global_priors['smoothness']['prevalence']['amount'] = 'Slightly'

    # include a study-level covariate for 'bias'
    # TODO: construct additional examples of adjusting covariates
    dm.get_covariates()['Study_level']['bias']['rate']['value'] = 1

    ## select relevant prevalence data
    # TODO: streamline data selection functions
    selected = []
    if egypt_flag:
        for d in dm.data:
            if d['country_iso3_code'] == 'EGY':
                selected.append(d)
    else:
        for d in dm.data:
            if dismod3.utils.clean(d['gbd_region']) not in regions:
                continue
            if not (float(d['year_end']) >= data_year_start):
                continue
            if not (float(d['year_start']) <= data_year_end):
                continue
            if d['country_iso3_code'] == 'EGY':
                continue
            selected.append(d)
    dm.data = selected

    ## create, fit, and save rate model
    dm.vars = {}

    keys = dismod3.utils.gbd_keys(type_list=['prevalence'],
                                  region_list=regions,
                                  year_list=prediction_years)
    # TODO: consider how to do this for models that use the complete disease model
    # TODO: consider adding hierarchical similarity priors for the male and female models
    first_key = keys[0]  # looks like 'prevalence+asia_south+1990+male'
    dm.vars[first_key] = neg_binom_model.setup(dm, first_key, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)

    # make map object so we can compute AIC and BIC
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    if egypt_flag:
        # HACK: to treat egypt as its own region
        neg_binom_model.countries_for['egypt'] = ['EGY']

    # the one fitted model is reused for every prediction key
    fitted_vars = dm.vars[first_key]
    for key in keys:
        # save the results in the disease model
        dm.vars[key] = fitted_vars
        neg_binom_model.store_mcmc_fit(dm, key, fitted_vars)

        # check autocorrelation to confirm chain has mixed
        test_model.summarize_acorr(fitted_vars['rate_stoch'].trace())

        # generate plots of results; random() makes each file name distinct
        dismod3.tile_plot_disease_model(dm, [key], defaults={'ymax':.15, 'alpha': .5})
        posterior_png = 'dm-%d-posterior-%s.%f.png' % (dm.id, key, random())
        dm.savefig(posterior_png)

    # summarize fit quality graphically, as well as parameter posteriors
    dismod3.plotting.plot_posterior_predicted_checks(dm, first_key)
    check_png = 'dm-%d-check-%s.%f.png' % (dm.id, first_key, random())
    dm.savefig(check_png)

    dismod3.post_disease_model(dm)
    return dm
Exemple #5
0
def _write_tsv(path, col_names, records):
    """ Write records to a tab-separated file with a header row

    Parameters
    ----------
    path : str
      file to create (opened with mode 'w')
    col_names : list of str
      header row; also the keys pulled from each record, in order
    records : iterable of dict
      one dict per output row
    """
    # the context manager closes the file even if a record lacks a column
    with open(path, 'w') as f_file:
        csv_f = csv.writer(f_file, dialect='excel-tab')
        csv_f.writerow(col_names)
        for dd in records:
            csv_f.writerow([dd[c] for c in col_names])


def generate_disease_data(condition, cov):
    """ Generate tsv files with gold-standard disease data,
    and somewhat good, somewhat dense disease data, as might be expected from a
    condition that is carefully studied in the literature

    For every region in countries_for (except 'world'), for years 1990
    and 2005 and both sexes, age-specific rates are pushed through a
    four-bin compartmental model to obtain prevalence and duration.
    The results are written to OUTPUT_PATH + '<condition>_gold.tsv' and
    OUTPUT_PATH + '<condition>_data.tsv', and the latter is submitted
    through the upload form at DISMOD_BASE_URL + 'dismod/data/upload/'.

    Parameters
    ----------
    condition : str
      name stored with every generated row and used in the file names
    cov : unused
      NOTE(review): not read anywhere in this function; the rows for
      the data file are generated with params['cov'] = 0. -- confirm
      whether the argument was meant to be used there
    """
    # imported once here instead of on every pass of the innermost loop
    import scipy.linalg
    from dismod3 import NEARLY_ZERO
    from dismod3.utils import trim

    age_len = dismod3.MAX_AGE
    ages = np.arange(age_len, dtype='float')

    # incidence rate
    i0 = .005 + .02 * mc.invlogit((ages - 44) / 3)
    #i0 = np.maximum(0., .001 * (-.125 + np.ones_like(ages) + (ages / age_len)**2.))

    # remission rate
    #r = 0. * ages
    r = .1 * np.ones_like(ages)

    # excess-mortality rate is derived below from this mortality ratio
    #f_init = .085 * (ages / 100) ** 2.5
    SMR = 3. * np.ones_like(ages) - ages / age_len

    # all-cause mortality-rate
    mort = dismod3.get_disease_model('all-cause_mortality')

    #age_intervals = [[a, a+9] for a in range(0, dismod3.MAX_AGE-4, 10)] + [[0, 100] for ii in range(1)]
    age_intervals = [[a, a] for a in range(0, dismod3.MAX_AGE, 1)]

    # TODO:  take age structure from real data
    # NOTE(review): neither dict is read below; the random.sample calls are
    # kept because they advance the shared random state.  Floor division
    # (//) gives the same integer sample sizes as the Python 2 int '/'.
    n_regions = len(countries_for)
    sparse_intervals = dict(
        (region, random.sample(age_intervals,
                               (ii**3 * len(age_intervals)) // n_regions**3))
        for ii, region in enumerate(countries_for))
    dense_intervals = dict(
        (region, random.sample(age_intervals, len(age_intervals) // 2))
        for region in countries_for)

    gold_data = []
    noisy_data = []

    for region in sorted(countries_for):
        if region == 'world':
            continue

        print(region)
        sys.stdout.flush()

        # no unexplained regional variation is introduced: every region
        # uses the same incidence curve
        i = i0

        for year in [1990, 2005]:
            for sex in ['male', 'female']:

                param_type = 'all-cause_mortality'
                key = dismod3.gbd_key_for(param_type, region, year, sex)
                m_all_cause = mort.mortality(key, mort.data)

                # calculate excess-mortality rate from smr
                f = (SMR - 1.) * m_all_cause

                ## compartmental model (bins S, C, D, M)
                SCDM = np.zeros([4, age_len])
                p = np.zeros(age_len)
                m = np.zeros(age_len)

                # the whole cohort starts in the first bin (S)
                SCDM[:, 0] = [1., 0., 0., 0.]

                p[0] = SCDM[1, 0] / (SCDM[0, 0] + SCDM[1, 0] + NEARLY_ZERO)
                m[0] = trim(m_all_cause[0] - f[0] * p[0],
                            NEARLY_ZERO, 1 - NEARLY_ZERO)

                # step the bins forward one age at a time with the matrix
                # exponential of that age's transition rates
                for a in range(age_len - 1):
                    A = [[-i[a] - m[a], r[a], 0., 0.],
                         [i[a], -r[a] - m[a] - f[a], 0., 0.],
                         [m[a], m[a], 0., 0.],
                         [0., f[a], 0., 0.]]

                    SCDM[:, a + 1] = np.dot(scipy.linalg.expm(A), SCDM[:, a])

                    p[a + 1] = SCDM[1, a + 1] / (SCDM[0, a + 1] +
                                                 SCDM[1, a + 1] + NEARLY_ZERO)
                    m[a + 1] = m_all_cause[a + 1] - f[a + 1] * p[a + 1]

                # duration = E[time in bin C], filled in by a backward
                # recursion from the oldest age
                hazard = r + m + f
                pr_not_exit = np.exp(-hazard)
                X = np.empty(len(hazard))
                X[-1] = 1 / hazard[-1]
                for a in reversed(range(len(X) - 1)):
                    X[a] = (pr_not_exit[a] * (X[a + 1] + 1)) + \
                        (1 / hazard[a] * (1 - pr_not_exit[a]) - pr_not_exit[a])

                country = countries_for[region][0]
                params = dict(age_intervals=age_intervals,
                              condition=condition,
                              gbd_region=region,
                              country=country,
                              year=year,
                              sex=sex,
                              effective_sample_size=1000)

                # gold data: a single all-ages interval per rate type
                params['age_intervals'] = [[0, 99]]
                generate_and_append_data(gold_data, 'prevalence data', p,
                                         **params)
                generate_and_append_data(gold_data, 'incidence data', i,
                                         **params)
                generate_and_append_data(gold_data, 'excess-mortality data', f,
                                         **params)
                generate_and_append_data(gold_data, 'remission data', r,
                                         **params)
                generate_and_append_data(gold_data, 'duration data', X,
                                         **params)

                # TODO: use this approach to age standardize all gold data, and then change it to get iX as a direct sum
                params['age_intervals'] = [[0, 99]]
                iX = i * X * (1 - p) * regional_population(key)
                generate_and_append_data(gold_data, 'incidence_x_duration', iX,
                                         **params)

                # data-file rows: one row per single-year age interval
                params['effective_sample_size'] = 1000
                params['cov'] = 0.
                params['age_intervals'] = age_intervals
                generate_and_append_data(noisy_data, 'prevalence data', p,
                                         **params)
                generate_and_append_data(noisy_data, 'excess-mortality data',
                                         f, **params)
                generate_and_append_data(noisy_data, 'remission data', r,
                                         **params)
                generate_and_append_data(noisy_data, 'incidence data', i,
                                         **params)

    # both files share the column set of the first gold row
    col_names = sorted(data_dict_for_csv(gold_data[0]).keys())

    _write_tsv(OUTPUT_PATH + '%s_gold.tsv' % condition, col_names,
               (data_dict_for_csv(d) for d in gold_data))

    f_name = OUTPUT_PATH + '%s_data.tsv' % condition
    _write_tsv(f_name, col_names,
               (data_dict_for_csv(d) for d in noisy_data))

    # upload data file
    from dismod3.disease_json import dismod_server_login, twc, DISMOD_BASE_URL
    dismod_server_login()
    twc.go(DISMOD_BASE_URL + 'dismod/data/upload/')
    with open(f_name) as f_file:
        twc.formvalue(1, 'tab_separated_values', f_file.read())

    # TODO: find or set the model number for this model, set the
    # expert priors and covariates, merge the covariate data into the
    # model, and add the "ground truth" to the disease json

    # best effort: a failed submit is reported but does not raise
    try:
        twc.submit()
    except Exception as e:
        print(e)
Exemple #6
0
#!/usr/local/bin/python2.5

import dismod3

import sys

dm = dismod3.get_disease_model(894)

# prior specification applied in both modes below
_prior_str = ' smooth 10\n zero 0 15\n zero 99 100\n confidence 1000 .0001\n'

if len(sys.argv) != 2:
    # no index on the command line: give every prior key the same priors
    keys = dm.params['priors'].keys()
    for k in keys:
        dm.set_priors(k, _prior_str)

    import dismod3.multiregion_model as model
    print('Processing all regions (%d data points)' % len(dm.data))

else:
    # the single argument is an index into the model's prior keys
    k = list(dm.params['priors'].keys())[int(sys.argv[1])]

    # keep only the data for the region named by that key
    _region = k.replace('prevalence data+', '')
    dm.data = [d for d in dm.data if d['gbd_region'] == _region]

    dm.set_priors('prevalence data', _prior_str)

    import dismod3.beta_binomial_model as model
    print('Processing %s (%d data points)' % (k, len(dm.data)))

    # alternate map and mcmc fits, ending on a map fit
    for _method in ('map', 'mcmc', 'map', 'mcmc', 'map'):
        model.fit(dm, _method)
Exemple #7
0
def fit_continuous_spm(id):
    """ Fit continuous single parameter model

    The code visible here performs the export step: it converts the
    model's 'continuous single parameter' data to rows, appends one
    placeholder row (y = nan, se = inf) per region/year/age to be
    predicted, and writes everything to data.csv in the job working
    directory for this id.

    Parameters
    ----------
    id : int
      The model id number for the job to fit

    Example
    -------
    >>> import fit_continuous_spm
    >>> fit_continuous_spm.fit_continuous_spm(4773)
    """
    import csv

    dm = dismod3.get_disease_model(id)

    ## convert model to csv file
    column_names = ['region', 'country', 'year', 'age', 'y', 'se', 'x0', 'x1', 'w0']
    data_list = []

    # add all the model data to the data list
    param_type = 'continuous single parameter'
    for d in dm.filter_data(data_type=param_type):
        # midpoints of the year and age ranges, rounded to the nearest ten
        year = round(.5 * (d['year_start'] + d['year_end']), -1)
        age = round(.5 * (d['age_start'] + d['age_end']), -1)
        units = float(d['units'])
        # x1 and w0 both carry the same linear-in-year term
        year_effect = .1 * (year - 1997.)

        data_list.append({
            'region': dismod3.utils.clean(d['gbd_region']),
            'country': d['country_iso3_code'],
            'year': year,
            'age': age,
            'y': d['parameter_value'] * units,
            'se': d['standard_error'] * units,
            'x0': 1.,
            'x1': year_effect,
            'w0': year_effect,
        })

    # add the time/age/regions that we want to predict to the data list as well
    prediction_regions = dismod3.gbd_regions # FIXME: now i just take a few regions, for fast testing
    age_mesh = [0, 20, 40, 60, 80, 100]
    # position of each prediction row in data_list
    # NOTE(review): not read in the code visible here -- confirm it is still needed
    index_dict = {}
    for r in prediction_regions:
        region = dismod3.utils.clean(r)
        for y in [1990, 2005]:
            year_effect = .1 * (y - 1997.)
            for a in age_mesh:
                index_dict[(region, y, a)] = len(data_list)
                data_list.append({
                    'region': region,
                    'country': region + '_all',
                    'year': y,
                    'age': a,
                    'y': pl.nan,
                    'se': pl.inf,
                    'x0': 1.,
                    'x1': year_effect,
                    'w0': year_effect,
                })

    # save the csv file
    fname = dismod3.settings.JOB_WORKING_DIR % id + '/data.csv'

    try:
        # the context manager closes the file even if a write fails
        with open(fname, 'w') as f:
            csv.writer(f).writerow(column_names)
            csv.DictWriter(f, column_names).writerows(data_list)
    except IOError as e:
        print('Warning: could not create data csv.  Maybe it exists already?\n%s' % e)
Exemple #8
0
            truth, range(a0, a1 + 1),
            np.ones(a1 + 1 - a0) / float(a1 + 1 - a0))

        d['value'] = p0
        if p0 == 0.:
            d['standard_error'] = .000001
        elif p0 < 1.:
            d['standard_error'] = p0 * (1 - p0) / np.sqrt(1000)
        else:
            d['standard_error'] = p0 * .05

        data.append(d)


# Module-level setup for generating data rows.
# NOTE(review): this excerpt ends inside the innermost loop; `offset` is
# computed but the code that uses it is not visible here.

# accumulator for generated data rows
data = []
mort = dismod3.get_disease_model('all-cause_mortality')

# five-year age groups [0, 4], [5, 9], ... up to dismod3.MAX_AGE
age_intervals = [[a, a + 4] for a in range(0, dismod3.MAX_AGE - 4, 5)]
for sex in ['male', 'female']:
    for year in [1990, 2005]:
        # each entry pairs a region name with one country code
        for region, country in \
                [['Asia, Southeast', 'THA'],
                 ['Asia, East', 'CHN'],
                 ['Asia, South', 'IND'],
                 ['Europe, Central', 'ALB']]:

            # offset is 0., .5 or 1.: +.5 for males and +.5 for year 2005
            offset = 0.
            if sex == 'male':
                offset += .5
            if year == 2005:
                offset += .5
              'id': len(data)}

        p0 = dismod3.utils.rate_for_range(truth, range(a0, a1 + 1), np.ones(a1 + 1 - a0) / float(a1 + 1 - a0))
    
        d['value'] = p0
        if p0 == 0.:
            d['standard_error'] = .000001
        elif p0 < 1.:
            d['standard_error'] = p0 * (1-p0) / np.sqrt(1000)
        else:
            d['standard_error'] = p0 * .05

        data.append(d)

# Module-level setup for generating data rows.
# NOTE(review): this excerpt ends inside the innermost loop; `offset` is
# computed but the code that uses it is not visible here.

# accumulator for generated data rows
data = []
mort = dismod3.get_disease_model('all-cause_mortality')

# five-year age groups [0, 4], [5, 9], ... up to dismod3.MAX_AGE
age_intervals = [[a, a+4] for a in range(0, dismod3.MAX_AGE-4, 5)]
for sex in ['male', 'female']:
    for year in [1990, 2005]:
        # each entry pairs a region name with one country code
        for region, country in \
                [['Asia, Southeast', 'THA'],
                 ['Asia, East', 'CHN'],
                 ['Asia, South', 'IND'],
                 ['Europe, Central', 'ALB']]:

            # offset is 0., .5 or 1.: +.5 for males and +.5 for year 2005
            offset = 0.
            if sex == 'male':
                offset += .5
            if year == 2005:
                offset += .5
def _reset_output_dir(path):
    """ Recreate path empty, with fresh stdout/ and stderr/ subdirectories """
    if os.path.exists(path):
        rmtree(path)
    os.mkdir(path)
    os.mkdir('%s/stdout' % path)
    os.mkdir('%s/stderr' % path)


def _launch_fit(fit_args, dm_id, stdout_path, stderr_path, on_sge):
    """ Fill in dismod3.settings.GBD_FIT_STR for one fit and run it

    The template takes its four values in a different order depending
    on on_sge: output paths first when True, fit arguments first
    otherwise.
    """
    if on_sge:
        call_str = dismod3.settings.GBD_FIT_STR % (stdout_path, stderr_path, fit_args, dm_id)
    else:
        call_str = dismod3.settings.GBD_FIT_STR % (fit_args, dm_id, stdout_path, stderr_path)
    # NOTE(review): the command runs through the shell and fit_args can
    # contain region names taken from the model's params -- confirm that
    # clean() leaves no shell metacharacters in them
    subprocess.call(call_str, shell=True)


def daemon_loop():
    """ Poll the job queue forever, launching fit processes for each job

    For every queued job the disease model is fetched, its working
    directory is created if needed, and, depending on the
    'estimate_type' in the model's 'run_status' params, either one
    posterior fit per region/sex/year or one empirical-prior fit per
    rate type is launched through dismod3.settings.GBD_FIT_STR.  An
    estimate type matching neither is logged.  The loop sleeps
    dismod3.settings.SLEEP_SECS between polls and never returns.
    """
    on_sge = dismod3.settings.ON_SGE
    while True:
        # a failed poll is treated as an empty queue; only Exception is
        # caught, so KeyboardInterrupt and SystemExit still stop the daemon
        try:
            job_queue = dismod3.get_job_queue()
        except Exception as e:
            log('could not fetch job queue: %s' % e)
            job_queue = []

        for param_id in job_queue:
            log('processing job %d' % param_id)
            job_params = dismod3.remove_from_job_queue(param_id)
            dm_id = int(job_params['dm_id'])
            dm = dismod3.get_disease_model(dm_id)

            # make a working directory for the id
            job_dir = dismod3.settings.JOB_WORKING_DIR % dm_id
            if not os.path.exists(job_dir):
                os.makedirs(job_dir)

            run_status = dm.params.get('run_status', {})
            estimate_type = run_status.get('estimate_type', 'fit all individually')

            if estimate_type.find('posterior') != -1:
                # fit each region/year/sex individually for this model
                regions_to_fit = run_status.get('regions_to_fit', [])
                # an empty list launches no fits instead of raising IndexError
                if regions_to_fit and regions_to_fit[0] == 'all_regions':
                    regions_to_fit = dismod3.gbd_regions

                out_dir = '%s/posterior' % job_dir
                _reset_output_dir(out_dir)
                dismod3.init_job_log(dm_id, 'posterior', param_id)

                # TODO: make region selection a user-settable option from the gui
                for r in regions_to_fit:
                    for s in dismod3.gbd_sexes:
                        for y in dismod3.gbd_years:
                            k = '%s+%s+%s' % (clean(r), s, y)
                            _launch_fit('-l -r %s -s %s -y %s' % (clean(r), s, y),
                                        dm_id,
                                        '%s/stdout/%s' % (out_dir, k),
                                        '%s/stderr/%s' % (out_dir, k),
                                        on_sge)
                            time.sleep(1.)

            elif estimate_type.find('empirical priors') != -1:
                # fit empirical priors (by pooling data from all regions)
                out_dir = '%s/empirical_priors' % job_dir
                _reset_output_dir(out_dir)
                dismod3.init_job_log(dm_id, 'empirical_priors', param_id)

                for t in ['excess-mortality', 'remission', 'incidence', 'prevalence']:
                    _launch_fit('-l -t %s' % t,
                                dm_id,
                                '%s/stdout/%s' % (out_dir, t),
                                '%s/stderr/%s' % (out_dir, t),
                                on_sge)

            else:
                log('unrecognized estimate type: %s' % estimate_type)

        time.sleep(dismod3.settings.SLEEP_SECS)

# Script body: validate the command line, fetch the model, and start
# defining the ground truth.
# NOTE(review): `parser` and `args` are created before this excerpt;
# presumably an optparse.OptionParser and its positional arguments -- confirm.

# check that args are correct
# exactly one positional argument is expected: the integer model id
if len(args) == 1:
    try:
        id = int(args[0])
    except ValueError:
        parser.error('disease_model_id must be an integer')
        # fallback in case parser.error() returns
        exit()
else:
    parser.error('incorrect number of arguments')
    # fallback in case parser.error() returns
    exit()


# fetch requested model
dm = dismod3.get_disease_model(id)


# define ground truth
# one entry per year of age, 0 .. dismod3.MAX_AGE - 1
age_len = dismod3.MAX_AGE
ages = np.arange(age_len, dtype='float')

print 'defining model transition parameters'

# starts empty; nothing in this excerpt fills it
truth = {}

# all-cause mortality-rate
m = np.array(
    [ 0.03266595,  0.01114646,  0.00450302,  0.00226896,  0.00143311,
      0.00109108,  0.00094584,  0.00087981,  0.00083913,  0.0008073 ,
      0.00078515,  0.00077967,  0.00079993,  0.00085375,  0.00094349,
Exemple #12
0
def _log_fit_status(id, opts, status):
    """Write `status` to the job status file for the fit selected by `opts`.

    Does nothing unless ``opts.log`` is set.  An empirical-prior job (a type
    is given, but not a full region/sex/year triple) is logged under
    'empirical_priors'; a posterior job (a full region/sex/year triple and no
    type) is logged under 'posterior'.  Any other combination is not logged.
    """
    if not opts.log:
        return

    fully_specified = opts.region and opts.sex and opts.year
    if opts.type and not fully_specified:
        dismod3.log_job_status(id, 'empirical_priors', opts.type, status)
    elif fully_specified and not opts.type:
        dismod3.log_job_status(id, 'posterior',
                               '%s--%s--%s' % (opts.region, opts.sex, opts.year), status)


def _post_with_retries(dm, max_tries=4, max_sleep_secs=30):
    """Post `dm` to dismod_data_server, retrying after a random pause on failure.

    Returns whatever ``dismod3.post_disease_model`` returns.  If the final
    attempt also fails, its exception propagates to the caller.
    """
    from twill.errors import TwillAssertionError
    from urllib2 import URLError
    import random

    # this has to be a tuple: an `except` clause that is handed a list never
    # matches in Python 2 (and raises TypeError in Python 3), which would
    # disable the retries entirely
    retryable_errors = (TwillAssertionError, URLError)

    for attempt in range(max_tries):
        try:
            return dismod3.post_disease_model(dm)
        except retryable_errors:
            if attempt == max_tries - 1:
                raise
            # "dumb" back-off: sleep a random time, then try again
            time.sleep(random.random() * max_sleep_secs)


def fit(id, opts):
    """Fit disease model `id` as selected by `opts`, then post the results.

    Parameters
    ----------
    id : int
      the id of the disease model to fetch and fit
    opts : object with attributes ``region``, ``sex``, ``year``, ``type``, ``log``
      if ``opts.type`` is set, the empirical prior for that type is fit;
      otherwise a consistent fit of all parameters is found (MAP, then MCMC)
      for the keys selected by region/sex/year.  Any of region/sex/year left
      unset selects every value in the corresponding dismod3.gbd_* list.
      If ``opts.log`` is set, the job status file is updated before and
      after the fit.

    Raises
    ------
    Whatever the last attempt to post the model raises, if all four attempts
    fail.
    """
    # description of this fit; only used by the (currently disabled) status
    # announcements, but building it also checks that the model has a condition
    fit_str = '(%d) %s %s %s' % (id, opts.region or '', opts.sex or '', opts.year or '')
    #tweet('fitting disease model %s' % fit_str)
    sys.stdout.flush()

    # update job status file
    _log_fit_status(id, opts, 'Running')

    dm = dismod3.get_disease_model(id)
    fit_str = '%s %s' % (dm.params['condition'], fit_str)

    sex_list = [opts.sex] if opts.sex else dismod3.gbd_sexes
    year_list = [opts.year] if opts.year else dismod3.gbd_years
    region_list = [opts.region] if opts.region else dismod3.gbd_regions
    keys = gbd_keys(region_list=region_list, year_list=year_list, sex_list=sex_list)

    job_dir = dismod3.settings.JOB_WORKING_DIR % id

    # fit empirical priors, if type is specified
    if opts.type:
        fit_str += ' emp prior for %s' % opts.type
        import dismod3.neg_binom_model as model

        model.fit_emp_prior(dm, opts.type,
                            dbname='%s/empirical_priors/pickle/dm-%d-emp_prior-%s.pickle' % (job_dir, id, opts.type))

    # if type is not specified, find consistent fit of all parameters
    else:
        import dismod3.gbd_disease_model as model

        # get the all-cause mortality data, and merge it into the model
        mort = dismod3.get_disease_model('all-cause_mortality')
        dm.data += mort.data

        # fit individually, if sex, year, and region are specified
        if opts.sex and opts.year and opts.region:
            dm.params['estimate_type'] = 'fit individually'

        # fit the model: MAP first, then MCMC
        model.fit(dm, method='map', keys=keys, verbose=1)
        model.fit(dm, method='mcmc', keys=keys, iter=10000, thin=5, burn=5000, verbose=1,
                  dbname='%s/posterior/pickle/dm-%d-posterior-%s-%s-%s.pickle' % (job_dir, id, opts.region, opts.sex, opts.year))

    # remove all keys that have not been changed by running this model
    for param in dm.params.values():
        if isinstance(param, dict):
            # iterate over a snapshot of the keys, since entries are removed
            for j in list(param):
                if j not in keys:
                    param.pop(j)

    # post results to dismod_data_server (up to 4 tries)
    url = _post_with_retries(dm)

    # announce completion, and url to view results
    #tweet('%s fit complete %s' % (fit_str, url))
    sys.stdout.flush()

    # update job status file
    _log_fit_status(id, opts, 'Completed')
Exemple #13
0
def _make_fresh_job_dirs(d):
    """Replace directory `d` with a new one holding empty stdout/, stderr/ and pickle/ subdirectories."""
    if os.path.exists(d):
        rmtree(d)
    os.mkdir(d)
    for sub in ['stdout', 'stderr', 'pickle']:
        os.mkdir('%s/%s' % (d, sub))


def _launch_fit(on_sge, fit_args, dm_id, o, e):
    """Fill in dismod3.settings.GBD_FIT_STR for one fit and run it through the shell."""
    # the template takes its arguments in a different order when ON_SGE is set
    if on_sge:
        call_str = dismod3.settings.GBD_FIT_STR % (o, e, fit_args, dm_id)
    else:
        call_str = dismod3.settings.GBD_FIT_STR % (fit_args, dm_id, o, e)
    subprocess.call(call_str, shell=True)


def daemon_loop():
    """Poll the job queue forever, launching the fits each queued job asks for.

    On every pass, each job in the queue is removed from it, a fresh working
    directory is made for its disease model, and fit commands are launched
    according to the 'estimate_type' in the model's 'run_status' parameters:

    * 'Fit continuous single parameter model' -- run fit_continuous_spm.py
    * contains 'posterior' -- one fit per region/sex/year
    * contains 'empirical priors' -- one fit per rate type
    * anything else -- log that the estimate type was not recognized

    The loop sleeps dismod3.settings.SLEEP_SECS between passes and never
    returns.
    """
    on_sge = dismod3.settings.ON_SGE
    while True:
        try:
            job_queue = dismod3.get_job_queue()
        except Exception as err:
            # best effort: treat a failed fetch as an empty queue and try
            # again on the next pass (but let KeyboardInterrupt and
            # SystemExit stop the daemon)
            log('could not get job queue: %s' % err)
            job_queue = []

        for param_id in job_queue:
            #tweet('processing job %d' % id)
            log('processing job %d' % param_id)
            job_params = dismod3.remove_from_job_queue(param_id)
            dm_id = int(job_params['dm_id'])
            dm = dismod3.get_disease_model(dm_id)

            # make a working directory for the id, moving any old one aside
            job_dir = dismod3.settings.JOB_WORKING_DIR % dm_id
            if os.path.exists(job_dir):
                dismod3.disease_json.random_rename(job_dir)
            os.makedirs(job_dir)

            # NOTE(review): the default estimate type matches none of the
            # branches below, so a model without one is logged as unrecognized
            run_status = dm.params.get('run_status', {})
            estimate_type = run_status.get('estimate_type', 'fit all individually')

            if estimate_type == 'Fit continuous single parameter model':
                o = '%s/continuous_spm.stdout' % job_dir
                e = '%s/continuous_spm.stderr' % job_dir
                if on_sge:
                    print(o)
                    print(e)
                    call_str = 'qsub -cwd -o %s -e %s ' % (o, e) \
                               + 'run_on_cluster.sh /home/OUTPOST/abie/gbd_dev/gbd/fit_continuous_spm.py %d' % dm_id
                else:
                    call_str = 'python -u /home/abie/gbd/fit_continuous_spm.py %d 2>%s |tee %s' % (dm_id, e, o)
                subprocess.call(call_str, shell=True)
                continue

            if 'posterior' in estimate_type:
                # fit each region/year/sex individually for this model
                regions_to_fit = run_status.get('regions_to_fit', [])
                # check for an empty list before indexing, so that a model
                # with no regions_to_fit cannot crash the daemon
                if regions_to_fit and regions_to_fit[0] == 'all_regions':
                    regions_to_fit = dismod3.gbd_regions

                d = '%s/posterior' % job_dir
                _make_fresh_job_dirs(d)
                dismod3.init_job_log(dm_id, 'posterior', param_id)
                for r in regions_to_fit:
                    for s in dismod3.gbd_sexes:
                        for y in dismod3.gbd_years:
                            # TODO: make region selection a user-settable option from the gui
                            k = '%s+%s+%s' % (clean(r), s, y)
                            o = '%s/stdout/%s' % (d, k)
                            e = '%s/stderr/%s' % (d, k)
                            _launch_fit(on_sge, '-l -r %s -s %s -y %s' % (clean(r), s, y), dm_id, o, e)

            elif 'empirical priors' in estimate_type:
                # fit empirical priors (by pooling data from all regions)
                d = '%s/empirical_priors' % job_dir
                _make_fresh_job_dirs(d)
                dismod3.init_job_log(dm_id, 'empirical_priors', param_id)
                for t in ['excess-mortality', 'remission', 'incidence', 'prevalence']:
                    o = '%s/stdout/%s' % (d, t)
                    e = '%s/stderr/%s' % (d, t)
                    _launch_fit(on_sge, '-l -t %s' % t, dm_id, o, e)

            else:
                #tweet('unrecognized estimate type: %s' % estimate_type)
                log('unrecognized estimate type: %s' % estimate_type)

        time.sleep(dismod3.settings.SLEEP_SECS)