Ejemplo n.º 1
0
Archivo: table.py Proyecto: flaxter/gbd
def write_data(data_list, wb):
    """ Write data as a table that can be loaded into dismod"""

    ws = wb.add_sheet('data')

    if not data_list:
        return

    # collect every key that appears in any data point
    all_keys = set()
    for d in data_list:
        all_keys.update(d.keys())

    required_keys = ['GBD Cause', 'Parameter', 'GBD Region', 'Country ISO3 Code',
                     'Sex', 'Year Start', 'Year End', 'Age Start', 'Age End',
                     'Parameter Value', 'Standard Error', 'Units', ]

    redundant_keys = ['_row', 'age_weights', 'id', 'value', 'condition', 'data_type', 'region']

    # columns: the required ones first, then any extra keys found in the data
    skip = set(clean(k) for k in required_keys) | set(redundant_keys)
    additional_keys = sorted(all_keys - skip)
    keys = required_keys + additional_keys

    # header row ('GBD Region' is displayed as 'Region')
    for col, key in enumerate(keys):
        ws.write(0, col, 'Region' if key == 'GBD Region' else key)

    # one row per data point, in original row order
    for row, d in enumerate(sorted(data_list, key=lambda d: d.get('_row'))):
        for col, key in enumerate(keys):
            val = d.get(clean(key), '')
            if val == 'mortality data':
                val = 'with condition mortality data'
            ws.write(row + 1, col, val)
Ejemplo n.º 2
0
def covariates(d):
    """ Extract the covariates from a data point as a vector.

    Xa represents region-level covariates:
      Xa[0],...,Xa[21] = region indicators
      Xa[22] = .1*(year-1997)
      Xa[23] = .5 if sex == 'male', -.5 if sex == 'female'
    Xb represents study-level covariates:
      Xb[0] = self-reported
      Xb[1] = threshold (integer)

    Parameters
    ----------
    d : dict
      data point hash with keys 'gbd_region', 'year_start', 'year_end',
      'sex', and optionally 'self_reported' and 'threshold'

    Returns
    -------
    Xa, Xb : numpy arrays of region-level and study-level covariates
    """
    Xa = np.zeros(len(gbd_regions) + 2)
    for ii, r in enumerate(gbd_regions):
        if clean(d['gbd_region']) == clean(r):
            Xa[ii] = 1.

    # midpoint of the study period, centered at 1997 and scaled by .1
    Xa[ii+1] = .1 * (.5 * (float(d['year_start']) + float(d['year_end'])) - 1997)

    if clean(d['sex']) == 'male':
        Xa[ii+2] = .5
    elif clean(d['sex']) == 'female':
        Xa[ii+2] = -.5
    else:
        Xa[ii+2] = 0.

    # size must be an integer (was np.zeros(5.), which modern numpy rejects)
    Xb = np.zeros(5)

    # TODO: instead of hard-coding this, store it in the disease model
    # (and let users set it through the web)
    if clean(d.get('self_reported', '')) == 'true':
        Xb[0] = 1.
    if 'threshold' in d:
        # store threshold in its documented slot Xb[1]; previously this
        # wrote to Xb[0], clobbering the self-reported indicator
        Xb[1] = float(d['threshold'])

    return Xa, Xb
Ejemplo n.º 3
0
def regional_covariates(key, covariates_dict, derived_covariate):
    """ form the covariates for a gbd key"""
    if key not in covariate_hash:
        try:
            t, r, y, s = dismod3.utils.type_region_year_sex_from_key(key)
        except KeyError:
            # key does not encode region/year/sex; fall back to defaults
            r, y, s = 'world', 1997, 'total'

        d = dict(gbd_region=r, year_start=y, year_end=y, sex=s)
        for level in ('Study_level', 'Country_level'):
            for cov in covariates_dict[level]:
                if cov == 'none':
                    continue
                if not covariates_dict[level][cov]['rate']['value']:
                    continue
                d[clean(cov)] = covariates_dict[level][cov]['value']['value']
                if level == 'Country_level':
                    # country-level covariates use the regional average
                    d[clean(cov)] = regional_average(derived_covariate, cov, r, y, s)
                else:
                    d[clean(cov)] = float(d[clean(cov)] or 0.)

        covariate_hash[key] = covariates(d, covariates_dict)

    return covariate_hash[key]
Ejemplo n.º 4
0
def covariates(d, covariates_dict):
    """ extract the covariates from a data point as a vector;

    Xa represents region-level covariates:
      Xa[0],...,Xa[21] = region indicators
      Xa[22] = .1*(year-1997)
      Xa[23] = .5 if sex == 'male', -.5 if sex == 'female'
    Xb represents study-level covariates, according to the covariates_dict
    """
    Xa = np.zeros(len(gbd_regions) + 2)
    region = clean(d['gbd_region'])
    for ii, r in enumerate(gbd_regions):
        if region == clean(r):
            Xa[ii] = 1.

    # scaled distance of the study-period midpoint from 1997
    midpoint = .5 * (float(d['year_start']) + float(d['year_end']))
    Xa[ii+1] = .1 * (midpoint - 1997)

    sex = clean(d['sex'])
    if sex == 'male':
        Xa[ii+2] = .5
    elif sex == 'female':
        Xa[ii+2] = -.5
    else:
        Xa[ii+2] = 0.

    # one entry per covariate whose rate effect is enabled for this data type
    Xb = []
    data_type = standardize_data_type[d['parameter']][:-5]
    for level in ('Study_level', 'Country_level'):
        for k in sorted(covariates_dict[level]):
            cov = covariates_dict[level][k]
            if cov['rate']['value'] == 1 and data_type in cov['types']['value']:
                Xb.append(float(d.get(clean(k)) or 0.))
    if not Xb:
        Xb = [0.]
    return Xa, Xb
Ejemplo n.º 5
0
    def get_global_priors(self, type):
        """ Return the global priors that best match the specified type

        ``type`` is compared to each prior's key after cleaning
        (e.g. 'excess-mortality' and 'excess_mortality' both match).

        Build and cache ``self.global_priors`` from the raw
        'global_priors' params, if necessary.

        Returns the prior string for the matching type, or '' when no
        type matches.
        """
        if not hasattr(self, "global_priors"):
            raw_dict = self.params.get("global_priors", {})
            self.global_priors = {
                "prevalence": {},
                "incidence": {},
                "remission": {},
                "excess_mortality": {},
                "relative_risk": {},
                "duration": {},
            }

            # reverse the order of the first and second level of keys in the raw_dict
            # this will be more convenient later
            for k1 in [
                "heterogeneity",
                "smoothness",
                "level_value",
                "level_bounds",
                "increasing",
                "decreasing",
                "unimodal",
            ]:
                # 'in' replaces dict.has_key, which was removed in python 3
                if k1 not in raw_dict:
                    continue
                for k2 in raw_dict[k1]:
                    self.global_priors[k2][k1] = raw_dict[k1][k2]

            # deal with the dash vs underscore; the dashed aliases share the
            # same dict objects as the underscored entries
            self.global_priors["excess-mortality"] = self.global_priors["excess_mortality"]
            self.global_priors["relative-risk"] = self.global_priors["relative_risk"]

            for k in self.global_priors:
                self.global_priors[k]["prior_str"] = prior_dict_to_str(self.global_priors[k])
        for k in self.global_priors:
            if clean(type) == clean(k):
                return self.global_priors[k]["prior_str"]

        return ""
Ejemplo n.º 6
0
def regional_population(key):
    """ calculate regional population for a gbd key"""
    t, r, y, s = type_region_year_sex_from_key(key)
    # sum the population-by-age vector of every country in the region
    total = np.zeros(MAX_AGE)
    for iso3 in countries_for[clean(r)]:
        total = total + population_by_age[(iso3, y, s)]
    return total
Ejemplo n.º 7
0
def regional_population(key):
    """ calculate regional population for a gbd key"""
    t, r, y, s = dismod3.utils.type_region_year_sex_from_key(key)
    pop = pl.zeros(dismod3.settings.MAX_AGE)
    for iso3 in countries_for[clean(r)]:
        if (y, s) == ('all', 'all'):
            # aggregate over every GBD year and sex
            for year in dismod3.settings.gbd_years:
                for sex in dismod3.settings.gbd_sexes:
                    pop += population_by_age[(iso3, year, dismod3.utils.clean(sex))]
        else:
            pop += population_by_age[(iso3, y, s)]
    return pop
Ejemplo n.º 8
0
def country_covariates(key, iso3, covariates_dict, derived_covariate):
    """ Form the covariates for a gbd key and country.

    Parameters
    ----------
    key : str, gbd key of the form 'type+region+year+sex'
    iso3 : str, country code
    covariates_dict : dict of study- and country-level covariate settings
    derived_covariate : dict mapping covariate name -> {'iso3+year+sex': value}

    Results are memoized in the module-level covariate_hash.
    """
    if not (key, iso3) in covariate_hash:
        t,r,y,s = dismod3.utils.type_region_year_sex_from_key(key)

        d = {'gbd_region': r,
             'year_start': y,
             'year_end': y,
             'sex': s}
        for level in ['Study_level', 'Country_level']:
            for k in covariates_dict[level]:
                if k == 'none':
                    continue
                if covariates_dict[level][k]['rate']['value']:
                    d[clean(k)] = covariates_dict[level][k]['value']['value']
                    if level == 'Country_level':
                        lookup = '%s+%s+%s' % (iso3, y, s)
                        if k not in derived_covariate:
                            # report the covariate name (was '% key', which
                            # printed the gbd key instead of the covariate)
                            debug('WARNING: derived covariate %s not found' % k)
                            d[clean(k)] = 0.
                        elif lookup not in derived_covariate[k]:  # 'in' replaces py2-only has_key
                            debug('WARNING: derived covariate %s not found for (%s, %s, %s)' % (k, iso3, y, s))
                            d[clean(k)] = 0.
                        else:
                            d[clean(k)] = derived_covariate[k][lookup]
                    else:
                        d[clean(k)] = float(d[clean(k)] or 0.)

        covariate_hash[(key, iso3)] = covariates(d, covariates_dict)
    return covariate_hash[(key, iso3)]
Ejemplo n.º 9
0
def country_covariates(key, iso3, covariates_dict):
    """ form the covariates for a gbd key"""
    cache_key = (key, iso3)
    if cache_key not in covariate_hash:
        t, r, y, s = type_region_year_sex_from_key(key)

        d = dict(parameter=t, gbd_region=r, year_start=y, year_end=y, sex=s)
        for level in ('Study_level', 'Country_level'):
            for name in covariates_dict[level]:
                if name == 'none':
                    continue
                val = covariates_dict[level][name]['value']['value']
                if val == 'Country Specific Value':
                    # substitute this country's default value
                    val = covariates_dict[level][name]['defaults'].get(iso3, 0.)
                else:
                    val = float(val or 0.)
                d[clean(name)] = val

        covariate_hash[cache_key] = covariates(d, covariates_dict)
    return covariate_hash[cache_key]
Ejemplo n.º 10
0
def regional_covariates(key, covariates_dict, derived_covariate):
    """ form the covariates for a gbd key"""
    if key not in covariate_hash:
        t, r, y, s = type_region_year_sex_from_key(key)

        d = dict(gbd_region=r, year_start=y, year_end=y, sex=s)
        for level in ('Study_level', 'Country_level'):
            for name in covariates_dict[level]:
                if name == 'none':
                    continue
                if not covariates_dict[level][name]['rate']['value']:
                    continue
                val = covariates_dict[level][name]['value']['value']
                if val == 'Country Specific Value':
                    # substitute the regional average of the derived covariate
                    val = regional_average(derived_covariate, name, r, y, s)
                else:
                    val = float(val or 0.)
                d[clean(name)] = val

        covariate_hash[key] = covariates(d, covariates_dict)

    return covariate_hash[key]
Ejemplo n.º 11
0
def covariates(d, covariates_dict):
    """ extract the covariates from a data point as a vector;

    Xa represents region-level covariates:
      Xa[0],...,Xa[21] = region indicators
      Xa[22] = .1*(year-1997)
      Xa[23] = .5 if sex == 'male', -.5 if sex == 'female'
    Xb represents study-level covariates, according to the covariates_dict
    """
    region = clean(d['gbd_region'])
    Xa = pl.zeros(len(dismod3.gbd_regions) + 2)
    for ii, r in enumerate(dismod3.gbd_regions):
        if region == clean(r):
            Xa[ii] = 1.

    # year effect: zero for 'all', otherwise scaled distance of the
    # study-period midpoint from 1997
    if d['year_start'] == 'all':
        Xa[ii+1] = 0.
    else:
        midpoint = .5 * (float(d['year_start']) + float(d['year_end']))
        Xa[ii+1] = .1 * (midpoint - 1997)

    sex = clean(d['sex'])
    if sex == 'male':
        Xa[ii+2] = .5
    elif sex == 'female':
        Xa[ii+2] = -.5
    else:
        Xa[ii+2] = 0.

    # one entry per covariate whose rate effect is enabled
    Xb = [float(d.get(clean(k)) or 0.)
          for level in ('Study_level', 'Country_level')
          for k in sorted(covariates_dict[level])
          if covariates_dict[level][k]['rate']['value'] == 1]
    if not Xb:
        Xb = [0.]
    return Xa, Xb
Ejemplo n.º 12
0
def regional_covariates(key, covariates_dict):
    """ Form the covariates for a gbd key.

    Parameters
    ----------
    key : str, gbd key of the form 'type+region+year+sex'
    covariates_dict : dict of study- and country-level covariate settings

    Results are memoized in the module-level covariate_hash.
    """
    if not key in covariate_hash:
        t,r,y,s = type_region_year_sex_from_key(key)

        d = {'parameter': t,
             'gbd_region': r,
             'year_start': y,
             'year_end': y,
             'sex': s}
        for level in ['Study_level', 'Country_level']:
            for k in covariates_dict[level]:
                if k == 'none':
                    continue
                d[clean(k)] = covariates_dict[level][k]['value']['value']
                if d[clean(k)] == 'Country Specific Value':
                    # FIXME: this could be returning bogus answers
                    d[clean(k)] = regional_average(covariates_dict[level][k]['defaults'], r)
                else:
                    # was '==' (a no-op comparison); assign the converted
                    # float so string values do not pass through unchanged
                    d[clean(k)] = float(d[clean(k)] or 0.)

        covariate_hash[key] = covariates(d, covariates_dict)

    return covariate_hash[key]
Ejemplo n.º 13
0
    def relevant_to(self, d, t, r, y, s):
        """ Determine if data is relevant to specified type, region, year, and sex

        Parameters
        ----------
        d : data hash
        t : str, one of 'incidence data', 'prevalence data', etc... or 'all'
        r : str, one of 21 GBD regions or 'all'
        y : int, one of 1990, 2005 or 'all'
        s : sex, one of 'male', 'female' or 'all'

        Raises
        ------
        KeyError : if y is not 'all', 1990, 1997, or 2005
        """
        from dismod3.utils import clean

        # ignore data if requested
        if d.get('ignore') == 1:
            return False

        # check if data is of the correct type
        if t != 'all':
            if clean(d['data_type']) != clean(t + ' data'):
                return False

        # check if data is from correct region
        if r != 'all' and r != 'world':
            if clean(d['gbd_region']) != clean(r) and clean(d['gbd_region']) != 'all':
                return False

        # check if data is from relevant year
        if y != 'all':
            y = int(y)
            if y not in [1990, 1997, 2005]:
                # parenthesized raise works in both python 2 and 3
                # (the old 'raise KeyError, msg' form is a py3 syntax error)
                raise KeyError('GBD Year must be 1990 or 2005 (or 1997 for all years)')
            # 1997 is the boundary between the 1990 and 2005 estimation periods
            if y == 2005 and d['year_end'] < 1997:
                return False
            if y == 1990 and d['year_start'] > 1997:
                return False

        # check if data is for relevant sex
        if s != 'all':
            if clean(d['sex']) != clean(s) and clean(d['sex']) != 'all':
                return False

        # if code makes it this far, the data is relevant
        return True
Ejemplo n.º 14
0
def relevant_to(d, t, r, y, s):
    """ Determine if data is relevant to specified type, region, year, and sex

    Parameters
    ----------
    d : data hash
    t : str, one of 'incidence data', 'prevalence data', etc... or 'all'
    r : str, one of 21 GBD regions or 'all'
    y : int, one of 1990, 2005 or 'all'
    s : sex, one of 'male', 'female' or 'all'

    Raises
    ------
    KeyError : if y is not 'all', 1990, 1997, or 2005
    """
    # ignore data if requested
    if d.get('ignore') == 1:
        return False

    # check if data is of the correct type (data_type must start with t)
    if t != 'all':
        if clean(d['data_type']).find(clean(t)) != 0:
            return False

    # check if data is from correct region
    if r != 'all' and r != 'world':
        if clean(d['gbd_region']) != clean(r) and clean(d['gbd_region']) != 'all':
            return False

    # check if data is from relevant year
    if y != 'all':
        y = int(y)
        if y not in [1990, 1997, 2005]:
            # parenthesized raise works in both python 2 and 3
            # (the old 'raise KeyError, msg' form is a py3 syntax error)
            raise KeyError('GBD Year must be 1990 or 2005 (or 1997 for all years)')
        # 1997 is the boundary between the 1990 and 2005 estimation periods
        if y == 2005 and d['year_end'] < 1997:
            return False
        if y == 1990 and d['year_start'] > 1997:
            return False

    # check if data is for relevant sex
    if s != 'all':
        if clean(d['sex']) != clean(s) and clean(d['sex']) != 'all':
            return False

    # if code makes it this far, the data is relevant
    return True
Ejemplo n.º 15
0
def daemon_loop():
    """ Poll the job queue forever, dispatching dismod fit jobs.

    Each queued job names a disease model; depending on the model's
    'estimate_type', this either fits posteriors for every
    region/sex/year combination or fits pooled empirical priors,
    launching each fit as a gbd_fit subprocess (via SGE when ON_SGE).
    """
    on_sge = dismod3.settings.ON_SGE
    while True:
        try:
            job_queue = dismod3.get_job_queue()
        except Exception:
            # server unreachable, etc.: treat as an empty queue and retry
            # after the sleep below.  (Was a bare 'except:', which would
            # also swallow KeyboardInterrupt/SystemExit.)
            job_queue = []

        for param_id in job_queue:
            log('processing job %d' % param_id)
            job_params = dismod3.remove_from_job_queue(param_id)
            id = int(job_params['dm_id'])
            dm = dismod3.get_disease_model(id)

            # make a working directory for the id
            dir = dismod3.settings.JOB_WORKING_DIR % id
            if not os.path.exists(dir):
                os.makedirs(dir)

            estimate_type = dm.params.get('run_status', {}).get('estimate_type', 'fit all individually')

            if estimate_type.find('posterior') != -1:
                # fit each region/year/sex individually for this model
                regions_to_fit = dm.params.get('run_status', {}).get('regions_to_fit', [])
                # default to all regions when none were requested
                # (indexing an empty list would raise IndexError)
                if not regions_to_fit or regions_to_fit[0] == 'all_regions':
                    regions_to_fit = dismod3.gbd_regions
                d = '%s/posterior' % dir
                if os.path.exists(d):
                    rmtree(d)
                os.mkdir(d)
                os.mkdir('%s/stdout' % d)
                os.mkdir('%s/stderr' % d)
                dismod3.init_job_log(id, 'posterior', param_id)
                for r in regions_to_fit:
                    for s in dismod3.gbd_sexes:
                        for y in dismod3.gbd_years:
                            k = '%s+%s+%s' % (clean(r), s, y)
                            o = '%s/stdout/%s' % (d, k)
                            e = '%s/stderr/%s' % (d, k)
                            # NOTE: GBD_FIT_STR takes its arguments in a
                            # different order on and off the cluster
                            if on_sge:
                                call_str = dismod3.settings.GBD_FIT_STR % (o, e, '-l -r %s -s %s -y %s' % (clean(r), s, y), id)
                            else:
                                call_str = dismod3.settings.GBD_FIT_STR % ('-l -r %s -s %s -y %s' % (clean(r), s, y), id, o, e)
                            subprocess.call(call_str, shell=True)
                            time.sleep(1.)

            elif estimate_type.find('empirical priors') != -1:
                # fit empirical priors (by pooling data from all regions)
                d = '%s/empirical_priors' % dir
                if os.path.exists(d):
                    rmtree(d)
                os.mkdir(d)
                os.mkdir('%s/stdout' % d)
                os.mkdir('%s/stderr' % d)
                dismod3.init_job_log(id, 'empirical_priors', param_id)
                for t in ['excess-mortality', 'remission', 'incidence', 'prevalence']:
                    o = '%s/stdout/%s' % (d, t)
                    e = '%s/stderr/%s' % (d, t)
                    if on_sge:
                        subprocess.call(dismod3.settings.GBD_FIT_STR % (o, e, '-l -t %s' % t, id), shell=True)
                    else:
                        subprocess.call(dismod3.settings.GBD_FIT_STR % ('-l -t %s' % t, id, o, e), shell=True)

            else:
                log('unrecognized estimate type: %s' % estimate_type)

        time.sleep(dismod3.settings.SLEEP_SECS)
Ejemplo n.º 16
0
def daemon_loop():
    on_sge = dismod3.settings.ON_SGE
    while True:
        try:
            job_queue = dismod3.get_job_queue()
        except:
            job_queue = []
        
        for param_id in job_queue:
            #tweet('processing job %d' % id)
            log('processing job %d' % param_id)
            job_params = dismod3.remove_from_job_queue(param_id)
            id = int(job_params['dm_id'])
            dm = dismod3.get_disease_model(id)

            # make a working directory for the id
            dir = dismod3.settings.JOB_WORKING_DIR % id
            if os.path.exists(dir):
                dismod3.disease_json.random_rename(dir)
            os.makedirs(dir)

            estimate_type = dm.params.get('run_status', {}).get('estimate_type', 'fit all individually')

            # sort the regions so that the data rich regions are fit first
            #data_hash = GBDDataHash(dm.data)
            #sorted_regions = sorted(dismod3.gbd_regions, reverse=True,
                                    #key=lambda r: len(data_hash.get(region=r)))

            if estimate_type == 'Fit continuous single parameter model':
                #dismod3.disease_json.create_disease_model_dir(id)
                o = '%s/continuous_spm.stdout' % dir
                e = '%s/continuous_spm.stderr' % dir
                if on_sge:
                    print o
                    print e
                    call_str = 'qsub -cwd -o %s -e %s ' % (o, e) \
                               + 'run_on_cluster.sh /home/OUTPOST/abie/gbd_dev/gbd/fit_continuous_spm.py %d' % id
                else:
                    call_str = 'python -u /home/abie/gbd/fit_continuous_spm.py %d 2>%s |tee %s' % (id, e, o)
                subprocess.call(call_str, shell=True)
                continue
            
            if estimate_type.find('posterior') != -1:
                #fit each region/year/sex individually for this model
                regions_to_fit = dm.params.get('run_status', {}).get('regions_to_fit', [])
                if regions_to_fit[0] == 'all_regions':
                    regions_to_fit = dismod3.gbd_regions
                d = '%s/posterior' % dir
                if os.path.exists(d):
                    rmtree(d)
                os.mkdir(d)
                os.mkdir('%s/stdout' % d)
                os.mkdir('%s/stderr' % d)
                os.mkdir('%s/pickle' % d)
                dismod3.init_job_log(id, 'posterior', param_id)
                for r in regions_to_fit:
                    for s in dismod3.gbd_sexes:
                        for y in dismod3.gbd_years:
                            # fit only one region, for the time being...
                            # TODO: make region selection a user-settable option from the gui
                            #if clean(r) != 'asia_southeast':
                            #    continue
                            k = '%s+%s+%s' % (clean(r), s, y)
                            o = '%s/stdout/%s' % (d, k)
                            e = '%s/stderr/%s' % (d, k)
                            if on_sge:
                                call_str = dismod3.settings.GBD_FIT_STR % (o, e, '-l -r %s -s %s -y %s' % (clean(r), s, y), id)
                                subprocess.call(call_str, shell=True)
                            else:
                                call_str = dismod3.settings.GBD_FIT_STR % ('-l -r %s -s %s -y %s' % (clean(r), s, y), id, o, e)
                                subprocess.call(call_str, shell=True)
                            #time.sleep(1.)

            elif estimate_type.find('empirical priors') != -1:
                # fit empirical priors (by pooling data from all regions
                d = '%s/empirical_priors' % dir
                if os.path.exists(d):
                    rmtree(d)
                os.mkdir(d)
                os.mkdir('%s/stdout' % d)
                os.mkdir('%s/stderr' % d)
                os.mkdir('%s/pickle' % d)
                dismod3.init_job_log(id, 'empirical_priors', param_id)
                for t in ['excess-mortality', 'remission', 'incidence', 'prevalence']:
                    o = '%s/stdout/%s' % (d, t)
                    e = '%s/stderr/%s' % (d, t)
                    if on_sge:
                        subprocess.call(dismod3.settings.GBD_FIT_STR % (o, e, '-l -t %s' % t, id), shell=True)
                    else:
                        subprocess.call(dismod3.settings.GBD_FIT_STR % ('-l -t %s' % t, id, o, e), shell=True)

            else:
                #tweet('unrecognized estimate type: %s' % estimate_type)
                log('unrecognized estimate type: %s' % estimate_type)
            
        time.sleep(dismod3.settings.SLEEP_SECS)
Ejemplo n.º 17
0
def fit_all(id):
    """ Enqueues all jobs necessary to fit specified model
    to the cluster

    Parameters
    ----------
    id : int
      The model id number for the job to fit

    Example
    -------
    >>> import fit_all
    >>> fit_all.fit_all(2552)
    """

    # TODO: store all disease information in this dir already, so fetching is not necessary
    # download the disease model json and store it in the working dir
    print 'downloading disease model'
    dismod3.disease_json.create_disease_model_dir(id)
    dm = dismod3.fetch_disease_model(id)
    
    # get the all-cause mortality data, and merge it into the model
    mort = dismod3.fetch_disease_model('all-cause_mortality')
    dm.data += mort.data
    dm.save()

    # fit empirical priors (by pooling data from all regions)
    dir = dismod3.settings.JOB_WORKING_DIR % id  # TODO: refactor into a function
    emp_names = []
    # NOTE(review): only the prevalence empirical prior is fit here —
    # presumably deliberate for this prevalence-only pipeline (see
    # fit_posterior_prevonly.py below); confirm before extending
    for t in ['prevalence']:
        o = '%s/empirical_priors/stdout/%s' % (dir, t)
        e = '%s/empirical_priors/stderr/%s' % (dir, t)
        # short SGE job name, e.g. 'p-1234'
        name_str = '%s-%d' %(t[0], id)
        emp_names.append(name_str)
        call_str = 'qsub -cwd -o %s -e %s ' % (o, e) \
                        + '-N %s ' % name_str \
                        + 'run_on_cluster.sh fit_emp_prior.py %d -t %s' % (id, t)
        subprocess.call(call_str, shell=True)

    # directory to save the country level posterior csv files
    temp_dir = dir + '/posterior/country_level_posterior_dm-' + str(id) + '/'
    if os.path.exists(temp_dir):
        rmtree(temp_dir)
    os.makedirs(temp_dir)

    #fit each region/year/sex individually for this model
    # each posterior job waits (-hold_jid) on all empirical-prior jobs
    hold_str = '-hold_jid %s ' % ','.join(emp_names)
    post_names = []
    for ii, r in enumerate(dismod3.gbd_regions):
        for s in dismod3.gbd_sexes:
            for y in dismod3.gbd_years:
                k = '%s+%s+%s' % (clean(r), s, y)
                o = '%s/posterior/stdout/%s' % (dir, k)
                e = '%s/posterior/stderr/%s' % (dir, k)
                # compact unique job name: region initial + index,
                # sex initial, last digit of year, model id
                name_str = '%s%d%s%s%d' % (r[0], ii+1, s[0], str(y)[-1], id)
                post_names.append(name_str)
                call_str = 'qsub -cwd -o %s -e %s ' % (o,e) \
                           + hold_str \
                           + '-N %s ' % name_str \
                           + 'run_on_cluster.sh fit_posterior_prevonly.py %d -r %s -s %s -y %s' % (id, clean(r), s, y)
                subprocess.call(call_str, shell=True)

    # after all posteriors have finished running, upload disease model json
    hold_str = '-hold_jid %s ' % ','.join(post_names)
    o = '%s/upload.stdout' % dir
    e = '%s/upload.stderr' % dir
    call_str = 'qsub -cwd -o %s -e %s ' % (o,e) \
               + hold_str \
               + '-N upld-%s ' % id \
               + 'run_on_cluster.sh upload_fits.py %d' % id
    subprocess.call(call_str, shell=True)
Ejemplo n.º 18
0
def fit(dm, method='map'):
    """ Generate an estimate of the generic disease model parameters
    using maximum a posteriori liklihood (MAP) or Markov-chain Monte
    Carlo (MCMC)

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)

    method : string, optional
      the parameter estimation method, either 'map' or 'mcmc'

    Example
    -------
    >>> import dismod3
    >>> import dismod3.generic_disease_model as model
    >>> dm = dismod3.get_disease_model(1)
    >>> model.fit(dm, method='map')
    >>> model.fit(dm, method='mcmc')
    """
    # build the model variables once and cache them on dm, so a later
    # 'mcmc' call can reuse the state from an earlier 'map' call
    if not hasattr(dm, 'vars'):
        for param_type in ['incidence', 'remission', 'excess-mortality']:
            # find initial values for these rates
            data =  [d for d in dm.data if clean(d['data_type']).find(param_type) != -1]

            # use a random subset of the data if there is a lot of it,
            # to speed things up
            if len(data) > 25:
                dm.fit_initial_estimate(param_type, random.sample(data,25))
            else:
                dm.fit_initial_estimate(param_type, data)

            dm.set_units(param_type, '(per person-year)')

        dm.set_units('prevalence', '(per person)')
        dm.set_units('duration', '(years)')

        dm.vars = setup(dm)

    if method == 'map':
        # cache the MAP object on dm as well, for warm restarts
        if not hasattr(dm, 'map'):
            dm.map = mc.MAP(dm.vars)
            
        try:
            dm.map.fit(method='fmin_powell', iterlim=500, tol=.001, verbose=1)
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass

        # record the fitted rates for every output data type
        for t in dismod3.settings.output_data_types:
            t = clean(t)
            val = dm.vars[t]['rate_stoch'].value
            dm.set_map(t, val)
            dm.set_initial_value(t, val)  # better initial value may save time in the future
    elif method == 'mcmc':
        if not hasattr(dm, 'mcmc'):
            dm.mcmc = mc.MCMC(dm.vars)
            # use adaptive Metropolis steps for each group of logit-rate stochs
            for key in dm.vars:
                stochs = dm.vars[key].get('logit_p_stochs', [])
                if len(stochs) > 0:
                    dm.mcmc.use_step_method(mc.AdaptiveMetropolis, stochs)

        try:
            dm.mcmc.sample(iter=60*1000, burn=10*1000, thin=50, verbose=1)
        except KeyboardInterrupt:
            # if user cancels with cntl-c, save current values for "warm-start"
            pass
        # store the posterior samples for every output data type
        for t in dismod3.settings.output_data_types:
            t = clean(t)
            rate_model.store_mcmc_fit(dm, t, dm.vars[t])
Ejemplo n.º 19
0
Archivo: table.py Proyecto: flaxter/gbd
def table_disease_model(dm, keys, ws, x, y, group_size):
    """Make a table representation of the disease model data and
    estimates provided, writing it into an excel worksheet

    Parameters
    ----------
    dm : DiseaseJson object
      the disease model whose data and estimates are to be tabulated
    keys : list
      the keys to include; all keys are assumed to share the same
      region, year and sex (taken from keys[0])
    ws : work sheet
      the excel worksheet to write into
    x : horizontal shift (starting row)
    y : vertical shift (starting column)
    group_size : positive integer smaller than 102
      0 selects the standard GBD age groups, 1 gives single-year ages,
      any larger value gives uniform age groups of that width
    """
    MAX_AGE = dismod3.MAX_AGE

    # age-group widths: the default list is the standard GBD grouping;
    # group_size > 1 replaces it with uniform groups plus a remainder group
    group_sizes = [1, 4, 5, 5, 5, 5, 10, 10, 10, 10, 10, 10, 16]
    if group_size > 1:
        group_sizes = []
        for i in range(MAX_AGE / group_size):   # NOTE: integer division (Python 2)
            group_sizes.append(group_size)
        if MAX_AGE % group_size > 0:
            group_sizes.append(MAX_AGE % group_size)

    data_hash = GBDDataHash(dm.data)
    c = dismod3.utils.KEY_DELIM_CHAR
    type, region, year, sex = keys[0].split(c)

    # add a key: with-condition-death = with-condition-mortality * prevalence * population
    keys.append('with-condition-death' + c + region + c + year + c + sex)

    ws.write(x + 2, y, "Condition: %s" % (dm.params['condition']))
    ws.write(x + 3, y, "Region: %s" % (region))
    ws.write(x + 4, y + 1, "%s %s" % (sex.capitalize(), year))
    x += 5

    # header row 1: which section each column belongs to
    for i in range(1, 5):
        ws.write(x, y + i, "Data")
    for i in range(5, 17):
        ws.write(x, y + i, "Prior")
    for i in range(17, 45):
        ws.write(x, y + i, "Posterior")
    x += 1

    # header row 2: column labels, generated instead of 45 separate writes.
    # Layout: col 0 age; cols 1-4 raw data; cols 5-16 empirical prior
    # (mean/lower/upper for each of the four rates); cols 17-44 posterior
    # (mean/lower/upper per output type, with col 38 a single age column)
    rate_types = ["Prevalence", "Incidence", "Remission", "Excess Mortality"]
    labels = ["Age"] + rate_types
    for t in rate_types:
        labels += [t] * 3          # prior: mean, lower ui, upper ui
    for t in rate_types:
        labels += [t] * 3          # posterior: mean, lower ui, upper ui
    for t in ["Duration", "With-condition Mortality", "RR Mortality"]:
        labels += [t] * 3
    labels += ["Age of onset"]
    for t in ["Incidence_x_duration", "With-condition Death"]:
        labels += [t] * 3
    for i, label in enumerate(labels):
        ws.write(x, y + i, label)
    x += 1

    # header row 3: units, following the same column layout
    units_row = ["(years)"] + ["(rate)"] * 4
    for u in ["(rate)"] * 8 + ["(years)"] + ["(rate)"] * 2:
        units_row += [u, "lower ui", "upper ui"]
    units_row += ["(years)"]
    for u in ["(thousand person-years)", "(thousands)"]:
        units_row += [u, "lower ui", "upper ui"]
    for i, u in enumerate(units_row):
        ws.write(x, y + i, u)
    x += 1

    # age column (col y) and age-interval midpoint column (col y + 38)
    y38 = y + 38
    if group_size == 1:
        for j in range(MAX_AGE):
            ws.write(x + j, y, j)
            ws.write(x + j, y38, j + .5)
    elif group_size == 0:
        start = 0
        end = 0
        for j, s in enumerate(group_sizes):
            start = end
            end = start + s
            if start == 0:
                ws.write(x + j, y, "0")
            elif start == 85:
                ws.write(x + j, y, "85+")
            else:
                ws.write(x + j, y, "%s-%s" % (start, end - 1))
            ws.write(x + j, y38, .5 * (start + end))
    else:
        for j in range(MAX_AGE / group_size + 1):
            start = j * group_size
            end = start + group_size
            if end > MAX_AGE:
                end = MAX_AGE
            ws.write(x + j, y, "%s-%s" % (start, end - 1))
            ws.write(x + j, y38, .5 * (start + end))

    # column offsets (relative to y) for each output type in each section
    data_columns = {'prevalence': 1, 'incidence': 2, 'remission': 3,
                    'excess-mortality': 4}
    prior_columns = {'prevalence': 5, 'incidence': 8, 'remission': 11,
                     'excess-mortality': 14}
    posterior_columns = {'prevalence': 17, 'incidence': 20, 'remission': 23,
                         'excess-mortality': 26, 'duration': 29,
                         'mortality': 32, 'relative-risk': 35,
                         'incidence_x_duration': 39,
                         'with-condition-death': 42}

    for k in keys:
        type, region, year, sex = k.split(c)
        data_type = clean(type) + ' data'
        data = data_hash.get(data_type, region, year, sex) \
               + data_hash.get(data_type, region, year, 'total')

        # section 1: raw data, averaged over the observations covering each age
        if type in data_columns:
            column = y + data_columns[type]
            data_all = []
            data_weight_all = []
            for j in range(MAX_AGE):
                data_all.append('')
                data_weight_all.append(0)
            for i in range(len(data)):
                start = data[i]['age_start']
                # clamp to the last valid index; ages run 0 .. MAX_AGE-1,
                # so an observation with age_end >= MAX_AGE must not index
                # past the end of data_all
                end = min(data[i]['age_end'], MAX_AGE - 1)
                for j in range(start, end + 1):
                    p = data[i]['parameter_value'] / float(data[i]['units'])
                    # unweighted average; inverse-variance age weighting was
                    # disabled in the original implementation
                    data_weight = 1
                    if data_all[j] == '':
                        data_all[j] = p * data_weight
                    else:
                        data_all[j] += p * data_weight
                    data_weight_all[j] += data_weight
            for j in range(MAX_AGE):
                if data_weight_all[j] != 0:
                    data_all[j] = data_all[j] / data_weight_all[j]
            if group_size == 1:
                for j in range(MAX_AGE):
                    ws.write(x + j, column, data_all[j])
            else:
                # average the single-age values over each age group;
                # groups with no data leave their cell blank
                start = 0
                end = 0
                for j, gs in enumerate(group_sizes):
                    start = end
                    end = min(start + gs, MAX_AGE)
                    s = 0
                    n = 0
                    for i in range(start, end):
                        if data_all[i] != '':
                            s += data_all[i]
                            n += 1
                    if n != 0:
                        ws.write(x + j, column, s / n)

        # sections 2 and 3: empirical prior and posterior estimates,
        # each written as (mean, lower ui, upper ui)
        for col_map, stat_names in [
                (prior_columns, ('emp_prior_mean', 'emp_prior_lower_ui', 'emp_prior_upper_ui')),
                (posterior_columns, ('mean', 'lower_ui', 'upper_ui'))]:
            if type in col_map:
                column = y + col_map[type]
                for offset, stat in enumerate(stat_names):
                    if group_size == 1:
                        write_table_age_value(dm, k, stat, ws, x, column + offset)
                    else:
                        write_table_group_value(dm, k, stat, ws, x, column + offset, group_sizes)
Ejemplo n.º 20
0
def fit_emp_prior(dm, param_type, iter=30000, thin=20, burn=10000, dbname='/dev/null'):
    """ Generate an empirical prior distribution for a single disease parameter

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      The object containing all the data, (hyper)-priors, and additional
      information (like input and output age-mesh).

    param_type : str, one of 'incidence', 'prevalence', 'remission', 'excess-mortality'
      The disease parameter to work with

    Notes
    -----
    The results of this fit are stored in the disease model's params
    hash for use when fitting multiple paramter types together

    Example
    -------
    $ python2.5 gbd_fit.py 231 -t incidence
    """

    data = [d for d in dm.data if clean(d['data_type']).find(param_type) != -1 and d.get('ignore') != -1]
    dm.calc_effective_sample_size(data)

    lower_bound_data = []
    if param_type == 'excess-mortality':
        lower_bound_data = [d for d in dm.data if d['data_type'] == 'cause-specific mortality data']
        dm.calc_effective_sample_size(lower_bound_data)
                        
    dm.clear_empirical_prior()
    dm.fit_initial_estimate(param_type, data)

    dm.vars = setup(dm, param_type, data, lower_bound_data=lower_bound_data)

    # don't do anything if there is no data for this parameter type
    if len(dm.vars['data']) == 0:
        return

    debug('i: %s' % ', '.join(['%.2f' % x for x in dm.vars['rate_stoch'].value[::10]]))
    sys.stdout.flush()
    
    # fit the model
    #dm.na = mc.NormApprox(dm.vars)

    #dm.na.fit(method='fmin_powell', verbose=1)
    #dm.na.sample(1000, verbose=1)

    log_dispersion = dm.vars.pop('log_dispersion')  # remove the dispersion term while finding initial values for MCMC
    dm.map = mc.MAP(dm.vars)
    dm.vars.update(log_dispersion=log_dispersion)
    
    try:
        dm.map.fit(method='fmin_powell', iterlim=500, verbose=1)
    except KeyboardInterrupt:
        debug('User halted optimization routine before optimal value found')
    sys.stdout.flush()

    # make pymc warnings go to stdout
    mc.warnings.warn = sys.stdout.write
    dm.mcmc = mc.MCMC(dm.vars, db='pickle', dbname=dbname)
    dm.mcmc.use_step_method(mc.Metropolis, dm.vars['log_dispersion'],
                            proposal_sd=dm.vars['dispersion_step_sd'])
    dm.mcmc.use_step_method(mc.AdaptiveMetropolis, dm.vars['age_coeffs_mesh'],
                            cov=dm.vars['age_coeffs_mesh_step_cov'], verbose=0)
    dm.mcmc.sample(iter=iter, burn=burn, thin=thin, verbose=1)
    dm.mcmc.db.commit()
    
    dm.vars['region_coeffs'].value = dm.vars['region_coeffs'].stats()['mean']
    dm.vars['study_coeffs'].value = dm.vars['study_coeffs'].stats()['mean']
    dm.vars['age_coeffs_mesh'].value = dm.vars['age_coeffs_mesh'].stats()['mean']
    dm.vars['log_dispersion'].value = dm.vars['log_dispersion'].stats()['mean']

    alpha = dm.vars['region_coeffs'].stats()['mean']
    beta = dm.vars['study_coeffs'].stats()['mean']
    gamma_mesh = dm.vars['age_coeffs_mesh'].stats()['mean']
    debug('a: %s' % ', '.join(['%.2f' % x for x in alpha]))
    debug('b: %s' % ', '.join(['%.2f' % x for x in beta]))
    debug('g: %s' % ', '.join(['%.2f' % x for x in gamma_mesh]))
    debug('d: %.2f' % dm.vars['dispersion'].stats()['mean'])
    debug('m: %s' % ', '.join(['%.2f' % x for x in dm.vars['rate_stoch'].stats()['mean'][::10]]))
    covariates_dict = dm.get_covariates()
    X = covariates(data[0], covariates_dict)
    debug('p: %s' % ', '.join(['%.2f' % x for x in predict_rate(X, alpha, beta, gamma_mesh, dm.vars['bounds_func'], dm.get_param_age_mesh())]))
    # save the results in the param_hash
    prior_vals = dict(
        alpha=list(dm.vars['region_coeffs'].stats()['mean']),
        beta=list(dm.vars['study_coeffs'].stats()['mean']),
        gamma=list(dm.vars['age_coeffs'].stats()['mean']),
        delta=float(dm.vars['dispersion'].stats()['mean']))

    prior_vals.update(
        sigma_alpha=list(dm.vars['region_coeffs'].stats()['standard deviation']),
        sigma_beta=list(dm.vars['study_coeffs'].stats()['standard deviation']),
        sigma_gamma=list(dm.vars['age_coeffs'].stats()['standard deviation']),
        sigma_delta=float(dm.vars['dispersion'].stats()['standard deviation']))
    # save the goodness-of-fit statistics for the empirical prior
    prior_vals.update(
        aic=dm.map.AIC,
        bic=dm.map.BIC,
        dic=dm.mcmc.dic()
        )
    dm.set_empirical_prior(param_type, prior_vals)


    dispersion = prior_vals['delta']
    median_sample_size = np.median([values_from(dm, d)[3] for d in dm.vars['data']] + [1000])
    debug('median effective sample size: %.1f' % median_sample_size)

    param_mesh = dm.get_param_age_mesh()
    age_mesh = dm.get_estimate_age_mesh()

    import random
    trace = zip(dm.vars['region_coeffs'].trace(), dm.vars['study_coeffs'].trace(), dm.vars['age_coeffs'].trace())[::5]
    
    for r in dismod3.gbd_regions:
        print 'predicting rates for %s' % r
        for y in dismod3.gbd_years:
            for s in dismod3.gbd_sexes:
                key = dismod3.gbd_key_for(param_type, r, y, s)
                rate_trace = []
                for a, b, g in trace:
                    rate_trace.append(predict_region_rate(key,
                                                          alpha=a,
                                                          beta=b,
                                                          gamma=g,
                                                          covariates_dict=covariates_dict,
                                                          bounds_func=dm.vars['bounds_func'],
                                                          ages=dm.get_estimate_age_mesh()))
                mu = dismod3.utils.interpolate(param_mesh, np.mean(rate_trace, axis=0)[param_mesh], age_mesh)
                dm.set_initial_value(key, mu)
                dm.set_mcmc('emp_prior_mean', key, mu)

                # similar to saving upper_ui and lower_ui in function store_mcmc_fit below
                rate_trace = np.sort(rate_trace, axis=0)
                dm.set_mcmc('emp_prior_upper_ui', key, dismod3.utils.interpolate(param_mesh, rate_trace[.975 * len(rate_trace), :][param_mesh], age_mesh))
                dm.set_mcmc('emp_prior_lower_ui', key, dismod3.utils.interpolate(param_mesh, rate_trace[.025 * len(rate_trace), :][param_mesh], age_mesh))
Ejemplo n.º 21
0
    Xb = []
    for level in ['Study_level', 'Country_level']:
        for k in sorted(covariates_dict[level]):
            if covariates_dict[level][k]['rate']['value'] == 1 and standardize_data_type[d['parameter']][:-5] in covariates_dict[level][k]['types']['value']:
                Xb.append(float(d.get(clean(k)) or 0.))
    #debug('%s-%s-%s-%s: Xb = %s' % (d['sex'], d['year_start'], d['gbd_region'], d.get('country_iso3_code', 'none'), str(Xb)))
    if Xb == []:
        Xb = [0.]
    return Xa, Xb


from dismod3.utils import clean
import csv
import settings
# Lookup tables loaded once at import time from the project CSV files.
# countries_for: cleaned region name -> list of its country codes
# (each row of country_region.csv is [region, country, country, ...])
countries_for = dict(
    [[clean(x[0]), x[1:]] for x in csv.reader(open(settings.CSV_PATH + 'country_region.csv'))]
    )
# population_by_age: (country code, year, sex) -> list of MAX_AGE
# single-age population counts, floored at .001 to avoid zeros;
# rows with non-3-letter country codes (e.g. aggregates) are skipped
population_by_age = dict(
    [[(d['Country Code'], d['Year'], d['Sex']),
      [max(.001,float(d['Age %d Population' % i])) for i in range(MAX_AGE)]] for d in csv.DictReader(open(settings.CSV_PATH + 'population.csv'))
     if len(d['Country Code']) == 3]
    )

def regional_population(key):
    """Return the age-specific population vector for the region in a gbd key,
    summed over that region's countries for the key's year and sex."""
    t, r, y, s = type_region_year_sex_from_key(key)
    total = np.zeros(MAX_AGE)
    for iso3 in countries_for[clean(r)]:
        total = total + population_by_age[(iso3, y, s)]
    return total
Ejemplo n.º 22
0
def fit(dm, method="map", param_type="prevalence", units="(per 1.0)", emp_prior=None):
    """ Generate an estimate of the beta binomial model parameters
    using maximum a posteriori likelihood (MAP) or Markov-chain Monte
    Carlo (MCMC).

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      The object containing all the data, priors, and additional
      information (like input and output age-mesh).

    method : string, optional
      The parameter estimation method, either 'map' or 'mcmc'.

    param_type : str, optional
      Only data in dm.data with clean(d['data_type']).find(param_type) != -1
      will be included in the beta-binomial likelihood function.

    units : str, optional
      The units of this parameter, for pretty plotting, etc.

    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)

      Defaults to an empty dict (no empirical prior).

    Example
    -------
    >>> import dismod3
    >>> import dismod3.beta_binomial_model as model
    >>> dm = dismod3.get_disease_model(1)
    >>> model.fit(dm, method='map', param_type='excess-mortality', units='(per person-year)')
    >>> model.fit(dm, method='mcmc', param_type='excess-mortality', units='(per person-year)')
    """
    # use a None sentinel instead of a mutable {} default, so one dict
    # is not silently shared across every call to fit()
    if emp_prior is None:
        emp_prior = {}

    # setup model variables, if they do not already exist
    if not hasattr(dm, "vars"):
        data = [d for d in dm.data if clean(d["data_type"]).find(param_type) != -1]
        # use a random subset of the data if there is a lot of it,
        # to speed things up
        if len(data) > 25:
            dm.fit_initial_estimate(param_type, random.sample(data, 25))
        else:
            dm.fit_initial_estimate(param_type, data)

        dm.set_units(param_type, units)

        dm.vars = setup(dm, param_type, data, emp_prior)

    # fit the model, with the selected method
    if method == "map":
        if not hasattr(dm, "map"):
            dm.map = mc.MAP(dm.vars)
        dm.map.fit(method="fmin_powell", iterlim=500, tol=0.001, verbose=1)
        dm.set_map(param_type, dm.vars["rate_stoch"].value)
    elif method == "mcmc":
        if not hasattr(dm, "mcmc"):
            dm.mcmc = mc.MCMC(dm.vars)
        # update the latent probabilities jointly, since they are correlated
        if len(dm.vars["latent_p"]) > 0:
            dm.mcmc.use_step_method(mc.AdaptiveMetropolis, dm.vars["latent_p"])
        dm.mcmc.sample(iter=40000, burn=10000, thin=30, verbose=1)
        store_mcmc_fit(dm, param_type, dm.vars["rate_stoch"])
Ejemplo n.º 23
0
    Xb = []
    for level in ['Study_level', 'Country_level']:
        for k in sorted(covariates_dict[level]):
            if covariates_dict[level][k]['rate']['value'] == 1:
                Xb.append(float(d.get(clean(k)) or 0.))
    #debug('%s-%s-%s-%s: Xb = %s' % (d['sex'], d['year_start'], d['gbd_region'], d.get('country_iso3_code', 'none'), str(Xb)))
    if Xb == []:
        Xb = [0.]
    return Xa, Xb


from dismod3.utils import clean
import csv
import settings
# Lookup tables loaded once at import time from the project CSV files.
# countries_for: cleaned region name -> list of its country codes
# (each row of country_region.csv is [region, country, country, ...])
countries_for = dict(
    [[clean(x[0]), x[1:]] for x in csv.reader(open(settings.CSV_PATH + 'country_region.csv'))]
    )
# population_by_age: (country code, year, sex) -> list of MAX_AGE
# single-age population counts, floored at .001 to avoid zeros;
# rows with non-3-letter country codes (e.g. aggregates) are skipped
population_by_age = dict(
    [[(d['Country Code'], d['Year'], d['Sex']),
      [max(.001,float(d['Age %d Population' % i])) for i in range(dismod3.settings.MAX_AGE)]] for d in csv.DictReader(open(settings.CSV_PATH + 'population.csv'))
     if len(d['Country Code']) == 3]
    )

def regional_population(key):
    """ calculate regional population for a gbd key"""
    t,r,y,s = dismod3.utils.type_region_year_sex_from_key(key)
    pop = pl.zeros(dismod3.settings.MAX_AGE)
    for c in countries_for[clean(r)]:
        if y == 'all' and s == 'all':
            for yy in dismod3.settings.gbd_years:
                for ss in dismod3.settings.gbd_sexes:
Ejemplo n.º 24
0
def fit_emp_prior(dm, param_type):
    """ Generate an empirical prior distribution for a single disease parameter

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      The object containing all the data, (hyper)-priors, and additional
      information (like input and output age-mesh).

    param_type : str, one of 'incidence', 'prevalence', 'remission', 'excess-mortality'
      The disease parameter to work with

    Notes
    -----
    The results of this fit are stored in the disease model's params
    hash for use when fitting multiple paramter types together

    Example
    -------
    $ python2.5 gbd_fit.py 175 -t incidence -p 'zero 0 4, zero 41 100, smooth 25' # takes 7m to run
    """

    data = [d for d in dm.data if clean(d['data_type']).find(param_type) != -1]

    # don't do anything if there is no data for this parameter type
    if len(data) == 0:
        return
    
    dm.fit_initial_estimate(param_type, data)

    dm.vars = setup(dm, param_type, data)
    
    # fit the model
    dm.map = mc.MAP(dm.vars)
    try:
        dm.map.fit(method='fmin_powell', iterlim=500, tol=.00001, verbose=1)
    except KeyboardInterrupt:
        print 'User halted optimization routine before optimal value found'
    
    # save the results in the param_hash
    dm.clear_empirical_prior()
    prior_vals = dict(
        alpha=list(dm.vars['region_coeffs'].value),
        beta=list(dm.vars['study_coeffs'].value),
        gamma=list(dm.vars['age_coeffs'].value),
        sigma=float(dm.vars['dispersion'].value))
    dm.set_empirical_prior(param_type, prior_vals)

    dispersion = prior_vals['sigma']
    for r in dismod3.gbd_regions:
        for y in dismod3.gbd_years:
            for s in dismod3.gbd_sexes:
                key = dismod3.gbd_key_for(param_type, r, y, s)
                logit_mu = predict_logit_rate(regional_covariates(key), **prior_vals)
                mu = mc.invlogit(logit_mu)
                dm.set_initial_value(key, mu)
                dm.set_mcmc('emp_prior_mean', key, mu)
                dm.set_mcmc('emp_prior_lower_ui', key, mc.invlogit(logit_mu - 1.96*dispersion))
                dm.set_mcmc('emp_prior_upper_ui', key, mc.invlogit(logit_mu + 1.96*dispersion))

    key = dismod3.gbd_key_for(param_type, 'world', 1997, 'total')
    logit_mu = predict_logit_rate(regional_covariates(key), **prior_vals)
    mu = mc.invlogit(logit_mu)
    dm.set_initial_value(key, mu)
    dm.set_mcmc('emp_prior_mean', key, mu)
    dm.set_mcmc('emp_prior_lower_ui', key, mc.invlogit(logit_mu - 1.96*dispersion))
    dm.set_mcmc('emp_prior_upper_ui', key, mc.invlogit(logit_mu + 1.96*dispersion))
Ejemplo n.º 25
0
      0.02044349,  0.02214463,  0.02396039,  0.02589065,  0.0279525 ,
      0.03017836,  0.03261135,  0.03530052,  0.03828981,  0.04160153,
      0.04523777,  0.04918468,  0.05341633,  0.05790466,  0.06263516,
      0.06760523,  0.07281963,  0.07828758,  0.08401736,  0.09000903,
      0.09625542,  0.10274424,  0.10945923,  0.11638187,  0.1234935 ,
      0.13077522,  0.13820759,  0.14577067,  0.15344416,  0.16120755,
      0.16904026,  0.17692176,  0.18483165,  0.19274966,  0.20065553,
      0.20852876,  0.2163489 ,  0.22409584,  0.23174999,  0.23929245,
      0.2467051 ])

for region in dismod3.gbd_regions:
    for year in dismod3.gbd_years:
        for sex in dismod3.gbd_sexes:
            key = dismod3.gbd_key_for('%s', region, year, sex)

            if clean(region) == 'north_america_high_income':
                regional_offset = 0.
            else:
                regional_offset = -.5

            time_offset = (int(year)-1997)/10.

            if clean(sex) == 'male':
                sex_offset = .1
            else:
                sex_offset = 0.
            
            # incidence rate
            i = mc.invlogit(mc.logit(.012 * mc.invlogit((ages - 44) / 3)) + regional_offset + time_offset + sex_offset)
            truth[key % 'incidence'] = i