Example #1
0
def read_helper_files(parent_dir, location_id):
    """Read in and return helper DataFrames.

        Arguments:
        parent_dir (str): root directory containing the helper files.
        location_id: location to test for most-detailed status.

        Returns:
        age_weights: DataFrame containing age_weights for age-standardized
            rate calculation.
        most_detailed_location (bool): whether location_id is an estimate
            (most-detailed) location.
        config: configuration dictionary read from json.
    """

    # Config file
    config = read_json(parent_dir + r'FILEPATH.json')

    # Age weights
    age_weights = pd.read_csv(parent_dir + r'FILEPATH.csv')

    # Most-detailed location.
    # .loc replaces the DataFrame.ix accessor removed in pandas 1.0.
    location_hierarchy = pd.read_csv(parent_dir +
                                     r'FILEPATH.csv')
    estimate_locations = location_hierarchy.loc[
        location_hierarchy['is_estimate'] == 1, 'location_id'].tolist()

    # Membership test replaces the verbose if/else flag assignment.
    most_detailed_location = int(location_id) in estimate_locations

    return age_weights, most_detailed_location, config
Example #2
0
def read_helper_files(parent_dir):
    """Read in and return helper DataFrames.

        Returns: DataFrame containing config file.
    """
    # Load the run configuration from the temp directory.
    config_path = os.path.join(parent_dir, '_temp/config.json')
    return read_json(config_path)
def read_helper_files(parent_dir):
    """Read in and return helper DataFrames.

        Returns:
        config: configuration dictionary read from json.
    """
    # Config file. (The previous docstring claimed a cause-hierarchy
    # DataFrame was returned; only the config is read here.)
    config = read_json(parent_dir + r'/FILEPATH.json')
    return config
Example #4
0
def read_helper_files(parent_dir):
    """
    Return the config dictionary from json.

    Arguments:
        parent_dir (str):

    Returns:
        A dictionary containing the configuration specifications for this run.
    """
    config_file = os.path.join(parent_dir, 'FILEPATH')
    return read_json(config_file)
def read_helper_files(parent_dir, location_id):
    """Read in and return helper DataFrames.

        Arguments:
        parent_dir (str): root directory containing the helper files.
        location_id: unused here; kept for a uniform call signature.

        Returns:
        config: configuration dictionary read from json.
        cause_hierarchy: DataFrame containing cause hierarchy used for
            aggregation.
    """

    # Config file
    config = read_json(os.path.join(parent_dir, 'FILEPATH'))

    # Cause hierarchy
    cause_hierarchy = pd.read_csv(os.path.join(parent_dir, 'FILEPATH'))

    return config, cause_hierarchy
Example #6
0
def read_helper_files(parent_dir):
    '''Read in and return helper DataFrames.

        Returns:
        config
        list of causes
    '''
    logger = logging.getLogger('correct.read_helper_files')

    # Run configuration
    logger.info('Reading config file')
    config = read_json(parent_dir + r'FILEPATH.json')

    # Unique cause ids, ordered numerically.
    cause_ids = pd.read_csv(parent_dir + 'FILEPATH.csv').cause_id.unique()
    causes = sorted(cause_ids, key=int)

    return config, causes
Example #7
0
def read_helper_files(parent_dir):
    """Read in and return helper DataFrames.

        Returns:
        config: configuration dictionary read from json.
        location_ids: list of unique location_ids from the location
            hierarchy.
    """

    # Config file. (The previous docstring claimed a cause-hierarchy
    # DataFrame was returned; config and location ids are returned.)
    config = read_json(parent_dir + r'/_temp/config.json')

    # Location hierarchy
    location_hierarchy = pd.read_csv(parent_dir +
                                     r'/_temp/location_hierarchy.csv')
    location_ids = location_hierarchy['location_id'].drop_duplicates().tolist()

    return config, location_ids
Example #8
0
def read_helper_files(parent_dir, location_id, sex_id):
    """Read in and return helper DataFrames.

    Arguments:
        parent_dir (str): root directory containing the helper files.
        location_id: location whose envelope draws are read.
        sex_id: sex used to filter the best-model list.

    Returns:
        config: configuration dictionary read from json.
        best_models: DataFrame containing all best model ids
                     and relevant cause metadata for a given sex
        eligible_data: a DataFrame containing all demographics
                       and their restriction status
        spacetime_restriction_data: DataFrame of space-time restrictions.
        envelope_data: DataFrame of envelope draws with draw_* columns.
        envelope_summ: DataFrame of envelope summary values.
    """
    logger = logging.getLogger('correct.read_helper_files')

    # Config file
    logger.info('Reading config file')
    config = read_json(parent_dir + r'FILEPATH')

    # List of best models (excluding shocks): model_version_type_id 0-4.
    logger.info('Reading best models')
    best_models = pd.read_csv(parent_dir + r'FILEPATH')
    best_models = best_models.loc[(best_models['sex_id'] == int(sex_id)) &
                                  (best_models['model_version_type_id']
                                  .isin(list(range(0, 5))))]

    # List of eligible data
    logger.info('Reading eligible models')
    eligible_data = pd.read_csv(parent_dir + r'FILEPATH.csv')

    # Space-time restrictions
    spacetime_restriction_data = pd.read_csv(
        parent_dir + 'FILEPATH')

    # Envelope: rename env_* draw columns to the conventional draw_* names
    # (dict comprehension replaces the manual accumulation loop).
    logger.info('Reading envelope draws')
    envelope_data = read_envelope_draws(parent_dir + r'FILEPATH.h5',
                                        location_id)
    rename_columns = {
        'env_{}'.format(x): 'draw_{}'.format(x) for x in range(1000)}
    envelope_data = envelope_data.rename(columns=rename_columns)
    envelope_summ = read_envelope_draws(parent_dir + r'FILEPATH.h5',
                                        location_id, key='summary')

    return (config, best_models, eligible_data, spacetime_restriction_data,
            envelope_data, envelope_summ)
Example #9
0
def read_helper_files(parent_dir):
    """Read in and return helper DataFrames.

    Returns:
        config
        list of causes
    """
    logger = logging.getLogger('correct.read_helper_files')

    # Run configuration
    logger.info('Reading config file')
    config = read_json(os.path.join(parent_dir, '_temp/config.json'))

    # Unique cause ids from the aggregation hierarchy, ordered numerically.
    hierarchy_path = os.path.join(
        parent_dir, '_temp/cause_aggregation_hierarchy.csv')
    causes = sorted(pd.read_csv(hierarchy_path).cause_id.unique(), key=int)

    return config, causes
def read_helper_files(parent_dir, location):
    """Read in and return helper DataFrames.

        Returns:
        config: configuration dictionary read from json.
        most_detailed_location (bool): whether `location` is an estimate
            (most-detailed) location.
    """

    # Config file
    config = read_json(parent_dir + r'FILEPATH.json')

    # .loc replaces the DataFrame.ix accessor removed in pandas 1.0.
    location_hierarchy = pd.read_csv(parent_dir + r'FILEPATH.csv')
    estimate_locations = location_hierarchy.loc[
        location_hierarchy['is_estimate'] == 1, 'location_id'].tolist()

    # Membership test replaces the verbose if/else flag assignment.
    most_detailed_location = int(location) in estimate_locations

    return config, most_detailed_location
def read_helper_files(parent_dir, location):
    """ Read in and return helper DataFrames.

        Returns:
        DataFrame config, most_detailed_location bool
    """

    # Run configuration
    config = read_json(os.path.join(parent_dir, '_temp/config.json'))

    # Locations flagged is_estimate == 1 are the most-detailed ones.
    hierarchy = pd.read_csv(
        os.path.join(parent_dir, '_temp/location_hierarchy.csv'))
    estimate_locations = hierarchy.loc[
        hierarchy['is_estimate'] == 1, 'location_id'].tolist()

    most_detailed_location = int(location) in estimate_locations

    return config, most_detailed_location
Example #12
0
def read_helper_files(parent_dir, location_id, sex_name):
    '''Read in and return helper DataFrames.

        Returns:
        config: configuration dictionary read from json.
        best_models: DataFrame containing all best model ids
                     and relevant cause metadata for a given sex
        eligible_data: a DataFrame containing all demographics
                       and their restriction status
        spacetime_restriction_data: DataFrame of space-time restrictions.
        envelope_data: DataFrame of envelope draws with draw_* columns.
    '''
    logger = logging.getLogger('correct.read_helper_files')
    sex_dict = {1: 'male', 2: 'female'}

    # Config file
    logger.info('Reading config file')
    config = read_json(parent_dir + r'/_temp/config.json')

    # List of best models (excluding shocks): model_version_type_id 0-4.
    logger.info('Reading best models')
    best_models = pd.read_csv(parent_dir + r'/_temp/best_models.csv')
    best_models['sex_name'] = best_models['sex_id'].map(lambda x: sex_dict[x])
    # .loc replaces the DataFrame.ix accessor removed in pandas 1.0.
    best_models = best_models.loc[
        (best_models['sex_name'] == sex_name) &
        (best_models['model_version_type_id'].isin(range(0, 5)))]

    # List of eligible data
    logger.info('Reading eligible models')
    eligible_data = pd.read_csv(parent_dir + r'/_temp/eligible_data.csv')

    # Space-time restrictions
    spacetime_restriction_data = pd.read_csv(
        parent_dir + '/_temp/spacetime_restrictions.csv')

    # Envelope: rename env_* draw columns to the conventional draw_* names.
    # range replaces the Python-2-only xrange builtin.
    logger.info('Reading envelope draws')
    envelope_data = read_envelope_draws(parent_dir + r'/_temp/envelope.h5',
                                        location_id)
    rename_columns = {
        'env_{}'.format(x): 'draw_{}'.format(x) for x in range(1000)}
    envelope_data = envelope_data.rename(columns=rename_columns)

    return (config, best_models, eligible_data, spacetime_restriction_data,
            envelope_data)
def read_helper_files(parent_dir):
    """Read in and return helper DataFrames.

        Returns:
        config: configuration dictionary read from json.
        location_ids: list of unique location_ids in the hierarchy.
        estimate_locations: list of location_ids flagged is_estimate == 1.
    """

    # Config file. (The previous docstring claimed a cause-hierarchy
    # DataFrame was returned; see actual returns above.)
    config = read_json(os.path.join(parent_dir, '_temp/config.json'))

    # Location hierarchy, trimmed to the columns used downstream.
    location_hierarchy = get_location_metadata(gbd_round_id=5,
                                               location_set_id=35)
    location_hierarchy = location_hierarchy[[
        'location_id', 'parent_id', 'level', 'is_estimate', 'most_detailed',
        'sort_order'
    ]]
    location_ids = location_hierarchy['location_id'].drop_duplicates().tolist()
    estimate_locations = location_hierarchy.loc[
        location_hierarchy['is_estimate'] == 1, 'location_id'].tolist()

    return config, location_ids, estimate_locations
Example #14
0
def read_helper_files(parent_dir, location_id, sex_name):
    '''Read in and return helper DataFrames.

        Arguments:
        parent_dir (str): root directory containing the helper files.
        location_id: unused here; kept for a uniform call signature.
        sex_name (str): 'male' or 'female', used to filter best models.

        Returns:
        config: configuration dictionary read from json.
        best_models: DataFrame containing all best model ids
                     and relevant cause metadata for a given sex
    '''
    logger = logging.getLogger('shocks.read_helper_files')
    sex_dict = {1: 'male', 2: 'female'}

    # Config file
    logger.info('Reading config file')
    config = read_json(parent_dir + r'/_temp/config.json')

    # List of best models for shocks: model_version_type_id 5-7.
    logger.info('Reading best models')
    best_models = pd.read_csv(parent_dir + r'/_temp/best_models.csv')
    best_models['sex_name'] = best_models['sex_id'].map(lambda x: sex_dict[x])
    # .loc replaces the DataFrame.ix accessor removed in pandas 1.0.
    best_models = best_models.loc[
        (best_models['sex_name'] == sex_name) &
        (best_models['model_version_type_id'].isin(range(5, 8)))]

    return config, best_models
Example #15
0
def read_helper_files(parent_dir, location_id, sex_id):
    """Read in and return helper DataFrames.

        Arguments:
        parent_dir (str): root directory containing the helper files.
        location_id: unused here; kept for a uniform call signature.
        sex_id: sex used to filter the best-model list.

        Returns:
        config: configuration dictionary read from json.
        best_models: DataFrame containing all best model ids and relevant
            cause metadata for a given sex
    """
    logger = logging.getLogger('shocks.read_helper_files')

    # Config file
    logger.info('Reading config file')
    config = read_json(os.path.join(parent_dir, '_temp/config.json'))

    # List of best models for shocks/imported_cases/hiv:
    # model_version_type_id 5-7. os.path.join for consistency with the
    # config path above (was string concatenation).
    logger.info('Reading best models')
    best_models = pd.read_csv(
        os.path.join(parent_dir, '_temp/best_models.csv'))
    best_models = best_models.loc[(best_models['sex_id'] == int(sex_id)) &
                                  (best_models['model_version_type_id']
                                  .isin(list(range(5, 8))))]

    return config, best_models
Example #16
0
def read_helper_files(parent_dir, location_id, sex_id):
    '''Read in and return helper DataFrames.

        Arguments:
        parent_dir (str): root directory containing the helper files.
        location_id: unused here; kept for a uniform call signature.
        sex_id: sex used to filter the best-model list.

        Returns:
        config: configuration dictionary read from json.
        best_models: DataFrame containing all best model ids
                     and relevant cause metadata for a given sex
    '''
    logger = logging.getLogger('shocks.read_helper_files')

    # Config file
    logger.info('Reading config file')
    config = read_json(parent_dir + r'FILEPATH.json')

    # List of best models for shocks: model_version_type_id 5-7.
    # .loc replaces the DataFrame.ix accessor removed in pandas 1.0.
    logger.info('Reading best models')
    best_models = pd.read_csv(parent_dir + r'FILEPATH.csv')
    best_models = best_models.loc[(best_models['sex_id'] == int(sex_id)) &
                                  (best_models['model_version_type_id']
                                   .isin(range(5, 8)))]

    return config, best_models
Example #17
0
        # NOTE(review): fragment of a larger setup function; the enclosing
        # definition is outside this view, so code is left untouched.
        config['eligible_sex_ids'] = eligible_sex_ids
        config['eligible_cause_ids'] = eligible_cause_ids
        config['eligible_year_ids'] = codcorrect_years
        config['eligible_location_ids'] = eligible_location_ids
        config['diagnostic_year_ids'] = [1990, 2005, 2017]
        config['change_years'] = change_years
        config['process_version_id'] = process_version_id

        write_json(config, parent_dir + r'/_temp/config.json')
    else:
        # Read in location data
        location_data = pd.read_csv(
            os.path.join(parent_dir, '_temp/location_hierarchy.csv'))

        # Read in config file
        config = read_json(os.path.join(parent_dir, '_temp/config.json'))

        # Read in variables
        eligible_location_ids = config['eligible_location_ids']
        envelope_version_id = config['envelope_version_id']
        pop_version_id = config['pop_version_id']
        process_version_id = config['process_version_id']
        change_years = config['change_years']

        # if eligible_year_ids do not match, then do not resume jobs
        if config['eligible_year_ids'] != codcorrect_years:
            logging.info("CoDCorrect years do not match!")
            logging.info("Can't just resume jobs")
            # BUG(review): '!=' below is a comparison used as a statement and
            # has no effect; presumably '=' (assignment) was intended so the
            # rewritten config gets the new years — confirm before changing.
            config['eligible_year_ids'] != codcorrect_years
            write_json(config, os.path.join(parent_dir, '_temp/config.json'))
            resume = False
Example #18
0
def read_helper_files(parent_dir):
    """Return the config dictionary read from _temp/config.json."""
    config_path = os.path.join(parent_dir, '_temp/config.json')
    return read_json(config_path)
Example #19
0
        # NOTE(review): fragment of a larger setup function; the enclosing
        # definition is outside this view, so code is left untouched.
        config['eligible_cause_ids'] = eligible_cause_ids
        config['eligible_year_ids'] = codcorrect_years
        config['eligible_location_ids'] = eligible_location_ids
        config['dalynator_export_years_ids'] = codcorrect_years
        config['diagnostic_year_ids'] = [
            1990, 1995, 2000, 2005, 2010, 2013, 2015
        ]

        write_json(config, parent_dir + r'/_temp/config.json')
    else:
        # Read in location data
        location_data = pd.read_csv(parent_dir +
                                    '/_temp/location_hierarchy.csv')

        # Read in config file
        config = read_json(parent_dir + r'/_temp/config.json')

        # Read in variables
        eligible_location_ids = config['eligible_location_ids']
        envelope_version_id = config['envelope_version_id']

        # if eligible_year_ids do not match, then do not resume jobs
        if config['eligible_year_ids'] != codcorrect_years:
            # NOTE(review): Python 2 'print' statements — a SyntaxError under
            # Python 3; this section predates a py3 port.
            print "CoDCorrect years do not match!"
            print "Can't just resume jobs"
            # BUG(review): '!=' below is a comparison used as a statement and
            # has no effect; presumably '=' (assignment) was intended — confirm.
            config['eligible_year_ids'] != codcorrect_years
            write_json(config, parent_dir + r'/_temp/config.json')
            resume = False

    # Generate CoDCorrect jobs
    codcorrect_job_list = TaskList()
Example #20
0
def prep_upload(parent_dir):
    """Open permissions on the run directory and return upload file info."""
    # Parent directory itself, then everything under _temp recursively.
    change_permission(parent_dir, recursively=False)
    change_permission(parent_dir + r'/_temp/', recursively=True)
    return read_json(parent_dir + r'/_temp/output_upload.json')
Example #21
0
        # NOTE(review): fragment of a larger setup function; the enclosing
        # definition is outside this view, so code is left untouched.
        config['eligible_age_group_ids'] = eligible_age_group_ids
        config['eligible_sex_ids'] = eligible_sex_ids
        config['eligible_cause_ids'] = eligible_cause_ids
        config['eligible_year_ids'] = codcorrect_years
        config['eligible_location_ids'] = eligible_location_ids
        config['diagnostic_year_ids'] = [
            1990, 1995, 2000, 2005, 2006, 2010, 2016]

        write_json(config, parent_dir + r'FILEPATH.json')
    else:
        # Read in location data
        location_data = pd.read_csv(
            parent_dir + 'FILEPATH.csv')

        # Read in config file
        config = read_json(parent_dir + r'FILEPATH.json')

        # Read in variables
        eligible_location_ids = config['eligible_location_ids']
        envelope_version_id = config['envelope_version_id']
        lifetable_version_id = config['lifetable_version_id']
        pop_version_id = config['pop_version_id']

        # if eligible_year_ids do not match, then do not resume jobs
        if config['eligible_year_ids'] != codcorrect_years:
            logging.info("CoDCorrect years do not match!")
            logging.info("Can't just resume jobs")
            # BUG(review): '!=' below is a comparison used as a statement and
            # has no effect; presumably '=' (assignment) was intended — confirm.
            config['eligible_year_ids'] != codcorrect_years
            # NOTE(review): write path uses '/FILEPATH.json' while the reads
            # above use 'FILEPATH.json' (no separator) — likely anonymization
            # artifact, but verify the paths actually match.
            write_json(config, parent_dir + r'/FILEPATH.json')
            resume = False