Beispiel #1
0
def read_most_detailed(shock_dir, rescaled_dir, location):
    """Load shock and rescaled draws (deaths and YLLs) for one location.

    Returns a tuple ``(rescaled, shocks, rescaled_yll, shocks_yll)``.
    NOTE(review): the r'FILEPATH.h5' literals are scrubbed placeholders;
    presumably real paths were built from shock_dir/rescaled_dir — confirm.
    """
    logging.info(
        "Reading in shock and rescaled deaths for {}".format(location))
    death_shocks = read_hdf_draws(r'FILEPATH.h5')
    death_rescaled = read_hdf_draws(r'FILEPATH.h5')
    logging.info("Reading in shock and rescaled ylls for {}".format(location))
    yll_shocks = read_hdf_draws(r'FILEPATH.h5')
    yll_rescaled = read_hdf_draws(r'FILEPATH.h5')
    return death_rescaled, death_shocks, yll_rescaled, yll_shocks
def read_shock_draw_files(parent_dir, location_id):
    """Reads in shock draw files for both sexes and stacks them.

    One file is read per sex (1 and 2) and the frames are concatenated
    with a fresh integer index.
    """
    frames = [
        read_hdf_draws(
            os.path.join(parent_dir,
                         'FILEPATH'.format(loc=location_id, sex=sex)),
            location_id)
        for sex in (1, 2)
    ]
    return pd.concat(frames).reset_index(drop=True)
def read_rescaled_draw_files(parent_dir, location_id):
    """Reads in rescaled draw files for both sexes and stacks them.

    One file is read per sex (1 and 2); the result is a single frame with
    a fresh integer index.
    """
    frames = []
    for sex in (1, 2):
        # Path is built by string concatenation (scrubbed placeholder).
        path = parent_dir + r'FILEPATH.h5'.format(
            location_id=location_id, sex_id=sex)
        frames.append(read_hdf_draws(path, location_id))
    return pd.concat(frames).reset_index(drop=True)
Beispiel #4
0
def read_draw_files(parent_dir, location_id):
    """Read rescaled and DALYnator (death) draw files for one location.

    Returns a tuple ``(rescaled_draws, dalynator_draws)`` of DataFrames
    with fresh integer indices.

    Raises: re-raises any error from reading the HDF files. (BUG FIX:
    previously the except block logged and fell through to the `return`,
    which raised a confusing NameError on the unbound locals instead of
    surfacing the real failure.)
    """
    logger = logging.getLogger('summary.read_draw_files')
    try:
        # Rescaled data
        rescaled_path = (
            parent_dir +
            r'/aggregated/rescaled/rescaled_{location_id}.h5'.format(
                location_id=str(location_id)))
        rescaled_draws = read_hdf_draws(
            rescaled_path, location_id).reset_index(drop=True)
        # DALYnator data
        dalynator_path = parent_dir + r'/draws/death_{location_id}.h5'.format(
            location_id=str(location_id))
        dalynator_draws = read_hdf_draws(
            dalynator_path, location_id).reset_index(drop=True)
    except Exception as e:
        logger.exception('Failed to read location: {}'.format(e))
        raise
    return rescaled_draws, dalynator_draws
def read_unscaled_draw_files(parent_dir, location_id, index_columns,
                             draw_columns):
    """Reads in unscaled draw files for both sexes, keeps only the index
    and draw columns, and returns the frame sorted by the index columns.

    BUG FIX: the column selection referenced an undefined name
    ``data_columns`` (guaranteed NameError); the parameter is
    ``draw_columns``.
    """
    data = []
    for sex_id in [1, 2]:
        draw_filepath = os.path.join(
            parent_dir, 'FILEPATH'.format(loc=location_id, sex=sex_id))
        data.append(read_hdf_draws(draw_filepath, location_id))
    data = pd.concat(data).reset_index(drop=True)
    # Keep only identifying columns plus the draw columns.
    data = data[index_columns + draw_columns]
    data = data.sort_values(index_columns).reset_index(drop=True)
    return data
Beispiel #6
0
def read_gbd_draw_files(parent_dir, location_id, years, measure_id):
    """Read GBD draw files for one location and measure.

    measure_id 1 selects the deaths file, measure_id 4 the YLL file
    (both paths are scrubbed placeholders here). Returns a DataFrame
    filtered to ``years`` with a fresh integer index.

    Raises ValueError for any other measure_id, and re-raises read
    failures. (BUG FIX: previously any exception — including the
    ValueError — was logged and swallowed, so the function implicitly
    returned None.)
    """
    logger = logging.getLogger('summary.read_gbd_draw_files')
    try:
        # The two branches differed only in variable naming; the actual
        # (scrubbed) path template is identical, so build it once.
        if measure_id == 1:
            draw_filepath = (parent_dir + r'FILEPATH.h5'
                             .format(location_id=str(location_id)))
        elif measure_id == 4:
            draw_filepath = (parent_dir + r'FILEPATH.h5'
                             .format(location_id=str(location_id)))
        else:
            raise ValueError("Can only operate on measure_ids 1 or 4")
        return read_hdf_draws(draw_filepath,
                              location_id,
                              filter_years=years).reset_index(drop=True)
    except Exception as e:
        logger.exception('Failed to read location: {}'.format(e))
        raise
Beispiel #7
0
def read_cod_draw_files(pool, parent_dir, location_id, years,
                        most_detailed_location):
    """Read rescaled and with-shock draws for a location.

    Most-detailed locations are read directly from an HDF file; aggregate
    locations go through ``read_aggregated_cod`` (using ``pool``).
    Returns a tuple ``(rescaled_draws, dalynator_draws)``.

    Raises: re-raises any read failure. (BUG FIX: previously the except
    block logged and swallowed the error, so the function implicitly
    returned None and callers failed later on unpacking.)
    """
    logger = logging.getLogger('summary.read_cod_draw_files')
    try:
        if most_detailed_location:
            # Rescaled data
            draw_filepath = parent_dir + r'FILEPATH.h5'
            rescaled_draws = read_hdf_draws(draw_filepath,
                                            location_id,
                                            filter_years=years
                                            ).reset_index(drop=True)
        else:
            rescaled_draws = read_aggregated_cod(
                pool, parent_dir + r'FILEPATH', location_id, years)
        # With Shock data
        draw_filepath = (parent_dir + r'FILEPATH.h5'
                         .format(location_id=str(location_id)))
        dalynator_draws = read_hdf_draws(draw_filepath,
                                         location_id,
                                         filter_years=years
                                         ).reset_index(drop=True)
        return rescaled_draws, dalynator_draws
    except Exception as e:
        logger.exception('Failed to read location: {}'.format(e))
        raise
Beispiel #8
0
def get_model_numbers(location_id, index_columns):
    """Reads in model version ids.

    Returns a frame restricted to ``index_columns + ['model_version_id']``.
    NOTE(review): ``parent_dir`` is a free variable — presumably a
    module-level global defined elsewhere in the file; confirm. On any
    read failure the error is logged and whatever ``data`` holds at that
    point is returned (possibly an empty list).
    """
    logger = logging.getLogger('summary.get_model_numbers')
    try:
        data = []
        for sex in (1, 2):
            path = (parent_dir +
                    r'FILEPATH.h5'.format(location_id=location_id,
                                          sex_id=sex))
            data.append(read_hdf_draws(path, location_id))
        data = pd.concat(data).reset_index(drop=True)
        data = data[index_columns + ['model_version_id']]
    except Exception as e:
        logger.exception('Failed to read model version data: {}'.format(e))
    return data
Beispiel #9
0
def get_model_numbers(location_id, index_columns):
    """Reads in model version ids, in order to tack these on for CoD db.

    NOTE(review): ``parent_dir`` is a free variable — presumably a
    module-level global; confirm it is defined before this runs. On a read
    failure the error is logged and the partially-built ``data`` (possibly
    an empty list) is returned, matching the original behavior.
    """
    logger = logging.getLogger('summary.get_model_numbers')
    try:
        data = []
        for sex in (1, 2):
            fname = 'models/models_{loc}_{sex}.h5'.format(
                loc=location_id, sex=sex)
            data.append(
                read_hdf_draws(os.path.join(parent_dir, fname), location_id))
        data = pd.concat(data).reset_index(drop=True)
        data = data[index_columns + ['model_version_id']]
    except Exception as e:
        logger.exception('Failed to read model version data: {}'.format(e))
    return data
def read_child_location_draw_files(parent_dir, location_id, child_locations,
                                   index_columns):
    """Read draw files for every child location and aggregate them up to
    ``location_id``.

    Rescaled, unscaled, and shock draws are read for each child; the
    accumulated frames are collapsed via ``aggregate_location`` after
    every 5th child so memory stays bounded (the old docstring said
    "each 10th", but the code collapses every 5), then collapsed one
    final time.

    Returns a tuple ``(rescaled_data, unscaled_data, shocks_data)`` of
    aggregated DataFrames.

    Raises: re-raises any read/aggregation failure. (BUG FIX: the error
    was previously logged and swallowed, letting the function return
    partially-built lists instead of DataFrames.)
    """
    logger = logging.getLogger(
        'aggregate_locations.read_child_location_draw_files')
    try:
        c = 0
        rescaled_data = []
        unscaled_data = []
        shocks_data = []
        for child_id in child_locations:
            rescaled_filepath = (
                parent_dir +
                r'/aggregated/rescaled/rescaled_{location_id}.h5'.format(
                    location_id=str(child_id)))
            unscaled_filepath = (
                parent_dir +
                r'/aggregated/unscaled/unscaled_{location_id}.h5'.format(
                    location_id=str(child_id)))
            shocks_filepath = (
                parent_dir +
                r'/aggregated/shocks/shocks_{location_id}.h5'.format(
                    location_id=str(child_id)))

            # Each measure follows the same log / echo / read / append
            # pattern, so drive it from one loop.
            for filepath, store in ((rescaled_filepath, rescaled_data),
                                    (unscaled_filepath, unscaled_data),
                                    (shocks_filepath, shocks_data)):
                logger.info('Appending in {}'.format(filepath))
                # BUG FIX: was a Python 2 print statement; the
                # parenthesized single-argument form works on both 2 and 3.
                print('Appending in {}'.format(filepath))
                store.append(
                    read_hdf_draws(filepath,
                                   child_id).reset_index(drop=True))

            c += 1
            if c % 5 == 0:
                # Periodic collapse: each list never holds more than 5
                # children's worth of draws.
                logger.info('Intermediate collapsing location')
                rescaled_data = [aggregate_location(
                    pd.concat(rescaled_data), location_id, index_columns)]
                unscaled_data = [aggregate_location(
                    pd.concat(unscaled_data), location_id, index_columns)]
                shocks_data = [aggregate_location(
                    pd.concat(shocks_data), location_id, index_columns)]
        # Final collapse into single aggregated DataFrames.
        logger.info('Intermediate collapsing location')
        rescaled_data = aggregate_location(pd.concat(rescaled_data),
                                           location_id, index_columns)
        unscaled_data = aggregate_location(pd.concat(unscaled_data),
                                           location_id, index_columns)
        shocks_data = aggregate_location(pd.concat(shocks_data), location_id,
                                         index_columns)
    except Exception as e:
        logger.exception('Failed to aggregate location: {}'.format(e))
        raise
    return rescaled_data, unscaled_data, shocks_data