def read_most_detailed(shock_dir, rescaled_dir, location):
    """Load shock and rescaled death and YLL draws for one location.

    Returns a tuple ``(rescaled, shocks, rescaled_yll, shocks_yll)`` of
    whatever ``read_hdf_draws`` produces (paths are redacted placeholders).
    """
    logging.info(
        "Reading in shock and rescaled deaths for {}".format(location))
    death_shocks = read_hdf_draws(r'FILEPATH.h5')
    death_rescaled = read_hdf_draws(r'FILEPATH.h5')

    logging.info("Reading in shock and rescaled ylls for {}".format(location))
    yll_shocks = read_hdf_draws(r'FILEPATH.h5')
    yll_rescaled = read_hdf_draws(r'FILEPATH.h5')

    return death_rescaled, death_shocks, yll_rescaled, yll_shocks
def read_shock_draw_files(parent_dir, location_id):
    """Reads in shock draw files."""
    # Read the male (1) and female (2) files and stack them into one frame.
    frames = [
        read_hdf_draws(
            os.path.join(parent_dir,
                         'FILEPATH'.format(loc=location_id, sex=sex_id)),
            location_id)
        for sex_id in (1, 2)
    ]
    return pd.concat(frames).reset_index(drop=True)
def read_rescaled_draw_files(parent_dir, location_id):
    """Reads in rescaled draw files.

    Stacks the sex-specific (1 = male, 2 = female) files for the location
    into a single DataFrame with a fresh index.
    """
    frames = []
    for sex in (1, 2):
        path = (parent_dir +
                r'FILEPATH.h5'.format(location_id=location_id, sex_id=sex))
        frames.append(read_hdf_draws(path, location_id))
    return pd.concat(frames).reset_index(drop=True)
def read_draw_files(parent_dir, location_id):
    """Read rescaled and DALYnator death draws for one location.

    Parameters:
        parent_dir: run root directory containing aggregated/ and draws/.
        location_id: location to read.

    Returns:
        (rescaled_draws, dalynator_draws) DataFrames with fresh indexes.

    Raises:
        Re-raises any read failure after logging it.
    """
    logger = logging.getLogger('summary.read_draw_files')
    try:
        # Rescaled data
        draw_filepath = (
            parent_dir +
            r'/aggregated/rescaled/rescaled_{location_id}.h5'.format(
                location_id=str(location_id)))
        rescaled_draws = read_hdf_draws(
            draw_filepath, location_id).reset_index(drop=True)
        # DALYnator data
        draw_filepath = (parent_dir +
                         r'/draws/death_{location_id}.h5'.format(
                             location_id=str(location_id)))
        dalynator_draws = read_hdf_draws(
            draw_filepath, location_id).reset_index(drop=True)
    except Exception as e:
        logger.exception('Failed to read location: {}'.format(e))
        # BUG FIX: original fell through to the return after logging, which
        # raised NameError on the unbound locals and masked the real error.
        raise
    return rescaled_draws, dalynator_draws
def read_unscaled_draw_files(parent_dir, location_id, index_columns,
                             draw_columns):
    """Reads in unscaled draw files.

    Stacks the sex-specific (1, 2) files for the location, keeps only
    ``index_columns + draw_columns``, and returns the result sorted by
    the index columns with a fresh index.
    """
    data = []
    for sex_id in [1, 2]:
        draw_filepath = os.path.join(
            parent_dir, 'FILEPATH'.format(loc=location_id, sex=sex_id))
        data.append(read_hdf_draws(draw_filepath, location_id))
    data = pd.concat(data).reset_index(drop=True)
    # BUG FIX: original referenced undefined name `data_columns`
    # (NameError); the parameter is `draw_columns`.
    data = data[index_columns + draw_columns]
    data = data.sort_values(index_columns).reset_index(drop=True)
    return data
def read_gbd_draw_files(parent_dir, location_id, years, measure_id):
    """Read GBD draw files for one location and measure.

    Parameters:
        measure_id: 1 for deaths, 4 for YLLs; anything else raises
            ValueError.
        years: passed through as ``filter_years`` to ``read_hdf_draws``.

    Returns:
        The draws DataFrame with a fresh index.

    Raises:
        ValueError: for an unsupported measure_id.
        Re-raises any read failure after logging it.
    """
    logger = logging.getLogger('summary.read_gbd_draw_files')
    # Validate before the try so a bad measure_id is raised directly rather
    # than being logged as a read failure (original swallowed it and
    # implicitly returned None).
    if measure_id not in (1, 4):
        raise ValueError("Can only operate on measure_ids 1 or 4")
    try:
        if measure_id == 1:
            draw_filepath = (parent_dir + r'FILEPATH.h5'
                             .format(location_id=str(location_id)))
        else:  # measure_id == 4: YLL file
            draw_filepath = (parent_dir + r'FILEPATH.h5'
                             .format(location_id=str(location_id)))
        return read_hdf_draws(draw_filepath, location_id,
                              filter_years=years).reset_index(drop=True)
    except Exception as e:
        logger.exception('Failed to read location: {}'.format(e))
        # BUG FIX: original swallowed the error and the caller silently
        # received None.
        raise
def read_cod_draw_files(pool, parent_dir, location_id, years,
                        most_detailed_location):
    """Read rescaled and with-shock CoD draws for one location.

    Most-detailed locations are read straight from disk; aggregate
    locations are assembled via ``read_aggregated_cod`` using ``pool``.

    Returns:
        (rescaled_draws, dalynator_draws) DataFrames.

    Raises:
        Re-raises any read failure after logging it.
    """
    logger = logging.getLogger('summary.read_cod_draw_files')
    try:
        if most_detailed_location:
            # Rescaled data
            draw_filepath = parent_dir + r'FILEPATH.h5'
            rescaled_draws = read_hdf_draws(
                draw_filepath, location_id,
                filter_years=years).reset_index(drop=True)
        else:
            rescaled_draws = read_aggregated_cod(
                pool, parent_dir + r'FILEPATH', location_id, years)
        # With Shock data
        draw_filepath = (parent_dir + r'FILEPATH.h5'
                         .format(location_id=str(location_id)))
        dalynator_draws = read_hdf_draws(
            draw_filepath, location_id,
            filter_years=years).reset_index(drop=True)
        return rescaled_draws, dalynator_draws
    except Exception as e:
        logger.exception('Failed to read location: {}'.format(e))
        # BUG FIX: original swallowed the error and the caller silently
        # received None instead of the expected 2-tuple.
        raise
# NOTE(review): this definition is shadowed by the identically named
# get_model_numbers defined immediately below, so as written it is dead code
# -- confirm which of the two is intended and delete the other.
# NOTE(review): `parent_dir` is not a parameter here; unless a module-level
# `parent_dir` exists outside this chunk, building draw_filepath raises
# NameError -- verify.
def get_model_numbers(location_id, index_columns):
    """ Reads in model version ids """
    logger = logging.getLogger('summary.get_model_numbers')
    try:
        data = []
        for sex_id in [1, 2]:
            draw_filepath = (parent_dir + r'FILEPATH.h5'
                             .format(location_id=location_id, sex_id=sex_id))
            data.append(read_hdf_draws(draw_filepath, location_id))
        data = pd.concat(data).reset_index(drop=True)
        data = data[index_columns + ['model_version_id']]
    except Exception as e:
        # NOTE(review): the error is logged but not re-raised, so `data` may
        # be returned as a partially built list rather than the selected
        # DataFrame.
        logger.exception('Failed to read model version data: {}'.format(e))
    return data
def get_model_numbers(location_id, index_columns):
    """Reads in model version ids, in order to tack these on for CoD db.

    Stacks the sex-specific model files and keeps
    ``index_columns + ['model_version_id']``.

    Raises:
        Re-raises any read failure after logging it.
    """
    logger = logging.getLogger('summary.get_model_numbers')
    try:
        data = []
        for sex_id in [1, 2]:
            # NOTE(review): `parent_dir` is not a parameter; assumes a
            # module-level `parent_dir` exists outside this chunk -- confirm.
            draw_filepath = os.path.join(
                parent_dir, 'models/models_{loc}_{sex}.h5'
                .format(loc=location_id, sex=sex_id))
            data.append(read_hdf_draws(draw_filepath, location_id))
        data = pd.concat(data).reset_index(drop=True)
        data = data[index_columns + ['model_version_id']]
    except Exception as e:
        logger.exception('Failed to read model version data: {}'.format(e))
        # BUG FIX: original fell through to `return data`, handing the
        # caller a partially built list instead of the selected DataFrame.
        raise
    return data
def read_child_location_draw_files(parent_dir, location_id, child_locations,
                                   index_columns):
    """Chunks up reading in the child locations, collapsing after each
    5th location to bound memory use.

    Reads the rescaled, unscaled, and shock draw files for every child
    location and aggregates them to ``location_id``.

    Returns:
        (rescaled_data, unscaled_data, shocks_data) aggregated DataFrames.

    Raises:
        Re-raises any failure after logging it.
    """
    logger = logging.getLogger(
        'aggregate_locations.read_child_location_draw_files')
    try:
        c = 0
        rescaled_data = []
        unscaled_data = []
        shocks_data = []
        for child_id in child_locations:
            rescaled_filepath = (
                parent_dir +
                r'/aggregated/rescaled/rescaled_{location_id}.h5'.format(
                    location_id=str(child_id)))
            unscaled_filepath = (
                parent_dir +
                r'/aggregated/unscaled/unscaled_{location_id}.h5'.format(
                    location_id=str(child_id)))
            shocks_filepath = (
                parent_dir +
                r'/aggregated/shocks/shocks_{location_id}.h5'.format(
                    location_id=str(child_id)))
            # BUG FIX: Python-2-only `print` statements replaced with the
            # parenthesized form, valid in both Python 2 and 3.
            logger.info('Appending in {}'.format(rescaled_filepath))
            print('Appending in {}'.format(rescaled_filepath))
            rescaled_data.append(
                read_hdf_draws(rescaled_filepath,
                               child_id).reset_index(drop=True))
            logger.info('Appending in {}'.format(unscaled_filepath))
            print('Appending in {}'.format(unscaled_filepath))
            unscaled_data.append(
                read_hdf_draws(unscaled_filepath,
                               child_id).reset_index(drop=True))
            logger.info('Appending in {}'.format(shocks_filepath))
            print('Appending in {}'.format(shocks_filepath))
            shocks_data.append(
                read_hdf_draws(shocks_filepath,
                               child_id).reset_index(drop=True))
            c += 1
            # Every 5th child, collapse the accumulated frames; keeping the
            # collapsed result as a one-element list lets the final
            # aggregation below work uniformly.
            if c % 5 == 0:
                logger.info('Intermediate collapsing location')
                rescaled_data = [
                    aggregate_location(pd.concat(rescaled_data), location_id,
                                       index_columns)
                ]
                unscaled_data = [
                    aggregate_location(pd.concat(unscaled_data), location_id,
                                       index_columns)
                ]
                shocks_data = [
                    aggregate_location(pd.concat(shocks_data), location_id,
                                       index_columns)
                ]
        logger.info('Intermediate collapsing location')
        rescaled_data = aggregate_location(pd.concat(rescaled_data),
                                           location_id, index_columns)
        unscaled_data = aggregate_location(pd.concat(unscaled_data),
                                           location_id, index_columns)
        shocks_data = aggregate_location(pd.concat(shocks_data),
                                         location_id, index_columns)
    except Exception as e:
        logger.exception('Failed to aggregate location: {}'.format(e))
        # BUG FIX: original swallowed the error and returned partially built
        # lists instead of the aggregated DataFrames.
        raise
    return rescaled_data, unscaled_data, shocks_data