def migrate_files(connection):
    """
    Migrate the various files to S3
    """
    LOG.info('Migrating the files')
    s3helper = S3Helper()
    migrate_image_files(connection, get_galaxy_image_bucket(), get_files_bucket(), s3helper)
    migrate_hdf5_files(connection, get_files_bucket(), s3helper)
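
# A minimal usage sketch, not part of the original module. It assumes the
# DB_LOGIN / create_engine pattern that store_files() further down in this
# dump already uses; the helper name is hypothetical.
def _migrate_files_example():
    engine = create_engine(DB_LOGIN)
    connection = engine.connect()
    try:
        migrate_files(connection)
    finally:
        connection.close()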
def delete_galaxy(connection, galaxy_ids):
    try:
        for galaxy_id_str in galaxy_ids:
            transaction = connection.begin()
            galaxy_id1 = int(galaxy_id_str)
            galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id1)).first()
            if galaxy is None:
                LOG.info('Error: Galaxy with galaxy_id of %d was not found', galaxy_id1)
            else:
                LOG.info('Deleting Galaxy with galaxy_id of %d - %s', galaxy_id1, galaxy[GALAXY.c.name])
                area_count = connection.execute(select([func.count(AREA.c.area_id)]).where(AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])).first()[0]
                counter = 1
                for area_id1 in connection.execute(select([AREA.c.area_id]).where(AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]).order_by(AREA.c.area_id)):
                    LOG.info("Deleting galaxy {0} area {1}. {2} of {3}".format(galaxy_id_str, area_id1[0], counter, area_count))
                    connection.execute(PIXEL_RESULT.delete().where(PIXEL_RESULT.c.area_id == area_id1[0]))

                    # Give the rest of the world a chance to access the database
                    time.sleep(0.1)
                    counter += 1

                # Now empty the bucket
                s3helper = S3Helper()
                bucket = s3helper.get_bucket(get_files_bucket())
                galaxy_file_name = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                for key in bucket.list(prefix='{0}/sed/'.format(galaxy_file_name)):
                    # Ignore the key
                    if key.key.endswith('/'):
                        continue
                    bucket.delete_key(key)

                # Now the folder
                key = Key(bucket)
                key.key = '{0}/sed/'.format(galaxy_file_name)
                bucket.delete_key(key)

                LOG.info('Galaxy with galaxy_id of %d was deleted', galaxy_id1)
                connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id1).values(status_id=DELETED, status_time=datetime.datetime.now()))

            transaction.commit()
    except Exception:
        LOG.exception('Major error')
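
# A minimal usage sketch, not part of the original module: the galaxy ids
# typically arrive as command-line strings, and delete_galaxy() does the
# int() conversion itself. The engine setup is assumed from store_files()
# further down; the helper name is hypothetical.
def _delete_galaxy_example(argv):
    engine = create_engine(DB_LOGIN)
    connection = engine.connect()
    try:
        delete_galaxy(connection, argv[1:])
    finally:
        connection.close()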
def regenerated_original_images(galaxy_name, run_id, galaxy_id, s3Helper, connection):
    """
    Regenerate the original images for a galaxy

    :param galaxy_name:
    :param run_id:
    :param galaxy_id:
    :return: True if we succeed
    """
    all_ok = False

    # Get the fits file
    bucket = s3Helper.get_bucket(get_files_bucket())
    galaxy_file_name = get_galaxy_file_name(galaxy_name, run_id, galaxy_id)
    key_name = '{0}/{0}.fits'.format(galaxy_name)
    key = bucket.get_key(key_name)
    if key is None:
        LOG.error('The fits file does not seem to exist')
        return all_ok

    path_name = get_temp_file('fits')
    key.get_contents_to_filename(path_name)

    # Now regenerate
    try:
        image = FitsImage(connection)
        image.build_image(path_name, galaxy_file_name, galaxy_id, get_galaxy_image_bucket())
        all_ok = True
    except Exception:
        LOG.exception('Major error')
        all_ok = False
    finally:
        os.remove(path_name)

    return all_ok
def remove_s3_files(galaxy_name, run_id, galaxy_id):
    """
    Remove the files from S3

    :return:
    """
    s3_helper = S3Helper()
    remove_files_with_key(s3_helper.get_bucket(get_galaxy_image_bucket()), galaxy_name, run_id, galaxy_id)
    remove_files_with_key(s3_helper.get_bucket(get_files_bucket()), galaxy_name, run_id, galaxy_id)
def remigrate_files(connection):
    """
    Find the bad HDF5 files in S3 and migrate them again
    """
    LOG.info('Remigrating the files')
    s3helper = S3Helper()
    files_bucket = get_files_bucket()
    bad_galaxies = find_bad_hdf5_files(s3helper, files_bucket)
    migrate_hdf5_files(bad_galaxies, connection, files_bucket, s3helper)
def get_hdf5_size_data():
    """
    Get the HDF5 size data we need

    :return: a map of run_id to the list of HDF5 file sizes (in MB)
    """
    # Get the list of files we already know about
    LOG.info('Getting the hdf5 files from the database')
    data = {}
    set_names = set()
    for entry in connection.execute(select([HDF5_SIZE])):
        key_size_mb = entry[HDF5_SIZE.c.size] / 1000000.0
        LOG.info('Processing {0} {1} {2}'.format(entry[HDF5_SIZE.c.name], entry[HDF5_SIZE.c.size], key_size_mb))
        run_id = entry[HDF5_SIZE.c.run_id]

        # Get the array
        row_data = data.get(run_id)
        if row_data is None:
            row_data = []
            data[run_id] = row_data

        row_data.append(key_size_mb)
        set_names.add(entry[HDF5_SIZE.c.name])

    LOG.info('Getting the hdf5 files from S3')
    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_files_bucket())
    insert_hdf5 = HDF5_SIZE.insert()
    for prefix in bucket.list(prefix='', delimiter='/'):
        prefix_name = prefix.name[:-1]
        if prefix_name not in set_names:
            key = bucket.get_key('{0}/{0}.hdf5'.format(prefix_name))
            if key is not None:
                key_size_mb = key.size / 1000000.0
                LOG.info('Processing {0} {1} {2}'.format(key.name, key.size, key_size_mb))
                elements = prefix.name.split('__')
                run_id = int(elements[1])
                connection.execute(insert_hdf5, name=prefix_name, size=key.size, run_id=run_id)

                # Get the array
                row_data = data.get(run_id)
                if row_data is None:
                    row_data = []
                    data[run_id] = row_data

                row_data.append(key_size_mb)

    return data
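
# A minimal sketch, not part of the original module, showing the shape of
# the value get_hdf5_size_data() returns: a dict mapping run_id to a list of
# HDF5 file sizes in MB. The helper name is hypothetical.
def _print_hdf5_size_summary():
    data = get_hdf5_size_data()
    for run_id in sorted(data.keys()):
        sizes = data[run_id]
        LOG.info('run {0}: {1} files, {2:.1f} MB total, {3:.1f} MB largest'.format(
            run_id, len(sizes), sum(sizes), max(sizes)))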
def store_files(hdf5_dir):
    """
    Scan a directory for files and send them to the archive

    :param hdf5_dir: the directory to scan
    :return: the number of files stored
    """
    LOG.info('Directory: %s', hdf5_dir)

    # Connect to the database
    engine = create_engine(DB_LOGIN)
    connection = engine.connect()

    files = os.path.join(hdf5_dir, '*.hdf5')
    file_count = 0
    try:
        s3helper = S3Helper()
        bucket_name = get_files_bucket()
        for file_name in glob.glob(files):
            size = os.path.getsize(file_name)
            galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
            if galaxy_id >= 0:
                key = '{0}/{0}.hdf5'.format(galaxy_name)
                LOG.info('File name: %s', file_name)
                LOG.info('File size: %d', size)
                LOG.info('Bucket: %s', bucket_name)
                LOG.info('Key: %s', key)

                s3helper.add_file_to_bucket(bucket_name, key, file_name)
                file_count += 1
                os.remove(file_name)
                connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(status_id=STORED, status_time=datetime.datetime.now()))
            else:
                LOG.error('File name: %s', file_name)
                LOG.error('File size: %d', size)
                LOG.error('Could not get the galaxy id')
    except Exception:
        LOG.exception('Major error')
    finally:
        connection.close()

    return file_count
def get_hdf5_file(s3Helper, output_dir, galaxy_name, run_id, galaxy_id):
    """
    Get the HDF5 file

    :param s3Helper: the S3 helper to use
    :param output_dir: the directory to download the file into
    :param galaxy_name: the name of the galaxy
    :param run_id: the run id
    :param galaxy_id: the galaxy id
    :return: the name of the temporary file holding the HDF5 data
    """
    bucket_name = get_files_bucket()
    key = get_key_hdf5(galaxy_name, run_id, galaxy_id)
    tmp_file = get_temp_file('.hdf5', 'pogs', output_dir)
    s3Helper.get_file_from_bucket(bucket_name=bucket_name, key_name=key, file_name=tmp_file)
    return tmp_file
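
# A minimal usage sketch, not part of the original module: '/tmp' is a
# hypothetical output directory, and the caller owns (and must delete) the
# temporary file. Assumes os is imported, as elsewhere in this codebase.
def _get_hdf5_file_example(galaxy_name, run_id, galaxy_id):
    s3helper = S3Helper()
    tmp_file = get_hdf5_file(s3helper, '/tmp', galaxy_name, run_id, galaxy_id)
    LOG.info('Downloaded %s (%d bytes)', tmp_file, os.path.getsize(tmp_file))
    os.remove(tmp_file)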
def store_files(connection):
    """
    Scan the to_store directory under HDF5_OUTPUT_DIRECTORY for files and send them to the archive

    :param connection: the database connection to use
    :return: the number of files stored
    """
    LOG.info('Directory: %s', HDF5_OUTPUT_DIRECTORY)

    to_store_dir = os.path.join(HDF5_OUTPUT_DIRECTORY, 'to_store')
    files = os.path.join(to_store_dir, '*.hdf5')
    file_count = 0
    s3helper = S3Helper()
    bucket_name = get_files_bucket()
    for file_name in glob.glob(files):
        size = os.path.getsize(file_name)
        galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
        if galaxy_id >= 0:
            key = '{0}/{0}.hdf5'.format(galaxy_name)
            LOG.info('File name: %s', file_name)
            LOG.info('File size: %d', size)
            LOG.info('Bucket: %s', bucket_name)
            LOG.info('Key: %s', key)

            s3helper.add_file_to_bucket(bucket_name, key, file_name)
            file_count += 1
            os.remove(file_name)
            connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(status_id=STORED, status_time=datetime.datetime.now()))
        else:
            LOG.error('File name: %s', file_name)
            LOG.error('File size: %d', size)
            LOG.error('Could not get the galaxy id')

    return file_count
def store_pixels(connection, galaxy_file_name, group, dimension_x, dimension_y, dimension_z, area_total, output_directory, map_parameter_name):
    """
    Store the pixel data
    """
    LOG.info('Storing the pixel data for {0} - {1} areas to process'.format(galaxy_file_name, area_total))
    data = numpy.empty((dimension_x, dimension_y, NUMBER_PARAMETERS, NUMBER_IMAGES), dtype=numpy.float)
    data.fill(numpy.NaN)

    data_pixel_details = group.create_dataset('pixel_details', (dimension_x, dimension_y), dtype=data_type_pixel, compression='gzip')
    data_pixel_parameters = group.create_dataset('pixel_parameters', (dimension_x, dimension_y, NUMBER_PARAMETERS), dtype=data_type_pixel_parameter, compression='gzip')
    data_pixel_filter = group.create_dataset('pixel_filters', (dimension_x, dimension_y, dimension_z), dtype=data_type_pixel_filter, compression='gzip')
    data_pixel_histograms_grid = group.create_dataset('pixel_histograms_grid', (dimension_x, dimension_y, NUMBER_PARAMETERS), dtype=data_type_block_details, compression='gzip')

    histogram_group = group.create_group('histogram_blocks')
    histogram_list = []
    pixel_count = 0
    area_count = 0
    block_id = 1
    block_index = 0
    histogram_data = histogram_group.create_dataset('block_1', (BLOCK_SIZE,), dtype=data_type_pixel_histogram, compression='gzip')

    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_files_bucket())
    for key in bucket.list(prefix='{0}/sed/'.format(galaxy_file_name)):
        # Ignore the key
        if key.key.endswith('/'):
            continue

        # Now process the file
        start_time = time.time()
        LOG.info('Processing file {0}'.format(key.key))
        temp_file = os.path.join(output_directory, 'temp.sed')
        key.get_contents_to_filename(temp_file)

        if is_gzip(temp_file):
            f = gzip.open(temp_file, "rb")
        else:
            f = open(temp_file, "r")

        area_id = None
        pxresult_id = None
        line_number = 0
        percentiles_next = False
        histogram_next = False
        skynet_next1 = False
        skynet_next2 = False
        map_pixel_results = {}
        list_filters = []
        try:
            for line in f:
                line_number += 1

                if line.startswith(" ####### "):
                    # Clear all the maps and stuff
                    map_pixel_results = {}
                    list_filters = []

                    # Split the line to extract the data
                    values = line.split()
                    pointName = values[1]
                    pxresult_id = pointName[3:].rstrip()
                    (x, y, area_id) = get_pixel_result(connection, pxresult_id)
                    line_number = 0
                    percentiles_next = False
                    histogram_next = False
                    skynet_next1 = False
                    skynet_next2 = False
                    pixel_count += 1
                elif pxresult_id is not None:
                    if line_number == 2:
                        filter_names = line.split()
                        filter_layer = 0
                        for filter_name in filter_names:
                            if filter_name != '#':
                                data_pixel_filter.attrs[filter_name] = filter_layer
                                filter_layer += 1
                    elif line_number == 3:
                        values = line.split()
                        for value in values:
                            list_filters.append([float(value)])
                    elif line_number == 4:
                        filter_layer = 0
                        values = line.split()
                        for value in values:
                            filter_description = list_filters[filter_layer]
                            filter_description.append(float(value))
                            filter_layer += 1
                    elif line_number == 9:
                        values = line.split()
                        map_pixel_results['i_sfh'] = float(values[0])
                        map_pixel_results['i_ir'] = float(values[1])
                        map_pixel_results['chi2'] = float(values[2])
                        map_pixel_results['redshift'] = float(values[3])
                    elif line_number == 11:
                        values = line.split()
                        data[x, y, INDEX_F_MU_SFH, INDEX_BEST_FIT] = float(values[0])
                        data[x, y, INDEX_F_MU_IR, INDEX_BEST_FIT] = float(values[1])
                        data[x, y, INDEX_MU_PARAMETER, INDEX_BEST_FIT] = float(values[2])
                        data[x, y, INDEX_TAU_V, INDEX_BEST_FIT] = float(values[3])
                        data[x, y, INDEX_SSFR_0_1GYR, INDEX_BEST_FIT] = float(values[4])
                        data[x, y, INDEX_M_STARS, INDEX_BEST_FIT] = float(values[5])
                        data[x, y, INDEX_L_DUST, INDEX_BEST_FIT] = float(values[6])
                        data[x, y, INDEX_T_W_BC, INDEX_BEST_FIT] = float(values[7])
                        data[x, y, INDEX_T_C_ISM, INDEX_BEST_FIT] = float(values[8])
                        data[x, y, INDEX_XI_C_TOT, INDEX_BEST_FIT] = float(values[9])
                        data[x, y, INDEX_XI_PAH_TOT, INDEX_BEST_FIT] = float(values[10])
                        data[x, y, INDEX_XI_MIR_TOT, INDEX_BEST_FIT] = float(values[11])
                        data[x, y, INDEX_XI_W_TOT, INDEX_BEST_FIT] = float(values[12])
                        data[x, y, INDEX_TAU_V_ISM, INDEX_BEST_FIT] = float(values[13])
                        data[x, y, INDEX_M_DUST, INDEX_BEST_FIT] = float(values[14])
                        data[x, y, INDEX_SFR_0_1GYR, INDEX_BEST_FIT] = float(values[15])
                    elif line_number == 13:
                        filter_layer = 0
                        values = line.split()
                        for value in values:
                            filter_description = list_filters[filter_layer]
                            if filter_layer < dimension_z:
                                data_pixel_filter[x, y, filter_layer] = (
                                    filter_description[0],
                                    filter_description[1],
                                    float(value),
                                )
                            filter_layer += 1
                    elif line_number > 13:
                        if line.startswith("# ..."):
                            parts = line.split('...')
                            parameter_name = parts[1].strip()
                            parameter_name_id = map_parameter_name[parameter_name]
                            percentiles_next = False
                            histogram_next = True
                            skynet_next1 = False
                            skynet_next2 = False
                            histogram_list = []
                        elif line.startswith("#....percentiles of the PDF......"):
                            percentiles_next = True
                            histogram_next = False
                            skynet_next1 = False
                            skynet_next2 = False

                            # Write out the histogram into a block for compression improvement
                            data_pixel_histograms_grid[x, y, parameter_name_id - 1] = (block_id, block_index, len(histogram_list))
                            for pixel_histogram_item in histogram_list:
                                # Do we need a new block
                                if block_index >= BLOCK_SIZE:
                                    block_id += 1
                                    block_index = 0
                                    histogram_data = histogram_group.create_dataset('block_{0}'.format(block_id), (BLOCK_SIZE,), dtype=data_type_pixel_histogram, compression='gzip')

                                histogram_data[block_index] = (
                                    pixel_histogram_item[0],
                                    pixel_histogram_item[1],
                                )
                                block_index += 1
                        elif line.startswith(" #...theSkyNet"):
                            percentiles_next = False
                            histogram_next = False
                            skynet_next1 = True
                            skynet_next2 = False
                        elif line.startswith("# theSkyNet2"):
                            percentiles_next = False
                            histogram_next = False
                            skynet_next1 = False
                            skynet_next2 = True
                        elif percentiles_next:
                            values = line.split()
                            z = parameter_name_id - 1
                            data[x, y, z, INDEX_PERCENTILE_2_5] = float(values[0])
                            data[x, y, z, INDEX_PERCENTILE_16] = float(values[1])
                            data[x, y, z, INDEX_PERCENTILE_50] = float(values[2])
                            data[x, y, z, INDEX_PERCENTILE_84] = float(values[3])
                            data[x, y, z, INDEX_PERCENTILE_97_5] = float(values[4])
                            percentiles_next = False
                        elif histogram_next:
                            values = line.split()
                            hist_value = float(values[1])
                            if hist_value > MIN_HIST_VALUE and not math.isnan(hist_value):
                                histogram_list.append((float(values[0]), hist_value))
                        elif skynet_next1:
                            values = line.split()
                            data_pixel_details[x, y] = (
                                pxresult_id,
                                area_id,
                                map_pixel_results['i_sfh'],
                                map_pixel_results['i_ir'],
                                map_pixel_results['chi2'],
                                map_pixel_results['redshift'],
                                float(values[0]),
                                float(values[2]),
                                float(values[3]),
                                float(values[4]),
                            )
                            skynet_next1 = False
                        elif skynet_next2:
                            # We have the highest bin probability values which require the parameter_id
                            values = line.split()
                            high_prob_bin = float(values[0]) if float(values[0]) is not None else numpy.NaN
                            first_prob_bin = float(values[1]) if float(values[1]) is not None else numpy.NaN
                            last_prob_bin = float(values[2]) if float(values[2]) is not None else numpy.NaN
                            bin_step = float(values[3]) if float(values[3]) is not None else numpy.NaN
                            z = parameter_name_id - 1
                            data[x, y, z, INDEX_HIGHEST_PROB_BIN] = high_prob_bin
                            data_pixel_parameters[x, y, z] = (
                                first_prob_bin,
                                last_prob_bin,
                                bin_step,
                            )
                            skynet_next2 = False
        except IOError:
            LOG.error('IOError after {0} lines'.format(line_number))
        finally:
            f.close()

        area_count += 1
        LOG.info('{0:0.3f} seconds for file {1}. {2} of {3} areas.'.format(time.time() - start_time, key.key, area_count, area_total))

    pixel_dataset = group.create_dataset('pixels', data=data, compression='gzip')
    pixel_dataset.attrs['DIM3_F_MU_SFH'] = INDEX_F_MU_SFH
    pixel_dataset.attrs['DIM3_F_MU_IR'] = INDEX_F_MU_IR
    pixel_dataset.attrs['DIM3_MU_PARAMETER'] = INDEX_MU_PARAMETER
    pixel_dataset.attrs['DIM3_TAU_V'] = INDEX_TAU_V
    pixel_dataset.attrs['DIM3_SSFR_0_1GYR'] = INDEX_SSFR_0_1GYR
    pixel_dataset.attrs['DIM3_M_STARS'] = INDEX_M_STARS
    pixel_dataset.attrs['DIM3_L_DUST'] = INDEX_L_DUST
    pixel_dataset.attrs['DIM3_T_C_ISM'] = INDEX_T_C_ISM
    pixel_dataset.attrs['DIM3_T_W_BC'] = INDEX_T_W_BC
    pixel_dataset.attrs['DIM3_XI_C_TOT'] = INDEX_XI_C_TOT
    pixel_dataset.attrs['DIM3_XI_PAH_TOT'] = INDEX_XI_PAH_TOT
    pixel_dataset.attrs['DIM3_XI_MIR_TOT'] = INDEX_XI_MIR_TOT
    pixel_dataset.attrs['DIM3_XI_W_TOT'] = INDEX_XI_W_TOT
    pixel_dataset.attrs['DIM3_TAU_V_ISM'] = INDEX_TAU_V_ISM
    pixel_dataset.attrs['DIM3_M_DUST'] = INDEX_M_DUST
    pixel_dataset.attrs['DIM3_SFR_0_1GYR'] = INDEX_SFR_0_1GYR
    pixel_dataset.attrs['DIM4_BEST_FIT'] = INDEX_BEST_FIT
    pixel_dataset.attrs['DIM4_PERCENTILE_50'] = INDEX_PERCENTILE_50
    pixel_dataset.attrs['DIM4_HIGHEST_PROB_BIN'] = INDEX_HIGHEST_PROB_BIN
    pixel_dataset.attrs['DIM4_PERCENTILE_2_5'] = INDEX_PERCENTILE_2_5
    pixel_dataset.attrs['DIM4_PERCENTILE_16'] = INDEX_PERCENTILE_16
    pixel_dataset.attrs['DIM4_PERCENTILE_84'] = INDEX_PERCENTILE_84
    pixel_dataset.attrs['DIM4_PERCENTILE_97_5'] = INDEX_PERCENTILE_97_5

    LOG.info('Created {0} blocks'.format(block_id))
    return pixel_count
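
# A minimal read-back sketch, not part of the original module, showing how
# the 'pixels' dataset written above is indexed: dimension 3 selects the
# physical parameter and dimension 4 the statistic, with the index values
# recorded as the DIM3_* / DIM4_* attributes. The file and group names here
# are assumptions for illustration only.
def _read_median_sfr(file_name='galaxy.hdf5'):
    import h5py
    with h5py.File(file_name, 'r') as h5_file:
        group = h5_file['galaxy']  # assumption: the group is named 'galaxy'
        pixels = group['pixels']
        parameter = pixels.attrs['DIM3_SFR_0_1GYR']
        statistic = pixels.attrs['DIM4_PERCENTILE_50']
        return pixels[:, :, parameter, statistic]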
def store_pixels(connection, galaxy_file_name, group, dimension_x, dimension_y, dimension_z, area_total, galaxy_id, map_parameter_name):
    """
    Store the pixel data
    """
    LOG.info('Storing the pixel data for {0} - {1} areas to process'.format(galaxy_file_name, area_total))
    group.attrs['PIXELS_MAX_X_Y_BLOCK'] = MAX_X_Y_BLOCK
    group.attrs['PIXELS_DIM3_F_MU_SFH'] = INDEX_F_MU_SFH
    group.attrs['PIXELS_DIM3_F_MU_IR'] = INDEX_F_MU_IR
    group.attrs['PIXELS_DIM3_MU_PARAMETER'] = INDEX_MU_PARAMETER
    group.attrs['PIXELS_DIM3_TAU_V'] = INDEX_TAU_V
    group.attrs['PIXELS_DIM3_SSFR_0_1GYR'] = INDEX_SSFR_0_1GYR
    group.attrs['PIXELS_DIM3_M_STARS'] = INDEX_M_STARS
    group.attrs['PIXELS_DIM3_L_DUST'] = INDEX_L_DUST
    group.attrs['PIXELS_DIM3_T_C_ISM'] = INDEX_T_C_ISM
    group.attrs['PIXELS_DIM3_T_W_BC'] = INDEX_T_W_BC
    group.attrs['PIXELS_DIM3_XI_C_TOT'] = INDEX_XI_C_TOT
    group.attrs['PIXELS_DIM3_XI_PAH_TOT'] = INDEX_XI_PAH_TOT
    group.attrs['PIXELS_DIM3_XI_MIR_TOT'] = INDEX_XI_MIR_TOT
    group.attrs['PIXELS_DIM3_XI_W_TOT'] = INDEX_XI_W_TOT
    group.attrs['PIXELS_DIM3_TAU_V_ISM'] = INDEX_TAU_V_ISM
    group.attrs['PIXELS_DIM3_M_DUST'] = INDEX_M_DUST
    group.attrs['PIXELS_DIM3_SFR_0_1GYR'] = INDEX_SFR_0_1GYR
    group.attrs['PIXELS_DIM4_BEST_FIT'] = INDEX_BEST_FIT
    group.attrs['PIXELS_DIM4_PERCENTILE_50'] = INDEX_PERCENTILE_50
    group.attrs['PIXELS_DIM4_HIGHEST_PROB_BIN'] = INDEX_HIGHEST_PROB_BIN
    group.attrs['PIXELS_DIM4_PERCENTILE_2_5'] = INDEX_PERCENTILE_2_5
    group.attrs['PIXELS_DIM4_PERCENTILE_16'] = INDEX_PERCENTILE_16
    group.attrs['PIXELS_DIM4_PERCENTILE_84'] = INDEX_PERCENTILE_84
    group.attrs['PIXELS_DIM4_PERCENTILE_97_5'] = INDEX_PERCENTILE_97_5

    histogram_list = []
    keys = []
    map_areas = {}
    pixel_count = 0
    area_count = 0
    histogram_block_id = 1
    histogram_block_index = 0
    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_files_bucket())

    # Load the area details and keys
    load_map_areas(connection, map_areas, galaxy_id)
    for key in bucket.list(prefix='{0}/sed/'.format(galaxy_file_name)):
        # Ignore the key
        if key.key.endswith('/'):
            continue
        keys.append(key)

    histogram_group = group.create_group('histogram_blocks')
    histogram_data = histogram_group.create_dataset('block_1', (HISTOGRAM_BLOCK_SIZE,), dtype=data_type_pixel_histogram, compression='gzip')

    for block_x in get_chunks(dimension_x):
        for block_y in get_chunks(dimension_y):
            LOG.info('Starting {0} : {1}.'.format(block_x, block_y))
            size_x = get_size(block_x, dimension_x)
            size_y = get_size(block_y, dimension_y)

            # Create the arrays for this block
            data = numpy.empty((size_x, size_y, NUMBER_PARAMETERS, NUMBER_IMAGES), dtype=numpy.float)
            data.fill(numpy.NaN)
            data_pixel_details = group.create_dataset('pixel_details_{0}_{1}'.format(block_x, block_y), (size_x, size_y), dtype=data_type_pixel, compression='gzip')
            data_pixel_parameters = group.create_dataset('pixel_parameters_{0}_{1}'.format(block_x, block_y), (size_x, size_y, NUMBER_PARAMETERS), dtype=data_type_pixel_parameter, compression='gzip')
            data_pixel_filter = group.create_dataset('pixel_filters_{0}_{1}'.format(block_x, block_y), (size_x, size_y, dimension_z), dtype=data_type_pixel_filter, compression='gzip')
            data_pixel_histograms_grid = group.create_dataset('pixel_histograms_grid_{0}_{1}'.format(block_x, block_y), (size_x, size_y, NUMBER_PARAMETERS), dtype=data_type_block_details, compression='gzip')

            for key in keys:
                if not area_intersects_block(connection, key.key, block_x, block_y, map_areas):
                    LOG.info('Skipping {0}'.format(key.key))
                    continue

                # Now process the file
                start_time = time.time()
                LOG.info('Processing file {0}'.format(key.key))
                temp_file = os.path.join(POGS_TMP, 'temp.sed')
                key.get_contents_to_filename(temp_file)

                if is_gzip(temp_file):
                    f = gzip.open(temp_file, "rb")
                else:
                    f = open(temp_file, "r")

                area_id = None
                pxresult_id = None
                line_number = 0
                percentiles_next = False
                histogram_next = False
                skynet_next1 = False
                skynet_next2 = False
                skip_this_pixel = False
                map_pixel_results = {}
                list_filters = []
                try:
                    for line in f:
                        line_number += 1

                        if line.startswith(" ####### "):
                            # Clear all the maps and stuff
                            map_pixel_results = {}
                            list_filters = []

                            # Split the line to extract the data
                            values = line.split()
                            pointName = values[1]
                            pxresult_id = pointName[3:].rstrip()
                            (raw_x, raw_y, area_id) = get_pixel_result(connection, pxresult_id)

                            # The pixel could be out of this block as the cutting up is not uniform
                            if pixel_in_block(raw_x, raw_y, block_x, block_y):
                                # correct x & y for this block
                                x = raw_x - (block_x * MAX_X_Y_BLOCK)
                                y = raw_y - (block_y * MAX_X_Y_BLOCK)
                                # LOG.info('Processing pixel {0}:{1} or {2}:{3} - {4}:{5}'.format(raw_x, raw_y, x, y, block_x, block_y))
                                line_number = 0
                                percentiles_next = False
                                histogram_next = False
                                skynet_next1 = False
                                skynet_next2 = False
                                skip_this_pixel = False
                                pixel_count += 1
                            else:
                                # LOG.info('Skipping pixel {0}:{1} - {2}:{3}'.format(raw_x, raw_y, block_x, block_y))
                                skip_this_pixel = True
                        elif skip_this_pixel:
                            # Do nothing as we're skipping this pixel
                            pass
                        elif pxresult_id is not None:
                            if line_number == 2:
                                filter_names = line.split()
                                filter_layer = 0
                                for filter_name in filter_names:
                                    if filter_name != '#':
                                        data_pixel_filter.attrs[filter_name] = filter_layer
                                        filter_layer += 1
                            elif line_number == 3:
                                values = line.split()
                                for value in values:
                                    list_filters.append([float(value)])
                            elif line_number == 4:
                                filter_layer = 0
                                values = line.split()
                                for value in values:
                                    filter_description = list_filters[filter_layer]
                                    filter_description.append(float(value))
                                    filter_layer += 1
                            elif line_number == 9:
                                values = line.split()
                                map_pixel_results['i_sfh'] = float(values[0])
                                map_pixel_results['i_ir'] = float(values[1])
                                map_pixel_results['chi2'] = float(values[2])
                                map_pixel_results['redshift'] = float(values[3])
                            elif line_number == 11:
                                values = line.split()
                                data[x, y, INDEX_F_MU_SFH, INDEX_BEST_FIT] = float(values[0])
                                data[x, y, INDEX_F_MU_IR, INDEX_BEST_FIT] = float(values[1])
                                data[x, y, INDEX_MU_PARAMETER, INDEX_BEST_FIT] = float(values[2])
                                data[x, y, INDEX_TAU_V, INDEX_BEST_FIT] = float(values[3])
                                data[x, y, INDEX_SSFR_0_1GYR, INDEX_BEST_FIT] = float(values[4])
                                data[x, y, INDEX_M_STARS, INDEX_BEST_FIT] = float(values[5])
                                data[x, y, INDEX_L_DUST, INDEX_BEST_FIT] = float(values[6])
                                data[x, y, INDEX_T_W_BC, INDEX_BEST_FIT] = float(values[7])
                                data[x, y, INDEX_T_C_ISM, INDEX_BEST_FIT] = float(values[8])
                                data[x, y, INDEX_XI_C_TOT, INDEX_BEST_FIT] = float(values[9])
                                data[x, y, INDEX_XI_PAH_TOT, INDEX_BEST_FIT] = float(values[10])
                                data[x, y, INDEX_XI_MIR_TOT, INDEX_BEST_FIT] = float(values[11])
                                data[x, y, INDEX_XI_W_TOT, INDEX_BEST_FIT] = float(values[12])
                                data[x, y, INDEX_TAU_V_ISM, INDEX_BEST_FIT] = float(values[13])
                                data[x, y, INDEX_M_DUST, INDEX_BEST_FIT] = float(values[14])
                                data[x, y, INDEX_SFR_0_1GYR, INDEX_BEST_FIT] = float(values[15])
                            elif line_number == 13:
                                filter_layer = 0
                                values = line.split()
                                for value in values:
                                    filter_description = list_filters[filter_layer]
                                    if filter_layer < dimension_z:
                                        data_pixel_filter[x, y, filter_layer] = (
                                            filter_description[0],
                                            filter_description[1],
                                            float(value),
                                        )
                                    filter_layer += 1
                            elif line_number > 13:
                                if line.startswith("# ..."):
                                    parts = line.split('...')
                                    parameter_name = parts[1].strip()
                                    parameter_name_id = map_parameter_name[parameter_name]
                                    percentiles_next = False
                                    histogram_next = True
                                    skynet_next1 = False
                                    skynet_next2 = False
                                    histogram_list = []
                                elif line.startswith("#....percentiles of the PDF......"):
                                    percentiles_next = True
                                    histogram_next = False
                                    skynet_next1 = False
                                    skynet_next2 = False

                                    # Write out the histogram into a block for compression improvement
                                    data_pixel_histograms_grid[x, y, parameter_name_id - 1] = (histogram_block_id, histogram_block_index, len(histogram_list))
                                    for pixel_histogram_item in histogram_list:
                                        # Do we need a new block
                                        if histogram_block_index >= HISTOGRAM_BLOCK_SIZE:
                                            histogram_block_id += 1
                                            histogram_block_index = 0
                                            histogram_data = histogram_group.create_dataset('block_{0}'.format(histogram_block_id), (HISTOGRAM_BLOCK_SIZE,), dtype=data_type_pixel_histogram, compression='gzip')

                                        histogram_data[histogram_block_index] = (
                                            pixel_histogram_item[0],
                                            pixel_histogram_item[1],
                                        )
                                        histogram_block_index += 1
                                elif line.startswith(" #...theSkyNet"):
                                    percentiles_next = False
                                    histogram_next = False
                                    skynet_next1 = True
                                    skynet_next2 = False
                                elif line.startswith("# theSkyNet2"):
                                    percentiles_next = False
                                    histogram_next = False
                                    skynet_next1 = False
                                    skynet_next2 = True
                                elif percentiles_next:
                                    values = line.split()
                                    z = parameter_name_id - 1
                                    data[x, y, z, INDEX_PERCENTILE_2_5] = float(values[0])
                                    data[x, y, z, INDEX_PERCENTILE_16] = float(values[1])
                                    data[x, y, z, INDEX_PERCENTILE_50] = float(values[2])
                                    data[x, y, z, INDEX_PERCENTILE_84] = float(values[3])
                                    data[x, y, z, INDEX_PERCENTILE_97_5] = float(values[4])
                                    percentiles_next = False
                                elif histogram_next:
                                    values = line.split()
                                    hist_value = float(values[1])
                                    if hist_value > MIN_HIST_VALUE and not math.isnan(hist_value):
                                        histogram_list.append((float(values[0]), hist_value))
                                elif skynet_next1:
                                    values = line.split()
                                    data_pixel_details[x, y] = (
                                        pxresult_id,
                                        area_id,
                                        map_pixel_results['i_sfh'],
                                        map_pixel_results['i_ir'],
                                        map_pixel_results['chi2'],
                                        map_pixel_results['redshift'],
                                        float(values[0]),
                                        float(values[2]),
                                        float(values[3]),
                                        float(values[4]),
                                    )
                                    skynet_next1 = False
                                elif skynet_next2:
                                    # We have the highest bin probability values which require the parameter_id
                                    values = line.split()
                                    high_prob_bin = float(values[0]) if float(values[0]) is not None else numpy.NaN
                                    first_prob_bin = float(values[1]) if float(values[1]) is not None else numpy.NaN
                                    last_prob_bin = float(values[2]) if float(values[2]) is not None else numpy.NaN
                                    bin_step = float(values[3]) if float(values[3]) is not None else numpy.NaN
                                    z = parameter_name_id - 1
                                    data[x, y, z, INDEX_HIGHEST_PROB_BIN] = high_prob_bin
                                    data_pixel_parameters[x, y, z] = (
                                        first_prob_bin,
                                        last_prob_bin,
                                        bin_step,
                                    )
                                    skynet_next2 = False
                except IOError:
                    LOG.error('IOError after {0} lines'.format(line_number))
                finally:
                    f.close()

                area_count += 1
                LOG.info('{0:0.3f} seconds for file {1}. {2} of {3} areas.'.format(time.time() - start_time, key.key, area_count, area_total))

            group.create_dataset('pixels_{0}_{1}'.format(block_x, block_y), data=data, compression='gzip')

    LOG.info('histogram_blocks: {0}, x_blocks: {1}, y_blocks: {2}'.format(histogram_block_id, block_x, block_y))
    return pixel_count
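
# get_chunks() and get_size() are called above but not defined in this
# fragment. A minimal sketch of what they plausibly do, given that blocks
# are MAX_X_Y_BLOCK pixels on a side and the final block may be partial;
# these bodies are assumptions, not the originals.
def get_chunks(dimension):
    # One block index per MAX_X_Y_BLOCK-sized slice (the last may be partial)
    return range((dimension + MAX_X_Y_BLOCK - 1) // MAX_X_Y_BLOCK)


def get_size(block, dimension):
    # Full blocks are MAX_X_Y_BLOCK wide; the final block is the remainder
    return min(MAX_X_Y_BLOCK, dimension - block * MAX_X_Y_BLOCK)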
def assimilate_handler(self, wu, results, canonical_result):
    """
    Process the Results.
    """
    self.logDebug("Start of assimilate_handler for wu %d\n", wu.id)
    connection = None
    transaction = None
    try:
        if wu.canonical_result:
            out_file = self.get_file_path(canonical_result)
            self.area = None
            if out_file:
                if os.path.isfile(out_file):
                    pass
                else:
                    self.logDebug("File [%s] not found\n", out_file)
                    out_file = None

            if out_file:
                self.logDebug("Reading File [%s]\n", out_file)
                start = time.time()
                connection = ENGINE.connect()
                transaction = connection.begin()
                resultCount = self._process_result(connection, out_file, wu)
                if self.noinsert:
                    transaction.rollback()
                else:
                    if not resultCount:
                        self.logCritical("No results were found in the output file\n")

                    if self._area_id is None:
                        self.logDebug("The Area was not found\n")
                    else:
                        connection.execute(AREA.update()
                                           .where(AREA.c.area_id == self._area_id)
                                           .values(workunit_id=wu.id, update_time=datetime.datetime.now()))

                        user_id_set = set()
                        for result in results:
                            if result.user and result.validate_state == boinc_db.VALIDATE_STATE_VALID:
                                user_id = result.user.id
                                if user_id not in user_id_set:
                                    user_id_set.add(user_id)

                        connection.execute(AREA_USER.delete().where(AREA_USER.c.area_id == self._area_id))
                        insert = AREA_USER.insert()
                        for user_id in user_id_set:
                            connection.execute(insert, area_id=self._area_id, userid=user_id)

                        # Copy the file to S3
                        s3helper = S3Helper()
                        s3helper.add_file_to_bucket(get_files_bucket(),
                                                    get_key_sed(self._galaxy_name, self._run_id, self._galaxy_id, self._area_id),
                                                    out_file,
                                                    reduced_redundancy=True)

                    time_taken = '{0:.2f}'.format(time.time() - start)
                    self.logDebug("Saving %d results for workunit %d in %s seconds\n", resultCount, wu.id, time_taken)
                    transaction.commit()
                connection.close()
            else:
                self.logCritical("The output file was not found\n")
        else:
            self.logDebug("No canonical_result for workunit\n")
            self.report_errors(wu)
    except:
        if transaction is not None:
            transaction.rollback()
        if connection is not None:
            connection.close()
        print "Unexpected error:", sys.exc_info()[0]
        traceback.print_exception(sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2])
        self.logCritical("Unexpected error occurred, retrying...\n")
        return -1

    return 0
def process_file(self, registration):
    """
    Process a registration.

    :param registration:
    """
    self._filename = registration[REGISTER.c.filename]
    self._galaxy_name = registration[REGISTER.c.galaxy_name]
    self._galaxy_type = registration[REGISTER.c.galaxy_type]
    self._priority = registration[REGISTER.c.priority]
    self._redshift = registration[REGISTER.c.redshift]
    self._run_id = registration[REGISTER.c.run_id]
    self._sigma = registration[REGISTER.c.sigma]
    self._sigma_filename = registration[REGISTER.c.sigma_filename]

    # Have we files that we can use for this?
    self._rounded_redshift = self._get_rounded_redshift()
    if self._rounded_redshift is None:
        LOG.error('No models matching the redshift of %.4f', self._redshift)
        return 0, 0

    self._hdu_list = pyfits.open(self._filename, memmap=True)
    self._layer_count = len(self._hdu_list)

    # Do we need to open and sort the S/N Ratio file
    if self._sigma_filename is not None:
        self._sigma = 0.0
        self._signal_noise_hdu = pyfits.open(self._sigma_filename, memmap=True)
        if self._layer_count != len(self._signal_noise_hdu):
            LOG.error('The layer counts do not match %d vs %d', self._layer_count, len(self._signal_noise_hdu))
            return 0, 0
    else:
        self._sigma = float(self._sigma)

    self._end_y = self._hdu_list[0].data.shape[0]
    self._end_x = self._hdu_list[0].data.shape[1]

    LOG.info("Image dimensions: %(x)d x %(y)d x %(z)d => %(pix).2f Mpixels" % {'x': self._end_x, 'y': self._end_y, 'z': self._layer_count, 'pix': self._end_x * self._end_y / 1000000.0})

    # Get the flops estimate and cobblestone factor
    run = self._connection.execute(select([RUN]).where(RUN.c.run_id == self._run_id)).first()
    self._fpops_est_per_pixel = run[RUN.c.fpops_est]
    self._cobblestone_scaling_factor = run[RUN.c.cobblestone_factor]

    # Create and save the object
    datetime_now = datetime.now()
    result = self._connection.execute(GALAXY.insert().values(name=self._galaxy_name,
                                                             dimension_x=self._end_x,
                                                             dimension_y=self._end_y,
                                                             dimension_z=self._layer_count,
                                                             redshift=self._redshift,
                                                             sigma=self._sigma,
                                                             create_time=datetime_now,
                                                             image_time=datetime_now,
                                                             galaxy_type=self._galaxy_type,
                                                             ra_cent=0,
                                                             dec_cent=0,
                                                             pixel_count=0,
                                                             pixels_processed=0,
                                                             run_id=self._run_id))
    self._galaxy_id = result.inserted_primary_key[0]
    LOG.info("Writing %s to database", self._galaxy_name)

    # Store the fits header
    self._store_fits_header()

    # Get the filters we're using for this run and sort the layers
    self._get_filters_sort_layers()

    # Build the template file we need if necessary
    self._build_template_file()

    # Copy the filter and model files we need
    self._copy_important_files()

    # Now break up the galaxy into chunks
    self._break_up_galaxy()
    self._connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == self._galaxy_id).values(pixel_count=self._pixel_count))

    LOG.info('Building the images')
    galaxy_file_name = get_galaxy_file_name(self._galaxy_name, self._run_id, self._galaxy_id)
    s3helper = S3Helper()
    image = FitsImage(self._connection)
    image.build_image(self._filename, galaxy_file_name, self._galaxy_id, get_galaxy_image_bucket())

    # Copy the fits file to S3 - renamed to make it unique
    bucket_name = get_files_bucket()
    s3helper.add_file_to_bucket(bucket_name, get_key_fits(self._galaxy_name, self._run_id, self._galaxy_id), self._filename)
    if self._sigma_filename is not None:
        s3helper.add_file_to_bucket(bucket_name, get_key_sigma_fits(self._galaxy_name, self._run_id, self._galaxy_id), self._sigma_filename)

    return self._work_units_added, self._pixel_count
def assimilate_handler(self, wu, results, canonical_result):
    """
    Process the Results.
    """
    self.logDebug("Start of assimilate_handler for wu %d\n", wu.id)
    connection = None
    transaction = None
    try:
        if wu.canonical_result:
            out_file = self.get_file_path(canonical_result)
            self.area = None
            if out_file:
                if os.path.isfile(out_file):
                    pass
                else:
                    self.logDebug("File [%s] not found\n", out_file)
                    out_file = None

            if out_file:
                self.logDebug("Reading File [%s]\n", out_file)
                start = time.time()
                connection = ENGINE.connect()
                transaction = connection.begin()
                resultCount = self._process_result(connection, out_file, wu)
                if self.noinsert:
                    transaction.rollback()
                else:
                    if not resultCount:
                        self.logCritical("No results were found in the output file\n")

                    if self._area_id is None:
                        self.logDebug("The Area was not found\n")
                    else:
                        connection.execute(AREA.update()
                                           .where(AREA.c.area_id == self._area_id)
                                           .values(workunit_id=wu.id, update_time=datetime.datetime.now()))

                        user_id_set = set()
                        for result in results:
                            if result.user and result.validate_state == boinc_db.VALIDATE_STATE_VALID:
                                user_id = result.user.id
                                if user_id not in user_id_set:
                                    user_id_set.add(user_id)

                        connection.execute(AREA_USER.delete().where(AREA_USER.c.area_id == self._area_id))
                        insert_area_user = AREA_USER.insert()
                        # INSERT IGNORE so an existing (galaxy_id, userid) row is left alone
                        insert_galaxy_user = GALAXY_USER.insert().prefix_with('IGNORE')
                        for user_id in user_id_set:
                            connection.execute(insert_area_user, area_id=self._area_id, userid=user_id)
                            # self.logDebug("Inserting row into galaxy_user for userid: %d galaxy_id: %d\n", user_id, self._galaxy_id)
                            connection.execute(insert_galaxy_user, galaxy_id=self._galaxy_id, userid=user_id)

                        # Copy the file to S3
                        s3helper = S3Helper()
                        s3helper.add_file_to_bucket(get_files_bucket(),
                                                    get_key_sed(self._galaxy_name, self._run_id, self._galaxy_id, self._area_id),
                                                    out_file,
                                                    reduced_redundancy=True)

                    time_taken = '{0:.2f}'.format(time.time() - start)
                    self.logDebug("Saving %d results for workunit %d in %s seconds\n", resultCount, wu.id, time_taken)
                    transaction.commit()
                connection.close()
            else:
                self.logCritical("The output file was not found\n")
        else:
            self.logDebug("No canonical_result for workunit\n")
            self.report_errors(wu)
    except:
        if transaction is not None:
            transaction.rollback()
        if connection is not None:
            connection.close()
        print "Unexpected error:", sys.exc_info()[0]
        traceback.print_exception(sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2])
        self.logCritical("Unexpected error occurred, retrying...\n")
        return -1

    return 0
def fix_galaxy(galaxy, bucket_files, bucket_galaxy_image):
    """
    Fix a galaxy by dropping the stray final character from its name

    :return:
    """
    old_name = galaxy[GALAXY.c.name]
    new_name = old_name[:-1]
    galaxy_id = galaxy[GALAXY.c.galaxy_id]
    run_id = galaxy[GALAXY.c.run_id]
    LOG.info('Fixing {0}({1}) to {2}'.format(old_name, galaxy_id, new_name))
    for extension in ['fits', 'hdf5']:
        copy_files(old_name, new_name, run_id, galaxy_id, extension, bucket_files)

    remove_files_folder(old_name, run_id, galaxy_id, bucket_files)

    for file_name in ['colour_1.png', 'colour_2.png', 'colour_3.png', 'colour_4.png', 'ldust.png', 'm.png', 'mu.png', 'sfr.png', 'tn_colour_1.png']:
        copy_galaxy_images(old_name, new_name, run_id, galaxy_id, file_name, bucket_galaxy_image)

    remove_galaxy_images_folder(old_name, run_id, galaxy_id, bucket_galaxy_image)

    if DRY_RUN:
        LOG.info('Updating {0} to {1}'.format(galaxy_id, new_name))
    else:
        connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(name=new_name))


for galaxy in connection.execute(select([GALAXY])):
    s3helper = S3Helper()
    bucket_files = s3helper.get_bucket(get_files_bucket())
    bucket_galaxy_image = s3helper.get_bucket(get_galaxy_image_bucket())
    if needs_fixing(galaxy[GALAXY.c.name]):
        fix_galaxy(galaxy, bucket_files, bucket_galaxy_image)

connection.close()
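
# needs_fixing() is referenced above but not defined in this fragment; it
# would have to be defined before the module-level loop runs. A plausible
# sketch (an assumption, not the original), given that fix_galaxy() simply
# drops the final character of the name:
#
#     def needs_fixing(galaxy_name):
#         # assumption: bad names end in one stray character,
#         # e.g. a trailing '_' left by an earlier renaming bug
#         return galaxy_name.endswith('_')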