def process_galaxy(connection, galaxies, verbosity):
    """Calculate and store mean / standard-deviation details for each galaxy.

    For every galaxy, work out which dynamic tables still need the
    mean/standard-deviation step and run CreateMeanStandardDeviation on each,
    recording the step as done inside the same transaction.
    """
    # Make sure the per-galaxy dynamic tables exist before we touch them.
    build_dynamic_tables(connection)

    for row in galaxies:
        current_id = row[GALAXY.c.galaxy_id]
        completed_steps = get_step_done_ids(
            connection,
            current_id,
            STEP_DONE_ID_MEAN_STANDARD_DEVIATION,
            True
        )
        if verbosity:
            LOG.info(
                'galaxy_id: {0}, galaxy_name: {1}'.format(
                    current_id,
                    row[GALAXY.c.name]
                )
            )

        # Only tables whose step has not yet been recorded need processing.
        for pending_table in get_tables_required(completed_steps):
            with connection.begin():
                builder = CreateMeanStandardDeviation(connection, pending_table, current_id)
                builder.calculate_details()
                builder.update_details()
                # Mark the step done in the same transaction as the update.
                add_step_done_id(
                    connection,
                    current_id,
                    STEP_DONE_ID_MEAN_STANDARD_DEVIATION,
                    pending_table.name
                )
def process_galaxy(connection, galaxies, verbosity):
    """Run the mask-building step for every galaxy that has not done it yet.

    Galaxies whose STEP_DONE_ID_BUILD_MASK step is already recorded are
    skipped; the mask build and the step-done record share one transaction.
    """
    # Ensure the dynamic tables exist before processing.
    build_dynamic_tables(connection)

    for row in galaxies:
        current_id = row[GALAXY.c.galaxy_id]
        completed_steps = get_step_done_ids(connection, current_id, STEP_DONE_ID_BUILD_MASK)
        if verbosity:
            LOG.info(
                'galaxy_id: {0}, galaxy_name: {1}'.format(
                    current_id,
                    row[GALAXY.c.name]
                )
            )

        # Skip galaxies that already have their mask built.
        if STEP_DONE_ID_BUILD_MASK in completed_steps:
            continue

        with connection.begin():
            mask_builder = ProcessGalaxyMasks(connection, current_id)
            mask_builder.run()
            add_step_done_id(
                connection,
                current_id,
                STEP_DONE_ID_BUILD_MASK
            )
def add_original_data(connection, galaxy_id, h5_file, data_required):
    """Insert the original per-filter values for a galaxy from its HDF5 file.

    Each filter is inserted in its own transaction together with its
    step-done record; per-filter timings are accumulated (only filters that
    reported a positive time count towards the average) and logged at the end.
    """
    galaxy_group = h5_file['galaxy']
    pixel_group = galaxy_group['pixel']
    attributes = galaxy_group.attrs
    timed_filters = 0
    elapsed_total = 0

    # Insert the original values, one filter per transaction.
    for filter_name in data_required:
        with connection.begin():
            elapsed = insert_original_data(
                connection,
                filter_name,
                pixel_group,
                attributes['output_format'],
                attributes['dimension_x'],
                attributes['dimension_y'],
                galaxy_id
            )
            add_step_done_id(connection, galaxy_id, STEP_DONE_ID_ORIGINAL_VALUES, filter_name)
        # Only filters that actually took time contribute to the average.
        if elapsed > 0:
            timed_filters += 1
            elapsed_total += elapsed

    average = elapsed_total / timed_filters if timed_filters > 0 else 0
    LOG.info(
        'add_original_data, galaxy_id: {0}, total_time: {1:.3f}s, average_time: {2:.3f}s'.format(
            galaxy_id,
            elapsed_total,
            average
        )
    )
def add_sed_data(connection, galaxy_id, h5_file, data_required):
    """Insert the SED data for each requested pair from the galaxy HDF5 file.

    NOTE(review): each element of data_required appears to be a 2-tuple whose
    parts identify the SED data set (they are passed to add_step_done_id as
    two separate arguments) — confirm against the caller.
    """
    galaxy_group = h5_file['galaxy']
    pixel_group = galaxy_group['pixel']
    attributes = galaxy_group.attrs
    pairs_done = 0
    elapsed_total = 0

    # Insert the data, one pair per transaction.
    for pair in data_required:
        with connection.begin():
            elapsed = insert_sed_data(
                connection,
                pair,
                pixel_group,
                attributes['output_format'],
                attributes['dimension_x'],
                attributes['dimension_y'],
                galaxy_id
            )
            add_step_done_id(connection, galaxy_id, STEP_DONE_ID_SED_DATA, pair[0], pair[1])
        pairs_done += 1
        elapsed_total += elapsed

    LOG.info(
        'add_sed_data, galaxy_id: {0}, total_time: {1:.3f}s, average_time: {2:.3f}s'.format(
            galaxy_id,
            elapsed_total,
            elapsed_total / pairs_done if pairs_done > 0 else 0
        )
    )
def process_galaxy(connection, galaxies, verbosity):
    """Compute and store centroid / centre-of-mass image details per galaxy.

    Requires the mask-build step to be done first. For each galaxy that has
    not yet done the centroid step, the results are calculated and one
    IMAGE_DETAIL row is inserted — real values when layers were found,
    -1.0 sentinels otherwise — inside a single transaction.

    Fix: numpy.asscalar() was deprecated in NumPy 1.16 and removed in 1.23;
    ndarray.item() is the documented drop-in replacement.
    """
    # Build the tables
    build_dynamic_tables(connection)
    for galaxy in galaxies:
        galaxy_id = galaxy[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            [STEP_DONE_ID_BUILD_MASK, STEP_DONE_ID_CENTROID_CENTRE_OF_MASS])
        # The mask must exist before the centroid can be calculated.
        if STEP_DONE_ID_BUILD_MASK in steps_done:
            if STEP_DONE_ID_CENTROID_CENTRE_OF_MASS not in steps_done:
                with connection.begin():
                    centroid_centre_of_mass = CentroidCentreOfMass(connection, galaxy, verbosity)
                    centroid_centre_of_mass.calculate_results()
                    if len(centroid_centre_of_mass.layers) > 0:
                        # .item() converts the NumPy scalar to a plain Python
                        # float (asscalar was removed in NumPy 1.23).
                        connection.execute(
                            IMAGE_DETAIL.insert(),
                            galaxy_id=galaxy_id,
                            x_image=centroid_centre_of_mass.dimension_x / 2.0,
                            y_image=centroid_centre_of_mass.dimension_y / 2.0,
                            x_centroid=centroid_centre_of_mass.centroid[0].item(),
                            y_centroid=centroid_centre_of_mass.centroid[1].item(),
                            x_centre_of_mass=centroid_centre_of_mass.centre_of_mass[0].item(),
                            y_centre_of_mass=centroid_centre_of_mass.centre_of_mass[1].item()
                        )
                    else:
                        # No layers => nothing measurable; store -1.0 sentinels.
                        connection.execute(
                            IMAGE_DETAIL.insert(),
                            galaxy_id=galaxy_id,
                            x_image=centroid_centre_of_mass.dimension_x / 2.0,
                            y_image=centroid_centre_of_mass.dimension_y / 2.0,
                            x_centroid=-1.0,
                            y_centroid=-1.0,
                            x_centre_of_mass=-1.0,
                            y_centre_of_mass=-1.0
                        )
                    add_step_done_id(connection, galaxy_id, STEP_DONE_ID_CENTROID_CENTRE_OF_MASS)
            elif verbosity >= 1:
                LOG.info('Nothing to do for the galaxy {0}'.format(galaxy_id))
        elif verbosity >= 1:
            LOG.info('The mask has not been built for the galaxy {0}'.format(galaxy_id))
def process_galaxy(connection, galaxies, verbosity):
    """Build the galaxy-detail rows for every dynamic table still missing them.

    Each required table is processed in its own transaction and timed; a
    per-table and a per-galaxy elapsed time are logged.
    """
    # Create the dynamic tables first.
    build_dynamic_tables(connection)

    for row in galaxies:
        galaxy_started = time.time()
        current_id = row[GALAXY.c.galaxy_id]
        completed_steps = get_step_done_ids(connection, current_id, STEP_DONE_ID_BUILD_GALAXY_DETAILS, True)
        if verbosity:
            LOG.info(
                'galaxy_id: {0}, galaxy_name: {1}'.format(
                    current_id,
                    row[GALAXY.c.name]
                )
            )

        for pending_table in get_tables_required(completed_steps):
            with connection.begin():
                table_started = time.time()
                detail_builder = CreateGalaxyDetail(connection, pending_table, current_id)
                detail_builder.calculate_details()
                detail_builder.insert_details()
                # Record the step in the same transaction as the inserts.
                add_step_done_id(
                    connection,
                    current_id,
                    STEP_DONE_ID_BUILD_GALAXY_DETAILS,
                    pending_table.name
                )
                LOG.info(
                    'galaxy_id: {0}, table: {1}, time: {2:.3f}s'.format(
                        current_id,
                        pending_table.name,
                        time.time() - table_started
                    )
                )

        LOG.info(
            'galaxy_id: {0}, time: {1:.3f}s'.format(
                current_id,
                time.time() - galaxy_started
            )
        )
def copy_hdf5_files(args):
    """Copy HDF5 files for galaxies that have no processing steps recorded.

    For each galaxy with no STEP_DONE entry: if its HDF5 file is missing from
    the AWS POGS bucket, look for it in the old POGS bucket and copy/restore
    it; if it exists in neither bucket, record STEP_DONE_ID_NO_HDF5_FILE so
    the galaxy is not checked again.

    Fixes: ``galaxy_id / 1000`` is true division under Python 3 and handed a
    float block number to get_block_dir — ``//`` preserves the intended
    integer value (identical result for ints under Python 2). The database
    connection is now always closed.
    """
    s3_helper = S3Helper()
    bucket_aws_pogs = s3_helper.get_bucket(BUCKET_NAME)
    bucket_pogs = s3_helper.get_bucket(BUCKET_NAME_OLD_POGS)
    engine = create_engine(DB_LOGIN)
    connection = engine.connect()
    try:
        # Galaxies with at least one step recorded are already handled.
        subquery = select([STEP_DONE.c.galaxy_id]).distinct()
        for galaxy in connection.execute(select([GALAXY]).where(~GALAXY.c.galaxy_id.in_(subquery)).order_by(GALAXY.c.galaxy_id)):
            galaxy_id = galaxy[GALAXY.c.galaxy_id]
            if args.verbose:
                LOG.info('Checking galaxy_id: {0}, name: {1}'.format(galaxy_id, galaxy[GALAXY.c.name]))
            # noinspection PyBroadException
            try:
                # BUG FIX: '//' keeps the block number an int; '/' under
                # Python 3 produced a float (e.g. 12.345 instead of 12).
                block_dir = get_block_dir(galaxy_id // 1000)
                name_aws_pogs = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy_id)
                key_aws_pogs = Key(bucket_aws_pogs)
                key_aws_pogs.key = '{0}/{1}.hdf5'.format(block_dir, name_aws_pogs)
                if not key_aws_pogs.exists():
                    # Does it exist in POGS?
                    name_pogs = get_galaxy_file_name_pogs(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy_id)
                    key_pogs = Key(bucket_pogs)
                    key_pogs.key = '{0}/{0}.hdf5'.format(name_pogs)
                    if key_pogs.exists():
                        check_and_copy_or_restore(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy_id, args.verbose)
                    else:
                        # The file exists in neither bucket - record that so we
                        # never look for it again.
                        add_step_done_id(connection, galaxy_id, STEP_DONE_ID_NO_HDF5_FILE)
            except BaseException:
                # Deliberately broad: any failure aborts the whole run rather
                # than looping over further S3/DB errors.
                LOG.exception('error during fetch, quitting')
                break
        if args.verbose:
            LOG.info('Done')
    finally:
        # The original leaked the connection; always release it.
        connection.close()
def process_galaxy(connection, galaxies):
    """ Process the galaxies

    For each galaxy, find the dynamic tables that have galaxy details built
    (STEP_DONE_ID_BUILD_GALAXY_DETAILS) but are not yet normalised
    (STEP_DONE_ID_NORMALISE_Z_MIN_MAX). For every masked pixel value in such
    a table, four normalised forms are computed and bulk-inserted into the
    matching '<table>__extended' table:
      - value_zscore:   (value - mean) / standard_deviation
      - value_min_max:  (value - min) / (max - min)
      - value_ln:       ln(value - min + 1)
      - value_softmax:  logistic function of the z-score
    Each table is handled in its own transaction, and the step is recorded
    as done even when no rows qualify, so the table is not revisited.
    """
    # Build the tables
    build_dynamic_tables(connection)
    for galaxy in galaxies:
        galaxy_id = galaxy[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            [STEP_DONE_ID_BUILD_GALAXY_DETAILS, STEP_DONE_ID_NORMALISE_Z_MIN_MAX],
            True
        )
        tables_required = get_tables_required(steps_done)
        if len(tables_required) > 0:
            time_start = time.time()
            for table in tables_required:
                # One transaction per table: inserts + step-done commit together.
                with connection.begin():
                    time_start_update = time.time()
                    # Pre-computed statistics for this galaxy/table pair.
                    mean_standard = connection.execute(
                        select([GALAXY_DETAIL]).where(
                            and_(
                                GALAXY_DETAIL.c.galaxy_id == galaxy_id,
                                GALAXY_DETAIL.c.table_name == table.name
                            )
                        )
                    ).first()
                    insert_data = []
                    if mean_standard is not None:
                        mean = mean_standard[GALAXY_DETAIL.c.mean]
                        standard_deviation = mean_standard[GALAXY_DETAIL.c.standard_deviation]
                        min_value = mean_standard[GALAXY_DETAIL.c.min_value]
                        max_value = mean_standard[GALAXY_DETAIL.c.max_value]
                        max_minus_min = max_value - min_value
                        # Guard against division by zero (degenerate statistics)
                        # and NULL columns from the details table.
                        if max_minus_min is not None and max_minus_min != 0.0 \
                                and standard_deviation is not None and standard_deviation != 0.0:
                            # Join against MASK_POINT so only masked pixels are
                            # normalised; 'original_value__' tables additionally
                            # skip non-positive values (ln/z-score inputs).
                            if table.name.startswith('original_value__'):
                                select_statement = select(
                                    [table]
                                ).where(
                                    and_(
                                        table.c.galaxy_id == galaxy_id,
                                        table.c.galaxy_id == MASK_POINT.c.galaxy_id,
                                        table.c.x == MASK_POINT.c.x,
                                        table.c.y == MASK_POINT.c.y,
                                        table.c.value > 0.0
                                    )
                                )
                            else:
                                select_statement = select(
                                    [table]
                                ).where(
                                    and_(
                                        table.c.galaxy_id == galaxy_id,
                                        table.c.galaxy_id == MASK_POINT.c.galaxy_id,
                                        table.c.x == MASK_POINT.c.x,
                                        table.c.y == MASK_POINT.c.y
                                    )
                                )
                            for table_data in connection.execute(select_statement):
                                value = table_data[table.c.value]
                                z_score = (value - mean) / standard_deviation
                                insert_data.append(
                                    {
                                        'original_id': table_data[table.c.id],
                                        'value_zscore': z_score,
                                        # min-max scaled into [0, 1]
                                        'value_min_max': (value - min_value) / max_minus_min,
                                        # shifted by (1 - min) so the argument is >= 1
                                        'value_ln': math.log(value - min_value + 1),
                                        # logistic (sigmoid) of the z-score
                                        'value_softmax': 1.0 / (1.0 + math.exp(-z_score))
                                    }
                                )
                            if len(insert_data) > 0:
                                # Bulk insert into the matching extended table.
                                table_extended = DATA_TABLES_EXTENDED[table.name + '__extended']
                                connection.execute(table_extended.insert(), insert_data)
                    # Mark the step done even when nothing was inserted so this
                    # table is not reprocessed on the next run.
                    add_step_done_id(
                        connection,
                        galaxy_id,
                        STEP_DONE_ID_NORMALISE_Z_MIN_MAX,
                        table.name
                    )
                    time_end_update = time.time()
                    LOG.info(
                        'galaxy_id: {0}, table: {1}__extended, time: {2:.3f}s, inserts: {3}'.format(
                            galaxy_id, table.name, time_end_update - time_start_update, len(insert_data)
                        )
                    )
            time_end = time.time()
            LOG.info('galaxy_id: {0}, time: {1:.3f}s'.format(galaxy_id, time_end - time_start))