def _store_fits_header(self):
    """
    Store the FITS headers we need to remember
    """
    insert = FITS_HEADER.insert()
    header = self._hdu_list[0].header
    index = 0
    ctype1 = None
    ctype2 = None
    for keyword in header:
        # The new version of PyFits supports comments
        value = header[index]
        comment = header.comments[index]
        self._connection.execute(insert.values(galaxy_id=self._galaxy_id,
                                               keyword=keyword,
                                               value=value,
                                               comment=comment))

        # Record the ctype so we can get the RA and DEC
        if keyword == 'CTYPE1':
            ctype1 = value
        elif keyword == 'CTYPE2':
            ctype2 = value

        # Record the RA and DEC if we can
        if keyword == 'RA_CENT' or (ctype1 == 'RA---TAN' and keyword == 'CRVAL1'):
            self._connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == self._galaxy_id).values(ra_cent=float(value)))
        elif keyword == 'DEC_CENT' or (ctype2 == 'DEC--TAN' and keyword == 'CRVAL2'):
            self._connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == self._galaxy_id).values(dec_cent=float(value)))

        index += 1
def processed_data(connection):
    """
    Work out which galaxies have been processed

    :param connection:
    :return:
    """
    # Get the work units still being processed
    engine = create_engine(BOINC_DB_LOGIN)
    connection_boinc = engine.connect()
    current_jobs = []
    for result in connection_boinc.execute(select([RESULT]).where(RESULT.c.server_state != 5)):
        current_jobs.append(result[RESULT.c.name])
    connection_boinc.close()

    sorted_data = sort_data(connection, current_jobs)
    for key in sorted(sorted_data.iterkeys()):
        LOG.info('{0}: {1} results'.format(key, len(sorted_data[key])))

    # Get the galaxies we know are still processing
    processed = []
    for galaxy in connection.execute(select([GALAXY]).where(GALAXY.c.status_id == COMPUTING)):
        if finish_processing(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], sorted_data):
            processed.append(galaxy[GALAXY.c.galaxy_id])
            LOG.info('%d %s has completed', galaxy[GALAXY.c.galaxy_id], galaxy[GALAXY.c.name])

    for galaxy_id in processed:
        connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(status_id=PROCESSED, status_time=datetime.datetime.now()))

    LOG.info('Marked %d galaxies ready for archiving', len(processed))
    LOG.info('%d galaxies are still being processed', len(sorted_data))
def original_image_checked_ami():
    """
    We're running in the AMI instance - so do the actual work

    Check the newly created images to make sure the images have been created
    :return:
    """
    # Connect to the database - the login string is set in the database package
    ENGINE = create_engine(DB_LOGIN)
    connection = ENGINE.connect()
    s3helper = S3Helper()
    try:
        # Look in the database for the galaxies
        galaxy_ids = []
        for galaxy in connection.execute(select([GALAXY]).where(and_(GALAXY.c.original_image_checked == None, GALAXY.c.pixel_count > 0)).order_by(GALAXY.c.galaxy_id)):
            galaxy_ids.append(galaxy[GALAXY.c.galaxy_id])

        for galaxy_id in galaxy_ids:
            galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id)).first()
            if not image_files_exist(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id], s3helper):
                mark_as_checked = regenerated_original_images(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id], s3helper, connection)
            else:
                mark_as_checked = True

            if mark_as_checked:
                connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(original_image_checked=datetime.datetime.now()))

    except Exception:
        LOG.exception('Major error')

    finally:
        connection.close()
def delete_galaxy(connection, galaxy_ids):
    try:
        for galaxy_id_str in galaxy_ids:
            transaction = connection.begin()
            galaxy_id1 = int(galaxy_id_str)
            galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id1)).first()
            if galaxy is None:
                LOG.info('Error: Galaxy with galaxy_id of %d was not found', galaxy_id1)
            else:
                LOG.info('Deleting Galaxy with galaxy_id of %d - %s', galaxy_id1, galaxy[GALAXY.c.name])
                area_count = connection.execute(select([func.count(AREA.c.area_id)]).where(AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])).first()[0]
                counter = 1
                for area_id1 in connection.execute(select([AREA.c.area_id]).where(AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]).order_by(AREA.c.area_id)):
                    LOG.info("Deleting galaxy {0} area {1}. {2} of {3}".format(galaxy_id_str, area_id1[0], counter, area_count))
                    connection.execute(PIXEL_RESULT.delete().where(PIXEL_RESULT.c.area_id == area_id1[0]))

                    # Give the rest of the world a chance to access the database
                    time.sleep(0.1)
                    counter += 1

                # Now empty the bucket
                s3helper = S3Helper()
                bucket = s3helper.get_bucket(get_files_bucket())
                galaxy_file_name = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                for key in bucket.list(prefix='{0}/sed/'.format(galaxy_file_name)):
                    # Ignore the key
                    if key.key.endswith('/'):
                        continue
                    bucket.delete_key(key)

                # Now the folder
                key = Key(bucket)
                key.key = '{0}/sed/'.format(galaxy_file_name)
                bucket.delete_key(key)

            LOG.info('Galaxy with galaxy_id of %d was deleted', galaxy_id1)
            connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id1).values(status_id=DELETED, status_time=datetime.datetime.now()))
            transaction.commit()

    except Exception:
        LOG.exception('Major error')
def fix_galaxy(galaxy, bucket_files, bucket_galaxy_image):
    """
    Fix the galaxy name

    :param galaxy:
    :return:
    """
    old_name = galaxy[GALAXY.c.name]
    new_name = old_name[:-1]
    galaxy_id = galaxy[GALAXY.c.galaxy_id]
    run_id = galaxy[GALAXY.c.run_id]
    LOG.info('Fixing {0}({1}) to {2}'.format(old_name, galaxy_id, new_name))
    for extension in ['fits', 'hdf5']:
        copy_files(old_name, new_name, run_id, galaxy_id, extension, bucket_files)
    remove_files_folder(old_name, run_id, galaxy_id, bucket_files)

    for file_name in ['colour_1.png', 'colour_2.png', 'colour_3.png', 'colour_4.png', 'ldust.png', 'm.png', 'mu.png', 'sfr.png', 'tn_colour_1.png']:
        copy_galaxy_images(old_name, new_name, run_id, galaxy_id, file_name, bucket_galaxy_image)
    remove_galaxy_images_folder(old_name, run_id, galaxy_id, bucket_galaxy_image)

    if DRY_RUN:
        LOG.info('Updating {0} to {1}'.format(galaxy_id, new_name))
    else:
        connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(name=new_name))
def delete_galaxy(connection, galaxy_ids):
    for galaxy_id in galaxy_ids:
        transaction = connection.begin()
        galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id)).first()
        if galaxy is None:
            LOG.info('Error: Galaxy with galaxy_id of %d was not found', galaxy_id)
        else:
            LOG.info('Deleting Galaxy with galaxy_id of %d - %s', galaxy_id, galaxy[GALAXY.c.name])
            area_count = connection.execute(select([func.count(AREA.c.area_id)]).where(AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])).first()[0]
            counter = 1
            for area_id1 in connection.execute(select([AREA.c.area_id]).where(AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]).order_by(AREA.c.area_id)):
                LOG.info("Deleting galaxy {0} area {1}. {2} of {3}".format(galaxy_id, area_id1[0], counter, area_count))
                connection.execute(PIXEL_RESULT.delete().where(PIXEL_RESULT.c.area_id == area_id1[0]))

                # Give the rest of the world a chance to access the database
                time.sleep(0.1)
                counter += 1

                if shutdown() is True:
                    transaction.rollback()
                    raise SystemExit

            LOG.info("Deleting FITS headers for galaxy {0}".format(galaxy_id))
            connection.execute(FITS_HEADER.delete().where(FITS_HEADER.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]))

            # Now empty the bucket of the sed files
            s3helper = S3Helper()
            bucket = s3helper.get_bucket(get_sed_files_bucket())
            galaxy_file_name = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
            for key in bucket.list(prefix='{0}/'.format(galaxy_file_name)):
                # Ignore the key
                if key.key.endswith('/'):
                    continue

                bucket.delete_key(key)

                if shutdown() is True:
                    transaction.rollback()
                    raise SystemExit

            # Now the folder
            key = Key(bucket)
            key.key = '{0}/'.format(galaxy_file_name)
            bucket.delete_key(key)

        LOG.info('Galaxy with galaxy_id of %d was deleted', galaxy_id)
        connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(status_id=DELETED, status_time=datetime.datetime.now()))

        if shutdown() is True:
            transaction.rollback()
            raise SystemExit

        transaction.commit()
def store_files(hdf5_dir):
    """
    Scan a directory for files and send them to the archive

    :param hdf5_dir: the directory to scan
    :return:
    """
    LOG.info('Directory: %s', hdf5_dir)

    # Get the work units still being processed
    ENGINE = create_engine(DB_LOGIN)
    connection = ENGINE.connect()

    files = os.path.join(hdf5_dir, '*.hdf5')
    file_count = 0
    try:
        s3helper = S3Helper()
        bucket_name = get_files_bucket()
        for file_name in glob.glob(files):
            size = os.path.getsize(file_name)
            galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
            if galaxy_id >= 0:
                key = '{0}/{0}.hdf5'.format(galaxy_name)
                LOG.info('File name: %s', file_name)
                LOG.info('File size: %d', size)
                LOG.info('Bucket: %s', bucket_name)
                LOG.info('Key: %s', key)

                s3helper.add_file_to_bucket(bucket_name, key, file_name)
                file_count += 1
                os.remove(file_name)
                connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(status_id=STORED, status_time=datetime.datetime.now()))
            else:
                LOG.error('File name: %s', file_name)
                LOG.error('File size: %d', size)
                LOG.error('Could not get the galaxy id')

    except Exception:
        LOG.exception('Major error')

    finally:
        connection.close()

    return file_count
def processed_data(connection, modulus, remainder):
    """
    Work out which galaxies have been processed

    :param connection:
    :return:
    """
    # Get the work units still being processed
    engine = create_engine(BOINC_DB_LOGIN)
    connection_boinc = engine.connect()
    current_jobs = []
    LOG.info('Getting results from BOINC')
    # The use of appid ensures MySQL uses an index otherwise it does a full table scan
    for result in connection_boinc.execute(select([RESULT]).where(and_(RESULT.c.server_state != 5, RESULT.c.appid == 1))):
        current_jobs.append(result[RESULT.c.name])
    connection_boinc.close()
    LOG.info('Got results')

    sorted_data = sort_data(connection, current_jobs, modulus, remainder)
    for key in sorted(sorted_data.iterkeys()):
        LOG.info('{0}: {1} results'.format(key, len(sorted_data[key])))

    # Get the galaxies we know are still processing
    processed = []
    for galaxy in connection.execute(select([GALAXY]).where(GALAXY.c.status_id == COMPUTING)):
        if modulus is None or int(galaxy[GALAXY.c.galaxy_id]) % modulus == remainder:
            if finish_processing(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], sorted_data):
                processed.append(galaxy[GALAXY.c.galaxy_id])
                LOG.info('%d %s has completed', galaxy[GALAXY.c.galaxy_id], galaxy[GALAXY.c.name])

    for galaxy_id in processed:
        connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(status_id=PROCESSED, status_time=datetime.datetime.now()))

        if shutdown() is True:
            raise SystemExit

    LOG.info('Marked %d galaxies ready for archiving', len(processed))
    LOG.info('%d galaxies are still being processed', len(sorted_data))
def store_files(connection, modulus, remainder):
    """
    Scan a directory for files and send them to the archive
    """
    LOG.info('Directory: %s', HDF5_OUTPUT_DIRECTORY)

    to_store_dir = os.path.join(HDF5_OUTPUT_DIRECTORY, 'to_store')
    files = os.path.join(to_store_dir, '*.hdf5')
    file_count = 0

    s3helper = S3Helper()
    bucket_name = get_saved_files_bucket()
    for file_name in glob.glob(files):
        galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
        if galaxy_id >= 0:
            if modulus is None or galaxy_id % modulus == remainder:
                size = os.path.getsize(file_name)
                key = '{0}/{0}.hdf5'.format(galaxy_name)
                LOG.info('File name: %s', file_name)
                LOG.info('File size: %d', size)
                LOG.info('Bucket: %s', bucket_name)
                LOG.info('Key: %s', key)

                s3helper.add_file_to_bucket(bucket_name, key, file_name)
                file_count += 1
                os.remove(file_name)

                connection.execute(GALAXY.update()
                                   .where(GALAXY.c.galaxy_id == galaxy_id)
                                   .values(status_id=STORED, status_time=datetime.datetime.now()))
        else:
            LOG.error('File name: %s', file_name)
            LOG.error('Could not get the galaxy id')

        if shutdown() is True:
            raise SystemExit

    return file_count
def store_files(connection):
    """
    Scan a directory for files and send them to the archive

    :param connection:
    :return:
    """
    LOG.info('Directory: %s', HDF5_OUTPUT_DIRECTORY)

    to_store_dir = os.path.join(HDF5_OUTPUT_DIRECTORY, 'to_store')
    files = os.path.join(to_store_dir, '*.hdf5')
    file_count = 0

    s3helper = S3Helper()
    bucket_name = get_files_bucket()
    for file_name in glob.glob(files):
        size = os.path.getsize(file_name)
        galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
        if galaxy_id >= 0:
            key = '{0}/{0}.hdf5'.format(galaxy_name)
            LOG.info('File name: %s', file_name)
            LOG.info('File size: %d', size)
            LOG.info('Bucket: %s', bucket_name)
            LOG.info('Key: %s', key)

            s3helper.add_file_to_bucket(bucket_name, key, file_name)
            file_count += 1
            os.remove(file_name)

            connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(status_id=STORED, status_time=datetime.datetime.now()))
        else:
            LOG.error('File name: %s', file_name)
            LOG.error('File size: %d', size)
            LOG.error('Could not get the galaxy id')

    return file_count
                                       galaxy[GALAXY.c.dimension_z],
                                       area_count,
                                       OUTPUT_DIRECTORY,
                                       map_parameter_name)

            # Flush the HDF5 data to disk
            h5_file.flush()
            h5_file.close()

            # Move the file
            to_store = os.path.join(OUTPUT_DIRECTORY, 'to_store')
            LOG.info('Moving the file %s to %s', filename, to_store)
            if not os.path.exists(to_store):
                os.makedirs(to_store)
            shutil.move(filename, to_store)
            connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id1).values(status_id=ARCHIVED, status_time=datetime.datetime.now()))

            end_time = time.time()
            LOG.info('Galaxy with galaxy_id of %d was archived.', galaxy_id1)
            LOG.info('Copied %d areas %d pixels.', area_count, pixel_count)
            total_time = end_time - start_time
            LOG.info('Total time %d mins %.1f secs', int(total_time / 60), total_time % 60)

    except Exception:
        LOG.exception('Major error')

    finally:
        connection.close()
                                       pixel_group,
                                       galaxy[GALAXY.c.dimension_x],
                                       galaxy[GALAXY.c.dimension_y],
                                       filter_layers,
                                       galaxy[GALAXY.c.pixel_count])

            # Flush the HDF5 data to disk
            h5_file.flush()
            h5_file.close()

            # Move the file
            to_store = os.path.join(OUTPUT_DIRECTORY, 'to_store')
            LOG.info('Moving the file %s to %s', filename, to_store)
            if not os.path.exists(to_store):
                os.makedirs(to_store)
            shutil.move(filename, to_store)
            connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id1).values(status_id=ARCHIVED))

            end_time = time.time()
            LOG.info('Galaxy with galaxy_id of %d was archived.', galaxy_id1)
            LOG.info('Copied %d areas %d pixels.', area_count, pixel_count)
            total_time = end_time - start_time
            LOG.info('Total time %d mins %.1f secs', int(total_time / 60), total_time % 60)

    except Exception:
        LOG.exception('Major error')

    finally:
        connection.close()
def process_file(self, registration):
    """
    Process a registration.

    :param registration:
    """
    self._filename = registration[REGISTER.c.filename]
    self._galaxy_name = registration[REGISTER.c.galaxy_name]
    self._galaxy_type = registration[REGISTER.c.galaxy_type]
    self._priority = registration[REGISTER.c.priority]
    self._redshift = registration[REGISTER.c.redshift]
    self._run_id = registration[REGISTER.c.run_id]
    self._sigma = registration[REGISTER.c.sigma]
    self._sigma_filename = registration[REGISTER.c.sigma_filename]

    # Have we files that we can use for this?
    self._rounded_redshift = self._get_rounded_redshift()
    if self._rounded_redshift is None:
        LOG.error('No models matching the redshift of %.4f', self._redshift)
        return 0, 0

    self._hdu_list = pyfits.open(self._filename, memmap=True)
    self._layer_count = len(self._hdu_list)

    # Do we need to open and sort the S/N Ratio file
    if self._sigma_filename is not None:
        self._sigma = 0.0
        self._signal_noise_hdu = pyfits.open(self._sigma_filename, memmap=True)
        if self._layer_count != len(self._signal_noise_hdu):
            LOG.error('The layer counts do not match %d vs %d', self._layer_count, len(self._signal_noise_hdu))
            return 0, 0
    else:
        self._sigma = float(self._sigma)

    self._end_y = self._hdu_list[0].data.shape[0]
    self._end_x = self._hdu_list[0].data.shape[1]

    LOG.info("Image dimensions: %(x)d x %(y)d x %(z)d => %(pix).2f Mpixels" % {'x': self._end_x, 'y': self._end_y, 'z': self._layer_count, 'pix': self._end_x * self._end_y / 1000000.0})

    # Get the flops estimate and cobblestone factor
    run = self._connection.execute(select([RUN]).where(RUN.c.run_id == self._run_id)).first()
    self._fpops_est_per_pixel = run[RUN.c.fpops_est]
    self._cobblestone_scaling_factor = run[RUN.c.cobblestone_factor]

    # Create and save the object
    datetime_now = datetime.now()
    result = self._connection.execute(GALAXY.insert().values(name=self._galaxy_name,
                                                              dimension_x=self._end_x,
                                                              dimension_y=self._end_y,
                                                              dimension_z=self._layer_count,
                                                              redshift=self._redshift,
                                                              sigma=self._sigma,
                                                              create_time=datetime_now,
                                                              image_time=datetime_now,
                                                              galaxy_type=self._galaxy_type,
                                                              ra_cent=0,
                                                              dec_cent=0,
                                                              pixel_count=0,
                                                              pixels_processed=0,
                                                              run_id=self._run_id))
    self._galaxy_id = result.inserted_primary_key[0]
    LOG.info("Writing %s to database", self._galaxy_name)

    # Store the fits header
    self._store_fits_header()

    # Get the filters we're using for this run and sort the layers
    self._get_filters_sort_layers()

    # Build the template file we need if necessary
    self._build_template_file()

    # Copy the filter and model files we need
    self._copy_important_files()

    # Now break up the galaxy into chunks
    self._break_up_galaxy()
    self._connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == self._galaxy_id).values(pixel_count=self._pixel_count))

    LOG.info('Building the images')
    galaxy_file_name = get_galaxy_file_name(self._galaxy_name, self._run_id, self._galaxy_id)
    s3helper = S3Helper()
    image = FitsImage(self._connection)
    image.build_image(self._filename, galaxy_file_name, self._galaxy_id, get_galaxy_image_bucket())

    # Copy the fits file to S3 - renamed to make it unique
    bucket_name = get_files_bucket()
    s3helper.add_file_to_bucket(bucket_name, get_key_fits(self._galaxy_name, self._run_id, self._galaxy_id), self._filename)
    if self._sigma_filename is not None:
        s3helper.add_file_to_bucket(bucket_name, get_key_sigma_fits(self._galaxy_name, self._run_id, self._galaxy_id), self._sigma_filename)

    return self._work_units_added, self._pixel_count
# Connect to the database - the login string is set in the database package
ENGINE = create_engine(DB_LOGIN)
connection = ENGINE.connect()

sorted_data = sort_data(connection, current_jobs)
for key in sorted(sorted_data.iterkeys()):
    LOG.info('{0}: {1} results'.format(key, len(sorted_data[key])))

try:
    # Get the galaxies we know are still processing
    processed = []
    for galaxy in connection.execute(select([GALAXY]).where(GALAXY.c.status_id == COMPUTING)):
        if finish_processing(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], sorted_data):
            processed.append(galaxy[GALAXY.c.galaxy_id])
            LOG.info('%d %s has completed', galaxy[GALAXY.c.galaxy_id], galaxy[GALAXY.c.name])

    transaction = connection.begin()
    for galaxy_id in processed:
        connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(status_id=PROCESSED, status_time=datetime.datetime.now()))
    transaction.commit()

    LOG.info('Marked %d galaxies ready for archiving', len(processed))
    LOG.info('%d galaxies are still being processed', len(sorted_data))

except Exception:
    LOG.exception('Major error')

finally:
    connection.close()
def archive_to_hdf5(connection):
    """
    Archive data to an HDF5 file

    :param connection:
    :return:
    """
    # Load the parameter name map
    map_parameter_name = {}
    for parameter_name in connection.execute(select([PARAMETER_NAME])):
        map_parameter_name[parameter_name[PARAMETER_NAME.c.name]] = parameter_name[PARAMETER_NAME.c.parameter_name_id]

    # Look in the database for the galaxies
    galaxy_ids = []
    for galaxy in connection.execute(select([GALAXY]).where(GALAXY.c.status_id == PROCESSED).order_by(GALAXY.c.galaxy_id)):
        galaxy_ids.append(galaxy[GALAXY.c.galaxy_id])

    for galaxy_id_str in galaxy_ids:
        start_time = time.time()

        galaxy_id1 = int(galaxy_id_str)
        galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id1)).first()
        if galaxy is None:
            LOG.info('Error: Galaxy with galaxy_id of %d was not found', galaxy_id1)
        else:
            LOG.info('Archiving Galaxy with galaxy_id of %d - %s', galaxy_id1, galaxy[GALAXY.c.name])

            # Copy the galaxy details
            galaxy_file_name = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
            filename = os.path.join(HDF5_OUTPUT_DIRECTORY, '{0}.hdf5'.format(galaxy_file_name))
            h5_file = h5py.File(filename, 'w')

            # Build the groups
            galaxy_group = h5_file.create_group('galaxy')
            area_group = galaxy_group.create_group('area')
            pixel_group = galaxy_group.create_group('pixel')

            # Write the galaxy data
            galaxy_group.attrs['galaxy_id'] = galaxy[GALAXY.c.galaxy_id]
            galaxy_group.attrs['run_id'] = galaxy[GALAXY.c.run_id]
            galaxy_group.attrs['name'] = galaxy[GALAXY.c.name]
            galaxy_group.attrs['dimension_x'] = galaxy[GALAXY.c.dimension_x]
            galaxy_group.attrs['dimension_y'] = galaxy[GALAXY.c.dimension_y]
            galaxy_group.attrs['dimension_z'] = galaxy[GALAXY.c.dimension_z]
            galaxy_group.attrs['redshift'] = float(galaxy[GALAXY.c.redshift])
            galaxy_group.attrs['create_time'] = str(galaxy[GALAXY.c.create_time])
            galaxy_group.attrs['image_time'] = str(galaxy[GALAXY.c.image_time])
            galaxy_group.attrs['galaxy_type'] = galaxy[GALAXY.c.galaxy_type]
            galaxy_group.attrs['ra_cent'] = galaxy[GALAXY.c.ra_cent]
            galaxy_group.attrs['dec_cent'] = galaxy[GALAXY.c.dec_cent]
            galaxy_group.attrs['sigma'] = float(galaxy[GALAXY.c.sigma])
            galaxy_group.attrs['pixel_count'] = galaxy[GALAXY.c.pixel_count]
            galaxy_group.attrs['pixels_processed'] = galaxy[GALAXY.c.pixels_processed]
            galaxy_group.attrs['output_format'] = OUTPUT_FORMAT_1_03

            galaxy_id_aws = galaxy[GALAXY.c.galaxy_id]

            # Store the data associated with the galaxy
            store_fits_header(connection, galaxy_id_aws, galaxy_group)
            store_image_filters(connection, galaxy_id_aws, galaxy_group)

            # Store the data associated with the areas
            area_count = store_area(connection, galaxy_id_aws, area_group)
            store_area_user(connection, galaxy_id_aws, area_group)
            h5_file.flush()

            # Store the values associated with a pixel
            pixel_count = store_pixels(connection,
                                       galaxy_file_name,
                                       pixel_group,
                                       galaxy[GALAXY.c.dimension_x],
                                       galaxy[GALAXY.c.dimension_y],
                                       galaxy[GALAXY.c.dimension_z],
                                       area_count,
                                       galaxy[GALAXY.c.galaxy_id],
                                       map_parameter_name)

            # Flush the HDF5 data to disk
            h5_file.flush()
            h5_file.close()

            # Move the file
            to_store = os.path.join(HDF5_OUTPUT_DIRECTORY, 'to_store')
            LOG.info('Moving the file %s to %s', filename, to_store)
            if not os.path.exists(to_store):
                os.makedirs(to_store)

            # Sometimes the file can exist so remove it
            old_filename = os.path.join(to_store, '{0}.hdf5'.format(galaxy_file_name))
            LOG.info('Checking for old file %s', old_filename)
            if os.path.exists(old_filename):
                LOG.info('Removing old file %s', old_filename)
                os.remove(old_filename)

            shutil.move(filename, to_store)

            connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id1).values(status_id=ARCHIVED, status_time=datetime.datetime.now()))

            end_time = time.time()
            LOG.info('Galaxy with galaxy_id of %d was archived.', galaxy_id1)
            LOG.info('Copied %d areas %d pixels.', area_count, pixel_count)
            total_time = end_time - start_time
            LOG.info('Total time %d mins %.1f secs', int(total_time / 60), total_time % 60)
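# A minimal sketch (not part of the original scripts) of how a file produced by
# archive_to_hdf5 above could be read back. The group name 'galaxy' and the attribute
# names are the ones written above; the function name and file path are hypothetical.
import h5py


def print_archive_summary(hdf5_path):
    """Print the galaxy attributes stored by archive_to_hdf5."""
    with h5py.File(hdf5_path, 'r') as h5_file:
        galaxy_group = h5_file['galaxy']
        print('Galaxy {0} (run {1})'.format(galaxy_group.attrs['name'], galaxy_group.attrs['run_id']))
        print('Dimensions: {0} x {1} x {2}'.format(galaxy_group.attrs['dimension_x'],
                                                   galaxy_group.attrs['dimension_y'],
                                                   galaxy_group.attrs['dimension_z']))
        print('Pixels processed: {0} of {1}'.format(galaxy_group.attrs['pixels_processed'],
                                                    galaxy_group.attrs['pixel_count']))
        print('Groups stored: {0}'.format(list(galaxy_group.keys())))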
        ra_eqj2000 = table.array['pos_ra_equ_J2000_d'][0]
        dec_eqj2000 = table.array['pos_dec_equ_J2000_d'][0]
        LOG.info('VOTable data collected from NED for {0}'.format(name))
        return True, float(ra_eqj2000), float(dec_eqj2000)
    except Exception:
        pass

    # Try Hyperleda
    try:
        url = 'http://leda.univ-lyon1.fr/G.cgi?n=113&c=o&o=' + name + '&a=x&z=d'
        table = getVOTable(url, 0)
        ra_eqj2000 = table.array['alpha'][0]
        dec_eqj2000 = table.array['delta'][0]
        LOG.info('VOTable data collected from Hyperleda for {0}'.format(name))
        return True, float(ra_eqj2000), float(dec_eqj2000)
    except Exception:
        LOG.exception('ERROR: Getting VO data for {0}'.format(name))

    return False, 0.0, 0.0


for galaxy in connection.execute(select([GALAXY]).where(or_(GALAXY.c.ra_cent == None, GALAXY.c.ra_cent == 0.0, GALAXY.c.dec_cent == None, GALAXY.c.dec_cent == 0.0))):
    LOG.info('Processing %d - %s', galaxy[GALAXY.c.galaxy_id], galaxy[GALAXY.c.name])
    name = fix_name(galaxy[GALAXY.c.name])
    (found, ra, dec) = get_ra_dec(name)
    if found:
        LOG.info('Updating {0} to RA: {1}, DEC: {2}'.format(name, ra, dec))
        connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]).values(ra_cent=ra, dec_cent=dec))

connection.close()
def build_png_image_ami():
    """
    Build the images

    :return:
    """
    # First check the galaxy exists in the database
    engine = create_engine(DB_LOGIN)
    connection = engine.connect()
    try:
        query = select([GALAXY]).distinct().where(and_(AREA.c.galaxy_id == GALAXY.c.galaxy_id, AREA.c.update_time >= GALAXY.c.image_time))
        galaxy_count = 0
        s3helper = S3Helper()
        bucket_name = get_galaxy_image_bucket()

        # Start the shutdown signal poller to check when this instance must close
        start_poll()
        galaxy_list = []

        for galaxy in connection.execute(query):
            galaxy_list.append(galaxy)

        total_galaxies = len(galaxy_list)
        processed_galaxies = 0
        processed_print_point = 50

        for galaxy in galaxy_list:
            if processed_galaxies == processed_print_point:
                LOG.info('{0} out of {1} galaxies processed'.format(processed_galaxies, total_galaxies))
                processed_print_point += 50
            processed_galaxies += 1

            LOG.info('Working on galaxy %s', galaxy[GALAXY.c.name])
            array = numpy.empty((galaxy[GALAXY.c.dimension_y], galaxy[GALAXY.c.dimension_x], len(PNG_IMAGE_NAMES)), dtype=numpy.float)
            array.fill(numpy.NaN)

            # Return the rows
            pixel_count = 0
            pixels_processed = 0
            for row in connection.execute(select([PIXEL_RESULT]).where(and_(PIXEL_RESULT.c.galaxy_id == galaxy[GALAXY.c.galaxy_id], PIXEL_RESULT.c.x > -1))):
                row__x = row[PIXEL_RESULT.c.x]
                row__y = row[PIXEL_RESULT.c.y]
                pixel_count += 1
                if row[PIXEL_RESULT.c.workunit_id] is not None:
                    pixels_processed += 1

                # Defend against bad values
                if row[PIXEL_RESULT.c.mu] is not None:
                    array[row__y, row__x, 0] = row[PIXEL_RESULT.c.mu]
                if row[PIXEL_RESULT.c.m] is not None:
                    array[row__y, row__x, 1] = row[PIXEL_RESULT.c.m]
                if row[PIXEL_RESULT.c.ldust] is not None:
                    array[row__y, row__x, 2] = row[PIXEL_RESULT.c.ldust]
                if row[PIXEL_RESULT.c.sfr] is not None:
                    # the SFR is a log
                    array[row__y, row__x, 3] = math.pow(10, row[PIXEL_RESULT.c.sfr])

            connection.execute(GALAXY.update()
                               .where(GALAXY.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])
                               .values(image_time=datetime.datetime.now(), pixel_count=pixel_count, pixels_processed=pixels_processed))
            galaxy_count += 1

            # Now write the files
            black_rgb = (0, 0, 0)
            for name in PNG_IMAGE_NAMES:
                value = 0
                height = galaxy[GALAXY.c.dimension_y]
                width = galaxy[GALAXY.c.dimension_x]
                idx = 0
                if name == 'mu':
                    idx = 0
                elif name == 'm':
                    idx = 1
                elif name == 'ldust':
                    idx = 2
                elif name == 'sfr':
                    idx = 3

                values = []
                for x in range(0, width - 1):
                    for y in range(0, height - 1):
                        value = array[y, x, idx]
                        if not math.isnan(value) and value > 0:
                            values.append(value)

                values.sort()
                if len(values) > 1000:
                    top_count = int(len(values) * 0.005)
                    top_value = values[len(values) - top_count]
                elif len(values) > 0:
                    top_value = values[len(values) - 1]
                else:
                    top_value = 1
                if len(values) > 1:
                    median_value = values[int(len(values) / 2)]
                elif len(values) > 0:
                    median_value = values[0]
                else:
                    median_value = 1

                sigma = 1 / median_value
                multiplier = 255.0 / math.asinh(top_value * sigma)

                image = Image.new("RGB", (width, height), black_rgb)
                for x in range(0, width - 1):
                    for y in range(0, height - 1):
                        value = array[y, x, idx]
                        if not math.isnan(value) and value > 0:
                            value = int(math.asinh(value * sigma) * multiplier)
                            if value > 255:
                                value = 255
                            red = FIRE_R[value]
                            green = FIRE_G[value]
                            blue = FIRE_B[value]
                            image.putpixel((x, height - y - 1), (red, green, blue))

                file_name = '{0}/image.png'.format(POGS_TMP)
                image.save(file_name)
                s3helper.add_file_to_bucket(bucket_name,
                                            get_build_png_name(get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]), name),
                                            file_name)

            if shutdown() is True:
                LOG.info('Spot Instance Terminate Notice received, build_png_image is shutting down')
                break

    except:
        LOG.exception('An exception occurred.')

    finally:
        connection.close()

    LOG.info('Built images for %d galaxies', galaxy_count)
                pixel_count += 1
                if row[PIXEL_RESULT.c.mu] is not None and row[PIXEL_RESULT.c.m] is not None and row[PIXEL_RESULT.c.ldust] is not None and row[PIXEL_RESULT.c.sfr] is not None:
                    pixels_processed += 1
                    try:
                        array[row__y, row__x, 0] = row[PIXEL_RESULT.c.mu]
                        array[row__y, row__x, 1] = row[PIXEL_RESULT.c.m]
                        array[row__y, row__x, 2] = row[PIXEL_RESULT.c.ldust]
                        # the SFR is a log
                        array[row__y, row__x, 3] = math.pow(10, row[PIXEL_RESULT.c.sfr])
                    except TypeError:
                        LOG.error('Error at x: {0}, y: {1}'.format(row__x, row__y))

            transaction = connection.begin()
            connection.execute(GALAXY.update()
                               .where(GALAXY.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])
                               .values(image_time=datetime.datetime.now(), pixel_count=pixel_count, pixels_processed=pixels_processed))
            transaction.commit()
            galaxy_count += 1

            # Now write the files
            blackRGB = (0, 0, 0)
            for name in PNG_IMAGE_NAMES:
                value = 0
                height = galaxy[GALAXY.c.dimension_y]
                width = galaxy[GALAXY.c.dimension_x]
                idx = 0
                if name == 'mu':
                    idx = 0
                elif name == 'm':
                    idx = 1