def delete_galaxy(connection, galaxy_ids):
    """
    Remove all stored data for the given galaxies and flag them DELETED.

    For each galaxy id the pixel results are removed area by area, then the
    FITS headers, then the SED files held in S3.  Every galaxy is handled in
    its own transaction, which is rolled back if a shutdown is requested.

    :param connection: database connection to the POGS database
    :param galaxy_ids: the galaxy_id values of the galaxies to delete
    """
    for id_to_delete in galaxy_ids:
        transaction = connection.begin()
        row = connection.execute(
            select([GALAXY]).where(GALAXY.c.galaxy_id == id_to_delete)).first()
        if row is None:
            LOG.info('Error: Galaxy with galaxy_id of %d was not found', id_to_delete)
        else:
            LOG.info('Deleting Galaxy with galaxy_id of %d - %s', id_to_delete, row[GALAXY.c.name])

            # How many areas there are, so progress can be reported
            total_areas = connection.execute(
                select([func.count(AREA.c.area_id)]).where(
                    AREA.c.galaxy_id == row[GALAXY.c.galaxy_id])).first()[0]

            done = 1
            for area_row in connection.execute(
                    select([AREA.c.area_id]).where(
                        AREA.c.galaxy_id == row[GALAXY.c.galaxy_id]).order_by(AREA.c.area_id)):
                LOG.info("Deleting galaxy {0} area {1}. {2} of {3}".format(
                    id_to_delete, area_row[0], done, total_areas))
                connection.execute(
                    PIXEL_RESULT.delete().where(PIXEL_RESULT.c.area_id == area_row[0]))

                # Give the rest of the world a chance to access the database
                time.sleep(0.1)
                done += 1

                if shutdown() is True:
                    transaction.rollback()
                    raise SystemExit

            LOG.info("Deleting FITS headers for galaxy {0}".format(id_to_delete))
            connection.execute(
                FITS_HEADER.delete().where(
                    FITS_HEADER.c.galaxy_id == row[GALAXY.c.galaxy_id]))

            # Now empty the bucket of the sed files
            s3helper = S3Helper()
            bucket = s3helper.get_bucket(get_sed_files_bucket())
            galaxy_file_name = get_galaxy_file_name(
                row[GALAXY.c.name], row[GALAXY.c.run_id], row[GALAXY.c.galaxy_id])
            for key in bucket.list(prefix='{0}/'.format(galaxy_file_name)):
                # Skip the folder marker itself; it is removed afterwards
                if key.key.endswith('/'):
                    continue

                bucket.delete_key(key)
                if shutdown() is True:
                    transaction.rollback()
                    raise SystemExit

            # Now the folder
            key = Key(bucket)
            key.key = '{0}/'.format(galaxy_file_name)
            bucket.delete_key(key)

            LOG.info('Galaxy with galaxy_id of %d was deleted', id_to_delete)
            connection.execute(
                GALAXY.update().where(GALAXY.c.galaxy_id == id_to_delete).values(
                    status_id=DELETED, status_time=datetime.datetime.now()))

            if shutdown() is True:
                transaction.rollback()
                raise SystemExit

        transaction.commit()
def delete_register_data(connection, modulus, remainder):
    """
    Delete register entries older than ARC_DELETE_DELAY days.

    :param connection: database connection to the POGS database
    :param modulus: when not None, only ids where id % modulus == remainder are handled
    :param remainder: the remainder used with modulus to shard the work
    :return:
    """
    delete_delay_ago = datetime.datetime.now() - datetime.timedelta(
        days=float(ARC_DELETE_DELAY))
    LOG.info('Deleting registrations: {0} days ago ({1})'.format(
        ARC_DELETE_DELAY, delete_delay_ago))

    register_ids = []
    # BUG FIX: the original used "REGISTER.c.create_time is not None", a
    # Python identity test on the Column object that is always True and so
    # never produced the intended SQL IS NOT NULL filter.
    for register in connection.execute(
            select([REGISTER]).where(
                and_(REGISTER.c.create_time.isnot(None),
                     REGISTER.c.create_time < delete_delay_ago)).order_by(
                REGISTER.c.register_id)):
        register_id = int(register[REGISTER.c.register_id])
        if modulus is None or register_id % modulus == remainder:
            register_ids.append(register_id)

        # Bail out promptly if the instance is being terminated
        if shutdown() is True:
            raise SystemExit

    delete_register_entries(connection, register_ids)
def delete_galaxy_data(connection, modulus, remainder):
    """
    Find galaxies that were STORED more than ARC_DELETE_DELAY days ago and
    delete their data.

    :param connection: database connection to the POGS database
    :param modulus: when not None, only ids where id % modulus == remainder are handled
    :param remainder: the remainder used with modulus to shard the work
    :return:
    """
    cut_off = datetime.datetime.now() - datetime.timedelta(
        days=float(ARC_DELETE_DELAY))
    LOG.info('Deleting galaxies: {0} days ago ({1})'.format(
        ARC_DELETE_DELAY, cut_off))

    stored_query = select([GALAXY]).where(
        and_(GALAXY.c.status_id == STORED,
             GALAXY.c.status_time < cut_off)).order_by(GALAXY.c.galaxy_id)

    ids_to_delete = []
    for row in connection.execute(stored_query):
        candidate_id = int(row[GALAXY.c.galaxy_id])
        if modulus is None or candidate_id % modulus == remainder:
            ids_to_delete.append(candidate_id)

        # Bail out promptly if the instance is being terminated
        if shutdown() is True:
            raise SystemExit

    delete_galaxy(connection, ids_to_delete)
def main(): i = 0 try: while True: print "I am doing things! {0}".format(i) i += 1 time.sleep(1) if shutdown() is True: raise SystemExit except SystemExit: print 'Exiting...'
def processed_data(connection, modulus, remainder):
    """
    Work out which galaxies have finished processing and mark them PROCESSED.

    :param connection: database connection to the POGS database
    :param modulus: when not None, only galaxy ids where id % modulus == remainder are handled
    :param remainder: the remainder used with modulus to shard the work
    :return:
    """
    # Collect the names of the work units BOINC still has in flight
    boinc_engine = create_engine(BOINC_DB_LOGIN)
    boinc_connection = boinc_engine.connect()
    LOG.info('Getting results from BOINC')
    # The use of appid ensures MySQL uses an index otherwise it does a full table scan
    in_flight_query = select([RESULT]).where(
        and_(RESULT.c.server_state != 5, RESULT.c.appid == 1))
    current_jobs = [job_row[RESULT.c.name]
                    for job_row in boinc_connection.execute(in_flight_query)]
    boinc_connection.close()
    LOG.info('Got results')

    sorted_data = sort_data(connection, current_jobs, modulus, remainder)
    for key in sorted(sorted_data.iterkeys()):
        LOG.info('{0}: {1} results'.format(key, len(sorted_data[key])))

    # Find the COMPUTING galaxies that have no outstanding work units left
    finished = []
    for galaxy in connection.execute(
            select([GALAXY]).where(GALAXY.c.status_id == COMPUTING)):
        if modulus is not None and int(galaxy[GALAXY.c.galaxy_id]) % modulus != remainder:
            continue
        if finish_processing(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], sorted_data):
            finished.append(galaxy[GALAXY.c.galaxy_id])
            LOG.info('%d %s has completed', galaxy[GALAXY.c.galaxy_id], galaxy[GALAXY.c.name])

    # Flag the completed galaxies ready for archiving
    for galaxy_id in finished:
        connection.execute(
            GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(
                status_id=PROCESSED, status_time=datetime.datetime.now()))
        if shutdown() is True:
            raise SystemExit

    LOG.info('Marked %d galaxies ready for archiving', len(finished))
    LOG.info('%d galaxies are still being processed', len(sorted_data))
def delete_galaxy_data(connection, modulus, remainder):
    """
    Delete galaxies after a period of time

    Selects galaxies that have sat in the STORED state for longer than
    ARC_DELETE_DELAY days (sharded by modulus/remainder) and deletes them.

    :param connection: database connection to the POGS database
    :param modulus: when not None, only ids where id % modulus == remainder are handled
    :param remainder: the remainder used with modulus to shard the work
    :return:
    """
    # Cut-off point: anything stored before this time is old enough to delete
    delete_delay_ago = datetime.datetime.now() - datetime.timedelta(days=float(ARC_DELETE_DELAY))
    LOG.info('Deleting galaxies: {0} days ago ({1})'.format(ARC_DELETE_DELAY, delete_delay_ago))
    galaxy_ids = []
    for galaxy in connection.execute(select([GALAXY]).where(and_(GALAXY.c.status_id == STORED, GALAXY.c.status_time < delete_delay_ago)).order_by(GALAXY.c.galaxy_id)):
        galaxy_id = int(galaxy[GALAXY.c.galaxy_id])
        if modulus is None or galaxy_id % modulus == remainder:
            galaxy_ids.append(galaxy_id)
        # Bail out promptly if the instance is being terminated
        if shutdown() is True:
            raise SystemExit
    delete_galaxy(connection, galaxy_ids)
def store_files(connection, modulus, remainder):
    """
    Scan a directory for files and send them to the archive.

    Every *.hdf5 file found in HDF5_OUTPUT_DIRECTORY/to_store (sharded by
    modulus/remainder) is uploaded to S3, removed from disk, and its galaxy
    row is flagged STORED.

    :param connection: database connection to the POGS database
    :param modulus: when not None, only galaxy ids where id % modulus == remainder are handled
    :param remainder: the remainder used with modulus to shard the work
    :return: the number of files stored
    """
    LOG.info('Directory: %s', HDF5_OUTPUT_DIRECTORY)
    to_store_dir = os.path.join(HDF5_OUTPUT_DIRECTORY, 'to_store')
    pattern = os.path.join(to_store_dir, '*.hdf5')

    files_stored = 0
    s3helper = S3Helper()
    bucket_name = get_saved_files_bucket()
    for path in glob.glob(pattern):
        galaxy_id, galaxy_name = get_galaxy_id_and_name(path)
        if galaxy_id < 0:
            LOG.error('File name: %s', path)
            LOG.error('Could not get the galaxy id')
        elif modulus is None or galaxy_id % modulus == remainder:
            size = os.path.getsize(path)
            key = '{0}/{0}.hdf5'.format(galaxy_name)
            LOG.info('File name: %s', path)
            LOG.info('File size: %d', size)
            LOG.info('Bucket: %s', bucket_name)
            LOG.info('Key: %s', key)

            s3helper.add_file_to_bucket(bucket_name, key, path)
            files_stored += 1
            # Only remove the local copy once the upload has succeeded
            os.remove(path)

            connection.execute(
                GALAXY.update()
                .where(GALAXY.c.galaxy_id == galaxy_id)
                .values(status_id=STORED, status_time=datetime.datetime.now()))

        if shutdown() is True:
            raise SystemExit

    return files_stored
def store_files(connection, modulus, remainder):
    """
    Scan a directory for files and send them to the archive

    Every *.hdf5 file in HDF5_OUTPUT_DIRECTORY/to_store (sharded by
    modulus/remainder) is uploaded to S3, removed from disk, and its
    galaxy row is flagged STORED.

    :param connection: database connection to the POGS database
    :param modulus: when not None, only galaxy ids where id % modulus == remainder are handled
    :param remainder: the remainder used with modulus to shard the work
    :return: the number of files stored
    """
    LOG.info('Directory: %s', HDF5_OUTPUT_DIRECTORY)
    to_store_dir = os.path.join(HDF5_OUTPUT_DIRECTORY, 'to_store')
    files = os.path.join(to_store_dir, '*.hdf5')
    file_count = 0
    s3helper = S3Helper()
    bucket_name = get_saved_files_bucket()
    for file_name in glob.glob(files):
        galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
        if galaxy_id >= 0:
            if modulus is None or galaxy_id % modulus == remainder:
                size = os.path.getsize(file_name)
                # S3 key layout is <galaxy name>/<galaxy name>.hdf5
                key = '{0}/{0}.hdf5'.format(galaxy_name)
                LOG.info('File name: %s', file_name)
                LOG.info('File size: %d', size)
                LOG.info('Bucket: %s', bucket_name)
                LOG.info('Key: %s', key)
                s3helper.add_file_to_bucket(bucket_name, key, file_name)
                file_count += 1
                # Only remove the local copy once the upload has succeeded
                os.remove(file_name)
                connection.execute(GALAXY.update().where(
                    GALAXY.c.galaxy_id == galaxy_id).values(
                    status_id=STORED, status_time=datetime.datetime.now()))
        else:
            LOG.error('File name: %s', file_name)
            LOG.error('Could not get the galaxy id')
        # Bail out promptly if the instance is being terminated
        if shutdown() is True:
            raise SystemExit
    return file_count
def processed_data(connection, modulus, remainder): """ Work out which galaxies have been processed :param connection: :return: """ # Get the work units still being processed engine = create_engine(BOINC_DB_LOGIN) connection_boinc = engine.connect() current_jobs = [] LOG.info('Getting results from BOINC') # The use of appid ensures MySQL uses an index otherwise it does a full table scan for result in connection_boinc.execute(select([RESULT]).where(and_(RESULT.c.server_state != 5, RESULT.c.appid == 1))): current_jobs.append(result[RESULT.c.name]) connection_boinc.close() LOG.info('Got results') sorted_data = sort_data(connection, current_jobs, modulus, remainder) for key in sorted(sorted_data.iterkeys()): LOG.info('{0}: {1} results'.format(key, len(sorted_data[key]))) # Get the galaxies we know are still processing processed = [] for galaxy in connection.execute(select([GALAXY]).where(GALAXY.c.status_id == COMPUTING)): if modulus is None or int(galaxy[GALAXY.c.galaxy_id]) % modulus == remainder: if finish_processing(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], sorted_data): processed.append(galaxy[GALAXY.c.galaxy_id]) LOG.info('%d %s has completed', galaxy[GALAXY.c.galaxy_id], galaxy[GALAXY.c.name]) for galaxy_id in processed: connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(status_id=PROCESSED, status_time=datetime.datetime.now())) if shutdown() is True: raise SystemExit LOG.info('Marked %d galaxies ready for archiving', len(processed)) LOG.info('%d galaxies are still being processed', len(sorted_data))
def delete_register_data(connection, modulus, remainder):
    """
    Delete register entries older than ARC_DELETE_DELAY days.

    :param connection: database connection to the POGS database
    :param modulus: when not None, only ids where id % modulus == remainder are handled
    :param remainder: the remainder used with modulus to shard the work
    :return:
    """
    delete_delay_ago = datetime.datetime.now() - datetime.timedelta(days=float(ARC_DELETE_DELAY))
    LOG.info('Deleting registrations: {0} days ago ({1})'.format(ARC_DELETE_DELAY, delete_delay_ago))
    register_ids = []
    # BUG FIX: the original used "REGISTER.c.create_time is not None", a
    # Python identity test on the Column object that is always True and so
    # never produced the intended SQL IS NOT NULL filter.
    for register in connection.execute(select([REGISTER]).where(and_(REGISTER.c.create_time.isnot(None), REGISTER.c.create_time < delete_delay_ago)).order_by(REGISTER.c.register_id)):
        register_id = int(register[REGISTER.c.register_id])
        if modulus is None or register_id % modulus == remainder:
            register_ids.append(register_id)
        # Bail out promptly if the instance is being terminated
        if shutdown() is True:
            raise SystemExit
    delete_register_entries(connection, register_ids)
def build_png_image_ami():
    """
    Build the images

    For every galaxy whose areas were updated since its last image build,
    collect the pixel-level fit parameters (mu, m, ldust, sfr) into an
    array, scale them with an asinh stretch, render one PNG per parameter
    using the FIRE colour map, and upload the PNGs to S3.

    :return:
    """
    # First check the galaxy exists in the database
    engine = create_engine(DB_LOGIN)
    connection = engine.connect()
    # Defined before the try so the final LOG cannot hit an unbound name
    # if something fails early inside the try block.
    galaxy_count = 0
    try:
        query = select([GALAXY]).distinct().where(
            and_(AREA.c.galaxy_id == GALAXY.c.galaxy_id,
                 AREA.c.update_time >= GALAXY.c.image_time))
        s3helper = S3Helper()
        bucket_name = get_galaxy_image_bucket()

        # Start the shutdown signal poller to check when this instance must close
        start_poll()
        galaxy_list = []
        for galaxy in connection.execute(query):
            galaxy_list.append(galaxy)

        total_galaxies = len(galaxy_list)
        processed_galaxies = 0
        processed_print_point = 50
        for galaxy in galaxy_list:
            if processed_galaxies == processed_print_point:
                LOG.info('{0} out of {1} galaxies processed'.format(processed_galaxies, total_galaxies))
                processed_print_point += 50
            processed_galaxies += 1

            LOG.info('Working on galaxy %s', galaxy[GALAXY.c.name])
            array = numpy.empty(
                (galaxy[GALAXY.c.dimension_y], galaxy[GALAXY.c.dimension_x], len(PNG_IMAGE_NAMES)),
                dtype=numpy.float)
            array.fill(numpy.NaN)

            # Return the rows
            pixel_count = 0
            pixels_processed = 0
            # BUG FIX: the original combined the two conditions with the
            # Python "and" operator, which does not build a SQL AND between
            # SQLAlchemy clauses (at best only the second condition survives);
            # and_() produces the intended WHERE clause.
            for row in connection.execute(
                    select([PIXEL_RESULT]).where(
                        and_(PIXEL_RESULT.c.galaxy_id == galaxy[GALAXY.c.galaxy_id],
                             PIXEL_RESULT.c.x > -1))):
                row__x = row[PIXEL_RESULT.c.x]
                row__y = row[PIXEL_RESULT.c.y]
                pixel_count += 1
                if row[PIXEL_RESULT.c.workunit_id] is not None:
                    pixels_processed += 1

                # Defend against bad values
                if row[PIXEL_RESULT.c.mu] is not None:
                    array[row__y, row__x, 0] = row[PIXEL_RESULT.c.mu]
                if row[PIXEL_RESULT.c.m] is not None:
                    array[row__y, row__x, 1] = row[PIXEL_RESULT.c.m]
                if row[PIXEL_RESULT.c.ldust] is not None:
                    array[row__y, row__x, 2] = row[PIXEL_RESULT.c.ldust]
                if row[PIXEL_RESULT.c.sfr] is not None:
                    # the SFR is a log
                    array[row__y, row__x, 3] = math.pow(10, row[PIXEL_RESULT.c.sfr])

            connection.execute(GALAXY.update()
                               .where(GALAXY.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])
                               .values(image_time=datetime.datetime.now(),
                                       pixel_count=pixel_count,
                                       pixels_processed=pixels_processed))
            galaxy_count += 1

            # Now write the files
            black_rgb = (0, 0, 0)
            for name in PNG_IMAGE_NAMES:
                value = 0
                height = galaxy[GALAXY.c.dimension_y]
                width = galaxy[GALAXY.c.dimension_x]
                idx = 0
                if name == 'mu':
                    idx = 0
                elif name == 'm':
                    idx = 1
                elif name == 'ldust':
                    idx = 2
                elif name == 'sfr':
                    idx = 3

                # Gather the valid (positive, non-NaN) values to derive the stretch
                values = []
                for x in range(0, width - 1):
                    for y in range(0, height - 1):
                        value = array[y, x, idx]
                        if not math.isnan(value) and value > 0:
                            values.append(value)

                values.sort()
                if len(values) > 1000:
                    # Clip the brightest 0.5% so outliers don't wash out the image
                    top_count = int(len(values) * 0.005)
                    top_value = values[len(values) - top_count]
                elif len(values) > 0:
                    top_value = values[len(values) - 1]
                else:
                    top_value = 1
                if len(values) > 1:
                    median_value = values[int(len(values) / 2)]
                elif len(values) > 0:
                    median_value = values[0]
                else:
                    median_value = 1

                sigma = 1 / median_value
                multiplier = 255.0 / math.asinh(top_value * sigma)

                image = Image.new("RGB", (width, height), black_rgb)
                for x in range(0, width - 1):
                    for y in range(0, height - 1):
                        value = array[y, x, idx]
                        if not math.isnan(value) and value > 0:
                            value = int(math.asinh(value * sigma) * multiplier)
                            if value > 255:
                                value = 255
                            red = FIRE_R[value]
                            green = FIRE_G[value]
                            blue = FIRE_B[value]
                            # PNG origin is top-left, so flip the y axis
                            image.putpixel((x, height - y - 1), (red, green, blue))
                file_name = '{0}/image.png'.format(POGS_TMP)
                image.save(file_name)
                s3helper.add_file_to_bucket(
                    bucket_name,
                    get_build_png_name(
                        get_galaxy_file_name(galaxy[GALAXY.c.name],
                                             galaxy[GALAXY.c.run_id],
                                             galaxy[GALAXY.c.galaxy_id]),
                        name),
                    file_name)

            if shutdown() is True:
                LOG.info('Spot Instance Terminate Notice received, build_png_image is shutting down')
                break

    except Exception:
        # Narrowed from a bare "except:" so SystemExit/KeyboardInterrupt
        # still propagate instead of being swallowed.
        LOG.exception('An exception occurred.')
    finally:
        connection.close()
    LOG.info('Built images for %d galaxies', galaxy_count)
def delete_galaxy(connection, galaxy_ids):
    """
    Delete the stored data for each of the given galaxies and flag them DELETED.

    For every galaxy id the pixel results are removed area by area, then
    the FITS headers, then the SED files in S3.  Each galaxy is handled in
    its own transaction; a shutdown request rolls the current transaction
    back and exits.

    :param connection: database connection to the POGS database
    :param galaxy_ids: the galaxy_id values of the galaxies to delete
    """
    for galaxy_id in galaxy_ids:
        transaction = connection.begin()
        galaxy = connection.execute(
            select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id)).first()
        if galaxy is None:
            LOG.info('Error: Galaxy with galaxy_id of %d was not found', galaxy_id)
        else:
            LOG.info('Deleting Galaxy with galaxy_id of %d - %s', galaxy_id,
                     galaxy[GALAXY.c.name])
            # Count the areas so deletion progress can be reported
            area_count = connection.execute(
                select([func.count(AREA.c.area_id)]).where(
                    AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])).first()[0]
            counter = 1
            for area_id1 in connection.execute(
                    select([AREA.c.area_id]).where(AREA.c.galaxy_id == galaxy[
                        GALAXY.c.galaxy_id]).order_by(AREA.c.area_id)):
                LOG.info("Deleting galaxy {0} area {1}. {2} of {3}".format(
                    galaxy_id, area_id1[0], counter, area_count))
                connection.execute(PIXEL_RESULT.delete().where(
                    PIXEL_RESULT.c.area_id == area_id1[0]))

                # Give the rest of the world a chance to access the database
                time.sleep(0.1)
                counter += 1

                if shutdown() is True:
                    transaction.rollback()
                    raise SystemExit

            LOG.info("Deleting FITS headers for galaxy {0}".format(galaxy_id))
            connection.execute(FITS_HEADER.delete().where(
                FITS_HEADER.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]))

            # Now empty the bucket of the sed files
            s3helper = S3Helper()
            bucket = s3helper.get_bucket(get_sed_files_bucket())
            galaxy_file_name = get_galaxy_file_name(galaxy[GALAXY.c.name],
                                                    galaxy[GALAXY.c.run_id],
                                                    galaxy[GALAXY.c.galaxy_id])
            for key in bucket.list(prefix='{0}/'.format(galaxy_file_name)):
                # Ignore the key
                if key.key.endswith('/'):
                    continue
                bucket.delete_key(key)
                if shutdown() is True:
                    transaction.rollback()
                    raise SystemExit

            # Now the folder
            key = Key(bucket)
            key.key = '{0}/'.format(galaxy_file_name)
            bucket.delete_key(key)

            LOG.info('Galaxy with galaxy_id of %d was deleted', galaxy_id)
            connection.execute(
                GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(
                    status_id=DELETED, status_time=datetime.datetime.now()))
            if shutdown() is True:
                transaction.rollback()
                raise SystemExit
        # Commit the work done for this galaxy
        transaction.commit()