def get_hdf5_from_s3(galaxy, directory):
    bucket_name = get_saved_files_bucket()
    key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
    s3_helper = S3Helper()
    if s3_helper.file_exists(bucket_name, key):
        if s3_helper.file_archived(bucket_name, key):
            # file is archived
            if s3_helper.file_restoring(bucket_name, key):
                # if file is restoring, just need to wait for it
                LOG.info('Galaxy {0} ({1}) is still restoring from glacier'.format(
                    galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id]))
            else:
                # if file is not restoring, need to request.
                LOG.info('Making request for archived galaxy {0} ({1})'.format(
                    galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id]))
                s3_helper.restore_archived_file(bucket_name, key, days=10)
        else:
            # file is not archived
            LOG.info('Galaxy {0} ({1}) is available in s3'.format(
                galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id]))
            filename = os.path.join(
                directory,
                get_galaxy_file_name(galaxy[GALAXY.c.name],
                                     galaxy[GALAXY.c.run_id],
                                     galaxy[GALAXY.c.galaxy_id])) + '.hdf5'
            s3_helper.get_file_from_bucket(bucket_name=bucket_name, key_name=key, file_name=filename)
    else:
        LOG.info('The key {0} in bucket {1} does not exist'.format(key, bucket_name))
def delete_galaxy(connection, galaxy_ids):
    try:
        for galaxy_id_str in galaxy_ids:
            transaction = connection.begin()
            galaxy_id1 = int(galaxy_id_str)
            galaxy = connection.execute(
                select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id1)).first()
            if galaxy is None:
                LOG.info('Error: Galaxy with galaxy_id of %d was not found', galaxy_id1)
            else:
                LOG.info('Deleting Galaxy with galaxy_id of %d - %s', galaxy_id1, galaxy[GALAXY.c.name])
                area_count = connection.execute(
                    select([func.count(AREA.c.area_id)]).where(
                        AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])).first()[0]
                counter = 1
                for area_id1 in connection.execute(
                        select([AREA.c.area_id]).where(
                            AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]).order_by(AREA.c.area_id)):
                    LOG.info("Deleting galaxy {0} area {1}. {2} of {3}".format(
                        galaxy_id_str, area_id1[0], counter, area_count))
                    connection.execute(PIXEL_RESULT.delete().where(
                        PIXEL_RESULT.c.area_id == area_id1[0]))

                    # Give the rest of the world a chance to access the database
                    time.sleep(0.1)
                    counter += 1

                # Now empty the bucket
                s3helper = S3Helper()
                bucket = s3helper.get_bucket(get_files_bucket())
                galaxy_file_name = get_galaxy_file_name(galaxy[GALAXY.c.name],
                                                        galaxy[GALAXY.c.run_id],
                                                        galaxy[GALAXY.c.galaxy_id])
                for key in bucket.list(prefix='{0}/sed/'.format(galaxy_file_name)):
                    # Ignore the key
                    if key.key.endswith('/'):
                        continue
                    bucket.delete_key(key)

                # Now the folder
                key = Key(bucket)
                key.key = '{0}/sed/'.format(galaxy_file_name)
                bucket.delete_key(key)

                LOG.info('Galaxy with galaxy_id of %d was deleted', galaxy_id1)
                connection.execute(
                    GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id1).values(
                        status_id=DELETED, status_time=datetime.datetime.now()))

            transaction.commit()
    except Exception:
        LOG.exception('Major error')
def _build_image_asinh(self, fits_file_name, galaxy_key_stub, centre, galaxy_id, bucket_name):
    """
    Build Three Colour Images using the asinh() function.

    :param fits_file_name:
    :param galaxy_key_stub:
    :param centre:
    :param galaxy_id:
    :param bucket_name:
    """
    hdulist = pyfits.open(fits_file_name, memmap=True)
    hdu = hdulist[0]
    width = hdu.header['NAXIS1']
    height = hdu.header['NAXIS2']

    (image1_filters, image2_filters, image3_filters, image4_filters) = self._get_image_filters(hdulist)

    # Create Three Colour Images
    image1 = ImageBuilder(bucket_name, 1, get_colour_image_key(galaxy_key_stub, 1),
                          get_thumbnail_colour_image_key(galaxy_key_stub, 1),
                          image1_filters[0], image1_filters[1], image1_filters[2],
                          width, height, centre, self._connection, galaxy_id)  # i, r, g
    image2 = ImageBuilder(bucket_name, 2, get_colour_image_key(galaxy_key_stub, 2), None,
                          image2_filters[0], image2_filters[1], image2_filters[2],
                          width, height, centre, self._connection, galaxy_id)  # r, g, NUV
    image3 = ImageBuilder(bucket_name, 3, get_colour_image_key(galaxy_key_stub, 3), None,
                          image3_filters[0], image3_filters[1], image3_filters[2],
                          width, height, centre, self._connection, galaxy_id)  # 3.6, g, NUV
    image4 = ImageBuilder(bucket_name, 4, get_colour_image_key(galaxy_key_stub, 4), None,
                          image4_filters[0], image4_filters[1], image4_filters[2],
                          width, height, centre, self._connection, galaxy_id)  # 22, r, NUV
    images = [image1, image2, image3, image4]

    for hdu in hdulist:
        filter_band = hdu.header['MAGPHYSI']
        for image in images:
            image.set_data(filter_band, hdu.data)

    s3helper = S3Helper()
    for image in images:
        if image.is_valid():
            image.save_image(s3helper)
        else:
            print 'not valid'

    hdulist.close()
def migrate_files(connection):
    """
    Migrate the various files to S3
    """
    LOG.info('Migrating the files')

    s3helper = S3Helper()
    migrate_image_files(connection, get_galaxy_image_bucket(), get_files_bucket(), s3helper)
    migrate_hdf5_files(connection, get_files_bucket(), s3helper)
def remigrate_files(connection):
    """
    Re-migrate the HDF5 files that were not migrated to S3 correctly
    """
    LOG.info('Migrating the files')

    s3helper = S3Helper()
    files_bucket = get_files_bucket()
    bad_galaxies = find_bad_hdf5_files(s3helper, files_bucket)
    migrate_hdf5_files(bad_galaxies, connection, files_bucket, s3helper)
def get_hdf5_size_data():
    """
    Get the HDF5 data we need
    :return:
    """
    # Get the list of files
    LOG.info('Getting the hdf5 files from the database')
    data = {}
    set_names = set()
    for entry in connection.execute(select([HDF5_SIZE])):
        key_size_mb = entry[HDF5_SIZE.c.size] / 1000000.0
        LOG.info('Processing {0} {1} {2}'.format(entry[HDF5_SIZE.c.name], entry[HDF5_SIZE.c.size], key_size_mb))
        run_id = entry[HDF5_SIZE.c.run_id]

        # Get the array
        row_data = data.get(run_id)
        if row_data is None:
            row_data = []
            data[run_id] = row_data

        row_data.append(key_size_mb)
        set_names.add(entry[HDF5_SIZE.c.name])

    LOG.info('Getting the hdf5 files from S3')
    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_files_bucket())
    insert_hdf5 = HDF5_SIZE.insert()
    for prefix in bucket.list(prefix='', delimiter='/'):
        prefix_name = prefix.name[:-1]
        if prefix_name not in set_names:
            key = bucket.get_key('{0}/{0}.hdf5'.format(prefix_name))
            if key is not None:
                key_size_mb = key.size / 1000000.0
                LOG.info('Processing {0} {1} {2}'.format(key.name, key.size, key_size_mb))
                elements = prefix.name.split('__')
                run_id = int(elements[1])
                connection.execute(insert_hdf5, name=prefix_name, size=key.size, run_id=run_id)

                # Get the array
                row_data = data.get(run_id)
                if row_data is None:
                    row_data = []
                    data[run_id] = row_data

                row_data.append(key_size_mb)

    return data
def remove_s3_files(galaxy_name, run_id, galaxy_id):
    """
    Remove the files from S3
    :return:
    """
    s3_helper = S3Helper()
    remove_files_with_key(s3_helper.get_bucket(get_galaxy_image_bucket()), galaxy_name, run_id, galaxy_id)
    remove_files_with_key(s3_helper.get_bucket(get_files_bucket()), galaxy_name, run_id, galaxy_id)
def access_s3():
    """
    Check we can access the archive bucket
    :return:
    """
    try:
        s3helper = S3Helper()
        bucket = s3helper.get_bucket(get_archive_bucket())
        LOG.info('Access S3 bucket name: {0}'.format(bucket.name))
    except Exception:
        LOG.exception('access_s3')
        return False

    return True
def store_files(hdf5_dir):
    """
    Scan a directory for files and send them to the archive

    :param hdf5_dir: the directory to scan
    :return:
    """
    LOG.info('Directory: %s', hdf5_dir)

    # Get the work units still being processed
    ENGINE = create_engine(DB_LOGIN)
    connection = ENGINE.connect()

    files = os.path.join(hdf5_dir, '*.hdf5')
    file_count = 0
    try:
        s3helper = S3Helper()
        bucket_name = get_files_bucket()
        for file_name in glob.glob(files):
            size = os.path.getsize(file_name)
            galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
            if galaxy_id >= 0:
                key = '{0}/{0}.hdf5'.format(galaxy_name)
                LOG.info('File name: %s', file_name)
                LOG.info('File size: %d', size)
                LOG.info('Bucket: %s', bucket_name)
                LOG.info('Key: %s', key)

                s3helper.add_file_to_bucket(bucket_name, key, file_name)
                file_count += 1
                os.remove(file_name)
                connection.execute(
                    GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(
                        status_id=STORED, status_time=datetime.datetime.now()))
            else:
                LOG.error('File name: %s', file_name)
                LOG.error('File size: %d', size)
                LOG.error('Could not get the galaxy id')
    except Exception:
        LOG.exception('Major error')
    finally:
        connection.close()

    return file_count
def access_s3():
    """
    Check we can access the archive bucket
    :return:
    """
    try:
        LOG.info('Testing S3 access')
        s3helper = S3Helper()
        bucket = s3helper.get_bucket(get_archive_bucket())
        LOG.info('Access S3 bucket name: {0}'.format(bucket.name))
    except Exception:
        LOG.exception('access_s3')
        return False

    return True
def init(project, template):
    project_fname = os.path.join(find_project_root(), 'project_info.json')
    logger = logging.getLogger(__name__)

    # write empty template file to fill in manually
    if template:
        template = {"name": project, "keywords": []}
        with open(project_fname, 'w') as f:
            json.dump(template, f, cls=JSONEncoder, indent=4)
        logger.info('Successfully wrote empty template file "{}". Please fill in values manually.'.format(project_fname))
        return

    # sync project info
    s3_helper = S3Helper()
    s3_helper.sync_project_info(project)
def original_image_checked_ami():
    """
    We're running in the AMI instance - so do the actual work

    Check the newly created images to make sure the images have been created
    :return:
    """
    # Connect to the database - the login string is set in the database package
    engine = create_engine(DB_LOGIN)
    connection = engine.connect()
    s3helper = S3Helper()
    try:
        # Look in the database for the galaxies
        galaxy_ids = []
        for galaxy in connection.execute(
                select([GALAXY]).where(
                    and_(GALAXY.c.original_image_checked == None,
                         GALAXY.c.pixel_count > 0)).order_by(GALAXY.c.galaxy_id)):
            galaxy_ids.append(galaxy[GALAXY.c.galaxy_id])

        for galaxy_id in galaxy_ids:
            galaxy = connection.execute(
                select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id)).first()
            if not image_files_exist(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id],
                                     galaxy[GALAXY.c.galaxy_id], s3helper):
                mark_as_checked = regenerated_original_images(
                    galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id],
                    galaxy[GALAXY.c.galaxy_id], s3helper, connection)
            else:
                mark_as_checked = True

            if mark_as_checked:
                connection.execute(GALAXY.update().where(
                    GALAXY.c.galaxy_id == galaxy_id).values(
                        original_image_checked=datetime.datetime.now()))
    except Exception:
        LOG.exception('Major error')
    finally:
        connection.close()
def process_ami():
    """
    We're running on the AMI instance - so actually do the work

    Find the files and move them to S3
    :return:
    """
    delete_delay_ago = datetime.datetime.now() - datetime.timedelta(days=float(ARC_BOINC_STATISTICS_DELAY))
    LOG.info('delete_delay_ago: {0}'.format(delete_delay_ago))
    s3helper = S3Helper()
    for directory_name in glob.glob(os.path.join(POGS_BOINC_PROJECT_ROOT, 'html/stats_archive/*')):
        if os.path.isdir(directory_name):
            directory_mtime = datetime.datetime.fromtimestamp(os.path.getmtime(directory_name))
            LOG.info('directory: {0}, mtime: {1}'.format(directory_name, directory_mtime))
            if directory_mtime < delete_delay_ago:
                move_files_to_s3(s3helper, directory_name)
def archive_boinc_db_purge():
    """
    Clean up the BOINC DB Purge records

    Find the files and move them to S3
    :return:
    """
    delete_delay_ago = datetime.datetime.now() - datetime.timedelta(days=float(ARC_BOINC_STATISTICS_DELAY))
    LOG.info('delete_delay_ago: {0}'.format(delete_delay_ago))
    s3helper = S3Helper()
    for directory_name in glob.glob(os.path.join(POGS_BOINC_PROJECT_ROOT, 'archives/*')):
        if os.path.isdir(directory_name):
            directory_mtime = datetime.datetime.fromtimestamp(os.path.getmtime(directory_name))
            LOG.info('directory: {0}, mtime: {1}'.format(directory_name, directory_mtime))
            if directory_mtime < delete_delay_ago:
                move_files_to_s3(s3helper, directory_name)
def store_files(connection, modulus, remainder):
    """
    Scan a directory for files and send them to the archive
    """
    LOG.info('Directory: %s', HDF5_OUTPUT_DIRECTORY)

    to_store_dir = os.path.join(HDF5_OUTPUT_DIRECTORY, 'to_store')
    files = os.path.join(to_store_dir, '*.hdf5')
    file_count = 0

    s3helper = S3Helper()
    bucket_name = get_saved_files_bucket()
    for file_name in glob.glob(files):
        galaxy_id, galaxy_name = get_galaxy_id_and_name(file_name)
        if galaxy_id >= 0:
            if modulus is None or galaxy_id % modulus == remainder:
                size = os.path.getsize(file_name)
                key = '{0}/{0}.hdf5'.format(galaxy_name)
                LOG.info('File name: %s', file_name)
                LOG.info('File size: %d', size)
                LOG.info('Bucket: %s', bucket_name)
                LOG.info('Key: %s', key)

                s3helper.add_file_to_bucket(bucket_name, key, file_name)
                file_count += 1
                os.remove(file_name)
                connection.execute(
                    GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(
                        status_id=STORED, status_time=datetime.datetime.now()))
        else:
            LOG.error('File name: %s', file_name)
            LOG.error('Could not get the galaxy id')

        if shutdown() is True:
            raise SystemExit

    return file_count
def get_glacier_data_size(connection, bucket_name):
    """
    Return the total number of bytes we have stored in Glacier.
    Checks the database first for a cached copy of this figure so we don't have to keep re-requesting it.

    :param connection: The database connection.
    :param bucket_name: Name of the bucket to count.
    :return:
    """
    # Load the most recent entry from the database.
    # If the timestamp on the most recent entry is less than 24 hours old, use it;
    # if not, do the full check and add a new entry in the db recording the glacier size.
    day_ago = seconds_since_epoch(get_hours_ago(24))
    result = connection.execute(
        select([HDF5_GLACIER_STORAGE_SIZE]).where(HDF5_GLACIER_STORAGE_SIZE.c.count_time > day_ago))

    latest_time = 0
    latest_size = 0
    for row in result:
        if row['count_time'] > latest_time:
            latest_size = row['size']
            latest_time = row['count_time']

    if latest_time == 0 or latest_size == 0:
        # Need to re-count
        s3helper = S3Helper()
        LOG.info("Glacier data size expired, recounting...")
        size = s3helper.glacier_data_size(bucket_name)
        LOG.info("Glacier data size counted: {0} bytes".format(size))
        connection.execute(HDF5_GLACIER_STORAGE_SIZE.insert(),
                           size=size,
                           count_time=seconds_since_epoch(datetime.now()))
    else:
        size = latest_size

    return size
def upload(self, request, pk=None, *args, **kwargs):
    multipart_file = request.data.get("multipart_file")

    store_file = StoreFile(file_obj=multipart_file)
    store_file.store = Store(pk=pk)
    store_file.save()

    file_path = store_file.file_obj.path
    file_size = store_file.file_obj.size

    if file_path and not file_path == "":
        orig_filename = file_path
        filename = orig_filename.split("/")[-1].lower()
        file_ext = filename.split(".")[-1]
        filename_hash = '{}.{}'.format(uuid.uuid4(), file_ext)
        folder = 'dev_public/test'

        upload_request = S3Helper.upload_file(orig_filename, folder, filename_hash)
        if upload_request.get("status") == 200:
            # Delete created file object in disk
            store_file.file_obj.delete()

            # Update store_file
            store_file.storage_url = upload_request.get("upload_url")
            store_file.filename = filename_hash
            store_file.file_size = file_size
            store_file.content_type = FILE_TYPES[file_ext]
            store_file.save()

            return Response({
                "status": HTTP_200_OK,
                "store_file": store_file.to_json()
            }, status=HTTP_200_OK)

    return Response({"status": HTTP_400_BAD_REQUEST}, status=HTTP_400_BAD_REQUEST)
args = vars(parser.parse_args())

if args['option'] == 'boinc':
    LOG.info('PYTHONPATH = {0}'.format(sys.path))
    # We're running from the BOINC server
    process_boinc()
else:
    # We're running from a specially created AMI
    filename, full_filename = get_ami_log_file('archive_boinc_stats')
    add_file_handler_to_root(full_filename)
    LOG.info('PYTHONPATH = {0}'.format(sys.path))

    LOG.info('About to perform sanity checks')
    if pass_sanity_checks():
        process_ami()
    else:
        LOG.error('Failed to pass sanity tests')

    # Try copying the log file to S3
    try:
        LOG.info('About to copy the log file')
        s3helper = S3Helper()
        s3helper.add_file_to_bucket(get_archive_bucket(),
                                    get_log_archive_key('archive_boinc_stats', filename),
                                    full_filename,
                                    True)
        os.remove(full_filename)
    except:
        LOG.exception('Failed to copy the log file')

    ec2_helper = EC2Helper()
    ec2_helper.release_public_ip()

LOG.info('All done')
def build_png_image_ami(): """ Build the images :return: """ # First check the galaxy exists in the database engine = create_engine(DB_LOGIN) connection = engine.connect() try: query = select([GALAXY]).distinct().where(and_(AREA.c.galaxy_id == GALAXY.c.galaxy_id, AREA.c.update_time >= GALAXY.c.image_time)) galaxy_count = 0 s3helper = S3Helper() bucket_name = get_galaxy_image_bucket() # Start the shutdown signal poller to check when this instance must close start_poll() galaxy_list = [] for galaxy in connection.execute(query): galaxy_list.append(galaxy) total_galaxies = len(galaxy_list) processed_galaxies = 0 processed_print_point = 50 for galaxy in galaxy_list: if processed_galaxies == processed_print_point: LOG.info('{0} out of {1} galaxies processed'.format(processed_galaxies, total_galaxies)) processed_print_point += 50 processed_galaxies += 1 LOG.info('Working on galaxy %s', galaxy[GALAXY.c.name]) array = numpy.empty((galaxy[GALAXY.c.dimension_y], galaxy[GALAXY.c.dimension_x], len(PNG_IMAGE_NAMES)), dtype=numpy.float) array.fill(numpy.NaN) # Return the rows pixel_count = 0 pixels_processed = 0 for row in connection.execute(select([PIXEL_RESULT]).where((PIXEL_RESULT.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]) and PIXEL_RESULT.c.x > -1)): row__x = row[PIXEL_RESULT.c.x] row__y = row[PIXEL_RESULT.c.y] pixel_count += 1 if row[PIXEL_RESULT.c.workunit_id] is not None: pixels_processed += 1 # Defend against bad values if row[PIXEL_RESULT.c.mu] is not None: array[row__y, row__x, 0] = row[PIXEL_RESULT.c.mu] if row[PIXEL_RESULT.c.m] is not None: array[row__y, row__x, 1] = row[PIXEL_RESULT.c.m] if row[PIXEL_RESULT.c.ldust] is not None: array[row__y, row__x, 2] = row[PIXEL_RESULT.c.ldust] if row[PIXEL_RESULT.c.sfr] is not None: # the SFR is a log array[row__y, row__x, 3] = math.pow(10, row[PIXEL_RESULT.c.sfr]) connection.execute(GALAXY.update() .where(GALAXY.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]) .values(image_time=datetime.datetime.now(), pixel_count=pixel_count, pixels_processed=pixels_processed)) galaxy_count += 1 # Now write the files black_rgb = (0, 0, 0) for name in PNG_IMAGE_NAMES: value = 0 height = galaxy[GALAXY.c.dimension_y] width = galaxy[GALAXY.c.dimension_x] idx = 0 if name == 'mu': idx = 0 elif name == 'm': idx = 1 elif name == 'ldust': idx = 2 elif name == 'sfr': idx = 3 values = [] for x in range(0, width - 1): for y in range(0, height - 1): value = array[y, x, idx] if not math.isnan(value) and value > 0: values.append(value) values.sort() if len(values) > 1000: top_count = int(len(values) * 0.005) top_value = values[len(values) - top_count] elif len(values) > 0: top_value = values[len(values) - 1] else: top_value = 1 if len(values) > 1: median_value = values[int(len(values) / 2)] elif len(values) > 0: median_value = values[0] else: median_value = 1 sigma = 1 / median_value multiplier = 255.0 / math.asinh(top_value * sigma) image = Image.new("RGB", (width, height), black_rgb) for x in range(0, width - 1): for y in range(0, height - 1): value = array[y, x, idx] if not math.isnan(value) and value > 0: value = int(math.asinh(value * sigma) * multiplier) if value > 255: value = 255 red = FIRE_R[value] green = FIRE_G[value] blue = FIRE_B[value] image.putpixel((x, height - y - 1), (red, green, blue)) file_name = '{0}/image.png'.format(POGS_TMP) image.save(file_name) s3helper.add_file_to_bucket(bucket_name, get_build_png_name(get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]), name), file_name) if shutdown() is True: LOG.info('Spot Instance 
Terminate Notice received, build_png_image is shutting down') break except: LOG.exception('An exception occurred.') finally: connection.close() LOG.info('Built images for %d galaxies', galaxy_count)
def delete_galaxy(connection, galaxy_ids):
    for galaxy_id in galaxy_ids:
        transaction = connection.begin()
        galaxy = connection.execute(
            select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id)).first()
        if galaxy is None:
            LOG.info('Error: Galaxy with galaxy_id of %d was not found', galaxy_id)
        else:
            LOG.info('Deleting Galaxy with galaxy_id of %d - %s', galaxy_id, galaxy[GALAXY.c.name])
            area_count = connection.execute(
                select([func.count(AREA.c.area_id)]).where(
                    AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id])).first()[0]
            counter = 1
            for area_id1 in connection.execute(
                    select([AREA.c.area_id]).where(
                        AREA.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]).order_by(AREA.c.area_id)):
                LOG.info("Deleting galaxy {0} area {1}. {2} of {3}".format(
                    galaxy_id, area_id1[0], counter, area_count))
                connection.execute(PIXEL_RESULT.delete().where(
                    PIXEL_RESULT.c.area_id == area_id1[0]))

                # Give the rest of the world a chance to access the database
                time.sleep(0.1)
                counter += 1

                if shutdown() is True:
                    transaction.rollback()
                    raise SystemExit

            LOG.info("Deleting FITS headers for galaxy {0}".format(galaxy_id))
            connection.execute(FITS_HEADER.delete().where(
                FITS_HEADER.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]))

            # Now empty the bucket of the sed files
            s3helper = S3Helper()
            bucket = s3helper.get_bucket(get_sed_files_bucket())
            galaxy_file_name = get_galaxy_file_name(galaxy[GALAXY.c.name],
                                                    galaxy[GALAXY.c.run_id],
                                                    galaxy[GALAXY.c.galaxy_id])
            for key in bucket.list(prefix='{0}/'.format(galaxy_file_name)):
                # Ignore the key
                if key.key.endswith('/'):
                    continue
                bucket.delete_key(key)

                if shutdown() is True:
                    transaction.rollback()
                    raise SystemExit

            # Now the folder
            key = Key(bucket)
            key.key = '{0}/'.format(galaxy_file_name)
            bucket.delete_key(key)

            LOG.info('Galaxy with galaxy_id of %d was deleted', galaxy_id)
            connection.execute(
                GALAXY.update().where(GALAXY.c.galaxy_id == galaxy_id).values(
                    status_id=DELETED, status_time=datetime.datetime.now()))

        if shutdown() is True:
            transaction.rollback()
            raise SystemExit

        transaction.commit()
def store_pixels(connection, galaxy_file_name, group, dimension_x, dimension_y, dimension_z, area_total, output_directory, map_parameter_name): """ Store the pixel data """ LOG.info('Storing the pixel data for {0} - {1} areas to process'.format( galaxy_file_name, area_total)) data = numpy.empty( (dimension_x, dimension_y, NUMBER_PARAMETERS, NUMBER_IMAGES), dtype=numpy.float) data.fill(numpy.NaN) data_pixel_details = group.create_dataset('pixel_details', (dimension_x, dimension_y), dtype=data_type_pixel, compression='gzip') data_pixel_parameters = group.create_dataset( 'pixel_parameters', (dimension_x, dimension_y, NUMBER_PARAMETERS), dtype=data_type_pixel_parameter, compression='gzip') data_pixel_filter = group.create_dataset( 'pixel_filters', (dimension_x, dimension_y, dimension_z), dtype=data_type_pixel_filter, compression='gzip') data_pixel_histograms_grid = group.create_dataset( 'pixel_histograms_grid', (dimension_x, dimension_y, NUMBER_PARAMETERS), dtype=data_type_block_details, compression='gzip') histogram_group = group.create_group('histogram_blocks') histogram_list = [] pixel_count = 0 area_count = 0 block_id = 1 block_index = 0 histogram_data = histogram_group.create_dataset( 'block_1', (BLOCK_SIZE, ), dtype=data_type_pixel_histogram, compression='gzip') s3helper = S3Helper() bucket = s3helper.get_bucket(get_files_bucket()) for key in bucket.list(prefix='{0}/sed/'.format(galaxy_file_name)): # Ignore the key if key.key.endswith('/'): continue # Now process the file start_time = time.time() LOG.info('Processing file {0}'.format(key.key)) temp_file = os.path.join(output_directory, 'temp.sed') key.get_contents_to_filename(temp_file) if is_gzip(temp_file): f = gzip.open(temp_file, "rb") else: f = open(temp_file, "r") area_id = None pxresult_id = None line_number = 0 percentiles_next = False histogram_next = False skynet_next1 = False skynet_next2 = False map_pixel_results = {} list_filters = [] try: for line in f: line_number += 1 if line.startswith(" ####### "): # Clear all the maps and stuff map_pixel_results = {} list_filters = [] # Split the line to extract the data values = line.split() pointName = values[1] pxresult_id = pointName[3:].rstrip() (x, y, area_id) = get_pixel_result(connection, pxresult_id) line_number = 0 percentiles_next = False histogram_next = False skynet_next1 = False skynet_next2 = False pixel_count += 1 elif pxresult_id is not None: if line_number == 2: filter_names = line.split() filter_layer = 0 for filter_name in filter_names: if filter_name != '#': data_pixel_filter.attrs[ filter_name] = filter_layer filter_layer += 1 elif line_number == 3: values = line.split() for value in values: list_filters.append([float(value)]) elif line_number == 4: filter_layer = 0 values = line.split() for value in values: filter_description = list_filters[filter_layer] filter_description.append(float(value)) filter_layer += 1 elif line_number == 9: values = line.split() map_pixel_results['i_sfh'] = float(values[0]) map_pixel_results['i_ir'] = float(values[1]) map_pixel_results['chi2'] = float(values[2]) map_pixel_results['redshift'] = float(values[3]) elif line_number == 11: values = line.split() data[x, y, INDEX_F_MU_SFH, INDEX_BEST_FIT] = float(values[0]) data[x, y, INDEX_F_MU_IR, INDEX_BEST_FIT] = float(values[1]) data[x, y, INDEX_MU_PARAMETER, INDEX_BEST_FIT] = float(values[2]) data[x, y, INDEX_TAU_V, INDEX_BEST_FIT] = float(values[3]) data[x, y, INDEX_SSFR_0_1GYR, INDEX_BEST_FIT] = float(values[4]) data[x, y, INDEX_M_STARS, INDEX_BEST_FIT] = float(values[5]) data[x, y, 
INDEX_L_DUST, INDEX_BEST_FIT] = float(values[6]) data[x, y, INDEX_T_W_BC, INDEX_BEST_FIT] = float(values[7]) data[x, y, INDEX_T_C_ISM, INDEX_BEST_FIT] = float(values[8]) data[x, y, INDEX_XI_C_TOT, INDEX_BEST_FIT] = float(values[9]) data[x, y, INDEX_XI_PAH_TOT, INDEX_BEST_FIT] = float(values[10]) data[x, y, INDEX_XI_MIR_TOT, INDEX_BEST_FIT] = float(values[11]) data[x, y, INDEX_XI_W_TOT, INDEX_BEST_FIT] = float(values[12]) data[x, y, INDEX_TAU_V_ISM, INDEX_BEST_FIT] = float(values[13]) data[x, y, INDEX_M_DUST, INDEX_BEST_FIT] = float(values[14]) data[x, y, INDEX_SFR_0_1GYR, INDEX_BEST_FIT] = float(values[15]) elif line_number == 13: filter_layer = 0 values = line.split() for value in values: filter_description = list_filters[filter_layer] if filter_layer < dimension_z: data_pixel_filter[x, y, filter_layer] = ( filter_description[0], filter_description[1], float(value), ) filter_layer += 1 elif line_number > 13: if line.startswith("# ..."): parts = line.split('...') parameter_name = parts[1].strip() parameter_name_id = map_parameter_name[ parameter_name] percentiles_next = False histogram_next = True skynet_next1 = False skynet_next2 = False histogram_list = [] elif line.startswith( "#....percentiles of the PDF......"): percentiles_next = True histogram_next = False skynet_next1 = False skynet_next2 = False # Write out the histogram into a block for compression improvement data_pixel_histograms_grid[x, y, parameter_name_id - 1] = ( block_id, block_index, len(histogram_list)) for pixel_histogram_item in histogram_list: # Do we need a new block if block_index >= BLOCK_SIZE: block_id += 1 block_index = 0 histogram_data = histogram_group.create_dataset( 'block_{0}'.format(block_id), (BLOCK_SIZE, ), dtype=data_type_pixel_histogram, compression='gzip') histogram_data[block_index] = ( pixel_histogram_item[0], pixel_histogram_item[1], ) block_index += 1 elif line.startswith(" #...theSkyNet"): percentiles_next = False histogram_next = False skynet_next1 = True skynet_next2 = False elif line.startswith("# theSkyNet2"): percentiles_next = False histogram_next = False skynet_next1 = False skynet_next2 = True elif percentiles_next: values = line.split() z = parameter_name_id - 1 data[x, y, z, INDEX_PERCENTILE_2_5] = float(values[0]) data[x, y, z, INDEX_PERCENTILE_16] = float(values[1]) data[x, y, z, INDEX_PERCENTILE_50] = float(values[2]) data[x, y, z, INDEX_PERCENTILE_84] = float(values[3]) data[x, y, z, INDEX_PERCENTILE_97_5] = float(values[4]) percentiles_next = False elif histogram_next: values = line.split() hist_value = float(values[1]) if hist_value > MIN_HIST_VALUE and not math.isnan( hist_value): histogram_list.append( (float(values[0]), hist_value)) elif skynet_next1: values = line.split() data_pixel_details[x, y] = ( pxresult_id, area_id, map_pixel_results['i_sfh'], map_pixel_results['i_ir'], map_pixel_results['chi2'], map_pixel_results['redshift'], float(values[0]), float(values[2]), float(values[3]), float(values[4]), ) skynet_next1 = False elif skynet_next2: # We have the highest bin probability values which require the parameter_id values = line.split() high_prob_bin = float(values[0]) if float( values[0]) is not None else numpy.NaN first_prob_bin = float(values[1]) if float( values[1]) is not None else numpy.NaN last_prob_bin = float(values[2]) if float( values[2]) is not None else numpy.NaN bin_step = float(values[3]) if float( values[3]) is not None else numpy.NaN z = parameter_name_id - 1 data[x, y, z, INDEX_HIGHEST_PROB_BIN] = high_prob_bin data_pixel_parameters[x, y, z] = ( 
first_prob_bin, last_prob_bin, bin_step, ) skynet_next2 = False except IOError: LOG.error('IOError after {0} lines'.format(line_number)) finally: f.close() area_count += 1 LOG.info('{0:0.3f} seconds for file {1}. {2} of {3} areas.'.format( time.time() - start_time, key.key, area_count, area_total)) pixel_dataset = group.create_dataset('pixels', data=data, compression='gzip') pixel_dataset.attrs['DIM3_F_MU_SFH'] = INDEX_F_MU_SFH pixel_dataset.attrs['DIM3_F_MU_IR'] = INDEX_F_MU_IR pixel_dataset.attrs['DIM3_MU_PARAMETER'] = INDEX_MU_PARAMETER pixel_dataset.attrs['DIM3_TAU_V'] = INDEX_TAU_V pixel_dataset.attrs['DIM3_SSFR_0_1GYR'] = INDEX_SSFR_0_1GYR pixel_dataset.attrs['DIM3_M_STARS'] = INDEX_M_STARS pixel_dataset.attrs['DIM3_L_DUST'] = INDEX_L_DUST pixel_dataset.attrs['DIM3_T_C_ISM'] = INDEX_T_C_ISM pixel_dataset.attrs['DIM3_T_W_BC'] = INDEX_T_W_BC pixel_dataset.attrs['DIM3_XI_C_TOT'] = INDEX_XI_C_TOT pixel_dataset.attrs['DIM3_XI_PAH_TOT'] = INDEX_XI_PAH_TOT pixel_dataset.attrs['DIM3_XI_MIR_TOT'] = INDEX_XI_MIR_TOT pixel_dataset.attrs['DIM3_XI_W_TOT'] = INDEX_XI_W_TOT pixel_dataset.attrs['DIM3_TAU_V_ISM'] = INDEX_TAU_V_ISM pixel_dataset.attrs['DIM3_M_DUST'] = INDEX_M_DUST pixel_dataset.attrs['DIM3_SFR_0_1GYR'] = INDEX_SFR_0_1GYR pixel_dataset.attrs['DIM4_BEST_FIT'] = INDEX_BEST_FIT pixel_dataset.attrs['DIM4_PERCENTILE_50'] = INDEX_PERCENTILE_50 pixel_dataset.attrs['DIM4_HIGHEST_PROB_BIN'] = INDEX_HIGHEST_PROB_BIN pixel_dataset.attrs['DIM4_PERCENTILE_2_5'] = INDEX_PERCENTILE_2_5 pixel_dataset.attrs['DIM4_PERCENTILE_16'] = INDEX_PERCENTILE_16 pixel_dataset.attrs['DIM4_PERCENTILE_84'] = INDEX_PERCENTILE_84 pixel_dataset.attrs['DIM4_PERCENTILE_97_5'] = INDEX_PERCENTILE_97_5 LOG.info('Created {0} blocks'.format(block_id)) return pixel_count
def assimilate_handler(self, wu, results, canonical_result):
    """
    Process the Results.
    """
    self.logDebug("Start of assimilate_handler for wu %d\n", wu.id)
    connection = None
    transaction = None
    try:
        if wu.canonical_result:
            out_file = self.get_file_path(canonical_result)
            self.area = None
            if out_file:
                if os.path.isfile(out_file):
                    pass
                else:
                    self.logDebug("File [%s] not found\n", out_file)
                    out_file = None

            if out_file:
                self.logDebug("Reading File [%s]\n", out_file)
                start = time.time()
                connection = ENGINE.connect()
                transaction = connection.begin()
                resultCount = self._process_result(connection, out_file, wu)
                if self.noinsert:
                    transaction.rollback()
                else:
                    if not resultCount:
                        self.logCritical("No results were found in the output file\n")

                    if self._area_id is None:
                        self.logDebug("The Area was not found\n")
                    else:
                        connection.execute(AREA.update()
                                           .where(AREA.c.area_id == self._area_id)
                                           .values(workunit_id=wu.id, update_time=datetime.datetime.now()))

                        user_id_set = set()
                        for result in results:
                            if result.user and result.validate_state == boinc_db.VALIDATE_STATE_VALID:
                                user_id = result.user.id
                                if user_id not in user_id_set:
                                    user_id_set.add(user_id)

                        connection.execute(AREA_USER.delete().where(AREA_USER.c.area_id == self._area_id))
                        insert = AREA_USER.insert()
                        for user_id in user_id_set:
                            connection.execute(insert, area_id=self._area_id, userid=user_id)

                        # Copy the file to S3
                        s3helper = S3Helper()
                        s3helper.add_file_to_bucket(get_files_bucket(),
                                                    get_key_sed(self._galaxy_name, self._run_id, self._galaxy_id, self._area_id),
                                                    out_file,
                                                    reduced_redundancy=True)

                    time_taken = '{0:.2f}'.format(time.time() - start)
                    self.logDebug("Saving %d results for workunit %d in %s seconds\n", resultCount, wu.id, time_taken)
                    transaction.commit()
                connection.close()
            else:
                self.logCritical("The output file was not found\n")
        else:
            self.logDebug("No canonical_result for workunit\n")
            self.report_errors(wu)
    except:
        if transaction is not None:
            transaction.rollback()
        if connection is not None:
            connection.close()
        print "Unexpected error:", sys.exc_info()[0]
        traceback.print_exception(sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2])
        self.logCritical("Unexpected error occurred, retrying...\n")
        return -1

    return 0
def get_data(output_directory): """ Get the stats from the S3 archive and build the csv files :param output_directory: where to store the files :return: """ done_dates = get_done_dates() # Now get ready to load the files keys_being_restored = [] s3helper = S3Helper() bucket = s3helper.get_bucket(get_archive_bucket()) set_filenames = set() for prefix in bucket.list(prefix='stats/', delimiter='/'): elements = prefix.name.split('/') elements = elements[1].split('_') date_file = date(int(elements[1]), int(elements[2]), int(elements[3])) if date_file not in done_dates: stats_file = '{0}_{1}_{2}_user.gz'.format(elements[1], elements[2], elements[3]) full_filename = os.path.join(output_directory, stats_file) if full_filename in set_filenames: # Ignore pass elif not os.path.exists(full_filename) or os.path.getsize( full_filename) == 9: set_filenames.add(full_filename) key = bucket.get_key(os.path.join(prefix.name, 'user.gz')) if key is not None: if key.ongoing_restore or key.storage_class == 'GLACIER': LOG.info('Restoring {0}'.format(key.name)) # We need retrieve it if not key.ongoing_restore: key.restore(days=5) keys_being_restored.append([key.name, full_filename]) # Put an empty file in the directory if not os.path.exists(full_filename): output_file = open(full_filename, "wb") output_file.write('Restoring') output_file.close() else: # Put the file in the storage area LOG.info('Fetching {0}'.format(key.name)) key.get_contents_to_filename(full_filename) # Now we have to wait for all the files we need to be restored for key_pair in keys_being_restored: key = bucket.get_key(key_pair[0]) if key.ongoing_restore: time.sleep(300) else: # The file has been restored so copy it LOG.info('Fetching {0}'.format(key_pair[0])) key.get_contents_to_filename(key_pair[1]) # Build the prepared statements insert_usage = USAGE.insert() insert_individual = INDIVIDUAL.insert() # Now build up the list of filenames for file_name in glob.glob(os.path.join(output_directory, '*_user.gz')): (head, tail) = os.path.split(file_name) elements = tail.split('_') date_file = date(int(elements[0]), int(elements[1]), int(elements[2])) if date_file not in done_dates: # Read the contents LOG.info('Processing {0}'.format(file_name)) gzip_file = gzip.open(file_name, 'rb') contents = gzip_file.read() gzip_file.close() # Extract the XML data root = ET.fromstring(contents) # Initialise gflops = 0.0 active_users = 0 registered_users = 0 transaction = connection.begin() # The users are in a random order for user in root: user_id = user.find('id').text user_id = int(user_id) expavg_credit = user.find('expavg_credit').text expavg_credit = float(expavg_credit) connection.execute(insert_individual, date=date_file, user_id=user_id, expavg_credit=expavg_credit) registered_users += 1 if expavg_credit > 1: active_users += 1 gflops += expavg_credit connection.execute(insert_usage, date=date_file, gflops=gflops / COBBLESTONE_FACTOR, active_users=active_users, registered_users=registered_users) transaction.commit()
def process_file(self, registration): """ Process a registration. :param registration: """ self._filename = registration[REGISTER.c.filename] self._galaxy_name = registration[REGISTER.c.galaxy_name] self._galaxy_type = registration[REGISTER.c.galaxy_type] self._priority = registration[REGISTER.c.priority] self._redshift = registration[REGISTER.c.redshift] self._run_id = registration[REGISTER.c.run_id] self._sigma = registration[REGISTER.c.sigma] self._sigma_filename = registration[REGISTER.c.sigma_filename] # Have we files that we can use for this? self._rounded_redshift = self._get_rounded_redshift() if self._rounded_redshift is None: LOG.error('No models matching the redshift of %.4f', self._redshift) return 0 self._hdu_list = pyfits.open(self._filename, memmap=True) self._layer_count = len(self._hdu_list) # Do we need to open and sort the S/N Ratio file if self._sigma_filename is not None: self._sigma = 0.0 self._signal_noise_hdu = pyfits.open(self._sigma_filename, memmap=True) if self._layer_count != len(self._signal_noise_hdu): LOG.error('The layer counts do not match %d vs %d', self._layer_count, len(self._signal_noise_hdu)) return 0, 0 else: self._sigma = float(self._sigma) self._end_y = self._hdu_list[0].data.shape[0] self._end_x = self._hdu_list[0].data.shape[1] LOG.info("Image dimensions: %(x)d x %(y)d x %(z)d => %(pix).2f Mpixels" % {'x': self._end_x, 'y': self._end_y, 'z': self._layer_count, 'pix': self._end_x * self._end_y / 1000000.0}) # Get the flops estimate amd cobblestone factor run = self._connection.execute(select([RUN]).where(RUN.c.run_id == self._run_id)).first() self._fpops_est_per_pixel = run[RUN.c.fpops_est] self._cobblestone_scaling_factor = run[RUN.c.cobblestone_factor] # Create and save the object datetime_now = datetime.now() result = self._connection.execute(GALAXY.insert().values(name=self._galaxy_name, dimension_x=self._end_x, dimension_y=self._end_y, dimension_z=self._layer_count, redshift=self._redshift, sigma=self._sigma, create_time=datetime_now, image_time=datetime_now, galaxy_type=self._galaxy_type, ra_cent=0, dec_cent=0, pixel_count=0, pixels_processed=0, run_id=self._run_id)) self._galaxy_id = result.inserted_primary_key[0] LOG.info("Writing %s to database", self._galaxy_name) # Store the fits header self._store_fits_header() # Get the filters we're using for this run and sort the layers self._get_filters_sort_layers() # Build the template file we need if necessary self._build_template_file() # Copy the filter and model files we need self._copy_important_files() # Now break up the galaxy into chunks self._break_up_galaxy() self._connection.execute(GALAXY.update().where(GALAXY.c.galaxy_id == self._galaxy_id).values(pixel_count=self._pixel_count)) LOG.info('Building the images') galaxy_file_name = get_galaxy_file_name(self._galaxy_name, self._run_id, self._galaxy_id) s3helper = S3Helper() image = FitsImage(self._connection) image.build_image(self._filename, galaxy_file_name, self._galaxy_id, get_galaxy_image_bucket()) # Copy the fits file to S3 - renamed to make it unique bucket_name = get_files_bucket() s3helper.add_file_to_bucket(bucket_name, get_key_fits(self._galaxy_name, self._run_id, self._galaxy_id), self._filename) if self._sigma_filename is not None: s3helper.add_file_to_bucket(bucket_name, get_key_sigma_fits(self._galaxy_name, self._run_id, self._galaxy_id), self._sigma_filename) return self._work_units_added, self._pixel_count
def sync(data_type='all', last_n_days=None):
    project_info = get_project_info()
    project_name = project_info['name']
    s3_helper = S3Helper()
    s3_helper.sync(project_name, data_type=data_type, last_n_days=last_n_days)
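# A minimal usage sketch (not part of the original source). It only exercises the
# signature shown above: sync() forwards the project name from project_info.json plus
# the two keyword arguments straight to S3Helper.sync(), so the values here are
# illustrative, and a valid project_info.json is assumed to exist for the current project.
if __name__ == '__main__':
    # Sync everything for the current project, then limit a second pass to ~30 days.
    sync()
    sync(data_type='all', last_n_days=30)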
def generate_files(connection, hdf5_request_galaxy_ids, email, features, layers, pixel_types): """ Get the FITS files for this request :type connection: The database connection :param pixel_types: :param hdf5_request_galaxy_ids: the galaxy id :param email: :param features: :param layers: :return: """ uuid_string = str(uuid.uuid4()) results = [] available_galaxies = [] s3_helper = S3Helper() bucket_name = get_saved_files_bucket() # Check whether all the requested galaxies are available or not. for hdf5_request_galaxy in hdf5_request_galaxy_ids: galaxy = connection.execute( select([GALAXY]).where( GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first() hdf5_request_galaxy = connection.execute( select([ HDF5_REQUEST_GALAXY ]).where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id)).first() state = hdf5_request_galaxy.state if state is not 0: LOG.info('Skipping {0}, state is {1}'.format( galaxy[GALAXY.c.name], state)) continue # Skip key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]) if s3_helper.file_exists(bucket_name, key): if s3_helper.file_archived(bucket_name, key): # file is archived if s3_helper.file_restoring(bucket_name, key): # if file is restoring, just need to wait for it LOG.info( 'Galaxy {0} is still restoring from glacier'.format( galaxy[GALAXY.c.name])) else: # if file is not restoring, need to request. file_size = s3_helper.file_size(bucket_name, key) if restore_file_size_check(connection, bucket_name, file_size): # We're good to restore LOG.info( 'Making request for archived galaxy {0}'.format( galaxy[GALAXY.c.name])) s3_helper.restore_archived_file(bucket_name, key) connection.execute( HDF5_REQUEST_GALAXY_SIZE.insert(), hdf5_request_galaxy_id=hdf5_request_galaxy[ 'hdf5_request_galaxy_id'], size=file_size, request_time=seconds_since_epoch(datetime.now())) else: # Don't restore or we risk spending a lot of money LOG.info( 'Daily galaxy restore size hit. Cannot request archived galaxy.' ) else: # file is not archived LOG.info('Galaxy {0} is available in s3'.format( galaxy[GALAXY.c.name])) available_galaxies.append(hdf5_request_galaxy) else: LOG.error('Galaxy {0} does not exist on s3 or glacier!'.format( galaxy[GALAXY.c.name])) total_request_galaxies = len(hdf5_request_galaxy_ids) LOG.info( 'Need to have {0} galaxies available ({1} currently available)'.format( total_request_galaxies * GALAXY_EMAIL_THRESHOLD, len(available_galaxies))) if len( available_galaxies ) >= total_request_galaxies * GALAXY_EMAIL_THRESHOLD: # Only proceed if more than the threshold of galaxies are available LOG.info('{0}/{1} (> {2}%) galaxies are available. Email will be sent'. 
format(len(available_galaxies), total_request_galaxies, GALAXY_EMAIL_THRESHOLD * 100)) remaining_galaxies = total_request_galaxies - len(available_galaxies) for hdf5_request_galaxy in available_galaxies: result = HDF5ToFitsResult() results.append(result) connection.execute(HDF5_REQUEST_GALAXY.update().where( HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).values(state=1)) # noinspection PyBroadException try: galaxy = connection.execute( select([GALAXY ]).where(GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first() result.galaxy_name = galaxy[GALAXY.c.name] LOG.info('Processing {0} ({1}) for {2}'.format( galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], email)) # make sure the galaxy is available if galaxy[GALAXY.c.status_id] == STORED or galaxy[ GALAXY.c.status_id] == DELETED: output_dir = tempfile.mkdtemp() try: s3_helper = S3Helper() LOG.info('Getting HDF5 file to {0}'.format(output_dir)) tmp_file = get_hdf5_file(s3_helper, output_dir, galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]) LOG.info('File stored in {0}'.format(tmp_file)) # We have the file if os.path.isfile(tmp_file): int_flux_output = os.path.join( output_dir, 'intflux') rad_output = os.path.join(output_dir, 'rad') if not os.path.exists(int_flux_output): os.mkdir(int_flux_output) if not os.path.exists(rad_output): os.mkdir(rad_output) file_names = process_hdf5_file( tmp_file, galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], pixel_types, features, result, layers, output_dir, rad_output, int_flux_output, ) url = zip_files( s3_helper, get_galaxy_file_name( galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]), uuid_string, file_names, output_dir) connection.execute(HDF5_REQUEST_GALAXY.update( ).where( HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id ).values(state=2, link=url, link_expires_at=datetime.now() + timedelta(days=10))) result.error = None result.link = url except S3ResponseError as e: # Handling for a strange s3 error LOG.error( 'Error retrieving galaxy {0} from s3. Retrying next run' .format(galaxy[GALAXY.c.name])) LOG.error('{0}'.format(str(e))) key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]) LOG.info('Key: {0}'.format(key)) LOG.info('Exists: {0}'.format( s3_helper.file_exists(bucket_name, key))) result.error = traceback.format_exc() remaining_galaxies += 1 finally: # Delete the temp files now we're done shutil.rmtree(output_dir) else: connection.execute(HDF5_REQUEST_GALAXY.update().where( HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).values( state=3)) result.error = 'Cannot process {0} ({1}) as the HDF5 file has not been generated'.format( galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id]) LOG.info(result.error) except: LOG.error('Major error') result.error = traceback.format_exc() connection.execute(HDF5_REQUEST_GALAXY.update().where( HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).values( state=3)) send_email(email, results, features, layers, pixel_types, remaining_galaxies)