def make_request(connection, email_address, galaxy_ids, features, layers, pixel_types):
    """
    Record a new HDF5 request and everything attached to it in the database.

    Every insert runs inside one transaction: if any step raises, the
    transaction is rolled back and the exception is re-raised to the caller.

    :param connection: The database connection
    :param email_address: The email address requesting the galaxies
    :param galaxy_ids: The database IDs of the galaxies
    :param features: The features of the galaxies to request
    :param layers: The layers of the galaxies to request
    :param pixel_types: The pixel types of the galaxies to request
    :return: None
    """
    transaction = connection.begin()
    try:
        # Parent row first; its primary key links all the child rows below.
        LOG.info("Making HDF5_REQUEST entry...")
        request_row = connection.execute(
            HDF5_REQUEST.insert(),
            profile_id=0,
            email=email_address,
            created_at=datetime.datetime.utcnow(),
        )

        LOG.info("Making features, layers, pixel types entries...")
        request_key = request_row.inserted_primary_key
        insert_features_layers_pixel_types_db_ids(
            connection, request_key, features, layers, pixel_types
        )

        LOG.info("Making galaxy entries...")
        # The galaxy ids were validated before this call, so insert them as-is.
        for galaxy_id in galaxy_ids:
            connection.execute(
                HDF5_REQUEST_GALAXY.insert(),
                hdf5_request_id=request_key,
                galaxy_id=galaxy_id,
            )

        transaction.commit()
    except:
        # Undo the partial request, then let the caller see the failure.
        transaction.rollback()
        raise
def make_request(connection, email_address, galaxy_ids, features, layers, pixel_types):
    """
    Record a new HDF5 request and everything attached to it in the database.

    Every insert runs inside one transaction: if any step raises, the
    transaction is rolled back and the exception is re-raised to the caller.

    :param connection: The database connection
    :param email_address: The email address requesting the galaxies
    :param galaxy_ids: The database IDs of the galaxies
    :param features: The features of the galaxies to request
    :param layers: The layers of the galaxies to request
    :param pixel_types: The pixel types of the galaxies to request
    :return: None
    """
    transaction = connection.begin()
    try:
        # Parent row first; its primary key links all the child rows below.
        LOG.info("Making HDF5_REQUEST entry...")
        request_row = connection.execute(
            HDF5_REQUEST.insert(),
            profile_id=0,
            email=email_address,
            created_at=datetime.datetime.utcnow(),
        )

        LOG.info("Making features, layers, pixel types entries...")
        request_key = request_row.inserted_primary_key
        insert_features_layers_pixel_types_db_ids(
            connection, request_key, features, layers, pixel_types
        )

        LOG.info("Making galaxy entries...")
        # The galaxy ids were validated before this call, so insert them as-is.
        for galaxy_id in galaxy_ids:
            connection.execute(
                HDF5_REQUEST_GALAXY.insert(),
                hdf5_request_id=request_key,
                galaxy_id=galaxy_id,
            )

        transaction.commit()
    except:
        # Undo the partial request, then let the caller see the failure.
        transaction.rollback()
        raise
def generate_files(connection, hdf5_request_galaxy_ids, email, features, layers):
    """
    Build the FITS files for every galaxy in a request and email the results.

    For each requested galaxy the HDF5_REQUEST_GALAXY row is set to state=1
    before processing starts, to state=2 (together with the download link and
    its expiry) once the zip has been produced, and to state=3 when the galaxy
    cannot be processed or an error occurs. One HDF5ToFitsResult per galaxy is
    collected and handed to send_email at the end.

    :param connection: The database connection
    :param hdf5_request_galaxy_ids: rows exposing ``hdf5_request_galaxy_id``
        and ``galaxy_id`` attributes, one per requested galaxy
    :param email: the address the results are sent to
    :param features: the features to build an image for
    :param layers: the layers to build an image for
    :return: None
    """
    uuid_string = str(uuid.uuid4())
    results = []

    for hdf5_request_galaxy in hdf5_request_galaxy_ids:
        result = HDF5ToFitsResult()
        results.append(result)

        # WHERE clause selecting this request row; reused by every state update
        row_filter = HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id
        connection.execute(HDF5_REQUEST_GALAXY.update().where(row_filter).values(state=1))

        try:
            galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
            result.galaxy_name = galaxy[GALAXY.c.name]
            LOG.info('Processing {0} ({1}) for {2}'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], email))

            # make sure the galaxy is available
            if galaxy[GALAXY.c.status_id] == STORED or galaxy[GALAXY.c.status_id] == DELETED:
                output_dir = tempfile.mkdtemp()
                try:
                    s3_helper = S3Helper()

                    LOG.info('Getting HDF5 file to {0}'.format(output_dir))
                    tmp_file = get_hdf5_file(s3_helper, output_dir, galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                    LOG.info('File stored in {0}'.format(tmp_file))

                    # We have the file
                    # NOTE(review): when the file is missing nothing is recorded and
                    # the row stays at state=1 - confirm this is intended.
                    if os.path.isfile(tmp_file):
                        h5_file = h5py.File(tmp_file, 'r')
                        try:
                            galaxy_group = h5_file['galaxy']
                            pixel_group = galaxy_group['pixel']

                            file_names = []
                            for feature in features:
                                for layer in layers:
                                    LOG.info('Processing {0} - {1}'.format(feature, layer))
                                    file_names.append(build_fits_image(feature, layer, output_dir, galaxy_group, pixel_group, galaxy[GALAXY.c.name]))
                        finally:
                            # Close the HDF5 handle even if building an image fails
                            h5_file.close()

                        url = zip_files(s3_helper,
                                        get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]),
                                        uuid_string,
                                        file_names,
                                        output_dir)
                        connection.execute(
                            HDF5_REQUEST_GALAXY.update().
                            where(row_filter).
                            values(state=2, link=url, link_expires_at=datetime.now() + timedelta(days=10)))
                        result.error = None
                        result.link = url
                finally:
                    # Delete the temp files now we're done
                    shutil.rmtree(output_dir)
            else:
                connection.execute(HDF5_REQUEST_GALAXY.update().where(row_filter).values(state=3))
                result.error = 'Cannot process {0} ({1}) as the HDF5 file has not been generated'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id])
                LOG.info(result.error)
        except Exception:
            # Best effort per galaxy: record the failure and carry on with the
            # next one. KeyboardInterrupt/SystemExit are deliberately not caught.
            LOG.error('Major error')
            result.error = traceback.format_exc()
            connection.execute(HDF5_REQUEST_GALAXY.update().where(row_filter).values(state=3))

    send_email(email, results, features, layers)
def generate_files(connection, hdf5_request_galaxy_ids, email, features, layers, pixel_types):
    """
    Build the FITS files for the available galaxies of a request and email them.

    The function works in two passes:

    1. Every requested galaxy whose request row is still at state 0 is looked
       up on S3. Archived files that are not already restoring get a restore
       request (subject to restore_file_size_check); files that are directly
       readable are added to the list of available galaxies.
    2. If the number of available galaxies reaches
       ``len(hdf5_request_galaxy_ids) * GALAXY_EMAIL_THRESHOLD`` each available
       galaxy is processed (state=1 while running, state=2 plus link on
       success, state=3 on failure) and a single email is sent.

    :param connection: The database connection
    :param hdf5_request_galaxy_ids: rows exposing ``hdf5_request_galaxy_id``
        and ``galaxy_id`` attributes, one per requested galaxy
    :param email: the address the results are sent to
    :param features: the features to extract
    :param layers: the layers to extract
    :param pixel_types: the pixel types to extract
    :return: None
    """
    uuid_string = str(uuid.uuid4())
    results = []
    available_galaxies = []
    s3_helper = S3Helper()
    bucket_name = get_saved_files_bucket()

    # Check whether all the requested galaxies are available or not.
    for hdf5_request_galaxy in hdf5_request_galaxy_ids:
        galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
        # Re-read the request row so the current state is used
        hdf5_request_galaxy = connection.execute(
            select([HDF5_REQUEST_GALAXY]).where(
                HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id)).first()
        state = hdf5_request_galaxy.state

        # Compare by value: "is not 0" tests object identity, which is not a
        # reliable integer comparison.
        if state != 0:
            LOG.info('Skipping {0}, state is {1}'.format(galaxy[GALAXY.c.name], state))
            continue  # Skip

        key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])

        if s3_helper.file_exists(bucket_name, key):
            if s3_helper.file_archived(bucket_name, key):
                # file is archived
                if s3_helper.file_restoring(bucket_name, key):
                    # if file is restoring, just need to wait for it
                    LOG.info('Galaxy {0} is still restoring from glacier'.format(galaxy[GALAXY.c.name]))
                else:
                    # if file is not restoring, need to request.
                    file_size = s3_helper.file_size(bucket_name, key)

                    if restore_file_size_check(connection, bucket_name, file_size):
                        # We're good to restore
                        LOG.info('Making request for archived galaxy {0}'.format(galaxy[GALAXY.c.name]))
                        s3_helper.restore_archived_file(bucket_name, key)

                        connection.execute(
                            HDF5_REQUEST_GALAXY_SIZE.insert(),
                            hdf5_request_galaxy_id=hdf5_request_galaxy['hdf5_request_galaxy_id'],
                            size=file_size,
                            request_time=seconds_since_epoch(datetime.now()))
                    else:
                        # Don't restore or we risk spending a lot of money
                        LOG.info('Daily galaxy restore size hit. Cannot request archived galaxy.')
            else:
                # file is not archived
                LOG.info('Galaxy {0} is available in s3'.format(galaxy[GALAXY.c.name]))
                available_galaxies.append(hdf5_request_galaxy)
        else:
            LOG.error('Galaxy {0} does not exist on s3 or glacier!'.format(galaxy[GALAXY.c.name]))

    total_request_galaxies = len(hdf5_request_galaxy_ids)
    LOG.info('Need to have {0} galaxies available ({1} currently available)'.format(
        total_request_galaxies * GALAXY_EMAIL_THRESHOLD, len(available_galaxies)))

    if len(available_galaxies) >= total_request_galaxies * GALAXY_EMAIL_THRESHOLD:
        # Only proceed if more than the threshold of galaxies are available
        LOG.info('{0}/{1} (> {2}%) galaxies are available. Email will be sent'.format(
            len(available_galaxies), total_request_galaxies, GALAXY_EMAIL_THRESHOLD * 100))
        remaining_galaxies = total_request_galaxies - len(available_galaxies)

        for hdf5_request_galaxy in available_galaxies:
            result = HDF5ToFitsResult()
            results.append(result)

            # WHERE clause selecting this request row; reused by every state update
            row_filter = HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id
            connection.execute(HDF5_REQUEST_GALAXY.update().where(row_filter).values(state=1))

            # noinspection PyBroadException
            try:
                galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
                result.galaxy_name = galaxy[GALAXY.c.name]
                LOG.info('Processing {0} ({1}) for {2}'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], email))

                # make sure the galaxy is available
                if galaxy[GALAXY.c.status_id] == STORED or galaxy[GALAXY.c.status_id] == DELETED:
                    output_dir = tempfile.mkdtemp()
                    try:
                        s3_helper = S3Helper()

                        LOG.info('Getting HDF5 file to {0}'.format(output_dir))
                        tmp_file = get_hdf5_file(s3_helper, output_dir, galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                        LOG.info('File stored in {0}'.format(tmp_file))

                        # We have the file
                        if os.path.isfile(tmp_file):
                            int_flux_output = os.path.join(output_dir, 'intflux')
                            rad_output = os.path.join(output_dir, 'rad')

                            if not os.path.exists(int_flux_output):
                                os.mkdir(int_flux_output)

                            if not os.path.exists(rad_output):
                                os.mkdir(rad_output)

                            file_names = process_hdf5_file(
                                tmp_file,
                                galaxy[GALAXY.c.name],
                                galaxy[GALAXY.c.galaxy_id],
                                pixel_types,
                                features,
                                result,
                                layers,
                                output_dir,
                                rad_output,
                                int_flux_output,
                            )

                            url = zip_files(
                                s3_helper,
                                get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]),
                                uuid_string,
                                file_names,
                                output_dir
                            )

                            connection.execute(
                                HDF5_REQUEST_GALAXY.update().
                                where(row_filter).
                                values(state=2, link=url, link_expires_at=datetime.now() + timedelta(days=10)))

                            result.error = None
                            result.link = url
                    except S3ResponseError as e:  # Handling for a strange s3 error
                        LOG.error('Error retrieving galaxy {0} from s3. Retrying next run'.format(galaxy[GALAXY.c.name]))
                        LOG.error('{0}'.format(str(e)))
                        key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                        LOG.info('Key: {0}'.format(key))
                        LOG.info('Exists: {0}'.format(s3_helper.file_exists(bucket_name, key)))
                        result.error = traceback.format_exc()
                        # NOTE(review): the row is left at state=1 here while the
                        # availability pass above skips every non-zero state -
                        # confirm something resets it so the retry can happen.
                        remaining_galaxies += 1
                    finally:
                        # Delete the temp files now we're done
                        shutil.rmtree(output_dir)
                else:
                    connection.execute(HDF5_REQUEST_GALAXY.update().where(row_filter).values(state=3))
                    result.error = 'Cannot process {0} ({1}) as the HDF5 file has not been generated'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id])
                    LOG.info(result.error)
            except Exception:
                # Best effort per galaxy: record the failure and carry on with the
                # next one. KeyboardInterrupt/SystemExit are deliberately not caught.
                LOG.error('Major error')
                result.error = traceback.format_exc()
                connection.execute(HDF5_REQUEST_GALAXY.update().where(row_filter).values(state=3))

        # remaining_galaxies is only bound inside this branch, so the email is
        # sent from here.
        send_email(email, results, features, layers, pixel_types, remaining_galaxies)
def generate_files(connection, hdf5_request_galaxy_ids, email, features, layers, pixel_types):
    """
    Build the FITS files for the available galaxies of a request and email them.

    The function works in two passes:

    1. Every requested galaxy whose request row is still at state 0 is looked
       up on S3. Archived files that are not already restoring get a restore
       request (subject to restore_file_size_check); files that are directly
       readable are added to the list of available galaxies.
    2. If the number of available galaxies reaches
       ``len(hdf5_request_galaxy_ids) * GALAXY_EMAIL_THRESHOLD`` each available
       galaxy is processed (state=1 while running, state=2 plus link on
       success, state=3 on failure) and a single email is sent.

    :param connection: The database connection
    :param hdf5_request_galaxy_ids: rows exposing ``hdf5_request_galaxy_id``
        and ``galaxy_id`` attributes, one per requested galaxy
    :param email: the address the results are sent to
    :param features: the features to extract
    :param layers: the layers to extract
    :param pixel_types: the pixel types to extract
    :return: None
    """
    uuid_string = str(uuid.uuid4())
    results = []
    available_galaxies = []
    s3_helper = S3Helper()
    bucket_name = get_saved_files_bucket()

    # Check whether all the requested galaxies are available or not.
    for hdf5_request_galaxy in hdf5_request_galaxy_ids:
        galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
        # Re-read the request row so the current state is used
        hdf5_request_galaxy = connection.execute(
            select([HDF5_REQUEST_GALAXY]).where(
                HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id)).first()
        state = hdf5_request_galaxy.state

        # Compare by value: "is not 0" tests object identity, which is not a
        # reliable integer comparison.
        if state != 0:
            LOG.info('Skipping {0}, state is {1}'.format(galaxy[GALAXY.c.name], state))
            continue  # Skip

        key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])

        if s3_helper.file_exists(bucket_name, key):
            if s3_helper.file_archived(bucket_name, key):
                # file is archived
                if s3_helper.file_restoring(bucket_name, key):
                    # if file is restoring, just need to wait for it
                    LOG.info('Galaxy {0} is still restoring from glacier'.format(galaxy[GALAXY.c.name]))
                else:
                    # if file is not restoring, need to request.
                    file_size = s3_helper.file_size(bucket_name, key)

                    if restore_file_size_check(connection, bucket_name, file_size):
                        # We're good to restore
                        LOG.info('Making request for archived galaxy {0}'.format(galaxy[GALAXY.c.name]))
                        s3_helper.restore_archived_file(bucket_name, key)

                        connection.execute(
                            HDF5_REQUEST_GALAXY_SIZE.insert(),
                            hdf5_request_galaxy_id=hdf5_request_galaxy['hdf5_request_galaxy_id'],
                            size=file_size,
                            request_time=seconds_since_epoch(datetime.now()))
                    else:
                        # Don't restore or we risk spending a lot of money
                        LOG.info('Daily galaxy restore size hit. Cannot request archived galaxy.')
            else:
                # file is not archived
                LOG.info('Galaxy {0} is available in s3'.format(galaxy[GALAXY.c.name]))
                available_galaxies.append(hdf5_request_galaxy)
        else:
            LOG.error('Galaxy {0} does not exist on s3 or glacier!'.format(galaxy[GALAXY.c.name]))

    total_request_galaxies = len(hdf5_request_galaxy_ids)
    LOG.info('Need to have {0} galaxies available ({1} currently available)'.format(
        total_request_galaxies * GALAXY_EMAIL_THRESHOLD, len(available_galaxies)))

    if len(available_galaxies) >= total_request_galaxies * GALAXY_EMAIL_THRESHOLD:
        # Only proceed if more than the threshold of galaxies are available
        LOG.info('{0}/{1} (> {2}%) galaxies are available. Email will be sent'.format(
            len(available_galaxies), total_request_galaxies, GALAXY_EMAIL_THRESHOLD * 100))
        remaining_galaxies = total_request_galaxies - len(available_galaxies)

        for hdf5_request_galaxy in available_galaxies:
            result = HDF5ToFitsResult()
            results.append(result)

            # WHERE clause selecting this request row; reused by every state update
            row_filter = HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id
            connection.execute(HDF5_REQUEST_GALAXY.update().where(row_filter).values(state=1))

            # noinspection PyBroadException
            try:
                galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
                result.galaxy_name = galaxy[GALAXY.c.name]
                LOG.info('Processing {0} ({1}) for {2}'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], email))

                # make sure the galaxy is available
                if galaxy[GALAXY.c.status_id] == STORED or galaxy[GALAXY.c.status_id] == DELETED:
                    output_dir = tempfile.mkdtemp()
                    try:
                        s3_helper = S3Helper()

                        LOG.info('Getting HDF5 file to {0}'.format(output_dir))
                        tmp_file = get_hdf5_file(s3_helper, output_dir, galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                        LOG.info('File stored in {0}'.format(tmp_file))

                        # We have the file
                        if os.path.isfile(tmp_file):
                            int_flux_output = os.path.join(output_dir, 'intflux')
                            rad_output = os.path.join(output_dir, 'rad')

                            if not os.path.exists(int_flux_output):
                                os.mkdir(int_flux_output)

                            if not os.path.exists(rad_output):
                                os.mkdir(rad_output)

                            file_names = process_hdf5_file(
                                tmp_file,
                                galaxy[GALAXY.c.name],
                                galaxy[GALAXY.c.galaxy_id],
                                pixel_types,
                                features,
                                result,
                                layers,
                                output_dir,
                                rad_output,
                                int_flux_output,
                            )

                            url = zip_files(
                                s3_helper,
                                get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]),
                                uuid_string,
                                file_names,
                                output_dir
                            )

                            connection.execute(
                                HDF5_REQUEST_GALAXY.update().
                                where(row_filter).
                                values(state=2, link=url, link_expires_at=datetime.now() + timedelta(days=10)))

                            result.error = None
                            result.link = url
                    except S3ResponseError as e:  # Handling for a strange s3 error
                        LOG.error('Error retrieving galaxy {0} from s3. Retrying next run'.format(galaxy[GALAXY.c.name]))
                        LOG.error('{0}'.format(str(e)))
                        key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                        LOG.info('Key: {0}'.format(key))
                        LOG.info('Exists: {0}'.format(s3_helper.file_exists(bucket_name, key)))
                        result.error = traceback.format_exc()
                        # NOTE(review): the row is left at state=1 here while the
                        # availability pass above skips every non-zero state -
                        # confirm something resets it so the retry can happen.
                        remaining_galaxies += 1
                    finally:
                        # Delete the temp files now we're done
                        shutil.rmtree(output_dir)
                else:
                    connection.execute(HDF5_REQUEST_GALAXY.update().where(row_filter).values(state=3))
                    result.error = 'Cannot process {0} ({1}) as the HDF5 file has not been generated'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id])
                    LOG.info(result.error)
            except Exception:
                # Best effort per galaxy: record the failure and carry on with the
                # next one. KeyboardInterrupt/SystemExit are deliberately not caught.
                LOG.error('Major error')
                result.error = traceback.format_exc()
                connection.execute(HDF5_REQUEST_GALAXY.update().where(row_filter).values(state=3))

        # remaining_galaxies is only bound inside this branch, so the email is
        # sent from here.
        send_email(email, results, features, layers, pixel_types, remaining_galaxies)
exit(1) # Create a new request # Need to make: # HDF5_request # HDF5_request_feature # HDF5_request_galaxy # HDF5_request_layer # HDF5_request_pixel transaction = connection.begin() result = connection.execute(HDF5_REQUEST.insert(), profile_id=47016, email=args['email'], created_at=time.time()) connection.execute(HDF5_REQUEST_GALAXY.insert(), hdf5_request_id=result.inserted_primary_key, galaxy_id=args['galaxy_id']) for pixel_type in pixel_types: connection.execute(HDF5_REQUEST_PIXEL_TYPE.insert(), hdf5_request_id=result.inserted_primary_key, hdf5_pixel_type_id=pixel_type + 1) # database entries start at 1 for layer in layers: connection.execute(HDF5_REQUEST_LAYER.insert(), hdf5_request_id=result.inserted_primary_key, hdf5_layer_id=LAYERS[layer] + 1) for feature in features:
# Record a new HDF5 request built from the command-line arguments.
# The connection is always closed, and a failed insert rolls the whole
# request back instead of leaving a half-written one behind.
connection = engine.connect()
try:
    features, layers, pixel_types = get_features_and_layers_pixeltypes_cmd_line(args)
    if len(features) == 0 or len(layers) == 0:
        parser.print_help()
        exit(1)

    # Create a new request
    # Need to make:
    # HDF5_request
    # HDF5_request_feature
    # HDF5_request_galaxy
    # HDF5_request_layer
    # HDF5_request_pixel
    transaction = connection.begin()
    try:
        result = connection.execute(HDF5_REQUEST.insert(), profile_id=47016, email=args['email'], created_at=time.time())
        # Primary key of the new request row; links every child row below
        request_key = result.inserted_primary_key

        connection.execute(HDF5_REQUEST_GALAXY.insert(), hdf5_request_id=request_key, galaxy_id=args['galaxy_id'])

        for pixel_type in pixel_types:
            # database entries start at 1
            connection.execute(HDF5_REQUEST_PIXEL_TYPE.insert(), hdf5_request_id=request_key, hdf5_pixel_type_id=pixel_type + 1)

        for layer in layers:
            connection.execute(HDF5_REQUEST_LAYER.insert(), hdf5_request_id=request_key, hdf5_layer_id=LAYERS[layer] + 1)

        for feature in features:
            connection.execute(HDF5_REQUEST_FEATURE.insert(), hdf5_request_id=request_key, hdf5_feature_id=FEATURES[feature] + 1)

        transaction.commit()
    except BaseException:
        # Undo the partial request, then let the failure propagate
        transaction.rollback()
        raise

    LOG.info('All done')
finally:
    connection.close()