def validate_config_file(self, config_data): """ Method to validate an ingest config file. This uses the Ingest client for validation. Args: config_data: Returns: (bool) : Status of the validation Raises: BossError : For exceptions that happen during validation """ try: # Validate the schema self.config = Configuration(config_data) self.validator = self.config.get_validator() self.validator.schema = self.config.schema self.validator.validate_schema() except jsonschema.ValidationError as e: raise BossError("Schema validation failed! {}".format(e), ErrorCodes.UNABLE_TO_VALIDATE) except Exception as e: raise BossError(" Could not validate the schema file.{}".format(e), ErrorCodes.UNABLE_TO_VALIDATE) return True
def test_get_validator(self): """Test dynamically getting the validator class""" config = Configuration(self.example_config_data) config.load_plugins() v = config.get_validator() assert isinstance(v, BossValidatorV01)
def test_create(self): """Test creating a Configuration object""" config = Configuration(self.example_config_data) config.load_plugins() assert isinstance(config, Configuration) assert isinstance(config.tile_processor_class, TestTileProcessor) assert isinstance(config.path_processor_class, TestPathProcessor)
def test_get_backend_env_var(self): """Test dynamically getting the validator class""" config = Configuration(self.example_config_data) b = config.get_backend() b.setup() assert isinstance(b, BossBackend)
def test_to_json(self): """Test json serialization""" config = Configuration(self.example_config_data) config.load_plugins() json_data = config.to_json() json_dict = json.loads(json_data) assert json_dict == self.example_config_data
def test_get_backend(self): """Test dynamically getting the validator class""" patcher = mock.patch( 'ingestclient.core.backend.BossBackend.get_default_token_file_name' ) mock_cred_path = patcher.start() mock_cred_path.side_effect = token_name_side_effect config = Configuration(self.example_config_data) b = config.get_backend() b.setup(self.api_token) assert isinstance(b, BossBackend)
def validate_config_file(self, config_data): """ Method to validate an ingest config file. This uses the Ingest client for validation. Args: config_data: Returns: (bool) : Status of the validation Raises: BossError : For exceptions that happen during validation """ try: # Validate the schema self.config = Configuration(config_data) self.validator = self.config.get_validator() self.validator.schema = self.config.schema results = self.validator.validate() except jsonschema.ValidationError as e: raise BossError("Schema validation failed! {}".format(e), ErrorCodes.UNABLE_TO_VALIDATE) except Exception as e: raise BossError("Could not validate the schema file.{}".format(e), ErrorCodes.UNABLE_TO_VALIDATE) if len(results['error']) > 0: raise BossError('Could not validate the schema: ' + '\n'.join(results['error']), ErrorCodes.UNABLE_TO_VALIDATE) return True
def setUpClass(cls): cls.vol_name = 'test_vol' create_layer((1036, 1026, 78), (0, 0, 0), layer_name=cls.vol_name, dtype=np.uint8) cls.config_file = os.path.join( resource_filename("ingestclient", "test/data"), "boss-v0.2-cloudvolume.json") with open(cls.config_file, 'rt') as example_file: cls.example_config_data = json.load(example_file) cls.config = Configuration(cls.example_config_data) cls.config.load_plugins() # Point config at generated CloudVolume. cls.config.config_data["client"]["chunk_processor"]["params"][ "cloudpath"] = ('file://{}{}'.format(layer_path, cls.vol_name)) cls.chunk_procesor = cls.config.chunk_processor_class cls.chunk_procesor.setup(cls.config.get_chunk_processor_params()) cls.chunk_size = ( cls.config.config_data["ingest_job"]["chunk_size"]["x"], cls.config.config_data["ingest_job"]["chunk_size"]["y"], cls.config.config_data["ingest_job"]["chunk_size"]["z"], 1 ) # Time dimension.
def test_valid_config(self): schema_file = os.path.join( resource_filename("ingestclient", "schema"), "boss-v0.2-schema.json") with open(schema_file, 'r') as file_handle: schema = json.load(file_handle) config_file = os.path.join( resource_filename("ingestclient", "test/data"), "boss-v0.2-test.json") with open(config_file, 'rt') as example_file: config_data = json.load(example_file) config = Configuration(config_data) validator = config.get_validator() validator.schema = schema msgs = validator.validate() self.assertEqual(0, len(msgs['error']))
def setUpClass(cls): cls.config_file = os.path.join( resource_filename("ingestclient", "test/data"), "boss-v0.1-cloudvolume.json") with open(cls.config_file, 'rt') as example_file: cls.example_config_data = json.load(example_file) cls.config = Configuration(cls.example_config_data) cls.config.load_plugins()
def setUpClass(cls): cls.config_file = os.path.join( resource_filename("ingestclient", "test/data"), "boss-v0.1-zStack.json") with open(cls.config_file, 'rt') as example_file: cls.example_config_data = json.load(example_file) # inject the file path since we don't want to hardcode cls.example_config_data["client"]["path_processor"]["params"][ "root_dir"] = "example_z_stack" # Switch to S3 cls.example_config_data["client"]["tile_processor"]["params"][ "filesystem"] = "s3" cls.example_config_data["client"]["tile_processor"]["params"][ "bucket"] = "my_bucket" cls.config = Configuration(cls.example_config_data) cls.config.load_plugins() # Set up bucket cls.mock_s3 = mock_s3() cls.mock_s3.start() client = boto3.client('s3', region_name="us-east-1") _ = client.create_bucket(ACL='private', Bucket=cls.example_config_data["client"] ["tile_processor"]["params"]["bucket"]) waiter = client.get_waiter('bucket_exists') waiter.wait(Bucket=cls.example_config_data["client"]["tile_processor"] ["params"]["bucket"]) s3 = boto3.resource('s3') bucket = s3.Bucket(cls.example_config_data["client"]["tile_processor"] ["params"]["bucket"]) # Put images in S3 imgs = [ os.path.join( resource_filename("ingestclient", "test/data/example_z_stack"), "3253_my_stack_section000.png"), os.path.join( resource_filename("ingestclient", "test/data/example_z_stack"), "3254_my_stack_section001.png") ] keys = [ "example_z_stack/3253_my_stack_section000.png", "example_z_stack/3254_my_stack_section001.png" ] for key, img in zip(keys, imgs): # put file bucket.upload_file(img, key)
def test_no_tile_size(self): schema_file = os.path.join( resource_filename("ingestclient", "schema"), "boss-v0.2-schema.json") with open(schema_file, 'r') as file_handle: schema = json.load(file_handle) config_data = self.get_skeleton_config() config_data['ingest_job']['ingest_type'] = 'tile' config_data['client']['tile_processor'] = { "class": "ingestclient.plugins.stack.ZindexStackTileProcessor", "params": {} } config = Configuration(config_data) validator = config.get_validator() validator.schema = schema msgs = validator.validate() self.assertEqual(1, len(msgs['error'])) self.assertRegex(msgs['error'][0], '.*tile_size.*')
def test_no_chunk_processor(self): schema_file = os.path.join( resource_filename("ingestclient", "schema"), "boss-v0.2-schema.json") with open(schema_file, 'r') as file_handle: schema = json.load(file_handle) config_data = self.get_skeleton_config() config_data['ingest_job']['ingest_type'] = 'volumetric' config_data['ingest_job']['chunk_size'] = { 'x': 1024, 'y': 1024, 'z': 64 } config = Configuration(config_data) validator = config.get_validator() validator.schema = schema msgs = validator.validate() self.assertEqual(1, len(msgs['error'])) self.assertRegex(msgs['error'][0], '.*chunk_processor.*')
def test_no_chunk_size(self): schema_file = os.path.join( resource_filename("ingestclient", "schema"), "boss-v0.2-schema.json") with open(schema_file, 'r') as file_handle: schema = json.load(file_handle) config_data = self.get_skeleton_config() config_data['ingest_job']['ingest_type'] = 'volumetric' config_data['client']['chunk_processor'] = { "class": "ingestclient.plugins.cloudvolume.CloudVolumeChunkProcessor", "params": { "cloudpath": "gs://neuroglancer/foo/bar" } } config = Configuration(config_data) validator = config.get_validator() validator.schema = schema msgs = validator.validate() self.assertEqual(1, len(msgs['error'])) self.assertRegex(msgs['error'][0], '.*chunk_size.*')
def setUpClass(cls): cls.config_file = os.path.join( resource_filename("ingestclient", "test/data"), "boss-v0.1-zStack.json") with open(cls.config_file, 'rt') as example_file: cls.example_config_data = json.load(example_file) # inject the file path since we don't want to hardcode cls.example_config_data["client"]["path_processor"]["params"][ "root_dir"] = resource_filename("ingestclient", "test/data/example_z_stack") cls.config = Configuration(cls.example_config_data) cls.config.load_plugins()
def setUpClass(cls): cls.config_file = os.path.join( resource_filename("ingestclient", "test/data"), "boss-v0.1-singleMultipageTiff.json") with open(cls.config_file, 'rt') as example_file: cls.example_config_data = json.load(example_file) # inject the file path since we don't want to hardcode cls.example_config_data["client"]["path_processor"]["params"][ "z_0"] = os.path.join( resource_filename("ingestclient", "test/data"), "test_multipage.tif") cls.config = Configuration(cls.example_config_data) cls.config.load_plugins()
class IngestManager: """ Helper class for the boss ingest service """ def __init__(self): """ Init function """ self.job = None self.owner = None self.config = None self.validator = None self.collection = None self.experiment = None self.channel = None self.resolution = 0 self.nd_proj = None # Some stats for testing self.file_index = 0 self.num_of_chunks = 0 self.count_of_tiles = 0 def validate_config_file(self, config_data): """ Method to validate an ingest config file. This uses the Ingest client for validation. Args: config_data: Returns: (bool) : Status of the validation Raises: BossError : For exceptions that happen during validation """ try: # Validate the schema self.config = Configuration(config_data) self.validator = self.config.get_validator() self.validator.schema = self.config.schema results = self.validator.validate() except jsonschema.ValidationError as e: raise BossError("Schema validation failed! {}".format(e), ErrorCodes.UNABLE_TO_VALIDATE) except Exception as e: raise BossError("Could not validate the schema file.{}".format(e), ErrorCodes.UNABLE_TO_VALIDATE) if len(results['error']) > 0: raise BossError('Could not validate the schema: ' + '\n'.join(results['error']), ErrorCodes.UNABLE_TO_VALIDATE) return True def validate_properties(self): """ Validate the Collection, experiment and channel being used for the ingest job Returns: (bool) : Status of the validation Raises: BossError : If the collection, experiment or channel are not valid """ # Verify Collection, Experiment and channel try: self.collection = Collection.objects.get(name=self.config.config_data["database"]["collection"]) self.experiment = Experiment.objects.get(name=self.config.config_data["database"]["experiment"], collection=self.collection) self.channel = Channel.objects.get(name=self.config.config_data["database"]["channel"], experiment=self.experiment) self.resolution = self.channel.base_resolution except Collection.DoesNotExist: raise BossError("Collection {} not found".format(self.collection), ErrorCodes.RESOURCE_NOT_FOUND) except Experiment.DoesNotExist: raise BossError("Experiment {} not found".format(self.experiment), ErrorCodes.RESOURCE_NOT_FOUND) except Channel.DoesNotExist: raise BossError("Channel {} not found".format(self.channel), ErrorCodes.RESOURCE_NOT_FOUND) # TODO If channel already exists, check corners to see if data exists. If so question user for overwrite # TODO Check tile size - error if too big return True def setup_ingest(self, creator, config_data): """ Setup the ingest job. This is the primary method for the ingest manager. It creates the ingest job and queues required for the ingest. It also uploads the messages for the ingest Args: creator: The validated user from the request to create the ingest jon config_data : Config data to create the ingest job Returns: IngestJob : data model containing the ingest job Raises: BossError : For all exceptions that happen """ # Validate config data and schema self.owner = creator try: valid_schema = self.validate_config_file(config_data) valid_prop = self.validate_properties() if valid_schema is True and valid_prop is True: # create the django model for the job self.job = self.create_ingest_job() # create the additional resources needed for the ingest # initialize the ndingest project for use with the library proj_class = BossIngestProj.load() self.nd_proj = proj_class(self.collection.name, self.experiment.name, self.channel.name, self.resolution, self.job.id) # Create the upload queue upload_queue = self.create_upload_queue() self.job.upload_queue = upload_queue.url # Create the ingest queue if self.job.ingest_type == IngestJob.TILE_INGEST: ingest_queue = self.create_ingest_queue() self.job.ingest_queue = ingest_queue.url tile_index_queue = self.create_tile_index_queue() self.add_trigger_tile_uploaded_lambda_from_queue(tile_index_queue.arn) self.create_tile_error_queue() elif self.job.ingest_type == IngestJob.VOLUMETRIC_INGEST: # Will the management console be ok with ingest_queue being null? pass # Call the step function to populate the queue. self.job.step_function_arn = self.populate_upload_queue(self.job) # Compute # of tiles or chunks in the job x_extent = self.job.x_stop - self.job.x_start y_extent = self.job.y_stop - self.job.y_start z_extent = self.job.z_stop - self.job.z_start t_extent = self.job.t_stop - self.job.t_start num_tiles_in_x = math.ceil(x_extent/self.job.tile_size_x) num_tiles_in_y = math.ceil(y_extent/self.job.tile_size_y) num_tiles_in_z = math.ceil(z_extent/self.job.tile_size_z) num_tiles_in_t = math.ceil(t_extent / self.job.tile_size_t) self.job.tile_count = num_tiles_in_x * num_tiles_in_y * num_tiles_in_z * num_tiles_in_t self.job.save() except BossError as err: raise BossError(err.message, err.error_code) except Exception as e: raise BossError("Unable to create the upload and ingest queue.{}".format(e), ErrorCodes.BOSS_SYSTEM_ERROR) return self.job def create_ingest_job(self): """ Create a new ingest job using the parameters in the ingest config data file Returns: IngestJob : Data model with the current ingest job Raises: BossError : For serialization errors that occur while creating a ingest job or if ingest_type is invalid """ ingest_job_serializer_data = { 'creator': self.owner, 'collection': self.collection.name, 'experiment': self.experiment.name, 'channel': self.channel.name, 'collection_id': self.collection.id, 'experiment_id': self.experiment.id, 'channel_id': self.channel.id, 'config_data': json.dumps(self.config.config_data), 'resolution': self.resolution, 'x_start': self.config.config_data["ingest_job"]["extent"]["x"][0], 'x_stop': self.config.config_data["ingest_job"]["extent"]["x"][1], 'y_start': self.config.config_data["ingest_job"]["extent"]["y"][0], 'y_stop': self.config.config_data["ingest_job"]["extent"]["y"][1], 'z_start': self.config.config_data["ingest_job"]["extent"]["z"][0], 'z_stop': self.config.config_data["ingest_job"]["extent"]["z"][1], 't_start': self.config.config_data["ingest_job"]["extent"]["t"][0], 't_stop': self.config.config_data["ingest_job"]["extent"]["t"][1], } if "ingest_type" in self.config.config_data["ingest_job"]: ingest_job_serializer_data["ingest_type"] = self._convert_string_to_ingest_job( self.config.config_data["ingest_job"]["ingest_type"]) else: ingest_job_serializer_data["ingest_type"] = IngestJob.TILE_INGEST if ingest_job_serializer_data["ingest_type"] == IngestJob.TILE_INGEST: ingest_job_serializer_data['tile_size_x'] = self.config.config_data["ingest_job"]["tile_size"]["x"] ingest_job_serializer_data['tile_size_y'] = self.config.config_data["ingest_job"]["tile_size"]["y"] #ingest_job_serializer_data['tile_size_z'] = self.config.config_data["ingest_job"]["tile_size"]["z"] ingest_job_serializer_data['tile_size_z'] = 1 ingest_job_serializer_data['tile_size_t'] = self.config.config_data["ingest_job"]["tile_size"]["t"] elif ingest_job_serializer_data["ingest_type"] == IngestJob.VOLUMETRIC_INGEST: ingest_job_serializer_data['tile_size_x'] = self.config.config_data["ingest_job"]["chunk_size"]["x"] ingest_job_serializer_data['tile_size_y'] = self.config.config_data["ingest_job"]["chunk_size"]["y"] ingest_job_serializer_data['tile_size_z'] = self.config.config_data["ingest_job"]["chunk_size"]["z"] ingest_job_serializer_data['tile_size_t'] = 1 else: raise BossError('Invalid ingest_type: {}'.format(ingest_job_serializer_data["ingest_type"]), ErrorCodes.UNABLE_TO_VALIDATE) serializer = IngestJobCreateSerializer(data=ingest_job_serializer_data) if serializer.is_valid(): ingest_job = serializer.save() return ingest_job else: raise BossError("{}".format(serializer.errors), ErrorCodes.SERIALIZATION_ERROR) def _convert_string_to_ingest_job(self, s): """ Convert a string representation of ingest_type to int. Args: s (str): Returns: (int): IngestJob.TILE_INGEST | IngestJob.VOLUMETRIC_INGEST Raises: (BossError): If string is invalid. """ lowered = s.lower() if lowered == 'tile': return IngestJob.TILE_INGEST if lowered == 'volumetric': return IngestJob.VOLUMETRIC_INGEST raise BossError('Unknown ingest_type: {}'.format(s)) def get_ingest_job(self, ingest_job_id): """ Get the ingest job with the specific id Args: ingest_job_id: Id of the ingest job Returns: IngestJob : Data model with the ingest job if the id is valid Raises: BossError : If the ingets job id does not exist """ try: ingest_job = IngestJob.objects.get(id=ingest_job_id) return ingest_job except IngestJob.DoesNotExist: raise BossError("The ingest job with id {} does not exist".format(str(ingest_job_id)), ErrorCodes.OBJECT_NOT_FOUND) def get_ingest_job_upload_queue(self, ingest_job): """ Return the upload queue for an ingest job Args: ingest_job: Ingest job model Returns: Ndingest.uploadqueue """ proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) queue = UploadQueue(self.nd_proj, endpoint_url=None) return queue def get_ingest_job_tile_index_queue(self, ingest_job): """ Return the tile index queue for an ingest job Args: ingest_job: Ingest job model Returns: ndingest.TileIndexQueue """ proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) queue = TileIndexQueue(self.nd_proj, endpoint_url=None) return queue def get_ingest_job_tile_error_queue(self, ingest_job): """ Return the tile index queue for an ingest job Args: ingest_job: Ingest job model Returns: ndingest.TileIndexQueue """ proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) queue = TileErrorQueue(self.nd_proj, endpoint_url=None) return queue def get_ingest_job_ingest_queue(self, ingest_job): """ Return the ingest queue for an ingest job Args: ingest_job: Ingest job model Returns: Ndingest.ingestqueue """ proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) queue = IngestQueue(self.nd_proj, endpoint_url=None) return queue def verify_ingest_job(self, ingest_job): """ Verify that all tiles ingested successfully Args: ingest_job (IngestJob): Returns: (bool): True == verified """ if ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST: # ToDo: check lambda deadletter queue. return True try: csv_file = query_tile_index(ingest_job.id, TILE_INDEX, bossutils.aws.get_region()) if csv_file is not None: upload_queue = self.get_ingest_job_upload_queue(ingest_job) args = self._generate_upload_queue_args(ingest_job) patch_upload_queue(upload_queue.queue, args, csv_file) return False # success return True except Exception as e: raise BossError("Unable to verify ingest job: {}".format(e), ErrorCodes.BOSS_SYSTEM_ERROR) def cleanup_ingest_job(self, ingest_job, job_status): """ Delete or complete an ingest job with a specific id. Note this deletes the queues, credentials and all the remaining tiles in the tile bucket for this job id. It does not delete the ingest job datamodel but changes its state. Args: ingest_job: Ingest job to cleanup job_status(int): Status to update to Returns: (int): ingest job id for the job that was successfully deleted Raises: BossError : If the the job id is not valid or any exception happens in deletion process """ try: # cleanup ingest job proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) # delete the queues self.delete_upload_queue() if ingest_job.ingest_type != IngestJob.VOLUMETRIC_INGEST: self.delete_ingest_queue() self.delete_tile_index_queue() self.delete_tile_error_queue() # delete any pending entries in the tile index database and tile bucket # Commented out due to removal of tile index's GSI. # self.delete_tiles(ingest_job) ingest_job.status = job_status ingest_job.ingest_queue = None ingest_job.upload_queue = None ingest_job.end_date = timezone.now() ingest_job.save() # Remove ingest credentials for a job self.remove_ingest_credentials(ingest_job.id) except Exception as e: raise BossError("Unable to cleanup the upload queue.{}".format(e), ErrorCodes.BOSS_SYSTEM_ERROR) except IngestJob.DoesNotExist: raise BossError("Ingest job with id {} does not exist".format(ingest_job.id), ErrorCodes.OBJECT_NOT_FOUND) return ingest_job.id def create_upload_queue(self): """ Create an upload queue for an ingest job using the ndingest library Returns: UploadQueue : Returns a upload queue object """ UploadQueue.createQueue(self.nd_proj, endpoint_url=None) queue = UploadQueue(self.nd_proj, endpoint_url=None) return queue def create_tile_index_queue(self): """ Create an tile index queue for an ingest job using the ndingest library Returns: TileIndexQueue : Returns a tile index queue object """ TileIndexQueue.createQueue(self.nd_proj, endpoint_url=None) queue = TileIndexQueue(self.nd_proj, endpoint_url=None) return queue def create_tile_error_queue(self): """ Create an tile error queue for an ingest job using the ndingest library Returns: TileErrorQueue : Returns a tile index queue object """ TileErrorQueue.createQueue(self.nd_proj, endpoint_url=None) queue = TileErrorQueue(self.nd_proj, endpoint_url=None) return queue def create_ingest_queue(self): """ Create an ingest queue for an ingest job using the ndingest library Returns: IngestQueue : Returns a ingest queue object """ IngestQueue.createQueue(self.nd_proj, endpoint_url=None) queue = IngestQueue(self.nd_proj, endpoint_url=None) return queue def delete_upload_queue(self): """ Delete the current upload queue Returns: None """ UploadQueue.deleteQueue(self.nd_proj, endpoint_url=None) def delete_tile_index_queue(self): """ Delete the current tile index queue. Also removes the queue as an event trigger for the tile uploaded lambda. Returns: None """ # self.remove_trigger_tile_uploaded_lambda_from_queue(queue.arn) TileIndexQueue.deleteQueue(self.nd_proj, endpoint_url=None, delete_deadletter_queue=True) def delete_tile_error_queue(self): """ Delete the current tile error queue Returns: None """ TileErrorQueue.deleteQueue(self.nd_proj, endpoint_url=None) def delete_ingest_queue(self): """ Delete the current ingest queue Returns: None """ IngestQueue.deleteQueue(self.nd_proj, endpoint_url=None) def add_trigger_tile_uploaded_lambda_from_queue(self, queue_arn, num_msgs=1): """ Adds an SQS event trigger to the tile uploaded lambda. Args: queue_arn (str): Arn of SQS queue that will be the trigger source. num_msgs (optional[int]): Number of messages to send to the lambda. Defaults to 1, max 10. Raises: (ValueError): if num_msgs is greater than the SQS max batch size. """ if num_msgs < 1 or num_msgs > MAX_SQS_BATCH_SIZE: raise ValueError('trigger_tile_uploaded_lambda_from_queue(): Bad num_msgs: {}'.format(num_msgs)) client = boto3.client('lambda', region_name=bossutils.aws.get_region()) client.create_event_source_mapping( EventSourceArn=queue_arn, FunctionName=TILE_UPLOADED_LAMBDA, BatchSize=num_msgs) def remove_trigger_tile_uploaded_lambda_from_queue(self, queue_arn): """ Removes an SQS event triggger from the tile uploaded lambda. Args: queue_arn (str): Arn of SQS queue that will be the trigger source. """ client = boto3.client('lambda', region_name=bossutils.aws.get_region()) resp = client.list_event_source_mappings( EventSourceArn=queue_arn, FunctionName=TILE_UPLOADED_LAMBDA) for evt in resp['EventSourceMappings']: client.delete_event_source_mapping(UUID=evt['UUID']) def get_tile_bucket(self): """ Get the name of the ingest tile bucket Returns: Str: Name of the Tile bucket """ return TileBucket.getBucketName() def populate_upload_queue(self, job): """Execute the populate_upload_queue Step Function Args: job (IngestJob): Returns: (string): ARN of the StepFunction Execution started Raises: (BossError) : if there is no valid ingest job """ args = self._generate_upload_queue_args(job) if job.ingest_type == IngestJob.TILE_INGEST: args['upload_sfn'] = config['sfn']['upload_sfn'] elif job.ingest_type == IngestJob.VOLUMETRIC_INGEST: args['upload_sfn'] = config['sfn']['volumetric_upload_sfn'] else: raise BossError( "Ingest job's ingest_type has invalid value: {}".format( job.ingest_type), ErrorCodes.UNABLE_TO_VALIDATE) session = bossutils.aws.get_session() populate_sfn = config['sfn']['populate_upload_queue'] arn = bossutils.aws.sfn_execute(session, populate_sfn, args) return arn def _generate_upload_queue_args(self, ingest_job): """ Generate dictionary to include in messages placed in the tile upload queue. Args: ingest_job (IngestJob): Returns: (dict) Raises: (BossError): If ingest_job.ingest_type invalid. """ bosskey = ingest_job.collection + CONNECTOR + ingest_job.experiment + CONNECTOR + ingest_job.channel lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key [col_id, exp_id, ch_id] = lookup_key.split('&') args = { 'job_id': ingest_job.id, 'upload_queue': ingest_job.upload_queue, 'ingest_queue': ingest_job.ingest_queue, 'resolution': ingest_job.resolution, 'project_info': lookup_key.split(CONNECTOR), 'ingest_type': ingest_job.ingest_type, 't_start': ingest_job.t_start, 't_stop': ingest_job.t_stop, 't_tile_size': 1, 'x_start': ingest_job.x_start, 'x_stop': ingest_job.x_stop, 'x_tile_size': ingest_job.tile_size_x, 'y_start': ingest_job.y_start, 'y_stop': ingest_job.y_stop, 'y_tile_size': ingest_job.tile_size_y, 'z_start': ingest_job.z_start, 'z_stop': ingest_job.z_stop, 'z_tile_size': 1 } if ingest_job.ingest_type == IngestJob.TILE_INGEST: # Always the Boss cuboid z size for tile jobs. args['z_chunk_size'] = 16 elif ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST: # tile_size_* holds the chunk size dimensions for volumetric jobs. args['z_chunk_size'] = ingest_job.tile_size_z else: raise BossError( "Ingest job's ingest_type has invalid value: {}".format( self.job.ingest_type), ErrorCodes.UNABLE_TO_VALIDATE) return args def invoke_ingest_lambda(self, ingest_job, num_invokes=1): """Method to trigger extra lambda functions to make sure all the ingest jobs that are actually fully populated kick through Args: ingest_job: Ingest job object num_invokes(int): number of invocations to fire Returns: """ bosskey = ingest_job.collection + CONNECTOR + ingest_job.experiment + CONNECTOR + ingest_job.channel lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key [col_id, exp_id, ch_id] = lookup_key.split('&') project_info = [col_id, exp_id, ch_id] fake_chunk_key = (BossBackend(self.config)).encode_chunk_key(16, project_info, ingest_job.resolution, 0, 0, 0, 0) event = {"ingest_job": ingest_job.id, "chunk_key": fake_chunk_key, "function-name": INGEST_LAMBDA, "lambda-name": "ingest"} # Invoke Ingest lambda functions lambda_client = boto3.client('lambda', region_name=bossutils.aws.get_region()) for _ in range(0, num_invokes): lambda_client.invoke(FunctionName=INGEST_LAMBDA, InvocationType='Event', Payload=json.dumps(event).encode()) def delete_tiles(self, ingest_job): """ Delete all remaining tiles from the tile index database and tile bucket 5/24/2018 - This code depends on a GSI for the tile index. The GSI was removed because its key didn't shard well. Cleanup will now be handled by TTL policies applied to the tile bucket and the tile index. This method will be removed once that code is merged. Args: ingest_job: Ingest job model Returns: None Raises: BossError : For exceptions that happen while deleting the tiles and index """ try: # Get all the chunks for a job tiledb = BossTileIndexDB(ingest_job.collection + '&' + ingest_job.experiment) tilebucket = TileBucket(ingest_job.collection + '&' + ingest_job.experiment) chunks = list(tiledb.getTaskItems(ingest_job.id)) for chunk in chunks: # delete each tile in the chunk for key in chunk['tile_uploaded_map']: response = tilebucket.deleteObject(key) tiledb.deleteCuboid(chunk['chunk_key'], ingest_job.id) except Exception as e: raise BossError("Exception while deleteing tiles for the ingest job {}. {}".format(ingest_job.id, e), ErrorCodes.BOSS_SYSTEM_ERROR) def generate_ingest_credentials(self, ingest_job): """ Create new ingest credentials for a job Args: ingest_job: Ingest job model Returns: None Raises: (ValueError): On bad ingest_type """ # Generate credentials for the ingest_job upload_queue = self.get_ingest_job_upload_queue(ingest_job) tile_index_queue = None ingest_creds = IngestCredentials() if ingest_job.ingest_type == IngestJob.TILE_INGEST: bucket_name = TileBucket.getBucketName() tile_index_queue = self.get_ingest_job_tile_index_queue(ingest_job) elif ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST: bucket_name = INGEST_BUCKET else: raise ValueError('Unknown ingest_type: {}'.format(ingest_job.ingest_type)) policy = BossUtil.generate_ingest_policy(ingest_job.id, upload_queue, tile_index_queue, bucket_name, ingest_type=ingest_job.ingest_type) ingest_creds.generate_credentials(ingest_job.id, policy.arn) def remove_ingest_credentials(self, job_id): """ Remove the ingest credentials for a job Args: job_id: The id of the ingest job Returns: status """ # Create the credentials for the job ingest_creds = IngestCredentials() ingest_creds.remove_credentials(job_id) status = BossUtil.delete_ingest_policy(job_id) return status
class IngestManager: """ Helper class for the boss ingest service """ def __init__(self): """ Init function """ self.job = None self.owner = None self.config = None self.validator = None self.collection = None self.experiment = None self.channel = None self.resolution = 0 self.nd_proj = None # Some stats for testing self.file_index = 0 self.num_of_chunks = 0 self.count_of_tiles = 0 def validate_config_file(self, config_data): """ Method to validate an ingest config file. This uses the Ingest client for validation. Args: config_data: Returns: (bool) : Status of the validation Raises: BossError : For exceptions that happen during validation """ try: # Validate the schema self.config = Configuration(config_data) self.validator = self.config.get_validator() self.validator.schema = self.config.schema self.validator.validate_schema() except jsonschema.ValidationError as e: raise BossError("Schema validation failed! {}".format(e), ErrorCodes.UNABLE_TO_VALIDATE) except Exception as e: raise BossError(" Could not validate the schema file.{}".format(e), ErrorCodes.UNABLE_TO_VALIDATE) return True def validate_properties(self): """ Validate the Collection, experiment and channel being used for the ingest job Returns: (bool) : Status of the validation Raises: BossError : If the collection, experiment or channel are not valid """ # Verify Collection, Experiment and channel try: self.collection = Collection.objects.get( name=self.config.config_data["database"]["collection"]) self.experiment = Experiment.objects.get( name=self.config.config_data["database"]["experiment"], collection=self.collection) self.channel = Channel.objects.get( name=self.config.config_data["database"]["channel"], experiment=self.experiment) self.resolution = self.channel.base_resolution except Collection.DoesNotExist: raise BossError("Collection {} not found".format(self.collection), ErrorCodes.RESOURCE_NOT_FOUND) except Experiment.DoesNotExist: raise BossError("Experiment {} not found".format(self.experiment), ErrorCodes.RESOURCE_NOT_FOUND) except Channel.DoesNotExist: raise BossError("Channel {} not found".format(self.channel), ErrorCodes.RESOURCE_NOT_FOUND) # TODO If channel already exists, check corners to see if data exists. If so question user for overwrite # TODO Check tile size - error if too big return True def setup_ingest(self, creator, config_data): """ Setup the ingest job. This is the primary method for the ingest manager. It creates the ingest job and queues required for the ingest. It also uploads the messages for the ingest Args: creator: The validated user from the request to create the ingest jon config_data : Config data to create the ingest job Returns: IngestJob : data model containing the ingest job Raises: BossError : For all exceptions that happen """ # Validate config data and schema self.owner = creator try: valid_schema = self.validate_config_file(config_data) valid_prop = self.validate_properties() if valid_schema is True and valid_prop is True: # create the django model for the job self.job = self.create_ingest_job() # create the additional resources needed for the ingest # initialize the ndingest project for use with the library proj_class = BossIngestProj.load() self.nd_proj = proj_class(self.collection.name, self.experiment.name, self.channel.name, self.resolution, self.job.id) # Create the upload queue upload_queue = self.create_upload_queue() self.job.upload_queue = upload_queue.url # Create the ingest queue ingest_queue = self.create_ingest_queue() self.job.ingest_queue = ingest_queue.url # Call the step function to populate the queue. self.job.step_function_arn = self.populate_upload_queue() # Compute # of tiles in the job x_extent = self.job.x_stop - self.job.x_start y_extent = self.job.y_stop - self.job.y_start z_extent = self.job.z_stop - self.job.z_start t_extent = self.job.t_stop - self.job.t_start num_tiles_in_x = math.ceil(x_extent / self.job.tile_size_x) num_tiles_in_y = math.ceil(y_extent / self.job.tile_size_y) num_tiles_in_z = math.ceil(z_extent / self.job.tile_size_z) num_tiles_in_t = math.ceil(t_extent / self.job.tile_size_t) self.job.tile_count = num_tiles_in_x * num_tiles_in_y * num_tiles_in_z * num_tiles_in_t self.job.save() # tile_bucket = TileBucket(self.job.collection + '&' + self.job.experiment) # self.create_ingest_credentials(upload_queue, tile_bucket) except BossError as err: raise BossError(err.message, err.error_code) except Exception as e: raise BossError( "Unable to create the upload and ingest queue.{}".format(e), ErrorCodes.BOSS_SYSTEM_ERROR) return self.job def create_ingest_job(self): """ Create a new ingest job using the parameters in the ingest config data file Returns: IngestJob : Data model with the current ingest job Raises: BossError : For serialization errors that occur while creating a ingest job """ ingest_job_serializer_data = { 'creator': self.owner, 'collection': self.collection.name, 'experiment': self.experiment.name, 'channel': self.channel.name, 'config_data': json.dumps(self.config.config_data), 'resolution': self.resolution, 'x_start': self.config.config_data["ingest_job"]["extent"]["x"][0], 'x_stop': self.config.config_data["ingest_job"]["extent"]["x"][1], 'y_start': self.config.config_data["ingest_job"]["extent"]["y"][0], 'y_stop': self.config.config_data["ingest_job"]["extent"]["y"][1], 'z_start': self.config.config_data["ingest_job"]["extent"]["z"][0], 'z_stop': self.config.config_data["ingest_job"]["extent"]["z"][1], 't_start': self.config.config_data["ingest_job"]["extent"]["t"][0], 't_stop': self.config.config_data["ingest_job"]["extent"]["t"][1], 'tile_size_x': self.config.config_data["ingest_job"]["tile_size"]["x"], 'tile_size_y': self.config.config_data["ingest_job"]["tile_size"]["y"], 'tile_size_z': self.config.config_data["ingest_job"]["tile_size"]["z"], 'tile_size_t': self.config.config_data["ingest_job"]["tile_size"]["t"], } serializer = IngestJobCreateSerializer(data=ingest_job_serializer_data) if serializer.is_valid(): ingest_job = serializer.save() return ingest_job else: raise BossError("{}".format(serializer.errors), ErrorCodes.SERIALIZATION_ERROR) def get_ingest_job(self, ingest_job_id): """ Get the ingest job with the specific id Args: ingest_job_id: Id of the ingest job Returns: IngestJob : Data model with the ingest job if the id is valid Raises: BossError : If the ingets job id does not exist """ try: ingest_job = IngestJob.objects.get(id=ingest_job_id) return ingest_job except IngestJob.DoesNotExist: raise BossError( "The ingest job with id {} does not exist".format( str(ingest_job_id)), ErrorCodes.OBJECT_NOT_FOUND) def get_ingest_job_upload_queue(self, ingest_job): """ Return the upload queue for an ingest job Args: ingest_job: Ingest job model Returns: Ndingest.uploadqueue """ proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) queue = UploadQueue(self.nd_proj, endpoint_url=None) return queue def get_ingest_job_ingest_queue(self, ingest_job): """ Return the ingest queue for an ingest job Args: ingest_job: Ingest job model Returns: Ndingest.ingestqueue """ proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) queue = IngestQueue(self.nd_proj, endpoint_url=None) return queue def cleanup_ingest_job(self, ingest_job, job_status): """ Delete or complete an ingest job with a specific id. Note this deletes the queues, credentials and all the remaining tiles in the tile bucket for this job id. It does not delete the ingest job datamodel but changes its state. Args: ingest_job: Ingest job to cleanup job_status(int): Status to update to Returns: (int): ingest job id for the job that was successfully deleted Raises: BossError : If the the job id is not valid or any exception happens in deletion process """ try: # cleanup ingest job proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) # delete the ingest and upload_queue self.delete_upload_queue() self.delete_ingest_queue() # delete any pending entries in the tile index database and tile bucket self.delete_tiles(ingest_job) ingest_job.status = job_status ingest_job.ingest_queue = None ingest_job.upload_queue = None ingest_job.end_date = timezone.now() ingest_job.save() # Remove ingest credentials for a job self.remove_ingest_credentials(ingest_job.id) except Exception as e: raise BossError("Unable to cleanup the upload queue.{}".format(e), ErrorCodes.BOSS_SYSTEM_ERROR) except IngestJob.DoesNotExist: raise BossError( "Ingest job with id {} does not exist".format(ingest_job.id), ErrorCodes.OBJECT_NOT_FOUND) return ingest_job.id def create_upload_queue(self): """ Create an upload queue for an ingest job using the ndingest library Returns: UploadQueue : Returns a upload queue object """ UploadQueue.createQueue(self.nd_proj, endpoint_url=None) queue = UploadQueue(self.nd_proj, endpoint_url=None) return queue def create_ingest_queue(self): """ Create an ingest queue for an ingest job using the ndingest library Returns: IngestQueue : Returns a ingest queue object """ IngestQueue.createQueue(self.nd_proj, endpoint_url=None) queue = IngestQueue(self.nd_proj, endpoint_url=None) return queue def delete_upload_queue(self): """ Delete the current upload queue Returns: None """ UploadQueue.deleteQueue(self.nd_proj, endpoint_url=None) def delete_ingest_queue(self): """ Delete the current ingest queue Returns: None """ IngestQueue.deleteQueue(self.nd_proj, endpoint_url=None) def get_tile_bucket(self): """ Get the name of the ingest tile bucket Returns: Str: Name of the Tile bucket """ return TileBucket.getBucketName() def populate_upload_queue(self): """Execute the populate_upload_queue Step Function Returns: string: ARN of the StepFunction Execution started Raises: BossError : if there is no valid ingest job """ if self.job is None: raise BossError( "Unable to generate upload tasks for the ingest service. Please specify a ingest job", ErrorCodes.UNABLE_TO_VALIDATE) ingest_job = self.job bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key [col_id, exp_id, ch_id] = lookup_key.split('&') project_info = [col_id, exp_id, ch_id] # TODO DP ???: create IngestJob method that creates the StepFunction arguments? args = { 'upload_sfn': config['sfn']['upload_sfn'], 'job_id': ingest_job.id, 'upload_queue': ingest_job.upload_queue, 'ingest_queue': ingest_job.ingest_queue, 'resolution': ingest_job.resolution, 'project_info': lookup_key.split(CONNECTER), 't_start': ingest_job.t_start, 't_stop': ingest_job.t_stop, 't_tile_size': 1, 'x_start': ingest_job.x_start, 'x_stop': ingest_job.x_stop, 'x_tile_size': ingest_job.tile_size_x, 'y_start': ingest_job.y_start, 'y_stop': ingest_job.y_stop, 'y_tile_size': ingest_job.tile_size_y, 'z_start': ingest_job.z_start, 'z_stop': ingest_job.z_stop, 'z_tile_size': 16, } session = bossutils.aws.get_session() populate_sfn = config['sfn']['populate_upload_queue'] arn = bossutils.aws.sfn_execute(session, populate_sfn, args) return arn def generate_upload_tasks(self, job_id=None): """ Generate upload tasks for the ingest job. This creates once task for each tile that has to be uploaded in the ingest queue Args: job_id: Job id of the ingest queue. If not included this takes the current ingest job Returns: None Raises: BossError : if there is no valid ingest job """ if job_id is None and self.job is None: raise BossError( "Unable to generate upload tasks for the ingest service. Please specify a ingest job", ErrorCodes.UNABLE_TO_VALIDATE) elif job_id: # Using the job id to get the job try: ingest_job = IngestJob.objects.get(id=job_id) except IngestJob.DoesNotExist: raise BossError( "Ingest job with id {} does not exist".format(job_id), ErrorCodes.RESOURCE_NOT_FOUND) else: ingest_job = self.job # Generate upload tasks for the ingest job # Get the project information bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key [col_id, exp_id, ch_id] = lookup_key.split('&') project_info = [col_id, exp_id, ch_id] # Batch messages and write to file base_file_name = 'tasks_' + lookup_key + '_' + str(ingest_job.id) self.file_index = 0 # open file f = io.StringIO() header = { 'job_id': ingest_job.id, 'upload_queue_url': ingest_job.upload_queue, 'ingest_queue_url': ingest_job.ingest_queue } f.write(json.dumps(header)) f.write('\n') num_msg_per_file = 0 for time_step in range(ingest_job.t_start, ingest_job.t_stop, 1): # For each time step, compute the chunks and tile keys for z in range(ingest_job.z_start, ingest_job.z_stop, 16): for y in range(ingest_job.y_start, ingest_job.y_stop, ingest_job.tile_size_y): for x in range(ingest_job.x_start, ingest_job.x_stop, ingest_job.tile_size_x): # compute the chunk indices chunk_x = int(x / ingest_job.tile_size_x) chunk_y = int(y / ingest_job.tile_size_y) chunk_z = int(z / 16) # Compute the number of tiles in the chunk if ingest_job.z_stop - z >= 16: num_of_tiles = 16 else: num_of_tiles = ingest_job.z_stop - z # Generate the chunk key chunk_key = (BossBackend( self.config)).encode_chunk_key( num_of_tiles, project_info, ingest_job.resolution, chunk_x, chunk_y, chunk_z, time_step) self.num_of_chunks += 1 # get the tiles keys for this chunk for tile in range(z, z + num_of_tiles): # get the tile key tile_key = (BossBackend( self.config)).encode_tile_key( project_info, ingest_job.resolution, chunk_x, chunk_y, tile, time_step) self.count_of_tiles += 1 # Generate the upload task msg msg = chunk_key + ',' + tile_key + '\n' f.write(msg) num_msg_per_file += 1 # if there are 10 messages in the batch send it to the upload queue. if num_msg_per_file == MAX_NUM_MSG_PER_FILE: fname = base_file_name + '_' + str( self.file_index + 1) + '.txt' self.upload_task_file(fname, f.getvalue()) self.file_index += 1 f.close() # status = self.send_upload_message_batch(batch_msg) fname = base_file_name + '_' + str( self.file_index + 1) + '.txt' f = io.StringIO() header = { 'job_id': ingest_job.id, 'upload_queue_url': ingest_job.upload_queue, 'ingest_queue_url': ingest_job.ingest_queue } f.write(json.dumps(header)) f.write('\n') num_msg_per_file = 0 # Edge case: the last batch size maybe smaller than 10 if num_msg_per_file != 0: fname = base_file_name + '_' + str(self.file_index + 1) + '.txt' self.upload_task_file(fname, f.getvalue()) f.close() self.file_index += 1 num_msg_per_file = 0 # Update status self.job.tile_count = self.count_of_tiles self.job.save() def upload_task_file(self, file_name_key, data): """ Upload a file with ingest tasks to the ingest s3 bucket Args: file_name: Filename of the file to upload Returns: status """ s3 = boto3.resource('s3') s3.Bucket(INGEST_BUCKET).put_object(Key=file_name_key, Body=data) self.invoke_lambda(file_name_key) def invoke_lambda(self, file_name): """ Invoke the lamda per file Returns: """ msg_data = { "lambda-name": "upload_enqueue", "upload_bucket_name": INGEST_BUCKET, "filename": file_name } # Trigger lambda to handle it client = boto3.client('lambda', region_name=bossutils.aws.get_region()) response = client.invoke(FunctionName=INGEST_LAMBDA, InvocationType='Event', Payload=json.dumps(msg_data).encode()) def invoke_ingest_lambda(self, ingest_job, num_invokes=1): """Method to trigger extra lambda functions to make sure all the ingest jobs that are actually fully populated kick through Args: ingest_job: Ingest job object num_invokes(int): number of invocations to fire Returns: """ bosskey = ingest_job.collection + CONNECTER + ingest_job.experiment + CONNECTER + ingest_job.channel lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key [col_id, exp_id, ch_id] = lookup_key.split('&') project_info = [col_id, exp_id, ch_id] fake_chunk_key = (BossBackend(self.config)).encode_chunk_key( 16, project_info, ingest_job.resolution, 0, 0, 0, 0) event = { "ingest_job": ingest_job.id, "chunk_key": fake_chunk_key, "lambda-name": "ingest" } # Invoke Ingest lambda functions lambda_client = boto3.client('lambda', region_name=bossutils.aws.get_region()) for _ in range(0, num_invokes): lambda_client.invoke(FunctionName=INGEST_LAMBDA, InvocationType='Event', Payload=json.dumps(event).encode()) @staticmethod def create_upload_task_message(job_id, chunk_key, tile_key, upload_queue_arn, ingest_queue_arn): """ Create a dictionary with the upload task message for the tilekey Args: job_id: Job id of the ingest job chunk_key: Chunk key of the chunk in which the tile is tile_key: Unique tile key for the tile upload_queue_arn: Upload queue url ingest_queue_arn: Ingest queue url Returns: Dict : A single upload task message that corresponds to a tile """ msg = {} msg['job_id'] = job_id msg['chunk_key'] = chunk_key msg['tile_key'] = tile_key msg['upload_queue_arn'] = upload_queue_arn msg['ingest_queue_arn'] = ingest_queue_arn return json.dumps(msg) def send_upload_task_message(self, msg): """ Upload one message to the upload queue (Note : Currently not used. Replaced with the send_upload_message_batch) Args: msg: Message to send to the upload queue Returns: None """ queue = UploadQueue(self.nd_proj, endpoint_url=None) queue.sendMessage(msg) def send_upload_message_batch(self, list_msg): """ Upload a batch of 10 messages to the upload queue. An error is raised if more than 10 messages are in the batch Args: list_msg: The list containing the messages to upload Returns: None """ queue = UploadQueue(self.nd_proj, endpoint_url=None) status = queue.sendBatchMessages(list_msg) return status def delete_tiles(self, ingest_job): """ Delete all remaining tiles from the tile index database and tile bucket Args: ingest_job: Ingest job model Returns: None Raises: BossError : For exceptions that happen while deleting the tiles and index """ try: # Get all the chunks for a job tiledb = BossTileIndexDB(ingest_job.collection + '&' + ingest_job.experiment) tilebucket = TileBucket(ingest_job.collection + '&' + ingest_job.experiment) chunks = list(tiledb.getTaskItems(ingest_job.id)) for chunk in chunks: chunk_key = chunk['chunk_key'] # delete each tile in the chunk for key in chunk['tile_uploaded_map']: response = tilebucket.deleteObject(key) tiledb.deleteCuboid(chunk['chunk_key'], ingest_job.id) except Exception as e: raise BossError( "Exception while deleteing tiles for the ingest job {}. {}". format(ingest_job.id, e), ErrorCodes.BOSS_SYSTEM_ERROR) def create_ingest_credentials(self, upload_queue, tile_bucket): """ Create new ingest credentials for a job Args: upload_queue : Upload queue for the job tile_bucket : Name of the tile bucket for the job Returns: None """ # Generate credentials for the ingest_job # Create the credentials for the job # tile_bucket = TileBucket(self.job.collection + '&' + self.job.experiment) # self.create_ingest_credentials(upload_queue, tile_bucket) ingest_creds = IngestCredentials() policy = BossUtil.generate_ingest_policy(self.job.id, upload_queue, tile_bucket) ingest_creds.generate_credentials(self.job.id, policy.arn) def generate_ingest_credentials(self, ingest_job): """ Create new ingest credentials for a job Args: upload_queue : Upload queue for the job tile_bucket : Name of the tile bucket for the job Returns: None """ # Generate credentials for the ingest_job # Create the credentials for the job tile_bucket = TileBucket(ingest_job.collection + '&' + ingest_job.experiment) upload_queue = self.get_ingest_job_upload_queue(ingest_job) ingest_creds = IngestCredentials() policy = BossUtil.generate_ingest_policy(ingest_job.id, upload_queue, tile_bucket) ingest_creds.generate_credentials(ingest_job.id, policy.arn) def remove_ingest_credentials(self, job_id): """ Remove the ingest credentials for a job Args: job_id: The id of the ingest job Returns: status """ # Create the credentials for the job ingest_creds = IngestCredentials() ingest_creds.remove_credentials(job_id) status = BossUtil.delete_ingest_policy(job_id) return status
class IngestManager: """ Helper class for the boss ingest service """ def __init__(self): """ Init function """ self.job = None self.owner = None self.config = None self.validator = None self.collection = None self.experiment = None self.channel = None self.resolution = 0 self.nd_proj = None # Some stats for testing self.file_index = 0 self.num_of_chunks = 0 self.count_of_tiles = 0 def validate_config_file(self, config_data): """ Method to validate an ingest config file. This uses the Ingest client for validation. Args: config_data: Returns: (bool) : Status of the validation Raises: BossError : For exceptions that happen during validation """ try: # Validate the schema self.config = Configuration(config_data) self.validator = self.config.get_validator() self.validator.schema = self.config.schema results = self.validator.validate() except jsonschema.ValidationError as e: raise BossError("Schema validation failed! {}".format(e), ErrorCodes.UNABLE_TO_VALIDATE) except Exception as e: raise BossError("Could not validate the schema file.{}".format(e), ErrorCodes.UNABLE_TO_VALIDATE) if len(results['error']) > 0: raise BossError('Could not validate the schema: ' + '\n'.join(results['error']), ErrorCodes.UNABLE_TO_VALIDATE) return True def validate_properties(self): """ Validate the Collection, experiment and channel being used for the ingest job Returns: (bool) : Status of the validation Raises: BossError : If the collection, experiment or channel are not valid """ # Verify Collection, Experiment and channel try: self.collection = Collection.objects.get(name=self.config.config_data["database"]["collection"]) self.experiment = Experiment.objects.get(name=self.config.config_data["database"]["experiment"], collection=self.collection) self.channel = Channel.objects.get(name=self.config.config_data["database"]["channel"], experiment=self.experiment) self.resolution = self.channel.base_resolution except Collection.DoesNotExist: raise BossError("Collection {} not found".format(self.collection), ErrorCodes.RESOURCE_NOT_FOUND) except Experiment.DoesNotExist: raise BossError("Experiment {} not found".format(self.experiment), ErrorCodes.RESOURCE_NOT_FOUND) except Channel.DoesNotExist: raise BossError("Channel {} not found".format(self.channel), ErrorCodes.RESOURCE_NOT_FOUND) # TODO If channel already exists, check corners to see if data exists. If so question user for overwrite # TODO Check tile size - error if too big return True def setup_ingest(self, creator, config_data): """ Setup the ingest job. This is the primary method for the ingest manager. It creates the ingest job and queues required for the ingest. It also uploads the messages for the ingest Args: creator: The validated user from the request to create the ingest jon config_data : Config data to create the ingest job Returns: IngestJob : data model containing the ingest job Raises: BossError : For all exceptions that happen """ # Validate config data and schema self.owner = creator try: valid_schema = self.validate_config_file(config_data) valid_prop = self.validate_properties() if valid_schema is True and valid_prop is True: # create the django model for the job self.job = self.create_ingest_job() # create the additional resources needed for the ingest # initialize the ndingest project for use with the library proj_class = BossIngestProj.load() self.nd_proj = proj_class(self.collection.name, self.experiment.name, self.channel.name, self.resolution, self.job.id) # Create the upload queue upload_queue = self.create_upload_queue() self.job.upload_queue = upload_queue.url # Create the ingest queue if self.job.ingest_type == IngestJob.TILE_INGEST: ingest_queue = self.create_ingest_queue() self.job.ingest_queue = ingest_queue.url tile_index_queue = self.create_tile_index_queue() self.lambda_connect_sqs(tile_index_queue.queue, TILE_UPLOADED_LAMBDA) self.create_tile_error_queue() elif self.job.ingest_type == IngestJob.VOLUMETRIC_INGEST: # Will the management console be ok with ingest_queue being null? pass # Call the step function to populate the queue. self.job.step_function_arn = self.populate_upload_queue(self.job) # Compute # of tiles or chunks in the job x_extent = self.job.x_stop - self.job.x_start y_extent = self.job.y_stop - self.job.y_start z_extent = self.job.z_stop - self.job.z_start t_extent = self.job.t_stop - self.job.t_start num_tiles_in_x = math.ceil(x_extent/self.job.tile_size_x) num_tiles_in_y = math.ceil(y_extent/self.job.tile_size_y) num_tiles_in_z = math.ceil(z_extent/self.job.tile_size_z) num_tiles_in_t = math.ceil(t_extent / self.job.tile_size_t) self.job.tile_count = num_tiles_in_x * num_tiles_in_y * num_tiles_in_z * num_tiles_in_t self.job.save() except BossError as err: raise BossError(err.message, err.error_code) except Exception as e: raise BossError("Unable to create the upload and ingest queue.{}".format(e), ErrorCodes.BOSS_SYSTEM_ERROR) return self.job def create_ingest_job(self): """ Create a new ingest job using the parameters in the ingest config data file Returns: IngestJob : Data model with the current ingest job Raises: BossError : For serialization errors that occur while creating a ingest job or if ingest_type is invalid """ ingest_job_serializer_data = { 'creator': self.owner, 'collection': self.collection.name, 'experiment': self.experiment.name, 'channel': self.channel.name, 'collection_id': self.collection.id, 'experiment_id': self.experiment.id, 'channel_id': self.channel.id, 'config_data': json.dumps(self.config.config_data), 'resolution': self.resolution, 'x_start': self.config.config_data["ingest_job"]["extent"]["x"][0], 'x_stop': self.config.config_data["ingest_job"]["extent"]["x"][1], 'y_start': self.config.config_data["ingest_job"]["extent"]["y"][0], 'y_stop': self.config.config_data["ingest_job"]["extent"]["y"][1], 'z_start': self.config.config_data["ingest_job"]["extent"]["z"][0], 'z_stop': self.config.config_data["ingest_job"]["extent"]["z"][1], 't_start': self.config.config_data["ingest_job"]["extent"]["t"][0], 't_stop': self.config.config_data["ingest_job"]["extent"]["t"][1], } if "ingest_type" in self.config.config_data["ingest_job"]: ingest_job_serializer_data["ingest_type"] = self._convert_string_to_ingest_job( self.config.config_data["ingest_job"]["ingest_type"]) else: ingest_job_serializer_data["ingest_type"] = IngestJob.TILE_INGEST if ingest_job_serializer_data["ingest_type"] == IngestJob.TILE_INGEST: ingest_job_serializer_data['tile_size_x'] = self.config.config_data["ingest_job"]["tile_size"]["x"] ingest_job_serializer_data['tile_size_y'] = self.config.config_data["ingest_job"]["tile_size"]["y"] #ingest_job_serializer_data['tile_size_z'] = self.config.config_data["ingest_job"]["tile_size"]["z"] ingest_job_serializer_data['tile_size_z'] = 1 ingest_job_serializer_data['tile_size_t'] = self.config.config_data["ingest_job"]["tile_size"]["t"] elif ingest_job_serializer_data["ingest_type"] == IngestJob.VOLUMETRIC_INGEST: ingest_job_serializer_data['tile_size_x'] = self.config.config_data["ingest_job"]["chunk_size"]["x"] ingest_job_serializer_data['tile_size_y'] = self.config.config_data["ingest_job"]["chunk_size"]["y"] ingest_job_serializer_data['tile_size_z'] = self.config.config_data["ingest_job"]["chunk_size"]["z"] ingest_job_serializer_data['tile_size_t'] = 1 else: raise BossError('Invalid ingest_type: {}'.format(ingest_job_serializer_data["ingest_type"]), ErrorCodes.UNABLE_TO_VALIDATE) serializer = IngestJobCreateSerializer(data=ingest_job_serializer_data) if serializer.is_valid(): ingest_job = serializer.save() return ingest_job else: raise BossError("{}".format(serializer.errors), ErrorCodes.SERIALIZATION_ERROR) def _convert_string_to_ingest_job(self, s): """ Convert a string representation of ingest_type to int. Args: s (str): Returns: (int): IngestJob.TILE_INGEST | IngestJob.VOLUMETRIC_INGEST Raises: (BossError): If string is invalid. """ lowered = s.lower() if lowered == 'tile': return IngestJob.TILE_INGEST if lowered == 'volumetric': return IngestJob.VOLUMETRIC_INGEST raise BossError('Unknown ingest_type: {}'.format(s)) def get_ingest_job(self, ingest_job_id): """ Get the ingest job with the specific id Args: ingest_job_id: Id of the ingest job Returns: IngestJob : Data model with the ingest job if the id is valid Raises: BossError : If the ingets job id does not exist """ try: ingest_job = IngestJob.objects.get(id=ingest_job_id) return ingest_job except IngestJob.DoesNotExist: raise BossError("The ingest job with id {} does not exist".format(str(ingest_job_id)), ErrorCodes.OBJECT_NOT_FOUND) def get_resource_data(self, ingest_job_id): """ Get a partial set of resource data that is enough to reconstitute a Boss resource. This data is part of the data passed to the tile and ingest lambdas. Args: ingest_job_id: Id of the ingest job Returns: (dict) """ job = self.get_ingest_job(ingest_job_id) return self._get_resource_data(job) def _get_resource_data(self, ingest_job): """ Get a partial set of resource data that is enough to reconstitute a Boss resource. This data is part of the data passed to the tile and ingest lambdas. Args: ingest_job: ingest job Returns: (dict) """ # Generate a "resource" for the ingest lambda function to be able to use SPDB cleanly collection = Collection.objects.get(name=ingest_job.collection) experiment = Experiment.objects.get(name=ingest_job.experiment, collection=collection) coord_frame = experiment.coord_frame channel = Channel.objects.get(name=ingest_job.channel, experiment=experiment) resource = {} resource['boss_key'] = '{}&{}&{}'.format(collection.name, experiment.name, channel.name) resource['lookup_key'] = '{}&{}&{}'.format(collection.id, experiment.id, channel.id) # The comment below may no longer apply now that we don't trigger # the tile upload lambda from S3. # The Lambda function needs certain resource properties to perform write ops. Set required things only. # This is because S3 metadata is limited to 2kb, so we only set the bits of info needed, and in the lambda # Function Populate the rest with dummy info # IF YOU NEED ADDITIONAL DATA YOU MUST ADD IT HERE AND IN THE LAMBDA FUNCTION resource['channel'] = {} resource['channel']['type'] = channel.type resource['channel']['datatype'] = channel.datatype resource['channel']['base_resolution'] = channel.base_resolution resource['experiment'] = {} resource['experiment']['num_hierarchy_levels'] = experiment.num_hierarchy_levels resource['experiment']['hierarchy_method'] = experiment.hierarchy_method resource['coord_frame'] = {} resource['coord_frame']['x_voxel_size'] = coord_frame.x_voxel_size resource['coord_frame']['y_voxel_size'] = coord_frame.y_voxel_size resource['coord_frame']['z_voxel_size'] = coord_frame.z_voxel_size return resource def get_ingest_job_upload_queue(self, ingest_job): """ Return the upload queue for an ingest job Args: ingest_job: Ingest job model Returns: Ndingest.uploadqueue """ proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) queue = UploadQueue(self.nd_proj, endpoint_url=None) return queue def get_ingest_job_tile_index_queue(self, ingest_job): """ Return the tile index queue for an ingest job Args: ingest_job: Ingest job model Returns: ndingest.TileIndexQueue """ proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) queue = TileIndexQueue(self.nd_proj, endpoint_url=None) return queue def get_ingest_job_tile_error_queue(self, ingest_job): """ Return the tile index queue for an ingest job Args: ingest_job: Ingest job model Returns: ndingest.TileIndexQueue """ proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) queue = TileErrorQueue(self.nd_proj, endpoint_url=None) return queue def get_ingest_job_ingest_queue(self, ingest_job): """ Return the ingest queue for an ingest job Args: ingest_job: Ingest job model Returns: Ndingest.ingestqueue """ proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) queue = IngestQueue(self.nd_proj, endpoint_url=None) return queue def calculate_remaining_queue_wait(self, ingest_job): """ Return how many seconds remain while waiting for queues to clear. Once the wait period elapses, it might be safe to transition from WAIT_ON_QUEUES to the COMPLETING state. Args: ingest_job: Ingest job model Returns: (int): Seconds. """ if ingest_job.wait_on_queues_ts is None: return WAIT_FOR_QUEUES_SECS elapsed_secs = timezone.now() - ingest_job.wait_on_queues_ts secs_remaining = WAIT_FOR_QUEUES_SECS - elapsed_secs.total_seconds() if secs_remaining < 0: return 0 return secs_remaining def try_enter_wait_on_queue_state(self, ingest_job): """ Try to move the ingest job to the WAIT_ON_QUEUES state. Args: ingest_job: Ingest job model Returns: (dict): { status: (job status str), wait_secs: (int) - # seconds client should wait } Raises: (BossError): If job not in UPLOADING state. """ if ingest_job.status == IngestJob.WAIT_ON_QUEUES: return { 'job_status': IngestJob.WAIT_ON_QUEUES, 'wait_secs': self.calculate_remaining_queue_wait(ingest_job) } elif ingest_job.status != IngestJob.UPLOADING: raise BossError(NOT_IN_UPLOADING_STATE_ERR_MSG, ErrorCodes.BAD_REQUEST) self.ensure_queues_empty(ingest_job) rows_updated = (IngestJob.objects .filter(id=ingest_job.id, status=IngestJob.UPLOADING) .update(status=IngestJob.WAIT_ON_QUEUES, wait_on_queues_ts=timezone.now()) ) # No update occurred, check if job status already WAIT_ON_QUEUES. if rows_updated == 0: refresh_job = self.get_ingest_job(ingest_job.id) if refresh_job.status != IngestJob.WAIT_ON_QUEUES: raise BossError(NOT_IN_UPLOADING_STATE_ERR_MSG, ErrorCodes.BAD_REQUEST) return { 'job_status': IngestJob.WAIT_ON_QUEUES, 'wait_secs': self.calculate_remaining_queue_wait(ingest_job) } def try_start_completing(self, ingest_job): """ Tries to start completion process. It is assumed that the ingest job status is currently WAIT_ON_QUEUES. If ingest_job status can be set to COMPLETING, then this process "wins" and starts the completion process. Args: ingest_job: Ingest job model Returns: (dict): { status: (job status str), wait_secs: (int) - # seconds client should wait } Raises: (BossError): If completion process cannot be started or is already in process. """ completing_success = { 'job_status': IngestJob.COMPLETING, 'wait_secs': 0 } if ingest_job.status == IngestJob.COMPLETING: return completing_success try: self.ensure_queues_empty(ingest_job) except BossError as be: # Ensure state goes back to UPLOADING if the upload queue isn't # empty. if be.message == UPLOAD_QUEUE_NOT_EMPTY_ERR_MSG: ingest_job.status = IngestJob.UPLOADING ingest_job.save() raise if ingest_job.status != IngestJob.WAIT_ON_QUEUES: raise BossError(NOT_IN_WAIT_ON_QUEUES_STATE_ERR_MSG, ErrorCodes.BAD_REQUEST) wait_remaining = self.calculate_remaining_queue_wait(ingest_job) if wait_remaining > 0: return { 'job_status': IngestJob.WAIT_ON_QUEUES, 'wait_secs': wait_remaining } rows_updated = (IngestJob.objects .exclude(status=IngestJob.COMPLETING) .filter(id=ingest_job.id) .update(status=IngestJob.COMPLETING) ) # If successfully set status to COMPLETING, kick off the completion # process. Otherwise, completion already started. if rows_updated > 0: self._start_completion_activity(ingest_job) log = bossLogger() log.info(f"Started completion step function for job: {ingest_job.id}") return completing_success def _start_completion_activity(self, ingest_job): """ Start the step function activity that checks a tile ingest job for missing tiles. This method SHOULD NOT be called by anyone but this class. We do not any more than 1 completion activity running for an ingest job. Args: ingest_job: Ingest job model Returns: (str|None): Arn of step function if successful """ if ingest_job.ingest_type != IngestJob.TILE_INGEST: return None args = { 'tile_index_table': config['aws']['tile-index-table'], 'status': 'complete', 'region': bossutils.aws.get_region(), 'db_host': ENDPOINT_DB, 'job': { 'collection': ingest_job.collection_id, 'experiment': ingest_job.experiment_id, 'channel': ingest_job.channel_id, 'task_id': ingest_job.id, 'resolution': ingest_job.resolution, 'z_chunk_size': 16, # Number of z slices in a cuboid. 'upload_queue': ingest_job.upload_queue, 'ingest_queue': ingest_job.ingest_queue, 'ingest_type': ingest_job.ingest_type }, 'KVIO_SETTINGS': settings.KVIO_SETTINGS, 'STATEIO_CONFIG': settings.STATEIO_CONFIG, 'OBJECTIO_CONFIG': settings.OBJECTIO_CONFIG, 'resource': self._get_resource_data(ingest_job), 'x_size': ingest_job.tile_size_x, 'y_size': ingest_job.tile_size_y, } session = bossutils.aws.get_session() scan_sfn = config['sfn']['complete_ingest_sfn'] return bossutils.aws.sfn_execute(session, scan_sfn, args) def ensure_queues_empty(self, ingest_job): """ As part of verifying that an ingest job is ready to complete, check each SQS queue associated with the ingest job. If the ingest queue is not empty, connect the ingest queue to the ingest lambda. Args: ingest_job: Ingest job model Raises: (BossError): If a queue is not empty. """ upload_queue = self.get_ingest_job_upload_queue(ingest_job) if get_sqs_num_msgs(upload_queue.url, upload_queue.region_name) > 0: raise BossError(UPLOAD_QUEUE_NOT_EMPTY_ERR_MSG, ErrorCodes.BAD_REQUEST) ingest_queue = self.get_ingest_job_ingest_queue(ingest_job) if get_sqs_num_msgs(ingest_queue.url, ingest_queue.region_name) > 0: self.lambda_connect_sqs(ingest_queue.queue, INGEST_LAMBDA) raise BossError(INGEST_QUEUE_NOT_EMPTY_ERR_MSG, ErrorCodes.BAD_REQUEST) tile_index_queue = self.get_ingest_job_tile_index_queue(ingest_job) if get_sqs_num_msgs(tile_index_queue.url, tile_index_queue.region_name) > 0: raise BossError(TILE_INDEX_QUEUE_NOT_EMPTY_ERR_MSG, ErrorCodes.BAD_REQUEST) def cleanup_ingest_job(self, ingest_job, job_status): """ Delete or complete an ingest job with a specific id. Note this deletes the queues, credentials and all the remaining tiles in the tile bucket for this job id. It does not delete the ingest job datamodel but changes its state. Args: ingest_job: Ingest job to cleanup job_status(int): Status to update to Returns: (int): ingest job id for the job that was successfully deleted Raises: BossError : If the the job id is not valid or any exception happens in deletion process """ try: # cleanup ingest job proj_class = BossIngestProj.load() self.nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) # delete the queues self.delete_upload_queue() if ingest_job.ingest_type != IngestJob.VOLUMETRIC_INGEST: self.delete_ingest_queue() self.delete_tile_index_queue() self.delete_tile_error_queue() ingest_job.status = job_status ingest_job.ingest_queue = None ingest_job.upload_queue = None ingest_job.end_date = timezone.now() ingest_job.save() # Remove ingest credentials for a job self.remove_ingest_credentials(ingest_job.id) except Exception as e: raise BossError("Unable to complete cleanup {}".format(e), ErrorCodes.BOSS_SYSTEM_ERROR) except IngestJob.DoesNotExist: raise BossError("Ingest job with id {} does not exist".format(ingest_job.id), ErrorCodes.OBJECT_NOT_FOUND) return ingest_job.id def create_upload_queue(self): """ Create an upload queue for an ingest job using the ndingest library Returns: UploadQueue : Returns a upload queue object """ UploadQueue.createQueue(self.nd_proj, endpoint_url=None) queue = UploadQueue(self.nd_proj, endpoint_url=None) return queue def create_tile_index_queue(self): """ Create an tile index queue for an ingest job using the ndingest library Returns: TileIndexQueue : Returns a tile index queue object """ TileIndexQueue.createQueue(self.nd_proj, endpoint_url=None) queue = TileIndexQueue(self.nd_proj, endpoint_url=None) timeout = self.get_ingest_lambda_timeout(INGEST_LAMBDA) # Ensure visibility timeout is greater than the ingest lambda that pulls # from it with a bit of buffer. queue.queue.set_attributes(Attributes={'VisibilityTimeout': str(timeout + 20)}) return queue def create_tile_error_queue(self): """ Create an tile error queue for an ingest job using the ndingest library Returns: TileErrorQueue : Returns a tile index queue object """ TileErrorQueue.createQueue(self.nd_proj, endpoint_url=None) queue = TileErrorQueue(self.nd_proj, endpoint_url=None) return queue def create_ingest_queue(self): """ Create an ingest queue for an ingest job using the ndingest library Returns: IngestQueue : Returns a ingest queue object """ IngestQueue.createQueue(self.nd_proj, endpoint_url=None) queue = IngestQueue(self.nd_proj, endpoint_url=None) timeout = self.get_ingest_lambda_timeout(INGEST_LAMBDA) # Ensure visibility timeout is greater than the ingest lambda that pulls # from it with a bit of buffer. queue.queue.set_attributes(Attributes={'VisibilityTimeout': str(timeout + 20)}) return queue def delete_upload_queue(self): """ Delete the current upload queue Returns: None """ UploadQueue.deleteQueue(self.nd_proj, endpoint_url=None) def delete_tile_index_queue(self): """ Delete the current tile index queue. Also removes the queue as an event trigger for the tile uploaded lambda. Returns: None """ queue = TileIndexQueue(self.nd_proj) self.remove_sqs_event_source_from_lambda(queue.arn, TILE_UPLOADED_LAMBDA) TileIndexQueue.deleteQueue(self.nd_proj, delete_deadletter_queue=True) def delete_tile_error_queue(self): """ Delete the current tile error queue Returns: None """ TileErrorQueue.deleteQueue(self.nd_proj, endpoint_url=None) def delete_ingest_queue(self): """ Delete the current ingest queue and removes it as an event source of the ingest lambda if it's connected. Returns: None """ queue = IngestQueue(self.nd_proj) self.remove_sqs_event_source_from_lambda(queue.arn, INGEST_LAMBDA) IngestQueue.deleteQueue(self.nd_proj) def get_ingest_lambda_timeout(self, name): """ Get the current timeout of the tile ingest lambda. Args: name (str): Name of lambda. Returns: (int): Number of seconds of the timeout. """ client = boto3.client('lambda', region_name=bossutils.aws.get_region()) try: resp = client.get_function(FunctionName=name) return resp['Configuration']['Timeout'] except Exception as ex: log = bossLogger() log.error(f"Couldn't get lambda: {name} data from AWS: {ex}") raise def lambda_connect_sqs(self, queue, lambda_name, num_msgs=1): """ Adds an SQS event trigger to the given lambda. Args: queue (SQS.Queue): SQS queue that will be the trigger source. lambda_name (str): Lambda function name. num_msgs (optional[int]): Number of messages to send to the lambda. Defaults to 1, max 10. Raises: (ValueError): if num_msgs is greater than the SQS max batch size. """ if num_msgs < 1 or num_msgs > MAX_SQS_BATCH_SIZE: raise ValueError('lambda_connect_sqs(): Bad num_msgs: {}'.format(num_msgs)) queue_arn = queue.attributes['QueueArn'] timeout = self.get_ingest_lambda_timeout(lambda_name) # AWS recommends that an SQS queue used as a lambda event source should # have a visibility timeout that's 6 times the lambda's timeout. queue.set_attributes(Attributes={'VisibilityTimeout': str(timeout * 6)}) client = boto3.client('lambda', region_name=bossutils.aws.get_region()) try: client.create_event_source_mapping( EventSourceArn=queue_arn, FunctionName=lambda_name, BatchSize=num_msgs) except client.exceptions.ResourceConflictException: log = bossLogger() log.warning(f'ResourceConflictException caught trying to connect {queue_arn} to {lambda_name}. This should be harmless because this happens when the queue has already been connected.') def remove_sqs_event_source_from_lambda(self, queue_arn, lambda_name): """ Removes an SQS event triggger from the given lambda. Args: queue_arn (str): Arn of SQS queue that will be the trigger source. lambda_name (str): Lambda function name. """ log = bossLogger() client = boto3.client('lambda', region_name=bossutils.aws.get_region()) try: resp = client.list_event_source_mappings( EventSourceArn=queue_arn, FunctionName=lambda_name) except Exception as ex: log.error(f"Couldn't list event source mappings for {lambda_name}: {ex}") return for evt in resp['EventSourceMappings']: try: client.delete_event_source_mapping(UUID=evt['UUID']) except client.exceptions.ResourceNotFoundException: pass except Exception as ex: log.error(f"Couldn't remove event source mapping {queue_arn} from {lambda_name}: {ex}") def get_tile_bucket(self): """ Get the name of the ingest tile bucket Returns: Str: Name of the Tile bucket """ return TileBucket.getBucketName() def populate_upload_queue(self, job): """Execute the populate_upload_queue Step Function Args: job (IngestJob): Returns: (string): ARN of the StepFunction Execution started Raises: (BossError) : if there is no valid ingest job """ args = self._generate_upload_queue_args(job) if job.ingest_type == IngestJob.TILE_INGEST: args['upload_sfn'] = config['sfn']['upload_sfn'] elif job.ingest_type == IngestJob.VOLUMETRIC_INGEST: args['upload_sfn'] = config['sfn']['volumetric_upload_sfn'] else: raise BossError( "Ingest job's ingest_type has invalid value: {}".format( job.ingest_type), ErrorCodes.UNABLE_TO_VALIDATE) session = bossutils.aws.get_session() populate_sfn = config['sfn']['populate_upload_queue'] arn = bossutils.aws.sfn_execute(session, populate_sfn, args) return arn def _generate_upload_queue_args(self, ingest_job): """ Generate dictionary to include in messages placed in the tile upload queue. Args: ingest_job (IngestJob): Returns: (dict) Raises: (BossError): If ingest_job.ingest_type invalid. """ bosskey = ingest_job.collection + CONNECTOR + ingest_job.experiment + CONNECTOR + ingest_job.channel lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key [col_id, exp_id, ch_id] = lookup_key.split('&') args = { 'job_id': ingest_job.id, 'upload_queue': ingest_job.upload_queue, 'ingest_queue': ingest_job.ingest_queue, 'resolution': ingest_job.resolution, 'project_info': lookup_key.split(CONNECTOR), 'ingest_type': ingest_job.ingest_type, 't_start': ingest_job.t_start, 't_stop': ingest_job.t_stop, 't_tile_size': 1, 'x_start': ingest_job.x_start, 'x_stop': ingest_job.x_stop, 'x_tile_size': ingest_job.tile_size_x, 'y_start': ingest_job.y_start, 'y_stop': ingest_job.y_stop, 'y_tile_size': ingest_job.tile_size_y, 'z_start': ingest_job.z_start, 'z_stop': ingest_job.z_stop, 'z_tile_size': 1 } if ingest_job.ingest_type == IngestJob.TILE_INGEST: # Always the Boss cuboid z size for tile jobs. args['z_chunk_size'] = 16 elif ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST: # tile_size_* holds the chunk size dimensions for volumetric jobs. args['z_chunk_size'] = ingest_job.tile_size_z else: raise BossError( "Ingest job's ingest_type has invalid value: {}".format( self.job.ingest_type), ErrorCodes.UNABLE_TO_VALIDATE) return args def invoke_ingest_lambda(self, ingest_job, num_invokes=1): """Method to trigger extra lambda functions to make sure all the ingest jobs that are actually fully populated kick through Args: ingest_job: Ingest job object num_invokes(int): number of invocations to fire Returns: """ bosskey = ingest_job.collection + CONNECTOR + ingest_job.experiment + CONNECTOR + ingest_job.channel lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key [col_id, exp_id, ch_id] = lookup_key.split('&') project_info = [col_id, exp_id, ch_id] fake_chunk_key = (BossBackend(self.config)).encode_chunk_key(16, project_info, ingest_job.resolution, 0, 0, 0, 0) event = {"ingest_job": ingest_job.id, "chunk_key": fake_chunk_key, "function-name": INGEST_LAMBDA, "lambda-name": "ingest"} # Invoke Ingest lambda functions lambda_client = boto3.client('lambda', region_name=bossutils.aws.get_region()) for _ in range(0, num_invokes): lambda_client.invoke(FunctionName=INGEST_LAMBDA, InvocationType='Event', Payload=json.dumps(event).encode()) def generate_ingest_credentials(self, ingest_job): """ Create new ingest credentials for a job Args: ingest_job: Ingest job model Returns: None Raises: (ValueError): On bad ingest_type """ # Generate credentials for the ingest_job upload_queue = self.get_ingest_job_upload_queue(ingest_job) tile_index_queue = None ingest_creds = IngestCredentials() if ingest_job.ingest_type == IngestJob.TILE_INGEST: bucket_name = TileBucket.getBucketName() tile_index_queue = self.get_ingest_job_tile_index_queue(ingest_job) elif ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST: bucket_name = INGEST_BUCKET else: raise ValueError('Unknown ingest_type: {}'.format(ingest_job.ingest_type)) policy = BossUtil.generate_ingest_policy(ingest_job.id, upload_queue, tile_index_queue, bucket_name, ingest_type=ingest_job.ingest_type) ingest_creds.generate_credentials(ingest_job.id, policy.arn) def remove_ingest_credentials(self, job_id): """ Remove the ingest credentials for a job Args: job_id: The id of the ingest job Returns: status """ # Create the credentials for the job ingest_creds = IngestCredentials() ingest_creds.remove_credentials(job_id) status = BossUtil.delete_ingest_policy(job_id) return status