def delete(self, request, ingest_job_id):
    """Cancel an ingest job.

    Args:
        request: Django Rest framework request object.
        ingest_job_id: Ingest job id.

    Returns:
        Response with HTTP 204 on success, or an error response.
    """
    try:
        manager = IngestManager()
        job = manager.get_ingest_job(ingest_job_id)

        # Only the job creator or an admin may cancel.
        if not self.is_user_or_admin(request, job):
            return BossHTTPError(
                "Only the creator or admin can cancel an ingest job",
                ErrorCodes.INGEST_NOT_CREATOR)

        # "DELETED" status is 3
        manager.cleanup_ingest_job(job, 3)

        BossLogger().logger.info("Deleted Ingest Job {}".format(ingest_job_id))
        return Response(status=status.HTTP_204_NO_CONTENT)
    except BossError as err:
        return err.to_http()
def post(self, request):
    """Create a new ingest job from a posted ingest configuration.

    Args:
        request: Django Rest framework Request object; request.data holds
            the configuration data for the ingest job.

    Returns:
        Response with the serialized job (201) or an error response.
    """
    config_data = request.data
    try:
        self.track_usage_data(config_data, request)
        manager = IngestManager()
        new_job = manager.setup_ingest(self.request.user.id, config_data)
        return Response(IngestJobListSerializer(new_job).data,
                        status=status.HTTP_201_CREATED)
    except BossError as err:
        return err.to_http()
def test_setup_ingest(self):
    """Method to test the setup_ingest method.

    Fix: the original wrapped the body in a redundant ``except: raise``
    (a bare except that only re-raises is a no-op), and constructed
    ``IngestManager()`` inside the ``try`` — if that constructor raised,
    the ``finally`` block would hit a NameError on ``ingest_mgmr``.
    """
    # Construct outside try so the finally block can always reach it.
    ingest_mgmr = IngestManager()
    try:
        ingest_job = ingest_mgmr.setup_ingest(self.user.id, self.example_config_data)
        assert (ingest_job is not None)

        # Check if the queues exist
        proj_class = BossIngestProj.load()
        nd_proj = proj_class(ingest_job.collection, ingest_job.experiment,
                             ingest_job.channel, ingest_job.resolution,
                             ingest_job.id)
        ingest_mgmr.nd_proj = nd_proj
        upload_queue = UploadQueue(nd_proj, endpoint_url=None)
        assert (upload_queue is not None)
        ingest_queue = IngestQueue(nd_proj, endpoint_url=None)
        assert (ingest_queue is not None)
        ingest_mgmr.remove_ingest_credentials(ingest_job.id)
    finally:
        # Always tear down the SQS queues, even when assertions fail.
        ingest_mgmr.delete_upload_queue()
        ingest_mgmr.delete_ingest_queue()
def test_verify_ingest_job_not_ready(
        self, fake_query_tile_ind, fake_patch_upl_q, fake_upload_q,
        fake_lookup_key, fake_get_region):
    """verify_ingest_job returns False while chunks remain in the tile index."""
    mgr = IngestManager()

    job = IngestJob()
    job.status = IngestJob.UPLOADING
    job.collection = 'test_coll'
    job.experiment = 'test_exp'
    job.channel = 'test_chan'
    job.resolution = 0
    job.id = 8

    fake_queue = MagicMock(spec=UploadQueue)
    fake_queue.queue = MagicMock()
    fake_upload_q.return_value = fake_queue

    lookup_result = MagicMock()
    lookup_result.lookup_key = '3&8&1'
    fake_lookup_key.get_lookup_key.return_value = lookup_result

    # Method under test.
    result = mgr.verify_ingest_job(job)

    self.assertFalse(result)
    # Status must remain UPLOADING when verification fails.
    self.assertEqual(IngestJob.UPLOADING, job.status)
def test_verify_ingest_job_good(self, fake_query_tile_ind, fake_get_region):
    """verify_ingest_job returns True when no chunks remain in the tile index."""
    job = IngestJob()
    job.status = IngestJob.UPLOADING
    mgr = IngestManager()
    # Stub out the model save so no DB write happens.
    with patch.object(job, 'save') as fake_save:
        self.assertTrue(mgr.verify_ingest_job(job))
def test_create_ingest_credentials(self):
    """Create and attach an ingest job as setup for ingest-credential handling.

    Fix: the original instantiated ``IngestManager()`` twice in a row and
    had an empty docstring; one instance is sufficient.
    """
    ingest_mgmr = IngestManager()
    ingest_mgmr.validate_config_file(self.example_config_data)
    ingest_mgmr.validate_properties()
    ingest_mgmr.owner = self.user.pk
    job = ingest_mgmr.create_ingest_job()
    ingest_mgmr.job = job
def test_create_upload_task_message(self):
    """The upload task message encodes the job id as JSON."""
    mgr = IngestManager()
    raw_msg = mgr.create_upload_task_message(
        595,
        '3534561bd72dcfce1af7c041fc783379&16&1&1&1&0&1&1&3&0',
        '3534561bd72dcfpppaf7c041fc783379&1&1&1&0&1&1&3&0',
        'test_upload_queue_url',
        'test_ingest_queue_url')

    decoded = json.loads(raw_msg)
    assert decoded['job_id'] == 595
def test_validate_ingest(self):
    """Config file and property validation both succeed on the example data."""
    mgr = IngestManager()

    # Validate schema and config file
    assert mgr.validate_config_file(self.example_config_data) is True

    # Validate properties
    assert mgr.validate_properties() is True
def get(self, request, ingest_job_id):
    """
    Get the status of an ingest_job and number of messages in the upload queue

    Args:
        request: Django Rest framework object
        ingest_job_id: Ingest job id

    Returns:
        Status of the job (id, status, total/current message counts)

    """
    try:
        ingest_mgmr = IngestManager()
        ingest_job = ingest_mgmr.get_ingest_job(ingest_job_id)

        # Check if user is the ingest job creator or the sys admin
        if not self.is_user_or_admin(request, ingest_job):
            return BossHTTPError(
                "Only the creator or admin can check the status of an ingest job",
                ErrorCodes.INGEST_NOT_CREATOR)

        if ingest_job.status == 3:
            # Deleted Job
            raise BossError(
                "The job with id {} has been deleted".format(ingest_job_id),
                ErrorCodes.INVALID_REQUEST)
        else:
            if ingest_job.status == 2:
                # Job is Complete so queues are gone
                num_messages_in_queue = 0
            else:
                upload_queue = ingest_mgmr.get_ingest_job_upload_queue(ingest_job)
                num_messages_in_queue = int(
                    upload_queue.queue.attributes['ApproximateNumberOfMessages'])
                if num_messages_in_queue < ingest_job.tile_count:
                    # Sample the approximate SQS depth 9 more times and average
                    # over 10 to smooth out the fluctuating approximate count.
                    # NOTE(review): boto3 caches queue.attributes after the first
                    # read, so these re-reads likely return the same cached value
                    # unless the queue resource is reloaded — confirm intent.
                    for n in range(9):
                        num_messages_in_queue += int(
                            upload_queue.queue.attributes['ApproximateNumberOfMessages'])
                    num_messages_in_queue /= 10

        data = {
            "id": ingest_job.id,
            "status": ingest_job.status,
            "total_message_count": ingest_job.tile_count,
            "current_message_count": int(num_messages_in_queue)
        }
        return Response(data, status=status.HTTP_200_OK)
    except BossError as err:
        return err.to_http()
    except Exception as err:
        return BossError("{}".format(err),
                         ErrorCodes.BOSS_SYSTEM_ERROR).to_http()
def test_generate_upload_queue_args_tile_job(self):
    """Tile jobs get ingest_type TILE_INGEST and a z chunk size of 16."""
    mgr = IngestManager()
    mgr.validate_config_file(self.example_config_data)
    mgr.validate_properties()
    mgr.owner = self.user.pk
    new_job = mgr.create_ingest_job()

    queue_args = mgr._generate_upload_queue_args(new_job)

    assert queue_args['ingest_type'] == IngestJob.TILE_INGEST
    assert queue_args['z_chunk_size'] == 16
def test_generate_upload_queue_args_volumetric_job(self):
    """Volumetric jobs get VOLUMETRIC_INGEST, z chunk 64, and no ingest queue."""
    mgr = IngestManager()
    mgr.validate_config_file(self.volumetric_config_data)
    mgr.validate_properties()
    mgr.owner = self.user.pk
    new_job = mgr.create_ingest_job()

    queue_args = mgr._generate_upload_queue_args(new_job)

    assert queue_args['ingest_type'] == IngestJob.VOLUMETRIC_INGEST
    assert queue_args['z_chunk_size'] == 64
    assert queue_args['ingest_queue'] is None
def test_create_ingest_job(self):
    """Creating an ingest job from a config_data dict yields a persisted job."""
    mgr = IngestManager()
    mgr.validate_config_file(self.example_config_data)
    mgr.validate_properties()
    mgr.owner = self.user.id
    created = mgr.create_ingest_job()
    assert created.id is not None
def delete(self, request, ingest_job_id):
    """Delete an ingest job.

    Args:
        request: Django Rest framework request object.
        ingest_job_id: Ingest job id.

    Returns:
        Response with HTTP 204 on success, or an error response.
    """
    try:
        IngestManager().delete_ingest_job(ingest_job_id)
        return Response(status=status.HTTP_204_NO_CONTENT)
    except BossError as err:
        return err.to_http()
def test_create_ingest_job_volumetric(self):
    """A volumetric config produces a VOLUMETRIC_INGEST job with 1024x1024x64x1 tiles."""
    mgr = IngestManager()
    mgr.validate_config_file(self.volumetric_config_data)
    mgr.validate_properties()
    mgr.owner = self.user.pk
    created = mgr.create_ingest_job()

    assert created.id is not None
    assert created.ingest_type == IngestJob.VOLUMETRIC_INGEST
    assert created.tile_size_x == 1024
    assert created.tile_size_y == 1024
    assert created.tile_size_z == 64
    assert created.tile_size_t == 1
def test_create_ingest_job(self):
    """The example config produces a TILE_INGEST job with 512x512x1x1 tiles."""
    mgr = IngestManager()
    mgr.validate_config_file(self.example_config_data)
    mgr.validate_properties()
    mgr.owner = self.user.pk
    created = mgr.create_ingest_job()

    assert created.id is not None
    assert created.ingest_type == IngestJob.TILE_INGEST
    assert created.tile_size_x == 512
    assert created.tile_size_y == 512
    assert created.tile_size_z == 1
    assert created.tile_size_t == 1
def setUp(self):
    """Initialize the database with a superuser and ingest test data."""
    # AWS region.
    self.region = 'us-east-1'

    db = SetupTestDB()
    self.user = db.create_super_user(username='******',
                                     email='*****@*****.**',
                                     password='******')
    db.set_user(self.user)
    self.client.force_login(self.user)
    db.insert_ingest_test_data()
    SetupTests()

    # Unit under test.
    self.ingest_mgr = IngestManager()
def post(self, request):
    """Create a new ingest job from a posted ingest configuration.

    Args:
        request: Django Rest framework Request object; request.data holds
            the configuration data for the ingest job.

    Returns:
        Response with the serialized job (201) or an error response.
    """
    config_data = request.data
    try:
        manager = IngestManager()
        new_job = manager.setup_ingest(self.request.user.id, config_data)
        return Response(IngestJobListSerializer(new_job).data,
                        status=status.HTTP_201_CREATED)
    except BossError as err:
        return err.to_http()
def setUp(self):
    """Initialize the database and load v1 ingest configs for the tests."""
    db = SetupTestDB()
    self.user = db.create_super_user(username='******',
                                     email='*****@*****.**',
                                     password='******')
    db.set_user(self.user)
    self.client.force_login(self.user)
    db.insert_ingest_test_data()

    tests_setup = SetupTests()
    # Get the config_data for v1 schema
    self.example_config_data = tests_setup.get_ingest_config_data_dict()
    self.volumetric_config_data = tests_setup.get_ingest_config_data_dict_volumetric()

    # Unit under test.
    self.ingest_mgr = IngestManager()
def test_validate_properties(self):
    """Validating properties resolves the collection/experiment/channel names."""
    mgr = IngestManager()
    mgr.validate_config_file(self.example_config_data)
    mgr.validate_properties()

    assert mgr.collection.name == 'my_col_1'
    assert mgr.experiment.name == 'my_exp_1'
    assert mgr.channel.name == 'my_ch_1'
def get(self, request, ingest_job_id=None):
    """
    Join a job with the specified job id or list all job ids if ingest_job_id is omitted

    Args:
        request: Django rest framework request object
        ingest_job_id: Ingest job id

    Returns:
        Ingest job data, including credentials and a minimal "resource" dict
        for the ingest lambda when the job is uploading.

    Fix: the step-function status was fetched twice (two remote AWS calls,
    whose results could differ between calls); it is now fetched once.
    """
    try:
        if ingest_job_id is None:
            # If the job ID is empty on a get, you are listing jobs
            return self.list_ingest_jobs(request)

        ingest_mgmr = IngestManager()
        ingest_job = ingest_mgmr.get_ingest_job(ingest_job_id)

        # Check permissions
        if not self.is_user_or_admin(request, ingest_job):
            return BossHTTPError(
                "Only the creator or admin can join an ingest job",
                ErrorCodes.INGEST_NOT_CREATOR)

        serializer = IngestJobListSerializer(ingest_job)

        # Start setting up output
        data = {'ingest_job': serializer.data}

        if ingest_job.status == 3:
            # The job has been deleted
            raise BossError(
                "The job with id {} has been deleted".format(ingest_job_id),
                ErrorCodes.INVALID_REQUEST)
        elif ingest_job.status == 2 or ingest_job.status == 4:
            # Failed job or completed job
            return Response(data, status=status.HTTP_200_OK)
        elif ingest_job.status == 0:
            # Job is still in progress: check status of the step function once.
            session = bossutils.aws.get_session()
            sfn_state = bossutils.aws.sfn_status(session, ingest_job.step_function_arn)
            if sfn_state == 'SUCCEEDED':
                # generate credentials
                ingest_job.status = 1
                ingest_job.save()
                ingest_mgmr.generate_ingest_credentials(ingest_job)
            elif sfn_state == 'FAILED':
                # This indicates an error in step function
                raise BossError(
                    "Error generating ingest job messages"
                    " Delete the ingest job with id {} and try again.".format(ingest_job_id),
                    ErrorCodes.BOSS_SYSTEM_ERROR)

        if ingest_job.status == 1:
            data['ingest_job']['status'] = 1
            ingest_creds = IngestCredentials()
            data['credentials'] = ingest_creds.get_credentials(ingest_job.id)
        else:
            data['credentials'] = None

        data['tile_bucket_name'] = ingest_mgmr.get_tile_bucket()
        data['KVIO_SETTINGS'] = settings.KVIO_SETTINGS
        data['STATEIO_CONFIG'] = settings.STATEIO_CONFIG
        data['OBJECTIO_CONFIG'] = settings.OBJECTIO_CONFIG

        # add the lambda - Possibly remove this later
        config = bossutils.configuration.BossConfig()
        data['ingest_lambda'] = config["lambda"]["page_in_function"]

        # Generate a "resource" for the ingest lambda function to be able to use SPDB cleanly
        collection = Collection.objects.get(name=data['ingest_job']["collection"])
        experiment = Experiment.objects.get(name=data['ingest_job']["experiment"],
                                            collection=collection)
        coord_frame = experiment.coord_frame
        channel = Channel.objects.get(name=data['ingest_job']["channel"],
                                      experiment=experiment)

        resource = {}
        resource['boss_key'] = '{}&{}&{}'.format(data['ingest_job']["collection"],
                                                 data['ingest_job']["experiment"],
                                                 data['ingest_job']["channel"])
        resource['lookup_key'] = '{}&{}&{}'.format(collection.id, experiment.id, channel.id)

        # The Lambda function needs certain resource properties to perform write ops. Set required things only.
        # This is because S3 metadata is limited to 2kb, so we only set the bits of info needed, and in the lambda
        # Function Populate the rest with dummy info
        # IF YOU NEED ADDITIONAL DATA YOU MUST ADD IT HERE AND IN THE LAMBDA FUNCTION
        resource['channel'] = {}
        resource['channel']['type'] = channel.type
        resource['channel']['datatype'] = channel.datatype
        resource['channel']['base_resolution'] = channel.base_resolution

        resource['experiment'] = {}
        resource['experiment']['num_hierarchy_levels'] = experiment.num_hierarchy_levels
        resource['experiment']['hierarchy_method'] = experiment.hierarchy_method

        resource['coord_frame'] = {}
        resource['coord_frame']['x_voxel_size'] = coord_frame.x_voxel_size
        resource['coord_frame']['y_voxel_size'] = coord_frame.y_voxel_size
        resource['coord_frame']['z_voxel_size'] = coord_frame.z_voxel_size

        # Set resource
        data['resource'] = resource

        return Response(data, status=status.HTTP_200_OK)
    except BossError as err:
        return err.to_http()
    except Exception as err:
        return BossError("{}".format(err), ErrorCodes.BOSS_SYSTEM_ERROR).to_http()
def post(self, request, ingest_job_id):
    """
    Signal an ingest job is complete and should be cleaned up by POSTing to this view

    Args:
        request: Django Rest framework Request object
        ingest_job_id: Ingest job id

    Returns:
        Response carrying job status data, or an error response.

    Fixes: the original tested ``INGEST_QUEUE_NOT_EMPTY_ERR_MSG`` twice in
    the same ``or`` chain; it also re-ran the creator/admin permission check
    a second time even though the check at the top already covers all paths.
    The large block of non-working commented-out cleanup code was removed
    (see version control history), per the TODO that disabled it.
    """
    try:
        blog = bossLogger()
        ingest_mgmr = IngestManager()
        ingest_job = ingest_mgmr.get_ingest_job(ingest_job_id)

        # Check if user is the ingest job creator or the sys admin
        if not self.is_user_or_admin(request, ingest_job):
            return BossHTTPError(
                "Only the creator or admin can complete an ingest job",
                ErrorCodes.INGEST_NOT_CREATOR)

        if ingest_job.status == IngestJob.PREPARING:
            # If status is Preparing. Deny
            return BossHTTPError(
                "You cannot complete a job that is still preparing. You must cancel instead.",
                ErrorCodes.BAD_REQUEST)
        elif ingest_job.status == IngestJob.UPLOADING:
            try:
                data = ingest_mgmr.try_enter_wait_on_queue_state(ingest_job)
                return Response(data=data, status=status.HTTP_202_ACCEPTED)
            except BossError as be:
                if (be.message == INGEST_QUEUE_NOT_EMPTY_ERR_MSG or
                        be.message == TILE_INDEX_QUEUE_NOT_EMPTY_ERR_MSG):
                    # If there are messages in the tile error queue, this
                    # will have to be handled manually.  Non-empty ingest
                    # or tile index queues should resolve on their own.
                    return Response(data={'wait_secs': WAIT_FOR_QUEUES_SECS,
                                          'info': 'Internal queues not empty yet'},
                                    status=status.HTTP_400_BAD_REQUEST)
                raise
        elif ingest_job.status == IngestJob.WAIT_ON_QUEUES:
            pass  # Continue below.
        elif ingest_job.status == IngestJob.COMPLETE:
            # If status is already Complete, just return another 204
            return Response(data={'job_status': ingest_job.status},
                            status=status.HTTP_204_NO_CONTENT)
        elif ingest_job.status == IngestJob.DELETED:
            # Job had already been cancelled
            return BossHTTPError("Ingest job has already been cancelled.",
                                 ErrorCodes.BAD_REQUEST)
        elif ingest_job.status == IngestJob.FAILED:
            # Job had failed
            return BossHTTPError(
                "Ingest job has failed during creation. You must Cancel instead.",
                ErrorCodes.BAD_REQUEST)
        elif ingest_job.status == IngestJob.COMPLETING:
            return Response(data={'job_status': ingest_job.status},
                            status=status.HTTP_202_ACCEPTED)

        # Try to start completing.
        try:
            data = ingest_mgmr.try_start_completing(ingest_job)
            if data['job_status'] == IngestJob.WAIT_ON_QUEUES:
                # Refuse complete requests until wait period expires.
                return Response(data=data, status=status.HTTP_400_BAD_REQUEST)
        except BossError as be:
            if (be.message == INGEST_QUEUE_NOT_EMPTY_ERR_MSG or
                    be.message == TILE_INDEX_QUEUE_NOT_EMPTY_ERR_MSG):
                return Response(data={'wait_secs': WAIT_FOR_QUEUES_SECS,
                                      'info': 'Internal queues not empty yet'},
                                status=status.HTTP_400_BAD_REQUEST)
            raise

        blog.info("Completion process started for ingest Job {}".format(ingest_job_id))
        return Response(data=data, status=status.HTTP_202_ACCEPTED)

        # TODO SH This is a quick fix to make sure the ingest-client does not
        # run the close option; the broken cleanup code that used to live here
        # was removed.
    except BossError as err:
        return err.to_http()
    except Exception as err:
        blog.error('Caught general exception: {}'.format(err))
        return BossError("{}".format(err), ErrorCodes.BOSS_SYSTEM_ERROR).to_http()
def post(self, request, ingest_job_id):
    """
    Signal an ingest job is complete and should be cleaned up by POSTing to this view

    Args:
        request: Django Rest framework Request object
        ingest_job_id: Ingest job id

    Returns:
        Response with HTTP 204 on (already-)completed jobs, or a
        BossHTTPError for jobs that cannot be completed from their state.
    """
    try:
        ingest_mgmr = IngestManager()
        ingest_job = ingest_mgmr.get_ingest_job(ingest_job_id)
        # Integer status codes per the in-line comments below:
        # 0=Preparing, 1=Uploading, 2=Complete, 3=Cancelled, 4=Failed.
        if ingest_job.status == 0:
            # If status is Preparing. Deny
            return BossHTTPError(
                "You cannot complete a job that is still preparing. You must cancel instead.",
                ErrorCodes.BAD_REQUEST)
        elif ingest_job.status == 1:
            # If status is Uploading. Complete the job.
            blog = BossLogger().logger
            blog.info("Completing Ingest Job {}".format(ingest_job_id))

            # Check if user is the ingest job creator or the sys admin
            if not self.is_user_or_admin(request, ingest_job):
                return BossHTTPError(
                    "Only the creator or admin can complete an ingest job",
                    ErrorCodes.INGEST_NOT_CREATOR)

            # Check if any messages remain in the ingest queue
            ingest_queue = ingest_mgmr.get_ingest_job_ingest_queue(ingest_job)
            num_messages_in_queue = int(
                ingest_queue.queue.attributes['ApproximateNumberOfMessages'])

            # Kick off extra lambdas just in case
            if num_messages_in_queue:
                blog.info("{} messages remaining in Ingest Queue".format(
                    num_messages_in_queue))
                ingest_mgmr.invoke_ingest_lambda(ingest_job, num_messages_in_queue)

                # Give lambda a few seconds to fire things off
                time.sleep(30)

            # "COMPLETE" status is 2
            ingest_mgmr.cleanup_ingest_job(ingest_job, 2)
            blog.info("Complete successful")
            return Response(status=status.HTTP_204_NO_CONTENT)
        elif ingest_job.status == 2:
            # If status is already Complete, just return another 204
            return Response(status=status.HTTP_204_NO_CONTENT)
        elif ingest_job.status == 3:
            # Job had already been cancelled
            return BossHTTPError("Ingest job has already been cancelled.",
                                 ErrorCodes.BAD_REQUEST)
        elif ingest_job.status == 4:
            # Job had failed
            return BossHTTPError(
                "Ingest job has failed during creation. You must Cancel instead.",
                ErrorCodes.BAD_REQUEST)
    except BossError as err:
        return err.to_http()
def get(self, request, ingest_job_id):
    """
    Return an ingest job's info plus the credentials/settings an ingest
    client needs to upload data.

    Args:
        request: Django Rest framework request object
        ingest_job_id: Ingest job id

    Returns:
        Response with the serialized job; when the job is uploading also
        includes credentials, queue/bucket settings, and a "resource" dict
        for the ingest lambda.
    """
    try:
        ingest_mgmr = IngestManager()
        ingest_job = ingest_mgmr.get_ingest_job(ingest_job_id)
        serializer = IngestJobListSerializer(ingest_job)
        # NOTE(review): debug print left in; consider removing or using a logger.
        print(serializer.data)

        # Start setting up output
        data = {}
        data['ingest_job'] = serializer.data
        # Status codes: 0=preparing, 1=uploading, 2=complete, 3=deleted.
        if ingest_job.status == 3 or ingest_job.status == 2:
            # Return the information for the deleted job/completed job
            return Response(data, status=status.HTTP_200_OK)
        elif ingest_job.status == 0:
            # check if all message are in the upload queue
            upload_queue = ingest_mgmr.get_ingest_job_upload_queue(ingest_job)
            if int(upload_queue.queue.attributes['ApproximateNumberOfMessages']) == int(
                    ingest_job.tile_count):
                # generate credentials
                ingest_job.status = 1
                ingest_job.save()
            elif int(upload_queue.queue.attributes['ApproximateNumberOfMessages']) > int(
                    ingest_job.tile_count):
                # This indicates an error in the lambda
                raise BossError(
                    "Error generating ingest job messages due to resources timing out ."
                    " Delete the ingest job with id {} and try again.".format(ingest_job_id),
                    ErrorCodes.BOSS_SYSTEM_ERROR)

        if ingest_job.status == 1:
            data['ingest_job']['status'] = 1
            ingest_creds = IngestCredentials()
            data['credentials'] = ingest_creds.get_credentials(ingest_job.id)
        else:
            data['credentials'] = None

        data['tile_bucket_name'] = ingest_mgmr.get_tile_bucket()
        data['KVIO_SETTINGS'] = settings.KVIO_SETTINGS
        data['STATEIO_CONFIG'] = settings.STATEIO_CONFIG
        data['OBJECTIO_CONFIG'] = settings.OBJECTIO_CONFIG

        # add the lambda - Possibly remove this later
        config = bossutils.configuration.BossConfig()
        data['ingest_lambda'] = config["lambda"]["page_in_function"]

        # Generate a "resource" for the ingest lambda function to be able to use SPDB cleanly
        collection = Collection.objects.get(name=data['ingest_job']["collection"])
        experiment = Experiment.objects.get(name=data['ingest_job']["experiment"],
                                            collection=collection)
        channel = Channel.objects.get(name=data['ingest_job']["channel"],
                                      experiment=experiment)
        resource = {}
        resource['boss_key'] = '{}&{}&{}'.format(data['ingest_job']["collection"],
                                                 data['ingest_job']["experiment"],
                                                 data['ingest_job']["channel"])
        resource['lookup_key'] = '{}&{}&{}'.format(collection.id, experiment.id, channel.id)
        resource['channel'] = {}
        resource['channel']['name'] = channel.name
        resource['channel']['description'] = ""
        resource['channel']['type'] = channel.type
        resource['channel']['datatype'] = channel.datatype
        resource['channel']['base_resolution'] = channel.base_resolution
        resource['channel']['sources'] = [x.name for x in channel.sources.all()]
        resource['channel']['related'] = [x.name for x in channel.related.all()]
        resource['channel']['default_time_sample'] = channel.default_time_sample

        # Set resource
        data['resource'] = resource

        return Response(data, status=status.HTTP_200_OK)
    except BossError as err:
        return err.to_http()
    except Exception as err:
        return BossError("{}".format(err), ErrorCodes.BOSS_SYSTEM_ERROR).to_http()
def test_upload_tile_index_table(self):
    """Populate the tile index for a job, then verify delete_tiles clears it."""
    ingest_mgmr = IngestManager()
    ingest_mgmr.validate_config_file(self.example_config_data)
    ingest_mgmr.validate_properties()
    ingest_mgmr.owner = self.user.pk
    ingest_job = ingest_mgmr.create_ingest_job()
    assert (ingest_job.id is not None)

    # Get the chunks in this job
    # Get the project information
    bosskey = ingest_job.collection + '&' + ingest_job.experiment + '&' + ingest_job.channel_layer
    lookup_key = (LookUpKey.get_lookup_key(bosskey)).lookup_key
    [col_id, exp_id, ch_id] = lookup_key.split('&')
    project_info = [col_id, exp_id, ch_id]
    proj_name = ingest_job.collection + '&' + ingest_job.experiment
    tile_index_db = BossTileIndexDB(proj_name)
    # NOTE(review): tilebucket and key_map below are assigned but never used.
    tilebucket = TileBucket(str(col_id) + '&' + str(exp_id))

    for time_step in range(ingest_job.t_start, ingest_job.t_stop, 1):
        # For each time step, compute the chunks and tile keys
        for z in range(ingest_job.z_start, ingest_job.z_stop, 16):
            for y in range(ingest_job.y_start, ingest_job.y_stop, ingest_job.tile_size_y):
                for x in range(ingest_job.x_start, ingest_job.x_stop, ingest_job.tile_size_x):

                    # compute the chunk indices
                    chunk_x = int(x / ingest_job.tile_size_x)
                    chunk_y = int(y / ingest_job.tile_size_y)
                    chunk_z = int(z / 16)

                    # Compute the number of tiles in the chunk
                    if ingest_job.z_stop - z >= 16:
                        num_of_tiles = 16
                    else:
                        num_of_tiles = ingest_job.z_stop - z

                    # Generate the chunk key
                    chunk_key = (BossBackend(ingest_mgmr.config)).encode_chunk_key(
                        num_of_tiles, project_info, ingest_job.resolution,
                        chunk_x, chunk_y, chunk_z, time_step)
                    # Upload the chunk to the tile index db
                    tile_index_db.createCuboidEntry(chunk_key, ingest_job.id)
                    key_map = {}
                    for tile in range(0, num_of_tiles):
                        # get the object key and upload it
                        #tile_key = tilebucket.encodeObjectKey(ch_id, ingest_job.resolution,
                        #                                      chunk_x, chunk_y, tile, time_step)
                        tile_key = 'fakekey' + str(tile)
                        tile_index_db.markTileAsUploaded(chunk_key, tile_key)

    # for each chunk key, delete entries from the tile_bucket

    # Check if data has been uploaded
    chunks = list(tile_index_db.getTaskItems(ingest_job.id))
    assert (len(chunks) != 0)

    ingest_mgmr.delete_tiles(ingest_job)
    chunks = list(tile_index_db.getTaskItems(ingest_job.id))
    assert (len(chunks) == 0)
class BossIngestManagerCompleteTest(APITestCase):
    """
    Test the completion process implemented by IngestManager.

    Fix: replaced the deprecated ``assertEquals`` alias with ``assertEqual``.
    """

    def setUp(self):
        """Initialize the database"""
        # AWS region.
        self.region = 'us-east-1'

        dbsetup = SetupTestDB()
        self.user = dbsetup.create_super_user(username='******',
                                              email='*****@*****.**',
                                              password='******')
        dbsetup.set_user(self.user)
        self.client.force_login(self.user)
        dbsetup.insert_ingest_test_data()
        SetupTests()

        # Unit under test.
        self.ingest_mgr = IngestManager()

    def patch_ingest_mgr(self, name):
        """
        Patch a method or attribute of self.ingest_manager.

        Allows patching w/o using with so there's not many levels of nested
        indentation.

        Args:
            name (str): Name of method or attribute to replace.

        Returns:
            (MagicMock): Mock or fake
        """
        patch_wrapper = patch.object(self.ingest_mgr, name, autospec=True)
        magic_mock = patch_wrapper.start()
        # This ensures the patch is removed when the test is torn down.
        self.addCleanup(patch_wrapper.stop)
        return magic_mock

    def make_fake_sqs_queues(self):
        """
        Patch the SQS queues used by the ingest manager.
        """
        upload_q = MagicMock(spec=UploadQueue)
        upload_q.url = UPLOAD_QUEUE_URL
        upload_q.region_name = self.region
        upload_q.queue = MagicMock()
        get_upload_q = self.patch_ingest_mgr('get_ingest_job_upload_queue')
        get_upload_q.return_value = upload_q

        ingest_q = MagicMock(spec=IngestQueue)
        ingest_q.url = INGEST_QUEUE_URL
        ingest_q.region_name = self.region
        ingest_q.queue = MagicMock()
        get_ingest_q = self.patch_ingest_mgr('get_ingest_job_ingest_queue')
        get_ingest_q.return_value = ingest_q

        tile_index_q = MagicMock(spec=TileIndexQueue)
        tile_index_q.url = TILE_INDEX_QUEUE_URL
        tile_index_q.region_name = self.region
        tile_index_q.queue = MagicMock()
        get_tile_index_q = self.patch_ingest_mgr('get_ingest_job_tile_index_queue')
        get_tile_index_q.return_value = tile_index_q

        tile_error_q = MagicMock(spec=TileErrorQueue)
        tile_error_q.url = TILE_ERROR_QUEUE_URL
        tile_error_q.region_name = self.region
        tile_error_q.queue = MagicMock()
        get_tile_error_q = self.patch_ingest_mgr('get_ingest_job_tile_error_queue')
        get_tile_error_q.return_value = tile_error_q

    def make_ingest_job(self, **kwargs):
        """
        Create an ingest job for use in a test

        Args:
            kwargs: Keyword args to override the test defaults for the ingest job.

        Returns:
            (IngestJob)
        """
        data = {
            'status': IngestJob.UPLOADING,
            'creator': self.user,
            'resolution': 0,
            'x_start': 0,
            'y_start': 0,
            'z_start': 0,
            't_start': 0,
            'x_stop': 10,
            'y_stop': 10,
            'z_stop': 10,
            't_stop': 1,
            'tile_size_x': 1024,
            'tile_size_y': 1024,
            'tile_size_z': 16,
            'tile_size_t': 1,
            'wait_on_queues_ts': None
        }
        for key, value in kwargs.items():
            data[key] = value

        job = IngestJob.objects.create(**data)
        job.save()
        return job

    @patch('bossingest.ingest_manager.timezone', autospec=True)
    def test_try_enter_wait_on_queue_state_success(self, fake_tz):
        """Entering WAIT_ON_QUEUES sets the status and timestamp."""
        timestamp = datetime.now(timezone.utc)
        fake_tz.now.return_value = timestamp
        job = self.make_ingest_job(status=IngestJob.WAIT_ON_QUEUES,
                                   wait_on_queues_ts=timestamp)
        self.patch_ingest_mgr('ensure_queues_empty')
        self.patch_ingest_mgr('_start_completion_activity')

        actual = self.ingest_mgr.try_enter_wait_on_queue_state(job)

        updated_job = self.ingest_mgr.get_ingest_job(job.id)
        self.assertEqual(IngestJob.WAIT_ON_QUEUES, updated_job.status)
        self.assertEqual(timestamp, updated_job.wait_on_queues_ts)
        exp = {
            'job_status': IngestJob.WAIT_ON_QUEUES,
            'wait_secs': WAIT_FOR_QUEUES_SECS
        }
        self.assertDictEqual(exp, actual)

    @patch('bossingest.ingest_manager.timezone', autospec=True)
    def test_try_enter_wait_on_queue_state_already_there(self, fake_tz):
        """Re-entering WAIT_ON_QUEUES reports the remaining wait time."""
        now_timestamp = datetime.now(timezone.utc)
        fake_tz.now.return_value = now_timestamp
        seconds_waiting = 100
        # Time WAIT_ON_QUEUES entered.
        wait_timestamp = now_timestamp - timedelta(seconds=seconds_waiting)
        job = self.make_ingest_job(status=IngestJob.WAIT_ON_QUEUES,
                                   wait_on_queues_ts=wait_timestamp)
        self.patch_ingest_mgr('ensure_queues_empty')

        actual = self.ingest_mgr.try_enter_wait_on_queue_state(job)

        updated_job = self.ingest_mgr.get_ingest_job(job.id)
        self.assertEqual(IngestJob.WAIT_ON_QUEUES, updated_job.status)
        exp = {
            'job_status': IngestJob.WAIT_ON_QUEUES,
            'wait_secs': WAIT_FOR_QUEUES_SECS - seconds_waiting
        }
        self.assertDictEqual(exp, actual)

    def test_try_enter_wait_on_queue_state_should_fail_if_upload_queue_not_empty(self):
        """A non-empty upload queue keeps the job in UPLOADING and raises."""
        job = self.make_ingest_job(status=IngestJob.UPLOADING)
        fake_ensure_q = self.patch_ingest_mgr('ensure_queues_empty')
        fake_ensure_q.side_effect = BossError(UPLOAD_QUEUE_NOT_EMPTY_ERR_MSG,
                                              ErrorCodes.BAD_REQUEST)

        with self.assertRaises(BossError):
            self.ingest_mgr.try_enter_wait_on_queue_state(job)

        updated_job = self.ingest_mgr.get_ingest_job(job.id)
        self.assertEqual(IngestJob.UPLOADING, updated_job.status)

    @patch('bossingest.ingest_manager.timezone', autospec=True)
    def test_try_start_completing_success_case(self, fake_tz):
        """After the wait period expires, the job transitions to COMPLETING."""
        now_timestamp = datetime.now(timezone.utc)
        fake_tz.now.return_value = now_timestamp
        seconds_waiting = WAIT_FOR_QUEUES_SECS + 2
        # Time WAIT_ON_QUEUES entered.
        wait_timestamp = now_timestamp - timedelta(seconds=seconds_waiting)
        job = self.make_ingest_job(status=IngestJob.WAIT_ON_QUEUES,
                                   wait_on_queues_ts=wait_timestamp)
        self.patch_ingest_mgr('ensure_queues_empty')
        self.patch_ingest_mgr('_start_completion_activity')

        actual = self.ingest_mgr.try_start_completing(job)

        updated_job = self.ingest_mgr.get_ingest_job(job.id)
        self.assertEqual(IngestJob.COMPLETING, updated_job.status)
        exp = {'job_status': IngestJob.COMPLETING, 'wait_secs': 0}
        self.assertDictEqual(exp, actual)

    def test_try_start_completing_should_fail_if_not_in_wait_on_queues_state(self):
        """
        This method can only be called when the ingest job status is
        WAIT_ON_QUEUES.
        """
        job = self.make_ingest_job(status=IngestJob.UPLOADING)
        self.patch_ingest_mgr('ensure_queues_empty')
        self.patch_ingest_mgr('_start_completion_activity')

        with self.assertRaises(BossError) as be:
            self.ingest_mgr.try_start_completing(job)

        actual = be.exception
        self.assertEqual(400, actual.status_code)
        self.assertEqual(ErrorCodes.BAD_REQUEST, actual.error_code)
        self.assertEqual(NOT_IN_WAIT_ON_QUEUES_STATE_ERR_MSG, actual.message)

    def test_try_start_completing_should_return_completing_if_already_completing(self):
        """Should fail if already completing."""
        job = self.make_ingest_job(status=IngestJob.COMPLETING)
        self.patch_ingest_mgr('ensure_queues_empty')
        self.patch_ingest_mgr('_start_completion_activity')

        actual = self.ingest_mgr.try_start_completing(job)

        self.assertEqual(IngestJob.COMPLETING, actual['job_status'])

    @patch('bossingest.ingest_manager.timezone', autospec=True)
    def test_try_start_completing_should_fail_if_queue_wait_period_not_expired(
            self, fake_tz):
        """Before the wait period expires the job stays in WAIT_ON_QUEUES."""
        now_timestamp = datetime.now(timezone.utc)
        fake_tz.now.return_value = now_timestamp
        seconds_waiting = 138
        # Time WAIT_ON_QUEUES entered.
        wait_timestamp = now_timestamp - timedelta(seconds=seconds_waiting)
        job = self.make_ingest_job(status=IngestJob.WAIT_ON_QUEUES,
                                   wait_on_queues_ts=wait_timestamp)
        self.patch_ingest_mgr('ensure_queues_empty')
        self.patch_ingest_mgr('_start_completion_activity')

        actual = self.ingest_mgr.try_start_completing(job)

        exp = {
            'job_status': IngestJob.WAIT_ON_QUEUES,
            'wait_secs': WAIT_FOR_QUEUES_SECS - seconds_waiting
        }
        self.assertDictEqual(exp, actual)

    @patch('bossingest.ingest_manager.get_sqs_num_msgs', autospec=True)
    def test_try_start_completing_should_set_uploading_status_on_nonempty_upload_queue(
            self, fake_get_sqs_num_msgs):
        """If the upload queue isn't empty, the job status should be set to UPLOADING."""
        job = self.make_ingest_job(status=IngestJob.WAIT_ON_QUEUES)
        fake_get_sqs_num_msgs.side_effect = make_fake_get_sqs_num_msgs([
            (UPLOAD_QUEUE_URL, 1)
        ])
        self.make_fake_sqs_queues()
        self.patch_ingest_mgr('_start_completion_activity')

        with self.assertRaises(BossError) as be:
            self.ingest_mgr.try_start_completing(job)

        actual = be.exception
        self.assertEqual(400, actual.status_code)
        self.assertEqual(ErrorCodes.BAD_REQUEST, actual.error_code)
        self.assertEqual(UPLOAD_QUEUE_NOT_EMPTY_ERR_MSG, actual.message)
        updated_job = self.ingest_mgr.get_ingest_job(job.id)
        self.assertEqual(IngestJob.UPLOADING, updated_job.status)

    @patch('bossingest.ingest_manager.get_sqs_num_msgs', autospec=True)
    def test_ensure_queues_empty_should_fail_if_upload_queue_not_empty(
            self, fake_get_sqs_num_msgs):
        """Should fail if the upload queue isn't empty."""
        job = self.make_ingest_job(status=IngestJob.UPLOADING)
        fake_get_sqs_num_msgs.side_effect = make_fake_get_sqs_num_msgs([
            (UPLOAD_QUEUE_URL, 1)
        ])
        self.make_fake_sqs_queues()

        with self.assertRaises(BossError) as be:
            self.ingest_mgr.ensure_queues_empty(job)

        actual = be.exception
        self.assertEqual(400, actual.status_code)
        self.assertEqual(ErrorCodes.BAD_REQUEST, actual.error_code)
        self.assertEqual(UPLOAD_QUEUE_NOT_EMPTY_ERR_MSG, actual.message)

    @patch('bossingest.ingest_manager.get_sqs_num_msgs', autospec=True)
    def test_ensure_queues_empty_should_fail_if_ingest_queue_not_empty(
            self, fake_get_sqs_num_msgs):
        """Should fail if the ingest queue isn't empty."""
        job = self.make_ingest_job(status=IngestJob.UPLOADING)
        fake_get_sqs_num_msgs.side_effect = make_fake_get_sqs_num_msgs([
            (INGEST_QUEUE_URL, 1)
        ])
        self.make_fake_sqs_queues()
        self.patch_ingest_mgr('lambda_connect_sqs')

        with self.assertRaises(BossError) as be:
            self.ingest_mgr.ensure_queues_empty(job)

        actual = be.exception
        self.assertEqual(400, actual.status_code)
        self.assertEqual(ErrorCodes.BAD_REQUEST, actual.error_code)
        self.assertEqual(INGEST_QUEUE_NOT_EMPTY_ERR_MSG, actual.message)

    @patch('bossingest.ingest_manager.get_sqs_num_msgs', autospec=True)
    def test_ensure_queues_empty_should_attach_ingest_lambda_if_ingest_queue_not_empty(
            self, fake_get_sqs_num_msgs):
        """Should fail if the ingest queue isn't empty."""
        job = self.make_ingest_job(status=IngestJob.UPLOADING)
        fake_get_sqs_num_msgs.side_effect = make_fake_get_sqs_num_msgs([
            (INGEST_QUEUE_URL, 1)
        ])
        self.make_fake_sqs_queues()
        fake_lambda_connect = self.patch_ingest_mgr('lambda_connect_sqs')

        with self.assertRaises(BossError):
            self.ingest_mgr.ensure_queues_empty(job)

        # assertEquals is a deprecated alias; use assertEqual.
        self.assertEqual(fake_lambda_connect.call_args_list,
                         [call(ANY, INGEST_LAMBDA)])

    @patch('bossingest.ingest_manager.get_sqs_num_msgs', autospec=True)
    def test_ensure_queues_empty_should_fail_if_tile_index_queue_not_empty(
            self, fake_get_sqs_num_msgs):
        """Should fail if the tile index queue isn't empty."""
        job = self.make_ingest_job(status=IngestJob.UPLOADING)
        fake_get_sqs_num_msgs.side_effect = make_fake_get_sqs_num_msgs([
            (TILE_INDEX_QUEUE_URL, 1)
        ])
        self.make_fake_sqs_queues()

        with self.assertRaises(BossError) as be:
            self.ingest_mgr.ensure_queues_empty(job)

        actual = be.exception
        self.assertEqual(400, actual.status_code)
        self.assertEqual(ErrorCodes.BAD_REQUEST, actual.error_code)
        self.assertEqual(TILE_INDEX_QUEUE_NOT_EMPTY_ERR_MSG, actual.message)

    def test_start_completion_activity_exits_if_not_tile_ingest(self):
        """The completion activity is a no-op for non-tile ingest jobs."""
        job = self.make_ingest_job(status=IngestJob.UPLOADING)
        job.ingest_type = IngestJob.VOLUMETRIC_INGEST
        self.assertIsNone(self.ingest_mgr._start_completion_activity(job))
def test_generate_upload_tasks(self): """""" try: ingest_mgmr = IngestManager() ingest_job = ingest_mgmr.setup_ingest(self.user.id, self.example_config_data) ingest_mgmr.generate_upload_tasks(ingest_job.id) assert (ingest_job.collection == 'my_col_1') assert (ingest_job.experiment == 'my_exp_1') assert (ingest_job.channel == 'my_ch_1') # Pull the messages off the queue proj_class = BossIngestProj.load() nd_proj = proj_class(ingest_job.collection, ingest_job.experiment, ingest_job.channel, ingest_job.resolution, ingest_job.id) queue = UploadQueue(nd_proj, endpoint_url=None) tmp = queue.receiveMessage(number_of_messages=10) # receive message from the queue for message_id, receipt_handle, message_body in tmp: assert(message_body['job_id'] == ingest_job.id) # delete message from the queue response = queue.deleteMessage(message_id, receipt_handle) assert ('Successful' in response) ingest_mgmr.remove_ingest_credentials(ingest_job.id) except: raise finally: ingest_mgmr.delete_upload_queue() ingest_mgmr.delete_ingest_queue()
    def post(self, request):
        """Post a new config job and create a new ingest job

        Args:
            request: Django Rest framework Request object; request.data carries
                the configuration data for the ingest job

        Returns:
            Response: 201 with the serialized ingest job on success, otherwise
            a BossHTTPError / BossError HTTP response
        """
        ingest_config_data = request.data

        # Add metrics to CloudWatch
        extent = ingest_config_data['ingest_job']['extent']
        tile_size = ingest_config_data['ingest_job']['tile_size']
        database = ingest_config_data['database']

        # Check that only permitted users are creating extra large ingests
        try:
            group = Group.objects.get(name=INGEST_GRP)
            in_large_ingest_group = group.user_set.filter(
                id=request.user.id).exists()
        except Group.DoesNotExist:
            # Just in case the group has not been created yet
            in_large_ingest_group = False
        # Total extent volume (x*y*z*t) is compared against the configured cap.
        if (not in_large_ingest_group) and \
            ((extent['x'][1] - extent['x'][0]) * \
             (extent['y'][1] - extent['y'][0]) * \
             (extent['z'][1] - extent['z'][0]) * \
             (extent['t'][1] - extent['t'][0]) > settings.INGEST_MAX_SIZE):
            return BossHTTPError("Large ingests require special permission to create. Contact system administrator.", ErrorCodes.INVALID_STATE)

        # Calculate the cost of the ingest
        # NOTE(review): extents are divided by tile size without rounding up,
        # so partial tiles are fractional here — confirm this is intended.
        cost = (((extent['x'][1] - extent['x'][0]) / tile_size['x'])
                * ((extent['y'][1] - extent['y'][0]) / tile_size['y'])
                * ((extent['z'][1] - extent['z'][0]) / tile_size['z'])
                * ((extent['t'][1] - extent['t'][0]) / tile_size['t'])
                * 1.0625  # 1 lambda per tile + 1 lambda per 16 tiles (per cube)
                * 1  # the cost per lambda
                )  # Calculating the cost of the lambda invocations

        boss_config = bossutils.configuration.BossConfig()
        # CloudWatch dimensions: who, which resource, and which stack.
        dimensions = [
            {
                'Name': 'User',
                'Value': request.user.username
            },
            {
                'Name': 'Resource',
                'Value': '{}/{}/{}'.format(database['collection'],
                                           database['experiment'],
                                           database['channel'])
            },
            {
                'Name': 'Stack',
                'Value': boss_config['system']['fqdn']
            },
        ]
        session = bossutils.aws.get_session()
        client = session.client('cloudwatch')
        # Record one invocation plus the estimated compute cost of this ingest.
        client.put_metric_data(Namespace="BOSS/Ingest",
                               MetricData=[{
                                   'MetricName': 'InvokeCount',
                                   'Dimensions': dimensions,
                                   'Value': 1.0,
                                   'Unit': 'Count'
                               }, {
                                   'MetricName': 'ComputeCost',
                                   'Dimensions': dimensions,
                                   'Value': cost,
                                   'Unit': 'Count'
                               }])

        try:
            ingest_mgmr = IngestManager()
            ingest_job = ingest_mgmr.setup_ingest(self.request.user.id,
                                                  ingest_config_data)
            serializer = IngestJobListSerializer(ingest_job)
            return Response(serializer.data, status=status.HTTP_201_CREATED)
        except BossError as err:
            return err.to_http()
def test_validate_config_file(self): """Method to test validation of a config file""" ingest_mgmr = IngestManager() ingest_mgmr.validate_config_file(self.example_config_data) assert(ingest_mgmr.config is not None) assert (ingest_mgmr.config.config_data is not None)
def post(self, request, ingest_job_id): """ Signal an ingest job is complete and should be cleaned up by POSTing to this view Args: request: Django Rest framework Request object ingest_job_id: Ingest job id Returns: """ try: blog = BossLogger().logger ingest_mgmr = IngestManager() ingest_job = ingest_mgmr.get_ingest_job(ingest_job_id) if ingest_job.status == IngestJob.PREPARING: # If status is Preparing. Deny return BossHTTPError( "You cannot complete a job that is still preparing. You must cancel instead.", ErrorCodes.BAD_REQUEST) elif ingest_job.status == IngestJob.UPLOADING: # Check if user is the ingest job creator or the sys admin if not self.is_user_or_admin(request, ingest_job): return BossHTTPError( "Only the creator or admin can start verification of an ingest job", ErrorCodes.INGEST_NOT_CREATOR) # Disable verification until it is reworked and always return # success for now. blog.info( 'Telling client job complete - completion/verificcation to be fixed later.' ) return Response(status=status.HTTP_204_NO_CONTENT) """ blog.info('Verifying ingest job {}'.format(ingest_job_id)) # Start verification process if not ingest_mgmr.verify_ingest_job(ingest_job): # Ingest not finished return Response(status=status.HTTP_202_ACCEPTED) """ # Verification successful, fall through to the complete process. elif ingest_job.status == IngestJob.COMPLETE: # If status is already Complete, just return another 204 return Response(status=status.HTTP_204_NO_CONTENT) elif ingest_job.status == IngestJob.DELETED: # Job had already been cancelled return BossHTTPError("Ingest job has already been cancelled.", ErrorCodes.BAD_REQUEST) elif ingest_job.status == IngestJob.FAILED: # Job had failed return BossHTTPError( "Ingest job has failed during creation. You must Cancel instead.", ErrorCodes.BAD_REQUEST) # Complete the job. 
blog.info("Completing Ingest Job {}".format(ingest_job_id)) # Check if user is the ingest job creator or the sys admin if not self.is_user_or_admin(request, ingest_job): return BossHTTPError( "Only the creator or admin can complete an ingest job", ErrorCodes.INGEST_NOT_CREATOR) # TODO SH This is a quick fix to make sure the ingest-client does not run close option. # the clean up code commented out below, because it is not working correctly. return Response(status=status.HTTP_204_NO_CONTENT) # if ingest_job.ingest_type == IngestJob.TILE_INGEST: # # Check if any messages remain in the ingest queue # ingest_queue = ingest_mgmr.get_ingest_job_ingest_queue(ingest_job) # num_messages_in_queue = int(ingest_queue.queue.attributes['ApproximateNumberOfMessages']) # # # Kick off extra lambdas just in case # if num_messages_in_queue: # blog.info("{} messages remaining in Ingest Queue".format(num_messages_in_queue)) # ingest_mgmr.invoke_ingest_lambda(ingest_job, num_messages_in_queue) # # # Give lambda a few seconds to fire things off # time.sleep(30) # # ingest_mgmr.cleanup_ingest_job(ingest_job, IngestJob.COMPLETE) # # elif ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST: # ingest_mgmr.cleanup_ingest_job(ingest_job, IngestJob.COMPLETE) # # # ToDo: call cleanup method for volumetric ingests. Don't want # # to cleanup until after testing with real data. # #ingest_mgmr.cleanup_ingest_job(ingest_job, IngestJob.COMPLETE) # # blog.info("Complete successful") # return Response(status=status.HTTP_204_NO_CONTENT) except BossError as err: return err.to_http() except Exception as err: blog.error('Caught general exception: {}'.format(err)) return BossError("{}".format(err), ErrorCodes.BOSS_SYSTEM_ERROR).to_http()
def test_tile_bucket_name(self): """ Test get tile bucket name""" ingest_mgmr = IngestManager() tile_bucket_name = ingest_mgmr.get_tile_bucket() assert(tile_bucket_name is not None)