def create_direct_ingest_process_job_task(
    self,
    region: Region,
    ingest_instance: DirectIngestInstance,
    ingest_args: GcsfsIngestArgs,
) -> None:
    """Enqueues a task that will process the ingest job described by
    |ingest_args| on the queue for |region| / |ingest_instance|.
    """
    # Full (non-prefix) task id — uniquely identifies this job in the queue.
    job_task_id = _build_task_id(
        region.region_code,
        ingest_instance,
        ingest_args.task_id_tag(),
        prefix_only=False,
    )
    query_string = urlencode(
        {
            "region": region.region_code.lower(),
            "file_path": ingest_args.file_path.abs_path(),
        }
    )
    queue_manager = self._get_process_job_queue_manager(region, ingest_instance)
    queue_manager.create_task(
        task_id=job_task_id,
        relative_uri=f"/direct/process_job?{query_string}",
        body=self._get_body_from_args(ingest_args),
    )
def test_is_task_queued_has_tasks(self):
    """is_task_queued() finds a task whose name was built from the same args."""
    # Arrange
    file_path = to_normalized_unprocessed_file_path(
        'bucket/file_path.csv', GcsfsDirectIngestFileType.INGEST_VIEW)
    gcsfs_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(),
        file_path=GcsfsFilePath.from_absolute_path(file_path))
    full_task_name = _build_task_id(
        _REGION.region_code,
        # is_task_queued() derives the instance from the ingest bucket when it
        # builds its search prefix, so the queued task name must include the
        # same instance segment to match.
        DirectIngestInstance.for_ingest_bucket(gcsfs_args.file_path.bucket_path),
        gcsfs_args.task_id_tag())
    info = ProcessIngestJobCloudTaskQueueInfo(
        queue_name='queue_name',
        task_names=[
            'projects/path/to/random_task',
            f'projects/path/to/{full_task_name}',
        ])

    # Act
    gcsfs_args_queued = info.is_task_queued(_REGION, gcsfs_args)

    # Assert
    self.assertTrue(gcsfs_args_queued)
def test_info_single_task(self) -> None:
    """A task queued for PRIMARY is found, and SECONDARY stays empty."""
    # Arrange
    gcsfs_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(),
        file_path=self.ingest_view_file_path,
    )
    full_task_name = _build_task_id(
        _REGION.region_code, DirectIngestInstance.PRIMARY, gcsfs_args.task_id_tag()
    )
    info = ProcessIngestJobCloudTaskQueueInfo(
        queue_name="queue_name",
        task_names=[
            "projects/path/to/random_task",
            f"projects/path/to/{full_task_name}",
        ],
    )

    # Act
    gcsfs_args_queued = info.is_task_queued(_REGION, gcsfs_args)

    # Assert
    self.assertTrue(gcsfs_args_queued)
    self.assertTrue(
        info.tasks_for_instance(_REGION.region_code, DirectIngestInstance.PRIMARY)
    )
    self.assertFalse(
        info.tasks_for_instance(_REGION.region_code, DirectIngestInstance.SECONDARY)
    )
def is_task_queued(self, region: Region, ingest_args: GcsfsIngestArgs) -> bool:
    """Returns true if the ingest_args correspond to a task currently in
    the queue.
    """
    # Which instance a file belongs to is determined by its ingest bucket.
    instance_for_bucket = DirectIngestInstance.for_ingest_bucket(
        ingest_args.file_path.bucket_path
    )
    # Prefix form of the task id — matches any scheduling of this job.
    search_prefix = _build_task_id(
        region.region_code,
        instance_for_bucket,
        ingest_args.task_id_tag(),
        prefix_only=True,
    )
    first_match = next(self._tasks_for_prefix(search_prefix), None)
    return bool(first_match)
def create_direct_ingest_process_job_task(
    self,
    region: Region,
    ingest_instance: DirectIngestInstance,
    ingest_args: GcsfsIngestArgs,
) -> None:
    """Queues *but does not run* a process job task."""
    if not self.controller:
        raise ValueError(
            "Controller is null - did you call set_controller()?")
    # Record the would-be task locally instead of scheduling it for real.
    queued_task_id = _build_task_id(
        self.controller.region.region_code,
        ingest_instance,
        ingest_args.task_id_tag(),
    )
    task_name = f"projects/path/to/{queued_task_id}"
    self.process_job_tasks.append((task_name, ingest_args))
def test_is_task_queued_has_tasks(self):
    """is_task_queued() finds a task whose name was built from the same args."""
    # NOTE(review): this variant predates the `ingest_instance` parameter on
    # _build_task_id (compare the newer tests in this file) — confirm against
    # the CloudTaskQueueInfo version it exercises.
    # Arrange
    file_path = to_normalized_unprocessed_file_path('file_path.csv')
    gcsfs_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(), file_path=file_path)
    full_task_name = _build_task_id(
        _REGION.region_code, gcsfs_args.task_id_tag())
    info = CloudTaskQueueInfo(
        queue_name='queue_name',
        task_names=[
            'projects/path/to/random_task',
            f'projects/path/to/{full_task_name}',
        ])

    # Act
    gcsfs_args_queued = info.is_task_queued(_REGION, gcsfs_args)

    # Assert
    self.assertTrue(gcsfs_args_queued)
def test_info_tasks_both_instances(self) -> None:
    """With one task per instance queued, both instances report tasks."""
    # Arrange
    gcsfs_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(),
        file_path=self.ingest_view_file_path,
    )
    # One full task name per DirectIngestInstance member.
    full_task_names = [
        _build_task_id(
            _REGION.region_code,
            ingest_instance,
            gcsfs_args.task_id_tag(),
        )
        for ingest_instance in DirectIngestInstance
    ]
    info = ProcessIngestJobCloudTaskQueueInfo(
        queue_name="queue_name",
        task_names=["projects/path/to/random_task"]
        + [f"projects/path/to/{full_task_name}"
           for full_task_name in full_task_names],
    )

    # Act
    gcsfs_args_queued = info.is_task_queued(_REGION, gcsfs_args)

    # Assert
    self.assertTrue(gcsfs_args_queued)
    for ingest_instance in DirectIngestInstance:
        self.assertTrue(
            info.tasks_for_instance(_REGION.region_code, ingest_instance)
        )