class CloudTaskQueueManager(Generic[QueueInfoType]):
    """Helpers for working with one specific CloudTask queue.

    Every operation is routed through a GoogleCloudTasksClientWrapper and
    targets the queue name supplied at construction time.
    """

    def __init__(
        self,
        queue_info_cls: Type[QueueInfoType],
        queue_name: str,
        cloud_tasks_client: Optional[tasks_v2.CloudTasksClient] = None,
    ):
        """Set up the manager for a single queue.

        Args:
            queue_info_cls: Class instantiated by get_queue_info() with
                `queue_name` and `task_names` keyword arguments.
            queue_name: Name of the queue this manager operates on.
            cloud_tasks_client: Optional client handed straight to
                GoogleCloudTasksClientWrapper.
        """
        wrapper = GoogleCloudTasksClientWrapper(cloud_tasks_client)
        self.cloud_task_client = wrapper
        self.queue_info_cls = queue_info_cls
        self.queue_name = queue_name

    def get_queue_info(self, *, task_id_prefix: str = "") -> QueueInfoType:
        """Build a queue info object from the tasks currently in the queue.

        Args:
            task_id_prefix: Prefix forwarded to list_tasks_with_prefix();
                defaults to the empty string.

        Returns:
            An instance of `queue_info_cls` holding the queue name and the
            names of the listed tasks (an empty list when nothing is listed).
        """
        listed_tasks = self.cloud_task_client.list_tasks_with_prefix(
            queue_name=self.queue_name,
            task_id_prefix=task_id_prefix,
        )

        names = []
        if listed_tasks:
            names = [listed.name for listed in listed_tasks]

        return self.queue_info_cls(
            queue_name=self.queue_name,
            task_names=names,
        )

    def create_task(
        self,
        *,
        relative_uri: str,
        body: Dict[str, str],
        task_id: Optional[str] = None,
        schedule_delay_seconds: int = 0,
    ) -> None:
        """Create a task on this manager's queue.

        Args:
            relative_uri: Relative URI passed through to the client wrapper.
            body: Task body passed through to the client wrapper.
            task_id: Optional id for the new task.
            schedule_delay_seconds: Delay, in seconds, passed through to the
                client wrapper.
        """
        self.cloud_task_client.create_task(
            task_id=task_id,
            queue_name=self.queue_name,
            relative_uri=relative_uri,
            body=body,
            schedule_delay_seconds=schedule_delay_seconds,
        )
class CalculateCloudTaskManager:
    """Creates cloud tasks on the calculation pipeline queues."""

    def __init__(self, project_id: Optional[str] = None):
        """Build the underlying client wrapper.

        Args:
            project_id: Optional project id forwarded to
                GoogleCloudTasksClientWrapper.
        """
        self.cloud_task_client = GoogleCloudTasksClientWrapper(
            project_id=project_id)

    def create_dataflow_monitor_task(self,
                                     job_id: str,
                                     location: str,
                                     topic: str) -> None:
        """Enqueue a task that monitors the progress of a Dataflow job.

        The task is placed on JOB_MONITOR_QUEUE_V2 and its id is made up of
        the job id, the current UTC date and a random UUID.

        Args:
            job_id: The unique id of the Dataflow job
            location: The region where the job is being run
            topic: Pub/Sub topic where a message will be published if the job
                completes successfully
        """
        monitor_delay_seconds = 300  # 5-minute delay

        payload = dict(
            project_id=self.cloud_task_client.project_id,
            job_id=job_id,
            location=location,
            topic=topic,
        )

        utc_today = datetime.datetime.utcnow().date()
        monitor_task_id = f'{job_id}-{utc_today}-{uuid.uuid4()}'

        self.cloud_task_client.create_task(
            task_id=monitor_task_id,
            queue_name=JOB_MONITOR_QUEUE_V2,
            relative_uri='/dataflow_monitor/monitor',
            body=payload,
            schedule_delay_seconds=monitor_delay_seconds,
        )
class ScraperCloudTaskManager:
    """Creates, lists and purges cloud tasks on the scraper queues."""

    def __init__(self, project_id: Optional[str] = None):
        """Build the underlying client wrapper.

        Args:
            project_id: Optional project id forwarded to
                GoogleCloudTasksClientWrapper.
        """
        self.cloud_task_client = GoogleCloudTasksClientWrapper(
            project_id=project_id)

    def _format_scrape_task_id(self, region_code: str, rest: str):
        """Join the region code and the remainder of the id with a dash."""
        return f"{region_code}-{rest}"

    def purge_scrape_tasks(self, *, region_code: str, queue_name: str):
        """Delete every scrape task of a region from the given queue.

        Args:
            region_code: `str` region code.
            queue_name: `str` queue name.
        """
        region_tasks = self.list_scrape_tasks(region_code=region_code,
                                              queue_name=queue_name)
        for region_task in region_tasks:
            self.cloud_task_client.delete_task(region_task)

    def list_scrape_tasks(
            self, *, region_code: str,
            queue_name: str) -> List[tasks_v2.types.task_pb2.Task]:
        """Return the tasks in the queue whose id starts with the region's
        task id prefix (the region code followed by a dash).
        """
        prefix = self._format_scrape_task_id(region_code, "")
        return self.cloud_task_client.list_tasks_with_prefix(
            queue_name, prefix)

    def create_scrape_task(self, *, region_code: str, queue_name: str,
                           url: str, body: Dict[str, Any]):
        """Enqueue a scrape task.

        The task id is the region code followed by a random UUID.

        Args:
            region_code: `str` region code.
            queue_name: `str` queue name.
            url: `str` App Engine worker url.
            body: `dict` task body to be passed to worker.
        """
        scrape_task_id = self._format_scrape_task_id(region_code,
                                                     str(uuid.uuid4()))
        self.cloud_task_client.create_task(
            task_id=scrape_task_id,
            queue_name=queue_name,
            relative_uri=url,
            body=body,
        )

    def create_scraper_phase_task(self, *, region_code: str, url: str):
        """Enqueue a task that triggers the next phase of a scrape.

        The phase at the given url is triggered for a single region; the
        `region_code` is appended to the url as the `region` query parameter
        and the request is issued as a GET. For example, this can trigger
        stopping a scraper or inferring release for a particular region.
        """
        phase_task_id = self._format_scrape_task_id(region_code,
                                                    str(uuid.uuid4()))
        phase_uri = f"{url}?region={region_code}"
        self.cloud_task_client.create_task(
            task_id=phase_task_id,
            queue_name=SCRAPER_PHASE_QUEUE_V2,
            relative_uri=phase_uri,
            http_method=HttpMethod.GET,
        )
class DirectIngestRawUpdateCloudTaskManager:
    """Manages the cloud tasks and queues used to update state raw data latest
    views.
    """

    def __init__(self, project_id: Optional[str] = None):
        """Build the underlying client wrapper.

        Args:
            project_id: Optional project id forwarded to
                GoogleCloudTasksClientWrapper.
        """
        self.cloud_task_client = GoogleCloudTasksClientWrapper(
            project_id=project_id)

    def _get_queue_info(self, queue_name: str) -> CloudTaskQueueInfo:
        """Return a CloudTaskQueueInfo naming every task listed for the queue.

        Tasks are listed with an empty task id prefix.
        """
        listed_tasks = self.cloud_task_client.list_tasks_with_prefix(
            queue_name=queue_name, task_id_prefix='')

        names = []
        if listed_tasks:
            names = [listed.name for listed in listed_tasks]

        return CloudTaskQueueInfo(queue_name=queue_name, task_names=names)

    def get_bq_queue_info(self) -> CloudTaskQueueInfo:
        """Return queue info for BIGQUERY_QUEUE_V2."""
        return self._get_queue_info(BIGQUERY_QUEUE_V2)

    def create_raw_data_latest_view_update_task(self, region_code: str):
        """Enqueue a task on BIGQUERY_QUEUE_V2 that updates the raw data
        latest views for the given region.

        The task id is built from the region code, a fixed tag, the current
        UTC date and a random UUID. The task body is empty.
        """
        endpoint = ('/direct/update_raw_data_latest_views_for_state'
                    f'?region={region_code}')
        utc_today = datetime.datetime.utcnow().date()
        update_task_id = (f'{region_code}-update_raw_data_latest_views-'
                          f'{utc_today}-{uuid.uuid4()}')

        self.cloud_task_client.create_task(
            task_id=update_task_id,
            queue_name=BIGQUERY_QUEUE_V2,
            relative_uri=endpoint,
            body={},
        )
class DirectIngestCloudTaskManagerImpl(DirectIngestCloudTaskManager):
    """DirectIngestCloudTaskManager implementation backed by real GCP Cloud
    Task queues."""

    def __init__(self, project_id: Optional[str] = None):
        """Build the underlying client wrapper.

        Args:
            project_id: Optional project id forwarded to
                GoogleCloudTasksClientWrapper.
        """
        self.cloud_task_client = GoogleCloudTasksClientWrapper(
            project_id=project_id)

    def _get_queue_info(self,
                        queue_name: str,
                        region_code: str) -> CloudTaskQueueInfo:
        """Return a CloudTaskQueueInfo for the tasks in the queue that are
        listed when the region code is used as the task id prefix.
        """
        listed_tasks = self.cloud_task_client.list_tasks_with_prefix(
            queue_name=queue_name, task_id_prefix=region_code)

        names = []
        if listed_tasks:
            names = [listed.name for listed in listed_tasks]

        return CloudTaskQueueInfo(queue_name=queue_name, task_names=names)

    def get_process_job_queue_info(self, region: Region) -> CloudTaskQueueInfo:
        """Return queue info for the region's own queue."""
        queue_name = region.get_queue_name()
        return self._get_queue_info(queue_name, region.region_code)

    def get_scheduler_queue_info(self, region: Region) -> CloudTaskQueueInfo:
        """Return queue info for the region's tasks on
        DIRECT_INGEST_SCHEDULER_QUEUE_V2."""
        return self._get_queue_info(DIRECT_INGEST_SCHEDULER_QUEUE_V2,
                                    region.region_code)

    def create_direct_ingest_process_job_task(self,
                                              region: Region,
                                              ingest_args: IngestArgs):
        """Enqueue a process-job task on the region's queue.

        The task id is derived from the region code and the ingest args' task
        id tag; the body is produced by _get_body_from_args().
        """
        process_task_id = _build_task_id(region.region_code,
                                         ingest_args.task_id_tag(),
                                         prefix_only=False)
        endpoint = f'/direct/process_job?region={region.region_code}'
        payload = self._get_body_from_args(ingest_args)

        self.cloud_task_client.create_task(
            task_id=process_task_id,
            queue_name=region.get_queue_name(),
            relative_uri=endpoint,
            body=payload,
        )

    def create_direct_ingest_scheduler_queue_task(
            self,
            region: Region,
            just_finished_job: bool,
            delay_sec: int,
    ):
        """Enqueue a scheduler task on DIRECT_INGEST_SCHEDULER_QUEUE_V2.

        `just_finished_job` is passed along as a url parameter and `delay_sec`
        as the schedule delay of the task. The task body is empty.
        """
        scheduler_task_id = _build_task_id(region.region_code,
                                           task_id_tag='scheduler',
                                           prefix_only=False)
        endpoint = (f'/direct/scheduler?region={region.region_code}'
                    f'&just_finished_job={just_finished_job}')

        self.cloud_task_client.create_task(
            task_id=scheduler_task_id,
            queue_name=DIRECT_INGEST_SCHEDULER_QUEUE_V2,
            relative_uri=endpoint,
            body={},
            schedule_delay_seconds=delay_sec,
        )

    def create_direct_ingest_handle_new_files_task(self,
                                                   region: Region,
                                                   can_start_ingest: bool):
        """Enqueue a handle-new-files task on DIRECT_INGEST_SCHEDULER_QUEUE_V2.

        `can_start_ingest` is passed along as a url parameter. The task body
        is empty.
        """
        new_files_task_id = _build_task_id(region.region_code,
                                           task_id_tag='handle_new_files',
                                           prefix_only=False)
        endpoint = (f'/direct/handle_new_files?region={region.region_code}'
                    f'&can_start_ingest={can_start_ingest}')

        self.cloud_task_client.create_task(
            task_id=new_files_task_id,
            queue_name=DIRECT_INGEST_SCHEDULER_QUEUE_V2,
            relative_uri=endpoint,
            body={},
        )
class TestGoogleCloudTasksClientWrapper(unittest.TestCase):
    """Tests for GoogleCloudTasksClientWrapper."""

    PROJECT_ID = "my-project-id"
    INSTANCE_REGION = "us-east1"
    QUEUE_NAME = "queue-name"
    QUEUE_NAME_2 = "queue-name-2"

    @staticmethod
    def create_mock_cloud_tasks_client():
        """Return an autospec'd CloudTasksClient whose queue_path and
        task_path delegate to the real implementations, so path formatting
        can be asserted on exactly."""
        mock_client = create_autospec(tasks_v2.CloudTasksClient)
        mock_client.queue_path.side_effect = tasks_v2.CloudTasksClient.queue_path
        mock_client.task_path.side_effect = tasks_v2.CloudTasksClient.task_path
        return mock_client

    def setUp(self):
        """Patch the CloudTasksClient class and build the wrapper under test
        with a fixed project id and instance region."""
        self.mock_client_patcher = patch(
            "google.cloud.tasks_v2.CloudTasksClient",
            return_value=self.create_mock_cloud_tasks_client(),
        )
        self.mock_client_cls = self.mock_client_patcher.start()
        # Calling the patched class yields the shared mock instance the
        # wrapper will also receive.
        self.mock_client = self.mock_client_cls()

        with patch("recidiviz.utils.metadata.region",
                   return_value=self.INSTANCE_REGION):
            with patch("recidiviz.utils.metadata.project_id",
                       return_value=self.PROJECT_ID):
                self.client_wrapper = GoogleCloudTasksClientWrapper()

    def tearDown(self):
        """Undo the CloudTasksClient patch started in setUp."""
        self.mock_client_patcher.stop()

    def test_format_queue_path(self):
        """Queue paths embed the project id, region and queue name."""
        queue_path = self.client_wrapper.format_queue_path(self.QUEUE_NAME)
        self.assertEqual(
            queue_path,
            "projects/my-project-id/locations/us-east1/queues/queue-name")

        queue_path = self.client_wrapper.format_queue_path(self.QUEUE_NAME_2)
        self.assertEqual(
            queue_path,
            "projects/my-project-id/locations/us-east1/queues/queue-name-2")

    def test_format_task_path(self):
        """Task paths extend the queue path with the task id."""
        task_path = self.client_wrapper.format_task_path(
            self.QUEUE_NAME, "task-name-1234")
        self.assertEqual(
            task_path,
            "projects/my-project-id/locations/us-east1/queues/queue-name/tasks/"
            "task-name-1234",
        )

        task_path = self.client_wrapper.format_task_path(
            self.QUEUE_NAME_2, "task-name-3456")
        self.assertEqual(
            task_path,
            "projects/my-project-id/locations/us-east1/queues/queue-name-2/"
            "tasks/task-name-3456",
        )

    def test_initialize_cloud_task_queue(self):
        """Initializing a queue forwards it to the client's update_queue."""
        # Arrange
        queue = queue_pb2.Queue(
            name=self.client_wrapper.format_queue_path("queue1"))

        # Act
        self.client_wrapper.initialize_cloud_task_queue(queue)

        # Assert
        self.mock_client.update_queue.assert_called_with(queue=queue)

    @staticmethod
    def _tasks_to_ids(tasks: List[tasks_v2.types.task_pb2.Task]) -> Set[str]:
        """Return the set of task ids (the last path component of each task
        name) for the given tasks."""
        return {os.path.basename(task.name) for task in tasks}

    def test_list_tasks_with_prefix(self):
        """Listing with a prefix queries the whole queue and returns only the
        tasks whose id starts with that prefix."""
        all_tasks = [
            tasks_v2.types.task_pb2.Task(
                name=self.client_wrapper.format_task_path(
                    self.QUEUE_NAME, "us-nd-task-1")),
            tasks_v2.types.task_pb2.Task(
                name=self.client_wrapper.format_task_path(
                    self.QUEUE_NAME, "us-nd-task-2")),
            tasks_v2.types.task_pb2.Task(
                name=self.client_wrapper.format_task_path(
                    self.QUEUE_NAME, "us-mo-task-1")),
        ]

        self.mock_client.list_tasks.return_value = all_tasks

        # NOTE: the length checks use assertEqual. assertTrue(len(tasks), n)
        # would treat `n` as the failure message and never compare lengths.

        # Empty prefix
        tasks = self.client_wrapper.list_tasks_with_prefix(self.QUEUE_NAME, "")
        self.mock_client.list_tasks.assert_called_with(
            parent=self.client_wrapper.format_queue_path(self.QUEUE_NAME))
        self.assertEqual(len(tasks), 3)

        # Prefix that returns all
        tasks = self.client_wrapper.list_tasks_with_prefix(
            self.QUEUE_NAME, "u")
        self.mock_client.list_tasks.assert_called_with(
            parent=self.client_wrapper.format_queue_path(self.QUEUE_NAME))
        self.assertEqual(len(tasks), 3)

        # Prefix that returns only some
        tasks = self.client_wrapper.list_tasks_with_prefix(
            self.QUEUE_NAME, "us-nd")
        self.mock_client.list_tasks.assert_called_with(
            parent=self.client_wrapper.format_queue_path(self.QUEUE_NAME))
        self.assertEqual(len(tasks), 2)
        self.assertEqual(self._tasks_to_ids(tasks),
                         {"us-nd-task-1", "us-nd-task-2"})

        # Prefix that is exact match
        tasks = self.client_wrapper.list_tasks_with_prefix(
            self.QUEUE_NAME, "us-nd-task-2")
        self.mock_client.list_tasks.assert_called_with(
            parent=self.client_wrapper.format_queue_path(self.QUEUE_NAME))
        self.assertEqual(len(tasks), 1)
        self.assertEqual(self._tasks_to_ids(tasks), {"us-nd-task-2"})

        # Prefix with no matches
        tasks = self.client_wrapper.list_tasks_with_prefix(
            self.QUEUE_NAME, "no-match-prefix")
        self.mock_client.list_tasks.assert_called_with(
            parent=self.client_wrapper.format_queue_path(self.QUEUE_NAME))
        self.assertFalse(tasks)

    def test_create_task_no_schedule_delay(self):
        """Without a delay, the created task has no schedule_time set."""
        self.client_wrapper.create_task(
            task_id="us_mo-file_name_1-123456",
            queue_name=self.QUEUE_NAME,
            relative_uri="/process_job?region=us_mo",
            body={
                "arg1": "arg1-val",
                "arg2": 123
            },
        )

        self.mock_client.create_task.assert_called_with(
            parent=
            "projects/my-project-id/locations/us-east1/queues/queue-name",
            task=tasks_v2.types.task_pb2.Task(
                name="projects/my-project-id/locations/us-east1/queues/"
                "queue-name/tasks/us_mo-file_name_1-123456",
                app_engine_http_request={
                    "http_method": "POST",
                    "relative_uri": "/process_job?region=us_mo",
                    "body": b'{"arg1": "arg1-val", "arg2": 123}',
                },
            ),
        )

    @freeze_time("2019-04-14")
    def test_create_task_schedule_delay(self):
        """With a delay, schedule_time is the (frozen) current time plus the
        delay in seconds."""
        now_timestamp_sec = int(datetime.datetime.utcnow().timestamp())

        self.client_wrapper.create_task(
            task_id="us_mo-file_name_1-123456",
            queue_name=self.QUEUE_NAME,
            relative_uri="/process_job?region=us_mo",
            body={},
            schedule_delay_seconds=3,
        )

        self.mock_client.create_task.assert_called_with(
            parent=
            "projects/my-project-id/locations/us-east1/queues/queue-name",
            task=tasks_v2.types.task_pb2.Task(
                name="projects/my-project-id/locations/us-east1/queues/"
                "queue-name/tasks/us_mo-file_name_1-123456",
                app_engine_http_request={
                    "http_method": "POST",
                    "relative_uri": "/process_job?region=us_mo",
                    "body": b"{}",
                },
                schedule_time=timestamp_pb2.Timestamp(
                    seconds=(now_timestamp_sec + 3)),
            ),
        )

    def test_delete_task(self):
        """Deleting a task calls the client with the task's name."""
        self.client_wrapper.delete_task(
            tasks_v2.types.task_pb2.Task(name="task_name"))

        self.mock_client.delete_task.assert_called_with(name="task_name")

    def test_delete_task_not_found(self):
        """A NotFound error raised by the client does not propagate out of
        delete_task."""
        self.mock_client.delete_task.side_effect = exceptions.NotFound(
            message="message")

        self.client_wrapper.delete_task(
            tasks_v2.types.task_pb2.Task(name="task_name"))

        self.mock_client.delete_task.assert_called_with(name="task_name")
class BQExportCloudTaskManager:
    """Manages the cloud tasks and queues used for BigQuery exports."""

    def __init__(self, project_id: Optional[str] = None):
        """Build the underlying client wrapper.

        Args:
            project_id: Optional project id forwarded to
                GoogleCloudTasksClientWrapper.
        """
        self.cloud_task_client = GoogleCloudTasksClientWrapper(
            project_id=project_id)

    def _get_queue_info(self, queue_name: str) -> CloudTaskQueueInfo:
        """Return a CloudTaskQueueInfo naming every task listed for the queue.

        Tasks are listed with an empty task id prefix.
        """
        listed_tasks = self.cloud_task_client.list_tasks_with_prefix(
            queue_name=queue_name, task_id_prefix='')

        names = []
        if listed_tasks:
            names = [listed.name for listed in listed_tasks]

        return CloudTaskQueueInfo(queue_name=queue_name, task_names=names)

    def get_bq_queue_info(self) -> CloudTaskQueueInfo:
        """Return queue info for BIGQUERY_QUEUE_V2."""
        return self._get_queue_info(BIGQUERY_QUEUE_V2)

    def create_bq_task(self, table_name: str, schema_type: str):
        """Enqueue a BigQuery table export task on BIGQUERY_QUEUE_V2.

        The task id is built from the table name, the schema type, the
        current UTC date and a random UUID.

        Args:
            table_name: Cloud SQL table to export to BQ. Must be defined in
                the *_TABLES_TO_EXPORT for the given schema.
            schema_type: The schema of the table being exported, either 'jails'
                or 'state'.
        """
        payload = dict(table_name=table_name, schema_type=schema_type)
        utc_today = datetime.datetime.utcnow().date()
        export_task_id = (
            f'{table_name}-{schema_type}-{utc_today}-{uuid.uuid4()}')

        self.cloud_task_client.create_task(
            task_id=export_task_id,
            queue_name=BIGQUERY_QUEUE_V2,
            relative_uri='/export_manager/export',
            body=payload,
        )

    def create_bq_monitor_task(self, topic: str, message: str) -> None:
        """Enqueue a task that monitors the progress of an export to BQ.

        The task is placed on JOB_MONITOR_QUEUE_V2. Its id is built from the
        topic (with every '.' replaced by '-'), the current UTC date and a
        random UUID.

        Args:
            topic: Pub/Sub topic where a message will be published when the BQ
                export tasks are complete.
            message: The message that will be sent to the topic.
        """
        monitor_delay_seconds = 60  # 1-minute delay

        payload = dict(topic=topic, message=message)
        dashed_topic = topic.replace('.', '-')
        utc_today = datetime.datetime.utcnow().date()
        monitor_task_id = f'{dashed_topic}-{utc_today}-{uuid.uuid4()}'

        self.cloud_task_client.create_task(
            task_id=monitor_task_id,
            queue_name=JOB_MONITOR_QUEUE_V2,
            relative_uri='/export_manager/bq_monitor',
            body=payload,
            schedule_delay_seconds=monitor_delay_seconds,
        )
class TestGoogleCloudTasksClientWrapper(unittest.TestCase):
    """Tests for GoogleCloudTasksClientWrapper."""

    PROJECT_ID = 'my-project-id'
    INSTANCE_REGION = 'us-east1'
    QUEUE_NAME = 'queue-name'
    QUEUE_NAME_2 = 'queue-name-2'

    @staticmethod
    def create_mock_cloud_tasks_client():
        """Return an autospec'd CloudTasksClient whose queue_path and
        task_path delegate to the real implementations, so path formatting
        can be asserted on exactly."""
        mock_client = create_autospec(tasks_v2.CloudTasksClient)
        mock_client.queue_path.side_effect = \
            tasks_v2.CloudTasksClient.queue_path

        mock_client.task_path.side_effect = \
            tasks_v2.CloudTasksClient.task_path

        return mock_client

    def setUp(self):
        """Patch the CloudTasksClient class and build the wrapper under test
        with a fixed project id and instance region."""
        self.mock_client_patcher = patch(
            'google.cloud.tasks_v2.CloudTasksClient',
            return_value=self.create_mock_cloud_tasks_client())

        self.mock_client_cls = self.mock_client_patcher.start()
        # Calling the patched class yields the shared mock instance the
        # wrapper will also receive.
        self.mock_client = self.mock_client_cls()

        with patch('recidiviz.utils.metadata.region',
                   return_value=self.INSTANCE_REGION):
            with patch('recidiviz.utils.metadata.project_id',
                       return_value=self.PROJECT_ID):
                self.client_wrapper = GoogleCloudTasksClientWrapper()

    def tearDown(self):
        """Undo the CloudTasksClient patch started in setUp."""
        self.mock_client_patcher.stop()

    def test_format_queue_path(self):
        """Queue paths embed the project id, region and queue name."""
        queue_path = self.client_wrapper.format_queue_path(self.QUEUE_NAME)
        self.assertEqual(
            queue_path,
            'projects/my-project-id/locations/us-east1/queues/queue-name')

        queue_path = self.client_wrapper.format_queue_path(self.QUEUE_NAME_2)
        self.assertEqual(
            queue_path,
            'projects/my-project-id/locations/us-east1/queues/queue-name-2')

    def test_format_task_path(self):
        """Task paths extend the queue path with the task id."""
        task_path = self.client_wrapper.format_task_path(self.QUEUE_NAME,
                                                         'task-name-1234')
        self.assertEqual(
            task_path,
            'projects/my-project-id/locations/us-east1/queues/queue-name/tasks/'
            'task-name-1234')

        task_path = self.client_wrapper.format_task_path(self.QUEUE_NAME_2,
                                                         'task-name-3456')
        self.assertEqual(
            task_path,
            'projects/my-project-id/locations/us-east1/queues/queue-name-2/'
            'tasks/task-name-3456')

    def test_initialize_cloud_task_queue(self):
        """Initializing a queue forwards it to the client's update_queue."""
        # Arrange
        queue = queue_pb2.Queue(
            name=self.client_wrapper.format_queue_path('queue1'))

        # Act
        self.client_wrapper.initialize_cloud_task_queue(queue)

        # Assert
        self.mock_client.update_queue.assert_called_with(queue)

    @staticmethod
    def _tasks_to_ids(tasks: List[tasks_v2.types.task_pb2.Task]) -> Set[str]:
        """Return the set of task ids (the last path component of each task
        name) for the given tasks."""
        return {os.path.basename(task.name) for task in tasks}

    def test_list_tasks_with_prefix(self):
        """Listing with a prefix queries the whole queue and returns only the
        tasks whose id starts with that prefix."""
        all_tasks = [
            tasks_v2.types.task_pb2.Task(
                name=self.client_wrapper.format_task_path(self.QUEUE_NAME,
                                                          'us-nd-task-1')
            ),
            tasks_v2.types.task_pb2.Task(
                name=self.client_wrapper.format_task_path(self.QUEUE_NAME,
                                                          'us-nd-task-2')
            ),
            tasks_v2.types.task_pb2.Task(
                name=self.client_wrapper.format_task_path(self.QUEUE_NAME,
                                                          'us-mo-task-1')
            ),
        ]

        self.mock_client.list_tasks.return_value = all_tasks

        # NOTE: the length checks use assertEqual. assertTrue(len(tasks), n)
        # would treat `n` as the failure message and never compare lengths.

        # Empty prefix
        tasks = self.client_wrapper.list_tasks_with_prefix(self.QUEUE_NAME, '')
        self.mock_client.list_tasks.assert_called_with(
            self.client_wrapper.format_queue_path(self.QUEUE_NAME))
        self.assertEqual(len(tasks), 3)

        # Prefix that returns all
        tasks = self.client_wrapper.list_tasks_with_prefix(self.QUEUE_NAME,
                                                           'u')
        self.mock_client.list_tasks.assert_called_with(
            self.client_wrapper.format_queue_path(self.QUEUE_NAME))
        self.assertEqual(len(tasks), 3)

        # Prefix that returns only some
        tasks = self.client_wrapper.list_tasks_with_prefix(self.QUEUE_NAME,
                                                           'us-nd')
        self.mock_client.list_tasks.assert_called_with(
            self.client_wrapper.format_queue_path(self.QUEUE_NAME))
        self.assertEqual(len(tasks), 2)
        self.assertEqual(self._tasks_to_ids(tasks),
                         {'us-nd-task-1', 'us-nd-task-2'})

        # Prefix that is exact match
        tasks = self.client_wrapper.list_tasks_with_prefix(self.QUEUE_NAME,
                                                           'us-nd-task-2')
        self.mock_client.list_tasks.assert_called_with(
            self.client_wrapper.format_queue_path(self.QUEUE_NAME))
        self.assertEqual(len(tasks), 1)
        self.assertEqual(self._tasks_to_ids(tasks), {'us-nd-task-2'})

        # Prefix with no matches
        tasks = self.client_wrapper.list_tasks_with_prefix(self.QUEUE_NAME,
                                                           'no-match-prefix')
        self.mock_client.list_tasks.assert_called_with(
            self.client_wrapper.format_queue_path(self.QUEUE_NAME))
        self.assertFalse(tasks)

    def test_create_task_no_schedule_delay(self):
        """Without a delay, the created task has no schedule_time set."""
        self.client_wrapper.create_task(
            task_id='us_mo-file_name_1-123456',
            queue_name=self.QUEUE_NAME,
            relative_uri='/process_job?region=us_mo',
            body={
                'arg1': 'arg1-val',
                'arg2': 123
            }
        )

        self.mock_client.create_task.assert_called_with(
            'projects/my-project-id/locations/us-east1/queues/queue-name',
            tasks_v2.types.task_pb2.Task(
                name="projects/my-project-id/locations/us-east1/queues/"
                     "queue-name/tasks/us_mo-file_name_1-123456",
                app_engine_http_request={
                    'http_method': 'POST',
                    'relative_uri': "/process_job?region=us_mo",
                    'body': b"{\"arg1\": \"arg1-val\", \"arg2\": 123}"
                }
            )
        )

    @freeze_time('2019-04-14')
    def test_create_task_schedule_delay(self):
        """With a delay, schedule_time is the (frozen) current time plus the
        delay in seconds."""
        now_timestamp_sec = int(datetime.datetime.utcnow().timestamp())

        self.client_wrapper.create_task(
            task_id='us_mo-file_name_1-123456',
            queue_name=self.QUEUE_NAME,
            relative_uri='/process_job?region=us_mo',
            body={},
            schedule_delay_seconds=3
        )

        self.mock_client.create_task.assert_called_with(
            'projects/my-project-id/locations/us-east1/queues/queue-name',
            tasks_v2.types.task_pb2.Task(
                name="projects/my-project-id/locations/us-east1/queues/"
                     "queue-name/tasks/us_mo-file_name_1-123456",
                app_engine_http_request={
                    'http_method': 'POST',
                    'relative_uri': "/process_job?region=us_mo",
                    'body': b"{}"
                },
                schedule_time=timestamp_pb2.Timestamp(
                    seconds=(now_timestamp_sec + 3)
                ),
            )
        )

    def test_delete_task(self):
        """Deleting a task calls the client with the task's name."""
        self.client_wrapper.delete_task(
            tasks_v2.types.task_pb2.Task(name='task_name'))

        self.mock_client.delete_task.assert_called_with('task_name')

    def test_delete_task_not_found(self):
        """A NotFound error raised by the client does not propagate out of
        delete_task."""
        self.mock_client.delete_task.side_effect = exceptions.NotFound(
            message='message')

        self.client_wrapper.delete_task(
            tasks_v2.types.task_pb2.Task(name='task_name'))

        self.mock_client.delete_task.assert_called_with('task_name')