def test_process_job_unlaunched_region(self, mock_supported, mock_region,
                                       mock_environment):
    mock_supported.return_value = ['us_ca', 'us_pa']

    region_code = 'us_ca'

    mock_environment.return_value = 'production'
    mock_controller = create_autospec(GcsfsDirectIngestController)
    mock_region.return_value = fake_region(region_code=region_code,
                                           environment='staging',
                                           ingestor=mock_controller)

    ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    request_args = {'region': region_code}
    body = {
        'cloud_task_args': ingest_args.to_serializable(),
        'args_type': 'IngestArgs',
    }
    body_encoded = json.dumps(body).encode()
    headers = {'X-Appengine-Cron': 'test-cron'}
    response = self.client.post('/process_job',
                                query_string=request_args,
                                headers=headers,
                                data=body_encoded)
    self.assertEqual(400, response.status_code)
    self.assertEqual(response.get_data().decode(),
                     "Bad environment [production] for region [us_ca].")
def test_process_job(self, mock_supported, mock_region, mock_environment):
    mock_supported.return_value = ['us_nd', 'us_pa']

    region_code = 'us_nd'

    mock_environment.return_value = 'staging'
    mock_controller = create_autospec(GcsfsDirectIngestController)
    mock_region.return_value = fake_region(region_code=region_code,
                                           environment='staging',
                                           ingestor=mock_controller)

    ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    request_args = {'region': region_code}
    body = {
        'cloud_task_args': ingest_args.to_serializable(),
        'args_type': 'IngestArgs',
    }
    body_encoded = json.dumps(body).encode()
    headers = {'X-Appengine-Cron': 'test-cron'}
    response = self.client.post('/process_job',
                                query_string=request_args,
                                headers=headers,
                                data=body_encoded)
    self.assertEqual(200, response.status_code)
    mock_controller.run_ingest_job_and_kick_scheduler_on_completion.assert_called_with(
        ingest_args)
def json_to_ingest_args(json_data):
    if 'ingest_args' in json_data and 'args_type' in json_data:
        args_type = json_data['args_type']
        ingest_args = json_data['ingest_args']
        if args_type == IngestArgs.__name__:
            return IngestArgs.from_serializable(ingest_args)
        if args_type == GcsfsIngestArgs.__name__:
            return GcsfsIngestArgs.from_serializable(ingest_args)
        logging.error('Unexpected args_type in json_data: %s', args_type)
    return None
def create_direct_ingest_process_job_task(
        self, region: Region, ingest_args: IngestArgs) -> None:
    """Queues *but does not run* a process job task."""
    if not self.controller:
        raise ValueError(
            "Controller is null - did you call set_controller()?")

    task_id = _build_task_id(self.controller.region.region_code,
                             ingest_args.task_id_tag())
    self.process_job_tasks.append(
        (f"projects/path/to/{task_id}", ingest_args))
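# A hypothetical usage of this fake in a scheduler test (the names below are
# illustrative, not taken from the source):
#
#     task_manager = FakeDirectIngestCloudTaskManager()
#     task_manager.set_controller(mock_controller)
#     task_manager.create_direct_ingest_process_job_task(region, ingest_args)
#     self.assertEqual(1, len(task_manager.process_job_tasks))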
def test_create_direct_ingest_process_job_task(
        self, mock_client: mock.MagicMock,
        mock_uuid: mock.MagicMock) -> None:
    # Arrange
    ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    body = {
        "cloud_task_args": ingest_args.to_serializable(),
        "args_type": "IngestArgs",
    }
    body_encoded = json.dumps(body).encode()
    uuid = "random-uuid"
    mock_uuid.uuid4.return_value = uuid
    date = "2019-07-20"
    queue_path = f"{_REGION.shared_queue}-path"
    task_name = "{}/{}-{}-{}".format(
        _REGION.shared_queue, _REGION.region_code, date, uuid)
    task = tasks_v2.types.task_pb2.Task(
        name=task_name,
        app_engine_http_request={
            "http_method": "POST",
            "relative_uri":
                f"/direct/process_job?region={_REGION.region_code}",
            "body": body_encoded,
        },
    )

    mock_client.return_value.task_path.return_value = task_name
    mock_client.return_value.queue_path.return_value = queue_path

    # Act
    DirectIngestCloudTaskManagerImpl().create_direct_ingest_process_job_task(
        _REGION, ingest_args)

    # Assert
    mock_client.return_value.queue_path.assert_called_with(
        self.mock_project_id, QUEUES_REGION, _REGION.shared_queue)
    mock_client.return_value.create_task.assert_called_with(
        parent=queue_path, task=task)
def create_direct_ingest_process_job_task(self,
                                          region: Region,
                                          ingest_args: IngestArgs) -> None:
    task_id = _build_task_id(region.region_code,
                             ingest_args.task_id_tag(),
                             prefix_only=False)
    relative_uri = f"/direct/process_job?region={region.region_code}"
    body = self._get_body_from_args(ingest_args)

    self._get_process_job_queue_manager(region).create_task(
        task_id=task_id,
        relative_uri=relative_uri,
        body=body,
    )
def test_process_job_unlaunched_region(
        self,
        mock_supported: mock.MagicMock,
        mock_region: mock.MagicMock,
        mock_environment: mock.MagicMock,
) -> None:
    mock_supported.return_value = ["us_ca", "us_pa"]

    region_code = "us_ca"

    mock_environment.return_value = "production"
    mock_controller = create_autospec(GcsfsDirectIngestController)
    mock_region.return_value = fake_region(region_code=region_code,
                                           environment="staging",
                                           ingestor=mock_controller)

    ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    request_args = {"region": region_code}
    body = {
        "cloud_task_args": ingest_args.to_serializable(),
        "args_type": "IngestArgs",
    }
    body_encoded = json.dumps(body).encode()
    headers = {"X-Appengine-Cron": "test-cron"}
    response = self.client.post(
        "/process_job",
        query_string=request_args,
        headers=headers,
        data=body_encoded,
    )
    self.assertEqual(400, response.status_code)
    self.assertEqual(
        response.get_data().decode(),
        "Bad environment [production] for region [us_ca].",
    )
def test_create_direct_ingest_process_job_task(self, mock_client, mock_uuid):
    # Arrange
    project_id = 'recidiviz-456'
    ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    body = {
        'cloud_task_args': ingest_args.to_serializable(),
        'args_type': 'IngestArgs'
    }
    body_encoded = json.dumps(body).encode()
    uuid = 'random-uuid'
    mock_uuid.uuid4.return_value = uuid
    date = '2019-07-20'
    queue_path = _REGION.shared_queue + '-path'
    task_name = _REGION.shared_queue + '/{}-{}-{}'.format(
        _REGION.region_code, date, uuid)
    task = tasks_v2.types.task_pb2.Task(
        name=task_name,
        app_engine_http_request={
            'http_method': 'POST',
            'relative_uri':
                f'/direct/process_job?region={_REGION.region_code}',
            'body': body_encoded
        })

    mock_client.return_value.task_path.return_value = task_name
    mock_client.return_value.queue_path.return_value = queue_path

    # Act
    DirectIngestCloudTaskManagerImpl(project_id=project_id).\
        create_direct_ingest_process_job_task(_REGION, ingest_args)

    # Assert
    mock_client.return_value.queue_path.assert_called_with(
        project_id, QUEUES_REGION, _REGION.shared_queue)
    mock_client.return_value.create_task.assert_called_with(
        parent=queue_path, task=task)
def create_direct_ingest_process_job_task(self,
                                          region: Region,
                                          ingest_args: IngestArgs):
    task_id = _build_task_id(region.region_code,
                             ingest_args.task_id_tag(),
                             prefix_only=False)
    relative_uri = f'/direct/process_job?region={region.region_code}'
    body = self._get_body_from_args(ingest_args)

    self.cloud_task_client.create_task(
        task_id=task_id,
        queue_name=region.get_queue_name(),
        relative_uri=relative_uri,
        body=body,
    )
def is_task_queued(self, region: Region, ingest_args: IngestArgs) -> bool:
    """Returns true if the ingest_args correspond to a task currently in
    the queue.
    """
    task_id_prefix = _build_task_id(region.region_code,
                                    ingest_args.task_id_tag(),
                                    prefix_only=True)
    for task_name in self.task_names:
        _, task_id = os.path.split(task_name)
        if task_id.startswith(task_id_prefix):
            return True
    return False
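# is_task_queued relies on _build_task_id producing a deterministic prefix for
# the same (region, tag) pair, with uniqueness appended only after the prefix.
# The helper's definition is not shown in this section; a minimal sketch of
# the assumed contract:

import uuid
from typing import Optional


def _build_task_id(region_code: str,
                   task_id_tag: Optional[str],
                   prefix_only: bool = False) -> str:
    """Sketch only. The '<region>-<tag>' prefix is deterministic so queued
    tasks can be matched by is_task_queued; a uuid suffix keeps full task ids
    unique across retries of the same args.
    """
    prefix = region_code
    if task_id_tag:
        prefix += f'-{task_id_tag}'
    if prefix_only:
        return prefix
    return f'{prefix}-{uuid.uuid4()}'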
def test_process_job(
        self,
        mock_supported: mock.MagicMock,
        mock_region: mock.MagicMock,
        mock_environment: mock.MagicMock,
) -> None:
    mock_supported.return_value = ["us_nd", "us_pa"]

    region_code = "us_nd"

    mock_environment.return_value = "staging"
    mock_controller = create_autospec(GcsfsDirectIngestController)
    mock_region.return_value = fake_region(region_code=region_code,
                                           environment="staging",
                                           ingestor=mock_controller)

    ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    request_args = {"region": region_code}
    body = {
        "cloud_task_args": ingest_args.to_serializable(),
        "args_type": "IngestArgs",
    }
    body_encoded = json.dumps(body).encode()
    headers = {"X-Appengine-Cron": "test-cron"}
    response = self.client.post(
        "/process_job",
        query_string=request_args,
        headers=headers,
        data=body_encoded,
    )
    self.assertEqual(200, response.status_code)
    mock_controller.run_ingest_job_and_kick_scheduler_on_completion.assert_called_with(
        ingest_args)
def _get_next_job_args(self) -> Optional[IngestArgs]:
    df = pd.read_sql_query('SELECT MIN(export_time) FROM booking',
                           self._create_engine())
    # The query returns a single-cell frame; grab the MIN(export_time) value.
    ingest_time = df.iloc[0, 0]
    if not ingest_time:
        logging.info("No more export times - successfully persisted all "
                     "data exports.")
        return None

    if ingest_time in self.scheduled_ingest_times:
        raise DirectIngestError(
            msg=f"Received a second job for ingest time [{ingest_time}]. "
                "Did the previous job delete this export from the database?",
            error_type=DirectIngestErrorType.CLEANUP_ERROR)

    return IngestArgs(ingest_time=ingest_time)
def create_direct_ingest_process_job_task(self,
                                          region: Region,
                                          ingest_args: IngestArgs):
    body = self._get_body_from_args(ingest_args)
    task_name = self._build_task_name_for_queue_and_region(
        region.get_queue_name(), region.region_code,
        ingest_args.task_id_tag())
    task = tasks.types.Task(
        name=task_name,
        app_engine_http_request={
            'relative_uri':
                f'/direct/process_job?region={region.region_code}',
            'body': json.dumps(body).encode()
        })
    self._queue_task(region.get_queue_name(), task)
def json_to_cloud_task_args(json_data: dict):
    if 'cloud_task_args' in json_data and 'args_type' in json_data:
        args_type = json_data['args_type']
        cloud_task_args_dict = json_data['cloud_task_args']
        if args_type == IngestArgs.__name__:
            return IngestArgs.from_serializable(cloud_task_args_dict)
        if args_type == GcsfsIngestArgs.__name__:
            return GcsfsIngestArgs.from_serializable(cloud_task_args_dict)
        if args_type == GcsfsRawDataBQImportArgs.__name__:
            return GcsfsRawDataBQImportArgs.from_serializable(
                cloud_task_args_dict)
        if args_type == GcsfsIngestViewExportArgs.__name__:
            return GcsfsIngestViewExportArgs.from_serializable(
                cloud_task_args_dict)
        logging.error('Unexpected args_type in json_data: %s', args_type)
    return None
def json_to_cloud_task_args(json_data: dict) -> Optional[CloudTaskArgs]:
    if "cloud_task_args" in json_data and "args_type" in json_data:
        args_type = json_data["args_type"]
        cloud_task_args_dict = json_data["cloud_task_args"]
        if args_type == IngestArgs.__name__:
            return IngestArgs.from_serializable(cloud_task_args_dict)
        if args_type == GcsfsIngestArgs.__name__:
            return GcsfsIngestArgs.from_serializable(cloud_task_args_dict)
        if args_type == GcsfsRawDataBQImportArgs.__name__:
            return GcsfsRawDataBQImportArgs.from_serializable(
                cloud_task_args_dict)
        if args_type == GcsfsIngestViewExportArgs.__name__:
            return GcsfsIngestViewExportArgs.from_serializable(
                cloud_task_args_dict)
        logging.error("Unexpected args_type in json_data: %s", args_type)
    return None
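# A minimal round trip through the task body format, assuming IngestArgs
# implements value equality (e.g., an attrs class):

args = IngestArgs(ingest_time=datetime.datetime(year=2019, month=7, day=20))
body = {
    "cloud_task_args": args.to_serializable(),
    "args_type": "IngestArgs",
}
assert json_to_cloud_task_args(body) == args
assert json_to_cloud_task_args({}) is None  # missing keys fall through to None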
def test_is_task_queued_no_tasks(self):
    # Arrange
    info = CloudTaskQueueInfo(queue_name='queue_name', task_names=[])
    file_path = to_normalized_unprocessed_file_path('file_path.csv')
    args = IngestArgs(ingest_time=datetime.datetime.now())
    gcsfs_args = GcsfsIngestArgs(ingest_time=datetime.datetime.now(),
                                 file_path=file_path)

    # Act
    basic_args_queued = info.is_task_queued(_REGION, args)
    gcsfs_args_queued = info.is_task_queued(_REGION, gcsfs_args)

    # Assert
    self.assertFalse(basic_args_queued)
    self.assertFalse(gcsfs_args_queued)
    self.assertFalse(info.is_task_queued(_REGION, gcsfs_args))
def test_is_task_queued_no_tasks(self):
    # Arrange
    info = ProcessIngestJobCloudTaskQueueInfo(queue_name='queue_name',
                                              task_names=[])
    file_path = to_normalized_unprocessed_file_path(
        'bucket/file_path.csv', GcsfsDirectIngestFileType.INGEST_VIEW)
    args = IngestArgs(ingest_time=datetime.datetime.now())
    gcsfs_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(),
        file_path=GcsfsFilePath.from_absolute_path(file_path))

    # Act
    basic_args_queued = info.is_task_queued(_REGION, args)
    gcsfs_args_queued = info.is_task_queued(_REGION, gcsfs_args)

    # Assert
    self.assertFalse(basic_args_queued)
    self.assertFalse(gcsfs_args_queued)
    self.assertFalse(info.is_task_queued(_REGION, gcsfs_args))
def _get_body_from_args(ingest_args: IngestArgs) -> Dict:
    body = {
        'ingest_args': ingest_args.to_serializable(),
        'args_type': ingest_args.__class__.__name__
    }
    return body
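# Note that this body uses the 'ingest_args' key, so it pairs with the
# json_to_ingest_args deserializer above; the json_to_cloud_task_args variant
# reads 'cloud_task_args' instead. A minimal sketch of the round trip, again
# assuming value equality on IngestArgs:

args = IngestArgs(ingest_time=datetime.datetime(year=2019, month=7, day=20))
assert json_to_ingest_args(_get_body_from_args(args)) == args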