def test_process_job_unlaunched_region(self, mock_supported, mock_region, mock_environment):
    """POSTing /process_job in production for a staging-only region yields HTTP 400."""
    # Region is supported, but only launched in staging while we run in production.
    mock_supported.return_value = ['us_ca', 'us_pa']
    mock_environment.return_value = 'production'
    fake_controller = create_autospec(GcsfsDirectIngestController)
    mock_region.return_value = fake_region(region_code='us_ca',
                                           environment='staging',
                                           ingestor=fake_controller)

    job_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    payload = json.dumps({
        'cloud_task_args': job_args.to_serializable(),
        'args_type': 'IngestArgs',
    }).encode()

    response = self.client.post('/process_job',
                                query_string={'region': 'us_ca'},
                                headers={'X-Appengine-Cron': 'test-cron'},
                                data=payload)

    # The endpoint must refuse to run and explain which environment mismatched.
    self.assertEqual(400, response.status_code)
    self.assertEqual(response.get_data().decode(),
                     "Bad environment [production] for region [us_ca].")
def test_process_job(self, mock_supported, mock_region, mock_environment):
    """A valid /process_job request hands the deserialized args to the controller."""
    mock_supported.return_value = ['us_nd', 'us_pa']
    mock_environment.return_value = 'staging'
    fake_controller = create_autospec(GcsfsDirectIngestController)
    mock_region.return_value = fake_region(region_code='us_nd',
                                           environment='staging',
                                           ingestor=fake_controller)

    job_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    payload = json.dumps({
        'cloud_task_args': job_args.to_serializable(),
        'args_type': 'IngestArgs',
    }).encode()

    response = self.client.post('/process_job',
                                query_string={'region': 'us_nd'},
                                headers={'X-Appengine-Cron': 'test-cron'},
                                data=payload)

    self.assertEqual(200, response.status_code)
    # The controller must receive args equal to the ones we serialized.
    fake_controller.run_ingest_job_and_kick_scheduler_on_completion.assert_called_with(
        job_args)
def _get_next_job_args(self) -> Optional[IngestArgs]:
    """Return IngestArgs for the earliest un-persisted export, or None when done.

    Queries the booking table for the minimum export_time still present.

    Returns:
        IngestArgs wrapping the minimum export time, or None if the table has
        no remaining export times (i.e. all exports have been persisted).

    Raises:
        DirectIngestError: if the minimum export time was already scheduled,
            which indicates the previous job failed to delete its rows.
    """
    df = pd.read_sql_query('SELECT MIN(export_time) FROM booking',
                           self._create_engine())
    # The query yields a single row with a single aggregate column whose name
    # varies by SQL driver, so read it positionally. (BUGFIX: the original
    # `df[min][0]` indexed the frame with the `min` builtin, a KeyError.)
    ingest_time = df.iloc[0, 0]
    if not ingest_time:
        logging.info("No more export times - successfully persisted all "
                     "data exports.")
        return None

    if ingest_time in self.scheduled_ingest_times:
        raise DirectIngestError(
            msg=f"Received a second job for ingest time [{ingest_time}]. "
            "Did the previous job delete this export from the database?",
            error_type=DirectIngestErrorType.CLEANUP_ERROR)

    return IngestArgs(ingest_time=ingest_time)
def test_is_task_queued_no_tasks(self):
    """A queue with no task names reports nothing queued for either args flavor."""
    # Arrange
    queue_info = CloudTaskQueueInfo(queue_name='queue_name', task_names=[])
    normalized_path = to_normalized_unprocessed_file_path('file_path.csv')
    plain_args = IngestArgs(ingest_time=datetime.datetime.now())
    file_args = GcsfsIngestArgs(ingest_time=datetime.datetime.now(),
                                file_path=normalized_path)

    # Act / Assert
    self.assertFalse(queue_info.is_task_queued(_REGION, plain_args))
    self.assertFalse(queue_info.is_task_queued(_REGION, file_args))
    # Asking again must still be false — the check has no caching side effects.
    self.assertFalse(queue_info.is_task_queued(_REGION, file_args))
def test_is_task_queued_no_tasks(self):
    """An empty process-job queue reports nothing queued for either args flavor."""
    # Arrange
    queue_info = ProcessIngestJobCloudTaskQueueInfo(queue_name='queue_name',
                                                    task_names=[])
    normalized_path = to_normalized_unprocessed_file_path(
        'bucket/file_path.csv', GcsfsDirectIngestFileType.INGEST_VIEW)
    plain_args = IngestArgs(ingest_time=datetime.datetime.now())
    file_args = GcsfsIngestArgs(
        ingest_time=datetime.datetime.now(),
        file_path=GcsfsFilePath.from_absolute_path(normalized_path))

    # Act / Assert
    self.assertFalse(queue_info.is_task_queued(_REGION, plain_args))
    self.assertFalse(queue_info.is_task_queued(_REGION, file_args))
    # Asking again must still be false — the check has no caching side effects.
    self.assertFalse(queue_info.is_task_queued(_REGION, file_args))
def test_process_job_unlaunched_region(
    self,
    mock_supported: mock.MagicMock,
    mock_region: mock.MagicMock,
    mock_environment: mock.MagicMock,
) -> None:
    """POSTing /process_job in production for a staging-only region yields HTTP 400."""
    # Region is supported, but only launched in staging while we run in production.
    mock_supported.return_value = ["us_ca", "us_pa"]
    mock_environment.return_value = "production"
    fake_controller = create_autospec(GcsfsDirectIngestController)
    mock_region.return_value = fake_region(
        region_code="us_ca", environment="staging", ingestor=fake_controller
    )

    job_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    payload = json.dumps(
        {
            "cloud_task_args": job_args.to_serializable(),
            "args_type": "IngestArgs",
        }
    ).encode()

    response = self.client.post(
        "/process_job",
        query_string={"region": "us_ca"},
        headers={"X-Appengine-Cron": "test-cron"},
        data=payload,
    )

    # The endpoint must refuse to run and explain which environment mismatched.
    self.assertEqual(400, response.status_code)
    self.assertEqual(
        response.get_data().decode(),
        "Bad environment [production] for region [us_ca].",
    )
def test_create_direct_ingest_process_job_task(
    self, mock_client: mock.MagicMock, mock_uuid: mock.MagicMock
) -> None:
    """The task manager builds and enqueues a correctly-shaped process-job task."""
    # Arrange
    job_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    serialized_body = json.dumps(
        {
            "cloud_task_args": job_args.to_serializable(),
            "args_type": "IngestArgs",
        }
    ).encode()
    mock_uuid.uuid4.return_value = "random-uuid"
    # Task names embed the region, the job date, and a uuid for uniqueness.
    expected_task_name = (
        f"{_REGION.shared_queue}/{_REGION.region_code}-2019-07-20-random-uuid"
    )
    expected_queue_path = f"{_REGION.shared_queue}-path"
    expected_task = tasks_v2.types.task_pb2.Task(
        name=expected_task_name,
        app_engine_http_request={
            "http_method": "POST",
            "relative_uri": f"/direct/process_job?region={_REGION.region_code}",
            "body": serialized_body,
        },
    )
    mock_client.return_value.task_path.return_value = expected_task_name
    mock_client.return_value.queue_path.return_value = expected_queue_path

    # Act
    DirectIngestCloudTaskManagerImpl().create_direct_ingest_process_job_task(
        _REGION, job_args
    )

    # Assert
    mock_client.return_value.queue_path.assert_called_with(
        self.mock_project_id, QUEUES_REGION, _REGION.shared_queue
    )
    mock_client.return_value.create_task.assert_called_with(
        parent=expected_queue_path, task=expected_task
    )
def test_create_direct_ingest_process_job_task(self, mock_client, mock_uuid):
    """The task manager builds and enqueues a correctly-shaped process-job task."""
    # Arrange
    project_id = 'recidiviz-456'
    job_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    serialized_body = json.dumps({
        'cloud_task_args': job_args.to_serializable(),
        'args_type': 'IngestArgs'
    }).encode()
    mock_uuid.uuid4.return_value = 'random-uuid'
    # Task names embed the region, the job date, and a uuid for uniqueness.
    expected_task_name = (
        f'{_REGION.shared_queue}/{_REGION.region_code}-2019-07-20-random-uuid')
    expected_queue_path = f'{_REGION.shared_queue}-path'
    expected_task = tasks_v2.types.task_pb2.Task(
        name=expected_task_name,
        app_engine_http_request={
            'http_method': 'POST',
            'relative_uri':
                f'/direct/process_job?region={_REGION.region_code}',
            'body': serialized_body
        })
    mock_client.return_value.task_path.return_value = expected_task_name
    mock_client.return_value.queue_path.return_value = expected_queue_path

    # Act
    manager = DirectIngestCloudTaskManagerImpl(project_id=project_id)
    manager.create_direct_ingest_process_job_task(_REGION, job_args)

    # Assert
    mock_client.return_value.queue_path.assert_called_with(
        project_id, QUEUES_REGION, _REGION.shared_queue)
    mock_client.return_value.create_task.assert_called_with(
        parent=expected_queue_path, task=expected_task)
def test_process_job(
    self,
    mock_supported: mock.MagicMock,
    mock_region: mock.MagicMock,
    mock_environment: mock.MagicMock,
) -> None:
    """A valid /process_job request hands the deserialized args to the controller."""
    mock_supported.return_value = ["us_nd", "us_pa"]
    mock_environment.return_value = "staging"
    fake_controller = create_autospec(GcsfsDirectIngestController)
    mock_region.return_value = fake_region(
        region_code="us_nd", environment="staging", ingestor=fake_controller
    )

    job_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
    payload = json.dumps(
        {
            "cloud_task_args": job_args.to_serializable(),
            "args_type": "IngestArgs",
        }
    ).encode()

    response = self.client.post(
        "/process_job",
        query_string={"region": "us_nd"},
        headers={"X-Appengine-Cron": "test-cron"},
        data=payload,
    )

    self.assertEqual(200, response.status_code)
    # The controller must receive args equal to the ones we serialized.
    fake_controller.run_ingest_job_and_kick_scheduler_on_completion.assert_called_with(
        job_args
    )