예제 #1
0
    def test_process_job_unlaunched_region(self, mock_supported, mock_region,
                                           mock_environment):
        """A /process_job request is rejected on an environment mismatch.

        The environment mock reports 'production' while the fake region is
        built with environment='staging'; the response is expected to be a
        400 carrying the "Bad environment" message.
        """
        region_code = 'us_ca'
        mock_supported.return_value = [region_code, 'us_pa']
        mock_environment.return_value = 'production'

        mock_controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(
            region_code=region_code,
            environment='staging',
            ingestor=mock_controller)

        # Serialized task payload, as it is posted to the endpoint.
        ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps({
            'cloud_task_args': ingest_args.to_serializable(),
            'args_type': 'IngestArgs',
        }).encode()

        response = self.client.post(
            '/process_job',
            query_string={'region': region_code},
            headers={'X-Appengine-Cron': 'test-cron'},
            data=payload)

        self.assertEqual(400, response.status_code)
        self.assertEqual(
            response.get_data().decode(),
            "Bad environment [production] for region [us_ca].")
예제 #2
0
    def test_process_job(self, mock_supported, mock_region, mock_environment):
        """A /process_job request for a matching environment runs the job.

        Both the environment mock and the fake region use 'staging'; the
        response is expected to be a 200 and the controller must have been
        asked to run the ingest job with the posted args.
        """
        region_code = 'us_nd'
        mock_supported.return_value = [region_code, 'us_pa']
        mock_environment.return_value = 'staging'

        mock_controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(
            region_code=region_code,
            environment='staging',
            ingestor=mock_controller)

        # Serialized task payload, as it is posted to the endpoint.
        ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps({
            'cloud_task_args': ingest_args.to_serializable(),
            'args_type': 'IngestArgs',
        }).encode()

        response = self.client.post(
            '/process_job',
            query_string={'region': region_code},
            headers={'X-Appengine-Cron': 'test-cron'},
            data=payload)

        self.assertEqual(200, response.status_code)
        run_job = \
            mock_controller.run_ingest_job_and_kick_scheduler_on_completion
        run_job.assert_called_with(ingest_args)
예제 #3
0
    def _get_next_job_args(self) -> Optional[IngestArgs]:
        """Build the args for the next ingest job from the oldest export.

        Runs 'SELECT MIN(export_time) FROM booking' against the engine
        returned by self._create_engine().

        Returns:
            IngestArgs for the smallest export_time, or None when the query
            yields no (falsy) export time.

        Raises:
            DirectIngestError: with error type CLEANUP_ERROR when the export
                time found is already in self.scheduled_ingest_times.
        """
        df = pd.read_sql_query('SELECT MIN(export_time) FROM booking',
                               self._create_engine())
        # The aggregate query yields exactly one row and one column. Read the
        # cell by position: indexing with the builtin `min` function (as the
        # code previously did) is not a column label and raises KeyError, and
        # the label the database assigns to the aggregate is backend-specific.
        ingest_time = df.iloc[0, 0]
        if not ingest_time:
            logging.info("No more export times - successfully persisted all "
                         "data exports.")
            return None
        if ingest_time in self.scheduled_ingest_times:
            raise DirectIngestError(
                msg=f"Received a second job for ingest time [{ingest_time}]. "
                "Did the previous job delete this export from the database?",
                error_type=DirectIngestErrorType.CLEANUP_ERROR)

        return IngestArgs(ingest_time=ingest_time)
    def test_is_task_queued_no_tasks(self):
        """No args are reported as queued when the queue has no task names."""
        # Arrange
        queue_info = CloudTaskQueueInfo(queue_name='queue_name', task_names=[])

        normalized_path = to_normalized_unprocessed_file_path('file_path.csv')
        basic_args = IngestArgs(ingest_time=datetime.datetime.now())
        gcsfs_args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=normalized_path)

        # Act
        queued_flags = [
            queue_info.is_task_queued(_REGION, candidate)
            for candidate in (basic_args, gcsfs_args)
        ]

        # Assert
        for is_queued in queued_flags:
            self.assertFalse(is_queued)

        self.assertFalse(queue_info.is_task_queued(_REGION, gcsfs_args))
예제 #5
0
    def test_is_task_queued_no_tasks(self):
        """No args are reported as queued when the queue has no task names."""
        # Arrange
        queue_info = ProcessIngestJobCloudTaskQueueInfo(
            queue_name='queue_name', task_names=[])

        normalized_path = to_normalized_unprocessed_file_path(
            'bucket/file_path.csv', GcsfsDirectIngestFileType.INGEST_VIEW)
        basic_args = IngestArgs(ingest_time=datetime.datetime.now())
        gcsfs_args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=GcsfsFilePath.from_absolute_path(normalized_path))

        # Act
        queued_flags = [
            queue_info.is_task_queued(_REGION, candidate)
            for candidate in (basic_args, gcsfs_args)
        ]

        # Assert
        for is_queued in queued_flags:
            self.assertFalse(is_queued)

        self.assertFalse(queue_info.is_task_queued(_REGION, gcsfs_args))
    def test_process_job_unlaunched_region(
        self,
        mock_supported: mock.MagicMock,
        mock_region: mock.MagicMock,
        mock_environment: mock.MagicMock,
    ) -> None:
        """A /process_job request is rejected on an environment mismatch.

        The environment mock reports "production" while the fake region is
        built with environment="staging"; the response is expected to be a
        400 carrying the "Bad environment" message.
        """
        region_code = "us_ca"
        mock_supported.return_value = [region_code, "us_pa"]
        mock_environment.return_value = "production"

        mock_controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(
            region_code=region_code,
            environment="staging",
            ingestor=mock_controller,
        )

        # Serialized task payload, as it is posted to the endpoint.
        ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps(
            {
                "cloud_task_args": ingest_args.to_serializable(),
                "args_type": "IngestArgs",
            }
        ).encode()

        response = self.client.post(
            "/process_job",
            query_string={"region": region_code},
            headers={"X-Appengine-Cron": "test-cron"},
            data=payload,
        )

        status = response.status_code
        message = response.get_data().decode()
        self.assertEqual(400, status)
        self.assertEqual(
            message,
            "Bad environment [production] for region [us_ca].",
        )
    def test_create_direct_ingest_process_job_task(
            self, mock_client: mock.MagicMock,
            mock_uuid: mock.MagicMock) -> None:
        """Creating a process-job task submits the expected Task to the queue.

        Builds the Task the manager is expected to create for _REGION and
        checks the mocked client was asked for the queue path and then asked
        to create exactly that task under it.
        """
        # Arrange
        ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps({
            "cloud_task_args": ingest_args.to_serializable(),
            "args_type": "IngestArgs",
        }).encode()

        fake_uuid = "random-uuid"
        mock_uuid.uuid4.return_value = fake_uuid
        date_str = "2019-07-20"
        queue_path = f"{_REGION.shared_queue}-path"

        task_name = (
            f"{_REGION.shared_queue}/{_REGION.region_code}"
            f"-{date_str}-{fake_uuid}"
        )
        http_request = {
            "http_method": "POST",
            "relative_uri":
            f"/direct/process_job?region={_REGION.region_code}",
            "body": payload,
        }
        expected_task = tasks_v2.types.task_pb2.Task(
            name=task_name,
            app_engine_http_request=http_request,
        )

        client_instance = mock_client.return_value
        client_instance.task_path.return_value = task_name
        client_instance.queue_path.return_value = queue_path

        # Act
        task_manager = DirectIngestCloudTaskManagerImpl()
        task_manager.create_direct_ingest_process_job_task(
            _REGION, ingest_args)

        # Assert
        client_instance.queue_path.assert_called_with(
            self.mock_project_id, QUEUES_REGION, _REGION.shared_queue)
        client_instance.create_task.assert_called_with(
            parent=queue_path, task=expected_task)
예제 #8
0
    def test_create_direct_ingest_process_job_task(self, mock_client,
                                                   mock_uuid):
        """Creating a process-job task submits the expected Task to the queue.

        Builds the Task the manager is expected to create for _REGION and
        checks the mocked client was asked for the queue path and then asked
        to create exactly that task under it.
        """
        # Arrange
        project_id = 'recidiviz-456'
        ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps({
            'cloud_task_args': ingest_args.to_serializable(),
            'args_type': 'IngestArgs'
        }).encode()

        fake_uuid = 'random-uuid'
        mock_uuid.uuid4.return_value = fake_uuid
        date_str = '2019-07-20'
        queue_path = _REGION.shared_queue + '-path'

        task_suffix = f'/{_REGION.region_code}-{date_str}-{fake_uuid}'
        task_name = _REGION.shared_queue + task_suffix
        http_request = {
            'http_method': 'POST',
            'relative_uri':
            f'/direct/process_job?region={_REGION.region_code}',
            'body': payload
        }
        expected_task = tasks_v2.types.task_pb2.Task(
            name=task_name,
            app_engine_http_request=http_request)

        client_instance = mock_client.return_value
        client_instance.task_path.return_value = task_name
        client_instance.queue_path.return_value = queue_path

        # Act
        task_manager = DirectIngestCloudTaskManagerImpl(project_id=project_id)
        task_manager.create_direct_ingest_process_job_task(
            _REGION, ingest_args)

        # Assert
        client_instance.queue_path.assert_called_with(
            project_id, QUEUES_REGION, _REGION.shared_queue)
        client_instance.create_task.assert_called_with(
            parent=queue_path, task=expected_task)
    def test_process_job(
        self,
        mock_supported: mock.MagicMock,
        mock_region: mock.MagicMock,
        mock_environment: mock.MagicMock,
    ) -> None:
        """A /process_job request for a matching environment runs the job.

        Both the environment mock and the fake region use "staging"; the
        response is expected to be a 200 and the controller must have been
        asked to run the ingest job with the posted args.
        """
        region_code = "us_nd"
        mock_supported.return_value = [region_code, "us_pa"]
        mock_environment.return_value = "staging"

        mock_controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(
            region_code=region_code,
            environment="staging",
            ingestor=mock_controller,
        )

        # Serialized task payload, as it is posted to the endpoint.
        ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps(
            {
                "cloud_task_args": ingest_args.to_serializable(),
                "args_type": "IngestArgs",
            }
        ).encode()

        response = self.client.post(
            "/process_job",
            query_string={"region": region_code},
            headers={"X-Appengine-Cron": "test-cron"},
            data=payload,
        )

        self.assertEqual(200, response.status_code)
        run_job = (
            mock_controller.run_ingest_job_and_kick_scheduler_on_completion
        )
        run_job.assert_called_with(ingest_args)