Ejemplo n.º 1
0
    def test_process_job_unlaunched_region(self, mock_supported, mock_region,
                                           mock_environment):
        """/process_job answers 400 when environment and region do not match.

        The environment mock returns 'production' while the fake region is
        built with environment='staging'.
        """
        state_code = 'us_ca'
        controller = create_autospec(GcsfsDirectIngestController)

        mock_supported.return_value = ['us_ca', 'us_pa']
        mock_environment.return_value = 'production'
        mock_region.return_value = fake_region(region_code=state_code,
                                               environment='staging',
                                               ingestor=controller)

        # Request body in the shape the endpoint deserializes.
        args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps({
            'cloud_task_args': args.to_serializable(),
            'args_type': 'IngestArgs',
        }).encode()

        response = self.client.post('/process_job',
                                    query_string={'region': state_code},
                                    headers={'X-Appengine-Cron': 'test-cron'},
                                    data=payload)

        self.assertEqual(400, response.status_code)
        self.assertEqual(response.get_data().decode(),
                         "Bad environment [production] for region [us_ca].")
Ejemplo n.º 2
0
    def test_process_job(self, mock_supported, mock_region, mock_environment):
        """/process_job answers 200 and forwards the args to the controller.

        Both the environment mock and the fake region use 'staging'.
        """
        state_code = 'us_nd'
        controller = create_autospec(GcsfsDirectIngestController)

        mock_supported.return_value = ['us_nd', 'us_pa']
        mock_environment.return_value = 'staging'
        mock_region.return_value = fake_region(region_code=state_code,
                                               environment='staging',
                                               ingestor=controller)

        # Request body in the shape the endpoint deserializes.
        args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps({
            'cloud_task_args': args.to_serializable(),
            'args_type': 'IngestArgs',
        }).encode()

        response = self.client.post('/process_job',
                                    query_string={'region': state_code},
                                    headers={'X-Appengine-Cron': 'test-cron'},
                                    data=payload)

        self.assertEqual(200, response.status_code)
        run_job = controller.run_ingest_job_and_kick_scheduler_on_completion
        run_job.assert_called_with(args)
Ejemplo n.º 3
0
 def json_to_ingest_args(json_data):
     """Rebuild an ingest-args object from a decoded JSON mapping.

     Returns None when either the 'ingest_args' or 'args_type' key is missing,
     or when 'args_type' names no known args class (the latter is also logged
     as an error).
     """
     if 'ingest_args' not in json_data or 'args_type' not in json_data:
         return None

     type_name = json_data['args_type']
     serialized = json_data['ingest_args']

     # Try each known args class in turn, matching on its class name.
     for args_cls in (IngestArgs, GcsfsIngestArgs):
         if type_name == args_cls.__name__:
             return args_cls.from_serializable(serialized)

     logging.error('Unexpected args_type in json_data: %s', type_name)
     return None
    def create_direct_ingest_process_job_task(
        self, region: Region, ingest_args: IngestArgs
    ) -> None:
        """Queues *but does not run* a process job task.

        The task is recorded in self.process_job_tasks as a
        (task path, ingest args) pair. The region code used for the task id
        comes from the controller, not from the `region` argument.

        Raises:
            ValueError: if no controller is set.
        """
        controller = self.controller
        if not controller:
            raise ValueError("Controller is null - did you call set_controller()?")

        region_code = controller.region.region_code
        task_id = _build_task_id(region_code, ingest_args.task_id_tag())
        task_path = f"projects/path/to/{task_id}"
        self.process_job_tasks.append((task_path, ingest_args))
    def test_create_direct_ingest_process_job_task(
            self, mock_client: mock.MagicMock,
            mock_uuid: mock.MagicMock) -> None:
        """Creating a process-job task calls create_task with the expected Task."""
        # Arrange
        fake_uuid = "random-uuid"
        mock_uuid.uuid4.return_value = fake_uuid
        client = mock_client.return_value

        ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        expected_body = json.dumps({
            "cloud_task_args": ingest_args.to_serializable(),
            "args_type": "IngestArgs",
        }).encode()

        date = "2019-07-20"
        expected_queue_path = f"{_REGION.shared_queue}-path"
        expected_task_name = (
            f"{_REGION.shared_queue}/{_REGION.region_code}-{date}-{fake_uuid}")

        http_request = {
            "http_method": "POST",
            "relative_uri":
            f"/direct/process_job?region={_REGION.region_code}",
            "body": expected_body,
        }
        expected_task = tasks_v2.types.task_pb2.Task(
            name=expected_task_name,
            app_engine_http_request=http_request,
        )

        client.task_path.return_value = expected_task_name
        client.queue_path.return_value = expected_queue_path

        # Act
        manager = DirectIngestCloudTaskManagerImpl()
        manager.create_direct_ingest_process_job_task(_REGION, ingest_args)

        # Assert
        client.queue_path.assert_called_with(
            self.mock_project_id, QUEUES_REGION, _REGION.shared_queue)
        client.create_task.assert_called_with(
            parent=expected_queue_path, task=expected_task)
Ejemplo n.º 6
0
    def create_direct_ingest_process_job_task(self, region: Region,
                                              ingest_args: IngestArgs) -> None:
        """Creates a task that POSTs the ingest args to /direct/process_job.

        The task is created through the queue manager returned by
        self._get_process_job_queue_manager(region).
        """
        region_code = region.region_code
        new_task_id = _build_task_id(region_code,
                                     ingest_args.task_id_tag(),
                                     prefix_only=False)
        uri = f"/direct/process_job?region={region.region_code}"
        payload = self._get_body_from_args(ingest_args)

        queue_manager = self._get_process_job_queue_manager(region)
        queue_manager.create_task(task_id=new_task_id,
                                  relative_uri=uri,
                                  body=payload)
    def test_process_job_unlaunched_region(
        self,
        mock_supported: mock.MagicMock,
        mock_region: mock.MagicMock,
        mock_environment: mock.MagicMock,
    ) -> None:
        """/process_job answers 400 when environment and region do not match.

        The environment mock returns "production" while the fake region is
        built with environment="staging".
        """
        state_code = "us_ca"
        controller = create_autospec(GcsfsDirectIngestController)

        mock_supported.return_value = ["us_ca", "us_pa"]
        mock_environment.return_value = "production"
        mock_region.return_value = fake_region(region_code=state_code,
                                               environment="staging",
                                               ingestor=controller)

        # Request body in the shape the endpoint deserializes.
        args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps({
            "cloud_task_args": args.to_serializable(),
            "args_type": "IngestArgs",
        }).encode()

        response = self.client.post(
            "/process_job",
            query_string={"region": state_code},
            headers={"X-Appengine-Cron": "test-cron"},
            data=payload,
        )

        self.assertEqual(400, response.status_code)
        self.assertEqual(
            response.get_data().decode(),
            "Bad environment [production] for region [us_ca].",
        )
Ejemplo n.º 8
0
    def test_create_direct_ingest_process_job_task(self, mock_client,
                                                   mock_uuid):
        """Creating a process-job task calls create_task with the expected Task."""
        # Arrange
        project_id = 'recidiviz-456'
        fake_uuid = 'random-uuid'
        mock_uuid.uuid4.return_value = fake_uuid
        client = mock_client.return_value

        ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        expected_body = json.dumps({
            'cloud_task_args': ingest_args.to_serializable(),
            'args_type': 'IngestArgs'
        }).encode()

        date = '2019-07-20'
        expected_queue_path = _REGION.shared_queue + '-path'
        task_suffix = '/{}-{}-{}'.format(_REGION.region_code, date, fake_uuid)
        expected_task_name = _REGION.shared_queue + task_suffix

        http_request = {
            'http_method': 'POST',
            'relative_uri':
            f'/direct/process_job?region={_REGION.region_code}',
            'body': expected_body
        }
        expected_task = tasks_v2.types.task_pb2.Task(
            name=expected_task_name,
            app_engine_http_request=http_request)

        client.task_path.return_value = expected_task_name
        client.queue_path.return_value = expected_queue_path

        # Act
        manager = DirectIngestCloudTaskManagerImpl(project_id=project_id)
        manager.create_direct_ingest_process_job_task(_REGION, ingest_args)

        # Assert
        client.queue_path.assert_called_with(
            project_id, QUEUES_REGION, _REGION.shared_queue)
        client.create_task.assert_called_with(
            parent=expected_queue_path, task=expected_task)
    def create_direct_ingest_process_job_task(self, region: Region,
                                              ingest_args: IngestArgs):
        """Creates a task that POSTs the ingest args to /direct/process_job.

        The task is created through self.cloud_task_client on the queue named
        by region.get_queue_name().
        """
        region_code = region.region_code
        new_task_id = _build_task_id(region_code,
                                     ingest_args.task_id_tag(),
                                     prefix_only=False)
        uri = f'/direct/process_job?region={region.region_code}'
        payload = self._get_body_from_args(ingest_args)

        client = self.cloud_task_client
        client.create_task(task_id=new_task_id,
                           queue_name=region.get_queue_name(),
                           relative_uri=uri,
                           body=payload)
Ejemplo n.º 10
0
    def is_task_queued(self, region: Region, ingest_args: IngestArgs) -> bool:
        """Returns true if the ingest_args correspond to a task currently in
        the queue.

        A task matches when the last path component of its name starts with
        the task id prefix built from the region code and the args' task id
        tag.
        """
        prefix = _build_task_id(region.region_code,
                                ingest_args.task_id_tag(),
                                prefix_only=True)

        return any(
            os.path.basename(queued_name).startswith(prefix)
            for queued_name in self.task_names
        )
    def test_process_job(
        self,
        mock_supported: mock.MagicMock,
        mock_region: mock.MagicMock,
        mock_environment: mock.MagicMock,
    ) -> None:
        """/process_job answers 200 and forwards the args to the controller.

        Both the environment mock and the fake region use "staging".
        """
        state_code = "us_nd"
        controller = create_autospec(GcsfsDirectIngestController)

        mock_supported.return_value = ["us_nd", "us_pa"]
        mock_environment.return_value = "staging"
        mock_region.return_value = fake_region(region_code=state_code,
                                               environment="staging",
                                               ingestor=controller)

        # Request body in the shape the endpoint deserializes.
        args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps({
            "cloud_task_args": args.to_serializable(),
            "args_type": "IngestArgs",
        }).encode()

        response = self.client.post(
            "/process_job",
            query_string={"region": state_code},
            headers={"X-Appengine-Cron": "test-cron"},
            data=payload,
        )

        self.assertEqual(200, response.status_code)
        run_job = controller.run_ingest_job_and_kick_scheduler_on_completion
        run_job.assert_called_with(args)
Ejemplo n.º 12
0
    def _get_next_job_args(self) -> Optional[IngestArgs]:
        """Returns args for the earliest export time in the booking table.

        Runs a MIN(export_time) query against `booking` using the engine from
        self._create_engine().

        Returns:
            IngestArgs for the earliest export time, or None when the query
            yields a falsy value (nothing left to ingest).

        Raises:
            DirectIngestError: if the earliest export time is already present
                in self.scheduled_ingest_times.
        """
        df = pd.read_sql_query('SELECT MIN(export_time) FROM booking',
                               self._create_engine())
        # The aggregate query yields a single cell, so read it by position.
        # Indexing with the builtin function `min` (not a string) is not a
        # column label, and the label given to an un-aliased aggregate
        # depends on the database backend.
        ingest_time = df.iloc[0, 0]
        if not ingest_time:
            logging.info("No more export times - successfully persisted all "
                         "data exports.")
            return None
        if ingest_time in self.scheduled_ingest_times:
            raise DirectIngestError(
                msg=f"Received a second job for ingest time [{ingest_time}]. "
                "Did the previous job delete this export from the database?",
                error_type=DirectIngestErrorType.CLEANUP_ERROR)

        return IngestArgs(ingest_time=ingest_time)
Ejemplo n.º 13
0
    def create_direct_ingest_process_job_task(self, region: Region,
                                              ingest_args: IngestArgs):
        """Builds a task targeting /direct/process_job and queues it.

        The task body is the JSON-encoded result of self._get_body_from_args,
        and the task is queued on region.get_queue_name().
        """
        payload = self._get_body_from_args(ingest_args)

        name_for_task = self._build_task_name_for_queue_and_region(
            region.get_queue_name(),
            region.region_code,
            ingest_args.task_id_tag())

        http_request = {
            'relative_uri':
            f'/direct/process_job?region={region.region_code}',
            'body': json.dumps(payload).encode()
        }
        new_task = tasks.types.Task(name=name_for_task,
                                    app_engine_http_request=http_request)
        self._queue_task(region.get_queue_name(), new_task)
 def json_to_cloud_task_args(json_data: dict):
     """Rebuild a cloud-task args object from a decoded JSON mapping.

     Returns None when either the 'cloud_task_args' or 'args_type' key is
     missing, or when 'args_type' names no known args class (the latter is
     also logged as an error).
     """
     if 'cloud_task_args' not in json_data or 'args_type' not in json_data:
         return None

     type_name = json_data['args_type']
     serialized = json_data['cloud_task_args']

     # Try each known args class in turn, matching on its class name.
     known_arg_classes = (
         IngestArgs,
         GcsfsIngestArgs,
         GcsfsRawDataBQImportArgs,
         GcsfsIngestViewExportArgs,
     )
     for args_cls in known_arg_classes:
         if type_name == args_cls.__name__:
             return args_cls.from_serializable(serialized)

     logging.error('Unexpected args_type in json_data: %s', type_name)
     return None
Ejemplo n.º 15
0
 def json_to_cloud_task_args(json_data: dict) -> Optional[CloudTaskArgs]:
     """Rebuild a cloud-task args object from a decoded JSON mapping.

     Returns None when either the "cloud_task_args" or "args_type" key is
     missing, or when "args_type" names no known args class (the latter is
     also logged as an error).
     """
     if "cloud_task_args" not in json_data or "args_type" not in json_data:
         return None

     type_name = json_data["args_type"]
     serialized = json_data["cloud_task_args"]

     # Try each known args class in turn, matching on its class name.
     known_arg_classes = (
         IngestArgs,
         GcsfsIngestArgs,
         GcsfsRawDataBQImportArgs,
         GcsfsIngestViewExportArgs,
     )
     for args_cls in known_arg_classes:
         if type_name == args_cls.__name__:
             return args_cls.from_serializable(serialized)

     logging.error("Unexpected args_type in json_data: %s", type_name)
     return None
    def test_is_task_queued_no_tasks(self):
        """With no task names in the queue info, nothing is reported queued."""
        # Arrange
        empty_queue = CloudTaskQueueInfo(queue_name='queue_name',
                                         task_names=[])

        normalized_path = to_normalized_unprocessed_file_path('file_path.csv')
        plain_args = IngestArgs(ingest_time=datetime.datetime.now())
        file_args = GcsfsIngestArgs(ingest_time=datetime.datetime.now(),
                                    file_path=normalized_path)

        # Act
        plain_queued = empty_queue.is_task_queued(_REGION, plain_args)
        file_queued = empty_queue.is_task_queued(_REGION, file_args)

        # Assert
        self.assertFalse(plain_queued)
        self.assertFalse(file_queued)

        # A repeated query gives the same answer.
        self.assertFalse(empty_queue.is_task_queued(_REGION, file_args))
Ejemplo n.º 17
0
    def test_is_task_queued_no_tasks(self):
        """With no task names in the queue info, nothing is reported queued."""
        # Arrange
        empty_queue = ProcessIngestJobCloudTaskQueueInfo(
            queue_name='queue_name', task_names=[])

        normalized_path = to_normalized_unprocessed_file_path(
            'bucket/file_path.csv', GcsfsDirectIngestFileType.INGEST_VIEW)
        plain_args = IngestArgs(ingest_time=datetime.datetime.now())
        file_args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=GcsfsFilePath.from_absolute_path(normalized_path))

        # Act
        plain_queued = empty_queue.is_task_queued(_REGION, plain_args)
        file_queued = empty_queue.is_task_queued(_REGION, file_args)

        # Assert
        self.assertFalse(plain_queued)
        self.assertFalse(file_queued)

        # A repeated query gives the same answer.
        self.assertFalse(empty_queue.is_task_queued(_REGION, file_args))
Ejemplo n.º 18
0
 def _get_body_from_args(ingest_args: IngestArgs) -> Dict:
     """Wrap the args in a dict holding their serialized form and class name.

     The class name is stored under 'args_type' and the serialized args under
     'ingest_args'.
     """
     serialized_args = ingest_args.to_serializable()
     # Read __class__ (not type()) so objects that override __class__ report
     # the same name as before.
     args_type_name = ingest_args.__class__.__name__
     return {'ingest_args': serialized_args, 'args_type': args_type_name}