def create_direct_ingest_process_job_task(
        self,
        region: Region,
        ingest_instance: DirectIngestInstance,
        ingest_args: GcsfsIngestArgs,
    ) -> None:
        """Creates a process-job task on the queue for this region/instance.

        The task id is built from the region code, the ingest instance and
        the args' task id tag. The request goes to /direct/process_job with
        the lower-cased region code and the file's abs_path() as URL-encoded
        query parameters; the body is derived from |ingest_args|.
        """
        job_task_id = _build_task_id(
            region.region_code,
            ingest_instance,
            ingest_args.task_id_tag(),
            prefix_only=False,
        )

        query_string = urlencode({
            "region": region.region_code.lower(),
            "file_path": ingest_args.file_path.abs_path(),
        })
        job_uri = f"/direct/process_job?{query_string}"
        request_body = self._get_body_from_args(ingest_args)

        queue_manager = self._get_process_job_queue_manager(
            region, ingest_instance)
        queue_manager.create_task(
            task_id=job_task_id,
            relative_uri=job_uri,
            body=request_body,
        )
Example #2
0
    def test_is_task_queued_has_tasks(self):
        """is_task_queued() finds a task whose id was built from matching args.

        The args handed to is_task_queued() are built separately from the
        ones used to name the queued task, so the lookup is exercised with a
        second, independently constructed object.
        """

        def _new_ingest_args():
            # Every call re-normalizes the path and reads the clock again.
            normalized_path = to_normalized_unprocessed_file_path(
                'bucket/file_path.csv',
                GcsfsDirectIngestFileType.INGEST_VIEW)
            return GcsfsIngestArgs(
                ingest_time=datetime.datetime.now(),
                file_path=GcsfsFilePath.from_absolute_path(normalized_path))

        # Arrange
        enqueued_args = _new_ingest_args()
        enqueued_task_id = _build_task_id(
            _REGION.region_code, enqueued_args.task_id_tag())
        queued_names = [
            'projects/path/to/random_task',
            f'projects/path/to/{enqueued_task_id}',
        ]
        info = ProcessIngestJobCloudTaskQueueInfo(
            queue_name='queue_name', task_names=queued_names)
        lookup_args = _new_ingest_args()

        # Act
        is_queued = info.is_task_queued(_REGION, lookup_args)

        # Assert
        self.assertTrue(is_queued)
    def test_info_single_task(self) -> None:
        """Queue info holding a single PRIMARY task for the region.

        Expects the args to be reported as queued, tasks to be found for the
        PRIMARY instance, and none for the SECONDARY instance.
        """
        # Arrange
        enqueued_args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=self.ingest_view_file_path,
        )
        primary_task_id = _build_task_id(
            _REGION.region_code,
            DirectIngestInstance.PRIMARY,
            enqueued_args.task_id_tag(),
        )
        queued_names = [
            "projects/path/to/random_task",
            f"projects/path/to/{primary_task_id}",
        ]
        info = ProcessIngestJobCloudTaskQueueInfo(
            queue_name="queue_name", task_names=queued_names)

        # The lookup uses a second args object built after the queue info.
        lookup_args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=self.ingest_view_file_path,
        )

        # Act
        is_queued = info.is_task_queued(_REGION, lookup_args)

        # Assert
        self.assertTrue(is_queued)
        primary_tasks = info.tasks_for_instance(
            _REGION.region_code, DirectIngestInstance.PRIMARY)
        self.assertTrue(primary_tasks)
        secondary_tasks = info.tasks_for_instance(
            _REGION.region_code, DirectIngestInstance.SECONDARY)
        self.assertFalse(secondary_tasks)
    def is_task_queued(self, region: Region,
                       ingest_args: GcsfsIngestArgs) -> bool:
        """Tells whether some task in this queue info matches |ingest_args|.

        A task id prefix is built from the region code, the ingest instance
        derived from the bucket of the args' file path, and the args' task id
        tag. The result is the truthiness of the first task yielded for that
        prefix (False when nothing is yielded).
        """
        region_code = region.region_code
        instance = DirectIngestInstance.for_ingest_bucket(
            ingest_args.file_path.bucket_path)
        prefix = _build_task_id(
            region_code,
            instance,
            ingest_args.task_id_tag(),
            prefix_only=True,
        )

        first_match = next(self._tasks_for_prefix(prefix), None)
        return bool(first_match)
    def create_direct_ingest_process_job_task(
        self,
        region: Region,
        ingest_instance: DirectIngestInstance,
        ingest_args: GcsfsIngestArgs,
    ) -> None:
        """Records a process job task in |process_job_tasks|; nothing is run.

        The task id uses the region code of the controller's region; the
        |region| argument is not read here.

        Raises:
            ValueError: if no controller has been set on this object.
        """
        controller = self.controller
        if not controller:
            raise ValueError(
                "Controller is null - did you call set_controller()?")

        task_id = _build_task_id(
            controller.region.region_code,
            ingest_instance,
            ingest_args.task_id_tag(),
        )
        queued_entry = (f"projects/path/to/{task_id}", ingest_args)
        self.process_job_tasks.append(queued_entry)
    def test_is_task_queued_has_tasks(self):
        """is_task_queued() is True when a task built from matching args exists.

        The queue info holds one unrelated task name plus one whose id was
        built from the region code and the args' task id tag. The args passed
        to is_task_queued() are constructed a second time on purpose, so the
        lookup does not receive the very object that named the queued task.
        """
        # Arrange
        file_path = to_normalized_unprocessed_file_path('file_path.csv')
        gcsfs_args = GcsfsIngestArgs(ingest_time=datetime.datetime.now(),
                                     file_path=file_path)
        full_task_name = \
            _build_task_id(_REGION.region_code, gcsfs_args.task_id_tag())
        info = CloudTaskQueueInfo(
            queue_name='queue_name',
            task_names=[
                # Plain literal: there is nothing to interpolate here, so the
                # f-prefix the string used to carry was redundant.
                'projects/path/to/random_task',
                f'projects/path/to/{full_task_name}',
            ])

        # Build the lookup args again from scratch (new path normalization
        # and a new ingest time).
        file_path = to_normalized_unprocessed_file_path('file_path.csv')
        gcsfs_args = GcsfsIngestArgs(ingest_time=datetime.datetime.now(),
                                     file_path=file_path)

        # Act
        gcsfs_args_queued = info.is_task_queued(_REGION, gcsfs_args)

        # Assert
        self.assertTrue(gcsfs_args_queued)
    def test_info_tasks_both_instances(self) -> None:
        """Queue info holding one task per DirectIngestInstance member.

        Expects the args to be reported as queued and tasks to be found for
        every ingest instance.
        """
        # Arrange
        enqueued_args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=self.ingest_view_file_path,
        )

        # One unrelated task, then one task name per ingest instance.
        queued_names = ["projects/path/to/random_task"]
        for instance in DirectIngestInstance:
            instance_task_id = _build_task_id(
                _REGION.region_code,
                instance,
                enqueued_args.task_id_tag(),
            )
            queued_names.append(f"projects/path/to/{instance_task_id}")

        info = ProcessIngestJobCloudTaskQueueInfo(
            queue_name="queue_name", task_names=queued_names)

        # The lookup uses a second args object built after the queue info.
        lookup_args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=self.ingest_view_file_path,
        )

        # Act
        is_queued = info.is_task_queued(_REGION, lookup_args)

        # Assert
        self.assertTrue(is_queued)
        for instance in DirectIngestInstance:
            instance_tasks = info.tasks_for_instance(
                _REGION.region_code, instance)
            self.assertTrue(instance_tasks)