Example #1
0
    def test_is_task_queued_has_tasks(self):
        """A separately-constructed, equivalent args object is reported as queued."""
        # Arrange
        normalized_path = to_normalized_unprocessed_file_path(
            'bucket/file_path.csv', GcsfsDirectIngestFileType.INGEST_VIEW)
        args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=GcsfsFilePath.from_absolute_path(normalized_path))

        queued_task_name = _build_task_id(_REGION.region_code,
                                          args.task_id_tag())
        queue_info = ProcessIngestJobCloudTaskQueueInfo(
            queue_name='queue_name',
            task_names=[
                'projects/path/to/random_task',
                f'projects/path/to/{queued_task_name}',
            ])
        # Rebuild an equivalent args object so the lookup cannot depend on
        # object identity.
        normalized_path = to_normalized_unprocessed_file_path(
            'bucket/file_path.csv', GcsfsDirectIngestFileType.INGEST_VIEW)
        args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=GcsfsFilePath.from_absolute_path(normalized_path))

        # Act
        is_queued = queue_info.is_task_queued(_REGION, args)

        # Assert
        self.assertTrue(is_queued)
 def create_direct_ingest_raw_data_import_task(self, region: Region, data_import_args: GcsfsRawDataBQImportArgs):
     """Records a fake raw-data BQ import task instead of queueing a real one."""
     if not self.controller:
         raise ValueError(
             "Controller is null - did you call set_controller()?")
     task_name = _build_task_id(self.controller.region.region_code, None)
     self.bq_import_export_tasks.append(
         (f'projects/path/to/{task_name}-raw_data_import', data_import_args))
    def test_info_single_task(self) -> None:
        """A queued PRIMARY task is found; SECONDARY reports no tasks."""
        # Arrange
        args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=self.ingest_view_file_path,
        )

        queued_task_name = _build_task_id(
            _REGION.region_code,
            DirectIngestInstance.PRIMARY,
            args.task_id_tag(),
        )
        queue_info = ProcessIngestJobCloudTaskQueueInfo(
            queue_name="queue_name",
            task_names=[
                "projects/path/to/random_task",
                f"projects/path/to/{queued_task_name}",
            ],
        )
        # Rebuild an equivalent args object so the lookup cannot depend on
        # object identity.
        args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=self.ingest_view_file_path,
        )

        # Act
        is_queued = queue_info.is_task_queued(_REGION, args)

        # Assert
        self.assertTrue(is_queued)
        self.assertTrue(
            queue_info.tasks_for_instance(
                _REGION.region_code, DirectIngestInstance.PRIMARY))
        self.assertFalse(
            queue_info.tasks_for_instance(
                _REGION.region_code, DirectIngestInstance.SECONDARY))
 def create_direct_ingest_sftp_download_task(self, region: Region) -> None:
     """Records a fake SFTP download task instead of queueing a real one."""
     if not self.controller:
         raise ValueError(
             "Controller is null - did you call set_controller()?")
     task_name = _build_task_id(self.controller.region.region_code,
                                DirectIngestInstance.PRIMARY, None)
     self.sftp_tasks.append(
         f"projects/path/to/{task_name}-handle_sftp_download")
 def create_direct_ingest_handle_new_files_task(self, region: Region,
                                                can_start_ingest: bool):
     """Records a fake handle-new-files task instead of queueing a real one."""
     if not self.controller:
         raise ValueError(
             "Controller is null - did you call set_controller()?")
     task_name = _build_task_id(self.controller.region.region_code, None)
     self.scheduler_tasks.append(
         (f'projects/path/to/{task_name}-handle_new_files', can_start_ingest))
 def create_direct_ingest_ingest_view_export_task(
     self, region: Region, ingest_view_export_args: GcsfsIngestViewExportArgs
 ) -> None:
     """Records a fake ingest-view export task instead of queueing a real one."""
     if not self.controller:
         raise ValueError("Controller is null - did you call set_controller()?")
     task_name = _build_task_id(self.controller.region.region_code, None)
     self.bq_import_export_tasks.append(
         (f"projects/path/to/{task_name}-ingest_view_export",
          ingest_view_export_args))
    def create_direct_ingest_scheduler_queue_task(
        self, region: Region, just_finished_job: bool, delay_sec: int
    ) -> None:
        """Queues *but does not run* a scheduler task."""
        if not self.controller:
            raise ValueError("Controller is null - did you call set_controller()?")

        task_name = _build_task_id(self.controller.region.region_code, None)
        # delay_sec is accepted for interface parity but not simulated here.
        self.scheduler_tasks.append(
            (f"projects/path/to/{task_name}-schedule", just_finished_job))
    def create_direct_ingest_process_job_task(
        self, region: Region, ingest_args: IngestArgs
    ) -> None:
        """Queues *but does not run* a process job task."""
        if not self.controller:
            raise ValueError("Controller is null - did you call set_controller()?")

        task_name = _build_task_id(
            self.controller.region.region_code, ingest_args.task_id_tag())
        self.process_job_tasks.append(
            (f"projects/path/to/{task_name}", ingest_args))
 def create_direct_ingest_handle_new_files_task(
     self,
     region: Region,
     ingest_instance: DirectIngestInstance,
     ingest_bucket: GcsfsBucketPath,
     can_start_ingest: bool,
 ) -> None:
     """Records a fake handle-new-files task instead of queueing a real one."""
     if not self.controller:
         raise ValueError(
             "Controller is null - did you call set_controller()?")
     task_name = _build_task_id(self.controller.region.region_code,
                                ingest_instance, None)
     entry = (
         f"projects/path/to/{task_name}-handle_new_files",
         ingest_bucket,
         can_start_ingest,
     )
     self.scheduler_tasks.append(entry)
    def test_is_task_queued_has_tasks(self):
        """is_task_queued() matches an equivalent, separately-built args object.

        Fix: the 'random_task' literal had an f-string prefix with no
        placeholders (ruff F541); it is now a plain string. The value is
        unchanged.
        """
        # Arrange
        file_path = to_normalized_unprocessed_file_path('file_path.csv')
        gcsfs_args = GcsfsIngestArgs(ingest_time=datetime.datetime.now(),
                                     file_path=file_path)
        full_task_name = \
            _build_task_id(_REGION.region_code, gcsfs_args.task_id_tag())
        info = CloudTaskQueueInfo(queue_name='queue_name',
                                  task_names=[
                                      'projects/path/to/random_task',
                                      f'projects/path/to/{full_task_name}'
                                  ])
        # Re-create the args so matching cannot rely on object identity.
        file_path = to_normalized_unprocessed_file_path('file_path.csv')
        gcsfs_args = GcsfsIngestArgs(ingest_time=datetime.datetime.now(),
                                     file_path=file_path)

        # Act
        gcsfs_args_queued = info.is_task_queued(_REGION, gcsfs_args)

        # Assert
        self.assertTrue(gcsfs_args_queued)
    def test_info_tasks_both_instances(self) -> None:
        """With a queued task per instance, every instance reports tasks."""
        # Arrange
        args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=self.ingest_view_file_path,
        )

        queued_task_names = []
        for instance in DirectIngestInstance:
            queued_task_names.append(
                _build_task_id(
                    _REGION.region_code,
                    instance,
                    args.task_id_tag(),
                ))

        all_task_names = ["projects/path/to/random_task"]
        all_task_names.extend(
            f"projects/path/to/{name}" for name in queued_task_names)
        queue_info = ProcessIngestJobCloudTaskQueueInfo(
            queue_name="queue_name",
            task_names=all_task_names,
        )
        # Rebuild an equivalent args object so the lookup cannot depend on
        # object identity.
        args = GcsfsIngestArgs(
            ingest_time=datetime.datetime.now(),
            file_path=self.ingest_view_file_path,
        )

        # Act
        is_queued = queue_info.is_task_queued(_REGION, args)

        # Assert
        self.assertTrue(is_queued)
        for instance in DirectIngestInstance:
            self.assertTrue(
                queue_info.tasks_for_instance(_REGION.region_code, instance))