예제 #1
0
    def create_new_task(self, task_builder: tasks.TaskBuilder,
                        **job_args: Union[str, AbsPackageComponent]) -> None:
        """Queue one package-conversion subtask per requested output.

        A ``PackageConverter`` subtask is added for the "output_dl" root and
        another for the "output_ht" root. An output whose root is missing or
        ``None`` is skipped.

        Args:
            task_builder: Builder that receives the generated subtasks.
            **job_args: Job settings. "package" and "source_path" are
                required; "output_dl" and "output_ht" are optional.

        """
        job_arguments = typing.cast(JobArguments, job_args)
        existing_package: AbsPackageComponent = job_arguments['package']
        source_path = job_arguments["source_path"]
        package_id: str = existing_package.metadata[Metadata.ID]

        # (output root, package format) pairs, listed in the order the
        # subtasks are queued.
        requested_outputs = (
            (job_arguments.get("output_dl"), "Digital Library Compound"),
            (job_arguments.get("output_ht"), "HathiTrust jp2"),
        )
        for new_package_root, package_format in requested_outputs:
            if new_package_root is None:
                continue
            task_builder.add_subtask(
                PackageConverter(
                    source_path=source_path,
                    existing_package=existing_package,
                    new_package_root=new_package_root,
                    packaging_id=package_id,
                    package_format=package_format,
                )
            )
예제 #2
0
def simple_task_builder(tmpdir_factory):
    """Yield a ``TaskBuilder`` holding one subtask, then remove its folder.

    The working folder is a "test" directory created under the factory's
    base temp directory. It is deleted once the consumer has finished with
    the builder.
    """
    working_dir = os.path.join(tmpdir_factory.getbasetemp(), "test")
    os.makedirs(working_dir)

    task_builder = TaskBuilder(SimpleTaskBuilder(), str(working_dir))
    task_builder.add_subtask(subtask=SimpleSubtask("got it"))

    yield task_builder

    # Teardown: drop the working folder created above.
    shutil.rmtree(working_dir)
예제 #3
0
    def create_new_task(self, task_builder: tasks.TaskBuilder,
                        **job_args: str) -> None:
        """Queue the subtasks needed to convert a single file.

        Two subtasks are added, in this order:
           * an ``EnsurePathTask`` for the destination folder
           * a ``ConvertFileTask`` for the file itself

        Args:
            task_builder: Builder that receives the subtasks.
            **job_args: Must provide "source_root", "source_file",
                "relative_location", "destination_root", "new_file_name"
                and "image_factory".

        """
        source_root = job_args['source_root']
        source_name = job_args["source_file"]
        relative_location = job_args["relative_location"]
        destination_root = job_args["destination_root"]
        new_name = job_args["new_file_name"]
        image_factory = job_args["image_factory"]

        source_path = os.path.join(
            source_root, relative_location, source_name)
        destination_path = os.path.join(
            destination_root, relative_location, new_name)
        destination_folder = os.path.join(destination_root, relative_location)

        ensure_folder_task = EnsurePathTask(destination_folder)
        conversion_task = ConvertFileTask(
            source_file=source_path,
            destination_file=destination_path,
            image_factory_name=image_factory,
        )

        # The folder subtask is queued ahead of the conversion.
        for subtask in (ensure_folder_task, conversion_task):
            task_builder.add_subtask(subtask)
    def create_new_task(self, task_builder: tasks.TaskBuilder,
                        **job_args: str) -> None:
        """Queue a ``PackageImageConverterTask`` for one source file.

        Args:
            task_builder: Builder that receives the subtask.
            **job_args: Must provide "source_file" and "output_path".

        """
        source_file_path = job_args['source_file']
        output_path = job_args['output_path']
        task_builder.add_subtask(
            PackageImageConverterTask(
                source_file_path=source_file_path,
                dest_path=output_path,
            )
        )
    def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
        """Queue a ``MakeChecksumTask`` for one file.

        Args:
            task_builder: Builder that receives the subtask.
            **job_args: Must provide "source_path", "filename" and
                "save_to_filename".

        """
        location = job_args['source_path']
        file_name = job_args['filename']
        report_file = job_args['save_to_filename']

        checksum_task = checksum_tasks.MakeChecksumTask(
            location, file_name, report_file)
        task_builder.add_subtask(checksum_task)
예제 #6
0
    def initial_task(self, task_builder: tasks.TaskBuilder,
                     **user_args) -> None:
        """Queue a ``FindImagesTask`` rooted at the user-selected path.

        Args:
            task_builder: Builder that receives the subtask.
            **user_args: User settings; "Path" and "Image File Type" are
                read. The file type is translated to an extension with
                ``self.get_file_extension``.

        """
        search_root = user_args['Path']
        extension = self.get_file_extension(user_args["Image File Type"])

        find_images = FindImagesTask(search_root, file_extension=extension)
        task_builder.add_subtask(find_images)
 def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
     """Queue a ``ValidateChecksumTask`` for one file.

     Args:
         task_builder: Builder that receives the subtask.
         **job_args: Must provide "filename", "path", "expected_hash" and
             "source_report".

     """
     validation_task = ValidateChecksumTask(
         file_name=job_args['filename'],
         file_path=job_args['path'],
         expected_hash=job_args['expected_hash'],
         source_report=job_args['source_report'],
     )
     task_builder.add_subtask(validation_task)
예제 #8
0
def test_adapter_results_with_posttask(tmpdir):
    """Run two subtasks plus a post-task and check the collected results.

    The built task's subtasks are wrapped in ``SubtaskJobAdapter`` objects,
    run through a ``ToolJobManager``, and the three results are expected to
    be "First", "Second" and "Ending", in that order.
    """
    temp_path = tmpdir.mkdir("test")
    post_task = SimpleSubtask("Ending")

    builder = TaskBuilder(SimpleTaskBuilder(), temp_path)
    builder.set_posttask(subtask=post_task)
    builder.add_subtask(subtask=SimpleSubtask("First"))
    builder.add_subtask(subtask=SimpleSubtask("Second"))
    new_task = builder.build_task()

    with worker.ToolJobManager() as manager:
        for subtask in new_task.subtasks:
            adapted_tool = speedwagon.worker.SubtaskJobAdapter(subtask)
            manager.add_job(adapted_tool, adapted_tool.settings)
        manager.start()
        results = [result.data for result in manager.get_results()]

        assert len(results) == 3
        assert "First" == results[0]
        assert "Second" == results[1]
        assert "Ending" == results[2]

    shutil.rmtree(tmpdir)
    shortcut = os.path.join(tmpdir.dirname,
                            "test_adapter_results_with_postcurrent")
    # The shortcut is not guaranteed to exist (presumably it is a symlink
    # that is only created on some platforms - TODO confirm), so only remove
    # it when present. lexists() is used because exists() reports False for
    # a symlink whose target was just deleted.
    if os.path.lexists(shortcut):
        os.unlink(shortcut)
예제 #9
0
    def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
        """Queue a ``PackageConverter`` subtask for one package.

        Args:
            task_builder: Builder that receives the subtask.
            **job_args: Must provide "package", "output" and "source_path".
                The packaging id is read from the package metadata.

        """
        package = job_args['package']
        output_root = job_args["output"]
        source_path = job_args["source_path"]
        package_id = package.metadata[Metadata.ID]

        task_builder.add_subtask(
            PackageConverter(
                source_path=source_path,
                existing_package=package,
                new_package_root=output_root,
                packaging_id=package_id,
            )
        )
    def completion_task(self, task_builder: tasks.TaskBuilder, results,
                        **user_args) -> None:
        """Queue one ``MakeCheckSumReportTask`` per group of results.

        Args:
            task_builder: Builder that receives the subtasks.
            results: Earlier results; the ``data`` attribute of each item is
                collected and grouped by ``self.sort_results``.
            **user_args: Not read by this method.

        """
        result_data = [result.data for result in results]
        grouped = self.sort_results(result_data)

        for report_name, report_checksums in grouped.items():
            report_task = checksum_tasks.MakeCheckSumReportTask(
                report_name, report_checksums)
            task_builder.add_subtask(report_task)
예제 #11
0
    def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
        """Queue a ``GenerateOCRFileTask`` for one image.

        Args:
            task_builder: Builder that receives the subtask.
            **job_args: Must provide "source_file_path", "destination_path",
                "output_file_name" and "lang_code".

        """
        source_image = job_args["source_file_path"]
        output_folder = job_args["destination_path"]
        output_name = job_args["output_file_name"]
        language = job_args["lang_code"]

        # The text file is written inside the destination folder.
        text_file = os.path.join(output_folder, output_name)

        task_builder.add_subtask(
            GenerateOCRFileTask(
                source_image=source_image,
                out_text_file=text_file,
                lang=language,
                tesseract_path=self.tessdata_path,
            )
        )
    def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
        """Queue the four subtasks that prepare one package.

        In order: transform the package, generate the MARC file, generate
        the meta.yml file, and generate the checksum data.

        Args:
            task_builder: Builder that receives the subtasks.
            **job_args: Must provide "package", "destination" and
                "title_page".

        """
        source_package = job_args['package']
        output_root = job_args['destination']
        title_page = job_args['title_page']

        # The bib id comes from the package metadata and names the folder
        # that the later subtasks work in.
        bib_id = source_package.metadata[Metadata.ID]
        package_folder = os.path.join(output_root, bib_id)

        queue = task_builder.add_subtask

        # Transform the package into a HathiTiff package
        queue(subtask=TransformPackageTask(source_package, output_root))

        # Generate marc file from the Bib id
        queue(subtask=GenerateMarcTask(bib_id=bib_id,
                                       destination=package_folder))

        # Generate a meta.yml file
        queue(subtask=MakeYamlTask(bib_id, package_folder, title_page))

        # Generate checksum data
        queue(subtask=GenerateChecksumTask(bib_id, package_folder))
    def create_new_task(self, task_builder: tasks.TaskBuilder,
                        **job_args) -> None:
        """Queue the four subtasks that prepare one package.

        In order: transform the package, generate MARC.xml, generate the
        meta.yml file, and generate the checksum data.

        Args:
            task_builder: Builder that receives the subtasks.
            **job_args: Must provide "package", "destination", "title_page",
                "identifier_type" and "server_url".

        """
        source_package = job_args['package']
        output_root: str = job_args['destination']
        title_page: str = job_args['title_page']

        # The package id comes from the package metadata and names the
        # folder that the later subtasks work in.
        package_id: str = source_package.metadata[Metadata.ID]
        package_folder = os.path.join(output_root, package_id)

        queue = task_builder.add_subtask

        # Transform the package into a HathiTiff package
        queue(subtask=TransformPackageTask(source_package, output_root))

        # Generate marc file from the Package id
        identifier_type = job_args['identifier_type']
        marc_file = os.path.join(package_folder, "MARC.xml")
        server_url = str(job_args['server_url'])
        queue(
            subtask=workflow_get_marc.MarcGeneratorTask(
                identifier=package_id,
                identifier_type=identifier_type,
                output_name=marc_file,
                server_url=server_url,
            )
        )

        # Generate a meta.yml file
        queue(subtask=MakeYamlTask(package_id, package_folder, title_page))

        # Generate checksum data
        queue(subtask=GenerateChecksumTask(package_id, package_folder))
예제 #14
0
def test_posttask_builder(tmpdir):
    """Check that the post-task given to the builder ends up on the task."""
    temp_path = tmpdir.mkdir("test")

    posttask = SimpleSubtask("ending")

    builder = TaskBuilder(SimpleTaskBuilder(), temp_path)
    builder.add_subtask(subtask=SimpleSubtask("First"))
    builder.add_subtask(subtask=SimpleSubtask("Second"))
    builder.set_posttask(posttask)
    task = builder.build_task()
    assert task.posttask == posttask

    shutil.rmtree(tmpdir)
    shortcut = os.path.join(tmpdir.dirname, "test_posttask_buildercurrent")
    # The shortcut is not guaranteed to exist (presumably it is a symlink
    # that is only created on some platforms - TODO confirm), so only remove
    # it when present. lexists() is used because exists() reports False for
    # a symlink whose target was just deleted.
    if os.path.lexists(shortcut):
        os.unlink(shortcut)
    def create_new_task(self, task_builder: tasks.TaskBuilder,
                        **job_args: Union[str, Package]) -> None:
        """Queue a ``PackageConverter`` subtask for one package.

        Args:
            task_builder: Builder that receives the subtask.
            **job_args: Must provide "package", "output" and "source_path".
                The packaging id is read from the package metadata.

        """
        package: Package = job_args['package']
        output_root: str = job_args["output"]
        source_path: str = job_args["source_path"]
        package_id: str = package.metadata[Metadata.ID]

        task_builder.add_subtask(
            PackageConverter(
                source_path=source_path,
                existing_package=package,
                new_package_root=output_root,
                packaging_id=package_id,
            )
        )
예제 #16
0
def test_pretask_builder(tmpdir):
    """Check that the pre-task given to the builder ends up on the task."""
    temp_path = tmpdir.mkdir("test")

    pretask = SimplePreTask("Starting")

    builder = TaskBuilder(SimpleTaskBuilder(), temp_path)
    builder.set_pretask(subtask=pretask)
    builder.add_subtask(subtask=SimpleSubtask("First"))
    builder.add_subtask(subtask=SimpleSubtask("Second"))
    task = builder.build_task()
    assert task.pretask == pretask

    shutil.rmtree(tmpdir)
    shortcut = os.path.join(tmpdir.dirname, "test_pretask_buildercurrent")
    # os.path.exists() follows symlinks and returns False for a broken one.
    # If the shortcut is a symlink to the directory removed above
    # (presumably it is - TODO confirm), only lexists() can still see it.
    if os.path.lexists(shortcut):
        os.unlink(shortcut)
예제 #17
0
    def create_new_task(self, task_builder: tasks.TaskBuilder,
                        **job_args) -> None:
        """Queue a ``MarcGeneratorTask`` that writes MARC.XML into a folder.

        Args:
            task_builder: Builder that receives the subtask.
            **job_args: Must provide "identifier" (a mapping with "value"
                and "type"), "path" and "api_server".

        """
        identifier_info = job_args['identifier']
        identifier_value = identifier_info["value"]
        identifier_kind = identifier_info["type"]

        marc_file = os.path.join(job_args["path"], "MARC.XML")

        task_builder.add_subtask(
            MarcGeneratorTask(
                identifier=identifier_value,
                identifier_type=identifier_kind,
                output_name=marc_file,
                server_url=job_args['api_server'],
            )
        )
    def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
        """Queue a convert or a copy subtask for one source file.

        Args:
            task_builder: Builder that receives the subtask.
            **job_args: Must provide "output_root", "relative_path_to_root",
                "source_root", "source_file" and "task_type". The task type
                is either "convert" or "copy".

        Raises:
            ValueError: If "task_type" is neither "convert" nor "copy".

        """
        output_root = job_args['output_root']
        relative_path_to_root = job_args['relative_path_to_root']
        source_root = job_args['source_root']
        source_file = job_args['source_file']
        task_type = job_args['task_type']

        output_path = os.path.join(output_root, relative_path_to_root)

        source_file_path = os.path.join(source_root, relative_path_to_root,
                                        source_file)

        if task_type == "convert":
            task_builder.add_subtask(
                ImageConvertTask(source_file_path, output_path))
        elif task_type == "copy":
            task_builder.add_subtask(CopyTask(source_file_path, output_path))
        else:
            # ValueError is a subclass of Exception, so handlers written
            # for the previous bare Exception still catch it.
            raise ValueError(
                "Don't know what to do for {}".format(task_type))
예제 #19
0
def test_adapter_results_with_posttask(tmpdir):
    """Run two subtasks plus a post-task and check the collected results.

    The built task's subtasks are wrapped in ``SubtaskJobAdapter`` objects
    and queued on a ``ToolJobManager``. The queued order is recorded so it
    can be shown if the first assertion fails. The three results are
    expected to be "First", "Second" and "Ending", in that order.
    """
    temp_path = tmpdir.mkdir("test")
    post_task = SimpleSubtask("Ending")

    builder = TaskBuilder(SimpleTaskBuilder(), temp_path)
    builder.set_posttask(subtask=post_task)
    builder.add_subtask(subtask=SimpleSubtask("First"))
    builder.add_subtask(subtask=SimpleSubtask("Second"))
    new_task = builder.build_task()

    queued_order = []

    with worker.ToolJobManager() as manager:
        for subtask in new_task.subtasks:
            adapted_tool = speedwagon.worker.SubtaskJobAdapter(subtask)
            manager.add_job(adapted_tool, adapted_tool.settings)

        # Record what was queued, for the failure message below.
        for message in manager._pending_jobs.queue:
            print(message)
            queued_order.append(message.args['message'])

        manager.start()

        # Fuzz this
        time.sleep(1)

        results = [result.data for result in manager.get_results()]

        assert len(results) == 3

        assert "First" == results[0], "results = {}, queued_order={}".format(
            results, queued_order)

        assert "Second" == results[1]
        assert "Ending" == results[2]

    shutil.rmtree(tmpdir)

    shortcut = \
        os.path.join(tmpdir.dirname, "test_adapter_results_with_postcurrent")

    # os.path.exists() follows symlinks and returns False for a broken one.
    # If the shortcut is a symlink to the directory removed above
    # (presumably it is - TODO confirm), only lexists() can still see it.
    if os.path.lexists(shortcut):
        os.unlink(shortcut)
예제 #20
0
 def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
     """Queue a Digital Library and a HathiTrust packaging subtask.

     Args:
         task_builder: Builder that receives the subtasks.
         **job_args: Must provide "package", "output_dl", "output_ht" and
             "source_path". The packaging id is read from the package
             metadata.

     """
     package = job_args['package']
     dl_root = job_args["output_dl"]
     ht_root = job_args["output_ht"]
     source_path = job_args["source_path"]
     package_id = package.metadata[Metadata.ID]

     # Queued in this order: Digital Library first, HathiTrust second.
     outputs = (
         (dl_root, "Digital Library Compound"),
         (ht_root, "HathiTrust jp2"),
     )
     for output_root, package_format in outputs:
         task_builder.add_subtask(
             PackageConverter(
                 source_path=source_path,
                 existing_package=package,
                 new_package_root=output_root,
                 packaging_id=package_id,
                 package_format=package_format,
             )
         )
    def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
        """Queue the subtasks needed to convert a single file.

        An ``EnsurePathTask`` for the destination folder is queued first,
        followed by a ``ConvertFileTask`` for the file itself.

        Args:
            task_builder: Builder that receives the subtasks.
            **job_args: Must provide "source_root", "source_file",
                "relative_location", "destination_root", "new_file_name"
                and "image_factory".

        """
        source_root = job_args['source_root']
        source_name = job_args["source_file"]
        relative_location = job_args["relative_location"]
        destination_root = job_args["destination_root"]
        new_name = job_args["new_file_name"]
        image_factory = job_args["image_factory"]

        source_path = os.path.join(
            source_root, relative_location, source_name)
        destination_path = os.path.join(
            destination_root, relative_location, new_name)
        destination_folder = os.path.join(destination_root, relative_location)

        ensure_folder_task = EnsurePathTask(destination_folder)
        conversion_task = ConvertFileTask(
            source_file=source_path,
            destination_file=destination_path,
            image_factory_name=image_factory,
        )

        # The folder subtask is queued ahead of the conversion.
        for subtask in (ensure_folder_task, conversion_task):
            task_builder.add_subtask(subtask)
예제 #22
0
def test_task_can_be_picked(tmpdir):
    """Check that a task keeps its name across ``save`` and ``load``."""
    temp_path = tmpdir.mkdir("test")
    builder = TaskBuilder(SimpleTaskBuilder(), temp_path)
    builder.add_subtask(subtask=SimpleSubtask(message="got it"))

    task_original = builder.build_task()
    serialized = TaskBuilder.save(task_original)

    task_unserialized = TaskBuilder.load(serialized)
    assert task_original.name == task_unserialized.name

    shutil.rmtree(tmpdir)
    shortcut = os.path.join(tmpdir.dirname, "test_task_can_be_pickedcurrent")
    # The shortcut is not guaranteed to exist (presumably it is a symlink
    # that is only created on some platforms - TODO confirm), so only remove
    # it when present. lexists() is used because exists() reports False for
    # a symlink whose target was just deleted.
    if os.path.lexists(shortcut):
        os.unlink(shortcut)
예제 #23
0
def simple_task_builder_with_2_subtasks(tmpdir_factory):
    """Yield a ``TaskBuilder`` holding two subtasks, then clean up.

    The builder works in a "task_builder" temp directory made by the
    factory. After the consumer is done, that directory and the
    "task_buildercurrent" shortcut next to it are removed.
    """
    temp_path = tmpdir_factory.mktemp("task_builder")
    builder = TaskBuilder(SimpleTaskBuilder(), temp_path)
    builder.add_subtask(subtask=SimpleSubtask("First"))
    builder.add_subtask(subtask=SimpleSubtask("Second"))
    yield builder
    shutil.rmtree(temp_path)
    shortcut = os.path.join(tmpdir_factory.getbasetemp(),
                            "task_buildercurrent")
    # The shortcut is not guaranteed to exist (presumably it is a symlink
    # that is only created on some platforms - TODO confirm), so only remove
    # it when present. lexists() is used because exists() reports False for
    # a symlink whose target was just deleted.
    if os.path.lexists(shortcut):
        os.unlink(shortcut)
예제 #24
0
    def create_new_task(
            self, task_builder: tasks.TaskBuilder,
            **job_args: Union[str, Dict[str, Union[str, bool]]]) -> None:
        """Queue the MARC generation subtask and any requested enhancements.

        A ``MarcGeneratorTask`` writing MARC.XML into the job's folder is
        always queued. A ``MarcEnhancement955Task`` and/or a
        ``MarcEnhancement035Task`` follow when switched on.

        Args:
            task_builder: Builder that receives the subtasks.
            **job_args: Job settings. "directory" (a dict with "type" and
                "value"), "path" and "api_server" are required.
                "enhancements" is an optional dict whose "955" and "035"
                entries switch on the matching enhancement subtask.

        Raises:
            KeyError: If "directory" is missing from the job settings.
            TypeError: If "directory" or "enhancements" is not a dict.

        """
        if 'directory' not in job_args:
            raise KeyError("Missing directory")
        directory = job_args['directory']
        if not isinstance(directory, dict):
            raise TypeError(
                "directory must be a dict, not {}".format(
                    type(directory).__name__))
        identifier_type = str(directory["type"])
        subdirectory = str(directory["value"])
        identifier, _ = self._get_identifier_volume(job_args)

        folder = str(job_args["path"])
        marc_file = os.path.join(folder, "MARC.XML")
        task_builder.add_subtask(
            MarcGeneratorTask(identifier=identifier,
                              identifier_type=identifier_type,
                              output_name=marc_file,
                              server_url=str(job_args['api_server'])))

        enhancements = job_args.get('enhancements', {})
        if not isinstance(enhancements, dict):
            raise TypeError(
                "enhancements must be a dict, not {}".format(
                    type(enhancements).__name__))

        # Both enhancements work on the MARC file queued above.
        if enhancements.get('955', False):
            task_builder.add_subtask(
                MarcEnhancement955Task(added_value=subdirectory,
                                       xml_file=marc_file))
        if enhancements.get('035', False):
            task_builder.add_subtask(
                MarcEnhancement035Task(xml_file=marc_file))
 def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
     """Queue a ``MetadataValidatorTask`` for one source file.

     Args:
         task_builder: Builder that receives the subtask.
         **job_args: Must provide "source_file".

     """
     task_builder.add_subtask(
         MetadataValidatorTask(job_args["source_file"]))
 def initial_task(self, task_builder: tasks.TaskBuilder,
                  **user_args) -> None:
     """Queue a ``ReadChecksumReportTask`` per located checksum file.

     Args:
         task_builder: Builder that receives the subtasks.
         **user_args: User settings; "Input" is handed to
             ``self._locate_checksum_files``.

     """
     report_files = self._locate_checksum_files(user_args['Input'])
     for report_file in report_files:
         reader_task = ReadChecksumReportTask(checksum_file=report_file)
         task_builder.add_subtask(reader_task)
예제 #27
0
 def create_new_task(self, task_builder: tasks.TaskBuilder,
                     **job_args) -> None:
     """Queue a single ``BadTask``; the job settings are not read."""
     bad_task = BadTask()
     task_builder.add_subtask(bad_task)
    def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
        """Queue a ``MarcGeneratorTask`` for one bib id and folder.

        Args:
            task_builder: Builder that receives the subtask.
            **job_args: Must provide "bib_id" and "path".

        """
        bib_identifier = job_args["bib_id"]
        output_folder = job_args["path"]
        task_builder.add_subtask(
            MarcGeneratorTask(bib_identifier, output_folder))
 def initial_task(self, task_builder: tasks.TaskBuilder,
                  **user_args) -> None:
     """Run the parent's initial task, then queue a ``FindPackageTask``.

     Args:
         task_builder: Builder that receives the subtask.
         **user_args: User settings; "Source" is used as the search root.

     """
     super().initial_task(task_builder, **user_args)
     package_finder = FindPackageTask(root=user_args['Source'])
     task_builder.add_subtask(package_finder)
 def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
     """Queue a ``PackageConverter`` from a package to a destination.

     Args:
         task_builder: Builder that receives the subtask.
         **job_args: Must provide "package" and "destination".

     """
     converter = PackageConverter(
         src=job_args['package'],
         dst=job_args['destination'],
     )
     task_builder.add_subtask(converter)