def create_new_task(self,
                    task_builder: tasks.TaskBuilder,
                    **job_args: Union[str, AbsPackageComponent]) -> None:
    """Queue one package-conversion subtask per requested output.

    A ``PackageConverter`` subtask is added for the "Digital Library
    Compound" format when ``output_dl`` is present and not None, and
    another for the "HathiTrust jp2" format when ``output_ht`` is
    present and not None.

    Args:
        task_builder: Builder that receives the generated subtasks.
        **job_args: Job settings. ``package`` and ``source_path`` are
            required; ``output_dl`` and ``output_ht`` are optional.

    """
    settings = typing.cast(JobArguments, job_args)
    package: AbsPackageComponent = settings['package']
    source = settings["source_path"]
    identifier: str = package.metadata[Metadata.ID]

    def queue_conversion(output_root, format_name):
        # Formats without an output location are skipped.
        if output_root is None:
            return
        task_builder.add_subtask(
            PackageConverter(
                source_path=source,
                existing_package=package,
                new_package_root=output_root,
                packaging_id=identifier,
                package_format=format_name,
            )
        )

    queue_conversion(settings.get("output_dl"), "Digital Library Compound")
    queue_conversion(settings.get("output_ht"), "HathiTrust jp2")
def simple_task_builder(tmpdir_factory):
    """Yield a TaskBuilder holding one subtask, then delete its folder.

    A "test" directory is created under the factory's base temp
    directory and handed to the TaskBuilder. The directory is removed
    when the generator is resumed after the yield.
    """
    base_dir = tmpdir_factory.getbasetemp()
    work_dir = os.path.join(base_dir, "test")
    os.makedirs(work_dir)

    task_builder = TaskBuilder(SimpleTaskBuilder(), str(work_dir))
    task_builder.add_subtask(subtask=SimpleSubtask("got it"))
    yield task_builder

    shutil.rmtree(work_dir)
def create_new_task(self,
                    task_builder: tasks.TaskBuilder,
                    **job_args: str) -> None:
    """Queue the subtasks needed to convert a single file.

    Two subtasks are added, in this order:

    * an ``EnsurePathTask`` for the destination folder
    * a ``ConvertFileTask`` that writes the converted file

    Args:
        task_builder: Builder that receives the subtasks.
        **job_args: Must contain ``source_root``, ``source_file``,
            ``relative_location``, ``destination_root``,
            ``new_file_name`` and ``image_factory``.

    """
    input_root = job_args['source_root']
    input_name = job_args["source_file"]
    sub_path = job_args["relative_location"]
    output_root = job_args["destination_root"]
    output_name = job_args["new_file_name"]
    factory_name = job_args["image_factory"]

    # The relative location is mirrored on both the source and the
    # destination side.
    input_file = os.path.join(input_root, sub_path, input_name)
    output_file = os.path.join(output_root, sub_path, output_name)

    ensure_folder = EnsurePathTask(os.path.join(output_root, sub_path))
    convert_file = ConvertFileTask(
        source_file=input_file,
        destination_file=output_file,
        image_factory_name=factory_name,
    )

    # The folder task is queued ahead of the conversion task.
    for subtask in (ensure_folder, convert_file):
        task_builder.add_subtask(subtask)
def create_new_task(self,
                    task_builder: tasks.TaskBuilder,
                    **job_args: str) -> None:
    """Queue a PackageImageConverterTask for one source file.

    Args:
        task_builder: Builder that receives the subtask.
        **job_args: Must contain ``source_file`` and ``output_path``.

    """
    source = job_args['source_file']
    destination = job_args['output_path']
    task_builder.add_subtask(
        PackageImageConverterTask(source_file_path=source,
                                  dest_path=destination)
    )
def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
    """Queue a MakeChecksumTask for a single file.

    Args:
        task_builder: Builder that receives the subtask.
        **job_args: Must contain ``source_path``, ``filename`` and
            ``save_to_filename``.

    """
    checksum_args = (
        job_args['source_path'],
        job_args['filename'],
        job_args['save_to_filename'],
    )
    task_builder.add_subtask(checksum_tasks.MakeChecksumTask(*checksum_args))
def initial_task(self, task_builder: tasks.TaskBuilder,
                 **user_args) -> None:
    """Queue a FindImagesTask for the user-selected path and file type.

    Args:
        task_builder: Builder that receives the subtask.
        **user_args: Must contain ``Path`` and ``Image File Type``.

    """
    search_root = user_args['Path']
    selected_type = user_args["Image File Type"]
    extension = self.get_file_extension(selected_type)

    find_images = FindImagesTask(search_root, file_extension=extension)
    task_builder.add_subtask(find_images)
def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
    """Queue a ValidateChecksumTask built from the job settings.

    Args:
        task_builder: Builder that receives the subtask.
        **job_args: Must contain ``filename``, ``path``,
            ``expected_hash`` and ``source_report``.

    """
    validation_task = ValidateChecksumTask(
        file_name=job_args['filename'],
        file_path=job_args['path'],
        expected_hash=job_args['expected_hash'],
        source_report=job_args['source_report'],
    )
    task_builder.add_subtask(validation_task)
def test_adapter_results_with_posttask(tmpdir):
    """Check that subtask results arrive in order, post task last."""
    temp_path = tmpdir.mkdir("test")
    post_task = SimpleSubtask("Ending")

    builder = TaskBuilder(SimpleTaskBuilder(), temp_path)
    builder.set_posttask(subtask=post_task)
    builder.add_subtask(subtask=SimpleSubtask("First"))
    builder.add_subtask(subtask=SimpleSubtask("Second"))
    new_task = builder.build_task()

    with worker.ToolJobManager() as manager:
        for subtask in new_task.subtasks:
            adapted_tool = speedwagon.worker.SubtaskJobAdapter(subtask)
            manager.add_job(adapted_tool, adapted_tool.settings)
        manager.start()

        results = [r.data for r in manager.get_results()]

        assert len(results) == 3
        assert "First" == results[0]
        assert "Second" == results[1]
        assert "Ending" == results[2]

    shutil.rmtree(tmpdir)
    shortcut = os.path.join(tmpdir.dirname,
                            "test_adapter_results_with_postcurrent")
    # The shortcut is not guaranteed to exist, so only remove it when it
    # is there; an unconditional unlink would fail an otherwise passing
    # test during cleanup.
    if os.path.exists(shortcut):
        os.unlink(shortcut)
def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
    """Queue a PackageConverter subtask for one existing package.

    Args:
        task_builder: Builder that receives the subtask.
        **job_args: Must contain ``package``, ``output`` and
            ``source_path``.

    """
    package = job_args['package']
    output_root = job_args["output"]
    source = job_args["source_path"]

    # The converter is handed the ID stored in the package metadata.
    identifier = package.metadata[Metadata.ID]

    task_builder.add_subtask(
        PackageConverter(
            source_path=source,
            existing_package=package,
            new_package_root=output_root,
            packaging_id=identifier,
        )
    )
def completion_task(self, task_builder: tasks.TaskBuilder, results,
                    **user_args) -> None:
    """Queue one MakeCheckSumReportTask per sorted checksum report.

    The ``data`` of every result is passed to ``self.sort_results``;
    each key/value pair of the returned mapping becomes a subtask.

    Args:
        task_builder: Builder that receives the subtasks.
        results: Iterable of objects exposing a ``data`` attribute.
        **user_args: Not used here.

    """
    result_data = [result.data for result in results]
    reports = self.sort_results(result_data)

    for report_file, report_checksums in reports.items():
        report_task = checksum_tasks.MakeCheckSumReportTask(
            report_file, report_checksums)
        task_builder.add_subtask(report_task)
def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
    """Queue a GenerateOCRFileTask for one image.

    Args:
        task_builder: Builder that receives the subtask.
        **job_args: Must contain ``source_file_path``,
            ``destination_path``, ``output_file_name`` and
            ``lang_code``.

    """
    source_image = job_args["source_file_path"]
    output_folder = job_args["destination_path"]
    output_name = job_args["output_file_name"]
    language = job_args["lang_code"]

    text_file = os.path.join(output_folder, output_name)
    task_builder.add_subtask(
        GenerateOCRFileTask(
            source_image=source_image,
            out_text_file=text_file,
            lang=language,
            tesseract_path=self.tessdata_path,
        )
    )
def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
    """Queue the subtasks that build one package under the destination.

    Subtasks are added in this order: ``TransformPackageTask``,
    ``GenerateMarcTask``, ``MakeYamlTask`` and ``GenerateChecksumTask``.

    Args:
        task_builder: Builder that receives the subtasks.
        **job_args: Must contain ``package``, ``destination`` and
            ``title_page``.

    """
    source_package = job_args['package']
    output_root = job_args['destination']
    title_page_name = job_args['title_page']

    # The package ID doubles as the name of the output folder.
    package_id = source_package.metadata[Metadata.ID]
    package_folder = os.path.join(output_root, package_id)

    # 1. Transform the package.
    transform = TransformPackageTask(source_package, output_root)
    task_builder.add_subtask(subtask=transform)

    # 2. MARC record for the ID.
    marc = GenerateMarcTask(bib_id=package_id, destination=package_folder)
    task_builder.add_subtask(subtask=marc)

    # 3. meta.yml file.
    yaml_task = MakeYamlTask(package_id, package_folder, title_page_name)
    task_builder.add_subtask(subtask=yaml_task)

    # 4. Checksum data.
    checksum = GenerateChecksumTask(package_id, package_folder)
    task_builder.add_subtask(subtask=checksum)
def create_new_task(self, task_builder: tasks.TaskBuilder,
                    **job_args) -> None:
    """Queue the subtasks that build one package under the destination.

    Subtasks are added in this order: ``TransformPackageTask``,
    ``workflow_get_marc.MarcGeneratorTask``, ``MakeYamlTask`` and
    ``GenerateChecksumTask``.

    Args:
        task_builder: Builder that receives the subtasks.
        **job_args: Must contain ``package``, ``destination``,
            ``title_page``, ``identifier_type`` and ``server_url``.

    """
    source_package = job_args['package']
    output_root: str = job_args['destination']
    title_page_name: str = job_args['title_page']

    # The package ID doubles as the name of the output folder.
    identifier: str = source_package.metadata[Metadata.ID]
    package_folder = os.path.join(output_root, identifier)

    # 1. Transform the package.
    transform = TransformPackageTask(source_package, output_root)
    task_builder.add_subtask(subtask=transform)

    # 2. MARC record, written as MARC.xml inside the package folder.
    id_type = job_args['identifier_type']
    marc_path = os.path.join(package_folder, "MARC.xml")
    marc = workflow_get_marc.MarcGeneratorTask(
        identifier=identifier,
        identifier_type=id_type,
        output_name=marc_path,
        server_url=str(job_args['server_url']),
    )
    task_builder.add_subtask(subtask=marc)

    # 3. meta.yml file.
    yaml_task = MakeYamlTask(identifier, package_folder, title_page_name)
    task_builder.add_subtask(subtask=yaml_task)

    # 4. Checksum data.
    checksum = GenerateChecksumTask(identifier, package_folder)
    task_builder.add_subtask(subtask=checksum)
def test_posttask_builder(tmpdir):
    """Check that a built task exposes the post task set on the builder."""
    temp_path = tmpdir.mkdir("test")
    posttask = SimpleSubtask("ending")

    builder = TaskBuilder(SimpleTaskBuilder(), temp_path)
    builder.add_subtask(subtask=SimpleSubtask("First"))
    builder.add_subtask(subtask=SimpleSubtask("Second"))
    builder.set_posttask(posttask)
    task = builder.build_task()

    assert task.posttask == posttask

    shutil.rmtree(tmpdir)
    shortcut = os.path.join(tmpdir.dirname, "test_posttask_buildercurrent")
    # The shortcut is not guaranteed to exist, so only remove it when it
    # is there; an unconditional unlink would fail an otherwise passing
    # test during cleanup.
    if os.path.exists(shortcut):
        os.unlink(shortcut)
def create_new_task(self,
                    task_builder: tasks.TaskBuilder,
                    **job_args: Union[str, Package]) -> None:
    """Queue a PackageConverter subtask for one existing package.

    Args:
        task_builder: Builder that receives the subtask.
        **job_args: Must contain ``package``, ``output`` and
            ``source_path``.

    """
    package: Package = job_args['package']
    output_root: str = job_args["output"]
    source: str = job_args["source_path"]

    # The converter is handed the ID stored in the package metadata.
    identifier: str = package.metadata[Metadata.ID]

    converter = PackageConverter(
        source_path=source,
        existing_package=package,
        new_package_root=output_root,
        packaging_id=identifier,
    )
    task_builder.add_subtask(converter)
def test_pretask_builder(tmpdir):
    """Check that a built task exposes the pre task set on the builder."""
    work_dir = tmpdir.mkdir("test")
    expected_pretask = SimplePreTask("Starting")

    task_builder = TaskBuilder(SimpleTaskBuilder(), work_dir)
    task_builder.set_pretask(subtask=expected_pretask)
    for label in ("First", "Second"):
        task_builder.add_subtask(subtask=SimpleSubtask(label))

    built_task = task_builder.build_task()
    assert built_task.pretask == expected_pretask

    # Clean up the temp folder and, when present, the shortcut beside it.
    shutil.rmtree(tmpdir)
    leftover = os.path.join(tmpdir.dirname, "test_pretask_buildercurrent")
    if os.path.exists(leftover):
        os.unlink(leftover)
def create_new_task(self, task_builder: tasks.TaskBuilder,
                    **job_args) -> None:
    """Queue a MarcGeneratorTask that writes MARC.XML into a folder.

    Args:
        task_builder: Builder that receives the subtask.
        **job_args: Must contain ``identifier`` (a mapping with
            ``value`` and ``type`` entries), ``path`` and
            ``api_server``.

    """
    identifier_info = job_args['identifier']
    id_value = identifier_info["value"]
    id_type = identifier_info["type"]
    output_folder = job_args["path"]

    marc_file = os.path.join(output_folder, "MARC.XML")
    task_builder.add_subtask(
        MarcGeneratorTask(
            identifier=id_value,
            identifier_type=id_type,
            output_name=marc_file,
            server_url=job_args['api_server'],
        )
    )
def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
    """Queue a convert or copy subtask for a single source file.

    Args:
        task_builder: Builder that receives the subtask.
        **job_args: Must contain ``output_root``,
            ``relative_path_to_root``, ``source_root``, ``source_file``
            and ``task_type``. ``task_type`` selects the subtask:
            "convert" queues an ``ImageConvertTask`` and "copy" queues a
            ``CopyTask``.

    Raises:
        ValueError: If ``task_type`` is neither "convert" nor "copy".

    """
    output_root = job_args['output_root']
    relative_path_to_root = job_args['relative_path_to_root']
    source_root = job_args['source_root']
    source_file = job_args['source_file']
    task_type = job_args['task_type']

    output_path = os.path.join(output_root, relative_path_to_root)
    source_file_path = os.path.join(source_root,
                                    relative_path_to_root,
                                    source_file)

    if task_type == "convert":
        task_builder.add_subtask(
            ImageConvertTask(source_file_path, output_path))
    elif task_type == "copy":
        task_builder.add_subtask(CopyTask(source_file_path, output_path))
    else:
        # ValueError is still an Exception, so existing handlers that
        # catch Exception keep working while new ones can be specific.
        raise ValueError(f"Don't know what to do for {task_type}")
def test_adapter_results_with_posttask(tmpdir):
    """Check that subtask results arrive in order, post task last.

    The messages of the pending jobs are recorded before the manager
    starts so they can be shown if the first assertion fails.
    """
    work_dir = tmpdir.mkdir("test")
    ending_task = SimpleSubtask("Ending")

    task_builder = TaskBuilder(SimpleTaskBuilder(), work_dir)
    task_builder.set_posttask(subtask=ending_task)
    for label in ("First", "Second"):
        task_builder.add_subtask(subtask=SimpleSubtask(label))
    built_task = task_builder.build_task()

    queued_order = []
    with worker.ToolJobManager() as manager:
        for subtask in built_task.subtasks:
            adapter = speedwagon.worker.SubtaskJobAdapter(subtask)
            manager.add_job(adapter, adapter.settings)

        for pending in manager._pending_jobs.queue:
            print(pending)
            queued_order.append(pending.args['message'])

        manager.start()

        # Fuzz this
        time.sleep(1)

        results = [result.data for result in manager.get_results()]

        assert len(results) == 3
        assert results[0] == "First", \
            "results = {}, queued_order={}".format(results, queued_order)
        assert results[1] == "Second"
        assert results[2] == "Ending"

    # Clean up the temp folder and, when present, the shortcut beside it.
    shutil.rmtree(tmpdir)
    leftover = os.path.join(tmpdir.dirname,
                            "test_adapter_results_with_postcurrent")
    if os.path.exists(leftover):
        os.unlink(leftover)
def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
    """Queue two PackageConverter subtasks for one existing package.

    The "Digital Library Compound" conversion is queued first, followed
    by the "HathiTrust jp2" conversion.

    Args:
        task_builder: Builder that receives the subtasks.
        **job_args: Must contain ``package``, ``output_dl``,
            ``output_ht`` and ``source_path``.

    """
    package = job_args['package']
    dl_root = job_args["output_dl"]
    ht_root = job_args["output_ht"]
    source = job_args["source_path"]
    identifier = package.metadata[Metadata.ID]

    # (output root, package format) in the order they are queued.
    conversions = (
        (dl_root, "Digital Library Compound"),
        (ht_root, "HathiTrust jp2"),
    )
    for output_root, format_name in conversions:
        converter = PackageConverter(
            source_path=source,
            existing_package=package,
            new_package_root=output_root,
            packaging_id=identifier,
            package_format=format_name,
        )
        task_builder.add_subtask(converter)
def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
    """Queue a folder-creation subtask followed by a conversion subtask.

    Args:
        task_builder: Builder that receives the subtasks.
        **job_args: Must contain ``source_root``, ``source_file``,
            ``relative_location``, ``destination_root``,
            ``new_file_name`` and ``image_factory``.

    """
    src_root = job_args['source_root']
    src_name = job_args["source_file"]
    rel_dir = job_args["relative_location"]
    dst_root = job_args["destination_root"]
    dst_name = job_args["new_file_name"]
    factory = job_args["image_factory"]

    # Full paths of the file to read and the file to write.
    src_path = os.path.join(src_root, rel_dir, src_name)
    dst_path = os.path.join(dst_root, rel_dir, dst_name)

    folder_task = EnsurePathTask(os.path.join(dst_root, rel_dir))
    conversion_task = ConvertFileTask(source_file=src_path,
                                      destination_file=dst_path,
                                      image_factory_name=factory)

    task_builder.add_subtask(folder_task)
    task_builder.add_subtask(conversion_task)
def test_task_can_be_picked(tmpdir):
    """Check that a task keeps its name through TaskBuilder save/load."""
    temp_path = tmpdir.mkdir("test")

    builder = TaskBuilder(SimpleTaskBuilder(), temp_path)
    builder.add_subtask(subtask=SimpleSubtask(message="got it"))
    task_original = builder.build_task()

    serialized = TaskBuilder.save(task_original)
    task_unserialized = TaskBuilder.load(serialized)

    assert task_original.name == task_unserialized.name

    shutil.rmtree(tmpdir)
    shortcut = os.path.join(tmpdir.dirname, "test_task_can_be_pickedcurrent")
    # The shortcut is not guaranteed to exist, so only remove it when it
    # is there; an unconditional unlink would fail an otherwise passing
    # test during cleanup.
    if os.path.exists(shortcut):
        os.unlink(shortcut)
def simple_task_builder_with_2_subtasks(tmpdir_factory):
    """Yield a TaskBuilder holding two subtasks, then clean up.

    A "task_builder" temp directory is requested from the factory and
    handed to the TaskBuilder. When the generator is resumed, the
    directory is removed along with the "task_buildercurrent" shortcut
    in the base temp directory, if that shortcut exists.
    """
    temp_path = tmpdir_factory.mktemp("task_builder")

    builder = TaskBuilder(SimpleTaskBuilder(), temp_path)
    builder.add_subtask(subtask=SimpleSubtask("First"))
    builder.add_subtask(subtask=SimpleSubtask("Second"))
    yield builder

    shutil.rmtree(temp_path)
    shortcut = os.path.join(tmpdir_factory.getbasetemp(),
                            "task_buildercurrent")
    # The existence check had been commented out. Keep it active so the
    # cleanup does not raise when the shortcut was never created.
    if os.path.exists(shortcut):
        os.unlink(shortcut)
def create_new_task(
        self,
        task_builder: tasks.TaskBuilder,
        **job_args: Union[str, Dict[str, Union[str, bool]]]) -> None:
    """Queue the MARC generation task and any requested enhancements.

    A ``MarcGeneratorTask`` writing ``MARC.XML`` into ``path`` is always
    queued. A ``MarcEnhancement955Task`` follows when the ``955``
    enhancement is truthy, and a ``MarcEnhancement035Task`` when the
    ``035`` enhancement is truthy.

    Args:
        task_builder: Builder that receives the subtasks.
        **job_args: Must contain ``directory`` (a dict with ``type``
            and ``value`` entries), ``path`` and ``api_server``.
            ``enhancements`` is an optional dict that may hold the
            ``955`` and ``035`` flags.

    Raises:
        KeyError: If ``directory`` is missing from the job arguments.
        TypeError: If ``directory`` or ``enhancements`` is not a dict.

    """
    if 'directory' not in job_args:
        raise KeyError("Missing directory")

    directory = job_args['directory']
    if not isinstance(directory, dict):
        raise TypeError(
            "directory must be a dict, not {}".format(
                type(directory).__name__))

    identifier_type = str(directory["type"])
    subdirectory = str(directory["value"])
    identifier, _ = self._get_identifier_volume(job_args)

    folder = str(job_args["path"])
    marc_file = os.path.join(folder, "MARC.XML")
    task_builder.add_subtask(
        MarcGeneratorTask(identifier=identifier,
                          identifier_type=identifier_type,
                          output_name=marc_file,
                          server_url=str(job_args['api_server'])))

    enhancements = job_args.get('enhancements', {})
    if not isinstance(enhancements, dict):
        raise TypeError(
            "enhancements must be a dict, not {}".format(
                type(enhancements).__name__))

    # Enhancements are queued after the generator task and operate on
    # the same MARC file.
    if enhancements.get('955', False):
        task_builder.add_subtask(
            MarcEnhancement955Task(added_value=subdirectory,
                                   xml_file=marc_file))

    if enhancements.get('035'):
        task_builder.add_subtask(
            MarcEnhancement035Task(xml_file=marc_file))
def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
    """Queue a MetadataValidatorTask for one source file.

    Args:
        task_builder: Builder that receives the subtask.
        **job_args: Must contain ``source_file``.

    """
    file_to_validate = job_args["source_file"]
    task_builder.add_subtask(MetadataValidatorTask(file_to_validate))
def initial_task(self, task_builder: tasks.TaskBuilder,
                 **user_args) -> None:
    """Queue a ReadChecksumReportTask for every located checksum file.

    Args:
        task_builder: Builder that receives the subtasks.
        **user_args: Must contain ``Input``, which is passed to
            ``self._locate_checksum_files``.

    """
    search_root = user_args['Input']
    report_files = self._locate_checksum_files(search_root)
    for report_file in report_files:
        read_report = ReadChecksumReportTask(checksum_file=report_file)
        task_builder.add_subtask(read_report)
def create_new_task(self, task_builder: tasks.TaskBuilder,
                    **job_args) -> None:
    """Queue a single BadTask; the job arguments are ignored."""
    bad_subtask = BadTask()
    task_builder.add_subtask(bad_subtask)
def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
    """Queue a MarcGeneratorTask for one bib id and folder.

    Args:
        task_builder: Builder that receives the subtask.
        **job_args: Must contain ``bib_id`` and ``path``.

    """
    marc_task = MarcGeneratorTask(job_args["bib_id"], job_args["path"])
    task_builder.add_subtask(marc_task)
def initial_task(self, task_builder: tasks.TaskBuilder,
                 **user_args) -> None:
    """Run the parent's initial task, then queue a FindPackageTask.

    Args:
        task_builder: Builder that receives the subtask.
        **user_args: Must contain ``Source``, used as the search root.

    """
    super().initial_task(task_builder, **user_args)

    search_root = user_args['Source']
    find_packages = FindPackageTask(root=search_root)
    task_builder.add_subtask(find_packages)
def create_new_task(self, task_builder: tasks.TaskBuilder, **job_args):
    """Queue a PackageConverter from a package to a destination.

    Args:
        task_builder: Builder that receives the subtask.
        **job_args: Must contain ``package`` and ``destination``.

    """
    source_package = job_args['package']
    destination = job_args['destination']
    converter = PackageConverter(src=source_package, dst=destination)
    task_builder.add_subtask(converter)