def transformed_to_dl_compound(
        self, sample_cataloged_collection, monkeypatch):
    """Convert the cataloged sample collection with all transforms mocked.

    Every package located under the sample collection root is handed to a
    ``DigitalLibraryCompound`` package factory while the ``transform``
    attribute of ``Transformers``, ``CopyFile`` and ``ConvertJp2Standard``
    is patched.

    Args:
        sample_cataloged_collection: Pair whose first element is the root
            path searched for packages; the second element is not used.
        monkeypatch: pytest's monkeypatch helper.

    Returns:
        A tuple of the output path given to the factory and the Mock that
        recorded the transform calls.
    """
    root, _ = sample_cataloged_collection
    factory = packager.PackageFactory(noneas.CatalogedNonEAS())
    digital_library_format = packager.PackageFactory(
        digital_library_compound.DigitalLibraryCompound())
    output_path = os.path.join('some', 'folder')
    transform = Mock()

    def mock_transform(_, source, destination):
        # Drop the instance argument and record the call by keyword.
        transform(source=source, destination=destination)

    with monkeypatch.context() as mp:
        # Every patch goes through the context-local ``mp`` so that all of
        # them are undone together when the block exits.
        mp.setattr(
            packager.transformations.Transformers,
            "transform",
            mock_transform
        )
        mp.setattr(
            uiucprescon.packager.transformations.CopyFile,
            "transform",
            transform
        )
        mp.setattr(
            uiucprescon.packager.transformations.ConvertJp2Standard,
            "transform",
            transform
        )
        for package in list(factory.locate_packages(root)):
            digital_library_format.transform(package, output_path)
    return output_path, transform
def transformed_to_dl_compound(
        self,
        sample_collection,
        monkeypatch
):
    """Convert the archival sample collection with file operations mocked.

    The ``transform`` attribute of the three transformer classes, the
    ``shutil`` copy helpers and ``pykdu_compress.kdu_compress_cli2`` are
    replaced with mocks while every located package is transformed into
    the DigitalLibraryCompound format.

    Returns:
        A tuple of the output path and the Mock installed as ``transform``.
    """
    root, files = sample_collection
    source_factory = packager.PackageFactory(noneas.ArchivalNonEAS())
    compound_factory = packager.PackageFactory(
        digital_library_compound.DigitalLibraryCompound())

    import shutil
    fake_kdu_compress = Mock()
    output_path = os.path.join('some', 'folder')
    transform = Mock()

    with monkeypatch.context() as mp:
        # The same recorder stands in for each transformer implementation.
        for transformer in (
                packager.transformations.Transformers,
                uiucprescon.packager.transformations.CopyFile,
                uiucprescon.packager.transformations.ConvertJp2Standard,
        ):
            mp.setattr(transformer, "transform", transform)

        for copy_helper in ("copyfile", "copy"):
            mp.setattr(shutil, copy_helper, Mock())

        import pykdu_compress
        mp.setattr(pykdu_compress, "kdu_compress_cli2", fake_kdu_compress)

        located = list(source_factory.locate_packages(root))
        for package in located:
            compound_factory.transform(package, output_path)
    return output_path, transform
def test_transform_to_hathi(
        self,
        sample_collection_path,
        expected_source,
        expected_destination,
        monkeypatch
):
    """Check that an EAS package is transformed with the expected paths.

    ``Transformers.transform`` is replaced by a Mock, and the test asserts
    that it was called with the expected source and destination.
    """
    eas_factory = packager.PackageFactory(eas.Eas())
    located = eas_factory.locate_packages(sample_collection_path)
    hathi_factory = packager.PackageFactory(HathiJp2())
    output = "out"

    def spec(source, destination):
        pass

    transform = Mock(spec=spec)
    monkeypatch.setattr(
        packager.transformations.Transformers, "transform", transform
    )
    # Patch mkdir on the Path class referenced by the hathi_jp2_package
    # module.
    monkeypatch.setattr(
        packager.packages.hathi_jp2_package.pathlib.Path, "mkdir", Mock()
    )

    for found_package in located:
        hathi_factory.transform(found_package, output)

    assert transform.called is True
    expected_calls = [
        call(source=expected_source, destination=expected_destination)
    ]
    transform.assert_has_calls(expected_calls)
def test_capture_one_tiff_to_hathi_tiff(capture_one_fixture):
    """Transform a Capture One batch to HathiTiff and check the output."""
    source = os.path.join(capture_one_fixture, CAPTURE_ONE_BATCH_NAME)
    dest = os.path.join(capture_one_fixture, DESTINATION_NAME)

    source_factory = packager.PackageFactory(
        CaptureOnePackage(delimiter="_")
    )

    # find all Capture One organized packages
    found = list(source_factory.locate_packages(path=source))

    # There should be 2 packages in this sample batch
    assert len(found) == 2

    hathi_factory = packager.PackageFactory(packager.packages.HathiTiff())
    for found_package in found:
        hathi_factory.transform(found_package, dest=dest)

    # This should result in the following files
    #
    # some_root/000001/00000001.tif
    # some_root/000001/00000002.tif
    # some_root/000001/00000003.tif
    # some_root/000002/00000001.tif
    # some_root/000002/00000002.tif
    expected_files = (
        ("000001", "00000001.tif"),
        ("000001", "00000002.tif"),
        ("000001", "00000003.tif"),
        ("000002", "00000001.tif"),
        ("000002", "00000002.tif"),
    )
    for package_dir, file_name in expected_files:
        assert os.path.exists(os.path.join(dest, package_dir, file_name))
def test_read_only_transform(capture_one_sample_package):
    """Transforming with a HathiLimitedView factory must raise.

    The test passes only if ``transform`` raises ``NotImplementedError``.
    """
    source_factory = packager.PackageFactory(
        packager.packages.CaptureOnePackage()
    )
    located = source_factory.locate_packages(capture_one_sample_package)

    read_only_factory = packager.PackageFactory(
        packager.packages.HathiLimitedView()
    )

    # NOTE(review): the whole result of locate_packages() is passed here
    # rather than a single package - confirm this is intentional.
    with pytest.raises(NotImplementedError):
        read_only_factory.transform(located, dest=".")
def discover_task_metadata(
        self,
        initial_results: List[Any],
        additional_data: Dict[str, Any],
        **user_args: str
) -> List[Dict[str, Any]]:
    """Look at user settings and discover any data needed to build a task.

    Args:
        initial_results: Not used by this implementation.
        additional_data: Not used by this implementation.
        **user_args: Must contain ``"Input"`` (the path searched for
            packages) and ``"Output"`` (stored with every job).

    Returns:
        Returns a list of data to create a job with, one entry for each
        package located with a dash-delimited CaptureOnePackage factory.

    """
    source_input = user_args["Input"]
    dest = user_args["Output"]

    dash_delimited = packager.packages.CaptureOnePackage(delimiter="-")
    package_factory = packager.PackageFactory(dash_delimited)

    jobs: List[Dict[str, Union[str, Package]]] = [
        {
            "package": package,
            "output": dest,
            "source_path": source_input
        }
        for package in package_factory.locate_packages(source_input)
    ]
    return jobs
def package_objects(source_path, source_package_type):
    """Locate the sample packages of the named package type.

    Args:
        source_path: Directory that contains the sample package folders.
        source_package_type: Name of an attribute of ``packager.packages``
            that is called with no arguments to build the package format;
            it is also the key looked up in ``sample_packages``.

    Returns:
        List of the packages found under the first ``sample_packages``
        entry for the given type.
    """
    # Look the class up by attribute name instead of eval()-ing a string,
    # so nothing other than a plain attribute access can be executed.
    source_pkg = getattr(packager.packages, source_package_type)
    source = os.path.join(source_path, sample_packages[source_package_type][0])
    packages_factory = packager.PackageFactory(source_pkg())
    return list(packages_factory.locate_packages(path=source))
def test_cataloged_collection_transform(
        self,
        eas_collection,
        source_file,
        package_type,
        expected_out,
        monkeypatch
):
    """Transform every located EAS package into ``package_type``.

    ``Transformers.transform`` is replaced by a Mock while the packages
    are transformed.
    """
    # NOTE(review): there is no explicit assertion and ``expected_out`` is
    # unused, so this appears to check only that transforming does not
    # raise - confirm.
    fake_transform = Mock(spec=lambda source, destination: None)
    monkeypatch.setattr(
        packager.transformations.Transformers, "transform", fake_transform
    )

    output_dir = "output"
    eas_factory = packager.PackageFactory(eas.Eas())
    located = eas_factory.locate_packages(eas_collection(source_file))
    for found_package in located:
        target_factory = packager.PackageFactory(package_type())
        target_factory.transform(found_package, output_dir)
def test_capture_one_dashes(capture_one_batch_with_dashes):
    """The first dash-delimited package has one entry per source file."""
    batch_dir, source_files = capture_one_batch_with_dashes

    dash_delimited = packager.packages.CaptureOnePackage(delimiter="-")
    factory = packager.PackageFactory(dash_delimited)

    # Only the first located package is inspected.
    first_package = next(factory.locate_packages(batch_dir))
    assert len(first_package) == len(source_files)
def test_capture_one_underscore(capture_one_batch_with_underscores):
    """The first package found has one entry per source file.

    ``CaptureOnePackage`` is built without arguments.
    """
    batch_dir, source_files = capture_one_batch_with_underscores

    default_format = packager.packages.CaptureOnePackage()
    factory = packager.PackageFactory(default_format)

    # Only the first located package is inspected.
    first_package = next(factory.locate_packages(batch_dir))
    assert len(first_package) == len(source_files)
def capture_one_session_w_ds_store(capture_one_sample_package):
    """Locate Capture One packages in a folder holding a ``.DS_Store`` file.

    An empty ``.DS_Store`` file is written into the sample package folder
    before the packages are located.

    Returns:
        List of the packages found in the sample package folder.
    """
    source_dir = os.path.join(capture_one_sample_package)

    # Opening in write mode is enough to create the empty file.
    ds_store_path = os.path.join(source_dir, ".DS_Store")
    with open(ds_store_path, "w"):
        pass

    factory = packager.PackageFactory(packager.packages.CaptureOnePackage())

    # find all Capture One organized packages
    found = factory.locate_packages(path=source_dir)
    return list(found)
def package_objects(source_path, package_type):
    """Locate the sample packages of the named package type.

    Args:
        source_path: Directory that contains the sample package folders.
        package_type: Name of an attribute of ``packager.packages`` that is
            called with no arguments to build the package format; it is
            also the key looked up in ``sample_packages``.

    Returns:
        List of the packages found under the first ``sample_packages``
        entry for the given type.
    """
    # Look the class up by attribute name instead of eval()-ing a string,
    # so nothing other than a plain attribute access can be executed.
    pkg_factory_type = getattr(packager.packages, package_type)
    source = os.path.join(source_path, sample_packages[package_type][0])
    packages_factory = packager.PackageFactory(pkg_factory_type())

    # find all packages organized in the requested format
    return list(packages_factory.locate_packages(path=source))
def __init__(
        self,
        src: packager.package.collection.Package,
        dst: str
) -> None:
    """Store the source package and destination and build the packager.

    Args:
        src: Package kept on the instance as ``src``.
        dst: Destination kept on the instance as ``dst``.
    """
    super().__init__()
    self.src, self.dst = src, dst

    # Factory for the DigitalLibraryCompound package format.
    output_format = packager.packages.DigitalLibraryCompound()
    self.output_packager = packager.PackageFactory(output_format)
def test_transform_into_hathi(capture_one_batch_with_dashes, tmpdir):
    """Transform a dash-delimited Capture One batch into HathiTiff.

    Checks that the package folder and its twenty numbered tif files
    exist in the output directory.
    """
    batch_dir, source_files = capture_one_batch_with_dashes

    capture_one_format = packager.packages.CaptureOnePackage(delimiter="-")
    source_factory = packager.PackageFactory(capture_one_format)
    located = source_factory.locate_packages(batch_dir)

    hathi_factory = packager.PackageFactory(packager.packages.HathiTiff())

    output = tmpdir / "output"
    output.ensure_dir()

    for found_package in located:
        hathi_factory.transform(found_package, dest=output.strpath)

    assert (output / "99423682912205899").exists()

    for x in range(20):
        expected_file = f"{str(x).zfill(8)}.tif"
        assert (output / "99423682912205899" / expected_file).exists()

    output.remove()
def work(self) -> bool:
    """Locate the Capture One packages under the root and store them.

    Returns:
        Always True.
    """
    message = "Locating packages in {}".format(self._root)
    self.log(message)

    capture_one_format = packager.packages.CaptureOnePackage()
    package_factory = packager.PackageFactory(capture_one_format)

    # Collect everything into a list before storing it as the result.
    found = list(package_factory.locate_packages(self._root))
    self.set_results(found)
    return True
def test_cataloged_collection_transform(
        self,
        cataloged_collection,
        source_file,
        package_type,
        expected_out,
        monkeypatch
):
    """Transform a cataloged non-EAS collection and check the destination.

    ``Transformers.transform`` is replaced by one Mock; ``CopyFile`` and
    ``ConvertJp2Standard`` share a second Mock whose spec also accepts a
    logger. After transforming, the mock that matches ``package_type``
    must have been called with the expected output path.

    Raises:
        AssertionError: If ``package_type`` is neither ``HathiJp2`` nor
            ``DigitalLibraryCompound``.
    """
    transform = Mock(spec=lambda source, destination: None)
    monkeypatch.setattr(
        uiucprescon.packager.transformations.Transformers,
        "transform",
        transform
    )

    transform2 = Mock(spec=lambda source, destination, logger: None)
    monkeypatch.setattr(
        uiucprescon.packager.transformations.CopyFile,
        "transform",
        transform2
    )
    monkeypatch.setattr(
        uiucprescon.packager.transformations.ConvertJp2Standard,
        "transform",
        transform2
    )

    output_dir = "output"
    factory = packager.PackageFactory(noneas.CatalogedNonEAS())
    for p in factory.locate_packages(cataloged_collection(source_file)):
        packager.PackageFactory(package_type()).transform(p, output_dir)

    if package_type == hathi_jp2_package.HathiJp2:
        transform.assert_any_call(
            ANY, os.path.join(output_dir, expected_out)
        )
    elif package_type == digital_library_compound.DigitalLibraryCompound:
        transform2.assert_any_call(
            ANY, os.path.join(output_dir, expected_out), ANY
        )
    else:
        # Raise explicitly: an ``assert False`` statement is removed when
        # Python runs with -O and the unsupported case would then pass.
        raise AssertionError(f"testing '{package_type}' not supported")
def test_capture_one_tiff_package_size(capture_one_fixture):
    """The sample Capture One batch should contain exactly two packages."""
    source = os.path.join(capture_one_fixture, CAPTURE_ONE_BATCH_NAME)

    capture_one_format = packager.packages.CaptureOnePackage()
    factory = packager.PackageFactory(capture_one_format)

    # find all Capture One organized packages
    found = list(factory.locate_packages(path=source))

    # There should be 2 packages in this sample batch
    assert len(found) == 2
def work(self):
    """Convert the existing package into the configured package format.

    The packager logger is set to INFO and wrapped in ``self.log_config``
    while the conversion runs.

    Returns:
        Always True.
    """
    packager_logger = logging.getLogger(packager.__name__)
    packager_logger.setLevel(logging.INFO)

    with self.log_config(packager_logger):
        self.log(f"Converting {self.packaging_id} from {self.source_path} "
                 f"to a {self.package_format} package at "
                 f"{self.new_package_root}")

        # The target format is looked up by the configured format name.
        target_format = PackageConverter.package_formats[self.package_format]
        converter = packager.PackageFactory(target_format)
        converter.transform(
            self.existing_package,
            dest=self.new_package_root
        )
    return True
def work(self) -> bool:
    """Convert the existing package into a HathiTiff package.

    The packager logger is set to INFO and wrapped in ``self.log_config``
    while the conversion runs.

    Returns:
        Always True.
    """
    packager_logger = logging.getLogger(packager.__name__)
    packager_logger.setLevel(logging.INFO)

    with self.log_config(packager_logger):
        self.log(f"Converting {self.packaging_id} from "
                 f"{self.source_path} to a Hathi Trust Tiff "
                 f"package at {self.new_package_root}")

        hathi_tiff_format = packager.packages.HathiTiff()
        converter = packager.PackageFactory(hathi_tiff_format)
        converter.transform(
            self.existing_package,
            dest=self.new_package_root
        )
    return True
def test_convert(hathi_limited_view_sample_packages, monkeypatch):
    """Convert sample packages to DigitalLibraryCompound and inspect them.

    The two ``pykdu_compress`` entry points are replaced by stand-ins that
    only create the output file. After the transform, the temporary
    directory must hold exactly one entry, and each package located there
    must keep its ID, have access and preservation files for its first
    item, and report the temporary directory as its path.
    """
    import pathlib

    def kdu_compress_cli2(infile: str, outfile: str,
                          in_args=None, out_args=None):
        # Stand-in: only create the output file.
        pathlib.Path(outfile).touch()

    def kdu_expand_cli(infile: str, outfile: str,
                       in_args=None, out_args=None):
        # Stand-in: only create the output file.
        pathlib.Path(outfile).touch()

    monkeypatch.setattr(pykdu_compress, "kdu_compress_cli2", kdu_compress_cli2)
    monkeypatch.setattr(pykdu_compress, "kdu_expand_cli", kdu_expand_cli)

    digital_library_compound_builder = packager.PackageFactory(
        packager.packages.DigitalLibraryCompound())

    with tempfile.TemporaryDirectory() as tmp_dir:
        for package in hathi_limited_view_sample_packages:
            try:
                digital_library_compound_builder.transform(package,
                                                           dest=tmp_dir)
            except errors.ZipFileException as e:
                # Print diagnostics about the zip file, then re-raise.
                print(f"{e.src_zip_file} had a problem", file=sys.stderr)
                if e.problem_files:
                    print(f"Problems with {','.join(e.problem_files)}",
                          file=sys.stderr)
                # Open the archive in a context manager so the file handle
                # is closed before the exception propagates.
                with zipfile.ZipFile(e.src_zip_file) as problem_file:
                    print(problem_file.namelist(), file=sys.stderr)
                raise

        assert len(list(os.scandir(tmp_dir))) == 1

        for i, new_package in enumerate(
                digital_library_compound_builder.locate_packages(tmp_dir)):
            assert new_package.metadata[Metadata.ID] == \
                hathi_limited_view_sample_packages[i].metadata[Metadata.ID]

            sample_item = new_package.items[0]

            access = sample_item.instantiations[InstantiationTypes.ACCESS]
            access_files = list(access.get_files())
            assert len(access_files) > 0

            pres = sample_item.instantiations[InstantiationTypes.PRESERVATION]
            pres_files = list(pres.get_files())
            assert len(pres_files) > 0

            assert new_package.metadata[Metadata.PATH] == tmp_dir
def test_capture_one_collection_transform(
        self,
        capture_one_collection,
        source_file,
        package_type,
        expected_out,
        monkeypatch
):
    """Transform each located Capture One package into ``package_type``.

    ``Transformers.transform`` is replaced by a Mock while the packages
    are transformed.
    """
    fake_transform = Mock(spec=lambda source, destination: None)
    monkeypatch.setattr(
        uiucprescon.packager.transformations.Transformers,
        "transform",
        fake_transform
    )

    output_dir = "output"
    dash_delimited = packager.packages.CaptureOnePackage(delimiter='-')
    capture_one_factory = packager.PackageFactory(dash_delimited)

    located = capture_one_factory.locate_packages(
        capture_one_collection(source_file)
    )
    for found_package in located:
        target_factory = packager.PackageFactory(package_type())
        target_factory.transform(found_package, output_dir)
    # The check below was already disabled in the original:
    # transform.assert_any_call(ANY, os.path.join(output_dir, expected_out))
def discover_task_metadata(
        self,
        initial_results: List[Any],
        additional_data,
        **user_args: str
) -> List[dict]:
    """Build one task entry per HathiLimitedView package found.

    Args:
        initial_results: Not used by this implementation.
        additional_data: Not used by this implementation.
        **user_args: ``'Input'`` is the location searched for packages and
            ``'Output'`` is stored as each entry's destination.

    Returns:
        List of dicts with the keys ``"package"`` and ``"destination"``.
    """
    limited_view_format = packager.packages.HathiLimitedView()
    hathi_limited_view_packager = packager.PackageFactory(limited_view_format)

    tasks = []
    for package in hathi_limited_view_packager.locate_packages(
            user_args['Input']):
        task = {
            "package": package,
            "destination": user_args['Output']
        }
        tasks.append(task)
    return tasks
def work(self):
    """Convert the existing package into a DigitalLibraryCompound package.

    The packager logger is set to INFO and wrapped in ``self.log_config``
    while the conversion runs.

    Returns:
        Always True.
    """
    my_logger = logging.getLogger(packager.__name__)
    my_logger.setLevel(logging.INFO)

    with self.log_config(my_logger):
        # The message names the format that is actually produced below; it
        # previously said "Hathi Trust Tiff".
        self.log(
            f"Converting {self.packaging_id} from {self.source_path} "
            f"to a Digital Library Compound package at "
            f"{self.new_package_root}")

        package_factory = packager.PackageFactory(
            packager.packages.DigitalLibraryCompound())

        package_factory.transform(self.existing_package,
                                  dest=self.new_package_root)
    return True
def work(self) -> bool:
    """Transform the stored package into HathiTiff and record the result.

    The stored results are the bib id and the path formed by joining the
    destination with the bib id.

    Returns:
        Always True.
    """
    hathi_tiff_format = packager.packages.HathiTiff()
    converter = packager.PackageFactory(hathi_tiff_format)
    converter.transform(self._package, self._destination)

    self.log("Transformed CaptureOne package {} to a HathiTiff package "
             "in {}".format(self._bib_id, self._destination))

    new_location = os.path.join(self._destination, self._bib_id)
    results = {
        "bib_id": self._bib_id,
        "location": new_location
    }
    self.set_results(results)
    return True
def hathi_tiff_package_w_sidecar_text(hathi_tiff_sample_package):
    """Add empty sidecar text files to the sample, then locate packages.

    Three ``.txt`` files are written into folder ``000001`` and two into
    folder ``000002`` before the HathiTiff packages are located.

    Returns:
        List of the packages found in the sample package folder.
    """
    # (package folder, number of sidecar text files to create)
    sidecar_counts = (
        ("000001", 3),
        ("000002", 2),
    )
    for folder_name, file_count in sidecar_counts:
        package_path = os.path.join(hathi_tiff_sample_package, folder_name)
        for i in range(file_count):
            text_file = os.path.join(package_path, f"0000000{i+1}.txt")
            # Opening in write mode is enough to create the empty file.
            with open(text_file, "w"):
                pass

    hathi_factory = packager.PackageFactory(packager.packages.HathiTiff())
    found = hathi_factory.locate_packages(hathi_tiff_sample_package)
    return list(found)
def archival_full_name_transformed_to_ht_trust(
        self,
        sample_collection_longer,
        monkeypatch
):
    """Convert the longer archival sample collection to HathiJp2.

    ``Transformers.transform`` is replaced by a function that forwards the
    source and destination to a Mock as keyword arguments.

    Returns:
        A tuple of the output path and the Mock that recorded the calls.
    """
    root, files = sample_collection_longer
    recorder = Mock()

    def mock_transform(_, source, destination):
        # Drop the instance argument and record the call by keyword.
        recorder(source=source, destination=destination)

    archival_factory = packager.PackageFactory(noneas.ArchivalNonEAS())
    hathi_jp2_factory = packager.PackageFactory(hathi_jp2_package.HathiJp2())
    output_path = os.path.join('some', 'folder')

    with monkeypatch.context() as mp:
        mp.setattr(
            packager.transformations.Transformers,
            "transform",
            mock_transform
        )
        for found_package in archival_factory.locate_packages(root):
            hathi_jp2_factory.transform(found_package, output_path)
    return output_path, recorder
def transformed_to_ht_trust(
        self,
        sample_cataloged_collection,
        monkeypatch,
        tmpdir
):
    """Convert the cataloged sample collection to the HathiJp2 format.

    ``Transformers.transform`` is replaced by a function that forwards the
    source and destination to a Mock as keyword arguments. ``tmpdir`` is
    accepted but not used in the body.

    Returns:
        A tuple of the output path and the Mock that recorded the calls.
    """
    recorder = Mock()

    def mock_transform(_, source, destination):
        # Drop the instance argument and record the call by keyword.
        recorder(source=source, destination=destination)

    root, files = sample_cataloged_collection
    cataloged_factory = packager.PackageFactory(noneas.CatalogedNonEAS())
    hathi_jp2_format = packager.PackageFactory(hathi_jp2_package.HathiJp2())
    output_path = os.path.join('some', 'folder')

    with monkeypatch.context() as mp:
        mp.setattr(
            packager.transformations.Transformers,
            "transform",
            mock_transform
        )
        located = list(cataloged_factory.locate_packages(root))
        for found_package in located:
            hathi_jp2_format.transform(found_package, output_path)
    return output_path, recorder
def discover_task_metadata(
        self,
        initial_results: List[Any],
        additional_data,
        **user_args
) -> List[dict]:
    """Build one task entry per HathiLimitedView package found.

    Args:
        initial_results: Not used by this implementation.
        additional_data: Not used by this implementation.
        **user_args: ``'Input'`` is the location searched for packages and
            ``'Output'`` is stored as each entry's destination.

    Returns:
        List of dicts with the keys ``"package"`` and ``"destination"``.
    """
    limited_view_format = packager.packages.HathiLimitedView()
    limited_view_factory = packager.PackageFactory(limited_view_format)

    return [
        {
            "package": found_package,
            "destination": user_args['Output']
        }
        for found_package in limited_view_factory.locate_packages(
            user_args['Input']
        )
    ]
def discover_task_metadata(
        self,
        initial_results: List[Any],
        additional_data,
        **user_args
) -> List[dict]:
    """Build one job entry per Capture One package found in the input.

    Args:
        initial_results: Not used by this implementation.
        additional_data: Not used by this implementation.
        **user_args: Must contain ``"Input"`` (the path searched for
            packages) and ``"Output"`` (stored with every job).

    Returns:
        List of dicts with the keys ``"package"``, ``"output"`` and
        ``"source_path"``.
    """
    source_input = user_args["Input"]
    dest = user_args["Output"]

    capture_one_format = packager.packages.CaptureOnePackage()
    capture_one_factory = packager.PackageFactory(capture_one_format)

    return [
        {
            "package": found_package,
            "output": dest,
            "source_path": source_input
        }
        for found_package in capture_one_factory.locate_packages(source_input)
    ]
def test_capture_one_tiff_package_plus(capture_one_fixture_plus):
    """A plus-delimited Capture One batch should contain two packages."""
    plus_delimited = packager.packages.CaptureOnePackage(delimiter='+')
    factory = packager.PackageFactory(plus_delimited)

    # find all Capture One organized packages
    found = list(factory.locate_packages(path=capture_one_fixture_plus))

    # There should be 2 packages in this sample batch
    assert len(found) == 2