def item(self):
    """Create a sample item carrying three kinds of instantiations.

    The item is given an id, an item name and a path. An access, a
    preservation and a supplementary instantiation are then attached to
    it, in that order, and each one receives its own path metadata.

    Returns:
        The populated collection.Item.

    """
    sample = collection.Item()
    sample.component_metadata[packager.Metadata.ID] = '99127822912205899'
    sample.component_metadata[packager.Metadata.ITEM_NAME] = '001'
    sample.component_metadata[packager.Metadata.PATH] = 'somepath'

    # Access copy lives in the "access" subdirectory of the item path.
    access = collection.Instantiation(
        parent=sample,
        category=packager.InstantiationTypes.ACCESS,
        files=["99127822912205899-001.tif"],
    )
    access.component_metadata[packager.Metadata.PATH] = os.path.join(
        "somepath", "access"
    )

    # Preservation copy lives in the "preservation" subdirectory.
    preservation = collection.Instantiation(
        parent=sample,
        category=packager.InstantiationTypes.PRESERVATION,
        files=["99127822912205899-001.tif"],
    )
    preservation.component_metadata[packager.Metadata.PATH] = os.path.join(
        "somepath", "preservation"
    )

    # Supplementary text files sit directly in the item path.
    supplementary = collection.Instantiation(
        parent=sample,
        category=packager.InstantiationTypes.SUPPLEMENTARY,
        files=[
            "99127822912205899-001a.txt",
            "99127822912205899-001b.txt",
        ],
    )
    supplementary.component_metadata[packager.Metadata.PATH] = "somepath"

    return sample
def test_transform_access_error_on_multiple_access(self):
    """Expect an AssertionError when the access instantiation has two files.

    The item is built with two access files and two preservation files,
    then handed to a CopyStrategy whose ``convert`` is replaced by a Mock.
    """
    item = collection.Item()
    item.component_metadata[collection.Metadata.ID] = "123"
    item.component_metadata[collection.Metadata.PATH] = "source"
    item.component_metadata[collection.Metadata.ITEM_NAME] = '00000001'

    access_files = [
        os.path.join("123", "access", "123-00000001.jp2"),
        os.path.join("123", "access", "123-00000001a.jp2"),
    ]
    collection.Instantiation(
        category=collection.InstantiationTypes.ACCESS,
        parent=item,
        files=access_files,
    )

    preservation_files = [
        os.path.join("123", "preservation", "123-00000001.tif"),
        os.path.join("123", "preservation", "123-00000001a.tif"),
    ]
    collection.Instantiation(
        category=collection.InstantiationTypes.PRESERVATION,
        parent=item,
        files=preservation_files,
    )

    copy_strategy = packages.hathi_jp2_package.CopyStrategy()
    # Replace the converter so no real file work is attempted.
    copy_strategy.convert = Mock()

    with pytest.raises(AssertionError):
        copy_strategy.transform_access_file(item, dest="out")
def test_set_title_page(self, delegate):
    """Check that setModelData stores the selected file as the title page.

    A combo box is filled with three file names and "file2.jp2" is made
    current. The model's ``data`` callable hands back the package object
    for the user role, and after ``delegate.setModelData`` runs that
    object's TITLE_PAGE metadata must equal the selected file name.
    """
    file_names = [
        "file1.jp2",
        "file2.jp2",
        "file3.jp2",
    ]

    selector = QtWidgets.QComboBox()
    for name in file_names:
        selector.addItem(name)
    selector.setCurrentText("file2.jp2")

    package_object = collection.PackageObject()
    child_item = collection.Item(package_object)
    # Binding kept so the instantiation stays referenced for the whole test.
    child_files = collection.Instantiation(parent=child_item, files=file_names)

    def get_data(index, role):
        # Only the user role yields the record; any other role gives None.
        if role == QtCore.Qt.UserRole:
            return package_object
        return None

    model = Mock()
    model.data = get_data
    index = MagicMock()

    delegate.setModelData(selector, model, index)

    title_page = package_object.metadata[collection.Metadata.TITLE_PAGE]
    assert title_page == "file2.jp2"
def test_builder2_build_instance(capture_one_batch_with_dashes):
    """Check that build_instance leaves the parent item with length one.

    The builder is configured with the dash splitter and asked to build
    an instance from the first source file of the batch fixture.
    """
    batch_dir, source_files = capture_one_batch_with_dashes
    first_file = source_files[0]

    builder = capture_one_package.CaptureOneBuilder()
    builder.splitter = capture_one_package.dash_splitter

    parent_item = collection.Item()
    parent_item.component_metadata[Metadata.ID] = "00001"

    builder.build_instance(
        parent=parent_item,
        path=batch_dir,
        filename=first_file,
    )

    assert len(parent_item) == 1
def get_data(role):
    """Return a fresh package object for the user role, otherwise None.

    The returned object has one item holding an instantiation with three
    jp2 file names.
    """
    if role == QtCore.Qt.UserRole:
        record = collection.PackageObject()
        child_item = collection.Item(record)
        # Bound to a name only to mirror the construction; not read again.
        child_files = collection.Instantiation(
            parent=child_item,
            files=[
                "file1.jp2",
                "file2.jp2",
                "file3.jp2",
            ],
        )
        return record
    return None
def get_data(role):
    """Return a package object with a preset title page for the user role.

    The object's TITLE_PAGE metadata is set to "file2.jp2" before one item
    with a three-file instantiation is attached. Any other role yields
    None.
    """
    if role == QtCore.Qt.UserRole:
        record = collection.PackageObject()
        record.component_metadata[collection.Metadata.TITLE_PAGE] = \
            "file2.jp2"

        child_item = collection.Item(record)
        # Bound to a name only to mirror the construction; not read again.
        child_files = collection.Instantiation(
            parent=child_item,
            files=[
                "file1.jp2",
                "file2.jp2",
                "file3.jp2",
            ],
        )
        return record
    return None
def capture_one_item():
    """Build an item that has a single preservation tiff attached.

    Returns:
        A collection.Item with ID, PATH and ITEM_NAME metadata set and one
        PRESERVATION instantiation.

    """
    tiff_item = collection.Item()
    tiff_item.component_metadata[collection.Metadata.ID] = "123"
    tiff_item.component_metadata[collection.Metadata.PATH] = "source"
    tiff_item.component_metadata[collection.Metadata.ITEM_NAME] = '00000001'

    preservation_files = [os.path.join("123", "123_00000001.tif")]
    collection.Instantiation(
        category=collection.InstantiationTypes.PRESERVATION,
        parent=tiff_item,
        files=preservation_files,
    )
    return tiff_item
def compound_item():
    """Build an item with one access jp2 and one preservation tiff.

    Returns:
        A collection.Item with ID, PATH and ITEM_NAME metadata set and an
        ACCESS and a PRESERVATION instantiation attached, in that order.

    """
    item = collection.Item()
    item.component_metadata[collection.Metadata.ID] = "123"
    item.component_metadata[collection.Metadata.PATH] = "source"
    item.component_metadata[collection.Metadata.ITEM_NAME] = '00000001'

    access_files = [os.path.join("123", "access", "123-00000001.jp2")]
    collection.Instantiation(
        category=collection.InstantiationTypes.ACCESS,
        parent=item,
        files=access_files,
    )

    preservation_files = [
        os.path.join("123", "preservation", "123-00000001.tif")
    ]
    collection.Instantiation(
        category=collection.InstantiationTypes.PRESERVATION,
        parent=item,
        files=preservation_files,
    )
    return item
def test_zip_error(monkeypatch):
    """Check that a failed zip extraction surfaces as ZipFileException.

    ``ZipFile.extract`` is patched to raise KeyError, and the exception
    raised by ``get_files`` must name both the source zip path and the
    problem file.
    """
    import io
    from zipfile import ZipFile

    zipped_item = collection.Item()
    zipped_item.component_metadata[Metadata.PATH] = "somepath.zip"

    zipped_files = collection.Instantiation(parent=zipped_item)
    zipped_files._files = ["somefile.txt"]

    with monkeypatch.context() as patcher:
        # Extraction fails with KeyError; the other patches replace
        # ZipFile._RealGetContents and io.open with mocks.
        patcher.setattr(ZipFile, "extract", Mock(side_effect=KeyError))
        patcher.setattr(ZipFile, "_RealGetContents", Mock())
        patcher.setattr(io, "open", MagicMock())

        with pytest.raises(errors.ZipFileException) as error:
            next(zipped_files.get_files())

        assert (
            error.value.src_zip_file == zipped_item.metadata[Metadata.PATH]
            and "somefile.txt" in error.value.problem_files[0]
        )
def build_package(self, parent, path: str, *args, **kwargs) -> None:
    """Build a capture one style package object.

    Every non-system file found in ``path`` whose parsed 'group' equals
    the parent's ID and whose extension is ".tif" (case-insensitive)
    becomes a new item under ``parent``; an instance is then built for it.

    Args:
        parent: Component whose ID metadata selects the files and which
            receives the new items.
        path: Directory to search for files.
        *args: Unused.
        **kwargs: Unused.

    Raises:
        ValueError: If a selected file name cannot be parsed.

    """
    group_id = parent.metadata[Metadata.ID]

    def belongs_to_group(entry: "os.DirEntry[str]") -> bool:
        name_parts = self.identify_file_name_parts(entry.name)
        if name_parts is None:
            return False
        return name_parts['group'] == group_id

    for entry in self.get_non_system_files(path):
        if not belongs_to_group(entry):
            continue

        # Parsed a second time so the parts are available in this scope.
        name_parts = self.identify_file_name_parts(entry.name)
        if name_parts is None:
            raise ValueError(
                f"File does not match expected naming pattern {entry.path}"
            )

        # Only tiff files produce items.
        if name_parts['extension'].lower() != ".tif":
            continue

        part_name = name_parts['part']
        item = collection.Item(parent=parent)
        item.component_metadata[Metadata.ITEM_NAME] = part_name
        self.build_instance(item, path, part_name)
def build_object(self, parent: collection.Package, group_id: str,
                 path: str) -> None:
    """Build a new object and fill it with the items of one group.

    A package object is created under ``parent``, then the files located
    in the "access" subdirectory of ``path`` are examined. Each file that
    passes ``is_eas_file`` and whose name matches the grouper regex with
    a 'group' equal to ``group_id`` becomes an item of the new object.

    Args:
        parent: Package that the new object is attached to.
        group_id: Stored as the object's ID; only files belonging to this
            group are added.
        path: Stored as the object's PATH; its "access" subdirectory is
            searched for files.

    Raises:
        ValueError: If an EAS file name does not match the grouper regex.

    """
    package_object = collection.PackageObject(parent=parent)
    package_object.component_metadata[Metadata.ID] = group_id
    package_object.component_metadata[Metadata.PATH] = path

    access_path = os.path.join(path, "access")
    for entry in self.locate_files_access(access_path):
        if not self.is_eas_file(pathlib.Path(entry.path)):
            continue

        match = EASBuilder.grouper_regex.match(entry.name)
        if match is None:
            raise ValueError("Unknown pattern")

        # Files from other groups do not belong to this object.
        if match.groupdict()['group'] != group_id:
            continue

        item = collection.Item(parent=package_object)
        item.component_metadata[Metadata.ITEM_NAME] = match['part']
        self.build_instance(
            item,
            path=os.path.dirname(entry.path),
            filename=entry.name,
        )
def test_transform_access_error_on_multiple_access(self):
    """Expect an AssertionError when the source instantiation has two files.

    The item carries a preservation instantiation with two tiff files and
    is passed to a ConvertStrategy that reads from PRESERVATION; the
    strategy's ``convert`` is replaced by a Mock.
    """
    tiff_item = collection.Item()
    tiff_item.component_metadata[collection.Metadata.ID] = "123"
    tiff_item.component_metadata[collection.Metadata.PATH] = "source"
    tiff_item.component_metadata[collection.Metadata.ITEM_NAME] = '00000001'

    preservation_files = [
        os.path.join("123", "123_00000001.tif"),
        os.path.join("123", "123_00000001b.tif"),
    ]
    collection.Instantiation(
        category=collection.InstantiationTypes.PRESERVATION,
        parent=tiff_item,
        files=preservation_files,
    )

    convert_strategy = packages.hathi_jp2_package.ConvertStrategy(
        instance_source=collection.InstantiationTypes.PRESERVATION
    )
    # Replace the converter so no real file work is attempted.
    convert_strategy.convert = Mock()

    with pytest.raises(AssertionError):
        convert_strategy.transform_access_file(tiff_item, dest="out")
def digital_library_object(self):
    """Build a digital library compound object containing one item.

    The object is tagged with the DIGITAL_LIBRARY_COMPOUND package type.
    Its single item has an ACCESS jp2 and a PRESERVATION tiff attached,
    in that order.

    Returns:
        The populated collection.PackageObject.

    """
    package_object = collection.PackageObject()
    package_object.component_metadata[collection.Metadata.ID] = "123"
    package_object.component_metadata[collection.Metadata.PACKAGE_TYPE] = \
        common.PackageTypes.DIGITAL_LIBRARY_COMPOUND

    item = collection.Item(parent=package_object)
    item.component_metadata[collection.Metadata.ID] = "123"
    item.component_metadata[collection.Metadata.PATH] = "source"
    item.component_metadata[collection.Metadata.ITEM_NAME] = '00000001'

    access_files = [os.path.join("123", "access", "123-00000001.jp2")]
    collection.Instantiation(
        category=collection.InstantiationTypes.ACCESS,
        parent=item,
        files=access_files,
    )

    preservation_files = [
        os.path.join("123", "preservation", "123-00000001.tif")
    ]
    collection.Instantiation(
        category=collection.InstantiationTypes.PRESERVATION,
        parent=item,
        files=preservation_files,
    )
    return package_object