def testVersionInfo(self):
    """Check ``profile_version_info`` with and without the version key in the profile dict."""
    # Profile dict still carries its 'BagIt-Profile-Version' entry.
    with_version = Profile(PROFILE_URL, profile=self.profile_dict)
    self.assertEqual(with_version.profile_version_info, (1, 2, 0), 'Bundled: 1.2.0')

    # Remove the version entry and load again; (1, 1, 0) is the expected result.
    profile_info = self.profile_dict['BagIt-Profile-Info']
    del profile_info['BagIt-Profile-Version']
    without_version = Profile(PROFILE_URL, profile=self.profile_dict)
    self.assertEqual(without_version.profile_version_info, (1, 1, 0), 'Default profile version 1.1.0')
def test_existing_not_allowed(self):
    """Validation must fail with one error when a tag file exists but ``Tag-Files-Allowed`` is empty."""
    self.profile_dict["Tag-Files-Allowed"] = []
    # Create an empty file named 'tag-foo' inside the bag directory.
    with open(join(self.bagdir, 'tag-foo'), 'w'):
        pass
    profile = Profile('TEST', self.profile_dict)
    result = profile.validate(Bag(self.bagdir))
    self.assertFalse(result)
    self.assertEqual(len(profile.report.errors), 1)
    # assertIn prints both operands on failure, unlike assertTrue(x in y).
    self.assertIn("Existing tag file", profile.report.errors[0].value)
def test_profile_kwarg(self):
    """Profiles built without ``profile=``, from a dict and from a string must dump to the same JSON."""
    from_url = Profile(PROFILE_URL)
    from_dict = Profile(PROFILE_URL, profile=self.profile_dict)
    from_str = Profile(PROFILE_URL, profile=self.profile_str)

    # The dict-based profile is the reference both other variants are compared to.
    reference_json = json.dumps(from_dict.profile)
    self.assertEqual(json.dumps(from_str.profile), reference_json, 'Loaded from string')
    self.assertEqual(json.dumps(from_url.profile), reference_json, 'Loaded from URL')
def test_validate_serialization(self):
    """Run ``validate_serialization`` against a bag directory and against a zip file."""
    # Test on unzipped Bag.
    unzipped_bag = os.path.abspath("fixtures/test-bar")
    self.assertTrue(self.profile.validate_serialization(unzipped_bag))

    # Test on zipped Bag, using a newly constructed profile.
    self.profile = Profile(PROFILE_URL)
    zipped_bag = os.path.abspath("fixtures/test-foo.zip")
    self.assertTrue(self.profile.validate_serialization(zipped_bag))
def __init__(self, resolver, path_to_zip):
    """
    Store the constructor arguments and create the report and profile validator.

    Arguments:
        resolver (Resolver): resolver
        path_to_zip (string): Path to the OCRD-ZIP file
    """
    self.resolver, self.path_to_zip = resolver, path_to_zip
    # Empty report plus a Profile built from the OCR-D BagIt profile constants.
    self.report = ValidationReport()
    self.profile_validator = Profile(OCRD_BAGIT_PROFILE_URL, profile=OCRD_BAGIT_PROFILE)
class OcrdZipValidator():
    """
    Validate conformance with BagIt and OCR-D bagit profile.

    See:
        - https://ocr-d.github.io/ocrd_zip
        - https://ocr-d.github.io/bagit-profile.json
        - https://ocr-d.github.io/bagit-profile.yml
    """

    def __init__(self, resolver, path_to_zip):
        """
        Arguments:
            resolver (Resolver): resolver
            path_to_zip (string): Path to the OCRD-ZIP file
        """
        self.resolver = resolver
        self.path_to_zip = path_to_zip
        self.report = ValidationReport()
        self.profile_validator = Profile(OCRD_BAGIT_PROFILE_URL, profile=OCRD_BAGIT_PROFILE)

    def _validate_profile(self, bag):
        """
        Validate against OCRD BagIt profile (bag-info fields, algos etc)

        Arguments:
            bag (Bag): the bag to check against the profile

        Raises:
            Exception: carrying the stringified profile report when the
                profile validator returns a falsy result
        """
        if not self.profile_validator.validate(bag):
            raise Exception(str(self.profile_validator.report))

    def _validate_bag(self, bag, **kwargs):
        """
        Validate BagIt (checksums, payload.oxum etc)

        Arguments:
            bag (Bag): the bag to validate
            **kwargs: passed through unchanged to ``bag.validate``

        Raises:
            BagValidationError: a new error whose message is the string form
                of the error raised by ``bag.validate``
        """
        failed = None
        try:
            bag.validate(**kwargs)
        except BagValidationError as e:
            failed = e
        # Raised after the ``except`` block has ended; only the message text
        # of the caught error is carried over into the new one.
        if failed is not None:
            raise BagValidationError("%s" % failed)

    def validate(self, skip_checksums=False, skip_bag=False, skip_unzip=False, skip_delete=False, processes=2):
        """
        Validate an OCRD-ZIP file for profile, bag and workspace conformance

        Arguments:
            skip_bag (boolean): Whether to skip all checks of manifests and files
            skip_checksums (boolean): Whether to omit checksum checks but still check basic BagIt conformance
            skip_unzip (boolean): Whether the OCRD-ZIP is unzipped, i.e. a directory
            skip_delete (boolean): Whether to skip deleting the unpacked OCRD-ZIP dir after validation
            processes (integer): Number of processes used for checksum validation

        Returns:
            The validator's ``report`` attribute.

        Raises:
            Exception: if profile validation fails (see ``_validate_profile``)
            BagValidationError: if bag validation fails (see ``_validate_bag``)
        """
        if skip_unzip:
            # ``path_to_zip`` is already a directory; it is used in place and
            # therefore never removed afterwards.
            bagdir = self.path_to_zip
            skip_delete = True
        else:
            # NOTE(review): the return value is not inspected here -- confirm
            # that ``validate_serialization`` signals failure by raising.
            self.profile_validator.validate_serialization(self.path_to_zip)
            bagdir = mkdtemp(prefix=TMP_BAGIT_PREFIX)

        try:
            if not skip_unzip:
                # Unpack inside the ``try`` so the temporary directory is also
                # cleaned up when extraction itself fails.
                unzip_file_to_dir(self.path_to_zip, bagdir)
            bag = Bag(bagdir)
            self._validate_profile(bag)
            if not skip_bag:
                self._validate_bag(bag, fast=skip_checksums, processes=processes)
        finally:
            if not skip_delete:
                # remove tempdir
                rmtree(bagdir)
        return self.report
def setUp(self):
    """Create the fixture bag, a profile for ``PROFILE_URL`` and the result of its ``get_profile()``."""
    self.bag = Bag('fixtures/test-bar')
    profile = Profile(PROFILE_URL)
    self.profile = profile
    self.retrieved_profile = profile.get_profile()
class Test_bag_profile(TestCase):
    """Exercise the individual ``Profile`` validation methods against the ``fixtures/test-bar`` bag."""

    def setUp(self):
        """Create the fixture bag, a profile for ``PROFILE_URL`` and the result of its ``get_profile()``."""
        self.bag = Bag('fixtures/test-bar')
        profile = Profile(PROFILE_URL)
        self.profile = profile
        self.retrieved_profile = profile.get_profile()

    def test_validate_bagit_profile_info(self):
        """The retrieved profile must pass ``validate_bagit_profile_info``."""
        outcome = self.profile.validate_bagit_profile_info(self.retrieved_profile)
        self.assertTrue(outcome)

    def test_report_after_validate(self):
        """``report`` is ``None`` before ``validate`` and valid afterwards."""
        self.assertIsNone(self.profile.report)
        self.profile.validate(self.bag)
        report = self.profile.report
        self.assertTrue(report.is_valid)

    def test_validate(self):
        """The fixture bag passes full validation."""
        outcome = self.profile.validate(self.bag)
        self.assertTrue(outcome)

    def test_validate_bag_info(self):
        """The fixture bag passes ``validate_bag_info``."""
        outcome = self.profile.validate_bag_info(self.bag)
        self.assertTrue(outcome)

    def test_validate_manifests_required(self):
        """The fixture bag passes ``validate_manifests_required``."""
        outcome = self.profile.validate_manifests_required(self.bag)
        self.assertTrue(outcome)

    def test_validate_allow_fetch(self):
        """The fixture bag passes ``validate_allow_fetch``."""
        outcome = self.profile.validate_allow_fetch(self.bag)
        self.assertTrue(outcome)

    def test_validate_accept_bagit_version(self):
        """The fixture bag passes ``validate_accept_bagit_version``."""
        outcome = self.profile.validate_accept_bagit_version(self.bag)
        self.assertTrue(outcome)

    def test_validate_serialization(self):
        """Run ``validate_serialization`` against a bag directory and against a zip file."""
        # Test on unzipped Bag.
        unzipped_bag = os.path.abspath("fixtures/test-bar")
        self.assertTrue(self.profile.validate_serialization(unzipped_bag))

        # Test on zipped Bag, using a newly constructed profile.
        self.profile = Profile(PROFILE_URL)
        zipped_bag = os.path.abspath("fixtures/test-foo.zip")
        self.assertTrue(self.profile.validate_serialization(zipped_bag))

    def test_find_tag_files(self):
        """``find_tag_files`` on the fixture bag, sorted, yields the two DPN paths."""
        fixture_root = join(os.getcwd(), 'fixtures/test-bar')
        expect = [
            join(fixture_root, relative)
            for relative in ('DPN/dpnFirstNode.txt', 'DPN/dpnRegistry')
        ]
        found = sorted(find_tag_files(self.bag.path))
        self.assertEqual(found, expect)
def test_not_given(self):
    """Validating the bag directory against the unmodified profile dict must succeed."""
    bag = Bag(self.bagdir)
    validator = Profile('TEST', self.profile_dict)
    self.assertTrue(validator.validate(bag))
class Test_bag_profile(TestCase):
    """Checks for each ``Profile`` validation step, run against the ``fixtures/test-bar`` bag."""

    def setUp(self):
        """Build the bag, the profile and the profile's ``get_profile()`` result used by every test."""
        self.bag = Bag('fixtures/test-bar')
        self.profile = Profile(PROFILE_URL)
        retrieved = self.profile.get_profile()
        self.retrieved_profile = retrieved

    def test_validate_bagit_profile_info(self):
        """``validate_bagit_profile_info`` accepts the retrieved profile."""
        is_ok = self.profile.validate_bagit_profile_info(self.retrieved_profile)
        self.assertTrue(is_ok)

    def test_report_after_validate(self):
        """No report exists before ``validate``; afterwards the report is valid."""
        self.assertIsNone(self.profile.report)
        self.profile.validate(self.bag)
        self.assertTrue(self.profile.report.is_valid)

    def test_validate(self):
        """``validate`` accepts the fixture bag."""
        is_ok = self.profile.validate(self.bag)
        self.assertTrue(is_ok)

    def test_validate_bag_info(self):
        """``validate_bag_info`` accepts the fixture bag."""
        is_ok = self.profile.validate_bag_info(self.bag)
        self.assertTrue(is_ok)

    def test_validate_manifests_required(self):
        """``validate_manifests_required`` accepts the fixture bag."""
        is_ok = self.profile.validate_manifests_required(self.bag)
        self.assertTrue(is_ok)

    def test_validate_allow_fetch(self):
        """``validate_allow_fetch`` accepts the fixture bag."""
        is_ok = self.profile.validate_allow_fetch(self.bag)
        self.assertTrue(is_ok)

    def test_validate_accept_bagit_version(self):
        """``validate_accept_bagit_version`` accepts the fixture bag."""
        is_ok = self.profile.validate_accept_bagit_version(self.bag)
        self.assertTrue(is_ok)

    def test_validate_serialization(self):
        """``validate_serialization`` accepts both the bag directory and the zip fixture."""
        # Test on unzipped Bag.
        directory_path = os.path.abspath("fixtures/test-bar")
        self.assertTrue(self.profile.validate_serialization(directory_path))

        # Test on zipped Bag; the profile is rebuilt first.
        self.profile = Profile(PROFILE_URL)
        archive_path = os.path.abspath("fixtures/test-foo.zip")
        self.assertTrue(self.profile.validate_serialization(archive_path))

    def test_find_tag_files(self):
        """Sorted ``find_tag_files`` output equals the two expected DPN paths."""
        base_dir = join(os.getcwd(), 'fixtures/test-bar')
        tag_names = ['DPN/dpnFirstNode.txt', 'DPN/dpnRegistry']
        expect = [join(base_dir, tag_name) for tag_name in tag_names]
        self.assertEqual(sorted(find_tag_files(self.bag.path)), expect)
def test_validate_serialization(self):
    """``validate_serialization`` accepts both the bag directory and the zip fixture."""
    # Test on unzipped Bag.
    directory_path = os.path.abspath("fixtures/test-bar")
    self.assertTrue(self.profile.validate_serialization(directory_path))

    # Test on zipped Bag; the profile is rebuilt first.
    self.profile = Profile(PROFILE_URL)
    archive_path = os.path.abspath("fixtures/test-foo.zip")
    self.assertTrue(self.profile.validate_serialization(archive_path))