def test_validate_optional_tagfile_in_directory(self):
    """Validate a bag whose tag manifest lists a tag file in a nested folder.

    Three states are exercised in order:

    1. the tag manifest carries a wrong checksum for the tag file, and
       validation must raise ``bagit.BagValidationError``;
    2. the tag manifest carries the real MD5 of the tag file, and
       validation must succeed;
    3. the tag file is deleted, and validation must raise again.
    """
    bagit.make_bag(self.tmpdir, checksums=['md5'])
    tagdir = tempfile.mkdtemp(dir=self.tmpdir)
    tagfolder = j(tagdir, "tagfolder")
    tagfile_path = j(tagfolder, "tagfile")
    tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

    if not os.path.exists(tagfolder):
        os.makedirs(tagfolder)
    with open(tagfile_path, "w") as tagfile:
        tagfile.write("test")

    # Path of the tag file relative to the bag root, as written to the
    # tag manifest.
    relpath = tagfile_path.replace(self.tmpdir + os.sep, "")
    # str.replace returns a new string; re-bind it so that backslash
    # separators really are converted to forward slashes.
    relpath = relpath.replace("\\", "/")

    with open(tagmanifest_path, "w") as tagman:
        # Incorrect checksum.
        tagman.write("8e2af7a0143c7b8f4de0b3fc90f27354 " + relpath + "\n")
    bag = bagit.BDBag(self.tmpdir)
    self.assertRaises(bagit.BagValidationError, self.validate, bag)

    # Rewrite the tag manifest with the actual digest of the tag file.
    hasher = hashlib.new("md5")
    with open(tagfile_path, "r") as tf:
        contents = tf.read().encode('utf-8')
    hasher.update(contents)
    with open(tagmanifest_path, "w") as tagman:
        tagman.write(hasher.hexdigest() + " " + relpath + "\n")
    bag = bagit.BDBag(self.tmpdir)
    self.assertTrue(self.validate(bag))

    # Missing tagfile.
    os.remove(tagfile_path)
    bag = bagit.BDBag(self.tmpdir)
    self.assertRaises(bagit.BagValidationError, self.validate, bag)
def test_validate_optional_tagfile(self):
    """Validate a bag whose tag manifest lists an extra tag file.

    Three states are exercised in order:

    1. the tag manifest carries a wrong checksum for the tag file, and
       validation must raise ``bagit.BagValidationError``;
    2. the tag manifest carries the real MD5 of the tag file, and
       validation must succeed;
    3. the tag file is deleted, and validation must raise again.
    """
    logger.info(self.getTestHeader(sys._getframe().f_code.co_name))
    bagit.make_bag(self.tmpdir, checksums=['md5'])
    tagdir = tempfile.mkdtemp(dir=self.tmpdir)
    tagfile_path = j(tagdir, "tagfile")
    tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

    with open(tagfile_path, "w") as tagfile:
        tagfile.write("test")

    # Path of the tag file relative to the bag root, as written to the
    # tag manifest.
    relpath = tagfile_path.replace(self.tmpdir + os.sep, "")
    # str.replace returns a new string; re-bind it so that backslash
    # separators really are converted to forward slashes.
    relpath = relpath.replace("\\", "/")

    with open(tagmanifest_path, "w") as tagman:
        # Incorrect checksum.
        tagman.write("8e2af7a0143c7b8f4de0b3fc90f27354 " + relpath + "\n")
    bag = bagit.BDBag(self.tmpdir)
    self.assertRaises(bagit.BagValidationError, self.validate, bag)

    # Rewrite the tag manifest with the actual digest of the tag file.
    hasher = hashlib.new("md5")
    contents = slurp_text_file(tagfile_path).encode('utf-8')
    hasher.update(contents)
    with open(tagmanifest_path, "w") as tagman:
        tagman.write(hasher.hexdigest() + " " + relpath + "\n")
    bag = bagit.BDBag(self.tmpdir)
    self.assertTrue(self.validate(bag))

    # Missing tagfile.
    os.remove(tagfile_path)
    bag = bagit.BDBag(self.tmpdir)
    self.assertRaises(bagit.BagValidationError, self.validate, bag)
def test_save_baginfo(self):
    """Check that bag-info entries survive a save and a reload.

    A scalar value and then a list value are stored through ``bag.info``,
    saved, and read back from a freshly opened bag.
    """
    created = bagit.make_bag(self.tmpdir)
    created.info["foo"] = "bar"
    created.save()

    # Re-open from disk and confirm the scalar value was persisted.
    reopened = bagit.BDBag(self.tmpdir)
    self.assertEqual(reopened.info["foo"], "bar")
    self.assertTrue(reopened.is_valid())

    # Repeat with a multi-valued entry.
    reopened.info['x'] = ["a", "b", "c"]
    reopened.save()
    reloaded = bagit.BDBag(self.tmpdir)
    self.assertEqual(reloaded.info["x"], ["a", "b", "c"])
    self.assertTrue(reopened.is_valid())
def test_save_baginfo(self):
    """Check that bag-info entries survive a save and a reload.

    A scalar value and then a list value are stored through ``bag.info``,
    saved, and read back from a freshly opened bag.
    """
    test_name = sys._getframe().f_code.co_name
    logger.info(self.getTestHeader(test_name))

    created = bagit.make_bag(self.tmpdir)
    created.info["foo"] = "bar"
    created.save()

    # Re-open from disk and confirm the scalar value was persisted.
    reopened = bagit.BDBag(self.tmpdir)
    self.assertEqual(reopened.info["foo"], "bar")
    self.assertTrue(reopened.is_valid())

    # Repeat with a multi-valued entry.
    reopened.info['x'] = ["a", "b", "c"]
    reopened.save()
    reloaded = bagit.BDBag(self.tmpdir)
    self.assertEqual(reloaded.info["x"], ["a", "b", "c"])
    self.assertTrue(reopened.is_valid())
def is_bag(bag_path):
    """Report whether ``bag_path`` can be opened as a bag.

    :param bag_path: path handed to ``bdbagit.BDBag``.
    :return: ``False`` when constructing the bag raises
        ``bdbagit.BagError`` or ``bdbagit.BagValidationError``; otherwise
        the truthiness of the constructed bag object.
    """
    try:
        candidate = bdbagit.BDBag(bag_path)
    except (bdbagit.BagError, bdbagit.BagValidationError):
        return False
    return bool(candidate)
def test_mixed_case_checksums(self):
    """Check that an upper-cased payload checksum still validates.

    One payload digest in ``manifest-md5.txt`` is rewritten in upper case,
    the tag manifest is patched with the new digest of that manifest, and
    the reopened bag must validate.
    """
    bag = bagit.make_bag(self.tmpdir, checksums=['md5'])
    manifest_path = j(self.tmpdir, "manifest-md5.txt")
    tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

    # Extract entries only for the payload and ignore
    # entries from the tagmanifest file
    payload_digests = {}
    payload_prefix = 'data' + os.sep
    for entry_path, digests in bag.entries.items():
        if entry_path.startswith(payload_prefix):
            payload_digests = digests
    digest = next(iter(payload_digests.values()))

    manifest_text = slurp_text_file(manifest_path)
    upper_cased = manifest_text.replace(digest, digest.upper())
    with open(manifest_path, "wb") as manifest_file:
        manifest_file.write(upper_cased.encode('utf-8'))

    # Since manifest-md5.txt file is updated, re-calculate its
    # md5 checksum and update it in the tagmanifest-md5.txt file
    md5 = hashlib.new('md5')
    md5.update(slurp_text_file(manifest_path).encode('utf-8'))

    with open(tagmanifest_path, "r") as tagmanifest:
        tagmanifest_text = tagmanifest.read()
    old_digest = bag.entries['manifest-md5.txt']['md5']
    tagmanifest_text = tagmanifest_text.replace(old_digest, md5.hexdigest())
    with open(tagmanifest_path, "w") as tagmanifest:
        tagmanifest.write(tagmanifest_text)

    reopened = bagit.BDBag(self.tmpdir)
    self.assertTrue(self.validate(reopened))
def test_multiple_oxum_values(self):
    """Check fast validation after a second Payload-Oxum line is appended.

    An extra ``Payload-Oxum: 7.7`` line is added to ``bag-info.txt`` and
    the reopened bag must still pass ``fast=True`` validation.
    """
    test_name = sys._getframe().f_code.co_name
    logger.info(self.getTestHeader(test_name))

    bagit.make_bag(self.tmpdir)
    baginfo_path = j(self.tmpdir, "bag-info.txt")
    with open(baginfo_path, "a") as baginfo:
        baginfo.write('Payload-Oxum: 7.7\n')

    reopened = bagit.BDBag(self.tmpdir)
    self.assertTrue(self.validate(reopened, fast=True))
def validate_bag(bag_path, fast=False, callback=None, config_file=DEFAULT_CONFIG_FILE):
    """Open the bag at ``bag_path`` and validate it.

    :param bag_path: path of the bag to validate.
    :param fast: forwarded to ``BDBag.validate`` as ``fast``.
    :param callback: forwarded to ``BDBag.validate`` as ``callback``; when
        it is truthy, validation is requested with a single process.
    :param config_file: configuration file read with ``read_config``; its
        ``bag_config`` section supplies ``bag_processes`` (default 1).
    :raises bdbagit.BagValidationError: re-raised after logging a warning.
    :raises bdbagit.BaggingInterruptedError: re-raised after logging a
        warning.
    :raises RuntimeError: wraps any other exception raised while opening
        or validating the bag.
    """
    bag_settings = read_config(config_file)['bag_config']
    configured_processes = bag_settings.get('bag_processes', 1)

    try:
        logger.info("Validating bag: %s" % bag_path)
        target = bdbagit.BDBag(bag_path)
        # A callback forces a single validation process.
        process_count = 1 if callback else configured_processes
        target.validate(process_count, fast=fast, callback=callback)
        logger.info("Bag %s is valid" % bag_path)
    except bdbagit.BagValidationError as e:
        logger.warning(
            "BagValidationError: A BagValidationError may be transient if the bag contains unresolved "
            "remote file references from a fetch.txt file. In this case the bag is incomplete but not "
            "necessarily invalid. Resolve remote file references (if any) and re-validate."
        )
        raise e
    except bdbagit.BaggingInterruptedError as e:
        logger.warning(get_typed_exception(e))
        raise e
    except Exception as e:
        raise RuntimeError("Unhandled exception while validating bag: %s" % e)
def test_is_valid(self):
    """Check that ``is_valid`` flips to False once an extra payload file appears."""
    bagit.make_bag(self.tmpdir)
    opened = bagit.BDBag(self.tmpdir)
    self.assertTrue(opened.is_valid())

    # Drop a file into data/ that the bag was not created with.
    extra_path = j(self.tmpdir, "data", "extra_file")
    with open(extra_path, "w") as extra:
        extra.write("bar")
    self.assertFalse(opened.is_valid())
def test_validate_fast_without_oxum(self):
    """Check that fast validation raises once ``bag-info.txt`` is removed."""
    bagit.make_bag(self.tmpdir)
    os.remove(j(self.tmpdir, "bag-info.txt"))

    reopened = bagit.BDBag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError):
        self.validate(reopened, fast=True)
def test_is_valid(self):
    """Check that ``is_valid`` flips to False once an extra payload file appears."""
    test_name = sys._getframe().f_code.co_name
    logger.info(self.getTestHeader(test_name))

    bagit.make_bag(self.tmpdir)
    opened = bagit.BDBag(self.tmpdir)
    self.assertTrue(opened.is_valid())

    # Drop a file into data/ that the bag was not created with.
    extra_path = j(self.tmpdir, "data", "extra_file")
    with open(extra_path, "w") as extra:
        extra.write("bar")
    self.assertFalse(opened.is_valid())
def test_validate_fast_without_oxum(self):
    """Check that fast validation raises once ``bag-info.txt`` is removed."""
    test_name = sys._getframe().f_code.co_name
    logger.info(self.getTestHeader(test_name))

    bagit.make_bag(self.tmpdir)
    os.remove(j(self.tmpdir, "bag-info.txt"))

    reopened = bagit.BDBag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError):
        self.validate(reopened, fast=True)
def test_save_only_baginfo(self):
    """Check that a plain ``save()`` persists bag-info but leaves the bag invalid.

    A new payload file is written after the bag is made; ``save()`` is then
    called with no arguments, and the reopened bag must carry the new info
    entry while ``is_valid()`` reports False.
    """
    created = bagit.make_bag(self.tmpdir)

    new_payload = j(self.tmpdir, 'data', 'newfile')
    with open(new_payload, 'w') as payload:
        payload.write('newfile')

    created.info["foo"] = "bar"
    created.save()

    reopened = bagit.BDBag(self.tmpdir)
    self.assertEqual(reopened.info["foo"], "bar")
    self.assertFalse(reopened.is_valid())
def test_validate_slow_without_oxum_extra_file(self):
    """Check that full validation raises when bag-info is gone and an extra file exists."""
    bagit.make_bag(self.tmpdir)
    os.remove(j(self.tmpdir, "bag-info.txt"))

    # Add a payload file that the bag was not created with.
    extra_path = j(self.tmpdir, "data", "extra_file")
    with open(extra_path, "w") as extra:
        extra.write("foo")

    reopened = bagit.BDBag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError):
        self.validate(reopened, fast=False)
def test_save_baginfo_with_sha1(self):
    """Check that a bag made with sha1 and md5 stays valid across saves."""
    created = bagit.make_bag(self.tmpdir, checksums=["sha1", "md5"])
    self.assertTrue(created.is_valid())

    # Save once unchanged, then again after adding an info entry.
    created.save()
    created.info['foo'] = "bar"
    created.save()

    reopened = bagit.BDBag(self.tmpdir)
    self.assertTrue(reopened.is_valid())
def test_save_only_baginfo(self):
    """Check that a plain ``save()`` persists bag-info but leaves the bag invalid.

    A new payload file is written after the bag is made; ``save()`` is then
    called with no arguments, and the reopened bag must carry the new info
    entry while ``is_valid()`` reports False.
    """
    test_name = sys._getframe().f_code.co_name
    logger.info(self.getTestHeader(test_name))

    created = bagit.make_bag(self.tmpdir)

    new_payload = j(self.tmpdir, 'data', 'newfile')
    with open(new_payload, 'w') as payload:
        payload.write('newfile')

    created.info["foo"] = "bar"
    created.save()

    reopened = bagit.BDBag(self.tmpdir)
    self.assertEqual(reopened.info["foo"], "bar")
    self.assertFalse(reopened.is_valid())
def test_validate_slow_without_oxum_extra_file(self):
    """Check that full validation raises when bag-info is gone and an extra file exists."""
    test_name = sys._getframe().f_code.co_name
    logger.info(self.getTestHeader(test_name))

    bagit.make_bag(self.tmpdir)
    os.remove(j(self.tmpdir, "bag-info.txt"))

    # Add a payload file that the bag was not created with.
    extra_path = j(self.tmpdir, "data", "extra_file")
    with open(extra_path, "w") as extra:
        extra.write("foo")

    reopened = bagit.BDBag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError):
        self.validate(reopened, fast=False)
def test_bom_in_bagit_txt(self):
    """Check that validation raises when ``bagit.txt`` starts with a UTF-8 BOM."""
    bagit.make_bag(self.tmpdir)
    bagit_txt = j(self.tmpdir, "bagit.txt")

    # Text-mode I/O needs a text BOM on Python 3 and the raw bytes otherwise.
    running_py3 = sys.version_info[0] >= 3
    bom = codecs.BOM_UTF8.decode('utf-8') if running_py3 else codecs.BOM_UTF8

    with open(bagit_txt, "r") as source:
        prefixed = bom + source.read()
    with open(bagit_txt, "w") as target:
        target.write(prefixed)

    reopened = bagit.BDBag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError):
        self.validate(reopened)
def test_save_baginfo_with_sha1(self):
    """Check that a bag made with sha1 and md5 stays valid across saves."""
    test_name = sys._getframe().f_code.co_name
    logger.info(self.getTestHeader(test_name))

    created = bagit.make_bag(self.tmpdir, checksums=["sha1", "md5"])
    self.assertTrue(created.is_valid())

    # Save once unchanged, then again after adding an info entry.
    created.save()
    created.info['foo'] = "bar"
    created.save()

    reopened = bagit.BDBag(self.tmpdir)
    self.assertTrue(reopened.is_valid())
def test_open_bag_with_missing_bagit_txt(self):
    """Check the ``BagError`` message raised when ``bagit.txt`` is deleted."""
    bagit.make_bag(self.tmpdir)
    bagit_txt = j(self.tmpdir, 'bagit.txt')
    os.unlink(bagit_txt)

    with self.assertRaises(bagit.BagError) as error_catcher:
        bagit.BDBag(self.tmpdir)

    expected = 'Expected bagit.txt does not exist: %s' % j(self.tmpdir, "bagit.txt")
    self.assertEqual(expected, str(error_catcher.exception))
def test_open_bag_with_unsupported_version(self):
    """Check the ``BagError`` message raised for a ``BagIt-Version`` of 2.0."""
    bagit.make_bag(self.tmpdir)

    # Overwrite the declaration with a version the test expects to be rejected.
    declaration = 'BagIt-Version: 2.0\nTag-File-Character-Encoding: UTF-8\n'
    with open(j(self.tmpdir, 'bagit.txt'), 'w') as bagit_txt:
        bagit_txt.write(declaration)

    with self.assertRaises(bagit.BagError) as error_catcher:
        bagit.BDBag(self.tmpdir)

    message = str(error_catcher.exception)
    self.assertEqual('Unsupported bag version: 2.0', message)
def validate_bag_structure(bag_path, skip_remote=True):
    """Open the bag at ``bag_path`` and check its payload consistency.

    :param bag_path: path of the bag to inspect.
    :param skip_remote: forwarded to ``check_payload_consistency``.
    :raises bdbagit.BagValidationError: when ``check_payload_consistency``
        returns a falsy result.
    :raises Exception: any exception raised while opening or checking the
        bag is logged as an error and re-raised unchanged.
    """
    try:
        logger.info("Validating bag structure: %s" % bag_path)
        target = bdbagit.BDBag(bag_path)
        consistent = check_payload_consistency(target, skip_remote=skip_remote)
        if consistent:
            logger.info("The directory %s is a valid bag structure" % bag_path)
        else:
            raise bdbagit.BagValidationError(
                "Inconsistent payload state. See log warnings for additional information."
            )
    except Exception as e:
        logger.error("Error while validating bag structure: %s", e)
        raise e
def test_open_bag_with_missing_bagit_txt(self):
    """Check the ``BagError`` message raised when ``bagit.txt`` is deleted."""
    test_name = sys._getframe().f_code.co_name
    logger.info(self.getTestHeader(test_name))

    bagit.make_bag(self.tmpdir)
    bagit_txt = j(self.tmpdir, 'bagit.txt')
    os.unlink(bagit_txt)

    with self.assertRaises(bagit.BagError) as error_catcher:
        bagit.BDBag(self.tmpdir)

    expected = 'Expected bagit.txt does not exist: %s' % j(self.tmpdir, "bagit.txt")
    self.assertEqual(expected, str(error_catcher.exception))
def test_open_bag_with_malformed_bagit_txt(self):
    """Check the ``BagError`` message raised when ``bagit.txt`` is empty.

    The declaration file is emptied, so opening the bag must fail with a
    message naming both required tags.
    """
    bagit.make_bag(self.tmpdir)

    # Opening in 'w' mode already truncates the file to zero length, so no
    # explicit os.ftruncate call is needed (it is also not available on
    # every platform and interpreter version).
    with open(j(self.tmpdir, 'bagit.txt'), 'w'):
        pass

    with self.assertRaises(bagit.BagError) as error_catcher:
        bagit.BDBag(self.tmpdir)

    self.assertEqual(
        'Missing required tag in bagit.txt: BagIt-Version, Tag-File-Character-Encoding',
        str(error_catcher.exception))
def test_validate_flipped_bit(self):
    """Check which validation modes notice a one-character payload change.

    The first character of ``data/README`` is replaced with ``A``; full
    validation must raise, while ``fast=True`` and ``completeness_only=True``
    validation must still succeed.
    """
    bagit.make_bag(self.tmpdir)
    readme_path = j(self.tmpdir, "data", "README")

    original_text = slurp_text_file(readme_path)
    altered_text = 'A' + original_text[1:]
    with io.open(readme_path, "w", newline="\n") as readme:
        readme.write(altered_text)

    reopened = bagit.BDBag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError):
        self.validate(reopened)

    # fast doesn't catch the flipped bit, since oxsum is the same
    self.assertTrue(self.validate(reopened, fast=True))
    self.assertTrue(self.validate(reopened, completeness_only=True))
def test_validate_missing_directory(self):
    """Check the validation error raised when the ``data`` directory is removed."""
    bagit.make_bag(self.tmpdir)

    payload_dir = os.path.join(self.tmpdir, 'data')
    shutil.rmtree(payload_dir)

    reopened = bagit.BDBag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError) as error_catcher:
        reopened.validate()

    expected = 'Expected data directory %s does not exist' % payload_dir
    self.assertEqual(expected, str(error_catcher.exception))
def test_validate_completeness(self):
    """Check completeness-only validation after a payload file is renamed.

    ``data/README`` is renamed to ``data/extra_file``. Fast validation must
    still pass; completeness-only validation must raise, and it must do so
    without ever calling the bag's ``_validate_entries``.
    """
    bagit.make_bag(self.tmpdir)

    payload_dir = j(self.tmpdir, "data")
    os.rename(j(payload_dir, "README"), j(payload_dir, "extra_file"))

    reopened = bagit.BDBag(self.tmpdir)
    self.assertTrue(self.validate(reopened, fast=True))

    with mock.patch.object(reopened, '_validate_entries') as entries_check:
        with self.assertRaises(bagit.BagValidationError):
            self.validate(reopened, completeness_only=True)
        self.assertEqual(entries_check.call_count, 0)
def test_open_bag_with_unknown_encoding(self):
    """Check the ``BagError`` message raised for a ``WTF-8`` tag-file encoding."""
    test_name = sys._getframe().f_code.co_name
    logger.info(self.getTestHeader(test_name))

    bagit.make_bag(self.tmpdir)

    # Overwrite the declaration with an encoding name the test expects to fail.
    declaration = 'BagIt-Version: 0.97\nTag-File-Character-Encoding: WTF-8\n'
    with open(j(self.tmpdir, 'bagit.txt'), 'w') as bagit_txt:
        bagit_txt.write(declaration)

    with self.assertRaises(bagit.BagError) as error_catcher:
        bagit.BDBag(self.tmpdir)

    message = str(error_catcher.exception)
    self.assertEqual('Unsupported encoding: WTF-8', message)
def test_validate_missing_directory(self):
    """Check the validation error raised when the ``data`` directory is removed."""
    test_name = sys._getframe().f_code.co_name
    logger.info(self.getTestHeader(test_name))

    bagit.make_bag(self.tmpdir)

    payload_dir = os.path.join(self.tmpdir, 'data')
    shutil.rmtree(payload_dir)

    reopened = bagit.BDBag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError) as error_catcher:
        reopened.validate()

    expected = 'Expected data directory %s does not exist' % payload_dir
    self.assertEqual(expected, str(error_catcher.exception))
def test_open_bag_with_invalid_versions(self):
    """Check the ``BagError`` message for several malformed ``BagIt-Version`` values.

    Each candidate is written into ``bagit.txt`` in turn; opening the bag
    must raise with a message that quotes the offending value.
    """
    bagit.make_bag(self.tmpdir)
    bagit_txt = j(self.tmpdir, 'bagit.txt')
    malformed_versions = ('a.b', '2.', '0.1.2', '1.2.3')

    for version in malformed_versions:
        declaration = (
            'BagIt-Version: %s\nTag-File-Character-Encoding: UTF-8\n' % version)
        with open(bagit_txt, 'w') as handle:
            handle.write(declaration)

        with self.assertRaises(bagit.BagError) as error_catcher:
            bagit.BDBag(self.tmpdir)

        expected = (
            'Bag version numbers must be MAJOR.MINOR numbers, not %s' % version)
        self.assertEqual(expected, str(error_catcher.exception))