def test_default_bagging_date(self):
    """make_bag adds today's Bagging-Date when bag_info does not supply one.

    Also checks that the supplied Contact-Email is written to bag-info.txt.
    """
    # Keep the address in one place so the expected line can never drift
    # away from the value actually handed to make_bag.
    contact_email = '*****@*****.**'
    info = {'Contact-Email': contact_email}
    bagit.make_bag(self.tmpdir, bag_info=info)

    bag_info_txt = slurp_text_file(j(self.tmpdir, 'bag-info.txt'))
    self.assertIn('Contact-Email: %s' % contact_email, bag_info_txt)

    today = datetime.date.today().strftime("%Y-%m-%d")
    self.assertIn('Bagging-Date: %s' % today, bag_info_txt)
def test_make_bag_with_empty_directory_tree(self):
    """make_bag must not fail on a tree that holds only empty directories."""
    scratch_root = tempfile.mkdtemp()
    nested_dir = j(scratch_root, "test1", "test2")
    try:
        os.makedirs(nested_dir)
        bagit.make_bag(scratch_root)
    finally:
        # Always remove the scratch tree, even if bagging blew up.
        shutil.rmtree(scratch_root)
def test_make_bag_with_bogus_directory(self):
    """make_bag raises RuntimeError naming the directory when it is missing."""
    missing_dir = os.path.realpath('this-directory-does-not-exist')
    expected_message = 'Bag directory %s does not exist' % missing_dir

    with self.assertRaises(RuntimeError) as ctx:
        bagit.make_bag(missing_dir)

    self.assertEqual(expected_message, str(ctx.exception))
def test_make_bag_with_unreadable_source(self):
    """make_bag raises BagError when the source directory has mode 0."""
    # Strip every permission bit from the directory being bagged.
    os.chmod(self.tmpdir, 0)

    with self.assertRaises(bagit.BagError) as ctx:
        bagit.make_bag(self.tmpdir, checksum=['sha256'])

    actual_message = str(ctx.exception)
    self.assertEqual(
        'Missing permissions to move all files and directories',
        actual_message)
def test_make_bag_with_unreadable_file(self):
    """make_bag raises BagError when one payload file has mode 0."""
    locked_file = j(self.tmpdir, 'loc', '2478433644_2839c5e8b8_o_d.jpg')
    os.chmod(locked_file, 0)

    with self.assertRaises(bagit.BagError) as ctx:
        bagit.make_bag(self.tmpdir, checksum=['sha256'])

    actual_message = str(ctx.exception)
    self.assertEqual(
        'Read permissions are required to calculate file fixities',
        actual_message)
def test_garbage_in_bagit_txt(self):
    """Opening a bag whose bagit.txt holds junk raises BagValidationError."""
    bagit.make_bag(self.tmpdir)

    # NOTE(review): literal reproduced exactly as found; a triple-quoted
    # string suggests it was meant to span several lines - confirm.
    garbage = """BagIt-Version: 0.97 Tag-File-Character-Encoding: UTF-8 ================================== """
    bagit_txt_path = j(self.tmpdir, "bagit.txt")
    with open(bagit_txt_path, "w") as handle:
        handle.write(garbage)

    self.assertRaises(bagit.BagValidationError, bagit.Bag, self.tmpdir)
def test_make_bag_with_unreadable_subdirectory(self):
    """make_bag raises BagError when a payload subdirectory is write-only."""
    # Mode 0o200 (owner write only) is used on purpose so that the second
    # permission check in make_bag is the one exercised.
    write_only_dir = j(self.tmpdir, 'loc')
    os.chmod(write_only_dir, 0o200)

    with self.assertRaises(bagit.BagError) as ctx:
        bagit.make_bag(self.tmpdir, checksum=['sha256'])

    actual_message = str(ctx.exception)
    self.assertEqual(
        'Read permissions are required to calculate file fixities',
        actual_message)
def test_open_bag_with_unsupported_version(self):
    """Opening a bag that declares BagIt-Version 2.0 raises BagError."""
    bagit.make_bag(self.tmpdir)

    # Overwrite the declaration with a version the test expects to be refused.
    declaration = 'BagIt-Version: 2.0\nTag-File-Character-Encoding: UTF-8\n'
    with open(j(self.tmpdir, 'bagit.txt'), 'w') as handle:
        handle.write(declaration)

    with self.assertRaises(bagit.BagError) as ctx:
        bagit.Bag(self.tmpdir)

    self.assertEqual('Unsupported bag version: 2.0', str(ctx.exception))
def test_open_bag_with_missing_bagit_txt(self):
    """Opening a bag after bagit.txt was deleted raises BagError."""
    bagit.make_bag(self.tmpdir)
    os.unlink(j(self.tmpdir, 'bagit.txt'))

    expected_message = (
        'Expected bagit.txt does not exist: %s/bagit.txt' % self.tmpdir)

    with self.assertRaises(bagit.BagError) as ctx:
        bagit.Bag(self.tmpdir)

    self.assertEqual(expected_message, str(ctx.exception))
def test_unicode_bag_info(self):
    """Non-ASCII bag_info values appear verbatim in bag-info.txt."""
    bmp_value = u'This element contains a \N{LATIN SMALL LETTER U WITH DIAERESIS}'
    smp_value = u'This element contains a \N{LINEAR B SYMBOL B049}'
    info = {
        'Test-BMP': bmp_value,
        'Test-SMP': smp_value,
    }

    bagit.make_bag(self.tmpdir, bag_info=info, checksums=['md5'])

    bag_info_txt = slurp_text_file(j(self.tmpdir, 'bag-info.txt'))
    for expected_value in info.values():
        self.assertIn(expected_value, bag_info_txt)
def test_make_bag(self):
    """End-to-end check of the files make_bag writes for an md5 bag.

    Verifies the payload directory, bagit.txt, manifest-md5.txt,
    bag-info.txt and tagmanifest-md5.txt against known-good values.
    """
    # Keep the address in one place so the expected bag-info line always
    # agrees with the value passed to make_bag.
    contact_email = '*****@*****.**'
    info = {'Bagging-Date': '1970-01-01', 'Contact-Email': contact_email}
    bagit.make_bag(self.tmpdir, bag_info=info, checksums=['md5'])

    # data dir should've been created
    self.assertTrue(os.path.isdir(j(self.tmpdir, 'data')))

    # check bagit.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'bagit.txt')))
    bagit_txt = slurp_text_file(j(self.tmpdir, 'bagit.txt'))
    # assertIn, not assertTrue: assertTrue(text, other) treats the second
    # argument as the failure message and passes for any non-empty string.
    self.assertIn('BagIt-Version: 0.97', bagit_txt)
    self.assertIn('Tag-File-Character-Encoding: UTF-8', bagit_txt)

    # check manifest
    # NOTE(review): these expectations are compared for exact equality with
    # whole manifest lines, so the separator between digest and path must
    # match what bagit writes - verify.
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'manifest-md5.txt')))
    manifest_txt = slurp_text_file(j(self.tmpdir, 'manifest-md5.txt')).splitlines()
    self.assertIn('8e2af7a0143c7b8f4de0b3fc90f27354 data/README', manifest_txt)
    self.assertIn(
        '9a2b89e9940fea6ac3a0cc71b0a933a0 data/loc/2478433644_2839c5e8b8_o_d.jpg',
        manifest_txt)
    self.assertIn(
        '6172e980c2767c12135e3b9d246af5a3 data/loc/3314493806_6f1db86d66_o_d.jpg',
        manifest_txt)
    self.assertIn(
        '38a84cd1c41de793a0bccff6f3ec8ad0 data/si/2584174182_ffd5c24905_b_d.jpg',
        manifest_txt)
    self.assertIn(
        '5580eaa31ad1549739de12df819e9af8 data/si/4011399822_65987a4806_b_d.jpg',
        manifest_txt)

    # check bag-info.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'bag-info.txt')))
    bag_info_txt = slurp_text_file(j(self.tmpdir, 'bag-info.txt'))
    bag_info_txt = bag_info_txt.splitlines()
    self.assertIn('Contact-Email: %s' % contact_email, bag_info_txt)
    self.assertIn('Bagging-Date: 1970-01-01', bag_info_txt)
    self.assertIn('Payload-Oxum: 991765.5', bag_info_txt)
    self.assertIn(
        'Bag-Software-Agent: bagit.py v1.5.4 <https://github.com/LibraryOfCongress/bagit-python>',
        bag_info_txt)

    # check tagmanifest-md5.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'tagmanifest-md5.txt')))
    tagmanifest_txt = slurp_text_file(j(
        self.tmpdir, 'tagmanifest-md5.txt')).splitlines()
    self.assertIn('9e5ad981e0d29adc278f6a294b8c2aca bagit.txt', tagmanifest_txt)
    self.assertIn('a0ce6631a2a6d1a88e6d38453ccc72a5 manifest-md5.txt', tagmanifest_txt)
    # NOTE(review): this digest presumably covers the full bag-info.txt
    # contents, Contact-Email included - confirm it matches the address above.
    self.assertIn('0a6ffcffe67e9a34e44220f7ebcb4baa bag-info.txt', tagmanifest_txt)
def test_open_bag_with_malformed_bagit_txt(self):
    """Opening a bag with an emptied bagit.txt raises BagError for both tags."""
    bagit.make_bag(self.tmpdir)

    # Empty out the declaration file.
    with open(j(self.tmpdir, 'bagit.txt'), 'w') as handle:
        os.ftruncate(handle.fileno(), 0)

    expected_message = (
        'Missing required tag in bagit.txt: BagIt-Version, Tag-File-Character-Encoding')

    with self.assertRaises(bagit.BagError) as ctx:
        bagit.Bag(self.tmpdir)

    self.assertEqual(expected_message, str(ctx.exception))
def test_make_bag_with_unwritable_source(self):
    """make_bag raises BagError when the source tree is read/execute only."""
    # Lock the subdirectory first, then the top-level directory.
    for suffix in ('loc', ''):
        os.chmod(j(self.tmpdir, suffix), 0o500)

    with self.assertRaises(bagit.BagError) as ctx:
        bagit.make_bag(self.tmpdir, checksum=['sha256'])

    actual_message = str(ctx.exception)
    self.assertEqual(
        'Missing permissions to move all files and directories',
        actual_message)
def test_validate_missing_directory(self):
    """validate() raises BagValidationError once the data directory is gone."""
    bagit.make_bag(self.tmpdir)

    payload_dir = os.path.join(self.tmpdir, 'data')
    shutil.rmtree(payload_dir)
    expected_message = 'Expected data directory %s does not exist' % payload_dir

    bag = bagit.Bag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError) as ctx:
        bag.validate()

    self.assertEqual(expected_message, str(ctx.exception))
def setUp(self):
    """Build a small amendment bag and an Amender pointing at the sample zip.

    The amendment bag ("updatebag") holds trial1.json at its root and a
    second copy under trial3/; the amendee is samplembag.zip from datadir.
    """
    self.tempdir = tempfile.mkdtemp()
    sample_json = os.path.join(datadir, "samplembag", "data", "trial1.json")

    # Lay out the amendment directory before turning it into a bag.
    self.amendment = os.path.join(self.tempdir, "updatebag")
    os.mkdir(self.amendment)
    shutil.copy(sample_json, self.amendment)

    nested_dir = os.path.join(self.amendment, "trial3")
    os.mkdir(nested_dir)
    shutil.copy(sample_json, nested_dir)

    bagit.make_bag(self.amendment)

    self.amendee = os.path.join(datadir, "samplembag.zip")
    self.amender = amend.Amender(self.amendee, self.amendment)
def test_payload_permissions(self):
    """The payload directory takes on the mode of the directory that was bagged."""
    original_mode = os.stat(self.tmpdir).st_mode

    # The fixture directory must start out without the "others write" bit.
    self.assertEqual(original_mode & stat.S_IWOTH, 0)

    # Turn that bit on; the payload directory created by make_bag should
    # end up with exactly the same mode.
    widened_mode = original_mode | stat.S_IWOTH
    self.assertTrue(original_mode != widened_mode)
    os.chmod(self.tmpdir, widened_mode)

    bagit.make_bag(self.tmpdir)

    payload_mode = os.stat(j(self.tmpdir, 'data')).st_mode
    self.assertEqual(payload_mode, widened_mode)
def test_init_member_bags2(self):
    """_init_member_bags with an amendee that is not natively a head bag."""
    sample_data = os.path.join(datadir, "samplembag", "data")
    self.amendee = os.path.join(self.tempdir, "gooberbag")
    shutil.copytree(sample_data, self.amendee)
    bagit.make_bag(self.amendee)

    self.amender = amend.Amender(self.amendee, self.amendment)
    member_bags_path = os.path.join(
        self.amender._newheaddir, "multibag", "member-bags.tsv")

    # The file must be absent both before and after initialization.
    self.assertFalse(os.path.exists(member_bags_path))
    self.amender._init_member_bags()
    self.assertFalse(os.path.exists(member_bags_path))

    self.assertEqual(self.amender._newhead.member_bag_names, ["gooberbag"])
def test_open_bag_with_invalid_versions(self):
    """Each malformed BagIt-Version value makes bagit.Bag raise BagError."""
    bagit.make_bag(self.tmpdir)
    bagit_txt_path = j(self.tmpdir, 'bagit.txt')

    for bad_version in ('a.b', '2.', '0.1.2', '1.2.3'):
        # Rewrite the declaration with the next bad version string.
        with open(bagit_txt_path, 'w') as handle:
            handle.write(
                'BagIt-Version: %s\nTag-File-Character-Encoding: UTF-8\n'
                % bad_version)

        with self.assertRaises(bagit.BagError) as ctx:
            bagit.Bag(self.tmpdir)

        self.assertEqual(
            'Bag version numbers must be MAJOR.MINOR numbers, not %s' % bad_version,
            str(ctx.exception))
def test_validate_unreadable_file(self):
    """Full validation raises BagValidationError when a payload file has mode 0."""
    bag = bagit.make_bag(self.tmpdir, checksum=["md5"])

    locked_file = j(self.tmpdir, "data/loc/2478433644_2839c5e8b8_o_d.jpg")
    os.chmod(locked_file, 0)

    self.assertRaises(
        bagit.BagValidationError, self.validate, bag, fast=False)
def test_validate_optional_tagfile_in_directory(self):
    """Validation of a tag file that lives in a subdirectory of the bag.

    Three cases are checked in order: a tagmanifest entry with a wrong
    checksum (must fail), an entry with the right checksum (must pass),
    and an entry whose file has been removed (must fail).
    """
    bag = bagit.make_bag(self.tmpdir, checksums=['md5'])
    tagdir = tempfile.mkdtemp(dir=self.tmpdir)

    if not os.path.exists(j(tagdir, "tagfolder")):
        os.makedirs(j(tagdir, "tagfolder"))

    with open(j(tagdir, "tagfolder", "tagfile"), "w") as tagfile:
        tagfile.write("test")

    # Path of the tag file relative to the bag root, written with forward
    # slashes. str.replace returns a new string, so the result has to be
    # assigned back; the bare call used previously had no effect.
    relpath = j(tagdir, "tagfolder", "tagfile").replace(self.tmpdir + os.sep, "")
    relpath = relpath.replace("\\", "/")

    with open(j(self.tmpdir, "tagmanifest-md5.txt"), "w") as tagman:
        # Incorrect checksum.
        tagman.write("8e2af7a0143c7b8f4de0b3fc90f27354 " + relpath + "\n")
    bag = bagit.Bag(self.tmpdir)
    self.assertRaises(bagit.BagValidationError, self.validate, bag)

    # Correct checksum, computed from the tag file's actual contents.
    hasher = hashlib.new("md5")
    with open(j(tagdir, "tagfolder", "tagfile"), "r") as tf:
        contents = tf.read().encode('utf-8')
    hasher.update(contents)
    with open(j(self.tmpdir, "tagmanifest-md5.txt"), "w") as tagman:
        tagman.write(hasher.hexdigest() + " " + relpath + "\n")
    bag = bagit.Bag(self.tmpdir)
    self.assertTrue(self.validate(bag))

    # Missing tagfile.
    os.remove(j(tagdir, "tagfolder", "tagfile"))
    bag = bagit.Bag(self.tmpdir)
    self.assertRaises(bagit.BagValidationError, self.validate, bag)
def test_make_bag_md5_sha256_manifest(self):
    """Requesting md5 and sha256 yields both manifests and a valid bag."""
    bag = bagit.make_bag(self.tmpdir, checksum=['md5', 'sha256'])

    # One manifest file per requested algorithm.
    md5_manifest = j(self.tmpdir, 'manifest-md5.txt')
    sha256_manifest = j(self.tmpdir, 'manifest-sha256.txt')
    self.assertTrue(os.path.isfile(md5_manifest))
    self.assertTrue(os.path.isfile(sha256_manifest))

    # The bag must pass fast validation with two manifests present.
    self.assertTrue(self.validate(bag, fast=True))
def test_init_member_bags3(self):
    """_init_member_bags when the amendment was already made a single multibag."""
    sample_data = os.path.join(datadir, "samplembag", "data")
    self.amendee = os.path.join(self.tempdir, "gooberbag")
    shutil.copytree(sample_data, self.amendee)
    bagit.make_bag(self.amendee)

    amend.make_single_multibag(self.amendment)
    self.amender = amend.Amender(self.amendee, self.amendment)
    member_bags_path = os.path.join(
        self.amender._newheaddir, "multibag", "member-bags.tsv")

    # Present beforehand, gone once initialization has run.
    self.assertTrue(os.path.exists(member_bags_path))
    self.amender._init_member_bags()
    self.assertFalse(os.path.exists(member_bags_path))

    self.assertEqual(self.amender._newhead.member_bag_names, ["gooberbag"])
def test_convert_new(self):
    """Generate data, bag it, convert it to a multibag and validate the result."""
    # create the data
    self.bagdir = os.path.join(self.tempdir, "sampledata")
    self.assertFalse(os.path.isdir(self.bagdir))

    dataset_spec = {
        'totalsize': 15,
        'totalfiles': 3,
        'files': [{'totalsize': 10, 'totalfiles': 2}],
        'dirs': [{'totalsize': 5, 'totalfiles': 1}],
    }
    maker = mkdata.DatasetMaker(self.bagdir, dataset_spec)
    maker.fill()
    self.assertTrue(os.path.isdir(self.bagdir))

    # turn it into a bag
    bag = bagit.make_bag(self.bagdir)
    self.assertTrue(bag.validate())

    multibag_dir = os.path.join(self.bagdir, 'multibag')
    self.assertFalse(os.path.exists(multibag_dir))

    # convert it to a multibag
    self.mkr = amend.SingleMultibagMaker(self.bagdir)
    self.mkr.convert("1.5", "doi:XXXX/11111")
    self.assertTrue(os.path.exists(multibag_dir))

    # validate it as a head bag
    valid8.validate_headbag(self.bagdir)
def test_is_valid(self):
    """is_valid() is True for a fresh bag and False after an extra payload file."""
    bagit.make_bag(self.tmpdir)
    bag = bagit.Bag(self.tmpdir)
    self.assertTrue(bag.is_valid())

    # Drop an extra file into the payload directory.
    stray_path = j(self.tmpdir, "data", "extra_file")
    with open(stray_path, "w") as handle:
        handle.write("bar")

    self.assertFalse(bag.is_valid())
def test_mixed_case_checksums(self):
    """A bag still validates after one payload digest is upper-cased."""
    bag = bagit.make_bag(self.tmpdir, checksums=['md5'])
    manifest_path = j(self.tmpdir, "manifest-md5.txt")
    tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

    # Pick a payload entry (the last one iterated) and ignore the entries
    # that come from the tagmanifest file.
    payload_prefix = 'data' + os.sep
    payload_entry = {}
    for entry_name, digests in bag.entries.items():
        if entry_name.startswith(payload_prefix):
            payload_entry = digests
    digest = next(iter(payload_entry.values()))

    # Rewrite the manifest with that digest in upper case.
    manifest_text = slurp_text_file(manifest_path)
    manifest_text = manifest_text.replace(digest, digest.upper())
    with open(manifest_path, "wb") as handle:
        handle.write(manifest_text.encode('utf-8'))

    # manifest-md5.txt changed, so recompute its md5 and patch the
    # corresponding value in tagmanifest-md5.txt.
    manifest_md5 = hashlib.new('md5')
    manifest_md5.update(slurp_text_file(manifest_path).encode('utf-8'))

    with open(tagmanifest_path, "r") as handle:
        tagmanifest_text = handle.read()
    tagmanifest_text = tagmanifest_text.replace(
        bag.entries['manifest-md5.txt']['md5'], manifest_md5.hexdigest())
    with open(tagmanifest_path, "w") as handle:
        handle.write(tagmanifest_text)

    bag = bagit.Bag(self.tmpdir)
    self.assertTrue(self.validate(bag))
def test_init_file_lookup2(self):
    """_init_file_lookup with an amendee that is not natively a head bag."""
    sample_data = os.path.join(datadir, "samplembag", "data")
    self.amendee = os.path.join(self.tempdir, "gooberbag")
    shutil.copytree(sample_data, self.amendee)
    bagit.make_bag(self.amendee)

    self.amender = amend.Amender(self.amendee, self.amendment)
    lookup_path = os.path.join(
        self.amender._newheaddir, "multibag", "file-lookup.tsv")

    # The lookup file must be absent both before and after initialization.
    self.assertFalse(os.path.exists(lookup_path))
    self.amender._init_multibag_info()
    self.amender._init_file_lookup()
    self.assertFalse(os.path.exists(lookup_path))

    found_in = self.amender._newhead.lookup_file("data/trial1.json")
    self.assertEqual(found_in, "gooberbag")
def test_sha1_tagfile(self):
    """A sha1 bag gets tagmanifest-sha1.txt and a known digest for bag-info.txt."""
    info = {'Bagging-Date': '1970-01-01', 'Contact-Email': '*****@*****.**'}
    bag = bagit.make_bag(self.tmpdir, checksum=['sha1'], bag_info=info)

    tagmanifest_path = j(self.tmpdir, 'tagmanifest-sha1.txt')
    self.assertTrue(os.path.isfile(tagmanifest_path))

    recorded_digest = bag.entries['bag-info.txt']['sha1']
    self.assertEqual(
        'f69110479d0d395f7c321b3860c2bc0c96ae9fe8',
        recorded_digest,
    )
def test_validate_fast_without_oxum(self):
    """Fast validation raises BagValidationError once bag-info.txt is removed."""
    bagit.make_bag(self.tmpdir)
    os.remove(j(self.tmpdir, "bag-info.txt"))

    reopened = bagit.Bag(self.tmpdir)
    self.assertRaises(
        bagit.BagValidationError, self.validate, reopened, fast=True)
def test_amend_bag_with(self):
    """amend_bag_with folds two extra bags into the amendment head bag.

    Checks the Multibag-* entries in bag-info, the member bag order in
    member-bags.tsv, the file-to-bag mapping in file-lookup.tsv, and that
    the result validates as a head bag.
    """
    # First extra bag: holds trial2.json under its original name.
    xtrabag1 = os.path.join(self.tempdir, "gooberbag1")
    os.mkdir(xtrabag1)
    srcfile = os.path.join(datadir, "samplembag", "data", "trial2.json")
    shutil.copy(srcfile, xtrabag1)
    bagit.make_bag(xtrabag1)

    # Second extra bag: the same source file, stored as trial4.json.
    xtrabag2 = os.path.join(self.tempdir, "gooberbag2")
    os.mkdir(xtrabag2)
    srcfile = os.path.join(datadir, "samplembag", "data", "trial2.json")
    shutil.copy(srcfile, os.path.join(xtrabag2, "trial4.json"))
    bagit.make_bag(xtrabag2)

    amend.amend_bag_with(self.amendee, self.amendment, "2",
                         xtrabag1, xtrabag2)

    bag = bagit.open_bag(self.amendment)
    self.assertEqual(bag.info.get('Multibag-Version'), CURRENT_VERSION)
    self.assertEqual(bag.info.get('Multibag-Tag-Directory'), 'multibag')
    self.assertEqual(bag.info.get('Multibag-Head-Version'), '2')
    self.assertEqual(bag.info.get('Multibag-Head-Deprecates'), '1.0')

    tagfile = os.path.join(self.amendment, "multibag", "member-bags.tsv")
    with open(tagfile) as fd:
        names = [line.strip() for line in fd]
    # assertEqual rather than the assertEquals alias, which newer Python
    # versions no longer provide.
    self.assertEqual(
        names, ["samplembag", "gooberbag1", "gooberbag2", "updatebag"])

    tagfile = os.path.join(self.amendment, "multibag", "file-lookup.tsv")
    lu = {}
    nol = 0
    with open(tagfile) as fd:
        for line in fd:
            names = line.strip().split('\t')
            nol += 1
            lu[names[0]] = names[1]
    self.assertEqual(lu.get('data/trial1.json'), 'updatebag')
    self.assertEqual(lu.get('data/trial2.json'), 'gooberbag1')
    self.assertEqual(lu.get('data/trial4.json'), 'gooberbag2')
    self.assertEqual(lu.get('data/trial3/trial3a.json'), 'samplembag')
    self.assertEqual(lu.get('data/trial3/trial1.json'), 'updatebag')
    # As many lines as distinct keys means no path was listed twice.
    self.assertEqual(nol, len(lu))

    # validate it as a headbag
    valid8.validate_headbag(self.amendment)
def test_add_amending_bag(self):
    """add_amending_bag registers the new bag, its URI, and its files."""
    extra_bag = os.path.join(self.tempdir, "gooberbag")
    os.mkdir(extra_bag)
    sample_json = os.path.join(datadir, "samplembag", "data", "trial2.json")
    shutil.copy(sample_json, extra_bag)
    bagit.make_bag(extra_bag)

    self.amender.init_from_amendee()
    self.amender.add_amending_bag(extra_bag, pid="foo://goob", comment="Ya")

    new_head = self.amender._newhead
    self.assertEqual(new_head.member_bag_names, ["samplembag", "gooberbag"])
    self.assertEqual(new_head.member_bags()[1].uri, "foo://goob")
    self.assertEqual(new_head.lookup_file("data/trial1.json"), "samplembag")
    self.assertEqual(new_head.lookup_file("data/trial2.json"), "gooberbag")