def test_validate_optional_tagfile_in_directory(self):
    # Exercises validation of an optional tag file stored in a nested
    # directory and listed in tagmanifest-md5.txt: a wrong checksum must
    # fail, the right checksum must pass, and a missing file must fail.
    bagit.make_bag(self.tmpdir, checksums=['md5'])
    tagdir = tempfile.mkdtemp(dir=self.tmpdir)
    tagfolder = j(tagdir, "tagfolder")
    tagfile_path = j(tagfolder, "tagfile")
    tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

    if not os.path.exists(tagfolder):
        os.makedirs(tagfolder)

    with open(tagfile_path, "w") as tagfile:
        tagfile.write("test")

    # The manifest entry is the path relative to the bag root. str.replace()
    # returns a new string, so the result has to be assigned back for the
    # backslash -> "/" separator normalization to take effect.
    relpath = tagfile_path.replace(self.tmpdir + os.sep, "")
    relpath = relpath.replace("\\", "/")

    with open(tagmanifest_path, "w") as tagman:
        # Incorrect checksum.
        tagman.write("8e2af7a0143c7b8f4de0b3fc90f27354 " + relpath + "\n")
    bag = bagit.Bag(self.tmpdir)
    self.assertRaises(bagit.BagValidationError, self.validate, bag)

    # Rewrite the tag manifest with the checksum of the actual contents.
    hasher = hashlib.new("md5")
    with open(tagfile_path, "r") as tf:
        contents = tf.read().encode('utf-8')
    hasher.update(contents)
    with open(tagmanifest_path, "w") as tagman:
        tagman.write(hasher.hexdigest() + " " + relpath + "\n")
    bag = bagit.Bag(self.tmpdir)
    self.assertTrue(self.validate(bag))

    # Missing tagfile.
    os.remove(tagfile_path)
    bag = bagit.Bag(self.tmpdir)
    self.assertRaises(bagit.BagValidationError, self.validate, bag)
def test_save_baginfo(self):
    # Tags written to bag.info must be readable after save() and a reload
    # from disk, both for a single string and for a list of values.
    bag = bagit.make_bag(self.tmpdir)

    # Single-valued tag.
    bag.info["foo"] = "bar"
    bag.save()
    bag = bagit.Bag(self.tmpdir)
    self.assertEqual(bag.info["foo"], "bar")
    self.assertTrue(bag.is_valid())

    # Multi-valued tag.
    expected_values = ["a", "b", "c"]
    bag.info['x'] = list(expected_values)
    bag.save()
    reloaded = bagit.Bag(self.tmpdir)
    self.assertEqual(reloaded.info["x"], expected_values)
    self.assertTrue(bag.is_valid())
def test_mixed_case_checksums(self):
    # Upper-cases a payload checksum inside manifest-md5.txt and expects
    # the bag to validate once the tag manifest has been brought in sync.
    bag = bagit.make_bag(self.tmpdir, checksums=['md5'])
    manifest_path = j(self.tmpdir, "manifest-md5.txt")
    tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

    # Only payload entries are of interest here; entries coming from the
    # tagmanifest file are skipped.
    payload_prefix = 'data' + os.sep
    payload_hashes = {}
    for path in bag.entries:
        if path.startswith(payload_prefix):
            payload_hashes = bag.entries[path]
    digest = next(iter(payload_hashes.values()))

    upper_cased = slurp_text_file(manifest_path).replace(digest, digest.upper())
    with open(manifest_path, "wb") as m:
        m.write(upper_cased.encode('utf-8'))

    # manifest-md5.txt was rewritten, so its own md5 recorded in
    # tagmanifest-md5.txt has to be recomputed and substituted.
    manifest_md5 = hashlib.new('md5')
    manifest_md5.update(slurp_text_file(manifest_path).encode('utf-8'))

    with open(tagmanifest_path, "r") as tagmanifest:
        tagman_contents = tagmanifest.read()
    old_manifest_md5 = bag.entries['manifest-md5.txt']['md5']
    tagman_contents = tagman_contents.replace(old_manifest_md5,
                                              manifest_md5.hexdigest())
    with open(tagmanifest_path, "w") as tagmanifest:
        tagmanifest.write(tagman_contents)

    bag = bagit.Bag(self.tmpdir)
    self.assertTrue(self.validate(bag))
def test_is_valid(self):
    # A freshly made bag reports valid; adding a payload file that is not
    # in the manifest makes is_valid() return False.
    bagit.make_bag(self.tmpdir)
    bag = bagit.Bag(self.tmpdir)
    self.assertTrue(bag.is_valid())

    extra_path = j(self.tmpdir, "data", "extra_file")
    with open(extra_path, "w") as extra:
        extra.write("bar")

    self.assertFalse(bag.is_valid())
def test_validate_fast_without_oxum(self):
    # With bag-info.txt deleted, fast validation is expected to raise.
    bagit.make_bag(self.tmpdir)
    os.remove(j(self.tmpdir, "bag-info.txt"))

    reopened = bagit.Bag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError):
        self.validate(reopened, fast=True)
def test_save_baginfo_with_sha1(self):
    # A bag created with both sha1 and md5 must stay valid after saving,
    # after adding a tag and saving again, and after a reload.
    # NOTE(review): this passes `checksum=` whereas other tests in this file
    # pass `checksums=` - confirm the singular keyword is intentional.
    algorithms = ["sha1", "md5"]
    bag = bagit.make_bag(self.tmpdir, checksum=algorithms)
    self.assertTrue(bag.is_valid())
    bag.save()

    bag.info['foo'] = "bar"
    bag.save()

    reloaded = bagit.Bag(self.tmpdir)
    self.assertTrue(reloaded.is_valid())
def test_save_only_baginfo(self):
    # A plain save() persists the new tag, but the payload file added
    # behind the bag's back still leaves the bag invalid.
    bag = bagit.make_bag(self.tmpdir)

    new_payload = j(self.tmpdir, 'data', 'newfile')
    with open(new_payload, 'w') as nf:
        nf.write('newfile')

    bag.info["foo"] = "bar"
    bag.save()

    reloaded = bagit.Bag(self.tmpdir)
    self.assertEqual(reloaded.info["foo"], "bar")
    self.assertFalse(reloaded.is_valid())
def test_validate_slow_without_oxum_extra_file(self):
    # bag-info.txt is removed and an unlisted payload file is added; a
    # non-fast validation must raise.
    bagit.make_bag(self.tmpdir)
    os.remove(j(self.tmpdir, "bag-info.txt"))

    extra_path = j(self.tmpdir, "data", "extra_file")
    with open(extra_path, "w") as extra:
        extra.write("foo")

    reopened = bagit.Bag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError):
        self.validate(reopened, fast=False)
def test_bom_in_bagit_txt(self):
    # Prepends a UTF-8 byte-order mark to bagit.txt and expects validation
    # of the reopened bag to raise.
    bagit.make_bag(self.tmpdir)
    bagit_txt = j(self.tmpdir, "bagit.txt")

    # The file is read and written in text mode, so on Python 3 the BOM
    # has to be a str rather than bytes.
    if sys.version_info[0] >= 3:
        bom = codecs.BOM_UTF8.decode('utf-8')
    else:
        bom = codecs.BOM_UTF8

    with open(bagit_txt, "r") as bf:
        with_bom = bom + bf.read()
    with open(bagit_txt, "w") as bf:
        bf.write(with_bom)

    reopened = bagit.Bag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError):
        self.validate(reopened)
def test_open_bag_with_missing_bagit_txt(self):
    # Opening a bag whose bagit.txt was deleted must raise BagError with a
    # message naming the missing file.
    bagit.make_bag(self.tmpdir)
    os.unlink(j(self.tmpdir, 'bagit.txt'))

    expected_message = 'Expected bagit.txt does not exist: %s/bagit.txt' % self.tmpdir

    with self.assertRaises(bagit.BagError) as error_catcher:
        bagit.Bag(self.tmpdir)

    actual_message = str(error_catcher.exception)
    self.assertEqual(expected_message, actual_message)
def test_open_bag_with_unsupported_version(self):
    # Overwrites bagit.txt with version 2.0 and expects BagError with the
    # "Unsupported bag version" message on open.
    bagit.make_bag(self.tmpdir)

    declaration = 'BagIt-Version: 2.0\nTag-File-Character-Encoding: UTF-8\n'
    with open(j(self.tmpdir, 'bagit.txt'), 'w') as f:
        f.write(declaration)

    with self.assertRaises(bagit.BagError) as error_catcher:
        bagit.Bag(self.tmpdir)

    actual_message = str(error_catcher.exception)
    self.assertEqual('Unsupported bag version: 2.0', actual_message)
def test_validate_flipped_bit(self):
    # Replaces the first character of data/README (same length) and checks
    # which validation modes notice the change.
    bagit.make_bag(self.tmpdir)
    readme = j(self.tmpdir, "data", "README")

    original_text = slurp_text_file(readme)
    with open(readme, "w") as r:
        r.write('A' + original_text[1:])

    bag = bagit.Bag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError):
        self.validate(bag)

    # fast doesn't catch the flipped bit, since oxsum is the same
    self.assertTrue(self.validate(bag, fast=True))
    self.assertTrue(self.validate(bag, completeness_only=True))
def test_open_bag_with_malformed_bagit_txt(self):
    # Empties bagit.txt and expects BagError listing both required tags
    # as missing.
    bagit.make_bag(self.tmpdir)

    bagit_txt = j(self.tmpdir, 'bagit.txt')
    with open(bagit_txt, 'w') as f:
        os.ftruncate(f.fileno(), 0)

    expected_message = ('Missing required tag in bagit.txt: '
                        'BagIt-Version, Tag-File-Character-Encoding')

    with self.assertRaises(bagit.BagError) as error_catcher:
        bagit.Bag(self.tmpdir)

    self.assertEqual(expected_message, str(error_catcher.exception))
def test_validate_missing_directory(self):
    # Deletes the data directory and expects validate() to raise with a
    # message naming that directory.
    bagit.make_bag(self.tmpdir)

    tmp_data_dir = os.path.join(self.tmpdir, 'data')
    shutil.rmtree(tmp_data_dir)
    expected_message = 'Expected data directory %s does not exist' % tmp_data_dir

    bag = bagit.Bag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError) as error_catcher:
        bag.validate()

    actual_message = str(error_catcher.exception)
    self.assertEqual(expected_message, actual_message)
def test_validate_completeness(self):
    # Renames a payload file, then checks that fast validation still
    # passes while completeness-only validation raises without ever
    # calling _validate_entries.
    bagit.make_bag(self.tmpdir)

    data_dir = j(self.tmpdir, "data")
    os.rename(j(data_dir, "README"), j(data_dir, "extra_file"))

    bag = bagit.Bag(self.tmpdir)
    self.assertTrue(self.validate(bag, fast=True))

    with mock.patch.object(bag, '_validate_entries') as patched_entries:
        with self.assertRaises(bagit.BagValidationError):
            self.validate(bag, completeness_only=True)
        self.assertEqual(patched_entries.call_count, 0)
def test_update_info(self):
    # Checks the bag-info tags that self.mkr.update_info() writes into the
    # bag at self.bagdir.

    # test assumption: no Multibag-* tag is present before the update
    bag = bagit.Bag(self.bagdir)
    for tag in bag.info:
        self.assertFalse(tag.startswith('Multibag-'))

    self.mkr.update_info()

    # Reload from disk so the assertions see what was actually saved.
    info = bagit.Bag(self.bagdir).info
    self.assertEqual(info.get('Multibag-Version'), amend.CURRENT_VERSION)
    self.assertEqual(info.get('Multibag-Head-Version'), "1")
    self.assertEqual(info.get('Multibag-Reference'), amend.CURRENT_REFERENCE)
    self.assertEqual(info.get('Multibag-Tag-Directory'), "multibag")

    # assertIsInstance reports the offending value and type on failure,
    # unlike assertTrue(isinstance(...)).
    description = info.get('Internal-Sender-Description')
    self.assertIsInstance(description, list)
    self.assertEqual(len(description), 2)
    self.assertIn("Multibag-Reference", description[1])

    self.assertEqual(info['Bag-Size'], "4.875 kB")
def setUp(self):
    # Builds a scratch copy of the "samplembag" fixture with its multibag
    # directory and every Multibag-* info tag removed, then creates the
    # SingleMultibagMaker under test for it.
    self.tempdir = tempfile.mkdtemp()
    self.bagdir = os.path.join(self.tempdir, "samplebag")

    sample_source = os.path.join(datadir, "samplembag")
    shutil.copytree(sample_source, self.bagdir)
    shutil.rmtree(os.path.join(self.bagdir, "multibag"))

    bag = bagit.Bag(self.bagdir)
    # Gather the tag names first so bag.info is not modified while it is
    # being iterated.
    multibag_tags = [name for name in bag.info if name.startswith('Multibag-')]
    for name in multibag_tags:
        del bag.info[name]
    bag.save()

    self.mkr = amend.SingleMultibagMaker(self.bagdir)
def test_open_bag_with_invalid_versions(self):
    # Each malformed BagIt-Version value must make opening the bag raise
    # BagError with a message quoting the offending value.
    bagit.make_bag(self.tmpdir)
    bagit_txt = j(self.tmpdir, 'bagit.txt')
    invalid_versions = ('a.b', '2.', '0.1.2', '1.2.3')

    for version in invalid_versions:
        declaration = (
            'BagIt-Version: %s\nTag-File-Character-Encoding: UTF-8\n' % version)
        with open(bagit_txt, 'w') as f:
            f.write(declaration)

        with self.assertRaises(bagit.BagError) as error_catcher:
            bagit.Bag(self.tmpdir)

        expected_message = (
            'Bag version numbers must be MAJOR.MINOR numbers, not %s' % version)
        self.assertEqual(expected_message, str(error_catcher.exception))
def test_unusual_bag_info_separators(self):
    # Appends the same tag six times with different space/tab layouts
    # around the colon and expects the values to be read back as '1'..'6'.
    bagit.make_bag(self.tmpdir)

    tag_lines = (
        'Test-Tag: 1',
        'Test-Tag:\t2',
        'Test-Tag\t: 3',
        'Test-Tag\t:\t4',
        'Test-Tag\t \t: 5',
        'Test-Tag:\t \t 6',
    )
    with open(j(self.tmpdir, 'bag-info.txt'), 'a') as f:
        for tag_line in tag_lines:
            print(tag_line, file=f)

    bag = bagit.Bag(self.tmpdir)
    # bag-info.txt was edited by hand, so the manifests are regenerated
    # before the validity check.
    bag.save(manifests=True)

    self.assertTrue(bag.is_valid())
    expected_values = [str(number) for number in range(1, 7)]
    self.assertEqual(bag.info['Test-Tag'], expected_values)
def test_init_multibag_info3(self):
    # test when src has deprecations: the Multibag-Head-Deprecates values
    # saved in the amendee are expected in the new head's info, followed
    # by "1.0", once _init_multibag_info() has run.
    self.amendee = os.path.join(self.tempdir, "gooberbag")
    shutil.copytree(os.path.join(datadir, "samplembag"), self.amendee)

    source_bag = bagit.Bag(self.amendee)
    source_bag.info['Multibag-Head-Deprecates'] = ["0.1", "0.5"]
    source_bag.save()

    self.amender = amend.Amender(self.amendee, self.amendment)
    self.assertNotIn('Multibag-Head-Deprecates', self.amender._newhead.info)

    self.amender._init_multibag_info()

    deprecates = self.amender._newhead.info.get('Multibag-Head-Deprecates')
    self.assertEqual(deprecates, ["0.1", "0.5", "1.0"])
def test_make_single_multibag(self):
    # Runs amend.make_single_multibag() on the bag at self.bagdir and
    # checks the multibag tag files and bag-info tags it produces.
    mbdir = os.path.join(self.bagdir, "multibag")
    mbfile = os.path.join(mbdir, 'member-bags.tsv')
    flfile = os.path.join(mbdir, 'file-lookup.tsv')
    bagn = os.path.basename(self.bagdir)

    # Precondition: the multibag directory does not exist yet.
    self.assertFalse(os.path.exists(mbdir))

    amend.make_single_multibag(self.bagdir, "1.5", "doi:XXXX/11111")
    self.assertTrue(os.path.exists(mbdir))

    # test for member-bags.tsv
    self.assertTrue(os.path.exists(mbfile))
    with open(mbfile) as fd:
        lines = fd.readlines()
    self.assertEqual(len(lines), 1)
    parts = lines[0].strip().split("\t")
    self.assertEqual(parts[0], bagn)
    self.assertEqual(parts[1], "doi:XXXX/11111")

    # test for file-lookup.tsv
    self.assertTrue(os.path.exists(flfile))
    with open(flfile) as fd:
        lines = fd.readlines()
    self.assertIn("data/trial1.json\t" + bagn + "\n", lines)
    self.assertIn("data/trial2.json\t" + bagn + "\n", lines)
    self.assertIn("data/trial3/trial3a.json\t" + bagn + "\n", lines)
    self.assertNotIn("metadata/pod.json\t" + bagn + "\n", lines)
    self.assertNotIn("about.txt\t" + bagn + "\n", lines)
    self.assertEqual(len(lines), 3)

    # test info tag data
    info = bagit.Bag(self.bagdir).info
    self.assertEqual(info.get('Multibag-Version'), amend.CURRENT_VERSION)
    self.assertEqual(info.get('Multibag-Head-Version'), "1.5")
    self.assertEqual(info.get('Multibag-Reference'), amend.CURRENT_REFERENCE)
    self.assertEqual(info.get('Multibag-Tag-Directory'), "multibag")

    # assertIsInstance reports the offending value and type on failure,
    # unlike assertTrue(isinstance(...)).
    description = info.get('Internal-Sender-Description')
    self.assertIsInstance(description, list)
    self.assertEqual(len(description), 2)
    self.assertIn("Multibag-Reference", description[1])

    self.assertEqual(info['Bag-Size'], "5.171 kB")
def test_validation_error_details(self):
    # Corrupts data/README and checks both the text of the raised
    # BagValidationError and the structured ChecksumMismatch it carries
    # in its `details` list.
    bagit.make_bag(self.tmpdir, checksums=['md5'],
                   bag_info={'Bagging-Date': '1970-01-01'})
    readme = j(self.tmpdir, "data", "README")
    txt = slurp_text_file(readme)
    txt = 'A' + txt[1:]
    with open(readme, "w") as r:
        r.write(txt)

    expected_md5 = '8e2af7a0143c7b8f4de0b3fc90f27354'
    found_md5 = 'fd41543285d17e7c29cd953f5cf5b955'
    expected_message = (
        'data/README md5 validation failed: '
        'expected="8e2af7a0143c7b8f4de0b3fc90f27354" '
        'found="fd41543285d17e7c29cd953f5cf5b955"')

    bag = bagit.Bag(self.tmpdir)

    # assertRaises fails the test by itself when nothing is raised, so no
    # manual "got exception" bookkeeping is needed.
    with self.assertRaises(bagit.BagValidationError) as error_catcher:
        self.validate(bag)

    e = error_catcher.exception
    self.assertIn(expected_message, str(e))
    self.assertEqual(len(e.details), 1)

    readme_error = e.details[0]
    self.assertEqual(expected_message, str(readme_error))
    self.assertIsInstance(readme_error, bagit.ChecksumMismatch)
    self.assertEqual(readme_error.algorithm, 'md5')
    self.assertEqual(readme_error.path, 'data/README')
    self.assertEqual(readme_error.expected, expected_md5)
    self.assertEqual(readme_error.found, found_md5)
def test_filename_unicode_normalization(self):
    # The Unicode normalization form of a filename can change in transit,
    # and that case has to be handled. Testing it portably in both
    # directions is hard because OS X normalizes *all* filenames to an NFD
    # variant. This basic test therefore writes the file to disk under its
    # NFD name (which an HFS+ filesystem will not alter), rewrites the
    # manifests in NFC form, and confirms the bag still validates.
    test_filename = 'Núñez Papers.txt'
    test_filename_nfd = unicodedata.normalize('NFD', test_filename)

    target_dir = j(self.tmpdir, 'unicode-normalization')
    os.makedirs(target_dir)

    with open(j(target_dir, test_filename_nfd), 'w') as f:
        f.write(
            'This is a test filename written using NFD normalization\n')

    bag = bagit.make_bag(self.tmpdir)
    bag.save()

    self.assertTrue(bag.is_valid())

    # Rewrite every manifest file with its contents normalized to NFC:
    for manifest_path in bag.manifest_files():
        nfc_text = unicodedata.normalize('NFC', slurp_text_file(manifest_path))
        with open(manifest_path, 'wb') as f:
            f.write(nfc_text.encode('utf-8'))

    # The manifests changed, so the tag manifests are regenerated:
    for alg in bag.algorithms:
        bagit._make_tagmanifest_file(alg, bag.path, encoding=bag.encoding)

    # Reload everything from disk and validate again:
    reloaded = bagit.Bag(self.tmpdir)
    self.assertTrue(reloaded.is_valid())
def test_multiple_oxum_values(self):
    # Appends an additional Payload-Oxum line to bag-info.txt and expects
    # fast validation to still succeed.
    bagit.make_bag(self.tmpdir)

    baginfo_path = j(self.tmpdir, "bag-info.txt")
    with open(baginfo_path, "a") as baginfo:
        baginfo.write('Payload-Oxum: 7.7\n')

    reopened = bagit.Bag(self.tmpdir)
    self.assertTrue(self.validate(reopened, fast=True))
def test_unicode_in_tags(self):
    # A non-ASCII tag value given to make_bag must be read back unchanged
    # when the bag is reopened.
    tags = {"test": '♡'}
    bagit.make_bag(self.tmpdir, tags)

    reopened = bagit.Bag(self.tmpdir)
    self.assertEqual(reopened.info['test'], '♡')
def test_validation_completeness_error_details(self):
    # Renames a payload file and removes bag-info.txt, then checks the
    # message and the structured `details` of the BagValidationError that
    # validation raises.
    bagit.make_bag(self.tmpdir, checksums=['md5'],
                   bag_info={'Bagging-Date': '1970-01-01'})

    old_path = j(self.tmpdir, "data", "README")
    new_path = j(self.tmpdir, "data", "extra")
    os.rename(old_path, new_path)

    # remove the bag-info.txt which contains the oxum to force a full
    # check of the manifest
    os.remove(j(self.tmpdir, "bag-info.txt"))

    bag = bagit.Bag(self.tmpdir)

    # assertRaises fails the test by itself when nothing is raised, so no
    # manual "got exception" bookkeeping is needed.
    with self.assertRaises(bagit.BagValidationError) as error_catcher:
        self.validate(bag)

    e = error_catcher.exception
    exc_str = str(e)
    self.assertIn("Bag validation failed: ", exc_str)
    self.assertIn(
        "bag-info.txt exists in manifest but was not found on filesystem",
        exc_str)
    self.assertIn(
        "data/README exists in manifest but was not found on filesystem",
        exc_str)
    self.assertIn(
        "data/extra exists on filesystem but is not in the manifest",
        exc_str)
    self.assertEqual(len(e.details), 3)

    # The two missing-file errors may arrive in either order.
    if e.details[0].path == "bag-info.txt":
        baginfo_error = e.details[0]
        readme_error = e.details[1]
    else:
        baginfo_error = e.details[1]
        readme_error = e.details[0]

    self.assertEqual(
        str(baginfo_error),
        "bag-info.txt exists in manifest but was not found on filesystem")
    self.assertIsInstance(baginfo_error, bagit.FileMissing)
    self.assertEqual(baginfo_error.path, "bag-info.txt")

    self.assertEqual(
        str(readme_error),
        "data/README exists in manifest but was not found on filesystem")
    self.assertIsInstance(readme_error, bagit.FileMissing)
    self.assertEqual(readme_error.path, "data/README")

    error = e.details[2]
    self.assertEqual(
        str(error),
        "data/extra exists on filesystem but is not in the manifest")
    # assertTrue(error, cls) would treat the class as the failure message
    # and pass for any truthy error; assertIsInstance checks the type.
    self.assertIsInstance(error, bagit.UnexpectedFile)
    self.assertEqual(error.path, "data/extra")
def test_handle_directory_end_slash_gracefully(self):
    # Both make_bag() and Bag() are given the directory with a trailing
    # slash, and each resulting bag must validate.
    path_with_slash = self.tmpdir + '/'

    created = bagit.make_bag(path_with_slash)
    self.assertTrue(self.validate(created))

    reopened = bagit.Bag(path_with_slash)
    self.assertTrue(self.validate(reopened))
def test_bag_constructor(self):
    # Opening an existing bag yields a bagit.Bag whose payload lists
    # five files.
    bagit.make_bag(self.tmpdir)

    bag = bagit.Bag(self.tmpdir)
    self.assertEqual(type(bag), bagit.Bag)

    payload = list(bag.payload_files())
    self.assertEqual(len(payload), 5)