def test_default_bagging_date(self):
    # No Bagging-Date is supplied, so bag-info.txt is expected to carry today's date.
    info = {"Contact-Email": "*****@*****.**"}
    bagit.make_bag(self.tmpdir, bag_info=info)
    bag_info_txt = slurp_text_file(j(self.tmpdir, "bag-info.txt"))
    # Build the expected line from `info` so the assertion always matches
    # the value that was actually handed to make_bag().
    self.assertIn("Contact-Email: %s" % info["Contact-Email"], bag_info_txt)
    today = datetime.date.today().strftime("%Y-%m-%d")
    self.assertIn("Bagging-Date: %s" % today, bag_info_txt)
def test_unsafe_directory_entries_raise_error(self):
    # Opening a bag whose manifest lists an unsafe path must raise BagError.
    # The path list could be more granular, but ought to be adequate.
    if os.name != 'nt':
        bad_paths = (
            '../../../secrets.json',
            '~/.pgp/id_rsa',
            '/dev/null',
            'data/../../../secrets.json',
        )
    else:
        bad_paths = (
            r'C:\win32\cmd.exe',
            '\\\\?\\C:\\',
            'COM1:',
            '\\\\.\\COM56',
            '..\\..\\..\\win32\\cmd.exe',
            'data\\..\\..\\..\\win32\\cmd.exe',
        )

    # The digest only has to look like a checksum; the path is what is tested.
    fake_digest = hashlib.new('md5')
    fake_digest.update('this is not a real checksum'.encode('utf-8'))

    for unsafe in bad_paths:
        bagit.make_bag(self.tmpdir, checksums=['md5'])
        entry = '%s %s\n' % (fake_digest.hexdigest(), unsafe)
        # Overwrite the generated manifest with the single unsafe entry.
        with open(j(self.tmpdir, 'manifest-md5.txt'), 'wb+') as manifest_out:
            manifest_out.write(entry.encode('utf-8'))
        with self.assertRaises(bagit.BagError):
            bagit.Bag(self.tmpdir)
def test_default_bagging_date(self):
    # No Bagging-Date is supplied, so bag-info.txt is expected to carry today's date.
    info = {'Contact-Email': '*****@*****.**'}
    bagit.make_bag(self.tmpdir, bag_info=info)
    bag_info_txt = slurp_text_file(j(self.tmpdir, 'bag-info.txt'))
    # Build the expected line from `info` so the assertion always matches
    # the value that was actually handed to make_bag().
    self.assertIn('Contact-Email: %s' % info['Contact-Email'], bag_info_txt)
    today = datetime.date.today().strftime("%Y-%m-%d")
    self.assertIn('Bagging-Date: %s' % today, bag_info_txt)
def test_unsafe_directory_entries_raise_error(self):
    # A manifest entry pointing at an unsafe path must make bagit.Bag() fail.
    # The candidate list could be more granular, but ought to be adequate.
    windows_paths = (
        r"C:\win32\cmd.exe",
        "\\\\?\\C:\\",
        "COM1:",
        "\\\\.\\COM56",
        "..\\..\\..\\win32\\cmd.exe",
        "data\\..\\..\\..\\win32\\cmd.exe",
    )
    other_paths = (
        "../../../secrets.json",
        "~/.pgp/id_rsa",
        "/dev/null",
        "data/../../../secrets.json",
    )
    bad_paths = windows_paths if os.name == "nt" else other_paths

    # Any well-formed md5 digest serves as the placeholder checksum.
    placeholder = hashlib.new("md5")
    placeholder.update("this is not a real checksum".encode("utf-8"))

    for candidate in bad_paths:
        bagit.make_bag(self.tmpdir, checksums=["md5"])
        with open(j(self.tmpdir, "manifest-md5.txt"), "wb+") as manifest_out:
            manifest_line = "%s %s\n" % (placeholder.hexdigest(), candidate)
            manifest_out.write(manifest_line.encode("utf-8"))
        with self.assertRaises(bagit.BagError):
            bagit.Bag(self.tmpdir)
def test_update_oxum(self):
    # A bogus Payload-Oxum makes the bag invalid; after write_baginfo() a
    # freshly loaded bag is expected to validate again.
    bagit.make_bag(self.tmpdir)
    repairable = update_bag.Repairable_Bag(path=self.tmpdir)
    repairable.info['Payload-Oxum'] = '0.0'
    self.assertFalse(repairable.is_valid())

    repairable.write_baginfo()

    reloaded = update_bag.Repairable_Bag(path=self.tmpdir)
    self.assertTrue(self.validate(reloaded))
def test_make_bag_with_unreadable_source(self):
    # With every permission bit cleared on the source directory,
    # make_bag() must raise BagError with the message below.
    expected_message = 'Missing permissions to move all files and directories'
    os.chmod(self.tmpdir, 0)

    with self.assertRaises(bagit.BagError) as raised:
        bagit.make_bag(self.tmpdir, checksum=['sha256'])

    self.assertEqual(expected_message, str(raised.exception))
def test_make_bag_with_empty_directory_tree(self):
    # make_bag() must not fail on a tree that holds only nested directories.
    scratch_dir = tempfile.mkdtemp()
    nested_dir = j(scratch_dir, "test1", "test2")
    try:
        os.makedirs(nested_dir)
        bagit.make_bag(scratch_dir)
    finally:
        # Always remove the private temp dir, whether or not bagging worked.
        shutil.rmtree(scratch_dir)
def test_make_bag_with_bogus_directory(self):
    # Bagging a path that does not exist must raise RuntimeError naming it.
    missing_dir = os.path.realpath('this-directory-does-not-exist')

    with self.assertRaises(RuntimeError) as raised:
        bagit.make_bag(missing_dir)

    expected_message = 'Bag directory %s does not exist' % missing_dir
    self.assertEqual(expected_message, str(raised.exception))
def test_make_bag_with_unreadable_file(self):
    # One payload file with all permission bits cleared must abort bagging.
    locked_file = j(self.tmpdir, 'loc', '2478433644_2839c5e8b8_o_d.jpg')
    os.chmod(locked_file, 0)

    with self.assertRaises(bagit.BagError) as raised:
        bagit.make_bag(self.tmpdir, checksum=['sha256'])

    expected_message = 'Read permissions are required to calculate file fixities'
    self.assertEqual(expected_message, str(raised.exception))
def test_payload_file_not_in_manifest(self):
    # A file dropped into data/ after bagging must be reported as missing
    # from the manifest and must make full validation fail.
    bagit.make_bag(self.tmpdir)
    bag = update_bag.Repairable_Bag(path=self.tmpdir)
    stray_file = j(self.tmpdir, "data/._.SYSTEMFILE.db\r")
    with open(stray_file, 'w') as r:
        r.write('♡')
    self.assertEqual(list(bag.payload_files_not_in_manifest()),
                     ['data/._.SYSTEMFILE.db\r'])
    # bag.validate is already bound to `bag`; passing `bag` again would hand
    # the bag object to validate() as its first real positional argument.
    self.assertRaises(bagit.BagValidationError, bag.validate, fast=False)
def test_load_bagmake_bag_sha1_sha256_manifest(self):
    # Bag with two checksum algorithms, then load it as a Repairable_Bag.
    algorithms = ['sha1', 'sha256']
    bagit.make_bag(self.tmpdir, checksums=algorithms)
    loaded = update_bag.Repairable_Bag(path=self.tmpdir)

    # check that relevant manifests are created
    sha1_manifest = j(self.tmpdir, 'manifest-sha1.txt')
    self.assertTrue(os.path.isfile(sha1_manifest))
    sha256_manifest = j(self.tmpdir, 'manifest-sha256.txt')
    self.assertTrue(os.path.isfile(sha256_manifest))

    # check valid with two manifests
    self.assertTrue(self.validate(loaded, fast=True))
def create_bag(resource):
    """
    Create a bag from the current filesystem of the resource, then zip it up and add it to the
    resource.

    Note, this procedure may take a while. It is highly advised that it be deferred to a Celery
    task.

    Parameters:
    :param resource: (subclass of AbstractResource) A resource to create a bag for.

    :return: the hs_core.models.Bags instance associated with the new bag.
    """
    dest_prefix = getattr(settings, 'BAGIT_TEMP_LOCATION', '/tmp/hydroshare/')
    version_stamp = arrow.get(resource.updated).format("YYYY.MM.DD.HH.mm.ss")
    bagit_path = os.path.join(dest_prefix, resource.short_id, version_stamp)
    visualization_path = os.path.join(bagit_path, 'visualization')
    contents_path = os.path.join(bagit_path, 'contents')

    for d in (dest_prefix, bagit_path, visualization_path, contents_path):
        try:
            os.makedirs(d)
        except OSError:
            # The directory could not be created (typically it already
            # exists): start over from an empty one.
            # NOTE(review): this also wipes an existing dest_prefix and
            # everything below it - confirm that is intended.
            shutil.rmtree(d)
            os.makedirs(d)

    for f in resource.files.all():
        shutil.copy2(f.resource_file.path, contents_path)

    with open(os.path.join(bagit_path, 'resourcemetadata.json'), 'w') as out:
        tastypie_module = resource._meta.app_label + '.api'      # the module name should follow this convention
        tastypie_name = resource._meta.object_name + 'Resource'  # the classname of the Resource serializer
        tastypie_api = importlib.import_module(tastypie_module)  # import the module
        serializer = getattr(tastypie_api, tastypie_name)()      # make an instance of the tastypie resource
        bundle = serializer.build_bundle(obj=resource)           # build a serializable bundle out of the resource
        out.write(serializer.serialize(None, serializer.full_dehydrate(bundle), 'application/json'))

    # Fetch the first owner once instead of issuing the same lookup twice.
    owner = resource.owners.all()[0]
    bagit.make_bag(bagit_path, checksum=['md5'], bag_info={
        'title': resource.title,
        'author': owner.username,
        'author_email': owner.email,
        'version': version_stamp,
        'resource_type': '.'.join((resource._meta.app_label, resource._meta.object_name)),
        'hydroshare_version': getattr(settings, 'HYDROSHARE_VERSION', "R1 development"),
        'shortkey': resource.short_id,
        'slug': resource.slug
    })

    zf = os.path.join(dest_prefix, resource.short_id) + ".zip"
    make_zipfile(output_filename=zf, source_dir=bagit_path)
    # A zip archive is binary data; open it as such and make sure the handle
    # is closed again once the Bags row has been created.
    with open(zf, 'rb') as zipped:
        b = Bags.objects.create(
            content_object=resource,
            bag=File(zipped),
            timestamp=resource.updated
        )

    # The temporary zip and the working tree are no longer needed.
    os.unlink(zf)
    shutil.rmtree(bagit_path)

    return b
def test_garbage_in_bagit_txt(self):
    # A bagit.txt with a junk line after the two declarations must be
    # rejected when the bag is opened.
    bagit.make_bag(self.tmpdir)
    garbage_declaration = (
        "BagIt-Version: 0.97\n"
        "Tag-File-Character-Encoding: UTF-8\n"
        "==================================\n"
    )
    with open(j(self.tmpdir, "bagit.txt"), "w") as declaration_file:
        declaration_file.write(garbage_declaration)
    with self.assertRaises(bagit.BagValidationError):
        bagit.Bag(self.tmpdir)
def test_update_hashes(self):
    # After a payload file is written post-bagging, update_hashes() is
    # expected to leave a bag that validates when loaded afresh.
    bagit.make_bag(self.tmpdir, checksums=['sha1', 'sha256'])
    repairable = update_bag.Repairable_Bag(path=self.tmpdir)

    changed_file = j(self.tmpdir, "data/hello.txt")
    with open(changed_file, 'w') as handle:
        handle.write('♡')

    repairable.update_hashes()

    reloaded = update_bag.Repairable_Bag(path=self.tmpdir)
    self.assertTrue(self.validate(reloaded))
def test_add_payload_file_not_in_multiple_manifests(self):
    # With two manifests present, adding the unmanifested payload file is
    # expected to yield a bag that validates when loaded afresh.
    bagit.make_bag(self.tmpdir, checksums=['sha1', 'sha256'])
    repairable = update_bag.Repairable_Bag(path=self.tmpdir)

    stray_file = j(self.tmpdir, "data/._.SYSTEMFILE.db\r")
    with open(stray_file, 'w') as handle:
        handle.write('♡')

    repairable.add_payload_files_not_in_manifest()

    reloaded = update_bag.Repairable_Bag(path=self.tmpdir)
    self.assertTrue(self.validate(reloaded))
def test_make_bag_with_unreadable_subdirectory(self):
    # The subdirectory is made write-only so that the second permission
    # check in make_bag is the one that fires.
    write_only_dir = j(self.tmpdir, 'loc')
    os.chmod(write_only_dir, 0o200)

    with self.assertRaises(bagit.BagError) as raised:
        bagit.make_bag(self.tmpdir, checksum=['sha256'])

    expected_message = 'Read permissions are required to calculate file fixities'
    self.assertEqual(expected_message, str(raised.exception))
def test_make_bag_sha256_manifest(self):
    # Bagging with sha256 must write manifest-sha256.txt with these entries.
    bagit.make_bag(self.tmpdir, checksum=['sha256'])

    # check manifest
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'manifest-sha256.txt')))
    manifest_txt = slurp_text_file(j(self.tmpdir, 'manifest-sha256.txt'))
    expected_entries = (
        'b6df8058fa818acfd91759edffa27e473f2308d5a6fca1e07a79189b95879953 data/loc/2478433644_2839c5e8b8_o_d.jpg',
        '1af90c21e72bb0575ae63877b3c69cfb88284f6e8c7820f2c48dc40a08569da5 data/loc/3314493806_6f1db86d66_o_d.jpg',
        'f065a4ae2bc5d47c6d046c3cba5c8cdfd66b07c96ff3604164e2c31328e41c1a data/si/2584174182_ffd5c24905_b_d.jpg',
        '45d257c93e59ec35187c6a34c8e62e72c3e9cfbb548984d6f6e8deb84bac41f4 data/si/4011399822_65987a4806_b_d.jpg',
    )
    for entry in expected_entries:
        self.assertTrue(entry in manifest_txt)
def test_make_bag_sha512_manifest(self):
    # Bagging with sha512 must write manifest-sha512.txt with these entries.
    bagit.make_bag(self.tmpdir, checksum=['sha512'])

    # check manifest
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'manifest-sha512.txt')))
    manifest_txt = slurp_text_file(j(self.tmpdir, 'manifest-sha512.txt'))
    expected_entries = (
        '51fb9236a23795886cf42d539d580739245dc08f72c3748b60ed8803c9cb0e2accdb91b75dbe7d94a0a461827929d720ef45fe80b825941862fcde4c546a376d data/loc/2478433644_2839c5e8b8_o_d.jpg',
        '627c15be7f9aabc395c8b2e4c3ff0b50fd84b3c217ca38044cde50fd4749621e43e63828201fa66a97975e316033e4748fb7a4a500183b571ecf17715ec3aea3 data/loc/3314493806_6f1db86d66_o_d.jpg',
        '4cb4dafe39b2539536a9cb31d5addf335734cb91e2d2786d212a9b574e094d7619a84ad53f82bd9421478a7994cf9d3f44fea271d542af09d26ce764edbada46 data/si/2584174182_ffd5c24905_b_d.jpg',
        'af1c03483cd1999098cce5f9e7689eea1f81899587508f59ba3c582d376f8bad34e75fed55fd1b1c26bd0c7a06671b85e90af99abac8753ad3d76d8d6bb31ebd data/si/4011399822_65987a4806_b_d.jpg',
    )
    for entry in expected_entries:
        self.assertTrue(entry in manifest_txt)
def test_make_bag(self):
    # End-to-end check of the files make_bag() writes for an md5 bag.
    info = {"Bagging-Date": "1970-01-01", "Contact-Email": "*****@*****.**"}
    bagit.make_bag(self.tmpdir, bag_info=info, checksums=["md5"])

    # data dir should've been created
    self.assertTrue(os.path.isdir(j(self.tmpdir, "data")))

    # check bagit.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, "bagit.txt")))
    bagit_txt = slurp_text_file(j(self.tmpdir, "bagit.txt"))
    # assertIn, not assertTrue: assertTrue(literal, text) treats the text as
    # the failure message and therefore can never fail.
    self.assertIn("BagIt-Version: 0.97", bagit_txt)
    self.assertIn("Tag-File-Character-Encoding: UTF-8", bagit_txt)

    # check manifest
    self.assertTrue(os.path.isfile(j(self.tmpdir, "manifest-md5.txt")))
    manifest_txt = slurp_text_file(j(self.tmpdir, "manifest-md5.txt")).splitlines()
    self.assertIn("8e2af7a0143c7b8f4de0b3fc90f27354 data/README", manifest_txt)
    self.assertIn(
        "9a2b89e9940fea6ac3a0cc71b0a933a0 data/loc/2478433644_2839c5e8b8_o_d.jpg",
        manifest_txt,
    )
    self.assertIn(
        "6172e980c2767c12135e3b9d246af5a3 data/loc/3314493806_6f1db86d66_o_d.jpg",
        manifest_txt,
    )
    self.assertIn(
        "38a84cd1c41de793a0bccff6f3ec8ad0 data/si/2584174182_ffd5c24905_b_d.jpg",
        manifest_txt,
    )
    self.assertIn(
        "5580eaa31ad1549739de12df819e9af8 data/si/4011399822_65987a4806_b_d.jpg",
        manifest_txt,
    )

    # check bag-info.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, "bag-info.txt")))
    bag_info_txt = slurp_text_file(j(self.tmpdir, "bag-info.txt"))
    bag_info_txt = bag_info_txt.splitlines()
    # Expected line is derived from `info` so it cannot drift from the input.
    self.assertIn("Contact-Email: %s" % info["Contact-Email"], bag_info_txt)
    self.assertIn("Bagging-Date: 1970-01-01", bag_info_txt)
    self.assertIn("Payload-Oxum: 991765.5", bag_info_txt)
    self.assertIn(
        "Bag-Software-Agent: bagit.py v1.5.4 <https://github.com/LibraryOfCongress/bagit-python>",
        bag_info_txt,
    )

    # check tagmanifest-md5.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, "tagmanifest-md5.txt")))
    tagmanifest_txt = slurp_text_file(
        j(self.tmpdir, "tagmanifest-md5.txt")
    ).splitlines()
    self.assertIn("9e5ad981e0d29adc278f6a294b8c2aca bagit.txt", tagmanifest_txt)
    self.assertIn(
        "a0ce6631a2a6d1a88e6d38453ccc72a5 manifest-md5.txt", tagmanifest_txt
    )
    # NOTE(review): this digest presumably depends on the exact contents of
    # bag-info.txt, including Contact-Email - confirm it matches `info`.
    self.assertIn("0a6ffcffe67e9a34e44220f7ebcb4baa bag-info.txt", tagmanifest_txt)
def test_update_hashes_with_filter_match(self):
    # update_hashes() is called with a pattern (\w) that matches the changed
    # file's name; the reloaded bag is then expected to validate.
    bagit.make_bag(self.tmpdir, checksums=['sha1'])
    repairable = update_bag.Repairable_Bag(path=self.tmpdir)

    changed_file = j(self.tmpdir, "data/hello.txt")
    with open(changed_file, 'w') as handle:
        handle.write('♡')

    repairable.update_hashes(filename_pattern=r"\w")

    reloaded = update_bag.Repairable_Bag(path=self.tmpdir)
    self.assertEqual(repairable.entries["data/hello.txt"],
                     reloaded.entries["data/hello.txt"])
    self.assertTrue(self.validate(reloaded))
def test_delete_payload_files_not_in_manifest_with_rules(self):
    # An unmanifested Thumbs.db is deleted under an explicit rule set; the
    # reloaded bag must then pass fast validation.
    bagit.make_bag(self.tmpdir)
    repairable = update_bag.Repairable_Bag(path=self.tmpdir)

    thumbs_file = j(self.tmpdir, "data/Thumbs.db")
    with open(thumbs_file, 'w') as handle:
        handle.write('♡')
    self.assertEqual(list(repairable.payload_files_not_in_manifest()),
                     ['data/Thumbs.db'])

    deletion_rules = {
        "Thumbs.db": {"regex": r"[Tt]humbs\.db$", "match": False},
    }
    repairable.delete_payload_files_not_in_manifest(rules=deletion_rules)

    reloaded = update_bag.Repairable_Bag(path=self.tmpdir)
    self.assertTrue(reloaded.is_valid(fast=True))
def test_delete_payload_files_not_in_manifest(self):
    # An unmanifested system file is reported, deleted with the default
    # rules, and the reloaded bag is expected to validate.
    bagit.make_bag(self.tmpdir)
    repairable = update_bag.Repairable_Bag(path=self.tmpdir)

    stray_file = j(self.tmpdir, "data/._.SYSTEMFILE.db\r")
    with open(stray_file, 'w') as handle:
        handle.write('♡')
    self.assertEqual(list(repairable.payload_files_not_in_manifest()),
                     ['data/._.SYSTEMFILE.db\r'])

    repairable.delete_payload_files_not_in_manifest()

    reloaded = update_bag.Repairable_Bag(path=self.tmpdir)
    self.assertTrue(self.validate(reloaded))
def test_record_premis_nondefault_human_agent(self):
    # A human_agent passed to add_premisevent() must be the one recorded,
    # even though the bag was opened with a different `repairer`.
    bagit.make_bag(self.tmpdir)
    repairable = update_bag.Repairable_Bag(path=self.tmpdir,
                                           repairer="Smokey Yunick")
    repairable.add_premisevent(
        process="Peek into bag",
        msg="Just looking around",
        outcome="Pass",
        sw_agent="update_bag.py",
        human_agent="Yogi Bear",
    )
    repairable.write_bag_updates()

    reloaded = update_bag.Repairable_Bag(path=self.tmpdir)
    first_event = reloaded.premis_events[0]
    self.assertEqual(first_event['Event-Human-Agent'], "Yogi Bear")
def test_open_bag_with_missing_bagit_txt(self):
    # Without bagit.txt the directory must not open as a bag.
    bagit.make_bag(self.tmpdir)
    os.unlink(j(self.tmpdir, 'bagit.txt'))

    with self.assertRaises(bagit.BagError) as raised:
        bagit.Bag(self.tmpdir)

    expected_message = 'Expected bagit.txt does not exist: %s/bagit.txt' % self.tmpdir
    self.assertEqual(expected_message, str(raised.exception))
def test_update_hashes_with_no_filter_match(self):
    # update_hashes() is called with a pattern (\d) that does not match the
    # changed file's name; full validation of the reloaded bag must fail.
    bagit.make_bag(self.tmpdir, checksums=['sha1'])
    repairable = update_bag.Repairable_Bag(path=self.tmpdir)

    changed_file = j(self.tmpdir, "data/hello.txt")
    with open(changed_file, 'w') as handle:
        handle.write('♡')

    repairable.update_hashes(filename_pattern=r"\d")

    reloaded = update_bag.Repairable_Bag(path=self.tmpdir)
    self.assertEqual(repairable.entries["data/hello.txt"],
                     reloaded.entries["data/hello.txt"])
    with self.assertRaises(bagit.BagValidationError):
        reloaded.validate(fast=False)
def test_make_bag_sha1_manifest(self):
    # Bagging with sha1 must write manifest-sha1.txt with these entries.
    bagit.make_bag(self.tmpdir, checksum=['sha1'])

    # check manifest
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'manifest-sha1.txt')))
    manifest_txt = slurp_text_file(j(self.tmpdir, 'manifest-sha1.txt'))
    expected_entries = (
        'ace19416e605cfb12ab11df4898ca7fd9979ee43 data/README',
        '4c0a3da57374e8db379145f18601b159f3cad44b data/loc/2478433644_2839c5e8b8_o_d.jpg',
        '62095aeddae2f3207cb77c85937e13c51641ef71 data/loc/3314493806_6f1db86d66_o_d.jpg',
        'e592194b3733e25166a631e1ec55bac08066cbc1 data/si/2584174182_ffd5c24905_b_d.jpg',
        'db49ef009f85a5d0701829f38d29f8cf9c5df2ea data/si/4011399822_65987a4806_b_d.jpg',
    )
    for entry in expected_entries:
        self.assertTrue(entry in manifest_txt)
def test_open_bag_with_unknown_encoding(self):
    # A bagit.txt declaring an unknown tag-file encoding must be rejected.
    bagit.make_bag(self.tmpdir)

    declaration = "BagIt-Version: 0.97\nTag-File-Character-Encoding: WTF-8\n"
    with open(j(self.tmpdir, "bagit.txt"), "w") as declaration_file:
        declaration_file.write(declaration)

    with self.assertRaises(bagit.BagError) as raised:
        bagit.Bag(self.tmpdir)

    self.assertEqual("Unsupported encoding: WTF-8", str(raised.exception))
def bagit(self, directory, metadata=None):
    """Creates a Bagit, if needs be with default metadata.

    :param directory: directory passed to ``bagit.make_bag``.
    :param metadata: bag-info mapping passed to ``bagit.make_bag``. When
        ``None``, a default mapping is built from the ``BAGIT_*`` constants,
        the current time and ``self.jobs``.
    """
    if metadata is None:
        # Local import: only the `datetime` class is known to be in scope.
        from datetime import timezone

        metadata = {
            "Contact-Name": BAGIT_CONTACT_NAME,
            "Contact-Email": BAGIT_CONTACT_EMAIL,
            # The format ends in a literal "Z" (UTC designator), so the
            # clock has to be read in UTC rather than in local time.
            "Timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
            "Description": BAGIT_DESCRIPTION + ";".join(self.jobs),
        }
    bagit.make_bag(directory, metadata)
def test_unicode_bag_info(self):
    # Non-ASCII bag-info values must appear unchanged in bag-info.txt.
    info = {
        "Test-BMP": "This element contains a \N{LATIN SMALL LETTER U WITH DIAERESIS}",
        "Test-SMP": "This element contains a \N{LINEAR B SYMBOL B049}",
    }
    bagit.make_bag(self.tmpdir, bag_info=info, checksums=["md5"])

    bag_info_txt = slurp_text_file(j(self.tmpdir, "bag-info.txt"))
    for expected_value in info.values():
        self.assertIn(expected_value, bag_info_txt)
def test_open_bag_with_unknown_version(self):
    # A bagit.txt declaring an unknown BagIt version must be rejected.
    bagit.make_bag(self.tmpdir)

    declaration = 'BagIt-Version: 0.123456789\nTag-File-Character-Encoding: UTF-8\n'
    with open(j(self.tmpdir, 'bagit.txt'), 'w') as declaration_file:
        declaration_file.write(declaration)

    with self.assertRaises(bagit.BagError) as raised:
        bagit.Bag(self.tmpdir)

    self.assertEqual('Unsupported bag version: 0.123456789', str(raised.exception))
def test_bom_in_bagit_txt(self):
    # Prefixing bagit.txt with a UTF-8 byte-order mark must make the bag
    # fail validation.
    bag = bagit.make_bag(self.tmpdir)

    # On Python 3 the BOM is decoded so it can be joined with text read
    # from a text-mode file.
    if sys.version_info[0] >= 3:
        bom = codecs.BOM_UTF8.decode('utf-8')
    else:
        bom = codecs.BOM_UTF8

    declaration_path = j(self.tmpdir, "bagit.txt")
    with open(declaration_path, "r") as declaration_file:
        with_bom = bom + declaration_file.read()
    with open(declaration_path, "w") as declaration_file:
        declaration_file.write(with_bom)

    bag = bagit.Bag(self.tmpdir)
    self.assertRaises(bagit.BagValidationError, self.validate, bag)
def test_mixed_case_checksums(self):
    # A manifest whose checksum is written in upper case must still validate.
    bag = bagit.make_bag(self.tmpdir)
    # next(iter(...values())) runs on both Python 2 and 3, unlike
    # .itervalues().next(): take one digest of one manifest entry.
    hashstr = next(iter(bag.entries.values()))
    hashstr = next(iter(hashstr.values()))

    manifest_path = os.path.join(self.tmpdir, "manifest-md5.txt")
    # Context managers make sure the handles are closed (and the rewritten
    # manifest flushed) before the bag is reopened.
    with open(manifest_path, "r") as manifest_file:
        manifest = manifest_file.read()
    manifest = manifest.replace(hashstr, hashstr.upper())
    with open(manifest_path, "w") as manifest_file:
        manifest_file.write(manifest)

    bag = bagit.Bag(self.tmpdir)
    self.assertTrue(bag.validate())
def test_make_bag(self):
    # End-to-end check of the files make_bag() writes with default checksums.
    info = {'Bagging-Date': '1970-01-01', 'Contact-Email': '*****@*****.**'}
    bagit.make_bag(self.tmpdir, bag_info=info)

    # data dir should've been created
    self.assertTrue(os.path.isdir(j(self.tmpdir, 'data')))

    # check bagit.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'bagit.txt')))
    with open(j(self.tmpdir, 'bagit.txt')) as b:
        bagit_txt = b.read()
    self.assertTrue('BagIt-Version: 0.97' in bagit_txt)
    self.assertTrue('Tag-File-Character-Encoding: UTF-8' in bagit_txt)

    # check manifest
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'manifest-md5.txt')))
    with open(j(self.tmpdir, 'manifest-md5.txt')) as m:
        manifest_txt = m.read()
    self.assertTrue(
        '8e2af7a0143c7b8f4de0b3fc90f27354 data/README' in manifest_txt)
    self.assertTrue(
        '9a2b89e9940fea6ac3a0cc71b0a933a0 data/loc/2478433644_2839c5e8b8_o_d.jpg' in manifest_txt)
    self.assertTrue(
        '6172e980c2767c12135e3b9d246af5a3 data/loc/3314493806_6f1db86d66_o_d.jpg' in manifest_txt)
    self.assertTrue(
        '38a84cd1c41de793a0bccff6f3ec8ad0 data/si/2584174182_ffd5c24905_b_d.jpg' in manifest_txt)
    self.assertTrue(
        '5580eaa31ad1549739de12df819e9af8 data/si/4011399822_65987a4806_b_d.jpg' in manifest_txt)

    # check bag-info.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'bag-info.txt')))
    with open(j(self.tmpdir, 'bag-info.txt')) as bi:
        bag_info_txt = bi.read()
    # Expected line is derived from `info` so it cannot drift from the input.
    self.assertTrue(
        'Contact-Email: %s' % info['Contact-Email'] in bag_info_txt)
    self.assertTrue('Bagging-Date: 1970-01-01' in bag_info_txt)
    self.assertTrue('Payload-Oxum: 991765.5' in bag_info_txt)
    self.assertTrue(
        'Bag-Software-Agent: bagit.py <http://github.com/libraryofcongress/bagit-python>' in bag_info_txt)

    # check tagmanifest-md5.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'tagmanifest-md5.txt')))
    with open(j(self.tmpdir, 'tagmanifest-md5.txt')) as tm:
        tagmanifest_txt = tm.read()
    self.assertTrue(
        '9e5ad981e0d29adc278f6a294b8c2aca bagit.txt' in tagmanifest_txt)
    self.assertTrue(
        'a0ce6631a2a6d1a88e6d38453ccc72a5 manifest-md5.txt' in tagmanifest_txt)
    # NOTE(review): this digest presumably depends on the exact contents of
    # bag-info.txt, including Contact-Email - confirm it matches `info`.
    self.assertTrue(
        '6a5090e27cb29d5dda8a0142fbbdf37e bag-info.txt' in tagmanifest_txt)
def test_save_bag_with_unwritable_file(self):
    # save() must raise BagError when bag-info.txt has no permission bits.
    created_bag = bagit.make_bag(self.tmpdir, checksum=['sha256'])
    locked_tag_file = os.path.join(self.tmpdir, 'bag-info.txt')
    os.chmod(locked_tag_file, 0)

    with self.assertRaises(bagit.BagError) as raised:
        created_bag.save()

    expected_message = 'Read permissions are required to calculate file fixities'
    self.assertEqual(expected_message, str(raised.exception))
def test_save_bag_to_unwritable_directory(self):
    # save() must raise BagError when the bag directory itself has no
    # permission bits set.
    created_bag = bagit.make_bag(self.tmpdir, checksum=['sha256'])
    os.chmod(self.tmpdir, 0)

    with self.assertRaises(bagit.BagError) as raised:
        created_bag.save()

    expected_message = (
        'Cannot save bag to non-existent or inaccessible directory %s' % self.tmpdir
    )
    self.assertEqual(expected_message, str(raised.exception))
def test_make_bag_sha1_manifest(self):
    # Bagging with sha1 must write manifest-sha1.txt containing these lines.
    bagit.make_bag(self.tmpdir, checksum=['sha1'])

    # check manifest
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'manifest-sha1.txt')))
    manifest_lines = slurp_text_file(j(self.tmpdir, 'manifest-sha1.txt')).splitlines()
    expected_lines = (
        'ace19416e605cfb12ab11df4898ca7fd9979ee43 data/README',
        '4c0a3da57374e8db379145f18601b159f3cad44b data/loc/2478433644_2839c5e8b8_o_d.jpg',
        '62095aeddae2f3207cb77c85937e13c51641ef71 data/loc/3314493806_6f1db86d66_o_d.jpg',
        'e592194b3733e25166a631e1ec55bac08066cbc1 data/si/2584174182_ffd5c24905_b_d.jpg',
        'db49ef009f85a5d0701829f38d29f8cf9c5df2ea data/si/4011399822_65987a4806_b_d.jpg',
    )
    for expected in expected_lines:
        self.assertIn(expected, manifest_lines)
def test_validate_completeness(self):
    # Renaming a payload file keeps the fast check happy but must fail the
    # completeness-only check, without _validate_entries being called.
    bag = bagit.make_bag(self.tmpdir)
    original_name = j(self.tmpdir, "data", "README")
    renamed_to = j(self.tmpdir, "data", "extra_file")
    os.rename(original_name, renamed_to)

    bag = bagit.Bag(self.tmpdir)
    self.assertTrue(self.validate(bag, fast=True))

    with mock.patch.object(bag, '_validate_entries') as entries_check:
        with self.assertRaises(bagit.BagValidationError):
            self.validate(bag, completeness_only=True)
        self.assertEqual(entries_check.call_count, 0)
def make_bag(path, info: dict = None):
    """Return the bag at ``path``, creating a new one if none can be opened.

    :param path: directory handed to ``bagit.Bag`` / ``bagit.make_bag``.
    :param info: bag-info mapping for a newly made bag; ``None`` means ``{}``.
        It is not used when an existing bag is opened.
    :return: whatever ``bagit.Bag`` or ``bagit.make_bag`` returns.
    """
    # First check if there's already a bag at this path - if so, use it.
    try:
        existing_bag = bagit.Bag(path)
    except bagit.BagError:
        # The path could not be opened as a bag, so fall through and make one.
        logger.info("unable to initialize bag at %s - making a new one", path)
    else:
        return existing_bag

    bag_info = {} if info is None else info
    return bagit.make_bag(path, bag_info)
async def main(guid):
    """Collect a registration's data, bag it, zip it and upload it.

    :param guid: identifier of the registration to archive.
    :return: tuple ``(guid, ia_item.urls.details)`` for the uploaded item.
    :raises AssertionError: if the freshly made bag does not validate.
    """
    with tempfile.TemporaryDirectory(prefix=get_id(guid)) as temp_dir:
        # await first to check if withdrawn
        metadata = await get_registration_metadata(guid, temp_dir, "registration.json")

        # then start all other tasks
        tasks = [
            write_datacite_metadata(guid, temp_dir),
            get_raw_data(guid, temp_dir),
            get_and_write_json_to_temp(
                from_url=f"{settings.OSF_API_URL}v2/registrations/{guid}/wikis/"
                f"?page[size]=100",
                to_dir=temp_dir,
                name="wikis.json",
            ),
            get_and_write_json_to_temp(
                from_url=f"{settings.OSF_API_URL}v2/registrations/{guid}/logs/"
                f"?page[size]=100",
                to_dir=temp_dir,
                name="logs.json",
            ),
            get_and_write_json_to_temp(
                from_url=f"{settings.OSF_API_URL}v2/registrations/{guid}/contributors/"
                f"?page[size]=100",
                to_dir=temp_dir,
                name="contributors.json",
                parse_json=get_contributors,
            ),
        ]

        with ThreadPoolExecutor(max_workers=5) as pool:
            running_tasks = [pool.submit(run, task) for task in tasks]
            # .result() re-raises any exception raised inside a worker.
            for task in running_tasks:
                task.result()

        bagit.make_bag(temp_dir)
        bag = bagit.Bag(temp_dir)
        # Explicit check instead of `assert`: assert statements are removed
        # under `python -O`, which would let an invalid bag be uploaded.
        # AssertionError is kept so existing callers see the same exception.
        if not bag.is_valid():
            raise AssertionError("bag created in %s is not valid" % temp_dir)

        zip_data = create_zip_data(temp_dir)
        ia_item = await upload(get_id(guid), zip_data, metadata)

        return guid, ia_item.urls.details
def test_unsafe_directory_entries_raise_error(self):
    # bagit.Bag() must refuse a bag whose manifest lists an unsafe path.
    # The path sets could be more granular, but ought to be adequate.
    paths_by_platform = {
        True: (r'C:\win32\cmd.exe',
               '\\\\?\\C:\\',
               'COM1:',
               '\\\\.\\COM56',
               '..\\..\\..\\win32\\cmd.exe',
               'data\\..\\..\\..\\win32\\cmd.exe'),
        False: ('../../../secrets.json',
                '~/.pgp/id_rsa',
                '/dev/null',
                'data/../../../secrets.json'),
    }
    bad_paths = paths_by_platform[os.name == 'nt']

    # Stand-in digest so that each manifest line is well formed.
    corpus = 'this is not a real checksum'
    hasher = hashlib.new('md5')
    hasher.update(corpus.encode('utf-8'))

    for bad_path in bad_paths:
        bagit.make_bag(self.tmpdir, checksums=['md5'])
        raw_line = ('%s %s\n' % (hasher.hexdigest(), bad_path)).encode('utf-8')
        with open(j(self.tmpdir, 'manifest-md5.txt'), 'wb+') as manifest_out:
            manifest_out.write(raw_line)
        with self.assertRaises(bagit.BagError):
            bagit.Bag(self.tmpdir)
def test_bag_class(self):
    # make_bag() must return a Bag exposing the payload and manifest files.
    info = {'Contact-Email': '*****@*****.**'}
    bag = bagit.make_bag(self.tmpdir, bag_info=info, checksums=['sha384'])
    self.assertIsInstance(bag, bagit.Bag)

    expected_payload = {
        'data/README',
        'data/si/2584174182_ffd5c24905_b_d.jpg',
        'data/si/4011399822_65987a4806_b_d.jpg',
        'data/loc/2478433644_2839c5e8b8_o_d.jpg',
        'data/loc/3314493806_6f1db86d66_o_d.jpg',
    }
    self.assertEqual(set(bag.payload_files()), expected_payload)

    expected_manifests = ['%s/manifest-sha384.txt' % self.tmpdir]
    self.assertEqual(list(bag.manifest_files()), expected_manifests)
def bag_package(self, contactname, jobtitle, department, email, phone,
                creator, rrsda, title, datefrom, dateto, description,
                metadata, package_folder):
    """Copy a folder into a temporary bag, zip it and move the zip to ~/Desktop.

    ``package_folder`` may be wrapped in double quotes; they are stripped.
    The remaining arguments are written verbatim into the bag's info fields.

    :return: ``True`` once the zip has been moved to the desktop, ``False``
        if creating or saving the bag raised an exception.
    """
    source_folder = package_folder.strip('"')
    # mkdtemp() already creates a fresh directory, so it is used as-is.
    bag_dir_parent = tempfile.mkdtemp()
    try:
        bag_dir = os.path.join(bag_dir_parent, 'bag')
        payload_dir = os.path.join(bag_dir, os.path.basename(source_folder))
        os.makedirs(payload_dir)
        copy_tree(os.path.normpath(source_folder), payload_dir)

        # Drop macOS Finder metadata files before the payload is checksummed.
        for root, _subdirs, files in os.walk(bag_dir):
            for file_name in files:
                if file_name == ".DS_Store":
                    os.remove(os.path.abspath(os.path.join(root, file_name)))

        version = "2.0.6"

        try:
            bag = bagit.make_bag(bag_dir, None, 1, ['sha256'])
            bag.info['Package-Time'] = strftime("%Y-%m-%d %H:%M:%S")
            bag.info['Bag-Software-Agent'] = "MoveIt " + version
            bag.info['Contact-Name'] = contactname
            bag.info['Contact-Title'] = jobtitle
            bag.info['Contact-Organization'] = department
            bag.info['Contact-Email'] = email
            bag.info['Contact-Phone'] = phone
            bag.info['Source-Organization'] = creator
            bag.info['RRSDA-Number'] = rrsda
            bag.info['External-Identifier'] = title
            bag.info['Year-Start'] = datefrom
            bag.info['Year-End'] = dateto
            bag.info['External-Description'] = description
            bag.info['Other-Available-Metadata'] = metadata
            bag.info['Internal-Sender-Identifier'] = ''
            bag.info['Internal-Sender-Description'] = ''
            bag.info['Internal-Validation-Date'] = ''
            bag.info['Internal-Validation-By'] = ''
            bag.info['Internal-Validation-Note'] = ''
            bag.save()
        except Exception:
            # Deliberate best effort: any bagging failure is reported to the
            # caller as False (Exception already covers bagit.BagError).
            return False

        bag_destination = os.path.join(str(bag_dir_parent), title)
        zipname = shutil.make_archive(bag_destination, 'zip', bag_dir)

        desktop_path = os.path.expanduser("~/Desktop/")
        shutil.move(zipname, os.path.join(desktop_path, os.path.basename(zipname)))
        return True
    finally:
        # Remove the whole temp tree on every exit path; on success the zip
        # has already been moved out of it.
        shutil.rmtree(bag_dir_parent, ignore_errors=True)
def test_validate_flipped_bit(self):
    # Changing one character of a payload file must fail full validation.
    bag = bagit.make_bag(self.tmpdir)
    readme_path = j(self.tmpdir, "data", "README")

    with open(readme_path) as source:
        original_text = source.read()
    # Same length, different first character.
    altered_text = 'A' + original_text[1:]
    with open(readme_path, "w") as target:
        target.write(altered_text)

    bag = bagit.Bag(self.tmpdir)
    self.assertRaises(bagit.BagValidationError, self.validate, bag)
    # fast doesn't catch the flipped bit, since oxsum is the same
    self.assertTrue(self.validate(bag, fast=True))
def create_bag(self, tree_to_bag: str, metadata: dict) -> bool:
    """Bag ``tree_to_bag`` with sha256 checksums and save the result.

    The tree path is stored on ``self.tree_to_bag`` and the new bag on
    ``self.working_bag``. On ``bagit.BagError`` the exception is stored on
    ``self.bagging_error``.

    :return: ``True`` on success, ``False`` if ``bagit.BagError`` was raised.
    """
    self.tree_to_bag = tree_to_bag
    try:
        self.working_bag = bagit.make_bag(
            self.tree_to_bag,
            metadata,
            processes=8,
            checksum=["sha256"],
        )
        self.working_bag.save()
    except bagit.BagError as bag_error:
        self.bagging_error = bag_error
        return False
    else:
        return True
def test_mixed_case_checksums(self):
    # A manifest whose checksum is written in upper case must still validate.
    bag = bagit.make_bag(self.tmpdir)
    # Take one digest of one manifest entry.
    first_entry = next(iter(bag.entries.values()))
    digest = next(iter(first_entry.values()))

    with open(os.path.join(self.tmpdir, "manifest-md5.txt"), "r") as manifest_in:
        manifest_text = manifest_in.read()

    upper_cased = manifest_text.replace(digest, digest.upper())

    with open(os.path.join(self.tmpdir, "manifest-md5.txt"), "w") as manifest_out:
        manifest_out.write(upper_cased)

    bag = bagit.Bag(self.tmpdir)
    self.assertTrue(bag.validate())
def test_validate_flipped_bit(self):
    # Changing one character of a payload file must fail full validation.
    bag = bagit.make_bag(self.tmpdir)
    readme_path = os.path.join(self.tmpdir, "data", "README")

    with open(readme_path, "r", encoding="utf8") as source:
        original_text = source.read()
    # Same length, different first character.
    altered_text = 'A' + original_text[1:]
    with open(readme_path, "w", encoding="utf8") as target:
        target.write(altered_text)

    bag = bagit.Bag(self.tmpdir)
    with self.assertRaises(bagit.BagValidationError):
        bag.validate()
    # fast doesn't catch the flipped bit, since oxsum is the same
    self.assertTrue(bag.validate(fast=True))
def create_bag(path):
    # Turn `path` into an md5 bag carrying fixed contact metadata.
    # TODO: add functionality to raise an error for an empty folder.
    bagit.make_bag(
        bag_dir=path,
        checksums=['md5'],
        bag_info={
            'Contact-Name': 'Wim Lo',
            'Source-Organization': 'Flanders Architecture Institute',
        },
    )
    print(path, "bag created!")
def test_validate_flipped_bit(self):
    # Changing one character of a payload file must fail full validation,
    # while the fast and completeness-only checks still pass.
    bag = bagit.make_bag(self.tmpdir)
    readme_path = j(self.tmpdir, "data", "README")

    original_text = slurp_text_file(readme_path)
    # Same length, different first character.
    altered_text = "A" + original_text[1:]
    with open(readme_path, "w") as target:
        target.write(altered_text)

    bag = bagit.Bag(self.tmpdir)
    self.assertRaises(bagit.BagValidationError, self.validate, bag)
    # fast doesn't catch the flipped bit, since oxsum is the same
    self.assertTrue(self.validate(bag, fast=True))
    self.assertTrue(self.validate(bag, completeness_only=True))
def test_validation_error_details(self):
    # A bag with a missing tag file, an extra payload file and an altered
    # payload file must report all three problems, in this order.
    bagit.make_bag(self.tmpdir)
    readme = os.path.join(self.tmpdir, "data", "README")
    with open(readme) as readme_file:
        txt = readme_file.read()
    txt = 'A' + txt[1:]
    with open(readme, "w") as readme_file:
        readme_file.write(txt)

    extra_file = os.path.join(self.tmpdir, "data", "extra")
    with open(extra_file, "w") as extra:
        extra.write('foo')

    # remove the bag-info.txt which contains the oxum to force a full
    # check of the manifest
    os.remove(os.path.join(self.tmpdir, "bag-info.txt"))

    bag = bagit.Bag(self.tmpdir)
    # assertRaises makes the test fail when validate() raises nothing;
    # a try/except with an unchecked flag would pass silently.
    with self.assertRaises(bagit.BagValidationError) as caught:
        bag.validate()
    e = caught.exception

    self.assertEqual(
        str(e),
        "invalid bag: bag-info.txt exists in manifest but not found on filesystem ; data/extra exists on filesystem but is not in manifest ; data/README checksum validation failed (alg=md5 expected=8e2af7a0143c7b8f4de0b3fc90f27354 found=fd41543285d17e7c29cd953f5cf5b955)"
    )
    self.assertEqual(len(e.details), 3)

    error = e.details[0]
    self.assertEqual(
        str(error),
        "bag-info.txt exists in manifest but not found on filesystem")
    self.assertTrue(isinstance(error, bagit.FileMissing))
    self.assertEqual(error.path, "bag-info.txt")

    error = e.details[1]
    self.assertEqual(
        str(error),
        "data/extra exists on filesystem but is not in manifest")
    self.assertTrue(isinstance(error, bagit.UnexpectedFile))
    self.assertEqual(error.path, "data/extra")

    error = e.details[2]
    self.assertEqual(
        str(error),
        "data/README checksum validation failed (alg=md5 expected=8e2af7a0143c7b8f4de0b3fc90f27354 found=fd41543285d17e7c29cd953f5cf5b955)"
    )
    self.assertTrue(isinstance(error, bagit.ChecksumMismatch))
    self.assertEqual(error.algorithm, 'md5')
    self.assertEqual(error.path, 'data/README')
    self.assertEqual(error.expected, '8e2af7a0143c7b8f4de0b3fc90f27354')
    self.assertEqual(error.found, 'fd41543285d17e7c29cd953f5cf5b955')
def test_save_manifests_deleted_files(self):
    # After a payload file is deleted the bag is invalid until
    # save(manifests=True) is called again.
    bag = bagit.make_bag(self.tmpdir)
    self.assertTrue(bag.is_valid())
    bag.save(manifests=True)
    self.assertTrue(bag.is_valid())

    os.remove(j(self.tmpdir, "data", "loc", "2478433644_2839c5e8b8_o_d.jpg"))
    # bag.validate is already bound to `bag`; passing `bag` again would hand
    # the bag object to validate() as its first real positional argument.
    self.assertRaises(bagit.BagValidationError, bag.validate, fast=False)

    bag.save(manifests=True)
    self.assertTrue(bag.is_valid())
def test_make_bag_sha512_manifest(self):
    # Bagging with sha512 must write manifest-sha512.txt containing these lines.
    bagit.make_bag(self.tmpdir, checksum=["sha512"])

    # check manifest
    self.assertTrue(os.path.isfile(j(self.tmpdir, "manifest-sha512.txt")))
    manifest_lines = slurp_text_file(j(self.tmpdir, "manifest-sha512.txt")).splitlines()
    expected_lines = (
        "51fb9236a23795886cf42d539d580739245dc08f72c3748b60ed8803c9cb0e2accdb91b75dbe7d94a0a461827929d720ef45fe80b825941862fcde4c546a376d data/loc/2478433644_2839c5e8b8_o_d.jpg",
        "627c15be7f9aabc395c8b2e4c3ff0b50fd84b3c217ca38044cde50fd4749621e43e63828201fa66a97975e316033e4748fb7a4a500183b571ecf17715ec3aea3 data/loc/3314493806_6f1db86d66_o_d.jpg",
        "4cb4dafe39b2539536a9cb31d5addf335734cb91e2d2786d212a9b574e094d7619a84ad53f82bd9421478a7994cf9d3f44fea271d542af09d26ce764edbada46 data/si/2584174182_ffd5c24905_b_d.jpg",
        "af1c03483cd1999098cce5f9e7689eea1f81899587508f59ba3c582d376f8bad34e75fed55fd1b1c26bd0c7a06671b85e90af99abac8753ad3d76d8d6bb31ebd data/si/4011399822_65987a4806_b_d.jpg",
    )
    for expected in expected_lines:
        self.assertIn(expected, manifest_lines)
def test_save_manifests(self):
    """Check that ``save(manifests=True)`` picks up a newly added payload file.

    The bag is expected to be valid after creation and after a first
    save. Once an unlisted file is written into ``data``, ``validate``
    must raise ``BagValidationError``; after saving with
    ``manifests=True`` again the bag must report itself as valid.
    """
    bag = bagit.make_bag(self.tmpdir)
    self.assertTrue(bag.is_valid())
    bag.save(manifests=True)
    self.assertTrue(bag.is_valid())

    with open(j(self.tmpdir, "data", "newfile"), "w") as nf:
        nf.write("newfile")

    # ``bag.validate`` is already bound to ``bag``; passing the bag again
    # would hand it to validate's first optional parameter.
    self.assertRaises(bagit.BagValidationError, bag.validate, fast=False)

    bag.save(manifests=True)
    self.assertTrue(bag.is_valid())
def test_recipe_file_creation(path, recipe):
    """Check that ``recipe_file_creation`` writes ``Abbati.json``.

    ``path`` and ``recipe`` only have ``return_value`` assigned here;
    presumably they are mock objects injected by decorators outside this
    block (confirm against the decorators).
    """
    with tempfile.TemporaryDirectory() as workdir:
        # seed the directory with one payload file and turn it into a bag
        sample_path = os.path.join(workdir, "x.txt")
        with open(sample_path, 'w') as sample:
            sample.write("This is a test file")

        path.return_value = workdir
        bagit.make_bag(workdir).save(manifests=True)
        recipe.return_value = "Some content".encode("UTF-8")

        recipe_file_creation("Abbati", "999655522", "formatparams")

        # the recipe bytes must have been written out as text
        output_path = os.path.join(workdir, "Abbati.json")
        assert_true(os.path.isfile(output_path))
        with open(output_path, "r") as output:
            written = output.read()
        assert_equal(written, "Some content")
def test_make_bag_sha256_manifest(self):
    """Bagging with sha256 must write the expected manifest-sha256.txt lines."""
    bagit.make_bag(self.tmpdir, checksum=["sha256"])

    # the manifest file has to exist before its content is inspected
    manifest_path = j(self.tmpdir, "manifest-sha256.txt")
    self.assertTrue(os.path.isfile(manifest_path))
    manifest_lines = slurp_text_file(manifest_path).splitlines()

    # one "<digest> <payload path>" entry per payload file, checked in
    # this order
    expected_entries = (
        "b6df8058fa818acfd91759edffa27e473f2308d5a6fca1e07a79189b95879953 data/loc/2478433644_2839c5e8b8_o_d.jpg",
        "1af90c21e72bb0575ae63877b3c69cfb88284f6e8c7820f2c48dc40a08569da5 data/loc/3314493806_6f1db86d66_o_d.jpg",
        "f065a4ae2bc5d47c6d046c3cba5c8cdfd66b07c96ff3604164e2c31328e41c1a data/si/2584174182_ffd5c24905_b_d.jpg",
        "45d257c93e59ec35187c6a34c8e62e72c3e9cfbb548984d6f6e8deb84bac41f4 data/si/4011399822_65987a4806_b_d.jpg",
    )
    for entry in expected_entries:
        self.assertIn(entry, manifest_lines)
def test_make_bag(self):
    """Check every file ``make_bag`` writes for an md5 bag.

    Covers the payload directory, ``bagit.txt``, ``manifest-md5.txt``,
    ``bag-info.txt`` and ``tagmanifest-md5.txt``.
    """
    # NOTE(review): the e-mail literals in this test look masked or
    # redacted; the bag-info.txt expectations below depend on the real
    # address - confirm against the upstream fixture.
    info = {'Bagging-Date': '1970-01-01', 'Contact-Email': '*****@*****.**'}
    bagit.make_bag(self.tmpdir, bag_info=info, checksums=['md5'])

    # data dir should've been created
    self.assertTrue(os.path.isdir(j(self.tmpdir, 'data')))

    # check bagit.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'bagit.txt')))
    bagit_txt = slurp_text_file(j(self.tmpdir, 'bagit.txt'))
    # assertIn, not assertTrue: assertTrue(text, msg) treats its second
    # argument as the failure message and can never fail on a non-empty
    # string.
    self.assertIn('BagIt-Version: 0.97', bagit_txt)
    self.assertIn('Tag-File-Character-Encoding: UTF-8', bagit_txt)

    # check manifest
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'manifest-md5.txt')))
    manifest_txt = slurp_text_file(j(self.tmpdir, 'manifest-md5.txt')).splitlines()
    self.assertIn('8e2af7a0143c7b8f4de0b3fc90f27354 data/README', manifest_txt)
    self.assertIn('9a2b89e9940fea6ac3a0cc71b0a933a0 data/loc/2478433644_2839c5e8b8_o_d.jpg', manifest_txt)
    self.assertIn('6172e980c2767c12135e3b9d246af5a3 data/loc/3314493806_6f1db86d66_o_d.jpg', manifest_txt)
    self.assertIn('38a84cd1c41de793a0bccff6f3ec8ad0 data/si/2584174182_ffd5c24905_b_d.jpg', manifest_txt)
    self.assertIn('5580eaa31ad1549739de12df819e9af8 data/si/4011399822_65987a4806_b_d.jpg', manifest_txt)

    # check bag-info.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'bag-info.txt')))
    bag_info_txt = slurp_text_file(j(self.tmpdir, 'bag-info.txt'))
    bag_info_txt = bag_info_txt.splitlines()
    self.assertIn('Contact-Email: [email protected]', bag_info_txt)
    self.assertIn('Bagging-Date: 1970-01-01', bag_info_txt)
    self.assertIn('Payload-Oxum: 991765.5', bag_info_txt)
    self.assertIn('Bag-Software-Agent: bagit.py v1.5.4 <https://github.com/LibraryOfCongress/bagit-python>', bag_info_txt)

    # check tagmanifest-md5.txt
    self.assertTrue(os.path.isfile(j(self.tmpdir, 'tagmanifest-md5.txt')))
    tagmanifest_txt = slurp_text_file(j(self.tmpdir, 'tagmanifest-md5.txt')).splitlines()
    self.assertIn('9e5ad981e0d29adc278f6a294b8c2aca bagit.txt', tagmanifest_txt)
    self.assertIn('a0ce6631a2a6d1a88e6d38453ccc72a5 manifest-md5.txt', tagmanifest_txt)
    self.assertIn('0a6ffcffe67e9a34e44220f7ebcb4baa bag-info.txt', tagmanifest_txt)
def test_save_baginfo(self):
    """Values stored in ``bag.info`` must be readable after save and reload."""
    bag = bagit.make_bag(self.tmpdir)

    # a single string value
    bag.info["foo"] = "bar"
    bag.save()
    bag = bagit.Bag(self.tmpdir)
    self.assertEqual(bag.info["foo"], "bar")
    self.assertTrue(bag.is_valid())

    # a list value, read back through a freshly opened bag
    bag.info["x"] = ["a", "b", "c"]
    bag.save()
    reloaded = bagit.Bag(self.tmpdir)
    self.assertEqual(reloaded.info["x"], ["a", "b", "c"])
    # validity is checked on the bag that was saved, not on the reloaded one
    self.assertTrue(bag.is_valid())
def create_aptrust_metadata_bag(self, src, storage_dir):
    """Copy ``src`` into a ``miami.edu.``-prefixed bag and move it to storage.

    The bag directory name is ``src`` with every occurrence of
    ``storage_dir`` removed, prefixed with ``miami.edu.``. The copy is
    bagged, saved with regenerated manifests, passed to
    ``write_aptrust_info`` and finally moved to the configured local
    storage directory.
    """
    relative_name = src.replace(storage_dir, '')
    bag_dir = 'miami.edu.' + relative_name
    copytree(src, bag_dir)

    bag_info = {
        'Source-Organization': self.settings[':institution']['source_org'],
        'Internal-Sender-Description': '',
        'Internal-Sender-Identifier': '',
    }
    bagit.make_bag(bag_dir, bag_info).save(manifests=True)

    self.write_aptrust_info(bag_dir)
    destination = self.settings[':local_server']['storage_directory']
    move(bag_dir, destination)
def test_payload_permissions(self):
    """The payload directory must have the same mode as the bagged directory."""
    original_mode = os.stat(self.tmpdir).st_mode

    # precondition: the temporary directory starts out without the
    # S_IWOTH write bit
    self.assertEqual(original_mode & stat.S_IWOTH, 0)

    # add that bit, confirm the mode really changed, then bag the
    # directory; the resulting payload directory must carry the new mode
    widened_mode = original_mode | stat.S_IWOTH
    self.assertTrue(original_mode != widened_mode)
    os.chmod(self.tmpdir, widened_mode)

    bagit.make_bag(self.tmpdir)
    data_dir = j(self.tmpdir, 'data')
    self.assertEqual(os.stat(data_dir).st_mode, widened_mode)
def init_bag(bagDir, alg):
    """Return a bag instance for a given directory.

    An existing bag is opened as-is and never re-bagged. If opening it
    raises ``bagit.BagError``, the bare directory is turned into a bag.

    Args:
        bagDir: path of the directory to open or to bag.
        alg: forwarded as the fourth positional argument of
            ``bagit.make_bag``; presumably the checksum algorithm(s) -
            confirm against the installed bagit version.

    Returns:
        The bag instance, or ``False`` when a newly created bag does not
        pass ``is_valid()``.
    """
    try:
        return bagit.Bag(bagDir)
    except bagit.BagError:
        # ``except X:`` parses on Python 2 and 3 alike, unlike ``except X, e``.
        bag = bagit.make_bag(bagDir, None, 1, alg)
        if bag.is_valid():
            return bag
        return False
def test_validation_completeness_error_details(self):
    """Check the details reported for missing and unexpected files.

    ``data/README`` is renamed to ``data/extra`` and ``bag-info.txt`` is
    deleted, so validation must report two ``FileMissing`` errors (in
    either order) followed by one ``UnexpectedFile`` error.
    """
    bagit.make_bag(self.tmpdir, checksums=['md5'], bag_info={'Bagging-Date': '1970-01-01'})

    old_path = j(self.tmpdir, "data", "README")
    new_path = j(self.tmpdir, "data", "extra")
    os.rename(old_path, new_path)

    # remove the bag-info.txt which contains the oxum to force a full
    # check of the manifest
    os.remove(j(self.tmpdir, "bag-info.txt"))

    bag = bagit.Bag(self.tmpdir)
    try:
        self.validate(bag)
    except bagit.BagValidationError as e:
        # Keep a reference: Python 3 unbinds ``e`` when the handler ends.
        validation_error = e
    else:
        self.fail("didn't get BagValidationError")

    exc_str = str(validation_error)
    self.assertIn("Bag validation failed: ", exc_str)
    self.assertIn("bag-info.txt exists in manifest but was not found on filesystem", exc_str)
    self.assertIn("data/README exists in manifest but was not found on filesystem", exc_str)
    self.assertIn("data/extra exists on filesystem but is not in the manifest", exc_str)
    self.assertEqual(len(validation_error.details), 3)

    # The two missing-file errors may come in either order.
    if validation_error.details[0].path == "bag-info.txt":
        baginfo_error = validation_error.details[0]
        readme_error = validation_error.details[1]
    else:
        baginfo_error = validation_error.details[1]
        readme_error = validation_error.details[0]

    self.assertEqual(str(baginfo_error), "bag-info.txt exists in manifest but was not found on filesystem")
    self.assertIsInstance(baginfo_error, bagit.FileMissing)
    self.assertEqual(baginfo_error.path, "bag-info.txt")

    self.assertEqual(str(readme_error), "data/README exists in manifest but was not found on filesystem")
    self.assertIsInstance(readme_error, bagit.FileMissing)
    self.assertEqual(readme_error.path, "data/README")

    error = validation_error.details[2]
    self.assertEqual(str(error), "data/extra exists on filesystem but is not in the manifest")
    # assertIsInstance, not assertTrue: assertTrue(obj, cls) treats the
    # class as the failure message and never checks the type.
    self.assertIsInstance(error, bagit.UnexpectedFile)
    self.assertEqual(error.path, "data/extra")