def test_default_bagging_date(self):
     """Check that bag-info.txt gets today's date when no Bagging-Date is given.

     The bag is created with only a Contact-Email in ``bag_info``; the test
     then expects both that e-mail and a ``Bagging-Date`` equal to today's
     date (``YYYY-MM-DD``) to appear in ``bag-info.txt``.
     """
     info = {'Contact-Email': '*****@*****.**'}
     bagit.make_bag(self.tmpdir, bag_info=info)
     bag_info_txt = slurp_text_file(j(self.tmpdir, 'bag-info.txt'))
     # Build the expectation from the value actually handed to make_bag so
     # the assertion cannot drift away from the input.
     self.assertIn('Contact-Email: %s' % info['Contact-Email'], bag_info_txt)
     today = datetime.date.today().strftime("%Y-%m-%d")
     self.assertIn('Bagging-Date: %s' % today, bag_info_txt)
 def test_make_bag_with_empty_directory_tree(self):
     """Bag a scratch directory that contains only nested, empty folders."""
     scratch_dir = tempfile.mkdtemp()
     nested_dir = j(scratch_dir, "test1", "test2")
     try:
         os.makedirs(nested_dir)
         bagit.make_bag(scratch_dir)
     finally:
         # Remove the scratch tree whether or not bagging succeeded.
         shutil.rmtree(scratch_dir)
    def test_make_bag_with_bogus_directory(self):
        """Expect RuntimeError when make_bag is pointed at a missing directory."""
        missing_dir = os.path.realpath('this-directory-does-not-exist')
        expected_message = 'Bag directory %s does not exist' % missing_dir

        with self.assertRaises(RuntimeError) as raised:
            bagit.make_bag(missing_dir)

        self.assertEqual(expected_message, str(raised.exception))
    def test_make_bag_with_unreadable_source(self):
        """Expect BagError when the bag directory has every mode bit cleared."""
        os.chmod(self.tmpdir, 0)

        with self.assertRaises(bagit.BagError) as raised:
            bagit.make_bag(self.tmpdir, checksum=['sha256'])

        actual_message = str(raised.exception)
        self.assertEqual(
            'Missing permissions to move all files and directories',
            actual_message)
    def test_make_bag_with_unreadable_file(self):
        """Expect BagError when one payload file has every mode bit cleared."""
        locked_file = j(self.tmpdir, 'loc', '2478433644_2839c5e8b8_o_d.jpg')
        os.chmod(locked_file, 0)

        with self.assertRaises(bagit.BagError) as raised:
            bagit.make_bag(self.tmpdir, checksum=['sha256'])

        actual_message = str(raised.exception)
        self.assertEqual(
            'Read permissions are required to calculate file fixities',
            actual_message)
    def test_garbage_in_bagit_txt(self):
        """Expect BagValidationError when bagit.txt carries a junk third line."""
        bagit.make_bag(self.tmpdir)
        # Two well-formed tag lines followed by a line of 34 '=' characters.
        corrupted_declaration = (
            "BagIt-Version: 0.97\n"
            "Tag-File-Character-Encoding: UTF-8\n"
            + "=" * 34 + "\n"
        )
        with open(j(self.tmpdir, "bagit.txt"), "w") as declaration:
            declaration.write(corrupted_declaration)
        with self.assertRaises(bagit.BagValidationError):
            bagit.Bag(self.tmpdir)
    def test_make_bag_with_unreadable_subdirectory(self):
        """Expect BagError when the 'loc' subdirectory is write-only."""
        # Mode 0o200 (owner write only) is meant to exercise the second
        # permission check in make_bag.
        write_only_dir = j(self.tmpdir, 'loc')
        os.chmod(write_only_dir, 0o200)

        with self.assertRaises(bagit.BagError) as raised:
            bagit.make_bag(self.tmpdir, checksum=['sha256'])

        actual_message = str(raised.exception)
        self.assertEqual(
            'Read permissions are required to calculate file fixities',
            actual_message)
    def test_open_bag_with_unsupported_version(self):
        """Expect BagError when bagit.txt declares BagIt-Version 2.0."""
        bagit.make_bag(self.tmpdir)

        declaration_path = j(self.tmpdir, 'bagit.txt')
        with open(declaration_path, 'w') as declaration:
            declaration.write(
                'BagIt-Version: 2.0\nTag-File-Character-Encoding: UTF-8\n')

        with self.assertRaises(bagit.BagError) as raised:
            bagit.Bag(self.tmpdir)

        actual_message = str(raised.exception)
        self.assertEqual('Unsupported bag version: 2.0', actual_message)
    def test_open_bag_with_missing_bagit_txt(self):
        """Expect BagError when bagit.txt is deleted before the bag is opened."""
        bagit.make_bag(self.tmpdir)

        declaration_path = j(self.tmpdir, 'bagit.txt')
        os.unlink(declaration_path)

        with self.assertRaises(bagit.BagError) as raised:
            bagit.Bag(self.tmpdir)

        expected_message = (
            'Expected bagit.txt does not exist: %s/bagit.txt' % self.tmpdir)
        self.assertEqual(expected_message, str(raised.exception))
    def test_unicode_bag_info(self):
        """Non-ASCII bag-info values must show up verbatim in bag-info.txt."""
        # One value with a BMP character, one with a supplementary-plane
        # character (as the key names indicate).
        info = {
            'Test-BMP':
            u'This element contains a \N{LATIN SMALL LETTER U WITH DIAERESIS}',
            'Test-SMP': u'This element contains a \N{LINEAR B SYMBOL B049}',
        }

        bagit.make_bag(self.tmpdir, bag_info=info, checksums=['md5'])

        written_text = slurp_text_file(j(self.tmpdir, 'bag-info.txt'))
        for expected_value in info.values():
            self.assertIn(expected_value, written_text)
    def test_make_bag(self):
        """Bag the fixture directory and check every file make_bag writes.

        Covers the ``data`` payload directory, ``bagit.txt``, the md5 payload
        manifest, ``bag-info.txt`` and the md5 tag manifest.
        """
        info = {'Bagging-Date': '1970-01-01', 'Contact-Email': '*****@*****.**'}
        bagit.make_bag(self.tmpdir, bag_info=info, checksums=['md5'])

        # data dir should've been created
        self.assertTrue(os.path.isdir(j(self.tmpdir, 'data')))

        # check bagit.txt
        self.assertTrue(os.path.isfile(j(self.tmpdir, 'bagit.txt')))
        bagit_txt = slurp_text_file(j(self.tmpdir, 'bagit.txt'))
        # assertIn rather than assertTrue: assertTrue(a, b) uses b only as the
        # failure message, so a non-empty literal would always pass.
        self.assertIn('BagIt-Version: 0.97', bagit_txt)
        self.assertIn('Tag-File-Character-Encoding: UTF-8', bagit_txt)

        # check manifest
        self.assertTrue(os.path.isfile(j(self.tmpdir, 'manifest-md5.txt')))
        manifest_txt = slurp_text_file(j(self.tmpdir,
                                         'manifest-md5.txt')).splitlines()
        self.assertIn('8e2af7a0143c7b8f4de0b3fc90f27354  data/README',
                      manifest_txt)
        self.assertIn(
            '9a2b89e9940fea6ac3a0cc71b0a933a0  data/loc/2478433644_2839c5e8b8_o_d.jpg',
            manifest_txt)
        self.assertIn(
            '6172e980c2767c12135e3b9d246af5a3  data/loc/3314493806_6f1db86d66_o_d.jpg',
            manifest_txt)
        self.assertIn(
            '38a84cd1c41de793a0bccff6f3ec8ad0  data/si/2584174182_ffd5c24905_b_d.jpg',
            manifest_txt)
        self.assertIn(
            '5580eaa31ad1549739de12df819e9af8  data/si/4011399822_65987a4806_b_d.jpg',
            manifest_txt)

        # check bag-info.txt
        self.assertTrue(os.path.isfile(j(self.tmpdir, 'bag-info.txt')))
        bag_info_txt = slurp_text_file(j(self.tmpdir, 'bag-info.txt'))
        bag_info_txt = bag_info_txt.splitlines()
        # Derive the expected line from the dict passed to make_bag so the
        # check always matches the input value.
        self.assertIn('Contact-Email: %s' % info['Contact-Email'], bag_info_txt)
        self.assertIn('Bagging-Date: 1970-01-01', bag_info_txt)
        self.assertIn('Payload-Oxum: 991765.5', bag_info_txt)
        self.assertIn(
            'Bag-Software-Agent: bagit.py v1.5.4 <https://github.com/LibraryOfCongress/bagit-python>',
            bag_info_txt)

        # check tagmanifest-md5.txt
        # NOTE(review): the bag-info.txt digest below presumably depends on the
        # exact bag-info.txt contents (Contact-Email, software agent version);
        # confirm it still matches the values used in this test.
        self.assertTrue(os.path.isfile(j(self.tmpdir, 'tagmanifest-md5.txt')))
        tagmanifest_txt = slurp_text_file(j(
            self.tmpdir, 'tagmanifest-md5.txt')).splitlines()
        self.assertIn('9e5ad981e0d29adc278f6a294b8c2aca bagit.txt',
                      tagmanifest_txt)
        self.assertIn('a0ce6631a2a6d1a88e6d38453ccc72a5 manifest-md5.txt',
                      tagmanifest_txt)
        self.assertIn('0a6ffcffe67e9a34e44220f7ebcb4baa bag-info.txt',
                      tagmanifest_txt)
    def test_open_bag_with_malformed_bagit_txt(self):
        """Expect BagError naming both required tags when bagit.txt is empty."""
        bagit.make_bag(self.tmpdir)

        declaration_path = j(self.tmpdir, 'bagit.txt')
        with open(declaration_path, 'w') as declaration:
            # Explicitly cut the file down to zero bytes.
            os.ftruncate(declaration.fileno(), 0)

        with self.assertRaises(bagit.BagError) as raised:
            bagit.Bag(self.tmpdir)

        actual_message = str(raised.exception)
        self.assertEqual(
            'Missing required tag in bagit.txt: BagIt-Version, Tag-File-Character-Encoding',
            actual_message)
    def test_make_bag_with_unwritable_source(self):
        """Expect BagError when the bag directory and 'loc' are set to 0o500."""
        # The subdirectory is changed first, then the top-level directory.
        for relative_dir in ('loc', ''):
            os.chmod(j(self.tmpdir, relative_dir), 0o500)

        with self.assertRaises(bagit.BagError) as raised:
            bagit.make_bag(self.tmpdir, checksum=['sha256'])

        actual_message = str(raised.exception)
        self.assertEqual(
            'Missing permissions to move all files and directories',
            actual_message)
    def test_validate_missing_directory(self):
        """Expect BagValidationError after the data directory is removed."""
        bagit.make_bag(self.tmpdir)

        payload_dir = os.path.join(self.tmpdir, 'data')
        shutil.rmtree(payload_dir)

        reopened_bag = bagit.Bag(self.tmpdir)
        with self.assertRaises(bagit.BagValidationError) as raised:
            reopened_bag.validate()

        expected_message = (
            'Expected data directory %s does not exist' % payload_dir)
        self.assertEqual(expected_message, str(raised.exception))
Beispiel #15
0
    def setUp(self):
        """Build an 'updatebag' amendment bag and an Amender for samplembag.zip.

        The amendment holds trial1.json at its top level and again inside a
        'trial3' subdirectory before it is turned into a bag.
        """
        self.tempdir = tempfile.mkdtemp()
        self.amendment = os.path.join(self.tempdir, "updatebag")
        os.mkdir(self.amendment)

        sample_file = os.path.join(datadir, "samplembag", "data", "trial1.json")
        shutil.copy(sample_file, self.amendment)

        nested_dir = os.path.join(self.amendment, "trial3")
        os.mkdir(nested_dir)
        shutil.copy(sample_file, nested_dir)

        bagit.make_bag(self.amendment)

        self.amendee = os.path.join(datadir, "samplembag.zip")
        self.amender = amend.Amender(self.amendee, self.amendment)
    def test_payload_permissions(self):
        """The payload directory should get the bag directory's mode bits."""
        original_mode = os.stat(self.tmpdir).st_mode

        # The temp dir must start out without the write bit for "others"
        # (stat.S_IWOTH).
        self.assertEqual(original_mode & stat.S_IWOTH, 0)

        # Turn that bit on, bag the directory, and expect the resulting
        # payload directory to report the very same mode.
        widened_mode = original_mode | stat.S_IWOTH
        self.assertNotEqual(original_mode, widened_mode)
        os.chmod(self.tmpdir, widened_mode)
        bagit.make_bag(self.tmpdir)
        payload_mode = os.stat(j(self.tmpdir, 'data')).st_mode
        self.assertEqual(payload_mode, widened_mode)
Beispiel #17
0
    def test_init_member_bags2(self):
        """_init_member_bags when the amendee is not natively a head bag."""
        self.amendee = os.path.join(self.tempdir, "gooberbag")
        sample_payload = os.path.join(datadir, "samplembag", "data")
        shutil.copytree(sample_payload, self.amendee)
        bagit.make_bag(self.amendee)
        self.amender = amend.Amender(self.amendee, self.amendment)

        member_bags_path = os.path.join(self.amender._newheaddir, "multibag",
                                        "member-bags.tsv")
        # The file must be absent both before and after the call.
        self.assertFalse(os.path.exists(member_bags_path))

        self.amender._init_member_bags()
        self.assertFalse(os.path.exists(member_bags_path))
        self.assertEqual(self.amender._newhead.member_bag_names, ["gooberbag"])
    def test_open_bag_with_invalid_versions(self):
        """Expect BagError for each malformed BagIt-Version written to bagit.txt."""
        bagit.make_bag(self.tmpdir)

        declaration_path = j(self.tmpdir, 'bagit.txt')
        declaration_template = (
            'BagIt-Version: %s\nTag-File-Character-Encoding: UTF-8\n')

        for bad_version in ('a.b', '2.', '0.1.2', '1.2.3'):
            with open(declaration_path, 'w') as declaration:
                declaration.write(declaration_template % bad_version)

            with self.assertRaises(bagit.BagError) as raised:
                bagit.Bag(self.tmpdir)

            expected_message = (
                'Bag version numbers must be MAJOR.MINOR numbers, not %s'
                % bad_version)
            self.assertEqual(expected_message, str(raised.exception))
 def test_validate_unreadable_file(self):
     """Expect full validation to fail once a payload file's mode is 0."""
     bag = bagit.make_bag(self.tmpdir, checksum=["md5"])
     locked_file = j(self.tmpdir, "data/loc/2478433644_2839c5e8b8_o_d.jpg")
     os.chmod(locked_file, 0)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(bag, fast=False)
    def test_validate_optional_tagfile_in_directory(self):
        """Validate a tag file that lives in a nested directory of the bag.

        Three cases are exercised in turn against ``tagmanifest-md5.txt``:
        a wrong checksum (validation must fail), the correct checksum
        (validation must succeed) and a missing tag file (validation must
        fail).
        """
        bagit.make_bag(self.tmpdir, checksums=['md5'])
        tagdir = tempfile.mkdtemp(dir=self.tmpdir)
        tagfolder = j(tagdir, "tagfolder")
        tagfile_path = j(tagfolder, "tagfile")

        if not os.path.exists(tagfolder):
            os.makedirs(tagfolder)

        with open(tagfile_path, "w") as tagfile:
            tagfile.write("test")

        # Path of the tag file relative to the bag root, written with forward
        # slashes. str.replace returns a new string, so the result must be
        # assigned back for the separator normalisation to take effect.
        relpath = tagfile_path.replace(self.tmpdir + os.sep, "")
        relpath = relpath.replace("\\", "/")

        with open(j(self.tmpdir, "tagmanifest-md5.txt"), "w") as tagman:
            # Incorrect checksum.
            tagman.write("8e2af7a0143c7b8f4de0b3fc90f27354 " + relpath + "\n")
        bag = bagit.Bag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)

        # Correct checksum, computed from the tag file's actual contents.
        hasher = hashlib.new("md5")
        with open(tagfile_path, "r") as tf:
            contents = tf.read().encode('utf-8')
        hasher.update(contents)
        with open(j(self.tmpdir, "tagmanifest-md5.txt"), "w") as tagman:
            tagman.write(hasher.hexdigest() + " " + relpath + "\n")
        bag = bagit.Bag(self.tmpdir)
        self.assertTrue(self.validate(bag))

        # Missing tagfile.
        os.remove(tagfile_path)
        bag = bagit.Bag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)
 def test_make_bag_md5_sha256_manifest(self):
     """Bagging with md5 and sha256 must write both manifests and validate."""
     bag = bagit.make_bag(self.tmpdir, checksum=['md5', 'sha256'])
     # One payload manifest per requested algorithm.
     md5_manifest = j(self.tmpdir, 'manifest-md5.txt')
     self.assertTrue(os.path.isfile(md5_manifest))
     sha256_manifest = j(self.tmpdir, 'manifest-sha256.txt')
     self.assertTrue(os.path.isfile(sha256_manifest))
     # The bag must pass fast validation with both manifests present.
     self.assertTrue(self.validate(bag, fast=True))
Beispiel #22
0
    def test_init_member_bags3(self):
        """_init_member_bags when the amendment is already head-bag conformant."""
        self.amendee = os.path.join(self.tempdir, "gooberbag")
        sample_payload = os.path.join(datadir, "samplembag", "data")
        shutil.copytree(sample_payload, self.amendee)
        bagit.make_bag(self.amendee)
        amend.make_single_multibag(self.amendment)
        self.amender = amend.Amender(self.amendee, self.amendment)

        member_bags_path = os.path.join(self.amender._newheaddir, "multibag",
                                        "member-bags.tsv")
        # Present before the call, gone afterwards.
        self.assertTrue(os.path.exists(member_bags_path))

        self.amender._init_member_bags()
        self.assertFalse(os.path.exists(member_bags_path))
        self.assertEqual(self.amender._newhead.member_bag_names, ["gooberbag"])
Beispiel #23
0
    def test_convert_new(self):
        """Generate a dataset, bag it, convert it to a multibag and validate it."""
        # Generate the sample data; the target directory must not exist yet.
        self.bagdir = os.path.join(self.tempdir, "sampledata")
        self.assertFalse(os.path.isdir(self.bagdir))
        dataset_spec = {
            'totalsize': 15,
            'totalfiles': 3,
            'files': [{
                'totalsize': 10,
                'totalfiles': 2
            }],
            'dirs': [{
                'totalsize': 5,
                'totalfiles': 1
            }]
        }
        maker = mkdata.DatasetMaker(self.bagdir, dataset_spec)
        maker.fill()
        self.assertTrue(os.path.isdir(self.bagdir))

        # Bag the generated directory; the new bag must validate.
        new_bag = bagit.make_bag(self.bagdir)
        self.assertTrue(new_bag.validate())

        multibag_dir = os.path.join(self.bagdir, 'multibag')
        self.assertFalse(os.path.exists(multibag_dir))

        # Conversion is expected to create the multibag directory.
        self.mkr = amend.SingleMultibagMaker(self.bagdir)
        self.mkr.convert("1.5", "doi:XXXX/11111")
        self.assertTrue(os.path.exists(multibag_dir))

        # Finally, check the result as a head bag.
        valid8.validate_headbag(self.bagdir)
 def test_is_valid(self):
     """is_valid() is true for a fresh bag and false after an extra payload file."""
     bagit.make_bag(self.tmpdir)
     bag = bagit.Bag(self.tmpdir)
     self.assertTrue(bag.is_valid())
     # Add a file under data/ that was not there when the bag was made.
     stray_path = j(self.tmpdir, "data", "extra_file")
     with open(stray_path, "w") as stray:
         stray.write("bar")
     self.assertFalse(bag.is_valid())
    def test_mixed_case_checksums(self):
        """A bag must still validate after one manifest digest is upper-cased."""
        bag = bagit.make_bag(self.tmpdir, checksums=['md5'])
        manifest_path = j(self.tmpdir, "manifest-md5.txt")
        tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

        # Only payload entries matter here; tag-manifest entries are skipped.
        # The last payload entry seen is the one whose digest gets rewritten.
        payload_digests = {}
        for entry_path in bag.entries:
            if entry_path.startswith('data' + os.sep):
                payload_digests = bag.entries[entry_path]
        digest = next(iter(payload_digests.values()))

        manifest_text = slurp_text_file(manifest_path)
        upper_cased_manifest = manifest_text.replace(digest, digest.upper())
        with open(manifest_path, "wb") as manifest_file:
            manifest_file.write(upper_cased_manifest.encode('utf-8'))

        # manifest-md5.txt changed, so its own md5 recorded in
        # tagmanifest-md5.txt has to be replaced with the new value.
        manifest_hasher = hashlib.new('md5')
        manifest_hasher.update(slurp_text_file(manifest_path).encode('utf-8'))
        with open(tagmanifest_path, "r") as tagmanifest:
            tagmanifest_text = tagmanifest.read()
            tagmanifest_text = tagmanifest_text.replace(
                bag.entries['manifest-md5.txt']['md5'],
                manifest_hasher.hexdigest())
        with open(tagmanifest_path, "w") as tagmanifest:
            tagmanifest.write(tagmanifest_text)

        reopened_bag = bagit.Bag(self.tmpdir)
        self.assertTrue(self.validate(reopened_bag))
Beispiel #26
0
    def test_init_file_lookup2(self):
        """_init_file_lookup when the amendee is not natively a head bag."""
        self.amendee = os.path.join(self.tempdir, "gooberbag")
        sample_payload = os.path.join(datadir, "samplembag", "data")
        shutil.copytree(sample_payload, self.amendee)
        bagit.make_bag(self.amendee)
        self.amender = amend.Amender(self.amendee, self.amendment)

        lookup_path = os.path.join(self.amender._newheaddir, "multibag",
                                   "file-lookup.tsv")
        # The lookup file must be absent both before and after initialisation.
        self.assertFalse(os.path.exists(lookup_path))

        self.amender._init_multibag_info()
        self.amender._init_file_lookup()
        self.assertFalse(os.path.exists(lookup_path))

        located_in = self.amender._newhead.lookup_file("data/trial1.json")
        self.assertEqual(located_in, "gooberbag")
 def test_sha1_tagfile(self):
     """Bagging with sha1 must write tagmanifest-sha1.txt with a known digest."""
     info = {'Bagging-Date': '1970-01-01', 'Contact-Email': '*****@*****.**'}
     bag = bagit.make_bag(self.tmpdir, checksum=['sha1'], bag_info=info)
     tagmanifest_path = j(self.tmpdir, 'tagmanifest-sha1.txt')
     self.assertTrue(os.path.isfile(tagmanifest_path))
     # NOTE(review): this digest presumably depends on the exact contents of
     # bag-info.txt, including the Contact-Email value above; confirm.
     recorded_sha1 = bag.entries['bag-info.txt']['sha1']
     self.assertEqual(
         'f69110479d0d395f7c321b3860c2bc0c96ae9fe8',
         recorded_sha1,
     )
 def test_validate_fast_without_oxum(self):
     """Expect fast validation to fail once bag-info.txt has been deleted."""
     bagit.make_bag(self.tmpdir)
     os.remove(j(self.tmpdir, "bag-info.txt"))
     reopened_bag = bagit.Bag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(reopened_bag, fast=True)
Beispiel #29
0
    def test_amend_bag_with(self):
        """Amend the sample bag with two extra bags and check the new head bag.

        Two single-file bags are created, ``amend.amend_bag_with`` is called
        with them, and the amendment's bag-info tags, ``member-bags.tsv`` and
        ``file-lookup.tsv`` are checked before the result is validated as a
        head bag.
        """
        xtrabag1 = os.path.join(self.tempdir, "gooberbag1")
        os.mkdir(xtrabag1)
        srcfile = os.path.join(datadir, "samplembag", "data", "trial2.json")
        shutil.copy(srcfile, xtrabag1)
        bagit.make_bag(xtrabag1)

        # The second bag holds the same source file under a different name.
        xtrabag2 = os.path.join(self.tempdir, "gooberbag2")
        os.mkdir(xtrabag2)
        srcfile = os.path.join(datadir, "samplembag", "data", "trial2.json")
        shutil.copy(srcfile, os.path.join(xtrabag2, "trial4.json"))
        bagit.make_bag(xtrabag2)

        amend.amend_bag_with(self.amendee, self.amendment, "2", xtrabag1,
                             xtrabag2)

        bag = bagit.open_bag(self.amendment)
        self.assertEqual(bag.info.get('Multibag-Version'), CURRENT_VERSION)
        self.assertEqual(bag.info.get('Multibag-Tag-Directory'), 'multibag')
        self.assertEqual(bag.info.get('Multibag-Head-Version'), '2')
        self.assertEqual(bag.info.get('Multibag-Head-Deprecates'), '1.0')

        tagfile = os.path.join(self.amendment, "multibag", "member-bags.tsv")
        with open(tagfile) as fd:
            member_names = [line.strip() for line in fd]
        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(
            member_names,
            ["samplembag", "gooberbag1", "gooberbag2", "updatebag"])

        tagfile = os.path.join(self.amendment, "multibag", "file-lookup.tsv")
        lu = {}
        nol = 0
        with open(tagfile) as fd:
            for line in fd:
                fields = line.strip().split('\t')
                nol += 1
                lu[fields[0]] = fields[1]

        self.assertEqual(lu.get('data/trial1.json'), 'updatebag')
        self.assertEqual(lu.get('data/trial2.json'), 'gooberbag1')
        self.assertEqual(lu.get('data/trial4.json'), 'gooberbag2')
        self.assertEqual(lu.get('data/trial3/trial3a.json'), 'samplembag')
        self.assertEqual(lu.get('data/trial3/trial1.json'), 'updatebag')
        # Line count equal to key count means no path is listed twice.
        self.assertEqual(nol, len(lu))

        # validate it as a headbag
        valid8.validate_headbag(self.amendment)
Beispiel #30
0
    def test_add_amending_bag(self):
        """add_amending_bag must register the extra bag, its URI and its files."""
        extra_bag_dir = os.path.join(self.tempdir, "gooberbag")
        os.mkdir(extra_bag_dir)
        sample_file = os.path.join(datadir, "samplembag", "data", "trial2.json")
        shutil.copy(sample_file, extra_bag_dir)
        bagit.make_bag(extra_bag_dir)

        self.amender.init_from_amendee()
        self.amender.add_amending_bag(
            extra_bag_dir, pid="foo://goob", comment="Ya")

        new_head = self.amender._newhead
        self.assertEqual(new_head.member_bag_names,
                         ["samplembag", "gooberbag"])
        self.assertEqual(new_head.member_bags()[1].uri, "foo://goob")
        self.assertEqual(new_head.lookup_file("data/trial1.json"),
                         "samplembag")
        self.assertEqual(new_head.lookup_file("data/trial2.json"),
                         "gooberbag")