Ejemplo n.º 1
0
    def test_validate_optional_tagfile_in_directory(self):
        """Validate a bag whose tag manifest lists a file in a nested tag dir.

        The tag manifest is rewritten twice and the tag file is then removed:
        a wrong checksum must fail validation, the real checksum must pass,
        and a missing tag file must fail again.
        """
        bagit.make_bag(self.tmpdir, checksums=['md5'])
        tagdir = tempfile.mkdtemp(dir=self.tmpdir)
        tagfile_path = j(tagdir, "tagfolder", "tagfile")
        tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

        if not os.path.exists(j(tagdir, "tagfolder")):
            os.makedirs(j(tagdir, "tagfolder"))

        with open(tagfile_path, "w") as tagfile:
            tagfile.write("test")

        # The manifest entry is the path relative to the bag root, written
        # with forward slashes. str.replace() returns a new string, so the
        # result must be assigned back (it used to be thrown away).
        relpath = tagfile_path.replace(self.tmpdir + os.sep, "")
        relpath = relpath.replace("\\", "/")

        with open(tagmanifest_path, "w") as tagman:
            # Incorrect checksum.
            tagman.write("8e2af7a0143c7b8f4de0b3fc90f27354 " + relpath + "\n")
        bag = bagit.Bag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)

        # Correct checksum, computed from the tag file as it is on disk.
        hasher = hashlib.new("md5")
        with open(tagfile_path, "r") as tf:
            contents = tf.read().encode('utf-8')
        hasher.update(contents)
        with open(tagmanifest_path, "w") as tagman:
            tagman.write(hasher.hexdigest() + " " + relpath + "\n")
        bag = bagit.Bag(self.tmpdir)
        self.assertTrue(self.validate(bag))

        # Missing tagfile.
        os.remove(tagfile_path)
        bag = bagit.Bag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)
Ejemplo n.º 2
0
    def test_save_baginfo(self):
        """Check that bag-info edits survive save() and a reload from disk.

        Covers a single string value and a list value for one tag.
        """
        bag = bagit.make_bag(self.tmpdir)

        bag.info["foo"] = "bar"
        bag.save()
        bag = bagit.Bag(self.tmpdir)
        self.assertEqual(bag.info["foo"], "bar")
        self.assertTrue(bag.is_valid())

        bag.info['x'] = ["a", "b", "c"]
        bag.save()
        reloaded = bagit.Bag(self.tmpdir)
        self.assertEqual(reloaded.info["x"], ["a", "b", "c"])
        self.assertTrue(bag.is_valid())
        # Previously only the in-memory object was validated here; the bag
        # that was just re-read from disk has to be valid as well.
        self.assertTrue(reloaded.is_valid())
Ejemplo n.º 3
0
    def test_mixed_case_checksums(self):
        """A manifest whose payload checksum is upper-cased must still validate.

        The payload digest in manifest-md5.txt is replaced by its upper-case
        form, and tagmanifest-md5.txt is patched with the new md5 of the
        manifest so that only the letter case differs.
        """
        bag = bagit.make_bag(self.tmpdir, checksums=['md5'])
        manifest_path = j(self.tmpdir, "manifest-md5.txt")
        tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

        # Look at payload entries only; entries that come from the
        # tagmanifest file are skipped. The last payload entry wins.
        payload_prefix = 'data' + os.sep
        digests = {}
        for name in bag.entries.keys():
            if name.startswith(payload_prefix):
                digests = bag.entries[name]
        payload_digest = next(iter(digests.values()))

        upper_cased = slurp_text_file(manifest_path).replace(
            payload_digest, payload_digest.upper())
        with open(manifest_path, "wb") as out:
            out.write(upper_cased.encode('utf-8'))

        # manifest-md5.txt changed, so its own md5 in tagmanifest-md5.txt
        # has to be recalculated and swapped in.
        md5 = hashlib.new('md5')
        md5.update(slurp_text_file(manifest_path).encode('utf-8'))
        with open(tagmanifest_path, "r") as src:
            tagmanifest_text = src.read()
        tagmanifest_text = tagmanifest_text.replace(
            bag.entries['manifest-md5.txt']['md5'], md5.hexdigest())
        with open(tagmanifest_path, "w") as out:
            out.write(tagmanifest_text)

        reloaded = bagit.Bag(self.tmpdir)
        self.assertTrue(self.validate(reloaded))
Ejemplo n.º 4
0
 def test_is_valid(self):
     """is_valid() is true for a fresh bag and false after a file is added."""
     bagit.make_bag(self.tmpdir)
     reopened = bagit.Bag(self.tmpdir)
     self.assertTrue(reopened.is_valid())

     # Drop a new file into data/ after the bag has been created.
     extra_path = j(self.tmpdir, "data", "extra_file")
     with open(extra_path, "w") as extra:
         extra.write("bar")
     self.assertFalse(reopened.is_valid())
Ejemplo n.º 5
0
 def test_validate_fast_without_oxum(self):
     """Fast validation must raise once bag-info.txt has been deleted."""
     bagit.make_bag(self.tmpdir)
     os.remove(j(self.tmpdir, "bag-info.txt"))
     stripped = bagit.Bag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(stripped, fast=True)
Ejemplo n.º 6
0
    def test_save_baginfo_with_sha1(self):
        """A bag created with sha1 and md5 manifests stays valid across saves."""
        # Use the `checksums` keyword like the other tests in this file;
        # NOTE(review): `checksum` appears to be only a deprecated alias in
        # bagit.make_bag - confirm against the installed version.
        bag = bagit.make_bag(self.tmpdir, checksums=["sha1", "md5"])
        self.assertTrue(bag.is_valid())
        bag.save()

        bag.info['foo'] = "bar"
        bag.save()

        bag = bagit.Bag(self.tmpdir)
        self.assertTrue(bag.is_valid())
Ejemplo n.º 7
0
    def test_save_only_baginfo(self):
        """A plain save() stores bag-info changes but leaves the bag invalid
        when a payload file was added after bagging."""
        created = bagit.make_bag(self.tmpdir)

        # New payload file written after the bag was made.
        new_payload = j(self.tmpdir, 'data', 'newfile')
        with open(new_payload, 'w') as out:
            out.write('newfile')

        created.info["foo"] = "bar"
        created.save()

        reloaded = bagit.Bag(self.tmpdir)
        self.assertEqual(reloaded.info["foo"], "bar")
        self.assertFalse(reloaded.is_valid())
Ejemplo n.º 8
0
 def test_validate_slow_without_oxum_extra_file(self):
     """Full validation must raise when bag-info.txt is gone and an extra
     payload file is present."""
     bagit.make_bag(self.tmpdir)
     os.remove(j(self.tmpdir, "bag-info.txt"))

     extra_path = j(self.tmpdir, "data", "extra_file")
     with open(extra_path, "w") as extra:
         extra.write("foo")

     reopened = bagit.Bag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(reopened, fast=False)
Ejemplo n.º 9
0
 def test_bom_in_bagit_txt(self):
     """Validation must raise when bagit.txt is prefixed with a UTF-8 BOM."""
     bagit.make_bag(self.tmpdir)
     bagit_txt = j(self.tmpdir, "bagit.txt")

     # The file is read and written in text mode, so on Python 3 the BOM
     # has to be a str; on Python 2 the raw byte string is used as is.
     if sys.version_info[0] >= 3:
         bom = codecs.BOM_UTF8.decode('utf-8')
     else:
         bom = codecs.BOM_UTF8

     with open(bagit_txt, "r") as reader:
         with_bom = bom + reader.read()
     with open(bagit_txt, "w") as writer:
         writer.write(with_bom)

     reopened = bagit.Bag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(reopened)
Ejemplo n.º 10
0
    def test_open_bag_with_missing_bagit_txt(self):
        """Opening a bag without bagit.txt raises BagError with a fixed message."""
        bagit.make_bag(self.tmpdir)
        os.unlink(j(self.tmpdir, 'bagit.txt'))

        expected_message = (
            'Expected bagit.txt does not exist: %s/bagit.txt' % self.tmpdir)

        with self.assertRaises(bagit.BagError) as caught:
            bagit.Bag(self.tmpdir)

        self.assertEqual(expected_message, str(caught.exception))
Ejemplo n.º 11
0
    def test_open_bag_with_unsupported_version(self):
        """Opening a bag that declares BagIt-Version 2.0 raises BagError."""
        bagit.make_bag(self.tmpdir)

        # Overwrite bagit.txt with a declaration for version 2.0.
        declaration = 'BagIt-Version: 2.0\nTag-File-Character-Encoding: UTF-8\n'
        with open(j(self.tmpdir, 'bagit.txt'), 'w') as out:
            out.write(declaration)

        with self.assertRaises(bagit.BagError) as caught:
            bagit.Bag(self.tmpdir)

        actual_message = str(caught.exception)
        self.assertEqual('Unsupported bag version: 2.0', actual_message)
Ejemplo n.º 12
0
 def test_validate_flipped_bit(self):
     """Altered payload content fails full validation only.

     Fast and completeness-only validation are both expected to pass.
     """
     bagit.make_bag(self.tmpdir)
     readme_path = j(self.tmpdir, "data", "README")

     # Swap the first character of the README for an 'A'.
     corrupted = 'A' + slurp_text_file(readme_path)[1:]
     with open(readme_path, "w") as out:
         out.write(corrupted)

     reopened = bagit.Bag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(reopened)
     # fast doesn't catch the flipped bit, since the oxum is the same
     self.assertTrue(self.validate(reopened, fast=True))
     self.assertTrue(self.validate(reopened, completeness_only=True))
Ejemplo n.º 13
0
    def test_open_bag_with_malformed_bagit_txt(self):
        """Opening a bag whose bagit.txt is empty raises BagError naming the
        two missing required tags."""
        bagit.make_bag(self.tmpdir)

        # Opening in 'w' mode already truncates the file to zero bytes, so
        # no explicit os.ftruncate() call is needed (that call is also not
        # available on every platform/Python version).
        with open(j(self.tmpdir, 'bagit.txt'), 'w'):
            pass

        with self.assertRaises(bagit.BagError) as error_catcher:
            bagit.Bag(self.tmpdir)

        self.assertEqual(
            'Missing required tag in bagit.txt: BagIt-Version, Tag-File-Character-Encoding',
            str(error_catcher.exception))
Ejemplo n.º 14
0
    def test_validate_missing_directory(self):
        """validate() raises with a fixed message when data/ has been removed."""
        bagit.make_bag(self.tmpdir)

        payload_dir = os.path.join(self.tmpdir, 'data')
        shutil.rmtree(payload_dir)
        expected_message = (
            'Expected data directory %s does not exist' % payload_dir)

        reopened = bagit.Bag(self.tmpdir)
        with self.assertRaises(bagit.BagValidationError) as caught:
            reopened.validate()

        self.assertEqual(expected_message, str(caught.exception))
Ejemplo n.º 15
0
 def test_validate_completeness(self):
     """Renaming a payload file passes fast validation but fails the
     completeness-only check, without `_validate_entries` being called."""
     bagit.make_bag(self.tmpdir)
     readme_path = j(self.tmpdir, "data", "README")
     renamed_path = j(self.tmpdir, "data", "extra_file")
     os.rename(readme_path, renamed_path)

     reopened = bagit.Bag(self.tmpdir)
     self.assertTrue(self.validate(reopened, fast=True))

     # Replace _validate_entries with a mock so that any call to it during
     # the completeness-only run can be counted.
     with mock.patch.object(reopened, '_validate_entries') as patched:
         with self.assertRaises(bagit.BagValidationError):
             self.validate(reopened, completeness_only=True)
         self.assertEqual(patched.call_count, 0)
Ejemplo n.º 16
0
    def test_update_info(self):
        """update_info() adds the Multibag-* tags and updates the description
        and Bag-Size entries in bag-info."""
        # Precondition: the fixture bag carries no Multibag-* tags yet.
        before = bagit.Bag(self.bagdir)
        for name in before.info:
            self.assertFalse(name.startswith('Multibag-'))

        self.mkr.update_info()

        info = bagit.Bag(self.bagdir).info
        self.assertEqual(info.get('Multibag-Version'), amend.CURRENT_VERSION)
        self.assertEqual(info.get('Multibag-Head-Version'), "1")
        self.assertEqual(info.get('Multibag-Reference'),
                         amend.CURRENT_REFERENCE)
        self.assertEqual(info.get('Multibag-Tag-Directory'), "multibag")

        # The description tag must be a two-item list whose second item
        # mentions Multibag-Reference.
        self.assertIsInstance(info.get('Internal-Sender-Description'), list)
        self.assertEqual(len(info.get('Internal-Sender-Description')), 2)
        self.assertIn("Multibag-Reference",
                      info.get('Internal-Sender-Description')[1])

        self.assertEqual(info['Bag-Size'], "4.875 kB")
Ejemplo n.º 17
0
    def setUp(self):
        """Copy the sample bag into a temp dir, strip its multibag data and
        create the SingleMultibagMaker under test."""
        self.tempdir = tempfile.mkdtemp()
        self.bagdir = os.path.join(self.tempdir, "samplebag")

        sample_source = os.path.join(datadir, "samplembag")
        shutil.copytree(sample_source, self.bagdir)
        shutil.rmtree(os.path.join(self.bagdir, "multibag"))

        bag = bagit.Bag(self.bagdir)
        # Collect the tag names first so bag.info is not modified while it
        # is being iterated.
        multibag_tags = [name for name in bag.info
                         if name.startswith('Multibag-')]
        for name in multibag_tags:
            del bag.info[name]
        bag.save()

        self.mkr = amend.SingleMultibagMaker(self.bagdir)
Ejemplo n.º 18
0
    def test_open_bag_with_invalid_versions(self):
        """Each malformed BagIt-Version value makes Bag() raise BagError with
        a message that names the offending value."""
        bagit.make_bag(self.tmpdir)

        bagit_txt = j(self.tmpdir, 'bagit.txt')
        declaration = 'BagIt-Version: %s\nTag-File-Character-Encoding: UTF-8\n'
        complaint = 'Bag version numbers must be MAJOR.MINOR numbers, not %s'

        for bad_version in ('a.b', '2.', '0.1.2', '1.2.3'):
            with open(bagit_txt, 'w') as out:
                out.write(declaration % bad_version)

            with self.assertRaises(bagit.BagError) as caught:
                bagit.Bag(self.tmpdir)

            self.assertEqual(complaint % bad_version, str(caught.exception))
Ejemplo n.º 19
0
    def test_unusual_bag_info_separators(self):
        """Tag lines with tabs and spaces around the colon are all read as
        values of the same tag."""
        bagit.make_bag(self.tmpdir)

        # Six spellings of the separator, carrying the values 1 through 6.
        tag_lines = (
            'Test-Tag: 1',
            'Test-Tag:\t2',
            'Test-Tag\t: 3',
            'Test-Tag\t:\t4',
            'Test-Tag\t \t: 5',
            'Test-Tag:\t \t 6',
        )
        with open(j(self.tmpdir, 'bag-info.txt'), 'a') as info_file:
            for line in tag_lines:
                print(line, file=info_file)

        reopened = bagit.Bag(self.tmpdir)
        reopened.save(manifests=True)

        self.assertTrue(reopened.is_valid())
        expected_values = [str(number) for number in range(1, 7)]
        self.assertEqual(reopened.info['Test-Tag'], expected_values)
Ejemplo n.º 20
0
    def test_init_multibag_info3(self):
        """_init_multibag_info() extends an existing deprecation list."""
        # test when src has deprecations
        self.amendee = os.path.join(self.tempdir, "gooberbag")
        shutil.copytree(os.path.join(datadir, "samplembag"), self.amendee)

        source_bag = bagit.Bag(self.amendee)
        source_bag.info['Multibag-Head-Deprecates'] = ["0.1", "0.5"]
        source_bag.save()

        self.amender = amend.Amender(self.amendee, self.amendment)
        new_head = self.amender._newhead

        # Before initialisation the new head has no deprecation tag.
        self.assertNotIn('Multibag-Head-Deprecates', new_head.info)

        self.amender._init_multibag_info()
        deprecated = self.amender._newhead.info.get('Multibag-Head-Deprecates')
        self.assertEqual(deprecated, ["0.1", "0.5", "1.0"])
Ejemplo n.º 21
0
    def test_make_single_multibag(self):
        """make_single_multibag() writes the multibag tag directory, both
        lookup tables and the matching bag-info tags."""
        multibag_dir = os.path.join(self.bagdir, "multibag")
        members_path = os.path.join(multibag_dir, 'member-bags.tsv')
        lookup_path = os.path.join(multibag_dir, 'file-lookup.tsv')
        bag_name = os.path.basename(self.bagdir)

        def lookup_line(path):
            # One line of file-lookup.tsv: "<path>\t<bag name>\n".
            return path + "\t" + bag_name + "\n"

        self.assertFalse(os.path.exists(multibag_dir))

        amend.make_single_multibag(self.bagdir, "1.5", "doi:XXXX/11111")
        self.assertTrue(os.path.exists(multibag_dir))

        # test for member-bags.tsv
        self.assertTrue(os.path.exists(members_path))
        with open(members_path) as fd:
            member_lines = fd.readlines()
        self.assertEqual(len(member_lines), 1)
        fields = member_lines[0].strip().split("\t")
        self.assertEqual(fields[0], bag_name)
        self.assertEqual(fields[1], "doi:XXXX/11111")

        # test for file-lookup.tsv
        self.assertTrue(os.path.exists(lookup_path))
        with open(lookup_path) as fd:
            lookup_lines = fd.readlines()
        for listed in ("data/trial1.json",
                       "data/trial2.json",
                       "data/trial3/trial3a.json"):
            self.assertIn(lookup_line(listed), lookup_lines)
        for unlisted in ("metadata/pod.json", "about.txt"):
            self.assertNotIn(lookup_line(unlisted), lookup_lines)
        self.assertEqual(len(lookup_lines), 3)

        # test info tag data
        info = bagit.Bag(self.bagdir).info
        self.assertEqual(info.get('Multibag-Version'), amend.CURRENT_VERSION)
        self.assertEqual(info.get('Multibag-Head-Version'), "1.5")
        self.assertEqual(info.get('Multibag-Reference'),
                         amend.CURRENT_REFERENCE)
        self.assertEqual(info.get('Multibag-Tag-Directory'), "multibag")

        self.assertIsInstance(info.get('Internal-Sender-Description'), list)
        self.assertEqual(len(info.get('Internal-Sender-Description')), 2)
        self.assertIn("Multibag-Reference",
                      info.get('Internal-Sender-Description')[1])

        self.assertEqual(info['Bag-Size'], "5.171 kB")
Ejemplo n.º 22
0
    def test_validation_error_details(self):
        """A checksum failure is reported in the exception text and as one
        ChecksumMismatch entry in `details`."""
        bagit.make_bag(self.tmpdir,
                       checksums=['md5'],
                       bag_info={'Bagging-Date': '1970-01-01'})

        # Swap the first character of the README for an 'A'.
        readme_path = j(self.tmpdir, "data", "README")
        corrupted = 'A' + slurp_text_file(readme_path)[1:]
        with open(readme_path, "w") as out:
            out.write(corrupted)

        expected_digest = '8e2af7a0143c7b8f4de0b3fc90f27354'
        found_digest = 'fd41543285d17e7c29cd953f5cf5b955'
        expected_message = 'data/README md5 validation failed: expected="8e2af7a0143c7b8f4de0b3fc90f27354" found="fd41543285d17e7c29cd953f5cf5b955"'

        reopened = bagit.Bag(self.tmpdir)

        try:
            self.validate(reopened)
        except bagit.BagValidationError as exc:
            self.assertIn(expected_message, str(exc))
            self.assertEqual(len(exc.details), 1)

            mismatch = exc.details[0]
            self.assertEqual(expected_message, str(mismatch))
            self.assertIsInstance(mismatch, bagit.ChecksumMismatch)
            self.assertEqual(mismatch.algorithm, 'md5')
            self.assertEqual(mismatch.path, 'data/README')
            self.assertEqual(mismatch.expected, expected_digest)
            self.assertEqual(mismatch.found, found_digest)
        else:
            # Reached only when validate() returned without raising.
            self.fail("didn't get BagValidationError")
Ejemplo n.º 23
0
    def test_filename_unicode_normalization(self):
        """A bag stays valid when its manifests are rewritten in NFC while
        the payload file name was written in NFD."""
        # We need to handle cases where the Unicode normalization form of a
        # filename has changed in transit. This is hard to do portably in
        # both directions because OS X normalizes *all* filenames to an NFD
        # variant. So this is a basic test: the file is written to the
        # filesystem under its NFD name (which an HFS+ filesystem will not
        # alter), the manifests are then rewritten in NFC, and the bag must
        # still validate.

        nfc_name = 'Núñez Papers.txt'
        nfd_name = unicodedata.normalize('NFD', nfc_name)

        os.makedirs(j(self.tmpdir, 'unicode-normalization'))

        nfd_path = j(self.tmpdir, 'unicode-normalization', nfd_name)
        with open(nfd_path, 'w') as out:
            out.write(
                'This is a test filename written using NFD normalization\n')

        bag = bagit.make_bag(self.tmpdir)
        bag.save()

        self.assertTrue(bag.is_valid())

        # Now rewrite every manifest file with its contents normalized to NFC:
        for manifest_path in bag.manifest_files():
            nfc_text = unicodedata.normalize(
                'NFC', slurp_text_file(manifest_path))
            with open(manifest_path, 'wb') as out:
                out.write(nfc_text.encode('utf-8'))

        # Regenerate the tag manifest for each algorithm after that rewrite.
        for algorithm in bag.algorithms:
            bagit._make_tagmanifest_file(
                algorithm, bag.path, encoding=bag.encoding)

        # Now we'll reload the whole thing:
        reloaded = bagit.Bag(self.tmpdir)
        self.assertTrue(reloaded.is_valid())
Ejemplo n.º 24
0
 def test_multiple_oxum_values(self):
     """Fast validation still passes after a second Payload-Oxum line is
     appended to bag-info.txt."""
     bagit.make_bag(self.tmpdir)

     info_path = j(self.tmpdir, "bag-info.txt")
     with open(info_path, "a") as info_file:
         info_file.write('Payload-Oxum: 7.7\n')

     reopened = bagit.Bag(self.tmpdir)
     self.assertTrue(self.validate(reopened, fast=True))
Ejemplo n.º 25
0
 def test_unicode_in_tags(self):
     """A non-ASCII tag value passed to make_bag is read back unchanged."""
     bagit.make_bag(self.tmpdir, {"test": '♡'})
     reopened = bagit.Bag(self.tmpdir)
     stored_value = reopened.info['test']
     self.assertEqual(stored_value, '♡')
Ejemplo n.º 26
0
    def test_validation_completeness_error_details(self):
        """Completeness failures are reported in the exception text and as
        three typed entries in `details`.

        README is renamed to `extra` and bag-info.txt is removed, so two
        FileMissing errors and one UnexpectedFile error are expected.
        """
        bagit.make_bag(self.tmpdir,
                       checksums=['md5'],
                       bag_info={'Bagging-Date': '1970-01-01'})

        old_path = j(self.tmpdir, "data", "README")
        new_path = j(self.tmpdir, "data", "extra")
        os.rename(old_path, new_path)

        # remove the bag-info.txt which contains the oxum to force a full
        # check of the manifest
        os.remove(j(self.tmpdir, "bag-info.txt"))

        bag = bagit.Bag(self.tmpdir)
        got_exception = False

        try:
            self.validate(bag)
        except bagit.BagValidationError as e:
            got_exception = True

            exc_str = str(e)
            self.assertIn("Bag validation failed: ", exc_str)
            self.assertIn(
                "bag-info.txt exists in manifest but was not found on filesystem",
                exc_str)
            self.assertIn(
                "data/README exists in manifest but was not found on filesystem",
                exc_str)
            self.assertIn(
                "data/extra exists on filesystem but is not in the manifest",
                exc_str)
            self.assertEqual(len(e.details), 3)

            # The two missing-file errors may come in either order.
            if e.details[0].path == "bag-info.txt":
                baginfo_error = e.details[0]
                readme_error = e.details[1]
            else:
                baginfo_error = e.details[1]
                readme_error = e.details[0]

            self.assertEqual(
                str(baginfo_error),
                "bag-info.txt exists in manifest but was not found on filesystem"
            )
            self.assertIsInstance(baginfo_error, bagit.FileMissing)
            self.assertEqual(baginfo_error.path, "bag-info.txt")

            self.assertEqual(
                str(readme_error),
                "data/README exists in manifest but was not found on filesystem"
            )
            self.assertIsInstance(readme_error, bagit.FileMissing)
            self.assertEqual(readme_error.path, "data/README")

            error = e.details[2]
            self.assertEqual(
                str(error),
                "data/extra exists on filesystem but is not in the manifest")
            # assertTrue(error, cls) treats the class as the failure message
            # and always passes; the type has to be checked explicitly.
            self.assertIsInstance(error, bagit.UnexpectedFile)
            self.assertEqual(error.path, "data/extra")

        if not got_exception:
            self.fail("didn't get BagValidationError")
Ejemplo n.º 27
0
 def test_handle_directory_end_slash_gracefully(self):
     """A bag path with a trailing slash works for make_bag() and Bag()."""
     slashed_path = self.tmpdir + '/'

     created = bagit.make_bag(slashed_path)
     self.assertTrue(self.validate(created))

     reopened = bagit.Bag(slashed_path)
     self.assertTrue(self.validate(reopened))
Ejemplo n.º 28
0
 def test_bag_constructor(self):
     """Bag() returns an object of exactly type bagit.Bag with five payload
     files."""
     bagit.make_bag(self.tmpdir)
     opened = bagit.Bag(self.tmpdir)
     self.assertEqual(type(opened), bagit.Bag)

     payload = list(opened.payload_files())
     self.assertEqual(len(payload), 5)