Example #1
0
def main(source, dest, splitter, partition=False):
    """Split part of a bag's payload out into a new bag at ``dest``.

    The payload is divided by ``payload_filter``. Manifests for the selected
    files are written to ``dest``, the files are moved there, and the source
    bag's non-manifest tag files are copied alongside them.

    Args:
        source: Path of the bag to split.
        dest: Directory for the new bag; created when it does not exist.
        splitter: Passed to ``payload_filter`` together with the source bag.
        partition: When true, the source bag's manifests are rewritten for
            the remaining payload and, once the new bag validates,
            ``del_files`` is called on the split files and the source bag
            is saved.

    Raises:
        SystemExit: When ``payload_filter`` selects no files.
    """
    source_bag = bagit.Bag(source)
    new_payload, old_payload = payload_filter(source_bag, splitter)
    if len(new_payload) == 0:
        print('No files to split!')
        # Same effect as exit(), without relying on the site module.
        raise SystemExit
    # Resolve dest before changing directory so that a relative path keeps
    # pointing at the directory that is checked/created here.
    dest = os.path.abspath(dest)
    if not os.path.exists(dest):
        os.mkdir(dest)
    os.chdir(source)
    logging.info('Writing manifests')
    new_oxum = write_manifests(new_payload, source_bag.algorithms, dest)
    move_files(new_payload, dest)
    if partition:
        for manifest_file in source_bag.manifest_files():
            os.remove(manifest_file)
        old_oxum = write_manifests(
            old_payload, source_bag.algorithms, source_bag.path)
    for tag_file in source_bag.tagfile_entries():
        if 'manifest' not in tag_file:
            logging.info('Copying tag file {} to {}'.format(tag_file, dest))
            shutil.copy(tag_file, dest)
    new_bag = bagit.Bag(dest)
    new_bag.info['Payload-Oxum'] = new_oxum
    if partition:
        source_bag.info['Payload-Oxum'] = old_oxum
    new_bag.save()
    logging.info('Validating bag {}'.format(new_bag.path))
    if new_bag.validate():
        # Only touch the source bag when a partition was requested; a valid
        # bag without partitioning is not an error.
        if partition:
            del_files(new_payload)
            source_bag.save()
    else:
        logging.error('bag {} is invalid'.format(new_bag.path))
    logging.info('Validating bag {}'.format(source_bag.path))
    source_bag.validate()
Example #2
0
    def test_validate_optional_tagfile(self):
        """Check validation of an optional tag file listed in the tag manifest.

        Covers three states of the tag file: wrong checksum recorded (must
        fail), correct checksum recorded (must pass), and file missing
        (must fail).
        """
        bagit.make_bag(self.tmpdir, checksums=['md5'])
        tagdir = tempfile.mkdtemp(dir=self.tmpdir)
        tagfile_path = j(tagdir, "tagfile")
        tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")
        with open(tagfile_path, "w") as tagfile:
            tagfile.write("test")
        relpath = tagfile_path.replace(self.tmpdir + os.sep, "")
        # str.replace returns a new string; keep the result so the manifest
        # entry really uses forward slashes.
        relpath = relpath.replace("\\", "/")
        with open(tagmanifest_path, "w") as tagman:
            # Incorrect checksum.
            tagman.write("8e2af7a0143c7b8f4de0b3fc90f27354 " + relpath + "\n")
        bag = bagit.Bag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)

        hasher = hashlib.new("md5")
        contents = slurp_text_file(tagfile_path).encode('utf-8')
        hasher.update(contents)
        with open(tagmanifest_path, "w") as tagman:
            tagman.write(hasher.hexdigest() + " " + relpath + "\n")
        bag = bagit.Bag(self.tmpdir)
        self.assertTrue(self.validate(bag))

        # Missing tagfile.
        os.remove(tagfile_path)
        bag = bagit.Bag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)
Example #3
0
    def test_validate_optional_tagfile_in_directory(self):
        """Check validation of an optional tag file nested in a sub-folder.

        Covers three states of the tag file: wrong checksum recorded (must
        fail), correct checksum recorded (must pass), and file missing
        (must fail).
        """
        bagit.make_bag(self.tmpdir, checksums=["md5"])
        tagdir = tempfile.mkdtemp(dir=self.tmpdir)
        tagfile_path = j(tagdir, "tagfolder", "tagfile")
        tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

        if not os.path.exists(j(tagdir, "tagfolder")):
            os.makedirs(j(tagdir, "tagfolder"))

        with open(tagfile_path, "w") as tagfile:
            tagfile.write("test")
        relpath = tagfile_path.replace(self.tmpdir + os.sep, "")
        # str.replace returns a new string; keep the result so the manifest
        # entry really uses forward slashes.
        relpath = relpath.replace("\\", "/")
        with open(tagmanifest_path, "w") as tagman:
            # Incorrect checksum.
            tagman.write("8e2af7a0143c7b8f4de0b3fc90f27354 " + relpath + "\n")
        bag = bagit.Bag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)

        hasher = hashlib.new("md5")
        with open(tagfile_path, "r") as tf:
            contents = tf.read().encode("utf-8")
        hasher.update(contents)
        with open(tagmanifest_path, "w") as tagman:
            tagman.write(hasher.hexdigest() + " " + relpath + "\n")
        bag = bagit.Bag(self.tmpdir)
        self.assertTrue(self.validate(bag))

        # Missing tagfile.
        os.remove(tagfile_path)
        bag = bagit.Bag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, self.validate, bag)
Example #4
0
def callback(ch, method, properties, body):
    """Build a SIP for the 'jobname/timestamp' in ``body`` and queue it.

    The SIP is validated as a bag, copied to the DLS drop area, validated
    again, moved to the DLS watch directory and copied to HDFS; finally a
    message is sent to the indexing queue. Any failure is logged and
    reported through ``send_error_message``.
    """
    try:
        logger.info("Message received: %s." % body)
        if not verify_message(body):
            raise Exception("Could not verify message: %s" % body)

        sip_dir = create_sip(body)
        logger.debug("Created SIP: %s" % sip_dir)
        # Create our Bagit.
        sip_bag = bagit.Bag(sip_dir)
        if not sip_bag.validate():
            raise Exception("Invalid Bagit: %s" % sip_dir)

        logger.debug("Moving %s to %s." % (body, settings.DLS_DROP))
        dls = copy_to_dls(body)
        dls_bag = bagit.Bag(dls)
        if not dls_bag.validate():
            raise Exception("Invalid Bagit after copy: %s" % dls)

        logger.debug("Moving %s to %s." % (dls, settings.DLS_WATCH))
        watch_target = "%s/%s" % (settings.DLS_WATCH, os.path.basename(body))
        shutil.move(dls, watch_target)
        gztar = copy_to_hdfs(sip_dir)
        logger.debug("SIP tarball at hdfs://%s" % gztar)
        logger.debug("Sending message to '%s': %s" %
                     (settings.SUBMITTED_QUEUE_NAME, body))
        send_index_message(body)
    except Exception as e:
        logger.error("%s [%s]" % (str(e), body))
        send_error_message("%s|%s" % (body, str(e)))
Example #5
0
    def test_validate_optional_tagfile(self):
        """Check validation of an optional tag file listed in the tag manifest.

        Covers three states of the tag file: wrong checksum recorded (must
        fail), correct checksum recorded (must pass), and file missing
        (must fail).
        """
        bagit.make_bag(self.tmpdir)
        tagdir = tempfile.mkdtemp(dir=self.tmpdir)
        tagfile_path = os.path.join(tagdir, "tagfile")
        tagmanifest_path = os.path.join(self.tmpdir, "tagmanifest-md5.txt")
        with open(tagfile_path, "w") as tagfile:
            tagfile.write("test")
        relpath = tagfile_path.replace(self.tmpdir + os.sep, "")
        # str.replace returns a new string; keep the result so the manifest
        # entry really uses forward slashes.
        relpath = relpath.replace("\\", "/")

        # Incorrect checksum.
        with open(tagmanifest_path, "w") as tagman:
            tagman.write("8e2af7a0143c7b8f4de0b3fc90f27354 " + relpath + "\n")
        bag = bagit.Bag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, bag.validate)

        hasher = hashlib.new("md5")
        with open(tagfile_path, "rb") as tagfile:
            hasher.update(tagfile.read())
        with open(tagmanifest_path, "w") as tagman:
            tagman.write(hasher.hexdigest() + " " + relpath + "\n")
        bag = bagit.Bag(self.tmpdir)
        self.assertTrue(bag.validate())

        # Missing tagfile.
        os.remove(tagfile_path)
        bag = bagit.Bag(self.tmpdir)
        self.assertRaises(bagit.BagValidationError, bag.validate)
Example #6
0
    def test_save_baginfo(self):
        """Values stored in ``bag.info`` survive a save/reload round trip."""
        created = bagit.make_bag(self.tmpdir)
        created.info["foo"] = "bar"
        created.save()

        # A scalar value is read back unchanged and the bag stays valid.
        reloaded = bagit.Bag(self.tmpdir)
        self.assertEqual(reloaded.info["foo"], "bar")
        self.assertTrue(reloaded.is_valid())

        # A list value is read back as the same list.
        expected_values = ["a", "b", "c"]
        reloaded.info["x"] = ["a", "b", "c"]
        reloaded.save()
        reloaded_again = bagit.Bag(self.tmpdir)
        self.assertEqual(reloaded_again.info["x"], expected_values)
        # Validity is checked on the object that performed the save.
        self.assertTrue(reloaded.is_valid())
Example #7
0
 def test_multiple_oxum_values(self):
     """Fast validation passes when bag-info.txt gets an extra Payload-Oxum line."""
     bagit.make_bag(self.tmpdir)
     # Context manager guarantees the handle is closed even if the write
     # fails, so the bag is never reopened with a pending buffer.
     with open(os.path.join(self.tmpdir, "bag-info.txt"), "a") as baginfo:
         baginfo.write('Payload-Oxum: 7.7\n')
     bag = bagit.Bag(self.tmpdir)
     self.assertTrue(bag.validate(fast=True))
Example #8
0
def bag_derivatives(taskid, update_manifest=True):
    """
    Bag every entry of a task's derivative directory.

    Each entry is opened as an existing bag, or turned into one when it is
    not a bag yet, then tagged with its description and identifier and saved.

    args:
      taskid: cybercommons generated task id for derivative
      update_manifest: boolean to update bag manifest - default is True

    Returns the location string that points back at the task.
    """

    def open_or_create(path):
        # Reuse an existing bag; otherwise bag the directory in place.
        try:
            return bagit.Bag(path)
        except bagit.BagError:
            return bagit.make_bag(path)

    derivative_root = "{0}/oulib_tasks/{1}/derivative/".format(basedir, taskid)
    for entry in os.listdir(derivative_root):
        derivative_bag = open_or_create(
            "{0}/{1}".format(derivative_root, entry))
        derivative_bag.info['External-Description'] = entry
        derivative_bag.info['External-Identifier'] = 'University of Oklahoma Libraries'

        try:
            derivative_bag.save(manifests=update_manifest)
        except IOError as err:
            logging.error(err)
    # point back at task
    return "{0}/oulib_tasks/{1}".format(base_url, taskid)
Example #9
0
 def test_validate_slow_without_oxum_extra_file(self):
     """Full validation fails on an extra payload file when bag-info.txt is gone."""
     bagit.make_bag(self.tmpdir)
     os.remove(j(self.tmpdir, "bag-info.txt"))
     extra_path = j(self.tmpdir, "data", "extra_file")
     with open(extra_path, "w") as handle:
         handle.write("foo")
     reopened = bagit.Bag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(reopened, fast=False)
Example #10
0
    def bag_load(self, bag_path):
        """Extract a zipped bag into a temporary directory and inspect it.

        The extracted bag and its temporary directory are stored in the
        module-level globals ``bag`` and ``tempdir``.

        Args:
            bag_path: Path of the zip archive that contains the bag.

        Returns:
            ``(False, False)`` when the extracted directory cannot be opened
            as a bag; ``(bag.info, encrypted_files)`` when the bag is valid,
            where ``encrypted_files`` lists payload PDFs/zips detected as
            encrypted; ``(False, bad_files)`` when the bag is invalid, where
            ``bad_files`` lists the paths with checksum mismatches.
        """
        global bag
        global tempdir
        tempdir = tempfile.TemporaryDirectory()
        # Close the archive as soon as extraction is finished.
        with ZipFile(bag_path) as archive:
            archive.extractall(path=tempdir.name)
        try:
            bag = bagit.Bag(path=tempdir.name)
        except Exception:
            # Still best-effort, but no longer swallows KeyboardInterrupt
            # or SystemExit the way a bare except did.
            return False, False

        if bag.is_valid():
            encrypted_files = []
            for x in bag.payload_files():
                suffix = Path(x).suffix
                payload_path = os.path.join(tempdir.name, x)
                if suffix == '.pdf':
                    with open(payload_path, mode='rb') as pdf:
                        reader = PyPDF2.PdfFileReader(pdf)
                        if reader.isEncrypted:
                            encrypted_files.append(x)
                elif suffix == '.zip':
                    with ZipFile(payload_path) as zippy:
                        names = zippy.namelist()
                        # An empty archive has no member to probe.
                        if names:
                            try:
                                # Opening a member raises RuntimeError when
                                # a password is required.
                                with zippy.open(names[0]):
                                    pass
                            except RuntimeError:
                                encrypted_files.append(x)
            return bag.info, encrypted_files

        bad_files = []
        try:
            bag.validate()
        except bagit.BagValidationError as e:
            for d in e.details:
                if isinstance(d, bagit.ChecksumMismatch):
                    bad_files.append(d.path)
        return False, bad_files
Example #11
0
    def test_cron(self):
        """Run the discover and deliver crons over the reference bags."""
        for ref in BAGS_REF:
            helpers.create_target_bags(
                ref[0],
                settings.TEST_BAGS_DIR,
                self.orgs[0],
                username=self.user.username,
            )
        discovered = DiscoverTransfers().do()
        self.assertIsNot(False, discovered)

        # Push every validated archive into the accessioning state so the
        # delivery cron picks it up.
        validated = Archives.objects.filter(process_status=Archives.VALIDATED)
        for archive in validated:
            archive.process_status = Archives.ACCESSIONING_STARTED
            archive.save()
        delivered = DeliverTransfers().do()
        self.assertIsNot(False, delivered)

        still_accessioning = Archives.objects.filter(
            process_status=Archives.ACCESSIONING_STARTED)
        self.assertEqual(len(still_accessioning), 0)

        delivered_archives = Archives.objects.filter(
            process_status=Archives.DELIVERED)
        self.assertEqual(len(delivered_archives),
                         len(os.listdir(settings.DELIVERY_QUEUE_DIR)))

        for bag_name in os.listdir(settings.DELIVERY_QUEUE_DIR):
            queued_bag = bagit.Bag(
                os.path.join(settings.DELIVERY_QUEUE_DIR, bag_name))
            self.assertIn('Origin', queued_bag.bag_info)
Example #12
0
def find_arcp(base_path):
    """Return the bag's External-Identifier when it is an arcp URI.

    Raises a plain ``Exception`` when the identifier is not an arcp URI.
    """
    # First try to find External-Identifier
    ext_id = bagit.Bag(base_path).info.get("External-Identifier")
    if not arcp.is_arcp_uri(ext_id):
        raise Exception("Can't find External-Identifier")
    return ext_id
Example #13
0
def read_bag(bag_path):
    """
    Check that ``bag_path`` holds a valid bag.

    A directory is opened in place. A zip archive is extracted into a
    temporary directory, which is removed again before returning.

    :param bag_path: path of a bag directory or of a zip archive whose first
        entry lives under the bag's root folder
    :return: None
    :raises HsBagitException: if the path does not exist, is a file that is
        not recognised as a zip archive, is an empty archive, or the bag is
        not valid
    """

    tmpdir = None

    try:
        if not os.path.exists(bag_path):
            raise HsBagitException('Bag does not exist')
        if os.path.isdir(bag_path):
            unpacked_bag_path = bag_path
        else:
            mtype = mimetypes.guess_type(bag_path)
            if mtype[0] != 'application/zip':
                msg = "Expected bag to have MIME type application/zip, " \
                      "but it has {0} instead.".format(mtype[0])
                raise HsBagitException(msg)
            tmpdir = tempfile.mkdtemp()
            with zipfile.ZipFile(bag_path) as zfile:
                names = zfile.namelist()
                if not names:
                    raise HsBagitException('Bag archive is empty')
                # Zip member names always use '/', whatever os.sep is.
                zroot = names[0].split('/')[0]
                zfile.extractall(tmpdir)
            unpacked_bag_path = os.path.join(tmpdir, zroot)

        bag = bagit.Bag(unpacked_bag_path)
        if not bag.is_valid():
            msg = "Bag is not valid"
            raise HsBagitException(msg)

    finally:
        if tmpdir:
            shutil.rmtree(tmpdir)
Example #14
0
    def test_mixed_case_checksums(self):
        """A bag still validates when a manifest checksum is upper-cased."""
        bag = bagit.make_bag(self.tmpdir)
        manifest_path = os.path.join(self.tmpdir, "manifest-md5.txt")
        tagmanifest_path = os.path.join(self.tmpdir, "tagmanifest-md5.txt")

        hashstr = {}
        # Extract entries only for the payload and ignore
        # entries from the tagmanifest file
        for key in bag.entries:
            if key.startswith('data' + os.sep):
                hashstr = bag.entries[key]
        # next(iter(...)) works on both Python 2 and Python 3 dicts.
        hashstr = next(iter(hashstr.values()))

        with open(manifest_path, "r") as manifest_file:
            manifest = manifest_file.read()
        manifest = manifest.replace(hashstr, hashstr.upper())
        with open(manifest_path, "w") as manifest_file:
            manifest_file.write(manifest)

        # Since manifest-md5.txt file is updated, re-calculate its
        # md5 checksum and update it in the tagmanifest-md5.txt file.
        # Hash the raw bytes on disk; the handle is closed before reuse.
        hasher = hashlib.new('md5')
        with open(manifest_path, "rb") as manifest_file:
            hasher.update(manifest_file.read())
        with open(tagmanifest_path, "r") as tagmanifest_file:
            tagmanifest = tagmanifest_file.read()
        tagmanifest = tagmanifest.replace(
            bag.entries['manifest-md5.txt']['md5'], hasher.hexdigest())
        with open(tagmanifest_path, "w") as tagmanifest_file:
            tagmanifest_file.write(tagmanifest)

        bag = bagit.Bag(self.tmpdir)
        self.assertTrue(bag.validate())
Example #15
0
def main(bagdir, outdir, parent_ref):
    """traverses a bagit package with an object directory, converting it to a
    V6 SIP using existing checksums

    Args:
        bagdir: Path of the bag to convert.
        outdir: Directory in which ``<bag name>.zip`` is created.
        parent_ref: Reference under which the bag's top-level structural
            object is added.

    Raises:
        ValueError: When a file under ``data/objects`` does not have exactly
            one entry in the bag's payload manifest.
    """
    bag = bagit.Bag(bagdir)
    os.chdir(bag.path)
    bag_path = pathlib.Path(bagdir)
    sip_path = pathlib.Path(outdir, bag_path.name+'.zip')
    sip = siplib.Sip(sip_path, parent_ref)

    # Index the payload checksums once instead of rescanning the whole
    # manifest for every file.
    checksums_by_path = {}
    for name, digests in bag.payload_entries().items():
        checksums_by_path.setdefault(pathlib.Path(name), []).append(digests)

    # Reference of the structural object created for each visited directory.
    # os.walk is top-down, so a parent is always registered before its
    # children; siblings therefore attach to their real parent rather than
    # to whichever directory happened to be visited last.
    dir_refs = {}
    for root, dirs, files in os.walk('data/objects'):
        if root == 'data/objects':
            dir_ref = sip.add_structobj(
                bag.info['identifier'], parent_ref=parent_ref)
            sip.add_identifier(dir_ref, bag.info['identifier'])
        else:
            parent_dir, dir_name = os.path.split(root)
            dir_ref = sip.add_structobj(
                dir_name, parent_ref=dir_refs[parent_dir])
        dir_refs[root] = dir_ref
        for filename in files:
            fpath = pathlib.Path(root) / filename
            matches = checksums_by_path.get(fpath, [])
            if len(matches) != 1:
                raise ValueError(
                    'Expected exactly one hash entry for {}, found {}'.format(
                        fpath, len(matches)))
            norm_hash = {alg.upper(): val for alg, val in matches[0].items()}
            sip.add_asset_tree(dir_ref, fpath, checksum=norm_hash)
    sip.serialise()
    sip.close()
def update_bag_info(bag_path, data):
    """Adds metadata from a dictionary to `bag-info.txt`

    Args:
        bag_path: Path of the bag to update.
        data: Dictionary of tag names and values to store in the bag info.

    Raises:
        AssertionError: When ``data`` is not a dictionary.
    """
    # Raised explicitly so the check is still enforced under ``python -O``,
    # while callers keep seeing the same exception type as before.
    if not isinstance(data, dict):
        raise AssertionError(
            "data must be a dict, got {}".format(type(data).__name__))
    bag = bagit.Bag(bag_path)
    for k, v in data.items():
        bag.info[k] = v
    bag.save()
Example #17
0
def survey_bag(bag_path, include_metadata=False):
    """Collect summary facts about the bag at ``bag_path``.

    Args:
        bag_path: Path of the bag directory.
        include_metadata: When true, a seventh element is appended to the
            result: the comma-joined paths of the ``.xlsx``/``.json`` files
            found under ``data``, or ``'no metadata'`` when there are none.

    Returns:
        ``[bag_path, bag_type, bag_subtype, bag_size, bag_files, bag_valid]``
        where size and file count cover every ``data/**/*.*`` match. When
        the bag cannot be handled as an ``ami_bag``, type and subtype are
        ``None`` and ``bag_valid`` is ``False``.
    """
    try:
        bag = ami_bag(bag_path)
        bag_valid = bag.validate_amibag(metadata=True)
        bag_type = bag.type
        bag_subtype = bag.subtype
    except Exception:
        # Still opened as a plain bag so that whatever bagit.Bag raises for
        # this path keeps propagating to the caller.
        bagit.Bag(bag_path)
        bag_valid = False
        bag_type = None
        bag_subtype = None

    # Materialise the matches: the lazy iterator used before was exhausted
    # by the size loop, so the metadata scan never saw a single file.
    all_files = glob.glob(os.path.join(bag_path, 'data/**/*.*'),
                          recursive=True)

    bag_files = len(all_files)
    bag_size = sum(os.stat(filepath).st_size for filepath in all_files)

    survey = [bag_path, bag_type, bag_subtype, bag_size, bag_files, bag_valid]
    if include_metadata:
        metadata_files = [
            filename for filename in all_files
            if filename.endswith(('.xlsx', '.json'))
        ]
        if metadata_files:
            survey.append(','.join(metadata_files))
        else:
            survey.append('no metadata')
    return survey
Example #18
0
def is_bag(bag_path):
    """Tell whether ``bag_path`` can be opened as a bag."""
    try:
        candidate = bagit.Bag(bag_path)
    except (bagit.BagError, bagit.BagValidationError):
        return False
    return bool(candidate)
Example #19
0
def validate_bag(bag_path,
                 fast=False,
                 callback=None,
                 config_file=bdbag.DEFAULT_CONFIG_FILE):
    """Validate the bag at ``bag_path``.

    Args:
        bag_path: Path of the bag directory.
        fast: Forwarded to ``bagit.Bag.validate``.
        callback: Forwarded to ``bagit.Bag.validate``; when given, a single
            process is used instead of the configured ``bag_processes``.
        config_file: Configuration file from which ``bag_config`` is read.

    Raises:
        bagit.BagIncompleteError: Re-raised after logging a hint about
            unresolved remote file references.
        bagit.BagValidationError: Raised with the named details of the
            original validation error joined into one message.
        bagit.InterruptedError: Re-raised after logging.
        RuntimeError: For any other exception raised during validation.
    """
    config = read_config(config_file)
    bag_config = config['bag_config']
    bag_processes = bag_config.get('bag_processes', 1)

    try:
        logger.info("Validating bag: %s", bag_path)
        bag = bagit.Bag(bag_path)
        bag.validate(bag_processes if not callback else 1,
                     fast=fast,
                     callback=callback)
        logger.info("Bag %s is valid", bag_path)
    except bagit.BagIncompleteError as e:
        logger.warning(
            "BagIncompleteError: %s %s", e,
            "This validation error may be transient if the bag contains unresolved remote file references "
            "from a fetch.txt file. In this case the bag is incomplete but not necessarily invalid. "
            "Resolve remote file references (if any) and re-validate.")
        # Bare raise keeps the original traceback intact.
        raise
    except bagit.BagValidationError as e:
        errors = [bdbag.get_named_exception(d) for d in e.details]
        raise bagit.BagValidationError('\nError: '.join(errors))
    except bagit.InterruptedError as e:
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning(bdbag.get_named_exception(e))
        raise
    except Exception as e:
        raise RuntimeError("Unhandled exception while validating bag: %s" % e)
Example #20
0
def extract_bag(local_bag_archive_path):
    """Unarchive a local bdbag and return it opened as a bag.

    The bag is extracted next to the archive, under the archive's path
    minus its extension.
    """
    extracted_path = os.path.splitext(local_bag_archive_path)[0]
    bdbag_api.extract_bag(local_bag_archive_path,
                          os.path.dirname(extracted_path))
    return bagit.Bag(extracted_path)
Example #21
0
    def test_validation_error_details(self):
        """A corrupted payload file is reported as one detailed checksum mismatch."""
        bagit.make_bag(self.tmpdir, checksums=['md5'], bag_info={'Bagging-Date': '1970-01-01'})
        readme = j(self.tmpdir, "data", "README")
        original_text = slurp_text_file(readme)
        # Change the first character so the recorded checksum no longer fits.
        with open(readme, "w") as r:
            r.write('A' + original_text[1:])

        expected_message = 'data/README md5 validation failed: expected="8e2af7a0143c7b8f4de0b3fc90f27354" found="fd41543285d17e7c29cd953f5cf5b955"'

        bag = bagit.Bag(self.tmpdir)
        try:
            self.validate(bag)
        except bagit.BagValidationError as e:
            self.assertIn(expected_message, str(e))
            self.assertEqual(len(e.details), 1)

            readme_error = e.details[0]
            self.assertEqual(expected_message, str(readme_error))
            self.assertIsInstance(readme_error, bagit.ChecksumMismatch)
            self.assertEqual(readme_error.algorithm, 'md5')
            self.assertEqual(readme_error.path, 'data/README')
            self.assertEqual(readme_error.expected, '8e2af7a0143c7b8f4de0b3fc90f27354')
            self.assertEqual(readme_error.found, 'fd41543285d17e7c29cd953f5cf5b955')
        else:
            self.fail("didn't get BagValidationError")
 def populate_data_from_files(self, es_client, transfer_backlog_dir):
     """Index every transfer directory found in the backlog directory.

     Files and the ``.gitignore`` entry are skipped. A directory that opens
     and validates as a bag carrying ``External-Identifier`` is imported as
     a self-describing transfer; anything else is rebuilt as a known one.
     """
     backlog_root = Path(transfer_backlog_dir)
     indexed = 0
     for entry in backlog_root.glob("*"):
         if entry.name == ".gitignore" or entry.is_file():
             continue
         entry_path = str(entry)
         try:
             bag = bagit.Bag(entry_path)
             bag.validate(processes=multiprocessing.cpu_count(),
                          completeness_only=True)
         except bagit.BagError:
             bag = None
         # The transfer UUID is taken from the last 36 characters of the name.
         transfer_uuid = entry.name[-36:]
         if bag and "External-Identifier" in bag.info:
             self.info("Importing self-describing transfer {}.".format(
                 transfer_uuid))
             size = am.get_bag_size(bag, entry_path)
             importer = _import_self_describing_transfer
         else:
             self.info(
                 "Rebuilding known transfer {}.".format(transfer_uuid))
             size = (am.get_bag_size(bag, entry_path)
                     if bag else am.walk_dir(entry_path))
             importer = _import_pipeline_dependant_transfer
         importer(self, es_client, self.stdout, entry, transfer_uuid, size)
         indexed += 1
     self.success("{} transfers indexed!".format(indexed))
Example #23
0
 def test_restructure_sip(self, mock_validate, mock_processing_config):
     """RestructurePackageRoutine adds the expected data and keeps existing files."""
     fixture_path = join(processing_config_fixture_dir, "processingMCP.xml")
     with open(fixture_path, "r") as config_file:
         mock_processing_config.return_value = config_file.read()
     mock_validate.return_value = {"valid": "true"}
     self.set_process_status(SIP.CREATED)
     sip_count = len(SIP.objects.all())

     # One extraction run, then one restructuring run, per SIP.
     for _ in range(sip_count):
         ExtractPackageRoutine().run()
     for _ in range(sip_count):
         message, sip_id = RestructurePackageRoutine().run()
         self.assertEqual(message, "SIP restructured.")
         self.assertEqual(len(sip_id), 1)

     for sip in SIP.objects.filter(process_status=SIP.RESTRUCTURED):
         restructured_bag = bagit.Bag(sip.bag_path)
         self.assertEqual(sip.bag_identifier,
                          restructured_bag.info["Internal-Sender-Identifier"])
         self.assertTrue(isfile(join(sip.bag_path, "processingMCP.xml")))
         self.assert_files_not_removed(sip)
Example #24
0
 def test_is_valid(self):
     """``is_valid`` turns false once an unlisted file appears in the payload."""
     bagit.make_bag(self.tmpdir)
     reopened = bagit.Bag(self.tmpdir)
     self.assertTrue(reopened.is_valid())
     extra_path = j(self.tmpdir, "data", "extra_file")
     with open(extra_path, "w") as handle:
         handle.write("bar")
     self.assertFalse(reopened.is_valid())
Example #25
0
def bag_as_source(srcbag, metadata):
    """Validate srcbag and use its bag-info.txt to fill gaps in metadata.

    A valid bag is also a complete one, i.e. it has no fetch.txt pointing at
    files that are missing from the local filesystem, so the is_valid()
    check is all that is needed here.

    Parameters:
        srcbag - The directory of the bag on disk
        metadata - A VersionMetadata object that will be updated with metadata
            from the bag

    Returns the srcdir for OCFL object content within the bag as it should be
    expressed in the state block.
    """
    bag = bagit.Bag(srcbag)
    if not bag.is_valid():
        raise BaggerError("Source Bagit bag at %s is not valid" % (srcbag))
    info = bag.info

    def wanted(current_value, tag):
        # Local arguments override; only fill a gap the bag can supply.
        return not current_value and tag in info

    if wanted(metadata.id, 'External-Identifier'):
        metadata.id = info['External-Identifier']
    if wanted(metadata.created, 'Bagging-Date'):
        # FIXME - timezone fudge
        metadata.created = info['Bagging-Date'] + 'T00:00:00Z'
    if wanted(metadata.message, 'External-Description'):
        metadata.message = info['External-Description']
    if wanted(metadata.name, 'Contact-Name'):
        metadata.name = info['Contact-Name']
    if wanted(metadata.address, 'Contact-Email'):
        metadata.address = 'mailto:' + info['Contact-Email']
    return os.path.join(srcbag, 'data')
Example #26
0
    def test_mixed_case_checksums(self):
        """A bag still validates when a manifest checksum is upper-cased."""
        bag = bagit.make_bag(self.tmpdir, checksums=["md5"])
        manifest_path = j(self.tmpdir, "manifest-md5.txt")
        tagmanifest_path = j(self.tmpdir, "tagmanifest-md5.txt")

        # Only payload entries matter here; tag manifest entries are skipped.
        payload_hashes = {}
        for entry_name in bag.entries.keys():
            if entry_name.startswith("data" + os.sep):
                payload_hashes = bag.entries[entry_name]
        digest = next(iter(payload_hashes.values()))

        upper_cased = slurp_text_file(manifest_path).replace(
            digest, digest.upper())
        with open(manifest_path, "wb") as m:
            m.write(upper_cased.encode("utf-8"))

        # The manifest changed, so its own checksum in the tag manifest has
        # to be recomputed and swapped in.
        hasher = hashlib.new("md5")
        hasher.update(slurp_text_file(manifest_path).encode("utf-8"))
        with open(tagmanifest_path, "r") as tagmanifest:
            tagman_contents = tagmanifest.read().replace(
                bag.entries["manifest-md5.txt"]["md5"], hasher.hexdigest())
        with open(tagmanifest_path, "w") as tagmanifest:
            tagmanifest.write(tagman_contents)

        reopened = bagit.Bag(self.tmpdir)
        self.assertTrue(self.validate(reopened))
Example #27
0
    def __init__(self, path=None):
        """Store the absolute path, try to open it as a bag, then check its bag info.

        Args:
            path: Location of the bag directory.
        """
        self.path = os.path.abspath(path)
        try:
            self.bag = bagit.Bag(path)
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt and SystemExit
            # are no longer swallowed.
            # NOTE(review): self.bag stays unset on this path - confirm that
            # check_baginfo copes with that.
            print("not bag")

        self.check_baginfo()
 def test_update_bag_info(self):
     """``update_bag_info`` stores dict entries and rejects non-dict input."""
     bagit_helpers.update_bag_info(self.bag_path, {"foo": "bar"})
     updated = bagit.Bag(self.bag_path)
     self.assertEqual(updated.info["foo"], "bar")
     # A list is not accepted as metadata.
     with self.assertRaises(AssertionError):
         bagit_helpers.update_bag_info(self.bag_path, ["foo", "bar"])
Example #29
0
 def test_validate_fast_without_oxum(self):
     """Fast validation fails once bag-info.txt has been removed."""
     bagit.make_bag(self.tmpdir)
     os.remove(j(self.tmpdir, "bag-info.txt"))
     reopened = bagit.Bag(self.tmpdir)
     with self.assertRaises(bagit.BagValidationError):
         self.validate(reopened, fast=True)
Example #30
0
 def find_arcp(self):
     """Return the bag's External-Identifier if it is an arcp URI, else a random arcp."""
     # First try to find External-Identifier
     ext_id = bagit.Bag(self.folder).info.get("External-Identifier")
     return ext_id if arcp.is_arcp_uri(ext_id) else arcp.arcp_random()