예제 #1
0
    def test_filename_unicode_normalization(self):
        # Filenames can change Unicode normalization form while a bag is in
        # transit. Testing both directions portably is hard because OS X
        # rewrites *every* filename to an NFD variant, so this test covers one
        # direction only: the payload file is created on disk under its NFD
        # name (which an HFS+ filesystem leaves untouched), the manifests are
        # then rewritten in NFC form, and the bag must still validate.

        nfd_name = unicodedata.normalize("NFD", "Núñez Papers.txt")

        os.makedirs(j(self.tmpdir, "unicode-normalization"))

        payload_path = j(self.tmpdir, "unicode-normalization", nfd_name)
        with open(payload_path, "w") as payload:
            payload.write("This is a test filename written using NFD normalization\n")

        bag = bagit.make_bag(self.tmpdir)
        bag.save()

        self.assertTrue(bag.is_valid())

        # Rewrite every payload manifest so that its whole content is in NFC:
        for manifest_path in bag.manifest_files():
            manifest_text = slurp_text_file(manifest_path)
            nfc_bytes = unicodedata.normalize("NFC", manifest_text).encode("utf-8")
            with open(manifest_path, "wb") as manifest:
                manifest.write(nfc_bytes)

        # The manifests changed, so the tag manifests must be regenerated:
        for algorithm in bag.algorithms:
            bagit._make_tagmanifest_file(algorithm, bag.path, encoding=bag.encoding)

        # Load the bag from disk again and make sure it still validates:
        bag = bagit.Bag(self.tmpdir)
        self.assertTrue(bag.is_valid())
예제 #2
0
  def write_tag_manifests(self):
    """Regenerate the tag manifest for each checksum algorithm of the bag.

    This is a best-effort operation: a failure for one algorithm is logged
    and the remaining algorithms are still attempted.

    Returns:
      True, always -- including when one or more manifests could not be
      written. Check the log output to detect failures.
    """
    # set() de-duplicates so each algorithm is processed only once.
    for alg in set(self.algorithms):
      try:
        bagit._make_tagmanifest_file(alg, self.path)
      except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate. The message
        # assumes a permission problem, so the traceback is attached to
        # reveal the real cause when it is something else.
        LOGGER.error("Do not have permission to overwrite tag manifests",
                     exc_info=True)

    return True
예제 #3
0
    def write_tag_manifests(self):
        """Regenerate the tag manifest for each checksum algorithm of the bag.

        This is a best-effort operation: a failure for one algorithm is logged
        and the remaining algorithms are still attempted.

        Returns:
            True, always -- including when one or more manifests could not be
            written. Check the log output to detect failures.
        """
        # set() de-duplicates so each algorithm is processed only once.
        for alg in set(self.algs):
            try:
                bagit._make_tagmanifest_file(alg, self.path)
            except Exception:
                # Catch Exception rather than using a bare except so that
                # KeyboardInterrupt / SystemExit still propagate. The message
                # assumes a permission problem, so the traceback is attached
                # to reveal the real cause when it is something else.
                LOGGER.error(
                    "Do not have permission to overwrite tag manifests",
                    exc_info=True)

        return True
예제 #4
0
    def test_filename_unicode_normalization(self):
        # We need to handle cases where the Unicode normalization form of a
        # filename has changed in-transit. This is hard to do portably in both
        # directions because OS X normalizes *all* filenames to an NFD variant
        # so we'll start with a basic test which writes the manifest using the
        # NFC form and confirm that this does not cause the bag to fail when it
        # is written to the filesystem using the NFD form, which will not be
        # altered when saved to an HFS+ filesystem:

        test_filename = 'Núñez Papers.txt'
        test_filename_nfd = unicodedata.normalize('NFD', test_filename)

        os.makedirs(j(self.tmpdir, 'unicode-normalization'))

        with open(j(self.tmpdir, 'unicode-normalization', test_filename_nfd),
                  'w') as f:
            f.write(
                'This is a test filename written using NFD normalization\n')

        bag = bagit.make_bag(self.tmpdir)
        bag.save()

        self.assertTrue(bag.is_valid())

        # Now we'll rewrite every manifest file so that its entire content is
        # normalized to NFC:
        for m_f in bag.manifest_files():
            contents = slurp_text_file(m_f)
            normalized_bytes = unicodedata.normalize('NFC',
                                                     contents).encode('utf-8')
            with open(m_f, 'wb') as f:
                f.write(normalized_bytes)

        # The manifests changed on disk, so regenerate the tag manifests:
        for alg in bag.algs:
            bagit._make_tagmanifest_file(alg, bag.path, encoding=bag.encoding)

        # Now we'll reload the whole thing:
        bag = bagit.Bag(self.tmpdir)
        self.assertTrue(bag.is_valid())
def move_tag_files(source_directory, tags):
    """Move tag files into the ``tags`` folder of their bag and refresh its tag manifest.

    The bag a tag file belongs to is identified by the six-digit id found
    between underscores in the tag file's path (``_123456_``); the bag is
    expected at ``<source_directory>/<id>``.

    Args:
        source_directory: Directory containing one bag directory per id.
        tags: Iterable of tag file paths to move.

    Tag files whose path carries no id, or whose bag directory does not exist,
    are reported on stdout and skipped.
    """
    for tag_file in tags:
        id_match = re.search(r'_(\d{6})_', tag_file)
        if id_match is None:
            # Without an id there is no way to tell which bag the file belongs
            # to; skip it rather than crash on ``None.group``.
            print('ummm, no id found in {}'.format(tag_file))
            continue

        cms_id = id_match.group(1)
        object_bag = os.path.join(source_directory, cms_id)

        # tag file for object that didn't get bagged
        if not os.path.exists(object_bag):
            print('ummm, no bag for {}'.format(cms_id))
            continue

        tag_dir = os.path.join(object_bag, 'tags')
        os.makedirs(tag_dir, exist_ok=True)
        shutil.move(tag_file, tag_dir)

        # Update the tag manifest by reusing bagit's private helper, which is
        # run from inside the bag directory. The manifest is rewritten after
        # every single move: not ideal, but simple.
        cur_dir = os.getcwd()
        os.chdir(object_bag)
        try:
            bagit._make_tagmanifest_file("md5", object_bag)
        finally:
            # Always restore the caller's working directory, even when the
            # manifest rewrite fails.
            os.chdir(cur_dir)
예제 #6
0
    def save(self, processes=1, manifests=False):
        """
        Write pending changes to the bag metadata (self.info) back to disk.

        Payload manifests are left alone unless ``manifests`` is True; pass
        True after adding, changing or removing files under the data
        directory. It is off by default so that saving a corrupted bag does
        not silently produce a fresh manifest for it.

        ``processes`` is passed on to the manifest generation and controls
        how many processes are used when checksums are recalculated.

        Raises BagError when no path is set, or when the bag directory or its
        content lacks the required access permissions.
        """
        # Preconditions
        if not self.path:
            raise BagError(_('Bag.save() called before setting the path!'))

        required_access = os.R_OK | os.W_OK | os.X_OK
        if not os.access(self.path, required_access):
            raise BagError(_('Cannot save bag to non-existent or inaccessible directory %s') % self.path)

        not_writable = _can_bag(self.path)
        if not_writable:
            LOGGER.error(_("Missing write permissions for the following directories and files:\n%s"),
                         not_writable)
            raise BagError(_("Missing permissions to move all files and directories"))

        dirs_without_read, files_without_read = _can_read(self.path)
        if dirs_without_read:
            LOGGER.error(_("The following directories do not have read permissions:\n%s"),
                         dirs_without_read)
        if files_without_read:
            LOGGER.error(_("The following files do not have read permissions:\n%s"),
                         files_without_read)
        if dirs_without_read or files_without_read:
            raise BagError(_("Read permissions are required to calculate file fixities"))

        # The helper functions operate relative to the bag directory, so work
        # from there and go back to the previous directory when finished
        previous_dir = os.path.abspath(os.path.curdir)
        try:
            os.chdir(self.path)

            if manifests:
                # Rebuild the payload manifests, local payload first
                self._sync_remote_entries_with_existing_fetch()
                validate_remote_entries(self.remote_entries, self.path)
                payload_bytes, payload_files = make_manifests(
                    'data', processes,
                    algorithms=self.algorithms,
                    encoding=self.encoding)

                # ...then add the entries that are fetched remotely
                remote_bytes, remote_files = update_manifests_from_remote(
                    self.remote_entries, self.path)
                payload_bytes += remote_bytes
                payload_files += remote_files

                # Rewrite fetch.txt
                _make_fetch_file(self.path, self.remote_entries)

                # Record the new Payload-Oxum
                LOGGER.info(_('Updating Payload-Oxum in %s'), self.tag_file_name)
                self.info['Payload-Oxum'] = '%s.%s' % (payload_bytes, payload_files)

            _make_tag_file(self.tag_file_name, self.info)

            # Manifest and bag-info changes invalidate the tag manifests
            for algorithm in self.algorithms:
                _make_tagmanifest_file(algorithm, self.path, encoding=self.encoding)

            # Pick up what was just written
            self._load_manifests()

        except Exception:
            LOGGER.error(_("An error occurred updating bag in %s"), self.path)
            raise

        finally:
            os.chdir(previous_dir)
예제 #7
0
def make_bag(bag_dir, bag_info=None, processes=1, checksums=None, encoding='utf-8', remote_entries=None):
    """
    Convert a given directory into a bag. You can pass in arbitrary
    key/value pairs to put into the bag-info.txt metadata file as
    the bag_info dictionary.

    The existing content of ``bag_dir`` is moved into a new ``data``
    sub-directory, after which the payload manifests, fetch file, bagit.txt,
    bag-info.txt and tag manifests are written.

    :param bag_dir: directory to convert in place.
    :param bag_info: optional dict of bag-info.txt entries. When given it is
        updated in place: 'Bagging-Date' and 'Bag-Software-Agent' are filled
        in if absent and 'Payload-Oxum' is always set.
    :param processes: passed to make_manifests for checksum calculation.
    :param checksums: checksum algorithms to use; DEFAULT_CHECKSUMS when None.
    :param encoding: encoding passed to make_manifests for the payload
        manifests. The tag manifests are always written as UTF-8.
    :param remote_entries: passed to the remote-entry validation, manifest
        update and fetch file helpers.
    :return: a BDBag for ``bag_dir``.
    :raises RuntimeError: if the current directory is inside ``bag_dir`` or
        ``bag_dir`` is not a directory.
    :raises BagError: if files or directories under ``bag_dir`` cannot be
        written or read.
    """

    if checksums is None:
        checksums = DEFAULT_CHECKSUMS

    bag_dir = os.path.abspath(bag_dir)
    cwd = os.path.abspath(os.path.curdir)

    # Compare against bag_dir *with a trailing separator*: a plain prefix test
    # would wrongly reject a sibling directory that merely shares the name
    # prefix (e.g. cwd=/x/bag-old while bagging /x/bag).
    if cwd != bag_dir and cwd.startswith(os.path.join(bag_dir, '')):
        raise RuntimeError(_('Bagging a parent of the current directory is not supported'))

    LOGGER.info(_("Creating bag for directory %s"), bag_dir)

    if not os.path.isdir(bag_dir):
        LOGGER.error(_("Bag directory %s does not exist"), bag_dir)
        raise RuntimeError(_("Bag directory %s does not exist") % bag_dir)

    # FIXME: we should do the permissions checks before changing directories
    old_dir = os.path.abspath(os.path.curdir)

    try:
        # TODO: These two checks are currently redundant since an unreadable directory will also
        #       often be unwritable, and this code will require review when we add the option to
        #       bag to a destination other than the source. It would be nice if we could avoid
        #       walking the directory tree more than once even if most filesystems will cache it

        unbaggable = _can_bag(bag_dir)

        if unbaggable:
            LOGGER.error(_("Unable to write to the following directories and files:\n%s"), unbaggable)
            raise BagError(_("Missing permissions to move all files and directories"))

        unreadable_dirs, unreadable_files = _can_read(bag_dir)

        if unreadable_dirs or unreadable_files:
            if unreadable_dirs:
                LOGGER.error(_("The following directories do not have read permissions:\n%s"),
                             unreadable_dirs)
            if unreadable_files:
                LOGGER.error(_("The following files do not have read permissions:\n%s"),
                             unreadable_files)
            raise BagError(_("Read permissions are required to calculate file fixities"))
        else:
            LOGGER.info(_("Creating data directory"))

            # FIXME: if we calculate full paths we won't need to deal with changing directories
            os.chdir(bag_dir)
            cwd = os.getcwd()
            # Stage the payload in a temporary directory inside the bag so an
            # existing entry named 'data' can be moved like any other entry.
            temp_data = tempfile.mkdtemp(dir=cwd)

            for f in os.listdir('.'):
                # Do not move the staging directory into itself.
                if os.path.abspath(f) == temp_data:
                    continue
                new_f = os.path.join(temp_data, f)
                LOGGER.info(_('Moving %(source)s to %(destination)s'), {'source': f, 'destination': new_f})
                os.rename(f, new_f)

            LOGGER.info(_('Moving %(source)s to %(destination)s'), {'source': temp_data, 'destination': 'data'})
            os.rename(temp_data, 'data')

            # permissions for the payload directory should match those of the
            # original directory
            os.chmod('data', os.stat(cwd).st_mode)

            validate_remote_entries(remote_entries, bag_dir)
            total_bytes, total_files = make_manifests('data', processes, algorithms=checksums, encoding=encoding)
            total_bytes_remote, total_files_remote = update_manifests_from_remote(remote_entries, bag_dir)
            total_bytes += total_bytes_remote
            total_files += total_files_remote

            _make_fetch_file(bag_dir, remote_entries)

            LOGGER.info(_("Creating bagit.txt"))
            txt = """BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8\n"""
            with open_text_file('bagit.txt', 'w') as bagit_file:
                bagit_file.write(txt)

            LOGGER.info(_("Creating bag-info.txt"))
            if bag_info is None:
                bag_info = {}

            # allow 'Bagging-Date' and 'Bag-Software-Agent' to be overidden
            if 'Bagging-Date' not in bag_info:
                bag_info['Bagging-Date'] = date.strftime(date.today(), "%Y-%m-%d")
            if 'Bag-Software-Agent' not in bag_info:
                bag_info['Bag-Software-Agent'] = \
                    'BDBag version: %s (Bagit version: %s) <%s>' % (VERSION, BAGIT_VERSION, PROJECT_URL)
            bag_info['Payload-Oxum'] = "%s.%s" % (total_bytes, total_files)
            _make_tag_file('bag-info.txt', bag_info)

            # bagit.txt above declares UTF-8 as the tag file encoding, so the
            # tag manifests are written as UTF-8 regardless of ``encoding``.
            for c in checksums:
                _make_tagmanifest_file(c, bag_dir, encoding='utf-8')
    except Exception:
        LOGGER.error(_("An error occurred creating a bag in %s"), bag_dir)
        raise
    finally:
        # Restore the caller's working directory whether or not bagging worked.
        os.chdir(old_dir)

    return BDBag(bag_dir)