コード例 #1
0
 def move_from_storage_service(self,
                               source_path,
                               destination_path,
                               package=None,
                               resume=False):
     """Upload a local file or directory tree to DuraCloud.

     A directory is walked recursively and every file in it is uploaded
     individually; a single file is uploaded as-is.

     :param source_path: Local path of the file or directory to upload.
     :param destination_path: Remote path, appended (URL-quoted) to
         self.duraspace_url to build the upload URL.
     :param package: Not used by this method.
     :param bool resume: Passed through to self._upload_file.
     :raises StorageException: if source_path does not exist, or exists but
         is neither a regular file nor a directory.
     """
     source_path = utils.coerce_str(source_path)
     destination_path = utils.coerce_str(destination_path)
     if os.path.isdir(source_path):
         # Both source and destination paths must end with / so that the
         # prefix replacement below maps "src/x" to "dest/x". Without the
         # trailing slash on the source, "src/x" became "dest//x".
         source_path = os.path.join(source_path, '')
         destination_path = os.path.join(destination_path, '')
         # Duracloud does not accept folders, so upload each file individually
         for path, _dirs, files in os.walk(source_path):
             for basename in files:
                 entry = os.path.join(path, basename)
                 # Every entry starts with source_path, so swapping only the
                 # first occurrence rewrites just the leading prefix.
                 dest = entry.replace(source_path, destination_path, 1)
                 url = self.duraspace_url + urllib.quote(dest)
                 self._upload_file(url, entry, resume=resume)
     elif os.path.isfile(source_path):
         url = self.duraspace_url + urllib.quote(destination_path)
         self._upload_file(url, source_path, resume=resume)
     elif not os.path.exists(source_path):
         raise StorageException(
             _('%(path)s does not exist.') % {'path': source_path})
     else:
         raise StorageException(
             _('%(path)s is not a file or directory.') %
             {'path': source_path})
コード例 #2
0
 def move_to_storage_service(self, src_path, dest_path, dest_space):
     """Download src_path from DuraCloud to dest_path.

     The path is first fetched as a single file. If that download reports
     failure, src_path is treated as a folder prefix: every path listed under
     it is downloaded to the matching location below dest_path.

     :param src_path: Remote path, appended (URL-quoted) to self.duraspace_url.
     :param dest_path: Local path to download to.
     :param dest_space: Not used by this method.
     """
     # Work with byte strings throughout: .replace() doesn't handle mixed
     # unicode/str well.
     src_path = utils.coerce_str(src_path)
     dest_path = utils.coerce_str(dest_path)

     # Optimistically treat the source as a single file.
     file_url = self.duraspace_url + urllib.quote(src_path)
     if self._download_file(file_url, dest_path):
         return

     LOGGER.debug('%s not found, trying as folder', src_path)
     # A trailing /. or /* glob-matches on a filesystem but does not
     # character-match in Duracloud, so reduce it to a plain /. The same is
     # done to dest_path so the prefix replacement below keeps working.
     trailing_glob = r'/[\.\*]$'
     src_path = re.sub(trailing_glob, '/', src_path)
     dest_path = re.sub(trailing_glob, '/', dest_path)
     LOGGER.debug('Modified paths: src: %s dest: %s', src_path,
                  dest_path)

     for remote_path in self._get_files_list(src_path,
                                             show_split_files=False):
         remote_url = self.duraspace_url + urllib.quote(remote_path)
         local_path = remote_path.replace(src_path, dest_path, 1)
         self._download_file(remote_url, local_path)
コード例 #3
0
    def move_rsync(self, source, destination, try_mv_local=False, assume_rsync_daemon=False, rsync_password=None):
        """ Moves a file from source to destination.

        By default, uses rsync to move files.
        All directories leading to destination must exist; Space.create_local_directory may be useful.

        If try_mv_local is True, will attempt to use os.rename, which only works on the same device.
        This will not leave a copy at the source.

        :param source: Path to source file or directory. May have user@host: at beginning.
        :param destination: Path to destination file or directory. May have user@host: at the beginning.
        :param bool try_mv_local: If true, try moving/renaming instead of copying.  Should be False if source or destination specify a user@host.  Warning: this will not leave a copy at the source.
        :param bool assume_rsync_daemon: If true, will use rsync daemon-style commands instead of the default rsync with remote shell transport
        :param rsync_password: used if assume_rsync_daemon is true, to specify value of RSYNC_PASSWORD environment variable. If None, the variable is not set by this method.
        :raises StorageException: if rsync exits with a non-zero status.
        """
        source = utils.coerce_str(source)
        destination = utils.coerce_str(destination)
        LOGGER.info("Moving from %s to %s", source, destination)

        # Nothing to do when both paths are identical.
        if source == destination:
            return

        if try_mv_local:
            # Try using mv, and if that fails, fallback to rsync
            chmod_command = ['chmod', '--recursive', 'ug+rw,o+r', destination]
            try:
                os.rename(source, destination)
                # Set permissions (rsync does with --chmod=ugo+rw)
                subprocess.call(chmod_command)
                return
            except OSError:
                LOGGER.debug('os.rename failed, trying with normalized paths')
            # Second attempt with normalized paths (e.g. without trailing
            # slashes or duplicate separators).
            source_norm = os.path.normpath(source)
            dest_norm = os.path.normpath(destination)
            try:
                os.rename(source_norm, dest_norm)
                # Set permissions (rsync does with --chmod=ugo+rw)
                subprocess.call(chmod_command)
                return
            except OSError:
                LOGGER.debug('os.rename failed, falling back to rsync. Source: %s; Destination: %s', source_norm, dest_norm)

        # Rsync file over
        # TODO Do this asyncronously, with restarting failed attempts
        command = ['rsync', '-t', '-O', '--protect-args', '-vv',
                   '--chmod=Fug+rw,o-rwx,Dug+rwx,o-rwx',
                   '-r', source, destination]
        LOGGER.info("rsync command: %s", command)
        # stderr is merged into stdout so a failure message carries both.
        kwargs = {'stdout': subprocess.PIPE, 'stderr': subprocess.STDOUT}
        if assume_rsync_daemon:
            # Start from the parent environment rather than replacing it, so
            # the child keeps PATH and friends. Only export the password when
            # one was supplied: a None value in env makes Popen raise.
            env = os.environ.copy()
            if rsync_password is not None:
                env['RSYNC_PASSWORD'] = rsync_password
            kwargs['env'] = env
        p = subprocess.Popen(command, **kwargs)
        stdout, _ = p.communicate()
        if p.returncode != 0:
            s = "Rsync failed with status {}: {}".format(p.returncode, stdout)
            LOGGER.warning(s)
            raise StorageException(s)
コード例 #4
0
    def move_rsync(self, source, destination, try_mv_local=False):
        """Move a file or directory from source to destination.

        rsync is used by default. Every directory leading to destination must
        already exist; Space.create_local_directory may be useful for that.

        With try_mv_local set, os.rename is attempted first (raw paths, then
        normalized paths). A rename only works on the same device and leaves
        no copy at the source. rsync is the fallback when both renames fail.

        :param source: Path to source file or directory. May have user@host: at beginning.
        :param destination: Path to destination file or directory. May have user@host: at the beginning.
        :param bool try_mv_local: If true, try moving/renaming instead of copying.  Should be False if source or destination specify a user@host.  Warning: this will not leave a copy at the source.
        :raises StorageException: if rsync exits with a non-zero status.
        """
        source = utils.coerce_str(source)
        destination = utils.coerce_str(destination)
        LOGGER.info("Moving from %s to %s", source, destination)

        if source == destination:
            return

        if try_mv_local:
            # First attempt: rename using the paths exactly as given.
            try:
                os.rename(source, destination)
            except OSError:
                LOGGER.debug('os.rename failed, trying with normalized paths',
                             exc_info=True)
            else:
                return

            # Second attempt: rename using normalized paths.
            normalized_source = os.path.normpath(source)
            normalized_destination = os.path.normpath(destination)
            try:
                os.rename(normalized_source, normalized_destination)
            except OSError:
                LOGGER.debug(
                    'os.rename failed, falling back to rsync. Source: %s; Destination: %s',
                    normalized_source,
                    normalized_destination,
                    exc_info=True)
            else:
                return

        # Rsync file over
        # TODO Do this asyncronously, with restarting failed attempts
        rsync_options = ['-t', '-O', '--protect-args', '-vv', '--chmod=ugo+rw', '-r']
        command = ['rsync'] + rsync_options + [source, destination]
        LOGGER.info("rsync command: %s", command)

        # stderr is folded into stdout so the failure message shows both.
        process = subprocess.Popen(command,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
        output = process.communicate()[0]
        if process.returncode == 0:
            return
        message = "Rsync failed with status {}: {}".format(process.returncode,
                                                          output)
        LOGGER.warning(message)
        raise StorageException(message)
コード例 #5
0
    def _get_files_list(self, prefix, show_split_files=True):
        """
        Generator function to return the full path of all files starting with prefix.

        The listing is fetched page by page: after each page, the last path
        received is sent as the "marker" parameter of the next request, and
        iteration stops when a page comes back with no items.

        :param prefix: All paths returned will start with prefix
        :param bool show_split_files: If True, will show files ending with .dura-chunk-#### and .dura-manifest. If False, will show the original file name (everything before .dura-manifest)
        :returns: Iterator of paths
        :raises StorageException: if any listing request does not return HTTP 200.
        """
        # The dot is escaped so only a literal ".dura-chunk-NNNN" suffix is
        # treated as a chunk; compiled once because it is used per path.
        durachunk_regex = re.compile(r"\.dura-chunk-\d{4}$")
        duramanifest_len = len(self.MANIFEST_SUFFIX)
        params = {"prefix": prefix}
        while True:
            LOGGER.debug("URL: %s, params: %s", self.duraspace_url, params)
            response = self.session.get(self.duraspace_url, params=params)
            LOGGER.debug("Response: %s", response)
            if response.status_code != 200:
                LOGGER.warning("%s: Response: %s", response, response.text)
                raise StorageException(
                    _("Unable to get list of files in %(prefix)s") %
                    {"prefix": prefix})
            # Response is XML in the form:
            # <space id="self.durastore">
            #   <item>path</item>
            #   <item>path</item>
            # </space>
            root = etree.fromstring(response.content)
            paths = [p.text for p in root]
            LOGGER.debug("Paths first 10: %s", paths[:10])
            LOGGER.debug("Paths last 10: %s", paths[-10:])
            if not paths:
                # An empty page means the listing is exhausted.
                return
            for p in paths:
                if not show_split_files:
                    # There is exactly one .dura-manifest for chunked files
                    # Return the original filename when we find a manifest file
                    if p.endswith(self.MANIFEST_SUFFIX):
                        yield utils.coerce_str(p[:-duramanifest_len])
                        continue
                    # File chunks skipped - manifest returns original filename
                    if durachunk_regex.search(p):
                        continue
                yield utils.coerce_str(p)
            # Ask for the page that follows the last path just received.
            params["marker"] = paths[-1]
コード例 #6
0
 def browse(self, path):
     """Browse path, remotely over rsync when a remote user and host are set.

     Supported so that the Location select works.

     :param path: Path to browse.
     :returns: Whatever self.space.browse_rsync (remote) or
         self.space.browse_local (local) returns.
     """
     if not (self.remote_user and self.remote_name):
         # No remote configured: browse the local filesystem.
         return self.space.browse_local(path)

     # Make sure the remote path ends with a slash before building the
     # user@host:path target.
     remote_dir = os.path.join(path, '')
     remote_target = "{}@{}:{}".format(
         self.remote_user, self.remote_name, utils.coerce_str(remote_dir))
     return self.space.browse_rsync(remote_target)
コード例 #7
0
    def _format_host_path(self, path, user=None, host=None):
        """Build a ``user@host:path`` string suitable for use with rsync.

        :param path: Path on the remote host.
        :param user: Remote username; falls back to self.remote_user when None.
        :param host: Remote hostname; falls back to self.remote_name when None.
        :returns: The formatted remote path.
        """
        user = self.remote_user if user is None else user
        host = self.remote_name if host is None else host
        remote_path = utils.coerce_str(path)
        return "{}@{}:{}".format(user, host, remote_path)
コード例 #8
0
 def move_from_storage_service(self, source_path, destination_path, package=None):
     """Move source_path to destination_path, remotely or locally.

     When both a remote user and a remote host are configured, the
     destination directories are created over rsync and the files are sent
     to ``user@host:destination_path``. Otherwise the destination
     directories are created locally and a local rename is attempted before
     falling back to rsync.

     :param source_path: Path to move from.
     :param destination_path: Path to move to.
     :param package: Not used by this method.
     """
     is_remote = bool(self.remote_user and self.remote_name)
     if is_remote:
         # Rsync to Arkivum watched directory
         self.space.create_rsync_directory(destination_path, self.remote_user, self.remote_name)
         target = "{}@{}:{}".format(self.remote_user, self.remote_name, utils.coerce_str(destination_path))
     else:
         self.space.create_local_directory(destination_path)
         target = destination_path
     # A plain rename is only attempted for local destinations.
     self.space.move_rsync(source_path, target, try_mv_local=not is_remote)
コード例 #9
0
    def _move_rsync(self, source, destination):
        """Copy source to destination using rsync.

        All directories leading to destination must exist.
        Space._create_local_directory may be useful.

        :param source: Path to copy from.
        :param destination: Path to copy to.
        :raises StorageException: if rsync exits with a non-zero status.
        """
        source = utils.coerce_str(source)
        destination = utils.coerce_str(destination)
        LOGGER.info("Rsyncing from %s to %s", source, destination)

        # Identical paths: nothing to transfer.
        if source == destination:
            return

        # Rsync file over
        # TODO Do this asyncronously, with restarting failed attempts
        rsync_options = ['-t', '-O', '--protect-args', '-vv', '--chmod=ugo+rw', '-r']
        command = ['rsync'] + rsync_options + [source, destination]
        LOGGER.info("rsync command: %s", command)

        # stderr is merged into stdout so the error message carries both.
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = process.communicate()[0]
        if process.returncode == 0:
            return
        message = "Rsync failed with status {}: {}".format(process.returncode, output)
        LOGGER.warning(message)
        raise StorageException(message)
コード例 #10
0
    def post_move_from_storage_service(self,
                                       staging_path,
                                       destination_path,
                                       package=None,
                                       *args,
                                       **kwargs):
        """
        Run post-move actions, then delete the staging copy.

        Called after moving from the storage service to the final
        destination. The child space's hook of the same name is invoked if it
        has one, and afterwards the staging copy is removed unless it is the
        same path as the destination.

        :param str staging_path: Path to the staging copy relative to the SS internal location. Can be None if destination_path is also None.
        :param str destination_path: Path to the destination copy relative to this Space's path. Can be None if staging_path is also None.
        :param package: (Optional) :class:`Package` that is being moved.
        """
        if staging_path is None or destination_path is None:
            # If either path is missing, treat both as missing.
            staging_path = destination_path = None
        elif staging_path and destination_path:
            staging_path, destination_path = self._move_from_path_mangling(
                staging_path, destination_path)

        try:
            child_space = self.get_child_space()
            child_space.post_move_from_storage_service(
                staging_path=staging_path,
                destination_path=destination_path,
                package=package,
                *args,
                **kwargs)
        except AttributeError:
            # This is optional for the child class to implement
            pass

        # Nothing to clean up when staging and destination coincide
        # (this also covers the case where both are None).
        if staging_path == destination_path:
            return

        # Delete staging copy
        try:
            if os.path.isdir(staging_path):
                # rmtree must be handed a str: with a unicode string it
                # joins the directory with its children's names, which can
                # mix encodings and blows up when a filename cannot be
                # converted to unicode.
                staging_dir = utils.coerce_str(os.path.normpath(staging_path))
                shutil.rmtree(staging_dir)
            elif os.path.isfile(staging_path):
                os.remove(os.path.normpath(staging_path))
        except OSError:
            logging.warning("Unable to remove %s",
                            staging_path,
                            exc_info=True)
コード例 #11
0
    def _format_host_path(self, path, user=None, host=None):
        """Build a remote path string suitable for use with rsync.

        The result is ``user@host:path`` for the default transport, or
        ``user@host::module`` when self.assume_rsync_daemon is set.

        :param path: Path (or, in daemon mode, module) on the remote host.
        :param user: Remote username; falls back to self.remote_user when None.
        :param host: Remote hostname; falls back to self.remote_name when None.
        :returns: The formatted remote path.
        """
        user = self.remote_user if user is None else user
        host = self.remote_name if host is None else host

        if not self.assume_rsync_daemon:
            return "{}@{}:{}".format(user, host, utils.coerce_str(path))

        # when talking to a rsync daemon server:
        # - a double colon is needed
        # - location is not a path but a remote module (no slash)
        # A Storage Service Space requires a non-empty path starting with a
        # slash, so the leading slash is stripped here to work around that.
        module = path.lstrip("/")
        return "{}@{}::{}".format(user, host, utils.coerce_str(module))
コード例 #12
0
    def create_rsync_directory(self, destination_path, user, host):
        """
        Creates a remote directory structure for destination_path.

        Each directory level is created by rsyncing an empty local temporary
        directory to it, starting from the topmost level. The temporary
        directory is always removed before returning, including when an
        error is raised.

        :param destination_path: path to create the directories for.  Should
            end with a / or a filename, or final directory may not be created.
            If path is empty, no directories are created.
        :param user: Username on remote host
        :param host: Hostname of remote host
        :raises subprocess.CalledProcessError: if an rsync invocation exits
            with a non-zero status.
        """
        # Assemble a list of directories to create on the remote server,
        # ordered from the shallowest to the deepest; these will be created
        # one at a time
        directories = []
        path = destination_path
        while path != "" and path != "/":
            directories.append(path)
            path = os.path.dirname(path)
        directories.reverse()

        # Syncing an empty directory will ensure no files get transferred
        temp_dir = os.path.join(tempfile.mkdtemp(), "")

        # try/finally so the temporary directory is removed on every exit
        # path, not only when rsync itself reports a failure.
        try:
            # Creates the destination_path directory without copying any files
            # Dir must end in a / for rsync to create it
            for directory in directories:
                path = os.path.join(os.path.dirname(directory), "")
                path = "{}@{}:{}".format(user, host, utils.coerce_str(path))
                cmd = [
                    "rsync",
                    "-vv",
                    "--protect-args",
                    "--chmod=ug=rwx,o=rx",
                    "--recursive",
                    temp_dir,
                    path,
                ]
                LOGGER.info("rsync path creation command: %s", cmd)
                try:
                    subprocess.check_call(cmd)
                except subprocess.CalledProcessError as e:
                    LOGGER.warning("rsync path creation failed: %s", e)
                    raise
        finally:
            shutil.rmtree(temp_dir)