def move_from_storage_service(self, source_path, destination_path, package=None, resume=False):
    """
    Moves self.staging_path/src_path to dest_path.

    A directory is uploaded one file at a time; a single file is uploaded
    directly to ``destination_path``.

    :param source_path: Local path of the file or directory to upload.
    :param destination_path: Path appended to ``self.duraspace_url`` to
        build the upload URL(s).
    :param package: Not used by this method.
    :param bool resume: Passed through to ``self._upload_file``.
    :raises StorageException: if ``source_path`` does not exist, or exists
        but is neither a file nor a directory.
    """
    source_path = utils.coerce_str(source_path)
    destination_path = utils.coerce_str(destination_path)
    if os.path.isdir(source_path):
        # Both source and destination paths should end with /. Normalizing
        # only the destination would turn 'src' + 'dst/' into 'dst//file'
        # in the prefix replacement below.
        source_dir = os.path.join(source_path, '')
        destination_path = os.path.join(destination_path, '')
        # Duracloud does not accept folders, so upload each file individually
        for path, dirs, files in os.walk(source_dir):
            for basename in files:
                entry = os.path.join(path, basename)
                # Swap the leading local directory for the remote one
                dest = entry.replace(source_dir, destination_path, 1)
                url = self.duraspace_url + urllib.quote(dest)
                self._upload_file(url, entry, resume=resume)
    elif os.path.isfile(source_path):
        url = self.duraspace_url + urllib.quote(destination_path)
        self._upload_file(url, source_path, resume=resume)
    elif not os.path.exists(source_path):
        raise StorageException(
            _('%(path)s does not exist.') % {'path': source_path})
    else:
        raise StorageException(
            _('%(path)s is not a file or directory.') % {'path': source_path})
def move_to_storage_service(self, src_path, dest_path, dest_space):
    """
    Moves src_path to dest_space.staging_path/dest_path.

    The source is first requested as a single file. If that download does
    not succeed, it is treated as a folder prefix and every file listed
    under it is downloaded individually.

    :param src_path: Path appended to ``self.duraspace_url`` to fetch.
    :param dest_path: Local path to download to.
    :param dest_space: Not used by this method.
    """
    # Work with byte strings throughout: .replace() doesn't handle mixed
    # unicode/str well, and it's easiest to put it all in strs
    src_path = utils.coerce_str(src_path)
    dest_path = utils.coerce_str(dest_path)

    # First attempt: fetch it as a single file
    file_url = self.duraspace_url + urllib.quote(src_path)
    if self._download_file(file_url, dest_path):
        return

    # File cannot be found - this may be a folder
    LOGGER.debug('%s not found, trying as folder', src_path)
    # Remove /. and /* at the end of the string. These glob-match on a
    # filesystem, but do not character-match in Duracloud.
    # Normalize dest_path as well so replace continues to work
    trailing_glob = r'/[\.\*]$'
    src_path = re.sub(trailing_glob, '/', src_path)
    dest_path = re.sub(trailing_glob, '/', dest_path)
    LOGGER.debug('Modified paths: src: %s dest: %s', src_path, dest_path)

    for remote_name in self._get_files_list(src_path, show_split_files=False):
        self._download_file(
            self.duraspace_url + urllib.quote(remote_name),
            remote_name.replace(src_path, dest_path, 1),
        )
def move_rsync(self, source, destination, try_mv_local=False, assume_rsync_daemon=False, rsync_password=None):
    """
    Moves a file from source to destination.

    By default, uses rsync to move files.
    All directories leading to destination must exist; Space.create_local_directory may be useful.

    If try_mv_local is True, will attempt to use os.rename, which only works on the same device.
    This will not leave a copy at the source.

    :param source: Path to source file or directory. May have user@host: at beginning.
    :param destination: Path to destination file or directory. May have user@host: at the beginning.
    :param bool try_mv_local: If true, try moving/renaming instead of copying.
        Should be False if source or destination specify a user@host.
        Warning: this will not leave a copy at the source.
    :param bool assume_rsync_daemon: If true, will use rsync daemon-style
        commands instead of the default rsync with remote shell transport
    :param rsync_password: used if assume_rsync_daemon is true, to specify
        value of RSYNC_PASSWORD environment variable. If None, the variable
        is left as inherited from this process.
    :raises StorageException: if rsync exits with a non-zero status.
    """
    source = utils.coerce_str(source)
    destination = utils.coerce_str(destination)
    LOGGER.info("Moving from %s to %s", source, destination)

    if source == destination:
        return

    if try_mv_local:
        # Try using mv, and if that fails, fallback to rsync
        chmod_command = ['chmod', '--recursive', 'ug+rw,o+r', destination]
        try:
            os.rename(source, destination)
            # A rename bypasses rsync's --chmod, so set permissions here
            subprocess.call(chmod_command)
            return
        except OSError:
            LOGGER.debug('os.rename failed, trying with normalized paths')
        source_norm = os.path.normpath(source)
        dest_norm = os.path.normpath(destination)
        try:
            os.rename(source_norm, dest_norm)
            # A rename bypasses rsync's --chmod, so set permissions here
            subprocess.call(chmod_command)
            return
        except OSError:
            LOGGER.debug(
                'os.rename failed, falling back to rsync. Source: %s; Destination: %s',
                source_norm, dest_norm)

    # Rsync file over
    # TODO Do this asyncronously, with restarting failed attempts
    command = [
        'rsync', '-t', '-O', '--protect-args', '-vv',
        '--chmod=Fug+rw,o-rwx,Dug+rwx,o-rwx',
        '-r', source, destination,
    ]
    LOGGER.info("rsync command: %s", command)
    kwargs = {'stdout': subprocess.PIPE, 'stderr': subprocess.STDOUT}
    if assume_rsync_daemon:
        # Extend the inherited environment instead of replacing it, so the
        # child keeps PATH, HOME, locale settings, etc. A None password is
        # skipped because environment values must be strings.
        env = dict(os.environ)
        if rsync_password is not None:
            env['RSYNC_PASSWORD'] = rsync_password
        kwargs['env'] = env

    p = subprocess.Popen(command, **kwargs)
    # stderr is merged into stdout above, so only the first item is useful
    stdout = p.communicate()[0]
    if p.returncode != 0:
        s = "Rsync failed with status {}: {}".format(p.returncode, stdout)
        LOGGER.warning(s)
        raise StorageException(s)
def move_rsync(self, source, destination, try_mv_local=False):
    """
    Moves a file from source to destination.

    Uses rsync unless a local rename is requested and succeeds.
    All directories leading to destination must exist; Space.create_local_directory may be useful.

    With try_mv_local set, os.rename is attempted first (raw paths, then
    normalized paths). A rename only works on the same device and does not
    leave a copy at the source.

    :param source: Path to source file or directory. May have user@host: at beginning.
    :param destination: Path to destination file or directory. May have user@host: at the beginning.
    :param bool try_mv_local: If true, try moving/renaming instead of copying.
        Should be False if source or destination specify a user@host.
        Warning: this will not leave a copy at the source.
    :raises StorageException: if rsync exits with a non-zero status.
    """
    source = utils.coerce_str(source)
    destination = utils.coerce_str(destination)
    LOGGER.info("Moving from %s to %s", source, destination)

    # Nothing to do when both ends are the same path
    if source == destination:
        return

    if try_mv_local:
        # Try using mv, and if that fails, fallback to rsync
        try:
            os.rename(source, destination)
            return
        except OSError:
            LOGGER.debug('os.rename failed, trying with normalized paths', exc_info=True)
            normalized_source = os.path.normpath(source)
            normalized_destination = os.path.normpath(destination)
            try:
                os.rename(normalized_source, normalized_destination)
                return
            except OSError:
                LOGGER.debug(
                    'os.rename failed, falling back to rsync. Source: %s; Destination: %s',
                    normalized_source, normalized_destination, exc_info=True)

    # Rsync file over
    # TODO Do this asyncronously, with restarting failed attempts
    rsync_options = ['-t', '-O', '--protect-args', '-vv', '--chmod=ugo+rw', '-r']
    command = ['rsync'] + rsync_options + [source, destination]
    LOGGER.info("rsync command: %s", command)

    # stderr is folded into stdout so a failure report carries both
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    output = process.communicate()[0]
    if process.returncode != 0:
        message = "Rsync failed with status {}: {}".format(process.returncode, output)
        LOGGER.warning(message)
        raise StorageException(message)
def _get_files_list(self, prefix, show_split_files=True):
    """
    Generator function to return the full path of all files starting with prefix.

    Listings are requested repeatedly, passing the last path of the
    previous listing as ``marker``, until an empty listing is returned.

    :param prefix: All paths returned will start with prefix
    :param bool show_split_files: If True, will show files ending with
        .dura-chunk-#### and .dura-manifest. If False, will show the
        original file name (everything before .dura-manifest)
    :returns: Iterator of paths
    :raises StorageException: if a listing request does not return HTTP 200.
    """
    params = {"prefix": prefix}

    def fetch_paths():
        """Request one listing with the current params and return its paths."""
        LOGGER.debug("URL: %s, params: %s", self.duraspace_url, params)
        response = self.session.get(self.duraspace_url, params=params)
        LOGGER.debug("Response: %s", response)
        if response.status_code != 200:
            LOGGER.warning("%s: Response: %s", response, response.text)
            raise StorageException(
                _("Unable to get list of files in %(prefix)s") % {"prefix": prefix})
        # Response is XML in the form:
        # <space id="self.durastore">
        #   <item>path</item>
        #   <item>path</item>
        # </space>
        root = etree.fromstring(response.content)
        listed = [item.text for item in root]
        LOGGER.debug("Paths first 10: %s", listed[:10])
        LOGGER.debug("Paths last 10: %s", listed[-10:])
        return listed

    paths = fetch_paths()
    # The leading dot is escaped so only a literal ".dura-chunk-NNNN" suffix
    # counts as a chunk; unescaped it would match any character there.
    durachunk_regex = re.compile(r"\.dura-chunk-\d{4}$")
    duramanifest_len = len(self.MANIFEST_SUFFIX)
    while paths:
        for p in paths:
            if not show_split_files:
                # There is exactly one .dura-manifest for chunked files
                # Return the original filename when we find a manifest file
                if p.endswith(self.MANIFEST_SUFFIX):
                    yield utils.coerce_str(p[:-duramanifest_len])
                    continue
                # File chunks skipped - manifest returns original filename
                if durachunk_regex.search(p):
                    continue
            yield utils.coerce_str(p)
        # Ask for the entries that follow the last one already seen
        params["marker"] = paths[-1]
        paths = fetch_paths()
def browse(self, path):
    """
    Browse ``path`` through the parent space.

    Delegates to ``self.space.browse_rsync`` when both ``remote_user`` and
    ``remote_name`` are set, otherwise to ``self.space.browse_local``.

    :param path: Path to browse.
    :returns: Whatever the delegated browse call returns.
    """
    # Support browse so that the Location select works
    is_remote = self.remote_user and self.remote_name
    if not is_remote:
        return self.space.browse_local(path)

    # Remote browsing is always given a path with a trailing slash
    remote_dir = os.path.join(path, '')
    ssh_path = "{}@{}:{}".format(
        self.remote_user, self.remote_name, utils.coerce_str(remote_dir))
    return self.space.browse_rsync(ssh_path)
def _format_host_path(self, path, user=None, host=None):
    """
    Formats a remote path suitable for use with rsync.

    :param path: Path on the remote host.
    :param user: Remote username; ``self.remote_user`` is used when None.
    :param host: Remote hostname; ``self.remote_name`` is used when None.
    :returns: String of the form ``user@host:path``.
    """
    effective_user = self.remote_user if user is None else user
    effective_host = self.remote_name if host is None else host
    return "{}@{}:{}".format(effective_user, effective_host, utils.coerce_str(path))
def move_from_storage_service(self, source_path, destination_path, package=None):
    """
    Moves self.staging_path/src_path to dest_path.

    With both ``remote_user`` and ``remote_name`` set, the destination
    directories are created over rsync and the transfer targets
    ``user@host:destination_path``. Otherwise the directories are created
    locally and a local rename is tried before rsync.

    :param source_path: Path of the staged copy to move.
    :param destination_path: Path to move it to.
    :param package: Not used by this method.
    """
    use_remote = self.remote_user and self.remote_name
    if use_remote:
        # Rsync to Arkivum watched directory
        self.space.create_rsync_directory(
            destination_path, self.remote_user, self.remote_name)
        rsync_dest = "{}@{}:{}".format(
            self.remote_user, self.remote_name, utils.coerce_str(destination_path))
    else:
        self.space.create_local_directory(destination_path)
        rsync_dest = destination_path

    # A local rename is only attempted when no remote host is involved
    self.space.move_rsync(source_path, rsync_dest, try_mv_local=not use_remote)
def _move_rsync(self, source, destination):
    """
    Moves a file from source to destination using rsync.

    All directories leading to destination must exist.
    Space._create_local_directory may be useful.

    :param source: Path to the source file or directory.
    :param destination: Path to the destination file or directory.
    :raises StorageException: if rsync exits with a non-zero status.
    """
    source = utils.coerce_str(source)
    destination = utils.coerce_str(destination)
    LOGGER.info("Rsyncing from %s to %s", source, destination)

    # Nothing to do when both ends are the same path
    if source == destination:
        return

    # Rsync file over
    # TODO Do this asyncronously, with restarting failed attempts
    rsync_options = ['-t', '-O', '--protect-args', '-vv', '--chmod=ugo+rw', '-r']
    command = ['rsync'] + rsync_options + [source, destination]
    LOGGER.info("rsync command: %s", command)

    # stderr is folded into stdout so a failure report carries both
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    output = process.communicate()[0]
    if process.returncode != 0:
        message = "Rsync failed with status {}: {}".format(process.returncode, output)
        LOGGER.warning(message)
        raise StorageException(message)
def post_move_from_storage_service(self, staging_path, destination_path, package=None, *args, **kwargs):
    """
    Hook for any actions that need to be taken after moving from the storage
    service to the final destination.

    Forwards to the child space's hook of the same name when it has one,
    then deletes the staging copy if it differs from the destination.

    :param str staging_path: Path to the staging copy relative to the SS
        internal location. Can be None if destination_path is also None.
    :param str destination_path: Path to the destination copy relative to
        this Space's path. Can be None if staging_path is also None.
    :param package: (Optional) :class:`Package` that is being moved.
    """
    if staging_path is None or destination_path is None:
        # One missing path means neither is usable
        staging_path = destination_path = None
    elif staging_path and destination_path:
        staging_path, destination_path = self._move_from_path_mangling(
            staging_path, destination_path)

    try:
        self.get_child_space().post_move_from_storage_service(
            staging_path=staging_path,
            destination_path=destination_path,
            package=package,
            *args, **kwargs)
    except AttributeError:
        # This is optional for the child class to implement
        pass

    # Delete staging copy, unless it is also the destination
    if staging_path == destination_path:
        return
    try:
        if os.path.isdir(staging_path):
            # rmtree needs a str here: given a unicode string it joins the
            # directory with the names of its children via os.path.join,
            # which can mix encodings and blows up if a filename cannot
            # be converted to unicode
            shutil.rmtree(utils.coerce_str(os.path.normpath(staging_path)))
        elif os.path.isfile(staging_path):
            os.remove(os.path.normpath(staging_path))
    except OSError:
        logging.warning("Unable to remove %s", staging_path, exc_info=True)
def _format_host_path(self, path, user=None, host=None):
    """
    Formats a remote path suitable for use with rsync.

    :param path: Path (or, in daemon mode, module location) on the remote.
    :param user: Remote username; ``self.remote_user`` is used when None.
    :param host: Remote hostname; ``self.remote_name`` is used when None.
    :returns: ``user@host:path``, or ``user@host::path`` with leading
        slashes stripped when ``self.assume_rsync_daemon`` is set.
    """
    user = self.remote_user if user is None else user
    host = self.remote_name if host is None else host

    if not self.assume_rsync_daemon:
        return "{}@{}:{}".format(user, host, utils.coerce_str(path))

    # when talking to a rsync daemon server:
    # - a double colon is needed
    # - location is not a path but a remote module (no slash)
    # when creating a Storage Service Space, path can't be empty and must
    # start with a slash; to work around this restriction, the slash is
    # removed here
    module_location = path.lstrip("/")
    return "{}@{}::{}".format(user, host, utils.coerce_str(module_location))
def create_rsync_directory(self, destination_path, user, host):
    """
    Creates a remote directory structure for destination_path.

    Each ancestor directory is created in turn by rsyncing an empty local
    temporary directory to it.

    :param destination_path: path to create the directories for. Should end
        with a / or a filename, or final directory may not be created. If
        it is empty, no directories are created.
    :param user: Username on remote host
    :param host: Hostname of remote host
    :raises subprocess.CalledProcessError: if an rsync invocation exits
        with a non-zero status.
    """
    # Assemble the directories to create on the remote server, deepest
    # first; they are created one at a time, shallowest first
    directories = []
    path = destination_path
    while path not in ("", "/"):
        directories.append(path)
        parent = os.path.dirname(path)
        if parent == path:
            # dirname is a fixed point for e.g. "//"; stop instead of
            # looping forever
            break
        path = parent
    directories.reverse()

    # Syncing an empty directory will ensure no files get transferred
    temp_dir = os.path.join(tempfile.mkdtemp(), "")
    try:
        # Creates the destination_path directory without copying any files
        # Dir must end in a / for rsync to create it
        for directory in directories:
            path = os.path.join(os.path.dirname(directory), "")
            path = "{}@{}:{}".format(user, host, utils.coerce_str(path))
            cmd = [
                "rsync",
                "-vv",
                "--protect-args",
                "--chmod=ug=rwx,o=rx",
                "--recursive",
                temp_dir,
                path,
            ]
            LOGGER.info("rsync path creation command: %s", cmd)
            try:
                subprocess.check_call(cmd)
            except subprocess.CalledProcessError as e:
                LOGGER.warning("rsync path creation failed: %s", e)
                raise
    finally:
        # Remove the temporary directory on every exit path, including
        # errors other than CalledProcessError (e.g. rsync not installed)
        shutil.rmtree(temp_dir)