def _GetDirCopyTasks(self, dirs, dest): """Get the Tasks to be executed to copy the given directories. If dest is dir-like (ending in a slash), all dirs are copied under the destination. If it is file-like, at most one directory can be provided and it is copied directly to the destination name. File copy tasks are generated recursively for the contents of all directories. Args: dirs: [paths.Path], The directories to copy. dest: paths.Path, The destination to copy the directories to. Returns: [storage_parallel.Task], The file copy tasks to execute. """ tasks = [] for d in dirs: item_dest = self._GetDestinationName(d, dest) expander = self._GetExpander(d) (files, sub_dirs) = expander.ExpandPath(d.Join('*').path) files = [paths.Path(f) for f in sorted(files)] sub_dirs = [paths.Path(d) for d in sorted(sub_dirs)] tasks.extend(self._GetFileCopyTasks(files, item_dest)) tasks.extend(self._GetDirCopyTasks(sub_dirs, item_dest)) return tasks
def Run(self, args):
    """Copy the requested sources to the destination and report the count.

    Args:
      args: The parsed command arguments. This method reads `source` (an
        iterable of path strings), `destination`, `recursive` and
        `num_threads` from it.
    """
    sources = [paths.Path(p) for p in args.source]
    dest = paths.Path(args.destination)
    copier = copying.CopyTaskGenerator()
    tasks = copier.GetCopyTasks(sources, dest, recursive=args.recursive)
    storage_parallel.ExecuteTasks(
        tasks,
        num_threads=args.num_threads,
        progress_bar_label='Copying Files')

    num_copied = len(tasks)
    # Only a count of exactly one takes the singular form; zero is plural
    # too ("Copied [0] files.").
    plural_suffix = '' if num_copied == 1 else 's'
    log.status.write(
        'Copied [{}] file{}.\n'.format(num_copied, plural_suffix))
def GetCopyTasks(self, sources, dest, recursive=False):
    """Produce every copy task required to copy sources to dest.

    Args:
      sources: [paths.Path], The sources (containing optional wildcards) that
        you want to copy.
      dest: paths.Path, The wildcard-free path you want to copy the sources
        to.
      recursive: bool, True to allow recursive copying of directories.

    Raises:
      WildcardError: If dest contains a wildcard.
      LocationMismatchError: If you are trying to copy local files to local
        files.
      DestinationNotDirectoryError: If trying to copy multiple files to a
        single dest name.
      RecursionError: If any of sources are directories, but recursive is
        false.

    Returns:
      [storage_parallel.Task], All the tasks that should be executed to
      perform this copy.
    """
    # Sources are made absolute when they pass through the expander; the
    # destination never does, so it is made absolute by hand here.
    keep_dir_like = dest.is_dir_like
    absolute_dest = self._GetExpander(dest).AbsPath(dest.path)
    dest = paths.Path(absolute_dest)
    if keep_dir_like:
        # NOTE(review): presumably re-appends the trailing slash so the
        # absolute path is still dir-like -- confirm against paths.Path.Join.
        dest = dest.Join('')

    if expansion.PathExpander.HasExpansion(dest.path):
        message = 'Destination [{}] cannot contain wildcards.'.format(
            dest.path)
        raise WildcardError(message)

    # A local destination is only valid when every source is remote.
    if not dest.is_remote and any(not src.is_remote for src in sources):
        raise LocationMismatchError(
            'When destination is a local path, '
            'all sources must be remote paths.')

    files, dirs = self._ExpandFilesToCopy(sources)

    # A file-like destination can receive exactly one file or directory.
    if not dest.is_dir_like and len(files) + len(dirs) > 1:
        raise DestinationNotDirectoryError(
            'When copying multiple sources, '
            'destination must be a directory '
            '(a path ending with a slash).')

    if not recursive and dirs:
        raise RecursionError(
            'Source path matches directories '
            'but --recursive was not specified.')

    tasks = list(self._GetFileCopyTasks(files, dest))
    tasks += self._GetDirCopyTasks(dirs, dest)
    return tasks
def testMultiSourceNoDirDest(self):
    """Two sources with a destination lacking a trailing slash must fail."""
    expected_message = (
        r'When copying multiple sources, destination must be a directory '
        r'\(a path ending with a slash\).')
    with self.assertRaisesRegex(copying.Error, expected_message):
        generator = copying.CopyTaskGenerator()
        sources = [self._Abs('some/file'), self._Abs('another/file')]
        destination = paths.Path('gs://bucket1/o')
        generator.GetCopyTasks(sources, destination)
def _ExpandFilesToCopy(self, sources): """Do initial expansion of all the wildcard arguments. Args: sources: [paths.Path], The sources (containing optional wildcards) that you want to copy. Returns: ([paths.Path], [paths.Path]), The file and directory paths that the initial set of sources expanded to. """ files = set() dirs = set() for s in sources: expander = self._GetExpander(s) (current_files, current_dirs) = expander.ExpandPath(s.path) files.update(current_files) dirs.update(current_dirs) return ([paths.Path(f) for f in sorted(files)], [paths.Path(d) for d in sorted(dirs)])
def _Abs(self, path):
    """Turn a '/'-separated path string into a paths.Path.

    Strings starting with 'gs://' are wrapped unchanged. Any other string has
    its '/' separators replaced with os.sep and is joined onto
    self.root_path before being wrapped.

    Args:
      path: str, A 'gs://' URL or a '/'-separated relative path.

    Returns:
      paths.Path, The wrapped path.
    """
    if not path.startswith('gs://'):
        native_relative = path.replace('/', os.sep)
        path = os.path.join(self.root_path, native_relative)
    return paths.Path(path)