def _s3_to_local_cp(from_path: str, to_path: str, overwrite: bool,
                    fs: s3fs.S3FileSystem, **kwargs) -> None:
    """
    Copy file(s) from s3 down to the local filesystem.

    Parameters
    -----------
    from_path : str
        s3 path containing the file(s) to copy
    to_path : str
        Local path to copy the file(s) to
    overwrite : bool
        Should to_path be overwritten if it already exists?
    fs : s3fs.S3FileSystem
        Filesystem handle used for the transfer
    kwargs : Dict
        "num_threads" (default 100) sets the thread-pool size when copying
        a directory of files; all remaining kwargs are forwarded to fs.get

    Returns
    --------
    None
    """
    from_path = _norm_s3_path(from_path)
    to_path = local._norm_path(to_path)

    # BUG FIX: pop num_threads unconditionally. Previously it was popped
    # only in the directory branch, so a single-file copy leaked a stray
    # num_threads kwarg into fs.get().
    num_threads = kwargs.pop("num_threads", 100)

    files = fs.walk(from_path)
    if files:
        ################################
        # Copying a directory of files #
        ################################
        # Check to see if to_path already exists
        if not overwrite and local.already_exists(to_path):
            raise ValueError(f"Overwrite set to False and {to_path!r} "
                             f"already exists")
        elif local.already_exists(to_path):
            local.rm(to_path)

        # Make the root directory to fill in
        os.makedirs(to_path)
        # Need to create any additional subfolders
        _local_create_subfolders(from_path, to_path, fs)

        # Mirror the s3 key layout under to_path
        to_files = [
            os.path.join(to_path, f.replace(from_path + "/", ""))
            for f in files
        ]

        # Turn off connectionpool warnings
        logging.getLogger("urllib3.connectionpool").setLevel(logging.CRITICAL)
        with ThreadPoolExecutor(num_threads) as executor:
            for from_file, to_file in zip(files, to_files):
                executor.submit(fs.get, from_file, to_file, **kwargs)
    else:
        ######################
        # Copy a single file #
        ######################
        # BUG FIX: local.already_exists takes only the path; `fs` was being
        # passed as a stray second positional argument.
        if not overwrite and local.already_exists(to_path):
            raise ValueError(f"Overwrite set to False and {to_path!r} already "
                             f"exists")
        fs.get(from_path, to_path, **kwargs)
def already_exists(path: str, **kwargs) -> bool:
    """
    Check if a file/directory already exists

    Parameters
    -----------
    path : str
        File / Directory path
    kwargs : Dict
        If path is an s3 path, fs: s3fs.S3FileSystem can be optionally
        specified

    Returns
    --------
    bool
    """
    # Local paths never need the optional fs handle
    if not s3.is_s3path(path):
        return local.already_exists(path)
    return s3.already_exists(path, **kwargs)
def test_already_exists_non_dir(self, sample_dir):
    """A directory path that was never created must not be reported as existing."""
    missing_dir = os.path.join(sample_dir, "foo/fizz/bar/")
    assert not local.already_exists(missing_dir)
def test_already_exists_non_file(self, sample_dir):
    """A file path that was never created must not be reported as existing."""
    missing_file = os.path.join(sample_dir, "foo/foobar.txt")
    assert not local.already_exists(missing_file)
def test_already_exists_dir(self, sample_dir):
    """An existing directory inside the sample tree is reported as existing."""
    existing_dir = os.path.join(sample_dir, "foo")
    assert local.already_exists(existing_dir)
def cp(from_path: str, to_path: str, overwrite: bool = True,
       include_folder_name: bool = True,
       fs: Optional[s3fs.S3FileSystem] = None, **kwargs) -> None:
    """
    Copy a file/directory to/from s3 and your local machine

    Parameters
    -----------
    from_path : str
        File path containing file(s) to copy
    to_path : str
        File path to copy file(s) to
    overwrite : bool (default True)
        Should the to_path be overwritten if it already exists?
    include_folder_name : bool (default True)
        If copying a directory, add the directory name automatically to
        the to_path. i.e. if True, the entire folder will be copied to the
        to_path. If False, the *contents* of the directory will be copied
        to the to_path
    fs : s3fs.S3FileSystem
        If None, an instance of S3FileSystem will be created
    **kwargs
        "acl" to specify how file permission are set
        "num_threads" to specify number of threads when copying (default 100)
            NOTE: This is only used when copying a directory of files
        Extra args to be passed to S3FileSystem

    Returns
    --------
    None
    """
    s3_file_args = {
        "acl": kwargs.pop("acl", "bucket-owner-full-control"),
        "num_threads": kwargs.pop("num_threads", 100),
    }
    if fs is None:
        fs = s3fs.S3FileSystem(**kwargs)

    if is_s3path(from_path):
        ##################################
        # Copy s3 file(s) to local or s3 #
        ##################################
        # BUG FIX: already_exists accepts only `path` positionally
        # (signature is (path, **kwargs)), so fs must be passed by keyword
        # to reach s3.already_exists — the old positional call raised
        # TypeError.
        if not already_exists(from_path, fs=fs):
            raise ValueError(f"from_path: {from_path!r} does not exist")

        # Use existing file/directory name if one was not specified
        if include_folder_name and is_dir(from_path, fs):
            folder_name = os.path.basename(os.path.normpath(from_path))
            to_path = os.path.join(to_path, folder_name)
            logger.debug(f"to_path after adding folder name: {to_path!r}")

        if is_s3path(to_path):
            #################
            # s3 -> s3 copy #
            #################
            logger.debug(
                f"Copying s3 files: {from_path!r} to s3 location: {to_path!r}")
            _s3_to_s3_cp(from_path, to_path, overwrite, fs, **s3_file_args)
        else:
            #####################
            # s3 --> local copy #
            #####################
            logger.debug(
                f"Copying s3 files: {from_path!r} to local location: "
                f"{to_path!r}"
            )
            _s3_to_local_cp(from_path, to_path, overwrite, fs, **s3_file_args)
    else:
        ############################
        # Copy local file(s) to s3 #
        ############################
        if not local.already_exists(from_path):
            raise ValueError(f"{from_path!r} does not exist")

        if include_folder_name and os.path.isdir(from_path):
            folder_name = os.path.basename(local._norm_path(from_path))
            to_path = os.path.join(to_path, folder_name)
            logger.debug(f"to_path after adding folder name: {to_path!r}")

        logger.debug(
            f"Copying local files: {from_path!r} to s3 location: {to_path!r}")
        _local_to_s3_cp(from_path, to_path, overwrite, fs, **s3_file_args)