Example #1
def _s3_to_local_cp(from_path: str, to_path: str, overwrite: bool,
                    fs: s3fs.S3FileSystem, **kwargs) -> None:
    from_path = _norm_s3_path(from_path)
    to_path = local._norm_path(to_path)
    # fs.walk returns the object keys under from_path as a flat list
    # (empty when from_path refers to a single object rather than a prefix)
    files = fs.walk(from_path)

    if files:
        ################################
        # Copying a directory of files #
        ################################

        # Check to see if to_path already exists
        if not overwrite and local.already_exists(to_path):
            raise ValueError(f"Overwrite set to False and {to_path!r} "
                             f"already exists")
        elif local.already_exists(to_path):
            local.rm(to_path)

        # Make the root directory to fill in
        os.makedirs(to_path)

        # Need to create any additional subfolders
        _local_create_subfolders(from_path, to_path, fs)

        to_files = [
            os.path.join(to_path, f.replace(from_path + "/", ""))
            for f in files
        ]

        num_threads = kwargs.pop("num_threads", 100)
        # Turn off connectionpool warnings
        logging.getLogger("urllib3.connectionpool").setLevel(logging.CRITICAL)
        with ThreadPoolExecutor(num_threads) as executor:
            for from_file, to_file in zip(files, to_files):
                executor.submit(fs.get, from_file, to_file, **kwargs)
    else:
        ######################
        # Copy a single file #
        ######################
        if not overwrite and local.already_exists(to_path):
            raise ValueError(f"Overwrite set to False and {to_path!r} already "
                             f"exists")

        fs.get(from_path, to_path, **kwargs)
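
The helper above fans the per-file downloads out over a thread pool. Below is a minimal, self-contained sketch of that same fan-out pattern; the bucket/prefix and destination are hypothetical, credentials are assumed to be configured, and fs.find is used because current fsspec-based s3fs versions no longer return the flat key list from walk that this example relies on:

import os
from concurrent.futures import ThreadPoolExecutor

import s3fs


def download_prefix(bucket_prefix: str, dest_dir: str, num_threads: int = 32) -> None:
    """Download every object under an S3 prefix into dest_dir with a thread pool."""
    fs = s3fs.S3FileSystem()
    keys = fs.find(bucket_prefix)  # flat list of object keys under the prefix
    with ThreadPoolExecutor(num_threads) as executor:
        for key in keys:
            rel = key[len(bucket_prefix):].lstrip("/")
            local_path = os.path.join(dest_dir, rel)
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            # fs.get downloads one object; errors only surface if the returned
            # futures are inspected, which this sketch skips for brevity
            executor.submit(fs.get, key, local_path)


# Hypothetical call; "my-bucket/raw-data" must exist in your account
# download_prefix("my-bucket/raw-data", "/tmp/raw-data")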
Example #2
def already_exists(path: str, **kwargs) -> bool:
    """ Check if a file/directory already exists

    Parameters
    -----------
    path : str
        File / Directory path

    kwargs : Dict
        If path is an s3 path, fs: s3fs.S3FileSystem can be optionally specified

    Returns
    --------
    bool
    """
    if s3.is_s3path(path):
        return s3.already_exists(path, **kwargs)
    else:
        return local.already_exists(path)
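
A quick usage sketch of this dispatcher; the paths and bucket name are placeholders, and passing fs through kwargs is what the docstring describes for s3 paths:

import s3fs

# Local path: dispatched to local.already_exists
print(already_exists("/tmp/some_file.txt"))

# S3 path: dispatched to s3.already_exists; an existing S3FileSystem can be
# passed through kwargs so the connection is reused
fs = s3fs.S3FileSystem()
print(already_exists("s3://my-bucket/some_key.txt", fs=fs))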
Example #3
    def test_already_exists_non_dir(self, sample_dir):
        path = os.path.join(sample_dir, "foo/fizz/bar/")
        assert not local.already_exists(path)
Example #4
    def test_already_exists_non_file(self, sample_dir):
        path = os.path.join(sample_dir, "foo/foobar.txt")
        assert not local.already_exists(path)
Example #5
    def test_already_exists_dir(self, sample_dir):
        path = os.path.join(sample_dir, "foo")
        assert local.already_exists(path)
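
The three tests above depend on a sample_dir pytest fixture that is not shown. A plausible minimal version (names and contents assumed, not taken from the source) creates a real "foo" directory so that only the missing-file and missing-directory paths fail the check:

import os

import pytest


@pytest.fixture
def sample_dir(tmp_path):
    # Assumed layout: an existing "foo" directory with one real file, so the
    # "foo/fizz/bar/" and "foo/foobar.txt" paths used above do not exist
    os.makedirs(tmp_path / "foo")
    (tmp_path / "foo" / "bar.txt").write_text("sample contents")
    return str(tmp_path)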
Example #6
def cp(from_path: str,
       to_path: str,
       overwrite: bool = True,
       include_folder_name: bool = True,
       fs: Optional[s3fs.S3FileSystem] = None,
       **kwargs) -> None:
    """ Copy a file/directory to/from s3 and your local machine

    Parameters
    -----------
    from_path : str
        File path containing file(s) to copy

    to_path : str
        File path to copy file(s) to

    overwrite : bool (default True)
        Should the to_path be overwritten if it already exists?

    include_folder_name : bool (default True)
        If copying a directory, automatically append the directory name to
        the to_path. If True, the entire folder is copied into the to_path;
        if False, only the *contents* of the directory are copied into the
        to_path

    fs : s3fs.S3FileSystem
        If None, an instance of S3FileSystem will be created

    **kwargs
        "acl" to specify how file permission are set
        "num_threads" to specify number of threads when copying (default 100)
            NOTE: This is only used when copying a directory of files
        Extra args to be passed to S3FileSystem

    Returns
    --------
    None
    """
    s3FileArgs = {
        "acl": kwargs.pop("acl", "bucket-owner-full-control"),
        "num_threads": kwargs.pop("num_threads", 100)
    }

    if fs is None:
        fs = s3fs.S3FileSystem(**kwargs)

    if is_s3path(from_path):
        ##################################
        # Copy s3 file(s) to local or s3 #
        ##################################
        if not already_exists(from_path, fs):
            raise ValueError(f"from_path: {from_path!r} does not exist")

        # Use existing file/directory name if one was not specified
        if include_folder_name and is_dir(from_path, fs):
            folder_name = os.path.basename(os.path.normpath(from_path))
            to_path = os.path.join(to_path, folder_name)
            logger.debug(f"to_path after adding folder name: {to_path!r}")

        if is_s3path(to_path):
            #################
            # s3 -> s3 copy #
            #################
            logger.debug(
                f"Copying s3 files: {from_path!r} to s3 location: {to_path!r}")
            _s3_to_s3_cp(from_path, to_path, overwrite, fs, **s3FileArgs)
        else:
            #####################
            # s3 --> local copy #
            #####################
            logger.debug(
                f"Copying s3 files: {from_path!r} to local location: {to_path!r}"
            )
            _s3_to_local_cp(from_path, to_path, overwrite, fs, **s3FileArgs)

    else:
        ############################
        # Copy local file(s) to s3 #
        ############################
        if not local.already_exists(from_path):
            raise ValueError(f"{from_path!r} does not exist")

        if include_folder_name and os.path.isdir(from_path):
            folder_name = os.path.basename(local._norm_path(from_path))
            to_path = os.path.join(to_path, folder_name)
            logger.debug(f"to_path after adding folder name: {to_path!r}")

        logger.debug(
            f"Copying local files: {from_path!r} to s3 location: {to_path!r}")
        _local_to_s3_cp(from_path, to_path, overwrite, fs, **s3FileArgs)
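
Typical calls to cp might look like the following sketch. The bucket and local paths are placeholders, credentials are assumed to be configured for s3fs, and the "s3://" prefix is assumed to be what is_s3path recognizes:

import s3fs

fs = s3fs.S3FileSystem()

# Local directory -> s3: the whole "reports" folder is created under the prefix
cp("/data/reports", "s3://my-bucket/backups/", fs=fs)

# Same copy, but only the *contents* of "reports" land under the prefix
cp("/data/reports", "s3://my-bucket/backups/", include_folder_name=False, fs=fs)

# s3 -> local, refusing to clobber an existing destination
cp("s3://my-bucket/backups/reports", "/tmp/reports", overwrite=False, fs=fs)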