Esempio n. 1
0
    def __init__(self,
                 ds_id: str,
                 obs_file_system: s3fs.S3FileSystem,
                 dir_path: str,
                 zarr_kwargs: Dict[str, Any] = None,
                 exception_type=ValueError):
        """Collect the numeric pyramid levels found under *dir_path*.

        An entry counts as a level when it is either a ``<n>.zarr``
        directory or a ``<n>.link`` file whose stem ``<n>`` is all digits.
        Levels must form the contiguous range ``0..num_levels-1``;
        otherwise *exception_type* is raised.
        """
        level_paths = {}
        for entry in obs_file_system.walk(dir_path, directories=True):
            # NOTE(review): assumes walk() yields names that splitext()
            # reduces to a bare digit stem — confirm against the s3fs API.
            is_zarr_dir = entry.endswith(".zarr") and obs_file_system.isdir(entry)
            is_link_file = entry.endswith(".link") and obs_file_system.isfile(entry)
            if not (is_zarr_dir or is_link_file):
                continue
            stem, _ = os.path.splitext(entry)
            if stem.isdigit():
                level_paths[int(stem)] = dir_path + "/" + entry

        num_levels = len(level_paths)
        # Consistency check: every level 0..num_levels-1 must be present.
        for expected in range(num_levels):
            if expected not in level_paths:
                raise exception_type(
                    f"Invalid dataset descriptor {ds_id!r}: missing level {expected} in {dir_path}"
                )

        super().__init__(kwargs=zarr_kwargs)
        self._obs_file_system = obs_file_system
        self._dir_path = dir_path
        self._level_paths = level_paths
        self._num_levels = num_levels
Esempio n. 2
0
    def __init__(self,
                 obs_file_system: s3fs.S3FileSystem,
                 dir_path: str,
                 zarr_kwargs: Dict[str, Any] = None,
                 ds_id: str = None,
                 exception_type: type = ValueError):
        """Index the numeric pyramid levels stored under *dir_path*.

        A level is the last path component of a walked entry when its
        stem is all digits and the entry is either a ``.zarr`` directory
        or a ``.link`` file. Each level maps to ``(extension, path)``.
        Levels must be contiguous from 0; otherwise *exception_type*
        is raised.
        """
        level_paths = {}
        for entry in obs_file_system.walk(dir_path, directories=True):
            last_component = entry.split("/")[-1]
            stem, ext = os.path.splitext(last_component)
            if not stem.isdigit():
                continue
            if ((entry.endswith(".zarr") and obs_file_system.isdir(entry))
                    or (entry.endswith(".link") and obs_file_system.isfile(entry))):
                level_paths[int(stem)] = (ext, dir_path + "/" + last_component)

        num_levels = len(level_paths)
        # Consistency check: levels must cover 0..num_levels-1 without gaps.
        for expected in range(num_levels):
            if expected not in level_paths:
                raise exception_type(
                    f"Invalid multi-level dataset {ds_id!r}: missing level {expected} in {dir_path}"
                )

        super().__init__(ds_id=ds_id, parameters=zarr_kwargs)
        self._obs_file_system = obs_file_system
        self._dir_path = dir_path
        self._level_paths = level_paths
        self._num_levels = num_levels
Esempio n. 3
0
def _s3_to_local_cp(from_path: str, to_path: str, overwrite: bool,
                    fs: s3fs.S3FileSystem, **kwargs) -> None:
    """Copy an S3 path to the local filesystem.

    If ``fs.walk(from_path)`` yields entries, *from_path* is treated as a
    directory and its tree is mirrored under *to_path* using a thread
    pool; otherwise a single file is downloaded.

    :param from_path: source S3 path
    :param to_path: destination path on the local filesystem
    :param overwrite: when False, raise if the destination already exists
    :param fs: S3 file system used to read the source
    :param kwargs: forwarded to ``fs.get``; ``num_threads`` (default 100)
        sizes the download thread pool and is consumed here
    :raises ValueError: if *overwrite* is False and the destination exists
    """
    from_path = _norm_s3_path(from_path)
    to_path = local._norm_path(to_path)
    # NOTE(review): assumes fs.walk returns a flat list of file paths
    # (older s3fs behaviour); an empty result means a single file.
    files = fs.walk(from_path)

    if files:
        ################################
        # Copying a directory of files #
        ################################

        # Check to see if to_path already exists
        if not overwrite and local.already_exists(to_path):
            raise ValueError(f"Overwrite set to False and {to_path!r} "
                             f"already exists")
        elif local.already_exists(to_path):
            # Overwrite requested: replace the destination wholesale.
            local.rm(to_path)

        # Make the root directory to fill in
        os.makedirs(to_path)

        # Need to create any additional subfolders
        _local_create_subfolders(from_path, to_path, fs)

        to_files = [
            os.path.join(to_path, f.replace(from_path + "/", ""))
            for f in files
        ]

        num_threads = kwargs.pop("num_threads", 100)
        # Turn off connectionpool warnings
        logging.getLogger("urllib3.connectionpool").setLevel(logging.CRITICAL)
        with ThreadPoolExecutor(num_threads) as executor:
            for from_file, to_file in zip(files, to_files):
                executor.submit(fs.get, from_file, to_file, **kwargs)
    else:
        ######################
        # Copy a single file #
        ######################
        # Fix: the destination is local, so use the local existence check's
        # signature (passing `fs` matches the S3-side helper, not this one;
        # compare the directory branch above).
        if not overwrite and local.already_exists(to_path):
            raise ValueError(f"Overwrite set to False and {to_path!r} already "
                             f"exists")

        # Consume num_threads here too so fs.get never receives it.
        kwargs.pop("num_threads", None)
        fs.get(from_path, to_path, **kwargs)
Esempio n. 4
0
def _s3_to_s3_cp(from_path: str, to_path: str, overwrite: bool,
                 fs: s3fs.S3FileSystem, **kwargs) -> None:
    """Copy an S3 path to another S3 path on the same file system.

    If ``fs.walk(from_path)`` yields entries, *from_path* is treated as a
    directory and every file is copied with a thread pool; otherwise a
    single object is copied.

    :param from_path: source S3 path
    :param to_path: destination S3 path
    :param overwrite: when False, raise if any destination already exists
    :param fs: S3 file system performing the copy
    :param kwargs: forwarded to ``fs.copy``; ``num_threads`` (default 100)
        sizes the thread pool and is consumed here
    :raises ValueError: if *overwrite* is False and a destination exists
    """
    from_path = _norm_s3_path(from_path)
    to_path = _norm_s3_path(to_path)
    files = fs.walk(from_path)

    if files:
        ################################
        # Copying a directory of files #
        ################################
        to_files = [
            os.path.join(to_path, f.replace(from_path + "/", ""))
            for f in files
        ]

        # Ensure we aren't overwriting any files
        if not overwrite:
            for to_file in to_files:
                if already_exists(to_file, fs):
                    raise ValueError(
                        f"Overwrite set to False and {to_file!r} exists")

        num_threads = kwargs.pop("num_threads", 100)
        # Turn off connectionpool warnings
        logging.getLogger("urllib3.connectionpool").setLevel(logging.CRITICAL)
        with ThreadPoolExecutor(num_threads) as executor:
            for from_file, to_file in zip(files, to_files):
                executor.submit(fs.copy, from_file, to_file, **kwargs)
    else:
        #########################
        # Copying a single file #
        #########################

        # Ensure we aren't overwriting the file.
        # Fix: the message referenced `to_file`, which is undefined in this
        # branch and would raise NameError instead of the intended ValueError.
        if not overwrite and already_exists(to_path, fs):
            raise ValueError(f"Overwrite set to False and {to_path!r} exists")

        # Consume num_threads here too so fs.copy never receives it.
        kwargs.pop("num_threads", None)
        fs.copy(from_path, to_path, **kwargs)
Esempio n. 5
0
 def walk(self, path, *args, **kwargs):
     """Walk *path*, translating it with get_key() before delegating
     to S3FileSystem.walk; extra arguments pass through unchanged."""
     key = get_key(path)
     return S3FileSystem.walk(self, key, *args, **kwargs)