def __init__(self,
             ds_id: str,
             obs_file_system: s3fs.S3FileSystem,
             dir_path: str,
             zarr_kwargs: Dict[str, Any] = None,
             exception_type=ValueError):
    """Discover the numbered pyramid levels stored under *dir_path*.

    Levels are directories named ``<level>.zarr`` or files named
    ``<level>.link`` directly inside *dir_path*. Levels must be
    contiguous starting at 0.

    :param ds_id: dataset identifier, used in error messages
    :param obs_file_system: the S3 file system to scan
    :param dir_path: path of the multi-level dataset directory
    :param zarr_kwargs: optional keyword arguments forwarded to the base class
    :param exception_type: exception class raised on an inconsistent level set
    :raises exception_type: if any level in ``range(num_levels)`` is missing
    """
    level_paths = {}
    for entry in obs_file_system.walk(dir_path, directories=True):
        # Bug fix: walk() yields slash-separated paths, so splitext() on the
        # whole entry kept the directory part and basename.isdigit() was
        # never True — no level was ever detected. Examine only the last
        # path component, and build the level path from it (the previous
        # `dir_path + "/" + entry` would also have double-prefixed the path).
        level_name = entry.split("/")[-1]
        basename, _ = os.path.splitext(level_name)
        if not basename.isdigit():
            continue
        level = int(basename)
        if entry.endswith(".zarr") and obs_file_system.isdir(entry):
            level_paths[level] = dir_path + "/" + level_name
        elif entry.endswith(".link") and obs_file_system.isfile(entry):
            level_paths[level] = dir_path + "/" + level_name
    num_levels = len(level_paths)
    # Consistency check: levels must form 0..num_levels-1 without gaps.
    for level in range(num_levels):
        if level not in level_paths:
            raise exception_type(
                f"Invalid dataset descriptor {ds_id!r}: missing level {level} in {dir_path}"
            )
    super().__init__(kwargs=zarr_kwargs)
    self._obs_file_system = obs_file_system
    self._dir_path = dir_path
    self._level_paths = level_paths
    self._num_levels = num_levels
def __init__(self,
             obs_file_system: s3fs.S3FileSystem,
             dir_path: str,
             zarr_kwargs: Dict[str, Any] = None,
             ds_id: str = None,
             exception_type: type = ValueError):
    """Scan *dir_path* on S3 for numbered pyramid levels.

    A level is either a directory ``<level>.zarr`` or a file
    ``<level>.link`` directly below *dir_path*. The discovered levels
    must be contiguous, starting at level 0.

    :param obs_file_system: the S3 file system to scan
    :param dir_path: path of the multi-level dataset directory
    :param zarr_kwargs: optional parameters forwarded to the base class
    :param ds_id: dataset identifier, used in error messages
    :param exception_type: exception class raised on a missing level
    :raises exception_type: if any level in ``range(num_levels)`` is absent
    """
    level_paths = {}
    for entry in obs_file_system.walk(dir_path, directories=True):
        # Only the last path component carries the "<level>.<ext>" name.
        name = entry.split("/")[-1]
        stem, ext = os.path.splitext(name)
        if not stem.isdigit():
            continue
        # Short-circuit keeps remote isfile() from being called when the
        # entry is already recognized as a ".zarr" directory.
        if (entry.endswith(".zarr") and obs_file_system.isdir(entry)) \
                or (entry.endswith(".link") and obs_file_system.isfile(entry)):
            level_paths[int(stem)] = (ext, dir_path + "/" + name)
    num_levels = len(level_paths)
    # Consistency check: levels must be 0..num_levels-1 with no gaps.
    for level in range(num_levels):
        if level in level_paths:
            continue
        raise exception_type(
            f"Invalid multi-level dataset {ds_id!r}: missing level {level} in {dir_path}"
        )
    super().__init__(ds_id=ds_id, parameters=zarr_kwargs)
    self._obs_file_system = obs_file_system
    self._dir_path = dir_path
    self._level_paths = level_paths
    self._num_levels = num_levels
def _s3_to_local_cp(from_path: str,
                    to_path: str,
                    overwrite: bool,
                    fs: s3fs.S3FileSystem,
                    **kwargs) -> None:
    """Copy a file or a whole directory tree from S3 to the local filesystem.

    :param from_path: source S3 path (file or directory)
    :param to_path: destination local path
    :param overwrite: if False, refuse to replace an existing destination
    :param fs: the S3 file system to read from
    :param kwargs: extra options for ``fs.get``; ``num_threads`` (default 100)
        sizes the download thread pool
    :raises ValueError: if *overwrite* is False and *to_path* already exists
    """
    from_path = _norm_s3_path(from_path)
    to_path = local._norm_path(to_path)
    files = fs.walk(from_path)
    if files:
        ################################
        # Copying a directory of files #
        ################################
        # Check to see if to_path already exists
        if not overwrite and local.already_exists(to_path):
            raise ValueError(f"Overwrite set to False and {to_path!r} "
                             f"already exists")
        elif local.already_exists(to_path):
            local.rm(to_path)
        # Make the root directory to fill in
        os.makedirs(to_path)
        # Need to create any additional subfolders
        _local_create_subfolders(from_path, to_path, fs)
        to_files = [
            os.path.join(to_path, f.replace(from_path + "/", ""))
            for f in files
        ]
        num_threads = kwargs.pop("num_threads", 100)
        # Turn off connectionpool warnings
        logging.getLogger("urllib3.connectionpool").setLevel(logging.CRITICAL)
        with ThreadPoolExecutor(num_threads) as executor:
            futures = [
                executor.submit(fs.get, from_file, to_file, **kwargs)
                for from_file, to_file in zip(files, to_files)
            ]
        # Bug fix: submit() without result() silently swallows any download
        # exception, leaving a partial copy with no diagnostic. Re-raise the
        # first failure here.
        for future in futures:
            future.result()
    else:
        ######################
        # Copy a single file #
        ######################
        # Bug fix: the existence check passed the *S3* file system to a local
        # check (copy-paste from the s3-to-s3 variant); the directory branch
        # above correctly calls local.already_exists() without it.
        if not overwrite and local.already_exists(to_path):
            raise ValueError(f"Overwrite set to False and {to_path!r} already "
                             f"exists")
        fs.get(from_path, to_path, **kwargs)
def _s3_to_s3_cp(from_path: str,
                 to_path: str,
                 overwrite: bool,
                 fs: s3fs.S3FileSystem,
                 **kwargs) -> None:
    """Copy a file or a whole directory tree between two S3 locations.

    :param from_path: source S3 path (file or directory)
    :param to_path: destination S3 path
    :param overwrite: if False, refuse to replace existing destination objects
    :param fs: the S3 file system to copy within
    :param kwargs: extra options for ``fs.copy``; ``num_threads`` (default
        100) sizes the copy thread pool
    :raises ValueError: if *overwrite* is False and a destination exists
    """
    from_path = _norm_s3_path(from_path)
    to_path = _norm_s3_path(to_path)
    files = fs.walk(from_path)
    if files:
        ################################
        # Copying a directory of files #
        ################################
        to_files = [
            os.path.join(to_path, f.replace(from_path + "/", ""))
            for f in files
        ]
        # Ensure we aren't overwriting any files
        if not overwrite:
            for to_file in to_files:
                if already_exists(to_file, fs):
                    raise ValueError(
                        f"Overwrite set to False and {to_file!r} exists")
        num_threads = kwargs.pop("num_threads", 100)
        # Turn off connectionpool warnings
        logging.getLogger("urllib3.connectionpool").setLevel(logging.CRITICAL)
        with ThreadPoolExecutor(num_threads) as executor:
            futures = [
                executor.submit(fs.copy, from_file, to_file, **kwargs)
                for from_file, to_file in zip(files, to_files)
            ]
        # Bug fix: submit() without result() silently swallows any copy
        # exception; surface the first failure.
        for future in futures:
            future.result()
    else:
        #########################
        # Copying a single file #
        #########################
        # Bug fix: this branch referenced the undefined name `to_file`,
        # raising NameError instead of the intended ValueError; the
        # destination here is `to_path`.
        if not overwrite and already_exists(to_path, fs):
            raise ValueError(f"Overwrite set to False and {to_path!r} exists")
        fs.copy(from_path, to_path, **kwargs)
def walk(self, path, *args, **kwargs):
    """Walk the tree below *path*, translating it via ``get_key()`` before
    delegating to :meth:`S3FileSystem.walk`."""
    key = get_key(path)
    return S3FileSystem.walk(self, key, *args, **kwargs)