def __init__(self,
             ds_id: str,
             obs_file_system: s3fs.S3FileSystem,
             dir_path: str,
             zarr_kwargs: Dict[str, Any] = None,
             exception_type=ValueError):
    """Collect the pyramid levels stored under *dir_path*.

    A level is either a ``<level>.zarr`` directory or a ``<level>.link``
    file whose stem is the decimal level number.  Levels must be
    contiguous, starting at 0.

    :param ds_id: Dataset identifier, used in error messages.
    :param obs_file_system: The object-storage file system.
    :param dir_path: Path of the directory containing the levels.
    :param zarr_kwargs: Optional keyword arguments passed on to the
        base class.
    :param exception_type: Exception class raised on an invalid layout.
    :raise exception_type: If any level in ``range(num_levels)`` is missing.
    """
    level_paths = {}
    for entry in obs_file_system.walk(dir_path, directories=True):
        # BUGFIX: entry is a full object path (it is passed verbatim to
        # isdir()/isfile() below), so splitext() must be applied to its
        # last component only — otherwise the stem contains "/"-separated
        # prefixes, isdigit() never matches, and no level is ever found.
        # Using the last component also avoids doubling the path prefix
        # when building the level path.
        level_dir = entry.split("/")[-1]
        basename = None
        if entry.endswith(".zarr") and obs_file_system.isdir(entry):
            basename, _ = os.path.splitext(level_dir)
        elif entry.endswith(".link") and obs_file_system.isfile(entry):
            basename, _ = os.path.splitext(level_dir)
        if basename is not None and basename.isdigit():
            level = int(basename)
            level_paths[level] = dir_path + "/" + level_dir

    num_levels = len(level_paths)
    # Consistency check: levels must form the contiguous range 0..n-1.
    for level in range(num_levels):
        if level not in level_paths:
            raise exception_type(
                f"Invalid dataset descriptor {ds_id!r}: missing level {level} in {dir_path}"
            )

    super().__init__(kwargs=zarr_kwargs)
    self._obs_file_system = obs_file_system
    self._dir_path = dir_path
    self._level_paths = level_paths
    self._num_levels = num_levels
def __init__(self,
             obs_file_system: s3fs.S3FileSystem,
             dir_path: str,
             zarr_kwargs: Dict[str, Any] = None,
             ds_id: str = None,
             exception_type: type = ValueError):
    """Collect the pyramid levels stored under *dir_path*.

    A level is either a ``<level>.zarr`` directory or a ``<level>.link``
    file whose stem is the decimal level number.  Levels must be
    contiguous, starting at 0.

    :param obs_file_system: The object-storage file system.
    :param dir_path: Path of the directory containing the levels.
    :param zarr_kwargs: Optional parameters passed on to the base class.
    :param ds_id: Dataset identifier, used in error messages.
    :param exception_type: Exception class raised on an invalid layout.
    :raise exception_type: If any level in ``range(num_levels)`` is missing.
    """
    level_paths = {}
    # List only the direct children of dir_path.  walk() would recurse
    # into every .zarr directory and enumerate every chunk object, which
    # is needlessly expensive on object storage; only direct children
    # can be level entries anyway.
    for entry in obs_file_system.ls(dir_path, detail=False):
        level_dir = entry.split("/")[-1]
        basename, ext = os.path.splitext(level_dir)
        if basename.isdigit():
            level = int(basename)
            if entry.endswith(".zarr") and obs_file_system.isdir(entry):
                level_paths[level] = (ext, dir_path + "/" + level_dir)
            elif entry.endswith(".link") and obs_file_system.isfile(entry):
                level_paths[level] = (ext, dir_path + "/" + level_dir)

    num_levels = len(level_paths)
    # Consistency check: levels must form the contiguous range 0..n-1.
    for level in range(num_levels):
        if level not in level_paths:
            raise exception_type(
                f"Invalid multi-level dataset {ds_id!r}: missing level {level} in {dir_path}"
            )

    super().__init__(ds_id=ds_id, parameters=zarr_kwargs)
    self._obs_file_system = obs_file_system
    self._dir_path = dir_path
    self._level_paths = level_paths
    self._num_levels = num_levels
def __init__(self,
             s3_file_system: s3fs.S3FileSystem,
             dir_path: str,
             zarr_kwargs: Dict[str, Any] = None,
             ds_id: str = None,
             chunk_cache_capacity: int = None,
             exception_type: type = ValueError):
    """Collect the pyramid levels stored under *dir_path*.

    A level is either a ``<level>.zarr`` directory or a ``<level>.link``
    file whose stem is the decimal level number.  Levels must be
    contiguous, starting at 0.  If *chunk_cache_capacity* is given,
    it is distributed over the levels so that level ``i`` receives a
    share proportional to ``4 ** (num_levels - 1 - i)`` — i.e. finer
    levels get exponentially more cache.

    :param s3_file_system: The S3 file system.
    :param dir_path: Path of the directory containing the levels.
    :param zarr_kwargs: Optional parameters passed on to the base class.
    :param ds_id: Dataset identifier, used in error messages.
    :param chunk_cache_capacity: Optional total chunk-cache capacity to
        split across the levels.
    :param exception_type: Exception class raised on an invalid layout.
    :raise exception_type: If any level in ``range(num_levels)`` is missing.
    """
    found_levels = {}
    for obj_path in s3_file_system.ls(dir_path, detail=False):
        name = obj_path.split("/")[-1]
        stem, suffix = os.path.splitext(name)
        if not stem.isdigit():
            # Guard clause: skip non-numeric names before touching the
            # file system again (isdir/isfile are remote calls).
            continue
        if obj_path.endswith(".zarr") and s3_file_system.isdir(obj_path):
            found_levels[int(stem)] = (suffix, dir_path + "/" + name)
        elif obj_path.endswith(".link") and s3_file_system.isfile(obj_path):
            found_levels[int(stem)] = (suffix, dir_path + "/" + name)

    num_levels = len(found_levels)
    # Levels must form the contiguous range 0..n-1; report the smallest
    # missing one.
    for expected in range(num_levels):
        if expected not in found_levels:
            raise exception_type(
                f"Invalid multi-level dataset {ds_id!r}: missing level {expected} in {dir_path}"
            )

    super().__init__(ds_id=ds_id, parameters=zarr_kwargs)
    self._s3_file_system = s3_file_system
    self._dir_path = dir_path
    self._level_paths = found_levels
    self._num_levels = num_levels

    self._chunk_cache_capacities = None
    if chunk_cache_capacity:
        # weight(i) = (2 ** (n - 1 - i)) ** 2 == 4 ** (n - 1 - i):
        # each coarser level gets a quarter of the next finer one.
        weights = [4 ** (num_levels - 1 - i) for i in range(num_levels)]
        total_weight = sum(weights)
        self._chunk_cache_capacities = [
            round(chunk_cache_capacity * w / total_weight) for w in weights
        ]