Esempio n. 1
0
    def __init__(self,
                 s3_file_system: s3fs.S3FileSystem,
                 dir_path: str,
                 zarr_kwargs: Dict[str, Any] = None,
                 ds_id: str = None,
                 chunk_cache_capacity: int = None,
                 exception_type: type = ValueError):
        """Discover the numbered levels stored below ``dir_path``.

        Every entry listed in ``dir_path`` whose name is ``<digits>.zarr``
        (and is a directory) or ``<digits>.link`` (and is a file) is
        registered as level ``int(<digits>)``. The levels found must form
        the contiguous range ``0 .. n-1``.

        :param s3_file_system: File system used to list ``dir_path`` and
            to probe each entry with ``isdir`` / ``isfile``.
        :param dir_path: Path of the directory holding the level entries.
        :param zarr_kwargs: Forwarded to the base class as ``parameters``.
        :param ds_id: Dataset identifier; forwarded to the base class and
            used in the error message.
        :param chunk_cache_capacity: Optional total capacity that is split
            across the levels. Level ``i`` gets the share
            ``4 ** (n - 1 - i)`` of the total, so level 0 receives the
            largest portion. If falsy, no per-level capacities are stored.
        :param exception_type: Exception class raised when a level index
            in ``0 .. n-1`` is missing.
        """
        found = {}
        for entry in s3_file_system.ls(dir_path, detail=False):
            name = entry.split("/")[-1]
            stem, suffix = os.path.splitext(name)
            if not stem.isdigit():
                continue
            index = int(stem)
            # A level is either a Zarr directory or a link file.
            is_zarr_dir = (entry.endswith(".zarr")
                           and s3_file_system.isdir(entry))
            if is_zarr_dir or (entry.endswith(".link")
                               and s3_file_system.isfile(entry)):
                found[index] = (suffix, dir_path + "/" + name)

        count = len(found)
        # Consistency check: with `count` levels, indices 0..count-1 must
        # all be present; report the first one that is not.
        level = next((i for i in range(count) if i not in found), None)
        if level is not None:
            raise exception_type(
                f"Invalid multi-level dataset {ds_id!r}: missing level {level} in {dir_path}"
            )

        super().__init__(ds_id=ds_id, parameters=zarr_kwargs)
        self._s3_file_system = s3_file_system
        self._dir_path = dir_path
        self._level_paths = found
        self._num_levels = count

        capacities = None
        if chunk_cache_capacity:
            # Squared powers of two: each level weighs four times as much
            # as the next higher level index.
            shares = [4 ** (count - 1 - i) for i in range(count)]
            total_share = sum(shares)
            capacities = [
                round(chunk_cache_capacity * share / total_share)
                for share in shares
            ]
        self._chunk_cache_capacities = capacities
Esempio n. 2
0
def _local_create_subfolders(from_path: str, to_path: str,
                             fs: s3fs.S3FileSystem) -> None:
    """Recursively recreate the sub-directory tree of ``from_path`` locally.

    Helper for creating subdirectories when calling ``_s3_to_local_cp``.
    Only directories are created; no files are copied here.

    Args:
        from_path: Remote path whose entries are listed with ``fs.ls``.
        to_path: Local directory below which the matching sub-directories
            are created.
        fs: File system used for the listing.
    """
    # Remote keys always use "/" as separator, independent of the local OS,
    # so the remote child path is built by hand rather than with os.path.
    remote_root = from_path.rstrip("/")

    for entry in fs.ls(from_path, detail=True):
        if entry.get("StorageClass") != "DIRECTORY":
            continue

        # The child name is the last path segment of the key. Taking the
        # segment (instead of replacing ``from_path + "/"`` anywhere in the
        # key) also works when the key carries a trailing slash or when
        # ``from_path`` is spelled slightly differently from the key prefix.
        sub = entry["Key"].rstrip("/").rsplit("/", 1)[-1]
        if not sub:
            continue

        from_sub_path = f"{remote_root}/{sub}"
        path_to_create = os.path.join(to_path, sub)

        # Log the folder that is actually created, not its parent.
        logger.debug("Creating local subfolder %r", path_to_create)

        # exist_ok: a repeated or resumed copy must not fail on folders
        # that are already present.
        os.makedirs(path_to_create, exist_ok=True)
        _local_create_subfolders(from_sub_path, path_to_create, fs)
Esempio n. 3
0
 def ls(self, path, **kwargs):
     """List ``path`` through ``S3FileSystem.ls``.

     ``path`` is first converted with ``get_key`` (defined elsewhere);
     all keyword arguments are forwarded unchanged.
     """
     key = get_key(path)
     return S3FileSystem.ls(self, key, **kwargs)