def rename(self, src_path, dst_path, metadata_key_suffix='.json'):
    store_src_path = normalize_storage_path(src_path)
    store_dst_path = normalize_storage_path(dst_path)

    dir_path = self.path
    any_existed = False

    # move any data and metadata directories under the old path
    for root_prefix in ['meta', 'data']:
        src_path = os.path.join(dir_path, root_prefix, 'root', store_src_path)
        if os.path.exists(src_path):
            any_existed = True
            dst_path = os.path.join(dir_path, root_prefix, 'root', store_dst_path)
            os.renames(src_path, dst_path)

    # move any array or group metadata files alongside the directories
    for suffix in ['.array' + metadata_key_suffix,
                   '.group' + metadata_key_suffix]:
        src_meta = os.path.join(dir_path, 'meta', 'root', store_src_path + suffix)
        if os.path.exists(src_meta):
            any_existed = True
            dst_meta = os.path.join(dir_path, 'meta', 'root', store_dst_path + suffix)
            dst_dir = os.path.dirname(dst_meta)
            if not os.path.exists(dst_dir):
                os.makedirs(dst_dir)
            os.rename(src_meta, dst_meta)

    if not any_existed:
        raise FileNotFoundError("nothing found at src_path")

def rename(self, src_path: Path, dst_path: Path):
    src_path = normalize_storage_path(src_path)
    dst_path = normalize_storage_path(dst_path)

    any_renamed = False
    for base in [meta_root, data_root]:
        if self.list_prefix(base + src_path):
            src_parent, src_key = self._get_parent(base + src_path)
            dst_parent, dst_key = self._require_parent(base + dst_path)

            if src_key in src_parent:
                dst_parent[dst_key] = src_parent.pop(src_key)

            if base == meta_root:
                # check for and move corresponding metadata
                sfx = _get_metadata_suffix(self)
                src_meta = src_key + '.array' + sfx
                if src_meta in src_parent:
                    dst_meta = dst_key + '.array' + sfx
                    dst_parent[dst_meta] = src_parent.pop(src_meta)
                src_meta = src_key + '.group' + sfx
                if src_meta in src_parent:
                    dst_meta = dst_key + '.group' + sfx
                    dst_parent[dst_meta] = src_parent.pop(src_meta)
            any_renamed = True
    any_renamed = _rename_metadata_v3(self, src_path, dst_path) or any_renamed
    if not any_renamed:
        raise ValueError(f"no item {src_path} found to rename")

def _strip_prefix_from_path(path, prefix):
    # normalized things will not have any leading or trailing slashes
    path_norm = normalize_storage_path(path)
    prefix_norm = normalize_storage_path(prefix)
    if prefix:
        return path_norm[(len(prefix_norm) + 1):]
    else:
        return path_norm

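# A minimal doctest-style check of the helper's contract above, assuming
# normalize_storage_path and _strip_prefix_from_path are in scope (both are
# internal names from this module); the path values are hypothetical:
assert _strip_prefix_from_path('/a/b/c/', 'a') == 'b/c'
assert _strip_prefix_from_path('data/root/x', 'data/root') == 'x'
assert _strip_prefix_from_path('a/b', '') == 'a/b'  # empty prefix: no-op
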
def _item_path(self, item):
    if item and item[0] == '/':
        # absolute path
        path = normalize_storage_path(item)
    else:
        # relative path
        path = normalize_storage_path(item)
        if self._path:
            path = self._key_prefix + path
    return path

def __init__(self, store, path=None, read_only=False, chunk_store=None,
             synchronizer=None, cache_metadata=True):
    # N.B., expect at this point store is fully initialized with all
    # configuration metadata fully specified and normalized

    self._store = store
    self._path = normalize_storage_path(path)
    if self._path:
        self._key_prefix = self._path + '/'
    else:
        self._key_prefix = ''
    self._read_only = read_only
    if chunk_store is None:
        self._chunk_store = store
    else:
        self._chunk_store = chunk_store
    self._synchronizer = synchronizer
    self._cache_metadata = cache_metadata
    self._is_view = False

    # initialize metadata
    self._load_metadata()

    # initialize attributes
    akey = self._key_prefix + attrs_key
    self._attrs = Attributes(store, key=akey, read_only=read_only,
                             synchronizer=synchronizer)

def getsize(self, path=None):
    path = normalize_storage_path(path)

    # obtain value to return size of
    if path:
        try:
            parent, key = self._get_parent(path)
            value = parent[key]
        except KeyError:
            err_path_not_found(path)
    else:
        value = self.root

    # obtain size of value
    if isinstance(value, self.cls):
        # total size for directory
        size = 0
        for v in value.values():
            if not isinstance(v, self.cls):
                try:
                    size += buffer_size(v)
                except TypeError:
                    return -1
        return size
    else:
        try:
            return buffer_size(value)
        except TypeError:
            return -1

def __init__(self, store, path=None, read_only=False, chunk_store=None,
             cache_attrs=True, synchronizer=None):
    self._store = store
    self._chunk_store = chunk_store
    self._path = normalize_storage_path(path)
    if self._path:
        self._key_prefix = self._path + '/'
    else:
        self._key_prefix = ''
    self._read_only = read_only
    self._synchronizer = synchronizer

    # guard conditions
    if contains_array(store, path=self._path):
        raise ContainsArrayError(path)

    # initialize metadata
    try:
        mkey = self._key_prefix + group_meta_key
        meta_bytes = store[mkey]
    except KeyError:
        raise GroupNotFoundError(path)
    else:
        meta = decode_group_metadata(meta_bytes)
        self._meta = meta

    # setup attributes
    akey = self._key_prefix + attrs_key
    self._attrs = Attributes(store, key=akey, read_only=read_only,
                             cache=cache_attrs, synchronizer=synchronizer)

    # setup info
    self._info = InfoReporter(self)

def rmdir(self, path=None):
    store_path = normalize_storage_path(path)
    dir_path = self.path
    if store_path:
        dir_path = os.path.join(dir_path, store_path)
    if os.path.isdir(dir_path):
        shutil.rmtree(dir_path)

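# A hedged usage sketch for the directory-store rmdir above, assuming the
# zarr v2 API; 'data/demo.zarr' is an illustrative path:
import zarr

store = zarr.DirectoryStore('data/demo.zarr')
root = zarr.group(store=store, overwrite=True)
root.create_group('tmp')
store.rmdir('tmp')           # removes data/demo.zarr/tmp from disk
store.rmdir('no-such-path')  # silently a no-op when the directory is absent
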
def rmdir(self, path: str = "") -> None:
    if not self.is_erasable():
        raise NotImplementedError(
            f'{type(self)} is not erasable, cannot call "rmdir"'
        )  # pragma: no cover
    path = normalize_storage_path(path)
    _rmdir_from_keys(self, path)

def __init__(
    self,
    container=None,
    prefix='',
    account_name=None,
    account_key=None,
    blob_service_kwargs=None,
    dimension_separator=None,
    client=None,
):
    self._dimension_separator = dimension_separator
    self.prefix = normalize_storage_path(prefix)
    if client is None:
        # deprecated option, try to construct the client for them
        msg = (
            "Providing 'container', 'account_name', 'account_key', and "
            "'blob_service_kwargs' is deprecated. Provide an instance of "
            "'azure.storage.blob.ContainerClient' as 'client' instead."
        )
        warnings.warn(msg, FutureWarning, stacklevel=2)
        from azure.storage.blob import ContainerClient
        blob_service_kwargs = blob_service_kwargs or {}
        client = ContainerClient(
            "https://{}.blob.core.windows.net/".format(account_name),
            container,
            credential=account_key,
            **blob_service_kwargs
        )

    self.client = client
    self._container = container
    self._account_name = account_name
    self._account_key = account_key

def group(store=None, overwrite=False, chunk_store=None, synchronizer=None, path=None): """Create a group. Parameters ---------- store : MutableMapping or string, optional Store or path to directory in file system. overwrite : bool, optional If True, delete any pre-existing data in `store` at `path` before creating the group. chunk_store : MutableMapping, optional Separate storage for chunks. If not provided, `store` will be used for storage of both chunks and metadata. synchronizer : object, optional Array synchronizer. path : string, optional Group path within store. Returns ------- g : zarr.hierarchy.Group Examples -------- Create a group in memory:: >>> import zarr >>> g = zarr.group() >>> g <zarr.hierarchy.Group '/'> Create a group with a different store:: >>> store = zarr.DirectoryStore('data/example.zarr') >>> g = zarr.group(store=store, overwrite=True) >>> g <zarr.hierarchy.Group '/'> """ # handle polymorphic store arg store = _normalize_store_arg(store) path = normalize_storage_path(path) # require group if overwrite or not contains_group(store): init_group(store, overwrite=overwrite, chunk_store=chunk_store, path=path) return Group(store, read_only=False, chunk_store=chunk_store, synchronizer=synchronizer, path=path)
def getsize(self, path=None):
    path = normalize_storage_path(path)
    children = self.listdir(path)
    if children:
        size = 0
        for child in children:
            if path:
                name = path + '/' + child
            else:
                name = child
            try:
                info = self.zf.getinfo(name)
            except KeyError:
                pass
            else:
                size += info.compress_size
        return size
    elif path:
        try:
            info = self.zf.getinfo(path)
            return info.compress_size
        except KeyError:
            err_path_not_found(path)
    else:
        return 0

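# A hedged sketch of the ZipStore getsize behaviour above, assuming the
# zarr v2 API; 'data/demo.zip' is an illustrative path:
import zarr

store = zarr.ZipStore('data/demo.zip', mode='w')
z = zarr.zeros(100, chunks=10, store=store)
z[:] = 42
print(store.getsize())     # summed compressed size of entries at the root
print(store.getsize('0'))  # compressed size of a single chunk entry
store.close()              # zip stores must be closed explicitly
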
def __init__(self, store, path=None, read_only=False, chunk_store=None,
             synchronizer=None):
    self._store = store
    self._path = normalize_storage_path(path)
    if self._path:
        self._key_prefix = self._path + '/'
    else:
        self._key_prefix = ''
    self._read_only = read_only
    if chunk_store is None:
        self._chunk_store = store
    else:
        self._chunk_store = chunk_store
    self._synchronizer = synchronizer

    # guard conditions
    if contains_array(store, path=self._path):
        err_contains_array(path)

    # initialize metadata
    try:
        mkey = self._key_prefix + group_meta_key
        meta_bytes = store[mkey]
    except KeyError:
        err_group_not_found(path)
    else:
        meta = decode_group_metadata(meta_bytes)
        self._meta = meta

    # setup attributes
    akey = self._key_prefix + attrs_key
    self._attrs = Attributes(store, key=akey, read_only=read_only,
                             synchronizer=synchronizer)

def init_group(store, overwrite=False, path=None, chunk_store=None):
    """Initialize a group store.

    Parameters
    ----------
    store : MutableMapping
        A mapping that supports string keys and byte sequence values.
    overwrite : bool, optional
        If True, erase all data in `store` prior to initialisation.
    path : string, optional
        Path under which the group is stored.
    chunk_store : MutableMapping, optional
        Separate storage for chunks. If not provided, `store` will be used
        for storage of both chunks and metadata.

    """

    # normalize path
    path = normalize_storage_path(path)

    # ensure parent group initialized
    _require_parent_group(path, store=store, chunk_store=chunk_store,
                          overwrite=overwrite)

    # initialise metadata
    _init_group_metadata(store=store, overwrite=overwrite, path=path,
                         chunk_store=chunk_store)

def listdir(self, path=None):
    dir_path = normalize_storage_path(self._append_path_to_prefix(path))
    if dir_path:
        dir_path += '/'
    items = [
        self._strip_prefix_from_path(blob.name, dir_path)
        for blob in self.client.walk_blobs(name_starts_with=dir_path,
                                           delimiter='/')
    ]
    return items

def rmdir(store, path=None):
    """Remove all items under the given path."""
    path = normalize_storage_path(path)
    if hasattr(store, 'rmdir'):
        # pass through
        store.rmdir(path)
    else:
        # slow version, delete one key at a time
        _rmdir_from_keys(store, path)

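# A hedged sketch: with a plain dict (which has no rmdir method) the slow
# key-by-key fallback runs; a store such as DirectoryStore would take the
# pass-through branch instead. Assumes the zarr v2 API:
import zarr

store = dict()
zarr.storage.init_group(store, path='foo/bar')
zarr.storage.rmdir(store, 'foo')
print([k for k in store if k.startswith('foo')])  # [] -- all keys removed
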
def listdir(store, path=None):
    """Obtain a directory listing for the given path."""
    path = normalize_storage_path(path)
    if hasattr(store, 'listdir'):
        # pass through
        return store.listdir(path)
    else:
        # slow version, iterate through all keys
        return _listdir_from_keys(store, path)

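# Companion sketch for the module-level listdir, again with a plain dict so
# the key-iteration fallback is exercised (zarr v2 API assumed; the exact
# listing may vary by zarr version):
import zarr

store = dict()
zarr.storage.init_array(store, shape=100, chunks=10, path='foo/bar')
print(zarr.storage.listdir(store, 'foo'))  # e.g. ['.zgroup', 'bar']
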
def listdir(self, path=None):
    store_path = normalize_storage_path(path)
    dir_path = self.path
    if store_path:
        dir_path = os.path.join(dir_path, store_path)
    if os.path.isdir(dir_path):
        return sorted(os.listdir(dir_path))
    else:
        return []

def group(store=None, overwrite=False, chunk_store=None, cache_attrs=True,
          synchronizer=None, path=None):
    """Create a group.

    Parameters
    ----------
    store : MutableMapping or string, optional
        Store or path to directory in file system.
    overwrite : bool, optional
        If True, delete any pre-existing data in `store` at `path` before
        creating the group.
    chunk_store : MutableMapping, optional
        Separate storage for chunks. If not provided, `store` will be used
        for storage of both chunks and metadata.
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute read
        operations. If False, user attributes are reloaded from the store
        prior to all attribute read operations.
    synchronizer : object, optional
        Array synchronizer.
    path : string, optional
        Group path within store.

    Returns
    -------
    g : zarr.hierarchy.Group

    Examples
    --------
    Create a group in memory::

        >>> import zarr
        >>> g = zarr.group()
        >>> g
        <zarr.hierarchy.Group '/'>

    Create a group with a different store::

        >>> store = zarr.DirectoryStore('data/example.zarr')
        >>> g = zarr.group(store=store, overwrite=True)
        >>> g
        <zarr.hierarchy.Group '/'>

    """

    # handle polymorphic store arg
    store = _normalize_store_arg(store)
    path = normalize_storage_path(path)

    # require group
    if overwrite or not contains_group(store):
        init_group(store, overwrite=overwrite, chunk_store=chunk_store,
                   path=path)

    return Group(store, read_only=False, chunk_store=chunk_store,
                 cache_attrs=cache_attrs, synchronizer=synchronizer,
                 path=path)

def group(store=None, overwrite=False, chunk_store=None, synchronizer=None, path=None): """Create a group. Parameters ---------- store : MutableMapping or string Store or path to directory in file system. overwrite : bool, optional If True, delete any pre-existing data in `store` at `path` before creating the group. chunk_store : MutableMapping, optional Separate storage for chunks. If not provided, `store` will be used for storage of both chunks and metadata. synchronizer : object, optional Array synchronizer. path : string, optional Group path. Returns ------- g : zarr.hierarchy.Group Examples -------- Create a group in memory:: >>> import zarr >>> g = zarr.group() >>> g Group(/, 0) store: DictStore Create a group with a different store:: >>> store = zarr.DirectoryStore('example') >>> g = zarr.group(store=store, overwrite=True) >>> g Group(/, 0) store: DirectoryStore """ # handle polymorphic store arg store = _handle_store_arg(store) path = normalize_storage_path(path) # require group if overwrite or not contains_group(store): init_group(store, overwrite=overwrite, chunk_store=chunk_store, path=path) return Group(store, read_only=False, chunk_store=chunk_store, synchronizer=synchronizer, path=path)
def rmdir(self, path=None):
    path = normalize_storage_path(path)
    if path:
        try:
            parent, key = self._get_parent(path)
            value = parent[key]
        except KeyError:
            return
        else:
            if isinstance(value, self.cls):
                del parent[key]
    else:
        # clear out root
        self.root = self.cls()

def listdir(self, path=None):
    path = normalize_storage_path(path)
    if path:
        try:
            parent, key = self._get_parent(path)
            value = parent[key]
        except KeyError:
            return []
    else:
        value = self.root
    if isinstance(value, self.cls):
        return sorted(value.keys())
    else:
        return []

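# A hedged sketch exercising the in-memory rmdir/listdir pair above, assuming
# the zarr v2 MemoryStore (whose nested-dict layout backs _get_parent); the
# exact listing may vary by zarr version:
import zarr

store = zarr.storage.MemoryStore()
zarr.storage.init_group(store, path='a/b')
print(store.listdir('a'))  # e.g. ['.zgroup', 'b']
store.rmdir('a')
print(store.listdir('a'))  # [] -- subtree removed
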
def getsize(self, path=None):
    path = normalize_storage_path(path)
    with self.mutex:
        children = self.list_prefix(data_root + path)
        children += self.list_prefix(meta_root + path)
        if children:
            size = 0
            for name in children:
                info = self.zf.getinfo(name)
                size += info.compress_size
            return size
        elif path in self:
            info = self.zf.getinfo(path)
            return info.compress_size
        else:
            return 0

def rmdir(self, path=None):
    path = normalize_storage_path(path)
    if path:
        for base in [meta_root, data_root]:
            with self.lock:
                self.cursor.execute(
                    'DELETE FROM zarr WHERE k LIKE (? || "/%")',
                    (base + path,)
                )
        # remove any associated metadata files
        sfx = _get_metadata_suffix(self)
        meta_dir = (meta_root + path).rstrip('/')
        array_meta_file = meta_dir + '.array' + sfx
        self.pop(array_meta_file, None)
        group_meta_file = meta_dir + '.group' + sfx
        self.pop(group_meta_file, None)
    else:
        self.clear()

def getsize(self, path=None):
    store_path = normalize_storage_path(path)
    fs_path = self.path
    if store_path:
        fs_path = os.path.join(fs_path, store_path)
    if os.path.isfile(fs_path):
        return os.path.getsize(fs_path)
    elif os.path.isdir(fs_path):
        children = os.listdir(fs_path)
        size = 0
        for child in children:
            child_fs_path = os.path.join(fs_path, child)
            if os.path.isfile(child_fs_path):
                size += os.path.getsize(child_fs_path)
        return size
    else:
        err_path_not_found(path)

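# A hedged sketch for the directory-store getsize above (zarr v2 API,
# illustrative path); note file sizes are summed one directory level deep:
import zarr

store = zarr.DirectoryStore('data/demo.zarr')
z = zarr.zeros(100, chunks=10, store=store, overwrite=True)
z[:] = 1
print(store.getsize())     # bytes of files directly under the root
print(store.getsize('0'))  # size of a single chunk file
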
def rmdir(self, path=None):
    store_path = normalize_storage_path(path)
    dir_path = self.path
    if store_path:
        for base in [meta_root, data_root]:
            # use a fresh variable per base so the second iteration does not
            # join onto the already-joined path
            base_dir = os.path.join(dir_path, base + store_path)
            if os.path.isdir(base_dir):
                shutil.rmtree(base_dir)

        # remove any associated metadata files
        sfx = _get_metadata_suffix(self)
        meta_dir = (meta_root + store_path).rstrip('/')
        array_meta_file = meta_dir + '.array' + sfx
        self.pop(array_meta_file, None)
        group_meta_file = meta_dir + '.group' + sfx
        self.pop(group_meta_file, None)
    elif os.path.isdir(dir_path):
        shutil.rmtree(dir_path)

def _list_container(self, path=None, delimiter=None, strip_prefix=False,
                    treat_path_as_dir=True):
    path = self.prefix if path is None else self._add_prefix(path)
    if path and treat_path_as_dir:
        path += "/"
    _, contents = self.conn.get_container(self.container, prefix=path,
                                          delimiter=delimiter)
    if strip_prefix:
        prefix_size = len(path)
        for entry in contents:
            name = entry.get('name', entry.get('subdir', ''))
            entry["name"] = normalize_storage_path(name[prefix_size:])
    for entry in contents:
        entry["bytes"] = entry.get("bytes", 0)
    return contents

def getsize(self, path=None):
    store_path = normalize_storage_path(path)
    fs_path = self._append_path_to_prefix(store_path)
    if fs_path:
        blob_client = self.client.get_blob_client(fs_path)
    else:
        blob_client = None

    if blob_client and blob_client.exists():
        return blob_client.get_blob_properties().size
    else:
        size = 0
        if fs_path == '':
            fs_path = None
        elif not fs_path.endswith('/'):
            fs_path += '/'
        for blob in self.client.walk_blobs(name_starts_with=fs_path,
                                           delimiter='/'):
            blob_client = self.client.get_blob_client(blob)
            if blob_client.exists():
                size += blob_client.get_blob_properties().size
        return size

def _get_files_and_dirs_from_path(store, path):
    path = normalize_storage_path(path)

    files = []
    # add array metadata file if present
    array_key = _prefix_to_array_key(store, path)
    if array_key in store:
        files.append(os.path.join(store.path, array_key))

    # add group metadata file if present
    group_key = _prefix_to_group_key(store, path)
    if group_key in store:
        files.append(os.path.join(store.path, group_key))

    dirs = []
    # add array and group folders if present
    for d in [data_root + path, meta_root + path]:
        dir_path = os.path.join(store.path, d)
        if os.path.exists(dir_path):
            dirs.append(dir_path)
    return files, dirs

def getsize(store, path=None):
    """Compute size of stored items for a given path."""
    path = normalize_storage_path(path)
    if hasattr(store, 'getsize'):
        # pass through
        return store.getsize(path)
    elif isinstance(store, dict):
        # compute from size of values
        prefix = _path_to_prefix(path)
        size = 0
        for k in listdir(store, path):
            try:
                v = store[prefix + k]
            except KeyError:
                pass
            else:
                try:
                    size += buffer_size(v)
                except TypeError:
                    return -1
        return size
    else:
        return -1

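# A hedged sketch of the dict branch of the module-level getsize above
# (zarr v2 API assumed); the reported number is the total byte size of the
# values directly under 'x', here just the '.zarray' metadata document:
import zarr

store = dict()
zarr.storage.init_array(store, shape=100, chunks=10, dtype='i4', path='x')
print(zarr.storage.getsize(store, 'x'))  # bytes of 'x/.zarray'
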
def rmdir(self, path: Path = None):
    path = normalize_storage_path(path)
    if path:
        for base in [meta_root, data_root]:
            try:
                parent, key = self._get_parent(base + path)
                value = parent[key]
            except KeyError:
                continue
            else:
                if isinstance(value, self.cls):
                    del parent[key]

        # remove any associated metadata files
        sfx = _get_metadata_suffix(self)
        meta_dir = (meta_root + path).rstrip('/')
        array_meta_file = meta_dir + '.array' + sfx
        self.pop(array_meta_file, None)
        group_meta_file = meta_dir + '.group' + sfx
        self.pop(group_meta_file, None)
    else:
        # clear out root
        self.root = self.cls()

def open_group(store=None, mode='a', synchronizer=None, path=None):
    """Open a group using mode-like semantics.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system.
    mode : {'r', 'r+', 'a', 'w', 'w-'}
        Persistence mode: 'r' means read only (must exist); 'r+' means
        read/write (must exist); 'a' means read/write (create if doesn't
        exist); 'w' means create (overwrite if exists); 'w-' means create
        (fail if exists).
    synchronizer : object, optional
        Array synchronizer.
    path : string, optional
        Group path.

    Returns
    -------
    g : zarr.hierarchy.Group

    Examples
    --------
    >>> import zarr
    >>> root = zarr.open_group('example', mode='w')
    >>> foo = root.create_group('foo')
    >>> bar = root.create_group('bar')
    >>> root
    Group(/, 2)
      groups: 2; bar, foo
      store: DirectoryStore
    >>> root2 = zarr.open_group('example', mode='a')
    >>> root2
    Group(/, 2)
      groups: 2; bar, foo
      store: DirectoryStore
    >>> root == root2
    True

    """

    # handle polymorphic store arg
    store = _handle_store_arg(store)
    path = normalize_storage_path(path)

    # ensure store is initialized
    if mode in ['r', 'r+']:
        if contains_array(store, path=path):
            err_contains_array(path)
        elif not contains_group(store, path=path):
            err_group_not_found(path)

    elif mode == 'w':
        init_group(store, overwrite=True, path=path)

    elif mode == 'a':
        if contains_array(store, path=path):
            err_contains_array(path)
        if not contains_group(store, path=path):
            init_group(store, path=path)

    elif mode in ['w-', 'x']:
        if contains_array(store, path=path):
            err_contains_array(path)
        elif contains_group(store, path=path):
            err_contains_group(path)
        else:
            init_group(store, path=path)

    # determine read only status
    read_only = mode == 'r'

    return Group(store, read_only=read_only, synchronizer=synchronizer,
                 path=path)

def _item_path(self, item):
    absolute = isinstance(item, str) and item and item[0] == '/'
    path = normalize_storage_path(item)
    if not absolute and self._path:
        path = self._key_prefix + path
    return path

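# A hedged demonstration of the absolute-vs-relative rule in _item_path,
# using the public v2 hierarchy API (the private call is shown only to
# illustrate the rule, not as a supported interface):
import zarr

root = zarr.group()
foo = root.create_group('foo')
print(foo._item_path('bar'))   # 'foo/bar'  -- relative, prefix applied
print(foo._item_path('/bar'))  # 'bar'      -- absolute, prefix skipped
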
def open_group(store, mode='a', cache_attrs=True, synchronizer=None,
               path=None):
    """Open a group using file-mode-like semantics.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system or name of zip file.
    mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
        Persistence mode: 'r' means read only (must exist); 'r+' means
        read/write (must exist); 'a' means read/write (create if doesn't
        exist); 'w' means create (overwrite if exists); 'w-' means create
        (fail if exists).
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute read
        operations. If False, user attributes are reloaded from the store
        prior to all attribute read operations.
    synchronizer : object, optional
        Array synchronizer.
    path : string, optional
        Group path within store.

    Returns
    -------
    g : zarr.hierarchy.Group

    Examples
    --------
    >>> import zarr
    >>> root = zarr.open_group('data/example.zarr', mode='w')
    >>> foo = root.create_group('foo')
    >>> bar = root.create_group('bar')
    >>> root
    <zarr.hierarchy.Group '/'>
    >>> root2 = zarr.open_group('data/example.zarr', mode='a')
    >>> root2
    <zarr.hierarchy.Group '/'>
    >>> root == root2
    True

    """

    # handle polymorphic store arg
    store = _normalize_store_arg(store)
    path = normalize_storage_path(path)

    # ensure store is initialized
    if mode in ['r', 'r+']:
        if contains_array(store, path=path):
            err_contains_array(path)
        elif not contains_group(store, path=path):
            err_group_not_found(path)

    elif mode == 'w':
        init_group(store, overwrite=True, path=path)

    elif mode == 'a':
        if contains_array(store, path=path):
            err_contains_array(path)
        if not contains_group(store, path=path):
            init_group(store, path=path)

    elif mode in ['w-', 'x']:
        if contains_array(store, path=path):
            err_contains_array(path)
        elif contains_group(store, path=path):
            err_contains_group(path)
        else:
            init_group(store, path=path)

    # determine read only status
    read_only = mode == 'r'

    return Group(store, read_only=read_only, cache_attrs=cache_attrs,
                 synchronizer=synchronizer, path=path)

def listdir(self, path=None):
    path = normalize_storage_path(path)
    return _listdir_from_keys(self, path)

def contains_group(store, path=None):
    """Return True if the store contains a group at the given logical path."""
    path = normalize_storage_path(path)
    prefix = _path_to_prefix(path)
    key = prefix + group_meta_key
    return key in store

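# A hedged sketch for contains_group (zarr v2 API assumed): the check boils
# down to a single key lookup for the group metadata under the normalized
# prefix:
import zarr

store = dict()
zarr.storage.init_group(store, path='foo')
print(zarr.storage.contains_group(store, 'foo'))  # True
print(zarr.storage.contains_group(store, 'bar'))  # False
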
def init_array(store, shape, chunks=None, dtype=None, compressor='default',
               fill_value=None, order='C', overwrite=False, path=None,
               chunk_store=None, filters=None):
    """Initialize an array store with the given configuration.

    Parameters
    ----------
    store : MutableMapping
        A mapping that supports string keys and bytes-like values.
    shape : int or tuple of ints
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape. If not provided, will be guessed from `shape` and
        `dtype`.
    dtype : string or dtype, optional
        NumPy dtype.
    compressor : Codec, optional
        Primary compressor.
    fill_value : object
        Default value to use for uninitialized portions of the array.
    order : {'C', 'F'}, optional
        Memory layout to be used within each chunk.
    overwrite : bool, optional
        If True, erase all data in `store` prior to initialisation.
    path : string, optional
        Path under which array is stored.
    chunk_store : MutableMapping, optional
        Separate storage for chunks. If not provided, `store` will be used
        for storage of both chunks and metadata.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to compression.

    Examples
    --------
    Initialize an array store::

        >>> from zarr.storage import init_array
        >>> store = dict()
        >>> init_array(store, shape=(10000, 10000), chunks=(1000, 1000))
        >>> sorted(store.keys())
        ['.zarray', '.zattrs']

    Array metadata is stored as JSON::

        >>> print(str(store['.zarray'], 'ascii'))
        {
            "chunks": [
                1000,
                1000
            ],
            "compressor": {
                "clevel": 5,
                "cname": "lz4",
                "id": "blosc",
                "shuffle": 1
            },
            "dtype": "<f8",
            "fill_value": null,
            "filters": null,
            "order": "C",
            "shape": [
                10000,
                10000
            ],
            "zarr_format": 2
        }

    User-defined attributes are also stored as JSON, initially empty::

        >>> print(str(store['.zattrs'], 'ascii'))
        {}

    Initialize an array using a storage path::

        >>> store = dict()
        >>> init_array(store, shape=100000000, chunks=1000000, dtype='i1',
        ...            path='foo')
        >>> sorted(store.keys())
        ['.zattrs', '.zgroup', 'foo/.zarray', 'foo/.zattrs']
        >>> print(str(store['foo/.zarray'], 'ascii'))
        {
            "chunks": [
                1000000
            ],
            "compressor": {
                "clevel": 5,
                "cname": "lz4",
                "id": "blosc",
                "shuffle": 1
            },
            "dtype": "|i1",
            "fill_value": null,
            "filters": null,
            "order": "C",
            "shape": [
                100000000
            ],
            "zarr_format": 2
        }

    Notes
    -----
    The initialisation process involves normalising all array metadata,
    encoding as JSON and storing under the '.zarray' key. User attributes are
    also initialized and stored as JSON under the '.zattrs' key.

    """

    # normalize path
    path = normalize_storage_path(path)

    # ensure parent group initialized
    _require_parent_group(path, store=store, chunk_store=chunk_store,
                          overwrite=overwrite)

    _init_array_metadata(store, shape=shape, chunks=chunks, dtype=dtype,
                         compressor=compressor, fill_value=fill_value,
                         order=order, overwrite=overwrite, path=path,
                         chunk_store=chunk_store, filters=filters)

def open(store, mode='a', **kwargs):
    """Convenience function to open a group or array using file-mode-like
    semantics.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system or name of zip file.
    mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
        Persistence mode: 'r' means read only (must exist); 'r+' means
        read/write (must exist); 'a' means read/write (create if doesn't
        exist); 'w' means create (overwrite if exists); 'w-' means create
        (fail if exists).
    **kwargs
        Additional parameters are passed through to :func:`zarr.open_array`
        or :func:`zarr.open_group`.

    See Also
    --------
    zarr.open_array, zarr.open_group

    Examples
    --------
    Storing data in a directory 'data/example.zarr' on the local file
    system::

        >>> import zarr
        >>> store = 'data/example.zarr'
        >>> zw = zarr.open(store, mode='w', shape=100, dtype='i4')  # open new array
        >>> zw
        <zarr.core.Array (100,) int32>
        >>> za = zarr.open(store, mode='a')  # open existing array for reading and writing
        >>> za
        <zarr.core.Array (100,) int32>
        >>> zr = zarr.open(store, mode='r')  # open existing array read-only
        >>> zr
        <zarr.core.Array (100,) int32 read-only>
        >>> gw = zarr.open(store, mode='w')  # open new group, overwriting previous data
        >>> gw
        <zarr.hierarchy.Group '/'>
        >>> ga = zarr.open(store, mode='a')  # open existing group for reading and writing
        >>> ga
        <zarr.hierarchy.Group '/'>
        >>> gr = zarr.open(store, mode='r')  # open existing group read-only
        >>> gr
        <zarr.hierarchy.Group '/' read-only>

    """

    path = kwargs.get('path', None)
    # handle polymorphic store arg
    store = normalize_store_arg(store, clobber=(mode == 'w'))
    path = normalize_storage_path(path)

    if mode in {'w', 'w-', 'x'}:
        if 'shape' in kwargs:
            return open_array(store, mode=mode, **kwargs)
        else:
            return open_group(store, mode=mode, **kwargs)

    elif mode == 'a':
        if contains_array(store, path):
            return open_array(store, mode=mode, **kwargs)
        elif contains_group(store, path):
            return open_group(store, mode=mode, **kwargs)
        elif 'shape' in kwargs:
            return open_array(store, mode=mode, **kwargs)
        else:
            return open_group(store, mode=mode, **kwargs)

    else:
        if contains_array(store, path):
            return open_array(store, mode=mode, **kwargs)
        elif contains_group(store, path):
            return open_group(store, mode=mode, **kwargs)
        else:
            err_path_not_found(path)

def copy_store(source, dest, source_path='', dest_path='', excludes=None,
               includes=None, flags=0, if_exists='raise', dry_run=False,
               log=None):
    """Copy data directly from the `source` store to the `dest` store. Use this
    function when you want to copy a group or array in the most efficient way,
    preserving all configuration and attributes. This function is more
    efficient than the copy() or copy_all() functions because it avoids
    de-compressing and re-compressing data, rather the compressed chunk data
    for each array are copied directly between stores.

    Parameters
    ----------
    source : Mapping
        Store to copy data from.
    dest : MutableMapping
        Store to copy data into.
    source_path : str, optional
        Only copy data from under this path in the source store.
    dest_path : str, optional
        Copy data into this path in the destination store.
    excludes : sequence of str, optional
        One or more regular expressions which will be matched against keys in
        the source store. Any matching key will not be copied.
    includes : sequence of str, optional
        One or more regular expressions which will be matched against keys in
        the source store and will override any excludes also matching.
    flags : int, optional
        Regular expression flags used for matching excludes and includes.
    if_exists : {'raise', 'replace', 'skip'}, optional
        How to handle keys that already exist in the destination store. If
        'raise' then a CopyError is raised on the first key already present
        in the destination store. If 'replace' then any data will be replaced
        in the destination. If 'skip' then any existing keys will not be
        copied.
    dry_run : bool, optional
        If True, don't actually copy anything, just log what would have
        happened.
    log : callable, file path or file-like object, optional
        If provided, will be used to log progress information.

    Returns
    -------
    n_copied : int
        Number of items copied.
    n_skipped : int
        Number of items skipped.
    n_bytes_copied : int
        Number of bytes of data that were actually copied.

    Examples
    --------
    >>> import zarr
    >>> store1 = zarr.DirectoryStore('data/example.zarr')
    >>> root = zarr.group(store1, overwrite=True)
    >>> foo = root.create_group('foo')
    >>> bar = foo.create_group('bar')
    >>> baz = bar.create_dataset('baz', shape=100, chunks=50, dtype='i8')
    >>> import numpy as np
    >>> baz[:] = np.arange(100)
    >>> root.tree()
    /
     └── foo
         └── bar
             └── baz (100,) int64
    >>> from sys import stdout
    >>> store2 = zarr.ZipStore('data/example.zip', mode='w')
    >>> zarr.copy_store(store1, store2, log=stdout)
    copy .zgroup
    copy foo/.zgroup
    copy foo/bar/.zgroup
    copy foo/bar/baz/.zarray
    copy foo/bar/baz/0
    copy foo/bar/baz/1
    all done: 6 copied, 0 skipped, 566 bytes copied
    (6, 0, 566)
    >>> new_root = zarr.group(store2)
    >>> new_root.tree()
    /
     └── foo
         └── bar
             └── baz (100,) int64
    >>> new_root['foo/bar/baz'][:]
    array([ 0,  1,  2, ..., 97, 98, 99])
    >>> store2.close()  # zip stores need to be closed

    Notes
    -----
    Please note that this is an experimental feature. The behaviour of this
    function is still evolving and the default behaviour and/or parameters may
    change in future versions.

    """

    # normalize paths
    source_path = normalize_storage_path(source_path)
    dest_path = normalize_storage_path(dest_path)
    if source_path:
        source_path = source_path + '/'
    if dest_path:
        dest_path = dest_path + '/'

    # normalize excludes and includes
    if excludes is None:
        excludes = []
    elif isinstance(excludes, str):
        excludes = [excludes]
    if includes is None:
        includes = []
    elif isinstance(includes, str):
        includes = [includes]
    excludes = [re.compile(e, flags) for e in excludes]
    includes = [re.compile(i, flags) for i in includes]

    # check if_exists parameter
    valid_if_exists = ['raise', 'replace', 'skip']
    if if_exists not in valid_if_exists:
        raise ValueError('if_exists must be one of {!r}; found {!r}'
                         .format(valid_if_exists, if_exists))

    # setup counting variables
    n_copied = n_skipped = n_bytes_copied = 0

    # setup logging
    with _LogWriter(log) as log:

        # iterate over source keys
        for source_key in sorted(source.keys()):

            # filter to keys under source path
            if source_key.startswith(source_path):

                # process excludes and includes
                exclude = False
                for prog in excludes:
                    if prog.search(source_key):
                        exclude = True
                        break
                if exclude:
                    for prog in includes:
                        if prog.search(source_key):
                            exclude = False
                            break
                if exclude:
                    continue

                # map key to destination path
                key_suffix = source_key[len(source_path):]
                dest_key = dest_path + key_suffix

                # create a descriptive label for this operation
                descr = source_key
                if dest_key != source_key:
                    descr = descr + ' -> ' + dest_key

                # decide what to do
                do_copy = True
                if if_exists != 'replace':
                    if dest_key in dest:
                        if if_exists == 'raise':
                            raise CopyError('key {!r} exists in destination'
                                            .format(dest_key))
                        elif if_exists == 'skip':
                            do_copy = False

                # take action
                if do_copy:
                    log('copy {}'.format(descr))
                    if not dry_run:
                        data = source[source_key]
                        n_bytes_copied += buffer_size(data)
                        dest[dest_key] = data
                    n_copied += 1
                else:
                    log('skip {}'.format(descr))
                    n_skipped += 1

        # log a final message with a summary of what happened
        _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied)

    return n_copied, n_skipped, n_bytes_copied