Ejemplo n.º 1
0
    def getsize(self, path=None):
        """Return the size in bytes of whatever is stored at `path`.

        The path is first passed through ``normalize_storage_path``. An empty
        path refers to ``self.root``; otherwise the item is looked up through
        ``self._get_parent``, and a ``KeyError`` during that lookup is handed
        to ``err_path_not_found``.

        For a container (an instance of ``self.cls``) the result is the sum of
        ``buffer_size`` over its direct values that are not themselves
        containers, so nested containers are not descended into. For any
        other value the result is ``buffer_size`` of that value.

        Returns -1 as soon as ``buffer_size`` raises ``TypeError``.
        """
        path = normalize_storage_path(path)

        # locate the node to measure; an empty path means the store root
        if not path:
            node = self.root
        else:
            try:
                parent, key = self._get_parent(path)
                node = parent[key]
            except KeyError:
                err_path_not_found(path)

        # single item: measure it directly
        if not isinstance(node, self.cls):
            try:
                return buffer_size(node)
            except TypeError:
                return -1

        # container: add up the immediate, non-container children only
        total = 0
        for child in node.values():
            if isinstance(child, self.cls):
                continue
            try:
                total += buffer_size(child)
            except TypeError:
                return -1
        return total
Ejemplo n.º 2
0
    def getsize(self, path=None):
        """Report how many bytes are held under `path` in this store.

        `path` is normalized with ``normalize_storage_path``. A falsy
        normalized path selects ``self.root``; any other path is resolved via
        ``self._get_parent`` and a ``KeyError`` from that resolution is passed
        on to ``err_path_not_found``.

        When the resolved value is an instance of ``self.cls`` (a container),
        only its direct non-container values are measured and summed; nested
        containers are skipped. Otherwise the value itself is measured.

        -1 is returned whenever ``buffer_size`` raises ``TypeError``.
        """
        path = normalize_storage_path(path)

        # resolve the target value
        if not path:
            target = self.root
        else:
            try:
                parent, key = self._get_parent(path)
                target = parent[key]
            except KeyError:
                err_path_not_found(path)

        is_container = isinstance(target, self.cls)

        if not is_container:
            # plain value
            try:
                return buffer_size(target)
            except TypeError:
                return -1

        # total size of the values stored directly in this container
        nbytes = 0
        for item in target.values():
            if isinstance(item, self.cls):
                continue
            try:
                nbytes += buffer_size(item)
            except TypeError:
                return -1
        return nbytes
Ejemplo n.º 3
0
    def test_nbytes_stored(self):
        """Check that ``nbytes_stored`` equals the summed buffer size of all
        values in the array's store, both before and after writing data."""

        # dict as store
        z = self.create_array(shape=1000, chunks=100)

        def stored_bytes():
            # recomputed on every call so it reflects the store's current state
            return sum(buffer_size(v) for v in z.store.values())

        eq(stored_bytes(), z.nbytes_stored)

        # fill the array, then compare again
        z[:] = 42
        eq(stored_bytes(), z.nbytes_stored)
Ejemplo n.º 4
0
    def test_nbytes_stored(self):
        """``nbytes_stored`` must match the total ``buffer_size`` of the
        store's values for a fresh array and again after assignment."""

        # dict as store
        z = self.create_array(shape=1000, chunks=100)

        def expected():
            # sum over whatever the store currently holds
            return sum(buffer_size(value) for value in z.store.values())

        eq(expected(), z.nbytes_stored)

        # write to every element and re-check
        z[:] = 42
        eq(expected(), z.nbytes_stored)
Ejemplo n.º 5
0
    def test_nbytes_stored(self):
        """Check ``nbytes_stored`` against the combined buffer size of the
        values in ``z.store`` and ``z.chunk_store``, before and after writing,
        and expect -1 once a plain list is placed in the chunk store."""

        z = self.create_array(shape=1000, chunks=100)

        def total_stored():
            # sum over z.store first, then z.chunk_store
            store_bytes = sum(buffer_size(v) for v in z.store.values())
            chunk_bytes = sum(buffer_size(v) for v in z.chunk_store.values())
            return store_bytes + chunk_bytes

        eq(total_stored(), z.nbytes_stored)

        z[:] = 42
        eq(total_stored(), z.nbytes_stored)

        # mess with store: insert a list value and expect -1 to be reported
        bad_key = z._key_prefix + 'foo'
        z.chunk_store[bad_key] = list(range(10))
        eq(-1, z.nbytes_stored)
Ejemplo n.º 6
0
    def test_nbytes_stored(self):
        """Check ``nbytes_stored`` against the summed buffer size of the
        store's values before and after writing, then expect -1 after a plain
        list is inserted. A ``TypeError`` raised during that last step is
        tolerated."""

        # dict as store
        z = self.create_array(shape=1000, chunks=100)

        def stored_bytes():
            return sum(buffer_size(v) for v in z.store.values())

        eq(stored_bytes(), z.nbytes_stored)

        z[:] = 42
        eq(stored_bytes(), z.nbytes_stored)

        # mess with store
        # NOTE(review): presumably some store types reject list values with a
        # TypeError, which is why it is swallowed here - confirm
        bad_key = z._key_prefix + 'foo'
        try:
            z.store[bad_key] = list(range(10))
            eq(-1, z.nbytes_stored)
        except TypeError:
            pass
Ejemplo n.º 7
0
    def test_nbytes_stored(self):
        """Check ``nbytes_stored`` against the summed buffer size of store
        values whose key starts with ``'foo/bar/'``, before and after writing,
        and expect -1 once a plain list is placed in the store."""

        # dict as store
        z = self.create_array(shape=1000, chunks=100)

        def bytes_under_prefix():
            # only keys beginning with 'foo/bar/' are counted
            total = 0
            for key, value in z.store.items():
                if key.startswith('foo/bar/'):
                    total += buffer_size(value)
            return total

        eq(bytes_under_prefix(), z.nbytes_stored)

        z[:] = 42
        eq(bytes_under_prefix(), z.nbytes_stored)

        # mess with store: insert a list value and expect -1 to be reported
        bad_key = z._key_prefix + 'foo'
        z.store[bad_key] = list(range(10))
        eq(-1, z.nbytes_stored)
Ejemplo n.º 8
0
    def test_nbytes_stored(self):
        """``nbytes_stored`` must equal the total ``buffer_size`` of the store
        values keyed under ``'foo/bar/'`` for a fresh array and after
        assignment; after a list value is inserted it is expected to be -1."""

        # dict as store
        z = self.create_array(shape=1000, chunks=100)

        def expected():
            # restrict the sum to keys that start with 'foo/bar/'
            matching = (v for k, v in z.store.items()
                        if k.startswith('foo/bar/'))
            return sum(buffer_size(v) for v in matching)

        eq(expected(), z.nbytes_stored)

        z[:] = 42
        eq(expected(), z.nbytes_stored)

        # mess with store
        z.store[z._key_prefix + 'foo'] = list(range(10))
        eq(-1, z.nbytes_stored)
Ejemplo n.º 9
0
 def getsize(self, path=None):
     """Return the number of bytes stored under `path`.

     * `path` of ``None`` or ``''``: add up ``self.fs.du`` for each of the
       ``data/root`` and ``meta/root`` subdirectories of ``self.path`` that
       exist.
     * `path` is a key of this store: return ``buffer_size`` of that value.
     * otherwise: sum ``os.path.getsize`` over the files, plus ``self.fs.du``
       over the directories, returned by ``_get_files_and_dirs_from_path``.
     """
     if path is None or path == '':
         # size of both the data and meta subdirs (only those present)
         candidates = [os.path.join(self.path, sub)
                       for sub in ['data/root', 'meta/root']]
         dirs = [c for c in candidates if os.path.exists(c)]
         size = 0
     elif path in self:
         # access individual element by full path
         return buffer_size(self[path])
     else:
         files, dirs = _get_files_and_dirs_from_path(self, path)
         size = sum(os.path.getsize(f) for f in files)

     # every directory contributes its full recursive disk usage
     for directory in dirs:
         size += self.fs.du(directory, total=True, maxdepth=None)
     return size
Ejemplo n.º 10
0
def getsize(store, path=None):
    """Compute size of stored items for a given path.

    The path is normalized with ``normalize_storage_path``. If `store` has a
    ``getsize`` attribute the call is delegated to it. If `store` is a
    ``dict``, the sizes of the values found directly under `path` are summed
    with ``buffer_size``. For any other store, or as soon as ``buffer_size``
    raises ``TypeError``, -1 is returned.
    """
    path = normalize_storage_path(path)

    # stores that can measure themselves: pass through
    if hasattr(store, 'getsize'):
        return store.getsize(path)

    # only plain dicts can be measured from here
    if not isinstance(store, dict):
        return -1

    # compute from size of values
    prefix = _path_to_prefix(path)
    total = 0
    for name in listdir(store, path):
        try:
            value = store[prefix + name]
        except KeyError:
            # listed name has no value of its own under this key; ignore it
            continue
        try:
            total += buffer_size(value)
        except TypeError:
            return -1
    return total
Ejemplo n.º 11
0
def getsize(store, path=None):
    """Compute size of stored items for a given path.

    After normalizing `path` with ``normalize_storage_path``:

    * a store exposing ``getsize`` is asked directly;
    * a ``dict`` store has the ``buffer_size`` of each value listed directly
      under `path` added up;
    * anything else yields -1.

    -1 is also returned if ``buffer_size`` raises ``TypeError`` for a value.
    """
    path = normalize_storage_path(path)

    if hasattr(store, 'getsize'):
        # pass through
        return store.getsize(path)

    if not isinstance(store, dict):
        # no way to measure this kind of store here
        return -1

    # compute from size of values
    key_prefix = _path_to_prefix(path)
    nbytes = 0
    for child in listdir(store, path):
        try:
            item = store[key_prefix + child]
        except KeyError:
            # nothing stored under exactly this key; skip it
            continue
        try:
            nbytes += buffer_size(item)
        except TypeError:
            return -1
    return nbytes
Ejemplo n.º 12
0
def copy_store(source,
               dest,
               source_path='',
               dest_path='',
               excludes=None,
               includes=None,
               flags=0,
               if_exists='raise',
               dry_run=False,
               log=None):
    """Copy stored items key-by-key from the `source` store into the `dest`
    store. Values are transferred exactly as they are held in the source, so
    compressed chunk data are moved between stores without being
    de-compressed and re-compressed. That makes this function more efficient
    than the copy() or copy_all() functions when a whole group or array is to
    be copied with all configuration and attributes preserved.

    Parameters
    ----------
    source : Mapping
        Store to copy data from.
    dest : MutableMapping
        Store to copy data into.
    source_path : str, optional
        Only keys under this path in the source store are copied.
    dest_path : str, optional
        Path in the destination store under which copied keys are placed.
    excludes : sequence of str, optional
        One or more regular expressions searched for in each source key. A
        key matching any of them is not copied.
    includes : sequence of str, optional
        One or more regular expressions searched for in each source key. A
        match overrides any matching exclude.
    flags : int, optional
        Regular expression flags used when compiling excludes and includes.
    if_exists : {'raise', 'replace', 'skip'}, optional
        What to do when a key is already present in the destination store.
        'raise' raises a CopyError at the first such key, 'replace'
        overwrites the existing data, 'skip' leaves the existing key alone.
    dry_run : bool, optional
        If True, nothing is read or written; the actions that would have been
        taken are only logged and counted.
    log : callable, file path or file-like object, optional
        If provided, will be used to log progress information.

    Returns
    -------
    n_copied : int
        Number of items copied.
    n_skipped : int
        Number of items skipped.
    n_bytes_copied : int
        Number of bytes of data that were actually copied.

    Raises
    ------
    ValueError
        If `if_exists` is not one of the accepted values.
    CopyError
        If `if_exists` is 'raise' and a destination key already exists.

    Examples
    --------

    >>> import zarr
    >>> store1 = zarr.DirectoryStore('data/example.zarr')
    >>> root = zarr.group(store1, overwrite=True)
    >>> foo = root.create_group('foo')
    >>> bar = foo.create_group('bar')
    >>> baz = bar.create_dataset('baz', shape=100, chunks=50, dtype='i8')
    >>> import numpy as np
    >>> baz[:] = np.arange(100)
    >>> root.tree()
    /
     └── foo
         └── bar
             └── baz (100,) int64
    >>> from sys import stdout
    >>> store2 = zarr.ZipStore('data/example.zip', mode='w')
    >>> zarr.copy_store(store1, store2, log=stdout)
    copy .zgroup
    copy foo/.zgroup
    copy foo/bar/.zgroup
    copy foo/bar/baz/.zarray
    copy foo/bar/baz/0
    copy foo/bar/baz/1
    all done: 6 copied, 0 skipped, 566 bytes copied
    (6, 0, 566)
    >>> new_root = zarr.group(store2)
    >>> new_root.tree()
    /
     └── foo
         └── bar
             └── baz (100,) int64
    >>> new_root['foo/bar/baz'][:]
    array([ 0,  1,  2,  ..., 97, 98, 99])
    >>> store2.close()  # zip stores need to be closed

    Notes
    -----
    Please note that this is an experimental feature. The behaviour of this
    function is still evolving and the default behaviour and/or parameters may
    change in future versions.

    """

    # normalize paths; a non-empty path becomes a key prefix ending in '/'
    source_path = normalize_storage_path(source_path)
    dest_path = normalize_storage_path(dest_path)
    if source_path:
        source_path += '/'
    if dest_path:
        dest_path += '/'

    def _compile_patterns(spec):
        # accept None, a single string, or a sequence of strings
        if spec is None:
            spec = []
        elif isinstance(spec, str):
            spec = [spec]
        return [re.compile(pattern, flags) for pattern in spec]

    # normalize excludes and includes
    excludes = _compile_patterns(excludes)
    includes = _compile_patterns(includes)

    # check if_exists parameter
    valid_if_exists = ['raise', 'replace', 'skip']
    if if_exists not in valid_if_exists:
        raise ValueError('if_exists must be one of {!r}; found {!r}'.format(
            valid_if_exists, if_exists))

    # setup counting variables
    n_copied = 0
    n_skipped = 0
    n_bytes_copied = 0

    # setup logging
    with _LogWriter(log) as log:

        # iterate over source keys in sorted order
        for source_key in sorted(source.keys()):

            # ignore keys that live outside the requested source path
            if not source_key.startswith(source_path):
                continue

            # an exclude match drops the key unless an include also matches
            if (any(prog.search(source_key) for prog in excludes) and
                    not any(prog.search(source_key) for prog in includes)):
                continue

            # map key to destination path
            dest_key = dest_path + source_key[len(source_path):]

            # create a descriptive label for this operation
            if dest_key == source_key:
                descr = source_key
            else:
                descr = source_key + ' -> ' + dest_key

            # decide what to do about a key already in the destination
            do_copy = True
            if if_exists != 'replace' and dest_key in dest:
                if if_exists == 'raise':
                    raise CopyError(
                        'key {!r} exists in destination'.format(dest_key))
                elif if_exists == 'skip':
                    do_copy = False

            # take action
            if not do_copy:
                log('skip {}'.format(descr))
                n_skipped += 1
                continue

            log('copy {}'.format(descr))
            if not dry_run:
                # measure before writing so a failure leaves dest untouched
                data = source[source_key]
                n_bytes_copied += buffer_size(data)
                dest[dest_key] = data
            # a dry run still counts the item as copied
            n_copied += 1

        # log a final message with a summary of what happened
        _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied)

    return n_copied, n_skipped, n_bytes_copied
Ejemplo n.º 13
0
def copy_store(source, dest, source_path='', dest_path='', excludes=None,
               includes=None, flags=0, if_exists='raise', dry_run=False,
               log=None):
    """Copy stored items key-by-key from the `source` store into the `dest`
    store. Values are transferred exactly as they are held in the source, so
    compressed chunk data are moved between stores without being
    de-compressed and re-compressed. That makes this function more efficient
    than the copy() or copy_all() functions when a whole group or array is to
    be copied with all configuration and attributes preserved.

    Parameters
    ----------
    source : Mapping
        Store to copy data from.
    dest : MutableMapping
        Store to copy data into.
    source_path : str, optional
        Only keys under this path in the source store are copied.
    dest_path : str, optional
        Path in the destination store under which copied keys are placed.
    excludes : sequence of str, optional
        One or more regular expressions searched for in each source key. A
        key matching any of them is not copied.
    includes : sequence of str, optional
        One or more regular expressions searched for in each source key. A
        match overrides any matching exclude.
    flags : int, optional
        Regular expression flags used when compiling excludes and includes.
    if_exists : {'raise', 'replace', 'skip'}, optional
        What to do when a key is already present in the destination store.
        'raise' raises a CopyError at the first such key, 'replace'
        overwrites the existing data, 'skip' leaves the existing key alone.
    dry_run : bool, optional
        If True, nothing is read or written; the actions that would have been
        taken are only logged and counted.
    log : callable, file path or file-like object, optional
        If provided, will be used to log progress information.

    Returns
    -------
    n_copied : int
        Number of items copied.
    n_skipped : int
        Number of items skipped.
    n_bytes_copied : int
        Number of bytes of data that were actually copied.

    Raises
    ------
    ValueError
        If `if_exists` is not one of the accepted values.
    CopyError
        If `if_exists` is 'raise' and a destination key already exists.

    Examples
    --------

    >>> import zarr
    >>> store1 = zarr.DirectoryStore('data/example.zarr')
    >>> root = zarr.group(store1, overwrite=True)
    >>> foo = root.create_group('foo')
    >>> bar = foo.create_group('bar')
    >>> baz = bar.create_dataset('baz', shape=100, chunks=50, dtype='i8')
    >>> import numpy as np
    >>> baz[:] = np.arange(100)
    >>> root.tree()
    /
     └── foo
         └── bar
             └── baz (100,) int64
    >>> from sys import stdout
    >>> store2 = zarr.ZipStore('data/example.zip', mode='w')
    >>> zarr.copy_store(store1, store2, log=stdout)
    copy .zgroup
    copy foo/.zgroup
    copy foo/bar/.zgroup
    copy foo/bar/baz/.zarray
    copy foo/bar/baz/0
    copy foo/bar/baz/1
    all done: 6 copied, 0 skipped, 566 bytes copied
    (6, 0, 566)
    >>> new_root = zarr.group(store2)
    >>> new_root.tree()
    /
     └── foo
         └── bar
             └── baz (100,) int64
    >>> new_root['foo/bar/baz'][:]
    array([ 0,  1,  2,  ..., 97, 98, 99])
    >>> store2.close()  # zip stores need to be closed

    Notes
    -----
    Please note that this is an experimental feature. The behaviour of this
    function is still evolving and the default behaviour and/or parameters may
    change in future versions.

    """

    # normalize paths; a non-empty path becomes a key prefix ending in '/'
    source_path = normalize_storage_path(source_path)
    dest_path = normalize_storage_path(dest_path)
    if source_path:
        source_path += '/'
    if dest_path:
        dest_path += '/'

    def _compile_patterns(spec):
        # accept None, a single string, or a sequence of strings
        if spec is None:
            spec = []
        elif isinstance(spec, str):
            spec = [spec]
        return [re.compile(pattern, flags) for pattern in spec]

    # normalize excludes and includes
    excludes = _compile_patterns(excludes)
    includes = _compile_patterns(includes)

    # check if_exists parameter
    valid_if_exists = ['raise', 'replace', 'skip']
    if if_exists not in valid_if_exists:
        raise ValueError('if_exists must be one of {!r}; found {!r}'
                         .format(valid_if_exists, if_exists))

    # setup counting variables
    n_copied = 0
    n_skipped = 0
    n_bytes_copied = 0

    # setup logging
    with _LogWriter(log) as log:

        # iterate over source keys in sorted order
        for source_key in sorted(source.keys()):

            # ignore keys that live outside the requested source path
            if not source_key.startswith(source_path):
                continue

            # an exclude match drops the key unless an include also matches
            if (any(prog.search(source_key) for prog in excludes) and
                    not any(prog.search(source_key) for prog in includes)):
                continue

            # map key to destination path
            dest_key = dest_path + source_key[len(source_path):]

            # create a descriptive label for this operation
            if dest_key == source_key:
                descr = source_key
            else:
                descr = source_key + ' -> ' + dest_key

            # decide what to do about a key already in the destination
            do_copy = True
            if if_exists != 'replace' and dest_key in dest:
                if if_exists == 'raise':
                    raise CopyError('key {!r} exists in destination'
                                    .format(dest_key))
                elif if_exists == 'skip':
                    do_copy = False

            # take action
            if not do_copy:
                log('skip {}'.format(descr))
                n_skipped += 1
                continue

            log('copy {}'.format(descr))
            if not dry_run:
                # measure before writing so a failure leaves dest untouched
                data = source[source_key]
                n_bytes_copied += buffer_size(data)
                dest[dest_key] = data
            # a dry run still counts the item as copied
            n_copied += 1

        # log a final message with a summary of what happened
        _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied)

    return n_copied, n_skipped, n_bytes_copied