Example #1
0
File: core.py Project: will133/zarr
    def __setitem__(self, item, value):
        """Modify data for some portion of the array.

        Parameters
        ----------
        item : int, slice, or tuple of these
            Selection to write to (see Examples). It is normalized against
            the array shape with ``normalize_array_selection``.
        value : scalar or array-like
            Data to store. A scalar (as judged by ``np.isscalar``) is written
            to every selected element. Anything else must have the same shape
            as the sliced dimensions of the selection; objects that lack a
            ``shape`` attribute (e.g. nested lists) are first converted with
            ``np.asanyarray``.

        Raises
        ------
        ValueError
            If `value` is not a scalar and its shape does not match the
            shape of the selection.

        Notes
        -----
        When the array is flagged read-only, ``err_read_only()`` is called
        before anything is written (presumably raising; it is defined
        elsewhere).

        Examples
        --------

        Setup a 1-dimensional array::

            >>> import zarr
            >>> import numpy as np
            >>> z = zarr.zeros(100000000, chunks=1000000, dtype='i4')
            >>> z
            Array((100000000,), int32, chunks=(1000000,), order=C)
              nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 0/100
              compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
              store: dict

        Set all array elements to the same scalar value::

            >>> z[:] = 42
            >>> z[:]
            array([42, 42, 42, ..., 42, 42, 42], dtype=int32)

        Set a portion of the array::

            >>> z[:100] = np.arange(100)
            >>> z[-100:] = np.arange(100)[::-1]
            >>> z[:]
            array([0, 1, 2, ..., 2, 1, 0], dtype=int32)

        Setup a 2-dimensional array::

            >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4')
            >>> z
            Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
              nbytes: 381.5M; nbytes_stored: ...; ratio: ...; initialized: 0/100
              compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
              store: dict

        Set all array elements to the same scalar value::

            >>> z[:] = 42
            >>> z[:]
            array([[42, 42, 42, ..., 42, 42, 42],
                   [42, 42, 42, ..., 42, 42, 42],
                   [42, 42, 42, ..., 42, 42, 42],
                   ...,
                   [42, 42, 42, ..., 42, 42, 42],
                   [42, 42, 42, ..., 42, 42, 42],
                   [42, 42, 42, ..., 42, 42, 42]], dtype=int32)

        Set a portion of the array::

            >>> z[0, :] = np.arange(z.shape[1])
            >>> z[:, 0] = np.arange(z.shape[0])
            >>> z[:]
            array([[   0,    1,    2, ..., 9997, 9998, 9999],
                   [   1,   42,   42, ...,   42,   42,   42],
                   [   2,   42,   42, ...,   42,   42,   42],
                   ...,
                   [9997,   42,   42, ...,   42,   42,   42],
                   [9998,   42,   42, ...,   42,   42,   42],
                   [9999,   42,   42, ...,   42,   42,   42]], dtype=int32)

        """

        # guard conditions
        if self._read_only:
            err_read_only()

        # refresh metadata
        if not self._cache_metadata:
            self._load_metadata_nosync()

        # normalize selection
        selection = normalize_array_selection(item, self._shape)

        # shape of the selection: only sliced dimensions contribute, integer
        # indices drop their dimension
        expected_shape = tuple(s.stop - s.start for s in selection
                               if isinstance(s, slice))

        # scalar-ness does not change while iterating, so decide it once
        value_is_scalar = np.isscalar(value)

        # check value shape
        if not value_is_scalar:
            if not hasattr(value, 'shape'):
                # plain sequences (lists, tuples, ...) carry no shape; turn
                # them into an array so they can be checked and sliced below
                value = np.asanyarray(value)
            if expected_shape != value.shape:
                raise ValueError('value has wrong shape, expecting %s, found %s' %
                                 (str(expected_shape), str(value.shape)))

        # determine indices of chunks overlapping the selection
        chunk_range = get_chunk_range(selection, self._chunks)

        # iterate over chunks in range
        for cidx in itertools.product(*chunk_range):

            # determine chunk offset (position of the chunk's first element
            # in array coordinates)
            offset = [i * c for i, c in zip(cidx, self._chunks)]

            # determine required index range within chunk: slices are
            # clipped to the chunk extent, integer indices are shifted into
            # chunk-local coordinates
            chunk_selection = tuple(
                slice(max(0, s.start - o), min(c, s.stop - o))
                if isinstance(s, slice) else s - o
                for s, o, c in zip(selection, offset, self._chunks))

            if value_is_scalar:

                # put data
                self._chunk_setitem(cidx, chunk_selection, value)

            else:
                # value is array-like

                # determine index within value: the part of the selection
                # covered by this chunk, relative to the selection start
                value_selection = tuple(
                    slice(max(0, o - s.start),
                          min(o + c - s.start, s.stop - s.start))
                    for s, o, c in zip(selection, offset, self._chunks)
                    if isinstance(s, slice))

                # put data
                self._chunk_setitem(cidx, chunk_selection,
                                    value[value_selection])
Example #2
0
    def __setitem__(self, item, value):
        """Modify data for some portion of the array.

        Parameters
        ----------
        item : int, slice, or tuple of these
            Selection to write to (see Examples). It is normalized against
            the array shape with ``normalize_array_selection``.
        value : scalar or array-like
            Data to store. A scalar (as judged by ``np.isscalar``) is written
            to every selected element. Anything else must have the same shape
            as the sliced dimensions of the selection; objects that lack a
            ``shape`` attribute (e.g. nested lists) are first converted with
            ``np.asanyarray``.

        Raises
        ------
        ValueError
            If `value` is not a scalar and its shape does not match the
            shape of the selection.

        Notes
        -----
        When the array is flagged read-only, ``err_read_only()`` is called
        before anything is written (presumably raising; it is defined
        elsewhere).

        Examples
        --------

        Setup a 1-dimensional array::

            >>> import zarr
            >>> import numpy as np
            >>> z = zarr.zeros(100000000, chunks=1000000, dtype='i4')
            >>> z
            Array((100000000,), int32, chunks=(1000000,), order=C)
              nbytes: 381.5M; nbytes_stored: 301; ratio: 1328903.7; initialized: 0/100
              compressor: Blosc(cname='lz4', clevel=5, shuffle=1)
              store: dict

        Set all array elements to the same scalar value::

            >>> z[:] = 42
            >>> z[:]
            array([42, 42, 42, ..., 42, 42, 42], dtype=int32)

        Set a portion of the array::

            >>> z[:100] = np.arange(100)
            >>> z[-100:] = np.arange(100)[::-1]
            >>> z[:]
            array([0, 1, 2, ..., 2, 1, 0], dtype=int32)

        Setup a 2-dimensional array::

            >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4')
            >>> z
            Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
              nbytes: 381.5M; nbytes_stored: 323; ratio: 1238390.1; initialized: 0/100
              compressor: Blosc(cname='lz4', clevel=5, shuffle=1)
              store: dict

        Set all array elements to the same scalar value::

            >>> z[:] = 42
            >>> z[:]
            array([[42, 42, 42, ..., 42, 42, 42],
                   [42, 42, 42, ..., 42, 42, 42],
                   [42, 42, 42, ..., 42, 42, 42],
                   ...,
                   [42, 42, 42, ..., 42, 42, 42],
                   [42, 42, 42, ..., 42, 42, 42],
                   [42, 42, 42, ..., 42, 42, 42]], dtype=int32)

        Set a portion of the array::

            >>> z[0, :] = np.arange(z.shape[1])
            >>> z[:, 0] = np.arange(z.shape[0])
            >>> z[:]
            array([[   0,    1,    2, ..., 9997, 9998, 9999],
                   [   1,   42,   42, ...,   42,   42,   42],
                   [   2,   42,   42, ...,   42,   42,   42],
                   ...,
                   [9997,   42,   42, ...,   42,   42,   42],
                   [9998,   42,   42, ...,   42,   42,   42],
                   [9999,   42,   42, ...,   42,   42,   42]], dtype=int32)

        """

        # guard conditions
        if self._read_only:
            err_read_only()

        # refresh metadata
        if not self._cache_metadata:
            self._load_metadata_nosync()

        # normalize selection
        selection = normalize_array_selection(item, self._shape)

        # shape of the selection: integer indices drop their dimension, so
        # only slices contribute an extent
        expected_shape = tuple(
            s.stop - s.start for s in selection
            if isinstance(s, slice)
        )

        # the value's scalar-ness is loop-invariant; evaluate it a single time
        scalar_value = np.isscalar(value)

        # check value shape
        if not scalar_value:
            if not hasattr(value, 'shape'):
                # lists/tuples have no ``shape``; coerce them so the shape
                # check and the per-chunk slicing below both work
                value = np.asanyarray(value)
            if expected_shape != value.shape:
                raise ValueError('value has wrong shape, expecting %s, found %s'
                                 % (str(expected_shape),
                                    str(value.shape)))

        # determine indices of chunks overlapping the selection
        chunk_range = get_chunk_range(selection, self._chunks)

        # iterate over chunks in range
        for cidx in itertools.product(*chunk_range):

            # determine chunk offset (array coordinates of the chunk origin)
            offset = [i * c for i, c in zip(cidx, self._chunks)]

            # determine required index range within chunk: clip slices to
            # the chunk, translate integer indices to chunk-local positions
            chunk_selection = tuple(
                slice(max(0, s.start - o), min(c, s.stop - o))
                if isinstance(s, slice)
                else s - o
                for s, o, c in zip(selection, offset, self._chunks)
            )

            if scalar_value:

                # put data
                self._chunk_setitem(cidx, chunk_selection, value)

            else:
                # value is array-like

                # determine index within value: the region of the selection
                # that falls in this chunk, relative to the selection start
                value_selection = tuple(
                    slice(max(0, o - s.start),
                          min(o + c - s.start, s.stop - s.start))
                    for s, o, c in zip(selection, offset, self._chunks)
                    if isinstance(s, slice)
                )

                # put data
                self._chunk_setitem(cidx, chunk_selection,
                                    value[value_selection])
Example #3
0
File: core.py Project: will133/zarr
    def __getitem__(self, item):
        """Read the data held in some region of the array, using NumPy-style
        indexing with integers and slices.

        Parameters
        ----------
        item : int, slice, or tuple of these
            Region to read (see Examples). It is normalized against the
            array shape with ``normalize_array_selection``.

        Returns
        -------
        out : ndarray or scalar
            A NumPy array with one dimension per sliced axis of the
            selection. When every axis is indexed by an integer the result
            is zero-dimensional and the single element is returned instead.

        Examples
        --------

        Setup a 1-dimensional array::

            >>> import zarr
            >>> import numpy as np
            >>> z = zarr.array(np.arange(100000000), chunks=1000000, dtype='i4')
            >>> z
            Array((100000000,), int32, chunks=(1000000,), order=C)
              nbytes: 381.5M; nbytes_stored: 6.4M; ratio: 59.9; initialized: 100/100
              compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
              store: dict

        Take some slices::

            >>> z[5]
            5
            >>> z[:5]
            array([0, 1, 2, 3, 4], dtype=int32)
            >>> z[-5:]
            array([99999995, 99999996, 99999997, 99999998, 99999999], dtype=int32)
            >>> z[5:10]
            array([5, 6, 7, 8, 9], dtype=int32)
            >>> z[:]
            array([       0,        1,        2, ..., 99999997, 99999998, 99999999], dtype=int32)

        Setup a 2-dimensional array::

            >>> import zarr
            >>> import numpy as np
            >>> z = zarr.array(np.arange(100000000).reshape(10000, 10000),
            ...                chunks=(1000, 1000), dtype='i4')
            >>> z
            Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
              nbytes: 381.5M; nbytes_stored: 9.2M; ratio: 41.5; initialized: 100/100
              compressor: Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
              store: dict

        Take some slices::

            >>> z[2, 2]
            20002
            >>> z[:2, :2]
            array([[    0,     1],
                   [10000, 10001]], dtype=int32)
            >>> z[:2]
            array([[    0,     1,     2, ...,  9997,  9998,  9999],
                   [10000, 10001, 10002, ..., 19997, 19998, 19999]], dtype=int32)
            >>> z[:, :2]
            array([[       0,        1],
                   [   10000,    10001],
                   [   20000,    20001],
                   ...,
                   [99970000, 99970001],
                   [99980000, 99980001],
                   [99990000, 99990001]], dtype=int32)
            >>> z[:]
            array([[       0,        1,        2, ...,     9997,     9998,     9999],
                   [   10000,    10001,    10002, ...,    19997,    19998,    19999],
                   [   20000,    20001,    20002, ...,    29997,    29998,    29999],
                   ...,
                   [99970000, 99970001, 99970002, ..., 99979997, 99979998, 99979999],
                   [99980000, 99980001, 99980002, ..., 99989997, 99989998, 99989999],
                   [99990000, 99990001, 99990002, ..., 99999997, 99999998, 99999999]], dtype=int32)

        """  # flake8: noqa

        # metadata is re-read on every access unless caching is enabled
        if not self._cache_metadata:
            self._load_metadata()

        selection = normalize_array_selection(item, self._shape)

        # the result has one axis per slice; integer indices drop theirs
        result_shape = tuple(
            dim_sel.stop - dim_sel.start
            for dim_sel in selection
            if isinstance(dim_sel, slice)
        )
        out = np.empty(result_shape, dtype=self._dtype, order=self._order)

        # visit every chunk that overlaps the selection
        chunk_range = get_chunk_range(selection, self._chunks)
        for chunk_index in itertools.product(*chunk_range):

            within_out = []    # region of ``out`` this chunk fills
            within_chunk = []  # region of the chunk to read

            for dim_sel, idx, chunk_len in zip(selection, chunk_index,
                                               self._chunks):
                # array coordinate of the chunk's first element on this axis
                origin = idx * chunk_len

                if isinstance(dim_sel, slice):
                    within_out.append(
                        slice(max(0, origin - dim_sel.start),
                              min(origin + chunk_len - dim_sel.start,
                                  dim_sel.stop - dim_sel.start)))
                    within_chunk.append(
                        slice(max(0, dim_sel.start - origin),
                              min(chunk_len, dim_sel.stop - origin)))
                else:
                    # integer index: shift into chunk-local coordinates
                    within_chunk.append(dim_sel - origin)

            # take a view of the output to fill; with no sliced axes the
            # zero-dimensional output itself is the destination
            dest = out[tuple(within_out)] if within_out else out

            # load chunk selection into output array
            self._chunk_getitem(chunk_index, tuple(within_chunk), dest)

        # unwrap a zero-dimensional result into its single element
        return out if out.shape else out[()]
Example #4
0
    def __getitem__(self, item):
        """Fetch the data stored in a region of the array. Indexing follows
        NumPy conventions for integers and slices.

        Parameters
        ----------
        item : int, slice, or tuple of these
            Region to read (see Examples). It is normalized against the
            array shape with ``normalize_array_selection``.

        Returns
        -------
        out : ndarray or scalar
            A NumPy array holding the requested region, with one dimension
            per sliced axis. If no axis is sliced the zero-dimensional
            result is unwrapped and its single element is returned.

        Examples
        --------

        Setup a 1-dimensional array::

            >>> import zarr
            >>> import numpy as np
            >>> z = zarr.array(np.arange(100000000), chunks=1000000, dtype='i4')
            >>> z
            Array((100000000,), int32, chunks=(1000000,), order=C)
              nbytes: 381.5M; nbytes_stored: 6.4M; ratio: 59.9; initialized: 100/100
              compressor: Blosc(cname='lz4', clevel=5, shuffle=1)
              store: dict

        Take some slices::

            >>> z[5]
            5
            >>> z[:5]
            array([0, 1, 2, 3, 4], dtype=int32)
            >>> z[-5:]
            array([99999995, 99999996, 99999997, 99999998, 99999999], dtype=int32)
            >>> z[5:10]
            array([5, 6, 7, 8, 9], dtype=int32)
            >>> z[:]
            array([       0,        1,        2, ..., 99999997, 99999998, 99999999], dtype=int32)

        Setup a 2-dimensional array::

            >>> import zarr
            >>> import numpy as np
            >>> z = zarr.array(np.arange(100000000).reshape(10000, 10000),
            ...                chunks=(1000, 1000), dtype='i4')
            >>> z
            Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
              nbytes: 381.5M; nbytes_stored: 9.2M; ratio: 41.6; initialized: 100/100
              compressor: Blosc(cname='lz4', clevel=5, shuffle=1)
              store: dict

        Take some slices::

            >>> z[2, 2]
            20002
            >>> z[:2, :2]
            array([[    0,     1],
                   [10000, 10001]], dtype=int32)
            >>> z[:2]
            array([[    0,     1,     2, ...,  9997,  9998,  9999],
                   [10000, 10001, 10002, ..., 19997, 19998, 19999]], dtype=int32)
            >>> z[:, :2]
            array([[       0,        1],
                   [   10000,    10001],
                   [   20000,    20001],
                   ...,
                   [99970000, 99970001],
                   [99980000, 99980001],
                   [99990000, 99990001]], dtype=int32)
            >>> z[:]
            array([[       0,        1,        2, ...,     9997,     9998,     9999],
                   [   10000,    10001,    10002, ...,    19997,    19998,    19999],
                   [   20000,    20001,    20002, ...,    29997,    29998,    29999],
                   ...,
                   [99970000, 99970001, 99970002, ..., 99979997, 99979998, 99979999],
                   [99980000, 99980001, 99980002, ..., 99989997, 99989998, 99989999],
                   [99990000, 99990001, 99990002, ..., 99999997, 99999998, 99999999]], dtype=int32)

        """  # flake8: noqa

        # reload metadata first when it is not being cached
        if not self._cache_metadata:
            self._load_metadata()

        # bring the requested selection into normalized form
        selection = normalize_array_selection(item, self._shape)

        # allocate the result: only sliced axes contribute a dimension
        extents = [sel.stop - sel.start
                   for sel in selection if isinstance(sel, slice)]
        out = np.empty(tuple(extents), dtype=self._dtype, order=self._order)

        # indices of the chunks that overlap the selection
        chunk_range = get_chunk_range(selection, self._chunks)

        for cidx in itertools.product(*chunk_range):

            # array coordinates of this chunk's first element, per axis
            origins = [n * size for n, size in zip(cidx, self._chunks)]

            # (selection, chunk origin, chunk length) for each axis
            per_axis = list(zip(selection, origins, self._chunks))

            # region of the output array covered by this chunk
            out_region = tuple(
                slice(max(0, org - sel.start),
                      min(org + size - sel.start, sel.stop - sel.start))
                for sel, org, size in per_axis
                if isinstance(sel, slice)
            )

            # region of the chunk to read, in chunk-local coordinates
            chunk_region = []
            for sel, org, size in per_axis:
                if not isinstance(sel, slice):
                    # integer index: just shift by the chunk origin
                    chunk_region.append(sel - org)
                    continue
                chunk_region.append(
                    slice(max(0, sel.start - org), min(size, sel.stop - org)))
            chunk_region = tuple(chunk_region)

            # destination is a view into the output, or the output itself
            # when there are no sliced axes to index by
            dest = out
            if out_region:
                dest = out[out_region]

            # load chunk selection into output array
            self._chunk_getitem(cidx, chunk_region, dest)

        # a zero-dimensional result is returned as its single element
        if not out.shape:
            return out[()]
        return out