Example 1
0
    def download_raw_array(self, object_name, buffersize=2**16, **kwargs):
        """Download a binary np.ndarray and return an np.ndarray object.

        This method downloads an array without any disk or memory overhead.

        Parameters
        ----------
        object_name : str
            Name of the stored object.
        buffersize : int, optional (defaults 2^16)
            Chunk size used when streaming into the destination array.

        Returns
        -------
        array : np.ndarray

        Raises
        ------
        ValueError
            If the object carries no 'compression' metadata.

        Notes
        -----
        The object must have metadata containing: shape, dtype and a gzip
        boolean flag. This is all automatically handled by ``upload_raw_array``.
        """
        self.exists_object(object_name, raise_err=True)

        arraystream = self.download_stream(object_name)

        shape = arraystream.metadata['shape']
        shape = tuple(map(int, shape.split(',')) if shape else ())
        dtype = np.dtype(arraystream.metadata['dtype'])
        # 'order' was not stored by older uploads; default to C layout.
        order = arraystream.metadata.get('order', 'C')

        body = arraystream.content

        if 'gzip' in arraystream.metadata:
            # Backward compatibility: old uploads stored a boolean 'gzip'
            # flag instead of a 'compression' value; translate it.
            if string2bool(arraystream.metadata['gzip']):
                arraystream.metadata['compression'] = 'gzip'
            else:
                arraystream.metadata['compression'] = 'False'

        # Was `assert`, which is silently stripped under `python -O`.
        if 'compression' not in arraystream.metadata:
            raise ValueError(
                "Object '%s' has no 'compression' metadata; "
                "was it uploaded with upload_raw_array?" % object_name)

        compression = arraystream.metadata['compression']

        if compression == 'gzip':
            # gzipped: wrap the stream so it decompresses on the fly
            datastream = GzipInputStream(body)
        elif compression in ('False', 'None'):
            # uncompressed data
            datastream = body
        else:
            # numcodecs compression. The stream cannot be decoded lazily,
            # so the whole payload must be read into memory first.
            decompressor = numcodecs.get_codec(dict(id=compression.lower()))
            bits = decompressor.decode(body.read())
            # BUGFIX: honor the stored memory layout; previously a
            # Fortran-ordered array was reshaped with the default C order.
            return np.frombuffer(bits, dtype=dtype).reshape(shape, order=order)

        # Allocate the destination only for the streaming paths (the
        # numcodecs branch above builds its own array and returned early).
        array = np.empty(shape, dtype=dtype, order=order)
        read_buffered(datastream, array, buffersize=buffersize)
        return array
Example 2
0
def create_dataset(output_n5,
                   template_n5,
                   compression='same',
                   dtype='same',
                   overwrite=True):
    """Create an empty N5 dataset at ``output_n5/s0`` mirroring a template.

    Parameters
    ----------
    output_n5 : str
        Path of the N5 container to write into.
    template_n5 : str
        Path of an existing N5 container whose ``/s0`` dataset supplies
        shape, chunking and (optionally) compressor and dtype.
    compression : str
        'same' copies the template's compressor, 'raw' disables
        compression, anything else is resolved via ``codecs.get_codec``.
    dtype : str
        'same' copies the template's dtype; otherwise used as given.
    overwrite : bool
        Passed through to ``create_dataset``.
    """
    data_set = '/s0'
    template = zarr.open(store=zarr.N5Store(template_n5), mode='r')[data_set]
    out = zarr.open(store=zarr.N5Store(output_n5), mode='a')

    # Resolve the compressor: copy, disable, or look up by codec id.
    if compression == 'same':
        compressor = template.compressor
    elif compression == 'raw':
        compressor = None
    else:
        compressor = codecs.get_codec(dict(id=compression))

    dtype = template.dtype if dtype == 'same' else dtype

    print("Using compressor:", compressor or 'raw')

    print("Creating n5 data set with:")
    print(f"  compressor: {compressor}")
    print(f"  shape:      {template.shape}")
    print(f"  chunking:   {template.chunks}")
    print(f"  dtype:      {dtype}")
    print(f"  to path:    {output_n5}{data_set}")

    out.create_dataset(data_set,
                       shape=template.shape,
                       chunks=template.chunks,
                       dtype=dtype,
                       compressor=compressor,
                       overwrite=overwrite)
Example 3
0
    def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes:
        """Encode array metadata ``meta`` as JSON bytes (zarr v2 layout).

        Parameters
        ----------
        meta : mapping
            Must contain 'dtype', 'shape', 'chunks', 'compressor',
            'fill_value', 'order' and 'filters'; may contain
            'dimension_separator'.

        Returns
        -------
        bytes
            JSON-encoded metadata document.
        """
        dtype = meta["dtype"]
        sdshape = ()
        if dtype.subdtype is not None:
            # Structured sub-dtype: fold its inner shape into the array shape.
            dtype, sdshape = dtype.subdtype

        dimension_separator = meta.get("dimension_separator")
        if dtype.hasobject:
            import numcodecs
            # Object arrays need the first filter as the object codec so
            # the fill value can be encoded.
            object_codec = numcodecs.get_codec(meta['filters'][0])
        else:
            object_codec = None

        meta = dict(
            zarr_format=cls.ZARR_FORMAT,
            shape=meta["shape"] + sdshape,
            chunks=meta["chunks"],
            dtype=cls.encode_dtype(dtype),
            compressor=meta["compressor"],
            fill_value=cls.encode_fill_value(meta["fill_value"], dtype, object_codec),
            order=meta["order"],
            filters=meta["filters"],
        )
        # BUGFIX: this key was previously written by two identical
        # consecutive if-blocks; setting it once is sufficient.
        if dimension_separator:
            meta['dimension_separator'] = dimension_separator

        return json_dumps(meta)
Example 4
0
    def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]:
        """Parse and validate zarr v2 array metadata into a normalized dict.

        Parameters
        ----------
        s : mapping or str
            Raw metadata (already-parsed mapping or JSON text).

        Returns
        -------
        mapping
            Normalized metadata with tuple shape/chunks and decoded
            dtype/fill_value.

        Raises
        ------
        MetadataError
            If the zarr format version is unsupported or any field fails
            to decode.
        """
        meta = cls.parse_metadata(s)

        # Reject anything other than the supported format version.
        zarr_format = meta.get("zarr_format", None)
        if zarr_format != cls.ZARR_FORMAT:
            raise MetadataError("unsupported zarr format: %s" % zarr_format)

        # Decode the individual fields; any failure is wrapped below.
        try:
            dtype = cls.decode_dtype(meta["dtype"])
            object_codec = None
            if dtype.hasobject:
                import numcodecs
                # Object dtypes carry their codec as the first filter.
                object_codec = numcodecs.get_codec(meta['filters'][0])

            dimension_separator = meta.get("dimension_separator", None)
            fill_value = cls.decode_fill_value(meta['fill_value'], dtype, object_codec)
            decoded = {
                "zarr_format": meta["zarr_format"],
                "shape": tuple(meta["shape"]),
                "chunks": tuple(meta["chunks"]),
                "dtype": dtype,
                "compressor": meta["compressor"],
                "fill_value": fill_value,
                "order": meta["order"],
                "filters": meta["filters"],
            }
            if dimension_separator:
                decoded['dimension_separator'] = dimension_separator
        except Exception as e:
            raise MetadataError("error decoding metadata") from e
        return decoded
Example 5
0
    def upload_raw_array(self, object_name, array, compression=DO_COMPRESSION, acl=DEFAULT_ACL, **metadata):
        """Upload a binary representation of a np.ndarray

        This method reads the array content from memory to upload.
        It does not have any overhead.

        Parameters
        ----------
        object_name : str
            Destination object name.
        array : np.ndarray
            Array to upload.
        compression  : str, bool
            `True` uses the configuration defaults. `False` is no compression.
            Available options are: 'gzip', 'LZ4', 'Zlib', 'Zstd', 'BZ2' (attend to caps).
            NB: Zstd appears to be the only one that supports >2GB arrays.
        acl : str
            ACL for the object
        **metadata : optional
            Extra user metadata; keys must not collide with the reserved
            keys ('dtype', 'shape', 'compression', 'order').

        Notes
        -----
        This method also uploads the array ``dtype``, ``shape``, and ``gzip``
        flag as metadata
        """
        # Normalize: treat None the same as "no compression".
        if compression is None:
            compression = False

        # Backward compatibility: old callers passed a boolean `gzip` kwarg.
        if 'gzip' in metadata:
            warn("Deprecated keyword argument `gzip`. Use `compression='gzip'` instead", DeprecationWarning)
            gz = metadata.pop('gzip')
            compression = 'gzip' if gz else False

        if compression is True:
            # check whether array is >= 2 GB
            large_array = array.nbytes > 2**31
            # Pick the configured default codec based on array size.
            compression = COMPRESSION_LARGE if large_array else COMPRESSION_SMALL

            if large_array and compression == 'gzip':
                # Raise exception for specification of gzip w/ large array
                raise ValueError(("gzip does not support compression of >2GB arrays. "
                                  "Try `compression='Zstd'` instead."))

        # Record the memory layout so the download side can restore it.
        order = 'C' if array.flags.carray else 'F'
        if ((not array.flags['%s_CONTIGUOUS' % order] and six.PY2) or
            (not array.flags['C_CONTIGUOUS'] and six.PY3)):
            warn('Non-contiguous array. Creating copy (will use extra memory)...')

            if six.PY3 and order == 'F':
                # memoryview (PY3) vs buffer (PY2) issues
                warn("PY3: Changing array from 'F' to 'C' order")
                order = 'C'

            # create contiguous copy
            array = np.array(array, order=order)

        # Reserved metadata needed by download_raw_array to rebuild the array.
        meta = dict(dtype=array.dtype.str,
                    shape=','.join(map(str, array.shape)),
                    compression=str(compression),
                    order=order)

        # check for conflicts in metadata
        metadata_keys = []
        for k in metadata.keys():
            # check for conflicts in metadata
            metadata_keys.append(k in meta)

        # NOTE(review): `assert` is stripped under `python -O`; an explicit
        # ValueError would be a more robust guard against key collisions.
        assert not any(metadata_keys)
        meta.update(metadata)

        if compression is False:
            # NOTE(review): `array.data` is a buffer/memoryview, so `StringIO`
            # is presumably aliased to a BytesIO-like class at module level
            # — confirm against the file's imports.
            filestream = StringIO(array.data)
        elif compression == 'gzip':
            if six.PY3 and array.flags['F_CONTIGUOUS']:
                # eventually, array.data below should be changed to np.getbuffer(array)
                # (not yet working in python3 numpy)
                # F-contiguous arrays break gzip in python 3
                array = array.T
            zipdata = StringIO()
            gz = GzipFile(mode='wb', fileobj=zipdata)
            gz.write(array.data)
            gz.close()
            zipdata.seek(0)
            filestream = zipdata
        elif hasattr(numcodecs, compression.lower()):
            # If the specified compression type is in numcodecs, use numcodecs
            orig_nbytes = array.nbytes
            compressor = numcodecs.get_codec(dict(id=compression.lower()))
            filestream = StringIO(compressor.encode(array))
            data_nbytes = get_fileobject_size(filestream)
            print('Compressed to %0.2f%% the size'%(data_nbytes / float(orig_nbytes) * 100))
        else:
            raise ValueError('Unknown compression scheme: %s'%compression)
        response = self.upload_object(object_name, filestream, acl=acl, **meta)
        return response
Example 6
0
 def __init__(self, codec=None, **kwargs):
     """Store ``codec``, or build one from ``kwargs`` via numcodecs.

     When no codec instance is supplied, ``kwargs`` is treated as a
     numcodecs codec configuration (looked up with ``get_codec``).
     """
     if codec is not None:
         self.codec = codec
         return
     import numcodecs
     self.codec = numcodecs.get_codec(kwargs)