Esempio n. 1
0
    def load_to_buffer(self, file, buffer):
        """Decompress the blosc payload stored in ``file`` into ``buffer``.

        The file is expected to start with a dill-serialized header, which is
        read and discarded here; everything after it is treated as the
        compressed payload. ``buffer`` must expose ``__array_interface__``
        (e.g. a numpy array) and is returned after being filled in place.
        """
        # The header is only consumed to advance the file position.
        dill.load(file)
        payload = file.read()

        destination = buffer.__array_interface__['data'][0]
        blosc.decompress_ptr(payload, destination)
        return buffer
Esempio n. 2
0
def decompress_ndarray(binary: bytes, output_array=None) -> numpy.ndarray:
    """
    decompress_ndarray(binary[, output_array=None])

    Rebuild a numpy array from its pickled, blosc-compressed form.

    ``binary`` is unpickled into ``(compressed buffer, dtype, shape)``.
    When ``output_array`` is given, the data is decompressed into it instead
    of a freshly allocated array: a plain ``numpy.ndarray`` is resized in
    place to the stored shape, while a ``numpy.memmap`` is re-opened on the
    same file with the stored shape. The array that received the data is
    returned.
    """
    accepted_outputs = [numpy.ndarray, numpy.memmap, type(None)]
    assert type(binary) is bytes
    assert type(output_array) in accepted_outputs

    payload, dtype, shape = pickle.loads(binary)

    if output_array is not None:
        assert dtype == output_array.dtype

        if type(output_array) is not numpy.ndarray:
            # Only numpy.memmap can reach this branch (see the type assert).
            output_array = numpy.memmap(output_array.filename, shape=shape, dtype=dtype)
        else:
            output_array.resize(shape, refcheck=False)
    else:
        output_array = numpy.empty(shape, dtype)

    assert output_array.shape == shape, "Array output's shape is't same as compressed array."

    destination = output_array.__array_interface__['data'][0]
    blosc.decompress_ptr(payload, destination)
    return output_array
Esempio n. 3
0
def decompress_pre(Path, arr):
    """Decompress the blosc payload stored at ``Path`` into ``arr``.

    The file starts with a pickled ``(shape, dtype)`` header followed by the
    compressed bytes. The header is read only to skip past it; the caller is
    responsible for passing an ``arr`` that is large enough, because
    ``blosc.decompress_ptr`` writes through a raw pointer.

    Parameters
    ----------
    Path : path-like
        File to read.
    arr : object exposing ``__array_interface__``
        Pre-allocated destination; array allocation takes most of the time,
        which is why it is supplied by the caller.

    Returns
    -------
    The same ``arr`` object, filled in place.
    """
    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    # The context manager guarantees the handle is closed even on errors.
    with open(Path, "rb") as f:
        _shape, _dtype = pickle.load(f)
        c = f.read()
    blosc.decompress_ptr(c, arr.__array_interface__['data'][0])
    return arr
Esempio n. 4
0
    def test_decompress_ptr_input_types(self):
        """Check that ``decompress_ptr`` accepts several buffer-like inputs.

        The same compressed payload is passed as ``bytes``, ``memoryview``,
        ``bytearray`` and a numpy byte array; each time the output must equal
        the original data and the reported byte count must match its length.
        """
        import numpy as np

        # The expected answer was compressed from bytes.
        expected = b'0123456789'
        out = np.zeros(len(expected), dtype=np.byte)
        compressed = blosc.compress(expected, typesize=1)

        # All the things that support the buffer interface.
        sources = (
            compressed,
            memoryview(compressed),
            bytearray(compressed),
            np.frombuffer(compressed, dtype=np.byte),
        )
        for source in sources:
            out[:] = 0  # reset the output array
            nout = blosc.decompress_ptr(source, out.ctypes.data)
            self.assertEqual(expected, out.tobytes())
            # check that we didn't write too many bytes
            self.assertEqual(len(expected), nout)
Esempio n. 5
0
    def load(self, file):
        """Read a compressed array from ``file`` and return it.

        The file starts with a dill-serialized ``(shape, dtype)`` pair; the
        remaining bytes are the blosc payload, which is decompressed into a
        newly allocated array of that shape and dtype.
        """
        header = dill.load(file)
        shape, dtype = header
        payload = file.read()

        result = np.empty(shape=shape, dtype=dtype)
        destination = result.__array_interface__['data'][0]
        blosc.decompress_ptr(payload, destination)
        return result
Esempio n. 6
0
    def decompression(self):
        """ Decompress ``self.data`` into a newly allocated array.

        Return
        -------------
        numpy array of ``self.size`` elements with dtype ``self.dtype``
        """
        restored = empty(self.size, dtype=self.dtype)
        payload = self.data
        destination = restored.__array_interface__['data'][0]
        decompress_ptr(payload, destination)
        return restored
Esempio n. 7
0
    def decompression(self):
        """ Decompress ``self.data`` into a newly allocated array.

        Return
        -------------
        numpy array of ``self.size`` elements with dtype ``self.dtype``
        """
        restored = empty(self.size, dtype=self.dtype)
        payload = self.data
        destination = restored.__array_interface__['data'][0]
        decompress_ptr(payload, destination)
        return restored
 def __getitem__(self, index):
     """Return item ``index`` as a torch tensor.

     The file name is looked up in ``self.filenames`` and resolved under
     ``self.dataname``. Compressed files are decompressed with blosc into a
     float64 array of ``self.ndim`` elements; other files are read with
     ``numpy.load``.
     """
     fname = os.path.join(self.dataname, self.filenames[index])
     if not self.is_compressed:
         return torch.from_numpy(numpy.load(fname))

     with open(fname, 'rb') as f:
         restored = numpy.empty(self.ndim, dtype=numpy.float64)
         payload = f.read()
         blosc.decompress_ptr(payload, restored.__array_interface__['data'][0])
         return torch.from_numpy(restored)
Esempio n. 9
0
def test_codec(chunk, codec, filter_name, clevel):
    """
    Compress ``chunk`` once, decompress it three times, and report metrics.

    Parameters
    ----------
    chunk : numpy.ndarray
        The data to be compressed; its memory is read through
        ``__array_interface__``.
    codec : string
        The name of the compressor used internally in Blosc, passed as
        ``cname`` (e.g. 'blosclz', 'lz4', 'lz4hc', 'snappy', 'zlib', 'zstd').
    filter_name : int
        The shuffle filter to be activated, passed as ``shuffle``.  Allowed
        values are blosc.NOSHUFFLE, blosc.SHUFFLE and blosc.BITSHUFFLE.
    clevel : int
        The compression level from 0 (no compression) to 9
        (maximum compression).

    Returns
    -------
    out : tuple
        ``(rate, c_speed, d_speed)``: the compression ratio, and the
        compression and decompression speeds expressed in ``SPEED_UNIT``
        bytes per second. The decompression speed uses the fastest of the
        three runs.
    """
    started = time()
    packed = blosc.compress_ptr(
        chunk.__array_interface__['data'][0],
        chunk.size,
        chunk.dtype.itemsize,
        clevel=clevel,
        shuffle=filter_name,
        cname=codec,
    )
    compress_seconds = time() - started

    out = np.empty(chunk.size, dtype=chunk.dtype)
    decompress_seconds = []
    for _ in range(3):
        started = time()
        blosc.decompress_ptr(packed, out.__array_interface__['data'][0])
        decompress_seconds.append(time() - started)

    raw_bytes = chunk.size * chunk.dtype.itemsize
    rate = raw_bytes / len(packed)
    c_speed = raw_bytes / compress_seconds / SPEED_UNIT
    d_speed = raw_bytes / min(decompress_seconds) / SPEED_UNIT
    return rate, c_speed, d_speed
Esempio n. 10
0
def decompress_ndarray(binary: bytes, output_array=None) -> numpy.ndarray:
    """
    Rebuild a numpy array from its pickled, blosc-compressed form.

    Parameters
    ----------
    binary : bytes
        Pickle of ``(compressed buffer, dtype, shape)``.
    output_array : numpy.ndarray, optional
        Pre-allocated destination. When omitted, a new array with the stored
        shape and dtype is allocated.

    Returns
    -------
    numpy.ndarray
        ``output_array`` (or the newly allocated array) filled with the data.

    Raises
    ------
    AssertionError
        If ``binary`` is not ``bytes``, if ``output_array`` is neither
        ``None`` nor a plain ``numpy.ndarray``, or if its shape or dtype
        does not match the stored array.
    """
    assert type(binary) is bytes
    # None is the documented default, so it must pass validation.
    assert output_array is None or type(output_array) is numpy.ndarray

    # NOTE: pickle.loads can execute arbitrary code; only use on trusted data.
    buffer, dtype, shape = pickle.loads(binary)
    # Identity check: the truth value of an array is ambiguous (multi-element)
    # or data-dependent (a single zero would look "missing").
    if output_array is None:
        output_array = numpy.empty(shape, dtype)

    assert output_array.shape == shape, "Array output's shape is't same as compressed array."
    # decompress_ptr writes through a raw pointer, so a dtype mismatch would
    # write the wrong number of bytes into the destination.
    assert output_array.dtype == dtype, "Array output's dtype isn't same as compressed array."

    blosc.decompress_ptr(buffer, output_array.__array_interface__['data'][0])
    return output_array
Esempio n. 11
0
def test_codec( chunk, codec, filter, clevel ):
    """
    Compress the array chunk with the given codec, filter and clevel,
    decompress it again, and verify the round trip.

    Parameters
    ----------
    chunk : numpy.ndarray
        The data to be compressed; its memory is read through
        ``__array_interface__``.
    codec : string
        The name of the compressor used internally in Blosc, passed as
        ``cname`` (e.g. 'blosclz', 'lz4', 'lz4hc', 'snappy', 'zlib', 'zstd').
    filter : int
        The shuffle filter to be activated, passed as ``shuffle``.  Allowed
        values are blosc.NOSHUFFLE, blosc.SHUFFLE and blosc.BITSHUFFLE.
    clevel : int
        The compression level from 0 (no compression) to 9
        (maximum compression).

    Returns
    -------
    out : tuple
        ``(rate, tc, td)``: compression ratio, compression time and
        decompression time (both in seconds).

    Raises
    ------
    AssertionError
        If the decompressed data differs from ``chunk``.
    """
    started = time()
    packed = blosc.compress_ptr(
        chunk.__array_interface__['data'][0],
        chunk.size,
        chunk.dtype.itemsize,
        clevel=clevel,
        shuffle=filter,
        cname=codec,
    )
    tc = time() - started

    out = np.empty(chunk.size, dtype=chunk.dtype)
    started = time()
    blosc.decompress_ptr(packed, out.__array_interface__['data'][0])
    td = time() - started

    rate = chunk.size * chunk.dtype.itemsize / len(packed)
    assert (chunk == out).all()
    return (rate, tc, td)
Esempio n. 12
0
def _expanded_copy(obj):
    """Return a copy of ``obj`` with every squeezed array decompressed.

    A ``_SqueezedArray`` unpacks to ``(shape, dtype, compressed bytes)`` and
    is replaced by the decompressed numpy array. Containers whose exact type
    is ``tuple``, ``list`` or ``dict`` are rebuilt recursively (dict keys
    are kept as they are); any other object is returned unchanged.
    """
    if isinstance(obj, _SqueezedArray):
        shape, dtype, comp = obj
        restored = np.empty(shape, dtype=dtype)
        blosc.decompress_ptr(comp, restored.__array_interface__['data'][0])
        return restored

    kind = type(obj)
    if kind is dict:
        return {key: _expanded_copy(value) for key, value in obj.items()}
    if kind is list:
        return [_expanded_copy(item) for item in obj]
    if kind is tuple:
        return tuple(_expanded_copy(item) for item in obj)
    return obj
Esempio n. 13
0
def read_blosc(stream, out=None):
    """Read one blosc-compressed array from ``stream``.

    A JSON metadata record (``shape``, ``dtype``, ``length``) is read first,
    followed by ``length`` bytes of compressed payload. The data is
    decompressed into ``out`` when given, otherwise into a new array.

    Raises ``TypeError`` if ``out`` is not an ndarray, and ``ValueError`` if
    its shape or dtype does not match the metadata or it is not contiguous.
    Arrays whose dtype type is ``np.record`` are returned as ``np.recarray``
    views.
    """
    meta = read_json(stream)
    shape = tuple(meta['shape'])
    dtype = restore_dtype(meta['dtype'])

    if out is None:
        out = np.empty(shape, dtype)
    else:
        # The destination is written through a raw pointer, so it must match
        # the stored array exactly.
        if not isinstance(out, np.ndarray):
            raise TypeError('expected ndarray, got {}'.format(type(out).__name__))
        if out.shape != shape:
            raise ValueError('incompatible shape: expected {}, got {}'.format(shape, out.shape))
        if out.dtype != dtype:
            raise ValueError('incompatible dtype: expected {}, got {}'.format(dtype, out.dtype))
        if not out.flags.contiguous:
            raise ValueError('expected contiguous array')

    payload = stream.read(meta['length'])
    destination = out.__array_interface__['data'][0]
    blosc.decompress_ptr(payload, destination)

    if out.dtype.type is np.record:
        return out.view(np.recarray)
    return out
Esempio n. 14
0
 def receive_msg(self):
     """Block until a multipart message arrives and decode it.

     The message is read as three frames: a msgpack array description
     (``shape`` and ``dtype``), the blosc-compressed array bytes, and a
     msgpack attribute object. Returns ``(array, attributes)``.
     """
     log = self.log
     log.debug("Waiting for multipart message")
     frames = self.zmq_socket.recv_multipart(flags=0, copy=False, track=False)
     log.debug("    Multipart message received. Length: %d", len(frames))

     arr_desc = msgpack.unpackb(frames[0].bytes)
     log.debug('Array description: %s', str(arr_desc))
     log.debug("Unpacking numpy array from bytes")

     # Allocate the destination first; blosc decompresses straight into it.
     arr = np.empty(arr_desc['shape'], dtype=arr_desc['dtype'])
     destination = arr.__array_interface__['data'][0]

     # Frame.bytes copies the compressed data even though it is only read.
     # NOTE(review): if the installed blosc bindings accept buffer objects,
     # a zero-copy view of the frame might avoid this copy - confirm.
     payload = frames[1].bytes
     blosc.decompress_ptr(payload, destination)
     log.debug("    unpacked array: shape: %s", str(arr.shape))

     attr = msgpack.unpackb(frames[2].bytes)
     log.debug("    unpacked attributes: %s", str(attr))
     return arr, attr
Esempio n. 15
0
 def decompress_ptr():
     """Compress the memory at ``address`` (clevel 0) and decompress it back in place.

     ``address``, ``num_elements`` and ``typesize`` come from the enclosing
     scope.
     """
     packed = blosc.compress_ptr(address, num_elements, typesize, clevel=0)
     blosc.decompress_ptr(packed, address)
Esempio n. 16
0
    # Benchmark fragment: time two blosc round trips of ``in_`` with the
    # compressor ``cname`` and print timings plus the compression ratio.
    print("Using *** %s *** compressor::" % cname)

    # Round trip 1: pack_array / unpack_array.
    ctic = time.time()
    c = blosc.pack_array(in_, clevel=clevel, shuffle=True, cname=cname)
    ctoc = time.time()
    dtic = time.time()
    out = blosc.unpack_array(c)
    dtoc = time.time()
    # The round trip must reproduce the input exactly.
    assert ((in_ == out).all())
    print("  Time for pack_array/unpack_array:     %.3f/%.3f s." % \
          (ctoc-ctic, dtoc-dtic), end='')
    # Ratio = uncompressed size in bytes / compressed size in bytes.
    print("\tCompr ratio: %.2f" %
          (in_.size * in_.dtype.itemsize * 1. / len(c)))

    # Round trip 2: compress_ptr / decompress_ptr on the raw array memory.
    ctic = time.time()
    c = blosc.compress_ptr(in_.__array_interface__['data'][0],
                           in_.size,
                           in_.dtype.itemsize,
                           clevel=clevel,
                           shuffle=True,
                           cname=cname)
    ctoc = time.time()
    # The destination is allocated outside the timed decompression section.
    out = np.empty(in_.size, dtype=in_.dtype)
    dtic = time.time()
    blosc.decompress_ptr(c, out.__array_interface__['data'][0])
    dtoc = time.time()
    assert ((in_ == out).all())
    print("  Time for compress_ptr/decompress_ptr: %.3f/%.3f s." % \
          (ctoc-ctic, dtoc-dtic), end='')
    print("\tCompr ratio: %.2f" %
          (in_.size * in_.dtype.itemsize * 1. / len(c)))
Esempio n. 17
0
 def decompress_ptr():
     """Compress the memory at ``address`` (clevel 0) and decompress it back in place.

     ``address``, ``num_elements`` and ``typesize`` come from the enclosing
     scope.
     """
     packed = blosc.compress_ptr(address, num_elements, typesize, clevel=0)
     blosc.decompress_ptr(packed, address)
Esempio n. 18
0
 def put(self, compressed):
     """Decompress ``compressed`` at the current write pointer and advance it.

     ``self.ptr`` is moved forward by the value ``blosc.decompress_ptr``
     returns, and that value is handed back to the caller.
     """
     written = blosc.decompress_ptr(compressed, self.ptr)
     self.ptr += written
     return written
Esempio n. 19
0
def decompress_ndarray(binary, output_array=None) -> numpy.ndarray:
    """
    decompress_ndarray(binary[, output_array=None])

    Decompress an array from a header-prefixed blosc buffer.

    The header is parsed as: one byte for the header size, one byte for the
    dtype-string size, the dtype string, then 2-byte shape entries until the
    cursor reaches ``header_size + 1``. Everything after the header is the
    blosc payload. Field decoding is delegated to ``cvt_hex2dec`` and
    ``cvt_hex2str``.

    Parameters
    ----------
    binary: bytes
        Compressed numpy array bytes, including the header.

    output_array: numpy.ndarray or numpy.memmap, optional
        Destination for the decompressed data. If its shape differs from the
        stored one, an ndarray is resized in place and a memmap is re-opened
        on the same file with the stored shape. When omitted, a new array is
        allocated.

    Returns
    -------
    numpy.ndarray
        The array that received the data.

    Raises
    ------
    TypeError
        If ``binary`` is not bytes, ``output_array`` is not an array, or the
        dtype of ``output_array`` differs from the stored dtype.
    """
    if not isinstance(binary, bytes):
        raise TypeError("Require byte type of input data.")

    if output_array is not None and not isinstance(
            output_array, (numpy.ndarray, numpy.memmap)):
        raise TypeError("Require numpy.ndarray type of output array.")

    # Fixed-position fields: header size (byte 0) and dtype size (byte 1).
    header_size = cvt_hex2dec(binary[0:1])
    dtype_size = cvt_hex2dec(binary[1:2])
    cursor = 2

    dtype = numpy.dtype(cvt_hex2str(binary[cursor:cursor + dtype_size]))
    cursor += dtype_size

    # The remaining header bytes hold the shape, two bytes per dimension.
    dims = []
    while cursor < header_size + 1:
        dims.append(cvt_hex2dec(binary[cursor:cursor + 2]))
        cursor += 2
    # Use a tuple throughout: it compares directly with ndarray.shape and is
    # the form of shape numpy.memmap is documented to accept.
    shape = tuple(dims)

    if output_array is None:
        output_array = numpy.empty(shape, dtype)
    else:
        if dtype != output_array.dtype:
            raise TypeError("Type of output array and data aren't the same!")

        if shape != output_array.shape:
            if isinstance(output_array, numpy.memmap):
                output_array = numpy.memmap(output_array.filename,
                                            shape=shape,
                                            dtype=dtype)
            else:
                output_array.resize(shape, refcheck=False)

    blosc.decompress_ptr(binary[cursor:],
                         output_array.__array_interface__['data'][0])
    return output_array
Esempio n. 20
0
 def put(self, compressed):
     """Decompress ``compressed`` at the current write pointer and advance it.

     ``self.ptr`` is moved forward by the value ``blosc.decompress_ptr``
     returns, and that value is handed back to the caller.
     """
     written = blosc.decompress_ptr(compressed, self.ptr)
     self.ptr += written
     return written
Esempio n. 21
0
def compressed_bytes_to_arraydata(cbytes, size, dtype):
    """Decompress ``cbytes`` into a new array of ``size`` elements of ``dtype``."""
    restored = np.empty(size, dtype=dtype)
    destination = restored.__array_interface__['data'][0]
    blosc.decompress_ptr(cbytes, destination)
    return restored
Esempio n. 22
0
# Benchmark script: compare np.copy() with a blosc compress_ptr /
# decompress_ptr round trip for every available compressor on three
# differently distributed arrays of N elements.
arrays = [
    np.arange(N, dtype=np.int64),
    np.linspace(0, 1000, N),
    # np.random.random_integers is deprecated; randint with an exclusive
    # upper bound of 1001 draws from the same closed interval [0, 1000].
    np.random.randint(0, 1001, N),
]
labels = [
    "the arange linear distribution",
    "the linspace linear distribution",
    "the random distribution",
]

# Baseline: a plain memory copy of the first array.
tic = time.time()
out_ = np.copy(arrays[0])
toc = time.time()
print("  *** np.copy() **** Time for memcpy():     %.3f s" % (toc-tic,))

for (in_, label) in zip(arrays, labels):
    print("\n*** %s ***" % label)
    for cname in blosc.compressor_list():
        ctic = time.time()
        c = blosc.compress_ptr(in_.__array_interface__['data'][0],
                               in_.size, in_.dtype.itemsize,
                               clevel=clevel, shuffle=True, cname=cname)
        ctoc = time.time()
        # The destination is allocated outside the timed decompression.
        out = np.empty(in_.size, dtype=in_.dtype)
        dtic = time.time()
        blosc.decompress_ptr(c, out.__array_interface__['data'][0])
        dtoc = time.time()
        # The round trip must reproduce the input exactly.
        assert((in_ == out).all())
        print("  *** %-8s *** Time for comp/decomp: %.3f/%.3f s." %
              (cname, ctoc-ctic, dtoc-dtic), end='')
        print("\tCompr ratio: %6.2f" % (in_.size*in_.dtype.itemsize*1. / len(c)))
Esempio n. 23
0
def unpack(col):
    """Decompress a stored column record into a new numpy array.

    ``col`` is a mapping with ``size`` and ``dtype`` describing the array
    and ``data`` holding the blosc payload, which is converted to ``bytes``
    before decompression.
    """
    restored = np.empty(col['size'], dtype=col['dtype'])
    payload = bytes(col['data'])
    blosc.decompress_ptr(payload, restored.__array_interface__['data'][0])
    return restored
Esempio n. 24
0
def decompress(size, dtype, data):
    """Decompress the blosc payload ``data`` into a new array of ``size`` elements of ``dtype``."""
    restored = np.empty(size, dtype)
    destination = restored.__array_interface__['data'][0]
    blosc.decompress_ptr(data, destination)
    return restored
Esempio n. 25
0
    def decompress(self, blocks, out, **kwargs):
        '''Decompress a stream of length-prefixed blosc chunks into ``out``.

        The concatenation of ``blocks`` is read as a sequence of records,
        each made of a 4-byte length key (decoded with struct format
        ``'!I'``) followed by that many bytes of blosc-compressed data.
        Records may be split across block boundaries; split records are
        reassembled in a temporary buffer before being decompressed.

        Parameters:
        blocks -- iterable of objects accepted by ``memoryview``
        out -- destination buffer; must expose ``.contiguous`` and be
               readable by ``np.frombuffer`` (presumably a memoryview --
               TODO confirm against callers)
        kwargs -- forwarded unchanged to ``blosc.decompress_ptr``

        Useful decompression kwargs:
        nthreads

        Returns the sum of the values returned by ``blosc.decompress_ptr``.

        Raises ValueError if ``out`` or any block is not contiguous.
        '''
        # TODO: controlled globally for now
        #nthreads = kwargs.pop('nthreads',1)
        #blosc.set_nthreads(nthreads)

        # Parser state carried across blocks:
        #   _size        - compressed length of the current record (0 = unknown)
        #   _pos         - number of bytes already copied into _buffer
        #   _buffer      - reassembly buffer for a record split across blocks
        #   _partial_len - bytes of a length key split across blocks
        _size = 0
        _pos = 0
        _buffer = None
        _partial_len = b''

        # NOTE: decompression_time is accumulated below but never returned
        # or stored by this method.
        decompression_time = 0.
        bytesout = 0

        # Blosc code probably assumes contiguous buffer
        if not out.contiguous:
            raise ValueError(out.contiguous)

        # get the out address
        # From here on ``out`` is an integer address, not the buffer object.
        out = np.frombuffer(out, dtype=np.uint8).ctypes.data

        for block in blocks:
            # Normalize every block to a byte-wise view.
            block = memoryview(block).cast('c')
            try:
                block = block.toreadonly()  # python>=3.8 only
            except AttributeError:
                pass

            if not block.contiguous:
                raise ValueError(block.contiguous)

            # Consume the block record by record; slicing a memoryview does
            # not copy the underlying data.
            while len(block):
                if not _size:
                    # Don't know the (compressed) length of this block yet
                    if len(_partial_len) + len(block) < 4:
                        _partial_len += block
                        break  # we've exhausted the data
                    if _partial_len:
                        # If we started to fill a len key, finish filling it
                        remaining = 4 - len(_partial_len)
                        if remaining:
                            _partial_len += block[:remaining]
                            block = block[remaining:]
                        _size = struct.unpack('!I', _partial_len)[0]
                        _partial_len = b''
                    else:
                        # Otherwise just read the len key directly
                        _size = struct.unpack('!I', block[:4])[0]
                        block = block[4:]

                if len(block) < _size or _buffer is not None:
                    # If we have a partial block, or we're already filling a buffer, use the buffer
                    if _buffer is None:
                        _buffer = np.empty(
                            _size, dtype=np.byte
                        )  # use numpy instead of bytearray so we can avoid zero initialization
                        _pos = 0
                    newbytes = min(
                        _size - _pos,
                        len(block))  # don't fill past the buffer len!
                    _buffer[_pos:_pos + newbytes] = np.frombuffer(
                        block[:newbytes], dtype=np.byte)
                    _pos += newbytes
                    block = block[newbytes:]

                    if _pos == _size:
                        # The record is complete: decompress it right after
                        # the bytes already written to the destination.
                        start = time.perf_counter()
                        n_thisout = blosc.decompress_ptr(
                            memoryview(_buffer), out + bytesout, **kwargs)
                        decompression_time += time.perf_counter() - start
                        bytesout += n_thisout
                        # Reset the state so the next length key is parsed.
                        _buffer = None
                        _size = 0
                else:
                    # We have at least one full block
                    start = time.perf_counter()
                    n_thisout = blosc.decompress_ptr(memoryview(block[:_size]),
                                                     out + bytesout, **kwargs)
                    decompression_time += time.perf_counter() - start
                    bytesout += n_thisout
                    block = block[_size:]
                    _size = 0

        # NOTE(review): nothing is raised here if the stream ended in the
        # middle of a length key or record; that data is simply not emitted.
        return bytesout
Esempio n. 26
0
 def decompress(shape, dtype, compressed):
     """Decompress ``compressed`` into a new array and wrap it in a ``BloscItem``.

     The array is allocated with the given ``shape`` and ``dtype`` and filled
     in place by ``blosc.decompress_ptr``.
     """
     restored = np.empty(shape, dtype=dtype)
     destination = restored.__array_interface__['data'][0]
     blosc.decompress_ptr(compressed, destination)
     return BloscItem(restored)