Example #1
0
def test_config_blocksize():
    """Check ``Blosc.from_config`` with and without a ``blocksize`` entry.

    A config that does not state ``blocksize`` explicitly must still load
    (backwards compatibility) and yield a codec whose blocksize is 0.
    """
    # Config that omits blocksize entirely.
    implicit = Blosc.from_config(
        {'cname': 'lz4', 'clevel': 1, 'shuffle': Blosc.SHUFFLE}
    )
    assert implicit.blocksize == 0

    # Config that states blocksize explicitly.
    explicit = Blosc.from_config(
        {'cname': 'lz4', 'clevel': 1, 'shuffle': Blosc.SHUFFLE,
         'blocksize': 2**8}
    )
    assert explicit.blocksize == 2**8
Example #2
0
def test_repr():
    """Check ``repr`` of Blosc codecs for each of the three shuffle modes."""
    # (expected repr string, codec instance) pairs.
    cases = [
        ("Blosc(cname='zstd', clevel=3, shuffle=SHUFFLE, blocksize=0)",
         Blosc(cname='zstd', clevel=3, shuffle=Blosc.SHUFFLE, blocksize=0)),
        ("Blosc(cname='lz4', clevel=1, shuffle=NOSHUFFLE, blocksize=256)",
         Blosc(cname='lz4', clevel=1, shuffle=Blosc.NOSHUFFLE,
               blocksize=256)),
        ("Blosc(cname='zlib', clevel=9, shuffle=BITSHUFFLE, blocksize=512)",
         Blosc(cname='zlib', clevel=9, shuffle=Blosc.BITSHUFFLE,
               blocksize=512)),
    ]
    for expect, codec in cases:
        actual = repr(codec)
        assert expect == actual
Example #3
0
def writer(q, outfile, dtype, intermediate, dimensions):
    """Drain records from ``q`` into a 1-D zarr array stored at ``outfile``.

    Records are fetched with ``q.get()`` until a ``FinishSignal`` instance
    arrives. They are staged in a buffer of ``intermediate`` slots; whenever
    the buffer is full it is sorted and appended to the array, and whatever
    remains is sorted and appended once the finish signal is seen.

    Parameters
    ----------
    q
        Object exposing ``get()``; yields records and finally a
        ``FinishSignal`` instance.
    outfile
        Store location handed to ``zarr.open`` (opened with ``mode="w"``).
    dtype
        Dtype of both the staging buffer and the on-disk array.
    intermediate : int
        Number of records per staging buffer; also the zarr chunk length.
    dimensions
        Not consulted and never modified. Previously the sort call passed
        ``dimensions.reverse()`` as the axis, which is always ``None`` and
        reversed the caller's list in place on every flush.
        NOTE(review): presumably the reversed names were meant as sort keys
        (``order=``) for a structured dtype -- confirm before enabling.
    """
    disk = zarr.open(outfile,
                     shape=(0, ),
                     chunks=(intermediate, ),
                     dtype=dtype,
                     mode="w",
                     fill_value=None,
                     compressor=Blosc(cname="lz4", clevel=5, shuffle=2))

    def _dump_to_disk(buffer):
        # axis=None is what the old call effectively passed; for the 1-D
        # staging buffer this is a plain sort that returns a new array, so
        # the staging buffer itself can be reused afterwards.
        disk.append(np.sort(buffer, axis=None, kind="heapsort"))

    # NOTE(review): with intermediate == 0 this buffer has no slots, so the
    # first store in the loop below cannot succeed, despite the comment on
    # the flush condition -- confirm the intended handling.
    buffer = np.empty(intermediate, dtype=dtype)
    record = q.get()
    cur = 0

    while not isinstance(record, FinishSignal):
        buffer[cur] = record
        cur += 1

        # if intermediate is 0, then just dump at the end
        if intermediate and cur >= intermediate:
            _dump_to_disk(buffer)
            cur = 0

        record = q.get()

    # Flush the partially filled tail, if any.
    if cur > 0:
        _dump_to_disk(buffer[:cur])
Example #4
0
def convert_slide_to_zarr0(filename, patch_size):
    """Copy a slide into a zarr group stored next to the source file.

    The group lives at ``filename`` with its extension replaced by ``.zarr``
    and is opened in append mode. Two arrays are written, each only if it is
    not already present in the group:

    * ``'1'`` -- a thumbnail of ``nx`` x ``ny`` pixels, where ``nx``/``ny``
      are the number of patches needed to cover the slide.
    * ``'0'`` -- the slide as a ``(height, width, 3)`` uint8 array,
      filled patch by patch.

    Parameters
    ----------
    filename : str
        Path of the slide, passed to ``TiffSlide``.
    patch_size : sequence of two ints
        ``(patch width, patch height)`` in pixels.
    """
    slide = TiffSlide(filename)
    size = slide.sizes[0]
    width, height = size[0], size[1]
    patch_w, patch_h = patch_size[0], patch_size[1]
    compressor = Blosc(cname='zstd', clevel=3,
                       shuffle=Blosc.BITSHUFFLE)  # clevel=9

    zarr_filename = os.path.splitext(filename)[0] + '.zarr'
    root = zarr.open_group(zarr_filename, mode='a')

    # Number of patches along each axis (the last one may be partial).
    nx = int(np.ceil(width / patch_w))
    ny = int(np.ceil(height / patch_h))

    # thumbnail
    label = str(1)
    if label not in root.array_keys():
        thumb = np.asarray(slide.get_thumbnail((nx, ny)))
        # ensure correct size in case thumb scaled using aspect ratio:
        # grow short axes by repeating edge pixels, then crop any excess.
        # The pad spec assumes a 3-axis (rows, cols, channels) thumbnail.
        dx = max(nx - thumb.shape[1], 0)
        dy = max(ny - thumb.shape[0], 0)
        if dx or dy:
            thumb = np.pad(thumb, ((0, dy), (0, dx), (0, 0)), 'edge')
        thumb = thumb[0:ny, 0:nx]
        root.create_dataset(label, data=thumb, compressor=compressor)

    # slide
    label = str(0)
    if label not in root.array_keys():
        # Axis 0 is height and axis 1 is width, so the chunk shape must be
        # (patch height, patch width) to line up with the tiles written
        # below; the reverse order only matched for square patches.
        data = root.create_dataset(label,
                                   shape=(height, width, 3),
                                   chunks=(patch_h, patch_w, None),
                                   dtype='uint8',
                                   compressor=compressor)
        for y in range(ny):
            ys = y * patch_h
            # Clamp the last row of tiles to the slide boundary.
            h = min(patch_h, height - ys)
            for x in range(nx):
                xs = x * patch_w
                # Clamp the last column of tiles to the slide boundary.
                w = min(patch_w, width - xs)
                # NOTE(review): presumably asarray takes (x0, y0, x1, y1)
                # and returns an (h, w, 3) region -- confirm against
                # TiffSlide.
                tile = slide.asarray(xs, ys, xs + w, ys + h)
                data[ys:ys + h, xs:xs + w] = tile
Example #5
0
def test_blosc():
    """Round-trip a set of arrays through LZ4, Zstd and Blosc codecs.

    Every codec is exercised against every array; for each pair the data is
    encoded from several buffer types, decoded from several buffer types and
    decoded into preallocated outputs, and each result is compared with the
    source array.
    """
    import numpy as np
    from numcodecs.blosc import Blosc
    from numcodecs.zstd import Zstd
    from numcodecs.lz4 import LZ4
    from numcodecs.compat import ensure_bytes, ensure_ndarray
    from numpy.testing import assert_array_almost_equal, assert_array_equal
    import array

    def compare_arrays(arr, res, precision=None):
        """Assert ``res`` matches ``arr`` after viewing/reshaping like it."""
        # Reinterpret the result with the source dtype, shape and layout.
        layout = "F" if arr.flags.f_contiguous else "C"
        res = ensure_ndarray(res).view(arr.dtype)
        res = res.reshape(arr.shape, order=layout)

        if precision is not None:
            # fuzzy compare
            assert_array_almost_equal(arr, res, decimal=precision)
        else:
            # exact compare
            assert_array_equal(arr, res)

    def check_encode_decode(arr, codec, precision=None):
        """Round-trip ``arr`` through ``codec`` via many buffer types."""
        # N.B., with the blosc compressor the encoded bytes may depend on
        # the itemsize of the source buffer, so encoded results are never
        # compared with each other; only the decoded round-trip result is
        # compared with the original array.

        # Encoding should support any object exporting the buffer protocol:
        # the numpy array itself, bytes, bytearray and array.array.
        raw = arr.tobytes(order="A")
        sources = (arr, raw, bytearray(raw), array.array("b", raw))
        for src in sources:
            enc = codec.encode(src)
            compare_arrays(arr, codec.decode(enc), precision=precision)

        # Decoding should support any object exporting the buffer protocol.
        # The encoding of the last source above is reused from here on.
        enc_bytes = ensure_bytes(enc)
        encoded_inputs = (
            enc_bytes,
            bytearray(enc_bytes),
            array.array("b", enc_bytes),
            np.frombuffer(enc_bytes, dtype="u1"),
        )
        for encoded in encoded_inputs:
            compare_arrays(arr, codec.decode(encoded), precision=precision)

        # Decoding directly into a preallocated numpy array / bytearray.
        for out in (np.empty_like(arr), bytearray(arr.nbytes)):
            codec.decode(enc_bytes, out=out)
            # noinspection PyTypeChecker
            compare_arrays(arr, out, precision=precision)

    # mix of dtypes: integer, float, bool, string
    # mix of shapes: 1D, 2D, 3D
    # mix of orders: C, F
    arrays = [
        np.arange(1000, dtype="i4"),
        np.linspace(1000, 1001, 1000, dtype="f8"),
        np.random.normal(loc=1000, scale=1, size=(100, 10)),
        np.random.randint(0, 2, size=1000, dtype=bool).reshape(
            100, 10, order="F"),
        np.random.choice([b"a", b"bb", b"ccc"], size=1000).reshape(10, 10, 10),
    ]
    # datetime64 / timedelta64 views over random unsigned integers
    for upper, units in ((2**60, ("M8[ns]", "m8[ns]")),
                         (2**25, ("M8[m]", "m8[m]"))):
        for unit in units:
            arrays.append(
                np.random.randint(0, upper, size=1000, dtype="u8").view(unit))
    # datetime64 / timedelta64 views over values at the bottom of int64
    for unit in ("M8[ns]", "m8[ns]", "M8[m]", "m8[m]"):
        arrays.append(
            np.random.randint(-(2**63), -(2**63) + 20, size=1000,
                              dtype="i8").view(unit))

    codecs = [
        LZ4(),
        LZ4(acceleration=-1),
        LZ4(acceleration=10),
        Zstd(),
        Zstd(level=-1),
        Zstd(level=10),
        Blosc(shuffle=Blosc.SHUFFLE),
        Blosc(clevel=0, shuffle=Blosc.SHUFFLE),
        Blosc(cname="lz4", shuffle=Blosc.SHUFFLE),
        Blosc(cname="lz4", clevel=1, shuffle=Blosc.NOSHUFFLE),
        Blosc(cname="lz4", clevel=5, shuffle=Blosc.SHUFFLE),
        Blosc(cname="lz4", clevel=9, shuffle=Blosc.BITSHUFFLE),
        Blosc(cname="zlib", clevel=1, shuffle=0),
        Blosc(cname="zstd", clevel=1, shuffle=1),
        Blosc(cname="blosclz", clevel=1, shuffle=2),
        Blosc(cname="snappy", clevel=1, shuffle=2),
        Blosc(shuffle=Blosc.SHUFFLE, blocksize=0),
        Blosc(shuffle=Blosc.SHUFFLE, blocksize=2**8),
        Blosc(cname="lz4", clevel=1, shuffle=Blosc.NOSHUFFLE, blocksize=2**8),
    ]
    for current_codec in codecs:
        for sample in arrays:
            check_encode_decode(sample, current_codec)
Example #6
0
def test_config():
    """Run the shared ``check_config`` helper on a zstd Blosc codec."""
    check_config(Blosc(cname='zstd', clevel=3, shuffle=1))
Example #7
0
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division
import itertools

import numpy as np

from numcodecs.blosc import Blosc
from numcodecs.tests.common import check_encode_decode, check_config, \
    check_repr

# Blosc codec instances covering the default arguments, compression level 0,
# the three shuffle modes (by named constant and by integer value) and the
# compressor names lz4, zlib, zstd, blosclz and snappy.
# NOTE(review): presumably iterated by the round-trip tests in this module;
# the consumers are not visible here.
codecs = [
    Blosc(),
    Blosc(clevel=0),
    Blosc(cname='lz4'),
    Blosc(cname='lz4', clevel=1, shuffle=Blosc.NOSHUFFLE),
    Blosc(cname='lz4', clevel=5, shuffle=Blosc.SHUFFLE),
    Blosc(cname='lz4', clevel=9, shuffle=Blosc.BITSHUFFLE),
    Blosc(cname='zlib', clevel=1, shuffle=0),
    Blosc(cname='zstd', clevel=1, shuffle=1),
    Blosc(cname='blosclz', clevel=1, shuffle=2),
    Blosc(cname='snappy', clevel=1, shuffle=2),
]

# mix of dtypes: integer, float, bool, string
# mix of shapes: 1D, 2D, 3D
# mix of orders: C, F
arrays = [
    np.arange(1000, dtype='i4'),
    np.linspace(1000, 1001, 1000, dtype='f8'),
    np.random.normal(loc=1000, scale=1, size=(100, 10)),
    np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'),
Example #8
0
def test_config():
    """Run ``check_config`` on Blosc codecs with and without a blocksize."""
    codec_kwargs = (
        {'cname': 'zstd', 'clevel': 3, 'shuffle': 1},
        {'cname': 'lz4', 'clevel': 1, 'shuffle': 2, 'blocksize': 2**8},
    )
    for kwargs in codec_kwargs:
        check_config(Blosc(**kwargs))
Example #9
0
def test_err_encode_object_buffer():
    """Run the shared object-buffer encode check on a default Blosc codec."""
    default_codec = Blosc()
    check_err_encode_object_buffer(default_codec)
Example #10
0
def _decode_worker(enc):
    """Decode ``enc`` with a freshly constructed default Blosc codec."""
    return Blosc().decode(enc)
Example #11
0
def _encode_worker(data):
    """Encode ``data`` with a fresh zlib / level-9 / byte-shuffle Blosc codec."""
    codec = Blosc(cname='zlib', clevel=9, shuffle=Blosc.SHUFFLE)
    return codec.encode(data)
Example #12
0
def test_eq():
    """Check equality and inequality semantics of Blosc codecs."""
    # Two independently built default codecs compare equal.
    first_default, second_default = Blosc(), Blosc()
    assert first_default == second_default

    # Differing compressor name or level makes codecs unequal.
    lz4_codec = Blosc(cname='lz4')
    assert lz4_codec != Blosc(cname='zstd')
    assert Blosc(clevel=1) != Blosc(clevel=9)

    # A codec never equals an unrelated object.
    assert lz4_codec != 'foo'
Example #13
0
    pytest.skip(
        "numcodecs.blosc not available", allow_module_level=True
    )


from numcodecs.tests.common import (check_encode_decode,
                                    check_encode_decode_partial,
                                    check_config,
                                    check_backwards_compatibility,
                                    check_err_decode_object_buffer,
                                    check_err_encode_object_buffer,
                                    check_max_buffer_size)


# Blosc codec configurations covering the three shuffle modes (by named
# constant and by integer value), several compressor names and explicit
# blocksize values.
# NOTE(review): the None entry stands where a snappy codec used to be,
# presumably so the positions of the later entries stay unchanged -- confirm
# that every consumer of this list tolerates a None element.
codecs = [
    Blosc(shuffle=Blosc.SHUFFLE),
    Blosc(clevel=0, shuffle=Blosc.SHUFFLE),
    Blosc(cname='lz4', shuffle=Blosc.SHUFFLE),
    Blosc(cname='lz4', clevel=1, shuffle=Blosc.NOSHUFFLE),
    Blosc(cname='lz4', clevel=5, shuffle=Blosc.SHUFFLE),
    Blosc(cname='lz4', clevel=9, shuffle=Blosc.BITSHUFFLE),
    Blosc(cname='zlib', clevel=1, shuffle=0),
    Blosc(cname='zstd', clevel=1, shuffle=1),
    Blosc(cname='blosclz', clevel=1, shuffle=2),
    None,  # was snappy
    Blosc(shuffle=Blosc.SHUFFLE, blocksize=0),
    Blosc(shuffle=Blosc.SHUFFLE, blocksize=2**8),
    Blosc(cname='lz4', clevel=1, shuffle=Blosc.NOSHUFFLE, blocksize=2**8),
]