Example #1
0
def test_compressor_as_filter():
    """Check that a compressor behaves the same as a trailing filter.

    For every non-``None`` entry of ``compressors`` two arrays are built
    from the same data: one with ``compressor=None`` and the compressor
    appended after a ``Delta`` filter, the other with the compressor passed
    via ``compressor=`` and only the ``Delta`` filter. The stored chunks
    under keys ``'0'``..``'9'`` and the decoded contents must agree.
    """
    item_type = 'i8'

    for codec in compressors:
        # nothing to compare when no compressor is configured
        if codec is None:
            continue

        # filter chain with the compressor as its last stage
        chain = [
            Delta(dtype=item_type),
            codec
        ]

        # same data stored both ways
        source = np.arange(10000, dtype=item_type)
        via_filters = array(source, chunks=1000, compressor=None,
                            filters=chain)
        via_compressor = array(source, chunks=1000, compressor=codec,
                               filters=chain[:1])

        # stored bytes must be identical chunk by chunk
        for key in map(str, range(10)):
            left = bytes(via_filters.store[key])
            right = bytes(via_compressor.store[key])
            eq(left, right)

        # decoded data must match the input and each other
        assert_array_equal(source, via_filters[:])
        assert_array_equal(via_filters[:], via_compressor[:])
Example #2
0
def test_array_with_quantize_filter():
    """Round-trip float data through a ``Quantize`` filter per compressor.

    For each entry of ``compressors`` the array must read back equal to
    the input to ``digits`` decimal places, and the stored chunks with keys
    ``'0'``..``'5'`` (compressor undone, if any) must equal what the filter's
    ``encode`` yields for the matching five-item slice.
    """
    # setup
    float_type = 'f8'
    precision = 3
    quantizer = Quantize(digits=precision, dtype=float_type)
    filter_chain = [quantizer]
    values = np.linspace(0, 1, 34, dtype=float_type)

    for codec in compressors:

        z = array(values, chunks=5, compressor=codec, filters=filter_chain)

        # round-trip is only expected to hold to the quantized precision
        assert_array_almost_equal(values, z[:], decimal=precision)

        # inspect the stored chunks directly
        for idx in range(6):
            raw = z.store[str(idx)]
            payload = codec.decode(raw) if codec else raw
            found = np.frombuffer(payload, dtype=float_type)
            start = idx * 5
            wanted = quantizer.encode(values[start:start + 5])
            assert_array_equal(wanted, found)
Example #3
0
def test_array_with_categorize_filter():
    """Round-trip label data through a ``Categorize`` filter per compressor.

    For each entry of ``compressors`` an array is built from 100 randomly
    chosen labels in chunks of 5. Two things are checked: reading the array
    back returns the original data, and each of the 20 stored chunks (with
    the array's compressor undone, if it has one) equals, viewed as ``u1``,
    what ``flt.encode`` produces for the matching five-item slice.
    """

    # setup
    data = np.random.choice([b'foo', b'bar', b'baz'], size=100)
    flt = Categorize(dtype=data.dtype, labels=['foo', 'bar', 'baz'])
    filters = [flt]

    for compressor in compressors:

        a = array(data, chunks=5, compressor=compressor, filters=filters)

        # check round-trip
        assert_array_equal(data, a[:])

        # check chunks
        for i in range(20):
            cdata = a.store[str(i)]
            # use the compressor the array reports, not the loop variable
            if a.compressor:
                chunk = a.compressor.decode(cdata)
            else:
                chunk = cdata
            actual = np.frombuffer(chunk, dtype='u1')
            expect = flt.encode(data[i*5:(i*5)+5])
            assert_array_equal(expect, actual)
Example #4
0
def test_array_with_astype_filter():
    """Round-trip integer data through an ``AsType`` filter per compressor.

    For each entry of ``compressors`` the array must keep the decode dtype
    and read back equal to the input, and every stored chunk (compressor
    undone, if any) must equal the matching slice of the input cast to the
    encode dtype.
    """

    # setup
    encode_dtype = 'i1'
    decode_dtype = 'i8'
    filters = [AsType(encode_dtype=encode_dtype, decode_dtype=decode_dtype)]
    chunks = 10  # number of chunks
    chunk_size = 10  # items per chunk
    shape = chunks * chunk_size
    data = np.arange(shape, dtype=decode_dtype)

    for compressor in compressors:

        # ``chunks=`` is the chunk length, so pass chunk_size rather than the
        # chunk count; the two only coincide because both are 10 here
        a = array(data, chunks=chunk_size, compressor=compressor,
                  filters=filters)

        # check round-trip
        assert data.dtype == a.dtype
        assert_array_equal(data, a[:])

        # check chunks
        for i in range(chunks):
            cdata = a.store[str(i)]
            if compressor:
                chunk = compressor.decode(cdata)
            else:
                chunk = cdata
            actual = np.frombuffer(chunk, dtype=encode_dtype)
            expect = data.astype(encode_dtype)[i*chunk_size:(i+1)*chunk_size]
            assert_array_equal(expect, actual)
Example #5
0
def test_array_with_scaleoffset_filter():
    """Round-trip float data through ``FixedScaleOffset`` per compressor.

    For each entry of ``compressors`` the array must read back equal to
    the input to one decimal place, and the stored chunks with keys
    ``'0'``..``'5'`` (compressor undone, if any) must equal, viewed as
    ``u1``, the filter's ``encode`` output for the matching five-item slice.
    """
    # setup
    stored_type = 'u1'
    float_type = 'f8'
    scaler = FixedScaleOffset(scale=10, offset=1000, astype=stored_type,
                              dtype=float_type)
    filter_chain = [scaler]
    values = np.linspace(1000, 1001, 34, dtype='f8')

    for codec in compressors:

        z = array(values, chunks=5, compressor=codec, filters=filter_chain)

        # round-trip is only compared to one decimal place
        assert_array_almost_equal(values, z[:], decimal=1)

        # inspect the stored chunks directly
        for idx in range(6):
            raw = z.store[str(idx)]
            payload = codec.decode(raw) if codec else raw
            found = np.frombuffer(payload, dtype=stored_type)
            start = idx * 5
            wanted = scaler.encode(values[start:start + 5])
            assert_array_equal(wanted, found)
Example #6
0
def test_array_with_delta_filter():
    """Round-trip integer data through a ``Delta`` filter per compressor.

    For each entry of ``compressors`` the array must read back equal to
    the input, and each of the ten stored chunks (compressor undone, if
    any), viewed as ``u1``, must hold the chunk's first value followed by
    nine ones.
    """
    # setup
    stored_type = 'u1'
    item_type = 'i8'
    filter_chain = [Delta(astype=stored_type, dtype=item_type)]
    values = np.arange(100, dtype=item_type)

    for codec in compressors:

        z = array(values, chunks=10, compressor=codec, filters=filter_chain)

        # decoded contents must match the input exactly
        assert_array_equal(values, z[:])

        # inspect the stored chunks directly
        for idx in range(10):
            raw = z.store[str(idx)]
            payload = codec.decode(raw) if codec else raw
            found = np.frombuffer(payload, dtype=stored_type)
            # chunk idx starts at idx * 10; remaining entries are steps of 1
            wanted = np.array([idx * 10] + ([1] * 9), dtype=stored_type)
            assert_array_equal(wanted, found)
Example #7
0
def test_array_with_packbits_filter():
    """Round-trip boolean data through a ``PackBits`` filter per compressor.

    For each entry of ``compressors`` an array is built from 100 random
    booleans in chunks of 5. Two things are checked: reading the array back
    returns the original data, and each of the 20 stored chunks (compressor
    undone, if any) equals, viewed as ``u1``, what ``flt.encode`` produces
    for the matching five-item slice.
    """

    # setup
    flt = PackBits()
    filters = [flt]
    data = np.random.randint(0, 2, size=100, dtype=bool)

    for compressor in compressors:

        a = array(data, chunks=5, compressor=compressor, filters=filters)

        # check round-trip
        assert_array_equal(data, a[:])

        # check chunks
        for i in range(20):
            cdata = a.store[str(i)]
            if compressor:
                chunk = compressor.decode(cdata)
            else:
                chunk = cdata
            actual = np.frombuffer(chunk, dtype='u1')
            expect = flt.encode(data[i*5:(i*5)+5])
            assert_array_equal(expect, actual)
Example #8
0
def test_array():
    """Exercise ``array()`` with several kinds of input.

    Covers a numpy array, a plain list, another zarr array, mock objects
    that carry their own chunking, and the ``dtype`` argument given as
    ``None`` and as an explicit type.
    """

    # numpy array input: shape, dtype and contents carry over
    np_src = np.arange(100)
    from_np = array(np_src, chunks=10)
    eq(np_src.shape, from_np.shape)
    eq(np_src.dtype, from_np.dtype)
    assert_array_equal(np_src, from_np[:])

    # array-like (list) input
    list_src = list(range(100))
    from_list = array(list_src, chunks=10)
    eq((100,), from_list.shape)
    eq(np.asarray(list_src).dtype, from_list.dtype)
    assert_array_equal(np.asarray(list_src), from_list[:])

    # another zarr array as input: shape, chunks, dtype and data are kept
    clone = array(from_list)
    eq(from_list.shape, clone.shape)
    eq(from_list.chunks, clone.chunks)
    eq(from_list.dtype, clone.dtype)
    assert_array_equal(from_list[:], clone[:])

    # array-likes that bring their own chunking

    grid = np.arange(1000).reshape(100, 10)
    bcolz_like = MockBcolzArray(grid, 10)
    from_bcolz = array(bcolz_like)
    eq(bcolz_like.shape, from_bcolz.shape)
    eq((10, 10), from_bcolz.chunks)

    grid = np.arange(1000).reshape(100, 10)
    h5_like = MockH5pyDataset(grid, chunks=(10, 2))
    from_h5 = array(h5_like)
    eq(h5_like.shape, from_h5.shape)
    eq((10, 2), from_h5.chunks)

    # with chunks=None on the mock, only the type of .chunks is checked
    h5_unchunked = MockH5pyDataset(grid, chunks=None)
    from_h5_unchunked = array(h5_unchunked)
    eq(h5_unchunked.shape, from_h5_unchunked.shape)
    assert_is_instance(from_h5_unchunked.chunks, tuple)

    # dtype=None keeps the input dtype
    int32_src = np.arange(100, dtype='i4')
    kept = array(int32_src, dtype=None)
    assert_array_equal(int32_src[:], kept[:])
    eq(int32_src.dtype, kept.dtype)

    # an explicit dtype overrides the input dtype
    int32_src = np.arange(100, dtype='i4')
    widened = array(int32_src, dtype='i8')
    assert_array_equal(int32_src[:], widened[:])
    eq(np.dtype('i8'), widened.dtype)
Example #9
0
def test_array():
    """Exercise ``array()`` with several kinds of input.

    Covers a numpy array, a plain list, another zarr array, mock objects
    that carry their own chunking, and the ``dtype`` argument given as
    ``None`` and as an explicit type.
    """

    # numpy array input: shape, dtype and contents carry over
    np_src = np.arange(100)
    from_np = array(np_src, chunks=10)
    assert np_src.shape == from_np.shape
    assert np_src.dtype == from_np.dtype
    assert_array_equal(np_src, from_np[:])

    # array-like (list) input
    list_src = list(range(100))
    from_list = array(list_src, chunks=10)
    assert (100, ) == from_list.shape
    assert np.asarray(list_src).dtype == from_list.dtype
    assert_array_equal(np.asarray(list_src), from_list[:])

    # another zarr array as input: shape, chunks, dtype and data are kept
    clone = array(from_list)
    assert from_list.shape == clone.shape
    assert from_list.chunks == clone.chunks
    assert from_list.dtype == clone.dtype
    assert_array_equal(from_list[:], clone[:])

    # array-likes that bring their own chunking

    grid = np.arange(1000).reshape(100, 10)
    bcolz_like = MockBcolzArray(grid, 10)
    from_bcolz = array(bcolz_like)
    assert bcolz_like.shape == from_bcolz.shape
    assert (10, 10) == from_bcolz.chunks

    grid = np.arange(1000).reshape(100, 10)
    h5_like = MockH5pyDataset(grid, chunks=(10, 2))
    from_h5 = array(h5_like)
    assert h5_like.shape == from_h5.shape
    assert (10, 2) == from_h5.chunks

    # with chunks=None on the mock, only the type of .chunks is checked
    h5_unchunked = MockH5pyDataset(grid, chunks=None)
    from_h5_unchunked = array(h5_unchunked)
    assert h5_unchunked.shape == from_h5_unchunked.shape
    assert isinstance(from_h5_unchunked.chunks, tuple)

    # dtype=None keeps the input dtype
    int32_src = np.arange(100, dtype='i4')
    kept = array(int32_src, dtype=None)
    assert_array_equal(int32_src[:], kept[:])
    assert int32_src.dtype == kept.dtype

    # an explicit dtype overrides the input dtype
    int32_src = np.arange(100, dtype='i4')
    widened = array(int32_src, dtype='i8')
    assert_array_equal(int32_src[:], widened[:])
    assert np.dtype('i8') == widened.dtype
Example #10
0
def test_array():
    """Check ``array()`` against a range of input kinds.

    The inputs are, in order: a numpy array, a list, a zarr array, a
    bcolz-style mock, two h5py-style mocks (with and without chunks), and a
    numpy array combined with ``dtype=None`` and with ``dtype='i8'``.
    """

    # 1. numpy array
    ndarray_in = np.arange(100)
    z_ndarray = array(ndarray_in, chunks=10)
    assert ndarray_in.shape == z_ndarray.shape
    assert ndarray_in.dtype == z_ndarray.dtype
    assert_array_equal(ndarray_in, z_ndarray[:])

    # 2. array-like
    seq_in = list(range(100))
    z_seq = array(seq_in, chunks=10)
    assert (100,) == z_seq.shape
    assert np.asarray(seq_in).dtype == z_seq.dtype
    assert_array_equal(np.asarray(seq_in), z_seq[:])

    # 3. another zarr array
    z_again = array(z_seq)
    assert z_seq.shape == z_again.shape
    assert z_seq.chunks == z_again.chunks
    assert z_seq.dtype == z_again.dtype
    assert_array_equal(z_seq[:], z_again[:])

    # 4. chunky array-likes

    table = np.arange(1000).reshape(100, 10)
    bcolz_mock = MockBcolzArray(table, 10)
    z_bcolz = array(bcolz_mock)
    assert bcolz_mock.shape == z_bcolz.shape
    assert (10, 10) == z_bcolz.chunks

    table = np.arange(1000).reshape(100, 10)
    h5_mock = MockH5pyDataset(table, chunks=(10, 2))
    z_h5 = array(h5_mock)
    assert h5_mock.shape == z_h5.shape
    assert (10, 2) == z_h5.chunks

    # no chunks on the mock: just require that some tuple was chosen
    h5_mock_plain = MockH5pyDataset(table, chunks=None)
    z_h5_plain = array(h5_mock_plain)
    assert h5_mock_plain.shape == z_h5_plain.shape
    assert isinstance(z_h5_plain.chunks, tuple)

    # 5. dtype=None
    i4_in = np.arange(100, dtype='i4')
    z_default_dtype = array(i4_in, dtype=None)
    assert_array_equal(i4_in[:], z_default_dtype[:])
    assert i4_in.dtype == z_default_dtype.dtype

    # 6. dtype=something else
    i4_in = np.arange(100, dtype='i4')
    z_i8 = array(i4_in, dtype='i8')
    assert_array_equal(i4_in[:], z_i8[:])
    assert np.dtype('i8') == z_i8.dtype
Example #11
0
def test_array():
    """Check ``array()`` against a range of input kinds.

    The inputs are, in order: a numpy array, a list, a zarr array, a
    bcolz-style mock, two h5py-style mocks (with and without chunks), and a
    numpy array combined with ``dtype=None`` and with ``dtype='i8'``.
    """

    # 1. numpy array
    ndarray_in = np.arange(100)
    z_ndarray = array(ndarray_in, chunks=10)
    eq(ndarray_in.shape, z_ndarray.shape)
    eq(ndarray_in.dtype, z_ndarray.dtype)
    assert_array_equal(ndarray_in, z_ndarray[:])

    # 2. array-like
    seq_in = list(range(100))
    z_seq = array(seq_in, chunks=10)
    eq((100, ), z_seq.shape)
    eq(np.asarray(seq_in).dtype, z_seq.dtype)
    assert_array_equal(np.asarray(seq_in), z_seq[:])

    # 3. another zarr array
    z_again = array(z_seq)
    eq(z_seq.shape, z_again.shape)
    eq(z_seq.chunks, z_again.chunks)
    eq(z_seq.dtype, z_again.dtype)
    assert_array_equal(z_seq[:], z_again[:])

    # 4. chunky array-likes

    table = np.arange(1000).reshape(100, 10)
    bcolz_mock = MockBcolzArray(table, 10)
    z_bcolz = array(bcolz_mock)
    eq(bcolz_mock.shape, z_bcolz.shape)
    eq((10, 10), z_bcolz.chunks)

    table = np.arange(1000).reshape(100, 10)
    h5_mock = MockH5pyDataset(table, chunks=(10, 2))
    z_h5 = array(h5_mock)
    eq(h5_mock.shape, z_h5.shape)
    eq((10, 2), z_h5.chunks)

    # no chunks on the mock: just require that some tuple was chosen
    h5_mock_plain = MockH5pyDataset(table, chunks=None)
    z_h5_plain = array(h5_mock_plain)
    eq(h5_mock_plain.shape, z_h5_plain.shape)
    assert_is_instance(z_h5_plain.chunks, tuple)

    # 5. dtype=None
    i4_in = np.arange(100, dtype='i4')
    z_default_dtype = array(i4_in, dtype=None)
    assert_array_equal(i4_in[:], z_default_dtype[:])
    eq(i4_in.dtype, z_default_dtype.dtype)

    # 6. dtype=something else
    i4_in = np.arange(100, dtype='i4')
    z_i8 = array(i4_in, dtype='i8')
    assert_array_equal(i4_in[:], z_i8[:])
    eq(np.dtype('i8'), z_i8.dtype)
Example #12
0
    def _create_dataset_nosync(self, name, data=None, **kwargs):
        """Create an array under ``name`` in this object's store.

        ``name`` is resolved with ``self._item_path``. When ``data`` is
        ``None`` the array comes from ``create``; otherwise it comes from
        ``array`` with ``data`` as its first argument. Remaining keyword
        arguments are forwarded, with ``synchronizer`` defaulting to
        ``self._synchronizer``. Returns whatever the chosen function returns.
        """
        target = self._item_path(name)

        # use this object's synchronizer unless the caller supplied one
        kwargs.setdefault('synchronizer', self._synchronizer)

        # data supplied: build the array from it
        if data is not None:
            return array(data, store=self._store, path=target,
                         chunk_store=self._chunk_store, **kwargs)

        # no data: create the array from the keyword arguments alone
        return create(store=self._store, path=target,
                      chunk_store=self._chunk_store, **kwargs)
Example #13
0
    def _create_dataset_nosync(self, name, data=None, **kwargs):
        """Create an array under ``name`` in this object's store.

        ``name`` is resolved with ``self._item_path``. When ``data`` is
        ``None`` the array comes from ``create``; otherwise it comes from
        ``array`` with ``data`` as its first argument. Remaining keyword
        arguments are forwarded, with ``synchronizer`` defaulting to
        ``self._synchronizer`` and ``cache_attrs`` to ``self.attrs.cache``.
        Returns whatever the chosen function returns.
        """
        target = self._item_path(name)

        # fill in this object's settings where the caller gave none
        kwargs.setdefault('synchronizer', self._synchronizer)
        kwargs.setdefault('cache_attrs', self.attrs.cache)

        # data supplied: build the array from it
        if data is not None:
            return array(data, store=self._store, path=target,
                         chunk_store=self._chunk_store, **kwargs)

        # no data: create the array from the keyword arguments alone
        return create(store=self._store, path=target,
                      chunk_store=self._chunk_store, **kwargs)
Example #14
0
def test_create_read_only():
    """Check the ``read_only`` flag on arrays from ``create`` and ``array``.

    A read-only array must refuse whole-array assignment with
    ``PermissionError``; clearing the flag must allow writes, and setting
    it again must block them once more.
    """
    # https://github.com/alimanfoo/zarr/issues/151

    def assert_write_refused(target, value):
        # whole-array assignment must raise PermissionError
        with pytest.raises(PermissionError):
            target[:] = value

    # array created read-only, then writing toggled on and off again
    arr = create(100, read_only=True)
    assert arr.read_only
    assert_write_refused(arr, 42)
    arr.read_only = False
    arr[:] = 42
    assert np.all(arr[...] == 42)
    arr.read_only = True
    assert_write_refused(arr, 0)

    # subtly different case: the array is created with data and is
    # read-only from the start
    source = np.arange(100)
    frozen = array(source, read_only=True)
    assert_array_equal(source, frozen[...])
    assert frozen.read_only
    assert_write_refused(frozen, 42)
Example #15
0
def test_create_read_only():
    """Check the ``read_only`` flag on arrays from ``create`` and ``array``.

    A read-only array must refuse whole-array assignment with
    ``PermissionError``; clearing the flag must allow writes, and setting
    it again must block them once more.
    """
    # https://github.com/alimanfoo/zarr/issues/151

    def assert_write_refused(target, value):
        # whole-array assignment must raise PermissionError
        with assert_raises(PermissionError):
            target[:] = value

    # array created read-only, then writing toggled on and off again
    arr = create(100, read_only=True)
    assert arr.read_only
    assert_write_refused(arr, 42)
    arr.read_only = False
    arr[:] = 42
    assert np.all(arr[...] == 42)
    arr.read_only = True
    assert_write_refused(arr, 0)

    # subtly different case: the array is created with data and is
    # read-only from the start
    source = np.arange(100)
    frozen = array(source, read_only=True)
    assert_array_equal(source, frozen[...])
    assert frozen.read_only
    assert_write_refused(frozen, 42)
Example #16
0
def test_array_with_categorize_filter():
    """Round-trip label data through a ``Categorize`` filter per compressor.

    For each entry of ``compressors`` an array is built from 100 randomly
    chosen labels in chunks of 5. The array must read back equal to the
    input, and each of the 20 stored chunks (with the array's compressor
    undone, if it has one) must equal, viewed as ``u1``, the filter's
    ``encode`` output for the matching five-item slice.
    """
    # setup
    labels = np.random.choice([u'foo', u'bar', u'baz'], size=100)
    categorizer = Categorize(dtype=labels.dtype,
                             labels=[u'foo', u'bar', u'baz'])
    filter_chain = [categorizer]

    for codec in compressors:

        z = array(labels, chunks=5, compressor=codec, filters=filter_chain)

        # decoded contents must match the input exactly
        assert_array_equal(labels, z[:])

        # inspect the stored chunks directly
        for idx in range(20):
            raw = z.store[str(idx)]
            # decode with the compressor the array reports, if any
            payload = z.compressor.decode(raw) if z.compressor else raw
            found = np.frombuffer(payload, dtype='u1')
            start = idx * 5
            wanted = categorizer.encode(labels[start:start + 5])
            assert_array_equal(wanted, found)
Example #17
0
def test_array_with_packbits_filter():
    """Round-trip boolean data through a ``PackBits`` filter per compressor.

    For each entry of ``compressors`` an array is built from 100 random
    booleans in chunks of 5. The array must read back equal to the input,
    and each of the 20 stored chunks (compressor undone, if any) must equal,
    viewed as ``u1``, the filter's ``encode`` output for the matching
    five-item slice.
    """
    # setup
    packer = PackBits()
    filter_chain = [packer]
    flags = np.random.randint(0, 2, size=100, dtype=bool)

    for codec in compressors:

        z = array(flags, chunks=5, compressor=codec, filters=filter_chain)

        # decoded contents must match the input exactly
        assert_array_equal(flags, z[:])

        # inspect the stored chunks directly
        for idx in range(20):
            raw = z.store[str(idx)]
            payload = codec.decode(raw) if codec else raw
            found = np.frombuffer(payload, dtype='u1')
            start = idx * 5
            wanted = packer.encode(flags[start:start + 5])
            assert_array_equal(wanted, found)
Example #18
0
 def _array_nosync(self, name, data, **kwargs):
     """Build an array from ``data`` under ``name`` in this object's store.

     ``name`` is resolved with ``self._item_path``; ``synchronizer``
     defaults to ``self._synchronizer`` when the caller does not pass it.
     Returns the result of ``array``.
     """
     target = self._item_path(name)

     # use this object's synchronizer unless the caller supplied one
     kwargs.setdefault('synchronizer', self._synchronizer)

     result = array(data, store=self._store, path=target,
                    chunk_store=self._chunk_store, **kwargs)
     return result
Example #19
0
 def _array_nosync(self, name, data, **kwargs):
     """Build an array from ``data`` under ``name`` in this object's store.

     ``name`` is resolved with ``self._item_path``; ``synchronizer``
     defaults to ``self._synchronizer`` and ``cache_attrs`` to
     ``self.attrs.cache`` when the caller does not pass them. Returns the
     result of ``array``.
     """
     target = self._item_path(name)

     # fill in this object's settings where the caller gave none
     kwargs.setdefault('synchronizer', self._synchronizer)
     kwargs.setdefault('cache_attrs', self.attrs.cache)

     result = array(data, store=self._store, path=target,
                    chunk_store=self._chunk_store, **kwargs)
     return result