Example #1
0
def test_encode_decode_array_datetime_timedelta():

    # some variations
    for k in ['m8[s]', 'M8[s]']:
        compressor = Blosc(cname='lz4', clevel=3, shuffle=2)
        dtype = np.dtype(k)
        fill_value = dtype.type("NaT")
        meta = dict(
            shape=(100, 100),
            chunks=(10, 10),
            dtype=dtype,
            compressor=compressor.get_config(),
            fill_value=fill_value,
            order=dtype.char,
            filters=[]
        )

        meta_json = '''{
            "chunks": [10, 10],
            "compressor": {
                "id": "blosc",
                "clevel": 3,
                "cname": "lz4",
                "shuffle": 2,
                "blocksize": 0
            },
            "dtype": "%s",
            "fill_value": -9223372036854775808,
            "filters": [],
            "order": "%s",
            "shape": [100, 100],
            "zarr_format": %s
        }''' % (dtype.str, dtype.char, ZARR_FORMAT)

        # test encoding
        meta_enc = encode_array_metadata(meta)
        assert_json_equal(meta_json, meta_enc)

        # test decoding
        meta_dec = decode_array_metadata(meta_enc)
        assert ZARR_FORMAT == meta_dec['zarr_format']
        assert meta['shape'] == meta_dec['shape']
        assert meta['chunks'] == meta_dec['chunks']
        assert meta['dtype'] == meta_dec['dtype']
        assert meta['compressor'] == meta_dec['compressor']
        assert meta['order'] == meta_dec['order']
        # Based off of this SO answer: https://stackoverflow.com/a/49972198
        assert np.all(
            fill_value.view((np.uint8, fill_value.itemsize)) ==
            meta_dec['fill_value'].view((np.uint8, meta_dec['fill_value'].itemsize))
        )
        assert [] == meta_dec['filters']
Example #2
0
def test_encode_decode_array_2():

    # some variations
    df = Delta(astype='u2', dtype='V14')
    compressor = Blosc(cname='lz4', clevel=3, shuffle=2)
    dtype = np.dtype([('a', 'i4'), ('b', 'S10')])
    fill_value = np.zeros((), dtype=dtype)[()]
    meta = dict(
        shape=(100, 100),
        chunks=(10, 10),
        dtype=dtype,
        compressor=compressor.get_config(),
        fill_value=fill_value,
        order='F',
        filters=[df.get_config()]
    )

    meta_json = '''{
        "chunks": [10, 10],
        "compressor": {
            "id": "blosc",
            "clevel": 3,
            "cname": "lz4",
            "shuffle": 2,
            "blocksize": 0
        },
        "dtype": [["a", "<i4"], ["b", "|S10"]],
        "fill_value": "AAAAAAAAAAAAAAAAAAA=",
        "filters": [
            {"id": "delta", "astype": "<u2", "dtype": "|V14"}
        ],
        "order": "F",
        "shape": [100, 100],
        "zarr_format": %s
    }''' % ZARR_FORMAT

    # test encoding
    meta_enc = encode_array_metadata(meta)
    assert_json_equal(meta_json, meta_enc)

    # test decoding
    meta_dec = decode_array_metadata(meta_enc)
    assert ZARR_FORMAT == meta_dec['zarr_format']
    assert meta['shape'] == meta_dec['shape']
    assert meta['chunks'] == meta_dec['chunks']
    assert meta['dtype'] == meta_dec['dtype']
    assert meta['compressor'] == meta_dec['compressor']
    assert meta['order'] == meta_dec['order']
    assert fill_value == meta_dec['fill_value']
    assert [df.get_config()] == meta_dec['filters']
Example #3
0
def test_encode_decode_array_2():

    # some variations
    df = Delta(astype='u2', dtype='V14')
    compressor = Blosc(cname='lz4', clevel=3, shuffle=2)
    dtype = np.dtype([('a', 'i4'), ('b', 'S10')])
    fill_value = np.zeros((), dtype=dtype)[()]
    meta = dict(
        shape=(100, 100),
        chunks=(10, 10),
        dtype=dtype,
        compressor=compressor.get_config(),
        fill_value=fill_value,
        order='F',
        filters=[df.get_config()]
    )

    meta_json = '''{
        "chunks": [10, 10],
        "compressor": {
            "id": "blosc",
            "clevel": 3,
            "cname": "lz4",
            "shuffle": 2,
            "blocksize": 0
        },
        "dtype": [["a", "<i4"], ["b", "|S10"]],
        "fill_value": "AAAAAAAAAAAAAAAAAAA=",
        "filters": [
            {"id": "delta", "astype": "<u2", "dtype": "|V14"}
        ],
        "order": "F",
        "shape": [100, 100],
        "zarr_format": %s
    }''' % ZARR_FORMAT

    # test encoding
    meta_enc = encode_array_metadata(meta)
    assert_json_equal(meta_json, meta_enc)

    # test decoding
    meta_dec = decode_array_metadata(meta_enc)
    assert ZARR_FORMAT == meta_dec['zarr_format']
    assert meta['shape'] == meta_dec['shape']
    assert meta['chunks'] == meta_dec['chunks']
    assert meta['dtype'] == meta_dec['dtype']
    assert meta['compressor'] == meta_dec['compressor']
    assert meta['order'] == meta_dec['order']
    assert fill_value == meta_dec['fill_value']
    assert [df.get_config()] == meta_dec['filters']
Example #4
0
def test_format_compatibility():

    # This test is intended to catch any unintended changes that break the ability to
    # read data stored with a previous minor version (which should be format-compatible).

    # fixture data
    fixture = group(store=DirectoryStore('fixture'))

    # set seed to get consistent random data
    np.random.seed(42)

    arrays_chunks = [
        (np.arange(1111, dtype='i1'), 100),
        (np.arange(1111, dtype='i2'), 100),
        (np.arange(1111, dtype='i4'), 100),
        (np.arange(1111, dtype='i8'), 1000),
        (np.random.randint(0, 200, size=2222, dtype='u1'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u2'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u4'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u8'), 100),
        (np.linspace(0, 1, 3333, dtype='f2'), 100),
        (np.linspace(0, 1, 3333, dtype='f4'), 100),
        (np.linspace(0, 1, 3333, dtype='f8'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f2'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f4'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f8'), 100),
        (np.random.choice([b'A', b'C', b'G', b'T'], size=5555,
                          replace=True).astype('S'), 100),
        (np.random.choice(['foo', 'bar', 'baz', 'quux'],
                          size=5555,
                          replace=True).astype('U'), 100),
        (np.random.choice([0, 1 / 3, 1 / 7, 1 / 9, np.nan],
                          size=5555,
                          replace=True).astype('f8'), 100),
        (np.random.randint(0, 2, size=5555, dtype=bool), 100),
        (np.arange(20000, dtype='i4').reshape(2000, 10, order='C'), (100, 3)),
        (np.arange(20000, dtype='i4').reshape(200, 100, order='F'), (100, 30)),
        (np.arange(20000, dtype='i4').reshape(200, 10, 10,
                                              order='C'), (100, 3, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 100, 10,
                                              order='F'), (10, 30, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 10, 10, 10,
                                              order='C'), (10, 3, 3, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 10, 10, 10,
                                              order='F'), (10, 3, 3, 3)),
    ]

    compressors = [
        None,
        Zlib(level=1),
        BZ2(level=1),
        Blosc(cname='zstd', clevel=1, shuffle=0),
        Blosc(cname='zstd', clevel=1, shuffle=1),
        Blosc(cname='zstd', clevel=1, shuffle=2),
        Blosc(cname='lz4', clevel=1, shuffle=0),
    ]

    for i, (arr, chunks) in enumerate(arrays_chunks):

        if arr.flags.f_contiguous:
            order = 'F'
        else:
            order = 'C'

        for j, compressor in enumerate(compressors):
            path = '{}/{}'.format(i, j)

            if path not in fixture:  # pragma: no cover
                # store the data - should be one-time operation
                fixture.array(path,
                              data=arr,
                              chunks=chunks,
                              order=order,
                              compressor=compressor)

            # setup array
            z = fixture[path]

            # check contents
            if arr.dtype.kind == 'f':
                assert_array_almost_equal(arr, z[:])
            else:
                assert_array_equal(arr, z[:])

            # check dtype
            assert arr.dtype == z.dtype

            # check compressor
            if compressor is None:
                assert z.compressor is None
            else:
                assert compressor.codec_id == z.compressor.codec_id
                assert compressor.get_config() == z.compressor.get_config()
Example #5
0
def test_migrate_1to2():
    from zarr import meta_v1

    # N.B., version 1 did not support hierarchies, so we only have to be
    # concerned about migrating a single array at the root of the store

    # setup
    store = dict()
    meta = dict(shape=(100, ),
                chunks=(10, ),
                dtype=np.dtype('f4'),
                compression='zlib',
                compression_opts=1,
                fill_value=None,
                order='C')
    meta_json = meta_v1.encode_metadata(meta)
    store['meta'] = meta_json
    store['attrs'] = json.dumps(dict()).encode('ascii')

    # run migration
    migrate_1to2(store)

    # check results
    assert 'meta' not in store
    assert array_meta_key in store
    assert 'attrs' not in store
    assert attrs_key in store
    meta_migrated = decode_array_metadata(store[array_meta_key])
    assert 2 == meta_migrated['zarr_format']

    # preserved fields
    for f in 'shape', 'chunks', 'dtype', 'fill_value', 'order':
        assert meta[f] == meta_migrated[f]

    # migrate should have added empty filters field
    assert meta_migrated['filters'] is None

    # check compression and compression_opts migrated to compressor
    assert 'compression' not in meta_migrated
    assert 'compression_opts' not in meta_migrated
    assert meta_migrated['compressor'] == Zlib(1).get_config()

    # check dict compression_opts
    store = dict()
    meta['compression'] = 'blosc'
    meta['compression_opts'] = dict(cname='lz4', clevel=5, shuffle=1)
    meta_json = meta_v1.encode_metadata(meta)
    store['meta'] = meta_json
    store['attrs'] = json.dumps(dict()).encode('ascii')
    migrate_1to2(store)
    meta_migrated = decode_array_metadata(store[array_meta_key])
    assert 'compression' not in meta_migrated
    assert 'compression_opts' not in meta_migrated
    assert (meta_migrated['compressor'] == Blosc(cname='lz4',
                                                 clevel=5,
                                                 shuffle=1).get_config())

    # check 'none' compression is migrated to None (null in JSON)
    store = dict()
    meta['compression'] = 'none'
    meta_json = meta_v1.encode_metadata(meta)
    store['meta'] = meta_json
    store['attrs'] = json.dumps(dict()).encode('ascii')
    migrate_1to2(store)
    meta_migrated = decode_array_metadata(store[array_meta_key])
    assert 'compression' not in meta_migrated
    assert 'compression_opts' not in meta_migrated
    assert meta_migrated['compressor'] is None
Example #6
0
from zarr.util import normalize_shape, normalize_chunks, normalize_order, \
    normalize_storage_path, buffer_size
from zarr.meta import encode_array_metadata, encode_group_metadata
from zarr.compat import PY2, binary_type
from numcodecs.registry import codec_registry
from zarr.errors import err_contains_group, err_contains_array, \
    err_path_not_found, err_bad_compressor, err_fspath_exists_notdir, \
    err_read_only

array_meta_key = '.zarray'
group_meta_key = '.zgroup'
attrs_key = '.zattrs'
try:
    from zarr.codecs import Blosc
    default_compressor = Blosc()
except ImportError:  # pragma: no cover
    from zarr.codecs import Zlib
    default_compressor = Zlib()


def _path_to_prefix(path):
    # assume path already normalized
    if path:
        prefix = path + '/'
    else:
        prefix = ''
    return prefix


def contains_array(store, path=None):
Example #7
0
 def create_array(self, read_only=False, **kwargs):
     store = dict()
     compressor = Blosc(cname='zstd', clevel=1, shuffle=1)
     kwargs.setdefault('compressor', compressor)
     init_array(store, **kwargs)
     return Array(store, read_only=read_only)