def convert_to_zarr(POP, ds, varname, chunkable_dim, path_zarr, comp, na, client):
    """Chunk ``ds`` and write it to a consolidated Zarr store at ``path_zarr``.

    Only variables with at least two dimensions and dtype ``float32`` get a
    compressor attached to their encoding.

    Two modes are selected by ``chunkable_dim``:

    * ``'time' in chunkable_dim`` -- time-series file.  The last qualifying
      variable in ``ds.data_vars`` is the one compressed; the chunk size along
      ``time`` comes from ``calculate_chunks``.  This mode also sets
      ``zarr.storage.default_compressor`` to ``Zlib(level=5)`` (a
      process-wide setting) and, when ``na`` is truthy, replaces ``ds`` with
      the result of ``get_missingval_mask(ds, POP, na)`` before chunking.
    * otherwise -- time-history file.  ``chunkable_dim`` is passed straight to
      ``ds.chunk`` and every qualifying variable gets its own compressor.

    Parameters
    ----------
    POP : forwarded to ``get_missingval_mask`` (time-series mode only).
    ds : dataset exposing ``data_vars``, ``chunk`` and ``to_zarr``
        (presumably an ``xarray.Dataset`` -- confirm against callers).
    varname : fallback variable name; overwritten in time-series mode by the
        qualifying variable found in ``ds``.  Unused in time-history mode.
    chunkable_dim : container tested for ``'time'``; in time-history mode it is
        used as the ``chunks`` argument of ``ds.chunk``.
    path_zarr : destination passed to ``to_zarr``; written with ``mode='w'``.
    comp : compression settings forwarded to ``define_compressor``.
    na : missing-value specification; only applied when truthy.
    client : unused by this function; kept for interface compatibility.

    Raises
    ------
    ValueError
        In time-series mode, when ``ds`` has no float32 variable with two or
        more dimensions (previously this surfaced as an ``UnboundLocalError``
        on ``timestep`` after global state had already been modified).
    """

    def _is_compressible(name):
        # Same selection rule for both modes: multi-dimensional float32 only.
        return len(ds[name].dims) >= 2 and ds[name].dtype == 'float32'

    if 'time' in chunkable_dim:
        # Time-series file: only one variable to compress, and the time
        # dimension can be used as the chunking dimension.
        found = False
        for _varname in ds.data_vars:
            if _is_compressible(_varname):
                timestep = calculate_chunks(ds, _varname)
                varname = _varname
                found = True
        if not found:
            # Fail before touching zarr's global default compressor.
            raise ValueError(
                "no float32 variable with at least 2 dimensions found in dataset; "
                "cannot determine time chunking"
            )

        zarr.storage.default_compressor = Zlib(level=5)
        compressor = define_compressor(varname, comp)
        if na:
            ds = get_missingval_mask(ds, POP, na)
        ds1 = ds.chunk(chunks={'time': timestep})
        ds1[varname].encoding['compressor'] = compressor[varname]
    else:
        # Time-history file: many variables to compress, and the chunking
        # dimensions must be specified by the user in the config file.
        ds1 = ds.chunk(chunks=chunkable_dim)
        for _varname in ds.data_vars:
            if _is_compressible(_varname):
                compressor = define_compressor(_varname, comp)
                ds1[_varname].encoding['compressor'] = compressor[_varname]
                # Disabled alternative, kept from the original:
                # ds1[_varname].encoding['compressor'] = None
                # a = ds1[_varname].data.map_blocks(zarr.array, compressor=compressor[_varname]).persist().map_blocks(np.array)
                # ds1[_varname].data = a
                # reorder_mpas_data(ds, _varname, client, compressor, path_zarr)

    ds1.to_zarr(path_zarr, mode='w', consolidated=True)
""" generate test data for zarr-js """
import zarr
from numpy import arange
from numcodecs.zlib import Zlib

# One row per fixture: (store path, dtype, compressor).
# Every fixture holds the same four values in a single chunk of length 4.
for _path, _dtype, _compressor in (
    # 1d.contiguous.compressed.i2
    ('data/1d.contiguous.compressed.i2.zarr', 'i2', Zlib()),
    # 1d.contiguous.uncompressed.i2
    ('data/1d.contiguous.uncompressed.i2.zarr', 'i2', None),
    # 1d.contiguous.compressed.i4
    ('data/1d.contiguous.compressed.i4.zarr', 'i4', Zlib()),
):
    store = zarr.DirectoryStore(_path)
    z = zarr.array(
        [1, 2, 3, 4],
        dtype=_dtype,
        store=store,
        chunks=(4, ),
        compressor=_compressor,
    )
def zlib_compressor(varname, comp):
    """Build a ``Zlib`` codec whose level is ``comp['comp_level']``.

    ``varname`` is accepted but not used.
    """
    level = comp['comp_level']
    return Zlib(level=level)
def test_err_encode_object_buffer():
    """Run the shared object-buffer encode-error check on a default Zlib codec."""
    codec = Zlib()
    check_err_encode_object_buffer(codec)
def test_eq():
    """Check ``==`` / ``!=`` between Zlib codecs and against a non-codec value."""
    # Two independently constructed default codecs compare equal.
    first_default, second_default = Zlib(), Zlib()
    assert first_default == second_default
    assert not first_default != second_default

    # Equality follows the compression level.
    level_one = Zlib(1)
    assert level_one == Zlib(1)
    assert level_one != Zlib(9)

    # A plain string is never equal to a codec, whichever side it is on.
    other = 'foo'
    assert Zlib() != other
    assert other != Zlib()
    assert not Zlib() == other
def test_config():
    """Run the shared config check on a level-3 Zlib codec."""
    check_config(Zlib(level=3))
from __future__ import absolute_import, print_function, division import itertools import numpy as np from numcodecs.zlib import Zlib from numcodecs.tests.common import (check_encode_decode, check_config, check_repr, check_backwards_compatibility, check_err_decode_object_buffer, check_err_encode_object_buffer) codecs = [ Zlib(), Zlib(level=-1), Zlib(level=0), Zlib(level=1), Zlib(level=5), Zlib(level=9), ] # mix of dtypes: integer, float, bool, string # mix of shapes: 1D, 2D, 3D # mix of orders: C, F arrays = [ np.arange(1000, dtype='i4'), np.linspace(1000, 1001, 1000, dtype='f8'), np.random.normal(loc=1000, scale=1, size=(100, 10)),
def test_alias():
    """The ``'gzip'`` codec id must resolve to a codec equal to ``Zlib(1)``."""
    resolved = get_codec({'id': 'gzip', 'level': 1})
    assert Zlib(1) == resolved