def col_labels():
    """Append the benchmark column labels to the module-level ``COLS`` list.

    One label prefix is built per (codec, shuffle filter, compression level)
    combination by concatenating the codec name, ``str()`` of the filter
    constant and the level (0-9).  Each prefix is appended three times, with
    the suffixes ``_r``, ``_tc`` and ``_td`` (presumably ratio, compression
    time and decompression time -- confirm against the code filling the rows).
    """
    for codec_name in blosc.compressor_list():
        for shuffle_mode in (blosc.NOSHUFFLE, blosc.SHUFFLE, blosc.BITSHUFFLE):
            for level in range(10):
                prefix = codec_name + str(shuffle_mode) + str(level)
                for suffix in ('_r', '_tc', '_td'):
                    COLS.append(prefix + suffix)
def print_versions():
    """Print all the versions of software that python-blosc relies on.

    The report goes to stdout and covers the python-blosc and Blosc versions,
    the compressors in this build, the Python version, platform details, the
    processor, the byte order and the number of detected cores.

    On Linux the distribution name is printed as well.  The legacy
    ``platform.linux_distribution()`` is used when the interpreter still
    provides it; otherwise ``platform.freedesktop_os_release()`` is tried
    (Python 3.10+), and "not recognized" is printed when neither source
    yields a name.
    """
    import platform

    separator = "-=" * 38
    print(separator)
    print("python-blosc version: %s" % blosc.__version__)
    print("Blosc version: %s" % blosc.blosclib_version)
    print("Blosc compressors in this build: %s" % blosc.compressor_list())
    print("Python version: %s" % sys.version)
    (sysname, nodename, release, version, machine, processor) = platform.uname()
    print("Platform: %s-%s-%s (%s)" % (sysname, release, machine, version))
    if sysname == "Linux":
        # platform.linux_distribution() was deprecated in Python 3.5 and
        # removed in 3.8, so calling it unconditionally raises AttributeError
        # on current interpreters.
        legacy_dist = getattr(platform, "linux_distribution", None)
        os_release = getattr(platform, "freedesktop_os_release", None)
        if legacy_dist is not None:
            distro = " ".join(legacy_dist()[:-1])
        else:
            info = {}
            if os_release is not None:
                try:
                    info = os_release()
                except OSError:
                    # No readable os-release file on this system.
                    info = {}
            name_parts = (info.get("NAME"), info.get("VERSION_ID"))
            distro = " ".join(part for part in name_parts if part)
            if not distro:
                distro = "not recognized"
        print("Linux dist: %s" % distro)
    if not processor:
        processor = "not recognized"
    print("Processor: %s" % processor)
    print("Byte-ordering: %s" % sys.byteorder)
    print("Detected cores: %s" % blosc.detect_number_of_cores())
    print(separator)
def test_all_compressors(self):
    """Round-trip a 1000-byte payload through every available compressor.

    For each name returned by ``blosc.compressor_list()`` the payload is
    compressed with ``typesize=1`` and must decompress to the same bytes.
    """
    payload = b'0123456789' * 100
    for codec_name in blosc.compressor_list():
        packed = blosc.compress(payload, typesize=1, cname=codec_name)
        restored = blosc.decompress(packed)
        self.assertEqual(payload, restored)
def test_get_clib(self):
    """Check ``blosc.get_clib`` against ``blosc.cname2clib`` for every codec.

    A short buffer is compressed with each name from
    ``blosc.compressor_list()``; the value ``get_clib`` reports for the
    compressed buffer must equal the ``cname2clib`` entry for that name.
    """
    s = b'0123456789'
    for cname in blosc.compressor_list():
        c = blosc.compress(s, typesize=1, cname=cname)
        clib = blosc.get_clib(c)
        # assertEqual replaces the deprecated ``assert_`` alias (removed in
        # Python 3.12) and reports both values when the check fails.
        self.assertEqual(clib, blosc.cname2clib[cname])
clevel = 5 Nexp = np.log10(N) print("Creating a large NumPy array with 10**%d int64 elements:" % Nexp) in_ = np.arange(N, dtype=np.int64) # the trivial linear distribution #in_ = np.linspace(0, 100, N) # another linear distribution #in_ = np.random.random_integers(0, 100, N) # random distribution print(" ", in_) tic = time.time() out_ = np.copy(in_) toc = time.time() print(" Time for copying array with np.copy(): %.3f s" % (toc - tic, )) print() for cname in blosc.compressor_list(): print("Using *** %s *** compressor::" % cname) ctic = time.time() c = blosc.pack_array(in_, clevel=clevel, shuffle=True, cname=cname) ctoc = time.time() dtic = time.time() out = blosc.unpack_array(c) dtoc = time.time() assert ((in_ == out).all()) print(" Time for pack_array/unpack_array: %.3f/%.3f s." % \ (ctoc-ctic, dtoc-dtic), end='') print("\tCompr ratio: %.2f" % (in_.size * in_.dtype.itemsize * 1. / len(c))) ctic = time.time() c = blosc.compress_ptr(in_.__array_interface__['data'][0],
def test_all_compressors(self):
    """Verify compress/decompress is lossless for each compressor in the build.

    The input is the ten ASCII digits repeated 100 times, compressed with
    ``typesize=1`` under every name from ``blosc.compressor_list()``.
    """
    original = b'0123456789' * 100
    for name in blosc.compressor_list():
        compressed = blosc.compress(original, typesize=1, cname=name)
        self.assertEqual(original, blosc.decompress(compressed))
def test_get_clib(self):
    """Compare ``blosc.get_clib`` output with the ``blosc.cname2clib`` table.

    Each name from ``blosc.compressor_list()`` is used to compress a
    ten-byte buffer; ``get_clib`` applied to the result must equal the
    table entry for that name.
    """
    payload = b'0123456789'
    for codec_name in blosc.compressor_list():
        packed = blosc.compress(payload, typesize=1, cname=codec_name)
        expected_lib = blosc.cname2clib[codec_name]
        self.assertEqual(blosc.get_clib(packed), expected_lib)
# header lengths
BLOSC_HEADER_LENGTH = 16
BLOSCPACK_HEADER_LENGTH = 32
METADATA_HEADER_LENGTH = 32

# maximum/minimum values
MAX_FORMAT_VERSION = 255
MAX_CHUNKS = 2 ** 63 - 1  # largest signed 64-bit integer value
MAX_META_SIZE = 2 ** 32 - 1  # uint32 max val
MIN_CLEVEL = 0
MAX_CLEVEL = 9

# lookup table for human readable sizes: each suffix is 1024 times the
# previous one, starting at a single byte
SUFFIXES = OrderedDict(
    (suffix, 1024 ** power) for power, suffix in enumerate("BKMGT")
)

# Codecs available from Blosc
CNAME_AVAIL = blosc.compressor_list()
# Integer code -> codec name, in ascending code order.
# NOTE(review): presumably the codec codes used by Blosc itself -- confirm.
CNAME_MAPPING = dict(enumerate(('blosclz', 'lz4', 'snappy', 'zlib', 'zstd')))
# Three input arrays of N elements each, with a human-readable label for each.
# ``N`` and ``clevel`` are expected to be defined before this point.
arrays = [
    np.arange(N, dtype=np.int64),
    np.linspace(0, 1000, N),
    # np.random.random_integers(0, 1000, N) is deprecated; randint with an
    # exclusive upper bound of high + 1 draws from the same closed interval
    # [0, 1000].
    np.random.randint(0, 1000 + 1, N),
]
labels = [
    "the arange linear distribution",
    "the linspace linear distribution",
    "the random distribution",
]

# Baseline: time a plain in-memory copy of the first array.
tic = time.time()
out_ = np.copy(arrays[0])
toc = time.time()
print(" *** np.copy() **** Time for memcpy(): %.3f s" % (toc-tic,))

for (in_, label) in zip(arrays, labels):
    print("\n*** %s ***" % label)
    for cname in blosc.compressor_list():
        # Compress straight from the array's data buffer: address, number of
        # items and item size are passed instead of the array object.
        ctic = time.time()
        c = blosc.compress_ptr(in_.__array_interface__['data'][0],
                               in_.size, in_.dtype.itemsize,
                               clevel=clevel, shuffle=True, cname=cname)
        ctoc = time.time()
        # The destination is allocated outside the timed region so only the
        # decompression call itself is measured.
        out = np.empty(in_.size, dtype=in_.dtype)
        dtic = time.time()
        blosc.decompress_ptr(c, out.__array_interface__['data'][0])
        dtoc = time.time()
        # Sanity check that the round trip reproduced the input.
        assert((in_ == out).all())
        print(" *** %-8s *** Time for comp/decomp: %.3f/%.3f s."
              % (cname, ctoc-ctic, dtoc-dtic), end='')
        # Ratio of uncompressed size in bytes to compressed size; the ``1.``
        # factor forces float division.
        print("\tCompr ratio: %6.2f" % (in_.size*in_.dtype.itemsize*1. / len(c)))
# Build the column labels first (col_labels is presumably defined elsewhere
# in this file and fills COLS -- verify).
col_labels()
# Load the results of a previous run when 'out.csv' exists (tab-separated);
# otherwise start from an empty DataFrame.
if os.path.isfile('out.csv'):
    df = pd.read_csv('out.csv', sep='\t')
else:
    df = pd.DataFrame()
for filename in FILENAMES:
    for k, buffer in enumerate(file_reader(filename)):
        # Row identifier: the file's base name plus the index of this buffer.
        row_data = [filename.split('/')[-1] + '_' + str(k)]
        # Followed by every value yielded by extract_features for the buffer.
        for data in extract_features(buffer):
            row_data.append(data)
        # 1-based progress counter, reset for each buffer.
        aux = 1
        print("------------", filename.upper(), k, "------------")
        for codec in blosc.compressor_list():
            for filter in [blosc.NOSHUFFLE, blosc.SHUFFLE, blosc.BITSHUFFLE]:
                for clevel in range(10):
                    rate = 0
                    c_time = 0
                    d_time = 0
                    for i, chunk in enumerate(mega_chunk_generator(buffer)):
                        test = test_codec(chunk, codec, filter, clevel)
                        # Running mean over the chunks processed so far:
                        # new_mean = (old_mean * i + value) / (i + 1).
                        rate = (rate * i + test[0]) / (i + 1)
                        c_time = (c_time * i + test[1]) / (i + 1)
                        d_time = (d_time * i + test[2]) / (i + 1)
                    # Progress as a percentage of 180 combinations.
                    # NOTE(review): 180 presumably assumes 6 codecs x 3 filters
                    # x 10 levels -- confirm against blosc.compressor_list().
                    print("%-10s %5.2f %%" % (codec + str(filter) + str(clevel), aux/180*100))
                    aux += 1
                    # Three values per combination, in the order rate,
                    # compression time, decompression time.
                    row_data.append(rate)
                    row_data.append(c_time)
                    row_data.append(d_time)
@pytest.fixture(params=[
    np.arange(10, dtype=int),
    np.arange(10, dtype=float),
    np.arange(12).reshape(2, 2, 3),
    np.array([]),
    np.array([['foo', 'bar']]),
    [[1.1, 2.2], [3.3, 4.4]],
    'foo',
    42,
])
def array(request):
    """Input to serialize, one per parameter.

    Covers integer and float 1-D arrays, a 3-D array, an empty array, a
    string array, a nested list, a plain string and a plain integer.
    """
    return request.param


@pytest.fixture(params=blosc.compressor_list())
def compression(request):
    """Compressor name, one per entry of ``blosc.compressor_list()``."""
    return request.param


@pytest.fixture(params=[0, 1, 2], ids=['none', 'byte', 'bit'])
def shuffle(request):
    """Shuffle setting 0, 1 or 2, reported as 'none', 'byte' or 'bit'."""
    return request.param


@pytest.fixture
def write_result(array, compression, shuffle):
    """Write ``array`` to an in-memory stream with ``write_blosc``.

    The fourth argument to ``write_blosc`` is fixed at 5 (presumably the
    compression level -- confirm against its signature).

    Returns a ``(stream, length)`` tuple, where ``length`` is the value
    returned by ``write_blosc``.
    """
    buffer = io.BytesIO()
    written = write_blosc(buffer, array, compression, 5, shuffle)
    return buffer, written