def test_offsets():
    """Pack a file into 6 chunks and verify the recorded chunk offsets.

    Reads the bloscpack header and offset table back out of the packed
    file, checks the first offset lands right after header + offsets,
    pins the full offset list, and decodes the blosc header of the
    second chunk.
    """
    blosc_args = DEFAULT_BLOSC_ARGS
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        bloscpack.pack_file(in_file, out_file, blosc_args, nchunks=6)
        with open(out_file, 'r+b') as fp:
            header_raw = fp.read(BLOSCPACK_HEADER_LENGTH)
            header = decode_bloscpack_header(header_raw)
            nchunks = header['nchunks']
            offsets_raw = fp.read(8 * nchunks)
            # each offset is a little 8-byte slice of the offsets section
            offsets = [decode_int64(offsets_raw[i * 8:(i + 1) * 8])
                       for i in xrange(nchunks)]
            # First chunks should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * nchunks
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([80, 585990, 1071780,
                             1546083, 2003986, 2460350],
                            offsets)
            # try to read the second header
            fp.seek(585990, 0)
            raw = fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 131072,
                        'ctbytes': 485786,
                        'version': 2,
                        'flags': 1,
                        'nbytes': 3200000,
                        'typesize': 8}
            nt.assert_equal(expected, decode_blosc_header(raw))
def test_invalid_format():
    """Unpacking a file written with a too-new format version must raise
    FormatVersionMismatch.

    The test monkey-patches the module-level FORMAT_VERSION to the maximum
    before packing, so the file on disk claims a version the reader cannot
    handle.
    """
    # this will cause a bug if we ever reach 255 format versions
    bloscpack.FORMAT_VERSION = MAX_FORMAT_VERSION
    blosc_args = DEFAULT_BLOSC_ARGS
    try:
        with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
            create_array(1, in_file)
            bloscpack.pack_file(in_file, out_file, blosc_args=blosc_args)
            nt.assert_raises(FormatVersionMismatch,
                             unpack_file, out_file, dcmp_file)
    finally:
        # Restore the real format version even when the assertions fail;
        # otherwise the patched global leaks into every later test.
        bloscpack.FORMAT_VERSION = FORMAT_VERSION
def test_offsets():
    """Verify chunk offsets in a packed file, with and without
    pre-allocated ``max_app_chunks`` offset entries."""
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        bloscpack.pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as fp:
            header = bloscpack._read_bloscpack_header(fp)
            total_entries = header['nchunks'] + header['max_app_chunks']
            offsets = bloscpack._read_offsets(fp, header)
            # First chunks should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([736, 418578, 736870, 1050327,
                             1363364, 1660766, 1959218, 2257703],
                            offsets)
            # try to read the second header
            fp.seek(offsets[1], 0)
            raw = fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 131072,
                        'ctbytes': 318288,
                        'version': 2,
                        'flags': 1,
                        'nbytes': 2097152,
                        'typesize': 8}
            nt.assert_equal(expected, decode_blosc_header(raw))
        # now check the same thing again, but w/o any max_app_chunks
        input_fp, output_fp = StringIO(), StringIO()
        create_array_fp(1, input_fp)
        nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(input_fp.tell(), chunk_size='2M')
        input_fp.seek(0, 0)
        bloscpack_args = DEFAULT_BLOSCPACK_ARGS.copy()
        bloscpack_args['max_app_chunks'] = 0
        bloscpack._pack_fp(input_fp, output_fp,
                           nchunks, chunk_size, last_chunk_size,
                           bloscpack_args=bloscpack_args)
        output_fp.seek(0, 0)
        header = bloscpack._read_bloscpack_header(output_fp)
        nt.assert_equal(0, header['max_app_chunks'])
        offsets = bloscpack._read_offsets(output_fp, header)
        nt.assert_equal([96, 417938, 736230, 1049687,
                         1362724, 1660126, 1958578, 2257063],
                        offsets)
with tb.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
    gz_out_file = path.join(tdir, 'file.gz')

    print('create the test data', end='')

    def progress(i):
        # print a dot every tenth chunk so the output stays compact
        if i % 10 == 0:
            print('.', end='')
            sys.stdout.flush()
    tb.create_array(100, in_file, progress=progress)
    print('')
    print("Input file size: %s" % get_fs(in_file))

    print("Will now run bloscpack... ")
    start = time.time()
    bloscpack.pack_file(in_file, out_file, blosc_args,
                        chunk_size=bloscpack.reverse_pretty(
                            bloscpack.DEFAULT_CHUNK_SIZE))
    stop = time.time()
    print("Time: %.2f seconds" % (stop - start))
    print("Output file size: %s" % get_fs(out_file))
    print("Ratio: %.2f" % get_ratio(out_file, in_file))

    print("Will now run gzip... ")
    start = time.time()
    subprocess.call('gzip -c %s > %s' % (in_file, gz_out_file), shell=True)
    stop = time.time()
    print("Time: %.2f seconds" % (stop - start))
    print("Output file size: %s" % get_fs(gz_out_file))
    print("Ratio: %.2f" % get_ratio(gz_out_file, in_file))
print('create the test data', end='')

def progress(i):
    # print a dot every tenth chunk so the output stays compact
    if i % 10 == 0:
        print('.', end='')
        sys.stdout.flush()
tb.create_array(100, in_file, progress=progress)
print('')
print("Input file size: %s" % get_fs(in_file))

# flush the page cache so both tools read cold data
drop_caches()
print("Will now run bloscpack... ")
start = time.time()
bloscpack.pack_file(in_file, out_file)
stop = time.time()
print("Time: %.2f seconds" % (stop - start))
print("Output file size: %s" % get_fs(out_file))
print("Ratio: %.2f" % get_ratio(out_file, in_file))

drop_caches()
print("Will now run gzip... ")
start = time.time()
subprocess.call('gzip -c %s > %s' % (in_file, gz_out_file), shell=True)
stop = time.time()
print("Time: %.2f seconds" % (stop - start))
print("Output file size: %s" % get_fs(gz_out_file))
print("Ratio: %.2f" % get_ratio(gz_out_file, in_file))
from bloscpack.pretty import pretty_size with bpt.create_tmp_files() as (tdir, in_file, out_file, dcmp_file): print('create the test data', end='') bpt.create_array(100, in_file, progress=bpt.simple_progress) repeats = 3 print ("%s\t%s\t\t%s\t\t%s" % ("chunk_size", "comp-time", "decomp-time", "ratio")) for chunk_size in (int(2**i) for i in numpy.arange(19, 23.5, 0.5)): cmp_times, dcmp_times = [], [] for _ in range(repeats): drop_caches() tic = time.time() pack_file(in_file, out_file, chunk_size=chunk_size) sync() toc = time.time() cmp_times.append(toc-tic) drop_caches() tic = time.time() unpack_file(out_file, dcmp_file) sync() toc = time.time() dcmp_times.append(toc-tic) ratio = path.getsize(in_file)/path.getsize(out_file) print ("%s\t\t%f\t\t%f\t\t%f" % (pretty_size(chunk_size), sum(cmp_times)/repeats, sum(dcmp_times)/repeats, ratio,
from bloscpack import pack_file, unpack_file from bloscpack.pretty import pretty_size with bpt.create_tmp_files() as (tdir, in_file, out_file, dcmp_file): print('create the test data', end='') bpt.create_array(100, in_file, progress=bpt.simple_progress) repeats = 3 print("%s\t%s\t\t%s\t\t%s" % ("chunk_size", "comp-time", "decomp-time", "ratio")) for chunk_size in (int(2**i) for i in numpy.arange(19, 23.5, 0.5)): cmp_times, dcmp_times = [], [] for _ in range(repeats): drop_caches() tic = time.time() pack_file(in_file, out_file, chunk_size=chunk_size) sync() toc = time.time() cmp_times.append(toc - tic) drop_caches() tic = time.time() unpack_file(out_file, dcmp_file) sync() toc = time.time() dcmp_times.append(toc - tic) ratio = path.getsize(in_file) / path.getsize(out_file) print("%s\t\t%f\t\t%f\t\t%f" % ( pretty_size(chunk_size), sum(cmp_times) / repeats, sum(dcmp_times) / repeats, ratio,
from __future__ import division import os.path as path import time import numpy import bloscpack import test_bloscpack as tb blosc_args = bloscpack.DEFAULT_BLOSC_ARGS with tb.create_tmp_files() as (tdir, in_file, out_file, dcmp_file): tb.create_array(100, in_file) repeats = 3 print "%s\t\t%s\t\t%s\t\t%s" % ("chunk_size", "comp-time", "decomp-time", "ratio") for chunk_size in (int(2**i) for i in numpy.arange(19, 23.5, 0.5)): cmp_times, dcmp_times = [], [] for _ in range(repeats): tic = time.time() bloscpack.pack_file(in_file, out_file, blosc_args, chunk_size=chunk_size) toc = time.time() cmp_times.append(toc-tic) tic = time.time() bloscpack.unpack_file(out_file, dcmp_file) toc = time.time() dcmp_times.append(toc-tic) ratio = path.getsize(out_file)/path.getsize(in_file) print "%d\t\t%f\t\t%f\t\t%f" % (chunk_size, sum(cmp_times)/repeats, sum(dcmp_times)/repeats, ratio)
else: print('error: need uid 0 (root) to drop caches') sys.exit(1) with bpt.create_tmp_files() as (tdir, in_file, out_file, dcmp_file): gz_out_file = path.join(tdir, 'file.gz') print('create the test data', end='') bpt.create_array(100, in_file, progress=bpt.simple_progress) print('') print("Input file size: %s" % get_fs(in_file)) drop_caches() print("Will now run bloscpack... ") tic = time.time() pack_file(in_file, out_file) toc = time.time() print("Time: %.2f seconds" % (toc - tic)) print("Output file size: %s" % get_fs(out_file)) print("Ratio: %.2f" % get_ratio(in_file, out_file)) drop_caches() print("Will now run gzip... ") tic = time.time() subprocess.call('gzip -c %s > %s' % (in_file, gz_out_file), shell=True) toc = time.time() print("Time: %.2f seconds" % (toc - tic)) print("Output file size: %s" % get_fs(gz_out_file)) print("Ratio: %.2f" % get_ratio(in_file, gz_out_file))
import os.path as path
import time

import numpy

import bloscpack
import test_bloscpack as tb

# Sweep chunk sizes and report average compress/decompress time and ratio.
blosc_args = bloscpack.DEFAULT_BLOSC_ARGS
with tb.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
    tb.create_array(100, in_file)
    repeats = 3
    print("%s\t\t%s\t\t%s\t\t%s" %
          ("chunk_size", "comp-time", "decomp-time", "ratio"))
    # chunk sizes from 2**19 to 2**23 in half-power-of-two steps
    for chunk_size in (int(2**i) for i in numpy.arange(19, 23.5, 0.5)):
        cmp_times, dcmp_times = [], []
        for _ in range(repeats):
            tic = time.time()
            bloscpack.pack_file(in_file, out_file, blosc_args,
                                chunk_size=chunk_size)
            toc = time.time()
            cmp_times.append(toc - tic)
            tic = time.time()
            bloscpack.unpack_file(out_file, dcmp_file)
            toc = time.time()
            dcmp_times.append(toc - tic)
        # float() forces true division: without it this is Python 2 integer
        # (floor) division, which truncates the ratio to 0.
        ratio = float(path.getsize(out_file)) / path.getsize(in_file)
        print("%d\t\t%f\t\t%f\t\t%f" % (chunk_size,
                                        sum(cmp_times) / repeats,
                                        sum(dcmp_times) / repeats,
                                        ratio))
gz_out_file = path.join(tdir, 'file.gz')

print('create the test data', end='')

def progress(i):
    # print a dot every tenth chunk so the output stays compact
    if i % 10 == 0:
        print('.', end='')
        sys.stdout.flush()
tb.create_array(100, in_file, progress=progress)
print('')
print("Input file size: %s" % get_fs(in_file))

# flush the page cache so both tools read cold data
drop_caches()
print("Will now run bloscpack... ")
start = time.time()
bloscpack.pack_file(in_file, out_file)
stop = time.time()
print("Time: %.2f seconds" % (stop - start))
print("Output file size: %s" % get_fs(out_file))
print("Ratio: %.2f" % get_ratio(out_file, in_file))

drop_caches()
print("Will now run gzip... ")
start = time.time()
subprocess.call('gzip -c %s > %s' % (in_file, gz_out_file), shell=True)
stop = time.time()
print("Time: %.2f seconds" % (stop - start))
print("Output file size: %s" % get_fs(gz_out_file))
print("Ratio: %.2f" % get_ratio(gz_out_file, in_file))