def test_invalid_format():
    blosc_args = BloscArgs()
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file_to_file(in_file, out_file, blosc_args=blosc_args)
        nt.assert_raises(FormatVersionMismatch,
                         unpack_file_from_file, out_file, dcmp_file)
def test_append():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file_to_file(in_file, out_file)
        append(out_file, in_file)
        unpack_file_from_file(out_file, dcmp_file)
        in_content = open(in_file, 'rb').read()
        dcmp_content = open(dcmp_file, 'rb').read()
        nt.assert_equal(len(dcmp_content), len(in_content) * 2)
        nt.assert_equal(dcmp_content, in_content * 2)
def test_offsets():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # First chunks should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([736, 368207, 633319, 902306,
                             1173771, 1419535, 1666981, 1913995],
                            offsets)
            # try to read the second header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 262144,
                        'ctbytes': 265108,
                        'version': 2,
                        'flags': 1,
                        'nbytes': 2097152,
                        'typesize': 8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            nt.assert_equal(expected, blosc_header)

        # now check the same thing again, but w/o any max_app_chunks
        input_fp, output_fp = StringIO(), StringIO()
        create_array_fp(1, input_fp)
        nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(input_fp.tell(), chunk_size='2M')
        input_fp.seek(0, 0)
        bloscpack_args = BloscpackArgs(max_app_chunks=0)
        source = PlainFPSource(input_fp)
        sink = CompressedFPSink(output_fp)
        pack(source, sink,
             nchunks, chunk_size, last_chunk_size,
             bloscpack_args=bloscpack_args)
        output_fp.seek(0, 0)
        bloscpack_header = _read_bloscpack_header(output_fp)
        nt.assert_equal(0, bloscpack_header.max_app_chunks)
        offsets = _read_offsets(output_fp, bloscpack_header)
        nt.assert_equal([96, 367567, 632679, 901666,
                         1173131, 1418895, 1666341, 1913355],
                        offsets)
def pack_unpack(repeats, chunk_size=None, progress=False):
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        if progress:
            print("Creating test array")
        create_array(repeats, in_file, progress=progress)
        if progress:
            print("Compressing")
        pack_file_to_file(in_file, out_file, chunk_size=chunk_size)
        if progress:
            print("Decompressing")
        unpack_file_from_file(out_file, dcmp_file)
        if progress:
            print("Verifying")
        cmp_file(in_file, dcmp_file)
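# Usage sketch for the round-trip helper above. The test names and repeat
# counts below are hypothetical illustrations, not taken from the original
# suite; they only show how pack_unpack() is meant to be driven.
def test_pack_unpack_small():
    pack_unpack(1, chunk_size='1M')


def test_pack_unpack_with_progress():
    pack_unpack(1, chunk_size='2M', progress=True)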
def test_offsets():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file_to_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # First chunks should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal(736, offsets[0])
            # try to read the second header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'version': 2,
                        'flags': 1,
                        'nbytes': 2097152,
                        'typesize': 8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            blosc_header_slice = dict((k, blosc_header[k])
                                      for k in expected.keys())
            nt.assert_equal(expected, blosc_header_slice)

        # now check the same thing again, but w/o any max_app_chunks
        input_fp, output_fp = StringIO(), StringIO()
        create_array_fp(1, input_fp)
        nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(input_fp.tell(), chunk_size='2M')
        input_fp.seek(0, 0)
        bloscpack_args = BloscpackArgs(max_app_chunks=0)
        source = PlainFPSource(input_fp)
        sink = CompressedFPSink(output_fp)
        pack(source, sink,
             nchunks, chunk_size, last_chunk_size,
             bloscpack_args=bloscpack_args)
        output_fp.seek(0, 0)
        bloscpack_header = _read_bloscpack_header(output_fp)
        nt.assert_equal(0, bloscpack_header.max_app_chunks)
        offsets = _read_offsets(output_fp, bloscpack_header)
        nt.assert_equal(96, offsets[0])
def test_offsets():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # First chunks should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([736, 418578, 736870, 1050327,
                             1363364, 1660766, 1959218, 2257703],
                            offsets)
            # try to read the second header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 131072,
                        'ctbytes': 318288,
                        'version': 2,
                        'flags': 1,
                        'nbytes': 2097152,
                        'typesize': 8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            nt.assert_equal(expected, blosc_header)

        # now check the same thing again, but w/o any max_app_chunks
        input_fp, output_fp = StringIO(), StringIO()
        create_array_fp(1, input_fp)
        nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(input_fp.tell(), chunk_size='2M')
        input_fp.seek(0, 0)
        bloscpack_args = BloscpackArgs(max_app_chunks=0)
        source = PlainFPSource(input_fp)
        sink = CompressedFPSink(output_fp)
        pack(source, sink,
             nchunks, chunk_size, last_chunk_size,
             bloscpack_args=bloscpack_args)
        output_fp.seek(0, 0)
        bloscpack_header = _read_bloscpack_header(output_fp)
        nt.assert_equal(0, bloscpack_header.max_app_chunks)
        offsets = _read_offsets(output_fp, bloscpack_header)
        nt.assert_equal([96, 417938, 736230, 1049687,
                         1362724, 1660126, 1958578, 2257063],
                        offsets)
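# A minimal sanity check of the first-offset arithmetic asserted above,
# assuming BLOSCPACK_HEADER_LENGTH is 32 bytes, each offset entry takes
# 8 bytes, and the default header reserves space for ten prospective append
# chunks per actual chunk. The helper below is illustrative only; it is
# consistent with the values 736 and 96 checked in the tests.
def _expected_first_offset(nchunks, max_app_chunks, header_length=32):
    total_entries = nchunks + max_app_chunks
    return header_length + 8 * total_entries


assert _expected_first_offset(8, 80) == 736  # default: max_app_chunks = 10 * nchunks
assert _expected_first_offset(8, 0) == 96    # with max_app_chunks=0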
def test_file_corruption():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file)
        # now go in and modify a byte in the file
        with open(out_file, 'r+b') as input_fp:
            # read offsets and header
            _read_offsets(input_fp, _read_bloscpack_header(input_fp))
            # read the blosc header of the first chunk
            input_fp.read(BLOSC_HEADER_LENGTH)
            # read four bytes
            input_fp.read(4)
            # read the fifth byte
            fifth = input_fp.read(1)
            # figure out what to replace it by
            replace = b'\x00' if fifth == b'\xff' else b'\xff'
            # seek one byte back relative to current position
            input_fp.seek(-1, 1)
            # write the flipped byte
            input_fp.write(replace)
        # now attempt to unpack it
        nt.assert_raises(ChecksumMismatch, unpack_file, out_file, dcmp_file)
from __future__ import print_function

import os.path as path
import time

import numpy

import bloscpack.testutil as bpt
from bloscpack.sysutil import drop_caches, sync
from bloscpack import pack_file_to_file, unpack_file_from_file
from bloscpack.pretty import pretty_size


with bpt.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
    print('create the test data', end='')
    bpt.create_array(100, in_file, progress=bpt.simple_progress)
    repeats = 3
    print("%s\t%s\t\t%s\t\t%s" %
          ("chunk_size", "comp-time", "decomp-time", "ratio"))
    for chunk_size in (int(2**i) for i in numpy.arange(19, 23.5, 0.5)):
        cmp_times, dcmp_times = [], []
        for _ in range(repeats):
            drop_caches()
            tic = time.time()
            pack_file_to_file(in_file, out_file, chunk_size=chunk_size)
            sync()
            toc = time.time()
            cmp_times.append(toc - tic)
            drop_caches()
            tic = time.time()
            unpack_file_from_file(out_file, dcmp_file)
def am_root():
    return os.geteuid() == 0


if len(sys.argv) == 2 and sys.argv[1] in ('-d', '--drop-caches'):
    if am_root():
        print('will drop caches')
        DROP_CACHES = True
    else:
        print('error: need uid 0 (root) to drop caches')
        sys.exit(1)


with bpt.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
    gz_out_file = path.join(tdir, 'file.gz')
    print('create the test data', end='')
    bpt.create_array(100, in_file, progress=bpt.simple_progress)
    print('')
    print("Input file size: %s" % get_fs(in_file))
    drop_caches()
    print("Will now run bloscpack... ")
    tic = time.time()
    pack_file_to_file(in_file, out_file)
    toc = time.time()
    print("Time: %.2f seconds" % (toc - tic))
    print("Output file size: %s" % get_fs(out_file))
    print("Ratio: %.2f" % get_ratio(in_file, out_file))
    drop_caches()
    print("Will now run gzip... ")