Beispiel #1
0
def test_invalid_format():
    blosc_args = BloscArgs()
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file_to_file(in_file, out_file, blosc_args=blosc_args)
        nt.assert_raises(FormatVersionMismatch, unpack_file_from_file,
                         out_file, dcmp_file)
Beispiel #2
0
def test_invalid_format():
    blosc_args = BloscArgs()
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, blosc_args=blosc_args)
        nt.assert_raises(FormatVersionMismatch,
                         unpack_file, out_file, dcmp_file)
Beispiel #3
0
def test_append():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file)
        append(out_file, in_file)
        unpack_file(out_file, dcmp_file)
        in_content = open(in_file, 'rb').read()
        dcmp_content = open(dcmp_file, 'rb').read()
        nt.assert_equal(len(dcmp_content), len(in_content) * 2)
        nt.assert_equal(dcmp_content, in_content * 2)
def test_append():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file_to_file(in_file, out_file)
        append(out_file, in_file)
        unpack_file_from_file(out_file, dcmp_file)
        in_content = open(in_file, 'rb').read()
        dcmp_content = open(dcmp_file, 'rb').read()
        nt.assert_equal(len(dcmp_content), len(in_content) * 2)
        nt.assert_equal(dcmp_content, in_content * 2)
Beispiel #5
0
def test_offsets():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # First chunks should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([
                736, 368207, 633319, 902306, 1173771, 1419535, 1666981, 1913995
            ], offsets)
            # try to read the second header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {
                'versionlz': 1,
                'blocksize': 262144,
                'ctbytes': 265108,
                'version': 2,
                'flags': 1,
                'nbytes': 2097152,
                'typesize': 8
            }
            blosc_header = decode_blosc_header(blosc_header_raw)
            nt.assert_equal(expected, blosc_header)

    # now check the same thing again, but w/o any max_app_chunks
    input_fp, output_fp = StringIO(), StringIO()
    create_array_fp(1, input_fp)
    nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(input_fp.tell(), chunk_size='2M')
    input_fp.seek(0, 0)
    bloscpack_args = BloscpackArgs(max_app_chunks=0)
    source = PlainFPSource(input_fp)
    sink = CompressedFPSink(output_fp)
    pack(source,
         sink,
         nchunks,
         chunk_size,
         last_chunk_size,
         bloscpack_args=bloscpack_args)
    output_fp.seek(0, 0)
    bloscpack_header = _read_bloscpack_header(output_fp)
    nt.assert_equal(0, bloscpack_header.max_app_chunks)
    offsets = _read_offsets(output_fp, bloscpack_header)
    nt.assert_equal(
        [96, 367567, 632679, 901666, 1173131, 1418895, 1666341, 1913355],
        offsets)
Beispiel #6
0
def pack_unpack(repeats, chunk_size=None, progress=False):
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        if progress:
            print("Creating test array")
        create_array(repeats, in_file, progress=progress)
        if progress:
            print("Compressing")
        pack_file(in_file, out_file, chunk_size=chunk_size)
        if progress:
            print("Decompressing")
        unpack_file(out_file, dcmp_file)
        if progress:
            print("Verifying")
        cmp_file(in_file, dcmp_file)
Beispiel #7
0
def pack_unpack(repeats, chunk_size=None, progress=False):
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        if progress:
            print("Creating test array")
        create_array(repeats, in_file, progress=progress)
        if progress:
            print("Compressing")
        pack_file_to_file(in_file, out_file, chunk_size=chunk_size)
        if progress:
            print("Decompressing")
        unpack_file_from_file(out_file, dcmp_file)
        if progress:
            print("Verifying")
        cmp_file(in_file, dcmp_file)
Beispiel #8
0
def test_offsets():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file_to_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # First chunks should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal(736, offsets[0])
            # try to read the second header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {
                'versionlz': 1,
                'version': 2,
                'flags': 1,
                'nbytes': 2097152,
                'typesize': 8
            }
            blosc_header = decode_blosc_header(blosc_header_raw)
            blosc_header_slice = dict(
                (k, blosc_header[k]) for k in expected.keys())
            nt.assert_equal(expected, blosc_header_slice)

    # now check the same thing again, but w/o any max_app_chunks
    input_fp, output_fp = StringIO(), StringIO()
    create_array_fp(1, input_fp)
    nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(input_fp.tell(), chunk_size='2M')
    input_fp.seek(0, 0)
    bloscpack_args = BloscpackArgs(max_app_chunks=0)
    source = PlainFPSource(input_fp)
    sink = CompressedFPSink(output_fp)
    pack(source,
         sink,
         nchunks,
         chunk_size,
         last_chunk_size,
         bloscpack_args=bloscpack_args)
    output_fp.seek(0, 0)
    bloscpack_header = _read_bloscpack_header(output_fp)
    nt.assert_equal(0, bloscpack_header.max_app_chunks)
    offsets = _read_offsets(output_fp, bloscpack_header)
    nt.assert_equal(96, offsets[0])
Beispiel #9
0
def test_offsets():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # First chunks should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([736, 368207, 633319, 902306, 1173771,
                             1419535, 1666981, 1913995],
                            offsets)
            # try to read the second header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 262144,
                        'ctbytes':   265108,
                        'version':   2,
                        'flags':     1,
                        'nbytes':    2097152,
                        'typesize':  8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            nt.assert_equal(expected, blosc_header)

    # now check the same thing again, but w/o any max_app_chunks
    input_fp, output_fp = StringIO(), StringIO()
    create_array_fp(1, input_fp)
    nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(input_fp.tell(), chunk_size='2M')
    input_fp.seek(0, 0)
    bloscpack_args = BloscpackArgs(max_app_chunks=0)
    source = PlainFPSource(input_fp)
    sink = CompressedFPSink(output_fp)
    pack(source, sink,
         nchunks, chunk_size, last_chunk_size,
         bloscpack_args=bloscpack_args
         )
    output_fp.seek(0, 0)
    bloscpack_header = _read_bloscpack_header(output_fp)
    nt.assert_equal(0, bloscpack_header.max_app_chunks)
    offsets = _read_offsets(output_fp, bloscpack_header)
    nt.assert_equal([96, 367567, 632679, 901666,
                     1173131, 1418895, 1666341, 1913355],
                    offsets)
Beispiel #10
0
def test_offsets():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # First chunks should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([736, 418578, 736870, 1050327,
                             1363364, 1660766, 1959218, 2257703],
                            offsets)
            # try to read the second header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 131072,
                        'ctbytes':   318288,
                        'version':   2,
                        'flags':     1,
                        'nbytes':    2097152,
                        'typesize':  8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            nt.assert_equal(expected, blosc_header)

    # now check the same thing again, but w/o any max_app_chunks
    input_fp, output_fp = StringIO(), StringIO()
    create_array_fp(1, input_fp)
    nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(input_fp.tell(), chunk_size='2M')
    input_fp.seek(0, 0)
    bloscpack_args = BloscpackArgs(max_app_chunks=0)
    source = PlainFPSource(input_fp)
    sink = CompressedFPSink(output_fp)
    pack(source, sink,
         nchunks, chunk_size, last_chunk_size,
         bloscpack_args=bloscpack_args
         )
    output_fp.seek(0, 0)
    bloscpack_header = _read_bloscpack_header(output_fp)
    nt.assert_equal(0, bloscpack_header.max_app_chunks)
    offsets = _read_offsets(output_fp, bloscpack_header)
    nt.assert_equal([96, 417938, 736230, 1049687,
                     1362724, 1660126, 1958578, 2257063],
                    offsets)
Beispiel #11
0
def test_offsets():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # First chunks should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal(736, offsets[0])
            # try to read the second header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'version':   2,
                        'flags':     1,
                        'nbytes':    2097152,
                        'typesize':  8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            blosc_header_slice = dict((k, blosc_header[k]) for k in expected.keys())
            nt.assert_equal(expected, blosc_header_slice)

    # now check the same thing again, but w/o any max_app_chunks
    input_fp, output_fp = StringIO(), StringIO()
    create_array_fp(1, input_fp)
    nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(input_fp.tell(), chunk_size='2M')
    input_fp.seek(0, 0)
    bloscpack_args = BloscpackArgs(max_app_chunks=0)
    source = PlainFPSource(input_fp)
    sink = CompressedFPSink(output_fp)
    pack(source, sink,
         nchunks, chunk_size, last_chunk_size,
         bloscpack_args=bloscpack_args
         )
    output_fp.seek(0, 0)
    bloscpack_header = _read_bloscpack_header(output_fp)
    nt.assert_equal(0, bloscpack_header.max_app_chunks)
    offsets = _read_offsets(output_fp, bloscpack_header)
    nt.assert_equal(96, offsets[0])
Beispiel #12
0
def test_file_corruption():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file)
        # now go in and modify a byte in the file
        with open(out_file, "r+b") as input_fp:
            # read offsets and header
            _read_offsets(input_fp, _read_bloscpack_header(input_fp))
            # read the blosc header of the first chunk
            input_fp.read(BLOSC_HEADER_LENGTH)
            # read four bytes
            input_fp.read(4)
            # read the fifth byte
            fifth = input_fp.read(1)
            # figure out what to replace it by
            replace = b"\x00" if fifth == b"\xff" else b"\xff"
            # seek one byte back relative to current position
            input_fp.seek(-1, 1)
            # write the flipped byte
            input_fp.write(replace)
        # now attempt to unpack it
        nt.assert_raises(ChecksumMismatch, unpack_file, out_file, dcmp_file)
Beispiel #13
0
def test_file_corruption():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file)
        # now go in and modify a byte in the file
        with open(out_file, 'r+b') as input_fp:
            # read offsets and header
            _read_offsets(input_fp, _read_bloscpack_header(input_fp))
            # read the blosc header of the first chunk
            input_fp.read(BLOSC_HEADER_LENGTH)
            # read four bytes
            input_fp.read(4)
            # read the fifth byte
            fifth = input_fp.read(1)
            # figure out what to replace it by
            replace = b'\x00' if fifth == b'\xff' else b'\xff'
            # seek one byte back relative to current position
            input_fp.seek(-1, 1)
            # write the flipped byte
            input_fp.write(replace)
        # now attempt to unpack it
        nt.assert_raises(ChecksumMismatch, unpack_file, out_file, dcmp_file)
Beispiel #14
0
from __future__ import print_function

import os.path as path
import time

import numpy

import bloscpack.testutil as bpt
from bloscpack.sysutil import drop_caches, sync
from bloscpack import pack_file_to_file, unpack_file_from_file
from bloscpack.pretty import pretty_size

with bpt.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):

    print('create the test data', end='')
    bpt.create_array(100, in_file, progress=bpt.simple_progress)
    repeats = 3
    print("%s\t%s\t\t%s\t\t%s" %
          ("chunk_size", "comp-time", "decomp-time", "ratio"))
    for chunk_size in (int(2**i) for i in numpy.arange(19, 23.5, 0.5)):
        cmp_times, dcmp_times = [], []
        for _ in range(repeats):
            drop_caches()
            tic = time.time()
            pack_file_to_file(in_file, out_file, chunk_size=chunk_size)
            sync()
            toc = time.time()
            cmp_times.append(toc - tic)
            drop_caches()
            tic = time.time()
            unpack_file_from_file(out_file, dcmp_file)
Beispiel #15
0
    return os.geteuid() == 0


if len(sys.argv) == 2 and sys.argv[1] in ('-d', '--drop-caches'):
    if am_root():
        print('will drop caches')
        DROP_CACHES = True
    else:
        print('error: need uid 0 (root) to drop caches')
        sys.exit(1)

with bpt.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
    gz_out_file = path.join(tdir, 'file.gz')

    print('create the test data', end='')
    bpt.create_array(100, in_file, progress=bpt.simple_progress)
    print('')

    print("Input file size: %s" % get_fs(in_file))
    drop_caches()

    print("Will now run bloscpack... ")
    tic = time.time()
    pack_file_to_file(in_file, out_file)
    toc = time.time()
    print("Time: %.2f seconds" % (toc - tic))
    print("Output file size: %s" % get_fs(out_file))
    print("Ratio: %.2f" % get_ratio(in_file, out_file))
    drop_caches()

    print("Will now run gzip... ")