Beispiel #1
0
def test_decode_blosc_header():
    """Check ``decode_blosc_header`` on three Blosc-compressed buffers.

    The cases are: default ``BloscArgs``, shuffle switched off, and a short
    random float32 buffer for which the expected ``ctbytes`` is the input
    length plus 16 header bytes.
    """
    # ``num`` has to be an integer (recent NumPy rejects a float count) and
    # ``tobytes`` is the non-deprecated spelling of ``tostring``.
    array_ = np.linspace(0, 100, int(2e4)).tobytes()
    # basic test case
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {
        'versionlz': 1,
        'blocksize': 160000,
        'ctbytes': len(compressed),
        'version': 2,
        'flags': 1,
        'nbytes': len(array_),
        'typesize': blosc_args.typesize
    }
    nt.assert_equal(expected, header)
    # deactivate shuffle
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {
        'versionlz': 1,
        'blocksize': 160000,
        'ctbytes': len(compressed),
        'version': 2,
        'flags': 0,  # no shuffle flag
        'nbytes': len(array_),
        'typesize': blosc_args.typesize
    }
    nt.assert_equal(expected, header)
    # uncompressible data
    array_ = np.asarray(np.random.randn(23), dtype=np.float32).tobytes()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {
        'versionlz': 1,
        'blocksize': 88,
        'ctbytes': len(array_) + 16,  # original + 16 header bytes
        'version': 2,
        'flags': 3,  # 1 for shuffle 2 for non-compressed
        'nbytes': len(array_),
        'typesize': blosc_args.typesize
    }
    nt.assert_equal(expected, header)
Beispiel #2
0
def test_decode_blosc_header_basic():
    """Decode the header of a default-compressed buffer and check some fields.

    Only the keys present in ``expected`` are compared; any other key in the
    decoded header is ignored.
    """
    # ``num`` has to be an integer (recent NumPy rejects a float count) and
    # ``tobytes`` is the non-deprecated spelling of ``tostring``.
    array_ = np.linspace(0, 100, int(2e4)).tobytes()
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'version': 2,
                'flags': 1,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    # restrict the decoded header to the keys under test
    header_slice = {k: header[k] for k in expected}
    nt.assert_equal(expected, header_slice)
Beispiel #3
0
def test_decode_blosc_header():
    """Compare fully decoded Blosc headers with expected values.

    Three buffers are compressed and decoded: one with default ``BloscArgs``,
    one with shuffle disabled, and one made of 23 random float32 values whose
    expected ``ctbytes`` is the input length plus 16 header bytes.
    """
    # An integer sample count is required by recent NumPy; ``tobytes``
    # replaces the deprecated ``tostring``.
    array_ = np.linspace(0, 100, int(2e4)).tobytes()
    # basic test case
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 1,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
    # deactivate shuffle
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 0,  # no shuffle flag
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
    # uncompressible data
    array_ = np.asarray(np.random.randn(23),
                        dtype=np.float32).tobytes()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 88,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'version': 2,
                'flags': 3,  # 1 for shuffle 2 for non-compressed
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
Beispiel #4
0
def test_offsets():
    """Check the chunk offsets stored by ``pack_file`` and by ``pack``.

    The first half packs an on-disk array with ``pack_file`` and compares the
    offsets table and the Blosc header found at the second offset with
    recorded values.  The second half packs through ``StringIO`` objects with
    ``max_app_chunks=0`` and compares the offsets table again.
    """
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as packed_fp:
            file_header = _read_bloscpack_header(packed_fp)
            slot_count = file_header.total_prospective_chunks
            file_offsets = _read_offsets(packed_fp, file_header)
            # the first chunk is expected right behind the bloscpack header
            # and the offsets table (8 per prospective chunk); the other
            # offsets are only compared with recorded values
            nt.assert_equal(file_offsets[0],
                            BLOSCPACK_HEADER_LENGTH + 8 * slot_count)
            recorded_offsets = [736, 368207, 633319, 902306,
                                1173771, 1419535, 1666981, 1913995]
            nt.assert_equal(recorded_offsets, file_offsets)
            # decode the Blosc header located at the second offset
            packed_fp.seek(file_offsets[1], 0)
            raw_chunk_header = packed_fp.read(BLOSC_HEADER_LENGTH)
            wanted = dict(
                versionlz=1,
                blocksize=262144,
                ctbytes=265108,
                version=2,
                flags=1,
                nbytes=2097152,
                typesize=8,
            )
            decoded = decode_blosc_header(raw_chunk_header)
            nt.assert_equal(wanted, decoded)

    # same check once more, this time without any max_app_chunks
    plain_fp = StringIO()
    stream_fp = StringIO()
    create_array_fp(1, plain_fp)
    (n_chunks, size_per_chunk, size_of_last) = calculate_nchunks(
        plain_fp.tell(), chunk_size='2M')
    plain_fp.seek(0, 0)
    no_app_chunks = BloscpackArgs(max_app_chunks=0)
    reader = PlainFPSource(plain_fp)
    writer = CompressedFPSink(stream_fp)
    pack(reader, writer, n_chunks, size_per_chunk, size_of_last,
         bloscpack_args=no_app_chunks)
    stream_fp.seek(0, 0)
    stream_header = _read_bloscpack_header(stream_fp)
    nt.assert_equal(0, stream_header.max_app_chunks)
    stream_offsets = _read_offsets(stream_fp, stream_header)
    recorded_stream_offsets = [96, 367567, 632679, 901666,
                               1173131, 1418895, 1666341, 1913355]
    nt.assert_equal(recorded_stream_offsets, stream_offsets)
Beispiel #5
0
def test_decode_blosc_header_deactivate_shuffle():
    """Decode the header of a buffer compressed with shuffle switched off.

    Only the keys listed in the expected mapping are compared.
    """
    raw = np.ones(16000, dtype=np.uint8)
    args = BloscArgs()
    args.shuffle = False
    decoded = decode_blosc_header(blosc.compress(raw, **args))
    wanted = dict(
        versionlz=1,
        version=2,
        flags=0,  # no shuffle flag
        nbytes=len(raw),
        typesize=args.typesize,
    )
    # keep only the decoded entries that are under test
    relevant = {key: decoded[key] for key in wanted}
    nt.assert_equal(wanted, relevant)
Beispiel #6
0
def test_alternate_cname():
    """Yield one check per compressor name.

    For each ``cname`` an array is packed with ``pack_ndarray`` and the Blosc
    header of the first chunk is decoded; ``flags >> 5`` of that header is
    compared with the integer id paired with the name.
    """
    for cname, int_id in [
        ('blosclz', 0),
        ('lz4', 1),
        ('lz4hc', 1),
        ('snappy', 2),
        ('zlib', 3),
    ]:
        blosc_args = BloscArgs(cname=cname)
        # the sample count must be an integer; recent NumPy rejects ``2e6``
        array_ = np.linspace(0, 1, int(2e6))
        sink = CompressedMemorySink()
        pack_ndarray(array_, sink, blosc_args=blosc_args)
        blosc_header = decode_blosc_header(sink.chunks[0])
        yield nt.assert_equal, blosc_header['flags'] >> 5, int_id
Beispiel #7
0
def test_decode_blosc_header_basic():
    """Decode the header of a default-compressed buffer and check some fields.

    Only the keys present in ``expected`` are compared; any other key in the
    decoded header is ignored.
    """
    # ``tobytes`` replaces ``tostring``, which is deprecated in NumPy and
    # no longer available in NumPy 2.0.
    array_ = np.linspace(0, 100, int(2e4)).tobytes()
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {
        'versionlz': 1,
        'version': 2,
        'flags': 1,
        'nbytes': len(array_),
        'typesize': blosc_args.typesize
    }
    # restrict the decoded header to the keys under test
    header_slice = {k: header[k] for k in expected}
    nt.assert_equal(expected, header_slice)
Beispiel #8
0
def test_alternate_cname():
    """Yield one check per compressor name.

    Each iteration packs an array with ``pack_ndarray`` using the given
    ``cname``, decodes the Blosc header of the first chunk and compares
    ``flags >> 5`` with the integer id paired with that name.
    """
    for cname, int_id in [
            ('blosclz', 0),
            ('lz4', 1),
            ('lz4hc', 1),
            ('snappy', 2),
            ('zlib', 3),
            ]:
        blosc_args = BloscArgs(cname=cname)
        # ``num`` has to be an integer; a float such as ``2e6`` is rejected
        # by recent NumPy
        array_ = np.linspace(0, 1, int(2e6))
        sink = CompressedMemorySink()
        pack_ndarray(array_, sink, blosc_args=blosc_args)
        blosc_header = decode_blosc_header(sink.chunks[0])
        yield nt.assert_equal, blosc_header['flags'] >> 5, int_id
Beispiel #9
0
def test_decode_blosc_header_deactivate_shuffle():
    """Check selected header fields of a buffer compressed without shuffle.

    The decoded header is reduced to the keys of the expected mapping before
    the comparison.
    """
    ones = np.ones(16000, dtype=np.uint8)
    no_shuffle = BloscArgs()
    no_shuffle.shuffle = False
    packed = blosc.compress(ones, **no_shuffle)
    decoded = decode_blosc_header(packed)
    wanted = {}
    wanted['versionlz'] = 1
    wanted['version'] = 2
    wanted['flags'] = 0  # no shuffle flag
    wanted['nbytes'] = len(ones)
    wanted['typesize'] = no_shuffle.typesize
    observed = {}
    for field in wanted:
        observed[field] = decoded[field]
    nt.assert_equal(wanted, observed)
Beispiel #10
0
def test_decode_blosc_header_uncompressible_data():
    """Decode the header of a compressed buffer of 255 random float32 values.

    The expected ``ctbytes`` is the input length plus 16 header bytes, and
    the expected flags are ``0x13``.
    """
    # ``tobytes`` replaces ``tostring``, which is deprecated in NumPy and
    # no longer available in NumPy 2.0.
    array_ = np.asarray(np.random.randn(255),
                        dtype=np.float32).tobytes()
    blosc_args = BloscArgs()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 1016,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'version': 2,
                # 0x13 == 0x01 (shuffle) | 0x02 (non-compressed) | 0x10.
                # 0x10 is bit 4, not the value 4; the earlier comment tied
                # it to a small blocksize.
                # NOTE(review): confirm the meaning of bit 4 against the
                # Blosc header format of the linked Blosc version.
                'flags': 0x13,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
Beispiel #11
0
def test_offsets():
    """Check the first chunk offset written by two packing entry points.

    The first half packs an on-disk array with ``pack_file_to_file``, checks
    where the first chunk starts and compares selected fields of the Blosc
    header found at the second offset.  The second half packs through
    ``StringIO`` objects with ``max_app_chunks=0`` and checks the first
    offset again.
    """
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file_to_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as packed_fp:
            file_header = _read_bloscpack_header(packed_fp)
            slot_count = file_header.total_prospective_chunks
            file_offsets = _read_offsets(packed_fp, file_header)
            # the first chunk is expected right behind the bloscpack header
            # and the offsets table (8 per prospective chunk); the other
            # offsets are taken on trust
            start_of_chunks = BLOSCPACK_HEADER_LENGTH + 8 * slot_count
            nt.assert_equal(file_offsets[0], start_of_chunks)
            nt.assert_equal(736, file_offsets[0])
            # decode the Blosc header located at the second offset
            packed_fp.seek(file_offsets[1], 0)
            raw_chunk_header = packed_fp.read(BLOSC_HEADER_LENGTH)
            wanted = dict(
                versionlz=1,
                version=2,
                flags=1,
                nbytes=2097152,
                typesize=8,
            )
            decoded = decode_blosc_header(raw_chunk_header)
            # only the keys under test are compared
            relevant = {key: decoded[key] for key in wanted}
            nt.assert_equal(wanted, relevant)

    # same check once more, this time without any max_app_chunks
    plain_fp = StringIO()
    stream_fp = StringIO()
    create_array_fp(1, plain_fp)
    (n_chunks, size_per_chunk, size_of_last) = calculate_nchunks(
        plain_fp.tell(), chunk_size='2M')
    plain_fp.seek(0, 0)
    no_app_chunks = BloscpackArgs(max_app_chunks=0)
    reader = PlainFPSource(plain_fp)
    writer = CompressedFPSink(stream_fp)
    pack(reader, writer, n_chunks, size_per_chunk, size_of_last,
         bloscpack_args=no_app_chunks)
    stream_fp.seek(0, 0)
    stream_header = _read_bloscpack_header(stream_fp)
    nt.assert_equal(0, stream_header.max_app_chunks)
    stream_offsets = _read_offsets(stream_fp, stream_header)
    nt.assert_equal(96, stream_offsets[0])
Beispiel #12
0
def test_offsets():
    """Compare stored chunk offsets with recorded values.

    An on-disk array is packed with ``pack_file``; the offsets table and the
    Blosc header found at the second offset are compared with recorded
    values.  Afterwards the packing is repeated through ``StringIO`` objects
    with ``max_app_chunks=0`` and the offsets table is compared again.
    """
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as fp:
            bp_header = _read_bloscpack_header(fp)
            n_slots = bp_header.total_prospective_chunks
            on_disk = _read_offsets(fp, bp_header)
            # chunks are expected to begin right after the bloscpack header
            # and the offsets table (8 per prospective chunk)
            nt.assert_equal(on_disk[0], BLOSCPACK_HEADER_LENGTH + 8 * n_slots)
            # the complete table is pinned to recorded values
            nt.assert_equal(
                [736, 368207, 633319, 902306,
                 1173771, 1419535, 1666981, 1913995],
                on_disk)
            # read the Blosc header located at the second offset
            fp.seek(on_disk[1], 0)
            header_bytes = fp.read(BLOSC_HEADER_LENGTH)
            reference = {
                'versionlz': 1,
                'blocksize': 262144,
                'ctbytes': 265108,
                'version': 2,
                'flags': 1,
                'nbytes': 2097152,
                'typesize': 8,
            }
            nt.assert_equal(reference, decode_blosc_header(header_bytes))

    # now check the same thing again, but w/o any max_app_chunks
    src_fp = StringIO()
    dst_fp = StringIO()
    create_array_fp(1, src_fp)
    chunk_count, regular_size, final_size = calculate_nchunks(
        src_fp.tell(), chunk_size='2M')
    src_fp.seek(0, 0)
    args_without_app_chunks = BloscpackArgs(max_app_chunks=0)
    plain_source = PlainFPSource(src_fp)
    compressed_sink = CompressedFPSink(dst_fp)
    pack(
        plain_source,
        compressed_sink,
        chunk_count,
        regular_size,
        final_size,
        bloscpack_args=args_without_app_chunks,
    )
    dst_fp.seek(0, 0)
    bp_header = _read_bloscpack_header(dst_fp)
    nt.assert_equal(0, bp_header.max_app_chunks)
    in_stream = _read_offsets(dst_fp, bp_header)
    nt.assert_equal(
        [96, 367567, 632679, 901666,
         1173131, 1418895, 1666341, 1913355],
        in_stream)
Beispiel #13
0
def test_offsets():
    """Compare stored chunk offsets with recorded values.

    An on-disk array is packed with ``pack_file``; the offsets table and the
    Blosc header found at the second offset are compared with recorded
    values.  Afterwards the packing is repeated through ``StringIO`` objects
    with ``max_app_chunks=0`` and the offsets table is compared again.
    """
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as fp:
            bp_header = _read_bloscpack_header(fp)
            n_slots = bp_header.total_prospective_chunks
            on_disk = _read_offsets(fp, bp_header)
            # chunks are expected to begin right after the bloscpack header
            # and the offsets table (8 per prospective chunk)
            nt.assert_equal(on_disk[0], BLOSCPACK_HEADER_LENGTH + 8 * n_slots)
            # the complete table is pinned to recorded values
            nt.assert_equal(
                [736, 418578, 736870, 1050327,
                 1363364, 1660766, 1959218, 2257703],
                on_disk)
            # read the Blosc header located at the second offset
            fp.seek(on_disk[1], 0)
            header_bytes = fp.read(BLOSC_HEADER_LENGTH)
            reference = {
                'versionlz': 1,
                'blocksize': 131072,
                'ctbytes': 318288,
                'version': 2,
                'flags': 1,
                'nbytes': 2097152,
                'typesize': 8,
            }
            nt.assert_equal(reference, decode_blosc_header(header_bytes))

    # now check the same thing again, but w/o any max_app_chunks
    src_fp = StringIO()
    dst_fp = StringIO()
    create_array_fp(1, src_fp)
    chunk_count, regular_size, final_size = calculate_nchunks(
        src_fp.tell(), chunk_size='2M')
    src_fp.seek(0, 0)
    args_without_app_chunks = BloscpackArgs(max_app_chunks=0)
    plain_source = PlainFPSource(src_fp)
    compressed_sink = CompressedFPSink(dst_fp)
    pack(
        plain_source,
        compressed_sink,
        chunk_count,
        regular_size,
        final_size,
        bloscpack_args=args_without_app_chunks,
    )
    dst_fp.seek(0, 0)
    bp_header = _read_bloscpack_header(dst_fp)
    nt.assert_equal(0, bp_header.max_app_chunks)
    in_stream = _read_offsets(dst_fp, bp_header)
    nt.assert_equal(
        [96, 417938, 736230, 1049687,
         1362724, 1660126, 1958578, 2257063],
        in_stream)
Beispiel #14
0
def test_decode_blosc_header_uncompressible_data_dont_split_false():
    """Decode the header of a compressed buffer of 256 random float32 values.

    The expected ``ctbytes`` is the input length plus 16 header bytes, and
    the expected flags are exactly ``0x3``.
    """
    # ``tobytes`` replaces ``tostring``, which is deprecated in NumPy and
    # no longer available in NumPy 2.0.
    array_ = np.asarray(np.random.randn(256), dtype=np.float32).tobytes()
    blosc_args = BloscArgs()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {
        'versionlz': 1,
        'version': 2,
        'blocksize': 1024,
        'ctbytes': len(array_) + 16,  # original + 16 header bytes
        'flags': 0x3,  # 1 for shuffle 2 for non-compressed
        'nbytes': len(array_),
        'typesize': blosc_args.typesize
    }
    nt.assert_equal(expected, header)
Beispiel #15
0
def test_offsets():
    """Check where the first chunk starts for ``pack_file`` and ``pack``.

    After packing an on-disk array with ``pack_file`` the first offset is
    checked and selected fields of the Blosc header found at the second
    offset are compared.  The packing is then repeated through ``StringIO``
    objects with ``max_app_chunks=0`` and the first offset is checked again.
    """
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as fp:
            bp_header = _read_bloscpack_header(fp)
            n_slots = bp_header.total_prospective_chunks
            on_disk = _read_offsets(fp, bp_header)
            # chunks are expected to begin right after the bloscpack header
            # and the offsets table (8 per prospective chunk); the later
            # offsets are not verified here
            nt.assert_equal(on_disk[0], BLOSCPACK_HEADER_LENGTH + 8 * n_slots)
            nt.assert_equal(736, on_disk[0])
            # read the Blosc header located at the second offset
            fp.seek(on_disk[1], 0)
            header_bytes = fp.read(BLOSC_HEADER_LENGTH)
            reference = {
                'versionlz': 1,
                'version': 2,
                'flags': 1,
                'nbytes': 2097152,
                'typesize': 8,
            }
            full_header = decode_blosc_header(header_bytes)
            # compare only the keys listed in the reference mapping
            trimmed = {}
            for name in reference:
                trimmed[name] = full_header[name]
            nt.assert_equal(reference, trimmed)

    # now check the same thing again, but w/o any max_app_chunks
    src_fp = StringIO()
    dst_fp = StringIO()
    create_array_fp(1, src_fp)
    chunk_count, regular_size, final_size = calculate_nchunks(
        src_fp.tell(), chunk_size='2M')
    src_fp.seek(0, 0)
    args_without_app_chunks = BloscpackArgs(max_app_chunks=0)
    plain_source = PlainFPSource(src_fp)
    compressed_sink = CompressedFPSink(dst_fp)
    pack(
        plain_source,
        compressed_sink,
        chunk_count,
        regular_size,
        final_size,
        bloscpack_args=args_without_app_chunks,
    )
    dst_fp.seek(0, 0)
    bp_header = _read_bloscpack_header(dst_fp)
    nt.assert_equal(0, bp_header.max_app_chunks)
    in_stream = _read_offsets(dst_fp, bp_header)
    nt.assert_equal(96, in_stream[0])