Example #1
def test_typesize_is_set_correctly_with_custom_blosc_args():
    a = np.array([1, 2, 3], dtype='uint8')
    sink = CompressedMemorySink()
    input_args = BloscArgs(clevel=9)
    pack_ndarray(a, sink, blosc_args=input_args)
    expected_args = BloscArgs(clevel=9, typesize=1)
    nt.assert_equal(expected_args, sink.blosc_args)
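A note on the expected typesize: when packing an ndarray, the typesize ends up being the array's dtype itemsize (here uint8, so 1), even though BloscArgs was constructed without one; Example #15 below checks the same behaviour for the default args. A quick illustration of the dtype-to-typesize relationship, using plain NumPy only:

import numpy as np

# the dtype's itemsize is what the tests expect as BloscArgs.typesize
print(np.array([1, 2, 3], dtype='uint8').itemsize)   # -> 1
print(np.zeros(4, dtype='float64').itemsize)         # -> 8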
Example #2
def test_decode_blosc_header_deactivate_shuffle():
    array_ = np.ones(16000, dtype=np.uint8)
    blosc_args = BloscArgs()
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'version': 2,
                'flags': 0,  # no shuffle flag
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    header_slice = dict((k, header[k]) for k in expected.keys())
    nt.assert_equal(expected, header_slice)
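For context, decode_blosc_header in these tests parses the fixed 16-byte header that blosc 1.x places in front of every compressed buffer. A minimal stand-alone decoder sketch, based on the documented blosc header layout (an illustration, not bloscpack's actual implementation):

import struct

def decode_blosc_header_sketch(buffer_):
    # blosc 1.x header, 16 bytes, little-endian:
    #   byte 0: version        byte 1: versionlz
    #   byte 2: flags (bit 0: shuffle, bit 1: pure memcpy,
    #                  bits 5-7: compressor id)
    #   byte 3: typesize
    #   bytes 4-7:   nbytes    (uncompressed size)
    #   bytes 8-11:  blocksize
    #   bytes 12-15: ctbytes   (compressed size, header included)
    version, versionlz, flags, typesize = struct.unpack('<4B', buffer_[:4])
    nbytes, blocksize, ctbytes = struct.unpack('<3I', buffer_[4:16])
    return {'version': version, 'versionlz': versionlz, 'flags': flags,
            'typesize': typesize, 'nbytes': nbytes, 'blocksize': blocksize,
            'ctbytes': ctbytes}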
Example #3
def test_decode_blosc_header_deactivate_shuffle():
    array_ = np.ones(16000, dtype=np.uint8)
    blosc_args = BloscArgs()
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {
        'versionlz': 1,
        'version': 2,
        'flags': 0,  # no shuffle flag
        'nbytes': len(array_),
        'typesize': blosc_args.typesize
    }
    header_slice = dict((k, header[k]) for k in expected.keys())
    nt.assert_equal(expected, header_slice)
Example #4
def test_decode_blosc_header_uncompressible_data():
    array_ = np.asarray(np.random.randn(255),
                        dtype=np.float32).tostring()
    blosc_args = BloscArgs()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 1016,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'version': 2,
                'flags': 0x13,  # 0x1 shuffle, 0x2 non-compressed, 0x10 small blocksize
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
Example #5
def test_decode_blosc_header_uncompressible_data_dont_split_false():
    array_ = np.asarray(np.random.randn(256), dtype=np.float32).tostring()
    blosc_args = BloscArgs()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {
        'versionlz': 1,
        'version': 2,
        'blocksize': 1024,
        'ctbytes': len(array_) + 16,  # original + 16 header bytes
        'flags': 0x3,  # 1 for shuffle 2 for non-compressed
        'nbytes': len(array_),
        'typesize': blosc_args.typesize
    }
    nt.assert_equal(expected, header)
Example #6
def test_invalid_format():
    blosc_args = BloscArgs()
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file_to_file(in_file, out_file, blosc_args=blosc_args)
        nt.assert_raises(FormatVersionMismatch, unpack_file_from_file,
                         out_file, dcmp_file)
Example #7
def test_append_mix_shuffle():
    orig, new, new_size, dcmp = prep_array_for_append()
    # use the typesize from the file
    # deactivate shuffle
    # crank up the clevel to ensure compression happens, otherwise the flags
    # checked below won't be meaningful
    blosc_args = BloscArgs(typesize=None, shuffle=False, clevel=9)
    reset_append_fp(orig, new, new_size, blosc_args=blosc_args)
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    orig.seek(0)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()
    nt.assert_equal(len(dcmp_str), len(new_str * 2))
    nt.assert_equal(dcmp_str, new_str * 2)

    # now get the first and the last chunk and check that their shuffle
    # flags differ
    bloscpack_header, offsets = reset_read_beginning(orig)[0:4:3]
    orig.seek(offsets[0])
    checksum_impl = CHECKSUMS_LOOKUP[bloscpack_header['checksum']]
    compressed_zero, blosc_header_zero, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_zero = blosc.decompress(compressed_zero)
    orig.seek(offsets[-1])
    compressed_last, blosc_header_last, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_last = blosc.decompress(compressed_last)
    # first chunk has shuffle active
    nt.assert_equal(blosc_header_zero['flags'], 1)
    # last chunk doesn't
    nt.assert_equal(blosc_header_last['flags'], 0)
Example #8
def test_mixing_clevel():
    # the first set of chunks has max compression
    blosc_args = BloscArgs(clevel=9)
    orig, new, new_size, dcmp = prep_array_for_append()
    # get the original size
    orig.seek(0, 2)
    orig_size = orig.tell()
    orig.seek(0)
    # get a backup of the settings
    bloscpack_header, metadata, metadata_header, offsets = \
            reset_read_beginning(orig)
    # compressed size of the last chunk, including checksum
    last_chunk_compressed_size = orig_size - offsets[-1]

    # do append
    # use the typesize from the file and
    # make the second set of chunks have no compression
    blosc_args = BloscArgs(typesize=None, clevel=0)
    nchunks = append_fp(orig, new, new_size, blosc_args=blosc_args)

    # get the final size
    orig.seek(0, 2)
    final_size = orig.tell()
    orig.seek(0)

    # the original file minus the compressed size of the last chunk
    discounted_orig_size = orig_size - last_chunk_compressed_size
    # size of the appended data
    #  * raw new size, since we have no compression
    #  * uncompressed size of the last chunk
    #  * nchunks + 1 times the blosc and checksum overhead
    appended_size = (new_size + bloscpack_header['last_chunk'] +
                     (nchunks + 1) * (16 + 4))
    # final size should be original plus appended data
    nt.assert_equal(final_size, appended_size + discounted_orig_size)

    # check by unpacking
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()
    nt.assert_equal(len(dcmp_str), len(new_str * 2))
    nt.assert_equal(dcmp_str, new_str * 2)
Example #9
def test_decode_blosc_header():
    array_ = np.linspace(0, 100, int(2e4)).tostring()
    # basic test case
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {
        'versionlz': 1,
        'blocksize': 160000,
        'ctbytes': len(compressed),
        'version': 2,
        'flags': 1,
        'nbytes': len(array_),
        'typesize': blosc_args.typesize
    }
    nt.assert_equal(expected, header)
    # deactivate shuffle
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {
        'versionlz': 1,
        'blocksize': 160000,
        'ctbytes': len(compressed),
        'version': 2,
        'flags': 0,  # no shuffle flag
        'nbytes': len(array_),
        'typesize': blosc_args.typesize
    }
    nt.assert_equal(expected, header)
    # uncompressible data
    array_ = np.asarray(np.random.randn(23), dtype=np.float32).tostring()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {
        'versionlz': 1,
        'blocksize': 88,
        'ctbytes': len(array_) + 16,  # original + 16 header bytes
        'version': 2,
        'flags': 3,  # 1 for shuffle 2 for non-compressed
        'nbytes': len(array_),
        'typesize': blosc_args.typesize
    }
    nt.assert_equal(expected, header)
Example #10
def test_decode_blosc_header():
    array_ = np.linspace(0, 100, int(2e4)).tostring()
    # basic test case
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 1,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
    # deactivate shuffle
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 0,  # no shuffle flag
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
    # uncompressible data
    array_ = np.asarray(np.random.randn(23),
                        dtype=np.float32).tostring()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 88,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'version': 2,
                'flags': 3,  # 1 for shuffle 2 for non-compressed
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
Example #11
def test_append_mix_shuffle():
    orig, new, new_size, dcmp = prep_array_for_append()
    # use the typesize from the file
    # deactivate shuffle
    # crank up the clevel to ensure compression happens, otherwise the flags
    # checked below won't be meaningful
    blosc_args = BloscArgs(typesize=None, shuffle=False, clevel=9)

    # need to create something that will be compressible even without shuffle,
    # the linspace used in 'new' doesn't work anymore as of python-blosc 1.6.1
    to_append = np.zeros(int(2e6))
    to_append_fp = StringIO()
    to_append_fp.write(to_append.tostring())
    to_append_fp_size = to_append_fp.tell()
    to_append_fp.seek(0)

    # now do the append
    reset_append_fp(orig,
                    to_append_fp,
                    to_append_fp_size,
                    blosc_args=blosc_args)

    # decompress 'orig' so that we can examine it
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    orig.seek(0)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()

    # now sanity check the length and content of the decompressed
    nt.assert_equal(len(dcmp_str), len(new_str) + to_append_fp_size)
    nt.assert_equal(dcmp_str, new_str + to_append.tostring())

    # now get the first and the last chunk and check that their shuffle
    # flags differ
    bloscpack_header, offsets = reset_read_beginning(orig)[0:4:3]
    orig.seek(offsets[0])
    checksum_impl = CHECKSUMS_LOOKUP[bloscpack_header['checksum']]
    compressed_zero, blosc_header_zero, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_zero = blosc.decompress(compressed_zero)
    orig.seek(offsets[-1])
    compressed_last, blosc_header_last, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_last = blosc.decompress(compressed_last)
    # first chunk has shuffle active
    nt.assert_equal(blosc_header_zero['flags'], 1)
    # last chunk doesn't
    nt.assert_equal(blosc_header_last['flags'], 0)
Example #12
def test_alternate_cname():
    for cname, int_id in [
        ('blosclz', 0),
        ('lz4', 1),
        ('lz4hc', 1),
        ('snappy', 2),
        ('zlib', 3),
    ]:
        blosc_args = BloscArgs(cname=cname)
        array_ = np.linspace(0, 1, int(2e6))
        sink = CompressedMemorySink()
        pack_ndarray(array_, sink, blosc_args=blosc_args)
        blosc_header = decode_blosc_header(sink.chunks[0])
        yield nt.assert_equal, blosc_header['flags'] >> 5, int_id
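The flags >> 5 in the assertion works because blosc keeps the compressor id in the top three bits of the header's flags byte; lz4 and lz4hc share id 1 since lz4hc emits the same lz4 stream format. A quick hands-on check using python-blosc directly, without bloscpack:

import blosc

compressed = blosc.compress(b'\x00' * 1000, typesize=1, cname='zlib')
flags = bytearray(compressed)[2]   # byte 2 of the blosc header holds the flags
print(flags >> 5)                  # -> 3, the id the test above expects for zlib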
Example #13
def test_decode_blosc_header_basic():
    array_ = np.linspace(0, 100, int(2e4)).tostring()
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {
        'versionlz': 1,
        'version': 2,
        'flags': 1,
        'nbytes': len(array_),
        'typesize': blosc_args.typesize
    }
    header_slice = dict((k, header[k]) for k in expected.keys())
    nt.assert_equal(expected, header_slice)
Example #14
def prep_array_for_append(blosc_args=BloscArgs(),
                          bloscpack_args=BloscpackArgs()):
    orig, new, dcmp = StringIO(), StringIO(), StringIO()
    create_array_fp(1, new)
    new_size = new.tell()
    new.seek(0)
    chunking = calculate_nchunks(new_size)
    source = PlainFPSource(new)
    sink = CompressedFPSink(orig)
    pack(source, sink, *chunking,
         blosc_args=blosc_args,
         bloscpack_args=bloscpack_args)
    orig.seek(0)
    new.seek(0)
    return orig, new, new_size, dcmp
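One detail worth flagging in this helper: BloscArgs() and BloscpackArgs() used as default argument values are created once, at function definition time, and shared between calls. Several tests above mutate their args in place (e.g. blosc_args.shuffle = False), so the usual safer idiom is a None default, along the lines of this sketch:

def prep_array_for_append(blosc_args=None, bloscpack_args=None):
    # build fresh argument objects per call instead of sharing one instance
    blosc_args = BloscArgs() if blosc_args is None else blosc_args
    bloscpack_args = (BloscpackArgs() if bloscpack_args is None
                      else bloscpack_args)
    ...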
Example #15
def test_typesize_is_set_correctly_with_default_blosc_args():
    a = np.array([1, 2, 3], dtype='uint8')
    sink = CompressedMemorySink()
    pack_ndarray(a, sink)
    expected_args = BloscArgs(typesize=1)
    nt.assert_equal(expected_args, sink.blosc_args)
Example #16
def test_init(self):
    blosc_args = BloscArgs()
    self.assertEqual(DEFAULT_TYPESIZE, blosc_args.typesize)
    self.assertEqual(DEFAULT_CLEVEL, blosc_args.clevel)
    self.assertEqual(DEFAULT_SHUFFLE, blosc_args.shuffle)
    self.assertEqual(DEFAULT_CNAME, blosc_args.cname)
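Taken together, the defaults verified here and the sink-based examples above suggest the following minimal usage pattern. The import paths are assumptions (the examples only use the bare names), so adjust them to wherever your bloscpack version exposes these symbols:

import numpy as np
# NOTE: assumed import locations; the examples above only show the bare names
from bloscpack import BloscArgs
from bloscpack.memory_io import CompressedMemorySink
from bloscpack.numpy_io import pack_ndarray

a = np.linspace(0, 100, int(1e5))
sink = CompressedMemorySink()
pack_ndarray(a, sink, blosc_args=BloscArgs(clevel=9))
# the sink now holds the compressed chunks and the effective args, with
# typesize filled in from a.dtype.itemsize (compare Examples #1 and #15)
print(len(sink.chunks), sink.blosc_args)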