Ejemplo n.º 1
0
def test_append_single_chunk():
    """Append payloads of different sizes and check the ``nchunks`` header.

    The initial container is packed with a chunk size equal to the size of
    the generated payload. Afterwards a full chunk, two chunks, half a chunk
    and finally a few bytes are appended; after each append the ``nchunks``
    field of the bloscpack header is compared with the expected count.
    """
    orig, new = StringIO(), StringIO()
    create_array_fp(1, new)
    new_size = new.tell()
    new.seek(0)
    chunking = calculate_nchunks(new_size, chunk_size=new_size)
    source = PlainFPSource(new)
    sink = CompressedFPSink(orig)
    pack(source, sink, *chunking)
    orig.seek(0)
    new.seek(0)

    # append a single chunk
    reset_append_fp(orig, new, new_size)
    bloscpack_header = reset_read_beginning(orig)[0]
    nt.assert_equal(bloscpack_header['nchunks'], 2)

    # append a large content, that amounts to two chunks
    new_content = new.read()
    new.seek(0)
    reset_append_fp(orig, StringIO(new_content * 2), new_size * 2)
    bloscpack_header = reset_read_beginning(orig)[0]
    nt.assert_equal(bloscpack_header['nchunks'], 4)

    # append half a chunk; the buffer is cut to the size that is announced
    # so that content and declared size agree
    half_size = new_size // 2
    reset_append_fp(orig, StringIO(new_content[:half_size]), half_size)
    bloscpack_header = reset_read_beginning(orig)[0]
    nt.assert_equal(bloscpack_header['nchunks'], 5)

    # append a few bytes; again the declared size matches the buffer length
    few_bytes = 1023
    reset_append_fp(orig, StringIO(new_content[:few_bytes]), few_bytes)
    # make sure it is squashed into the last chunk
    bloscpack_header = reset_read_beginning(orig)[0]
    nt.assert_equal(bloscpack_header['nchunks'], 5)
Ejemplo n.º 2
0
def pack_unpack_fp(repeats, chunk_size=DEFAULT_CHUNK_SIZE,
                   progress=False, metadata=None):
    """Round-trip a generated test array through file-pointer sources/sinks.

    The array is written with ``create_array_fp``, compressed via
    ``PlainFPSource`` -> ``CompressedFPSink``, decompressed via
    ``CompressedFPSource`` -> ``PlainFPSink`` and finally compared with the
    input using ``cmp_fp``.

    :param repeats: forwarded to ``create_array_fp``.
    :param chunk_size: requested chunk size, forwarded to
        ``calculate_nchunks``.
    :param progress: when true, print a message before each stage; also
        forwarded to ``create_array_fp``.
    :param metadata: forwarded to ``pack``.
    :return: ``metadata`` attribute of the compressed source after unpacking.
    """
    in_fp, out_fp, dcmp_fp = StringIO(), StringIO(), StringIO()
    if progress:
        print("Creating test array")
    create_array_fp(repeats, in_fp, progress=progress)
    in_fp_size = in_fp.tell()
    if progress:
        print("Compressing")
    in_fp.seek(0)
    nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(in_fp_size, chunk_size)
    source = PlainFPSource(in_fp)
    sink = CompressedFPSink(out_fp)
    pack(source, sink,
         nchunks, chunk_size, last_chunk_size,
         metadata=metadata)
    out_fp.seek(0)
    if progress:
        print("Decompressing")
    source = CompressedFPSource(out_fp)
    sink = PlainFPSink(dcmp_fp)
    unpack(source, sink)
    if progress:
        print("Verifying")
    # rewind both streams so that the comparison starts at the beginning of
    # the data instead of wherever pack/unpack left the stream positions
    in_fp.seek(0)
    dcmp_fp.seek(0)
    cmp_fp(in_fp, dcmp_fp)
    return source.metadata
Ejemplo n.º 3
0
def test_append_into_last_chunk():
    """Append two small payloads and check how the last chunk evolves.

    A container is packed with a chunk size equal to the payload size, then
    1023 bytes are appended twice. After each append the value returned by
    ``reset_append_fp`` and the ``last_chunk`` header field are checked, and
    finally the container is unpacked and compared with the expected bytes.
    """
    container_fp, plain_fp, unpacked_fp = StringIO(), StringIO(), StringIO()

    # start from a container whose chunk size equals the payload size
    create_array_fp(1, plain_fp)
    payload_size = plain_fp.tell()
    plain_fp.seek(0)
    chunking = calculate_nchunks(payload_size, chunk_size=payload_size)
    pack(PlainFPSource(plain_fp), CompressedFPSink(container_fp), *chunking)
    container_fp.seek(0)
    plain_fp.seek(0)

    few_bytes = plain_fp.read()[:1023]
    plain_fp.seek(0)

    # first append: expected to create a new, smaller, last chunk
    appended = reset_append_fp(container_fp, StringIO(few_bytes), 1023)
    header = reset_read_beginning(container_fp)[0]
    nt.assert_equal(appended, 1)
    nt.assert_equal(header['last_chunk'], 1023)

    # second append: expected to go into that last chunk
    appended = reset_append_fp(container_fp, StringIO(few_bytes), 1023)
    header = reset_read_beginning(container_fp)[0]
    nt.assert_equal(appended, 0)
    nt.assert_equal(header['last_chunk'], 2046)

    # verify the complete content by unpacking
    unpack(CompressedFPSource(container_fp), PlainFPSink(unpacked_fp))
    unpacked_fp.seek(0)
    plain_fp.seek(0)
    original = plain_fp.read()
    unpacked = unpacked_fp.read()
    nt.assert_equal(len(unpacked), len(original) + 2046)
    nt.assert_equal(unpacked, original + original[:1023] * 2)
Ejemplo n.º 4
0
def test_unpack_exception():
    """Unpacking plain packed bytes as an ndarray must raise NotANumpyArray.

    The raw bytes of an array are packed with the generic ``pack`` (not
    ``pack_ndarray``) and the result is then fed to ``unpack_ndarray_str``.
    """
    a = np.arange(50)
    sio = StringIO()
    # ``tobytes`` replaces ``tostring``, which is deprecated and no longer
    # available in NumPy 2.0; both return the same raw bytes
    a_str = a.tobytes()
    source = PlainFPSource(StringIO(a_str))
    sink = CompressedFPSink(sio)
    pack(source, sink, *calculate_nchunks(len(a_str)))
    nt.assert_raises(NotANumpyArray, unpack_ndarray_str, sio.getvalue())
Ejemplo n.º 5
0
def roundtrip_numpy_file_pointers(ndarray):
    """Pack ``ndarray`` into an in-memory stream and unpack it again.

    :param ndarray: the array handed to ``pack_ndarray``.
    :return: a ``(check, expected, actual)`` triple made of
        ``npt.assert_array_equal``, the input array and the array recovered
        from the stream. The check itself is not executed here.
    """
    buffer_fp = StringIO()
    pack_ndarray(ndarray, CompressedFPSink(buffer_fp))
    # rewind so that the source reads from the start of the stream
    buffer_fp.seek(0)
    restored = unpack_ndarray(CompressedFPSource(buffer_fp))
    return npt.assert_array_equal, ndarray, restored
Ejemplo n.º 6
0
def roundtrip_numpy_file_pointers(ndarray):
    """Send ``ndarray`` through ``pack_ndarray`` / ``unpack_ndarray``.

    Both directions use one shared in-memory stream. The function returns
    the tuple ``(npt.assert_array_equal, ndarray, unpacked)`` and leaves
    running the comparison to the caller.
    """
    stream = StringIO()
    compressed_sink = CompressedFPSink(stream)
    pack_ndarray(ndarray, compressed_sink)

    stream.seek(0)  # back to the start before reading
    compressed_source = CompressedFPSource(stream)
    unpacked = unpack_ndarray(compressed_source)

    return npt.assert_array_equal, ndarray, unpacked
Ejemplo n.º 7
0
def test_unpack_exception():
    """``unpack_ndarray_str`` must reject data not packed as an ndarray.

    The bytes of a NumPy array are compressed with the generic ``pack``
    function; ``unpack_ndarray_str`` is then expected to raise
    ``NotANumpyArray`` for the resulting string.
    """
    a = np.arange(50)
    sio = StringIO()
    # use ``tobytes``: ``tostring`` is a deprecated alias that was removed
    # in NumPy 2.0 and yields exactly the same bytes
    a_str = a.tobytes()
    source = PlainFPSource(StringIO(a_str))
    sink = CompressedFPSink(sio)
    pack(source, sink, *calculate_nchunks(len(a_str)))
    nt.assert_raises(NotANumpyArray, unpack_ndarray_str, sio.getvalue())
Ejemplo n.º 8
0
def test_append_single_chunk():
    """Check the ``nchunks`` header field after appends of various sizes.

    A container is first packed with a chunk size equal to the size of the
    generated payload. Then one chunk, two chunks, half a chunk and a few
    bytes are appended in turn, reading the header back after every step.
    """
    orig, new = StringIO(), StringIO()
    create_array_fp(1, new)
    new_size = new.tell()
    new.seek(0)
    chunking = calculate_nchunks(new_size, chunk_size=new_size)
    source = PlainFPSource(new)
    sink = CompressedFPSink(orig)
    pack(source, sink, *chunking)
    orig.seek(0)
    new.seek(0)

    # append a single chunk
    reset_append_fp(orig, new, new_size)
    bloscpack_header = reset_read_beginning(orig)[0]
    nt.assert_equal(bloscpack_header['nchunks'], 2)

    # append a large content, that amounts to two chunks
    new_content = new.read()
    new.seek(0)
    reset_append_fp(orig, StringIO(new_content * 2), new_size * 2)
    bloscpack_header = reset_read_beginning(orig)[0]
    nt.assert_equal(bloscpack_header['nchunks'], 4)

    # append half a chunk; slice the buffer to the announced size so the
    # supplied content and the declared size are consistent
    half_size = new_size // 2
    reset_append_fp(orig, StringIO(new_content[:half_size]), half_size)
    bloscpack_header = reset_read_beginning(orig)[0]
    nt.assert_equal(bloscpack_header['nchunks'], 5)

    # append a few bytes, declaring exactly as many bytes as are supplied
    few_bytes = 1023
    reset_append_fp(orig, StringIO(new_content[:few_bytes]), few_bytes)
    # make sure it is squashed into the last chunk
    bloscpack_header = reset_read_beginning(orig)[0]
    nt.assert_equal(bloscpack_header['nchunks'], 5)
Ejemplo n.º 9
0
def pack_unpack_mem(repeats,
                    chunk_size=DEFAULT_CHUNK_SIZE,
                    progress=False,
                    metadata=None):
    """Round-trip a generated test array through memory and file pointers.

    The data travels: plain file pointer -> compressed memory -> plain
    memory -> compressed file pointer -> plain file pointer. After each
    unpack the source's ``metadata`` is compared with the ``metadata``
    argument, and at the end the output is compared with the input.

    :param repeats: forwarded to ``create_array_fp``.
    :param chunk_size: requested chunk size for ``calculate_nchunks``.
    :param progress: print stage messages and forward to ``create_array_fp``.
    :param metadata: forwarded to both ``pack`` calls.
    :return: ``metadata`` attribute of the last compressed source.
    """
    plain_in, packed_out, plain_out = StringIO(), StringIO(), StringIO()
    if progress:
        print("Creating test array")
    create_array_fp(repeats, plain_in, progress=progress)
    total_size = plain_in.tell()
    if progress:
        print("Compressing")
    plain_in.seek(0)
    chunking = calculate_nchunks(total_size, chunk_size)
    nchunks, chunk_size, last_chunk_size = chunking

    # stage 1: plain file pointer -> compressed memory
    fp_input = PlainFPSource(plain_in)
    compressed_mem = CompressedMemorySink()
    pack(fp_input, compressed_mem,
         nchunks, chunk_size, last_chunk_size,
         metadata=metadata)

    # stage 2: compressed memory -> plain memory
    mem_source = CompressedMemorySource(compressed_mem)
    plain_mem = PlainMemorySink()
    unpack(mem_source, plain_mem)
    nt.assert_equal(metadata, mem_source.metadata)

    # stage 3: plain memory -> compressed file pointer
    mem_input = PlainMemorySource(plain_mem.chunks)
    fp_sink = CompressedFPSink(packed_out)
    pack(mem_input, fp_sink,
         nchunks, chunk_size, last_chunk_size,
         metadata=metadata)
    packed_out.seek(0)

    # stage 4: compressed file pointer -> plain file pointer
    fp_source = CompressedFPSource(packed_out)
    unpack(fp_source, PlainFPSink(plain_out))
    nt.assert_equal(metadata, fp_source.metadata)

    # compare input and final output from the start of both streams
    plain_in.seek(0)
    plain_out.seek(0)
    cmp_fp(plain_in, plain_out)
    return fp_source.metadata
Ejemplo n.º 10
0
def test_disable_offsets():
    """Packing with ``BloscpackArgs(offsets=False)`` yields no offsets."""
    plain_fp = StringIO()
    packed_fp = StringIO()
    dcmp_fp = StringIO()
    create_array_fp(1, plain_fp)
    plain_size = plain_fp.tell()
    plain_fp.seek(0)

    no_offsets = BloscpackArgs(offsets=False)
    chunking = calculate_nchunks(plain_size)
    pack(PlainFPSource(plain_fp), CompressedFPSink(packed_fp),
         *chunking, bloscpack_args=no_offsets)

    # read the beginning back; only the offsets entry is of interest here
    packed_fp.seek(0)
    _, _, _, offsets = _read_beginning(packed_fp)
    nt.assert_true(len(offsets) == 0)
Ejemplo n.º 11
0
def test_append_into_last_chunk():
    """Append 1023 bytes twice and verify header fields and final content.

    Expected after the first append: ``reset_append_fp`` returns 1 and the
    header's ``last_chunk`` is 1023. Expected after the second append: the
    return value is 0 and ``last_chunk`` is 2046. The container is then
    unpacked and compared byte for byte.
    """
    # first create a container with a chunk size equal to the payload size
    orig, new, dcmp = StringIO(), StringIO(), StringIO()
    create_array_fp(1, new)
    size = new.tell()
    new.seek(0)
    chunking = calculate_nchunks(size, chunk_size=size)
    plain_source = PlainFPSource(new)
    packed_sink = CompressedFPSink(orig)
    pack(plain_source, packed_sink, *chunking)
    orig.seek(0)
    new.seek(0)

    content = new.read()
    new.seek(0)

    # each step: (expected return value, expected 'last_chunk' afterwards)
    expectations = ((1, 1023), (0, 2046))
    for expected_nchunks, expected_last_chunk in expectations:
        nchunks = reset_append_fp(orig, StringIO(content[:1023]), 1023)
        header = reset_read_beginning(orig)[0]
        nt.assert_equal(nchunks, expected_nchunks)
        nt.assert_equal(header['last_chunk'], expected_last_chunk)

    # now check by unpacking
    unpack(CompressedFPSource(orig), PlainFPSink(dcmp))
    dcmp.seek(0)
    new.seek(0)
    plain = new.read()
    roundtripped = dcmp.read()
    nt.assert_equal(len(roundtripped), len(plain) + 2046)
    nt.assert_equal(roundtripped, plain + plain[:1023] * 2)
Ejemplo n.º 12
0
def prep_array_for_append(blosc_args=None,
                          bloscpack_args=None):
    """Pack a generated test array and return the streams for append tests.

    :param blosc_args: forwarded to ``pack``; when omitted a fresh
        ``BloscArgs()`` is created for this call.
    :param bloscpack_args: forwarded to ``pack``; when omitted a fresh
        ``BloscpackArgs()`` is created for this call.
    :return: ``(orig, new, new_size, dcmp)``: the packed stream and the plain
        input stream (both rewound to 0), the position reported by
        ``new.tell()`` after writing the array, and a fresh, unused stream.
    """
    # build the defaults per call rather than in the signature, where a
    # single instance would be created once and shared by all invocations
    if blosc_args is None:
        blosc_args = BloscArgs()
    if bloscpack_args is None:
        bloscpack_args = BloscpackArgs()
    orig, new, dcmp = StringIO(), StringIO(), StringIO()
    create_array_fp(1, new)
    new_size = new.tell()
    new.seek(0)
    chunking = calculate_nchunks(new_size)
    source = PlainFPSource(new)
    sink = CompressedFPSink(orig)
    pack(source, sink, *chunking,
         blosc_args=blosc_args,
         bloscpack_args=bloscpack_args)
    orig.seek(0)
    new.seek(0)
    return orig, new, new_size, dcmp
Ejemplo n.º 13
0
def test_offsets():
    """Check the chunk offsets written for a '2M'-chunked container.

    First a temporary file is packed with ``pack_file`` and the offsets and
    the blosc header found at the second offset are compared with fixed
    values. Then the same data is packed in memory with
    ``max_app_chunks=0`` and the offsets are compared again.
    """
    offsets_in_file = [736, 368207, 633319, 902306, 1173771,
                       1419535, 1666981, 1913995]
    offsets_in_memory = [96, 367567, 632679, 901666,
                         1173131, 1418895, 1666341, 1913355]
    second_blosc_header = {'versionlz': 1,
                           'blocksize': 262144,
                           'ctbytes':   265108,
                           'version':   2,
                           'flags':     1,
                           'nbytes':    2097152,
                           'typesize':  8}

    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as packed_fp:
            header = _read_bloscpack_header(packed_fp)
            entry_count = header.total_prospective_chunks
            offsets = _read_offsets(packed_fp, header)
            # the first chunk should start after the header and the offsets
            # (8 bytes per prospective entry)
            expected_first = BLOSCPACK_HEADER_LENGTH + 8 * entry_count
            nt.assert_equal(offsets[0], expected_first)
            # the remaining offsets are compared against recorded values
            nt.assert_equal(offsets_in_file, offsets)
            # decode the blosc header located at the second offset
            packed_fp.seek(offsets[1], 0)
            raw_header = packed_fp.read(BLOSC_HEADER_LENGTH)
            decoded_header = decode_blosc_header(raw_header)
            nt.assert_equal(second_blosc_header, decoded_header)

    # same check again, this time without any max_app_chunks
    plain_fp, packed_fp = StringIO(), StringIO()
    create_array_fp(1, plain_fp)
    chunking = calculate_nchunks(plain_fp.tell(), chunk_size='2M')
    nchunks, chunk_size, last_chunk_size = chunking
    plain_fp.seek(0, 0)
    no_app_chunks = BloscpackArgs(max_app_chunks=0)
    pack(PlainFPSource(plain_fp), CompressedFPSink(packed_fp),
         nchunks, chunk_size, last_chunk_size,
         bloscpack_args=no_app_chunks)
    packed_fp.seek(0, 0)
    header = _read_bloscpack_header(packed_fp)
    nt.assert_equal(0, header.max_app_chunks)
    offsets = _read_offsets(packed_fp, header)
    nt.assert_equal(offsets_in_memory, offsets)
Ejemplo n.º 14
0
def pack_unpack_fp(repeats,
                   chunk_size=DEFAULT_CHUNK_SIZE,
                   progress=False,
                   metadata=None):
    """Pack a generated test array to a file pointer and unpack it again.

    Compression goes through ``PlainFPSource`` / ``CompressedFPSink`` and
    decompression through ``CompressedFPSource`` / ``PlainFPSink``. The
    decompressed stream is compared with the input using ``cmp_fp``.

    :param repeats: forwarded to ``create_array_fp``.
    :param chunk_size: requested chunk size, forwarded to
        ``calculate_nchunks``.
    :param progress: when true, print a message before each stage; also
        forwarded to ``create_array_fp``.
    :param metadata: forwarded to ``pack``.
    :return: ``metadata`` attribute of the compressed source after unpacking.
    """
    in_fp, out_fp, dcmp_fp = StringIO(), StringIO(), StringIO()
    if progress:
        print("Creating test array")
    create_array_fp(repeats, in_fp, progress=progress)
    in_fp_size = in_fp.tell()
    if progress:
        print("Compressing")
    in_fp.seek(0)
    nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(in_fp_size, chunk_size)
    source = PlainFPSource(in_fp)
    sink = CompressedFPSink(out_fp)
    pack(source, sink, nchunks, chunk_size, last_chunk_size, metadata=metadata)
    out_fp.seek(0)
    if progress:
        print("Decompressing")
    source = CompressedFPSource(out_fp)
    sink = PlainFPSink(dcmp_fp)
    unpack(source, sink)
    if progress:
        print("Verifying")
    # rewind both streams first; otherwise the comparison would begin at the
    # positions left behind by pack/unpack instead of at the start of the data
    in_fp.seek(0)
    dcmp_fp.seek(0)
    cmp_fp(in_fp, dcmp_fp)
    return source.metadata
Ejemplo n.º 15
0
def test_offsets():
    """Check the first chunk offset and selected blosc header fields.

    Only the first offset and a subset of the blosc header found at the
    second offset are compared, once for a file packed with ``pack_file``
    and once for an in-memory container packed with ``max_app_chunks=0``.
    """
    # blosc header fields that are compared; other fields are ignored
    expected_fields = {'versionlz': 1,
                       'version':   2,
                       'flags':     1,
                       'nbytes':    2097152,
                       'typesize':  8}

    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as packed_fp:
            header = _read_bloscpack_header(packed_fp)
            entry_count = header.total_prospective_chunks
            offsets = _read_offsets(packed_fp, header)
            # the first chunk should start after the header and the offsets
            expected_first = BLOSCPACK_HEADER_LENGTH + 8 * entry_count
            nt.assert_equal(offsets[0], expected_first)
            nt.assert_equal(736, offsets[0])
            # decode the blosc header located at the second offset
            packed_fp.seek(offsets[1], 0)
            raw_header = packed_fp.read(BLOSC_HEADER_LENGTH)
            decoded = decode_blosc_header(raw_header)
            selected = {key: decoded[key] for key in expected_fields}
            nt.assert_equal(expected_fields, selected)

    # same check again, this time without any max_app_chunks
    plain_fp, packed_fp = StringIO(), StringIO()
    create_array_fp(1, plain_fp)
    chunking = calculate_nchunks(plain_fp.tell(), chunk_size='2M')
    nchunks, chunk_size, last_chunk_size = chunking
    plain_fp.seek(0, 0)
    no_app_chunks = BloscpackArgs(max_app_chunks=0)
    pack(PlainFPSource(plain_fp), CompressedFPSink(packed_fp),
         nchunks, chunk_size, last_chunk_size,
         bloscpack_args=no_app_chunks)
    packed_fp.seek(0, 0)
    header = _read_bloscpack_header(packed_fp)
    nt.assert_equal(0, header.max_app_chunks)
    offsets = _read_offsets(packed_fp, header)
    nt.assert_equal(96, offsets[0])
Ejemplo n.º 16
0
def pack_unpack_mem(repeats, chunk_size=DEFAULT_CHUNK_SIZE,
                    progress=False, metadata=None):
    """Pack and unpack a test array via memory and file-pointer containers.

    Sequence of conversions: ``PlainFPSource`` -> ``CompressedMemorySink``,
    ``CompressedMemorySource`` -> ``PlainMemorySink``, ``PlainMemorySource``
    -> ``CompressedFPSink`` and ``CompressedFPSource`` -> ``PlainFPSink``.
    The ``metadata`` of each compressed source is checked against the
    argument, and the final output is compared with the original input.

    Returns the ``metadata`` attribute of the last compressed source.
    """
    original_fp, compressed_fp, result_fp = StringIO(), StringIO(), StringIO()
    if progress:
        print("Creating test array")
    create_array_fp(repeats, original_fp, progress=progress)
    original_size = original_fp.tell()
    if progress:
        print("Compressing")
    original_fp.seek(0)
    nchunks, chunk_size, last_chunk_size = calculate_nchunks(original_size,
                                                             chunk_size)
    pack_kwargs = {'metadata': metadata}

    # file pointer into compressed memory
    step_source = PlainFPSource(original_fp)
    memory_container = CompressedMemorySink()
    pack(step_source, memory_container,
         nchunks, chunk_size, last_chunk_size, **pack_kwargs)

    # compressed memory into plain memory
    step_source = CompressedMemorySource(memory_container)
    memory_plain = PlainMemorySink()
    unpack(step_source, memory_plain)
    nt.assert_equal(metadata, step_source.metadata)

    # plain memory into compressed file pointer
    step_source = PlainMemorySource(memory_plain.chunks)
    fp_container = CompressedFPSink(compressed_fp)
    pack(step_source, fp_container,
         nchunks, chunk_size, last_chunk_size, **pack_kwargs)
    compressed_fp.seek(0)

    # compressed file pointer into plain file pointer
    step_source = CompressedFPSource(compressed_fp)
    fp_plain = PlainFPSink(result_fp)
    unpack(step_source, fp_plain)
    nt.assert_equal(metadata, step_source.metadata)

    # rewind and compare the original with the final result
    original_fp.seek(0)
    result_fp.seek(0)
    cmp_fp(original_fp, result_fp)
    return step_source.metadata
Ejemplo n.º 17
0
def test_append_mix_shuffle():
    """Append data with shuffle disabled and compare per-chunk flags.

    The container returned by ``prep_array_for_append`` receives an array of
    zeros appended with ``shuffle=False``. The container is unpacked and its
    content checked, then the blosc headers of the first and the last chunk
    are read and their ``flags`` are expected to be 1 and 0 respectively.
    """
    orig, new, new_size, dcmp = prep_array_for_append()
    # use the typesize from the file
    # deactivate shuffle
    # crank up the clevel to ensure compression happens, otherwise the flags
    # will be screwed later on
    blosc_args = BloscArgs(typesize=None, shuffle=False, clevel=9)

    # need to create something that will be compressible even without shuffle,
    # the linspace used in 'new' doesn't work anymore as of python-blosc 1.6.1
    to_append = np.zeros(int(2e6))
    # ``tobytes`` replaces the deprecated ``tostring`` (removed in NumPy 2.0);
    # serialize once and reuse the bytes for writing and for the final check
    to_append_bytes = to_append.tobytes()
    to_append_fp = StringIO()
    to_append_fp.write(to_append_bytes)
    to_append_fp_size = to_append_fp.tell()
    to_append_fp.seek(0)

    # now do the append
    reset_append_fp(orig,
                    to_append_fp,
                    to_append_fp_size,
                    blosc_args=blosc_args)

    # decompress 'orig' so that we can examine it
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    orig.seek(0)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()

    # now sanity check the length and content of the decompressed
    nt.assert_equal(len(dcmp_str), len(new_str) + to_append_fp_size)
    nt.assert_equal(dcmp_str, new_str + to_append_bytes)

    # now get the first and the last chunk and check that the shuffle doesn't
    # match; the stepped slice picks elements 0 and 3 of the returned sequence
    bloscpack_header, offsets = reset_read_beginning(orig)[0:4:3]
    orig.seek(offsets[0])
    checksum_impl = CHECKSUMS_LOOKUP[bloscpack_header['checksum']]
    compressed_zero, blosc_header_zero, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    # the result is unused; the call is kept so that decompression is still
    # exercised on the chunk
    decompressed_zero = blosc.decompress(compressed_zero)
    orig.seek(offsets[-1])
    compressed_last, blosc_header_last, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_last = blosc.decompress(compressed_last)
    # first chunk has shuffle active
    nt.assert_equal(blosc_header_zero['flags'], 1)
    # last chunk doesn't
    nt.assert_equal(blosc_header_last['flags'], 0)
Ejemplo n.º 18
0
def test_append_metadata():
    """Append to a container that carries metadata and verify the result.

    The container is packed with a metadata dict, the same payload is
    appended once more, and after unpacking both the metadata exposed by the
    compressed source and the doubled content are checked.
    """
    orig, new, dcmp = StringIO(), StringIO(), StringIO()
    create_array_fp(1, new)
    new_size = new.tell()
    new.seek(0)

    metadata = {"dtype": "float64", "shape": [1024], "others": []}
    chunking = calculate_nchunks(new_size, chunk_size=new_size)
    source = PlainFPSource(new)
    sink = CompressedFPSink(orig)
    pack(source, sink, *chunking, metadata=metadata)
    orig.seek(0)
    new.seek(0)
    reset_append_fp(orig, new, new_size)
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    # assert on the metadata instead of printing a value for manual
    # inspection: the append is expected to leave the metadata untouched
    nt.assert_equal(metadata, source.metadata)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()
    nt.assert_equal(len(dcmp_str), len(new_str) * 2)
    nt.assert_equal(dcmp_str, new_str * 2)
Ejemplo n.º 19
0
def test_metadata_opportunisitic_compression():
    """Check which ``meta_codec`` is recorded for two metadata payloads.

    With default ``MetadataArgs`` a longer payload is expected to be stored
    with ``'zlib'``, whereas the short payload ``"abc"`` is expected to be
    stored with ``'None'``.
    """
    def recorded_codec(payload):
        # write the payload with default args, read it back from the start
        # and report the codec found in the metadata header
        fp = StringIO()
        _write_metadata(fp, payload, MetadataArgs())
        fp.seek(0, 0)
        _, header = _read_metadata(fp)
        return header['meta_codec']

    # metadata that can be compressed with benefit
    compressible = ("{'dtype': 'float64', 'shape': [1024], "
                    "'others': [],'original_container': 'carray'}")
    nt.assert_equal('zlib', recorded_codec(compressible))

    # badly compressible metadata: the default args ask for compression,
    # but it is of no use here
    nt.assert_equal('None', recorded_codec("abc"))
Ejemplo n.º 20
0
def test_disable_offsets():
    """With ``offsets=False`` the packed stream must contain no offsets.

    A generated array is packed using ``BloscpackArgs(offsets=False)`` and
    the offsets returned by ``_read_beginning`` are expected to be empty.
    """
    in_fp, out_fp, dcmp_fp = StringIO(), StringIO(), StringIO()
    create_array_fp(1, in_fp)
    size = in_fp.tell()
    in_fp.seek(0)

    args_without_offsets = BloscpackArgs(offsets=False)
    plain_source = PlainFPSource(in_fp)
    packed_sink = CompressedFPSink(out_fp)
    nchunk_args = calculate_nchunks(size)
    pack(plain_source, packed_sink, *nchunk_args,
         bloscpack_args=args_without_offsets)

    out_fp.seek(0)
    beginning = _read_beginning(out_fp)
    # four items are expected; the last one holds the offsets
    bloscpack_header, metadata, metadata_header, offsets = beginning
    nt.assert_true(len(offsets) == 0)
Ejemplo n.º 21
0
def test_offsets():
    """Compare the written chunk offsets with recorded reference values.

    Part one packs a temporary file with ``pack_file`` (chunk size '2M') and
    checks all offsets plus the blosc header at the second offset. Part two
    packs the same data in memory with ``max_app_chunks=0`` and checks the
    offsets once more.
    """
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as fp:
            header = _read_bloscpack_header(fp)
            n_entries = header.total_prospective_chunks
            found_offsets = _read_offsets(fp, header)
            # first chunk should start after header and offsets
            nt.assert_equal(found_offsets[0],
                            BLOSCPACK_HEADER_LENGTH + 8 * n_entries)
            # the others are compared with previously recorded values
            reference = [736, 368207, 633319, 902306,
                         1173771, 1419535, 1666981, 1913995]
            nt.assert_equal(reference, found_offsets)
            # try to read the second blosc header
            fp.seek(found_offsets[1], 0)
            second_header = decode_blosc_header(fp.read(BLOSC_HEADER_LENGTH))
            reference_header = dict([
                ('versionlz', 1),
                ('blocksize', 262144),
                ('ctbytes', 265108),
                ('version', 2),
                ('flags', 1),
                ('nbytes', 2097152),
                ('typesize', 8),
            ])
            nt.assert_equal(reference_header, second_header)

    # now check the same thing again, but w/o any max_app_chunks
    source_fp, target_fp = StringIO(), StringIO()
    create_array_fp(1, source_fp)
    nchunks, chunk_size, last_chunk_size = calculate_nchunks(
        source_fp.tell(), chunk_size='2M')
    source_fp.seek(0, 0)
    args = BloscpackArgs(max_app_chunks=0)
    plain_source = PlainFPSource(source_fp)
    packed_sink = CompressedFPSink(target_fp)
    pack(plain_source, packed_sink,
         nchunks, chunk_size, last_chunk_size,
         bloscpack_args=args)
    target_fp.seek(0, 0)
    header = _read_bloscpack_header(target_fp)
    nt.assert_equal(0, header.max_app_chunks)
    found_offsets = _read_offsets(target_fp, header)
    reference = [96, 367567, 632679, 901666,
                 1173131, 1418895, 1666341, 1913355]
    nt.assert_equal(reference, found_offsets)
Ejemplo n.º 22
0
def test_append_mix_shuffle():
    """Mix shuffled and unshuffled chunks in one container via append.

    An array of zeros is appended with ``shuffle=False`` to the container
    from ``prep_array_for_append``. After checking the unpacked content, the
    blosc headers of the first and last chunk are read; their ``flags`` are
    expected to be 1 and 0 respectively.
    """
    orig, new, new_size, dcmp = prep_array_for_append()
    # use the typesize from the file
    # deactivate shuffle
    # crank up the clevel to ensure compression happens, otherwise the flags
    # will be screwed later on
    blosc_args = BloscArgs(typesize=None, shuffle=False, clevel=9)

    # need to create something that will be compressible even without shuffle,
    # the linspace used in 'new' doesn't work anymore as of python-blosc 1.6.1
    to_append = np.zeros(int(2e6))
    # ``tostring`` is deprecated and gone in NumPy 2.0; ``tobytes`` returns
    # the same bytes, which are computed once and reused below
    to_append_bytes = to_append.tobytes()
    to_append_fp = StringIO()
    to_append_fp.write(to_append_bytes)
    to_append_fp_size = to_append_fp.tell()
    to_append_fp.seek(0)

    # now do the append
    reset_append_fp(orig, to_append_fp, to_append_fp_size, blosc_args=blosc_args)

    # decompress 'orig' so that we can examine it
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    orig.seek(0)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()

    # now sanity check the length and content of the decompressed
    nt.assert_equal(len(dcmp_str), len(new_str) + to_append_fp_size)
    nt.assert_equal(dcmp_str, new_str + to_append_bytes)

    # now get the first and the last chunk and check that the shuffle doesn't
    # match; the stepped slice selects elements 0 and 3 of the result
    bloscpack_header, offsets = reset_read_beginning(orig)[0:4:3]
    orig.seek(offsets[0])
    checksum_impl = CHECKSUMS_LOOKUP[bloscpack_header['checksum']]
    compressed_zero, blosc_header_zero, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    # unused result; the call is retained so decompression is still run
    decompressed_zero = blosc.decompress(compressed_zero)
    orig.seek(offsets[-1])
    compressed_last, blosc_header_last, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_last = blosc.decompress(compressed_last)
    # first chunk has shuffle active
    nt.assert_equal(blosc_header_zero['flags'], 1)
    # last chunk doesn't
    nt.assert_equal(blosc_header_last['flags'], 0)
Ejemplo n.º 23
0
def prep_array_for_append(blosc_args=None,
                          bloscpack_args=None):
    """Create a packed test array plus the streams needed by append tests.

    :param blosc_args: passed on to ``pack``; defaults to a new
        ``BloscArgs()`` created on each call.
    :param bloscpack_args: passed on to ``pack``; defaults to a new
        ``BloscpackArgs()`` created on each call.
    :return: tuple ``(orig, new, new_size, dcmp)`` with the packed stream,
        the plain input stream (both positioned at 0), the value of
        ``new.tell()`` after the array was written, and a fresh stream that
        has not been written to.
    """
    # instantiate defaults inside the function: instances placed in the
    # signature are evaluated once at definition time and shared by all calls
    if blosc_args is None:
        blosc_args = BloscArgs()
    if bloscpack_args is None:
        bloscpack_args = BloscpackArgs()
    orig, new, dcmp = StringIO(), StringIO(), StringIO()
    create_array_fp(1, new)
    new_size = new.tell()
    new.seek(0)
    chunking = calculate_nchunks(new_size)
    source = PlainFPSource(new)
    sink = CompressedFPSink(orig)
    pack(source, sink, *chunking,
         blosc_args=blosc_args,
         bloscpack_args=bloscpack_args)
    orig.seek(0)
    new.seek(0)
    return orig, new, new_size, dcmp
Ejemplo n.º 24
0
def test_offsets():
    """Check the first offset and a subset of the second blosc header.

    Part one packs a temporary file with ``pack_file_to_file`` (chunk size
    '2M'); part two packs the same data in memory with ``max_app_chunks=0``.
    In both cases only the first offset is compared with a fixed value.
    """
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file_to_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as fp:
            header = _read_bloscpack_header(fp)
            n_entries = header.total_prospective_chunks
            found_offsets = _read_offsets(fp, header)
            # first chunk should start after header and offsets
            nt.assert_equal(found_offsets[0],
                            BLOSCPACK_HEADER_LENGTH + 8 * n_entries)
            nt.assert_equal(736, found_offsets[0])
            # try to read the second blosc header
            fp.seek(found_offsets[1], 0)
            second_header = decode_blosc_header(fp.read(BLOSC_HEADER_LENGTH))
            # only these fields are compared; the rest is ignored
            reference = dict([
                ('versionlz', 1),
                ('version', 2),
                ('flags', 1),
                ('nbytes', 2097152),
                ('typesize', 8),
            ])
            subset = {}
            for field in reference.keys():
                subset[field] = second_header[field]
            nt.assert_equal(reference, subset)

    # now check the same thing again, but w/o any max_app_chunks
    source_fp, target_fp = StringIO(), StringIO()
    create_array_fp(1, source_fp)
    nchunks, chunk_size, last_chunk_size = calculate_nchunks(
        source_fp.tell(), chunk_size='2M')
    source_fp.seek(0, 0)
    args = BloscpackArgs(max_app_chunks=0)
    plain_source = PlainFPSource(source_fp)
    packed_sink = CompressedFPSink(target_fp)
    pack(plain_source, packed_sink,
         nchunks, chunk_size, last_chunk_size,
         bloscpack_args=args)
    target_fp.seek(0, 0)
    header = _read_bloscpack_header(target_fp)
    nt.assert_equal(0, header.max_app_chunks)
    found_offsets = _read_offsets(target_fp, header)
    nt.assert_equal(96, found_offsets[0])
Ejemplo n.º 25
0
def test_metadata_opportunisitic_compression():
    """Verify the codec recorded in the metadata header for two payloads.

    Both payloads are written with default ``MetadataArgs``. The longer one
    is expected to end up with ``meta_codec`` ``"zlib"``, the three-character
    one with ``"None"``.
    """
    cases = (
        # metadata that can be compressed with benefit
        ("{'dtype': 'float64', 'shape': [1024], 'others': [],"
         "'original_container': 'carray'}", "zlib"),
        # badly compressible metadata: compression is requested by the
        # default args, but it isn't of any use
        ("abc", "None"),
    )
    for payload, expected_codec in cases:
        stream = StringIO()
        _write_metadata(stream, payload, MetadataArgs())
        stream.seek(0, 0)
        metadata, header = _read_metadata(stream)
        nt.assert_equal(expected_codec, header["meta_codec"])
Ejemplo n.º 26
0
def test_append_metadata():
    """Check that appending works for a container packed with metadata.

    After packing with a metadata dict and appending the same payload once,
    the container is unpacked; the metadata reported by the compressed
    source and the doubled content are both verified.
    """
    orig, new, dcmp = StringIO(), StringIO(), StringIO()
    create_array_fp(1, new)
    new_size = new.tell()
    new.seek(0)

    metadata = {"dtype": "float64", "shape": [1024], "others": []}
    chunking = calculate_nchunks(new_size, chunk_size=new_size)
    source = PlainFPSource(new)
    sink = CompressedFPSink(orig)
    pack(source, sink, *chunking, metadata=metadata)
    orig.seek(0)
    new.seek(0)
    reset_append_fp(orig, new, new_size)
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    # replace the former debug print with a real check: the metadata is
    # expected to be unchanged by the append
    nt.assert_equal(metadata, source.metadata)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()
    nt.assert_equal(len(dcmp_str), len(new_str) * 2)
    nt.assert_equal(dcmp_str, new_str * 2)
Ejemplo n.º 27
0
def test_rewrite_metadata():
    """Exercise ``_rewrite_metadata_fp`` on a preallocated metadata section.

    Steps: write metadata into a section with ``max_meta_size`` 1000, rewrite
    it with extra keys and read it back, then check that
    ``NoChangeInMetadata``, ``ChecksumLengthMismatch`` and
    ``MetadataSectionTooSmall`` are raised in the respective situations.
    """
    test_metadata = dict([
        ('dtype', 'float64'),
        ('shape', [1024]),
        ('others', []),
    ])
    # start from the default metadata args, then switch off checksum and
    # codec and preallocate a fixed size
    metadata_args = MetadataArgs()
    for attribute, value in (('meta_checksum', 'None'),
                             ('meta_codec', 'None'),
                             ('max_meta_size', 1000)):
        setattr(metadata_args, attribute, value)

    target_fp = StringIO()
    # write the metadata section and check its total length
    _write_metadata(target_fp, test_metadata, metadata_args)
    nt.assert_equal(METADATA_HEADER_LENGTH + metadata_args.max_meta_size,
                    len(target_fp.getvalue()))

    # now add stuff to the metadata and compute the new serialized length
    test_metadata.update([('container', 'numpy'), ('data_origin', 'LHC')])
    new_metadata_length = len(SERIALIZERS[0].dumps(test_metadata))
    # jam the new metadata into the StringIO
    target_fp.seek(0, 0)
    _rewrite_metadata_fp(target_fp, test_metadata, codec=None, level=None)
    # read it back from the start and make sure it has been updated
    target_fp.seek(0, 0)
    result_metadata, result_header = _read_metadata(target_fp)
    nt.assert_equal(test_metadata, result_metadata)
    nt.assert_equal(new_metadata_length, result_header.meta_comp_size)

    # rewriting identical metadata must raise NoChangeInMetadata
    target_fp.seek(0, 0)
    with nt.assert_raises(NoChangeInMetadata):
        _rewrite_metadata_fp(target_fp, test_metadata,
                             codec=None, level=None)

    # ChecksumLengthMismatch is expected when a 'sha512' checksum is
    # requested; this needs modified metadata
    target_fp.seek(0, 0)
    test_metadata['fluxcompensator'] = 'back to the future'
    with nt.assert_raises(ChecksumLengthMismatch):
        _rewrite_metadata_fp(target_fp, test_metadata,
                             codec=None, level=None, checksum='sha512')

    # rewriting must also work when ``level`` is not passed explicitly
    target_fp.seek(0, 0)
    test_metadata['hoverboard'] = 'back to the future 2'
    _rewrite_metadata_fp(target_fp, test_metadata, codec=None)

    # add 100 more entries; per the original note the dumped metadata should
    # then be around 1105 long, i.e. more than the preallocated 1000
    test_metadata.update((str(i), str(i)) for i in range(100))
    target_fp.seek(0, 0)
    with nt.assert_raises(MetadataSectionTooSmall):
        _rewrite_metadata_fp(target_fp, test_metadata,
                             codec=None, level=None)
Ejemplo n.º 28
0
def test_rewrite_metadata():
    """Rewrite metadata in place and check the success and error paths.

    A metadata section preallocated with ``max_meta_size`` 1000 is written,
    then rewritten with additional keys and read back. Afterwards the test
    expects ``NoChangeInMetadata`` for unchanged metadata,
    ``ChecksumLengthMismatch`` when ``checksum='sha512'`` is requested and
    ``MetadataSectionTooSmall`` once the metadata has grown by 100 entries.
    """
    def expect_rewrite_error(error_type, **extra_kwargs):
        # rewind, then require that rewriting the current metadata raises
        target_fp.seek(0, 0)
        nt.assert_raises(error_type, _rewrite_metadata_fp,
                         target_fp, test_metadata,
                         codec=None, level=None, **extra_kwargs)

    test_metadata = {}
    test_metadata['dtype'] = 'float64'
    test_metadata['shape'] = [1024]
    test_metadata['others'] = []

    # default metadata args, without checksum and codec, and with a fixed
    # preallocation
    metadata_args = MetadataArgs()
    metadata_args.meta_checksum = 'None'
    metadata_args.meta_codec = 'None'
    metadata_args.max_meta_size = 1000

    # write the metadata section and check that the length is correct
    target_fp = StringIO()
    _write_metadata(target_fp, test_metadata, metadata_args)
    expected_length = METADATA_HEADER_LENGTH + metadata_args.max_meta_size
    nt.assert_equal(expected_length, len(target_fp.getvalue()))

    # extend the metadata and remember its serialized length
    test_metadata['container'] = 'numpy'
    test_metadata['data_origin'] = 'LHC'
    serialized = SERIALIZERS[0].dumps(test_metadata)
    new_metadata_length = len(serialized)

    # rewrite in place, then read back from the start and compare
    target_fp.seek(0, 0)
    _rewrite_metadata_fp(target_fp, test_metadata,
                         codec=None, level=None)
    target_fp.seek(0, 0)
    result_metadata, result_header = _read_metadata(target_fp)
    nt.assert_equal(test_metadata, result_metadata)
    nt.assert_equal(new_metadata_length, result_header.meta_comp_size)

    # unchanged metadata: NoChangeInMetadata
    expect_rewrite_error(NoChangeInMetadata)

    # modified metadata plus a 'sha512' checksum: ChecksumLengthMismatch
    test_metadata['fluxcompensator'] = 'back to the future'
    expect_rewrite_error(ChecksumLengthMismatch, checksum='sha512')

    # a rewrite that leaves ``level`` at its default must work as well
    target_fp.seek(0, 0)
    test_metadata['hoverboard'] = 'back to the future 2'
    _rewrite_metadata_fp(target_fp, test_metadata,
                         codec=None)

    # grow the metadata by 100 entries (original note: about 1105 long when
    # dumped to json), which no longer fits: MetadataSectionTooSmall
    for number in range(100):
        key = str(number)
        test_metadata[key] = str(number)
    expect_rewrite_error(MetadataSectionTooSmall)