Example #1
0
def pack_unpack_fp(repeats, nchunks=None, chunk_size=None,
        progress=False, metadata=None):
    """Round-trip a generated test array through _pack_fp/_unpack_fp in memory.

    Builds a test array of size governed by *repeats*, compresses it with the
    default blosc/offset/checksum/metadata settings, decompresses it again and
    verifies the round trip via cmp_fp.  Returns the metadata recovered by
    _unpack_fp when it is truthy, otherwise returns None implicitly.
    """
    def report(message):
        # Progress output is opt-in.
        if progress:
            print(message)

    source, packed, restored = StringIO(), StringIO(), StringIO()
    report("Creating test array")
    create_array_fp(repeats, source, progress=progress)
    source_size = source.tell()
    report("Compressing")
    source.seek(0)
    bloscpack._pack_fp(source, packed, source_size,
            DEFAULT_BLOSC_ARGS, metadata,
            nchunks, chunk_size, DEFAULT_OFFSETS, DEFAULT_CHECKSUM,
            DEFAULT_METADATA_ARGS)
    packed.seek(0)
    report("Decompressing")
    received_metadata = bloscpack._unpack_fp(packed, restored)
    report("Verifying")
    cmp_fp(source, restored)
    if received_metadata:
        return received_metadata
Example #2
0
def test_offsets():
    """Verify the chunk offsets recorded by pack_file and _pack_fp.

    First checks a file packed with the default max_app_chunks, then repeats
    the check in memory with max_app_chunks forced to zero.
    """
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        bloscpack.pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as fp:
            header = bloscpack._read_bloscpack_header(fp)
            entries = header['nchunks'] + header['max_app_chunks']
            offsets = bloscpack._read_offsets(fp, header)
            # The first chunk must begin directly after the bloscpack header
            # and the offsets section (8 bytes per offset entry).
            nt.assert_equal(offsets[0],
                    BLOSCPACK_HEADER_LENGTH + 8 * entries)
            # We assume that the others are correct
            nt.assert_equal([736, 418578, 736870, 1050327,
                1363364, 1660766, 1959218, 2257703],
                    offsets)
            # try to read the second header
            fp.seek(offsets[1], 0)
            raw_blosc_header = fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 131072,
                        'ctbytes':   318288,
                        'version':   2,
                        'flags':     1,
                        'nbytes':    2097152,
                        'typesize':  8}
            nt.assert_equal(expected, decode_blosc_header(raw_blosc_header))

    # now check the same thing again, but w/o any max_app_chunks
    source, packed = StringIO(), StringIO()
    create_array_fp(1, source)
    nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(source.tell(), chunk_size='2M')
    source.seek(0, 0)
    bloscpack_args = DEFAULT_BLOSCPACK_ARGS.copy()
    bloscpack_args['max_app_chunks'] = 0
    bloscpack._pack_fp(source, packed,
            nchunks, chunk_size, last_chunk_size,
            bloscpack_args=bloscpack_args
            )
    packed.seek(0, 0)
    header = bloscpack._read_bloscpack_header(packed)
    nt.assert_equal(0, header['max_app_chunks'])
    offsets = bloscpack._read_offsets(packed, header)
    nt.assert_equal([96, 417938, 736230, 1049687,
        1362724, 1660126, 1958578, 2257063],
            offsets)
Example #3
0
def test_metadata_opportunisitic_compression():
    """Exercise opportunistic metadata compression with both a metadata
    string that compresses well and one that does not.

    NOTE(review): the function name misspells "opportunistic"; it is kept
    unchanged so test discovery and any external references still work.
    NOTE(review): like the original, this test decodes the options field but
    asserts nothing about it — presumably options['compress_meta'] should be
    True for the first case and False for the second; TODO confirm and assert.
    """
    # make up some metadata that can be compressed with benefit
    test_metadata = ("{'dtype': 'float64', 'shape': [1024], 'others': [],"
            "'original_container': 'carray'}")
    options = _pack_and_decode_options(test_metadata)
    #nt.assert_true(options['compress_meta'])

    # now do the same thing, but use badly compressible metadata
    options = _pack_and_decode_options("abc")


def _pack_and_decode_options(test_metadata):
    """Pack a small test array with *test_metadata* attached and return the
    decoded options field from the resulting bloscpack header.

    Factors out the pack/decode sequence that was previously duplicated
    verbatim for each metadata variant; the unused decompression buffer the
    duplicated code allocated has been dropped.
    """
    in_fp, out_fp = StringIO(), StringIO()
    create_array_fp(1, in_fp)
    in_fp_size = in_fp.tell()
    in_fp.seek(0)
    bloscpack._pack_fp(in_fp, out_fp, in_fp_size,
            DEFAULT_BLOSC_ARGS,
            test_metadata,
            1,
            None,
            DEFAULT_OFFSETS,
            DEFAULT_CHECKSUM,
            DEFAULT_METADATA_ARGS)
    out_fp.seek(0)
    # The bloscpack header is the first 32 bytes of the compressed stream.
    raw_header = out_fp.read(32)
    header = decode_bloscpack_header(raw_header)
    return decode_options(header['options'])
Example #4
0
def pack_unpack_fp(repeats, chunk_size=DEFAULT_CHUNK_SIZE,
        progress=False, metadata=None):
    """Round-trip a generated test array through _pack_fp/_unpack_fp in memory.

    Generates a test array sized by *repeats*, packs it using chunk sizing
    from calculate_nchunks, unpacks it and verifies the result with cmp_fp.
    Returns the metadata recovered by _unpack_fp when it is truthy.
    """
    source, packed, restored = StringIO(), StringIO(), StringIO()
    if progress:
        print("Creating test array")
    create_array_fp(repeats, source, progress=progress)
    raw_size = source.tell()
    if progress:
        print("Compressing")
    source.seek(0)
    # Derive the chunking layout from the uncompressed size.
    nchunks, chunk_size, last_chunk_size = \
            calculate_nchunks(raw_size, chunk_size)
    bloscpack._pack_fp(source, packed,
            nchunks, chunk_size, last_chunk_size,
            metadata=metadata)
    packed.seek(0)
    if progress:
        print("Decompressing")
    received_metadata = bloscpack._unpack_fp(packed, restored)
    if progress:
        print("Verifying")
    cmp_fp(source, restored)
    if received_metadata:
        return received_metadata