예제 #1
0
def test_offsets():
    """Check the chunk offsets that ``bloscpack.pack_file`` writes.

    Packs the data produced by ``create_array(1, in_file)`` into six chunks,
    decodes the bloscpack header and the offsets section by hand and compares
    the offsets, as well as the blosc header found at the second offset,
    against hard-coded expected values.
    """
    blosc_args = DEFAULT_BLOSC_ARGS
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        bloscpack.pack_file(in_file, out_file, blosc_args, nchunks=6)
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header_raw = input_fp.read(BLOSCPACK_HEADER_LENGTH)
            bloscpack_header = decode_bloscpack_header(bloscpack_header_raw)
            nchunks = bloscpack_header['nchunks']
            # The offsets section holds one 8 byte entry per chunk.
            offsets_raw = input_fp.read(8 * nchunks)
            # ``range`` (not ``xrange``) keeps this runnable on Python 2 and 3.
            offsets = [decode_int64(offsets_raw[start:start + 8])
                       for start in range(0, 8 * nchunks, 8)]
            # First chunks should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * nchunks
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([80, 585990, 1071780, 1546083, 2003986, 2460350],
                            offsets)
            # try to read the second header; seek to the decoded offset
            # rather than repeating the literal value from the list above
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 131072,
                        'ctbytes':   485786,
                        'version':   2,
                        'flags':     1,
                        'nbytes':    3200000,
                        'typesize':  8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            nt.assert_equal(expected, blosc_header)
예제 #2
0
def test_invalid_format():
    """Unpacking must raise ``FormatVersionMismatch`` for a patched version.

    Temporarily sets ``bloscpack.FORMAT_VERSION`` to ``MAX_FORMAT_VERSION``,
    packs a file and asserts that ``unpack_file`` raises
    ``FormatVersionMismatch``. The module attribute is restored afterwards,
    also when the test itself fails.
    """
    # this will cause a bug if we ever reach 255 format versions
    bloscpack.FORMAT_VERSION = MAX_FORMAT_VERSION
    try:
        blosc_args = DEFAULT_BLOSC_ARGS
        with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
            create_array(1, in_file)
            bloscpack.pack_file(in_file, out_file, blosc_args=blosc_args)
            nt.assert_raises(FormatVersionMismatch,
                             unpack_file, out_file, dcmp_file)
    finally:
        # Always undo the module-level patch; otherwise a failure here would
        # leave the modified version in place for every test that runs later.
        bloscpack.FORMAT_VERSION = FORMAT_VERSION
예제 #3
0
def test_offsets():
    """Verify the offsets section with and without reserved append entries.

    The first part packs a file with ``chunk_size='2M'`` and checks the
    offsets and the blosc header located at the second offset. The second
    part packs the same kind of data through file-like objects with
    ``max_app_chunks`` set to 0 and checks the resulting offsets.
    """
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        bloscpack.pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as packed_fp:
            header = bloscpack._read_bloscpack_header(packed_fp)
            n_entries = header['nchunks'] + header['max_app_chunks']
            found_offsets = bloscpack._read_offsets(packed_fp, header)
            # The first chunk is expected right behind the header and the
            # offsets section (8 bytes per entry).
            expected_first = BLOSCPACK_HEADER_LENGTH + 8 * n_entries
            # The remaining offsets are only compared to recorded values.
            nt.assert_equal(found_offsets[0], expected_first)
            expected_offsets = [
                736, 418578, 736870, 1050327,
                1363364, 1660766, 1959218, 2257703,
            ]
            nt.assert_equal(expected_offsets, found_offsets)
            # Decode the blosc header stored at the second offset.
            packed_fp.seek(found_offsets[1], 0)
            raw_second_header = packed_fp.read(BLOSC_HEADER_LENGTH)
            expected_header = {
                'versionlz': 1,
                'blocksize': 131072,
                'ctbytes': 318288,
                'version': 2,
                'flags': 1,
                'nbytes': 2097152,
                'typesize': 8,
            }
            second_header = decode_blosc_header(raw_second_header)
            nt.assert_equal(expected_header, second_header)

    # Same check once more, this time without any max_app_chunks.
    source_fp = StringIO()
    sink_fp = StringIO()
    create_array_fp(1, source_fp)
    # The write position after filling the buffer is used as the input size.
    input_size = source_fp.tell()
    nchunks, chunk_size, last_chunk_size = calculate_nchunks(
        input_size, chunk_size='2M')
    source_fp.seek(0, 0)
    args_no_app_chunks = DEFAULT_BLOSCPACK_ARGS.copy()
    args_no_app_chunks['max_app_chunks'] = 0
    bloscpack._pack_fp(source_fp, sink_fp,
                       nchunks, chunk_size, last_chunk_size,
                       bloscpack_args=args_no_app_chunks)
    sink_fp.seek(0, 0)
    header = bloscpack._read_bloscpack_header(sink_fp)
    nt.assert_equal(0, header['max_app_chunks'])
    found_offsets = bloscpack._read_offsets(sink_fp, header)
    expected_offsets = [
        96, 417938, 736230, 1049687,
        1362724, 1660126, 1958578, 2257063,
    ]
    nt.assert_equal(expected_offsets, found_offsets)
예제 #4
0
# Benchmark: create test data, then time bloscpack and gzip on the same input
# and print elapsed time, output size and ratio for each.
with tb.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
    gz_out_file = path.join(tdir, 'file.gz')

    print('create the test data', end='')

    def progress(i):
        """Progress callback: print a dot whenever ``i`` is a multiple of 10."""
        if i % 10 == 0:
            print('.', end='')
        # Flush on every call so the dots show up right away.
        sys.stdout.flush()

    tb.create_array(100, in_file, progress=progress)
    print('')

    print("Input file size: %s" % get_fs(in_file))

    print("Will now run bloscpack... ")
    tic = time.time()
    bloscpack.pack_file(in_file, out_file, blosc_args,
            chunk_size=bloscpack.reverse_pretty(bloscpack.DEFAULT_CHUNK_SIZE))
    toc = time.time()
    print("Time: %.2f seconds" % (toc - tic))
    print("Output file size: %s" % get_fs(out_file))
    print("Ratio: %.2f" % get_ratio(out_file, in_file))

    print("Will now run gzip... ")
    tic = time.time()
    # Run gzip with an argument list and redirect its stdout ourselves instead
    # of interpolating the paths into a shell command line; this keeps working
    # when the temporary paths contain spaces or shell metacharacters.
    with open(gz_out_file, 'wb') as gz_fp:
        subprocess.call(['gzip', '-c', in_file], stdout=gz_fp)
    toc = time.time()
    print("Time: %.2f seconds" % (toc - tic))
    print("Output file size: %s" % get_fs(gz_out_file))
    print("Ratio: %.2f" % get_ratio(gz_out_file, in_file))
예제 #5
0
    print('create the test data', end='')

    def progress(i):
        """Progress callback: print a dot whenever ``i`` is a multiple of 10."""
        if i % 10 == 0:
            print('.', end='')
        # Flush on every call so the dots show up right away.
        sys.stdout.flush()

    tb.create_array(100, in_file, progress=progress)
    print('')

    print("Input file size: %s" % get_fs(in_file))
    # NOTE(review): drop_caches() is defined outside this excerpt; presumably
    # it clears the OS file cache so the timings below start cold -- confirm.
    drop_caches()

    print("Will now run bloscpack... ")
    tic = time.time()
    bloscpack.pack_file(in_file, out_file)
    toc = time.time()
    print("Time: %.2f seconds" % (toc - tic))
    print("Output file size: %s" % get_fs(out_file))
    print("Ratio: %.2f" % get_ratio(out_file, in_file))
    drop_caches()

    print("Will now run gzip... ")
    tic = time.time()
    # Run gzip with an argument list and redirect its stdout ourselves instead
    # of interpolating the paths into a shell command line; this keeps working
    # when the temporary paths contain spaces or shell metacharacters.
    with open(gz_out_file, 'wb') as gz_fp:
        subprocess.call(['gzip', '-c', in_file], stdout=gz_fp)
    toc = time.time()
    print("Time: %.2f seconds" % (toc - tic))
    print("Output file size: %s" % get_fs(gz_out_file))
    print("Ratio: %.2f" % get_ratio(gz_out_file, in_file))
from bloscpack.pretty import pretty_size


with bpt.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):

    print('create the test data', end='')
    bpt.create_array(100, in_file, progress=bpt.simple_progress)
    repeats = 3
    print ("%s\t%s\t\t%s\t\t%s" %
           ("chunk_size", "comp-time", "decomp-time", "ratio"))
    for chunk_size in (int(2**i) for i in numpy.arange(19, 23.5, 0.5)):
        cmp_times, dcmp_times = [], []
        for _ in range(repeats):
            drop_caches()
            tic = time.time()
            pack_file(in_file, out_file, chunk_size=chunk_size)
            sync()
            toc = time.time()
            cmp_times.append(toc-tic)
            drop_caches()
            tic = time.time()
            unpack_file(out_file, dcmp_file)
            sync()
            toc = time.time()
            dcmp_times.append(toc-tic)
        ratio = path.getsize(in_file)/path.getsize(out_file)
        print ("%s\t\t%f\t\t%f\t\t%f" %
               (pretty_size(chunk_size),
                sum(cmp_times)/repeats,
                sum(dcmp_times)/repeats,
                ratio,
예제 #7
0
from bloscpack import pack_file, unpack_file
from bloscpack.pretty import pretty_size

with bpt.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):

    print('create the test data', end='')
    bpt.create_array(100, in_file, progress=bpt.simple_progress)
    repeats = 3
    print("%s\t%s\t\t%s\t\t%s" %
          ("chunk_size", "comp-time", "decomp-time", "ratio"))
    for chunk_size in (int(2**i) for i in numpy.arange(19, 23.5, 0.5)):
        cmp_times, dcmp_times = [], []
        for _ in range(repeats):
            drop_caches()
            tic = time.time()
            pack_file(in_file, out_file, chunk_size=chunk_size)
            sync()
            toc = time.time()
            cmp_times.append(toc - tic)
            drop_caches()
            tic = time.time()
            unpack_file(out_file, dcmp_file)
            sync()
            toc = time.time()
            dcmp_times.append(toc - tic)
        ratio = path.getsize(in_file) / path.getsize(out_file)
        print("%s\t\t%f\t\t%f\t\t%f" % (
            pretty_size(chunk_size),
            sum(cmp_times) / repeats,
            sum(dcmp_times) / repeats,
            ratio,
from __future__ import division

import os.path as path
import time
import numpy
import bloscpack
import test_bloscpack as tb

blosc_args = bloscpack.DEFAULT_BLOSC_ARGS

# Benchmark: for a range of chunk sizes, time pack_file and unpack_file
# ``repeats`` times each and print the mean times plus the size ratio.
with tb.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
    tb.create_array(100, in_file)
    repeats = 3
    # A single parenthesised argument prints identically as a Python 2 print
    # statement and as a call to the print function.
    print("%s\t\t%s\t\t%s\t\t%s" %
          ("chunk_size", "comp-time", "decomp-time", "ratio"))
    # Chunk sizes 2**19, 2**19.5, ..., 2**23, truncated to whole bytes.
    for chunk_size in (int(2**i) for i in numpy.arange(19, 23.5, 0.5)):
        cmp_times, dcmp_times = [], []
        for _ in range(repeats):
            tic = time.time()
            bloscpack.pack_file(in_file, out_file, blosc_args,
                                chunk_size=chunk_size)
            toc = time.time()
            cmp_times.append(toc - tic)
            tic = time.time()
            bloscpack.unpack_file(out_file, dcmp_file)
            toc = time.time()
            dcmp_times.append(toc - tic)
        # Compressed size relative to the input size; this is a true division
        # thanks to the ``from __future__ import division`` above the imports.
        ratio = path.getsize(out_file) / path.getsize(in_file)
        print("%d\t\t%f\t\t%f\t\t%f" % (chunk_size,
                                        sum(cmp_times) / repeats,
                                        sum(dcmp_times) / repeats,
                                        ratio))
예제 #9
0
    else:
        print('error: need uid 0 (root) to drop caches')
        sys.exit(1)

# Benchmark: create test data, then time bloscpack and gzip on the same input
# and print elapsed time, output size and ratio for each.
with bpt.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
    gz_out_file = path.join(tdir, 'file.gz')

    print('create the test data', end='')
    bpt.create_array(100, in_file, progress=bpt.simple_progress)
    print('')

    print("Input file size: %s" % get_fs(in_file))
    # NOTE(review): drop_caches() is defined outside this excerpt; presumably
    # it clears the OS file cache so the timings below start cold -- confirm.
    drop_caches()

    print("Will now run bloscpack... ")
    tic = time.time()
    pack_file(in_file, out_file)
    toc = time.time()
    print("Time: %.2f seconds" % (toc - tic))
    print("Output file size: %s" % get_fs(out_file))
    print("Ratio: %.2f" % get_ratio(in_file, out_file))
    drop_caches()

    print("Will now run gzip... ")
    tic = time.time()
    # Run gzip with an argument list and redirect its stdout ourselves instead
    # of interpolating the paths into a shell command line; this keeps working
    # when the temporary paths contain spaces or shell metacharacters.
    with open(gz_out_file, 'wb') as gz_fp:
        subprocess.call(['gzip', '-c', in_file], stdout=gz_fp)
    toc = time.time()
    print("Time: %.2f seconds" % (toc - tic))
    print("Output file size: %s" % get_fs(gz_out_file))
    print("Ratio: %.2f" % get_ratio(in_file, gz_out_file))
import os.path as path
import time
import numpy
import bloscpack
import test_bloscpack as tb

blosc_args = bloscpack.DEFAULT_BLOSC_ARGS

# Benchmark: for a range of chunk sizes, time pack_file and unpack_file
# ``repeats`` times each and print the mean times plus the size ratio.
with tb.create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
    tb.create_array(100, in_file)
    repeats = 3
    # A single parenthesised argument prints identically as a Python 2 print
    # statement and as a call to the print function.
    print("%s\t\t%s\t\t%s\t\t%s" % ("chunk_size", "comp-time", "decomp-time",
                                    "ratio"))
    # Chunk sizes 2**19, 2**19.5, ..., 2**23, truncated to whole bytes.
    for chunk_size in (int(2**i) for i in numpy.arange(19, 23.5, 0.5)):
        cmp_times, dcmp_times = [], []
        for _ in range(repeats):
            tic = time.time()
            bloscpack.pack_file(in_file,
                                out_file,
                                blosc_args,
                                chunk_size=chunk_size)
            toc = time.time()
            cmp_times.append(toc - tic)
            tic = time.time()
            bloscpack.unpack_file(out_file, dcmp_file)
            toc = time.time()
            dcmp_times.append(toc - tic)
        # os.path.getsize returns integers and no ``from __future__ import
        # division`` is visible for this script, so force a float division;
        # otherwise Python 2 would floor the ratio (to 0 whenever the output
        # is smaller than the input).
        ratio = float(path.getsize(out_file)) / path.getsize(in_file)
        print("%d\t\t%f\t\t%f\t\t%f" % (chunk_size, sum(cmp_times) / repeats,
                                        sum(dcmp_times) / repeats, ratio))
예제 #11
0
    gz_out_file = path.join(tdir, 'file.gz')

    print('create the test data', end='')

    def progress(i):
        """Progress callback: print a dot whenever ``i`` is a multiple of 10."""
        if i % 10 == 0:
            print('.', end='')
        # Flush on every call so the dots show up right away.
        sys.stdout.flush()
    tb.create_array(100, in_file, progress=progress)
    print('')

    print("Input file size: %s" % get_fs(in_file))
    # NOTE(review): drop_caches() is defined outside this excerpt; presumably
    # it clears the OS file cache so the timings below start cold -- confirm.
    drop_caches()

    print("Will now run bloscpack... ")
    tic = time.time()
    bloscpack.pack_file(in_file, out_file)
    toc = time.time()
    print("Time: %.2f seconds" % (toc - tic))
    print("Output file size: %s" % get_fs(out_file))
    print("Ratio: %.2f" % get_ratio(out_file, in_file))
    drop_caches()

    print("Will now run gzip... ")
    tic = time.time()
    # Run gzip with an argument list and redirect its stdout ourselves instead
    # of interpolating the paths into a shell command line; this keeps working
    # when the temporary paths contain spaces or shell metacharacters.
    with open(gz_out_file, 'wb') as gz_fp:
        subprocess.call(['gzip', '-c', in_file], stdout=gz_fp)
    toc = time.time()
    print("Time: %.2f seconds" % (toc - tic))
    print("Output file size: %s" % get_fs(gz_out_file))
    print("Ratio: %.2f" % get_ratio(gz_out_file, in_file))