Exemple #1
0
def col_labels():
    """
    Append the benchmark column labels to the module-level COLS list.

    For each codec reported by blosc, each of the three shuffle filters and
    each compression level 0-9, three labels are added in order. Every label
    is the codec name followed by the filter value and the level, ending in
    '_r', '_tc' and '_td' respectively.
    """
    for codec_name in blosc.compressor_list():
        for shuffle_mode in (blosc.NOSHUFFLE, blosc.SHUFFLE, blosc.BITSHUFFLE):
            for level in range(10):
                prefix = codec_name + str(shuffle_mode) + str(level)
                COLS.extend(prefix + suffix for suffix in ('_r', '_tc', '_td'))
Exemple #2
0
def print_versions():
    """Print all the versions of software that python-blosc relies on.

    Besides the version strings, this prints the platform description, the
    processor name (or "not recognized" when the platform reports none), the
    byte order and the number of cores detected by blosc.

    The "Linux dist" line is printed only when the running interpreter still
    provides ``platform.linux_distribution``; that function was removed in
    Python 3.8, and calling it unconditionally raised AttributeError there.
    """
    import platform
    print("-=" * 38)
    print("python-blosc version: %s" % blosc.__version__)
    print("Blosc version: %s" % blosc.blosclib_version)
    print("Blosc compressors in this build: %s" % blosc.compressor_list())
    print("Python version: %s" % sys.version)
    (sysname, nodename, release, version, machine, processor) = platform.uname()
    print("Platform: %s-%s-%s (%s)" % (sysname, release, machine, version))
    if sysname == "Linux":
        # Look the function up defensively: it no longer exists on
        # Python >= 3.8, and a diagnostics helper should not crash over it.
        linux_distribution = getattr(platform, "linux_distribution", None)
        if linux_distribution is not None:
            print("Linux dist: %s" % " ".join(linux_distribution()[:-1]))
    if not processor:
        processor = "not recognized"
    print("Processor: %s" % processor)
    print("Byte-ordering: %s" % sys.byteorder)
    print("Detected cores: %s" % blosc.detect_number_of_cores())
    print("-=" * 38)
Exemple #3
0
def print_versions():
    """Print all the versions of software that python-blosc relies on.

    Besides the version strings, this prints the platform description, the
    processor name (or "not recognized" when the platform reports none), the
    byte order and the number of cores detected by blosc.

    The "Linux dist" line is printed only when the running interpreter still
    provides ``platform.linux_distribution``; that function was removed in
    Python 3.8, and calling it unconditionally raised AttributeError there.
    """
    import platform
    print("-=" * 38)
    print("python-blosc version: %s" % blosc.__version__)
    print("Blosc version: %s" % blosc.blosclib_version)
    print("Blosc compressors in this build: %s" % blosc.compressor_list())
    print("Python version: %s" % sys.version)
    (sysname, nodename, release, version, machine,
     processor) = platform.uname()
    print("Platform: %s-%s-%s (%s)" % (sysname, release, machine, version))
    if sysname == "Linux":
        # Look the function up defensively: it no longer exists on
        # Python >= 3.8, and a diagnostics helper should not crash over it.
        linux_distribution = getattr(platform, "linux_distribution", None)
        if linux_distribution is not None:
            print("Linux dist: %s" % " ".join(linux_distribution()[:-1]))
    if not processor:
        processor = "not recognized"
    print("Processor: %s" % processor)
    print("Byte-ordering: %s" % sys.byteorder)
    print("Detected cores: %s" % blosc.detect_number_of_cores())
    print("-=" * 38)
Exemple #4
0
 def test_all_compressors(self):
     # Round-trip a repetitive payload through every codec in this build
     # and check that decompression returns the original bytes.
     payload = b'0123456789'*100
     for codec in blosc.compressor_list():
         packed = blosc.compress(payload, typesize=1, cname=codec)
         self.assertEqual(payload, blosc.decompress(packed))
Exemple #5
0
 def test_get_clib(self):
     # For every codec in this build, the library reported by get_clib()
     # for a compressed buffer must equal the entry in blosc.cname2clib.
     s = b'0123456789'
     for cname in blosc.compressor_list():
         c = blosc.compress(s, typesize=1, cname=cname)
         clib = blosc.get_clib(c)
         # assertEqual replaces the deprecated assert_ alias (removed in
         # Python 3.12) and shows both values when the check fails.
         self.assertEqual(clib, blosc.cname2clib[cname])
# Compression level handed to blosc.pack_array() below.
clevel = 5

# N (the element count) is defined outside this excerpt.
Nexp = np.log10(N)
print("Creating a large NumPy array with 10**%d int64 elements:" % Nexp)
in_ = np.arange(N, dtype=np.int64)  # the trivial linear distribution
# Alternative inputs, kept disabled:
#in_ = np.linspace(0, 100, N)  # another linear distribution
#in_ = np.random.random_integers(0, 100, N)  # random distribution
print(" ", in_)

# Baseline: wall-clock time of a plain np.copy() of the input array.
tic = time.time()
out_ = np.copy(in_)
toc = time.time()
print("  Time for copying array with np.copy():     %.3f s" % (toc - tic, ))
print()

# Time pack_array/unpack_array once per codec available in this build.
for cname in blosc.compressor_list():
    print("Using *** %s *** compressor::" % cname)
    ctic = time.time()
    c = blosc.pack_array(in_, clevel=clevel, shuffle=True, cname=cname)
    ctoc = time.time()
    dtic = time.time()
    out = blosc.unpack_array(c)
    dtoc = time.time()
    # Sanity check: the round trip must reproduce the input element-wise.
    assert ((in_ == out).all())
    print("  Time for pack_array/unpack_array:     %.3f/%.3f s." % \
          (ctoc-ctic, dtoc-dtic), end='')
    # Ratio = array size in bytes (size * itemsize) over len(c).
    print("\tCompr ratio: %.2f" %
          (in_.size * in_.dtype.itemsize * 1. / len(c)))

    ctic = time.time()
    c = blosc.compress_ptr(in_.__array_interface__['data'][0],
Exemple #7
0
 def test_all_compressors(self):
     # Round-trip a repetitive payload through every codec in this build
     # and check that decompression returns the original bytes.
     payload = b'0123456789' * 100
     for codec in blosc.compressor_list():
         packed = blosc.compress(payload, typesize=1, cname=codec)
         self.assertEqual(payload, blosc.decompress(packed))
Exemple #8
0
 def test_get_clib(self):
     # For every codec in this build, the library reported by get_clib()
     # for a compressed buffer must equal the entry in blosc.cname2clib.
     payload = b'0123456789'
     for codec in blosc.compressor_list():
         packed = blosc.compress(payload, typesize=1, cname=codec)
         reported_clib = blosc.get_clib(packed)
         self.assertEqual(reported_clib, blosc.cname2clib[codec])
# Header lengths.
BLOSC_HEADER_LENGTH = 16
BLOSCPACK_HEADER_LENGTH = 32
METADATA_HEADER_LENGTH = 32

# Upper and lower bounds.
MAX_FORMAT_VERSION = 0xFF
MAX_CHUNKS = (1 << 63) - 1
MAX_META_SIZE = (1 << 32) - 1  # largest uint32 value
MIN_CLEVEL, MAX_CLEVEL = 0, 9

# Human-readable size suffixes mapped to their multipliers, smallest first.
SUFFIXES = OrderedDict([
    ("B", 1),
    ("K", 1 << 10),
    ("M", 1 << 20),
    ("G", 1 << 30),
    ("T", 1 << 40),
])

# Codec names reported by the installed Blosc build.
CNAME_AVAIL = blosc.compressor_list()
# Codec name for each integer code, numbered 0 through 4 in this order.
CNAME_MAPPING = dict(enumerate([
    'blosclz',
    'lz4',
    'snappy',
    'zlib',
    'zstd',
]))
Exemple #10
0
# Input distributions to benchmark, each paired with a descriptive label.
# N and clevel are defined outside this block.
arrays = [
    np.arange(N, dtype=np.int64),
    np.linspace(0, 1000, N),
    # np.random.random_integers() is deprecated; randint() excludes its
    # upper bound, so 1000 + 1 keeps the same closed [0, 1000] range.
    np.random.randint(0, 1000 + 1, N),
]
labels = [
    "the arange linear distribution",
    "the linspace linear distribution",
    "the random distribution",
]

# Baseline: wall-clock time of a plain np.copy() of the first array.
tic = time.time()
out_ = np.copy(arrays[0])
toc = time.time()
print("  *** np.copy() **** Time for memcpy():     %.3f s" % (toc-tic,))

for (in_, label) in zip(arrays, labels):
    print("\n*** %s ***" % label)
    for cname in blosc.compressor_list():
        # Compress from the array's data address as exposed by
        # __array_interface__.
        ctic = time.time()
        c = blosc.compress_ptr(in_.__array_interface__['data'][0],
                               in_.size, in_.dtype.itemsize,
                               clevel=clevel, shuffle=True, cname=cname)
        ctoc = time.time()
        # Allocate the destination outside the timed decompression section.
        out = np.empty(in_.size, dtype=in_.dtype)
        dtic = time.time()
        blosc.decompress_ptr(c, out.__array_interface__['data'][0])
        dtoc = time.time()
        # Sanity check: the round trip must reproduce the input element-wise.
        assert((in_ == out).all())
        print("  *** %-8s *** Time for comp/decomp: %.3f/%.3f s." % \
              (cname, ctoc-ctic, dtoc-dtic), end='')
        # Ratio = array size in bytes (size * itemsize) over len(c).
        print("\tCompr ratio: %6.2f" % (in_.size*in_.dtype.itemsize*1. / len(c)))
Exemple #11
0
# Build the column labels (col_labels is defined outside this block).
col_labels()

# Load the results of a previous run when out.csv exists; otherwise start
# from an empty frame.
if os.path.isfile('out.csv'):
    df = pd.read_csv('out.csv', sep='\t')
else:
    df = pd.DataFrame()

# The benchmark grid is loop-invariant, so build it once. The progress
# percentage is derived from its real size instead of a hard-coded 180,
# which was only right for builds with exactly six codecs.
codecs = blosc.compressor_list()
shuffle_filters = [blosc.NOSHUFFLE, blosc.SHUFFLE, blosc.BITSHUFFLE]
clevels = range(10)
total_runs = len(codecs) * len(shuffle_filters) * len(clevels)

for filename in FILENAMES:
    for k, buffer in enumerate(file_reader(filename)):
        # Row starts with "<file basename>_<buffer index>", then the features.
        row_data = [filename.split('/')[-1] + '_' + str(k)]
        row_data.extend(extract_features(buffer))
        aux = 1  # 1-based counter of completed combinations for this buffer
        print("------------", filename.upper(), k, "------------")
        for codec in codecs:
            # Named shuffle_filter so the builtin filter() is not rebound.
            for shuffle_filter in shuffle_filters:
                for clevel in clevels:
                    rate = 0
                    c_time = 0
                    d_time = 0
                    for i, chunk in enumerate(mega_chunk_generator(buffer)):
                        test = test_codec(chunk, codec, shuffle_filter, clevel)
                        # Incremental mean over the chunks seen so far.
                        rate = (rate * i + test[0]) / (i + 1)
                        c_time = (c_time * i + test[1]) / (i + 1)
                        d_time = (d_time * i + test[2]) / (i + 1)
                    print("%-10s  %5.2f %%" % (codec + str(shuffle_filter) + str(clevel),
                                               aux / total_runs * 100))
                    aux += 1
                    row_data.append(rate)
                    row_data.append(c_time)
                    row_data.append(d_time)
Exemple #12
0
@pytest.fixture(params=[
    # 1-D arrays cast to int and to float
    np.arange(10).astype(int),
    np.arange(10).astype(float),
    # 3-D array
    np.arange(12).reshape((2, 2, 3)),
    # empty array
    np.array([]),
    # 2-D array of strings
    np.array([['foo', 'bar']]),
    # inputs that are not NumPy arrays: nested list, string, scalar
    [[1.1, 2.2], [3.3, 4.4]],
    'foo',
    42,
])
def array(request):
    """Parametrized fixture returning each sample input in turn."""
    sample = request.param
    return sample


@pytest.fixture(params=blosc.compressor_list())
def compression(request):
    """Parametrized fixture returning each codec name blosc reports."""
    codec_name = request.param
    return codec_name


@pytest.fixture(params=[0, 1, 2], ids=['none', 'byte', 'bit'])
def shuffle(request):
    """Parametrized fixture returning 0, 1, 2 (ids 'none', 'byte', 'bit')."""
    shuffle_mode = request.param
    return shuffle_mode


@pytest.fixture
def write_result(array, compression, shuffle):
    """Write ``array`` with write_blosc into a fresh in-memory stream.

    The call uses the given codec, the fixed level argument 5 and the given
    shuffle value. Returns a ``(stream, length)`` tuple, where ``length`` is
    whatever write_blosc returned.
    """
    buf = io.BytesIO()
    written = write_blosc(buf, array, compression, 5, shuffle)
    return buf, written