Ejemplo n.º 1
0
def test_compression_equal_regardless_parallel_mode():
    """compress_array output must not depend on the ENABLE_PARALLEL flag.

    The same input is compressed once with
    ``arctic._compression.ENABLE_PARALLEL`` patched to True and once
    patched to False; both results must compare equal.
    """
    payload = [b'spam '] * 666
    results = {}
    # True first, then False, matching the original call order.
    for parallel_enabled in (True, False):
        with patch('arctic._compression.ENABLE_PARALLEL', parallel_enabled):
            results[parallel_enabled] = compress_array(payload)
    assert results[False] == results[True]
Ejemplo n.º 2
0
def test_compression_equal_regardless_parallel_mode():
    """Check that parallel and serial compression give equal results.

    ``arctic._compression.ENABLE_PARALLEL`` is patched to each value in
    turn and the two outputs of ``compress_array`` are compared.
    """
    flag_path = 'arctic._compression.ENABLE_PARALLEL'
    blobs = [b'spam '] * 666

    with patch(flag_path, True):
        with_parallel = compress_array(blobs)

    with patch(flag_path, False):
        without_parallel = compress_array(blobs)

    assert without_parallel == with_parallel
Ejemplo n.º 3
0
def test_compress_array_no_parallel():
    """With ENABLE_PARALLEL off, each item goes through the patched lz4.

    ``clz4`` is replaced by a sentinel, ``ENABLE_PARALLEL`` is patched to
    False and ``lz4`` is replaced by a mock wrapping the real module, so
    the per-item compress/decompress calls can be inspected.
    """
    items = [b'spam', b'egg', b'spamm', b'spammm']
    with patch('arctic._compression.clz4', sentinel.clz4):
        with patch('arctic._compression.ENABLE_PARALLEL', False):
            with patch('arctic._compression.lz4', wraps=lz4) as lz4_spy:
                round_tripped = decompress_array(compress_array(items))
                assert round_tripped == items

                # One compress call per input item, in input order.
                compress_calls = lz4_spy.compress.call_args_list
                assert compress_calls == list(map(call, items))

                # One decompress call per compressed item, in input order.
                decompress_calls = lz4_spy.decompress.call_args_list
                assert decompress_calls == [call(compress(x)) for x in items]
Ejemplo n.º 4
0
def test_compress_array_no_parallel():
    """Round-trip a small list with ENABLE_PARALLEL patched to False.

    ``clz4`` is swapped for a sentinel and ``lz4`` for a wrapping mock;
    the mock's recorded calls must show one compress and one decompress
    per item, in order.
    """
    words = ["spam", "egg", "spamm", "spammm"]
    with patch("arctic._compression.clz4", sentinel.clz4), \
            patch("arctic._compression.ENABLE_PARALLEL", False), \
            patch("arctic._compression.lz4", wraps=lz4) as wrapped_lz4:
        restored = decompress_array(compress_array(words))
        assert restored == words

        seen_compress = wrapped_lz4.compress.call_args_list
        assert seen_compress == [call(word) for word in words]

        seen_decompress = wrapped_lz4.decompress.call_args_list
        assert seen_decompress == [call(compress(word)) for word in words]
Ejemplo n.º 5
0
def test_compress_array_no_parallel():
    """Verify the non-parallel path calls lz4 once per array element.

    Patches: ``clz4`` -> sentinel, ``ENABLE_PARALLEL`` -> False and
    ``lz4`` -> mock wrapping the real ``lz4`` so calls are recorded.
    """
    chunks = [b'spam', b'egg', b'spamm', b'spammm']
    with patch('arctic._compression.clz4', sentinel.clz4), \
         patch('arctic._compression.ENABLE_PARALLEL', False), \
         patch('arctic._compression.lz4', wraps=lz4) as recorder:
        packed = compress_array(chunks)
        assert decompress_array(packed) == chunks
        assert recorder.compress.call_args_list == list(map(call, chunks))
        assert recorder.decompress.call_args_list == [
            call(compress(chunk)) for chunk in chunks
        ]
Ejemplo n.º 6
0
    def _do_write(self,
                  backing_store,
                  library_name,
                  version,
                  symbol,
                  item,
                  previous_version,
                  segment_offset=0):
        """Split ``item`` into chunks, compress them and write the segments.

        The chunk length (in rows of ``item``) is derived from
        ``backing_store.chunk_size`` divided by the byte size of one row.
        Each chunk is serialised with ``tostring()``, compressed through
        ``compress_array`` and handed to ``backing_store.write_segment``.
        The resulting keys and indexes are recorded in ``version``, which
        is modified in place; nothing is returned.

        :param backing_store: object providing ``chunk_size`` and
            ``write_segment(library_name, symbol, data, previous_keys)``
        :param library_name: passed through to ``write_segment``
        :param version: mapping that receives the segment metadata
        :param symbol: passed through to ``write_segment``
        :param item: array-like exposing ``dtype``, ``shape``, ``len()``
            and slicing
        :param previous_version: falsy, or a mapping holding
            ``'segment_keys'`` and optionally ``'segment_index'``; it is
            queried with ``in`` whenever ``segment_offset > 0``
        :param segment_offset: added to every raw segment index and used
            as ``start`` when building the segment index
        """
        previous_segment_keys = (
            previous_version['segment_keys'] if previous_version else []
        )

        existing_index = None
        if segment_offset > 0 and 'segment_index' in previous_version:
            existing_index = previous_version['segment_index']

        # Bytes taken by a single row (first-axis element) of ``item``.
        row_bytes = int(item.dtype.itemsize * np.prod(item.shape[1:]))
        row_count = len(item)

        # chunk and store the data by (uncompressed) size
        rows_per_chunk = int(backing_store.chunk_size / row_bytes)

        # Compress
        chunk_numbers = xrange(int(np.ceil(float(row_count) / rows_per_chunk)))
        raw_chunks = []
        for chunk_no in chunk_numbers:
            first_row = chunk_no * rows_per_chunk
            raw_chunks.append(
                item[first_row:first_row + rows_per_chunk].tostring())
        compressed_segments = compress_array(raw_chunks)

        segment_keys = []
        raw_segment_index = []
        for chunk_no, segment_data in zip(chunk_numbers, compressed_segments):
            # Index of the last row held by this segment, shifted by the
            # offset; the final chunk may be shorter than rows_per_chunk.
            last_row = min((chunk_no + 1) * rows_per_chunk - 1,
                           row_count - 1) + segment_offset
            key = backing_store.write_segment(
                library_name, symbol, segment_data, previous_segment_keys)
            raw_segment_index.append(last_row)
            segment_keys.append(key)

        segment_index = self._segment_index(
            item,
            existing_index=existing_index,
            start=segment_offset,
            new_segments=raw_segment_index,
        )
        if segment_index:
            version['segment_index'] = segment_index
        version['raw_segment_index'] = raw_segment_index
        # on appends this value is incorrect but is updated later on
        version['segment_count'] = len(segment_keys)
        version['append_size'] = 0
        version['append_count'] = 0
        version['segment_keys'] = segment_keys
Ejemplo n.º 7
0
def bench_multi(repeats, _strarr, use_HC, pool=None):
    """Time ``repeats`` compression passes over ``_strarr``.

    When ``pool`` is truthy, ``c.lz4_compressHC`` or ``c.lz4_compress``
    (chosen by ``use_HC``) is mapped over ``_strarr`` via ``pool.map``.
    Otherwise ``c.compressHC_array`` or
    ``c.compress_array(..., withHC=False)`` is called on the whole list.

    Each pass asserts that the result has one truthy entry per input.

    :return: list with the wall-clock duration of every pass, in seconds
    """
    timings = []
    for _ in range(repeats):
        started = dt.now()
        if pool:
            # Raw LZ4 lib
            codec = c.lz4_compressHC if use_HC else c.lz4_compress
            res = pool.map(codec, _strarr)
        elif use_HC:
            # Arctic's compression layer
            res = c.compressHC_array(_strarr)
        else:
            # Arctic's compression layer
            res = c.compress_array(_strarr, withHC=False)
        elapsed = (dt.now() - started).total_seconds()
        assert len(res) == len(_strarr)
        assert all(res)
        timings.append(elapsed)
    return timings
Ejemplo n.º 8
0
def bench_multi(repeats, _strarr, use_HC, pool=None):
    """Run a compression benchmark and return the per-run timings.

    For each of ``repeats`` runs, ``_strarr`` is compressed either by
    mapping a single-item ``c.lz4_compress``/``c.lz4_compressHC`` over
    ``pool`` (when ``pool`` is truthy) or by calling
    ``c.compress_array``/``c.compressHC_array`` on the whole list.
    ``use_HC`` selects the HC variant in both cases.

    Every run asserts the output length matches the input and that all
    entries are truthy. Returns a list of elapsed seconds, one per run.
    """
    samples = []
    for _run in range(repeats):
        t_start = dt.now()
        if not pool:
            # Arctic's compression layer
            if use_HC:
                res = c.compressHC_array(_strarr)
            else:
                res = c.compress_array(_strarr, withHC=False)
        else:
            # Raw LZ4 lib
            single_item_fn = c.lz4_compressHC if use_HC else c.lz4_compress
            res = pool.map(single_item_fn, _strarr)
        t_stop = dt.now()
        duration = (t_stop - t_start).total_seconds()
        assert len(res) == len(_strarr)
        assert all(res)
        samples.append(duration)
    return samples
Ejemplo n.º 9
0
def test_decompress_array():
    """decompress_array must invert compress_array for 100 short strings."""
    originals = []
    for i in range(100):
        originals.append('foo%s' % i)
    compressed = compress_array(originals)
    assert decompress_array(compressed) == originals
Ejemplo n.º 10
0
def test_compress_array_LZ4_sequential():
    """A 49-item array must call the patched ``clz4.compress`` 49 times.

    ``use_lz4hc(False)`` is called first (presumably to select the
    non-HC codec -- confirm against its definition).
    """
    use_lz4hc(False)
    item_count = 49
    compress_spy = Mock()
    with patch('arctic._compression.clz4.compress', compress_spy):
        compress_array(["foo"] * item_count)
        assert compress_spy.call_count == item_count
Ejemplo n.º 11
0
def test_compress_array_usesLZ4():
    """A 100-item array must call the patched ``clz4.compressarr`` once.

    ``use_lz4hc(False)`` is called first (presumably to select the
    non-HC codec -- confirm against its definition).
    """
    use_lz4hc(False)
    batch_spy = Mock()
    inputs = ["foo"] * 100
    with patch('arctic._compression.clz4.compressarr', batch_spy):
        compress_array(inputs)
        assert batch_spy.call_count == 1
Ejemplo n.º 12
0
def test_compressarr_LZ4HC():
    """compress_array on a one-item list returns a non-empty list."""
    # compress_array is deliberately called twice, once per assertion.
    first_result = compress_array(["foobar" * 10])
    assert len(first_result) > 0
    second_result = compress_array(["foobar" * 10])
    assert isinstance(second_result, list)
Ejemplo n.º 13
0
def test_decompress_array():
    """Round-trip 100 generated strings through compress/decompress."""
    samples = list(map(lambda i: 'foo%s' % i, range(100)))
    restored = decompress_array(compress_array(samples))
    assert restored == samples
Ejemplo n.º 14
0
def test_compress_array_LZ4_sequential():
    """A 49-item array must produce 49 recorded ``lz4_compress`` calls."""
    expected_calls = 49
    compress_spy = Mock()
    with patch('arctic._compression.lz4_compress', compress_spy):
        compress_array([b"foo"] * expected_calls)
        recorded = compress_spy.call_args_list
        assert len(recorded) == expected_calls
Ejemplo n.º 15
0
def test_compress_array_LZ4_sequential():
    """A 49-item array must call the patched ``clz4.compress`` 49 times."""
    item_count = 49
    single_compress = Mock()
    with patch('arctic._compression.clz4.compress', single_compress):
        compress_array([b"foo"] * item_count)
        assert single_compress.call_count == item_count
Ejemplo n.º 16
0
def test_decompress_array():
    """decompress_array must invert compress_array for 100 byte strings."""
    originals = []
    for i in range(100):
        text = 'foo%s' % i
        originals.append(text.encode('ascii'))
    compressed = compress_array(originals)
    assert decompress_array(compressed) == originals
Ejemplo n.º 17
0
def test_compress_array_LZ4_sequential():
    """Compressing 49 items must record 49 calls on ``lz4_compress``."""
    fake_compress = Mock()
    inputs = [b"foo"] * 49
    with patch('arctic._compression.lz4_compress', fake_compress):
        compress_array(inputs)
        call_total = len(fake_compress.call_args_list)
        assert call_total == 49
Ejemplo n.º 18
0
def test_compress_array_usesLZ4():
    """A 100-item array must produce 100 recorded ``lz4_compress`` calls."""
    expected_calls = 100
    compress_spy = Mock()
    with patch('arctic._compression.lz4_compress', compress_spy):
        compress_array([b"foo"] * expected_calls)
        # call_count is not thread safe, so count the recorded calls.
        recorded = compress_spy.call_args_list
        assert len(recorded) == expected_calls
Ejemplo n.º 19
0
def test_roundtrip_arr(n, length):
    """Round-trip ``n`` strings built by ``random_string(length)``.

    The strings are compressed with ``c.compress_array`` and must be
    recovered unchanged by ``c.decompress_array``.
    """
    originals = []
    for _ in range(n):
        originals.append(random_string(length))
    compressed = c.compress_array(originals)
    assert originals == c.decompress_array(compressed)
Ejemplo n.º 20
0
def test_compressarr():
    """compress_array on a one-item bytes list returns a non-empty list."""
    # compress_array is deliberately called twice, once per assertion.
    first_result = compress_array([b"foobar"*10])
    assert len(first_result) > 0
    second_result = compress_array([b"foobar"*10])
    assert isinstance(second_result, list)
Ejemplo n.º 21
0
def test_compress_array_usesLZ4():
    """A 100-item array must call the patched ``clz4.compressarr`` once."""
    batch_spy = Mock()
    inputs = [b"foo"] * 100
    with patch('arctic._compression.clz4.compressarr', batch_spy):
        compress_array(inputs)
        assert batch_spy.call_count == 1
Ejemplo n.º 22
0
def test_compress_array_usesLZ4():
    """Compressing 100 items must record 100 calls on ``lz4_compress``."""
    fake_compress = Mock()
    inputs = [b"foo"] * 100
    with patch('arctic._compression.lz4_compress', fake_compress):
        compress_array(inputs)
        # call_count is not thread safe, hence len(call_args_list).
        call_total = len(fake_compress.call_args_list)
        assert call_total == 100
Ejemplo n.º 23
0
def test_decompress_array():
    """Round-trip 100 ASCII-encoded strings through compress/decompress."""
    samples = list(map(lambda i: ('foo%s' % i).encode('ascii'), range(100)))
    restored = decompress_array(compress_array(samples))
    assert restored == samples
Ejemplo n.º 24
0
def test_compress_array_usesLZ4HC():
    """A 100-item array must call the patched ``clz4.compressarrHC`` once.

    ``use_lz4hc(True)`` is called first (presumably to select the HC
    codec -- confirm against its definition).
    """
    use_lz4hc(True)
    batch_hc_spy = Mock()
    inputs = ["foo"] * 100
    with patch('arctic._compression.clz4.compressarrHC', batch_hc_spy):
        compress_array(inputs)
        assert batch_hc_spy.call_count == 1
Ejemplo n.º 25
0
def test_roundtrip_arr(n, length):
    """Check that ``c.decompress_array`` inverts ``c.compress_array``.

    The input is ``n`` strings, each produced by ``random_string(length)``.
    """
    inputs = list(random_string(length) for _ in range(n))
    packed = c.compress_array(inputs)
    unpacked = c.decompress_array(packed)
    assert inputs == unpacked
Ejemplo n.º 26
0
def test_compress_array_LZ4HC_sequential():
    """A 4-item array must call the patched ``clz4.compressHC`` 4 times.

    ``use_lz4hc(True)`` is called first (presumably to select the HC
    codec -- confirm against its definition).
    """
    use_lz4hc(True)
    item_count = 4
    compress_hc_spy = Mock()
    with patch('arctic._compression.clz4.compressHC', compress_hc_spy):
        compress_array(["foo"] * item_count)
        assert compress_hc_spy.call_count == item_count