def test_compression_equal_regardless_parallel_mode():
    """The parallel and serial code paths must yield byte-identical output."""
    payload = [b'spam '] * 666
    with patch('arctic._compression.ENABLE_PARALLEL', True):
        parallel_result = compress_array(payload)
    with patch('arctic._compression.ENABLE_PARALLEL', False):
        serial_result = compress_array(payload)
    assert serial_result == parallel_result
def test_compress_array_no_parallel():
    """Serial path: round-trips data and calls lz4 once per input element."""
    data = [b'spam', b'egg', b'spamm', b'spammm']
    with patch('arctic._compression.clz4', sentinel.clz4):
        with patch('arctic._compression.ENABLE_PARALLEL', False):
            with patch('arctic._compression.lz4', wraps=lz4) as patched_lz4:
                assert decompress_array(compress_array(data)) == data
                expected_compress_calls = [call(item) for item in data]
                assert patched_lz4.compress.call_args_list == expected_compress_calls
                expected_decompress_calls = [call(compress(item)) for item in data]
                assert patched_lz4.decompress.call_args_list == expected_decompress_calls
def test_compress_array_no_parallel():
    """Serial path (str inputs): round-trips and hits lz4 per element."""
    inputs = ["spam", "egg", "spamm", "spammm"]
    with patch("arctic._compression.clz4", sentinel.clz4), \
         patch("arctic._compression.ENABLE_PARALLEL", False), \
         patch("arctic._compression.lz4", wraps=lz4) as patched_lz4:
        assert decompress_array(compress_array(inputs)) == inputs
        assert patched_lz4.compress.call_args_list == [call(s) for s in inputs]
        assert patched_lz4.decompress.call_args_list == [call(compress(s)) for s in inputs]
def test_compress_array_no_parallel():
    """Verify the non-parallel branch delegates to lz4 element by element."""
    samples = [b'spam', b'egg', b'spamm', b'spammm']
    with patch('arctic._compression.clz4', sentinel.clz4):
        with patch('arctic._compression.ENABLE_PARALLEL', False):
            with patch('arctic._compression.lz4', wraps=lz4) as lz4_spy:
                assert decompress_array(compress_array(samples)) == samples
                assert lz4_spy.compress.call_args_list == [call(s) for s in samples]
                wanted = [call(compress(s)) for s in samples]
                assert lz4_spy.decompress.call_args_list == wanted
def _do_write(self, backing_store, library_name, version, symbol, item,
              previous_version, segment_offset=0):
    """Chunk, compress and persist `item` as segments, updating `version`.

    `item` is assumed to be a numpy array (it exposes .dtype/.shape/.tostring)
    -- TODO confirm against callers.  `version` is mutated in place with the
    segment bookkeeping fields; nothing is returned.
    """
    previous_segment_keys = []
    if previous_version:
        previous_segment_keys = previous_version['segment_keys']
    # Reuse the prior segment index only when appending (offset > 0);
    # a fresh write rebuilds the index from scratch.
    if segment_offset > 0 and 'segment_index' in previous_version:
        existing_index = previous_version['segment_index']
    else:
        existing_index = None
    # Bytes per row: itemsize times the product of the non-leading dims.
    sze = int(item.dtype.itemsize * np.prod(item.shape[1:]))
    length = len(item)
    # chunk and store the data by (uncompressed) size
    chunk_size = int(backing_store.chunk_size / sze)
    # Compress
    # NOTE(review): xrange implies Python 2; it is iterated twice below
    # (chunks build + zip), which xrange supports, unlike a generator.
    idxs = xrange(int(np.ceil(float(length) / chunk_size)))
    chunks = [(item[i * chunk_size:(i + 1) * chunk_size]).tostring() for i in idxs]
    compressed_segments = compress_array(chunks)
    segment_keys = []
    raw_segment_index = []
    for i, segment_data in zip(idxs, compressed_segments):
        # Index of the last row covered by this segment, shifted by the
        # append offset so indices stay global across appends.
        segment_idx = min((i + 1) * chunk_size - 1, length - 1) + segment_offset
        segment_key = backing_store.write_segment(library_name, symbol, segment_data, previous_segment_keys)
        raw_segment_index.append(segment_idx)
        segment_keys.append(segment_key)
    segment_index = self._segment_index(item, existing_index=existing_index, start=segment_offset, new_segments=raw_segment_index)
    if segment_index:
        version['segment_index'] = segment_index
    version['raw_segment_index'] = raw_segment_index
    version['segment_count'] = len(segment_keys)  # on appends this value is incorrect but is updated later on
    version['append_size'] = 0
    version['append_count'] = 0
    version['segment_keys'] = segment_keys
def bench_multi(repeats, _strarr, use_HC, pool=None):
    """Run `repeats` timed compressions of `_strarr`; return seconds per run.

    With `pool`, the raw LZ4 functions are mapped over the array; otherwise
    arctic's own compression layer is exercised.
    """
    timings = []
    for _ in range(repeats):
        started = dt.now()
        if pool:
            # Raw LZ4 lib
            if use_HC:
                outputs = pool.map(c.lz4_compressHC, _strarr)
            else:
                outputs = pool.map(c.lz4_compress, _strarr)
        else:
            # Arctic's compression layer
            if use_HC:
                outputs = c.compressHC_array(_strarr)
            else:
                outputs = c.compress_array(_strarr, withHC=False)
        elapsed = (dt.now() - started).total_seconds()
        assert len(outputs) == len(_strarr)
        assert all(outputs)
        timings.append(elapsed)
    return timings
def test_decompress_array():
    """compress_array/decompress_array round-trip 100 strings unchanged."""
    originals = ['foo%s' % i for i in range(100)]
    assert decompress_array(compress_array(originals)) == originals
def test_compress_array_LZ4_sequential():
    """Below the parallel threshold, clz4.compress is invoked per element."""
    use_lz4hc(False)
    compress_mock = Mock()
    with patch('arctic._compression.clz4.compress', compress_mock):
        compress_array(["foo"] * 49)
    assert compress_mock.call_count == 49
def test_compress_array_usesLZ4():
    """Large arrays go through the batched clz4.compressarr exactly once."""
    use_lz4hc(False)
    batch_mock = Mock()
    with patch('arctic._compression.clz4.compressarr', batch_mock):
        compress_array(["foo"] * 100)
    assert batch_mock.call_count == 1
def test_compressarr_LZ4HC():
    """compress_array returns a non-empty list for a single chunk.

    Fixed: the original called compress_array twice on identical input —
    wasteful and a potential flake if output ever varies between runs.
    Compress once and assert on the single result.
    """
    compressed = compress_array(["foobar" * 10])
    assert isinstance(compressed, list)
    assert len(compressed) > 0
def test_compress_array_LZ4_sequential():
    """49 items stay on the sequential path: one lz4_compress call each."""
    compress_spy = Mock()
    with patch('arctic._compression.lz4_compress', compress_spy):
        compress_array([b"foo"] * 49)
    # len(call_args_list) rather than call_count: the latter is not thread safe
    assert len(compress_spy.call_args_list) == 49
def test_compress_array_LZ4_sequential():
    """Sequential path: clz4.compress fires once per input element."""
    mocked = Mock()
    with patch('arctic._compression.clz4.compress', mocked):
        compress_array([b"foo"] * 49)
    assert mocked.call_count == 49
def test_decompress_array():
    """Byte-string round-trip through compress_array/decompress_array."""
    payload = [('foo%s' % i).encode('ascii') for i in range(100)]
    assert decompress_array(compress_array(payload)) == payload
def test_compress_array_usesLZ4():
    """Every element of a 100-item array goes through lz4_compress."""
    compress_spy = Mock()
    with patch('arctic._compression.lz4_compress', compress_spy):
        compress_array([b"foo"] * 100)
    assert len(compress_spy.call_args_list) == 100  # call_count is not thread safe
def test_roundtrip_arr(n, length):
    """n random strings of `length` survive a compress/decompress cycle."""
    originals = [random_string(length) for _ in range(n)]
    compressed = c.compress_array(originals)
    assert originals == c.decompress_array(compressed)
def test_compressarr():
    """compress_array returns a non-empty list for a single bytes chunk.

    Fixed: the original compressed the same input twice (once for the len
    check, once for the isinstance check). Compress once and assert on the
    single result.
    """
    compressed = compress_array([b"foobar"*10])
    assert isinstance(compressed, list)
    assert len(compressed) > 0
def test_compress_array_usesLZ4():
    """100 items trigger exactly one batched clz4.compressarr call."""
    batch_spy = Mock()
    with patch('arctic._compression.clz4.compressarr', batch_spy):
        compress_array([b"foo"] * 100)
    assert batch_spy.call_count == 1
def test_compress_array_usesLZ4HC():
    """In HC mode, a large array uses clz4.compressarrHC exactly once."""
    use_lz4hc(True)
    hc_batch_spy = Mock()
    with patch('arctic._compression.clz4.compressarrHC', hc_batch_spy):
        compress_array(["foo"] * 100)
    assert hc_batch_spy.call_count == 1
def test_compress_array_LZ4HC_sequential():
    """Small HC-mode arrays call clz4.compressHC once per element."""
    use_lz4hc(True)
    hc_spy = Mock()
    with patch('arctic._compression.clz4.compressHC', hc_spy):
        compress_array(["foo"] * 4)
    assert hc_spy.call_count == 4