def test_double():
    """Round-trip a float64 matrix through lilcom and print the relative error.

    A random 100x200 float64 matrix is compressed along its last axis, then
    decompressed twice (once as float64, once as float32).  For each result
    the summed absolute error divided by the summed absolute input is printed.
    """
    original = np.random.randn(100, 200).astype(np.float64)
    compressed = lilcom.compress(original, axis=-1)
    # The denominator depends only on the input, so compute it once.
    denominator = np.fabs(original).sum()

    reports = (
        (np.float64,
         "Relative error in double compression, decompressing as double, is: "),
        (np.float32,
         "Relative error in double compression, decompressing as float, is: "),
    )
    for out_dtype, message in reports:
        restored = lilcom.decompress(compressed, dtype=out_dtype)
        rel_error = np.fabs(original - restored).sum() / denominator
        print(message, rel_error)
def lilcomReconstruct(audioArray, lpcOrder, bitsPerSample=6):
    """Compress an audio array with lilcom and immediately decompress it.

    Args:
        audioArray: A numpy array holding the audio signal.  It is converted
            to float32 before compression; compression runs along axis 0.
        lpcOrder: Forwarded to ``lilcom.compress`` as ``lpc_order``.
        bitsPerSample: Forwarded to ``lilcom.compress`` as
            ``bits_per_sample``.  Defaults to 6, the value that was
            previously hard-coded in this function.

    Returns:
        The array returned by ``lilcom.decompress`` for the compressed
        signal, requested with dtype float32.
    """
    audioArray = audioArray.astype(np.float32)
    compressed = lilcom.compress(audioArray,
                                 lpc_order=lpcOrder,
                                 bits_per_sample=bitsPerSample,
                                 axis=0)
    return lilcom.decompress(compressed, dtype=audioArray.dtype)
def test_rtf():
    """Print real-time factors (RTF) of lilcom compression and decompression.

    For each dtype a random matrix of 1000 rows by 16000 columns is created
    (int16 data is scaled by 32768 before the cast).  Viewing each row as one
    second of 16kHz-sampled audio, the matrix holds 1000 seconds in total.
    Every combination of bits-per-sample, LPC order and axis is timed with
    ``time.process_time`` and reported as a percentage of the audio duration.
    """
    audio_seconds = 1000.0
    # Multiplying an elapsed time by this factor expresses it as a
    # percentage of the audio duration.
    to_percent = 100.0 / audio_seconds
    report = ("RTF for dtype={}, bits-per-sample={}, lpc_order={}, axis={}, "
              "compress/decompress/total RTF is: {:.3f}%,{:.3f}%,{:.3f}%")

    for dtype in (np.int16, np.float32, np.float64):
        signal = np.random.randn(int(audio_seconds), 16000)
        if dtype == np.int16:
            signal *= 32768
        signal = signal.astype(dtype)

        for bits_per_sample in (4, 8):
            for lpc_order in (0, 1, 2, 4, 8):
                for axis in (0, 1):
                    t_begin = time.process_time()
                    compressed = lilcom.compress(
                        signal,
                        axis=axis,
                        bits_per_sample=bits_per_sample,
                        lpc_order=lpc_order)
                    t_compressed = time.process_time()
                    lilcom.decompress(compressed, dtype=dtype)
                    t_done = time.process_time()

                    print(report.format(
                        dtype, bits_per_sample, lpc_order, axis,
                        (t_compressed - t_begin) * to_percent,
                        (t_done - t_compressed) * to_percent,
                        (t_done - t_begin) * to_percent))
def test_int16():
    """Print relative round-trip errors for int16 input over several settings.

    Loops over bits-per-sample, axis, and whether caller-allocated ``out``
    buffers are used.  The compressed data is decompressed as int16, float32
    and float64; the reference is scaled by 1/32768 for the floating-point
    outputs before the relative error is computed.
    """
    for bits_per_sample in [4, 5, 8]:
        for axis in [-1, 1, 0, -2]:
            # Matrix dimensions vary with the loop variables.
            num_rows = 100 + bits_per_sample + axis
            num_cols = 200 + 10 * bits_per_sample + axis
            a = (np.random.rand(num_rows, num_cols) * 65535
                 - 32768).astype(np.int16)

            for use_out in [False, True]:
                out_shape = lilcom.get_compressed_shape(
                    a.shape, axis, bits_per_sample)
                if use_out:
                    compress_out = np.empty(out_shape, dtype=np.int8)
                else:
                    compress_out = None
                b = lilcom.compress(a,
                                    axis=axis,
                                    bits_per_sample=bits_per_sample,
                                    out=compress_out)

                for d in [np.int16, np.float32, np.float64]:
                    if use_out:
                        # The output dtype is carried by the buffer.
                        c = lilcom.decompress(
                            b, dtype=None, out=np.empty(a.shape, dtype=d))
                    else:
                        c = lilcom.decompress(b, dtype=d, out=None)

                    # Floating-point outputs are compared against the input
                    # scaled by 1/32768; int16 output is compared unscaled.
                    scale = 1.0 if d == np.int16 else 1.0 / 32768.0
                    a2 = a.astype(np.float32) * scale
                    c2 = c.astype(np.float32)
                    rel_error = np.fabs(a2 - c2).sum() / np.fabs(a2).sum()
                    print(
                        "Relative error in int16 compression (decompressing as {}, axis={}, num-bits={}, use_out={}) is {}"
                        .format(d, axis, bits_per_sample, use_out, rel_error))
def load(
        self,
        root_dir: Optional[Pathlike] = None,
        start: Seconds = 0.0,
        duration: Optional[Seconds] = None,
) -> np.ndarray:
    """Load the stored feature matrix, optionally trimmed to a time span.

    Args:
        root_dir: Optional directory that ``self.storage_path`` is joined
            onto; when ``None`` the storage path is used as-is.
        start: Requested start time.  If it is not close to ``self.start``,
            leading frames are dropped.
        duration: Requested duration.  If given and ``start + duration`` is
            not close to ``self.end``, trailing frames are dropped.

    Returns:
        The features array, sliced along its first axis as requested.

    Raises:
        ValueError: If ``self.storage_type`` is neither ``'lilcom'`` nor
            ``'numpy'``.
    """
    # Load the features from the storage
    if root_dir is None:
        storage_path = self.storage_path
    else:
        storage_path = Path(root_dir) / self.storage_path

    if self.storage_type == 'lilcom':
        with open(storage_path, 'rb') as f:
            features = lilcom.decompress(f.read())
    elif self.storage_type == 'numpy':
        features = np.load(storage_path, allow_pickle=False)
    else:
        raise ValueError(f"Unknown storage_type: {self.storage_type}")

    # In case the caller requested only a subset of features, trim them

    # Left trim
    if not isclose(start, self.start):
        frames_to_trim = round((start - self.start) / self.frame_shift)
        features = features[frames_to_trim:, :]

    # Right trim
    if duration is not None:
        end = start + duration
        if not isclose(end, self.end):
            frames_to_trim = round((self.end - end) / self.frame_shift)
            # When the requested end is within half a frame of the stored
            # end, frames_to_trim rounds to zero; slicing with [:-0] would
            # return an empty array, so only slice for a non-zero count.
            if frames_to_trim:
                features = features[:-frames_to_trim, :]

    return features
def read(
        self,
        key: str,
        left_offset_frames: int = 0,
        right_offset_frames: Optional[int] = None,
) -> np.ndarray:
    """Read a frame range from the chunked entry ``key`` in ``self.hdf``.

    Only the chunks overlapping the requested frame range are decompressed;
    the decoded chunks are concatenated along axis 0 and then sliced to the
    exact range.

    Args:
        key: Name of the entry in ``self.hdf``.
        left_offset_frames: Index of the first frame to return.
        right_offset_frames: Index one past the last frame to return, or
            ``None`` to read through the end.

    Returns:
        The requested frames, or the slice of an empty array when no chunk
        was selected.
    """
    chunk_size = lookup_chunk_size(self.hdf)

    # Range of chunks covering the requested frames.
    first_chunk = floor(left_offset_frames / chunk_size)
    if right_offset_frames is None:
        stop_chunk = None
    else:
        stop_chunk = ceil(right_offset_frames / chunk_size)

    # Decode every selected chunk and stitch the pieces together.
    pieces = []
    for data in self.hdf[key][first_chunk:stop_chunk]:
        pieces.append(lilcom.decompress(data.tobytes()))
    arr = np.concatenate(pieces, axis=0) if pieces else np.array([])

    # The decoded array starts at frame first_chunk * chunk_size, so the
    # requested offsets have to be shifted by that amount.
    base_frame = chunk_size * first_chunk
    lo = left_offset_frames - base_frame
    hi = None if right_offset_frames is None else right_offset_frames - base_frame
    return arr[lo:hi]
def load(
        self,
        start: Optional[Seconds] = None,
        duration: Optional[Seconds] = None,
) -> np.ndarray:
    """Load the stored feature matrix, optionally trimmed to a time span.

    Args:
        start: Requested start time; ``None`` means ``self.start``.
        duration: Requested duration; ``None`` means no right trimming.

    Returns:
        The features array, sliced along its first axis as requested.

    Raises:
        ValueError: If ``self.storage_type`` is unknown, or if ``start`` is
            more than 1e-5 before ``self.start``.
    """
    # Fetch the full matrix from storage.
    kind = self.storage_type
    if kind == 'lilcom':
        with open(self.storage_path, 'rb') as f:
            features = lilcom.decompress(f.read())
    elif kind == 'numpy':
        features = np.load(self.storage_path, allow_pickle=False)
    else:
        raise ValueError(f"Unknown storage_type: {self.storage_type}")

    if start is None:
        start = self.start

    # Reject requests that begin before the stored span.
    if start < self.start - 1e-5:
        raise ValueError(f"Cannot load features for recording {self.recording_id} starting from {start}s. "
                         f"The available range is ({self.start}, {self.end}) seconds.")

    # Drop leading frames when the request starts later than the stored span.
    if not isclose(start, self.start):
        leading = round((start - self.start) / self.frame_shift)
        features = features[leading:, :]

    # Without a duration there is nothing to trim on the right.
    if duration is None:
        return features

    end = start + duration
    if isclose(end, self.end):
        return features

    trailing = round((self.end - end) / self.frame_shift)
    # A duration very close to the original one can round to zero frames;
    # slicing with [:-0] must be avoided in that case.
    if trailing:
        features = features[:-trailing, :]
    return features
def read(self, key: str, left_offset_frames: int = 0, right_offset_frames: Optional[int] = None) -> np.ndarray:
    """Decompress the file ``self.storage_path / key`` and return a frame range.

    Args:
        key: File name relative to ``self.storage_path``.
        left_offset_frames: Index of the first frame to return.
        right_offset_frames: Index one past the last frame to return, or
            ``None`` to read through the end.

    Returns:
        The decompressed array sliced along its first axis.
    """
    frame_window = slice(left_offset_frames, right_offset_frames)
    with open(self.storage_path / key, 'rb') as stream:
        decoded = lilcom.decompress(stream.read())
    return decoded[frame_window]
def test_rtf():
    """Print throughput of random generation, compression and decompression.

    For every dtype and tick power, three phases are each run repeatedly for
    about ``test_duration`` seconds of process time on a 300x300 matrix:
    generating random data, ``lilcom.compress`` and ``lilcom.decompress``.
    The number of array elements processed per second is printed per phase.

    The element counter is reset before each phase, so each printed rate
    reflects only the work done in that phase.
    """
    test_duration = 0.2
    for dtype in [np.float32, np.float64]:
        for tick_power in [-8, -6, -4]:
            a = np.random.randn(300, 300).astype(dtype)

            # Phase 1: random matrix generation.
            processed = 0
            start = time.process_time()
            while time.process_time() - start < test_duration:
                processed += a.size
                a = np.random.randn(*a.shape).astype(dtype)
            print("Flops/sec for randn with dtype={} is {} ".format(
                dtype, processed / (time.process_time() - start)))

            # Phase 2: compression.
            processed = 0
            start = time.process_time()
            while time.process_time() - start < test_duration:
                processed += a.size
                b = lilcom.compress(a, tick_power=tick_power)
            print("Flops/sec for compression with dtype={} and tick_power={} is {} ".format(
                dtype, tick_power, processed / (time.process_time() - start)))

            # Phase 3: decompression of the last compressed result.
            processed = 0
            start = time.process_time()
            while time.process_time() - start < test_duration:
                processed += a.size
                lilcom.decompress(b)
            print("Flops/sec for decompression with tick_power={} is {}".format(
                tick_power, processed / (time.process_time() - start)))
def read(
        self,
        key: str,
        left_offset_frames: int = 0,
        right_offset_frames: Optional[int] = None
) -> np.ndarray:
    """Decompress the entry ``key`` of ``self.hdf`` and return a frame range.

    Args:
        key: Name of the entry in ``self.hdf``.
        left_offset_frames: Index of the first frame to return.
        right_offset_frames: Index one past the last frame to return, or
            ``None`` to read through the end.

    Returns:
        The decompressed array sliced along its first axis.
    """
    # ``dataset[()]`` reads the whole dataset; it replaces the ``.value``
    # attribute, which h5py deprecated ("Use dataset[()] instead").
    arr = lilcom.decompress(self.hdf[key][()].tobytes())
    return arr[left_offset_frames:right_offset_frames]
def read(
        self,
        raw_data: bytes,
        left_offset_frames: int = 0,
        right_offset_frames: Optional[int] = None,
) -> np.ndarray:
    """Decompress ``raw_data`` with lilcom and return a frame range.

    Args:
        raw_data: The lilcom-compressed bytes.
        left_offset_frames: Index of the first frame to return.
        right_offset_frames: Index one past the last frame to return, or
            ``None`` to read through the end.

    Returns:
        The decompressed array sliced along its first axis.
    """
    decoded = lilcom.decompress(raw_data)
    frame_window = slice(left_offset_frames, right_offset_frames)
    return decoded[frame_window]
def read(self, key: str, left_offset_frames: int = 0, right_offset_frames: Optional[int] = None) -> np.ndarray:
    """Decompress the entry ``key`` of ``self.hdf`` and return a frame range.

    Args:
        key: Name of the entry in ``self.hdf``.
        left_offset_frames: Index of the first frame to return.
        right_offset_frames: Index one past the last frame to return, or
            ``None`` to read through the end.

    Returns:
        The decompressed array sliced along its first axis.
    """
    dataset = self.hdf[key]
    # Indexing with [()] stands in for the ".value" attribute, which was
    # deprecated with the warning:
    #   H5pyDeprecationWarning: dataset.value has been deprecated.
    #   Use dataset[()] instead.
    compressed = dataset[()].tobytes()
    decoded = lilcom.decompress(compressed)
    return decoded[left_offset_frames:right_offset_frames]
def read(self, key: str, left_offset_frames: int = 0, right_offset_frames: Optional[int] = None) -> np.ndarray:
    """Decompress the object at ``{self.base_url}/{key}`` and return a frame range.

    Args:
        key: Location relative to ``self.base_url``; one leading slash, if
            present, is dropped before joining.
        left_offset_frames: Index of the first frame to return.
        right_offset_frames: Index one past the last frame to return, or
            ``None`` to read through the end.

    Returns:
        The decompressed array sliced along its first axis.
    """
    # The slash between base URL and key is inserted manually below, so a
    # leading slash on the key would otherwise be doubled.
    relative_key = key[1:] if key.startswith('/') else key
    url = f'{self.base_url}/{relative_key}'
    with SmartOpen.open(url, 'rb') as stream:
        decoded = lilcom.decompress(stream.read())
    return decoded[left_offset_frames:right_offset_frames]
def test_int16_lpc_order():
    """Print the int16 round-trip relative error for LPC orders 0 through 14.

    A single random 100x200 int16 matrix is compressed along its last axis
    with each LPC order and decompressed as int16; errors are computed after
    converting both arrays to float32.
    """
    reference = ((np.random.rand(100, 200) * 65535) - 32768).astype(np.int16)
    # The float32 view of the input and its norm do not change per LPC order.
    reference_f32 = reference.astype(np.float32)
    denominator = np.fabs(reference_f32).sum()

    for lpc in range(15):
        compressed = lilcom.compress(reference, axis=-1, lpc_order=lpc)
        restored = lilcom.decompress(compressed, dtype=np.int16)
        restored_f32 = restored.astype(np.float32)
        rel_error = np.fabs(reference_f32 - restored_f32).sum() / denominator
        print("Relative error in int16 with lpc order={} is {}".format(
            lpc, rel_error))
def read(
        self,
        key: str,
        left_offset_frames: int = 0,
        right_offset_frames: Optional[int] = None,
) -> np.ndarray:
    """Read a frame range from chunks stored back-to-back in ``self.file``.

    ``key`` is a comma-separated list of integers whose cumulative sum gives
    the byte boundaries of the chunks.  Only the chunks overlapping the
    requested frame range are read and decompressed; the decoded chunks are
    concatenated along axis 0 and then sliced to the exact range.

    Args:
        key: Comma-separated integers describing the chunk boundaries.
        left_offset_frames: Index of the first frame to return.
        right_offset_frames: Index one past the last frame to return, or
            ``None`` to read through the end.

    Returns:
        The requested frames, or the slice of an empty array when no chunk
        was selected.
    """
    frames_per_chunk = self.CHUNK_SIZE

    # Range of chunk boundaries covering the requested frames.
    first_chunk = floor(left_offset_frames / frames_per_chunk)
    if right_offset_frames is None:
        boundary_stop = None
    else:
        # One extra boundary is needed to mark the end of the last chunk.
        boundary_stop = ceil(right_offset_frames / frames_per_chunk) + 1

    boundaries = np.cumsum([int(field) for field in key.split(",")])
    boundaries = boundaries[first_chunk:boundary_stop]

    raw_chunks = []
    for begin, finish in pairwise(boundaries):
        # Seek and read must not be interleaved with another thread's
        # seek/read on the same file object, hence the lock.
        with self.lock:
            self.file.seek(begin)
            raw_chunks.append(self.file.read(finish - begin))

    # Decode every chunk and stitch the pieces together.
    decoded = [lilcom.decompress(blob) for blob in raw_chunks]
    arr = np.concatenate(decoded, axis=0) if decoded else np.array([])

    # The decoded array starts at frame first_chunk * CHUNK_SIZE, so the
    # requested offsets have to be shifted by that amount.
    base_frame = frames_per_chunk * first_chunk
    lo = left_offset_frames - base_frame
    hi = None if right_offset_frames is None else right_offset_frames - base_frame
    return arr[lo:hi]
def test_float():
    """Print relative round-trip errors for float32 input over several settings.

    Loops over bits-per-sample, axis, and whether caller-allocated ``out``
    buffers are used; a fresh random matrix is drawn for every combination.
    """
    for bits_per_sample in [4, 6, 8]:
        for axis in [-1, 1, 0, -2]:
            for use_out in [False, True]:
                # Matrix dimensions vary with the loop variables.
                num_rows = 100 + bits_per_sample + axis
                num_cols = 200 + bits_per_sample + axis
                a = np.random.randn(num_rows, num_cols).astype(np.float32)

                out_shape = lilcom.get_compressed_shape(
                    a.shape, axis, bits_per_sample)
                if use_out:
                    compress_out = np.empty(out_shape, dtype=np.int8)
                else:
                    compress_out = None
                b = lilcom.compress(a,
                                    axis=axis,
                                    bits_per_sample=bits_per_sample,
                                    out=compress_out)

                if use_out:
                    # The output dtype is carried by the buffer.
                    c = lilcom.decompress(
                        b, dtype=None, out=np.empty(a.shape, dtype=np.float32))
                else:
                    c = lilcom.decompress(b, dtype=np.float32, out=None)

                rel_error = np.fabs(a - c).sum() / np.fabs(a).sum()
                print(
                    "Relative error in float compression (axis={}, bits-per-sample={}) is {}"
                    .format(axis, bits_per_sample, rel_error))
#!/usr/bin/env python3
import lilcom
import numpy as np

# Round-trip random arrays of several shapes through lilcom at several tick
# powers, print the compressed size per element, and check that the largest
# positive and negative reconstruction errors stay within the expected bound.
for shape in [(40, 50), (3, 4, 5), (1, 5, 7), (8, 1, 10), (100, 2, 57)]:
    original = np.random.randn(*shape)
    for power in [-15, -8, -6]:
        compressed = lilcom.compress(original, power)
        restored = lilcom.decompress(compressed)
        num_bytes = len(compressed)
        print("len(b) = ", num_bytes, ", bytes per number = ",
              (num_bytes / original.size))

        error = restored - original
        largest = error.max()
        smallest = error.min()
        # Expected bound is 2**(power - 1), plus a small margin to account
        # for floating point roundoff.
        bound = (2**(power - 1)) + 5.0e-05
        print("max,min diff = {}, {}, expected magnitude was {}".format(
            largest, smallest, bound))
        assert largest <= bound and -smallest <= bound