def compare(dcn, batch_x):
    """
    Decode the same input twice: once directly from the model's latent
    output, and once after the latent values were quantized to the code book
    and passed through an FSE encode/decode round trip.

    Returns a tuple `(batch_y, image_y)`:

    - `batch_y` - `dcn.decompress` applied to the latent representation as is
    - `image_y` - `dcn.decompress` applied to the code-book values selected by
      the entropy-decoded indices

    Raises `AssertionError` if the FSE round trip does not reproduce the
    index bytes.
    """
    # Code book fetched through the model's session and flattened to 1-D
    code_book = dcn.sess.run(dcn.codebook).reshape(-1)

    # Reference path: compress and decompress without touching the latent
    batch_z = dcn.compress(batch_x)
    batch_y = dcn.decompress(batch_z)

    # Quantization and coding: nearest code-book entry for every latent value
    # (the distortion returned by vq is not needed here)
    indices, _ = cluster.vq.vq(batch_z.reshape(-1), code_book)

    # Entropy-code the indices as a stream of single bytes and decode it back
    n_symbols = int(np.prod(indices.shape))
    data = bytes(indices.astype(np.uint8))
    decoded_fse = pyfse.decompress(pyfse.compress(data), n_symbols)

    # Check sanity
    assert data == decoded_fse, 'Entropy decoding error'

    # The leading dimension is forced to 1 - presumably a single image is
    # expected in `batch_x`; verify against callers.
    target_shape = list(dcn.latent_shape)
    target_shape[0] = 1

    # Map decoded indices back to code-book values and decode the image
    decoded_indices = np.array(list(decoded_fse)).reshape(target_shape)
    image_y = dcn.decompress(code_book[decoded_indices])

    return batch_y, image_y
def _print_layer_debug(n, layer_values, indices):
    """Print the values, leading code-book indices and value histogram of layer `n`."""
    layer_stats = Counter(layer_values).items()
    print('[l3ic encoder]', 'Layer {} values:'.format(n), layer_values)
    print('[l3ic encoder]', 'Layer {} code-book indices:'.format(n), indices.reshape((-1))[:20])
    print('[l3ic encoder]', 'Layer {} hist:'.format(n), layer_stats)


def compress(batch_x, model, verbose=False):
    """
    Serialize the image as a bytes sequence. The feature maps are encoded as separate layers.

    ## Bit-stream structure:

    - Latent shape H x W x N = 3 x 1 byte (uint8)
    - Length of coded layer sizes = 2 bytes (uint16)
    - Coded layer sizes:
        - FSE encoded uint16 array of size 2 * N bytes (if possible to compress)
        - ...or RAW bytes
    - Coded layers:
        - FSE encoded uint8 array of latent vector size
        - ...or RLE encoded uint16 (number) + uint8 (byte) if all bytes are the same
        - ...or RAW uint8 indices if FSE reports the layer as not compressible

    The uint16 fields are produced with NumPy `tobytes()`, i.e. in the native
    byte order of the machine running the encoder.

    ## Parameters:

    - batch_x: a single image, either 3-D or 4-D with a leading dimension of 1
    - model: object providing `compress(batch_x)` (its result must expose
      `.numpy()`) and `get_codebook()`
    - verbose: print diagnostic information about the encoding process

    ## Returns:

    The encoded bit-stream as `bytes`.

    ## Raises:

    - AssertionError: `batch_x` is not a single 3-D / 4-D image
    - L3ICError: the code book has more than 256 entries, a latent dimension
      does not fit in one byte, or a layer encodes to a single byte
    - RuntimeError: the coded layer-length table is empty
    """
    if batch_x.ndim == 3:
        batch_x = np.expand_dims(batch_x, axis=0)

    assert batch_x.ndim == 4
    assert batch_x.shape[0] == 1

    image_stream = io.BytesIO()

    # Get latent space representation
    batch_z = model.compress(batch_x).numpy()

    # Every latent dimension is stored in one byte of the header; reject
    # anything larger instead of letting the uint8 cast wrap or overflow.
    if any(dim > 255 for dim in batch_z.shape[1:]):
        raise L3ICError(
            'Latent shape {} cannot be stored with 1 byte per dimension'.format(
                tuple(batch_z.shape[1:])))
    latent_shape = np.array(batch_z.shape[1:], dtype=np.uint8)

    # Write latent space shape to the bytestream
    image_stream.write(latent_shape.tobytes())

    # Encode feature layers separately
    coded_layers = []
    code_book = model.get_codebook()
    if verbose:
        print('[l3ic encoder]', 'Code book:', code_book)

    if len(code_book) > 256:
        raise L3ICError('Code-books with more than 256 centers are not supported')

    n_layers = int(latent_shape[-1])
    first_layer_indices = None

    for n in range(n_layers):
        # TODO Should a code book always be used? What about integers?
        layer_values = batch_z[:, :, :, n].reshape(-1)
        indices, _ = cluster.vq.vq(layer_values, code_book)
        if n == 0:
            # Kept for the verbose example dump after the loop
            first_layer_indices = indices

        index_bytes = indices.astype(np.uint8).tobytes()

        try:
            # Compress layer with FSE
            coded_layer = pyfse.compress(index_bytes)
        except pyfse.FSESymbolRepetitionError:
            # All bytes are identical, fallback to RLE: count + repeated byte
            coded_layer = np.uint16(len(indices)).tobytes() + np.uint8(indices[0]).tobytes()
        except pyfse.FSENotCompressibleError:
            # Stream does not compress, store the indices as they are
            coded_layer = index_bytes

        # Validate outside of the try statement: any other exception raised by
        # the encoder propagates untouched instead of being replaced by an
        # error about an unassigned `coded_layer`.
        if len(coded_layer) == 1:
            if verbose:
                _print_layer_debug(n, layer_values, indices)
            raise L3ICError(
                'Layer {} data compresses to a single byte? Something is wrong!'.format(n))

        coded_layers.append(coded_layer)

    # Show example layer (layer 0 together with its own indices)
    if verbose and n_layers > 0:
        _print_layer_debug(0, batch_z[:, :, :, 0].reshape(-1), first_layer_indices)

    # Write the layer size array
    layer_lengths = np.array([len(x) for x in coded_layers], dtype=np.uint16)

    try:
        coded_lengths = pyfse.compress(layer_lengths.tobytes())
        if verbose:
            print('[l3ic encoder]', 'FSE coded lengths')
    except pyfse.FSENotCompressibleError:
        # If the FSE coded stream is empty - it is not compressible - save natively
        if verbose:
            print('[l3ic encoder]', 'RAW coded lengths')
        coded_lengths = layer_lengths.tobytes()

    if verbose:
        print('[l3ic encoder]', 'Coded lengths #', len(coded_lengths), '=', coded_lengths)
        print('[l3ic encoder]', 'Layer lengths = ', layer_lengths)

    if len(coded_lengths) == 0:
        raise RuntimeError('Empty coded layer lengths!')

    image_stream.write(np.uint16(len(coded_lengths)).tobytes())
    image_stream.write(coded_lengths)

    # Write individual layers
    for layer in coded_layers:
        image_stream.write(layer)

    return image_stream.getvalue()
def global_compress(dcn, batch_x):
    """
    Naive FSE compression of the entire latent representation.

    The latent values returned by `dcn.compress(batch_x)` are flattened,
    mapped to the nearest entries of `dcn.get_codebook()` and the resulting
    indices are FSE-coded as one stream of single bytes.

    Returns whatever `pyfse.compress` returns for that stream; exceptions
    raised by `pyfse.compress` are not handled here and propagate to the
    caller.
    """
    batch_z = dcn.compress(batch_x).numpy()
    # Same vector-quantization entry point as the other encoders in this file;
    # the distortion value it returns is not needed.
    indices, _ = cluster.vq.vq(batch_z.reshape(-1), dcn.get_codebook())
    return pyfse.compress(bytes(indices.astype(np.uint8)))