def compress(self, x, f1, f2, f3, f4, f5, f6, f7, f8, outputfile, path, row):
  """Build model and compress latents."""
  mse, bpp, x_hat, pack = self._run(
      "compress", x=x, feature1=f1, feature2=f2, feature3=f3, feature4=f4,
      feature5=f5, feature6=f6, feature7=f7, feature8=f8)

  # Write a binary file with the shape information and the compressed string.
  packed = tfc.PackedTensors()
  tensors, arrays = zip(*pack)
  packed.pack(tensors, arrays)
  with open(outputfile, "wb") as f:
    f.write(packed.string)

  x *= 255  # x_hat is already in the [0..255] range
  psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
  msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

  # The actual bits per pixel including overhead.
  x_shape = tf.shape(x)
  num_pixels = tf.cast(tf.reduce_prod(x_shape[:-1]), dtype=tf.float32)
  packed_bpp = len(packed.string) * 8 / num_pixels

  for col in range(np.shape(x_hat)[1]):
    img = x_hat[0, col, :, :, :] / 255
    save_img(path, 0, img, row, col + 1)

  return x_hat, psnr, msssim, packed_bpp
def compress(args):
  """Compresses an image."""
  # Load model and use it to compress the image.
  model = tf.keras.models.load_model(args.model_path)
  x = read_png(args.input_file)
  tensors = model.compress(x)

  # Write a binary file with the shape information and the compressed string.
  packed = tfc.PackedTensors()
  packed.pack(tensors)
  with open(args.output_file, "wb") as f:
    f.write(packed.string)

  # If requested, decompress the image and measure performance.
  if args.verbose:
    x_hat = model.decompress(*tensors)

    # Cast to float in order to compute metrics.
    x = tf.cast(x, tf.float32)
    x_hat = tf.cast(x_hat, tf.float32)
    mse = tf.reduce_mean(tf.math.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x, x_hat, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x, x_hat, 255))
    msssim_db = -10. * tf.math.log(1 - msssim) / tf.math.log(10.)

    # The actual bits per pixel including entropy coding overhead.
    num_pixels = tf.reduce_prod(tf.shape(x)[:-1])
    bpp = len(packed.string) * 8 / num_pixels

    print(f"Mean squared error: {mse:0.4f}")
    print(f"PSNR (dB): {psnr:0.2f}")
    print(f"Multiscale SSIM: {msssim:0.4f}")
    print(f"Multiscale SSIM (dB): {msssim_db:0.2f}")
    print(f"Bits per pixel: {bpp:0.4f}")
def decompress(args):
  """Decompresses an image."""
  # Read the shape information and compressed string from the binary file.
  string = tf.placeholder(tf.string, [1])
  x_shape = tf.placeholder(tf.int32, [2])
  y_shape = tf.placeholder(tf.int32, [2])
  with open(args.input_file, "rb") as f:
    packed = tfc.PackedTensors(f.read())
  tensors = [string, x_shape, y_shape]
  arrays = packed.unpack(tensors)

  # Instantiate model.
  entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32)
  synthesis_transform = SynthesisTransform(args.num_filters)

  # Decompress and transform the image back.
  y_shape = tf.concat([y_shape, [args.num_filters]], axis=0)
  y_hat = entropy_bottleneck.decompress(string, y_shape, channels=args.num_filters)
  x_hat = synthesis_transform(y_hat)

  # Remove batch dimension, and crop away any extraneous padding on the bottom
  # or right boundaries.
  x_hat = x_hat[0, :x_shape[0], :x_shape[1], :]

  # Write reconstructed image out as a PNG file.
  op = write_png(args.output_file, x_hat)

  # Load the latest model checkpoint, and perform the above actions.
  with tf.Session() as sess:
    latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=latest)
    sess.run(op, feed_dict=dict(zip(tensors, arrays)))
def compress(self, x):
  """Build model and compress latents."""
  mse, bpp, x_hat, pack = self._run("compress", x=x)

  # Write a binary file with the shape information and the compressed string.
  packed = tfc.PackedTensors()
  tensors, arrays = zip(*pack)
  packed.pack(tensors, arrays)
  with open(self.args.output_file, "wb") as f:
    f.write(packed.string)

  # If requested, transform the quantized image back and measure performance.
  if self.args.verbose:
    x *= 255  # x_hat is already in the [0..255] range
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    # The actual bits per pixel including overhead.
    x_shape = tf.shape(x)
    num_pixels = tf.cast(tf.reduce_prod(x_shape[:-1]), dtype=tf.float32)
    packed_bpp = len(packed.string) * 8 / num_pixels

    print("Mean squared error: {:0.4f}".format(mse))
    print("PSNR (dB): {:0.2f}".format(psnr))
    print("Multiscale SSIM: {:0.4f}".format(msssim))
    print("Multiscale SSIM (dB): {:0.2f}".format(-10 * np.log10(1 - msssim)))
    print("Information content in bpp: {:0.4f}".format(bpp))
    print("Actual bits per pixel: {:0.4f}".format(packed_bpp))

  return x_hat
def decompress(input_file, output_file):
  """Decompresses a TFCI file and writes a PNG file."""
  if not output_file:
    output_file = input_file + ".png"

  with tf.Graph().as_default():
    # Unserialize packed data from disk.
    with tf.io.gfile.GFile(input_file, "rb") as f:
      packed = tfc.PackedTensors(f.read())

    # Load model metagraph.
    signature_defs = import_metagraph(packed.model)
    inputs, outputs = instantiate_signature(signature_defs["receiver"])

    # Multiple input tensors, ordered alphabetically, without names.
    inputs = [inputs[k] for k in sorted(inputs) if k.startswith("channel:")]
    # Just one output operation.
    outputs = write_png(output_file, outputs["output_image"])

    # Unpack data.
    arrays = packed.unpack(inputs)

    # Run decoder.
    with tf.Session() as sess:
      sess.run(outputs, feed_dict=dict(zip(inputs, arrays)))
def compress_image(model, input_image):
  """Compresses an image array into a bitstring."""
  time2 = {}
  with tf.Graph().as_default():
    t1 = time.perf_counter()
    # Load model metagraph.
    signature_defs = import_metagraph(model)
    inputs, outputs = instantiate_signature(signature_defs["sender"])
    t2 = time.perf_counter()
    time2['load_meta'] = t2 - t1

    # Just one input tensor.
    inputs = inputs["input_image"]
    # Multiple output tensors, ordered alphabetically, without names.
    outputs = [outputs[k] for k in sorted(outputs) if k.startswith("channel:")]

    # Run encoder.
    t1 = time.perf_counter()
    with tf.Session() as sess:
      arrays = sess.run(outputs, feed_dict={inputs: input_image})
    t2 = time.perf_counter()
    time2['run_enc'] = t2 - t1

    # Pack data into bitstring.
    t1 = time.perf_counter()
    packed = tfc.PackedTensors()
    packed.model = model
    packed.pack(outputs, arrays)
    t2 = time.perf_counter()
    time2['create_bit'] = t2 - t1

    return packed.string, time2
def compress_image(model, input_image):
  """Compresses an image tensor into a bitstring."""
  sender = instantiate_model_signature(model, "sender")
  tensors = sender(input_image)
  packed = tfc.PackedTensors()
  packed.model = model
  packed.pack(tensors)
  return packed.string
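A minimal usage sketch for the TF2 `compress_image` above, assuming the surrounding tfci.py helpers (`read_png`, `tf.io.gfile`) are in scope; the model name and file paths are illustrative placeholders, not taken from the original source.

# Hypothetical usage sketch; model name and paths are illustrative.
input_image = read_png("input.png")                              # HxWx3 image tensor
bitstring = compress_image("bmshj2018-factorized-mse-1", input_image)
with tf.io.gfile.GFile("input.tfci", "wb") as f:
  f.write(bitstring)                                             # packed.string is raw bytes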
def decompress(args):
  """Decompresses an image."""
  # Adapted from https://github.com/tensorflow/compression/blob/master/examples/bmshj2018.py
  # Read the shape information and compressed string from the binary file.
  string = tf.placeholder(tf.string, [1])
  side_string = tf.placeholder(tf.string, [1])
  x_shape = tf.placeholder(tf.int32, [2])
  y_shape = tf.placeholder(tf.int32, [2])
  z_shape = tf.placeholder(tf.int32, [2])
  with open(args.input_file, "rb") as f:
    packed = tfc.PackedTensors(f.read())
  tensors = [string, side_string, x_shape, y_shape, z_shape]
  arrays = packed.unpack(tensors)

  # Instantiate model. TODO: automate this with build_graph
  synthesis_transform = SynthesisTransform(args.num_filters)
  hyper_synthesis_transform = HyperSynthesisTransform(
      args.num_filters, num_output_filters=2 * args.num_filters)
  entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32)

  # Decompress and transform the image back.
  z_shape = tf.concat([z_shape, [args.num_filters]], axis=0)
  z_hat = entropy_bottleneck.decompress(side_string, z_shape, channels=args.num_filters)
  mu, sigma = tf.split(hyper_synthesis_transform(z_hat), num_or_size_splits=2, axis=-1)
  sigma = tf.exp(sigma)  # make positive
  training = False
  if not training:
    # Need to handle images with non-standard sizes during compression;
    # mu/sigma must have the same shape as y.
    mu = mu[:, :y_shape[0], :y_shape[1], :]
    sigma = sigma[:, :y_shape[0], :y_shape[1], :]
  scale_table = np.exp(
      np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
  conditional_bottleneck = tfc.GaussianConditional(
      sigma, scale_table, mean=mu, dtype=tf.float32)
  y_hat = conditional_bottleneck.decompress(string)
  x_hat = synthesis_transform(y_hat)

  # Remove batch dimension, and crop away any extraneous padding on the bottom
  # or right boundaries.
  x_hat = x_hat[0, :x_shape[0], :x_shape[1], :]

  # Write reconstructed image out as a PNG file.
  op = write_png(args.output_file, x_hat)

  # Load the latest model checkpoint, and perform the above actions.
  with tf.Session() as sess:
    save_dir = os.path.join(args.checkpoint_dir, args.runname)
    latest = tf.train.latest_checkpoint(checkpoint_dir=save_dir)
    tf.train.Saver().restore(sess, save_path=latest)
    sess.run(op, feed_dict=dict(zip(tensors, arrays)))
def decompress(input_file, output_file):
  """Decompresses a TFCI file and writes a PNG file."""
  if not output_file:
    output_file = input_file + ".png"
  with tf.io.gfile.GFile(input_file, "rb") as f:
    packed = tfc.PackedTensors(f.read())
  receiver = instantiate_model_signature(packed.model, "receiver")
  tensors = packed.unpack([t.dtype for t in receiver.inputs])
  output_image, = receiver(*tensors)
  write_png(output_file, output_image)
def compress(args):
  """Compresses an event file."""
  x = tf.constant(read_events(args.input_file))
  x_shape = tf.shape(x)

  analysis_transform = AnalysisTransform(32)
  entropy_bottleneck = tfc.EntropyBottleneck()
  synthesis_transform = SynthesisTransform(32)

  y = analysis_transform(x)
  string = entropy_bottleneck.compress(y)

  y_hat, likelihoods = entropy_bottleneck(y, training=False)
  x_hat = synthesis_transform(y_hat)

  timestamps, polarities = tf.split(x_hat, num_or_size_splits=2, axis=-1)
  timestamps = tf.math.abs(timestamps)
  polarities = tf.round(tf.math.tanh(polarities))
  x_hat = tf.concat([timestamps, polarities], axis=-1)

  eval_bpp = tf.reduce_mean(
      -tf.reduce_sum(likelihoods * tf.log(likelihoods), axis=[1, 2]) / np.log(2))
  mse = tf.reduce_mean((x - x_hat) ** 2.)

  with tf.Session() as sess:
    # Load the latest model checkpoint, get the compressed string and the
    # tensor shapes.
    latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=latest)
    tensors = [string, tf.shape(x)[1:-1], tf.shape(y)[1:-1]]
    arrays = sess.run(tensors)

    # Write a binary file with the shape information and the compressed string.
    packed = tfc.PackedTensors()
    packed.pack(tensors, arrays)
    with open(args.output_file, "wb") as f:
      f.write(packed.string)

    # If requested, transform the quantized image back and measure performance.
    if args.verbose:
      # eval_bpp, mse, psnr, msssim, num_pixels = sess.run(
      #     [eval_bpp, mse, psnr, msssim, num_pixels])
      eval_bpp, mse = sess.run([eval_bpp, mse])
      compression_ratio = os.path.getsize(args.input_file) / len(packed.string)
      print("Mean squared error: {:0.4f}".format(mse))
      print("Estimated entropy: {}".format(eval_bpp))
      print("Compression ratio: {}".format(compression_ratio))
def dataset_compressor(model, input_files, output_files, target_bpp=None, bpp_strict=False):
  data_bytes = []
  if not target_bpp:
    # Just compress with a specific model.
    with tf.Graph().as_default():
      signature_defs = import_metagraph(model)
      inputs_c, outputs_c = instantiate_signature(signature_defs["sender"])
      inputs_d, outputs_d = instantiate_signature(signature_defs["receiver"])

      for i in tqdm(range(len(input_files))):
        input_file = input_files[i]
        output_file = output_files[i]

        with tf.Session() as sess:
          input_image = sess.run(read_png(input_file))
        num_pixels = input_image.shape[-2] * input_image.shape[-3]

        # Just one input tensor.
        inputs_compress = inputs_c["input_image"]
        # Multiple output tensors, ordered alphabetically, without names.
        outputs_compress = [outputs_c[k] for k in sorted(outputs_c)
                            if k.startswith("channel:")]

        # Run encoder.
        with tf.Session() as sess:
          arrays = sess.run(outputs_compress, feed_dict={inputs_compress: input_image})

        # Pack data into bitstring.
        packed = tfc.PackedTensors()
        # packed.model = model
        packed.pack(outputs_compress, arrays)
        # Need to get the size of the compressed image here (packed.string).
        data_bytes.append(len(packed.string))

        # Decompression.
        # Multiple input tensors, ordered alphabetically, without names.
        inputs_decompress = [inputs_d[k] for k in sorted(inputs_d)
                             if k.startswith("channel:")]
        # Just one output operation.
        outputs_decompress = write_png(output_file, outputs_d["output_image"])

        # Unpack data.
        arrays = packed.unpack(inputs_decompress)

        # Run decoder.
        with tf.Session() as sess:
          sess.run(outputs_decompress, feed_dict=dict(zip(inputs_decompress, arrays)))
  else:
    raise RuntimeError("Not implemented yet")

  np.save(os.path.join(os.path.join(*output_files[0].split("/")[:-1]), "data_bytes.npy"),
          data_bytes)
def decompress(input_file, output_file):
  """Decompresses a TFCI file and writes a PNG file."""
  if not output_file:
    output_file = input_file + ".png"
  with tf.io.gfile.GFile(input_file, "rb") as f:
    packed = tfc.PackedTensors(f.read())
  receiver = instantiate_model_signature(packed.model, "receiver")
  tensors = packed.unpack([t.dtype for t in receiver.inputs])
  # Find potential RD parameter and turn it back into a scalar.
  for i, t in enumerate(tensors):
    if t.dtype.is_floating and t.shape == (1,):
      tensors[i] = tf.squeeze(t, 0)
  output_image, = receiver(*tensors)
  write_png(output_file, output_image)
def decompress(args):
  """Decompresses an image."""
  # Load the model and determine the dtypes of tensors required to decompress.
  model = tf.keras.models.load_model(args.model_path)
  dtypes = [t.dtype for t in model.decompress.input_signature]

  # Read the shape information and compressed string from the binary file,
  # and decompress the image using the model.
  with open(args.input_file, "rb") as f:
    packed = tfc.PackedTensors(f.read())
  tensors = packed.unpack(dtypes)
  x_hat = model.decompress(*tensors)

  # Write reconstructed image out as a PNG file.
  write_png(args.output_file, x_hat)
def decompress(args):
  """Decompresses an image."""
  # Read the shape information and compressed string from the binary file.
  string = tf.placeholder(tf.string, [1])
  side_string = tf.placeholder(tf.string, [1])
  x_shape = tf.placeholder(tf.int32, [2])
  y_shape = tf.placeholder(tf.int32, [2])
  z_shape = tf.placeholder(tf.int32, [2])
  with open('/media/xproject/file/Surige/compression-master/examples/rnn_baseline/recon/recon.bin', "rb") as f:
    packed = tfc.PackedTensors(f.read())
  tensors = [string, side_string, x_shape, y_shape, z_shape]
  arrays = packed.unpack(tensors)

  # Add a batch dimension, then decompress and transform the image back.
  d = decoder(args.batchsize, height=x_shape[0], width=x_shape[1])
  hd = HyperDecoder(args.batchsize, height=x_shape[0] // 16, width=x_shape[1] // 16)
  entropy_bottleneck = tfc.EntropyBottleneck(name='entropy_iter', dtype=tf.float32)

  # Decompress and transform the image back.
  z_shape = tf.concat([z_shape, [args.num_filters]], axis=0)
  z_hat = entropy_bottleneck.decompress(side_string, z_shape, channels=args.num_filters)
  sigma = hd.hyper_decode(z_hat)
  sigma = sigma[:, :y_shape[0], :y_shape[1], :]
  scale_table = np.exp(
      np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
  conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table, dtype=tf.float32)
  y_hat = conditional_bottleneck.decompress(string)
  x_hat = d.decode(y_hat)

  # Remove batch dimension, and crop away any extraneous padding on the bottom
  # or right boundaries.
  x_hat = x_hat[0, :x_shape[0], :x_shape[1], :]

  # Write reconstructed image out as a PNG file.
  op = write_png(args.output_file, x_hat)

  # Load the latest model checkpoint, and perform the above actions.
  with tf.Session() as sess:
    latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=latest)
    sess.run(op, feed_dict=dict(zip(tensors, arrays)))
def compress(self, input_img, orig_img):
  arrays = self.sess.run(self.tensors, feed_dict={self.x: input_img})
  packed = tfc.PackedTensors()
  packed.pack(self.tensors, arrays)

  eval_bpp_val, mse_val, psnr_val, msssim_val, num_pixels_val, reconstruct_img = self.sess.run(
      [self.eval_bpp, self.mse, self.psnr, self.msssim, self.num_pixels, self.x_hat],
      feed_dict={self.x: input_img, self.orig_x: orig_img})

  actual_bpp = len(packed.string) * 8 / num_pixels_val
  return (actual_bpp, reconstruct_img, eval_bpp_val, mse_val, psnr_val,
          msssim_val, num_pixels_val)
def decompress(args):
  """Decompresses one row of a light-field (LF) image."""
  # Three integers for tensor shapes + nine encoded strings.
  np_dtypes = [np.integer] * 3 + [np.bytes_] * 9
  with open(args.input_file, "rb") as f:
    packed = tfc.PackedTensors(f.read())
  arrays = packed.unpack_from_np_dtypes(np_dtypes)

  # Build model and restore optimized parameters.
  model = CompressionModel(args)
  checkpoint = tf.train.Checkpoint(model=model)
  restore_path = tf.train.latest_checkpoint(args.checkpoint_dir)
  checkpoint.restore(restore_path)

  curr_decoded = model.decompress(arrays)
  row = int(args.input_file.split('/')[-1].split('.')[0])

  # Write reconstructed images out as PNG files.
  for col in range(np.shape(curr_decoded)[1]):
    img = curr_decoded[0, col, :, :, :] / 255
    save_img(args.output_file, 0, img, row, col + 1)
def decompress(args):
  """Decompresses an image."""
  # Three integers for tensor shapes + hyperprior and N slice strings.
  np_dtypes = [np.integer] * 3 + [np.bytes_] * (NUM_SLICES + 1)
  with open(args.input_file, "rb") as f:
    packed = tfc.PackedTensors(f.read())
  arrays = packed.unpack_from_np_dtypes(np_dtypes)

  # Build model, restore optimized parameters, and decompress the bitstrings.
  model = CompressionModel(args)
  checkpoint = tf.train.Checkpoint(model=model)
  restore_path = tf.train.latest_checkpoint(args.checkpoint_dir)
  print("Restore checkpoint:", restore_path)
  checkpoint.restore(restore_path)
  x_hat = model.decompress(arrays)

  # Write reconstructed image out as a PNG file.
  write_png(args.output_file, x_hat[0] / 255)
def decompress(input_file, output_file):
  """Decompresses a TFCI file and writes a PNG file."""
  ta = time.perf_counter()
  t = {}
  if not output_file:
    output_file = input_file + ".png"

  with tf.Graph().as_default():
    # Unserialize packed data from disk.
    t1 = time.perf_counter()
    with tf.io.gfile.GFile(input_file, "rb") as f:
      packed = tfc.PackedTensors(f.read())
    t2 = time.perf_counter()
    t['DEC_open_bit'] = t2 - t1

    # Load model metagraph.
    t1 = time.perf_counter()
    signature_defs = import_metagraph(packed.model)
    inputs, outputs = instantiate_signature(signature_defs["receiver"])
    t2 = time.perf_counter()
    t['DEC_load_meta'] = t2 - t1

    # Multiple input tensors, ordered alphabetically, without names.
    inputs = [inputs[k] for k in sorted(inputs) if k.startswith("channel:")]
    # Just one output operation.
    outputs = write_png(output_file, outputs["output_image"])

    # Unpack data.
    arrays = packed.unpack(inputs)

    # Run decoder.
    t1 = time.perf_counter()
    with tf.Session() as sess:
      sess.run(outputs, feed_dict=dict(zip(inputs, arrays)))
    t2 = time.perf_counter()
    t['DEC_run_dec'] = t2 - t1

  tb = time.perf_counter()
  t['DEC_total_time'] = tb - ta
  return t
def compress_image(model, input_image):
  """Compresses an image array into a bitstring."""
  with tf.Graph().as_default():
    # Load model metagraph.
    signature_defs = import_metagraph(model)
    inputs, outputs = instantiate_signature(signature_defs["sender"])

    # Just one input tensor.
    inputs = inputs["input_image"]
    # Multiple output tensors, ordered alphabetically, without names.
    outputs = [outputs[k] for k in sorted(outputs) if k.startswith("channel:")]

    # Run encoder.
    with tf.Session() as sess:
      arrays = sess.run(outputs, feed_dict={inputs: input_image})

    # Pack data into bitstring.
    packed = tfc.PackedTensors()
    packed.model = model
    packed.pack(outputs, arrays)
    return packed.string
def compress_image(model, input_image, rd_parameter=None):
  """Compresses an image tensor into a bitstring."""
  sender = instantiate_model_signature(model, "sender")
  if len(sender.inputs) == 1:
    if rd_parameter is not None:
      raise ValueError("This model doesn't expect an RD parameter.")
    tensors = sender(input_image)
  elif len(sender.inputs) == 2:
    if rd_parameter is None:
      raise ValueError("This model expects an RD parameter.")
    rd_parameter = tf.constant(rd_parameter, dtype=sender.inputs[1].dtype)
    tensors = sender(input_image, rd_parameter)
    # Find RD parameter and expand it to a 1D tensor so it fits into the
    # PackedTensors format.
    for i, t in enumerate(tensors):
      if t.dtype.is_floating and t.shape.rank == 0:
        tensors[i] = tf.expand_dims(t, 0)
  else:
    raise RuntimeError("Unexpected model signature.")
  packed = tfc.PackedTensors()
  packed.model = model
  packed.pack(tensors)
  return packed.string
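A short usage sketch contrasting the two branches of the `compress_image` variant above; the model names are illustrative placeholders standing in for a fixed-rate model (one sender input) and a variable-rate model (sender input plus RD parameter).

# Hypothetical usage sketch; model names are placeholders, not real hosted models.
bits_fixed = compress_image("some-fixed-rate-model", input_image)           # 1 input
bits_var = compress_image("some-variable-rate-model", input_image,
                          rd_parameter=1.0)                                  # 2 inputs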
def decompress(input_bin_path, input_res_path, output_img_path, ckp_dir, tau): with tf.device('/cpu:0'): # Load bin and res string = tf.placeholder(tf.string, [1]) side_string = tf.placeholder(tf.string, [1]) x_shape = tf.placeholder(tf.int32, [2]) y_shape = tf.placeholder(tf.int32, [2]) z_shape = tf.placeholder(tf.int32, [2]) with open(input_bin_path, "rb") as f: packed = tfc.PackedTensors(f.read()) tensors = [string, side_string, x_shape, y_shape, z_shape] arrays = packed.unpack(tensors) # instantiate model decoder = nll_codec.Decoder(192) hyper_decoder_sigma = nll_codec.HyperDecoder(192) hyper_decoder_mu = nll_codec.HyperDecoder(192) entropy_parameters_sigma = nll_codec.EntropyParameters(192) entropy_parameters_mu = nll_codec.EntropyParameters(192) entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32) res_compressor = nll_codec.ResidualCompressor(128, 5) masked_conv = nll_codec.MaskedConv2d("A", 64, (5, 5), padding="VALID") res_compressor_cond = bc.ResidualCompressor_cond(128, 5) # build decoder z_shape = tf.concat([z_shape, [192]], axis=0) z_hat_decode = entropy_bottleneck.decompress( side_string, z_shape, channels=192) # decode z (including dequantization) psi_sigma = hyper_decoder_sigma(z_hat_decode) psi_mu = hyper_decoder_mu(z_hat_decode) sigma = entropy_parameters_sigma(psi_sigma) mu = entropy_parameters_mu(psi_mu) sigma = sigma[:, :y_shape[0], :y_shape[1], :] mu = mu[:, :y_shape[0], :y_shape[1], :] scale_table = np.exp( np.linspace(np.log(SCALE_MIN), np.log(SCALE_MAX), SCALES_LEVELS)) conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table, mean=mu, dtype=tf.float32) y_hat_decode = conditional_bottleneck.decompress( string) # decode y (including dequantization) x_hat, res_prior = decoder(y_hat_decode) x_hat = x_hat[:, :x_shape[0], :x_shape[1], :] x_hat = tf.clip_by_value(x_hat, 0, 1) x_hat = tf.math.floor(x_hat * 255 + 0.5) res_prior = res_prior[:, :x_shape[0], :x_shape[1], :] tau_list = tf.constant([int(tau - 1)], tf.int32) cond = tf.one_hot(tau_list, 5) num_pixels = tf.cast(tf.reduce_prod(x_shape[:-1]), dtype=tf.float32) res_q_patch = tf.placeholder(dtype=tf.float32, shape=(1, 5, 5, 3)) res_prior_channel_num = 64 res_prior_patch = tf.placeholder(dtype=tf.float32, shape=(1, 1, 1, res_prior_channel_num)) res_q_vector = tf.placeholder(dtype=tf.float32, shape=(1, 1, 1, 3)) bin_sz = 2 * tau + 1 pmf_length = int(510 // bin_sz + 1) pmf_end = (255 // bin_sz) * bin_sz context = masked_conv(res_q_patch) res_prior_context = tf.concat([res_prior_patch, context], 3) bias_correction = True if bias_correction and int(tau) > 0: res_mu, res_log_sigma, res_pi, res_lambda = res_compressor_cond( res_prior_context, cond) else: res_mu, res_log_sigma, res_pi, res_lambda = res_compressor( res_prior_context) res_mu_tiled = tf.tile(res_mu, tf.constant([pmf_length, 1, 1, 1], tf.int32)) res_log_sigma_tiled = tf.tile( res_log_sigma, tf.constant([pmf_length, 1, 1, 1], tf.int32)) res_pi_tiled = tf.tile(res_pi, tf.constant([pmf_length, 1, 1, 1], tf.int32)) res_lambda_tiled = tf.tile( res_lambda, tf.constant([pmf_length, 1, 1, 1], tf.int32)) res_bottleneck = lmm.LogisticMixtureModel(res_mu_tiled, res_log_sigma_tiled, res_pi_tiled, res_lambda_tiled) res_pmf = res_bottleneck.pmf_tau(res_q_vector, tau) with tf.Session() as sess: latest = tf.train.latest_checkpoint(checkpoint_dir=ckp_dir) tf.train.Saver().restore(sess, save_path=latest) # lossy image decoding print("Lossy Image Decoding Start.") res_prior_out, x_out, num_pixels_out, x_shape_out = sess.run( [res_prior, x_hat, num_pixels, x_shape], 
feed_dict=dict(zip(tensors, arrays))) print("Lossy Image Decoding Finish.") k_sz = 5 pad_sz = 2 x_h, x_w = x_shape_out x_c = 3 res_q_dec_padded = np.zeros( (1, x_h + 2 * pad_sz, x_w + 2 * pad_sz, x_c)) decoder = RangeDecoder(input_res_path) print('Residual Decoding Start.') for h_idx in range(x_h): for w_idx in range(x_w): res_q_extracted = res_q_dec_padded[:, h_idx:h_idx + k_sz, w_idx:w_idx + k_sz, :] res_prior_extracted = res_prior_out[:, h_idx, w_idx, :].reshape( 1, 1, 1, res_prior_channel_num ) for c_idx in range(x_c): res_q_vector_extracted = res_q_dec_padded[:, h_idx + pad_sz, w_idx + pad_sz, :].reshape( 1, 1, 1, 3) res_pmf_out = sess.run(res_pmf, feed_dict={ res_q_patch: res_q_extracted, res_prior_patch: res_prior_extracted, res_q_vector: res_q_vector_extracted }) c_pmf = res_pmf_out[:, 0, 0, c_idx] c_pmf = np.clip(c_pmf, 1.0 / 65025, 1.0) c_pmf = c_pmf / np.sum(c_pmf) cumFreq = np.floor( np.append([0.], np.cumsum(c_pmf)) * 65536. + 0.5).astype(np.int32).tolist() dataRec = decoder.decode(1, cumFreq) res_q_dec_padded[0, h_idx + pad_sz, w_idx + pad_sz, c_idx] = dataRec[0] * bin_sz - pmf_end print("Decode Finish.") decoder.close() res_q_dec = res_q_dec_padded[:, pad_sz:x_h + pad_sz, pad_sz:x_w + pad_sz, :] x_rec = np.clip(np.squeeze(x_out + res_q_dec), 0, 255) im = Image.fromarray(np.uint8(x_rec)) im.save(output_img_path) return x_rec
def get_arithmetic_coding_bpp(bitstring, bitstring_np, num_pixels):
  """Calculate bitrate we obtain with arithmetic coding."""
  # TODO(fab-jul): Add `compress` and `decompress` methods.
  packed = tfc.PackedTensors()
  packed.pack(tensors=bitstring, arrays=bitstring_np)
  return len(packed.string) * 8 / num_pixels
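A small sketch of how the helper above might be called, assuming `string_tensor` is the tensor that was packed and `string_value` its evaluated numpy value; both names and the pixel count are illustrative.

# Hypothetical usage sketch; names and numbers are illustrative.
bpp = get_arithmetic_coding_bpp(
    bitstring=[string_tensor],      # tensors that were packed
    bitstring_np=[string_value],    # their evaluated numpy values
    num_pixels=768 * 512)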
def compress(args): """Compresses an image.""" # Load input image and add batch dimension. image = imread(args.input_file).astype(np.float32) img = read_png(args.input_file) img = tf.expand_dims(img, 0) img.set_shape([1, img.shape[1], img.shape[2], 3]) x_shape = tf.shape(img) x = img - 0.5 # Transform and compress the image, then remove batch dimension. e = encoder(args.batchsize, height=image.shape[0], width=image.shape[1]) d = decoder(args.batchsize, height=image.shape[0], width=image.shape[1]) he = HyperEncoder(args.batchsize, height=image.shape[0] // 16, width=image.shape[1] // 16) hd = HyperDecoder(args.batchsize, height=image.shape[0] // 16, width=image.shape[1] // 16) #iteration # Transform and compress the image. encodes = [] hyper_encodes = [] strings = [] side_strings = [] MSE = [] PSNR = [] MSSSIM = [] eval_bpp = 0 x_hats = tf.zeros_like(x) + 0.5 num_pixels = tf.cast(tf.reduce_prod(tf.shape(img)[:-1]), dtype=tf.float32) comps = [] for i in range(args.iter): y = e.encode(x) encodes.append(y) y_shape = tf.shape(y) z = he.hyper_encode(abs(y)) hyper_encodes.append(z) entropy_bottleneck = tfc.EntropyBottleneck(name='entropy_iter'+ str(i)) z_hat, z_likelihoods = entropy_bottleneck(z, training=False) sigma = hd.hyper_decode(z_hat) sigma = sigma[:, :y_shape[1], :y_shape[2], :] scale_table = np.exp(np.linspace( np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)) conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table,name='conditional'+ str(i)) side_string = entropy_bottleneck.compress(z) side_strings.append(side_string) string = conditional_bottleneck.compress(y) strings.append(string) # Transform the quantized image back (if requested). y_hat, y_likelihoods = conditional_bottleneck(y, training=False) x_hat = d.decode(y_hat) x_hat = x_hat[:, :x_shape[1], :x_shape[2], :] # Total number of bits divided by number of pixels. eval_bpp += ((tf.reduce_sum(tf.log(y_likelihoods)) + tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)) x = x - x_hat x_hats += x_hat # Bring both images back to 0..255 range. original = img * 255 compressdes = tf.clip_by_value(x_hats, 0, 1) compressdes = tf.round(compressdes * 255) comps.append(compressdes) mse = tf.reduce_mean(tf.squared_difference(original, compressdes)) psnr = tf.squeeze(tf.image.psnr(compressdes, original, 255)) msssim = tf.squeeze(tf.image.ssim_multiscale(compressdes, original, 255)) MSE.append(mse) PSNR.append(psnr) MSSSIM.append(msssim) with tf.Session() as sess: # Load the latest model checkpoint, get the compressed string and the tensor # shapes. latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir) tf.train.Saver().restore(sess, save_path=latest) bpp = 0 for i in range(args.iter): tensors = [strings[i], side_strings[i], tf.shape(img)[1:-1], tf.shape(encodes[i])[1:-1], tf.shape(hyper_encodes[i])[1:-1]] arrays = sess.run(tensors) # Write a binary file with the shape information and the compressed string. packed = tfc.PackedTensors() packed.pack(tensors, arrays) with open(args.output_file, "wb") as f: f.write(packed.string) # If requested, transform the quantized image back and measure performance. eval_bpps, mses, psnrs, msssims, num_pixelses = sess.run( [eval_bpp, MSE[i], PSNR[i], MSSSIM[i], num_pixels]) comp = comps[i].eval() # The actual bits per pixel including overhead. 
bpp += (len(packed.string) * 8 / num_pixelses) print("Mean squared error: {:0.4f}".format(mses)) print("PSNR (dB): {:0.2f}".format(psnrs)) print("Multiscale SSIM: {:0.4f}".format(msssims)) print("Multiscale SSIM (dB): {:0.2f}".format(-10 * np.log10(1 - msssims))) print("Information content in bpp: {:0.4f}".format(eval_bpps)) print("Actual bits per pixel: {:0.4f}".format(bpp)) fin = open("rnn_256-512_0.01-0.08_results.txt", 'a+') fin.write("Iter %d, %.8f, %.8f, %.8f, %.8f" % (i, mses, psnrs, msssims, bpp)) fin.write("\n") comp = np.squeeze(comp) imsave('compressed/recon_'+str(i) + '.png', comp)
def compress(input_path, output_bin_path, output_res_path, ckp_dir, tau): with tf.device('/cpu:0'): # Load and Pad Image x = read_png(input_path) mod = tf.constant([64, 64, 1], dtype=tf.int32) div = tf.cast(tf.math.ceil(tf.math.truediv(tf.shape(x), mod)), tf.int32) paddings = tf.math.subtract(tf.math.multiply(div, mod), tf.shape(x)) paddings = tf.expand_dims(paddings, 1) paddings = tf.concat( [tf.convert_to_tensor(np.zeros((3, 1)), dtype=tf.int32), paddings], axis=1) x_pad = tf.pad(x, paddings, "REFLECT") x_pad = tf.expand_dims(x_pad, 0) x_pad.set_shape([1, None, None, 3]) x = tf.expand_dims(x, 0) x.set_shape([1, None, None, 3]) x_shape = tf.shape(x) x_norm = x_pad / 255 # instantiate model encoder = nll_codec.Encoder(192) decoder = nll_codec.Decoder(192) hyper_encoder = nll_codec.HyperEncoder(192) hyper_decoder_sigma = nll_codec.HyperDecoder(192) hyper_decoder_mu = nll_codec.HyperDecoder(192) entropy_parameters_sigma = nll_codec.EntropyParameters(192) entropy_parameters_mu = nll_codec.EntropyParameters(192) entropy_bottleneck = tfc.EntropyBottleneck() res_compressor = nll_codec.ResidualCompressor(128, 5) masked_conv = nll_codec.MaskedConv2d("A", 64, (5, 5), padding="VALID") res_compressor_cond = bc.ResidualCompressor_cond(128, 5) # build model and encode/decode y = encoder(x_norm) y_shape = tf.shape(y) z = hyper_encoder(y) side_string = entropy_bottleneck.compress( z) # encode z (including quantization) z_hat_decode = entropy_bottleneck.decompress( side_string, tf.shape(z)[1:], channels=192) # decode z (including dequantization) psi_sigma = hyper_decoder_sigma(z_hat_decode) psi_mu = hyper_decoder_mu(z_hat_decode) sigma = entropy_parameters_sigma(psi_sigma) mu = entropy_parameters_mu(psi_mu) scale_table = np.exp( np.linspace(np.log(SCALE_MIN), np.log(SCALE_MAX), SCALES_LEVELS)) conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table, mean=mu) string = conditional_bottleneck.compress( y) # encode y (including quantization) y_hat_decode = conditional_bottleneck.decompress( string) # decode y (including dequantization) x_hat, res_prior = decoder(y_hat_decode) x_hat = x_hat[:, :x_shape[1], :x_shape[2], :] x_hat = tf.clip_by_value(x_hat, 0, 1) x_hat = tf.math.floor(x_hat * 255 + 0.5) res_prior = res_prior[:, :x_shape[1], :x_shape[2], :] res = x - x_hat res_q = tf.where(res >= 0, (2 * tau + 1) * tf.math.floor( (res + tau) / (2 * tau + 1)), (2 * tau + 1) * tf.math.ceil( (res - tau) / (2 * tau + 1))) tau_list = tf.constant([int(tau - 1)], tf.int32) cond = tf.one_hot(tau_list, 5) num_pixels = tf.cast(tf.reduce_prod(x_shape[:-1]), dtype=tf.float32) res_q_patch = tf.placeholder(dtype=tf.float32, shape=(1, 5, 5, 3)) res_prior_channel_num = 64 res_prior_patch = tf.placeholder(dtype=tf.float32, shape=(1, 1, 1, res_prior_channel_num)) res_q_vector = tf.placeholder(dtype=tf.float32, shape=(1, 1, 1, 3)) bin_sz = 2 * tau + 1 pmf_length = int(510 // bin_sz + 1) pmf_end = (255 // bin_sz) * bin_sz context = masked_conv(res_q_patch) res_prior_context = tf.concat([res_prior_patch, context], 3) bias_correction = True if bias_correction and int(tau) > 0: res_mu, res_log_sigma, res_pi, res_lambda = res_compressor_cond( res_prior_context, cond) else: res_mu, res_log_sigma, res_pi, res_lambda = res_compressor( res_prior_context) res_mu_tiled = tf.tile(res_mu, tf.constant([pmf_length, 1, 1, 1], tf.int32)) res_log_sigma_tiled = tf.tile( res_log_sigma, tf.constant([pmf_length, 1, 1, 1], tf.int32)) res_pi_tiled = tf.tile(res_pi, tf.constant([pmf_length, 1, 1, 1], tf.int32)) res_lambda_tiled = tf.tile( 
res_lambda, tf.constant([pmf_length, 1, 1, 1], tf.int32)) res_bottleneck = lmm.LogisticMixtureModel(res_mu_tiled, res_log_sigma_tiled, res_pi_tiled, res_lambda_tiled) res_pmf = res_bottleneck.pmf_tau(res_q_vector, tau) # MSE eval_mse = tf.reduce_mean(tf.squared_difference(x, x_hat)) # PSNR eval_psnr = 10 * tf.math.log(255**2 / eval_mse) / tf.math.log(10.0) # max abs diff eval_max_abs_diff = tf.reduce_max(tf.abs(tf.subtract(x, x_hat))) with tf.Session() as sess: latest = tf.train.latest_checkpoint(checkpoint_dir=ckp_dir) tf.train.Saver().restore(sess, save_path=latest) tensors = [ string, side_string, tf.shape(x)[1:-1], tf.shape(y)[1:-1], tf.shape(z)[1:-1] ] arrays = sess.run(tensors) # write binary file packed = tfc.PackedTensors() packed.pack(tensors, arrays) with open(output_bin_path, "wb") as f: f.write(packed.string) # Lossy Image Encoding print("Lossy Image Encoding Start.") res_prior_out, res_q_out, _, x_org, x_out, lossy_mse, lossy_psnr, lossy_max_abs_diff, num_pixels_out, x_shape_out = sess.run( [ res_prior, res_q, string, x, x_hat, eval_mse, eval_psnr, eval_max_abs_diff, num_pixels, x_shape ]) print("Lossy Image Encoding Finish.") k_sz = 5 pad_sz = 2 _, x_h, x_w, x_c = x_shape_out res_q_padded = np.pad(res_q_out, ((0, 0), (pad_sz, pad_sz), (pad_sz, pad_sz), (0, 0)), 'constant') encoder = RangeEncoder(output_res_path) print('Residual Encoding Start.') for h_idx in range(x_h): for w_idx in range(x_w): res_q_extracted = res_q_padded[:, h_idx:h_idx + k_sz, w_idx:w_idx + k_sz, :] res_prior_extracted = res_prior_out[:, h_idx, w_idx, :].reshape( 1, 1, 1, res_prior_channel_num ) res_q_vector_extracted = res_q_out[:, h_idx, w_idx, :].reshape( 1, 1, 1, 3) res_pmf_out = sess.run(res_pmf, feed_dict={ res_q_patch: res_q_extracted, res_prior_patch: res_prior_extracted, res_q_vector: res_q_vector_extracted }) res_q_vector_extracted = ( res_q_vector_extracted[0, 0, 0, :] + pmf_end) // bin_sz for c_idx in range(x_c): c_pmf = res_pmf_out[:, 0, 0, c_idx] c_pmf = np.clip(c_pmf, 1.0 / 65025, 1.0) c_pmf = c_pmf / np.sum(c_pmf) cumFreq = np.floor( np.append([0.], np.cumsum(c_pmf)) * 65536. + 0.5).astype(np.int32).tolist() encoder.encode([int(res_q_vector_extracted[c_idx])], cumFreq) print("Encoding Finish.") encoder.close() print("Lossy MSE:{}, Lossy PSNR:{}, Lossy max_abs_diff:{}".format( lossy_mse, lossy_psnr, lossy_max_abs_diff)) img_sz_out = os.path.getsize(output_bin_path) res_sz_out = os.path.getsize(output_res_path) eval_sz_out = img_sz_out + res_sz_out img_bpsp = os.path.getsize(output_bin_path) * 8 / (x_c * x_h * x_w) res_bpsp = os.path.getsize(output_res_path) * 8 / (x_c * x_h * x_w) eval_bpsp = img_bpsp + res_bpsp print("tau:{}, bpsp:{}, img_bpsp:{}, res_bpsp:{}".format( tau, eval_bpsp, img_bpsp, res_bpsp)) x_rec = np.clip(np.squeeze(x_out + res_q_out), 0, 255) max_abs_diff = np.amax(np.abs(x_org - x_rec)) mse = np.mean((x_org - x_rec)**2) psnr = 10 * np.log10(255**2 / mse) print("Max abs diff:{}, NLL MSE:{}, NLL PSNR:{}".format( max_abs_diff, mse, psnr)) return eval_sz_out, img_sz_out, res_sz_out
def compress(args):
  """Compresses an image."""
  # Load input image and add batch dimension.
  x = read_png(args.input_file)
  x = tf.expand_dims(x, 0)
  x.set_shape([1, None, None, 3])
  x_shape = tf.shape(x)

  # Instantiate model.
  analysis_transform = AnalysisTransform(args.num_filters)
  synthesis_transform = SynthesisTransform(args.num_filters)
  hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
  hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
  entropy_bottleneck = tfc.EntropyBottleneck()

  # Transform and compress the image.
  y = analysis_transform(x)
  y_shape = tf.shape(y)
  z = hyper_analysis_transform(abs(y))
  z_hat, z_likelihoods = entropy_bottleneck(z, training=False)
  sigma = hyper_synthesis_transform(z_hat)
  sigma = sigma[:, :y_shape[1], :y_shape[2], :]
  scale_table = np.exp(np.linspace(
      np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
  conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
  side_string = entropy_bottleneck.compress(z)
  string = conditional_bottleneck.compress(y)

  # Transform the quantized image back (if requested).
  y_hat, y_likelihoods = conditional_bottleneck(y, training=False)
  x_hat = synthesis_transform(y_hat)
  x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

  num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

  # Total number of bits divided by number of pixels.
  eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) +
              tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

  # Bring both images back to 0..255 range.
  x *= 255
  x_hat = tf.clip_by_value(x_hat, 0, 1)
  x_hat = tf.round(x_hat * 255)

  mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
  psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
  msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

  with tf.Session() as sess:
    # Load the latest model checkpoint, get the compressed string and the
    # tensor shapes.
    latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=latest)
    tensors = [string, side_string,
               tf.shape(x)[1:-1], tf.shape(y)[1:-1], tf.shape(z)[1:-1]]
    arrays = sess.run(tensors)

    # Write a binary file with the shape information and the compressed string.
    packed = tfc.PackedTensors()
    packed.pack(tensors, arrays)
    with open(args.output_file, "wb") as f:
      f.write(packed.string)

    # If requested, transform the quantized image back and measure performance.
    if args.verbose:
      eval_bpp, mse, psnr, msssim, num_pixels = sess.run(
          [eval_bpp, mse, psnr, msssim, num_pixels])

      # The actual bits per pixel including overhead.
      bpp = len(packed.string) * 8 / num_pixels

      print("Mean squared error: {:0.4f}".format(mse))
      print("PSNR (dB): {:0.2f}".format(psnr))
      print("Multiscale SSIM: {:0.4f}".format(msssim))
      print("Multiscale SSIM (dB): {:0.2f}".format(-10 * np.log10(1 - msssim)))
      print("Information content in bpp: {:0.4f}".format(eval_bpp))
      print("Actual bits per pixel: {:0.4f}".format(bpp))
def compress_tiny(args): """Compresses an image.""" output_folder = "/media/expansion1/navneedhmaudgalya/Datasets/tiny_imagenet/train_bls_001n" if not os.path.exists(output_folder): os.mkdir(output_folder) bpp = [] full_bpp = [] compressed_imgs = [] # Load input image and add batch dimension. index = tf.placeholder(tf.string) # image_file_name = "{}.png".format(index.eval()) # image_file_path = os.path.join("../data/cifar/test/", image_file_name) x = read_png(index) x = tf.expand_dims(x, 0) x.set_shape([1, None, None, 3]) x_shape = tf.shape(x) # Instantiate model. analysis_transform = AnalysisTransform(args.num_filters) entropy_bottleneck = tfc.EntropyBottleneck() synthesis_transform = SynthesisTransform(args.num_filters) # Transform and compress the image. y = analysis_transform(x) string = entropy_bottleneck.compress(y) # Transform the quantized image back (if requested). y_hat, likelihoods = entropy_bottleneck(y, training=False) x_hat = synthesis_transform(y_hat) x_hat_orig = x_hat[:, :x_shape[1], :x_shape[2], :] num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32) # Total number of bits divided by number of pixels. eval_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels) # Bring both images back to 0..255 range. x *= 255 x_hat_orig = tf.clip_by_value(x_hat_orig, 0, 1) x_hat = tf.round(x_hat_orig * 255) # mse = tf.reduce_mean(tf.squared_difference(x, x_hat)) # psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255)) # msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255)) with tf.Session() as sess: # Load the latest model checkpoint, get the compressed string and the tensor # shapes. latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir) tf.train.Saver().restore(sess, save_path=latest) tensors = [string, tf.shape(x)[1:-1], tf.shape(y)[1:-1]] data_folder = "/media/expansion1/navneedhmaudgalya/Datasets/tiny_imagenet/train/" data_files = os.listdir(data_folder) for i, image_file_name in tqdm(enumerate(data_files)): image_file_path = str(os.path.join(data_folder, image_file_name)) # op = write_png("test_005/{}.png".format(i), x_hat) x_h, arrays, inf_bpp = sess.run([x_hat, tensors, eval_bpp], feed_dict={index: image_file_path}) plt.imsave(os.path.join(output_folder, image_file_name), x_h[0] / 255.) # Write a binary file with the shape information and the compressed string. packed = tfc.PackedTensors() packed.pack(tensors, arrays) bpp.append(inf_bpp) full_bpp.append(len(packed.string) * 8 / (64 * 64)) # compressed_imgs.append(packed.string) # sess.run(op, feed_dict={index: image_file_path}) np.save("{}/bpp.npy".format(output_folder), bpp) np.save("{}/full_bpp.npy".format(output_folder), full_bpp)
def test_decompress(args): """Decompresses an image.""" # Read the shape information and compressed string from the binary file. string = tf.placeholder(tf.string, [1]) side_string = tf.placeholder(tf.string, [1]) x_shape = tf.placeholder(tf.int32, [2]) y_shape = tf.placeholder(tf.int32, [2]) z_shape = tf.placeholder(tf.int32, [2]) with open(args.input_file, "rb") as f: packed = tfc.PackedTensors(f.read()) tensors = [string, side_string, x_shape, y_shape, z_shape] arrays = packed.unpack(tensors) # Instantiate model. synthesis_transform = SynthesisTransform(args.num_filters) hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters) entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32) # Decompress and transform the image back. z_shape = tf.concat([z_shape, [args.num_filters]], axis=0) z_hat = entropy_bottleneck.decompress(side_string, z_shape, channels=args.num_filters) sigma = hyper_synthesis_transform(z_hat) sigma = sigma[:, :y_shape[0], :y_shape[1], :] scale_table = np.exp( np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)) conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table, dtype=tf.float32) y_hat_all = conditional_bottleneck.decompress(string) x = read_png("kodak/kodim01.png") x = tf.expand_dims(x, 0) x.set_shape([1, None, None, 3]) x_shape = tf.shape(x) x *= 255 active = 192 y_hat = y_hat_all[:, :, :, :active] x_hat = synthesis_transform(y_hat) x_hat = tf.clip_by_value(x_hat, 0, 1) x_hat = tf.round(x_hat * 255) mse = tf.reduce_mean(tf.squared_difference(x, x_hat)) psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255)) msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255)) #x_hat = x_hat[0, :x_shape[0], :x_shape[1], :] #op = write_png(args.output_file, x_hat) sess = tf.Session() latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir) tf.train.Saver().restore(sess, save_path=latest) #sess.run(op, feed_dict=dict(zip(tensors, arrays))) #vmse, vpsnr, vmsssim = sess.run([mse, psnr, msssim], feed_dict=dict(zip(tensors, arrays))) #print(vmse, vpsnr, vmsssim) for active in range(192, 0, -8): y_hat = y_hat_all[:, :, :, :active] x_hat = synthesis_transform(y_hat) x_hat = tf.clip_by_value(x_hat, 0, 1) x_hat = tf.round(x_hat * 255) mse = tf.reduce_mean(tf.squared_difference(x, x_hat)) psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255)) msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255)) vmse, vpsnr, vmsssim = sess.run([mse, psnr, msssim], feed_dict=dict(zip(tensors, arrays))) print(active, vmse, vpsnr, vmsssim)
def test_compress(args): """Compresses an image.""" # Load input image and add batch dimension. x = read_png(args.input_file) x = tf.expand_dims(x, 0) x.set_shape([1, None, None, 3]) x_shape = tf.shape(x) step = 0.1 lmbda_log_dist = np.arange(0, 7, step) lmbda_log_dist = tf.constant(lmbda_log_dist, dtype=tf.float32) s = tf.data.Dataset.from_tensor_slices(lmbda_log_dist) lmbda_log = s.make_one_shot_iterator().get_next() # levels lmbda = 0.1 * tf.pow(2.0, lmbda_log - 6.0) # true value # Instantiate model. analysis_transform = AnalysisTransform(args.num_filters, lmbda_log) synthesis_transform = SynthesisTransform(args.num_filters, lmbda_log) hyper_analysis_transform = HyperAnalysisTransform(args.num_filters, lmbda_log) hyper_synthesis_transform = HyperSynthesisTransform( args.num_filters, lmbda_log) entropy_bottleneck = tfc.EntropyBottleneck() # Transform and compress the image. y = analysis_transform(x) y_shape = tf.shape(y) z = hyper_analysis_transform(abs(y)) z_hat, z_likelihoods = entropy_bottleneck(z, training=False) sigma = hyper_synthesis_transform(z_hat) sigma = sigma[:, :y_shape[1], :y_shape[2], :] scale_table = np.exp( np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)) conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table) side_string = entropy_bottleneck.compress(z) string = conditional_bottleneck.compress(y) # Transform the quantized image back (if requested). y_hat, y_likelihoods = conditional_bottleneck(y, training=False) x_hat = synthesis_transform(y_hat) x_hat = x_hat[:, :x_shape[1], :x_shape[2], :] num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32) # Total number of bits divided by number of pixels. eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + tf.reduce_sum( tf.log(z_likelihoods))) / (-np.log(2) * num_pixels) # Bring both images back to 0..255 range. x *= 255 x_hat = tf.clip_by_value(x_hat, 0, 1) x_hat = tf.round(x_hat * 255) mse = tf.reduce_mean(tf.squared_difference(x, x_hat)) psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255)) msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255)) with tf.Session() as sess: # Load the latest model checkpoint, get the compressed string and the tensor # shapes. latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir) tf.train.Saver().restore(sess, save_path=latest) tensors = [ string, side_string, tf.shape(x)[1:-1], tf.shape(y)[1:-1], tf.shape(z)[1:-1] ] for i in np.arange(0, 7, step): arrays, v_eval_bpp, v_mse, v_psnr, v_msssim, v_num_pixels = sess.run( [tensors, eval_bpp, mse, psnr, msssim, num_pixels]) packed = tfc.PackedTensors() packed.pack(tensors, arrays) with open(args.output_file, "wb") as f: f.write(packed.string) # The actual bits per pixel including overhead. bpp = len(packed.string) * 8 / v_num_pixels print(bpp, v_eval_bpp, v_mse, v_psnr, v_msssim)
sess.run(write_png(args.output + str(i * args.frequency + 1) + ".png", tenFirst))

if i == math.ceil(num_frames / args.frequency) - 1 and num_frames % args.frequency != 0:
  batch_range = num_frames % args.frequency + 1

for batch in range(2, batch_range):
  with open(os.path.join(args.input,
                         'of' + str(i * args.frequency + batch - 1) + '.vcn'), "rb") as f:
    flowpacked = tfc.PackedTensors(f.read())
  with open(os.path.join(args.input,
                         "res" + str(i * args.frequency + batch - 1) + '.vcn'), "rb") as f:
    respacked = tfc.PackedTensors(f.read())

  flowtensors = [compflow, cfx_shape, cfy_shape]
  flowarrays = flowpacked.unpack(flowtensors)
  restensors = [compres, rex_shape, rey_shape]
  resarrays = respacked.unpack(restensors)

  fd = dict(zip(flowtensors, flowarrays))
  fd.update(dict(zip(restensors, resarrays)))
  fd.update(dict({testtfprvs: tenFirst}))
def test_compress(args): """Compresses an image.""" # Load input image and add batch dimension. x = read_png(args.input_file) x = tf.expand_dims(x, 0) x.set_shape([1, None, None, 3]) x_shape = tf.shape(x) # Instantiate model. analysis_transform = AnalysisTransform(args.num_filters) synthesis_transform = SynthesisTransform(args.num_filters) hyper_analysis_transform = HyperAnalysisTransform(args.num_filters) hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters) entropy_bottleneck = tfc.EntropyBottleneck() # Transform and compress the image. y = analysis_transform(x) y_shape = tf.shape(y) z = hyper_analysis_transform(abs(y)) z_hat, z_likelihoods = entropy_bottleneck(z, training=False) sigma = hyper_synthesis_transform(z_hat) sigma = sigma[:, :y_shape[1], :y_shape[2], :] scale_table = np.exp( np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)) conditional_bottleneck = DynamicGaussianConditional( sigma, scale_table, name="gaussian_conditional") side_string = entropy_bottleneck.compress(z) string = conditional_bottleneck.compress(y) # Transform the quantized image back (if requested). y_hat, y_likelihoods = conditional_bottleneck(y, training=False) x_hat = synthesis_transform(y_hat) x_hat = x_hat[:, :x_shape[1], :x_shape[2], :] num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32) # Total number of bits divided by number of pixels. eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + tf.reduce_sum( tf.log(z_likelihoods))) / (-np.log(2) * num_pixels) # Bring both images back to 0..255 range. x *= 255 x_hat = tf.clip_by_value(x_hat, 0, 1) x_hat = tf.round(x_hat * 255) mse = tf.reduce_mean(tf.squared_difference(x, x_hat)) psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255)) msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255)) with tf.Session() as sess: # Load the latest model checkpoint, get the compressed string and the tensor # shapes. latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir) tf.train.Saver().restore(sess, save_path=latest) #a = sess.run( tf.reduce_sum(tf.log(y_likelihoods), axis=(0,1,2)) / (-np.log(2) * num_pixels)) #b = sess.run( tf.reduce_sum(tf.log(z_likelihoods), axis=(0,1,2)) / (-np.log(2) * num_pixels)) #np.savetxt('ay.csv', a, delimiter = ',') #np.savetxt('bz.csv', b, delimiter = ',') #return const = tf.constant([1] * 256 + [0] * 224, dtype=tf.float32) for active in range(256, 31, -16): #conditional_bottleneck.input_spec = tf.keras.layers.InputSpec(ndim=4, axes={3: active}) mask = const[256 - active:512 - active] rate = tf.reduce_sum(mask) / 256 y_itc = y * mask / rate string = conditional_bottleneck.compress(y_itc) y_itc_hat = conditional_bottleneck.decompress(string) # Transform the quantized image back (if requested). x_hat = synthesis_transform(y_itc_hat) x_hat = x_hat[:, :x_shape[1], :x_shape[2], :] eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods[:, :, :, :active])) + tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) * num_pixels) x_hat = tf.clip_by_value(x_hat, 0, 1) x_hat = tf.round(x_hat * 255) mse = tf.reduce_mean(tf.squared_difference(x, x_hat)) psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255)) msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255)) tensors = [ string, side_string, tf.shape(x)[1:-1], tf.shape(y)[1:-1], tf.shape(z)[1:-1] ] arrays = sess.run(tensors) # Write a binary file with the shape information and the compressed string. 
packed = tfc.PackedTensors() packed.pack(tensors, arrays) v_eval_bpp, v_mse, v_psnr, v_msssim, v_num_pixels = sess.run( [eval_bpp, mse, psnr, msssim, num_pixels]) bpp = len(packed.string) * 8 / v_num_pixels print(active, v_eval_bpp, bpp, v_mse, v_psnr, v_msssim, v_num_pixels)