def test_range_encoder_fuzz():
    """
    Test random inputs to the encoder.

    Random frequency tables and short random symbol sequences are encoded; the
    test passes as long as the encoder raises no exception.
    """

    random.seed(111)
    randomState = np.random.RandomState(111)

    # mkstemp returns an already-open OS-level descriptor together with the
    # path; close it right away so it is not leaked
    fd, filepath = mkstemp()
    os.close(fd)

    try:
        for _ in range(10):
            # generate random frequency table
            numSymbols = random.randint(1, 20)
            maxFreq = random.randint(2, 100)
            cumFreq = np.cumsum(randomState.randint(1, maxFreq, size=numSymbols))
            cumFreq = [0] + [int(i) for i in cumFreq]  # convert numpy.int64 to int

            # encode random symbols (dataLen may be 0, i.e. an empty sequence)
            dataLen = randomState.randint(0, 10)
            data = [random.randint(0, numSymbols - 1) for _ in range(dataLen)]

            encoder = RangeEncoder(filepath)
            try:
                encoder.encode(data, cumFreq)
            finally:
                encoder.close()
    finally:
        # remove the temporary file even when an iteration fails
        os.remove(filepath)
def test_range_encoder_decoder():
    """
    Additional tests to check whether RangeDecoder reproduces symbols encoded by RangeEncoder.
    """

    random.seed(0)
    # every random draw below comes from numpy, so it is numpy's generator that
    # has to be seeded for the test to be reproducible
    randomState = np.random.RandomState(0)

    # mkstemp returns an open descriptor together with the path; close it so it
    # is not leaked
    fd, filepath = mkstemp()
    os.close(fd)

    try:
        for _ in range(20):
            numSymbols = randomState.randint(1, 6)
            cumFreq = [0] + np.cumsum(randomState.randint(1, 10, size=numSymbols)).tolist()
            # sequence length is drawn from [0, 20), so empty sequences are covered too
            data = randomState.randint(numSymbols, size=randomState.randint(20)).tolist()

            encoder = RangeEncoder(filepath)
            encoder.encode(data, cumFreq)
            encoder.close()

            decoder = RangeDecoder(filepath)
            dataRec = decoder.decode(len(data), cumFreq)
            decoder.close()

            assert data == dataRec
    finally:
        # remove the temporary file even when an assertion fails
        os.remove(filepath)
def test_range_decoder():
    """
    Tests whether RangeDecoder reproduces symbols encoded by RangeEncoder.

    Two sequences with different frequency tables are written to the same
    stream and read back in order. The test also checks the reference counts of
    the decoded lists and that invalid frequency tables are rejected.
    """

    random.seed(558)

    # mkstemp returns an open descriptor together with the path; close it so it
    # is not leaked
    fd, filepath = mkstemp()
    os.close(fd)

    try:
        cumFreq0 = [0, 4, 6, 8]
        cumFreq1 = [0, 2, 5, 7, 10, 14]
        # valid symbols are 0 .. len(cumFreq) - 2
        data0 = [random.randint(0, len(cumFreq0) - 2) for _ in range(10)]
        data1 = [random.randint(0, len(cumFreq1) - 2) for _ in range(17)]

        encoder = RangeEncoder(filepath)
        encoder.encode(data0, cumFreq0)
        encoder.encode(data1, cumFreq1)
        encoder.close()

        decoder = RangeDecoder(filepath)
        dataRec0 = decoder.decode(len(data0), cumFreq0)
        dataRec1 = decoder.decode(len(data1), cumFreq1)
        decoder.close()

        # encoded and decoded data should be the same
        assert data0 == dataRec0
        assert data1 == dataRec1

        # make sure reference counting is implemented correctly (call to getrefcount increases it by 1)
        assert sys.getrefcount(dataRec0) == 2
        assert sys.getrefcount(dataRec1) == 2

        decoder = RangeDecoder(filepath)
        try:
            with pytest.raises(ValueError):
                # invalid frequency table
                decoder.decode(len(data0), [])

            with pytest.raises(ValueError):
                # invalid frequency table
                decoder.decode(len(data0), [0])

            # decoding zero symbols yields an empty list
            assert decoder.decode(0, cumFreq0) == []
        finally:
            # release the decoder before the file is deleted
            decoder.close()
    finally:
        os.remove(filepath)
def test_range_coder_overflow():
    """
    Cumulative frequencies must fit in an unsigned integer (assumed to be represented by 32 bits).
    This test checks that an OverflowError is raised if the frequencies exceed that limit.
    """

    numBytes = 17

    # mkstemp returns an open descriptor together with the path; close it so it
    # is not leaked
    fd, filepath = mkstemp()
    os.close(fd)

    try:
        prob = [4, 6, 8]
        prob = np.asarray(prob, dtype=np.float64) / np.sum(prob)
        cumFreq = prob_to_cum_freq(prob, 128)
        # 2**32 is one past the largest value a 32-bit unsigned integer can hold
        cumFreq[-1] = 2**32

        sequence = [2, 2]
        data = sequence * numBytes

        encoder = RangeEncoder(filepath)
        try:
            with pytest.raises(OverflowError):
                encoder.encode(data, cumFreq)
        finally:
            encoder.close()
    finally:
        # the temporary file must not outlive the test
        os.remove(filepath)
def compress(input_path, output_bin_path, output_res_path, ckp_dir, tau):
    """Compress an image into a lossy bitstream plus a range-coded quantised residual.

    The image is run through the learned codec (analysis transform, hyper-prior,
    conditional entropy model, decoder). The residual between the input and the
    lossy reconstruction is quantised with bin size ``2 * tau + 1`` and encoded
    pixel by pixel with a RangeEncoder, using a per-pixel PMF predicted by the
    network.

    Args:
        input_path: path handed to ``read_png``.
        output_bin_path: file that receives the packed lossy bitstream
            (the two compressed strings and the shapes of x, y and z).
        output_res_path: file that receives the range-coded residual.
        ckp_dir: directory whose latest checkpoint is restored.
        tau: residual quantisation parameter; the bin size is ``2 * tau + 1``.
            With ``int(tau) > 0`` the conditional residual compressor is used.

    Returns:
        Tuple ``(eval_sz_out, img_sz_out, res_sz_out)``: total size, size of
        ``output_bin_path`` and size of ``output_res_path``, as reported by
        ``os.path.getsize``.
    """
    with tf.device('/cpu:0'):
        # Load and Pad Image
        x = read_png(input_path)
        # Pad height and width up to the next multiple of 64; the channel
        # axis uses a modulus of 1 and is therefore never padded.
        mod = tf.constant([64, 64, 1], dtype=tf.int32)
        div = tf.cast(tf.math.ceil(tf.math.truediv(tf.shape(x), mod)), tf.int32)
        paddings = tf.math.subtract(tf.math.multiply(div, mod), tf.shape(x))
        paddings = tf.expand_dims(paddings, 1)
        # Prepend a zero column: nothing is padded before each axis, all of
        # the padding goes after it.
        paddings = tf.concat(
            [tf.convert_to_tensor(np.zeros((3, 1)), dtype=tf.int32), paddings],
            axis=1)
        x_pad = tf.pad(x, paddings, "REFLECT")
        # Add the batch dimension to both the padded and the original image.
        x_pad = tf.expand_dims(x_pad, 0)
        x_pad.set_shape([1, None, None, 3])
        x = tf.expand_dims(x, 0)
        x.set_shape([1, None, None, 3])
        x_shape = tf.shape(x)
        # presumably maps 8-bit pixel values to [0, 1] -- confirm against read_png
        x_norm = x_pad / 255

        # instantiate model
        # NOTE(review): the order of construction and of the calls below
        # probably influences the variable names matched on checkpoint
        # restore -- do not reorder without checking.
        encoder = nll_codec.Encoder(192)
        decoder = nll_codec.Decoder(192)
        hyper_encoder = nll_codec.HyperEncoder(192)
        hyper_decoder_sigma = nll_codec.HyperDecoder(192)
        hyper_decoder_mu = nll_codec.HyperDecoder(192)
        entropy_parameters_sigma = nll_codec.EntropyParameters(192)
        entropy_parameters_mu = nll_codec.EntropyParameters(192)
        entropy_bottleneck = tfc.EntropyBottleneck()
        res_compressor = nll_codec.ResidualCompressor(128, 5)
        masked_conv = nll_codec.MaskedConv2d("A", 64, (5, 5), padding="VALID")
        res_compressor_cond = bc.ResidualCompressor_cond(128, 5)

        # build model and encode/decode
        y = encoder(x_norm)
        y_shape = tf.shape(y)  # not used further in this function
        z = hyper_encoder(y)
        # encode z (including quantization)
        side_string = entropy_bottleneck.compress(z)
        # decode z (including dequantization)
        z_hat_decode = entropy_bottleneck.decompress(
            side_string, tf.shape(z)[1:], channels=192)
        psi_sigma = hyper_decoder_sigma(z_hat_decode)
        psi_mu = hyper_decoder_mu(z_hat_decode)
        sigma = entropy_parameters_sigma(psi_sigma)
        mu = entropy_parameters_mu(psi_mu)
        # Scale table: SCALES_LEVELS values log-spaced between SCALE_MIN and
        # SCALE_MAX.
        scale_table = np.exp(
            np.linspace(np.log(SCALE_MIN), np.log(SCALE_MAX), SCALES_LEVELS))
        conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table, mean=mu)
        # encode y (including quantization)
        string = conditional_bottleneck.compress(y)
        # decode y (including dequantization)
        y_hat_decode = conditional_bottleneck.decompress(string)
        x_hat, res_prior = decoder(y_hat_decode)
        # Crop the reconstruction back to the unpadded size and map it to
        # integer-valued levels in [0, 255].
        x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]
        x_hat = tf.clip_by_value(x_hat, 0, 1)
        x_hat = tf.math.floor(x_hat * 255 + 0.5)
        res_prior = res_prior[:, :x_shape[1], :x_shape[2], :]

        # Residual and its quantisation to multiples of (2 * tau + 1); the
        # two branches round symmetrically around zero.
        res = x - x_hat
        res_q = tf.where(
            res >= 0,
            (2 * tau + 1) * tf.math.floor((res + tau) / (2 * tau + 1)),
            (2 * tau + 1) * tf.math.ceil((res - tau) / (2 * tau + 1)))
        # One-hot encoding of tau - 1 over 5 classes; it is only consumed by
        # the conditional compressor, i.e. when int(tau) > 0.
        tau_list = tf.constant([int(tau - 1)], tf.int32)
        cond = tf.one_hot(tau_list, 5)
        num_pixels = tf.cast(tf.reduce_prod(x_shape[:-1]), dtype=tf.float32)

        # Placeholders fed once per pixel in the encoding loop below.
        res_q_patch = tf.placeholder(dtype=tf.float32, shape=(1, 5, 5, 3))
        res_prior_channel_num = 64
        res_prior_patch = tf.placeholder(
            dtype=tf.float32, shape=(1, 1, 1, res_prior_channel_num))
        res_q_vector = tf.placeholder(dtype=tf.float32, shape=(1, 1, 1, 3))
        bin_sz = 2 * tau + 1
        # Number of rows requested from the PMF: 510 // bin_sz + 1.
        pmf_length = int(510 // bin_sz + 1)
        # Largest multiple of bin_sz not exceeding 255; used later to shift
        # quantised residuals to non-negative symbol indices.
        pmf_end = (255 // bin_sz) * bin_sz
        context = masked_conv(res_q_patch)
        res_prior_context = tf.concat([res_prior_patch, context], 3)
        bias_correction = True
        if bias_correction and int(tau) > 0:
            res_mu, res_log_sigma, res_pi, res_lambda = res_compressor_cond(
                res_prior_context, cond)
        else:
            res_mu, res_log_sigma, res_pi, res_lambda = res_compressor(
                res_prior_context)
        # Repeat the mixture parameters pmf_length times along axis 0 --
        # presumably one copy per candidate symbol; verify against
        # lmm.LogisticMixtureModel.pmf_tau.
        res_mu_tiled = tf.tile(
            res_mu, tf.constant([pmf_length, 1, 1, 1], tf.int32))
        res_log_sigma_tiled = tf.tile(
            res_log_sigma, tf.constant([pmf_length, 1, 1, 1], tf.int32))
        res_pi_tiled = tf.tile(
            res_pi, tf.constant([pmf_length, 1, 1, 1], tf.int32))
        res_lambda_tiled = tf.tile(
            res_lambda, tf.constant([pmf_length, 1, 1, 1], tf.int32))
        res_bottleneck = lmm.LogisticMixtureModel(
            res_mu_tiled, res_log_sigma_tiled, res_pi_tiled, res_lambda_tiled)
        res_pmf = res_bottleneck.pmf_tau(res_q_vector, tau)

        # MSE
        eval_mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
        # PSNR
        eval_psnr = 10 * tf.math.log(255**2 / eval_mse) / tf.math.log(10.0)
        # max abs diff
        eval_max_abs_diff = tf.reduce_max(tf.abs(tf.subtract(x, x_hat)))

        with tf.Session() as sess:
            latest = tf.train.latest_checkpoint(checkpoint_dir=ckp_dir)
            tf.train.Saver().restore(sess, save_path=latest)
            tensors = [
                string, side_string,
                tf.shape(x)[1:-1],
                tf.shape(y)[1:-1],
                tf.shape(z)[1:-1]
            ]
            arrays = sess.run(tensors)

            # write binary file
            packed = tfc.PackedTensors()
            packed.pack(tensors, arrays)
            with open(output_bin_path, "wb") as f:
                f.write(packed.string)

            # Lossy Image Encoding
            print("Lossy Image Encoding Start.")
            # NOTE(review): `string` is evaluated a second time here and its
            # value is discarded.
            res_prior_out, res_q_out, _, x_org, x_out, lossy_mse, lossy_psnr, lossy_max_abs_diff, num_pixels_out, x_shape_out = sess.run(
                [
                    res_prior, res_q, string, x, x_hat, eval_mse, eval_psnr,
                    eval_max_abs_diff, num_pixels, x_shape
                ])
            print("Lossy Image Encoding Finish.")

            k_sz = 5
            pad_sz = 2
            _, x_h, x_w, x_c = x_shape_out
            # Zero-pad the quantised residual by pad_sz on each spatial side so
            # a k_sz x k_sz window can be cut out around every pixel.
            res_q_padded = np.pad(res_q_out,
                                  ((0, 0), (pad_sz, pad_sz), (pad_sz, pad_sz), (0, 0)),
                                  'constant')

            # From here on `encoder` refers to the range encoder, not the
            # analysis network created above.
            encoder = RangeEncoder(output_res_path)
            print('Residual Encoding Start.')
            for h_idx in range(x_h):
                for w_idx in range(x_w):
                    # k_sz x k_sz window of the padded residual for pixel
                    # (h_idx, w_idx)
                    res_q_extracted = res_q_padded[:, h_idx:h_idx + k_sz,
                                                   w_idx:w_idx + k_sz, :]
                    res_prior_extracted = res_prior_out[:, h_idx, w_idx, :].reshape(
                        1, 1, 1, res_prior_channel_num
                    )
                    res_q_vector_extracted = res_q_out[:, h_idx, w_idx, :].reshape(
                        1, 1, 1, 3)
                    # One session call per pixel yields the PMF of all channels.
                    res_pmf_out = sess.run(res_pmf,
                                           feed_dict={
                                               res_q_patch: res_q_extracted,
                                               res_prior_patch: res_prior_extracted,
                                               res_q_vector: res_q_vector_extracted
                                           })
                    # Shift by pmf_end and divide by the bin size to obtain
                    # the symbol index of each channel.
                    res_q_vector_extracted = (
                        res_q_vector_extracted[0, 0, 0, :] + pmf_end) // bin_sz
                    for c_idx in range(x_c):
                        c_pmf = res_pmf_out[:, 0, 0, c_idx]
                        # Floor the probabilities at 1/65025, then renormalise.
                        c_pmf = np.clip(c_pmf, 1.0 / 65025, 1.0)
                        c_pmf = c_pmf / np.sum(c_pmf)
                        # Integer cumulative frequency table on a 65536 scale,
                        # starting at 0 and rounded to the nearest integer.
                        cumFreq = np.floor(
                            np.append([0.], np.cumsum(c_pmf)) * 65536.
                            + 0.5).astype(np.int32).tolist()
                        encoder.encode([int(res_q_vector_extracted[c_idx])], cumFreq)
            print("Encoding Finish.")
            encoder.close()

            print("Lossy MSE:{}, Lossy PSNR:{}, Lossy max_abs_diff:{}".format(
                lossy_mse, lossy_psnr, lossy_max_abs_diff))

            # File sizes in bytes and the resulting bits per sub-pixel
            # (bits divided by channels * height * width).
            img_sz_out = os.path.getsize(output_bin_path)
            res_sz_out = os.path.getsize(output_res_path)
            eval_sz_out = img_sz_out + res_sz_out
            img_bpsp = os.path.getsize(output_bin_path) * 8 / (x_c * x_h * x_w)
            res_bpsp = os.path.getsize(output_res_path) * 8 / (x_c * x_h * x_w)
            eval_bpsp = img_bpsp + res_bpsp
            print("tau:{}, bpsp:{}, img_bpsp:{}, res_bpsp:{}".format(
                tau, eval_bpsp, img_bpsp, res_bpsp))

            # Reconstruction: lossy output plus quantised residual, clipped to
            # [0, 255], compared against the original image.
            x_rec = np.clip(np.squeeze(x_out + res_q_out), 0, 255)
            max_abs_diff = np.amax(np.abs(x_org - x_rec))
            mse = np.mean((x_org - x_rec)**2)
            psnr = 10 * np.log10(255**2 / mse)
            print("Max abs diff:{}, NLL MSE:{}, NLL PSNR:{}".format(
                max_abs_diff, mse, psnr))

            return eval_sz_out, img_sz_out, res_sz_out
def test_range_encoder():
    """
    Tests that RangeEncoder writes the expected bits.

    Tests that writing after closing file throws an exception, and that
    invalid frequency tables are rejected.
    """

    numBytes = 17

    # mkstemp returns an open descriptor together with the path; close it so it
    # is not leaked
    fd, filepath = mkstemp()
    os.close(fd)

    try:
        # encoding one sequence should require 1 byte
        cumFreq = [0, 4, 6, 8]
        sequence = [0, 0, 0, 0, 1, 2]
        sequenceByte = b'\x0b'
        data = sequence * numBytes

        encoder = RangeEncoder(filepath)
        encoder.encode(data, cumFreq)
        encoder.close()

        with pytest.raises(RuntimeError):
            # file is already closed, should raise an exception
            encoder.encode(sequence, cumFreq)

        assert os.stat(filepath).st_size == numBytes

        with open(filepath, 'rb') as handle:
            # the first 4 bytes are special
            handle.read(4)

            for _ in range(numBytes - 4):
                assert handle.read(1) == sequenceByte

        encoder = RangeEncoder(filepath)
        try:
            with pytest.raises(OverflowError):
                # cumFreq contains negative frequencies
                encoder.encode(data, [-1, 1])

            with pytest.raises(ValueError):
                # cumFreq does not start at zero
                encoder.encode(data, [1, 2, 3])

            with pytest.raises(ValueError):
                # cumFreq too short
                encoder.encode(data, [0, 1])

            with pytest.raises(ValueError):
                # symbols with zero probability cannot be encoded
                encoder.encode(data, [0, 8, 8, 8])

            with pytest.raises(ValueError):
                # invalid frequency table
                encoder.encode(data, [])

            with pytest.raises(ValueError):
                # invalid frequency table
                encoder.encode(data, [0])
        finally:
            encoder.close()
    finally:
        # remove the temporary file even when an assertion fails
        os.remove(filepath)
import os

from range_coder import RangeDecoder, RangeEncoder, prob_to_cum_freq

# Round-trip demo: encode a short symbol sequence to disk with the range
# coder, decode it again and print the result.

# File that receives the encoded stream.
filepath = "output.txt"

# Symbol probabilities and the message to encode (symbols index into prob).
prob = [0.5, 0.2, 0.3]
data = [2, 0, 1, 0, 0, 0, 1, 2, 2]

# convert probabilities to cumulative integer frequency table
cumFreq = prob_to_cum_freq(prob, resolution=4)
print(cumFreq)

# encode data
encoder = RangeEncoder(filepath)
encoder.encode(data, cumFreq)
encoder.close()

# decode data; the decoder needs the symbol count and the same table
decoder = RangeDecoder(filepath)
dataRec = decoder.decode(len(data), cumFreq)
decoder.close()

print(os.stat(filepath))
print(dataRec)
def _gaussian_bin_mass(samples, mean, scale, tiny):
    """Return the Gaussian probability mass of the unit bins centred on *samples*."""
    denom = (scale + tiny) * 2 ** 0.5
    return (0.5 * (1 + scipy.special.erf((samples + 0.5 - mean) / denom)) -
            0.5 * (1 + scipy.special.erf((samples - 0.5 - mean) / denom)))


def compress(input, output, num_filters, checkpoint_dir):
    """Compress an image with the hyper-prior model and range-code the latents.

    Two files are written:

    * ``output``: header plus the entropy-coded hyper-latent ``z``. The layout
      is the image height/width as two uint16, ``[len(string), minmax]`` as two
      uint16, the per-channel non-zero flags packed into bits (uint8), then the
      compressed ``z`` string.
    * ``output[:-4] + '.bin'``: the range-coded rounded latent ``y``. Only
      channels that contain at least one non-zero value are coded, position by
      position, with a three-component Gaussian mixture as the symbol model.

    Args:
        input: image path handed to ``load_image``.
        output: path of the header/side-information file; its last four
            characters are replaced by ``.bin`` for the second file.
        num_filters: channel count passed to the transforms. The flag packing
            requires the channel count of ``y`` to be a multiple of 8.
        checkpoint_dir: currently unused; the checkpoint path is hard-coded
            below.

    Returns:
        None. Timing, bit rates, PSNR and MS-SSIM are printed.
    """
    start = time.time()
    tf.set_random_seed(1)
    tf.reset_default_graph()

    with tf.device('/cpu:0'):
        # Load input image and add batch dimension.
        x = load_image(input)

        # Pad the x to x_pad: height and width go up to the next multiple of
        # 64, all padding is appended after each axis.
        mod = tf.constant([64, 64, 1], dtype=tf.int32)
        div = tf.ceil(tf.truediv(tf.shape(x), mod))
        div = tf.cast(div, tf.int32)
        paddings = tf.subtract(tf.multiply(div, mod), tf.shape(x))
        paddings = tf.expand_dims(paddings, 1)
        paddings = tf.concat(
            [tf.convert_to_tensor(np.zeros((3, 1)), dtype=tf.int32), paddings],
            axis=1)
        x_pad = tf.pad(x, paddings, "REFLECT")

        x = tf.expand_dims(x, 0)
        x.set_shape([1, None, None, 3])
        x_pad = tf.expand_dims(x_pad, 0)
        x_pad.set_shape([1, None, None, 3])

        # Transform and compress the image, then remove batch dimension.
        y = analysis_transform(x_pad, num_filters)

        # Build a hyper autoencoder
        z = hyper_analysis(y, num_filters)
        entropy_bottleneck = tfc.EntropyBottleneck()
        string = entropy_bottleneck.compress(z)
        string = tf.squeeze(string, axis=0)
        z_tilde, z_likelihoods = entropy_bottleneck(z, training=False)

        # To decompress the z_tilde back to avoid the inconsistence error
        string_rec = tf.expand_dims(string, 0)
        z_tilde = entropy_bottleneck.decompress(string_rec, tf.shape(z)[1:],
                                                channels=num_filters)
        phi = hyper_synthesis(z_tilde, num_filters)

        # REVISION: for Gaussian Mixture Model (GMM), use window-based fast
        # implementation: the entropy parameters are evaluated on 5x5 windows
        # fed through placeholders.
        y_hat = tf.round(y)
        tiny_y = tf.placeholder(dtype=tf.float32,
                                shape=[1] + [5] + [5] + [num_filters])
        tiny_phi = tf.placeholder(dtype=tf.float32,
                                  shape=[1] + [5] + [5] + [num_filters * 2])
        _, _, y_means, y_variances, y_probs = entropy_parameter(
            tiny_phi, tiny_y, num_filters, training=False)

        x_hat = synthesis_transform(y_hat, num_filters)
        num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))
        # Drop the batch dimension and crop back to the unpadded size.
        x_hat = x_hat[0, :tf.shape(x)[1], :tf.shape(x)[2], :]

        # Mean squared error across pixels.
        x_hat = tf.clip_by_value(x_hat, 0, 1)
        x_hat = tf.round(x_hat * 255)
        mse = tf.reduce_mean(tf.squared_difference(x * 255, x_hat))

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # Load the model checkpoint, get the compressed string and the
            # tensor shapes.
            # NOTE(review): `checkpoint_dir` is ignored in favour of this
            # hard-coded checkpoint; confirm that this is intended before
            # switching to tf.train.latest_checkpoint(checkpoint_dir).
            latest = "models/model-1399000"  # lambda = 14
            print(latest)
            tf.train.Saver().restore(sess, save_path=latest)
            z_string, x_shape, y_shape, num_pixels_value, y_hat_value, phi_value = \
                sess.run([string, tf.shape(x), tf.shape(y), num_pixels, y_hat, phi])

            # Largest latent magnitude (at least 1); symbols are shifted by it
            # so that they fall into [0, 2 * minmax].
            minmax = np.maximum(abs(y_hat_value.max()), abs(y_hat_value.min()))
            minmax = int(np.maximum(minmax, 1))
            print(minmax)

            # Fast implementations by only encoding non-zero channels with
            # 128/8 = 16bytes overhead
            num_channels = y_shape[3]
            channel_energy = np.sum(np.abs(y_hat_value), axis=(0, 1, 2))
            # np.int was removed from NumPy; the builtin int is the same dtype.
            flag = (channel_energy > 0).astype(int)
            # flatnonzero always returns a 1-D array, so a single non-zero
            # channel no longer collapses to a 0-d array without a length.
            non_zero_idx = np.flatnonzero(flag)
            num = np.packbits(np.reshape(flag, [8, num_channels // 8]))

            # ============== encode the bits for z===========
            # NOTE(review): sizes are stored as uint16, so an image dimension
            # or a z string longer than 65535 does not fit -- confirm against
            # the decoder before changing the layout.
            if os.path.exists(output):
                os.remove(output)
            with open(output, mode='wb') as fileobj:
                fileobj.write(np.array(x_shape[1:-1], dtype=np.uint16).tobytes())
                fileobj.write(
                    np.array([len(z_string), minmax], dtype=np.uint16).tobytes())
                fileobj.write(np.array(num, dtype=np.uint8).tobytes())
                fileobj.write(z_string)

            # ============ encode the bits for y ==========
            print("INFO: start encoding y")
            bin_path = output[:-4] + '.bin'
            encoder = RangeEncoder(bin_path)
            samples = np.arange(0, minmax * 2 + 1)
            TINY = 1e-10

            kernel_size = 5
            pad_size = (kernel_size - 1) // 2
            spatial_pad = ((0, 0), (pad_size, pad_size), (pad_size, pad_size), (0, 0))
            padded_y = np.pad(y_hat_value, spatial_pad, 'constant',
                              constant_values=0.)
            padded_phi = np.pad(phi_value, spatial_pad, 'constant',
                                constant_values=0.)

            for h_idx in range(y_shape[1]):
                for w_idx in range(y_shape[2]):
                    extracted_y = padded_y[:, h_idx:h_idx + kernel_size,
                                           w_idx:w_idx + kernel_size, :]
                    extracted_phi = padded_phi[:, h_idx:h_idx + kernel_size,
                                               w_idx:w_idx + kernel_size, :]

                    y_means_values, y_variances_values, y_probs_values = \
                        sess.run([y_means, y_variances, y_probs],
                                 feed_dict={tiny_y: extracted_y,
                                            tiny_phi: extracted_phi})

                    for ch_idx in non_zero_idx:
                        # Mixture parameters at the window centre; means are
                        # shifted into the non-negative symbol range.
                        mu = y_means_values[0, pad_size, pad_size, ch_idx, :] + minmax
                        sigma = y_variances_values[0, pad_size, pad_size, ch_idx, :]
                        weight = y_probs_values[0, pad_size, pad_size, ch_idx, :]

                        # Calculate the pmf/cdf of the three-component mixture.
                        # The terms are summed in a fixed order so the table is
                        # numerically identical on every run. Tail mass outside
                        # [0, 2 * minmax] is not added.
                        pmf = _gaussian_bin_mass(samples, mu[0], sigma[0], TINY) * weight[0] + \
                            _gaussian_bin_mass(samples, mu[1], sigma[1], TINY) * weight[1] + \
                            _gaussian_bin_mass(samples, mu[2], sigma[2], TINY) * weight[2]

                        # To avoid the zero-probability
                        pmf_clip = np.clip(pmf, 1.0 / 65536, 1.0)
                        pmf_clip = np.round(pmf_clip / np.sum(pmf_clip) * 65536)
                        cdf = list(np.add.accumulate(pmf_clip))
                        cdf = [0] + [int(i) for i in cdf]

                        symbol = int(y_hat_value[0, h_idx, w_idx, ch_idx] + minmax)
                        encoder.encode([symbol], cdf)
            encoder.close()

            side_bytes = os.path.getsize(output)
            total_bytes = side_bytes + os.path.getsize(bin_path)
            bpp_real = total_bytes * 8 / num_pixels_value
            bpp_side = side_bytes * 8 / num_pixels_value

            end = time.time()
            print("Time : {:0.3f}".format(end - start))

            psnr = sess.run(tf.image.psnr(x_hat, x * 255, 255))
            msssim = sess.run(tf.image.ssim_multiscale(x_hat, x * 255, 255))

            print("Actual bits per pixel for this image: {:0.4}".format(
                bpp_real))
            print("Side bits per pixel for z: {:0.4}".format(bpp_side))
            print("PSNR (dB) : {:0.4}".format(psnr[0]))
            print("MS-SSIM : {:0.4}".format(msssim[0]))