def compress(args):
    """Compresses every image in a directory and reports quality metrics.

    Builds the attention-weighted analysis/synthesis graph, restores the
    latest checkpoint found in ``args.checkpoint_dir`` and then iterates over
    the (sorted) files of ``args.OrgPath``. The i-th file is paired with the
    i-th entry of two pickled mask lists (a predicted mask that is fed to the
    encoder and a label mask that is only used for evaluation).

    When ``args.verbose`` is set, the reconstruction is written as a PNG into
    ``args.RecPath`` and MSE / PSNR / MS-SSIM / VGG feature losses are printed
    per image and averaged over all images at the end.

    Args:
        args: Namespace providing ``num_filters``, ``checkpoint_dir``,
            ``OrgPath``, ``BinPath``, ``RecPath`` and ``verbose``.
    """
    # ------------------------------------------------------------------
    # Graph construction.
    # ------------------------------------------------------------------
    # The image is fed already carrying its batch dimension.
    x_input = tf.placeholder(dtype=tf.float32, shape=(1, None, None, 3))
    x = x_input
    weights_224_3c = tf.placeholder(tf.float32, [1, None, None, 3])
    weights_224_3c_label = tf.placeholder(tf.float32, [1, None, None, 3])
    x_attention = x * weights_224_3c
    # The encoder sees the image, the mask and the masked image (9 channels).
    x_attention_input = tf.concat([x, weights_224_3c, x_attention], axis=3)

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    synthesis_transform = SynthesisTransform(args.num_filters)

    # Transform and compress the image.
    y = analysis_transform(x_attention_input)
    string = entropy_bottleneck.compress(y)

    # Transform the quantized image back.
    y_hat, likelihoods = entropy_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat)
    rec = synthesis_transform(y_hat)
    imgName = tf.placeholder(tf.string)
    op = write_png(imgName, rec[0, :, :, :])

    # VGG feature maps of the original and of the (unclipped) reconstruction.
    vgg = vgg16.Vgg16('/gdata/gaocs/pretrained_models/vgg16_no_fc.npy')

    def vgg_features(image):
        vgg.build(image)
        return [vgg.conv1_2, vgg.conv2_2, vgg.conv3_3, vgg.conv4_3,
                vgg.conv5_3]

    feature_x = vgg_features(x)
    feature_x_tilde = vgg_features(x_hat)

    # Per-layer normalized feature losses: restricted to active features of
    # the original ("fore"), to inactive ones ("back"), and unrestricted.
    loss_feat_fore_all, loss_feat_back_all, loss_feat_all = [], [], []
    loss_feat_fore_sum = 0.0
    loss_feat_back_sum = 0.0
    loss_feat_sum = 0.0
    for feat, feat_tilde in zip(feature_x, feature_x_tilde):
        feat_mask = tf.where(feat > 0, x=tf.ones_like(feat),
                             y=tf.zeros_like(feat))
        feat_mask_invert = feat_mask * (-1) + 1
        normalized_diff = (feat - feat_tilde) / (
            tf.reduce_mean(feat) + 0.00000001)

        loss_temp_fore = tf.reduce_mean((normalized_diff * feat_mask)**2)
        loss_feat_fore_all.append(loss_temp_fore)
        loss_feat_fore_sum += loss_temp_fore

        loss_temp_back = tf.reduce_mean(
            (normalized_diff * feat_mask_invert)**2)
        loss_feat_back_all.append(loss_temp_back)
        loss_feat_back_sum += loss_temp_back

        loss_temp = tf.reduce_mean(normalized_diff**2)
        loss_feat_all.append(loss_temp)
        loss_feat_sum += loss_temp

    loss_f_fore = loss_feat_fore_sum / len(feature_x)
    loss_f_back = loss_feat_back_sum / len(feature_x)
    loss_f = loss_feat_sum / len(feature_x)

    num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

    # Total number of bits divided by number of pixels.
    eval_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Bring both images back to 0..255 range.
    x *= 255
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)

    def masked_quality(mask):
        """Returns [mse, psnr, msssim] tensors restricted to `mask`."""
        masked_mse = tf.reduce_sum(
            tf.squared_difference(x * mask, x_hat * mask)) / tf.reduce_sum(mask)
        masked_psnr = 20 * tf.math.log(
            255.0 / tf.math.sqrt(masked_mse)) / tf.math.log(10.0)
        masked_msssim = tf.squeeze(
            tf.image.ssim_multiscale(x_hat * mask, x * mask, 255))
        return [masked_mse, masked_psnr, masked_msssim]

    weights_224_3c_invert = -1 * weights_224_3c + 1
    weights_224_3c_label_invert = -1 * weights_224_3c_label + 1

    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    # Everything that is evaluated per image in verbose mode. Fetching `op`
    # writes the reconstructed PNG as a side effect.
    fetches = {
        'eval_bpp': eval_bpp,
        'num_pixels': num_pixels,
        'fore': masked_quality(weights_224_3c_label),
        'back': masked_quality(weights_224_3c_label_invert),
        'foreground': masked_quality(weights_224_3c),
        'background': masked_quality(weights_224_3c_invert),
        'all': [mse, psnr, msssim],
        'loss_f_fore': loss_f_fore,
        'loss_f_back': loss_f_back,
        'loss_f': loss_f,
        'loss_feat_fore': loss_feat_fore_all,
        'loss_feat_back': loss_feat_back_all,
        'loss_feat': loss_feat_all,
        'write_png': op,
    }

    # ------------------------------------------------------------------
    # Reporting helpers.
    # ------------------------------------------------------------------
    def to_db(msssim_value):
        return -10 * np.log10(1 - msssim_value)

    def print_quality(prefix, mse_v, psnr_v, msssim_v, msssim_db_v):
        print((prefix + "Mean squared error: {:0.4f}").format(mse_v))
        print((prefix + "PSNR (dB): {:0.2f}").format(psnr_v))
        print((prefix + "Multiscale SSIM: {:0.4f}").format(msssim_v))
        print((prefix + "Multiscale SSIM (dB): {:0.2f}").format(msssim_db_v))

    def mask_to_3c(mask_2d):
        """Replicates a [h, w] mask into a float32 [1, h, w, 3] array."""
        mask_3c = np.zeros((1, mask_2d.shape[0], mask_2d.shape[1], 3),
                           dtype=np.float32)
        mask_3c[0, :, :, :] = np.asarray(mask_2d)[:, :, np.newaxis]
        return mask_3c

    region_names = ('fore', 'back', 'foreground', 'background')

    with tf.Session() as sess:
        # Load the latest model checkpoint, get the compressed string and the
        # tensor shapes.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)
        tensors = [string, tf.shape(x)[1:-1], tf.shape(y)[1:-1]]

        orgPath = args.OrgPath
        binPath = args.BinPath
        recPath = args.RecPath
        if not os.path.exists(binPath):
            os.mkdir(binPath)
        if not os.path.exists(recPath):
            os.mkdir(recPath)

        orgFiles = sorted(os.listdir(orgPath))

        # Per-region metric lists; a region is skipped for an image whose
        # masked MSE is not above 1e-8 (e.g. an empty mask).
        stats = {
            name: {'mse': [], 'psnr': [], 'msssim': [], 'msssimdb': []}
            for name in region_names + ('all',)
        }
        loss_f_foreground_all = []
        loss_feat_foreground_list = []
        loss_f_background_all = []
        loss_feat_background_list = []
        eval_bpp_all = []
        bpp_all = []
        loss_f_all = []
        loss_feat_list = []

        # NOTE: pickle executes arbitrary code on load; these files must come
        # from a trusted source.
        pickle_name_label = '/gdata1/gaocs/pretrained_models/minVal2014_Test5000_0_1.pickle'
        with open(pickle_name_label, 'rb') as fp:
            print(pickle_name_label)
            all_weights_val5000_label = pickle.load(fp)

        pickle_name = '/gdata1/gaocs/pretrained_models/minVal2014_5000_Conv5_3_binary_dilation_0_1.pickle'
        with open(pickle_name, 'rb') as fp:
            print(pickle_name)
            all_weights_val5000 = pickle.load(fp)

        np.set_printoptions(formatter={'float': '{: 0.8f}'.format})

        for idx, imgFile in enumerate(orgFiles):
            img = np.asarray(Image.open(os.path.join(orgPath, imgFile)),
                             dtype=np.float32)
            if len(img.shape) != 3:
                # Grayscale input: replicate the single plane to 3 channels.
                img = np.stack([img, img, img], axis=2)
            imgArr = np.expand_dims(img, 0) / 255

            pngRecName = os.path.join(
                recPath, os.path.splitext(imgFile)[0] + '.png')

            feed = {
                x_input: imgArr,
                weights_224_3c: mask_to_3c(all_weights_val5000[idx]),
                weights_224_3c_label: mask_to_3c(
                    all_weights_val5000_label[idx]),
                imgName: pngRecName,
            }
            arrays = sess.run(tensors, feed_dict=feed)

            # Pack the shape information and the compressed string. Writing
            # the result to binPath is currently disabled; the packed string
            # is only used to measure the actual bitrate.
            packed = PackedTensors()
            packed.pack(tensors, arrays)

            # If requested, transform the quantized image back and measure
            # performance.
            if not args.verbose:
                continue

            print(pngRecName)
            out = sess.run(fetches, feed_dict=feed)

            # The actual bits per pixel including overhead.
            bpp = len(packed.string) * 8 / out['num_pixels']

            for name in ('fore', 'back'):
                mse_v, psnr_v, msssim_v = out[name]
                print_quality(name + ' ', mse_v, psnr_v, msssim_v,
                              to_db(msssim_v))

            mse_v, psnr_v, msssim_v = out['foreground']
            print_quality('foreground ', mse_v, psnr_v, msssim_v,
                          to_db(msssim_v))
            print("foreground VGG loss: {:0.4f}".format(out['loss_f_fore']))
            print(out['loss_feat_fore'])

            mse_v, psnr_v, msssim_v = out['background']
            print_quality('background ', mse_v, psnr_v, msssim_v,
                          to_db(msssim_v))
            print("background VGG loss: {:0.4f}".format(out['loss_f_back']))
            print(out['loss_feat_back'])

            mse_v, psnr_v, msssim_v = out['all']
            print_quality('', mse_v, psnr_v, msssim_v, to_db(msssim_v))
            print("Information content in bpp: {:0.4f}".format(
                out['eval_bpp']))
            print("Actual bits per pixel: {:0.4f}".format(bpp))
            print("VGG loss: {:0.4f}".format(out['loss_f']))
            print(out['loss_feat'])

            for name in region_names:
                mse_v, psnr_v, msssim_v = out[name]
                if mse_v > 1e-8:
                    stats[name]['mse'].append(mse_v)
                    stats[name]['psnr'].append(psnr_v)
                    stats[name]['msssim'].append(msssim_v)
                    stats[name]['msssimdb'].append(to_db(msssim_v))
            if out['foreground'][0] > 1e-8:
                loss_f_foreground_all.append(out['loss_f_fore'])
                loss_feat_foreground_list.append(out['loss_feat_fore'])
            if out['background'][0] > 1e-8:
                loss_f_background_all.append(out['loss_f_back'])
                loss_feat_background_list.append(out['loss_feat_back'])

            mse_v, psnr_v, msssim_v = out['all']
            stats['all']['mse'].append(mse_v)
            stats['all']['psnr'].append(psnr_v)
            stats['all']['msssim'].append(msssim_v)
            stats['all']['msssimdb'].append(to_db(msssim_v))
            eval_bpp_all.append(out['eval_bpp'])
            bpp_all.append(bpp)
            loss_f_all.append(out['loss_f'])
            loss_feat_list.append(out['loss_feat'])

    # Nothing was measured (no input files, or verbose mode disabled).
    if not stats['all']['mse']:
        return

    def print_mean_quality(prefix, region_stats):
        print_quality(prefix,
                      np.mean(region_stats['mse']),
                      np.mean(region_stats['psnr']),
                      np.mean(region_stats['msssim']),
                      np.mean(region_stats['msssimdb']))

    print('\n\n---total averege---')
    print_mean_quality('fore ', stats['fore'])
    print_mean_quality('back ', stats['back'])

    print_mean_quality('foreground ', stats['foreground'])
    print("foreground VGG loss: {:0.4f}".format(
        np.mean(loss_f_foreground_all)))
    print(np.mean(loss_feat_foreground_list, axis=0))

    print_mean_quality('background ', stats['background'])
    print("background VGG loss: {:0.4f}".format(
        np.mean(loss_f_background_all)))
    print(np.mean(loss_feat_background_list, axis=0))

    print_mean_quality('', stats['all'])
    print("Information content in bpp: {:0.4f}".format(
        np.mean(eval_bpp_all)))
    print("Actual bits per pixel: {:0.4f}".format(np.mean(bpp_all)))
    # Average over all images (the original printed the last image's value).
    print("VGG loss: {:0.4f}".format(np.mean(loss_f_all)))
    print(np.mean(loss_feat_list, axis=0))
def compress(args):
    """Compresses the image at ``args.input_file`` into ``args.output_file``.

    The latent representation is coded with a Gaussian conditional model whose
    scales are predicted from a hyper-latent; the hyper-latent itself is coded
    with the entropy bottleneck. With ``args.verbose`` set, rate and
    distortion figures for the reconstruction are printed as well.
    """
    # Load input image and add batch dimension.
    image = tf.expand_dims(read_png(args.input_file), 0)
    image.set_shape([1, None, None, 3])
    image_shape = tf.shape(image)

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()

    # Transform and compress the image.
    latent = analysis_transform(image)
    latent_shape = tf.shape(latent)
    hyper_latent = hyper_analysis_transform(abs(latent))
    hyper_latent_hat, hyper_likelihoods = entropy_bottleneck(
        hyper_latent, training=False)

    # Predicted scales, cropped to the spatial extent of the latent.
    scales = hyper_synthesis_transform(hyper_latent_hat)
    scales = scales[:, :latent_shape[1], :latent_shape[2], :]

    log_min = np.log(SCALES_MIN)
    log_max = np.log(SCALES_MAX)
    scale_table = np.exp(np.linspace(log_min, log_max, SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(scales, scale_table)

    side_string = entropy_bottleneck.compress(hyper_latent)
    string = conditional_bottleneck.compress(latent)

    # Transform the quantized image back (if requested).
    latent_hat, latent_likelihoods = conditional_bottleneck(
        latent, training=False)
    reconstruction = synthesis_transform(latent_hat)
    reconstruction = reconstruction[:, :image_shape[1], :image_shape[2], :]

    pixel_count = tf.cast(
        tf.reduce_prod(tf.shape(image)[:-1]), dtype=tf.float32)

    # Total number of bits divided by number of pixels.
    total_log_likelihood = (tf.reduce_sum(tf.log(latent_likelihoods)) +
                            tf.reduce_sum(tf.log(hyper_likelihoods)))
    bpp_estimate = total_log_likelihood / (-np.log(2) * pixel_count)

    # Bring both images back to 0..255 range.
    image *= 255
    reconstruction = tf.clip_by_value(reconstruction, 0, 1)
    reconstruction = tf.round(reconstruction * 255)

    mse_op = tf.reduce_mean(tf.squared_difference(image, reconstruction))
    psnr_op = tf.squeeze(tf.image.psnr(reconstruction, image, 255))
    msssim_op = tf.squeeze(
        tf.image.ssim_multiscale(reconstruction, image, 255))

    with tf.Session() as sess:
        # Load the latest model checkpoint, get the compressed string and the
        # tensor shapes.
        checkpoint = tf.train.latest_checkpoint(
            checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=checkpoint)

        tensors = [
            string,
            side_string,
            tf.shape(image)[1:-1],
            tf.shape(latent)[1:-1],
            tf.shape(hyper_latent)[1:-1],
        ]
        arrays = sess.run(tensors)

        # Write a binary file with the shape information and the compressed
        # string.
        packed = tfc.PackedTensors()
        packed.pack(tensors, arrays)
        with open(args.output_file, "wb") as out_file:
            out_file.write(packed.string)

        # If requested, transform the quantized image back and measure
        # performance.
        if not args.verbose:
            return

        bpp_value, mse_value, psnr_value, msssim_value, pixels = sess.run(
            [bpp_estimate, mse_op, psnr_op, msssim_op, pixel_count])

        # The actual bits per pixel including overhead.
        actual_bpp = len(packed.string) * 8 / pixels
        msssim_db = -10 * np.log10(1 - msssim_value)

        print("Mean squared error: {:0.4f}".format(mse_value))
        print("PSNR (dB): {:0.2f}".format(psnr_value))
        print("Multiscale SSIM: {:0.4f}".format(msssim_value))
        print("Multiscale SSIM (dB): {:0.2f}".format(msssim_db))
        print("Information content in bpp: {:0.4f}".format(bpp_value))
        print("Actual bits per pixel: {:0.4f}".format(actual_bpp))
def train():
    """Trains the autoencoder with an additional bounding-box MSE term.

    Images matched by ``TRAIN_DIRECTORY`` are randomly cropped to
    ``PATCHSIZE`` x ``PATCHSIZE`` and zipped with the text lines read from the
    files matched by ``LABEL_DIRECTORY``. The loss is

        LMBDA * mse + bpp + LMBDA2 * mse_box

    where ``mse_box`` is the squared error weighted by a binary mask. The mask
    construction is still work in progress: it currently marks a fixed
    10 x 10 region instead of the bounding box parsed from the label.

    Every ``LOG_STEPS`` training steps the loss, bpp and mse of one batch are
    printed and appended to ``MODEL_DIRECTORY/Train_Log.txt``.
    """
    logFile = MODEL_DIRECTORY + '/' + 'Train_Log.txt'

    tf.logging.set_verbosity(tf.logging.INFO)

    # Create input data pipeline.
    with tf.device('/cpu:0'):
        # Images and labels are paired by position when the datasets are
        # zipped below, and glob returns files in arbitrary order, so both
        # lists are sorted. Assumes matching image/label files sort
        # identically - TODO confirm against the dataset layout.
        train_files = sorted(glob.glob(TRAIN_DIRECTORY))
        train_labels = sorted(glob.glob(LABEL_DIRECTORY))
        train_dataset = tf.data.Dataset.from_tensor_slices(train_files)

        # Labels are read line by line and split on whitespace.
        # NOTE: label files that define several bounding boxes contribute
        # several lines, which breaks the one-label-per-image pairing. Using
        # only the first bounding box of each file (4 values per file) would
        # avoid this.
        label_dataset = tf.data.TextLineDataset(train_labels)
        label_dataset = label_dataset.map(
            lambda token: tf.string_split([token]).values)
        label_dataset = label_dataset.map(
            lambda token: (token, extract_char(token)))

        # Images must share one size to be batched, hence the random crop.
        # NOTE(review): with random patches the bounding box coordinates are
        # no longer expressed in patch coordinates; either the boxes need to
        # be shifted/clipped per patch or the images resized (and the boxes
        # rescaled accordingly).
        train_dataset = train_dataset.map(
            load_image, num_parallel_calls=PREPROCESS_THREADS)
        train_dataset = train_dataset.map(
            lambda x: tf.random_crop(x, (PATCHSIZE, PATCHSIZE, 3)))

        # Combine the two datasets so that they stay coordinated.
        total_data = tf.data.Dataset.zip((train_dataset, label_dataset))
        total_data = total_data.shuffle(buffer_size=len(train_files)).repeat()

        # Batch and prefetch some initial batches.
        total_data = total_data.batch(BATCH_SIZE)
        total_data = total_data.prefetch(32)

    # Determine number of pixels and print input data info.
    num_pixels = BATCH_SIZE * PATCHSIZE**2
    print('Num Train File', len(train_files))
    print('Num_Pix', num_pixels, BATCH_SIZE, PATCHSIZE)

    # x[0] holds the training images, x[1] the labels.
    x = total_data.make_one_shot_iterator().get_next()

    # Pass the training images through the autoencoder.
    y = analysis_transform(x[0], NUM_FILTERS)
    entropy_bottleneck = tfc.EntropyBottleneck()
    y_tilde, likelihoods = entropy_bottleneck(y, training=True)
    x_tilde = synthesis_transform(y_tilde, NUM_FILTERS)

    # Total number of bits divided by number of pixels.
    train_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Mean squared error across pixels.
    train_mse = tf.reduce_mean(tf.squared_difference(x[0], x_tilde))
    train_mse *= 255**2  # Multiply by 255^2 to correct for rescaling.

    # Grab the 4 corners of the bounding box from the label.
    corners = [
        tf.string_to_number(x[1][1][1][2]),
        tf.string_to_number(x[1][1][1][3]),
        tf.string_to_number(x[1][1][1][4]),
        tf.string_to_number(x[1][1][1][5])
    ]

    # All-zero mask of shape [1, height, width, 1] that broadcasts against
    # x[0].
    image_shape = x[0].get_shape()
    M = tf.zeros([1, image_shape[1], image_shape[2], 1])

    # WORK IN PROGRESS: set the mask to 1 inside the bounding box. The fixed
    # 0..10 ranges are placeholders for the corner values.
    indices = [[0, i, j, 0] for i in range(0, 10) for j in range(0, 10)]
    values = [1] * len(indices)
    shape = M.get_shape()
    delta = tf.SparseTensor(indices, values, shape)
    delta = tf.cast(delta, tf.float32)
    M2 = M + tf.sparse_tensor_to_dense(delta)
    # Printed while training as a sanity check (sum of M is 0, sum of M2 > 0).
    sums = [tf.reduce_sum(M), tf.reduce_sum(M2)]

    # Mean squared error for the box portion only.
    train_mse_box = tf.reduce_mean(
        tf.multiply(tf.squared_difference(x[0], x_tilde), M2))
    train_mse_box *= 255**2

    # Training loss including the bounding box as a separate loss component.
    train_loss = LMBDA * train_mse + train_bpp + LMBDA2 * train_mse_box

    # Minimize loss and auxiliary loss, and execute update op.
    step = tf.train.create_global_step()
    main_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    main_step = main_optimizer.minimize(train_loss, global_step=step)

    aux_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE * 10)
    aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    tf.summary.scalar("loss", train_loss)
    tf.summary.scalar("bpp", train_bpp)
    tf.summary.scalar("mse", train_mse)

    tf.summary.image("original", quantize_image(x[0]))
    tf.summary.image("reconstruction", quantize_image(x_tilde))

    # Creates summary for the probability mass function (PMF) estimated in
    # the bottleneck.
    entropy_bottleneck.visualize()

    hooks = [
        tf.train.StopAtStepHook(last_step=NUM_STEPS),
        tf.train.NanTensorHook(train_loss)
    ]

    # Evaluated together so that all logged numbers describe the same batch
    # (every sess.run pulls a fresh batch from the iterator).
    log_fetches = [train_loss, train_bpp, train_mse, corners, sums]

    ep = 0
    epSub = 0
    scaffold = tf.train.Scaffold(saver=tf.train.Saver(max_to_keep=1))
    with tf.train.MonitoredTrainingSession(
            scaffold=scaffold,
            hooks=hooks,
            checkpoint_dir=MODEL_DIRECTORY,
            save_checkpoint_secs=CHECKPOINT_SAVE,
            save_summaries_secs=CHECKPOINT_SAVE) as sess:
        while not sess.should_stop():
            sess.run(train_op)
            if epSub >= LOG_STEPS:
                epSub = 0
                ep += 1
            # A monitored session refuses further run() calls once a stop was
            # requested, so skip logging after the final training step.
            if epSub == 0 and not sess.should_stop():
                loss_value, bpp_value, mse_value, corner_values, sum_values = \
                    sess.run(log_fetches)
                logged_step = ep * LOG_STEPS + epSub
                print(logged_step, 'train loss', loss_value)
                print('Corners', corner_values)
                print('Sums M and M2', sum_values)
                with open(logFile, 'a') as f:
                    f.write('step=' + str(logged_step) +
                            ',train_loss=' + str(loss_value) +
                            ',train_bpp=' + str(bpp_value) +
                            ',train_mse=' + str(mse_value) + '\n')
            epSub += 1
    print('TRAIN COMPLETED')
def decompress(input, output, num_filters, checkpoint_dir):
    """Decompresses an image by a fast implementation.

    The header of `input` stores the image shape, the length of the
    hyper-latent string, the symbol offset ``minmax`` and a bit mask of the
    latent channels that were actually coded. The hyper-latent is decoded in
    TensorFlow; the latent is then decoded position by position with a range
    decoder reading ``input[:-4] + '.bin'``, using a three-component Gaussian
    mixture predicted from the 5x5 neighbourhood of already decoded values.

    Args:
        input: Path of the compressed file (header + hyper-latent string).
        output: Path the reconstructed image is written to.
        num_filters: Number of latent channels of the model.
        checkpoint_dir: Currently unused, see the note at the checkpoint
            restore below.
    """
    start = time.time()
    tf.set_random_seed(1)
    tf.reset_default_graph()

    with tf.device('/cpu:0'):
        print(input)

        # Read the shape information and compressed string from the binary
        # file.
        with open(input, mode='rb') as fileobj:
            x_shape = np.frombuffer(fileobj.read(4), dtype=np.uint16)
            # Convert to Python ints so later arithmetic (minmax * 2 + 1)
            # cannot wrap around in uint16.
            length, minmax = (
                int(v) for v in np.frombuffer(fileobj.read(4),
                                              dtype=np.uint16))
            num = np.frombuffer(fileobj.read(16), dtype=np.uint8)
            string = fileobj.read(length)

        # Indices of the coded channels. flatnonzero always yields a 1-D
        # array, also when exactly one channel is flagged.
        flag = np.unpackbits(num)
        non_zero_idx = np.flatnonzero(flag == 1)

        # Get x_pad_shape, y_shape, z_shape.
        alignment = 64
        x_pad_shape = [
            1,
            int(math.ceil(x_shape[0] / alignment) * alignment),
            int(math.ceil(x_shape[1] / alignment) * alignment),
            3,
        ]
        y_shape = [1, x_pad_shape[1] // 16, x_pad_shape[2] // 16, num_filters]
        z_shape = [y_shape[1] // 4, y_shape[2] // 4, num_filters]

        # Add a batch dimension, then decompress and transform the
        # hyper-latent back.
        strings = tf.expand_dims(string, 0)
        entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32)
        z_tilde = entropy_bottleneck.decompress(strings, z_shape,
                                                channels=num_filters)
        phi = hyper_synthesis(z_tilde, num_filters)

        # Entropy parameters are evaluated on one 5x5 window at a time.
        tiny_y = tf.placeholder(dtype=tf.float32,
                                shape=[1, 5, 5, num_filters])
        tiny_phi = tf.placeholder(dtype=tf.float32,
                                  shape=[1, 5, 5, num_filters * 2])
        _, _, means, variances, probs = entropy_parameter(
            tiny_phi, tiny_y, num_filters, training=False)

        # Decode x_hat using the decoded y.
        y_hat = tf.placeholder(dtype=tf.float32, shape=y_shape)
        x_hat = synthesis_transform(y_hat, num_filters)

        # Remove batch dimension, and crop away any extraneous padding on the
        # bottom or right boundaries.
        x_hat = x_hat[0, :int(x_shape[0]), :int(x_shape[1]), :]

        # Write reconstructed image out as a PNG file.
        op = save_image(output, x_hat)

    def mixture_component(samples, mu, sigma, weight, k):
        """Weighted probability mass of component k on unit-width bins."""
        upper = 0.5 * (1 + scipy.special.erf(
            (samples + 0.5 - mu[k]) / ((sigma[k] + TINY) * 2 ** 0.5)))
        lower = 0.5 * (1 + scipy.special.erf(
            (samples - 0.5 - mu[k]) / ((sigma[k] + TINY) * 2 ** 0.5)))
        return (upper - lower) * weight[k]

    # Load the model checkpoint, and perform the above actions.
    with tf.Session() as sess:
        # NOTE(review): the checkpoint is hard-coded and `checkpoint_dir` is
        # ignored; the original lookup was
        # tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir).
        latest = "models/model-1399000"  # lambda = 14
        print(latest)
        tf.train.Saver().restore(sess, save_path=latest)

        phi_value = sess.run(phi)

        print("INFO: start decoding y")
        print(time.time() - start)

        decoder = RangeDecoder(input[:-4] + '.bin')
        samples = np.arange(0, minmax * 2 + 1)
        TINY = 1e-10

        # Fast implementation to decode y_hat: both the decoded latent and
        # phi are zero padded so that every position has a full 5x5 window.
        kernel_size = 5
        pad_size = (kernel_size - 1) // 2

        decoded_y = np.zeros([1,
                              y_shape[1] + kernel_size - 1,
                              y_shape[2] + kernel_size - 1,
                              num_filters])
        padded_phi = np.pad(
            phi_value,
            ((0, 0), (pad_size, pad_size), (pad_size, pad_size), (0, 0)),
            'constant',
            constant_values=((0., 0.), (0., 0.), (0., 0.), (0., 0.)))

        for h_idx in range(y_shape[1]):
            for w_idx in range(y_shape[2]):
                h_window = slice(h_idx, h_idx + kernel_size)
                w_window = slice(w_idx, w_idx + kernel_size)
                y_means, y_variances, y_probs = sess.run(
                    [means, variances, probs],
                    feed_dict={
                        tiny_y: decoded_y[:, h_window, w_window, :],
                        tiny_phi: padded_phi[:, h_window, w_window, :],
                    })

                for ch_idx in non_zero_idx:
                    mu = y_means[0, pad_size, pad_size, ch_idx, :] + minmax
                    sigma = y_variances[0, pad_size, pad_size, ch_idx, :]
                    weight = y_probs[0, pad_size, pad_size, ch_idx, :]

                    # The evaluation order is kept exactly as in the encoder
                    # side formula so that both derive identical CDFs.
                    pmf = mixture_component(samples, mu, sigma, weight, 0) + \
                        mixture_component(samples, mu, sigma, weight, 1) + \
                        mixture_component(samples, mu, sigma, weight, 2)

                    # Quantize the pmf to 16 bit precision.
                    pmf_clip = np.clip(pmf, 1.0 / 65536, 1.0)
                    pmf_clip = np.round(pmf_clip / np.sum(pmf_clip) * 65536)
                    cdf = list(np.add.accumulate(pmf_clip))
                    cdf = [0] + [int(i) for i in cdf]

                    decoded_y[0, h_idx + pad_size, w_idx + pad_size, ch_idx] = \
                        decoder.decode(1, cdf)[0] - minmax

        # Strip the padding again before running the synthesis transform.
        decoded_y = decoded_y[:,
                              pad_size:y_shape[1] + pad_size,
                              pad_size:y_shape[2] + pad_size,
                              :]

        sess.run(op, feed_dict={y_hat: decoded_y})

        end = time.time()
        print("Time (s): {:0.3f}".format(end - start))
def train(args):
    """Trains the hyperprior model on random patches of the training images.

    Runs until ``args.last_step`` global steps are reached; checkpoints and
    summaries are written to ``args.checkpoint_dir``.
    """
    if args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)

    def crop_patch(image):
        return tf.random_crop(image, (args.patchsize, args.patchsize, 3))

    # Create input data pipeline.
    with tf.device("/cpu:0"):
        train_files = glob.glob(args.train_glob)
        if len(train_files) == 0:
            raise RuntimeError(
                "No training images found with glob '{}'.".format(
                    args.train_glob))
        train_dataset = (
            tf.data.Dataset.from_tensor_slices(train_files)
            .shuffle(buffer_size=len(train_files))
            .repeat()
            .map(read_png, num_parallel_calls=args.preprocess_threads)
            .map(crop_patch)
            .batch(args.batchsize)
            .prefetch(32))

    pixels_per_batch = args.batchsize * args.patchsize ** 2

    # Get training patch from dataset.
    patches = train_dataset.make_one_shot_iterator().get_next()

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()

    # Build autoencoder and hyperprior.
    latent = analysis_transform(patches)
    hyper_latent = hyper_analysis_transform(abs(latent))
    hyper_latent_tilde, hyper_likelihoods = entropy_bottleneck(
        hyper_latent, training=True)
    scales = hyper_synthesis_transform(hyper_latent_tilde)
    log_min = np.log(SCALES_MIN)
    log_max = np.log(SCALES_MAX)
    scale_table = np.exp(np.linspace(log_min, log_max, SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(scales, scale_table)
    latent_tilde, latent_likelihoods = conditional_bottleneck(
        latent, training=True)
    reconstruction = synthesis_transform(latent_tilde)

    # Total number of bits divided by number of pixels.
    total_log_likelihood = (tf.reduce_sum(tf.log(latent_likelihoods)) +
                            tf.reduce_sum(tf.log(hyper_likelihoods)))
    train_bpp = total_log_likelihood / (-np.log(2) * pixels_per_batch)

    # Mean squared error across pixels, multiplied by 255^2 to correct for
    # rescaling.
    train_mse = tf.reduce_mean(tf.squared_difference(patches, reconstruction))
    train_mse *= 255 ** 2

    # The rate-distortion cost.
    train_loss = args.lmbda * train_mse + train_bpp

    # Minimize loss and auxiliary loss, and execute update op.
    global_step = tf.train.create_global_step()
    main_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
        train_loss, global_step=global_step)
    aux_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
        entropy_bottleneck.losses[0])
    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    for tag, scalar in (("loss", train_loss),
                        ("bpp", train_bpp),
                        ("mse", train_mse)):
        tf.summary.scalar(tag, scalar)

    tf.summary.image("original", quantize_image(patches))
    tf.summary.image("reconstruction", quantize_image(reconstruction))

    stop_hook = tf.train.StopAtStepHook(last_step=args.last_step)
    nan_hook = tf.train.NanTensorHook(train_loss)
    session = tf.train.MonitoredTrainingSession(
        hooks=[stop_hook, nan_hook],
        checkpoint_dir=args.checkpoint_dir,
        save_checkpoint_secs=300,
        save_summaries_secs=60)
    with session as sess:
        while not sess.should_stop():
            sess.run(train_op)
# Y1_raw_img = imageio.imread(args.raw) Y0_com_img = np.expand_dims(Y0_com_img, 0) # Y1_raw_img = np.expand_dims(Y1_raw_img, 0) Height = np.size(Y0_com_img, 1) Width = np.size(Y0_com_img, 2) Y0_com = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel]) # Y1_raw = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel]) string_mv_tensor = tf.placeholder(tf.string, []) string_res_tensor = tf.placeholder(tf.string, []) # Motion Decoding entropy_bottleneck_mv = tfc.EntropyBottleneck(dtype=tf.float32, name='entropy_bottleneck') flow_latent_hat = entropy_bottleneck_mv.decompress(tf.expand_dims( string_mv_tensor, 0), [Height // 16, Width // 16, args.M], channels=args.M) # Residual Decoding entropy_bottleneck_res = tfc.EntropyBottleneck(dtype=tf.float32, name='entropy_bottleneck_1_1') res_latent_hat = entropy_bottleneck_res.decompress(tf.expand_dims( string_res_tensor, 0), [Height // 16, Width // 16, args.M], channels=args.M) flow_hat = CNN_img.MV_synthesis(flow_latent_hat, args.N) # Motion Compensation Y1_warp = tf.contrib.image.dense_image_warp(Y0_com, flow_hat)
def train():
    """Trains the model on batches fed through a placeholder.

    Builds the analysis / entropy-bottleneck / synthesis graph, minimizes the
    rate-distortion loss together with the bottleneck's auxiliary loss, writes
    the merged summaries every 1000 global steps and saves a checkpoint every
    100000 global steps. Batches come from `_load_image()` / `get_batch()`.
    """
    crop_shape = (args.batchsize, args.patchsize, args.patchsize, 3)
    x = tf.placeholder(tf.float32, crop_shape)
    num_pixels = np.prod(crop_shape[:-1])

    # Build autoencoder.
    y = analysis_transform(x, args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    y_tilde, likelihoods = entropy_bottleneck(y, training=True)
    x_tilde = synthesis_transform(y_tilde, args.num_filters)

    # Total number of bits divided by number of pixels.
    train_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Squared error summed over all elements, then scaled by 255^2 (to correct
    # for rescaling) and divided by the number of pixels.
    train_mse = tf.reduce_sum(tf.squared_difference(x, x_tilde))
    train_mse *= 255**2 / num_pixels

    # The rate-distortion cost.
    train_loss = args.lmbda * train_mse + train_bpp

    # Minimize loss and auxiliary loss, and execute update op.
    step = tf.Variable(0, trainable=False, name='global_step')
    main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    main_step = main_optimizer.minimize(train_loss, global_step=step)

    aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    # Number of parameters.
    num_params = count_num_trainable_params()
    print("num_params: %d" % num_params)

    # For tensorboard.
    tf.summary.scalar('loss', train_loss)
    tf.summary.scalar('bpp', train_bpp)
    tf.summary.scalar('mse', train_mse)

    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter(args.checkpoint_dir + "/logs")
    saver = tf.train.Saver(max_to_keep=100)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # A results directory left over from an earlier run must not abort
        # training, so only create it when it is missing.
        file_dir = args.checkpoint_dir + '/results'
        if not os.path.exists(file_dir):
            os.makedirs(file_dir)

        print('Training is started!')
        dataset, img_names = _load_image()

        try:
            for _ in range(args.last_step):
                img_batch = get_batch(dataset, len(img_names))
                _, train_summary, global_step = sess.run(
                    [train_op, merged, step], feed_dict={x: img_batch})

                if global_step % 1000 == 0:
                    writer.add_summary(train_summary, global_step=global_step)
                    print('step: %d / %d' % (global_step, args.last_step))

                if global_step % 100000 == 0:
                    saver.save(sess=sess,
                               save_path=args.checkpoint_dir + "/model.ckpt",
                               global_step=global_step)
                    print('Model is saved!')
        finally:
            # Flush pending summaries to disk even if training is interrupted.
            writer.close()

        print('Training is finished!')
def train():
    """Trains the importance-map autoencoder and logs its losses.

    Reads training images matching TRAIN_DIRECTORY, builds the
    encoder / importance-map / mask / entropy-bottleneck / decoder graph and
    minimizes LMBDA * mse + rate. Every LOG_STEPS iterations (starting with
    the first) the loss, distortion and rate are printed and appended to
    MODEL_DIRECTORY/Train_Log.txt.
    """
    # Log input settings.
    logFile = MODEL_DIRECTORY + '/Train_Log.txt'

    # Set Tensorflow logging.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Create input data pipeline.
    with tf.device('/cpu:0'):
        train_files = glob.glob(TRAIN_DIRECTORY)
        train_dataset = tf.data.Dataset.from_tensor_slices(train_files)
        train_dataset = train_dataset.shuffle(
            buffer_size=len(train_files)).repeat()
        train_dataset = train_dataset.map(
            load_image, num_parallel_calls=PREPROCESS_THREADS)
        train_dataset = train_dataset.map(
            lambda x: tf.random_crop(x, (PATCHSIZE, PATCHSIZE, 3)))
        train_dataset = train_dataset.batch(BATCH_SIZE)
        train_dataset = train_dataset.prefetch(32)

    # Determine number of pixels and print input data info.
    num_pixels = BATCH_SIZE * PATCHSIZE ** 2
    print('Num Train File', len(train_files))
    print('Num_Pix', num_pixels, BATCH_SIZE, PATCHSIZE)

    # Get training patch from dataset.
    x = train_dataset.make_one_shot_iterator().get_next()

    # ---------------- Li algorithm start ----------------
    # Build autoencoder & decoder.
    E, fx = encoder_li(x)
    P = importance_map(fx)
    M = gen_mask(P)
    B = binarizer(E)
    # NOTE: skipping 'B' and using 'E' instead seemed to work better.
    bc = tf.multiply(E, M)

    entropy_bottleneck = tfc.EntropyBottleneck()
    bc_tilde, likelihoods = entropy_bottleneck(bc, training=True)
    x_tilde = decoder_li(bc_tilde)

    print('x', x)
    print('E', E)
    print('fx', fx)
    print('x_tilde', x_tilde)
    print('map', P)
    print('B', B)
    print('M', M)
    print('bc', bc)

    # Rate loss: total number of bits divided by number of pixels.
    rateLoss = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Mean squared error across pixels, multiplied by 255^2 to correct for
    # rescaling.
    train_mse = tf.reduce_mean(tf.squared_difference(x, x_tilde))
    train_mse *= 255 ** 2

    # The rate-distortion cost.
    train_loss = LMBDA * train_mse + rateLoss
    # ---------------- Li algorithm end ----------------

    # Minimize loss and auxiliary loss, and execute update op.
    step = tf.train.create_global_step()
    main_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    main_step = main_optimizer.minimize(train_loss, global_step=step)

    aux_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE * 10)
    aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    tf.summary.scalar("loss", train_loss)
    tf.summary.scalar("bpp", rateLoss)
    tf.summary.scalar("mse", train_mse)

    tf.summary.image("original", quantize_image(x))
    tf.summary.image("reconstruction", quantize_image(x_tilde))

    # Creates summary for the probability mass function (PMF) estimated in the
    # bottleneck.
    entropy_bottleneck.visualize()

    hooks = [
        tf.train.StopAtStepHook(last_step=NUM_STEPS),
        tf.train.NanTensorHook(train_loss),
    ]

    ep = 0
    epSub = 0
    scaffold = tf.train.Scaffold(saver=tf.train.Saver(max_to_keep=1))
    with tf.train.MonitoredTrainingSession(
            scaffold=scaffold, hooks=hooks, checkpoint_dir=MODEL_DIRECTORY,
            save_checkpoint_secs=CHECKPOINT_SAVE,
            save_summaries_secs=CHECKPOINT_SAVE) as sess:
        while not sess.should_stop():
            # Roll the sub-counter over before the step so we know up front
            # whether this iteration is a logging iteration.
            if epSub >= LOG_STEPS:
                epSub = 0
                ep += 1

            if epSub == 0:
                # Fetch the losses in the SAME run as the training op. Separate
                # sess.run calls would each draw a new batch from the input
                # iterator (so the three numbers would not belong together,
                # and the printed values would differ from the logged ones)
                # and could be issued after the stop hook has already fired.
                _, loss_val, mse_val, rate_val = sess.run(
                    [train_op, train_loss, train_mse, rateLoss])
                step_label = ep * LOG_STEPS + epSub
                print(step_label, 'TRAIN/DIST/RATE LOSS:',
                      loss_val, mse_val, rate_val)
                with open(logFile, 'a') as f:
                    f.write('step=' + str(step_label)
                            + ',train_loss=' + str(loss_val)
                            + ',rateLoss=' + str(rate_val)
                            + ',distortionLoss=' + str(mse_val) + '\n')
            else:
                sess.run(train_op)

            epSub += 1
folder = np.load('folder.npy')

# Reference frame, target frame and a scalar learning rate are all fed at
# run time.
frame_shape = [batch_size, Height, Width, Channel]
Y0_com = tf.placeholder(tf.float32, frame_shape)
Y1_raw = tf.placeholder(tf.float32, frame_shape)
learning_rate = tf.placeholder(tf.float32, [])

# Only the first of the six outputs of the optical-flow network is used.
with tf.variable_scope("flow_motion"):
    flow_tensor, _, _, _, _, _ = motion.optical_flow(
        Y0_com, Y1_raw, batch_size, Height, Width)

# Encode flow: analysis transform, entropy bottleneck, synthesis transform.
flow_latent = CNN_img.MV_analysis(flow_tensor, args.N, args.M)

entropy_bottleneck_mv = tfc.EntropyBottleneck()
string_mv = entropy_bottleneck_mv.compress(flow_latent)
flow_latent_hat, MV_likelihoods = entropy_bottleneck_mv(
    flow_latent, training=True)

flow_hat = CNN_img.MV_synthesis(flow_latent_hat, args.N)

# Motion compensation: warp the reference frame with the reconstructed flow,
# then refine using the flow, the reference and the warped frame together.
Y1_warp = tf.contrib.image.dense_image_warp(Y0_com, flow_hat)
MC_input = tf.concat([flow_hat, Y0_com, Y1_warp], axis=-1)
Y1_MC = MC_network.MC(MC_input)

# Residual to be encoded next.
Res = Y1_raw - Y1_MC
def compress():
    """Compresses an image and optionally reports rate and distortion.

    Loads `args.input`, runs the analysis transform and entropy bottleneck
    restored from the latest checkpoint in `args.checkpoint_dir`, and writes
    the spatial shapes of the image and of the latent (as uint16) followed by
    the compressed string to `args.output`.

    When `args.verbose` is set, the image is also reconstructed and the MSE,
    PSNR, estimated bpp and actual bpp are appended to the `Output_*.txt`
    files in the working directory and printed.

    Raises:
        RuntimeError: if no checkpoint is found in `args.checkpoint_dir`.
    """
    # Load input image and add batch dimension.
    x = load_image(args.input)
    x = tf.expand_dims(x, 0)
    x.set_shape([1, None, None, 3])

    # Transform and compress the image, then remove batch dimension.
    y = analysis_transform(x, args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    string = entropy_bottleneck.compress(y)
    string = tf.squeeze(string, axis=0)

    # Transform the quantized image back (if requested).
    y_hat, likelihoods = entropy_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat, args.num_filters)

    num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))

    # Total number of bits divided by number of pixels.
    eval_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Bring the reconstruction back to the 0..255 range.
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)

    # Mean squared error over every element, channels included. Dividing the
    # summed error by the pixel count alone would inflate the MSE by the
    # number of channels and understate the PSNR derived from it.
    mse = tf.reduce_mean(tf.squared_difference(x * 255, x_hat))

    with tf.Session() as sess:
        # Load the latest model checkpoint, get the compressed string and the
        # tensor shapes.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        if latest is None:
            raise RuntimeError(
                "No checkpoint found in '{}'.".format(args.checkpoint_dir))
        tf.train.Saver().restore(sess, save_path=latest)
        string, x_shape, y_shape = sess.run(
            [string, tf.shape(x), tf.shape(y)])

        # Write a binary file with the shape information and the compressed
        # string.
        with open(args.output, "wb") as f:
            f.write(np.array(x_shape[1:-1], dtype=np.uint16).tobytes())
            f.write(np.array(y_shape[1:-1], dtype=np.uint16).tobytes())
            f.write(string)

        # If requested, transform the quantized image back and measure
        # performance.
        if args.verbose:
            eval_bpp, mse, num_pixels = sess.run([eval_bpp, mse, num_pixels])

            # The actual bits per pixel including overhead (8 bytes of shape
            # header in front of the string).
            bpp = (8 + len(string)) * 8 / num_pixels
            psnr = 10 * np.log10(255 * 255 / mse)

            with open('Output_MSE.txt', 'a+') as text_file:
                text_file.write('%10f\n' % mse)
            with open('Output_PSNR.txt', 'a+') as text_psnr_file:
                text_psnr_file.write('%10f\n' % psnr)
            with open('Output_InformationInBPP.txt', 'a+') as text_information_file:
                text_information_file.write('%10f\n' % eval_bpp)
            with open('Output_ActualBPP.txt', 'a+') as text_Actual_file:
                text_Actual_file.write('%10f\n' % bpp)

            print('PSNR: {:0.4}'.format(psnr))
            print("Mean squared error: {:0.4}".format(mse))
            print("Information content of this image in bpp: {:0.4}".format(
                eval_bpp))
            print("Actual bits per pixel for this image: {:0.4}".format(bpp))
def model_fn(features, labels, mode, params):
    """Estimator model function for the point-cloud autoencoder.

    :param features: batch_features from input_fn. In decompression mode these
        are passed straight to the entropy bottleneck's `decompress`.
    :param labels: batch_labels from input_fn (unused; deleted immediately).
    :param mode: An instance of tf.estimator.ModeKeys.
    :param params: Additional configuration (mapping). Keys read here:
        `decompress` (optional, treated as False when missing or None),
        `batch_size`, `knn`, `lmbda`, `checkpoint_dir`.
    :return: A tf.estimator.EstimatorSpec for PREDICT, TRAIN and EVAL modes.
    """
    # Work on a copy so the caller's mapping is left untouched.
    params = dict(params)
    if params.get('decompress') is None:
        params['decompress'] = False
    params = namedtuple('Struct', params.keys())(*params.values())
    del labels

    if params.decompress:
        assert mode == tf.estimator.ModeKeys.PREDICT, 'Decompression must use prediction mode'
        entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32)
        y_tilde = entropy_bottleneck.decompress(features, [512], channels=512)  # B*N
        x_hat = pc_decoder(y_tilde, params.batch_size,
                           is_training=False, bn_decay=False)
        predictions = {
            'y_tilde': y_tilde,
            'x_hat': x_hat
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Only the second (static) dimension is needed below; the first one is
    # not read, so it may be unknown at graph-construction time.
    num_points = int(features.shape[1])
    pc = features

    bn_decay = get_bn_decay(tf.train.get_global_step())
    learning_rate = get_learning_rate(tf.train.get_global_step())
    tf.summary.scalar('bn_decay', bn_decay)
    tf.summary.scalar('learning_rate', learning_rate)

    # ============= encoder =============
    y = pc_encoder(pc, params.knn, is_training=training, bn_decay=bn_decay)

    # ============= bottleneck layer =============
    # NOTE(review): the bottleneck is called with training=True in every
    # mode, including EVAL and PREDICT - confirm this is intended.
    entropy_bottleneck = tfc.EntropyBottleneck()
    y_tilde, likelihoods = entropy_bottleneck(y, training=True)

    # ============= decoder =============
    x_tilde = pc_decoder(y_tilde, params.batch_size,
                         is_training=training, bn_decay=bn_decay)

    # Number of bits divided by number of points.
    train_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_points)

    if mode == tf.estimator.ModeKeys.PREDICT:
        string = entropy_bottleneck.compress(y)
        predictions = {
            'string': string,
            'x_tilde': x_tilde,
            'y_tilde': y_tilde
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    losses = get_emd_loss(x_tilde, pc, 1)
    rd_loss = params.lmbda * train_bpp + losses

    tf.summary.scalar('loss', losses)
    tf.summary.scalar('rd_loss', rd_loss)
    tf.summary.scalar('bpp', train_bpp)

    if mode == tf.estimator.ModeKeys.TRAIN:
        main_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        main_step = main_optimizer.minimize(
            rd_loss, global_step=tf.train.get_global_step())

        aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
        aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

        train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])
        return tf.estimator.EstimatorSpec(mode, loss=rd_loss, train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        summary_hook = tf.train.SummarySaverHook(
            save_steps=5,
            output_dir=os.path.join(params.checkpoint_dir, 'eval'),
            summary_op=tf.summary.merge_all())
        return tf.estimator.EstimatorSpec(
            mode, loss=rd_loss, evaluation_hooks=[summary_hook])
def train():
    """Trains the model on random crops of the images matching args.data_glob."""
    if args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Decode every file matching the glob, evaluate the result once, and
    # store it in the graph as a constant.
    decoded = tf.map_fn(
        load_image, tf.matching_files(args.data_glob),
        dtype=tf.float32, back_prop=False)
    with tf.Session() as sess:
        images = tf.constant(sess.run(decoded), name="images")

    # Each training input is a random crop taken from the images tensor.
    crop_shape = (args.batchsize, args.patchsize, args.patchsize, 3)
    x = tf.random_crop(images, crop_shape)
    num_pixels = np.prod(crop_shape[:-1])

    # Autoencoder: analysis transform -> entropy bottleneck -> synthesis.
    y = analysis_transform(x, args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    y_tilde, likelihoods = entropy_bottleneck(y, training=True)
    x_tilde = synthesis_transform(y_tilde, args.num_filters)

    # Rate: total number of bits divided by number of pixels.
    train_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Distortion: summed squared error, scaled by 255^2 (to correct for
    # rescaling) and divided by the number of pixels.
    train_mse = tf.reduce_sum(tf.squared_difference(x, x_tilde))
    train_mse *= 255**2 / num_pixels

    # Rate-distortion objective.
    train_loss = args.lmbda * train_mse + train_bpp

    # One optimizer for the main loss (which also advances the global step),
    # one for the bottleneck's auxiliary loss; both are grouped with the
    # bottleneck's update op.
    step = tf.train.create_global_step()
    main_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
        train_loss, global_step=step)
    aux_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
        entropy_bottleneck.losses[0])
    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    # Named copies of the scalars, reported by the logging hook every minute.
    logged_tensors = [
        tf.identity(train_loss, name="train_loss"),
        tf.identity(train_bpp, name="train_bpp"),
        tf.identity(train_mse, name="train_mse"),
    ]

    stop_hook = tf.train.StopAtStepHook(last_step=args.last_step)
    nan_hook = tf.train.NanTensorHook(train_loss)
    log_hook = tf.train.LoggingTensorHook(logged_tensors, every_n_secs=60)

    with tf.train.MonitoredTrainingSession(
            hooks=[stop_hook, nan_hook, log_hook],
            checkpoint_dir=args.checkpoint_dir) as sess:
        while not sess.should_stop():
            sess.run(train_op)
def test_compress(args):
    """Evaluates bpp and MSE of one image over a sweep of log-lambda values.

    The sweep runs from 0 to 7 (exclusive) in steps of 0.01; each session run
    pulls the next value from a one-shot iterator, and one tab-separated line
    (log-lambda, estimated bpp, MSE) is printed per value.
    """
    # Load input image and add batch dimension.
    x = tf.expand_dims(read_png(args.input_file), 0)
    x.set_shape([1, None, None, 3])
    x_shape = tf.shape(x)

    # The sweep values are served one per session run.
    step = 0.01
    sweep = np.arange(0, 7, step)
    lmbda_log_dist = tf.constant(sweep, dtype=tf.float32)
    sweep_dataset = tf.data.Dataset.from_tensor_slices(lmbda_log_dist)
    lmbda_log = sweep_dataset.make_one_shot_iterator().get_next()  # levels
    # True value; not consumed below, kept so the graph stays unchanged.
    lmbda = 0.1 * tf.pow(2.0, lmbda_log - 6.0)

    # Instantiate model; every transform receives the current log-lambda.
    analysis_transform = AnalysisTransform(args.num_filters, lmbda_log)
    synthesis_transform = SynthesisTransform(args.num_filters, lmbda_log)
    hyper_analysis_transform = HyperAnalysisTransform(
        args.num_filters, lmbda_log)
    hyper_synthesis_transform = HyperSynthesisTransform(
        args.num_filters, lmbda_log)
    entropy_bottleneck = tfc.EntropyBottleneck()

    # Transform and compress the image.
    y = analysis_transform(x)
    y_shape = tf.shape(y)
    z = hyper_analysis_transform(abs(y))
    z_hat, z_likelihoods = entropy_bottleneck(z, training=False)

    # Crop the predicted scales to the spatial size of y.
    sigma = hyper_synthesis_transform(z_hat)
    sigma = sigma[:, :y_shape[1], :y_shape[2], :]

    log_scale_grid = np.linspace(
        np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS)
    scale_table = np.exp(log_scale_grid)
    conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
    side_string = entropy_bottleneck.compress(z)
    string = conditional_bottleneck.compress(y)

    # Transform the quantized image back and crop it to the input size.
    y_hat, y_likelihoods = conditional_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat)
    x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

    num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

    # Total number of bits divided by number of pixels.
    eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) +
                tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

    # Bring both images back to 0..255 range.
    im_x = x * 255
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    im_x_hat = tf.round(x_hat * 255)

    mse = tf.reduce_mean(tf.squared_difference(im_x, im_x_hat))
    psnr = tf.squeeze(tf.image.psnr(im_x_hat, im_x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(im_x_hat, im_x, 255))

    with tf.Session() as sess:
        # Restore the latest checkpoint, then evaluate once per sweep value.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)

        for _ in sweep:
            v_lmbda_log, v_eval_bpp, v_mse = sess.run(
                [lmbda_log, eval_bpp, mse])
            print("%.2f\t%.4f\t%.4f"%(v_lmbda_log, v_eval_bpp, v_mse))
# Three frames of identical shape are fed at run time.
frame_shape = [batch_size, Height, Width, Channel]
Y0_com = tf.placeholder(tf.float32, frame_shape)
Y1_raw = tf.placeholder(tf.float32, frame_shape)
Y2_raw = tf.placeholder(tf.float32, frame_shape)

# Flow estimated between Y0_com and Y2_raw; only the first of the six
# outputs of the optical-flow network is used.
with tf.variable_scope("flow_motion", reuse=False):
    flow_20, _, _, _, _, _ = motion.optical_flow(
        Y0_com, Y2_raw, batch_size, Height, Width)

# Compress that flow and reconstruct it from the quantized latent.
with tf.variable_scope("motion_compression", reuse=False):
    flow_latent = CNN_img.MV_analysis(flow_20, num_filters=args.N, M=args.M)

    entropy_mv = tfc.EntropyBottleneck()
    string_mv = entropy_mv.compress(flow_latent)
    string_mv = tf.squeeze(string_mv, axis=0)

    flow_latent_hat, MV_likelihoods = entropy_mv(flow_latent, training=False)
    flow_20_hat = CNN_img.MV_synthesis(flow_latent_hat, num_filters=args.N)

# Derive the remaining flows from the reconstructed one: invert it, halve
# the result, and invert again.
with tf.variable_scope("motion_estimation", reuse=False):
    flow_02_hat = motion.tf_inverse_flow(
        flow_20_hat, batch_size, Height, Width)
    flow_01_hat = 0.5 * flow_02_hat
    flow_10_hat = motion.tf_inverse_flow(
        flow_01_hat, batch_size, Height, Width)
def test_compress(args):
    """Measures bpp and MSE on kodak/*.png while masking latent channels.

    For `active` = 256, 240, ..., 32 the latent `y` is multiplied by a mask
    whose first `active` entries are one and the rest zero (rescaled by
    active / 256), compressed, decompressed and synthesized. For every PNG
    in `kodak/`, one row (active, filename, bpp, mse, number of pixels) is
    written to `e6.csv`.
    """
    fn = tf.placeholder(tf.string, [])

    # Load input image and add batch dimension.
    x = read_png(fn)
    x = tf.expand_dims(x, 0)
    x.set_shape([1, None, None, 3])
    x_shape = tf.shape(x)

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()

    # Transform and compress the image.
    y = analysis_transform(x)
    y_shape = tf.shape(y)
    z = hyper_analysis_transform(abs(y))
    z_hat, z_likelihoods = entropy_bottleneck(z, training=False)
    sigma = hyper_synthesis_transform(z_hat)
    sigma = sigma[:, :y_shape[1], :y_shape[2], :]
    scale_table = np.exp(np.linspace(
        np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = DynamicGaussianConditional(
        sigma, scale_table, name="gaussian_conditional")
    side_string = entropy_bottleneck.compress(z)
    string = conditional_bottleneck.compress(y)

    # Transform the quantized image back (if requested).
    y_hat, y_likelihoods = conditional_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat)
    x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

    num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

    # Total number of bits divided by number of pixels.
    eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) +
                tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

    # Bring both images back to 0..255 range.
    x *= 255
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)

    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    with tf.Session() as sess:
        # Load the latest model checkpoint.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)

        # 256 ones followed by 224 zeros; a 256-wide window into it yields
        # `active` ones followed by zeros.
        const = tf.constant([1]*256+[0]*224, dtype=tf.float32)

        # The file list does not depend on `active`, so read it once.
        filenames = glob.glob("kodak/*.png")

        # The context manager closes the CSV even if a session run fails.
        with open("e6.csv", "w") as f:
            print("active, fn, bpp, mse, np", file=f)
            for active in range(256, 31, -16):
                mask = const[256-active:512-active]
                rate = tf.reduce_sum(mask) / 256
                y_itc = y * mask/rate
                string = conditional_bottleneck.compress(y_itc)
                y_itc_hat = conditional_bottleneck.decompress(string)

                # Transform the quantized image back (if requested).
                x_hat = synthesis_transform(y_itc_hat)
                x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

                # Only the first `active` channels of y count towards the
                # rate.
                eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods[:,:,:,:active])) +
                            tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

                x_hat = tf.clip_by_value(x_hat, 0, 1)
                x_hat = tf.round(x_hat * 255)

                mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
                psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
                msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

                for filename in filenames:
                    v_eval_bpp, v_mse, v_num_pixels = sess.run(
                        [eval_bpp, mse, num_pixels], feed_dict={fn: filename})
                    print("%.2f, %s, %.4f, %.4f, %d"%(active, filename, v_eval_bpp, v_mse, v_num_pixels), file=f)
def train(args):
    """Trains the model with an MSE or MS-SSIM distortion term.

    Reads PNGs matching `args.train_glob`, trains on random crops of size
    `args.patchsize`, and minimizes `args.lmbda * distortion + bpp`, where
    the distortion is 1 - MS-SSIM when `args.ssim_loss` is set and the
    255^2-scaled MSE otherwise. Scalar summaries are saved every 30 seconds,
    image summaries every hour and checkpoints every 300 seconds.

    Raises:
        RuntimeError: if `args.train_glob` matches no files.
    """
    if args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Create input data pipeline.
    with tf.device("/cpu:0"):
        train_files = glob.glob(args.train_glob)
        if not train_files:
            raise RuntimeError(
                "No training images found with glob '{}'.".format(
                    args.train_glob))
        train_dataset = tf.data.Dataset.from_tensor_slices(train_files)
        train_dataset = train_dataset.shuffle(
            buffer_size=len(train_files)).repeat()
        train_dataset = train_dataset.map(
            read_png, num_parallel_calls=args.preprocess_threads)
        train_dataset = train_dataset.map(
            lambda x: tf.random_crop(x, (args.patchsize, args.patchsize, 3)))
        train_dataset = train_dataset.batch(args.batchsize)
        train_dataset = train_dataset.prefetch(32)

    num_pixels = args.batchsize * args.patchsize**2

    # Get training patch from dataset.
    x = train_dataset.make_one_shot_iterator().get_next()

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    synthesis_transform = SynthesisTransform(args.num_filters)

    # Build autoencoder.
    y = analysis_transform(x)
    y_tilde, likelihoods = entropy_bottleneck(y, training=True)
    x_tilde = synthesis_transform(y_tilde)

    # Total number of bits divided by number of pixels.
    train_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Mean squared error across pixels.
    train_mse = tf.reduce_mean(tf.squared_difference(x, x_tilde))
    # Multiply by 255^2 to correct for rescaling.
    train_mse *= 255**2

    # PSNR and MS-SSIM are measured on the same unscaled tensors as the MSE
    # above and the MS-SSIM loss below, so their dynamic range is 1, not 255.
    # Passing 255 would shift the PSNR by a constant and flatten MS-SSIM.
    train_psnr = tf.reduce_mean(tf.image.psnr(x_tilde, x, 1.0))
    train_msssim_value = tf.reduce_mean(
        tf.image.ssim_multiscale(x_tilde, x, 1.0))

    # Structural similarity loss.
    train_ssim = tf.reduce_mean(1 - tf.image.ssim_multiscale(x_tilde, x, 1))

    # Choose distortion metric.
    distortion = train_ssim if args.ssim_loss else train_mse

    # The rate-distortion cost.
    train_loss = args.lmbda * distortion + train_bpp

    # Minimize loss and auxiliary loss, and execute update op.
    step = tf.train.create_global_step()
    main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    main_step = main_optimizer.minimize(train_loss, global_step=step)

    aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    # Log scalar values.
    s_loss = tf.summary.scalar("train/loss", train_loss)
    s_bpp = tf.summary.scalar("train/bpp", train_bpp)
    s_mse = tf.summary.scalar("train/mse", train_mse)
    s_psnr = tf.summary.scalar("train/psnr", train_psnr)
    s_msssim_value = tf.summary.scalar("train/multiscale ssim value",
                                       train_msssim_value)
    # NOTE(review): tf.log is the natural logarithm; if this is meant to be
    # a decibel value it would need log10 - confirm the intended unit.
    s_ssim = tf.summary.scalar("train/multiscale ssim",
                               -10 * tf.log(train_ssim))

    # Log training images.
    s_original = tf.summary.image("images/original", quantize_image(x))
    s_reconstruction = tf.summary.image("images/reconstruction",
                                        quantize_image(x_tilde))

    # Merge scalars into a summary.
    train_summary = tf.summary.merge(
        [s_loss, s_bpp, s_mse, s_psnr, s_msssim_value, s_ssim])

    # Merge images into a summary.
    image_summary = tf.summary.merge([s_original, s_reconstruction])

    hooks = [
        tf.train.StopAtStepHook(last_step=args.last_step),
        tf.train.NanTensorHook(train_loss),
        tf.train.SummarySaverHook(save_secs=30,
                                  output_dir=args.checkpoint_dir,
                                  summary_op=train_summary),
        tf.train.SummarySaverHook(save_secs=3600,
                                  output_dir=args.checkpoint_dir,
                                  summary_op=image_summary)
    ]

    # The session's built-in summary saving is switched off because the two
    # SummarySaverHooks above take over that job.
    with tf.train.MonitoredTrainingSession(hooks=hooks,
                                           checkpoint_dir=args.checkpoint_dir,
                                           save_checkpoint_secs=300,
                                           save_summaries_steps=None,
                                           save_summaries_secs=None) as sess:
        while not sess.should_stop():
            sess.run(train_op)
def test_decompress(args):
    """Decompresses an image and reports quality for truncated latents.

    Unpacks the strings and shapes stored in `args.input_file`, decodes the
    latent, and for `active` = 192, 184, ..., 8 synthesizes an image from the
    first `active` latent channels only. MSE, PSNR and MS-SSIM against
    `kodak/kodim01.png` are printed for each value.
    """
    # Read the shape information and compressed string from the binary file.
    string = tf.placeholder(tf.string, [1])
    side_string = tf.placeholder(tf.string, [1])
    x_shape = tf.placeholder(tf.int32, [2])
    y_shape = tf.placeholder(tf.int32, [2])
    z_shape = tf.placeholder(tf.int32, [2])
    with open(args.input_file, "rb") as f:
        packed = tfc.PackedTensors(f.read())
    tensors = [string, side_string, x_shape, y_shape, z_shape]
    arrays = packed.unpack(tensors)

    # Instantiate model.
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32)

    # Decompress and transform the image back.
    z_shape = tf.concat([z_shape, [args.num_filters]], axis=0)
    z_hat = entropy_bottleneck.decompress(
        side_string, z_shape, channels=args.num_filters)
    sigma = hyper_synthesis_transform(z_hat)
    sigma = sigma[:, :y_shape[0], :y_shape[1], :]
    scale_table = np.exp(np.linspace(
        np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(
        sigma, scale_table, dtype=tf.float32)
    y_hat_all = conditional_bottleneck.decompress(string)

    # Reference image, scaled to the 0..255 range.
    x = read_png("kodak/kodim01.png")
    x = tf.expand_dims(x, 0)
    x.set_shape([1, None, None, 3])
    x_shape = tf.shape(x)
    x *= 255

    # This first synthesis pass is built BEFORE the Saver below is created.
    # NOTE(review): presumably this is what creates the synthesis variables
    # so the Saver can restore them - keep it ahead of the Saver; confirm.
    active = 192
    y_hat = y_hat_all[:,:,:,:active]
    x_hat = synthesis_transform(y_hat)
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)

    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    # The context manager releases the session when the sweep ends or fails.
    with tf.Session() as sess:
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)

        feed = dict(zip(tensors, arrays))
        for active in range(192, 0, -8):
            y_hat = y_hat_all[:,:,:,:active]
            x_hat = synthesis_transform(y_hat)
            x_hat = tf.clip_by_value(x_hat, 0, 1)
            x_hat = tf.round(x_hat * 255)

            mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
            psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
            msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

            vmse, vpsnr, vmsssim = sess.run([mse, psnr, msssim],
                                            feed_dict=feed)
            print(active, vmse, vpsnr, vmsssim)
def __init__(self):
    """Set up the layer under the name "FactorizedPrior" with its entropy model."""
    super(FactorizedPriorLayer, self).__init__(name="FactorizedPrior")
    # The bottleneck is created once here and kept on the instance.
    bottleneck = tfc.EntropyBottleneck(name="entropy_model")
    self._entropy_model = bottleneck
# Axes 1 and 2 of the batched reference frame are taken as height and width.
Height, Width = Y0_com_img.shape[1:3]

Y0_com = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel])

# Scalar string placeholders for the motion bitstream and the two residual
# bitstreams.
string_mv_tensor = tf.placeholder(tf.string, [])
string_res1_tensor = tf.placeholder(tf.string, [])
string_res2_tensor = tf.placeholder(tf.string, [])

# Decode the motion latent on a grid 16x smaller than the frame and turn it
# back into a flow field.
with tf.variable_scope("motion_compression", reuse=False):
    entropy_mv = tfc.EntropyBottleneck(dtype=tf.float32)
    mv_latent_shape = [Height // 16, Width // 16, args.M]
    flow_latent_hat = entropy_mv.decompress(
        tf.expand_dims(string_mv_tensor, 0), mv_latent_shape,
        channels=args.M)
    flow_20_hat = CNN_img.MV_synthesis(flow_latent_hat, num_filters=args.N)

# Derive the remaining flows from the decoded one by inverting and halving.
with tf.variable_scope("motion_estimation", reuse=False):
    flow_02_hat = motion.tf_inverse_flow(
        flow_20_hat, batch_size, Height, Width)
    flow_01_hat = 0.5 * flow_02_hat
    flow_10_hat = motion.tf_inverse_flow(
        flow_01_hat, batch_size, Height, Width)
    flow_21_hat = 0.5 * flow_20_hat
def compress(args):
    """Compresses every file of a hard-coded folder and saves the results.

    For each entry of the input folder the image is run through the analysis,
    hyper-analysis and entropy models; the clipped and rounded reconstruction
    is written with `plt.imsave` under the same file name in the output
    folder. Two arrays are saved next to the reconstructions:

      * `bpp.npy`      - estimated bits per pixel from the likelihoods,
      * `full_bpp.npy` - size of the packed bitstream in bits per pixel.

    Args:
      args: Namespace providing `num_filters`, `checkpoint_dir` and `verbose`.
        When `verbose` is set, the metrics of the last processed image are
        printed.
    """
    data_folder = "/media/expansion1/navneedhmaudgalya/Datasets/tiny_imagenet/train/"
    output_folder = "/media/expansion1/navneedhmaudgalya/Datasets/tiny_imagenet/train_bmshj_001n"
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    bpp = []
    full_bpp = []

    # Path of the image to process; fed anew for every file.
    index = tf.placeholder(tf.string)

    # Load input image and add batch dimension.
    x = read_png(index)
    x = tf.expand_dims(x, 0)
    x.set_shape([1, None, None, 3])
    x_shape = tf.shape(x)

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()

    # Transform and compress the image.
    y = analysis_transform(x)
    y_shape = tf.shape(y)
    z = hyper_analysis_transform(abs(y))
    z_hat, z_likelihoods = entropy_bottleneck(z, training=False)
    sigma = hyper_synthesis_transform(z_hat)
    sigma = sigma[:, :y_shape[1], :y_shape[2], :]
    scale_table = np.exp(
        np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
    side_string = entropy_bottleneck.compress(z)
    string = conditional_bottleneck.compress(y)

    # Transform the quantized image back and crop it to the input size.
    y_hat, y_likelihoods = conditional_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat)
    x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

    num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

    # Total number of bits divided by number of pixels.
    eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + tf.reduce_sum(
        tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

    # Bring both images back to 0..255 range.
    x *= 255
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)

    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    with tf.Session() as sess:
        # Load the latest model checkpoint.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)

        # Compressed strings plus the spatial shapes stored in the bitstream.
        tensors = [
            string, side_string,
            tf.shape(x)[1:-1],
            tf.shape(y)[1:-1],
            tf.shape(z)[1:-1]
        ]

        # State of the last processed image, used by the verbose report.
        packed = None
        last_image_path = None

        for image_file_name in tqdm(os.listdir(data_folder)):
            image_file_path = str(os.path.join(data_folder, image_file_name))
            x_h, arrays, inf_bpp, image_pixels = sess.run(
                [x_hat, tensors, eval_bpp, num_pixels],
                feed_dict={index: image_file_path})
            plt.imsave(
                os.path.join(output_folder, image_file_name), x_h[0] / 255.)

            # Pack the shape information and the compressed strings to
            # measure the real bitstream size.
            packed = tfc.PackedTensors()
            packed.pack(tensors, arrays)
            last_image_path = image_file_path

            bpp.append(inf_bpp)
            # Use the real pixel count instead of a fixed 64 * 64; for 64x64
            # inputs the value is unchanged.
            full_bpp.append(len(packed.string) * 8 / float(image_pixels))

        np.save("{}/bpp.npy".format(output_folder), bpp)
        np.save("{}/full_bpp.npy".format(output_folder), full_bpp)

        # If requested, measure performance on the last processed image. The
        # placeholder has to be fed here, otherwise the run fails; nothing is
        # reported when the folder was empty.
        if args.verbose and packed is not None:
            last_eval_bpp, last_mse, last_psnr, last_msssim, last_pixels = (
                sess.run([eval_bpp, mse, psnr, msssim, num_pixels],
                         feed_dict={index: last_image_path}))

            # The actual bits per pixel including overhead.
            actual_bpp = len(packed.string) * 8 / last_pixels

            print("Mean squared error: {:0.4f}".format(last_mse))
            print("PSNR (dB): {:0.2f}".format(last_psnr))
            print("Multiscale SSIM: {:0.4f}".format(last_msssim))
            print("Multiscale SSIM (dB): {:0.2f}".format(
                -10 * np.log10(1 - last_msssim)))
            print("Information content in bpp: {:0.4f}".format(last_eval_bpp))
            print("Actual bits per pixel: {:0.4f}".format(actual_bpp))
def compress():
    """Compresses every test image and writes one `.bin` file per image.

    Configuration is read from the module-level `args` (`num_filters`,
    `checkpoint_dir`, `verbose`). The images and their names come from
    `_load_image()`. Each output file is stored in
    `<checkpoint_dir>/results/<image name without extension>.bin` and holds:

      * the two spatial dimensions of the input as uint16,
      * the two spatial dimensions of the latent tensor as uint16,
      * the compressed string.

    When `args.verbose` is set, the estimated and the actual bits per pixel
    are printed for every image.
    """
    import os  # Local import keeps this block self-contained.

    x = tf.placeholder(tf.float32, [1, None, None, 3])

    # Transform and compress the image, then remove batch dimension.
    y = analysis_transform(x, args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    string = entropy_bottleneck.compress(y)
    string = tf.squeeze(string, axis=0)

    # Transform the quantized image back.
    y_hat, likelihoods = entropy_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat, args.num_filters)

    num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))

    # Total number of bits divided by number of pixels.
    eval_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Reconstruction in the 0..255 range. The MSE against the input is not
    # evaluated: the original note says it requires image sizes that are a
    # multiple of 16.
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)
    print(x_hat.shape)

    # Create the shape ops once. Building them inside the image loop would
    # add new nodes to the graph on every iteration.
    x_shape_op = tf.shape(x)
    y_shape_op = tf.shape(y)

    # Bytes taken by the four uint16 shape entries written before the string.
    header_bytes = 8

    results_dir = os.path.join(args.checkpoint_dir, 'results')
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)

    with tf.Session() as sess:
        # Load the latest model checkpoint and test images.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)
        dataset, img_names = _load_image()

        for img, img_name in zip(dataset, img_names):
            feed = {x: [img]}

            # Get the compressed string and the tensor shapes.
            _string, x_shape, y_shape = sess.run(
                [string, x_shape_op, y_shape_op], feed_dict=feed)

            # Write a binary file with the shape information and the
            # compressed string. splitext handles extensions of any length.
            stem = os.path.splitext(img_name)[0]
            file_name = os.path.join(results_dir, stem + '.bin')
            with open(file_name, "wb") as f:
                f.write(np.array(x_shape[1:-1], dtype=np.uint16).tobytes())
                f.write(np.array(y_shape[1:-1], dtype=np.uint16).tobytes())
                f.write(_string)

            # If requested, measure performance.
            if args.verbose:
                _eval_bpp, _num_pixels = sess.run(
                    [eval_bpp, num_pixels], feed_dict=feed)

                # The actual bits per pixel including overhead.
                bpp = (header_bytes + len(_string)) * 8 / _num_pixels

                print(
                    "Information content of this image in bpp: {:0.4}".format(
                        _eval_bpp))
                print(
                    "Actual bits per pixel for this image: {:0.4}".format(bpp))
def _gmm_pmf(samples, mu, sigma, weight, tiny):
    """Returns the three-component Gaussian-mixture mass of each sample bin.

    Every bin covers `[sample - 0.5, sample + 0.5]`. The arithmetic and the
    summation order are kept exactly as in the original inline expression,
    because the decoder has to derive the very same table.
    """

    def component(k):
        scale = (sigma[k] + tiny) * 2 ** 0.5
        upper = 0.5 * (1 + scipy.special.erf((samples + 0.5 - mu[k]) / scale))
        lower = 0.5 * (1 + scipy.special.erf((samples - 0.5 - mu[k]) / scale))
        return (upper - lower) * weight[k]

    return component(0) + component(1) + component(2)


def compress(input, output, num_filters, checkpoint_dir):
    """Compresses one image into a header file and a range-coded stream.

    The image is reflect-padded to multiples of 64, transformed to latents
    `y` and hyper-latents `z`. Two files are produced:

      * `output`: image height/width (uint16), length of the z string and
        `minmax` (uint16), the bit-packed flags of the non-zero channels
        (uint8) and the compressed z string.
      * `output[:-4] + '.bin'`: the rounded latents of all non-zero
        channels, range coded symbol by symbol with a three-component
        Gaussian mixture whose parameters come from `entropy_parameter`
        evaluated on 5x5 windows.

    Timing, bits per pixel, PSNR and MS-SSIM are printed at the end.

    Note that both paths coincide when `output` itself ends in `.bin`.

    Args:
      input: Path of the image to compress.
      output: Path of the header file.
      num_filters: Number of filters passed to the transforms.
      checkpoint_dir: Currently unused; the checkpoint path is hard-coded to
        "models/model-1399000".
    """
    start = time.time()
    tf.set_random_seed(1)
    tf.reset_default_graph()

    with tf.device('/cpu:0'):
        # Load input image.
        x = load_image(input)

        # Pad x so that height and width become multiples of 64.
        mod = tf.constant([64, 64, 1], dtype=tf.int32)
        div = tf.ceil(tf.truediv(tf.shape(x), mod))
        div = tf.cast(div, tf.int32)
        paddings = tf.subtract(tf.multiply(div, mod), tf.shape(x))
        paddings = tf.expand_dims(paddings, 1)
        paddings = tf.concat(
            [tf.convert_to_tensor(np.zeros((3, 1)), dtype=tf.int32), paddings],
            axis=1)

        x_pad = tf.pad(x, paddings, "REFLECT")

        # Add the batch dimension to both versions.
        x = tf.expand_dims(x, 0)
        x.set_shape([1, None, None, 3])
        x_pad = tf.expand_dims(x_pad, 0)
        x_pad.set_shape([1, None, None, 3])

        # Transform the padded image.
        y = analysis_transform(x_pad, num_filters)

        # Build a hyper autoencoder.
        z = hyper_analysis(y, num_filters)
        entropy_bottleneck = tfc.EntropyBottleneck()
        string = entropy_bottleneck.compress(z)
        string = tf.squeeze(string, axis=0)

        _, _ = entropy_bottleneck(z, training=False)

        # Decompress z_tilde from the string to avoid inconsistencies
        # between encoder and decoder.
        string_rec = tf.expand_dims(string, 0)
        z_tilde = entropy_bottleneck.decompress(
            string_rec, tf.shape(z)[1:], channels=num_filters)

        phi = hyper_synthesis(z_tilde, num_filters)

        # For the Gaussian mixture model a window-based fast implementation
        # is used: the entropy parameters are evaluated per 5x5 window.
        y_hat = tf.round(y)

        tiny_y = tf.placeholder(
            dtype=tf.float32, shape=[1] + [5] + [5] + [num_filters])
        tiny_phi = tf.placeholder(
            dtype=tf.float32, shape=[1] + [5] + [5] + [num_filters * 2])
        _, _, y_means, y_variances, y_probs = entropy_parameter(
            tiny_phi, tiny_y, num_filters, training=False)

        x_hat = synthesis_transform(y_hat, num_filters)

        num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))
        x_hat = x_hat[0, :tf.shape(x)[1], :tf.shape(x)[2], :]

        # Reconstruction in the 0..255 range.
        x_hat = tf.clip_by_value(x_hat, 0, 1)
        x_hat = tf.round(x_hat * 255)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # Load the model checkpoint (hard-coded; lambda = 14).
            latest = "models/model-1399000"
            print(latest)
            tf.train.Saver().restore(sess, save_path=latest)

            string, x_shape, y_shape, num_pixels, y_hat_value, phi_value = \
                sess.run([string, tf.shape(x), tf.shape(y), num_pixels,
                          y_hat, phi])

            # Largest magnitude of the rounded latents, at least 1.
            minmax = np.maximum(abs(y_hat_value.max()), abs(y_hat_value.min()))
            minmax = int(np.maximum(minmax, 1))
            print(minmax)

            # Only non-zero channels are encoded. `int` replaces the removed
            # `np.int` alias.
            num_channels = y_shape[3]
            flag = (np.abs(y_hat_value).sum(axis=(0, 1, 2)) > 0).astype(int)
            # flatnonzero always yields a 1-D array, also when exactly one
            # channel is non-zero.
            non_zero_idx = np.flatnonzero(flag == 1)
            num = np.packbits(np.reshape(flag, [8, num_channels // 8]))

            # ============== encode the bits for z ===========
            if os.path.exists(output):
                os.remove(output)

            with open(output, mode='wb') as fileobj:
                fileobj.write(
                    np.array(x_shape[1:-1], dtype=np.uint16).tobytes())
                fileobj.write(
                    np.array([len(string), minmax], dtype=np.uint16).tobytes())
                fileobj.write(np.array(num, dtype=np.uint8).tobytes())
                fileobj.write(string)

            # ============ encode the bits for y ==========
            print("INFO: start encoding y")
            y_stream_path = output[:-4] + '.bin'
            samples = np.arange(0, minmax * 2 + 1)
            TINY = 1e-10

            kernel_size = 5
            pad_size = (kernel_size - 1) // 2
            pad_width = ((0, 0), (pad_size, pad_size),
                         (pad_size, pad_size), (0, 0))
            padded_y = np.pad(
                y_hat_value, pad_width, 'constant', constant_values=0.)
            padded_phi = np.pad(
                phi_value, pad_width, 'constant', constant_values=0.)

            encoder = RangeEncoder(y_stream_path)
            try:
                for h_idx in range(y_shape[1]):
                    for w_idx in range(y_shape[2]):
                        extracted_y = padded_y[
                            :, h_idx:h_idx + kernel_size,
                            w_idx:w_idx + kernel_size, :]
                        extracted_phi = padded_phi[
                            :, h_idx:h_idx + kernel_size,
                            w_idx:w_idx + kernel_size, :]

                        y_means_values, y_variances_values, y_probs_values = \
                            sess.run([y_means, y_variances, y_probs],
                                     feed_dict={tiny_y: extracted_y,
                                                tiny_phi: extracted_phi})

                        for ch_idx in non_zero_idx:
                            # Mixture parameters at the window centre; the
                            # means are shifted so symbols start at zero.
                            mu = y_means_values[
                                0, pad_size, pad_size, ch_idx, :] + minmax
                            sigma = y_variances_values[
                                0, pad_size, pad_size, ch_idx, :]
                            weight = y_probs_values[
                                0, pad_size, pad_size, ch_idx, :]

                            # Calculate the pmf/cdf.
                            pmf = _gmm_pmf(samples, mu, sigma, weight, TINY)

                            # To avoid the zero-probability.
                            pmf_clip = np.clip(pmf, 1.0 / 65536, 1.0)
                            pmf_clip = np.round(
                                pmf_clip / np.sum(pmf_clip) * 65536)
                            cdf = list(np.add.accumulate(pmf_clip))
                            cdf = [0] + [int(i) for i in cdf]

                            symbol = int(
                                y_hat_value[0, h_idx, w_idx, ch_idx] + minmax)
                            encoder.encode([symbol], cdf)
            finally:
                # Close the encoder even if encoding fails.
                encoder.close()

            side_bytes = os.path.getsize(output)
            bpp_real = (side_bytes + os.path.getsize(y_stream_path)) \
                * 8 / num_pixels
            bpp_side = side_bytes * 8 / num_pixels

            end = time.time()
            print("Time : {:0.3f}".format(end - start))

            psnr = sess.run(tf.image.psnr(x_hat, x * 255, 255))
            msssim = sess.run(tf.image.ssim_multiscale(x_hat, x * 255, 255))

            print("Actual bits per pixel for this image: {:0.4}".format(
                bpp_real))
            print("Side bits per pixel for z: {:0.4}".format(bpp_side))
            print("PSNR (dB) : {:0.4}".format(psnr[0]))
            print("MS-SSIM : {:0.4}".format(msssim[0]))