Python EntropyBottleneck Examples, tensorflow_compression.EntropyBottleneck Python Examples

Example #1

0

Show file

File: compress.py Project: chansongoal/semantic_image_compression

def _tile_mask_to_3c(mask_1c):
    """Replicates a 2-D [h, w] mask into a float32 array of shape [1, h, w, 3]."""
    mask_3c = np.zeros((1, mask_1c.shape[0], mask_1c.shape[1], 3),
                       dtype=np.float32)
    for n in range(mask_3c.shape[3]):
        mask_3c[0, :, :, n] = mask_1c
    return mask_3c


def compress(args):
    """Compresses every image in a directory and reports quality metrics.

    Builds the analysis/entropy-bottleneck/synthesis graph once, restores the
    latest checkpoint, then loops over the (sorted) files of `args.OrgPath`.
    For each image the bitstream is packed in memory (it is only used to
    measure its size; nothing is written into `args.BinPath`). When
    `args.verbose` is set, the reconstruction is also written as a PNG into
    `args.RecPath` and per-image metrics are printed and accumulated. The
    averages of the accumulated metrics are printed at the end.

    Args:
        args: Namespace providing `num_filters`, `checkpoint_dir`, `OrgPath`,
            `BinPath`, `RecPath` and `verbose`.
    """
    # Input image placeholder (batch of one) and the two per-pixel masks.
    x_input = tf.placeholder(dtype=tf.float32, shape=(1, None, None, 3))
    x = x_input
    weights_224_3c = tf.placeholder(tf.float32, [1, None, None, 3])
    weights_224_3c_label = tf.placeholder(tf.float32, [1, None, None, 3])
    x_attention = x * weights_224_3c
    x_attention_input = tf.concat([x, weights_224_3c, x_attention], axis=3)

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    synthesis_transform = SynthesisTransform(args.num_filters)

    # Transform and compress the image.
    y = analysis_transform(x_attention_input)
    string = entropy_bottleneck.compress(y)

    # Transform the quantized image back.
    y_hat, likelihoods = entropy_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat)

    # Unclipped reconstruction that is written to disk; `x_hat` is clipped and
    # rescaled further down for the pixel-domain metrics.
    rec = synthesis_transform(y_hat)

    imgName = tf.placeholder(tf.string)
    op = write_png(imgName, rec[0, :, :, :])

    # VGG features of the original and of the reconstruction. The attribute
    # references are captured after each build() call.
    vgg = vgg16.Vgg16('/gdata/gaocs/pretrained_models/vgg16_no_fc.npy')
    vgg.build(x)
    feature_x = [
        vgg.conv1_2, vgg.conv2_2, vgg.conv3_3, vgg.conv4_3, vgg.conv5_3
    ]
    vgg.build(x_hat)
    feature_x_tilde = [
        vgg.conv1_2, vgg.conv2_2, vgg.conv3_3, vgg.conv4_3, vgg.conv5_3
    ]

    # Per-layer feature losses: restricted to positive activations of the
    # original ("fore"), to the complement ("back"), and unrestricted.
    loss_feat_fore_all = []
    loss_feat_back_all = []
    loss_feat_all = []
    loss_feat_fore_sum = 0.0
    loss_feat_back_sum = 0.0
    loss_feat_sum = 0.0
    for feat, feat_tilde in zip(feature_x, feature_x_tilde):
        feat_mask = tf.where(feat > 0,
                             x=tf.ones_like(feat),
                             y=tf.zeros_like(feat))
        feat_mask_invert = feat_mask * (-1) + 1
        # Difference normalised by the mean activation of the original.
        normalised_diff = (feat - feat_tilde) / (tf.reduce_mean(feat) +
                                                 0.00000001)

        loss_temp_fore = tf.reduce_mean((normalised_diff * feat_mask)**2)
        loss_feat_fore_all.append(loss_temp_fore)
        loss_feat_fore_sum += loss_temp_fore

        loss_temp_back = tf.reduce_mean(
            (normalised_diff * feat_mask_invert)**2)
        loss_feat_back_all.append(loss_temp_back)
        loss_feat_back_sum += loss_temp_back

        loss_temp = tf.reduce_mean(normalised_diff**2)
        loss_feat_all.append(loss_temp)
        loss_feat_sum += loss_temp
    loss_f_fore = loss_feat_fore_sum / len(feature_x)
    loss_f_back = loss_feat_back_sum / len(feature_x)
    loss_f = loss_feat_sum / len(feature_x)

    num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

    # Total number of bits divided by number of pixels.
    eval_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Bring both images back to 0..255 range.
    x *= 255
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)

    def masked_metrics(mask):
        """Returns (mse, psnr, ms-ssim) of x vs. x_hat restricted to `mask`."""
        masked_mse = tf.reduce_sum(
            tf.squared_difference(x * mask, x_hat * mask)) / tf.reduce_sum(mask)
        masked_psnr = 20 * tf.math.log(
            255.0 / tf.math.sqrt(masked_mse)) / tf.math.log(10.0)
        masked_msssim = tf.squeeze(
            tf.image.ssim_multiscale(x_hat * mask, x * mask, 255))
        return masked_mse, masked_psnr, masked_msssim

    # Metrics under the mask fed to the encoder and under its complement.
    mse_foreground, psnr_foreground, msssim_foreground = masked_metrics(
        weights_224_3c)
    mse_background, psnr_background, msssim_background = masked_metrics(
        -1 * weights_224_3c + 1)

    # Metrics under the label mask and under its complement.
    mse_fore, psnr_fore, msssim_fore = masked_metrics(weights_224_3c_label)
    mse_back, psnr_back, msssim_back = masked_metrics(
        -1 * weights_224_3c_label + 1)

    # Whole-image metrics.
    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    # Everything evaluated per image in verbose mode; `op` is fetched last so
    # that the PNG is written as a side effect of the same run.
    metric_fetches = [
        eval_bpp, mse_foreground, mse_background, mse_fore, mse_back, mse,
        psnr_foreground, psnr_background, psnr_fore, psnr_back, psnr,
        msssim_foreground, msssim_background, msssim_fore, msssim_back,
        msssim, num_pixels, loss_f_fore, loss_f_back, loss_f,
        loss_feat_fore_all, loss_feat_back_all, loss_feat_all, rec, op
    ]

    with tf.Session() as sess:
        # Load the latest model checkpoint, get the compressed string and the tensor
        # shapes.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)
        tensors = [string, tf.shape(x)[1:-1], tf.shape(y)[1:-1]]

        orgPath = args.OrgPath
        binPath = args.BinPath
        recPath = args.RecPath
        if not os.path.exists(binPath):
            os.mkdir(binPath)
        if not os.path.exists(recPath):
            os.mkdir(recPath)
        orgFiles = sorted(os.listdir(orgPath))

        mse_foreground_all = []
        psnr_foreground_all = []
        msssim_foreground_all = []
        msssimdb_foreground_all = []
        loss_f_foreground_all = []
        loss_feat_foreground_list = []
        mse_background_all = []
        psnr_background_all = []
        msssim_background_all = []
        msssimdb_background_all = []
        loss_f_background_all = []
        loss_feat_background_list = []

        mse_fore_all = []
        psnr_fore_all = []
        msssim_fore_all = []
        msssimdb_fore_all = []
        mse_back_all = []
        psnr_back_all = []
        msssim_back_all = []
        msssimdb_back_all = []

        mse_all = []
        psnr_all = []
        msssim_all = []
        msssimdb_all = []
        eval_bpp_all = []
        bpp_all = []
        loss_f_all = []
        loss_feat_list = []

        # NOTE(review): pickle.load executes arbitrary code from the file;
        # these paths must only ever point at trusted data.
        pickle_name_label = '/gdata1/gaocs/pretrained_models/minVal2014_Test5000_0_1.pickle'
        with open(pickle_name_label, 'rb') as fp:
            print(pickle_name_label)
            all_weights_val5000_label = pickle.load(fp)

        pickle_name = '/gdata1/gaocs/pretrained_models/minVal2014_5000_Conv5_3_binary_dilation_0_1.pickle'
        with open(pickle_name, 'rb') as fp:
            print(pickle_name)
            all_weights_val5000 = pickle.load(fp)

        # Set once: applies to every array printed below.
        np.set_printoptions(formatter={'float': '{: 0.8f}'.format})

        for idx, imgFile in enumerate(orgFiles):
            with Image.open(os.path.join(orgPath, imgFile)) as img_file:
                img = np.asarray(img_file, dtype=np.float32)
            if len(img.shape) != 3:
                # Single-channel image: replicate it into three channels.
                img = np.stack((img, img, img), axis=-1)
            imgArr = np.expand_dims(img, 0) / 255

            # splitext instead of slicing off four characters, so extensions
            # of any length (e.g. ".jpeg") are handled.
            pngRecName = os.path.join(
                recPath, os.path.splitext(imgFile)[0] + '.png')

            # Masks are looked up by position in the sorted file list.
            # NOTE(review): presumably the pickles are stored in that same
            # order -- confirm against how they were generated.
            weight_input_3c = _tile_mask_to_3c(all_weights_val5000[idx])
            weight_input_3c_label = _tile_mask_to_3c(
                all_weights_val5000_label[idx])

            feed_dict = {
                x_input: imgArr,
                weights_224_3c: weight_input_3c,
                weights_224_3c_label: weight_input_3c_label,
                imgName: pngRecName
            }
            arrays = sess.run(tensors, feed_dict=feed_dict)

            # Pack the shape information and the compressed string; only the
            # packed size is used below.
            packed = PackedTensors()
            packed.pack(tensors, arrays)

            # If requested, transform the quantized image back and measure performance.
            if args.verbose:
                print(pngRecName)
                (eval_bpp_, mse_foreground_, mse_background_, mse_fore_,
                 mse_back_, mse_, psnr_foreground_, psnr_background_,
                 psnr_fore_, psnr_back_, psnr_, msssim_foreground_,
                 msssim_background_, msssim_fore_, msssim_back_, msssim_,
                 num_pixels_, loss_f_fore_, loss_f_back_, loss_f_,
                 loss_feat_fore_, loss_feat_back_, loss_feat_, _,
                 _) = sess.run(metric_fetches, feed_dict=feed_dict)

                # The actual bits per pixel including overhead.
                bpp = len(packed.string) * 8 / num_pixels_

                msssimdb_fore_ = -10 * np.log10(1 - msssim_fore_)
                msssimdb_back_ = -10 * np.log10(1 - msssim_back_)
                msssimdb_foreground_ = -10 * np.log10(1 - msssim_foreground_)
                msssimdb_background_ = -10 * np.log10(1 - msssim_background_)
                msssimdb_ = -10 * np.log10(1 - msssim_)

                print("fore Mean squared error: {:0.4f}".format(mse_fore_))
                print("fore PSNR (dB): {:0.2f}".format(psnr_fore_))
                print("fore Multiscale SSIM: {:0.4f}".format(msssim_fore_))
                print("fore Multiscale SSIM (dB): {:0.2f}".format(
                    msssimdb_fore_))
                print("back Mean squared error: {:0.4f}".format(mse_back_))
                print("back PSNR (dB): {:0.2f}".format(psnr_back_))
                print("back Multiscale SSIM: {:0.4f}".format(msssim_back_))
                print("back Multiscale SSIM (dB): {:0.2f}".format(
                    msssimdb_back_))

                print("foreground Mean squared error: {:0.4f}".format(
                    mse_foreground_))
                print("foreground PSNR (dB): {:0.2f}".format(psnr_foreground_))
                print("foreground Multiscale SSIM: {:0.4f}".format(
                    msssim_foreground_))
                print("foreground Multiscale SSIM (dB): {:0.2f}".format(
                    msssimdb_foreground_))
                print("foreground VGG loss: {:0.4f}".format(loss_f_fore_))
                print(loss_feat_fore_)

                print("background Mean squared error: {:0.4f}".format(
                    mse_background_))
                print("background PSNR (dB): {:0.2f}".format(psnr_background_))
                print("background Multiscale SSIM: {:0.4f}".format(
                    msssim_background_))
                print("background Multiscale SSIM (dB): {:0.2f}".format(
                    msssimdb_background_))
                print("background VGG loss: {:0.4f}".format(loss_f_back_))
                print(loss_feat_back_)

                print("Mean squared error: {:0.4f}".format(mse_))
                print("PSNR (dB): {:0.2f}".format(psnr_))
                print("Multiscale SSIM: {:0.4f}".format(msssim_))
                print("Multiscale SSIM (dB): {:0.2f}".format(msssimdb_))
                print("Information content in bpp: {:0.4f}".format(eval_bpp_))
                print("Actual bits per pixel: {:0.4f}".format(bpp))
                print("VGG loss: {:0.4f}".format(loss_f_))
                print(loss_feat_)

                # Masked metrics are only accumulated when the masked MSE is
                # above 1e-8, as a (near-)zero MSE is excluded from the
                # averages.
                if mse_fore_ > 1e-8:
                    mse_fore_all.append(mse_fore_)
                    psnr_fore_all.append(psnr_fore_)
                    msssim_fore_all.append(msssim_fore_)
                    msssimdb_fore_all.append(msssimdb_fore_)
                if mse_back_ > 1e-8:
                    mse_back_all.append(mse_back_)
                    psnr_back_all.append(psnr_back_)
                    msssim_back_all.append(msssim_back_)
                    msssimdb_back_all.append(msssimdb_back_)
                if mse_foreground_ > 1e-8:
                    mse_foreground_all.append(mse_foreground_)
                    psnr_foreground_all.append(psnr_foreground_)
                    msssim_foreground_all.append(msssim_foreground_)
                    msssimdb_foreground_all.append(msssimdb_foreground_)
                    loss_f_foreground_all.append(loss_f_fore_)
                    loss_feat_foreground_list.append(loss_feat_fore_)
                if mse_background_ > 1e-8:
                    mse_background_all.append(mse_background_)
                    psnr_background_all.append(psnr_background_)
                    msssim_background_all.append(msssim_background_)
                    msssimdb_background_all.append(msssimdb_background_)
                    loss_f_background_all.append(loss_f_back_)
                    loss_feat_background_list.append(loss_feat_back_)

                mse_all.append(mse_)
                psnr_all.append(psnr_)
                msssim_all.append(msssim_)
                msssimdb_all.append(msssimdb_)
                eval_bpp_all.append(eval_bpp_)
                bpp_all.append(bpp)
                loss_f_all.append(loss_f_)
                loss_feat_list.append(loss_feat_)

        print('\n\n---total averege---')
        print("fore Mean squared error: {:0.4f}".format(np.mean(mse_fore_all)))
        print("fore PSNR (dB): {:0.2f}".format(np.mean(psnr_fore_all)))
        print("fore Multiscale SSIM: {:0.4f}".format(np.mean(msssim_fore_all)))
        print("fore Multiscale SSIM (dB): {:0.2f}".format(
            np.mean(msssimdb_fore_all)))
        print("back Mean squared error: {:0.4f}".format(np.mean(mse_back_all)))
        print("back PSNR (dB): {:0.2f}".format(np.mean(psnr_back_all)))
        print("back Multiscale SSIM: {:0.4f}".format(np.mean(msssim_back_all)))
        print("back Multiscale SSIM (dB): {:0.2f}".format(
            np.mean(msssimdb_back_all)))

        print("foreground Mean squared error: {:0.4f}".format(
            np.mean(mse_foreground_all)))
        print("foreground PSNR (dB): {:0.2f}".format(
            np.mean(psnr_foreground_all)))
        print("foreground Multiscale SSIM: {:0.4f}".format(
            np.mean(msssim_foreground_all)))
        print("foreground Multiscale SSIM (dB): {:0.2f}".format(
            np.mean(msssimdb_foreground_all)))
        print("foreground VGG loss: {:0.4f}".format(
            np.mean(loss_f_foreground_all)))
        print(np.mean(loss_feat_foreground_list, axis=0))
        print("background Mean squared error: {:0.4f}".format(
            np.mean(mse_background_all)))
        print("background PSNR (dB): {:0.2f}".format(
            np.mean(psnr_background_all)))
        print("background Multiscale SSIM: {:0.4f}".format(
            np.mean(msssim_background_all)))
        print("background Multiscale SSIM (dB): {:0.2f}".format(
            np.mean(msssimdb_background_all)))
        print("background VGG loss: {:0.4f}".format(
            np.mean(loss_f_background_all)))
        print(np.mean(loss_feat_background_list, axis=0))

        print("Mean squared error: {:0.4f}".format(np.mean(mse_all)))
        print("PSNR (dB): {:0.2f}".format(np.mean(psnr_all)))
        print("Multiscale SSIM: {:0.4f}".format(np.mean(msssim_all)))
        print("Multiscale SSIM (dB): {:0.2f}".format(np.mean(msssimdb_all)))
        print("Information content in bpp: {:0.4f}".format(
            np.mean(eval_bpp_all)))
        print("Actual bits per pixel: {:0.4f}".format(np.mean(bpp_all)))
        # Average over all images (previously this averaged only the last
        # image's value, and failed if no image had been evaluated).
        print("VGG loss: {:0.4f}".format(np.mean(loss_f_all)))
        print(np.mean(loss_feat_list, axis=0))

Example #2

0

Show file

File: mobile_msh2018.py Project: adityassrana/mobile-compression

def compress(args):
  """Encodes one PNG file and writes the packed bitstream to disk.

  Reads `args.input_file`, runs it through the analysis and hyper-analysis
  transforms, entropy-codes both latent tensors, and writes the two strings
  together with the image/latent spatial shapes to `args.output_file`. With
  `args.verbose` set, the image is also reconstructed and MSE, PSNR, MS-SSIM
  and bit-rate figures are printed.

  Args:
    args: Namespace providing `input_file`, `output_file`, `num_filters`,
      `checkpoint_dir` and `verbose`.
  """
  # Read the PNG and give it a leading batch axis.
  image = tf.expand_dims(read_png(args.input_file), 0)
  image.set_shape([1, None, None, 3])
  image_shape = tf.shape(image)

  # Build the transforms and the entropy model for the hyper-latents.
  analysis = AnalysisTransform(args.num_filters)
  synthesis = SynthesisTransform(args.num_filters)
  hyper_analysis = HyperAnalysisTransform(args.num_filters)
  hyper_synthesis = HyperSynthesisTransform(args.num_filters)
  bottleneck = tfc.EntropyBottleneck()

  # Encoder side: latents, hyper-latents, and the scales predicted from them.
  latents = analysis(image)
  latents_shape = tf.shape(latents)
  hyper_latents = hyper_analysis(abs(latents))
  hyper_hat, hyper_likelihoods = bottleneck(hyper_latents, training=False)
  scales = hyper_synthesis(hyper_hat)
  # Crop the predicted scales to the spatial size of the latents.
  scales = scales[:, :latents_shape[1], :latents_shape[2], :]
  log_lo, log_hi = np.log(SCALES_MIN), np.log(SCALES_MAX)
  scale_table = np.exp(np.linspace(log_lo, log_hi, SCALES_LEVELS))
  conditional = tfc.GaussianConditional(scales, scale_table)
  side_bits = bottleneck.compress(hyper_latents)
  main_bits = conditional.compress(latents)

  # Decoder side, used only for the verbose metrics.
  latents_hat, latent_likelihoods = conditional(latents, training=False)
  decoded = synthesis(latents_hat)
  # Crop the reconstruction to the spatial size of the input.
  decoded = decoded[:, :image_shape[1], :image_shape[2], :]

  pixel_count = tf.cast(
      tf.reduce_prod(tf.shape(image)[:-1]), dtype=tf.float32)

  # Summed log-likelihoods of both latents, converted to bits per pixel.
  log_likelihood_sum = (tf.reduce_sum(tf.log(latent_likelihoods)) +
                        tf.reduce_sum(tf.log(hyper_likelihoods)))
  bpp_estimate = log_likelihood_sum / (-np.log(2) * pixel_count)

  # Put original and reconstruction on the 0..255 scale.
  image_255 = image * 255
  decoded_255 = tf.round(tf.clip_by_value(decoded, 0, 1) * 255)

  mse_op = tf.reduce_mean(tf.squared_difference(image_255, decoded_255))
  psnr_op = tf.squeeze(tf.image.psnr(decoded_255, image_255, 255))
  msssim_op = tf.squeeze(
      tf.image.ssim_multiscale(decoded_255, image_255, 255))

  with tf.Session() as sess:
    # Restore the most recent checkpoint, then evaluate the strings and the
    # spatial shapes that go into the file.
    checkpoint = tf.train.latest_checkpoint(
        checkpoint_dir=args.checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=checkpoint)
    tensors = [
        main_bits,
        side_bits,
        tf.shape(image_255)[1:-1],
        tf.shape(latents)[1:-1],
        tf.shape(hyper_latents)[1:-1],
    ]
    arrays = sess.run(tensors)

    # Serialise shapes and strings into a single binary blob.
    packed = tfc.PackedTensors()
    packed.pack(tensors, arrays)
    with open(args.output_file, "wb") as out_file:
      out_file.write(packed.string)

    # Nothing more to do unless metrics were requested.
    if not args.verbose:
      return

    bpp_value, mse_value, psnr_value, msssim_value, pixel_value = sess.run(
        [bpp_estimate, mse_op, psnr_op, msssim_op, pixel_count])

    # Size of the written blob, including the shape overhead.
    actual_bpp = len(packed.string) * 8 / pixel_value
    msssim_db = -10 * np.log10(1 - msssim_value)

    report = (
        ("Mean squared error: {:0.4f}", mse_value),
        ("PSNR (dB): {:0.2f}", psnr_value),
        ("Multiscale SSIM: {:0.4f}", msssim_value),
        ("Multiscale SSIM (dB): {:0.2f}", msssim_db),
        ("Information content in bpp: {:0.4f}", bpp_value),
        ("Actual bits per pixel: {:0.4f}", actual_bpp),
    )
    for template, value in report:
      print(template.format(value))

Example #3

0

Show file

File: balle_image_box.py Project: decotoj/dnn-compression

def train():
    """Trains the model.

    Builds an input pipeline that zips randomly cropped image patches with
    tokenised label lines, trains the autoencoder with a rate-distortion loss
    plus an extra MSE term restricted to a (currently hard-coded) box mask,
    and periodically prints and appends progress to `Train_Log.txt` inside
    `MODEL_DIRECTORY`.

    All configuration is read from module-level constants
    (`MODEL_DIRECTORY`, `TRAIN_DIRECTORY`, `LABEL_DIRECTORY`, `PATCHSIZE`,
    `BATCH_SIZE`, `NUM_FILTERS`, `LMBDA`, `LMBDA2`, `LEARNING_RATE`,
    `NUM_STEPS`, `LOG_STEPS`, `CHECKPOINT_SAVE`, `PREPROCESS_THREADS`).
    """
    # Log Input Settings
    logFile = MODEL_DIRECTORY + '/' + 'Train_Log.txt'

    # Set Tensorflow Logging
    tf.logging.set_verbosity(tf.logging.INFO)

    # Create input data pipeline.
    with tf.device('/cpu:0'):
        train_files = glob.glob(TRAIN_DIRECTORY)
        train_labels = glob.glob(LABEL_DIRECTORY)
        train_dataset = tf.data.Dataset.from_tensor_slices(train_files)

        # Labels are read line by line from the label text files and split
        # into whitespace-separated tokens. Known open issue from the
        # original author: some files define several bounding boxes, so the
        # number of label lines can differ from the number of images; using
        # only the first box per file was the suggested way forward.
        # NOTE(review): glob order is unspecified, so images and labels are
        # not guaranteed to line up -- confirm before relying on the labels.
        label_dataset = tf.data.TextLineDataset(train_labels)
        label_dataset = label_dataset.map(
            lambda token: tf.string_split([token]).values)
        label_dataset = label_dataset.map(lambda token:
                                          (token, extract_char(token)))

        # Images are loaded and randomly cropped to a fixed patch so that
        # they can be batched. Open question from the original author: how
        # a bounding box should be mapped onto a random crop (or onto a
        # resized image, if resizing were used instead).
        train_dataset = train_dataset.map(
            load_image, num_parallel_calls=PREPROCESS_THREADS)
        train_dataset = train_dataset.map(
            lambda x: tf.random_crop(x, (PATCHSIZE, PATCHSIZE, 3)))

        # This combines the two datasets so they are coordinated.
        total_data = tf.data.Dataset.zip((train_dataset, label_dataset))
        total_data = total_data.shuffle(buffer_size=len(train_files)).repeat()

        # We prefetch some initial batches
        total_data = total_data.batch(BATCH_SIZE)
        total_data = total_data.prefetch(32)

    # Determine number of pixels and print input data info
    num_pixels = BATCH_SIZE * PATCHSIZE**2
    print('Num Train File', len(train_files))
    print('Num_Pix', num_pixels, BATCH_SIZE, PATCHSIZE)

    # Get Data - this includes labels and training images
    x = total_data.make_one_shot_iterator().get_next()

    # We then pass the training images in x[0] to our autoencoder
    y = analysis_transform(x[0], NUM_FILTERS)
    entropy_bottleneck = tfc.EntropyBottleneck()
    y_tilde, likelihoods = entropy_bottleneck(y, training=True)
    x_tilde = synthesis_transform(y_tilde, NUM_FILTERS)

    # Total number of bits divided by number of pixels.
    train_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Mean squared error across pixels.
    train_mse = tf.reduce_mean(tf.squared_difference(x[0], x_tilde))
    train_mse *= 255**2  # Multiply by 255^2 to correct for rescaling.

    ######################START TEST DECOTO############################

    #Grab the 4 Corners (only printed for inspection below)
    corners = [
        tf.string_to_number(x[1][1][1][2]),
        tf.string_to_number(x[1][1][1][3]),
        tf.string_to_number(x[1][1][1][4]),
        tf.string_to_number(x[1][1][1][5])
    ]

    #Build a Mask of All 0,s of Proper Shape to Multiply With x[0]
    #(height from axis 1, width from axis 2; broadcast over batch/channels)
    patch_shape = x[0].get_shape()
    M = tf.zeros([1, patch_shape[1], patch_shape[2], 1])

    #START PENDING - WORK IN PROGRESS
    #Replace the 0's in M with 1's for all areas inside the bounding box
    #(the 0 and 10 bounds are placeholders for the corner values)
    indices = [[0, i, j, 0] for i in range(0, 10) for j in range(0, 10)]
    values = [1] * len(indices)
    shape = M.get_shape()
    delta = tf.SparseTensor(indices, values, shape)
    delta = tf.cast(delta, tf.float32)
    M2 = M + tf.sparse_tensor_to_dense(delta)

    sums = [
        tf.reduce_sum(M), tf.reduce_sum(M2)
    ]  #Used to Print Later to Check This is Working (Sum of M = 0, Sum of M2 > 0)

    #END PENDING  - WORK IN PROGRESS

    #Mean Squared Error for the Box Portion Only
    train_mse_box = tf.reduce_mean(
        tf.multiply(tf.squared_difference(x[0], x_tilde), M2))
    train_mse_box *= 255**2

    #Training Loss Including the Bounding Box as a separate loss component
    train_loss = LMBDA * train_mse + train_bpp + LMBDA2 * train_mse_box

    ###################END TEST DECOTO############################

    # Minimize loss and auxiliary loss, and execute update op.
    step = tf.train.create_global_step()
    main_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    main_step = main_optimizer.minimize(train_loss, global_step=step)

    aux_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE * 10)
    aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    tf.summary.scalar("loss", train_loss)
    tf.summary.scalar("bpp", train_bpp)
    tf.summary.scalar("mse", train_mse)

    tf.summary.image("original", quantize_image(x[0]))
    tf.summary.image("reconstruction", quantize_image(x_tilde))

    # Creates summary for the probability mass function (PMF) estimated in the bottleneck.
    entropy_bottleneck.visualize()

    hooks = [
        tf.train.StopAtStepHook(last_step=NUM_STEPS),
        tf.train.NanTensorHook(train_loss)
    ]

    # Everything reported at a logging step is fetched in ONE run: every
    # sess.run pulls a fresh batch from the iterator, so separate runs would
    # report loss, bpp and mse of different batches.
    log_fetches = [train_loss, train_bpp, train_mse, corners, sums]

    ep = 0
    epSub = 0
    scaffold = tf.train.Scaffold(saver=tf.train.Saver(max_to_keep=1))
    with tf.train.MonitoredTrainingSession(
            scaffold=scaffold,
            hooks=hooks,
            checkpoint_dir=MODEL_DIRECTORY,
            save_checkpoint_secs=CHECKPOINT_SAVE,
            save_summaries_secs=CHECKPOINT_SAVE) as sess:
        while not sess.should_stop():
            sess.run(train_op)

            if epSub >= LOG_STEPS:
                epSub = 0
                ep += 1
            # Skip logging once a hook has requested stop: the monitored
            # session is expected to reject further run() calls then.
            if epSub == 0 and not sess.should_stop():
                logged_step = ep * LOG_STEPS + epSub
                loss_val, bpp_val, mse_val, corners_val, sums_val = sess.run(
                    log_fetches)

                print(logged_step, 'train loss', loss_val)

                ######################START DECOTO EDITS######################################
                print('Corners', corners_val)
                print('Sums M and M2', sums_val)
                ######################END DECOTO EDITS######################################

                with open(logFile, 'a') as f:
                    f.write('step=' + str(logged_step) +
                            ',train_loss=' + str(loss_val) +
                            ',train_bpp=' + str(bpp_val) +
                            ',train_mse=' + str(mse_val) + '\n')
            epSub += 1

    print('TRAIN COMPLETED')

Example #4

0

Show file

def _gaussian_bin_mass(samples, mean, scale, tiny):
    """Return the Gaussian mass of the unit-width bin centred on each sample.

    Evaluates CDF(samples + 0.5) - CDF(samples - 0.5), where the CDF is
    written with `erf` and `scale` plays the role of the standard deviation.
    `tiny` is added to `scale` before dividing so the quotient stays finite.
    """
    denom = (scale + tiny) * 2 ** 0.5
    upper = 0.5 * (1 + scipy.special.erf((samples + 0.5 - mean) / denom))
    lower = 0.5 * (1 + scipy.special.erf((samples - 0.5 - mean) / denom))
    return upper - lower


def decompress(input, output, num_filters, checkpoint_dir):
    """Decompresses an image by a fast implementation.

    Args:
        input: Path of the compressed file. Its header holds the image shape
            (2 x uint16), the length of the hyper-latent string and `minmax`
            (2 x uint16) and 16 bytes of per-channel bit flags, followed by
            the string itself. A range decoder is additionally opened on
            `input[:-4] + '.bin'`.
        output: Destination passed to `save_image` for the reconstruction.
        num_filters: Number of channels used for the latents and transforms.
        checkpoint_dir: Currently unused; the checkpoint path is hard-coded
            below (the `latest_checkpoint` lookup is commented out).
    """

    start = time.time()

    tf.set_random_seed(1)
    tf.reset_default_graph()

    with tf.device('/cpu:0'):

        print(input)

        # Read the shape information and compressed string from the binary
        # file. The context manager closes the file even if a read fails.
        with open(input, mode='rb') as fileobj:
            x_shape = np.frombuffer(fileobj.read(4), dtype=np.uint16)
            # Convert to plain Python ints so that later arithmetic
            # (`minmax * 2 + 1`, `symbol - minmax`) is not carried out on
            # uint16 scalars.
            length, minmax = (
                int(v)
                for v in np.frombuffer(fileobj.read(4), dtype=np.uint16))
            num = np.frombuffer(fileobj.read(16), dtype=np.uint8)
            string = fileobj.read(length)

        # One bit per channel; only flagged channels are decoded below.
        # flatnonzero always yields a 1-D index array, including when exactly
        # one channel is flagged.
        flag = np.unpackbits(num)
        non_zero_idx = np.flatnonzero(flag == 1)

        # Get x_pad_shape, y_shape, z_shape
        pad_size = 64
        x_pad_shape = [1] + [
            int(math.ceil(x_shape[0] / pad_size) * pad_size)
        ] + [int(math.ceil(x_shape[1] / pad_size) * pad_size)] + [3]
        y_shape = [1] + [x_pad_shape[1] // 16] + [x_pad_shape[2] // 16
                                                  ] + [num_filters]
        z_shape = [y_shape[1] // 4] + [y_shape[2] // 4] + [num_filters]

        # Add a batch dimension, then decompress and transform the image back.
        strings = tf.expand_dims(string, 0)

        entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32)
        z_tilde = entropy_bottleneck.decompress(strings,
                                                z_shape,
                                                channels=num_filters)
        phi = hyper_synthesis(z_tilde, num_filters)

        # Entropy-parameter network evaluated on 5x5 windows of the latents.
        tiny_y = tf.placeholder(dtype=tf.float32,
                                shape=[1] + [5] + [5] + [num_filters])
        tiny_phi = tf.placeholder(dtype=tf.float32,
                                  shape=[1] + [5] + [5] + [num_filters * 2])
        _, _, means, variances, probs = entropy_parameter(tiny_phi,
                                                          tiny_y,
                                                          num_filters,
                                                          training=False)

        # Decode the x_hat using the decoded y
        y_hat = tf.placeholder(dtype=tf.float32, shape=y_shape)
        x_hat = synthesis_transform(y_hat, num_filters)

        # Remove batch dimension, and crop away any extraneous padding on the
        # bottom or right boundaries.
        x_hat = x_hat[0, :int(x_shape[0]), :int(x_shape[1]), :]

        # Write reconstructed image out as a PNG file.
        op = save_image(output, x_hat)

        # Load the model checkpoint, and perform the above actions.
        with tf.Session() as sess:
            # NOTE(review): `checkpoint_dir` is ignored in favour of this
            # fixed path; confirm whether that is intentional.
            #latest = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)

            latest = "models/model-1399000"  #lambda = 14
            print(latest)

            tf.train.Saver().restore(sess, save_path=latest)

            phi_value = sess.run(phi)

            print("INFO: start decoding y")
            print(time.time() - start)

            decoder = RangeDecoder(input[:-4] + '.bin')
            # Symbol alphabet, offset by `minmax` so that it starts at zero.
            samples = np.arange(0, minmax * 2 + 1)
            TINY = 1e-10

            # Fast implementation to decode the y_hat
            kernel_size = 5
            border = (kernel_size - 1) // 2

            # Both arrays carry a `border`-wide frame so that every latent
            # position has a full kernel_size x kernel_size window.
            decoded_y = np.zeros([1] + [y_shape[1] + kernel_size - 1] +
                                 [y_shape[2] + kernel_size - 1] +
                                 [num_filters])
            padded_phi = np.pad(phi_value, ((0, 0), (border, border),
                                            (border, border), (0, 0)),
                                'constant',
                                constant_values=((0., 0.), (0., 0.), (0., 0.),
                                                 (0., 0.)))

            for h_idx in range(y_shape[1]):
                for w_idx in range(y_shape[2]):
                    window_feed = {
                        tiny_y: decoded_y[:, h_idx:h_idx + kernel_size,
                                          w_idx:w_idx + kernel_size, :],
                        tiny_phi: padded_phi[:, h_idx:h_idx + kernel_size,
                                             w_idx:w_idx + kernel_size, :],
                    }
                    y_means, y_variances, y_probs = sess.run(
                        [means, variances, probs], feed_dict=window_feed)

                    for ch_idx in non_zero_idx:
                        # Parameters at the window centre for this channel.
                        mu = y_means[0, border, border, ch_idx, :] + minmax
                        sigma = y_variances[0, border, border, ch_idx, :]
                        weight = y_probs[0, border, border, ch_idx, :]

                        # Three-component mixture of discretised Gaussians.
                        pmf = (_gaussian_bin_mass(samples, mu[0], sigma[0],
                                                  TINY) * weight[0] +
                               _gaussian_bin_mass(samples, mu[1], sigma[1],
                                                  TINY) * weight[1] +
                               _gaussian_bin_mass(samples, mu[2], sigma[2],
                                                  TINY) * weight[2])

                        # Floor the masses at 1/65536, then renormalise so
                        # they sum to approximately 65536 after rounding.
                        pmf_clip = np.clip(pmf, 1.0 / 65536, 1.0)
                        pmf_clip = np.round(pmf_clip / np.sum(pmf_clip) *
                                            65536)
                        cdf = [0] + [
                            int(c) for c in np.add.accumulate(pmf_clip)
                        ]

                        decoded_y[0, h_idx + border, w_idx + border,
                                  ch_idx] = decoder.decode(1, cdf)[0] - minmax

            # Strip the frame added for the sliding window.
            decoded_y = decoded_y[:, border:y_shape[1] + border,
                                  border:y_shape[2] + border, :]

            sess.run(op, feed_dict={y_hat: decoded_y})

            end = time.time()
            print("Time (s): {:0.3f}".format(end - start))

Example #5

0

Show file

File: mobile_msh2018.py Project: adityassrana/mobile-compression

def train(args):
  """Builds the hyperprior training graph and runs it until `last_step`.

  Reads settings from `args` (verbose, train_glob, preprocess_threads,
  patchsize, batchsize, num_filters, lmbda, last_step, checkpoint_dir).

  Raises:
    RuntimeError: if `args.train_glob` matches no files.
  """

  if args.verbose:
    tf.logging.set_verbosity(tf.logging.INFO)

  # Input pipeline is pinned to the CPU: shuffle the file list, decode,
  # take a random crop per image, batch and prefetch.
  with tf.device("/cpu:0"):
    image_paths = glob.glob(args.train_glob)
    if not image_paths:
      raise RuntimeError(
          "No training images found with glob '{}'.".format(args.train_glob))
    crop_shape = (args.patchsize, args.patchsize, 3)
    train_dataset = (
        tf.data.Dataset.from_tensor_slices(image_paths)
        .shuffle(buffer_size=len(image_paths))
        .repeat()
        .map(read_png, num_parallel_calls=args.preprocess_threads)
        .map(lambda image: tf.random_crop(image, crop_shape))
        .batch(args.batchsize)
        .prefetch(32))

  num_pixels = args.batchsize * args.patchsize ** 2

  # One batch of training patches per session run.
  x = train_dataset.make_one_shot_iterator().get_next()

  # Model components.
  analysis_transform = AnalysisTransform(args.num_filters)
  synthesis_transform = SynthesisTransform(args.num_filters)
  hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
  hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
  entropy_bottleneck = tfc.EntropyBottleneck()

  # Autoencoder with a hyperprior predicting the scales for y.
  y = analysis_transform(x)
  z = hyper_analysis_transform(abs(y))
  z_tilde, z_likelihoods = entropy_bottleneck(z, training=True)
  sigma = hyper_synthesis_transform(z_tilde)
  scale_table = np.exp(
      np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
  conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
  y_tilde, y_likelihoods = conditional_bottleneck(y, training=True)
  x_tilde = synthesis_transform(y_tilde)

  # Rate: total bits of both latents, per pixel.
  total_log_likelihood = (tf.reduce_sum(tf.log(y_likelihoods)) +
                          tf.reduce_sum(tf.log(z_likelihoods)))
  train_bpp = total_log_likelihood / (-np.log(2) * num_pixels)

  # Distortion: mean squared error, scaled by 255^2 to undo the rescaling.
  train_mse = tf.reduce_mean(tf.squared_difference(x, x_tilde)) * 255 ** 2

  # Rate-distortion objective.
  train_loss = args.lmbda * train_mse + train_bpp

  # Main optimizer for the RD loss, a second one for the bottleneck's
  # auxiliary loss; both run together with the bottleneck's update op.
  step = tf.train.create_global_step()
  main_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
      train_loss, global_step=step)
  aux_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
      entropy_bottleneck.losses[0])
  train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

  for tag, value in (("loss", train_loss), ("bpp", train_bpp),
                     ("mse", train_mse)):
    tf.summary.scalar(tag, value)

  tf.summary.image("original", quantize_image(x))
  tf.summary.image("reconstruction", quantize_image(x_tilde))

  session = tf.train.MonitoredTrainingSession(
      hooks=[
          tf.train.StopAtStepHook(last_step=args.last_step),
          tf.train.NanTensorHook(train_loss),
      ],
      checkpoint_dir=args.checkpoint_dir,
      save_checkpoint_secs=300,
      save_summaries_secs=60)
  with session as sess:
    while not sess.should_stop():
      sess.run(train_op)

Example #6

0

Show file

# Decoder-side graph fragment: decodes the motion and residual latents from
# byte strings and warps the previously compressed frame with the decoded flow.
# Y1_raw_img = imageio.imread(args.raw)

# Add a leading axis to the reference frame (presumably the batch axis, given
# the placeholder shape below).
Y0_com_img = np.expand_dims(Y0_com_img, 0)
# Y1_raw_img = np.expand_dims(Y1_raw_img, 0)

# Frame size is taken from axes 1 and 2 of the expanded array.
Height = np.size(Y0_com_img, 1)
Width = np.size(Y0_com_img, 2)

Y0_com = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel])
# Y1_raw = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel])

# Scalar string placeholders fed with the two compressed bitstreams.
string_mv_tensor = tf.placeholder(tf.string, [])
string_res_tensor = tf.placeholder(tf.string, [])

# Motion Decoding
# The latent is requested at 1/16 of the frame size with args.M channels.
# NOTE(review): the explicit layer name presumably has to match the variable
# names stored in the checkpoint - confirm.
entropy_bottleneck_mv = tfc.EntropyBottleneck(dtype=tf.float32,
                                              name='entropy_bottleneck')
flow_latent_hat = entropy_bottleneck_mv.decompress(tf.expand_dims(
    string_mv_tensor, 0), [Height // 16, Width // 16, args.M],
                                                   channels=args.M)

# Residual Decoding
# Same latent shape as the motion branch, with its own bottleneck instance.
entropy_bottleneck_res = tfc.EntropyBottleneck(dtype=tf.float32,
                                               name='entropy_bottleneck_1_1')
res_latent_hat = entropy_bottleneck_res.decompress(tf.expand_dims(
    string_res_tensor, 0), [Height // 16, Width // 16, args.M],
                                                   channels=args.M)

# Map the decoded motion latent back to a flow field.
flow_hat = CNN_img.MV_synthesis(flow_latent_hat, args.N)

# Motion Compensation
# Warp the reference frame with the decoded flow.
Y1_warp = tf.contrib.image.dense_image_warp(Y0_com, flow_hat)

Example #7

0

Show file

def train():
    """Trains the model.

    Builds the autoencoder graph on a placeholder input, then feeds it
    batches obtained from `_load_image()` / `get_batch()` for
    `args.last_step` iterations. Summaries are written every 1000 global
    steps and a checkpoint every 100000 global steps under
    `args.checkpoint_dir`.
    """

    # Training inputs are fed through a placeholder (see the feed_dict in the
    # loop below) rather than built into the graph.
    crop_shape = (args.batchsize, args.patchsize, args.patchsize, 3)
    x = tf.placeholder(tf.float32, crop_shape)
    num_pixels = np.prod(crop_shape[:-1])

    # Build autoencoder.
    y = analysis_transform(x, args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    y_tilde, likelihoods = entropy_bottleneck(y, training=True)
    x_tilde = synthesis_transform(y_tilde, args.num_filters)

    # Total number of bits divided by number of pixels.
    train_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Mean squared error across pixels.
    train_mse = tf.reduce_sum(tf.squared_difference(x, x_tilde))
    # Multiply by 255^2 to correct for rescaling.
    train_mse *= 255**2 / num_pixels

    # The rate-distortion cost.
    train_loss = args.lmbda * train_mse + train_bpp

    # Minimize loss and auxiliary loss, and execute update op.
    step = tf.Variable(0, trainable=False, name='global_step')
    main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    main_step = main_optimizer.minimize(train_loss, global_step=step)

    aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    # number of parameters
    num_params = count_num_trainable_params()
    print("num_params: %d" % num_params)

    # For tensorboard
    tf.summary.scalar('loss', train_loss)
    tf.summary.scalar('bpp', train_bpp)
    tf.summary.scalar('mse', train_mse)
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter(args.checkpoint_dir + "/logs")
    saver = tf.train.Saver(max_to_keep=100)

    # create tensorflow session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Re-running with an existing results directory must not abort
        # training, so only create it when it is missing.
        file_dir = args.checkpoint_dir + '/results'
        if not os.path.isdir(file_dir):
            os.makedirs(file_dir)

        print('Training is started!')
        dataset, img_names = _load_image()

        try:
            for _ in range(args.last_step):
                img_batch = get_batch(dataset, len(img_names))
                _, train_summary, loss, global_step = sess.run(
                    [train_op, merged, train_loss, step],
                    feed_dict={x: img_batch})

                if global_step % 1000 == 0:
                    writer.add_summary(train_summary, global_step=global_step)
                    print('step: %d / %d' % (global_step, args.last_step))

                if global_step % 100000 == 0:
                    saver.save(sess=sess,
                               save_path=args.checkpoint_dir + "/model.ckpt",
                               global_step=global_step)
                    print('Model is saved!')
        finally:
            # Flush pending summaries to disk even if training is interrupted.
            writer.close()
        print('Training is finished!')

Example #8

0

Show file

File: li_004.py Project: decotoj/dnn-compression

def train():
  """Builds the importance-map compression graph and runs the training loop.

  Configuration comes from module-level constants (MODEL_DIRECTORY,
  TRAIN_DIRECTORY, PATCHSIZE, BATCH_SIZE, LMBDA, LEARNING_RATE, NUM_STEPS,
  LOG_STEPS, CHECKPOINT_SAVE, PREPROCESS_THREADS). Every LOG_STEPS
  iterations the loss, distortion and rate of the current training batch are
  printed and appended to `Train_Log.txt` in MODEL_DIRECTORY.

  Raises:
    RuntimeError: if TRAIN_DIRECTORY matches no files.
  """

  # Log file for the periodic training metrics.
  logFile = MODEL_DIRECTORY + '/' 'Train_Log.txt'

  # Set Tensorflow logging.
  tf.logging.set_verbosity(tf.logging.INFO)

  # Create input data pipeline.
  with tf.device('/cpu:0'):
    train_files = glob.glob(TRAIN_DIRECTORY)
    if not train_files:
      # Fail early with a clear message instead of building a pipeline
      # around an empty file list.
      raise RuntimeError(
          "No training images found with glob '{}'.".format(TRAIN_DIRECTORY))
    train_dataset = tf.data.Dataset.from_tensor_slices(train_files)
    train_dataset = train_dataset.shuffle(buffer_size=len(train_files)).repeat()
    train_dataset = train_dataset.map(load_image, num_parallel_calls=PREPROCESS_THREADS)
    train_dataset = train_dataset.map(
        lambda x: tf.random_crop(x, (PATCHSIZE, PATCHSIZE, 3)))
    train_dataset = train_dataset.batch(BATCH_SIZE)
    train_dataset = train_dataset.prefetch(32)

  # Determine number of pixels and print input data info.
  num_pixels = BATCH_SIZE * PATCHSIZE ** 2
  print('Num Train File', len(train_files))
  print('Num_Pix', num_pixels, BATCH_SIZE, PATCHSIZE)

  # Get training patch from dataset.
  x = train_dataset.make_one_shot_iterator().get_next()

  ###########################Li Algorithm Start#################################

  # Build encoder, importance map / mask and decoder.
  E, fx = encoder_li(x)

  P = importance_map(fx)
  M = gen_mask(P)

  B = binarizer(E)

  bc = tf.multiply(E,M) #NOTE: Skipping 'B' and Using "E' instead seemed to work better

  entropy_bottleneck = tfc.EntropyBottleneck()
  bc_tilde, likelihoods = entropy_bottleneck(bc, training=True)

  x_tilde = decoder_li(bc_tilde)

  print('x', x)
  print('E', E)
  print('fx', fx)
  print('x_tilde', x_tilde)
  print('map', P)
  print('B', B)
  print('M', M)
  print('bc', bc)

  # Rate loss: total bits divided by number of pixels.
  rateLoss = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

  # Mean squared error across pixels.
  train_mse = tf.reduce_mean(tf.squared_difference(x, x_tilde))
  train_mse *= 255 ** 2 # Multiply by 255^2 to correct for rescaling.

  # The rate-distortion cost.
  train_loss = LMBDA * train_mse + rateLoss #TEST1234

  ###########################Li Algorithm End#################################

  # Minimize loss and auxiliary loss, and execute update op.
  step = tf.train.create_global_step()
  main_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
  main_step = main_optimizer.minimize(train_loss, global_step=step)

  aux_optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE*10)
  aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

  train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

  tf.summary.scalar("loss", train_loss)
  tf.summary.scalar("bpp", rateLoss)
  tf.summary.scalar("mse", train_mse)

  tf.summary.image("original", quantize_image(x))
  tf.summary.image("reconstruction", quantize_image(x_tilde))

  # Creates summary for the probability mass function (PMF) estimated in the bottleneck.
  entropy_bottleneck.visualize()

  hooks = [tf.train.StopAtStepHook(last_step=NUM_STEPS),tf.train.NanTensorHook(train_loss),]

  # `ep` counts completed logging periods, `epSub` the iterations inside the
  # current one; `ep*LOG_STEPS+epSub` is the iteration index used in the logs.
  ep = 0
  epSub = 0
  scaffold = tf.train.Scaffold(saver=tf.train.Saver(max_to_keep=1))
  with tf.train.MonitoredTrainingSession(scaffold=scaffold, hooks=hooks, checkpoint_dir=MODEL_DIRECTORY,
      save_checkpoint_secs=CHECKPOINT_SAVE, save_summaries_secs=CHECKPOINT_SAVE) as sess:
    while not sess.should_stop():
      if epSub >= LOG_STEPS:
        epSub = 0
        ep += 1

      if epSub == 0:
        # Fetch the metrics in the same run as the training step. Separate
        # sess.run calls would each draw a new batch from the iterator, so
        # the three numbers would not belong together, and they would issue
        # further runs on the monitored session after the training step.
        _, loss_val, mse_val, rate_val = sess.run(
            [train_op, train_loss, train_mse, rateLoss])
        log_step = ep*LOG_STEPS+epSub
        print(log_step, 'TRAIN/DIST/RATE LOSS:', loss_val, mse_val, rate_val)
        with open(logFile, 'a') as f:
            f.write('step=' + str(log_step) + ',train_loss=' + str(loss_val) + ',rateLoss=' + str(rate_val) +
                    ',distortionLoss=' + str(mse_val) + '\n')
      else:
        sess.run(train_op)

      epSub += 1

Example #9

0

Show file

File: OpenDVC_train_PSNR.py Project: wgqsjtu/LDVC

# Training-graph fragment: estimates optical flow between two frames, codes it
# through an entropy bottleneck, motion-compensates the reference frame and
# forms the residual to be coded next.
folder = np.load('folder.npy')

# Reference (previously compressed) frame, current raw frame, and a scalar
# learning rate supplied at run time.
Y0_com = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel])
Y1_raw = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel])
learning_rate = tf.placeholder(tf.float32, [])

with tf.variable_scope("flow_motion"):

    # Only the first of the six values returned by optical_flow is used.
    flow_tensor, _, _, _, _, _ = motion.optical_flow(Y0_com, Y1_raw, batch_size, Height, Width)
    # Y1_warp_0 = tf.contrib.image.dense_image_warp(Y0_com, flow_tensor)

# Encode flow
flow_latent = CNN_img.MV_analysis(flow_tensor, args.N, args.M)

# The same bottleneck instance produces both the byte string and the
# latent/likelihood pair used by the rest of the graph.
entropy_bottleneck_mv = tfc.EntropyBottleneck()
string_mv = entropy_bottleneck_mv.compress(flow_latent)
# string_mv = tf.squeeze(string_mv, axis=0)

flow_latent_hat, MV_likelihoods = entropy_bottleneck_mv(flow_latent, training=True)

# Reconstruct the flow from the bottleneck output.
flow_hat = CNN_img.MV_synthesis(flow_latent_hat, args.N)

# Motion Compensation
Y1_warp = tf.contrib.image.dense_image_warp(Y0_com, flow_hat)

# The refinement network sees the flow, the reference frame and the warped
# frame concatenated along the last axis.
MC_input = tf.concat([flow_hat, Y0_com, Y1_warp], axis=-1)
Y1_MC = MC_network.MC(MC_input)

# Encode residual
# Residual between the raw frame and its motion-compensated prediction.
Res = Y1_raw - Y1_MC

Example #10

0

Show file

File: bls2017_2.py Project: xyp8023/VAE-Image-Compression-

def compress():
    """Compresses an image.

    Reads `args.input`, restores the latest checkpoint found in
    `args.checkpoint_dir` and writes the image shape, the latent shape and
    the compressed string to `args.output`. With `args.verbose` set, it also
    measures MSE, PSNR and bits per pixel, appends them to the `Output_*.txt`
    files in the working directory and prints them.

    Raises:
        RuntimeError: if no checkpoint is found in `args.checkpoint_dir`.
    """

    # Load input image and add batch dimension.
    x = load_image(args.input)
    x = tf.expand_dims(x, 0)
    x.set_shape([1, None, None, 3])

    # Transform and compress the image, then remove batch dimension.
    y = analysis_transform(x, args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    string = entropy_bottleneck.compress(y)
    string = tf.squeeze(string, axis=0)

    # Transform the quantized image back (if requested).
    y_hat, likelihoods = entropy_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat, args.num_filters)

    num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))

    # Total number of bits divided by number of pixels.
    eval_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Mean squared error across pixels.
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)
    mse = tf.reduce_sum(tf.squared_difference(x * 255, x_hat)) / num_pixels

    with tf.Session() as sess:
        # Load the latest model checkpoint, get the compressed string and the
        # tensor shapes.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        if latest is None:
            # Report the missing checkpoint directly rather than failing
            # later on a None path.
            raise RuntimeError(
                "No checkpoint found in '{}'.".format(args.checkpoint_dir))
        tf.train.Saver().restore(sess, save_path=latest)
        string_value, x_shape, y_shape = sess.run(
            [string, tf.shape(x), tf.shape(y)])

        # Write a binary file with the shape information and the compressed
        # string.
        with open(args.output, "wb") as f:
            f.write(np.array(x_shape[1:-1], dtype=np.uint16).tobytes())
            f.write(np.array(y_shape[1:-1], dtype=np.uint16).tobytes())
            f.write(string_value)

        # If requested, transform the quantized image back and measure
        # performance.
        if args.verbose:
            eval_bpp_value, mse_value, num_pixels_value = sess.run(
                [eval_bpp, mse, num_pixels])

            # The actual bits per pixel including overhead: the header above
            # is four uint16 values, i.e. 8 bytes.
            bpp = (8 + len(string_value)) * 8 / num_pixels_value

            psnr = 10 * np.log10(255 * 255 / mse_value)

            # Append each metric to its own running log file.
            metric_logs = (
                ('Output_MSE.txt', mse_value),
                ('Output_PSNR.txt', psnr),
                ('Output_InformationInBPP.txt', eval_bpp_value),
                ('Output_ActualBPP.txt', bpp),
            )
            for log_name, value in metric_logs:
                with open(log_name, 'a+') as log_file:
                    log_file.write('%10f\n' % value)

            print('PSNR: {:0.4}'.format(psnr))
            print("Mean squared error: {:0.4}".format(mse_value))
            print("Information content of this image in bpp: {:0.4}".format(
                eval_bpp_value))
            print("Actual bits per pixel for this image: {:0.4}".format(bpp))

Example #11

0

Show file

File: compression_model_512.py Project: Jingyuying/pc_compress

def model_fn(features, labels, mode, params):
    '''
    Estimator model function for the point-cloud compression model.

    :param features:  batch_features from input_fn (the compressed strings
                      when params['decompress'] is set)
    :param labels:  batch_labels from input_fn (unused)
    :param mode:    An instance of tf.estimator.ModeKeys
    :param params:  Additional configuration; reads 'decompress' (optional,
                    defaults to False), 'knn', 'batch_size', 'lmbda' and
                    'checkpoint_dir'
    :return: a tf.estimator.EstimatorSpec for the TRAIN, EVAL and PREDICT modes
    '''
    del labels

    # Default 'decompress' on a copy so the caller's dict is left untouched.
    params = dict(params)
    if params.get('decompress') is None:
        params['decompress'] = False
    params = namedtuple('Struct', params.keys())(*params.values())

    if params.decompress:
        assert mode == tf.estimator.ModeKeys.PREDICT, 'Decompression must use prediction mode'
        entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32)
        y_tilde = entropy_bottleneck.decompress(features, [512], channels=512)  # B*N
        x_hat = pc_decoder(y_tilde, params.batch_size, is_training=False, bn_decay=False)
        predictions = {
            'y_tilde': y_tilde,
            'x_hat': x_hat
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    training = (mode == tf.estimator.ModeKeys.TRAIN)
    # Number of points per cloud, read from the static shape of the input.
    num_points = int(features.shape[1])
    pc = features
    global_step = tf.train.get_global_step()
    bn_decay = get_bn_decay(global_step)
    learning_rate = get_learning_rate(global_step)
    tf.summary.scalar('bn_decay', bn_decay)
    tf.summary.scalar('learning_rate', learning_rate)

    # ============= encoder =============
    y = pc_encoder(pc, params.knn, is_training=training, bn_decay=bn_decay)

    # ============= bottleneck layer =============
    # NOTE(review): the bottleneck is called with training=True in every mode,
    # including EVAL and PREDICT - confirm this is intended.
    entropy_bottleneck = tfc.EntropyBottleneck()
    y_tilde, likelihoods = entropy_bottleneck(y, training=True)

    # ============= decoder =============
    x_tilde = pc_decoder(y_tilde, params.batch_size, is_training=training, bn_decay=bn_decay)

    # number of bits divided by number of points
    train_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * int(num_points))

    if mode == tf.estimator.ModeKeys.PREDICT:
        string = entropy_bottleneck.compress(y)
        predictions = {
            'string': string,
            'x_tilde': x_tilde,
            'y_tilde': y_tilde
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Rate-distortion objective: distortion loss plus lmbda-weighted rate.
    losses = get_emd_loss(x_tilde, pc, 1)
    rd_loss = params.lmbda * train_bpp + losses
    tf.summary.scalar('loss', losses)
    tf.summary.scalar('rd_loss', rd_loss)
    tf.summary.scalar('bpp', train_bpp)

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Main optimizer for the RD loss, a second one for the bottleneck's
        # auxiliary loss; both run together with the bottleneck's update op.
        main_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        main_step = main_optimizer.minimize(rd_loss, global_step=global_step)

        aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
        aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

        train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

        return tf.estimator.EstimatorSpec(mode, loss=rd_loss, train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        summary_hook = tf.train.SummarySaverHook(
            save_steps=5,
            output_dir=os.path.join(params.checkpoint_dir, 'eval'),
            summary_op=tf.summary.merge_all())
        return tf.estimator.EstimatorSpec(mode, loss=rd_loss, evaluation_hooks=[summary_hook])

Example #12

0

Show file

File: bls2017_2.py Project: xyp8023/VAE-Image-Compression-

def train():
    """Builds the autoencoder training graph and runs it until `last_step`.

    Reads settings from the module-level `args` (verbose, data_glob,
    batchsize, patchsize, num_filters, lmbda, last_step, checkpoint_dir).
    """

    if args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Decode every file matching the glob once and freeze the result into a
    # graph constant.
    image_stack = tf.map_fn(
        load_image,
        tf.matching_files(args.data_glob),
        dtype=tf.float32,
        back_prop=False)
    with tf.Session() as sess:
        images = tf.constant(sess.run(image_stack), name="images")

    # Each step trains on a random crop taken from the images constant.
    crop_shape = (args.batchsize, args.patchsize, args.patchsize, 3)
    x = tf.random_crop(images, crop_shape)
    num_pixels = np.prod(crop_shape[:-1])

    # Analysis transform -> entropy bottleneck -> synthesis transform.
    y = analysis_transform(x, args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    y_tilde, likelihoods = entropy_bottleneck(y, training=True)
    x_tilde = synthesis_transform(y_tilde, args.num_filters)

    # Rate: total bits per pixel.
    train_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Distortion: summed squared error, normalised per pixel and scaled by
    # 255^2 to undo the rescaling.
    squared_error_sum = tf.reduce_sum(tf.squared_difference(x, x_tilde))
    train_mse = squared_error_sum * (255**2 / num_pixels)

    # Rate-distortion objective.
    train_loss = args.lmbda * train_mse + train_bpp

    # Main optimizer for the RD loss, a second one for the bottleneck's
    # auxiliary loss; both run together with the bottleneck's update op.
    step = tf.train.create_global_step()
    main_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
        train_loss, global_step=step)
    aux_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
        entropy_bottleneck.losses[0])
    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    # Named copies of the metrics for the periodic logging hook.
    logged_tensors = [
        tf.identity(tensor, name=label)
        for tensor, label in ((train_loss, "train_loss"),
                              (train_bpp, "train_bpp"),
                              (train_mse, "train_mse"))
    ]

    session = tf.train.MonitoredTrainingSession(
        hooks=[
            tf.train.StopAtStepHook(last_step=args.last_step),
            tf.train.NanTensorHook(train_loss),
            tf.train.LoggingTensorHook(logged_tensors, every_n_secs=60),
        ],
        checkpoint_dir=args.checkpoint_dir)
    with session as sess:
        while not sess.should_stop():
            sess.run(train_op)

Example #13

0

Show file

def test_compress(args):
  """Evaluates rate and distortion of one image over a sweep of lambda levels.

  For every level in np.arange(0, 7, 0.01) the level, the estimated bits per
  pixel and the MSE are printed as one tab-separated line.
  """

  # Read the image and give it a batch dimension of one.
  x = tf.expand_dims(read_png(args.input_file), 0)
  x.set_shape([1, None, None, 3])
  x_shape = tf.shape(x)

  # The levels are served one per session run by a one-shot iterator.
  step = 0.01
  levels = np.arange(0, 7, step)
  level_dataset = tf.data.Dataset.from_tensor_slices(
      tf.constant(levels, dtype=tf.float32))
  lmbda_log = level_dataset.make_one_shot_iterator().get_next()  # levels
  lmbda = 0.1 * tf.pow(2.0, lmbda_log - 6.0)  # true value

  # Model components, each given the current level.
  analysis_transform = AnalysisTransform(args.num_filters, lmbda_log)
  synthesis_transform = SynthesisTransform(args.num_filters, lmbda_log)
  hyper_analysis_transform = HyperAnalysisTransform(args.num_filters, lmbda_log)
  hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters, lmbda_log)
  entropy_bottleneck = tfc.EntropyBottleneck()

  # Forward pass through the analysis side and the hyperprior.
  y = analysis_transform(x)
  y_shape = tf.shape(y)
  z = hyper_analysis_transform(abs(y))
  z_hat, z_likelihoods = entropy_bottleneck(z, training=False)
  # Crop the predicted scales to the spatial size of y.
  sigma = hyper_synthesis_transform(z_hat)[:, :y_shape[1], :y_shape[2], :]
  scale_table = np.exp(
      np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
  conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
  side_string = entropy_bottleneck.compress(z)
  string = conditional_bottleneck.compress(y)

  # Reconstruction, cropped back to the input size.
  y_hat, y_likelihoods = conditional_bottleneck(y, training=False)
  x_hat = synthesis_transform(y_hat)[:, :x_shape[1], :x_shape[2], :]

  num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

  # Estimated bits of both latents, per pixel.
  total_log_likelihood = (tf.reduce_sum(tf.log(y_likelihoods)) +
                          tf.reduce_sum(tf.log(z_likelihoods)))
  eval_bpp = total_log_likelihood / (-np.log(2) * num_pixels)

  # Compare original and reconstruction on the 0..255 scale.
  im_x = x * 255
  im_x_hat = tf.round(tf.clip_by_value(x_hat, 0, 1) * 255)

  mse = tf.reduce_mean(tf.squared_difference(im_x, im_x_hat))
  psnr = tf.squeeze(tf.image.psnr(im_x_hat, im_x, 255))
  msssim = tf.squeeze(tf.image.ssim_multiscale(im_x_hat, im_x, 255))

  with tf.Session() as sess:
    # Restore the most recent checkpoint before evaluating.
    latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=latest)

    # One session run per level; the iterator advances on each run.
    for _ in levels:
      level_value, bpp_value, mse_value = sess.run([lmbda_log, eval_bpp, mse])
      print("%.2f\t%.4f\t%.4f"%(level_value, bpp_value, mse_value))

Example #14

0

Show file

# Graph fragment: estimates the flow between Y0_com and Y2_raw, codes it
# through an entropy bottleneck and derives further flows from the decoded one.
Y0_com = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel])
Y1_raw = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel])
Y2_raw = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel])

with tf.variable_scope("flow_motion", reuse=False):

    # Only the first of the six values returned by optical_flow is used.
    flow_20, _, _, _, _, _ = motion.optical_flow(Y0_com, Y2_raw, batch_size,
                                                 Height, Width)
    # Y2_warp_0 = tf.contrib.image.dense_image_warp(Y0_com_tensor, flow_20)

with tf.variable_scope("motion_compression", reuse=False):

    # Analysis transform of the estimated flow.
    flow_latent = CNN_img.MV_analysis(flow_20, num_filters=args.N, M=args.M)

    # The same bottleneck instance yields the byte string (with axis 0
    # squeezed away) and the latent/likelihood pair.
    entropy_mv = tfc.EntropyBottleneck()
    string_mv = entropy_mv.compress(flow_latent)
    string_mv = tf.squeeze(string_mv, axis=0)

    flow_latent_hat, MV_likelihoods = entropy_mv(flow_latent, training=False)

    # Reconstruct the flow from the bottleneck output.
    flow_20_hat = CNN_img.MV_synthesis(flow_latent_hat, num_filters=args.N)

with tf.variable_scope("motion_estimation", reuse=False):

    # Invert the decoded flow, halve it, then invert the halved flow again.
    # NOTE(review): the names suggest 0->2, 0->1 and 1->0 flows, with the
    # halving assuming frame 1 lies midway between frames 0 and 2 - confirm.
    flow_02_hat = motion.tf_inverse_flow(flow_20_hat, batch_size, Height,
                                         Width)
    flow_01_hat = 0.5 * flow_02_hat
    flow_10_hat = motion.tf_inverse_flow(flow_01_hat, batch_size, Height,
                                         Width)

Example #15

0

Show file

File: e6.py Project: faymek/compression

def test_compress(args):
  """Sweeps the number of active latent channels and logs rate/distortion.

  Builds the hyperprior compression graph once, restores the latest checkpoint
  from `args.checkpoint_dir`, and then, for `active` = 256, 240, ..., 32, masks
  the latent `y` so that only its first `active` channels are non-zero,
  rescales it by 256 / active, round-trips it through the conditional
  bottleneck and reconstructs the image. For every `kodak/*.png` file one CSV
  row (active, filename, bpp, mse, pixel count) is written to `e6.csv`.

  Args:
    args: Namespace providing `num_filters` and `checkpoint_dir`.
  """
  fn = tf.placeholder(tf.string, [])

  # Load input image and add batch dimension.
  x = read_png(fn)
  x = tf.expand_dims(x, 0)
  x.set_shape([1, None, None, 3])
  x_shape = tf.shape(x)

  # Instantiate model.
  analysis_transform = AnalysisTransform(args.num_filters)
  synthesis_transform = SynthesisTransform(args.num_filters)
  hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
  hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
  entropy_bottleneck = tfc.EntropyBottleneck()

  # Transform and compress the image.
  y = analysis_transform(x)
  y_shape = tf.shape(y)
  z = hyper_analysis_transform(abs(y))
  z_hat, z_likelihoods = entropy_bottleneck(z, training=False)
  sigma = hyper_synthesis_transform(z_hat)
  sigma = sigma[:, :y_shape[1], :y_shape[2], :]
  scale_table = np.exp(np.linspace(
      np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
  conditional_bottleneck = DynamicGaussianConditional(sigma, scale_table, name="gaussian_conditional")

  side_string = entropy_bottleneck.compress(z)
  string = conditional_bottleneck.compress(y)

  # Full-latent pass. `y_likelihoods` is reused by the sweep below, and calling
  # the synthesis transform here builds it before the Saver is constructed
  # (presumably this is what creates its variables, so they are part of the
  # restore -- keep this call ahead of the session).
  y_hat, y_likelihoods = conditional_bottleneck(y, training=False)
  x_hat = synthesis_transform(y_hat)
  x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

  num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

  # Bring the reference image to the 0..255 range used for the MSE below.
  x *= 255

  with tf.Session() as sess:
    # Load the latest model checkpoint.
    latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=latest)

    # 256 ones followed by 224 zeros; a 256-wide window starting at
    # (256 - active) therefore holds `active` ones followed by zeros.
    # NOTE(review): broadcasting against `y` assumes 256 latent channels.
    const = tf.constant([1]*256+[0]*224,dtype=tf.float32)

    # The context manager closes the CSV even if a sess.run call raises.
    with open("e6.csv", "w") as f:
      print("active, fn, bpp, mse, np", file=f)
      for active in range(256,31,-16):
        mask = const[256-active:512-active]
        rate = tf.reduce_sum(mask) / 256
        # Zero the inactive channels and rescale the rest by 1 / rate.
        y_itc = y * mask/rate

        string = conditional_bottleneck.compress(y_itc)
        y_itc_hat = conditional_bottleneck.decompress(string)

        # Transform the quantized latent back to image space.
        x_hat = synthesis_transform(y_itc_hat)
        x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

        # Bits per pixel: only the first `active` channels of the (unmasked)
        # y likelihoods are counted, plus the full side information.
        eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods[:,:,:,:active])) +
                    tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

        x_hat = tf.clip_by_value(x_hat, 0, 1)
        x_hat = tf.round(x_hat * 255)

        mse = tf.reduce_mean(tf.squared_difference(x, x_hat))

        for filename in glob.glob("kodak/*.png"):

          v_eval_bpp, v_mse, v_num_pixels = sess.run(
              [eval_bpp, mse, num_pixels], feed_dict={fn: filename})

          print("%.2f, %s, %.4f, %.4f, %d"%(active, filename, v_eval_bpp, v_mse, v_num_pixels), file=f)

Example #16

0

Show file

File: mobile_ls2017_without_validation.py Project: adityassrana/mobile-compression

def train(args):
    """Trains the model.

    Builds an input pipeline of random crops from the PNG files matching
    `args.train_glob`, an analysis / entropy-bottleneck / synthesis
    autoencoder, and a rate-distortion loss
    `args.lmbda * distortion + bits_per_pixel`, where the distortion is the
    MS-SSIM loss when `args.ssim_loss` is set and the MSE otherwise. Training
    runs in a `MonitoredTrainingSession` until `args.last_step`, writing
    checkpoints and summaries to `args.checkpoint_dir`.

    Args:
        args: Namespace providing `verbose`, `train_glob`,
            `preprocess_threads`, `patchsize`, `batchsize`, `num_filters`,
            `ssim_loss`, `lmbda`, `last_step` and `checkpoint_dir`.

    Raises:
        RuntimeError: If `args.train_glob` matches no files.
    """

    if args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Create input data pipeline.
    with tf.device("/cpu:0"):
        train_files = glob.glob(args.train_glob)
        if not train_files:
            raise RuntimeError(
                "No training images found with glob '{}'.".format(
                    args.train_glob))
        train_dataset = tf.data.Dataset.from_tensor_slices(train_files)
        train_dataset = train_dataset.shuffle(
            buffer_size=len(train_files)).repeat()
        train_dataset = train_dataset.map(
            read_png, num_parallel_calls=args.preprocess_threads)
        train_dataset = train_dataset.map(
            lambda x: tf.random_crop(x, (args.patchsize, args.patchsize, 3)))
        train_dataset = train_dataset.batch(args.batchsize)
        train_dataset = train_dataset.prefetch(32)

    num_pixels = args.batchsize * args.patchsize**2

    # Get training patch from dataset.
    x = train_dataset.make_one_shot_iterator().get_next()

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    synthesis_transform = SynthesisTransform(args.num_filters)

    # Build autoencoder.
    y = analysis_transform(x)
    y_tilde, likelihoods = entropy_bottleneck(y, training=True)
    x_tilde = synthesis_transform(y_tilde)

    # Total number of bits divided by number of pixels.
    train_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Mean squared error across pixels.
    train_mse = tf.reduce_mean(tf.squared_difference(x, x_tilde))

    # Multiply by 255^2 to correct for rescaling.
    train_mse *= 255**2

    # Calculate psnr and ssim. The 255**2 correction above and the max_val of 1
    # already used for the SSIM loss both treat x / x_tilde as 0..1 images, so
    # the reported metrics use the same dynamic range. (Passing 255 here would
    # inflate the PSNR by a constant and push MS-SSIM towards 1.)
    train_psnr = tf.reduce_mean(tf.image.psnr(x_tilde, x, 1))
    msssim = tf.image.ssim_multiscale(x_tilde, x, 1)
    train_msssim_value = tf.reduce_mean(msssim)

    # structural similarity loss
    train_ssim = tf.reduce_mean(1 - msssim)

    # Choose distortion metric
    distortion = train_ssim if args.ssim_loss else train_mse

    # The rate-distortion cost.
    train_loss = args.lmbda * distortion + train_bpp

    # Minimize loss and auxiliary loss, and execute update op.
    step = tf.train.create_global_step()
    main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    main_step = main_optimizer.minimize(train_loss, global_step=step)

    # The bottleneck's own loss is minimized by a separate optimizer with a
    # larger learning rate.
    aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])

    train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])

    # Log scalar values
    s_loss = tf.summary.scalar("train/loss", train_loss)
    s_bpp = tf.summary.scalar("train/bpp", train_bpp)
    s_mse = tf.summary.scalar("train/mse", train_mse)
    s_psnr = tf.summary.scalar("train/psnr", train_psnr)
    s_msssim_value = tf.summary.scalar("train/multiscale ssim value",
                                       train_msssim_value)
    s_ssim = tf.summary.scalar("train/multiscale ssim",
                               -10 * tf.log(train_ssim))

    # Log training images
    s_original = tf.summary.image("images/original", quantize_image(x))
    s_reconstruction = tf.summary.image("images/reconstruction",
                                        quantize_image(x_tilde))

    # Merge scalars into a summary
    train_summary = tf.summary.merge(
        [s_loss, s_bpp, s_mse, s_psnr, s_msssim_value, s_ssim])

    # Merge images into a summary
    image_summary = tf.summary.merge([s_original, s_reconstruction])

    # Scalars are written every 30 seconds, images once an hour.
    hooks = [
        tf.train.StopAtStepHook(last_step=args.last_step),
        tf.train.NanTensorHook(train_loss),
        tf.train.SummarySaverHook(save_secs=30,
                                  output_dir=args.checkpoint_dir,
                                  summary_op=train_summary),
        tf.train.SummarySaverHook(save_secs=3600,
                                  output_dir=args.checkpoint_dir,
                                  summary_op=image_summary)
    ]
    # The session's built-in summary saving is disabled; the hooks above are
    # the only summary writers.
    with tf.train.MonitoredTrainingSession(hooks=hooks,
                                           checkpoint_dir=args.checkpoint_dir,
                                           save_checkpoint_secs=300,
                                           save_summaries_steps=None,
                                           save_summaries_secs=None) as sess:
        while not sess.should_stop():
            sess.run(train_op)

Example #17

0

Show file

File: e6.py Project: faymek/compression

def test_decompress(args):
  """Decompresses an image and reports quality for truncated latents.

  Reads the packed strings and shapes from `args.input_file`, decodes the
  latent, and then, for `active` = 192, 184, ..., 8, reconstructs an image from
  only the first `active` latent channels and prints
  `active, mse, psnr, msssim` measured against `kodak/kodim01.png`.

  Args:
    args: Namespace providing `input_file`, `num_filters` and
      `checkpoint_dir`.
  """

  # Read the shape information and compressed string from the binary file.
  string = tf.placeholder(tf.string, [1])
  side_string = tf.placeholder(tf.string, [1])
  x_shape = tf.placeholder(tf.int32, [2])
  y_shape = tf.placeholder(tf.int32, [2])
  z_shape = tf.placeholder(tf.int32, [2])
  with open(args.input_file, "rb") as f:
    packed = tfc.PackedTensors(f.read())
  tensors = [string, side_string, x_shape, y_shape, z_shape]
  arrays = packed.unpack(tensors)

  # Instantiate model.
  synthesis_transform = SynthesisTransform(args.num_filters)
  hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
  entropy_bottleneck = tfc.EntropyBottleneck(dtype=tf.float32)

  # Decompress and transform the image back.
  z_shape = tf.concat([z_shape, [args.num_filters]], axis=0)
  z_hat = entropy_bottleneck.decompress(
      side_string, z_shape, channels=args.num_filters)
  sigma = hyper_synthesis_transform(z_hat)
  sigma = sigma[:, :y_shape[0], :y_shape[1], :]
  scale_table = np.exp(np.linspace(
      np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
  conditional_bottleneck = tfc.GaussianConditional(
      sigma, scale_table, dtype=tf.float32)
  y_hat_all = conditional_bottleneck.decompress(string)

  # Reference image, scaled to 0..255 to match the rounded reconstruction.
  x = read_png("kodak/kodim01.png")
  x = tf.expand_dims(x, 0)
  x.set_shape([1, None, None, 3])
  x *= 255

  # Call the synthesis transform once before the Saver is constructed
  # (presumably this is what creates its variables, so they are part of the
  # restore below). The result itself is not used.
  synthesis_transform(y_hat_all[:,:,:,:192])

  # The context manager releases the session even if a run call raises.
  with tf.Session() as sess:
    latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=latest)

    for active in range(192,0,-8):
      # Keep only the first `active` latent channels.
      y_hat = y_hat_all[:,:,:,:active]
      x_hat = synthesis_transform(y_hat)
      x_hat = tf.clip_by_value(x_hat, 0, 1)
      x_hat = tf.round(x_hat * 255)
      mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
      psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
      msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))
      vmse, vpsnr, vmsssim = sess.run([mse, psnr, msssim], feed_dict=dict(zip(tensors, arrays)))
      print(active, vmse, vpsnr, vmsssim)

Example #18

0

Show file

 def __init__(self):
   """Set up the layer together with the entropy bottleneck it holds."""
   # The parent initializer receives the fixed layer name "FactorizedPrior".
   super(FactorizedPriorLayer, self).__init__(name="FactorizedPrior")
   # The bottleneck is created with the explicit name "entropy_model".
   bottleneck = tfc.EntropyBottleneck(name="entropy_model")
   self._entropy_model = bottleneck

Example #19

0

Show file

# Y2_raw_img = np.expand_dims(Y2_raw_img, 0)

# Graph fragment: decode a flow latent from an entropy-coded string, map it
# back to a flow field, and derive further flow fields from it.
# Height / Width are the sizes of axes 1 and 2 of Y0_com_img.
Height = np.size(Y0_com_img, 1)
Width = np.size(Y0_com_img, 2)

Y0_com = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel])
# Y1_raw = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel])
# Y2_raw = tf.placeholder(tf.float32, [batch_size, Height, Width, Channel])

# Scalar string placeholders for the three coded streams.
string_mv_tensor = tf.placeholder(tf.string, [])
string_res1_tensor = tf.placeholder(tf.string, [])
string_res2_tensor = tf.placeholder(tf.string, [])

with tf.variable_scope("motion_compression", reuse=False):

    # The scalar string gets a leading axis before decoding. The latent is
    # decoded at 1/16 of the input resolution with args.M channels.
    entropy_mv = tfc.EntropyBottleneck(dtype=tf.float32)
    flow_latent_hat = entropy_mv.decompress(tf.expand_dims(
        string_mv_tensor, 0), [Height // 16, Width // 16, args.M],
                                            channels=args.M)

    # Map the decoded latent back to a flow field.
    flow_20_hat = CNN_img.MV_synthesis(flow_latent_hat, num_filters=args.N)

with tf.variable_scope("motion_estimation", reuse=False):

    # Invert the decoded flow, halve it, and invert the halved flow again.
    # NOTE(review): the names suggest 0/1/2 are frame indices with frame 1
    # midway between 0 and 2 -- confirm against the caller.
    flow_02_hat = motion.tf_inverse_flow(flow_20_hat, batch_size, Height,
                                         Width)
    flow_01_hat = 0.5 * flow_02_hat
    flow_10_hat = motion.tf_inverse_flow(flow_01_hat, batch_size, Height,
                                         Width)

    # Half of the decoded flow itself.
    flow_21_hat = 0.5 * flow_20_hat

Example #20

0

Show file

def compress(args):
    """Compresses every image in a folder and saves the reconstructions.

    Builds the hyperprior compression graph, restores the latest checkpoint
    from `args.checkpoint_dir`, and for every file in the hard-coded input
    folder saves the reconstructed image under the hard-coded output folder.
    The per-image estimated and packed bits per pixel are written to
    `bpp.npy` and `full_bpp.npy` in the output folder.

    When `args.verbose` is set, quality metrics are printed for the last
    processed image. Previously this step ran the graph without feeding the
    filename placeholder, which cannot be evaluated.

    Args:
        args: Namespace providing `num_filters`, `checkpoint_dir` and
            `verbose`.
    """

    output_folder = "/media/expansion1/navneedhmaudgalya/Datasets/tiny_imagenet/train_bmshj_001n"

    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    bpp = []
    full_bpp = []

    # Path of the image to process; fed on every sess.run call.
    index = tf.placeholder(tf.string)

    # Load input image and add batch dimension.
    x = read_png(index)
    x = tf.expand_dims(x, 0)
    x.set_shape([1, None, None, 3])
    x_shape = tf.shape(x)

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()

    # Transform and compress the image.
    y = analysis_transform(x)
    y_shape = tf.shape(y)
    z = hyper_analysis_transform(abs(y))
    z_hat, z_likelihoods = entropy_bottleneck(z, training=False)
    sigma = hyper_synthesis_transform(z_hat)
    sigma = sigma[:, :y_shape[1], :y_shape[2], :]
    scale_table = np.exp(
        np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
    side_string = entropy_bottleneck.compress(z)
    string = conditional_bottleneck.compress(y)

    # Transform the quantized image back.
    y_hat, y_likelihoods = conditional_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat)
    x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

    num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

    # Total number of bits divided by number of pixels.
    eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + tf.reduce_sum(
        tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

    # Bring both images back to 0..255 range.
    x *= 255
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)

    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    with tf.Session() as sess:
        # Load the latest model checkpoint, get the compressed string and the tensor
        # shapes.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)
        tensors = [
            string, side_string,
            tf.shape(x)[1:-1],
            tf.shape(y)[1:-1],
            tf.shape(z)[1:-1]
        ]

        data_folder = "/media/expansion1/navneedhmaudgalya/Datasets/tiny_imagenet/train/"
        data_files = os.listdir(data_folder)

        # Pixel count used for the packed-size bpp. It is hard-coded and
        # assumes every input is 64x64 -- TODO confirm for other datasets.
        pixels_per_image = 64 * 64

        # Remember the last processed image so the verbose report below has
        # something to feed; both stay None when the folder is empty.
        packed = None
        last_image_path = None
        for image_file_name in tqdm(data_files):
            image_file_path = str(os.path.join(data_folder, image_file_name))
            x_h, arrays, inf_bpp = sess.run([x_hat, tensors, eval_bpp],
                                            feed_dict={index: image_file_path})
            # x_h is on the 0..255 scale; imsave gets it rescaled to 0..1.
            plt.imsave(os.path.join(output_folder, image_file_name),
                       x_h[0] / 255.)
            # Pack the strings and shapes to measure the real coded size.
            packed = tfc.PackedTensors()
            packed.pack(tensors, arrays)

            bpp.append(inf_bpp)
            full_bpp.append(len(packed.string) * 8 / pixels_per_image)
            last_image_path = image_file_path

        np.save("{}/bpp.npy".format(output_folder), bpp)
        np.save("{}/full_bpp.npy".format(output_folder), full_bpp)

        # If requested, measure performance on the last processed image.
        if args.verbose and packed is not None:
            v_eval_bpp, v_mse, v_psnr, v_msssim, v_num_pixels = sess.run(
                [eval_bpp, mse, psnr, msssim, num_pixels],
                feed_dict={index: last_image_path})

            # The actual bits per pixel including overhead.
            actual_bpp = len(packed.string) * 8 / v_num_pixels

            print("Mean squared error: {:0.4f}".format(v_mse))
            print("PSNR (dB): {:0.2f}".format(v_psnr))
            print("Multiscale SSIM: {:0.4f}".format(v_msssim))
            print("Multiscale SSIM (dB): {:0.2f}".format(-10 *
                                                         np.log10(1 - v_msssim)))
            print("Information content in bpp: {:0.4f}".format(v_eval_bpp))
            print("Actual bits per pixel: {:0.4f}".format(actual_bpp))

Example #21

0

Show file

def compress():
    """Compresses a set of test images, one `.bin` file per image.

    Reads its settings from the module-level `args` (`num_filters`,
    `checkpoint_dir`, `verbose`). For each image returned by `_load_image()`
    a file `<checkpoint_dir>/results/<image name without its last 4
    characters>.bin` is written, holding the spatial shapes of the input and
    of the latent as uint16 values followed by the compressed string. With
    `args.verbose` the estimated and actual bits per pixel are printed.
    """

    # Load input image and add batch dimension.
    x = tf.placeholder(tf.float32, [1, None, None, 3])

    # Transform and compress the image, then remove batch dimension.
    y = analysis_transform(x, args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    string = entropy_bottleneck.compress(y)
    string = tf.squeeze(string, axis=0)

    # Transform the quantized image back (if requested).
    y_hat, likelihoods = entropy_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat, args.num_filters)

    num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))

    # Total number of bits divided by number of pixels.
    eval_bpp = tf.reduce_sum(tf.log(likelihoods)) / (-np.log(2) * num_pixels)

    # Mean squared error across pixels.
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)
    print(x_hat.shape)
    # Defined but not fetched below; per the original note, evaluating it
    # requires image sizes that are a multiple of 16.
    mse = tf.reduce_sum(tf.squared_difference(x * 255, x_hat)) / num_pixels

    # Create the shape ops once. Building them inside the per-image loop would
    # add new nodes to the graph on every iteration.
    x_shape_op = tf.shape(x)
    y_shape_op = tf.shape(y)

    with tf.Session() as sess:
        # Load the latest model checkpoint and test images.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)
        dataset, img_names = _load_image()

        for img, img_name in zip(dataset, img_names):
            # Get the compressed string and the tensor shapes.
            _string, x_shape, y_shape = sess.run(
                [string, x_shape_op, y_shape_op], feed_dict={x: [img]})

            # Write a binary file with the shape information and the compressed string.
            file_name = args.checkpoint_dir + '/results/' + img_name[:-4] + '.bin'
            with open(file_name, "wb") as f:
                f.write(np.array(x_shape[1:-1], dtype=np.uint16).tobytes())
                f.write(np.array(y_shape[1:-1], dtype=np.uint16).tobytes())
                f.write(_string)

            # If requested, measure performance.
            if args.verbose:
                _eval_bpp, _num_pixels = sess.run([eval_bpp, num_pixels],
                                                  feed_dict={x: [img]})

                # The actual bits per pixel including overhead: the 8 extra
                # bytes are the four uint16 shape values written above.
                bpp = (8 + len(_string)) * 8 / _num_pixels

                print(
                    "Information content of this image in bpp: {:0.4}".format(
                        _eval_bpp))
                print(
                    "Actual bits per pixel for this image: {:0.4}".format(bpp))

Example #22

0

Show file

def _gmm_pmf(samples, mu, sigma, weight, tiny):
    """Return the three-component Gaussian-mixture mass of each unit bin.

    For every entry s of `samples`, the mass of the interval
    [s - 0.5, s + 0.5] under component k is the difference of two Gaussian
    CDFs with mean `mu[k]` and scale `sigma[k] + tiny`; the components are
    combined with `weight[k]`. Tail mass outside the sample range is not added.
    """
    pmf = 0.0
    for k in range(3):
        denom = (sigma[k] + tiny) * 2 ** 0.5
        upper = 0.5 * (1 + scipy.special.erf((samples + 0.5 - mu[k]) / denom))
        lower = 0.5 * (1 + scipy.special.erf((samples - 0.5 - mu[k]) / denom))
        pmf = pmf + (upper - lower) * weight[k]
    return pmf


def compress(input, output, num_filters, checkpoint_dir):
    """Compresses one image into a header file and a range-coded payload.

    The image is reflect-padded to a multiple of 64 in both spatial
    dimensions and transformed to a latent `y`; the hyper latent `z` is coded
    with an entropy bottleneck. `output` receives the image shape, the length
    of the z string, the symbol range, a bitmap of non-zero latent channels
    and the z string. The rounded latent is then range-coded element by
    element into `output[:-4] + '.bin'`, using a three-component Gaussian
    mixture whose parameters come from `entropy_parameter` evaluated on 5x5
    windows. Finally timing, bits per pixel, PSNR and MS-SSIM are printed.

    Args:
        input: Path of the image to compress (passed to `load_image`).
        output: Path of the header file; also determines the payload path.
        num_filters: Filter count handed to the transforms.
        checkpoint_dir: Currently unused -- the checkpoint path is hard-coded
            below.
    """

    start = time.time()
    tf.set_random_seed(1)
    tf.reset_default_graph()

    with tf.device('/cpu:0'):
        # Load input image and add batch dimension.

        x = load_image(input)

        # Pad the x to x_pad: round height and width up to a multiple of 64,
        # padding only at the end of each axis.
        mod = tf.constant([64, 64, 1], dtype=tf.int32)
        div = tf.ceil(tf.truediv(tf.shape(x), mod))
        div = tf.cast(div, tf.int32)
        paddings = tf.subtract(tf.multiply(div, mod), tf.shape(x))
        paddings = tf.expand_dims(paddings, 1)
        paddings = tf.concat(
            [tf.convert_to_tensor(np.zeros((3, 1)), dtype=tf.int32), paddings],
            axis=1)

        x_pad = tf.pad(x, paddings, "REFLECT")
        x = tf.expand_dims(x, 0)
        x.set_shape([1, None, None, 3])

        x_pad = tf.expand_dims(x_pad, 0)
        x_pad.set_shape([1, None, None, 3])

        # Transform and compress the image, then remove batch dimension.
        y = analysis_transform(x_pad, num_filters)

        # Build a hyper autoencoder
        z = hyper_analysis(y, num_filters)
        entropy_bottleneck = tfc.EntropyBottleneck()
        string = entropy_bottleneck.compress(z)
        string = tf.squeeze(string, axis=0)

        z_tilde, z_likelihoods = entropy_bottleneck(z, training=False)

        # Decode z_tilde from the string itself so that the encoder sees
        # exactly what a decoder would reconstruct.
        string_rec = tf.expand_dims(string, 0)
        z_tilde = entropy_bottleneck.decompress(string_rec,
                                                tf.shape(z)[1:],
                                                channels=num_filters)

        phi = hyper_synthesis(z_tilde, num_filters)

        # For the Gaussian Mixture Model (GMM), a window-based fast
        # implementation is used: the entropy parameters are evaluated on 5x5
        # windows fed through the placeholders below.
        y_hat = tf.round(y)

        tiny_y = tf.placeholder(dtype=tf.float32,
                                shape=[1] + [5] + [5] + [num_filters])
        tiny_phi = tf.placeholder(dtype=tf.float32,
                                  shape=[1] + [5] + [5] + [num_filters * 2])
        _, _, y_means, y_variances, y_probs = entropy_parameter(tiny_phi,
                                                                tiny_y,
                                                                num_filters,
                                                                training=False)

        x_hat = synthesis_transform(y_hat, num_filters)

        num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))
        # Crop the reconstruction back to the unpadded size; this also drops
        # the batch axis.
        x_hat = x_hat[0, :tf.shape(x)[1], :tf.shape(x)[2], :]

        # Mean squared error across pixels.
        x_hat = tf.clip_by_value(x_hat, 0, 1)
        x_hat = tf.round(x_hat * 255)
        mse = tf.reduce_mean(tf.squared_difference(x * 255, x_hat))

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # The checkpoint is hard-coded; `checkpoint_dir` is not consulted.
            latest = "models/model-1399000"  #lambda = 14

            print(latest)
            tf.train.Saver().restore(sess, save_path=latest)

            (string_value, x_shape, y_shape, num_pixels_value, y_hat_value,
             phi_value) = sess.run(
                 [string, tf.shape(x), tf.shape(y), num_pixels, y_hat, phi])

            # Largest absolute latent value (at least 1); symbols are shifted
            # by it so that they are non-negative.
            minmax = np.maximum(abs(y_hat_value.max()), abs(y_hat_value.min()))
            minmax = int(np.maximum(minmax, 1))
            print(minmax)

            # Fast implementations by only encoding non-zero channels with 128/8 = 16bytes overhead
            # `np.int` no longer exists in current NumPy; the builtin `int` is
            # what it aliased.
            flag = np.zeros(y_shape[3], dtype=int)

            for ch_idx in range(y_shape[3]):
                if np.sum(abs(y_hat_value[:, :, :, ch_idx])) > 0:
                    flag[ch_idx] = 1

            # flatnonzero always yields a 1-D index array, including when
            # exactly one channel is non-zero (squeezing np.where's result
            # gave a 0-d array there, which cannot be iterated).
            non_zero_idx = np.flatnonzero(flag)

            num = np.packbits(np.reshape(flag, [8, y_shape[3] // 8]))

            # ============== encode the bits for z===========
            if os.path.exists(output):
                os.remove(output)

            # NOTE(review): the z string length and minmax are stored as
            # uint16 -- values above 65535 would not fit.
            with open(output, mode='wb') as fileobj:
                fileobj.write(
                    np.array(x_shape[1:-1], dtype=np.uint16).tobytes())
                fileobj.write(
                    np.array([len(string_value), minmax],
                             dtype=np.uint16).tobytes())
                fileobj.write(np.array(num, dtype=np.uint8).tobytes())
                fileobj.write(string_value)

            # ============ encode the bits for y ==========
            print("INFO: start encoding y")
            encoder = RangeEncoder(output[:-4] + '.bin')
            samples = np.arange(0, minmax * 2 + 1)
            TINY = 1e-10

            kernel_size = 5
            pad_size = (kernel_size - 1) // 2

            # Zero-pad the spatial axes so that a full window exists around
            # every latent position.
            padded_y = np.pad(y_hat_value, ((0, 0), (pad_size, pad_size),
                                            (pad_size, pad_size), (0, 0)),
                              'constant',
                              constant_values=((0., 0.), (0., 0.), (0., 0.),
                                               (0., 0.)))
            padded_phi = np.pad(phi_value, ((0, 0), (pad_size, pad_size),
                                            (pad_size, pad_size), (0, 0)),
                                'constant',
                                constant_values=((0., 0.), (0., 0.), (0., 0.),
                                                 (0., 0.)))

            # Close the encoder even if encoding fails part-way.
            try:
                for h_idx in range(y_shape[1]):
                    for w_idx in range(y_shape[2]):

                        extracted_y = padded_y[:, h_idx:h_idx + kernel_size,
                                               w_idx:w_idx + kernel_size, :]
                        extracted_phi = padded_phi[:,
                                                   h_idx:h_idx + kernel_size,
                                                   w_idx:w_idx + kernel_size,
                                                   :]

                        y_means_values, y_variances_values, y_probs_values = \
                            sess.run([y_means, y_variances, y_probs],
                                     feed_dict={tiny_y: extracted_y,
                                                tiny_phi: extracted_phi})

                        for ch_idx in non_zero_idx:
                            # Parameters at the window centre; the means are
                            # shifted by minmax like the symbols.
                            mu = y_means_values[0, pad_size, pad_size,
                                                ch_idx, :] + minmax
                            sigma = y_variances_values[0, pad_size, pad_size,
                                                       ch_idx, :]
                            weight = y_probs_values[0, pad_size, pad_size,
                                                    ch_idx, :]

                            # Calculate the pmf/cdf
                            pmf = _gmm_pmf(samples, mu, sigma, weight, TINY)

                            # To avoid the zero-probability
                            pmf_clip = np.clip(pmf, 1.0 / 65536, 1.0)
                            pmf_clip = np.round(pmf_clip / np.sum(pmf_clip) *
                                                65536)
                            cdf = list(np.add.accumulate(pmf_clip))
                            cdf = [0] + [int(i) for i in cdf]

                            symbol = int(
                                y_hat_value[0, h_idx, w_idx, ch_idx] + minmax)
                            encoder.encode([symbol], cdf)
            finally:
                encoder.close()

            bpp_real = (os.path.getsize(output) +
                        os.path.getsize(output[:-4] + '.bin')) * 8 / num_pixels_value
            bpp_side = (os.path.getsize(output)) * 8 / num_pixels_value

            end = time.time()
            print("Time : {:0.3f}".format(end - start))

            psnr = sess.run(tf.image.psnr(x_hat, x * 255, 255))
            msssim = sess.run(tf.image.ssim_multiscale(x_hat, x * 255, 255))

            print("Actual bits per pixel for this image: {:0.4}".format(
                bpp_real))
            print("Side bits per pixel for z: {:0.4}".format(bpp_side))
            print("PSNR (dB) : {:0.4}".format(psnr[0]))
            print("MS-SSIM : {:0.4}".format(msssim[0]))