Esempio n. 1
0
    def glow_encoder(self,
                     frame,
                     condition=False,
                     cond_latents=None,
                     init=False):
        """Encode a frame into a hierarchy of Glow latents.

        The frame is squeezed, preprocessed and passed through the uniform
        binning correction before being fed to the forward (reverse=False)
        "codec" encoder.

        Args:
          frame: 5-D Tensor of shape (batch_size, 1, height, width, channels).
          condition: Whether or not to condition on cond_latents.
          cond_latents: optional, list of tensors with length equal to
            hparams.n_levels - 1. If provided, the latent at level l is
            conditioned on the cond_latent at level l.
          init: Whether the given batch is an "init" batch or a "train" batch.
            Only forwarded to self.squeeze_video here.

        Returns:
          objective: log-likelihood of the frame per the model (binning
            correction term plus the encoder term).
          z_top: top-level latent.
          z_levels: a list of tensors with latents at all levels.

        Side effects:
          self.eps and self.level_states are overwritten with the values
          returned by the encoder.
        """
        squeezed = self.squeeze_video(frame, init=init)
        scaled = self.preprocess(squeezed)
        corrected, objective = glow_ops.uniform_binning_correction(scaled)

        # The current per-level states are handed to the encoder and then
        # replaced by whatever states it returns.
        (z_top, encoder_objective, self.eps, z_levels,
         self.level_states) = glow_ops.encoder_decoder(
             "codec",
             corrected,
             self.hparams,
             eps=None,
             reverse=False,
             cond_latents=cond_latents,
             states=self.level_states,
             condition=condition)

        objective += encoder_objective
        return objective, z_top, z_levels
Esempio n. 2
0
    def body(self, features):
        """Build the bits-per-pixel training objective for features["inputs"].

        Args:
          features: dict of tensors; the "inputs" and "targets" entries are
            read.

        Returns:
          A 2-tuple: a zero tensor shaped like features["targets"], and a
          losses dict {"training": objective} with the objective expressed in
          bits per pixel.

        Side effects:
          Sets self.z, self.eps and self.z_sample, and emits a "top_prior"
          scalar summary.
        """
        # Scale the inputs such that the pixels lie in-between -0.5 and 0.5.
        inputs = self.preprocess(features["inputs"])
        inputs, objective = glow_ops.uniform_binning_correction(inputs)

        # Data-dependent init is requested ONLY on the very first training
        # step: there the actnorm parameters are set so that the per-channel
        # output activations have zero mean and unit variance. Afterwards the
        # parameters are learned through optimisation.
        is_first_step = tf.equal(tf.train.get_or_create_global_step(), 0)
        init_op = tf.logical_and(is_first_step, self.is_training)
        init_scoped_ops = [glow_ops.get_variable_ddi, glow_ops.actnorm]
        with arg_scope(init_scoped_ops, init=init_op):
            codec_out = glow_ops.encoder_decoder(
                "codec", inputs, self.hparams, eps=None, reverse=False)
            self.z, encoder_objective, self.eps, _ = codec_out
            objective += encoder_objective

            prior_objective, prior_dist = self.top_prior(self.z)
            mean_prior_objective = tf.reduce_mean(prior_objective)
            tf.summary.scalar("top_prior", mean_prior_objective)
            self.z_sample = prior_dist.sample()
            objective += prior_objective

        # Negate and rescale by log(2) * (number of dimensions per example)
        # to obtain bits per pixel.
        _, height, width, channels = common_layers.shape_list(inputs)
        objective = -objective / (np.log(2) * height * width * channels)
        return tf.zeros_like(features["targets"]), {"training": objective}
Esempio n. 3
0
def preprocess_frame(frame):
    """Rescale a frame and apply the uniform binning correction.

    Steps:
      1. Converts [0, 255] to [-0.5, 0.5].
      2. Adds uniform noise.

    Args:
      frame: 3-D Tensor representing pixels.

    Returns:
      frame: 3-D Tensor with values in between [-0.5, 0.5]
    """
    # convert_rgb_to_real gives values in [0.0, 1.0]; subtracting 0.5 centres
    # them on zero, i.e. [-0.5, 0.5].
    centered = common_layers.convert_rgb_to_real(frame) - 0.5
    # Only the corrected frame is needed; the second return value is dropped.
    noisy, _ = glow_ops.uniform_binning_correction(centered)
    return noisy
Esempio n. 4
0
    def objective_tower(self, features, init=True):
        """Compute the model objective in bits-per-pixel.

        Args:
          features: dict of tensors; only the "inputs" entry is read here.
          init: Whether or not to run data-dependent init.

        Returns:
          objective: the negated sum of the binning-correction, encoder and
            top-prior terms, expressed in bits-per-pixel.

        Side effects:
          Sets self.z, self.eps, self.z_top_shape and self.z_sample.
        """
        # Scale the inputs such that the pixels lie in-between -0.5 and 0.5.
        inputs = self.preprocess(features["inputs"])
        inputs, objective = glow_ops.uniform_binning_correction(inputs)

        # Every op listed here receives init=init. With data-dependent init
        # the actnorm parameters are set such that the per-channel output
        # activations have zero mean and unit variance; after that the
        # parameters are learned through optimisation.
        init_scoped_ops = [
            glow_ops.get_variable_ddi,
            glow_ops.actnorm,
            glow_ops.get_dropout,
        ]
        with arg_scope(init_scoped_ops, init=init):
            codec_out = glow_ops.encoder_decoder(
                "codec", inputs, self.hparams, eps=None, reverse=False)
            self.z, encoder_objective, self.eps, _, _ = codec_out
            objective += encoder_objective

            # The top-level latent shape is recorded before the prior is
            # built; this statement order is kept from the original.
            self.z_top_shape = common_layers.shape_list(self.z)
            prior_dist = self.top_prior()
            log_probs = prior_dist.log_prob(self.z)
            prior_objective = tf.reduce_sum(log_probs, axis=[1, 2, 3])
            self.z_sample = prior_dist.sample()
            objective += prior_objective

        # Negate and rescale by log(2) * (number of dimensions per example)
        # to obtain bits per pixel.
        _, height, width, channels = common_layers.shape_list(inputs)
        objective = -objective / (np.log(2) * height * width * channels)
        return objective
Esempio n. 5
0
  def objective_tower(self, features, init=True):
    """Compute the model objective in bits-per-pixel.

    Args:
      features: dict of tensors; only the "inputs" entry is read here.
      init: Whether or not to run data-dependent init.

    Returns:
      objective: the negated sum of the binning-correction, encoder and
        top-prior terms, expressed in bits-per-pixel.

    Side effects:
      Sets self.z, self.eps, self.z_top_shape and self.z_sample.
    """
    # Scale the inputs such that the pixels lie in-between -0.5 and 0.5.
    inputs = self.preprocess(features["inputs"])
    inputs, objective = glow_ops.uniform_binning_correction(inputs)

    # Both scoped ops receive init=init. With data-dependent init the actnorm
    # parameters are set such that the per-channel output activations have
    # zero mean and unit variance; after that the parameters are learned
    # through optimisation.
    init_scoped_ops = [glow_ops.get_variable_ddi, glow_ops.actnorm]
    with arg_scope(init_scoped_ops, init=init):
      codec_out = glow_ops.encoder_decoder(
          "codec", inputs, self.hparams, eps=None, reverse=False)
      self.z, encoder_objective, self.eps, _, _ = codec_out
      objective += encoder_objective

      # The top-level latent shape is recorded before the prior is built;
      # this statement order is kept from the original.
      self.z_top_shape = common_layers.shape_list(self.z)
      prior_dist = self.top_prior()
      log_probs = prior_dist.log_prob(self.z)
      prior_objective = tf.reduce_sum(log_probs, axis=[1, 2, 3])
      self.z_sample = prior_dist.sample()
      objective += prior_objective

    # Negate and rescale by log(2) * (number of dimensions per example) to
    # obtain bits per pixel.
    _, height, width, channels = common_layers.shape_list(inputs)
    objective = -objective / (np.log(2) * height * width * channels)
    return objective