Code Example #1
File: vanilla_gan.py  Project: qixiuai/tensor2tensor
 def generator(self, z, is_training, out_shape):
   """Generator outputting image in [0, 1]."""
   hparams = self.hparams
   height, width, c_dim = out_shape
   batch_size = hparams.batch_size
   with tf.variable_scope(
       "generator",
       initializer=tf.random_normal_initializer(stddev=0.02)):
     net = tf.layers.dense(z, 1024, name="g_fc1")
     net = tf.layers.batch_normalization(net, training=is_training,
                                         momentum=0.999, name="g_bn1")
     net = lrelu(net)
     net = tf.layers.dense(net, 128 * (height // 4) * (width // 4),
                           name="g_fc2")
     net = tf.layers.batch_normalization(net, training=is_training,
                                         momentum=0.999, name="g_bn2")
     net = lrelu(net)
     net = tf.reshape(net, [batch_size, height // 4, width // 4, 128])
     net = deconv2d(net, [batch_size, height // 2, width // 2, 64],
                    4, 4, 2, 2, name="g_dc3")
     net = tf.layers.batch_normalization(net, training=is_training,
                                         momentum=0.999, name="g_bn3")
     net = lrelu(net)
     net = deconv2d(net, [batch_size, height, width, c_dim],
                    4, 4, 2, 2, name="g_dc4")
     out = tf.nn.sigmoid(net)
     return common_layers.convert_real_to_rgb(out)
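Note on the conversion helper: every example on this page passes real-valued pixels in [0, 1] through common_layers.convert_real_to_rgb. A minimal sketch of what such a conversion amounts to, assuming the usual [0, 1] → [0, 255] scaling (the actual tensor2tensor implementation may add name scoping and other details):

import tensorflow as tf

def convert_real_to_rgb(x):
  # Scale real-valued pixels in [0, 1] up to RGB-like values in [0, 255].
  return x * 255.0

def convert_rgb_to_real(x):
  # Inverse conversion: bring [0, 255] pixel values back into [0, 1].
  return tf.to_float(x) / 255.0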
Code Example #2
 def top(self, body_output, _):
     frames = body_output
     if isinstance(body_output, list):
         frames = tf.stack(body_output, axis=1)
     rgb_frames = common_layers.convert_real_to_rgb(frames)
     common_video.gif_summary("body_output", rgb_frames)
     return tf.expand_dims(rgb_frames, axis=-1)
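A minimal shape sketch of this top transformation, assuming body_output is a list of [batch, height, width, channels] frames with real values in [0, 1] (the plain scaling below stands in for convert_real_to_rgb):

import tensorflow as tf

body_output = [tf.zeros([8, 64, 64, 3]) for _ in range(4)]  # 4 predicted frames
frames = tf.stack(body_output, axis=1)        # -> [8, 4, 64, 64, 3]
rgb_frames = frames * 255.0                   # stand-in for convert_real_to_rgb
logits = tf.expand_dims(rgb_frames, axis=-1)  # -> [8, 4, 64, 64, 3, 1]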
Code Example #3
File: vanilla_gan.py  Project: zhaimq/tensor2tensor
 def generator(self, z, is_training, out_shape):
   """Generator outputting image in [0, 1]."""
   hparams = self.hparams
   height, width, c_dim = out_shape
   batch_size = hparams.batch_size
   with tf.variable_scope(
       "generator",
       initializer=tf.random_normal_initializer(stddev=0.02)):
     net = tf.layers.dense(z, 1024, name="g_fc1")
     net = tf.layers.batch_normalization(net, training=is_training,
                                         momentum=0.999, name="g_bn1")
     net = lrelu(net)
     net = tf.layers.dense(net, 128 * (height // 4) * (width // 4),
                           name="g_fc2")
     net = tf.layers.batch_normalization(net, training=is_training,
                                         momentum=0.999, name="g_bn2")
     net = lrelu(net)
     net = tf.reshape(net, [batch_size, height // 4, width // 4, 128])
     net = deconv2d(net, [batch_size, height // 2, width // 2, 64],
                    4, 4, 2, 2, name="g_dc3")
     net = tf.layers.batch_normalization(net, training=is_training,
                                         momentum=0.999, name="g_bn3")
     net = lrelu(net)
     net = deconv2d(net, [batch_size, height, width, c_dim],
                    4, 4, 2, 2, name="g_dc4")
     out = tf.nn.sigmoid(net)
     return common_layers.convert_real_to_rgb(out)
Code Example #4
  def get_sampled_frame(self, pred_frame):
    """Samples the frame based on modality.

      if the modality is L2/L1 then the next predicted frame is the
      next frame and there is no sampling but in case of Softmax loss
      the next actual frame should be sampled from predicted frame.

      This enables multi-frame target prediction with Softmax loss.

    Args:
      pred_frame: predicted frame.

    Returns:
      sampled frame.

    """
    # TODO(lukaszkaiser): the logic below heavily depend on the current
    # (a bit strange) video modalities - we should change that.

    if self.is_per_pixel_softmax:
      frame_shape = common_layers.shape_list(pred_frame)
      target_shape = frame_shape[:-1] + [self.hparams.problem.num_channels]
      sampled_frame = tf.reshape(pred_frame, target_shape + [256])
      sampled_frame = pixels_from_softmax(
          sampled_frame, temperature=self.hparams.pixel_sampling_temperature)
      # TODO(lukaszkaiser): this should be consistent with modality.bottom()
      sampled_frame = common_layers.standardize_images(sampled_frame)
    else:
      x = common_layers.convert_real_to_rgb(pred_frame)
      x = x - tf.stop_gradient(x + tf.round(x))
      x = common_layers.convert_rgb_to_real(x)
      return x
    return sampled_frame
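The non-softmax branch above combines tf.round with tf.stop_gradient to discretize pixel values in the forward pass while keeping gradients flowing. A minimal sketch of the standard straight-through rounding estimator this pattern is based on (the textbook form, not necessarily the exact expression used in the snippet):

import tensorflow as tf

def straight_through_round(x):
  # Forward pass returns tf.round(x); the rounded residual is wrapped in
  # tf.stop_gradient, so the gradient of the output w.r.t. x is identity.
  return x + tf.stop_gradient(tf.round(x) - x)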
Code Example #5
File: base.py  Project: qixiuai/tensor2tensor
  def get_sampled_frame(self, pred_frame):
    """Samples the frame based on modality.

      if the modality is L2/L1 then the next predicted frame is the
      next frame and there is no sampling but in case of Softmax loss
      the next actual frame should be sampled from predicted frame.

      This enables multi-frame target prediction with Softmax loss.

    Args:
      pred_frame: predicted frame.

    Returns:
      sampled frame.

    """
    # TODO(lukaszkaiser): the logic below heavily depend on the current
    # (a bit strange) video modalities - we should change that.

    if self.is_per_pixel_softmax:
      frame_shape = common_layers.shape_list(pred_frame)
      target_shape = frame_shape[:-1] + [self.hparams.problem.num_channels]
      sampled_frame = tf.reshape(pred_frame, target_shape + [256])
      sampled_frame = pixels_from_softmax(
          sampled_frame, temperature=self.hparams.pixel_sampling_temperature)
      # TODO(lukaszkaiser): this should be consistent with modality.bottom()
      sampled_frame = common_layers.standardize_images(sampled_frame)
    else:
      x = common_layers.convert_real_to_rgb(pred_frame)
      x = x - tf.stop_gradient(x + tf.round(x))
      x = common_layers.convert_rgb_to_real(x)
      return x
    return sampled_frame
Code Example #6
  def body(self, features):
    hparams = self.hparams
    is_training = hparams.mode == tf.estimator.ModeKeys.TRAIN
    if hparams.mode != tf.estimator.ModeKeys.PREDICT:
      x = features["targets"]
      shape = common_layers.shape_list(x)
      is1d = shape[2] == 1
      self.is1d = is1d
      # Run encoder.
      x = self.encoder(x)
      # Bottleneck (mix during early training, not too important but stable).
      b, b_loss = self.bottleneck(x)
      self._cur_bottleneck_tensor = b
      b = self.unbottleneck(b, common_layers.shape_list(x)[-1])
      b = common_layers.mix(b, x, hparams.bottleneck_warmup_steps, is_training)
      if hparams.gan_loss_factor != 0.0:
        # Add a purely sampled batch on which we'll compute the GAN loss.
        g = self.unbottleneck(
            self.sample(), common_layers.shape_list(x)[-1], reuse=True)
        b = tf.concat([g, b], axis=0)
      # With probability bottleneck_max_prob use the bottleneck, otherwise x.
      if hparams.bottleneck_max_prob < -1.0:
        x = tf.where(
            tf.less(tf.random_uniform([]), hparams.bottleneck_max_prob), b, x)
      else:
        x = b
    else:
      if self._cur_bottleneck_tensor is None:
        b = self.sample()
      else:
        b = self._cur_bottleneck_tensor
      res_size = self.hparams.hidden_size * 2**self.hparams.num_hidden_layers
      res_size = min(res_size, hparams.max_hidden_size)
      x = self.unbottleneck(b, res_size)
    # Run decoder.
    x = self.decoder(x)
    if hparams.mode == tf.estimator.ModeKeys.PREDICT:
      return x, {"bottleneck_loss": 0.0}
    # Cut to the right size and mix before returning.
    res = x[:, :shape[1], :shape[2], :]
    # Add GAN loss if requested.
    gan_loss = 0.0
    if hparams.gan_loss_factor != 0.0:
      # Split back if we added a purely sampled batch.
      res_gan, res = tf.split(res, 2, axis=0)
      num_channels = self.hparams.problem.num_channels
      res_rgb = common_layers.convert_real_to_rgb(
          tf.nn.sigmoid(tf.layers.dense(res_gan, num_channels, name="gan_rgb")))
      tf.summary.image(
          "gan", common_layers.tpu_safe_image_summary(res_rgb), max_outputs=1)
      orig_rgb = tf.to_float(features["targets_raw"])

      def discriminate(x):
        return self.discriminator(x, is_training=is_training)

      gan_loss = common_layers.sliced_gan_loss(orig_rgb,
                                               reverse_gradient(res_rgb),
                                               discriminate,
                                               self.hparams.num_sliced_vecs)
      gan_loss *= hparams.gan_loss_factor
    # Mix the final result and return.
    res = common_layers.mix(res, features["targets"],
                            hparams.bottleneck_warmup_steps // 2, is_training)
    return res, {"bottleneck_loss": b_loss, "gan_loss": -gan_loss}
Code Example #7
 def top(self, body_output, _):
     frames = tf.stack(body_output, axis=1)
     rgb_frames = common_layers.convert_real_to_rgb(frames)
     common_layers.summarize_video(rgb_frames, "body_output")
     return tf.expand_dims(rgb_frames, axis=-1)
Code Example #8
 def top(self, body_output, _):
     frames = tf.stack(body_output, axis=1)
     rgb_frames = common_layers.convert_real_to_rgb(frames)
     common_layers.summarize_video(rgb_frames, "body_output")
     # TODO(lukaszkaiser): remove the need for the last dimension of 1 in eval.
     return tf.expand_dims(rgb_frames, axis=-1)
Code Example #9
File: modalities.py  Project: kltony/tensor2tensor
 def top(self, body_output, _):
   frames = tf.stack(body_output, axis=1)
   rgb_frames = common_layers.convert_real_to_rgb(frames)
   common_layers.summarize_video(rgb_frames, "body_output")
   return tf.expand_dims(rgb_frames, axis=-1)
Code Example #10
File: smnist_base.py  Project: blackPython/svg_lp
    def process(self, inputs, targets):
        all_frames = tf.unstack(inputs, axis=1) + tf.unstack(targets, axis=1)
        hparams = self.hparams

        batch_size = common_layers.shape_list(all_frames[0])[0]

        z_dim = hparams.z_dim
        g_dim = hparams.g_dim
        rnn_size = hparams.rnn_size
        prior_rnn_layers = hparams.prior_rnn_layers
        posterior_rnn_layers = hparams.posterior_rnn_layers
        predictor_rnn_layers = hparams.predictor_rnn_layers

        num_input_frames = hparams.num_input_frames
        num_target_frames = hparams.num_target_frames
        num_all_frames = num_input_frames + num_target_frames

        # Create the RNN cells.
        predictor_cell = self.rnn_model(rnn_size, "predictor", n_layers=predictor_rnn_layers)
        prior_cell = self.rnn_model(rnn_size, "prior", n_layers=prior_rnn_layers)
        posterior_cell = self.rnn_model(rnn_size, "posterior", n_layers=posterior_rnn_layers)

        # Initial RNN states.
        predictor_state = predictor_cell.zero_state(batch_size, tf.float32)
        prior_state = prior_cell.zero_state(batch_size, tf.float32)
        posterior_state = posterior_cell.zero_state(batch_size, tf.float32)

        # Encode all frames during training; only the input frames otherwise.
        enc_frames, enc_skips = [], []
        for frame in all_frames if self.is_training else all_frames[:num_input_frames]:
            with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
                enc, skip = self.encoder(frame)
                enc_frames.append(enc)
                enc_skips.append(skip)

        # Prediction.
        prior_mus = []
        prior_logvars = []
        posterior_mus = []
        posterior_logvars = []
        predicted_frames = []
        z_positions = []
        skip = None
        if self.is_training:
            for i in range(1, num_all_frames):
                h = enc_frames[i - 1]
                h_target = enc_frames[i]
                if i < num_input_frames:
                    skip = enc_skips[i - 1]
                with tf.variable_scope("prediction", reuse=tf.AUTO_REUSE):
                    mu, log_var, posterior_state = self.gaussian_rnn(
                        posterior_cell, h_target, posterior_state, z_dim, "posterior")
                    mu_p, log_var_p, prior_state = self.gaussian_rnn(
                        prior_cell, h, prior_state, z_dim, "prior")
                    z = utils.get_gaussian_tensor(mu, log_var)
                    h_pred, predictor_state = self.deterministic_rnn(
                        predictor_cell, tf.concat([h, z], axis=1),
                        predictor_state, g_dim, "predictor")
                with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE):
                    x_pred = self.decoder(h_pred, skip)
                predicted_frames.append(x_pred)
                prior_mus.append(mu_p)
                prior_logvars.append(log_var_p)
                posterior_mus.append(mu)
                posterior_logvars.append(log_var)
                z_positions.append(z)
        else:
            for i in range(1, num_all_frames):
                if i < num_input_frames:
                    h = enc_frames[i - 1]
                    skip = enc_skips[i - 1]
                else:
                    with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
                        h, _ = self.encoder(predicted_frames[-1])
                mu = log_var = mu_p = log_var_p = None
                if i < num_input_frames:
                    h_target = enc_frames[i]
                    with tf.variable_scope("prediction", reuse=tf.AUTO_REUSE):
                        mu, log_var, posterior_state = self.gaussian_rnn(
                            posterior_cell, h_target, posterior_state, z_dim, "posterior")
                        mu_p, log_var_p, prior_state = self.gaussian_rnn(
                            prior_cell, h, prior_state, z_dim, "prior")
                        z = utils.get_gaussian_tensor(mu, log_var)
                        _, predictor_state = self.deterministic_rnn(
                            predictor_cell, tf.concat([h, z], axis=1),
                            predictor_state, g_dim, "predictor")
                    x_pred = all_frames[i]
                else:
                    with tf.variable_scope("prediction", reuse=tf.AUTO_REUSE):
                        mu_p, log_var_p, prior_state = self.gaussian_rnn(
                            prior_cell, h, prior_state, z_dim, "prior")
                        z = utils.get_gaussian_tensor(mu_p, log_var_p)
                        h_pred, predictor_state = self.deterministic_rnn(
                            predictor_cell, tf.concat([h, z], axis=1),
                            predictor_state, g_dim, "predictor")
                    with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE):
                        x_pred = self.decoder(h_pred, skip)
                predicted_frames.append(x_pred)
                prior_mus.append(mu_p)
                prior_logvars.append(log_var_p)
                posterior_mus.append(mu)
                posterior_logvars.append(log_var)
                z_positions.append(z)

        recon_loss = 0
        kl_loss = 0

        # Reconstruction loss.
        recon_loss = l2_loss(tf.stack(predicted_frames),
                             tf.stack(all_frames[1:])) * (num_all_frames - 1)

        if self.is_training:
            # KL loss between the posterior and the learned prior.
            kl_loss = self.get_kl_loss(posterior_mus, posterior_logvars,
                                       prior_mus, prior_logvars)
        pred_outputs = tf.stack(predicted_frames[num_input_frames - 1:], axis=1)
        rgb_frames = tf.tile(
            common_layers.convert_real_to_rgb(tf.stack(predicted_frames, axis=1)),
            [1, 1, 1, 1, 3])
        all_frames = tf.stack(all_frames, axis=1)
        all_frames_rgb = tf.tile(common_layers.convert_real_to_rgb(all_frames),
                                 [1, 1, 1, 1, 3])
        common_video.gif_summary("body_output", rgb_frames)
        common_video.gif_summary("all_ground_frames", all_frames_rgb)
        tf.summary.scalar("kl_loss", kl_loss)
        tf.summary.scalar("recon_loss", recon_loss)
        loss = recon_loss + kl_loss

        return pred_outputs, loss, tf.stack(z_positions, axis=1)
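The sampling helper utils.get_gaussian_tensor(mu, log_var) is project-specific and not shown here; presumably it implements the standard reparameterization trick. A minimal sketch under that assumption:

import tensorflow as tf

def get_gaussian_tensor(mu, log_var):
  # Sample z ~ N(mu, exp(log_var)) while keeping gradients w.r.t. mu and log_var.
  eps = tf.random_normal(tf.shape(mu))
  return mu + tf.exp(0.5 * log_var) * eps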