Esempio n. 1
0
def residual_block_layer(inputs, hparams):
    """Apply a stack of residual convolution blocks to `inputs`.

    Each of the hparams.num_res_layers blocks layer-normalizes its input,
    runs a res_kernel_size x res_kernel_size convolution followed by a 1x1
    convolution, and then combines the result with the block input through
    common_layers.layer_postprocess (dropout, add and normalize according to
    hparams.layer_postprocess_sequence).

    Args:
      inputs: Tensor of shape [batch_size, height, width, hidden_dim].
      hparams: Hyperparameters, read by attribute (res_kernel_size,
        num_res_layers, hidden_size).

    Returns:
      Tensor of shape [batch_size, height, width, hidden_dim].
    """
    unit = (1, 1)
    spatial_spec = [(unit, (hparams.res_kernel_size, hparams.res_kernel_size))]
    pointwise_spec = [(unit, unit)]
    out = inputs
    for layer_idx in range(hparams.num_res_layers):
        with tf.variable_scope("res_conv_%d" % layer_idx):
            normed = common_layers.layer_norm(out,
                                              hparams.hidden_size,
                                              name="lnorm")
            # Spatial convolution over the normalized input.
            branch = common_layers.conv_block(normed,
                                              hparams.hidden_size,
                                              spatial_spec,
                                              strides=unit,
                                              padding="SAME",
                                              name="residual_conv")
            # Pointwise (1x1) convolution.
            branch = common_layers.conv_block(branch,
                                              hparams.hidden_size,
                                              pointwise_spec,
                                              strides=unit,
                                              padding="SAME",
                                              name="residual_dense")
            # Merge the branch back into the running activation.
            out = common_layers.layer_postprocess(out, branch, hparams)
    return out
Esempio n. 2
0
def residual_block_layer(inputs, hparams):
  """Run `hparams.num_res_layers` residual conv blocks over `inputs`.

  One block is:
    layer norm of the current activation
    conv: res_kernel_size x res_kernel_size
    conv: 1x1
    common_layers.layer_postprocess of (activation, conv output), i.e.
      dropout, add and normalize per hparams.layer_postprocess_sequence.

  Args:
    inputs: Tensor of shape [batch, height, width, hparams.hidden_size].
    hparams: tf.contrib.training.HParams.

  Returns:
    Tensor of shape [batch, height, width, hparams.hidden_size].
  """
  size = hparams.res_kernel_size
  wide_conv = [((1, 1), (size, size))]
  narrow_conv = [((1, 1), (1, 1))]
  shared_args = dict(strides=(1, 1), padding="SAME")

  result = inputs
  for idx in range(hparams.num_res_layers):
    with tf.variable_scope("res_conv_%d" % idx):
      branch = common_layers.layer_norm(
          result, hparams.hidden_size, name="lnorm")
      # size x size convolution on the normalized activation.
      branch = common_layers.conv_block(
          branch, hparams.hidden_size, wide_conv,
          name="residual_conv", **shared_args)
      # 1x1 convolution.
      branch = common_layers.conv_block(
          branch, hparams.hidden_size, narrow_conv,
          name="residual_dense", **shared_args)
      result = common_layers.layer_postprocess(result, branch, hparams)
  return result
Esempio n. 3
0
    def bottom(self, inputs):
        """Transform input from data space to model space.

        Runs the Xception "Entry flow": the standardized images go through
        two 3x3 convolution blocks (32 filters at stride 2, then 64 filters)
        and then through three residually connected separable convolution
        blocks, each of which is downsampled with stride 2.

        Args:
          inputs: A Tensor with shape [batch, ...]
        Returns:
          body_input: A Tensor with shape [batch, ?, ?, body_input_depth].
        """
        unit, three, two = (1, 1), (3, 3), (2, 2)

        def xnet_resblock(x, filters, res_relu, name):
            """Max-pooled separable-conv branch plus a strided 1x1 conv of x."""
            with tf.variable_scope(name):
                main = common_layers.separable_conv_block(
                    x,
                    filters, [(unit, three), (unit, three)],
                    first_relu=True,
                    padding="SAME",
                    force2d=True,
                    name="sep_conv_block")
                main = common_layers.pool(main, three, "MAX", "SAME",
                                          strides=two)
                shortcut = common_layers.conv_block(
                    x,
                    filters, [(unit, unit)],
                    padding="SAME",
                    strides=two,
                    first_relu=res_relu,
                    force2d=True,
                    name="res_conv0")
                return main + shortcut

        with tf.variable_scope(self.name):
            inputs = common_layers.standardize_images(inputs)
            # TODO(lukaszkaiser): summaries here don't work in multi-problem
            # case yet.
            # tf.summary.image("inputs", inputs, max_outputs=2)
            net = common_layers.conv_block(
                inputs,
                32, [(unit, three)],
                first_relu=False,
                padding="SAME",
                strides=two,
                force2d=True,
                name="conv0")
            net = common_layers.conv_block(
                net,
                64, [(unit, three)],
                padding="SAME",
                force2d=True,
                name="conv1")
            # The first two blocks are capped at 128 / 256 filters.
            net = xnet_resblock(net, min(128, self._body_input_depth), True,
                                "block0")
            net = xnet_resblock(net, min(256, self._body_input_depth), False,
                                "block1")
            return xnet_resblock(net, self._body_input_depth, False, "block2")
Esempio n. 4
0
  def bottom_compress(self, inputs, name="bottom"):
    """Transform input from data space to model space.

    Converts RGB pixel values to real numbers, folds the last two axes of the
    result into one, and runs a (1, 3) convolution with stride (1, 3) and
    VALID padding so that each group of three adjacent values is combined
    into a single position with body_input_depth filters.

    Args:
      inputs: A Tensor with shape [batch, ...]
      name: string, scope.
    Returns:
      body_input: A Tensor with shape [batch, ?, ?, body_input_depth].
    """
    with tf.variable_scope(name):
      real_valued = common_layers.convert_rgb_to_real(inputs)
      dims = common_layers.shape_list(real_valued)
      # Merge axes 2 and 3 and append a singleton last axis.
      merged_shape = [-1, dims[1], dims[2] * dims[3], 1]
      flat = tf.reshape(real_valued, merged_shape)
      flat.set_shape([None, None, None, 1])
      # We compress RGB intensities for each pixel using a conv.
      per_pixel_conv = [((1, 1), (1, 3))]
      compressed = common_layers.conv_block(
          flat,
          self._body_input_depth,
          per_pixel_conv,
          first_relu=False,
          padding="VALID",
          strides=(1, 3),
          force2d=True,
          name="conv_input")
      return compressed
Esempio n. 5
0
def bytenet_internal(inputs, targets, hparams):
  """ByteNet, main step used for training.

  Lengthens the flattened inputs by 50%, pads inputs and targets to a common
  length divisible by 50, encodes the inputs with residual dilated
  convolutions ("SAME" padding), and decodes the encoder output concatenated
  with the right-shifted targets using "LEFT"-padded convolutions.

  Args:
    inputs: input Tensor, flattened with common_layers.flatten4d3d.
    targets: target Tensor.
    hparams: hyperparameters; reads num_block_repeat, kernel_height,
      kernel_width and hidden_size.

  Returns:
    The output of the decoder residual_dilated_conv stack.
  """
  with tf.variable_scope("bytenet"):
    # Flatten inputs and extend length by 50%.
    flat = common_layers.flatten4d3d(inputs)
    inputs = tf.expand_dims(flat, axis=2)
    current_length = tf.to_float(tf.shape(inputs)[1])
    extend_length = tf.to_int32(0.5 * current_length)
    static_shape = inputs.shape.as_list()
    static_shape[1] = None  # The length axis is dynamic after padding.
    inputs = tf.pad(inputs, [[0, 0], [0, extend_length], [0, 0], [0, 0]])
    inputs.set_shape(static_shape)  # Keep the other static dimensions.

    # Pad inputs and targets to be the same length, divisible by 50.
    inputs, targets = common_layers.pad_to_same_length(
        inputs, targets, final_length_divisible_by=50)
    encoded = residual_dilated_conv(
        inputs, hparams.num_block_repeat, "SAME", "encoder", hparams)

    shifted = common_layers.shift_right(targets)
    decoder_kernel = (hparams.kernel_height, hparams.kernel_width)
    decoder_input = tf.concat([encoded, shifted], axis=3)
    decoder_start = common_layers.conv_block(
        decoder_input,
        hparams.hidden_size,
        [((1, 1), decoder_kernel)],
        padding="LEFT")

    decoded = residual_dilated_conv(
        decoder_start, hparams.num_block_repeat, "LEFT", "decoder", hparams)
    return decoded
Esempio n. 6
0
def slicenet_internal(inputs, targets, target_space, hparams, run_decoder=True):
  """The slicenet model, main step used for training.

  Args:
    inputs: input Tensor; projected with a 3x3 conv block when its static
      last dimension differs from hparams.hidden_size.
    targets: target Tensor, passed to slicenet_middle.
    target_space: passed to embed_target_space.
    hparams: hyperparameters; reads hidden_size and num_hidden_layers.
    run_decoder: when False, stop after the encoder.

  Returns:
    The encoder output alone when run_decoder is False; otherwise a tuple
    (decoder output, mean of the similarity loss).
  """
  with tf.variable_scope("slicenet"):
    # Project to hidden size if necessary
    input_depth = inputs.get_shape().as_list()[-1]
    if input_depth != hparams.hidden_size:
      inputs = common_layers.conv_block(
          inputs,
          hparams.hidden_size,
          [((1, 1), (3, 3))],
          first_relu=False,
          padding="SAME",
          force2d=True)

    # Flatten inputs and encode.
    flat = common_layers.flatten4d3d(inputs)
    inputs = tf.expand_dims(flat, axis=2)
    inputs_mask = 1.0 - embedding_to_padding(inputs)
    inputs = common_layers.add_timing_signal(inputs)  # Add position info.
    target_space_emb = embed_target_space(target_space, hparams.hidden_size)
    # The encoder is 1.5x as deep as the decoder (rounded down).
    encoder_layers = int(hparams.num_hidden_layers * 1.5)
    inputs_encoded = multi_conv_res(
        inputs, "SAME", "encoder", encoder_layers, hparams, mask=inputs_mask)
    if not run_decoder:
      return inputs_encoded

    # Do the middle part.
    decoder_start, similarity_loss = slicenet_middle(
        inputs_encoded, targets, target_space_emb, inputs_mask, hparams)

    # Decode.
    decoder_final = multi_conv_res(
        decoder_start,
        "LEFT",
        "decoder",
        hparams.num_hidden_layers,
        hparams,
        mask=inputs_mask,
        source=inputs_encoded)
    mean_similarity_loss = tf.reduce_mean(similarity_loss)
    return decoder_final, mean_similarity_loss
Esempio n. 7
0
 def project_to_hidden(inputs):
   """Run a 3x3 conv block with hparams.hidden_size filters over `inputs`.

   `hparams` is not a parameter here; it is resolved from the enclosing scope.
   """
   conv_spec = [((1, 1), (3, 3))]
   projected = common_layers.conv_block(
       inputs,
       hparams.hidden_size,
       conv_spec,
       first_relu=False,
       padding="SAME",
       force2d=True)
   return projected
Esempio n. 8
0
  def bottom_compress(self, inputs, name="bottom"):
    """Transform input from data space to model space.

    RGB pixel values are converted to real numbers, the last two axes are
    merged, and a strided (1, 3) convolution combines every three adjacent
    values into one position of depth body_input_depth.

    Args:
      inputs: A Tensor with shape [batch, ...]
      name: string, scope.
    Returns:
      body_input: A Tensor with shape [batch, ?, ?, body_input_depth].
    """
    triple = (1, 3)
    with tf.variable_scope(name):
      reals = common_layers.convert_rgb_to_real(inputs)
      shape = common_layers.shape_list(reals)
      # Fold axes 2 and 3 together; the new last axis has size 1.
      reals = tf.reshape(reals, [-1, shape[1], shape[2] * shape[3], 1])
      reals.set_shape([None, None, None, 1])
      # We compress RGB intensities for each pixel using a conv.
      return common_layers.conv_block(
          reals,
          self._body_input_depth,
          [((1, 1), triple)],
          first_relu=False,
          padding="VALID",
          strides=triple,
          force2d=True,
          name="conv_input")
Esempio n. 9
0
 def project_to_hidden(inputs):
   """Apply a 3x3 "SAME" conv block with hparams.hidden_size filters.

   `hparams` comes from the enclosing scope rather than from an argument.
   """
   options = dict(first_relu=False, padding="SAME", force2d=True)
   return common_layers.conv_block(
       inputs, hparams.hidden_size, [((1, 1), (3, 3))], **options)
def slicenet_internal(inputs, targets, target_space, hparams, run_decoder=True):
  """The slicenet model, main step used for training.

  Args:
    inputs: input Tensor; projected with a 3x3 conv block when its static
      last dimension differs from hparams.model_d.
    targets: target Tensor, passed to slicenet_middle.
    target_space: passed to embed_target_space.
    hparams: hyperparameters; reads model_d and num_hidden_layers.
    run_decoder: when False, only the encoder is built.

  Returns:
    The encoder output when run_decoder is False; otherwise the pair
    (decoder output, mean similarity loss).
  """
  with tf.variable_scope("slicenet"):
    # Project to hidden size if necessary
    needs_projection = inputs.get_shape().as_list()[-1] != hparams.model_d
    if needs_projection:
      inputs = common_layers.conv_block(
          inputs,
          hparams.model_d,
          [((1, 1), (3, 3))],
          first_relu=False,
          padding="SAME",
          force2d=True)

    # Flatten inputs and encode.
    flattened = common_layers.flatten4d3d(inputs)
    inputs = tf.expand_dims(flattened, axis=2)
    inputs_mask = 1.0 - embedding_to_padding(inputs)
    inputs = common_layers.add_timing_signal(inputs)  # Add position info.
    target_space_emb = embed_target_space(target_space, hparams.model_d)
    # Encoder depth is 1.5x the decoder depth, truncated to an int.
    encoder_depth = int(hparams.num_hidden_layers * 1.5)
    inputs_encoded = multi_conv_res(
        inputs, "SAME", "encoder", encoder_depth, hparams, mask=inputs_mask)
    if not run_decoder:
      return inputs_encoded

    # Do the middle part.
    decoder_start, similarity_loss = slicenet_middle(
        inputs_encoded, targets, target_space_emb, inputs_mask, hparams)

    # Decode.
    decoder_final = multi_conv_res(
        decoder_start,
        "LEFT",
        "decoder",
        hparams.num_hidden_layers,
        hparams,
        mask=inputs_mask,
        source=inputs_encoded)
    return decoder_final, tf.reduce_mean(similarity_loss)
Esempio n. 11
0
    def encode(self,
               inputs,
               target_space,
               hparams,
               features=None,
               losses=None):
        """Add two layers strided convolutions ontop of encode.

        The inputs go through a 3x3 conv block, are compressed with
        transformer_vae.compress using two compress steps, and are then
        handed to the parent class's encode.
        """
        conv_spec = [((1, 1), (3, 3))]
        projected = common_layers.conv_block(inputs,
                                             hparams.hidden_size,
                                             conv_spec,
                                             first_relu=False,
                                             padding="SAME",
                                             force2d=True,
                                             name="small_image_conv")

        # NOTE(review): this overwrites num_compress_steps on the caller's
        # hparams object, so the change is visible outside this method.
        hparams.num_compress_steps = 2
        compressed = transformer_vae.compress(projected,
                                              None,
                                              is_2d=True,
                                              hparams=hparams,
                                              name="convolutions")

        parent = super(TransformerSketch, self)
        return parent.encode(compressed,
                             target_space,
                             hparams,
                             features=features,
                             losses=losses)
Esempio n. 12
0
def bytenet_internal(inputs, targets, hparams):
    """ByteNet, main step used for training.

    The flattened inputs are zero-padded to 150% of their length, inputs and
    targets are padded to the same length (divisible by 50), the inputs are
    encoded with "SAME"-padded residual dilated convolutions, and the encoder
    output concatenated with the right-shifted targets is decoded with
    "LEFT"-padded convolutions.

    Args:
      inputs: input Tensor, flattened with common_layers.flatten4d3d.
      targets: target Tensor.
      hparams: hyperparameters; reads num_block_repeat, kernel_height,
        kernel_width and hidden_size.

    Returns:
      The output of the decoder residual_dilated_conv stack.
    """
    no_pad = [0, 0]
    with tf.variable_scope("bytenet"):
        # Flatten inputs and extend length by 50%.
        inputs = tf.expand_dims(common_layers.flatten4d3d(inputs), axis=2)
        length = tf.shape(inputs)[1]
        extend_length = tf.to_int32(0.5 * tf.to_float(length))
        known_shape = inputs.shape.as_list()
        inputs = tf.pad(inputs, [no_pad, [0, extend_length], no_pad, no_pad])
        # Don't lose the other shapes when padding.
        known_shape[1] = None
        inputs.set_shape(known_shape)

        # Pad inputs and targets to be the same length, divisible by 50.
        inputs, targets = common_layers.pad_to_same_length(
            inputs, targets, final_length_divisible_by=50)
        final_encoder = residual_dilated_conv(
            inputs, hparams.num_block_repeat, "SAME", "encoder", hparams)

        shifted_targets = common_layers.shift_right(targets)
        joined = tf.concat([final_encoder, shifted_targets], axis=3)
        kernel_spec = [((1, 1), (hparams.kernel_height, hparams.kernel_width))]
        decoder_start = common_layers.conv_block(joined,
                                                 hparams.hidden_size,
                                                 kernel_spec,
                                                 padding="LEFT")

        return residual_dilated_conv(
            decoder_start, hparams.num_block_repeat, "LEFT", "decoder",
            hparams)
Esempio n. 13
0
def ae_compress(x, is_2d, hparams, name, reuse=None):
  """Compress, then AE.

  After compression and a 1x1 conv block, one of three bottlenecks is used:
    * hparams.do_vae: bit_vae when hparams.bit_vae is set, otherwise vae;
    * hparams.use_gumbel_softmax: dae;
    * otherwise k-means on the (progressively) l2-normalized state.

  Args:
    x: Tensor to compress.
    is_2d: forwarded to compress.
    hparams: hyperparameters.
    name: string, variable scope.
    reuse: forwarded to tf.variable_scope.

  Returns:
    A tuple (state, hot, loss), where hot and loss come from the selected
    bottleneck.
  """
  with tf.variable_scope(name, reuse=reuse):
    state = compress(x, None, is_2d, hparams, "compress")
    # Convolve and ReLu to get state.
    state = common_layers.conv_block(
        state, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv")

    # The variable is created on every path, including those that do not
    # use it.
    if hparams.do_vae:
      means_size = hparams.z_size
    else:
      means_size = hparams.v_size
    means = tf.get_variable("z_to_dense", [means_size, hparams.hidden_size])

    if hparams.do_vae:
      if hparams.bit_vae:
        hot, loss = bit_vae(state, hparams, "bvae")
      else:
        hot, loss, _, _ = vae(state, hparams.z_size, "vae")
      return state, hot, loss

    if hparams.use_gumbel_softmax:
      _, hot, loss = dae(state, hparams, "dae")
      return state, hot, loss

    # Using k-means part. L2-normalizing to use fast cosine distance.
    normalized = tf.nn.l2_normalize(state, dim=3)
    state = mix(normalized, state,
                hparams.startup_steps // 3, mode="exp", simple=True)
    # Only a kmeans_lr_factor fraction of the gradient reaches `state`.
    lr_factor = hparams.kmeans_lr_factor
    scaled = lr_factor * state + (1.0 - lr_factor) * tf.stop_gradient(state)
    hot, loss = kmeans(scaled, means, hparams, name="kmeans")
    # We need a linear layer to undo the l2-normalization.
    state = tf.layers.dense(state, hparams.hidden_size, name="unnormalize")
    return state, hot, loss
Esempio n. 14
0
def vae_transformer_internal(inputs, targets, target_space, hparams):
  """VAE Transformer, main step used for training.

  Args:
    inputs: input Tensor, flattened with common_layers.flatten4d3d.
    targets: target Tensor, flattened the same way.
    target_space: forwarded to encode.
    hparams: hyperparameters; reads hidden_size, num_compress_steps and
      kl_warmup_steps.

  Returns:
    A tuple (output Tensor with an extra axis inserted at position 2,
    {"kl": scaled KL loss}).
  """
  with tf.variable_scope("vae_transformer"):
    # Prepare inputs, targets, and k.
    inputs = common_layers.flatten4d3d(inputs)
    # Double input size to cover targets.
    input_len = tf.shape(inputs)[1]
    inputs = tf.pad(inputs, [[0, 0], [0, input_len], [0, 0]])
    inputs.set_shape([None, None, hparams.hidden_size])
    targets = common_layers.flatten4d3d(targets)
    k = 2**hparams.num_compress_steps
    inputs, targets = common_layers.pad_to_same_length(
        inputs, targets, final_length_divisible_by=k)
    inputs = encode(inputs, target_space, hparams, "input_enc")

    # Compress and vae.
    targets_4d = tf.expand_dims(targets, axis=2)
    inputs_4d = tf.expand_dims(inputs, axis=2)
    z, kl_loss, _, _ = vae_compress(
        targets_4d, inputs_4d, hparams, "vae_compress", "vae_decompress")

    # Join z with inputs, run decoder.
    joined = tf.concat([z, tf.expand_dims(inputs, axis=2)], axis=3)
    to_decode = common_layers.conv_block(
        joined, hparams.hidden_size, [((1, 1), (1, 1))], name="join_z")
    ret = encode(tf.squeeze(to_decode, axis=2), target_space, hparams, "dec")

    # For experiments with one-sided decoder:
    # decoder_in = tf.squeeze(to_decode, axis=2)
    # (decoder_input, decoder_self_attention_bias) = (
    #     transformer.transformer_prepare_decoder(decoder_in, hparams))
    # ret = transformer.transformer_decoder(
    #     decoder_input, inputs, decoder_self_attention_bias, None, hparams)

    # Scale the KL term by the warmup schedule times a constant 3.0.
    kl_scale = common_layers.inverse_exp_decay(hparams.kl_warmup_steps) * 3.0
    kl_loss = kl_loss * kl_scale
    return tf.expand_dims(ret, axis=2), {"kl": kl_loss}
Esempio n. 15
0
 def testConvBlock(self):
     """conv_block with 13 filters turns a (5, 7, 1, 11) input into (5, 7, 1, 13)."""
     data = np.random.rand(5, 7, 1, 11)
     two_3x3_layers = [(1, (3, 3)), (1, (3, 3))]
     out = common_layers.conv_block(tf.constant(data, dtype=tf.float32),
                                    13,
                                    two_3x3_layers,
                                    padding="SAME",
                                    normalizer_fn=common_layers.noam_norm)
     self.evaluate(tf.global_variables_initializer())
     value = self.evaluate(out)
     self.assertEqual(value.shape, (5, 7, 1, 13))
Esempio n. 16
0
def xception_entry(inputs, hidden_dim):
    """Xception entry flow.

    Records an image summary of `inputs`, applies two 3x3 conv blocks
    (32 filters at stride 2, then 64 filters) and three residual blocks with
    min(128, hidden_dim), min(256, hidden_dim) and hidden_dim filters.

    Args:
      inputs: image Tensor.
      hidden_dim: number of filters of the last residual block.

    Returns:
      The output Tensor of the last residual block.
    """
    unit, three, two = (1, 1), (3, 3), (2, 2)

    def xnet_resblock(x, filters, res_relu, name):
        """Resblock: pooled separable convs plus a strided 1x1 conv of x."""
        with tf.variable_scope(name):
            main = common_layers.separable_conv_block(
                x,
                filters, [(unit, three), (unit, three)],
                first_relu=True,
                padding="SAME",
                force2d=True,
                name="sep_conv_block")
            main = common_layers.pool(main, three, "MAX", "SAME", strides=two)
            shortcut = common_layers.conv_block(
                x,
                filters, [(unit, unit)],
                padding="SAME",
                strides=two,
                first_relu=res_relu,
                force2d=True,
                name="res_conv0")
            return main + shortcut

    with tf.variable_scope("xception_entry"):
        tf.summary.image("inputs", inputs, max_outputs=2)
        net = common_layers.conv_block(
            inputs,
            32, [(unit, three)],
            first_relu=False,
            padding="SAME",
            strides=two,
            force2d=True,
            name="conv0")
        net = common_layers.conv_block(
            net,
            64, [(unit, three)],
            padding="SAME",
            force2d=True,
            name="conv1")
        net = xnet_resblock(net, min(128, hidden_dim), True, "block0")
        net = xnet_resblock(net, min(256, hidden_dim), False, "block1")
        return xnet_resblock(net, hidden_dim, False, "block2")
Esempio n. 17
0
def xception_entry(inputs, hidden_dim):
    """Xception-style entry stack applied to standardized images.

    The images are standardized, passed through two 3x3 conv blocks
    (32 filters at stride 2, then 64 filters), and then through three
    residual blocks with min(128, hidden_dim), min(256, hidden_dim) and
    hidden_dim filters.

    Args:
      inputs: image Tensor.
      hidden_dim: number of filters of the last residual block.

    Returns:
      The output Tensor of the last residual block.
    """
    sep_layers = [((1, 1), (3, 3)), ((1, 1), (3, 3))]

    def xnet_resblock(x, filters, res_relu, name):
        """Add a pooled separable-conv branch to a strided 1x1 conv of x."""
        with tf.variable_scope(name):
            branch = common_layers.separable_conv_block(
                x,
                filters,
                sep_layers,
                first_relu=True,
                padding="SAME",
                force2d=True,
                name="sep_conv_block")
            branch = common_layers.pool(
                branch, (3, 3), "MAX", "SAME", strides=(2, 2))
            skip = common_layers.conv_block(
                x,
                filters,
                [((1, 1), (1, 1))],
                padding="SAME",
                strides=(2, 2),
                first_relu=res_relu,
                force2d=True,
                name="res_conv0")
            return branch + skip

    with tf.variable_scope("xception_entry"):
        images = common_layers.standardize_images(inputs)
        # TODO(lukaszkaiser): summaries here don't work in multi-problem case
        # yet.
        # tf.summary.image("inputs", inputs, max_outputs=2)
        net = common_layers.conv_block(
            images,
            32,
            [((1, 1), (3, 3))],
            first_relu=False,
            padding="SAME",
            strides=(2, 2),
            force2d=True,
            name="conv0")
        net = common_layers.conv_block(
            net,
            64,
            [((1, 1), (3, 3))],
            padding="SAME",
            force2d=True,
            name="conv1")
        net = xnet_resblock(net, min(128, hidden_dim), True, "block0")
        net = xnet_resblock(net, min(256, hidden_dim), False, "block1")
        net = xnet_resblock(net, hidden_dim, False, "block2")
        return net
Esempio n. 18
0
 def testConvBlock(self):
   """Checks the output shape of a two-layer conv_block with noam_norm."""
   features = tf.constant(np.random.rand(5, 7, 1, 11), dtype=tf.float32)
   layer_spec = [(1, (3, 3)), (1, (3, 3))]
   block_out = common_layers.conv_block(
       features,
       13,
       layer_spec,
       padding="SAME",
       normalizer_fn=common_layers.noam_norm)
   self.evaluate(tf.global_variables_initializer())
   evaluated = self.evaluate(block_out)
   # Only the last dimension changes: 11 input channels -> 13 filters.
   self.assertEqual(evaluated.shape, (5, 7, 1, 13))
Esempio n. 19
0
def decompress(source, hparams, name):
    """Decompression function.

    A 1x1 conv block produces 2 * hparams.hidden_size filters, which are
    reshaped so that axis 1 is twice as long and the last axis has size
    hparams.hidden_size.

    Args:
      source: Tensor to decompress.
      hparams: hyperparameters; reads hidden_size.
      name: string, variable scope.

    Returns:
      Tensor reshaped to [shape[0], 2 * shape[1], 1, hparams.hidden_size],
      where shape is the dynamic shape of `source`.
    """
    with tf.variable_scope(name):
        dyn_shape = tf.shape(source)
        doubled = common_layers.conv_block(source,
                                           hparams.hidden_size * 2,
                                           [((1, 1), (1, 1))],
                                           name="decompress_conv")
        target_shape = [
            dyn_shape[0], dyn_shape[1] * 2, 1, hparams.hidden_size
        ]
        return tf.reshape(doubled, target_shape)
Esempio n. 20
0
def decompress_step(source, c, hparams, first_relu, name):
  """Decompression function.

  Optionally attends from `source` to `c`, then widens to
  2 * hparams.hidden_size filters with a 1x1 conv block and reshapes so that
  axis 1 is twice as long.

  Args:
    source: Tensor to decompress.
    c: Tensor to attend to, or None to skip attention.
    hparams: hyperparameters; reads hidden_size.
    first_relu: forwarded to conv_block.
    name: string, variable scope.

  Returns:
    Tensor reshaped to [shape[0], 2 * shape[1], 1, hparams.hidden_size],
    where shape is the dynamic shape of the original `source`.
  """
  with tf.variable_scope(name):
    # Taken before attention is applied.
    dyn_shape = tf.shape(source)
    if c is not None:
      source = attend(source, c, hparams, "decompress_attend")
    widened = common_layers.conv_block(
        source,
        hparams.hidden_size * 2,
        [((1, 1), (1, 1))],
        first_relu=first_relu,
        name="decompress_conv")
    new_shape = [dyn_shape[0], dyn_shape[1] * 2, 1, hparams.hidden_size]
    return tf.reshape(widened, new_shape)
Esempio n. 21
0
def compress_vae(inputs, hparams, name):
    """Compress, then VAE.

    Args:
      inputs: Tensor; an axis of size 1 is inserted at position 2 before the
        convolutions.
      hparams: hyperparameters; reads num_compress_steps and hidden_size.
      name: string, variable scope.

    Returns:
      A tuple (cur, kl_loss) as returned by vae.
    """
    with tf.variable_scope(name):
        # Run compression by strided convs.
        cur = tf.expand_dims(inputs, axis=2)
        # `range` instead of `xrange`: the latter does not exist on Python 3
        # unless it is imported from six.moves.
        for i in range(hparams.num_compress_steps):
            cur = common_layers.conv_block(cur,
                                           hparams.hidden_size,
                                           [((1, 1), (2, 1))],
                                           strides=(2, 1),
                                           name="compress_%d" % i)

        # Convolve and ReLu to get state.
        cur = common_layers.conv_block(cur,
                                       hparams.hidden_size, [((1, 1), (1, 1))],
                                       name="mid_conv")

        cur, kl_loss = vae(cur, hparams, name="vae")
        return cur, kl_loss
Esempio n. 22
0
 def bottom(self, inputs):
   """Standardize the images, log a summary, and apply a 3x3 conv block.

   The conv block uses self._body_input_depth filters and "SAME" padding.
   """
   with tf.variable_scope(self.name):
     images = common_layers.standardize_images(inputs)
     tf.summary.image("inputs", images, max_outputs=2)
     conv_spec = [((1, 1), (3, 3))]
     body_input = common_layers.conv_block(
         images,
         self._body_input_depth,
         conv_spec,
         first_relu=False,
         padding="SAME",
         force2d=True,
         name="small_image_conv")
     return body_input
Esempio n. 23
0
def decompress_step(source, hparams, first_relu, is_2d, name):
  """Decompression function.

  A 1x1 conv block widens `source` to hparams.hidden_size times 4 (2-D) or
  times 2 (1-D) filters. In the 2-D case the result goes through
  tf.depth_to_space with block size 2; otherwise it is reshaped so that
  axis 1 is twice as long.

  Args:
    source: Tensor to decompress.
    hparams: hyperparameters; reads hidden_size.
    first_relu: forwarded to conv_block.
    is_2d: whether to decompress along two spatial axes.
    name: string, variable scope.

  Returns:
    The decompressed Tensor.
  """
  with tf.variable_scope(name):
    dims = common_layers.shape_list(source)
    if is_2d:
      multiplier = 4
    else:
      multiplier = 2
    # The kernel is 1x1 in both the 1-D and the 2-D case.
    kernel = (1, 1)
    widened = common_layers.conv_block(
        source,
        hparams.hidden_size * multiplier,
        [((1, 1), kernel)],
        first_relu=first_relu,
        name="decompress_conv")
    if not is_2d:
      return tf.reshape(widened, [dims[0], dims[1] * 2, 1, hparams.hidden_size])
    return tf.depth_to_space(widened, 2)
def decompress_step(source, hparams, first_relu, is_2d, name):
  """Decompression function.

  Widens `source` with a 1x1 conv block (4x hidden_size filters when is_2d,
  2x otherwise) and redistributes the extra filters: tf.depth_to_space with
  block size 2 for 2-D, or a reshape doubling axis 1 for 1-D.

  Args:
    source: Tensor to decompress.
    hparams: hyperparameters; reads hidden_size.
    first_relu: forwarded to conv_block.
    is_2d: whether to decompress along two spatial axes.
    name: string, variable scope.

  Returns:
    The decompressed Tensor.
  """
  with tf.variable_scope(name):
    source_shape = common_layers.shape_list(source)
    num_filters = hparams.hidden_size * (4 if is_2d else 2)
    # Both the 1-D and 2-D variants use a 1x1 kernel.
    conv_spec = [((1, 1), (1, 1))]
    thick = common_layers.conv_block(
        source, num_filters, conv_spec,
        first_relu=first_relu, name="decompress_conv")
    if is_2d:
      return tf.depth_to_space(thick, 2)
    out_shape = [source_shape[0], source_shape[1] * 2, 1, hparams.hidden_size]
    return tf.reshape(thick, out_shape)
Esempio n. 25
0
def decompress_step(source, c, hparams, first_relu, name):
    """Decompression function.

    Two conv blocks (each two (3, 1) layers, "SAME" padding) produce two
    tensors: the first from `source`, the second from `source` concatenated
    with the first. The two are interleaved and reshaped so that axis 1 is
    twice as long.

    Args:
      source: Tensor to decompress.
      c: Tensor to attend to, or None to skip attention.
      hparams: hyperparameters; reads hidden_size.
      first_relu: forwarded to both conv blocks.
      name: string, variable scope.

    Returns:
      Tensor reshaped to [shape[0], 2 * shape[1], 1, hparams.hidden_size],
      where shape is the dynamic shape of the original `source`.
    """
    two_3x1_layers = [((1, 1), (3, 1)), ((1, 1), (3, 1))]
    with tf.variable_scope(name):
        # Taken before attention is applied.
        dyn_shape = tf.shape(source)
        if c is not None:
            source = attend(source, c, hparams, "decompress_attend")
        first = common_layers.conv_block(source,
                                         hparams.hidden_size,
                                         two_3x1_layers,
                                         first_relu=first_relu,
                                         padding="SAME",
                                         name="decompress_conv1")
        # The second block also sees the output of the first.
        second_input = tf.concat([source, first], axis=3)
        second = common_layers.conv_block(second_input,
                                          hparams.hidden_size,
                                          two_3x1_layers,
                                          first_relu=first_relu,
                                          padding="SAME",
                                          name="decompress_conv2")
        merged = interleave(first, second)
        out_shape = [dyn_shape[0], dyn_shape[1] * 2, 1, hparams.hidden_size]
        return tf.reshape(merged, out_shape)
Esempio n. 26
0
def compress(x, c, hparams, name):
  """Compress.

  Runs hparams.num_compress_steps rounds of (optional attention to `c`,
  residual conv, stride-(2, 1) conv block with a (2, 1) kernel).

  Args:
    x: Tensor to compress.
    c: Tensor to attend to before each step, or None to skip attention.
    hparams: hyperparameters; reads num_compress_steps and hidden_size.
    name: string, variable scope.

  Returns:
    The compressed Tensor.
  """
  with tf.variable_scope(name):
    # Run compression by strided convs.
    cur = x
    # `range` instead of `xrange`: the latter does not exist on Python 3
    # unless it is imported from six.moves.
    for i in range(hparams.num_compress_steps):
      if c is not None:
        cur = attend(cur, c, hparams, "compress_attend_%d" % i)
      cur = residual_conv(cur, 1, hparams, "compress_rc_%d" % i)
      cur = common_layers.conv_block(
          cur, hparams.hidden_size, [((1, 1), (2, 1))],
          strides=(2, 1), name="compress_%d" % i)
    return cur
def decompress_step(source, hparams, first_relu, name):
    """Decompression function.

    Widens `source` to 2 * hparams.model_d filters with a 1x1 conv block and
    reshapes the result so that axis 1 is twice as long.

    Args:
      source: Tensor to decompress.
      hparams: hyperparameters; reads model_d.
      first_relu: forwarded to conv_block.
      name: string, variable scope.

    Returns:
      Tensor reshaped to [shape[0], 2 * shape[1], 1, hparams.model_d].
    """
    with tf.variable_scope(name):
        dims = common_layers.shape_list(source)
        widened = common_layers.conv_block(source,
                                           hparams.model_d * 2,
                                           [((1, 1), (1, 1))],
                                           first_relu=first_relu,
                                           name="decompress_conv")
        out_shape = [dims[0], dims[1] * 2, 1, hparams.model_d]
        return tf.reshape(widened, out_shape)
def compress(x, is_2d, hparams, name):
  """Compress.

  Applies residual_conv with a (3, 3) kernel (2-D) or (3, 1) kernel (1-D),
  then hparams.num_compress_steps conv blocks whose kernel and stride are
  both (2, 2) for 2-D or (2, 1) for 1-D.

  Args:
    x: Tensor to compress.
    is_2d: whether to compress along two spatial axes.
    hparams: hyperparameters; reads num_compress_steps and hidden_size.
    name: string, variable scope.

  Returns:
    The compressed Tensor.
  """
  with tf.variable_scope(name):
    # Run compression by strided convs.
    cur = x
    k1 = (3, 3) if is_2d else (3, 1)
    cur = residual_conv(cur, hparams.num_compress_steps, k1, hparams, "rc")
    k2 = (2, 2) if is_2d else (2, 1)
    # `range` instead of `xrange`: the latter does not exist on Python 3
    # unless it is imported from six.moves.
    for i in range(hparams.num_compress_steps):
      cur = common_layers.conv_block(
          cur, hparams.hidden_size, [((1, 1), k2)],
          strides=k2, name="compress_%d" % i)
    return cur
Esempio n. 29
0
def compress(x, is_2d, hparams, name):
  """Compress.

  First a residual_conv with a (3, 3) kernel for 2-D or a (3, 1) kernel for
  1-D, then hparams.num_compress_steps strided conv blocks. Each conv block
  uses the same tuple for kernel and stride: (2, 2) for 2-D, (2, 1) for 1-D.

  Args:
    x: Tensor to compress.
    is_2d: whether to compress along two spatial axes.
    hparams: hyperparameters; reads num_compress_steps and hidden_size.
    name: string, variable scope.

  Returns:
    The compressed Tensor.
  """
  with tf.variable_scope(name):
    # Run compression by strided convs.
    cur = x
    k1 = (3, 3) if is_2d else (3, 1)
    cur = residual_conv(cur, hparams.num_compress_steps, k1, hparams, "rc")
    k2 = (2, 2) if is_2d else (2, 1)
    # `range` works on both Python 2 and 3; `xrange` needs six.moves on 3.
    for i in range(hparams.num_compress_steps):
      cur = common_layers.conv_block(
          cur, hparams.hidden_size, [((1, 1), k2)],
          strides=k2, name="compress_%d" % i)
    return cur
Esempio n. 30
0
def compress_encoder(inputs,
                     hparams,
                     strides=(2, 2),
                     kernel_size=(3, 3),
                     name=None):
    """Encoder that compresses 2-D inputs by 2**num_compress_steps.

    Args:
      inputs: Tensor of shape [batch, height, width, channels].
      hparams: HParams.
      strides: Tuple, strides for conv block.
      kernel_size: Tuple, kernel window size for conv block.
      name: string, variable scope. When None, the scope and the final dense
        layer fall back to the default name "compress".

    Returns:
      Tensor of shape [batch, latent_length, hparams.hidden_size], where
        latent_length is
        hparams.num_latents * (height*width) / 2**(hparams.num_compress_steps).
    """
    with tf.variable_scope(name, default_name="compress"):
        x = inputs
        for i in range(hparams.num_compress_steps // 2):
            with tf.variable_scope("compress_conv_%d" % i):
                y = common_layers.conv_block(
                    common_layers.layer_norm(x,
                                             hparams.hidden_size,
                                             name="lnorm"),
                    hparams.hidden_size,
                    dilation_rates_and_kernel_sizes=[((1, 1), kernel_size)],
                    strides=strides,
                    padding="SAME",
                    name="compress_conv_%d" % i)
                y = tf.nn.dropout(y, 1.0 - hparams.dropout)
                if hparams.do_compress_attend:
                    # NOTE(review): this replaces the strided conv output
                    # with self-attention over the block input `x`, so the
                    # conv result is not used on this path -- confirm that
                    # this is intended.
                    y = compress_self_attention_layer(
                        x, hparams, name="compress_selfatt_%d" % i)
                    y += x
                x = y

        x = residual_block_layer(x, hparams)

        # If using multiple copies of latents, blow up the hidden size and then
        # reshape to increase by num_latents.
        shape_x = common_layers.shape_list(x)
        # `name` defaults to None, and None + "_dense" raises TypeError, so
        # fall back to the same default the variable scope uses.
        dense_name = (name if name is not None else "compress") + "_dense"
        x = tf.layers.dense(x,
                            hparams.num_latents * hparams.hidden_size,
                            name=dense_name)
        return tf.reshape(x, [
            shape_x[0], shape_x[1] * shape_x[2] * hparams.num_latents,
            hparams.hidden_size
        ])
Esempio n. 31
0
def xception_entry(inputs, hidden_dim):
  """Builds the Xception entry flow on top of `inputs`.

  Emits an image summary of `inputs`, runs a stride-2 3x3 conv block with 32
  filters followed by a 3x3 conv block with 64 filters, then chains three
  stride-2 residual blocks.

  Args:
    inputs: Tensor passed to tf.summary.image and to the first conv block;
      presumably [batch, height, width, channels] -- TODO confirm.
    hidden_dim: int, filter count of the last residual block. The first two
      blocks use min(128, hidden_dim) and min(256, hidden_dim).

  Returns:
    Output Tensor of the last residual block.
  """
  conv3x3 = ((1, 1), (3, 3))
  conv1x1 = ((1, 1), (1, 1))

  def xnet_resblock(x, filters, res_relu, name):
    """Separable-conv branch, max-pooled, plus a strided 1x1 conv shortcut."""
    with tf.variable_scope(name):
      branch = common_layers.separable_conv_block(
          x,
          filters, [conv3x3, conv3x3],
          first_relu=True,
          padding="SAME",
          force2d=True,
          name="sep_conv_block")
      branch = common_layers.pool(
          branch, (3, 3), "MAX", "SAME", strides=(2, 2))
      shortcut = common_layers.conv_block(
          x,
          filters, [conv1x1],
          padding="SAME",
          strides=(2, 2),
          first_relu=res_relu,
          force2d=True,
          name="res_conv0")
      return branch + shortcut

  with tf.variable_scope("xception_entry"):
    tf.summary.image("inputs", inputs, max_outputs=2)
    net = common_layers.conv_block(
        inputs,
        32, [conv3x3],
        first_relu=False,
        padding="SAME",
        strides=(2, 2),
        force2d=True,
        name="conv0")
    net = common_layers.conv_block(
        net, 64, [conv3x3], padding="SAME", force2d=True, name="conv1")

    # (filters, res_relu, scope name) of each residual block, in call order.
    block_specs = (
        (min(128, hidden_dim), True, "block0"),
        (min(256, hidden_dim), False, "block1"),
        (hidden_dim, False, "block2"),
    )
    for filters, res_relu, block_name in block_specs:
      net = xnet_resblock(net, filters, res_relu, block_name)
    return net
Esempio n. 32
0
def xception_entry(inputs, hidden_dim):
  """Xception entry flow applied to standardized images.

  Standardizes `inputs`, applies a stride-2 3x3 conv block (32 filters) and a
  3x3 conv block (64 filters), then three stride-2 residual blocks.

  Args:
    inputs: Tensor handed to common_layers.standardize_images; presumably an
      image batch [batch, height, width, channels] -- TODO confirm.
    hidden_dim: int, filter count of the final residual block; the first two
      blocks are capped at 128 and 256 filters respectively.

  Returns:
    Output Tensor of the "block2" residual block.
  """
  with tf.variable_scope("xception_entry"):

    def xnet_resblock(x, filters, res_relu, name):
      """Stride-2 residual block: pooled separable convs plus 1x1 shortcut."""
      sep_layers = [((1, 1), (3, 3))] * 2
      with tf.variable_scope(name):
        pooled = common_layers.pool(
            common_layers.separable_conv_block(
                x,
                filters,
                sep_layers,
                first_relu=True,
                padding="SAME",
                force2d=True,
                name="sep_conv_block"),
            (3, 3),
            "MAX",
            "SAME",
            strides=(2, 2))
        residual = common_layers.conv_block(
            x,
            filters, [((1, 1), (1, 1))],
            padding="SAME",
            strides=(2, 2),
            first_relu=res_relu,
            force2d=True,
            name="res_conv0")
        return pooled + residual

    images = common_layers.standardize_images(inputs)
    # TODO(lukaszkaiser): summaries here don't work in multi-problem case yet.
    # tf.summary.image("inputs", images, max_outputs=2)
    stem = common_layers.conv_block(
        images,
        32, [((1, 1), (3, 3))],
        first_relu=False,
        padding="SAME",
        strides=(2, 2),
        force2d=True,
        name="conv0")
    stem = common_layers.conv_block(
        stem, 64, [((1, 1), (3, 3))],
        padding="SAME",
        force2d=True,
        name="conv1")
    block0 = xnet_resblock(stem, min(128, hidden_dim), True, "block0")
    block1 = xnet_resblock(block0, min(256, hidden_dim), False, "block1")
    return xnet_resblock(block1, hidden_dim, False, "block2")
Esempio n. 33
0
def compress_encoder(inputs,
                     hparams,
                     strides=(2, 2),
                     kernel_size=(3, 3),
                     name=None):
  """Encoder that compresses 2-D inputs by 2**num_compress_steps.

  Runs hparams.num_compress_steps // 2 strided conv blocks (each preceded by
  layer norm and followed by dropout), a residual block stack and a dense
  projection, then flattens the spatial dimensions.

  Args:
    inputs: Tensor of shape [batch, height, width, channels].
    hparams: tf.contrib.training.HParams.
    strides: Tuple, strides for conv block.
    kernel_size: Tuple, kernel window size for conv block.
    name: string, variable scope. When None, the scope falls back to
      "compress" and the dense layer is named "compress_dense".

  Returns:
    Tensor of shape [batch, latent_length, hparams.hidden_size], where
      latent_length is
      hparams.num_latents * (height*width) / 2**(hparams.num_compress_steps).
  """
  # `name` defaults to None; concatenating onto it raised TypeError. Use the
  # same fallback as the variable scope below. Explicit names are unchanged.
  dense_name = ("compress" if name is None else name) + "_dense"

  with tf.variable_scope(name, default_name="compress"):
    x = inputs
    for i in range(hparams.num_compress_steps // 2):
      with tf.variable_scope("compress_conv_%d" % i):
        y = common_layers.conv_block(
            common_layers.layer_norm(
                x, hparams.hidden_size, name="lnorm"),
            hparams.hidden_size,
            dilation_rates_and_kernel_sizes=[((1, 1), kernel_size)],
            strides=strides,
            padding="SAME",
            name="compress_conv_%d" % i)
        y = tf.nn.dropout(y, 1.0 - hparams.dropout)
        if hparams.do_compress_attend:
          # NOTE(review): this replaces the conv/dropout result above with
          # attention over the layer input `x` plus a skip connection, so the
          # strided conv output is discarded -- confirm this is intended.
          y = compress_self_attention_layer(
              x, hparams, name="compress_selfatt_%d" % i)
          y += x
        x = y

    x = residual_block_layer(x, hparams)

    # If using multiple copies of latents, blow up the hidden size and then
    # reshape to increase by num_latents.
    shape_x = common_layers.shape_list(x)
    x = tf.layers.dense(x,
                        hparams.num_latents * hparams.hidden_size,
                        name=dense_name)
    return tf.reshape(x, [shape_x[0],
                          shape_x[1] * shape_x[2] * hparams.num_latents,
                          hparams.hidden_size])
Esempio n. 34
0
def residual_conv(x, repeat, k, hparams, name, reuse=None):
  """Applies `repeat` residual conv blocks to `x`.

  Every step layer-normalizes the running tensor, feeds it through a conv
  block of three undilated convolutions with kernel `k`, applies dropout and
  adds the result back onto the running tensor.

  Args:
    x: input Tensor.
    repeat: int, number of residual steps.
    k: kernel size used by all three convolutions of each block.
    hparams: hyperparameters; hidden_size and dropout are read.
    name: variable scope name.
    reuse: forwarded to tf.variable_scope.

  Returns:
    The Tensor after the last residual addition.
  """
  conv_specs = [((1, 1), k)] * 3
  keep_prob = 1.0 - hparams.dropout
  with tf.variable_scope(name, reuse=reuse):
    for step in range(repeat):
      with tf.variable_scope("repeat_%d" % step):
        normed = common_layers.layer_norm(
            x, hparams.hidden_size, name="lnorm")
        branch = common_layers.conv_block(
            normed,
            hparams.hidden_size,
            conv_specs,
            padding="SAME",
            name="residual_conv")
        branch = tf.nn.dropout(branch, keep_prob)
        x += branch
    return x
Esempio n. 35
0
def residual_conv(x, repeat, k, hparams, name, reuse=None):
    """A stack of convolution blocks with residual connections.

    Each of the `repeat` steps layer-normalizes `x`, runs a conv block made of
    three undilated convolutions with kernel `k`, applies dropout and adds the
    result back onto `x`.

    Args:
      x: input Tensor; its last dimension is presumably hparams.hidden_size
        so that the residual add is valid -- TODO confirm.
      repeat: int, number of residual steps.
      k: kernel size forwarded to conv_block for all three convolutions.
      hparams: hyperparameters; reads hidden_size and dropout.
      name: variable scope name.
      reuse: forwarded to tf.variable_scope.

    Returns:
      Tensor produced by the last residual add (`x` itself if repeat is 0).
    """
    with tf.variable_scope(name, reuse=reuse):
        # Built-in `range` instead of `xrange`: the latter does not exist on
        # Python 3 unless imported from six.moves, and iteration is identical.
        dilations_and_kernels = [((1, 1), k) for _ in range(3)]
        for i in range(repeat):
            with tf.variable_scope("repeat_%d" % i):
                y = common_layers.conv_block(
                    common_layers.layer_norm(x,
                                             hparams.hidden_size,
                                             name="lnorm"),
                    hparams.hidden_size,
                    dilations_and_kernels,
                    padding="SAME",
                    name="residual_conv")
                y = tf.nn.dropout(y, 1.0 - hparams.dropout)
                x += y
        return x
Esempio n. 36
0
def compress_encoder(inputs,
                     hparams,
                     strides=(2, 2),
                     kernel=(3, 3),
                     name="compress"):
    """Strided-conv encoder that flattens its output into a latent sequence.

    Applies hparams.num_compress_steps // 2 blocks of (layer norm, conv block,
    dropout), then residual_block_layer, then a dense projection to
    hparams.num_latents * hparams.hidden_size channels, and finally merges
    the spatial dimensions and the latent copies into one axis.

    Args:
      inputs: Tensor of shape [batch, height, width, hidden_dim].
      hparams: hyperparameters; reads num_compress_steps, hidden_size,
        dropout and num_latents.
      strides: Tuple, strides of each compressing conv block.
      kernel: Tuple, kernel window size of each compressing conv block.
      name: string, variable scope (also the prefix of the dense layer name).

    Returns:
      Tensor of shape [batch, h * w * hparams.num_latents,
      hparams.hidden_size], where h and w are the spatial dimensions left
      after the conv and residual stacks.
    """
    num_strided_layers = hparams.num_compress_steps // 2
    with tf.variable_scope(name):
        net = inputs
        # Strided conv layers do the actual down-sampling.
        for layer_idx in range(num_strided_layers):
            layer_name = "compress_conv_%d" % layer_idx
            with tf.variable_scope(layer_name):
                normed = common_layers.layer_norm(net,
                                                  hparams.hidden_size,
                                                  name="lnorm")
                conv_out = common_layers.conv_block(
                    normed,
                    hparams.hidden_size, [((1, 1), kernel)],
                    strides=strides,
                    padding="SAME",
                    name=layer_name)
                net = tf.nn.dropout(conv_out, 1.0 - hparams.dropout)

        net = residual_block_layer(net, hparams)

        # Widen the channels by num_latents, then fold those copies into the
        # sequence axis together with height and width.
        dims = common_layers.shape_list(net)
        widened = tf.layers.dense(net,
                                  hparams.num_latents * hparams.hidden_size,
                                  name=name + "_dense")
        latent_length = dims[1] * dims[2] * hparams.num_latents
        return tf.reshape(widened,
                          [dims[0], latent_length, hparams.hidden_size])
Esempio n. 37
0
def ae_compress(x, is_2d, hparams, name, reuse=None):
    """Compress `x`, then apply a VAE, DAE or k-means stage to the state.

    The compressed state goes through a 1x1 conv block and then through one
    of three branches selected by hparams:
      * hparams.do_vae: bit_vae (if hparams.bit_vae) or vae, optionally
        followed by a second-level vae when hparams.z_size2 > 0.
      * hparams.use_gumbel_softmax: dae.
      * otherwise: kmeans against the "z_to_dense" variable.

    Args:
      x: input forwarded to compress(); its shape is not visible here.
      is_2d: forwarded unchanged to compress().
      hparams: hyperparameters; reads hidden_size, z_size, v_size, do_vae,
        bit_vae, z_size2, startup_steps, mode, use_gumbel_softmax and
        kmeans_lr_factor.
      name: variable scope name.
      reuse: forwarded to tf.variable_scope.

    Returns:
      Tuple (cur, hot, loss): the state tensor, the output of the selected
      branch and that branch's loss.
    """
    with tf.variable_scope(name, reuse=reuse):
        cur = compress(x, None, is_2d, hparams, "compress")
        # Convolve and ReLu to get state.
        cur = common_layers.conv_block(cur,
                                       hparams.hidden_size, [((1, 1), (1, 1))],
                                       name="mid_conv")
        # The variable is created on every path, although only the k-means
        # branch at the bottom reads it.
        means_size = hparams.z_size if hparams.do_vae else hparams.v_size
        means = tf.get_variable("z_to_dense",
                                [means_size, hparams.hidden_size])
        if hparams.do_vae:
            if hparams.bit_vae:
                hot, loss = bit_vae(cur, hparams, "bvae")
            else:
                hot, loss, _, _ = vae(cur, hparams.z_size, "vae")
            # Do a second level vae with some probability.
            if hparams.z_size2 > 0:
                # presumably a schedule that grows with the training step,
                # scaled to at most 0.8 -- verify inverse_exp_decay.
                prob_z2 = common_layers.inverse_exp_decay(
                    hparams.startup_steps * 2) * 0.8
                # Outside training the second level is used unconditionally.
                if hparams.mode != tf.contrib.learn.ModeKeys.TRAIN:
                    prob_z2 = 1.0

                def vae2():
                    # Second-level vae over the first-level output, projected
                    # back to z_size and mixed with the first-level `hot`.
                    hot2, loss2, _, _ = vae(hot, hparams.z_size2, "vae2")
                    ret = tf.layers.dense(hot2, hparams.z_size)
                    return mix(ret, hot, hparams.startup_steps * 2), loss2

                # Pick vae2 with probability prob_z2; otherwise keep `hot`
                # and contribute zero extra loss.
                hot, loss2 = tf.cond(tf.less(tf.random_uniform([]), prob_z2),
                                     vae2, lambda: (hot, tf.constant(0.0)))
                # The second-level loss is down-weighted by 0.1.
                loss += loss2 * 0.1
            return cur, hot, loss
        if hparams.use_gumbel_softmax:
            _, hot, loss = dae(cur, hparams, "dae")
            return cur, hot, loss
        # Using k-means part. L2-normalizing to use fast cosine distance.
        # Normalization is along axis 3 (assumes `cur` is rank 4 -- TODO
        # confirm) and is blended with the raw state via mix().
        cur = mix(tf.nn.l2_normalize(cur, dim=3),
                  cur,
                  hparams.startup_steps // 3,
                  mode="exp",
                  simple=True)
        # Same forward value as `cur`, but only a kmeans_lr_factor fraction
        # of the gradient flows back through it.
        cur_n = hparams.kmeans_lr_factor * cur
        cur_n += (1.0 - hparams.kmeans_lr_factor) * tf.stop_gradient(cur)
        hot, loss = kmeans(cur_n, means, hparams, name="kmeans")
        # We need a linear layer to undo the l2-normalization.
        cur = tf.layers.dense(cur, hparams.hidden_size, name="unnormalize")
        return cur, hot, loss
  def encode(self, inputs, target_space, hparams):
    """Runs a 3x3 conv and a compression stack before the parent encoder.

    Note that hparams.num_compress_steps is overwritten with 2 on the object
    passed in by the caller.

    Args:
      inputs: Tensor fed to the "small_image_conv" conv block.
      target_space: forwarded unchanged to the parent class' encode().
      hparams: hyperparameters; hidden_size is read and num_compress_steps
        is set to 2.

    Returns:
      Whatever the parent class' encode() returns for the compressed inputs.
    """
    conv_features = common_layers.conv_block(
        inputs,
        hparams.hidden_size, [((1, 1), (3, 3))],
        first_relu=False,
        padding="SAME",
        force2d=True,
        name="small_image_conv")

    # Fix the number of compression steps used by transformer_vae.compress.
    hparams.num_compress_steps = 2
    downsampled = transformer_vae.compress(
        conv_features, is_2d=True, hparams=hparams, name="convolutions")

    parent = super(TransformerSketch, self)
    return parent.encode(downsampled, target_space, hparams)
Esempio n. 39
0
def ae_compress(x, is_2d, hparams, name, reuse=None):
    """Compress `x`, L2-normalize the state and run k-means on it.

    Args:
      x: input forwarded to compress().
      is_2d: forwarded unchanged to compress().
      hparams: hyperparameters; reads hidden_size, v_size and
        kmeans_lr_factor.
      name: variable scope name.
      reuse: forwarded to tf.variable_scope.

    Returns:
      Tuple (cur, hot, loss): the state after the "unnormalize" dense layer,
      plus the two values returned by kmeans().
    """
    lr_factor = hparams.kmeans_lr_factor
    with tf.variable_scope(name, reuse=reuse):
        state = compress(x, None, is_2d, hparams, "compress")
        # 1x1 conv block producing the state.
        state = common_layers.conv_block(
            state,
            hparams.hidden_size, [((1, 1), (1, 1))],
            name="mid_conv")
        state = tf.nn.l2_normalize(state, dim=3)
        # Same forward value as `state`; only an lr_factor share of the
        # gradient passes through.
        scaled_state = (lr_factor * state +
                        (1.0 - lr_factor) * tf.stop_gradient(state))
        centroids = tf.get_variable("z_to_dense",
                                    [hparams.v_size, hparams.hidden_size])
        hot, loss = kmeans(scaled_state, centroids, hparams, name="kmeans")
        # A linear layer undoes the l2-normalization.
        state = tf.layers.dense(state,
                                hparams.hidden_size,
                                name="unnormalize")
        return state, hot, loss
Esempio n. 40
0
def residual_dilated_conv(x, repeat, padding, name, hparams):
  """Applies `repeat` residual blocks of dilated convolutions to `x`.

  Each block holds hparams.num_hidden_layers convolutions whose dilation
  along the first spatial axis doubles per layer (1, 2, 4, ...), all with a
  (kernel_height, kernel_width) kernel.

  Args:
    x: input Tensor.
    repeat: int, number of residual blocks.
    padding: padding mode forwarded to conv_block.
    name: variable scope name.
    hparams: hyperparameters; reads kernel_height, kernel_width,
      num_hidden_layers, hidden_size and dropout.

  Returns:
    The Tensor after the last residual addition.
  """
  with tf.variable_scope(name):
    kernel_hw = (hparams.kernel_height, hparams.kernel_width)
    conv_specs = [((2**layer, 1), kernel_hw)
                  for layer in range(hparams.num_hidden_layers)]
    for step in range(repeat):
      with tf.variable_scope("repeat_%d" % step):
        normed = common_layers.layer_norm(
            x, hparams.hidden_size, name="lnorm")
        branch = common_layers.conv_block(
            normed,
            hparams.hidden_size,
            conv_specs,
            padding=padding,
            name="residual_conv")
        branch = tf.nn.dropout(branch, 1.0 - hparams.dropout)
        x += branch
    return x
Esempio n. 41
0
def residual_dilated_conv(x, repeat, padding, name, hparams):
    """A stack of convolution blocks with residual connections.

    Each of the `repeat` blocks layer-normalizes `x`, runs
    hparams.num_hidden_layers convolutions with dilation (2**i, 1) and kernel
    (kernel_height, kernel_width), applies dropout and adds the result back
    onto `x`.

    Args:
      x: input Tensor; its last dimension is presumably hparams.hidden_size
        so that the residual add is valid -- TODO confirm.
      repeat: int, number of residual blocks.
      padding: padding mode forwarded to conv_block.
      name: variable scope name.
      hparams: hyperparameters; reads kernel_height, kernel_width,
        num_hidden_layers, hidden_size and dropout.

    Returns:
      Tensor produced by the last residual add (`x` itself if repeat is 0).
    """
    with tf.variable_scope(name):
        k = (hparams.kernel_height, hparams.kernel_width)
        # Built-in `range` instead of `xrange`: the latter does not exist on
        # Python 3 unless imported from six.moves, and iteration is identical.
        dilations_and_kernels = [((2**i, 1), k)
                                 for i in range(hparams.num_hidden_layers)]
        for i in range(repeat):
            with tf.variable_scope("repeat_%d" % i):
                y = common_layers.conv_block(
                    common_layers.layer_norm(x,
                                             hparams.hidden_size,
                                             name="lnorm"),
                    hparams.hidden_size,
                    dilations_and_kernels,
                    padding=padding,
                    name="residual_conv")
                y = tf.nn.dropout(y, 1.0 - hparams.dropout)
                x += y
        return x
Esempio n. 42
0
 def bottom(self, inputs):
     """Standardizes images and projects them with a 3x3 conv block.

     The conv block uses stride 2 when self._model_hparams.compress_steps is
     positive and stride 1 otherwise, and outputs self._body_input_depth
     filters.

     Args:
       inputs: Tensor handed to common_layers.standardize_images.

     Returns:
       Output Tensor of the "small_image_conv" conv block.
     """
     with tf.variable_scope(self.name):
         images = common_layers.standardize_images(inputs)
         # TODO(lukaszkaiser): summaries here don't work in multi-problem case yet.
         # tf.summary.image("inputs", images, max_outputs=2)
         compressing = self._model_hparams.compress_steps > 0
         conv_strides = (2, 2) if compressing else (1, 1)
         return common_layers.conv_block(
             images,
             self._body_input_depth, [((1, 1), (3, 3))],
             first_relu=False,
             strides=conv_strides,
             padding="SAME",
             force2d=True,
             name="small_image_conv")
Esempio n. 43
0
 def xnet_resblock(x, filters, res_relu, name):
   """Stride-2 residual block.

   Main branch: two separable 3x3 convs followed by a 3x3 stride-2 max-pool.
   Shortcut: a 1x1 stride-2 conv block on `x`. The two are summed.

   Args:
     x: input Tensor.
     filters: int, number of filters of both branches.
     res_relu: forwarded as `first_relu` to the shortcut conv block.
     name: variable scope name.

   Returns:
     Sum of the pooled main branch and the shortcut.
   """
   conv3x3 = ((1, 1), (3, 3))
   with tf.variable_scope(name):
     main_branch = common_layers.separable_conv_block(
         x,
         filters, [conv3x3, conv3x3],
         first_relu=True,
         padding="SAME",
         force2d=True,
         name="sep_conv_block")
     main_branch = common_layers.pool(
         main_branch, (3, 3), "MAX", "SAME", strides=(2, 2))
     shortcut = common_layers.conv_block(
         x,
         filters, [((1, 1), (1, 1))],
         padding="SAME",
         strides=(2, 2),
         first_relu=res_relu,
         force2d=True,
         name="res_conv0")
     return main_branch + shortcut
Esempio n. 44
0
 def xnet_resblock(x, filters, res_relu, name):
   """Residual block that halves both spatial dimensions.

   Adds a max-pooled stack of two separable 3x3 convolutions to a strided
   1x1 conv shortcut computed from the same input.

   Args:
     x: input Tensor.
     filters: int, number of filters of both branches.
     res_relu: forwarded as `first_relu` to the shortcut conv block.
     name: variable scope name.

   Returns:
     Sum of the pooled separable-conv branch and the shortcut.
   """
   sep_layers = [((1, 1), (3, 3))] * 2
   with tf.variable_scope(name):
     pooled = common_layers.pool(
         common_layers.separable_conv_block(
             x,
             filters,
             sep_layers,
             first_relu=True,
             padding="SAME",
             force2d=True,
             name="sep_conv_block"),
         (3, 3),
         "MAX",
         "SAME",
         strides=(2, 2))
     skip = common_layers.conv_block(
         x,
         filters, [((1, 1), (1, 1))],
         padding="SAME",
         strides=(2, 2),
         first_relu=res_relu,
         force2d=True,
         name="res_conv0")
     return pooled + skip
Esempio n. 45
0
def vae_compress(x, c, hparams, compress_name, decompress_name, reuse=None):
  """Compress, then VAE.

  Compresses `x`, derives a latent `z` as a blend of dvae() and
  top_k_experts() outputs, predicts `z` from the compressed context `c`, and
  decompresses `z` back through hparams.num_compress_steps steps.

  Args:
    x: input forwarded to compress().
    c: context; compressed for the reconstruction loss and also passed to
      every decompress_step().
    hparams: hyperparameters; reads hidden_size, v_size and
      num_compress_steps.
    compress_name: variable scope used for both compression stages.
    decompress_name: variable scope used for the decompression stage.
    reuse: forwarded to tf.variable_scope.

  Returns:
    Tuple (z, loss, mu, log_sigma) where loss is
    kl_loss + 0.0001 * reconstruct_loss. mu and log_sigma are always None.
  """
  mix_k = 8
  with tf.variable_scope(compress_name, reuse=reuse):
    cur = compress(x, None, hparams, "compress")
    # Convolve and ReLu to get state.
    cur = common_layers.conv_block(
        cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv")
    # z, kl_loss, mu, log_sigma = vae(cur, hparams, name="vae")
    z, kl_loss = dvae(cur, None, hparams, name="dvae")
    z1, kl_loss1 = top_k_experts(cur, mix_k, hparams)
    mu, log_sigma = None, None

    # Mix expert-selection and flat selection.
    alpha_p = common_layers.inverse_lin_decay(60000) + 0.001
    z = alpha_p * z1 + (1 - alpha_p) * z
    kl_loss += kl_loss1

  # Compress context.
  with tf.variable_scope(compress_name, reuse=reuse):
    compress_c = compress(c, None, hparams, "compress_context")
    c_z = tf.layers.dense(compress_c, hparams.v_size, name="mask_context")
    reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=z, logits=c_z)

  # If not training, use the predicted z instead of the autoregressive one.
  # if hparams.mode != tf.contrib.learn.ModeKeys.TRAIN:
  # z = mix(c_z, z, 50000, max_prob=0.3, mode="exp")
  # z, _ = top_k_softmax(c_z, mix_k)

  with tf.variable_scope(decompress_name, reuse=reuse):
    # Decompress.
    z = tf.layers.dense(z, hparams.hidden_size, name="z_to_dense")

    # Leak at the beginning to help train.
    z = mix(z, cur, 30000)

    # Built-in `range` instead of `xrange`: the latter does not exist on
    # Python 3 unless imported from six.moves, and iteration is identical.
    for i in range(hparams.num_compress_steps):
      # Scopes are numbered in reverse: the first step run has the highest
      # index.
      j = hparams.num_compress_steps - i - 1
      # NOTE(review): residual_conv is called as (x, repeat, hparams, name);
      # confirm this matches the residual_conv signature in scope.
      z = residual_conv(z, 1, hparams, "decompress_rc_%d" % j)
      z = decompress_step(z, c, hparams, i > 0, "decompress_step_%d" % j)
    return z, kl_loss + 0.0001 * reconstruct_loss, mu, log_sigma
Esempio n. 46
0
 def xnet_resblock(x, filters, res_relu, name):
   """Stride-2 residual block of separable convolutions.

   Args:
     x: input Tensor.
     filters: int, number of filters of both branches.
     res_relu: forwarded as `first_relu` to the shortcut conv block.
     name: variable scope name.

   Returns:
     Sum of the max-pooled separable-conv branch and the 1x1 conv shortcut.
   """
   conv3x3 = ((1, 1), (3, 3))
   with tf.variable_scope(name):
     # Typically audio samples are >100k samples in length and have a width
     # of 2 or 4. Mono audio has a single channel while stereo has 2.
     features = common_layers.separable_conv_block(
         x,
         filters, [conv3x3, conv3x3],
         first_relu=True,
         padding="SAME",
         force2d=True,
         name="sep_conv_block")
     features = common_layers.pool(
         features, (3, 3), "MAX", "SAME", strides=(2, 2))
     shortcut = common_layers.conv_block(
         x,
         filters, [((1, 1), (1, 1))],
         padding="SAME",
         strides=(2, 2),
         first_relu=res_relu,
         force2d=True,
         name="res_conv0")
     return features + shortcut
Esempio n. 47
0
 def xnet_resblock(x, filters, res_relu, name):
   """Residual block striding by (2, 1) in both branches.

   Args:
     x: input Tensor.
     filters: int, number of filters of both branches.
     res_relu: forwarded as `first_relu` to the shortcut conv block.
     name: variable scope name.

   Returns:
     Sum of the max-pooled separable-conv branch and the 1x1 conv shortcut.
   """
   # We only stride along the length dimension to preserve the spectral
   # bins (which are tiny in dimensionality relative to length).
   length_only_strides = (2, 1)
   conv3x3 = ((1, 1), (3, 3))
   with tf.variable_scope(name):
     features = common_layers.separable_conv_block(
         x,
         filters, [conv3x3, conv3x3],
         first_relu=True,
         padding="SAME",
         force2d=True,
         name="sep_conv_block")
     features = common_layers.pool(
         features, (3, 3), "MAX", "SAME", strides=length_only_strides)
     shortcut = common_layers.conv_block(
         x,
         filters, [((1, 1), (1, 1))],
         padding="SAME",
         strides=length_only_strides,
         first_relu=res_relu,
         force2d=True,
         name="res_conv0")
     return features + shortcut
Esempio n. 48
0
def xception_internal(inputs, hparams):
  """Xception body.

  Chooses an input stem from the static size of axis 1 of `inputs` (the full
  Xception entry flow above 200, a single 3x3 conv block otherwise), then
  applies hparams.num_hidden_layers residual blocks and the exit flow.

  Args:
    inputs: Tensor with a known static rank of at least 2; presumably
      [batch, height, width, channels] -- TODO confirm.
    hparams: hyperparameters; reads hidden_size and num_hidden_layers.

  Returns:
    Whatever xception_exit() returns for the final features.
  """
  with tf.variable_scope("xception"):
    cur = inputs

    # The static size may be None for dynamically-shaped inputs. Comparing
    # None with an int raises TypeError on Python 3, so an unknown size is
    # treated as a small image (the branch Python 2's ordering selected).
    height = cur.get_shape().as_list()[1]
    if height is not None and height > 200:
      # Large image, Xception entry flow
      cur = xception_entry(cur, hparams.hidden_size)
    else:
      # Small image, conv
      cur = common_layers.conv_block(
          cur,
          hparams.hidden_size, [((1, 1), (3, 3))],
          first_relu=False,
          padding="SAME",
          force2d=True,
          name="small_image_conv")

    # Built-in `range` instead of `xrange`: the latter does not exist on
    # Python 3 unless imported from six.moves, and iteration is identical.
    for i in range(hparams.num_hidden_layers):
      with tf.variable_scope("layer_%d" % i):
        cur = residual_block(cur, hparams)

    return xception_exit(cur)