Example #1
    def bottom(self, inputs):
        """Transform input from data space to model space.

        Perform the Xception "Entry flow", which consists of two convolutional
        filter upscalings followed by three residually connected separable
        convolution blocks.

        Args:
          inputs: A Tensor with shape [batch, ...]
        Returns:
          body_input: A Tensor with shape [batch, ?, ?, body_input_depth].
        """
        with tf.variable_scope(self.name):

            def xnet_resblock(x, filters, res_relu, name):
                with tf.variable_scope(name):
                    y = common_layers.separable_conv_block(
                        x,
                        filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
                        first_relu=True,
                        padding="SAME",
                        force2d=True,
                        name="sep_conv_block")
                    y = common_layers.pool(y, (3, 3),
                                           "MAX",
                                           "SAME",
                                           strides=(2, 2))
                    return y + common_layers.conv_block(x,
                                                        filters, [((1, 1),
                                                                   (1, 1))],
                                                        padding="SAME",
                                                        strides=(2, 2),
                                                        first_relu=res_relu,
                                                        force2d=True,
                                                        name="res_conv0")

            inputs = common_layers.standardize_images(inputs)
            # TODO(lukaszkaiser): summaries here don't work in multi-problem case yet.
            # tf.summary.image("inputs", inputs, max_outputs=2)
            x = common_layers.conv_block(inputs,
                                         32, [((1, 1), (3, 3))],
                                         first_relu=False,
                                         padding="SAME",
                                         strides=(2, 2),
                                         force2d=True,
                                         name="conv0")
            x = common_layers.conv_block(x,
                                         64, [((1, 1), (3, 3))],
                                         padding="SAME",
                                         force2d=True,
                                         name="conv1")
            x = xnet_resblock(x, min(128, self._body_input_depth), True,
                              "block0")
            x = xnet_resblock(x, min(256, self._body_input_depth), False,
                              "block1")
            return xnet_resblock(x, self._body_input_depth, False, "block2")
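For orientation, a quick sketch (not from the source; the helper name is made up) of the shape bookkeeping in this entry flow: "conv0" and each of the three xnet_resblock stages downsample by a stride-2 step, so with SAME padding an input of shape [batch, H, W, C] comes out as roughly [batch, ceil(H/16), ceil(W/16), body_input_depth]:

import math

def entry_flow_spatial_dims(h, w, num_stride2_stages=4):
    # One stride-2 conv ("conv0") plus a stride-2 max-pool in each of the
    # three xnet_resblock calls; SAME padding means ceil-division per stage.
    for _ in range(num_stride2_stages):
        h, w = math.ceil(h / 2), math.ceil(w / 2)
    return h, w

print(entry_flow_spatial_dims(224, 224))  # (14, 14)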
Example #2
  def bottom(self, inputs):
    """Transform input from data space to model space.

    Perform the Xception "Entry flow", which consists of two convolutional
    filter upscalings followed by three residually connected separable
    convolution blocks.

    Args:
      inputs: A Tensor with shape [batch, ...]
    Returns:
      body_input: A Tensor with shape [batch, ?, ?, body_input_depth].
    """
    with tf.variable_scope(self.name):

      def xnet_resblock(x, filters, res_relu, name):
        with tf.variable_scope(name):
          y = common_layers.separable_conv_block(
              x,
              filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
              first_relu=True,
              padding="SAME",
              force2d=True,
              name="sep_conv_block")
          y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 2))
          return y + common_layers.conv_block(
              x,
              filters, [((1, 1), (1, 1))],
              padding="SAME",
              strides=(2, 2),
              first_relu=res_relu,
              force2d=True,
              name="res_conv0")

      inputs = common_layers.standardize_images(inputs)
      # TODO(lukaszkaiser): summaries here don't work in multi-problem case yet.
      # tf.summary.image("inputs", inputs, max_outputs=2)
      x = common_layers.conv_block(
          inputs,
          32, [((1, 1), (3, 3))],
          first_relu=False,
          padding="SAME",
          strides=(2, 2),
          force2d=True,
          name="conv0")
      x = common_layers.conv_block(
          x, 64, [((1, 1), (3, 3))], padding="SAME", force2d=True, name="conv1")
      x = xnet_resblock(x, min(128, self._body_input_depth), True, "block0")
      x = xnet_resblock(x, min(256, self._body_input_depth), False, "block1")
      return xnet_resblock(x, self._body_input_depth, False, "block2")
Example #3
def bytenet_internal(inputs, targets, hparams, train):
  """ByteNet, main step used for training."""
  with tf.variable_scope("bytenet"):
    # Flatten inputs and extend length by 50%.
    inputs = tf.expand_dims(common_layers.flatten4d3d(inputs), axis=2)
    extend_length = tf.to_int32(0.5 * tf.to_float(tf.shape(inputs)[1]))
    inputs_shape = inputs.shape.as_list()
    inputs = tf.pad(inputs, [[0, 0], [0, extend_length], [0, 0], [0, 0]])
    inputs_shape[1] = None
    inputs.set_shape(inputs_shape)  # Don't lose the other shapes when padding.
    # Pad inputs and targets to be the same length, divisible by 50.
    inputs, targets = common_layers.pad_to_same_length(
        inputs, targets, final_length_divisible_by=50)
    final_encoder = residual_dilated_conv(
        inputs, hparams.num_block_repeat, "SAME", "encoder", hparams, train)

    shifted_targets = common_layers.shift_left(targets)
    kernel = (hparams.kernel_height, hparams.kernel_width)
    decoder_start = common_layers.conv_block(
        tf.concat([final_encoder, shifted_targets], axis=3),
        hparams.hidden_size, [((1, 1), kernel)],
        padding="LEFT")

    return residual_dilated_conv(
        decoder_start, hparams.num_block_repeat,
        "LEFT", "decoder", hparams, train)
Example #4
def bytenet_internal(inputs, targets, hparams, train):
    """ByteNet, main step used for training."""
    with tf.variable_scope("bytenet"):
        # Flatten inputs and extend length by 50%.
        inputs = tf.expand_dims(common_layers.flatten4d3d(inputs), axis=2)
        extend_length = tf.to_int32(0.5 * tf.to_float(tf.shape(inputs)[1]))
        inputs_shape = inputs.shape.as_list()
        inputs = tf.pad(inputs, [[0, 0], [0, extend_length], [0, 0], [0, 0]])
        inputs_shape[1] = None
        inputs.set_shape(
            inputs_shape)  # Don't lose the other shapes when padding.
        # Pad inputs and targets to be the same length, divisible by 50.
        inputs, targets = common_layers.pad_to_same_length(
            inputs, targets, final_length_divisible_by=50)
        final_encoder = residual_dilated_conv(inputs, hparams.num_block_repeat,
                                              "SAME", "encoder", hparams,
                                              train)

        shifted_targets = common_layers.shift_left(targets)
        kernel = (hparams.kernel_height, hparams.kernel_width)
        decoder_start = common_layers.conv_block(
            tf.concat([final_encoder, shifted_targets], axis=3),
            hparams.hidden_size, [((1, 1), kernel)],
            padding="LEFT")

        return residual_dilated_conv(decoder_start, hparams.num_block_repeat,
                                     "LEFT", "decoder", hparams, train)
Example #5
 def testConvBlock(self):
     x = np.random.rand(5, 7, 1, 11)
     with self.test_session() as session:
         y = common_layers.conv_block(tf.constant(x, dtype=tf.float32),
                                      13, [(1, (3, 3)), (1, (3, 3))],
                                      padding="SAME",
                                      normalizer_fn=common_layers.noam_norm)
         session.run(tf.global_variables_initializer())
         res = session.run(y)
     self.assertEqual(res.shape, (5, 7, 1, 13))
Example #6
 def testConvBlock(self):
   x = np.random.rand(5, 7, 1, 11)
   with self.test_session() as session:
     y = common_layers.conv_block(
         tf.constant(x, dtype=tf.float32),
         13, [(1, (3, 3)), (1, (3, 3))],
         padding="SAME",
         normalizer_fn=common_layers.noam_norm)
     session.run(tf.global_variables_initializer())
     res = session.run(y)
   self.assertEqual(res.shape, (5, 7, 1, 13))
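The expected shape (5, 7, 1, 13) follows directly from conv_block's arguments: SAME padding with the default stride of 1 preserves the spatial dims (7, 1), and the output channel count equals filters=13. A minimal sketch of that rule (illustrative only):

import math

def same_padding_out_dim(in_dim, stride=1):
    # With SAME padding, the output spatial size is ceil(in / stride),
    # independent of the kernel size.
    return math.ceil(in_dim / stride)

batch, h, w, filters = 5, 7, 1, 13
print((batch, same_padding_out_dim(h), same_padding_out_dim(w), filters))
# (5, 7, 1, 13), matching the assertEqual above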
Example #7
 def targets_bottom_simple(self, inputs):
     with tf.variable_scope(self.name):
         inputs = common_layers.standardize_images(inputs)
         if self._model_hparams.compress_steps > 0:
             kernel, strides = (2, 2), (2, 2)  # Crucial to not leak!
         else:
             kernel, strides = (1, 1), (1, 1)
         return common_layers.conv_block(inputs,
                                         self._body_input_depth,
                                         [((1, 1), kernel)],
                                         first_relu=False,
                                         strides=strides,
                                         force2d=True,
                                         name="small_image_conv")
Example #8
 def bottom(self, inputs):
   with tf.variable_scope(self.name):
     inputs = common_layers.standardize_images(inputs)
     # TODO(lukaszkaiser): summaries here don't work in multi-problem case yet.
     # tf.summary.image("inputs", inputs, max_outputs=2)
     if self._model_hparams.compress_steps > 0:
       strides = (2, 2)
     else:
       strides = (1, 1)
     return common_layers.conv_block(
         inputs,
         self._body_input_depth, [((1, 1), (3, 3))],
         first_relu=False,
         strides=strides,
         padding="SAME",
         force2d=True,
         name="small_image_conv")
Example #9
def residual_dilated_conv(x, repeat, padding, name, hparams, train):
  """A stack of convolution blocks with residual connections."""
  with tf.variable_scope(name):
    k = (hparams.kernel_height, hparams.kernel_width)
    dilations_and_kernels = [((2**i, 1), k)
                             for i in xrange(hparams.num_hidden_layers)]
    for i in xrange(repeat):
      with tf.variable_scope("repeat_%d" % i):
        y = common_layers.conv_block(
            x,
            hparams.hidden_size,
            dilations_and_kernels,
            padding=padding,
            name="residual_conv")
        x = common_layers.layer_norm(x + y, hparams.hidden_size, name="lnorm")
        x = tf.nn.dropout(x, 1.0 - hparams.dropout * tf.to_float(train))
    return x
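Since layer i uses dilation 2**i along the length axis, the receptive field of one conv_block here grows exponentially with num_hidden_layers. A quick sketch of that growth (illustrative helper, not from the source):

def dilated_stack_receptive_field(kernel_height, num_hidden_layers):
    # Each dilated layer widens the receptive field by (k - 1) * 2**i
    # positions along the length axis.
    return 1 + sum(
        (kernel_height - 1) * 2 ** i for i in range(num_hidden_layers))

print(dilated_stack_receptive_field(3, 4))  # 31 positions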
Example #10
 def bottom(self, inputs):
     with tf.variable_scope(self.name):
         inputs = common_layers.standardize_images(inputs)
         # TODO(lukaszkaiser): summaries here don't work in multi-problem case yet.
         # tf.summary.image("inputs", inputs, max_outputs=2)
         if self._model_hparams.compress_steps > 0:
             strides = (2, 2)
         else:
             strides = (1, 1)
         return common_layers.conv_block(inputs,
                                         self._body_input_depth,
                                         [((1, 1), (3, 3))],
                                         first_relu=False,
                                         strides=strides,
                                         padding="SAME",
                                         force2d=True,
                                         name="small_image_conv")
Example #11
def residual_dilated_conv(x, repeat, padding, name, hparams):
    """A stack of convolution blocks with residual connections."""
    with tf.variable_scope(name):
        k = (hparams.kernel_height, hparams.kernel_width)
        dilations_and_kernels = [((2**i, 1), k)
                                 for i in xrange(hparams.num_hidden_layers)]
        for i in xrange(repeat):
            with tf.variable_scope("repeat_%d" % i):
                y = common_layers.conv_block(x,
                                             hparams.hidden_size,
                                             dilations_and_kernels,
                                             padding=padding,
                                             name="residual_conv")
                x = common_layers.layer_norm(x + y,
                                             hparams.hidden_size,
                                             name="lnorm")
                # NOTE: tf.nn.dropout's second argument is keep_prob in TF1,
                # so hparams.dropout acts as a keep probability here; compare
                # Example #9, which passes 1.0 - hparams.dropout * train.
                x = tf.nn.dropout(x, hparams.dropout)
        return x
Example #12
 def xnet_resblock(x, filters, res_relu, name):
   with tf.variable_scope(name):
     y = common_layers.separable_conv_block(
         x,
         filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
         first_relu=True,
         padding="SAME",
         force2d=True,
         name="sep_conv_block")
     y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 2))
     return y + common_layers.conv_block(
         x,
         filters, [((1, 1), (1, 1))],
         padding="SAME",
         strides=(2, 2),
         first_relu=res_relu,
         force2d=True,
         name="res_conv0")
Example #13
 def xnet_resblock(x, filters, res_relu, name):
   with tf.variable_scope(name):
     y = common_layers.separable_conv_block(
         x,
         filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
         first_relu=True,
         padding="SAME",
         force2d=True,
         name="sep_conv_block")
     y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 2))
     return y + common_layers.conv_block(
         x,
         filters, [((1, 1), (1, 1))],
         padding="SAME",
         strides=(2, 2),
         first_relu=res_relu,
         force2d=True,
         name="res_conv0")
Example #14
 def xnet_resblock(x, filters, res_relu, name):
   with tf.variable_scope(name):
     # Typically audio samples are >100k samples in length and have a width
     # of 2 or 4. Mono audio has a single channel while stereo has 2.
     y = common_layers.separable_conv_block(
         x,
         filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
         first_relu=True,
         padding="SAME",
         force2d=True,
         name="sep_conv_block")
     y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 2))
     return y + common_layers.conv_block(
         x,
         filters, [((1, 1), (1, 1))],
         padding="SAME",
         strides=(2, 2),
         first_relu=res_relu,
         force2d=True,
         name="res_conv0")
Example #15
 def xnet_resblock(x, filters, res_relu, name):
   with tf.variable_scope(name):
     # We only stride along the length dimension to preserve the spectral
     # bins (which are tiny in dimensionality relative to length)
     y = common_layers.separable_conv_block(
         x,
         filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
         first_relu=True,
         padding="SAME",
         force2d=True,
         name="sep_conv_block")
     y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 1))
     return y + common_layers.conv_block(
         x,
         filters, [((1, 1), (1, 1))],
         padding="SAME",
         strides=(2, 1),
         first_relu=res_relu,
         force2d=True,
         name="res_conv0")
Example #16
 def xnet_resblock(x, filters, res_relu, name):
   with tf.variable_scope(name):
     # Typically audio samples are >100k samples in length and have a width
     # of 2 or 4. Mono audio has a single channel while stereo has 2.
     y = common_layers.separable_conv_block(
         x,
         filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
         first_relu=True,
         padding="SAME",
         force2d=True,
         name="sep_conv_block")
     y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 2))
     return y + common_layers.conv_block(
         x,
         filters, [((1, 1), (1, 1))],
         padding="SAME",
         strides=(2, 2),
         first_relu=res_relu,
         force2d=True,
         name="res_conv0")
Example #17
 def xnet_resblock(x, filters, res_relu, name):
   with tf.variable_scope(name):
     # We only stride along the length dimension to preserve the spectral
     # bins (which are tiny in dimensionality relative to length)
     y = common_layers.separable_conv_block(
         x,
         filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
         first_relu=True,
         padding="SAME",
         force2d=True,
         name="sep_conv_block")
     y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 1))
     return y + common_layers.conv_block(
         x,
         filters, [((1, 1), (1, 1))],
         padding="SAME",
         strides=(2, 1),
         first_relu=res_relu,
         force2d=True,
         name="res_conv0")