Example 1
    def bottom(self, inputs):
        """Transform input from data space to model space.

        Perform the Xception "Entry flow", which consists of two convolutional
        filter upscalings followed by three residually connected separable
        convolution blocks.

        Args:
          inputs: A Tensor with shape [batch, ...]

        Returns:
          body_input: A Tensor with shape [batch, ?, ?, body_input_depth].
        """
        with tf.variable_scope(self.name):

            def xnet_resblock(x, filters, res_relu, name):
                with tf.variable_scope(name):
                    y = common_layers.separable_conv_block(
                        x,
                        filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
                        first_relu=True,
                        padding="SAME",
                        force2d=True,
                        name="sep_conv_block")
                    y = common_layers.pool(y, (3, 3),
                                           "MAX",
                                           "SAME",
                                           strides=(2, 2))
                    return y + common_layers.conv_block(
                        x,
                        filters, [((1, 1), (1, 1))],
                        padding="SAME",
                        strides=(2, 2),
                        first_relu=res_relu,
                        force2d=True,
                        name="res_conv0")

            inputs = common_layers.standardize_images(inputs)
            # TODO(lukaszkaiser): summaries here don't work in multi-problem case yet.
            # tf.summary.image("inputs", inputs, max_outputs=2)
            x = common_layers.conv_block(inputs,
                                         32, [((1, 1), (3, 3))],
                                         first_relu=False,
                                         padding="SAME",
                                         strides=(2, 2),
                                         force2d=True,
                                         name="conv0")
            x = common_layers.conv_block(x,
                                         64, [((1, 1), (3, 3))],
                                         padding="SAME",
                                         force2d=True,
                                         name="conv1")
            x = xnet_resblock(x, min(128, self._body_input_depth), True,
                              "block0")
            x = xnet_resblock(x, min(256, self._body_input_depth), False,
                              "block1")
            return xnet_resblock(x, self._body_input_depth, False, "block2")
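In all of these snippets the second positional argument to common_layers.conv_block is the number of output filters, and the third is a list of ((dilation_height, dilation_width), (kernel_height, kernel_width)) pairs, one per convolution in the block. A minimal sketch of a single strided 3x3 step, assuming tensor2tensor's common_layers can be imported as shown (import path assumed) and using a made-up image batch:

import tensorflow as tf
from tensor2tensor.layers import common_layers  # import path assumed

# Hypothetical batch of 32x32 RGB images, purely for illustration.
images = tf.random_uniform([8, 32, 32, 3])
x = common_layers.standardize_images(images)
# One ((dilation_h, dilation_w), (kernel_h, kernel_w)) pair per convolution;
# strides=(2, 2) halves both spatial dimensions, as in "conv0" above.
x = common_layers.conv_block(
    x,
    32, [((1, 1), (3, 3))],
    first_relu=False,
    padding="SAME",
    strides=(2, 2),
    force2d=True,
    name="conv0")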
Example 2
def bytenet_internal(inputs, targets, hparams):
    """ByteNet, main step used for training."""
    with tf.variable_scope("bytenet"):
        # Flatten inputs and extend length by 50%.
        inputs = tf.expand_dims(common_layers.flatten4d3d(inputs), axis=2)
        extend_length = tf.to_int32(0.5 * tf.to_float(tf.shape(inputs)[1]))
        inputs_shape = inputs.shape.as_list()
        inputs = tf.pad(inputs, [[0, 0], [0, extend_length], [0, 0], [0, 0]])
        inputs_shape[1] = None
        inputs.set_shape(
            inputs_shape)  # Don't lose the other shapes when padding.
        # Pad inputs and targets to be the same length, divisible by 50.
        inputs, targets = common_layers.pad_to_same_length(
            inputs, targets, final_length_divisible_by=50)
        final_encoder = residual_dilated_conv(inputs, hparams.num_block_repeat,
                                              "SAME", "encoder", hparams)

        # Shift targets right by one step so the decoder conditions only on
        # previously generated symbols (teacher forcing).
        shifted_targets = common_layers.shift_right(targets)
        kernel = (hparams.kernel_height, hparams.kernel_width)
        decoder_start = common_layers.conv_block(
            tf.concat([final_encoder, shifted_targets], axis=3),
            hparams.hidden_size, [((1, 1), kernel)],
            padding="LEFT")

        return residual_dilated_conv(decoder_start, hparams.num_block_repeat,
                                     "LEFT", "decoder", hparams)
Example 3
def ae_compress(x, is_2d, hparams, name, reuse=None):
    """Compress, then AE."""
    with tf.variable_scope(name, reuse=reuse):
        cur = compress(x, None, is_2d, hparams, "compress")
        # Convolve and ReLU to get state.
        cur = common_layers.conv_block(cur,
                                       hparams.hidden_size, [((1, 1), (1, 1))],
                                       name="mid_conv")
        # To use a standard VAE instead, uncomment the line below.
        # cur, vae_kl, _, _ = vae(cur, hparams, "kmeans_vae")
        means = tf.get_variable("z_to_dense",
                                [hparams.v_size, hparams.hidden_size])
        if hparams.use_gumbel_softmax:
            _, hot, loss = dae(cur, hparams, "dae")
            return cur, hot, loss
        # k-means part: L2-normalize so the fast cosine distance can be used.
        cur = mix(tf.nn.l2_normalize(cur, dim=3),
                  cur,
                  hparams.startup_steps // 3,
                  mode="exp",
                  simple=True)
        # Mix with a stop_gradient copy: the forward value is unchanged, but the
        # gradient flowing back from the k-means loss is scaled by kmeans_lr_factor.
        cur_n = hparams.kmeans_lr_factor * cur
        cur_n += (1.0 - hparams.kmeans_lr_factor) * tf.stop_gradient(cur)
        hot, loss = kmeans(cur_n, means, hparams, name="kmeans")
        # We need a linear layer to undo the l2-normalization.
        cur = tf.layers.dense(cur, hparams.hidden_size, name="unnormalize")
        return cur, hot, loss
Example 4
def bottom(self, inputs):
    with tf.variable_scope(self.name):
        inputs = common_layers.standardize_images(inputs)
        tf.summary.image("inputs", inputs, max_outputs=2)
        return common_layers.conv_block(
            inputs,
            self._body_input_depth, [((1, 1), (3, 3))],
            first_relu=False,
            padding="SAME",
            force2d=True,
            name="small_image_conv")
Example 5
def residual_conv(x, repeat, k, hparams, name, reuse=None):
    """A stack of convolution blocks with residual connections."""
    with tf.variable_scope(name, reuse=reuse):
        dilations_and_kernels = [((1, 1), k) for _ in xrange(3)]
        for i in xrange(repeat):
            with tf.variable_scope("repeat_%d" % i):
                y = common_layers.conv_block(common_layers.layer_norm(
                    x, hparams.hidden_size, name="lnorm"),
                                             hparams.hidden_size,
                                             dilations_and_kernels,
                                             padding="SAME",
                                             name="residual_conv")
                y = tf.nn.dropout(y, 1.0 - hparams.dropout)
                x += y
        return x
Example 6
def compress(x, c, is_2d, hparams, name):
    """Compress."""
    with tf.variable_scope(name):
        # Run compression by strided convs.
        cur = x
        k1 = (3, 3) if is_2d else (3, 1)
        k2 = (2, 2) if is_2d else (2, 1)
        for i in xrange(hparams.num_compress_steps):
            if c is not None:
                cur = attend(cur, c, hparams, "compress_attend_%d" % i)
            cur = residual_conv(cur, 1, k1, hparams, "compress_rc_%d" % i)
            cur = common_layers.conv_block(cur,
                                           hparams.hidden_size, [((1, 1), k2)],
                                           strides=k2,
                                           name="compress_%d" % i)
        return cur
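Each pass through the loop applies a stride-k2 convolution, so the length dimension is halved per compression step (and the width as well when is_2d). A purely shape-level sketch of that effect; the tensor and sizes below are made up for illustration:

import tensorflow as tf

x = tf.zeros([8, 96, 1, 256])  # [batch, length, 1, hidden] for a 1-D stream
for _ in range(3):             # e.g. hparams.num_compress_steps = 3
    x = x[:, ::2]              # stand-in for the stride-(2, 1) conv_block
print(x.shape)                 # (8, 12, 1, 256): length divided by 2**3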
Example 7
def residual_dilated_conv(x, repeat, padding, name, hparams):
    """A stack of convolution blocks with residual connections."""
    with tf.variable_scope(name):
        k = (hparams.kernel_height, hparams.kernel_width)
        dilations_and_kernels = [((2**i, 1), k)
                                 for i in xrange(hparams.num_hidden_layers)]
        for i in xrange(repeat):
            with tf.variable_scope("repeat_%d" % i):
                y = common_layers.conv_block(common_layers.layer_norm(
                    x, hparams.hidden_size, name="lnorm"),
                                             hparams.hidden_size,
                                             dilations_and_kernels,
                                             padding=padding,
                                             name="residual_conv")
                y = tf.nn.dropout(y, 1.0 - hparams.dropout)
                x += y
        return x
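Because the dilation rate is (2**i, 1) for layer i, the receptive field along the length dimension grows exponentially with hparams.num_hidden_layers while the parameter count stays fixed. A minimal usage sketch; it assumes TF1's tf.contrib.training.HParams, the tensor2tensor common_layers import path shown, and made-up sizes:

import tensorflow as tf
from tensor2tensor.layers import common_layers  # import path assumed

hparams = tf.contrib.training.HParams(
    hidden_size=256, dropout=0.1, kernel_height=3,
    kernel_width=1, num_hidden_layers=4)
x = tf.zeros([8, 96, 1, hparams.hidden_size])  # [batch, length, 1, hidden]
# Dilation rates used inside: (1, 1), (2, 1), (4, 1), (8, 1).
y = residual_dilated_conv(x, 2, "SAME", "encoder_sketch", hparams)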
Example 8
def decompress_step(source, c, hparams, first_relu, is_2d, name):
    """Decompression function."""
    with tf.variable_scope(name):
        shape = tf.shape(source)
        if c is not None:
            source = attend(source, c, hparams, "decompress_attend")
        multiplier = 4 if is_2d else 2
        kernel = (1, 1)  # A 1x1 kernel in both the 2-D and 1-D cases.
        thicker = common_layers.conv_block(source,
                                           hparams.hidden_size * multiplier,
                                           [((1, 1), kernel)],
                                           first_relu=first_relu,
                                           name="decompress_conv")
        if is_2d:
            return tf.depth_to_space(thicker, 2)
        return tf.reshape(thicker,
                          [shape[0], shape[1] * 2, 1, hparams.hidden_size])
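The two return branches trade channel depth for spatial size: in the 2-D case tf.depth_to_space turns the 4x-thicker activation into a map with twice the height and width, while in the 1-D case the reshape turns the 2x-thicker activation into twice the length. A shape-only sketch with hypothetical sizes:

import tensorflow as tf

thicker_2d = tf.zeros([4, 8, 8, 4 * 64])               # multiplier = 4 when is_2d
print(tf.depth_to_space(thicker_2d, 2).shape)           # (4, 16, 16, 64)

thicker_1d = tf.zeros([4, 8, 1, 2 * 64])                # multiplier = 2 otherwise
print(tf.reshape(thicker_1d, [4, 8 * 2, 1, 64]).shape)  # (4, 16, 1, 64)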
Example 9
def xnet_resblock(x, filters, res_relu, name):
    with tf.variable_scope(name):
        y = common_layers.separable_conv_block(
            x,
            filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
            first_relu=True,
            padding="SAME",
            force2d=True,
            name="sep_conv_block")
        y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 2))
        return y + common_layers.conv_block(
            x,
            filters, [((1, 1), (1, 1))],
            padding="SAME",
            strides=(2, 2),
            first_relu=res_relu,
            force2d=True,
            name="res_conv0")
Example 10
def xnet_resblock(x, filters, res_relu, name):
    with tf.variable_scope(name):
        # We only stride along the length dimension to preserve the spectral
        # bins (which are tiny in dimensionality relative to length)
        y = common_layers.separable_conv_block(
            x,
            filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
            first_relu=True,
            padding="SAME",
            force2d=True,
            name="sep_conv_block")
        y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 1))
        return y + common_layers.conv_block(
            x,
            filters, [((1, 1), (1, 1))],
            padding="SAME",
            strides=(2, 1),
            first_relu=res_relu,
            force2d=True,
            name="res_conv0")
Example 11
def xnet_resblock(x, filters, res_relu, name):
    with tf.variable_scope(name):
        # Typically audio samples are >100k samples in length and have a width
        # of 2 or 4. Mono audio has a single channel while stereo has 2.
        y = common_layers.separable_conv_block(
            x,
            filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))],
            first_relu=True,
            padding="SAME",
            force2d=True,
            name="sep_conv_block")
        y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 2))
        return y + common_layers.conv_block(
            x,
            filters, [((1, 1), (1, 1))],
            padding="SAME",
            strides=(2, 2),
            first_relu=res_relu,
            force2d=True,
            name="res_conv0")