Example #1
    def call(self, inputs, training=True, *args, **kwargs):
        """Creation of the model graph."""
        net = self._local_layers["conv2d"](inputs=inputs)
        net = hr.overlap(net)

        net = self._local_layers["batchnorm"](inputs=net, training=False)

        net = self._local_layers["maxpool2d"](net)

        c2 = self._local_layers["block_1"](
            inputs=net,
            training=False,
        )
        c2 = hr.overlap(c2)

        c3 = self._local_layers["block_2"](
            inputs=c2,
            training=training,
        )
        c3 = hr.overlap(c3)

        c4 = self._local_layers["block_3"](
            inputs=c3,
            training=training,
        )
        c4 = hr.overlap(c4)

        c5 = self._local_layers["block_4"](
            inputs=c4,
            training=training,
        )
        c5 = hr.overlap(c5)

        return {2: c2, 3: c3, 4: c4, 5: c5}
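
A hypothetical invocation of the backbone above (a minimal sketch; the `backbone` instance, the NHWC input shape, and the Keras-style dispatch to `call()` are assumptions, not shown in the snippet):

import tensorflow as tf

# Dummy image batch; the real input layout depends on the backbone's config.
images = tf.random.uniform([2, 224, 224, 3])
feature_maps = backbone(images, training=True)  # dispatches to call() above
# feature_maps maps feature level to tensor: {2: c2, 3: c3, 4: c4, 5: c5}
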
Example #2
    def __call__(self, inputs, training=False):
        """
        Args:
        inputs: `Tensor` of size `[batch, channels, height, width]`.

        Returns:
        The output `Tensor` of the block.
        """

        try:
            # Projection shortcut in first layer to match filters and strides
            shortcut = self._local_layers["projection"]["conv2d"](
                inputs=inputs)

            shortcut = self._local_layers["projection"]["batchnorm"](
                inputs=shortcut,
                training=training and self._trainable and self._finetune_bn)

        except KeyError:
            shortcut = inputs

        net = inputs

        for i in range(1, 4):
            net = self._local_layers["conv2d_%d" % i](inputs=net)
            net = hr.overlap(net)

            net = self._local_layers["batchnorm_%d" % i](inputs=net,
                                                         training=training
                                                         and self._trainable
                                                         and self._finetune_bn)

        return self._local_layers["activation"](net + shortcut)
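
A hypothetical call of this residual block (a minimal sketch; the `block` instance name and the channel count are assumptions, while the channels-first input layout follows the docstring above):

import tensorflow as tf

# [batch, channels, height, width] input, as documented above.
x = tf.random.uniform([8, 64, 56, 56])
y = block(x, training=True)  # output Tensor of the block, same rank as x
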
Example #3
def transformer_model(input_tensor,
                      attention_mask=None,
                      hidden_size=768,
                      num_hidden_layers=12,
                      num_attention_heads=12,
                      intermediate_size=3072,
                      intermediate_act_fn=gelu,
                      hidden_dropout_prob=0.1,
                      attention_probs_dropout_prob=0.1,
                      initializer_range=0.02,
                      do_return_all_layers=False):
    """Multi-headed, multi-layer Transformer from "Attention is All You Need".

  This is almost an exact implementation of the original Transformer encoder.

  See the original paper:
  https://arxiv.org/abs/1706.03762

  Also see:
  https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py

  Args:
    input_tensor: float Tensor of shape [batch_size, seq_length, hidden_size].
    attention_mask: (optional) int32 Tensor of shape [batch_size, seq_length,
      seq_length], with 1 for positions that can be attended to and 0 in
      positions that should not be.
    hidden_size: int. Hidden size of the Transformer.
    num_hidden_layers: int. Number of layers (blocks) in the Transformer.
    num_attention_heads: int. Number of attention heads in the Transformer.
    intermediate_size: int. The size of the "intermediate" (a.k.a., feed
      forward) layer.
    intermediate_act_fn: function. The non-linear activation function to apply
      to the output of the intermediate/feed-forward layer.
    hidden_dropout_prob: float. Dropout probability for the hidden layers.
    attention_probs_dropout_prob: float. Dropout probability of the attention
      probabilities.
    initializer_range: float. Range of the initializer (stddev of truncated
      normal).
    do_return_all_layers: Whether to also return all layers or just the final
      layer.

  Returns:
    float Tensor of shape [batch_size, seq_length, hidden_size], the final
    hidden layer of the Transformer.

  Raises:
    ValueError: A Tensor shape or parameter is invalid.
  """
    if hidden_size % num_attention_heads != 0:
        raise ValueError(
            "The hidden size (%d) is not a multiple of the number of attention "
            "heads (%d)" % (hidden_size, num_attention_heads))

    attention_head_size = int(hidden_size / num_attention_heads)
    input_shape = get_shape_list(input_tensor, expected_rank=3)
    batch_size = input_shape[0]
    seq_length = input_shape[1]
    input_width = input_shape[2]

    # The Transformer performs sum residuals on all layers so the input needs
    # to be the same as the hidden size.
    if input_width != hidden_size:
        raise ValueError(
            "The width of the input tensor (%d) != hidden size (%d)" %
            (input_width, hidden_size))

    # We keep the representation as a 2D tensor to avoid re-shaping it back and
    # forth from a 3D tensor to a 2D tensor. Re-shapes are normally free on
    # the GPU/CPU but may not be free on the TPU, so we want to minimize them to
    # help the optimizer.
    prev_output = reshape_to_matrix(input_tensor)

    all_layer_outputs = []
    for layer_idx in range(num_hidden_layers):
        with tf.variable_scope("layer_%d" % layer_idx):
            layer_input = prev_output

            with tf.variable_scope("attention"):
                attention_heads = []
                with tf.variable_scope("self"):
                    attention_head = attention_layer(
                        from_tensor=layer_input,
                        to_tensor=layer_input,
                        attention_mask=attention_mask,
                        num_attention_heads=num_attention_heads,
                        size_per_head=attention_head_size,
                        attention_probs_dropout_prob=attention_probs_dropout_prob,
                        initializer_range=initializer_range,
                        do_return_2d_tensor=True,
                        batch_size=batch_size,
                        from_seq_length=seq_length,
                        to_seq_length=seq_length)
                    attention_heads.append(attention_head)

                attention_output = None
                if len(attention_heads) == 1:
                    attention_output = attention_heads[0]
                else:
                    # In the case where we have other sequences, we just concatenate
                    # them to the self-attention head before the projection.
                    attention_output = tf.concat(attention_heads, axis=-1)

                # Run a linear projection of `hidden_size` then add a residual
                # with `layer_input`.
                with tf.variable_scope("output"):
                    attention_output = tf.layers.dense(
                        attention_output,
                        hidden_size,
                        kernel_initializer=create_initializer(
                            initializer_range))
                    attention_output = dropout(attention_output,
                                               hidden_dropout_prob)
                    attention_output = layer_norm(attention_output +
                                                  layer_input)

            # The activation is only applied to the "intermediate" hidden layer.
            with tf.variable_scope("intermediate"):
                intermediate_output = tf.layers.dense(
                    attention_output,
                    intermediate_size,
                    activation=intermediate_act_fn,
                    kernel_initializer=create_initializer(initializer_range))

            # Down-project back to `hidden_size` then add the residual.
            with tf.variable_scope("output"):
                layer_output = tf.layers.dense(
                    intermediate_output,
                    hidden_size,
                    kernel_initializer=create_initializer(initializer_range))
                layer_output = dropout(layer_output, hidden_dropout_prob)
                layer_output = layer_norm(layer_output + attention_output)
                if layer_idx % 4 == 0:  # Break XLA
                    layer_output = herring.overlap(layer_output)
                prev_output = layer_output
                all_layer_outputs.append(layer_output)

    if do_return_all_layers:
        final_outputs = []
        for layer_output in all_layer_outputs:
            final_output = reshape_from_matrix(layer_output, input_shape)
            final_outputs.append(final_output)
        return final_outputs
    else:
        final_output = reshape_from_matrix(prev_output, input_shape)
        return final_output
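
A minimal usage sketch for `transformer_model` (assumes the rest of the BERT-style modeling module, e.g. `get_shape_list`, `attention_layer`, `create_initializer`, is importable alongside it; the placeholder shapes are arbitrary):

import tensorflow as tf

# TF1-style graph-mode inputs: [batch_size, seq_length, hidden_size] activations
# and an optional [batch_size, seq_length, seq_length] attention mask.
input_tensor = tf.placeholder(tf.float32, shape=[8, 128, 768])
attention_mask = tf.placeholder(tf.int32, shape=[8, 128, 128])

final_layer = transformer_model(
    input_tensor=input_tensor,
    attention_mask=attention_mask,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12)
# final_layer: float Tensor of shape [8, 128, 768]
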
Example #4
    def call(self, inputs, **kwargs):
        """
        Returns:
        mask_outputs: a tensor with a shape of
          [batch_size, num_masks, mask_height, mask_width],
          representing the mask predictions.
        fg_gather_indices: a tensor with a shape of [batch_size, num_masks, 2],
          representing the fg mask targets.
        Raises:
        ValueError: If boxes is not a rank-3 tensor or the last dimension of
          boxes is not 4.
        """

        batch_size, num_rois, height, width, filters = (
            inputs.get_shape().as_list())

        net = tf.reshape(inputs, [-1, height, width, filters])

        for conv_id in range(4):
            net = self._conv_stage1[conv_id](net)
            net = hr.overlap(net)

        net = self._conv_stage2(net)
        net = hr.overlap(net)

        mask_outputs = self._conv_stage3(net)
        mask_outputs = hr.overlap(mask_outputs)

        mask_outputs = tf.reshape(mask_outputs, [
            -1, num_rois, self._mrcnn_resolution, self._mrcnn_resolution,
            self._num_classes
        ])

        with tf.name_scope('masks_post_processing'):

            mask_outputs = tf.transpose(a=mask_outputs, perm=[0, 1, 4, 2, 3])

            indices_dtype = tf.float32 if self._is_gpu_inference else tf.int32

            if batch_size == 1:
                indices = tf.reshape(
                    tf.reshape(tf.range(num_rois, dtype=indices_dtype),
                               [batch_size, num_rois, 1]) * self._num_classes +
                    tf.expand_dims(self._class_indices, axis=-1),
                    [batch_size, -1])
                indices = tf.cast(indices, tf.int32)

                mask_outputs = tf.reshape(mask_outputs, [
                    batch_size, -1, self._mrcnn_resolution,
                    self._mrcnn_resolution
                ])
                mask_outputs = tf.gather(mask_outputs, indices, axis=1)

                mask_outputs = tf.squeeze(mask_outputs, axis=1)
                mask_outputs = tf.reshape(mask_outputs, [
                    batch_size, num_rois, self._mrcnn_resolution,
                    self._mrcnn_resolution
                ])

            else:
                batch_indices = (tf.expand_dims(
                    tf.range(batch_size, dtype=indices_dtype), axis=1) *
                                 tf.ones([1, num_rois], dtype=indices_dtype))

                mask_indices = (tf.expand_dims(
                    tf.range(num_rois, dtype=indices_dtype), axis=0) *
                                tf.ones([batch_size, 1], dtype=indices_dtype))

                gather_indices = tf.stack(
                    [batch_indices, mask_indices, self._class_indices], axis=2)

                if self._is_gpu_inference:
                    gather_indices = tf.cast(gather_indices, dtype=tf.int32)

                mask_outputs = tf.gather_nd(mask_outputs, gather_indices)

        return mask_outputs
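
A hypothetical invocation of this mask head (a minimal sketch; the `mask_head` instance and the feature shape are assumptions, while the output resolution follows `self._mrcnn_resolution` in the code above):

import tensorflow as tf

# Per-ROI features of shape [batch_size, num_rois, height, width, filters].
roi_features = tf.random.uniform([2, 100, 14, 14, 256])
masks = mask_head(roi_features)  # dispatches to call() above
# masks: [2, 100, mrcnn_resolution, mrcnn_resolution], one mask per ROI,
# gathered for that ROI's predicted class.
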