Example #1
def _bottleneck_block_v1(inputs, filters, training, projection_shortcut,
                         strides):
    """A single block for ResNet v1, with a bottleneck.

    Similar to _building_block_v1(), except using the "bottleneck" blocks
    described in:
      Deep Residual Learning for Image Recognition
      https://arxiv.org/pdf/1512.03385.pdf
      by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.

    The layer ordering is convolution, then batch normalization, then ReLU.

    Args:
      inputs: A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on the data format.
      filters: The number of filters for the convolutions.
      training: A Boolean for whether the model is in training or inference
        mode. Needed for batch normalization.
      projection_shortcut: The function to use for projection shortcuts
        (typically a 1x1 convolution when downsampling the input).
      strides: The block's stride. If greater than 1, this block will
        ultimately downsample the input.

    Returns:
      The output tensor of the block; shape should match inputs.
    """
    with tf.variable_scope('bottleneck_v1'):
        shortcut = inputs

        if projection_shortcut is not None:
            shortcut = projection_shortcut(inputs)
            shortcut = batch_norm(inputs=shortcut, training=training)

        inputs = conv2d_fixed_padding(inputs=inputs,
                                      filters=filters,
                                      kernel_size=1,
                                      strides=1)
        inputs = batch_norm(inputs, training)
        inputs = tf.nn.relu(inputs)

        inputs = conv2d_fixed_padding(inputs=inputs,
                                      filters=filters,
                                      kernel_size=3,
                                      strides=strides)
        inputs = batch_norm(inputs, training)
        inputs = tf.nn.relu(inputs)

        inputs = conv2d_fixed_padding(inputs=inputs,
                                      filters=4 * filters,
                                      kernel_size=1,
                                      strides=1)
        inputs = batch_norm(inputs, training)
        inputs += shortcut
        inputs = tf.nn.relu(inputs)

    return inputs
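The block above relies on two helpers that are not shown, batch_norm and conv2d_fixed_padding. Below is a minimal sketch of what they might look like, modeled on the TensorFlow official ResNet implementation and assuming channels_last data; the momentum/epsilon values and the initializer are assumptions, not taken from the example above.

import tensorflow as tf

def batch_norm(inputs, training):
    # Batch normalization with hyperparameters commonly used for ResNet
    # (the momentum/epsilon values here are assumptions).
    return tf.layers.batch_normalization(
        inputs=inputs, axis=3, momentum=0.997, epsilon=1e-5,
        center=True, scale=True, training=training, fused=True)

def fixed_padding(inputs, kernel_size):
    # Explicitly pads so the output size depends only on the stride,
    # not on the input size (channels_last assumed).
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    return tf.pad(inputs,
                  [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])

def conv2d_fixed_padding(inputs, filters, kernel_size, strides, use_bias=False):
    # Strided convolutions use explicit padding plus 'VALID' so that
    # downsampling is independent of the input size.
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size)
    return tf.layers.conv2d(
        inputs=inputs, filters=filters, kernel_size=kernel_size,
        strides=strides, padding=('SAME' if strides == 1 else 'VALID'),
        use_bias=use_bias,
        kernel_initializer=tf.variance_scaling_initializer())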
Example #2
def res_block(inputs,
              expansion_ratio,
              output_dim,
              stride,
              is_train,
              name,
              bias=False,
              shortcut=True,
              is_pw=True):
    with tf.name_scope(name), tf.variable_scope(name):
        if is_pw:
            # pointwise (1x1) expansion convolution
            bottleneck_dim = round(expansion_ratio *
                                   inputs.get_shape().as_list()[-1])
            net = conv_1x1(inputs, bottleneck_dim, name='pw', bias=bias)
            net = batch_norm(net,
                             training=is_train,
                             scale=True,
                             name='batch_normalization_pw')
            net = relu6(net)
        else:
            net = inputs
        # depthwise convolution
        net = dwise_conv(net,
                         strides=[1, stride, stride, 1],
                         name='dw',
                         bias=bias)
        net = batch_norm(net,
                         training=is_train,
                         scale=True,
                         name='batch_normalization_dw')
        net = relu6(net)
        # pointwise linear projection (1x1 conv, no activation)
        net = conv_1x1(net, output_dim, name='pw_linear', bias=bias)
        net = batch_norm(net,
                         training=is_train,
                         scale=True,
                         name='batch_normalization_pw_linear')

        # element-wise residual add, only when stride == 1 (spatial dims match)
        if shortcut and stride == 1:
            in_dim = int(inputs.get_shape().as_list()[-1])
            if in_dim != output_dim:
                ins = conv_1x1(inputs, output_dim, name='ex_dim')
                net = ins + net
            else:
                net = inputs + net

        return net
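This inverted-residual block (MobileNetV2 style) expands channels with a pointwise convolution, filters spatially with a depthwise convolution, and projects back down with a linear pointwise convolution. The helpers conv_1x1, dwise_conv, and relu6 are not shown; a plausible sketch follows, with the initializer and the default depthwise kernel size as assumptions.

import tensorflow as tf

def relu6(x, name='relu6'):
    # ReLU capped at 6, the activation used throughout MobileNet.
    return tf.nn.relu6(x, name=name)

def conv_1x1(inputs, output_dim, name, bias=False):
    # Pointwise convolution: a 1x1 kernel that only mixes channels.
    with tf.variable_scope(name):
        in_dim = inputs.get_shape().as_list()[-1]
        w = tf.get_variable(
            'weights', [1, 1, in_dim, output_dim],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        out = tf.nn.conv2d(inputs, w, strides=[1, 1, 1, 1], padding='SAME')
        if bias:
            b = tf.get_variable('bias', [output_dim],
                                initializer=tf.zeros_initializer())
            out = tf.nn.bias_add(out, b)
        return out

def dwise_conv(inputs, strides, name, bias=False, k=3, channel_multiplier=1):
    # Depthwise convolution: one k x k filter per input channel,
    # so spatial filtering happens without channel mixing.
    with tf.variable_scope(name):
        in_dim = inputs.get_shape().as_list()[-1]
        w = tf.get_variable(
            'weights', [k, k, in_dim, channel_multiplier],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        out = tf.nn.depthwise_conv2d(inputs, w, strides=strides, padding='SAME')
        if bias:
            b = tf.get_variable('bias', [in_dim * channel_multiplier],
                                initializer=tf.zeros_initializer())
            out = tf.nn.bias_add(out, b)
        return out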
Example #3
def _building_block_v2(inputs, filters, training, projection_shortcut,
                       strides):
    """A single block for ResNet v2, without a bottleneck.

    Batch normalization, then ReLU, then convolution, as described by:
    Identity Mappings in Deep Residual Networks
    https://arxiv.org/pdf/1603.05027.pdf
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.

    Args:
      inputs: A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on the data format.
      filters: The number of filters for the convolutions.
      training: A Boolean for whether the model is in training or inference
        mode. Needed for batch normalization.
      projection_shortcut: The function to use for projection shortcuts
        (typically a 1x1 convolution when downsampling the input).
      strides: The block's stride. If greater than 1, this block will
        ultimately downsample the input.

    Returns:
      The output tensor of the block; shape should match inputs.
    """
    with tf.variable_scope('building_block_v2'):
        shortcut = inputs
        inputs = batch_norm(inputs, training)
        inputs = tf.nn.relu(inputs)

        # The projection shortcut should come after the first batch norm and ReLU
        # since it performs a 1x1 convolution.
        if projection_shortcut is not None:
            shortcut = projection_shortcut(inputs)

        inputs = conv2d_fixed_padding(inputs=inputs,
                                      filters=filters,
                                      kernel_size=3,
                                      strides=strides)

        inputs = batch_norm(inputs, training)
        inputs = tf.nn.relu(inputs)
        inputs = conv2d_fixed_padding(inputs=inputs,
                                      filters=filters,
                                      kernel_size=3,
                                      strides=1)

        inputs = inputs + shortcut
    return inputs
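For illustration, here is a hypothetical call that downsamples with the v2 block: the projection shortcut is a strided 1x1 convolution, so the shortcut path matches the shape of the downsampled main path. The shapes and filter count are made up for the example.

# Hypothetical usage (shapes and filter count are illustrative only).
def projection_shortcut(x):
    # 1x1 strided conv so the shortcut matches the downsampled main path.
    return conv2d_fixed_padding(inputs=x, filters=64, kernel_size=1, strides=2)

images = tf.placeholder(tf.float32, [None, 56, 56, 64])
outputs = _building_block_v2(images, filters=64, training=True,
                             projection_shortcut=projection_shortcut,
                             strides=2)  # -> [None, 28, 28, 64]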
Example #4
    def build_model(self, inputs, is_training, requested_stages=None):
        """Add operations to classify a batch of input images.

        Args:
          inputs: A Tensor representing a batch of input images.
          is_training: A boolean. Set to True to add operations required only when
            training the classifier.
          requested_stages: An optional list of stages at which to stop
            building the model and return the endpoints collected so far.

        Returns:
          A tuple (outputs, end_points, frontend_scope). When all stages are
          built, outputs is a logits Tensor with shape
          [<batch_size>, self.num_classes].
        """

        super().build_model(inputs, is_training, requested_stages)
        frontend_scope = 'resnet_v{}_{}'.format(self.resnet_version,
                                                self.resnet_size)
        with tf.variable_scope(frontend_scope):
            # TODO: Consider converting the inputs from NHWC to NCHW to improve GPU performance
            #  See https://www.tensorflow.org/performance/performance_guide

            inputs = conv2d_fixed_padding(inputs=inputs,
                                          filters=self.num_filters,
                                          kernel_size=self.kernel_size,
                                          strides=self.conv_stride,
                                          use_bias=True)
            inputs = tf.identity(inputs, 'initial_conv')

            # We do not include batch normalization or activation functions in V2
            # for the initial conv1 because the first ResNet unit will perform these
            # for both the shortcut and non-shortcut paths as part of the first
            # block's projection. Cf. Appendix of [2].
            if self.resnet_version == 1:
                inputs = batch_norm(inputs, is_training)
                inputs = tf.nn.relu(inputs)
            if self.update_endpoint(inputs):  # Stage 1
                return inputs, self.end_points, frontend_scope
            if self.first_pool_size:
                inputs = tf.layers.max_pooling2d(
                    inputs=inputs,
                    pool_size=self.first_pool_size,
                    strides=self.first_pool_stride,
                    padding='SAME',
                    data_format='channels_last')
                inputs = tf.identity(inputs, 'initial_max_pool')

            for i, num_blocks in enumerate(self.block_sizes):
                num_filters = self.num_filters * (2**i)
                inputs = block_layer(inputs=inputs,
                                     filters=num_filters,
                                     bottleneck=self.bottleneck,
                                     block_fn=self.block_fn,
                                     blocks=num_blocks,
                                     strides=self.block_strides[i],
                                     training=is_training,
                                     name='block_layer{}'.format(i + 1),
                                     namescope="block" + str(i + 1))

                if self.update_endpoint(inputs):  # Stage 2 - 5
                    return inputs, self.end_points, frontend_scope

            if self.update_endpoint(None):  # Stage 6
                return inputs, self.end_points, frontend_scope

            # Only apply the BN and ReLU for models that do pre-activation in
            # each building/bottleneck block, e.g. ResNet v2.
            if self.pre_activation:
                inputs = batch_norm(inputs, is_training)
                inputs = tf.nn.relu(inputs)
                if self.update_endpoint(inputs):  # Stage 7
                    return inputs, self.end_points, frontend_scope
            else:
                if self.update_endpoint(None):  # Stage 7
                    return inputs, self.end_points, frontend_scope
            # The current top layer has shape
            # `batch_size x pool_size x pool_size x final_size`.
            # ResNet does an Average Pooling layer over pool_size,
            # but that is the same as doing a reduce_mean. We do a reduce_mean
            # here because it performs better than AveragePooling2D.
            axes = [1, 2]
            inputs = tf.reduce_mean(input_tensor=inputs,
                                    axis=axes,
                                    keepdims=True)
            inputs = tf.identity(inputs, 'final_reduce_mean')

            inputs = tf.squeeze(inputs, axes)
            inputs = tf.layers.dense(inputs=inputs, units=self.num_classes)
            inputs = tf.identity(inputs, 'final_dense')
            self.update_endpoint(inputs)  # Stage 8

            return inputs, self.end_points, frontend_scope
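build_model delegates the repeated-block stages to a block_layer helper that is not shown. Below is a sketch of one plausible implementation, adapted from the TensorFlow official ResNet model; the handling of the namescope argument is an assumption.

def block_layer(inputs, filters, bottleneck, block_fn, blocks, strides,
                training, name, namescope):
    # Bottleneck blocks output 4x the base filter count.
    filters_out = filters * 4 if bottleneck else filters

    def projection_shortcut(x):
        return conv2d_fixed_padding(inputs=x, filters=filters_out,
                                    kernel_size=1, strides=strides)

    with tf.variable_scope(namescope):
        # Only the first block projects the shortcut and applies the stride;
        # the remaining blocks are identity-shortcut blocks with stride 1.
        inputs = block_fn(inputs, filters, training, projection_shortcut,
                          strides)
        for _ in range(1, blocks):
            inputs = block_fn(inputs, filters, training, None, 1)
    return tf.identity(inputs, name)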
Example #5
def pwise_block(inputs, output_dim, is_train, name, bias=False, scale=False):
    """Pointwise block: 1x1 convolution, batch normalization, then ReLU6."""
    with tf.name_scope(name), tf.variable_scope(name):
        out = conv_1x1(inputs, output_dim, bias=bias, name='pwb')
        out = batch_norm(out, training=is_train, scale=scale)
        out = relu6(out)
        return out
Example #6
def conv2d_block(inputs, out_dim, k, s, is_train, name):
    """Convolution block: k x k convolution with stride s, batch norm, ReLU6."""
    with tf.name_scope(name), tf.variable_scope(name):
        net = conv2d(inputs, out_dim, k, k, s, s, name='conv2d')
        net = batch_norm(net, training=is_train)
        net = relu6(net)
        return net
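The conv2d helper called here takes separate kernel and stride sizes per axis. One plausible definition is sketched below; the initializer and default stddev are assumptions.

def conv2d(inputs, output_dim, k_h, k_w, s_h, s_w, stddev=0.02,
           name='conv2d', bias=False):
    # Standard convolution with independent kernel/stride sizes per axis.
    with tf.variable_scope(name):
        in_dim = inputs.get_shape().as_list()[-1]
        w = tf.get_variable(
            'weights', [k_h, k_w, in_dim, output_dim],
            initializer=tf.truncated_normal_initializer(stddev=stddev))
        out = tf.nn.conv2d(inputs, w, strides=[1, s_h, s_w, 1], padding='SAME')
        if bias:
            b = tf.get_variable('bias', [output_dim],
                                initializer=tf.zeros_initializer())
            out = tf.nn.bias_add(out, b)
        return out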