Example #1
def resample_with_sepconv(feat,
                          target_width,
                          target_num_filters,
                          use_native_resize_op=False,
                          batch_norm_activation=nn_ops.BatchNormActivation(),
                          data_format='channels_last',
                          name=None,
                          is_training=False):
    """Match resolution and feature dimension to the target block."""
    # Assumes `data_format='channels_last'` (NHWC) when reading the static shape.
    _, height, width, num_filters = feat.get_shape().as_list()
    if height is None or width is None or num_filters is None:
        raise ValueError(
            'Shape of feat is not fully defined (shape: {}).'.format(feat.shape))

    with tf.variable_scope('resample_with_sepconv_{}'.format(name)):
        # Down-sample.
        if width > target_width:
            if width % target_width != 0:
                raise ValueError('width ({}) is not divisible by '
                                 'target_width ({}).'.format(
                                     width, target_width))

            while width > target_width:
                feat = nn_ops.depthwise_conv2d_fixed_padding(
                    inputs=feat,
                    kernel_size=3,
                    strides=2,
                    data_format=data_format)
                feat = batch_norm_activation(feat, is_training=is_training)
                width //= 2  # Integer division keeps `width` an int in Python 3.

        # Up-sample with NN interpolation.
        elif width < target_width:
            if target_width % width != 0:
                raise ValueError('target_width ({}) is not divisible by '
                                 'width ({}).'.format(target_width, width))
            scale = target_width // width
            if use_native_resize_op:
                feat = tf.image.resize_nearest_neighbor(
                    feat, [height * scale, width * scale])
            else:
                feat = spatial_transform_ops.nearest_upsampling(feat,
                                                                scale=scale)

        # Match feature dimension to the target block.
        feat = nn_ops.conv2d_fixed_padding(inputs=feat,
                                           filters=target_num_filters,
                                           kernel_size=1,
                                           strides=1,
                                           data_format=data_format)
        feat = batch_norm_activation(feat, relu=False, is_training=is_training)

    return feat
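
A minimal usage sketch, assuming the TF1-compat API and that `nn_ops` and `spatial_transform_ops` come from the surrounding codebase; the shapes and the scope name below are illustrative, not from the original source:

# Hypothetical usage of resample_with_sepconv (not from the original source).
import tensorflow.compat.v1 as tf

feat = tf.placeholder(tf.float32, [8, 64, 64, 128])  # NHWC feature map.
# Halve the spatial width once (64 -> 32) and project to 256 channels.
out = resample_with_sepconv(feat,
                            target_width=32,
                            target_num_filters=256,
                            name='p3_to_p4',
                            is_training=True)
# `out` has static shape [8, 32, 32, 256]: the stride-2 depthwise conv
# halves both spatial dims, and the final 1x1 conv sets the channel count.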
Example #2
def mbconv_block(inputs,
                 in_filters,
                 out_filters,
                 expand_ratio,
                 strides,
                 use_projection,
                 kernel_size=3,
                 se_ratio=None,
                 batch_norm_relu=nn_ops.BatchNormRelu(),
                 dropblock=nn_ops.Dropblock(),
                 drop_connect_rate=None,
                 data_format='channels_last',
                 is_training=False):
    """The bottleneck block with BN and DropBlock after convolutions.

  Args:
    inputs: a `Tensor` of size `[batch, height, width, channels]` (with the
      default `'channels_last'` data format).
    in_filters: an `int` number of filters for the input feature map.
    out_filters: an `int` number of filters for the output feature map.
    expand_ratio: an `int` expansion ratio for the feature dimension.
    strides: an `int` block stride. If greater than 1, this block will
      ultimately downsample the input.
    use_projection: a `bool` for whether this block should use a projection
      shortcut (versus the default identity shortcut). This is usually `True`
      for the first block of a block group, which may change the number of
      filters and the resolution.
    kernel_size: an `int` kernel size for the depthwise convolution.
    se_ratio: a `float` squeeze-and-excitation ratio in (0, 1], or `None` to
      skip the squeeze-and-excitation step.
    batch_norm_relu: an operation that is added after convolutions, including a
      batch norm layer and an optional relu activation.
    dropblock: a drop block layer that is added after convolutions. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a `float` that specifies the drop connect rate of the
      block. Note that the default `None` means no drop connect is applied.
    data_format: a `str` that specifies the data format.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block.
  """
    tf.logging.info('-----> Building mbconv block.')
    shortcut = inputs
    if use_projection:
        shortcut = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                               filters=out_filters,
                                               kernel_size=1,
                                               strides=strides,
                                               data_format=data_format)
        shortcut = batch_norm_relu(shortcut, is_training=is_training)
        shortcut = dropblock(shortcut, is_training=is_training)

    # First 1x1 conv for channel expansion.
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=in_filters * expand_ratio,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    # Second depthwise conv.
    inputs = nn_ops.depthwise_conv2d_fixed_padding(inputs=inputs,
                                                   kernel_size=kernel_size,
                                                   strides=strides,
                                                   data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    # Squeeze and excitation.
    if se_ratio is not None and 0 < se_ratio <= 1:
        inputs = nn_ops.squeeze_excitation(inputs,
                                           in_filters,
                                           se_ratio,
                                           expand_ratio=expand_ratio,
                                           data_format=data_format)

    # Third 1x1 conv to project back down (inverted bottleneck).
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=out_filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    if drop_connect_rate:
        inputs = nn_ops.drop_connect(inputs, is_training, drop_connect_rate)

    return tf.add(inputs, shortcut)
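
A hedged usage sketch for `mbconv_block`, again assuming `channels_last` inputs and the codebase-provided `nn_ops` helpers; the shapes and hyperparameters below are illustrative, not from the original source:

# Hypothetical usage of mbconv_block (not from the original source).
import tensorflow.compat.v1 as tf

inputs = tf.placeholder(tf.float32, [8, 56, 56, 24])  # NHWC feature map.
# Stride-2 MBConv: 24 -> 40 filters, 6x channel expansion, SE ratio 0.25.
# use_projection=True is needed here because both the resolution and the
# channel count change, so the identity shortcut would not match.
out = mbconv_block(inputs,
                   in_filters=24,
                   out_filters=40,
                   expand_ratio=6,
                   strides=2,
                   use_projection=True,
                   kernel_size=5,
                   se_ratio=0.25,
                   drop_connect_rate=0.2,
                   is_training=True)
# `out` has static shape [8, 28, 28, 40].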