Example #1
def block_group(inputs,
                filters,
                strides,
                use_projection,
                block_fn,
                block_repeats,
                batch_norm_relu=nn_ops.BatchNormRelu(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
    """Builds one group of blocks.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    filters: an `int` number of filters for the first two convolutions.
    strides: an `int` block stride. If greater than 1, this block will
      ultimately downsample the input.
    use_projection: a `bool` for whether this block should use a projection
      shortcut (versus the default identity shortcut). This is usually `True`
      for the first block of a block group, which may change the number of
      filters and the resolution.
    block_fn: the `function` for the block to use within the model.
    block_repeats: an `int` number of blocks to repeat in the group.
    batch_norm_relu: an operation that is added after convolutions, including a
      batch norm layer and an optional relu activation.
    dropblock: a drop block layer that is added after convolutions. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a `float` number that specifies the drop connection rate
      of the block. Note that the default `None` means no drop connection is
      applied.
    data_format: a `str` that specifies the data format.
    name: a `str` name for the Tensor output of the block layer.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block layer.
  """
    # Only the first block per block_group uses projection shortcut and strides.
    inputs = block_fn(inputs,
                      filters,
                      strides,
                      use_projection=use_projection,
                      batch_norm_relu=batch_norm_relu,
                      dropblock=dropblock,
                      drop_connect_rate=drop_connect_rate,
                      data_format=data_format,
                      is_training=is_training)
    for _ in range(1, block_repeats):
        inputs = block_fn(inputs,
                          filters,
                          1,
                          use_projection=False,
                          batch_norm_relu=batch_norm_relu,
                          dropblock=dropblock,
                          drop_connect_rate=drop_connect_rate,
                          data_format=data_format,
                          is_training=is_training)
    return tf.identity(inputs, name)
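A minimal usage sketch for the `block_group` above (not part of the original listing): it assumes TF1-style graph mode, a channels-last feature map, and that `nn_blocks.residual_block` from the same codebase is available as the block function; the shapes and the tensor name are illustrative only.

# Hypothetical example, for illustration only.
import tensorflow.compat.v1 as tf

features = tf.zeros([8, 56, 56, 64])    # [batch, height, width, channels]
outputs = block_group(
    features,
    filters=64,
    strides=2,                          # only the first block downsamples
    use_projection=True,
    block_fn=nn_blocks.residual_block,  # assumed import from the same codebase
    block_repeats=2,
    name='block_group_1',
    is_training=True)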
Example #2
    def __init__(self,
                 resnet_depth,
                 dropblock=nn_ops.Dropblock(),
                 batch_norm_relu=nn_ops.BatchNormRelu(),
                 data_format='channels_last'):
        """ResNet initialization function.

    Args:
      resnet_depth: `int` depth of ResNet backbone model.
      dropblock: a dropblock layer.
      batch_norm_relu: an operation that includes a batch normalization layer
        followed by a relu layer (optional).
      data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.
    """
        self._resnet_depth = resnet_depth

        self._dropblock = dropblock
        self._batch_norm_relu = batch_norm_relu

        self._data_format = data_format

        model_params = {
            18: {
                'block': self.residual_block,
                'layers': [2, 2, 2, 2]
            },
            34: {
                'block': self.residual_block,
                'layers': [3, 4, 6, 3]
            },
            50: {
                'block': self.bottleneck_block,
                'layers': [3, 4, 6, 3]
            },
            101: {
                'block': self.bottleneck_block,
                'layers': [3, 4, 23, 3]
            },
            152: {
                'block': self.bottleneck_block,
                'layers': [3, 8, 36, 3]
            },
            200: {
                'block': self.bottleneck_block,
                'layers': [3, 24, 36, 3]
            }
        }

        if resnet_depth not in model_params:
            valid_resnet_depths = ', '.join(
                [str(depth) for depth in sorted(model_params.keys())])
            raise ValueError(
                'The resnet_depth should be in [%s]. Not a valid resnet_depth:'
                % (valid_resnet_depths), self._resnet_depth)
        params = model_params[resnet_depth]
        self._resnet_fn = self.resnet_v1_generator(params['block'],
                                                   params['layers'])
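A hedged construction sketch: the enclosing class name is not shown in the snippet, so `Resnet` below is assumed purely for illustration.

# Hypothetical usage; the class name `Resnet` is an assumption.
backbone = Resnet(resnet_depth=50)   # selects bottleneck blocks with layers [3, 4, 6, 3]
# Any depth outside {18, 34, 50, 101, 152, 200} raises a ValueError listing the
# valid depths.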
Example #3
    def __init__(self,
                 min_level=3,
                 max_level=7,
                 block_specs=build_block_specs(),
                 endpoints_num_filters=48,
                 use_native_resize_op=False,
                 se_ratio=0.2,
                 block_repeats=1,
                 filter_size_scale=1.0,
                 activation='swish',
                 batch_norm_activation=nn_ops.BatchNormActivation(
                     activation='swish'),
                 init_drop_connect_rate=None,
                 data_format='channels_last'):
        """SpineNetMBConv initialization function.

    Args:
      min_level: `int` minimum level in SpineNet endpoints.
      max_level: `int` maximum level in SpineNet endpoints.
      block_specs: a list of BlockSpec objects that specifies the SpineNet
        network topology. By default, the previously discovered architecture is
        used.
      endpoints_num_filters: `int` feature dimension applied to endpoints before
        sharing conv layers in head.
      use_native_resize_op: Whether to use native
        tf.image.nearest_neighbor_resize or the broadcast implementation to do
        upsampling.
      se_ratio: squeeze and excitation ratio for MBConv blocks.
      block_repeats: `int` number of repeats per block.
      filter_size_scale: `float` a scaling factor to uniformly scale feature
        dimension in SpineNet.
      activation: the activation function after cross-scale feature fusion.
        Supports 'relu' and 'swish'.
      batch_norm_activation: An operation that includes a batch normalization
        layer followed by an optional activation layer.
      init_drop_connect_rate: `float` initial drop connect rate.
      data_format: An optional string from: "channels_last", "channels_first".
        Defaults to "channels_last".
    """
        self._min_level = min_level
        self._max_level = max_level
        self._block_specs = block_specs
        self._endpoints_num_filters = endpoints_num_filters
        self._use_native_resize_op = use_native_resize_op
        self._se_ratio = se_ratio
        self._block_repeats = block_repeats
        self._filter_size_scale = filter_size_scale
        if activation == 'relu':
            self._activation = tf.nn.relu
        elif activation == 'swish':
            self._activation = tf.nn.swish
        else:
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._batch_norm_activation = batch_norm_activation
        self._init_dc_rate = init_drop_connect_rate
        self._data_format = data_format
        self._dropblock = nn_ops.Dropblock()
Example #4
def block_group(inputs,
                filters,
                strides,
                block_fn_cand,
                block_repeats,
                activation=tf.nn.swish,
                batch_norm_activation=nn_ops.BatchNormActivation(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
  """Creates one group of blocks for SpineNet."""
  block_fn_candidates = {
      'bottleneck': nn_blocks.bottleneck_block,
      'residual': nn_blocks.residual_block,
  }
  if block_fn_cand not in block_fn_candidates:
    raise ValueError('Block function {} not implemented.'.format(block_fn_cand))

  block_fn = block_fn_candidates[block_fn_cand]
  _, _, _, num_filters = inputs.get_shape().as_list()

  if block_fn_cand == 'bottleneck':
    use_projection = not (num_filters == (filters * 4) and strides == 1)
  else:
    use_projection = not (num_filters == filters and strides == 1)

  # Only the first block per block_group uses projection shortcut and strides.
  inputs = block_fn(
      inputs,
      filters,
      strides,
      use_projection=use_projection,
      activation=activation,
      batch_norm_activation=batch_norm_activation,
      dropblock=dropblock,
      drop_connect_rate=drop_connect_rate,
      data_format=data_format,
      is_training=is_training)
  for _ in range(1, block_repeats):
    inputs = block_fn(
        inputs,
        filters,
        1,
        use_projection=False,
        activation=activation,
        batch_norm_activation=batch_norm_activation,
        dropblock=dropblock,
        drop_connect_rate=drop_connect_rate,
        data_format=data_format,
        is_training=is_training)
  return tf.identity(inputs, name)
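An illustrative call of the SpineNet `block_group` above (not from the original listing): the block implementation is selected by name, and whether a projection shortcut is needed is inferred from the incoming channel count. A channels-last feature map is assumed.

# Hypothetical example, for illustration only.
features = tf.zeros([8, 28, 28, 256])  # 256 == filters * 4 and strides == 1 -> identity shortcut
outputs = block_group(
    features,
    filters=64,
    strides=1,
    block_fn_cand='bottleneck',
    block_repeats=3,
    name='spinenet_block_group',
    is_training=False)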
Example #5
def block_group(inputs,
                filters,
                strides,
                block_fn,
                block_repeats,
                conv2d_op=None,
                activation=tf.nn.swish,
                batch_norm_activation=nn_ops.BatchNormActivation(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
    """Creates one group of blocks for NAS-FPN."""
    if block_fn == 'conv':
        inputs = conv2d_op(inputs,
                           filters=filters,
                           kernel_size=(3, 3),
                           padding='same',
                           data_format=data_format,
                           name='conv')
        inputs = batch_norm_activation(inputs,
                                       is_training=is_training,
                                       relu=False,
                                       name='bn')
        inputs = dropblock(inputs, is_training=is_training)
        return inputs

    if block_fn != 'bottleneck':
        raise ValueError('Block function {} not implemented.'.format(block_fn))
    _, _, _, num_filters = inputs.get_shape().as_list()
    block_fn = nn_blocks.bottleneck_block
    use_projection = not (num_filters == (filters * 4) and strides == 1)

    return resnet.block_group(inputs=inputs,
                              filters=filters,
                              strides=strides,
                              use_projection=use_projection,
                              block_fn=block_fn,
                              block_repeats=block_repeats,
                              activation=activation,
                              batch_norm_activation=batch_norm_activation,
                              dropblock=dropblock,
                              drop_connect_rate=drop_connect_rate,
                              data_format=data_format,
                              name=name,
                              is_training=is_training)
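An illustrative call of the NAS-FPN `block_group` above (not from the original listing): with `block_fn='conv'` it applies a single 3x3 conv, batch norm, and dropblock, so a `conv2d_op` must be supplied; `tf.layers.conv2d` is used here because the surrounding examples default to it.

# Hypothetical example, for illustration only.
features = tf.zeros([8, 16, 16, 256])
outputs = block_group(
    features,
    filters=256,
    strides=1,
    block_fn='conv',
    block_repeats=1,
    conv2d_op=tf.layers.conv2d,
    is_training=False)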
Example #6
  def __init__(self,
               min_level=3,
               max_level=7,
               fpn_feat_dims=256,
               num_repeats=7,
               use_separable_conv=False,
               dropblock=nn_ops.Dropblock(),
               batch_norm_relu=nn_ops.BatchNormRelu()):
    """NAS-FPN initialization function.

    Args:
      min_level: `int` minimum level in NAS-FPN output feature maps.
      max_level: `int` maximum level in NAS-FPN output feature maps.
      fpn_feat_dims: `int` number of filters in FPN layers.
      num_repeats: number of repeats for feature pyramid network.
      use_separable_conv: `bool`, if True use separable convolution for
        convolution in NAS-FPN layers.
      dropblock: a Dropblock layer.
      batch_norm_relu: an operation that includes a batch normalization layer
        followed by a relu layer (optional).
    """

    self._min_level = min_level
    self._max_level = max_level
    if min_level == 3 and max_level == 7:
      model_config = [
          3, 1, 1, 3,
          3, 0, 1, 5,
          4, 0, 0, 6,  # Output to level 3.
          3, 0, 6, 7,  # Output to level 4.
          2, 1, 7, 8,  # Output to level 5.
          0, 1, 6, 9,  # Output to level 7.
          1, 1, 9, 10]  # Output to level 6.
    else:
      raise ValueError('The NAS-FPN with min level {} and max level {} '
                       'is not supported.'.format(min_level, max_level))
    self._config = Config(model_config, self._min_level, self._max_level)
    self._num_repeats = num_repeats
    self._fpn_feat_dims = fpn_feat_dims
    self._use_separable_conv = use_separable_conv
    self._dropblock = dropblock
    self._batch_norm_relu = batch_norm_relu
    self._resample_feature_map = functools.partial(
        resample_feature_map,
        target_feat_dims=fpn_feat_dims, batch_norm_relu=batch_norm_relu)
Example #7
def block_group(inputs,
                in_filters,
                out_filters,
                strides,
                expand_ratio,
                block_repeats,
                se_ratio=0.2,
                batch_norm_activation=nn_ops.BatchNormActivation(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
    """Creates one group of blocks for Mobile SpineNet."""
    # Apply strides only to the first block in block_group.
    inputs = nn_blocks.mbconv_block(
        inputs,
        in_filters,
        out_filters,
        expand_ratio,
        strides,
        se_ratio=se_ratio,
        batch_norm_activation=batch_norm_activation,
        dropblock=dropblock,
        drop_connect_rate=drop_connect_rate,
        data_format=data_format,
        is_training=is_training)
    for _ in range(1, block_repeats):
        inputs = nn_blocks.mbconv_block(
            inputs,
            out_filters,
            out_filters,
            expand_ratio,
            1,  # strides
            se_ratio=se_ratio,
            batch_norm_activation=batch_norm_activation,
            dropblock=dropblock,
            drop_connect_rate=drop_connect_rate,
            data_format=data_format,
            is_training=is_training)
    return tf.identity(inputs, name)
Example #8
    def __init__(self,
                 min_level=3,
                 max_level=7,
                 block_specs=build_block_specs(),
                 endpoints_num_filters=256,
                 resample_alpha=0.5,
                 use_native_resize_op=False,
                 block_repeats=1,
                 filter_size_scale=1.0,
                 activation='swish',
                 batch_norm_activation=nn_ops.BatchNormActivation(
                     activation='swish'),
                 init_drop_connect_rate=None,
                 data_format='channels_last'):
        """SpineNet initialization function.

    Args:
      min_level: an `int` representing the minimum level in SpineNet endpoints.
      max_level: an `int` representing the maximum level in SpineNet endpoints.
      block_specs: a list of BlockSpec objects that specifies the SpineNet
        network topology. By default, the previously discovered architecture is
        used.
      endpoints_num_filters: an `int` representing the final feature dimension
        of endpoints before the shared conv layers in head.
      resample_alpha: a `float` representing the scaling factor to scale feature
        dimension before resolution resampling.
      use_native_resize_op: Whether to use native
        tf.image.nearest_neighbor_resize or the broadcast implementation to do
        upsampling.
      block_repeats: an `int` representing the number of repeats per block
        group.
      filter_size_scale: a `float` representing the scaling factor to uniformly
        scale feature dimension in SpineNet.
      activation: activation function. Supports 'relu' and 'swish'.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
      init_drop_connect_rate: a `float` number that specifies the initial drop
        connection rate. Note that the default `None` means no drop connection
        is applied.
      data_format: An optional string from: "channels_last", "channels_first".
        Defaults to "channels_last".
    """
        self._min_level = min_level
        self._max_level = max_level
        self._block_specs = block_specs
        self._endpoints_num_filters = endpoints_num_filters
        self._use_native_resize_op = use_native_resize_op
        self._resample_alpha = resample_alpha
        self._block_repeats = block_repeats
        self._filter_size_scale = filter_size_scale
        if activation == 'relu':
            self._activation = tf.nn.relu
        elif activation == 'swish':
            self._activation = tf.nn.swish
        else:
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._batch_norm_activation = batch_norm_activation
        self._init_drop_connect_rate = init_drop_connect_rate
        self._data_format = data_format
        self._dropblock = nn_ops.Dropblock(
        )  # Hard-code it to not use DropBlock.
        self._init_block_fn = 'bottleneck'
        self._num_init_blocks = 2
Example #9
def dropblock_generator(params):
  return nn_ops.Dropblock(
      dropblock_keep_prob=params.dropblock_keep_prob,
      dropblock_size=params.dropblock_size)
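An illustrative way to call `dropblock_generator` (not from the original listing): any attribute-style config object with `dropblock_keep_prob` and `dropblock_size` fields works; the namedtuple below is a hypothetical stand-in for the real params object.

# Hypothetical example, for illustration only.
import collections

DropblockParams = collections.namedtuple(
    'DropblockParams', ['dropblock_keep_prob', 'dropblock_size'])
dropblock = dropblock_generator(
    DropblockParams(dropblock_keep_prob=0.9, dropblock_size=3))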
Example #10
def bottleneck_block(inputs,
                     filters,
                     strides,
                     use_projection,
                     activation=tf.nn.relu,
                     batch_norm_relu=nn_ops.BatchNormRelu(),
                     dropblock=nn_ops.Dropblock(),
                     drop_connect_rate=None,
                     data_format='channels_last',
                     is_training=False):
    """The bottleneck block with BN and DropBlock after convolutions.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    filters: an `int` number of filters for the first two convolutions. Note that
      the third and final convolution will use 4 times as many filters.
    strides: an `int` block stride. If greater than 1, this block will
      ultimately downsample the input.
    use_projection: a `bool` for whether this block should use a projection
      shortcut (versus the default identity shortcut). This is usually `True`
      for the first block of a block group, which may change the number of
      filters and the resolution.
    activation: activation function. Supports 'relu' and 'swish'.
    batch_norm_relu: an operation that is added after convolutions, including a
      batch norm layer and an optional relu activation.
    dropblock: a drop block layer that is added after convolutions. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a `float` number that specifies the drop connection rate
      of the block. Note that the default `None` means no drop connection is
      applied.
    data_format: a `str` that specifies the data format.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block.
  """
    logging.info('-----> Building bottleneck block.')
    shortcut = inputs
    if use_projection:
        out_filters = 4 * filters
        shortcut = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                               filters=out_filters,
                                               kernel_size=1,
                                               strides=strides,
                                               data_format=data_format)
        shortcut = batch_norm_relu(shortcut,
                                   relu=False,
                                   is_training=is_training)
    shortcut = dropblock(shortcut, is_training=is_training)

    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=filters,
                                         kernel_size=3,
                                         strides=strides,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=4 * filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, relu=False, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    if drop_connect_rate:
        inputs = nn_ops.drop_connect(inputs, is_training, drop_connect_rate)

    return activation(inputs + shortcut)
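An illustrative call of `bottleneck_block` above (not from the original listing): a strided first block of a group uses a projection shortcut, and the output has `4 * filters` channels. A channels-last input is assumed.

# Hypothetical example, for illustration only.
features = tf.zeros([8, 56, 56, 64])
outputs = bottleneck_block(
    features,
    filters=64,
    strides=2,
    use_projection=True,    # channel count and resolution change here
    is_training=True)       # roughly [8, 28, 28, 256] with SAME-style fixed padding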
Example #11
def mbconv_block(inputs,
                 in_filters,
                 out_filters,
                 expand_ratio,
                 strides,
                 use_projection,
                 kernel_size=3,
                 se_ratio=None,
                 batch_norm_relu=nn_ops.BatchNormRelu(),
                 dropblock=nn_ops.Dropblock(),
                 drop_connect_rate=None,
                 data_format='channels_last',
                 is_training=False):
    """The bottleneck block with BN and DropBlock after convolutions.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    in_filters: an `int` number of filters for the input feature map.
    out_filters: an `int` number of filters for the output feature map.
    expand_ratio: an `int` number as the feature dimension expansion ratio.
    strides: an `int` block stride. If greater than 1, this block will ultimately
      downsample the input.
    use_projection: a `bool` for whether this block should use a projection
      shortcut (versus the default identity shortcut). This is usually `True`
      for the first block of a block group, which may change the number of
      filters and the resolution.
    kernel_size: kernel size for the depthwise convolution.
    se_ratio: squeeze and excitation ratio.
    batch_norm_relu: an operation that is added after convolutions, including a
      batch norm layer and an optional relu activation.
    dropblock: a drop block layer that is added after convolutions. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a `float` number that specifies the drop connection rate
      of the block. Note that the default `None` means no drop connection is
      applied.
    data_format: a `str` that specifies the data format.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block.
  """
    tf.logging.info('-----> Building mbconv block.')
    shortcut = inputs
    if use_projection:
        shortcut = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                               filters=out_filters,
                                               kernel_size=1,
                                               strides=strides,
                                               data_format=data_format)
        shortcut = batch_norm_relu(shortcut, is_training=is_training)
        shortcut = dropblock(shortcut, is_training=is_training)

    # First 1x1 conv for channel expansion.
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=in_filters * expand_ratio,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    # Second depthwise conv.
    inputs = nn_ops.depthwise_conv2d_fixed_padding(inputs=inputs,
                                                   kernel_size=kernel_size,
                                                   strides=strides,
                                                   data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    # Squeeze and excitation.
    if se_ratio is not None and se_ratio > 0 and se_ratio <= 1:
        inputs = nn_ops.squeeze_excitation(inputs,
                                           in_filters,
                                           se_ratio,
                                           expand_ratio=expand_ratio,
                                           data_format=data_format)

    # Third 1x1 conv for reversed bottleneck.
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=out_filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    if drop_connect_rate:
        inputs = nn_ops.drop_connect(inputs, is_training, drop_connect_rate)

    return tf.add(inputs, shortcut)
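An illustrative call of `mbconv_block` above (not from the original listing): when the channel count and resolution are unchanged, the identity shortcut is used and the residual add is valid.

# Hypothetical example, for illustration only.
features = tf.zeros([8, 28, 28, 32])
outputs = mbconv_block(
    features,
    in_filters=32,
    out_filters=32,
    expand_ratio=6,          # expands to 192 channels internally
    strides=1,
    use_projection=False,    # shapes match, so the identity shortcut works
    se_ratio=0.2,
    is_training=True)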
Example #12
    def __init__(self,
                 resnet_depth,
                 dropblock=nn_ops.Dropblock(),
                 activation='relu',
                 batch_norm_activation=nn_ops.BatchNormActivation(),
                 init_drop_connect_rate=None,
                 data_format='channels_last',
                 space_to_depth_block_size=1):
        """ResNet initialization function.

    Args:
      resnet_depth: `int` depth of ResNet backbone model.
      dropblock: a dropblock layer.
      activation: activation function. Supports 'relu' and 'swish'.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
      init_drop_connect_rate: a `float` number that specifies the initial drop
        connection rate. Note that the default `None` means no drop connection
        is applied.
      data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.
      space_to_depth_block_size: an integer that indicates the block size of
        space-to-depth convolution for conv0. `0` means use the original conv2d
        in ResNet.
    """
        self._resnet_depth = resnet_depth

        self._dropblock = dropblock
        if activation == 'relu':
            self._activation = tf.nn.relu
        elif activation == 'swish':
            self._activation = tf.nn.swish
        else:
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._batch_norm_activation = batch_norm_activation
        self._init_drop_connect_rate = init_drop_connect_rate

        self._data_format = data_format
        self._space_to_depth_block_size = space_to_depth_block_size

        model_params = {
            10: {
                'block': nn_blocks.residual_block,
                'layers': [1, 1, 1, 1]
            },
            14: {
                'block': nn_blocks.bottleneck_block,
                'layers': [1, 1, 1, 1]
            },
            18: {
                'block': nn_blocks.residual_block,
                'layers': [2, 2, 2, 2]
            },
            26: {
                'block': nn_blocks.bottleneck_block,
                'layers': [2, 2, 2, 2]
            },
            34: {
                'block': nn_blocks.residual_block,
                'layers': [3, 4, 6, 3]
            },
            50: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 4, 6, 3]
            },
            101: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 4, 23, 3]
            },
            152: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 8, 36, 3]
            },
            200: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 24, 36, 3]
            }
        }

        if resnet_depth not in model_params:
            valid_resnet_depths = ', '.join(
                [str(depth) for depth in sorted(model_params.keys())])
            raise ValueError(
                'The resnet_depth should be in [%s]. Not a valid resnet_depth:'
                % (valid_resnet_depths), self._resnet_depth)
        params = model_params[resnet_depth]
        self._resnet_fn = self.resnet_v1_generator(
            params['block'], params['layers'], self._space_to_depth_block_size)
Example #13
def fused_mbconv_block(inputs,
                       in_filters,
                       out_filters,
                       expand_ratio,
                       strides,
                       kernel_size=3,
                       se_ratio=None,
                       batch_norm_activation=nn_ops.BatchNormActivation(),
                       dropblock=nn_ops.Dropblock(),
                       drop_connect_rate=None,
                       data_format='channels_last',
                       is_training=False):
    """The fused bottleneck block with BN and DropBlock after convolutions.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    in_filters: an `int` number of filters for the input feature map.
    out_filters: an `int` number of filters for the output feature map.
    expand_ratio: an `int` number as the feature dimension expansion ratio.
    strides: an `int` block stride. If greater than 1, this block will ultimately
      downsample the input.
    kernel_size: kernel size for the fused expansion convolution.
    se_ratio: squeeze and excitation ratio.
    batch_norm_activation: an operation that includes a batch normalization
      layer followed by an optional activation layer.
    dropblock: a drop block layer that is added after convolutions. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a `float` number that specifies the drop connection rate
      of the block. Note that the default `None` means no drop connection is
      applied.
    data_format: a `str` that specifies the data format.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block.
  """
    tf.logging.info('-----> Building fused mbconv block.')
    shortcut = inputs

    # Fused expansion conv: a full-kernel conv that expands the channel dimension.
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=in_filters * expand_ratio,
                                         kernel_size=kernel_size,
                                         strides=strides,
                                         data_format=data_format)
    inputs = batch_norm_activation(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    # Squeeze and excitation.
    if se_ratio is not None and se_ratio > 0 and se_ratio <= 1:
        inputs = nn_ops.squeeze_excitation(inputs,
                                           in_filters,
                                           se_ratio,
                                           expand_ratio=expand_ratio,
                                           data_format=data_format)

    # Final 1x1 conv to project back to the output feature dimension.
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=out_filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_activation(inputs, relu=False, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    if in_filters == out_filters and strides == 1:
        if drop_connect_rate:
            inputs = nn_ops.drop_connect(inputs, is_training,
                                         drop_connect_rate)
        inputs = tf.add(inputs, shortcut)

    return inputs
Example #14
  def __init__(self,
               min_level=3,
               max_level=7,
               fpn_feat_dims=256,
               num_repeats=7,
               use_separable_conv=False,
               dropblock=nn_ops.Dropblock(),
               block_fn='conv',
               block_repeats=1,
               activation='swish',
               batch_norm_activation=nn_ops.BatchNormActivation(),
               init_drop_connect_rate=None):
    """NAS-FPN initialization function.

    Args:
      min_level: `int` minimum level in NAS-FPN output feature maps.
      max_level: `int` maximum level in NAS-FPN output feature maps.
      fpn_feat_dims: `int` number of filters in FPN layers.
      num_repeats: number of repeats for feature pyramid network.
      use_separable_conv: `bool`, if True use separable convolution for
        convolution in NAS-FPN layers.
      dropblock: a Dropblock layer.
      block_fn: `string` the type of block group to use; supported values are
        'conv' and 'bottleneck'.
      block_repeats: `int` representing the number of repeats per block group
        when the block group is bottleneck.
      activation: activation function. Supports 'relu' and 'swish'.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
      init_drop_connect_rate: a `float` number that specifies the initial drop
        connection rate. Note that the default `None` means no drop connection
        is applied.
    """

    self._min_level = min_level
    self._max_level = max_level
    if min_level == 3 and max_level == 7:
      model_config = [
          3, 1, 1, 3,
          3, 0, 1, 5,
          4, 0, 0, 6,  # Output to level 3.
          3, 0, 6, 7,  # Output to level 4.
          2, 1, 7, 8,  # Output to level 5.
          0, 1, 6, 9,  # Output to level 7.
          1, 1, 9, 10]  # Output to level 6.
    else:
      raise ValueError('The NAS-FPN with min level {} and max level {} '
                       'is not supported.'.format(min_level, max_level))
    self._config = Config(model_config, self._min_level, self._max_level)
    self._num_repeats = num_repeats
    self._fpn_feat_dims = fpn_feat_dims
    self._block_fn = block_fn
    self._block_repeats = block_repeats
    if use_separable_conv:
      self._conv2d_op = functools.partial(
          tf.layers.separable_conv2d, depth_multiplier=1)
    else:
      self._conv2d_op = tf.layers.conv2d
    self._dropblock = dropblock
    if activation == 'relu':
      self._activation = tf.nn.relu
    elif activation == 'swish':
      self._activation = tf.nn.swish
    else:
      raise ValueError('Activation {} not implemented.'.format(activation))
    self._batch_norm_activation = batch_norm_activation
    self._init_drop_connect_rate = init_drop_connect_rate
    self._resample_feature_map = functools.partial(
        resample_feature_map,
        target_feat_dims=fpn_feat_dims,
        conv2d_op=self._conv2d_op,
        batch_norm_activation=batch_norm_activation)
Example #15
    def __init__(self,
                 min_level=3,
                 max_level=7,
                 block_specs=build_block_specs(),
                 fpn_feat_dims=256,
                 num_repeats=7,
                 use_separable_conv=False,
                 dropblock=nn_ops.Dropblock(),
                 block_fn='conv',
                 block_repeats=1,
                 activation='relu',
                 batch_norm_activation=nn_ops.BatchNormActivation(
                     activation='relu'),
                 init_drop_connect_rate=None,
                 data_format='channels_last',
                 use_sum_for_combination=False):
        """NAS-FPN initialization function.

    Args:
      min_level: `int` minimum level in NAS-FPN output feature maps.
      max_level: `int` maximum level in NAS-FPN output feature maps.
      block_specs: a list of BlockSpec objects that specifies the SpineNet
        network topology. By default, the previously discovered architecture is
        used.
      fpn_feat_dims: `int` number of filters in FPN layers.
      num_repeats: number of repeats for feature pyramid network.
      use_separable_conv: `bool`, if True use separable convolution for
        convolution in NAS-FPN layers.
      dropblock: a Dropblock layer.
      block_fn: `string` the type of block group to use; supported values are
        'conv' and 'bottleneck'.
      block_repeats: `int` representing the number of repeats per block group
        when the block group is bottleneck.
      activation: activation function. Supports 'relu' and 'swish'.
      batch_norm_activation: an operation that includes a batch normalization
        layer followed by an optional activation layer.
      init_drop_connect_rate: a `float` number that specifies the initial drop
        connection rate. Note that the default `None` means no drop connection
        is applied.
      data_format: An optional string from: "channels_last", "channels_first".
        Defaults to "channels_last".
      use_sum_for_combination: `bool`, if True only 'sum' is used for combining
        two nodes.
    """
        self._min_level = min_level
        self._max_level = max_level
        self._block_specs = block_specs
        self._fpn_feat_dims = fpn_feat_dims
        self._num_repeats = num_repeats
        self._block_fn = block_fn
        self._block_repeats = block_repeats
        if use_separable_conv:
            self._conv2d_op = functools.partial(tf.layers.separable_conv2d,
                                                depth_multiplier=1)
        else:
            self._conv2d_op = tf.layers.conv2d
        self._dropblock = dropblock
        if activation == 'relu':
            self._activation = tf.nn.relu
        elif activation == 'swish':
            self._activation = tf.nn.swish
        else:
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._batch_norm_activation = batch_norm_activation
        self._init_drop_connect_rate = init_drop_connect_rate
        self._data_format = data_format
        self._resample_feature_map = functools.partial(
            resample_feature_map,
            target_feat_dims=fpn_feat_dims,
            conv2d_op=self._conv2d_op,
            batch_norm_activation=batch_norm_activation,
            data_format=self._data_format)
        self._use_sum_for_combination = use_sum_for_combination