Example #1
    def residual_block(self,
                       inputs,
                       filters,
                       strides,
                       use_projection=False,
                       is_training=False):
        """Standard building block for residual networks with BN after convolutions.

    Args:
      inputs: `Tensor` of size `[batch, channels, height, width]`.
      filters: `int` number of filters for the first two convolutions. Note that
          the third and final convolution will use 4 times as many filters.
      strides: `int` block stride. If greater than 1, this block will ultimately
          downsample the input.
      use_projection: `bool` for whether this block should use a projection
          shortcut (versus the default identity shortcut). This is usually
          `True` for the first block of a block group, which may change the
          number of filters and the resolution.
      is_training: `bool` if True, the model is in training mode.
    Returns:
      The output `Tensor` of the block.
    """
        shortcut = inputs
        if use_projection:
            # Projection shortcut in first layer to match filters and strides
            shortcut = nn_ops.conv2d_fixed_padding(
                inputs=inputs,
                filters=filters,
                kernel_size=1,
                strides=strides,
                data_format=self._data_format)
            shortcut = self._batch_norm_relu(shortcut,
                                             relu=False,
                                             is_training=is_training)

        inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                             filters=filters,
                                             kernel_size=3,
                                             strides=strides,
                                             data_format=self._data_format)
        inputs = self._batch_norm_relu(inputs, is_training=is_training)

        inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                             filters=filters,
                                             kernel_size=3,
                                             strides=1,
                                             data_format=self._data_format)
        inputs = self._batch_norm_relu(inputs,
                                       relu=False,
                                       init_zero=True,
                                       is_training=is_training)

        return tf.nn.relu(inputs + shortcut)
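
The final addition only works when the shortcut and the main path agree in shape, which is why `use_projection` is paired with the same `strides` as the first convolution. A minimal shape-bookkeeping sketch (pure Python; `conv_out_size` and `residual_block_shapes` are illustrative helpers, assuming 'SAME'-style fixed padding so the spatial size depends only on the stride):

def conv_out_size(size, stride):
    # With fixed 'SAME'-style padding the output size only depends on the stride.
    return (size + stride - 1) // stride

def residual_block_shapes(height, width, in_channels, filters, strides, use_projection):
    """Track (channels, height, width) through the two-conv residual block."""
    main = (filters, conv_out_size(height, strides), conv_out_size(width, strides))
    if use_projection:
        # 1x1 projection with the same stride keeps the shortcut compatible.
        shortcut = (filters, conv_out_size(height, strides), conv_out_size(width, strides))
    else:
        shortcut = (in_channels, height, width)
    assert main == shortcut, 'shortcut and main path must agree for the addition'
    return main

# First block of a group: changes channels and resolution, so it needs the projection.
print(residual_block_shapes(56, 56, 64, 128, 2, use_projection=True))   # (128, 28, 28)
# Later blocks keep the shape, so the identity shortcut is enough.
print(residual_block_shapes(28, 28, 128, 128, 1, use_projection=False)) # (128, 28, 28)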
Example #2
        def model(inputs, is_training=False):
            """Creation of the model graph."""
            inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                                 filters=64,
                                                 kernel_size=7,
                                                 strides=2,
                                                 data_format=self._data_format)
            inputs = tf.identity(inputs, 'initial_conv')
            inputs = self._batch_norm_relu(inputs, is_training=is_training)

            inputs = tf.layers.max_pooling2d(inputs=inputs,
                                             pool_size=3,
                                             strides=2,
                                             padding='SAME',
                                             data_format=self._data_format)
            inputs = tf.identity(inputs, 'initial_max_pool')

            c2 = block_group(inputs=inputs,
                             filters=64,
                             strides=1,
                             use_projection=True,
                             block_fn=block_fn,
                             block_repeats=layers[0],
                             batch_norm_relu=self._batch_norm_relu,
                             dropblock=self._dropblock,
                             name='block_group1',
                             is_training=is_training)
            c3 = block_group(inputs=c2,
                             filters=128,
                             strides=2,
                             use_projection=True,
                             block_fn=block_fn,
                             block_repeats=layers[1],
                             batch_norm_relu=self._batch_norm_relu,
                             dropblock=self._dropblock,
                             name='block_group2',
                             is_training=is_training)
            c4 = block_group(inputs=c3,
                             filters=256,
                             strides=2,
                             use_projection=True,
                             block_fn=block_fn,
                             block_repeats=layers[2],
                             batch_norm_relu=self._batch_norm_relu,
                             dropblock=self._dropblock,
                             name='block_group3',
                             is_training=is_training)
            c5 = block_group(inputs=c4,
                             filters=512,
                             strides=2,
                             use_projection=True,
                             block_fn=block_fn,
                             block_repeats=layers[3],
                             batch_norm_relu=self._batch_norm_relu,
                             dropblock=self._dropblock,
                             name='block_group4',
                             is_training=is_training)
            return {2: c2, 3: c3, 4: c4, 5: c5}
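
The returned dict maps feature level l to a map whose total stride relative to the input is 2^l: the initial conv and max pool contribute a stride of 4 at c2, and each subsequent block group halves the resolution again. A quick sketch of the expected spatial sizes (pure Python, assuming a square input and 'SAME' padding; `feature_map_sizes` is an illustrative helper):

def feature_map_sizes(image_size, min_level=2, max_level=5):
    """Spatial size of each pyramid level for a model whose level-l stride is 2**l."""
    sizes = {}
    for level in range(min_level, max_level + 1):
        stride = 2 ** level
        sizes[level] = (image_size + stride - 1) // stride  # ceil division for 'SAME' padding
    return sizes

print(feature_map_sizes(224))  # {2: 56, 3: 28, 4: 14, 5: 7}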
Example #3
def resample_with_sepconv(feat,
                          target_width,
                          target_num_filters,
                          use_native_resize_op=False,
                          batch_norm_activation=nn_ops.BatchNormActivation(),
                          data_format='channels_last',
                          name=None,
                          is_training=False):
    """Match resolution and feature dimension to the target block."""
    _, height, width, num_filters = feat.get_shape().as_list()
    if width is None or num_filters is None:
        raise ValueError('Shape of feat is None (shape:{}).'.format(
            feat.shape))

    with tf.variable_scope('resample_with_sepconv_{}'.format(name)):
        # Down-sample.
        if width > target_width:
            if width % target_width != 0:
                raise ValueError('width ({}) is not divisible by '
                                 'target_width ({}).'.format(
                                     width, target_width))

            while width > target_width:
                feat = nn_ops.depthwise_conv2d_fixed_padding(
                    inputs=feat,
                    kernel_size=3,
                    strides=2,
                    data_format=data_format)
                feat = batch_norm_activation(feat, is_training=is_training)
                width //= 2

        # Up-sample with NN interpolation.
        elif width < target_width:
            if target_width % width != 0:
                raise ValueError('target_width ({}) is not divisible by '
                                 'width ({}).'.format(target_width, width))
            scale = target_width // width
            if use_native_resize_op:
                feat = tf.image.resize_nearest_neighbor(
                    feat, [height * scale, width * scale])
            else:
                feat = spatial_transform_ops.nearest_upsampling(feat,
                                                                scale=scale)

        # Match feature dimension to the target block.
        feat = nn_ops.conv2d_fixed_padding(inputs=feat,
                                           filters=target_num_filters,
                                           kernel_size=1,
                                           strides=1,
                                           data_format=data_format)
        feat = batch_norm_activation(feat, relu=False, is_training=is_training)

    return feat
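
Down-sampling here repeats a stride-2 separable conv plus batch norm until the width reaches `target_width`, so the number of such layers is log2(width / target_width). A small sketch that just counts those layers instead of building them (`num_downsample_convs` is an illustrative helper mirroring the loop above):

def num_downsample_convs(width, target_width):
    """How many stride-2 convs resample_with_sepconv would apply."""
    if width % target_width != 0:
        raise ValueError('width ({}) is not divisible by target_width ({}).'.format(
            width, target_width))
    count = 0
    while width > target_width:
        width //= 2
        count += 1
    return count

print(num_downsample_convs(64, 8))   # 3 (64 -> 32 -> 16 -> 8)
print(num_downsample_convs(32, 32))  # 0 (already at the target resolution)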
Example #4
    def _build_endpoints(self, features, is_training):
        """Match filter size for endpoints before sharing conv layers."""
        endpoints = {}
        for level in range(self._min_level, self._max_level + 1):
            feature = nn_ops.conv2d_fixed_padding(
                inputs=features[level],
                filters=self._endpoints_num_filters,
                kernel_size=1,
                strides=1,
                data_format=self._data_format)
            feature = self._batch_norm_relu(feature, is_training=is_training)
            endpoints[level] = feature
        return endpoints
Example #5
  def __call__(self, features, is_training):
    """Generate logits for classification.

    It takes a dict of multiscale feature maps and produces the final logits
    used for classification.

    Args:
      features: a dict of Tensors representing the multiscale feature maps with
        keys being level and values being the feature maps.
      is_training: a bool indicating whether it's in training mode.

    Returns:
      logits: a Tensor of shape [batch_size, num_classes] representing the
        prediction logits.
    """
    with tf.variable_scope('classification_head'):
      if self._aggregation == 'top':
        bottleneck = features[max(features.keys())]
      else:
        raise ValueError(
            'Un-supported aggregation type: `{}`!'.format(self._aggregation))

      # Optionally project to an embedding space of different dimensions.
      if self._endpoints_num_filters > 0:
        bottleneck = nn_ops.conv2d_fixed_padding(
            inputs=bottleneck,
            filters=self._endpoints_num_filters,
            kernel_size=1,
            strides=1,
            data_format=self._data_format)
        bottleneck = self._batch_norm_relu(bottleneck, is_training=is_training)

      # Global average pooling.
      bottleneck = tf.reduce_mean(
          bottleneck,
          axis=([1, 2] if self._data_format == 'channels_last' else [2, 3]))
      bottleneck = tf.identity(bottleneck, 'final_avg_pool')

      # Dropout layer.
      if is_training and self._dropout_rate > 0.0:
        bottleneck = tf.nn.dropout(bottleneck, rate=self._dropout_rate)

      # Prediction layer.
      logits = tf.layers.dense(
          inputs=bottleneck,
          units=self._num_classes,
          kernel_initializer=tf.random_normal_initializer(stddev=0.01))
      logits = tf.identity(logits, 'logits')

    return logits
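
The global average pool reduces over the spatial axes, which sit at different positions depending on `data_format`; the `axis` expression above selects [1, 2] for `channels_last` (NHWC) and [2, 3] for `channels_first` (NCHW). A small NumPy illustration of the same axis choice (`global_avg_pool` is an illustrative helper, not the head itself):

import numpy as np

def global_avg_pool(features, data_format='channels_last'):
    """Average over the spatial dimensions, mirroring the axis choice above."""
    axis = (1, 2) if data_format == 'channels_last' else (2, 3)
    return features.mean(axis=axis)

nhwc = np.random.rand(8, 7, 7, 2048)   # [batch, height, width, channels]
nchw = nhwc.transpose(0, 3, 1, 2)      # [batch, channels, height, width]
print(global_avg_pool(nhwc, 'channels_last').shape)   # (8, 2048)
print(global_avg_pool(nchw, 'channels_first').shape)  # (8, 2048)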
Example #6
    def _build_stem_network(self, inputs, is_training):
        """Build the stem network."""

        # Build the first conv layer.
        inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                             filters=nn_ops.round_filters(
                                                 FILTER_SIZE_MAP[0],
                                                 self._filter_size_scale),
                                             kernel_size=3,
                                             strides=2,
                                             data_format=self._data_format)
        inputs = tf.identity(inputs, 'initial_conv')
        inputs = self._batch_norm_activation(inputs, is_training=is_training)

        # Build the initial L1 block and L2 block.
        base0 = block_group(
            inputs=inputs,
            in_filters=nn_ops.round_filters(FILTER_SIZE_MAP[0],
                                            self._filter_size_scale),
            out_filters=nn_ops.round_filters(FILTER_SIZE_MAP[1],
                                             self._filter_size_scale),
            expand_ratio=DEFAULT_EXPAND_RATIO,
            block_repeats=self._block_repeats,
            strides=1,
            se_ratio=self._se_ratio,
            batch_norm_activation=self._batch_norm_activation,
            dropblock=self._dropblock,
            data_format=self._data_format,
            name='stem_block_0',
            is_training=is_training)
        base1 = block_group(
            inputs=base0,
            in_filters=nn_ops.round_filters(FILTER_SIZE_MAP[1],
                                            self._filter_size_scale),
            out_filters=nn_ops.round_filters(FILTER_SIZE_MAP[2],
                                             self._filter_size_scale),
            expand_ratio=DEFAULT_EXPAND_RATIO,
            block_repeats=self._block_repeats,
            strides=2,
            se_ratio=self._se_ratio,
            batch_norm_activation=self._batch_norm_activation,
            dropblock=self._dropblock,
            data_format=self._data_format,
            name='stem_block_1',
            is_training=is_training)

        return [base0, base1]
Example #7
  def _build_stem_network(self, inputs, is_training):
    """Build the stem network."""

    # Build the first conv and maxpooling layers.
    net = nn_ops.conv2d_fixed_padding(
        inputs=inputs,
        filters=64,
        kernel_size=7,
        strides=2,
        data_format=self._data_format)
    net = tf.identity(net, 'initial_conv')
    net = self._batch_norm_activation(net, is_training=is_training)
    net = tf.layers.max_pooling2d(
        inputs=net,
        pool_size=3,
        strides=2,
        padding='SAME',
        data_format=self._data_format)
    net = tf.identity(net, 'initial_max_pool')

    stem_features = []
    # Build the initial level 2 blocks.
    for i in range(self._num_init_blocks):
      net = block_group(
          inputs=net,
          filters=int(FILTER_SIZE_MAP[2] * self._filter_size_scale),
          strides=1,
          block_fn_cand=self._init_block_fn,
          block_repeats=self._block_repeats,
          activation=self._activation,
          batch_norm_activation=self._batch_norm_activation,
          dropblock=self._dropblock,
          data_format=self._data_format,
          name='stem_block_{}'.format(i + 1),
          is_training=is_training)
      stem_features.append(net)

    return stem_features
Example #8
def resample_with_alpha(feat,
                        input_block_fn,
                        target_width,
                        target_num_filters,
                        target_block_fn,
                        alpha=1.0,
                        use_native_resize_op=False,
                        batch_norm_activation=nn_ops.BatchNormActivation(),
                        data_format='channels_last',
                        name=None,
                        is_training=False):
    """Match resolution and feature dimension to the target block."""
    _, height, width, num_filters = feat.get_shape().as_list()
    if width is None or num_filters is None:
        raise ValueError('Shape of feat is None (shape:{}).'.format(
            feat.shape))

    if input_block_fn == 'bottleneck':
        num_filters //= 4
    new_num_filters = int(num_filters * alpha)

    with tf.variable_scope('resample_with_alpha_{}'.format(name)):
        # First 1x1 conv to reduce the feature dimension to alpha * num_filters.
        feat = nn_ops.conv2d_fixed_padding(inputs=feat,
                                           filters=new_num_filters,
                                           kernel_size=1,
                                           strides=1,
                                           data_format=data_format)
        feat = batch_norm_activation(feat, is_training=is_training)

        # Down-sample.
        if width > target_width:
            # Apply stride-2 conv to reduce feature map size to 1/2.
            feat = nn_ops.conv2d_fixed_padding(inputs=feat,
                                               filters=new_num_filters,
                                               kernel_size=3,
                                               strides=2,
                                               data_format=data_format)
            feat = batch_norm_activation(feat, is_training=is_training)
            # Apply maxpool to further reduce feature map size if necessary.
            if width // target_width > 2:
                if width % target_width != 0:
                    stride_size = 2
                else:
                    stride_size = width // target_width // 2
                feat = tf.layers.max_pooling2d(
                    inputs=feat,
                    pool_size=3 if width / target_width <= 4 else 5,
                    strides=stride_size,
                    padding='SAME',
                    data_format=data_format)
            # Use NN interpolation to resize if necessary. This could happen in cases
            # where `width` is not divisible by `target_width`.
            if feat.get_shape().as_list()[2] != target_width:
                feat = spatial_transform_ops.native_resize(
                    feat, [int(target_width / width * height), target_width])
        # Up-sample with NN interpolation.
        elif width < target_width:
            if target_width % width != 0 or use_native_resize_op:
                feat = spatial_transform_ops.native_resize(
                    feat, [int(target_width / width * height), target_width])
            else:
                scale = target_width // width
                feat = spatial_transform_ops.nearest_upsampling(feat,
                                                                scale=scale)

        # Match feature dimension to the target block.
        if target_block_fn == 'bottleneck':
            target_num_filters *= 4
        feat = nn_ops.conv2d_fixed_padding(inputs=feat,
                                           filters=target_num_filters,
                                           kernel_size=1,
                                           strides=1,
                                           data_format=data_format)
        feat = batch_norm_activation(feat, relu=False, is_training=is_training)

    return feat
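
The filter arithmetic is the subtle part: a `bottleneck` input block exposes 4x its nominal filter count, so it is divided by 4 before scaling by `alpha`, and a `bottleneck` target multiplies `target_num_filters` by 4 again before the final 1x1 conv. A pure-Python sketch of that bookkeeping (`resample_filter_counts` is an illustrative helper):

def resample_filter_counts(num_filters, input_block_fn, target_num_filters,
                           target_block_fn, alpha=1.0):
    """Return (intermediate filters, output filters) as in resample_with_alpha."""
    if input_block_fn == 'bottleneck':
        num_filters //= 4            # bottleneck outputs carry 4x the nominal width
    new_num_filters = int(num_filters * alpha)
    if target_block_fn == 'bottleneck':
        target_num_filters *= 4      # match the 4x expansion of the target block
    return new_num_filters, target_num_filters

# e.g. a bottleneck feature with 256 channels resampled into another bottleneck block
print(resample_filter_counts(256, 'bottleneck', 128, 'bottleneck', alpha=0.5))  # (32, 512)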
Example #9
def bottleneck_block(inputs,
                     filters,
                     strides,
                     use_projection,
                     activation=tf.nn.relu,
                     batch_norm_relu=nn_ops.BatchNormRelu(),
                     dropblock=nn_ops.Dropblock(),
                     drop_connect_rate=None,
                     data_format='channels_last',
                     is_training=False):
    """The bottleneck block with BN and DropBlock after convolutions.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    filters: a `int` number of filters for the first two convolutions. Note that
      the third and final convolution will use 4 times as many filters.
    strides: an `int` block stride. If greater than 1, this block will
      ultimately downsample the input.
    use_projection: a `bool` for whether this block should use a projection
      shortcut (versus the default identity shortcut). This is usually `True`
      for the first block of a block group, which may change the number of
      filters and the resolution.
    activation: activation function. Support 'relu' and 'swish'.
    batch_norm_relu: an operation that is added after convolutions, including a
      batch norm layer and an optional relu activation.
    dropblock: a drop block layer that is added after convluations. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a 'float' number that specifies the drop connection rate
      of the block. Note that the default `None` means no drop connection is
      applied.
    data_format: a `str` that specifies the data format.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block.
  """
    logging.info('-----> Building bottleneck block.')
    shortcut = inputs
    if use_projection:
        out_filters = 4 * filters
        shortcut = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                               filters=out_filters,
                                               kernel_size=1,
                                               strides=strides,
                                               data_format=data_format)
        shortcut = batch_norm_relu(shortcut,
                                   relu=False,
                                   is_training=is_training)
    shortcut = dropblock(shortcut, is_training=is_training)

    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=filters,
                                         kernel_size=3,
                                         strides=strides,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=4 * filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, relu=False, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    if drop_connect_rate:
        inputs = nn_ops.drop_connect(inputs, is_training, drop_connect_rate)

    return activation(inputs + shortcut)
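
The first two convolutions use `filters` channels and the final 1x1 conv expands to `4 * filters`, so a projection shortcut must also produce `4 * filters` for the residual addition to line up. A channel-count sketch (pure Python; `bottleneck_channel_counts` is an illustrative helper):

def bottleneck_channel_counts(in_channels, filters, use_projection):
    """Channel widths along the main path and the shortcut of a bottleneck block."""
    main_path = [filters, filters, 4 * filters]        # 1x1 reduce, 3x3, 1x1 expand
    shortcut = 4 * filters if use_projection else in_channels
    assert main_path[-1] == shortcut, 'addition needs matching channel counts'
    return main_path, shortcut

print(bottleneck_channel_counts(256, 128, use_projection=True))   # ([128, 128, 512], 512)
print(bottleneck_channel_counts(512, 128, use_projection=False))  # ([128, 128, 512], 512)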
Example #10
def mbconv_block(inputs,
                 in_filters,
                 out_filters,
                 expand_ratio,
                 strides,
                 use_projection,
                 kernel_size=3,
                 se_ratio=None,
                 batch_norm_relu=nn_ops.BatchNormRelu(),
                 dropblock=nn_ops.Dropblock(),
                 drop_connect_rate=None,
                 data_format='channels_last',
                 is_training=False):
    """The bottleneck block with BN and DropBlock after convolutions.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    in_filters: a `int` number of filters for the input feature map.
    out_filters: a `int` number of filters for the output feature map.
    expand_ratio: a `int` number as the feature dimension expansion ratio.
    strides: a `int` block stride. If greater than 1, this block will ultimately
      downsample the input.
    use_projection: a `bool` for whether this block should use a projection
      shortcut (versus the default identity shortcut). This is usually `True`
      for the first block of a block group, which may change the number of
      filters and the resolution.
    kernel_size: kernel size for the depthwise convolution.
    se_ratio: squeeze and excitation ratio.
    batch_norm_relu: an operation that is added after convolutions, including a
      batch norm layer and an optional relu activation.
    dropblock: a drop block layer that is added after convluations. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a 'float' number that specifies the drop connection rate
      of the block. Note that the default `None` means no drop connection is
      applied.
    data_format: a `str` that specifies the data format.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block.
  """
    tf.logging.info('-----> Building mbconv block.')
    shortcut = inputs
    if use_projection:
        shortcut = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                               filters=out_filters,
                                               kernel_size=1,
                                               strides=strides,
                                               data_format=data_format)
        shortcut = batch_norm_relu(shortcut, is_training=is_training)
        shortcut = dropblock(shortcut, is_training=is_training)

    # First 1x1 conv for channel expansion.
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=in_filters * expand_ratio,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    # Second depthwise conv.
    inputs = nn_ops.depthwise_conv2d_fixed_padding(inputs=inputs,
                                                   kernel_size=kernel_size,
                                                   strides=strides,
                                                   data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    # Squeeze and excitation.
    if se_ratio is not None and se_ratio > 0 and se_ratio <= 1:
        inputs = nn_ops.squeeze_excitation(inputs,
                                           in_filters,
                                           se_ratio,
                                           expand_ratio=expand_ratio,
                                           data_format=data_format)

    # Third 1x1 conv for reversed bottleneck.
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=out_filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_relu(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    if drop_connect_rate:
        inputs = nn_ops.drop_connect(inputs, is_training, drop_connect_rate)

    return tf.add(inputs, shortcut)
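
The MBConv block expands to `in_filters * expand_ratio` channels, runs the depthwise conv at that width, optionally applies squeeze-and-excitation, and then projects down to `out_filters`. A sketch of the channel widths; the squeeze-excitation bottleneck size is written as `in_filters * se_ratio`, which is an assumption about `nn_ops.squeeze_excitation` based on its arguments:

def mbconv_channel_counts(in_filters, out_filters, expand_ratio, se_ratio=None):
    """Channel widths through an MBConv block (expand -> depthwise -> project)."""
    expanded = in_filters * expand_ratio
    counts = {'expand_1x1': expanded, 'depthwise': expanded, 'project_1x1': out_filters}
    if se_ratio is not None and 0 < se_ratio <= 1:
        # Assumed SE bottleneck width; the real reduction lives in nn_ops.squeeze_excitation.
        counts['se_bottleneck'] = max(1, int(in_filters * se_ratio))
    return counts

print(mbconv_channel_counts(in_filters=24, out_filters=40, expand_ratio=6, se_ratio=0.25))
# {'expand_1x1': 144, 'depthwise': 144, 'project_1x1': 40, 'se_bottleneck': 6}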
Example #11
        def model(inputs, is_training=False):
            """Creation of the model graph."""
            if space_to_depth_block_size > 1:
                # conv0 uses space-to-depth transform for TPU performance.
                inputs = nn_ops.conv0_space_to_depth(
                    inputs=inputs,
                    filters=64,
                    kernel_size=7,
                    strides=2,
                    data_format=self._data_format,
                    space_to_depth_block_size=space_to_depth_block_size)
            else:
                inputs = nn_ops.conv2d_fixed_padding(
                    inputs=inputs,
                    filters=64,
                    kernel_size=7,
                    strides=2,
                    data_format=self._data_format)
            inputs = tf.identity(inputs, 'initial_conv')
            inputs = self._batch_norm_activation(inputs,
                                                 is_training=is_training)

            inputs = tf.layers.max_pooling2d(inputs=inputs,
                                             pool_size=3,
                                             strides=2,
                                             padding='SAME',
                                             data_format=self._data_format)
            inputs = tf.identity(inputs, 'initial_max_pool')

            c2 = block_group(inputs=inputs,
                             filters=64,
                             strides=1,
                             use_projection=True,
                             block_fn=block_fn,
                             block_repeats=layers[0],
                             activation=self._activation,
                             batch_norm_activation=self._batch_norm_activation,
                             dropblock=self._dropblock,
                             drop_connect_rate=get_drop_connect_rate(
                                 self._init_drop_connect_rate, 2, 5),
                             name='block_group1',
                             is_training=is_training)
            c3 = block_group(inputs=c2,
                             filters=128,
                             strides=2,
                             use_projection=True,
                             block_fn=block_fn,
                             block_repeats=layers[1],
                             activation=self._activation,
                             batch_norm_activation=self._batch_norm_activation,
                             dropblock=self._dropblock,
                             drop_connect_rate=get_drop_connect_rate(
                                 self._init_drop_connect_rate, 3, 5),
                             name='block_group2',
                             is_training=is_training)
            c4 = block_group(inputs=c3,
                             filters=256,
                             strides=2,
                             use_projection=True,
                             block_fn=block_fn,
                             block_repeats=layers[2],
                             activation=self._activation,
                             batch_norm_activation=self._batch_norm_activation,
                             dropblock=self._dropblock,
                             drop_connect_rate=get_drop_connect_rate(
                                 self._init_drop_connect_rate, 4, 5),
                             name='block_group3',
                             is_training=is_training)
            c5 = block_group(inputs=c4,
                             filters=512,
                             strides=2,
                             use_projection=True,
                             block_fn=block_fn,
                             block_repeats=layers[3],
                             activation=self._activation,
                             batch_norm_activation=self._batch_norm_activation,
                             dropblock=self._dropblock,
                             drop_connect_rate=get_drop_connect_rate(
                                 self._init_drop_connect_rate, 5, 5),
                             name='block_group4',
                             is_training=is_training)
            return {2: c2, 3: c3, 4: c4, 5: c5}
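
The space-to-depth stem is a TPU-friendly variant of the stride-2 7x7 conv: the input is rearranged so that each `block_size x block_size` spatial patch becomes extra channels, allowing an equivalent convolution on a smaller, wider tensor. A NumPy sketch of the standard space-to-depth rearrangement itself (an illustration, not `nn_ops.conv0_space_to_depth`):

import numpy as np

def space_to_depth(x, block_size):
    """Rearrange [N, H, W, C] -> [N, H/b, W/b, C*b*b] like tf.nn.space_to_depth."""
    n, h, w, c = x.shape
    x = x.reshape(n, h // block_size, block_size, w // block_size, block_size, c)
    x = x.transpose(0, 1, 3, 2, 4, 5)
    return x.reshape(n, h // block_size, w // block_size, c * block_size * block_size)

images = np.random.rand(2, 224, 224, 3)
print(space_to_depth(images, 2).shape)  # (2, 112, 112, 12)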
Example #12
def fused_mbconv_block(inputs,
                       in_filters,
                       out_filters,
                       expand_ratio,
                       strides,
                       kernel_size=3,
                       se_ratio=None,
                       batch_norm_activation=nn_ops.BatchNormActivation(),
                       dropblock=nn_ops.Dropblock(),
                       drop_connect_rate=None,
                       data_format='channels_last',
                       is_training=False):
    """The fused bottleneck block with BN and DropBlock after convolutions.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    in_filters: a `int` number of filters for the input feature map.
    out_filters: a `int` number of filters for the output feature map.
    expand_ratio: a `int` number as the feature dimension expansion ratio.
    strides: a `int` block stride. If greater than 1, this block will ultimately
      downsample the input.
    kernel_size: kernel size for the depthwise convolution.
    se_ratio: squeeze and excitation ratio.
    batch_norm_activation: an operation that includes a batch normalization
      layer followed by an optional activation layer.
    dropblock: a drop block layer that is added after convluations. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a 'float' number that specifies the drop connection rate
      of the block. Note that the default `None` means no drop connection is
      applied.
    data_format: a `str` that specifies the data format.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block.
  """
    tf.logging.info('-----> Building fused mbconv block.')
    shortcut = inputs

    # Fused expansion conv (kernel_size x kernel_size) for channel expansion.
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=in_filters * expand_ratio,
                                         kernel_size=kernel_size,
                                         strides=strides,
                                         data_format=data_format)
    inputs = batch_norm_activation(inputs, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    # Squeeze and excitation.
    if se_ratio is not None and se_ratio > 0 and se_ratio <= 1:
        inputs = nn_ops.squeeze_excitation(inputs,
                                           in_filters,
                                           se_ratio,
                                           expand_ratio=expand_ratio,
                                           data_format=data_format)

    # Final 1x1 conv to project back to the output feature dimension.
    inputs = nn_ops.conv2d_fixed_padding(inputs=inputs,
                                         filters=out_filters,
                                         kernel_size=1,
                                         strides=1,
                                         data_format=data_format)
    inputs = batch_norm_activation(inputs, relu=False, is_training=is_training)
    inputs = dropblock(inputs, is_training=is_training)

    if in_filters == out_filters and strides == 1:
        if drop_connect_rate:
            inputs = nn_ops.drop_connect(inputs, is_training,
                                         drop_connect_rate)
        inputs = tf.add(inputs, shortcut)

    return inputs
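
Unlike the MBConv block above, the fused variant has no projection shortcut: the residual is only added when the input and output already have the same shape (`in_filters == out_filters and strides == 1`). A tiny sketch of that condition (`fused_mbconv_has_residual` is an illustrative helper):

def fused_mbconv_has_residual(in_filters, out_filters, strides):
    """Whether fused_mbconv_block adds the shortcut back onto its output."""
    return in_filters == out_filters and strides == 1

print(fused_mbconv_has_residual(24, 24, 1))  # True  -> output = block(x) + x
print(fused_mbconv_has_residual(24, 48, 2))  # False -> output = block(x) only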
Example #13
    def __call__(self, images, is_training=False):
        """Generate a multiscale feature pyramid.

    Args:
      images: The input image tensor.
      is_training: `bool` if True, the model is in training mode.

    Returns:
      a `dict` containing `int` keys for continuous feature levels
      [min_level, min_level + 1, ..., max_level]. The values are corresponding
      features with shape [batch_size, height_l, width_l,
      endpoints_num_filters].
    """
        x = images
        with tf.variable_scope('efficientnet'):
            x = nn_ops.conv2d_fixed_padding(inputs=x,
                                            filters=32,
                                            kernel_size=3,
                                            strides=2,
                                            data_format=self._data_format)
            x = tf.identity(x, 'initial_conv')
            x = self._batch_norm_activation(x, is_training=is_training)

            endpoints = []
            for i, block_spec in enumerate(self._block_specs):
                bn_act = nn_ops.BatchNormActivation(
                    activation=block_spec.act_fn)
                with tf.variable_scope('block_{}'.format(i)):
                    for j in range(block_spec.num_repeats):
                        strides = (
                            1 if j > 0 else
                            efficientnet_constants.EFFICIENTNET_STRIDES[i])

                        if block_spec.block_fn == 'conv':
                            x = nn_ops.conv2d_fixed_padding(
                                inputs=x,
                                filters=block_spec.output_filters,
                                kernel_size=block_spec.kernel_size,
                                strides=strides,
                                data_format=self._data_format)
                            x = bn_act(x, is_training=is_training)
                        elif block_spec.block_fn == 'mbconv':
                            x_shape = x.get_shape().as_list()
                            in_filters = (x_shape[1] if self._data_format
                                          == 'channels_first' else x_shape[-1])
                            x = nn_blocks.mbconv_block(
                                inputs=x,
                                in_filters=in_filters,
                                out_filters=block_spec.output_filters,
                                expand_ratio=block_spec.expand_ratio,
                                strides=strides,
                                kernel_size=block_spec.kernel_size,
                                se_ratio=block_spec.se_ratio,
                                batch_norm_activation=bn_act,
                                data_format=self._data_format,
                                is_training=is_training)
                        elif block_spec.block_fn == 'fused_mbconv':
                            x_shape = x.get_shape().as_list()
                            in_filters = (x_shape[1] if self._data_format
                                          == 'channels_first' else x_shape[-1])
                            x = nn_blocks.fused_mbconv_block(
                                inputs=x,
                                in_filters=in_filters,
                                out_filters=block_spec.output_filters,
                                expand_ratio=block_spec.expand_ratio,
                                strides=strides,
                                kernel_size=block_spec.kernel_size,
                                se_ratio=block_spec.se_ratio,
                                batch_norm_activation=bn_act,
                                data_format=self._data_format,
                                is_training=is_training)
                        else:
                            raise ValueError(
                                'Un-supported block_fn `{}`!'.format(
                                    block_spec.block_fn))
                    x = tf.identity(x, 'endpoints')
                    endpoints.append(x)

        return {
            2: endpoints[1],
            3: endpoints[2],
            4: endpoints[4],
            5: endpoints[6]
        }