Beispiel #1
0
    def coarsemask_decoder_net(self,
                               images,
                               is_training=False,
                               batch_norm_relu=nn_ops.BatchNormRelu()):
        """Builds the coarse mask decoder: a stack of 3x3 conv + batch-norm ops.

        Args:
          images: A tensor of size [batch, height_in, width_in, channels_in].
          is_training: Whether batch_norm layers are in training mode.
          batch_norm_relu: A callable applied after every convolution. It is
            invoked as `batch_norm_relu(x, is_training=..., name=...)`.

        Returns:
          The tensor produced by the last conv / batch-norm pair. Each conv
          uses `self._num_downsample_channels` filters with 'same' padding.
          When `self._num_convs` is 0 the input is returned unchanged.
        """
        net = images
        for layer_idx in range(self._num_convs):
            # Layer names are indexed so every layer gets its own variables.
            conv_name = 'coarse-class-%d' % layer_idx
            bn_name = 'coarse-class-%d-bn' % layer_idx

            # The conv itself is linear; any non-linearity comes from
            # `batch_norm_relu`.
            net = tf.layers.conv2d(
                net,
                self._num_downsample_channels,
                kernel_size=(3, 3),
                bias_initializer=tf.zeros_initializer(),
                kernel_initializer=tf.random_normal_initializer(stddev=0.01),
                activation=None,
                padding='same',
                name=conv_name)
            net = batch_norm_relu(net, is_training=is_training, name=bn_name)

        return net
Beispiel #2
0
    def __init__(self,
                 min_level,
                 max_level,
                 num_classes,
                 anchors_per_location,
                 num_convs=4,
                 num_filters=256,
                 use_separable_conv=False,
                 batch_norm_relu=nn_ops.BatchNormRelu()):
        """Stores the hyper-parameters used to build the RetinaNet head.

        Args:
          min_level: `int` number of minimum feature level.
          max_level: `int` number of maximum feature level.
          num_classes: `int` number of classification categories.
          anchors_per_location: `int` number of anchors per pixel location.
          num_convs: `int` number of stacked convolutions before the last
            prediction layer.
          num_filters: `int` number of filters used in the head architecture.
          use_separable_conv: `bool` indicating whether to use separable
            convolution.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer (optional).
        """
        # Range of feature levels the head covers.
        self._min_level, self._max_level = min_level, max_level

        # Prediction layout.
        self._anchors_per_location = anchors_per_location
        self._num_classes = num_classes

        # Shape of the shared convolutional tower.
        self._use_separable_conv = use_separable_conv
        self._num_filters = num_filters
        self._num_convs = num_convs

        # Normalization/activation op; only stored here.
        self._batch_norm_relu = batch_norm_relu
Beispiel #3
0
def resample_feature_map(feat,
                         level,
                         target_level,
                         is_training,
                         target_feat_dims=256,
                         conv2d_op=tf.layers.conv2d,
                         batch_norm_relu=nn_ops.BatchNormRelu(),
                         name=None):
    """Resamples a feature map to a target channel count and feature level.

    Args:
      feat: the input feature tensor. The size of its axis 3 is compared
        against `target_feat_dims`.
      level: level of the input feature map.
      target_level: level the output feature map should have.
      is_training: passed through to `batch_norm_relu`.
      target_feat_dims: desired size of axis 3 of the output.
      conv2d_op: convolution op used for the 1x1 projection.
      batch_norm_relu: normalization op applied after the projection. It is
        called with `relu=False`.
      name: suffix of the enclosing variable scope (`resample_<name>`).

    Returns:
      The resampled feature tensor.
    """
    in_feat_dims = feat.get_shape().as_list()[3]
    needs_projection = in_feat_dims != target_feat_dims

    with tf.variable_scope('resample_{}'.format(name)):
        # Step 1: match the channel count with a 1x1 conv + batch norm.
        if needs_projection:
            feat = conv2d_op(feat,
                             filters=target_feat_dims,
                             kernel_size=(1, 1),
                             padding='same')
            feat = batch_norm_relu(feat,
                                   is_training=is_training,
                                   relu=False,
                                   name='bn')

        # Step 2: match the level. Each level step is a factor of two.
        if level < target_level:
            pool_stride = int(2**(target_level - level))
            feat = tf.layers.max_pooling2d(inputs=feat,
                                           pool_size=pool_stride,
                                           strides=[pool_stride, pool_stride],
                                           padding='SAME')
        elif level > target_level:
            upsample_scale = int(2**(level - target_level))
            feat = spatial_transform_ops.nearest_upsampling(
                feat, scale=upsample_scale)

    return feat
Beispiel #4
0
def block_group(inputs,
                filters,
                strides,
                use_projection,
                block_fn,
                block_repeats,
                batch_norm_relu=nn_ops.BatchNormRelu(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
    """Chains `block_repeats` blocks into one group.

    The first block receives the caller's `strides` and `use_projection`.
    Every following block is built with stride 1 and no projection shortcut.

    Args:
      inputs: the input `Tensor`. The layout is determined by `data_format`.
      filters: an `int` number of filters for the first two convolutions.
      strides: an `int` block stride. If greater than 1, this block will
        ultimately downsample the input.
      use_projection: a `bool` for whether the first block should use a
        projection shortcut (versus the default identity shortcut).
      block_fn: the `function` for the block to use within the model.
      block_repeats: an `int` number of blocks to repeat in the group.
      batch_norm_relu: an operation that is added after convolutions,
        including a batch norm layer and an optional relu activation.
      dropblock: a drop block layer that is added after convolutions.
      drop_connect_rate: a `float` drop connection rate of the block. The
        default `None` means no drop connection is applied.
      data_format: a `str` that specifies the data format.
      name: a `str` name for the Tensor output of the block layer.
      is_training: a `bool`; if True, the model is in training mode.

    Returns:
      The output `Tensor` of the block layer.
    """
    # Keyword arguments that are identical for every block in the group.
    shared_kwargs = dict(batch_norm_relu=batch_norm_relu,
                         dropblock=dropblock,
                         drop_connect_rate=drop_connect_rate,
                         data_format=data_format,
                         is_training=is_training)

    # Leading block: the only one that may stride or project.
    net = block_fn(inputs,
                   filters,
                   strides,
                   use_projection=use_projection,
                   **shared_kwargs)

    # Remaining blocks keep the resolution and use identity shortcuts.
    for _ in range(block_repeats - 1):
        net = block_fn(net, filters, 1, use_projection=False, **shared_kwargs)

    return tf.identity(net, name)
Beispiel #5
0
    def __init__(self,
                 min_level=3,
                 max_level=7,
                 fpn_feat_dims=256,
                 use_separable_conv=False,
                 batch_norm_relu=nn_ops.BatchNormRelu()):
        """Stores the FPN configuration and picks the convolution op.

        Args:
          min_level: `int` minimum level in FPN output feature maps.
          max_level: `int` maximum level in FPN output feature maps.
          fpn_feat_dims: `int` number of filters in FPN layers.
          use_separable_conv: `bool`, if True use separable convolution for
            convolution in FPN layers.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer (optional).
        """
        self._min_level, self._max_level = min_level, max_level
        self._fpn_feat_dims = fpn_feat_dims
        self._batch_norm_relu = batch_norm_relu

        # The separable variant is pinned to depth_multiplier=1.
        self._conv2d_op = (
            functools.partial(tf.layers.separable_conv2d, depth_multiplier=1)
            if use_separable_conv else tf.layers.conv2d)
Beispiel #6
0
    def __init__(self,
                 num_classes,
                 endpoints_num_filters=0,
                 aggregation='top',
                 dropout_rate=0.0,
                 batch_norm_relu=nn_ops.BatchNormRelu(),
                 data_format='channels_last'):
        """Stores the hyper-parameters used to build the classification head.

        Args:
          num_classes: the number of classes, including one background class.
          endpoints_num_filters: the number of filters of the optional
            embedding layer after the multiscale feature aggregation. If 0,
            no additional embedding layer is applied.
          aggregation: the method to aggregate the multiscale feature maps.
            If `top`, the feature map of the highest level will be directly
            used. If `all`, all levels will be used by nearest-neighbor
            upsampling and averaging to the same size as the lowest level
            (the number of filters for all levels should match).
          dropout_rate: the dropout rate of the optional dropout layer. If
            0.0, no additional dropout layer is applied.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by an optional relu layer.
          data_format: An optional string from: `channels_last`,
            `channels_first`. Defaults to `channels_last`.
        """
        # Nothing is validated or built here; the values are only recorded.
        self._data_format = data_format
        self._batch_norm_relu = batch_norm_relu
        self._dropout_rate = dropout_rate
        self._aggregation = aggregation
        self._endpoints_num_filters = endpoints_num_filters
        self._num_classes = num_classes
Beispiel #7
0
def batch_norm_relu_generator(params, activation='relu'):
  """Builds an `nn_ops.BatchNormRelu` op from a batch-norm config object.

  Args:
    params: an object exposing `batch_norm_momentum`, `batch_norm_epsilon`,
      `batch_norm_trainable` and `use_sync_bn` attributes.
    activation: forwarded unchanged as the `activation` argument.

  Returns:
    The configured `nn_ops.BatchNormRelu` instance.
  """
  bn_kwargs = dict(
      momentum=params.batch_norm_momentum,
      epsilon=params.batch_norm_epsilon,
      trainable=params.batch_norm_trainable,
      use_sync_bn=params.use_sync_bn,
      activation=activation)
  return nn_ops.BatchNormRelu(**bn_kwargs)
Beispiel #8
0
    def __init__(self,
                 resnet_depth,
                 dropblock=nn_ops.Dropblock(),
                 batch_norm_relu=nn_ops.BatchNormRelu(),
                 data_format='channels_last'):
        """ResNet initialization function.

        Args:
          resnet_depth: `int` depth of ResNet backbone model. Must be one of
            18, 34, 50, 101, 152 or 200.
          dropblock: a dropblock layer.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer (optional).
          data_format: `str` either "channels_first" for `[batch, channels,
            height, width]` or "channels_last" for `[batch, height, width,
            channels]`.

        Raises:
          ValueError: if `resnet_depth` is not one of the supported depths.
        """
        self._resnet_depth = resnet_depth

        self._dropblock = dropblock
        self._batch_norm_relu = batch_norm_relu

        self._data_format = data_format

        # Block type and per-group repeat counts for every supported depth.
        model_params = {
            18: {
                'block': self.residual_block,
                'layers': [2, 2, 2, 2]
            },
            34: {
                'block': self.residual_block,
                'layers': [3, 4, 6, 3]
            },
            50: {
                'block': self.bottleneck_block,
                'layers': [3, 4, 6, 3]
            },
            101: {
                'block': self.bottleneck_block,
                'layers': [3, 4, 23, 3]
            },
            152: {
                'block': self.bottleneck_block,
                'layers': [3, 8, 36, 3]
            },
            200: {
                'block': self.bottleneck_block,
                'layers': [3, 24, 36, 3]
            }
        }

        if resnet_depth not in model_params:
            valid_resnet_depths = ', '.join(
                str(depth) for depth in sorted(model_params))
            # Format a single message. Passing the depth as a second
            # positional argument makes str(error) render as a tuple and
            # leaves the offending value out of the sentence.
            raise ValueError(
                'The resnet_depth should be in [{}]. Not a valid '
                'resnet_depth: {}'.format(valid_resnet_depths, resnet_depth))

        params = model_params[resnet_depth]
        self._resnet_fn = self.resnet_v1_generator(params['block'],
                                                   params['layers'])
Beispiel #9
0
    def decoder_net(self,
                    features,
                    is_training=False,
                    batch_norm_relu=nn_ops.BatchNormRelu()):
        """Fine mask decoder: a conv/batch-norm stack plus a 1x1 predictor.

        Args:
          features: A rank-5 tensor whose static shape is unpacked as
            [batch_size, num_instances, height, width, num_channels].
          is_training: Whether batch_norm layers are in training mode.
          batch_norm_relu: A callable applied after every 3x3 convolution. It
            is invoked as `batch_norm_relu(x, is_training=..., name=...)`.

        Returns:
          mask_logits: the per-class mask logits, reshaped to
            [batch_size, num_instances, height, width,
            self._mask_num_classes].
        """
        static_shape = features.get_shape().as_list()
        batch_size, num_instances, height, width, num_channels = static_shape

        # Fold the instance axis into the batch axis so 2-D convs apply. The
        # product is computed in Python from the static shape.
        merged_shape = [
            batch_size * num_instances, height, width, num_channels
        ]
        net = tf.reshape(features, merged_shape)

        for layer_idx in range(self._num_convs):
            net = tf.layers.conv2d(
                net,
                self._num_downsample_channels,
                kernel_size=(3, 3),
                bias_initializer=tf.zeros_initializer(),
                kernel_initializer=tf.random_normal_initializer(stddev=0.01),
                activation=None,
                padding='same',
                name='class-%d' % layer_idx)
            net = batch_norm_relu(net,
                                  is_training=is_training,
                                  name='class-%d-bn' % layer_idx)

        # Predict per-class instance masks.
        # Focal loss bias initialization to have foreground 0.01 probability.
        prior_bias_init = tf.constant_initializer(-np.log((1 - 0.01) / 0.01))
        predictor_kernel_init = tf.random_normal_initializer(mean=0,
                                                             stddev=0.01)
        logits = tf.layers.conv2d(net,
                                  self._mask_num_classes,
                                  kernel_size=(1, 1),
                                  bias_initializer=prior_bias_init,
                                  kernel_initializer=predictor_kernel_init,
                                  padding='same',
                                  name='class-predict')

        # Restore the separate batch and instance axes.
        output_shape = [
            batch_size, num_instances, height, width, self._mask_num_classes
        ]
        return tf.reshape(logits, output_shape)
Beispiel #10
0
def block_group(inputs,
                filters,
                strides,
                block_fn_cand,
                block_repeats,
                activation=tf.nn.swish,
                batch_norm_relu=nn_ops.BatchNormRelu(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
    """Creates one group of blocks for SpineNet.

    Args:
      inputs: a rank-4 input `Tensor`. The size of its last axis is used to
        decide whether the first block needs a projection shortcut.
      filters: an `int` number of filters passed to each block.
      strides: an `int` stride for the first block of the group.
      block_fn_cand: a `str`, either 'bottleneck' or 'residual'.
      block_repeats: an `int` number of blocks in the group.
      activation: activation function forwarded to each block.
      batch_norm_relu: normalization op forwarded to each block.
      dropblock: drop block layer forwarded to each block.
      drop_connect_rate: drop connection rate forwarded to each block.
      data_format: a `str` data format forwarded to each block.
      name: a `str` name for the output Tensor.
      is_training: a `bool`; if True, the model is in training mode.

    Returns:
      The output `Tensor` of the last block, wrapped in `tf.identity`.

    Raises:
      ValueError: if `block_fn_cand` is not a known block type.
    """
    block_fn = {
        'bottleneck': nn_blocks.bottleneck_block,
        'residual': nn_blocks.residual_block,
    }.get(block_fn_cand)
    if block_fn is None:
        raise ValueError(
            'Block function {} not implemented.'.format(block_fn_cand))

    _, _, _, in_filters = inputs.get_shape().as_list()

    # The identity shortcut is kept only when the stride is 1 and the input
    # already has the expected channel count: 4 * filters for bottleneck
    # blocks, filters otherwise.
    if block_fn_cand == 'bottleneck':
        expected_filters = filters * 4
    else:
        expected_filters = filters
    use_projection = not (in_filters == expected_filters and strides == 1)

    # Keyword arguments that are identical for every block in the group.
    shared_kwargs = dict(activation=activation,
                         batch_norm_relu=batch_norm_relu,
                         dropblock=dropblock,
                         drop_connect_rate=drop_connect_rate,
                         data_format=data_format,
                         is_training=is_training)

    # Only the first block per block_group uses projection shortcut and
    # strides.
    net = block_fn(inputs,
                   filters,
                   strides,
                   use_projection=use_projection,
                   **shared_kwargs)
    for _ in range(block_repeats - 1):
        net = block_fn(net, filters, 1, use_projection=False, **shared_kwargs)

    return tf.identity(net, name)
Beispiel #11
0
    def __init__(self,
                 num_classes,
                 num_convs=0,
                 num_filters=256,
                 use_separable_conv=False,
                 num_fcs=2,
                 fc_dims=1024,
                 use_batch_norm=True,
                 batch_norm_relu=nn_ops.BatchNormRelu()):
        """Stores the hyper-parameters of the Fast R-CNN box head.

        Args:
          num_classes: an integer for the number of classes.
          num_convs: `int` number of intermediate conv layers before the FC
            layers.
          num_filters: `int` number of filters of the intermediate conv
            layers.
          use_separable_conv: `bool`, indicating whether separable conv
            layers are used.
          num_fcs: `int` number of FC layers before the predictions.
          fc_dims: `int` number of dimensions of the FC layers.
          use_batch_norm: `bool`, indicating whether batchnorm layers are
            added.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer (optional).
        """
        self._num_classes = num_classes

        # Convolutional part of the head.
        self._num_convs = num_convs
        self._num_filters = num_filters
        if not use_separable_conv:
            # Regular convs get an explicit variance-scaling kernel init.
            self._conv2d_op = functools.partial(
                tf.layers.conv2d,
                kernel_initializer=tf.keras.initializers.VarianceScaling(
                    scale=2, mode='fan_out',
                    distribution='untruncated_normal'),
                bias_initializer=tf.zeros_initializer())
        else:
            # Separable convs only have the bias initializer overridden.
            self._conv2d_op = functools.partial(
                tf.layers.separable_conv2d,
                depth_multiplier=1,
                bias_initializer=tf.zeros_initializer())

        # Fully connected part of the head.
        self._num_fcs = num_fcs
        self._fc_dims = fc_dims

        # Normalization settings.
        self._use_batch_norm = use_batch_norm
        self._batch_norm_relu = batch_norm_relu
Beispiel #12
0
    def __init__(self,
                 num_classes,
                 mrcnn_resolution,
                 batch_norm_relu=nn_ops.BatchNormRelu()):
        """Stores the parameters needed to build the head.

        NOTE(review): the original summary called this a "Fast R-CNN head",
        while `mrcnn_resolution` is described as a mask resolution. Confirm
        which head this constructor belongs to.

        Args:
          num_classes: an integer for the number of classes.
          mrcnn_resolution: an integer that is the resolution of masks.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer (optional).
        """
        # The values are only recorded; nothing is built here.
        self._batch_norm_relu = batch_norm_relu
        self._mrcnn_resolution = mrcnn_resolution
        self._num_classes = num_classes
Beispiel #13
0
    def __init__(self,
                 num_classes,
                 mlp_head_dim,
                 batch_norm_relu=nn_ops.BatchNormRelu()):
        """Stores the parameters of the Fast R-CNN box head.

        Args:
          num_classes: an integer for the number of classes.
          mlp_head_dim: an integer that is the hidden dimension in the
            fully-connected layers.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer (optional).
        """
        # The values are only recorded; nothing is built here.
        self._batch_norm_relu = batch_norm_relu
        self._mlp_head_dim = mlp_head_dim
        self._num_classes = num_classes
Beispiel #14
0
  def __init__(self,
               min_level=3,
               max_level=7,
               fpn_feat_dims=256,
               num_repeats=7,
               use_separable_conv=False,
               dropblock=nn_ops.Dropblock(),
               batch_norm_relu=nn_ops.BatchNormRelu()):
    """NAS-FPN initialization function.

    Args:
      min_level: `int` minimum level in NAS-FPN output feature maps.
      max_level: `int` maximum level in NAS-FPN output feature maps.
      fpn_feat_dims: `int` number of filters in FPN layers.
      num_repeats: number of repeats for feature pyramid network.
      use_separable_conv: `bool`, if True use separable convolution for
        convolution in NAS-FPN layers.
      dropblock: a Dropblock layer.
      batch_norm_relu: an operation that includes a batch normalization layer
        followed by a relu layer(optional).

    Raises:
      ValueError: if the (min_level, max_level) pair is anything but (3, 7).
    """
    self._min_level = min_level
    self._max_level = max_level

    # Only one level range has a cell configuration defined below.
    if not (min_level == 3 and max_level == 7):
      raise ValueError('The NAS-FPN with min level {} and max level {} '
                       'is not supported.'.format(min_level, max_level))

    # Flat cell description handed to `Config`, four integers per row.
    model_config = [
        3, 1, 1, 3,
        3, 0, 1, 5,
        4, 0, 0, 6,  # Output to level 3.
        3, 0, 6, 7,  # Output to level 4.
        2, 1, 7, 8,  # Output to level 5.
        0, 1, 6, 9,  # Output to level 7.
        1, 1, 9, 10]  # Output to level 6.
    self._config = Config(model_config, self._min_level, self._max_level)

    self._num_repeats = num_repeats
    self._fpn_feat_dims = fpn_feat_dims
    self._use_separable_conv = use_separable_conv
    self._dropblock = dropblock
    self._batch_norm_relu = batch_norm_relu

    # Pre-bind the arguments that are the same for every resampling call.
    self._resample_feature_map = functools.partial(
        resample_feature_map,
        target_feat_dims=fpn_feat_dims,
        batch_norm_relu=batch_norm_relu)
Beispiel #15
0
    def __init__(self,
                 min_level,
                 max_level,
                 anchors_per_location,
                 num_convs=2,
                 num_filters=256,
                 use_separable_conv=False,
                 use_batch_norm=True,
                 batch_norm_relu=nn_ops.BatchNormRelu()):
        """Stores the hyper-parameters of the Region Proposal Network head.

        Args:
          min_level: `int` number of minimum feature level.
          max_level: `int` number of maximum feature level.
          anchors_per_location: `int` number of anchors per pixel location.
          num_convs: `int` number of intermediate conv layers before the
            prediction.
          num_filters: `int` number of filters of the intermediate conv
            layers.
          use_separable_conv: `bool`, indicating whether separable conv
            layers are used.
          use_batch_norm: `bool`, indicating whether batchnorm layers are
            added.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer (optional).
        """
        self._min_level, self._max_level = min_level, max_level
        self._anchors_per_location = anchors_per_location

        self._num_convs = num_convs
        self._num_filters = num_filters

        if not use_separable_conv:
            # Regular convs get a small random-normal kernel init.
            self._conv2d_op = functools.partial(
                tf.layers.conv2d,
                kernel_initializer=tf.random_normal_initializer(stddev=0.01),
                bias_initializer=tf.zeros_initializer())
        else:
            # Separable convs only have the bias initializer overridden.
            self._conv2d_op = functools.partial(
                tf.layers.separable_conv2d,
                depth_multiplier=1,
                bias_initializer=tf.zeros_initializer())

        self._use_batch_norm = use_batch_norm
        self._batch_norm_relu = batch_norm_relu
Beispiel #16
0
    def __init__(self,
                 num_classes,
                 mask_target_size,
                 use_batch_norm=True,
                 batch_norm_relu=nn_ops.BatchNormRelu()):
        """Stores the parameters needed to build the head.

        NOTE(review): the original summary called this a "Fast R-CNN head",
        while `mask_target_size` is described as a mask resolution. Confirm
        which head this constructor belongs to.

        Args:
          num_classes: an integer for the number of classes.
          mask_target_size: an integer that is the resolution of masks.
          use_batch_norm: `bool`, indicating whether batchnorm layers are
            added.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer (optional).
        """
        # The values are only recorded; nothing is built here.
        self._batch_norm_relu = batch_norm_relu
        self._use_batch_norm = use_batch_norm
        self._mask_target_size = mask_target_size
        self._num_classes = num_classes
Beispiel #17
0
  def __call__(self,
               features,
               is_training,
               batch_norm_relu=nn_ops.BatchNormRelu()):
    """Generate logits for semantic segmentation.

    Args:
      features: an indexable collection of feature tensors. Only
        `features[self._level]` is used (presumably a dict keyed by feature
        level; TODO confirm against the caller).
      is_training: a bool indicating whether in training mode.
      batch_norm_relu: a callable applied after every 3x3 convolution. It is
        invoked as `batch_norm_relu(x, is_training=..., name=...)`.

    Returns:
      logits: the output of a final 1x1 convolution with `self._num_classes`
        filters, applied on top of the conv/batch-norm stack.
    """
    level_features = features[self._level]
    # Intermediate convs keep the channel count of the selected feature map.
    num_channels = level_features.get_shape().as_list()[-1]

    with tf.variable_scope('segmentation', reuse=tf.AUTO_REUSE):
      net = level_features
      for layer_idx in range(self._num_convs):
        net = tf.layers.conv2d(
            net,
            num_channels,
            kernel_size=(3, 3),
            bias_initializer=tf.zeros_initializer(),
            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
            activation=None,
            padding='same',
            name='class-%d' % layer_idx)
        net = batch_norm_relu(
            net,
            is_training=is_training,
            name='class-%d-bn' % layer_idx)

      # Final per-pixel classifier.
      logits = tf.layers.conv2d(
          net,
          self._num_classes,  # This include background class 0.
          kernel_size=(1, 1),
          bias_initializer=tf.zeros_initializer(),
          kernel_initializer=tf.random_normal_initializer(stddev=0.01),
          activation=None,
          padding='same')

    return logits
Beispiel #18
0
    def __init__(self,
                 min_level,
                 max_level,
                 anchors_per_location,
                 batch_norm_relu=nn_ops.BatchNormRelu()):
        """Stores the parameters of the Region Proposal Network head.

        Args:
          min_level: `int` number of minimum feature level.
          max_level: `int` number of maximum feature level.
          anchors_per_location: `int` number of anchors per pixel location.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer (optional).
        """
        # The values are only recorded; nothing is built here.
        self._min_level, self._max_level = min_level, max_level
        self._batch_norm_relu = batch_norm_relu
        self._anchors_per_location = anchors_per_location
Beispiel #19
0
Datei: fpn.py Projekt: zwcdp/tpu
    def __init__(self,
                 min_level=3,
                 max_level=7,
                 fpn_feat_dims=256,
                 batch_norm_relu=nn_ops.BatchNormRelu()):
        """Stores the FPN configuration.

        Args:
          min_level: `int` minimum level in FPN output feature maps.
          max_level: `int` maximum level in FPN output feature maps.
          fpn_feat_dims: `int` number of filters in FPN layers.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer (optional).
        """
        # The values are only recorded; nothing is built here.
        self._min_level, self._max_level = min_level, max_level
        self._batch_norm_relu = batch_norm_relu
        self._fpn_feat_dims = fpn_feat_dims
Beispiel #20
0
    def __init__(self,
                 num_classes,
                 level,
                 num_convs,
                 use_batch_norm=True,
                 batch_norm_relu=nn_ops.BatchNormRelu()):
        """Stores the parameters of the segmentation head.

        Args:
          num_classes: `int` number of mask classification categories. The
            number of classes does not include background class.
          level: `int` feature level used for prediction.
          num_convs: `int` number of stacked convolutions before the last
            prediction layer.
          use_batch_norm: `bool`, indicating whether batchnorm layers are
            added.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer (optional).
        """
        # The values are only recorded; nothing is built here.
        self._batch_norm_relu = batch_norm_relu
        self._use_batch_norm = use_batch_norm
        self._num_convs = num_convs
        self._level = level
        self._num_classes = num_classes
Beispiel #21
0
    def __init__(self,
                 min_level=3,
                 max_level=7,
                 endpoints_num_filters=256,
                 resample_alpha=0.5,
                 use_native_resize_op=False,
                 block_specs=build_block_specs(),
                 block_repeats=1,
                 filter_size_scale=1.0,
                 activation='swish',
                 batch_norm_relu=nn_ops.BatchNormRelu(),
                 init_drop_connect_rate=None,
                 data_format='channels_last'):
        """SpineNet initialization function.

        Args:
          min_level: an `int` representing the minimum level in SpineNet
            endpoints.
          max_level: an `int` representing the maximum level in SpineNet
            endpoints.
          endpoints_num_filters: an `int` representing the final feature
            dimension of endpoints before the shared conv layers in head.
          resample_alpha: a `float` representing the scaling factor to scale
            feature dimension before resolution resampling.
          use_native_resize_op: Whether to use native
            tf.image.nearest_neighbor_resize or the broadcast implementation
            to do upsampling.
          block_specs: a list of BlockSpec objects that specifies the
            SpineNet network topology. By default, the result of
            `build_block_specs()` is used.
          block_repeats: an `int` representing the number of repeats per
            block group.
          filter_size_scale: a `float` representing the scaling factor to
            uniformly scale feature dimension in SpineNet.
          activation: name of the activation function, 'relu' or 'swish'.
          batch_norm_relu: an operation that is added after convolutions,
            including a batch norm layer and an optional relu activation.
          init_drop_connect_rate: a `float` number that specifies the initial
            drop connection rate. The default `None` means no drop
            connection is applied.
          data_format: An optional string from: "channels_last",
            "channels_first". Defaults to "channels_last".

        Raises:
          ValueError: if `activation` is neither 'relu' nor 'swish'.
        """
        self._min_level, self._max_level = min_level, max_level
        self._endpoints_num_filters = endpoints_num_filters

        # The stem is fixed: two bottleneck blocks.
        self._init_block_fn = 'bottleneck'
        self._num_init_blocks = 2

        self._resample_alpha = resample_alpha
        self._use_native_resize_op = use_native_resize_op

        # Translate the activation name into the TensorFlow function.
        if activation not in ('relu', 'swish'):
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._activation = (
            tf.nn.relu if activation == 'relu' else tf.nn.swish)

        self._block_specs = block_specs
        self._block_repeats = block_repeats
        self._filter_size_scale = filter_size_scale
        self._batch_norm_relu = batch_norm_relu
        # Hard-code it to not use DropBlock.
        self._dropblock = nn_ops.Dropblock()
        self._init_drop_connect_rate = init_drop_connect_rate
        self._data_format = data_format
Beispiel #22
0
def batch_norm_relu_generator(params):
    """Builds an `nn_ops.BatchNormRelu` op from a batch-norm config object.

    Args:
      params: an object exposing `batch_norm_momentum`, `batch_norm_epsilon`
        and `batch_norm_trainable` attributes.

    Returns:
      The configured `nn_ops.BatchNormRelu` instance.
    """
    bn_kwargs = dict(momentum=params.batch_norm_momentum,
                     epsilon=params.batch_norm_epsilon,
                     trainable=params.batch_norm_trainable)
    return nn_ops.BatchNormRelu(**bn_kwargs)
 def _batch_norm_op(**kwargs):
     """Builds an `nn_ops.BatchNormRelu`, forwarding extra keyword arguments.

     Momentum, epsilon and trainable are read from `params`; every other
     setting comes from `kwargs`.

     NOTE(review): `params` is not defined in this snippet. It is presumably
     a variable of an enclosing function that is not visible here; confirm.
     """
     return nn_ops.BatchNormRelu(momentum=params.batch_norm_momentum,
                                 epsilon=params.batch_norm_epsilon,
                                 trainable=params.batch_norm_trainable,
                                 **kwargs)
Beispiel #24
0
def bottleneck_block(inputs,
                     filters,
                     strides,
                     use_projection,
                     activation=tf.nn.relu,
                     batch_norm_relu=nn_ops.BatchNormRelu(),
                     dropblock=nn_ops.Dropblock(),
                     drop_connect_rate=None,
                     data_format='channels_last',
                     is_training=False):
    """Bottleneck residual block: three convs, each followed by BN and DropBlock.

    The main branch is a 1x1 conv, a 3x3 conv (which carries `strides`) and a
    final 1x1 conv that outputs `4 * filters` channels. The result is added to
    the shortcut branch and passed through `activation`.

    Args:
      inputs: the input `Tensor`. Its layout is whatever `data_format` says;
        the value is forwarded unchanged to `nn_ops.conv2d_fixed_padding`.
      filters: an `int` number of filters for the first two convolutions. The
        third convolution (and the projection shortcut) use 4 times as many.
      strides: an `int` block stride, applied by the 3x3 convolution and by
        the projection shortcut.
      use_projection: a `bool`. When True the shortcut is a strided 1x1 conv
        followed by batch norm without relu; otherwise it is `inputs` itself.
      activation: a callable applied to the sum of the main branch and the
        shortcut.
      batch_norm_relu: the op called after each convolution. It is invoked
        with `is_training=...` and, for the shortcut and the last
        convolution, with `relu=False`.
      dropblock: an op called after each batch norm, and on the shortcut, with
        `is_training=...`.
      drop_connect_rate: when truthy, `nn_ops.drop_connect` is applied to the
        main branch with this rate before the residual addition.
      data_format: a `str` forwarded to every convolution.
      is_training: a `bool` forwarded to batch norm, DropBlock and drop
        connect.

    Returns:
      The output `Tensor` of the block.
    """
    logging.info('-----> Building bottleneck block.')
    expanded_filters = 4 * filters

    def _conv(tensor, num_filters, kernel, stride):
        # All convolutions in this block share the same data format.
        return nn_ops.conv2d_fixed_padding(inputs=tensor,
                                           filters=num_filters,
                                           kernel_size=kernel,
                                           strides=stride,
                                           data_format=data_format)

    # Shortcut branch. DropBlock is applied to it whether or not it was
    # projected.
    residual = inputs
    if use_projection:
        residual = _conv(inputs, expanded_filters, 1, strides)
        residual = batch_norm_relu(residual,
                                   relu=False,
                                   is_training=is_training)
    residual = dropblock(residual, is_training=is_training)

    # Main branch: (filters, kernel_size, strides, extra batch-norm kwargs).
    # Only the last stage overrides `relu`; the others rely on the op default.
    stages = (
        (filters, 1, 1, {}),
        (filters, 3, strides, {}),
        (expanded_filters, 1, 1, {'relu': False}),
    )
    net = inputs
    for num_filters, kernel, stride, bn_overrides in stages:
        net = _conv(net, num_filters, kernel, stride)
        net = batch_norm_relu(net, is_training=is_training, **bn_overrides)
        net = dropblock(net, is_training=is_training)

    if drop_connect_rate:
        net = nn_ops.drop_connect(net, is_training, drop_connect_rate)

    return activation(net + residual)
Beispiel #25
0
def mbconv_block(inputs,
                 in_filters,
                 out_filters,
                 expand_ratio,
                 strides,
                 use_projection,
                 kernel_size=3,
                 se_ratio=None,
                 batch_norm_relu=nn_ops.BatchNormRelu(),
                 dropblock=nn_ops.Dropblock(),
                 drop_connect_rate=None,
                 data_format='channels_last',
                 is_training=False):
    """MBConv block: expand 1x1 conv, depthwise conv, optional SE, project 1x1.

    Every convolution is followed by `batch_norm_relu` and `dropblock`. The
    main branch is added to the shortcut with `tf.add` at the end.

    NOTE(review): every `batch_norm_relu` call here uses the op's default
    activation setting, including the one after the final 1x1 convolution and
    the one on the projection shortcut. Confirm that this is intended.

    Args:
      inputs: the input `Tensor`. Its layout is whatever `data_format` says.
      in_filters: an `int` number of filters of the input feature map. The
        expansion convolution outputs `in_filters * expand_ratio` filters.
      out_filters: an `int` number of filters of the output feature map.
      expand_ratio: an `int` feature dimension expansion ratio.
      strides: an `int` block stride, applied by the depthwise convolution and
        by the projection shortcut.
      use_projection: a `bool`. When True the shortcut is a strided 1x1 conv
        followed by batch norm and DropBlock; otherwise it is `inputs` itself.
      kernel_size: kernel size for the depthwise convolution.
      se_ratio: squeeze and excitation ratio. Squeeze-and-excitation is only
        applied when this is not None and lies in the interval (0, 1].
      batch_norm_relu: the op called after each convolution with
        `is_training=...`.
      dropblock: an op called after each batch norm with `is_training=...`.
      drop_connect_rate: when truthy, `nn_ops.drop_connect` is applied to the
        main branch with this rate before the residual addition.
      data_format: a `str` forwarded to the convolutions and to
        squeeze-and-excitation.
      is_training: a `bool` forwarded to batch norm, DropBlock and drop
        connect.

    Returns:
      The output `Tensor` of the block.
    """
    tf.logging.info('-----> Building mbconv block.')

    def _bn_then_dropblock(tensor):
        # Post-convolution treatment shared by every conv in the block.
        tensor = batch_norm_relu(tensor, is_training=is_training)
        return dropblock(tensor, is_training=is_training)

    def _pointwise_conv(tensor, num_filters, stride):
        return nn_ops.conv2d_fixed_padding(inputs=tensor,
                                           filters=num_filters,
                                           kernel_size=1,
                                           strides=stride,
                                           data_format=data_format)

    # Shortcut branch (optionally projected to the output shape).
    residual = inputs
    if use_projection:
        residual = _pointwise_conv(inputs, out_filters, strides)
        residual = _bn_then_dropblock(residual)

    # 1x1 convolution that expands the channel dimension.
    net = _pointwise_conv(inputs, in_filters * expand_ratio, 1)
    net = _bn_then_dropblock(net)

    # Depthwise convolution; this is where the block stride is applied.
    net = nn_ops.depthwise_conv2d_fixed_padding(inputs=net,
                                                kernel_size=kernel_size,
                                                strides=strides,
                                                data_format=data_format)
    net = _bn_then_dropblock(net)

    # Squeeze and excitation, only for a ratio in (0, 1].
    apply_se = se_ratio is not None and 0 < se_ratio <= 1
    if apply_se:
        net = nn_ops.squeeze_excitation(net,
                                        in_filters,
                                        se_ratio,
                                        expand_ratio=expand_ratio,
                                        data_format=data_format)

    # 1x1 convolution that projects back down to `out_filters`.
    net = _pointwise_conv(net, out_filters, 1)
    net = _bn_then_dropblock(net)

    if drop_connect_rate:
        net = nn_ops.drop_connect(net, is_training, drop_connect_rate)

    return tf.add(net, residual)
Beispiel #26
0
    def __init__(self,
                 resnet_depth,
                 dropblock=nn_ops.Dropblock(),
                 activation='relu',
                 batch_norm_relu=nn_ops.BatchNormRelu(),
                 init_drop_connect_rate=None,
                 data_format='channels_last'):
        """ResNet initialization function.

        Args:
          resnet_depth: `int` depth of ResNet backbone model. Must be one of
            the depths listed in `model_params` below.
          dropblock: a dropblock layer.
          activation: activation function name. Supports 'relu' and 'swish'.
          batch_norm_relu: an operation that includes a batch normalization
            layer followed by a relu layer(optional).
          init_drop_connect_rate: a 'float' number that specifies the initial
            drop connection rate. Note that the default `None` means no drop
            connection is applied.
          data_format: `str` either "channels_first" for `[batch, channels,
            height, width]` or "channels_last" for `[batch, height, width,
            channels]`.

        Raises:
          ValueError: if `activation` is neither 'relu' nor 'swish', or if
            `resnet_depth` is not a supported depth.
        """
        self._resnet_depth = resnet_depth

        self._dropblock = dropblock
        if activation == 'relu':
            self._activation = tf.nn.relu
        elif activation == 'swish':
            self._activation = tf.nn.swish
        else:
            raise ValueError(
                'Activation {} not implemented.'.format(activation))
        self._batch_norm_relu = batch_norm_relu
        self._init_drop_connect_rate = init_drop_connect_rate

        self._data_format = data_format

        # Supported depths: the block function to stack and how many blocks
        # go into each of the four block groups.
        model_params = {
            10: {
                'block': nn_blocks.residual_block,
                'layers': [1, 1, 1, 1]
            },
            14: {
                'block': nn_blocks.bottleneck_block,
                'layers': [1, 1, 1, 1]
            },
            18: {
                'block': nn_blocks.residual_block,
                'layers': [2, 2, 2, 2]
            },
            26: {
                'block': nn_blocks.bottleneck_block,
                'layers': [2, 2, 2, 2]
            },
            34: {
                'block': nn_blocks.residual_block,
                'layers': [3, 4, 6, 3]
            },
            50: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 4, 6, 3]
            },
            101: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 4, 23, 3]
            },
            152: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 8, 36, 3]
            },
            200: {
                'block': nn_blocks.bottleneck_block,
                'layers': [3, 24, 36, 3]
            }
        }

        if resnet_depth not in model_params:
            valid_resnet_depths = ', '.join(
                str(depth) for depth in sorted(model_params))
            # Build ONE message string. Passing the depth as a second
            # positional argument to ValueError would make str(e) render as a
            # tuple instead of a readable sentence.
            raise ValueError(
                'The resnet_depth should be in [{}]. '
                'Not a valid resnet_depth: {}'.format(valid_resnet_depths,
                                                      resnet_depth))
        params = model_params[resnet_depth]
        self._resnet_fn = self.resnet_v1_generator(params['block'],
                                                   params['layers'])
Beispiel #27
0
def resample_with_alpha(feat,
                        input_block_fn,
                        target_width,
                        target_num_filters,
                        target_block_fn,
                        alpha=1.0,
                        use_native_resize_op=False,
                        batch_norm_relu=nn_ops.BatchNormRelu(),
                        data_format='channels_last',
                        name=None,
                        is_training=False):
    """Match the resolution and feature dimension of `feat` to a target block.

    Steps, all built inside the variable scope `resample_with_alpha_<name>`:
      1. a 1x1 conv + BN that sets the channel count to
         `int(num_filters * alpha)`, where `num_filters` is the static channel
         count of `feat` (divided by 4 when `input_block_fn` is 'bottleneck');
      2. if `feat` is wider than `target_width`: a stride-2 3x3 conv + BN,
         followed by a max-pool when the width ratio exceeds 2; if `feat` is
         narrower: nearest-neighbor up-sampling by the integer width ratio;
      3. a 1x1 conv + BN (without relu) producing `target_num_filters`
         channels (times 4 when `target_block_fn` is 'bottleneck').

    Args:
      feat: the input feature `Tensor`. Its static shape is unpacked as
        (batch, height, width, channels) regardless of `data_format`.
        NOTE(review): confirm callers only use 'channels_last' here.
      input_block_fn: block type that produced `feat`; only the value
        'bottleneck' changes behavior.
      target_width: `int` spatial width to resample to.
      target_num_filters: `int` base number of output filters.
      target_block_fn: block type of the target; only the value 'bottleneck'
        changes behavior.
      alpha: scaling factor applied to the intermediate channel count.
      use_native_resize_op: when True, up-sampling uses
        `tf.image.resize_nearest_neighbor`; otherwise
        `spatial_transform_ops.nearest_upsampling`.
      batch_norm_relu: the op called after each convolution.
      data_format: a `str` forwarded to the convolutions and the max-pool.
      name: suffix for the variable scope name.
      is_training: a `bool` forwarded to batch norm.

    Returns:
      The resampled feature `Tensor`.

    Raises:
      ValueError: if the static width or channel count of `feat` is unknown,
        or if the larger of the two widths is not divisible by the smaller.
    """
    _, in_height, in_width, in_filters = feat.get_shape().as_list()
    if in_width is None or in_filters is None:
        raise ValueError('Shape of feat is None (shape:{}).'.format(
            feat.shape))

    # True division is intentional here: it mirrors the original arithmetic,
    # and the result is truncated to int only after scaling by alpha.
    base_filters = in_filters / 4 if input_block_fn == 'bottleneck' else in_filters
    reduced_filters = int(base_filters * alpha)

    def _conv(tensor, num_filters, kernel, stride):
        return nn_ops.conv2d_fixed_padding(inputs=tensor,
                                           filters=num_filters,
                                           kernel_size=kernel,
                                           strides=stride,
                                           data_format=data_format)

    with tf.variable_scope('resample_with_alpha_{}'.format(name)):
        # Step 1: 1x1 conv that shrinks the channel dimension by alpha.
        feat = _conv(feat, reduced_filters, 1, 1)
        feat = batch_norm_relu(feat, is_training=is_training)

        if in_width > target_width:
            # Step 2a: down-sample.
            if in_width % target_width != 0:
                raise ValueError(
                    'wdith ({}) is not divisible by target_width ({}).'.format(
                        in_width, target_width))
            # A stride-2 conv halves the feature map.
            feat = _conv(feat, reduced_filters, 3, 2)
            feat = batch_norm_relu(feat, is_training=is_training)
            # Max-pool covers whatever reduction is left beyond that factor 2.
            ratio = in_width // target_width
            if ratio > 2:
                pool_stride = ratio // 2
                feat = tf.layers.max_pooling2d(
                    inputs=feat,
                    pool_size=3 if ratio == 4 else 5,
                    strides=[pool_stride, pool_stride],
                    padding='SAME',
                    data_format=data_format)
        elif in_width < target_width:
            # Step 2b: up-sample with nearest-neighbor interpolation.
            if target_width % in_width != 0:
                raise ValueError(
                    'target_wdith ({}) is not divisible by width ({}).'.format(
                        target_width, in_width))
            scale = target_width // in_width
            if use_native_resize_op:
                feat = tf.image.resize_nearest_neighbor(
                    feat, [in_height * scale, in_width * scale])
            else:
                feat = spatial_transform_ops.nearest_upsampling(feat,
                                                                scale=scale)

        # Step 3: 1x1 conv to the target block's channel count.
        if target_block_fn == 'bottleneck':
            out_filters = target_num_filters * 4
        else:
            out_filters = target_num_filters
        feat = _conv(feat, out_filters, 1, 1)
        feat = batch_norm_relu(feat, relu=False, is_training=is_training)

    return feat