Example #1
 def _block_group(self,
                  inputs: tf.Tensor,
                  in_filters: int,
                  out_filters: int,
                  strides: int,
                  expand_ratio: int = 6,
                  block_repeats: int = 1,
                  se_ratio: float = 0.2,
                  stochastic_depth_drop_rate: Optional[float] = None,
                  name: str = 'block_group'):
   """Creates one group of blocks for the SpineNet model."""
   x = nn_blocks.InvertedBottleneckBlock(
       in_filters=in_filters,
       out_filters=out_filters,
       strides=strides,
       se_ratio=se_ratio,
       expand_ratio=expand_ratio,
       stochastic_depth_drop_rate=stochastic_depth_drop_rate,
       kernel_initializer=self._kernel_initializer,
       kernel_regularizer=self._kernel_regularizer,
       bias_regularizer=self._bias_regularizer,
       activation=self._activation,
       use_sync_bn=self._use_sync_bn,
       norm_momentum=self._norm_momentum,
       norm_epsilon=self._norm_epsilon)(
           inputs)
    # Chain the remaining repeats with stride 1; each consumes the previous
    # block's output, whose channel count is now `out_filters`.
    for _ in range(1, block_repeats):
      x = nn_blocks.InvertedBottleneckBlock(
          in_filters=out_filters,
         out_filters=out_filters,
         strides=1,
         se_ratio=se_ratio,
         expand_ratio=expand_ratio,
         stochastic_depth_drop_rate=stochastic_depth_drop_rate,
         kernel_initializer=self._kernel_initializer,
         kernel_regularizer=self._kernel_regularizer,
         bias_regularizer=self._bias_regularizer,
         activation=self._activation,
         use_sync_bn=self._use_sync_bn,
         norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon)(x)
   return tf.identity(x, name=name)
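
A minimal, self-contained sketch of the same grouping pattern, using only stock Keras layers so it runs without the TF Model Garden. `simple_block_group` and its plain Conv-BN-ReLU body are illustrative stand-ins for the `nn_blocks.InvertedBottleneckBlock` group above, not part of the library:

import tensorflow as tf

def simple_block_group(inputs, out_filters, strides, block_repeats=1,
                       name='block_group'):
  """One strided block followed by stride-1 repeats, chained in sequence."""
  x = inputs
  for i in range(block_repeats):
    # Only the first block downsamples; every repeat consumes the previous
    # block's output rather than the original group input.
    x = tf.keras.layers.Conv2D(
        out_filters, 3, strides=strides if i == 0 else 1,
        padding='same', use_bias=False)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)
  return tf.keras.layers.Activation('linear', name=name)(x)

# A group of two blocks that halves the spatial resolution once.
features = simple_block_group(
    tf.keras.Input(shape=(64, 64, 32)), out_filters=64, strides=2,
    block_repeats=2)
print(features.shape)  # (None, 32, 32, 64)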
Example #2
    def _mobilenet_base(
            self,
            inputs: tf.Tensor) -> Tuple[tf.Tensor, Dict[str, tf.Tensor], int]:
        """Builds the base MobileNet architecture.

    Args:
      inputs: A `tf.Tensor` of shape `[batch_size, height, width, channels]`.

    Returns:
      A tuple of output Tensor and dictionary that collects endpoints.
    """

        input_shape = inputs.get_shape().as_list()
        if len(input_shape) != 4:
            raise ValueError('Expected rank 4 input, was: %d' %
                             len(input_shape))

        # The current_stride variable keeps track of the output stride of the
        # activations, i.e., the running product of convolution strides up to the
        # current network layer. This allows us to invoke atrous convolution
        # whenever applying the next convolution would result in the activations
        # having output stride larger than the target output_stride.
        current_stride = 1

        # The atrous convolution rate parameter.
        rate = 1

        net = inputs
        endpoints = {}
        endpoint_level = 2
        for i, block_def in enumerate(self._decoded_specs):
            block_name = 'block_group_{}_{}'.format(block_def.block_fn, i)
            # The gpooling block specifies strides=None; treat that as stride 1.
            if not block_def.strides:
                block_def.strides = 1
            if (self._output_stride is not None
                    and current_stride == self._output_stride):
                # If we have reached the target output_stride, then we need to employ
                # atrous convolution with stride=1 and multiply the atrous rate by the
                # current unit's stride for use in subsequent layers.
                layer_stride = 1
                layer_rate = rate
                rate *= block_def.strides
            else:
                layer_stride = block_def.strides
                layer_rate = 1
                current_stride *= block_def.strides

            if block_def.block_fn == 'convbn':

                net = Conv2DBNBlock(
                    filters=block_def.filters,
                    kernel_size=block_def.kernel_size,
                    strides=block_def.strides,
                    activation=block_def.activation,
                    use_bias=block_def.use_bias,
                    use_normalization=block_def.use_normalization,
                    kernel_initializer=self._kernel_initializer,
                    kernel_regularizer=self._kernel_regularizer,
                    bias_regularizer=self._bias_regularizer,
                    use_sync_bn=self._use_sync_bn,
                    norm_momentum=self._norm_momentum,
                    norm_epsilon=self._norm_epsilon)(net)

            elif block_def.block_fn == 'depsepconv':
                net = nn_blocks.DepthwiseSeparableConvBlock(
                    filters=block_def.filters,
                    kernel_size=block_def.kernel_size,
                    strides=block_def.strides,
                    activation=block_def.activation,
                    dilation_rate=layer_rate,
                    regularize_depthwise=self._regularize_depthwise,
                    kernel_initializer=self._kernel_initializer,
                    kernel_regularizer=self._kernel_regularizer,
                    use_sync_bn=self._use_sync_bn,
                    norm_momentum=self._norm_momentum,
                    norm_epsilon=self._norm_epsilon,
                )(net)

            elif block_def.block_fn == 'invertedbottleneck':
                use_rate = rate
                if layer_rate > 1 and block_def.kernel_size != 1:
                    # We will apply atrous rate in the following cases:
                    # 1) When kernel_size is not in params, the operation then uses
                    #   default kernel size 3x3.
                    # 2) When kernel_size is in params, and if the kernel_size is not
                    #   equal to (1, 1) (there is no need to apply atrous convolution to
                    #   any 1x1 convolution).
                    use_rate = layer_rate
                in_filters = net.shape.as_list()[-1]
                net = nn_blocks.InvertedBottleneckBlock(
                    in_filters=in_filters,
                    out_filters=block_def.filters,
                    kernel_size=block_def.kernel_size,
                    strides=layer_stride,
                    expand_ratio=block_def.expand_ratio,
                    se_ratio=block_def.se_ratio,
                    expand_se_in_filters=True,
                    se_gating_activation='hard_sigmoid',
                    activation=block_def.activation,
                    use_depthwise=block_def.use_depthwise,
                    use_residual=block_def.use_residual,
                    dilation_rate=use_rate,
                    regularize_depthwise=self._regularize_depthwise,
                    kernel_initializer=self._kernel_initializer,
                    kernel_regularizer=self._kernel_regularizer,
                    bias_regularizer=self._bias_regularizer,
                    use_sync_bn=self._use_sync_bn,
                    norm_momentum=self._norm_momentum,
                    norm_epsilon=self._norm_epsilon,
                    stochastic_depth_drop_rate=self._stochastic_depth_drop_rate,
                    divisible_by=self._get_divisible_by())(net)

            elif block_def.block_fn == 'gpooling':
                # Global pooling collapses the map to [batch, channels];
                # reshape back to rank 4 so later conv blocks keep working.
                net = layers.GlobalAveragePooling2D()(net)
                net = layers.Reshape((1, 1, net.shape[1]))(net)

            else:
                raise ValueError('Unknown block type {} for layer {}'.format(
                    block_def.block_fn, i))

            net = tf.identity(net, name=block_name)

            if block_def.is_output:
                endpoints[str(endpoint_level)] = net
                endpoint_level += 1

        return net, endpoints, endpoint_level
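
The `current_stride`/`rate` bookkeeping in this builder is the core of its atrous-convolution support: once the running product of strides reaches the target `output_stride`, further downsampling is converted into dilation instead. Below is a sketch of that logic pulled out as a pure function; `plan_strides` and its return format are illustrative, not part of the Model Garden API:

from typing import List, Optional, Tuple

def plan_strides(block_strides: List[int],
                 output_stride: Optional[int] = None
                 ) -> List[Tuple[int, int]]:
  """Returns the (stride, dilation_rate) each block should actually use."""
  current_stride = 1   # Running product of strides applied so far.
  rate = 1             # Accumulated atrous rate once the target is hit.
  plan = []
  for s in block_strides:
    if output_stride is not None and current_stride == output_stride:
      # Target reached: run this block at stride 1 with the current rate,
      # and fold its nominal stride into the rate for subsequent layers.
      plan.append((1, rate))
      rate *= s
    else:
      plan.append((s, 1))
      current_stride *= s
  return plan

# With output_stride=8, the stride-2 block after the target switches to
# stride 1, and later layers inherit dilation 2, so the feature map stays
# at 1/8 of the input resolution instead of 1/16.
print(plan_strides([2, 1, 2, 2, 1, 2, 1], output_stride=8))
# [(2, 1), (1, 1), (2, 1), (2, 1), (1, 1), (1, 1), (1, 2)]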
Example #3
  def _mobiledet_base(self,
                      inputs: tf.Tensor
                      ) -> Tuple[tf.Tensor, Dict[str, tf.Tensor], int]:
    """Builds the base MobileDet architecture.

    Args:
      inputs: A `tf.Tensor` of shape `[batch_size, height, width, channels]`.

    Returns:
      A tuple of the output `tf.Tensor`, a dictionary that collects
      endpoints, and the next endpoint level.
    """

    input_shape = inputs.get_shape().as_list()
    if len(input_shape) != 4:
      raise ValueError('Expected rank 4 input, was: %d' % len(input_shape))

    net = inputs
    endpoints = {}
    endpoint_level = 1
    for i, block_def in enumerate(self._decoded_specs):
      block_name = 'block_group_{}_{}'.format(block_def.block_fn, i)

      if block_def.block_fn == 'convbn':

        net = mobilenet.Conv2DBNBlock(
            filters=block_def.filters,
            kernel_size=block_def.kernel_size,
            strides=block_def.strides,
            activation=block_def.activation,
            use_bias=block_def.use_bias,
            use_normalization=block_def.use_normalization,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            use_sync_bn=self._use_sync_bn,
            norm_momentum=self._norm_momentum,
            norm_epsilon=self._norm_epsilon
        )(net)

      elif block_def.block_fn == 'invertedbottleneck':

        in_filters = net.shape.as_list()[-1]
        net = nn_blocks.InvertedBottleneckBlock(
            in_filters=in_filters,
            out_filters=block_def.filters,
            kernel_size=block_def.kernel_size,
            strides=block_def.strides,
            expand_ratio=block_def.expand_ratio,
            se_ratio=block_def.se_ratio,
            se_inner_activation=block_def.activation,
            se_gating_activation='sigmoid',
            se_round_down_protect=False,
            expand_se_in_filters=True,
            activation=block_def.activation,
            use_depthwise=block_def.use_depthwise,
            use_residual=block_def.use_residual,
            regularize_depthwise=self._regularize_depthwise,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            use_sync_bn=self._use_sync_bn,
            norm_momentum=self._norm_momentum,
            norm_epsilon=self._norm_epsilon,
            divisible_by=self._get_divisible_by()
        )(net)

      elif block_def.block_fn == 'tucker':

        in_filters = net.shape.as_list()[-1]
        net = nn_blocks.TuckerConvBlock(
            in_filters=in_filters,
            out_filters=block_def.filters,
            kernel_size=block_def.kernel_size,
            strides=block_def.strides,
            input_compression_ratio=block_def.input_compression_ratio,
            output_compression_ratio=block_def.output_compression_ratio,
            activation=block_def.activation,
            use_residual=block_def.use_residual,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            use_sync_bn=self._use_sync_bn,
            norm_momentum=self._norm_momentum,
            norm_epsilon=self._norm_epsilon,
            divisible_by=self._get_divisible_by()
        )(net)

      else:
        raise ValueError('Unknown block type {} for layer {}'.format(
            block_def.block_fn, i))

      # An identity op, used only to attach `block_name` to the tensor.
      net = tf.keras.layers.Activation('linear', name=block_name)(net)

      if block_def.is_output:
        endpoints[str(endpoint_level)] = net
        endpoint_level += 1

    return net, endpoints, endpoint_level
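
Both base builders share the same endpoint-collection pattern: blocks flagged with `is_output` are stored under increasing string levels, and the resulting dictionary becomes the multi-scale output of the backbone. A runnable sketch of that pattern with a toy three-stage network; `tiny_backbone` is illustrative, not part of the Model Garden:

import tensorflow as tf

def tiny_backbone(input_shape=(128, 128, 3)):
  inputs = tf.keras.Input(shape=input_shape)
  net, endpoints, level = inputs, {}, 1
  for filters, is_output in [(16, True), (32, True), (64, True)]:
    net = tf.keras.layers.Conv2D(filters, 3, strides=2, padding='same',
                                 activation='relu')(net)
    if is_output:
      # Record this stage's feature map under its endpoint level.
      endpoints[str(level)] = net
      level += 1
  # A functional model whose outputs are the multi-scale endpoints,
  # ready to feed an FPN-style decoder.
  return tf.keras.Model(inputs=inputs, outputs=endpoints)

model = tiny_backbone()
for lvl, tensor in sorted(model.output.items()):
  print(lvl, tensor.shape)
# 1 (None, 64, 64, 16)
# 2 (None, 32, 32, 32)
# 3 (None, 16, 16, 64)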