Example #1
0
def _stacked_separable_conv(net, stride, operation, filter_size):
    """Applies the stack of separable convolutions described by `operation`.

    `_operation_to_info` converts `operation` into a layer count and a kernel
    size. Every layer is ReLU -> slim.separable_conv2d -> slim.batch_norm.
    The caller's `stride` is used by the first layer only; all following
    layers use stride 1. A float16 input is processed in float32 and the
    result is cast back to the input dtype.

    Args:
      net: input tensor.
      stride: stride of the first separable convolution in the stack.
      operation: operation descriptor passed to `_operation_to_info`.
      filter_size: number of outputs of every separable convolution.

    Returns:
      The output tensor, cast to the dtype `net` had on entry.
    """
    num_layers, kernel_size = _operation_to_info(operation)
    input_dtype = net.dtype
    if input_dtype == tf.float16:
        net = tf.cast(net, tf.float32)

    def _relu_sep_bn(inputs, index, conv_stride):
        # One ReLU -> separable conv -> batch norm unit; `index` is the
        # 1-based layer number used in both scope names.
        suffix = '{0}x{0}_{1}'.format(kernel_size, index)
        outputs = tf.nn.relu(inputs)
        outputs = slim.separable_conv2d(outputs,
                                        filter_size,
                                        kernel_size,
                                        depth_multiplier=1,
                                        scope='separable_' + suffix,
                                        stride=conv_stride)
        return slim.batch_norm(outputs, scope='bn_sep_' + suffix)

    # All layers except the last one; the stride drops to 1 once it has been
    # applied.
    for index in range(1, num_layers):
        net = _relu_sep_bn(net, index, stride)
        stride = 1
    # The last layer is always built, numbered `num_layers`.
    net = _relu_sep_bn(net, num_layers, stride)
    return tf.cast(net, input_dtype)
Example #2
0
def _stacked_separable_conv(net, stride, operation, filter_size,
                            use_bounded_activation):
    """Applies the stack of separable convolutions described by `operation`.

    `_operation_to_info` converts `operation` into a layer count and a kernel
    size. Every layer is activation -> slim.separable_conv2d ->
    slim.batch_norm. The caller's `stride` is used by the first layer only;
    all following layers use stride 1.

    Args:
      net: input tensor.
      stride: stride of the first separable convolution in the stack.
      operation: operation descriptor passed to `_operation_to_info`.
      filter_size: number of outputs of every separable convolution.
      use_bounded_activation: if true the activation is tf.nn.relu6,
        otherwise tf.nn.relu.

    Returns:
      The output tensor of the last batch norm.
    """
    num_layers, kernel_size = _operation_to_info(operation)
    if use_bounded_activation:
        activation_fn = tf.nn.relu6
    else:
        activation_fn = tf.nn.relu

    def _act_sep_bn(inputs, index, conv_stride):
        # One activation -> separable conv -> batch norm unit; `index` is the
        # 1-based layer number used in both scope names.
        suffix = '{0}x{0}_{1}'.format(kernel_size, index)
        outputs = activation_fn(inputs)
        outputs = slim.separable_conv2d(outputs,
                                        filter_size,
                                        kernel_size,
                                        depth_multiplier=1,
                                        scope='separable_' + suffix,
                                        stride=conv_stride)
        return slim.batch_norm(outputs, scope='bn_sep_' + suffix)

    # All layers except the last one; the stride drops to 1 once it has been
    # applied.
    for index in range(1, num_layers):
        net = _act_sep_bn(net, index, stride)
        stride = 1
    # The last layer is always built, numbered `num_layers`.
    return _act_sep_bn(net, num_layers, stride)
def middle_flow_block(inpt, num_outputs=728, kernel_size=None, unit_num=None):
    """Builds one residual unit of three separable convolutions.

    The unit runs ReLU -> slim.separable_conv2d -> slim.batch_norm three
    times and then adds the unit input back onto the result.

    Args:
      inpt: input tensor; it is also the residual branch, so the output of
        the last batch norm must be addable to it.
      num_outputs: number of outputs of each separable convolution.
      kernel_size: kernel size of each separable convolution; defaults to
        [3, 3] when None.
      unit_num: unit identifier; its `str()` form is inserted into every
        scope name.

    Returns:
      The sum of the third batch norm output and `inpt`.
    """
    if kernel_size is None:
        kernel_size = [3, 3]
    module_scope = (
        'xception_65/middle_flow/block1/unit_{}/xception_module/'.format(
            str(unit_num)))

    net = inpt
    for conv_index in (1, 2, 3):
        conv_name = 'separable_conv{}'.format(conv_index)
        net = tf.nn.relu(net)
        # The separable conv lives under the "<conv>_depthwise" scope and its
        # batch norm under "<conv>_pointwise/BatchNorm".
        net = slim.separable_conv2d(
            net,
            num_outputs,
            kernel_size,
            scope=module_scope + conv_name + '_depthwise')
        net = slim.batch_norm(
            net,
            scope=module_scope + conv_name + '_pointwise/BatchNorm')

    return tf.math.add(net, inpt)
Example #4
0
 def _expanded_conv(self, net, num_filters, expansion_rates, kernel_size,
                    stride, scope):
     """Expanded convolution: 1x1 expansion, depthwise conv, 1x1 projection.

     Args:
       net: input tensor.
       num_filters: number of outputs of the final projection.
       expansion_rates: multiplier applied to `num_filters` to obtain the
         number of outputs of the expansion convolution.
       kernel_size: kernel size of the depthwise convolution.
       stride: stride of the depthwise convolution.
       scope: scope prefix; '/expansion', '/depthwise' and '/projection' are
         appended for the three layers.

     Returns:
       The output tensor of the projection convolution.
     """
     # Explicit padding is only used for strided convolutions and replaces
     # 'SAME' padding with a pre-padded input plus 'VALID' padding.
     needs_explicit_pad = self._use_explicit_padding and stride > 1
     padding = 'SAME'
     if needs_explicit_pad:
         padding = 'VALID'

     expanded = slim.conv2d(net,
                            num_filters * expansion_rates, [1, 1],
                            activation_fn=self._activation_fn,
                            normalizer_fn=self._normalization_fn,
                            padding=padding,
                            scope=scope + '/expansion')

     depthwise_input = expanded
     if needs_explicit_pad:
         depthwise_input = ops.fixed_padding(expanded, kernel_size)
     # num_outputs=None: depthwise step only, no pointwise convolution.
     depthwise = slim.separable_conv2d(
         depthwise_input,
         num_outputs=None,
         kernel_size=kernel_size,
         activation_fn=self._activation_fn,
         normalizer_fn=self._normalization_fn,
         stride=stride,
         padding=padding,
         scope=scope + '/depthwise')

     # The projection is linear: tf.identity stands in for the activation.
     return slim.conv2d(depthwise,
                        num_filters, [1, 1],
                        activation_fn=tf.identity,
                        normalizer_fn=self._normalization_fn,
                        padding=padding,
                        scope=scope + '/projection')
Example #5
0
def split_separable_conv2d(input_tensor,
                           num_outputs,
                           scope=None,
                           normalizer_fn=None,
                           stride=1,
                           rate=1,
                           endpoints=None,
                           use_explicit_padding=False):
    """Separable MobileNet V1 style convolution.

    A 3x3 depthwise convolution followed by a 1x1 pointwise convolution.
    Unlike a single slim.separable_conv2d call, the two steps are separate
    layers, and `normalizer_fn` is passed to both of them. This matches the
    basic building block of the MobileNet paper
    (https://arxiv.org/abs/1704.04861).

    Args:
      input_tensor: input tensor.
      num_outputs: number of outputs of the pointwise convolution.
      scope: optional scope name, passed to `_v1_compatible_scope_naming`.
        The value that context manager yields is used as a prefix:
        '<prefix>depthwise' for the depthwise layer and '<prefix>pointwise'
        for the pointwise layer.
      normalizer_fn: normalizer function used for depthwise and pointwise.
      stride: stride of the depthwise convolution.
      rate: dilation rate of the depthwise convolution.
      endpoints: optional dict; when given, the depthwise and pointwise
        outputs are stored in it under their scope names.
      use_explicit_padding: use 'VALID' padding for the depthwise convolution
        but pre-pad the input with `_fixed_padding`, so that the output
        dimensions are the same as if 'SAME' padding were used.

    Returns:
      The output tensor of the pointwise convolution.
    """
    with _v1_compatible_scope_naming(scope) as scope_prefix:
        if endpoints is None:
            endpoints = {}
        kernel_size = [3, 3]
        depthwise_name = scope_prefix + 'depthwise'
        pointwise_name = scope_prefix + 'pointwise'

        if use_explicit_padding:
            padding = 'VALID'
            input_tensor = _fixed_padding(input_tensor, kernel_size, rate)
        else:
            padding = 'SAME'

        # num_outputs=None: depthwise step only, no pointwise convolution.
        depthwise = slim.separable_conv2d(input_tensor,
                                          None,
                                          kernel_size,
                                          depth_multiplier=1,
                                          stride=stride,
                                          rate=rate,
                                          normalizer_fn=normalizer_fn,
                                          padding=padding,
                                          scope=depthwise_name)
        endpoints[depthwise_name] = depthwise

        pointwise = slim.conv2d(depthwise,
                                num_outputs, [1, 1],
                                stride=1,
                                normalizer_fn=normalizer_fn,
                                scope=pointwise_name)
        endpoints[pointwise_name] = pointwise
    return pointwise
Example #6
0
    def predict(self, features, num_predictions_per_location):
        """Predicts masks.

        Args:
          features: A float tensor of shape
            [batch_size, height, width, channels] containing image features.
          num_predictions_per_location: Number of predictions to be made per
            spatial location.

        Returns:
          mask_predictions: A float tensor of shape
            [batch_size, num_anchors, num_masks, mask_height, mask_width]
            representing the mask predictions for the proposals. num_masks
            is 1 when masks are class agnostic, otherwise the number of
            classes.
        """
        num_masks = 1 if self._masks_are_class_agnostic else self._num_classes
        num_mask_channels = num_masks * self._mask_height * self._mask_width
        output_depth = num_predictions_per_location * num_mask_channels

        net = features
        if self._use_dropout:
            net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

        kernel = [self._kernel_size, self._kernel_size]
        if not self._use_depthwise:
            mask_predictions = slim.conv2d(net,
                                           output_depth,
                                           kernel,
                                           activation_fn=None,
                                           normalizer_fn=None,
                                           normalizer_params=None,
                                           scope='MaskPredictor')
        else:
            # Depthwise step only (num_outputs=None), followed by a 1x1
            # convolution that produces the mask channels.
            depthwise = slim.separable_conv2d(net,
                                              None,
                                              kernel,
                                              padding='SAME',
                                              depth_multiplier=1,
                                              stride=1,
                                              rate=1,
                                              scope='MaskPredictor_depthwise')
            mask_predictions = slim.conv2d(depthwise,
                                           output_depth, [1, 1],
                                           activation_fn=None,
                                           normalizer_fn=None,
                                           normalizer_params=None,
                                           scope='MaskPredictor')

        # Prefer the static batch size; fall back to the dynamic one.
        batch_size = features.get_shape().as_list()[0]
        if batch_size is None:
            batch_size = tf.shape(features)[0]
        return tf.reshape(
            mask_predictions,
            [batch_size, -1, num_masks, self._mask_height, self._mask_width])
Example #7
0
def ghost_conv(x, ch_num, k_s=3, bn=slim.batch_norm, act=tf.nn.relu, name='ghost_conv'):
    """Ghost convolution: a regular conv plus a depthwise conv on its output.

    A k_s x k_s convolution produces the first half of the channels, a 3x3
    depthwise convolution over that result produces the second half, and the
    two halves are concatenated along axis 3.

    Args:
      x: input tensor; channels are expected on axis 3, the axis the two
        halves are concatenated on.
      ch_num: number of output channels (a positive int).
      k_s: kernel size of the primary convolution.
      bn: optional normalizer applied to the concatenated result; skipped
        when falsy.
      act: optional activation applied after `bn`; skipped when falsy.
      name: variable scope name.

    Returns:
      The output tensor with `ch_num` channels on axis 3.
    """
    # Ceiling division: with floor division an odd `ch_num` produced only
    # `ch_num - 1` output channels (and `ch_num == 1` asked for 0 filters).
    primary_ch = (ch_num + 1) // 2
    with tf.variable_scope(name_or_scope=name):
        x1 = slim.conv2d(x, primary_ch, kernel_size=[k_s, k_s], activation_fn=None, normalizer_fn=None)
        # num_outputs=None: depthwise step only, so x2 has as many channels
        # as x1.
        x2 = slim.separable_conv2d(x1, None, [3, 3], activation_fn=None, normalizer_fn=None)
        y = tf.concat([x1, x2], axis=3)
        if 2 * primary_ch != ch_num:
            # Odd `ch_num`: drop the one surplus channel so the output has
            # exactly `ch_num` channels. Even `ch_num` is left untouched.
            y = y[:, :, :, :ch_num]
        if bn:
            y = bn(y)
        if act:
            y = act(y)
        return y
Example #8
0
    def predict(self, features, num_predictions_per_location):
        """Predicts class scores, background included.

        Args:
          features: A float tensor of shape
            [batch_size, height, width, channels] containing image features.
          num_predictions_per_location: Number of predictions to be made per
            spatial location.

        Returns:
          class_predictions_with_background: A float tensor of shape
            [batch_size, num_anchors, num_class_slots] representing the class
            predictions for the proposals.
        """
        net = features
        if self._use_dropout:
            net = slim.dropout(net, keep_prob=self._dropout_keep_prob)

        num_outputs = num_predictions_per_location * self._num_class_slots
        # The bias initializer belongs to whichever convolution emits the
        # class logits. Previously the depthwise branch dropped it, so
        # `_class_prediction_bias_init` was silently ignored there.
        bias_initializer = tf.constant_initializer(
            self._class_prediction_bias_init)

        if self._use_depthwise:
            depthwise_scope = self._scope + '_depthwise'
            # Depthwise step only (num_outputs=None), followed by a 1x1
            # convolution that produces the class logits.
            class_predictions_with_background = slim.separable_conv2d(
                net,
                None, [self._kernel_size, self._kernel_size],
                padding='SAME',
                depth_multiplier=1,
                stride=1,
                rate=1,
                scope=depthwise_scope)
            class_predictions_with_background = slim.conv2d(
                class_predictions_with_background,
                num_outputs, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                normalizer_params=None,
                scope=self._scope,
                biases_initializer=bias_initializer)
        else:
            class_predictions_with_background = slim.conv2d(
                net,
                num_outputs,
                [self._kernel_size, self._kernel_size],
                activation_fn=None,
                normalizer_fn=None,
                normalizer_params=None,
                scope=self._scope,
                biases_initializer=bias_initializer)

        if self._apply_sigmoid_to_scores:
            class_predictions_with_background = tf.sigmoid(
                class_predictions_with_background)

        # Prefer the static batch size; fall back to the dynamic one.
        batch_size = features.get_shape().as_list()[0]
        if batch_size is None:
            batch_size = tf.shape(features)[0]
        class_predictions_with_background = tf.reshape(
            class_predictions_with_background,
            [batch_size, -1, self._num_class_slots])
        return class_predictions_with_background
Example #9
0
  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    Two 3x3 separable convolutions are applied to the proposal crops: the
    first with stride 2, the second with stride 1.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    conv_depth = 1024
    if self._skip_last_stride:
      # Scale the depth by the configured percentage.
      conv_depth_ratio = float(self._conv_depth_ratio_in_percentage) / 100.0
      conv_depth = int(float(conv_depth) * conv_depth_ratio)
    # Both layers share one output depth, which is never below 16.
    out_depth = max(int(conv_depth * 1.0), 16)

    with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights):
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=self._train_batch_norm,
              weight_decay=self._weight_decay)), slim.arg_scope(
                  [slim.conv2d, slim.separable_conv2d], padding='SAME'):
        strided = slim.separable_conv2d(
            proposal_feature_maps,
            out_depth, [3, 3],
            depth_multiplier=1,
            stride=2,
            scope='Conv2d_12_pointwise')
        return slim.separable_conv2d(
            strided,
            out_depth, [3, 3],
            depth_multiplier=1,
            stride=1,
            scope='Conv2d_13_pointwise')
Example #10
0
    def pre_bottleneck(self, inputs, state, input_index):
        """Apply pre-bottleneck projection to inputs.

        The inputs are concatenated with the state along axis 3 and projected
        by a separable convolution to `self.output_size[-1]` channels. The
        docstring this replaces gave the purpose as mapping features with
        different channel counts into the same dimension, so that features
        from both large and small models can share one LSTM cell.

        Args:
          inputs: 4D Tensor with shape
            [batch_size x width x height x input_size].
          state: 4D Tensor with shape
            [batch_size x width x height x state_size], or a 2D Tensor when
            flatten_state is set.
          input_index: integer index indicating which base features the
            inputs correspond to.

        Returns:
          inputs: pre-bottlenecked inputs.

        Raises:
          ValueError: If inputs is not rank 4, or if state does not have the
            expected rank (2 when flatten_state is set, 4 otherwise).
        """
        if len(inputs.shape) != 4:
            raise ValueError('Expect rank 4 feature tensor.')
        state_rank = len(state.shape)
        if self._flatten_state:
            if state_rank != 2:
                raise ValueError(
                    'Expect rank 2 state tensor when flatten_state is set.')
        elif state_rank != 4:
            raise ValueError('Expect rank 4 state tensor.')

        # name_scope(None) clears the current name scope, so the identity op
        # gets exactly the name given here.
        with tf.name_scope(None):
            state = tf.identity(state, name='raw_inputs/init_lstm_h')
        if self._flatten_state:
            # Restore the spatial layout of the inputs on the flat state.
            spatial_shape = [inputs.shape[0], inputs.shape[1],
                             inputs.shape[2], -1]
            state = tf.reshape(state, spatial_shape)

        with tf.variable_scope('conv_lstm_cell', reuse=tf.AUTO_REUSE):
            bottleneck_input = tf.concat([inputs, state], 3)
            inputs = slim.separable_conv2d(bottleneck_input,
                                           self.output_size[-1],
                                           self._filter_size,
                                           depth_multiplier=1,
                                           activation_fn=tf.nn.relu6,
                                           normalizer_fn=None,
                                           scope='bottleneck_%d' % input_index)
            # For exporting inference graph, we only mark the first timestep.
            endpoint_name = 'raw_outputs/base_endpoint_%d' % (input_index + 1)
            with tf.name_scope(None):
                inputs = tf.identity(inputs, name=endpoint_name)
        return inputs
Example #11
0
def ghost_bottleneck(x, ch_exp, ch_out, s_s, name):
    """Ghost bottleneck: expand, optionally downsample, project, maybe add.

    Args:
      x: input tensor; its channel count is read from axis 3.
      ch_exp: channel count requested from the expanding ghost_conv.
      ch_out: channel count requested from the projecting ghost_conv.
      s_s: stride; a strided depthwise convolution is inserted only when it
        equals 2.
      name: variable scope name.

    Returns:
      The projected tensor, with `x` added to it when the input channel count
      equals `ch_out` and the stride is not 2.
    """
    downsample = s_s == 2
    with tf.variable_scope(name_or_scope=name):
        net = ghost_conv(x, ch_exp, k_s=1, name='expand')
        if downsample:
            # num_outputs=None: depthwise step only.
            net = slim.separable_conv2d(net, None, kernel_size=3, stride=s_s, normalizer_fn=slim.batch_norm, activation_fn=None, scope='depthwise')
        net = ghost_conv(net, ch_out, k_s=1, act=None, name='project')
        ch_in = int(x.get_shape().as_list()[3])
        # No shortcut when the stride is 2 or the channel counts differ.
        if downsample or ch_in != ch_out:
            return net
        return tf.add(net, x)
Example #12
0
    def predict(self, features, num_predictions_per_location):
        """Predicts box encodings.

        Args:
          features: A float tensor of shape
            [batch_size, height, width, channels] containing image features.
          num_predictions_per_location: Int specifying the number of box
            predictions to be made per spatial location.

        Returns:
          box_encodings: A float tensor of shape
            [batch_size, num_anchors, 1, code_size] representing the location
            of the objects.
        """
        output_depth = num_predictions_per_location * self._box_code_size
        kernel = [self._kernel_size, self._kernel_size]

        if not self._use_depthwise:
            box_encodings = slim.conv2d(features,
                                        output_depth,
                                        kernel,
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        normalizer_params=None,
                                        scope='BoxEncodingPredictor')
        else:
            # Depthwise step only (num_outputs=None), followed by a 1x1
            # convolution that produces the box codes.
            depthwise = slim.separable_conv2d(
                features,
                None,
                kernel,
                padding='SAME',
                depth_multiplier=1,
                stride=1,
                rate=1,
                scope='BoxEncodingPredictor_depthwise')
            box_encodings = slim.conv2d(depthwise,
                                        output_depth, [1, 1],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        normalizer_params=None,
                                        scope='BoxEncodingPredictor')

        # Prefer the static batch size; fall back to the dynamic one.
        batch_size = features.get_shape().as_list()[0]
        if batch_size is None:
            batch_size = tf.shape(features)[0]

        # Clipping the box encodings to make the inference graph TPU friendly.
        clip_range = self._box_encodings_clip_range
        if clip_range is not None:
            box_encodings = tf.clip_by_value(box_encodings, clip_range.min,
                                             clip_range.max)
        return tf.reshape(box_encodings,
                          [batch_size, -1, 1, self._box_code_size])
Example #13
0
 def _sep_conv(self, net, num_filters, kernel_size, stride, scope):
     """Depthwise separable convolution built from two explicit layers.

     A depthwise convolution with no activation and no normalizer is followed
     by a 1x1 convolution that carries the activation and the normalizer.

     Args:
       net: input tensor.
       num_filters: number of outputs of the pointwise convolution.
       kernel_size: kernel size of the depthwise convolution.
       stride: stride of the depthwise convolution.
       scope: scope prefix; '/depthwise' and '/pointwise' are appended.

     Returns:
       The output tensor of the pointwise convolution.
     """
     # Explicit padding is only used for strided convolutions and replaces
     # 'SAME' padding with a pre-padded input plus 'VALID' padding.
     if self._use_explicit_padding and stride > 1:
         padding = 'VALID'
         depthwise_input = ops.fixed_padding(net, kernel_size)
     else:
         padding = 'SAME'
         depthwise_input = net

     # num_outputs=None: depthwise step only, no pointwise convolution.
     depthwise = slim.separable_conv2d(depthwise_input,
                                       num_outputs=None,
                                       kernel_size=kernel_size,
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       stride=stride,
                                       padding=padding,
                                       scope=scope + '/depthwise')
     return slim.conv2d(depthwise,
                        num_filters, [1, 1],
                        activation_fn=self._activation_fn,
                        normalizer_fn=self._normalization_fn,
                        padding=padding,
                        scope=scope + '/pointwise')
Example #14
0
def _separable_conv(
    h, filters, kernel_size, strides=1, activation_fn=tf.nn.relu6):
  """Separable convolution layer with batch norm and 'SAME' padding.

  Args:
    h: input tensor.
    filters: number of outputs of the separable convolution.
    kernel_size: kernel size; it is squared to obtain the depthwise fan-in,
      so it must be a single number.
    strides: stride of the convolution.
    activation_fn: activation function; must not be None.

  Returns:
    The output tensor of slim.separable_conv2d.

  Raises:
    ValueError: If `activation_fn` is None.
  """
  if activation_fn is None:
    raise ValueError('Activation function cannot be None. Use tf.identity '
                     'instead to better support quantized training.')
  # He-style initialization adapted to depthwise kernels: each depthwise
  # filter only sees kernel_size**2 inputs, whereas tf.initializers.he_normal()
  # was designed for full convolutions and can over-estimate the fan-in of a
  # depthwise kernel by orders of magnitude.
  depthwise_fan_in = kernel_size**2
  # NOTE(review): the divisor looks like the standard deviation of a unit
  # normal truncated to [-2, 2], compensating for truncation - confirm.
  stddev = (2.0 / depthwise_fan_in)**0.5 / .87962566103423978
  conv_kwargs = dict(
      stride=strides,
      activation_fn=activation_fn,
      normalizer_fn=slim.batch_norm,
      weights_initializer=tf.initializers.truncated_normal(stddev=stddev),
      pointwise_initializer=tf.initializers.he_normal(),
      weights_regularizer=slim.l2_regularizer(BACKBONE_WEIGHT_DECAY),
      padding='SAME')
  return slim.separable_conv2d(h, filters, kernel_size, **conv_kwargs)
Example #15
0
def simple_generator(z,
                     image_size,
                     num_interpolate=2,
                     channels=None,
                     depthwise_separate=None,
                     output_bn=True,
                     is_training=True,
                     reuse=None,
                     scope=None):
    """A simple generator model used in the paper.

    A fully connected layer is reshaped into a square feature map, followed
    by `num_interpolate` stages of nearest-neighbour x2 upsampling plus a
    convolution, and a final 3-channel tanh convolution.

    Args:
      z: input tensor fed to the fully connected layer.
      image_size: side length of the generated images; the initial feature
        map has side `image_size // 2**num_interpolate`.
      num_interpolate: number of upsampling stages.
      channels: output channels per stage; when falsy, defaults to
        `128 // (i + 1)` for stage i.
      depthwise_separate: per-stage flags; a true flag replaces the 3x3
        convolution by a 3x3 depthwise convolution plus a 1x1 convolution.
        When falsy, no stage is separable.
      output_bn: whether to apply batch norm (no centering) to the output.
      is_training: passed to the batch norm layers.
      reuse: passed to tf.variable_scope.
      scope: optional variable scope name; the default name is 'generator'.

    Returns:
      The generated tensor with 3 channels.
    """
    # The generator structure is originally defined in "XNOR-Net: ImageNet
    # Classification Using Binary Convolutional Neural Networks"
    # https://arxiv.org/pdf/1603.05279.pdf

    if not channels:
        channels = [128 // divisor for divisor in range(1, num_interpolate + 1)]
    if not depthwise_separate:
        depthwise_separate = [False] * num_interpolate

    # noinspection PyTypeChecker
    assert len(channels) == len(depthwise_separate) == num_interpolate

    init_size = image_size // (2**num_interpolate)
    nearest = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    batch_norm_defaults = dict(decay=0.9,
                               center=True,
                               scale=True,
                               epsilon=0.8,
                               is_training=is_training)
    conv_defaults = dict(activation_fn=tf.nn.leaky_relu,
                         normalizer_fn=slim.batch_norm)

    with tf.variable_scope(scope, 'generator', [z], reuse=reuse):
        # noinspection PyCallingNonCallable
        with slim.arg_scope([slim.batch_norm], **batch_norm_defaults):
            # noinspection PyCallingNonCallable
            with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                                **conv_defaults):
                first_channels = channels[0]
                x = slim.fully_connected(
                    z,
                    init_size * init_size * first_channels,
                    activation_fn=None,
                    biases_initializer=None,
                    scope='dense')
                x = tf.reshape(x, [-1, init_size, init_size, first_channels])
                # The DAFL code uses different epsilon values for its batch
                # normalization layers; they are kept for reproducibility. See
                # https://github.com/huawei-noah/Data-Efficient-Model-Compression/blob/master/DAFL/DAFL-train.py#L54  pylint: disable=line-too-long
                x = slim.batch_norm(x, epsilon=1e-5, scope='bn_0')
                x = tf.nn.leaky_relu(x)

                # Upsampling stages: each one doubles the spatial size.
                for stage, n_channels in enumerate(channels):
                    side = init_size * 2**(stage + 1)
                    x = tf.image.resize(x, [side, side],
                                        method=nearest,
                                        name='interpolate_{}'.format(stage))
                    if depthwise_separate[stage]:
                        # num_outputs=None: depthwise step only.
                        x = slim.separable_conv2d(
                            x,
                            None, [3, 3],
                            scope='conv_{}_depthwise'.format(stage))
                        x = slim.conv2d(
                            x,
                            n_channels, [1, 1],
                            scope='conv_{}_pointwise'.format(stage))
                    else:
                        x = slim.conv2d(x,
                                        n_channels, [3, 3],
                                        scope='conv_{}'.format(stage))

                # Output layer: tanh activation, no normalizer.
                x = slim.conv2d(x,
                                3, [3, 3],
                                activation_fn=tf.nn.tanh,
                                normalizer_fn=None,
                                scope='conv_{}'.format(num_interpolate))

                if output_bn:
                    x = slim.batch_norm(x,
                                        center=False,
                                        scale=True,
                                        scope='bn_output',
                                        is_training=is_training)

            return x
def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
                                  min_depth, insert_1x1_conv, image_features,
                                  pool_residual=False):
  """Generates multi resolution feature maps from input image features.

  Generates multi-scale feature maps for detection as in the SSD papers by
  Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1.

  More specifically, it performs the following two tasks:
  1) If a layer name is provided in the configuration, returns that layer as a
     feature map.
  2) If a layer name is left as an empty string, constructs a new feature map
     based on the spatial shape and depth configuration. Note that the current
     implementation only supports generating new layers using convolution of
     stride 2 resulting in a spatial resolution reduction by a factor of 2.
     By default convolution kernel size is set to 3, and it can be customized
     by caller.

  An example of the configuration for Inception V3:
  {
    'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
    'layer_depth': [-1, -1, -1, 512, 256, 128]
  }

  Args:
    feature_map_layout: Dictionary of specifications for the feature map
      layouts in the following format (Inception V2/V3 respectively):
      {
        'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''],
        'layer_depth': [-1, -1, -1, 512, 256, 128]
      }
      or
      {
        'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
        'layer_depth': [-1, -1, -1, 512, 256, 128]
      }
      If 'from_layer' is specified, the specified feature map is directly used
      as a box predictor layer, and the layer_depth is directly inferred from
      the feature map (instead of using the provided 'layer_depth' parameter).
      In this case, our convention is to set 'layer_depth' to -1 for clarity.
      Otherwise, if 'from_layer' is an empty string, then the box predictor
      layer will be built from the previous layer using convolution operations.
      Note that the current implementation only supports generating new layers
      using convolutions of stride 2 (resulting in a spatial resolution
      reduction by a factor of 2), and will be extended to a more flexible
      design. Convolution kernel size is set to 3 by default, and can be
      customized by 'conv_kernel_size' parameter (similarly, 'conv_kernel_size'
      should be set to -1 if 'from_layer' is specified). The created convolution
      operation will be a normal 2D convolution by default, and a depthwise
      convolution followed by 1x1 convolution if 'use_depthwise' is set to True.
      An optional 'use_explicit_padding' entry switches the stride-2
      convolution to 'VALID' padding applied after `ops.fixed_padding`.
    depth_multiplier: Depth multiplier for convolutional layers.
    min_depth: Minimum depth for convolutional layers.
    insert_1x1_conv: A boolean indicating whether an additional 1x1 convolution
      should be inserted before shrinking the feature map.
    image_features: A dictionary of handles to activation tensors from the
      base feature extractor.
    pool_residual: Whether to add an average pooling layer followed by a
      residual connection between subsequent feature maps when the channel
      depth match. For example, with option 'layer_depth': [-1, 512, 256, 256],
      a pooling and residual layer is added between the third and fourth
      feature map. This option is better used with Weight Shared Convolution
      Box Predictor when all feature maps have the same channel depth to
      encourage more consistent features across multi-scale feature maps.
      It only takes effect on layers built with 'use_depthwise' enabled.

  Returns:
    feature_maps: an OrderedDict mapping keys (feature map names) to
      tensors where each tensor has shape [batch, height_i, width_i, depth_i].

  Raises:
    ValueError: if the number of entries in 'from_layer' and 'layer_depth' do
      not match.
    IndexError: if the first 'from_layer' entry is an empty string, because
      there is no previous feature map to build the new layer from.
  """
  from_layers = feature_map_layout['from_layer']
  layer_depths = feature_map_layout['layer_depth']
  # Enforce the documented contract before any graph ops are created; without
  # this check surplus 'layer_depth' entries would be silently ignored.
  if len(from_layers) != len(layer_depths):
    raise ValueError(
        "The number of entries in 'from_layer' ({}) and 'layer_depth' ({}) "
        'do not match.'.format(len(from_layers), len(layer_depths)))

  depth_fn = get_depth_fn(depth_multiplier, min_depth)

  feature_map_keys = []
  feature_maps = []
  # Name of the most recent backbone layer; used as the scope prefix of every
  # layer generated after it.
  base_from_layer = ''
  use_explicit_padding = feature_map_layout.get('use_explicit_padding', False)
  use_depthwise = feature_map_layout.get('use_depthwise', False)
  has_custom_kernel_sizes = 'conv_kernel_size' in feature_map_layout
  for index, from_layer in enumerate(from_layers):
    layer_depth = layer_depths[index]
    conv_kernel_size = 3
    if has_custom_kernel_sizes:
      conv_kernel_size = feature_map_layout['conv_kernel_size'][index]
    if from_layer:
      # Reuse an existing backbone activation as-is.
      feature_map = image_features[from_layer]
      base_from_layer = from_layer
      feature_map_keys.append(from_layer)
    else:
      # Build a new, spatially halved feature map on top of the previous one.
      pre_layer = feature_maps[-1]
      pre_layer_depth = pre_layer.get_shape().as_list()[3]
      intermediate_layer = pre_layer
      if insert_1x1_conv:
        layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
            base_from_layer, index, depth_fn(layer_depth // 2))
        intermediate_layer = slim.conv2d(
            pre_layer,
            depth_fn(layer_depth // 2), [1, 1],
            padding='SAME',
            stride=1,
            scope=layer_name)
      layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format(
          base_from_layer, index, conv_kernel_size, conv_kernel_size,
          depth_fn(layer_depth))
      stride = 2
      padding = 'SAME'
      if use_explicit_padding:
        # Pad explicitly, then convolve without implicit padding.
        padding = 'VALID'
        intermediate_layer = ops.fixed_padding(
            intermediate_layer, conv_kernel_size)
      if use_depthwise:
        # num_outputs=None makes separable_conv2d emit only the depthwise
        # stage; the pointwise stage is the explicit 1x1 conv that follows.
        feature_map = slim.separable_conv2d(
            intermediate_layer,
            None, [conv_kernel_size, conv_kernel_size],
            depth_multiplier=1,
            padding=padding,
            stride=stride,
            scope=layer_name + '_depthwise')
        feature_map = slim.conv2d(
            feature_map,
            depth_fn(layer_depth), [1, 1],
            padding='SAME',
            stride=1,
            scope=layer_name)
        if pool_residual and pre_layer_depth == depth_fn(layer_depth):
          # Residual from the previous map, pooled down to the new resolution.
          feature_map += slim.avg_pool2d(
              pre_layer, [3, 3],
              padding='SAME',
              stride=2,
              scope=layer_name + '_pool')
      else:
        feature_map = slim.conv2d(
            intermediate_layer,
            depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size],
            padding=padding,
            stride=stride,
            scope=layer_name)
      feature_map_keys.append(layer_name)
    feature_maps.append(feature_map)
  return collections.OrderedDict(zip(feature_map_keys, feature_maps))
Example #17
0
    def __call__(self, inputs, state, scope=None):
        """Runs one timestep of the bottlenecked convolutional LSTM cell.

        Args:
          inputs: Input tensor at the current timestep.
          state: Tuple of tensors (cell state, output) from the previous
            timestep.
          scope: Optional variable scope name; 'conv_lstm_cell' is used when
            it is not given.

        Returns:
          A tuple whose first element is the cell output (the new hidden
          state, concatenated with the bottleneck along axis 3 when output
          bottlenecking is enabled) and whose second element is an
          LSTMStateTuple holding the new cell state and hidden state.
        """
        with tf.variable_scope(scope or 'conv_lstm_cell',
                               reuse=tf.AUTO_REUSE):
            prev_cell, prev_hidden = state

            # Stored states may be flat; restore their unflattened layout.
            if self._flatten_state:
                prev_cell = tf.reshape(prev_cell, [-1] + self.output_size)
                prev_hidden = tf.reshape(prev_hidden,
                                         [-1] + self.output_size)

            # Histogram of what enters the cell.
            if self._viz_gates:
                slim.summaries.add_histogram_summary(inputs, 'cell_input')

            if not self._pre_bottleneck:
                # Fuse the input with the previous hidden state.
                bottleneck = slim.separable_conv2d(
                    tf.concat([inputs, prev_hidden], 3),
                    self._num_units,
                    self._filter_size,
                    depth_multiplier=1,
                    activation_fn=self._activation,
                    normalizer_fn=None,
                    scope='bottleneck')

                if self._viz_gates:
                    slim.summaries.add_histogram_summary(
                        bottleneck, 'bottleneck')
            else:
                # The caller already applied the bottleneck.
                bottleneck = inputs

            # One convolution produces all four gate pre-activations.
            gate_logits = slim.separable_conv2d(bottleneck,
                                                4 * self._num_units,
                                                self._filter_size,
                                                depth_multiplier=1,
                                                activation_fn=None,
                                                normalizer_fn=None,
                                                scope='gates')

            in_gate, candidate, forget_gate, out_gate = tf.split(
                gate_logits, 4, 3)

            retained = prev_cell * tf.sigmoid(forget_gate +
                                              self._forget_bias)
            written = tf.sigmoid(in_gate) * self._activation(candidate)
            next_cell = retained + written
            if self._clip_state:
                next_cell = tf.clip_by_value(next_cell, -6, 6)
            next_hidden = self._activation(next_cell) * tf.sigmoid(out_gate)

            # Histograms of the cell output and the new cell state.
            if self._viz_gates:
                slim.summaries.add_histogram_summary(next_hidden,
                                                     'cell_output')
                slim.summaries.add_histogram_summary(next_cell, 'cell_state')

            # The output is taken before the state is flattened again.
            if self._output_bottleneck:
                output = tf.concat([next_hidden, bottleneck], axis=3)
            else:
                output = next_hidden

            # Flatten the state again so it can be stored.
            if self._flatten_state:
                next_cell = tf.reshape(next_cell, [-1, self._param_count])
                next_hidden = tf.reshape(next_hidden,
                                         [-1, self._param_count])

            return output, contrib_rnn.LSTMStateTuple(next_cell, next_hidden)
def xfcn(inputs, dropout_rate, scope='xfcn'):
    """Defines the xfcn network.

    The trunk is a stack of conv / separable-conv / batch-norm layers scoped
    under 'xception_65/...' (entry flow, middle flow, exit flow). Five
    intermediate tensors are passed through dropout, reduced to 16-channel
    side features, upsampled with transposed convolutions, passed through
    `crop_features` together with the dynamic shape of `inputs`, and finally
    fused by a 1x1 convolution ('upscore-fuse').

    Args:
    inputs: Tensorflow placeholder that contains the input image
    dropout_rate: Value forwarded to `slim.dropout` as `keep_prob`.
        NOTE(review): despite the name this is used as the keep probability,
        not the drop probability -- confirm against callers.
    scope: Scope name for the network
    Returns:
    net: Output Tensor of the network (result of the 'upscore-fuse' conv)
    end_points: Dictionary with all Tensors of the network, built from the
        end-points collection via `utils.convert_collection_to_dict`
    """
    # Dynamic shape of the input; handed to crop_features for every
    # upsampled side output below.
    im_size = tf.shape(inputs)

    with tf.variable_scope(scope, 'xfcn', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs of all intermediate layers.
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            outputs_collections=end_points_collection):
            # Entry flow
            # Block 1: two plain 3x3 convs (the first with stride 2, VALID
            # padding), each followed by batch norm + ReLU.
            net = slim.conv2d(inputs,
                              32, [3, 3],
                              stride=2,
                              padding='VALID',
                              scope='xception_65/entry_flow/conv1_1')
            net = slim.batch_norm(
                net, scope='xception_65/entry_flow/conv1_1/BatchNorm')
            net = tf.nn.relu(net)
            net = slim.conv2d(net,
                              64, [3, 3],
                              scope='xception_65/entry_flow/conv1_2')
            net = slim.batch_norm(
                net, scope='xception_65/entry_flow/conv1_2/BatchNorm')
            net = tf.nn.relu(net)
            # Strided 1x1 shortcut that is added to the output of the
            # separable-conv stack below.
            residual_1 = slim.conv2d(
                net,
                128, [1, 1],
                stride=2,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/shortcut'
            )
            residual_1 = slim.batch_norm(
                residual_1,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/shortcut/BatchNorm'
            )

            # block 2 (variables scoped as entry_flow/block1): three separable
            # convs with batch norm, then a stride-2 max pool.
            # NOTE(review): only this first separable conv passes
            # activation_fn=None; the later ones rely on the slim default --
            # confirm this asymmetry is intended.
            net = slim.separable_conv2d(
                net,
                128, [3, 3],
                activation_fn=None,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )

            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                128, [3, 3],
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                128, [3, 3],
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block1/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )

            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')

            net_2 = tf.math.add(residual_1, net)

            # The dropped-out copy only feeds the side branch (side_2); the
            # trunk continues from the un-dropped net_2.
            net_2_drop = slim.dropout(net_2, keep_prob=dropout_rate)

            residual_2 = slim.conv2d(
                net_2,
                256, [1, 1],
                stride=2,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/shortcut'
            )
            residual_2 = slim.batch_norm(
                residual_2,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/shortcut/BatchNorm'
            )

            # block 3 (variables scoped as entry_flow/block2)
            net = tf.nn.relu(net_2)
            net = slim.separable_conv2d(
                net,
                256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                256, [3, 3],
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block2/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )

            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_3 = tf.math.add(net, residual_2)

            # Dropped-out copy used only by side_3.
            net_3_drop = slim.dropout(net_3, keep_prob=dropout_rate)

            residual_3 = slim.conv2d(
                net_3,
                728, [1, 1],
                stride=2,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/shortcut'
            )
            residual_3 = slim.batch_norm(
                residual_3,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/shortcut/BatchNorm'
            )

            # block 4 (variables scoped as entry_flow/block3)
            net = tf.nn.relu(net_3)
            net = slim.separable_conv2d(
                net,
                728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                728, [3, 3],
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/entry_flow/block3/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )

            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_4 = tf.math.add(net, residual_3)

            # Dropped-out copy used only by side_4.
            net_4_drop = slim.dropout(net_4, keep_prob=dropout_rate)

            # middle flow
            # block 5
            net = middle_flow_block(net_4, unit_num=1)
            # block 6 - 20
            # NOTE(review): only unit_num=2 is built here; whether that
            # covers "blocks 6 - 20" depends on middle_flow_block, which is
            # defined elsewhere -- confirm.
            net = middle_flow_block(net, unit_num=2)
            # Dropped-out copy used only by side_5.
            net_5_drop = slim.dropout(net, keep_prob=dropout_rate)

            # Exit flow
            # The shortcut is taken from the middle-flow output before the
            # ReLU that opens block 21.
            residual_20 = slim.conv2d(
                net,
                1024, [1, 1],
                stride=2,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/shortcut')
            residual_20 = slim.batch_norm(
                residual_20,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/shortcut/BatchNorm'
            )
            # block 21 (variables scoped as exit_flow/block1)
            net = tf.nn.relu(net)
            net = slim.separable_conv2d(
                net,
                728, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv1_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv1_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                1024, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv2_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv2_pointwise/BatchNorm'
            )
            net = tf.nn.relu(net)

            net = slim.separable_conv2d(
                net,
                1024, [3, 3],
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv3_depthwise'
            )
            net = slim.batch_norm(
                net,
                scope=
                'xception_65/exit_flow/block1/unit_1/xception_module/separable_conv3_pointwise/BatchNorm'
            )
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='SAME')
            net_6 = tf.math.add(net, residual_20)

            # Dropped-out copy used only by side_6.
            net_6_drop = slim.dropout(net_6, keep_prob=dropout_rate)

            # Get side outputs of the network
            # Each side branch is a 16-channel 3x3 conv over a dropped-out
            # trunk tensor, with a per-branch dilation rate.
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=tf.zeros_initializer()):
                side_2 = slim.conv2d(net_2_drop,
                                     16, [3, 3],
                                     rate=1,
                                     scope='conv2_2_16')

                side_3 = slim.conv2d(net_3_drop,
                                     16, [3, 3],
                                     rate=2,
                                     scope='conv3_3_16')

                side_4 = slim.conv2d(net_4_drop,
                                     16, [3, 3],
                                     rate=4,
                                     scope='conv4_3_16')

                side_5 = slim.conv2d(net_5_drop,
                                     16, [3, 3],
                                     rate=4,
                                     scope='conv5_3_16')

                side_6 = slim.conv2d(net_6_drop,
                                     16, [3, 3],
                                     rate=8,
                                     scope='conv6_3_16')

                # Supervise side outputs
                # 1x1 convs reduce every side feature to a single channel.
                side_2_s = slim.conv2d(side_2, 1, [1, 1], scope='score-dsn_2')
                side_3_s = slim.conv2d(side_3, 1, [1, 1], scope='score-dsn_3')
                side_4_s = slim.conv2d(side_4, 1, [1, 1], scope='score-dsn_4')
                side_5_s = slim.conv2d(side_5, 1, [1, 1], scope='score-dsn_5')
                side_6_s = slim.conv2d(side_6, 1, [1, 1], scope='score-dsn_6')
                with slim.arg_scope([slim.convolution2d_transpose],
                                    outputs_collections=end_points_collection):
                    # Side outputs
                    # Each single-channel score is upsampled with a transposed
                    # conv (positional args: num_outputs, kernel_size, stride),
                    # passed through crop_features with im_size and registered
                    # in the end-points collection under an explicit name.
                    side_2_s = slim.convolution2d_transpose(
                        side_2_s, 1, 8, 4, scope='score-dsn_2-up')
                    side_2_s = crop_features(side_2_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_2-cr',
                                                side_2_s)

                    side_3_s = slim.convolution2d_transpose(
                        side_3_s, 1, 16, 8, scope='score-dsn_3-up')
                    side_3_s = crop_features(side_3_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_3-cr',
                                                side_3_s)

                    side_4_s = slim.convolution2d_transpose(
                        side_4_s, 1, 32, 16, scope='score-dsn_4-up')
                    side_4_s = crop_features(side_4_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_4-cr',
                                                side_4_s)

                    side_5_s = slim.convolution2d_transpose(
                        side_5_s, 1, 32, 16, scope='score-dsn_5-up')
                    side_5_s = crop_features(side_5_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_5-cr',
                                                side_5_s)

                    side_6_s = slim.convolution2d_transpose(
                        side_6_s, 1, 64, 32, scope='score-dsn_6-up')
                    side_6_s = crop_features(side_6_s, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/score-dsn_6-cr',
                                                side_6_s)

                    # Main output
                    # The 16-channel side features get the same upsample +
                    # crop treatment; these are the tensors that are fused.
                    side_2_f = slim.convolution2d_transpose(
                        side_2, 16, 8, 4, scope='score-multi2-up')
                    side_2_f = crop_features(side_2_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi2-cr',
                                                side_2_f)

                    side_3_f = slim.convolution2d_transpose(
                        side_3, 16, 16, 8, scope='score-multi3-up')
                    side_3_f = crop_features(side_3_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi3-cr',
                                                side_3_f)

                    side_4_f = slim.convolution2d_transpose(
                        side_4, 16, 32, 16, scope='score-multi4-up')
                    side_4_f = crop_features(side_4_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi4-cr',
                                                side_4_f)

                    side_5_f = slim.convolution2d_transpose(
                        side_5, 16, 32, 16, scope='score-multi5-up')
                    side_5_f = crop_features(side_5_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi5-cr',
                                                side_5_f)

                    side_6_f = slim.convolution2d_transpose(
                        side_6, 16, 64, 32, scope='score-multi6-up')
                    side_6_f = crop_features(side_6_f, im_size)
                    utils.collect_named_outputs(end_points_collection,
                                                'xfcn/side-multi6-cr',
                                                side_6_f)

                # Fuse all upsampled side features along the channel axis
                # and reduce them to one channel.
                concat_side = tf.concat(
                    [side_2_f, side_3_f, side_4_f, side_5_f, side_6_f], axis=3)
                net = slim.conv2d(concat_side, 1, [1, 1], scope='upscore-fuse')

        end_points = utils.convert_collection_to_dict(end_points_collection)

        return net, end_points
Example #19
0
def _mnasfpn_cell(feature_maps,
                  feature_levels,
                  cell_spec,
                  output_channel=48,
                  use_explicit_padding=False,
                  use_native_resize_op=False,
                  multiplier_func=None):
  """Create a MnasFPN cell.

  Args:
    feature_maps: input feature maps.
    feature_levels: levels of the feature maps.
    cell_spec: A list of Block configs.
    output_channel: Number of features for the input, output and intermediate
      feature maps.
    use_explicit_padding: Whether to use explicit padding.
    use_native_resize_op: Whether to use native resize op.
    multiplier_func: Depth-multiplier function. If None, use identity function.

  Returns:
    A transformed list of feature maps at the same resolutions as the inputs.
  """
  # This is the level where multipliers are realized.
  if multiplier_func is None:
    multiplier_func = lambda x: x
  num_outputs = len(feature_maps)
  cell_features = list(feature_maps)
  cell_levels = list(feature_levels)
  padding = 'VALID' if use_explicit_padding else 'SAME'
  for bi, block in enumerate(cell_spec):
    with tf.variable_scope('block_{}'.format(bi)):
      block_level = block.output_level
      intermediate_feature = None
      for i, inp in enumerate(block.inputs):
        with tf.variable_scope('input_{}'.format(i)):
          input_level = cell_levels[inp]
          node = _apply_size_dependent_ordering(
              cell_features[inp], input_level, block_level,
              multiplier_func(block.expansion_size), use_explicit_padding,
              use_native_resize_op)
        # Add features incrementally to avoid producing AddN, which doesn't
        # play well with TfLite.
        if intermediate_feature is None:
          intermediate_feature = node
        else:
          intermediate_feature += node
      node = tf.nn.relu6(intermediate_feature)
      node = slim.separable_conv2d(
          _maybe_pad(node, use_explicit_padding, block.kernel_size),
          multiplier_func(output_channel),
          block.kernel_size,
          activation_fn=None,
          normalizer_fn=slim.batch_norm,
          padding=padding,
          scope='SepConv')
    cell_features.append(node)
    cell_levels.append(block_level)

  # Cell-wide residuals.
  out_idx = range(len(cell_features) - num_outputs, len(cell_features))
  for in_i, out_i in enumerate(out_idx):
    if cell_features[out_i].shape.as_list(
    ) == cell_features[in_i].shape.as_list():
      cell_features[out_i] += cell_features[in_i]

  return cell_features[-num_outputs:]
def mobilenet_v1_base(inputs,
                      final_endpoint='Conv2d_13_pointwise',
                      min_depth=8,
                      depth_multiplier=1.0,
                      conv_defs=None,
                      output_stride=None,
                      scope=None):
    """Mobilenet v1.

    Constructs a Mobilenet v1 network from inputs to the given final endpoint.

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels].
      final_endpoint: specifies the endpoint to construct the network up to.
        It can be one of ['Conv2d_0', 'Conv2d_1_pointwise',
        'Conv2d_2_pointwise', 'Conv2d_3_pointwise', 'Conv2d_4_pointwise',
        'Conv2d_5_pointwise', 'Conv2d_6_pointwise', 'Conv2d_7_pointwise',
        'Conv2d_8_pointwise', 'Conv2d_9_pointwise', 'Conv2d_10_pointwise',
        'Conv2d_11_pointwise', 'Conv2d_12_pointwise', 'Conv2d_13_pointwise'].
        The '_depthwise' endpoints of depthwise-separable layers are matched
        as well.
      min_depth: Minimum depth value (number of channels) for all convolution
        ops. Enforced when depth_multiplier < 1, and not an active constraint
        when depth_multiplier >= 1.
      depth_multiplier: Float multiplier for the depth (number of channels)
        for all convolution ops. The value must be greater than zero. Typical
        usage will be to set this value in (0, 1) to reduce the number of
        parameters or computation cost of the model.
      conv_defs: A list of ConvDef namedtuples specifying the net
        architecture. `_CONV_DEFS` is used when this is None.
      output_stride: An integer that specifies the requested ratio of input to
        output spatial resolution. If not None, then we invoke atrous
        convolution if necessary to prevent the network from reducing the
        spatial resolution of the activation maps. Allowed values are 8
        (accurate fully convolutional mode), 16 (fast fully convolutional
        mode), 32 (classification mode).
      scope: Optional variable_scope.

    Returns:
      tensor_out: output tensor corresponding to the final_endpoint.
      end_points: a set of activations for external use, for example summaries
                  or losses.

    Raises:
      ValueError: if final_endpoint is not set to one of the predefined values
                  (detected only after every layer has been built), or
                  depth_multiplier <= 0, or the target output_stride is not
                  allowed, or a conv_defs entry is neither a Conv nor a
                  DepthSepConv.
    """

    # Used to find thinned depths for each layer.
    def depth(d):
        return max(int(d * depth_multiplier), min_depth)

    end_points = {}

    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')

    if conv_defs is None:
        conv_defs = _CONV_DEFS

    if output_stride is not None and output_stride not in [8, 16, 32]:
        raise ValueError('Only allowed output_stride values are 8, 16, 32.')

    with tf.compat.v1.variable_scope(scope, 'MobilenetV1', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            padding='SAME'):
            # The current_stride variable keeps track of the output stride of the
            # activations, i.e., the running product of convolution strides up to the
            # current network layer. This allows us to invoke atrous convolution
            # whenever applying the next convolution would result in the activations
            # having output stride larger than the target output_stride.
            current_stride = 1

            # The atrous convolution rate parameter.
            rate = 1

            net = inputs
            for i, conv_def in enumerate(conv_defs):
                if not isinstance(conv_def, (Conv, DepthSepConv)):
                    # Reject unknown layer specs before reading any of their
                    # attributes: such an object may lack `stride` or `ltype`,
                    # which would surface as an AttributeError instead of the
                    # documented ValueError.
                    raise ValueError(
                        'Unknown convolution type %s for layer %d' %
                        (getattr(conv_def, 'ltype',
                                 type(conv_def).__name__), i))

                end_point_base = 'Conv2d_%d' % i

                if output_stride is not None and current_stride == output_stride:
                    # If we have reached the target output_stride, then we need to employ
                    # atrous convolution with stride=1 and multiply the atrous rate by the
                    # current unit's stride for use in subsequent layers.
                    layer_stride = 1
                    layer_rate = rate
                    rate *= conv_def.stride
                else:
                    layer_stride = conv_def.stride
                    layer_rate = 1
                    current_stride *= conv_def.stride

                if isinstance(conv_def, Conv):
                    end_point = end_point_base
                    # NOTE(review): this branch uses conv_def.stride and no
                    # rate, i.e. it ignores layer_stride / layer_rate. That is
                    # harmless while Conv layers only appear before the target
                    # output_stride is reached -- confirm for custom
                    # conv_defs.
                    net = slim.conv2d(net,
                                      depth(conv_def.depth),
                                      conv_def.kernel,
                                      stride=conv_def.stride,
                                      normalizer_fn=slim.batch_norm,
                                      scope=end_point)
                    end_points[end_point] = net
                    if end_point == final_endpoint:
                        return net, end_points

                else:
                    # DepthSepConv: depthwise stage followed by a 1x1
                    # pointwise stage, each exposed as its own endpoint.
                    end_point = end_point_base + '_depthwise'

                    # By passing filters=None
                    # separable_conv2d produces only a depthwise convolution layer
                    net = slim.separable_conv2d(net,
                                                None,
                                                conv_def.kernel,
                                                depth_multiplier=1,
                                                stride=layer_stride,
                                                rate=layer_rate,
                                                normalizer_fn=slim.batch_norm,
                                                scope=end_point)

                    end_points[end_point] = net
                    if end_point == final_endpoint:
                        return net, end_points

                    end_point = end_point_base + '_pointwise'

                    net = slim.conv2d(net,
                                      depth(conv_def.depth), [1, 1],
                                      stride=1,
                                      normalizer_fn=slim.batch_norm,
                                      scope=end_point)

                    end_points[end_point] = net
                    if end_point == final_endpoint:
                        return net, end_points
    # Every layer was built without matching final_endpoint.
    raise ValueError('Unknown final endpoint %s' % final_endpoint)
Example #21
0
def inception_v2_base(inputs,
                      final_endpoint='Mixed_5c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      use_separable_conv=True,
                      data_format='NHWC',
                      scope=None):
  """Inception v2 (6a2).

  Constructs an Inception v2 network from inputs to the given final endpoint.
  This method can construct the network up to the layer inception(5b) as
  described in http://arxiv.org/abs/1502.03167.

  Layers are built strictly in the order listed under `final_endpoint`. After
  each one its output is stored in `end_points` and, if its name equals
  `final_endpoint`, the function returns immediately, so the returned
  `end_points` holds only the endpoints up to and including the requested one.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a',
      'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b',
      'Mixed_5c'].
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    use_separable_conv: Use a separable convolution for the first layer
      Conv2d_1a_7x7. If this is False, use a normal convolution instead.
    data_format: Data format of the activations ('NHWC' or 'NCHW'). 'NCHW' is
      only accepted when use_separable_conv is False.
    scope: Optional variable_scope.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a dict mapping endpoint name to its activation tensor, for
                external use, for example summaries or losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
                or depth_multiplier <= 0, or data_format is neither 'NHWC' nor
                'NCHW', or data_format is 'NCHW' while use_separable_conv is
                True. Note that the unknown-final_endpoint error is raised
                only after every layer has been constructed.
  """

  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}

  # Used to find thinned depths for each layer.
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  # Scales a nominal channel count by depth_multiplier (truncated to int) and
  # clamps the result from below at min_depth.
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  if data_format != 'NHWC' and data_format != 'NCHW':
    raise ValueError('data_format must be either NHWC or NCHW.')
  if data_format == 'NCHW' and use_separable_conv:
    raise ValueError(
        'separable convolution only supports NHWC layout. NCHW data format can'
        ' only be used when use_separable_conv is False.'
    )

  # Axis along which the branch outputs of each Mixed_* module are
  # concatenated: the position of 'C' in the layout string.
  concat_dim = 3 if data_format == 'NHWC' else 1
  with tf.compat.v1.variable_scope(scope, 'InceptionV2', [inputs]):
    # Defaults for every conv2d / max_pool2d / avg_pool2d call below; individual
    # calls override stride where they downsample. separable_conv2d is not
    # covered by this arg_scope, so the separable stem passes its own padding.
    with slim.arg_scope(
        [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
        stride=1,
        padding='SAME',
        data_format=data_format):

      # Note that sizes in the comments below assume an input spatial size of
      # 224x224, however, the inputs can be of any size greater than 32x32.

      # 224 x 224 x 3
      end_point = 'Conv2d_1a_7x7'

      if use_separable_conv:
        # depthwise_multiplier here is different from depth_multiplier.
        # depthwise_multiplier determines the output channels of the initial
        # depthwise conv (see docs for tf.nn.separable_conv2d), while
        # depth_multiplier controls the # channels of the subsequent 1x1
        # convolution. Must have
        #   in_channels * depthwise_multiplier <= out_channels
        # so that the separable convolution is not overparameterized.
        # NOTE(review): the divisor 3 presumably corresponds to a 3-channel
        # input -- confirm for other channel counts. The expression evaluates
        # to 0 when depth(64) < 3, which requires min_depth < 3.
        depthwise_multiplier = min(int(depth(64) / 3), 8)
        net = slim.separable_conv2d(
            inputs, depth(64), [7, 7],
            depth_multiplier=depthwise_multiplier,
            stride=2,
            padding='SAME',
            weights_initializer=trunc_normal(1.0),
            scope=end_point)
      else:
        # Use a normal convolution instead of a separable convolution.
        net = slim.conv2d(
            inputs,
            depth(64), [7, 7],
            stride=2,
            weights_initializer=trunc_normal(1.0),
            scope=end_point)
      # Record the activation and stop here if this is the requested endpoint;
      # the same two statements follow every layer/module below.
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 112 x 112 x 64
      end_point = 'MaxPool_2a_3x3'
      net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 56 x 56 x 64
      end_point = 'Conv2d_2b_1x1'
      net = slim.conv2d(net, depth(64), [1, 1], scope=end_point,
                        weights_initializer=trunc_normal(0.1))
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 56 x 56 x 64
      end_point = 'Conv2d_2c_3x3'
      net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 56 x 56 x 192
      end_point = 'MaxPool_3a_3x3'
      net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 192
      # Inception module.
      # Four parallel branches read the same `net`: a 1x1 conv, a 1x1 -> 3x3
      # conv, a 1x1 -> 3x3 -> 3x3 conv, and a 3x3 pool -> 1x1 conv. Their
      # outputs are concatenated along concat_dim.
      end_point = 'Mixed_3b'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(32), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 256
      end_point = 'Mixed_3c'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 320
      # Reduction module: three branches, each ending in a stride-2 op (two
      # conv stacks and a max pool), so the spatial size is halved.
      end_point = 'Mixed_4a'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(160), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(
              net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
        net = tf.concat(axis=concat_dim, values=[branch_0, branch_1, branch_2])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4b'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(224), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4c'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4d'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4e'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(96), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(160), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      # Second reduction module, same three-branch stride-2 layout as Mixed_4a.
      end_point = 'Mixed_5a'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 7 x 7 x 1024
      end_point = 'Mixed_5b'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(160), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 7 x 7 x 1024
      end_point = 'Mixed_5c'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          # Unlike the other four-branch modules, the pooling branch here uses
          # max pooling rather than average pooling.
          branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
    # Reached only when final_endpoint matched none of the endpoint names
    # above; by this point every layer has already been constructed.
    raise ValueError('Unknown final endpoint %s' % final_endpoint)