def extract_base_features_small(self, preprocessed_inputs):
    """Extract the small base model features.

    Variables are created under the scope of <scope>/MobilenetV2_2/

    Args:
      preprocessed_inputs: preprocessed input images of shape:
        [batch, width, height, depth].

    Returns:
      net: the last feature map created from the base feature extractor.
      end_points: a dictionary of feature maps created.
    """
    scope_name = self._base_network_scope + '_2'
    with tf.variable_scope(scope_name, reuse=self._reuse_weights) as base_scope:
      if self._low_res:
        size_small = preprocessed_inputs.get_shape().as_list()[1] // 2
        inputs_small = tf.image.resize_images(preprocessed_inputs,
                                              [size_small, size_small])
        # Create end point handle for tflite deployment.
        with tf.name_scope(None):
          inputs_small = tf.identity(
              inputs_small, name='normalized_input_image_tensor_small')
      else:
        inputs_small = preprocessed_inputs
      net, end_points = mobilenet_v2.mobilenet_base(
          inputs_small,
          depth_multiplier=self._depth_multipliers[1],
          conv_defs=mobilenet_defs.mobilenet_v2_lite_def(
              is_quantized=self._is_quantized, low_res=self._low_res),
          use_explicit_padding=self._use_explicit_padding,
          scope=base_scope)
      return net, end_points
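A minimal standalone sketch of the low-res input path above (assumes TensorFlow 1.x; the placeholder shape is illustrative). Note the integer division: tf.image.resize_images needs integer sizes, so `/ 2` would fail under Python 3.

import tensorflow as tf

images = tf.placeholder(tf.float32, [1, 320, 320, 3])
# Halve the spatial resolution; `//` keeps the size an integer.
size_small = images.get_shape().as_list()[1] // 2  # 160
inputs_small = tf.image.resize_images(images, [size_small, size_small])
# Re-enter the root name scope so the tensor gets a stable, scope-free name
# that a TFLite converter can reference.
with tf.name_scope(None):
    inputs_small = tf.identity(
        inputs_small, name='normalized_input_image_tensor_small')
print(inputs_small.shape)  # (1, 160, 160, 3)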
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v2.mobilenet_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='layer_19',
              depth_multiplier=self._depth_multiplier,
              conv_defs=self._conv_defs,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
Example #4
 def testMobilenetBase(self):
   tf.reset_default_graph()
   # Verifies that mobilenet_base returns pre-pooling layer.
   with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
     net, _ = mobilenet_v2.mobilenet_base(
         tf.placeholder(tf.float32, (10, 224, 224, 16)),
         conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.1)
     self.assertEqual(net.get_shape().as_list(), [10, 7, 7, 128])
 def test_mobilenet_v2_lite_def_low_res(self):
     net, _ = mobilenet_v2.mobilenet_base(
         tf.placeholder(tf.float32, (10, 320, 320, 3)),
         min_depth=8,
         depth_multiplier=1.0,
         conv_defs=mobilenet_defs.mobilenet_v2_lite_def(low_res=True),
         use_explicit_padding=True,
         scope='MobilenetV2')
     self.assertEqual(net.get_shape().as_list(), [10, 20, 20, 320])
 def test_mobilenet_v2_lite_def_is_quantized(self):
     net, _ = mobilenet_v2.mobilenet_base(
         tf.placeholder(tf.float32, (10, 320, 320, 3)),
         min_depth=8,
         depth_multiplier=1.0,
         conv_defs=mobilenet_defs.mobilenet_v2_lite_def(is_quantized=True),
         use_explicit_padding=True,
         scope='MobilenetV2')
     self.assertEqual(net.get_shape().as_list(), [10, 10, 10, 320])
     self._assert_contains_op('MobilenetV2/expanded_conv_16/project/Relu6')
Example #7
    def _image_to_head(self, is_training, reuse=None):
        with slim.arg_scope(mobilenet_v2.training_scope(is_training=is_training)):
            net, endpoints = mobilenet_v2.mobilenet_base(self._image, conv_defs=CTPN_DEF)

        self.variables_to_restore = slim.get_variables_to_restore()

        self._act_summaries.append(net)
        self._layers['head'] = net

        return net
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    # Alternative (stock SSD-MobileNetV2) layout, kept for reference:
    # feature_map_layout = {
    #     'from_layer': ['layer_15/expansion_output', 'layer_19',
    #                    '', '', '', ''],
    #     'layer_depth': [-1, -1, 512, 256, 256, 128],
    #     'use_depthwise': self._use_depthwise,
    #     'use_explicit_padding': self._use_explicit_padding,
    # }

    feature_map_layout = {
        'from_layer': ['layer_5/expansion_output', 'layer_6/expansion_output', 'layer_7/expansion_output',
                       'layer_10/expansion_output', 'layer_15/expansion_output', 'layer_19'],
        'layer_depth': [-1, -1, -1, -1, -1, -1],
        'use_depthwise': self._use_depthwise,
        'use_explicit_padding': self._use_explicit_padding,
    }

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_utils.resnet_arg_scope()), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v2.mobilenet_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='layer_19',
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
        with slim.arg_scope(self._conv_hyperparams_fn()):
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)


    # dict_values -> list, as promised by the docstring.
    return list(feature_maps.values())
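On the layout dict above: in multi_resolution_feature_maps, a named 'from_layer' entry reuses an existing base-network endpoint (layer_depth == -1 keeps its native channel count), while an empty string asks the generator to append a new stride-2 convolution of the given depth on top of the last map. The two styles, side by side, drawn from this example and its commented-out alternative:

# Endpoint-only layout (as above): six maps taken straight from the base net.
endpoint_layout = {
    'from_layer': ['layer_5/expansion_output', 'layer_6/expansion_output',
                   'layer_7/expansion_output', 'layer_10/expansion_output',
                   'layer_15/expansion_output', 'layer_19'],
    'layer_depth': [-1, -1, -1, -1, -1, -1],
}
# Mixed layout: two endpoints, then four extra stride-2 convolutions of
# depth 512/256/256/128 appended on top of 'layer_19'.
mixed_layout = {
    'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''],
    'layer_depth': [-1, -1, 512, 256, 256, 128],
}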
Example #9
    def _extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

        Args:
          preprocessed_inputs: a [batch, height, width, channels] float tensor
            representing a batch of images.

        Returns:
          feature_maps: a list of tensors where the ith tensor has shape
            [batch, height_i, width_i, depth_i]
        """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer':
            ['layer_15/expansion_output', 'layer_19', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_depthwise': self._use_depthwise,
            'use_explicit_padding': self._use_explicit_padding,
        }

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(
                    is_training=(self._is_training and self._batch_norm_trainable),
                    bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                # TODO(b/68150321): Enable fused batch norm once quantization
                # supports it.
                with slim.arg_scope([slim.batch_norm], fused=False):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='layer_19',
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
                with slim.arg_scope(self._conv_hyperparams):
                    # TODO(b/68150321): Enable fused batch norm once quantization
                    # supports it.
                    with slim.arg_scope([slim.batch_norm], fused=False):
                        feature_maps = feature_map_generators.multi_resolution_feature_maps(
                            feature_map_layout=feature_map_layout,
                            depth_multiplier=self._depth_multiplier,
                            min_depth=self._min_depth,
                            insert_1x1_conv=True,
                            image_features=image_features)

        # dict_values -> list, as promised by the docstring.
        return list(feature_maps.values())
Example #11
def MobileNet(depth_multiplier, imgs_in, weight_decay, batch_norm_momentum,
              is_training):
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training,
                                        weight_decay=weight_decay,
                                        bn_decay=batch_norm_momentum)):
        features, _ = mobilenet_v2.mobilenet_base(
            imgs_in,
            depth_multiplier=depth_multiplier,
            finegrain_classification_mode=depth_multiplier < 1,
            output_stride=16)

    return features
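A hypothetical call to the wrapper above (assumes TF 1.x and the slim MobileNet v2 package on the path); with output_stride=16, a 224x224 input yields a 14x14 feature map.

import tensorflow as tf

imgs_in = tf.placeholder(tf.float32, [1, 224, 224, 3])
features = MobileNet(depth_multiplier=1.0,
                     imgs_in=imgs_in,
                     weight_decay=4e-5,
                     batch_norm_momentum=0.9997,
                     is_training=False)
print(features.shape)  # expected spatial size 14x14: 224 / 16 = 14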
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_depthwise': self._use_depthwise,
        'use_explicit_padding': self._use_explicit_padding,
    }

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          # TODO(b/68150321): Enable fused batch norm once quantization
          # supports it.
          with slim.arg_scope([slim.batch_norm], fused=False):
            _, image_features = mobilenet_v2.mobilenet_base(
                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                final_endpoint='layer_19',
                depth_multiplier=self._depth_multiplier,
                use_explicit_padding=self._use_explicit_padding,
                scope=scope)
        with slim.arg_scope(self._conv_hyperparams_fn()):
          # TODO(b/68150321): Enable fused batch norm once quantization
          # supports it.
          with slim.arg_scope([slim.batch_norm], fused=False):
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features)

    # dict_values -> list, as promised by the docstring.
    return list(feature_maps.values())
 def test_mobilenet_v2_lite_def_reduced(self):
     net, features = mobilenet_v2.mobilenet_base(
         tf.placeholder(tf.float32, (10, 320, 320, 3)),
         min_depth=8,
         depth_multiplier=1.0,
         conv_defs=mobilenet_defs.mobilenet_v2_lite_def(reduced=True),
         use_explicit_padding=True,
         scope='MobilenetV2')
     self.assertEqual(net.get_shape().as_list(), [10, 10, 10, 320])
     self.assertEqual(
         features['layer_3/expansion_output'].get_shape().as_list(),
         [10, 160, 160, 48])
     self.assertEqual(
         features['layer_4/expansion_output'].get_shape().as_list(),
         [10, 80, 80, 72])
Example #14
def build_encoder(network,
                  inputs,
                  is_training,
                  depth_multiplier=None,
                  output_stride=16):
    if network == "mobilenet_v2":
        return mobilenet_v2_slim.mobilenet_base(
            inputs,
            conv_defs=mobilenet_v2_slim.V2_DEF,
            depth_multiplier=depth_multiplier,
            final_endpoint="layer_18",
            output_stride=output_stride,
            is_training=is_training)
    else:
        raise NotImplementedError
 def test_mobilenet_v2_lite_def(self):
     net, features = mobilenet_v2.mobilenet_base(
         tf.placeholder(tf.float32, (10, 320, 320, 3)),
         min_depth=8,
         depth_multiplier=1.0,
         conv_defs=mobilenet_defs.mobilenet_v2_lite_def(),
         use_explicit_padding=True,
         scope='MobilenetV2')
     self.assertEqual(net.get_shape().as_list(), [10, 10, 10, 320])
     self._assert_contains_op(
         'MobilenetV2/expanded_conv_16/project/Identity')
     self.assertEqual(
         features['layer_3/expansion_output'].get_shape().as_list(),
         [10, 160, 160, 96])
     self.assertEqual(
         features['layer_4/expansion_output'].get_shape().as_list(),
         [10, 80, 80, 144])
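The depths asserted in this test and the `reduced=True` test above follow from MobileNet v2's standard expansion factor of 6: layer_3 expands 16 base channels to 96 and layer_4 expands 24 to 144, while the reduced variant starts from 8 and 12. A quick arithmetic check (the expansion factor is assumed from the standard MobileNet v2 definition):

EXPANSION = 6  # MobileNet v2 default expansion factor.
assert 16 * EXPANSION == 96    # lite def: layer_3/expansion_output
assert 24 * EXPANSION == 144   # lite def: layer_4/expansion_output
assert 8 * EXPANSION == 48     # reduced:  layer_3/expansion_output
assert 12 * EXPANSION == 72    # reduced:  layer_4/expansion_output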
Example #16
def _mobilenet_v2(net,
                  depth_multiplier,
                  output_stride,
                  conv_defs=None,
                  divisible_by=None,
                  reuse=None,
                  scope=None,
                  final_endpoint=None):
  """Auxiliary function to add support for 'reuse' to mobilenet_v2.

  Args:
    net: Input tensor of shape [batch_size, height, width, channels].
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    output_stride: An integer that specifies the requested ratio of input to
      output spatial resolution. If not None, then we invoke atrous convolution
      if necessary to prevent the network from reducing the spatial resolution
      of the activation maps. Allowed values are 8 (accurate fully convolutional
      mode), 16 (fast fully convolutional mode), 32 (classification mode).
    conv_defs: MobileNet conv defs.
    divisible_by: None (use default setting) or an integer that ensures all
      layers' number of channels will be divisible by this number. Used in
      MobileNet.
    reuse: Reuse model variables.
    scope: Optional variable scope.
    final_endpoint: The endpoint to construct the network up to.

  Returns:
    Features extracted by MobileNetv2.
  """
  if divisible_by is None:
    divisible_by = 8 if depth_multiplier == 1.0 else 1
  if conv_defs is None:
    conv_defs = mobilenet_v2.V2_DEF
  with tf.variable_scope(
      scope, 'MobilenetV2', [net], reuse=reuse) as scope:
    return mobilenet_v2.mobilenet_base(
        net,
        conv_defs=conv_defs,
        depth_multiplier=depth_multiplier,
        min_depth=8 if depth_multiplier == 1.0 else 1,
        divisible_by=divisible_by,
        final_endpoint=final_endpoint or _MOBILENET_V2_FINAL_ENDPOINT,
        output_stride=output_stride,
        scope=scope)
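A hypothetical two-call usage of _mobilenet_v2 showing what the reuse plumbing buys (assumes TF 1.x and that the module's _MOBILENET_V2_FINAL_ENDPOINT is defined; the multi-scale tensors are illustrative):

import tensorflow as tf

full = tf.placeholder(tf.float32, [1, 513, 513, 3])
half = tf.placeholder(tf.float32, [1, 257, 257, 3])
feats_full = _mobilenet_v2(full, depth_multiplier=1.0, output_stride=16)
# The second call reuses the variables created by the first, so both
# scales are processed by the same weights.
feats_half = _mobilenet_v2(half, depth_multiplier=1.0, output_stride=16,
                           reuse=True)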
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

        Args:
          preprocessed_inputs: A [batch, height, width, channels] float32 tensor
            representing a batch of images.
          scope: A scope name.

        Returns:
          rpn_feature_map: A tensor with shape [batch, height, width, depth]
          activations: A dictionary mapping feature extractor tensor names to
            tensors

        Raises:
          InvalidArgumentError: If the spatial size of `preprocessed_inputs`
            (height or width) is less than 33.
          ValueError: If the created network is missing the required activation.
        """

        preprocessed_inputs.get_shape().assert_has_rank(4)
        preprocessed_inputs = shape_utils.check_min_image_dim(
            min_dim=33, image_tensor=preprocessed_inputs)

        with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=self._train_batch_norm,
                                            weight_decay=self._weight_decay)):
            with tf.variable_scope('MobilenetV2',
                                   reuse=self._reuse_weights) as scope:
                params = {}
                if self._skip_last_stride:
                    # Not taken by default; otherwise the default conv_defs
                    # from slim.nets.mobilenet.mobilenet_v2 are used.
                    params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
                        conv_depth_ratio_in_percentage=(
                            self._conv_depth_ratio_in_percentage))

                _, endpoints = mobilenet_v2.mobilenet_base(
                    preprocessed_inputs,
                    final_endpoint='layer_19',  # actually 'MobilenetV2/Conv_1'
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope,
                    **params)
        return endpoints['layer_19'], endpoints
  def testMultiplier(self):
    op = mobilenet.op
    new_def = copy.deepcopy(mobilenet_v2.V2_DEF)

    def inverse_multiplier(output_params, multiplier):
      # Keep num_outputs an integer under true division: 16 / 0.1 -> 160.
      output_params['num_outputs'] = int(output_params['num_outputs'] /
                                         multiplier)

    new_def['spec'][0] = op(
        slim.conv2d,
        kernel_size=(3, 3),
        multiplier_func=inverse_multiplier,
        num_outputs=16)
    _ = mobilenet_v2.mobilenet_base(
        tf.placeholder(tf.float32, (10, 224, 224, 16)),
        conv_defs=new_def, depth_multiplier=0.1)
    s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
    # Expect first layer to be 160 (16 / 0.1), and other layers
    # their max(original size * 0.1, 8)
    self.assertEqual([160, 8, 48, 8, 48], s[:5])
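The expected depths decompose as follows, assuming slim's default min_depth of 8 and MobileNet v2's expansion factor of 6: the custom inverse_multiplier divides the first layer's 16 channels by 0.1 to get 160; the next projection layers (16 and 24 channels in V2_DEF) shrink to max(int(c * 0.1), 8) = 8; and each expansion in between multiplies the previous output by 6, giving 48.

multiplier, min_depth, expansion = 0.1, 8, 6
assert int(16 / multiplier) == 160                   # inverted first layer
assert max(int(16 * multiplier), min_depth) == 8     # first projection
assert max(int(24 * multiplier), min_depth) == 8     # second projection
assert 8 * expansion == 48                           # expansions in between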
Example #19
def _mobilenet_v2(net,
                  depth_multiplier,
                  output_stride,
                  divisible_by=None,
                  reuse=None,
                  scope=None,
                  final_endpoint=None):
  """Auxiliary function to add support for 'reuse' to mobilenet_v2.

  Args:
    net: Input tensor of shape [batch_size, height, width, channels].
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    output_stride: An integer that specifies the requested ratio of input to
      output spatial resolution. If not None, then we invoke atrous convolution
      if necessary to prevent the network from reducing the spatial resolution
      of the activation maps. Allowed values are 8 (accurate fully convolutional
      mode), 16 (fast fully convolutional mode), 32 (classification mode).
    divisible_by: None (use default setting) or an integer that ensures all
      layers' number of channels will be divisible by this number. Used in
      MobileNet.
    reuse: Reuse model variables.
    scope: Optional variable scope.
    final_endpoint: The endpoint to construct the network up to.

  Returns:
    Features extracted by MobileNetv2.
  """
  if divisible_by is None:
    divisible_by = 8 if depth_multiplier == 1.0 else 1
  with tf.variable_scope(
      scope, 'MobilenetV2', [net], reuse=reuse) as scope:
    return mobilenet_v2.mobilenet_base(
        net,
        conv_defs=mobilenet_v2.V2_DEF,
        depth_multiplier=depth_multiplier,
        min_depth=8 if depth_multiplier == 1.0 else 1,
        divisible_by=divisible_by,
        final_endpoint=final_endpoint or _MOBILENET_V2_FINAL_ENDPOINT,
        output_stride=output_stride,
        scope=scope)
  def extract_base_features_large(self, preprocessed_inputs):
    """Extract the large base model features.

    Variables are created under the scope of <scope>/MobilenetV2_1/

    Args:
      preprocessed_inputs: preprocessed input images of shape:
        [batch, width, height, depth].

    Returns:
      net: the last feature map created from the base feature extractor.
      end_points: a dictionary of feature maps created.
    """
    scope_name = self._base_network_scope + '_1'
    with tf.variable_scope(scope_name, reuse=self._reuse_weights) as base_scope:
      net, end_points = mobilenet_v2.mobilenet_base(
          preprocessed_inputs,
          depth_multiplier=self._depth_multipliers[0],
          conv_defs=mobilenet_defs.mobilenet_v2_lite_def(
              is_quantized=self._is_quantized),
          use_explicit_padding=self._use_explicit_padding,
          scope=base_scope)
      return net, end_points
Example #21
  def extract_features(self, inputs):
    """Extracts features from inputs.

    This function adds 4 additional feature maps on top of 
    'layer_15/expansion_output' and 'layer_19' in the base Mobilenet v2 network.

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels],
        holding the input images.

    Returns: 
      a list of 6 float tensors of shape [batch_size, height, width, channels],
        holding feature map tensors to be fed to box predictor.
    """
    feature_map_specs_dict = {
        'layer_name': ['layer_15/expansion_output', 'layer_19', 
            None, None, None, None],
        'layer_depth': [None, None, 512, 256, 256, 128]}

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)):
        _, end_points = mobilenet_v2.mobilenet_base(
            inputs, 
            final_endpoint='layer_19', 
            depth_multiplier=self._depth_multiplier, 
            scope=scope)

      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.ssd_feature_maps(
            feature_map_tensor_dict=end_points,
            feature_map_specs_dict=feature_map_specs_dict,
            depth_multiplier=1,
            use_depthwise=self._use_depthwise,
            insert_1x1_conv=True)
        feature_map_list = list(feature_maps.values())
        return feature_map_list
Example #22
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

        Args:
          preprocessed_inputs: A [batch, height, width, channels] float32 tensor
            representing a batch of images.
          scope: A scope name.

        Returns:
          rpn_feature_map: A tensor with shape [batch, height, width, depth]
          activations: A dictionary mapping feature extractor tensor names to
            tensors

        Raises:
          InvalidArgumentError: If the spatial size of `preprocessed_inputs`
            (height or width) is less than 33.
          ValueError: If the created network is missing the required activation.
        """

        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                _, activations = mobilenet_v2.mobilenet_base(
                    preprocessed_inputs,
                    final_endpoint='layer_19',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)

        return activations['layer_19'], activations
Example #23
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

        Args:
          preprocessed_inputs: a [batch, height, width, channels] float tensor
            representing a batch of images.

        Returns:
          feature_maps: a list of tensors where the ith tensor has shape
            [batch, height_i, width_i, depth_i]
        """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='layer_19',
                        depth_multiplier=self._depth_multiplier,
                        conv_defs=self._conv_defs,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            depth_fn = lambda d: max(
                int(d * self._depth_multiplier), self._min_depth)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope('fpn', reuse=self._reuse_weights):
                    feature_blocks = [
                        'layer_4', 'layer_7', 'layer_14', 'layer_19'
                    ]
                    base_fpn_max_level = min(self._fpn_max_level, 5)
                    feature_block_list = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_block_list.append(feature_blocks[level - 2])
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in feature_block_list],
                        depth=depth_fn(self._additional_layer_depth),
                        use_depthwise=self._use_depthwise,
                        use_explicit_padding=self._use_explicit_padding)
                    feature_maps = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_maps.append(fpn_features['top_down_{}'.format(
                            feature_blocks[level - 2])])
                    last_feature_map = fpn_features['top_down_{}'.format(
                        feature_blocks[base_fpn_max_level - 2])]
                    # Construct coarse features
                    padding = 'VALID' if self._use_explicit_padding else 'SAME'
                    kernel_size = 3
                    for i in range(base_fpn_max_level + 1,
                                   self._fpn_max_level + 1):
                        if self._use_depthwise:
                            conv_op = functools.partial(slim.separable_conv2d,
                                                        depth_multiplier=1)
                        else:
                            conv_op = slim.conv2d
                        if self._use_explicit_padding:
                            last_feature_map = ops.fixed_padding(
                                last_feature_map, kernel_size)
                        last_feature_map = conv_op(
                            last_feature_map,
                            num_outputs=depth_fn(self._additional_layer_depth),
                            kernel_size=[kernel_size, kernel_size],
                            stride=2,
                            padding=padding,
                            scope='bottom_up_Conv2d_{}'.format(
                                i - base_fpn_max_level + 19))
                        feature_maps.append(last_feature_map)
        return feature_maps
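A worked example of the FPN index arithmetic used above: feature_blocks holds the stride-4 through stride-32 endpoints, i.e. pyramid levels 2 through 5, so level L maps to feature_blocks[L - 2], and levels above 5 are synthesized by the stride-2 bottom-up convolutions. Assuming typical SSD-FPN settings of fpn_min_level=3 and fpn_max_level=7:

feature_blocks = ['layer_4', 'layer_7', 'layer_14', 'layer_19']  # levels 2..5
fpn_min_level, fpn_max_level = 3, 7
base_fpn_max_level = min(fpn_max_level, 5)                       # 5
blocks = [feature_blocks[level - 2]
          for level in range(fpn_min_level, base_fpn_max_level + 1)]
assert blocks == ['layer_7', 'layer_14', 'layer_19']
# Levels 6..7 are then built with stride-2 convs, scoped as below:
extra = ['bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19)
         for i in range(base_fpn_max_level + 1, fpn_max_level + 1)]
assert extra == ['bottom_up_Conv2d_20', 'bottom_up_Conv2d_21']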
Example #24
    def _build_model(self, **kwargs):
        d = dict()
        num_classes = self.num_classes
        frontend = kwargs.pop('frontend', 'resnet_v2_50')
        num_anchors = kwargs.pop('num_anchors', 9)

        if 'resnet_v2' in frontend:
            d['feature_map'] = self.X - [[[123.68, 116.779, 103.939]]]
            frontend_dir = os.path.join('pretrained_models', '{}.ckpt'.format(frontend))
            with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                logits, end_points = resnet_v2.resnet_v2_50(d['feature_map'], is_training=self.is_train)
                d['init_fn'] = slim.assign_from_checkpoint_fn(model_path=frontend_dir,
                                                          var_list=slim.get_model_variables(frontend))
            convs = [end_points[frontend + '/block{}'.format(x)] for x in [4, 2, 1]]
        elif 'mobilenet_v2' in frontend:
            d['feature_map'] = (2.0 / 255.0) * self.X  - 1.0
            frontend_dir = os.path.join('pretrained_models', 'mobilenet_v2_1.4_224', '{}.ckpt'.format(frontend))
            with slim.arg_scope(mobilenet_v2.training_scope()):
                _, end_points = mobilenet_v2.mobilenet_base(d['feature_map'], is_training=self.is_train)

            convs = [end_points[x] for x in ['layer_19', 'layer_14', 'layer_7']]
        else:
            #TODO build convNet
            raise NotImplementedError("Build own convNet!")

        with tf.variable_scope('layer5'):
            d['s_5'] = conv_layer(convs[0], 256, (1, 1), (1, 1))
            d['cls_head5'] = build_head_cls(d['s_5'], num_anchors, num_classes + 1)
            d['loc_head5'] = build_head_loc(d['s_5'], num_anchors)
            d['flat_cls_head5'] = tf.reshape(d['cls_head5'], (tf.shape(d['cls_head5'])[0], -1, num_classes + 1))
            d['flat_loc_head5'] = tf.reshape(d['loc_head5'], (tf.shape(d['loc_head5'])[0], -1, 4))

        with tf.variable_scope('layer6'):
            d['s_6'] = conv_layer(d['s_5'], 256, (3, 3), (2, 2))
            d['cls_head6'] = build_head_cls(d['s_6'], num_anchors, num_classes + 1)
            d['loc_head6'] = build_head_loc(d['s_6'], num_anchors)
            d['flat_cls_head6'] = tf.reshape(d['cls_head6'], (tf.shape(d['cls_head6'])[0], -1, num_classes + 1))
            d['flat_loc_head6'] = tf.reshape(d['loc_head6'], (tf.shape(d['loc_head6'])[0], -1, 4))

        with tf.variable_scope('layer7'):
            d['s_7'] = conv_layer(tf.nn.relu(d['s_6']), 256, (3, 3), (2, 2))
            d['cls_head7'] = build_head_cls(d['s_7'], num_anchors, num_classes + 1)
            d['loc_head7'] = build_head_loc(d['s_7'], num_anchors)
            d['flat_cls_head7'] = tf.reshape(d['cls_head7'], (tf.shape(d['cls_head7'])[0], -1, num_classes + 1))
            d['flat_loc_head7'] = tf.reshape(d['loc_head7'], (tf.shape(d['loc_head7'])[0], -1, 4))

        with tf.variable_scope('layer4'):
            d['up4'] = resize_to_target(d['s_5'], convs[1])
            d['s_4'] = conv_layer(convs[1], 256, (1, 1), (1, 1)) + d['up4']
            d['cls_head4'] = build_head_cls(d['s_4'], num_anchors, num_classes + 1)
            d['loc_head4'] = build_head_loc(d['s_4'], num_anchors)
            d['flat_cls_head4'] = tf.reshape(d['cls_head4'], (tf.shape(d['cls_head4'])[0], -1, num_classes + 1))
            d['flat_loc_head4'] = tf.reshape(d['loc_head4'], (tf.shape(d['loc_head4'])[0], -1, 4))

        with tf.variable_scope('layer3'):
            d['up3'] = resize_to_target(d['s_4'], convs[2])
            d['s_3'] = conv_layer(convs[2], 256, (1, 1), (1, 1)) + d['up3']
            d['cls_head3'] = build_head_cls(d['s_3'], num_anchors, num_classes + 1)
            d['loc_head3'] = build_head_loc(d['s_3'], num_anchors)
            d['flat_cls_head3'] = tf.reshape(d['cls_head3'], (tf.shape(d['cls_head3'])[0], -1, num_classes + 1))
            d['flat_loc_head3'] = tf.reshape(d['loc_head3'], (tf.shape(d['loc_head3'])[0], -1, 4))

        with tf.variable_scope('head'):
            d['cls_head'] = tf.concat((d['flat_cls_head3'],
                                       d['flat_cls_head4'],
                                       d['flat_cls_head5'],
                                       d['flat_cls_head6'],
                                       d['flat_cls_head7']), axis=1)

            d['loc_head'] = tf.concat((d['flat_loc_head3'],
                                       d['flat_loc_head4'],
                                       d['flat_loc_head5'],
                                       d['flat_loc_head6'],
                                       d['flat_loc_head7']), axis=1)

            d['logits'] = tf.concat((d['loc_head'], d['cls_head']), axis=2)
            d['pred'] = tf.concat((d['loc_head'], tf.nn.softmax(d['cls_head'], axis=-1)), axis=2)

        return d
Example #25
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.99)), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with slim.arg_scope(
            training_scope(l2_weight_decay=4e-5,
                           is_training=self._is_training)):

          _, image_features = mobilenet_v2.mobilenet_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='layer_18',
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)

    multiplier_func = functools.partial(
        _apply_multiplier,
        multiplier=self._depth_multiplier,
        min_depth=self._min_depth)
    with tf.variable_scope('MnasFPN', reuse=self._reuse_weights):
      with slim.arg_scope(
          training_scope(l2_weight_decay=1e-4, is_training=self._is_training)):
        # Create C6 by downsampling C5.
        c6 = slim.max_pool2d(
            _maybe_pad(image_features['layer_18'], self._use_explicit_padding),
            [3, 3],
            stride=[2, 2],
            padding='VALID' if self._use_explicit_padding else 'SAME',
            scope='C6_downsample')
        c6 = slim.conv2d(
            c6,
            multiplier_func(self._fpn_layer_depth),
            [1, 1],
            activation_fn=tf.identity,
            normalizer_fn=slim.batch_norm,
            weights_regularizer=None,  # this 1x1 has no kernel regularizer.
            padding='VALID',
            scope='C6_Conv1x1')
        image_features['C6'] = tf.identity(c6)  # Needed for quantization.
        for k in sorted(image_features.keys()):
          tf.logging.error('{}: {}'.format(k, image_features[k]))

        mnasfpn_inputs = [
            image_features['layer_7'],  # C3
            image_features['layer_14'],  # C4
            image_features['layer_18'],  # C5
            image_features['C6']  # C6
        ]
        self._verify_config(mnasfpn_inputs)
        feature_maps = mnasfpn(
            mnasfpn_inputs,
            head_def=self._head_def,
            output_channel=self._fpn_layer_depth,
            use_explicit_padding=self._use_explicit_padding,
            use_native_resize_op=self._use_native_resize_op,
            multiplier_func=multiplier_func)
    return feature_maps
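A standalone sketch of the C6 construction above (assumes TF 1.x with tf.contrib.slim; the input shape and the 48-channel projection depth are illustrative): C5 ('layer_18', stride 32) is max-pooled by 2 and projected with an unregularized 1x1 convolution to form a stride-64 input level for MnasFPN.

import tensorflow as tf
slim = tf.contrib.slim

c5 = tf.placeholder(tf.float32, [1, 10, 10, 320])  # stride-32 features
c6 = slim.max_pool2d(c5, [3, 3], stride=[2, 2], padding='SAME',
                     scope='C6_downsample')        # -> (1, 5, 5, 320)
c6 = slim.conv2d(c6, 48, [1, 1], activation_fn=tf.identity,
                 normalizer_fn=slim.batch_norm, padding='VALID',
                 scope='C6_Conv1x1')               # -> (1, 5, 5, 48)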
Example #26
def backbone_net(inputs,
                 image_size,
                 is_training=True,
                 depth_multiplier=0.5,
                 **kwargs):

    pad_to_multiple = 10
    use_explicit_padding = False
    depth_multiplier = depth_multiplier

    print('construct backbone_net for image_size', image_size,
          'depth_multiplier = ', depth_multiplier)
    use_depthwise = True
    override_base_feature_extractor_hyperparams = False
    reuse_weights = None
    min_depth = 16

    specs = [
        op(slim.conv2d,
           stride=2,
           num_outputs=64,
           kernel_size=[3, 3],
           activation_fn=tf.nn.elu),
        # todo: Depthwise Conv3×3
        op(ops.expanded_conv, stride=1, kernel_size=[3, 3], num_outputs=64),
        # 562×64Bottleneck 2 64 5 2
        op(slim.max_pool2d, kernel_size=[3, 3], padding='SAME', stride=1),
        op(ops.expanded_conv, stride=2, num_outputs=64, kernel_size=[3, 3]),
    ]
    for _ in range(0, 4):
        specs.append(
            op(ops.expanded_conv, stride=1, num_outputs=64,
               kernel_size=[3, 3]))

    # 282×64Bottleneck212812
    specs.append(
        op(ops.expanded_conv, stride=2, num_outputs=128, kernel_size=[3, 3]))

    # 142×128Bottleneck412861
    mid_conv_n = kwargs.get('mid_conv_n', 4)
    for _ in range(0, mid_conv_n):
        specs.append(
            op(ops.expanded_conv,
               expansion_size=expand_input(4),
               num_outputs=128,
               stride=1))

    kernel_size = [5, 5]
    specs.append(op(ops.expanded_conv, stride=1, num_outputs=16, scope='S1'))
    specs.append(
        op(slim.conv2d,
           stride=2,
           kernel_size=[3, 3],
           num_outputs=32,
           scope='S2',
           activation_fn=tf.nn.elu))
    specs.append(
        op(slim.conv2d,
           stride=1,
           kernel_size=kernel_size,
           num_outputs=128,
           scope='S3',
           padding='VALID',
           activation_fn=tf.nn.elu))

    # print('specs = ', specs, ' len = ', len(specs))

    arch = dict(
        defaults={
            # Note: these parameters of batch norm affect the architecture
            # that's why they are here and not in training_scope.
            (slim.batch_norm,): {
                'center': True,
                'scale': True
            },
            (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
                'normalizer_fn': slim.batch_norm,
                'activation_fn': tf.nn.relu6
            },
            (ops.expanded_conv, ): {
                'expansion_size': expand_input(2),
                'split_expansion': 1,
                'normalizer_fn': slim.batch_norm,
                'residual': True,
            },
            (slim.conv2d, slim.separable_conv2d): {
                'padding': 'SAME',
                'weights_initializer': slim.xavier_initializer()
            }
        },
        spec=specs)

    print('input to backbone_net ', inputs)
    with tf.variable_scope('Backbone', reuse=reuse_weights) as scope:
        with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training, bn_decay=0.9997)), \
            slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=min_depth):
            with (slim.arg_scope(conv_hyperparams_fn(is_training=is_training))
                  if override_base_feature_extractor_hyperparams else
                  context_manager.IdentityContextManager()):
                _, image_features = mobilenet_v2.mobilenet_base(
                    od_ops.pad_to_multiple(inputs, pad_to_multiple),
                    depth_multiplier=depth_multiplier,
                    is_training=is_training,
                    use_explicit_padding=use_explicit_padding,
                    conv_defs=arch,
                    scope=scope)
                # do a fully connected layer here
                # TODO

                print('image image_features', image_features.keys())
                all_layers = []
                for layer_name in image_features.keys():
                    if re.match('^layer_\\d+$', layer_name) is not None:
                        all_layers.append(layer_name)

                def layer_key(val):
                    return int(val.split('_')[1])

                all_layers.sort(key=layer_key)
                print('all_layers', all_layers)
                layer_15 = image_features[all_layers[-3]]
                layer_16 = image_features[all_layers[-2]]
                layer_17 = image_features[all_layers[-1]]
                # batch_size = tf.shape(S1)[0]

                S1 = slim.flatten(
                    layer_15,
                    scope='S1flatten')  # tf.reshape(S1, [batch_size, -1])
                S2 = slim.flatten(layer_16,
                                  scope='S2flatten')  # [batch_size, -1])
                S3 = slim.flatten(layer_17,
                                  scope='S3flatten')  # [batch_size, -1])
                before_dense = tf.concat([S1, S2, S3], 1)

                for l in all_layers:
                    print(l, image_features[l])
                # print('layer_17', layer_17)
                print('S1', S1)
                print('S2', S2)
                print('S3', S3)

                # to_test = slim.conv2d(image_features['layer_19'])
                print('before fully_connected', before_dense)
                with slim.arg_scope(
                    [slim.fully_connected],
                        weights_initializer=slim.xavier_initializer(),
                        normalizer_fn=None,
                        activation_fn=tf.nn.tanh):
                    fc_x = kwargs.get('fc_x_n', 2)
                    print('fully_connected before last x ', fc_x)
                    pre_chin = slim.fully_connected(before_dense, 34 * fc_x)
                    pre_left_eye_brow = slim.fully_connected(
                        before_dense, 10 * fc_x)
                    pre_right_eye_brow = slim.fully_connected(
                        before_dense, 10 * fc_x)
                    pre_nose = slim.fully_connected(before_dense, 18 * fc_x)
                    pre_left_eye = slim.fully_connected(
                        before_dense, 12 * fc_x)
                    pre_right_eye = slim.fully_connected(
                        before_dense, 12 * fc_x)
                    pre_mouth = slim.fully_connected(before_dense, 40 * fc_x)

                    chin = slim.fully_connected(pre_chin, 34)
                    left_eye_brow = slim.fully_connected(pre_left_eye_brow, 10)
                    right_eye_brow = slim.fully_connected(
                        pre_right_eye_brow, 10)
                    nose = slim.fully_connected(pre_nose, 18)
                    left_eye = slim.fully_connected(pre_left_eye, 12)
                    right_eye = slim.fully_connected(pre_right_eye, 12)
                    mouth = slim.fully_connected(pre_mouth, 40)

                    landmarks = tf.concat([
                        chin, left_eye_brow, right_eye_brow, nose, left_eye,
                        right_eye, mouth
                    ], -1)
                    return image_features, landmarks, None
Example #27
                   shape=shift_buffer_shapes[9],
                   name='shift_buffer_9')
]

#FINAL_NODE_NAME="MobilenetV2/Conv_1/Relu6"
FINAL_NODE_NAME = "MobilenetV2/Logits/output"

in_tensor = tf.placeholder(tf.float32, shape=(1, 224, 224, 3), name='in_img')

print(
    torch_params(0)['normalizer_params']['param_initializers']
    ['moving_mean'].get_config())

in_img = tf.identity(in_tensor)

net, endpoints = mobilenet_v2.mobilenet_base(in_img, conv_defs=V2_DEF_TSM)

# Add the classifier
with tf.variable_scope("MobilenetV2/Logits"):
    kernel_initializer = None
    bias_initializer = tf.zeros_initializer()
    if IMPORT_PYTORCH:
        kernel_initializer = torch_params(-1)["weights_initializer"]
        bias_initializer = torch_params(-1)["biases_initializer"]

    net = tf.nn.avg_pool(net, [1, 7, 7, 1], 1, "VALID", name="AvgPool")
    net = tf.squeeze(net, (1, 2))
    net = tf.layers.dense(net,
                          27,
                          use_bias=True,
                          trainable=False,
Example #28
def backbone_net(inputs, image_size, is_training=True, depth_multiplier=0.5):
    
    pad_to_multiple = 14 if image_size == 112 else (10 if image_size == 80 else 8)
    use_explicit_padding = False
    depth_multiplier = depth_multiplier

    print('construct backbone_net for image_size', image_size, 'depth_multiplier = ', depth_multiplier)
    use_depthwise = True
    override_base_feature_extractor_hyperparams = False
    reuse_weights = None
    min_depth = 16

    specs = [
            op(slim.conv2d, stride=2, num_outputs=64, kernel_size=[3, 3]),
            # todo: Depthwise Conv3×3
            op(slim.separable_conv2d, stride=1, kernel_size=[3, 3], num_outputs=None, multiplier_func=dummy_depth_multiplier),    
            # 562×64Bottleneck 2 64 5 2
            op(ops.expanded_conv, stride=2, num_outputs=64),            
        ]
    for _ in range(0, 4):
        specs.append(op(ops.expanded_conv, stride=1, num_outputs=64))

    # 282×64Bottleneck212812
    specs.append(op(ops.expanded_conv, stride=2, num_outputs=128))

    # 142×128Bottleneck412861    
    for _ in range(0, 6):            
        specs.append(op(ops.expanded_conv, 
            expansion_size=expand_input(4), 
            num_outputs=128,
            stride=1))

    kernel_size = [7, 7] if image_size == 112 else ([5,5] if image_size == 80 else [4,4])
    specs.append(op(ops.expanded_conv, stride=1, num_outputs=16, scope='S1'))
    specs.append(op(slim.conv2d, stride=2, kernel_size=[3, 3], num_outputs=32, scope='S2'))
    specs.append(op(slim.conv2d, stride=1, kernel_size=kernel_size, 
        num_outputs=128, scope='S3', padding='VALID'))

    # print('specs = ', specs, ' len = ', len(specs))

    arch = dict(
        defaults={
            # Note: these parameters of batch norm affect the architecture
            # that's why they are here and not in training_scope.
            (slim.batch_norm,): {'center': True, 'scale': True},
            (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
                'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
            },
            (ops.expanded_conv,): {
                'expansion_size': expand_input(2),
                'split_expansion': 1,
                'normalizer_fn': slim.batch_norm,
                'residual': True,
            },
            (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME', 'weights_initializer': slim.xavier_initializer()}
        },

        spec=specs
    )

    print('input to backbone_net ' , inputs)
    with tf.variable_scope('Backbone', reuse=reuse_weights) as scope:
        with slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training, bn_decay=0.9997)), \
            slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=min_depth):
            with (slim.arg_scope(conv_hyperparams_fn(is_training=is_training))
                if override_base_feature_extractor_hyperparams else
                context_manager.IdentityContextManager()):
                _, image_features = mobilenet_v2.mobilenet_base(
                  od_ops.pad_to_multiple(inputs, pad_to_multiple),                  
                  depth_multiplier=depth_multiplier,
                  is_training=is_training,
                  use_explicit_padding=use_explicit_padding,
                  conv_defs=arch,
                  scope=scope)
                # do a fully connected layer here
                # TODO
                layer_15 = image_features['layer_15']
                layer_16 = image_features['layer_16']
                layer_17 = image_features['layer_17']
                # batch_size = tf.shape(S1)[0]                

                S1 = slim.flatten(layer_15, scope='S1flatten') # tf.reshape(S1, [batch_size, -1])
                S2 = slim.flatten(layer_16, scope='S2flatten') # [batch_size, -1])
                S3 = slim.flatten(layer_17, scope='S3flatten') # [batch_size, -1])
                before_dense = tf.concat([S1, S2, S3], 1)
                
                for i in range(1, 18):
                    print('layer_' + str(i), image_features['layer_' + str(i)])
                # print('layer_17', layer_17)
                print('S1', S1)
                print('S2', S2)
                print('S3', S3)

                # to_test = slim.conv2d(image_features['layer_19'])
                print('image image_features', image_features.keys())
                with slim.arg_scope([slim.batch_norm], is_training=is_training, center=True, scale=True):
                    return image_features, slim.fully_connected(before_dense, 
                            136, 
                            activation_fn=tf.nn.relu6,
                            normalizer_fn=slim.batch_norm,
                            weights_initializer=slim.xavier_initializer()), (image_features['layer_1'], inputs, image_features['layer_2'])
Example #29
def create_network(images,
                   num_classes=None,
                   add_logits=True,
                   reuse=None,
                   create_summaries=True,
                   weight_decay=1e-8):
    nonlinearity = tf.nn.elu
    conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    conv_bias_init = tf.zeros_initializer()
    conv_regularizer = slim.l2_regularizer(weight_decay)
    fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    fc_bias_init = tf.zeros_initializer()
    fc_regularizer = slim.l2_regularizer(weight_decay)

    def batch_norm_fn(x):
        return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn")

    network = images
    network, _, networkFirst = base.mobilenet_base(network)

    feature1_dim = networkFirst.get_shape().as_list()[-1]
    print("feature1 dimensionality: ", feature1_dim)
    feature1 = slim.flatten(networkFirst)
    print("Feature1 Size: ", network.get_shape().as_list())

    feature_dim = network.get_shape().as_list()[-1]
    print("feature2 dimensionality: ", feature_dim)
    network = slim.flatten(network)
    print("Feature2 Size: ", network.get_shape().as_list())

    network = tf.concat([network, feature1], 1)
    print("Total Feature Size: ", network.get_shape().as_list())

    feature_dim = 128
    network = slim.dropout(network, keep_prob=0.6)
    network = slim.fully_connected(
        network,
        feature_dim,
        activation_fn=nonlinearity,  ## feature_dim
        normalizer_fn=batch_norm_fn,
        weights_regularizer=fc_regularizer,
        scope="fc1",
        weights_initializer=fc_weight_init,
        biases_initializer=fc_bias_init)

    features = network

    # Features in rows, normalize axis 1.
    features = tf.nn.l2_normalize(features, dim=1)

    if add_logits:
        with slim.variable_scope.variable_scope("ball", reuse=reuse):
            weights = slim.model_variable(
                "mean_vectors", (feature_dim, int(num_classes)),
                initializer=tf.truncated_normal_initializer(stddev=1e-3),
                regularizer=None)
            scale = slim.model_variable("scale", (),
                                        tf.float32,
                                        initializer=tf.constant_initializer(
                                            0., tf.float32),
                                        regularizer=slim.l2_regularizer(1e-1))
            if create_summaries:
                tf.summary.scalar("scale", scale)
            scale = tf.nn.softplus(scale)

        # Mean vectors in columns, normalize axis 0.
        weights_normed = tf.nn.l2_normalize(weights, dim=0)
        logits = scale * tf.matmul(features, weights_normed)
    else:
        logits = None
    return features, logits
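Why the 'ball' head above behaves like a cosine softmax: with feature rows and mean-vector columns both l2-normalized, the matmul is a cosine similarity scaled by softplus(scale). A small NumPy sketch of that identity (shapes are illustrative):

import numpy as np

features = np.random.randn(4, 128)
weights = np.random.randn(128, 10)
features /= np.linalg.norm(features, axis=1, keepdims=True)  # rows
weights /= np.linalg.norm(weights, axis=0, keepdims=True)    # columns
scale = np.log1p(np.exp(0.0))                                # softplus(0)
logits = scale * features.dot(weights)
assert np.all(np.abs(logits) <= scale + 1e-6)                # |cos| <= 1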
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v2.mobilenet_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='layer_19',
              depth_multiplier=self._depth_multiplier,
              conv_defs=_CONV_DEFS if self._use_depthwise else None,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('fpn', reuse=self._reuse_weights):
          feature_blocks = [
              'layer_4', 'layer_7', 'layer_14', 'layer_19'
          ]
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_block_list.append(feature_blocks[level - 2])
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(self._additional_layer_depth),
              use_depthwise=self._use_depthwise)
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(fpn_features['top_down_{}'.format(
                feature_blocks[level - 2])])
          last_feature_map = fpn_features['top_down_{}'.format(
              feature_blocks[base_fpn_max_level - 2])]
          # Construct coarse features
          for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
            if self._use_depthwise:
              conv_op = functools.partial(
                  slim.separable_conv2d, depth_multiplier=1)
            else:
              conv_op = slim.conv2d
            last_feature_map = conv_op(
                last_feature_map,
                num_outputs=depth_fn(self._additional_layer_depth),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19))
            feature_maps.append(last_feature_map)
    return feature_maps