Example 1
 def testModelHasExpectedNumberOfParameters(self):
   batch_size = 5
   height, width = 224, 224
   inputs = tf.random_uniform((batch_size, height, width, 3))
   with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                       normalizer_fn=slim.batch_norm):
     mobilenet_v1.mobilenet_v1_base(inputs)
     total_params, _ = slim.model_analyzer.analyze_vars(
         slim.get_model_variables())
     self.assertAlmostEqual(3217920, total_params)
Example 2
  def testOutputStride8BuildAndCheckAllEndPointsUptoConv2d_13(self):
    batch_size = 5
    height, width = 224, 224
    output_stride = 8

    inputs = tf.random_uniform((batch_size, height, width, 3))
    with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                        normalizer_fn=slim.batch_norm):
      _, end_points = mobilenet_v1.mobilenet_v1_base(
          inputs, output_stride=output_stride,
          final_endpoint='Conv2d_13_pointwise')
      _, explicit_padding_end_points = mobilenet_v1.mobilenet_v1_base(
          inputs, output_stride=output_stride,
          final_endpoint='Conv2d_13_pointwise', use_explicit_padding=True)
    endpoints_shapes = {'Conv2d_0': [batch_size, 112, 112, 32],
                        'Conv2d_1_depthwise': [batch_size, 112, 112, 32],
                        'Conv2d_1_pointwise': [batch_size, 112, 112, 64],
                        'Conv2d_2_depthwise': [batch_size, 56, 56, 64],
                        'Conv2d_2_pointwise': [batch_size, 56, 56, 128],
                        'Conv2d_3_depthwise': [batch_size, 56, 56, 128],
                        'Conv2d_3_pointwise': [batch_size, 56, 56, 128],
                        'Conv2d_4_depthwise': [batch_size, 28, 28, 128],
                        'Conv2d_4_pointwise': [batch_size, 28, 28, 256],
                        'Conv2d_5_depthwise': [batch_size, 28, 28, 256],
                        'Conv2d_5_pointwise': [batch_size, 28, 28, 256],
                        'Conv2d_6_depthwise': [batch_size, 28, 28, 256],
                        'Conv2d_6_pointwise': [batch_size, 28, 28, 512],
                        'Conv2d_7_depthwise': [batch_size, 28, 28, 512],
                        'Conv2d_7_pointwise': [batch_size, 28, 28, 512],
                        'Conv2d_8_depthwise': [batch_size, 28, 28, 512],
                        'Conv2d_8_pointwise': [batch_size, 28, 28, 512],
                        'Conv2d_9_depthwise': [batch_size, 28, 28, 512],
                        'Conv2d_9_pointwise': [batch_size, 28, 28, 512],
                        'Conv2d_10_depthwise': [batch_size, 28, 28, 512],
                        'Conv2d_10_pointwise': [batch_size, 28, 28, 512],
                        'Conv2d_11_depthwise': [batch_size, 28, 28, 512],
                        'Conv2d_11_pointwise': [batch_size, 28, 28, 512],
                        'Conv2d_12_depthwise': [batch_size, 28, 28, 512],
                        'Conv2d_12_pointwise': [batch_size, 28, 28, 1024],
                        'Conv2d_13_depthwise': [batch_size, 28, 28, 1024],
                        'Conv2d_13_pointwise': [batch_size, 28, 28, 1024]}
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name, expected_shape in endpoints_shapes.items():
      self.assertTrue(endpoint_name in end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)
    self.assertItemsEqual(endpoints_shapes.keys(),
                          explicit_padding_end_points.keys())
    for endpoint_name, expected_shape in endpoints_shapes.items():
      self.assertTrue(endpoint_name in explicit_padding_end_points)
      self.assertListEqual(
          explicit_padding_end_points[endpoint_name].get_shape().as_list(),
          expected_shape)
Example 3
  def testBuildAndCheckAllEndPointsApproximateFaceNet(self):
    batch_size = 5
    height, width = 128, 128

    inputs = tf.random_uniform((batch_size, height, width, 3))
    with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                        normalizer_fn=slim.batch_norm):
      _, end_points = mobilenet_v1.mobilenet_v1_base(
          inputs, final_endpoint='Conv2d_13_pointwise', depth_multiplier=0.75)
      _, explicit_padding_end_points = mobilenet_v1.mobilenet_v1_base(
          inputs, final_endpoint='Conv2d_13_pointwise', depth_multiplier=0.75,
          use_explicit_padding=True)
    # For the Conv2d_0 layer FaceNet has depth=16
    endpoints_shapes = {'Conv2d_0': [batch_size, 64, 64, 24],
                        'Conv2d_1_depthwise': [batch_size, 64, 64, 24],
                        'Conv2d_1_pointwise': [batch_size, 64, 64, 48],
                        'Conv2d_2_depthwise': [batch_size, 32, 32, 48],
                        'Conv2d_2_pointwise': [batch_size, 32, 32, 96],
                        'Conv2d_3_depthwise': [batch_size, 32, 32, 96],
                        'Conv2d_3_pointwise': [batch_size, 32, 32, 96],
                        'Conv2d_4_depthwise': [batch_size, 16, 16, 96],
                        'Conv2d_4_pointwise': [batch_size, 16, 16, 192],
                        'Conv2d_5_depthwise': [batch_size, 16, 16, 192],
                        'Conv2d_5_pointwise': [batch_size, 16, 16, 192],
                        'Conv2d_6_depthwise': [batch_size, 8, 8, 192],
                        'Conv2d_6_pointwise': [batch_size, 8, 8, 384],
                        'Conv2d_7_depthwise': [batch_size, 8, 8, 384],
                        'Conv2d_7_pointwise': [batch_size, 8, 8, 384],
                        'Conv2d_8_depthwise': [batch_size, 8, 8, 384],
                        'Conv2d_8_pointwise': [batch_size, 8, 8, 384],
                        'Conv2d_9_depthwise': [batch_size, 8, 8, 384],
                        'Conv2d_9_pointwise': [batch_size, 8, 8, 384],
                        'Conv2d_10_depthwise': [batch_size, 8, 8, 384],
                        'Conv2d_10_pointwise': [batch_size, 8, 8, 384],
                        'Conv2d_11_depthwise': [batch_size, 8, 8, 384],
                        'Conv2d_11_pointwise': [batch_size, 8, 8, 384],
                        'Conv2d_12_depthwise': [batch_size, 4, 4, 384],
                        'Conv2d_12_pointwise': [batch_size, 4, 4, 768],
                        'Conv2d_13_depthwise': [batch_size, 4, 4, 768],
                        'Conv2d_13_pointwise': [batch_size, 4, 4, 768]}
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name, expected_shape in endpoints_shapes.items():
      self.assertTrue(endpoint_name in end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)
    self.assertItemsEqual(endpoints_shapes.keys(),
                          explicit_padding_end_points.keys())
    for endpoint_name, expected_shape in endpoints_shapes.items():
      self.assertTrue(endpoint_name in explicit_padding_end_points)
      self.assertListEqual(
          explicit_padding_end_points[endpoint_name].get_shape().as_list(),
          expected_shape)
Example 4
 def testBuildOnlyUptoFinalEndpoint(self):
   batch_size = 5
   height, width = 224, 224
   endpoints = ['Conv2d_0',
                'Conv2d_1_depthwise', 'Conv2d_1_pointwise',
                'Conv2d_2_depthwise', 'Conv2d_2_pointwise',
                'Conv2d_3_depthwise', 'Conv2d_3_pointwise',
                'Conv2d_4_depthwise', 'Conv2d_4_pointwise',
                'Conv2d_5_depthwise', 'Conv2d_5_pointwise',
                'Conv2d_6_depthwise', 'Conv2d_6_pointwise',
                'Conv2d_7_depthwise', 'Conv2d_7_pointwise',
                'Conv2d_8_depthwise', 'Conv2d_8_pointwise',
                'Conv2d_9_depthwise', 'Conv2d_9_pointwise',
                'Conv2d_10_depthwise', 'Conv2d_10_pointwise',
                'Conv2d_11_depthwise', 'Conv2d_11_pointwise',
                'Conv2d_12_depthwise', 'Conv2d_12_pointwise',
                'Conv2d_13_depthwise', 'Conv2d_13_pointwise']
   for index, endpoint in enumerate(endpoints):
     with tf.Graph().as_default():
       inputs = tf.random_uniform((batch_size, height, width, 3))
       out_tensor, end_points = mobilenet_v1.mobilenet_v1_base(
           inputs, final_endpoint=endpoint)
       self.assertTrue(out_tensor.op.name.startswith(
           'MobilenetV1/' + endpoint))
       self.assertItemsEqual(endpoints[:index+1], end_points.keys())
Example 5
  def _extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if image height or width are not 256 pixels.
    """
    image_shape = preprocessed_inputs.get_shape()
    image_shape.assert_has_rank(4)
    image_height = image_shape[1].value
    image_width = image_shape[2].value

    if image_height is None or image_width is None:
      shape_assert = tf.Assert(
          tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                         tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
          ['image size must be 256 in both height and width.'])
      with tf.control_dependencies([shape_assert]):
        preprocessed_inputs = tf.identity(preprocessed_inputs)
    elif image_height != 256 or image_width != 256:
      raise ValueError('image size must be 256 in both height and width;'
                       ' image dim = %d,%d' % (image_height, image_width))

    feature_map_layout = {
        'from_layer': [
            'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''
        ],
        'layer_depth': [-1, -1, 512, 256, 256],
        'conv_kernel_size': [-1, -1, 3, 3, 2],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with slim.arg_scope(self._conv_hyperparams):
      with slim.arg_scope([slim.batch_norm], fused=False):
        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
Example 6
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=True, regularize_depthwise=True)):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams
              else context_manager.IdentityContextManager()):
          # TODO(skligys): Enable fused batch norm once quantization supports it.
          with slim.arg_scope([slim.batch_norm], fused=False):
            _, image_features = mobilenet_v1.mobilenet_v1_base(
                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                final_endpoint='Conv2d_13_pointwise',
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier,
                use_explicit_padding=self._use_explicit_padding,
                scope=scope)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        # TODO(skligys): Enable fused batch norm once quantization supports it.
        with slim.arg_scope([slim.batch_norm], fused=False):
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
Example 7
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=(self._batch_norm_trainable and self._is_training))):
        # TODO(skligys): Enable fused batch norm once quantization supports it.
        with slim.arg_scope([slim.batch_norm], fused=False):
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
      with slim.arg_scope(self._conv_hyperparams):
        # TODO(skligys): Enable fused batch norm once quantization supports it.
        with slim.arg_scope([slim.batch_norm], fused=False):
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
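The `extract_features` methods above are instance methods of SSD MobileNetV1 feature extractor classes, so they are called on an already-built extractor rather than invoked directly. A minimal usage sketch follows; the `extractor` instance, its `preprocess` call, and the 300x300 input size are assumptions for illustration, not details taken from the snippets.

import tensorflow as tf

# Hypothetical driver: `extractor` is assumed to be an already-constructed
# instance of one of the feature extractor classes shown above.
images = tf.random_uniform((2, 300, 300, 3))
preprocessed = extractor.preprocess(images)  # assumed scaling to [-1, 1]
feature_maps = extractor.extract_features(preprocessed)
for feature_map in feature_maps:
  print(feature_map.get_shape().as_list())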
Example 8
    def testOutputStride8BuildAndCheckAllEndPointsUptoConv2d_13(self):
        batch_size = 5
        height, width = 224, 224
        output_stride = 8

        inputs = tf.random.uniform((batch_size, height, width, 3))
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            normalizer_fn=slim.batch_norm):
            _, end_points = mobilenet_v1.mobilenet_v1_base(
                inputs,
                output_stride=output_stride,
                final_endpoint='Conv2d_13_pointwise')
        endpoints_shapes = {
            'Conv2d_0': [batch_size, 112, 112, 32],
            'Conv2d_1_depthwise': [batch_size, 112, 112, 32],
            'Conv2d_1_pointwise': [batch_size, 112, 112, 64],
            'Conv2d_2_depthwise': [batch_size, 56, 56, 64],
            'Conv2d_2_pointwise': [batch_size, 56, 56, 128],
            'Conv2d_3_depthwise': [batch_size, 56, 56, 128],
            'Conv2d_3_pointwise': [batch_size, 56, 56, 128],
            'Conv2d_4_depthwise': [batch_size, 28, 28, 128],
            'Conv2d_4_pointwise': [batch_size, 28, 28, 256],
            'Conv2d_5_depthwise': [batch_size, 28, 28, 256],
            'Conv2d_5_pointwise': [batch_size, 28, 28, 256],
            'Conv2d_6_depthwise': [batch_size, 28, 28, 256],
            'Conv2d_6_pointwise': [batch_size, 28, 28, 512],
            'Conv2d_7_depthwise': [batch_size, 28, 28, 512],
            'Conv2d_7_pointwise': [batch_size, 28, 28, 512],
            'Conv2d_8_depthwise': [batch_size, 28, 28, 512],
            'Conv2d_8_pointwise': [batch_size, 28, 28, 512],
            'Conv2d_9_depthwise': [batch_size, 28, 28, 512],
            'Conv2d_9_pointwise': [batch_size, 28, 28, 512],
            'Conv2d_10_depthwise': [batch_size, 28, 28, 512],
            'Conv2d_10_pointwise': [batch_size, 28, 28, 512],
            'Conv2d_11_depthwise': [batch_size, 28, 28, 512],
            'Conv2d_11_pointwise': [batch_size, 28, 28, 512],
            'Conv2d_12_depthwise': [batch_size, 28, 28, 512],
            'Conv2d_12_pointwise': [batch_size, 28, 28, 1024],
            'Conv2d_13_depthwise': [batch_size, 28, 28, 1024],
            'Conv2d_13_pointwise': [batch_size, 28, 28, 1024]
        }
        self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
        for endpoint_name, expected_shape in endpoints_shapes.items():
            self.assertTrue(endpoint_name in end_points)
            self.assertListEqual(
                end_points[endpoint_name].get_shape().as_list(),
                expected_shape)
Example 9
    def testBuildAndCheckAllEndPointsApproximateFaceNet(self):
        batch_size = 5
        height, width = 128, 128

        inputs = tf.random.uniform((batch_size, height, width, 3))
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            normalizer_fn=slim.batch_norm):
            _, end_points = mobilenet_v1.mobilenet_v1_base(
                inputs,
                final_endpoint='Conv2d_13_pointwise',
                depth_multiplier=0.75)
        # For the Conv2d_0 layer FaceNet has depth=16
        endpoints_shapes = {
            'Conv2d_0': [batch_size, 64, 64, 24],
            'Conv2d_1_depthwise': [batch_size, 64, 64, 24],
            'Conv2d_1_pointwise': [batch_size, 64, 64, 48],
            'Conv2d_2_depthwise': [batch_size, 32, 32, 48],
            'Conv2d_2_pointwise': [batch_size, 32, 32, 96],
            'Conv2d_3_depthwise': [batch_size, 32, 32, 96],
            'Conv2d_3_pointwise': [batch_size, 32, 32, 96],
            'Conv2d_4_depthwise': [batch_size, 16, 16, 96],
            'Conv2d_4_pointwise': [batch_size, 16, 16, 192],
            'Conv2d_5_depthwise': [batch_size, 16, 16, 192],
            'Conv2d_5_pointwise': [batch_size, 16, 16, 192],
            'Conv2d_6_depthwise': [batch_size, 8, 8, 192],
            'Conv2d_6_pointwise': [batch_size, 8, 8, 384],
            'Conv2d_7_depthwise': [batch_size, 8, 8, 384],
            'Conv2d_7_pointwise': [batch_size, 8, 8, 384],
            'Conv2d_8_depthwise': [batch_size, 8, 8, 384],
            'Conv2d_8_pointwise': [batch_size, 8, 8, 384],
            'Conv2d_9_depthwise': [batch_size, 8, 8, 384],
            'Conv2d_9_pointwise': [batch_size, 8, 8, 384],
            'Conv2d_10_depthwise': [batch_size, 8, 8, 384],
            'Conv2d_10_pointwise': [batch_size, 8, 8, 384],
            'Conv2d_11_depthwise': [batch_size, 8, 8, 384],
            'Conv2d_11_pointwise': [batch_size, 8, 8, 384],
            'Conv2d_12_depthwise': [batch_size, 4, 4, 384],
            'Conv2d_12_pointwise': [batch_size, 4, 4, 768],
            'Conv2d_13_depthwise': [batch_size, 4, 4, 768],
            'Conv2d_13_pointwise': [batch_size, 4, 4, 768]
        }
        self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
        for endpoint_name, expected_shape in endpoints_shapes.items():
            self.assertTrue(endpoint_name in end_points)
            self.assertListEqual(
                end_points[endpoint_name].get_shape().as_list(),
                expected_shape)
Example 10
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        with tf.control_dependencies([shape_assert]):
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=False, weight_decay=self._weight_decay)):
                with tf.variable_scope('MobilenetV1',
                                       reuse=self._reuse_weights) as scope:
                    params = {}
                    if self._skip_last_stride:
                        params[
                            'conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
                                conv_depth_ratio_in_percentage=self.
                                _conv_depth_ratio_in_percentage)
                    _, activations = mobilenet_v1.mobilenet_v1_base(
                        preprocessed_inputs,
                        final_endpoint='Conv2d_11_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope,
                        **params)
            return activations['Conv2d_11_pointwise']
Example 11
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=None, regularize_depthwise=True)):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams
              else context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features)

    return feature_maps.values()
Example 12
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        feature_map_layout = {
            'from_layer':
            ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            #'use_depthwise':True
        }

        with tf.control_dependencies([shape_assert]):
            with slim.arg_scope(self._conv_hyperparams):
                with slim.arg_scope([slim.batch_norm], fused=False):
                    with tf.variable_scope('MobilenetV1',
                                           reuse=self._reuse_weights) as scope:
                        _, image_features = mobilenet_v1.mobilenet_v1_base(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            final_endpoint='Conv2d_13_pointwise',
                            min_depth=self._min_depth,
                            depth_multiplier=self._depth_multiplier,
                            scope=scope)
                        feature_maps = feature_map_generators.multi_resolution_feature_maps(
                            feature_map_layout=feature_map_layout,
                            depth_multiplier=self._depth_multiplier,
                            min_depth=self._min_depth,
                            insert_1x1_conv=True,
                            image_features=image_features)

        return feature_maps.values()
Example 13
    def _extract_box_classifier_features(self, proposal_feature_maps, scope):
        """Extracts second stage box classifier features.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
        net = proposal_feature_maps

        depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
        trunc_normal = lambda stddev: tf.truncated_normal_initializer(
            0.0, stddev)

        data_format = 'NHWC'
        concat_dim = 3 if data_format == 'NHWC' else 1

        with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights):
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME',
                    data_format=data_format):
                with _batch_norm_arg_scope(
                    [slim.conv2d, slim.separable_conv2d],
                        batch_norm_scale=True,
                        train_batch_norm=self._train_batch_norm):

                    proposal_classifier_features, _ = mobilenet_v1.mobilenet_v1_base(
                        proposal_feature_maps,
                        final_endpoint='Conv2d_13_pointwise',
                        start_enum=12,
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        conv_defs=mobilenet_v1._CONV_DEFS[-2:],
                        scope=scope)

        return proposal_classifier_features
Example 14
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.equal(tf.shape(preprocessed_inputs)[1], 256),
            tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
        ['image size must be 256 in both height and width.'])

    feature_map_layout = {
        'from_layer': [
            'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''
        ],
        'layer_depth': [-1, -1, 512, 256, 256],
        'conv_kernel_size': [-1, -1, 3, 3, 2],
    }

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(self._conv_hyperparams):
        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
Example 15
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

        preprocessed_inputs.get_shape().assert_has_rank(4)
        preprocessed_inputs = shape_utils.check_min_image_dim(
            min_dim=33, image_tensor=preprocessed_inputs)

        with slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(
                    is_training=self._train_batch_norm,
                    weight_decay=self._weight_decay)):
            with tf.variable_scope('MobilenetV1',
                                   reuse=self._reuse_weights) as scope:
                params = {}
                if self._skip_last_stride:
                    params[
                        'conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
                            conv_depth_ratio_in_percentage=self.
                            _conv_depth_ratio_in_percentage)
                _, activations = mobilenet_v1.mobilenet_v1_base(
                    preprocessed_inputs,
                    final_endpoint='Conv2d_11_pointwise',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope,
                    **params)
        return activations['Conv2d_11_pointwise'], activations
Example 16
  def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

    preprocessed_inputs.get_shape().assert_has_rank(4)
    preprocessed_inputs = shape_utils.check_min_image_dim(
        min_dim=33, image_tensor=preprocessed_inputs)

    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=self._train_batch_norm,
            weight_decay=self._weight_decay)):
      with tf.variable_scope('MobilenetV1',
                             reuse=self._reuse_weights) as scope:
        params = {}
        if self._skip_last_stride:
          params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
              conv_depth_ratio_in_percentage=self.
              _conv_depth_ratio_in_percentage)
        _, activations = mobilenet_v1.mobilenet_v1_base(
            preprocessed_inputs,
            final_endpoint='Conv2d_11_pointwise',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            scope=scope,
            **params)
    return activations['Conv2d_11_pointwise'], activations
Example 17
def segMnet(inputs, multiplier):
    segMnet, end_points = mobilenet_v1.mobilenet_v1_base(
        inputs, depth_multiplier=multiplier)
    
    filters = int(1024*multiplier)

    with tf.variable_scope('global_average_pool'):
        # Global average pooling over the spatial dimensions.
        gap = tf.reduce_mean(segMnet, axis=[1, 2], keepdims=True)
    with tf.variable_scope('gap_conv'):
        gconv = batch_norm(conv2d(gap, filters, filters, 1, 1))
    with tf.variable_scope('gap_resize'):
        shape = tf.shape(segMnet)
        gresize = tf.image.resize_bilinear(gconv, [shape[1], shape[2]])

    with tf.variable_scope('atrous0'):
        atnet0 = batch_norm(conv2d(segMnet, filters, filters, 1, 1))
    with tf.variable_scope('atrous1'):
        atnet1 = batch_norm(atrous_conv2d(segMnet, filters, filters, 3, 6))
    with tf.variable_scope('atrous2'):
        atnet2 = batch_norm(atrous_conv2d(segMnet, filters, filters, 3, 12))
    with tf.variable_scope('atrous3'):
        atnet3 = batch_norm(atrous_conv2d(segMnet, filters, filters, 3, 18))

    with tf.variable_scope('concat'):
        segMnet = tf.concat([gresize, atnet0, atnet1, atnet2, atnet3], 3)

    with tf.variable_scope('combine_conv'):
        segMnet = batch_norm(conv2d(segMnet, filters*5, filters, 1, 1))
    #with tf.variable_scope('final_conv'):
    #    segMnet = conv2d(segMnet, filters, 2, 1, 1)

    with tf.variable_scope('deconv'):
        wshape = [64, 64, 2, filters]
        #wshape = [64, 64, 2, 2]
        strides = [1, 32, 32, 1]
        initializer = tf.contrib.layers.xavier_initializer()
        weight = tf.Variable(initializer(wshape))
        shape = tf.shape(inputs)
        output_shape = tf.stack([shape[0], shape[1], shape[2], 2])
        segMnet = tf.nn.conv2d_transpose(segMnet, weight, output_shape, strides=strides, padding='SAME', name='conv_transpose')
    
    return segMnet
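`segMnet` builds a DeepLab-style atrous spatial pyramid pooling (ASPP) head on top of `mobilenet_v1_base`, but it calls `conv2d`, `atrous_conv2d`, and `batch_norm` helpers defined elsewhere in its repository. A minimal sketch of what those helpers might look like follows; the signatures are inferred from the call sites (input, in_channels, out_channels, kernel_size, stride or dilation rate) and the bodies are assumptions.

import tensorflow as tf

def conv2d(x, in_channels, out_channels, kernel_size, stride):
    # Plain convolution with Xavier-initialized weights (assumed helper).
    weight = tf.get_variable(
        'weights', [kernel_size, kernel_size, in_channels, out_channels],
        initializer=tf.contrib.layers.xavier_initializer())
    return tf.nn.conv2d(x, weight, strides=[1, stride, stride, 1], padding='SAME')

def atrous_conv2d(x, in_channels, out_channels, kernel_size, rate):
    # Dilated convolution used by the ASPP branches (assumed helper).
    weight = tf.get_variable(
        'weights', [kernel_size, kernel_size, in_channels, out_channels],
        initializer=tf.contrib.layers.xavier_initializer())
    return tf.nn.atrous_conv2d(x, weight, rate=rate, padding='SAME')

def batch_norm(x, is_training=True):
    # Batch normalization followed by a ReLU (assumed helper).
    return tf.nn.relu(tf.layers.batch_normalization(x, training=is_training))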
Example 18
def segMnet(inputs, multiplier):
    segMnet, end_points = mobilenet_v1.mobilenet_v1_base(
        inputs, depth_multiplier=multiplier)
    filters = int(1024 * multiplier)

    with tf.variable_scope('deconv'):
        wshape = [64, 64, 2, filters]
        strides = [1, 32, 32, 1]
        initializer = tf.contrib.layers.xavier_initializer()
        weight = tf.Variable(initializer(wshape))
        shape = tf.shape(inputs)
        output_shape = tf.stack([shape[0], shape[1], shape[2], 2])
        segMnet = tf.nn.conv2d_transpose(segMnet,
                                         weight,
                                         output_shape,
                                         strides=strides,
                                         padding='SAME',
                                         name='conv_transpose')

    return segMnet
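Because this trimmed variant uses only `mobilenet_v1_base` and standard TensorFlow ops, it can be driven directly; a short sketch (the 512x512 input size is an assumption) is:

import tensorflow as tf

# Hypothetical driver for segMnet above; the transposed convolution upsamples
# the stride-32 MobileNet features back to the input resolution with 2 channels.
images = tf.placeholder(tf.float32, [None, 512, 512, 3])
logits = segMnet(images, multiplier=1.0)  # -> [batch, 512, 512, 2]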
Example 19
    def testBuildCustomNetworkUsingConvDefs(self):
        batch_size = 5
        height, width = 224, 224
        conv_defs = [
            mobilenet_v1.Conv(kernel=[3, 3], stride=2, depth=32),
            mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=64),
            mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=128),
            mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512)
        ]

        inputs = tf.random_uniform((batch_size, height, width, 3))
        net, end_points = mobilenet_v1.mobilenet_v1_base(
            inputs, final_endpoint='Conv2d_3_pointwise', conv_defs=conv_defs)
        self.assertTrue(net.op.name.startswith('MobilenetV1/Conv2d_3'))
        self.assertListEqual(net.get_shape().as_list(),
                             [batch_size, 56, 56, 512])
        expected_endpoints = ['Conv2d_0',
                              'Conv2d_1_depthwise', 'Conv2d_1_pointwise',
                              'Conv2d_2_depthwise', 'Conv2d_2_pointwise',
                              'Conv2d_3_depthwise', 'Conv2d_3_pointwise']
        self.assertItemsEqual(end_points.keys(), expected_endpoints)
Example 20
  def testBuildCustomNetworkUsingConvDefs(self):
    batch_size = 5
    height, width = 224, 224
    conv_defs = [
        mobilenet_v1.Conv(kernel=[3, 3], stride=2, depth=32),
        mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=64),
        mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=128),
        mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512)
    ]

    inputs = tf.random_uniform((batch_size, height, width, 3))
    net, end_points = mobilenet_v1.mobilenet_v1_base(
        inputs, final_endpoint='Conv2d_3_pointwise', conv_defs=conv_defs)
    self.assertTrue(net.op.name.startswith('MobilenetV1/Conv2d_3'))
    self.assertListEqual(net.get_shape().as_list(),
                         [batch_size, 56, 56, 512])
    expected_endpoints = ['Conv2d_0',
                          'Conv2d_1_depthwise', 'Conv2d_1_pointwise',
                          'Conv2d_2_depthwise', 'Conv2d_2_pointwise',
                          'Conv2d_3_depthwise', 'Conv2d_3_pointwise']
    self.assertItemsEqual(end_points.keys(), expected_endpoints)
Example 21
    def make_network(self, is_train):
        if is_train:
            image = tf.placeholder(tf.float32, shape=[cfg.batch_size, *cfg.data_shape, 3])

            label15 = tf.placeholder(tf.float32, shape=[cfg.batch_size, *cfg.output_shape, cfg.nr_skeleton])
            label11 = tf.placeholder(tf.float32, shape=[cfg.batch_size, *cfg.output_shape, cfg.nr_skeleton])
            label9 = tf.placeholder(tf.float32, shape=[cfg.batch_size, *cfg.output_shape, cfg.nr_skeleton])
            label7 = tf.placeholder(tf.float32, shape=[cfg.batch_size, *cfg.output_shape, cfg.nr_skeleton])
            labels = [label15, label11, label9, label7]

            valids = tf.placeholder(tf.float32, shape=[cfg.batch_size, cfg.nr_skeleton])

            self.set_inputs(image, label15, label11, label9, label7, valids)
        else:
            image = tf.placeholder(tf.float32, shape=[None, *cfg.data_shape, 3])
            self.set_inputs(image)

        mobilenet_fms, endpoints = mobilenet_v1_base(image)

        heatmap_outs = self.head_net(mobilenet_fms, is_train)

        # make loss
        if is_train:
            def ohkm(loss, top_k):
                ohkm_loss = 0.
                for i in range(cfg.batch_size):
                    sub_loss = loss[i]
                    topk_val, topk_idx = tf.nn.top_k(sub_loss, k=top_k, sorted=False, name='ohkm{}'.format(i))
                    tmp_loss = tf.gather(sub_loss, topk_idx, name='ohkm_loss{}'.format(i))  # can be ignored?
                    ohkm_loss += tf.reduce_sum(tmp_loss) / top_k
                ohkm_loss /= cfg.batch_size
                return ohkm_loss

            label = label7 * tf.to_float(tf.greater(tf.reshape(valids, (-1, 1, 1, cfg.nr_skeleton)), 0.1))
            loss = tf.reduce_mean(tf.square(heatmap_outs - label))

            self.add_tower_summary('loss', loss)
            self.set_loss(loss)
        else:
            self.set_outputs(heatmap_outs)
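Note that the `ohkm` helper defined in `make_network` is never applied; the loss actually used is a plain mean-squared error over all keypoints. A self-contained sketch of how online hard keypoint mining could be wired in (the function name, the `top_k` value, and the reduction over spatial dimensions are assumptions, not what the code above does):

import tensorflow as tf

def ohkm_loss(heatmap_outs, label, batch_size, top_k):
    # Per-keypoint L2 loss reduced over spatial dims: shape [batch, nr_skeleton].
    per_keypoint_loss = tf.reduce_mean(tf.square(heatmap_outs - label), axis=[1, 2])
    total = 0.
    for i in range(batch_size):
        # Keep only the hardest `top_k` keypoints for each sample.
        topk_val, _ = tf.nn.top_k(per_keypoint_loss[i], k=top_k, sorted=False)
        total += tf.reduce_sum(topk_val) / top_k
    return total / batch_size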
Example 22
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

        Args:
          preprocessed_inputs: a [batch, height, width, channels] float tensor
            representing a batch of images.

        Returns:
          feature_maps: a list of tensors where the ith tensor has shape
            [batch, height_i, width_i, depth_i]
        """
        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        bottomup_features_names = [
            'Conv2d_11_pointwise', 'Conv2d_13_pointwise'
        ]
        num_appended_layers = 4
        appended_channel_num = [512, 256, 256, 256]

        with tf.control_dependencies([shape_assert]):
            with slim.arg_scope(self._conv_hyperparams):
                with tf.variable_scope('MobilenetV1',
                                       reuse=self._reuse_weights) as scope:
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        preprocessed_inputs,
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)
                    topdown_features = self._topdown_feature_maps(
                        image_features,
                        bottomup_features_names=bottomup_features_names,
                        num_appended_layers=num_appended_layers,
                        appended_channel_num=appended_channel_num)
        return topdown_features.values()
Example 23
  def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=self._train_batch_norm,
              weight_decay=self._weight_decay)):
        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
          _, activations = mobilenet_v1.mobilenet_v1_base(
              preprocessed_inputs,
              final_endpoint='Conv2d_11_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
    return activations['Conv2d_11_pointwise'], activations
Example 24
    def testBuildBaseNetwork(self):
        batch_size = 5
        height, width = 224, 224

        inputs = tf.random_uniform((batch_size, height, width, 3))
        net, end_points = mobilenet_v1.mobilenet_v1_base(inputs)
        self.assertTrue(net.op.name.startswith('MobilenetV1/Conv2d_13'))
        self.assertListEqual(net.get_shape().as_list(),
                             [batch_size, 7, 7, 1024])
        expected_endpoints = [
            'Conv2d_0', 'Conv2d_1_depthwise', 'Conv2d_1_pointwise',
            'Conv2d_2_depthwise', 'Conv2d_2_pointwise', 'Conv2d_3_depthwise',
            'Conv2d_3_pointwise', 'Conv2d_4_depthwise', 'Conv2d_4_pointwise',
            'Conv2d_5_depthwise', 'Conv2d_5_pointwise', 'Conv2d_6_depthwise',
            'Conv2d_6_pointwise', 'Conv2d_7_depthwise', 'Conv2d_7_pointwise',
            'Conv2d_8_depthwise', 'Conv2d_8_pointwise', 'Conv2d_9_depthwise',
            'Conv2d_9_pointwise', 'Conv2d_10_depthwise', 'Conv2d_10_pointwise',
            'Conv2d_11_depthwise', 'Conv2d_11_pointwise',
            'Conv2d_12_depthwise', 'Conv2d_12_pointwise',
            'Conv2d_13_depthwise', 'Conv2d_13_pointwise'
        ]
        self.assertItemsEqual(end_points.keys(), expected_endpoints)
Example 25
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        with tf.control_dependencies([shape_assert]):
            with tf.variable_scope('MobilenetV1',
                                   reuse=self._reuse_weights) as scope:
                with _batch_norm_arg_scope(
                    [slim.conv2d, slim.separable_conv2d],
                        batch_norm_scale=True,
                        train_batch_norm=self._train_batch_norm):
                    _, activations = mobilenet_v1.mobilenet_v1_base(
                        preprocessed_inputs,
                        final_endpoint='Conv2d_11_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)

        return activations['Conv2d_11_pointwise']
Example 26
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
                    base_feature_map_depth=0,
                    num_layers=6,
                    image_features={
                        'image_features': image_features['Conv2d_11_pointwise']
                    })
        return feature_maps.values()
Example 27
 def testBuildOnlyUptoFinalEndpoint(self):
     batch_size = 5
     height, width = 224, 224
     endpoints = [
         'Conv2d_0', 'Conv2d_1_depthwise', 'Conv2d_1_pointwise',
         'Conv2d_2_depthwise', 'Conv2d_2_pointwise', 'Conv2d_3_depthwise',
         'Conv2d_3_pointwise', 'Conv2d_4_depthwise', 'Conv2d_4_pointwise',
         'Conv2d_5_depthwise', 'Conv2d_5_pointwise', 'Conv2d_6_depthwise',
         'Conv2d_6_pointwise', 'Conv2d_7_depthwise', 'Conv2d_7_pointwise',
         'Conv2d_8_depthwise', 'Conv2d_8_pointwise', 'Conv2d_9_depthwise',
         'Conv2d_9_pointwise', 'Conv2d_10_depthwise', 'Conv2d_10_pointwise',
         'Conv2d_11_depthwise', 'Conv2d_11_pointwise',
         'Conv2d_12_depthwise', 'Conv2d_12_pointwise',
         'Conv2d_13_depthwise', 'Conv2d_13_pointwise'
     ]
     for index, endpoint in enumerate(endpoints):
         with tf.Graph().as_default():
             inputs = tf.random_uniform((batch_size, height, width, 3))
             out_tensor, end_points = mobilenet_v1.mobilenet_v1_base(
                 inputs, final_endpoint=endpoint)
             self.assertTrue(
                 out_tensor.op.name.startswith('MobilenetV1/' + endpoint))
             self.assertItemsEqual(endpoints[:index + 1], end_points.keys())
Example 28
  def testBuildBaseNetwork(self):
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random_uniform((batch_size, height, width, 3))
    net, end_points = mobilenet_v1.mobilenet_v1_base(inputs)
    self.assertTrue(net.op.name.startswith('MobilenetV1/Conv2d_13'))
    self.assertListEqual(net.get_shape().as_list(),
                         [batch_size, 7, 7, 1024])
    expected_endpoints = ['Conv2d_0',
                          'Conv2d_1_depthwise', 'Conv2d_1_pointwise',
                          'Conv2d_2_depthwise', 'Conv2d_2_pointwise',
                          'Conv2d_3_depthwise', 'Conv2d_3_pointwise',
                          'Conv2d_4_depthwise', 'Conv2d_4_pointwise',
                          'Conv2d_5_depthwise', 'Conv2d_5_pointwise',
                          'Conv2d_6_depthwise', 'Conv2d_6_pointwise',
                          'Conv2d_7_depthwise', 'Conv2d_7_pointwise',
                          'Conv2d_8_depthwise', 'Conv2d_8_pointwise',
                          'Conv2d_9_depthwise', 'Conv2d_9_pointwise',
                          'Conv2d_10_depthwise', 'Conv2d_10_pointwise',
                          'Conv2d_11_depthwise', 'Conv2d_11_pointwise',
                          'Conv2d_12_depthwise', 'Conv2d_12_pointwise',
                          'Conv2d_13_depthwise', 'Conv2d_13_pointwise']
    self.assertItemsEqual(end_points.keys(), expected_endpoints)
Example 29
def mobilenet_v1_l2norm(inputs,
                        num_classes=1000,
                        dropout_keep_prob=0.999,
                        is_training=True,
                        min_depth=8,
                        depth_multiplier=1.0,
                        conv_defs=None,
                        prediction_fn=tf.contrib.layers.softmax,
                        spatial_squeeze=True,
                        reuse=None,
                        scope='MobilenetV1',
                        global_pool=False,
                        initial_scale=10.):
  """Mobilenet v1 model for classification.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer is
      omitted and the input features to the logits layer (before dropout) are
      returned instead.
    dropout_keep_prob: the percentage of activation values that are retained.
    is_training: whether is training or not.
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels) for
      all convolution ops. The value must be greater than zero. Typical usage
      will be to set this value in (0, 1) to reduce the number of parameters or
      computation cost of the model.
    conv_defs: A list of ConvDef namedtuples specifying the net architecture.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C]; if False, logits is
      of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    global_pool: Optional boolean flag to control the avgpooling before the
      logits layer. If false or unset, pooling is done with a fixed window that
      reduces default-sized inputs to 1x1, while larger inputs lead to larger
      outputs. If true, any input size is pooled down to 1x1.
    initial_scale: initial value of the trainable scale factor by which the
      logits from the weight-normalized layer are multiplied, to alleviate the
      performance loss caused by normalization.

  Returns:
    net: a 2D Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped-out input to the logits layer
      if num_classes is 0 or None.
    end_points: a dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: Input rank is invalid.
  """
  input_shape = inputs.get_shape().as_list()
  if len(input_shape) != 4:
    raise ValueError('Invalid input tensor rank, expected 4, was: %d' %
                     len(input_shape))

  with tf.variable_scope(scope, 'MobilenetV1', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = mobilenet_v1.mobilenet_v1_base(
          inputs,
          scope=scope,
          min_depth=min_depth,
          depth_multiplier=depth_multiplier,
          conv_defs=conv_defs)
      with tf.variable_scope('Logits'):
        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
          end_points['global_pool'] = net
        else:
          # Pooling with a fixed kernel size.
          kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
          net = slim.avg_pool2d(
              net, kernel_size, padding='VALID', scope='AvgPool_1a')
          end_points['AvgPool_1a'] = net

          net = slim.conv2d(
              net,
              num_outputs=1024,
              kernel_size=[1, 1],
              activation_fn=None,
              normalizer_fn=None,
              scope='Conv2d_1c_1x1_negative')
          net = tf.nn.l2_normalize(net, axis=-1, name='Normalize_1a')
          end_points['Normalize_1a'] = net

        if not num_classes:
          return net, end_points

        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')

        logits = conv2d_l2norm.conv2d(
            net,
            num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope='Conv2d_1d_1x1_normalize',
            weight_norm=True)
        scale_factor = tf.get_variable(
            'scale_factor', [],
            dtype=tf.float32,
            initializer=tf.constant_initializer(initial_scale),
            trainable=True)
        slim.summaries.add_scalar_summary(scale_factor, 'scale_factor',
                                          'scale_factors')
        logits = tf.multiply(logits, scale_factor)

        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
      end_points['Logits'] = logits
      if prediction_fn:
        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
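`mobilenet_v1_l2norm` relies on the project-specific `conv2d_l2norm.conv2d` op (not reproduced here) and on `_reduced_kernel_size_for_small_input`; a sketch of the latter, mirroring the helper of the same name in slim's mobilenet_v1.py, is:

def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  """Shrinks the pooling kernel if the input is smaller than the default size."""
  shape = input_tensor.get_shape().as_list()
  if shape[1] is None or shape[2] is None:
    kernel_size_out = kernel_size
  else:
    kernel_size_out = [min(shape[1], kernel_size[0]),
                       min(shape[2], kernel_size[1])]
  return kernel_size_out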
Example 30
def _construct_model(model_type='resnet_v1_50'):
  """Constructs model for the desired type of CNN.

  Args:
    model_type: Type of model to be used.

  Returns:
    end_points: A dictionary from components of the network to the corresponding
      activations.

  Raises:
    ValueError: If the model_type is not supported.
  """
  # Placeholder input.
  images = array_ops.placeholder(
      dtypes.float32, shape=(1, None, None, 3), name=_INPUT_NODE)

  # Construct model.
  if model_type == 'inception_resnet_v2':
    _, end_points = inception.inception_resnet_v2_base(images)
  elif model_type == 'inception_resnet_v2-same':
    _, end_points = inception.inception_resnet_v2_base(
        images, align_feature_maps=True)
  elif model_type == 'inception_v2':
    _, end_points = inception.inception_v2_base(images)
  elif model_type == 'inception_v2-no-separable-conv':
    _, end_points = inception.inception_v2_base(
        images, use_separable_conv=False)
  elif model_type == 'inception_v3':
    _, end_points = inception.inception_v3_base(images)
  elif model_type == 'inception_v4':
    _, end_points = inception.inception_v4_base(images)
  elif model_type == 'alexnet_v2':
    _, end_points = alexnet.alexnet_v2(images)
  elif model_type == 'vgg_a':
    _, end_points = vgg.vgg_a(images)
  elif model_type == 'vgg_16':
    _, end_points = vgg.vgg_16(images)
  elif model_type == 'mobilenet_v1':
    _, end_points = mobilenet_v1.mobilenet_v1_base(images)
  elif model_type == 'mobilenet_v1_075':
    _, end_points = mobilenet_v1.mobilenet_v1_base(
        images, depth_multiplier=0.75)
  elif model_type == 'resnet_v1_50':
    _, end_points = resnet_v1.resnet_v1_50(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_101':
    _, end_points = resnet_v1.resnet_v1_101(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_152':
    _, end_points = resnet_v1.resnet_v1_152(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_200':
    _, end_points = resnet_v1.resnet_v1_200(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_50':
    _, end_points = resnet_v2.resnet_v2_50(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_101':
    _, end_points = resnet_v2.resnet_v2_101(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_152':
    _, end_points = resnet_v2.resnet_v2_152(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_200':
    _, end_points = resnet_v2.resnet_v2_200(
        images, num_classes=None, is_training=False, global_pool=False)
  else:
    raise ValueError('Unsupported model_type %s.' % model_type)

  return end_points
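A hedged usage sketch for _construct_model above (it assumes the TF-Slim research-model imports used by the surrounding file are available and a TF 1.x graph is active):

end_points = _construct_model(model_type='mobilenet_v1')
for name in sorted(end_points):
  print(name, end_points[name].get_shape().as_list())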
    def extract_features(self,
                         preprocessed_inputs,
                         state_saver=None,
                         state_name='lstm_state',
                         unroll_length=5,
                         scope=None):
        """Extracts features from preprocessed inputs.

    The features include the base network features, lstm features and SSD
    features, organized in the following name scope:

    <parent scope>/MobilenetV1/...
    <parent scope>/LSTM/...
    <parent scope>/FeatureMaps/...

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: A python string for the name to use with the state_saver.
      unroll_length: The number of steps to unroll the lstm.
      scope: The scope for the base network of the feature extractor.

    Returns:
      A list of tensors where the ith tensor has shape [batch, height_i,
      width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)
        with slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(
                    is_training=self._is_training)):
            with (slim.arg_scope(self._conv_hyperparams_fn())
                  if self._override_base_feature_extractor_hyperparams else
                  context_manager.IdentityContextManager()):
                with slim.arg_scope([slim.batch_norm], fused=False):
                    # Base network.
                    with tf.variable_scope(scope,
                                           self._base_network_scope,
                                           reuse=self._reuse_weights) as scope:
                        net, image_features = mobilenet_v1.mobilenet_v1_base(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            final_endpoint='Conv2d_13_pointwise',
                            min_depth=self._min_depth,
                            depth_multiplier=self._depth_multiplier,
                            scope=scope)

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with slim.arg_scope([slim.batch_norm],
                                fused=False,
                                is_training=self._is_training):
                # ConvLSTM layers.
                with tf.variable_scope(
                        'LSTM', reuse=self._reuse_weights) as lstm_scope:
                    lstm_cell = lstm_cells.BottleneckConvLSTMCell(
                        filter_size=(3, 3),
                        output_size=(net.shape[1].value, net.shape[2].value),
                        num_units=max(self._min_depth, self._lstm_state_depth),
                        activation=tf.nn.relu6,
                        visualize_gates=True)

                    net_seq = list(tf.split(net, unroll_length))
                    if state_saver is None:
                        init_state = lstm_cell.init_state(
                            state_name, net.shape[0].value // unroll_length,
                            tf.float32)
                    else:
                        c = state_saver.state('%s_c' % state_name)
                        h = state_saver.state('%s_h' % state_name)
                        init_state = (c, h)

                    # Identities added for inputting state tensors externally.
                    c_ident = tf.identity(init_state[0],
                                          name='lstm_state_in_c')
                    h_ident = tf.identity(init_state[1],
                                          name='lstm_state_in_h')
                    init_state = (c_ident, h_ident)

                    net_seq, states_out = rnn_decoder.rnn_decoder(
                        net_seq, init_state, lstm_cell, scope=lstm_scope)
                    batcher_ops = None
                    self._states_out = states_out
                    if state_saver is not None:
                        self._step = state_saver.state('%s_step' % state_name)
                        batcher_ops = [
                            state_saver.save_state('%s_c' % state_name,
                                                   states_out[-1][0]),
                            state_saver.save_state('%s_h' % state_name,
                                                   states_out[-1][1]),
                            state_saver.save_state('%s_step' % state_name,
                                                   self._step - 1)
                        ]
                    with tf_ops.control_dependencies(batcher_ops):
                        image_features['Conv2d_13_pointwise_lstm'] = tf.concat(
                            net_seq, 0)

                    # Identities added for reading output states, to be reused externally.
                    tf.identity(states_out[-1][0], name='lstm_state_out_c')
                    tf.identity(states_out[-1][1], name='lstm_state_out_h')

                # SSD layers.
                with tf.variable_scope('FeatureMaps',
                                       reuse=self._reuse_weights):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=self._feature_map_layout,
                        depth_multiplier=(self._depth_multiplier),
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return feature_maps.values()
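The extractor above packs unroll_length consecutive frames of each clip into the batch dimension, splits them into one tensor per ConvLSTM step, and concatenates the per-step outputs back into a single batch. A minimal sketch of that convention with plain TF 1.x ops; the 8x8x256 shape and the two-clip batch are illustrative assumptions:

import tensorflow as tf

unroll_length = 5
# Two clips of five frames each, stacked along the batch axis.
frames = tf.placeholder(tf.float32, [2 * unroll_length, 8, 8, 256])

# One tensor per LSTM step; each would be fed to the cell in turn.
net_seq = tf.split(frames, unroll_length, axis=0)

# After unrolling, the per-step outputs are stitched back into one batch,
# matching tf.concat(net_seq, 0) in the code above.
restored = tf.concat(net_seq, axis=0)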
Example no. 32
0
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if image height or width are not 256 pixels.
    """
        image_shape = preprocessed_inputs.get_shape()
        image_shape.assert_has_rank(4)
        image_height = image_shape[1].value
        image_width = image_shape[2].value

        if image_height is None or image_width is None:
            shape_assert = tf.Assert(
                tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                               tf.equal(tf.shape(preprocessed_inputs)[2],
                                        256)),
                ['image size must be 256 in both height and width.'])
            with tf.control_dependencies([shape_assert]):
                preprocessed_inputs = tf.identity(preprocessed_inputs)
        elif image_height != 256 or image_width != 256:
            raise ValueError(
                'image size must be 256 in both height and width;'
                ' image dim = %d,%d' % (image_height, image_width))

        feature_map_layout = {
            'from_layer':
            ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256],
            'conv_kernel_size': [-1, -1, 3, 3, 2],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
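A rough sanity check of why the input must be exactly 256: Conv2d_11_pointwise sits at stride 16 and Conv2d_13_pointwise at stride 32, and each extra SSD layer halves the map again until the final 2x2-kernel layer reaches 1x1. The sketch below only illustrates that arithmetic; the sizes are an expectation from the strides, not captured output:

# Two backbone maps (stride 16 and 32), then the three extra layers requested
# by feature_map_layout.
pyramid = [256 // 16, 256 // 32]   # [16, 8]
size = pyramid[-1]
while size > 1:
    size //= 2
    pyramid.append(size)           # 4, 2, 1
print(pyramid)                     # [16, 8, 4, 2, 1]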
Example no. 33
0
def _construct_model(model_type='resnet_v1_50'):
    """Constructs model for the desired type of CNN.

  Args:
    model_type: Type of model to be used.

  Returns:
    end_points: A dictionary from components of the network to the corresponding
      activations.

  Raises:
    ValueError: If the model_type is not supported.
  """
    # Placeholder input.
    images = array_ops.placeholder(dtypes.float32,
                                   shape=(1, None, None, 3),
                                   name=_INPUT_NODE)

    # Construct model.
    if model_type == 'inception_resnet_v2':
        _, end_points = inception.inception_resnet_v2_base(images)
    elif model_type == 'inception_resnet_v2-same':
        _, end_points = inception.inception_resnet_v2_base(
            images, align_feature_maps=True)
    elif model_type == 'inception_v2':
        _, end_points = inception.inception_v2_base(images)
    elif model_type == 'inception_v2-no-separable-conv':
        _, end_points = inception.inception_v2_base(images,
                                                    use_separable_conv=False)
    elif model_type == 'inception_v3':
        _, end_points = inception.inception_v3_base(images)
    elif model_type == 'inception_v4':
        _, end_points = inception.inception_v4_base(images)
    elif model_type == 'alexnet_v2':
        _, end_points = alexnet.alexnet_v2(images)
    elif model_type == 'vgg_a':
        _, end_points = vgg.vgg_a(images)
    elif model_type == 'vgg_16':
        _, end_points = vgg.vgg_16(images)
    elif model_type == 'mobilenet_v1':
        _, end_points = mobilenet_v1.mobilenet_v1_base(images)
    elif model_type == 'mobilenet_v1_075':
        _, end_points = mobilenet_v1.mobilenet_v1_base(images,
                                                       depth_multiplier=0.75)
    elif model_type == 'resnet_v1_50':
        _, end_points = resnet_v1.resnet_v1_50(images,
                                               num_classes=None,
                                               is_training=False,
                                               global_pool=False)
    elif model_type == 'resnet_v1_101':
        _, end_points = resnet_v1.resnet_v1_101(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v1_152':
        _, end_points = resnet_v1.resnet_v1_152(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v1_200':
        _, end_points = resnet_v1.resnet_v1_200(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v2_50':
        _, end_points = resnet_v2.resnet_v2_50(images,
                                               num_classes=None,
                                               is_training=False,
                                               global_pool=False)
    elif model_type == 'resnet_v2_101':
        _, end_points = resnet_v2.resnet_v2_101(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v2_152':
        _, end_points = resnet_v2.resnet_v2_152(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v2_200':
        _, end_points = resnet_v2.resnet_v2_200(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    else:
        raise ValueError('Unsupported model_type %s.' % model_type)

    return end_points
Example no. 34
0
import tensorflow as tf
from nets import mobilenet_v1

inputs = tf.placeholder(tf.float32, shape=[1, 1000, 800, 3])

net, features = mobilenet_v1.mobilenet_v1_base(inputs)

shape = net.get_shape().as_list()
print("Network shape", shape)

print("There are %d endpoints" % len(features))

for key in sorted(features):
    shape = features[key].get_shape().as_list()
    print("\t%s shape:" % key, shape)
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        # The first two maps are taken directly from MobileNet endpoints; the
        # four empty entries ask the feature map generator to create extra SSD
        # layers.  A layer_depth of -1 keeps the depth of the source map.
        feature_map_layout = {
            'from_layer': [
                'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''
            ],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
        }

        with tf.control_dependencies([shape_assert]):
            # Apply the detector's convolutional hyperparameters to everything
            # that is built below.
            with slim.arg_scope(self._conv_hyperparams):
                with tf.variable_scope('MobilenetV1',
                                       reuse=self._reuse_weights) as scope:
                    # Run the MobileNetV1 base up to Conv2d_13_pointwise.
                    # image_features maps endpoint names to feature maps;
                    # min_depth (16 in this configuration) and depth_multiplier
                    # (1, i.e. full depth) control the channel counts.
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        preprocessed_inputs,
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)

                    # Build the SSD pyramid: reuse the two MobileNet maps named
                    # in feature_map_layout and create the remaining maps with
                    # new convolutions.
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return feature_maps.values()  # the list of six feature maps for the SSD head
Example no. 36
0
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        conv_defs=self._conv_defs,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)

            depth_fn = lambda d: max(int(d * self._depth_multiplier),
                                     self._min_depth)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope('fpn', reuse=self._reuse_weights):
                    feature_blocks = [
                        'Conv2d_3_pointwise', 'Conv2d_5_pointwise',
                        'Conv2d_11_pointwise', 'Conv2d_13_pointwise'
                    ]
                    base_fpn_max_level = min(self._fpn_max_level, 5)
                    feature_block_list = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_block_list.append(feature_blocks[level - 2])
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in feature_block_list],
                        depth=depth_fn(self._additional_layer_depth),
                        use_depthwise=self._use_depthwise,
                        use_explicit_padding=self._use_explicit_padding)
                    feature_maps = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_maps.append(fpn_features['top_down_{}'.format(
                            feature_blocks[level - 2])])
                    last_feature_map = fpn_features['top_down_{}'.format(
                        feature_blocks[base_fpn_max_level - 2])]
                    # Construct coarse features
                    padding = 'VALID' if self._use_explicit_padding else 'SAME'
                    kernel_size = 3
                    for i in range(base_fpn_max_level + 1,
                                   self._fpn_max_level + 1):
                        if self._use_depthwise:
                            conv_op = functools.partial(slim.separable_conv2d,
                                                        depth_multiplier=1)
                        else:
                            conv_op = slim.conv2d
                        if self._use_explicit_padding:
                            last_feature_map = ops.fixed_padding(
                                last_feature_map, kernel_size)
                        last_feature_map = conv_op(
                            last_feature_map,
                            num_outputs=depth_fn(self._additional_layer_depth),
                            kernel_size=[kernel_size, kernel_size],
                            stride=2,
                            padding=padding,
                            scope='bottom_up_Conv2d_{}'.format(
                                i - base_fpn_max_level + 13))
                        feature_maps.append(last_feature_map)
        return feature_maps
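A hedged sketch of the FPN bookkeeping used above: pyramid levels map to backbone endpoints through feature_blocks[level - 2], and levels above 5 become extra stride-2 convolutions named bottom_up_Conv2d_14, bottom_up_Conv2d_15, and so on. The min/max levels below are illustrative values, not taken from the source:

feature_blocks = ['Conv2d_3_pointwise', 'Conv2d_5_pointwise',
                  'Conv2d_11_pointwise', 'Conv2d_13_pointwise']
fpn_min_level, fpn_max_level = 3, 7
base_fpn_max_level = min(fpn_max_level, 5)

top_down_blocks = [feature_blocks[level - 2]
                   for level in range(fpn_min_level, base_fpn_max_level + 1)]
extra_scopes = ['bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13)
                for i in range(base_fpn_max_level + 1, fpn_max_level + 1)]

print(top_down_blocks)  # ['Conv2d_5_pointwise', 'Conv2d_11_pointwise', 'Conv2d_13_pointwise']
print(extra_scopes)     # ['bottom_up_Conv2d_14', 'bottom_up_Conv2d_15']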
Example no. 37
0
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)

            depth_fn = lambda d: max(int(d * self._depth_multiplier),
                                     self._min_depth)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope('fpn', reuse=self._reuse_weights):
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key]) for key in [
                            'Conv2d_5_pointwise', 'Conv2d_11_pointwise',
                            'Conv2d_13_pointwise'
                        ]],
                        depth=depth_fn(256))
                    last_feature_map = fpn_features[
                        'top_down_Conv2d_13_pointwise']
                    coarse_features = {}
                    for i in range(14, 16):
                        last_feature_map = slim.conv2d(
                            last_feature_map,
                            num_outputs=depth_fn(256),
                            kernel_size=[3, 3],
                            stride=2,
                            padding='SAME',
                            scope='bottom_up_Conv2d_{}'.format(i))
                        coarse_features['bottom_up_Conv2d_{}'.format(
                            i)] = last_feature_map
        return [
            fpn_features['top_down_Conv2d_5_pointwise'],
            fpn_features['top_down_Conv2d_11_pointwise'],
            fpn_features['top_down_Conv2d_13_pointwise'],
            coarse_features['bottom_up_Conv2d_14'],
            coarse_features['bottom_up_Conv2d_15']
        ]
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    feature_map_layout = {
        'from_layer': ['east_conv1_3x3'],
        'layer_depth': [-1],
    }

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(self._conv_hyperparams):
        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              preprocessed_inputs,
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          """
          by chenx
          """
          east_conv_1 = image_features['Conv2d_3_pointwise']
          east_conv_2 = image_features['Conv2d_5_pointwise']
          east_conv_3 = image_features['Conv2d_11_pointwise']
          east_conv_4 = image_features['Conv2d_13_pointwise']

          east_deconv4 = slim.conv2d_transpose(east_conv_4, 512, [4, 4], 2, \
                                          padding='SAME', scope='east_deconv4')
          east_conv4_concat = tf.concat([east_conv_4, east_deconv4], axis=3)
          east_conv4_1x1 = slim.conv2d(east_conv4_concat, 256, [1,1],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv4_1x1')
          east_conv4_3x3 = slim.conv2d(east_conv4_1x1, 256, [3,3],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv4_3x3')
          image_features['east_conv4_3x3'] = east_conv4_3x3

          east_deconv3 = slim.conv2d_transpose(east_conv4_3x3, 256, [4, 4], 2, \
                                          padding='SAME', scope='east_deconv3')
          east_conv3_concat = tf.concat([east_conv_3, east_deconv3], axis=3)
          east_conv3_1x1 = slim.conv2d(east_conv3_concat, 128, [1,1],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv3_1x1')
          east_conv3_3x3 = slim.conv2d(east_conv3_1x1, 128, [3,3],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv3_3x3')
          image_features['east_conv3_3x3'] = east_conv3_3x3

          east_deconv2 = slim.conv2d_transpose(east_conv3_3x3, 128, [4, 4], 2, \
                                          padding='SAME', scope='east_deconv2')
          east_conv2_concat = tf.concat([east_conv_2, east_deconv2], axis=3)
          east_conv2_1x1 = slim.conv2d(east_conv2_concat, 64, [1,1],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv2_1x1')
          east_conv2_3x3 = slim.conv2d(east_conv2_1x1, 64, [3,3],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv2_3x3')
          image_features['east_conv2_3x3'] = east_conv2_3x3

          east_deconv1 = slim.conv2d_transpose(east_conv2_3x3, 64, [4, 4], 2, \
                                          padding='SAME', scope='east_deconv1')
          east_conv1_concat = tf.concat([east_conv_1, east_deconv1], axis=3)
          east_conv1_1x1 = slim.conv2d(east_conv1_concat, 32, [1,1],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv1_1x1')
          east_conv1_3x3 = slim.conv2d(east_conv1_1x1, 32, [3,3],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv1_3x3')
          image_features['east_conv1_3x3'] = east_conv1_3x3

          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
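The east_conv* blocks above repeat one pattern: upsample the deeper map by 2x with a transposed convolution, concatenate it with the skip connection, then refine with a 1x1 and a 3x3 convolution. Below is a hedged sketch of a single merge stage as a helper; the function name and the depth-times-two deconvolution convention are illustrative, not part of the source:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def east_merge_stage(deep_map, skip_map, depth, name):
  # Upsample the deeper feature map by 2x with a 4x4 transposed convolution.
  deconv = slim.conv2d_transpose(deep_map, depth * 2, [4, 4], 2,
                                 padding='SAME', scope=name + '_deconv')
  # Fuse with the skip connection, then refine with 1x1 and 3x3 convolutions.
  merged = tf.concat([skip_map, deconv], axis=3)
  merged = slim.conv2d(merged, depth, [1, 1], stride=1,
                       normalizer_fn=slim.batch_norm, scope=name + '_1x1')
  return slim.conv2d(merged, depth, [3, 3], stride=1,
                     normalizer_fn=slim.batch_norm, scope=name + '_3x3')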
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=None, regularize_depthwise=True)):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams
              else context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)

      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('fpn', reuse=self._reuse_weights):
          feature_blocks = [
              'Conv2d_3_pointwise', 'Conv2d_5_pointwise', 'Conv2d_11_pointwise',
              'Conv2d_13_pointwise'
          ]
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_block_list.append(feature_blocks[level - 2])
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(256))
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(fpn_features['top_down_{}'.format(
                feature_blocks[level - 2])])
          last_feature_map = fpn_features['top_down_{}'.format(
              feature_blocks[base_fpn_max_level - 2])]
          # Construct coarse features
          for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
            last_feature_map = slim.conv2d(
                last_feature_map,
                num_outputs=depth_fn(256),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13))
            feature_maps.append(last_feature_map)
    return feature_maps
  def extract_features(self,
                       preprocessed_inputs,
                       state_saver=None,
                       state_name='lstm_state',
                       unroll_length=5,
                       scope=None):
    """Extracts features from preprocessed inputs.

    The features include the base network features, lstm features and SSD
    features, organized in the following name scope:

    <parent scope>/MobilenetV1/...
    <parent scope>/LSTM/...
    <parent scope>/FeatureMaps/...

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: A python string for the name to use with the state_saver.
      unroll_length: The number of steps to unroll the lstm.
      scope: The scope for the base network of the feature extractor.

    Returns:
      A list of tensors where the ith tensor has shape [batch, height_i,
      width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(is_training=self._is_training)):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams else
            context_manager.IdentityContextManager()):
        with slim.arg_scope([slim.batch_norm], fused=False):
          # Base network.
          with tf.variable_scope(
              scope, self._base_network_scope,
              reuse=self._reuse_weights) as scope:
            net, image_features = mobilenet_v1.mobilenet_v1_base(
                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                final_endpoint='Conv2d_13_pointwise',
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier,
                scope=scope)

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with slim.arg_scope(
          [slim.batch_norm], fused=False, is_training=self._is_training):
        # ConvLSTM layers.
        with tf.variable_scope('LSTM', reuse=self._reuse_weights) as lstm_scope:
          lstm_cell = lstm_cells.BottleneckConvLSTMCell(
              filter_size=(3, 3),
              output_size=(net.shape[1].value, net.shape[2].value),
              num_units=max(self._min_depth, self._lstm_state_depth),
              activation=tf.nn.relu6,
              visualize_gates=True)

          net_seq = list(tf.split(net, unroll_length))
          if state_saver is None:
            init_state = lstm_cell.init_state(
                state_name, net.shape[0].value // unroll_length, tf.float32)
          else:
            c = state_saver.state('%s_c' % state_name)
            h = state_saver.state('%s_h' % state_name)
            init_state = (c, h)

          # Identities added for inputting state tensors externally.
          c_ident = tf.identity(init_state[0], name='lstm_state_in_c')
          h_ident = tf.identity(init_state[1], name='lstm_state_in_h')
          init_state = (c_ident, h_ident)

          net_seq, states_out = rnn_decoder.rnn_decoder(
              net_seq, init_state, lstm_cell, scope=lstm_scope)
          batcher_ops = None
          self._states_out = states_out
          if state_saver is not None:
            self._step = state_saver.state('%s_step' % state_name)
            batcher_ops = [
                state_saver.save_state('%s_c' % state_name, states_out[-1][0]),
                state_saver.save_state('%s_h' % state_name, states_out[-1][1]),
                state_saver.save_state('%s_step' % state_name, self._step - 1)
            ]
          with tf_ops.control_dependencies(batcher_ops):
            image_features['Conv2d_13_pointwise_lstm'] = tf.concat(net_seq, 0)

          # Identities added for reading output states, to be reused externally.
          tf.identity(states_out[-1][0], name='lstm_state_out_c')
          tf.identity(states_out[-1][1], name='lstm_state_out_h')

        # SSD layers.
        with tf.variable_scope('FeatureMaps', reuse=self._reuse_weights):
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=self._feature_map_layout,
              depth_multiplier=(self._depth_multiplier),
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
Example no. 41
0
                trainable=embed_config['train_embedding'],
                is_training=False)
            with slim.arg_scope(arg_scope):
                embed_x, end_points = convolutional_alexnet(
                    input_image, reuse=False, split=alexnet_config['split'])
                embed_z, end_points_z = convolutional_alexnet(
                    template_image, reuse=True, split=alexnet_config['split'])

        elif feature_extactor == "mobilenet_v1":
            mobilenent_config = model_config['mobilenet_v1']
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(is_training=False)):
                with tf.variable_scope('MobilenetV1', reuse=False) as scope:
                    embed_x, end_points = mobilenet_v1.mobilenet_v1_base(
                        input_image,
                        final_endpoint=mobilenent_config['final_endpoint'],
                        conv_defs=mobilenet.CONV_DEFS,
                        depth_multiplier=mobilenent_config['depth_multiplier'],
                        scope=scope)
                with tf.variable_scope('MobilenetV1', reuse=True) as scope:
                    embed_z, end_points_z = mobilenet_v1.mobilenet_v1_base(
                        template_image,
                        final_endpoint=mobilenent_config['final_endpoint'],
                        conv_defs=mobilenet.CONV_DEFS,
                        depth_multiplier=mobilenent_config['depth_multiplier'],
                        scope=scope)
        else:
            raise ValueError(
                "Invalid feature extractor: {}".format(feature_extactor))

        # build cross-correlation between features from template image and input image
        with tf.variable_scope('detection'):