def test_use_bounded_activations_clip_value(self, use_native_resize_op):
    tf_graph = tf.Graph()
    with tf_graph.as_default():
      image_features = [
          ('block2', 255 * tf.ones([4, 8, 8, 256], dtype=tf.float32)),
          ('block3', 255 * tf.ones([4, 4, 4, 256], dtype=tf.float32)),
          ('block4', 255 * tf.ones([4, 2, 2, 256], dtype=tf.float32)),
          ('block5', 255 * tf.ones([4, 1, 1, 256], dtype=tf.float32))
      ]
      feature_map_generators.fpn_top_down_feature_maps(
          image_features=image_features,
          depth=128,
          use_bounded_activations=True,
          use_native_resize_op=use_native_resize_op)

      expected_clip_by_value_ops = [
          'top_down/clip_by_value', 'top_down/clip_by_value_1',
          'top_down/clip_by_value_2', 'top_down/clip_by_value_3',
          'top_down/clip_by_value_4', 'top_down/clip_by_value_5',
          'top_down/clip_by_value_6'
      ]

      # Gathers activation tensors before and after clip_by_value operations.
      activations = {}
      for clip_by_value_op in expected_clip_by_value_ops:
        clip_input_tensor = tf_graph.get_operation_by_name(
            '{}/Minimum'.format(clip_by_value_op)).inputs[0]
        clip_output_tensor = tf_graph.get_tensor_by_name(
            '{}:0'.format(clip_by_value_op))
        activations.update({
            'before_{}'.format(clip_by_value_op): clip_input_tensor,
            'after_{}'.format(clip_by_value_op): clip_output_tensor,
        })

      expected_lower_bound = -feature_map_generators.ACTIVATION_BOUND
      expected_upper_bound = feature_map_generators.ACTIVATION_BOUND
      init_op = tf.global_variables_initializer()
      with self.test_session() as session:
        session.run(init_op)
        activations_output = session.run(activations)
        for clip_by_value_op in expected_clip_by_value_ops:
          # Before clipping, activations exceed the expected bounds because of
          # the large input image_features values.
          activations_before_clipping = (
              activations_output['before_{}'.format(clip_by_value_op)])
          before_clipping_lower_bound = np.amin(activations_before_clipping)
          before_clipping_upper_bound = np.amax(activations_before_clipping)
          self.assertLessEqual(before_clipping_lower_bound,
                               expected_lower_bound)
          self.assertGreaterEqual(before_clipping_upper_bound,
                                  expected_upper_bound)

          # After clipping, activations stay within the expected bounds.
          activations_after_clipping = (
              activations_output['after_{}'.format(clip_by_value_op)])
          after_clipping_lower_bound = np.amin(activations_after_clipping)
          after_clipping_upper_bound = np.amax(activations_after_clipping)
          self.assertGreaterEqual(after_clipping_lower_bound,
                                  expected_lower_bound)
          self.assertLessEqual(after_clipping_upper_bound, expected_upper_bound)
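The use_native_resize_op argument suggests this test is parameterized to run once per resize implementation. A minimal sketch of how that wiring typically looks with absl's parameterized decorator (the decorator placement and class bases are assumptions; they are not shown in the snippet):

from absl.testing import parameterized
import tensorflow.compat.v1 as tf


class FPNTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters({'use_native_resize_op': True},
                            {'use_native_resize_op': False})
  def test_runs_for_both_resize_ops(self, use_native_resize_op):
    # Each parameter dict generates one concrete test case.
    self.assertIn(use_native_resize_op, (True, False))


if __name__ == '__main__':
  tf.test.main()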
  def test_use_bounded_activations_add_operations(self, use_native_resize_op):
    tf_graph = tf.Graph()
    with tf_graph.as_default():
      image_features = [('block2',
                         tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
                        ('block3',
                         tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
                        ('block4',
                         tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
                        ('block5',
                         tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))]
      feature_map_generators.fpn_top_down_feature_maps(
          image_features=image_features,
          depth=128,
          use_bounded_activations=True,
          use_native_resize_op=use_native_resize_op)

      expected_added_operations = dict.fromkeys([
          'top_down/clip_by_value', 'top_down/clip_by_value_1',
          'top_down/clip_by_value_2', 'top_down/clip_by_value_3',
          'top_down/clip_by_value_4', 'top_down/clip_by_value_5',
          'top_down/clip_by_value_6'
      ])
      op_names = {op.name: None for op in tf_graph.get_operations()}
      self.assertDictContainsSubset(expected_added_operations, op_names)
  def test_get_expected_feature_map_shapes_with_depthwise(
      self, use_native_resize_op):
    image_features = [
        ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
        ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
        ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
        ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
    ]
    feature_maps = feature_map_generators.fpn_top_down_feature_maps(
        image_features=image_features,
        depth=128,
        use_depthwise=True,
        use_native_resize_op=use_native_resize_op)

    expected_feature_map_shapes = {
        'top_down_block2': (4, 8, 8, 128),
        'top_down_block3': (4, 4, 4, 128),
        'top_down_block4': (4, 2, 2, 128),
        'top_down_block5': (4, 1, 1, 128)
    }

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      out_feature_maps = sess.run(feature_maps)
      out_feature_map_shapes = {key: value.shape
                                for key, value in out_feature_maps.items()}
      self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = self._resnet_base_fn(
              inputs=ops.pad_to_multiple(preprocessed_inputs,
                                         self._pad_to_multiple),
              num_classes=None,
              is_training=None,
              global_pool=False,
              output_stride=None,
              store_non_strided_activations=True,
              min_base_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          image_features = self._filter_features(image_features)
      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope(self._fpn_scope_name,
                               reuse=self._reuse_weights):
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_block_list.append('block{}'.format(level - 1))
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(self._additional_layer_depth))
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(
                fpn_features['top_down_block{}'.format(level - 1)])
          last_feature_map = fpn_features['top_down_block{}'.format(
              base_fpn_max_level - 1)]
          # Construct coarse features
          for i in range(base_fpn_max_level, self._fpn_max_level):
            last_feature_map = slim.conv2d(
                last_feature_map,
                num_outputs=depth_fn(self._additional_layer_depth),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_block{}'.format(i))
            feature_maps.append(last_feature_map)
    return feature_maps
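The depth_fn lambda above scales a requested channel depth by the depth multiplier and clamps it from below at min_depth. A standalone sketch of that arithmetic (the multiplier and min_depth values here are illustrative, not from any real config):

def depth_fn(d, depth_multiplier=0.5, min_depth=16):
  # Scale the requested depth, but never go below min_depth.
  return max(int(d * depth_multiplier), min_depth)

print(depth_fn(256))  # 128: the scaled depth wins
print(depth_fn(24))   # 16: clamped up to min_depth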
    def test_get_expected_feature_map_shapes(self):
        print('\n================================================')
        print('test_get_expected_feature_map_shapes')

        image_features = [
            ('block2', tf.random_uniform([4, 8, 8, 256], dtype=tf.float32)),
            ('block3', tf.random_uniform([4, 4, 4, 256], dtype=tf.float32)),
            ('block4', tf.random_uniform([4, 2, 2, 256], dtype=tf.float32)),
            ('block5', tf.random_uniform([4, 1, 1, 256], dtype=tf.float32))
        ]
        feature_maps = feature_map_generators.fpn_top_down_feature_maps(
            image_features=image_features, depth=128)

        expected_feature_map_shapes = {
            'top_down_block2': (4, 8, 8, 128),
            'top_down_block3': (4, 4, 4, 128),
            'top_down_block4': (4, 2, 2, 128),
            'top_down_block5': (4, 1, 1, 128)
        }

        init_op = tf.global_variables_initializer()
        with self.test_session() as sess:
            sess.run(init_op)
            out_feature_maps = sess.run(feature_maps)
            out_feature_map_shapes = {
                key: value.shape
                for key, value in out_feature_maps.items()
            }
            for key, value in out_feature_maps.items():
                print('{}: {}'.format(key, value.shape))
            self.assertDictEqual(out_feature_map_shapes,
                                 expected_feature_map_shapes)
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = self._resnet_base_fn(
              inputs=ops.pad_to_multiple(preprocessed_inputs,
                                         self._pad_to_multiple),
              num_classes=None,
              is_training=None,
              global_pool=False,
              output_stride=None,
              store_non_strided_activations=True,
              scope=scope)
          image_features = self._filter_features(image_features)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope(self._fpn_scope_name,
                               reuse=self._reuse_weights):
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key])
               for key in ['block2', 'block3', 'block4']],
              depth=256)
          last_feature_map = fpn_features['top_down_block4']
          coarse_features = {}
          for i in range(5, 7):
            last_feature_map = slim.conv2d(
                last_feature_map,
                num_outputs=256,
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_block{}'.format(i))
            coarse_features['bottom_up_block{}'.format(i)] = last_feature_map
    return [fpn_features['top_down_block2'],
            fpn_features['top_down_block3'],
            fpn_features['top_down_block4'],
            coarse_features['bottom_up_block5'],
            coarse_features['bottom_up_block6']]
Example #10
 def feature_map_generator(image_features):
     return feature_map_generators.fpn_top_down_feature_maps(
         image_features=image_features,
         depth=depth,
         use_depthwise=use_depthwise,
         use_explicit_padding=use_explicit_padding,
         use_bounded_activations=use_bounded_activations,
         use_native_resize_op=use_native_resize_op)
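This fragment is only an inner closure; depth, use_depthwise, use_explicit_padding, use_bounded_activations, and use_native_resize_op are free variables bound by an enclosing scope. A minimal sketch of a helper that would bind them, assuming the TF Object Detection API layout (the helper name and defaults are assumptions, not the original source):

from object_detection.models import feature_map_generators


def make_feature_map_generator(depth,
                               use_depthwise=False,
                               use_explicit_padding=False,
                               use_bounded_activations=False,
                               use_native_resize_op=False):
  # Returns a closure matching the fragment above, with the free
  # variables captured from this helper's arguments.
  def feature_map_generator(image_features):
    return feature_map_generators.fpn_top_down_feature_maps(
        image_features=image_features,
        depth=depth,
        use_depthwise=use_depthwise,
        use_explicit_padding=use_explicit_padding,
        use_bounded_activations=use_bounded_activations,
        use_native_resize_op=use_native_resize_op)
  return feature_map_generator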
  def test_get_expected_feature_map_shapes(self):
    image_features = [
        tf.random_uniform([4, 8, 8, 256], dtype=tf.float32),
        tf.random_uniform([4, 4, 4, 256], dtype=tf.float32),
        tf.random_uniform([4, 2, 2, 256], dtype=tf.float32),
        tf.random_uniform([4, 1, 1, 256], dtype=tf.float32),
    ]
    feature_maps = feature_map_generators.fpn_top_down_feature_maps(
        image_features=image_features, depth=128)

    expected_feature_map_shapes = {
        'top_down_feature_map_0': (4, 8, 8, 128),
        'top_down_feature_map_1': (4, 4, 4, 128),
        'top_down_feature_map_2': (4, 2, 2, 128),
        'top_down_feature_map_3': (4, 1, 1, 128)
    }

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      out_feature_maps = sess.run(feature_maps)
      out_feature_map_shapes = {key: value.shape
                                for key, value in out_feature_maps.items()}
      self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
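Note the two output-key conventions across these tests: when image_features is a list of (name, tensor) pairs the outputs are keyed 'top_down_<name>', and when it is a bare list of tensors they are keyed 'top_down_feature_map_<i>'. A tiny sketch of that naming pattern (a reading of the expected dicts above, not the library's code):

def output_key(i, name=None):
  # Named inputs keep their block name; unnamed inputs fall back to an index.
  if name is not None:
    return 'top_down_{}'.format(name)
  return 'top_down_feature_map_{}'.format(i)

print(output_key(0, 'block2'))  # top_down_block2
print(output_key(0))            # top_down_feature_map_0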
Example #12
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='layer_19',
                        depth_multiplier=self._depth_multiplier,
                        conv_defs=self._conv_defs,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            depth_fn = lambda d: max(
                int(d * self._depth_multiplier), self._min_depth)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope('fpn', reuse=self._reuse_weights):
                    feature_blocks = [
                        'layer_4', 'layer_7', 'layer_14', 'layer_19'
                    ]
                    base_fpn_max_level = min(self._fpn_max_level, 5)
                    feature_block_list = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_block_list.append(feature_blocks[level - 2])
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in feature_block_list],
                        depth=depth_fn(self._additional_layer_depth),
                        use_depthwise=self._use_depthwise,
                        use_explicit_padding=self._use_explicit_padding)
                    feature_maps = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_maps.append(fpn_features['top_down_{}'.format(
                            feature_blocks[level - 2])])
                    last_feature_map = fpn_features['top_down_{}'.format(
                        feature_blocks[base_fpn_max_level - 2])]
                    # Construct coarse features
                    padding = 'VALID' if self._use_explicit_padding else 'SAME'
                    kernel_size = 3
                    for i in range(base_fpn_max_level + 1,
                                   self._fpn_max_level + 1):
                        if self._use_depthwise:
                            conv_op = functools.partial(slim.separable_conv2d,
                                                        depth_multiplier=1)
                        else:
                            conv_op = slim.conv2d
                        if self._use_explicit_padding:
                            last_feature_map = ops.fixed_padding(
                                last_feature_map, kernel_size)
                        last_feature_map = conv_op(
                            last_feature_map,
                            num_outputs=depth_fn(self._additional_layer_depth),
                            kernel_size=[kernel_size, kernel_size],
                            stride=2,
                            padding=padding,
                            scope='bottom_up_Conv2d_{}'.format(
                                i - base_fpn_max_level + 19))
                        feature_maps.append(last_feature_map)
        return feature_maps
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v2.mobilenet_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='layer_19',
              depth_multiplier=self._depth_multiplier,
              conv_defs=_CONV_DEFS if self._use_depthwise else None,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('fpn', reuse=self._reuse_weights):
          feature_blocks = [
              'layer_4', 'layer_7', 'layer_14', 'layer_19'
          ]
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_block_list.append(feature_blocks[level - 2])
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(self._additional_layer_depth),
              use_depthwise=self._use_depthwise)
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(fpn_features['top_down_{}'.format(
                feature_blocks[level - 2])])
          last_feature_map = fpn_features['top_down_{}'.format(
              feature_blocks[base_fpn_max_level - 2])]
          # Construct coarse features
          for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
            if self._use_depthwise:
              conv_op = functools.partial(
                  slim.separable_conv2d, depth_multiplier=1)
            else:
              conv_op = slim.conv2d
            last_feature_map = conv_op(
                last_feature_map,
                num_outputs=depth_fn(self._additional_layer_depth),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19))
            feature_maps.append(last_feature_map)
    return feature_maps
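Two index tricks above are easy to misread: FPN levels are mapped to MobileNet v2 endpoints via feature_blocks[level - 2], and the extra coarse layers continue the endpoint numbering with i - base_fpn_max_level + 19. A standalone sketch, assuming the common defaults fpn_min_level=3 and fpn_max_level=7:

feature_blocks = ['layer_4', 'layer_7', 'layer_14', 'layer_19']
fpn_min_level, fpn_max_level = 3, 7
base_fpn_max_level = min(fpn_max_level, 5)

for level in range(fpn_min_level, base_fpn_max_level + 1):
  print(level, '->', feature_blocks[level - 2])
# 3 -> layer_7, 4 -> layer_14, 5 -> layer_19

for i in range(base_fpn_max_level + 1, fpn_max_level + 1):
  print('bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19))
# bottom_up_Conv2d_20, bottom_up_Conv2d_21, continuing after layer_19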
class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
  """SSD Feature Extractor using MobilenetV2 FPN features."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams_fn,
               fpn_min_level=3,
               fpn_max_level=7,
               additional_layer_depth=256,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False,
               override_base_feature_extractor_hyperparams=False):
    """SSD FPN feature extractor based on Mobilenet v2 architecture.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
        and separable_conv2d ops in the layers that are added on top of the base
        feature extractor.
      fpn_min_level: the highest resolution feature map to use in FPN. The valid
        values are {2, 3, 4, 5} which map to MobileNet v2 layers
        {layer_4, layer_7, layer_14, layer_19}, respectively.
      fpn_max_level: the smallest resolution feature map to construct or use in
        FPN. FPN construction uses feature maps starting from fpn_min_level up
        to fpn_max_level. If the backbone network does not provide enough
        feature maps, additional ones are created by applying stride-2
        convolutions until the desired number of FPN levels is reached.
      additional_layer_depth: additional feature map layer channel depth.
      reuse_weights: whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False.
      use_depthwise: Whether to use depthwise convolutions. Default is False.
      override_base_feature_extractor_hyperparams: Whether to override
        hyperparameters of the base feature extractor with the one from
        `conv_hyperparams_fn`.
    """
    super(SSDMobileNetV2FpnFeatureExtractor, self).__init__(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams_fn=conv_hyperparams_fn,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=
        override_base_feature_extractor_hyperparams)
    self._fpn_min_level = fpn_min_level
    self._fpn_max_level = fpn_max_level
    self._additional_layer_depth = additional_layer_depth
    self._conv_defs = None
    if self._use_depthwise:
      self._conv_defs = _create_modified_mobilenet_config()

  def preprocess(self, resized_inputs):
    """SSD preprocessing.

    Maps pixel values to the range [-1, 1].

    Args:
      resized_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
    """
    return (2.0 / 255.0) * resized_inputs - 1.0

  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v2.mobilenet_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='layer_19',
              depth_multiplier=self._depth_multiplier,
              conv_defs=self._conv_defs,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('fpn', reuse=self._reuse_weights):
          feature_blocks = [
              'layer_4', 'layer_7', 'layer_14', 'layer_19'
          ]
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_block_list.append(feature_blocks[level - 2])
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(self._additional_layer_depth),
              use_depthwise=self._use_depthwise,
              use_explicit_padding=self._use_explicit_padding)
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(fpn_features['top_down_{}'.format(
                feature_blocks[level - 2])])
          last_feature_map = fpn_features['top_down_{}'.format(
              feature_blocks[base_fpn_max_level - 2])]
          # Construct coarse features
          padding = 'VALID' if self._use_explicit_padding else 'SAME'
          kernel_size = 3
          for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
            if self._use_depthwise:
              conv_op = functools.partial(
                  slim.separable_conv2d, depth_multiplier=1)
            else:
              conv_op = slim.conv2d
            if self._use_explicit_padding:
              last_feature_map = ops.fixed_padding(
                  last_feature_map, kernel_size)
            last_feature_map = conv_op(
                last_feature_map,
                num_outputs=depth_fn(self._additional_layer_depth),
                kernel_size=[kernel_size, kernel_size],
                stride=2,
                padding=padding,
                scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19))
            feature_maps.append(last_feature_map)
    return feature_maps
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=None, regularize_depthwise=True)):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams
              else context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)

      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('fpn', reuse=self._reuse_weights):
          feature_blocks = [
              'Conv2d_3_pointwise', 'Conv2d_5_pointwise', 'Conv2d_11_pointwise',
              'Conv2d_13_pointwise'
          ]
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_block_list.append(feature_blocks[level - 2])
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(256))
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(fpn_features['top_down_{}'.format(
                feature_blocks[level - 2])])
          last_feature_map = fpn_features['top_down_{}'.format(
              feature_blocks[base_fpn_max_level - 2])]
          # Construct coarse features
          for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
            last_feature_map = slim.conv2d(
                last_feature_map,
                num_outputs=depth_fn(256),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13))
            feature_maps.append(last_feature_map)
    return feature_maps
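The MobilenetV1 variant uses the same coarse-layer naming arithmetic, offset so the numbering continues after Conv2d_13. A quick sketch, assuming fpn_max_level=7 so that base_fpn_max_level=5:

base_fpn_max_level, fpn_max_level = 5, 7
for i in range(base_fpn_max_level + 1, fpn_max_level + 1):
  print('bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13))
# bottom_up_Conv2d_14, bottom_up_Conv2d_15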