def test_get_expected_feature_map_shapes_with_inception_v3(self):
    image_features = {
        'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
        'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
        'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
    }

    feature_maps = feature_map_generators.multi_resolution_feature_maps(
        feature_map_layout=INCEPTION_V3_LAYOUT,
        depth_multiplier=1,
        min_depth=32,
        insert_1x1_conv=True,
        image_features=image_features)

    expected_feature_map_shapes = {
        'Mixed_5d': (4, 35, 35, 256),
        'Mixed_6e': (4, 17, 17, 576),
        'Mixed_7c': (4, 8, 8, 1024),
        'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
        'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
        'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128)}

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      out_feature_maps = sess.run(feature_maps)
      out_feature_map_shapes = dict(
          (key, value.shape) for key, value in out_feature_maps.items())
      self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
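
For reference, INCEPTION_V3_LAYOUT is defined elsewhere in the test module. A layout consistent with the expected shapes above would look like the sketch below; the actual constant is not shown in this excerpt and may carry additional keys.

# Hypothetical reconstruction, inferred from the expected output shapes:
# -1 keeps the depth of the named backbone layer, and each '' entry adds a
# stride-2 3x3 convolution (512, 256, 128 filters) that halves the spatial
# resolution, taking 8x8 down to 4x4, 2x2 and 1x1.
INCEPTION_V3_LAYOUT = {
    'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
    'layer_depth': [-1, -1, -1, 512, 256, 128],
}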
Example #2
 def feature_map_generator(image_features):
     return feature_map_generators.multi_resolution_feature_maps(
         feature_map_layout=feature_map_layout,
         depth_multiplier=1,
         min_depth=32,
         insert_1x1_conv=True,
         image_features=image_features)
  def extract_features(self, preprocessed_inputs, state_saver=None,
                       state_name='lstm_state', unroll_length=5, scope=None):
    with tf.variable_scope('mock_model'):
      net_large = self.extract_base_features_large(preprocessed_inputs)
      net_small = self.extract_base_features_small(preprocessed_inputs)
      net = slim.conv2d(
          inputs=tf.concat([net_large, net_small], axis=3),
          num_outputs=32,
          kernel_size=1,
          scope='layer1')
      image_features = {'last_layer': net}

    self._states_out = {}
    feature_map_layout = {
        'from_layer': ['last_layer'],
        'layer_depth': [-1],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }
    feature_maps = feature_map_generators.multi_resolution_feature_maps(
        feature_map_layout=feature_map_layout,
        depth_multiplier=self._depth_multiplier,
        min_depth=self._min_depth,
        insert_1x1_conv=True,
        image_features=image_features)
    return list(feature_maps.values())
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)
        '''
    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }
    '''
        feature_map_layout = {
            'from_layer': [
                'FeatureExtractor/vgg_16/conv4/conv4_3',
                'FeatureExtractor/vgg_16/fc7', '', '', '', ''
            ],
            'layer_depth': [-1, -1, 256, 128, 128, 128],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }
        with tf.variable_scope('vgg_16', reuse=self._reuse_weights) as scope:
            with slim.arg_scope(vgg.vgg_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    # TODO(skligys): Enable fused batch norm once quantization supports it.
                    with slim.arg_scope([slim.batch_norm], fused=False):

                        _, image_features = vgg.vgg_16(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            num_classes=None,
                            is_training=self._is_training,
                            scope=scope)
            # Debugging aid: list the backbone endpoints so the 'from_layer'
            # names above can be checked against them.
            print(image_features.keys())
            with slim.arg_scope(self._conv_hyperparams_fn()):
                # TODO(skligys): Enable fused batch norm once quantization supports it.
                with slim.arg_scope([slim.batch_norm], fused=False):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return feature_maps.values()
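
The printed endpoint keys matter because every non-empty 'from_layer' entry must name a key in image_features. A minimal sanity check could look like the following sketch (check_layout_endpoints is a hypothetical helper, not part of the original code):

def check_layout_endpoints(feature_map_layout, image_features):
    # '' entries are synthesized by new convolutions and need no endpoint;
    # every named entry must match a backbone endpoint exactly.
    missing = [name for name in feature_map_layout['from_layer']
               if name and name not in image_features]
    if missing:
        raise ValueError('Endpoints missing from backbone: %s' % missing)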
    def test_get_expected_feature_map_shapes_with_inception_v3(self):
        image_features = {
            'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
            'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
            'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
        }

        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=INCEPTION_V3_LAYOUT,
            depth_multiplier=1,
            min_depth=32,
            insert_1x1_conv=True,
            image_features=image_features)

        expected_feature_map_shapes = {
            'Mixed_5d': (4, 35, 35, 256),
            'Mixed_6e': (4, 17, 17, 576),
            'Mixed_7c': (4, 8, 8, 1024),
            'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
            'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
            'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128)
        }

        init_op = tf.global_variables_initializer()
        with self.test_session() as sess:
            sess.run(init_op)
            out_feature_maps = sess.run(feature_maps)
            out_feature_map_shapes = dict(
                (key, value.shape) for key, value in out_feature_maps.items())
            self.assertDictEqual(out_feature_map_shapes,
                                 expected_feature_map_shapes)
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    Raises:
      ValueError if conv_defs is not provided or from_layer does not meet the
        size requirement.
    """

        if not self._conv_defs:
            raise ValueError('Must provide backbone conv defs.')

        if len(self._from_layer) != 2:
            raise ValueError('SSD input feature names are not provided.')

        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer':
            [self._from_layer[0], self._from_layer[1], '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_depthwise':
            self._use_depthwise,
            'use_explicit_padding':
            self._use_explicit_padding,
        }

        with tf.variable_scope(self._scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v3.training_scope(is_training=None, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v3.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        conv_defs=self._conv_defs,
                        final_endpoint=self._from_layer[1],
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
                with slim.arg_scope(self._conv_hyperparams_fn()):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return list(feature_maps.values())
    def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
            self):
        image_features = {
            'Conv2d_11_pointwise':
            tf.random_uniform([4, 16, 16, 512], dtype=tf.float32),
            'Conv2d_13_pointwise':
            tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32),
        }

        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
            depth_multiplier=1,
            min_depth=32,
            insert_1x1_conv=True,
            image_features=image_features)

        expected_feature_map_shapes = {
            'Conv2d_11_pointwise': (4, 16, 16, 512),
            'Conv2d_13_pointwise': (4, 8, 8, 1024),
            'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
            'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
            'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256)
        }

        init_op = tf.global_variables_initializer()
        with self.test_session() as sess:
            sess.run(init_op)
            out_feature_maps = sess.run(feature_maps)
            out_feature_map_shapes = dict(
                (key, value.shape) for key, value in out_feature_maps.items())
            self.assertDictEqual(out_feature_map_shapes,
                                 expected_feature_map_shapes)
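
EMBEDDED_SSD_MOBILENET_V1_LAYOUT is likewise defined elsewhere in the test module. The generated key names encode kernel size and stride (for example 'Conv2d_4_2x2_s2_256' is a 2x2 convolution with stride 2 and depth 256), so a layout consistent with the expected shapes plausibly includes an explicit 'conv_kernel_size' list:

# Hypothetical reconstruction, inferred from the expected shapes and key
# names above; the actual constant may differ in detail.
EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
    'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''],
    'layer_depth': [-1, -1, 512, 256, 256],
    'conv_kernel_size': [-1, -1, 3, 3, 2],
}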
  def extract_features(self, preprocessed_inputs, fpn=0):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    feature_map_layout = {
        'from_layer': ['block2/unit_8', 'block3/unit_4', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
    }

    with slim.arg_scope(self._conv_hyperparams):
        with slim.arg_scope([slim.batch_norm], fused=False):
            image_features = shufflenet_v1_base(preprocessed_inputs)
            for name, feature in image_features.items():
                print(name, feature.get_shape())
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features)

    return feature_maps.values()
Example #9
  def extract_features(self, preprocessed_inputs, audio_inputs=None, preprocessed_flows=None):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    preprocessed_flows = shape_utils.check_min_image_dim(
        33, preprocessed_flows)

    feature_map_layout = {
        'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
        'layer_depth': [-1, -1, -1, 512, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope:

       if audio_inputs is not None:
         audio_features = self.extract_audio_features(audio_inputs)
       else:
         audio_features = None

       """
       _, image_features = inception_v3.inception_v3_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='Mixed_7c',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            scope=scope)
       """
       _, image_features = inception_v3_m2.inception_v3_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            ops.pad_to_multiple(preprocessed_flows, self._pad_to_multiple),
            final_endpoint='Mixed_7c',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            scope=scope)

       feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features)

       # A list of multi-resolution feature maps: Mixed_5d, Mixed_6e, ...

    return feature_maps.values(), audio_features
 def feature_map_generator(image_features):
   return feature_map_generators.multi_resolution_feature_maps(
       feature_map_layout=feature_map_layout,
       depth_multiplier=1,
       min_depth=32,
       insert_1x1_conv=True,
       image_features=image_features)
  def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
      self):
    image_features = {
        'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512],
                                                 dtype=tf.float32),
        'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024],
                                                 dtype=tf.float32),
    }

    feature_maps = feature_map_generators.multi_resolution_feature_maps(
        feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
        depth_multiplier=1,
        min_depth=32,
        insert_1x1_conv=True,
        image_features=image_features)

    expected_feature_map_shapes = {
        'Conv2d_11_pointwise': (4, 16, 16, 512),
        'Conv2d_13_pointwise': (4, 8, 8, 1024),
        'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
        'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
        'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256)}

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      out_feature_maps = sess.run(feature_maps)
      out_feature_map_shapes = dict(
          (key, value.shape) for key, value in out_feature_maps.items())
      self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
  def _extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if image height or width are not 256 pixels.
    """
    image_shape = preprocessed_inputs.get_shape()
    image_shape.assert_has_rank(4)
    image_height = image_shape[1].value
    image_width = image_shape[2].value

    if image_height is None or image_width is None:
      shape_assert = tf.Assert(
          tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                         tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
          ['image size must be 256 in both height and width.'])
      with tf.control_dependencies([shape_assert]):
        preprocessed_inputs = tf.identity(preprocessed_inputs)
    elif image_height != 256 or image_width != 256:
      raise ValueError('image size must be = 256 in both height and width;'
                       ' image dim = %d,%d' % (image_height, image_width))

    feature_map_layout = {
        'from_layer': [
            'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''
        ],
        'layer_depth': [-1, -1, 512, 256, 256],
        'conv_kernel_size': [-1, -1, 3, 3, 2],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with slim.arg_scope(self._conv_hyperparams):
      with slim.arg_scope([slim.batch_norm], fused=False):
        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
Example #13
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

        Args:
          preprocessed_inputs: a [batch, height, width, channels] float tensor
            representing a batch of images.

        Returns:
          feature_maps: a list of tensors where the ith tensor has shape
            [batch, height_i, width_i, depth_i]
        """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer': [
                'FeatureExtractor/{}/block3'.format(self._resnet_scope_name),
                'FeatureExtractor/{}/block4'.format(self._resnet_scope_name),
                '', '', '', ''
            ],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_depthwise':
            self._use_depthwise,
            'use_explicit_padding':
            self._use_explicit_padding,
        }

        if self._num_layers == 7:
            feature_map_layout['from_layer'] += ['']
            feature_map_layout['layer_depth'] += [64]

        with tf.variable_scope(self._resnet_scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = self._resnet_base_fn(
                        inputs=ops.pad_to_multiple(preprocessed_inputs,
                                                   self._pad_to_multiple),
                        num_classes=None,
                        is_training=None,
                        global_pool=False,
                        output_stride=None,
                        store_non_strided_activations=True,
                        min_base_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
Example #14
    def extract_features(self, preprocessed_inputs,
                         preprocessed_second_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        feature_map_layout = {
            'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
            'layer_depth': [-1, -1, -1, 512, 256, 128],
        }

        #print("preprocessed_second_inputs", preprocessed_second_inputs.get_shape())

        with tf.control_dependencies([shape_assert]):
            with slim.arg_scope(self._conv_hyperparams):
                with tf.variable_scope('InceptionV3',
                                       reuse=self._reuse_weights) as scope:

                    audio_features = self.extract_audio_features(
                        preprocessed_second_inputs)

                    _, image_features = inception_v3.inception_v3_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        #              audio_features['fc5'],
                        #              audio_features['flat'],
                        final_endpoint='Mixed_7c',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)

                    #audio_features = self.extract_audio_features(preprocessed_second_inputs)

                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)
                    #audio_features=audio_features)

        return feature_maps.values(), audio_features['fc4']
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer':
            ['block4', 'block7', 'block8', 'block9', 'block10', 'block11'],
            'layer_depth': [-1, -1, -1, -1, -1, -1],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }
        '''
    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope('InceptionV2',
                             reuse=self._reuse_weights) as scope:
        _, image_features = inception_v2.inception_v2_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='Mixed_5c',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            scope=scope)
    '''

        with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=0.0)):
            with tf.variable_scope('vgg16',
                                   reuse=self._reuse_weights) as var_scope:
                _, image_features = vgg.vgg_16_ssd(preprocessed_inputs,
                                                   num_classes=3,
                                                   is_training=True,
                                                   dropout_keep_prob=0.9,
                                                   spatial_squeeze=False,
                                                   scope=var_scope,
                                                   fc_conv_padding='VALID',
                                                   global_pool=False,
                                                   end_point='pool5')
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features)

        return feature_maps.values()
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    '''
    feature_map_layout = {
        'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_depthwise': self._use_depthwise,
        'use_explicit_padding': self._use_explicit_padding,
    }

    '''

    feature_map_layout = {
        'from_layer': ['layer_5/expansion_output', 'layer_6/expansion_output', 'layer_7/expansion_output',
                       'layer_10/expansion_output', 'layer_15/expansion_output', 'layer_19'],
        'layer_depth': [-1, -1, -1, -1, -1, -1],
        'use_depthwise': self._use_depthwise,
        'use_explicit_padding': self._use_explicit_padding,
    }

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_utils.resnet_arg_scope()), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v2.mobilenet_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='layer_19',
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
        with slim.arg_scope(self._conv_hyperparams_fn()):
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)


    return feature_maps.values()
    def _extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer':
            ['layer_15/expansion_output', 'layer_19', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_depthwise': self._use_depthwise,
            'use_explicit_padding': self._use_explicit_padding,
        }

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(
                    is_training=(self._is_training and self._batch_norm_trainable),
                    bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                # TODO(b/68150321): Enable fused batch norm once quantization
                # supports it.
                with slim.arg_scope([slim.batch_norm], fused=False):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='layer_19',
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
                with slim.arg_scope(self._conv_hyperparams):
                    # TODO(b/68150321): Enable fused batch norm once quantization
                    # supports it.
                    with slim.arg_scope([slim.batch_norm], fused=False):
                        feature_maps = feature_map_generators.multi_resolution_feature_maps(
                            feature_map_layout=feature_map_layout,
                            depth_multiplier=self._depth_multiplier,
                            min_depth=self._min_depth,
                            insert_1x1_conv=True,
                            image_features=image_features)

        return feature_maps.values()
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    feature_map_layout = {
        'from_layer': ['Cell_10', 'Cell_11', '', '',
                       '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
    }

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(self._conv_hyperparams):
        with slim.arg_scope([slim.batch_norm], fused=False):
          with tf.variable_scope('NasNetLarge',
                                 reuse=self._reuse_weights) as scope:
            preprocessed_and_padded_inputs = ops.pad_to_multiple(preprocessed_inputs,
                                                                 self._pad_to_multiple)
            _, image_features = nasnet.build_nasnet_large(
                preprocessed_and_padded_inputs,
                num_classes=None,
                final_endpoint='Cell_11')
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features)

    # NASNet leaves the batch dimension dynamic; pin the static shape so
    # downstream code sees fully defined feature map shapes.
    batch_size = preprocessed_and_padded_inputs.get_shape().as_list()[0]
    for feature_map_name in feature_maps:
        feature_maps[feature_map_name].set_shape(
            [batch_size] +
            feature_maps[feature_map_name].get_shape().as_list()[1:])
    return feature_maps.values()
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer':
            ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '',
             ''][:self._num_layers],
            'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_depthwise': self._use_depthwise,
        'use_explicit_padding': self._use_explicit_padding,
    }

    with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
          slim.arg_scope(
              [mobilenet.depth_multiplier], min_depth=self._min_depth):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          # TODO(b/68150321): Enable fused batch norm once quantization
          # supports it.
          with slim.arg_scope([slim.batch_norm], fused=False):
            _, image_features = mobilenet_v2.mobilenet_base(
                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                final_endpoint='layer_19',
                depth_multiplier=self._depth_multiplier,
                use_explicit_padding=self._use_explicit_padding,
                scope=scope)
        with slim.arg_scope(self._conv_hyperparams_fn()):
          # TODO(b/68150321): Enable fused batch norm once quantization
          # supports it.
          with slim.arg_scope([slim.batch_norm], fused=False):
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features)

    return feature_maps.values()
  def _extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=(self._batch_norm_trainable and self._is_training))):
        # TODO(skligys): Enable fused batch norm once quantization supports it.
        with slim.arg_scope([slim.batch_norm], fused=False):
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
      with slim.arg_scope(self._conv_hyperparams):
        # TODO(skligys): Enable fused batch norm once quantization supports it.
        with slim.arg_scope([slim.batch_norm], fused=False):
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=None, regularize_depthwise=True)):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams
              else context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features)

    return feature_maps.values()
Example #23
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """

        feature_map_layout = {
            'from_layer': ['Cell_7', 'Cell_11', '', '', '',
                           ''][:self._num_layers],
            'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with slim.arg_scope(
                pnasnet_large_arg_scope_for_detection(
                    is_batch_norm_training=self._is_training)):
            with slim.arg_scope(
                [slim.conv2d, slim.batch_norm, slim.separable_conv2d],
                    reuse=self._reuse_weights):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = pnasnet.build_pnasnet_large(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        num_classes=None,
                        is_training=self._is_training,
                        final_endpoint='Cell_11')
        with tf.compat.v1.variable_scope('SSD_feature_maps',
                                         reuse=self._reuse_weights):
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)
        padded_inputs = ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple)

        feature_map_layout = {
            'from_layer': ['C4', 'C5', '', '', '', ''],
            # Do not specify the layer depths (number of filters) for C4 and C5, as
            # their values are determined based on the backbone.
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_depthwise': self._use_depthwise,
            'use_explicit_padding': self._use_explicit_padding,
        }

        with tf.variable_scope(self._scope_name, reuse=self._reuse_weights):
            with slim.arg_scope([slim.batch_norm],
                                is_training=self._is_training,
                                epsilon=0.01,
                                decay=0.99,
                                center=True,
                                scale=True):
                endpoints = self._backbone_fn(
                    padded_inputs, multiplier=self._depth_multiplier)

            image_features = {'C4': endpoints['C4'], 'C5': endpoints['C5']}
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return list(feature_maps.values())
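
The comment in the layout above generalizes: -1 passes the named backbone endpoint through (optionally behind an inserted 1x1 convolution), while each '' entry appends a new stride-2 3x3 convolution that halves the spatial size. A minimal sketch with assumed input shapes:

# Sketch only; the 'C4'/'C5' shapes are assumptions for illustration.
image_features = {
    'C4': tf.random_uniform([1, 32, 32, 256], dtype=tf.float32),
    'C5': tf.random_uniform([1, 16, 16, 512], dtype=tf.float32),
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
    feature_map_layout={
        'from_layer': ['C4', 'C5', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
    },
    depth_multiplier=1,
    min_depth=32,
    insert_1x1_conv=True,
    image_features=image_features)
# Resulting spatial sizes per level: 32, 16, 8, 4, 2, 1.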
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        feature_map_layout = {
            'from_layer':
            ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            #'use_depthwise':True
        }

        with tf.control_dependencies([shape_assert]):
            with slim.arg_scope(self._conv_hyperparams):
                with slim.arg_scope([slim.batch_norm], fused=False):
                    with tf.variable_scope('MobilenetV1',
                                           reuse=self._reuse_weights) as scope:
                        _, image_features = mobilenet_v1.mobilenet_v1_base(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            final_endpoint='Conv2d_13_pointwise',
                            min_depth=self._min_depth,
                            depth_multiplier=self._depth_multiplier,
                            scope=scope)
                        feature_maps = feature_map_generators.multi_resolution_feature_maps(
                            feature_map_layout=feature_map_layout,
                            depth_multiplier=self._depth_multiplier,
                            min_depth=self._min_depth,
                            insert_1x1_conv=True,
                            image_features=image_features)

        return feature_maps.values()
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer':
            ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }

        with slim.arg_scope(self._conv_hyperparams):
            # TODO: Enable fused batch norm once quantization supports it.
            with slim.arg_scope([slim.batch_norm], fused=False):
                with tf.variable_scope('MobilenetV1',
                                       reuse=self._reuse_weights) as scope:
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return feature_maps.values()
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(
            tf.equal(tf.shape(preprocessed_inputs)[1], 256),
            tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
        ['image size must be 256 in both height and width.'])

    feature_map_layout = {
        'from_layer': [
            'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''
        ],
        'layer_depth': [-1, -1, 512, 256, 256],
        'conv_kernel_size': [-1, -1, 3, 3, 2],
    }

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(self._conv_hyperparams):
        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """

    feature_map_layout = {
        'from_layer': ['Cell_7', 'Cell_11', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with slim.arg_scope(
        pnasnet_large_arg_scope_for_detection(
            is_batch_norm_training=self._is_training)):
      with slim.arg_scope([slim.conv2d, slim.batch_norm, slim.separable_conv2d],
                          reuse=self._reuse_weights):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = pnasnet.build_pnasnet_large(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              num_classes=None,
              is_training=self._is_training,
              final_endpoint='Cell_11')
    with tf.variable_scope('SSD_feature_maps', reuse=self._reuse_weights):
      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features)

    return feature_maps.values()
Example #29
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '',
                           ''][:self._num_layers],
            'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope('InceptionV2',
                                   reuse=self._reuse_weights) as scope:
                _, image_features = inception_v2.inception_v2_base(
                    ops.pad_to_multiple(preprocessed_inputs,
                                        self._pad_to_multiple),
                    final_endpoint='Mixed_5c',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    feature_map_layout = {
        'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
    }

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(self._conv_hyperparams):
        with tf.variable_scope('InceptionV2',
                               reuse=self._reuse_weights) as scope:
          _, image_features = inception_v2.inception_v2_base(
              preprocessed_inputs,
              final_endpoint='Mixed_5c',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
Example #31
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        feature_map_layout = {
            'from_layer': ['conv4', '', '', '', '', '', ''],
            'layer_depth': [-1, 1024, 1024, 512, 256, 256, 256],
        }

        with tf.control_dependencies([shape_assert]):
            with slim.arg_scope(self._conv_hyperparams):
                with tf.variable_scope('vgg_16',
                                       reuse=self._reuse_weights) as scope:
                    net, image_features = vgg.vgg_16_base(
                        preprocessed_inputs,
                        final_endpoint='pool5',
                        trainable=False,
                        scope=scope)
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return feature_maps.values()
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope('InceptionV2',
                             reuse=self._reuse_weights) as scope:
        _, image_features = inception_v2.inception_v2_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='Mixed_5c',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            scope=scope)
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features)

    return feature_maps.values()
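
ops.pad_to_multiple above keeps the padded input's height and width divisible by the network's stride. A conceptual stand-in, assuming zero padding on the bottom/right up to the nearest multiple (the library's exact alignment may differ):

import tensorflow as tf

def pad_to_multiple_sketch(image, multiple):
  # Hypothetical helper, not the library function: pads a [batch, H, W, C]
  # tensor so H and W become the next multiples of `multiple`.
  shape = tf.shape(image)
  height, width = shape[1], shape[2]
  target_h = tf.cast(tf.ceil(height / multiple), tf.int32) * multiple
  target_w = tf.cast(tf.ceil(width / multiple), tf.int32) * multiple
  return tf.pad(image,
                [[0, 0], [0, target_h - height], [0, target_w - width], [0, 0]])
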
  def extract_features(self, preprocessed_inputs, state_saver=None,
                       state_name='lstm_state', unroll_length=10, scope=None):
    """Extract features from preprocessed inputs.

    The features include the base network features, lstm features and SSD
    features, organized in the following name scope:

    <scope>/MobilenetV2_1/...
    <scope>/MobilenetV2_2/...
    <scope>/LSTM/...
    <scope>/FeatureMap/...

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: Python string, the name to use with the state_saver.
      unroll_length: number of steps to unroll the lstm.
      scope: Scope for the base network of the feature extractor.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    Raises:
      ValueError: if interleave_method not recognized or large and small base
        network output feature maps of different sizes.
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    preprocessed_inputs = ops.pad_to_multiple(
        preprocessed_inputs, self._pad_to_multiple)
    batch_size = preprocessed_inputs.shape[0].value // unroll_length
    batch_axis = 0
    nets = []

    # Batch processing of mobilenet features.
    with slim.arg_scope(mobilenet_v2.training_scope(
        is_training=self._is_training,
        bn_decay=0.9997)), \
        slim.arg_scope([mobilenet.depth_multiplier],
                       min_depth=self._min_depth, divisible_by=8):
      # Big model.
      net, _ = self.extract_base_features_large(preprocessed_inputs)
      nets.append(net)
      large_base_feature_shape = net.shape

      # Small models
      net, _ = self.extract_base_features_small(preprocessed_inputs)
      nets.append(net)
      small_base_feature_shape = net.shape
      if not (large_base_feature_shape[1] == small_base_feature_shape[1] and
              large_base_feature_shape[2] == small_base_feature_shape[2]):
        raise ValueError('Large and Small base network feature map dimension '
                         'not equal!')

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope('LSTM', reuse=self._reuse_weights):
        output_size = (large_base_feature_shape[1], large_base_feature_shape[2])
        lstm_cell, init_state, step = self.create_lstm_cell(
            batch_size, output_size, state_saver, state_name)

        nets_seq = [
            tf.split(net, unroll_length, axis=batch_axis) for net in nets
        ]

        net_seq, states_out = rnn_decoder.multi_input_rnn_decoder(
            nets_seq,
            init_state,
            lstm_cell,
            step,
            selection_strategy=self._interleave_method,
            is_training=self._is_training,
            is_quantized=self._is_quantized,
            pre_bottleneck=self._pre_bottleneck,
            flatten_state=self._flatten_state,
            scope=None)
        self._states_out = states_out

      batcher_ops = None
      if state_saver is not None:
        self._step = state_saver.state(state_name + '_step')
        batcher_ops = [
            state_saver.save_state(state_name + '_c', states_out[-1][0]),
            state_saver.save_state(state_name + '_h', states_out[-1][1]),
            state_saver.save_state(state_name + '_step', self._step + 1)]
      image_features = {}
      with tf_ops.control_dependencies(batcher_ops):
        image_features['layer_19'] = tf.concat(net_seq, 0)

      # SSD layers.
      with tf.variable_scope('FeatureMap'):
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=self._feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features,
            pool_residual=True)
    return feature_maps.values()
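
The unroll bookkeeping above assumes the frames of each clip are stacked along the batch axis, so one tf.split yields one tensor per LSTM step. A tiny sketch with illustrative sizes:

import tensorflow as tf

unroll_length = 5
clips = 4  # assumed number of video clips in the batch
frames = tf.random_uniform([clips * unroll_length, 32, 32, 16])
# One [4, 32, 32, 16] tensor per LSTM step (assumes step-major stacking):
steps = tf.split(frames, unroll_length, axis=0)
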
    def extract_features(self,
                         preprocessed_inputs,
                         state_saver=None,
                         state_name='lstm_state',
                         unroll_length=5,
                         scope=None):
        """Extracts features from preprocessed inputs.

    The features include the base network features, lstm features and SSD
    features, organized in the following name scope:

    <parent scope>/MobilenetV1/...
    <parent scope>/LSTM/...
    <parent scope>/FeatureMaps/...

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: A python string for the name to use with the state_saver.
      unroll_length: The number of steps to unroll the lstm.
      scope: The scope for the base network of the feature extractor.

    Returns:
      A list of tensors where the ith tensor has shape [batch, height_i,
      width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)
        with slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(
                    is_training=self._is_training)):
            with (slim.arg_scope(self._conv_hyperparams_fn())
                  if self._override_base_feature_extractor_hyperparams else
                  context_manager.IdentityContextManager()):
                with slim.arg_scope([slim.batch_norm], fused=False):
                    # Base network.
                    with tf.variable_scope(scope,
                                           self._base_network_scope,
                                           reuse=self._reuse_weights) as scope:
                        net, image_features = mobilenet_v1.mobilenet_v1_base(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            final_endpoint='Conv2d_13_pointwise',
                            min_depth=self._min_depth,
                            depth_multiplier=self._depth_multiplier,
                            scope=scope)

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with slim.arg_scope([slim.batch_norm],
                                fused=False,
                                is_training=self._is_training):
                # ConvLSTM layers.
                with tf.variable_scope(
                        'LSTM', reuse=self._reuse_weights) as lstm_scope:
                    lstm_cell = lstm_cells.BottleneckConvLSTMCell(
                        filter_size=(3, 3),
                        output_size=(net.shape[1].value, net.shape[2].value),
                        num_units=max(self._min_depth, self._lstm_state_depth),
                        activation=tf.nn.relu6,
                        visualize_gates=True)

                    net_seq = list(tf.split(net, unroll_length))
                    if state_saver is None:
                        init_state = lstm_cell.init_state(
                            state_name, net.shape[0].value // unroll_length,
                            tf.float32)
                    else:
                        c = state_saver.state('%s_c' % state_name)
                        h = state_saver.state('%s_h' % state_name)
                        init_state = (c, h)

                    # Identities added for inputting state tensors externally.
                    c_ident = tf.identity(init_state[0],
                                          name='lstm_state_in_c')
                    h_ident = tf.identity(init_state[1],
                                          name='lstm_state_in_h')
                    init_state = (c_ident, h_ident)

                    net_seq, states_out = rnn_decoder.rnn_decoder(
                        net_seq, init_state, lstm_cell, scope=lstm_scope)
                    batcher_ops = None
                    self._states_out = states_out
                    if state_saver is not None:
                        self._step = state_saver.state('%s_step' % state_name)
                        batcher_ops = [
                            state_saver.save_state('%s_c' % state_name,
                                                   states_out[-1][0]),
                            state_saver.save_state('%s_h' % state_name,
                                                   states_out[-1][1]),
                            state_saver.save_state('%s_step' % state_name,
                                                   self._step + 1)
                        ]
                    with tf_ops.control_dependencies(batcher_ops):
                        image_features['Conv2d_13_pointwise_lstm'] = tf.concat(
                            net_seq, 0)

                    # Identities added for reading output states, to be reused externally.
                    tf.identity(states_out[-1][0], name='lstm_state_out_c')
                    tf.identity(states_out[-1][1], name='lstm_state_out_h')

                # SSD layers.
                with tf.variable_scope('FeatureMaps',
                                       reuse=self._reuse_weights):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=self._feature_map_layout,
                        depth_multiplier=(self._depth_multiplier),
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return feature_maps.values()
Example #35
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if image height or width are not 256 pixels.
    """
        image_shape = preprocessed_inputs.get_shape()
        image_shape.assert_has_rank(4)
        image_height = image_shape[1].value
        image_width = image_shape[2].value

        if image_height is None or image_width is None:
            shape_assert = tf.Assert(
                tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                               tf.equal(tf.shape(preprocessed_inputs)[2],
                                        256)),
                ['image size must be 256 in both height and width.'])
            with tf.control_dependencies([shape_assert]):
                preprocessed_inputs = tf.identity(preprocessed_inputs)
        elif image_height != 256 or image_width != 256:
            raise ValueError(
                'image size must be = 256 in both height and width;'
                ' image dim = %d,%d' % (image_height, image_width))

        feature_map_layout = {
            'from_layer':
            ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256],
            'conv_kernel_size': [-1, -1, 3, 3, 2],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
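
With the fixed 256x256 input enforced above and the usual MobileNetV1 strides (Conv2d_11_pointwise at 16, Conv2d_13_pointwise at 32), the layout bottoms out at a 1x1 map, which is why the final 'conv_kernel_size' entry is 2 rather than 3. A hedged shape trace:

# 256x256 input, standard MobileNetV1 strides assumed:
#   Conv2d_11_pointwise    -> 16x16
#   Conv2d_13_pointwise    ->  8x8
#   new 3x3/s2 map (512)   ->  4x4
#   new 3x3/s2 map (256)   ->  2x2
#   new 2x2/s2 map (256)   ->  1x1  (a 3x3 kernel would overhang the 2x2 map)
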
  def extract_features(self,
                       preprocessed_inputs,
                       state_saver=None,
                       state_name='lstm_state',
                       unroll_length=5,
                       scope=None):
    """Extracts features from preprocessed inputs.

    The features include the base network features, lstm features and SSD
    features, organized in the following name scope:

    <parent scope>/MobilenetV1/...
    <parent scope>/LSTM/...
    <parent scope>/FeatureMaps/...

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: A python string for the name to use with the state_saver.
      unroll_length: The number of steps to unroll the lstm.
      scope: The scope for the base network of the feature extractor.

    Returns:
      A list of tensors where the ith tensor has shape [batch, height_i,
      width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(is_training=self._is_training)):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams else
            context_manager.IdentityContextManager()):
        with slim.arg_scope([slim.batch_norm], fused=False):
          # Base network.
          with tf.variable_scope(
              scope, self._base_network_scope,
              reuse=self._reuse_weights) as scope:
            net, image_features = mobilenet_v1.mobilenet_v1_base(
                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                final_endpoint='Conv2d_13_pointwise',
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier,
                scope=scope)

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with slim.arg_scope(
          [slim.batch_norm], fused=False, is_training=self._is_training):
        # ConvLSTM layers.
        with tf.variable_scope('LSTM', reuse=self._reuse_weights) as lstm_scope:
          lstm_cell = lstm_cells.BottleneckConvLSTMCell(
              filter_size=(3, 3),
              output_size=(net.shape[1].value, net.shape[2].value),
              num_units=max(self._min_depth, self._lstm_state_depth),
              activation=tf.nn.relu6,
              visualize_gates=True)

          net_seq = list(tf.split(net, unroll_length))
          if state_saver is None:
            init_state = lstm_cell.init_state(
                state_name, net.shape[0].value // unroll_length, tf.float32)
          else:
            c = state_saver.state('%s_c' % state_name)
            h = state_saver.state('%s_h' % state_name)
            init_state = (c, h)

          # Identities added for inputting state tensors externally.
          c_ident = tf.identity(init_state[0], name='lstm_state_in_c')
          h_ident = tf.identity(init_state[1], name='lstm_state_in_h')
          init_state = (c_ident, h_ident)

          net_seq, states_out = rnn_decoder.rnn_decoder(
              net_seq, init_state, lstm_cell, scope=lstm_scope)
          batcher_ops = None
          self._states_out = states_out
          if state_saver is not None:
            self._step = state_saver.state('%s_step' % state_name)
            batcher_ops = [
                state_saver.save_state('%s_c' % state_name, states_out[-1][0]),
                state_saver.save_state('%s_h' % state_name, states_out[-1][1]),
                state_saver.save_state('%s_step' % state_name, self._step + 1)
            ]
          with tf_ops.control_dependencies(batcher_ops):
            image_features['Conv2d_13_pointwise_lstm'] = tf.concat(net_seq, 0)

          # Identities added for reading output states, to be reused externally.
          tf.identity(states_out[-1][0], name='lstm_state_out_c')
          tf.identity(states_out[-1][1], name='lstm_state_out_h')

        # SSD layers.
        with tf.variable_scope('FeatureMaps', reuse=self._reuse_weights):
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=self._feature_map_layout,
              depth_multiplier=(self._depth_multiplier),
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
Example #37
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['vgg_16/conv4_3_norm', 'vgg_16/fc7', 'vgg_16/conv6_2',
                       'vgg_16/conv7_2', 'vgg_16/conv8_2', 'vgg_16/conv9_2'],
        'layer_depth': [-1, -1, -1, -1, -1, -1],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }
    net, image_features = vgg.vgg_16(
        ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
        final_endpoint='pool5', spatial_squeeze=False)
    # TODO: double-check the scale-filler initialization against the Caffe model.
    image_features['vgg_16/conv4_3_norm'] = custom_layers.l2_normalization(
        image_features['vgg_16/conv4/conv4_3'], scaling=True,
        scope='vgg_16/conv4_3_norm')
    with slim.arg_scope(self._conv_hyperparams):
      with tf.variable_scope('vgg_16', reuse=self._reuse_weights) as scope:
        # In [5]: net
        # Out[5]: <tf.Tensor 'vgg_16/pool5/MaxPool:0' shape=(32, 18, 18, 512) dtype=float32>

        # In [6]: end_points
        # Out[6]:
        # OrderedDict([('vgg_16/conv1/conv1_1',
        #               <tf.Tensor 'vgg_16/conv1/conv1_1/Relu:0' shape=(32, 300, 300, 64) dtype=float32>),
        #              ('vgg_16/conv1/conv1_2',
        #               <tf.Tensor 'vgg_16/conv1/conv1_2/Relu:0' shape=(32, 300, 300, 64) dtype=float32>),
        #              ('vgg_16/pool1',
        #               <tf.Tensor 'vgg_16/pool1/MaxPool:0' shape=(32, 150, 150, 64) dtype=float32>),
        #              ('vgg_16/conv2/conv2_1',
        #               <tf.Tensor 'vgg_16/conv2/conv2_1/Relu:0' shape=(32, 150, 150, 128) dtype=float32>),
        #              ('vgg_16/conv2/conv2_2',
        #               <tf.Tensor 'vgg_16/conv2/conv2_2/Relu:0' shape=(32, 150, 150, 128) dtype=float32>),
        #              ('vgg_16/pool2',
        #               <tf.Tensor 'vgg_16/pool2/MaxPool:0' shape=(32, 75, 75, 128) dtype=float32>),
        #              ('vgg_16/conv3/conv3_1',
        #               <tf.Tensor 'vgg_16/conv3/conv3_1/Relu:0' shape=(32, 75, 75, 256) dtype=float32>),
        #              ('vgg_16/conv3/conv3_2',
        #               <tf.Tensor 'vgg_16/conv3/conv3_2/Relu:0' shape=(32, 75, 75, 256) dtype=float32>),
        #              ('vgg_16/conv3/conv3_3',
        #               <tf.Tensor 'vgg_16/conv3/conv3_3/Relu:0' shape=(32, 75, 75, 256) dtype=float32>),
        #              ('vgg_16/pool3',
        #               <tf.Tensor 'vgg_16/pool3/MaxPool:0' shape=(32, 37, 37, 256) dtype=float32>),
        #              ('vgg_16/conv4/conv4_1',
        #               <tf.Tensor 'vgg_16/conv4/conv4_1/Relu:0' shape=(32, 37, 37, 512) dtype=float32>),
        #              ('vgg_16/conv4/conv4_2',
        #               <tf.Tensor 'vgg_16/conv4/conv4_2/Relu:0' shape=(32, 37, 37, 512) dtype=float32>),
        #              ('vgg_16/conv4/conv4_3',
        #               <tf.Tensor 'vgg_16/conv4/conv4_3/Relu:0' shape=(32, 37, 37, 512) dtype=float32>),
        #              ('vgg_16/pool4',
        #               <tf.Tensor 'vgg_16/pool4/MaxPool:0' shape=(32, 18, 18, 512) dtype=float32>),
        #              ('vgg_16/conv5/conv5_1',
        #               <tf.Tensor 'vgg_16/conv5/conv5_1/Relu:0' shape=(32, 18, 18, 512) dtype=float32>),
        #              ('vgg_16/conv5/conv5_2',
        #               <tf.Tensor 'vgg_16/conv5/conv5_2/Relu:0' shape=(32, 18, 18, 512) dtype=float32>),
        #              ('vgg_16/conv5/conv5_3',
        #               <tf.Tensor 'vgg_16/conv5/conv5_3/Relu:0' shape=(32, 18, 18, 512) dtype=float32>),
        #              ('vgg_16/pool5',
        #               <tf.Tensor 'vgg_16/pool5/MaxPool:0' shape=(32, 18, 18, 512) dtype=float32>)])
        end_points_collection = scope.original_name_scope + '_end_points'
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):

          net = slim.convolution(net, 1024, [3, 3], padding='SAME', rate=6, scope='fc6')

          # def convolution(inputs,
          #         num_outputs,
          #         kernel_size,
          #         stride=1,
          #         padding='SAME',
          #         data_format=None,
          #         rate=1,
          #         activation_fn=nn.relu,
          #         normalizer_fn=None,
          #         normalizer_params=None,
          #         weights_initializer=initializers.xavier_initializer(),
          #         weights_regularizer=None,
          #         biases_initializer=init_ops.zeros_initializer(),
          #         biases_regularizer=None,
          #         reuse=None,
          #         variables_collections=None,
          #         outputs_collections=None,
          #         trainable=True,
          #         scope=None):

          # fc6 is a dilated 3x3 conv (Caffe definition below):
          # layer {
          # name: "fc6"
          # type: "Convolution"
          # bottom: "pool5"
          # top: "fc6"
          #   param {
          #     lr_mult: 1.0
          #     decay_mult: 1.0
          #   }
          #   param {
          #     lr_mult: 2.0
          #     decay_mult: 0.0
          #   }
          #   convolution_param {
          #     num_output: 1024
          #     pad: 6
          #     kernel_size: 3
          #     weight_filler {
          #       type: "xavier"
          #     }
          #     bias_filler {
          #       type: "constant"
          #       value: 0.0
          #     }
          #     dilation: 6
          #   }
          # }
          # layer {
          #   name: "relu6"
          #   type: "ReLU"
          #   bottom: "fc6"
          #   top: "fc6"
          # }

          # fc7 is a 1x1 conv (Caffe definition below):
          # layer {
          #   name: "fc7"
          #   type: "Convolution"
          #   bottom: "fc6"
          #   top: "fc7"
          #   param {
          #     lr_mult: 1.0
          #     decay_mult: 1.0
          #   }
          #   param {
          #     lr_mult: 2.0
          #     decay_mult: 0.0
          #   }
          #   convolution_param {
          #     num_output: 1024
          #     kernel_size: 1
          #     weight_filler {
          #       type: "xavier"
          #     }
          #     bias_filler {
          #       type: "constant"
          #       value: 0.0
          #     }
          #   }
          # }
          # layer {
          #   name: "relu7"
          #   type: "ReLU"
          #   bottom: "fc7"
          #   top: "fc7"
          # }
          net = slim.conv2d(net, 1024, [1, 1], padding='SAME', scope='fc7')
          net = slim.conv2d(net, 256, [1, 1], padding='SAME', scope='conv6_1')
          net = slim.conv2d(net, 512, [3, 3], padding='SAME', stride=2,
                            scope='conv6_2')
          net = slim.conv2d(net, 128, [1, 1], padding='SAME', scope='conv7_1')
          net = slim.conv2d(net, 256, [3, 3], padding='SAME', stride=2,
                            scope='conv7_2')
          net = slim.conv2d(net, 128, [1, 1], padding='VALID', scope='conv8_1')
          net = slim.conv2d(net, 256, [3, 3], padding='VALID', stride=1,
                            scope='conv8_2')
          net = slim.conv2d(net, 128, [1, 1], padding='VALID', scope='conv9_1')
          net = slim.conv2d(net, 256, [3, 3], padding='VALID', stride=1,
                            scope='conv9_2')
          _image_features_new = slim.utils.convert_collection_to_dict(end_points_collection)

          for k, v in _image_features_new.items():
            image_features[k] = v
        # _, image_features = inception_v3.inception_v3_base(
        #     ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
        #     final_endpoint='Mixed_7c',
        #     min_depth=self._min_depth,
        #     depth_multiplier=self._depth_multiplier,
        #     scope=scope)
    feature_maps = feature_map_generators.multi_resolution_feature_maps(
        feature_map_layout=feature_map_layout,
        depth_multiplier=self._depth_multiplier,
        min_depth=self._min_depth,
        insert_1x1_conv=True,
        image_features=image_features)

    return feature_maps.values()
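
Assuming the 300x300-style shapes in the commented endpoint dump above (pool5 at 18x18), the extra layers walk the resolution down much like the original SSD:

# Hedged spatial trace of the extra layers (pool5 assumed to be 18x18):
#   fc6     3x3, rate=6, SAME -> 18x18  (dilation preserves resolution)
#   fc7     1x1, SAME         -> 18x18
#   conv6_2 3x3, s2, SAME     ->  9x9
#   conv7_2 3x3, s2, SAME     ->  5x5
#   conv8_2 3x3, s1, VALID    ->  3x3  (conv8_1 is 1x1, so VALID does not shrink)
#   conv9_2 3x3, s1, VALID    ->  1x1
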
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
        trunc_normal = lambda stddev: tf.truncated_normal_initializer(
            0.0, stddev)

        # Convolutional autoencoder prepended to the detection backbone.
        encoder_1_conv = slim.conv2d(preprocessed_inputs,
                                     depth(64), [3, 3],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c1_conv')
        #encoder_1_pool = slim.max_pool2d(encoder_1_conv, [2, 2], stride=2,
        #                                    scope='encoder_c1_pool')
        #encoder_1_dropout1 = slim.dropout(encoder_1_pool, 0.7, scope='encoder_c1_dropout1')
        encoder_2_conv = slim.conv2d(encoder_1_conv,
                                     depth(128), [5, 5],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c2_conv')
        encoder_3_conv = slim.conv2d(encoder_2_conv,
                                     depth(128), [5, 5],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c3_conv')
        # Decoder.
        decoder_3_deconv = slim.conv2d_transpose(
            encoder_3_conv,
            depth(128), [5, 5],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c3_deconv')
        decoder_2_deconv = slim.conv2d_transpose(
            decoder_3_deconv,
            depth(128), [5, 5],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c2_deconv')
        decoder_1_deconv = slim.conv2d_transpose(
            decoder_2_deconv,
            depth(1), [3, 3],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c1_deconv')

        feature_map_layout = {
            'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope('InceptionV2',
                                   reuse=self._reuse_weights) as scope:
                _, image_features = inception_v2.inception_v2_base(
                    ops.pad_to_multiple(decoder_1_deconv,
                                        self._pad_to_multiple),
                    final_endpoint='Mixed_5c',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
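
Every layer of the autoencoder above uses the slim defaults (stride 1, 'SAME' padding), so the spatial size is preserved end to end and only the single-channel decoder_1_deconv output reaches InceptionV2. A quick check of that assumption:

import tensorflow as tf
slim = tf.contrib.slim

x = tf.random_uniform([2, 64, 64, 3])
y = slim.conv2d(x, 128, [5, 5])          # defaults: stride 1, SAME -> (2, 64, 64, 128)
z = slim.conv2d_transpose(y, 1, [3, 3])  # stride 1, SAME -> (2, 64, 64, 1)
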
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        feature_map_layout = {
            # The first two maps are reused MobileNet endpoints; the four
            # empty strings ask the generator to create new layers.
            'from_layer': [
                'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''
            ],
            # -1 keeps the source endpoint's depth; the remaining entries set
            # the depth of each newly created layer.
            'layer_depth': [-1, -1, 512, 256, 256, 128],
        }

        with tf.control_dependencies([shape_assert]):
            # Apply the feature extractor's convolutional hyperparameters to
            # everything built inside this arg_scope.
            with slim.arg_scope(self._conv_hyperparams):
                with tf.variable_scope('MobilenetV1',
                                       reuse=self._reuse_weights) as scope:
                    # Run the MobileNet base; `image_features` is a dict
                    # mapping endpoint names to feature map tensors.
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        preprocessed_inputs,
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)

                    # Reuse the endpoints named in `feature_map_layout` and
                    # build the additional maps requested by the empty
                    # 'from_layer' entries.
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return feature_maps.values()  # the six feature maps for the SSD heads
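
For a layout like the one above, the generator keeps the two named endpoints and appends the new maps under derived names. The key names below follow the generator's base-name/kernel/stride/depth suffix convention and should be treated as an assumption:

expected_keys = [
    'Conv2d_11_pointwise',
    'Conv2d_13_pointwise',
    'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512',
    'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256',
    'Conv2d_13_pointwise_2_Conv2d_4_3x3_s2_256',
    'Conv2d_13_pointwise_2_Conv2d_5_3x3_s2_128',
]
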
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    feature_map_layout = {
        'from_layer': ['east_conv1_3x3'],
        'layer_depth': [-1],
    }

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(self._conv_hyperparams):
        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              preprocessed_inputs,
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          """
          by chenx
          """
          east_conv_1 = image_features['Conv2d_3_pointwise']
          east_conv_2 = image_features['Conv2d_5_pointwise']
          east_conv_3 = image_features['Conv2d_11_pointwise']
          east_conv_4 = image_features['Conv2d_13_pointwise']

          east_deconv4 = slim.conv2d_transpose(east_conv_4, 512, [4, 4], 2, \
                                          padding='SAME', scope='east_deconv4')
          east_conv4_concat = tf.concat([east_conv_4, east_deconv4], axis=3)
          east_conv4_1x1 = slim.conv2d(east_conv4_concat, 256, [1,1],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv4_1x1')
          east_conv4_3x3 = slim.conv2d(east_conv4_1x1, 256, [3,3],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv4_3x3')
          image_features['east_conv4_3x3'] = east_conv4_3x3

          east_deconv3 = slim.conv2d_transpose(east_conv4_3x3, 256, [4, 4], 2, \
                                          padding='SAME', scope='east_deconv3')
          east_conv3_concat = tf.concat([east_conv_3, east_deconv3], axis=3)
          east_conv3_1x1 = slim.conv2d(east_conv3_concat, 128, [1,1],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv3_1x1')
          east_conv3_3x3 = slim.conv2d(east_conv3_1x1, 128, [3,3],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv3_3x3')
          image_features['east_conv3_3x3'] = east_conv3_3x3

          east_deconv2 = slim.conv2d_transpose(east_conv3_3x3, 128, [4, 4], 2, \
                                          padding='SAME', scope='east_deconv2')
          east_conv2_concat = tf.concat([east_conv_2, east_deconv2], axis=3)
          east_conv2_1x1 = slim.conv2d(east_conv2_concat, 64, [1,1],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv2_1x1')
          east_conv2_3x3 = slim.conv2d(east_conv2_1x1, 64, [3,3],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv2_3x3')
          image_features['east_conv2_3x3'] = east_conv2_3x3

          east_deconv1 = slim.conv2d_transpose(east_conv2_3x3, 64, [4, 4], 2, \
                                          padding='SAME', scope='east_deconv1')
          east_conv1_concat = tf.concat([east_conv_1, east_deconv1], axis=3)
          east_conv1_1x1 = slim.conv2d(east_conv1_concat, 32, [1,1],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv1_1x1')
          east_conv1_3x3 = slim.conv2d(east_conv1_1x1, 32, [3,3],
                                       stride=1,
                                       normalizer_fn=slim.batch_norm,
                                       scope='east_conv1_3x3')
          image_features['east_conv1_3x3'] = east_conv1_3x3

          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
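
The four deconv/concat/1x1/3x3 blocks above repeat one merge pattern. A hedged helper capturing it (the name and the depth-doubling on the transposed conv are assumptions read off the snippet; `fine` must already be at twice the resolution of `coarse`):

def east_merge_block(coarse, fine, depth, name):
  # Upsample the coarser map 2x, fuse it with the finer map, then refine.
  up = slim.conv2d_transpose(coarse, depth * 2, [4, 4], 2, padding='SAME',
                             scope='%s_deconv' % name)
  merged = tf.concat([fine, up], axis=3)
  net = slim.conv2d(merged, depth, [1, 1], stride=1,
                    normalizer_fn=slim.batch_norm, scope='%s_1x1' % name)
  return slim.conv2d(net, depth, [3, 3], stride=1,
                     normalizer_fn=slim.batch_norm, scope='%s_3x3' % name)
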