def test_get_expected_feature_map_shapes_with_inception_v3(self): image_features = { 'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32), 'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32), 'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32) } feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=INCEPTION_V3_LAYOUT, depth_multiplier=1, min_depth=32, insert_1x1_conv=True, image_features=image_features) expected_feature_map_shapes = { 'Mixed_5d': (4, 35, 35, 256), 'Mixed_6e': (4, 17, 17, 576), 'Mixed_7c': (4, 8, 8, 1024), 'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512), 'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256), 'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128) } init_op = tf.global_variables_initializer() with self.test_session() as sess: sess.run(init_op) out_feature_maps = sess.run(feature_maps) out_feature_map_shapes = dict( (key, value.shape) for key, value in out_feature_maps.items()) self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1( self): image_features = { 'Conv2d_11_pointwise': tf.random_uniform([4, 16, 16, 512], dtype=tf.float32), 'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32), } feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT, depth_multiplier=1, min_depth=32, insert_1x1_conv=True, image_features=image_features) expected_feature_map_shapes = { 'Conv2d_11_pointwise': (4, 16, 16, 512), 'Conv2d_13_pointwise': (4, 8, 8, 1024), 'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512), 'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256), 'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256) } init_op = tf.global_variables_initializer() with self.test_session() as sess: sess.run(init_op) out_feature_maps = sess.run(feature_maps) out_feature_map_shapes = dict( (key, value.shape) for key, value in out_feature_maps.items()) self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) feature_map_layout = { 'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256, 128], 'use_depthwise': self._use_depthwise, 'use_explicit_padding': self._use_explicit_padding, } with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v2.training_scope( is_training=(self._is_training and self._batch_norm_trainable), bn_decay=0.9997)), \ slim.arg_scope( [mobilenet.depth_multiplier], min_depth=self._min_depth): # TODO(b/68150321): Enable fused batch norm once quantization # supports it. with slim.arg_scope([slim.batch_norm], fused=False): _, image_features = mobilenet_v2.mobilenet_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='layer_19', depth_multiplier=self._depth_multiplier, use_explicit_padding=self._use_explicit_padding, scope=scope) with slim.arg_scope(self._conv_hyperparams): # TODO(b/68150321): Enable fused batch norm once quantization # supports it. with slim.arg_scope([slim.batch_norm], fused=False): feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) feature_map_layout = { 'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '', ''][:self._num_layers], 'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers], 'use_depthwise': self._use_depthwise, 'use_explicit_padding': self._use_explicit_padding, } with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope: with slim.arg_scope( mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \ slim.arg_scope( [mobilenet.depth_multiplier], min_depth=self._min_depth): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): _, image_features = mobilenet_v2.mobilenet_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='layer_19', depth_multiplier=self._depth_multiplier, use_explicit_padding=self._use_explicit_padding, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return list(feature_maps.values())
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) feature_map_layout = { 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256, 128], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with slim.arg_scope(self._conv_hyperparams): # TODO: Enable fused batch norm once quantization supports it. with slim.arg_scope([slim.batch_norm], fused=False): with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: _, image_features = mobilenet_v1.mobilenet_v1_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, scope=scope) feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs.get_shape().assert_has_rank(4) shape_assert = tf.Assert( tf.logical_and( tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), ['image size must at least be 33 in both height and width.']) feature_map_layout = { 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256, 128], } with tf.control_dependencies([shape_assert]): with slim.arg_scope(self._conv_hyperparams): with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights) as scope: _, image_features = mobilenet_v1.mobilenet_v1_base( preprocessed_inputs, final_endpoint='Conv2d_13_pointwise', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, scope=scope) feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) feature_map_layout = { 'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256, 128], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with slim.arg_scope(self._conv_hyperparams): with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope: _, image_features = inception_v2.inception_v2_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Mixed_5c', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, scope=scope) feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()