def test_get_expected_feature_map_shapes_with_inception_v3(self):
  """Checks the shapes of feature maps built from INCEPTION_V3_LAYOUT.

  Three randomly-filled endpoint tensors are passed to
  `multi_resolution_feature_maps`; the shape of every evaluated output is
  then compared against a hard-coded table.
  """
  endpoint_shapes = [
      ('Mixed_5d', [4, 35, 35, 256]),
      ('Mixed_6e', [4, 17, 17, 576]),
      ('Mixed_7c', [4, 8, 8, 1024]),
  ]
  image_features = {
      name: tf.random_uniform(shape, dtype=tf.float32)
      for name, shape in endpoint_shapes
  }

  generated_maps = feature_map_generators.multi_resolution_feature_maps(
      feature_map_layout=INCEPTION_V3_LAYOUT,
      depth_multiplier=1,
      min_depth=32,
      insert_1x1_conv=True,
      image_features=image_features)

  expected_feature_map_shapes = {
      'Mixed_5d': (4, 35, 35, 256),
      'Mixed_6e': (4, 17, 17, 576),
      'Mixed_7c': (4, 8, 8, 1024),
      'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
      'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
      'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128),
  }

  initializer = tf.global_variables_initializer()
  with self.test_session() as sess:
    sess.run(initializer)
    evaluated_maps = sess.run(generated_maps)
    actual_shapes = {
        name: array.shape for name, array in evaluated_maps.items()
    }
    self.assertDictEqual(actual_shapes, expected_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(self):
  """Checks feature map shapes built from EMBEDDED_SSD_MOBILENET_V1_LAYOUT.

  Two randomly-filled endpoint tensors are passed to
  `multi_resolution_feature_maps`; the shape of every evaluated output is
  then compared against a hard-coded table.
  """
  endpoint_shapes = [
      ('Conv2d_11_pointwise', [4, 16, 16, 512]),
      ('Conv2d_13_pointwise', [4, 8, 8, 1024]),
  ]
  image_features = {
      name: tf.random_uniform(shape, dtype=tf.float32)
      for name, shape in endpoint_shapes
  }

  generated_maps = feature_map_generators.multi_resolution_feature_maps(
      feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
      depth_multiplier=1,
      min_depth=32,
      insert_1x1_conv=True,
      image_features=image_features)

  expected_feature_map_shapes = {
      'Conv2d_11_pointwise': (4, 16, 16, 512),
      'Conv2d_13_pointwise': (4, 8, 8, 1024),
      'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
      'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
      'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256),
  }

  initializer = tf.global_variables_initializer()
  with self.test_session() as sess:
    sess.run(initializer)
    evaluated_maps = sess.run(generated_maps)
    actual_shapes = {
        name: array.shape for name, array in evaluated_maps.items()
    }
    self.assertDictEqual(actual_shapes, expected_feature_map_shapes)
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Runs the MobilenetV1 base network up to 'Conv2d_13_pointwise' and then
  builds the multi-resolution feature maps described by the layout below.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]
  """
  # NOTE(review): presumably enforces a minimum spatial size of 33 pixels --
  # confirm against shape_utils.check_min_image_dim.
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)

  # Two maps come from named base-network endpoints; the four entries with
  # an empty 'from_layer' carry explicit depths instead.
  feature_map_layout = {
      'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                     '', ''],
      'layer_depth': [-1, -1, 512, 256, 256, 128],
      'use_explicit_padding': self._use_explicit_padding,
      'use_depthwise': self._use_depthwise,
  }

  with tf.variable_scope('MobilenetV1',
                         reuse=self._reuse_weights) as scope:
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=None, regularize_depthwise=True)):
      # The detector's conv hyperparams are applied to the base network
      # only when explicitly requested; otherwise a no-op context is used.
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams
            else context_manager.IdentityContextManager()):
        _, image_features = mobilenet_v1.mobilenet_v1_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='Conv2d_13_pointwise',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            use_explicit_padding=self._use_explicit_padding,
            scope=scope)
    with slim.arg_scope(self._conv_hyperparams_fn()):
      feature_maps = feature_map_generators.multi_resolution_feature_maps(
          feature_map_layout=feature_map_layout,
          depth_multiplier=self._depth_multiplier,
          min_depth=self._min_depth,
          insert_1x1_conv=True,
          image_features=image_features)

  # dict.values() is a non-indexable view on Python 3; materialize it so the
  # documented "list of tensors" contract holds on both Python 2 and 3.
  return list(feature_maps.values())
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Runs the InceptionV3 base network up to 'Mixed_7c' and then builds the
  multi-resolution feature maps described by the layout below.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]
  """
  # NOTE(review): presumably enforces a minimum spatial size of 33 pixels --
  # confirm against shape_utils.check_min_image_dim.
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)

  # Three maps come from named base-network endpoints; the three entries
  # with an empty 'from_layer' carry explicit depths instead.
  feature_map_layout = {
      'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
      'layer_depth': [-1, -1, -1, 512, 256, 128],
      'use_explicit_padding': self._use_explicit_padding,
      'use_depthwise': self._use_depthwise,
  }

  # The detector's conv hyperparams cover both the base network and the
  # generated feature map layers.
  with slim.arg_scope(self._conv_hyperparams_fn()):
    with tf.variable_scope('InceptionV3', reuse=self._reuse_weights) as scope:
      _, image_features = inception_v3.inception_v3_base(
          ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
          final_endpoint='Mixed_7c',
          min_depth=self._min_depth,
          depth_multiplier=self._depth_multiplier,
          scope=scope)
      feature_maps = feature_map_generators.multi_resolution_feature_maps(
          feature_map_layout=feature_map_layout,
          depth_multiplier=self._depth_multiplier,
          min_depth=self._min_depth,
          insert_1x1_conv=True,
          image_features=image_features)

  # dict.values() is a non-indexable view on Python 3; materialize it so the
  # documented "list of tensors" contract holds on both Python 2 and 3.
  return list(feature_maps.values())
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Requires 256x256 inputs. When height and width are statically known they
  are checked at graph-construction time; otherwise a runtime `tf.Assert`
  is attached to the input tensor.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]

  Raises:
    ValueError: if image height or width are not 256 pixels.
  """
  image_shape = preprocessed_inputs.get_shape()
  image_shape.assert_has_rank(4)
  image_height = image_shape[1].value
  image_width = image_shape[2].value

  if image_height is None or image_width is None:
    # Static shape unknown: defer the 256x256 check to run time and make the
    # input depend on the assertion so it cannot be skipped.
    shape_assert = tf.Assert(
        tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                       tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
        ['image size must be 256 in both height and width.'])
    with tf.control_dependencies([shape_assert]):
      preprocessed_inputs = tf.identity(preprocessed_inputs)
  elif image_height != 256 or image_width != 256:
    raise ValueError(
        'image size must be = 256 in both height and width;'
        ' image dim = %d,%d' % (image_height, image_width))

  # Two maps come from named base-network endpoints; the three entries with
  # an empty 'from_layer' carry explicit depths and kernel sizes instead.
  feature_map_layout = {
      'from_layer': [
          'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''
      ],
      'layer_depth': [-1, -1, 512, 256, 256],
      'conv_kernel_size': [-1, -1, 3, 3, 2],
      'use_explicit_padding': self._use_explicit_padding,
      'use_depthwise': self._use_depthwise,
  }

  with tf.variable_scope('MobilenetV1',
                         reuse=self._reuse_weights) as scope:
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
      # The detector's conv hyperparams are applied to the base network
      # only when explicitly requested; otherwise a no-op context is used.
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams
            else context_manager.IdentityContextManager()):
        _, image_features = mobilenet_v1.mobilenet_v1_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='Conv2d_13_pointwise',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            use_explicit_padding=self._use_explicit_padding,
            scope=scope)
    with slim.arg_scope(self._conv_hyperparams_fn()):
      feature_maps = feature_map_generators.multi_resolution_feature_maps(
          feature_map_layout=feature_map_layout,
          depth_multiplier=self._depth_multiplier,
          min_depth=self._min_depth,
          insert_1x1_conv=True,
          image_features=image_features)

  # dict.values() is a non-indexable view on Python 3; materialize it so the
  # documented "list of tensors" contract holds on both Python 2 and 3.
  return list(feature_maps.values())