def testModelHasExpectedNumberOfParameters(self): batch_size = 5 height, width = 224, 224 inputs = random_ops.random_uniform((batch_size, height, width, 3)) with arg_scope(inception_v2.inception_v2_arg_scope()): inception_v2.inception_v2_base(inputs) total_params, _ = model_analyzer.analyze_vars( variables_lib.get_model_variables()) self.assertAlmostEqual(10173112, total_params)
def testBuildAndCheckAllEndPointsUptoMixed5c(self): batch_size = 5 height, width = 224, 224 inputs = random_ops.random_uniform((batch_size, height, width, 3)) _, end_points = inception_v2.inception_v2_base( inputs, final_endpoint='Mixed_5c') endpoints_shapes = { 'Mixed_3b': [batch_size, 28, 28, 256], 'Mixed_3c': [batch_size, 28, 28, 320], 'Mixed_4a': [batch_size, 14, 14, 576], 'Mixed_4b': [batch_size, 14, 14, 576], 'Mixed_4c': [batch_size, 14, 14, 576], 'Mixed_4d': [batch_size, 14, 14, 576], 'Mixed_4e': [batch_size, 14, 14, 576], 'Mixed_5a': [batch_size, 7, 7, 1024], 'Mixed_5b': [batch_size, 7, 7, 1024], 'Mixed_5c': [batch_size, 7, 7, 1024], 'Conv2d_1a_7x7': [batch_size, 112, 112, 64], 'MaxPool_2a_3x3': [batch_size, 56, 56, 64], 'Conv2d_2b_1x1': [batch_size, 56, 56, 64], 'Conv2d_2c_3x3': [batch_size, 56, 56, 192], 'MaxPool_3a_3x3': [batch_size, 28, 28, 192] } self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) for endpoint_name in endpoints_shapes: expected_shape = endpoints_shapes[endpoint_name] self.assertTrue(endpoint_name in end_points) self.assertListEqual( end_points[endpoint_name].get_shape().as_list(), expected_shape)
def testBuildBaseNetwork(self): batch_size = 5 height, width = 224, 224 inputs = random_ops.random_uniform((batch_size, height, width, 3)) mixed_5c, end_points = inception_v2.inception_v2_base(inputs) self.assertTrue(mixed_5c.op.name.startswith('InceptionV2/Mixed_5c')) self.assertListEqual(mixed_5c.get_shape().as_list(), [batch_size, 7, 7, 1024]) expected_endpoints = [ 'Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3', 'MaxPool_3a_3x3' ] self.assertItemsEqual(end_points.keys(), expected_endpoints)
def testBuildOnlyUptoFinalEndpoint(self): batch_size = 5 height, width = 224, 224 endpoints = [ 'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c' ] for index, endpoint in enumerate(endpoints): with ops.Graph().as_default(): inputs = random_ops.random_uniform( (batch_size, height, width, 3)) out_tensor, end_points = inception_v2.inception_v2_base( inputs, final_endpoint=endpoint) self.assertTrue( out_tensor.op.name.startswith('InceptionV2/' + endpoint)) self.assertItemsEqual(endpoints[:index + 1], end_points)
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs.get_shape().assert_has_rank(4) shape_assert = tf.Assert( tf.logical_and( tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), ['image size must at least be 33 in both height and width.']) feature_map_layout = { 'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256, 128], } with tf.control_dependencies([shape_assert]): with slim.arg_scope(self._conv_hyperparams): with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope: _, image_features = inception_v2.inception_v2_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Mixed_5c', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, scope=scope) feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def _CNN(inputs): ''' The second part of the network (CNN) and the group module, are used to extract the final view descriptors together with the discrimination scores, separately. ''' final_view_descriptors = [] n_views = inputs.get_shape().as_list()[1] # transpose views: (NxVxHxWxC) -> (VxNxHxWxC) views = tf.transpose(inputs, perm=[1, 0, 2, 3, 4]) for i in range(n_views): batch_view = tf.gather(views, i) # N x H x W x C net, end_points = \ inception_v2.inception_v2_base(batch_view, scope=None) final_view_descriptors.append(net) return final_view_descriptors
def _extract_proposal_features(self, preprocessed_inputs, scope): """Extracts first stage RPN features. Args: preprocessed_inputs: A [batch, height, width, channels] float32 tensor representing a batch of images. scope: A scope name. Returns: rpn_feature_map: A tensor with shape [batch, height, width, depth] activations: A dictionary mapping feature extractor tensor names to tensors Raises: InvalidArgumentError: If the spatial size of `preprocessed_inputs` (height or width) is less than 33. ValueError: If the created network is missing the required activation. """ preprocessed_inputs.get_shape().assert_has_rank(4) shape_assert = tf.Assert( tf.logical_and( tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), ['image size must at least be 33 in both height and width.']) with tf.control_dependencies([shape_assert]): with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope: with _batch_norm_arg_scope( [slim.conv2d, slim.separable_conv2d], batch_norm_scale=True, train_batch_norm=self._train_batch_norm): _, activations = inception_v2.inception_v2_base( preprocessed_inputs, final_endpoint='Mixed_4e', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, scope=scope) return activations['Mixed_4e'], activations
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) feature_map_layout = { 'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256, 128], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with slim.arg_scope(self._conv_hyperparams_fn()): with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope: _, image_features = inception_v2.inception_v2_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Mixed_5c', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, scope=scope) feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()