def testModelHasExpectedNumberOfParameters(self):
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random_uniform((batch_size, height, width, 3))
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        inception.inception_v1_base(inputs)
    total_params, _ = slim.model_analyzer.analyze_vars(
        slim.get_model_variables())
    self.assertAlmostEqual(5607184, total_params)
def testBuildAndCheckAllEndPointsUptoMixed5c(self):
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v1_base(
        inputs, final_endpoint='Mixed_5c')
    endpoints_shapes = {
        'Conv2d_1a_7x7': [5, 112, 112, 64],
        'MaxPool_2a_3x3': [5, 56, 56, 64],
        'Conv2d_2b_1x1': [5, 56, 56, 64],
        'Conv2d_2c_3x3': [5, 56, 56, 192],
        'MaxPool_3a_3x3': [5, 28, 28, 192],
        'Mixed_3b': [5, 28, 28, 256],
        'Mixed_3c': [5, 28, 28, 480],
        'MaxPool_4a_3x3': [5, 14, 14, 480],
        'Mixed_4b': [5, 14, 14, 512],
        'Mixed_4c': [5, 14, 14, 512],
        'Mixed_4d': [5, 14, 14, 512],
        'Mixed_4e': [5, 14, 14, 528],
        'Mixed_4f': [5, 14, 14, 832],
        'MaxPool_5a_2x2': [5, 7, 7, 832],
        'Mixed_5b': [5, 7, 7, 832],
        'Mixed_5c': [5, 7, 7, 1024],
    }
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
        expected_shape = endpoints_shapes[endpoint_name]
        self.assertTrue(endpoint_name in end_points)
        self.assertListEqual(
            end_points[endpoint_name].get_shape().as_list(), expected_shape)
def build_model(model_name, inputs, num_classes, is_training,
                dropout_keep_prob):
    # Model names ending in 'fcn' use a fully convolutional head; strip the
    # 4-character suffix (e.g. '_fcn') to get the base network name.
    use_fcn = False
    if model_name.find('fcn') >= 0:
        use_fcn = True
        model_base_name = model_name[0:-4]
    else:
        model_base_name = model_name

    if model_base_name == 'vgg16':
        net = vgg16_base(inputs)
    elif model_base_name == 'inception_v1':
        with slim.arg_scope(inception.inception_v1_arg_scope()):
            net, _ = inception.inception_v1_base(inputs)
    elif model_base_name == 'inception_v2':
        with slim.arg_scope(inception.inception_v2_arg_scope()):
            net, _ = inception.inception_v2_base(inputs)
    elif model_base_name == 'inception_v3':
        with slim.arg_scope(inception.inception_v3_arg_scope()):
            net, _ = inception.inception_v3_base(inputs)
    else:
        raise Exception('model {} does not exist'.format(model_name))

    with tf.variable_scope('not_pretrained'):
        if use_fcn:
            net = fully_convolutional_networks(net, num_classes, is_training,
                                               dropout_keep_prob)
        else:
            net = fully_connected_networks(net, num_classes, is_training,
                                           dropout_keep_prob)
    return net
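# A minimal usage sketch for build_model above. The placeholder shape and the
# hyperparameter values are illustrative assumptions, not taken from the
# original code; the '_fcn' suffix selects the fully convolutional head.
def example_build_model_usage():
    images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='images')
    net = build_model('inception_v1_fcn', images, num_classes=10,
                      is_training=True, dropout_keep_prob=0.5)
    return net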
def testHalfSizeImages(self):
    batch_size = 5
    height, width = 112, 112
    inputs = tf.random_uniform((batch_size, height, width, 3))
    mixed_5c, _ = inception.inception_v1_base(inputs)
    self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
    self.assertListEqual(mixed_5c.get_shape().as_list(),
                         [batch_size, 4, 4, 1024])
def build(self, inputs, input_pixel_size, is_training,
          scope='img_inception'):
    """Inception for BEV feature extraction.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        input_pixel_size: size of the input (H x W).
        is_training: True for training, False for validation/testing.
        scope: Optional scope for the variables.

    Returns:
        The net, a rank-4 tensor of size
        [batch, height_out, width_out, channels_out], and an end_points dict.
    """
    inception_config = self.config

    with tf.variable_scope(scope, 'img_inception', [inputs]) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            if inception_config.inception_v == 'inception_v1':
                with slim.arg_scope(inception.inception_v1_arg_scope()):
                    net, end_points = inception.inception_v1_base(
                        inputs, scope=scope)
            elif inception_config.inception_v == 'inception_v2':
                with slim.arg_scope(inception.inception_v2_arg_scope()):
                    net, end_points = inception.inception_v2_base(
                        inputs, scope=scope)
            elif inception_config.inception_v == 'inception_v3':
                with slim.arg_scope(inception.inception_v3_arg_scope()):
                    net, end_points = inception.inception_v3_base(
                        inputs, scope=scope)
            else:
                raise ValueError('Invalid Inception version {},'.format(
                    inception_config.inception_v))

            with tf.variable_scope('upsampling'):
                # This feature extractor downsamples the input by a
                # factor of 32.
                downsampling_factor = 32
                downsampled_shape = input_pixel_size / downsampling_factor
                upsampled_shape = (downsampled_shape *
                                   inception_config.upsampling_multiplier)
                feature_maps_out = tf.image.resize_bilinear(
                    net, upsampled_shape)

    return feature_maps_out, end_points
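# A small arithmetic sketch of the upsampling step in build() above. The input
# size and multiplier are illustrative assumptions, not values from any
# particular config; the backbone output is resized to
# (input_pixel_size / 32) * upsampling_multiplier per spatial dimension, so
# input sizes are expected to be divisible by the downsampling factor.
def example_upsampled_shape(input_pixel_size=(384, 1280),
                            upsampling_multiplier=4):
    downsampling_factor = 32
    # (384, 1280) -> downsampled (12, 40) -> upsampled (48, 160)
    return [dim // downsampling_factor * upsampling_multiplier
            for dim in input_pixel_size]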
def testBuildBaseNetwork(self):
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random_uniform((batch_size, height, width, 3))
    mixed_5c, end_points = inception.inception_v1_base(inputs)
    self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
    self.assertListEqual(mixed_5c.get_shape().as_list(),
                         [batch_size, 7, 7, 1024])
    expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
                          'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b',
                          'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c',
                          'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2',
                          'Mixed_5b', 'Mixed_5c']
    self.assertItemsEqual(end_points.keys(), expected_endpoints)
def testBuildOnlyUptoFinalEndpoint(self):
    batch_size = 5
    height, width = 224, 224
    endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
                 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
                 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d',
                 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b',
                 'Mixed_5c']
    for index, endpoint in enumerate(endpoints):
        with tf.Graph().as_default():
            inputs = tf.random_uniform((batch_size, height, width, 3))
            out_tensor, end_points = inception.inception_v1_base(
                inputs, final_endpoint=endpoint)
            self.assertTrue(out_tensor.op.name.startswith(
                'InceptionV1/' + endpoint))
            self.assertItemsEqual(endpoints[:index + 1], end_points)
def compute_embedding_inception_v1(inputs,
                                   embedding_dim=64,
                                   is_training=True,
                                   dropout_keep_prob=0.8,
                                   scope='InceptionV1',
                                   l2_normalize=True):
    """Compute embedding with inception v1."""
    with tf.variable_scope(scope, 'InceptionV1',
                           [inputs, embedding_dim]) as scope:
        with slim.arg_scope([layers.batch_norm, layers.dropout],
                            is_training=is_training):
            with slim.arg_scope(inception.inception_v1_arg_scope()):
                net, end_points = inception.inception_v1_base(inputs,
                                                              scope=scope)
            net = layers.avg_pool2d(net, [7, 7], stride=1,
                                    scope='AvgPool_0a_7x7')
            net = layers.dropout(net, dropout_keep_prob, scope='Dropout_0b')
            base_variables = slim.get_variables_to_restore(
                exclude=['global_step'])

            # Embedding bottleneck.
            net = layers.conv2d(net, embedding_dim, [1, 1],
                                activation_fn=None, normalizer_fn=None,
                                scope='Bottleneck')
            embedding = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
            if l2_normalize:
                embedding = tf.nn.l2_normalize(embedding, dim=1)
            end_points['embeddings'] = embedding

    bottleneck_variables = tf.contrib.framework.get_variables(
        scope='InceptionV1/Bottleneck')
    return embedding, end_points, base_variables, bottleneck_variables
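# A minimal usage sketch for compute_embedding_inception_v1 above. The batch
# and image sizes are illustrative assumptions; 224x224 inputs reach the 7x7
# average pool, and the returned embedding has shape [batch, embedding_dim].
def example_compute_embedding_usage():
    images = tf.random_uniform((8, 224, 224, 3))
    embedding, end_points, base_vars, bottleneck_vars = (
        compute_embedding_inception_v1(images, embedding_dim=64,
                                       is_training=False))
    # embedding: [8, 64], L2-normalized along the feature dimension.
    return embedding, end_points, base_vars, bottleneck_vars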
def __call__(self, training, img, dropout):
    """Applies convolutions to the image.

    Args:
        training: (tf.placeholder) tf.bool
        img: batch of images, shape = (?, height, width, channels),
            of type tf.uint8

    Returns:
        the encoded images, shape = (?, h', w', c')
    """
    img = tf.cast(img, tf.float32) / 255.

    # out, _ = inception.inception_v2_base(
    #     img, final_endpoint='MaxPool_3a_3x3', scope="convolutional_encoder")
    out, _ = inception.inception_v1_base(img,
                                         final_endpoint='MaxPool_3a_3x3',
                                         scope="convolutional_encoder")

    if self._config.positional_embeddings:
        # from tensor2tensor lib - positional embeddings
        out = add_timing_signal_nd(out)

    return out
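# A minimal usage sketch for the encoder __call__ above, assuming the class is
# instantiated elsewhere as `encoder` with an appropriate config; the
# placeholder names and shapes are illustrative. Stopping at MaxPool_3a_3x3
# reduces the spatial dimensions by a factor of 8 and yields 192 channels.
def example_encoder_usage(encoder):
    training = tf.placeholder(tf.bool, name='training')
    img = tf.placeholder(tf.uint8, [None, 64, 512, 3], name='img')
    dropout = tf.placeholder(tf.float32, name='dropout')
    out = encoder(training, img, dropout)  # shape: (?, 8, 64, 192)
    return out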