def testModelHasExpectedNumberOfParameters(self):
    """Check the total parameter count reported for the InceptionV2 base network."""
    num_images, side = 5, 224
    images = tf.random_uniform((num_images, side, side, 3))

    # Build under the model's own arg_scope so the variables being counted
    # are created with that configuration.
    with slim.arg_scope(inception.inception_v2_arg_scope()):
        inception.inception_v2_base(images)

    model_variables = slim.get_model_variables()
    # analyze_vars returns a pair; only the first element (the parameter
    # total) is checked here.
    param_count, _ = slim.model_analyzer.analyze_vars(model_variables)
    self.assertAlmostEqual(10173112, param_count)
def testBuildErrorsForDataFormats(self):
    """Verify that building the base network raises ValueError for rejected data formats."""
    num_images, side = 5, 224
    images = tf.random_uniform((num_images, side, side, 3))

    rejected_formats = (
        # 'NCWH' data format is not supported.
        'NCWH',
        # 'NCHW' data format is not supported for separable convolution
        # (the call below leaves use_separable_conv at its default).
        'NCHW',
    )
    for rejected in rejected_formats:
        with self.assertRaises(ValueError):
            _ = inception.inception_v2_base(images, data_format=rejected)
def testBuildAndCheckAllEndPointsUptoMixed5c(self):
    """Build the base network up to 'Mixed_5c' and verify every endpoint's static shape.

    The set of returned endpoint names must match the expected set exactly,
    and each endpoint tensor must report the expected static shape.
    """
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v2_base(
        inputs, final_endpoint='Mixed_5c')

    # Expected static shape of each endpoint for 224x224 inputs.
    endpoints_shapes = {
        'Conv2d_1a_7x7': [batch_size, 112, 112, 64],
        'MaxPool_2a_3x3': [batch_size, 56, 56, 64],
        'Conv2d_2b_1x1': [batch_size, 56, 56, 64],
        'Conv2d_2c_3x3': [batch_size, 56, 56, 192],
        'MaxPool_3a_3x3': [batch_size, 28, 28, 192],
        'Mixed_3b': [batch_size, 28, 28, 256],
        'Mixed_3c': [batch_size, 28, 28, 320],
        'Mixed_4a': [batch_size, 14, 14, 576],
        'Mixed_4b': [batch_size, 14, 14, 576],
        'Mixed_4c': [batch_size, 14, 14, 576],
        'Mixed_4d': [batch_size, 14, 14, 576],
        'Mixed_4e': [batch_size, 14, 14, 576],
        'Mixed_5a': [batch_size, 7, 7, 1024],
        'Mixed_5b': [batch_size, 7, 7, 1024],
        'Mixed_5c': [batch_size, 7, 7, 1024],
    }
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())

    for endpoint_name, expected_shape in endpoints_shapes.items():
        # assertIn names the missing key on failure, unlike
        # assertTrue(name in end_points).
        self.assertIn(endpoint_name, end_points)
        self.assertListEqual(
            end_points[endpoint_name].get_shape().as_list(), expected_shape)
def localization_net_alpha(inputs, num_transformer, num_theta_params):
    """Build a spatial-transformer localization net on top of inception_v2.

    The inception_v2 base network is built under the 'inception_net'
    variable scope. Its output then goes through three convolutions under
    the 'logits' scope; the last one has
    ``num_transformer * num_theta_params`` output channels, a tanh
    activation and no normalizer.

    Args:
        inputs: Tensor passed to ``inception_v2.inception_v2_base``.
        num_transformer: Number of transformers; scales the output channel
            count and the length of the bias initializer.
        num_theta_params: Number of parameters produced per transformer.

    Returns:
        The output of the final convolution with axes 1 and 2 squeezed out.
    """
    # Called with default arguments, so the base network is built up to its
    # default final endpoint.
    with tf.variable_scope('inception_net'):
        features, _ = inception_v2.inception_v2_base(inputs)

    with tf.variable_scope('logits'):
        # 1x1 convolution down to 128 channels.
        features = slim.conv2d(features, 128, [1, 1], scope='conv2d_a_1x1')

        # The kernel size comes from an inception_v2 helper; judging by its
        # name it shrinks [7, 7] for small inputs -- TODO confirm.
        window = inception_v2._reduced_kernel_size_for_small_input(
            features, [7, 7])
        window_scope = 'conv2d_b_{}x{}'.format(*window)
        features = slim.conv2d(
            features, 128, window, padding='VALID', scope=window_scope)

        # NOTE(review): the bias template holds four values per transformer,
        # so this presumably expects num_theta_params == 4 -- confirm.
        theta_bias_init = tf.constant_initializer(
            [2.0, .0, 2.0, .0] * num_transformer)
        theta_weight_init = tf.truncated_normal_initializer(stddev=0.1)
        theta_maps = slim.conv2d(
            features,
            num_transformer * num_theta_params,
            [1, 1],
            weights_initializer=theta_weight_init,
            biases_initializer=theta_bias_init,
            normalizer_fn=None,
            activation_fn=tf.nn.tanh,
            scope='conv2d_c_1x1')
        return tf.squeeze(theta_maps, [1, 2])
def network_fn(inputs):
    """Classify inputs through several spatial-transformer / inception paths.

    A localization net predicts transformer parameters, which are split into
    ``NUM_TRANSFORMER`` groups. Each group drives one ``transformer`` call on
    ``inputs``; every transformed output is fed to its own inception_v2 base
    network, and the per-path outputs are concatenated along the last axis
    before the final logits are computed.

    ``transformer``, ``transformer_output_size``, ``dropout_keep_prob`` and
    ``_inception_logits`` are read from the surrounding scope.

    Args:
        inputs: Input tensor for both the localization net and the
            transformers.

    Returns:
        A ``(classification_logits, end_points)`` tuple, where ``end_points``
        maps 'stn/localization/transformer_theta' and
        'stn/classification/logits' to the corresponding tensors.
    """
    end_points = {}
    inception_arg_scope = inception_v2.inception_v2_arg_scope(
        weight_decay=FLAGS.weight_decay)

    # NOTE(review): scope nesting follows the 'stn/...' end_points keys --
    # confirm against existing checkpoints' variable names.
    with slim.arg_scope(inception_arg_scope):
        with tf.variable_scope('stn'):
            with tf.variable_scope('localization'):
                transformer_theta = localization_net_alpha(
                    inputs, NUM_TRANSFORMER, NUM_THETA_PARAMS)
                # One slice of theta per transformer path.
                theta_per_path = tf.split(
                    transformer_theta, NUM_TRANSFORMER, axis=1)
                end_points['stn/localization/transformer_theta'] = transformer_theta

            warped_views = [
                transformer(inputs, theta, transformer_output_size,
                            sampling_kernel='bilinear')
                for theta in theta_per_path
            ]

            # Static shape pinned onto each transformer output before it is
            # handed to the inception base network.
            out_height = transformer_output_size[0]
            out_width = transformer_output_size[1]
            static_shape = [FLAGS.batch_size, out_height, out_width, 3]

            path_features = []
            with tf.variable_scope('classification'):
                for path_idx, view in enumerate(warped_views):
                    with tf.variable_scope('path_{}'.format(path_idx)):
                        view.set_shape(static_shape)
                        features, _ = inception_v2.inception_v2_base(view)
                    path_features.append(features)

                # Join the per-path outputs along the last axis.
                multipath_outputs = tf.concat(path_features, axis=-1)

                # Final logits over the concatenated features.
                classification_logits = _inception_logits(
                    multipath_outputs, NUM_CLASSES, dropout_keep_prob)
                end_points['stn/classification/logits'] = classification_logits

    return classification_logits, end_points
def testBuildEndPointsWithUseSeparableConvolutionFalse(self):
    """Check that endpoint shapes are unchanged when use_separable_conv=False.

    The base network is built twice on the same inputs, once with default
    arguments and once with ``use_separable_conv=False``. Every 'Mixed*' and
    'Conv*' endpoint must have the same static shape in both builds.
    """
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v2_base(inputs)

    # str.startswith accepts a tuple of prefixes.
    endpoint_keys = [
        key for key in end_points if key.startswith(('Mixed', 'Conv'))
    ]

    _, end_points_with_replacement = inception.inception_v2_base(
        inputs, use_separable_conv=False)

    # The endpoint shapes must be equal to the original shape even when the
    # separable convolution is replaced with a normal convolution.
    for key in endpoint_keys:
        original_shape = end_points[key].get_shape().as_list()
        # assertIn names the missing key on failure.
        self.assertIn(key, end_points_with_replacement)
        new_shape = end_points_with_replacement[key].get_shape().as_list()
        self.assertListEqual(original_shape, new_shape)
def testBuildOnlyUptoFinalEndpoint(self):
    """Build the network truncated at each endpoint in turn and check the result.

    For every endpoint name, the returned tensor's op name must start with
    'InceptionV2/<endpoint>' and the returned endpoint dict must contain
    exactly the names up to and including that endpoint.
    """
    num_images, side = 5, 224
    ordered_names = [
        'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',
        'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b',
        'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b',
        'Mixed_5c',
    ]

    for position, final_name in enumerate(ordered_names):
        # Each truncated network is built in its own fresh graph.
        with tf.Graph().as_default():
            images = tf.random_uniform((num_images, side, side, 3))
            last_tensor, produced = inception.inception_v2_base(
                images, final_endpoint=final_name)

            expected_prefix = 'InceptionV2/' + final_name
            self.assertTrue(last_tensor.op.name.startswith(expected_prefix))

            expected_names = ordered_names[:position + 1]
            self.assertItemsEqual(expected_names, produced.keys())
def testBuildBaseNetwork(self):
    """Build the full base network and check its output tensor and endpoint names."""
    num_images, side = 5, 224
    images = tf.random_uniform((num_images, side, side, 3))
    final_tensor, produced = inception.inception_v2_base(images)

    # With default arguments the returned tensor is the 'Mixed_5c' output.
    final_op_name = final_tensor.op.name
    self.assertTrue(final_op_name.startswith('InceptionV2/Mixed_5c'))
    final_shape = final_tensor.get_shape().as_list()
    self.assertListEqual(final_shape, [num_images, 7, 7, 1024])

    expected_endpoints = [
        'Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b', 'Mixed_4c',
        'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c',
        'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',
        'MaxPool_3a_3x3',
    ]
    self.assertItemsEqual(produced.keys(), expected_endpoints)
def testBuildEndPointsNCHWDataFormat(self):
    """Check that 'NCHW' endpoint shapes are the transposed default-layout shapes.

    The network is built once with default arguments on inputs shaped
    (batch, height, width, 3) and once with ``data_format='NCHW'`` and
    ``use_separable_conv=False`` on inputs shaped (batch, 3, height, width).
    Every 'Mixed*' and 'Conv*' endpoint of the second build must have the
    shape of the first build's endpoint transposed with perm [0, 3, 1, 2].
    """
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v2_base(inputs)

    # str.startswith accepts a tuple of prefixes.
    endpoint_keys = [
        key for key in end_points if key.startswith(('Mixed', 'Conv'))
    ]

    inputs_in_nchw = tf.random_uniform((batch_size, 3, height, width))
    _, end_points_with_replacement = inception.inception_v2_base(
        inputs_in_nchw, use_separable_conv=False, data_format='NCHW')

    # With the 'NCHW' data format, all endpoint activations have a transposed
    # shape from the original shape with the 'NHWC' layout.
    for key in endpoint_keys:
        transposed_original_shape = tf.transpose(
            end_points[key], [0, 3, 1, 2]).get_shape().as_list()
        # assertIn names the missing key on failure.
        self.assertIn(key, end_points_with_replacement)
        new_shape = end_points_with_replacement[key].get_shape().as_list()
        self.assertListEqual(transposed_original_shape, new_shape)
def localization_net_beta(inputs, num_transformer, num_theta_parmas):
    """Build a two-branch localization net on top of inception_v2.

    The inception_v2 base network is built under the 'inception_net'
    variable scope. Under 'logits', its output feeds two branches (a 1x1
    convolution followed by a stride-2 3x3 convolution, and a 1x1
    convolution followed by a stride-2 3x3 max pool). The branch outputs are
    concatenated along the last axis, average-pooled, and projected by a 1x1
    convolution with tanh activation to
    ``num_transformer * num_theta_parmas`` channels.

    Args:
        inputs: Tensor passed to ``inception_v2.inception_v2_base``.
        num_transformer: Number of transformers; scales the output channel
            count and the length of the bias initializer.
        num_theta_parmas: Number of parameters produced per transformer
            (the spelling is part of the existing signature).

    Returns:
        The output of the final convolution with axes 1 and 2 squeezed out.
    """
    with tf.variable_scope('inception_net'):
        features, _ = inception_v2.inception_v2_base(inputs)

    with tf.variable_scope('logits'):
        with tf.variable_scope('branch_0'):
            conv_branch = slim.conv2d(
                features, 128, [1, 1], scope='conv2d_a_1x1')
            conv_branch = slim.conv2d(
                conv_branch, 144, [3, 3], stride=2, scope='conv2d_b_3x3')

        with tf.variable_scope('branch_1'):
            pool_branch = slim.conv2d(
                features, 144, [1, 1], scope='conv2d_a_1x1')
            pool_branch = slim.max_pool2d(
                pool_branch, [3, 3], stride=2, padding='SAME',
                scope='max_pool_b_3x3')

        merged = tf.concat([conv_branch, pool_branch], axis=-1)

        # The pooling window comes from an inception_v2 helper; judging by its
        # name it shrinks [7, 7] for small inputs -- TODO confirm.
        window = inception_v2._reduced_kernel_size_for_small_input(
            merged, [7, 7])
        pool_scope = 'avg_pool_a_{}x{}'.format(*window)
        pooled = slim.avg_pool2d(
            merged, window, padding='VALID', scope=pool_scope)

        # NOTE(review): the bias template holds four values per transformer,
        # so this presumably expects num_theta_parmas == 4 -- confirm.
        theta_bias_init = tf.constant_initializer(
            [2.0, .0, 2.0, .0] * num_transformer)
        theta_weight_init = tf.truncated_normal_initializer(stddev=0.1)
        theta_maps = slim.conv2d(
            pooled,
            num_transformer * num_theta_parmas,
            [1, 1],
            weights_initializer=theta_weight_init,
            biases_initializer=theta_bias_init,
            normalizer_fn=None,
            activation_fn=tf.nn.tanh,
            scope='conv2d_b_1x1')
        return tf.squeeze(theta_maps, [1, 2])