def testModelHasExpectedNumberOfParameters(self):
    batch_size = 5
    height, width = 299, 299
    inputs = random_ops.random_uniform((batch_size, height, width, 3))
    with arg_scope(inception_v3.inception_v3_arg_scope()):
        inception_v3.inception_v3_base(inputs)
    total_params, _ = model_analyzer.analyze_vars(
        variables_lib.get_model_variables())
    self.assertAlmostEqual(21802784, total_params)
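# The 21,802,784 figure asserted above is the total element count over all
# model variables of the base network. A minimal sketch of the same count,
# assuming each variable has a fully defined static shape (the helper name is
# illustrative, not part of the library):
import numpy as np

def count_parameters(model_variables):
    """Sums the element counts of a list of TF variables."""
    return sum(int(np.prod(v.get_shape().as_list())) for v in model_variables)

# e.g. count_parameters(variables_lib.get_model_variables()) should agree
# with the total reported by model_analyzer.analyze_vars.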
def testBuildAndCheckAllEndPointsUptoMixed7c(self):
    batch_size = 5
    height, width = 299, 299
    inputs = random_ops.random_uniform((batch_size, height, width, 3))
    _, end_points = inception_v3.inception_v3_base(
        inputs, final_endpoint='Mixed_7c')
    endpoints_shapes = {
        'Conv2d_1a_3x3': [batch_size, 149, 149, 32],
        'Conv2d_2a_3x3': [batch_size, 147, 147, 32],
        'Conv2d_2b_3x3': [batch_size, 147, 147, 64],
        'MaxPool_3a_3x3': [batch_size, 73, 73, 64],
        'Conv2d_3b_1x1': [batch_size, 73, 73, 80],
        'Conv2d_4a_3x3': [batch_size, 71, 71, 192],
        'MaxPool_5a_3x3': [batch_size, 35, 35, 192],
        'Mixed_5b': [batch_size, 35, 35, 256],
        'Mixed_5c': [batch_size, 35, 35, 288],
        'Mixed_5d': [batch_size, 35, 35, 288],
        'Mixed_6a': [batch_size, 17, 17, 768],
        'Mixed_6b': [batch_size, 17, 17, 768],
        'Mixed_6c': [batch_size, 17, 17, 768],
        'Mixed_6d': [batch_size, 17, 17, 768],
        'Mixed_6e': [batch_size, 17, 17, 768],
        'Mixed_7a': [batch_size, 8, 8, 1280],
        'Mixed_7b': [batch_size, 8, 8, 2048],
        'Mixed_7c': [batch_size, 8, 8, 2048],
    }
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
        expected_shape = endpoints_shapes[endpoint_name]
        self.assertTrue(endpoint_name in end_points)
        self.assertListEqual(
            end_points[endpoint_name].get_shape().as_list(), expected_shape)
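# The spatial sizes in endpoints_shapes follow from VALID-padding arithmetic:
# out = (in - kernel) // stride + 1. A small sketch verifying a few of the
# expected shapes above (the helper is illustrative, not part of the library):
def valid_conv_output_size(input_size, kernel, stride):
    return (input_size - kernel) // stride + 1

assert valid_conv_output_size(299, 3, 2) == 149  # Conv2d_1a_3x3
assert valid_conv_output_size(149, 3, 1) == 147  # Conv2d_2a_3x3
assert valid_conv_output_size(147, 3, 2) == 73   # MaxPool_3a_3x3
assert valid_conv_output_size(35, 3, 2) == 17    # Mixed_6a reduction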
def inception_v3(nlabels, images):
    batch_norm_params = {
        "is_training": False,
        "trainable": True,
        # Decay for the moving averages.
        "decay": 0.9997,
        # Epsilon to prevent 0s in variance.
        "epsilon": 0.001,
        # Collections containing the moving mean and moving variance.
        "variables_collections": {
            "beta": None,
            "gamma": None,
            "moving_mean": ["moving_vars"],
            "moving_variance": ["moving_vars"],
        }
    }
    weight_decay = 0.00004
    stddev = 0.1
    weights_regularizer = tf_slim.l2_regularizer(weight_decay)
    args_for_scope = (
        dict(list_ops_or_scope=[tf_slim.layers.conv2d,
                                tf_slim.layers.fully_connected],
             weights_regularizer=weights_regularizer,
             trainable=True),
        dict(list_ops_or_scope=[tf_slim.layers.conv2d],
             weights_initializer=tf1.truncated_normal_initializer(stddev=stddev),
             activation_fn=tf1.nn.relu,
             normalizer_fn=tf_slim.layers.batch_norm,
             normalizer_params=batch_norm_params),
    )
    with tf1.variable_scope("InceptionV3", "InceptionV3", [images]) as scope, \
            tf_slim.arg_scope(**args_for_scope[0]), \
            tf_slim.arg_scope(**args_for_scope[1]):
        net, end_points = inception_v3_base(images, scope=scope)
        with tf1.variable_scope("logits"):
            shape = net.get_shape()
            net = tf_slim.layers.avg_pool2d(net, shape[1:3], padding="VALID",
                                            scope="pool")
            # With keep_prob=1 this dropout is an identity op (inference
            # configuration); it is kept to preserve the op name 'droplast'.
            net = tf1.nn.dropout(net, 1, name='droplast')
            net = tf_slim.layers.flatten(net, scope="flatten")
        with tf1.variable_scope('output') as scope:
            weights = tf1.Variable(
                tf1.truncated_normal([2048, nlabels], mean=0.0, stddev=0.01),
                name='weights')
            biases = tf1.Variable(
                tf1.constant(0.0, shape=[nlabels], dtype=tf1.float32),
                name='biases')
            output = tf1.add(tf1.matmul(net, weights), biases, name=scope.name)
            # Strip any multi-tower prefix so summaries share one name space.
            tensor_name = re.sub('tower_[0-9]*/', '', output.op.name)
            tf1.summary.histogram(tensor_name + '/activations', output)
            tf1.summary.scalar(tensor_name + '/sparsity',
                               tf1.nn.zero_fraction(output))
    return output
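# A minimal usage sketch for the classifier head above, assuming
# tensorflow.compat.v1 graph mode and that the builder's own imports
# (tf1, tf_slim, re, inception_v3_base) are in scope; the 8-class setup is
# illustrative only:
import tensorflow.compat.v1 as tf1
tf1.disable_eager_execution()

images = tf1.placeholder(tf1.float32, [None, 299, 299, 3])
logits = inception_v3(nlabels=8, images=images)  # [batch, 8] class scores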
def testBuildBaseNetwork(self):
    batch_size = 5
    height, width = 299, 299
    inputs = random_ops.random_uniform((batch_size, height, width, 3))
    final_endpoint, end_points = inception_v3.inception_v3_base(inputs)
    self.assertTrue(
        final_endpoint.op.name.startswith('InceptionV3/Mixed_7c'))
    self.assertListEqual(final_endpoint.get_shape().as_list(),
                         [batch_size, 8, 8, 2048])
    expected_endpoints = [
        'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3',
        'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 'Mixed_5b',
        'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
        'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'
    ]
    self.assertItemsEqual(end_points.keys(), expected_endpoints)
def testBuildOnlyUptoFinalEndpoint(self):
    batch_size = 5
    height, width = 299, 299
    endpoints = [
        'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3',
        'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 'Mixed_5b',
        'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
        'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'
    ]
    for index, endpoint in enumerate(endpoints):
        # Build each truncated network in a fresh graph so op names do not
        # collide across iterations.
        with ops.Graph().as_default():
            inputs = random_ops.random_uniform(
                (batch_size, height, width, 3))
            out_tensor, end_points = inception_v3.inception_v3_base(
                inputs, final_endpoint=endpoint)
            self.assertTrue(
                out_tensor.op.name.startswith('InceptionV3/' + endpoint))
            self.assertItemsEqual(endpoints[:index + 1], end_points)
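# The test methods above belong to a tf.test.TestCase subclass. A minimal
# harness for running them might look like this (the class name is
# hypothetical; the real suite defines its own):
import tensorflow.compat.v1 as tf

class InceptionV3BaseTest(tf.test.TestCase):
    pass  # the test methods above would live here

if __name__ == '__main__':
    tf.test.main()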
def style_prediction(style_input_,
                     activation_names,
                     activation_depths,
                     is_training=True,
                     trainable=True,
                     inception_end_point='Mixed_6e',
                     style_prediction_bottleneck=100,
                     reuse=None):
    """Maps style images to the style embeddings (beta and gamma parameters).

    Args:
      style_input_: Tensor. Batch of style input images.
      activation_names: list of strings. Scope names of the activations of the
        transformer network which are used to apply style normalization.
      activation_depths: list of ints. Depths of the activations of the
        transformer network which are used to apply style normalization.
      is_training: bool. Is it training phase or not?
      trainable: bool. Should the parameters be marked as trainable?
      inception_end_point: string. Specifies the endpoint to construct the
        inception_v3 network up to. This network is part of the style
        prediction network.
      style_prediction_bottleneck: int. Specifies the bottleneck size in the
        number of parameters of the style embedding.
      reuse: bool. Whether to reuse model parameters. Defaults to None.

    Returns:
      Tensor for the output of the style prediction network, Tensor for the
        bottleneck of style parameters of the style prediction network.
    """
    with tf.name_scope('style_prediction'), tf.variable_scope(
            tf.get_variable_scope(), reuse=reuse):
        with slim.arg_scope(_inception_v3_arg_scope(is_training=is_training)):
            with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected, slim.batch_norm],
                    trainable=trainable):
                with slim.arg_scope([slim.batch_norm, slim.dropout],
                                    is_training=is_training):
                    _, end_points = inception_v3.inception_v3_base(
                        style_input_,
                        scope='InceptionV3',
                        final_endpoint=inception_end_point)

        # Shape of feat_convlayer is (batch_size, ?, ?, depth).
        # For the Mixed_6e end point, depth is 768; for an input image size of
        # 256x256, width and height are 14x14.
        feat_convlayer = end_points[inception_end_point]
        with tf.name_scope('bottleneck'):
            # (batch_size, 1, 1, depth).
            bottleneck_feat = tf.reduce_mean(
                feat_convlayer, axis=[1, 2], keep_dims=True)

        if style_prediction_bottleneck > 0:
            with slim.arg_scope([slim.conv2d],
                                activation_fn=None,
                                normalizer_fn=None,
                                trainable=trainable):
                # (batch_size, 1, 1, style_prediction_bottleneck).
                bottleneck_feat = slim.conv2d(
                    bottleneck_feat, style_prediction_bottleneck, [1, 1])

        style_params = {}
        with tf.variable_scope('style_params'):
            for i in range(len(activation_depths)):
                with tf.variable_scope(activation_names[i], reuse=reuse):
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        trainable=trainable):
                        # Compute the beta parameter of the style
                        # normalization for the activation_names[i] layer of
                        # the style transformer network.
                        # (batch_size, 1, 1, activation_depths[i])
                        beta = slim.conv2d(bottleneck_feat,
                                           activation_depths[i], [1, 1])
                        # (batch_size, activation_depths[i])
                        beta = tf.squeeze(beta, [1, 2], name='SpatialSqueeze')
                        style_params['{}/beta'.format(
                            activation_names[i])] = beta

                        # Compute the gamma parameter of the style
                        # normalization for the activation_names[i] layer of
                        # the style transformer network.
                        # (batch_size, 1, 1, activation_depths[i])
                        gamma = slim.conv2d(bottleneck_feat,
                                            activation_depths[i], [1, 1])
                        # (batch_size, activation_depths[i])
                        gamma = tf.squeeze(gamma, [1, 2],
                                           name='SpatialSqueeze')
                        style_params['{}/gamma'.format(
                            activation_names[i])] = gamma

    return style_params, bottleneck_feat
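# A minimal usage sketch for style_prediction, assuming TF1 graph mode and
# that the function's own dependencies (slim, inception_v3,
# _inception_v3_arg_scope) are in scope; the layer names and depths below are
# hypothetical stand-ins for the real transformer activations:
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

style_images = tf.placeholder(tf.float32, [None, 256, 256, 3])
names = ['transformer/contract/conv1', 'transformer/residual/conv1']
depths = [32, 128]
style_params, bottleneck = style_prediction(
    style_images, names, depths, is_training=False, trainable=False)
# style_params['transformer/contract/conv1/beta'] has shape [batch, 32].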
def inception_v3(images,
                 trainable=True,
                 is_training=True,
                 weight_decay=0.00004,
                 stddev=0.1,
                 dropout_keep_prob=0.8,
                 use_batch_norm=True,
                 batch_norm_params=None,
                 add_summaries=True,
                 scope="InceptionV3"):
    """Builds an Inception V3 subgraph for image embeddings.

    Args:
      images: A float32 Tensor of shape [batch, height, width, channels].
      trainable: Whether the inception submodel should be trainable or not.
      is_training: Boolean indicating training mode or not.
      weight_decay: Coefficient for weight regularization.
      stddev: The standard deviation of the truncated normal weight
        initializer.
      dropout_keep_prob: Dropout keep probability.
      use_batch_norm: Whether to use batch normalization.
      batch_norm_params: Parameters for batch normalization. See
        tf.contrib.layers.batch_norm for details.
      add_summaries: Whether to add activation summaries.
      scope: Optional Variable scope.

    Returns:
      net: A float32 Tensor of shape [batch, 2048] containing the flattened,
        average-pooled Inception output.
    """
    # Only consider the inception model to be in training mode if it's
    # trainable.
    is_inception_model_training = trainable and is_training

    if use_batch_norm:
        # Default parameters for batch normalization.
        if not batch_norm_params:
            batch_norm_params = {
                "is_training": is_inception_model_training,
                "trainable": trainable,
                # Decay for the moving averages.
                "decay": 0.9997,
                # Epsilon to prevent 0s in variance.
                "epsilon": 0.001,
                # Collection containing the moving mean and moving variance.
                "variables_collections": {
                    "beta": None,
                    "gamma": None,
                    "moving_mean": ["moving_vars"],
                    "moving_variance": ["moving_vars"],
                }
            }
    else:
        batch_norm_params = None

    if trainable:
        weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    else:
        weights_regularizer = None

    with tf.compat.v1.variable_scope(scope, "InceptionV3", [images]) as scope:
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_regularizer=weights_regularizer,
                            trainable=trainable):
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=tf.compat.v1.truncated_normal_initializer(
                        stddev=stddev),
                    activation_fn=tf.nn.relu,
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=batch_norm_params):
                net, end_points = inception_v3_base(images, scope=scope)
                with tf.compat.v1.variable_scope("logits"):
                    shape = net.get_shape()
                    net = slim.avg_pool2d(net, shape[1:3], padding="VALID",
                                          scope="pool")
                    net = slim.dropout(net,
                                       keep_prob=dropout_keep_prob,
                                       is_training=is_inception_model_training,
                                       scope="dropout")
                    net = slim.flatten(net, scope="flatten")

    # Add summaries.
    if add_summaries:
        for v in end_points.values():
            slim.summarize_activation(v)

    return net
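# A minimal usage sketch for the embedding variant above, assuming TF1 graph
# mode and that slim and inception_v3_base are in scope; the batch and image
# sizes are illustrative:
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

images = tf.placeholder(tf.float32, [None, 299, 299, 3])
embeddings = inception_v3(images, trainable=False, is_training=False)
# `embeddings` is the flattened average-pooled feature, shape [batch, 2048].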