def ssim(self, x, y): """Computes a differentiable structured image similarity measure.""" c1 = 0.01**2 c2 = 0.03**2 mu_x = slim.avg_pool2d(x, 3, 1, 'VALID') mu_y = slim.avg_pool2d(y, 3, 1, 'VALID') sigma_x = slim.avg_pool2d(x**2, 3, 1, 'VALID') - mu_x**2 sigma_y = slim.avg_pool2d(y**2, 3, 1, 'VALID') - mu_y**2 sigma_xy = slim.avg_pool2d(x * y, 3, 1, 'VALID') - mu_x * mu_y ssim_n = (2 * mu_x * mu_y + c1) * (2 * sigma_xy + c2) ssim_d = (mu_x**2 + mu_y**2 + c1) * (sigma_x + sigma_y + c2) ssim = ssim_n / ssim_d return tf.clip_by_value((1 - ssim) / 2, 0, 1)
def _build_nas_aux_head(inputs, dimension, data_format): """Builds the auxiliary head described in the NAS paper.""" shape = inputs.shape if shape.rank < 4: return None shape = shape.as_list() shape = shape[1:3] if data_format == "NHWC" else shape[2:4] if np.any(np.array(shape) < np.array([5, 5])): return None with tf.compat.v1.variable_scope("aux_logits"): with arg_scope(DATA_FORMAT_OPS, data_format=data_format): aux_logits = tf_slim.avg_pool2d(inputs, [5, 5], stride=3, padding="SAME") aux_logits = tf_slim.conv2d(aux_logits, 128, [1, 1], scope="proj") aux_logits = tf_slim.batch_norm(aux_logits, scope="aux_bn0") aux_logits = tf.nn.relu6(aux_logits) # Shape of feature map before the final layer. shape = aux_logits.shape shape = shape[1:3] if data_format == "NHWC" else shape[2:4] aux_logits = tf_slim.conv2d(aux_logits, 768, shape, padding="VALID") aux_logits = tf_slim.batch_norm(aux_logits, scope="aux_bn1") aux_logits = tf.nn.relu6(aux_logits) aux_logits = tf.keras.layers.Flatten()(aux_logits) aux_logits = tf_slim.fully_connected(aux_logits, dimension) return aux_logits
def block_inception_c(inputs, scope=None, reuse=None): """Builds Inception-C block for Inception v4 network.""" # By default use stride=1 and SAME padding with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], stride=1, padding='SAME'): with tf.variable_scope( scope, 'BlockInceptionC', [inputs], reuse=reuse): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') branch_1 = tf.concat(axis=3, values=[ slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'), slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')]) with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1') branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3') branch_2 = tf.concat(axis=3, values=[ slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'), slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')]) with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1') return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
def model( inputs, is_training = True, dropout_keep_prob = 0.8, reuse = None, scope = 'InceptionV4', bottleneck_dim = 512, ): # inputs = tf.image.grayscale_to_rgb(inputs) with tf.variable_scope( scope, 'InceptionV4', [inputs], reuse = reuse ) as scope: with slim.arg_scope( [slim.batch_norm, slim.dropout], is_training = is_training ): net, end_points = inception_v4_base(inputs, scope = scope) print(net.shape) with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride = 1, padding = 'SAME', ): with tf.variable_scope('Logits'): # 8 x 8 x 1536 kernel_size = net.get_shape()[1:3] print(kernel_size) if kernel_size.is_fully_defined(): net = slim.avg_pool2d( net, kernel_size, padding = 'VALID', scope = 'AvgPool_1a', ) else: net = tf.reduce_mean( input_tensor = net, axis = [1, 2], keepdims = True, name = 'global_pool', ) end_points['global_pool'] = net # 1 x 1 x 1536 net = slim.dropout( net, dropout_keep_prob, scope = 'Dropout_1b' ) net = slim.flatten(net, scope = 'PreLogitsFlatten') end_points['PreLogitsFlatten'] = net bottleneck = slim.fully_connected( net, bottleneck_dim, scope = 'bottleneck' ) logits = slim.fully_connected( bottleneck, 2, activation_fn = None, scope = 'Logits_vad', ) return logits
def SSIM(self, x, y): C1 = 0.01 ** 2 C2 = 0.03 ** 2 mu_x = slim.avg_pool2d(x, 3, 1, 'VALID') mu_y = slim.avg_pool2d(y, 3, 1, 'VALID') sigma_x = slim.avg_pool2d(x ** 2, 3, 1, 'VALID') - mu_x ** 2 sigma_y = slim.avg_pool2d(y ** 2, 3, 1, 'VALID') - mu_y ** 2 sigma_xy = slim.avg_pool2d(x * y , 3, 1, 'VALID') - mu_x * mu_y SSIM_n = (2 * mu_x * mu_y + C1) * (2 * sigma_xy + C2) SSIM_d = (mu_x ** 2 + mu_y ** 2 + C1) * (sigma_x + sigma_y + C2) SSIM = SSIM_n / SSIM_d return tf.clip_by_value((1 - SSIM) / 2, 0, 1)
def inference(images, keep_probability, phase_train=True, bottleneck_layer_size=128, weight_decay=0.0, reuse=None): batch_norm_params = { # Decay for the moving averages. 'decay': 0.995, # epsilon to prevent 0s in variance. 'epsilon': 0.001, # force in-place updates of mean and variance estimates 'updates_collections': None, # Moving averages ends up in the trainable variables collection 'variables_collections': [tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES], } with slim.arg_scope( [slim.conv2d, slim.fully_connected], weights_initializer=tf.compat.v1.keras.initializers. VarianceScaling( scale=1.0, mode="fan_avg", distribution=("uniform" if True else "truncated_normal")), weights_regularizer=tf.keras.regularizers.l2(0.5 * (weight_decay)), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params): with tf.compat.v1.variable_scope('squeezenet', [images], reuse=reuse): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=phase_train): net = slim.conv2d(images, 96, [7, 7], stride=2, scope='conv1') net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool1') net = fire_module(net, 16, 64, scope='fire2') net = fire_module(net, 16, 64, scope='fire3') net = fire_module(net, 32, 128, scope='fire4') net = slim.max_pool2d(net, [2, 2], stride=2, scope='maxpool4') net = fire_module(net, 32, 128, scope='fire5') net = fire_module(net, 48, 192, scope='fire6') net = fire_module(net, 48, 192, scope='fire7') net = fire_module(net, 64, 256, scope='fire8') net = slim.max_pool2d(net, [3, 3], stride=2, scope='maxpool8') net = fire_module(net, 64, 256, scope='fire9') net = slim.dropout(net, keep_probability) net = slim.conv2d(net, 1000, [1, 1], activation_fn=None, normalizer_fn=None, scope='conv10') net = slim.avg_pool2d(net, net.get_shape()[1:3], scope='avgpool10') net = tf.squeeze(net, [1, 2], name='logits') net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, scope='Bottleneck', reuse=False) return net, None
def _squeeze_and_excite(h, hidden_dim, activation_fn=tf.nn.relu6): with tf.variable_scope(None, default_name='SqueezeExcite'): height, width = h.shape[1], h.shape[2] u = slim.avg_pool2d(h, [height, width], stride=1, padding='VALID') u = _conv(u, hidden_dim, 1, normalizer_fn=None, activation_fn=activation_fn) u = _conv(u, h.shape[-1], 1, normalizer_fn=None, activation_fn=tf.nn.sigmoid) return u * h
def model( inputs, is_training=True, dropout_keep_prob=0.8, reuse=None, scope='InceptionV4', create_aux_logits=True, num_classes=2, ): with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): net, end_points = inception_v4_base(inputs, scope=scope) print(net.shape) with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME', ): # Final pooling and prediction # TODO(sguada,arnoegw): Consider adding a parameter global_pool which # can be set to False to disable pooling here (as in resnet_*()). with tf.variable_scope('Logits'): # 8 x 8 x 1536 kernel_size = net.get_shape()[1:3] print(kernel_size) if kernel_size.is_fully_defined(): net = slim.avg_pool2d( net, kernel_size, padding='VALID', scope='AvgPool_1a', ) else: net = tf.reduce_mean( input_tensor=net, axis=[1, 2], keepdims=True, name='global_pool', ) end_points['global_pool'] = net # 1 x 1 x 1536 net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b') net = slim.flatten(net, scope='PreLogitsFlatten') end_points['PreLogitsFlatten'] = net # 1536 logits = slim.fully_connected(net, num_classes, activation_fn=None, scope='Logits') return logits
def _pooling(net, stride, operation): """Parses operation and performs the correct pooling operation on net.""" padding = 'SAME' pooling_type, pooling_shape = _operation_to_pooling_info(operation) if pooling_type == 'avg': net = slim.avg_pool2d(net, pooling_shape, stride=stride, padding=padding) elif pooling_type == 'max': net = slim.max_pool2d(net, pooling_shape, stride=stride, padding=padding) else: raise NotImplementedError('Unimplemented pooling type: ', pooling_type) return net
def _transition_block(inputs, num_filters, compression=1.0, scope=None, outputs_collections=None): num_filters = int(num_filters * compression) with tf.variable_scope(scope, 'transition_blockx', [inputs]) as sc: net = inputs net = _conv(net, num_filters, 1, scope='blk') net = slim.avg_pool2d(net, 2) net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) return net, num_filters
def _build_aux_head(net, end_points, num_classes, hparams, scope): """Auxiliary head used for all models across all datasets.""" with tf.compat.v1.variable_scope(scope): aux_logits = tf.identity(net) with tf.compat.v1.variable_scope('aux_logits'): aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3, padding='VALID') aux_logits = slim.conv2d(aux_logits, 128, [1, 1], scope='proj') aux_logits = slim.batch_norm(aux_logits, scope='aux_bn0') aux_logits = tf.nn.relu(aux_logits) # Shape of feature map before the final layer. shape = aux_logits.shape if hparams.data_format == 'NHWC': shape = shape[1:3] else: shape = shape[2:4] aux_logits = slim.conv2d(aux_logits, 768, shape, padding='VALID') aux_logits = slim.batch_norm(aux_logits, scope='aux_bn1') aux_logits = tf.nn.relu(aux_logits) aux_logits = slim.flatten(aux_logits) aux_logits = slim.fully_connected(aux_logits, num_classes) end_points['AuxLogits'] = aux_logits
def inception_v4(inputs, is_training=True, dropout_keep_prob=0.8, reuse=None, scope='InceptionV4'): """Creates the Inception V4 model. Args: inputs: a 4-D tensor of size [batch_size, height, width, 3]. num_classes: number of predicted classes. If 0 or None, the logits layer is omitted and the input features to the logits layer (before dropout) are returned instead. is_training: whether is training or not. dropout_keep_prob: float, the fraction to keep before final layer. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. create_aux_logits: Whether to include the auxiliary logits. Returns: net: a Tensor with the logits (pre-softmax activations) if num_classes is a non-zero integer, or the non-dropped input to the logits layer if num_classes is 0 or None. end_points: the set of end_points from the inception model. """ with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): net, end_points = inception_v4_base(inputs, scope=scope) with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # Final pooling and prediction # TODO(sguada,arnoegw): Consider adding a parameter global_pool which # can be set to False to disable pooling here (as in resnet_*()). with tf.variable_scope('Embeddings'): # 8 x 8 x 1536 kernel_size = net.get_shape()[1:3] if kernel_size.is_fully_defined(): net = slim.avg_pool2d(net, kernel_size, padding='VALID', scope='AvgPool_1a') else: net = tf.reduce_mean(input_tensor=net, axis=[1, 2], keepdims=True, name='global_pool') # 1 x 1 x 1536 net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b') net = slim.flatten(net, scope='PreEmbeddingsFlatten') # 1536 net = slim.fully_connected(net, 512, activation_fn=None, scope='Embeddings') net = (tf.math.tanh(net) + 1) / 2 return net
def inception_resnet_v2(inputs, is_training=True, dropout_keep_prob=0.8, bottleneck_layer_size=128, reuse=None, scope='InceptionResnetV2'): """Creates the Inception Resnet V2 model. Args: inputs: a 4-D tensor of size [batch_size, height, width, 3]. num_classes: number of predicted classes. is_training: whether is training or not. dropout_keep_prob: float, the fraction to keep before final layer. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. Returns: logits: the logits outputs of the model. end_points: the set of end_points from the inception model. """ end_points = {} with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # 149 x 149 x 32 net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') end_points['Conv2d_1a_3x3'] = net # 147 x 147 x 32 net = slim.conv2d(net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3') end_points['Conv2d_2a_3x3'] = net # 147 x 147 x 64 net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') end_points['Conv2d_2b_3x3'] = net # 73 x 73 x 64 net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_3a_3x3') end_points['MaxPool_3a_3x3'] = net # 73 x 73 x 80 net = slim.conv2d(net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1') end_points['Conv2d_3b_1x1'] = net # 71 x 71 x 192 net = slim.conv2d(net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3') end_points['Conv2d_4a_3x3'] = net # 35 x 35 x 192 net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_5a_3x3') end_points['MaxPool_5a_3x3'] = net # 35 x 35 x 320 with tf.variable_scope('Mixed_5b'): with tf.variable_scope('Branch_0'): tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') with tf.variable_scope('Branch_1'): tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, scope='Conv2d_0b_5x5') with tf.variable_scope('Branch_2'): tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, scope='Conv2d_0b_3x3') tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, scope='Conv2d_0c_3x3') with tf.variable_scope('Branch_3'): tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', scope='AvgPool_0a_3x3') tower_pool_1 = slim.conv2d(tower_pool, 64, 1, scope='Conv2d_0b_1x1') net = tf.concat([ tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1 ], 3) end_points['Mixed_5b'] = net net = slim.repeat(net, 10, block35, scale=0.17) # 17 x 17 x 1024 with tf.variable_scope('Mixed_6a'): with tf.variable_scope('Branch_0'): tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_1'): tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, scope='Conv2d_0b_3x3') tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_2'): tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_1a_3x3') net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) end_points['Mixed_6a'] = net net = slim.repeat(net, 20, block17, scale=0.10) with tf.variable_scope('Mixed_7a'): with tf.variable_scope('Branch_0'): tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_1'): tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_2'): tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, scope='Conv2d_0b_3x3') tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_3'): tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_1a_3x3') net = tf.concat([ tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool ], 3) end_points['Mixed_7a'] = net net = slim.repeat(net, 9, block8, scale=0.20) net = block8(net, activation_fn=None) net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') end_points['Conv2d_7b_1x1'] = net with tf.variable_scope('Logits'): end_points['PrePool'] = net #pylint: disable=no-member net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', scope='AvgPool_1a_8x8') net = slim.flatten(net) net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='Dropout') end_points['PreLogitsFlatten'] = net net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, scope='Bottleneck', reuse=False) return net, end_points
def mobilenet_v1(inputs, num_classes=1000, dropout_keep_prob=0.999, is_training=True, min_depth=8, depth_multiplier=1.0, conv_defs=None, prediction_fn=tf.nn.softmax, spatial_squeeze=True, reuse=None, scope='MobilenetV1'): """Mobilenet v1 model for classification. Args: inputs: a tensor of shape [batch_size, height, width, channels]. num_classes: number of predicted classes. dropout_keep_prob: the percentage of activation values that are retained. is_training: whether is training or not. min_depth: Minimum depth value (number of channels) for all convolution ops. Enforced when depth_multiplier < 1, and not an active constraint when depth_multiplier >= 1. depth_multiplier: Float multiplier for the depth (number of channels) for all convolution ops. The value must be greater than zero. Typical usage will be to set this value in (0, 1) to reduce the number of parameters or computation cost of the model. conv_defs: A list of ConvDef namedtuples specifying the net architecture. prediction_fn: a function to get predictions out of logits. spatial_squeeze: if True, logits is of shape is [B, C], if false logits is of shape [B, 1, 1, C], where B is batch_size and C is number of classes. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. Returns: logits: the pre-softmax activations, a tensor of size [batch_size, num_classes] end_points: a dictionary from components of the network to the corresponding activation. Raises: ValueError: Input rank is invalid. """ input_shape = inputs.get_shape().as_list() if len(input_shape) != 4: raise ValueError('Invalid input tensor rank, expected 4, was: %d' % len(input_shape)) with tf.compat.v1.variable_scope(scope, 'MobilenetV1', [inputs, num_classes], reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): net, end_points = mobilenet_v1_base( inputs, scope=scope, min_depth=min_depth, depth_multiplier=depth_multiplier, conv_defs=conv_defs) with tf.compat.v1.variable_scope('Logits'): kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7]) net = slim.avg_pool2d(net, kernel_size, padding='VALID', scope='AvgPool_1a') end_points['AvgPool_1a'] = net # 1 x 1 x 1024 net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='Conv2d_1c_1x1') if spatial_squeeze: logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') end_points['Logits'] = logits if prediction_fn: end_points['Predictions'] = prediction_fn(logits, scope='Predictions') return logits, end_points
def inception_v1(inputs, num_classes=1000, is_training=True, dropout_keep_prob=0.8, prediction_fn=slim.softmax, spatial_squeeze=True, reuse=None, scope='InceptionV1', global_pool=False): """Defines the Inception V1 architecture. This architecture is defined in: Going deeper with convolutions Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich. http://arxiv.org/pdf/1409.4842v1.pdf. The default image size used to train this network is 224x224. Args: inputs: a tensor of size [batch_size, height, width, channels]. num_classes: number of predicted classes. If 0 or None, the logits layer is omitted and the input features to the logits layer (before dropout) are returned instead. is_training: whether is training or not. dropout_keep_prob: the percentage of activation values that are retained. prediction_fn: a function to get predictions out of logits. spatial_squeeze: if True, logits is of shape [B, C], if false logits is of shape [B, 1, 1, C], where B is batch_size and C is number of classes. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. global_pool: Optional boolean flag to control the avgpooling before the logits layer. If false or unset, pooling is done with a fixed window that reduces default-sized inputs to 1x1, while larger inputs lead to larger outputs. If true, any input size is pooled down to 1x1. Returns: net: a Tensor with the logits (pre-softmax activations) if num_classes is a non-zero integer, or the non-dropped-out input to the logits layer if num_classes is 0 or None. end_points: a dictionary from components of the network to the corresponding activation. """ # Final pooling and prediction with tf.compat.v1.variable_scope(scope, 'InceptionV1', [inputs], reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): net, end_points = inception_v1_base(inputs, scope=scope) with tf.compat.v1.variable_scope('Logits'): if global_pool: # Global average pooling. net = tf.reduce_mean(input_tensor=net, axis=[1, 2], keepdims=True, name='global_pool') end_points['global_pool'] = net else: # Pooling with a fixed kernel size. net = slim.avg_pool2d(net, [7, 7], stride=1, scope='AvgPool_0a_7x7') end_points['AvgPool_0a_7x7'] = net if not num_classes: return net, end_points net = slim.dropout(net, dropout_keep_prob, scope='Dropout_0b') logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='Conv2d_0c_1x1') if spatial_squeeze: logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') end_points['Logits'] = logits end_points['Predictions'] = prediction_fn(logits, scope='Predictions') return logits, end_points
def inception_v3(inputs, num_classes=1000, is_training=True, drop_out_keep_prob=0.8, prediction_fn=slim.softmax, spatial_squeeze=True, reuse=None, scope='InceptionV3'): with tf.compat.v1.variable_scope(scope, 'InceptionV3', [inputs, num_classes], reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): net, end_points = inception_v3_base(inputs, scope=scope) with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): aux_logits = end_points['Mixed_6e'] with tf.compat.v1.variable_scope('AuxLogits'): aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3, padding='VALID', scope='AvgPool_1a_5x5') aux_logits = slim.conv2d(aux_logits, 128, [1, 1], scope='Conv2d_1b_1x1') aux_logits = slim.conv2d( aux_logits, 768, [5, 5], weights_initializer=trunc_normal(0.01), padding='VALID', scope='Conv2d_1c_5x5') aux_logits = slim.conv2d( aux_logits, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, weights_initializer=trunc_normal(0.001), scope='Conv2d_1d_1x1') if spatial_squeeze: aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze') end_points['AuxLogits'] = aux_logits with tf.compat.v1.variable_scope('logits'): net = slim.avg_pool2d(net, [8, 8], padding='VALID', scope='AvgPool_1a_8x8') net = slim.dropout(net, keep_prob=drop_out_keep_prob, scope='Dropout_1b') end_points['PreLogits'] = net logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='Conv2d_1e_1x1') if spatial_squeeze: logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') end_points['Logits'] = logits end_points['Predictions'] = prediction_fn(logits, scope='Predictions') return logits, end_points
def inception_resnet_v2(inputs, num_classes=1001, is_training=True, dropout_keep_prob=0.8, reuse=None, scope='InceptionResnetV2', create_aux_logits=True, activation_fn=tf.nn.relu): """Creates the Inception Resnet V2 model. Args: inputs: a 4-D tensor of size [batch_size, height, width, 3]. Dimension batch_size may be undefined. If create_aux_logits is false, also height and width may be undefined. num_classes: number of predicted classes. If 0 or None, the logits layer is omitted and the input features to the logits layer (before dropout) are returned instead. is_training: whether is training or not. dropout_keep_prob: float, the fraction to keep before final layer. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. create_aux_logits: Whether to include the auxilliary logits. activation_fn: Activation function for conv2d. Returns: net: the output of the logits layer (if num_classes is a non-zero integer), or the non-dropped-out input to the logits layer (if num_classes is 0 or None). end_points: the set of end_points from the inception model. """ end_points = {} with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): net, end_points = inception_resnet_v2_base( inputs, scope=scope, activation_fn=activation_fn) if create_aux_logits and num_classes: with tf.variable_scope('AuxLogits'): aux = end_points['PreAuxLogits'] aux = slim.avg_pool2d(aux, 5, stride=3, padding='VALID', scope='Conv2d_1a_3x3') aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1') aux = slim.conv2d(aux, 768, aux.get_shape()[1:3], padding='VALID', scope='Conv2d_2a_5x5') aux = slim.flatten(aux) aux = slim.fully_connected(aux, num_classes, activation_fn=None, scope='Logits') end_points['AuxLogits'] = aux with tf.variable_scope('Logits'): # TODO(sguada,arnoegw): Consider adding a parameter global_pool which # can be set to False to disable pooling here (as in resnet_*()). kernel_size = net.get_shape()[1:3] if kernel_size.is_fully_defined(): net = slim.avg_pool2d(net, kernel_size, padding='VALID', scope='AvgPool_1a_8x8') else: net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool') end_points['global_pool'] = net if not num_classes: return net, end_points net = slim.flatten(net) net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='Dropout') end_points['PreLogitsFlatten'] = net logits = slim.fully_connected(net, num_classes, activation_fn=None, scope='Logits') end_points['Logits'] = logits end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') return logits, end_points
def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, min_depth, insert_1x1_conv, image_features, pool_residual=False): """Generates multi resolution feature maps from input image features. Generates multi-scale feature maps for detection as in the SSD papers by Liu et al: https://arxiv.org/pdf/1512.02325v2.pdf, See Sec 2.1. More specifically, it performs the following two tasks: 1) If a layer name is provided in the configuration, returns that layer as a feature map. 2) If a layer name is left as an empty string, constructs a new feature map based on the spatial shape and depth configuration. Note that the current implementation only supports generating new layers using convolution of stride 2 resulting in a spatial resolution reduction by a factor of 2. By default convolution kernel size is set to 3, and it can be customized by caller. An example of the configuration for Inception V3: { 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''], 'layer_depth': [-1, -1, -1, 512, 256, 128] } Args: feature_map_layout: Dictionary of specifications for the feature map layouts in the following format (Inception V2/V3 respectively): { 'from_layer': ['Mixed_3c', 'Mixed_4c', 'Mixed_5c', '', '', ''], 'layer_depth': [-1, -1, -1, 512, 256, 128] } or { 'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''], 'layer_depth': [-1, -1, -1, 512, 256, 128] } If 'from_layer' is specified, the specified feature map is directly used as a box predictor layer, and the layer_depth is directly infered from the feature map (instead of using the provided 'layer_depth' parameter). In this case, our convention is to set 'layer_depth' to -1 for clarity. Otherwise, if 'from_layer' is an empty string, then the box predictor layer will be built from the previous layer using convolution operations. Note that the current implementation only supports generating new layers using convolutions of stride 2 (resulting in a spatial resolution reduction by a factor of 2), and will be extended to a more flexible design. Convolution kernel size is set to 3 by default, and can be customized by 'conv_kernel_size' parameter (similarily, 'conv_kernel_size' should be set to -1 if 'from_layer' is specified). The created convolution operation will be a normal 2D convolution by default, and a depthwise convolution followed by 1x1 convolution if 'use_depthwise' is set to True. depth_multiplier: Depth multiplier for convolutional layers. min_depth: Minimum depth for convolutional layers. insert_1x1_conv: A boolean indicating whether an additional 1x1 convolution should be inserted before shrinking the feature map. image_features: A dictionary of handles to activation tensors from the base feature extractor. pool_residual: Whether to add an average pooling layer followed by a residual connection between subsequent feature maps when the channel depth match. For example, with option 'layer_depth': [-1, 512, 256, 256], a pooling and residual layer is added between the third and forth feature map. This option is better used with Weight Shared Convolution Box Predictor when all feature maps have the same channel depth to encourage more consistent features across multi-scale feature maps. Returns: feature_maps: an OrderedDict mapping keys (feature map names) to tensors where each tensor has shape [batch, height_i, width_i, depth_i]. Raises: ValueError: if the number entries in 'from_layer' and 'layer_depth' do not match. ValueError: if the generated layer does not have the same resolution as specified. """ depth_fn = get_depth_fn(depth_multiplier, min_depth) feature_map_keys = [] feature_maps = [] base_from_layer = '' use_explicit_padding = False if 'use_explicit_padding' in feature_map_layout: use_explicit_padding = feature_map_layout['use_explicit_padding'] use_depthwise = False if 'use_depthwise' in feature_map_layout: use_depthwise = feature_map_layout['use_depthwise'] for index, from_layer in enumerate(feature_map_layout['from_layer']): layer_depth = feature_map_layout['layer_depth'][index] conv_kernel_size = 3 if 'conv_kernel_size' in feature_map_layout: conv_kernel_size = feature_map_layout['conv_kernel_size'][index] if from_layer: feature_map = image_features[from_layer] base_from_layer = from_layer feature_map_keys.append(from_layer) else: pre_layer = feature_maps[-1] pre_layer_depth = pre_layer.get_shape().as_list()[3] intermediate_layer = pre_layer if insert_1x1_conv: layer_name = '{}_1_Conv2d_{}_1x1_{}'.format( base_from_layer, index, depth_fn(layer_depth // 2)) intermediate_layer = slim.conv2d( pre_layer, depth_fn(layer_depth // 2), [1, 1], padding='SAME', stride=1, scope=layer_name) layer_name = '{}_2_Conv2d_{}_{}x{}_s2_{}'.format( base_from_layer, index, conv_kernel_size, conv_kernel_size, depth_fn(layer_depth)) stride = 2 padding = 'SAME' if use_explicit_padding: padding = 'VALID' intermediate_layer = ops.fixed_padding( intermediate_layer, conv_kernel_size) if use_depthwise: feature_map = slim.separable_conv2d( intermediate_layer, None, [conv_kernel_size, conv_kernel_size], depth_multiplier=1, padding=padding, stride=stride, scope=layer_name + '_depthwise') feature_map = slim.conv2d( feature_map, depth_fn(layer_depth), [1, 1], padding='SAME', stride=1, scope=layer_name) if pool_residual and pre_layer_depth == depth_fn(layer_depth): feature_map += slim.avg_pool2d( pre_layer, [3, 3], padding='SAME', stride=2, scope=layer_name + '_pool') else: feature_map = slim.conv2d( intermediate_layer, depth_fn(layer_depth), [conv_kernel_size, conv_kernel_size], padding=padding, stride=stride, scope=layer_name) feature_map_keys.append(layer_name) feature_maps.append(feature_map) return collections.OrderedDict( [(x, y) for (x, y) in zip(feature_map_keys, feature_maps)])
def attention_inception_v3_base(inputs, final_endpoint='Mixed_7c', min_depth=16, depth_multiplier=1.0, scope=None, attention_module='', attention_position='all'): """Inception model from http://arxiv.org/abs/1512.00567. Constructs an Inception v3 network from inputs to the given final endpoint. This method can construct the network up to the final inception block Mixed_7c. Note that the names of the layers in the paper do not correspond to the names of the endpoints registered by this function although they build the same network. Here is a mapping from the old_names to the new names: Old name | New name ======================================= conv0 | Conv2d_1a_3x3 conv1 | Conv2d_2a_3x3 conv2 | Conv2d_2b_3x3 pool1 | MaxPool_3a_3x3 conv3 | Conv2d_3b_1x1 conv4 | Conv2d_4a_3x3 pool2 | MaxPool_5a_3x3 mixed_35x35x256a | Mixed_5b mixed_35x35x288a | Mixed_5c mixed_35x35x288b | Mixed_5d mixed_17x17x768a | Mixed_6a mixed_17x17x768b | Mixed_6b mixed_17x17x768c | Mixed_6c mixed_17x17x768d | Mixed_6d mixed_17x17x768e | Mixed_6e mixed_8x8x1280a | Mixed_7a mixed_8x8x2048a | Mixed_7b mixed_8x8x2048b | Mixed_7c Args: inputs: a tensor of size [batch_size, height, width, channels]. final_endpoint: specifies the endpoint to construct the network up to. It can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']. min_depth: Minimum depth value (number of channels) for all convolution ops. Enforced when depth_multiplier < 1, and not an active constraint when depth_multiplier >= 1. depth_multiplier: Float multiplier for the depth (number of channels) for all convolution ops. The value must be greater than zero. Typical usage will be to set this value in (0, 1) to reduce the number of parameters or computation cost of the model. scope: Optional variable_scope. attention_module: Optional attention_module. Accepted values are '' or 'se_block'. attention_position: Optional attention_position. Default is 'all'. Accepted values are 'head', 'extractor', and 'all'. Returns: tensor_out: output tensor corresponding to the final_endpoint. end_points: a set of activations for external use, for example summaries or losses. Raises: ValueError: if final_endpoint is not set to one of the predefined values, or depth_multiplier <= 0 """ # end_points will collect relevant activations for external use, for example # summaries or losses. end_points = {} def add_and_check_final(name, net): end_points[name] = net return name == final_endpoint def add_attention_layer(attention_module, attention_position, net, end_point): if attention_module: if attention_position == 'extractor' or attention_position == 'all': end_point, net = att.attach_attention_module( net, attention_module, end_point) return net, end_point if depth_multiplier <= 0: raise ValueError('depth_multiplier is not greater than zero.') depth = lambda d: max(int(d * depth_multiplier), min_depth) with tf.variable_scope(scope, 'InceptionV3', [inputs]): with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='VALID'): # 299 x 299 x 3 end_point = 'Input' if attention_module: if attention_position == 'head' or attention_position == 'all': end_point, net = att.attach_attention_module( inputs, attention_module, end_point) if add_and_check_final(end_point, net): return net, end_points else: net = inputs else: net = inputs end_point = 'Conv2d_1a_3x3' net = slim.conv2d(net, depth(32), [3, 3], stride=2, scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 149 x 149 x 32 end_point = 'Conv2d_2a_3x3' net = slim.conv2d(net, depth(32), [3, 3], scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 147 x 147 x 32 end_point = 'Conv2d_2b_3x3' net = slim.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 147 x 147 x 64 end_point = 'MaxPool_3a_3x3' net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 73 x 73 x 64 end_point = 'Conv2d_3b_1x1' net = slim.conv2d(net, depth(80), [1, 1], scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 73 x 73 x 80. end_point = 'Conv2d_4a_3x3' net = slim.conv2d(net, depth(192), [3, 3], scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 71 x 71 x 192. end_point = 'MaxPool_5a_3x3' net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 35 x 35 x 192. # Inception blocks with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # mixed: 35 x 35 x 256. end_point = 'Mixed_5b' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], scope='Conv2d_0b_5x5') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(32), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # Attention Module after Mixed_5b net, end_point = add_attention_layer(attention_module, attention_position, net, end_point) if add_and_check_final(end_point, net): return net, end_points # mixed_1: 35 x 35 x 288. end_point = 'Mixed_5c' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1') branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], scope='Conv_1_0c_5x5') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(64), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # Attention Module after Mixed_5c net, end_point = add_attention_layer(attention_module, attention_position, net, end_point) if add_and_check_final(end_point, net): return net, end_points # mixed_2: 35 x 35 x 288. end_point = 'Mixed_5d' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], scope='Conv2d_0b_5x5') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(64), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # Attention Module after Mixed_5d net, end_point = add_attention_layer(attention_module, attention_position, net, end_point) if add_and_check_final(end_point, net): return net, end_points # mixed_3: 17 x 17 x 768. end_point = 'Mixed_6a' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2, padding='VALID', scope='Conv2d_1a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3') branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2, padding='VALID', scope='Conv2d_1a_1x1') with tf.variable_scope('Branch_2'): branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='MaxPool_1a_3x3') net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # Attention Module after Mixed_6a net, end_point = add_attention_layer(attention_module, attention_position, net, end_point) if add_and_check_final(end_point, net): return net, end_points # mixed4: 17 x 17 x 768. end_point = 'Mixed_6b' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(128), [1, 7], scope='Conv2d_0b_1x7') branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(128), [7, 1], scope='Conv2d_0b_7x1') branch_2 = slim.conv2d(branch_2, depth(128), [1, 7], scope='Conv2d_0c_1x7') branch_2 = slim.conv2d(branch_2, depth(128), [7, 1], scope='Conv2d_0d_7x1') branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # Attention Module after Mixed_6b net, end_point = add_attention_layer(attention_module, attention_position, net, end_point) if add_and_check_final(end_point, net): return net, end_points # mixed_5: 17 x 17 x 768. end_point = 'Mixed_6c' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(160), [1, 7], scope='Conv2d_0b_1x7') branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0b_7x1') branch_2 = slim.conv2d(branch_2, depth(160), [1, 7], scope='Conv2d_0c_1x7') branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0d_7x1') branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # Attention Module after Mixed_6c net, end_point = add_attention_layer(attention_module, attention_position, net, end_point) if add_and_check_final(end_point, net): return net, end_points # mixed_6: 17 x 17 x 768. end_point = 'Mixed_6d' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(160), [1, 7], scope='Conv2d_0b_1x7') branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0b_7x1') branch_2 = slim.conv2d(branch_2, depth(160), [1, 7], scope='Conv2d_0c_1x7') branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0d_7x1') branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # Attention Module after Mixed_6d net, end_point = add_attention_layer(attention_module, attention_position, net, end_point) if add_and_check_final(end_point, net): return net, end_points # mixed_7: 17 x 17 x 768. end_point = 'Mixed_6e' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(192), [1, 7], scope='Conv2d_0b_1x7') branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(192), [7, 1], scope='Conv2d_0b_7x1') branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0c_1x7') branch_2 = slim.conv2d(branch_2, depth(192), [7, 1], scope='Conv2d_0d_7x1') branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # Attention Module after Mixed_6e net, end_point = add_attention_layer(attention_module, attention_position, net, end_point) if add_and_check_final(end_point, net): return net, end_points # mixed_8: 8 x 8 x 1280. end_point = 'Mixed_7a' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(192), [1, 7], scope='Conv2d_0b_1x7') branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1') branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_2'): branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='MaxPool_1a_3x3') net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # Attention Module after Mixed_7a net, end_point = add_attention_layer(attention_module, attention_position, net, end_point) if add_and_check_final(end_point, net): return net, end_points # mixed_9: 8 x 8 x 2048. end_point = 'Mixed_7b' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1') branch_1 = tf.concat(axis=3, values=[ slim.conv2d( branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'), slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1') ]) with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3') branch_2 = tf.concat(axis=3, values=[ slim.conv2d( branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'), slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1') ]) with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # Attention Module after Mixed_7b net, end_point = add_attention_layer(attention_module, attention_position, net, end_point) if add_and_check_final(end_point, net): return net, end_points # mixed_10: 8 x 8 x 2048. end_point = 'Mixed_7c' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1') branch_1 = tf.concat(axis=3, values=[ slim.conv2d( branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'), slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1') ]) with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3') branch_2 = tf.concat(axis=3, values=[ slim.conv2d( branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'), slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1') ]) with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points raise ValueError('Unknown final endpoint %s' % final_endpoint)
def inception_v2_base(inputs, final_endpoint='Mixed_5c', min_depth=16, depth_multiplier=1.0, use_separable_conv=True, data_format='NHWC', scope=None): """Inception v2 (6a2). Constructs an Inception v2 network from inputs to the given final endpoint. This method can construct the network up to the layer inception(5b) as described in http://arxiv.org/abs/1502.03167. Args: inputs: a tensor of shape [batch_size, height, width, channels]. final_endpoint: specifies the endpoint to construct the network up to. It can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c']. min_depth: Minimum depth value (number of channels) for all convolution ops. Enforced when depth_multiplier < 1, and not an active constraint when depth_multiplier >= 1. depth_multiplier: Float multiplier for the depth (number of channels) for all convolution ops. The value must be greater than zero. Typical usage will be to set this value in (0, 1) to reduce the number of parameters or computation cost of the model. use_separable_conv: Use a separable convolution for the first layer Conv2d_1a_7x7. If this is False, use a normal convolution instead. data_format: Data format of the activations ('NHWC' or 'NCHW'). scope: Optional variable_scope. Returns: tensor_out: output tensor corresponding to the final_endpoint. end_points: a set of activations for external use, for example summaries or losses. Raises: ValueError: if final_endpoint is not set to one of the predefined values, or depth_multiplier <= 0 """ # end_points will collect relevant activations for external use, for example # summaries or losses. end_points = {} # Used to find thinned depths for each layer. if depth_multiplier <= 0: raise ValueError('depth_multiplier is not greater than zero.') depth = lambda d: max(int(d * depth_multiplier), min_depth) if data_format != 'NHWC' and data_format != 'NCHW': raise ValueError('data_format must be either NHWC or NCHW.') if data_format == 'NCHW' and use_separable_conv: raise ValueError( 'separable convolution only supports NHWC layout. NCHW data format can' ' only be used when use_separable_conv is False.' ) concat_dim = 3 if data_format == 'NHWC' else 1 with tf.compat.v1.variable_scope(scope, 'InceptionV2', [inputs]): with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME', data_format=data_format): # Note that sizes in the comments below assume an input spatial size of # 224x224, however, the inputs can be of any size greater 32x32. # 224 x 224 x 3 end_point = 'Conv2d_1a_7x7' if use_separable_conv: # depthwise_multiplier here is different from depth_multiplier. # depthwise_multiplier determines the output channels of the initial # depthwise conv (see docs for tf.nn.separable_conv2d), while # depth_multiplier controls the # channels of the subsequent 1x1 # convolution. Must have # in_channels * depthwise_multipler <= out_channels # so that the separable convolution is not overparameterized. depthwise_multiplier = min(int(depth(64) / 3), 8) net = slim.separable_conv2d( inputs, depth(64), [7, 7], depth_multiplier=depthwise_multiplier, stride=2, padding='SAME', weights_initializer=trunc_normal(1.0), scope=end_point) else: # Use a normal convolution instead of a separable convolution. net = slim.conv2d( inputs, depth(64), [7, 7], stride=2, weights_initializer=trunc_normal(1.0), scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 112 x 112 x 64 end_point = 'MaxPool_2a_3x3' net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 56 x 56 x 64 end_point = 'Conv2d_2b_1x1' net = slim.conv2d(net, depth(64), [1, 1], scope=end_point, weights_initializer=trunc_normal(0.1)) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 56 x 56 x 64 end_point = 'Conv2d_2c_3x3' net = slim.conv2d(net, depth(192), [3, 3], scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 56 x 56 x 192 end_point = 'MaxPool_3a_3x3' net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 28 x 28 x 192 # Inception module. end_point = 'Mixed_3b' with tf.compat.v1.variable_scope(end_point): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(64), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(64), [3, 3], scope='Conv2d_0b_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d( net, depth(64), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d( branch_3, depth(32), [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1') net = tf.concat( axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 28 x 28 x 256 end_point = 'Mixed_3c' with tf.compat.v1.variable_scope(end_point): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(64), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d( net, depth(64), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d( branch_3, depth(64), [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1') net = tf.concat( axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 28 x 28 x 320 end_point = 'Mixed_4a' with tf.compat.v1.variable_scope(end_point): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d( net, depth(128), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_0 = slim.conv2d(branch_0, depth(160), [3, 3], stride=2, scope='Conv2d_1a_3x3') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(64), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d( branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3') branch_1 = slim.conv2d( branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.max_pool2d( net, [3, 3], stride=2, scope='MaxPool_1a_3x3') net = tf.concat(axis=concat_dim, values=[branch_0, branch_1, branch_2]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 14 x 14 x 576 end_point = 'Mixed_4b' with tf.compat.v1.variable_scope(end_point): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(224), [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(64), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d( branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d( net, depth(96), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], scope='Conv2d_0c_3x3') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d( branch_3, depth(128), [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1') net = tf.concat( axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 14 x 14 x 576 end_point = 'Mixed_4c' with tf.compat.v1.variable_scope(end_point): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(96), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(128), [3, 3], scope='Conv2d_0b_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d( net, depth(96), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], scope='Conv2d_0c_3x3') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d( branch_3, depth(128), [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1') net = tf.concat( axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 14 x 14 x 576 end_point = 'Mixed_4d' with tf.compat.v1.variable_scope(end_point): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(128), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(160), [3, 3], scope='Conv2d_0b_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d( net, depth(128), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(160), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(160), [3, 3], scope='Conv2d_0c_3x3') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d( branch_3, depth(96), [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1') net = tf.concat( axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 14 x 14 x 576 end_point = 'Mixed_4e' with tf.compat.v1.variable_scope(end_point): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(96), [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(128), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], scope='Conv2d_0b_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d( net, depth(160), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(192), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(192), [3, 3], scope='Conv2d_0c_3x3') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d( branch_3, depth(96), [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1') net = tf.concat( axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 14 x 14 x 576 end_point = 'Mixed_5a' with tf.compat.v1.variable_scope(end_point): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d( net, depth(128), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2, scope='Conv2d_1a_3x3') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(192), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], scope='Conv2d_0b_3x3') branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2, scope='Conv2d_1a_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.max_pool2d(net, [3, 3], stride=2, scope='MaxPool_1a_3x3') net = tf.concat( axis=concat_dim, values=[branch_0, branch_1, branch_2]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 7 x 7 x 1024 end_point = 'Mixed_5b' with tf.compat.v1.variable_scope(end_point): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(192), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], scope='Conv2d_0b_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d( net, depth(160), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], scope='Conv2d_0c_3x3') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d( branch_3, depth(128), [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1') net = tf.concat( axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 7 x 7 x 1024 end_point = 'Mixed_5c' with tf.compat.v1.variable_scope(end_point): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(192), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], scope='Conv2d_0b_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d( net, depth(192), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], scope='Conv2d_0c_3x3') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') branch_3 = slim.conv2d( branch_3, depth(128), [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1') net = tf.concat( axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points raise ValueError('Unknown final endpoint %s' % final_endpoint)
def _extract_box_classifier_features(self, proposal_feature_maps, scope): """Extracts second stage box classifier features. Args: proposal_feature_maps: A 4-D float tensor with shape [batch_size * self.max_num_proposals, crop_height, crop_width, depth] representing the feature map cropped to each proposal. scope: A scope name (unused). Returns: proposal_classifier_features: A 4-D float tensor with shape [batch_size * self.max_num_proposals, height, width, depth] representing box classifier features for each proposal. """ net = proposal_feature_maps depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth) trunc_normal = lambda stddev: tf.truncated_normal_initializer( 0.0, stddev) data_format = 'NHWC' concat_dim = 3 if data_format == 'NHWC' else 1 with tf.variable_scope('InceptionV2', reuse=self._reuse_weights): with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME', data_format=data_format): with _batch_norm_arg_scope( [slim.conv2d, slim.separable_conv2d], batch_norm_scale=True, train_batch_norm=self._train_batch_norm): with tf.variable_scope('Mixed_5a'): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d( net, depth(128), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2, scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(192), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], scope='Conv2d_0b_3x3') branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2, scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_2'): branch_2 = slim.max_pool2d(net, [3, 3], stride=2, scope='MaxPool_1a_3x3') net = tf.concat([branch_0, branch_1, branch_2], concat_dim) with tf.variable_scope('Mixed_5b'): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(192), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], scope='Conv2d_0b_3x3') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d( net, depth(160), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], scope='Conv2d_0c_3x3') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d( branch_3, depth(128), [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1') net = tf.concat( [branch_0, branch_1, branch_2, branch_3], concat_dim) with tf.variable_scope('Mixed_5c'): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d( net, depth(192), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], scope='Conv2d_0b_3x3') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d( net, depth(192), [1, 1], weights_initializer=trunc_normal(0.09), scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], scope='Conv2d_0c_3x3') with tf.variable_scope('Branch_3'): branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') branch_3 = slim.conv2d( branch_3, depth(128), [1, 1], weights_initializer=trunc_normal(0.1), scope='Conv2d_0b_1x1') proposal_classifier_features = tf.concat( [branch_0, branch_1, branch_2, branch_3], concat_dim) return proposal_classifier_features
def inception_v2(inputs, num_classes=1000, is_training=True, dropout_keep_prob=0.8, min_depth=16, depth_multiplier=1.0, prediction_fn=slim.softmax, spatial_squeeze=True, reuse=None, scope='InceptionV2', global_pool=False): """Inception v2 model for classification. Constructs an Inception v2 network for classification as described in http://arxiv.org/abs/1502.03167. The default image size used to train this network is 224x224. Args: inputs: a tensor of shape [batch_size, height, width, channels]. num_classes: number of predicted classes. If 0 or None, the logits layer is omitted and the input features to the logits layer (before dropout) are returned instead. is_training: whether is training or not. dropout_keep_prob: the percentage of activation values that are retained. min_depth: Minimum depth value (number of channels) for all convolution ops. Enforced when depth_multiplier < 1, and not an active constraint when depth_multiplier >= 1. depth_multiplier: Float multiplier for the depth (number of channels) for all convolution ops. The value must be greater than zero. Typical usage will be to set this value in (0, 1) to reduce the number of parameters or computation cost of the model. prediction_fn: a function to get predictions out of logits. spatial_squeeze: if True, logits is of shape [B, C], if false logits is of shape [B, 1, 1, C], where B is batch_size and C is number of classes. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. global_pool: Optional boolean flag to control the avgpooling before the logits layer. If false or unset, pooling is done with a fixed window that reduces default-sized inputs to 1x1, while larger inputs lead to larger outputs. If true, any input size is pooled down to 1x1. Returns: net: a Tensor with the logits (pre-softmax activations) if num_classes is a non-zero integer, or the non-dropped-out input to the logits layer if num_classes is 0 or None. end_points: a dictionary from components of the network to the corresponding activation. Raises: ValueError: if final_endpoint is not set to one of the predefined values, or depth_multiplier <= 0 """ if depth_multiplier <= 0: raise ValueError('depth_multiplier is not greater than zero.') # Final pooling and prediction with tf.compat.v1.variable_scope(scope, 'InceptionV2', [inputs], reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): net, end_points = inception_v2_base( inputs, scope=scope, min_depth=min_depth, depth_multiplier=depth_multiplier) with tf.compat.v1.variable_scope('Logits'): if global_pool: # Global average pooling. net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool') end_points['global_pool'] = net else: # Pooling with a fixed kernel size. kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7]) net = slim.avg_pool2d(net, kernel_size, padding='VALID', scope='AvgPool_1a_{}x{}'.format(*kernel_size)) end_points['AvgPool_1a'] = net if not num_classes: return net, end_points # 1 x 1 x 1024 net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='Conv2d_1c_1x1') if spatial_squeeze: logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') end_points['Logits'] = logits end_points['Predictions'] = prediction_fn(logits, scope='Predictions') return logits, end_points
def inception_v3_base(inputs, scope=None): end_points = {} with tf.compat.v1.variable_scope(scope, 'InceptionV3', [inputs]): with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='VALID'): net = slim.conv2d(inputs, 32, [3, 3], stride=2, scope='Conv2d_1a_3x3') net = slim.conv2d(net, 32, [3, 3], scope='Conv2d_2a_3x3') net = slim.conv2d(net, 64, [3, 3], padding='SAME', scope='Conv2d_2b_3x3') net = slim.max_pool2d(net, [3, 3], stride=2, scope='MaxPool_3a_3x3') net = slim.conv2d(net, 80, [1, 1], scope='Conv2d_3b_1x1') net = slim.conv2d(net, 192, [3, 3], scope='Conv2d_4a_3x3') net = slim.max_pool2d(net, [3, 3], stride=2, scope='MaxPool_5a_3x3') with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): with tf.compat.v1.variable_scope('Mixed_5b'): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_1a_1x1') branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='Conv2d_1b_5x5') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_2a_1x1') branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_2b_3x3') branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_2c_3x3') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_3a_3x3') branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_3b_1x1') net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) with tf.compat.v1.variable_scope('Mixed_5c'): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_1a_1x1') branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='Conv2d_1b_5x5') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_2a_1x1') branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_2b_3x3') branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_2c_3x3') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_3a_3x3') branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_3b_1x1') net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) with tf.compat.v1.variable_scope('Mixed_5d'): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_1a_1x1') branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='Conv2d_1b_5x5') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_2a_1x1') branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_2b_3x3') branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_2c_3x3') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_3a_3x3') branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_3b_1x1') net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) with tf.compat.v1.variable_scope('Mixed_6a'): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, 384, [3, 3], stride=2, padding='VALID', scope='Conv2d_0a_3x3') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_1a_1x1') branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_1b_3x3') branch_1 = slim.conv2d(branch_1, 96, [3, 3], stride=2, padding='VALID', scope='Conv2d_1c_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_3 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='MaxPool_3a_3x3') net = tf.concat([branch_3, branch_2, branch_3], 3) with tf.compat.v1.variable_scope('Mixed_6b'): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_1a_1x1') branch_1 = slim.conv2d(branch_1, 128, [1, 7], scope='Conv2d_1b_1x7') branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_1c_7x1') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_2a_1x1') branch_2 = slim.conv2d(branch_1, 128, [7, 1], scope='Conv2d_2b_7x1') branch_2 = slim.conv2d(branch_1, 128, [1, 7], scope='Conv2d_2c_1x7') branch_2 = slim.conv2d(branch_1, 128, [7, 1], scope='Conv2d_2d_7x1') branch_2 = slim.conv2d(branch_1, 192, [1, 7], scope='Conv2d_2e_1x7') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_3a_3x3') branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_3b_1x1') net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) with tf.compat.v1.variable_scope('Mixed_6c'): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_1a_1x1') branch_1 = slim.conv2d(branch_1, 160, [1, 7], scope='Conv2d_1b_1x7') branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_1c_7x1') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_2a_1x1') branch_2 = slim.conv2d(branch_1, 160, [7, 1], scope='Conv2d_2b_7x1') branch_2 = slim.conv2d(branch_1, 160, [1, 7], scope='Conv2d_2c_1x7') branch_2 = slim.conv2d(branch_1, 160, [7, 1], scope='Conv2d_2d_7x1') branch_2 = slim.conv2d(branch_1, 192, [1, 7], scope='Conv2d_2e_1x7') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_3a_3x3') branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_3b_1x1') net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) with tf.compat.v1.variable_scope('Mixed_6d'): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_1a_1x1') branch_1 = slim.conv2d(branch_1, 160, [1, 7], scope='Conv2d_1b_1x7') branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_1c_7x1') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_2a_1x1') branch_2 = slim.conv2d(branch_1, 160, [7, 1], scope='Conv2d_2b_7x1') branch_2 = slim.conv2d(branch_1, 160, [1, 7], scope='Conv2d_2c_1x7') branch_2 = slim.conv2d(branch_1, 160, [7, 1], scope='Conv2d_2d_7x1') branch_2 = slim.conv2d(branch_1, 192, [1, 7], scope='Conv2d_2e_1x7') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_3a_3x3') branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_3b_1x1') net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) with tf.compat.v1.variable_scope('Mixed_6e'): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_1a_1x1') branch_1 = slim.conv2d(branch_1, 192, [1, 7], scope='Conv2d_1b_1x7') branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_1c_7x1') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_2a_1x1') branch_2 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_2b_7x1') branch_2 = slim.conv2d(branch_1, 192, [1, 7], scope='Conv2d_2c_1x7') branch_2 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_2d_7x1') branch_2 = slim.conv2d(branch_1, 192, [1, 7], scope='Conv2d_2e_1x7') with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_3a_3x3') branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_3b_1x1') net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) end_points['Mixed_6e'] = net with tf.compat.v1.variable_scope('Mixed_7a'): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1') branch_0 = slim.conv2d(branch_0, 320, [3, 3], stride=2, padding='VALID', scope='Conv2d_0b_3x3') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_1a_1x1') branch_1 = slim.conv2d(branch_1, 192, [1, 7], scope='Conv2d_1b_1x7') branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_1c_7x1') branch_1 = slim.conv2d(branch_1, 192, [3, 3], stride=2, padding='VALID', scope='Conv2d_1d_3x3') with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.max_pool2d(net, [3, 3], scope='MaxPool_2a_3x3') net = tf.concat([branch_0, branch_1, branch_2], 3) with tf.compat.v1.variable_scope('Mixed_7b'): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, 320, [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_1a_1x1') branch_1 = tf.concat([ slim.conv2d( branch_1, 384, [1, 3], scope='Conv2d_1b_1x3'), slim.conv2d( branch_1, 384, [3, 1], scope='Conv2d_1c_3x1') ], 3) with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, 448, [1, 1], scope='Conv2d_2a_1x1') branch_2 = slim.conv2d(branch_2, 384, [3, 3], scope='Conv2d_2b_1x1') branch_2 = tf.concat([ slim.conv2d( branch_2, 384, [1, 3], scope='Conv2d_2c_1x3'), slim.conv2d( branch_2, 384, [3, 1], scope='Conv2d_2d_3x1') ], 3) with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_3a_3x3') branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_3b_1x1') net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) with tf.compat.v1.variable_scope('Mixed_7c'): with tf.compat.v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, 320, [1, 1], scope='Conv2d_0a_1x1') with tf.compat.v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_1a_1x1') branch_1 = tf.concat([ slim.conv2d( branch_1, 384, [1, 3], scope='Conv2d_1b_1x3'), slim.conv2d( branch_1, 384, [3, 1], scope='Conv2d_1c_3x1') ], 3) with tf.compat.v1.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, 448, [1, 1], scope='Conv2d_2a_1x1') branch_2 = slim.conv2d(branch_2, 384, [3, 3], scope='Conv2d_2b_1x1') branch_2 = tf.concat([ slim.conv2d( branch_2, 384, [1, 3], scope='Conv2d_2c_1x3'), slim.conv2d( branch_2, 384, [3, 1], scope='Conv2d_2d_3x1') ], 3) with tf.compat.v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_3a_3x3') branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_3b_1x1') net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) return net, end_points
def build_loss(self): """Adds ops for computing loss.""" with tf.compat.v1.name_scope('compute_loss'): self.reconstr_loss = 0 self.smooth_loss = 0 self.ssim_loss = 0 self.icp_transform_loss = 0 self.icp_residual_loss = 0 # self.images is organized by ...[scale][B, h, w, seq_len * 3]. self.images = [{} for _ in range(NUM_SCALES)] # Following nested lists are organized by ...[scale][source-target]. self.warped_image = [{} for _ in range(NUM_SCALES)] self.warp_mask = [{} for _ in range(NUM_SCALES)] self.warp_error = [{} for _ in range(NUM_SCALES)] self.ssim_error = [{} for _ in range(NUM_SCALES)] self.icp_transform = [{} for _ in range(NUM_SCALES)] self.icp_residual = [{} for _ in range(NUM_SCALES)] self.middle_frame_index = util.get_seq_middle(self.seq_length) # Compute losses at each scale. for s in range(NUM_SCALES): # Scale image stack. height_s = int(self.img_height / (2**s)) width_s = int(self.img_width / (2**s)) self.images[s] = tf.image.resize( self.image_stack, [height_s, width_s], method=tf.image.ResizeMethod.AREA) # Smoothness. if self.smooth_weight > 0: for i in range(self.seq_length): # In legacy mode, use the depth map from the middle frame only. if not self.legacy_mode or i == self.middle_frame_index: self.smooth_loss += 1.0 / ( 2**s) * self.depth_smoothness( self.disp[i][s], self.images[s][:, :, :, 3 * i:3 * (i + 1)]) for i in range(self.seq_length): for j in range(self.seq_length): # Only consider adjacent frames. if i == j or abs(i - j) != 1: continue # In legacy mode, only consider the middle frame as target. if self.legacy_mode and j != self.middle_frame_index: continue source = self.images[s][:, :, :, 3 * i:3 * (i + 1)] target = self.images[s][:, :, :, 3 * j:3 * (j + 1)] target_depth = self.depth[j][s] key = '%d-%d' % (i, j) # Extract ego-motion from i to j egomotion_index = min(i, j) egomotion_mult = 1 if i > j: # Need to inverse egomotion when going back in sequence. egomotion_mult *= -1 # For compatiblity with SfMLearner, interpret all egomotion vectors # as pointing toward the middle frame. Note that unlike SfMLearner, # each vector captures the motion to/from its next frame, and not # the center frame. Although with seq_length == 3, there is no # difference. if self.legacy_mode: if egomotion_index >= self.middle_frame_index: egomotion_mult *= -1 egomotion = egomotion_mult * self.egomotion[:, egomotion_index, :] # Inverse warp the source image to the target image frame for # photometric consistency loss. self.warped_image[s][key], self.warp_mask[s][key] = ( project.inverse_warp( source, target_depth, egomotion, self.intrinsic_mat[:, s, :, :], self.intrinsic_mat_inv[:, s, :, :])) # Reconstruction loss. self.warp_error[s][key] = tf.abs( self.warped_image[s][key] - target) self.reconstr_loss += tf.reduce_mean( input_tensor=self.warp_error[s][key] * self.warp_mask[s][key]) # SSIM. if self.ssim_weight > 0: self.ssim_error[s][key] = self.ssim( self.warped_image[s][key], target) # TODO(rezama): This should be min_pool2d(). ssim_mask = slim.avg_pool2d( self.warp_mask[s][key], 3, 1, 'VALID') self.ssim_loss += tf.reduce_mean( input_tensor=self.ssim_error[s][key] * ssim_mask) # 3D loss. if self.icp_weight > 0: cloud_a = self.cloud[j][s] cloud_b = self.cloud[i][s] self.icp_transform[s][key], self.icp_residual[s][ key] = icp(cloud_a, egomotion, cloud_b) self.icp_transform_loss += 1.0 / ( 2**s) * tf.reduce_mean(input_tensor=tf.abs( self.icp_transform[s][key])) self.icp_residual_loss += 1.0 / ( 2**s) * tf.reduce_mean(input_tensor=tf.abs( self.icp_residual[s][key])) self.total_loss = self.reconstr_weight * self.reconstr_loss if self.smooth_weight > 0: self.total_loss += self.smooth_weight * self.smooth_loss if self.ssim_weight > 0: self.total_loss += self.ssim_weight * self.ssim_loss if self.icp_weight > 0: self.total_loss += self.icp_weight * (self.icp_transform_loss + self.icp_residual_loss)
def mobilenet_v1(inputs, num_classes=1000, dropout_keep_prob=0.999, is_training=True, min_depth=8, depth_multiplier=1.0, conv_defs=None, prediction_fn=slim.softmax, spatial_squeeze=True, reuse=None, scope='MobilenetV1', global_pool=False): """Mobilenet v1 model for classification. Args: inputs: a tensor of shape [batch_size, height, width, channels]. num_classes: number of predicted classes. If 0 or None, the logits layer is omitted and the input features to the logits layer (before dropout) are returned instead. dropout_keep_prob: the percentage of activation values that are retained. is_training: whether is training or not. min_depth: Minimum depth value (number of channels) for all convolution ops. Enforced when depth_multiplier < 1, and not an active constraint when depth_multiplier >= 1. depth_multiplier: Float multiplier for the depth (number of channels) for all convolution ops. The value must be greater than zero. Typical usage will be to set this value in (0, 1) to reduce the number of parameters or computation cost of the model. conv_defs: A list of ConvDef namedtuples specifying the net architecture. prediction_fn: a function to get predictions out of logits. spatial_squeeze: if True, logits is of shape is [B, C], if false logits is of shape [B, 1, 1, C], where B is batch_size and C is number of classes. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. global_pool: Optional boolean flag to control the avgpooling before the logits layer. If false or unset, pooling is done with a fixed window that reduces default-sized inputs to 1x1, while larger inputs lead to larger outputs. If true, any input size is pooled down to 1x1. Returns: net: a 2D Tensor with the logits (pre-softmax activations) if num_classes is a non-zero integer, or the non-dropped-out input to the logits layer if num_classes is 0 or None. end_points: a dictionary from components of the network to the corresponding activation. Raises: ValueError: Input rank is invalid. """ input_shape = inputs.get_shape().as_list() if len(input_shape) != 4: raise ValueError('Invalid input tensor rank, expected 4, was: %d' % len(input_shape)) with tf.compat.v1.variable_scope(scope, 'MobilenetV1', [inputs], reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): net, end_points = mobilenet_v1_base( inputs, scope=scope, min_depth=min_depth, depth_multiplier=depth_multiplier, conv_defs=conv_defs) with tf.compat.v1.variable_scope('Logits'): if global_pool: # Global average pooling. net = tf.reduce_mean(input_tensor=net, axis=[1, 2], keepdims=True, name='global_pool') end_points['global_pool'] = net else: # Pooling with a fixed kernel size. kernel_size = _reduced_kernel_size_for_small_input( net, [7, 7]) net = slim.avg_pool2d(net, kernel_size, padding='VALID', scope='AvgPool_1a') end_points['AvgPool_1a'] = net if not num_classes: return net, end_points # 1 x 1 x 1024 net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='Conv2d_1c_1x1') if spatial_squeeze: logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') end_points['Logits'] = logits if prediction_fn: end_points['Predictions'] = prediction_fn(logits, scope='Predictions') return logits, end_points
def inception_v3(images, trainable=True, is_training=True, weight_decay=0.00004, stddev=0.1, dropout_keep_prob=0.8, use_batch_norm=True, batch_norm_params=None, add_summaries=True, scope="InceptionV3"): """Builds an Inception V3 subgraph for image embeddings. Args: images: A float32 Tensor of shape [batch, height, width, channels]. trainable: Whether the inception submodel should be trainable or not. is_training: Boolean indicating training mode or not. weight_decay: Coefficient for weight regularization. stddev: The standard deviation of the trunctated normal weight initializer. dropout_keep_prob: Dropout keep probability. use_batch_norm: Whether to use batch normalization. batch_norm_params: Parameters for batch normalization. See tf.contrib.layers.batch_norm for details. add_summaries: Whether to add activation summaries. scope: Optional Variable scope. Returns: end_points: A dictionary of activations from inception_v3 layers. """ # Only consider the inception model to be in training mode if it's trainable. is_inception_model_training = trainable and is_training if use_batch_norm: # Default parameters for batch normalization. if not batch_norm_params: batch_norm_params = { "is_training": is_inception_model_training, "trainable": trainable, # Decay for the moving averages. "decay": 0.9997, # Epsilon to prevent 0s in variance. "epsilon": 0.001, # Collection containing the moving mean and moving variance. "variables_collections": { "beta": None, "gamma": None, "moving_mean": ["moving_vars"], "moving_variance": ["moving_vars"], } } else: batch_norm_params = None if trainable: weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay) else: weights_regularizer = None with tf.compat.v1.variable_scope(scope, "InceptionV3", [images]) as scope: with slim.arg_scope([slim.conv2d, slim.fully_connected], weights_regularizer=weights_regularizer, trainable=trainable): with slim.arg_scope([slim.conv2d], weights_initializer=tf.compat.v1. truncated_normal_initializer(stddev=stddev), activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params): net, end_points = inception_v3_base(images, scope=scope) with tf.compat.v1.variable_scope("logits"): shape = net.get_shape() net = slim.avg_pool2d(net, shape[1:3], padding="VALID", scope="pool") net = slim.dropout(net, keep_prob=dropout_keep_prob, is_training=is_inception_model_training, scope="dropout") net = slim.flatten(net, scope="flatten") # Add summaries. if add_summaries: for v in end_points.values(): slim.summarize_activation(v) return net
def inception_resnet_v1(inputs, is_training=True, dropout_keep_prob=0.8, bottleneck_layer_size=128, reuse=None, scope='InceptionResnetV1'): """Creates the Inception Resnet V1 model. Args: inputs: a 4-D tensor of size [batch_size, height, width, 3]. num_classes: number of predicted classes. is_training: whether is training or not. dropout_keep_prob: float, the fraction to keep before final layer. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. Returns: logits: the logits outputs of the model. end_points: the set of end_points from the inception model. """ end_points = {} with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # 149 x 149 x 32 net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') end_points['Conv2d_1a_3x3'] = net # 147 x 147 x 32 net = slim.conv2d(net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3') end_points['Conv2d_2a_3x3'] = net # 147 x 147 x 64 net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') end_points['Conv2d_2b_3x3'] = net # 73 x 73 x 64 net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_3a_3x3') end_points['MaxPool_3a_3x3'] = net # 73 x 73 x 80 net = slim.conv2d(net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1') end_points['Conv2d_3b_1x1'] = net # 71 x 71 x 192 net = slim.conv2d(net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3') end_points['Conv2d_4a_3x3'] = net # 35 x 35 x 256 net = slim.conv2d(net, 256, 3, stride=2, padding='VALID', scope='Conv2d_4b_3x3') end_points['Conv2d_4b_3x3'] = net # 5 x Inception-resnet-A net = slim.repeat(net, 5, block35, scale=0.17) # Reduction-A with tf.variable_scope('Mixed_6a'): net = reduction_a(net, 192, 192, 256, 384) end_points['Mixed_6a'] = net # 10 x Inception-Resnet-B net = slim.repeat(net, 10, block17, scale=0.10) # Reduction-B with tf.variable_scope('Mixed_7a'): net = reduction_b(net) end_points['Mixed_7a'] = net # 5 x Inception-Resnet-C net = slim.repeat(net, 5, block8, scale=0.20) net = block8(net, activation_fn=None) with tf.variable_scope('Logits'): end_points['PrePool'] = net # pylint: disable=no-member net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', scope='AvgPool_1a_8x8') net = slim.flatten(net) net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='Dropout') end_points['PreLogitsFlatten'] = net net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, scope='Bottleneck', reuse=False) return net, end_points
def inception_resnet_v2_base(inputs, final_endpoint='Conv2d_7b_1x1', output_stride=16, align_feature_maps=False, scope=None, activation_fn=tf.nn.relu): """Inception model from http://arxiv.org/abs/1602.07261. Constructs an Inception Resnet v2 network from inputs to the given final endpoint. This method can construct the network up to the final inception block Conv2d_7b_1x1. Args: inputs: a tensor of size [batch_size, height, width, channels]. final_endpoint: specifies the endpoint to construct the network up to. It can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1'] output_stride: A scalar that specifies the requested ratio of input to output spatial resolution. Only supports 8 and 16. align_feature_maps: When true, changes all the VALID paddings in the network to SAME padding so that the feature maps are aligned. scope: Optional variable_scope. activation_fn: Activation function for block scopes. Returns: tensor_out: output tensor corresponding to the final_endpoint. end_points: a set of activations for external use, for example summaries or losses. Raises: ValueError: if final_endpoint is not set to one of the predefined values, or if the output_stride is not 8 or 16, or if the output_stride is 8 and we request an end point after 'PreAuxLogits'. """ if output_stride != 8 and output_stride != 16: raise ValueError('output_stride must be 8 or 16.') padding = 'SAME' if align_feature_maps else 'VALID' end_points = {} def add_and_check_final(name, net): end_points[name] = net return name == final_endpoint with tf.variable_scope(scope, 'InceptionResnetV2', [inputs]): with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # 149 x 149 x 32 net = slim.conv2d(inputs, 32, 3, stride=2, padding=padding, scope='Conv2d_1a_3x3') if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points # 147 x 147 x 32 net = slim.conv2d(net, 32, 3, padding=padding, scope='Conv2d_2a_3x3') if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points # 147 x 147 x 64 net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points # 73 x 73 x 64 net = slim.max_pool2d(net, 3, stride=2, padding=padding, scope='MaxPool_3a_3x3') if add_and_check_final('MaxPool_3a_3x3', net): return net, end_points # 73 x 73 x 80 net = slim.conv2d(net, 80, 1, padding=padding, scope='Conv2d_3b_1x1') if add_and_check_final('Conv2d_3b_1x1', net): return net, end_points # 71 x 71 x 192 net = slim.conv2d(net, 192, 3, padding=padding, scope='Conv2d_4a_3x3') if add_and_check_final('Conv2d_4a_3x3', net): return net, end_points # 35 x 35 x 192 net = slim.max_pool2d(net, 3, stride=2, padding=padding, scope='MaxPool_5a_3x3') if add_and_check_final('MaxPool_5a_3x3', net): return net, end_points # 35 x 35 x 320 with tf.variable_scope('Mixed_5b'): with tf.variable_scope('Branch_0'): tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') with tf.variable_scope('Branch_1'): tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, scope='Conv2d_0b_5x5') with tf.variable_scope('Branch_2'): tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, scope='Conv2d_0b_3x3') tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, scope='Conv2d_0c_3x3') with tf.variable_scope('Branch_3'): tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', scope='AvgPool_0a_3x3') tower_pool_1 = slim.conv2d(tower_pool, 64, 1, scope='Conv2d_0b_1x1') net = tf.concat( [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3) if add_and_check_final('Mixed_5b', net): return net, end_points # TODO(alemi): Register intermediate endpoints net = slim.repeat(net, 10, block35, scale=0.17, activation_fn=activation_fn) # 17 x 17 x 1088 if output_stride == 8, # 33 x 33 x 1088 if output_stride == 16 use_atrous = output_stride == 8 with tf.variable_scope('Mixed_6a'): with tf.variable_scope('Branch_0'): tower_conv = slim.conv2d(net, 384, 3, stride=1 if use_atrous else 2, padding=padding, scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_1'): tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, scope='Conv2d_0b_3x3') tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, stride=1 if use_atrous else 2, padding=padding, scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_2'): tower_pool = slim.max_pool2d(net, 3, stride=1 if use_atrous else 2, padding=padding, scope='MaxPool_1a_3x3') net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) if add_and_check_final('Mixed_6a', net): return net, end_points # TODO(alemi): register intermediate endpoints with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1): net = slim.repeat(net, 20, block17, scale=0.10, activation_fn=activation_fn) if add_and_check_final('PreAuxLogits', net): return net, end_points if output_stride == 8: # TODO(gpapan): Properly support output_stride for the rest of the net. raise ValueError( 'output_stride==8 is only supported up to the ' 'PreAuxlogits end_point for now.') # 8 x 8 x 2080 with tf.variable_scope('Mixed_7a'): with tf.variable_scope('Branch_0'): tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, padding=padding, scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_1'): tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, padding=padding, scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_2'): tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, scope='Conv2d_0b_3x3') tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, padding=padding, scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_3'): tower_pool = slim.max_pool2d(net, 3, stride=2, padding=padding, scope='MaxPool_1a_3x3') net = tf.concat( [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) if add_and_check_final('Mixed_7a', net): return net, end_points # TODO(alemi): register intermediate endpoints net = slim.repeat(net, 9, block8, scale=0.20, activation_fn=activation_fn) net = block8(net, activation_fn=None) # 8 x 8 x 1536 net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') if add_and_check_final('Conv2d_7b_1x1', net): return net, end_points raise ValueError('final_endpoint (%s) not recognized', final_endpoint)
def attention_inception_v3(inputs, num_classes=1000, is_training=True, dropout_keep_prob=0.8, min_depth=16, depth_multiplier=1.0, prediction_fn=slim.softmax, spatial_squeeze=True, reuse=None, create_aux_logits=True, scope='InceptionV3', global_pool=False, attention_module='', attention_position='all'): """Inception model from http://arxiv.org/abs/1512.00567. "Rethinking the Inception Architecture for Computer Vision" Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna. With the default arguments this method constructs the exact model defined in the paper. However, one can experiment with variations of the inception_v3 network by changing arguments dropout_keep_prob, min_depth and depth_multiplier. The default image size used to train this network is 299x299. Args: inputs: a tensor of size [batch_size, height, width, channels]. num_classes: number of predicted classes. If 0 or None, the logits layer is omitted and the input features to the logits layer (before dropout) are returned instead. is_training: whether is training or not. dropout_keep_prob: the percentage of activation values that are retained. min_depth: Minimum depth value (number of channels) for all convolution ops. Enforced when depth_multiplier < 1, and not an active constraint when depth_multiplier >= 1. depth_multiplier: Float multiplier for the depth (number of channels) for all convolution ops. The value must be greater than zero. Typical usage will be to set this value in (0, 1) to reduce the number of parameters or computation cost of the model. prediction_fn: a function to get predictions out of logits. spatial_squeeze: if True, logits is of shape [B, C], if false logits is of shape [B, 1, 1, C], where B is batch_size and C is number of classes. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. create_aux_logits: Whether to create the auxiliary logits. scope: Optional variable_scope. global_pool: Optional boolean flag to control the avgpooling before the logits layer. If false or unset, pooling is done with a fixed window that reduces default-sized inputs to 1x1, while larger inputs lead to larger outputs. If true, any input size is pooled down to 1x1. attention_module: Optional attention_module. Accepted values are '' or 'se_block'. attention_position: Optional attention_position. Default is 'all'. Accepted values are 'head', 'extractor', and 'all'. Returns: net: a Tensor with the logits (pre-softmax activations) if num_classes is a non-zero integer, or the non-dropped-out input to the logits layer if num_classes is 0 or None. end_points: a dictionary from components of the network to the corresponding activation. Raises: ValueError: if 'depth_multiplier' is less than or equal to zero. """ if depth_multiplier <= 0: raise ValueError('depth_multiplier is not greater than zero.') depth = lambda d: max(int(d * depth_multiplier), min_depth) with tf.variable_scope(scope, 'InceptionV3', [inputs], reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): net, end_points = attention_inception_v3_base( inputs, scope=scope, min_depth=min_depth, depth_multiplier=depth_multiplier, attention_module=attention_module, attention_position=attention_position) # Auxiliary Head logits if create_aux_logits and num_classes: with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): aux_logits = end_points['Mixed_6e'] with tf.variable_scope('AuxLogits'): aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3, padding='VALID', scope='AvgPool_1a_5x5') aux_logits = slim.conv2d(aux_logits, depth(128), [1, 1], scope='Conv2d_1b_1x1') # Shape of feature map before the final layer. kernel_size = _reduced_kernel_size_for_small_input( aux_logits, [5, 5]) aux_logits = slim.conv2d( aux_logits, depth(768), kernel_size, weights_initializer=trunc_normal(0.01), padding='VALID', scope='Conv2d_2a_{}x{}'.format(*kernel_size)) aux_logits = slim.conv2d( aux_logits, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, weights_initializer=trunc_normal(0.001), scope='Conv2d_2b_1x1') if spatial_squeeze: aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze') end_points['AuxLogits'] = aux_logits # Final pooling and prediction with tf.variable_scope('Logits'): if global_pool: # Global average pooling. net = tf.reduce_mean(input_tensor=net, axis=[1, 2], keepdims=True, name='GlobalPool') end_points['global_pool'] = net else: # Pooling with a fixed kernel size. kernel_size = _reduced_kernel_size_for_small_input( net, [8, 8]) net = slim.avg_pool2d( net, kernel_size, padding='VALID', scope='AvgPool_1a_{}x{}'.format(*kernel_size)) end_points['AvgPool_1a'] = net if not num_classes: return net, end_points # 1 x 1 x 2048 net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') end_points['PreLogits'] = net # 2048 logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='Conv2d_1c_1x1') if spatial_squeeze: logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') # 1000 end_points['Logits'] = logits end_points['Predictions'] = prediction_fn(logits, scope='Predictions') return logits, end_points
def inception_v3_head( net, end_points, final_endpoint='Mixed_7c', min_depth=16, depth_multiplier=1.0, scope=None, ): if depth_multiplier <= 0: raise ValueError('depth_multiplier is not greater than zero.') depth = lambda d: max(int(d * depth_multiplier), min_depth) with tf_v1.variable_scope(scope, 'InceptionV3', [net], reuse=tf_v1.AUTO_REUSE): with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # mixed_8: 8 x 8 x 1280. end_point = 'Mixed_7a' with tf_v1.variable_scope(end_point): with tf_v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf_v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(192), [1, 7], scope='Conv2d_0b_1x7') branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1') branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf_v1.variable_scope('Branch_2'): branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='MaxPool_1a_3x3') net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed_9: 8 x 8 x 2048. end_point = 'Mixed_7b' with tf_v1.variable_scope(end_point): with tf_v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1') with tf_v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1') branch_1 = tf.concat(axis=3, values=[ slim.conv2d( branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'), slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1') ]) with tf_v1.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3') branch_2 = tf.concat(axis=3, values=[ slim.conv2d( branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'), slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1') ]) with tf_v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed_10: 8 x 8 x 2048. end_point = 'Mixed_7c' with tf_v1.variable_scope(end_point): with tf_v1.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1') end_points[end_point + '_b0'] = branch_0 with tf_v1.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1') branch_1 = tf.concat(axis=3, values=[ slim.conv2d( branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'), slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1') ]) with tf_v1.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3') branch_2 = tf.concat(axis=3, values=[ slim.conv2d( branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'), slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1') ]) with tf_v1.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points raise ValueError('Unknown final endpoint %s' % final_endpoint) return net, end_points