def _GetAttentionModel(
        self,
        images,
        num_classes,
        weight_decay=0.0001,
        attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
        attention_type=_SUPPORTED_ATTENTION_TYPES[0],
        kernel=1,
        training_resnet=False,
        training_attention=False,
        reuse=False):
    """Builds the attention model and adds a 1x1 convolutional logits layer.

    The attention prelogits come from `GetAttentionPrelogit`; a 1x1 conv named
    'logits' is then applied under `_ATTENTION_VARIABLE_SCOPE` and the two
    spatial axes of its output are squeezed away.

    Args:
      images: A tensor of size [batch, height, width, channels].
      num_classes: The number of output classes.
      weight_decay: The parameters for weight_decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is in
        training mode.
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      logits: A tensor of size [batch, num_classes].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
    """
    prelogit_outputs = self.GetAttentionPrelogit(
        images,
        weight_decay,
        attention_nonlinear=attention_nonlinear,
        attention_type=attention_type,
        kernel=kernel,
        training_resnet=training_resnet,
        training_attention=training_attention,
        reuse=reuse)
    # The fifth element (end points) is not needed by this method.
    (attention_feat, attention_prob, attention_score, feature_map,
     _) = prelogit_outputs

    resnet_scope = resnet_v1.resnet_arg_scope(
        weight_decay=weight_decay, batch_norm_scale=True)
    with arg_scope(resnet_scope), arg_scope(
            [layers.batch_norm], is_training=training_attention):
        with tf.compat.v1.variable_scope(
                _ATTENTION_VARIABLE_SCOPE,
                values=[attention_feat],
                reuse=reuse):
            conv_logits = layers.conv2d(
                attention_feat,
                num_classes, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                scope='logits')
            logits = tf.squeeze(conv_logits, [1, 2], name='spatial_squeeze')
    return logits, attention_prob, attention_score, feature_map
def GetAttentionPrelogit(
        self,
        images,
        weight_decay=0.0001,
        attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
        attention_type=_SUPPORTED_ATTENTION_TYPES[0],
        kernel=1,
        training_resnet=False,
        training_attention=False,
        reuse=False,
        use_batch_norm=True):
    """Builds the ResNet feature extractor and the attention subnetwork.

    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: The parameters for weight_decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is in
        training mode.
      reuse: Whether or not the layer and its variables should be reused.
      use_batch_norm: Whether or not to use batch normalization.

    Returns:
      prelogits: A tensor of size [batch, 1, 1, channels].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
      end_points: Set of activations for external use.
    """
    # Step 1: ResNet features, built under the default-weight-decay arg scope.
    backbone_scope = resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)
    with arg_scope(backbone_scope):
        _, end_points = self.GetResnet50Subnetwork(
            images, is_training=training_resnet, reuse=reuse)

    # The attention head reads the activation selected by _target_layer_type.
    feature_map = end_points[self._target_layer_type]

    # Step 2: attention subnetwork on top of the selected feature map.
    attention_scope = resnet_v1.resnet_arg_scope(
        weight_decay=weight_decay, use_batch_norm=use_batch_norm)
    with arg_scope(attention_scope), arg_scope(
            [layers.batch_norm], is_training=training_attention):
        subnetwork_outputs = self._GetAttentionSubnetwork(
            feature_map,
            end_points,
            attention_nonlinear=attention_nonlinear,
            attention_type=attention_type,
            kernel=kernel,
            reuse=reuse)
        prelogits, attention_prob, attention_score, end_points = (
            subnetwork_outputs)

    return prelogits, attention_prob, attention_score, feature_map, end_points
def alexnet_v2_arg_scope(weight_decay=0.0005):
    """Defines the arg scope used for AlexNet v2.

    conv2d and fully_connected get a ReLU activation, biases initialized to
    the constant 0.1 and an L2 weight regularizer; conv2d additionally
    defaults to 'SAME' padding and max_pool2d to 'VALID' padding.

    Args:
      weight_decay: The coefficient passed to the L2 weights regularizer.

    Returns:
      The arg scope captured by the innermost `arg_scope` context.
    """
    weighted_layers = [layers.conv2d, layers_lib.fully_connected]
    with arg_scope(
            weighted_layers,
            activation_fn=nn_ops.relu,
            biases_initializer=init_ops.constant_initializer(0.1),
            weights_regularizer=regularizers.l2_regularizer(weight_decay)):
        with arg_scope([layers.conv2d], padding='SAME'), arg_scope(
                [layers_lib.max_pool2d], padding='VALID') as scope_defaults:
            return scope_defaults
def inception_v1_arg_scope(weight_decay=0.00004,
                           use_batch_norm=True,
                           batch_norm_var_collection='moving_vars'):
    """Defines the default InceptionV1 arg scope.

    Note: Although the original paper didn't use batch_norm, we found it
    useful.

    Args:
      weight_decay: The weight decay to use for regularizing the model.
      use_batch_norm: If `True`, batch_norm is applied after each convolution.
      batch_norm_var_collection: The name of the collection for the batch norm
        variables.

    Returns:
      An `arg_scope` to use for the inception v1 model.
    """
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.9997,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # collection containing update_ops.
        'updates_collections': ops.GraphKeys.UPDATE_OPS,
        # collection containing the moving mean and moving variance.
        'variables_collections': {
            'beta': None,
            'gamma': None,
            'moving_mean': [batch_norm_var_collection],
            'moving_variance': [batch_norm_var_collection],
        }
    }
    if not use_batch_norm:
        normalizer_fn, normalizer_params = None, {}
    else:
        normalizer_fn = layers_lib.batch_norm
        normalizer_params = batch_norm_params

    # Weight decay applies to both conv and fully connected weights; the
    # initializer, activation and normalizer defaults apply to conv2d only.
    regularized_layers = [layers.conv2d, layers_lib.fully_connected]
    with arg_scope(
            regularized_layers,
            weights_regularizer=regularizers.l2_regularizer(weight_decay)):
        with arg_scope(
                [layers.conv2d],
                weights_initializer=initializers.variance_scaling_initializer(),
                activation_fn=nn_ops.relu,
                normalizer_fn=normalizer_fn,
                normalizer_params=normalizer_params) as conv_scope:
            return conv_scope
def testEndPointsV1(self):
    """Checks the end point names produced by a tiny v1 bottleneck network."""
    make_block = resnet_v1.resnet_v1_block
    tiny_blocks = [
        make_block('block1', base_depth=1, num_units=2, stride=2),
        make_block('block2', base_depth=2, num_units=2, stride=1),
    ]
    images = create_input(2, 32, 16, 3)
    with arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = self._resnet_plain(images, tiny_blocks, scope='tiny')

    # Only the first unit of each block is expected to expose a 'shortcut'.
    expected_names = [
        'tiny/block1/unit_1/bottleneck_v1/shortcut',
        'tiny/block1/unit_1/bottleneck_v1/conv1',
        'tiny/block1/unit_1/bottleneck_v1/conv2',
        'tiny/block1/unit_1/bottleneck_v1/conv3',
        'tiny/block1/unit_2/bottleneck_v1/conv1',
        'tiny/block1/unit_2/bottleneck_v1/conv2',
        'tiny/block1/unit_2/bottleneck_v1/conv3',
        'tiny/block2/unit_1/bottleneck_v1/shortcut',
        'tiny/block2/unit_1/bottleneck_v1/conv1',
        'tiny/block2/unit_1/bottleneck_v1/conv2',
        'tiny/block2/unit_1/bottleneck_v1/conv3',
        'tiny/block2/unit_2/bottleneck_v1/conv1',
        'tiny/block2/unit_2/bottleneck_v1/conv2',
        'tiny/block2/unit_2/bottleneck_v1/conv3',
    ]
    self.assertItemsEqual(expected_names, end_points)
def testAtrousFullyConvolutionalValues(self):
    """Checks that subsampled atrous features match nominal-stride features."""
    nominal_stride = 32
    for output_stride in [4, 8, 16, 32, None]:
        with arg_scope(resnet_utils.resnet_arg_scope()):
            with ops.Graph().as_default(), self.cached_session() as sess:
                random_seed.set_random_seed(0)
                inputs = create_input(2, 81, 81, 3)

                # Dense feature extraction followed by subsampling.
                dense_output, _ = self._resnet_small(
                    inputs,
                    None,
                    is_training=False,
                    global_pool=False,
                    output_stride=output_stride)
                factor = (1 if output_stride is None else
                          nominal_stride // output_stride)
                dense_output = resnet_utils.subsample(dense_output, factor)

                # Make the two networks use the same weights.
                variable_scope.get_variable_scope().reuse_variables()

                # Feature extraction at the nominal network rate.
                nominal_output, _ = self._resnet_small(
                    inputs, None, is_training=False, global_pool=False)

                sess.run(variables.global_variables_initializer())
                self.assertAllClose(
                    dense_output.eval(),
                    nominal_output.eval(),
                    atol=2e-4,
                    rtol=1e-4)
def vgg_arg_scope(weight_decay=0.0005):
    """Defines the VGG arg scope.

    conv2d and fully_connected get a ReLU activation, an L2 weights
    regularizer and zero-initialized biases; conv2d additionally defaults to
    'SAME' padding.

    Args:
      weight_decay: The l2 regularization coefficient.

    Returns:
      An arg_scope.
    """
    weighted_layers = [layers.conv2d, layers_lib.fully_connected]
    with arg_scope(
            weighted_layers,
            activation_fn=nn_ops.relu,
            weights_regularizer=regularizers.l2_regularizer(weight_decay),
            biases_initializer=init_ops.zeros_initializer()), arg_scope(
                [layers.conv2d], padding='SAME') as scope_defaults:
        return scope_defaults
def testAtrousValuesBottleneck(self):
    """Verify the values of dense feature extraction by atrous convolution.

    Dense feature extraction by stack_blocks_dense() followed by subsampling
    must give results matching feature extraction at the nominal network
    output stride using self._stack_blocks_nondense().
    """
    make_block = resnet_v1.resnet_v1_block
    blocks = [
        make_block('block1', base_depth=1, num_units=2, stride=2),
        make_block('block2', base_depth=2, num_units=2, stride=2),
        make_block('block3', base_depth=4, num_units=2, stride=2),
        make_block('block4', base_depth=8, num_units=2, stride=1),
    ]
    nominal_stride = 8

    # Test both odd and even input dimensions.
    height, width = 30, 31
    with arg_scope(resnet_utils.resnet_arg_scope()), arg_scope(
            [layers.batch_norm], is_training=False):
        for output_stride in [1, 2, 4, 8, None]:
            with ops.Graph().as_default(), self.cached_session() as sess:
                random_seed.set_random_seed(0)
                inputs = create_input(1, height, width, 3)

                # Dense feature extraction followed by subsampling.
                dense = resnet_utils.stack_blocks_dense(
                    inputs, blocks, output_stride)
                factor = (1 if output_stride is None else
                          nominal_stride // output_stride)
                dense = resnet_utils.subsample(dense, factor)

                # Make the two networks use the same weights.
                variable_scope.get_variable_scope().reuse_variables()

                # Feature extraction at the nominal network rate.
                nominal = self._stack_blocks_nondense(inputs, blocks)

                sess.run(variables.global_variables_initializer())
                dense_value, nominal_value = sess.run([dense, nominal])
                self.assertAllClose(
                    dense_value, nominal_value, atol=1e-4, rtol=1e-4)
def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
    """Stacks only the given ResNet blocks, with no layers before or after.

    Args:
      inputs: Input tensor handed to `resnet_utils.stack_blocks_dense`.
      blocks: Block descriptions handed to `resnet_utils.stack_blocks_dense`.
      output_stride: Optional output stride forwarded unchanged.
      scope: Optional variable scope to build the blocks in.

    Returns:
      A `(net, end_points)` pair: the stacked-block output and the dictionary
      built from the 'end_points' collection.
    """
    collection_name = 'end_points'
    with variable_scope.variable_scope(scope, values=[inputs]), arg_scope(
            [layers.conv2d], outputs_collections=collection_name):
        stacked = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
        collected = utils.convert_collection_to_dict(collection_name)
        return stacked, collected
def resnet_arg_scope(weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    """Defines the default ResNet arg scope.

    TODO(gpapan): The batch-normalization related default values above are
      appropriate for use in conjunction with the reference ResNet models
      released at https://github.com/KaimingHe/deep-residual-networks. When
      training ResNets from scratch, they might need to be tuned.

    Args:
      weight_decay: The weight decay to use for regularizing the model.
      batch_norm_decay: The moving average decay when estimating layer
        activation statistics in batch normalization.
      batch_norm_epsilon: Small constant to prevent division by zero when
        normalizing activations by their variance in batch normalization.
      batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale
        the activations in the batch normalization layer.

    Returns:
      An `arg_scope` to use for the resnet models.
    """
    batch_norm_params = dict(
        decay=batch_norm_decay,
        epsilon=batch_norm_epsilon,
        scale=batch_norm_scale,
        updates_collections=ops.GraphKeys.UPDATE_OPS,
    )

    conv_defaults = arg_scope(
        [layers_lib.conv2d],
        weights_regularizer=regularizers.l2_regularizer(weight_decay),
        weights_initializer=initializers.variance_scaling_initializer(),
        activation_fn=nn_ops.relu,
        normalizer_fn=layers.batch_norm,
        normalizer_params=batch_norm_params)
    with conv_defaults:
        with arg_scope([layers.batch_norm], **batch_norm_params):
            # The following implies padding='SAME' for pool1, which makes
            # feature alignment easier for dense prediction tasks. This is
            # also used in https://github.com/facebook/fb.resnet.torch.
            # However the accompanying code of 'Deep Residual Learning for
            # Image Recognition' uses padding='VALID' for pool1. You can
            # switch to that choice by setting
            # tf.contrib.framework.arg_scope([tf.contrib.layers.max_pool2d],
            # padding='VALID').
            with arg_scope([layers.max_pool2d], padding='SAME') as pool_scope:
                return pool_scope
def testModelHasExpectedNumberOfParameters(self):
    """Checks the total parameter count of the InceptionV3 base network."""
    batch_size = 5
    height = width = 299
    image_shape = (batch_size, height, width, 3)
    inputs = random_ops.random_uniform(image_shape)

    with arg_scope(inception_v3.inception_v3_arg_scope()):
        inception_v3.inception_v3_base(inputs)

    model_variables = variables_lib.get_model_variables()
    total_params, _ = model_analyzer.analyze_vars(model_variables)
    self.assertAlmostEqual(21802784, total_params)
def truncated_vgg_16(inputs, is_training=True, scope='vgg_16'):
    """Oxford Net VGG 16-Layers version D, truncated after conv5.

    For use in the SSD object detection network, which uses this particular
    truncated version of VGG16 as detailed in its paper.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      is_training: accepted as a parameter but not referenced by this
        function's body.
      scope: Optional scope for the variables.

    Returns:
      the last op containing the conv5 tensor and end_points dict.
    """
    # (number of convs, depth, conv scope, pool scope); conv5 has no pooling.
    stages = (
        (2, 64, 'conv1', 'pool1'),
        (2, 128, 'conv2', 'pool2'),
        (3, 256, 'conv3', 'pool3'),
        (3, 512, 'conv4', 'pool4'),
        (3, 512, 'conv5', None),
    )
    with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
                [layers.conv2d, layers_lib.fully_connected,
                 layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            net = inputs
            for num_convs, depth, conv_scope, pool_scope in stages:
                net = layers_lib.repeat(
                    net, num_convs, layers.conv2d, depth, [3, 3],
                    scope=conv_scope)
                if pool_scope is not None:
                    net = layers_lib.max_pool2d(net, [2, 2], scope=pool_scope)

            # Convert end_points_collection into a end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return net, end_points
def _recomputing_grad_fn(compute_fn, original_args, original_vars, output_grads, grad_fn_variables, use_data_dep, tupleize_grads, arg_scope, var_scope, has_is_recompute_kwarg):
    """Grad fn for recompute_grad.

    Re-runs `compute_fn` on identity-wrapped copies of the original arguments
    inside the saved arg scope and variable scope, then differentiates the
    recomputed outputs with respect to those inputs and the given variables.

    Args:
      compute_fn: Callable that is re-invoked as `compute_fn(*inputs, **kwargs)`.
      original_args: Arguments of the original call; each is wrapped in an
        identity op before being passed to `compute_fn` again.
      original_vars: Set compared against the variables watched during the
        recompute (after `_as_ref`); they must be equal.
      output_grads: Gradients with respect to the outputs. The non-None entries
        are used as control dependencies for the recompute.
      grad_fn_variables: Variables to differentiate with respect to, in
        addition to the inputs. A falsy value is treated as an empty list.
      use_data_dep: If true, `_force_data_dependency` is applied to the inputs
        and `_tuple_with_data_dep` is used when tupleizing the gradients.
      tupleize_grads: If true, the gradients are grouped via
        `_tuple_with_data_dep` or `control_flow_ops.tuple`.
      arg_scope: Arg scope to re-enter for the recompute.
      var_scope: Variable scope to re-enter (with `reuse=True`).
      has_is_recompute_kwarg: If true, `is_recomputing=True` is passed to
        `compute_fn`.

    Returns:
      A `(grad_inputs, grad_vars)` pair: the leading `len(inputs)` gradients
      and the remaining ones.

    Raises:
      ValueError: If the variables watched during the recompute differ from
        `original_vars`.
    """
    variables = grad_fn_variables or []

    # Identity ops around the inputs ensures correct gradient graph-walking.
    inputs = [array_ops.identity(x) for x in list(original_args)]

    # Recompute outputs
    # Use a control dependency to ensure that the recompute is not eliminated by
    # CSE and that it happens on the backwards pass.
    ctrl_dep_grads = [g for g in output_grads if g is not None]
    with framework_ops.control_dependencies(ctrl_dep_grads):
        if use_data_dep:
            inputs = _force_data_dependency(output_grads, inputs)
        # Re-enter scopes
        with arg_scope_lib.arg_scope(arg_scope):
            with variable_scope.variable_scope(var_scope, reuse=True):
                # Re-call the function and ensure that the touched variables are the
                # same as in the first call.
                with backprop.GradientTape() as tape:
                    fn_kwargs = {}
                    if has_is_recompute_kwarg:
                        fn_kwargs["is_recomputing"] = True
                    outputs = compute_fn(*inputs, **fn_kwargs)
                recompute_vars = set(
                    _as_ref(v) for v in tape.watched_variables())
                if original_vars != recompute_vars:
                    raise ValueError(_WRONG_VARS_ERR)

    # Normalize a single output into a list so it can be differentiated below.
    if not isinstance(outputs, (list, tuple)):
        outputs = [outputs]
    outputs = list(outputs)

    # Compute gradients
    grads = _gradients(outputs, inputs + variables, output_grads, stop_gradients=inputs)

    if tupleize_grads:
        if use_data_dep:
            grads = _tuple_with_data_dep(grads)
        else:
            grads = control_flow_ops.tuple(grads)

    # `grads` is ordered as inputs first, then variables (see the call above).
    grad_inputs = grads[:len(inputs)]
    grad_vars = grads[len(inputs):]
    return grad_inputs, grad_vars
def testClassificationEndPoints(self):
    """Checks the logits name/shape and the 'predictions' end point shape."""
    global_pool = True
    num_classes = 10
    images = create_input(2, 224, 224, 3)
    with arg_scope(resnet_utils.resnet_arg_scope()):
        logits, end_points = self._resnet_small(
            images, num_classes, global_pool=global_pool, scope='resnet')

    logits_op_name = logits.op.name
    self.assertTrue(logits_op_name.startswith('resnet/logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [2, 1, 1, num_classes])

    self.assertTrue('predictions' in end_points)
    predictions = end_points['predictions']
    self.assertListEqual(predictions.get_shape().as_list(),
                         [2, 1, 1, num_classes])
def testFullyConvolutionalUnknownHeightWidth(self):
    """Runs the FCN network with the input height and width left unknown."""
    batch = 2
    height = width = 65
    global_pool = False

    placeholder_inputs = create_input(batch, None, None, 3)
    with arg_scope(resnet_utils.resnet_arg_scope()):
        output, _ = self._resnet_small(
            placeholder_inputs, None, global_pool=global_pool)
    static_shape = output.get_shape().as_list()
    self.assertListEqual(static_shape, [batch, None, None, 32])

    # Feed concrete images to check the shape actually produced at run time.
    images = create_input(batch, height, width, 3)
    with self.cached_session() as sess:
        sess.run(variables.global_variables_initializer())
        output_value = sess.run(output, {placeholder_inputs: images.eval()})
        self.assertEqual(output_value.shape, (batch, 3, 3, 32))
def testUnknownBatchSize(self):
    """Runs the classification network with the batch size left unknown."""
    batch = 2
    height = width = 65
    global_pool = True
    num_classes = 10

    placeholder_inputs = create_input(None, height, width, 3)
    with arg_scope(resnet_utils.resnet_arg_scope()):
        logits, _ = self._resnet_small(
            placeholder_inputs,
            num_classes,
            global_pool=global_pool,
            scope='resnet')
    self.assertTrue(logits.op.name.startswith('resnet/logits'))
    static_shape = logits.get_shape().as_list()
    self.assertListEqual(static_shape, [None, 1, 1, num_classes])

    # Feed a concrete batch to check the shape actually produced at run time.
    images = create_input(batch, height, width, 3)
    with self.cached_session() as sess:
        sess.run(variables.global_variables_initializer())
        logits_value = sess.run(logits, {placeholder_inputs: images.eval()})
        self.assertEqual(logits_value.shape, (batch, 1, 1, num_classes))
def testFullyConvolutionalEndpointShapes(self):
    """Checks per-block end point shapes without global pooling."""
    global_pool = False
    num_classes = 10
    images = create_input(2, 321, 321, 3)
    with arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = self._resnet_small(
            images, num_classes, global_pool=global_pool, scope='resnet')

    expected_shapes = {
        'resnet/block1': [2, 41, 41, 4],
        'resnet/block2': [2, 21, 21, 8],
        'resnet/block3': [2, 11, 11, 16],
        'resnet/block4': [2, 11, 11, 32],
    }
    for endpoint, expected_shape in expected_shapes.items():
        actual_shape = end_points[endpoint].get_shape().as_list()
        self.assertListEqual(actual_shape, expected_shape)
def testClassificationShapes(self):
    """Checks per-block end point shapes with global pooling enabled."""
    global_pool = True
    num_classes = 10
    images = create_input(2, 224, 224, 3)
    with arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = self._resnet_small(
            images, num_classes, global_pool=global_pool, scope='resnet')

    expected_shapes = {
        'resnet/block1': [2, 28, 28, 4],
        'resnet/block2': [2, 14, 14, 8],
        'resnet/block3': [2, 7, 7, 16],
        'resnet/block4': [2, 7, 7, 32],
    }
    for endpoint, expected_shape in expected_shapes.items():
        actual_shape = end_points[endpoint].get_shape().as_list()
        self.assertListEqual(actual_shape, expected_shape)
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              scope=None):
    """Generator for v2 (preactivation) ResNet models.

    This function generates a family of ResNet v2 models. See the
    resnet_v2_*() methods for specific model instantiations, obtained by
    selecting different block instantiations that produce ResNets of various
    depths.

    Training for image classification on Imagenet is usually done with
    [224, 224] inputs, resulting in [7, 7] feature maps at the output of the
    last ResNet block for the ResNets defined in [1] that have nominal stride
    equal to 32. However, for dense prediction tasks we advise that one uses
    inputs with spatial dimensions that are multiples of 32 plus 1, e.g.,
    [321, 321]. In this case the feature maps at the ResNet output will have
    spatial shape [(height - 1) / output_stride + 1,
    (width - 1) / output_stride + 1] and corners exactly aligned with the
    input image corners, which greatly facilitates alignment of the features
    to the image. Using as input [225, 225] images results in [8, 8] feature
    maps at the output of the last ResNet block.

    For dense prediction tasks, the ResNet needs to run in fully-convolutional
    (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2]
    all have nominal stride equal to 32 and a good choice in FCN mode is to
    use output_stride=16 in order to increase the density of the computed
    features at small computational and memory overhead, cf.
    http://arxiv.org/abs/1606.00915.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      blocks: A list of length equal to the number of ResNet blocks. Each
        element is a resnet_utils.Block object describing the units in the
        block.
      num_classes: Number of predicted classes for classification tasks. If
        None we return the features before the logit layer.
      is_training: whether batch_norm layers are in training mode.
      global_pool: If True, we perform global average pooling before computing
        the logits. Set to True for image classification, False for dense
        prediction.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the
        requested ratio of input to output spatial resolution.
      include_root_block: If True, include the initial convolution followed by
        max-pooling, if False excludes it. If excluded, `inputs` should be the
        results of an activation-less convolution.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: A rank-4 tensor of size [batch, height_out, width_out,
        channels_out]. If global_pool is False, then height_out and width_out
        are reduced by a factor of output_stride compared to the respective
        height_in and width_in, else both height_out and width_out equal one.
        If num_classes is None, then net is the output of the last ResNet
        block, potentially after global average pooling. If num_classes is
        not None, net contains the pre-softmax activations.
      end_points: A dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    with variable_scope.variable_scope(
            scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with arg_scope(
                [layers_lib.conv2d, bottleneck,
                 resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with arg_scope([layers.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        # The root block (conv1 + pool1) already strides by 4.
                        # Floor division is exact here because divisibility
                        # was checked above, and unlike `/=` it keeps an
                        # integer stride an integer under Python 3.
                        output_stride //= 4
                    # We do not include batch normalization or activation
                    # functions in conv1 because the first ResNet unit will
                    # perform these. Cf. Appendix of [2].
                    with arg_scope(
                            [layers_lib.conv2d],
                            activation_fn=None,
                            normalizer_fn=None):
                        net = resnet_utils.conv2d_same(
                            net, 64, 7, stride=2, scope='conv1')
                    net = layers.max_pool2d(
                        net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(
                    net, blocks, output_stride)
                # This is needed because the pre-activation variant does not
                # have batch normalization or activation functions in the
                # residual unit output. See Appendix of [2].
                net = layers.batch_norm(
                    net, activation_fn=nn_ops.relu, scope='postnorm')
                if global_pool:
                    # Global average pooling.
                    net = math_ops.reduce_mean(
                        net, [1, 2], name='pool5', keepdims=True)
                if num_classes is not None:
                    net = layers_lib.conv2d(
                        net,
                        num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='logits')
                # Convert end_points_collection into a dictionary of
                # end_points.
                end_points = utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = layers.softmax(
                        net, scope='predictions')
                return net, end_points
def overfeat(inputs,
             num_classes=1000,
             is_training=True,
             dropout_keep_prob=0.5,
             spatial_squeeze=True,
             scope='overfeat'):
    """Contains the model definition for the OverFeat network.

    The definition for the network was obtained from:
      OverFeat: Integrated Recognition, Localization and Detection using
      Convolutional Networks
      Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus
      and Yann LeCun, 2014
      http://arxiv.org/abs/1312.6229

    Note: All the fully_connected layers have been transformed to conv2d
          layers. To use in classification mode, resize input to 231x231. To
          use in fully convolutional mode, set spatial_squeeze to false.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether or not should squeeze the spatial dimensions of
        the outputs. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.

    Returns:
      The output of the final `fc8` layer (built with no activation function,
      and squeezed over axes 1 and 2 when `spatial_squeeze` is set) and the
      end_points dict.
    """
    collected_layers = [
        layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d
    ]
    with variable_scope.variable_scope(scope, 'overfeat', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d
        with arg_scope(
                collected_layers, outputs_collections=end_points_collection):
            # Convolutional trunk.
            net = layers.conv2d(
                inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = layers.conv2d(
                net, 256, [5, 5], padding='VALID', scope='conv2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            net = layers.conv2d(net, 512, [3, 3], scope='conv3')
            net = layers.conv2d(net, 1024, [3, 3], scope='conv4')
            net = layers.conv2d(net, 1024, [3, 3], scope='conv5')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')

            # Classifier head: conv2d is used instead of fully_connected
            # layers.
            with arg_scope(
                    [layers.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=init_ops.constant_initializer(0.1)):
                net = layers.conv2d(
                    net, 3072, [6, 6], padding='VALID', scope='fc6')
                net = layers_lib.dropout(
                    net,
                    dropout_keep_prob,
                    is_training=is_training,
                    scope='dropout6')
                net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
                net = layers_lib.dropout(
                    net,
                    dropout_keep_prob,
                    is_training=is_training,
                    scope='dropout7')
                net = layers.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    scope='fc8')

            # Convert end_points_collection into a end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if not spatial_squeeze:
                return net, end_points

            # Replace the recorded fc8 activation with its squeezed version.
            squeezed = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
            end_points[sc.name + '/fc8'] = squeezed
            return squeezed, end_points
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
    """AlexNet version 2.

    Described in: http://arxiv.org/pdf/1404.5997v2.pdf
    Parameters from:
    github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
    layers-imagenet-1gpu.cfg

    Note: All the fully_connected layers have been transformed to conv2d
          layers. To use in classification mode, resize input to 224x224. To
          use in fully convolutional mode, set spatial_squeeze to false.
          The LRN layers have been removed and change the initializers from
          random_normal_initializer to xavier_initializer.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether or not should squeeze the spatial dimensions of
        the outputs. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.

    Returns:
      The output of the final `fc8` layer (built with no activation function,
      and squeezed over axes 1 and 2 when `spatial_squeeze` is set) and the
      end_points dict.
    """
    collected_layers = [
        layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d
    ]
    with variable_scope.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
                collected_layers,
                outputs_collections=[end_points_collection]):
            # Convolutional trunk.
            net = layers.conv2d(
                inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1')
            net = layers.conv2d(net, 192, [5, 5], scope='conv2')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2')
            net = layers.conv2d(net, 384, [3, 3], scope='conv3')
            net = layers.conv2d(net, 384, [3, 3], scope='conv4')
            net = layers.conv2d(net, 256, [3, 3], scope='conv5')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5')

            # Classifier head: conv2d is used instead of fully_connected
            # layers.
            with arg_scope(
                    [layers.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=init_ops.constant_initializer(0.1)):
                net = layers.conv2d(
                    net, 4096, [5, 5], padding='VALID', scope='fc6')
                net = layers_lib.dropout(
                    net,
                    dropout_keep_prob,
                    is_training=is_training,
                    scope='dropout6')
                net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
                net = layers_lib.dropout(
                    net,
                    dropout_keep_prob,
                    is_training=is_training,
                    scope='dropout7')
                net = layers.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    scope='fc8')

            # Convert end_points_collection into a end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if not spatial_squeeze:
                return net, end_points

            # Replace the recorded fc8 activation with its squeezed version.
            squeezed = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
            end_points[sc.name + '/fc8'] = squeezed
            return squeezed, end_points
def _inception_v1_module(net,
                         end_point,
                         depth_1x1,
                         depths_3x3,
                         depths_small_3x3,
                         depth_pool_proj,
                         small_3x3_scope='Conv2d_0b_3x3'):
  """Builds one four-branch Inception V1 "Mixed" module.

  Layer defaults (stride, padding, initializer) are not set here; they come
  from whatever arg_scope is active in the caller.

  Args:
    net: input tensor of the module.
    end_point: name of the variable scope that wraps the module.
    depth_1x1: output depth of the 1x1 convolution in Branch_0.
    depths_3x3: pair (depth of the 1x1 convolution, depth of the 3x3
      convolution) for Branch_1.
    depths_small_3x3: pair (depth of the 1x1 convolution, depth of the 3x3
      convolution) for Branch_2.
    depth_pool_proj: output depth of the 1x1 convolution that follows the
      3x3 max pool in Branch_3.
    small_3x3_scope: scope name of the 3x3 convolution in Branch_2.

  Returns:
    The four branch outputs concatenated along dimension 3.
  """
  reduce_1, conv_1 = depths_3x3
  reduce_2, conv_2 = depths_small_3x3
  with variable_scope.variable_scope(end_point):
    with variable_scope.variable_scope('Branch_0'):
      branch_0 = layers.conv2d(net, depth_1x1, [1, 1], scope='Conv2d_0a_1x1')
    with variable_scope.variable_scope('Branch_1'):
      branch_1 = layers.conv2d(net, reduce_1, [1, 1], scope='Conv2d_0a_1x1')
      branch_1 = layers.conv2d(
          branch_1, conv_1, [3, 3], scope='Conv2d_0b_3x3')
    with variable_scope.variable_scope('Branch_2'):
      branch_2 = layers.conv2d(net, reduce_2, [1, 1], scope='Conv2d_0a_1x1')
      branch_2 = layers.conv2d(
          branch_2, conv_2, [3, 3], scope=small_3x3_scope)
    with variable_scope.variable_scope('Branch_3'):
      branch_3 = layers_lib.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
      branch_3 = layers.conv2d(
          branch_3, depth_pool_proj, [1, 1], scope='Conv2d_0b_1x1')
    return array_ops.concat([branch_0, branch_1, branch_2, branch_3], 3)


def inception_v1_base(inputs, final_endpoint='Mixed_5c', scope='InceptionV1'):
  """Defines the Inception V1 base architecture.

  This architecture is defined in:
    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  The layers are built in order and construction stops as soon as the layer
  named by final_endpoint has been added.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']
    scope: Optional variable_scope.

  Returns:
    net: the output tensor of the layer named by final_endpoint.
    end_points: a dictionary from the name of every layer built up to and
      including final_endpoint to the corresponding activation.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
  """
  # Ordered (endpoint name, builder) pairs. Each builder receives the running
  # tensor and its endpoint name and returns the new tensor. Nothing is added
  # to the graph until a builder is called inside the scopes below.
  layer_builders = (
      ('Conv2d_1a_7x7',
       lambda x, name: layers.conv2d(x, 64, [7, 7], stride=2, scope=name)),
      ('MaxPool_2a_3x3',
       lambda x, name: layers_lib.max_pool2d(
           x, [3, 3], stride=2, scope=name)),
      ('Conv2d_2b_1x1',
       lambda x, name: layers.conv2d(x, 64, [1, 1], scope=name)),
      ('Conv2d_2c_3x3',
       lambda x, name: layers.conv2d(x, 192, [3, 3], scope=name)),
      ('MaxPool_3a_3x3',
       lambda x, name: layers_lib.max_pool2d(
           x, [3, 3], stride=2, scope=name)),
      ('Mixed_3b',
       lambda x, name: _inception_v1_module(
           x, name, 64, (96, 128), (16, 32), 32)),
      ('Mixed_3c',
       lambda x, name: _inception_v1_module(
           x, name, 128, (128, 192), (32, 96), 64)),
      ('MaxPool_4a_3x3',
       lambda x, name: layers_lib.max_pool2d(
           x, [3, 3], stride=2, scope=name)),
      ('Mixed_4b',
       lambda x, name: _inception_v1_module(
           x, name, 192, (96, 208), (16, 48), 64)),
      ('Mixed_4c',
       lambda x, name: _inception_v1_module(
           x, name, 160, (112, 224), (24, 64), 64)),
      ('Mixed_4d',
       lambda x, name: _inception_v1_module(
           x, name, 128, (128, 256), (24, 64), 64)),
      ('Mixed_4e',
       lambda x, name: _inception_v1_module(
           x, name, 112, (144, 288), (32, 64), 64)),
      ('Mixed_4f',
       lambda x, name: _inception_v1_module(
           x, name, 256, (160, 320), (32, 128), 128)),
      ('MaxPool_5a_2x2',
       lambda x, name: layers_lib.max_pool2d(
           x, [2, 2], stride=2, scope=name)),
      # NOTE: in this module the Branch_2 3x3 convolution is scoped
      # 'Conv2d_0a_3x3' instead of 'Conv2d_0b_3x3' like every other module.
      # It is reproduced as-is because the scope name is part of the names of
      # the variables created under it; presumably it is kept for
      # compatibility with existing checkpoints -- confirm before renaming.
      ('Mixed_5b',
       lambda x, name: _inception_v1_module(
           x, name, 256, (160, 320), (32, 128), 128,
           small_3x3_scope='Conv2d_0a_3x3')),
      ('Mixed_5c',
       lambda x, name: _inception_v1_module(
           x, name, 384, (192, 384), (48, 128), 128)),
  )

  end_points = {}
  with variable_scope.variable_scope(scope, 'InceptionV1', [inputs]):
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected],
        weights_initializer=trunc_normal(0.01)):
      # Stride 1 and SAME padding are the defaults; individual layers above
      # override the stride where they downsample.
      with arg_scope(
          [layers.conv2d, layers_lib.max_pool2d], stride=1, padding='SAME'):
        net = inputs
        for end_point, build_layer in layer_builders:
          net = build_layer(net, end_point)
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points
    # Every layer was built without matching the requested endpoint.
    raise ValueError('Unknown final endpoint %s' % final_endpoint)
def inception_v1(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=layers_lib.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV1'):
  """Defines the Inception V1 architecture.

  This architecture is defined in:

    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  The default image size used to train this network is 224x224.

  The base network from inception_v1_base is followed by a 7x7 average pool,
  dropout and a 1x1 convolution that produces the logits.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether is training or not; applied to the batch_norm and
      dropout layers through an arg_scope.
    dropout_keep_prob: the percentage of activation values that are retained.
    prediction_fn: a function to get predictions out of logits; it is called
      as prediction_fn(logits, scope='Predictions').
    spatial_squeeze: if True, logits is of shape is [B, C], if false logits is
      of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To
      be able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the
      corresponding activation, including 'Logits' and 'Predictions'.
  """
  with variable_scope.variable_scope(
      scope, 'InceptionV1', [inputs, num_classes],
      reuse=reuse) as model_scope:
    with arg_scope(
        [layers_lib.batch_norm, layers_lib.dropout],
        is_training=is_training):
      features, end_points = inception_v1_base(inputs, scope=model_scope)

      # Final pooling and prediction.
      with variable_scope.variable_scope('Logits'):
        # The scope says 'MaxPool' although the layer is an average pool; the
        # name is reproduced unchanged.
        pooled = layers_lib.avg_pool2d(
            features, [7, 7], stride=1, scope='MaxPool_0a_7x7')
        dropped = layers_lib.dropout(
            pooled, dropout_keep_prob, scope='Dropout_0b')
        logits = layers.conv2d(
            dropped,
            num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope='Conv2d_0c_1x1')
        if spatial_squeeze:
          logits = array_ops.squeeze(logits, [1, 2], name='SpatialSqueeze')

        end_points['Logits'] = logits
        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a'):
  """Oxford Net VGG 11-Layers version A Example.

  Five convolution/max-pool stages are followed by three conv2d layers that
  stand in for the fully connected layers.

  Note: Every fully connected layer is expressed as a conv2d layer. To use
        the network for classification, resize the input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained; forwarded to
      the two dropout layers.
    dropout_keep_prob: the probability that activations are kept in the
      dropout layers during training.
    spatial_squeeze: whether to squeeze dimensions 1 and 2 out of the fc8
      output. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    net: the output of the final 'fc8' layer, which is created without an
      activation function; squeezed when spatial_squeeze is True.
    end_points: a dict converted from the outputs collection filled by the
      conv2d and max_pool2d layers of this network.
  """
  # One row per stage:
  # (conv scope, number of stacked 3x3 convolutions, depth, pool scope).
  conv_stages = (
      ('conv1', 1, 64, 'pool1'),
      ('conv2', 1, 128, 'pool2'),
      ('conv3', 2, 256, 'pool3'),
      ('conv4', 2, 512, 'pool4'),
      ('conv5', 2, 512, 'pool5'),
  )

  with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as var_scope:
    outputs_collection = var_scope.original_name_scope + '_end_points'
    # Record every conv2d and max_pool2d output in the collection.
    with arg_scope(
        [layers.conv2d, layers_lib.max_pool2d],
        outputs_collections=outputs_collection):
      net = inputs
      for conv_scope, repetitions, depth, pool_scope in conv_stages:
        net = layers_lib.repeat(
            net, repetitions, layers.conv2d, depth, [3, 3], scope=conv_scope)
        net = layers_lib.max_pool2d(net, [2, 2], scope=pool_scope)

      # The "fully connected" head is built from conv2d layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net,
          num_classes, [1, 1],
          activation_fn=None,
          normalizer_fn=None,
          scope='fc8')

      # Turn the outputs collection into the end_points dict.
      end_points = utils.convert_collection_to_dict(outputs_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        # Make the fc8 entry point at the squeezed tensor.
        end_points[var_scope.name + '/fc8'] = net
      return net, end_points