def _build_tail(self, inputs, is_training=False): if not self._use_tail: return inputs if self._architecture == "resnet_v1_101": train_batch_norm = is_training and self._config.get("train_batch_norm") with self._enter_variable_scope(): weight_decay = self._config.get("arg_scope", {}).get("weight_decay", 0) with tf.variable_scope(self._architecture, reuse=True): resnet_arg_scope = resnet_utils.resnet_arg_scope( batch_norm_epsilon=1e-5, batch_norm_scale=True, weight_decay=weight_decay ) with slim.arg_scope(resnet_arg_scope): with slim.arg_scope([slim.batch_norm], is_training=train_batch_norm): blocks = [ resnet_utils.Block( "block4", resnet_v1.bottleneck, [{"depth": 2048, "depth_bottleneck": 512, "stride": 1}] * 3, ) ] proposal_classifier_features = resnet_utils.stack_blocks_dense(inputs, blocks) else: proposal_classifier_features = inputs return proposal_classifier_features
def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None): """A plain ResNet without extra layers before or after the ResNet blocks.""" with variable_scope.variable_scope(scope, values=[inputs]): with arg_scope([layers.conv2d], outputs_collections='end_points'): net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride) end_points = utils.convert_collection_to_dict('end_points') return net, end_points
def testAtrousValuesBottleneck(self): """Verify the values of dense feature extraction by atrous convolution. Make sure that dense feature extraction by stack_blocks_dense() followed by subsampling gives identical results to feature extraction at the nominal network output stride using the simple self._stack_blocks_nondense() above. """ block = resnet_v1.resnet_v1_block blocks = [ block('block1', base_depth=1, num_units=2, stride=2), block('block2', base_depth=2, num_units=2, stride=2), block('block3', base_depth=4, num_units=2, stride=2), block('block4', base_depth=8, num_units=2, stride=1), ] nominal_stride = 8 # Test both odd and even input dimensions. height = 30 width = 31 with arg_scope(resnet_utils.resnet_arg_scope()): with arg_scope([layers.batch_norm], is_training=False): for output_stride in [1, 2, 4, 8, None]: with ops.Graph().as_default(): with self.cached_session() as sess: random_seed.set_random_seed(0) inputs = create_input(1, height, width, 3) # Dense feature extraction followed by subsampling. output = resnet_utils.stack_blocks_dense( inputs, blocks, output_stride) if output_stride is None: factor = 1 else: factor = nominal_stride // output_stride output = resnet_utils.subsample(output, factor) # Make the two networks use the same weights. variable_scope.get_variable_scope( ).reuse_variables() # Feature extraction at the nominal network rate. expected = self._stack_blocks_nondense( inputs, blocks) sess.run(variables.global_variables_initializer()) output, expected = sess.run([output, expected]) self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)
def _build_tail(self, inputs, is_training=False): if not self._use_tail: return inputs if self._architecture == 'resnet_v1_101': train_batch_norm = ( is_training and self._config.get('train_batch_norm') ) with self._enter_variable_scope(): weight_decay = ( self._config.get('arg_scope', {}).get('weight_decay', 0) ) with tf.variable_scope(self._architecture, reuse=True): resnet_arg_scope = resnet_utils.resnet_arg_scope( batch_norm_epsilon=1e-5, batch_norm_scale=True, weight_decay=weight_decay ) with slim.arg_scope(resnet_arg_scope): with slim.arg_scope( [slim.batch_norm], is_training=train_batch_norm ): blocks = [ resnet_utils.Block( 'block4', resnet_v1.bottleneck, [{ 'depth': 2048, 'depth_bottleneck': 512, 'stride': 1 }] * 3 ) ] proposal_classifier_features = ( resnet_utils.stack_blocks_dense(inputs, blocks) ) else: proposal_classifier_features = inputs return proposal_classifier_features
def resnet_v2(inputs, blocks, num_classes=None, is_training=True, global_pool=True, output_stride=None, include_root_block=True, reuse=None, scope=None): """Generator for v2 (preactivation) ResNet models. This function generates a family of ResNet v2 models. See the resnet_v2_*() methods for specific model instantiations, obtained by selecting different block instantiations that produce ResNets of various depths. Training for image classification on Imagenet is usually done with [224, 224] inputs, resulting in [7, 7] feature maps at the output of the last ResNet block for the ResNets defined in [1] that have nominal stride equal to 32. However, for dense prediction tasks we advise that one uses inputs with spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In this case the feature maps at the ResNet output will have spatial shape [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] and corners exactly aligned with the input image corners, which greatly facilitates alignment of the features to the image. Using as input [225, 225] images results in [8, 8] feature maps at the output of the last ResNet block. For dense prediction tasks, the ResNet needs to run in fully-convolutional (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all have nominal stride equal to 32 and a good choice in FCN mode is to use output_stride=16 in order to increase the density of the computed features at small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. Args: inputs: A tensor of size [batch, height_in, width_in, channels]. blocks: A list of length equal to the number of ResNet blocks. Each element is a resnet_utils.Block object describing the units in the block. num_classes: Number of predicted classes for classification tasks. If None we return the features before the logit layer. is_training: whether batch_norm layers are in training mode. global_pool: If True, we perform global average pooling before computing the logits. Set to True for image classification, False for dense prediction. output_stride: If None, then the output will be computed at the nominal network stride. If output_stride is not None, it specifies the requested ratio of input to output spatial resolution. include_root_block: If True, include the initial convolution followed by max-pooling, if False excludes it. If excluded, `inputs` should be the results of an activation-less convolution. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. Returns: net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. If global_pool is False, then height_out and width_out are reduced by a factor of output_stride compared to the respective height_in and width_in, else both height_out and width_out equal one. If num_classes is None, then net is the output of the last ResNet block, potentially after global average pooling. If num_classes is not None, net contains the pre-softmax activations. end_points: A dictionary from components of the network to the corresponding activation. Raises: ValueError: If the target output_stride is not valid. """ with variable_scope.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc: end_points_collection = sc.original_name_scope + '_end_points' with arg_scope( [layers_lib.conv2d, bottleneck, resnet_utils.stack_blocks_dense], outputs_collections=end_points_collection): with arg_scope([layers.batch_norm], is_training=is_training): net = inputs if include_root_block: if output_stride is not None: if output_stride % 4 != 0: raise ValueError( 'The output_stride needs to be a multiple of 4.' ) output_stride /= 4 # We do not include batch normalization or activation functions in # conv1 because the first ResNet unit will perform these. Cf. # Appendix of [2]. with arg_scope([layers_lib.conv2d], activation_fn=None, normalizer_fn=None): net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') net = layers.max_pool2d(net, [3, 3], stride=2, scope='pool1') net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) # This is needed because the pre-activation variant does not have batch # normalization or activation functions in the residual unit output. See # Appendix of [2]. net = layers.batch_norm(net, activation_fn=nn_ops.relu, scope='postnorm') if global_pool: # Global average pooling. net = math_ops.reduce_mean(net, [1, 2], name='pool5', keepdims=True) if num_classes is not None: net = layers_lib.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits') # Convert end_points_collection into a dictionary of end_points. end_points = utils.convert_collection_to_dict( end_points_collection) if num_classes is not None: end_points['predictions'] = layers.softmax( net, scope='predictions') return net, end_points