コード例 #1
0
    def _extract_box_classifier_features(self, proposal_feature_maps, scope):
        """Runs the second-stage ResNet `block4` over cropped proposal features.

        Args:
          proposal_feature_maps: A 4-D float tensor with shape
            [batch_size * self.max_num_proposals, crop_height, crop_width,
            depth] representing the feature map cropped to each proposal.
          scope: A scope name (unused).

        Returns:
          proposal_classifier_features: A 4-D float tensor with shape
            [batch_size * self.max_num_proposals, height, width, depth]
            representing box classifier features for each proposal.
        """
        # All three bottleneck units of block4 share one argument dict.
        unit_args = {'depth': 2048, 'depth_bottleneck': 512, 'stride': 1}
        block4 = resnet_utils.Block('block4', resnet_v1.bottleneck,
                                    [unit_args] * 3)

        with tf.variable_scope(self._architecture, reuse=self._reuse_weights):
            resnet_scope = resnet_utils.resnet_arg_scope(
                batch_norm_epsilon=1e-5,
                batch_norm_scale=True,
                weight_decay=self._weight_decay)
            # The batch-norm scope is entered after the ResNet scope so that
            # its is_training setting is layered on top of the ResNet defaults.
            with slim.arg_scope(resnet_scope), slim.arg_scope(
                    [slim.batch_norm], is_training=self._train_batch_norm):
                return resnet_utils.stack_blocks_dense(proposal_feature_maps,
                                                       [block4])
コード例 #2
0
 def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
   """Stacks the given ResNet blocks with no layers before or after them.

   Returns a `(net, end_points)` pair, where `end_points` is the dictionary
   built from the 'end_points' outputs collection.
   """
   collection_name = 'end_points'
   with variable_scope.variable_scope(scope, values=[inputs]), arg_scope(
       [layers.conv2d], outputs_collections=collection_name):
     features = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
     return features, utils.convert_collection_to_dict(collection_name)
コード例 #3
0
    def _atrousValues(self, bottleneck):
        """Checks atrous dense extraction against nominal-stride extraction.

        For every tested output stride, the features produced by
        stack_blocks_dense() are subsampled back to the nominal stride and
        compared with the output of self._stack_blocks_nondense() computed
        with the same weights.

        Args:
          bottleneck: The bottleneck function.
        """
        nominal_stride = 8
        # One even and one odd spatial dimension, so both cases are exercised.
        height, width = 30, 31

        block_specs = (
            ('block1', [(4, 1, 1), (4, 1, 2)]),
            ('block2', [(8, 2, 1), (8, 2, 2)]),
            ('block3', [(16, 4, 1), (16, 4, 2)]),
            ('block4', [(32, 8, 1), (32, 8, 1)]),
        )
        blocks = [
            resnet_utils.Block(name, bottleneck, units)
            for name, units in block_specs
        ]

        with slim.arg_scope(resnet_utils.resnet_arg_scope()), slim.arg_scope(
                [slim.batch_norm], is_training=False):
            for output_stride in (1, 2, 4, 8, None):
                # With no requested stride the network already runs at the
                # nominal stride, so no extra subsampling is needed.
                subsample_factor = (1 if output_stride is None else
                                    nominal_stride // output_stride)
                with tf.Graph().as_default(), self.test_session() as sess:
                    tf.set_random_seed(0)
                    inputs = create_test_input(1, height, width, 3)

                    # Dense feature extraction followed by subsampling.
                    dense = resnet_utils.stack_blocks_dense(
                        inputs, blocks, output_stride)
                    dense = resnet_utils.subsample(dense, subsample_factor)

                    # Make the two networks use the same weights.
                    tf.get_variable_scope().reuse_variables()

                    # Feature extraction at the nominal network rate.
                    nominal = self._stack_blocks_nondense(inputs, blocks)

                    sess.run(tf.global_variables_initializer())
                    actual, expected = sess.run([dense, nominal])
                    self.assertAllClose(actual,
                                        expected,
                                        atol=1e-4,
                                        rtol=1e-4)
コード例 #4
0
    def testAtrousValuesBottleneck(self):
        """Checks atrous dense extraction for ResNet v2 bottleneck blocks.

        Dense features from stack_blocks_dense(), subsampled back to the
        nominal stride, must match the features computed at the nominal
        network stride by self._stack_blocks_nondense() with shared weights.
        """
        nominal_stride = 8
        # One even and one odd spatial dimension, so both cases are exercised.
        height, width = 30, 31

        make_block = resnet_v2.resnet_v2_block
        # (scope name, base depth, stride) for each block; two units per block.
        block_specs = (
            ('block1', 1, 2),
            ('block2', 2, 2),
            ('block3', 4, 2),
            ('block4', 8, 1),
        )
        blocks = [
            make_block(name, base_depth=depth, num_units=2, stride=stride)
            for name, depth, stride in block_specs
        ]

        with arg_scope(resnet_utils.resnet_arg_scope()), arg_scope(
                [layers.batch_norm], is_training=False):
            for output_stride in (1, 2, 4, 8, None):
                # With no requested stride the network already runs at the
                # nominal stride, so no extra subsampling is needed.
                subsample_factor = (1 if output_stride is None else
                                    nominal_stride // output_stride)
                with ops.Graph().as_default(), self.test_session() as sess:
                    random_seed.set_random_seed(0)
                    inputs = create_test_input(1, height, width, 3)

                    # Dense feature extraction followed by subsampling.
                    dense = resnet_utils.stack_blocks_dense(
                        inputs, blocks, output_stride)
                    dense = resnet_utils.subsample(dense, subsample_factor)

                    # Make the two networks use the same weights.
                    variable_scope.get_variable_scope().reuse_variables()

                    # Feature extraction at the nominal network rate.
                    nominal = self._stack_blocks_nondense(inputs, blocks)

                    sess.run(variables.global_variables_initializer())
                    actual, expected = sess.run([dense, nominal])
                    self.assertAllClose(actual,
                                        expected,
                                        atol=1e-4,
                                        rtol=1e-4)
コード例 #5
0
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):
    """Generator for v1 ResNet models.

    This function generates a family of ResNet v1 models. See the resnet_v1_*()
    methods for specific model instantiations, obtained by selecting different
    block instantiations that produce ResNets of various depths.

    Training for image classification on Imagenet is usually done with
    [224, 224] inputs, resulting in [7, 7] feature maps at the output of the
    last ResNet block for the ResNets defined in [1] that have nominal stride
    equal to 32. However, for dense prediction tasks we advise that one uses
    inputs with spatial dimensions that are multiples of 32 plus 1, e.g.,
    [321, 321]. In this case the feature maps at the ResNet output will have
    spatial shape
    [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
    and corners exactly aligned with the input image corners, which greatly
    facilitates alignment of the features to the image. Using as input
    [225, 225] images results in [8, 8] feature maps at the output of the last
    ResNet block.

    For dense prediction tasks, the ResNet needs to run in fully-convolutional
    (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2]
    all have nominal stride equal to 32 and a good choice in FCN mode is to use
    output_stride=16 in order to increase the density of the computed features
    at small computational and memory overhead, cf.
    http://arxiv.org/abs/1606.00915.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      blocks: A list of length equal to the number of ResNet blocks. Each
        element is a resnet_utils.Block object describing the units in the
        block.
      num_classes: Number of predicted classes for classification tasks. If
        None we return the features before the logit layer.
      is_training: Whether batch normalization runs in training mode.
      global_pool: If True, we perform global average pooling before computing
        the logits. Set to True for image classification, False for dense
        prediction.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the
        requested ratio of input to output spatial resolution. When the root
        block is included it must be a multiple of 4.
      include_root_block: If True, include the initial convolution followed by
        max-pooling, if False excludes it.
      spatial_squeeze: If True, the spatial dimensions 1 and 2 of the output
        are squeezed away, giving shape [B, C]; if False the output is left
        as the rank-4 tensor of shape [B, 1, 1, C], where B is batch_size and
        C is the number of channels or classes.
      reuse: Whether or not the network and its variables should be reused. To
        be able to reuse, 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      logits: The network output. If num_classes is None this is the output
        of the last ResNet block, potentially after global average pooling;
        otherwise it contains the pre-softmax activations. If global_pool is
        False, the spatial size is reduced by a factor of output_stride
        compared to the input, else both spatial dimensions equal one. The
        spatial dimensions are removed when spatial_squeeze is True.
      end_points: A dictionary from components of the network to the
        corresponding activation. Contains a 'predictions' entry when
        num_classes is not None.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        # The root block (stride-2 conv + stride-2 pool)
                        # already accounts for a factor of 4. Floor division
                        # keeps the stride an integer on Python 3.
                        output_stride //= 4
                    net = resnet_utils.conv2d_same(net,
                                                   64,
                                                   7,
                                                   stride=2,
                                                   scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                if global_pool:
                    # Global average pooling.
                    # NOTE(review): `keep_dims` is the older spelling of
                    # `keepdims`; kept as-is for the TensorFlow version this
                    # file targets -- confirm before upgrading.
                    net = tf.reduce_mean(net, [1, 2],
                                         name='pool5',
                                         keep_dims=True)
                if num_classes is not None:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                if spatial_squeeze:
                    logits = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                else:
                    # Without squeezing, the rank-4 tensor is the output.
                    # Leaving `logits` unassigned here made the function fail
                    # whenever spatial_squeeze was False.
                    logits = net
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = slim.softmax(
                        logits, scope='predictions')
                return logits, end_points
コード例 #6
0
  def testStridingLastUnitVsSubsampleBlockEnd(self):
    """Compares striding in a block's last unit with subsampling at its end.

    The final network output must be identical whether the stride is applied
    in the last unit of each block or the block's activations are subsampled
    afterwards. The intermediate end points of the strided network must be
    subsampled versions of those of the non-strided one.
    """
    make_block = resnet_v1.resnet_v1_block
    # (scope name, base depth, stride) for each block; two units per block.
    block_specs = (
        ('block1', 1, 2),
        ('block2', 2, 2),
        ('block3', 4, 2),
        ('block4', 8, 1),
    )
    blocks = [
        make_block(name, base_depth=depth, num_units=2, stride=stride)
        for name, depth, stride in block_specs
    ]

    # One even and one odd spatial dimension, so both cases are exercised.
    height, width = 30, 31
    with slim.arg_scope(resnet_utils.resnet_arg_scope()), slim.arg_scope(
        [slim.batch_norm], is_training=False):
      for output_stride in (1, 2, 4, 8, None):
        with tf.Graph().as_default(), self.test_session() as sess:
          tf.set_random_seed(0)
          inputs = create_test_input(1, height, width, 3)

          # Subsampling at the last unit of the block.
          strided = resnet_utils.stack_blocks_dense(
              inputs, blocks, output_stride,
              store_non_strided_activations=False,
              outputs_collections='output')
          strided_end_points = slim.utils.convert_collection_to_dict('output')

          # Make the two networks use the same weights.
          tf.get_variable_scope().reuse_variables()

          # Subsample activations at the end of the blocks.
          non_strided = resnet_utils.stack_blocks_dense(
              inputs, blocks, output_stride,
              store_non_strided_activations=True,
              outputs_collections='expected')
          non_strided_end_points = slim.utils.convert_collection_to_dict(
              'expected')

          sess.run(tf.global_variables_initializer())

          # Make sure that the final output is the same.
          actual, expected = sess.run([strided, non_strided])
          self.assertAllClose(actual, expected, atol=1e-4, rtol=1e-4)

          # Intermediate activations of the strided network must be
          # subsampled versions of the non-strided ones, except for blocks
          # where atrous convolution has already replaced the stride.
          for index, blk in enumerate(blocks[:-1]):
            block_actual = strided_end_points[blk.scope]
            block_expected = non_strided_end_points[blk.scope]
            if output_stride is None or 2 ** index < output_stride:
              block_expected = resnet_utils.subsample(block_expected, 2)
            block_actual, block_expected = sess.run(
                [block_actual, block_expected])
            self.assertAllClose(
                block_actual, block_expected, atol=1e-4, rtol=1e-4)
コード例 #7
0
ファイル: resnet_v1_beta.py プロジェクト: qztseng/models
def resnet_mod(inputs,
               num_classes=None,
               is_training=None,
               global_pool=False,
               output_stride=None,
               multi_grid=None,
               root_depth_multiplier=0.25,
               reuse=None,
               scope='resnet_v1_18',
               sync_batch_norm_method='None'):
    """Builds a custom ResNet variant based on the v2 pre-activation design.

    The network consists of a 3x3 stride-2 root convolution, three single-unit
    `resnet_v2_small_beta_block` blocks, a `block4` with one
    `lite_bottleneck_v2` unit per `multi_grid` rate, and a closing batch norm
    followed by ReLU.

    Args:
      inputs: Input tensor passed to the root convolution.
      num_classes: Not used by this function.
      is_training: If not None, forwarded as `is_training` to the batch norm
        function through an arg scope; if None, no such scope is set.
      global_pool: Not used by this function.
      output_stride: If not None, must be a multiple of 4. It is halved before
        being handed to `resnet_utils.stack_blocks_dense`.
      multi_grid: Optional sequence of two unit rates for `block4`. Defaults
        to [1, 1].
      root_depth_multiplier: Not used by this function.
      reuse: Passed to `tf.variable_scope`.
      scope: Variable scope name.
      sync_batch_norm_method: String passed to `utils.get_batch_norm_fn`.

    Returns:
      net: The output tensor after the final batch norm and ReLU.
      end_points: A dictionary built from the end points collection.

    Raises:
      ValueError: If `multi_grid` does not have length 2, or if
        `output_stride` is not a multiple of 4.
    """
    if multi_grid is None:
        multi_grid = [1, 1]
    elif len(multi_grid) != 2:
        raise ValueError('Expect multi_grid to have length 2.')

    # Three single-unit, stride-2 blocks followed by the multi-grid block4.
    blocks = [
        resnet_v2_small_beta_block(name,
                                   base_depth=depth,
                                   num_units=1,
                                   stride=2)
        for name, depth in (('block1', 64), ('block2', 128), ('block3', 256))
    ]
    block4_units = [{
        'depth': 512,
        'stride': 1,
        'unit_rate': rate
    } for rate in multi_grid]
    blocks.append(
        resnet_utils.Block('block4', lite_bottleneck_v2, block4_units))

    root_block_fn = functools.partial(conv2d_ws.conv2d_same,
                                      num_outputs=64,
                                      kernel_size=3,
                                      stride=2,
                                      scope='root_conv1')
    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)

    with tf.variable_scope(scope, 'resnet_mod', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        collecting_ops = [
            slim.conv2d, conv2d_ws.conv2d, lite_bottleneck_v2,
            resnet_utils.stack_blocks_dense
        ]
        with slim.arg_scope(collecting_ops,
                            outputs_collections=end_points_collection):
            # Only override is_training when the caller asked for it.
            batch_norm_scope = (slim.arg_scope([])
                                if is_training is None else slim.arg_scope(
                                    [batch_norm], is_training=is_training))
            with batch_norm_scope:
                if output_stride is not None:
                    # NOTE(review): a multiple of 4 is required although only
                    # a factor of 2 is removed below -- confirm intended.
                    if output_stride % 4 != 0:
                        raise ValueError(
                            'The output_stride needs to be a multiple of 4.')
                    # No max-pool follows the root convolution here, so the
                    # root contributes a stride of 2 only.
                    output_stride = output_stride // 2
                net = root_block_fn(inputs)
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                # The last v2 unit ends without normalization or activation,
                # so a batch norm + ReLU layer is appended.
                # NOTE(review): this calls slim.batch_norm directly, whereas
                # the is_training scope above targets `batch_norm` -- confirm
                # they are the same function for the sync method in use.
                net = slim.batch_norm(net,
                                      activation_fn=tf.nn.relu,
                                      scope='postnorm')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                return net, end_points
コード例 #8
0
ファイル: resnet_v1_beta.py プロジェクト: qztseng/models
def resnet_v1_beta(inputs,
                   blocks,
                   num_classes=None,
                   is_training=None,
                   global_pool=True,
                   output_stride=None,
                   root_block_fn=None,
                   reuse=None,
                   scope=None,
                   sync_batch_norm_method='None'):
    """Generator for v1 ResNet models (beta variant).

    Builds a modified ResNet v1: a configurable root block, a 3x3 stride-2
    max-pool, the given ResNet blocks, optional global average pooling and an
    optional 1x1 logits convolution. See the resnet_v1_*() methods for
    specific model instantiations, obtained by selecting different block
    instantiations that produce ResNets of various depths.

    The code is modified from slim/nets/resnet_v1.py; please refer to it for
    more details.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      blocks: A list of length equal to the number of ResNet blocks. Each
        element is a resnet_utils.Block object describing the units in the
        block.
      num_classes: Number of predicted classes for classification tasks. If
        None we return the features before the logit layer.
      is_training: Enable/disable is_training for batch normalization. If
        None, no is_training arg scope is set.
      global_pool: If True, we perform global average pooling before computing
        the logits. Set to True for image classification, False for dense
        prediction.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the
        requested ratio of input to output spatial resolution and must be a
        multiple of 4.
      root_block_fn: The function consisting of convolution operations applied
        to the root input. If root_block_fn is None, use the original setting
        of ResNet-v1, which is simply one convolution with 7x7 kernel and
        stride=2.
      reuse: Whether or not the network and its variables should be reused. To
        be able to reuse, 'scope' must be given.
      scope: Optional variable_scope.
      sync_batch_norm_method: String, sync batchnorm method.

    Returns:
      net: A rank-4 tensor of size [batch, height_out, width_out,
        channels_out]. If global_pool is False, then height_out and width_out
        are reduced by a factor of output_stride compared to the respective
        height_in and width_in, else both height_out and width_out equal one.
        If num_classes is None, then net is the output of the last ResNet
        block, potentially after global average pooling. If num_classes is
        not None, net contains the pre-softmax activations.
      end_points: A dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    if root_block_fn is None:
        # Default root: the single 7x7 stride-2 convolution of ResNet-v1.
        root_block_fn = functools.partial(conv2d_ws.conv2d_same,
                                          num_outputs=64,
                                          kernel_size=7,
                                          stride=2,
                                          scope='conv1')
    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)

    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        collecting_ops = [
            conv2d_ws.conv2d, bottleneck, lite_bottleneck,
            resnet_utils.stack_blocks_dense
        ]
        with slim.arg_scope(collecting_ops,
                            outputs_collections=end_points_collection):
            # Only override is_training when the caller asked for it.
            batch_norm_scope = (slim.arg_scope([])
                                if is_training is None else slim.arg_scope(
                                    [batch_norm], is_training=is_training))
            with batch_norm_scope:
                if output_stride is not None:
                    if output_stride % 4 != 0:
                        raise ValueError(
                            'The output_stride needs to be a multiple of 4.')
                    # The stride-2 root block and stride-2 max-pool together
                    # already account for a factor of 4.
                    output_stride = output_stride // 4

                net = root_block_fn(inputs)
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='SAME',
                                      scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2],
                                         name='pool5',
                                         keepdims=True)

                has_logits = num_classes is not None
                if has_logits:
                    net = conv2d_ws.conv2d(net,
                                           num_classes, [1, 1],
                                           activation_fn=None,
                                           normalizer_fn=None,
                                           scope='logits',
                                           use_weight_standardization=False)

                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if has_logits:
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points