Example #1
    def _build_tail(self, inputs, is_training=False):
        if not self._use_tail:
            return inputs

        if self._architecture == 'resnet_v1_101':
            train_batch_norm = (is_training
                                and self._config.get('train_batch_norm'))
            with self._enter_variable_scope():
                weight_decay = (self._config.get('arg_scope',
                                                 {}).get('weight_decay', 0))
                with tf.variable_scope(self._architecture, reuse=True):
                    resnet_arg_scope = resnet_utils.resnet_arg_scope(
                        batch_norm_epsilon=1e-5,
                        batch_norm_scale=True,
                        weight_decay=weight_decay)
                    with slim.arg_scope(resnet_arg_scope):
                        with slim.arg_scope([slim.batch_norm],
                                            is_training=train_batch_norm):
                            blocks = [
                                resnet_utils.Block('block4',
                                                   resnet_v1.bottleneck,
                                                   [{
                                                       'depth': 2048,
                                                       'depth_bottleneck': 512,
                                                       'stride': 1
                                                   }] * 3)
                            ]
                            proposal_classifier_features = (
                                resnet_utils.stack_blocks_dense(
                                    inputs, blocks))
        else:
            proposal_classifier_features = inputs

        return proposal_classifier_features
Example #2
 def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
   """A plain ResNet without extra layers before or after the ResNet blocks."""
   with tf.variable_scope(scope, values=[inputs]):
     with slim.arg_scope([slim.conv2d], outputs_collections='end_points'):
       net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
       end_points = slim.utils.convert_collection_to_dict('end_points')
       return net, end_points
Example #3
    def _extract_box_classifier_features(self, proposal_feature_maps, scope):
        """Extracts second stage box classifier features.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
        with tf.variable_scope(self._architecture, reuse=self._reuse_weights):
            with slim.arg_scope(
                    resnet_utils.resnet_arg_scope(
                        batch_norm_epsilon=1e-5,
                        batch_norm_scale=True,
                        weight_decay=self._weight_decay)):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self._train_batch_norm):
                    blocks = [
                        resnet_utils.Block('block4', resnet_v1.bottleneck,
                                           [{
                                               'depth': 2048,
                                               'depth_bottleneck': 512,
                                               'stride': 1
                                           }] * 3)
                    ]
                    proposal_classifier_features = resnet_utils.stack_blocks_dense(
                        proposal_feature_maps, blocks)
        return proposal_classifier_features
Example #4
 def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
   """A plain ResNet without extra layers before or after the ResNet blocks."""
   with tf.variable_scope(scope, values=[inputs]):
     with slim.arg_scope([slim.conv2d], outputs_collections='end_points'):
       net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
       end_points = dict(tf.get_collection('end_points'))
       return net, end_points
Example #5
def resnet_v1(inputs,
              blocks,
              filter_scale=1.0,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              scope=None):
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        output_stride /= 4
                    net = resnet_utils.conv2d_same(net,
                                                   64 // filter_scale,
                                                   7,
                                                   stride=2,
                                                   scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                if global_pool:
                    # Global average pooling.
                    net = math_ops.reduce_mean(net, [1, 2],
                                               name='pool5',
                                               keepdims=True)
                if num_classes is not None:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
Example #6
    def _atrousValues(self, bottleneck):
        """Verify the values of dense feature extraction by atrous convolution.

    Make sure that dense feature extraction by stack_blocks_dense() followed by
    subsampling gives identical results to feature extraction at the nominal
    network output stride using the simple self._stack_blocks_nondense() above.

    Args:
      bottleneck: The bottleneck function.
    """
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
            resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
            resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
            resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
        ]
        nominal_stride = 8

        # Test both odd and even input dimensions.
        height = 30
        width = 31
        with slim.arg_scope(resnet_utils.resnet_arg_scope(is_training=False)):
            for output_stride in [1, 2, 4, 8, None]:
                with tf.Graph().as_default():
                    with self.test_session() as sess:
                        tf.set_random_seed(0)
                        inputs = create_test_input(1, height, width, 3)
                        # Dense feature extraction followed by subsampling.
                        output = resnet_utils.stack_blocks_dense(
                            inputs, blocks, output_stride)
                        if output_stride is None:
                            factor = 1
                        else:
                            factor = nominal_stride // output_stride

                        output = resnet_utils.subsample(output, factor)
                        # Make the two networks use the same weights.
                        tf.get_variable_scope().reuse_variables()
                        # Feature extraction at the nominal network rate.
                        expected = self._stack_blocks_nondense(inputs, blocks)
                        sess.run(tf.initialize_all_variables())
                        output, expected = sess.run([output, expected])
                        self.assertAllClose(output,
                                            expected,
                                            atol=1e-4,
                                            rtol=1e-4)
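For reference, the resnet_utils.subsample() call used in this test behaves as a strided identity; a minimal sketch of that behavior (assuming the slim implementation, i.e. a 1x1 max pool with the given stride):

def subsample_sketch(inputs, factor, scope=None):
    # Identity when factor == 1, otherwise keep every factor-th activation
    # via a 1x1 max pool with stride=factor.
    if factor == 1:
        return inputs
    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)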
Example #7
  def _atrousValues(self, bottleneck):
    """Verify the values of dense feature extraction by atrous convolution.

    Make sure that dense feature extraction by stack_blocks_dense() followed by
    subsampling gives identical results to feature extraction at the nominal
    network output stride using the simple self._stack_blocks_nondense() above.

    Args:
      bottleneck: The bottleneck function.
    """
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
        resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
        resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
        resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
    ]
    nominal_stride = 8

    # Test both odd and even input dimensions.
    height = 30
    width = 31
    with slim.arg_scope(resnet_utils.resnet_arg_scope(is_training=False)):
      for output_stride in [1, 2, 4, 8, None]:
        with tf.Graph().as_default():
          with self.test_session() as sess:
            tf.set_random_seed(0)
            inputs = create_test_input(1, height, width, 3)
            # Dense feature extraction followed by subsampling.
            output = resnet_utils.stack_blocks_dense(inputs,
                                                     blocks,
                                                     output_stride)
            if output_stride is None:
              factor = 1
            else:
              factor = nominal_stride // output_stride

            output = resnet_utils.subsample(output, factor)
            # Make the two networks use the same weights.
            tf.get_variable_scope().reuse_variables()
            # Feature extraction at the nominal network rate.
            expected = self._stack_blocks_nondense(inputs, blocks)
            sess.run(tf.global_variables_initializer())
            output, expected = sess.run([output, expected])
            self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)
Example #8
    def _build_tail(self, inputs, is_training=False):
        if not self._use_tail:
            return inputs

        if self._architecture == 'resnet_v1_101':
            train_batch_norm = (
                is_training and self._config.get('train_batch_norm')
            )
            with self._enter_variable_scope():
                weight_decay = (
                    self._config.get('arg_scope', {}).get('weight_decay', 0)
                )
                with tf.variable_scope(self._architecture, reuse=True):
                    resnet_arg_scope = resnet_utils.resnet_arg_scope(
                            batch_norm_epsilon=1e-5,
                            batch_norm_scale=True,
                            weight_decay=weight_decay
                        )
                    with slim.arg_scope(resnet_arg_scope):
                        with slim.arg_scope(
                            [slim.batch_norm], is_training=train_batch_norm
                        ):
                            blocks = [
                                resnet_utils.Block(
                                    'block4',
                                    resnet_v1.bottleneck,
                                    [{
                                        'depth': 2048,
                                        'depth_bottleneck': 512,
                                        'stride': 1
                                    }] * 3
                                )
                            ]
                            proposal_classifier_features = (
                                resnet_utils.stack_blocks_dense(inputs, blocks)
                            )
        else:
            proposal_classifier_features = inputs

        return proposal_classifier_features
Example #9
def resnet_rcnn(features, num_classes):
    resnet_arg_scope = resnet_utils.resnet_arg_scope(batch_norm_epsilon=1e-5,
                                                     batch_norm_scale=True,
                                                     weight_decay=5e-4)
    with slim.arg_scope(resnet_arg_scope):
        with slim.arg_scope([slim.batch_norm], is_training=True):
            blocks = [
                resnet_utils.Block('block4', resnet_v1.bottleneck,
                                   [{
                                       'depth': 2048,
                                       'depth_bottleneck': 512,
                                       'stride': 1
                                   }] * 3)
            ]
            net = resnet_utils.stack_blocks_dense(features, blocks)
    flat = tf.layers.flatten(net)
    cls_score = tf.layers.dense(flat, units=(num_classes + 1))

    reg_score = tf.layers.dense(flat, units=(num_classes * 4))

    cls_probs = tf.nn.softmax(cls_score)

    return cls_score, cls_probs, reg_score
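A hypothetical usage sketch for resnet_rcnn() above: feed it a batch of ROI-pooled proposal crops and fetch the head outputs (the 7x7x1024 crop shape and the variable names are assumptions for illustration; tf, slim, resnet_utils and resnet_v1 are imported as in the example):

proposal_crops = tf.placeholder(tf.float32, [None, 7, 7, 1024])
cls_score, cls_probs, reg_score = resnet_rcnn(proposal_crops, num_classes=20)
# cls_score / cls_probs: [num_proposals, 21] (20 classes + background);
# reg_score: [num_proposals, 80] (4 box-regression targets per class).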
Example #10
    def _build_tail(self, inputs, is_training=False):
        if not self._use_tail:
            return inputs

        if self._architecture == "resnet_v1_101":
            train_batch_norm = is_training and self._config.get(
                "train_batch_norm")
            with self._enter_variable_scope():
                weight_decay = self._config.get("arg_scope",
                                                {}).get("weight_decay", 0)
                with tf.variable_scope(self._architecture, reuse=True):
                    resnet_arg_scope = resnet_utils.resnet_arg_scope(
                        batch_norm_epsilon=1e-5,
                        batch_norm_scale=True,
                        weight_decay=weight_decay,
                    )
                    with slim.arg_scope(resnet_arg_scope):
                        with slim.arg_scope([slim.batch_norm],
                                            is_training=train_batch_norm):
                            blocks = [
                                resnet_utils.Block(
                                    "block4",
                                    resnet_v1.bottleneck,
                                    [{
                                        "depth": 2048,
                                        "depth_bottleneck": 512,
                                        "stride": 1,
                                    }] * 3,
                                )
                            ]
                            proposal_classifier_features = (
                                resnet_utils.stack_blocks_dense(
                                    inputs, blocks))
        else:
            proposal_classifier_features = inputs

        return proposal_classifier_features
Example #11
def resnet_v1_beta(inputs,
                   blocks,
                   num_classes=None,
                   is_training=None,
                   global_pool=True,
                   output_stride=None,
                   root_block_fn=None,
                   reuse=None,
                   scope=None):
    """Generator for v1 ResNet models (beta variant).

      This function generates a family of modified ResNet v1 models. In particular,
      the first original 7x7 convolution is replaced with three 3x3 convolutions.
      See the resnet_v1_*() methods for specific model instantiations, obtained by
      selecting different block instantiations that produce ResNets of various
      depths.

      The code is modified from slim/nets/resnet_v1.py; please refer to it for
      more details.

      Args:
        inputs: A tensor of size [batch, height_in, width_in, channels].
        blocks: A list of length equal to the number of ResNet blocks. Each element
          is a resnet_utils.Block object describing the units in the block.
        num_classes: Number of predicted classes for classification tasks. If None
          we return the features before the logit layer.
        is_training: Enable/disable is_training for batch normalization.
        global_pool: If True, we perform global average pooling before computing the
          logits. Set to True for image classification, False for dense prediction.
        output_stride: If None, then the output will be computed at the nominal
          network stride. If output_stride is not None, it specifies the requested
          ratio of input to output spatial resolution.
        root_block_fn: The function consisting of convolution operations applied to
          the root input. If root_block_fn is None, use the original setting of
          ResNet-v1, which is simply one convolution with a 7x7 kernel and stride=2.
        reuse: whether or not the network and its variables should be reused. To be
          able to reuse 'scope' must be given.
        scope: Optional variable_scope.

      Returns:
        net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
          If global_pool is False, then height_out and width_out are reduced by a
          factor of output_stride compared to the respective height_in and width_in,
          else both height_out and width_out equal one. If num_classes is None, then
          net is the output of the last ResNet block, potentially after global
          average pooling. If num_classes is not None, net contains the pre-softmax
          activations.
        end_points: A dictionary from components of the network to the corresponding
          activation.

      Raises:
        ValueError: If the target output_stride is not valid.
      """
    if root_block_fn is None:
        root_block_fn = functools.partial(resnet_utils.conv2d_same,
                                          num_outputs=64,
                                          kernel_size=7,
                                          stride=2,
                                          scope='conv1')
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            if is_training is not None:
                arg_scope = slim.arg_scope([slim.batch_norm],
                                           is_training=is_training)
            else:
                arg_scope = slim.arg_scope([])
            with arg_scope:
                net = inputs
                if output_stride is not None:
                    if output_stride % 4 != 0:
                        raise ValueError(
                            'The output_stride needs to be a multiple of 4.')
                    output_stride /= 4
                net = root_block_fn(net)  # (N, 375, 375, 128)

                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='SAME',
                                      scope='pool1')  # (N, 188, 188, 128)
                net = resnet_utils.stack_blocks_dense(
                    net, blocks, output_stride)  # (N, 94, 94, 2048)

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2],
                                         name='pool5',
                                         keepdims=True)

                if num_classes is not None:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')

                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')

                return net, end_points
Example #12
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              scope=None):
  """Generator for v1 ResNet models.

  This function generates a family of ResNet v1 models. See the resnet_v1_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce ResNets of various depths.

  Training for image classification on Imagenet is usually done with [224, 224]
  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
  block for the ResNets defined in [1] that have nominal stride equal to 32.
  However, for dense prediction tasks we advise that one uses inputs with
  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
  this case the feature maps at the ResNet output will have spatial shape
  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
  and corners exactly aligned with the input image corners, which greatly
  facilitates alignment of the features to the image. Using as input [225, 225]
  images results in [8, 8] feature maps at the output of the last ResNet block.

  For dense prediction tasks, the ResNet needs to run in fully-convolutional
  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
  have nominal stride equal to 32 and a good choice in FCN mode is to use
  output_stride=16 in order to increase the density of the computed features at
  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    include_root_block: If True, include the initial convolution followed by
      max-pooling, if False excludes it.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope([slim.conv2d, bottleneck,
                         resnet_utils.stack_blocks_dense],
                        outputs_collections=end_points_collection):
      net = inputs
      if include_root_block:
        if output_stride is not None:
          if output_stride % 4 != 0:
            raise ValueError('The output_stride needs to be a multiple of 4.')
          output_stride /= 4
        net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
        net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
      net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
      if global_pool:
        # Global average pooling.
        net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
      if num_classes is not None:
        net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                          normalizer_fn=None, scope='logits')
      # Convert end_points_collection into a dictionary of end_points.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if num_classes is not None:
        end_points['predictions'] = slim.softmax(net, scope='predictions')
      return net, end_points
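To illustrate the FCN-mode guidance in the docstring, a minimal call sketch for the resnet_v1() above (assumptions: tuple-style Block units as in the test examples above, a resnet_v1_50-like block layout, and wrapping in resnet_arg_scope for the conv/batch-norm defaults):

inputs = tf.placeholder(tf.float32, [1, 321, 321, 3])
blocks = [
    resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
    resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
    resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
    resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3),
]
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
    net, end_points = resnet_v1(inputs, blocks, num_classes=None,
                                global_pool=False, output_stride=16)
# Per the docstring, a [321, 321] input at output_stride=16 yields features of
# spatial shape [(321 - 1) / 16 + 1, (321 - 1) / 16 + 1] = [21, 21].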
Example #13
def resnet_v1_beta(inputs,
                   blocks,
                   num_classes=None,
                   is_training=None,
                   global_pool=True,
                   output_stride=None,
                   root_block_fn=None,
                   reuse=None,
                   scope=None):
    """V1 ResNet模型的生成器
    这个函数生成一系列经过修改的ResNet v1模型。 特别是,第一个原始7x7卷积被三个3x3卷积替换。
     有关特定模型实例的信息,请参阅resnet_v1 _ *()方法,这些方法是通过选择生成不同深度ResNets的不同块实例来获得的。
    :param inputs:tensor,[batch, height_in, width_in, channels]
    :param blocks:长度等于ResNet块数的列表。 每个元素都是一个resnet_utils.Block对象,用于描述块中的单位。
    :param num_classes:用于分类任务的预测类数。如果为None在最后的回归层返回特征
    :param is_training:是否用批正则化
    :param global_pool:如果为True,在计算回归之前使用全局平均池化。设置为真用于图像分类,假则用于密集预测。
    :param output_stride:如果为None,则输出将在标称网络步长处计算。 如果output_stride不是None,则它指定所请求的输入与输出空间分辨率的比率。
    :param root_block_fn:该函数由应用于根输入的卷积运算组成。 如果root_block_fn为None,
    则使用RseNet-v1的原始设置,该设置只是一个带有7x7内核和stride = 2的卷积。
    :param reuse:是否应该重用网络及其变量。 重用的话,必须给定重用的'scope'
    :param scope:可选variable_scope

    :return:nets.等级-4张量的大小[batch,height_out,width_out,channels_out]。 如果global_pool为False,
    则height_out和width_out与相应的height_in和width_in相比减少了output_stride因子,
    否则height_out和width_out都等于1。 如果num_classes为None,则net是最后一个ResNet块的输出,可能是在全局平均池之后。
    如果num_classes不是None,则net包含pre-softmax激活。
    end_points: 从网络组件到相应激活的字典。

    :raise:如果目标输出步长无效则
  """
    if root_block_fn is None:
        root_block_fn = functools.partial(resnet_utils.conv2d_same,
                                          num_outputs=64,
                                          kernel_size=7,
                                          stride=2,
                                          scope='conv1')
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            if is_training is not None:
                arg_scope = slim.arg_scope([slim.batch_norm],
                                           is_training=is_training)
            else:
                arg_scope = slim.arg_scope([])
            with arg_scope:
                net = inputs
                if output_stride is not None:
                    if output_stride % 4 != 0:
                        raise ValueError(
                            'The output_stride needs to be a multiple of 4.')
                    output_stride /= 4
                net = root_block_fn(net)
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='SAME',
                                      scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2],
                                         name='pool5',
                                         keepdims=True)
                if num_classes is not None:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
Example #14
def resnet_v1_beta(inputs,
                   blocks,
                   num_classes=None,
                   is_training=None,
                   global_pool=True,
                   output_stride=None,
                   root_block_fn=None,
                   reuse=None,
                   scope=None):
  """Generator for v1 ResNet models (beta variant).

  This function generates a family of modified ResNet v1 models. In particular,
  the first original 7x7 convolution is replaced with three 3x3 convolutions.
  See the resnet_v1_*() methods for specific model instantiations, obtained by
  selecting different block instantiations that produce ResNets of various
  depths.

  The code is modified from slim/nets/resnet_v1.py; please refer to it for
  more details.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: Enable/disable is_training for batch normalization.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    root_block_fn: The function consisting of convolution operations applied to
      the root input. If root_block_fn is None, use the original setting of
      ResNet-v1, which is simply one convolution with a 7x7 kernel and stride=2.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  if root_block_fn is None:
    root_block_fn = functools.partial(resnet_utils.conv2d_same,
                                      num_outputs=64,
                                      kernel_size=7,
                                      stride=2,
                                      scope='conv1')
  with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope([slim.conv2d, bottleneck,
                         resnet_utils.stack_blocks_dense],
                        outputs_collections=end_points_collection):
      if is_training is not None:
        arg_scope = slim.arg_scope([slim.batch_norm], is_training=is_training)
      else:
        arg_scope = slim.arg_scope([])
      with arg_scope:
        net = inputs
        if output_stride is not None:
          if output_stride % 4 != 0:
            raise ValueError('The output_stride needs to be a multiple of 4.')
          output_stride /= 4
        net = root_block_fn(net)
        net = slim.max_pool2d(net, 3, stride=2, padding='SAME', scope='pool1')
        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)

        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], name='pool5', keepdims=True)
        if num_classes is not None:
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='logits')
        # Convert end_points_collection into a dictionary of end_points.
        end_points = slim.utils.convert_collection_to_dict(
            end_points_collection)
        if num_classes is not None:
          end_points['predictions'] = slim.softmax(net, scope='predictions')
        return net, end_points
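As the docstring notes, the beta variant replaces the single 7x7 root convolution with three 3x3 convolutions; a sketch of such a root_block_fn is below (the 64/64/128 filter counts match the (N, 375, 375, 128) shape comment in the earlier example, but the helper name, the single-block list, and the dict-style units are assumptions for illustration):

def three_conv_root_block(net):
    # Three 3x3 convolutions in place of the original 7x7 / stride-2 root conv.
    net = resnet_utils.conv2d_same(net, 64, 3, stride=2, scope='conv1_1')
    net = resnet_utils.conv2d_same(net, 64, 3, stride=1, scope='conv1_2')
    return resnet_utils.conv2d_same(net, 128, 3, stride=1, scope='conv1_3')

inputs = tf.placeholder(tf.float32, [1, 513, 513, 3])
blocks = [
    resnet_utils.Block('block4', bottleneck,
                       [{'depth': 2048, 'depth_bottleneck': 512, 'stride': 1}] * 3)
]
net, end_points = resnet_v1_beta(inputs, blocks, is_training=False,
                                 global_pool=False,
                                 root_block_fn=three_conv_root_block)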
Example #15
def resnet_v2(inputs,
              blocks,
              blocks2,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):

    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        output_stride /= 4
                    # We do not include batch normalization or activation functions in
                    # conv1 because the first ResNet unit will perform these. Cf.
                    # Appendix of [2].
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None):
                        net = resnet_utils.conv2d_same(net,
                                                       64,
                                                       7,
                                                       stride=2,
                                                       scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                #net = resnet_utils.stack_blocks_dense(net, blocks[:-2], output_stride)
                CONV4 = net
                Gap4 = tf.reduce_mean(net, [1, 2],
                                      name='pool_gp',
                                      keep_dims=True)
                net = resnet_utils.stack_blocks_dense(net, blocks2,
                                                      output_stride)
                # This is needed because the pre-activation variant does not have batch
                # normalization or activation functions in the residual unit output. See
                # Appendix of [2].
                net = slim.batch_norm(net,
                                      activation_fn=tf.nn.relu,
                                      scope='postnorm')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)

                if global_pool:
                    # Global average pooling.
                    SPATIAL = net
                    net = tf.reduce_mean(net, [1, 2],
                                         name='pool5',
                                         keep_dims=True)
                    Gap = net
                    end_points['global_pool'] = net
                if num_classes is not None:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                    end_points[sc.name + '/logits'] = net
                    if spatial_squeeze:
                        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                        end_points[sc.name + '/spatial_squeeze'] = net
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                #return Gap, CONV4, net, end_points
                return Gap, SPATIAL, net, end_points
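A hypothetical call sketch for the modified resnet_v2() above, splitting a block list so that Gap/CONV4 are taken before the final stage and SPATIAL/net after it (the resnet_v2_50-style layout, the tuple-style units, and the split point are assumptions for illustration):

inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
all_blocks = [
    resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
    resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
    resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
    resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3),
]
gap, spatial, net, end_points = resnet_v2(
    inputs, all_blocks[:3], all_blocks[3:], num_classes=1000,
    is_training=False, global_pool=True)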