Example #1
0
def root_block(inputs, is_training, collections):
    """Apply the two entry-flow stem convolutions under 'xception_65'.

    The static shape of the tensor is printed before the first convolution
    and again after each convolution.

    Args:
      inputs: Tensor fed to the first stem convolution.
      is_training: Value forwarded as `is_training` to `slim.batch_norm`.
      collections: Value forwarded as `outputs_collections` to the ops listed
        in the arg scope below.

    Returns:
      The tensor produced by the second stem convolution.
    """
    # (num_outputs, stride, scope) of each 3x3 stem convolution, in order.
    stem_convs = (
        (32, 2, 'entry_flow/conv1_1'),
        (64, 1, 'entry_flow/conv1_2'),
    )

    base_scope = xception_arg_scope()
    with slim.arg_scope(base_scope), \
            tf.variable_scope('xception_65', 'xception'):
        # Ops whose outputs are gathered into `collections`.
        collecting_ops = [
            slim.conv2d, slim.separable_conv2d, xception_module, get_block,
            root_block
        ]
        with slim.arg_scope(collecting_ops,
                            outputs_collections=collections), \
                slim.arg_scope([slim.batch_norm], is_training=is_training):
            net = inputs
            print(net.get_shape())
            for num_outputs, stride, conv_scope in stem_convs:
                net = resnet_utils.conv2d_same(net,
                                               num_outputs,
                                               3,
                                               stride=stride,
                                               scope=conv_scope)
                print(net.get_shape())
    return net
Example #2
0
  def _apply_conv_operation(self, net, operation, stride,
                            is_from_original_input):
    """Applies the predicted conv operation to net."""
    if stride > 1 and not is_from_original_input:
      stride = 1
    input_filters = net.shape[3]
    filter_size = self._filter_size
    if 'separable' in operation:
      num_layers = int(operation.split('_')[-1])
      kernel_size = int(operation.split('x')[0][-1])
      for layer_num in range(num_layers):
        net = tf.nn.relu(net)
        net = separable_conv2d_same(
            net,
            filter_size,
            kernel_size,
            depth_multiplier=1,
            scope='separable_{0}x{0}_{1}'.format(kernel_size, layer_num + 1),
            stride=stride)
        net = self._batch_norm_fn(
            net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, layer_num + 1))
        stride = 1
    elif 'atrous' in operation:
      kernel_size = int(operation.split('x')[0][-1])
      net = tf.nn.relu(net)
      if stride == 2:
        scaled_height = scale_dimension(tf.shape(net)[1], 0.5)
        scaled_width = scale_dimension(tf.shape(net)[2], 0.5)
        net = resize_bilinear(net, [scaled_height, scaled_width], net.dtype)
        net = resnet_utils.conv2d_same(
            net, filter_size, kernel_size, rate=1, stride=1,
            scope='atrous_{0}x{0}'.format(kernel_size))
      else:
        net = resnet_utils.conv2d_same(
            net, filter_size, kernel_size, rate=2, stride=1,
            scope='atrous_{0}x{0}'.format(kernel_size))
      net = self._batch_norm_fn(net, scope='bn_atr_{0}x{0}'.format(kernel_size))
    elif operation in ['none']:
      if stride > 1 or (input_filters != filter_size):
        net = tf.nn.relu(net)
        net = slim.conv2d(net, filter_size, 1, stride=stride, scope='1x1')
        net = self._batch_norm_fn(net, scope='bn_1')
    elif 'pool' in operation:
      pooling_type = operation.split('_')[0]
      pooling_shape = int(operation.split('_')[-1].split('x')[0])
      if pooling_type == 'avg':
        net = slim.avg_pool2d(net, pooling_shape, stride=stride, padding='SAME')
      elif pooling_type == 'max':
        net = slim.max_pool2d(net, pooling_shape, stride=stride, padding='SAME')
      else:
        raise ValueError('Unimplemented pooling type: ', pooling_type)
      if input_filters != filter_size:
        net = slim.conv2d(net, filter_size, 1, stride=1, scope='1x1')
        net = self._batch_norm_fn(net, scope='bn_1')
    else:
      raise ValueError('Unimplemented operation', operation)

    if operation != 'none':
      net = self._apply_drop_path(net)
    return net
Example #3
0
def root_block_fn_for_beta_variant(net):
    """Get the root_block_fn for the beta variant.

    Args:
      net: Tensor fed to the first convolution; described by the original
        author as the model input of size [batch, height, width, channels].

    Returns:
      The tensor after three successive 3x3 convolutions.
    """
    # (num_outputs, stride, scope) of each 3x3 convolution, in order.
    conv_specs = (
        (64, 2, 'conv1_1'),
        (64, 1, 'conv1_2'),
        (128, 1, 'conv1_3'),
    )
    for num_outputs, stride, conv_scope in conv_specs:
        net = resnet_utils.conv2d_same(
            net, num_outputs, 3, stride=stride, scope=conv_scope)

    return net
def _nas_stem(inputs, batch_norm_fn=slim.batch_norm):
    """Stem used for NAS models.

    Three 3x3 conv + batch-norm stages ('conv0', 'conv1', 'conv2') with
    strides 2, 1 and 2; a ReLU is applied between consecutive stages.

    Args:
      inputs: Tensor fed to the first convolution.
      batch_norm_fn: Callable applied after each convolution; called as
        `batch_norm_fn(tensor, scope=...)`.

    Returns:
      A pair `(net, cell_outputs)`: `net` is the batch-normalized output of
      'conv2', and `cell_outputs` is the list holding the batch-normalized
      output of 'conv1' followed by `net`.
    """

    def conv_bn(tensor, num_outputs, stride, name):
        # 3x3 convolution followed by batch norm under '<name>_bn'.
        tensor = resnet_utils.conv2d_same(
            tensor, num_outputs, 3, stride=stride, scope=name)
        return batch_norm_fn(tensor, scope=name + '_bn')

    stem = tf.nn.relu(conv_bn(inputs, 64, 2, 'conv0'))
    first_output = conv_bn(stem, 64, 1, 'conv1')
    net = conv_bn(tf.nn.relu(first_output), 128, 2, 'conv2')
    return net, [first_output, net]
Example #5
0
def root_block_fn_for_beta_variant(net):
  """Builds the root block of the beta variant.

  Instead of a single 7x7 convolution, the ResNet-v1 beta variant starts with
  a stack of three 3x3 convolutions.

  Args:
    net: A tensor of size [batch, height, width, channels], input to the model.

  Returns:
    The tensor produced by the third 3x3 convolution.
  """
  # (num_outputs, stride, scope) of each 3x3 convolution, in order.
  root_convs = [(64, 2, 'conv1_1'), (64, 1, 'conv1_2'), (128, 1, 'conv1_3')]
  for num_outputs, conv_stride, conv_scope in root_convs:
    net = resnet_utils.conv2d_same(
        net, num_outputs, 3, stride=conv_stride, scope=conv_scope)

  return net
Example #6
0
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None):
    """Bottleneck residual unit: 1x1 -> 3x3 -> 1x1 convolutions plus shortcut.

    The shortcut is a subsampled copy of the input when the input depth
    already equals `depth`, otherwise a 1x1 convolution without activation.
    The unit output is `relu(shortcut + residual)`.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
      depth: The depth of the ResNet unit output.
      depth_bottleneck: The depth of the bottleneck layers.
      stride: The unit's stride; applied on the shortcut and on the 3x3
        convolution.
      rate: An integer, rate for atrous convolution (used by the 3x3 conv).
      outputs_collections: Collection to add the ResNet unit output.
      scope: Optional variable_scope.

    Returns:
      The ResNet unit's output.
    """
    with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
        input_depth = slim.utils.last_dimension(inputs.get_shape(),
                                                min_rank=4)
        if depth != input_depth:
            # Depth changes: project the input with a linear 1x1 convolution.
            shortcut = slim.conv2d(inputs,
                                   depth, [1, 1],
                                   stride=stride,
                                   activation_fn=None,
                                   scope='shortcut')
        else:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')

        reduced = slim.conv2d(
            inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1')
        spatial = resnet_utils.conv2d_same(
            reduced, depth_bottleneck, 3, stride, rate=rate, scope='conv2')
        residual = slim.conv2d(spatial,
                               depth, [1, 1],
                               stride=1,
                               activation_fn=None,
                               scope='conv3')

        unit_output = tf.nn.relu(shortcut + residual)

        return slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                                unit_output)
Example #7
0
    def testConv2DSameEven(self):
        """Compares strided convolutions on an even-sized (4x4) input.

        Checks that `resnet_utils.conv2d_same` with stride 2 matches
        subsampling the stride-1 convolution output by 2, and that
        `slim.conv2d` with stride 2 yields the separately listed values.
        """
        n, n2 = 4, 2

        # Input image.
        x = create_test_input(1, n, n, 1)

        # Convolution kernel.
        w = create_test_input(1, 3, 3, 1)
        w = tf.reshape(w, [3, 3, 1, 1])

        # Pre-create the variables so that every "Conv" layer below reuses
        # the same kernel and bias.
        tf.get_variable("Conv/weights", initializer=w)
        tf.get_variable("Conv/biases", initializer=tf.zeros([1]))
        tf.get_variable_scope().reuse_variables()

        y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope="Conv")
        y1_expected = tf.to_float([[14, 28, 43, 26], [28, 48, 66, 37], [43, 66, 84, 46], [26, 37, 46, 22]])
        y1_expected = tf.reshape(y1_expected, [1, n, n, 1])

        y2 = resnet_utils.subsample(y1, 2)
        y2_expected = tf.to_float([[14, 43], [43, 84]])
        y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])

        y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope="Conv")
        y3_expected = y2_expected

        y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope="Conv")
        y4_expected = tf.to_float([[48, 37], [37, 22]])
        y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1])

        with self.test_session() as sess:
            # tf.initialize_all_variables() is deprecated in favour of
            # tf.global_variables_initializer().
            sess.run(tf.global_variables_initializer())
            self.assertAllClose(y1.eval(), y1_expected.eval())
            self.assertAllClose(y2.eval(), y2_expected.eval())
            self.assertAllClose(y3.eval(), y3_expected.eval())
            self.assertAllClose(y4.eval(), y4_expected.eval())
Example #8
0
def root_block_fn_for_beta_variant(net):
  """Returns the output of the beta-variant root block applied to `net`.

  The ResNet-v1 beta variant replaces the original first 7x7 convolution by
  three 3x3 convolutions.

  Args:
    net: A tensor of size [batch, height, width, channels], input to the model.

  Returns:
    A tensor after three 3x3 convolutions.
  """
  # Each entry: (num_outputs, stride, scope) of one 3x3 convolution.
  layer_specs = (
      (64, 2, 'conv1_1'),
      (64, 1, 'conv1_2'),
      (128, 1, 'conv1_3'),
  )
  for num_outputs, layer_stride, layer_scope in layer_specs:
    net = resnet_utils.conv2d_same(
        net, num_outputs, 3, stride=layer_stride, scope=layer_scope)

  return net
Example #9
0
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               unit_rate=1,
               rate=1,
               outputs_collections=None,
               scope=None):
  """Bottleneck residual unit: 1x1 -> 3x3 -> 1x1 convolutions plus shortcut.

  The shortcut is a subsampled copy of the input when the input depth already
  equals `depth`, otherwise a 1x1 convolution without activation. The 3x3
  convolution uses the atrous rate `rate * unit_rate`. The unit output is
  `relu(shortcut + residual)`.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The unit's stride; applied on the shortcut and on the 3x3
      convolution.
    unit_rate: An integer, unit rate for atrous convolution.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.

  Returns:
    The ResNet unit's output.
  """
  with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
    input_depth = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
    if depth != input_depth:
      # Depth changes: project the input with a linear 1x1 convolution.
      shortcut = slim.conv2d(
          inputs,
          depth,
          [1, 1],
          stride=stride,
          activation_fn=None,
          scope='shortcut')
    else:
      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')

    reduced = slim.conv2d(
        inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1')
    spatial = resnet_utils.conv2d_same(
        reduced, depth_bottleneck, 3, stride, rate=rate*unit_rate,
        scope='conv2')
    residual = slim.conv2d(
        spatial, depth, [1, 1], stride=1, activation_fn=None, scope='conv3')
    unit_output = tf.nn.relu(shortcut + residual)

    return slim.utils.collect_named_outputs(
        outputs_collections, sc.name, unit_output)
Example #10
0
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               unit_rate=1,
               rate=1,
               outputs_collections=None,
               scope=None):
    """Bottleneck residual unit variant with BN after convolutions.

    Note that the bottleneck variant, which has an extra bottleneck layer,
    is used here. When putting two consecutive ResNet blocks together, one
    should use stride = 2 in the last unit of the first block.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
      depth: The depth of the ResNet unit output.
      depth_bottleneck: The depth of the bottleneck layers.
      stride: The ResNet unit's stride. Determines the amount of
        downsampling of the unit's output compared to its input.
      unit_rate: An integer, unit rate for atrous convolution.
      rate: An integer, rate for atrous convolution.
      outputs_collections: Collection to add the ResNet unit output.
      scope: Optional variable_scope.

    Returns:
      The ResNet unit's output.
    """
    with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
        input_depth = slim.utils.last_dimension(inputs.get_shape(),
                                                min_rank=4)
        if depth != input_depth:
            # Depth changes: project the input with a linear 1x1 convolution.
            shortcut = slim.conv2d(inputs,
                                   depth, [1, 1],
                                   stride=stride,
                                   activation_fn=None,
                                   scope='shortcut')
        else:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')

        reduced = slim.conv2d(
            inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1')
        # The effective atrous rate is the product of both rate arguments.
        spatial = resnet_utils.conv2d_same(reduced,
                                           depth_bottleneck,
                                           3,
                                           stride,
                                           rate=rate * unit_rate,
                                           scope='conv2')
        residual = slim.conv2d(spatial,
                               depth, [1, 1],
                               stride=1,
                               activation_fn=None,
                               scope='conv3')

        unit_output = tf.nn.relu(shortcut + residual)

        return slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                                unit_output)
def resnet_v1(inputs,
              blocks,
              filter_scale=1.0,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              scope=None):
    """Assemble a ResNet v1 style network from a list of blocks.

    Args:
      inputs: Input tensor of the network.
      blocks: Block descriptions forwarded to
        `resnet_utils.stack_blocks_dense`.
      filter_scale: Divisor for the 64 filters of the root convolution; the
        filter count is `int(64 // filter_scale)`.
      num_classes: If not None, a 1x1 'logits' convolution with this many
        outputs is appended and a softmax over it is stored in
        `end_points['predictions']`.
      is_training: Value forwarded as `is_training` to `slim.batch_norm`.
      global_pool: If true, the block output is averaged over axes 1 and 2
        (dimensions kept) before the logits layer.
      output_stride: Optional requested output stride, forwarded to
        `resnet_utils.stack_blocks_dense`. When the root block is included
        it must be a multiple of 4 and is divided by 4 first, since the
        stride-2 root convolution and stride-2 max pool are applied before
        the blocks.
      include_root_block: If true, prepend the 7x7 stride-2 convolution and
        the 3x3 stride-2 max pool.
      reuse: Forwarded to `tf.variable_scope`.
      scope: Optional variable scope; defaults to 'resnet_v1'.

    Returns:
      A pair `(net, end_points)`: the final tensor and a dictionary built
      from the end-points collection of this scope.

    Raises:
      ValueError: If the root block is included and `output_stride` is not a
        multiple of 4.
    """
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        # Floor division keeps the stride an integer on
                        # Python 3 (it is an exact multiple of 4 here).
                        output_stride //= 4
                    # `64 // 1.0` is the float 64.0; a layer depth must be
                    # an integer, so convert explicitly.
                    root_depth = int(64 // filter_scale)
                    net = resnet_utils.conv2d_same(net,
                                                   root_depth,
                                                   7,
                                                   stride=2,
                                                   scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                if global_pool:
                    # Global average pooling.
                    net = math_ops.reduce_mean(net, [1, 2],
                                               name='pool5',
                                               keepdims=True)
                if num_classes is not None:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
Example #12
0
def bottleneck_features(inputT,
                        depth,
                        depth_bottleneck,
                        stride,
                        rate=1,
                        outputs_collections=None,
                        scope=None):
  """Simplified bottleneck_v1 unit with an explicit final batch norm.

  The last 1x1 convolution ('conv3') is built without activation and without
  normalizer; `slim.batch_norm` is then applied to it as a separate step
  before the shortcut is added.

  Args:
    inputT: A tensor of size [batch, height, width, channel].
    depth: Output depth of the unit.
    depth_bottleneck: Depth of the bottleneck layers.
    stride: Amount of downsampling of the output relative to the input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the output to.
    scope: Optional variable scope.

  Returns:
    The unit output, `relu(shortcut + residual)`, as returned by
    `slim.utils.collect_named_outputs`.
  """
  with tf.variable_scope(scope, 'bottleneck_v1', [inputT]) as sc:
    input_depth = slim.utils.last_dimension(inputT.get_shape(), min_rank=4)

    # Skip connection: project with a linear 1x1 conv only if depth changes.
    if depth != input_depth:
      shortcut = slim.conv2d(
          inputT, depth, [1, 1], stride=stride, activation_fn=None,
          scope='shortcut')
    else:
      shortcut = subsample(inputT, stride, scope='shortcut')

    # Residual branch: 1x1 reduce -> 3x3 -> 1x1 expand -> batch norm.
    reduced = slim.conv2d(
        inputT, depth_bottleneck, [1, 1], stride=1, scope='conv1')
    spatial = conv2d_same(
        reduced, depth_bottleneck, 3, stride, rate=rate, scope='conv2')
    expanded = slim.conv2d(
        spatial, depth, [1, 1], stride=1, activation_fn=None,
        normalizer_fn=None, scope='conv3')
    residual = slim.batch_norm(expanded)

    unit_output = tf.nn.relu(shortcut + residual)

    return slim.utils.collect_named_outputs(
        outputs_collections, sc.name, unit_output)
Example #13
0
  def testConv2DSameEven(self):
    """Compares strided convolutions on an even-sized (4x4) input.

    Checks that `resnet_utils.conv2d_same` with stride 2 matches subsampling
    the stride-1 convolution output by 2, and that `slim.conv2d` with stride 2
    yields the separately listed values.
    """
    n, n2 = 4, 2

    # Input image.
    x = create_test_input(1, n, n, 1)

    # Convolution kernel.
    w = create_test_input(1, 3, 3, 1)
    w = tf.reshape(w, [3, 3, 1, 1])

    # Pre-create the variables so that every 'Conv' layer below reuses the
    # same kernel and bias.
    tf.get_variable('Conv/weights', initializer=w)
    tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
    tf.get_variable_scope().reuse_variables()

    y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
    y1_expected = tf.to_float([[14, 28, 43, 26],
                               [28, 48, 66, 37],
                               [43, 66, 84, 46],
                               [26, 37, 46, 22]])
    y1_expected = tf.reshape(y1_expected, [1, n, n, 1])

    y2 = resnet_utils.subsample(y1, 2)
    y2_expected = tf.to_float([[14, 43],
                               [43, 84]])
    y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])

    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
    y3_expected = y2_expected

    y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
    y4_expected = tf.to_float([[48, 37],
                               [37, 22]])
    y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1])

    with self.test_session() as sess:
      # tf.initialize_all_variables() is deprecated in favour of
      # tf.global_variables_initializer().
      sess.run(tf.global_variables_initializer())
      self.assertAllClose(y1.eval(), y1_expected.eval())
      self.assertAllClose(y2.eval(), y2_expected.eval())
      self.assertAllClose(y3.eval(), y3_expected.eval())
      self.assertAllClose(y4.eval(), y4_expected.eval())
Example #14
0
  def testConv2DSameOdd(self):
    """Compares strided convolutions on an odd-sized (5x5) input.

    Both `resnet_utils.conv2d_same` and `slim.conv2d` with stride 2 are
    expected to match subsampling the stride-1 convolution output by 2.
    """
    size, half = 5, 3

    # Input image and 3x3 convolution kernel.
    image = create_test_input(1, size, size, 1)
    kernel = tf.reshape(create_test_input(1, 3, 3, 1), [3, 3, 1, 1])

    # Pre-create the variables so that every 'Conv' layer below reuses the
    # same kernel and bias.
    tf.get_variable('Conv/weights', initializer=kernel)
    tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
    tf.get_variable_scope().reuse_variables()

    dense = slim.conv2d(image, 1, [3, 3], stride=1, scope='Conv')
    dense_expected = tf.reshape(
        tf.to_float([[14, 28, 43, 58, 34],
                     [28, 48, 66, 84, 46],
                     [43, 66, 84, 102, 55],
                     [58, 84, 102, 120, 64],
                     [34, 46, 55, 64, 30]]),
        [1, size, size, 1])

    subsampled = resnet_utils.subsample(dense, 2)
    subsampled_expected = tf.reshape(
        tf.to_float([[14, 43, 34],
                     [43, 84, 55],
                     [34, 55, 30]]),
        [1, half, half, 1])

    same_strided = resnet_utils.conv2d_same(
        image, 1, 3, stride=2, scope='Conv')
    plain_strided = slim.conv2d(image, 1, [3, 3], stride=2, scope='Conv')

    # (actual, expected) pairs, evaluated in this order.
    checks = (
        (dense, dense_expected),
        (subsampled, subsampled_expected),
        (same_strided, subsampled_expected),
        (plain_strided, subsampled_expected),
    )
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      for actual, expected in checks:
        self.assertAllClose(actual.eval(), expected.eval())
    def __call__(self, features):
        """Define the tf graph of the encoder.

        Args:
          features: Mapping whose 'image' entry is the input tensor.

        Returns:
          The output tensor of the 'resnet' stack. Both the 'conv1' output
          and this tensor are added to the collection named after the
          'encoder' variable scope.
        """
        image = features['image']

        with tf.variable_scope('encoder') as vsc, \
                slim.arg_scope(resnet_v2.resnet_arg_scope()):
            # conv1: plain convolution, no activation and no normalizer.
            with arg_scope([layers_lib.conv2d],
                           activation_fn=None,
                           normalizer_fn=None):
                net = resnet_utils.conv2d_same(
                    image, 16, 5, stride=2, scope='conv1')
            tf.add_to_collection(vsc.original_name_scope, net)

            # resnet blocks, one per entry of encoder_params['block_name'].
            blocks = [
                resnet_v2.resnet_v2_block(
                    scope=self.encoder_params['block_name'][idx],
                    base_depth=self.encoder_params['base_depth'][idx],
                    num_units=self.encoder_params['num_units'][idx],
                    stride=self.encoder_params['stride'][idx])
                for idx in range(len(self.encoder_params['block_name']))
            ]
            training = self.mode == ModeKeys.TRAIN
            net, _ = resnet_v2.resnet_v2(net,
                                         blocks,
                                         is_training=training,
                                         global_pool=False,
                                         output_stride=2,
                                         include_root_block=False,
                                         scope='resnet')

            tf.add_to_collection(vsc.original_name_scope, net)
        return net
Example #16
0
def resnet_base(img_batch, scope_name, is_training=True):
    '''
    Build the ResNet-v1 root layers and blocks 1-3, one block per call.

    This code is derived from light-head rcnn
    (https://github.com/zengarden/light_head_rcnn). Building each block in
    its own call makes it convenient to freeze blocks.

    Args:
      img_batch: Input tensor fed to the first convolution.
      scope_name: 'resnet_v1_50' or 'resnet_v1_101'; also used as the
        variable scope name.
      is_training: Combined with the per-block freeze flags derived from
        `cfgs.FIXED_BLOCKS` to set `is_training` of each block's arg scope.

    Returns:
      A pair `(C2, C4)`: the outputs of 'block1' and 'block3'.

    Raises:
      NotImplementedError: If `scope_name` is not one of the two supported
        names.
    '''
    if scope_name not in ('resnet_v1_50', 'resnet_v1_101'):
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr')
    middle_num_units = 6 if scope_name == 'resnet_v1_50' else 23

    # Stride 1 is used for the last (conv4) block.
    # When FPN is used, the stride list is [1, 2, 2].
    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256,
                        num_units=middle_num_units, stride=1),
    ]

    # The root layers are always built with is_training=False.
    with slim.arg_scope(resnet_arg_scope(is_training=False)), \
            tf.variable_scope(scope_name, scope_name):
        # Do the first few layers manually, because 'SAME' padding can behave
        # inconsistently for images of different sizes: sometimes 0,
        # sometimes 1.
        net = resnet_utils.conv2d_same(
            img_batch, 64, 7, stride=2, scope='conv1')
        net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
        net = slim.max_pool2d(
            net, [3, 3], stride=2, padding='VALID', scope='pool1')

    # One flag per block: the first cfgs.FIXED_BLOCKS entries are False.
    # FIXED_BLOCKS can be 1~3.
    not_freezed = [False] * cfgs.FIXED_BLOCKS + (4-cfgs.FIXED_BLOCKS)*[True]

    stage_outputs = []
    feature = net
    for stage_idx in range(3):
        with slim.arg_scope(resnet_arg_scope(
                is_training=(is_training and not_freezed[stage_idx]))):
            feature, _ = resnet_v1.resnet_v1(
                feature,
                blocks[stage_idx:stage_idx + 1],
                global_pool=False,
                include_root_block=False,
                scope=scope_name)
        stage_outputs.append(feature)

    # C2 is the output of block1, C4 the output of block3.
    return stage_outputs[0], stage_outputs[2]
Example #17
0
def resnet_base(img_batch, scope_name, is_training):
    '''
    Build a ResNet-v1 classifier (root layers plus blocks 1-4), one block
    per call.

    This code is derived from light-head rcnn
    (https://github.com/zengarden/light_head_rcnn). Building each block in
    its own call makes it convenient to freeze blocks.

    Args:
      img_batch: Input tensor fed to the first convolution.
      scope_name: 'resnet_v1_50', 'resnet_v1_101' or 'resnet_v1_152'; also
        used as the variable scope name.
      is_training: Forwarded as `is_training` to every `resnet_arg_scope`.

    Returns:
      The output of 'block4' built with global pooling and
      `cfgs.num_classes` logits, reshaped to `[-1, cfgs.num_classes]`.

    Raises:
      NotImplementedError: If `scope_name` is not a supported name.
    '''
    # Number of units of block3 for each supported network.
    units_by_name = (
        ('resnet_v1_50', 6),
        ('resnet_v1_101', 23),
        ('resnet_v1_152', 36),
    )
    for name, units in units_by_name:
        if scope_name == name:
            middle_num_units = units
            break
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 、resnet_v1_101 、resnet152. Check your network name....yjr'
        )

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=1),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v1_block('block3', base_depth=256,
                        num_units=middle_num_units, stride=2),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=2),
    ]

    # resnet_arg_scope supplies the layer configuration.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)), \
            tf.variable_scope(scope_name, scope_name):
        # Do the first few layers manually, because 'SAME' padding can behave
        # inconsistently for images of different sizes: sometimes 0,
        # sometimes 1.
        # First ResNet convolution: 7x7, 64 filters, stride 2.
        net = resnet_utils.conv2d_same(
            img_batch, 64, 7, stride=2, scope='conv1')
        # Explicit zero padding ahead of the VALID 3x3 max pool.
        net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
        net = slim.max_pool2d(
            net, [3, 3], stride=2, padding='VALID', scope='pool1')

    # Blocks 1-3 (C2, C3, C4): each call receives a one-element slice of
    # `blocks`, i.e. one whole ResNet block; the returned end points are
    # not used.
    feature = net
    for stage_idx in range(3):
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            feature, _ = resnet_v1.resnet_v1(
                feature,
                blocks[stage_idx:stage_idx + 1],
                global_pool=False,
                include_root_block=False,
                scope=scope_name)

    # Block 4 (C5) with global pooling and the classification layer.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        logits, _ = resnet_v1.resnet_v1(feature,
                                        blocks[3:4],
                                        num_classes=cfgs.num_classes,
                                        global_pool=True,
                                        include_root_block=False,
                                        scope=scope_name)
        logits = tf.reshape(logits, [-1, cfgs.num_classes])
    return logits
Example #18
0
def resnet_base_balance(img_batch, scope_name, is_training=True):
    '''
    Build a ResNet-v1 backbone and fuse its C3, C4 and C5 feature maps.

    This code is derived from light-head rcnn
    (https://github.com/zengarden/light_head_rcnn). The network is built one
    block at a time so that the leading blocks can be frozen individually.

    Args:
        img_batch: rank-4 image tensor fed to the 7x7 stem convolution.
        scope_name: 'resnet_v1_50' or 'resnet_v1_101'. Selects the number of
            units in block3 and is used as the variable scope of every stage.
        is_training: training flag. block4 receives it unchanged, block1 to
            block3 receive it and-ed with the freeze mask derived from
            cfgs.FIXED_BLOCKS, and it is also the `trainable` flag of the two
            1x1 fusion convolutions.

    Returns:
        The element-wise mean of C4, the 1x1-projected C3 and the
        1x1-projected C5, all resized to the spatial size of C4.

    Raises:
        NotImplementedError: if scope_name is not a supported network name.
    '''
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr'
        )

    blocks = [
        resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
        # use stride 1 for the last conv4 layer.
        resnet_v1_block('block3',
                        base_depth=256,
                        num_units=middle_num_units,
                        stride=1),
        resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)
    ]
    # when use fpn . stride list is [1, 2, 2]

    # The stem is always built with is_training=False, whatever the caller asks.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(scope_name, scope_name):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(img_batch,
                                           64,
                                           7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')

    # Freeze mask: the first cfgs.FIXED_BLOCKS entries are False (frozen).
    # Fixed_Blocks can be 1~3
    not_freezed = [False
                   ] * cfgs.FIXED_BLOCKS + (4 - cfgs.FIXED_BLOCKS) * [True]

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[0]))):
        C2, end_points_C2 = resnet_v1.resnet_v1(net,
                                                blocks[0:1],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[1]))):
        C3, end_points_C3 = resnet_v1.resnet_v1(C2,
                                                blocks[1:2],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, end_points_C4 = resnet_v1.resnet_v1(C3,
                                                blocks[2:3],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)

    # block4 ignores the freeze mask and follows is_training directly.
    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        C5, end_points_C5 = resnet_v1.resnet_v1(C4,
                                                blocks[3:4],
                                                global_pool=False,
                                                include_root_block=False,
                                                scope=scope_name)
    add_heatmap(C3, 'img/feature_map_C3')
    add_heatmap(C4, 'img/feature_map_C4')
    add_heatmap(C5, 'img/feature_map_C5')

    # C4 is the reference level: the other maps are resized to its spatial
    # size and projected by a 1x1 convolution to 1024 channels before averaging.
    C4_shape = tf.shape(C4)
    C4_resize = C4

    # Resize C3 itself. The previous code resized C4 here, so C3 never
    # contributed to the fused map and C4 was effectively counted twice.
    # NOTE(review): 'C3_conv1x1' now consumes C3 instead of C4; checkpoints
    # trained with the old wiring may need this layer re-initialised - verify.
    C3_resize = tf.image.resize_bilinear(C3, (C4_shape[1], C4_shape[2]))
    C3_resize = slim.conv2d(C3_resize,
                            1024, [1, 1],
                            trainable=is_training,
                            weights_initializer=cfgs.INITIALIZER,
                            activation_fn=tf.nn.relu,
                            scope='C3_conv1x1')
    C5_resize = tf.image.resize_bilinear(C5, (C4_shape[1], C4_shape[2]))
    C5_resize = slim.conv2d(C5_resize,
                            1024, [1, 1],
                            trainable=is_training,
                            weights_initializer=cfgs.INITIALIZER,
                            activation_fn=tf.nn.relu,
                            scope='C5_conv1x1')

    C_integrate = (C4_resize + C3_resize + C5_resize) / 3
    return C_integrate
Example #19
0
def xception(inputs,
             blocks,
             num_classes=None,
             is_training=True,
             global_pool=True,
             keep_prob=0.5,
             output_stride=None,
             reuse=None,
             scope=None):
  """Generator for Xception models.

  This function generates a family of Xception models. See the xception_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce Xception of various depths.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels]. Must be
      floating point. If a pretrained checkpoint is used, pixel values should be
      the same as during training (see go/slim-classification-models for
      specifics).
    blocks: A list of length equal to the number of Xception blocks. Each
      element is an Xception Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks.
      If 0 or None, we return the features before the logit layer.
    is_training: whether batch_norm layers are in training mode. Also passed
      to the pre-logits dropout layer.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    keep_prob: Keep probability used in the pre-logits dropout layer.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution. Must be a multiple of 2.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is 0 or None,
      then net is the output of the last Xception block, potentially after
      global average pooling. If num_classes is a non-zero integer, net contains
      the pre-softmax activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  with tf.variable_scope(
      scope, 'xception', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + 'end_points'
    with slim.arg_scope([slim.conv2d,
                         slim.separable_conv2d,
                         xception_module,
                         stack_blocks_dense],
                        outputs_collections=end_points_collection):
      with slim.arg_scope([slim.batch_norm], is_training=is_training):
        net = inputs
        if output_stride is not None:
          if output_stride % 2 != 0:
            raise ValueError('The output_stride needs to be a multiple of 2.')
          # The root block below already strides by 2, so the blocks only have
          # to reach half of the requested stride. Floor division keeps the
          # value an integer under Python 3 (true division would hand a float
          # to stack_blocks_dense); the multiple-of-2 check makes it exact.
          output_stride //= 2
        # Root block function operated on inputs.
        net = resnet_utils.conv2d_same(net, 32, 3, stride=2,
                                       scope='entry_flow/conv1_1')
        net = resnet_utils.conv2d_same(net, 64, 3, stride=1,
                                       scope='entry_flow/conv1_2')

        # Extract features for entry_flow, middle_flow, and exit_flow.
        net = stack_blocks_dense(net, blocks, output_stride)

        # Convert end_points_collection into a dictionary of end_points.
        end_points = slim.utils.convert_collection_to_dict(
            end_points_collection, clear_collection=True)

        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], name='global_pool', keepdims=True)
          end_points['global_pool'] = net
        if num_classes:
          net = slim.dropout(net, keep_prob=keep_prob, is_training=is_training,
                             scope='prelogits_dropout')
          # 1x1 convolution acting as the classifier; no activation or
          # normalisation so that net holds the raw logits.
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='logits')
          end_points[sc.name + '/logits'] = net
          end_points['predictions'] = slim.softmax(net, scope='predictions')
        return net, end_points
def _build_resnet_branch(img_batch, branch_scope, middle_num_units,
                         fixed_blocks, is_training):
    """Build one ResNet-v1 branch and return the feature maps tapped from it.

    Args:
        img_batch: rank-4 image tensor of one modality.
        branch_scope: variable scope of the branch, e.g. 'RGB/resnet_v1_50'.
        middle_num_units: number of units in block3.
        fixed_blocks: number of leading blocks built with is_training=False.
        is_training: training flag requested by the caller.

    Returns:
        A dict mapping 'C2', 'C3', 'C4' and 'C5' to the bottleneck outputs
        tapped from block1, block2, block3 and block4 respectively.
    """
    # Block scopes repeat the branch scope, so every stage lives under
    # '<branch_scope>/<branch_scope>/blockN'. For 'resnet_v1_50' this is
    # exactly the naming the previous hard-coded strings produced.
    blocks = [
        resnet_v1_block('{}/block1'.format(branch_scope), base_depth=64, num_units=3, stride=2),
        resnet_v1_block('{}/block2'.format(branch_scope), base_depth=128, num_units=4, stride=2),
        resnet_v1_block('{}/block3'.format(branch_scope), base_depth=256, num_units=middle_num_units, stride=2),
        resnet_v1_block('{}/block4'.format(branch_scope), base_depth=512, num_units=3, stride=1),
    ]
    # when use fpn . stride list is [1, 2, 2]

    # The stem is always built with is_training=False.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        with tf.variable_scope(branch_scope, branch_scope):
            # Do the first few layers manually, because 'SAME' padding can behave inconsistently
            # for images of different sizes: sometimes 0, sometimes 1
            net = resnet_utils.conv2d_same(
                img_batch, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(
                net, [3, 3], stride=2, padding='VALID', scope='pool1')

    # Fixed_Blocks can be 1~3
    not_freezed = [False] * fixed_blocks + (4 - fixed_blocks) * [True]
    # block1-block3 honour the freeze mask; block4 always follows is_training.
    stage_training = [is_training and not_freezed[0],
                      is_training and not_freezed[1],
                      is_training and not_freezed[2],
                      is_training]
    # Unit whose output is tapped in each block: the second-to-last unit of
    # block1-block3 and the last unit of block4.
    # NOTE(review): presumably chosen so the tap precedes the strided unit of
    # each block - confirm against resnet_v1_block.
    tap_units = [2, 3, middle_num_units - 1, 3]

    features = {}
    for idx in range(4):
        with slim.arg_scope(resnet_arg_scope(is_training=stage_training[idx])):
            net, end_points = resnet_v1.resnet_v1(net,
                                                  blocks[idx:idx + 1],
                                                  global_pool=False,
                                                  include_root_block=False,
                                                  scope=branch_scope)
        tap_key = '{0}/{0}/block{1}/unit_{2}/bottleneck_v1'.format(
            branch_scope, idx + 1, tap_units[idx])
        features['C{}'.format(idx + 2)] = end_points[tap_key]
    return features


def resnet_base(rgb_img_batch, ir_img_batch, scope_name, is_training=True):
    """Build a two-stream (RGB + IR) ResNet-v1 backbone with a feature pyramid.

    Two independent ResNet branches are built under 'RGB/<scope_name>' and
    'IR/<scope_name>'. The feature maps tapped from both branches are
    concatenated along axis 3 and fed to a top-down pyramid.

    The block scopes and end-point keys are derived from scope_name. They used
    to hard-code 'resnet_v1_50', which left the end-point keys inconsistent
    with the scopes actually built when scope_name was 'resnet_v1_101'.

    Args:
        rgb_img_batch: rank-4 image tensor for the RGB branch.
        ir_img_batch: rank-4 image tensor for the IR branch.
        scope_name: 'resnet_v1_50' or 'resnet_v1_101'.
        is_training: training flag; combined per branch with
            cfgs.RGB_FIXED_BLOCKS / cfgs.IR_FIXED_BLOCKS to freeze the leading
            blocks.

    Returns:
        A dict with keys 'P3', 'P4', 'P5', 'P6' and 'P7' holding the pyramid
        feature maps, each produced with 256 output channels.

    Raises:
        NotImplementedError: if scope_name is not a supported network name.
    """
    if scope_name == 'resnet_v1_50':
        middle_num_units = 6
    elif scope_name == 'resnet_v1_101':
        middle_num_units = 23
    else:
        raise NotImplementedError('We only support resnet_v1_50 or resnet_v1_101. ')

    # The RGB branch is built completely before the IR branch, as before.
    rgb_features = _build_resnet_branch(rgb_img_batch,
                                        'RGB/' + scope_name,
                                        middle_num_units,
                                        cfgs.RGB_FIXED_BLOCKS,
                                        is_training)
    ir_features = _build_resnet_branch(ir_img_batch,
                                       'IR/' + scope_name,
                                       middle_num_units,
                                       cfgs.IR_FIXED_BLOCKS,
                                       is_training)

    # Fuse the two modalities level by level along axis 3.
    feature_dict = {}
    for level_name in ('C2', 'C3', 'C4', 'C5'):
        feature_dict[level_name] = tf.concat(
            axis=3,
            values=[rgb_features[level_name], ir_features[level_name]])

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY),
                            activation_fn=None, normalizer_fn=None):

            conv_channels = 256
            last_fm = None
            # Top-down pathway over C5, C4, C3: 1x1 lateral conv, add the
            # resized coarser level, then smooth with a 3x3 conv.
            for level in (5, 4, 3):
                fm = feature_dict['C{}'.format(level)]
                fm_1x1_conv = slim.conv2d(fm, num_outputs=conv_channels, kernel_size=[1, 1],
                                          stride=1, scope='p{}_1x1_conv'.format(level))
                if last_fm is not None:
                    h, w = tf.shape(fm_1x1_conv)[1], tf.shape(fm_1x1_conv)[2]
                    last_resize = tf.image.resize_bilinear(last_fm,
                                                           size=[h, w],
                                                           name='p{}_up2x'.format(level))

                    fm_1x1_conv = fm_1x1_conv + last_resize

                # The merged lateral map (before the 3x3 conv) is what gets
                # propagated to the next, finer level.
                last_fm = fm_1x1_conv

                fm_3x3_conv = slim.conv2d(fm_1x1_conv,
                                          num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                                          stride=1, scope='p{}_3x3_conv'.format(level))
                pyramid_dict['P{}'.format(level)] = fm_3x3_conv

            # P6 and P7 are extra coarse levels built with stride-2 3x3 convs.
            p6 = slim.conv2d(pyramid_dict['P5'],
                             num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                             stride=2, scope='p6_conv')
            pyramid_dict['P6'] = p6

            p7 = tf.nn.relu(p6)

            p7 = slim.conv2d(p7,
                             num_outputs=conv_channels, kernel_size=[3, 3], padding="SAME",
                             stride=2, scope='p7_conv')

            pyramid_dict['P7'] = p7

    return pyramid_dict
Example #21
0
def resnet_base(img_batch, scope_name, is_training=False):
    '''
    Build the ResNet-v1 stem, block1 and block2, and return the pooled C3 map.

    This code is derived from light-head rcnn
    (https://github.com/zengarden/light_head_rcnn), which builds the network
    block by block. In this variant no block is frozen and only the first two
    blocks are instantiated; block3 and block4 are described but never built.

    Args:
        img_batch: rank-4 image tensor fed to the 7x7 stem convolution.
        scope_name: 'resnet_v1_50' or 'resnet_v1_101'; used as variable scope.
        is_training: training flag forwarded to the arg scopes of block1 and
            block2. The stem is always built with is_training=False.

    Returns:
        The block2 output after slim.avg_pool2d with kernel size 2.

    Raises:
        NotImplementedError: if scope_name is not a supported network name.
    '''
    if scope_name not in ('resnet_v1_50', 'resnet_v1_101'):
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr'
        )

    # (scope, base_depth, num_units, stride) of every block description.
    stage_specs = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, 9, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v1_block(name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in stage_specs
    ]
    # when use fpn . stride list is [1, 2, 2]

    with slim.arg_scope(resnet_arg_scope(is_training=False)), \
            tf.variable_scope(scope_name, scope_name):
        # Do the first few layers manually, because 'SAME' padding can behave inconsistently
        # for images of different sizes: sometimes 0, sometimes 1
        stem = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
        stem = tf.pad(stem, [[0, 0], [1, 1], [1, 1], [0, 0]])
        stem = slim.max_pool2d(stem, [3, 3], stride=2, padding='VALID', scope='pool1')

    # No block is frozen here, so every entry of the mask is True.
    unfrozen = [True] * 4
    stage_kwargs = dict(global_pool=False,
                        include_root_block=False,
                        scope=scope_name)

    block1_scope = resnet_arg_scope(is_training=(is_training and unfrozen[0]))
    with slim.arg_scope(block1_scope):
        block1_out, _ = resnet_v1.resnet_v1(stem, blocks[0:1], **stage_kwargs)

    block2_scope = resnet_arg_scope(is_training=(is_training and unfrozen[1]))
    with slim.arg_scope(block2_scope):
        block2_out, _ = resnet_v1.resnet_v1(block1_out, blocks[1:2], **stage_kwargs)
        # Pooling stays inside the arg scope, as in the original layout.
        pooled = slim.avg_pool2d(block2_out, 2)

    return pooled
Example #22
0
def resnet_v2(inputs,
              blocks,
              blocks2,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):
    """Build a pre-activation ResNet from two consecutive lists of blocks.

    `blocks` is stacked first and `blocks2` is stacked on top of its output.
    Besides the usual (net, end_points) pair, the function also returns the
    final feature map and its globally pooled version.

    Args:
        inputs: input tensor; spatial axes 1 and 2 are averaged by the
            global pooling.
        blocks: first list of block descriptions passed to
            resnet_utils.stack_blocks_dense.
        blocks2: second list of block descriptions, stacked after `blocks`.
        num_classes: number of classes for the 1x1 'logits' convolution. If
            None, no logits layer is added.
        is_training: whether batch_norm layers are in training mode.
        global_pool: if True, average over axes 1 and 2 after 'postnorm'.
        output_stride: requested input/output resolution ratio, or None. With
            the root block it must be a multiple of 4.
        include_root_block: if True, prepend the 7x7 stride-2 convolution and
            the 3x3 stride-2 max-pool.
        spatial_squeeze: if True and logits are built, squeeze axes 1 and 2
            of the logits.
        reuse: whether the network variables should be reused.
        scope: optional variable scope; defaults to 'resnet_v2'.

    Returns:
        Gap: the globally pooled features, or None when global_pool is False.
        SPATIAL: the 'postnorm' feature map before any global pooling.
        net: the final output (logits when num_classes is given).
        end_points: dict from network components to their activations.

    Raises:
        ValueError: if output_stride is not a multiple of 4 while the root
            block is included.
    """
    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        # The root block already strides by 4. Floor division
                        # keeps the remaining stride an integer on Python 3;
                        # the check above guarantees it is exact.
                        output_stride //= 4
                    # We do not include batch normalization or activation functions in
                    # conv1 because the first ResNet unit will perform these. Cf.
                    # Appendix of [2].
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None):
                        net = resnet_utils.conv2d_same(net,
                                                       64,
                                                       7,
                                                       stride=2,
                                                       scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                # The pooled output of the first stack is not used by this
                # function; the op is kept so the graph still contains the
                # 'pool_gp' node.
                tf.reduce_mean(net, [1, 2], name='pool_gp', keep_dims=True)
                net = resnet_utils.stack_blocks_dense(net, blocks2,
                                                      output_stride)
                # This is needed because the pre-activation variant does not have batch
                # normalization or activation functions in the residual unit output. See
                # Appendix of [2].
                net = slim.batch_norm(net,
                                      activation_fn=tf.nn.relu,
                                      scope='postnorm')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)

                # Bind both extra outputs up front: they used to be assigned
                # only inside the global_pool branch, so global_pool=False
                # crashed with UnboundLocalError at the return statement.
                SPATIAL = net
                Gap = None
                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2],
                                         name='pool5',
                                         keep_dims=True)
                    Gap = net
                    end_points['global_pool'] = net
                if num_classes is not None:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                    end_points[sc.name + '/logits'] = net
                    if spatial_squeeze:
                        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                        end_points[sc.name + '/spatial_squeeze'] = net
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return Gap, SPATIAL, net, end_points
Example #23
0
def build_base(input_tensor):
    """Build the ResNet stem on top of input_tensor.

    The stem is a 7x7 stride-2 convolution ('conv1'), an explicit one-pixel
    pad on axes 1 and 2, and a 3x3 stride-2 'VALID' max-pool ('pool1').

    `scope` is not a parameter: it is looked up in the surrounding scope and
    is used both as the name and as the default name of the variable scope.

    Args:
        input_tensor: rank-4 tensor fed to the first convolution.

    Returns:
        The output tensor of the 'pool1' max-pooling layer.
    """
    # Pad only axes 1 and 2, one element on each side.
    spatial_padding = [[0, 0], [1, 1], [1, 1], [0, 0]]
    with tf.variable_scope(scope, scope):
        stem = resnet_utils.conv2d_same(
            input_tensor, 64, 7, stride=2, scope='conv1')
        stem = tf.pad(stem, spatial_padding)
        stem = slim.max_pool2d(
            stem, [3, 3], stride=2, padding='VALID', scope='pool1')
    return stem
Example #24
0
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              scope=None):
  """Generator for v1 ResNet models.

  This function generates a family of ResNet v1 models. See the resnet_v1_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce ResNets of various depths.

  Training for image classification on Imagenet is usually done with [224, 224]
  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
  block for the ResNets defined in [1] that have nominal stride equal to 32.
  However, for dense prediction tasks we advise that one uses inputs with
  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
  this case the feature maps at the ResNet output will have spatial shape
  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
  and corners exactly aligned with the input image corners, which greatly
  facilitates alignment of the features to the image. Using as input [225, 225]
  images results in [8, 8] feature maps at the output of the last ResNet block.

  For dense prediction tasks, the ResNet needs to run in fully-convolutional
  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
  have nominal stride equal to 32 and a good choice in FCN mode is to use
  output_stride=16 in order to increase the density of the computed features at
  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution. When the root block is
      included it must be a multiple of 4.
    include_root_block: If True, include the initial convolution followed by
      max-pooling, if False excludes it.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with slim.arg_scope([slim.conv2d, bottleneck,
                         resnet_utils.stack_blocks_dense],
                        outputs_collections=end_points_collection):
      net = inputs
      if include_root_block:
        if output_stride is not None:
          if output_stride % 4 != 0:
            raise ValueError('The output_stride needs to be a multiple of 4.')
          # The root block (stride-2 conv + stride-2 pool) already accounts for
          # a factor of 4. Floor division keeps the remaining stride an integer
          # under Python 3; the check above guarantees the division is exact.
          output_stride //= 4
        net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
        net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
      net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
      if global_pool:
        # Global average pooling.
        net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
      if num_classes is not None:
        net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                          normalizer_fn=None, scope='logits')
      # Convert end_points_collection into a dictionary of end_points. This runs
      # after the logits layer so that its output is part of the dictionary.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if num_classes is not None:
        end_points['predictions'] = slim.softmax(net, scope='predictions')
      return net, end_points
Example #25
0
def xception(inputs,
             blocks,
             num_classes=None,
             is_training=True,
             global_pool=True,
             keep_prob=0.5,
             output_stride=None,
             reuse=None,
             scope=None):
    """Generator for Xception models.

    This function generates a family of Xception models. See the xception_*()
    methods for specific model instantiations, obtained by selecting different
    block instantiations that produce Xception networks of various depths.

    Args:
        inputs: A tensor of size [batch, height_in, width_in, channels]. Must
            be floating point. If a pretrained checkpoint is used, pixel values
            should be the same as during training.
        blocks: A list of length equal to the number of Xception blocks. Each
            element is an Xception Block object describing the units in the
            block.
        num_classes: Number of predicted classes for classification tasks. If
            0 or None, the features before the logit layer are returned.
        is_training: Whether batch_norm layers are in training mode. The same
            flag is also passed to the pre-logits dropout layer.
        global_pool: If True, global average pooling is performed before
            computing the logits. Set to True for image classification, False
            for dense prediction.
        keep_prob: Keep probability used in the pre-logits dropout layer.
        output_stride: If None, the output is computed at the nominal network
            stride. If not None, it specifies the requested ratio of input to
            output spatial resolution, which needs to be equal to the product
            of unit strides from the start up to some level of Xception.
        reuse: Whether or not the network and its variables should be reused.
            To be able to reuse, 'scope' must be given.
        scope: Optional variable_scope.

    Returns:
        net: A rank-4 tensor of size [batch, height_out, width_out,
            channels_out]. If global_pool is False, height_out and width_out
            are reduced by a factor of output_stride compared to height_in and
            width_in, else both equal one. If num_classes is 0 or None, net is
            the output of the last Xception block, potentially after global
            average pooling. If num_classes is a non-zero integer, net
            contains the pre-softmax activations.
        end_points: A dictionary from components of the network to the
            corresponding activation.

    Raises:
        ValueError: If the target output_stride is not a multiple of 2.
    """
    with tf.variable_scope(scope, 'xception', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + 'end_points'
        with slim.arg_scope([slim.conv2d,
                             slim.separable_conv2d,
                             xception_module,
                             stack_blocks_dense],
                            outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if output_stride is not None:
                    if output_stride % 2 != 0:
                        raise ValueError(
                            'The output_stride needs to be a multiple of 2.')
                    # The first root convolution below already has stride 2,
                    # so the remaining blocks only need to reach half of the
                    # requested stride. Floor division keeps the value an
                    # integer on Python 3 (true division would yield a float).
                    output_stride //= 2

                # Root block: two 3x3 convolutions applied to the inputs.
                net = resnet_utils.conv2d_same(net, 32, 3, stride=2,
                                               scope='entry_flow/conv1_1')
                net = resnet_utils.conv2d_same(net, 64, 3, stride=1,
                                               scope='entry_flow/conv1_2')

                # Extract features for entry_flow, middle_flow and exit_flow.
                net = stack_blocks_dense(net, blocks, output_stride)

                # Convert end_points_collection into a dictionary of
                # end_points (and clear the collection).
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection, clear_collection=True)

                if global_pool:
                    # Global average pooling over the two spatial axes.
                    net = tf.reduce_mean(net, [1, 2], name='global_pool',
                                         keepdims=True)
                    end_points['global_pool'] = net
                if num_classes:
                    net = slim.dropout(net, keep_prob=keep_prob,
                                       is_training=is_training,
                                       scope='prelogits_dropout')
                    # 1x1 convolution without activation or normalization
                    # produces the logits.
                    net = slim.conv2d(net, num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None, scope='logits')
                    end_points[sc.name + '/logits'] = net
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
Example #26
0
    def resnet_base(self, img_batch, scope_name, is_training=True):
        """Build a ResNet-v1 backbone and collect its C2-C5 end points.

        Args:
            img_batch: Input fed to the first 7x7 convolution.
            scope_name: Either 'resnet_v1_50' or 'resnet_v1_101'; also used as
                the variable scope of every stage.
            is_training: Training flag. For block1-block3 it is combined with
                the per-block flags derived from self.cfgs.FIXED_BLOCKS; for
                block4 it is used unchanged.

        Returns:
            A dict with keys 'C2', 'C3', 'C4' and 'C5' holding the selected
            bottleneck end points of the four stages.

        Raises:
            NotImplementedError: If scope_name is not one of the two supported
                network names.
        """
        if scope_name not in ('resnet_v1_50', 'resnet_v1_101'):
            raise NotImplementedError(
                'We only support resnet_v1_50 or resnet_v1_101. ')
        middle_num_units = 6 if scope_name == 'resnet_v1_50' else 23

        # (name, base_depth, num_units, stride) for each residual stage.
        block_specs = (
            ('block1', 64, 3, 2),
            ('block2', 128, 4, 2),
            ('block3', 256, middle_num_units, 2),
            ('block4', 512, 3, 1),
        )
        blocks = [
            resnet_v1_block(name,
                            base_depth=depth,
                            num_units=units,
                            stride=stride)
            for name, depth, units, stride in block_specs
        ]

        # The root layers are built by hand because 'SAME' padding can behave
        # inconsistently for images of different sizes (sometimes 0, sometimes
        # 1); explicit padding followed by a 'VALID' pool avoids that.
        with slim.arg_scope(self.resnet_arg_scope(is_training=False)), \
                tf.variable_scope(scope_name, scope_name):
            net = resnet_utils.conv2d_same(img_batch,
                                           64,
                                           7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')

        # True marks a block that is NOT frozen. FIXED_BLOCKS can be 1~3.
        num_fixed = self.cfgs.FIXED_BLOCKS
        not_freezed = [False] * num_fixed + (4 - num_fixed) * [True]

        # The first three stages honour the freeze flags; the last stage
        # always follows is_training directly.
        stage_training = [is_training and not_freezed[i] for i in range(3)]
        stage_training.append(is_training)

        stage_end_points = []
        for idx, train_flag in enumerate(stage_training):
            with slim.arg_scope(
                    self.resnet_arg_scope(is_training=train_flag)):
                net, end_points = resnet_v1.resnet_v1(
                    net,
                    blocks[idx:idx + 1],
                    global_pool=False,
                    include_root_block=False,
                    scope=scope_name)
            stage_end_points.append(end_points)

        ep_c2, ep_c3, ep_c4, ep_c5 = stage_end_points
        return {
            'C2':
            ep_c2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
            'C3':
            ep_c3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
            'C4':
            ep_c4['{}/block3/unit_{}/bottleneck_v1'.format(
                scope_name, middle_num_units - 1)],
            'C5':
            ep_c5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
        }
Example #27
0
    def resnet_base(self, inputs, is_training):
        """Build the ResNet-v1 root plus block1-block3 and return the output.

        Args:
            inputs: Input fed to the first 7x7 convolution.
            is_training: Training flag; combined per stage with the flags
                derived from self.fixed_block.

        Returns:
            The output of the third residual stage (block3).

        Raises:
            NotImplementedError: If self.scope_name is neither 'resnet_v1_50'
                nor 'resnet_v1_101'.
        """
        scope_name = self.scope_name
        if scope_name not in ('resnet_v1_50', 'resnet_v1_101'):
            raise NotImplementedError(
                'We only support resnet_v1_50 or resnet_v1_101. Check your network name....'
            )
        middle_num_units = 6 if scope_name == 'resnet_v1_50' else 23

        # (name, base_depth, num_units, stride); stride 1 is used for the
        # last (conv4) stage.
        block_specs = (
            ('block1', 64, 3, 2),
            ('block2', 128, 4, 2),
            ('block3', 256, middle_num_units, 1),
        )
        blocks = [
            resnet_v1_block(name,
                            base_depth=depth,
                            num_units=units,
                            stride=stride)
            for name, depth, units, stride in block_specs
        ]

        # The root layers are built by hand because 'SAME' padding can behave
        # inconsistently for images of different sizes (sometimes 0, sometimes
        # 1); explicit padding followed by a 'VALID' pool avoids that.
        with slim.arg_scope(self.resnet_arg_scope(is_training=False)), \
                tf.variable_scope(scope_name, 'resnet_v1_101'):
            net = resnet_utils.conv2d_same(inputs,
                                           num_outputs=64,
                                           kernel_size=7,
                                           stride=2,
                                           scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net,
                                  kernel_size=[3, 3],
                                  stride=2,
                                  padding='VALID',
                                  scope='pool1')

        # Despite the name, True here marks a block that is allowed to train.
        block_freeze = ([False] * self.fixed_block +
                        (4 - self.fixed_block) * [True])

        for idx in range(3):
            stage_training = is_training and block_freeze[idx]
            with slim.arg_scope(
                    self.resnet_arg_scope(is_training=stage_training)):
                net, _ = resnet_v1.resnet_v1(net,
                                             blocks[idx:idx + 1],
                                             global_pool=False,
                                             include_root_block=False,
                                             scope=scope_name)
        return net
Example #28
0
def resnet_base(img_batch, scope_name, is_training=True):
    """Build ResNet-v1 up to block3 with optional C3 fusion and attention.

    This code is derived from light-head rcnn
    (https://github.com/zengarden/light_head_rcnn). Building the network
    stage by stage makes it convenient to freeze individual blocks.

    Args:
        img_batch: Input fed to the first 7x7 convolution.
        scope_name: Either 'resnet_v1_50' or 'resnet_v1_101'; also used as the
            variable scope of every stage.
        is_training: Training flag; combined per stage with the flags derived
            from cfgs.FIXED_BLOCKS.

    Returns:
        C4 when cfgs.ADD_ATTENTION is falsy, otherwise the tuple
        (C4, C4_attention_layer).

    Raises:
        NotImplementedError: If scope_name is not one of the supported names.
    """
    if scope_name not in ('resnet_v1_50', 'resnet_v1_101'):
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101 or mobilenetv2. '
            'Check your network name.')
    middle_num_units = 6 if scope_name == 'resnet_v1_50' else 23

    # (name, base_depth, num_units, stride); stride 1 is used for the last
    # (conv4) stage.
    block_specs = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, middle_num_units, 1),
    )
    blocks = [
        resnet_v1_block(name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in block_specs
    ]

    # The root layers are built by hand because 'SAME' padding can behave
    # inconsistently for images of different sizes (sometimes 0, sometimes 1);
    # explicit padding followed by a 'VALID' pool avoids that.
    with slim.arg_scope(resnet_arg_scope(is_training=False)), \
            tf.variable_scope(scope_name, scope_name):
        net = resnet_utils.conv2d_same(img_batch,
                                       64,
                                       7,
                                       stride=2,
                                       scope='conv1')
        net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
        net = slim.max_pool2d(net, [3, 3],
                              stride=2,
                              padding='VALID',
                              scope='pool1')

    # True marks a block that is NOT frozen. FIXED_BLOCKS can be 1~3.
    not_freezed = ([False] * cfgs.FIXED_BLOCKS +
                   (4 - cfgs.FIXED_BLOCKS) * [True])

    # block1 and block2: plain residual stages.
    end_points_C3 = None
    for idx in range(2):
        with slim.arg_scope(
                resnet_arg_scope(
                    is_training=(is_training and not_freezed[idx]))):
            net, end_points_C3 = resnet_v1.resnet_v1(
                net,
                blocks[idx:idx + 1],
                global_pool=False,
                include_root_block=False,
                scope=scope_name)

    # block3, followed by the optional fusion / attention branches which are
    # built while the block3 arg_scope is still active.
    with slim.arg_scope(
            resnet_arg_scope(is_training=(is_training and not_freezed[2]))):
        C4, _ = resnet_v1.resnet_v1(net,
                                    blocks[2:3],
                                    global_pool=False,
                                    include_root_block=False,
                                    scope=scope_name)

        if cfgs.ADD_FUSION:
            c3_feature = end_points_C3[
                '{}/block2/unit_3/bottleneck_v1'.format(scope_name)]
            # Resize C4 to the spatial size of the C3 feature, then add a
            # 3x3-convolved copy of that C3 feature to it.
            c3_shape = tf.shape(c3_feature)
            C4 = tf.image.resize_bilinear(C4, (c3_shape[1], c3_shape[2]))
            c3_projected = slim.conv2d(c3_feature,
                                       1024, [3, 3],
                                       trainable=is_training,
                                       weights_initializer=cfgs.INITIALIZER,
                                       activation_fn=tf.nn.relu,
                                       scope='C3_conv3x3')
            C4 = C4 + c3_projected

        if cfgs.ADD_ATTENTION:
            with tf.variable_scope('build_C4_attention',
                                   regularizer=slim.l2_regularizer(
                                       cfgs.WEIGHT_DECAY)):
                channel_mean = tf.reduce_mean(C4, axis=-1)
                add_heatmap(tf.expand_dims(channel_mean, axis=-1),
                            'add_attention_before')

                C4_attention_layer = build_attention(C4, is_training)

                # Softmax over the last axis; channel 0 is kept as the
                # attention map and given back a trailing axis so that it
                # can be multiplied with C4.
                attention_map = tf.expand_dims(
                    tf.nn.softmax(C4_attention_layer)[:, :, :, 0], axis=-1)
                add_heatmap(attention_map, 'C4_attention')

                C4 = tf.multiply(attention_map, C4)

                channel_mean = tf.reduce_mean(C4, axis=-1)
                add_heatmap(tf.expand_dims(channel_mean, axis=-1),
                            'add_attention_after')

    return (C4, C4_attention_layer) if cfgs.ADD_ATTENTION else C4
def resnet_base(img_batch, scope_name, is_training=True):
    """Build a ResNet-v1 backbone and a P3-P7 feature pyramid on top of it.

    Args:
        img_batch: Input fed to the first 7x7 convolution.
        scope_name: Either 'resnet_v1_50' or 'resnet_v1_101'; also used as the
            variable scope of every backbone stage.
        is_training: Training flag. For block1-block3 it is combined with the
            per-block flags derived from cfgs.FIXED_BLOCKS; for block4 it is
            used unchanged.

    Returns:
        A dict with keys 'P3' ... 'P7' holding the pyramid levels.

    Raises:
        NotImplementedError: If scope_name is not one of the two supported
            network names.
    """
    if scope_name not in ('resnet_v1_50', 'resnet_v1_101'):
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. ')
    middle_num_units = 6 if scope_name == 'resnet_v1_50' else 23

    # (name, base_depth, num_units, stride) for each residual stage.
    block_specs = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, middle_num_units, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v1_block(name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in block_specs
    ]

    # The root layers are built by hand because 'SAME' padding can behave
    # inconsistently for images of different sizes (sometimes 0, sometimes 1);
    # explicit padding followed by a 'VALID' pool avoids that.
    with slim.arg_scope(resnet_arg_scope(is_training=False)), \
            tf.variable_scope(scope_name, scope_name):
        net = resnet_utils.conv2d_same(img_batch,
                                       64,
                                       7,
                                       stride=2,
                                       scope='conv1')
        net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
        net = slim.max_pool2d(net, [3, 3],
                              stride=2,
                              padding='VALID',
                              scope='pool1')

    # True marks a block that is NOT frozen. FIXED_BLOCKS can be 1~3.
    not_freezed = ([False] * cfgs.FIXED_BLOCKS +
                   (4 - cfgs.FIXED_BLOCKS) * [True])

    # The first three stages honour the freeze flags; the last stage always
    # follows is_training directly.
    stage_training = [is_training and not_freezed[i] for i in range(3)]
    stage_training.append(is_training)

    stage_end_points = []
    for idx, train_flag in enumerate(stage_training):
        with slim.arg_scope(resnet_arg_scope(is_training=train_flag)):
            net, end_points = resnet_v1.resnet_v1(net,
                                                  blocks[idx:idx + 1],
                                                  global_pool=False,
                                                  include_root_block=False,
                                                  scope=scope_name)
        stage_end_points.append(end_points)

    ep_c2, ep_c3, ep_c4, ep_c5 = stage_end_points
    feature_dict = {
        'C2':
        ep_c2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
        'C3':
        ep_c3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
        'C4':
        ep_c4['{}/block3/unit_{}/bottleneck_v1'.format(
            scope_name, middle_num_units - 1)],
        'C5':
        ep_c5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
    }

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'), \
            slim.arg_scope([slim.conv2d],
                           weights_regularizer=slim.l2_regularizer(
                               cfgs.WEIGHT_DECAY),
                           activation_fn=None,
                           normalizer_fn=None):

        # P5: 1x1 projection of C5 to 256 channels.
        pyramid_dict['P5'] = slim.conv2d(feature_dict['C5'],
                                         num_outputs=256,
                                         kernel_size=[1, 1],
                                         stride=1,
                                         scope='build_P5')

        # P4 and P3: fuse each backbone feature with the pyramid level above.
        for level in (4, 3):
            pyramid_dict['P%d' % level] = fusion_two_layer(
                C_i=feature_dict["C%d" % level],
                P_j=pyramid_dict["P%d" % (level + 1)],
                scope='build_P%d' % level)

        # A 3x3 convolution is applied to each of P5, P4 and P3.
        for level in (5, 4, 3):
            key = 'P%d' % level
            pyramid_dict[key] = slim.conv2d(pyramid_dict[key],
                                            num_outputs=256,
                                            kernel_size=[3, 3],
                                            padding="SAME",
                                            stride=1,
                                            scope="fuse_P%d" % level)

        # P6: stride-2 convolution on either P5 or C5, chosen by cfgs.USE_P5.
        if cfgs.USE_P5:
            p6_input = pyramid_dict['P5']
        else:
            p6_input = feature_dict['C5']
        p6 = slim.conv2d(p6_input,
                         num_outputs=256,
                         kernel_size=[3, 3],
                         padding="SAME",
                         stride=2,
                         scope='p6_conv')
        pyramid_dict['P6'] = p6

        # P7: ReLU on P6 followed by another stride-2 convolution.
        p6_activated = tf.nn.relu(p6, name='p6_relu')
        pyramid_dict['P7'] = slim.conv2d(p6_activated,
                                         num_outputs=256,
                                         kernel_size=[3, 3],
                                         padding="SAME",
                                         stride=2,
                                         scope='p7_conv')

    return pyramid_dict
def resnet_v1_beta(inputs,
                   blocks,
                   num_classes=None,
                   is_training=None,
                   global_pool=True,
                   output_stride=None,
                   root_block_fn=None,
                   reuse=None,
                   scope=None):
    """Generator for v1 ResNet models (beta variant).

    This function generates a family of modified ResNet v1 models. In
    particular, the first original 7x7 convolution is replaced with three 3x3
    convolutions. See the resnet_v1_*() methods for specific model
    instantiations, obtained by selecting different block instantiations that
    produce ResNets of various depths.

    The code is modified from slim/nets/resnet_v1.py, and please refer to it
    for more details.

    Args:
        inputs: A tensor of size [batch, height_in, width_in, channels].
        blocks: A list of length equal to the number of ResNet blocks. Each
            element is a resnet_utils.Block object describing the units in the
            block.
        num_classes: Number of predicted classes for classification tasks. If
            None we return the features before the logit layer.
        is_training: Enable/disable is_training for batch normalization. If
            None, no batch_norm is_training override is installed.
        global_pool: If True, we perform global average pooling before
            computing the logits. Set to True for image classification, False
            for dense prediction.
        output_stride: If None, then the output will be computed at the
            nominal network stride. If output_stride is not None, it specifies
            the requested ratio of input to output spatial resolution.
        root_block_fn: The function consisting of convolution operations
            applied to the root input. If root_block_fn is None, use the
            original setting of ResNet-v1, which is simply one convolution
            with 7x7 kernel and stride=2.
        reuse: whether or not the network and its variables should be reused.
            To be able to reuse 'scope' must be given.
        scope: Optional variable_scope.

    Returns:
        net: A rank-4 tensor of size [batch, height_out, width_out,
            channels_out]. If global_pool is False, then height_out and
            width_out are reduced by a factor of output_stride compared to the
            respective height_in and width_in, else both height_out and
            width_out equal one. If num_classes is None, then net is the
            output of the last ResNet block, potentially after global average
            pooling. If num_classes is not None, net contains the pre-softmax
            activations.
        end_points: A dictionary from components of the network to the
            corresponding activation.

    Raises:
        ValueError: If the target output_stride is not a multiple of 4.
    """
    if root_block_fn is None:
        # Default root block: the single 7x7 / stride-2 convolution.
        root_block_fn = functools.partial(resnet_utils.conv2d_same,
                                          num_outputs=64,
                                          kernel_size=7,
                                          stride=2,
                                          scope='conv1')
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
                [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            if is_training is not None:
                arg_scope = slim.arg_scope([slim.batch_norm],
                                           is_training=is_training)
            else:
                # Empty scope: leave batch_norm's is_training untouched.
                arg_scope = slim.arg_scope([])
            with arg_scope:
                net = inputs
                if output_stride is not None:
                    if output_stride % 4 != 0:
                        raise ValueError(
                            'The output_stride needs to be a multiple of 4.')
                    # The root block and the max-pool below each use stride 2,
                    # so the residual blocks only need to reach a quarter of
                    # the requested stride. Floor division keeps the value an
                    # integer on Python 3 (true division would yield a float).
                    output_stride //= 4
                # Apply the configured root block. Previously this call was
                # commented out and a 7x7 convolution was hard-coded, which
                # silently ignored any root_block_fn passed by the caller; the
                # default above builds that same 7x7 convolution.
                net = root_block_fn(net)
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='SAME',
                                      scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2],
                                         name='pool5',
                                         keepdims=True)
                if num_classes is not None:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                # Convert end_points_collection into a dictionary of
                # end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
Example #31
0
def resnet_base(img_batch, scope_name, is_training=True):
    """Build a ResNet-v1 backbone and a feature pyramid on top of it.

    Derived from light-head rcnn
    (https://github.com/zengarden/light_head_rcnn). The backbone is built one
    residual stage at a time so that individual stages can be frozen.

    Args:
        img_batch: Input tensor fed to the first convolution (presumably an
            NHWC image batch -- confirm against the caller).
        scope_name: Either 'resnet_v1_50' or 'resnet_v1_101'; also used as
            the variable scope of the backbone.
        is_training: Training flag for the stages that are not frozen.

    Returns:
        A list holding one pyramid tensor per entry of ``cfgs.LEVLES``, in
        the order given by ``cfgs.LEVLES``.

    Raises:
        NotImplementedError: If ``scope_name`` is not one of the two
            supported networks.
    """
    if scope_name not in ('resnet_v1_50', 'resnet_v1_101'):
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name....'
        )
    # Only the unit count of block3 differs between the two networks.
    middle_num_units = 6 if scope_name == 'resnet_v1_50' else 23

    # (name, base_depth, num_units, stride) for each residual stage.
    stage_specs = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, middle_num_units, 2),
        ('block4', 512, 3, 1),
    )
    blocks = [
        resnet_v1_block(name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in stage_specs
    ]

    # The stem is always built with is_training=False. It is assembled by
    # hand because 'SAME' padding can behave inconsistently for images of
    # different sizes (sometimes 0, sometimes 1).
    with slim.arg_scope(resnet_arg_scope(is_training=False)), tf.variable_scope(
            scope_name, scope_name):
        net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
        net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
        net = slim.max_pool2d(net, [3, 3],
                              stride=2,
                              padding='VALID',
                              scope='pool1')

    # One flag per stage: the first cfgs.FIXED_BLOCKS stages are frozen.
    # (Original note: FIXED_BLOCKS can be 1~3.)
    not_freezed = [False] * cfgs.FIXED_BLOCKS + [True] * (4 - cfgs.FIXED_BLOCKS)

    stage_end_points = []
    stage_net = net
    for stage_idx in range(4):
        if stage_idx == 3:
            # The last stage follows is_training directly, regardless of
            # the frozen-stage flags.
            stage_is_training = is_training
        else:
            stage_is_training = is_training and not_freezed[stage_idx]
        with slim.arg_scope(resnet_arg_scope(is_training=stage_is_training)):
            stage_net, stage_points = resnet_v1.resnet_v1(
                stage_net,
                blocks[stage_idx:stage_idx + 1],
                global_pool=False,
                include_root_block=False,
                scope=scope_name)
        stage_end_points.append(stage_points)

    points_c2, points_c3, points_c4, points_c5 = stage_end_points

    # Each C level is read from a specific bottleneck unit's end point.
    feature_dict = {
        'C2': points_c2['{}/block1/unit_2/bottleneck_v1'.format(scope_name)],
        'C3': points_c3['{}/block2/unit_3/bottleneck_v1'.format(scope_name)],
        'C4': points_c4['{}/block3/unit_{}/bottleneck_v1'.format(
            scope_name, middle_num_units - 1)],
        'C5': points_c5['{}/block4/unit_3/bottleneck_v1'.format(scope_name)],
    }

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'), slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY),
            activation_fn=None,
            normalizer_fn=None):
        # P5 is a 1x1 projection of C5 to 256 channels.
        pyramid_dict['P5'] = slim.conv2d(feature_dict['C5'],
                                         num_outputs=256,
                                         kernel_size=[1, 1],
                                         stride=1,
                                         scope='build_P5')

        # Build P4, P3, P2 by fusing each C level with the pyramid level
        # directly above it.
        for level in (4, 3, 2):
            pyramid_dict['P%d' % level] = fusion_two_layer(
                C_i=feature_dict["C%d" % level],
                P_j=pyramid_dict["P%d" % (level + 1)],
                scope='build_P%d' % level)

        # Apply a 3x3 convolution to every pyramid level P5..P2.
        for level in (5, 4, 3, 2):
            level_key = 'P%d' % level
            pyramid_dict[level_key] = slim.conv2d(pyramid_dict[level_key],
                                                  num_outputs=256,
                                                  kernel_size=[3, 3],
                                                  padding="SAME",
                                                  stride=1,
                                                  scope="fuse_P%d" % level)

        # Optional P6: P5 passed through a 1x1, stride-2 average pool.
        if "P6" in cfgs.LEVLES:
            pyramid_dict['P6'] = slim.avg_pool2d(pyramid_dict['P5'],
                                                 kernel_size=[1, 1],
                                                 stride=2,
                                                 scope='build_P6')

    print("we are in Pyramid::-======>>>>")
    print(cfgs.LEVLES)
    print("base_anchor_size are: ", cfgs.BASE_ANCHOR_SIZE_LIST)
    print(20 * "__")
    return [pyramid_dict[level_name] for level_name in cfgs.LEVLES]
def _build_nas_base(images,
                    cell,
                    backbone,
                    num_classes,
                    hparams,
                    global_pool=False,
                    output_stride=16,
                    nas_use_classification_head=False,
                    reuse=None,
                    scope=None,
                    final_endpoint=None,
                    batch_norm_fn=slim.batch_norm,
                    nas_remove_os32_stride=False):
    """Constructs a NAS model.

    Args:
        images: A tensor of size [batch, height, width, channels].
        cell: Cell structure used in the network; called once per backbone
            entry.
        backbone: Backbone structure used in the network. A list of integers
            in which value 0 means "output_stride=4", value 1 means
            "output_stride=8", value 2 means "output_stride=16", and value 3
            means "output_stride=32".
        num_classes: Number of classes to predict. If None, no logits layer
            is added.
        hparams: Hyperparameters needed to construct the network; this
            function reads `filter_scaling_rate` from it.
        global_pool: If True, we perform global average pooling before
            computing the logits. Set to True for image classification,
            False for dense prediction.
        output_stride: Integer, the stride of output feature maps. Only
            consulted when `nas_use_classification_head` is True.
        nas_use_classification_head: Boolean, use image classification head.
        reuse: Whether or not the network and its variables should be
            reused. To be able to reuse 'scope' must be given.
        scope: Optional variable_scope.
        final_endpoint: The endpoint to construct the network up to. Checked
            after the stem and after every cell.
        batch_norm_fn: Batch norm function.
        nas_remove_os32_stride: Boolean, remove stride in output_stride 32
            branch.

    Returns:
        net: A rank-4 tensor of size
            [batch, height_out, width_out, channels_out].
        end_points: A dictionary from components of the network to the
            corresponding activation.

    Raises:
        ValueError: If the classification head is requested and
            output_stride is not a multiple of backbone output stride.
    """
    with tf.variable_scope(scope, 'nas', [images], reuse=reuse):
        end_points = {}

        def record_endpoint(name, tensor):
            """Stores the tensor; truthy iff `name` is the requested final endpoint."""
            end_points[name] = tensor
            return final_endpoint and (name == final_endpoint)

        net, cell_outputs = _nas_stem(images, batch_norm_fn=batch_norm_fn)
        if record_endpoint('Stem', net):
            return net, end_points

        # Run the cells, tracking the cumulative filter scaling as the
        # backbone moves between resolution levels.
        filter_scaling = 1.0
        for cell_num, level in enumerate(backbone):
            stride = 1
            if cell_num == 0:
                # The first cell only downsamples when the backbone starts
                # at level 1.
                if level == 1:
                    stride = 2
                    filter_scaling *= hparams.filter_scaling_rate
            elif level == backbone[cell_num - 1] + 1:
                # One level down: stride 2, unless this is the os32 level
                # and its stride has been removed.
                stride = 1 if (level == 3 and nas_remove_os32_stride) else 2
                filter_scaling *= hparams.filter_scaling_rate
            elif level == backbone[cell_num - 1] - 1:
                # One level up: scale features by a factor of 2, except when
                # leaving an os32 level whose stride was removed (then there
                # is no need to rescale features).
                if not (backbone[cell_num - 1] == 3 and nas_remove_os32_stride):
                    upscaled_size = [
                        scale_dimension(net.shape[1].value, 2),
                        scale_dimension(net.shape[2].value, 2),
                    ]
                    net = resize_bilinear(net, upscaled_size, net.dtype)
                filter_scaling /= hparams.filter_scaling_rate

            net = cell(net,
                       scope='cell_{}'.format(cell_num),
                       filter_scaling=filter_scaling,
                       stride=stride,
                       prev_layer=cell_outputs[-2],
                       cell_num=cell_num)
            if record_endpoint('Cell_{}'.format(cell_num), net):
                return net, end_points
            cell_outputs.append(net)
        net = tf.nn.relu(net)

        if nas_use_classification_head:
            # Add image classification head.
            # We will expand the filters for different output_strides.
            filters_for_stride = {8: 256, 16: 512, 32: 1024}
            backbone_scale = 2 + backbone[-1]
            backbone_stride = 2**backbone_scale
            if output_stride % backbone_stride != 0:
                raise ValueError(
                    'output_stride must be a multiple of backbone output stride.'
                )
            # Striding budget left for the head before it must switch to
            # atrous (rate > 1) convolutions.
            remaining_stride = output_stride // backbone_stride
            rate = 1
            if backbone_stride != 32:
                for step in range(5 - backbone_scale):
                    # Gradually downsample feature maps to output stride = 32.
                    target_stride = 2**(backbone_scale + 1 + step)
                    head_scope = 'downsample_os{}'.format(target_stride)
                    if remaining_stride != 1:
                        stride = 2
                        remaining_stride //= 2
                    else:
                        stride = 1
                        rate *= 2
                    net = resnet_utils.conv2d_same(
                        net,
                        filters_for_stride[target_stride],
                        3,
                        stride=stride,
                        rate=rate,
                        scope=head_scope + '_conv')
                    net = batch_norm_fn(net, scope=head_scope + '_bn')
                    record_endpoint(head_scope, net)
                    net = tf.nn.relu(net)
            # Apply 1x1 convolution to expand dimension to 2048.
            head_scope = 'classification_head'
            net = slim.conv2d(net, 2048, 1, scope=head_scope + '_conv')
            net = batch_norm_fn(net, scope=head_scope + '_bn')
            record_endpoint(head_scope, net)
            net = tf.nn.relu(net)

        if global_pool:
            # Global average pooling over the two spatial axes.
            net = tf.reduce_mean(net, [1, 2],
                                 name='global_pool',
                                 keepdims=True)
        if num_classes is not None:
            net = slim.conv2d(net,
                              num_classes,
                              1,
                              activation_fn=None,
                              normalizer_fn=None,
                              scope='logits')
            end_points['predictions'] = slim.softmax(net, scope='predictions')
        return net, end_points
def xception(inputs,
             blocks,
             num_classes=None,
             is_training=True,
             global_pool=True,
             keep_prob=0.5,
             output_stride=None,
             reuse=None,
             scope=None):
  """Generator for Xception models.

  This function generates a family of Xception models. See the xception_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce Xception of various depths.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels]. Must be
      floating point. If a pretrained checkpoint is used, pixel values should be
      the same as during training.
    blocks: A list of length equal to the number of Xception blocks. Each
      element is an Xception Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks.
      If 0 or None, we return the features before the logit layer.
    is_training: whether batch_norm layers are in training mode. Also passed
      to the pre-logits dropout layer.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    keep_prob: Keep probability used in the pre-logits dropout layer.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution. Must be a multiple of 2.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is 0 or None,
      then net is the output of the last Xception block, potentially after
      global average pooling. If num_classes is a non-zero integer, net contains
      the pre-softmax activations.
    end_points: A dictionary from components of the network to the corresponding
      activation. Besides the collected layer outputs it may contain
      'global_pool', '<scope name>/logits' and 'predictions'.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  with tf.variable_scope(
      scope, 'xception', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + 'end_points'
    # Route the outputs of every listed layer type into one collection so
    # they can be turned into the end_points dict below.
    with slim.arg_scope([slim.conv2d,
                         slim.separable_conv2d,
                         xception_module,
                         stack_blocks_dense],
                        outputs_collections=end_points_collection):
      with slim.arg_scope([sync_bn.batch_norm], is_training=is_training):
        net = inputs
        if output_stride is not None:
          if output_stride % 2 != 0:
            raise ValueError('The output_stride needs to be a multiple of 2.')
          # The root block below already applies a stride of 2, so only the
          # remainder is handed to the blocks. Floor division is exact here
          # (divisibility was just checked) and keeps an integer stride an
          # integer; true division would turn it into a float on Python 3.
          output_stride //= 2
        # Root block function operated on inputs.
        net = resnet_utils.conv2d_same(net, 32, 3, stride=2,
                                       scope='entry_flow/conv1_1')
        net = resnet_utils.conv2d_same(net, 64, 3, stride=1,
                                       scope='entry_flow/conv1_2')

        # Extract features for entry_flow, middle_flow, and exit_flow.
        net = stack_blocks_dense(net, blocks, output_stride)

        # Convert end_points_collection into a dictionary of end_points.
        end_points = slim.utils.convert_collection_to_dict(
            end_points_collection, clear_collection=True)

        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], name='global_pool', keepdims=True)
          end_points['global_pool'] = net
        if num_classes:
          # Classification head: dropout, then a 1x1 convolution producing
          # the raw logits (no activation, no normalizer).
          net = slim.dropout(net, keep_prob=keep_prob, is_training=is_training,
                             scope='prelogits_dropout')
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='logits')
          end_points[sc.name + '/logits'] = net
          end_points['predictions'] = slim.softmax(net, scope='predictions')
        return net, end_points
Example #34
0
def resnet_base(img_batch, scope_name, is_training=True):
    """Build the ResNet-v1 feature extractor up to and including block3 (C4).

    Derived from light-head rcnn
    (https://github.com/zengarden/light_head_rcnn). The network is built one
    residual stage at a time so that individual stages can be frozen.

    Args:
        img_batch: Input tensor fed to the first convolution (presumably an
            NHWC image batch -- confirm against the caller).
        scope_name: Either 'resnet_v1_50' or 'resnet_v1_101'; also used as
            the variable scope of the network.
        is_training: Training flag for the stages that are not frozen.

    Returns:
        The output tensor of the third residual stage (C4).

    Raises:
        NotImplementedError: If ``scope_name`` is not one of the two
            supported networks.
    """
    if scope_name not in ('resnet_v1_50', 'resnet_v1_101'):
        raise NotImplementedError(
            'We only support resnet_v1_50 or resnet_v1_101. Check your network name....yjr'
        )
    # Only the unit count of block3 differs between the two networks.
    middle_num_units = 6 if scope_name == 'resnet_v1_50' else 23

    # Stage layout follows the per-depth tables of the ResNet paper. For
    # ResNet-50: 1 (conv1) + 3*3 (conv2) + 4*3 (conv3) + 6*3 (conv4)
    # + 3*3 (conv5) + 1 fc = 50 layers.
    # (name, base_depth, num_units, stride) for each residual stage.
    # block3 uses stride 1 here rather than the usual 2; the original note
    # attributes this to tf.slim's ResNet differing slightly from the paper
    # (not verifiable from this function alone).
    stage_specs = (
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, middle_num_units, 1),
    )
    blocks = [
        resnet_v1_block(name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in stage_specs
    ]
    # Original note (left as a TODO): with FPN the stride list is [1, 2, 2].

    # The stem is always built with is_training=False. It is assembled by
    # hand because 'SAME' padding can behave inconsistently for images of
    # different sizes (sometimes 0, sometimes 1).
    with slim.arg_scope(resnet_arg_scope(is_training=False)), tf.variable_scope(
            scope_name, scope_name):
        net = resnet_utils.conv2d_same(img_batch, 64, 7, stride=2, scope='conv1')
        net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
        net = slim.max_pool2d(net, [3, 3],
                              stride=2,
                              padding='VALID',
                              scope='pool1')

    # not_freezed decides, per stage, whether the feature extractor is
    # trained. The first cfgs.FIXED_BLOCKS entries are False (frozen):
    #   FIXED_BLOCKS=1 -> [False, True, True, True]: conv2 frozen, conv3 trained
    #   FIXED_BLOCKS=2 -> conv2 and conv3 frozen, conv4 trained
    #   FIXED_BLOCKS=3 -> conv2, conv3 and conv4 frozen
    # (Original note: FIXED_BLOCKS can be 1~3.)
    not_freezed = [False] * cfgs.FIXED_BLOCKS + [True] * (4 - cfgs.FIXED_BLOCKS)

    features = net
    for stage_idx, block in enumerate(blocks):
        stage_is_training = is_training and not_freezed[stage_idx]
        with slim.arg_scope(resnet_arg_scope(is_training=stage_is_training)):
            features, _ = resnet_v1.resnet_v1(features,
                                              [block],
                                              global_pool=False,
                                              include_root_block=False,
                                              scope=scope_name)

    # Design note carried over from the original comments: the conv4 output
    # (not conv5) is what is returned here. According to that note this
    # follows the NoC approach ("Object Detection Networks on Convolutional
    # Feature Maps"), with conv5 acting as the per-RoI classification and
    # regression head after RoI pooling -- the head is not part of this
    # function.
    return features