def nonlocal_dot(net, depth, embed=True, softmax=False, maxpool=2, scope=None):

    with tf.variable_scope(scope, 'nonlocal', values=[net]) as sc:
        with slim.arg_scope([slim.conv2d], normalizer_fn=None):
            if embed:
                a = conv2d_same(net, depth, 1, stride=1, scope='embA')
                b = conv2d_same(net, depth, 1, stride=1, scope='embB')
            else:
                a, b = net, net
            g_orig = g = conv2d_same(net, depth, 1, stride=1, scope='g')
        if maxpool is not False and maxpool > 1:
            b = slim.max_pool2d(b, [maxpool, maxpool],
                                stride=maxpool,
                                scope='pool')
            g = slim.max_pool2d(g, [maxpool, maxpool],
                                stride=maxpool,
                                scope='pool')

        # Flatten from (B,H,W,C) to (B,HW,C) or similar
        a_flat = tf.reshape(a, [tf.shape(a)[0], -1, tf.shape(a)[-1]])
        b_flat = tf.reshape(b, [tf.shape(b)[0], -1, tf.shape(b)[-1]])
        g_flat = tf.reshape(g, [tf.shape(g)[0], -1, tf.shape(g)[-1]])
        a_flat.set_shape([
            a.shape[0],
            a.shape[1] * a.shape[2] if None not in a.shape[1:3] else None,
            a.shape[-1]
        ])
        b_flat.set_shape([
            b.shape[0],
            b.shape[1] * b.shape[2] if None not in b.shape[1:3] else None,
            b.shape[-1]
        ])
        g_flat.set_shape([
            g.shape[0],
            g.shape[1] * g.shape[2] if None not in g.shape[1:3] else None,
            g.shape[-1]
        ])
        # Compute f(a, b) -> (B,HW,HW)
        f = tf.matmul(a_flat, tf.transpose(b_flat, [0, 2, 1]))
        if softmax:
            f = tf.nn.softmax(f)
        else:
            f = f / tf.cast(tf.shape(f)[-1], tf.float32)
        # Compute f * g ("self-attention") -> (B,HW,C)
        fg = tf.matmul(f, g_flat)
        # Expand and fix the static shapes TF lost track of.
        fg = tf.reshape(fg, tf.shape(g_orig))
        # fg.set_shape(g.shape)  # NOTE: This actually appears unnecessary.

        # Go back up to the original depth, add residually, zero-init.
        #with slim.arg_scope([slim.conv2d],
        #                    weights_initializer=tf.zeros_initializer()):
        with slim.arg_scope(
            [slim.batch_norm],
                param_initializers={'gamma': tf.zeros_initializer()}):
            fg = conv2d_same(fg, net.shape[-1], 1, stride=1, scope='fgup')
        net = net + fg

        return slim.utils.collect_named_outputs(None, sc.name, net)
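A hedged usage note on the example above: the block is residual and shape-preserving, so it can be dropped into any NHWC backbone stage without disturbing downstream shapes. The tensor name, shapes, and scope below are hypothetical; a minimal TF1 sketch:

feat = tf.placeholder(tf.float32, [2, 32, 32, 512])
out = nonlocal_dot(feat, depth=256, embed=True, softmax=True,
                   maxpool=2, scope='nonlocal_c4')
# out has the same static shape as feat. With maxpool=2 the pairwise
# affinity matrix f shrinks from (HW x HW) to (HW x HW/4), cutting the
# cost of both matmuls by 4x.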
Example #2
def bottleneck_skip(inputs, skip, depth, depth_bottleneck, stride=1, rate=1,
                    outputs_collections=None, scope=None):
  assert stride == 1
  if args.resize == 'bilinear':
    resize_method = tf.image.ResizeMethod.BILINEAR
  if args.resize == 'nearest':
    resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
  with tf.variable_scope(scope, 'bottleneck_skip', [inputs, skip]) as sc:
    depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
    res_inpt = tf.image.resize_images(inputs, tf.shape(skip)[1:3], method=resize_method)
    if depth != depth_in:
      shortcut = slim.conv2d(res_inpt, depth, [1, 1], stride=stride,
                             activation_fn=None, scope='shortcut')
    else:
      shortcut = res_inpt

    # print("Live from skip bottleneck block! We got %s as input and %s as skip connection" % (inputs.get_shape(), skip.get_shape()))
    concat = tf.concat([res_inpt, skip], 3)
    residual = slim.conv2d(concat, depth_bottleneck, [1, 1], stride=stride,
                           scope='conv1')
    residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, 1,
                                        rate=rate, scope='conv2')
    residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                           activation_fn=None, scope='conv3')

    output = tf.nn.relu(shortcut + residual)
    # print("So far in the end of bottleneck skip we have %s" % (output.get_shape()))

    return slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                            output)
Example #3
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None):
    with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = slim.conv2d(inputs,
                                   depth, [1, 1],
                                   stride=stride,  # must match conv2's stride or the branches' shapes diverge
                                   activation_fn=None,
                                   scope='shortcut')

        residual = slim.conv2d(inputs,
                               depth_bottleneck, [1, 1],
                               stride=1,
                               scope='conv1')
        residual = resnet_utils.conv2d_same(
            residual, depth_bottleneck, 3, stride=stride,
            scope='conv2')  # the unit's downsampling happens here, in the 3x3 conv
        residual = slim.conv2d(residual,
                               depth, [1, 1],
                               stride=1,
                               activation_fn=None,
                               scope='conv3')

        output = tf.nn.relu(residual + shortcut)

        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope, output)
Example #4
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None):
    """Bottleneck residual unit variant with BN after convolutions.

  This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
  its definition. Note that we use here the bottleneck variant which has an
  extra bottleneck layer.

  When putting together two consecutive ResNet blocks that use this unit, one
  should use stride = 2 in the last unit of the first block.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the units output compared to its input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.

  Returns:
    The ResNet unit's output.
  """
    with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = slim.conv2d(inputs,
                                   depth, [1, 1],
                                   stride=stride,
                                   activation_fn=None,
                                   scope='shortcut')

        residual = slim.conv2d(inputs,
                               depth_bottleneck, [1, 1],
                               stride=1,
                               scope='conv1')
        residual = resnet_utils.conv2d_same(residual,
                                            depth_bottleneck,
                                            3,
                                            stride,
                                            rate=rate,
                                            scope='conv2')
        residual = slim.conv2d(residual,
                               depth, [1, 1],
                               stride=1,
                               activation_fn=None,
                               scope='conv3')

        output = tf.nn.relu(shortcut + residual)

        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope, output)
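A hedged sketch of the stride note in the docstring above: with tf-slim's resnet_utils.Block namedtuple (scope, unit_fn, args), a block whose last unit downsamples can be written as below. The depths are illustrative; unit args are dicts in newer slim versions and (depth, depth_bottleneck, stride) tuples in older ones.

block1 = resnet_utils.Block('block1', bottleneck, [
    {'depth': 256, 'depth_bottleneck': 64, 'stride': 1},
    {'depth': 256, 'depth_bottleneck': 64, 'stride': 1},
    {'depth': 256, 'depth_bottleneck': 64, 'stride': 2},  # downsample in the last unit
])
net = resnet_utils.stack_blocks_dense(net, [block1], output_stride=None)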
Example #5
    def _build_base(self):
        with tf.variable_scope(self._resnet_scope):
            net = resnet_utils.conv2d_same(self._input, 64, 7, stride=2, scope='conv1')
            net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
            net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')

        return net
Example #6
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              store_non_strided_activations=False,
              reuse=None,
              scope=None):

    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with (slim.arg_scope([slim.batch_norm], is_training=is_training)
                  if is_training is not None else NoOpScope()):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        output_stride /= 4
                    net = resnet_utils.conv2d_same(net,
                                                   64,
                                                   7,
                                                   stride=2,
                                                   scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(
                    net, blocks, output_stride, store_non_strided_activations)
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(input_tensor=net,
                                         axis=[1, 2],
                                         name='pool5',
                                         keepdims=True)
                    end_points['global_pool'] = net
                if num_classes:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                    end_points[sc.name + '/logits'] = net
                    if spatial_squeeze:
                        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                        end_points[sc.name + '/spatial_squeeze'] = net
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
Example #7
def basic(inputs,
          depth,
          stride,
          rate=1,
          use_batch_norm=True,
          outputs_collections=None,
          scope=None):
    """Builds a standard JITNet encoder or decoder block."""

    with tf.variable_scope(scope, 'basic_v2', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        if use_batch_norm:
            preact = slim.batch_norm(inputs,
                                     activation_fn=tf.nn.relu,
                                     scope='preact')
        else:
            preact = tf.contrib.layers.layer_norm(inputs,
                                                  activation_fn=tf.nn.relu,
                                                  scope='preact')

        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = slim.conv2d(preact,
                                   depth, [1, 1],
                                   stride=stride,
                                   normalizer_fn=None,
                                   activation_fn=None,
                                   scope='shortcut')

        residual = resnet_utils.conv2d_same(preact,
                                            depth,
                                            3,
                                            stride,
                                            rate=rate,
                                            scope='conv1')

        residual = slim.conv2d(residual,
                               depth, [1, 3],
                               stride=1,
                               normalizer_fn=None,
                               activation_fn=None,
                               scope='conv2_1x3')

        residual = slim.conv2d(residual,
                               depth, [3, 1],
                               stride=1,
                               normalizer_fn=None,
                               activation_fn=None,
                               scope='conv2_3x1')

        output = shortcut + residual

        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope, output)
Example #8
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, scope=None):
    """Bottleneck residual unit variant with BN before convolutions
    When putting together 2 consecutive ResNet blocks that use this unit,
    one should use stride =2 in the last unit of first block

    NOTE: This scripts refer to keras resnet50
    Args:
        inputs: A tensor of size [batchsize, height, width, channels] (after BN)
        depth: The depth of the ResNet unit output
        depth_bottleneck: The depth of bottleneck layers
        stride: the ResNet unit's stride. Determines the amount of downsampling of
            the units output compared to its input
        scope: Optional variable_scope

    Returns:
        The ResNet unit output
    """
    with tf.variable_scope(scope, 'bottleneck', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = slim.batch_norm(inputs,
                                 activation_fn=tf.nn.relu,
                                 scope='preact')
        # shortcut
        if depth == depth_in:
            # identity block with no conv layer at shortcut
            shortcut = ru.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = slim.conv2d(preact,
                                   depth, [1, 1],
                                   stride=1,
                                   scope='shortcut')
        # layer1
        residual = slim.conv2d(inputs,
                               depth_bottleneck, [1, 1],
                               stride=1,
                               scope='conv1',
                               normalizer_fn=None,
                               activation_fn=None)
        # layer 2
        residual = ru.conv2d_same(residual,
                                  depth_bottleneck,
                                  3,
                                  stride=stride,
                                  rate=rate,
                                  scope='conv2')
        # layer 3
        residual = slim.conv2d(residual,
                               depth, [1, 1],
                               stride=1,
                               scope='conv3',
                               normalizer_fn=None,
                               activation_fn=None)

        output = shortcut + residual
        return output
Example #9
def tail_att(inputs,
             skip,
             depth,
             depth_bottleneck,
             stride=1,
             rate=1,
             outputs_collections=None,
             scope=None):
    assert stride == 1
    if args.resize == 'bilinear':  # bilinear interpolation
        resize_method = tf.image.ResizeMethod.BILINEAR
    if args.resize == 'nearest':  # nearest-neighbor interpolation
        resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    with tf.variable_scope(scope, 'bottleneck_skip', [inputs, skip]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        #skip = attention_block(skip)
        res_inpt = tf.image.resize_images(
            inputs, tf.shape(skip)[1:3],
            method=resize_method)  # res_inpt is `inputs` resized to skip's spatial size
        if depth != depth_in:
            shortcut = slim.conv2d(
                res_inpt,
                depth, [1, 1],
                stride=stride,
                activation_fn=None,
                scope='shortcut')  # e.g. at the first skip fusion 2048 != 512, so project the shortcut to depth 512
        else:
            shortcut = res_inpt

        # print("Live from skip bottleneck block! We got %s as input and %s as skip connection" % (inputs.get_shape(), skip.get_shape()))
        concat = tf.concat([res_inpt, skip], 3)
        residual = slim.conv2d(concat,
                               depth_bottleneck, [1, 1],
                               stride=stride,
                               scope='conv1')
        residual = resnet_utils.conv2d_same(residual,
                                            depth_bottleneck,
                                            3,
                                            1,
                                            rate=rate,
                                            scope='conv2')
        residual = slim.conv2d(residual,
                               depth, [1, 1],
                               stride=1,
                               activation_fn=None,
                               scope='conv3')

        output = tf.nn.relu(shortcut + residual)
        '''********************attention****************************'''
        #output = attention_block(output)
        '''********************attention****************************'''

        return slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                                output)
Example #10
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs

                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        output_stride /= 4

                    net = resnet_utils.conv2d_same(net,
                                                   64,
                                                   7,
                                                   stride=2,
                                                   scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                    slim.utils.collect_named_outputs(end_points_collection,
                                                     'pool2', net)

                net = resnet_utils.stack_blocks_dense(
                    net, blocks, output_stride=output_stride)
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)

                try:
                    end_points['pool3'] = end_points['resnet_v1_50/block1']
                    end_points['pool4'] = end_points['resnet_v1_50/block2']
                except KeyError:
                    end_points['pool3'] = end_points[
                        'Detection/resnet_v1_50/block1']
                    end_points['pool4'] = end_points[
                        'Detection/resnet_v1_50/block2']
                end_points['pool5'] = net

                return net, end_points
def bottleneck_skip(inputs,
                    skip,
                    depth,
                    depth_bottleneck,
                    stride=1,
                    rate=1,
                    outputs_collections=None,
                    scope=None):
    assert stride == 1
    if args.resize == 'bilinear':
        resize_method = tf.image.ResizeMethod.BILINEAR
    if args.resize == 'nearest':
        resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    with tf.variable_scope(scope, 'bottleneck_skip', [inputs, skip]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        res_inpt = tf.image.resize_images(inputs,
                                          tf.shape(skip)[1:3],
                                          method=resize_method)
        if depth != depth_in:
            shortcut = slim.conv2d(res_inpt,
                                   depth, [1, 1],
                                   stride=stride,
                                   activation_fn=None,
                                   scope='shortcut')
        else:
            shortcut = res_inpt

        # print("Live from skip bottleneck block! We got %s as input and %s as skip connection" % (inputs.get_shape(), skip.get_shape()))
        concat = tf.concat([res_inpt, skip], 3)
        residual = slim.conv2d(concat,
                               depth_bottleneck, [1, 1],
                               stride=stride,
                               scope='conv1')
        residual = resnet_utils.conv2d_same(residual,
                                            depth_bottleneck,
                                            3,
                                            1,
                                            rate=rate,
                                            scope='conv2')
        residual = slim.conv2d(residual,
                               depth, [1, 1],
                               stride=1,
                               activation_fn=None,
                               scope='conv3')

        output = tf.nn.relu(shortcut + residual)
        # print("So far in the end of bottleneck skip we have %s" % (output.get_shape()))

        return slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                                output)
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None):

    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = slim.batch_norm(inputs,
                                 activation_fn=tf.nn.relu,
                                 scope='preact')
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = slim.conv2d(preact,
                                   depth, [1, 1],
                                   stride=stride,
                                   normalizer_fn=None,
                                   activation_fn=None,
                                   scope='shortcut')

        residual = slim.conv2d(preact,
                               depth_bottleneck, [1, 1],
                               stride=1,
                               scope='conv1')
        #    residual=tf.nn.dropout(residual,0.5)

        residual = resnet_utils.conv2d_same(residual,
                                            depth_bottleneck,
                                            3,
                                            stride,
                                            rate=rate,
                                            scope='conv2')
        residual = tf.nn.dropout(residual, 1.0)  # keep_prob=1.0: dropout is effectively disabled

        residual = slim.conv2d(residual,
                               depth, [1, 1],
                               stride=1,
                               normalizer_fn=None,
                               activation_fn=None,
                               scope='conv3')
        #    residual=tf.nn.dropout(residual,0.5)

        output = shortcut + residual

        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.original_name_scope, output)
Example #13
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
               outputs_collections=None, scope=None):
  """Bottleneck residual unit variant with BN after convolutions.

  This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
  its definition. Note that we use here the bottleneck variant which has an
  extra bottleneck layer.

  When putting together two consecutive ResNet blocks that use this unit, one
  should use stride = 2 in the last unit of the first block.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the units output compared to its input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.

  Returns:
    The ResNet unit's output.
  """
  with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
    depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
    if depth == depth_in:
      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
    else:
      shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride,
                             activation_fn=None, scope='shortcut')

    # residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1,
    #                        scope='conv1')
    # residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
    #                                     rate=rate, scope='conv2')

    residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=stride,
                           scope='conv1')
    residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, 1,
                                        rate=rate, scope='conv2')
    residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                           activation_fn=None, scope='conv3')

    output = tf.nn.relu(shortcut + residual)

    return slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                            output)
Example #14
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None,
               use_bounded_activations=False):

    with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = slim.conv2d(
                inputs,
                depth, [1, 1],
                stride=stride,
                activation_fn=tf.nn.relu6 if use_bounded_activations else None,
                scope='shortcut')

        residual = slim.conv2d(inputs,
                               depth_bottleneck, [1, 1],
                               stride=1,
                               scope='conv1')
        residual = resnet_utils.conv2d_same(residual,
                                            depth_bottleneck,
                                            3,
                                            stride,
                                            rate=rate,
                                            scope='conv2')
        residual = slim.conv2d(residual,
                               depth, [1, 1],
                               stride=1,
                               activation_fn=None,
                               scope='conv3')

        if use_bounded_activations:
            # Use clip_by_value to simulate bandpass activation.
            residual = tf.clip_by_value(residual, -6.0, 6.0)
            output = tf.nn.relu6(shortcut + residual)
        else:
            output = tf.nn.relu(shortcut + residual)

        return slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                                output)
Example #15
  def testConv2DSameOdd(self):
    n, n2 = 5, 3

    # Input image.
    x = create_test_input(1, n, n, 1)

    # Convolution kernel.
    w = create_test_input(1, 3, 3, 1)
    w = tf.reshape(w, [3, 3, 1, 1])

    tf.get_variable('Conv/weights', initializer=w)
    tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
    tf.get_variable_scope().reuse_variables()

    y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
    y1_expected = tf.to_float([[14, 28, 43, 58, 34],
                               [28, 48, 66, 84, 46],
                               [43, 66, 84, 102, 55],
                               [58, 84, 102, 120, 64],
                               [34, 46, 55, 64, 30]])
    y1_expected = tf.reshape(y1_expected, [1, n, n, 1])

    y2 = resnet_utils.subsample(y1, 2)
    y2_expected = tf.to_float([[14, 43, 34],
                               [43, 84, 55],
                               [34, 55, 30]])
    y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])

    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
    y3_expected = y2_expected

    y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
    y4_expected = y2_expected

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      self.assertAllClose(y1.eval(), y1_expected.eval())
      self.assertAllClose(y2.eval(), y2_expected.eval())
      self.assertAllClose(y3.eval(), y3_expected.eval())
      self.assertAllClose(y4.eval(), y4_expected.eval())
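For reference, the test above pins down what resnet_utils.conv2d_same guarantees: at stride 1 it is a plain SAME convolution, and at stride > 1 it pads explicitly and then convolves with VALID padding, so subsampling picks the same grid points regardless of input parity. A sketch of that logic, reconstructed from the tf-slim helper (an assumption, not one of this page's snippets):

def conv2d_same_sketch(inputs, num_outputs, kernel_size, stride, rate=1,
                       scope=None):
  if stride == 1:
    return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate,
                       padding='SAME', scope=scope)
  # Pad by the effective (dilated) kernel extent, then convolve with no
  # implicit padding so the output grid is parity-independent.
  kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
  pad_total = kernel_size_effective - 1
  pad_beg = pad_total // 2
  pad_end = pad_total - pad_beg
  inputs = tf.pad(inputs,
                  [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
  return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
                     rate=rate, padding='VALID', scope=scope)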
Example #16
def ressep_backseg(inputs,
                   frame_encoder_filter_sizes=[16, 64, 128, 256, 512],
                   background_decoder_filter_sizes=[128, 64, 32, 16, 8],
                   use_seperable_convolution=False,
                   num_classes=None,
                   is_training=True,
                   use_batch_norm=True,
                   freeze_batch_norm=False,
                   num_units=1,
                   depth_multiplier=1,
                   filter_depth_multiplier=1.0,
                   min_enc_filters=8,
                   min_dec_filters=16,
                   reuse=None,
                   scope=None,
                   scale=1.0):
    """Builds a JITNet model."""

    background_decoder_num_units = [
        num_units for _ in background_decoder_filter_sizes
    ]
    frame_encoder_num_units = [num_units for _ in frame_encoder_filter_sizes]

    assert (len(frame_encoder_filter_sizes) == len(frame_encoder_num_units))
    assert (len(background_decoder_filter_sizes) == len(
        background_decoder_num_units))

    assert (len(background_decoder_filter_sizes) == len(
        frame_encoder_filter_sizes))

    in_shape = inputs.shape.as_list()
    h = in_shape[1]
    w = in_shape[2]

    if (scale < 1.0):
        original_dims = [h, w]
        rescale_dims = [int(scale * h), int(scale * w)]
        low_res_inputs = tf.image.resize_images(inputs, rescale_dims)
    else:
        low_res_inputs = inputs

    with tf.variable_scope(scope, 'ressep_map_seg', [inputs],
                           reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope([slim.conv2d],
                            outputs_collections=end_points_collection):
            train_bnorm = is_training and not freeze_batch_norm
            with slim.arg_scope([slim.batch_norm], is_training=train_bnorm):
                # frame stream encoder
                net = low_res_inputs
                shp = net.get_shape().as_list()
                conv1_in_size = [shp[1], shp[2]]

                channels = max(int(16 * filter_depth_multiplier),
                               min_enc_filters)

                net = resnet_utils.conv2d_same(net,
                                               channels,
                                               3,
                                               stride=2,
                                               scope='conv1')

                shp = net.get_shape().as_list()
                conv2_in_size = [shp[1], shp[2]]

                channels = max(int(64 * filter_depth_multiplier),
                               min_enc_filters)

                net = resnet_utils.conv2d_same(net,
                                               channels,
                                               3,
                                               stride=2,
                                               scope='conv2')

                frame_encoder_outs = []
                frame_encoder_sizes = []
                with tf.variable_scope('frame_encoder', values=[net]):
                    for b in range(len(frame_encoder_filter_sizes)):
                        shp = net.get_shape().as_list()
                        frame_encoder_sizes.append([shp[1], shp[2]])

                        filter_depth = frame_encoder_filter_sizes[b]
                        num_units = frame_encoder_num_units[b]
                        stride = 1
                        for u in range(num_units - 1):
                            if use_seperable_convolution:
                                net = basic_sep(net,
                                                filter_depth,
                                                stride,
                                                use_batch_norm=use_batch_norm)
                            else:
                                net = basic(net,
                                            filter_depth,
                                            stride,
                                            use_batch_norm=use_batch_norm)
                        # Downsample
                        stride = 2
                        if use_seperable_convolution:
                            net = basic_sep(net,
                                            filter_depth,
                                            stride,
                                            use_batch_norm=use_batch_norm)
                        else:
                            net = basic(net,
                                        filter_depth,
                                        stride,
                                        use_batch_norm=use_batch_norm)

                        frame_encoder_outs.append(net)

                # background foreground decoder
                with tf.variable_scope('background_decoder', values=[net]):
                    num_decoder_blocks = len(background_decoder_filter_sizes)
                    net = None

                    for b in range(num_decoder_blocks):
                        filter_depth = background_decoder_filter_sizes[b]
                        num_units = background_decoder_num_units[b]
                        stride = 1

                        frame_stream = frame_encoder_outs[num_decoder_blocks -
                                                          b - 1]
                        if net is None:
                            net = frame_stream
                        else:
                            net = tf.concat([frame_stream, net], axis=3)
                        shp = net.get_shape().as_list()
                        for u in range(num_units):
                            if use_seperable_convolution:
                                net = basic_sep(net,
                                                filter_depth,
                                                stride,
                                                use_batch_norm=use_batch_norm)
                            else:
                                net = basic(net,
                                            filter_depth,
                                            stride,
                                            use_batch_norm=use_batch_norm)

                        stream_size = frame_encoder_sizes[num_decoder_blocks -
                                                          1 - b]

                        # Upsample features to the matching encoder stream size
                        net = tf.image.resize_images(net, (stream_size),
                                                     align_corners=True)

                    net = tf.image.resize_images(net, (conv2_in_size),
                                                 align_corners=True)
                    channels = max(int(64 * filter_depth_multiplier),
                                   min_dec_filters)
                    net = resnet_utils.conv2d_same(net,
                                                   channels,
                                                   3,
                                                   stride=1,
                                                   scope='decoder_conv2')
                    channels = max(int(16 * filter_depth_multiplier),
                                   min_dec_filters)
                    net = resnet_utils.conv2d_same(net,
                                                   channels,
                                                   3,
                                                   stride=1,
                                                   scope='decoder_conv1')

                    net = tf.image.resize_images(net, (conv1_in_size),
                                                 align_corners=True)

        # Convert end_points_collection into a dictionary of end_points.
        end_points = slim.utils.convert_collection_to_dict(
            end_points_collection)

        if scale < 1.0:
            net = tf.image.resize_images(net, (in_shape[1], in_shape[2]),
                                         align_corners=True)

        end_points['final_full_res'] = net
        end_points['low_res_inputs'] = low_res_inputs

        logits = slim.conv2d(net,
                             num_classes, [1, 1],
                             normalizer_fn=None,
                             activation_fn=None,
                             scope='logits')
        return logits, end_points
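A hedged usage sketch for the model above (the shapes and num_classes are illustrative; num_classes must be set for the final 1x1 logits layer):

frames = tf.placeholder(tf.float32, [1, 720, 1280, 3])
logits, end_points = ressep_backseg(frames, num_classes=2,
                                    is_training=True, scale=0.5)
# With scale=0.5 the encoder runs on 360x640 frames; the decoder output is
# resized back to full resolution, so logits is [1, 720, 1280, 2].
# end_points['low_res_inputs'] holds the downscaled frames the encoder saw.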
Example #17
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):
    """Generator for v1 ResNet models.

    This function generates a family of ResNet v1 models. See the resnet_v1_*()
    methods for specific model instantiations, obtained by selecting different
    block instantiations that produce ResNets of various depths.

    Training for image classification on Imagenet is usually done with [224, 224]
    inputs, resulting in [7, 7] feature maps at the output of the last ResNet
    block for the ResNets defined in [1] that have nominal stride equal to 32.
    However, for dense prediction tasks we advise that one uses inputs with
    spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
    this case the feature maps at the ResNet output will have spatial shape
    [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
    and corners exactly aligned with the input image corners, which greatly
    facilitates alignment of the features to the image. Using as input [225, 225]
    images results in [8, 8] feature maps at the output of the last ResNet block.

    For dense prediction tasks, the ResNet needs to run in fully-convolutional
    (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
    have nominal stride equal to 32 and a good choice in FCN mode is to use
    output_stride=16 in order to increase the density of the computed features at
    small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      blocks: A list of length equal to the number of ResNet blocks. Each element
        is a resnet_utils.Block object describing the units in the block.
      num_classes: Number of predicted classes for classification tasks. If None
        we return the features before the logit layer.
      is_training: whether is training or not.
      global_pool: If True, we perform global average pooling before computing the
        logits. Set to True for image classification, False for dense prediction.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the requested
        ratio of input to output spatial resolution.
      include_root_block: If True, include the initial convolution followed by
        max-pooling, if False excludes it.
      spatial_squeeze: if True, logits is of shape [B, C], if false logits is
          of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
        If global_pool is False, then height_out and width_out are reduced by a
        factor of output_stride compared to the respective height_in and width_in,
        else both height_out and width_out equal one. If num_classes is None, then
        net is the output of the last ResNet block, potentially after global
        average pooling. If num_classes is not None, net contains the pre-softmax
        activations.
      end_points: A dictionary from components of the network to the corresponding
        activation.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope([slim.conv2d, bottleneck,
                             resnet_utils.stack_blocks_dense],
                            outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError('The output_stride needs to be a multiple of 4.')
                        output_stride /= 4
                    net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')

                    net = slim.utils.collect_named_outputs(end_points_collection, 'pool2', net)

                net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)

                end_points = slim.utils.convert_collection_to_dict(end_points_collection)

                # end_points['pool2'] = end_points['resnet_v1_50/pool1/MaxPool:0']
                try:
                    end_points['pool3'] = end_points['resnet_v1_50/block1']
                    end_points['pool4'] = end_points['resnet_v1_50/block2']
                except KeyError:
                    end_points['pool3'] = end_points['Detection/resnet_v1_50/block1']
                    end_points['pool4'] = end_points['Detection/resnet_v1_50/block2']
                end_points['pool5'] = net
                # if global_pool:
                #     # Global average pooling.
                #     net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
                # if num_classes is not None:
                #     net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                #                       normalizer_fn=None, scope='logits')
                # if spatial_squeeze:
                #     logits = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                # else:
                #     logits = net
                # # Convert end_points_collection into a dictionary of end_points.
                # end_points = slim.utils.convert_collection_to_dict(end_points_collection)
                # if num_classes is not None:
                #     end_points['predictions'] = slim.softmax(logits, scope='predictions')
                return net, end_points
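A hedged usage sketch of the FCN mode the docstring describes: with a [321, 321] input and output_stride=16, the last block's features are (321 - 1)/16 + 1 = 21 pixels on a side. The blocks list is assumed to come from the usual ResNet-50 block definitions, and the scope must be 'resnet_v1_50' so the pool3/pool4 lookups above resolve:

images = tf.placeholder(tf.float32, [1, 321, 321, 3])
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
    net, end_points = resnet_v1(images, blocks, num_classes=None,
                                global_pool=False, output_stride=16,
                                scope='resnet_v1_50')
# net: [1, 21, 21, 2048] for a ResNet-50 block configuration.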
Example #18
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None,
               use_bounded_activations=False):
  """Bottleneck residual unit variant with BN after convolutions.

  This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
  its definition. Note that we use here the bottleneck variant which has an
  extra bottleneck layer.

  When putting together two consecutive ResNet blocks that use this unit, one
  should use stride = 2 in the last unit of the first block.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
      (Here the input shape is [batch, 256, 30, 1].)
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the unit's output compared to its input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.
    use_bounded_activations: Whether or not to use bounded activations. Bounded
      activations better lend themselves to quantized inference.

  Returns:
    The ResNet unit's output.
  """
  with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
    # Channel count of the input tensor (its last dimension; min_rank asserts
    # the tensor has rank >= 4).
    depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
    # Shortcut branch.
    if depth == depth_in:
      # The unit's output channels match the input channels, so it suffices
      # to subsample the input spatially to match the residual branch's output.
      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
    else:
      # Channel counts differ: use a 1x1 convolution to adjust the channel
      # count, strided to match the residual branch.
      shortcut = slim.conv2d(
          inputs,
          depth, [1, 1],
          stride=stride,
          activation_fn=tf.nn.relu6 if use_bounded_activations else None,
          scope='shortcut')

    # Residual branch.
    residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1,
                           scope='conv1')
    residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
                                        rate=rate, scope='conv2')
    residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                           activation_fn=None, scope='conv3')

    if use_bounded_activations:
      # Use clip_by_value to simulate bandpass activation.
      residual = tf.clip_by_value(residual, -6.0, 6.0)
      output = tf.nn.relu6(shortcut + residual)
    else:
      output = tf.nn.relu(shortcut + residual)
    # Add the two branches and register the output in the collection.
    return slim.utils.collect_named_outputs(outputs_collections,
                                            sc.name,
                                            output)
Example #19
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None):
    """Bottleneck residual unit variant with BN before convolutions.
       在卷积之前进行BN的瓶颈残差单元变体

    This is the full preactivation residual unit variant proposed in [2]. See
    Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck
    variant which has an extra bottleneck layer.

    When putting together two consecutive ResNet blocks that use this unit, one
    should use stride = 2 in the last unit of the first block.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
              输入张量
      depth: The depth of the ResNet unit output.
             残差网络单元输出的深度
      depth_bottleneck: The depth of the bottleneck layers.
                        瓶颈的深度
      stride: The ResNet unit's stride. Determines the amount of downsampling of
              残差网络单元的步长
        the units output compared to its input.
      rate: An integer, rate for atrous convolution.
            带洞卷积的比例
      outputs_collections: Collection to add the ResNet unit output.
                           收集网络中单元的输出
      scope: Optional variable_scope.
             可选的变量作用域

    Returns:
      The ResNet unit's output.
    """
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = slim.batch_norm(inputs,
                                 activation_fn=tf.nn.relu,
                                 scope='preact')
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = slim.conv2d(preact,
                                   depth, [1, 1],
                                   stride=stride,
                                   normalizer_fn=None,
                                   activation_fn=None,
                                   scope='shortcut')

        residual = slim.conv2d(preact,
                               depth_bottleneck, [1, 1],
                               stride=1,
                               scope='conv1')
        residual = resnet_utils.conv2d_same(residual,
                                            depth_bottleneck,
                                            3,
                                            stride,
                                            rate=rate,
                                            scope='conv2')
        residual = slim.conv2d(residual,
                               depth, [1, 1],
                               stride=1,
                               normalizer_fn=None,
                               activation_fn=None,
                               scope='conv3')

        output = shortcut + residual

        return slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                                output)
Example #20
def RetinaNet_FPN(inputs,
                  num_planes,
                  num_channels=256,
                  is_training=True,
                  reuse=None,
                  scope=None):
    """ Generator for RetinaNet FPN models. A small modification of initial FPN model for returning layers
        {P3, P4, P5, P6, P7}. See paper Focal Loss for Dense Object Detection. arxiv: 1708.02002

        P2 is discarded and P6 is obtained via 3x3 stride-2 conv on c5; P7 is computed by applying ReLU followed by
        3x3 stride-2 conv on P6. P7 is to improve large object detection
    Args:
        same as FPN module

    Returns:
        5 feature map tensors: {P3, P4, P5, P6, P7}
    """
    with tf.variable_scope(scope, 'Retina_FPN', [inputs], reuse=reuse) as sc:
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose, bottleneck]):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                c1 = ru.conv2d_same(inputs, 64, 7, stride=2, scope='conv1')
                bn1 = slim.batch_norm(c1,
                                      scope='norm1',
                                      activation_fn=tf.nn.relu)
                mp1 = slim.max_pool2d(bn1, [3, 3],
                                      stride=2,
                                      scope='pool1',
                                      padding='SAME')
                # Bottom up
                # block 1, down-sampling is done in conv3_1, conv4_1, conv5_1
                block1 = resnet_v2_block('block1',
                                         base_depth=64,
                                         num_planes=num_planes[0],
                                         stride=1)
                c2 = stack_resnet_v2_units(mp1, block1)
                # block 2
                block2 = resnet_v2_block('block2',
                                         base_depth=128,
                                         num_planes=num_planes[1],
                                         stride=2)
                c3 = stack_resnet_v2_units(c2, block2)
                # block 3
                block3 = resnet_v2_block('block3',
                                         base_depth=256,
                                         num_planes=num_planes[2],
                                         stride=2)
                c4 = stack_resnet_v2_units(c3, block3)
                # block 4
                block4 = resnet_v2_block('block4',
                                         base_depth=512,
                                         num_planes=num_planes[3],
                                         stride=2)
                c5 = stack_resnet_v2_units(c4, block4)
                # P6
                p6 = ru.conv2d_same(c5,
                                    num_channels,
                                    3,
                                    stride=2,
                                    scope='conv6')
                # P7
                p7 = ru.conv2d_same(tf.nn.relu(p6),
                                    num_channels,
                                    3,
                                    stride=2,
                                    scope='conv7')

                # lateral layer
                l3 = slim.conv2d(c3,
                                 num_channels, [1, 1],
                                 stride=1,
                                 scope='lat3')
                l4 = slim.conv2d(c4,
                                 num_channels, [1, 1],
                                 stride=1,
                                 scope='lat4')
                p5 = slim.conv2d(c5,
                                 num_channels, [1, 1],
                                 stride=1,
                                 scope='conv5')
                # Top down
                t4 = slim.conv2d_transpose(p5,
                                           num_channels, [4, 4],
                                           stride=[2, 2])
                p4 = ru.conv2d_same(t4 + l4, num_channels, 3, stride=1)
                t3 = slim.conv2d_transpose(p4,
                                           num_channels, [4, 4],
                                           stride=[2, 2])
                p3 = ru.conv2d_same(t3 + l3, num_channels, 3, stride=1)
    return p3, p4, p5, p6, p7
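A hedged usage sketch (the plane counts follow the ResNet-50 layout, an assumption; num_planes is consumed by resnet_v2_block above):

images = tf.placeholder(tf.float32, [1, 512, 512, 3])
p3, p4, p5, p6, p7 = RetinaNet_FPN(images, num_planes=[3, 4, 6, 3],
                                   num_channels=256, is_training=True)
# Strides relative to the input: P3=8, P4=16, P5=32, P6=64, P7=128.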
Example #21
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               lambda_decay=LAMBDA_DECAY,
               rate=1,
               outputs_collections=None,
               scope=None):
    """Bottleneck residual unit variant with BN before convolutions.

    This is the full preactivation residual unit variant proposed in [2]. See
    Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck
    variant which has an extra bottleneck layer.

    When putting together two consecutive ResNet blocks that use this unit, one
    should use stride = 2 in the last unit of the first block.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
      depth: The depth of the ResNet unit output.
      depth_bottleneck: The depth of the bottleneck layers.
      stride: The ResNet unit's stride. Determines the amount of downsampling of
        the unit's output compared to its input.
      lambda_decay: L1 regularization strength for the per-channel scaling
        factor (lambda_channel) created in this unit.
      rate: An integer, rate for atrous convolution.
      outputs_collections: Collection to add the ResNet unit output.
      scope: Optional variable_scope.

    Returns:
      The ResNet unit's output.
    """
    with variable_scope.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = layers.batch_norm(inputs,
                                   activation_fn=nn_ops.relu,
                                   scope='preact')
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = layers_lib.conv2d(preact,
                                         depth, [1, 1],
                                         stride=stride,
                                         normalizer_fn=None,
                                         activation_fn=None,
                                         scope='shortcut')

        residual = layers_lib.conv2d(preact,
                                     depth_bottleneck, [1, 1],
                                     stride=1,
                                     scope='conv1')
        residual = resnet_utils.conv2d_same(residual,
                                            depth_bottleneck,
                                            3,
                                            stride,
                                            rate=rate,
                                            scope='conv2')
        residual = layers_lib.conv2d(residual,
                                     depth, [1, 1],
                                     stride=1,
                                     normalizer_fn=None,
                                     activation_fn=None,
                                     scope='conv3')

        # Per-channel scaling factor with an L1 sparsity penalty (network-
        # slimming style). Use the `lambda_decay` argument rather than the
        # module-level LAMBDA_DECAY constant so callers can actually tune it.
        lambda_channel = tf.get_variable(
            name='lambda_channel',
            shape=[1, 1, 1, depth],
            dtype=tf.float32,
            initializer=tf.ones_initializer(),
            regularizer=tf.contrib.layers.l1_regularizer(lambda_decay))
        tf.add_to_collection('lambda_channel', lambda_channel)

        # Broadcasting scales every channel of the residual; no tf.tile needed.
        residual = residual * lambda_channel
        output = shortcut + residual
        # print('outputs_collections', utils.collect_named_outputs(outputs_collections, sc.name, output))
    return utils.collect_named_outputs(outputs_collections, sc.name, output)
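
A usage sketch for the unit above (an assumption about the training script,
not part of the original example): the l1_regularizer attached to each
lambda_channel variable registers its penalty in
tf.GraphKeys.REGULARIZATION_LOSSES, so channel sparsity falls out of simply
adding those terms to the task loss.

import tensorflow as tf

def total_loss_with_channel_sparsity(task_loss):
    # Sum every registered regularization term; this includes the L1
    # penalties on the per-channel scaling factors created in bottleneck().
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    if reg_losses:
        task_loss = task_loss + tf.add_n(reg_losses)
    return task_loss

# Channels whose scale is driven towards zero can later be pruned by
# inspecting tf.get_collection('lambda_channel').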
Example #22
    def bottleneck(self,
                   inputs,
                   depth,
                   depth_bottleneck,
                   stride,
                   rate=1,
                   deformable=None,
                   attention_option=None,
                   outputs_collections=None,
                   scope=None):
        """Bottleneck residual unit variant with BN before convolutions.
    
        This is the full preactivation residual unit variant proposed in [2]. See
        Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck
        variant which has an extra bottleneck layer.

        When putting together two consecutive ResNet blocks that use this unit, one
        should use stride = 2 in the last unit of the first block.

        Args:
            inputs: A tensor of size [batch, height, width, channels].
            depth: The depth of the ResNet unit output.
            depth_bottleneck: The depth of the bottleneck layers.
            stride: The ResNet unit's stride. Determines the amount of downsampling of
                the units output compared to its input.
            rate: An integer, rate for atrous convolution.
            outputs_collections: Collection to add the ResNet unit output.
            scope: Optional variable_scope.

        Returns:
            The ResNet unit's output.
        """
        with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
            depth_in = slim.utils.last_dimension(inputs.get_shape(),
                                                 min_rank=4)
            # preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')
            preact = tf.nn.relu(inputs)
            if depth == depth_in:
                shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
            else:
                shortcut = slim.conv2d(preact,
                                       depth, [1, 1],
                                       stride=stride,
                                       normalizer_fn=None,
                                       activation_fn=None,
                                       scope='shortcut')

            residual = slim.conv2d(preact,
                                   depth_bottleneck, [1, 1],
                                   stride=1,
                                   scope='conv1')
            if stride == 1:
                # Deformable blocks
                if deformable is not None and deformable == '1':
                    end_point = 'Deformation'
                    with tf.variable_scope(end_point):
                        with tf.variable_scope('Deform'):
                            residual_feature = slim.conv2d(residual,
                                                           depth_bottleneck,
                                                           3,
                                                           stride,
                                                           rate=rate,
                                                           padding='SAME',
                                                           scope='feature')
                            residual_shape = residual_feature.get_shape(
                            ).as_list()
                            b = residual_shape[0]
                            h = residual_shape[1]
                            w = residual_shape[2]
                            c = residual_shape[3]
                            residual_offset = slim.conv2d(inputs,
                                                          2 * depth_bottleneck,
                                                          3,
                                                          stride,
                                                          rate=rate,
                                                          padding='SAME',
                                                          scope='offset')
                            residual = df._to_b_h_w_c(
                                df.tf_batch_map_offsets(
                                    df._to_bc_h_w(residual_feature,
                                                  residual_shape),
                                    df._to_bc_h_w_2(residual_offset,
                                                    residual_shape)),
                                residual_shape)
                else:
                    residual = slim.conv2d(residual,
                                           depth_bottleneck,
                                           3,
                                           stride,
                                           rate=rate,
                                           padding='SAME',
                                           scope='conv2')
                # Attention blocks
                if attention_option is not None and attention_option[0] == '1':
                    end_point = 'Attention_S'
                    with tf.variable_scope(end_point):
                        residual_shape = residual.get_shape().as_list()
                        b = residual_shape[0]
                        h = residual_shape[1]
                        w = residual_shape[2]
                        c = residual_shape[3]
                        with tf.variable_scope('Spatial'):
                            attention_map = slim.conv2d(
                                residual,
                                c,
                                3,
                                stride=1,
                                rate=rate,
                                scope='attention_s_kernel')
                            attention_map = df._to_b_h_w_c(
                                tf.nn.softmax(
                                    df._to_bc_hw(attention_map,
                                                 residual_shape)),
                                residual_shape)
                        residual = residual * attention_map

                if attention_option is not None and attention_option[1] == '1':
                    end_point = 'Attention_C'
                    with tf.variable_scope(end_point):
                        residual_shape = residual.get_shape().as_list()
                        b = residual_shape[0]
                        h = residual_shape[1]
                        w = residual_shape[2]
                        c = residual_shape[3]
                        with tf.variable_scope('Channel'):
                            attention_map = slim.conv2d(
                                residual,
                                c,
                                3,
                                stride=1,
                                rate=rate,
                                scope='attention_c_kernel')
                            # Average over batch and all spatial positions,
                            # then softmax across channels; reduce_mean over
                            # axes [0, 1, 2] avoids a reshape with a possibly
                            # unknown batch dimension.
                            attention_map = tf.nn.softmax(
                                tf.reduce_mean(attention_map, axis=[0, 1, 2]))
                        residual = residual * attention_map

                if attention_option is not None and attention_option[2] == '1':
                    # Second spatial-attention pass; the scope must differ from
                    # the first 'Attention_S' block, otherwise variable creation
                    # collides when both flags are set.
                    end_point = 'Attention_S2'
                    with tf.variable_scope(end_point):
                        residual_shape = residual.get_shape().as_list()
                        b = residual_shape[0]
                        h = residual_shape[1]
                        w = residual_shape[2]
                        c = residual_shape[3]
                        with tf.variable_scope('Spatial'):
                            attention_map = slim.conv2d(
                                residual,
                                c,
                                3,
                                stride=1,
                                rate=rate,
                                scope='attention_s_kernel')
                            attention_map = df._to_b_h_w_c(
                                tf.nn.softmax(
                                    df._to_bc_hw(attention_map,
                                                 residual_shape)),
                                residual_shape)
                        residual = residual * attention_map

                if attention_option is not None and attention_option[3] == '1':
                    end_point = 'Attention_M'
                    with tf.variable_scope(end_point):
                        residual_shape = residual.get_shape().as_list()
                        b = residual_shape[0]
                        h = residual_shape[1]
                        w = residual_shape[2]
                        c = residual_shape[3]
                        with tf.variable_scope('Modulation'):
                            attention_map = slim.conv2d(
                                residual,
                                c,
                                3,
                                stride=1,
                                rate=rate,
                                scope='attention_m_kernel')
                            attention_map = tf.clip_by_value(
                                attention_map, 0, 1)
                        residual = residual * attention_map

            else:
                residual = resnet_utils.conv2d_same(residual,
                                                    depth_bottleneck,
                                                    3,
                                                    stride,
                                                    rate=rate,
                                                    scope='conv2')

            residual = slim.conv2d(residual,
                                   depth, [1, 1],
                                   stride=1,
                                   normalizer_fn=None,
                                   activation_fn=None,
                                   scope='conv3')

            output = shortcut + residual

            return slim.utils.collect_named_outputs(outputs_collections,
                                                    sc.name, output)
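
For reference, a standalone sketch of the 'Attention_S' branch above, written
with tf.layers and fully static shapes. It assumes df._to_bc_hw flattens
(B, H, W, C) to (B*C, H*W), which is my reading of the helper, not something
stated here:

import tensorflow as tf

def spatial_attention(residual):
    b, h, w, c = residual.get_shape().as_list()
    # One logit per (position, channel) from a 3x3 conv.
    logits = tf.layers.conv2d(residual, c, 3, padding='same')
    # Softmax over the H*W positions of each channel independently.
    flat = tf.reshape(tf.transpose(logits, [0, 3, 1, 2]), [-1, h * w])
    attn = tf.nn.softmax(flat)
    attn = tf.transpose(tf.reshape(attn, [-1, c, h, w]), [0, 2, 3, 1])
    # Multiplicative gating of the residual, as in the block above.
    return residual * attn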
Example #23
    def bottle_x_neck(self,
                      inputs,
                      depth,
                      depth_bottleneck,
                      stride,
                      rate=1,
                      deformable=None,
                      attention_option=None,
                      outputs_collections=None,
                      scope=None):
        """
        ResNext
        """
        with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
            depth_in = slim.utils.last_dimension(inputs.get_shape(),
                                                 min_rank=4)
            # preact = slim.batch_norm(inputs, scope='preact')
            preact = tf.nn.relu(inputs)  # was tf.nn.relu(net); `net` is undefined here
            if depth == depth_in:
                shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
            else:
                shortcut = slim.conv2d(preact,
                                       depth, [1, 1],
                                       stride=stride,
                                       normalizer_fn=None,
                                       activation_fn=None,
                                       scope='shortcut')
            """
            # ResNet
            residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, scope='conv1')
            residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2')
            residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='conv3')
            """
            depth_bottleneck_per = depth_bottleneck // 32  # integer width per branch
            residual_split = []
            for i in range(32):
                net = slim.conv2d(preact,
                                  depth_bottleneck_per, [1, 1],
                                  stride=1,
                                  scope='conv1_%d' % i)
                net = resnet_utils.conv2d_same(net,
                                               depth_bottleneck_per,
                                               3,
                                               stride,
                                               rate=rate,
                                               scope='conv2_%d' % i)
                residual_split.append(net)
            residual = tf.concat(residual_split, axis=3)
            residual = slim.conv2d(residual,
                                   depth, [1, 1],
                                   stride=1,
                                   normalizer_fn=None,
                                   activation_fn=None,
                                   scope='conv3')

            output = shortcut + residual

            return slim.utils.collect_named_outputs(outputs_collections,
                                                    sc.name, output)
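
The loop in bottle_x_neck is ResNeXt's split-transform-merge with cardinality
32, which is why depth_bottleneck must divide evenly by 32. A minimal
standalone version of the same idea (names are illustrative, not from the
original code):

import tensorflow as tf

def resnext_branches(preact, depth_bottleneck, stride, cardinality=32):
    assert depth_bottleneck % cardinality == 0
    width = depth_bottleneck // cardinality
    branches = []
    for i in range(cardinality):
        # Every branch sees the full input, reduces it to `width` channels,
        # then transforms with a 3x3 conv at the unit's stride.
        net = tf.layers.conv2d(preact, width, 1, name='conv1_%d' % i)
        net = tf.layers.conv2d(net, width, 3, strides=stride,
                               padding='same', name='conv2_%d' % i)
        branches.append(net)
    # Merge by channel concatenation, exactly as the unit above does.
    return tf.concat(branches, axis=3)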
Example #24
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None,
               use_bounded_activations=False):
    """Bottleneck residual unit variant with BN after convolutions.
       卷积之后进行BN的瓶颈残差单元变体

    This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
    its definition. Note that we use here the bottleneck variant which has an
    extra bottleneck layer.

    When putting together two consecutive ResNet blocks that use this unit, one
    should use stride = 2 in the last unit of the first block.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
              输入张量
      depth: The depth of the ResNet unit output.
             网络输出的深度
      depth_bottleneck: The depth of the bottleneck layers.
                        瓶颈层的深度
      stride: The ResNet unit's stride. Determines the amount of downsampling of the units output compared to its input.
              残差单元的步长
      rate: An integer, rate for atrous convolution.
            带洞卷积的比例
      outputs_collections: Collection to add the ResNet unit output.
                           收集残差单元的输出
      scope: Optional variable_scope.
             可选的变量作用域
      use_bounded_activations: Whether or not to use bounded activations. Bounded
                               是否使用bounded activations
        activations better lend themselves to quantized inference.

    Returns:
      The ResNet unit's output.
      返回残差单元的输出
    """
    with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        if depth == depth_in:
            shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
        else:
            shortcut = slim.conv2d(
                inputs,
                depth, [1, 1],
                stride=stride,
                activation_fn=tf.nn.relu6 if use_bounded_activations else None,
                scope='shortcut')

        residual = slim.conv2d(inputs,
                               depth_bottleneck, [1, 1],
                               stride=1,
                               scope='conv1')
        residual = resnet_utils.conv2d_same(residual,
                                            depth_bottleneck,
                                            3,
                                            stride,
                                            rate=rate,
                                            scope='conv2')
        residual = slim.conv2d(residual,
                               depth, [1, 1],
                               stride=1,
                               activation_fn=None,
                               scope='conv3')

        if use_bounded_activations:
            # Use clip_by_value to simulate bandpass activation.
            residual = tf.clip_by_value(residual, -6.0, 6.0)
            output = tf.nn.relu6(shortcut + residual)
        else:
            output = tf.nn.relu(shortcut + residual)

        return slim.utils.collect_named_outputs(outputs_collections, sc.name,
                                                output)
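
On use_bounded_activations: clipping the residual to [-6, 6] and applying
relu6 keeps every activation in a fixed range, which is what lets 8-bit
quantized inference cover the distribution with one static scale. A tiny
check of the identity relu6(x) == min(max(x, 0), 6):

import tensorflow as tf

x = tf.constant([-8.0, 0.5, 7.0])
bounded = tf.nn.relu6(x)                      # -> [0.0, 0.5, 6.0]
manual = tf.minimum(tf.maximum(x, 0.0), 6.0)  # identical values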
Example #25
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              store_non_strided_activations=False,
              reuse=None,
              scope=None):
    """Generator for v1 ResNet models.
       生成ResNet-v1模型
    This function generates a family of ResNet v1 models. See the resnet_v1_*()
    methods for specific model instantiations, obtained by selecting different
    block instantiations that produce ResNets of various depths.

    Training for image classification on Imagenet is usually done with [224, 224]
    inputs, resulting in [7, 7] feature maps at the output of the last ResNet
    block for the ResNets defined in [1] that have nominal stride equal to 32.
    However, for dense prediction tasks we advise that one uses inputs with
    spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
    this case the feature maps at the ResNet output will have spatial shape
    [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
    and corners exactly aligned with the input image corners, which greatly
    facilitates alignment of the features to the image. Using as input [225, 225]
    images results in [8, 8] feature maps at the output of the last ResNet block.

    For dense prediction tasks, the ResNet needs to run in fully-convolutional
    (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
    have nominal stride equal to 32 and a good choice in FCN mode is to use
    output_stride=16 in order to increase the density of the computed features at
    small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      blocks: A list of length equal to the number of ResNet blocks. Each element
        is a resnet_utils.Block object describing the units in the block.
      num_classes: Number of predicted classes for classification tasks.
        If 0 or None, we return the features before the logit layer.
      is_training: whether batch_norm layers are in training mode. If this is set
        to None, the callers can specify slim.batch_norm's is_training parameter
        from an outer slim.arg_scope.
      global_pool: If True, we perform global average pooling before computing the
        logits. Set to True for image classification, False for dense prediction.

      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the requested
        ratio of input to output spatial resolution.

      include_root_block: If True, include the initial convolution followed by
        max-pooling, if False excludes it.

      spatial_squeeze: if True, logits is of shape [B, C], if false logits is
          of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
          To use this parameter, the input images must be smaller than 300x300
          pixels, in which case the output logit layer does not contain spatial
          information and can be removed.

      store_non_strided_activations: If True, we compute non-strided (undecimated)
        activations at the last unit of each block and store them in the
        `outputs_collections` before subsampling them. This gives us access to
        higher resolution intermediate activations which are useful in some
        dense prediction problems but increases 4x the computation and memory cost
        at the last unit of each block.

      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.

      scope: Optional variable_scope.

    Returns:
      net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
        If global_pool is False, then height_out and width_out are reduced by a
        factor of output_stride compared to the respective height_in and width_in,
        else both height_out and width_out equal one. If num_classes is 0 or None,
        then net is the output of the last ResNet block, potentially after global
        average pooling. If num_classes a non-zero integer, net contains the
        pre-softmax activations.
      end_points: A dictionary from components of the network to the corresponding
        activation.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with (slim.arg_scope([slim.batch_norm], is_training=is_training)
                  if is_training is not None else NoOpScope()):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        output_stride //= 4
                    net = resnet_utils.conv2d_same(net,
                                                   64,
                                                   7,
                                                   stride=2,
                                                   scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(
                    net, blocks, output_stride, store_non_strided_activations)
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2],
                                         name='pool5',
                                         keep_dims=True)
                    end_points['global_pool'] = net
                if num_classes:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                    end_points[sc.name + '/logits'] = net
                    if spatial_squeeze:
                        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                        end_points[sc.name + '/spatial_squeeze'] = net
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
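
The docstring's sizing rule is easy to sanity-check: with inputs whose spatial
size is a multiple of output_stride plus one, the output feature map has size
(input - 1) / output_stride + 1, with corners aligned to the image corners.

def fcn_output_size(input_size, output_stride):
    assert (input_size - 1) % output_stride == 0
    return (input_size - 1) // output_stride + 1

# fcn_output_size(321, 16) == 21, and fcn_output_size(225, 32) == 8,
# matching the [8, 8] maps mentioned above for 225x225 inputs.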
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):

    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        output_stride //= 4
                    # We do not include batch normalization or activation functions in
                    # conv1 because the first ResNet unit will perform these. Cf.
                    # Appendix of [2].
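                    # Note: in this variant the root contributes only a
                    # factor of 2 (conv1 stride 1 + pool1 stride 2), yet
                    # output_stride is divided by 4 as in the standard
                    # stride-4 ImageNet root; callers relying on an exact
                    # output_stride should verify the effective stride.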
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None):
                        net = resnet_utils.conv2d_same(net,
                                                       64,
                                                       6,
                                                       stride=1,
                                                       scope='conv1')
                    net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                # This is needed because the pre-activation variant does not have batch
                # normalization or activation functions in the residual unit output. See
                # Appendix of [2].
                net = slim.batch_norm(net,
                                      activation_fn=tf.nn.relu,
                                      scope='postnorm')
                output0 = net
                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2],
                                         name='pool5',
                                         keep_dims=True)
                    output1 = net
                logits = net  # ensure `logits` is defined for the return below
                if num_classes is not None:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                    logits = net
                    if spatial_squeeze:
                        logits = tf.squeeze(net, [1, 2],
                                            name='SpatialSqueeze')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = slim.softmax(
                        logits, scope='predictions')
                return logits, end_points, output0, output1
def _mcResnet(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=False,
              reuse=None,
              scope=None):
    """Generator for mcResnet models.

  """
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                outputs_collections = 'bottleneck'
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        output_stride //= 4
                    # net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
                    net = resnet_utils.conv2d_same(net,
                                                   64,
                                                   3,
                                                   stride=1,
                                                   scope='conv1_1')
                    net = slim.max_pool2d(net, [2, 2],
                                          stride=2,
                                          scope='pool1_1')

                    reconv1 = slim.conv2d(net,
                                          32,
                                          1,
                                          stride=1,
                                          scope='conv1_1_res1')
                    reconv1 = slim.conv2d(reconv1,
                                          32,
                                          3,
                                          stride=1,
                                          padding='SAME',
                                          scope='conv1_1_res2')
                    reconv1 = slim.conv2d(reconv1,
                                          64,
                                          1,
                                          stride=1,
                                          scope='conv1_1_resnet',
                                          activation_fn=None)
                    reconv1 = tf.nn.relu(net + reconv1)
                    net = slim.utils.collect_named_outputs(
                        outputs_collections, 'conv1', reconv1)

                    net = resnet_utils.conv2d_same(net,
                                                   128,
                                                   3,
                                                   stride=1,
                                                   scope='conv1_2')
                    net = slim.max_pool2d(net, [2, 2],
                                          stride=2,
                                          scope='pool1_2')

                    reconv2 = slim.conv2d(net,
                                          64,
                                          1,
                                          stride=1,
                                          scope='conv1_2_res1')
                    reconv2 = slim.conv2d(reconv2,
                                          64,
                                          3,
                                          stride=1,
                                          padding='SAME',
                                          scope='conv1_2_res2')
                    reconv2 = slim.conv2d(reconv2,
                                          128,
                                          1,
                                          stride=1,
                                          scope='conv1_2_resnet',
                                          activation_fn=None)
                    reconv2 = tf.nn.relu(reconv2 + net)
                    net = slim.utils.collect_named_outputs(
                        outputs_collections, 'conv2', reconv2)
                net = resnet_utils.stack_blocks_dense_with_shotcut(
                    net,
                    blocks,
                    output_stride,
                    outputs_collections=outputs_collections)

                # print(net)
                with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                                    normalizer_fn=slim.batch_norm):
                    net = resnet_utils.conv2d_same(net,
                                                   1024,
                                                   1,
                                                   stride=1,
                                                   scope='conv_up_1')
                    outputs_points = slim.utils.convert_collection_to_dict(
                        outputs_collections)

                    # upres1
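                    # NOTE: these keys assume the function is called with
                    # scope='resnet_v1_101'; under the default 'resnet_v1'
                    # scope the lookups below would raise KeyError.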
                    shortcut = outputs_points['resnet_v1_101/block3']
                    # print(shortcut)
                    conv_blk3 = slim.conv2d(shortcut,
                                            512,
                                            1,
                                            stride=1,
                                            scope='conv_blk3_1')
                    conv_blk3 = slim.conv2d(conv_blk3,
                                            512,
                                            3,
                                            stride=1,
                                            padding='SAME',
                                            scope='conv_blk3_2')
                    conv_blk3 = slim.conv2d(conv_blk3,
                                            1024,
                                            1,
                                            stride=1,
                                            scope='conv_blk3_3',
                                            activation_fn=None)
                    # shortcut = outputs_points['resnet_v1_101/block3']
                    # shortcut = slim.conv2d(shortcut, 512, 1, stride=1, scope='newOut1')
                    # conv_blk3 = slim.relu(shortcut+conv_blk3)
                    shortcut = tf.nn.relu(conv_blk3 + shortcut + net)

                    residual = slim.conv2d(shortcut,
                                           512, [1, 1],
                                           stride=1,
                                           scope='b1conv1')
                    residual = slim.conv2d_transpose(residual,
                                                     512,
                                                     3,
                                                     stride=2,
                                                     scope='b1conv2')
                    residual = slim.conv2d(residual,
                                           1024, [1, 1],
                                           stride=1,
                                           activation_fn=None,
                                           scope='b1conv3')
                    net = tf.nn.relu(shortcut + residual)

                    # upres2
                    shortcut = outputs_points['resnet_v1_101/block2']
                    conv_blk2 = slim.conv2d(shortcut,
                                            256,
                                            1,
                                            stride=1,
                                            scope='conv_blk2_1')
                    conv_blk2 = slim.conv2d(conv_blk2,
                                            256,
                                            3,
                                            stride=1,
                                            padding='SAME',
                                            scope='conv_blk2_2')
                    conv_blk2 = slim.conv2d(conv_blk2,
                                            512,
                                            1,
                                            stride=1,
                                            scope='conv_blk2_3',
                                            activation_fn=None)
                    # conv_blk2 = slim.relu(shortcut+conv_blk2)
                    net = slim.conv2d(net,
                                      512, [1, 1],
                                      stride=1,
                                      scope='b2conv0')
                    shortcut = tf.nn.relu(conv_blk2 + shortcut + net)

                    residual = slim.conv2d(shortcut,
                                           256, [1, 1],
                                           stride=1,
                                           scope='b2conv1')
                    residual = slim.conv2d_transpose(residual,
                                                     256,
                                                     3,
                                                     stride=2,
                                                     scope='b2conv2')
                    residual = slim.conv2d(residual,
                                           512, [1, 1],
                                           stride=1,
                                           activation_fn=None,
                                           scope='b2conv3')
                    net = tf.nn.relu(shortcut + residual)

                    # upres3
                    shortcut = outputs_points['resnet_v1_101/block1']
                    # print(shortcut)
                    conv_blk1 = slim.conv2d(shortcut,
                                            128,
                                            1,
                                            stride=1,
                                            scope='conv_blk1_1')
                    conv_blk1 = slim.conv2d(conv_blk1,
                                            128,
                                            3,
                                            stride=1,
                                            padding='SAME',
                                            scope='conv_blk1_2')
                    conv_blk1 = slim.conv2d(conv_blk1,
                                            256,
                                            1,
                                            stride=1,
                                            scope='conv_blk1_3',
                                            activation_fn=None)
                    net = slim.conv2d(net,
                                      256, [1, 1],
                                      stride=1,
                                      scope='b3conv0')
                    shortcut = tf.nn.relu(conv_blk1 + shortcut + net)

                    residual = slim.conv2d(shortcut,
                                           128, [1, 1],
                                           stride=1,
                                           scope='b3conv1')
                    # Upsample the residual path to match shortcut_0 below
                    # (the sibling upres blocks all use a stride-2 transpose
                    # conv here; conv2d_same would leave a shape mismatch).
                    residual = slim.conv2d_transpose(residual,
                                                     128,
                                                     3,
                                                     stride=2,
                                                     scope='b3conv2')
                    residual = slim.conv2d(residual,
                                           256, [1, 1],
                                           stride=1,
                                           activation_fn=None,
                                           scope='b3conv3')
                    shortcut_0 = tf.image.resize_images(
                        shortcut,
                        (shortcut.shape[1] * 2, shortcut.shape[2] * 2))
                    net = tf.nn.relu(shortcut_0 + residual)

                    # upres4
                    shortcut = outputs_points['conv1']
                    conv_blk00 = slim.conv2d(shortcut,
                                             32,
                                             1,
                                             stride=1,
                                             scope='conv_blk00_1')
                    conv_blk00 = slim.conv2d(conv_blk00,
                                             32,
                                             3,
                                             stride=1,
                                             padding='SAME',
                                             scope='conv_blk00_2')
                    conv_blk00 = slim.conv2d(conv_blk00,
                                             64,
                                             1,
                                             stride=1,
                                             scope='conv_blk00_3',
                                             activation_fn=None)
                    net_0 = slim.conv2d(net, 64, 1, scope='b4conv0')
                    # net_0 = tf.image.resize_images(net_0, (net.shape[1]*2, net.shape[2]*2))
                    # net_0 = slim.conv2d_transpose(net, 64, 3, stride=2, scope='b5conv0')
                    shortcut = tf.nn.relu(net_0 + shortcut + conv_blk00)
                    residual = slim.conv2d(shortcut,
                                           32, [1, 1],
                                           stride=1,
                                           scope='b4conv1')
                    residual = slim.conv2d_transpose(residual,
                                                     32,
                                                     3,
                                                     stride=2,
                                                     scope='b4conv2')
                    residual = slim.conv2d(residual,
                                           64, [1, 1],
                                           stride=1,
                                           activation_fn=None,
                                           scope='b4conv3')
                    shortcut_0 = tf.image.resize_images(
                        shortcut,
                        (shortcut.shape[1] * 2, shortcut.shape[2] * 2))
                    net = tf.nn.relu(shortcut_0 + residual)
                    # Convert end_points_collection into a dictionary of end_points.
                    end_points = slim.utils.convert_collection_to_dict(
                        end_points_collection)

                    if global_pool:
                        # Global average pooling.
                        net = tf.reduce_mean(net, [1, 2],
                                             name='pool5',
                                             keep_dims=True)
                        end_points['global_pool'] = net
                    if num_classes:
                        net = slim.conv2d(net,
                                          num_classes, [1, 1],
                                          activation_fn=None,
                                          normalizer_fn=None,
                                          scope='logits')
                        end_points[sc.name + '/logits'] = net
                        if spatial_squeeze:
                            net = tf.squeeze(net, [1, 2],
                                             name='SpatialSqueeze')
                            end_points[sc.name + '/spatial_squeeze'] = net
                        end_points['predictions'] = slim.softmax(
                            net, scope='predictions')
                    return net, end_points, outputs_points
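
The decoder in _mcResnet repeats one motif per level: refine the stored
encoder output with a 1-3-1 conv stack, fuse it with the (projected) top-down
path by a three-way residual add, then apply a transpose-conv residual to move
up one resolution. A condensed standalone sketch, assuming `net` has already
been projected to `skip`'s shape and channel depth (illustrative names, not
the original helpers):

import tensorflow as tf

def up_block(net, skip, depth):
    # 1-3-1 refinement of the encoder skip; no activation on the last conv.
    refined = tf.layers.conv2d(skip, depth // 2, 1, activation=tf.nn.relu)
    refined = tf.layers.conv2d(refined, depth // 2, 3, padding='same',
                               activation=tf.nn.relu)
    refined = tf.layers.conv2d(refined, depth, 1)
    fused = tf.nn.relu(refined + skip + net)  # three-way fusion
    # Transpose-conv residual that doubles the spatial resolution.
    up = tf.layers.conv2d(fused, depth // 2, 1, activation=tf.nn.relu)
    up = tf.layers.conv2d_transpose(up, depth // 2, 3, strides=2,
                                    padding='same', activation=tf.nn.relu)
    up = tf.layers.conv2d(up, depth, 1)
    # Resize the fused tensor to match before the final residual add.
    fused_up = tf.image.resize_images(fused, tf.shape(up)[1:3])
    return tf.nn.relu(fused_up + up)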
Example #29
    def inference(self, mode, inputs):
        is_training = mode == 'TRAIN'

        # Decode the inputs.
        image = inputs[0]
        im_info = inputs[1]
        gt_boxes = inputs[2]
        gt_masks = inputs[3]
        seg_loss_gate = inputs[4]
        iter = inputs[5]
        image.set_shape([1, None, None, 3])
        im_info.set_shape([1, 3])
        if mode == 'TRAIN':
            gt_boxes.set_shape([None, 5])
        # End of input decoding.

        num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios)
        bottleneck = resnet_v1.bottleneck
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

        blocks = [
            resnet_utils.Block('block1', bottleneck,
                               [(256, 64, 1, 1)] * 2 + [(256, 64, 2, 1)]),
            resnet_utils.Block('block2', bottleneck,
                               [(512, 128, 1, 1)] * 3 + [(512, 128, 2, 1)]),
            resnet_utils.Block('block3', bottleneck,
                               [(1024, 256, 1, 1)] * 5 + [(1024, 256, 2, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1, 1)] * 3)
        ]

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            with tf.variable_scope('resnet_v1_50', 'resnet_v1_50'):
                net = resnet_utils.conv2d_same(image,
                                               64,
                                               7,
                                               stride=2,
                                               scope='conv1')
                net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
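                # Explicit 1-pixel padding plus a VALID 3x3/2 max-pool
                # reproduces Caffe-style pooling arithmetic, a common trick
                # when porting ResNet weights (SAME pooling can shift the
                # features by one pixel).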
                net = slim.max_pool2d(net, [3, 3],
                                      stride=2,
                                      padding='VALID',
                                      scope='pool1')
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[0:1],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope='resnet_v1_50')

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net2, _ = resnet_v1.resnet_v1(net,
                                          blocks[1:2],
                                          global_pool=False,
                                          include_root_block=False,
                                          scope='resnet_v1_50')
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net3, _ = resnet_v1.resnet_v1(net2,
                                          blocks[2:3],
                                          global_pool=False,
                                          include_root_block=False,
                                          scope='resnet_v1_50')
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net4, _ = resnet_v1.resnet_v1(net3,
                                          blocks[3:4],
                                          global_pool=False,
                                          include_root_block=False,
                                          scope='resnet_v1_50')

        namescope = tf.no_op(name='.').name[:-1]
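        # tf.no_op(name='.') creates an op named '<current scope>/.', so
        # dropping the trailing '.' recovers the active name-scope prefix.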
        resnet_features_name = [
            namescope + 'resnet_v1_50_1/block1/unit_2/bottleneck_v1/Relu:0',
            namescope + 'resnet_v1_50_2/block2/unit_3/bottleneck_v1/Relu:0',
            namescope + 'resnet_v1_50_3/block3/unit_5/bottleneck_v1/Relu:0',
            namescope + 'resnet_v1_50_4/block4/unit_3/bottleneck_v1/Relu:0'
        ]

        resnet_features = []
        for i in range(len(resnet_features_name)):
            resnet_features.append(tf.get_default_graph().get_tensor_by_name(
                resnet_features_name[i]))

        mid_channels = 256

        with tf.variable_scope('resnet_v1_50',
                               'resnet_v1_50',
                               regularizer=tf.contrib.layers.l2_regularizer(
                                   cfg.TRAIN.WEIGHT_DECAY)):
            finer = slim.conv2d(resnet_features[-1],
                                mid_channels, [1, 1],
                                trainable=is_training,
                                weights_initializer=initializer,
                                activation_fn=None,
                                scope='pyramid/res5')
            pyramid_features = [finer]
            for i in range(4, 1, -1):
                lateral = slim.conv2d(resnet_features[i - 2],
                                      mid_channels, [1, 1],
                                      trainable=is_training,
                                      weights_initializer=initializer,
                                      activation_fn=None,
                                      scope='lateral/res{}'.format(i))
                upsample = tf.image.resize_bilinear(
                    finer, (tf.shape(lateral)[1], tf.shape(lateral)[2]),
                    name='upsample/res{}'.format(i))
                finer = upsample + lateral
                pyramid = slim.conv2d(finer,
                                      mid_channels, [3, 3],
                                      trainable=is_training,
                                      weights_initializer=initializer,
                                      activation_fn=None,
                                      scope='pyramid/res{}'.format(i))
                pyramid_features.append(pyramid)
            pyramid_features.reverse()
            pyramid = slim.avg_pool2d(pyramid_features[-1], [2, 2],
                                      stride=2,
                                      padding='SAME',
                                      scope='pyramid/res6')
            pyramid_features.append(pyramid)
        # pyramid_features downsampling rate:   4, 8, 16, 32, 64

        allowed_borders = [16, 32, 64, 128, 256]
        feat_strides = np.array([4, 8, 16, 32, 64])
        anchor_scaleses = np.array([[1], [2], [4], [8], [16]])

        with tf.variable_scope('resnet_v1_50',
                               'resnet_v1_50',
                               regularizer=tf.contrib.layers.l2_regularizer(
                                   cfg.TRAIN.WEIGHT_DECAY)) as scope:
            num_anchors = len(cfg.anchor_ratios)
            rpn_cls_prob_pyramid = []
            rpn_bbox_pred_pyramid = []
            anchors_pyramid = []
            rpn_cls_score_reshape_pyramid = []

            rpn_label_pyramid = []
            labels_cat_pyramid = []
            rpn_bbox_targets_pyramid = []
            rpn_bbox_inside_weights_pyramid = []
            rpn_bbox_outside_weights_pyramid = []

            with tf.variable_scope('resnet_v1_50_rpn',
                                   'resnet_v1_50_rpn') as scope:
                for i, pyramid_feature in enumerate(pyramid_features):
                    with tf.variable_scope('anchor/res{}'.format(i + 2)):
                        shape = tf.shape(pyramid_feature)
                        height, width = shape[1], shape[2]
                        anchors, _ = tf.py_func(generate_anchors_pre, [
                            height, width, feat_strides[i], anchor_scaleses[i],
                            cfg.anchor_ratios
                        ], [tf.float32, tf.int32])

                    # rpn
                    rpn = slim.conv2d(pyramid_feature,
                                      512, [3, 3],
                                      trainable=is_training,
                                      weights_initializer=initializer,
                                      activation_fn=nn_ops.relu,
                                      scope='rpn_conv')
                    # head
                    rpn_cls_score = slim.conv2d(
                        rpn,
                        num_anchors * 2, [3, 3],
                        trainable=is_training,
                        weights_initializer=initializer,
                        activation_fn=None,
                        scope='rpn_cls_score')
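                    # The (-1, 2) reshape below treats every anchor at every
                    # spatial position as an independent bg/fg pair, so the
                    # softmax normalizes each anchor's two scores.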
                    rpn_cls_score_reshape = tf.reshape(
                        rpn_cls_score, [-1, 2],
                        name='rpn_cls_score_reshape/res{}'.format(i + 2))
                    rpn_cls_prob = tf.nn.softmax(
                        rpn_cls_score_reshape,
                        name="rpn_cls_prob_reshape/res{}".format(i + 2))
                    rpn_bbox_pred = slim.conv2d(
                        rpn,
                        num_anchors * 4, [3, 3],
                        trainable=is_training,
                        weights_initializer=initializer,
                        activation_fn=None,
                        scope='rpn_bbox_pred')
                    rpn_bbox_pred = tf.reshape(rpn_bbox_pred, [-1, 4])

                    # share rpn
                    scope.reuse_variables()

                    rpn_cls_prob_pyramid.append(rpn_cls_prob)
                    rpn_bbox_pred_pyramid.append(rpn_bbox_pred)
                    anchors_pyramid.append(anchors)
                    rpn_cls_score_reshape_pyramid.append(rpn_cls_score_reshape)

                    if is_training:
                        with tf.variable_scope(
                                'anchors_targets/res{}'.format(i + 2)):
                            (rpn_labels, rpn_bbox_targets,
                             rpn_bbox_inside_weights,
                             rpn_bbox_outside_weights,
                             labels_cat, gt_id) = tf.py_func(
                                 anchor_target_layer,
                                 [rpn_cls_score, gt_boxes, im_info,
                                  feat_strides[i], anchors, num_anchors,
                                  gt_masks],
                                 [tf.float32, tf.float32, tf.float32,
                                  tf.float32, tf.float32, tf.int64])
                            rpn_labels = tf.to_int32(
                                rpn_labels, name="to_int32")  # (1, H, W, A)
                            labels_cat = tf.to_int32(
                                labels_cat, name="to_int32")  # (1, H, W, A)

                            rpn_labels = tf.reshape(rpn_labels, [-1])
                            labels_cat = tf.reshape(labels_cat, [-1])
                            rpn_bbox_targets = tf.reshape(
                                rpn_bbox_targets, [-1, 4])
                            rpn_bbox_inside_weights = tf.reshape(
                                rpn_bbox_inside_weights, [-1, 4])
                            rpn_bbox_outside_weights = tf.reshape(
                                rpn_bbox_outside_weights, [-1, 4])

                        rpn_label_pyramid.append(rpn_labels)
                        labels_cat_pyramid.append(labels_cat)
                        rpn_bbox_targets_pyramid.append(rpn_bbox_targets)
                        rpn_bbox_inside_weights_pyramid.append(
                            rpn_bbox_inside_weights)
                        rpn_bbox_outside_weights_pyramid.append(
                            rpn_bbox_outside_weights)

            rpn_cls_prob_pyramid = tf.concat(axis=0,
                                             values=rpn_cls_prob_pyramid)
            rpn_bbox_pred_pyramid = tf.concat(axis=0,
                                              values=rpn_bbox_pred_pyramid)
            anchors_pyramid = tf.concat(axis=0, values=anchors_pyramid)
            rpn_cls_score_reshape_pyramid = tf.concat(
                axis=0, values=rpn_cls_score_reshape_pyramid)

        with tf.variable_scope('rois') as scope:
            rpn_cls_prob_bg = rpn_cls_prob_pyramid[:, 0]
            rpn_cls_prob_fg = 1 - rpn_cls_prob_bg

            (rpn_proposals, rpn_proposal_scores,
             rpn_proposals_addone, keep_pre) = tf.py_func(
                 proposal_without_nms_layer,
                 [rpn_cls_prob_fg, rpn_bbox_pred_pyramid,
                  im_info, anchors_pyramid],
                 [tf.float32, tf.float32, tf.float32, tf.int64])

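            # keep_pre is the pre-NMS ordering/filtering applied inside
            # proposal_without_nms_layer; gathering with it keeps the class
            # probabilities aligned with rpn_proposals before NMS runs.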
            rpn_cls_prob_pyramid = tf.gather(rpn_cls_prob_pyramid, keep_pre)

            keep = tf.image.non_max_suppression(
                rpn_proposals_addone,
                rpn_proposal_scores,
                cfg.TRAIN.RPN_POST_NMS_TOP_N,
                iou_threshold=cfg.TRAIN.RPN_NMS_THRESH)
            bbox_pred = tf.gather(rpn_proposals, keep)
            roi_scores = tf.gather(rpn_proposal_scores, keep)
            anchors_pyramid = tf.gather(anchors_pyramid, keep)
            rpn_cls_prob_pyramid = tf.gather(rpn_cls_prob_pyramid, keep)

        with tf.variable_scope('seg',
                               'seg',
                               regularizer=tf.contrib.layers.l2_regularizer(
                                   cfg.TRAIN.WEIGHT_DECAY)):
            x = pyramid_features[1]
            seg_pred = slim.conv2d(x,
                                   128, [3, 3],
                                   trainable=is_training,
                                   weights_initializer=initializer,
                                   scope="pixel_seg_conv_1")

            if is_training:
                # bbox_pred_seg = tf.concat([bbox_pred, gt_boxes[:, :4]], axis=0)
                bbox_pred_seg = gt_boxes[:, :4]
            else:
                bbox_pred_seg = bbox_pred
            num_proposals = tf.shape(bbox_pred_seg)[0]
            num_proposals = tf.stack([num_proposals])
            one = tf.constant([1], dtype=tf.int32)

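            # The single shared seg feature map is tiled once per proposal
            # below, then zeroed outside each proposal's region by the
            # bimasks from generate_bimasks.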
            seg_pred_pyramid = tf.tile(
                seg_pred, tf.concat([num_proposals, one, one, one], axis=0))
            masks, bimasks = tf.py_func(generate_bimasks, [bbox_pred_seg],
                                        [tf.float32, tf.float32])
            masks.set_shape([None, None, None, None])
            masks = tf.stop_gradient(masks)
            bimasks.set_shape([None, None, None, None])
            bimasks = tf.stop_gradient(bimasks)

            seg_pred_pyramid = seg_pred_pyramid * bimasks
            x = seg_pred_pyramid

            x = tf.nn.max_pool(x,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 1, 1, 1],
                               padding='SAME')

            br = slim.conv2d(x,
                             128, [3, 3],
                             trainable=is_training,
                             weights_initializer=initializer,
                             scope="final_conv_1")
            br = slim.conv2d(br,
                             128, [3, 3],
                             trainable=is_training,
                             weights_initializer=initializer,
                             scope="final_conv_2")
            x += br

            x = tf.nn.max_pool(x,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 1, 1, 1],
                               padding='SAME')

            x = slim.conv2d(x,
                            64, [3, 3],
                            rate=2,
                            trainable=is_training,
                            weights_initializer=initializer,
                            scope="final_conv_3")

            br = slim.conv2d(x,
                             64, [3, 3],
                             trainable=is_training,
                             weights_initializer=initializer,
                             scope="final_conv_4")
            br = slim.conv2d(br,
                             64, [3, 3],
                             trainable=is_training,
                             weights_initializer=initializer,
                             scope="final_conv_5")
            x += br

            seg_pred_pyramid = slim.conv2d(x,
                                           2, [3, 3],
                                           trainable=is_training,
                                           weights_initializer=initializer,
                                           scope="final_conv_6")

            if is_training:
                labels_seg, = tf.py_func(generate_seg_gt,
                                         [bbox_pred_seg, gt_boxes, gt_masks],
                                         [tf.int32])

        if is_training:
            rpn_label_pyramid = tf.concat(axis=0, values=rpn_label_pyramid)
            labels_cat_pyramid = tf.concat(axis=0, values=labels_cat_pyramid)
            rpn_bbox_targets_pyramid = tf.concat(
                axis=0, values=rpn_bbox_targets_pyramid)
            rpn_bbox_inside_weights_pyramid = tf.concat(
                axis=0, values=rpn_bbox_inside_weights_pyramid)
            rpn_bbox_outside_weights_pyramid = tf.concat(
                axis=0, values=rpn_bbox_outside_weights_pyramid)

        ##############add prediction#####################
        tf.add_to_collection("rpn_cls_prob", rpn_cls_prob_pyramid)
        tf.add_to_collection("rpn_bbox_pred", bbox_pred)
        tf.add_to_collection("anchors", anchors_pyramid)
        tf.add_to_collection("seg_pred_pyramid", seg_pred_pyramid)

        if is_training:
            with tf.variable_scope('loss') as scope:
                #############rpn loss################
                rpn_cls_score = rpn_cls_score_reshape_pyramid
                rpn_label = rpn_label_pyramid
                rpn_select = tf.where(tf.not_equal(rpn_label, -1))
                rpn_cls_score = tf.reshape(
                    tf.gather(rpn_cls_score, rpn_select), [-1, 2])
                labels_cat = labels_cat_pyramid
                labels_cat = tf.reshape(tf.gather(labels_cat, rpn_select),
                                        [-1])

                inds_pos = tf.where(tf.not_equal(labels_cat, 0))
                inds_neg = tf.where(tf.equal(labels_cat, 0))

                rpn_cls_score_pos = tf.reshape(
                    tf.gather(rpn_cls_score, inds_pos), [-1, 2])
                rpn_cls_score_neg = tf.reshape(
                    tf.gather(rpn_cls_score, inds_neg), [-1, 2])
                labels_cat_pos = tf.reshape(tf.gather(labels_cat, inds_pos),
                                            [-1])
                labels_cat_neg = tf.reshape(tf.gather(labels_cat, inds_neg),
                                            [-1])

                rpn_cross_entropy_pos = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=rpn_cls_score_pos, labels=labels_cat_pos))
                rpn_cross_entropy_neg = softmax_loss_ohem(
                    rpn_cls_score_neg, labels_cat_neg, 256)

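                # Presumably an empirically chosen down-weighting of the RPN
                # classification terms relative to the other losses.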
                rpn_cross_entropy_pos *= 0.3
                rpn_cross_entropy_neg *= 0.3

                bPos = tf.shape(inds_pos)[0] > 0
                zero = tf.constant(0.)
                rpn_cross_entropy_pos = tf.cond(bPos,
                                                lambda: rpn_cross_entropy_pos,
                                                lambda: zero)

                masks = masks[:, :, :, 0]

                # labels_seg comes back from generate_seg_gt as int32 class
                # indices, so the sparse variant of the softmax loss applies
                # here (the dense variant expects one-hot float labels).
                seg_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=seg_pred_pyramid, labels=labels_seg)

                seg_loss *= masks
                # Masked mean: only pixels covered by the proposal masks
                # contribute to the segmentation loss.
                sum_mask = tf.reduce_sum(masks)
                seg_loss = tf.reduce_sum(seg_loss) / sum_mask

                rpn_cross_entropy = rpn_cross_entropy_pos + rpn_cross_entropy_neg

                rpn_loss_box = smooth_l1_loss_valid(
                    rpn_bbox_pred_pyramid,
                    rpn_bbox_targets_pyramid,
                    rpn_bbox_inside_weights_pyramid,
                    rpn_bbox_outside_weights_pyramid,
                    labels_cat_pyramid,
                    sigma=cfg.simga_rpn,
                    dim=[0])

                loss_wd = sum(
                    tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

                loss = rpn_cross_entropy + rpn_loss_box + seg_loss + loss_wd

                tf.add_to_collection('rpn_cross_entropy_pos',
                                     rpn_cross_entropy_pos)
                tf.add_to_collection('rpn_cross_entropy_neg',
                                     rpn_cross_entropy_neg)
                tf.add_to_collection('rpn_cross_entropy', rpn_cross_entropy)
                tf.add_to_collection('rpn_loss_box', rpn_loss_box)
                tf.add_to_collection('rpn_loss_seg', seg_loss)
                tf.add_to_collection('loss_wd', loss_wd)
                tf.add_to_collection('total_loss', loss)

            return loss
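
# softmax_loss_ohem is referenced in the loss above but not defined in this
# example. A minimal sketch of an OHEM (online hard example mining) loss over
# the negatives, assuming the intended behavior is "average the top-k hardest
# examples"; the name and signature below are hypothetical stand-ins:
def softmax_loss_ohem_sketch(cls_score, labels, num_hard):
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=cls_score, labels=labels)
    # Never ask top_k for more entries than exist.
    k = tf.minimum(num_hard, tf.shape(per_example)[0])
    # Keep only the k largest losses, i.e. the hardest examples.
    hard_losses, _ = tf.nn.top_k(per_example, k=k)
    return tf.reduce_mean(hard_losses)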
Beispiel #30
0
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):
    """Generator for v2 (preactivation) ResNet models.

  This function generates a family of ResNet v2 models. See the resnet_v2_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce ResNets of various depths.

  Training for image classification on Imagenet is usually done with [224, 224]
  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
  block for the ResNets defined in [1] that have nominal stride equal to 32.
  However, for dense prediction tasks we advise that one uses inputs with
  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
  this case the feature maps at the ResNet output will have spatial shape
  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
  and corners exactly aligned with the input image corners, which greatly
  facilitates alignment of the features to the image. Using as input [225, 225]
  images results in [8, 8] feature maps at the output of the last ResNet block.

  For dense prediction tasks, the ResNet needs to run in fully-convolutional
  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
  have nominal stride equal to 32 and a good choice in FCN mode is to use
  output_stride=16 in order to increase the density of the computed features at
  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: whether batch_norm layers are in training mode.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    include_root_block: If True, include the initial convolution followed by
      max-pooling, if False excludes it. If excluded, `inputs` should be the
      results of an activation-less convolution.
    spatial_squeeze: if True, logits have shape [B, C]; if False, they have
        shape [B, 1, 1, C], where B is the batch size and C is the number of
        classes. To use this parameter, the input images must be smaller than
        300x300 pixels, in which case the output logit layer does not contain
        spatial information and can be removed.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse, 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        output_stride /= 4
                    # We do not include batch normalization or activation functions in
                    # conv1 because the first ResNet unit will perform these. Cf.
                    # Appendix of [2].
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None):
                        net = resnet_utils.conv2d_same(net,
                                                       64,
                                                       7,
                                                       stride=2,
                                                       scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                # This is needed because the pre-activation variant does not have batch
                # normalization or activation functions in the residual unit output. See
                # Appendix of [2].
                net = slim.batch_norm(net,
                                      activation_fn=tf.nn.relu,
                                      scope='postnorm')
                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2],
                                         name='pool5',
                                         keep_dims=True)
                if num_classes is not None:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')
                    if spatial_squeeze:
                        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
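
# Usage sketch (an assumption, not part of the original example): running the
# generator fully-convolutionally for dense prediction, as the docstring
# advises. resnet_v2_block is assumed to follow the standard slim helper
# signature (scope, base_depth, num_units, stride):
def resnet_v2_50_dense_sketch(inputs, is_training=True):
    blocks = [
        resnet_v2_block('block1', base_depth=64, num_units=3, stride=2),
        resnet_v2_block('block2', base_depth=128, num_units=4, stride=2),
        resnet_v2_block('block3', base_depth=256, num_units=6, stride=2),
        resnet_v2_block('block4', base_depth=512, num_units=3, stride=1),
    ]
    # output_stride=16 increases feature density at modest extra cost,
    # cf. the docstring above.
    return resnet_v2(inputs, blocks, num_classes=None,
                     is_training=is_training, global_pool=False,
                     output_stride=16, scope='resnet_v2_50')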
Beispiel #31
0
def FPN(inputs,
        num_planes,
        num_channels=256,
        is_training=True,
        reuse=None,
        scope=None):
    """ Generator for FPN models.

    At bottom up stage, FPN use ResNet as backbone and feature activation outputs by each stages last residual block.
    By default, 4 blocks are used in ResNest: {C2, C3, C4, C5} with {4, 8, 16, 32} strides with
    respect to input image.

    At top down stage, with a coarser-resolution feature map, up-sample it by factor 2 then mergeed
    with corresponding bottom up layer (which undergoes a 1x1 conv to reduce dimension)
    by element-wise addition, called {P2, P3, P4, P5}.
    Attach a 1x1 conv layer on C5 to produce coarsest resolution map, then finally append 3x3 conv
    on each merged map to reduce alias effect of up-sampling. Because all levels of pyramid
    use shared classifier, feature dimension (output channel) is fixed to d=256.

    NOTE: P6 is simply a stride 2 sub-sampling of P5, for covering a coarser anchor scale of 512^2
    Args:
        inputs: A tensor of size [batchsize, height, width, channels]
        num_planes: A list of of length equal to the number of ResNet blocks. Each
            element is the number of planes in each ResNet block.
        num_channels: The number of output feature channels
        is_training: whether is training or not
        reuse: whether or not the network and its variables should be reused. To be
            able to reuse 'scope' must be given
        scope: variable scope

    Returns:
        5 feature map tensors: {P2, P3, P4, P5, P6}
    """
    with tf.variable_scope(scope, 'FPN', [inputs], reuse=reuse) as sc:
        with slim.arg_scope([slim.conv2d, bottleneck]):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                c1 = ru.conv2d_same(inputs, 64, 7, stride=2, scope='conv1')
                bn1 = slim.batch_norm(c1,
                                      scope='norm1',
                                      activation_fn=tf.nn.relu)
                mp1 = slim.max_pool2d(bn1, [3, 3],
                                      stride=2,
                                      scope='pool1',
                                      padding='SAME')
                # Bottom up
                # block 1, down-sampling is done in conv3_1, conv4_1, conv5_1
                block1 = resnet_v2_block('block1',
                                         base_depth=64,
                                         num_planes=num_planes[0],
                                         stride=1)
                c2 = stack_resnet_v2_units(mp1, block1)
                # block 2
                block2 = resnet_v2_block('block2',
                                         base_depth=128,
                                         num_planes=num_planes[1],
                                         stride=2)
                c3 = stack_resnet_v2_units(c2, block2)
                # block 3
                block3 = resnet_v2_block('block3',
                                         base_depth=256,
                                         num_planes=num_planes[2],
                                         stride=2)
                c4 = stack_resnet_v2_units(c3, block3)
                # block 4
                block4 = resnet_v2_block('block4',
                                         base_depth=512,
                                         num_planes=num_planes[3],
                                         stride=2)
                c5 = stack_resnet_v2_units(c4, block4)

                # lateral layer
                l2 = slim.conv2d(c2,
                                 num_channels, [1, 1],
                                 stride=1,
                                 scope='lat2')
                l3 = slim.conv2d(c3,
                                 num_channels, [1, 1],
                                 stride=1,
                                 scope='lat3')
                l4 = slim.conv2d(c4,
                                 num_channels, [1, 1],
                                 stride=1,
                                 scope='lat4')
                p5 = slim.conv2d(c5,
                                 num_channels, [1, 1],
                                 stride=1,
                                 scope='lat5')

                # Top down
                # The top-down path starts from P5 (the 1x1-reduced C5),
                # not from the raw C5 features.
                t4 = slim.conv2d_transpose(p5,
                                           num_channels, [4, 4],
                                           stride=[2, 2])
                p4 = ru.conv2d_same(t4 + l4, num_channels, 3, stride=1)
                t3 = slim.conv2d_transpose(p4,
                                           num_channels, [4, 4],
                                           stride=[2, 2])
                p3 = ru.conv2d_same(t3 + l3, num_channels, 3, stride=1)
                t2 = slim.conv2d_transpose(p3,
                                           num_channels, [4, 4],
                                           stride=[2, 2])
                p2 = ru.conv2d_same(t2 + l2, num_channels, 3, stride=1)
                # P6: stride-2 sub-sampling of P5, per the NOTE in the
                # docstring, to cover the coarsest anchor scale.
                p6 = slim.max_pool2d(p5, [1, 1],
                                     stride=2,
                                     padding='SAME',
                                     scope='pool6')
                return p2, p3, p4, p5, p6
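
# Usage sketch (an assumption): with ResNet-50-style unit counts, the pyramid
# could be built as
#   p2, p3, p4, p5, p6 = FPN(images, num_planes=[3, 4, 6, 3],
#                            num_channels=256, is_training=True)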