예제 #1
0
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              include_root_block=True,
              reuse=None,
              scope=None,
              normalize_inside=True):
    """Build a ResNet v1 graph without output_stride handling.

    Unlike the stock generator there is no `output_stride` argument here:
    `blocks` is handed to `stack_blocks_dense` as-is, so any atrous rate has
    to be pre-defined in the block descriptions.

    Args:
      inputs: Input tensor. NOTE(review): presumably
        [batch, height, width, channels] -- confirm with callers.
      blocks: Block descriptions forwarded unchanged to `stack_blocks_dense`.
      num_classes: If not None, a 1x1 'logits' convolution with this many
        outputs is appended and a softmax is stored under 'predictions'.
      is_training: Forwarded to `batch_norm` through an arg_scope.
      global_pool: If True, the features are averaged over axes 1 and 2
        (dimensions kept) before the optional logits layer.
      include_root_block: If True, start with the 7x7 stride-2 'conv1'
        convolution followed by the 3x3 stride-2 'pool1' max pooling.
      reuse: Forwarded to the enclosing variable scope.
      scope: Optional variable scope name; defaults to 'resnet_v1'.
      normalize_inside: If True, `inputs` is first passed through
        `_detectron_img_preprocess`.

    Returns:
      net: Output of the last executed stage (blocks, global pool or logits).
      end_points: Dictionary built from the end-points collection, plus
        'predictions' when `num_classes` is not None.
    """
    if normalize_inside:
        # No normalization happened outside: apply the detectron-style one.
        inputs = _detectron_img_preprocess(inputs)

    want_logits = num_classes is not None

    with variable_scope.variable_scope(
            scope, 'resnet_v1', [inputs], reuse=reuse) as net_scope:
        collection_key = net_scope.original_name_scope + '_end_points'
        collecting_ops = [conv2d, bottleneck, stack_blocks_dense, max_pool2d]

        with arg_scope(collecting_ops, outputs_collections=collection_key), \
                arg_scope([batch_norm], is_training=is_training):
            net = inputs

            if include_root_block:
                net = conv2d(net, 64, 7, 2, scope='conv1')
                net = max_pool2d(net, 3, 2, scope='pool1')

            net = stack_blocks_dense(net, blocks)

            if global_pool:
                # Global average pooling over the two middle axes.
                pooled = math_ops.reduce_mean(
                    net, [1, 2], name='pool5', keepdims=True)
                net = utils.collect_named_outputs(
                    collection_key, net_scope.name + '/gap', pooled)

            if want_logits:
                net = conv2d(net,
                             num_classes, 1,
                             activation_fn=None,
                             normalizer_fn=None,
                             scope='logits')

            # Turn the collected outputs into a name -> tensor dictionary.
            end_points = utils.convert_collection_to_dict(collection_key)
            if want_logits:
                end_points['predictions'] = layers_lib.softmax(
                    net, scope='predictions')

            return net, end_points
예제 #2
0
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None):
    """Bottleneck residual unit (ResNet v1 flavour).

    The unit sums a shortcut path and a three-convolution residual path
    (1x1 -> 3x3 -> 1x1) and applies a ReLU to the sum.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
      depth: Number of output channels of the unit.
      depth_bottleneck: Number of channels of the two inner convolutions.
      stride: Stride of the unit; applied on the shortcut and on the 3x3
        convolution.
      rate: Atrous rate handed to the 3x3 convolution.
      outputs_collections: Collection the unit output is registered in.
      scope: Optional variable scope; defaults to 'bottleneck_v1'.

    Returns:
      The unit output, as returned by `utils.collect_named_outputs`.
    """
    with variable_scope.variable_scope(
            scope, 'bottleneck_v1', [inputs]) as unit_scope:
        in_channels = utils.last_dimension(inputs.get_shape(), min_rank=4)

        # Shortcut path: a projection is only needed when the channel count
        # changes; otherwise the input is merely subsampled.
        if depth != in_channels:
            skip = conv2d(inputs, depth, 1,
                          stride=stride,
                          activation_fn=None,
                          scope='shortcut')
        else:
            skip = resnet_utils.subsample(inputs, stride, 'shortcut')

        # Residual path: 1x1 reduce, 3x3 (strided / atrous), 1x1 expand.
        branch = conv2d(inputs, depth_bottleneck, 1, stride=1, scope='conv1')
        branch = conv2d(branch, depth_bottleneck, 3, stride,
                        rate=rate, scope='conv2')
        branch = conv2d(branch, depth, 1,
                        stride=1,
                        activation_fn=None,
                        scope='conv3')

        activated = nn_ops.relu(skip + branch)
        return utils.collect_named_outputs(
            outputs_collections, unit_scope.name, activated)
예제 #3
0
파일: xception.py 프로젝트: Mooonside/SEGS
def xception_entry_flow(inputs,
                        output_stride=None,
                        end_points_collection=None):
    """Build the Xception entry flow (two root convolutions + three blocks).

    Args:
      inputs: Input tensor fed to 'entry_flow/conv1_1'.
      output_stride: Optional requested ratio of input to output resolution.
        Must be a multiple of 2 when given; half of it is forwarded to
        `stack_blocks_dense` because the first convolution already uses
        stride 2.
      end_points_collection: Name of the graph collection the end points are
        read from. Defaults to '<current variable scope name>/end_points'.

    Returns:
      net: Output of `stack_blocks_dense` over the three entry blocks.
      end_points: Dict mapping tensor names to the tensors found in
        `end_points_collection`.

    Raises:
      ValueError: If `output_stride` is given and is not a multiple of 2.
    """
    if end_points_collection is None:
        end_points_collection = tf.get_variable_scope().name + '/end_points'
    # NOTE(review): nothing in this function routes layer outputs into
    # `end_points_collection`; presumably the caller installs an arg_scope
    # with `outputs_collections` set -- confirm.

    net = inputs
    if output_stride is not None:
        if output_stride % 2 != 0:
            raise ValueError('The output_stride needs to be a multiple of 2.')
        # Account for the stride-2 entry_flow/conv1_1 convolution. Floor
        # division keeps the stride an int on Python 3 (`/=` made it a float).
        output_stride //= 2

    # Root block operating directly on the inputs.
    net = conv2d(net,
                 32,
                 ksize=[3, 3],
                 strides=[2, 2],
                 name='entry_flow/conv1_1')
    net = conv2d(net,
                 64,
                 ksize=[3, 3],
                 strides=[1, 1],
                 name='entry_flow/conv1_2')

    # The three entry blocks only differ in scope name and channel depth.
    block_specs = (
        ('entry_flow/block1', 128),
        ('entry_flow/block2', 256),
        ('entry_flow/block3', 728),
    )
    entry_blocks = [
        xception_block(block_name,
                       depth_list=[depth, depth, depth],
                       skip_connection_type='conv',
                       activation_fn_in_separable_conv=None,
                       num_units=1,
                       strides=[2, 2])
        for block_name, depth in block_specs
    ]

    net = stack_blocks_dense(net, entry_blocks, output_stride)

    # Convert end_points_collection into a dictionary of end_points.
    end_points = {
        ep.name: ep for ep in tf.get_collection(end_points_collection)
    }
    return net, end_points
예제 #4
0
파일: fcn.py 프로젝트: cicisdream/SEGS
def fcn_upsample(small,
                 big,
                 ksize=[4, 4],
                 strides=[2, 2],
                 padding='SAME',
                 name=None,
                 outputs_collections=None):
    """
    FCN upsample block: fuse a low-resolution map with a high-resolution one.

    Steps, in the order they are executed:
        1. a [1, 1] conv (no activation) maps `big` to the channel count of
           `small`
        2. a transposed conv brings `small` to the shape of that projected
           `big`
        3. both results are added
    :param small: low resolution feature
    :param big: high resolution feature
    :param ksize: trans_conv kernel size
    :param strides: trans_conv kernel stride
    :param padding: trans_conv kernel padding mode
    :param name: variable scope name (defaults to 'fcn_upsample')
    :param outputs_collections: collection the sum is added to
    :return: sum of the upsampled `small` and the projected `big`
    """
    with tf.variable_scope(name, 'fcn_upsample'):
        small_channels = tensor_shape(small)[-1]

        # Project the high-resolution map so the channel counts agree.
        projected = conv2d(big,
                           small_channels,
                           ksize=[1, 1],
                           activate=None,
                           name='score_conv')
        target_shape = tensor_shape(projected)

        # Upsample the low-resolution map to the projected map's shape.
        upsampled = trans_conv2d(small,
                                 outc=target_shape[-1],
                                 ksize=ksize,
                                 output_shape=target_shape,
                                 strides=strides,
                                 padding=padding)
        fused = upsampled + projected

    tf.add_to_collection(outputs_collections, fused)
    return fused
예제 #5
0
def _get_detection(end_points,
                   num_classes,
                   weight_decay=0.0001,
                   reuse=tf.AUTO_REUSE,
                   scope_suffix=''):
    """Build one 1x1 detection head per entry of `detection_feature_layers`.

    Each head outputs `len(detection_anchors[i]) * (4 + num_classes)`
    channels, without activation or batch norm and with a bias.

    Args:
      end_points: Mapping from layer name to feature tensor.
      num_classes: Number of classes predicted per anchor.
      weight_decay: Coefficient of the l2 regularizer set on `conv2d`.
      reuse: Reuse flag of the detection variable scope.
      scope_suffix: Prefix of each head's name ('<suffix>_<index>'); a None
        suffix is forwarded as None.

    Returns:
      List with one logits tensor per detection feature layer.
    """
    with arg_scope([conv2d],
                   weight_reg=regularizer('l2', weight_decay),
                   weight_init=tf.truncated_normal_initializer(stddev=0.01)), \
            tf.variable_scope(_DETECTION_SCOPE_NAME,
                              _DETECTION_SCOPE_NAME, [end_points],
                              reuse=reuse):
        heads = []
        for idx, feature_name in enumerate(detection_feature_layers):
            if scope_suffix is None:
                head_name = scope_suffix
            else:
                head_name = scope_suffix + '_%d' % idx

            channels = len(detection_anchors[idx]) * (4 + num_classes)
            head = conv2d(end_points[feature_name],
                          outc=channels,
                          ksize=[1, 1],
                          activate=None,
                          batch_norm=False,
                          use_bias=True,
                          name=head_name)
            heads.append(head)
        return heads
예제 #6
0
파일: ssd.py 프로젝트: Mooonside/SSD
def _layer_prediction(feature_map,
                      num_anchors,
                      num_classes,
                      l2_norm=False,
                      name=None):
    """
    Predict, for each location of a feature map, 4 box coordinates and
    num_classes scores per anchor.

    The leading axis is squeezed away at the end, so this is the
    batch-size-1 version.
    :param feature_map: [None, H, W, C]
    :param num_anchors: number of anchors per location
    :param num_classes: number of scores per anchor
    :param l2_norm: if True, apply l2_norm_1D (scale=True) to the feature map
    :param name: variable scope name (defaults to 'feature2bbox')
    :return: locations with shape [H, W, num_anchors, 4]
             scores with shape [H, W, num_anchors, num_classes]
    """
    with tf.variable_scope(name, 'feature2bbox'):
        if l2_norm:
            feature_map = l2_norm_1D(feature_map, scale=True)

        loc_raw = conv2d(feature_map,
                         outc=4 * num_anchors,
                         ksize=[3, 3],
                         activate=None,
                         name='conv_loc')
        cls_raw = conv2d(feature_map,
                         outc=num_anchors * num_classes,
                         ksize=[3, 3],
                         activate=None,
                         name='conv_cls')

        # All axes of the feature map except the first and the last one.
        inner_dims = (tensor_shape(feature_map))[1:-1]
        loc_shape = [-1] + inner_dims + [num_anchors, 4]
        cls_shape = [-1] + inner_dims + [num_anchors, num_classes]

        loc_split = tf.reshape(loc_raw, shape=loc_shape)
        cls_split = tf.reshape(cls_raw, shape=cls_shape)

        # Batch size = 1 version: drop the leading axis.
        locations = tf.squeeze(loc_split, axis=0)
        scores = tf.squeeze(cls_split, axis=0)
        return locations, scores
예제 #7
0
def vgg_conv_block(inputs, outc, times, scope, ksize=3):
    """
    Chain `times` conv2d layers inside one variable scope.

    The layers are named '<scope>_1' ... '<scope>_<times>'.
    :param inputs: input tensor of the first convolution
    :param outc: output channels of every convolution
    :param times: how many convolutions to stack
    :param scope: variable scope name, also used as layer name prefix
    :param ksize: kernel size handed to conv2d (3 by default)
    :return: output of the last convolution (`inputs` if times is 0)
    """
    out = inputs
    with tf.variable_scope(scope, 'conv'):
        for layer_no in range(1, times + 1):
            layer_scope = scope + '_' + str(layer_no)
            out = conv2d(out, outc, ksize, scope=layer_scope)
    return out
예제 #8
0
def deform_conv2d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  rate=1,
                  padding='SAME',
                  activation_fn=tf.nn.relu,
                  deformable_group=1,
                  num_groups=1,
                  normalizer_fn=None,
                  weights_initializer=None,
                  weights_regularizer=None,
                  biases_initializer=tf.zeros_initializer,
                  biases_regularizer=None,
                  outputs_collections=None,
                  offsets_collections='offsets',
                  scope=None):
    """Deformable 2-D convolution with a slim-style signature.

    A regular `conv2d` ('conv_offsets') predicts the sampling offsets, which
    are then consumed by `deform_conv_op` together with the 'weights'
    variable. The input is transposed from channels-last to channels-first
    for the op and the result is transposed back.

    Args:
      inputs: Input tensor; its shape must unpack into four values, the last
        being the channel count.
      num_outputs: Number of output channels; must be divisible by
        `num_groups`.
      kernel_size: Int or pair of ints.
      stride: Int or pair of ints.
      rate: Int or pair of ints (dilation).
      padding: Padding mode forwarded to both convolutions.
      activation_fn: Applied last when not None.
      deformable_group: Number of deformable groups; must divide the input
        channel count.
      num_groups: Number of convolution groups; must divide the input
        channel count.
      normalizer_fn: When given, applied to the convolution output instead
        of adding a bias.
      weights_initializer: Initializer of the filters and of the offset
        convolution weights.
      weights_regularizer: Regularizer of the filters and of the offset
        convolution weights.
      biases_initializer: Bias initializer; used only when `normalizer_fn`
        is None. None disables the bias.
      biases_regularizer: Bias regularizer.
      outputs_collections: Collection the final output is added to.
      offsets_collections: Collection handed to the offset convolution.
      scope: Optional variable scope; also used as the op name.

    Returns:
      The convolution output after normalization/bias and activation.

    Raises:
      AssertionError: If a divisibility constraint is violated.
    """
    # The message used to be `print(...)`, which evaluates to None; a plain
    # string makes the AssertionError self-explanatory.
    assert num_outputs % num_groups == 0, (
        'num_outputs (%s) must be divisible by num_groups (%s)'
        % (num_outputs, num_groups))

    # Normalise to two-element lists so tuples are accepted as well; the
    # list concatenations below would fail on a tuple.
    kernel_size = ([kernel_size, kernel_size]
                   if isinstance(kernel_size, int) else list(kernel_size))
    stride = [stride, stride] if isinstance(stride, int) else list(stride)
    rate = [rate, rate] if isinstance(rate, int) else list(rate)

    with tf.variable_scope(scope, 'deform_conv2d'):
        _, _, _, indim = tensor_shape(inputs)
        assert indim % num_groups == 0, (
            'input channels (%s) must be divisible by num_groups (%s)'
            % (indim, num_groups))
        assert indim % deformable_group == 0, (
            'input channels (%s) must be divisible by deformable_group (%s)'
            % (indim, deformable_group))

        # Two offset values per kernel tap and deformable group.
        offset_channels = (kernel_size[0] * kernel_size[1] * 2 *
                           deformable_group)
        offsets = conv2d(
            inputs,
            num_outputs=offset_channels,
            kernel_size=kernel_size,
            stride=stride,
            rate=rate,
            padding=padding,
            normalizer_fn=None,
            activation_fn=None,
            # may be using zero initializer?
            # weight_init=tf.zeros_initializer,
            weights_initializer=weights_initializer,
            weights_regularizer=weights_regularizer,
            biases_initializer=tf.zeros_initializer,
            biases_regularizer=None,
            outputs_collections=offsets_collections,
            scope='conv_offsets'
        )
        offsets = tf.transpose(offsets, [0, 3, 1, 2])
        # TODO: MAYA
        # NOTE(review): multiplying by zero discards the predicted offsets,
        # so the op samples on the regular grid. This looks like a debugging
        # leftover; kept as-is to preserve behavior -- confirm and remove.
        offsets *= 0.0

        filters = tf.get_variable(
            name='weights',
            shape=kernel_size + [indim // num_groups, num_outputs],
            initializer=weights_initializer,
            regularizer=weights_regularizer)

        # Transpose filters to the order required by the op:
        # [outC, inC, ksize, ksize].
        filters = tf.transpose(filters, [3, 2, 0, 1])
        inputs = tf.transpose(inputs, [0, 3, 1, 2])
        conv = deform_conv_op.deform_conv_op(x=inputs,
                                             filter=filters,
                                             offset=offsets,
                                             strides=[1, 1] + stride,
                                             rates=[1, 1] + rate,
                                             num_groups=num_groups,
                                             padding=padding,
                                             deformable_group=deformable_group,
                                             name=scope)
        conv = tf.transpose(conv, [0, 2, 3, 1])

        # A normalizer takes precedence over the bias.
        if normalizer_fn is not None:
            conv = normalizer_fn(conv)
        elif biases_initializer is not None:
            biases = tf.get_variable(name='biases',
                                     shape=[num_outputs],
                                     initializer=biases_initializer,
                                     regularizer=biases_regularizer,
                                     collections=BIAS_COLLECTIONS)
            conv = conv + biases

        if activation_fn is not None:
            conv = activation_fn(conv)

    tf.add_to_collection(outputs_collections, conv)
    return conv
예제 #9
0
def deform_conv2d(inputs,
                  outc,
                  ksize,
                  strides=[1, 1],
                  ratios=[1, 1],
                  name=None,
                  padding='SAME',
                  activate=tf.nn.relu,
                  deformable_group=1,
                  num_groups=1,
                  batch_norm=True,
                  group_norm=False,
                  use_bias=None,
                  weight_init=None,
                  weight_reg=None,
                  bias_init=tf.zeros_initializer,
                  bias_reg=None,
                  offset_init=tf.zeros_initializer,
                  offset_reg=None,
                  outputs_collections=None,
                  offsets_collections='offsets'):
    """
    Wrapper for deformable conv layers.

    A regular conv2d ('conv_offsets') predicts the sampling offsets that
    deform_conv_op consumes together with the 'weights' variable.
    :param inputs: [N, H, W, C]
    :param outc: output channels, must be divisible by num_groups
    :param ksize: [hk, wk]
    :param strides: [hs, ws]
    :param ratios: [hr, wr]
    :param name: var_scope & operation name
    :param padding: padding mode
    :param activate: activate function, skipped when None
    :param deformable_group: number of deformable groups, must divide C
    :param num_groups: number of convolution groups, must divide C
    :param batch_norm: whether performs batch norm (takes precedence over
                       group_norm and bias)
    :param group_norm: whether performs group norm when batch_norm is False
    :param use_bias: whether use bias addition; None means `not batch_norm`
    :param weight_init: weight initializer (filters and offset conv)
    :param weight_reg: weight regularizer (filters and offset conv)
    :param bias_init: bias initializer
    :param bias_reg: bias regularizer
    :param offset_init: currently unused by this function
    :param offset_reg: currently unused by this function
    :param outputs_collections: add result to some collection
    :param offsets_collections: collection handed to the offset conv
    :return: convolution after activation
    """
    # Batch norm and bias can't be used together.
    if use_bias is None:
        use_bias = not batch_norm
    assert not (batch_norm and use_bias), (
        'batch_norm and use_bias are mutually exclusive')
    # The message used to be `print(...)`, which evaluates to None; a plain
    # string makes the AssertionError self-explanatory.
    assert outc % num_groups == 0, (
        'outc (%s) must be divisible by num_groups (%s)' % (outc, num_groups))

    # Work on list copies so tuples are accepted too; the list
    # concatenations below would fail on a tuple.
    ksize = list(ksize)
    strides = list(strides)
    ratios = list(ratios)

    with tf.variable_scope(name, 'deform_conv2d'):
        _, _, _, indim = tensor_shape(inputs)
        assert indim % num_groups == 0, (
            'input channels (%s) must be divisible by num_groups (%s)'
            % (indim, num_groups))
        assert indim % deformable_group == 0, (
            'input channels (%s) must be divisible by deformable_group (%s)'
            % (indim, deformable_group))

        # Grouped convolution: each filter only sees indim // num_groups
        # input channels.
        filters = get_variable(name='weights',
                               shape=ksize + [indim // num_groups, outc],
                               init=weight_init,
                               reg=weight_reg,
                               collections=WEIGHT_COLLECTIONS)

        # Two offset values per kernel tap and deformable group.
        offsets = conv2d(
            inputs,
            outc=ksize[0] * ksize[1] * 2 * deformable_group,
            ksize=ksize,
            strides=strides,
            ratios=ratios,
            padding=padding,
            batch_norm=False,
            group_norm=False,
            use_bias=True,
            activate=None,
            name='conv_offsets',
            # may be using zero initializer?
            # weight_init=tf.zeros_initializer,
            weight_init=weight_init,
            weight_reg=weight_reg,
            bias_init=tf.zeros_initializer,
            bias_reg=None,
            outputs_collections=offsets_collections)
        offsets = tf.transpose(offsets, [0, 3, 1, 2])
        # NOTE(review): the collection name is hard-coded here and does not
        # follow `offsets_collections` -- confirm whether that is intended.
        tf.add_to_collection('offsets', offsets)

        # Transpose filters to the order required by the op:
        # [outC, inC, ksize, ksize].
        filters = tf.transpose(filters, [3, 2, 0, 1])

        inputs = tf.transpose(inputs, [0, 3, 1, 2])
        conv = deform_conv_op.deform_conv_op(x=inputs,
                                             filter=filters,
                                             offset=offsets,
                                             strides=[1, 1] + strides,
                                             rates=[1, 1] + ratios,
                                             num_groups=num_groups,
                                             padding=padding,
                                             deformable_group=deformable_group,
                                             name=name)
        conv = tf.transpose(conv, [0, 2, 3, 1])

        # Exactly one of batch norm, group norm or bias is applied.
        if batch_norm:
            conv = batch_norm2d(conv)
        elif group_norm:
            conv = GroupNorm2D(conv)
        elif use_bias:
            biases = get_variable(name='biases',
                                  shape=[outc],
                                  init=bias_init,
                                  reg=bias_reg,
                                  collections=BIAS_COLLECTIONS)
            conv = conv + biases

        if activate is not None:
            conv = activate(conv)

    tf.add_to_collection(outputs_collections, conv)
    return conv
예제 #10
0
def xception_module(inputs,
                    depth_list,
                    skip_connection_type,
                    strides,
                    unit_rate_list=None,
                    rate=1,
                    activation_fn_in_separable_conv=False,
                    outputs_collections=None,
                    scope=None):
    """An Xception module.

    The output of one Xception module is equal to the sum of `residual` and
    `shortcut`, where `residual` is the feature computed by three separable
    convolutions. The `shortcut` is a 1x1 convolution ('conv'), the identity
    ('sum') or absent ('none').

    Args:
      inputs: A tensor of size [batch, height, width, channels].
      depth_list: A list of three integers specifying the depth values of one
        Xception module.
      skip_connection_type: Skip connection type for the residual path. Only
        supports 'conv', 'sum', or 'none'.
      strides: The block unit's stride. Applied on the third separable
        convolution and on the 'conv' shortcut.
      unit_rate_list: A list of three integers, determining the unit rate for
        each separable convolution in the xception module. None means
        [1, 1, 1].
      rate: An integer, rate for atrous convolution; multiplied with each
        unit rate.
      activation_fn_in_separable_conv: Forwarded to `sep_conv2d` as
        `activate_middle`. When it is None, a ReLU is applied before each
        separable convolution and none after it; otherwise each separable
        convolution is followed by a ReLU.
      outputs_collections: Collection to add the Xception unit output.
      scope: Optional variable_scope.

    Returns:
      The Xception module's output.

    Raises:
      ValueError: If depth_list or unit_rate_list do not contain three
        elements, or on an unsupported skip connection type.
    """
    if len(depth_list) != 3:
        raise ValueError('Expect three elements in depth_list.')
    # The default used to crash on `unit_rate_list[i]`; fall back to unit
    # rates of 1 so that only `rate` determines the dilation.
    if unit_rate_list is None:
        unit_rate_list = [1, 1, 1]
    if len(unit_rate_list) != 3:
        raise ValueError('Expect three elements in unit_rate_list.')
    # Reject bad skip types up front, before any graph op is created.
    if skip_connection_type not in ('conv', 'sum', 'none'):
        raise ValueError('Unsupported skip connection type.')

    # ReLU goes either in front of each separable conv or behind it.
    relu_before = activation_fn_in_separable_conv is None

    with tf.variable_scope(scope, 'xception_module', [inputs]):
        residual = inputs

        for i, (depth, unit_rate) in enumerate(zip(depth_list,
                                                   unit_rate_list)):
            if relu_before:
                residual = tf.nn.relu(residual)
                activate_fn = None
            else:
                activate_fn = tf.nn.relu
            dilation = rate * unit_rate
            residual = sep_conv2d(
                inputs=residual,
                outc=depth,
                ksize=[3, 3],
                depth_multiplier=1,
                ratios=[dilation, dilation],
                activate_middle=activation_fn_in_separable_conv,
                activate=activate_fn,
                # Only the last separable conv carries the unit stride.
                strides=strides if i == 2 else [1, 1],
                name='separable_conv' + str(i + 1))

        if skip_connection_type == 'conv':
            shortcut = conv2d(inputs=inputs,
                              outc=depth_list[-1],
                              ksize=[1, 1],
                              strides=strides,
                              activate=None,
                              name='shortcut')
            outputs = residual + shortcut
        elif skip_connection_type == 'sum':
            outputs = residual + inputs
        else:  # 'none'
            outputs = residual

        add_to_collection(outputs_collections, outputs)
        return outputs
예제 #11
0
def xception(inputs,
             blocks,
             num_classes=None,
             is_training=True,
             global_pool=True,
             keep_prob=0.5,
             output_stride=None,
             reuse=tf.AUTO_REUSE,
             scope=None):
    """Generator for Xception models.

    Builds the two root convolutions, stacks `blocks` with
    `stack_blocks_dense`, and optionally appends global average pooling and a
    dropout + 1x1 'logits' head.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels]. Must be
        floating point. If a pretrained checkpoint is used, pixel values
        should be the same as during training.
      blocks: A list of length equal to the number of Xception blocks. Each
        element is an Xception Block object describing the units in the block.
      num_classes: Number of predicted classes for classification tasks.
        If 0 or None, we return the features before the logit layer.
      is_training: whether batch_norm layers (and dropout) are in training
        mode.
      global_pool: If True, we perform global average pooling before computing
        the logits. Set to True for image classification, False for dense
        prediction.
      keep_prob: Keep probability used in the pre-logits dropout layer.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the
        requested ratio of input to output spatial resolution.
      reuse: whether or not the network and its variables should be reused.
      scope: Optional variable_scope.

    Returns:
      net: Output of the last executed stage: the last Xception block, the
        global average pooling, or the pre-softmax logits when `num_classes`
        is a non-zero integer.
      end_points: A dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: If `output_stride` is given and is not a multiple of 2.
    """
    with tf.variable_scope(scope, 'xception', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + 'end_points'
        with arg_scope(
            [conv2d, sep_conv2d, xception_module, stack_blocks_dense],
                outputs_collections=end_points_collection):
            with arg_scope([batch_norm2d], is_training=is_training):
                net = inputs
                if output_stride is not None:
                    if output_stride % 2 != 0:
                        raise ValueError(
                            'The output_stride needs to be a multiple of 2.')
                    # Account for the stride-2 entry_flow/conv1_1 convolution.
                    # Floor division keeps the stride an int on Python 3
                    # (`/=` made it a float).
                    output_stride //= 2

                # Root block function operated on inputs.
                net = conv2d(net,
                             32,
                             ksize=[3, 3],
                             strides=[2, 2],
                             name='entry_flow/conv1_1')
                net = conv2d(net,
                             64,
                             ksize=[3, 3],
                             strides=[1, 1],
                             name='entry_flow/conv1_2')

                # Extract features for entry_flow, middle_flow, and exit_flow.
                net = stack_blocks_dense(net, blocks, output_stride)

                # Convert end_points_collection into a dictionary of
                # end_points.
                end_points = {
                    ep.name: ep
                    for ep in tf.get_collection(end_points_collection)
                }

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2],
                                         name='global_pool',
                                         keepdims=True)
                    end_points['global_pool'] = net

                if num_classes:
                    net = drop_out(net,
                                   keep_prob=keep_prob,
                                   is_training=is_training,
                                   name='prelogits_dropout')
                    # Every other call to this conv2d wrapper spells the
                    # keyword `activate`; `activation` was a misspelling.
                    net = conv2d(net,
                                 num_classes,
                                 ksize=[1, 1],
                                 activate=None,
                                 batch_norm=False,
                                 name='logits')
                    end_points[sc.name + '/logits'] = net
                    end_points['predictions'] = tf.nn.softmax(
                        net, axis=-1, name='predictions')
                return net, end_points
예제 #12
0
def _get_branch_logits(features,
                       num_classes,
                       atrous_rates=None,
                       aspp_with_batch_norm=False,
                       kernel_size=1,
                       weight_decay=0.0001,
                       reuse=tf.AUTO_REUSE,
                       scope_suffix=''):
    """Computes the logits as the sum of one convolution per atrous rate.

    One `conv2d` branch (no activation, no batch norm, with bias) is built
    for every rate in `atrous_rates`; the branches are merged with
    `tf.add_n`. When `aspp_with_batch_norm` is set or no rates are given, a
    single branch with rate 1 is used.

    Args:
      features: A float tensor of shape [batch, height, width, channels].
      num_classes: Number of classes to predict.
      atrous_rates: A list of atrous convolution rates for last layer.
      aspp_with_batch_norm: Use batch normalization layers for ASPP.
      kernel_size: Kernel size for convolution.
      weight_decay: Weight decay for the model variables.
      reuse: Reuse model variables or not.
      scope_suffix: Name of the first branch; later branches are named
        '<scope_suffix>_<index>'.

    Returns:
      Merged logits with shape [batch, height, width, num_classes].

    Raises:
      ValueError: Upon invalid input kernel_size value.
    """
    # With batch-normalized ASPP the pyramid has already been applied in
    # _extract_features, so only a plain 1x1 convolution is left to do.
    single_branch = aspp_with_batch_norm or atrous_rates is None
    if single_branch:
        if kernel_size != 1:
            raise ValueError('Kernel size must be 1 when atrous_rates is None or '
                             'using aspp_with_batch_norm. Gets %d.' % kernel_size)
        atrous_rates = [1]

    with arg_scope(
            [conv2d],
            weight_reg=regularizer('l2', weight_decay),
            weight_init=tf.truncated_normal_initializer(stddev=0.01)), \
            tf.variable_scope(_LOGITS_SCOPE_NAME, _LOGITS_SCOPE_NAME,
                              [features], reuse=reuse):
        per_rate_logits = [
            conv2d(
                features,
                outc=num_classes,
                ksize=[kernel_size, kernel_size],
                ratios=[rate, rate],
                activate=None,
                batch_norm=False,
                use_bias=True,
                # The first branch keeps the bare suffix as its name.
                name=(scope_suffix + '_%d' % idx) if idx else scope_suffix)
            for idx, rate in enumerate(atrous_rates)
        ]
        return tf.add_n(per_rate_logits)
0
def refine_by_decoder(features,
                      end_points,
                      decoder_height,
                      decoder_width,
                      decoder_use_separable_conv=False,
                      model_variant=None,
                      weight_decay=0.0001,
                      reuse=tf.AUTO_REUSE,
                      is_training=False,
                      fine_tune_batch_norm=False):
    """Adds the decoder to obtain sharper segmentation results.

    For every decoder end point registered for `model_variant`, the matching
    activation from `end_points` is projected to 48 channels with a 1x1
    convolution. The running decoder features and that projection are both
    resized to [decoder_height, decoder_width], concatenated along axis 3 and
    refined by two 3x3 convolutions (separable ones when
    `decoder_use_separable_conv` is set).

    Args:
      features: A tensor of size [batch, features_height, features_width,
        features_channels].
      end_points: A dictionary from components of the network to the corresponding
        activation.
      decoder_height: The height of decoder feature maps.
      decoder_width: The width of decoder feature maps.
      decoder_use_separable_conv: Employ separable convolution for decoder or not.
      model_variant: Model variant for feature extraction.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

    Returns:
      Decoder output with size [batch, decoder_height, decoder_width,
        decoder_channels]. When no decoder end points are registered for
        `model_variant` (the lookup yields None), `features` is returned
        unchanged.
    """
    # Batch norm only updates its statistics when both flags are set.
    batch_norm_params = {
        'is_training': is_training and fine_tune_batch_norm,
        'decay': 0.9997,
        'eps': 1e-5,
        'affine': True,
    }
    regularize_func = regularizer('l2', weight_decay)
    # Channel widths of the low-level projection and of the refinement convs.
    projection_depth = 48
    decoder_depth = 256
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
        with arg_scope([sep_conv2d], activate=tf.nn.relu, activate_middle=tf.nn.relu,
                       batch_norm=True, depthwise_weight_reg=None, pointwise_weight_reg=regularize_func,
                       padding='SAME', strides=[1, 1]):
            with arg_scope([conv2d], activate=tf.nn.relu, weight_reg=regularize_func,
                           batch_norm=True, padding='SAME', strides=[1, 1]):
                with arg_scope([batch_norm2d], **batch_norm_params):
                    with tf.variable_scope(_DECODER_SCOPE, _DECODER_SCOPE, [features]):
                        feature_list = feature_extractor.networks_to_feature_maps[
                            model_variant][feature_extractor.DECODER_END_POINTS]
                        if feature_list is None:
                            tf.logging.info('Not found any decoder end points.')
                            return features

                        decoder_features = features
                        for i, name in enumerate(feature_list):
                            # The end point is looked up as '<prefix>/<name>', where the
                            # prefix is the first path component of the first end_points
                            # key (assumed to be shared by every key - verify against
                            # the feature extractor).
                            scope_prefix = list(end_points.keys())[0].split('/')[0]
                            feature_name = '{}/{}'.format(scope_prefix, name)

                            # 1x1 convolution reducing the end point to 48 channels.
                            projected = conv2d(
                                inputs=end_points[feature_name],
                                outc=projection_depth,
                                ksize=[1, 1],
                                name='feature_projection' + str(i))

                            # Resize both inputs to decoder_height/decoder_width and pin
                            # the static spatial shape for downstream layers.
                            decoder_features_list = []
                            for feature in (decoder_features, projected):
                                resized = tf.image.resize_bilinear(
                                    feature, [decoder_height, decoder_width], align_corners=True)
                                resized.set_shape(
                                    [None, decoder_height, decoder_width, None])
                                decoder_features_list.append(resized)

                            merged = tf.concat(decoder_features_list, 3)
                            if decoder_use_separable_conv:
                                # Two 3x3 separable convolutions.
                                decoder_features = sep_conv2d(
                                    inputs=merged,
                                    ksize=[3, 3],
                                    outc=decoder_depth,
                                    ratios=[1, 1],
                                    name='decoder_conv0')
                                decoder_features = sep_conv2d(
                                    inputs=decoder_features,
                                    ksize=[3, 3],
                                    outc=decoder_depth,
                                    ratios=[1, 1],
                                    name='decoder_conv1')
                                DEBUG_VARS.decoder_features = decoder_features
                            else:
                                # Two plain 3x3 convolutions. Each needs its own name:
                                # the inputs have different depths, so the layers must
                                # not resolve to the same variables under AUTO_REUSE.
                                decoder_features = conv2d(
                                    inputs=merged,
                                    outc=decoder_depth,
                                    ksize=[3, 3],
                                    name='decoder_conv0')
                                decoder_features = conv2d(
                                    inputs=decoder_features,
                                    outc=decoder_depth,
                                    ksize=[3, 3],
                                    name='decoder_conv1')
                        return decoder_features
예제 #14
0
def _extract_features(images,
                      model_options,
                      weight_decay=0.0001,
                      reuse=tf.AUTO_REUSE,
                      is_training=False,
                      fine_tune_batch_norm=False):
    """Runs the backbone and, when enabled, the batch-normalised ASPP head.

    The backbone selected by `model_options.model_variant` is always applied.
    If `model_options.aspp_with_batch_norm` is falsy its output is returned
    directly. Otherwise an ASPP head is stacked on top: an optional image-level
    pooling branch, a 1x1 branch and one 3x3 atrous branch per entry of
    `model_options.atrous_rates`, concatenated on axis 3, projected to 256
    channels with a 1x1 convolution and passed through dropout.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

    Returns:
      concat_logits: A tensor of size [batch, feature_height, feature_width,
        feature_channels], where feature_height/feature_width are determined by
        the images height/width and output_stride. This is the raw backbone
        output when ASPP with batch norm is disabled.
      end_points: A dictionary from components of the network to the corresponding
        activation.
    """
    # feature_extractor acts as a backbone factory.
    DEBUG_VARS.raw_image = images
    features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        fine_tune_batch_norm=fine_tune_batch_norm)

    # TODO: check which tensor should be exposed here for debugging.
    DEBUG_VARS.xception_feature = features

    # Without batch-normalised ASPP the backbone output is final.
    if not model_options.aspp_with_batch_norm:
        return features, end_points

    bn_kwargs = dict(
        is_training=is_training and fine_tune_batch_norm,
        decay=0.9997,
        eps=1e-5,
        affine=True)
    l2_reg = regularizer('l2', weight_decay)

    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse), \
            arg_scope([sep_conv2d], activate=tf.nn.relu, activate_middle=tf.nn.relu, batch_norm=True,
                      depthwise_weight_reg=None, pointwise_weight_reg=l2_reg,
                      padding='SAME', strides=[1, 1]), \
            arg_scope([conv2d], activate=tf.nn.relu, weight_reg=l2_reg,
                      batch_norm=True, padding='SAME', strides=[1, 1]), \
            arg_scope([batch_norm2d], **bn_kwargs):
        # Every ASPP branch, and the final projection, emits this many channels.
        aspp_depth = 256
        branches = []

        if model_options.add_image_level_feature:
            # crop_size is expected to already match this model's input size,
            # so crop_size scaled by 1/output_stride is the feature-map size.
            pooled_h = scale_dimension(model_options.crop_size[0],
                                       1. / model_options.output_stride)
            pooled_w = scale_dimension(model_options.crop_size[1],
                                       1. / model_options.output_stride)
            # Average-pool with a window covering the whole feature map
            # (TODO: confirm the pooled spatial shape is 1x1).
            pooled = avg_pool2d(
                features, [pooled_h, pooled_w], [pooled_h, pooled_w],
                padding='VALID')
            # 1x1 convolution bringing the pooled descriptor to aspp_depth channels.
            pooled = conv2d(
                inputs=pooled, outc=aspp_depth, ksize=[1, 1], name=_IMAGE_POOLING_SCOPE)
            # TODO: check
            DEBUG_VARS.image_feature = pooled
            # Resize back to the feature-map resolution so it can be concatenated.
            pooled = tf.image.resize_bilinear(
                pooled, [pooled_h, pooled_w], align_corners=True)
            pooled.set_shape([None, pooled_h, pooled_w, aspp_depth])
            branches.append(pooled)

        # Branch 0: plain 1x1 convolution.
        branches.append(
            conv2d(features, outc=aspp_depth, ksize=[1, 1], name=_ASPP_SCOPE + str(0)))

        if model_options.atrous_rates:
            # Branches 1..N: 3x3 convolutions, one per atrous rate.
            DEBUG_VARS.aspp_features = []
            for branch_idx, atrous_rate in enumerate(model_options.atrous_rates, 1):
                branch_name = _ASPP_SCOPE + str(branch_idx)
                dilation = [atrous_rate, atrous_rate]
                if model_options.aspp_with_separable_conv:
                    branch = sep_conv2d(
                        features, outc=aspp_depth, ksize=[3, 3], ratios=dilation, name=branch_name)
                    # Only the separable variant is recorded for debugging.
                    DEBUG_VARS.aspp_features.append(branch)
                else:
                    branch = conv2d(
                        features, outc=aspp_depth, ksize=[3, 3], ratios=dilation, name=branch_name)
                branches.append(branch)

        # Merge the branches and project them back to aspp_depth channels.
        merged = tf.concat(branches, 3)
        DEBUG_VARS.aspp_concat_feature = merged
        merged = conv2d(inputs=merged, outc=aspp_depth, ksize=[1, 1],
                        name=_CONCAT_PROJECTION_SCOPE)
        merged = drop_out(merged, kp_prob=0.9, is_training=is_training,
                          name=_CONCAT_PROJECTION_SCOPE + '_dropout')

        return merged, end_points
예제 #15
0
def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_16',
           fc_conv_padding='VALID',
           global_pool=False):
    """Builds the 16-layer Oxford VGG network (configuration D).

    The fully connected layers are expressed as convolutions, so the network
    can also be applied to inputs other than 224x224; use 224x224 inputs for
    plain classification.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes. If 0 or None, the logits layer
        is skipped and the features feeding it are returned instead.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: probability of keeping an activation in the dropout
        layers during training.
      spatial_squeeze: whether to squeeze axes 1 and 2 of the logits, which is
        useful to drop unnecessary dimensions for classification.
      scope: Optional scope for the variables.
      fc_conv_padding: padding used by the convolution that implements fc6.
        Use 'SAME' when applying the network fully convolutionally to obtain a
        prediction map downsampled by a factor of 32; with 'VALID' the map is
        (input / 32) - 6.
      global_pool: Optional boolean flag. If True, the input to the
        classification layer is average-pooled to 1x1 for any input size.
        (Not part of the original VGG architecture.)

    Returns:
      net: the logits (if num_classes is a non-zero integer), otherwise the
        input to the logits layer.
      end_points: a dict of tensors with intermediate activations.
    """
    # (output channels, number of convolutions) of the five conv stages; each
    # stage is followed by a max pool called with size argument 2.
    stage_specs = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    with tf.variable_scope(scope, 'vgg_16', [inputs],
                           reuse=tf.AUTO_REUSE) as sc:
        collection_name = sc.original_name_scope + '_end_points'
        # Outputs of conv2d and fully_connected are gathered in collection_name.
        with arg_scope([conv2d, layers_lib.fully_connected],
                       outputs_collections=collection_name):
            net = inputs
            for stage, (channels, repeats) in enumerate(stage_specs, 1):
                net = vgg_conv_block(inputs=net,
                                     outc=channels,
                                     times=repeats,
                                     scope='conv%d' % stage)
                net = max_pool2d(net, 2, scope='pool%d' % stage)

            # fc6/fc7 as convolutions instead of fully connected layers.
            net = conv2d(net, 4096, 7, padding=fc_conv_padding, scope='fc6')
            net = layers_lib.dropout(net,
                                     keep_prob=dropout_keep_prob,
                                     is_training=is_training,
                                     scope='dropout6')
            net = conv2d(net, 4096, 1, scope='fc7')

            # Snapshot the collected activations as a dict.
            end_points = utils.convert_collection_to_dict(collection_name)

            if global_pool:
                net = tf.reduce_mean(net, [1, 2],
                                     keep_dims=True,
                                     name='global_pool')
                end_points['global_pool'] = net

            # Feature-extractor mode: no logits layer requested.
            if not num_classes:
                return net, end_points

            net = layers_lib.dropout(net,
                                     dropout_keep_prob,
                                     is_training=is_training,
                                     scope='dropout7')
            net = conv2d(net,
                         num_classes,
                         1,
                         activation_fn=None,
                         scope='fc8')
            if spatial_squeeze:
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
            end_points[sc.name + '/fc8'] = net
            return net, end_points
예제 #16
0
파일: ssd.py 프로젝트: Mooonside/SSD
def ssd_vgg16(inputs, scope=None):
    """Builds the VGG-16 based SSD backbone with its extra feature layers.

    The network consists of the five VGG conv stages (conv1-conv5), a dilated
    3x3 conv6, a 1x1 conv7 and four extra stages (conv8-conv11), each made of
    a 1x1 convolution followed by a 3x3 convolution. The output of every stage
    is recorded in `end_points` under 'block_1' ... 'block_11'.

    Args:
      inputs: Input tensor fed to the first VGG conv block.
      scope: Optional variable scope name; defaults to 'ssd_vgg16'.

    Returns:
      net: Output of the last extra layer ('conv11_2').
      end_points: Dict holding the 'block_N' activations plus every tensor
        gathered in the outputs collection, keyed by tensor name.
      prediction_gathers: Result of `layers_predictions(end_points)`
        (presumably the per-layer detection predictions - see that helper).
    """
    # (output channels, number of convs, pooling kwargs) of the VGG stages.
    # pool5 uses a 3x3 window with stride 1, unlike the 2x2 pools before it.
    vgg_stages = (
        (64, 2, {'ksize': [2, 2]}),
        (128, 2, {'ksize': [2, 2]}),
        (256, 3, {'ksize': [2, 2]}),
        (512, 3, {'ksize': [2, 2]}),
        (512, 3, {'ksize': [3, 3], 'strides': [1, 1]}),
    )
    # (stage index, 1x1 channels, 3x3 channels, kwargs of the 3x3 conv) of the
    # SSD extra stages. conv8/conv9 use stride 2; conv10/conv11 use stride 1
    # with 'VALID' padding.
    extra_stages = (
        (8, 256, 512, {'strides': [2, 2]}),
        (9, 128, 256, {'strides': [2, 2]}),
        (10, 128, 256, {'strides': [1, 1], 'padding': 'VALID'}),
        (11, 128, 256, {'strides': [1, 1], 'padding': 'VALID'}),
    )

    with tf.variable_scope(scope, 'ssd_vgg16', [inputs],
                           reuse=tf.AUTO_REUSE) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'

        # Collect outputs of conv2d and max_pool2d.
        with arg_scope([conv2d, max_pool2d],
                       outputs_collections=end_points_collection):
            end_points = {}
            net = inputs

            # VGG trunk: conv block, record its output, then pool.
            for idx, (channels, repeats, pool_kwargs) in enumerate(vgg_stages, 1):
                net = vgg_conv_block(inputs=net,
                                     outc=channels,
                                     times=repeats,
                                     scope='conv%d' % idx)
                end_points['block_%d' % idx] = net
                net = max_pool2d(net, name='pool%d' % idx, **pool_kwargs)

            # SSD-specific layers start here: 3x3 conv6 with ratios [6, 6],
            # followed by a 1x1 conv7.
            net = conv2d(net,
                         1024,
                         ksize=[3, 3],
                         strides=[1, 1],
                         ratios=[6, 6],
                         name='conv6')
            end_points['block_6'] = net
            net = conv2d(net,
                         1024,
                         ksize=[1, 1],
                         strides=[1, 1],
                         ratios=[1, 1],
                         name='conv7')
            end_points['block_7'] = net

            # Extra stages: 1x1 channel reduction followed by a 3x3 conv.
            for idx, reduce_channels, out_channels, conv_kwargs in extra_stages:
                stage_name = 'conv%d' % idx
                with tf.variable_scope(stage_name):
                    net = conv2d(net,
                                 reduce_channels,
                                 ksize=[1, 1],
                                 strides=[1, 1],
                                 name=stage_name + '_1')
                    net = conv2d(net,
                                 out_channels,
                                 ksize=[3, 3],
                                 name=stage_name + '_2',
                                 **conv_kwargs)
                end_points['block_%d' % idx] = net

            # Also expose every collected activation under its tensor name.
            for tensor in tf.get_collection(end_points_collection):
                end_points[tensor.name] = tensor

            prediction_gathers = layers_predictions(end_points)

            return net, end_points, prediction_gathers