Example #1
def _get_detection(end_points,
                   num_classes,
                   weight_decay=0.0001,
                   reuse=tf.AUTO_REUSE,
                   scope_suffix=''):
    with arg_scope([conv2d],
                   weight_reg=regularizer('l2', weight_decay),
                   weight_init=tf.truncated_normal_initializer(stddev=0.01)):
        with tf.variable_scope(_DETECTION_SCOPE_NAME,
                               _DETECTION_SCOPE_NAME, [end_points],
                               reuse=reuse):
            branch_logits = []
            for layer_idx, layer_name in enumerate(detection_feature_layers):
                features = end_points[layer_name]
                branch_logits.append(
                    conv2d(features,
                           outc=len(detection_anchors[layer_idx]) *
                           (4 + num_classes),
                           ksize=[1, 1],
                           activate=None,
                           batch_norm=False,
                           use_bias=True,
                           name=scope_suffix if scope_suffix is None else
                           (scope_suffix + '_%d' % layer_idx)))
            return branch_logits
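
A minimal usage sketch (hedged: `end_points` is assumed to come from the backbone, and the module-level `detection_feature_layers` / `detection_anchors` lists are assumed to be configured elsewhere in this file; the class count is an arbitrary example value):

# Hypothetical usage; names outside this snippet are assumptions.
branch_logits = _get_detection(end_points, num_classes=21)
# One tensor per detection feature layer, each of shape
# [batch, h_i, w_i, len(detection_anchors[i]) * (4 + num_classes)].
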
Example #2
def xception_arg_scope(weight_decay=0.00004,
                       batch_norm_decay=0.9997,
                       batch_norm_epsilon=0.001,
                       batch_norm_scale=True,
                       weights_initializer_stddev=0.09,
                       activation_fn=tf.nn.relu,
                       regularize_depthwise=False,
                       use_batch_norm=True):
    """Defines the default Xception arg scope.

    Args:
      weight_decay: The weight decay to use for regularizing the model.
      batch_norm_decay: The moving average decay when estimating layer activation
        statistics in batch normalization.
      batch_norm_epsilon: Small constant to prevent division by zero when
        normalizing activations by their variance in batch normalization.
      batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
        activations in the batch normalization layer.
      weights_initializer_stddev: The standard deviation of the truncated normal
        weight initializer.
      activation_fn: The activation function in Xception.
      regularize_depthwise: Whether or not to apply L2-norm regularization on the
        depthwise convolution weights.
      use_batch_norm: Whether or not to use batch normalization.

    Returns:
      An `arg_scope` to use for the Xception models.
    """
    regular_func = regularizer(mode='l2', scale=weight_decay)
    depthwise_regularizer = regular_func if regularize_depthwise else None

    with arg_scope([conv2d, sep_conv2d],
                   weight_init=tf.truncated_normal_initializer(
                       stddev=weights_initializer_stddev),
                   activate=activation_fn,
                   batch_norm=use_batch_norm):
        with arg_scope([batch_norm2d],
                       decay=batch_norm_decay,
                       eps=batch_norm_epsilon,
                       affine=batch_norm_scale):
            with arg_scope([conv2d], weight_reg=regular_func):
                with arg_scope([sep_conv2d],
                               depthwise_weight_reg=depthwise_regularizer,
                               pointwise_weight_reg=regular_func) as arg_sc:
                    return arg_sc
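
For context, a minimal sketch of how such an arg scope is typically consumed (the backbone builder name below is a placeholder, not from the source):

# Hypothetical usage sketch.
with arg_scope(xception_arg_scope(weight_decay=4e-5,
                                  regularize_depthwise=False)):
    # conv2d / sep_conv2d / batch_norm2d calls made while building the
    # backbone now pick up the initializer, regularizer and BN defaults.
    features, end_points = build_xception_backbone(images)  # placeholder
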
Example #3
def _get_branch_logits(features,
                       num_classes,
                       atrous_rates=None,
                       aspp_with_batch_norm=False,
                       kernel_size=1,
                       weight_decay=0.0001,
                       reuse=tf.AUTO_REUSE,
                       scope_suffix=''):
    """Gets the logits from each model's branch.

    The underlying model is branched out in the last layer when atrous
    spatial pyramid pooling is employed, and all branches are sum-merged
    to form the final logits.

    Args:
      features: A float tensor of shape [batch, height, width, channels].
      num_classes: Number of classes to predict.
      atrous_rates: A list of atrous convolution rates for last layer.
      aspp_with_batch_norm: Use batch normalization layers for ASPP.
      kernel_size: Kernel size for convolution.
      weight_decay: Weight decay for the model variables.
      reuse: Reuse model variables or not.
      scope_suffix: Scope suffix for the model variables.

    Returns:
      Merged logits with shape [batch, height, width, num_classes].

    Raises:
      ValueError: Upon invalid input kernel_size value.
    """
    # When using batch normalization with ASPP, ASPP has been applied before
    # in _extract_features, and thus we simply apply 1x1 convolution here.
    if aspp_with_batch_norm or atrous_rates is None:
        if kernel_size != 1:
            raise ValueError('Kernel size must be 1 when atrous_rates is None or '
                             'using aspp_with_batch_norm. Got %d.' % kernel_size)
        atrous_rates = [1]

    with arg_scope(
            [conv2d],
            weight_reg=regularizer('l2', weight_decay),
            weight_init=tf.truncated_normal_initializer(stddev=0.01)):
        with tf.variable_scope(_LOGITS_SCOPE_NAME, _LOGITS_SCOPE_NAME, [features], reuse=reuse):
            branch_logits = []
            for i, rate in enumerate(atrous_rates):
                scope = scope_suffix
                if i:
                    scope += '_%d' % i

                branch_logits.append(
                    conv2d(
                        features,
                        outc=num_classes,
                        ksize=[kernel_size, kernel_size],
                        ratios=[rate, rate],
                        activate=None,
                        batch_norm=False,
                        use_bias=True,
                        name=scope))

            return tf.add_n(branch_logits)
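
A minimal call sketch (the rates, kernel size, and class count are assumed example values; `features` is the backbone output described in the docstring):

# Hypothetical usage: three atrous branches with 3x3 kernels, summed.
logits = _get_branch_logits(
    features,
    num_classes=21,
    atrous_rates=[6, 12, 18],
    aspp_with_batch_norm=False,
    kernel_size=3,
    weight_decay=1e-4)
# logits: [batch, height, width, 21]
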
Example #4
def refine_by_decoder(features,
                      end_points,
                      decoder_height,
                      decoder_width,
                      decoder_use_separable_conv=False,
                      model_variant=None,
                      weight_decay=0.0001,
                      reuse=tf.AUTO_REUSE,
                      is_training=False,
                      fine_tune_batch_norm=False):
    """Adds the decoder to obtain sharper segmentation results.

    Args:
      features: A tensor of size [batch, features_height, features_width,
        features_channels].
      end_points: A dictionary from components of the network to the corresponding
        activation.
      decoder_height: The height of decoder feature maps.
      decoder_width: The width of decoder feature maps.
      decoder_use_separable_conv: Employ separable convolution for decoder or not.
      model_variant: Model variant for feature extraction.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

    Returns:
      Decoder output with size [batch, decoder_height, decoder_width,
        decoder_channels].
    """
    batch_norm_params = {
        'is_training': is_training and fine_tune_batch_norm,
        'decay': 0.9997,
        'eps': 1e-5,
        'affine': True,
    }
    regularize_func = regularizer('l2', weight_decay)
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
        with arg_scope([sep_conv2d], activate=tf.nn.relu, activate_middle=tf.nn.relu,
                       batch_norm=True, depthwise_weight_reg=None, pointwise_weight_reg=regularize_func,
                       padding='SAME', strides=[1, 1]):
            with arg_scope([conv2d], activate=tf.nn.relu, weight_reg=regularize_func,
                           batch_norm=True, padding='SAME', strides=[1, 1]):
                with arg_scope([batch_norm2d], **batch_norm_params):
                    with tf.variable_scope(_DECODER_SCOPE, _DECODER_SCOPE, [features]):
                        feature_list = feature_extractor.networks_to_feature_maps[
                            model_variant][feature_extractor.DECODER_END_POINTS]
                        if feature_list is None:
                            tf.logging.info('No decoder end points found.')
                            return features
                        else:
                            decoder_features = features
                            for i, name in enumerate(feature_list):
                                decoder_features_list = [decoder_features]

                                suffix = list(end_points.keys())[0].split('/')[0]
                                feature_name = '{}/{}'.format(
                                    suffix, name)
                                # 1x1 conv to project the low-level feature map down to 48 channels.
                                decoder_features_list.append(
                                    conv2d(
                                        inputs=end_points[feature_name],
                                        outc=48,
                                        ksize=[1, 1],
                                        name='feature_projection' + str(i)))
                                # Resize to decoder_height/decoder_width.
                                for j, feature in enumerate(decoder_features_list):
                                    decoder_features_list[j] = tf.image.resize_bilinear(
                                        feature, [decoder_height, decoder_width], align_corners=True)
                                    decoder_features_list[j].set_shape(
                                        [None, decoder_height, decoder_width, None])
                                decoder_depth = 256
                                if decoder_use_separable_conv:
                                    # [3,3] kernel
                                    decoder_features = sep_conv2d(
                                        inputs=tf.concat(decoder_features_list, 3),
                                        ksize=[3, 3],
                                        outc=decoder_depth,
                                        ratios=[1, 1],
                                        name='decoder_conv0')
                                    decoder_features = sep_conv2d(
                                        inputs=decoder_features,
                                        ksize=[3, 3],
                                        outc=decoder_depth,
                                        ratios=[1, 1],
                                        name='decoder_conv1')
                                    DEBUG_VARS.decoder_features = decoder_features
                                else:
                                    decoder_features = conv2d(
                                        inputs=tf.concat(decoder_features_list, 3),
                                        outc=decoder_depth,
                                        ksize=[3, 3],
                                        name='decoder_conv0')
                                    decoder_features = conv2d(
                                        inputs=decoder_features,
                                        outc=decoder_depth,
                                        ksize=[3, 3],
                                        name='decoder_conv1')
                            return decoder_features
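
A minimal call sketch (the decoder size and model variant below are assumptions, chosen to match a 513x513 crop decoded at stride 4):

# Hypothetical usage; `features` and `end_points` come from the feature
# extractor (see Example #5).
decoder_output = refine_by_decoder(
    features,
    end_points,
    decoder_height=129,
    decoder_width=129,
    decoder_use_separable_conv=True,
    model_variant='xception_65',
    weight_decay=1e-4,
    is_training=True,
    fine_tune_batch_norm=False)
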
Example #5
def _extract_features(images,
                      model_options,
                      weight_decay=0.0001,
                      reuse=tf.AUTO_REUSE,
                      is_training=False,
                      fine_tune_batch_norm=False):
    """Extracts features by the particular model_variant.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

    Returns:
      concat_logits: A tensor of size [batch, feature_height, feature_width,
        feature_channels], where feature_height/feature_width are determined by
        the images height/width and output_stride.
      end_points: A dictionary from components of the network to the corresponding
        activation.
    """
    # feature extractor is a backbone factory
    DEBUG_VARS.raw_image = images
    features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        fine_tune_batch_norm=fine_tune_batch_norm)

    # TODO:check
    # DEBUG_VARS.xception_feature = end_points['xception_65/entry_flow/conv1_1/Relu:0']
    DEBUG_VARS.xception_feature = features
    if not model_options.aspp_with_batch_norm:
        return features, end_points
    else:
        batch_norm_params = {
            'is_training': is_training and fine_tune_batch_norm,
            'decay': 0.9997,
            'eps': 1e-5,
            'affine': True,
        }
        regularize_func = regularizer('l2', weight_decay)
        with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
            with arg_scope([sep_conv2d], activate=tf.nn.relu, activate_middle=tf.nn.relu, batch_norm=True,
                           depthwise_weight_reg=None, pointwise_weight_reg=regularize_func,
                           padding='SAME', strides=[1, 1]):
                with arg_scope([conv2d], activate=tf.nn.relu, weight_reg=regularize_func,
                               batch_norm=True, padding='SAME', strides=[1, 1]):
                    # TODO: ASPP IS IMPLEMENTED HERE! Check Out!
                    with arg_scope([batch_norm2d], **batch_norm_params):
                        depth = 256
                        branch_logits = []

                        # TODO: ADD IMAGE POOLING HERE
                        if model_options.add_image_level_feature:
                            # crop_size has already been scaled outside this function, so it matches
                            # the exact spatial size of this model's inputs.
                            pool_height = scale_dimension(model_options.crop_size[0],
                                                          1. / model_options.output_stride)
                            pool_width = scale_dimension(model_options.crop_size[1],
                                                         1. / model_options.output_stride)
                            # Global average pooling over the whole feature map (1x1 spatial output).
                            image_feature = avg_pool2d(
                                features, [pool_height, pool_width], [pool_height, pool_width],
                                padding='VALID')
                            # collapse channels to depth after GAP
                            image_feature = conv2d(
                                inputs=image_feature, outc=depth, ksize=[1, 1], name=_IMAGE_POOLING_SCOPE)
                            # TODO:check
                            DEBUG_VARS.image_feature = image_feature
                            # reshape it to final feature map shape
                            image_feature = tf.image.resize_bilinear(
                                image_feature, [pool_height, pool_width], align_corners=True)
                            image_feature.set_shape([None, pool_height, pool_width, depth])
                            # add image level feature to branch_logits
                            branch_logits.append(image_feature)

                        # Employ a 1x1 convolution.
                        branch_logits.append(conv2d(features, outc=depth, ksize=[1, 1], name=_ASPP_SCOPE + str(0)))

                        if model_options.atrous_rates:
                            # Employ 3x3 convolutions with different atrous rates.
                            DEBUG_VARS.aspp_features = []
                            for i, rate in enumerate(model_options.atrous_rates, 1):
                                scope = _ASPP_SCOPE + str(i)
                                if model_options.aspp_with_separable_conv:
                                    aspp_features = sep_conv2d(
                                        features, outc=depth, ksize=[3, 3], ratios=[rate, rate], name=scope)
                                    DEBUG_VARS.aspp_features.append(aspp_features)
                                else:
                                    aspp_features = conv2d(
                                        features, outc=depth, ksize=[3, 3], ratios=[rate, rate], name=scope)
                                branch_logits.append(aspp_features)

                        # Merge branch logits.
                        concat_logits = tf.concat(branch_logits, 3)
                        DEBUG_VARS.aspp_concat_feature = concat_logits
                        concat_logits = conv2d(inputs=concat_logits, outc=depth, ksize=[1, 1],
                                               name=_CONCAT_PROJECTION_SCOPE)
                        concat_logits = drop_out(concat_logits, kp_prob=0.9, is_training=is_training,
                                                 name=_CONCAT_PROJECTION_SCOPE + '_dropout')

                        return concat_logits, end_points
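
A minimal call sketch (the `ModelOptions` constructor and field values below are assumptions about the project's options class; the field names simply mirror the attributes read inside the function):

# Hypothetical usage sketch.
options = ModelOptions(
    output_stride=16,
    multi_grid=None,
    model_variant='xception_65',
    aspp_with_batch_norm=True,
    aspp_with_separable_conv=True,
    atrous_rates=[6, 12, 18],
    add_image_level_feature=True,
    crop_size=[513, 513])
concat_logits, end_points = _extract_features(
    images, options, weight_decay=1e-4, is_training=True,
    fine_tune_batch_norm=False)
# concat_logits has 256 channels (the ASPP concat-projection depth).
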