Esempio n. 1
0
 def setUp(self):
   """Creates the `DensePredictionCell` fixture from a three-entry config."""
   dpc = dense_prediction_cell

   # Entry 0: convolution with kernel 1 reading input index -1
   # (presumably the features fed to the cell -- confirm against
   # DensePredictionCell).
   pointwise_conv = {
       dpc._INPUT: -1,
       dpc._OP: dpc._CONV,
       dpc._KERNEL: 1,
   }
   # Entry 1: convolution with kernel 3 and rate [1, 3], reading entry 0.
   atrous_conv = {
       dpc._INPUT: 0,
       dpc._OP: dpc._CONV,
       dpc._KERNEL: 3,
       dpc._RATE: [1, 3],
   }
   # Entry 2: pyramid pooling with grid size [1, 2], reading entry 1.
   pyramid_pooling = {
       dpc._INPUT: 1,
       dpc._OP: dpc._PYRAMID_POOLING,
       dpc._GRID_SIZE: [1, 2],
   }

   self.segmentation_layer = dpc.DensePredictionCell(
       config=[pointwise_conv, atrous_conv, pyramid_pooling],
       hparams={'conv_rate_multiplier': 2})
Esempio n. 2
0
def extract_features(images,
                     model_options,
                     weight_decay=0.0001,
                     reuse=None,
                     is_training=False,
                     fine_tune_batch_norm=False,
                     nas_training_hyper_parameters=None):
    """Computes backbone features and, when enabled, a context module on top.

    The backbone output comes from `feature_extractor.extract_features`.
    What happens afterwards is selected by `model_options`:

    * `aspp_with_batch_norm` falsy: the backbone output is returned as is.
    * `dense_prediction_cell_config` is not None: a `DensePredictionCell`
      built from that config is applied to the backbone output.
    * otherwise: the ASPP branches (optional image-level pooling, one 1x1
      convolution, one 3x3 atrous convolution per entry of `atrous_rates`)
      are concatenated along axis 3, optionally projected by a 1x1
      convolution followed by dropout, and optionally multiplied by the
      image-level feature (squeeze-and-excitation mode).

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.
      nas_training_hyper_parameters: A dictionary storing hyper-parameters
        for training nas models. Its keys are:
        - `drop_path_keep_prob`: Probability to keep each path in the cell
          when training.
        - `total_training_steps`: Total training steps to help drop path
          probability calculation.

    Returns:
      concat_logits: A tensor of size [batch, feature_height, feature_width,
        feature_channels], where feature_height/feature_width are determined
        by the images height/width and output_stride. When ASPP is disabled
        this is the unmodified backbone output.
      end_points: A dictionary from components of the network to the
        corresponding activation.
    """
    backbone_features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        depth_multiplier=model_options.depth_multiplier,
        divisible_by=model_options.divisible_by,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        preprocessed_images_dtype=model_options.preprocessed_images_dtype,
        fine_tune_batch_norm=fine_tune_batch_norm,
        nas_architecture_options=model_options.nas_architecture_options,
        nas_training_hyper_parameters=nas_training_hyper_parameters,
        use_bounded_activation=model_options.use_bounded_activation)

    # ASPP disabled: hand back the backbone output untouched.
    if not model_options.aspp_with_batch_norm:
        return backbone_features, end_points

    # A dense prediction cell config takes precedence over the ASPP module.
    if model_options.dense_prediction_cell_config is not None:
        tf.compat.v1.logging.info('Using dense prediction cell config.')
        cell = dense_prediction_cell.DensePredictionCell(
            config=model_options.dense_prediction_cell_config,
            hparams={
                'conv_rate_multiplier': 16 // model_options.output_stride,
            })
        cell_output = cell.build_cell(
            backbone_features,
            output_stride=model_options.output_stride,
            crop_size=model_options.crop_size,
            image_pooling_crop_size=model_options.image_pooling_crop_size,
            weight_decay=weight_decay,
            reuse=reuse,
            is_training=is_training,
            fine_tune_batch_norm=fine_tune_batch_norm)
        return cell_output, end_points

    # From here on: the DeepLabv3 ASPP module. It could be expressed as one
    # particular dense prediction cell architecture, but the explicit code
    # is kept for backward compatibility.
    norm_kwargs = utils.get_batch_norm_params(
        decay=0.9997,
        epsilon=1e-5,
        scale=True,
        is_training=(is_training and fine_tune_batch_norm),
        sync_batch_norm_method=model_options.sync_batch_norm_method)
    batch_norm = utils.get_batch_norm_fn(model_options.sync_batch_norm_method)
    if model_options.use_bounded_activation:
        conv_activation = tf.nn.relu6
    else:
        conv_activation = tf.nn.relu

    with slim.arg_scope(
            [slim.conv2d, slim.separable_conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            activation_fn=conv_activation,
            normalizer_fn=batch_norm,
            padding='SAME',
            stride=1,
            reuse=reuse):
        with slim.arg_scope([batch_norm], **norm_kwargs):
            num_filters = model_options.aspp_convs_filters
            branches = []

            if model_options.add_image_level_feature:
                if model_options.crop_size is None:
                    # Without a crop size, pool over the whole feature map
                    # and resize back to its dynamic spatial shape.
                    resize_height = tf.shape(backbone_features)[1]
                    resize_width = tf.shape(backbone_features)[2]
                    pooled = tf.reduce_mean(
                        backbone_features, axis=[1, 2], keepdims=True)
                else:
                    # The pooling window falls back to crop_size when no
                    # dedicated image_pooling_crop_size is given.
                    pooling_crop = model_options.image_pooling_crop_size
                    if pooling_crop is None:
                        pooling_crop = model_options.crop_size
                    downscale = 1. / model_options.output_stride
                    window_height = scale_dimension(pooling_crop[0],
                                                    downscale)
                    window_width = scale_dimension(pooling_crop[1], downscale)
                    pooled = slim.avg_pool2d(
                        backbone_features, [window_height, window_width],
                        model_options.image_pooling_stride,
                        padding='VALID')
                    resize_height = scale_dimension(
                        model_options.crop_size[0], downscale)
                    resize_width = scale_dimension(
                        model_options.crop_size[1], downscale)

                # In squeeze-and-excitation mode the pooled feature acts as
                # a gate: sigmoid (or q_sigmoid) activation, no normalizer.
                if model_options.aspp_with_squeeze_and_excitation:
                    pooled_normalizer = None
                    if model_options.image_se_uses_qsigmoid:
                        pooled_activation = utils.q_sigmoid
                    else:
                        pooled_activation = tf.nn.sigmoid
                else:
                    pooled_activation = tf.nn.relu
                    pooled_normalizer = batch_norm

                pooled = slim.conv2d(
                    pooled,
                    num_filters,
                    1,
                    activation_fn=pooled_activation,
                    normalizer_fn=pooled_normalizer,
                    scope=IMAGE_POOLING_SCOPE)
                pooled = _resize_bilinear(
                    pooled, [resize_height, resize_width], pooled.dtype)
                # Only non-Tensor sizes are recorded in the static shape;
                # Tensor-valued sizes are left as None.
                static_height = (None
                                 if isinstance(resize_height, tf.Tensor)
                                 else resize_height)
                static_width = (None
                                if isinstance(resize_width, tf.Tensor)
                                else resize_width)
                pooled.set_shape(
                    [None, static_height, static_width, num_filters])
                # As a gate it is applied after the merge instead of being
                # concatenated as a branch.
                if not model_options.aspp_with_squeeze_and_excitation:
                    branches.append(pooled)

            # The 1x1 convolution branch.
            pointwise = slim.conv2d(
                backbone_features, num_filters, 1, scope=ASPP_SCOPE + str(0))
            branches.append(pointwise)

            # One 3x3 branch per atrous rate, numbered from 1.
            if model_options.atrous_rates:
                for branch_index, atrous_rate in enumerate(
                        model_options.atrous_rates, start=1):
                    branch_scope = ASPP_SCOPE + str(branch_index)
                    if model_options.aspp_with_separable_conv:
                        branch = split_separable_conv2d(
                            backbone_features,
                            filters=num_filters,
                            rate=atrous_rate,
                            weight_decay=weight_decay,
                            scope=branch_scope)
                    else:
                        branch = slim.conv2d(
                            backbone_features,
                            num_filters,
                            3,
                            rate=atrous_rate,
                            scope=branch_scope)
                    branches.append(branch)

            # Merge the branches along axis 3.
            concat_logits = tf.concat(branches, axis=3)
            if model_options.aspp_with_concat_projection:
                concat_logits = slim.conv2d(
                    concat_logits,
                    num_filters,
                    1,
                    scope=CONCAT_PROJECTION_SCOPE)
                concat_logits = slim.dropout(
                    concat_logits,
                    keep_prob=0.9,
                    is_training=is_training,
                    scope=CONCAT_PROJECTION_SCOPE + '_dropout')

            gate_with_pooled_feature = (
                model_options.add_image_level_feature and
                model_options.aspp_with_squeeze_and_excitation)
            if gate_with_pooled_feature:
                concat_logits *= pooled

            return concat_logits, end_points
Esempio n. 3
0
def extract_features(images,
                     model_options,
                     weight_decay=0.0001,
                     reuse=None,
                     is_training=False,
                     fine_tune_batch_norm=False):
    """Computes backbone features and, when enabled, a context module on top.

    The backbone output comes from `feature_extractor.extract_features`.
    What happens afterwards is selected by `model_options`:

    * `aspp_with_batch_norm` falsy: the backbone output is returned as is.
    * `dense_prediction_cell_config` is not None: a `DensePredictionCell`
      built from that config is applied to the backbone output.
    * otherwise: the ASPP branches (optional image-level pooling, one 1x1
      convolution, one 3x3 atrous convolution per entry of `atrous_rates`),
      each with 256 output filters, are concatenated along axis 3 and then
      projected by a 1x1 convolution followed by dropout.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

    Returns:
      concat_logits: A tensor of size [batch, feature_height, feature_width,
        feature_channels], where feature_height/feature_width are determined
        by the images height/width and output_stride. When ASPP is disabled
        this is the unmodified backbone output.
      end_points: A dictionary from components of the network to the
        corresponding activation.
    """
    backbone_features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        depth_multiplier=model_options.depth_multiplier,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        fine_tune_batch_norm=fine_tune_batch_norm)

    # ASPP disabled: hand back the backbone output untouched.
    if not model_options.aspp_with_batch_norm:
        return backbone_features, end_points

    # A dense prediction cell config takes precedence over the ASPP module.
    if model_options.dense_prediction_cell_config is not None:
        tf.logging.info('Using dense prediction cell config.')
        cell = dense_prediction_cell.DensePredictionCell(
            config=model_options.dense_prediction_cell_config,
            hparams={
                'conv_rate_multiplier': 16 // model_options.output_stride,
            })
        cell_output = cell.build_cell(
            backbone_features,
            output_stride=model_options.output_stride,
            crop_size=model_options.crop_size,
            image_pooling_crop_size=model_options.image_pooling_crop_size,
            weight_decay=weight_decay,
            reuse=reuse,
            is_training=is_training,
            fine_tune_batch_norm=fine_tune_batch_norm)
        return cell_output, end_points

    # From here on: the DeepLabv3 ASPP module. It could be expressed as one
    # particular dense prediction cell architecture, but the explicit code
    # is kept for backward compatibility.
    norm_kwargs = {
        'is_training': is_training and fine_tune_batch_norm,
        'decay': 0.9997,
        'epsilon': 1e-5,
        'scale': True,
    }

    with slim.arg_scope(
            [slim.conv2d, slim.separable_conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            padding='SAME',
            stride=1,
            reuse=reuse):
        with slim.arg_scope([slim.batch_norm], **norm_kwargs):
            # Every ASPP convolution in this version emits 256 channels.
            num_filters = 256
            branches = []

            if model_options.add_image_level_feature:
                if model_options.crop_size is None:
                    # Without a crop size, pool over the whole feature map
                    # and resize back to its dynamic spatial shape.
                    resize_height = tf.shape(backbone_features)[1]
                    resize_width = tf.shape(backbone_features)[2]
                    pooled = tf.reduce_mean(
                        backbone_features, axis=[1, 2], keepdims=True)
                else:
                    # The pooling window falls back to crop_size when no
                    # dedicated image_pooling_crop_size is given.
                    pooling_crop = model_options.image_pooling_crop_size
                    if pooling_crop is None:
                        pooling_crop = model_options.crop_size
                    downscale = 1. / model_options.output_stride
                    window_height = scale_dimension(pooling_crop[0],
                                                    downscale)
                    window_width = scale_dimension(pooling_crop[1], downscale)
                    pooled = slim.avg_pool2d(
                        backbone_features, [window_height, window_width],
                        [1, 1],
                        padding='VALID')
                    resize_height = scale_dimension(
                        model_options.crop_size[0], downscale)
                    resize_width = scale_dimension(
                        model_options.crop_size[1], downscale)

                pooled = slim.conv2d(
                    pooled, num_filters, 1, scope=IMAGE_POOLING_SCOPE)
                pooled = _resize_bilinear(
                    pooled, [resize_height, resize_width], pooled.dtype)
                # Only non-Tensor sizes are recorded in the static shape;
                # Tensor-valued sizes are left as None.
                static_height = (None
                                 if isinstance(resize_height, tf.Tensor)
                                 else resize_height)
                static_width = (None
                                if isinstance(resize_width, tf.Tensor)
                                else resize_width)
                pooled.set_shape(
                    [None, static_height, static_width, num_filters])
                branches.append(pooled)

            # The 1x1 convolution branch.
            pointwise = slim.conv2d(
                backbone_features, num_filters, 1, scope=ASPP_SCOPE + str(0))
            branches.append(pointwise)

            # One 3x3 branch per atrous rate, numbered from 1.
            if model_options.atrous_rates:
                for branch_index, atrous_rate in enumerate(
                        model_options.atrous_rates, start=1):
                    branch_scope = ASPP_SCOPE + str(branch_index)
                    if model_options.aspp_with_separable_conv:
                        branch = split_separable_conv2d(
                            backbone_features,
                            filters=num_filters,
                            rate=atrous_rate,
                            weight_decay=weight_decay,
                            scope=branch_scope)
                    else:
                        branch = slim.conv2d(
                            backbone_features,
                            num_filters,
                            3,
                            rate=atrous_rate,
                            scope=branch_scope)
                    branches.append(branch)

            # Merge the branches along axis 3, then project and regularize.
            merged = tf.concat(branches, axis=3)
            projected = slim.conv2d(
                merged, num_filters, 1, scope=CONCAT_PROJECTION_SCOPE)
            concat_logits = slim.dropout(
                projected,
                keep_prob=0.9,
                is_training=is_training,
                scope=CONCAT_PROJECTION_SCOPE + '_dropout')

            return concat_logits, end_points
Esempio n. 4
0
def extract_features(
        images,
        model_options,
        weight_decay=0.0001,
        reuse=None,
        is_training=False,
        fine_tune_batch_norm=False,
        nas_training_hyper_parameters=None):
    """Extracts backbone features and optionally refines them with ASPP.

    The images first go through the backbone network
    (`feature_extractor.extract_features`). Depending on `model_options`
    the result is then returned unchanged, passed through a
    `DensePredictionCell`, or passed through the ASPP module: parallel
    branches are concatenated along axis 3, optionally followed by a 1x1
    projection convolution with dropout, and optionally multiplied by the
    image-level feature when squeeze-and-excitation is enabled.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.
      nas_training_hyper_parameters: A dictionary storing hyper-parameters
        for training nas models. Its keys are:
        - `drop_path_keep_prob`: Probability to keep each path in the cell
          when training.
        - `total_training_steps`: Total training steps to help drop path
          probability calculation.

    Returns:
      concat_logits: A tensor of size [batch, feature_height, feature_width,
        feature_channels], where feature_height/feature_width are determined
        by the images height/width and output_stride. When ASPP is disabled
        this is the unmodified backbone output.
      end_points: A dictionary from components of the network to the
        corresponding activation.
    """
    # Feature map produced by the backbone network.
    # NOTE(review): depth_multiplier and divisible_by are presumably only
    # relevant to MobileNet variants -- confirm in feature_extractor.
    trunk_features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        depth_multiplier=model_options.depth_multiplier,
        divisible_by=model_options.divisible_by,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        preprocessed_images_dtype=model_options.preprocessed_images_dtype,
        fine_tune_batch_norm=fine_tune_batch_norm,
        nas_architecture_options=model_options.nas_architecture_options,
        nas_training_hyper_parameters=nas_training_hyper_parameters,
        use_bounded_activation=model_options.use_bounded_activation)

    # When ASPP is not wanted, return the backbone feature map directly.
    if not model_options.aspp_with_batch_norm:
        return trunk_features, end_points

    # A dense prediction cell config takes precedence over the ASPP module.
    if model_options.dense_prediction_cell_config is not None:
        tf.logging.info('Using dense prediction cell config.')
        prediction_cell = dense_prediction_cell.DensePredictionCell(
            config=model_options.dense_prediction_cell_config,
            hparams={
                'conv_rate_multiplier': 16 // model_options.output_stride,
            })
        cell_logits = prediction_cell.build_cell(
            trunk_features,
            output_stride=model_options.output_stride,
            crop_size=model_options.crop_size,
            image_pooling_crop_size=model_options.image_pooling_crop_size,
            weight_decay=weight_decay,
            reuse=reuse,
            is_training=is_training,
            fine_tune_batch_norm=fine_tune_batch_norm)
        return cell_logits, end_points

    # Atrous spatial pyramid pooling (ASPP), DeepLabv3 style. It could be
    # expressed as one particular dense prediction cell architecture, but
    # the explicit code is kept for backward compatibility.
    bn_kwargs = utils.get_batch_norm_params(
        decay=0.9997,
        epsilon=1e-5,
        scale=True,
        is_training=(is_training and fine_tune_batch_norm),
        sync_batch_norm_method=model_options.sync_batch_norm_method)
    batch_norm = utils.get_batch_norm_fn(model_options.sync_batch_norm_method)
    # Bounded activation selects relu6; otherwise plain relu.
    if model_options.use_bounded_activation:
        default_activation = tf.nn.relu6
    else:
        default_activation = tf.nn.relu

    with slim.arg_scope(
            [slim.conv2d, slim.separable_conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            activation_fn=default_activation,
            normalizer_fn=batch_norm,
            padding='SAME',
            stride=1,
            reuse=reuse):
        with slim.arg_scope([batch_norm], **bn_kwargs):
            # Number of filters used by every ASPP convolution.
            aspp_depth = model_options.aspp_convs_filters
            # Collects the parallel ASPP branch outputs.
            parallel_branches = []

            # Image-level pooling branch.
            if model_options.add_image_level_feature:
                if model_options.crop_size is None:
                    # With no crop size, simply do global pooling and
                    # resize back to the dynamic feature-map size.
                    resize_height = tf.shape(trunk_features)[1]
                    resize_width = tf.shape(trunk_features)[2]
                    image_level = tf.reduce_mean(
                        trunk_features, axis=[1, 2], keepdims=True)
                else:
                    # Use crop_size when image_pooling_crop_size is not
                    # specified.
                    pool_crop = model_options.image_pooling_crop_size
                    if pool_crop is None:
                        pool_crop = model_options.crop_size
                    stride_scale = 1. / model_options.output_stride
                    # Pooling window: the pooling crop size scaled by
                    # 1 / output_stride.
                    kernel_height = scale_dimension(pool_crop[0],
                                                    stride_scale)
                    kernel_width = scale_dimension(pool_crop[1],
                                                   stride_scale)
                    # Image pooling is an average pool.
                    image_level = slim.avg_pool2d(
                        trunk_features, [kernel_height, kernel_width],
                        model_options.image_pooling_stride,
                        padding='VALID')
                    # Target size for the later resize: crop_size scaled by
                    # 1 / output_stride.
                    resize_height = scale_dimension(
                        model_options.crop_size[0], stride_scale)
                    resize_width = scale_dimension(
                        model_options.crop_size[1], stride_scale)

                # Squeeze-and-excitation swaps relu + batch norm for a
                # sigmoid (or q_sigmoid) gate without a normalizer.
                if model_options.aspp_with_squeeze_and_excitation:
                    image_level_normalizer = None
                    if model_options.image_se_uses_qsigmoid:
                        image_level_activation = utils.q_sigmoid
                    else:
                        image_level_activation = tf.nn.sigmoid
                else:
                    image_level_activation = tf.nn.relu
                    image_level_normalizer = batch_norm

                # 1x1 convolution on the pooled feature.
                image_level = slim.conv2d(
                    image_level,
                    aspp_depth,
                    1,
                    activation_fn=image_level_activation,
                    normalizer_fn=image_level_normalizer,
                    scope=IMAGE_POOLING_SCOPE)
                # Resize to [resize_height, resize_width].
                image_level = _resize_bilinear(
                    image_level, [resize_height, resize_width],
                    image_level.dtype)
                # Only non-Tensor sizes are recorded in the static shape;
                # Tensor-valued sizes are left as None.
                known_height = (None
                                if isinstance(resize_height, tf.Tensor)
                                else resize_height)
                known_width = (None
                               if isinstance(resize_width, tf.Tensor)
                               else resize_width)
                image_level.set_shape(
                    [None, known_height, known_width, aspp_depth])
                # In squeeze-and-excitation mode the feature is applied as
                # a multiplier after the merge, not concatenated.
                if not model_options.aspp_with_squeeze_and_excitation:
                    parallel_branches.append(image_level)

            # Employ a 1x1 convolution.
            conv_1x1 = slim.conv2d(
                trunk_features, aspp_depth, 1, scope=ASPP_SCOPE + str(0))
            parallel_branches.append(conv_1x1)

            # Employ 3x3 convolutions with different atrous rates.
            if model_options.atrous_rates:
                for position, dilation in enumerate(
                        model_options.atrous_rates, start=1):
                    scope_name = ASPP_SCOPE + str(position)
                    if model_options.aspp_with_separable_conv:
                        # Separable variant of the atrous convolution.
                        atrous_output = split_separable_conv2d(
                            trunk_features,
                            filters=aspp_depth,
                            rate=dilation,
                            weight_decay=weight_decay,
                            scope=scope_name)
                    else:
                        atrous_output = slim.conv2d(
                            trunk_features,
                            aspp_depth,
                            3,
                            rate=dilation,
                            scope=scope_name)
                    parallel_branches.append(atrous_output)

            # Merge branch logits by concatenating along axis 3.
            concat_logits = tf.concat(parallel_branches, axis=3)
            if model_options.aspp_with_concat_projection:
                # 1x1 projection after the concatenation, then dropout.
                concat_logits = slim.conv2d(
                    concat_logits,
                    aspp_depth,
                    1,
                    scope=CONCAT_PROJECTION_SCOPE)
                concat_logits = slim.dropout(
                    concat_logits,
                    keep_prob=0.9,
                    is_training=is_training,
                    scope=CONCAT_PROJECTION_SCOPE + '_dropout')

            apply_image_level_gate = (
                model_options.add_image_level_feature and
                model_options.aspp_with_squeeze_and_excitation)
            if apply_image_level_gate:
                concat_logits *= image_level

            return concat_logits, end_points