def extract_features(images,
                     model_options,
                     weight_decay=0.0001,
                     reuse=None,
                     is_training=False,
                     fine_tune_batch_norm=False,
                     fine_tune_feature_extractor=True):
    """Extracts features by the particular model_variant.

  Args:
    images: A tensor of size [batch, height, width, channels].
    model_options: A ModelOptions instance to configure models.
    weight_decay: The weight decay for model variables.
    reuse: Reuse the model variables or not.
    is_training: Is training or not.
    fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

  Returns:
    concat_logits: A tensor of size [batch, feature_height, feature_width,
      feature_channels], where feature_height/feature_width are determined by
      the images height/width and output_stride.
    end_points: A dictionary from components of the network to the corresponding
      activation.
  """
    features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        depth_multiplier=model_options.depth_multiplier,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        fine_tune_batch_norm=fine_tune_batch_norm)

    if not model_options.aspp_with_batch_norm:
        return features, end_points
    else:
        return aspp_with_batch_norm(features,
                                    model_options,
                                    weight_decay,
                                    reuse,
                                    is_training,
                                    fine_tune_batch_norm,
                                    activation_fn=tf.nn.relu,
                                    depth=256), end_points, features
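Every variant in this listing relies on a scale_dimension helper that is not shown. For reference, a minimal sketch matching its usual DeepLab behavior, the (dim - 1) * scale + 1 formula that preserves aligned corners; the tensor branch is an assumption based on the upstream codebase:

import tensorflow as tf


def scale_dimension(dim, scale):
    """Scales a spatial dimension: (dim - 1) * scale + 1.

    E.g. scale_dimension(513, 1. / 16) returns 33 rather than 32, which
    keeps corners aligned under bilinear resizing.
    """
    if isinstance(dim, tf.Tensor):
        return tf.cast((tf.cast(dim, tf.float32) - 1.0) * scale + 1.0,
                       tf.int32)
    return int((float(dim) - 1.0) * scale + 1.0)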
Example #2
def extract_features(images,
                     model_options,
                     weight_decay=0.0001,
                     reuse=None,
                     is_training=False,
                     fine_tune_batch_norm=False,
                     nas_training_hyper_parameters=None):
    """Extracts features by the particular model_variant.

  Args:
    images: A tensor of size [batch, height, width, channels].
    model_options: A ModelOptions instance to configure models.
    weight_decay: The weight decay for model variables.
    reuse: Reuse the model variables or not.
    is_training: Is training or not.
    fine_tune_batch_norm: Fine-tune the batch norm parameters or not.
    nas_training_hyper_parameters: A dictionary storing hyper-parameters for
      training nas models. Its keys are:
      - `drop_path_keep_prob`: Probability to keep each path in the cell when
        training.
      - `total_training_steps`: Total training steps to help drop path
        probability calculation.

  Returns:
    concat_logits: A tensor of size [batch, feature_height, feature_width,
      feature_channels], where feature_height/feature_width are determined by
      the images height/width and output_stride.
    end_points: A dictionary from components of the network to the corresponding
      activation.
  """
    features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        depth_multiplier=model_options.depth_multiplier,
        divisible_by=model_options.divisible_by,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        preprocessed_images_dtype=model_options.preprocessed_images_dtype,
        fine_tune_batch_norm=fine_tune_batch_norm,
        nas_architecture_options=model_options.nas_architecture_options,
        nas_training_hyper_parameters=nas_training_hyper_parameters,
        use_bounded_activation=model_options.use_bounded_activation)

    if not model_options.aspp_with_batch_norm:
        return features, end_points
    else:
        if model_options.dense_prediction_cell_config is not None:
            tf.compat.v1.logging.info('Using dense prediction cell config.')
            dense_prediction_layer = dense_prediction_cell.DensePredictionCell(
                config=model_options.dense_prediction_cell_config,
                hparams={
                    'conv_rate_multiplier': 16 // model_options.output_stride,
                })
            concat_logits = dense_prediction_layer.build_cell(
                features,
                output_stride=model_options.output_stride,
                crop_size=model_options.crop_size,
                image_pooling_crop_size=model_options.image_pooling_crop_size,
                weight_decay=weight_decay,
                reuse=reuse,
                is_training=is_training,
                fine_tune_batch_norm=fine_tune_batch_norm)
            return concat_logits, end_points
        else:
            # The following code employs the DeepLabv3 ASPP module. Note that
            # we could express the ASPP module as one particular dense
            # prediction cell architecture, but we keep this code for backward
            # compatibility.
            batch_norm_params = utils.get_batch_norm_params(
                decay=0.9997,
                epsilon=1e-5,
                scale=True,
                is_training=(is_training and fine_tune_batch_norm),
                sync_batch_norm_method=model_options.sync_batch_norm_method)
            batch_norm = utils.get_batch_norm_fn(
                model_options.sync_batch_norm_method)
            activation_fn = (tf.nn.relu6
                             if model_options.use_bounded_activation else
                             tf.nn.relu)
            with slim.arg_scope(
                [slim.conv2d, slim.separable_conv2d],
                    weights_regularizer=slim.l2_regularizer(weight_decay),
                    activation_fn=activation_fn,
                    normalizer_fn=batch_norm,
                    padding='SAME',
                    stride=1,
                    reuse=reuse):
                with slim.arg_scope([batch_norm], **batch_norm_params):
                    depth = model_options.aspp_convs_filters
                    branch_logits = []

                    if model_options.add_image_level_feature:
                        if model_options.crop_size is not None:
                            image_pooling_crop_size = model_options.image_pooling_crop_size
                            # If image_pooling_crop_size is not specified, use crop_size.
                            if image_pooling_crop_size is None:
                                image_pooling_crop_size = model_options.crop_size
                            pool_height = scale_dimension(
                                image_pooling_crop_size[0],
                                1. / model_options.output_stride)
                            pool_width = scale_dimension(
                                image_pooling_crop_size[1],
                                1. / model_options.output_stride)
                            image_feature = slim.avg_pool2d(
                                features, [pool_height, pool_width],
                                model_options.image_pooling_stride,
                                padding='VALID')
                            resize_height = scale_dimension(
                                model_options.crop_size[0],
                                1. / model_options.output_stride)
                            resize_width = scale_dimension(
                                model_options.crop_size[1],
                                1. / model_options.output_stride)
                        else:
                            # If crop_size is None, we simply do global pooling.
                            pool_height = tf.shape(features)[1]
                            pool_width = tf.shape(features)[2]
                            image_feature = tf.reduce_mean(features,
                                                           axis=[1, 2],
                                                           keepdims=True)
                            resize_height = pool_height
                            resize_width = pool_width
                        image_feature_activation_fn = tf.nn.relu
                        image_feature_normalizer_fn = batch_norm
                        if model_options.aspp_with_squeeze_and_excitation:
                            image_feature_activation_fn = tf.nn.sigmoid
                            if model_options.image_se_uses_qsigmoid:
                                image_feature_activation_fn = utils.q_sigmoid
                            image_feature_normalizer_fn = None
                        image_feature = slim.conv2d(
                            image_feature,
                            depth,
                            1,
                            activation_fn=image_feature_activation_fn,
                            normalizer_fn=image_feature_normalizer_fn,
                            scope=IMAGE_POOLING_SCOPE)
                        image_feature = _resize_bilinear(
                            image_feature, [resize_height, resize_width],
                            image_feature.dtype)
                        # Set shape for resize_height/resize_width if they are not Tensor.
                        if isinstance(resize_height, tf.Tensor):
                            resize_height = None
                        if isinstance(resize_width, tf.Tensor):
                            resize_width = None
                        image_feature.set_shape(
                            [None, resize_height, resize_width, depth])
                        if not model_options.aspp_with_squeeze_and_excitation:
                            branch_logits.append(image_feature)

                    # Employ a 1x1 convolution.
                    branch_logits.append(
                        slim.conv2d(features,
                                    depth,
                                    1,
                                    scope=ASPP_SCOPE + str(0)))

                    if model_options.atrous_rates:
                        # Employ 3x3 convolutions with different atrous rates.
                        for i, rate in enumerate(model_options.atrous_rates,
                                                 1):
                            scope = ASPP_SCOPE + str(i)
                            if model_options.aspp_with_separable_conv:
                                aspp_features = split_separable_conv2d(
                                    features,
                                    filters=depth,
                                    rate=rate,
                                    weight_decay=weight_decay,
                                    scope=scope)
                            else:
                                aspp_features = slim.conv2d(features,
                                                            depth,
                                                            3,
                                                            rate=rate,
                                                            scope=scope)
                            branch_logits.append(aspp_features)

                    # Merge branch logits.
                    concat_logits = tf.concat(branch_logits, 3)
                    if model_options.aspp_with_concat_projection:
                        concat_logits = slim.conv2d(
                            concat_logits,
                            depth,
                            1,
                            scope=CONCAT_PROJECTION_SCOPE)
                        concat_logits = slim.dropout(
                            concat_logits,
                            keep_prob=0.9,
                            is_training=is_training,
                            scope=CONCAT_PROJECTION_SCOPE + '_dropout')
                    if (model_options.add_image_level_feature and
                            model_options.aspp_with_squeeze_and_excitation):
                        concat_logits *= image_feature

                    return concat_logits, end_points
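The nas_training_hyper_parameters argument documented above is a plain dict with the two keys the docstring names. A hypothetical call, assuming images and model_options are already built:

nas_training_hyper_parameters = {
    'drop_path_keep_prob': 0.9,      # illustrative value
    'total_training_steps': 500000,  # illustrative value
}
features, end_points = extract_features(
    images,
    model_options,
    is_training=True,
    fine_tune_batch_norm=True,
    nas_training_hyper_parameters=nas_training_hyper_parameters)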
Example #3
def _extract_features(images,
                      model_options,
                      weight_decay=0.0001,
                      reuse=None,
                      is_training=False,
                      fine_tune_batch_norm=False):
  """Extracts features by the particular model_variant.

  Args:
    images: A tensor of size [batch, height, width, channels].
    model_options: A ModelOptions instance to configure models.
    weight_decay: The weight decay for model variables.
    reuse: Reuse the model variables or not.
    is_training: Is training or not.
    fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

  Returns:
    concat_logits: A tensor of size [batch, feature_height, feature_width,
      feature_channels], where feature_height/feature_width are determined by
      the images height/width and output_stride.
    end_points: A dictionary from components of the network to the corresponding
      activation.
  """
  features, end_points = feature_extractor.extract_features(
      images,
      output_stride=model_options.output_stride,
      multi_grid=model_options.multi_grid,
      model_variant=model_options.model_variant,
      depth_multiplier=model_options.depth_multiplier,
      weight_decay=weight_decay,
      reuse=reuse,
      is_training=is_training,
      fine_tune_batch_norm=fine_tune_batch_norm)

  if not model_options.aspp_with_batch_norm:
    return features, end_points
  else:
    batch_norm_params = {
        'is_training': is_training and fine_tune_batch_norm,
        'decay': 0.9997,
        'epsilon': 1e-5,
        'scale': True,
    }

    with slim.arg_scope(
        [slim.conv2d, slim.separable_conv2d],
        weights_regularizer=slim.l2_regularizer(weight_decay),
        activation_fn=tf.nn.relu,
        normalizer_fn=slim.batch_norm,
        padding='SAME',
        stride=1,
        reuse=reuse):
      with slim.arg_scope([slim.batch_norm], **batch_norm_params):
        depth = 256
        branch_logits = []

        if model_options.add_image_level_feature:
          pool_height = scale_dimension(model_options.crop_size[0],
                                        1. / model_options.output_stride)
          pool_width = scale_dimension(model_options.crop_size[1],
                                       1. / model_options.output_stride)
          image_feature = slim.avg_pool2d(
              features, [pool_height, pool_width], [pool_height, pool_width],
              padding='VALID')
          image_feature = slim.conv2d(
              image_feature, depth, 1, scope=_IMAGE_POOLING_SCOPE)
          image_feature = tf.image.resize_bilinear(
              image_feature, [pool_height, pool_width], align_corners=True)
          image_feature.set_shape([None, pool_height, pool_width, depth])
          branch_logits.append(image_feature)

        # Employ a 1x1 convolution.
        branch_logits.append(slim.conv2d(features, depth, 1,
                                         scope=_ASPP_SCOPE + str(0)))

        if model_options.atrous_rates:
          # Employ 3x3 convolutions with different atrous rates.
          for i, rate in enumerate(model_options.atrous_rates, 1):
            scope = _ASPP_SCOPE + str(i)
            if model_options.aspp_with_separable_conv:
              aspp_features = _split_separable_conv2d(
                  features,
                  filters=depth,
                  rate=rate,
                  weight_decay=weight_decay,
                  scope=scope)
            else:
              aspp_features = slim.conv2d(
                  features, depth, 3, rate=rate, scope=scope)
            branch_logits.append(aspp_features)

        # Merge branch logits.
        concat_logits = tf.concat(branch_logits, 3)
        concat_logits = slim.conv2d(
            concat_logits, depth, 1, scope=_CONCAT_PROJECTION_SCOPE)
        concat_logits = slim.dropout(
            concat_logits,
            keep_prob=0.9,
            is_training=is_training,
            scope=_CONCAT_PROJECTION_SCOPE + '_dropout')

        return concat_logits, end_points
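To make the pooling arithmetic in this variant concrete: with crop_size = [513, 513] and output_stride = 16, the feature map is 33x33, the average pool (kernel == stride == [33, 33]) collapses it to 1x1, and the bilinear resize stretches it back to 33x33. A quick sanity check, reusing the scale_dimension sketch after Example #1:

def scale_dimension(dim, scale):
    # Same formula as the sketch after Example #1 (int inputs only).
    return int((float(dim) - 1.0) * scale + 1.0)


crop_size = [513, 513]
output_stride = 16
pool_height = scale_dimension(crop_size[0], 1. / output_stride)
pool_width = scale_dimension(crop_size[1], 1. / output_stride)
assert (pool_height, pool_width) == (33, 33)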
Example #4
def extract_features(images,
                     model_options,
                     weight_decay=0.0001,
                     reuse=None,
                     is_training=False,
                     fine_tune_batch_norm=False):
  """Extracts features by the particular model_variant.

  Args:
    images: A tensor of size [batch, height, width, channels].
    model_options: A ModelOptions instance to configure models.
    weight_decay: The weight decay for model variables.
    reuse: Reuse the model variables or not.
    is_training: Is training or not.
    fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

  Returns:
    concat_logits: A tensor of size [batch, feature_height, feature_width,
      feature_channels], where feature_height/feature_width are determined by
      the images height/width and output_stride.
    end_points: A dictionary from components of the network to the corresponding
      activation.
  """
  features, end_points = feature_extractor.extract_features(
      images,
      output_stride=model_options.output_stride,
      multi_grid=model_options.multi_grid,
      model_variant=model_options.model_variant,
      depth_multiplier=model_options.depth_multiplier,
      weight_decay=weight_decay,
      reuse=reuse,
      is_training=is_training,
      fine_tune_batch_norm=fine_tune_batch_norm)

  if not model_options.aspp_with_batch_norm:
    return features, end_points
  else:
    if model_options.dense_prediction_cell_config is not None:
      tf.logging.info('Using dense prediction cell config.')
      dense_prediction_layer = dense_prediction_cell.DensePredictionCell(
          config=model_options.dense_prediction_cell_config,
          hparams={
            'conv_rate_multiplier': 16 // model_options.output_stride,
          })
      concat_logits = dense_prediction_layer.build_cell(
          features,
          output_stride=model_options.output_stride,
          crop_size=model_options.crop_size,
          image_pooling_crop_size=model_options.image_pooling_crop_size,
          weight_decay=weight_decay,
          reuse=reuse,
          is_training=is_training,
          fine_tune_batch_norm=fine_tune_batch_norm)
      return concat_logits, end_points
    else:
      # The following code employs the DeepLabv3 ASPP module. Note that we
      # could express the ASPP module as one particular dense prediction
      # cell architecture, but we keep this code for backward compatibility.
      batch_norm_params = {
        'is_training': is_training and fine_tune_batch_norm,
        'decay': 0.9997,
        'epsilon': 1e-5,
        'scale': True,
      }

      with slim.arg_scope(
          [slim.conv2d, slim.separable_conv2d],
          weights_regularizer=slim.l2_regularizer(weight_decay),
          activation_fn=tf.nn.relu,
          normalizer_fn=slim.batch_norm,
          padding='SAME',
          stride=1,
          reuse=reuse):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
          depth = 256
          branch_logits = []

          if model_options.add_image_level_feature:
            if model_options.crop_size is not None:
              image_pooling_crop_size = model_options.image_pooling_crop_size
              # If image_pooling_crop_size is not specified, use crop_size.
              if image_pooling_crop_size is None:
                image_pooling_crop_size = model_options.crop_size
              pool_height = scale_dimension(
                  image_pooling_crop_size[0],
                  1. / model_options.output_stride)
              pool_width = scale_dimension(
                  image_pooling_crop_size[1],
                  1. / model_options.output_stride)
              image_feature = slim.avg_pool2d(
                  features, [pool_height, pool_width], [1, 1], padding='VALID')
              resize_height = scale_dimension(
                  model_options.crop_size[0],
                  1. / model_options.output_stride)
              resize_width = scale_dimension(
                  model_options.crop_size[1],
                  1. / model_options.output_stride)
            else:
              # If crop_size is None, we simply do global pooling.
              pool_height = tf.shape(features)[1]
              pool_width = tf.shape(features)[2]
              image_feature = tf.reduce_mean(
                  features, axis=[1, 2], keepdims=True)
              resize_height = pool_height
              resize_width = pool_width
            image_feature = slim.conv2d(
                image_feature, depth, 1, scope=IMAGE_POOLING_SCOPE)
            image_feature = _resize_bilinear(
                image_feature,
                [resize_height, resize_width],
                image_feature.dtype)
            # Set shape for resize_height/resize_width if they are not Tensor.
            if isinstance(resize_height, tf.Tensor):
              resize_height = None
            if isinstance(resize_width, tf.Tensor):
              resize_width = None
            image_feature.set_shape([None, resize_height, resize_width, depth])
            branch_logits.append(image_feature)

          # Employ a 1x1 convolution.
          branch_logits.append(slim.conv2d(features, depth, 1,
                                           scope=ASPP_SCOPE + str(0)))

          if model_options.atrous_rates:
            # Employ 3x3 convolutions with different atrous rates.
            for i, rate in enumerate(model_options.atrous_rates, 1):
              scope = ASPP_SCOPE + str(i)
              if model_options.aspp_with_separable_conv:
                aspp_features = split_separable_conv2d(
                    features,
                    filters=depth,
                    rate=rate,
                    weight_decay=weight_decay,
                    scope=scope)
              else:
                aspp_features = slim.conv2d(
                    features, depth, 3, rate=rate, scope=scope)
              branch_logits.append(aspp_features)

          # Merge branch logits.
          concat_logits = tf.concat(branch_logits, 3)
          concat_logits = slim.conv2d(
              concat_logits, depth, 1, scope=CONCAT_PROJECTION_SCOPE)
          concat_logits = slim.dropout(
              concat_logits,
              keep_prob=0.9,
              is_training=is_training,
              scope=CONCAT_PROJECTION_SCOPE + '_dropout')

          return concat_logits, end_points
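split_separable_conv2d is called above but not defined in this listing. In DeepLab it factors the 3x3 atrous convolution into a depthwise step and a 1x1 pointwise step; a simplified sketch, omitting the per-step weight initializers and activation options the real helper exposes:

from tensorflow.contrib import slim


def split_separable_conv2d(inputs, filters, kernel_size=3, rate=1,
                           weight_decay=0.00004, scope=None):
    """Atrous depthwise conv followed by a 1x1 pointwise conv."""
    outputs = slim.separable_conv2d(
        inputs,
        None,  # num_outputs=None runs only the depthwise step.
        kernel_size,
        depth_multiplier=1,
        rate=rate,
        scope=scope + '_depthwise')
    return slim.conv2d(
        outputs,
        filters,
        1,
        weights_regularizer=slim.l2_regularizer(weight_decay),
        scope=scope + '_pointwise')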
Example #5
def _extract_features(images,
                      model_options,
                      weight_decay=0.0001,
                      reuse=None,
                      is_training=False,
                      fine_tune_batch_norm=False):
  """Extracts features by the particular model_variant.

  Args:
    images: A tensor of size [batch, height, width, channels].
    model_options: A ModelOptions instance to configure models.
    weight_decay: The weight decay for model variables.
    reuse: Reuse the model variables or not.
    is_training: Is training or not.
    fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

  Returns:
    concat_logits: A tensor of size [batch, feature_height, feature_width,
      feature_channels], where feature_height/feature_width are determined by
      the images height/width and output_stride.
    end_points: A dictionary from components of the network to the corresponding
      activation.
  """
  features, end_points = feature_extractor.extract_features(
      images,
      output_stride=model_options.output_stride,
      multi_grid=model_options.multi_grid,
      model_variant=model_options.model_variant,
      weight_decay=weight_decay,
      reuse=reuse,
      is_training=is_training,
      fine_tune_batch_norm=fine_tune_batch_norm)

  if not model_options.aspp_with_batch_norm:
    return features, end_points
  else:
    batch_norm_params = {
        'is_training': is_training and fine_tune_batch_norm,
        'decay': 0.9997,
        'epsilon': 1e-5,
        'scale': True,
    }

    with slim.arg_scope(
        [slim.conv2d, slim.separable_conv2d],
        weights_regularizer=slim.l2_regularizer(weight_decay),
        activation_fn=tf.nn.relu,
        normalizer_fn=slim.batch_norm,
        padding='SAME',
        stride=1,
        reuse=reuse):
      with slim.arg_scope([slim.batch_norm], **batch_norm_params):
        depth = 256
        branch_logits = []

        if model_options.add_image_level_feature:
          pool_height = scale_dimension(model_options.crop_size[0],
                                        1. / model_options.output_stride)
          pool_width = scale_dimension(model_options.crop_size[1],
                                       1. / model_options.output_stride)
          image_feature = slim.avg_pool2d(
              features, [pool_height, pool_width], [pool_height, pool_width],
              padding='VALID')
          image_feature = slim.conv2d(
              image_feature, depth, 1, scope=_IMAGE_POOLING_SCOPE)
          image_feature = tf.image.resize_bilinear(
              image_feature, [pool_height, pool_width], align_corners=True)
          image_feature.set_shape([None, pool_height, pool_width, depth])
          branch_logits.append(image_feature)

        # Employ a 1x1 convolution.
        branch_logits.append(slim.conv2d(features, depth, 1,
                                         scope=_ASPP_SCOPE + str(0)))

        if model_options.atrous_rates:
          # Employ 3x3 convolutions with different atrous rates.
          for i, rate in enumerate(model_options.atrous_rates, 1):
            scope = _ASPP_SCOPE + str(i)
            if model_options.aspp_with_separable_conv:
              aspp_features = _split_separable_conv2d(
                  features,
                  filters=depth,
                  rate=rate,
                  weight_decay=weight_decay,
                  scope=scope)
            else:
              aspp_features = slim.conv2d(
                  features, depth, 3, rate=rate, scope=scope)
            branch_logits.append(aspp_features)

        # Merge branch logits.
        concat_logits = tf.concat(branch_logits, 3)
        concat_logits = slim.conv2d(
            concat_logits, depth, 1, scope=_CONCAT_PROJECTION_SCOPE)
        concat_logits = slim.dropout(
            concat_logits,
            keep_prob=0.9,
            is_training=is_training,
            scope=_CONCAT_PROJECTION_SCOPE + '_dropout')

        return concat_logits, end_points
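With the common settings (atrous_rates = [6, 12, 18], image-level feature enabled, depth = 256), the concat in these variants merges five 256-channel branches into 1280 channels before the 1x1 projection brings the result back to 256. In numbers:

depth = 256
atrous_rates = [6, 12, 18]
num_branches = 1 + 1 + len(atrous_rates)  # image pooling + 1x1 conv + atrous convs
assert num_branches * depth == 1280       # concat width before the projection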
Example #6
def extract_features(images,
                     model_options,
                     weight_decay=0.0001,
                     reuse=None,
                     is_training=False,
                     fine_tune_batch_norm=False):
    """Extracts features by the particular model_variant.

    Args:
      images: A tensor of size [batch, height, width, channels].
      model_options: A ModelOptions instance to configure models.
      weight_decay: The weight decay for model variables.
      reuse: Reuse the model variables or not.
      is_training: Is training or not.
      fine_tune_batch_norm: Fine-tune the batch norm parameters or not.

    Returns:
      concat_logits: A tensor of size [batch, feature_height, feature_width,
        feature_channels], where feature_height/feature_width are determined by
        the images height/width and output_stride.
      end_points: A dictionary from components of the network to the corresponding
        activation.
    """
    features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        depth_multiplier=model_options.depth_multiplier,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        fine_tune_batch_norm=fine_tune_batch_norm)

    if not model_options.aspp_with_batch_norm:
        return features, end_points
    else:
        if model_options.dense_prediction_cell_config is not None:
            tf.logging.info('Using dense prediction cell config.')
            dense_prediction_layer = dense_prediction_cell.DensePredictionCell(
                config=model_options.dense_prediction_cell_config,
                hparams={
                    'conv_rate_multiplier': 16 // model_options.output_stride,
                })
            concat_logits = dense_prediction_layer.build_cell(
                features,
                output_stride=model_options.output_stride,
                crop_size=model_options.crop_size,
                image_pooling_crop_size=model_options.image_pooling_crop_size,
                weight_decay=weight_decay,
                reuse=reuse,
                is_training=is_training,
                fine_tune_batch_norm=fine_tune_batch_norm)
            return concat_logits, end_points
        else:
            # The following code employs the DeepLabv3 ASPP module. Note that
            # we could express the ASPP module as one particular dense
            # prediction cell architecture, but we keep this code for backward
            # compatibility.
            batch_norm_params = {
                'is_training': is_training and fine_tune_batch_norm,
                'decay': 0.9997,
                'epsilon': 1e-5,
                'scale': True,
            }

            with slim.arg_scope(
                [slim.conv2d, slim.separable_conv2d],
                    weights_regularizer=slim.l2_regularizer(weight_decay),
                    activation_fn=tf.nn.relu,
                    normalizer_fn=slim.batch_norm,
                    padding='SAME',
                    stride=1,
                    reuse=reuse):
                with slim.arg_scope([slim.batch_norm], **batch_norm_params):
                    depth = 256
                    branch_logits = []

                    if model_options.add_image_level_feature:
                        if model_options.crop_size is not None:
                            image_pooling_crop_size = model_options.image_pooling_crop_size
                            # If image_pooling_crop_size is not specified, use crop_size.
                            if image_pooling_crop_size is None:
                                image_pooling_crop_size = model_options.crop_size
                            pool_height = scale_dimension(
                                image_pooling_crop_size[0],
                                1. / model_options.output_stride)
                            pool_width = scale_dimension(
                                image_pooling_crop_size[1],
                                1. / model_options.output_stride)
                            image_feature = slim.avg_pool2d(
                                features, [pool_height, pool_width], [1, 1],
                                padding='VALID')
                            resize_height = scale_dimension(
                                model_options.crop_size[0],
                                1. / model_options.output_stride)
                            resize_width = scale_dimension(
                                model_options.crop_size[1],
                                1. / model_options.output_stride)
                        else:
                            # If crop_size is None, we simply do global pooling.
                            pool_height = tf.shape(features)[1]
                            pool_width = tf.shape(features)[2]
                            image_feature = tf.reduce_mean(features,
                                                           axis=[1, 2],
                                                           keepdims=True)
                            resize_height = pool_height
                            resize_width = pool_width
                        image_feature = slim.conv2d(image_feature,
                                                    depth,
                                                    1,
                                                    scope=IMAGE_POOLING_SCOPE)
                        image_feature = _resize_bilinear(
                            image_feature, [resize_height, resize_width],
                            image_feature.dtype)
                        # Set shape for resize_height/resize_width if they are not Tensor.
                        if isinstance(resize_height, tf.Tensor):
                            resize_height = None
                        if isinstance(resize_width, tf.Tensor):
                            resize_width = None
                        image_feature.set_shape(
                            [None, resize_height, resize_width, depth])
                        branch_logits.append(image_feature)

                    # Employ a 1x1 convolution.
                    branch_logits.append(
                        slim.conv2d(features,
                                    depth,
                                    1,
                                    scope=ASPP_SCOPE + str(0)))

                    if model_options.atrous_rates:
                        # Employ 3x3 convolutions with different atrous rates.
                        for i, rate in enumerate(model_options.atrous_rates,
                                                 1):
                            scope = ASPP_SCOPE + str(i)
                            if model_options.aspp_with_separable_conv:
                                aspp_features = split_separable_conv2d(
                                    features,
                                    filters=depth,
                                    rate=rate,
                                    weight_decay=weight_decay,
                                    scope=scope)
                            else:
                                aspp_features = slim.conv2d(features,
                                                            depth,
                                                            3,
                                                            rate=rate,
                                                            scope=scope)
                            branch_logits.append(aspp_features)

                    # Merge branch logits.
                    concat_logits = tf.concat(branch_logits, 3)
                    concat_logits = slim.conv2d(concat_logits,
                                                depth,
                                                1,
                                                scope=CONCAT_PROJECTION_SCOPE)
                    concat_logits = slim.dropout(
                        concat_logits,
                        keep_prob=0.9,
                        is_training=is_training,
                        scope=CONCAT_PROJECTION_SCOPE + '_dropout')

                    return concat_logits, end_points
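_resize_bilinear above is another helper this listing does not show. In the upstream code it is a thin wrapper over tf.image.resize_bilinear with align_corners=True plus a dtype cast, roughly:

import tensorflow as tf


def _resize_bilinear(images, size, output_dtype=tf.float32):
    """Bilinearly resizes images to size, then casts to output_dtype."""
    images = tf.image.resize_bilinear(images, size, align_corners=True)
    return tf.cast(images, dtype=output_dtype)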
Example #7
def extract_features(
        images,  # Backbone + ASPP features; a final 1x1 conv and a dropout
                 # layer are applied at the end.
        model_options,
        weight_decay=0.0001,
        reuse=None,
        is_training=False,
        fine_tune_batch_norm=False,
        nas_training_hyper_parameters=None):
    """Extracts features by the particular model_variant.

  Args:
    images: A tensor of size [batch, height, width, channels].
    model_options: A ModelOptions instance to configure models.
    weight_decay: The weight decay for model variables.
    reuse: Reuse the model variables or not.
    is_training: Is training or not.
    fine_tune_batch_norm: Fine-tune the batch norm parameters or not.
    nas_training_hyper_parameters: A dictionary storing hyper-parameters for
      training nas models. Its keys are:
      - `drop_path_keep_prob`: Probability to keep each path in the cell when
        training.
      - `total_training_steps`: Total training steps to help drop path
        probability calculation.

  Returns:
    concat_logits: A tensor of size [batch, feature_height, feature_width,
      feature_channels], where feature_height/feature_width are determined by
      the images height/width and output_stride.
    end_points: A dictionary from components of the network to the corresponding
      activation.
  """
    features, end_points = feature_extractor.extract_features(  # Feature map produced by the backbone network.
        images,
        output_stride=model_options.output_stride,  # Defaults to 16.
        multi_grid=model_options.multi_grid,  # Defaults to None; [1, 2, 4] for ResNet variants.
        model_variant=model_options.model_variant,  # Backbone name, e.g. xception_65.
        depth_multiplier=model_options.depth_multiplier,  # Defaults to 1.0; used by MobileNet.
        divisible_by=model_options.divisible_by,  # Used by MobileNet; defaults to None.
        weight_decay=weight_decay,  # Weight decay, e.g. 0.0004.
        reuse=reuse,
        is_training=is_training,
        preprocessed_images_dtype=model_options.preprocessed_images_dtype,  # Dtype of the preprocessed images.
        fine_tune_batch_norm=fine_tune_batch_norm,  # Whether to fine-tune the BN layers.
        nas_architecture_options=model_options.nas_architecture_options,
        nas_training_hyper_parameters=nas_training_hyper_parameters,
        use_bounded_activation=model_options.use_bounded_activation)  # Bounded activation; defaults to False.

    if not model_options.aspp_with_batch_norm:  # Set for MobileNet; if ASPP is not needed, return the backbone features directly.
        return features, end_points
    else:

        if model_options.dense_prediction_cell_config is not None:
            tf.logging.info('Using dense prediction cell config.')
            dense_prediction_layer = dense_prediction_cell.DensePredictionCell(
                config=model_options.dense_prediction_cell_config,
                hparams={
                    'conv_rate_multiplier': 16 // model_options.output_stride,
                })
            concat_logits = dense_prediction_layer.build_cell(
                features,
                output_stride=model_options.output_stride,
                crop_size=model_options.crop_size,
                image_pooling_crop_size=model_options.image_pooling_crop_size,
                weight_decay=weight_decay,
                reuse=reuse,
                is_training=is_training,
                fine_tune_batch_norm=fine_tune_batch_norm)
            return concat_logits, end_points
        else:
            # The following code employs the DeepLabv3 ASPP module (atrous
            # spatial pyramid pooling). Note that we could express the ASPP
            # module as one particular dense prediction cell architecture, but
            # we keep this code for backward compatibility.
            batch_norm_params = utils.get_batch_norm_params(  # BN layer parameters.
                decay=0.9997,
                epsilon=1e-5,
                scale=True,
                is_training=(is_training and fine_tune_batch_norm),
                sync_batch_norm_method=model_options.sync_batch_norm_method)
            batch_norm = utils.get_batch_norm_fn(  # BN layer constructor.
                model_options.sync_batch_norm_method)
            activation_fn = (  # relu6 when bounded activation is requested, relu otherwise.
                tf.nn.relu6
                if model_options.use_bounded_activation else tf.nn.relu)
            with slim.arg_scope(
                [slim.conv2d, slim.separable_conv2d],
                    weights_regularizer=slim.l2_regularizer(weight_decay),
                    activation_fn=activation_fn,
                    normalizer_fn=batch_norm,
                    padding='SAME',
                    stride=1,
                    reuse=reuse):
                with slim.arg_scope([batch_norm], **batch_norm_params):
                    depth = model_options.aspp_convs_filters  # Number of ASPP conv filters, e.g. 256.
                    branch_logits = []  # Holds the parallel ASPP branch features.
                    # Add the image-pooling branch.
                    if model_options.add_image_level_feature:  # Add image-level features; defaults to True.
                        if model_options.crop_size is not None:
                            image_pooling_crop_size = model_options.image_pooling_crop_size
                            # If image_pooling_crop_size is not specified, use crop_size.
                            if image_pooling_crop_size is None:
                                image_pooling_crop_size = model_options.crop_size
                            # Scale the pooled height/width by 1 / output_stride.
                            pool_height = scale_dimension(
                                image_pooling_crop_size[0],
                                1. / model_options.output_stride)
                            pool_width = scale_dimension(
                                image_pooling_crop_size[1],
                                1. / model_options.output_stride)
                            image_feature = slim.avg_pool2d(  # Image pooling via average pooling.
                                features, [pool_height, pool_width],
                                model_options.image_pooling_stride,
                                padding='VALID')
                            resize_height = scale_dimension(  # Target height for a fixed-size output.
                                model_options.crop_size[0],
                                1. / model_options.output_stride)
                            resize_width = scale_dimension(  # Target width for a fixed-size output.
                                model_options.crop_size[1],
                                1. / model_options.output_stride)
                        else:
                            # If crop_size is None, we simply do global pooling.
                            pool_height = tf.shape(features)[1]
                            pool_width = tf.shape(features)[2]
                            image_feature = tf.reduce_mean(  # Global average pooling.
                                features,
                                axis=[1, 2],
                                keepdims=True)
                            resize_height = pool_height
                            resize_width = pool_width
                        image_feature_activation_fn = tf.nn.relu
                        image_feature_normalizer_fn = batch_norm
                        if model_options.aspp_with_squeeze_and_excitation:  # Usually False.
                            image_feature_activation_fn = tf.nn.sigmoid
                            if model_options.image_se_uses_qsigmoid:
                                image_feature_activation_fn = utils.q_sigmoid
                            image_feature_normalizer_fn = None

                        image_feature = slim.conv2d(  # 1x1 conv on the pooled image feature.
                            image_feature,
                            depth,
                            1,
                            activation_fn=image_feature_activation_fn,
                            normalizer_fn=image_feature_normalizer_fn,
                            scope=IMAGE_POOLING_SCOPE)
                        image_feature = _resize_bilinear(  # Upsample back to the feature resolution.
                            image_feature, [resize_height, resize_width],
                            image_feature.dtype)
                        # Set shape for resize_height/resize_width if they are not Tensor.
                        if isinstance(resize_height, tf.Tensor):
                            resize_height = None
                        if isinstance(resize_width, tf.Tensor):
                            resize_width = None
                        image_feature.set_shape(
                            [None, resize_height, resize_width, depth])
                        if not model_options.aspp_with_squeeze_and_excitation:
                            branch_logits.append(image_feature)

                    # Employ a 1x1 convolution.
                    branch_logits.append(
                        slim.conv2d(features,
                                    depth,
                                    1,
                                    scope=ASPP_SCOPE + str(0)))

                    if model_options.atrous_rates:  # Atrous (dilated) convolutions.
                        # Employ 3x3 convolutions with different atrous rates.
                        for i, rate in enumerate(model_options.atrous_rates,
                                                 1):  # rate is the atrous rate.
                            scope = ASPP_SCOPE + str(i)
                            if model_options.aspp_with_separable_conv:  # Atrous separable conv cuts compute; defaults to True.
                                aspp_features = split_separable_conv2d(
                                    features,
                                    filters=depth,
                                    rate=rate,
                                    weight_decay=weight_decay,
                                    scope=scope)
                            else:
                                aspp_features = slim.conv2d(features,
                                                            depth,
                                                            3,
                                                            rate=rate,
                                                            scope=scope)
                            branch_logits.append(
                                aspp_features)  # Collect each atrous branch.

                    # Merge branch logits.
                    concat_logits = tf.concat(branch_logits,
                                              3)  # Concatenate along the channel axis.
                    if model_options.aspp_with_concat_projection:  # 1x1 projection after concat; defaults to True.
                        concat_logits = slim.conv2d(
                            concat_logits,
                            depth,
                            1,
                            scope=CONCAT_PROJECTION_SCOPE)
                        concat_logits = slim.dropout(  # Dropout to reduce overfitting.
                            concat_logits,
                            keep_prob=0.9,
                            is_training=is_training,
                            scope=CONCAT_PROJECTION_SCOPE + '_dropout')
                    if (model_options.add_image_level_feature and
                            model_options.aspp_with_squeeze_and_excitation):
                        concat_logits *= image_feature

                    return concat_logits, end_points
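For context, a sketch of how the model_options these functions consume is typically built; the field values here are illustrative, assuming the tensorflow/models DeepLab layout:

from deeplab import common

model_options = common.ModelOptions(
    outputs_to_num_classes={'semantic': 21},  # e.g. PASCAL VOC classes
    crop_size=[513, 513],
    atrous_rates=[6, 12, 18],
    output_stride=16)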
Example #8
# Imports assumed by this snippet (TF1-style, tensorflow/models DeepLab layout).
import tensorflow as tf
from tensorflow.contrib import slim

from deeplab.core import feature_extractor

flags = tf.app.flags

# output_stride, multi_grid and model_variant are defined in deeplab/common.py
# in the original codebase; minimal stand-ins are added here so the snippet
# runs standalone.
flags.DEFINE_integer('output_stride', 16,
                     'The ratio of input to output spatial resolution.')
flags.DEFINE_multi_integer('multi_grid', None,
                           'Employ a hierarchy of atrous rates for ResNet.')
flags.DEFINE_string('model_variant', 'mobilenet_v2', 'DeepLab model variant.')

flags.DEFINE_float(
    'depth_multiplier', 1.0,
    'Multiplier for the depth (number of channels) for all '
    'convolution ops used in MobileNet.')

FLAGS = flags.FLAGS

if __name__ == '__main__':

    images = tf.random_normal([1, 513, 513, 3])

    features, end_points = feature_extractor.extract_features(
        images,
        output_stride=FLAGS.output_stride,
        multi_grid=FLAGS.multi_grid,
        model_variant=FLAGS.model_variant,
        depth_multiplier=FLAGS.depth_multiplier,
        weight_decay=0.0001,
        reuse=None,
        is_training=False,
        fine_tune_batch_norm=False)

    print(features, end_points)

    writer = tf.summary.FileWriter("./logs", graph=tf.get_default_graph())

    print("Layers")
    for k, v in end_points.items():
        print('name = {}, shape = {}'.format(v.name, v.get_shape()))

    print("Parameters")
    for v in slim.get_model_variables():
        print('name = {}, shape = {}'.format(v.name, v.get_shape()))