Example #1
def hnasnet(images,
            num_classes,
            is_training=True,
            global_pool=False,
            output_stride=8,
            nas_architecture_options=None,
            nas_training_hyper_parameters=None,
            reuse=None,
            scope='hnasnet',
            final_endpoint=None,
            sync_batch_norm_method='None'):
  """Builds hierarchical model."""
  if nas_architecture_options is None:
    raise ValueError(
        'nas_architecture_options cannot be None when using NAS model variants.')
  hparams = config(num_conv_filters=nas_architecture_options[
      'nas_stem_output_num_conv_filters'])
  if nas_training_hyper_parameters:
    hparams.set_hparam('drop_path_keep_prob',
                       nas_training_hyper_parameters['drop_path_keep_prob'])
    hparams.set_hparam('total_training_steps',
                       nas_training_hyper_parameters['total_training_steps'])
  if not is_training:
    tf.logging.info('During inference, setting drop_path_keep_prob = 1.0.')
    hparams.set_hparam('drop_path_keep_prob', 1.0)
  tf.logging.info(hparams)
  operations = [
      'atrous_5x5', 'separable_3x3_2', 'separable_3x3_2', 'atrous_3x3',
      'separable_3x3_2', 'separable_3x3_2', 'separable_5x5_2',
      'separable_5x5_2', 'separable_5x5_2', 'atrous_5x5'
  ]
  used_hiddenstates = [1, 1, 0, 0, 0, 0, 0]
  hiddenstate_indices = [1, 0, 1, 0, 3, 1, 4, 2, 3, 5]
  backbone = [0, 0, 0, 1, 2, 1, 2, 2, 3, 3, 2, 1]
  batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
  cell = NASBaseCell(hparams.num_conv_filters,
                     operations,
                     used_hiddenstates,
                     hiddenstate_indices,
                     hparams.drop_path_keep_prob,
                     len(backbone),
                     hparams.total_training_steps,
                     batch_norm_fn=batch_norm)
  with arg_scope([slim.dropout, batch_norm], is_training=is_training):
    return _build_nas_base(
        images,
        cell=cell,
        backbone=backbone,
        num_classes=num_classes,
        hparams=hparams,
        global_pool=global_pool,
        output_stride=output_stride,
        nas_use_classification_head=nas_architecture_options[
            'nas_use_classification_head'],
        reuse=reuse,
        scope=scope,
        final_endpoint=final_endpoint,
        batch_norm_fn=batch_norm,
        nas_remove_os32_stride=nas_architecture_options[
            'nas_remove_os32_stride'])
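
A minimal usage sketch (not part of the original example), assuming the DeepLab research code is on the path, that _build_nas_base returns a (net, end_points) pair like the other extractors here, and that the three keys read in the body above are the only required entries of nas_architecture_options:

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 513, 513, 3])
net, end_points = hnasnet(
    images,
    num_classes=21,  # e.g. PASCAL VOC; illustrative only.
    is_training=False,
    output_stride=8,
    nas_architecture_options={
        # Keys taken from the lookups in the function body; the values
        # are placeholder assumptions, not recommended settings.
        'nas_stem_output_num_conv_filters': 20,
        'nas_use_classification_head': False,
        'nas_remove_os32_stride': False,
    })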
Example #2
def nas_arg_scope(weight_decay=4e-5,
                  batch_norm_decay=0.9997,
                  batch_norm_epsilon=0.001,
                  sync_batch_norm_method='None'):
  """Default arg scope for the NAS models."""
  batch_norm_params = {
      # Decay for the moving averages.
      'decay': batch_norm_decay,
      # epsilon to prevent 0s in variance.
      'epsilon': batch_norm_epsilon,
      'scale': True,
  }
  batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
  weights_regularizer = contrib_layers.l2_regularizer(weight_decay)
  weights_initializer = contrib_layers.variance_scaling_initializer(
      factor=1 / 3.0, mode='FAN_IN', uniform=True)
  with arg_scope([slim.fully_connected, slim.conv2d, slim.separable_conv2d],
                 weights_regularizer=weights_regularizer,
                 weights_initializer=weights_initializer):
    with arg_scope([slim.fully_connected],
                   activation_fn=None, scope='FC'):
      with arg_scope([slim.conv2d, slim.separable_conv2d],
                     activation_fn=None, biases_initializer=None):
        with arg_scope([batch_norm], **batch_norm_params) as sc:
          return sc
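
The returned scope is meant to wrap a model build; a hedged sketch, with `images` as in the Example #1 sketch, placeholder option values, and pnasnet taken from Example #3:

with slim.arg_scope(nas_arg_scope(weight_decay=4e-5)):
  # Layers created here inherit the regularizer, initializer, and
  # batch-norm defaults defined above.
  net, end_points = pnasnet(
      images, num_classes=21, is_training=True,
      nas_architecture_options={
          'nas_stem_output_num_conv_filters': 20,
          'nas_use_classification_head': False,
          'nas_remove_os32_stride': False,
      })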
Example #3
def pnasnet(images,
            num_classes,
            is_training=True,
            global_pool=False,
            output_stride=16,
            nas_architecture_options=None,
            nas_training_hyper_parameters=None,
            reuse=None,
            scope='pnasnet',
            final_endpoint=None,
            sync_batch_norm_method='None'):
  """Builds PNASNet model."""
  if nas_architecture_options is None:
    raise ValueError(
        'nas_architecture_options cannot be None when using NAS model variants.')
  hparams = config(num_conv_filters=nas_architecture_options[
      'nas_stem_output_num_conv_filters'])
  if nas_training_hyper_parameters:
    hparams.set_hparam('drop_path_keep_prob',
                       nas_training_hyper_parameters['drop_path_keep_prob'])
    hparams.set_hparam('total_training_steps',
                       nas_training_hyper_parameters['total_training_steps'])
  if not is_training:
    tf.logging.info('During inference, setting drop_path_keep_prob = 1.0.')
    hparams.set_hparam('drop_path_keep_prob', 1.0)
  tf.logging.info(hparams)
  if output_stride == 8:
    backbone = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
  elif output_stride == 16:
    backbone = [1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2]
  elif output_stride == 32:
    backbone = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
  else:
    raise ValueError('Unsupported output_stride %s.' % output_stride)
  batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
  cell = nas_genotypes.PNASCell(hparams.num_conv_filters,
                                hparams.drop_path_keep_prob,
                                len(backbone),
                                hparams.total_training_steps,
                                batch_norm_fn=batch_norm)
  with arg_scope([slim.dropout, batch_norm], is_training=is_training):
    return _build_nas_base(
        images,
        cell=cell,
        backbone=backbone,
        num_classes=num_classes,
        hparams=hparams,
        global_pool=global_pool,
        output_stride=output_stride,
        nas_use_classification_head=nas_architecture_options[
            'nas_use_classification_head'],
        reuse=reuse,
        scope=scope,
        final_endpoint=final_endpoint,
        batch_norm_fn=batch_norm,
        nas_remove_os32_stride=nas_architecture_options[
            'nas_remove_os32_stride'])
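
Usage mirrors hnasnet in Example #1, except that output_stride selects one of the three hard-coded backbone layouts, so only 8, 16, and 32 are accepted. A hedged sketch with the same assumed options:

net, end_points = pnasnet(
    images,
    num_classes=21,
    output_stride=16,  # must be 8, 16, or 32; anything else raises ValueError.
    nas_architecture_options={
        'nas_stem_output_num_conv_filters': 20,
        'nas_use_classification_head': False,
        'nas_remove_os32_stride': False,
    })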
Example #4
def xception_arg_scope(weight_decay=0.00004,
                       batch_norm_decay=0.9997,
                       batch_norm_epsilon=0.001,
                       batch_norm_scale=True,
                       weights_initializer_stddev=0.09,
                       regularize_depthwise=False,
                       use_batch_norm=True,
                       use_bounded_activation=False,
                       sync_batch_norm_method='None'):
  """Defines the default Xception arg scope.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: The moving average decay when estimating layer activation
      statistics in batch normalization.
    batch_norm_epsilon: Small constant to prevent division by zero when
      normalizing activations by their variance in batch normalization.
    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
      activations in the batch normalization layer.
    weights_initializer_stddev: The standard deviation of the truncated normal
      weight initializer.
    regularize_depthwise: Whether or not to apply L2-norm regularization on the
      depthwise convolution weights.
    use_batch_norm: Whether or not to use batch normalization.
    use_bounded_activation: Whether or not to use bounded activations. Bounded
      activations better lend themselves to quantized inference.
    sync_batch_norm_method: String, sync batchnorm method. Currently only
      `None` is supported. Also, it is only effective for Xception.

  Returns:
    An `arg_scope` to use for the Xception models.
  """
  batch_norm_params = {
      'decay': batch_norm_decay,
      'epsilon': batch_norm_epsilon,
      'scale': batch_norm_scale,
  }
  if regularize_depthwise:
    depthwise_regularizer = slim.l2_regularizer(weight_decay)
  else:
    depthwise_regularizer = None
  activation_fn = tf.nn.relu6 if use_bounded_activation else tf.nn.relu
  batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
  with slim.arg_scope(
      [slim.conv2d, slim.separable_conv2d],
      weights_initializer=tf.truncated_normal_initializer(
          stddev=weights_initializer_stddev),
      activation_fn=activation_fn,
      normalizer_fn=batch_norm if use_batch_norm else None):
    with slim.arg_scope([batch_norm], **batch_norm_params):
      with slim.arg_scope(
          [slim.conv2d],
          weights_regularizer=slim.l2_regularizer(weight_decay)):
        with slim.arg_scope(
            [slim.separable_conv2d],
            weights_regularizer=depthwise_regularizer):
          with slim.arg_scope(
              [xception_module],
              use_bounded_activation=use_bounded_activation,
              use_explicit_padding=not use_bounded_activation) as arg_sc:
            return arg_sc
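
A sketch of the intended pattern, assumed from the slim conventions above rather than shown in this snippet; xception_65 is assumed to be one of the xception_*() instantiations the docstrings mention:

with slim.arg_scope(xception_arg_scope(weight_decay=4e-5,
                                       use_bounded_activation=True)):
  # With use_bounded_activation=True the scope selects tf.nn.relu6.
  net, end_points = xception_65(images, num_classes=None,
                                is_training=True, output_stride=16)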
Example #5
def xception(inputs,
             blocks,
             num_classes=None,
             is_training=True,
             global_pool=True,
             keep_prob=0.5,
             output_stride=None,
             reuse=None,
             scope=None,
             sync_batch_norm_method='None'):
  """Generator for Xception models.

  This function generates a family of Xception models. See the xception_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce Xception models of various depths.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels]. Must be
      floating point. If a pretrained checkpoint is used, pixel values should be
      the same as during training (see go/slim-classification-models for
      specifics).
    blocks: A list of length equal to the number of Xception blocks. Each
      element is an Xception Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks.
      If 0 or None, we return the features before the logit layer.
    is_training: whether batch_norm layers are in training mode.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    keep_prob: Keep probability used in the pre-logits dropout layer.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse, 'scope' must be given.
    scope: Optional variable_scope.
    sync_batch_norm_method: String, sync batchnorm method. Currently only
      `None` is supported.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is 0 or None,
      then net is the output of the last Xception block, potentially after
      global average pooling. If num_classes is a non-zero integer, net contains
      the pre-softmax activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  with tf.variable_scope(
      scope, 'xception', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + 'end_points'
    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
    with slim.arg_scope([slim.conv2d,
                         slim.separable_conv2d,
                         xception_module,
                         stack_blocks_dense],
                        outputs_collections=end_points_collection):
      with slim.arg_scope([batch_norm], is_training=is_training):
        net = inputs
        if output_stride is not None:
          if output_stride % 2 != 0:
            raise ValueError('The output_stride needs to be a multiple of 2.')
          output_stride //= 2
        # Root block function operated on inputs.
        net = resnet_utils.conv2d_same(net, 32, 3, stride=2,
                                       scope='entry_flow/conv1_1')
        net = resnet_utils.conv2d_same(net, 64, 3, stride=1,
                                       scope='entry_flow/conv1_2')

        # Extract features for entry_flow, middle_flow, and exit_flow.
        net = stack_blocks_dense(net, blocks, output_stride)

        # Convert end_points_collection into a dictionary of end_points.
        end_points = slim.utils.convert_collection_to_dict(
            end_points_collection, clear_collection=True)

        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], name='global_pool', keepdims=True)
          end_points['global_pool'] = net
        if num_classes:
          net = slim.dropout(net, keep_prob=keep_prob, is_training=is_training,
                             scope='prelogits_dropout')
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='logits')
          end_points[sc.name + '/logits'] = net
          end_points['predictions'] = slim.softmax(net, scope='predictions')
        return net, end_points
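
Since building `blocks` by hand is verbose, direct use normally goes through the xception_*() wrappers named in the docstring; a hedged dense-prediction sketch (xception_65 is assumed to supply its own `blocks`):

net, end_points = xception_65(
    images,
    num_classes=None,   # None: return features before the logit layer.
    global_pool=False,  # keep a spatial feature map for dense prediction.
    output_stride=16)   # features at 1/16 of the input resolution.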
Example #6
def refine_by_decoder(features,
                      end_points,
                      crop_size=None,
                      decoder_output_stride=None,
                      decoder_use_separable_conv=False,
                      decoder_use_sum_merge=False,
                      decoder_filters=None,
                      decoder_output_is_logits=False,
                      model_variant=None,
                      weight_decay=0.0001,
                      reuse=None,
                      is_training=False,
                      fine_tune_batch_norm=False,
                      use_bounded_activation=False,
                      sync_batch_norm_method='None'):
    """Adds the decoder to obtain sharper segmentation results.

  Args:
    features: A tensor of size [batch, features_height, features_width,
      features_channels].
    end_points: A dictionary from components of the network to the corresponding
      activation.
    crop_size: A tuple [crop_height, crop_width] specifying whole patch crop
      size.
    decoder_output_stride: A list of integers specifying the output stride of
      low-level features used in the decoder module.
    decoder_use_separable_conv: Employ separable convolution for decoder or not.
    decoder_use_sum_merge: Boolean, decoder uses simple sum merge or not.
    decoder_filters: A list of integers, the filter size for each decoder
      level.
    decoder_output_is_logits: Boolean, using decoder output as logits or not.
    model_variant: Model variant for feature extraction.
    weight_decay: The weight decay for model variables.
    reuse: Reuse the model variables or not.
    is_training: Is training or not.
    fine_tune_batch_norm: Fine-tune the batch norm parameters or not.
    use_bounded_activation: Whether or not to use bounded activations. Bounded
      activations better lend themselves to quantized inference.
    sync_batch_norm_method: String, method used to sync batch norm. Currently
      only `None` (no sync batch norm) and `tpu` (use TPU code to sync batch
      norm) are supported.

  Returns:
    Decoder output with size [batch, decoder_height, decoder_width,
      decoder_channels].

  Raises:
    ValueError: If crop_size is None, or unexpected decoder_filters.
  """
    if crop_size is None:
        raise ValueError('crop_size must be provided when using decoder.')
    # We currently only support len(decoder_filters) == 1.
    batch_norm_params = utils.get_batch_norm_params(
        decay=0.9997,
        epsilon=1e-5,
        scale=True,
        is_training=(is_training and fine_tune_batch_norm),
        sync_batch_norm_method=sync_batch_norm_method)
    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
    decoder_depth = decoder_filters
    projected_filters = 48
    if decoder_use_sum_merge:
        # When using sum merge, the projected filters must be equal to decoder
        # filters.
        projected_filters = decoder_filters
    if decoder_output_is_logits:
        # Overwrite the setting when decoder output is logits.
        activation_fn = None
        normalizer_fn = None
        conv2d_kernel = 1
        # Use original conv instead of separable conv.
        decoder_use_separable_conv = False
    else:
        # Default setting when decoder output is not logits.
        activation_fn = tf.nn.relu6 if use_bounded_activation else tf.nn.relu
        normalizer_fn = batch_norm
        conv2d_kernel = 3
    with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        activation_fn=activation_fn,
                        normalizer_fn=normalizer_fn,
                        padding='SAME',
                        stride=1,
                        reuse=reuse):
        with slim.arg_scope([batch_norm], **batch_norm_params):
            with tf.compat.v1.variable_scope(DECODER_SCOPE, DECODER_SCOPE,
                                             [features]):
                decoder_features = features
                decoder_stage = 0
                scope_suffix = ''
                for output_stride in decoder_output_stride:
                    feature_list = feature_extractor.networks_to_feature_maps[
                        model_variant][feature_extractor.
                                       DECODER_END_POINTS][output_stride]
                    # If there is only one decoder stage, we do not change the
                    # scope name, for backward compatibility.
                    if decoder_stage:
                        scope_suffix = '_{}'.format(decoder_stage)
                    for i, name in enumerate(feature_list):
                        decoder_features_list = [decoder_features]
                        # MobileNet and NAS variants use different naming convention.
                        if ('mobilenet' in model_variant
                                or model_variant.startswith('mnas')
                                or model_variant.startswith('nas')):
                            feature_name = name
                        else:
                            feature_name = '{}/{}'.format(
                                feature_extractor.name_scope[model_variant],
                                name)
                        decoder_features_list.append(
                            slim.conv2d(end_points[feature_name],
                                        projected_filters,
                                        1,
                                        scope='feature_projection' + str(i) +
                                        scope_suffix))
                        # Determine the output size.
                        decoder_height = scale_dimension(
                            crop_size[0], 1.0 / output_stride)
                        decoder_width = scale_dimension(
                            crop_size[1], 1.0 / output_stride)
                        # Resize to decoder_height/decoder_width.
                        for j, feature in enumerate(decoder_features_list):
                            decoder_features_list[j] = _resize_bilinear(
                                feature, [decoder_height, decoder_width],
                                feature.dtype)
                            h = (None if isinstance(decoder_height, tf.Tensor)
                                 else decoder_height)
                            w = (None if isinstance(decoder_width, tf.Tensor)
                                 else decoder_width)
                            decoder_features_list[j].set_shape(
                                [None, h, w, None])
                        if decoder_use_sum_merge:
                            decoder_features = _decoder_with_sum_merge(
                                decoder_features_list,
                                decoder_depth,
                                conv2d_kernel=conv2d_kernel,
                                decoder_use_separable_conv=
                                decoder_use_separable_conv,
                                weight_decay=weight_decay,
                                scope_suffix=scope_suffix)
                        else:
                            if not decoder_use_separable_conv:
                                scope_suffix = str(i) + scope_suffix
                            decoder_features = _decoder_with_concat_merge(
                                decoder_features_list,
                                decoder_depth,
                                decoder_use_separable_conv=
                                decoder_use_separable_conv,
                                weight_decay=weight_decay,
                                scope_suffix=scope_suffix)
                    decoder_stage += 1
                return decoder_features
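
A hedged sketch of driving the decoder; the argument values are assumptions, and `features`/`end_points` would come from a feature extractor such as Example #7's:

refined = refine_by_decoder(
    features,
    end_points,
    crop_size=[513, 513],         # required; None raises ValueError above.
    decoder_output_stride=[4],    # a single decoder stage at output stride 4.
    decoder_use_separable_conv=True,
    decoder_filters=256,          # passed straight through as the conv depth
                                  # in the body above (scalar assumed here).
    model_variant='xception_65',  # assumed to be a registered variant.
    is_training=True,
    fine_tune_batch_norm=True)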
Example #7
def extract_features(images,
                     model_options,
                     weight_decay=0.0001,
                     reuse=None,
                     is_training=False,
                     fine_tune_batch_norm=False,
                     nas_training_hyper_parameters=None):
    """Extracts features by the particular model_variant.

  Args:
    images: A tensor of size [batch, height, width, channels].
    model_options: A ModelOptions instance to configure models.
    weight_decay: The weight decay for model variables.
    reuse: Reuse the model variables or not.
    is_training: Is training or not.
    fine_tune_batch_norm: Fine-tune the batch norm parameters or not.
    nas_training_hyper_parameters: A dictionary storing hyper-parameters for
      training nas models. Its keys are:
      - `drop_path_keep_prob`: Probability to keep each path in the cell when
        training.
      - `total_training_steps`: Total training steps to help drop path
        probability calculation.

  Returns:
    concat_logits: A tensor of size [batch, feature_height, feature_width,
      feature_channels], where feature_height/feature_width are determined by
      the images height/width and output_stride.
    end_points: A dictionary from components of the network to the corresponding
      activation.
  """
    features, end_points = feature_extractor.extract_features(
        images,
        output_stride=model_options.output_stride,
        multi_grid=model_options.multi_grid,
        model_variant=model_options.model_variant,
        depth_multiplier=model_options.depth_multiplier,
        divisible_by=model_options.divisible_by,
        weight_decay=weight_decay,
        reuse=reuse,
        is_training=is_training,
        preprocessed_images_dtype=model_options.preprocessed_images_dtype,
        fine_tune_batch_norm=fine_tune_batch_norm,
        nas_architecture_options=model_options.nas_architecture_options,
        nas_training_hyper_parameters=nas_training_hyper_parameters,
        use_bounded_activation=model_options.use_bounded_activation)

    if not model_options.aspp_with_batch_norm:
        return features, end_points
    else:
        if model_options.dense_prediction_cell_config is not None:
            tf.logging.info('Using dense prediction cell config.')
            dense_prediction_layer = dense_prediction_cell.DensePredictionCell(
                config=model_options.dense_prediction_cell_config,
                hparams={
                    'conv_rate_multiplier': 16 // model_options.output_stride,
                })
            concat_logits = dense_prediction_layer.build_cell(
                features,
                output_stride=model_options.output_stride,
                crop_size=model_options.crop_size,
                image_pooling_crop_size=model_options.image_pooling_crop_size,
                weight_decay=weight_decay,
                reuse=reuse,
                is_training=is_training,
                fine_tune_batch_norm=fine_tune_batch_norm)
            return concat_logits, end_points
        else:
            # The following code employs the DeepLabv3 ASPP module. Note that
            # we could express the ASPP module as one particular dense
            # prediction cell architecture. We do not do so, but leave the
            # following code for backward compatibility.
            batch_norm_params = utils.get_batch_norm_params(
                decay=0.9997,
                epsilon=1e-5,
                scale=True,
                is_training=(is_training and fine_tune_batch_norm),
                sync_batch_norm_method=model_options.sync_batch_norm_method)
            batch_norm = utils.get_batch_norm_fn(
                model_options.sync_batch_norm_method)
            activation_fn = (tf.nn.relu6
                             if model_options.use_bounded_activation else
                             tf.nn.relu)
            with slim.arg_scope(
                [slim.conv2d, slim.separable_conv2d],
                    weights_regularizer=slim.l2_regularizer(weight_decay),
                    activation_fn=activation_fn,
                    normalizer_fn=batch_norm,
                    padding='SAME',
                    stride=1,
                    reuse=reuse):
                with slim.arg_scope([batch_norm], **batch_norm_params):
                    depth = model_options.aspp_convs_filters
                    branch_logits = []

                    if model_options.add_image_level_feature:
                        if model_options.crop_size is not None:
                            image_pooling_crop_size = model_options.image_pooling_crop_size
                            # If image_pooling_crop_size is not specified, use crop_size.
                            if image_pooling_crop_size is None:
                                image_pooling_crop_size = model_options.crop_size
                            pool_height = scale_dimension(
                                image_pooling_crop_size[0],
                                1. / model_options.output_stride)
                            pool_width = scale_dimension(
                                image_pooling_crop_size[1],
                                1. / model_options.output_stride)
                            image_feature = slim.avg_pool2d(
                                features, [pool_height, pool_width],
                                model_options.image_pooling_stride,
                                padding='VALID')
                            resize_height = scale_dimension(
                                model_options.crop_size[0],
                                1. / model_options.output_stride)
                            resize_width = scale_dimension(
                                model_options.crop_size[1],
                                1. / model_options.output_stride)
                        else:
                            # If crop_size is None, we simply do global pooling.
                            pool_height = tf.shape(features)[1]
                            pool_width = tf.shape(features)[2]
                            image_feature = tf.reduce_mean(features,
                                                           axis=[1, 2],
                                                           keepdims=True)
                            resize_height = pool_height
                            resize_width = pool_width
                        image_feature_activation_fn = tf.nn.relu
                        image_feature_normalizer_fn = batch_norm
                        if model_options.aspp_with_squeeze_and_excitation:
                            image_feature_activation_fn = tf.nn.sigmoid
                            if model_options.image_se_uses_qsigmoid:
                                image_feature_activation_fn = utils.q_sigmoid
                            image_feature_normalizer_fn = None
                        image_feature = slim.conv2d(
                            image_feature,
                            depth,
                            1,
                            activation_fn=image_feature_activation_fn,
                            normalizer_fn=image_feature_normalizer_fn,
                            scope=IMAGE_POOLING_SCOPE)
                        image_feature = _resize_bilinear(
                            image_feature, [resize_height, resize_width],
                            image_feature.dtype)
                        # Set shape for resize_height/resize_width if they are not Tensor.
                        if isinstance(resize_height, tf.Tensor):
                            resize_height = None
                        if isinstance(resize_width, tf.Tensor):
                            resize_width = None
                        image_feature.set_shape(
                            [None, resize_height, resize_width, depth])
                        if not model_options.aspp_with_squeeze_and_excitation:
                            branch_logits.append(image_feature)

                    # Employ a 1x1 convolution.
                    branch_logits.append(
                        slim.conv2d(features,
                                    depth,
                                    1,
                                    scope=ASPP_SCOPE + str(0)))

                    if model_options.atrous_rates:
                        # Employ 3x3 convolutions with different atrous rates.
                        for i, rate in enumerate(model_options.atrous_rates,
                                                 1):
                            scope = ASPP_SCOPE + str(i)
                            if model_options.aspp_with_separable_conv:
                                aspp_features = split_separable_conv2d(
                                    features,
                                    filters=depth,
                                    rate=rate,
                                    weight_decay=weight_decay,
                                    scope=scope)
                            else:
                                aspp_features = slim.conv2d(features,
                                                            depth,
                                                            3,
                                                            rate=rate,
                                                            scope=scope)
                            branch_logits.append(aspp_features)

                    # Merge branch logits.
                    concat_logits = tf.concat(branch_logits, 3)
                    if model_options.aspp_with_concat_projection:
                        concat_logits = slim.conv2d(
                            concat_logits,
                            depth,
                            1,
                            scope=CONCAT_PROJECTION_SCOPE)
                        concat_logits = slim.dropout(
                            concat_logits,
                            keep_prob=0.9,
                            is_training=is_training,
                            scope=CONCAT_PROJECTION_SCOPE + '_dropout')
                    if (model_options.add_image_level_feature and
                            model_options.aspp_with_squeeze_and_excitation):
                        concat_logits = tf.math.multiply(
                            concat_logits,
                            image_feature,
                            name='aspp_multiply_image_feature')

                    return concat_logits, end_points
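
A hedged calling-side sketch; common.ModelOptions is assumed to be the DeepLab options container this signature implies, and every field value is a placeholder:

model_options = common.ModelOptions(
    outputs_to_num_classes={'semantic': 21},
    crop_size=[513, 513],
    atrous_rates=[6, 12, 18],  # enables the 3x3 ASPP branches above.
    output_stride=16)
concat_logits, end_points = extract_features(
    images,
    model_options,
    is_training=True,
    fine_tune_batch_norm=True)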
Example #8
def resnet_arg_scope(weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True,
                     activation_fn=tf.nn.relu,
                     use_batch_norm=True,
                     sync_batch_norm_method='None',
                     normalization_method='unspecified',
                     use_weight_standardization=False):
    """Defines the default ResNet arg scope.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: The moving average decay when estimating layer activation
      statistics in batch normalization.
    batch_norm_epsilon: Small constant to prevent division by zero when
      normalizing activations by their variance in batch normalization.
    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
      activations in the batch normalization layer.
    activation_fn: The activation function which is used in ResNet.
    use_batch_norm: Deprecated in favor of normalization_method.
    sync_batch_norm_method: String, sync batchnorm method.
    normalization_method: String, one of `batch`, `none`, or `group`, to use
      batch normalization, no normalization, or group normalization.
    use_weight_standardization: Boolean, whether to use weight standardization.

  Returns:
    An `arg_scope` to use for the resnet models.
  """
    batch_norm_params = {
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
    }
    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
    if normalization_method == 'batch':
        normalizer_fn = batch_norm
    elif normalization_method == 'none':
        normalizer_fn = None
    elif normalization_method == 'group':
        normalizer_fn = slim.group_norm
    elif normalization_method == 'unspecified':
        normalizer_fn = batch_norm if use_batch_norm else None
    else:
        raise ValueError('Unrecognized normalization_method %s' %
                         normalization_method)

    with slim.arg_scope(
        [conv2d_ws.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=activation_fn,
            normalizer_fn=normalizer_fn,
            use_weight_standardization=use_weight_standardization):
        with slim.arg_scope([batch_norm], **batch_norm_params):
            # The following implies padding='SAME' for pool1, which makes feature
            # alignment easier for dense prediction tasks. This is also used in
            # https://github.com/facebook/fb.resnet.torch. However the accompanying
            # code of 'Deep Residual Learning for Image Recognition' uses
            # padding='VALID' for pool1. You can switch to that choice by setting
            # slim.arg_scope([slim.max_pool2d], padding='VALID').
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
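
The usual pattern, assumed by analogy with the other *_arg_scope examples; resnet_v1_50_beta is assumed to be one of the wrappers around Example #9's generator:

with slim.arg_scope(resnet_arg_scope(weight_decay=1e-4,
                                     normalization_method='group',
                                     use_weight_standardization=True)):
    # Group normalization plus weight standardization, per the options above.
    net, end_points = resnet_v1_50_beta(images, num_classes=None,
                                        is_training=True, output_stride=16)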
Example #9
def resnet_v1_beta(inputs,
                   blocks,
                   num_classes=None,
                   is_training=None,
                   global_pool=True,
                   output_stride=None,
                   root_block_fn=None,
                   reuse=None,
                   scope=None,
                   sync_batch_norm_method='None'):
    """Generator for v1 ResNet models (beta variant).

  This function generates a family of modified ResNet v1 models. In particular,
  the first original 7x7 convolution is replaced with three 3x3 convolutions.
  See the resnet_v1_*() methods for specific model instantiations, obtained by
  selecting different block instantiations that produce ResNets of various
  depths.

  The code is modified from slim/nets/resnet_v1.py; please refer to it for
  more details.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: Enable/disable is_training for batch normalization.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    root_block_fn: The function consisting of convolution operations applied to
      the root input. If root_block_fn is None, use the original setting of
      ResNet-v1, which is simply one 7x7 convolution with stride=2.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse, 'scope' must be given.
    scope: Optional variable_scope.
    sync_batch_norm_method: String, sync batchnorm method.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
    if root_block_fn is None:
        root_block_fn = functools.partial(conv2d_ws.conv2d_same,
                                          num_outputs=64,
                                          kernel_size=7,
                                          stride=2,
                                          scope='conv1')
    batch_norm = utils.get_batch_norm_fn(sync_batch_norm_method)
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope([
                conv2d_ws.conv2d, bottleneck, lite_bottleneck,
                resnet_utils.stack_blocks_dense
        ],
                            outputs_collections=end_points_collection):
            if is_training is not None:
                arg_scope = slim.arg_scope([batch_norm],
                                           is_training=is_training)
            else:
                arg_scope = slim.arg_scope([])
            with arg_scope:
                net = inputs
                if output_stride is not None:
                    if output_stride % 4 != 0:
                        raise ValueError(
                            'The output_stride needs to be a multiple of 4.')
                    output_stride //= 4
                net = root_block_fn(net)
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='SAME',
                                      scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2],
                                         name='pool5',
                                         keepdims=True)
                if num_classes is not None:
                    net = conv2d_ws.conv2d(net,
                                           num_classes, [1, 1],
                                           activation_fn=None,
                                           normalizer_fn=None,
                                           scope='logits',
                                           use_weight_standardization=False)
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if num_classes is not None:
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
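
A hedged sketch of direct use; resnet_v1_beta_block is assumed to be the block helper in the same module, and the unit counts below follow the ResNet-50 layout:

blocks = [
    resnet_v1_beta_block('block1', base_depth=64, num_units=3, stride=2),
    resnet_v1_beta_block('block2', base_depth=128, num_units=4, stride=2),
    resnet_v1_beta_block('block3', base_depth=256, num_units=6, stride=2),
    resnet_v1_beta_block('block4', base_depth=512, num_units=3, stride=1),
]
net, end_points = resnet_v1_beta(
    images,
    blocks,
    num_classes=None,    # None: return features before the logits.
    is_training=True,
    global_pool=False,   # dense-prediction setting.
    output_stride=16)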