Esempio n. 1
0
 def test_prediction_size(self):
   """Checks the static output shape of a class-specific MaskRCNNMaskHead.

   A head configured for 20 classes and 14x14 masks is applied to a random
   float32 tensor of shape [64, 7, 7, 1024] with one prediction per location;
   the static shape of the result must be [64, 1, 20, 14, 14].
   """
   num_rois, num_classes, mask_size = 64, 20, 14
   head = mask_head.MaskRCNNMaskHead(
       num_classes=num_classes,
       conv_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
       mask_height=mask_size,
       mask_width=mask_size,
       mask_prediction_num_conv_layers=2,
       mask_prediction_conv_depth=256,
       masks_are_class_agnostic=False)
   features = tf.random_uniform(
       [num_rois, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
   mask_predictions = head.predict(
       features=features, num_predictions_per_location=1)
   # Expected layout: [rois, predictions per location, classes, height, width].
   expected_shape = [num_rois, 1, num_classes, mask_size, mask_size]
   self.assertAllEqual(expected_shape,
                       mask_predictions.get_shape().as_list())
def build_mask_rcnn_box_predictor(is_training,
                                  num_classes,
                                  fc_hyperparams_fn,
                                  use_dropout,
                                  dropout_keep_prob,
                                  box_code_size,
                                  add_background_class=True,
                                  share_box_across_classes=False,
                                  predict_instance_masks=False,
                                  conv_hyperparams_fn=None,
                                  mask_height=14,
                                  mask_width=14,
                                  mask_prediction_num_conv_layers=2,
                                  mask_prediction_conv_depth=256,
                                  masks_are_class_agnostic=False,
                                  convolve_then_upsample_masks=False):
  """Assembles a MaskRCNNBoxPredictor from box, class and optional mask heads.

  Args:
    is_training: Training-mode flag; passed to the box head, the class head
      and the resulting predictor.
    num_classes: Number of object classes, not counting background.
    fc_hyperparams_fn: Hyperparameter function handed to the box and class
      heads.
    use_dropout: Dropout switch handed to the box and class heads.
    dropout_keep_prob: Dropout keep probability handed to the box and class
      heads.
    box_code_size: Size of the box encoding, handed to the box head.
    add_background_class: If true, the class head gets `num_classes + 1`
      slots; otherwise it gets `num_classes` slots.
    share_box_across_classes: Handed to the box head unchanged.
    predict_instance_masks: If true, a MaskRCNNMaskHead is registered as a
      third stage head under `mask_rcnn_box_predictor.MASK_PREDICTIONS`.
    conv_hyperparams_fn: Hyperparameter function handed to the mask head.
    mask_height: Mask height handed to the mask head.
    mask_width: Mask width handed to the mask head.
    mask_prediction_num_conv_layers: Handed to the mask head unchanged.
    mask_prediction_conv_depth: Handed to the mask head unchanged.
    masks_are_class_agnostic: Handed to the mask head unchanged.
    convolve_then_upsample_masks: Handed to the mask head as
      `convolve_then_upsample`.

  Returns:
    The constructed `mask_rcnn_box_predictor.MaskRCNNBoxPredictor`.
  """
  # Settings that the box head and the class head have in common.
  fc_head_kwargs = dict(
      is_training=is_training,
      fc_hyperparams_fn=fc_hyperparams_fn,
      use_dropout=use_dropout,
      dropout_keep_prob=dropout_keep_prob)

  box_prediction_head = box_head.MaskRCNNBoxHead(
      num_classes=num_classes,
      box_code_size=box_code_size,
      share_box_across_classes=share_box_across_classes,
      **fc_head_kwargs)

  # One extra slot is reserved when an implicit background class is requested.
  if add_background_class:
    num_class_slots = num_classes + 1
  else:
    num_class_slots = num_classes
  class_prediction_head = class_head.MaskRCNNClassHead(
      num_class_slots=num_class_slots, **fc_head_kwargs)

  third_stage_heads = {}
  if predict_instance_masks:
    mask_branch = mask_head.MaskRCNNMaskHead(
        num_classes=num_classes,
        conv_hyperparams_fn=conv_hyperparams_fn,
        mask_height=mask_height,
        mask_width=mask_width,
        mask_prediction_num_conv_layers=mask_prediction_num_conv_layers,
        mask_prediction_conv_depth=mask_prediction_conv_depth,
        masks_are_class_agnostic=masks_are_class_agnostic,
        convolve_then_upsample=convolve_then_upsample_masks)
    third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS] = mask_branch

  return mask_rcnn_box_predictor.MaskRCNNBoxPredictor(
      is_training=is_training,
      num_classes=num_classes,
      box_prediction_head=box_prediction_head,
      class_prediction_head=class_prediction_head,
      third_stage_heads=third_stage_heads)
Esempio n. 3
0
def build_mask_rcnn_box_predictor(is_training,
                                  num_classes,
                                  fc_hyperparams_fn,
                                  use_dropout,
                                  dropout_keep_prob,
                                  box_code_size,
                                  add_background_class=True,
                                  share_box_across_classes=False,
                                  predict_instance_masks=False,
                                  conv_hyperparams_fn=None,
                                  mask_height=14,
                                  mask_width=14,
                                  mask_prediction_num_conv_layers=2,
                                  mask_prediction_conv_depth=256,
                                  masks_are_class_agnostic=False,
                                  convolve_then_upsample_masks=False):
    """Constructs a MaskRCNNBoxPredictor together with its prediction heads.

    Args:
        is_training: Whether the predictor operates in training mode.
        num_classes: Number of classes, *excluding* the background category:
            for groundtruth labels in {0, 1, .., K-1} pass K, not K+1, even
            though assigned classification targets can range over {0,... K}.
        fc_hyperparams_fn: Function producing a tf-slim arg_scope holding the
            hyperparameters for fully connected ops.
        use_dropout: Whether dropout is used; given to both the box head and
            the class head.
        dropout_keep_prob: Dropout keep probability; only relevant when
            use_dropout is True.
        box_code_size: Size of the encoding of each box.
        add_background_class: Whether an implicit background class is added,
            i.e. whether the class head gets `num_classes + 1` slots instead
            of `num_classes`.
        share_box_across_classes: Whether one box is shared by all classes
            rather than predicting a separate box per class.
        predict_instance_masks: When True, a third stage mask prediction head
            is attached to the returned predictor.
        conv_hyperparams_fn: Function producing a tf-slim arg_scope holding
            the hyperparameters for convolution ops.
        mask_height: Desired height of the output masks (default 14).
        mask_width: Desired width of the output masks (default 14).
        mask_prediction_num_conv_layers: Number of convolution layers applied
            to the image features in the mask prediction branch.
        mask_prediction_conv_depth: Depth of the first conv2d_transpose op
            applied to the image features in the mask prediction branch. A
            value of 0 lets the depth be chosen automatically from the number
            of object classes and the number of image feature channels.
        masks_are_class_agnostic: Whether the mask head is class-agnostic.
        convolve_then_upsample_masks: Whether convolutions are applied to the
            mask features before upsampling with nearest neighbor resizing.
            Otherwise the mask features are bilinearly resized to
            [`mask_height`, `mask_width`] before the convolutions.

    Returns:
        A MaskRCNNBoxPredictor instance.
    """
    box_regressor = box_head.MaskRCNNBoxHead(
        is_training=is_training,
        num_classes=num_classes,
        fc_hyperparams_fn=fc_hyperparams_fn,
        use_dropout=use_dropout,
        dropout_keep_prob=dropout_keep_prob,
        box_code_size=box_code_size,
        share_box_across_classes=share_box_across_classes)

    # The background class, when requested, occupies one additional slot.
    slot_count = num_classes
    if add_background_class:
        slot_count = num_classes + 1
    classifier = class_head.MaskRCNNClassHead(
        is_training=is_training,
        num_class_slots=slot_count,
        fc_hyperparams_fn=fc_hyperparams_fn,
        use_dropout=use_dropout,
        dropout_keep_prob=dropout_keep_prob)

    third_stage_heads = {}
    if predict_instance_masks:
        mask_branch = mask_head.MaskRCNNMaskHead(
            num_classes=num_classes,
            conv_hyperparams_fn=conv_hyperparams_fn,
            mask_height=mask_height,
            mask_width=mask_width,
            mask_prediction_num_conv_layers=mask_prediction_num_conv_layers,
            mask_prediction_conv_depth=mask_prediction_conv_depth,
            masks_are_class_agnostic=masks_are_class_agnostic,
            convolve_then_upsample=convolve_then_upsample_masks)
        third_stage_heads[
            mask_rcnn_box_predictor.MASK_PREDICTIONS] = mask_branch

    predictor = mask_rcnn_box_predictor.MaskRCNNBoxPredictor(
        is_training=is_training,
        num_classes=num_classes,
        box_prediction_head=box_regressor,
        class_prediction_head=classifier,
        third_stage_heads=third_stage_heads)
    return predictor
Esempio n. 4
0
def build_convolutional_box_predictor(is_training,
                                      num_classes,
                                      conv_hyperparams_fn,
                                      min_depth,
                                      max_depth,
                                      num_layers_before_predictor,
                                      use_dropout,
                                      dropout_keep_prob,
                                      kernel_size,
                                      box_code_size,
                                      apply_sigmoid_to_scores=False,
                                      add_background_class=True,
                                      class_prediction_bias_init=0.0,
                                      use_depthwise=False,
                                      box_encodings_clip_range=None,
                                      predict_instance_masks=False,
                                      mask_use_dropout=False,
                                      mask_dropout_keep_prob=0.8,
                                      mask_kernel_size=3,
                                      mask_use_depthwise=False,
                                      mask_height=15,
                                      mask_width=15,
                                      feature_extractor=None,
                                      crop_and_resize_fn=None,
                                      initial_crop_size=None,
                                      maxpool_kernel_size=None,
                                      maxpool_stride=None,
                                      mask_prediction_num_conv_layers=2,
                                      mask_prediction_conv_depth=256,
                                      masks_are_class_agnostic=False,
                                      convolve_then_upsample_masks=False):
  """Assembles a ConvolutionalBoxPredictor with box, class and mask heads.

  Args:
    is_training: Whether the BoxPredictor operates in training mode.
    num_classes: Number of classes, *excluding* the background category: for
      groundtruth labels in {0, 1, .., K-1} pass K, not K+1, even though
      assigned classification targets can range over {0,... K}.
    conv_hyperparams_fn: Function producing a tf-slim arg_scope holding the
      hyperparameters for convolution ops. It is given to the predictor and,
      when masks are predicted, to the mask head as well.
    min_depth: Minimum feature depth prior to predicting box encodings and
      class predictions.
    max_depth: Maximum feature depth prior to predicting box encodings and
      class predictions. With 0, no additional feature map is inserted before
      the location and class predictions.
    num_layers_before_predictor: Number of additional conv layers placed
      before the predictor.
    use_dropout: Whether dropout is used; given to the class head only.
    dropout_keep_prob: Dropout keep probability; only relevant when
      use_dropout is True.
    kernel_size: Size of the final convolution kernel. When the spatial
      resolution of the feature map is smaller than the kernel size, the
      kernel size is automatically set to min(feature_width, feature_height).
    box_code_size: Size of the encoding of each box.
    apply_sigmoid_to_scores: If True, a sigmoid is applied to the output
      class_predictions.
    add_background_class: Whether an implicit background class is added, i.e.
      whether the class head gets `num_classes + 1` slots instead of
      `num_classes`.
    class_prediction_bias_init: Constant used to initialize the bias of the
      last conv2d layer before class prediction.
    use_depthwise: Whether depthwise convolutions are used for the prediction
      steps; given to the box and class heads.
    box_encodings_clip_range: Min and max values for clipping box_encodings.
    predict_instance_masks: When True, a MaskRCNNMaskHead is registered in
      `other_heads` under `convolutional_box_predictor.MASK_PREDICTIONS`.
    mask_use_dropout: Accepted but not used by this function.
    mask_dropout_keep_prob: Accepted but not used by this function.
    mask_kernel_size: Accepted but not used by this function.
    mask_use_depthwise: Accepted but not used by this function.
    mask_height: Mask height given to the mask head.
    mask_width: Mask width given to the mask head.
    feature_extractor: Passed through to ConvolutionalBoxPredictor unchanged.
    crop_and_resize_fn: Passed through to ConvolutionalBoxPredictor unchanged.
    initial_crop_size: Passed through to ConvolutionalBoxPredictor unchanged.
    maxpool_kernel_size: Passed through to ConvolutionalBoxPredictor
      unchanged.
    maxpool_stride: Passed through to ConvolutionalBoxPredictor unchanged.
    mask_prediction_num_conv_layers: Given to the mask head unchanged.
    mask_prediction_conv_depth: Given to the mask head unchanged.
    masks_are_class_agnostic: Given to the mask head unchanged.
    convolve_then_upsample_masks: Given to the mask head as
      `convolve_then_upsample`.

  Returns:
    A ConvolutionalBoxPredictor instance.
  """
  box_regressor = box_head.ConvolutionalBoxHead(
      is_training=is_training,
      box_code_size=box_code_size,
      kernel_size=kernel_size,
      use_depthwise=use_depthwise,
      box_encodings_clip_range=box_encodings_clip_range)

  # The background class, when requested, occupies one additional slot.
  if add_background_class:
    num_class_slots = num_classes + 1
  else:
    num_class_slots = num_classes
  classifier = class_head.ConvolutionalClassHead(
      is_training=is_training,
      num_class_slots=num_class_slots,
      use_dropout=use_dropout,
      dropout_keep_prob=dropout_keep_prob,
      kernel_size=kernel_size,
      apply_sigmoid_to_scores=apply_sigmoid_to_scores,
      class_prediction_bias_init=class_prediction_bias_init,
      use_depthwise=use_depthwise)

  other_heads = {}
  if predict_instance_masks:
    mask_branch = mask_head.MaskRCNNMaskHead(
        num_classes=num_classes,
        conv_hyperparams_fn=conv_hyperparams_fn,
        mask_height=mask_height,
        mask_width=mask_width,
        mask_prediction_num_conv_layers=mask_prediction_num_conv_layers,
        mask_prediction_conv_depth=mask_prediction_conv_depth,
        masks_are_class_agnostic=masks_are_class_agnostic,
        convolve_then_upsample=convolve_then_upsample_masks)
    other_heads[convolutional_box_predictor.MASK_PREDICTIONS] = mask_branch

  predictor_kwargs = dict(
      is_training=is_training,
      num_classes=num_classes,
      box_prediction_head=box_regressor,
      class_prediction_head=classifier,
      other_heads=other_heads,
      conv_hyperparams_fn=conv_hyperparams_fn,
      num_layers_before_predictor=num_layers_before_predictor,
      min_depth=min_depth,
      max_depth=max_depth,
      feature_extractor=feature_extractor,
      crop_and_resize_fn=crop_and_resize_fn,
      initial_crop_size=initial_crop_size,
      maxpool_kernel_size=maxpool_kernel_size,
      maxpool_stride=maxpool_stride)
  return convolutional_box_predictor.ConvolutionalBoxPredictor(
      **predictor_kwargs)