Example 1
    def __call__(self, fpn_features, boxes, outer_boxes, classes, is_training):
        """Generate the detection priors from the box detections and FPN features.

    This corresponds to the Fig. 4 of the ShapeMask paper at
    https://arxiv.org/pdf/1904.03239.pdf

    Args:
      fpn_features: a dictionary of FPN features.
      boxes: a float tensor of shape [batch_size, num_instances, 4]
        representing the tight gt boxes from dataloader/detection.
      outer_boxes: a float tensor of shape [batch_size, num_instances, 4]
        representing the loose gt boxes from dataloader/detection.
      classes: a int Tensor of shape [batch_size, num_instances]
        of instance classes.
      is_training: training mode or not.

    Returns:
      instance_features: a float Tensor of shape [batch_size * num_instances,
          mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
          instance feature crop.
      detection_priors: A float Tensor of shape [batch_size * num_instances,
        mask_size, mask_size, 1].
    """
        with keras_utils.maybe_enter_backend_graph(), tf.name_scope(
                'prior_mask'):
            batch_size, num_instances, _ = boxes.get_shape().as_list()
            outer_boxes = tf.cast(outer_boxes, tf.float32)
            boxes = tf.cast(boxes, tf.float32)
            instance_features = spatial_transform_ops.multilevel_crop_and_resize(
                fpn_features, outer_boxes, output_size=self._mask_crop_size)
            instance_features = self._shape_prior_fc(instance_features)

            shape_priors = self._get_priors()

            # Get uniform priors for each outer box.
            uniform_priors = tf.ones([
                batch_size, num_instances, self._mask_crop_size,
                self._mask_crop_size
            ])
            uniform_priors = spatial_transform_ops.crop_mask_in_target_box(
                uniform_priors, boxes, outer_boxes, self._mask_crop_size)

            # Classify shape priors using uniform priors + instance features.
            prior_distribution = self._classify_shape_priors(
                tf.cast(instance_features, tf.float32), uniform_priors,
                classes)

            instance_priors = tf.gather(shape_priors, classes)
            instance_priors *= tf.expand_dims(
                tf.expand_dims(tf.cast(prior_distribution, tf.float32), axis=-1),
                axis=-1)
            instance_priors = tf.reduce_sum(instance_priors, axis=2)
            detection_priors = spatial_transform_ops.crop_mask_in_target_box(
                instance_priors, boxes, outer_boxes, self._mask_crop_size)

            return instance_features, detection_priors
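
The prior-weighting step above gathers one bank of class-specific shape priors per instance and blends the bank with the predicted distribution. A minimal, self-contained sketch of just that step, with dummy shapes; `num_clusters` here stands in for the number of priors per class and is an assumption, not taken from the snippet:

import tensorflow as tf

batch_size, num_instances, num_classes, num_clusters, crop_size = 2, 3, 5, 4, 8

# One bank of priors per class: [num_classes, num_clusters, crop, crop].
shape_priors = tf.random.uniform(
    [num_classes, num_clusters, crop_size, crop_size])
classes = tf.random.uniform(
    [batch_size, num_instances], maxval=num_classes, dtype=tf.int32)
# Softmax weights over the clusters, standing in for the output of
# _classify_shape_priors.
prior_distribution = tf.nn.softmax(
    tf.random.normal([batch_size, num_instances, num_clusters]))

# Pick each instance's class bank, then blend the clusters by weight.
instance_priors = tf.gather(shape_priors, classes)         # [B, N, K, H, W]
weights = prior_distribution[..., tf.newaxis, tf.newaxis]  # [B, N, K, 1, 1]
detection_priors = tf.reduce_sum(instance_priors * weights, axis=2)
print(detection_priors.shape)  # (2, 3, 8, 8)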
Example 2
  def build_model(self, params, mode=None):
    if self._keras_model is None:
      with keras_utils.maybe_enter_backend_graph():
        outputs = self.model_outputs(self._input_layer, mode)

        model = tf.keras.models.Model(
            inputs=self._input_layer, outputs=outputs, name='retinanet')
        assert model is not None, 'Failed to build tf.keras.Model.'
        model.optimizer = self.build_optimizer()
        self._keras_model = model

    return self._keras_model
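
The pattern above lazily builds the tf.keras.Model on first use and caches it on the instance, so later calls return the same object. A minimal sketch with a hypothetical stand-in architecture (`MiniModel` and its layers are illustrative, not from the source):

import tensorflow as tf

class MiniModel:
  """Hypothetical stand-in for the detection model class above."""

  def __init__(self):
    self._input_layer = tf.keras.Input(shape=(32, 32, 3))
    self._keras_model = None

  def build_model(self):
    # Build the tf.keras.Model on the first call only, then reuse it.
    if self._keras_model is None:
      outputs = tf.keras.layers.Conv2D(8, 3)(self._input_layer)
      model = tf.keras.models.Model(
          inputs=self._input_layer, outputs=outputs, name='mini')
      model.optimizer = tf.keras.optimizers.SGD(0.1)
      self._keras_model = model
    return self._keras_model

m = MiniModel()
assert m.build_model() is m.build_model()  # the cached instance is reused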
Example 3
  def __call__(self, features, is_training=None):

    scores_outputs = {}
    box_outputs = {}

    with keras_utils.maybe_enter_backend_graph(), tf.name_scope('rpn_head'):
      for level in range(self._min_level, self._max_level + 1):
        scores_output, box_output = self._shared_rpn_heads(
            features[level], self._anchors_per_location, level, is_training)
        scores_outputs[level] = scores_output
        box_outputs[level] = box_output
      return scores_outputs, box_outputs
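
The loop above applies one shared head to every pyramid level, so the same kernels score all scales. A minimal sketch of that weight sharing with dummy feature maps; layer names and sizes are illustrative:

import tensorflow as tf

min_level, max_level, anchors_per_location = 3, 5, 3
# Created once, applied at every level: the weights are shared.
shared_conv = tf.keras.layers.Conv2D(
    64, 3, padding='same', activation='relu', name='rpn')
score_conv = tf.keras.layers.Conv2D(anchors_per_location, 1, name='rpn-class')
box_conv = tf.keras.layers.Conv2D(4 * anchors_per_location, 1, name='rpn-box')

features = {
    level: tf.random.normal([1, 2 ** (8 - level), 2 ** (8 - level), 64])
    for level in range(min_level, max_level + 1)
}
scores_outputs, box_outputs = {}, {}
for level in range(min_level, max_level + 1):
  net = shared_conv(features[level])  # same kernels at every level
  scores_outputs[level] = score_conv(net)
  box_outputs[level] = box_conv(net)
print({l: t.shape.as_list() for l, t in scores_outputs.items()})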
Example 4
    def __call__(self, multilevel_features, is_training=None):
        """Returns the FPN features for a given multilevel features.

    Args:
      multilevel_features: a `dict` containing `int` keys for continuous feature
        levels, e.g., [2, 3, 4, 5]. The values are corresponding features with
        shape [batch_size, height_l, width_l, num_filters].
      is_training: `bool` if True, the model is in training mode.

    Returns:
      a `dict` containing `int` keys for continuous feature levels
      [min_level, min_level + 1, ..., max_level]. The values are corresponding
      FPN features with shape [batch_size, height_l, width_l, fpn_feat_dims].
    """
        input_levels = list(multilevel_features.keys())
        if min(input_levels) > self._min_level:
            raise ValueError(
                'The minimum backbone level %d should be less than or equal '
                'to the FPN minimum level %d.' %
                (min(input_levels), self._min_level))
        backbone_max_level = min(max(input_levels), self._max_level)
        with keras_utils.maybe_enter_backend_graph(), tf.name_scope('fpn'):
            # Adds lateral connections.
            feats_lateral = {}
            for level in range(self._min_level, backbone_max_level + 1):
                feats_lateral[level] = self._lateral_conv2d_op[level](
                    multilevel_features[level])

            # Adds top-down path.
            feats = {backbone_max_level: feats_lateral[backbone_max_level]}
            for level in range(backbone_max_level - 1, self._min_level - 1,
                               -1):
                feats[level] = spatial_transform_ops.nearest_upsampling(
                    feats[level + 1], 2) + feats_lateral[level]

            # Adds post-hoc 3x3 convolution kernel.
            for level in range(self._min_level, backbone_max_level + 1):
                feats[level] = self._post_hoc_conv2d_op[level](feats[level])

            # Adds coarser FPN levels introduced for RetinaNet.
            for level in range(backbone_max_level + 1, self._max_level + 1):
                feats_in = feats[level - 1]
                if level > backbone_max_level + 1:
                    feats_in = self._activation_op(feats_in)
                feats[level] = self._coarse_conv2d_op[level](feats_in)
            if self._use_batch_norm:
                # Adds batch_norm layer.
                for level in range(self._min_level, self._max_level + 1):
                    feats[level] = self._norm_activations[level](
                        feats[level], is_training=is_training)
        return feats
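
The top-down path above merges each upsampled coarser level with its lateral projection. A minimal sketch of 2x nearest-neighbor upsampling in the spirit of `spatial_transform_ops.nearest_upsampling` (the real op may be implemented differently):

import tensorflow as tf

def nearest_upsampling_2x(data):
  """Repeats each pixel twice along height and width."""
  data = tf.repeat(data, repeats=2, axis=1)
  return tf.repeat(data, repeats=2, axis=2)

coarse = tf.random.normal([1, 4, 4, 16])   # feats[level + 1]
lateral = tf.random.normal([1, 8, 8, 16])  # feats_lateral[level]
merged = nearest_upsampling_2x(coarse) + lateral  # top-down merge
print(merged.shape)  # (1, 8, 8, 16)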
Example 5
    def __call__(self, features, mask_logits, classes, is_training):
        """Generate instance masks from FPN features and detection priors.

    This corresponds to the Fig. 5-6 of the ShapeMask paper at
    https://arxiv.org/pdf/1904.03239.pdf

    Args:
      features: a float Tensor of shape
        [batch_size, num_instances, mask_crop_size, mask_crop_size,
        num_downsample_channels]. This is the instance feature crop.
      mask_logits: a float Tensor of shape
        [batch_size, num_instances, mask_crop_size, mask_crop_size] indicating
        predicted mask logits.
      classes: a int Tensor of shape [batch_size, num_instances]
        of instance classes.
      is_training: a bool indicating whether in training mode.

    Returns:
      mask_outputs: instance mask prediction as a float Tensor of shape
        [batch_size, num_instances, mask_size, mask_size].
    """
        # Extract the foreground mean features
        with keras_utils.maybe_enter_backend_graph(), tf.name_scope(
                'fine_mask'):
            mask_probs = tf.nn.sigmoid(mask_logits)
            # Compute instance embedding for hard average.
            binary_mask = tf.cast(tf.greater(mask_probs, 0.5), features.dtype)
            instance_embedding = tf.reduce_sum(
                features * tf.expand_dims(binary_mask, axis=-1), axis=(2, 3))
            instance_embedding /= tf.expand_dims(
                tf.reduce_sum(binary_mask, axis=(2, 3)) + 1e-20, axis=-1)
            # Take the difference between crop features and mean instance features.
            features -= tf.expand_dims(
                tf.expand_dims(instance_embedding, axis=2), axis=2)

            features += self._fine_mask_fc(tf.expand_dims(mask_probs, axis=-1))

            # Decoder to generate upsampled segmentation mask.
            mask_logits = self.decoder_net(features, is_training)
            if self._use_category_for_mask:
                mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
                mask_logits = tf.gather(mask_logits,
                                        tf.expand_dims(classes, -1),
                                        batch_dims=2)
                mask_logits = tf.squeeze(mask_logits, axis=2)
            else:
                mask_logits = mask_logits[..., 0]

        return mask_logits
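
The `transpose`/`gather` tail above selects, for each instance, the logit channel of its predicted class. A minimal sketch of that selection with dummy shapes:

import tensorflow as tf

batch, num_instances, size, num_classes = 2, 3, 7, 5
mask_logits = tf.random.normal(
    [batch, num_instances, size, size, num_classes])
classes = tf.random.uniform(
    [batch, num_instances], maxval=num_classes, dtype=tf.int32)

# Move the class axis next to the batch axes so gather can index it.
logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])  # [B, N, C, H, W]
picked = tf.gather(logits, tf.expand_dims(classes, -1), batch_dims=2)
picked = tf.squeeze(picked, axis=2)                  # [B, N, H, W]
print(picked.shape)  # (2, 3, 7, 7)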
Example 6
  def __call__(self, fpn_features, is_training=None):
    """Returns outputs of RetinaNet head."""
    class_outputs = {}
    box_outputs = {}
    with keras_utils.maybe_enter_backend_graph(), tf.name_scope(
        'retinanet_head'):
      for level in range(self._min_level, self._max_level + 1):
        features = fpn_features[level]

        class_outputs[level] = self.class_net(
            features, level, is_training=is_training)
        box_outputs[level] = self.box_net(
            features, level, is_training=is_training)
    return class_outputs, box_outputs
Example 7
    def __call__(self, inputs, is_training=None):
        """Returns the ResNet model for a given size and number of output classes.

    Args:
      inputs: a `Tesnor` with shape [batch_size, height, width, 3] representing
        a batch of images.
      is_training: `bool` if True, the model is in training mode.

    Returns:
      a `dict` containing `int` keys for continuous feature levels [2, 3, 4, 5].
      The values are corresponding feature hierarchy in ResNet with shape
      [batch_size, height_l, width_l, num_filters].
    """
        with keras_utils.maybe_enter_backend_graph():
            with tf.name_scope('resnet%s' % self._resnet_depth):
                return self._resnet_fn(inputs, is_training)
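
The backbone contract documented above is a callable mapping images to a dict of feature levels whose spatial size halves per level. A minimal sketch with a hypothetical `toy_backbone` (not the real ResNet):

import tensorflow as tf

def toy_backbone(images):
  """Hypothetical backbone: returns features for levels 2..5."""
  net = tf.keras.layers.Conv2D(16, 3, strides=2, padding='same')(images)
  feats = {}
  for level in range(2, 6):
    net = tf.keras.layers.Conv2D(
        8 * 2 ** level, 3, strides=2, padding='same')(net)
    feats[level] = net  # spatial size is input_size / 2**level
  return feats

feats = toy_backbone(tf.random.normal([1, 64, 64, 3]))
print({l: f.shape.as_list() for l, f in feats.items()})
# {2: [1, 16, 16, 32], 3: [1, 8, 8, 64], 4: [1, 4, 4, 128], 5: [1, 2, 2, 256]}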
Example 8
  def __call__(self, inputs, is_training=None):
    with keras_utils.maybe_enter_backend_graph():
      model = SpineNet(
          input_specs=self._input_specs,
          min_level=self._min_level,
          max_level=self._max_level,
          block_specs=self._block_specs,
          endpoints_num_filters=self._endpoints_num_filters,
          resample_alpha=self._resample_alpha,
          block_repeats=self._block_repeats,
          filter_size_scale=self._filter_size_scale,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=self._activation,
          use_sync_bn=self._use_sync_bn,
          norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon)
      return model(inputs)
Example 9
    def __call__(self, features, detection_priors, classes, is_training):
        """Generate instance masks from FPN features and detection priors.

    This corresponds to the Fig. 5-6 of the ShapeMask paper at
    https://arxiv.org/pdf/1904.03239.pdf

    Args:
      features: a float Tensor of shape [batch_size, num_instances,
        mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
        instance feature crop.
      detection_priors: a float Tensor of shape [batch_size, num_instances,
        mask_crop_size, mask_crop_size, 1]. This is the detection prior for
        the instance.
      classes: a int Tensor of shape [batch_size, num_instances]
        of instance classes.
      is_training: a bool indicating whether in training mode.

    Returns:
      mask_outputs: instance mask prediction as a float Tensor of shape
        [batch_size, num_instances, mask_size, mask_size].
    """
        with keras_utils.maybe_enter_backend_graph(), tf.name_scope(
                'coarse_mask'):
            # Transform detection priors to have the same dimension as features.
            detection_priors = tf.expand_dims(detection_priors, axis=-1)
            detection_priors = self._coarse_mask_fc(detection_priors)

            features += detection_priors
            mask_logits = self.decoder_net(features, is_training)
            # Gather the logits with right input class.
            if self._use_category_for_mask:
                mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
                mask_logits = tf.gather(mask_logits,
                                        tf.expand_dims(classes, -1),
                                        batch_dims=2)
                mask_logits = tf.squeeze(mask_logits, axis=2)
            else:
                mask_logits = mask_logits[..., 0]

            return mask_logits
Example 10
  def __call__(self, roi_features, is_training=None):
    """Box and class branches for the Mask-RCNN model.

    Args:
      roi_features: A ROI feature tensor of shape [batch_size, num_rois,
        height_l, width_l, num_filters].
      is_training: `bool`, if True, the model is in training mode.

    Returns:
      class_outputs: a tensor with a shape of
        [batch_size, num_rois, num_classes], representing the class predictions.
      box_outputs: a tensor with a shape of
        [batch_size, num_rois, num_classes * 4], representing the box
        predictions.
      score_outputs: a tensor representing the score predictions.
    """

    with keras_utils.maybe_enter_backend_graph(), tf.name_scope(
        'fast_rcnn_head'):
      # Reshape inputs before the FC layers.
      _, num_rois, height, width, filters = roi_features.get_shape().as_list()

      net = tf.reshape(roi_features, [-1, height, width, filters])
      for i in range(self._num_convs):
        net = self._conv_ops[i](net)
        if self._use_batch_norm:
          net = self._conv_bn_ops[i](net, is_training=is_training)

      filters = self._num_filters if self._num_convs > 0 else filters
      net = tf.reshape(net, [-1, num_rois, height * width * filters])

      for i in range(self._num_fcs):
        net = self._fc_ops[i](net)
        if self._use_batch_norm:
          net = self._fc_bn_ops[i](net, is_training=is_training)

      class_outputs = self._class_predict(net)
      box_outputs = self._box_predict(net)
      score_outputs = self._score_predict(net)
      return class_outputs, box_outputs, score_outputs
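
The head above folds ROIs into the batch dimension so ordinary Conv2D layers run over every ROI at once, then unfolds for the per-ROI fully connected layers. A minimal sketch of that reshape trick with dummy shapes:

import tensorflow as tf

batch, num_rois, height, width, filters = 2, 4, 7, 7, 16
roi_features = tf.random.normal([batch, num_rois, height, width, filters])

conv = tf.keras.layers.Conv2D(filters, 3, padding='same')
fc = tf.keras.layers.Dense(32)

# Fold: [B, R, H, W, F] -> [B*R, H, W, F] so Conv2D sees a plain batch.
net = tf.reshape(roi_features, [-1, height, width, filters])
net = conv(net)
# Unfold and flatten: [B*R, H, W, F] -> [B, R, H*W*F] for the FC layers.
net = tf.reshape(net, [-1, num_rois, height * width * filters])
net = fc(net)  # Dense acts on the last axis: [B, R, 32]
print(net.shape)  # (2, 4, 32)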
Example 11
  def __call__(self, roi_features, class_indices, is_training=None):
    """Mask branch for the Mask-RCNN model.

    Args:
      roi_features: A ROI feature tensor of shape [batch_size, num_rois,
        height_l, width_l, num_filters].
      class_indices: a Tensor of shape [batch_size, num_rois], indicating which
        class the ROI is.
      is_training: `boolean`, if True if model is in training mode.

    Returns:
      mask_outputs: a tensor with a shape of
        [batch_size, num_masks, mask_height, mask_width, num_classes],
        representing the mask predictions.
      fg_gather_indices: a tensor with a shape of [batch_size, num_masks, 2],
        representing the fg mask targets.
    Raises:
      ValueError: If boxes is not a rank-3 tensor or the last dimension of
        boxes is not 4.
    """

    with keras_utils.maybe_enter_backend_graph():
      with tf.name_scope('mask_head'):
        _, num_rois, height, width, filters = roi_features.get_shape().as_list()
        net = tf.reshape(roi_features, [-1, height, width, filters])

        for i in range(self._num_convs):
          net = self._conv2d_ops[i](net)
          if self._use_batch_norm:
            net = self._norm_activation()(net, is_training=is_training)

        net = self._mask_conv_transpose(net)
        if self._use_batch_norm:
          net = self._norm_activation()(net, is_training=is_training)

        mask_outputs = self._conv2d_op(
            self._num_classes,
            kernel_size=(1, 1),
            strides=(1, 1),
            padding='valid',
            name='mask_fcn_logits')(net)
        mask_outputs = tf.reshape(mask_outputs, [
            -1, num_rois, self._mask_target_size, self._mask_target_size,
            self._num_classes
        ])

        with tf.name_scope('masks_post_processing'):
          # TODO(pengchong): Figure out the way not to use the static inferred
          # batch size.
          batch_size, num_masks = class_indices.get_shape().as_list()
          mask_outputs = tf.transpose(a=mask_outputs, perm=[0, 1, 4, 2, 3])
          # Constructs indices for gather.
          batch_indices = tf.tile(
              tf.expand_dims(tf.range(batch_size), axis=1), [1, num_masks])
          mask_indices = tf.tile(
              tf.expand_dims(tf.range(num_masks), axis=0), [batch_size, 1])
          gather_indices = tf.stack(
              [batch_indices, mask_indices, class_indices], axis=2)
          mask_outputs = tf.gather_nd(mask_outputs, gather_indices)
      return mask_outputs
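
The index construction above pairs every (batch, mask) position with its class so a single `tf.gather_nd` picks the matching mask channel. A minimal sketch with dummy shapes; here the logits are already transposed to [batch, masks, classes, H, W]:

import tensorflow as tf

batch_size, num_masks, num_classes, mask_size = 2, 3, 4, 5
mask_outputs = tf.random.normal(
    [batch_size, num_masks, num_classes, mask_size, mask_size])
class_indices = tf.random.uniform(
    [batch_size, num_masks], maxval=num_classes, dtype=tf.int32)

# One (batch, mask, class) triple per output position.
batch_indices = tf.tile(
    tf.expand_dims(tf.range(batch_size), axis=1), [1, num_masks])
mask_indices = tf.tile(
    tf.expand_dims(tf.range(num_masks), axis=0), [batch_size, 1])
gather_indices = tf.stack(
    [batch_indices, mask_indices, class_indices], axis=2)  # [B, M, 3]
selected = tf.gather_nd(mask_outputs, gather_indices)      # [B, M, H, W]
print(selected.shape)  # (2, 3, 5, 5)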