Beispiel #1
0
def nms(bboxes,
        bbox_scores,
        nms_iou_threshold=0.7,
        nms_score_threshold=0.00,
        max_nms_boxes=200,
        use_oriented_per_class_nms=True):
    """Runs NMS for a single foreground class via `DecodeWithNMS`.

    The boxes are flattened to [batch_size, -1, 7] and the scores to
    [batch_size, -1, 1]. An all-zero score column is placed in front of the
    real scores so that they sit at class index 1, and only the class-1 slice
    of the decoder output is returned.

    Args:
      bboxes: Tensor reshapeable to [batch_size, -1, 7]; the batch size is
        taken from its leading dimension.
      bbox_scores: Tensor reshapeable to [batch_size, -1, 1].
      nms_iou_threshold: Forwarded to the decoder as `nms_iou_threshold`.
      nms_score_threshold: Forwarded to the decoder as `score_threshold`.
      max_nms_boxes: Forwarded to the decoder as `max_boxes_per_class`.
      use_oriented_per_class_nms: Forwarded to the decoder unchanged.

    Returns:
      A tuple `(nms_bboxes, nms_bbox_scores, nms_valid_mask)` reshaped to
      [batch_size, -1, 7], [batch_size, -1] and [batch_size, -1] respectively;
      the mask is cast to int32.
    """
    num_batches = get_shape(bboxes)[0]
    flat_boxes = tf.reshape(bboxes, [num_batches, -1, 7])
    flat_scores = tf.reshape(bbox_scores, [num_batches, -1, 1])

    # Column 0 is a zero-score placeholder class; column 1 holds the scores.
    two_class_scores = tf.concat(
        [tf.zeros_like(flat_scores), flat_scores], axis=-1)

    decoded = detection_decoder.DecodeWithNMS(
        flat_boxes,
        two_class_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=nms_score_threshold,
        max_boxes_per_class=max_nms_boxes,
        use_oriented_per_class_nms=use_oriented_per_class_nms)
    kept_boxes, kept_scores, kept_mask = decoded

    # Keep only the slice at class index 1 (the real class) of each output.
    kept_boxes = tf.reshape(kept_boxes[:, 1, :, :], [num_batches, -1, 7])
    kept_scores = tf.reshape(kept_scores[:, 1, :], [num_batches, -1])
    kept_mask = tf.reshape(kept_mask[:, 1, :], [num_batches, -1])
    return kept_boxes, kept_scores, tf.cast(kept_mask, tf.dtypes.int32)
Beispiel #2
0
    def testDecoderWithOrientedPerClassNMS(self):
        """Checks shapes and per-class mask counts for oriented per-class NMS.

        Random boxes and scores are decoded with per-class score and IoU
        thresholds. The test verifies the shapes of all outputs and that, for
        every class, the number of valid mask entries equals the number of
        scores strictly above that class's score threshold.
        """
        batch_size, num_preds, num_classes = 4, 8, 10

        # An example of setting the score threshold high and IOU threshold low
        # for classes we don't care about: only class 1 gets usable values.
        score_threshold = [
            0.05 if cls == 1 else 1.0 for cls in range(num_classes)
        ]
        nms_iou_threshold = [
            0.5 if cls == 1 else 0.0 for cls in range(num_classes)
        ]

        with tf.Graph().as_default():
            tf.random.set_seed(12345)
            predicted_bboxes = tf.random.normal([batch_size, num_preds, 7])
            classification_scores = tf.random.uniform(
                [batch_size, num_preds, num_classes], minval=0, maxval=1)

            decoded = detection_decoder.DecodeWithNMS(
                predicted_bboxes,
                classification_scores,
                nms_iou_threshold=nms_iou_threshold,
                score_threshold=score_threshold,
                use_oriented_per_class_nms=True)
            idxs, bboxes, bbox_scores, valid_mask = decoded

            with self.session():
                (input_bboxes, input_scores, output_idxs, output_bboxes,
                 output_scores, mask) = self.evaluate([
                     predicted_bboxes, classification_scores, idxs, bboxes,
                     bbox_scores, valid_mask
                 ])

                per_class_shape = (batch_size, num_classes, num_preds)
                expected_shapes = [
                    ((batch_size, num_preds, 7), input_bboxes),
                    (per_class_shape, output_idxs),
                    (per_class_shape + (7,), output_bboxes),
                    ((batch_size, num_preds, num_classes), input_scores),
                    (per_class_shape, output_scores),
                    (per_class_shape, mask),
                ]
                for expected, array in expected_shapes:
                    self.assertEqual(expected, array.shape)

                # For each class, the count of valid entries must match the
                # count of input scores and of output scores that exceed the
                # class's score threshold.
                for cls_idx, threshold in enumerate(score_threshold):
                    num_valid = mask[:, cls_idx, :].sum()
                    self.assertEqual(
                        num_valid,
                        (input_scores[:, :, cls_idx] > threshold).sum())
                    self.assertEqual(
                        num_valid,
                        (output_scores[:, cls_idx, :] > threshold).sum())
Beispiel #3
0
    def Inference(self):
        """Builds the inference graph.

        The 'default' subgraph feeds placeholders through the model, turns the
        classification logits into scores with a sigmoid and decodes the
        result with `detection_decoder.DecodeWithNMS`. The tensors handed to
        NMS are expected to be:

          predicted_bboxes: A [batch_size, num_boxes, 7] float Tensor.

          classification_scores: A [batch_size, num_boxes, num_classes] float
          Tensor.

        The fetches expose the per-class boxes, the per-class scores
        (multiplied by the valid mask) and the valid mask itself.

        Returns:
          A dictionary whose values are a tuple of fetches and feeds.
        """
        params = self.params
        with tf.name_scope('inference'):
            feeds = self._Placeholders()
            predictions = self.ComputePredictions(self.theta, feeds)
            boxes_and_logits = self._BBoxesAndLogits(feeds, predictions)
            boxes = boxes_and_logits.predicted_bboxes
            scores = tf.sigmoid(boxes_and_logits.classification_logits)

            # The first element of the decoder output (indices) is not needed.
            _, nms_boxes, nms_scores, nms_mask = (
                detection_decoder.DecodeWithNMS(
                    boxes,
                    scores,
                    nms_iou_threshold=params.nms_iou_threshold,
                    score_threshold=params.nms_score_threshold,
                    max_boxes_per_class=params.max_nms_boxes,
                    use_oriented_per_class_nms=(
                        params.use_oriented_per_class_nms)))
            # Zero the scores of entries the mask marks as invalid.
            nms_scores *= nms_mask

            # TODO(vrv): Fix the inference graph for KITTI, since we need
            # to apply frustum clipping.  This requires customizing the
            # inference placeholders for each model.
            fetches = {
                'per_class_predicted_bboxes': nms_boxes,
                'per_class_predicted_bbox_scores': nms_scores,
                'per_class_valid_mask': nms_mask
            }
            subgraphs = {
                'default': (fetches, dict(feeds.FlattenItems())),
            }
        return subgraphs
Beispiel #4
0
    def testDecoderSingleClassNMS(self):
        """Checks output shapes of `DecodeWithNMS` without oriented NMS.

        With `use_oriented_per_class_nms=False` the returned indices are
        expected to have no class dimension, while boxes, scores and mask keep
        the per-class layout.
        """
        batch_size, num_preds, num_classes = 4, 8, 10
        score_threshold, nms_iou_threshold = 0.05, 0.5

        with tf.Graph().as_default():
            tf.random.set_seed(12345)
            predicted_bboxes = tf.random.normal([batch_size, num_preds, 7])
            classification_scores = tf.random.uniform(
                [batch_size, num_preds, num_classes], minval=0, maxval=1)

            decoded = detection_decoder.DecodeWithNMS(
                predicted_bboxes,
                classification_scores,
                nms_iou_threshold=nms_iou_threshold,
                score_threshold=score_threshold,
                use_oriented_per_class_nms=False)
            idxs, bboxes, bbox_scores, valid_mask = decoded

            with self.session():
                (input_bboxes, input_scores, output_idxs, output_bboxes,
                 output_scores, mask) = self.evaluate([
                     predicted_bboxes, classification_scores, idxs, bboxes,
                     bbox_scores, valid_mask
                 ])

                per_class_shape = (batch_size, num_classes, num_preds)
                expected_shapes = [
                    ((batch_size, num_preds, 7), input_bboxes),
                    # Indices carry no class dimension in this mode.
                    ((batch_size, num_preds), output_idxs),
                    (per_class_shape + (7,), output_bboxes),
                    ((batch_size, num_preds, num_classes), input_scores),
                    (per_class_shape, output_scores),
                    (per_class_shape, mask),
                ]
                for expected, array in expected_shapes:
                    self.assertEqual(expected, array.shape)
Beispiel #5
0
  def Decode(self, input_batch):
    """Decodes an input batch into per-class NMS outputs.

    Runs the model on `input_batch`, converts the classification logits to
    sigmoid scores (optionally scaled by the `score_scaler` entry of the
    per-example loss dictionary) and applies `DecodeWithNMS` on the CPU.

    Args:
      input_batch: The batch passed to `ComputePredictions`, `ComputeLoss`,
        `_BBoxesAndLogits` and the output decoder. When
        `p.decode_include_residuals` is set it must also provide
        `anchor_localization_residuals`, `assigned_gt_labels` and
        `anchor_bboxes`.

    Returns:
      A `py_utils.NestedMap` containing the per-class boxes, masked scores,
      valid mask and visualization weights, optionally the gathered
      residual/label/logit/anchor tensors, whatever
      `self.output_decoder.ProcessOutputs` adds, and `global_step`.
    """
    params = self.params

    predictions = self.ComputePredictions(self.theta, input_batch)
    boxes_and_logits = self._BBoxesAndLogits(input_batch, predictions)
    predicted_bboxes = boxes_and_logits.predicted_bboxes
    batch_size, num_bboxes, _ = py_utils.GetShape(predicted_bboxes, 3)
    # Shape-check the logits against the leading dimensions of the boxes.
    logits = py_utils.HasShape(
        boxes_and_logits.classification_logits,
        [batch_size, num_bboxes, params.num_classes])

    scores = tf.sigmoid(logits)

    # The loss computation may supply a multiplicative scaler for the scores.
    _, per_example = self.ComputeLoss(self.theta, predictions, input_batch)
    if 'score_scaler' in per_example:
      scores *= per_example['score_scaler']

    with tf.device('/cpu:0'):
      # Decode the predicted bboxes, performing NMS.
      nms_idxs, nms_bboxes, nms_scores, nms_mask = (
          detection_decoder.DecodeWithNMS(
              predicted_bboxes,
              scores,
              nms_iou_threshold=params.nms_iou_threshold,
              score_threshold=params.nms_score_threshold,
              max_boxes_per_class=params.max_nms_boxes,
              use_oriented_per_class_nms=params.use_oriented_per_class_nms))

      # nms_mask is a [batch, num_classes, num_boxes] Tensor marking the boxes
      # NMS selected. The number of selected boxes differs per example, so the
      # mask lets the boxes stay in one dense batched Tensor.
      #
      # Multiplying the scores by the mask ensures that unselected boxes are
      # never interpreted as valid.
      nms_scores *= nms_mask
      visualization_weights = py_utils.HasShape(
          nms_scores,
          [batch_size, params.num_classes, params.max_nms_boxes])

      # For top down visualization, zero out boxes whose scores do not reach
      # the visualization threshold.
      passes_threshold = tf.greater_equal(
          visualization_weights,
          params.visualization_classification_threshold)
      visualization_weights = tf.where(
          passes_threshold, visualization_weights,
          tf.zeros_like(visualization_weights))

    model_outputs = py_utils.NestedMap()
    model_outputs.per_class_predicted_bboxes = nms_bboxes
    model_outputs.per_class_predicted_bbox_scores = nms_scores
    model_outputs.per_class_valid_mask = nms_mask

    decoder_outputs = py_utils.NestedMap({
        'per_class_predicted_bboxes': nms_bboxes,
        'per_class_predicted_bbox_scores': nms_scores,
        'per_class_valid_mask': nms_mask,
        'visualization_weights': visualization_weights,
    })

    if params.decode_include_residuals:
      # Including the residuals in the decoder output makes it possible to
      # save the outputs for further analysis. The outputs are made to match
      # the per-class NMS output format of [batch, num_classes, ...].
      def _ReshapeGather(tensor):
        """Reshapes tensor and then gathers using the nms indices."""
        flattened = tf.reshape(tensor, [batch_size, num_bboxes, -1])
        gathered = tf.gather(flattened, nms_idxs, batch_dims=1)
        if params.use_oriented_per_class_nms:
          return gathered
        # Without oriented NMS the indices lack the num_classes dimension, so
        # tile the data to the expected per class shape of
        # [batch_size, num_classes, ...].
        return tf.tile(gathered[:, tf.newaxis, :, :],
                       [1, params.num_classes, 1, 1])

      decoder_outputs.update({
          'per_class_gt_residuals':
              _ReshapeGather(input_batch.anchor_localization_residuals),
          'per_class_gt_labels':
              _ReshapeGather(input_batch.assigned_gt_labels),
          'per_class_residuals':
              _ReshapeGather(predictions.residuals),
          'per_class_logits':
              _ReshapeGather(predictions.classification_logits),
          'per_class_anchor_boxes':
              _ReshapeGather(input_batch.anchor_bboxes),
      })

    decoder_outputs.update(
        self.output_decoder.ProcessOutputs(input_batch, model_outputs))

    # Produce global step as an output (which is the step
    # of the checkpoint being decoded.)
    decoder_outputs.global_step = py_utils.GetGlobalStep()

    return decoder_outputs
Beispiel #6
0
    def Decode(self, input_batch):
        """Decodes an input batch into per-class NMS outputs.

        Obtains boxes and logits from `_BBoxesAndLogits`, converts the logits
        to sigmoid scores (scaled by `score_scaler` when that entry is
        present) and applies `DecodeWithNMS` on the CPU.

        Args:
          input_batch: The batch passed to `_BBoxesAndLogits` and to the
            output decoder.

        Returns:
          A `py_utils.NestedMap` containing the per-class boxes, masked
          scores, valid mask and visualization weights, whatever
          `self.output_decoder.ProcessOutputs` adds, and `global_step`.
        """
        params = self.params

        boxes_and_logits = self._BBoxesAndLogits(input_batch)
        predicted_bboxes = boxes_and_logits.predicted_bboxes
        batch_size, num_bboxes, _ = py_utils.GetShape(predicted_bboxes, 3)
        # Shape-check the logits against the leading dimensions of the boxes.
        logits = py_utils.HasShape(
            boxes_and_logits.classification_logits,
            [batch_size, num_bboxes, params.num_classes])

        scores = tf.sigmoid(logits)

        # Score scaler.
        if 'score_scaler' in boxes_and_logits:
            scores *= boxes_and_logits.score_scaler

        with tf.device('/cpu:0'):
            # Decode the predicted bboxes, performing NMS.
            nms_bboxes, nms_scores, nms_mask = (
                detection_decoder.DecodeWithNMS(
                    predicted_bboxes,
                    scores,
                    nms_iou_threshold=params.nms_iou_threshold,
                    score_threshold=params.nms_score_threshold,
                    max_boxes_per_class=params.max_nms_boxes,
                    use_oriented_per_class_nms=(
                        params.use_oriented_per_class_nms)))

            # nms_mask is a [batch, num_classes, num_boxes] Tensor marking the
            # boxes NMS selected. The number of selected boxes differs per
            # example, so the mask lets the boxes stay in one dense batched
            # Tensor.
            #
            # Multiplying the scores by the mask ensures that unselected boxes
            # are never interpreted as valid.
            nms_scores *= nms_mask
            visualization_weights = py_utils.HasShape(
                nms_scores,
                [batch_size, params.num_classes, params.max_nms_boxes])

            # For top down visualization, zero out boxes whose scores do not
            # reach the visualization threshold.
            passes_threshold = tf.greater_equal(
                visualization_weights,
                params.visualization_classification_threshold)
            visualization_weights = tf.where(
                passes_threshold, visualization_weights,
                tf.zeros_like(visualization_weights))

        model_outputs = py_utils.NestedMap()
        model_outputs.per_class_predicted_bboxes = nms_bboxes
        model_outputs.per_class_predicted_bbox_scores = nms_scores
        model_outputs.per_class_valid_mask = nms_mask

        decoder_outputs = py_utils.NestedMap({
            'per_class_predicted_bboxes': nms_bboxes,
            'per_class_predicted_bbox_scores': nms_scores,
            'per_class_valid_mask': nms_mask,
            'visualization_weights': visualization_weights,
        })

        decoder_outputs.update(
            self.output_decoder.ProcessOutputs(input_batch, model_outputs))

        # Produce global step as an output (which is the step
        # of the checkpoint being decoded.)
        decoder_outputs.global_step = py_utils.GetGlobalStep()

        return decoder_outputs