コード例 #1
0
        def single_image_nms_fn(args):
            """Applies multiclass NMS to one image and pads the result.

            Args:
              args: A 4-element sequence unpacked as (boxes, scores, masks,
                num_valid_boxes) for a single image. Only the leading
                `num_valid_boxes` rows of boxes, scores and masks are passed
                on to NMS.

            Returns:
              A list [boxes, scores, classes, masks, num_detections]. The
              first four are read from the NMS output after
              `pad_or_clip_box_list(..., max_total_size)`; `num_detections`
              is `num_boxes()` of the NMS output before that padding.
            """
            raw_boxes, raw_scores, raw_masks, valid_count = args

            # A slice size of -1 keeps the full extent of that dimension, so
            # each slice only trims rows beyond `valid_count`.
            box_extent = tf.stack([valid_count, -1, -1])
            valid_boxes = tf.reshape(
                tf.slice(raw_boxes, [0, 0, 0], box_extent), [-1, q, 4])

            score_extent = tf.stack([valid_count, -1])
            valid_scores = tf.reshape(
                tf.slice(raw_scores, [0, 0], score_extent), [-1, num_classes])

            # The reshape restores the static mask height/width that the
            # dynamic slice would otherwise leave unknown.
            mask_extent = tf.stack([valid_count, -1, -1, -1])
            mask_dims = raw_masks.shape
            valid_masks = tf.reshape(
                tf.slice(raw_masks, [0, 0, 0, 0], mask_extent),
                [-1, q, mask_dims[2].value, mask_dims[3].value])

            nms_output = multiclass_non_max_suppression(
                valid_boxes,
                valid_scores,
                score_thresh,
                iou_thresh,
                max_size_per_class,
                max_total_size,
                masks=valid_masks,
                clip_window=clip_window,
                change_coordinate_frame=change_coordinate_frame)
            padded = box_list_ops.pad_or_clip_box_list(
                nms_output, max_total_size)
            # Count is taken from the unpadded list so padding rows are not
            # reported as detections.
            detection_count = nms_output.num_boxes()

            outputs = [padded.get()]
            outputs.extend(
                padded.get_field(name)
                for name in (fields.BoxListFields.scores,
                             fields.BoxListFields.classes,
                             fields.BoxListFields.masks))
            outputs.append(detection_count)
            return outputs
コード例 #2
0
  def test_pad_box_list(self):
    """Padding a 2-box list to 4 boxes zero-fills boxes and fields."""
    corners = tf.constant(
        [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32)
    boxlist = box_list.BoxList(corners)
    boxlist.add_field('classes', tf.constant([0, 1]))
    boxlist.add_field('scores', tf.constant([0.75, 0.2]))
    target_size = 4
    padded = box_list_ops.pad_or_clip_box_list(boxlist, target_size)

    # The two original rows are kept; the two appended rows are all zeros.
    want_boxes = [
        [0.1, 0.1, 0.4, 0.4],
        [0.1, 0.1, 0.5, 0.5],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
    ]
    want_classes = [0, 1, 0, 0]
    want_scores = [0.75, 0.2, 0, 0]

    with self.test_session() as sess:
      fetches = [
          padded.get(),
          padded.get_field('classes'),
          padded.get_field('scores'),
      ]
      got_boxes, got_classes, got_scores = sess.run(fetches)

      self.assertAllClose(want_boxes, got_boxes)
      self.assertAllEqual(want_classes, got_classes)
      self.assertAllClose(want_scores, got_scores)
コード例 #3
0
ファイル: crnn.py プロジェクト: AXATechLab/models
            def compute_loss():
                """Builds the loss on a fixed-size sample of detections.

                Filters `detection_boxlist` with `sampled_indices` via
                `box_list_ops.boolean_mask`, pads or clips the result to
                `self.batch_size` boxes, and passes the boxes, transcriptions
                and scores to `self._predict_lstm`.

                Returns:
                  A two-element list: `self.loss(transcriptions_dict)` and
                  the tuple `(transcriptions_dict, eval_metric_ops)`.
                """
                kept_boxlist = box_list_ops.boolean_mask(
                    detection_boxlist, sampled_indices)
                fixed_boxlist = box_list_ops.pad_or_clip_box_list(
                    kept_boxlist, num_boxes=self.batch_size)

                boxes = fixed_boxlist.get()
                transcriptions = fixed_boxlist.get_field(
                    fields.BoxListFields.transcription)
                scores = fixed_boxlist.get_field(
                    fields.BoxListFields.scores)

                # The fixed-size list is limited to batch_size boxes, so the
                # count of real (non-padding) entries is capped the same way.
                valid_count = tf.minimum(kept_boxlist.num_boxes(),
                                         self.batch_size)

                predictions = self._predict_lstm(
                    rpn_features_to_crop, boxes, transcriptions, scores,
                    valid_count)
                transcriptions_dict, eval_metric_ops = predictions
                loss = self.loss(transcriptions_dict)
                return [loss, (transcriptions_dict, eval_metric_ops)]
コード例 #4
0
    def _single_image_nms_fn(args):
      """Runs NMS on a single image and returns padded output.

      Args:
        args: A list of tensors laid out, in order, as
          [boxes, scores, masks, clip_window, *additional_fields,
          num_valid_boxes]:
          per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
            detections. If `q` is 1 then same boxes are used for all classes
            otherwise, if `q` is equal to number of classes, class-specific
            boxes are used.
          per_image_scores - A [num_anchors, num_classes] float32 tensor
            containing the scores for each of the `num_anchors` detections.
          per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
            tensor containing box masks. `q` can be either number of classes
            or 1 depending on whether a separate mask is predicted per class.
          per_image_clip_window - A 1D float32 tensor of the form
            [ymin, xmin, ymax, xmax] representing the window to clip the boxes
            to.
          per_image_additional_fields - (optional) A variable number of float32
            tensors each with size [num_anchors, ...]. They are paired, in
            order, with the keys of the enclosing `additional_fields`.
          per_image_num_valid_boxes - A scalar `int32` tensor giving the number
            of valid boxes to be considered for this image. It is always the
            last element of `args` and allows for ignoring zero paddings.

      Returns:
        A list, in this order:
        'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
          non-max suppressed boxes.
        'nmsed_scores': A [max_detections] float32 tensor containing the scores
          for the boxes.
        'nmsed_classes': A [max_detections] float32 tensor containing the class
          for boxes.
        'nmsed_masks': A [max_detections, mask_height, mask_width] float32
          tensor containing masks for each selected box.
        'nmsed_additional_fields': Zero or more float32 tensors, each with size
          [max_detections, ...], one per entry of
          `per_image_additional_fields` and in the same order.
        'num_detections': The valid-box count returned by
          `multiclass_non_max_suppression` for this image (presumably a scalar
          int32 tensor - confirm against that function). Only the top
          `num_detections` entries of the other outputs are valid; the rest
          are zero paddings.
      """
      (per_image_boxes, per_image_scores, per_image_masks,
       per_image_clip_window) = args[:4]
      per_image_num_valid_boxes = args[-1]
      # Everything between the clip window and the trailing count is an
      # additional field, matched positionally to the keys of
      # `additional_fields`.
      per_image_additional_fields = dict(zip(additional_fields, args[4:-1]))

      if use_static_shapes:
        # Tensor shapes must stay fixed here, so rows past the valid count are
        # not sliced away; their scores are replaced with the most negative
        # finite float32 instead (presumably so they never pass the score
        # threshold - confirm against multiclass_non_max_suppression).
        total_proposals = tf.shape(per_image_scores)
        is_valid_row = tf.less(
            tf.range(total_proposals[0]), per_image_num_valid_boxes)
        lowest_scores = tf.fill(total_proposals, np.finfo('float32').min)
        per_image_scores = tf.where(
            is_valid_row, per_image_scores, lowest_scores)
      else:
        # Drop the padding rows. A slice size of -1 keeps a dimension whole,
        # and each reshape restores the static trailing dims that the dynamic
        # slice leaves unknown.
        per_image_boxes = tf.reshape(
            tf.slice(per_image_boxes, 3 * [0],
                     tf.stack([per_image_num_valid_boxes, -1, -1])),
            [-1, q, 4])
        per_image_scores = tf.reshape(
            tf.slice(per_image_scores, [0, 0],
                     tf.stack([per_image_num_valid_boxes, -1])),
            [-1, num_classes])
        per_image_masks = tf.reshape(
            tf.slice(per_image_masks, 4 * [0],
                     tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
            [-1, q, per_image_masks.shape[2].value,
             per_image_masks.shape[3].value])

        # Build a fresh dict rather than reassigning entries of the dict that
        # is being iterated.
        trimmed_fields = {}
        for key, tensor in per_image_additional_fields.items():
          static_shape = tensor.get_shape()
          rank = len(static_shape)
          keep_rows = tf.stack(
              [per_image_num_valid_boxes] + (rank - 1) * [-1])
          trimmed_fields[key] = tf.reshape(
              tf.slice(tensor, rank * [0], keep_rows),
              [-1] + [dim.value for dim in static_shape[1:]])
        per_image_additional_fields = trimmed_fields

      nmsed_boxlist, num_valid_nms_boxes = multiclass_non_max_suppression(
          per_image_boxes,
          per_image_scores,
          score_thresh,
          iou_thresh,
          max_size_per_class,
          max_total_size,
          clip_window=per_image_clip_window,
          change_coordinate_frame=change_coordinate_frame,
          masks=per_image_masks,
          pad_to_max_output_size=use_static_shapes,
          additional_fields=per_image_additional_fields)

      # With static shapes the NMS call is asked to pad its own output
      # (pad_to_max_output_size); otherwise pad or clip it here.
      if not use_static_shapes:
        nmsed_boxlist = box_list_ops.pad_or_clip_box_list(
            nmsed_boxlist, max_total_size)
      num_detections = num_valid_nms_boxes

      outputs = [
          nmsed_boxlist.get(),
          nmsed_boxlist.get_field(fields.BoxListFields.scores),
          nmsed_boxlist.get_field(fields.BoxListFields.classes),
          nmsed_boxlist.get_field(fields.BoxListFields.masks),
      ]
      outputs.extend(
          nmsed_boxlist.get_field(key) for key in per_image_additional_fields)
      outputs.append(num_detections)
      return outputs
コード例 #5
0
def batch_multiclass_non_max_suppression(boxes,
                                         scores,
                                         score_thresh,
                                         iou_thresh,
                                         max_size_per_class,
                                         max_total_size=0,
                                         clip_window=None,
                                         change_coordinate_frame=False,
                                         num_valid_boxes=None,
                                         masks=None,
                                         scope=None):
    """Multi-class version of non maximum suppression that operates on a batch.

    This op is similar to `multiclass_non_max_suppression` but operates on a
    batch of boxes and scores. The batch is unstacked, each image is processed
    independently, and the per-image results are padded or clipped to
    `max_total_size` and stacked again. See documentation for
    `multiclass_non_max_suppression` for details.

    Args:
      boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
        detections. If `q` is 1 then same boxes are used for all classes
        otherwise, if `q` is equal to number of classes, class-specific boxes
        are used.
      scores: A [batch_size, num_anchors, num_classes] float32 tensor
        containing the scores for each of the `num_anchors` detections.
      score_thresh: scalar threshold for score (low scoring boxes are
        removed).
      iou_thresh: scalar threshold for IOU (new boxes that have high IOU
        overlap with previously selected boxes are removed).
      max_size_per_class: maximum number of retained boxes per class.
      max_total_size: maximum number of boxes retained over all classes. Also
        used as the length every per-image output is padded or clipped to.
      clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
        representing the window to clip boxes to before performing non-max
        suppression.
      change_coordinate_frame: Whether to normalize coordinates after clipping
        relative to clip_window (this can only be set to True if a clip_window
        is provided).
      num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of
        shape [batch_size] representing the number of valid boxes to be
        considered for each image in the batch. This parameter allows for
        ignoring zero paddings.
      masks: (optional) a [batch_size, num_anchors, q, mask_height,
        mask_width] float32 tensor containing box masks. `q` can be either
        number of classes or 1 depending on whether a separate mask is
        predicted per class.
      scope: tf scope name.

    Returns:
      A dictionary containing the following entries:
      'detection_boxes': A [batch_size, max_detections, 4] float32 tensor
        containing the non-max suppressed boxes.
      'detection_scores': A [batch_size, max_detections] float32 tensor
        containing the scores for the boxes.
      'detection_classes': A [batch_size, max_detections] float32 tensor
        containing the class for boxes.
      'num_detections': A [batch_size] float32 tensor indicating the number of
        valid detections per batch item. Only the top num_detections[i]
        entries in detection_boxes[i], detection_scores[i] and
        detection_classes[i] are valid. The rest of the entries are zero
        paddings.
      'detection_masks': (optional, only when `masks` is given) a
        [batch_size, max_detections, mask_height, mask_width] float32 tensor
        containing masks for each selected box.

    Raises:
      ValueError: if the third dimension of `boxes` is neither 1 nor equal to
        the third dimension of `scores`. Further ValueErrors (e.g. for an
        invalid `iou_thresh`) are presumably raised by
        `multiclass_non_max_suppression` - confirm there.
    """
    q = boxes.shape[2].value
    num_classes = scores.shape[2].value
    if q != 1 and q != num_classes:
        raise ValueError('third dimension of boxes must be either 1 or equal '
                         'to the third dimension of scores')

    with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
        per_image_boxes_list = tf.unstack(boxes)
        per_image_scores_list = tf.unstack(scores)
        batch_size = len(per_image_boxes_list)

        # Optional inputs are represented per image as None so that a single
        # zip can drive the loop below.
        if num_valid_boxes is not None:
            num_valid_boxes_list = tf.unstack(num_valid_boxes)
        else:
            num_valid_boxes_list = batch_size * [None]
        if masks is not None:
            per_image_masks_list = tf.unstack(masks)
        else:
            per_image_masks_list = batch_size * [None]

        detection_boxes_list = []
        detection_scores_list = []
        detection_classes_list = []
        num_detections_list = []
        detection_masks_list = []

        per_image_inputs = zip(per_image_boxes_list, per_image_scores_list,
                               per_image_masks_list, num_valid_boxes_list)
        # The per-image count gets its own name so the `num_valid_boxes`
        # argument is not overwritten by the loop.
        for (per_image_boxes, per_image_scores, per_image_masks,
             per_image_num_valid) in per_image_inputs:
            if per_image_num_valid is not None:
                # Drop padding rows. A slice size of -1 keeps a dimension
                # whole; the reshapes restore the static trailing dims.
                per_image_boxes = tf.reshape(
                    tf.slice(per_image_boxes, 3 * [0],
                             tf.stack([per_image_num_valid, -1, -1])),
                    [-1, q, 4])
                per_image_scores = tf.reshape(
                    tf.slice(per_image_scores, [0, 0],
                             tf.stack([per_image_num_valid, -1])),
                    [-1, num_classes])
                if masks is not None:
                    per_image_masks = tf.reshape(
                        tf.slice(per_image_masks, 4 * [0],
                                 tf.stack([per_image_num_valid, -1, -1, -1])),
                        [-1, q, masks.shape[3].value, masks.shape[4].value])

            nmsed_boxlist = multiclass_non_max_suppression(
                per_image_boxes,
                per_image_scores,
                score_thresh,
                iou_thresh,
                max_size_per_class,
                max_total_size,
                masks=per_image_masks,
                clip_window=clip_window,
                change_coordinate_frame=change_coordinate_frame)
            # Record the count before padding so padding rows are not
            # reported as detections.
            num_detections_list.append(tf.to_float(nmsed_boxlist.num_boxes()))

            # NOTE(review): the default max_total_size=0 is passed straight to
            # pad_or_clip_box_list here - confirm that callers always supply a
            # positive value, or what a target length of 0 yields.
            padded_boxlist = box_list_ops.pad_or_clip_box_list(
                nmsed_boxlist, max_total_size)
            detection_boxes_list.append(padded_boxlist.get())
            detection_scores_list.append(
                padded_boxlist.get_field(fields.BoxListFields.scores))
            detection_classes_list.append(
                padded_boxlist.get_field(fields.BoxListFields.classes))
            if masks is not None:
                detection_masks_list.append(
                    padded_boxlist.get_field(fields.BoxListFields.masks))

        nms_dict = {
            'detection_boxes': tf.stack(detection_boxes_list),
            'detection_scores': tf.stack(detection_scores_list),
            'detection_classes': tf.stack(detection_classes_list),
            'num_detections': tf.stack(num_detections_list)
        }
        if masks is not None:
            nms_dict['detection_masks'] = tf.stack(detection_masks_list)
        return nms_dict