Example #1
0
    def _transform_boxes_to_feature_coordinates(self, provided_boxes,
                                                true_image_shapes,
                                                resized_image_shape,
                                                instance_embedding):
        """Transforms normalized boxes to feature map coordinates.

        Args:
          provided_boxes: A [batch, num_instances, 4] float tensor containing
            normalized bounding boxes.
          true_image_shapes: int32 tensor of shape [batch, 3] where each row is
            of the form [height, width, channels] indicating the shapes of true
            images in the resized images, as resized images can be padded with
            zeros.
          resized_image_shape: A 4D int32 tensor containing shapes of the
            preprocessed inputs (N, H, W, C).
          instance_embedding: A [batch, output_height, output_width,
            embedding_size] float tensor containing instance embeddings.

        Returns:
          A float tensor of size [batch, num_instances, 4] containing boxes
            whose coordinates have been transformed to the absolute output
            space of the feature extractor.
        """
        # Input boxes must be normalized. The assert op is tied to the boxes
        # through a control dependency: in graph mode an assert op that no
        # other op depends on is never executed, so simply creating it would
        # not validate anything.
        box_normalized_assert = shape_utils.assert_box_normalized(
            provided_boxes)
        with tf.control_dependencies([box_normalized_assert]):
            provided_boxes = tf.identity(provided_boxes)

        # Transform the provided boxes to the absolute output space of the
        # feature extractor.
        height, width = (tf.shape(instance_embedding)[1],
                         tf.shape(instance_embedding)[2])

        resized_image_height = resized_image_shape[1]
        resized_image_width = resized_image_shape[2]

        def transform_boxes(elems):
            """Maps the boxes of a single image into feature map space."""
            boxes_per_image, true_image_shape = elems
            blist = box_list.BoxList(boxes_per_image)
            # First transform boxes from image space to resized image space,
            # since the resized images may contain padding.
            blist = box_list_ops.scale(
                blist, true_image_shape[0] / resized_image_height,
                true_image_shape[1] / resized_image_width)
            # Then transform boxes from resized image space (normalized) to the
            # feature map space (absolute).
            blist = box_list_ops.to_absolute_coordinates(blist,
                                                         height,
                                                         width,
                                                         check_range=False)
            return blist.get()

        return tf.map_fn(transform_boxes, [provided_boxes, true_image_shapes],
                         dtype=tf.float32)
def merge_boxes_with_multiple_labels(boxes,
                                     classes,
                                     confidences,
                                     num_classes,
                                     quantization_bins=10000):
    """Collapses boxes with equal coordinates into one K-hot labelled box.

    Box coordinates are quantized and folded into a single integer key per
    box; boxes that share a key are treated as the same box and their class
    labels / confidences are combined.

    Args:
      boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
        normalized coordinates are allowed.
      classes: A tf.int32 tensor with shape [N] holding class indices.
        The class index starts at 0.
      confidences: A tf.float32 tensor with shape [N] holding class
        confidences.
      num_classes: total number of classes to use for K-hot encoding.
      quantization_bins: the number of bins used to quantize the box
        coordinate.

    Returns:
      merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
        where N' <= N.
      class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
        K-hot encodings for the merged boxes.
      confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
        holding encodings of confidences for the merged boxes.
      merged_box_indices: A tf.int32 tensor with shape [N'] holding original
        indices of the boxes.
    """
    box_dims = tf.shape(boxes)
    class_dims = tf.shape(classes)
    confidence_dims = tf.shape(confidences)

    # Every input check must hold before any of the merging ops may run.
    preconditions = [
        shape_utils.assert_shape_equal_along_first_dimension(
            box_dims, class_dims),
        shape_utils.assert_shape_equal_along_first_dimension(
            box_dims, confidence_dims),
        tf.assert_equal(box_dims[1], 4),
        shape_utils.assert_box_normalized(boxes),
    ]

    with tf.control_dependencies(preconditions):
        # Quantize the coordinates and pack the four values into one integer
        # key per box, treating them as digits in base `quantization_bins`
        # (ymin is the least significant digit, xmax the most significant).
        quantized = tf.to_int64(boxes * (quantization_bins - 1))
        q_ymin, q_xmin, q_ymax, q_xmax = tf.unstack(quantized, axis=1)
        box_keys = (
            ((q_xmax * quantization_bins + q_ymax) * quantization_bins +
             q_xmin) * quantization_bins + q_ymin)

        # group_ids[j] is the merged-box slot that original box j falls into.
        distinct_keys, group_ids = tf.unique(box_keys)
        num_boxes = tf.shape(boxes)[0]
        num_groups = tf.shape(distinct_keys)[0]

        # Each merged box is represented by the lowest original index in its
        # group.
        merged_box_indices = tf.unsorted_segment_min(
            tf.range(num_boxes), group_ids, num_groups)
        merged_boxes = tf.gather(boxes, merged_box_indices)

        def encode_group(group_id):
            """Builds the K-hot and confidence rows for one merged box."""
            is_member = tf.reshape(
                tf.equal(group_ids,
                         group_id * tf.ones(num_boxes, dtype=tf.int32)),
                [-1])
            member_classes = tf.boolean_mask(classes, is_member)
            member_confidences = tf.boolean_mask(confidences, is_member)
            k_hot_row = tf.sparse_to_dense(
                member_classes, [num_classes], 1, validate_indices=False)
            confidence_row = tf.sparse_to_dense(
                member_classes, [num_classes], member_confidences,
                validate_indices=False)
            return k_hot_row, confidence_row

        class_encodings, confidence_encodings = tf.map_fn(
            encode_group,
            tf.range(num_groups),
            back_prop=False,
            dtype=(tf.int32, tf.float32))

        # Reshape so the trailing dimensions of every output are pinned.
        return (tf.reshape(merged_boxes, [-1, 4]),
                tf.reshape(class_encodings, [-1, num_classes]),
                tf.reshape(confidence_encodings, [-1, num_classes]),
                tf.reshape(merged_box_indices, [-1]))
Example #3
0
      K-hot encodings for the merged boxes.
    confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
      holding encodings of confidences for the merged boxes.
    merged_box_indices: A tf.int32 tensor with shape [N'] holding original
      indices of the boxes.
  """
  boxes_shape = tf.shape(boxes)
  classes_shape = tf.shape(classes)
  confidences_shape = tf.shape(confidences)
  box_class_shape_assert = shape_utils.assert_shape_equal_along_first_dimension(
      boxes_shape, classes_shape)
  box_confidence_shape_assert = (
      shape_utils.assert_shape_equal_along_first_dimension(
          boxes_shape, confidences_shape))
  box_dimension_assert = tf.assert_equal(boxes_shape[1], 4)
  box_normalized_assert = shape_utils.assert_box_normalized(boxes)

  with tf.control_dependencies(
      [box_class_shape_assert, box_confidence_shape_assert,
       box_dimension_assert, box_normalized_assert]):
    quantized_boxes = tf.to_int64(boxes * (quantization_bins - 1))
    ymin, xmin, ymax, xmax = tf.unstack(quantized_boxes, axis=1)
    hashcodes = (
        ymin +
        xmin * quantization_bins +
        ymax * quantization_bins * quantization_bins +
        xmax * quantization_bins * quantization_bins * quantization_bins)
    unique_hashcodes, unique_indices = tf.unique(hashcodes)
    num_boxes = tf.shape(boxes)[0]
    num_unique_boxes = tf.shape(unique_hashcodes)[0]
    merged_box_indices = tf.unsorted_segment_min(
Example #4
0
def merge_boxes_with_multiple_labels(boxes,
                                     classes,
                                     confidences,
                                     num_classes,
                                     quantization_bins=10000):
  """Deduplicates boxes by coordinates and K-hot encodes their classes.

  Boxes whose quantized coordinates coincide are merged into a single output
  box; the class indices and confidences of all boxes in such a group are
  scattered into one row of length `num_classes`.

  Args:
    boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
      normalized coordinates are allowed.
    classes: A tf.int32 tensor with shape [N] holding class indices.
      The class index starts at 0.
    confidences: A tf.float32 tensor with shape [N] holding class confidences.
    num_classes: total number of classes to use for K-hot encoding.
    quantization_bins: the number of bins used to quantize the box coordinate.

  Returns:
    merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
      where N' <= N.
    class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
      K-hot encodings for the merged boxes.
    confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
      holding encodings of confidences for the merged boxes.
    merged_box_indices: A tf.int32 tensor with shape [N'] holding original
      indices of the boxes.
  """
  shape_of_boxes = tf.shape(boxes)
  shape_of_classes = tf.shape(classes)
  shape_of_confidences = tf.shape(confidences)

  # Input validation ops; the merging ops below are gated on all of them.
  same_count_as_classes = (
      shape_utils.assert_shape_equal_along_first_dimension(
          shape_of_boxes, shape_of_classes))
  same_count_as_confidences = (
      shape_utils.assert_shape_equal_along_first_dimension(
          shape_of_boxes, shape_of_confidences))
  four_coordinates = tf.assert_equal(shape_of_boxes[1], 4)
  coordinates_normalized = shape_utils.assert_box_normalized(boxes)
  input_checks = [
      same_count_as_classes, same_count_as_confidences,
      four_coordinates, coordinates_normalized
  ]

  with tf.control_dependencies(input_checks):
    # Snap coordinates to integer bins, then combine the four bin numbers into
    # one integer per box so that equal boxes receive equal hash codes.
    binned = tf.to_int64(boxes * (quantization_bins - 1))
    bin_ymin, bin_xmin, bin_ymax, bin_xmax = tf.unstack(binned, axis=1)
    xmin_term = bin_xmin * quantization_bins
    ymax_term = bin_ymax * quantization_bins * quantization_bins
    xmax_term = (
        bin_xmax * quantization_bins * quantization_bins * quantization_bins)
    hashcodes = bin_ymin + xmin_term + ymax_term + xmax_term

    # segment_of_box maps every input box to the slot of its merged box.
    unique_hashcodes, segment_of_box = tf.unique(hashcodes)
    num_boxes = tf.shape(boxes)[0]
    num_unique_boxes = tf.shape(unique_hashcodes)[0]

    # The smallest original index within each segment stands in for the
    # merged box.
    original_positions = tf.range(num_boxes)
    merged_box_indices = tf.unsorted_segment_min(
        original_positions, segment_of_box, num_unique_boxes)
    merged_boxes = tf.gather(boxes, merged_box_indices)

    def encode_segment(segment):
      """Produces the K-hot and confidence encodings of one merged box."""
      segment_repeated = segment * tf.ones(num_boxes, dtype=tf.int32)
      in_segment = tf.equal(segment_of_box, segment_repeated)
      in_segment = tf.reshape(in_segment, [-1])
      segment_classes = tf.boolean_mask(classes, in_segment)
      segment_confidences = tf.boolean_mask(confidences, in_segment)
      segment_k_hot = tf.sparse_to_dense(
          segment_classes, [num_classes], 1, validate_indices=False)
      segment_scores = tf.sparse_to_dense(
          segment_classes, [num_classes], segment_confidences,
          validate_indices=False)
      return segment_k_hot, segment_scores

    segments = tf.range(num_unique_boxes)
    class_encodings, confidence_encodings = tf.map_fn(
        encode_segment,
        segments,
        back_prop=False,
        dtype=(tf.int32, tf.float32))

    # Pin the trailing dimensions of each output.
    merged_boxes = tf.reshape(merged_boxes, [-1, 4])
    class_encodings = tf.reshape(class_encodings, [-1, num_classes])
    confidence_encodings = tf.reshape(confidence_encodings, [-1, num_classes])
    merged_box_indices = tf.reshape(merged_box_indices, [-1])
    return (merged_boxes, class_encodings, confidence_encodings,
            merged_box_indices)