Beispiel #1
0
def _MultiClassOrientedDecodeWithNMS(predicted_bboxes,
                                     classification_scores,
                                     nms_iou_threshold,
                                     score_threshold,
                                     max_boxes_per_class=None):
    """Runs oriented, per-class NMS and gathers the boxes that were selected.

    Args:
      predicted_bboxes: float Tensor of shape [batch_size, num_boxes, 7] holding
        the predicted bounding box parameters.
      classification_scores: float Tensor of shape
        [batch_size, num_boxes, num_classes] holding the predicted score of
        every box for every class.
      nms_iou_threshold: IoU threshold used to decide whether two boxes overlap
        for the purpose of suppression. A float, or a list of len num_classes.
      score_threshold: Score threshold forwarded to NMS so that it can quickly
        ignore irrelevant boxes. A float, or a list of len num_classes. Setting
        the threshold of non-active classes (like background) to 1 is strongly
        recommended so that they are discarded.
      max_boxes_per_class: Maximum number of boxes to emit per example. When
        None, num_boxes (taken from the shape of predicted_bboxes) is used.

    Returns:
      A tuple (predicted_bboxes, bbox_scores, valid_mask):

      - predicted_bboxes: boxes kept by NMS, of shape
        [batch_size, num_classes, max_boxes_per_class, 7].
      - bbox_scores: float32 Tensor of shape
        [batch_size, num_classes, max_boxes_per_class] with the score of each
        box.
      - valid_mask: float32 Tensor of shape
        [batch_size, num_classes, max_boxes_per_class] with 1 for valid boxes
        and 0 for invalid ones.
    """
    nms_utils = detection_3d_lib.Utils3D()

    # Validate the inputs and read off the dimensions used below.
    predicted_bboxes = py_utils.HasShape(predicted_bboxes, [-1, -1, 7])
    batch_size, num_boxes, _ = py_utils.GetShape(predicted_bboxes)
    classification_scores = py_utils.HasShape(classification_scores,
                                              [batch_size, num_boxes, -1])
    _, _, num_classes = py_utils.GetShape(classification_scores)

    # By default every predicted box may be emitted.
    boxes_to_emit = (num_boxes
                     if max_boxes_per_class is None else max_boxes_per_class)

    # Run NMS on every example of the batch.
    nms_outputs = nms_utils.BatchedOrientedNMSIndices(
        predicted_bboxes,
        classification_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        max_boxes_per_class=boxes_to_emit)
    selected_indices, bbox_scores, valid_mask = nms_outputs

    # TODO(bencaine): Consider optimizing away the tf.tile or make upstream
    # changes to make predicted boxes include a class dimension.
    # Replicate the boxes once per class so that the per-class indices picked
    # by NMS can be looked up with a single batched gather.
    per_class_bboxes = tf.tile(predicted_bboxes[:, tf.newaxis, :, :],
                               [1, num_classes, 1, 1])
    selected_bboxes = tf.batch_gather(per_class_bboxes, selected_indices)
    return selected_bboxes, bbox_scores, valid_mask
Beispiel #2
0
def SegmentPool3D(points,
                  point_features,
                  pooling_idx,
                  closest_idx,
                  pooling_method='max'):
    """Performs {min/max/mean} pooling over a pointcloud given indices.

    This should be functionally identical to the MaxPool3D function when using
    max, except it turns out to be much more memory efficient on a TPU, and
    supports min/max/mean.

    Args:
      points: A float tf.Tensor of shape [N, P1, 3] with point locations.
      point_features: A float tf.Tensor of shape [N, P1, C] with point
        features.
      pooling_idx: A tf.int32 tf.Tensor of shape [N, P2] with the index of which
        points we want to keep. Each value should be in the range [0, P1).
      closest_idx: A tf.int32 tf.Tensor of shape [N, P1] representing which
        sampled point is closest to each original point. Each value should be
        in the range [0, P2).
      pooling_method: A string for which pooling function to use. Should be one
        of {'min', 'max', 'mean'}.

    Returns:
      pooled_points: A float tf.Tensor of shape [N, P2, 3] with the pooled
        point locations.
      pooled_features: A tf.Tensor of shape [N, P2, C] with the pooled
        features, in the same dtype as point_features.

    Raises:
      ValueError: If pooling_method is not one of {min/max/mean}.
    """
    # Validate the (plain Python) pooling method before touching any tensor so
    # that a bad configuration fails immediately with a readable message.
    supported_methods = ('min', 'max', 'mean')
    if pooling_method not in supported_methods:
        raise ValueError('`pooling_method` must be one of {}.'.format(
            list(supported_methods)))
    segment_fn = {
        'min': tf.unsorted_segment_min,
        'max': tf.unsorted_segment_max,
        'mean': tf.unsorted_segment_mean,
    }[pooling_method]

    points = py_utils.HasShape(points, [-1, -1, 3])
    n, p1 = py_utils.GetShape(points, 2)
    point_features = py_utils.HasShape(point_features, [n, p1, -1])
    _, _, c = py_utils.GetShape(point_features)
    pooling_idx = py_utils.HasShape(pooling_idx, [n, -1])
    _, p2 = py_utils.GetShape(pooling_idx)
    closest_idx = py_utils.HasShape(closest_idx, [n, p1])

    # Subselect our output points
    pooled_points = tf.batch_gather(points, pooling_idx)

    # Loop over batch dimension of our features/indices, as unsorted_segment_X
    # does not currently support a batch dimension.
    def _LoopFn(args):
        example_features, example_closest_idx = args
        return segment_fn(example_features,
                          example_closest_idx,
                          num_segments=p2)

    # The segment ops return the dtype of their data input, so declare the
    # map_fn output with the feature dtype instead of hard-coding float32.
    pooled_features = tf.map_fn(fn=_LoopFn,
                                elems=(point_features, closest_idx),
                                dtype=point_features.dtype)

    return (py_utils.HasShape(pooled_points, [n, p2, 3]),
            py_utils.HasShape(pooled_features, [n, p2, c]))
Beispiel #3
0
def MaxPool3D(points, point_features, pooling_idx, closest_idx):
    """Apply max pooling to a point cloud with computed sampling indices.

    pooling_idx and closest_idx are the outputs of a sampler such as
    FurthestPointSampler.

    The pooling operation results in a point cloud with fewer points, where the
    pooled points are specified by pooling_idx. Each element of pooling_idx
    contains an integer in the range [0, P1) containing the index of the point
    in points/points_features.

    Max pooling is performed by assigning each point to its closest pooled
    point, and then taking a max over the features of points assigned. We
    assume that this mapping is provided by closest_idx, where each element
    should contain an integer in the range [0, P2) containing the index of the
    pooled point that each point is assigned to.

    Note: This logic for pooling assumes that there will be at least
    one value > 0 per sampled region for each feature, otherwise it will return
    0. Additionally, it does a reduce over a masked version of the features, so
    mean and min would not work without a change in the logic.

    Args:
      points: a floating point tf.Tensor with shape [N, P1, 3]
      point_features: a floating point tf.Tensor with shape [N, P1, C]
      pooling_idx: A tf.int32 tf.Tensor of shape [N, P2] with the index of which
        points we want to keep. Each value should be in the range [0, P1).
      closest_idx: A tf.int32 tf.Tensor of shape [N, P1] representing which
        sampled point is closest to each original point. Each value should be
        in the range [0, P2).

    Returns:
      A tuple of tf.Tensors (pooled_points, pooled_features).

      pooled_points has shape [N, P2, 3] representing the locations of each
      selected point. P2 corresponds to num_pooled_points.

      pooled_features has shape [N, P2, C] representing the pooled features at
      each point.
    """
    batch_size, num_points = py_utils.GetShape(points, 2)
    point_features = py_utils.HasShape(point_features,
                                       [batch_size, num_points, -1])
    pooling_idx = py_utils.HasShape(pooling_idx, [batch_size, -1])
    _, num_output_points = py_utils.GetShape(pooling_idx)
    _, _, feature_dims = py_utils.GetShape(point_features, 3)

    # Gather new point locations.
    pooled_points = tf.batch_gather(points, pooling_idx)

    # Build the assignment mask in the dtype of the features so that the
    # product below is valid for any floating point dtype, not only float32.
    mask = tf.one_hot(closest_idx,
                      num_output_points,
                      dtype=point_features.dtype)  # [N, P1, P2]
    mask = tf.transpose(mask, [2, 0, 1])  # [P2, N, P1]

    def _PartialPoolFeaturesFn(partial_mask):
        # partial_mask is the [N, P1] assignment mask of one pooled point;
        # broadcast it over the feature dimension.
        partial_mask = tf.tile(
            tf.reshape(partial_mask, [batch_size, num_points, 1]),
            [1, 1, feature_dims])
        # Note: This method of pooling assumes there will be a value > 0
        # And will only work with max under this condition.
        return tf.reduce_max(partial_mask * point_features, axis=1)

    # Performing a map_fn over the pooled points is more memory efficient.
    pooled_point_features = tf.map_fn(_PartialPoolFeaturesFn,
                                      mask)  # [P2, N, C]
    # Move the batch dimension back to the front: [N, P2, C].
    pooled_point_features = tf.transpose(pooled_point_features, [1, 0, 2])

    return pooled_points, pooled_point_features
Beispiel #4
0
    def AssignAnchors(self,
                      anchor_bboxes,
                      gt_bboxes,
                      gt_bboxes_labels,
                      gt_bboxes_mask,
                      foreground_assignment_threshold=0.5,
                      background_assignment_threshold=0.35,
                      background_class_id=0,
                      force_match=True,
                      similarity_fn=None):
        """Assigns anchors to bboxes using a similarity function (SSD-based).

        Each anchor box is assigned to the top matching ground truth box.
        Ground truth boxes can be assigned to multiple anchor boxes.

        Assignments can result in 3 outcomes:

          - Positive assignment (if score >= foreground_assignment_threshold):
            assigned_gt_labels will reflect the assigned box label and
            assigned_cls_mask will be set to 1.0
          - Background assignment (if score <=
            background_assignment_threshold): assigned_gt_labels will be
            background_class_id and assigned_cls_mask will be set to 1.0
          - Ignore assignment (otherwise):
            assigned_gt_labels will be background_class_id and
            assigned_cls_mask will be set to 0.0

        The detection loss function would usually:

          - Use assigned_cls_mask for weighting the classification loss. The
            mask is set such that the loss applies to foreground and background
            assignments only - ignored anchors will be set to 0.
          - Use assigned_reg_mask for weighting the regression loss. The mask is
            set such that the loss applies to foreground assignments only.

        The thresholds (foreground_assignment_threshold and
        background_assignment_threshold) should be tuned per dataset.

        TODO(jngiam): Consider having a separate threshold for regression boxes;
        a separate threshold is used in PointRCNN.

        Args:
          anchor_bboxes: tf.float32. [A, 7], where [..., :] corresponds to box
            parameters (x, y, z, dx, dy, dz, r).
          gt_bboxes: tf.float32. [G, 7], where [..., :] corresponds to ground
            truth box parameters (x, y, z, dx, dy, dz, r).
          gt_bboxes_labels: tensor with shape [G]. Ground truth labels for each
            bounding box.
          gt_bboxes_mask: tensor with shape [G]. Mask for ground truth boxes, 1
            iff the gt_bbox is a real bbox.
          foreground_assignment_threshold: Similarity score threshold for
            assigning foreground bounding boxes; scores need to be >=
            foreground_assignment_threshold to be assigned to foreground.
          background_assignment_threshold: Similarity score threshold for
            assigning background bounding boxes; scores need to be <=
            background_assignment_threshold to be assigned to background.
          background_class_id: class id to be assigned to anchors_gt_class if no
            anchor boxes match.
          force_match: Boolean specifying if force matching is enabled. If
            force matching is enabled, then matched anchors which are also the
            highest scoring with a ground-truth box are considered foreground
            matches as long as their similarity score > 0.
          similarity_fn: Function that computes a similarity score (e.g., IOU)
            between pairs of bounding boxes. This function should take in two
            tensors corresponding to anchor and ground-truth bboxes, and return
            a matrix [A, G] with the similarity score between each pair of
            bboxes. The score must be non-negative, with greater scores
            representing more similar. The fore/background_assignment_thresholds
            will be applied to this score to determine if an anchor is
            foreground, background or ignored. If set to None, the function
            will default to IOU2DRotatedBoxes.

        Returns:
          NestedMap with the following keys

          - assigned_gt_idx: shape [A] index corresponding to the index of the
            assigned ground truth box. Anchors not assigned to a ground truth
            box will have the index set to -1.
          - assigned_gt_bbox: shape [A, 7] bbox parameters assigned to each
            anchor.
          - assigned_gt_similarity_score: shape [A] (iou) score between the
            anchor and the gt bbox. The score is set to 0 when the best
            matching gt bbox is a padded box.
          - assigned_gt_labels: shape [A] label assigned to bbox.
          - assigned_cls_mask: shape [A] mask for classification loss per
            anchor. This should be 1.0 if the anchor has a foreground or
            background assignment; otherwise, it will be assigned to 0.0.
          - assigned_reg_mask: shape [A] mask for regression loss per anchor.
            This should be 1.0 if the anchor has a foreground assignment;
            otherwise, it will be assigned to 0.0.
            Note: background anchors do not have regression targets.
        """
        if similarity_fn is None:
            similarity_fn = self.IOU2DRotatedBoxes

        # Shape validation.
        anchor_bboxes = py_utils.HasShape(anchor_bboxes, [-1, 7])
        num_anchor_bboxes, _ = py_utils.GetShape(anchor_bboxes, 2)
        gt_bboxes = py_utils.HasShape(gt_bboxes, [-1, 7])
        num_gt_bboxes, _ = py_utils.GetShape(gt_bboxes, 2)

        # Compute the [A, G] similarity score between every anchor and every
        # ground-truth box, and check that similarity_fn returned that shape.
        similarity_score = similarity_fn(anchor_bboxes, gt_bboxes)
        similarity_score = py_utils.HasShape(
            similarity_score, [num_anchor_bboxes, num_gt_bboxes])

        # Reduce over ground-truth boxes, so we have the max score per anchor
        # and the index of the ground-truth box that achieves it.
        anchor_max_score = tf.reduce_max(similarity_score, axis=1)
        anchor_max_idx = tf.argmax(similarity_score, axis=1)

        if force_match:
            # Reduce over anchors, so we have the max score per ground truth box.
            # keepdims=True keeps the result as [1, G] so it broadcasts against
            # the [A, G] similarity matrix below.
            gt_max_score = tf.reduce_max(similarity_score,
                                         axis=0,
                                         keepdims=True)

            # Force matches occur when the top matching gt bbox for an anchor is the
            # top matching anchor for the gt bbox. When force matching, we match
            # these boxes as long as their similarity score exceeds 0.
            # Padded ground-truth boxes (mask == 0) can never be force matched.
            force_matches = (
                tf.equal(similarity_score, gt_max_score)
                & tf.equal(similarity_score, anchor_max_score[..., tf.newaxis])
                & tf.greater(similarity_score, 0.)
                & tf.cast(gt_bboxes_mask[tf.newaxis, ...], tf.bool))
            # [A] boolean: whether the anchor is force matched to any gt bbox,
            # and [A] index of a gt bbox flagged as a force match for it.
            force_match_indicator = tf.reduce_any(force_matches, axis=1)
            force_match_idx = tf.argmax(tf.cast(force_matches, tf.int32),
                                        axis=1)

            # In assigning foreground/background anchors later, force_match_indicator
            # is used to determine which anchors are force foreground, and the index
            # assigned will be taken from anchor_max_idx.

            # Force matchers must also be the max scoring gt bbox per anchor.
            # We overwrite anchor_max_idx to ensure that the right match is done.
            anchor_max_idx = tf.where(force_match_indicator, force_match_idx,
                                      anchor_max_idx)

        # Ensure that max score boxes are not padded boxes by setting score to 0
        # for boxes that are padded.
        gathered_mask = tf.batch_gather(gt_bboxes_mask, anchor_max_idx)
        anchor_max_score = tf.where(tf.equal(gathered_mask, 1),
                                    anchor_max_score,
                                    tf.zeros_like(anchor_max_score))

        # Boolean tensors corresponding to whether an anchor is background or
        # foreground based on thresholding. Anchors whose score falls strictly
        # between the two thresholds are in neither set (ignored).
        background_anchors = tf.less_equal(anchor_max_score,
                                           background_assignment_threshold)
        foreground_anchors = tf.greater_equal(anchor_max_score,
                                              foreground_assignment_threshold)
        if force_match:
            # Background anchors are below threshold and not force matches.
            background_anchors &= ~force_match_indicator
            # Foreground anchors are above thresholds or force matches.
            foreground_anchors |= force_match_indicator

        # Add dummy background bbox to gt_boxes to facilitate batch gather.
        dummy_bbox = tf.constant([[0, 0, 0, 1, 1, 1, 0]], dtype=tf.float32)

        # Since we are concatenating the dummy bbox, the index corresponds to the
        # number of boxes.
        dummy_bbox_idx = py_utils.GetShape(gt_bboxes, 1)[0]

        # The dummy entry carries background_class_id as its label, so anchors
        # that gather it end up labelled as background.
        gt_bboxes = tf.concat([gt_bboxes, dummy_bbox], axis=0)
        gt_bboxes_labels = tf.concat([gt_bboxes_labels, [background_class_id]],
                                     axis=0)

        # Gather indices so that all foreground boxes are gathered from gt_bboxes,
        # while all background and ignore boxes gather the dummy_bbox.
        # NOTE(review): tf.constant is given dummy_bbox_idx and the shape of
        # anchor_max_idx; this presumably requires both to be statically known
        # - confirm against callers.
        anchor_gather_idx = tf.where(
            foreground_anchors, anchor_max_idx,
            tf.constant(dummy_bbox_idx,
                        shape=py_utils.GetShape(anchor_max_idx),
                        dtype=anchor_max_idx.dtype))

        # Gather the bboxes and weights.
        assigned_gt_bbox = tf.batch_gather(gt_bboxes, anchor_gather_idx)
        assigned_gt_labels = tf.batch_gather(gt_bboxes_labels,
                                             anchor_gather_idx)

        # Set masks for classification and regression losses.
        # Classification applies to foreground and background anchors;
        # regression applies to foreground anchors only.
        assigned_cls_mask = tf.cast(background_anchors | foreground_anchors,
                                    tf.float32)
        assigned_reg_mask = tf.cast(foreground_anchors, tf.float32)

        # Set assigned_gt_idx such that dummy boxes have idx = -1.
        assigned_gt_idx = tf.where(tf.equal(anchor_gather_idx, dummy_bbox_idx),
                                   tf.ones_like(anchor_gather_idx) * -1,
                                   anchor_gather_idx)
        assigned_gt_idx = tf.cast(assigned_gt_idx, tf.int32)

        return py_utils.NestedMap(
            assigned_gt_idx=assigned_gt_idx,
            assigned_gt_bbox=assigned_gt_bbox,
            assigned_gt_similarity_score=anchor_max_score,
            assigned_gt_labels=assigned_gt_labels,
            assigned_cls_mask=assigned_cls_mask,
            assigned_reg_mask=assigned_reg_mask)
Beispiel #5
0
def _SingleClassDecodeWithNMS(predicted_bboxes,
                              classification_scores,
                              nms_iou_threshold,
                              score_threshold,
                              max_boxes_per_class=None):
    """Perform NMS on predicted bounding boxes / associated logits.

  Args:
    predicted_bboxes: [batch_size, num_boxes, 7] float Tensor containing
      predicted bounding box coordinates.
    classification_scores: [batch_size, num_boxes, num_classes] float Tensor
      containing predicted classification scores for each box.
    nms_iou_threshold: IoU threshold to use when determining whether two boxes
      overlap for purposes of suppression.
    score_threshold: The score threshold passed to NMS that allows NMS to
      quickly ignore irrelevant boxes.
    max_boxes_per_class: The maximum number of boxes per example to emit. If
      None, this value is set to num_boxes from the shape of predicted_bboxes.

  Returns:
    predicted_bboxes: Filtered bboxes after NMS of shape
      [batch_size, num_classes, max_boxes_per_class, 7].
    bbox_scores: A float32 Tensor with the score for each box of shape
      [batch_size, num_classes, max_boxes_per_class].
    valid_mask: A float32 Tensor with 1/0 values indicating the validity of
      each box. 1 indicates valid, and 0 invalid. Tensor of shape
      [batch_size, num_classes, max_boxes_per_class].
  """
    utils_3d = detection_3d_lib.Utils3D()
    predicted_bboxes = py_utils.HasShape(predicted_bboxes, [-1, -1, 7])
    batch_size, num_predicted_boxes, _ = py_utils.GetShape(predicted_bboxes)
    classification_scores = py_utils.HasShape(
        classification_scores, [batch_size, num_predicted_boxes, -1])
    _, _, num_classes = py_utils.GetShape(classification_scores)

    if not isinstance(nms_iou_threshold, float):
        raise ValueError('Single class NMS only supports a scalar '
                         '`nms_iou_threshold`.')
    if not isinstance(score_threshold, float):
        raise ValueError('Single class NMS only supports a scalar '
                         '`score_threshold`.')

    if max_boxes_per_class is None:
        max_boxes_per_class = num_predicted_boxes

    # TODO(jngiam): Change to be per-class bboxes, and hence, per-class NMS, and
    # per-class thresholding.
    # [batch, num_predicted_boxes]
    nms_scores = tf.reduce_max(classification_scores, axis=-1)

    # Compute the most likely label by computing the highest class score from
    # the output of the sigmoid.
    likely_labels = tf.argmax(classification_scores, axis=-1)

    # When background is the most likely class for the box, mask out the scores
    # of that box from NMS scoring so the background boxes don't dominate the
    # NMS.
    nms_scores *= tf.to_float(likely_labels > 0)

    # Compute NMS for every sample in the batch.
    nms_indices, valid_mask = utils_3d.BatchedNMSIndices(
        predicted_bboxes,
        nms_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        max_num_boxes=max_boxes_per_class)

    # Reorder the box data and logits according to NMS scoring.
    predicted_bboxes = tf.batch_gather(predicted_bboxes, nms_indices)
    classification_scores = tf.batch_gather(classification_scores, nms_indices)

    # Now reformat the output of NMS to match the format of the
    # MultiClassOrientedDecodeWithNMS, which outputs a per class NMS result.
    # This takes the leading shape of
    # [batch_size, num_classes, max_boxes_per_class] for all outputs, which
    # means since this NMS is not class specific we need to tile the outputs
    # num_classes times or reorder the data such that its [batch, num_classes].
    predicted_bboxes = tf.tile(predicted_bboxes[:, tf.newaxis, :, :],
                               [1, num_classes, 1, 1])
    classification_scores = tf.transpose(classification_scores, (0, 2, 1))
    classification_scores = py_utils.HasShape(
        classification_scores, [batch_size, num_classes, max_boxes_per_class])
    valid_mask = tf.tile(valid_mask[:, tf.newaxis, :], [1, num_classes, 1])
    return predicted_bboxes, classification_scores, valid_mask
Beispiel #6
0
    def FProp(self, theta, input_data):
        """Samples query points and groups the neighboring points around them.

        Args:
          theta: A NestedMap object containing weights' values of this layer and
            its children layers.
          input_data: A NestedMap object containing 'points', 'features',
            'padding' Tensors, all of type tf.float32.
            'points': Shape [N, P1, 3]
            'features': Shape [N, P1, F]
            'padding': Shape [N, P1] where 0 indicates real, 1 indicates padded.

        Returns:
          A NestedMap consisting of the following two NestedMaps,
            grouped_points: consists of the grouped points, features and
              padding.
            query_points: consists of the sampled points and padding.
        """
        params = self.params
        num_samples = params.num_samples
        group_size = params.group_size

        # Validate the inputs against the shape of the features.
        in_features = input_data.features
        batch, num_points, feature_dim = py_utils.GetShape(in_features)
        in_points = py_utils.HasShape(input_data.points,
                                      [batch, num_points, 3])
        in_padding = py_utils.HasShape(input_data.padding,
                                       [batch, num_points])

        # Sampling: pick the query points with farthest point sampling.
        sampled_idx, _ = car_lib.FarthestPointSampler(
            in_points, in_padding, num_sampled_points=num_samples)
        query_xyz = tf.squeeze(
            car_lib.MatmulGather(in_points, tf.expand_dims(sampled_idx, -1)),
            -2)

        # Grouping: find the neighborhood of every query point.
        neighbor_idx, neighbor_padding = car_lib.NeighborhoodIndices(
            in_points,
            query_xyz,
            group_size,
            points_padding=in_padding,
            max_distance=params.ball_radius,
            sample_neighbors_uniformly=params.sample_neighbors_uniformly)
        neighbor_xyz = car_lib.MatmulGather(in_points, neighbor_idx)
        # Express the grouped points relative to the location of their query
        # point.
        neighbor_xyz = neighbor_xyz - tf.expand_dims(query_xyz, -2)
        neighbor_features = car_lib.MatmulGather(in_features, neighbor_idx)

        # The padding of a query point is the padding of the point it was
        # sampled from.
        query_pad = tf.batch_gather(in_padding, sampled_idx)

        # Verify the shapes of output tensors.
        query_xyz = py_utils.HasShape(query_xyz, [batch, num_samples, 3])
        query_pad = py_utils.HasShape(query_pad, [batch, num_samples])
        neighbor_features = py_utils.HasShape(
            neighbor_features, [batch, num_samples, group_size, feature_dim])
        neighbor_padding = py_utils.HasShape(
            neighbor_padding, [batch, num_samples, group_size])

        return py_utils.NestedMap({
            'grouped_points':
                py_utils.NestedMap(points=neighbor_xyz,
                                   features=neighbor_features,
                                   padding=neighbor_padding),
            'query_points':
                py_utils.NestedMap(points=query_xyz, padding=query_pad),
        })