Example #1
def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
    """Clip bounding boxes to a window.
  This op clips any input bounding boxes (represented by bounding box
  corners) to a window, optionally filtering out boxes that do not
  overlap at all with the window.
  Args:
    boxlist: BoxList holding M_in boxes
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window to which the op should clip boxes.
    filter_nonoverlapping: whether to filter out boxes that do not overlap at
      all with the window.
    scope: name scope.
  Returns:
    a BoxList holding M_out boxes where M_out <= M_in
  """
    with tf.name_scope(scope, 'ClipToWindow'):
        y_min, x_min, y_max, x_max = tf.split(value=boxlist.get(),
                                              num_or_size_splits=4,
                                              axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
        y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
        y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
        x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
        x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
        clipped = box_list.BoxList(
            tf.concat(
                [y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped],
                1))
        clipped = _copy_extra_fields(clipped, boxlist)
        if filter_nonoverlapping:
            areas = area(clipped)
            nonzero_area_indices = tf.cast(
                tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
            clipped = gather(clipped, nonzero_area_indices)
        return clipped
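A minimal usage sketch, assuming the box_list module used throughout these examples and TF1-style graph construction (the coordinates are illustrative):

boxes = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4],
                                      [0.6, 0.6, 1.2, 1.2]]))
window = tf.constant([0.0, 0.0, 1.0, 1.0])
clipped = clip_to_window(boxes, window)
# The second box becomes [0.6, 0.6, 1.0, 1.0]; both boxes still overlap
# the window, so neither is filtered out.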
Example #2
def boolean_mask(boxlist, indicator, fields=None, scope=None):
    """Select boxes from BoxList according to indicator and return new BoxList.
  `boolean_mask` returns the subset of boxes that are marked as "True" by the
  indicator tensor. By default, `boolean_mask` returns boxes corresponding to
  the input index list, as well as all additional fields stored in the boxlist
  (indexing into the first dimension).  However one can optionally only draw
  from a subset of fields.
  Args:
    boxlist: BoxList holding N boxes
    indicator: a rank-1 boolean tensor
    fields: (optional) list of fields to also gather from.  If None (default),
      all fields are gathered from.  Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.
  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indicator
  Raises:
    ValueError: if `indicator` is not a rank-1 boolean tensor.
  """
    with tf.name_scope(scope, 'BooleanMask'):
        if indicator.shape.ndims != 1:
            raise ValueError('indicator should have rank 1')
        if indicator.dtype != tf.bool:
            raise ValueError('indicator should be a boolean tensor')
        subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(),
                                                      indicator))
        if fields is None:
            fields = boxlist.get_extra_fields()
        for field in fields:
            if not boxlist.has_field(field):
                raise ValueError('boxlist must contain all specified fields')
            subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
            subboxlist.add_field(field, subfieldlist)
        return subboxlist
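A short sketch of a call (values illustrative):

boxes = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0],
                                      [0.0, 0.0, 0.5, 0.5]]))
boxes.add_field('scores', tf.constant([0.9, 0.2]))
subset = boolean_mask(boxes, tf.constant([True, False]))
# subset holds only the first box, and its 'scores' field is masked the same way.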
Example #3
def gather(boxlist, indices, fields=None, scope=None):
    """Gather boxes from BoxList according to indices and return new BoxList.
  By default, `gather` returns boxes corresponding to the input index list, as
  well as all additional fields stored in the boxlist (indexing into the
  first dimension).  However one can optionally only gather from a
  subset of fields.
  Args:
    boxlist: BoxList holding N boxes
    indices: a rank-1 tensor of type int32 / int64
    fields: (optional) list of fields to also gather from.  If None (default),
      all fields are gathered from.  Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.
  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
    specified by indices
  Raises:
    ValueError: if specified field is not contained in boxlist or if the
      indices are not of type int32
  """
    with tf.name_scope(scope, 'Gather'):
        if len(indices.shape.as_list()) != 1:
            raise ValueError('indices should have rank 1')
        if indices.dtype != tf.int32 and indices.dtype != tf.int64:
            raise ValueError('indices should be an int32 / int64 tensor')
        subboxlist = box_list.BoxList(tf.gather(boxlist.get(), indices))
        if fields is None:
            fields = boxlist.get_extra_fields()
        for field in fields:
            if not boxlist.has_field(field):
                raise ValueError('boxlist must contain all specified fields')
            subfieldlist = tf.gather(boxlist.get_field(field), indices)
            subboxlist.add_field(field, subfieldlist)
        return subboxlist
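gather is the index-based counterpart of boolean_mask above; a sketch (indices illustrative):

boxes = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0],
                                      [0.0, 0.0, 0.5, 0.5],
                                      [0.2, 0.2, 0.8, 0.8]]))
reordered = gather(boxes, tf.constant([2, 0], dtype=tf.int32))
# reordered holds boxes 2 and 0, in that order; extra fields follow along.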
Example #4
def change_coordinate_frame(boxlist, window, scope=None):
    """Change coordinate frame of the boxlist to be relative to window's frame.
  Given a window of the form [ymin, xmin, ymax, xmax],
  changes bounding box coordinates from boxlist to be relative to this window
  (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).
  An example use case is data augmentation: where we are given groundtruth
  boxes (boxlist) and would like to randomly crop the image to some
  window (window). In this case we need to change the coordinate frame of
  each groundtruth box to be relative to this new window.
  Args:
    boxlist: A BoxList object holding N boxes.
    window: A rank 1 tensor [4].
    scope: name scope.
  Returns:
    Returns a BoxList object with N boxes.
  """
    with tf.name_scope(scope, 'ChangeCoordinateFrame'):
        win_height = window[2] - window[0]
        win_width = window[3] - window[1]
        boxlist_new = scale(
            box_list.BoxList(boxlist.get() -
                             [window[0], window[1], window[0], window[1]]),
            1.0 / win_height, 1.0 / win_width)
        boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
        return boxlist_new
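For instance, if the window is the lower-right quadrant of the unit square, a box covering exactly that quadrant maps to the full unit square (a sketch):

boxes = box_list.BoxList(tf.constant([[0.5, 0.5, 1.0, 1.0]]))
window = tf.constant([0.5, 0.5, 1.0, 1.0])
shifted = change_coordinate_frame(boxes, window)
# shifted.get() evaluates to [[0., 0., 1., 1.]]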
Example #5
    def _decode(self, rel_codes, anchors):
        """Decode relative codes to boxes.
    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.
    Returns:
      boxes: BoxList holding N bounding boxes.
    """
        ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

        ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
        if self._scale_factors:
            ty /= self._scale_factors[0]
            tx /= self._scale_factors[1]
            th /= self._scale_factors[2]
            tw /= self._scale_factors[3]
        w = tf.exp(tw) * wa
        h = tf.exp(th) * ha
        ycenter = ty * ha + ycenter_a
        xcenter = tx * wa + xcenter_a
        ymin = ycenter - h / 2.
        xmin = xcenter - w / 2.
        ymax = ycenter + h / 2.
        xmax = xcenter + w / 2.
        return box_list.BoxList(
            tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
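A sketch of decoding, assuming this method belongs to a Faster R-CNN style box coder class (the constructor shown is an assumption; a public decode() wrapper would normally call _decode):

coder = FasterRcnnBoxCoder(scale_factors=[10.0, 10.0, 5.0, 5.0])  # hypothetical instantiation
anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
rel_codes = tf.zeros([1, 4])  # all-zero codes decode back to the anchors themselves
decoded = coder._decode(rel_codes, anchors)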
Example #6
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
    """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.
  Performs box voting as described in 'Object detection via a multi-region &
  semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
  each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
  with iou overlap >= iou_thresh. The location of B is set to the weighted
  average location of boxes in S (scores are used for weighting). And the score
  of B is set to the average score of boxes in S.
  Args:
    selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
      boxes are usually selected from pool_boxes using non max suppression.
    pool_boxes: BoxList containing a set of (possibly redundant) boxes.
    iou_thresh: (float scalar) iou threshold for matching boxes in
      selected_boxes and pool_boxes.
  Returns:
    BoxList containing averaged locations and scores for each box in
    selected_boxes.
  Raises:
    ValueError: if
      a) selected_boxes or pool_boxes is not a BoxList.
      b) if iou_thresh is not in [0, 1].
      c) pool_boxes does not have a scores field.
  """
    if not 0.0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if not isinstance(selected_boxes, box_list.BoxList):
        raise ValueError('selected_boxes must be a BoxList')
    if not isinstance(pool_boxes, box_list.BoxList):
        raise ValueError('pool_boxes must be a BoxList')
    if not pool_boxes.has_field('scores'):
        raise ValueError('pool_boxes must have a \'scores\' field')

    iou_ = iou(selected_boxes, pool_boxes)
    match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
    num_matches = tf.reduce_sum(match_indicator, 1)
    # TODO(kbanoop): Handle the case where some boxes in selected_boxes do not
    # match to any boxes in pool_boxes. For such boxes without any matches, we
    # should return the original boxes without voting.
    match_assert = tf.Assert(tf.reduce_all(tf.greater(num_matches, 0)), [
        'Each box in selected_boxes must match with at least one box '
        'in pool_boxes.'
    ])

    scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
    scores_assert = tf.Assert(tf.reduce_all(tf.greater_equal(scores, 0)),
                              ['Scores must be non negative.'])

    with tf.control_dependencies([scores_assert, match_assert]):
        sum_scores = tf.matmul(match_indicator, scores)
    averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches

    box_locations = tf.matmul(match_indicator,
                              pool_boxes.get() * scores) / sum_scores
    averaged_boxes = box_list.BoxList(box_locations)
    _copy_extra_fields(averaged_boxes, selected_boxes)
    averaged_boxes.add_field('scores', averaged_scores)
    return averaged_boxes
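A minimal sketch: two overlapping pool boxes vote on one selected box (pool_boxes must carry a 'scores' field; values illustrative):

pool = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0],
                                     [0.0, 0.0, 0.9, 0.9]]))
pool.add_field('scores', tf.constant([0.8, 0.4]))
selected = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
voted = box_voting(selected, pool, iou_thresh=0.5)
# voted's box is the score-weighted average of both pool boxes;
# its score is their plain mean, 0.6.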
Example #7
def concatenate(boxlists, fields=None, scope=None):
    """Concatenate list of BoxLists.
  This op concatenates a list of input BoxLists into a larger BoxList.  It also
  handles concatenation of BoxList fields as long as the field tensor shapes
  are equal except for the first dimension.
  Args:
    boxlists: list of BoxList objects
    fields: optional list of fields to also concatenate.  By default, all
      fields from the first BoxList in the list are included in the
      concatenation.
    scope: name scope.
  Returns:
    a BoxList with number of boxes equal to
      sum([boxlist.num_boxes() for boxlist in BoxList])
  Raises:
    ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
      contains non BoxList objects), or if requested fields are not contained in
      all boxlists
  """
    with tf.name_scope(scope, 'Concatenate'):
        if not isinstance(boxlists, list):
            raise ValueError('boxlists should be a list')
        if not boxlists:
            raise ValueError('boxlists should have nonzero length')
        for boxlist in boxlists:
            if not isinstance(boxlist, box_list.BoxList):
                raise ValueError(
                    'all elements of boxlists should be BoxList objects')
        concatenated = box_list.BoxList(
            tf.concat([boxlist.get() for boxlist in boxlists], 0))
        if fields is None:
            fields = boxlists[0].get_extra_fields()
        for field in fields:
            first_field_shape = boxlists[0].get_field(
                field).get_shape().as_list()
            first_field_shape[0] = -1
            if None in first_field_shape:
                raise ValueError(
                    'field %s must have fully defined shape except for the'
                    ' 0th dimension.' % field)
            for boxlist in boxlists:
                if not boxlist.has_field(field):
                    raise ValueError(
                        'boxlist must contain all requested fields')
                field_shape = boxlist.get_field(field).get_shape().as_list()
                field_shape[0] = -1
                if field_shape != first_field_shape:
                    raise ValueError(
                        'field %s must have same shape for all boxlists '
                        'except for the 0th dimension.' % field)
            concatenated_field = tf.concat(
                [boxlist.get_field(field) for boxlist in boxlists], 0)
            concatenated.add_field(field, concatenated_field)
        return concatenated
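A hedged sketch merging two BoxLists that share a 'scores' field:

bl_a = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
bl_a.add_field('scores', tf.constant([0.9]))
bl_b = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5]]))
bl_b.add_field('scores', tf.constant([0.3]))
merged = concatenate([bl_a, bl_b])
# merged.num_boxes() == 2; the 'scores' field is concatenated as well.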
Example #8
def tile_anchors(grid_height, grid_width, scales, aspect_ratios,
                 base_anchor_size, anchor_stride, anchor_offset):
    """Create a tiled set of anchors strided along a grid in image space.
  This op creates a set of anchor boxes by placing a "basis" collection of
  boxes with user-specified scales and aspect ratios centered at evenly
  distributed points along a grid.  The basis collection is specified via the
  scale and aspect_ratios arguments.  For example, setting scales=[.1, .2, .2]
  and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
  .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
  and aspect ratio 1/2.  Each box is multiplied by "base_anchor_size" before
  placing it over its respective center.
  Grid points are specified via grid_height, grid_width parameters as well as
  the anchor_stride and anchor_offset parameters.
  Args:
    grid_height: size of the grid in the y direction (int or int scalar tensor)
    grid_width: size of the grid in the x direction (int or int scalar tensor)
    scales: a 1-d  (float) tensor representing the scale of each box in the
      basis set.
    aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
      box in the basis set.  The length of the scales and aspect_ratios tensors
      must be equal.
    base_anchor_size: base anchor size as [height, width]
      (float tensor of shape [2])
    anchor_stride: difference in centers between base anchors for adjacent grid
                   positions (float tensor of shape [2])
    anchor_offset: center of the anchor with scale and aspect ratio 1 for the
                   upper left element of the grid, this should be zero for
                   feature networks with only VALID padding and even receptive
                   field size, but may need some additional calculation if other
                   padding is used (float tensor of shape [2])
  Returns:
    a BoxList holding a collection of N anchor boxes
  """
    ratio_sqrts = tf.sqrt(aspect_ratios)
    heights = scales / ratio_sqrts * base_anchor_size[0]
    widths = scales * ratio_sqrts * base_anchor_size[1]

    # Get a grid of box centers
    y_centers = tf.to_float(tf.range(grid_height))
    y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
    x_centers = tf.to_float(tf.range(grid_width))
    x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
    x_centers, y_centers = ops.meshgrid(x_centers, y_centers)

    widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
    heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
    bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
    bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
    bbox_centers = tf.reshape(bbox_centers, [-1, 2])
    bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
    bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
    return box_list.BoxList(bbox_corners)
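Assuming the ops module referenced above (with its rank-generalized meshgrid) is available, a 2x2 grid with a single scale/aspect-ratio pair yields four anchors (parameters illustrative):

anchors = tile_anchors(grid_height=2,
                       grid_width=2,
                       scales=tf.constant([1.0]),
                       aspect_ratios=tf.constant([1.0]),
                       base_anchor_size=tf.constant([256.0, 256.0]),
                       anchor_stride=tf.constant([16.0, 16.0]),
                       anchor_offset=tf.constant([8.0, 8.0]))
# anchors is a BoxList with 2 * 2 * 1 = 4 boxes of size 256x256,
# centered at (8, 8), (8, 24), (24, 8) and (24, 24).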
Example #9
def pad_or_clip_box_list(boxlist, num_boxes, scope=None):
    """Pads or clips all fields of a BoxList.
  Args:
    boxlist: A BoxList with arbitrary of number of boxes.
    num_boxes: First num_boxes in boxlist are kept.
      The fields are zero-padded if num_boxes is bigger than the
      actual number of boxes.
    scope: name scope.
  Returns:
    BoxList with all fields padded or clipped.
  """
    with tf.name_scope(scope, 'PadOrClipBoxList'):
        subboxlist = box_list.BoxList(
            shape_utils.pad_or_clip_tensor(boxlist.get(), num_boxes))
        for field in boxlist.get_extra_fields():
            subfield = shape_utils.pad_or_clip_tensor(boxlist.get_field(field),
                                                      num_boxes)
            subboxlist.add_field(field, subfield)
        return subboxlist
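A short sketch, padding a single-box BoxList up to a fixed size of 3 (useful for static batch shapes; shape_utils is the same helper module the function itself uses):

boxes = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
padded = pad_or_clip_box_list(boxes, num_boxes=3)
# padded holds 3 boxes: the original one plus two zero rows; clipping
# happens instead when the input has more than num_boxes boxes.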
Example #10
def scale(boxlist, y_scale, x_scale, scope=None):
    """scale box coordinates in x and y dimensions.
  Args:
    boxlist: BoxList holding N boxes
    y_scale: (float) scalar tensor
    x_scale: (float) scalar tensor
    scope: name scope.
  Returns:
    boxlist: BoxList holding N boxes
  """
    with tf.name_scope(scope, 'Scale'):
        y_scale = tf.cast(y_scale, tf.float32)
        x_scale = tf.cast(x_scale, tf.float32)
        y_min, x_min, y_max, x_max = tf.split(value=boxlist.get(),
                                              num_or_size_splits=4,
                                              axis=1)
        y_min = y_scale * y_min
        y_max = y_scale * y_max
        x_min = x_scale * x_min
        x_max = x_scale * x_max
        scaled_boxlist = box_list.BoxList(
            tf.concat([y_min, x_min, y_max, x_max], 1))
        return _copy_extra_fields(scaled_boxlist, boxlist)
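For example, converting normalized boxes to pixel coordinates for a 480x640 image (a sketch):

normalized = box_list.BoxList(tf.constant([[0.25, 0.25, 0.75, 0.75]]))
absolute = scale(normalized, 480.0, 640.0)
# absolute.get() evaluates to [[120., 160., 360., 480.]]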
Example #11
def _to_absolute_coordinates(normalized_boxes):
    return box_list_ops.to_absolute_coordinates(
        box_list.BoxList(normalized_boxes),
        image_shape[1],
        image_shape[2],
        check_range=False).get()
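This helper is a closure: image_shape comes from the enclosing function's scope. A rough sketch of that context (the surrounding names are assumptions):

image_shape = tf.shape(images)  # images: an assumed [batch, H, W, C] tensor in the enclosing scope
absolute_boxes = _to_absolute_coordinates(tf.constant([[0.0, 0.0, 0.5, 0.5]]))
# scales y by image height and x by image width, skipping the [0, 1] range check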
Example #12
def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
    """Merges all detection and groundtruth information for a single example.
  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.
  Args:
    image: A single 4D uint8 image tensor of shape [1, H, W, C].
    key: A single string tensor identifying the image.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.
  Returns:
    A dictionary with:
    'original_image': A [1, H, W, C] uint8 image tensor.
    'key': A string tensor with image identifier.
    'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [max_detections] float32 tensor of scores.
    'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
    'detection_masks': [max_detections, H, W] float32 tensor of binarized
      masks, reframed to full image masks.
    'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      (Optional)
    'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
    'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D int64 tensor of instance masks
      (Optional).
  """
    label_id_offset = 1  # Applying label id offset (b/63711816)

    input_data_fields = fields.InputDataFields
    output_dict = {
        input_data_fields.original_image: image,
        input_data_fields.key: key,
    }

    detection_fields = fields.DetectionResultFields
    detection_boxes = detections[detection_fields.detection_boxes][0]
    image_shape = tf.shape(image)
    detection_scores = detections[detection_fields.detection_scores][0]

    if class_agnostic:
        detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
    else:
        detection_classes = (
            tf.to_int64(detections[detection_fields.detection_classes][0]) +
            label_id_offset)

    num_detections = tf.to_int32(
        detections[detection_fields.num_detections][0])
    detection_boxes = tf.slice(detection_boxes,
                               begin=[0, 0],
                               size=[num_detections, -1])
    detection_classes = tf.slice(detection_classes,
                                 begin=[0],
                                 size=[num_detections])
    detection_scores = tf.slice(detection_scores,
                                begin=[0],
                                size=[num_detections])

    if scale_to_absolute:
        absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
            box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
        output_dict[detection_fields.detection_boxes] = (
            absolute_detection_boxlist.get())
    else:
        output_dict[detection_fields.detection_boxes] = detection_boxes
    output_dict[detection_fields.detection_classes] = detection_classes
    output_dict[detection_fields.detection_scores] = detection_scores

    if detection_fields.detection_masks in detections:
        detection_masks = detections[detection_fields.detection_masks][0]
        # TODO(rathodv): This should be done in model's postprocess
        # function ideally.
        detection_masks = tf.slice(detection_masks,
                                   begin=[0, 0, 0],
                                   size=[num_detections, -1, -1])
        detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image_shape[1], image_shape[2])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        output_dict[
            detection_fields.detection_masks] = detection_masks_reframed
    if detection_fields.detection_keypoints in detections:
        detection_keypoints = detections[
            detection_fields.detection_keypoints][0]
        output_dict[detection_fields.detection_keypoints] = detection_keypoints
        if scale_to_absolute:
            absolute_detection_keypoints = keypoint_ops.scale(
                detection_keypoints, image_shape[1], image_shape[2])
            output_dict[detection_fields.detection_keypoints] = (
                absolute_detection_keypoints)

    if groundtruth:
        if input_data_fields.groundtruth_instance_masks in groundtruth:
            masks = groundtruth[input_data_fields.groundtruth_instance_masks]
            masks = tf.expand_dims(masks, 3)
            masks = tf.image.resize_images(
                masks,
                image_shape[1:3],
                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                align_corners=True)
            masks = tf.squeeze(masks, 3)
            groundtruth[
                input_data_fields.groundtruth_instance_masks] = tf.cast(
                    masks, tf.uint8)
        output_dict.update(groundtruth)
        if scale_to_absolute:
            groundtruth_boxes = groundtruth[
                input_data_fields.groundtruth_boxes]
            absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
                box_list.BoxList(groundtruth_boxes), image_shape[1],
                image_shape[2])
            output_dict[input_data_fields.groundtruth_boxes] = (
                absolute_gt_boxlist.get())
        # For class-agnostic models, groundtruth classes all become 1.
        if class_agnostic:
            groundtruth_classes = groundtruth[
                input_data_fields.groundtruth_classes]
            groundtruth_classes = tf.ones_like(groundtruth_classes,
                                               dtype=tf.int64)
            output_dict[
                input_data_fields.groundtruth_classes] = groundtruth_classes

    return output_dict
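A hedged sketch of a minimal call in graph mode (the dictionary keys follow fields.DetectionResultFields; all tensors are illustrative placeholders):

detections = {
    'detection_boxes': tf.zeros([1, 100, 4], dtype=tf.float32),
    'detection_scores': tf.zeros([1, 100], dtype=tf.float32),
    'detection_classes': tf.zeros([1, 100], dtype=tf.float32),
    'num_detections': tf.constant([10.0]),
}
image = tf.zeros([1, 480, 640, 3], dtype=tf.uint8)
output = result_dict_for_single_example(image, tf.constant('image_0'), detections)
# output['detection_boxes'] has shape [10, 4]; classes are shifted to be 1-indexed.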