コード例 #1
0
    def _decode(self, rel_codes, anchors):
        """Decode relative codes to boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes.
    """
        ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes(
        )

        ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
        if self._scale_factors:
            ty /= self._scale_factors[0]
            tx /= self._scale_factors[1]
            th /= self._scale_factors[2]
            tw /= self._scale_factors[3]
        w = tf.exp(tw) * wa
        h = tf.exp(th) * ha
        ycenter = ty * ha + ycenter_a
        xcenter = tx * wa + xcenter_a
        ymin = ycenter - h / 2.
        xmin = xcenter - w / 2.
        ymax = ycenter + h / 2.
        xmax = xcenter + w / 2.
        return box_list.BoxList(
            tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
コード例 #2
0
  def _decode(self, rel_codes, anchors):
    """Decodes relative codes to boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes.
    """
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
    la = tf.sqrt(ha * wa)

    ty, tx, tl = tf.unstack(tf.transpose(rel_codes))
    if self._scale_factors:
      ty /= self._scale_factors[0]
      tx /= self._scale_factors[1]
      tl /= self._scale_factors[2]
    l = tf.exp(tl) * la
    ycenter = ty * la + ycenter_a
    xcenter = tx * la + xcenter_a
    ymin = ycenter - l / 2.
    xmin = xcenter - l / 2.
    ymax = ycenter + l / 2.
    xmax = xcenter + l / 2.
    return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
コード例 #3
0
def change_coordinate_frame(boxlist, window, scope=None):
    """Change coordinate frame of the boxlist to be relative to window's frame.

  Given a window of the form [ymin, xmin, ymax, xmax],
  changes bounding box coordinates from boxlist to be relative to this window
  (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).

  An example use case is data augmentation: where we are given groundtruth
  boxes (boxlist) and would like to randomly crop the image to some
  window (window). In this case we need to change the coordinate frame of
  each groundtruth box to be relative to this new window.

  Args:
    boxlist: A BoxList object holding N boxes.
    window: A rank 1 tensor [4].
    scope: name scope.

  Returns:
    Returns a BoxList object with N boxes.
  """
    with tf.name_scope(scope, 'ChangeCoordinateFrame'):
        win_height = window[2] - window[0]
        win_width = window[3] - window[1]
        boxlist_new = scale(
            box_list.BoxList(boxlist.get() -
                             [window[0], window[1], window[0], window[1]]),
            1.0 / win_height, 1.0 / win_width)
        boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
        return boxlist_new
コード例 #4
0
def scale(boxlist, y_scale, x_scale, scope=None):
    """scale box coordinates in x and y dimensions.

  Args:
    boxlist: BoxList holding N boxes
    y_scale: (float) scalar tensor
    x_scale: (float) scalar tensor
    scope: name scope.

  Returns:
    boxlist: BoxList holding N boxes
  """
    with tf.name_scope(scope, 'Scale'):
        y_scale = tf.cast(y_scale, tf.float32)
        x_scale = tf.cast(x_scale, tf.float32)
        y_min, x_min, y_max, x_max = tf.split(value=boxlist.get(),
                                              num_or_size_splits=4,
                                              axis=1)
        y_min = y_scale * y_min
        y_max = y_scale * y_max
        x_min = x_scale * x_min
        x_max = x_scale * x_max
        scaled_boxlist = box_list.BoxList(
            tf.concat([y_min, x_min, y_max, x_max], 1))
        return _copy_extra_fields(scaled_boxlist, boxlist)
コード例 #5
0
def concatenate(boxlists, fields=None, scope=None):
    """Concatenate list of BoxLists.

  This op concatenates a list of input BoxLists into a larger BoxList.  It also
  handles concatenation of BoxList fields as long as the field tensor shapes
  are equal except for the first dimension.

  Args:
    boxlists: list of BoxList objects
    fields: optional list of fields to also concatenate.  By default, all
      fields from the first BoxList in the list are included in the
      concatenation.
    scope: name scope.

  Returns:
    a BoxList with number of boxes equal to
      sum([boxlist.num_boxes() for boxlist in BoxList])
  Raises:
    ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
      contains non BoxList objects), or if requested fields are not contained in
      all boxlists
  """
    with tf.name_scope(scope, 'Concatenate'):
        if not isinstance(boxlists, list):
            raise ValueError('boxlists should be a list')
        if not boxlists:
            raise ValueError('boxlists should have nonzero length')
        for boxlist in boxlists:
            if not isinstance(boxlist, box_list.BoxList):
                raise ValueError(
                    'all elements of boxlists should be BoxList objects')
        concatenated = box_list.BoxList(
            tf.concat([boxlist.get() for boxlist in boxlists], 0))
        if fields is None:
            fields = boxlists[0].get_extra_fields()
        for field in fields:
            first_field_shape = boxlists[0].get_field(
                field).get_shape().as_list()
            first_field_shape[0] = -1
            if None in first_field_shape:
                raise ValueError(
                    'field %s must have fully defined shape except for the'
                    ' 0th dimension.' % field)
            for boxlist in boxlists:
                if not boxlist.has_field(field):
                    raise ValueError(
                        'boxlist must contain all requested fields')
                field_shape = boxlist.get_field(field).get_shape().as_list()
                field_shape[0] = -1
                if field_shape != first_field_shape:
                    raise ValueError(
                        'field %s must have same shape for all boxlists '
                        'except for the 0th dimension.' % field)
            concatenated_field = tf.concat(
                [boxlist.get_field(field) for boxlist in boxlists], 0)
            concatenated.add_field(field, concatenated_field)
        return concatenated
コード例 #6
0
    def _compute_loss(self, prediction_tensor, target_tensor, weights):
        """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
        representing the decoded predicted boxes
      target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
        representing the decoded target boxes
      weights: a float tensor of shape [batch_size, num_anchors]

    Returns:
      loss: a float tensor of shape [batch_size, num_anchors] tensor
        representing the value of the loss function.
    """
        predicted_boxes = box_list.BoxList(
            tf.reshape(prediction_tensor, [-1, 4]))
        target_boxes = box_list.BoxList(tf.reshape(target_tensor, [-1, 4]))
        per_anchor_iou_loss = 1.0 - box_list_ops.matched_iou(
            predicted_boxes, target_boxes)
        return tf.reshape(weights, [-1]) * per_anchor_iou_loss
コード例 #7
0
def sample_boxes_by_jittering(boxlist,
                              num_boxes_to_sample,
                              stddev=0.1,
                              scope=None):
    """Samples num_boxes_to_sample boxes by jittering around boxlist boxes.

  It is possible that this function might generate boxes with size 0. The larger
  the stddev, this is more probable. For a small stddev of 0.1 this probability
  is very small.

  Args:
    boxlist: A boxlist containing N boxes in normalized coordinates.
    num_boxes_to_sample: A positive integer containing the number of boxes to
      sample.
    stddev: Standard deviation. This is used to draw random offsets for the
      box corners from a normal distribution. The offset is multiplied by the
      box size so will be larger in terms of pixels for larger boxes.
    scope: Name scope.

  Returns:
    sampled_boxlist: A boxlist containing num_boxes_to_sample boxes in
      normalized coordinates.
  """
    with tf.name_scope(scope, 'SampleBoxesByJittering'):
        num_boxes = boxlist.num_boxes()
        box_indices = tf.random_uniform([num_boxes_to_sample],
                                        minval=0,
                                        maxval=num_boxes,
                                        dtype=tf.int32)
        sampled_boxes = tf.gather(boxlist.get(), box_indices)
        sampled_boxes_height = sampled_boxes[:, 2] - sampled_boxes[:, 0]
        sampled_boxes_width = sampled_boxes[:, 3] - sampled_boxes[:, 1]
        rand_miny_gaussian = tf.random_normal([num_boxes_to_sample],
                                              stddev=stddev)
        rand_minx_gaussian = tf.random_normal([num_boxes_to_sample],
                                              stddev=stddev)
        rand_maxy_gaussian = tf.random_normal([num_boxes_to_sample],
                                              stddev=stddev)
        rand_maxx_gaussian = tf.random_normal([num_boxes_to_sample],
                                              stddev=stddev)
        miny = rand_miny_gaussian * sampled_boxes_height + sampled_boxes[:, 0]
        minx = rand_minx_gaussian * sampled_boxes_width + sampled_boxes[:, 1]
        maxy = rand_maxy_gaussian * sampled_boxes_height + sampled_boxes[:, 2]
        maxx = rand_maxx_gaussian * sampled_boxes_width + sampled_boxes[:, 3]
        maxy = tf.maximum(miny, maxy)
        maxx = tf.maximum(minx, maxx)
        sampled_boxes = tf.stack([miny, minx, maxy, maxx], axis=1)
        sampled_boxes = tf.maximum(tf.minimum(sampled_boxes, 1.0), 0.0)
        return box_list.BoxList(sampled_boxes)
コード例 #8
0
    def _decode(self, rel_codes, anchors):
        """Decode relative codes to boxes and keypoints.

    Args:
      rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
        anchor-encoded boxes and keypoints
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes and keypoints.
    """
        ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes(
        )

        num_codes = tf.shape(rel_codes)[0]
        result = tf.unstack(tf.transpose(rel_codes))
        ty, tx, th, tw = result[:4]
        tkeypoints = result[4:]
        if self._scale_factors:
            ty /= self._scale_factors[0]
            tx /= self._scale_factors[1]
            th /= self._scale_factors[2]
            tw /= self._scale_factors[3]
            tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])

        w = tf.exp(tw) * wa
        h = tf.exp(th) * ha
        ycenter = ty * ha + ycenter_a
        xcenter = tx * wa + xcenter_a
        ymin = ycenter - h / 2.
        xmin = xcenter - w / 2.
        ymax = ycenter + h / 2.
        xmax = xcenter + w / 2.
        decoded_boxes_keypoints = box_list.BoxList(
            tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))

        tiled_anchor_centers = tf.tile(tf.stack([ycenter_a, xcenter_a]),
                                       [self._num_keypoints, 1])
        tiled_anchor_sizes = tf.tile(tf.stack([ha, wa]),
                                     [self._num_keypoints, 1])
        keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
        keypoints = tf.reshape(tf.transpose(keypoints),
                               [-1, self._num_keypoints, 2])
        decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints,
                                          keypoints)
        return decoded_boxes_keypoints
コード例 #9
0
def gather(boxlist, indices, fields=None, scope=None, use_static_shapes=False):
    """Gather boxes from BoxList according to indices and return new BoxList.

  By default, `gather` returns boxes corresponding to the input index list, as
  well as all additional fields stored in the boxlist (indexing into the
  first dimension).  However one can optionally only gather from a
  subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indices: a rank-1 tensor of type int32 / int64
    fields: (optional) list of fields to also gather from.  If None (default),
      all fields are gathered from.  Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.
    use_static_shapes: Whether to use an implementation with static shape
      gurantees.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
    specified by indices
  Raises:
    ValueError: if specified field is not contained in boxlist or if the
      indices are not of type int32
  """
    with tf.name_scope(scope, 'Gather'):
        if len(indices.shape.as_list()) != 1:
            raise ValueError('indices should have rank 1')
        if indices.dtype != tf.int32 and indices.dtype != tf.int64:
            raise ValueError('indices should be an int32 / int64 tensor')
        gather_op = tf.gather
        if use_static_shapes:
            gather_op = ops.matmul_gather_on_zeroth_axis
        subboxlist = box_list.BoxList(gather_op(boxlist.get(), indices))
        if fields is None:
            fields = boxlist.get_extra_fields()
        fields += ['boxes']
        for field in fields:
            if not boxlist.has_field(field):
                raise ValueError('boxlist must contain all specified fields')
            subfieldlist = gather_op(boxlist.get_field(field), indices)
            subboxlist.add_field(field, subfieldlist)
        return subboxlist
コード例 #10
0
def pad_or_clip_box_list(boxlist, num_boxes, scope=None):
    """Pads or clips all fields of a BoxList.

  Args:
    boxlist: A BoxList with arbitrary of number of boxes.
    num_boxes: First num_boxes in boxlist are kept.
      The fields are zero-padded if num_boxes is bigger than the
      actual number of boxes.
    scope: name scope.

  Returns:
    BoxList with all fields padded or clipped.
  """
    with tf.name_scope(scope, 'PadOrClipBoxList'):
        subboxlist = box_list.BoxList(
            shape_utils.pad_or_clip_tensor(boxlist.get(), num_boxes))
        for field in boxlist.get_extra_fields():
            subfield = shape_utils.pad_or_clip_tensor(boxlist.get_field(field),
                                                      num_boxes)
            subboxlist.add_field(field, subfield)
        return subboxlist
コード例 #11
0
    def _decode(self, rel_codes, anchors):
        """Decode.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes

    Raises:
      ValueError: if the anchors still have deprecated stddev field and expects
        the decode method to use stddev value from that field.
    """
        means = anchors.get()
        if anchors.has_field('stddev'):
            raise ValueError(
                "'stddev' is a parameter of MeanStddevBoxCoder and "
                "should not be specified in the box list.")
        box_corners = rel_codes * self._stddev + means
        return box_list.BoxList(box_corners)
コード例 #12
0
def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
    """Clip bounding boxes to a window.

  This op clips any input bounding boxes (represented by bounding box
  corners) to a window, optionally filtering out boxes that do not
  overlap at all with the window.

  Args:
    boxlist: BoxList holding M_in boxes
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window to which the op should clip boxes.
    filter_nonoverlapping: whether to filter out boxes that do not overlap at
      all with the window.
    scope: name scope.

  Returns:
    a BoxList holding M_out boxes where M_out <= M_in
  """
    with tf.name_scope(scope, 'ClipToWindow'):
        y_min, x_min, y_max, x_max = tf.split(value=boxlist.get(),
                                              num_or_size_splits=4,
                                              axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
        y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
        y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
        x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
        x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
        clipped = box_list.BoxList(
            tf.concat(
                [y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped],
                1))
        clipped = _copy_extra_fields(clipped, boxlist)
        if filter_nonoverlapping:
            areas = area(clipped)
            nonzero_area_indices = tf.cast(
                tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
            clipped = gather(clipped, nonzero_area_indices)
        return clipped
コード例 #13
0
    def _create_regression_targets(self, anchors, groundtruth_boxes, match):
        """Returns a regression target for each anchor.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth_boxes
      match: a matcher.Match object

    Returns:
      reg_targets: a float32 tensor with shape [N, box_code_dimension]
    """
        matched_gt_boxes = match.gather_based_on_match(
            groundtruth_boxes.get(),
            unmatched_value=tf.zeros(4),
            ignored_value=tf.zeros(4))
        matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
        if groundtruth_boxes.has_field(fields.BoxListFields.keypoints):
            groundtruth_keypoints = groundtruth_boxes.get_field(
                fields.BoxListFields.keypoints)
            matched_keypoints = match.gather_based_on_match(
                groundtruth_keypoints,
                unmatched_value=tf.zeros(
                    groundtruth_keypoints.get_shape()[1:]),
                ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
            matched_gt_boxlist.add_field(fields.BoxListFields.keypoints,
                                         matched_keypoints)
        matched_reg_targets = self._box_coder.encode(matched_gt_boxlist,
                                                     anchors)
        match_results_shape = shape_utils.combined_static_and_dynamic_shape(
            match.match_results)

        # Zero out the unmatched and ignored regression targets.
        unmatched_ignored_reg_targets = tf.tile(
            self._default_regression_target(), [match_results_shape[0], 1])
        matched_anchors_mask = match.matched_column_indicator()
        reg_targets = tf.where(matched_anchors_mask, matched_reg_targets,
                               unmatched_ignored_reg_targets)
        return reg_targets
コード例 #14
0
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   boundaries=None,
                                   pad_to_max_output_size=False,
                                   additional_fields=None,
                                   scope=None):
    """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Selected boxes are guaranteed to be sorted in decreasing order by score (but
  the sort is not guaranteed to be stable).

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
      number of classes or 1 depending on whether a separate box is predicted
      per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each of
      the k detections. The scores have to be non-negative when
      pad_to_max_output_size is True.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks. `q` can be either number of classes or 1 depending
      on whether a separate mask is predicted per class.
    boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
      tensor containing box boundaries. `q` can be either number of classes or 1
      depending on whether a separate boundary is predicted per class.
    pad_to_max_output_size: If true, the output nmsed boxes are padded to be of
      length `max_size_per_class`. Defaults to false.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`. After non-maximum
      suppression, all tensors corresponding to the selected boxes will be
      added to resulting BoxList.
    scope: name scope.

  Returns:
    A tuple of sorted_boxes and num_valid_nms_boxes. The sorted_boxes is a
      BoxList holds M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box. The
      num_valid_nms_boxes is a 0-D integer tensor representing the number of
      valid elements in `BoxList`, with the valid elements appearing first.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
      a valid scores field.
  """
    if not 0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if scores.shape.ndims != 2:
        raise ValueError('scores field must be of rank 2')
    if scores.shape[1].value is None:
        raise ValueError('scores must have statically defined second '
                         'dimension')
    if boxes.shape.ndims != 3:
        raise ValueError('boxes must be of rank 3.')
    if not (boxes.shape[1].value == scores.shape[1].value
            or boxes.shape[1].value == 1):
        raise ValueError('second dimension of boxes must be either 1 or equal '
                         'to the second dimension of scores')
    if boxes.shape[2].value != 4:
        raise ValueError('last dimension of boxes must be of size 4.')
    if change_coordinate_frame and clip_window is None:
        raise ValueError(
            'if change_coordinate_frame is True, then a clip_window'
            'must be specified.')

    with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
        num_scores = tf.shape(scores)[0]
        num_classes = scores.get_shape()[1]

        selected_boxes_list = []
        num_valid_nms_boxes_cumulative = tf.constant(0)
        per_class_boxes_list = tf.unstack(boxes, axis=1)
        if masks is not None:
            per_class_masks_list = tf.unstack(masks, axis=1)
        if boundaries is not None:
            per_class_boundaries_list = tf.unstack(boundaries, axis=1)
        boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1 else
                     [0] * num_classes.value)
        for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
            per_class_boxes = per_class_boxes_list[boxes_idx]
            boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
            class_scores = tf.reshape(
                tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])),
                [-1])

            boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                               class_scores)
            if masks is not None:
                per_class_masks = per_class_masks_list[boxes_idx]
                boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
                                                   per_class_masks)
            if boundaries is not None:
                per_class_boundaries = per_class_boundaries_list[boxes_idx]
                boxlist_and_class_scores.add_field(
                    fields.BoxListFields.boundaries, per_class_boundaries)
            if additional_fields is not None:
                for key, tensor in additional_fields.items():
                    boxlist_and_class_scores.add_field(key, tensor)

            if pad_to_max_output_size:
                max_selection_size = max_size_per_class
                selected_indices, num_valid_nms_boxes = (
                    tf.image.non_max_suppression_padded(
                        boxlist_and_class_scores.get(),
                        boxlist_and_class_scores.get_field(
                            fields.BoxListFields.scores),
                        max_selection_size,
                        iou_threshold=iou_thresh,
                        score_threshold=score_thresh,
                        pad_to_max_output_size=True))
            else:
                max_selection_size = tf.minimum(
                    max_size_per_class, boxlist_and_class_scores.num_boxes())
                selected_indices = tf.image.non_max_suppression(
                    boxlist_and_class_scores.get(),
                    boxlist_and_class_scores.get_field(
                        fields.BoxListFields.scores),
                    max_selection_size,
                    iou_threshold=iou_thresh,
                    score_threshold=score_thresh)
                num_valid_nms_boxes = tf.shape(selected_indices)[0]
                selected_indices = tf.concat([
                    selected_indices,
                    tf.zeros(max_selection_size - num_valid_nms_boxes,
                             tf.int32)
                ], 0)
            nms_result = box_list_ops.gather(boxlist_and_class_scores,
                                             selected_indices)
            # Make the scores -1 for invalid boxes.
            valid_nms_boxes_indx = tf.less(tf.range(max_selection_size),
                                           num_valid_nms_boxes)
            nms_scores = nms_result.get_field(fields.BoxListFields.scores)
            nms_result.add_field(
                fields.BoxListFields.scores,
                tf.where(valid_nms_boxes_indx, nms_scores,
                         -1 * tf.ones(max_selection_size)))
            num_valid_nms_boxes_cumulative += num_valid_nms_boxes

            nms_result.add_field(fields.BoxListFields.classes, (tf.zeros_like(
                nms_result.get_field(fields.BoxListFields.scores)) +
                                                                class_idx))
            selected_boxes_list.append(nms_result)
        selected_boxes = box_list_ops.concatenate(selected_boxes_list)
        sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                                  fields.BoxListFields.scores)
        if clip_window is not None:
            # When pad_to_max_output_size is False, it prunes the boxes with zero
            # area.
            sorted_boxes = box_list_ops.clip_to_window(
                sorted_boxes,
                clip_window,
                filter_nonoverlapping=not pad_to_max_output_size)
            # Set the scores of boxes with zero area to -1 to keep the default
            # behaviour of pruning out zero area boxes.
            sorted_boxes_size = tf.shape(sorted_boxes.get())[0]
            non_zero_box_area = tf.cast(box_list_ops.area(sorted_boxes),
                                        tf.bool)
            sorted_boxes_scores = tf.where(
                non_zero_box_area,
                sorted_boxes.get_field(fields.BoxListFields.scores),
                -1 * tf.ones(sorted_boxes_size))
            sorted_boxes.add_field(fields.BoxListFields.scores,
                                   sorted_boxes_scores)
            num_valid_nms_boxes_cumulative = tf.reduce_sum(
                tf.cast(tf.greater_equal(sorted_boxes_scores, 0), tf.int32))
            sorted_boxes = box_list_ops.sort_by_field(
                sorted_boxes, fields.BoxListFields.scores)
            if change_coordinate_frame:
                sorted_boxes = box_list_ops.change_coordinate_frame(
                    sorted_boxes, clip_window)

        if max_total_size:
            max_total_size = tf.minimum(max_total_size,
                                        sorted_boxes.num_boxes())
            sorted_boxes = box_list_ops.gather(sorted_boxes,
                                               tf.range(max_total_size))
            num_valid_nms_boxes_cumulative = tf.where(
                max_total_size > num_valid_nms_boxes_cumulative,
                num_valid_nms_boxes_cumulative, max_total_size)
        # Select only the valid boxes if pad_to_max_output_size is False.
        if not pad_to_max_output_size:
            sorted_boxes = box_list_ops.gather(
                sorted_boxes, tf.range(num_valid_nms_boxes_cumulative))

        return sorted_boxes, num_valid_nms_boxes_cumulative
コード例 #15
0
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
    """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.

  Performs box voting as described in 'Object detection via a multi-region &
  semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
  each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
  with iou overlap >= iou_thresh. The location of B is set to the weighted
  average location of boxes in S (scores are used for weighting). And the score
  of B is set to the average score of boxes in S.

  Args:
    selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
      boxes are usually selected from pool_boxes using non max suppression.
    pool_boxes: BoxList containing a set of (possibly redundant) boxes.
    iou_thresh: (float scalar) iou threshold for matching boxes in
      selected_boxes and pool_boxes.

  Returns:
    BoxList containing averaged locations and scores for each box in
    selected_boxes.

  Raises:
    ValueError: if
      a) selected_boxes or pool_boxes is not a BoxList.
      b) if iou_thresh is not in [0, 1].
      c) pool_boxes does not have a scores field.
  """
    if not 0.0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if not isinstance(selected_boxes, box_list.BoxList):
        raise ValueError('selected_boxes must be a BoxList')
    if not isinstance(pool_boxes, box_list.BoxList):
        raise ValueError('pool_boxes must be a BoxList')
    if not pool_boxes.has_field('scores'):
        raise ValueError('pool_boxes must have a \'scores\' field')

    iou_ = iou(selected_boxes, pool_boxes)
    match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
    num_matches = tf.reduce_sum(match_indicator, 1)
    # TODO(kbanoop): Handle the case where some boxes in selected_boxes do not
    # match to any boxes in pool_boxes. For such boxes without any matches, we
    # should return the original boxes without voting.
    match_assert = tf.Assert(tf.reduce_all(tf.greater(num_matches, 0)), [
        'Each box in selected_boxes must match with at least one box '
        'in pool_boxes.'
    ])

    scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
    scores_assert = tf.Assert(tf.reduce_all(tf.greater_equal(scores, 0)),
                              ['Scores must be non negative.'])

    with tf.control_dependencies([scores_assert, match_assert]):
        sum_scores = tf.matmul(match_indicator, scores)
    averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches

    box_locations = tf.matmul(match_indicator,
                              pool_boxes.get() * scores) / sum_scores
    averaged_boxes = box_list.BoxList(box_locations)
    _copy_extra_fields(averaged_boxes, selected_boxes)
    averaged_boxes.add_field('scores', averaged_scores)
    return averaged_boxes
コード例 #16
0
def boolean_mask(boxlist,
                 indicator,
                 fields=None,
                 scope=None,
                 use_static_shapes=False,
                 indicator_sum=None):
    """Select boxes from BoxList according to indicator and return new BoxList.

  `boolean_mask` returns the subset of boxes that are marked as "True" by the
  indicator tensor. By default, `boolean_mask` returns boxes corresponding to
  the input index list, as well as all additional fields stored in the boxlist
  (indexing into the first dimension).  However one can optionally only draw
  from a subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indicator: a rank-1 boolean tensor
    fields: (optional) list of fields to also gather from.  If None (default),
      all fields are gathered from.  Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.
    use_static_shapes: Whether to use an implementation with static shape
      gurantees.
    indicator_sum: An integer containing the sum of `indicator` vector. Only
      required if `use_static_shape` is True.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indicator
  Raises:
    ValueError: if `indicator` is not a rank-1 boolean tensor.
  """
    with tf.name_scope(scope, 'BooleanMask'):
        if indicator.shape.ndims != 1:
            raise ValueError('indicator should have rank 1')
        if indicator.dtype != tf.bool:
            raise ValueError('indicator should be a boolean tensor')
        if use_static_shapes:
            if not (indicator_sum and isinstance(indicator_sum, int)):
                raise ValueError('`indicator_sum` must be a of type int')
            selected_positions = tf.to_float(indicator)
            indexed_positions = tf.cast(tf.multiply(
                tf.cumsum(selected_positions), selected_positions),
                                        dtype=tf.int32)
            one_hot_selector = tf.one_hot(indexed_positions - 1,
                                          indicator_sum,
                                          dtype=tf.float32)
            sampled_indices = tf.cast(tf.tensordot(tf.to_float(
                tf.range(tf.shape(indicator)[0])),
                                                   one_hot_selector,
                                                   axes=[0, 0]),
                                      dtype=tf.int32)
            return gather(boxlist, sampled_indices, use_static_shapes=True)
        else:
            subboxlist = box_list.BoxList(
                tf.boolean_mask(boxlist.get(), indicator))
            if fields is None:
                fields = boxlist.get_extra_fields()
            for field in fields:
                if not boxlist.has_field(field):
                    raise ValueError(
                        'boxlist must contain all specified fields')
                subfieldlist = tf.boolean_mask(boxlist.get_field(field),
                                               indicator)
                subboxlist.add_field(field, subfieldlist)
            return subboxlist