def anchor_test():
    input_size = [224, 224]
    # feature_map = [(28, 28), (14, 14), (7, 7), (4, 4), (2, 2)]
    feature_maps = [(tf.ceil(input_size[0] / pow(2., i + 3)),
                     tf.ceil(input_size[1] / pow(2., i + 3)))
                    for i in range(5)]

    feature_map_list = [
        (tf.ceil(tf.multiply(tf.to_float(input_size[0]), 1 / pow(2., i + 3))),
         tf.ceil(tf.multiply(tf.to_float(input_size[1]), 1 / pow(2., i + 3))))
        for i in range(5)
    ]

    # feature_map_list = [(3,3)]
    anchor_generator = create_retinanet_anchors()
    # print ('scales = ', anchor_generator._scales)
    # print ('aspect ratio = ', anchor_generator._aspect_ratios)

    anchors = anchor_generator.generate(input_size, feature_map_list)
    anchors_before_assign = anchors.get()
    # return
    gt_boxes = box_list.BoxList(
        tf.convert_to_tensor(
            [[0, 0, 210, 210], [200, 203, 205, 206], [1, 1, 220, 220]],
            dtype=tf.float32))
    gt_labels = tf.convert_to_tensor([1, 1, 1])
    anchors, box_iou = anchor_assign(anchors, gt_boxes, gt_labels)
    # x = tf.convert_to_tensor([[[1,2,3],[3,4,5],[5,6,7]],[[1,2,3],[3,4,5],[5,6,7]]])
    result = anchors.get_field("gt_boxes")
    labels = anchors.get_field('gt_labels')
    print(labels.get_shape())
    with tf.Session() as sess:
        print(sess.run(result).shape)
        print(sess.run(labels).shape)
        print(sess.run(box_iou))
        # print(sess.run(tf.squeeze(tf.where(tf.greater(gt_labels, 1)))))
        # print(sess.run(tf.gather(x, tf.convert_to_tensor([0,1]), axis=1)))
    sess.close()
Esempio n. 2
0
def gather(boxlist, indices, fields=None, scope=None):
  """Gather boxes from BoxList according to indices and return new BoxList.

  By default, `gather` returns boxes corresponding to the input index list, as
  well as all additional fields stored in the boxlist (indexing into the
  first dimension).  However one can optionally only gather from a
  subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indices: a rank-1 tensor of type int32 / int64
    fields: (optional) list of fields to also gather from.  If None (default),
      all fields are gathered from.  Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
    specified by indices
  Raises:
    ValueError: if specified field is not contained in boxlist or if the
      indices are not of type int32
  """
  with tf.name_scope(scope, 'Gather'):
    if len(indices.shape.as_list()) != 1:
      raise ValueError('indices should have rank 1')
    if indices.dtype != tf.int32 and indices.dtype != tf.int64:
      raise ValueError('indices should be an int32 / int64 tensor')
    subboxlist = box_list.BoxList(tf.gather(boxlist.get(), indices))
    if fields is None:
      fields = boxlist.get_extra_fields()
    for field in fields:
      if not boxlist.has_field(field):
        raise ValueError('boxlist must contain all specified fields')
      subfieldlist = tf.gather(boxlist.get_field(field), indices)
      subboxlist.add_field(field, subfieldlist)
    return subboxlist
Esempio n. 3
0
def boolean_mask(boxlist, indicator, fields=None, scope=None):
  """Select boxes from BoxList according to indicator and return new BoxList.

  `boolean_mask` returns the subset of boxes that are marked as "True" by the
  indicator tensor. By default, `boolean_mask` returns boxes corresponding to
  the input index list, as well as all additional fields stored in the boxlist
  (indexing into the first dimension).  However one can optionally only draw
  from a subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indicator: a rank-1 boolean tensor
    fields: (optional) list of fields to also gather from.  If None (default),
      all fields are gathered from.  Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indicator
  Raises:
    ValueError: if `indicator` is not a rank-1 boolean tensor.
  """
  with tf.name_scope(scope, 'BooleanMask'):
    if indicator.shape.ndims != 1:
      raise ValueError('indicator should have rank 1')
    if indicator.dtype != tf.bool:
      raise ValueError('indicator should be a boolean tensor')
    subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
    if fields is None:
      fields = boxlist.get_extra_fields()
    for field in fields:
      if not boxlist.has_field(field):
        raise ValueError('boxlist must contain all specified fields')
      subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
      subboxlist.add_field(field, subfieldlist)
    return subboxlist
Esempio n. 4
0
def scale(boxlist, y_scale, x_scale, scope=None):
  """scale box coordinates in x and y dimensions.

  Args:
    boxlist: BoxList holding N boxes
    y_scale: (float) scalar tensor
    x_scale: (float) scalar tensor
    scope: name scope.

  Returns:
    boxlist: BoxList holding N boxes
  """
  with tf.name_scope(scope, 'Scale'):
    y_scale = tf.cast(y_scale, tf.float32)
    x_scale = tf.cast(x_scale, tf.float32)
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    y_min = y_scale * y_min
    y_max = y_scale * y_max
    x_min = x_scale * x_min
    x_max = x_scale * x_max
    scaled_boxlist = box_list.BoxList(
        tf.concat([y_min, x_min, y_max, x_max], 1))
    return _copy_extra_fields(scaled_boxlist, boxlist)
Esempio n. 5
0
def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
  """Clip bounding boxes to a window.

  This op clips any input bounding boxes (represented by bounding box
  corners) to a window, optionally filtering out boxes that do not
  overlap at all with the window.

  Args:
    boxlist: BoxList holding M_in boxes
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window to which the op should clip boxes.
    filter_nonoverlapping: whether to filter out boxes that do not overlap at
      all with the window.
    scope: name scope.

  Returns:
    a BoxList holding M_out boxes where M_out <= M_in
  """
  with tf.name_scope(scope, 'ClipToWindow'):
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
    y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
    x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
    x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
    clipped = box_list.BoxList(
        tf.concat([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped],
                  1))
    clipped = _copy_extra_fields(clipped, boxlist)
    if filter_nonoverlapping:
      areas = area(clipped)
      nonzero_area_indices = tf.cast(
          tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
      clipped = gather(clipped, nonzero_area_indices)
    return clipped
Esempio n. 6
0
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
  """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.

  Performs box voting as described in 'Object detection via a multi-region &
  semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
  each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
  with iou overlap >= iou_thresh. The location of B is set to the weighted
  average location of boxes in S (scores are used for weighting). And the score
  of B is set to the average score of boxes in S.

  Args:
    selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
      boxes are usually selected from pool_boxes using non max suppression.
    pool_boxes: BoxList containing a set of (possibly redundant) boxes.
    iou_thresh: (float scalar) iou threshold for matching boxes in
      selected_boxes and pool_boxes.

  Returns:
    BoxList containing averaged locations and scores for each box in
    selected_boxes.

  Raises:
    ValueError: if
      a) selected_boxes or pool_boxes is not a BoxList.
      b) if iou_thresh is not in [0, 1].
      c) pool_boxes does not have a scores field.
  """
  if not 0.0 <= iou_thresh <= 1.0:
    raise ValueError('iou_thresh must be between 0 and 1')
  if not isinstance(selected_boxes, box_list.BoxList):
    raise ValueError('selected_boxes must be a BoxList')
  if not isinstance(pool_boxes, box_list.BoxList):
    raise ValueError('pool_boxes must be a BoxList')
  if not pool_boxes.has_field('scores'):
    raise ValueError('pool_boxes must have a \'scores\' field')

  iou_ = iou(selected_boxes, pool_boxes)
  match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
  num_matches = tf.reduce_sum(match_indicator, 1)
  # TODO: Handle the case where some boxes in selected_boxes do not match to any
  # boxes in pool_boxes. For such boxes without any matches, we should return
  # the original boxes without voting.
  match_assert = tf.Assert(
      tf.reduce_all(tf.greater(num_matches, 0)),
      ['Each box in selected_boxes must match with at least one box '
       'in pool_boxes.'])

  scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
  scores_assert = tf.Assert(
      tf.reduce_all(tf.greater_equal(scores, 0)),
      ['Scores must be non negative.'])

  with tf.control_dependencies([scores_assert, match_assert]):
    sum_scores = tf.matmul(match_indicator, scores)
  averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches

  box_locations = tf.matmul(match_indicator,
                            pool_boxes.get() * scores) / sum_scores
  averaged_boxes = box_list.BoxList(box_locations)
  _copy_extra_fields(averaged_boxes, selected_boxes)
  averaged_boxes.add_field('scores', averaged_scores)
  return averaged_boxes
def tile_anchors(grid_height, grid_width, scales, aspect_ratios,
                 base_anchor_size, anchor_stride, anchor_offset):
    """Create a tiled set of anchors strided along a grid in image space.

  This op creates a set of anchor boxes by placing a "basis" collection of
  boxes with user-specified scales and aspect ratios centered at evenly
  distributed points along a grid.  The basis collection is specified via the
  scale and aspect_ratios arguments.  For example, setting scales=[.1, .2, .2]
  and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
  .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
  and aspect ratio 1/2.  Each box is multiplied by "base_anchor_size" before
  placing it over its respective center.

  Grid points are specified via grid_height, grid_width parameters as well as
  the anchor_stride and anchor_offset parameters.

  Args:
    grid_height: size of the grid in the y direction (int or int scalar tensor)
    grid_width: size of the grid in the x direction (int or int scalar tensor)
    scales: a 1-d  (float) tensor representing the scale of each box in the
      basis set.
    aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
      box in the basis set.  The length of the scales and aspect_ratios tensors
      must be equal.
    base_anchor_size: base anchor size in this layer as [height, width]
        (float tensor of shape [2])
    anchor_stride: difference in centers between base anchors for adjacent grid
                   positions (float tensor of shape [2])
    anchor_offset: center of the anchor with scale and aspect ratio 1 for the
                   upper left element of the grid, this should be zero for
                   feature networks with only VALID padding and even receptive
                   field size, but may need some additional calculation if other
                   padding is used (float tensor of shape [2])
  Returns:
    a BoxList holding a collection of N anchor boxes
  """
    ratio_sqrts = tf.sqrt(aspect_ratios)
    # 根据base_anchor_size计算anchor在原图上本来的宽高
    heights = scales / ratio_sqrts * base_anchor_size
    # print ('heights == ', heights.get_shape())
    widths = scales * ratio_sqrts * base_anchor_size
    # print ('widths == ', widths.get_shape())
    # Get a grid of box centers
    y_centers = tf.to_float(tf.range(grid_height))
    y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
    # print ('y_centers before meshgrid === ', y_centers.get_shape())
    x_centers = tf.to_float(tf.range(grid_width))
    x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
    # print('x_centers before meshgrid === ', x_centers.get_shape())
    x_centers, y_centers = tf.meshgrid(x_centers, y_centers)

    # xcenters在和widths进行meshgrid之前,xcenters的shape是(grid_height * grid_width),只不过每一行都是0-(grid_width-1),widths长度为9,是总共要生成的
    # 9个anchors宽度列表,由前面计算得到。widths在和xcenters进行meshgrid之后,由于meshgrid是对维度为1的tensor进行操作,首先会把xcenters展开,
    # 变成一行,有(grid_height * grid_width)列,然后再进行meshgrid操作。meshgrid之后,widths_grid为 (grid_height * gird_width) × 9维矩阵,每一行都是9个anchor的宽度
    # xcenters_grid为(grid_height * grid_width) * 9矩阵,每一列都是grid_height个(0-grid_widht-1)数值。
    # 下面的heights和y_centers进行meshgrid最终得到的结果略有不同,heights_grid和widths_grid结果很一致,都是(grid_height * gird_width) × 9维矩阵,每一行都是9个anchor的高度,
    # 但y_centers_grid就略有变化,因为y_centers是每一列值都是 (0~grid_heigt-1),但每一行的值都是相同的,即每一行的值都是同一个值,meshgrid会将不是1维的矩阵变成一维,是按照行展开的,
    # 所以y_centers展开后就变成[1,1,1,1,1,..., 1,2,2,2,2,...,2,....,h,h,h,...h]这种形式,因此在和heights进行meshgrid之后,y_centers_grid每一列都变成了前面说的那个列表内的值

    widths_grid, x_centers_grid = tf.meshgrid(widths, x_centers)
    heights_grid, y_centers_grid = tf.meshgrid(heights, y_centers)

    # 在对y_centers_grid 和 x_centers_grid 进行axis=2的stack,x_centers_gird 和 y_centers_grid 维度均为  (grid_height*grid_width) * 9 维度,只不过数值不一样,按照
    # axis=2 进行stack,其实就是把x_centers_grid 和 y_centers_grid 里的值一一对应起来,最后变成 (grid_height * grid_width) * 9 * 2的三维矩阵,其实就是所有anchor对应的
    # 中心点在图像上的坐标,类似于[[[1,1]*9, [1,2]*9, ..., [7,7]*9]]这种形式,其实就是把图片上每个点的坐标拿出来,并重复9次,当做这个点生成的9个anchor的centers
    bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=2)

    # 同理,对heights_grid 和 widths_grid 进行 axis=2 的stack, 也是得到一个(grid_height * grid_width) * 9 * 2的三维矩阵,只不过这个矩阵保存的是anchor的size,和前面的bbox_centers
    # 的值是一一对应的,即一个存了center的(x,y)坐标,一个存了bbox的宽高
    bbox_sizes = tf.stack([heights_grid, widths_grid], axis=2)

    # 接着对这两个矩阵进行reshape成 n*2 的二维矩阵,n是所有anchor的数量,为 (grid_heigt * grid_width * 9),bbox_centers每一行保存的是anchor的中心点坐标
    # bbox_sizes 保存的是anchor的对应的宽高
    bbox_centers = tf.reshape(bbox_centers, [-1, 2])
    bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
    # convert [ycenter, xcenter, height, width] to [ymin, xmin, ymax, xmax]
    bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)

    # 需要注意的是,这个生成的anchor就是相对于原图上的位置在哪,并且通过上一行的函数,把box的表示方式变成了[ymin, xmin, ymax, xmax],最终的shape为(n, 4)
    # base_anchor_size 变得越来越大的原因是,随着featuremap维度不断增高,其上面的每一个点所能表示的原图的范围,也即是感受野也在不断增大
    return box_list.BoxList(bbox_corners)
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   additional_fields=None,
                                   scope=None):
  """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
      number of classes or 1 depending on whether a separate box is predicted
      per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each of
      the k detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks. `q` can be either number of classes or 1 depending
      on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`. After non-maximum
      suppression, all tensors corresponding to the selected boxes will be
      added to resulting BoxList.
    scope: name scope.

  Returns:
    a BoxList holding M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box.
      If masks, keypoints, keypoint_heatmaps is not None, the boxlist will
      contain masks, keypoints, keypoint_heatmaps corresponding to boxes.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
      a valid scores field.
  """
  if not 0 <= iou_thresh <= 1.0:
    raise ValueError('iou_thresh must be between 0 and 1')
  if scores.shape.ndims != 2:
    raise ValueError('scores field must be of rank 2')
  if scores.shape[1].value is None:
    raise ValueError('scores must have statically defined second '
                     'dimension')
  if boxes.shape.ndims != 3:
    raise ValueError('boxes must be of rank 3.')
  if not (boxes.shape[1].value == scores.shape[1].value or
          boxes.shape[1].value == 1):
    raise ValueError('second dimension of boxes must be either 1 or equal '
                     'to the second dimension of scores')
  if boxes.shape[2].value != 4:
    raise ValueError('last dimension of boxes must be of size 4.')
  if change_coordinate_frame and clip_window is None:
    raise ValueError('if change_coordinate_frame is True, then a clip_window'
                     'must be specified.')

  with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
    num_boxes = tf.shape(boxes)[0]
    num_scores = tf.shape(scores)[0]
    num_classes = scores.get_shape()[1]

    length_assert = tf.Assert(
        tf.equal(num_boxes, num_scores),
        ['Incorrect scores field length: actual vs expected.',
         num_scores, num_boxes])

    selected_boxes_list = []
    per_class_boxes_list = tf.unstack(boxes, axis=1)
    if masks is not None:
      per_class_masks_list = tf.unstack(masks, axis=1)
    boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
                 else [0] * num_classes)
    for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
      per_class_boxes = per_class_boxes_list[boxes_idx]
      boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
      with tf.control_dependencies([length_assert]):
        class_scores = tf.reshape(
            tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])
      boxlist_and_class_scores.add_field(BoxListFields.scores,
                                         class_scores)
      if masks is not None:
        per_class_masks = per_class_masks_list[boxes_idx]
        boxlist_and_class_scores.add_field(BoxListFields.masks,
                                           per_class_masks)
      if additional_fields is not None:
        for key, tensor in additional_fields.items():
          boxlist_and_class_scores.add_field(key, tensor)
      boxlist_filtered = box_list_ops.filter_greater_than(
          boxlist_and_class_scores, score_thresh)
      if clip_window is not None:
        boxlist_filtered = box_list_ops.clip_to_window(
            boxlist_filtered, clip_window)
        if change_coordinate_frame:
          boxlist_filtered = box_list_ops.change_coordinate_frame(
              boxlist_filtered, clip_window)
      max_selection_size = tf.minimum(max_size_per_class,
                                      boxlist_filtered.num_boxes())
      selected_indices = tf.image.non_max_suppression(
          boxlist_filtered.get(),
          boxlist_filtered.get_field(BoxListFields.scores),
          max_selection_size,
          iou_threshold=iou_thresh)
      nms_result = box_list_ops.gather(boxlist_filtered, selected_indices)
      nms_result.add_field(
          BoxListFields.classes, (tf.zeros_like(
              nms_result.get_field(BoxListFields.scores)) + class_idx))
      selected_boxes_list.append(nms_result)
    selected_boxes = box_list_ops.concatenate(selected_boxes_list)
    sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                              BoxListFields.scores)
    if max_total_size:
      max_total_size = tf.minimum(max_total_size,
                                  sorted_boxes.num_boxes())
      sorted_boxes = box_list_ops.gather(sorted_boxes,
                                         tf.range(max_total_size))
    return sorted_boxes