def test_concatenate_with_missing_fields(self):
    """Concatenation must fail when only some inputs carry a given field."""
    scored_boxes = box_list.BoxList(
        tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32))
    scored_boxes.add_field('scores', tf.constant([1.0, 2.1]))

    # The second list deliberately has no 'scores' field.
    unscored_boxes = box_list.BoxList(
        tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32))

    with self.assertRaises(ValueError):
        box_list_ops.concatenate([scored_boxes, unscored_boxes])
def test_invalid_input_box_list_list(self):
    """Checks that `concatenate` rejects malformed `boxlists` arguments.

    Three inputs are expected to raise ValueError: None, an empty list, and
    a list that mixes a BoxList with a non-BoxList element.
    """
    with self.assertRaises(ValueError):
        box_list_ops.concatenate(None)
    with self.assertRaises(ValueError):
        box_list_ops.concatenate([])

    # Build the fixture outside the assertRaises block so that a ValueError
    # raised during setup cannot make the test pass spuriously.
    corners = tf.constant([[0, 0, 0, 0]], tf.float32)
    boxlist = box_list.BoxList(corners)
    with self.assertRaises(ValueError):
        box_list_ops.concatenate([boxlist, 2])
def test_concatenate_is_correct(self):
    """Concatenating two BoxLists stacks their boxes and 'scores' in order."""
    first = box_list.BoxList(
        tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32))
    first.add_field('scores', tf.constant([1.0, 2.1]))

    second = box_list.BoxList(
        tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]], tf.float32))
    second.add_field('scores', tf.constant([1.0, 2.1, 5.6]))

    merged = box_list_ops.concatenate([first, second])

    # Expected output: rows of `first` followed by rows of `second`.
    expected_corners = [
        [0, 0, 0, 0],
        [1, 2, 3, 4],
        [0, 3, 1, 6],
        [2, 4, 3, 8],
        [1, 0, 5, 10],
    ]
    expected_scores = [1.0, 2.1, 1.0, 2.1, 5.6]

    with self.test_session() as sess:
        fetches = [merged.get(), merged.get_field('scores')]
        actual_corners, actual_scores = sess.run(fetches)
        self.assertAllClose(actual_corners, expected_corners)
        self.assertAllClose(actual_scores, expected_scores)
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   additional_fields=None,
                                   scope=None):
    """Multi-class version of non maximum suppression.

    This op greedily selects a subset of detection bounding boxes, pruning
    away boxes that have high IOU (intersection over union) overlap
    (> thresh) with already selected boxes.  It operates independently for
    each class for which scores are provided (via the `scores` argument),
    pruning boxes with score less than a provided threshold prior to applying
    NMS.

    Please note that this operation is performed on *all* classes, therefore
    any background classes should be removed prior to calling this function.

    Args:
      boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be
        either number of classes or 1 depending on whether a separate box is
        predicted per class.
      scores: A [k, num_classes] float32 tensor containing the scores for
        each of the k detections.
      score_thresh: scalar threshold for score (low scoring boxes are
        removed).
      iou_thresh: scalar threshold for IOU (new boxes that have high IOU
        overlap with previously selected boxes are removed).
      max_size_per_class: maximum number of retained boxes per class.
      max_total_size: maximum number of boxes retained over all classes. By
        default (0) returns all boxes retained after capping boxes per class.
      clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
        representing the window to clip and normalize boxes to before
        performing non-max suppression.
      change_coordinate_frame: Whether to normalize coordinates after
        clipping relative to clip_window (this can only be set to True if a
        clip_window is provided).
      masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
        containing box masks. `q` can be either number of classes or 1
        depending on whether a separate mask is predicted per class.
      additional_fields: (optional) If not None, a dictionary that maps keys
        to tensors whose first dimensions are all of size `k`. After
        non-maximum suppression, all tensors corresponding to the selected
        boxes will be added to resulting BoxList.
      scope: name scope.

    Returns:
      a BoxList holding M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing
      order and a rank-1 classes field representing a class label for each
      box.

    Raises:
      ValueError: if iou_thresh is not in [0, 1]; if scores is not rank 2 or
        its second dimension is not statically defined; if boxes is not rank
        3, its second dimension is neither 1 nor the number of classes, or
        its last dimension is not 4; or if change_coordinate_frame is True
        without a clip_window.
    """
    if not 0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if scores.shape.ndims != 2:
        raise ValueError('scores field must be of rank 2')
    if scores.shape[1].value is None:
        raise ValueError('scores must have statically defined second '
                         'dimension')
    if boxes.shape.ndims != 3:
        raise ValueError('boxes must be of rank 3.')
    if not (boxes.shape[1].value == scores.shape[1].value or
            boxes.shape[1].value == 1):
        raise ValueError('second dimension of boxes must be either 1 or equal '
                         'to the second dimension of scores')
    if boxes.shape[2].value != 4:
        raise ValueError('last dimension of boxes must be of size 4.')
    if change_coordinate_frame and clip_window is None:
        # The trailing space matters: the adjacent literals are concatenated.
        raise ValueError(
            'if change_coordinate_frame is True, then a clip_window '
            'must be specified.')

    with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
        num_boxes = tf.shape(boxes)[0]
        num_scores = tf.shape(scores)[0]
        # Plain Python int (checked above to be statically known), so that
        # range() and list repetition below do not depend on implicit
        # integer conversion of a Dimension object.
        num_classes = scores.shape[1].value

        length_assert = tf.Assert(
            tf.equal(num_boxes, num_scores),
            ['Incorrect scores field length: actual vs expected.',
             num_scores, num_boxes])

        selected_boxes_list = []
        per_class_boxes_list = tf.unstack(boxes, axis=1)
        if masks is not None:
            per_class_masks_list = tf.unstack(masks, axis=1)
        # When a single box is shared by all classes (q == 1), every class
        # reads slice 0; otherwise class i reads slice i.
        boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
                     else [0] * num_classes)
        for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
            per_class_boxes = per_class_boxes_list[boxes_idx]
            boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
            # Tie the score slice to the runtime length check so the assert
            # is evaluated whenever the scores are used.
            with tf.control_dependencies([length_assert]):
                class_scores = tf.reshape(
                    tf.slice(scores, [0, class_idx],
                             tf.stack([num_scores, 1])), [-1])
            boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                               class_scores)
            if masks is not None:
                per_class_masks = per_class_masks_list[boxes_idx]
                boxlist_and_class_scores.add_field(
                    fields.BoxListFields.masks, per_class_masks)
            if additional_fields is not None:
                for key, tensor in additional_fields.items():
                    boxlist_and_class_scores.add_field(key, tensor)

            boxlist_filtered = box_list_ops.filter_greater_than(
                boxlist_and_class_scores, score_thresh)
            if clip_window is not None:
                boxlist_filtered = box_list_ops.clip_to_window(
                    boxlist_filtered, clip_window)
                if change_coordinate_frame:
                    boxlist_filtered = box_list_ops.change_coordinate_frame(
                        boxlist_filtered, clip_window)

            # Never request more boxes than survived filtering.
            max_selection_size = tf.minimum(max_size_per_class,
                                            boxlist_filtered.num_boxes())
            selected_indices = tf.image.non_max_suppression(
                boxlist_filtered.get(),
                boxlist_filtered.get_field(fields.BoxListFields.scores),
                max_selection_size,
                iou_threshold=iou_thresh)
            nms_result = box_list_ops.gather(boxlist_filtered,
                                             selected_indices)
            # Label every surviving box with the index of the current class.
            nms_result.add_field(
                fields.BoxListFields.classes,
                (tf.zeros_like(
                    nms_result.get_field(fields.BoxListFields.scores)) +
                 class_idx))
            selected_boxes_list.append(nms_result)

        selected_boxes = box_list_ops.concatenate(selected_boxes_list)
        sorted_boxes = box_list_ops.sort_by_field(
            selected_boxes, fields.BoxListFields.scores)
        if max_total_size:
            max_total_size = tf.minimum(max_total_size,
                                        sorted_boxes.num_boxes())
            sorted_boxes = box_list_ops.gather(sorted_boxes,
                                               tf.range(max_total_size))
        return sorted_boxes
def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
    """Builds anchors for every feature map and merges them into one BoxList.

    One anchor grid is tiled per entry of `feature_map_shape_list`, using the
    matching scales, aspect ratios, stride and offset; the grids are then
    concatenated in the order given.  A grid of shape MxM with k boxes per
    location contributes k*M^2 anchors, so e.g. two boxes per location on an
    8x8 grid plus three boxes per location on a 4x4 grid yields
    2*8^2 + 3*4^2 = 176 anchors.  Within the output, the box_spec index
    varies fastest, followed by width index, then height index, then grid
    index.

    Args:
      feature_map_shape_list: list of (height, width) tuples, one per convnet
        layer, e.g. [(8, 8), (7, 7)] for an 8x8 layer followed by a 7x7
        layer.  Must have the same length as self._box_specs.
      im_height: the height of the image to generate the grid for.  If both
        im_height and im_width are 1, the generated anchors default to
        normalized coordinates, otherwise absolute coordinates are used for
        the grid.
      im_width: the width of the image to generate the grid for; see
        im_height.

    Returns:
      boxes: a BoxList holding a collection of N anchor boxes, with a
        'stddev' field of shape [N, 4] filled with 0.01.

    Raises:
      ValueError: if feature_map_shape_list is not a list with the same
        length as self._box_specs, or does not consist of 2-tuples.
      ValueError: if the resolved anchor strides or anchor offsets are not
        lists of 2-tuples with the same length as self._box_specs.
    """

    def _is_pair(candidate):
        return isinstance(candidate, tuple) and len(candidate) == 2

    if (not isinstance(feature_map_shape_list, list) or
            len(feature_map_shape_list) != len(self._box_specs)):
        raise ValueError('feature_map_shape_list must be a list with the same '
                         'length as self._box_specs')
    if any(not _is_pair(shape) for shape in feature_map_shape_list):
        raise ValueError('feature_map_shape_list must be a list of pairs.')

    im_height = tf.to_float(im_height)
    im_width = tf.to_float(im_width)

    # Strides: explicit ones are rescaled by the image size; otherwise one
    # cell of each feature map is used as the stride.
    if self._anchor_strides:
        anchor_strides = [(tf.to_float(stride[0]) / im_height,
                           tf.to_float(stride[1]) / im_width)
                          for stride in self._anchor_strides]
    else:
        anchor_strides = [(1.0 / tf.to_float(grid_h),
                           1.0 / tf.to_float(grid_w))
                          for grid_h, grid_w in feature_map_shape_list]

    # Offsets: explicit ones are rescaled by the image size; otherwise half a
    # stride is used.
    if self._anchor_offsets:
        anchor_offsets = [(tf.to_float(offset[0]) / im_height,
                           tf.to_float(offset[1]) / im_width)
                          for offset in self._anchor_offsets]
    else:
        anchor_offsets = [(0.5 * stride_y, 0.5 * stride_x)
                          for stride_y, stride_x in anchor_strides]

    for arg_name, arg in (('anchor_strides', anchor_strides),
                          ('anchor_offsets', anchor_offsets)):
        if (not isinstance(arg, list) or
                len(arg) != len(self._box_specs)):
            raise ValueError('%s must be a list with the same length '
                             'as self._box_specs' % arg_name)
        if any(not _is_pair(item) for item in arg):
            raise ValueError('%s must be a list of pairs.' % arg_name)

    # Scale the base anchor size by the ratio of the shorter image side to
    # each image side.
    min_im_shape = tf.minimum(im_height, im_width)
    scale_height = min_im_shape / im_height
    scale_width = min_im_shape / im_width
    base_anchor_size = [
        scale_height * self._base_anchor_size[0],
        scale_width * self._base_anchor_size[1],
    ]

    per_grid_specs = zip(feature_map_shape_list, self._scales,
                         self._aspect_ratios, anchor_strides, anchor_offsets)
    anchor_grid_list = [
        grid_anchor_generator.tile_anchors(
            grid_height=grid_size[0],
            grid_width=grid_size[1],
            scales=scales,
            aspect_ratios=aspect_ratios,
            base_anchor_size=base_anchor_size,
            anchor_stride=stride,
            anchor_offset=offset)
        for grid_size, scales, aspect_ratios, stride, offset in per_grid_specs
    ]
    anchors = box_list_ops.concatenate(anchor_grid_list)

    # Prefer the static box count; fall back to the dynamic one.
    static_count = anchors.num_boxes_static()
    num_anchors = (static_count if static_count is not None
                   else anchors.num_boxes())

    if self._clip_window is not None:
        anchors = box_list_ops.clip_to_window(
            anchors, self._clip_window, filter_nonoverlapping=False)
        # TODO(jonathanhuang): make reshape an option for the clip_to_window op
        anchors.set(tf.reshape(anchors.get(), [num_anchors, 4]))

    stddevs_tensor = 0.01 * tf.ones(
        [num_anchors, 4], dtype=tf.float32, name='stddevs')
    anchors.add_field('stddev', stddevs_tensor)
    return anchors