def _get_overlaps_and_scores_box_mode(self, detected_boxes, detected_scores,
                                      groundtruth_boxes,
                                      groundtruth_is_group_of_list):
    """Compute IoU between detections and non-group-of groundtruth boxes.

    Args:
        detected_boxes: A numpy array of shape [N, 4] representing detected
            box coordinates.
        detected_scores: A 1-d numpy array of length N representing
            classification scores.
        groundtruth_boxes: A numpy array of shape [M, 4] representing ground
            truth box coordinates.
        groundtruth_is_group_of_list: A boolean numpy array of length M
            denoting whether a ground truth box has the group-of tag. Only
            boxes whose flag is False take part in the IoU computation.

    Returns:
        A 4-tuple ``(iou, ioa, scores, num_boxes)``:
        iou: The result of ``np_box_list_ops.iou`` between the detected
            boxlist and the non-group-of groundtruth boxlist.
        ioa: Always None; this function does not compute overlaps against
            group-of boxes.
        scores: The 'scores' field read back from the detected boxlist.
        num_boxes: Number of boxes in the detected boxlist.
    """
    detections = np_box_list.BoxList(detected_boxes)
    detections.add_field('scores', detected_scores)

    # Invert the group-of flags so only the regular boxes are selected.
    regular_mask = ~groundtruth_is_group_of_list
    regular_groundtruth = np_box_list.BoxList(groundtruth_boxes[regular_mask])

    overlaps = np_box_list_ops.iou(detections, regular_groundtruth)
    return (overlaps, None, detections.get_field('scores'),
            detections.num_boxes())
def _compute_is_class_correctly_detected_in_image(self,
                                                  detected_boxes,
                                                  detected_scores,
                                                  groundtruth_boxes,
                                                  detected_masks=None,
                                                  groundtruth_masks=None):
    """Compute the CorLoc outcome for a single class in one image.

    Only the highest-scoring detection is considered; it counts as correct
    when its best IoU against the groundtruth reaches
    ``self.matching_iou_threshold``.

    Args:
        detected_boxes: A numpy array of shape [N, 4] representing detected
            box coordinates.
        detected_scores: A 1-d numpy array of length N representing
            classification scores.
        groundtruth_boxes: A numpy array of shape [M, 4] representing ground
            truth box coordinates.
        detected_masks: (optional) A np.uint8 numpy array of shape
            [N, height, width]. Masks are used for the IoU only when both
            mask arguments are given.
        groundtruth_masks: (optional) A np.uint8 numpy array of shape
            [M, height, width].

    Returns:
        An integer, 1 if the class is correctly detected in the image and 0
        otherwise (including when either box array is empty).
    """
    # Nothing to match when there are no detections or no groundtruth.
    if detected_boxes.size <= 0 or groundtruth_boxes.size <= 0:
        return 0

    top_idx = np.argmax(detected_scores)
    use_masks = detected_masks is not None and groundtruth_masks is not None

    if use_masks:
        top_detection = np_box_mask_list.BoxMaskList(
            box_data=np.expand_dims(detected_boxes[top_idx], axis=0),
            mask_data=np.expand_dims(detected_masks[top_idx], axis=0))
        groundtruth = np_box_mask_list.BoxMaskList(
            box_data=groundtruth_boxes, mask_data=groundtruth_masks)
        overlaps = np_box_mask_list_ops.iou(top_detection, groundtruth)
    else:
        top_detection = np_box_list.BoxList(
            np.expand_dims(detected_boxes[top_idx, :], axis=0))
        groundtruth = np_box_list.BoxList(groundtruth_boxes)
        overlaps = np_box_list_ops.iou(top_detection, groundtruth)

    return 1 if np.max(overlaps) >= self.matching_iou_threshold else 0
def gather(boxlist, indices, fields=None):
    """Build a new BoxList from the rows of `boxlist` selected by `indices`.

    Box coordinates are always gathered. Extra fields are indexed along
    their first dimension; by default every extra field of `boxlist` is
    gathered, but the set can be restricted.

    Args:
        boxlist: BoxList holding N boxes.
        indices: a 1-d numpy array of integer indices into the boxes.
        fields: (optional) list of fields to also gather from. If None
            (default), all extra fields are gathered. Pass an empty list to
            gather only the box coordinates.

    Returns:
        subboxlist: a BoxList corresponding to the subset of the input
            BoxList specified by indices.

    Raises:
        ValueError: if `indices` is non-empty and contains a negative value
            or a value >= the number of boxes.
    """
    if indices.size:
        out_of_range = (np.amax(indices) >= boxlist.num_boxes()
                        or np.amin(indices) < 0)
        if out_of_range:
            raise ValueError('indices are out of valid range.')

    subset = np_box_list.BoxList(boxlist.get()[indices, :])
    field_names = boxlist.get_extra_fields() if fields is None else fields
    for name in field_names:
        subset.add_field(name, boxlist.get_field(name)[indices, ...])
    return subset
def change_coordinate_frame(boxlist, window):
    """Re-express box coordinates relative to a window's frame.

    Given a window of the form [ymin, xmin, ymax, xmax], the boxes are
    shifted by the window's min corner and divided by the window's height
    and width, so the window's min corner maps to (0, 0) and its max corner
    maps to (1, 1).

    A typical use is data augmentation: after cropping an image to `window`,
    the groundtruth boxes must be expressed relative to the crop.

    Args:
        boxlist: A BoxList object holding N boxes.
        window: a size 4 1-D numpy array [ymin, xmin, ymax, xmax].

    Returns:
        A BoxList object with N boxes in the window's coordinate frame; extra
        fields are carried over via `_copy_extra_fields`.
    """
    height = window[2] - window[0]
    width = window[3] - window[1]

    # Translate so the window's min corner becomes the origin.
    origin_offset = [window[0], window[1], window[0], window[1]]
    shifted = np_box_list.BoxList(boxlist.get() - origin_offset)

    # Normalise by the window extent.
    reframed = scale(shifted, 1.0 / height, 1.0 / width)
    _copy_extra_fields(reframed, boxlist)
    return reframed
def clip_to_window(boxlist, window):
    """Clip bounding boxes to a window and drop boxes left with no area.

    Each box corner coordinate is clamped into the window. Boxes whose
    clipped area is not greater than zero are removed from the result.

    Args:
        boxlist: BoxList holding M_in boxes.
        window: a numpy array of shape [4] representing the
            [y_min, x_min, y_max, x_max] window to which boxes are clipped.

    Returns:
        a BoxList holding M_out boxes where M_out <= M_in.
    """
    def _clamp(values, low, high):
        # Cap at `high` first, then raise to `low`.
        return np.fmax(np.fmin(values, high), low)

    y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
    win_y_min, win_x_min = window[0], window[1]
    win_y_max, win_x_max = window[2], window[3]

    clipped_corners = np.hstack([
        _clamp(y_min, win_y_min, win_y_max),
        _clamp(x_min, win_x_min, win_x_max),
        _clamp(y_max, win_y_min, win_y_max),
        _clamp(x_max, win_x_min, win_x_max),
    ])
    clipped = _copy_extra_fields(np_box_list.BoxList(clipped_corners), boxlist)

    # Keep only boxes that still have positive area after clipping.
    has_area = np.greater(area(clipped), 0.0)
    keep = np.reshape(np.nonzero(has_area), [-1]).astype(np.int32)
    return gather(clipped, keep)
def scale(boxlist, y_scale, x_scale):
    """Scale box coordinates in the y and x dimensions.

    Args:
        boxlist: BoxList holding N boxes.
        y_scale: float factor applied to y_min and y_max.
        x_scale: float factor applied to x_min and x_max.

    Returns:
        boxlist: a new BoxList holding the N scaled boxes, with every extra
            field of the input attached unchanged.
    """
    # Columns are ordered [y_min, x_min, y_max, x_max]; pair each with its factor.
    columns = np.array_split(boxlist.get(), 4, axis=1)
    factors = (y_scale, x_scale, y_scale, x_scale)
    scaled_columns = [factor * column
                      for factor, column in zip(factors, columns)]

    result = np_box_list.BoxList(np.hstack(scaled_columns))
    for name in boxlist.get_extra_fields():
        result.add_field(name, boxlist.get_field(name))
    return result
def concatenate(boxlists, fields=None):
    """Concatenate a list of BoxLists into one larger BoxList.

    Box coordinates are stacked in list order. Fields are concatenated
    along their first dimension, which requires all remaining dimensions to
    match across the inputs.

    Args:
        boxlists: list of BoxList objects.
        fields: optional list of fields to also concatenate. By default, all
            extra fields of the first BoxList in the list are included.

    Returns:
        a BoxList with number of boxes equal to
            sum([boxlist.num_boxes() for boxlist in boxlists]).

    Raises:
        ValueError: if boxlists is not a list, is empty, or contains non
            BoxList objects; if a requested field is missing from one of the
            boxlists; or if a field's shape differs between boxlists in any
            dimension other than the first.
    """
    if not isinstance(boxlists, list):
        raise ValueError('boxlists should be a list')
    if not boxlists:
        raise ValueError('boxlists should have nonzero length')
    if not all(isinstance(item, np_box_list.BoxList) for item in boxlists):
        raise ValueError(
            'all elements of boxlists should be BoxList objects')

    merged = np_box_list.BoxList(np.vstack([item.get() for item in boxlists]))

    head = boxlists[0]
    if fields is None:
        fields = head.get_extra_fields()

    for field in fields:
        # The first boxlist defines the reference shape (ignoring dim 0).
        expected_tail = head.get_field(field).shape[1:]
        for item in boxlists:
            if not item.has_field(field):
                raise ValueError('boxlist must contain all requested fields')
            if item.get_field(field).shape[1:] != expected_tail:
                raise ValueError(
                    'field %s must have same shape for all boxlists '
                    'except for the 0th dimension.' % field)
        merged.add_field(
            field,
            np.concatenate([item.get_field(field) for item in boxlists],
                           axis=0))
    return merged
def multi_class_non_max_suppression(boxlist, score_thresh, iou_thresh,
                                    max_output_size):
    """Multi-class version of non maximum suppression.

    For each class column of the 'scores' field, boxes are first filtered
    with `filter_scores_greater_than` and then passed through
    `non_max_suppression`. The per-class survivors are tagged with their
    class index, concatenated and ordered with `sort_by_field` on 'scores'.

    Args:
        boxlist: BoxList holding N boxes. Must contain a 'scores' field
            representing detection scores. The field may be 1-dimensional
            (a single class) or 2-dimensional, in which case it is taken to
            have shape [num_boxes, num_classes].
        score_thresh: scalar threshold for score (low scoring boxes are
            removed).
        iou_thresh: scalar threshold for IOU (boxes that have high IOU
            overlap with previously selected boxes are removed).
        max_output_size: maximum number of retained boxes per class.

    Returns:
        a BoxList holding M boxes with a rank-1 'scores' field and a rank-1
        'classes' field giving the class index of each box, ordered by
        `sort_by_field(..., 'scores')`.

    Raises:
        ValueError: if iou_thresh is not in [0, 1], if boxlist is not a
            BoxList, if it has no 'scores' field, if the scores are not of
            rank 1 or 2, or if the number of scores differs from the number
            of boxes.
    """
    # Written as a negated chained comparison so NaN is rejected as well.
    if not (0 <= iou_thresh <= 1.0):
        raise ValueError('thresh must be between 0 and 1')
    if not isinstance(boxlist, np_box_list.BoxList):
        raise ValueError('boxlist must be a BoxList')
    if not boxlist.has_field('scores'):
        raise ValueError("input boxlist must have 'scores' field")

    scores = boxlist.get_field('scores')
    rank = len(scores.shape)
    if rank == 1:
        # Treat a flat score vector as a single-class score matrix.
        scores = np.reshape(scores, [-1, 1])
    elif rank == 2:
        if scores.shape[1] is None:
            raise ValueError(
                'scores field must have statically defined second '
                'dimension')
    else:
        raise ValueError('scores field must be of rank 1 or 2')

    num_boxes = boxlist.num_boxes()
    num_scores, num_classes = scores.shape[0], scores.shape[1]
    if num_boxes != num_scores:
        raise ValueError('Incorrect scores field length: actual vs expected.')

    per_class_results = []
    for class_idx in range(num_classes):
        # Fresh boxlist carrying only this class's scores.
        class_boxlist = np_box_list.BoxList(boxlist.get())
        class_boxlist.add_field(
            'scores', np.reshape(scores[0:num_scores, class_idx], [-1]))

        confident = filter_scores_greater_than(class_boxlist, score_thresh)
        kept = non_max_suppression(confident,
                                   max_output_size=max_output_size,
                                   iou_threshold=iou_thresh,
                                   score_threshold=score_thresh)
        kept.add_field(
            'classes', np.zeros_like(kept.get_field('scores')) + class_idx)
        per_class_results.append(kept)

    return sort_by_field(concatenate(per_class_results), 'scores')