def _decode(self, rel_codes, anchors):
    """Turn anchor-relative codes back into corner-format boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes. It is
        transposed and unstacked into four components (ty, tx, th, tw).
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes, built from the stacked
        [ymin, xmin, ymax, xmax] values.
    """
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())

    code_y, code_x, code_h, code_w = tf.unstack(tf.transpose(rel_codes))
    if self._scale_factors:
        # Undo the per-component scaling, in the order (y, x, height, width).
        code_y, code_x, code_h, code_w = [
            component / self._scale_factors[position]
            for position, component in enumerate(
                (code_y, code_x, code_h, code_w))
        ]

    # Sizes are encoded in log space relative to the anchor size.
    box_w = tf.exp(code_w) * anchor_w
    box_h = tf.exp(code_h) * anchor_h
    # Centers are encoded as offsets measured in anchor-size units.
    box_cy = code_y * anchor_h + anchor_cy
    box_cx = code_x * anchor_w + anchor_cx

    top = box_cy - box_h / 2.
    left = box_cx - box_w / 2.
    bottom = box_cy + box_h / 2.
    right = box_cx + box_w / 2.
    corners = tf.stack([top, left, bottom, right])
    return box_list.BoxList(tf.transpose(corners))
def normalize_boxes(args):
    """Convert one image's proposal boxes to normalized coordinates.

    Args:
      args: an indexable pair; ``args[0]`` holds the proposal boxes of a
        single image and ``args[1]`` the image shape, whose first two
        entries are passed to ``to_normalized_coordinates``
        (presumably height and width -- confirm against the caller).

    Returns:
      The box tensor obtained from the normalized BoxList.
    """
    boxes = args[0]
    shape = args[1]
    boxlist = box_list.BoxList(boxes)
    normalized_boxlist = box_list_ops.to_normalized_coordinates(
        boxlist, shape[0], shape[1], check_range=False)
    return normalized_boxlist.get()
def tile_anchors(grid_height,
                 grid_width,
                 scales,
                 aspect_ratios,
                 base_anchor_size,
                 anchor_stride,
                 anchor_offset):
    """Build anchors by tiling a basis set of boxes over a strided grid.

    A "basis" collection of boxes, described by paired entries of `scales`
    and `aspect_ratios`, is centered at every point of an evenly spaced grid
    in image space. For example, scales=[.1, .2, .2] together with
    aspect_ratios=[2, 2, 1/2] yields three basis boxes: (scale .1, ratio 2),
    (scale .2, ratio 2) and (scale .2, ratio 1/2). Every basis box is
    multiplied by `base_anchor_size` before being placed on its center.

    The grid itself is determined by `grid_height` and `grid_width` together
    with `anchor_stride` and `anchor_offset`.

    Args:
      grid_height: size of the grid in the y direction (int or int scalar
        tensor).
      grid_width: size of the grid in the x direction (int or int scalar
        tensor).
      scales: a 1-d (float) tensor with the scale of each basis box.
      aspect_ratios: a 1-d (float) tensor with the aspect ratio of each basis
        box. Must have the same length as `scales`.
      base_anchor_size: base anchor size as [height, width] (float tensor of
        shape [2]).
      anchor_stride: difference in centers between base anchors for adjacent
        grid positions (float tensor of shape [2]).
      anchor_offset: center of the anchor with scale and aspect ratio 1 for
        the upper left element of the grid. This should be zero for feature
        networks with only VALID padding and even receptive field size, but
        may need some additional calculation if other padding is used (float
        tensor of shape [2]).

    Returns:
      a BoxList holding a collection of N anchor boxes.
    """
    # height = scale / sqrt(ratio), width = scale * sqrt(ratio), each scaled
    # by the matching entry of the base anchor size.
    sqrt_ratios = tf.sqrt(aspect_ratios)
    anchor_heights = scales / sqrt_ratios * base_anchor_size[0]
    anchor_widths = scales * sqrt_ratios * base_anchor_size[1]

    # Centers along each axis: index * stride + offset.
    row_centers = tf.to_float(tf.range(grid_height))
    row_centers = row_centers * anchor_stride[0] + anchor_offset[0]
    col_centers = tf.to_float(tf.range(grid_width))
    col_centers = col_centers * anchor_stride[1] + anchor_offset[1]
    col_centers, row_centers = ops.meshgrid(col_centers, row_centers)

    # Pair every basis size with every grid center.
    widths_grid, col_centers_grid = ops.meshgrid(anchor_widths, col_centers)
    heights_grid, row_centers_grid = ops.meshgrid(anchor_heights, row_centers)

    # Stack (y, x) pairs on a new trailing axis, then flatten to rows of 2.
    centers = tf.stack([row_centers_grid, col_centers_grid], axis=3)
    sizes = tf.stack([heights_grid, widths_grid], axis=3)
    centers = tf.reshape(centers, [-1, 2])
    sizes = tf.reshape(sizes, [-1, 2])

    corners = _center_size_bbox_to_corners_bbox(centers, sizes)
    return box_list.BoxList(corners)
def _batch_decode_boxes(box_encodings, anchor_boxes):
    """Decodes box encodings with respect to the anchor boxes.

    The box coder is the one attached to the target assigner returned by
    ``target_assigner.create_target_assigner('FasterRCNN', 'proposal')``.

    Args:
      box_encodings: a 4-D tensor with shape
        [batch_size, num_anchors, num_classes, code_size] representing box
        encodings, where code_size is the box coder's code size.
      anchor_boxes: a tensor with shape [batch_size, num_anchors, 4]
        representing decoded bounding boxes. A class axis is inserted at
        position 2 and tiled `num_classes` times before decoding.

    Returns:
      decoded_boxes: a [batch_size, num_anchors, num_classes, 4] float tensor
        representing bounding box predictions (for each image in batch,
        proposal and class). When boxes are shared across classes,
        num_classes is 1.
    """
    combined_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)
    batch_size = combined_shape[0]
    num_anchors = combined_shape[1]
    num_classes = combined_shape[2]

    # Repeat each anchor once per class so anchors and encodings line up
    # row-for-row after both are flattened.
    tiled_anchor_boxes = tf.tile(
        tf.expand_dims(anchor_boxes, 2), [1, 1, num_classes, 1])
    tiled_anchors_boxlist = box_list.BoxList(
        tf.reshape(tiled_anchor_boxes, [-1, 4]))

    proposal_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN', 'proposal')
    box_coder = proposal_target_assigner.box_coder

    decoded_boxes = box_coder.decode(
        tf.reshape(box_encodings, [-1, box_coder.code_size]),
        tiled_anchors_boxlist)

    # Restore the [batch, anchor, class, 4] layout of the input.
    return tf.reshape(
        decoded_boxes.get(),
        tf.stack([batch_size, num_anchors, num_classes, 4]))
def _decode(self, rel_codes, anchors):
    """Decode codes as ``rel_codes * stddev + anchor corners``.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes.

    Raises:
      ValueError: if the anchors carry a 'stddev' field; the standard
        deviation is taken from this coder (``self._stddev``), not from the
        box list.
    """
    anchor_corners = anchors.get()
    stddev_field_present = anchors.has_field('stddev')
    if stddev_field_present:
        raise ValueError(
            "'stddev' is a parameter of MeanStddevBoxCoder and "
            "should not be specified in the box list.")
    decoded_corners = rel_codes * self._stddev + anchor_corners
    return box_list.BoxList(decoded_corners)
def _batch_decode(anchors, box_encodings):
    """Decode a batch of box encodings against a shared set of anchors.

    The anchors are tiled once per batch element, and decoding uses the coder
    returned by ``box_coder_builder.build("faster_rcnn_box_coder")``.

    Args:
      anchors: BoxList of anchors; its box tensor is tiled across the batch.
      box_encodings: A float32 tensor of shape
        [batch_size, num_anchors, box_code_size] containing box encodings.

    Returns:
      decoded_boxes: A float32 tensor of shape [batch_size, num_anchors, 4]
        containing the decoded boxes.
      decoded_keypoints: A float32 tensor of shape
        [batch_size, num_anchors, num_keypoints, 2] containing the decoded
        keypoints if the decoded BoxList has a keypoints field, None
        otherwise.
    """
    encodings_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)
    batch_size = encodings_shape[0]
    num_anchors = encodings_shape[1]

    # Give every batch element its own copy of the anchors, then flatten so
    # the coder sees one long list of boxes.
    batched_anchors = tf.expand_dims(anchors.get(), 0)
    tiled_anchors = tf.tile(batched_anchors, [batch_size, 1, 1])
    flat_anchor_boxlist = box_list.BoxList(tf.reshape(tiled_anchors, [-1, 4]))

    coder = box_coder_builder.build("faster_rcnn_box_coder")
    flat_encodings = tf.reshape(box_encodings, [-1, coder.code_size])
    decoded_boxlist = coder.decode(flat_encodings, flat_anchor_boxlist)

    keypoints_field = fields.BoxListFields.keypoints
    decoded_keypoints = None
    if decoded_boxlist.has_field(keypoints_field):
        flat_keypoints = decoded_boxlist.get_field(keypoints_field)
        num_keypoints = flat_keypoints.get_shape()[1]
        decoded_keypoints = tf.reshape(
            flat_keypoints,
            tf.stack([batch_size, num_anchors, num_keypoints, 2]))

    decoded_boxes = tf.reshape(
        decoded_boxlist.get(), tf.stack([batch_size, num_anchors, 4]))
    return decoded_boxes, decoded_keypoints
def _create_regression_targets(self, anchors, groundtruth_boxes, match):
    """Compute a regression target for every anchor.

    Matched anchors get the box-coder encoding of their matched groundtruth
    box; all other anchors get the default regression target.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth_boxes
      match: a matcher.Match object

    Returns:
      reg_targets: a float32 tensor with shape [N, box_code_dimension]
    """
    # Unmatched and ignored anchors receive an all-zero placeholder box here;
    # their targets are overwritten with the default target further down.
    gathered_boxes = match.gather_based_on_match(
        groundtruth_boxes.get(),
        unmatched_value=tf.zeros(4),
        ignored_value=tf.zeros(4))
    gathered_boxlist = box_list.BoxList(gathered_boxes)

    keypoints_field = fields.BoxListFields.keypoints
    if groundtruth_boxes.has_field(keypoints_field):
        gt_keypoints = groundtruth_boxes.get_field(keypoints_field)
        gathered_keypoints = match.gather_based_on_match(
            gt_keypoints,
            unmatched_value=tf.zeros(gt_keypoints.get_shape()[1:]),
            ignored_value=tf.zeros(gt_keypoints.get_shape()[1:]))
        gathered_boxlist.add_field(keypoints_field, gathered_keypoints)

    encoded_targets = self._box_coder.encode(gathered_boxlist, anchors)

    results_shape = shape_utils.combined_static_and_dynamic_shape(
        match.match_results)
    # One copy of the default target per anchor, used wherever the anchor is
    # not matched.
    default_targets = tf.tile(
        self._default_regression_target(), [results_shape[0], 1])
    is_matched = match.matched_column_indicator()
    return tf.where(is_matched, encoded_targets, default_targets)
def _to_absolute_coordinates(normalized_boxes):
    """Convert normalized boxes to absolute coordinates.

    Reads `image_shape` from the enclosing scope and passes its entries at
    index 1 and 2 to ``to_absolute_coordinates`` (presumably height and
    width of a batched image shape -- confirm against the caller).

    Args:
      normalized_boxes: box tensor in normalized coordinates.

    Returns:
      The box tensor obtained from the converted BoxList.
    """
    normalized_boxlist = box_list.BoxList(normalized_boxes)
    absolute_boxlist = box_list_ops.to_absolute_coordinates(
        normalized_boxlist,
        image_shape[1],
        image_shape[2],
        check_range=False)
    return absolute_boxlist.get()
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   boundaries=None,
                                   additional_fields=None,
                                   scope=None):
    """Multi-class version of non maximum suppression.

    This op greedily selects a subset of detection bounding boxes, pruning
    away boxes that have high IOU (intersection over union) overlap
    (> thresh) with already selected boxes. It operates independently for
    each class for which scores are provided (via the scores field of the
    input box_list), pruning boxes with score less than a provided threshold
    prior to applying NMS.

    Please note that this operation is performed on *all* classes, therefore
    any background classes should be removed prior to calling this function.

    Args:
      boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be
        either number of classes or 1 depending on whether a separate box is
        predicted per class.
      scores: A [k, num_classes] float32 tensor containing the scores for
        each of the k detections.
      score_thresh: scalar threshold for score (low scoring boxes are
        removed).
      iou_thresh: scalar threshold for IOU (new boxes that have high IOU
        overlap with previously selected boxes are removed).
      max_size_per_class: maximum number of retained boxes per class.
      max_total_size: maximum number of boxes retained over all classes. By
        default returns all boxes retained after capping boxes per class.
      clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
        representing the window to clip and normalize boxes to before
        performing non-max suppression.
      change_coordinate_frame: Whether to normalize coordinates after
        clipping relative to clip_window (this can only be set to True if a
        clip_window is provided)
      masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
        containing box masks. `q` can be either number of classes or 1
        depending on whether a separate mask is predicted per class.
      boundaries: (optional) a [k, q, boundary_height, boundary_width]
        float32 tensor containing box boundaries. `q` can be either number
        of classes or 1 depending on whether a separate boundary is
        predicted per class.
      additional_fields: (optional) If not None, a dictionary that maps keys
        to tensors whose first dimensions are all of size `k`. After
        non-maximum suppression, all tensors corresponding to the selected
        boxes will be added to resulting BoxList.
      scope: name scope.

    Returns:
      a BoxList holding M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing
      order and a rank-1 classes field representing a class label for each
      box.

    Raises:
      ValueError: if iou_thresh is not in [0, 1]; if scores is not rank 2 or
        its second dimension is not statically defined; if boxes is not
        rank 3, its second dimension is neither 1 nor the number of classes,
        or its last dimension is not 4; or if change_coordinate_frame is
        True without a clip_window.
    """
    if not 0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if scores.shape.ndims != 2:
        raise ValueError('scores field must be of rank 2')
    if scores.shape[1].value is None:
        raise ValueError('scores must have statically defined second '
                         'dimension')
    if boxes.shape.ndims != 3:
        raise ValueError('boxes must be of rank 3.')
    if not (boxes.shape[1].value == scores.shape[1].value or
            boxes.shape[1].value == 1):
        raise ValueError('second dimension of boxes must be either 1 or equal '
                         'to the second dimension of scores')
    if boxes.shape[2].value != 4:
        raise ValueError('last dimension of boxes must be of size 4.')
    if change_coordinate_frame and clip_window is None:
        # The two literals are concatenated, so the first needs the trailing
        # space to keep "clip_window" and "must" apart.
        raise ValueError(
            'if change_coordinate_frame is True, then a clip_window '
            'must be specified.')

    with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
        num_boxes = tf.shape(boxes)[0]
        num_scores = tf.shape(scores)[0]
        num_classes = scores.get_shape()[1]

        # Runtime check that boxes and scores describe the same number of
        # detections; it is attached to the score slicing below.
        length_assert = tf.Assert(tf.equal(num_boxes, num_scores), [
            'Incorrect scores field length: actual vs expected.',
            num_scores, num_boxes
        ])

        selected_boxes_list = []
        per_class_boxes_list = tf.unstack(boxes, axis=1)
        if masks is not None:
            per_class_masks_list = tf.unstack(masks, axis=1)
        if boundaries is not None:
            per_class_boundaries_list = tf.unstack(boundaries, axis=1)

        # With class-specific boxes each class reads its own slice; with a
        # single shared slice every class reads index 0.
        boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
                     else [0] * num_classes.value)
        for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
            per_class_boxes = per_class_boxes_list[boxes_idx]
            boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
            with tf.control_dependencies([length_assert]):
                class_scores = tf.reshape(
                    tf.slice(scores, [0, class_idx],
                             tf.stack([num_scores, 1])), [-1])
            boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                               class_scores)
            if masks is not None:
                per_class_masks = per_class_masks_list[boxes_idx]
                boxlist_and_class_scores.add_field(
                    fields.BoxListFields.masks, per_class_masks)
            if boundaries is not None:
                per_class_boundaries = per_class_boundaries_list[boxes_idx]
                boxlist_and_class_scores.add_field(
                    fields.BoxListFields.boundaries, per_class_boundaries)
            if additional_fields is not None:
                for key, tensor in additional_fields.items():
                    boxlist_and_class_scores.add_field(key, tensor)

            boxlist_filtered = box_list_ops.filter_greater_than(
                boxlist_and_class_scores, score_thresh)
            if clip_window is not None:
                boxlist_filtered = box_list_ops.clip_to_window(
                    boxlist_filtered, clip_window)
                if change_coordinate_frame:
                    boxlist_filtered = box_list_ops.change_coordinate_frame(
                        boxlist_filtered, clip_window)

            # Never ask NMS for more boxes than survived the filtering.
            max_selection_size = tf.minimum(max_size_per_class,
                                            boxlist_filtered.num_boxes())
            selected_indices = tf.image.non_max_suppression(
                boxlist_filtered.get(),
                boxlist_filtered.get_field(fields.BoxListFields.scores),
                max_selection_size,
                iou_threshold=iou_thresh)
            nms_result = box_list_ops.gather(boxlist_filtered,
                                             selected_indices)
            # Label every surviving box with the index of the current class.
            nms_result.add_field(
                fields.BoxListFields.classes,
                (tf.zeros_like(
                    nms_result.get_field(fields.BoxListFields.scores)) +
                 class_idx))
            selected_boxes_list.append(nms_result)

        selected_boxes = box_list_ops.concatenate(selected_boxes_list)
        sorted_boxes = box_list_ops.sort_by_field(
            selected_boxes, fields.BoxListFields.scores)
        if max_total_size:
            max_total_size = tf.minimum(max_total_size,
                                        sorted_boxes.num_boxes())
            sorted_boxes = box_list_ops.gather(sorted_boxes,
                                               tf.range(max_total_size))
        return sorted_boxes