def _mosaic_crop_image(self, image, boxes, classes, is_crowd, area):
  """Process a patched image in preparation for final output.

  Args:
    image: the stitched mosaic image.
    boxes: normalized boxes aligned with the stitched image.
    classes: class ids aligned with `boxes`.
    is_crowd: is-crowd flags aligned with `boxes`.
    area: box areas aligned with `boxes`.

  Returns:
    The augmented image, boxes, classes, is_crowd and area tensors.
  """
  if self._mosaic_crop_mode != 'crop':
    shape = tf.cast(preprocessing_ops.get_image_shape(image), tf.float32)
    center = shape * self._mosaic_center
    # shift the center of the image by applying a translation to the whole
    # image; ch/cw are random (height, width) pixel offsets bounded by the
    # mosaic center.
    ch = tf.math.round(
        preprocessing_ops.random_uniform_strong(-center[0], center[0],
                                                seed=self._seed))
    cw = tf.math.round(
        preprocessing_ops.random_uniform_strong(-center[1], center[1],
                                                seed=self._seed))

    # Apply the same shift to the boxes, then clip the boxes to those
    # within the image. translate() takes [dx, dy] = [cw, ch] while the
    # (y, x, y, x) pixel boxes are shifted with [ch, cw, ch, cw].
    image = tfa.image.translate(image, [cw, ch], fill_value=self._pad_value)
    boxes = box_ops.denormalize_boxes(boxes, shape[:2])
    boxes = boxes + tf.cast([ch, cw, ch, cw], boxes.dtype)
    boxes = box_ops.clip_boxes(boxes, shape[:2])
    inds = box_ops.get_non_empty_box_indices(boxes)
    boxes = box_ops.normalize_boxes(boxes, shape[:2])
    boxes, classes, is_crowd, area = self._select_ind(
        inds, boxes, classes,  # pylint:disable=unbalanced-tuple-unpacking
        is_crowd, area)

  # warp and scale the fully stitched sample
  image, _, affine = preprocessing_ops.affine_warp_image(
      image, [self._output_size[0], self._output_size[1]],
      scale_min=self._aug_scale_min,
      scale_max=self._aug_scale_max,
      translate=self._aug_rand_translate,
      degrees=self._aug_rand_angle,
      perspective=self._aug_rand_perspective,
      random_pad=self._random_pad,
      seed=self._seed)
  height, width = self._output_size[0], self._output_size[1]
  image = tf.image.resize(image, (height, width))

  # clip and clean boxes: keep only boxes whose post-warp area survives
  # the configured threshold, and realign the remaining labels.
  boxes, inds = preprocessing_ops.transform_and_clip_boxes(
      boxes,
      None,
      affine=affine,
      area_thresh=self._area_thresh,
      seed=self._seed)
  classes, is_crowd, area = self._select_ind(inds, classes, is_crowd, area)  # pylint:disable=unbalanced-tuple-unpacking
  # NOTE(review): `area` is returned twice (last two slots) — confirm
  # callers really expect the duplicate rather than a distinct tensor here.
  return image, boxes, classes, is_crowd, area, area
def _parse_eval_data(self, data):
  """Parses one decoded example for evaluation.

  Args:
    data: the decoded tensor dictionary from the example decoder.

  Returns:
    A tuple of the padded image tensor and a dictionary of labels.
  """
  # Raw groundtruth straight out of the decoder.
  classes = data['groundtruth_classes']
  boxes = data['groundtruth_boxes']
  is_crowd = data['groundtruth_is_crowd']

  # Normalizes image with mean and std pixel values.
  image = preprocess_ops.normalize_image(data['image'])

  # Evaluation always resizes with the largest configured scale.
  scales = tf.constant([self._resize_scales[-1]], tf.float32)
  short_side = scales[0]

  # Move to absolute pixel coordinates for the resize; keep an unresized
  # copy of the groundtruth boxes for the eval labels.
  image_shape = tf.shape(image)[:2]
  boxes = box_ops.denormalize_boxes(boxes, image_shape)
  gt_boxes = boxes
  image, image_info = preprocess_ops.resize_image(image, short_side,
                                                  max(self._output_size))
  boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_info[2, :],
                                               image_info[1, :],
                                               image_info[3, :])
  boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

  # Drop degenerate (all-zero) boxes and keep the labels aligned.
  keep = box_ops.get_non_empty_box_indices(boxes)
  boxes = tf.gather(boxes, keep)
  classes = tf.gather(classes, keep)
  is_crowd = tf.gather(is_crowd, keep)

  # Convert to (cy, cx, h, w) box format.
  boxes = box_ops.yxyx_to_cycxhw(boxes)

  # Top-left-anchored pad to the fixed output size.
  image = tf.image.pad_to_bounding_box(image, 0, 0, self._output_size[0],
                                       self._output_size[1])

  labels = {
      'classes':
          preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                   self._max_num_boxes),
      'boxes':
          preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                   self._max_num_boxes),
      'id': int(data['source_id']),
      'image_info': image_info,
      'is_crowd':
          preprocess_ops.clip_or_pad_to_fixed_size(is_crowd,
                                                   self._max_num_boxes),
      'gt_boxes':
          preprocess_ops.clip_or_pad_to_fixed_size(gt_boxes,
                                                   self._max_num_boxes),
  }
  return image, labels
def scale_boxes(self, patch, ishape, boxes, classes, xs, ys):
  """Scale and translate the boxes for each image prior to patching."""
  dtype = boxes.dtype
  xs = tf.cast(xs, dtype)
  ys = tf.cast(ys, dtype)
  pshape = tf.cast(tf.shape(patch), dtype)
  ishape = tf.cast(ishape, dtype)
  translate = tf.cast((ishape - pshape), dtype)

  # Denormalize relative to the patch, shift by the patch's offset inside
  # the output image, then renormalize relative to the full image.
  boxes = box_ops.denormalize_boxes(boxes, pshape[:2])
  dy = translate[0] * ys
  dx = translate[1] * xs
  boxes = boxes + tf.cast([dy, dx, dy, dx], dtype)
  boxes = box_ops.normalize_boxes(boxes, ishape[:2])
  return boxes, classes
def visualize_images_with_bounding_boxes(images, box_outputs, step,
                                         summary_writer):
  """Records subset of evaluation images with bounding boxes."""
  # Guard clause: a bare tensor (rather than a list of images) is not
  # supported; warn and bail out instead of crashing the eval loop.
  if not isinstance(images, list):
    logging.warning(
        'visualize_images_with_bounding_boxes expects list of '
        'images but received type: %s and value: %s', type(images), images)
    return

  first_shape = tf.shape(images[0])
  height = tf.cast(first_shape[0], tf.float32)
  width = tf.cast(first_shape[1], tf.float32)

  # draw_bounding_boxes expects boxes normalized to [0, 1].
  scaled_boxes = box_ops.normalize_boxes(box_outputs, [height, width])
  box_color = tf.constant([[1.0, 1.0, 0.0, 1.0]])
  annotated = tf.image.draw_bounding_boxes(
      tf.cast(images, tf.float32), scaled_boxes, box_color)

  with summary_writer.as_default():
    tf.summary.image('bounding_box_summary', annotated, step=step)
    summary_writer.flush()
def undo_info(boxes: tf.Tensor,
              num_detections: int,
              info: tf.Tensor,
              expand: bool = True) -> tf.Tensor:
  """Clip and normalize boxes for serving."""
  # Zero out any boxes past the number of valid detections.
  valid = tf.sequence_mask(num_detections, maxlen=tf.shape(boxes)[1])
  boxes = boxes * tf.cast(tf.expand_dims(valid, axis=-1), boxes.dtype)

  if expand:
    info = tf.cast(tf.expand_dims(info, axis=0), boxes.dtype)

  # Rows of `info`: [original_shape, input_shape, scale, offset].
  original_shape = tf.expand_dims(info[:, 0, :], axis=1)
  input_shape = tf.expand_dims(info[:, 1, :], axis=1)
  scale = tf.expand_dims(info[:, 2, :], axis=1)
  offset = tf.expand_dims(info[:, 3, :], axis=1)

  # Undo the preprocessing resize/pad, then clip to and renormalize by the
  # original image shape.
  boxes = box_ops.denormalize_boxes(boxes, input_shape)
  boxes += tf.tile(offset, [1, 1, 2])
  boxes /= tf.tile(scale, [1, 1, 2])
  boxes = box_ops.clip_boxes(boxes, original_shape)
  boxes = box_ops.normalize_boxes(boxes, original_shape)
  return boxes
def serve(self, images: tf.Tensor):
  """Cast image to float and run inference.

  Args:
    images: uint8 Tensor of shape [batch_size, None, None, 3]

  Returns:
    Tensor holding detection output logits.
  """
  # Skip image preprocessing when input_type is tflite so it is compatible
  # with TFLite quantization.
  if self._input_type != 'tflite':
    images, anchor_boxes, image_info = self.preprocess(images)
  else:
    with tf.device('cpu:0'):
      anchor_boxes = self._build_anchor_boxes()

      # image_info is a 3D tensor of shape [batch_size, 4, 2]. It is in the
      # format of [[original_height, original_width],
      # [desired_height, desired_width], [y_scale, x_scale],
      # [y_offset, x_offset]]. When input_type is tflite, input image is
      # supposed to be preprocessed already.
      image_info = tf.convert_to_tensor([[
          self._input_image_size, self._input_image_size, [1.0, 1.0], [0, 0]
      ]], dtype=tf.float32)
  # Row 1 of image_info holds the (desired) input image shape.
  input_image_shape = image_info[:, 1, :]

  # To overcome keras.Model extra limitation to save a model with layers that
  # have multiple inputs, we use `model.call` here to trigger the forward
  # path. Note that, this disables some keras magics happens in `__call__`.
  detections = self.model.call(
      images=images,
      image_shape=input_image_shape,
      anchor_boxes=anchor_boxes,
      training=False)

  if self.params.task.model.detection_generator.apply_nms:
    # For RetinaNet model, apply export_config.
    # TODO(huizhongc): Add export_config to fasterrcnn and maskrcnn as needed.
    if isinstance(self.params.task.model, configs.retinanet.RetinaNet):
      export_config = self.params.task.export_config
      # Normalize detection box coordinates to [0, 1]: divide out the
      # preprocessing scale (row 2), then normalize by the original image
      # shape (row 0).
      if export_config.output_normalized_coordinates:
        detection_boxes = (
            detections['detection_boxes'] /
            tf.tile(image_info[:, 2:3, :], [1, 1, 2]))
        detections['detection_boxes'] = box_ops.normalize_boxes(
            detection_boxes, image_info[:, 0:1, :])

      # Cast num_detections and detection_classes to float. This allows the
      # model inference to work on chain (go/chain) as chain requires floating
      # point outputs.
      if export_config.cast_num_detections_to_float:
        detections['num_detections'] = tf.cast(
            detections['num_detections'], dtype=tf.float32)
      if export_config.cast_detection_classes_to_float:
        detections['detection_classes'] = tf.cast(
            detections['detection_classes'], dtype=tf.float32)

    final_outputs = {
        'detection_boxes': detections['detection_boxes'],
        'detection_scores': detections['detection_scores'],
        'detection_classes': detections['detection_classes'],
        'num_detections': detections['num_detections']
    }
  else:
    # NMS is not applied in the graph; expose the raw decoded outputs.
    final_outputs = {
        'decoded_boxes': detections['decoded_boxes'],
        'decoded_box_scores': detections['decoded_box_scores']
    }

  if 'detection_masks' in detections.keys():
    final_outputs['detection_masks'] = detections['detection_masks']
  final_outputs.update({'image_info': image_info})
  return final_outputs
def _parse_train_data(self, data):
  """Parses data for training.

  Args:
    data: the decoded tensor dictionary from the example decoder.

  Returns:
    A tuple of the padded image tensor and a dictionary of labels.
  """
  classes = data['groundtruth_classes'] + self._class_offset
  boxes = data['groundtruth_boxes']
  is_crowd = data['groundtruth_is_crowd']

  # Gets original image.
  image = data['image']

  # Normalizes image with mean and std pixel values.
  image = preprocess_ops.normalize_image(image)
  image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)

  # With probability 0.5, rescale to a random short side in {400, 500, 600}
  # and take a random crop before the final resize.
  do_crop = tf.greater(tf.random.uniform([]), 0.5)
  if do_crop:
    # Rescale
    boxes = box_ops.denormalize_boxes(boxes, tf.shape(image)[:2])
    index = tf.random.categorical(tf.zeros([1, 3]), 1)[0]
    scales = tf.gather([400.0, 500.0, 600.0], index, axis=0)
    short_side = scales[0]
    image, image_info = preprocess_ops.resize_image(image, short_side)
    boxes = preprocess_ops.resize_and_crop_boxes(
        boxes, image_info[2, :], image_info[1, :], image_info[3, :])
    boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

    # Do cropping: sample a window with each side in [384, min(side, 600))
    # at a random offset, then rescale the normalized boxes to the window.
    shape = tf.cast(image_info[1], dtype=tf.int32)
    h = tf.random.uniform([], 384, tf.math.minimum(shape[0], 600),
                          dtype=tf.int32)
    w = tf.random.uniform([], 384, tf.math.minimum(shape[1], 600),
                          dtype=tf.int32)
    i = tf.random.uniform([], 0, shape[0] - h + 1, dtype=tf.int32)
    j = tf.random.uniform([], 0, shape[1] - w + 1, dtype=tf.int32)
    image = tf.image.crop_to_bounding_box(image, i, j, h, w)
    boxes = tf.clip_by_value(
        (boxes[..., :] * tf.cast(
            tf.stack([shape[0], shape[1], shape[0], shape[1]]),
            dtype=tf.float32) -
         tf.cast(tf.stack([i, j, i, j]), dtype=tf.float32)) /
        tf.cast(tf.stack([h, w, h, w]), dtype=tf.float32), 0.0, 1.0)

  # Randomly pick one of the configured resize scales.
  # NOTE(review): assumes len(self._resize_scales) == 11 — confirm against
  # the config; a different length silently biases/breaks the sampling.
  scales = tf.constant(self._resize_scales, dtype=tf.float32)
  index = tf.random.categorical(tf.zeros([1, 11]), 1)[0]
  scales = tf.gather(scales, index, axis=0)

  image_shape = tf.shape(image)[:2]
  boxes = box_ops.denormalize_boxes(boxes, image_shape)
  short_side = scales[0]
  image, image_info = preprocess_ops.resize_image(
      image, short_side, max(self._output_size))
  boxes = preprocess_ops.resize_and_crop_boxes(
      boxes, image_info[2, :], image_info[1, :], image_info[3, :])
  boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

  # Filters out ground truth boxes that are all zeros.
  indices = box_ops.get_non_empty_box_indices(boxes)
  boxes = tf.gather(boxes, indices)
  classes = tf.gather(classes, indices)
  is_crowd = tf.gather(is_crowd, indices)

  # Convert to (cy, cx, h, w) box format.
  boxes = box_ops.yxyx_to_cycxhw(boxes)

  # Top-left-anchored pad to the fixed output size.
  image = tf.image.pad_to_bounding_box(
      image, 0, 0, self._output_size[0], self._output_size[1])
  labels = {
      'classes':
          preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                   self._max_num_boxes),
      'boxes':
          preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                   self._max_num_boxes)
  }
  return image, labels
def preprocess(self, inputs):
  """Preprocess COCO for DETR.

  Args:
    inputs: a dictionary with `image`, `objects` (bbox/label/is_crowd) and
      `image/id` features.

  Returns:
    A tuple of the preprocessed image and a dictionary of labels.
  """
  image = inputs['image']
  boxes = inputs['objects']['bbox']
  # Shift labels by one; 0 is reserved (hence the +1 offset).
  classes = inputs['objects']['label'] + 1
  is_crowd = inputs['objects']['is_crowd']

  image = preprocess_ops.normalize_image(image)
  if self._params.is_training:
    image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)

    # With probability 0.5, rescale to a random short side in
    # {400, 500, 600} and take a random crop before the final resize.
    do_crop = tf.greater(tf.random.uniform([]), 0.5)
    if do_crop:
      # Rescale
      boxes = box_ops.denormalize_boxes(boxes, tf.shape(image)[:2])
      index = tf.random.categorical(tf.zeros([1, 3]), 1)[0]
      scales = tf.gather([400.0, 500.0, 600.0], index, axis=0)
      short_side = scales[0]
      image, image_info = preprocess_ops.resize_image(image, short_side)
      boxes = preprocess_ops.resize_and_crop_boxes(
          boxes, image_info[2, :], image_info[1, :], image_info[3, :])
      boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

      # Do cropping: random window with each side in [384, min(side, 600)),
      # then rescale the normalized boxes to the window.
      shape = tf.cast(image_info[1], dtype=tf.int32)
      h = tf.random.uniform(
          [], 384, tf.math.minimum(shape[0], 600), dtype=tf.int32)
      w = tf.random.uniform(
          [], 384, tf.math.minimum(shape[1], 600), dtype=tf.int32)
      i = tf.random.uniform([], 0, shape[0] - h + 1, dtype=tf.int32)
      j = tf.random.uniform([], 0, shape[1] - w + 1, dtype=tf.int32)
      image = tf.image.crop_to_bounding_box(image, i, j, h, w)
      boxes = tf.clip_by_value(
          (boxes[..., :] * tf.cast(
              tf.stack([shape[0], shape[1], shape[0], shape[1]]),
              dtype=tf.float32) -
           tf.cast(tf.stack([i, j, i, j]), dtype=tf.float32)) /
          tf.cast(tf.stack([h, w, h, w]), dtype=tf.float32), 0.0, 1.0)

    # Randomly pick one of the configured resize scales.
    # NOTE(review): assumes len(resize_scales) == 11 — confirm against the
    # config; a different length silently biases/breaks the sampling.
    scales = tf.constant(
        self._params.resize_scales, dtype=tf.float32)
    index = tf.random.categorical(tf.zeros([1, 11]), 1)[0]
    scales = tf.gather(scales, index, axis=0)
  else:
    # Evaluation always uses the largest configured scale.
    scales = tf.constant([self._params.resize_scales[-1]], tf.float32)

  image_shape = tf.shape(image)[:2]
  boxes = box_ops.denormalize_boxes(boxes, image_shape)
  # Keep an unresized copy of the groundtruth boxes for eval labels.
  gt_boxes = boxes
  short_side = scales[0]
  image, image_info = preprocess_ops.resize_image(
      image, short_side, max(self._params.output_size))
  boxes = preprocess_ops.resize_and_crop_boxes(
      boxes, image_info[2, :], image_info[1, :], image_info[3, :])
  boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

  # Filters out ground truth boxes that are all zeros.
  indices = box_ops.get_non_empty_box_indices(boxes)
  boxes = tf.gather(boxes, indices)
  classes = tf.gather(classes, indices)
  is_crowd = tf.gather(is_crowd, indices)

  # Convert to (cy, cx, h, w) box format.
  boxes = box_ops.yxyx_to_cycxhw(boxes)

  # Top-left-anchored pad to the fixed output size.
  image = tf.image.pad_to_bounding_box(
      image, 0, 0, self._params.output_size[0], self._params.output_size[1])

  labels = {
      'classes':
          preprocess_ops.clip_or_pad_to_fixed_size(
              classes, self._params.max_num_boxes),
      'boxes':
          preprocess_ops.clip_or_pad_to_fixed_size(
              boxes, self._params.max_num_boxes)
  }
  if not self._params.is_training:
    labels.update({
        'id': inputs['image/id'],
        'image_info': image_info,
        'is_crowd':
            preprocess_ops.clip_or_pad_to_fixed_size(
                is_crowd, self._params.max_num_boxes),
        'gt_boxes':
            preprocess_ops.clip_or_pad_to_fixed_size(
                gt_boxes, self._params.max_num_boxes),
    })
  return image, labels
def _parse_train_data(self, data):
  """Parses data for training.

  Args:
    data: the decoded tensor dictionary from TfExampleDecoder.

  Returns:
    image: image tensor that is preprocessed to have normalized value and
      dimension [output_size[0], output_size[1], 3]
    labels: a dictionary of tensors used for training. The following
      describes {key: value} pairs in the dictionary.
      image_info: a 2D `Tensor` that encodes the information of the image and
        the applied preprocessing. It is in the format of
        [[original_height, original_width], [scaled_height, scaled_width],
        [y_scale, x_scale], [y_offset, x_offset]].
      anchor_boxes: ordered dictionary with keys [min_level, min_level+1,
        ..., max_level]. The values are tensor with shape [height_l, width_l,
        4] representing anchor boxes at each level.
      rpn_score_targets: ordered dictionary with keys [min_level,
        min_level+1, ..., max_level]. The values are tensor with shape
        [height_l, width_l, anchors_per_location]. The height_l and width_l
        represent the dimension of class logits at l-th level.
      rpn_box_targets: ordered dictionary with keys [min_level, min_level+1,
        ..., max_level]. The values are tensor with shape [height_l, width_l,
        anchors_per_location * 4]. The height_l and width_l represent the
        dimension of bounding box regression output at l-th level.
      gt_boxes: Groundtruth bounding box annotations. The box is represented
        in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
        image that is fed to the network. The tensor is padded with -1 to
        the fixed dimension [self._max_num_instances, 4].
      gt_classes: Groundtruth classes annotations. The tensor is padded
        with -1 to the fixed dimension [self._max_num_instances].
      gt_masks: groundtruth masks cropped by the bounding box and resized to
        a fixed size determined by mask_crop_size.
  """
  classes = data['groundtruth_classes']
  boxes = data['groundtruth_boxes']
  if self._include_mask:
    masks = data['groundtruth_instance_masks']

  is_crowds = data['groundtruth_is_crowd']
  # Skips annotations with `is_crowd` = True.
  if self._skip_crowd_during_training:
    num_groundtruths = tf.shape(classes)[0]
    with tf.control_dependencies([num_groundtruths, is_crowds]):
      # If there are no annotations at all, keep the (empty) full range.
      indices = tf.cond(
          tf.greater(tf.size(is_crowds), 0),
          lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
          lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
    classes = tf.gather(classes, indices)
    boxes = tf.gather(boxes, indices)
    if self._include_mask:
      masks = tf.gather(masks, indices)

  # Gets original image and its size.
  image = data['image']
  if self._augmenter is not None:
    image = self._augmenter.distort(image)

  image_shape = tf.shape(image)[0:2]

  # Normalizes image with mean and std pixel values.
  image = preprocess_ops.normalize_image(image)

  # Flips image randomly during training.
  if self._aug_rand_hflip:
    if self._include_mask:
      image, boxes, masks = preprocess_ops.random_horizontal_flip(
          image, boxes, masks)
    else:
      image, boxes, _ = preprocess_ops.random_horizontal_flip(
          image, boxes)

  # Converts boxes from normalized coordinates to pixel coordinates.
  # Now the coordinates of boxes are w.r.t. the original image.
  boxes = box_ops.denormalize_boxes(boxes, image_shape)

  # Resizes and crops image.
  image, image_info = preprocess_ops.resize_and_crop_image(
      image,
      self._output_size,
      padded_size=preprocess_ops.compute_padded_size(
          self._output_size, 2 ** self._max_level),
      aug_scale_min=self._aug_scale_min,
      aug_scale_max=self._aug_scale_max)
  image_height, image_width, _ = image.get_shape().as_list()

  # Resizes and crops boxes.
  # Now the coordinates of boxes are w.r.t the scaled image.
  image_scale = image_info[2, :]
  offset = image_info[3, :]
  boxes = preprocess_ops.resize_and_crop_boxes(
      boxes, image_scale, image_info[1, :], offset)

  # Filters out ground truth boxes that are all zeros.
  indices = box_ops.get_non_empty_box_indices(boxes)
  boxes = tf.gather(boxes, indices)
  classes = tf.gather(classes, indices)
  if self._include_mask:
    masks = tf.gather(masks, indices)
    # Transfer boxes to the original image space and do normalization.
    cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
    cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
    cropped_boxes = box_ops.normalize_boxes(cropped_boxes, image_shape)
    num_masks = tf.shape(masks)[0]
    masks = tf.image.crop_and_resize(
        tf.expand_dims(masks, axis=-1),
        cropped_boxes,
        box_indices=tf.range(num_masks, dtype=tf.int32),
        crop_size=[self._mask_crop_size, self._mask_crop_size],
        method='bilinear')
    masks = tf.squeeze(masks, axis=-1)

  # Assigns anchor targets.
  # Note that after the target assignment, box targets are absolute pixel
  # offsets w.r.t. the scaled image.
  input_anchor = anchor.build_anchor_generator(
      min_level=self._min_level,
      max_level=self._max_level,
      num_scales=self._num_scales,
      aspect_ratios=self._aspect_ratios,
      anchor_size=self._anchor_size)
  anchor_boxes = input_anchor(image_size=(image_height, image_width))
  anchor_labeler = anchor.RpnAnchorLabeler(
      self._rpn_match_threshold,
      self._rpn_unmatched_threshold,
      self._rpn_batch_size_per_im,
      self._rpn_fg_fraction)
  rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
      anchor_boxes, boxes,
      tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))

  # Casts input image to self._dtype
  image = tf.cast(image, dtype=self._dtype)

  # Packs labels for model_fn outputs.
  labels = {
      'anchor_boxes': anchor_boxes,
      'image_info': image_info,
      'rpn_score_targets': rpn_score_targets,
      'rpn_box_targets': rpn_box_targets,
      'gt_boxes':
          preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                   self._max_num_instances,
                                                   -1),
      'gt_classes':
          preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                   self._max_num_instances,
                                                   -1),
  }
  if self._include_mask:
    labels['gt_masks'] = preprocess_ops.clip_or_pad_to_fixed_size(
        masks, self._max_num_instances, -1)

  return image, labels
def transform_and_clip_boxes(boxes,
                             infos,
                             affine=None,
                             shuffle_boxes=False,
                             area_thresh=0.1,
                             seed=None,
                             filter_and_clip_boxes=True):
  """Clips and cleans the boxes.

  Args:
    boxes: A `Tensor` for the boxes.
    infos: A `list` that contains the image infos.
    affine: A `list` that contains parameters for resize and crop.
    shuffle_boxes: A `bool` for shuffling the boxes.
    area_thresh: A `float` for the area threshold used to drop boxes that
      shrink too much under the applied transforms.
    seed: seed for random number generation.
    filter_and_clip_boxes: A `bool` for filtering and clipping the boxes to
      [0, 1].

  Returns:
    boxes: A `Tensor` representing the augmented boxes.
    ind: A `Tensor` valid box indices.
  """

  # Clip and clean boxes.
  def get_valid_boxes(boxes):
    """Get indices for non-empty boxes."""
    # A box is valid when both its height and width are strictly positive.
    height = boxes[:, 2] - boxes[:, 0]
    width = boxes[:, 3] - boxes[:, 1]
    base = tf.logical_and(tf.greater(height, 0), tf.greater(width, 0))
    return base

  # Initialize history to track operation applied to boxes. The history is
  # transformed WITHOUT clipping so that boxes which drift out of the image
  # or collapse can be detected and thresholded at the end.
  box_history = boxes

  # Make sure all boxes are valid to start, clip to [0, 1] and get only the
  # valid boxes.
  output_size = None
  if filter_and_clip_boxes:
    boxes = tf.math.maximum(tf.math.minimum(boxes, 1.0), 0.0)
  cond = get_valid_boxes(boxes)

  if infos is None:
    infos = []

  for info in infos:
    # Denormalize the boxes.
    boxes = bbox_ops.denormalize_boxes(boxes, info[0])
    box_history = bbox_ops.denormalize_boxes(box_history, info[0])

    # Shift and scale all boxes, and keep track of box history with no
    # box clipping, history is used for removing boxes that have become
    # too small or exit the image area.
    (boxes, box_history) = resize_and_crop_boxes(
        boxes, info[2, :], info[1, :], info[3, :], box_history=box_history)

    # Get all the boxes that still remain in the image and store
    # in a bit vector for later use.
    cond = tf.logical_and(get_valid_boxes(boxes), cond)

    # Normalize the boxes to [0, 1].
    output_size = info[1]
    boxes = bbox_ops.normalize_boxes(boxes, output_size)
    box_history = bbox_ops.normalize_boxes(box_history, output_size)

  if affine is not None:
    # Denormalize the boxes.
    boxes = bbox_ops.denormalize_boxes(boxes, affine[0])
    box_history = bbox_ops.denormalize_boxes(box_history, affine[0])

    # Clipped final boxes.
    (boxes, box_history) = affine_warp_boxes(
        affine[2], boxes, affine[1], box_history=box_history)

    # Get all the boxes that still remain in the image and store
    # in a bit vector for later use.
    cond = tf.logical_and(get_valid_boxes(boxes), cond)

    # Normalize the boxes to [0, 1].
    output_size = affine[1]
    boxes = bbox_ops.normalize_boxes(boxes, output_size)
    box_history = bbox_ops.normalize_boxes(box_history, output_size)

  # Remove the bad boxes (zero them out via the validity bit vector).
  boxes *= tf.cast(tf.expand_dims(cond, axis=-1), boxes.dtype)

  # Threshold the existing boxes: compare each box against its unclipped
  # history and drop boxes whose area shrank below `area_thresh`.
  if filter_and_clip_boxes:
    if output_size is not None:
      boxes_ = bbox_ops.denormalize_boxes(boxes, output_size)
      box_history_ = bbox_ops.denormalize_boxes(box_history, output_size)
      inds = boxes_candidates(boxes_, box_history_, area_thr=area_thresh)
    else:
      inds = boxes_candidates(
          boxes, box_history, wh_thr=0.0, area_thr=area_thresh)
    # Select and gather the good boxes.
    if shuffle_boxes:
      inds = tf.random.shuffle(inds, seed=seed)
  else:
    # No filtering requested: keep every non-empty box.
    inds = bbox_ops.get_non_empty_box_indices(boxes)
  boxes = tf.gather(boxes, inds)
  return boxes, inds