def _parse_train_data(self, data): """Parses data for training and evaluation.""" classes = data['groundtruth_classes'] boxes = data['groundtruth_boxes'] is_crowds = data['groundtruth_is_crowd'] # Skips annotations with `is_crowd` = True. if self._skip_crowd_during_training and self._is_training: num_groundtrtuhs = tf.shape(classes)[0] with tf.control_dependencies([num_groundtrtuhs, is_crowds]): indices = tf.cond( tf.greater(tf.size(is_crowds), 0), lambda: tf.where(tf.logical_not(is_crowds))[:, 0], lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64)) classes = tf.gather(classes, indices) boxes = tf.gather(boxes, indices) # Gets original image and its size. image = data['image'] # NOTE: The autoaugment method works best when used alongside the standard # horizontal flipping of images along with size jittering and normalization. if self._use_autoaugment: try: from utils import autoaugment_utils # pylint: disable=g-import-not-at-top except ImportError as e: logging.exception('Autoaugment is not supported in TF 2.x.') raise e image, boxes = autoaugment_utils.distort_image_with_autoaugment( image, boxes, self._autoaugment_policy_name) image_shape = tf.shape(image)[0:2] # Normalizes image with mean and std pixel values. image = input_utils.normalize_image(image) # Flips image randomly during training. if self._aug_rand_hflip: image, boxes = input_utils.random_horizontal_flip(image, boxes) # Converts boxes from normalized coordinates to pixel coordinates. # Now the coordinates of boxes are w.r.t. the original image. boxes = box_utils.denormalize_boxes(boxes, image_shape) # Resizes and crops image. image, image_info = input_utils.resize_and_crop_image( image, self._output_size, padded_size=input_utils.compute_padded_size( self._output_size, 2**self._max_level), aug_scale_min=self._aug_scale_min, aug_scale_max=self._aug_scale_max) image_height, image_width, _ = image.get_shape().as_list() # Resizes and crops boxes. # Now the coordinates of boxes are w.r.t the scaled image. image_scale = image_info[2, :] offset = image_info[3, :] boxes = input_utils.resize_and_crop_boxes(boxes, image_scale, (image_height, image_width), offset) # Filters out ground truth boxes that are all zeros. indices = box_utils.get_non_empty_box_indices(boxes) boxes = tf.gather(boxes, indices) classes = tf.gather(classes, indices) # Assigns anchor targets. # Note that after the target assignment, box targets are absolute pixel # offsets w.r.t. the scaled image. input_anchor = anchor.Anchor(self._min_level, self._max_level, self._num_scales, self._aspect_ratios, self._anchor_size, (image_height, image_width)) anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold, self._unmatched_threshold) (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors( boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32)) # If bfloat16 is used, casts input image to tf.bfloat16. if self._use_bfloat16: image = tf.cast(image, dtype=tf.bfloat16) # Packs labels for model_fn outputs. labels = { 'cls_targets': cls_targets, 'box_targets': box_targets, 'anchor_boxes': input_anchor.multilevel_boxes, 'num_positives': num_positives, 'image_info': image_info, } return image, labels
def _parse_train_data(self, data): """Parses data for training and evaluation.""" classes = data['groundtruth_classes'] boxes = data['groundtruth_boxes'] is_crowds = data['groundtruth_is_crowd'] # Skips annotations with `is_crowd` = True. if self._skip_crowd_during_training and self._is_training: num_groundtrtuhs = tf.shape(input=classes)[0] with tf.control_dependencies([num_groundtrtuhs, is_crowds]): indices = tf.cond( pred=tf.greater(tf.size(input=is_crowds), 0), true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0], false_fn=lambda: tf.cast(tf.range(num_groundtrtuhs), tf. int64)) classes = tf.gather(classes, indices) boxes = tf.gather(boxes, indices) # Gets original image and its size. image = data['image'] # NOTE: The autoaugment method works best when used alongside the standard # horizontal flipping of images along with size jittering and normalization. if self._use_autoaugment: image, boxes = autoaugment_utils.distort_image_with_autoaugment( image, boxes, self._autoaugment_policy_name) image_shape = tf.shape(input=image)[0:2] # Normalizes image with mean and std pixel values. image = input_utils.normalize_image(image) # Flips image randomly during training. if self._aug_rand_hflip: image, boxes = input_utils.random_horizontal_flip(image, boxes) # Converts boxes from normalized coordinates to pixel coordinates. boxes = box_utils.denormalize_boxes(boxes, image_shape) # Resizes and crops image. image, image_info = input_utils.resize_and_crop_image( image, self._output_size, padded_size=input_utils.compute_padded_size( self._output_size, 2**self._max_level), aug_scale_min=self._aug_scale_min, aug_scale_max=self._aug_scale_max) image_height, image_width, _ = image.get_shape().as_list() # Resizes and crops boxes. image_scale = image_info[2, :] offset = image_info[3, :] boxes = input_utils.resize_and_crop_boxes(boxes, image_scale, image_info[1, :], offset) # Filters out ground truth boxes that are all zeros. indices = box_utils.get_non_empty_box_indices(boxes) boxes = tf.gather(boxes, indices) classes = tf.gather(classes, indices) # Assigns anchors. input_anchor = anchor.Anchor(self._min_level, self._max_level, self._num_scales, self._aspect_ratios, self._anchor_size, (image_height, image_width)) anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold, self._unmatched_threshold) (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors( boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32)) # If bfloat16 is used, casts input image to tf.bfloat16. if self._use_bfloat16: image = tf.cast(image, dtype=tf.bfloat16) # Packs labels for model_fn outputs. labels = { 'cls_targets': cls_targets, 'box_targets': box_targets, 'anchor_boxes': input_anchor.multilevel_boxes, 'num_positives': num_positives, 'image_info': image_info, } # return image, labels num_anchors = input_anchor.anchors_per_location mlvl_cls_targets = tf.concat([tf.reshape(cls_targets[lv], [-1, num_anchors]) \ for lv in range(self._min_level, self._max_level+1)], axis=0) mlvl_box_targets = tf.concat([tf.reshape(box_targets[lv], [-1, num_anchors*4]) \ for lv in range(self._min_level, self._max_level + 1)], axis=0) num_positives_expand = tf.ones_like( mlvl_box_targets[..., 0:1]) * num_positives mlvl_cls_targets_wp = tf.concat( [mlvl_cls_targets, tf.cast(num_positives_expand, dtype=tf.int32)], axis=-1) mlvl_box_targets_wp = tf.concat( [mlvl_box_targets, num_positives_expand], axis=-1) return image, (mlvl_cls_targets_wp, mlvl_box_targets_wp)