def _common_image_process(self, image, classes, boxes, data, params):
  """Apply shared training-time preprocessing to one decoded example.

  Optionally drops crowd annotations, then applies GridMask and/or an
  AutoAugment policy when the corresponding entries are set in `params`.

  Args:
    image: the decoded image tensor.
    classes: groundtruth class tensor.
    boxes: groundtruth box tensor.
    data: decoded-example dict; only 'groundtruth_is_crowd' is read here.
    params: dict with keys 'skip_crowd_during_training', and optionally
      'grid_mask' and 'autoaugment_policy'.

  Returns:
    A (image, boxes, classes) tuple with the augmentations applied.
  """
  # Training time preprocessing.
  if params['skip_crowd_during_training']:
    keep = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
    classes = tf.gather_nd(classes, keep)
    boxes = tf.gather_nd(boxes, keep)

  if params.get('grid_mask', None):
    from aug import gridmask  # pylint: disable=g-import-not-at-top
    image, boxes = gridmask.gridmask(image, boxes)

  policy = params.get('autoaugment_policy', None)
  if policy:
    from aug import autoaugment  # pylint: disable=g-import-not-at-top
    if policy == 'randaug':
      image, boxes = autoaugment.distort_image_with_randaugment(
          image, boxes, num_layers=1, magnitude=15)
    else:
      image, boxes = autoaugment.distort_image_with_autoaugment(
          image, boxes, policy)
  return image, boxes, classes
def dataset_parser(self, value, example_decoder, anchor_labeler, params):
  """Parse data to a fixed dimension input image and learning targets.

  Args:
    value: a single serialized tf.Example string.
    example_decoder: TF example decoder.
    anchor_labeler: anchor box labeler.
    params: a dict of extra parameters.

  Returns:
    image: Image tensor that is preprocessed to have normalized value and
      fixed dimension [image_height, image_width, 3]
    cls_targets_dict: ordered dictionary with keys
      [min_level, min_level+1, ..., max_level]. The values are tensor with
      shape [height_l, width_l, num_anchors]. The height_l and width_l
      represent the dimension of class logits at l-th level.
    box_targets_dict: ordered dictionary with keys
      [min_level, min_level+1, ..., max_level]. The values are tensor with
      shape [height_l, width_l, num_anchors * 4]. The height_l and
      width_l represent the dimension of bounding box regression output at
      l-th level.
    num_positives: Number of positive anchors in the image.
    source_id: Source image id. Default value -1 if the source id is empty
      in the groundtruth annotation.
    image_scale: Scale of the processed image to the original image.
    boxes: Groundtruth bounding box annotations. The box is represented in
      [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
      dimension [self._max_instances_per_image, 4].
    is_crowds: Groundtruth annotations to indicate if an annotation
      represents a group of instances by value {0, 1}. The tensor is
      padded with 0 to the fixed dimension
      [self._max_instances_per_image].
    areas: Groundtruth areas annotations. The tensor is padded with -1
      to the fixed dimension [self._max_instances_per_image].
    classes: Groundtruth classes annotations. The tensor is padded with -1
      to the fixed dimension [self._max_instances_per_image].
  """
  with tf.name_scope('parser'):
    data = example_decoder.decode(value)
    source_id = data['source_id']
    image = data['image']
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']
    areas = data['groundtruth_area']
    is_crowds = data['groundtruth_is_crowd']
    image_masks = data.get('groundtruth_instance_masks', [])
    # Cast classes to float and reshape to a column vector once. (The
    # original code performed this identical cast+reshape twice.)
    classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])

    if self._is_training:
      # Training time preprocessing.
      if params['skip_crowd_during_training']:
        indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
        classes = tf.gather_nd(classes, indices)
        boxes = tf.gather_nd(boxes, indices)

      if params.get('grid_mask', None):
        from aug import gridmask  # pylint: disable=g-import-not-at-top
        image, boxes = gridmask.gridmask(image, boxes)

      if params.get('autoaugment_policy', None):
        from aug import autoaugment  # pylint: disable=g-import-not-at-top
        if params['autoaugment_policy'] == 'randaug':
          image, boxes = autoaugment.distort_image_with_randaugment(
              image, boxes, num_layers=1, magnitude=15)
        else:
          # NOTE(review): this call passes augmix parameters, unlike the
          # similar code in _common_image_process — intentional divergence,
          # presumably; confirm before unifying the two paths.
          image, boxes = autoaugment.distort_image_with_autoaugment(
              image, boxes, params['autoaugment_policy'],
              params['use_augmix'], *params['augmix_params'])

    input_processor = DetectionInputProcessor(image, params['image_size'],
                                              boxes, classes)
    input_processor.normalize_image()
    if self._is_training:
      if params['input_rand_hflip']:
        input_processor.random_horizontal_flip()
      input_processor.set_training_random_scale_factors(
          params['jitter_min'], params['jitter_max'],
          params.get('target_size', None))
    else:
      input_processor.set_scale_factors_to_output_size()
    image = input_processor.resize_and_crop_image()
    boxes, classes = input_processor.resize_and_crop_boxes()

    # Assign anchors.
    (cls_targets, box_targets,
     num_positives) = anchor_labeler.label_anchors(boxes, classes)

    # Empty source ids map to -1 so they can be parsed as numbers.
    source_id = tf.where(
        tf.equal(source_id, tf.constant('')), '-1', source_id)
    source_id = tf.strings.to_number(source_id)

    # Pad groundtruth data for evaluation.
    image_scale = input_processor.image_scale_to_original
    boxes *= image_scale
    is_crowds = tf.cast(is_crowds, dtype=tf.float32)
    boxes = pad_to_fixed_size(boxes, -1,
                              [self._max_instances_per_image, 4])
    is_crowds = pad_to_fixed_size(is_crowds, 0,
                                  [self._max_instances_per_image, 1])
    areas = pad_to_fixed_size(areas, -1,
                              [self._max_instances_per_image, 1])
    classes = pad_to_fixed_size(classes, -1,
                                [self._max_instances_per_image, 1])
    return (image, cls_targets, box_targets, num_positives, source_id,
            image_scale, boxes, is_crowds, areas, classes, image_masks)
def test_randaugment_policy(self):
  """Smoke test: RandAugment distortion graph builds for a static image."""
  img = tf.placeholder(tf.uint8, shape=[320, 320, 3])
  box_tensor = tf.placeholder(tf.float32, shape=[4, 4])
  autoaugment.distort_image_with_randaugment(img, box_tensor, 1, 15)