Exemple #1
0
 def test_autoaugment_policy(self):
   """Smoke-test op construction for the 'test' autoaugment policy.

   The augmentation ops are built twice from the same placeholders: once with
   default arguments and once with `use_augmix=True`. Nothing is run or
   asserted, so the test only fails if building the ops raises.
   """
   image_ph = tf.placeholder(dtype=tf.uint8, shape=[640, 640, 3])
   bboxes_ph = tf.placeholder(dtype=tf.float32, shape=[4, 4])
   for extra_kwargs in ({}, {'use_augmix': True}):
     autoaugment.distort_image_with_autoaugment(
         image_ph, bboxes_ph, 'test', **extra_kwargs)
Exemple #2
0
  def _common_image_process(self, image, classes, boxes, data, params):
    """Run the training-time filtering and augmentation steps.

    Args:
      image: image passed through the optional augmentations.
      classes: per-instance classes, indexable with `tf.gather_nd`.
      boxes: per-instance boxes, indexable with `tf.gather_nd`.
      data: decoded example; only `data['groundtruth_is_crowd']` is read, and
        only when crowd instances are being skipped.
      params: config mapping. `params['skip_crowd_during_training']` is
        required; `grid_mask` and `autoaugment_policy` are optional.

    Returns:
      The tuple `(image, boxes, classes)`. Note that boxes come before
      classes, unlike the argument order.
    """
    if params['skip_crowd_during_training']:
      # Keep only the instances that are not flagged as crowd.
      not_crowd = tf.logical_not(data['groundtruth_is_crowd'])
      keep_idx = tf.where(not_crowd)
      classes = tf.gather_nd(classes, keep_idx)
      boxes = tf.gather_nd(boxes, keep_idx)

    if params.get('grid_mask', None):
      from aug import gridmask  # pylint: disable=g-import-not-at-top
      image, boxes = gridmask.gridmask(image, boxes)

    policy = params.get('autoaugment_policy', None)
    if policy:
      from aug import autoaugment  # pylint: disable=g-import-not-at-top
      if policy == 'randaug':
        image, boxes = autoaugment.distort_image_with_randaugment(
            image, boxes, num_layers=1, magnitude=15)
      else:
        image, boxes = autoaugment.distort_image_with_autoaugment(
            image, boxes, policy)

    return image, boxes, classes
Exemple #3
0
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

            Reads `self`, `params`, `example_decoder` and `anchor_labeler` from
            the enclosing scope.

            Args:
                value: The raw example handed to `example_decoder.decode`.

            Returns:
                image: Image tensor that is preprocessed to have normalized
                    value and fixed dimension [image_height, image_width, 3].
                cls_targets_dict: ordered dictionary with keys
                    [min_level, min_level+1, ..., max_level]. The values are
                    tensor with shape [height_l, width_l, num_anchors]. The
                    height_l and width_l represent the dimension of class
                    logits at l-th level.
                box_targets_dict: ordered dictionary with keys
                    [min_level, min_level+1, ..., max_level]. The values are
                    tensor with shape [height_l, width_l, num_anchors * 4].
                    The height_l and width_l represent the dimension of
                    bounding box regression output at l-th level.
                num_positives: Number of positive anchors in the image.
                source_id: Source image id as a number. An empty id in the
                    groundtruth annotation is replaced by -1.
                image_scale: Scale of the processed image to the original
                    image.
                boxes: Groundtruth bounding box annotations, rescaled by
                    `image_scale` and padded with -1 to
                    [self._max_instances_per_image, 4].
                is_crowds: Groundtruth crowd flags cast to float32 and padded
                    with 0 to [self._max_instances_per_image, 1].
                areas: Groundtruth areas annotations, padded with -1 to
                    [self._max_instances_per_image, 1].
                classes: Groundtruth classes annotations, padded with -1 to
                    [self._max_instances_per_image, 1].
            """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                # Classes become a float32 column of shape [num_instances, 1].
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']

                if params['skip_crowd_during_training'] and self._is_training:
                    # Drop crowd instances from the training targets only;
                    # `is_crowds` and `areas` keep every instance.
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)

                # NOTE: The autoaugment method works best when used alongside the
                # standard horizontal flipping of images along with size jittering
                # and normalization.
                if params.get('autoaugment_policy',
                              None) and self._is_training:
                    from aug import autoaugment  # pylint: disable=g-import-not-at-top
                    image, boxes = autoaugment.distort_image_with_autoaugment(
                        image, boxes, params['autoaugment_policy'],
                        params['use_augmix'], *params['augmix_params'])

                input_processor = DetectionInputProcessor(
                    image, params['image_size'], boxes, classes)
                input_processor.normalize_image()
                if self._is_training and params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()
                if self._is_training:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'],
                        params.get('target_size', None))
                else:
                    input_processor.set_scale_factors_to_output_size()
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()

                # Assign anchors.
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                # An empty source id is mapped to '-1' so that the string can
                # always be converted to a number.
                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.strings.to_number(source_id)

                # Pad groundtruth data for evaluation.
                image_scale = input_processor.image_scale_to_original
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_instances_per_image, 4])
                is_crowds = pad_to_fixed_size(
                    is_crowds, 0, [self._max_instances_per_image, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_instances_per_image, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_instances_per_image, 1])
                return (image, cls_targets, box_targets, num_positives,
                        source_id, image_scale, boxes, is_crowds, areas,
                        classes)
Exemple #4
0
    def dataset_parser(self, value, example_decoder, anchor_labeler, params):
        """Decode one example into a fixed-size image and its learning targets.

        Args:
            value: a single serialized tf.Example string.
            example_decoder: TF example decoder.
            anchor_labeler: anchor box labeler.
            params: a dict of extra parameters.

        Returns:
            An 11-tuple, in this order:

            image: Image tensor that is preprocessed to have normalized value
                and fixed dimension [image_height, image_width, 3]. Cast to
                the global mixed-precision compute dtype when
                `params['mixed_precision']` is set.
            cls_targets_dict: ordered dictionary with keys
                [min_level, min_level+1, ..., max_level]. The values are
                tensor with shape [height_l, width_l, num_anchors]. The
                height_l and width_l represent the dimension of class logits
                at l-th level.
            box_targets_dict: ordered dictionary with keys
                [min_level, min_level+1, ..., max_level]. The values are
                tensor with shape [height_l, width_l, num_anchors * 4]. The
                height_l and width_l represent the dimension of bounding box
                regression output at l-th level. Cast like `image` under
                mixed precision.
            num_positives: Number of positive anchors in the image.
            source_id: Source image id as a number. An empty id in the
                groundtruth annotation is replaced by -1.
            image_scale: Scale of the processed image to the original image.
            boxes: Groundtruth bounding box annotations, rescaled by
                `image_scale` and padded with -1 to
                [self._max_instances_per_image, 4].
            is_crowds: Groundtruth crowd flags cast to float32 and padded
                with 0 to [self._max_instances_per_image, 1].
            areas: Groundtruth areas annotations, padded with -1 to
                [self._max_instances_per_image, 1].
            classes: Groundtruth classes annotations, padded with -1 to
                [self._max_instances_per_image, 1].
            image_masks: `data['groundtruth_instance_masks']` from the decoded
                example when present, otherwise an empty list.
        """
        with tf.name_scope('parser'):
            decoded = example_decoder.decode(value)
            source_id = decoded['source_id']
            image = decoded['image']
            boxes = decoded['groundtruth_boxes']
            raw_classes = decoded['groundtruth_classes']
            classes = tf.reshape(
                tf.cast(raw_classes, dtype=tf.float32), [-1, 1])
            areas = decoded['groundtruth_area']
            is_crowds = decoded['groundtruth_is_crowd']
            image_masks = decoded.get('groundtruth_instance_masks', [])

            is_training = self._is_training
            if is_training:
                # Training time preprocessing.
                if params['skip_crowd_during_training']:
                    not_crowd = tf.logical_not(
                        decoded['groundtruth_is_crowd'])
                    keep_idx = tf.where(not_crowd)
                    classes = tf.gather_nd(classes, keep_idx)
                    boxes = tf.gather_nd(boxes, keep_idx)

                if params.get('grid_mask', None):
                    from aug import gridmask  # pylint: disable=g-import-not-at-top
                    image, boxes = gridmask.gridmask(image, boxes)

                policy = params.get('autoaugment_policy', None)
                if policy:
                    from aug import autoaugment  # pylint: disable=g-import-not-at-top
                    if policy == 'randaug':
                        image, boxes = autoaugment.distort_image_with_randaugment(
                            image, boxes, num_layers=1, magnitude=15)
                    else:
                        image, boxes = autoaugment.distort_image_with_autoaugment(
                            image, boxes, policy)

            processor = DetectionInputProcessor(
                image, params['image_size'], boxes, classes)
            processor.normalize_image()
            if is_training:
                if params['input_rand_hflip']:
                    processor.random_horizontal_flip()

                processor.set_training_random_scale_factors(
                    params['jitter_min'], params['jitter_max'],
                    params.get('target_size', None))
            else:
                processor.set_scale_factors_to_output_size()
            image = processor.resize_and_crop_image()
            boxes, classes = processor.resize_and_crop_boxes()

            # Assign anchors.
            cls_targets, box_targets, num_positives = (
                anchor_labeler.label_anchors(boxes, classes))

            # An empty source id is mapped to '-1' before the numeric parse.
            has_no_id = tf.equal(source_id, tf.constant(''))
            source_id = tf.where(has_no_id, '-1', source_id)
            source_id = tf.strings.to_number(source_id)

            # Pad groundtruth data for evaluation.
            image_scale = processor.image_scale_to_original
            boxes *= image_scale
            is_crowds = tf.cast(is_crowds, dtype=tf.float32)
            max_instances = self._max_instances_per_image
            boxes = pad_to_fixed_size(boxes, -1, [max_instances, 4])
            is_crowds = pad_to_fixed_size(is_crowds, 0, [max_instances, 1])
            areas = pad_to_fixed_size(areas, -1, [max_instances, 1])
            classes = pad_to_fixed_size(classes, -1, [max_instances, 1])

            if params['mixed_precision']:
                compute_dtype = (
                    tf.keras.mixed_precision.global_policy().compute_dtype)
                image = tf.cast(image, dtype=compute_dtype)
                box_targets = tf.nest.map_structure(
                    lambda target: tf.cast(target, dtype=compute_dtype),
                    box_targets)

            return (image, cls_targets, box_targets, num_positives, source_id,
                    image_scale, boxes, is_crowds, areas, classes, image_masks)