Example 1
    def test_custom_policy(self):
        """Test autoaugment with a custom policy."""
        image = tf.zeros((224, 224, 3), dtype=tf.uint8)
        augmenter = augment.AutoAugment(policies=self._generate_test_policy())
        aug_image = augmenter.distort(image)

        self.assertEqual((224, 224, 3), aug_image.shape)
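
The helper `_generate_test_policy()` used above (and again in Example 8) is not shown in this listing. A hypothetical sketch of such a helper, assuming `policies=` takes a list of sub-policies where each sub-policy is a list of (op_name, probability, magnitude) tuples, i.e. the (:, :, 3) shape that the validation in Examples 2 and 5 expects:

    def _generate_test_policy(self):
        """Hypothetical helper: returns a small, valid custom policy."""
        # Two sub-policies, two ops each, three fields per op -> shape (2, 2, 3).
        return [[('Equalize', 0.8, 1), ('Shear', 0.8, 4)],
                [('TranslateY', 0.6, 3), ('Rotate', 0.9, 3)]]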
Example 2
    def test_invalid_custom_policy_shape(self):
        """Test autoaugment with wrong shape in the custom policy."""
        policy = [[('Equalize', 0.8, 1, 1), ('Shear', 0.8, 4, 1)],
                  [('TranslateY', 0.6, 3, 1), ('Rotate', 0.9, 3, 1)]]

        with self.assertRaisesRegex(
                ValueError, r'Expected \(:, :, 3\) but got \(2, 2, 4\)'):
            augment.AutoAugment(policies=policy)
Example 3
    def test_invalid_custom_policy_key(self):
        """Test autoaugment with invalid key in the custom policy."""
        image = tf.zeros((224, 224, 3), dtype=tf.uint8)
        policy = [[('AAAAA', 0.8, 1), ('Shear', 0.8, 4)],
                  [('TranslateY', 0.6, 3), ('Rotate', 0.9, 3)]]
        augmenter = augment.AutoAugment(policies=policy)

        with self.assertRaisesRegex(KeyError, '\'AAAAA\''):
            augmenter.distort(image)
Example 4
    def test_autoaugment_video(self):
        """Smoke test with video to be sure there are no syntax errors."""
        image = tf.zeros((2, 224, 224, 3), dtype=tf.uint8)

        for policy in self.AVAILABLE_POLICIES:
            augmenter = augment.AutoAugment(augmentation_name=policy)
            aug_image = augmenter.distort(image)

            self.assertEqual((2, 224, 224, 3), aug_image.shape)
Example 5
    def test_invalid_custom_policy_ndim(self):
        """Test autoaugment with wrong dimension in the custom policy."""
        policy = [[('Equalize', 0.8, 1), ('Shear', 0.8, 4)],
                  [('TranslateY', 0.6, 3), ('Rotate', 0.9, 3)]]
        policy = [[policy]]

        with self.assertRaisesRegex(
                ValueError,
                r'Expected \(:, :, 3\) but got \(1, 1, 2, 2, 3\).'):
            augment.AutoAugment(policies=policy)
Example 6
    def test_autoaugment_video_with_boxes(self):
        """Smoke test with video to be sure there are no syntax errors."""
        image = tf.zeros((2, 224, 224, 3), dtype=tf.uint8)
        bboxes = tf.ones((2, 2, 4), dtype=tf.float32)

        for policy in self.AVAILABLE_POLICIES:
            augmenter = augment.AutoAugment(augmentation_name=policy)
            aug_image, aug_bboxes = augmenter.distort_with_boxes(image, bboxes)

            self.assertEqual((2, 224, 224, 3), aug_image.shape)
            self.assertEqual((2, 2, 4), aug_bboxes.shape)
Example 7
    def __init__(self,
                 input_params: exp_cfg.DataConfig,
                 image_key: str = IMAGE_KEY,
                 label_key: str = LABEL_KEY):
        self._num_frames = input_params.feature_shape[0]
        self._stride = input_params.temporal_stride
        self._random_stride_range = input_params.random_stride_range
        self._num_test_clips = input_params.num_test_clips
        self._min_resize = input_params.min_image_size
        crop_height = input_params.feature_shape[1]
        crop_width = input_params.feature_shape[2]
        self._crop_size = crop_height if crop_height == crop_width else (
            crop_height, crop_width)
        self._num_channels = input_params.feature_shape[3]
        self._num_crops = input_params.num_test_crops
        self._zero_centering_image = input_params.zero_centering_image
        self._one_hot_label = input_params.one_hot
        self._num_classes = input_params.num_classes
        self._image_key = image_key
        self._label_key = label_key
        self._dtype = tf.dtypes.as_dtype(input_params.dtype)
        self._label_dtype = tf.dtypes.as_dtype(input_params.label_dtype)
        self._output_audio = input_params.output_audio
        self._min_aspect_ratio = input_params.aug_min_aspect_ratio
        self._max_aspect_ratio = input_params.aug_max_aspect_ratio
        self._min_area_ratio = input_params.aug_min_area_ratio
        self._max_area_ratio = input_params.aug_max_area_ratio
        if self._output_audio:
            self._audio_feature = input_params.audio_feature
            self._audio_shape = input_params.audio_feature_shape

        aug_type = input_params.aug_type
        if aug_type is not None:
            if aug_type.type == 'autoaug':
                logging.info('Using AutoAugment.')
                self._augmenter = augment.AutoAugment(
                    augmentation_name=aug_type.autoaug.augmentation_name,
                    cutout_const=aug_type.autoaug.cutout_const,
                    translate_const=aug_type.autoaug.translate_const)
            elif aug_type.type == 'randaug':
                logging.info('Using RandAugment.')
                self._augmenter = augment.RandAugment(
                    num_layers=aug_type.randaug.num_layers,
                    magnitude=aug_type.randaug.magnitude,
                    cutout_const=aug_type.randaug.cutout_const,
                    translate_const=aug_type.randaug.translate_const,
                    prob_to_apply=aug_type.randaug.prob_to_apply,
                    exclude_ops=aug_type.randaug.exclude_ops)
            else:
                raise ValueError(
                    'Augmentation policy {} not supported.'.format(
                        aug_type.type))
        else:
            self._augmenter = None
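
For reference, the augmenters this parser builds from its config can also be constructed directly. A minimal sketch with illustrative argument values (the parser forwards them from `aug_type.randaug`); the `official.vision.ops.augment` import path is an assumption, since the examples above only show the module as `augment`:

    import tensorflow as tf
    from official.vision.ops import augment  # import path assumed

    # Illustrative values; the parser reads these from aug_type.randaug.
    augmenter = augment.RandAugment(
        num_layers=2,
        magnitude=10,
        cutout_const=40,
        translate_const=100,
        prob_to_apply=None,
        exclude_ops=None)

    image = tf.zeros((224, 224, 3), dtype=tf.uint8)
    aug_image = augmenter.distort(image)  # output keeps the input shape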
Example 8
    def test_invalid_custom_sub_policy(self, sub_policy, value):
        """Test autoaugment with out-of-range values in the custom policy."""
        image = tf.zeros((224, 224, 3), dtype=tf.uint8)
        policy = self._generate_test_policy()
        policy[0][0] = sub_policy
        augmenter = augment.AutoAugment(policies=policy)

        with self.assertRaisesRegex(
                tf.errors.InvalidArgumentError,
                r'Expected \'tf.Tensor\(False, shape=\(\), dtype=bool\)\' to be true. '
                r'Summarized data: ({})'.format(value)):
            augmenter.distort(image)
Example 9
    def __init__(self,
                 input_params: exp_cfg.DataConfig,
                 image_key: str = IMAGE_KEY,
                 label_key: str = LABEL_KEY):
        self._num_frames = input_params.feature_shape[0]
        self._stride = input_params.temporal_stride
        self._random_stride_range = input_params.random_stride_range
        self._num_test_clips = input_params.num_test_clips
        self._min_resize = input_params.min_image_size
        self._crop_size = input_params.feature_shape[1]
        self._num_crops = input_params.num_test_crops
        self._zero_centering_image = input_params.zero_centering_image
        self._one_hot_label = input_params.one_hot
        self._num_classes = input_params.num_classes
        self._image_key = image_key
        self._label_key = label_key
        self._dtype = tf.dtypes.as_dtype(input_params.dtype)
        self._output_audio = input_params.output_audio
        self._min_aspect_ratio = input_params.aug_min_aspect_ratio
        self._max_aspect_ratio = input_params.aug_max_aspect_ratio
        self._min_area_ratio = input_params.aug_min_area_ratio
        self._max_area_ratio = input_params.aug_max_area_ratio
        if self._output_audio:
            self._audio_feature = input_params.audio_feature
            self._audio_shape = input_params.audio_feature_shape

        self._augmenter = None
        if input_params.aug_type is not None:
            aug_type = input_params.aug_type
            if aug_type == 'autoaug':
                logging.info('Using AutoAugment.')
                self._augmenter = augment.AutoAugment()
            elif aug_type == 'randaug':
                logging.info('Using RandAugment.')
                self._augmenter = augment.RandAugment()
            else:
                raise ValueError(
                    'Augmentation policy {} is not supported.'.format(
                        aug_type))
Example 10
    def __init__(self,
                 output_size: List[int],
                 num_classes: float,
                 image_field_key: str = DEFAULT_IMAGE_FIELD_KEY,
                 label_field_key: str = DEFAULT_LABEL_FIELD_KEY,
                 decode_jpeg_only: bool = True,
                 aug_rand_hflip: bool = True,
                 aug_crop: Optional[bool] = True,
                 aug_type: Optional[common.Augmentation] = None,
                 color_jitter: float = 0.,
                 random_erasing: Optional[common.RandomErasing] = None,
                 is_multilabel: bool = False,
                 dtype: str = 'float32'):
        """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divisible by the largest feature stride 2^max_level.
      num_classes: `float`, number of classes.
      image_field_key: `str`, the key name to encoded image in tf.Example.
      label_field_key: `str`, the key name to label in tf.Example.
      decode_jpeg_only: `bool`, if True, only JPEG format is decoded; this is
        faster than decoding other types. Default is True.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
      aug_crop: `bool`, if True, perform random cropping during training and
        center crop during validation.
      aug_type: An optional Augmentation object to choose from AutoAugment and
        RandAugment.
      color_jitter: Magnitude of color jitter. If > 0, the value is used to
        generate random scale factor for brightness, contrast and saturation.
        See `preprocess_ops.color_jitter` for more details.
      random_erasing: if not None, augment input image by random erasing. See
        `augment.RandomErasing` for more details.
      is_multilabel: A `bool`, whether or not each example has multiple labels.
      dtype: `str`, cast output image in dtype. It can be 'float32', 'float16',
        or 'bfloat16'.
    """
        self._output_size = output_size
        self._aug_rand_hflip = aug_rand_hflip
        self._aug_crop = aug_crop
        self._num_classes = num_classes
        self._image_field_key = image_field_key
        if dtype == 'float32':
            self._dtype = tf.float32
        elif dtype == 'float16':
            self._dtype = tf.float16
        elif dtype == 'bfloat16':
            self._dtype = tf.bfloat16
        else:
            raise ValueError('dtype {!r} is not supported!'.format(dtype))
        if aug_type:
            if aug_type.type == 'autoaug':
                self._augmenter = augment.AutoAugment(
                    augmentation_name=aug_type.autoaug.augmentation_name,
                    cutout_const=aug_type.autoaug.cutout_const,
                    translate_const=aug_type.autoaug.translate_const)
            elif aug_type.type == 'randaug':
                self._augmenter = augment.RandAugment(
                    num_layers=aug_type.randaug.num_layers,
                    magnitude=aug_type.randaug.magnitude,
                    cutout_const=aug_type.randaug.cutout_const,
                    translate_const=aug_type.randaug.translate_const,
                    prob_to_apply=aug_type.randaug.prob_to_apply,
                    exclude_ops=aug_type.randaug.exclude_ops)
            else:
                raise ValueError(
                    'Augmentation policy {} not supported.'.format(
                        aug_type.type))
        else:
            self._augmenter = None
        self._label_field_key = label_field_key
        self._color_jitter = color_jitter
        if random_erasing:
            self._random_erasing = augment.RandomErasing(
                probability=random_erasing.probability,
                min_area=random_erasing.min_area,
                max_area=random_erasing.max_area,
                min_aspect=random_erasing.min_aspect,
                max_aspect=random_erasing.max_aspect,
                min_count=random_erasing.min_count,
                max_count=random_erasing.max_count,
                trials=random_erasing.trials)
        else:
            self._random_erasing = None
        self._is_multilabel = is_multilabel
        self._decode_jpeg_only = decode_jpeg_only
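
The random-erasing branch above forwards its config straight into `augment.RandomErasing`. A minimal standalone sketch with illustrative values for the same knobs (the import path is again an assumption):

    import tensorflow as tf
    from official.vision.ops import augment  # import path assumed

    # Illustrative values; the parser reads these from the random_erasing config.
    eraser = augment.RandomErasing(
        probability=0.25,
        min_area=0.02,
        max_area=1 / 3,
        min_aspect=0.3,
        max_aspect=None,
        min_count=1,
        max_count=1,
        trials=10)

    image = tf.random.uniform((224, 224, 3), dtype=tf.float32)
    erased = eraser.distort(image)  # same shape; random rectangles are replaced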
Example 11
    def __init__(self,
                 output_size: List[int],
                 resize_eval_groundtruth: bool = True,
                 groundtruth_padded_size: Optional[List[int]] = None,
                 ignore_label: int = 0,
                 aug_rand_hflip: bool = False,
                 aug_scale_min: float = 1.0,
                 aug_scale_max: float = 1.0,
                 aug_type: Optional[common.Augmentation] = None,
                 sigma: float = 8.0,
                 small_instance_area_threshold: int = 4096,
                 small_instance_weight: float = 3.0,
                 dtype: str = 'float32'):
        """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divisible by the largest feature stride 2^max_level.
      resize_eval_groundtruth: `bool`, if True, eval groundtruth masks are
        resized to output_size.
      groundtruth_padded_size: `Tensor` or `list` for [height, width]. When
        resize_eval_groundtruth is set to False, the groundtruth masks are
        padded to this size.
      ignore_label: `int`, pixels with this label are not used for training
        and evaluation.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
      aug_scale_min: `float`, the minimum scale applied to `output_size` for
        data augmentation during training.
      aug_scale_max: `float`, the maximum scale applied to `output_size` for
        data augmentation during training.
      aug_type: An optional Augmentation object with params for AutoAugment.
      sigma: `float`, standard deviation for generating 2D Gaussian to encode
        centers.
      small_instance_area_threshold: `int`, small instance area threshold.
      small_instance_weight: `float`, small instance weight.
      dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
    """
        self._output_size = output_size
        self._resize_eval_groundtruth = resize_eval_groundtruth
        if (not resize_eval_groundtruth) and (groundtruth_padded_size is None):
            raise ValueError(
                'groundtruth_padded_size ([height, width]) needs to be '
                'specified when resize_eval_groundtruth is False.')
        self._groundtruth_padded_size = groundtruth_padded_size
        self._ignore_label = ignore_label

        # Data augmentation.
        self._aug_rand_hflip = aug_rand_hflip
        self._aug_scale_min = aug_scale_min
        self._aug_scale_max = aug_scale_max

        if aug_type and aug_type.type:
            if aug_type.type == 'autoaug':
                self._augmenter = augment.AutoAugment(
                    augmentation_name=aug_type.autoaug.augmentation_name,
                    cutout_const=aug_type.autoaug.cutout_const,
                    translate_const=aug_type.autoaug.translate_const)
            else:
                raise ValueError(
                    'Augmentation policy {} not supported.'.format(
                        aug_type.type))
        else:
            self._augmenter = None

        self._dtype = dtype

        self._sigma = sigma
        self._gaussian, self._gaussian_size = _compute_gaussian_from_std(
            self._sigma)
        self._gaussian = tf.reshape(self._gaussian, shape=[-1])
        self._small_instance_area_threshold = small_instance_area_threshold
        self._small_instance_weight = small_instance_weight
Example 12
  def __init__(self,
               output_size,
               min_level,
               max_level,
               num_scales,
               aspect_ratios,
               anchor_size,
               rpn_match_threshold=0.7,
               rpn_unmatched_threshold=0.3,
               rpn_batch_size_per_im=256,
               rpn_fg_fraction=0.5,
               aug_rand_hflip=False,
               aug_scale_min=1.0,
               aug_scale_max=1.0,
               aug_type: Optional[common.Augmentation] = None,
               skip_crowd_during_training=True,
               max_num_instances=100,
               include_mask=False,
               mask_crop_size=112,
               dtype='float32'):
    """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divisible by the largest feature stride 2^max_level.
      min_level: `int` number of minimum level of the output feature pyramid.
      max_level: `int` number of maximum level of the output feature pyramid.
      num_scales: `int` number representing intermediate scales added
        on each level. For instance, num_scales=2 adds one additional
        anchor scale per level, yielding scales [2^0, 2^0.5].
      aspect_ratios: `list` of float numbers representing the aspect ratio
        anchors added on each level. The number indicates the ratio of width to
        height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
        on each scale level.
      anchor_size: `float` number representing the scale of size of the base
        anchor to the feature stride 2^level.
      rpn_match_threshold: `float`, IoU threshold above which an anchor is
        assigned a positive label for RPN training.
      rpn_unmatched_threshold: `float`, IoU threshold below which an anchor is
        assigned a negative label for RPN training.
      rpn_batch_size_per_im: `int`, number of anchors sampled per image for
        computing the RPN loss.
      rpn_fg_fraction: `float`, fraction of the sampled RPN anchors that are
        foreground.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
      aug_scale_min: `float`, the minimum scale applied to `output_size` for
        data augmentation during training.
      aug_scale_max: `float`, the maximum scale applied to `output_size` for
        data augmentation during training.
      aug_type: An optional Augmentation object with params for AutoAugment.
        The AutoAug policy should not use rotation/translation/shear.
        Only in-place augmentations can be used.
      skip_crowd_during_training: `bool`, if True, skip annotations labeled
        with `is_crowd` equal to 1.
      max_num_instances: `int`, maximum number of instances in an image. The
        groundtruth data will be padded to `max_num_instances`.
      include_mask: a bool to indicate whether to parse mask groundtruth.
      mask_crop_size: the size to which the groundtruth mask is cropped.
      dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
    """

    self._max_num_instances = max_num_instances
    self._skip_crowd_during_training = skip_crowd_during_training

    # Anchor.
    self._output_size = output_size
    self._min_level = min_level
    self._max_level = max_level
    self._num_scales = num_scales
    self._aspect_ratios = aspect_ratios
    self._anchor_size = anchor_size

    # Target assigning.
    self._rpn_match_threshold = rpn_match_threshold
    self._rpn_unmatched_threshold = rpn_unmatched_threshold
    self._rpn_batch_size_per_im = rpn_batch_size_per_im
    self._rpn_fg_fraction = rpn_fg_fraction

    # Data augmentation.
    self._aug_rand_hflip = aug_rand_hflip
    self._aug_scale_min = aug_scale_min
    self._aug_scale_max = aug_scale_max

    if aug_type and aug_type.type:
      if aug_type.type == 'autoaug':
        self._augmenter = augment.AutoAugment(
            augmentation_name=aug_type.autoaug.augmentation_name,
            cutout_const=aug_type.autoaug.cutout_const,
            translate_const=aug_type.autoaug.translate_const)
      elif aug_type.type == 'randaug':
        self._augmenter = augment.RandAugment(
            num_layers=aug_type.randaug.num_layers,
            magnitude=aug_type.randaug.magnitude,
            cutout_const=aug_type.randaug.cutout_const,
            translate_const=aug_type.randaug.translate_const,
            prob_to_apply=aug_type.randaug.prob_to_apply,
            exclude_ops=aug_type.randaug.exclude_ops)
      else:
        raise ValueError('Augmentation policy {} not supported.'.format(
            aug_type.type))
    else:
      self._augmenter = None

    # Mask.
    self._include_mask = include_mask
    self._mask_crop_size = mask_crop_size

    # Image output dtype.
    self._dtype = dtype
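
The docstring above warns that the AutoAugment policy for this detection parser must avoid rotation/translation/shear, because the boxes are not adjusted for geometric ops. A hypothetical policy restricted to in-place (color-only) operations, expressed with the `policies=` keyword from Example 1; the op names beyond 'Equalize' are assumed to be present in the module's operation table:

    from official.vision.ops import augment  # import path assumed

    # In-place ops only: no Rotate/Shear/Translate, so the boxes stay valid.
    color_only_policy = [
        [('Equalize', 0.8, 1), ('AutoContrast', 0.6, 5)],
        [('Posterize', 0.6, 7), ('Color', 0.4, 3)],
    ]
    augmenter = augment.AutoAugment(policies=color_only_policy)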
Example 13
    def __init__(self,
                 output_size,
                 min_level,
                 max_level,
                 num_scales,
                 aspect_ratios,
                 anchor_size,
                 match_threshold=0.5,
                 unmatched_threshold=0.5,
                 aug_type=None,
                 aug_rand_hflip=False,
                 aug_scale_min=1.0,
                 aug_scale_max=1.0,
                 use_autoaugment=False,
                 autoaugment_policy_name='v0',
                 skip_crowd_during_training=True,
                 max_num_instances=100,
                 dtype='bfloat16',
                 mode=None):
        """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divisible by the largest feature stride 2^max_level.
      min_level: `int` number of minimum level of the output feature pyramid.
      max_level: `int` number of maximum level of the output feature pyramid.
      num_scales: `int` number representing intermediate scales added on each
        level. For instance, num_scales=2 adds one additional anchor scale per
        level, yielding scales [2^0, 2^0.5].
      aspect_ratios: `list` of float numbers representing the aspect ratio
        anchors added on each level. The number indicates the ratio of width to
        height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
        on each scale level.
      anchor_size: `float` number representing the scale of size of the base
        anchor to the feature stride 2^level.
      match_threshold: `float` number between 0 and 1 representing the
        lower-bound threshold to assign positive labels for anchors. An anchor
        with a score over the threshold is labeled positive.
      unmatched_threshold: `float` number between 0 and 1 representing the
        upper-bound threshold to assign negative labels for anchors. An anchor
        with a score below the threshold is labeled negative.
      aug_type: An optional Augmentation object to choose from AutoAugment and
        RandAugment.
      aug_rand_hflip: `bool`, if True, augment training with random horizontal
        flip.
      aug_scale_min: `float`, the minimum scale applied to `output_size` for
        data augmentation during training.
      aug_scale_max: `float`, the maximum scale applied to `output_size` for
        data augmentation during training.
      use_autoaugment: `bool`, if True, use the AutoAugment augmentation policy
        during training.
      autoaugment_policy_name: `string` that specifies the name of the
        AutoAugment policy that will be used during training.
      skip_crowd_during_training: `bool`, if True, skip annotations labeled
        with `is_crowd` equal to 1.
      max_num_instances: `int`, maximum number of instances in an image. The
        groundtruth data will be padded to `max_num_instances`.
      dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
      mode: a ModeKeys. Specifies if this is training, evaluation, prediction or
        prediction with groundtruths in the outputs.
    """
        self._mode = mode
        self._max_num_instances = max_num_instances
        self._skip_crowd_during_training = skip_crowd_during_training

        # Anchor.
        self._output_size = output_size
        self._min_level = min_level
        self._max_level = max_level
        self._num_scales = num_scales
        self._aspect_ratios = aspect_ratios
        self._anchor_size = anchor_size
        self._match_threshold = match_threshold
        self._unmatched_threshold = unmatched_threshold

        # Data augmentation.
        self._aug_rand_hflip = aug_rand_hflip
        self._aug_scale_min = aug_scale_min
        self._aug_scale_max = aug_scale_max

        # Data augmentation with AutoAugment or RandAugment.
        self._augmenter = None
        if aug_type is not None:
            if aug_type.type == 'autoaug':
                logging.info('Using AutoAugment.')
                self._augmenter = augment.AutoAugment(
                    augmentation_name=aug_type.autoaug.augmentation_name,
                    cutout_const=aug_type.autoaug.cutout_const,
                    translate_const=aug_type.autoaug.translate_const)
            elif aug_type.type == 'randaug':
                logging.info('Using RandAugment.')
                self._augmenter = augment.RandAugment.build_for_detection(
                    num_layers=aug_type.randaug.num_layers,
                    magnitude=aug_type.randaug.magnitude,
                    cutout_const=aug_type.randaug.cutout_const,
                    translate_const=aug_type.randaug.translate_const,
                    prob_to_apply=aug_type.randaug.prob_to_apply,
                    exclude_ops=aug_type.randaug.exclude_ops)
            else:
                raise ValueError(
                    f'Augmentation policy {aug_type.type} not supported.')

        # Deprecated. Data Augmentation with AutoAugment.
        self._use_autoaugment = use_autoaugment
        self._autoaugment_policy_name = autoaugment_policy_name

        # Data type.
        self._dtype = dtype
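
Unlike Examples 7 and 12, the RandAugment branch here goes through the `build_for_detection` factory, which is presumably what keeps the sampled ops compatible with box labels. A hedged usage sketch; the import path, the normalized [ymin, xmin, ymax, xmax] box format (suggested by the unit-range boxes in Example 6), and the availability of `distort_with_boxes` on the returned augmenter are assumptions:

    import tensorflow as tf
    from official.vision.ops import augment  # import path assumed

    # Illustrative values; the parser reads these from aug_type.randaug.
    augmenter = augment.RandAugment.build_for_detection(
        num_layers=2,
        magnitude=10,
        cutout_const=40,
        translate_const=100,
        prob_to_apply=None,
        exclude_ops=None)

    image = tf.zeros((640, 640, 3), dtype=tf.uint8)
    boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]], tf.float32)  # assumed normalized boxes
    aug_image, aug_boxes = augmenter.distort_with_boxes(image, boxes)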