def test_custom_policy(self):
  """Checks that AutoAugment built from a custom policy keeps the image shape."""
  image_shape = (224, 224, 3)
  blank_image = tf.zeros(image_shape, dtype=tf.uint8)

  # The policy comes from the test-case helper rather than a named preset.
  custom_policies = self._generate_test_policy()
  distorted = augment.AutoAugment(policies=custom_policies).distort(blank_image)

  self.assertEqual((224, 224, 3), distorted.shape)
def test_invalid_custom_policy_shape(self):
  """Checks that a policy whose entries carry four fields is rejected.

  Each entry below has one field more than the three the error message says
  are expected, so constructing the augmenter must raise `ValueError`.
  """
  first_sub_policy = [('Equalize', 0.8, 1, 1), ('Shear', 0.8, 4, 1)]
  second_sub_policy = [('TranslateY', 0.6, 3, 1), ('Rotate', 0.9, 3, 1)]
  malformed_policy = [first_sub_policy, second_sub_policy]

  expected_message = r'Expected \(:, :, 3\) but got \(2, 2, 4\)'
  with self.assertRaisesRegex(ValueError, expected_message):
    augment.AutoAugment(policies=malformed_policy)
def test_autoaugment(self):
  """Smoke test: default AutoAugment runs and preserves the image shape."""
  image_shape = (224, 224, 3)
  blank_image = tf.zeros(image_shape, dtype=tf.uint8)

  # No arguments: exercise the augmenter with its default configuration.
  distorted = augment.AutoAugment().distort(blank_image)

  self.assertEqual((224, 224, 3), distorted.shape)
def test_invalid_custom_policy_key(self):
  """Checks that an unknown op name in a custom policy raises `KeyError`.

  The augmenter is constructed outside the `assertRaisesRegex` context, so
  the error is expected from `distort`, not from the constructor.
  """
  policy_with_unknown_op = [
      [('AAAAA', 0.8, 1), ('Shear', 0.8, 4)],
      [('TranslateY', 0.6, 3), ('Rotate', 0.9, 3)],
  ]
  blank_image = tf.zeros((224, 224, 3), dtype=tf.uint8)
  augmenter = augment.AutoAugment(policies=policy_with_unknown_op)

  with self.assertRaisesRegex(KeyError, '\'AAAAA\''):
    augmenter.distort(blank_image)
def test_autoaugment_video(self):
  """Smoke test: every available policy preserves a 4-D input's shape."""
  clip_shape = (2, 224, 224, 3)
  blank_clip = tf.zeros(clip_shape, dtype=tf.uint8)

  for policy_name in self.AVAILABLE_POLICIES:
    distorted = augment.AutoAugment(
        augmentation_name=policy_name).distort(blank_clip)
    # Checked per policy so a failure points at the offending policy run.
    self.assertEqual((2, 224, 224, 3), distorted.shape)
def __init__(self,
             output_size: List[int],
             num_classes: float,
             image_field_key: str = DEFAULT_IMAGE_FIELD_KEY,
             label_field_key: str = DEFAULT_LABEL_FIELD_KEY,
             aug_rand_hflip: bool = True,
             aug_type: Optional[common.Augmentation] = None,
             is_multilabel: bool = False,
             dtype: str = 'float32'):
  """Stores the parsing configuration and builds the optional augmenter.

  Args:
    output_size: `Tensor` or `list` for [height, width] of output image. The
      output_size should be divided by the largest feature stride
      2^max_level.
    num_classes: `float`, number of classes.
    image_field_key: `str`, the key name to encoded image in tf.Example.
    label_field_key: `str`, the key name to label in tf.Example.
    aug_rand_hflip: `bool`, if True, augment training with random horizontal
      flip.
    aug_type: An optional Augmentation object selecting AutoAugment
      ('autoaug') or RandAugment ('randaug'). A falsy value disables the
      augmenter.
    is_multilabel: A `bool`, whether or not each example has multiple labels.
    dtype: `str`, cast output image in dtype. It can be 'float32',
      'float16', or 'bfloat16'.

  Raises:
    ValueError: If `dtype` is not one of the supported names, or if
      `aug_type.type` is neither 'autoaug' nor 'randaug'.
  """
  self._output_size = output_size
  self._aug_rand_hflip = aug_rand_hflip
  self._num_classes = num_classes
  self._image_field_key = image_field_key

  # Map the dtype name onto the matching TensorFlow dtype; the for/else
  # raises only when none of the supported names matched.
  supported_dtypes = (
      ('float32', tf.float32),
      ('float16', tf.float16),
      ('bfloat16', tf.bfloat16),
  )
  for dtype_name, tf_dtype in supported_dtypes:
    if dtype == dtype_name:
      self._dtype = tf_dtype
      break
  else:
    raise ValueError('dtype {!r} is not supported!'.format(dtype))

  # Build the augmenter into a local first so the attribute is only set
  # once a valid configuration has been resolved.
  augmenter = None
  if aug_type:
    if aug_type.type == 'autoaug':
      autoaug_cfg = aug_type.autoaug
      augmenter = augment.AutoAugment(
          augmentation_name=autoaug_cfg.augmentation_name,
          cutout_const=autoaug_cfg.cutout_const,
          translate_const=autoaug_cfg.translate_const)
    elif aug_type.type == 'randaug':
      randaug_cfg = aug_type.randaug
      augmenter = augment.RandAugment(
          num_layers=randaug_cfg.num_layers,
          magnitude=randaug_cfg.magnitude,
          cutout_const=randaug_cfg.cutout_const,
          translate_const=randaug_cfg.translate_const,
          prob_to_apply=randaug_cfg.prob_to_apply)
    else:
      raise ValueError('Augmentation policy {} not supported.'.format(
          aug_type.type))
  self._augmenter = augmenter

  self._label_field_key = label_field_key
  self._is_multilabel = is_multilabel
def test_invalid_custom_policy_ndim(self):
  """Checks that a policy nested two levels too deep is rejected."""
  well_formed_policy = [
      [('Equalize', 0.8, 1), ('Shear', 0.8, 4)],
      [('TranslateY', 0.6, 3), ('Rotate', 0.9, 3)],
  ]
  # Wrapping in two extra lists yields the (1, 1, 2, 2, 3) shape named in
  # the expected error message.
  over_nested_policy = [[well_formed_policy]]

  expected_message = r'Expected \(:, :, 3\) but got \(1, 1, 2, 2, 3\).'
  with self.assertRaisesRegex(ValueError, expected_message):
    augment.AutoAugment(policies=over_nested_policy)
def test_autoaugment_video_with_boxes(self):
  """Smoke test: `distort_with_boxes` keeps image and box shapes per policy."""
  clip_shape = (2, 224, 224, 3)
  boxes_shape = (2, 2, 4)
  blank_clip = tf.zeros(clip_shape, dtype=tf.uint8)
  unit_boxes = tf.ones(boxes_shape, dtype=tf.float32)

  for policy_name in self.AVAILABLE_POLICIES:
    augmenter = augment.AutoAugment(augmentation_name=policy_name)
    distorted_clip, distorted_boxes = augmenter.distort_with_boxes(
        blank_clip, unit_boxes)
    # Both outputs must come back with the shapes that went in.
    self.assertEqual((2, 224, 224, 3), distorted_clip.shape)
    self.assertEqual((2, 2, 4), distorted_boxes.shape)
def test_invalid_custom_sub_policy(self, sub_policy, value):
  """Checks that an invalid sub-policy entry fails when distorting.

  Args:
    sub_policy: Entry written into position [0][0] of the generated test
      policy. Presumably supplied by a parameterized decorator that is not
      visible here.
    value: Text substituted into the 'Summarized data' part of the expected
      error-message regex.
  """
  tampered_policy = self._generate_test_policy()
  tampered_policy[0][0] = sub_policy

  blank_image = tf.zeros((224, 224, 3), dtype=tf.uint8)
  augmenter = augment.AutoAugment(policies=tampered_policy)

  # The two adjacent raw literals are concatenated before `.format` runs,
  # so `value` lands in the trailing `({})` group.
  expected_regex = (
      r'Expected \'tf.Tensor\(False, shape=\(\), dtype=bool\)\' to be true. '
      r'Summarized data: ({})').format(value)
  with self.assertRaisesRegex(tf.errors.InvalidArgumentError, expected_regex):
    augmenter.distort(blank_image)
def __init__(self,
             output_size: List[int],
             num_classes: float,
             image_field_key: str = 'image/encoded',
             label_field_key: str = 'image/class/label',
             aug_rand_hflip: bool = True,
             aug_policy: Optional[str] = None,
             randaug_magnitude: Optional[int] = 10,
             dtype: str = 'float32'):
  """Stores the parsing configuration and builds the optional augmenter.

  Args:
    output_size: `Tensor` or `list` for [height, width] of output image. The
      output_size should be divided by the largest feature stride
      2^max_level.
    num_classes: `float`, number of classes.
    image_field_key: A `str` of the key name to encoded image in TFExample.
    label_field_key: A `str` of the key name to label in TFExample.
    aug_rand_hflip: `bool`, if True, augment training with random horizontal
      flip.
    aug_policy: `str`, augmentation policy: None (or any falsy value) for no
      augmenter, 'autoaug', or 'randaug'.
    randaug_magnitude: `int`, magnitude passed to RandAugment when
      `aug_policy` is 'randaug'.
    dtype: `str`, cast output image in dtype. It can be 'float32',
      'float16', or 'bfloat16'.

  Raises:
    ValueError: If `dtype` is not one of the supported names, or if
      `aug_policy` is truthy but neither 'autoaug' nor 'randaug'.
  """
  self._output_size = output_size
  self._aug_rand_hflip = aug_rand_hflip
  self._num_classes = num_classes
  self._image_field_key = image_field_key
  self._label_field_key = label_field_key

  # Map the dtype name onto the matching TensorFlow dtype; the for/else
  # raises only when none of the supported names matched.
  supported_dtypes = (
      ('float32', tf.float32),
      ('float16', tf.float16),
      ('bfloat16', tf.bfloat16),
  )
  for dtype_name, tf_dtype in supported_dtypes:
    if dtype == dtype_name:
      self._dtype = tf_dtype
      break
  else:
    raise ValueError('dtype {!r} is not supported!'.format(dtype))

  # Flat chain: a falsy policy disables augmentation, otherwise the policy
  # name must be one of the two known augmenters.
  if not aug_policy:
    self._augmenter = None
  elif aug_policy == 'autoaug':
    self._augmenter = augment.AutoAugment()
  elif aug_policy == 'randaug':
    self._augmenter = augment.RandAugment(
        num_layers=2, magnitude=randaug_magnitude)
  else:
    raise ValueError(
        'Augmentation policy {} not supported.'.format(aug_policy))
def __init__(self, output_size, aug_policy, scale=None, dtype='float32'):
  """Initializes parameters for parsing annotations in the dataset.

  Args:
    output_size: `Tensor` or `list` for [height, width] of output image.
    aug_policy: `str`, augmentation policy: None (or any falsy value) for no
      augmenter, 'autoaug' for AutoAugment, or 'randaug' for RandAugment
      (built with num_layers=2 and magnitude=20).
    scale: `Tensor` or `list` for [low, high] of the bounds of the random
      scale. When None, a fresh `[128, 448]` list is used.
    dtype: `str`, one of 'float32', 'float16', or 'bfloat16'; stored as the
      corresponding TensorFlow dtype.

  Raises:
    ValueError: If `aug_policy` is truthy but neither 'autoaug' nor
      'randaug', or if `dtype` is not one of the supported names.
  """
  self._output_size = output_size

  if aug_policy:
    if aug_policy == 'autoaug':
      self._augmenter = augment.AutoAugment()
    elif aug_policy == 'randaug':
      self._augmenter = augment.RandAugment(num_layers=2, magnitude=20)
    else:
      raise ValueError(
          'Augmentation policy {} not supported.'.format(aug_policy))
  else:
    self._augmenter = None

  # A list literal as a default argument is created once and shared by every
  # instance that relies on it; build a new list per instance instead so an
  # in-place edit of one parser's scale cannot leak into another.
  self._scale = [128, 448] if scale is None else scale

  if dtype == 'float32':
    self._dtype = tf.float32
  elif dtype == 'float16':
    self._dtype = tf.float16
  elif dtype == 'bfloat16':
    self._dtype = tf.bfloat16
  else:
    raise ValueError('dtype {!r} is not supported!'.format(dtype))
def __init__(self,
             input_params: exp_cfg.DataConfig,
             image_key: str = IMAGE_KEY,
             label_key: str = LABEL_KEY):
  """Copies the relevant fields of `input_params` onto the parser.

  Args:
    input_params: Data config whose fields are read here (feature shape,
      temporal stride, clip/crop counts, label settings, dtype, audio
      settings, crop aspect/area ratio bounds, and `aug_type`).
    image_key: `str`, stored as the image key.
    label_key: `str`, stored as the label key.

  Raises:
    ValueError: If `input_params.aug_type` is not None and is neither
      'autoaug' nor 'randaug'.
  """
  cfg = input_params
  feature_shape = cfg.feature_shape

  # Temporal sampling.
  self._num_frames = feature_shape[0]
  self._stride = cfg.temporal_stride
  self._random_stride_range = cfg.random_stride_range
  self._num_test_clips = cfg.num_test_clips

  # Spatial resizing and cropping.
  self._min_resize = cfg.min_image_size
  self._crop_size = feature_shape[1]
  self._num_crops = cfg.num_test_crops

  # Labels, feature keys and output dtype.
  self._one_hot_label = cfg.one_hot
  self._num_classes = cfg.num_classes
  self._image_key = image_key
  self._label_key = label_key
  self._dtype = tf.dtypes.as_dtype(cfg.dtype)
  self._output_audio = cfg.output_audio

  # Bounds on the aspect ratio and area ratio used for augmentation.
  self._min_aspect_ratio = cfg.aug_min_aspect_ratio
  self._max_aspect_ratio = cfg.aug_max_aspect_ratio
  self._min_area_ratio = cfg.aug_min_area_ratio
  self._max_area_ratio = cfg.aug_max_area_ratio

  # Audio attributes are only defined when audio output is requested.
  if self._output_audio:
    self._audio_feature = cfg.audio_feature
    self._audio_shape = cfg.audio_feature_shape

  self._augmenter = None
  requested_aug = cfg.aug_type
  if requested_aug is not None:
    if requested_aug == 'autoaug':
      logging.info('Using AutoAugment.')
      self._augmenter = augment.AutoAugment()
    elif requested_aug == 'randaug':
      logging.info('Using RandAugment.')
      self._augmenter = augment.RandAugment()
    else:
      raise ValueError(
          'Augmentation policy {} is not supported.'.format(
              requested_aug))
def __init__(self,
             output_size: List[int],
             num_classes: float,
             image_field_key: str = DEFAULT_IMAGE_FIELD_KEY,
             label_field_key: str = DEFAULT_LABEL_FIELD_KEY,
             decode_jpeg_only: bool = True,
             aug_rand_hflip: bool = True,
             aug_type: Optional[common.Augmentation] = None,
             color_jitter: float = 0.,
             random_erasing: Optional[common.RandomErasing] = None,
             is_multilabel: bool = False,
             dtype: str = 'float32'):
  """Stores the parsing configuration and builds the optional augmenters.

  Args:
    output_size: `Tensor` or `list` for [height, width] of output image. The
      output_size should be divided by the largest feature stride
      2^max_level.
    num_classes: `float`, number of classes.
    image_field_key: `str`, the key name to encoded image in tf.Example.
    label_field_key: `str`, the key name to label in tf.Example.
    decode_jpeg_only: `bool`, if True, only JPEG format is decoded, this is
      faster than decoding other types. Default is True.
    aug_rand_hflip: `bool`, if True, augment training with random horizontal
      flip.
    aug_type: An optional Augmentation object selecting AutoAugment
      ('autoaug') or RandAugment ('randaug'). A falsy value disables the
      augmenter.
    color_jitter: Magnitude of color jitter. If > 0, the value is used to
      generate random scale factor for brightness, contrast and saturation.
      See `preprocess_ops.color_jitter` for more details.
    random_erasing: If truthy, its fields configure an
      `augment.RandomErasing` instance; otherwise random erasing is
      disabled. See `augment.RandomErasing` for more details.
    is_multilabel: A `bool`, whether or not each example has multiple labels.
    dtype: `str`, cast output image in dtype. It can be 'float32',
      'float16', or 'bfloat16'.

  Raises:
    ValueError: If `dtype` is not one of the supported names, or if
      `aug_type.type` is neither 'autoaug' nor 'randaug'.
  """
  self._output_size = output_size
  self._aug_rand_hflip = aug_rand_hflip
  self._num_classes = num_classes
  self._image_field_key = image_field_key

  # Map the dtype name onto the matching TensorFlow dtype; the for/else
  # raises only when none of the supported names matched.
  supported_dtypes = (
      ('float32', tf.float32),
      ('float16', tf.float16),
      ('bfloat16', tf.bfloat16),
  )
  for dtype_name, tf_dtype in supported_dtypes:
    if dtype == dtype_name:
      self._dtype = tf_dtype
      break
  else:
    raise ValueError('dtype {!r} is not supported!'.format(dtype))

  # Build the augmenter into a local first so the attribute is only set
  # once a valid configuration has been resolved.
  augmenter = None
  if aug_type:
    if aug_type.type == 'autoaug':
      autoaug_cfg = aug_type.autoaug
      augmenter = augment.AutoAugment(
          augmentation_name=autoaug_cfg.augmentation_name,
          cutout_const=autoaug_cfg.cutout_const,
          translate_const=autoaug_cfg.translate_const)
    elif aug_type.type == 'randaug':
      randaug_cfg = aug_type.randaug
      augmenter = augment.RandAugment(
          num_layers=randaug_cfg.num_layers,
          magnitude=randaug_cfg.magnitude,
          cutout_const=randaug_cfg.cutout_const,
          translate_const=randaug_cfg.translate_const,
          prob_to_apply=randaug_cfg.prob_to_apply,
          exclude_ops=randaug_cfg.exclude_ops)
    else:
      raise ValueError(
          'Augmentation policy {} not supported.'.format(
              aug_type.type))
  self._augmenter = augmenter

  self._label_field_key = label_field_key
  self._color_jitter = color_jitter

  # Every RandomErasing keyword is read from the same-named config field.
  eraser = None
  if random_erasing:
    erasing_fields = (
        'probability', 'min_area', 'max_area', 'min_aspect', 'max_aspect',
        'min_count', 'max_count', 'trials')
    erasing_kwargs = {
        field: getattr(random_erasing, field) for field in erasing_fields
    }
    eraser = augment.RandomErasing(**erasing_kwargs)
  self._random_erasing = eraser

  self._is_multilabel = is_multilabel
  self._decode_jpeg_only = decode_jpeg_only
def __init__(self,
             output_size,
             min_level,
             max_level,
             num_scales,
             aspect_ratios,
             anchor_size,
             match_threshold=0.5,
             unmatched_threshold=0.5,
             aug_type=None,
             aug_rand_hflip=False,
             aug_scale_min=1.0,
             aug_scale_max=1.0,
             use_autoaugment=False,
             autoaugment_policy_name='v0',
             skip_crowd_during_training=True,
             max_num_instances=100,
             dtype='bfloat16',
             mode=None):
  """Stores the anchor, augmentation and output settings for the parser.

  Args:
    output_size: `Tensor` or `list` for [height, width] of output image. The
      output_size should be divided by the largest feature stride
      2^max_level.
    min_level: `int` number of minimum level of the output feature pyramid.
    max_level: `int` number of maximum level of the output feature pyramid.
    num_scales: `int` number representing intermediate scales added on each
      level. For instance, num_scales=2 adds one additional intermediate
      anchor scale [2^0, 2^0.5] on each level.
    aspect_ratios: `list` of float numbers representing the aspect ratio
      anchors added on each level. The number indicates the ratio of width
      to height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three
      anchors on each scale level.
    anchor_size: `float` number representing the scale of size of the base
      anchor to the feature stride 2^level.
    match_threshold: `float` number between 0 and 1 representing the
      lower-bound threshold to assign positive labels for anchors. An anchor
      with a score over the threshold is labeled positive.
    unmatched_threshold: `float` number between 0 and 1 representing the
      upper-bound threshold to assign negative labels for anchors. An anchor
      with a score below the threshold is labeled negative.
    aug_type: An optional Augmentation object. Only the 'autoaug' type is
      accepted; any other type (including RandAugment) raises ValueError.
    aug_rand_hflip: `bool`, if True, augment training with random horizontal
      flip.
    aug_scale_min: `float`, the minimum scale applied to `output_size` for
      data augmentation during training.
    aug_scale_max: `float`, the maximum scale applied to `output_size` for
      data augmentation during training.
    use_autoaugment: `bool`, if True, use the AutoAugment augmentation policy
      during training. Deprecated in favor of `aug_type`.
    autoaugment_policy_name: `string` that specifies the name of the
      AutoAugment policy that will be used during training. Deprecated.
    skip_crowd_during_training: `bool`, if True, skip annotations labeled
      with `is_crowd` equals to 1.
    max_num_instances: `int` number of maximum number of instances in an
      image. The groundtruth data will be padded to `max_num_instances`.
    dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
      Stored as given, without conversion.
    mode: a ModeKeys. Specifies if this is training, evaluation, prediction
      or prediction with groundtruths in the outputs.

  Raises:
    ValueError: If `aug_type` is given and its type is not 'autoaug'.
  """
  # General parsing behavior.
  self._mode = mode
  self._max_num_instances = max_num_instances
  self._skip_crowd_during_training = skip_crowd_during_training

  # Anchor generation and label assignment.
  self._output_size = output_size
  self._min_level = min_level
  self._max_level = max_level
  self._num_scales = num_scales
  self._aspect_ratios = aspect_ratios
  self._anchor_size = anchor_size
  self._match_threshold = match_threshold
  self._unmatched_threshold = unmatched_threshold

  # Flip and scale-jitter augmentation.
  self._aug_rand_hflip = aug_rand_hflip
  self._aug_scale_min = aug_scale_min
  self._aug_scale_max = aug_scale_max

  # Policy-based augmentation: reject unsupported types up front, then build
  # the AutoAugment instance.
  self._augmenter = None
  if aug_type is not None:
    if aug_type.type != 'autoaug':
      # TODO(b/205346436) Support RandAugment.
      raise ValueError(
          f'Augmentation policy {aug_type.type} not supported.')
    logging.info('Using AutoAugment.')
    autoaug_cfg = aug_type.autoaug
    self._augmenter = augment.AutoAugment(
        augmentation_name=autoaug_cfg.augmentation_name,
        cutout_const=autoaug_cfg.cutout_const,
        translate_const=autoaug_cfg.translate_const)

  # Deprecated AutoAugment switches, still stored as passed.
  self._use_autoaugment = use_autoaugment
  self._autoaugment_policy_name = autoaugment_policy_name

  # Output data type (kept as the string that was passed in).
  self._dtype = dtype