def test_randaug_video(self):
  """Smoke-checks that RandAugment accepts a rank-4 (video-like) tensor.

  An all-zero uint8 tensor of shape (2, 224, 224, 3) is passed through
  `RandAugment.distort`; the test only asserts that the output keeps the
  same shape, i.e. that the call runs end to end.
  """
  expected_shape = (2, 224, 224, 3)
  frames = tf.zeros(expected_shape, dtype=tf.uint8)
  policy = augment.RandAugment()
  distorted = policy.distort(frames)
  self.assertEqual(expected_shape, distorted.shape)
def __init__(self,
             output_size: List[int],
             num_classes: float,
             image_field_key: str = DEFAULT_IMAGE_FIELD_KEY,
             label_field_key: str = DEFAULT_LABEL_FIELD_KEY,
             aug_rand_hflip: bool = True,
             aug_type: Optional[common.Augmentation] = None,
             is_multilabel: bool = False,
             dtype: str = 'float32'):
  """Stores the parsing and augmentation settings for the dataset.

  Args:
    output_size: `Tensor` or `list` holding the [height, width] of the
      output image. It should be divisible by the largest feature stride
      2^max_level.
    num_classes: `float`, number of classes.
    image_field_key: `str`, key of the encoded image in tf.Example.
    label_field_key: `str`, key of the label in tf.Example.
    aug_rand_hflip: `bool`, if True, training is augmented with random
      horizontal flips.
    aug_type: Optional Augmentation object selecting AutoAugment
      (`type == 'autoaug'`) or RandAugment (`type == 'randaug'`). A falsy
      value disables the augmenter.
    is_multilabel: `bool`, whether each example carries multiple labels.
    dtype: `str`, dtype the output image is cast to. One of 'float32',
      'float16' or 'bfloat16'.

  Raises:
    ValueError: If `dtype` is not one of the supported names, or if
      `aug_type.type` is neither 'autoaug' nor 'randaug'.
  """
  self._output_size = output_size
  self._aug_rand_hflip = aug_rand_hflip
  self._num_classes = num_classes
  self._image_field_key = image_field_key

  # Compare with `==` in a fixed order (rather than hashing into a dict) so
  # that matching behaves exactly like an if/elif chain.
  supported_dtypes = (
      ('float32', tf.float32),
      ('float16', tf.float16),
      ('bfloat16', tf.bfloat16),
  )
  for dtype_name, tf_dtype in supported_dtypes:
    if dtype == dtype_name:
      self._dtype = tf_dtype
      break
  else:
    raise ValueError('dtype {!r} is not supported!'.format(dtype))

  if not aug_type:
    self._augmenter = None
  elif aug_type.type == 'autoaug':
    self._augmenter = augment.AutoAugment(
        augmentation_name=aug_type.autoaug.augmentation_name,
        cutout_const=aug_type.autoaug.cutout_const,
        translate_const=aug_type.autoaug.translate_const)
  elif aug_type.type == 'randaug':
    self._augmenter = augment.RandAugment(
        num_layers=aug_type.randaug.num_layers,
        magnitude=aug_type.randaug.magnitude,
        cutout_const=aug_type.randaug.cutout_const,
        translate_const=aug_type.randaug.translate_const,
        prob_to_apply=aug_type.randaug.prob_to_apply)
  else:
    raise ValueError('Augmentation policy {} not supported.'.format(
        aug_type.type))

  self._label_field_key = label_field_key
  self._is_multilabel = is_multilabel
def test_randaug_with_bboxes(self):
  """Smoke-checks `RandAugment.distort_with_boxes` on an image plus boxes.

  A zero-filled uint8 image of shape (224, 224, 3) and a float32 tensor of
  ones shaped (2, 4) are augmented together; the test asserts only that both
  outputs keep their input shapes.
  """
  image_shape = (224, 224, 3)
  boxes_shape = (2, 4)
  blank_image = tf.zeros(image_shape, dtype=tf.uint8)
  unit_boxes = tf.ones(boxes_shape, dtype=tf.float32)
  policy = augment.RandAugment()
  outputs = policy.distort_with_boxes(blank_image, unit_boxes)
  distorted_image, distorted_boxes = outputs
  self.assertEqual(image_shape, distorted_image.shape)
  self.assertEqual(boxes_shape, distorted_boxes.shape)
def __init__(self,
             output_size: List[int],
             num_classes: float,
             image_field_key: str = 'image/encoded',
             label_field_key: str = 'image/class/label',
             aug_rand_hflip: bool = True,
             aug_policy: Optional[str] = None,
             randaug_magnitude: Optional[int] = 10,
             dtype: str = 'float32'):
  """Stores the parsing and augmentation settings for the dataset.

  Args:
    output_size: `Tensor` or `list` holding the [height, width] of the
      output image. It should be divisible by the largest feature stride
      2^max_level.
    num_classes: `float`, number of classes.
    image_field_key: `str`, key of the encoded image in TFExample.
    label_field_key: `str`, key of the label in TFExample.
    aug_rand_hflip: `bool`, if True, training is augmented with random
      horizontal flips.
    aug_policy: `str`, augmentation policy: None (or any falsy value) for
      no augmenter, 'autoaug', or 'randaug'.
    randaug_magnitude: `int`, magnitude passed to RandAugment when
      `aug_policy` is 'randaug' (RandAugment is built with `num_layers=2`).
    dtype: `str`, dtype the output image is cast to. One of 'float32',
      'float16' or 'bfloat16'.

  Raises:
    ValueError: If `dtype` or `aug_policy` is not one of the supported
      values.
  """
  self._output_size = output_size
  self._aug_rand_hflip = aug_rand_hflip
  self._num_classes = num_classes
  self._image_field_key = image_field_key
  self._label_field_key = label_field_key

  # Compare with `==` in a fixed order (rather than hashing into a dict) so
  # that matching behaves exactly like an if/elif chain.
  supported_dtypes = (
      ('float32', tf.float32),
      ('float16', tf.float16),
      ('bfloat16', tf.bfloat16),
  )
  for dtype_name, tf_dtype in supported_dtypes:
    if dtype == dtype_name:
      self._dtype = tf_dtype
      break
  else:
    raise ValueError('dtype {!r} is not supported!'.format(dtype))

  if not aug_policy:
    self._augmenter = None
  elif aug_policy == 'autoaug':
    self._augmenter = augment.AutoAugment()
  elif aug_policy == 'randaug':
    self._augmenter = augment.RandAugment(
        num_layers=2, magnitude=randaug_magnitude)
  else:
    raise ValueError(
        'Augmentation policy {} not supported.'.format(aug_policy))
def __init__(self, output_size, aug_policy, scale=None, dtype='float32'):
  """Stores the output size, augmenter, random-scale bounds and dtype.

  Args:
    output_size: `Tensor` or `list` for [height, width] of output image. The
      output_size should be divided by the largest feature stride
      2^max_level.
    aug_policy: `str`, augmentation policy. None (or any falsy value)
      disables the augmenter; 'autoaug' builds a default `AutoAugment`;
      'randaug' builds `RandAugment(num_layers=2, magnitude=20)`.
    scale: `Tensor` or `list` for [low, high] of the bounds of the random
      scale. When None, a fresh `[128, 448]` list is used for this
      instance.
    dtype: `str`, output dtype. One of 'float32', 'float16' or 'bfloat16'.

  Raises:
    ValueError: If `aug_policy` or `dtype` is not one of the supported
      values.
  """
  self._output_size = output_size

  if not aug_policy:
    self._augmenter = None
  elif aug_policy == 'autoaug':
    self._augmenter = augment.AutoAugment()
  elif aug_policy == 'randaug':
    self._augmenter = augment.RandAugment(num_layers=2, magnitude=20)
  else:
    raise ValueError(
        'Augmentation policy {} not supported.'.format(aug_policy))

  # A list literal in the signature would be created once and shared by every
  # instance that relies on the default; build a new list per instance so
  # that mutating one parser's bounds cannot leak into another.
  self._scale = [128, 448] if scale is None else scale

  if dtype == 'float32':
    self._dtype = tf.float32
  elif dtype == 'float16':
    self._dtype = tf.float16
  elif dtype == 'bfloat16':
    self._dtype = tf.bfloat16
  else:
    raise ValueError('dtype {!r} is not supported!'.format(dtype))
def __init__(self,
             input_params: exp_cfg.DataConfig,
             image_key: str = IMAGE_KEY,
             label_key: str = LABEL_KEY):
  """Copies the settings this object needs out of `input_params`.

  Args:
    input_params: Data config. The fields read here are `feature_shape`
      (index 0 is stored as the number of frames, index 1 as the crop
      size), `temporal_stride`, `random_stride_range`, `num_test_clips`,
      `min_image_size`, `num_test_crops`, `one_hot`, `num_classes`, `dtype`,
      `output_audio`, the four `aug_{min,max}_{aspect,area}_ratio` values,
      `aug_type`, and, only when `output_audio` is truthy, `audio_feature`
      and `audio_feature_shape`.
    image_key: `str`, key stored as the image key.
    label_key: `str`, key stored as the label key.

  Raises:
    ValueError: If `input_params.aug_type` is not None, 'autoaug' or
      'randaug'.
  """
  cfg = input_params

  # Temporal sampling and spatial cropping.
  self._num_frames = cfg.feature_shape[0]
  self._stride = cfg.temporal_stride
  self._random_stride_range = cfg.random_stride_range
  self._num_test_clips = cfg.num_test_clips
  self._min_resize = cfg.min_image_size
  self._crop_size = cfg.feature_shape[1]
  self._num_crops = cfg.num_test_crops

  # Labels, feature keys and output dtype.
  self._one_hot_label = cfg.one_hot
  self._num_classes = cfg.num_classes
  self._image_key = image_key
  self._label_key = label_key
  self._dtype = tf.dtypes.as_dtype(cfg.dtype)
  self._output_audio = cfg.output_audio

  # Bounds for the aspect-ratio / area-ratio augmentation settings.
  self._min_aspect_ratio = cfg.aug_min_aspect_ratio
  self._max_aspect_ratio = cfg.aug_max_aspect_ratio
  self._min_area_ratio = cfg.aug_min_area_ratio
  self._max_area_ratio = cfg.aug_max_area_ratio

  # Audio settings are only read when audio output is enabled.
  if self._output_audio:
    self._audio_feature = cfg.audio_feature
    self._audio_shape = cfg.audio_feature_shape

  # Default to no augmenter; it is replaced below for a recognised policy.
  self._augmenter = None
  policy = cfg.aug_type
  if policy == 'autoaug':
    logging.info('Using AutoAugment.')
    self._augmenter = augment.AutoAugment()
  elif policy == 'randaug':
    logging.info('Using RandAugment.')
    self._augmenter = augment.RandAugment()
  elif policy is not None:
    raise ValueError(
        'Augmentation policy {} is not supported.'.format(policy))
def __init__(self,
             output_size,
             input_size: List[int],
             anchor_per_scale: int,
             num_classes: int,
             max_bbox_per_scale: int,
             strides: List,
             anchors: List,
             aug_policy: Optional[str] = None,
             randaug_magnitude: Optional[int] = 10,
             randaug_available_ops: Optional[List[str]] = None,
             aug_rand_hflip=False,
             aug_scale_min=1.0,
             aug_scale_max=1.0,
             preserve_aspect_ratio=True,
             aug_jitter_im=0.1,
             aug_jitter_boxes=0.005,
             dtype='float32'):
  """Stores the box-processing and augmentation settings for the dataset.

  Note: augmentation ops assume that boxes are in yxyx format (top-left,
  bottom-right corners), non-normalized, in pixels.

  Args:
    output_size: `Tensor` or `list` holding the [height, width] of the
      output image. It should be divisible by the largest feature stride
      2^max_level.
    input_size: `List[int]`, shape of the image input. Its first entry is
      floor-divided by `strides` to obtain `train_output_sizes`.
    anchor_per_scale: `int`, number of anchors per scale.
    num_classes: `int`, number of classes.
    max_bbox_per_scale: `int`, maximum number of bounding boxes per scale.
    strides: `List[int]` of output strides, i.e. the ratio of input to
      output resolution.
    anchors: `List` of anchor values. It is converted to a float32 tensor
      and reshaped to
      (len(anchors) / anchor_per_scale / 2, anchor_per_scale, 2).
    aug_policy: `str`, augmentation policy: None (or any falsy value) or
      'randaug'. 'autoaug' is not supported yet.
    randaug_magnitude: `int`, magnitude of the RandAugment policy.
    randaug_available_ops: `List[str]`, augmentations RandAugment may use.
    aug_rand_hflip: `bool`, if True, training is augmented with random
      horizontal flips.
    aug_scale_min: `float`, minimum scale applied to `output_size` for data
      augmentation during training.
    aug_scale_max: `float`, maximum scale applied to `output_size` for data
      augmentation during training.
    preserve_aspect_ratio: `bool`, whether to preserve the aspect ratio
      during resize.
    aug_jitter_im: `float`, pixel value of the maximum jitter applied to the
      image.
    aug_jitter_boxes: `float`, pixel value of the maximum jitter applied to
      the boxes.
    dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}. It
      is stored as given.

  Raises:
    ValueError: If `aug_policy` is truthy and not 'randaug'.
  """
  self._output_size = output_size
  self._input_size = input_size

  # Settings used for processing the YOLO ground-truth boxes.
  self.train_output_sizes = input_size[0] // np.array(strides)
  self.anchor_per_scale = anchor_per_scale
  self.num_classes = num_classes
  self.max_bbox_per_scale = max_bbox_per_scale
  self.strides = strides
  anchor_values = tf.constant(anchors, dtype=tf.float32)
  # Each anchor takes two values, so the leading dimension is the number of
  # groups of `anchor_per_scale` anchors.
  num_scales = int(len(anchors) / anchor_per_scale / 2)
  self.anchors = tf.reshape(anchor_values, [num_scales, anchor_per_scale, 2])

  # Data augmentation settings.
  self._aug_rand_hflip = aug_rand_hflip
  self._aug_scale_min = aug_scale_min
  self._aug_scale_max = aug_scale_max
  self._preserve_aspect_ratio = preserve_aspect_ratio
  self._aug_jitter_im = aug_jitter_im
  self._aug_jitter_boxes = aug_jitter_boxes

  if not aug_policy:
    self._augmenter = None
  elif aug_policy == 'randaug':
    self._augmenter = augment.RandAugment(
        num_layers=2,
        magnitude=randaug_magnitude,
        available_ops=randaug_available_ops)
  else:
    raise ValueError(
        'Augmentation policy {} not supported.'.format(aug_policy))

  # The dtype is kept as the string the caller supplied.
  self._dtype = dtype
def __init__(self,
             output_size: List[int],
             num_classes: float,
             image_field_key: str = DEFAULT_IMAGE_FIELD_KEY,
             label_field_key: str = DEFAULT_LABEL_FIELD_KEY,
             decode_jpeg_only: bool = True,
             aug_rand_hflip: bool = True,
             aug_type: Optional[common.Augmentation] = None,
             color_jitter: float = 0.,
             random_erasing: Optional[common.RandomErasing] = None,
             is_multilabel: bool = False,
             dtype: str = 'float32'):
  """Stores the parsing and augmentation settings for the dataset.

  Args:
    output_size: `Tensor` or `list` holding the [height, width] of the
      output image. It should be divisible by the largest feature stride
      2^max_level.
    num_classes: `float`, number of classes.
    image_field_key: `str`, key of the encoded image in tf.Example.
    label_field_key: `str`, key of the label in tf.Example.
    decode_jpeg_only: `bool`, if True, only JPEG format is decoded, which is
      faster than decoding other types. Default is True.
    aug_rand_hflip: `bool`, if True, training is augmented with random
      horizontal flips.
    aug_type: Optional Augmentation object selecting AutoAugment
      (`type == 'autoaug'`) or RandAugment (`type == 'randaug'`). A falsy
      value disables the augmenter.
    color_jitter: Magnitude of color jitter. If > 0, the value is used to
      generate a random scale factor for brightness, contrast and
      saturation. See `preprocess_ops.color_jitter` for more details.
    random_erasing: If truthy, an `augment.RandomErasing` is built from its
      fields so input images can be augmented by random erasing. See
      `augment.RandomErasing` for more details.
    is_multilabel: `bool`, whether each example carries multiple labels.
    dtype: `str`, dtype the output image is cast to. One of 'float32',
      'float16' or 'bfloat16'.

  Raises:
    ValueError: If `dtype` is not one of the supported names, or if
      `aug_type.type` is neither 'autoaug' nor 'randaug'.
  """
  self._output_size = output_size
  self._aug_rand_hflip = aug_rand_hflip
  self._num_classes = num_classes
  self._image_field_key = image_field_key

  # Compare with `==` in a fixed order (rather than hashing into a dict) so
  # that matching behaves exactly like an if/elif chain.
  supported_dtypes = (
      ('float32', tf.float32),
      ('float16', tf.float16),
      ('bfloat16', tf.bfloat16),
  )
  for dtype_name, tf_dtype in supported_dtypes:
    if dtype == dtype_name:
      self._dtype = tf_dtype
      break
  else:
    raise ValueError('dtype {!r} is not supported!'.format(dtype))

  if not aug_type:
    self._augmenter = None
  elif aug_type.type == 'autoaug':
    self._augmenter = augment.AutoAugment(
        augmentation_name=aug_type.autoaug.augmentation_name,
        cutout_const=aug_type.autoaug.cutout_const,
        translate_const=aug_type.autoaug.translate_const)
  elif aug_type.type == 'randaug':
    self._augmenter = augment.RandAugment(
        num_layers=aug_type.randaug.num_layers,
        magnitude=aug_type.randaug.magnitude,
        cutout_const=aug_type.randaug.cutout_const,
        translate_const=aug_type.randaug.translate_const,
        prob_to_apply=aug_type.randaug.prob_to_apply,
        exclude_ops=aug_type.randaug.exclude_ops)
  else:
    raise ValueError(
        'Augmentation policy {} not supported.'.format(
            aug_type.type))

  self._label_field_key = label_field_key
  self._color_jitter = color_jitter

  # Every RandomErasing argument is forwarded from the same-named config
  # field.
  erasing_fields = (
      'probability',
      'min_area',
      'max_area',
      'min_aspect',
      'max_aspect',
      'min_count',
      'max_count',
      'trials',
  )
  if random_erasing:
    erasing_kwargs = {
        field: getattr(random_erasing, field) for field in erasing_fields
    }
    self._random_erasing = augment.RandomErasing(**erasing_kwargs)
  else:
    self._random_erasing = None

  self._is_multilabel = is_multilabel
  self._decode_jpeg_only = decode_jpeg_only
def __init__(self,
             output_size,
             crop_size=None,
             resize_eval_groundtruth=True,
             groundtruth_padded_size=None,
             ignore_label=255,
             aug_rand_hflip=False,
             aug_policy: Optional[str] = None,
             randaug_magnitude: Optional[int] = 10,
             randaug_available_ops: Optional[List[str]] = None,
             aug_scale_min=1.0,
             aug_scale_max=1.0,
             preserve_aspect_ratio=True,
             rotate_min=0.0,
             rotate_max=0.0,
             bright_min=1.0,
             bright_max=1.0,
             dtype='float32'):
  """Initializes parameters for parsing annotations in the dataset.

  Args:
    output_size: `Tensor` or `list` for [height, width] of output image. The
      output_size should be divided by the largest feature stride
      2^max_level.
    crop_size: `Tensor` or `list` for [height, width] of the crop. If
      specified a training crop of size crop_size is returned. This is
      useful for cropping original images during training while evaluating
      on original image sizes.
    resize_eval_groundtruth: `bool`, if True, eval groundtruth masks are
      resized to output_size.
    groundtruth_padded_size: `Tensor` or `list` for [height, width]. When
      resize_eval_groundtruth is set to False, the groundtruth masks are
      padded to this size, so it must be provided in that case.
    ignore_label: `int`, pixels with the ignore label are not used for
      training and evaluation.
    aug_rand_hflip: `bool`, if True, augment training with random horizontal
      flip.
    aug_policy: `str`, augmentation policy: None (or any falsy value) or
      'randaug'. 'autoaug' is not supported yet.
    randaug_magnitude: `int`, magnitude of the randaugment policy.
    randaug_available_ops: `List[str]`, augmentations RandAugment may use.
    aug_scale_min: `float`, the minimum scale applied to `output_size` for
      data augmentation during training.
    aug_scale_max: `float`, the maximum scale applied to `output_size` for
      data augmentation during training.
    preserve_aspect_ratio: `bool`, whether to preserve aspect ratio during
      resize.
    rotate_min: `float`, the minimum rotation for data augmentation during
      training.
    rotate_max: `float`, the maximum rotation for data augmentation during
      training.
    bright_min: `float`, the minimum brightness for data augmentation during
      training.
    bright_max: `float`, the maximum brightness for data augmentation during
      training.
    dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}. It
      is stored as given.

  Raises:
    ValueError: If `resize_eval_groundtruth` is False and
      `groundtruth_padded_size` is None, or if `aug_policy` is truthy and
      not 'randaug'.
  """
  self._output_size = output_size
  self._crop_size = crop_size
  self._resize_eval_groundtruth = resize_eval_groundtruth
  if (not resize_eval_groundtruth) and (groundtruth_padded_size is None):
    # The two literals are concatenated by the parser, so the first must end
    # with a space to keep "be" and "specified" as separate words.
    raise ValueError(
        'groundtruth_padded_size ([height, width]) needs to be '
        'specified when resize_eval_groundtruth is False.')
  self._groundtruth_padded_size = groundtruth_padded_size
  self._ignore_label = ignore_label

  # Data augmentation.
  self._aug_rand_hflip = aug_rand_hflip
  self._aug_scale_min = aug_scale_min
  self._aug_scale_max = aug_scale_max
  self._preserve_aspect_ratio = preserve_aspect_ratio
  self._bright_min = bright_min
  self._bright_max = bright_max
  self._rotate_min = rotate_min
  self._rotate_max = rotate_max

  if not aug_policy:
    self._augmenter = None
  elif aug_policy == 'randaug':
    self._augmenter = augment.RandAugment(
        num_layers=2,
        magnitude=randaug_magnitude,
        available_ops=randaug_available_ops)
  else:
    raise ValueError(
        'Augmentation policy {} not supported.'.format(aug_policy))

  # The dtype is kept as the string the caller supplied.
  self._dtype = dtype