Code example #1
File: augment_test.py  Project: Desaiakshata/models
    def test_randaug_video(self):
        """Smoke test with video to be sure there are no syntax errors."""
        image = tf.zeros((2, 224, 224, 3), dtype=tf.uint8)

        augmenter = augment.RandAugment()
        aug_image = augmenter.distort(image)

        self.assertEqual((2, 224, 224, 3), aug_image.shape)
Code example #2
  def __init__(self,
               output_size: List[int],
               num_classes: float,
               image_field_key: str = DEFAULT_IMAGE_FIELD_KEY,
               label_field_key: str = DEFAULT_LABEL_FIELD_KEY,
               aug_rand_hflip: bool = True,
               aug_type: Optional[common.Augmentation] = None,
               is_multilabel: bool = False,
               dtype: str = 'float32'):
    """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divisible by the largest feature stride 2^max_level.
      num_classes: `float`, number of classes.
      image_field_key: `str`, the key name to encoded image in tf.Example.
      label_field_key: `str`, the key name to label in tf.Example.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
      aug_type: An optional Augmentation object to choose from AutoAugment and
        RandAugment.
      is_multilabel: A `bool`, whether or not each example has multiple labels.
      dtype: `str`, cast output image in dtype. It can be 'float32', 'float16',
        or 'bfloat16'.
    """
    self._output_size = output_size
    self._aug_rand_hflip = aug_rand_hflip
    self._num_classes = num_classes
    self._image_field_key = image_field_key
    if dtype == 'float32':
      self._dtype = tf.float32
    elif dtype == 'float16':
      self._dtype = tf.float16
    elif dtype == 'bfloat16':
      self._dtype = tf.bfloat16
    else:
      raise ValueError('dtype {!r} is not supported!'.format(dtype))
    if aug_type:
      if aug_type.type == 'autoaug':
        self._augmenter = augment.AutoAugment(
            augmentation_name=aug_type.autoaug.augmentation_name,
            cutout_const=aug_type.autoaug.cutout_const,
            translate_const=aug_type.autoaug.translate_const)
      elif aug_type.type == 'randaug':
        self._augmenter = augment.RandAugment(
            num_layers=aug_type.randaug.num_layers,
            magnitude=aug_type.randaug.magnitude,
            cutout_const=aug_type.randaug.cutout_const,
            translate_const=aug_type.randaug.translate_const,
            prob_to_apply=aug_type.randaug.prob_to_apply)
      else:
        raise ValueError('Augmentation policy {} not supported.'.format(
            aug_type.type))
    else:
      self._augmenter = None
    self._label_field_key = label_field_key
    self._is_multilabel = is_multilabel
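
A note on exercising this constructor in isolation: it only reads `type` plus the `autoaug.*` or `randaug.*` sub-fields from `aug_type`. A minimal sketch of the 'randaug' branch using a `types.SimpleNamespace` stand-in for the config object; the project presumably passes its own Augmentation config, and the import path and field values below are assumptions:

import types
from official.vision.ops import augment  # import path assumed; use the project's own augment module

# Stand-in that mirrors only the attributes the constructor above reads.
aug_type = types.SimpleNamespace(
    type='randaug',
    randaug=types.SimpleNamespace(
        num_layers=2, magnitude=10, cutout_const=40,
        translate_const=100, prob_to_apply=None))

# Passing this as `aug_type` takes the 'randaug' branch, i.e. it builds:
augmenter = augment.RandAugment(
    num_layers=aug_type.randaug.num_layers,
    magnitude=aug_type.randaug.magnitude,
    cutout_const=aug_type.randaug.cutout_const,
    translate_const=aug_type.randaug.translate_const,
    prob_to_apply=aug_type.randaug.prob_to_apply)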
Code example #3
File: augment_test.py  Project: Desaiakshata/models
    def test_randaug_with_bboxes(self):
        """Smoke test to be sure there are no syntax errors with bboxes."""
        image = tf.zeros((224, 224, 3), dtype=tf.uint8)
        bboxes = tf.ones((2, 4), dtype=tf.float32)

        augmenter = augment.RandAugment()
        aug_image, aug_bboxes = augmenter.distort_with_boxes(image, bboxes)

        self.assertEqual((224, 224, 3), aug_image.shape)
        self.assertEqual((2, 4), aug_bboxes.shape)
Code example #4
  def __init__(self,
               output_size: List[int],
               num_classes: float,
               image_field_key: str = 'image/encoded',
               label_field_key: str = 'image/class/label',
               aug_rand_hflip: bool = True,
               aug_policy: Optional[str] = None,
               randaug_magnitude: Optional[int] = 10,
               dtype: str = 'float32'):
    """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divisible by the largest feature stride 2^max_level.
      num_classes: `float`, number of classes.
      image_field_key: A `str` of the key name to encoded image in TFExample.
      label_field_key: A `str` of the key name to label in TFExample.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
      aug_policy: `str`, augmentation policies. None, 'autoaug', or 'randaug'.
      randaug_magnitude: `int`, magnitude of the randaugment policy.
      dtype: `str`, cast output image in dtype. It can be 'float32', 'float16',
        or 'bfloat16'.
    """
    self._output_size = output_size
    self._aug_rand_hflip = aug_rand_hflip
    self._num_classes = num_classes
    self._image_field_key = image_field_key
    self._label_field_key = label_field_key

    if dtype == 'float32':
      self._dtype = tf.float32
    elif dtype == 'float16':
      self._dtype = tf.float16
    elif dtype == 'bfloat16':
      self._dtype = tf.bfloat16
    else:
      raise ValueError('dtype {!r} is not supported!'.format(dtype))
    if aug_policy:
      if aug_policy == 'autoaug':
        self._augmenter = augment.AutoAugment()
      elif aug_policy == 'randaug':
        self._augmenter = augment.RandAugment(
            num_layers=2, magnitude=randaug_magnitude)
      else:
        raise ValueError(
            'Augmentation policy {} not supported.'.format(aug_policy))
    else:
      self._augmenter = None
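
The dtype if/elif chains in examples #2, #4, and #5 can also be collapsed with `tf.dtypes.as_dtype`, which is the route example #6 below takes. A minimal sketch of the equivalent mapping; the helper name is made up, and the whitelist keeps the same error behaviour:

import tensorflow as tf

def resolve_image_dtype(dtype: str) -> tf.dtypes.DType:
  # Same three options the parsers above accept.
  if dtype not in ('float32', 'float16', 'bfloat16'):
    raise ValueError('dtype {!r} is not supported!'.format(dtype))
  return tf.dtypes.as_dtype(dtype)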
Code example #5
    def __init__(self,
                 output_size,
                 aug_policy,
                 scale=[128, 448],
                 dtype='float32'):
        """Initializes parameters for parsing annotations in the dataset.
    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divided by the largest feature stride 2^max_level.
      num_classes: `float`, number of classes.
      aug_rand_saturation: `bool`, if True, augment training with random
        saturation.
      aug_rand_brightness: `bool`, if True, augment training with random
        brightness.
      aug_rand_zoom: `bool`, if True, augment training with random
        zoom.
      aug_rand_rotate: `bool`, if True, augment training with random
        rotate.
      aug_rand_hue: `bool`, if True, augment training with random
        hue.
      aug_rand_aspect: `bool`, if True, augment training with random
        aspect.
      scale: 'list', `Tensor` or `list` for [low, high] of the bounds of the random
        scale.
      seed: an `int` for the seed used by tf.random
    """
        self._output_size = output_size
        if aug_policy:
            if aug_policy == 'autoaug':
                self._augmenter = augment.AutoAugment()
            elif aug_policy == 'randaug':
                self._augmenter = augment.RandAugment(num_layers=2,
                                                      magnitude=20)
            else:
                raise ValueError(
                    'Augmentation policy {} not supported.'.format(aug_policy))
        else:
            self._augmenter = None

        self._scale = scale
        if dtype == 'float32':
            self._dtype = tf.float32
        elif dtype == 'float16':
            self._dtype = tf.float16
        elif dtype == 'bfloat16':
            self._dtype = tf.bfloat16
        else:
            raise ValueError('dtype {!r} is not supported!'.format(dtype))
Code example #6
    def __init__(self,
                 input_params: exp_cfg.DataConfig,
                 image_key: str = IMAGE_KEY,
                 label_key: str = LABEL_KEY):
        self._num_frames = input_params.feature_shape[0]
        self._stride = input_params.temporal_stride
        self._random_stride_range = input_params.random_stride_range
        self._num_test_clips = input_params.num_test_clips
        self._min_resize = input_params.min_image_size
        self._crop_size = input_params.feature_shape[1]
        self._num_crops = input_params.num_test_crops
        self._one_hot_label = input_params.one_hot
        self._num_classes = input_params.num_classes
        self._image_key = image_key
        self._label_key = label_key
        self._dtype = tf.dtypes.as_dtype(input_params.dtype)
        self._output_audio = input_params.output_audio
        self._min_aspect_ratio = input_params.aug_min_aspect_ratio
        self._max_aspect_ratio = input_params.aug_max_aspect_ratio
        self._min_area_ratio = input_params.aug_min_area_ratio
        self._max_area_ratio = input_params.aug_max_area_ratio
        if self._output_audio:
            self._audio_feature = input_params.audio_feature
            self._audio_shape = input_params.audio_feature_shape

        self._augmenter = None
        if input_params.aug_type is not None:
            aug_type = input_params.aug_type
            if aug_type == 'autoaug':
                logging.info('Using AutoAugment.')
                self._augmenter = augment.AutoAugment()
            elif aug_type == 'randaug':
                logging.info('Using RandAugment.')
                self._augmenter = augment.RandAugment()
            else:
                raise ValueError(
                    'Augmentation policy {} is not supported.'.format(
                        aug_type))
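
This video parser builds the augmenters with their default arguments. Example #1 above shows that `RandAugment.distort` also accepts a rank-4 (frames, height, width, channels) tensor, so a whole clip can be augmented in one call. A minimal sketch along those lines; the import path and clip shape are illustrative:

import tensorflow as tf
from official.vision.ops import augment  # import path assumed; use the project's own augment module

augmenter = augment.RandAugment()  # defaults, as in the parser above

clip = tf.zeros((8, 224, 224, 3), dtype=tf.uint8)  # (frames, height, width, channels)
aug_clip = augmenter.distort(clip)
assert aug_clip.shape == (8, 224, 224, 3)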
Code example #7
    def __init__(self,
                 output_size,
                 input_size: List[int],
                 anchor_per_scale: int,
                 num_classes: int,
                 max_bbox_per_scale: int,
                 strides: List,
                 anchors: List,
                 aug_policy: Optional[str] = None,
                 randaug_magnitude: Optional[int] = 10,
                 randaug_available_ops: Optional[List[str]] = None,
                 aug_rand_hflip=False,
                 aug_scale_min=1.0,
                 aug_scale_max=1.0,
                 preserve_aspect_ratio=True,
                 aug_jitter_im=0.1,
                 aug_jitter_boxes=0.005,
                 dtype='float32'):
        """Initializes parameters for parsing annotations in the dataset.
    !!! Augmentation ops assume that boxes are in yxyx format, non-normalized
      (top-left, bottom-right corner coordinates), in pixels.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divisible by the largest feature stride 2^max_level.
      input_size: `List[int]`, shape of the image input.
      anchor_per_scale: `int`, number of anchors per scale.
      num_classes: `int`, number of classes.
      max_bbox_per_scale: `int`, maximum number of bounding boxes per scale.
      strides: `List[int]` of output strides, the ratio of input to output resolution.
      anchors: `tf.Tensor` of shape (None, anchor_per_scale, 2) denoting anchor
        positions.
      aug_policy: `str`, augmentation policy; None or 'randaug' (TODO: support 'autoaug').
      randaug_magnitude: `int`, magnitude of the RandAugment policy.
      randaug_available_ops: `List[str]`, the augmentation ops available to RandAugment.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
      aug_scale_min: `float`, the minimum scale applied to `output_size` for
        data augmentation during training.
      aug_scale_max: `float`, the maximum scale applied to `output_size` for
        data augmentation during training.
      preserve_aspect_ratio: `bool`, whether to preserve aspect ratio during resize
      aug_jitter_im: `float`, pixel value of maximum jitter applied to the image
      aug_jitter_boxes: `float`, pixel value of maximum jitter applied to bbox
      dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
    """
        self._output_size = output_size
        self._input_size = input_size

        # yolo true boxes processing
        self.train_output_sizes = input_size[0] // np.array(strides)
        self.anchor_per_scale = anchor_per_scale
        self.num_classes = num_classes
        self.max_bbox_per_scale = max_bbox_per_scale
        self.strides = strides
        self.anchors = tf.constant(anchors, dtype=tf.float32)
        self.anchors = tf.reshape(self.anchors, [
            int(len(anchors) / self.anchor_per_scale / 2),
            self.anchor_per_scale, 2
        ])

        # Data augmentation.
        self._aug_rand_hflip = aug_rand_hflip
        self._aug_scale_min = aug_scale_min
        self._aug_scale_max = aug_scale_max
        self._preserve_aspect_ratio = preserve_aspect_ratio
        self._aug_jitter_im = aug_jitter_im
        self._aug_jitter_boxes = aug_jitter_boxes

        if aug_policy:
            # Ops that change the shape of the mask (any form of translation / rotation).
            if aug_policy == 'randaug':
                self._augmenter = augment.RandAugment(
                    num_layers=2,
                    magnitude=randaug_magnitude,
                    available_ops=randaug_available_ops)
            else:
                raise ValueError(
                    'Augmentation policy {} not supported.'.format(aug_policy))
        else:
            self._augmenter = None

        # dtype.
        self._dtype = dtype
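
The anchor reshape above implies that `anchors` is a flat list of (width, height) values covering every scale: with `anchor_per_scale=3` and 3 scales, that is 18 numbers reshaped to (3, 3, 2). A minimal sketch of that arithmetic with illustrative anchor sizes:

import tensorflow as tf

anchor_per_scale = 3
# Flat list of 9 (width, height) pairs = 18 numbers; the sizes are illustrative.
anchors = [10, 13, 16, 30, 33, 23,        # scale 0
           30, 61, 62, 45, 59, 119,       # scale 1
           116, 90, 156, 198, 373, 326]   # scale 2

num_scales = int(len(anchors) / anchor_per_scale / 2)  # 18 / 3 / 2 = 3
anchors_t = tf.reshape(tf.constant(anchors, dtype=tf.float32),
                       [num_scales, anchor_per_scale, 2])
assert anchors_t.shape == (3, 3, 2)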
Code example #8
    def __init__(self,
                 output_size: List[int],
                 num_classes: float,
                 image_field_key: str = DEFAULT_IMAGE_FIELD_KEY,
                 label_field_key: str = DEFAULT_LABEL_FIELD_KEY,
                 decode_jpeg_only: bool = True,
                 aug_rand_hflip: bool = True,
                 aug_type: Optional[common.Augmentation] = None,
                 color_jitter: float = 0.,
                 random_erasing: Optional[common.RandomErasing] = None,
                 is_multilabel: bool = False,
                 dtype: str = 'float32'):
        """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divisible by the largest feature stride 2^max_level.
      num_classes: `float`, number of classes.
      image_field_key: `str`, the key name to encoded image in tf.Example.
      label_field_key: `str`, the key name to label in tf.Example.
      decode_jpeg_only: `bool`, if True, only the JPEG format is decoded; this
        is faster than decoding other formats. Default is True.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
      aug_type: An optional Augmentation object to choose from AutoAugment and
        RandAugment.
      color_jitter: Magnitude of color jitter. If > 0, the value is used to
        generate random scale factor for brightness, contrast and saturation.
        See `preprocess_ops.color_jitter` for more details.
      random_erasing: if not None, augment input image by random erasing. See
        `augment.RandomErasing` for more details.
      is_multilabel: A `bool`, whether or not each example has multiple labels.
      dtype: `str`, cast output image in dtype. It can be 'float32', 'float16',
        or 'bfloat16'.
    """
        self._output_size = output_size
        self._aug_rand_hflip = aug_rand_hflip
        self._num_classes = num_classes
        self._image_field_key = image_field_key
        if dtype == 'float32':
            self._dtype = tf.float32
        elif dtype == 'float16':
            self._dtype = tf.float16
        elif dtype == 'bfloat16':
            self._dtype = tf.bfloat16
        else:
            raise ValueError('dtype {!r} is not supported!'.format(dtype))
        if aug_type:
            if aug_type.type == 'autoaug':
                self._augmenter = augment.AutoAugment(
                    augmentation_name=aug_type.autoaug.augmentation_name,
                    cutout_const=aug_type.autoaug.cutout_const,
                    translate_const=aug_type.autoaug.translate_const)
            elif aug_type.type == 'randaug':
                self._augmenter = augment.RandAugment(
                    num_layers=aug_type.randaug.num_layers,
                    magnitude=aug_type.randaug.magnitude,
                    cutout_const=aug_type.randaug.cutout_const,
                    translate_const=aug_type.randaug.translate_const,
                    prob_to_apply=aug_type.randaug.prob_to_apply,
                    exclude_ops=aug_type.randaug.exclude_ops)
            else:
                raise ValueError(
                    'Augmentation policy {} not supported.'.format(
                        aug_type.type))
        else:
            self._augmenter = None
        self._label_field_key = label_field_key
        self._color_jitter = color_jitter
        if random_erasing:
            self._random_erasing = augment.RandomErasing(
                probability=random_erasing.probability,
                min_area=random_erasing.min_area,
                max_area=random_erasing.max_area,
                min_aspect=random_erasing.min_aspect,
                max_aspect=random_erasing.max_aspect,
                min_count=random_erasing.min_count,
                max_count=random_erasing.max_count,
                trials=random_erasing.trials)
        else:
            self._random_erasing = None
        self._is_multilabel = is_multilabel
        self._decode_jpeg_only = decode_jpeg_only
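
As with `aug_type`, the constructor only reads a fixed set of attributes from `random_erasing`, so the same `types.SimpleNamespace` trick from the sketch under example #2 works here. A minimal sketch of the random-erasing branch; the field values and import path are illustrative:

import types
from official.vision.ops import augment  # import path assumed; use the project's own augment module

random_erasing = types.SimpleNamespace(
    probability=0.25, min_area=0.02, max_area=0.33,
    min_aspect=0.3, max_aspect=3.3,
    min_count=1, max_count=1, trials=10)

# Mirrors the branch in the constructor above.
eraser = augment.RandomErasing(
    probability=random_erasing.probability,
    min_area=random_erasing.min_area,
    max_area=random_erasing.max_area,
    min_aspect=random_erasing.min_aspect,
    max_aspect=random_erasing.max_aspect,
    min_count=random_erasing.min_count,
    max_count=random_erasing.max_count,
    trials=random_erasing.trials)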
Code example #9
    def __init__(self,
                 output_size,
                 crop_size=None,
                 resize_eval_groundtruth=True,
                 groundtruth_padded_size=None,
                 ignore_label=255,
                 aug_rand_hflip=False,
                 aug_policy: Optional[str] = None,
                 randaug_magnitude: Optional[int] = 10,
                 randaug_available_ops: Optional[List[str]] = None,
                 aug_scale_min=1.0,
                 aug_scale_max=1.0,
                 preserve_aspect_ratio=True,
                 rotate_min=0.0,
                 rotate_max=0.0,
                 bright_min=1.0,
                 bright_max=1.0,
                 dtype='float32'):
        """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divisible by the largest feature stride 2^max_level.
      crop_size: `Tensor` or `list` for [height, width] of the crop. If
        specified a training crop of size crop_size is returned. This is useful
        for cropping original images during training while evaluating on
        original image sizes.
      resize_eval_groundtruth: `bool`, if True, eval groundtruth masks are
        resized to output_size.
      groundtruth_padded_size: `Tensor` or `list` for [height, width]. When
        resize_eval_groundtruth is set to False, the groundtruth masks are
        padded to this size.
      ignore_label: `int`, pixels with the ignore label will not be used for
        training and evaluation.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
      aug_policy: `str`, augmentation policy; None or 'randaug' (TODO: support 'autoaug').
      randaug_magnitude: `int`, magnitude of the RandAugment policy.
      randaug_available_ops: `List[str]`, the augmentation ops available to RandAugment.
      aug_scale_min: `float`, the minimum scale applied to `output_size` for
        data augmentation during training.
      aug_scale_max: `float`, the maximum scale applied to `output_size` for
        data augmentation during training.
      preserve_aspect_ratio: `bool`, whether to preserve aspect ratio during resize
      rotate_min: `float`, the minimum rotation applied to the image for data
        augmentation during training.
      rotate_max: `float`, the maximum rotation applied to the image for data
        augmentation during training.
      bright_min: `float`, the minimum brightness applied to the image for data
        augmentation during training.
      bright_max: `float`, the maximum brightness applied to the image for data
        augmentation during training.
      dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
    """
        self._output_size = output_size
        self._crop_size = crop_size
        self._resize_eval_groundtruth = resize_eval_groundtruth
        if (not resize_eval_groundtruth) and (groundtruth_padded_size is None):
            raise ValueError(
                'groundtruth_padded_size ([height, width]) needs to be '
                'specified when resize_eval_groundtruth is False.')
        self._groundtruth_padded_size = groundtruth_padded_size
        self._ignore_label = ignore_label

        # Data augmentation.
        self._aug_rand_hflip = aug_rand_hflip
        self._aug_scale_min = aug_scale_min
        self._aug_scale_max = aug_scale_max
        self._preserve_aspect_ratio = preserve_aspect_ratio
        self._bright_min = bright_min
        self._bright_max = bright_max
        self._rotate_min = rotate_min
        self._rotate_max = rotate_max

        if aug_policy:
            # Ops that change the shape of the mask (any form of translation / rotation).
            if aug_policy == 'randaug':
                self._augmenter = augment.RandAugment(
                    num_layers=2,
                    magnitude=randaug_magnitude,
                    available_ops=randaug_available_ops)
            else:
                raise ValueError(
                    'Augmentation policy {} not supported.'.format(aug_policy))
        else:
            self._augmenter = None

        # dtype.
        self._dtype = dtype
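
The comment above hints at why this parser exposes `randaug_available_ops`: geometric ops would also have to transform the segmentation mask. A minimal sketch restricting RandAugment to colour-only ops; the op names are assumed to match the augment module's defaults and may need adjusting:

from official.vision.ops import augment  # import path assumed; use the project's own augment module

# Colour-only ops leave pixel positions, and hence the mask, untouched.
color_only_ops = ['AutoContrast', 'Equalize', 'Color', 'Contrast',
                  'Brightness', 'Sharpness', 'Posterize', 'Solarize']

augmenter = augment.RandAugment(
    num_layers=2,
    magnitude=10,
    available_ops=color_only_ops)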