Code example #1
  def testReturnPaddedImageWithNonZeroPadValue(self):
    for dtype in [np.int32, np.int64, np.float32, np.float64]:
      image = np.dstack([[[5, 6],
                          [9, 0]],
                         [[4, 3],
                          [3, 5]]]).astype(dtype)
      expected_image = np.dstack([[[255, 255, 255, 255, 255],
                                   [255, 255, 255, 255, 255],
                                   [255, 5, 6, 255, 255],
                                   [255, 9, 0, 255, 255],
                                   [255, 255, 255, 255, 255]],
                                  [[255, 255, 255, 255, 255],
                                   [255, 255, 255, 255, 255],
                                   [255, 4, 3, 255, 255],
                                   [255, 3, 5, 255, 255],
                                   [255, 255, 255, 255, 255]]]).astype(dtype)

      with self.session() as sess:
        padded_image = preprocess_utils.pad_to_bounding_box(
            image, 2, 1, 5, 5, 255)
        padded_image = sess.run(padded_image)
        self.assertAllClose(padded_image, expected_image)
        # Add batch size = 1 to image.
        padded_image = preprocess_utils.pad_to_bounding_box(
            np.expand_dims(image, 0), 2, 1, 5, 5, 255)
        padded_image = sess.run(padded_image)
        self.assertAllClose(padded_image, np.expand_dims(expected_image, 0))
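For reference, the behavior this test and the variants below check can be sketched in plain NumPy (a minimal illustration of the expected semantics, not the library implementation; the helper name pad_to_bounding_box_np is invented here): the input is placed at (offset_height, offset_width) inside a target_height x target_width canvas filled with pad_value.

import numpy as np

def pad_to_bounding_box_np(image, offset_height, offset_width,
                           target_height, target_width, pad_value):
    # Place `image` at the given offsets inside a canvas filled with pad_value.
    height, width, channels = image.shape
    canvas = np.full((target_height, target_width, channels), pad_value,
                     dtype=image.dtype)
    canvas[offset_height:offset_height + height,
           offset_width:offset_width + width] = image
    return canvas

image = np.dstack([[[5, 6], [9, 0]], [[4, 3], [3, 5]]])
padded = pad_to_bounding_box_np(image, 2, 1, 5, 5, 255)
assert padded.shape == (5, 5, 2)
assert padded[2, 1, 0] == 5 and padded[3, 2, 0] == 0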
Code example #2
 def testDieOnTargetSizeGreaterThanImageSize(self):
     image = np.dstack([[[5, 6], [9, 0]], [[4, 3], [3, 5]]])
     with self.test_session():
         image_placeholder = tf.placeholder(tf.float32)
         padded_image = preprocess_utils.pad_to_bounding_box(
             image_placeholder, 0, 0, 2, 1, 255)
         with self.assertRaisesWithPredicateMatch(
                 errors.InvalidArgumentError,
                 'target_width must be >= width'):
             padded_image.eval(feed_dict={image_placeholder: image})
         padded_image = preprocess_utils.pad_to_bounding_box(
             image_placeholder, 0, 0, 1, 2, 255)
         with self.assertRaisesWithPredicateMatch(
                 errors.InvalidArgumentError,
                 'target_height must be >= height'):
             padded_image.eval(feed_dict={image_placeholder: image})
Code example #3
 def testReturnOriginalImageWhenTargetSizeIsEqualToImageSize(self):
     image = np.dstack([[[5, 6], [9, 0]], [[4, 3], [3, 5]]])
     with self.session() as sess:
         padded_image = preprocess_utils.pad_to_bounding_box(
             image, 0, 0, 2, 2, 255)
         padded_image = sess.run(padded_image)
         self.assertAllClose(padded_image, image)
Code example #4
    def testReturnOriginalImageWhenTargetSizeIsEqualToImageSize(self):
        image = np.dstack([[[5, 6], [9, 0]], [[4, 3], [3, 5]]])

        with self.test_session():
            image_placeholder = tf.placeholder(tf.float32)
            padded_image = preprocess_utils.pad_to_bounding_box(
                image_placeholder, 0, 0, 2, 2, 255)
            self.assertAllClose(
                padded_image.eval(feed_dict={image_placeholder: image}), image)
Code example #5
 def testDieIfImageTensorRankIsNotThree(self):
     image = np.vstack([[5, 6], [9, 0]])
     with self.test_session():
         image_placeholder = tf.placeholder(tf.float32)
         padded_image = preprocess_utils.pad_to_bounding_box(
             image_placeholder, 0, 0, 2, 2, 255)
         with self.assertRaisesWithPredicateMatch(
                 errors.InvalidArgumentError, 'Wrong image tensor rank'):
             padded_image.eval(feed_dict={image_placeholder: image})
Code example #6
 def testDieOnTargetSizeGreaterThanImageSize(self):
   image = np.dstack([[[5, 6],
                       [9, 0]],
                      [[4, 3],
                       [3, 5]]])
   with self.test_session():
     image_placeholder = tf.placeholder(tf.float32)
     padded_image = preprocess_utils.pad_to_bounding_box(
         image_placeholder, 0, 0, 2, 1, 255)
     with self.assertRaisesWithPredicateMatch(
         errors.InvalidArgumentError,
         'target_width must be >= width'):
       padded_image.eval(feed_dict={image_placeholder: image})
     padded_image = preprocess_utils.pad_to_bounding_box(
         image_placeholder, 0, 0, 1, 2, 255)
     with self.assertRaisesWithPredicateMatch(
         errors.InvalidArgumentError,
         'target_height must be >= height'):
       padded_image.eval(feed_dict={image_placeholder: image})
Code example #7
 def testReturnOriginalImageWhenTargetSizeIsEqualToImageSize(self):
   image = np.dstack([[[5, 6],
                       [9, 0]],
                      [[4, 3],
                       [3, 5]]])
   with self.session() as sess:
     padded_image = preprocess_utils.pad_to_bounding_box(
         image, 0, 0, 2, 2, 255)
     padded_image = sess.run(padded_image)
     self.assertAllClose(padded_image, image)
Code example #8
 def testDieIfTargetSizeNotPossibleWithGivenOffset(self):
     image = np.dstack([[[5, 6], [9, 0]], [[4, 3], [3, 5]]])
     with self.test_session():
         image_placeholder = tf.placeholder(tf.float32)
         padded_image = preprocess_utils.pad_to_bounding_box(
             image_placeholder, 3, 0, 4, 4, 255)
         with self.assertRaisesWithPredicateMatch(
                 errors.InvalidArgumentError,
                 'target size not possible with the given target offsets'):
             padded_image.eval(feed_dict={image_placeholder: image})
Code example #9
 def testDieIfImageTensorRankIsTwo(self):
   image = np.vstack([[5, 6],
                      [9, 0]])
   with self.test_session():
     image_placeholder = tf.placeholder(tf.float32)
     padded_image = preprocess_utils.pad_to_bounding_box(
         image_placeholder, 0, 0, 2, 2, 255)
     with self.assertRaisesWithPredicateMatch(
         errors.InvalidArgumentError,
         'Wrong image tensor rank'):
       padded_image.eval(feed_dict={image_placeholder: image})
Code example #10
  def testReturnOriginalImageWhenTargetSizeIsEqualToImageSize(self):
    image = np.dstack([[[5, 6],
                        [9, 0]],
                       [[4, 3],
                        [3, 5]]])

    with self.test_session():
      image_placeholder = tf.placeholder(tf.float32)
      padded_image = preprocess_utils.pad_to_bounding_box(
          image_placeholder, 0, 0, 2, 2, 255)
      self.assertAllClose(padded_image.eval(
          feed_dict={image_placeholder: image}), image)
Code example #11
 def testDieIfTargetSizeNotPossibleWithGivenOffset(self):
   image = np.dstack([[[5, 6],
                       [9, 0]],
                      [[4, 3],
                       [3, 5]]])
   with self.test_session():
     image_placeholder = tf.placeholder(tf.float32)
     padded_image = preprocess_utils.pad_to_bounding_box(
         image_placeholder, 3, 0, 4, 4, 255)
     with self.assertRaisesWithPredicateMatch(
         errors.InvalidArgumentError,
         'target size not possible with the given target offsets'):
       padded_image.eval(feed_dict={image_placeholder: image})
Code example #12
  def testReturnPaddedImageWithNonZeroPadValue(self):
    for dtype in [np.int32, np.int64, np.float32, np.float64]:
      image = np.dstack([[[5, 6],
                          [9, 0]],
                         [[4, 3],
                          [3, 5]]]).astype(dtype)
      expected_image = np.dstack([[[255, 255, 255, 255, 255],
                                   [255, 255, 255, 255, 255],
                                   [255, 5, 6, 255, 255],
                                   [255, 9, 0, 255, 255],
                                   [255, 255, 255, 255, 255]],
                                  [[255, 255, 255, 255, 255],
                                   [255, 255, 255, 255, 255],
                                   [255, 4, 3, 255, 255],
                                   [255, 3, 5, 255, 255],
                                   [255, 255, 255, 255, 255]]]).astype(dtype)

      with self.test_session():
        image_placeholder = tf.placeholder(tf.float32)
        padded_image = preprocess_utils.pad_to_bounding_box(
            image_placeholder, 2, 1, 5, 5, 255)
        self.assertAllClose(padded_image.eval(
            feed_dict={image_placeholder: image}), expected_image)
Code example #13
    def testReturnPaddedImageWithNonZeroPadValue(self):
        for dtype in [np.int32, np.int64, np.float32, np.float64]:
            image = np.dstack([[[5, 6], [9, 0]],
                               [[4, 3], [3, 5]]]).astype(dtype)
            expected_image = np.dstack([[[255, 255, 255, 255, 255],
                                         [255, 255, 255, 255, 255],
                                         [255, 5, 6, 255, 255],
                                         [255, 9, 0, 255, 255],
                                         [255, 255, 255, 255, 255]],
                                        [[255, 255, 255, 255, 255],
                                         [255, 255, 255, 255, 255],
                                         [255, 4, 3, 255, 255],
                                         [255, 3, 5, 255, 255],
                                         [255, 255, 255, 255, 255]]]).astype(dtype)

            with self.test_session():
                image_placeholder = tf.placeholder(tf.float32)
                padded_image = preprocess_utils.pad_to_bounding_box(
                    image_placeholder, 2, 1, 5, 5, 255)
                self.assertAllClose(
                    padded_image.eval(feed_dict={image_placeholder: image}),
                    expected_image)
Code example #14
def preprocess_images_and_labels_consistently(images,
                                              labels,
                                              crop_height,
                                              crop_width,
                                              min_resize_value=None,
                                              max_resize_value=None,
                                              resize_factor=None,
                                              min_scale_factor=1.,
                                              max_scale_factor=1.,
                                              scale_factor_step_size=0,
                                              ignore_label=255,
                                              is_training=True,
                                              model_variant=None):
    """Preprocesses images and labels in a consistent way.

  Similar to preprocess_image_and_label, but works on a list of images
  and a list of labels and uses the same crop coordinates and either flips
  all images and labels or none of them.

  Args:
    images: List of input images.
    labels: List of ground truth annotation labels.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.

  Returns:
    original_images: Original images (could be resized).
    processed_images: Preprocessed images.
    labels: Preprocessed ground truth segmentation labels.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
    if is_training and labels is None:
        raise ValueError('During training, labels must be provided.')
    if model_variant is None:
        tf.logging.warning(
            'Default mean-subtraction is performed. Please specify '
            'a model_variant. See feature_extractor.network_map for '
            'supported model variants.')
    if labels is not None:
        assert len(images) == len(labels)
    num_imgs = len(images)

    # Keep reference to original images.
    original_images = images

    processed_images = [tf.cast(image, tf.float32) for image in images]

    if labels is not None:
        labels = [tf.cast(label, tf.int32) for label in labels]

    # Resize images and labels to the desired range.
    if min_resize_value is not None or max_resize_value is not None:
        processed_images, labels = zip(*[
            preprocess_utils.resize_to_range(image=processed_image,
                                             label=label,
                                             min_size=min_resize_value,
                                             max_size=max_resize_value,
                                             factor=resize_factor,
                                             align_corners=True)
            for processed_image, label in zip(processed_images, labels)
        ])
        # The `original_images` becomes the resized images.
        original_images = [
            tf.identity(processed_image)
            for processed_image in processed_images
        ]

    # Data augmentation by randomly scaling the inputs.
    scale = get_random_scale(min_scale_factor, max_scale_factor,
                             scale_factor_step_size)
    processed_images, labels = zip(*[
        randomly_scale_image_and_label(processed_image, label, scale)
        for processed_image, label in zip(processed_images, labels)
    ])

    for processed_image in processed_images:
        processed_image.set_shape([None, None, 3])

    if crop_height is not None and crop_width is not None:
        # Pad image and label to have dimensions >= [crop_height, crop_width].
        image_shape = tf.shape(processed_images[0])
        image_height = image_shape[0]
        image_width = image_shape[1]

        target_height = image_height + tf.maximum(crop_height - image_height,
                                                  0)
        target_width = image_width + tf.maximum(crop_width - image_width, 0)

        # Pad image with mean pixel value.
        mean_pixel = tf.reshape(feature_extractor.mean_pixel(model_variant),
                                [1, 1, 3])
        processed_images = [
            preprocess_utils.pad_to_bounding_box(processed_image, 0, 0,
                                                 target_height, target_width,
                                                 mean_pixel)
            for processed_image in processed_images
        ]

        if labels is not None:
            labels = [
                preprocess_utils.pad_to_bounding_box(label, 0, 0,
                                                     target_height,
                                                     target_width,
                                                     ignore_label)
                for label in labels
            ]

        # Randomly crop the images and labels.
        if is_training and labels is not None:
            cropped = preprocess_utils.random_crop(processed_images + labels,
                                                   crop_height, crop_width)
            assert len(cropped) == 2 * num_imgs
            processed_images = cropped[:num_imgs]
            labels = cropped[num_imgs:]

        for processed_image in processed_images:
            processed_image.set_shape([crop_height, crop_width, 3])

        if labels is not None:
            for label in labels:
                label.set_shape([crop_height, crop_width, 1])

    if is_training:
        # Randomly left-right flip the image and label.
        res = preprocess_utils.flip_dim(list(processed_images + labels),
                                        _PROB_OF_FLIP,
                                        dim=1)
        maybe_flipped = res[:-1]
        assert len(maybe_flipped) == 2 * num_imgs
        processed_images = maybe_flipped[:num_imgs]
        labels = maybe_flipped[num_imgs:]

    return original_images, processed_images, labels
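A hypothetical call site for the list variant above (a sketch only; the frame shapes, crop size, scale range, and the 'xception_65' model_variant are assumptions, and the function plus its module imports are presumed available):

import tensorflow as tf

# Two frames from the same clip with their annotations (shapes assumed).
images = [tf.zeros([480, 640, 3], dtype=tf.uint8) for _ in range(2)]
labels = [tf.zeros([480, 640, 1], dtype=tf.uint8) for _ in range(2)]

originals, processed, out_labels = preprocess_images_and_labels_consistently(
    images, labels,
    crop_height=513, crop_width=513,
    min_scale_factor=0.5, max_scale_factor=2.0, scale_factor_step_size=0.25,
    ignore_label=255, is_training=True, model_variant='xception_65')
# Both frames get the same random scale, crop window, and flip decision.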
Code example #15
def preprocess_image_and_label(
        image,
        label,
        crop_height,
        crop_width,
        min_resize_value=None,
        max_resize_value=None,
        resize_factor=None,
        min_scale_factor=1.,
        max_scale_factor=1.,
        scale_factor_step_size=0,
        ignore_label=255,
        is_training=True,
        model_variant=None):
    """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
    if is_training and label is None:
        raise ValueError('During training, label must be provided.')
    if model_variant is None:  # No model_variant specified.
        tf.logging.warning(
            'Default mean-subtraction is performed. Please specify '
            'a model_variant. See feature_extractor.network_map for '
            'supported model variants.')

    # Keep reference to original image.
    original_image = image

    processed_image = tf.cast(image, tf.float32)  # Cast pixel values to float32.

    if label is not None:
        label = tf.cast(label, tf.int32)  # Cast the label to int32.

    # Resize image and label to the desired range.
    if min_resize_value or max_resize_value:  # Both are None in this setup, so this branch is skipped.
        [processed_image,
         label] = (preprocess_utils.resize_to_range(image=processed_image,
                                                    label=label,
                                                    min_size=min_resize_value,
                                                    max_size=max_resize_value,
                                                    factor=resize_factor,
                                                    align_corners=True))
        # The `original_image` becomes the resized image.
        original_image = tf.identity(processed_image)

    # Data augmentation by randomly scaling the inputs.
    if is_training:
        scale = preprocess_utils.get_random_scale(  # Draw a random scale factor.
            min_scale_factor, max_scale_factor, scale_factor_step_size)
        # Rescale the image and label with the sampled factor.
        processed_image, label = preprocess_utils.randomly_scale_image_and_label(
            processed_image, label, scale)
        processed_image.set_shape([None, None, 3])

    # Pad image and label to have dimensions >= [crop_height, crop_width].
    image_shape = tf.shape(processed_image)
    image_height = image_shape[0]
    image_width = image_shape[1]

    # Take the larger of the image size and the crop size in each dimension.
    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad image with mean pixel value.
    mean_pixel = tf.reshape(feature_extractor.mean_pixel(model_variant),
                            [1, 1, 3])
    # Pad up to target_height and target_width with the mean pixel (not zeros).
    processed_image = preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)

    if label is not None:
        label = preprocess_utils.pad_to_bounding_box(  # Pad the label with ignore_label.
            label, 0, 0, target_height, target_width, ignore_label)

    # Randomly crop the image and label.
    if is_training and label is not None:
        # Randomly crop the given list of tensors with one shared window.
        processed_image, label = preprocess_utils.random_crop(
            [processed_image, label], crop_height, crop_width)

    processed_image.set_shape([crop_height, crop_width, 3])

    if label is not None:
        label.set_shape([crop_height, crop_width, 1])

    if is_training:
        # Randomly left-right flip the image and label.
        processed_image, label, _ = preprocess_utils.flip_dim(
            [processed_image, label], _PROB_OF_FLIP, dim=1)

    return original_image, processed_image, label
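As a quick check of the padding arithmetic above: for a 400x700 input with a 513x513 crop, target_height = 400 + max(513 - 400, 0) = 513 and target_width = 700 + max(513 - 700, 0) = 700, so only the short side is padded and random_crop then picks a 513x513 window from the padded result.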
Code example #16
def preprocess_images_and_labels_consistently(images,
                                              labels,
                                              crop_height,
                                              crop_width,
                                              min_resize_value=None,
                                              max_resize_value=None,
                                              resize_factor=None,
                                              min_scale_factor=1.,
                                              max_scale_factor=1.,
                                              scale_factor_step_size=0,
                                              ignore_label=255,
                                              is_training=True,
                                              model_variant=None):
  """Preprocesses images and labels in a consistent way.

  Similar to preprocess_image_and_label, but works on a list of images
  and a list of labels and uses the same crop coordinates and either flips
  all images and labels or none of them.

  Args:
    images: List of input images.
    labels: List of ground truth annotation labels.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.

  Returns:
    original_images: Original images (could be resized).
    processed_images: Preprocessed images.
    labels: Preprocessed ground truth segmentation labels.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  if is_training and labels is None:
    raise ValueError('During training, labels must be provided.')
  if model_variant is None:
    tf.logging.warning('Default mean-subtraction is performed. Please specify '
                       'a model_variant. See feature_extractor.network_map for '
                       'supported model variants.')
  if labels is not None:
    assert len(images) == len(labels)
  num_imgs = len(images)

  # Keep reference to original images.
  original_images = images

  processed_images = [tf.cast(image, tf.float32) for image in images]

  if labels is not None:
    labels = [tf.cast(label, tf.int32) for label in labels]

  # Resize images and labels to the desired range.
  if min_resize_value is not None or max_resize_value is not None:
    processed_images, labels = zip(*[
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True) for processed_image, label
        in zip(processed_images, labels)])
    # The `original_images` becomes the resized images.
    original_images = [tf.identity(processed_image)
                       for processed_image in processed_images]

  # Data augmentation by randomly scaling the inputs.
  scale = get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_images, labels = zip(
      *[randomly_scale_image_and_label(processed_image, label, scale)
        for processed_image, label in zip(processed_images, labels)])

  for processed_image in processed_images:
    processed_image.set_shape([None, None, 3])

  if crop_height is not None and crop_width is not None:
    # Pad image and label to have dimensions >= [crop_height, crop_width].
    image_shape = tf.shape(processed_images[0])
    image_height = image_shape[0]
    image_width = image_shape[1]

    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad image with mean pixel value.
    mean_pixel = tf.reshape(
        feature_extractor.mean_pixel(model_variant), [1, 1, 3])
    processed_images = [preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)
                        for processed_image in processed_images]

    if labels is not None:
      labels = [preprocess_utils.pad_to_bounding_box(
          label, 0, 0, target_height, target_width, ignore_label)
                for label in labels]

    # Randomly crop the images and labels.
    if is_training and labels is not None:
      cropped = preprocess_utils.random_crop(
          processed_images + labels, crop_height, crop_width)
      assert len(cropped) == 2 * num_imgs
      processed_images = cropped[:num_imgs]
      labels = cropped[num_imgs:]

    for processed_image in processed_images:
      processed_image.set_shape([crop_height, crop_width, 3])

    if labels is not None:
      for label in labels:
        label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip the image and label.
    res = preprocess_utils.flip_dim(
        list(processed_images + labels), _PROB_OF_FLIP, dim=1)
    maybe_flipped = res[:-1]
    assert len(maybe_flipped) == 2 * num_imgs
    processed_images = maybe_flipped[:num_imgs]
    labels = maybe_flipped[num_imgs:]

  return original_images, processed_images, labels
Code example #17
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
  """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')
  if model_variant is None:
    tf.logging.warning('Default mean-subtraction is performed. Please specify '
                       'a model_variant. See feature_extractor.network_map for '
                       'supported model variants.')

  # Keep reference to original image.
  original_image = image

  processed_image = tf.cast(image, tf.float32)

  if label is not None:
    label = tf.cast(label, tf.int32)

  # Resize image and label to the desired range.
  if min_resize_value is not None or max_resize_value is not None:
    [processed_image, label] = (
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True))
    # The `original_image` becomes the resized image.
    original_image = tf.identity(processed_image)

  # Data augmentation by randomly scaling the inputs.
  scale = preprocess_utils.get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_image, label = preprocess_utils.randomly_scale_image_and_label(
      processed_image, label, scale)
  processed_image.set_shape([None, None, 3])

  # Pad image and label to have dimensions >= [crop_height, crop_width]
  image_shape = tf.shape(processed_image)
  image_height = image_shape[0]
  image_width = image_shape[1]

  target_height = image_height + tf.maximum(crop_height - image_height, 0)
  target_width = image_width + tf.maximum(crop_width - image_width, 0)

  # Pad image with mean pixel value.
  mean_pixel = tf.reshape(
      feature_extractor.mean_pixel(model_variant), [1, 1, 3])
  processed_image = preprocess_utils.pad_to_bounding_box(
      processed_image, 0, 0, target_height, target_width, mean_pixel)

  if label is not None:
    label = preprocess_utils.pad_to_bounding_box(
        label, 0, 0, target_height, target_width, ignore_label)

  # Randomly crop the image and label.
  if is_training and label is not None:
    processed_image, label = preprocess_utils.random_crop(
        [processed_image, label], crop_height, crop_width)

  processed_image.set_shape([crop_height, crop_width, 3])

  if label is not None:
    label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip the image and label.
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  return original_image, processed_image, label
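For contrast with the training-mode sketch earlier, a hypothetical evaluation-mode call (a sketch; the TF1-style placeholder, crop size, and model_variant are assumptions):

import tensorflow as tf

image = tf.placeholder(tf.uint8, shape=[None, None, 3])

# No label: random scaling is a no-op (scale factors default to 1.), and
# random cropping and flipping are skipped when is_training is False.
original, processed, _ = preprocess_image_and_label(
    image, label=None,
    crop_height=513, crop_width=513,
    is_training=False, model_variant='xception_65')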
Code example #18
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
    """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
    if is_training and label is None:
        raise ValueError('During training, label must be provided.')
    if model_variant is None:
        tf.logging.warning(
            'Default mean-subtraction is performed. Please specify '
            'a model_variant. See feature_extractor.network_map for '
            'supported model variants.')

    # Keep reference to original image.
    original_image = image

    processed_image = tf.cast(image, tf.float32)

    if label is not None:
        label = tf.cast(label, tf.int32)

    # Resize image and label to the desired range.
    if min_resize_value is not None or max_resize_value is not None:
        [processed_image,
         label] = (preprocess_utils.resize_to_range(image=processed_image,
                                                    label=label,
                                                    min_size=min_resize_value,
                                                    max_size=max_resize_value,
                                                    factor=resize_factor,
                                                    align_corners=True))
        # The `original_image` becomes the resized image.
        original_image = tf.identity(processed_image)

    # Data augmentation by randomly scaling the inputs.
    scale = get_random_scale(min_scale_factor, max_scale_factor,
                             scale_factor_step_size)
    processed_image, label = randomly_scale_image_and_label(
        processed_image, label, scale)

    processed_image.set_shape([None, None, 3])

    if crop_height is not None and crop_width is not None:
        # Pad image and label to have dimensions >= [crop_height, crop_width].
        image_shape = tf.shape(processed_image)
        image_height = image_shape[0]
        image_width = image_shape[1]

        target_height = image_height + tf.maximum(crop_height - image_height,
                                                  0)
        target_width = image_width + tf.maximum(crop_width - image_width, 0)

        # Pad image with mean pixel value.
        mean_pixel = tf.reshape(feature_extractor.mean_pixel(model_variant),
                                [1, 1, 3])
        processed_image = preprocess_utils.pad_to_bounding_box(
            processed_image, 0, 0, target_height, target_width, mean_pixel)

        if label is not None:
            label = preprocess_utils.pad_to_bounding_box(
                label, 0, 0, target_height, target_width, ignore_label)

        # Randomly crop the image and label.
        if is_training and label is not None:
            processed_image, label = preprocess_utils.random_crop(
                [processed_image, label], crop_height, crop_width)

        processed_image.set_shape([crop_height, crop_width, 3])

        if label is not None:
            label.set_shape([crop_height, crop_width, 1])

    if is_training:
        # Randomly left-right flip the image and label.
        processed_image, label, _ = preprocess_utils.flip_dim(
            [processed_image, label], _PROB_OF_FLIP, dim=1)

    return original_image, processed_image, label
Code example #19
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None,
                               non_uniform_sampling=None,
                               output_target_sampling=False):
    """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.
    non_uniform_sampling: If not None, one of 'net' or 'uniform'; applies
      non-uniform sampling to the processed image (and to the label when
      training), and the sampling locations are returned as a fourth value.
    output_target_sampling: If True, defer the min/max resize until after
      cropping and additionally return near-boundary sampling locations as a
      fourth value.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
    if is_training and label is None:
        raise ValueError('During training, label must be provided.')
    if model_variant is None:
        tf.logging.warning(
            'Default mean-subtraction is performed. Please specify '
            'a model_variant. See feature_extractor.network_map for '
            'supported model variants.')

    # Keep reference to original image.
    original_image = image

    processed_image = tf.cast(image, tf.float32)

    if label is not None:
        label = tf.cast(label, tf.int32)

    # Resize image and label to the desired range.
    if not output_target_sampling and (min_resize_value or max_resize_value):
        [processed_image,
         label] = (preprocess_utils.resize_to_range(image=processed_image,
                                                    label=label,
                                                    min_size=min_resize_value,
                                                    max_size=max_resize_value,
                                                    factor=resize_factor,
                                                    align_corners=True))
        # The `original_image` becomes the resized image.
        original_image = tf.identity(processed_image)

    # Data augmentation by randomly scaling the inputs.
    if is_training:
        scale = preprocess_utils.get_random_scale(min_scale_factor,
                                                  max_scale_factor,
                                                  scale_factor_step_size)
        processed_image, label = preprocess_utils.randomly_scale_image_and_label(
            processed_image, label, scale)
        processed_image.set_shape([None, None, 3])

    # Pad image and label to have dimensions >= [crop_height, crop_width]
    image_shape = tf.shape(processed_image)
    image_height = image_shape[0]
    image_width = image_shape[1]

    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad image with mean pixel value.
    mean_pixel = tf.reshape(feature_extractor.mean_pixel(model_variant),
                            [1, 1, 3])
    processed_image = preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)

    if label is not None:
        label = preprocess_utils.pad_to_bounding_box(label, 0, 0,
                                                     target_height,
                                                     target_width,
                                                     ignore_label)

    # Randomly crop the image and label.
    if is_training and label is not None:
        processed_image, label = preprocess_utils.random_crop(
            [processed_image, label], crop_height, crop_width)

    if not is_training and label is not None:
        with tf.name_scope("CentralCrop"):
            offset_height = (target_height - crop_height) // 2
            offset_width = (target_width - crop_width) // 2
            processed_image = tf.image.crop_to_bounding_box(
                processed_image, offset_height, offset_width, crop_height,
                crop_width)
            label = tf.image.crop_to_bounding_box(label, offset_height,
                                                  offset_width, crop_height,
                                                  crop_width)

    processed_image.set_shape([crop_height, crop_width, 3])

    if label is not None:
        label.set_shape([crop_height, crop_width, 1])

    if is_training:
        # Randomly left-right flip the image and label.
        processed_image, label, _ = preprocess_utils.flip_dim(
            [processed_image, label], _PROB_OF_FLIP, dim=1)

    if non_uniform_sampling is not None:
        if non_uniform_sampling == "net":
            sampling = tf.py_func(
                lambda image: get_nus_predictor()(image),
                [processed_image[None, ...]],
                tf.float32,
                name="nus_preprocess_sampling",
            )
            sampling = _resize_locations(sampling)
        elif non_uniform_sampling == "uniform":
            sampling = _nus_uniform_locations()
        else:
            raise Exception("Unknown non-uniform sampling type: %s" %
                            non_uniform_sampling)
        samples = {common.IMAGE: processed_image[None, ...]}
        if is_training:
            samples[common.LABEL] = label[None, ...]
        samples = _nus_sample(samples, sampling)
        processed_image = samples[common.IMAGE][0]
        if is_training:
            label = samples[common.LABEL][0]
        return original_image, processed_image, label, sampling[0]

    if output_target_sampling:
        target_sampling = _get_near_boundary_sampling_locations(
            label[None, ...], ignore_label)[0]
        if min_resize_value or max_resize_value:
            [processed_image, label
             ] = preprocess_utils.resize_to_range(image=processed_image,
                                                  label=label,
                                                  min_size=min_resize_value,
                                                  max_size=max_resize_value,
                                                  factor=resize_factor,
                                                  align_corners=True)

        return original_image, processed_image, label, target_sampling

    return original_image, processed_image, label
Code example #20
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
    """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract the
      images. See feature_extractor.network_map for supported model variants.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
    if is_training and label is None:
        raise ValueError('During training, label must be provided.')
    if model_variant is None:
        tf.logging.warning(
            'Default mean-subtraction is performed. Please specify '
            'a model_variant. See feature_extractor.network_map for '
            'supported model variants.')

    # Keep reference to original image.
    original_image = image

    processed_image = tf.cast(image, tf.float32)

    if label is not None:
        label = tf.cast(label, tf.int32)

    # Resize image and label to the desired range.
    if min_resize_value or max_resize_value:
        [processed_image,
         label] = (preprocess_utils.resize_to_range(image=processed_image,
                                                    label=label,
                                                    min_size=min_resize_value,
                                                    max_size=max_resize_value,
                                                    factor=resize_factor,
                                                    align_corners=True))
        # The `original_image` becomes the resized image.
        original_image = tf.identity(processed_image)

    # Data augmentation by randomly scaling the inputs.
    if is_training:
        scale = preprocess_utils.get_random_scale(min_scale_factor,
                                                  max_scale_factor,
                                                  scale_factor_step_size)
        processed_image, label = preprocess_utils.randomly_scale_image_and_label(
            processed_image, label, scale)
        processed_image.set_shape([None, None, 3])

    # Pad image and label to have dimensions >= [crop_height, crop_width]
    image_shape = tf.shape(processed_image)
    image_height = image_shape[0]
    image_width = image_shape[1]

    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad image with mean pixel value.
    mean_pixel = tf.reshape(feature_extractor.mean_pixel(model_variant),
                            [1, 1, 3])
    processed_image = preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)

    if label is not None:
        label = preprocess_utils.pad_to_bounding_box(label, 0, 0,
                                                     target_height,
                                                     target_width,
                                                     ignore_label)

    # Randomly crop the image and label.
    if is_training and label is not None:
        processed_image, label = preprocess_utils.random_crop(
            [processed_image, label], crop_height, crop_width)

    processed_image.set_shape([crop_height, crop_width, 3])

    if label is not None:
        label.set_shape([crop_height, crop_width, 1])

    if is_training:
        # Randomly left-right flip the image and label.
        processed_image, label, _ = preprocess_utils.flip_dim(
            [processed_image, label], _PROB_OF_FLIP, dim=1)

    # def aug_image(image):
    #   # Brightness.
    #   image = tf.image.random_brightness(image, max_delta=0.1, seed=None)
    #   # Contrast.
    #   image = tf.image.random_contrast(image, lower=0.9, upper=1.3, seed=None)
    #   # Hue.
    #   image = tf.image.random_hue(image, max_delta=0.05, seed=None)
    #   # Saturation.
    #   image = tf.image.random_saturation(image, lower=0.7, upper=1.3, seed=None)
    #
    #   image = image + tf.random_normal(shape=(crop_height, crop_width, 3), mean=0, stddev=5, dtype=tf.float32, seed=2)
    #   image = tf.clip_by_value(image, 0.0, 255.0)
    #
    #   return image
    # if is_training:
    #     random = tf.random_uniform([], 0, 1, seed=2)
    #     processed_image = tf.cond(random < 0.3, lambda: processed_image, lambda: aug_image(processed_image))

    def _aug_image(image, label):
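        # Assumes albumentations is imported at module level, e.g.
        # `import albumentations as A`; this runs on NumPy arrays via tf.py_func.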
        aug = A.Compose([
            A.OneOf(
                [
                    A.HorizontalFlip(p=1),
                    A.VerticalFlip(p=1),
                    A.RandomRotate90(p=1),
                    A.Transpose(p=1)
                ],
                p=0.8,
            )
        ])
        auged = aug(image=image, mask=label)
        aug_img = auged['image']
        aug_label = auged['mask']
        return aug_img, aug_label

    def aug_image(image, label):
        processed_image, processed_label = tf.py_func(_aug_image,
                                                      [image, label],
                                                      [tf.float32, tf.int32])
        return processed_image, processed_label

    if is_training:
        processed_image, label = aug_image(processed_image, label)
        # tf.py_func drops static shape information, so restore it here.
        processed_image.set_shape([crop_height, crop_width, 3])
        label.set_shape([crop_height, crop_width, 1])

    return original_image, processed_image, label