Beispiel #1
0
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               label_norm=True,
                               ignore_label=0,
                               is_training=True,
                               model_variant=None):
    """Standardizes, pads, crops and (when training) flips an image/label pair.

    Args:
        image: Input image. It is cast to float32 and declared to have
          three channels.
        label: Ground truth annotation label. May be None when
          `is_training` is False, in which case every label step is skipped
          and None is returned for the label.
        crop_height: The height value used to crop the image and label.
        crop_width: The width value used to crop the image and label.
        label_norm: If true, the int32 label is divided by 255.
          NOTE(review): presumably this maps 0/255 masks to 0/1 -- confirm
          against the dataset.
        ignore_label: Accepted for interface compatibility but not used by
          this function: the label is always padded with 0.
        is_training: If the preprocessing is used for training or not. Random
          cropping and random left-right flipping only happen when true.
        model_variant: Only inspected to emit a warning when it is None; it
          does not influence the preprocessing done here.

    Returns:
        original_image: The per-image standardized image, before padding and
          cropping.
        processed_image: Preprocessed image with static shape
          [crop_height, crop_width, 3].
        label: Preprocessed ground truth segmentation label with static shape
          [crop_height, crop_width, 1], or None if no label was given.

    Raises:
        ValueError: Ground truth label not provided during training.
    """
    if is_training and label is None:
        raise ValueError('During training, label must be provided.')
    if model_variant is None:
        tf.logging.warning(
            'Default mean-subtraction is performed. Please specify '
            'a model_variant. See feature_extractor.network_map for '
            'supported model variants.')

    processed_image = tf.cast(image, tf.float32)
    # Additional data augmentation can be inserted here, in the same way as
    # in the earlier, traditional pipeline.
    processed_image = tf.image.per_image_standardization(processed_image)

    # Every label operation is guarded so that label=None (allowed when not
    # training) no longer reaches tf.div / pad_to_bounding_box.
    if label is not None:
        label = tf.cast(label, tf.int32)
        if label_norm:
            label = tf.div(label, 255)

    # The "original" image handed back to the caller is the standardized one.
    original_image = tf.identity(processed_image)

    processed_image.set_shape([None, None, 3])

    # Pad image and label to have dimensions >= [crop_height, crop_width].
    image_shape = tf.shape(processed_image)
    image_height = image_shape[0]
    image_width = image_shape[1]

    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad the image with a constant 127.5 per channel.
    # NOTE(review): the image is already standardized at this point, so 127.5
    # is not its mean -- confirm this padding value is intended.
    mean_pixel = tf.reshape([127.5, 127.5, 127.5], [1, 1, 3])
    processed_image = preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)

    if label is not None:
        # The label is padded with 0 rather than with `ignore_label`.
        label = preprocess_utils.pad_to_bounding_box(label, 0, 0,
                                                     target_height,
                                                     target_width, 0)

    # Randomly crop the image and label.
    if is_training and label is not None:
        processed_image, label = preprocess_utils.random_crop(
            [processed_image, label], crop_height, crop_width)

    processed_image.set_shape([crop_height, crop_width, 3])

    if label is not None:
        label.set_shape([crop_height, crop_width, 1])

    if is_training:
        # Randomly left-right flip the image and label together.
        processed_image, label, _ = preprocess_utils.flip_dim(
            [processed_image, label], 0.5, dim=1)

    return original_image, processed_image, label
Beispiel #2
0
def preprocess_eval_image(image,
                          label,
                          crop_height,
                          crop_width,
                          label_norm=True,
                          ignore_label=0,
                          eval_model='center',
                          model_variant=None):
    """Standardizes an evaluation image/label pair and fits it to the crop size.

    Args:
        image: Input image. It is cast to float32 and declared to have
          three channels.
        label: Ground truth annotation label, or None.
        crop_height: The output height.
        crop_width: The output width.
        label_norm: If true, the int32 label is divided by 255.
          NOTE(review): presumably this maps 0/255 masks to 0/1 -- confirm
          against the dataset.
        ignore_label: Accepted for interface compatibility but not used by
          this function: the label is always padded with 0.
        eval_model: How the padded inputs are brought to the output size:
          'center' calls `preprocess_utils.center_crop`, 'resize' calls
          `preprocess_utils.resize_image`.
        model_variant: Unused.

    Returns:
        original_image: The per-image standardized image, before padding and
          cropping/resizing.
        processed_image: Preprocessed image with static shape
          [crop_height, crop_width, 3].
        label: Preprocessed label with static shape
          [crop_height, crop_width, 1], or None if no label was given.

    Raises:
        ValueError: `eval_model` is neither 'center' nor 'resize'.
    """
    # Reject an unknown mode before any graph ops are created.
    if eval_model not in ('center', 'resize'):
        raise ValueError(' eval_model: wrong !')

    processed_image = tf.cast(image, tf.float32)
    # Additional data augmentation can be inserted here, in the same way as
    # in the earlier, traditional pipeline.
    processed_image = tf.image.per_image_standardization(processed_image)

    # Guarded like the later label steps, so that label=None does not reach
    # tf.cast / tf.div.
    if label is not None:
        label = tf.cast(label, tf.int32)
        if label_norm:
            label = tf.div(label, 255)

    # The "original" image handed back to the caller is the standardized one.
    original_image = tf.identity(processed_image)

    processed_image.set_shape([None, None, 3])

    # Pad image and label to have dimensions >= [crop_height, crop_width].
    image_shape = tf.shape(processed_image)
    image_height = image_shape[0]
    image_width = image_shape[1]

    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad the image with a constant 127.5 per channel.
    # NOTE(review): the image is already standardized at this point, so 127.5
    # is not its mean -- confirm this padding value is intended.
    mean_pixel = tf.reshape([127.5, 127.5, 127.5], [1, 1, 3])
    processed_image = preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)

    if label is not None:
        # The label is padded with 0 rather than with `ignore_label`.
        label = preprocess_utils.pad_to_bounding_box(label, 0, 0,
                                                     target_height,
                                                     target_width, 0)

    # NOTE(review): the label is handed to the helpers below as-is; whether
    # they accept label=None is not visible from this function -- confirm.
    if eval_model == 'center':
        processed_image, label = preprocess_utils.center_crop(
            [processed_image, label], crop_height, crop_width)
        print('eval_model: center')
    else:
        # Only 'resize' can reach this branch thanks to the check above.
        processed_image, label = preprocess_utils.resize_image(
            image=processed_image,
            label=label,
            height_size=crop_height,
            width_size=crop_width,
            align_corners=True)
        print('eval_model: resize')

    processed_image.set_shape([crop_height, crop_width, 3])

    if label is not None:
        label.set_shape([crop_height, crop_width, 1])

    return original_image, processed_image, label
def preprocess_images_and_labels_consistently(images,
                                              labels,
                                              crop_height,
                                              crop_width,
                                              min_resize_value=None,
                                              max_resize_value=None,
                                              resize_factor=None,
                                              min_scale_factor=1.,
                                              max_scale_factor=1.,
                                              scale_factor_step_size=0,
                                              ignore_label=255,
                                              is_training=True,
                                              model_variant=None):
    """Preprocesses images and labels in a consistent way.

    Works on a list of images and a list of labels: one random scale is drawn
    and applied to every pair, all tensors are cropped in a single
    `random_crop` call, and all of them are passed to a single `flip_dim`
    call.

    Args:
        images: List of input images.
        labels: List of ground truth annotation labels, one per image. May be
          None when `is_training` is False.
        crop_height: The height value used to crop the image and label.
        crop_width: The width value used to crop the image and label. Padding
          and cropping are skipped unless both crop values are given.
        min_resize_value: Desired size of the smaller image side.
        max_resize_value: Maximum allowed size of the larger image side.
        resize_factor: Resized dimensions are multiple of factor plus one.
        min_scale_factor: Minimum scale factor value.
        max_scale_factor: Maximum scale factor value.
        scale_factor_step_size: The step size from min scale factor to max
          scale factor. The input is randomly scaled based on the value of
          (min_scale_factor, max_scale_factor, scale_factor_step_size).
        ignore_label: The value used to pad the labels.
        is_training: If the preprocessing is used for training or not. Random
          cropping and flipping only happen when true.
        model_variant: Model variant (string) passed to
          `feature_extractor.mean_pixel` to obtain the image padding value.

    Returns:
        original_images: The input images, or the resized images if a resize
          range was given.
        processed_images: List of preprocessed images.
        labels: List of preprocessed ground truth segmentation labels, or
          None if no labels were given.

    Raises:
        ValueError: Ground truth label not provided during training.
    """
    if is_training and labels is None:
        raise ValueError('During training, labels must be provided.')
    if model_variant is None:
        tf.logging.warning(
            'Default mean-subtraction is performed. Please specify '
            'a model_variant. See feature_extractor.network_map for '
            'supported model variants.')
    has_labels = labels is not None
    if has_labels:
        assert len(images) == len(labels)
    num_imgs = len(images)

    # Keep reference to original images.
    original_images = images

    processed_images = [tf.cast(image, tf.float32) for image in images]

    if has_labels:
        labels = [tf.cast(label, tf.int32) for label in labels]
    else:
        # The per-image helpers below take one (possibly None) label per
        # image, so pair every image with None instead of zipping over None.
        labels = [None] * num_imgs

    # Resize images and labels to the desired range.
    if min_resize_value is not None or max_resize_value is not None:
        processed_images, labels = map(list, zip(*[
            preprocess_utils.resize_to_range(image=processed_image,
                                             label=label,
                                             min_size=min_resize_value,
                                             max_size=max_resize_value,
                                             factor=resize_factor,
                                             align_corners=True)
            for processed_image, label in zip(processed_images, labels)
        ]))
        # The `original_images` becomes the resized images.
        original_images = [
            tf.identity(processed_image)
            for processed_image in processed_images
        ]

    # Data augmentation by randomly scaling the inputs. The scale is drawn
    # once so that every image/label pair is scaled identically.
    scale = get_random_scale(min_scale_factor, max_scale_factor,
                             scale_factor_step_size)
    processed_images, labels = map(list, zip(*[
        randomly_scale_image_and_label(processed_image, label, scale)
        for processed_image, label in zip(processed_images, labels)
    ]))

    for processed_image in processed_images:
        processed_image.set_shape([None, None, 3])

    if crop_height is not None and crop_width is not None:
        # Pad image and label to have dimensions >= [crop_height, crop_width].
        # The padding target is derived from the first image only.
        image_shape = tf.shape(processed_images[0])
        image_height = image_shape[0]
        image_width = image_shape[1]

        target_height = image_height + tf.maximum(crop_height - image_height,
                                                  0)
        target_width = image_width + tf.maximum(crop_width - image_width, 0)

        # Pad image with mean pixel value.
        mean_pixel = tf.reshape(feature_extractor.mean_pixel(model_variant),
                                [1, 1, 3])
        processed_images = [
            preprocess_utils.pad_to_bounding_box(processed_image, 0, 0,
                                                 target_height, target_width,
                                                 mean_pixel)
            for processed_image in processed_images
        ]

        if has_labels:
            labels = [
                preprocess_utils.pad_to_bounding_box(label, 0, 0,
                                                     target_height,
                                                     target_width,
                                                     ignore_label)
                for label in labels
            ]

        # Randomly crop the images and labels in a single call.
        if is_training and has_labels:
            cropped = preprocess_utils.random_crop(processed_images + labels,
                                                   crop_height, crop_width)
            assert len(cropped) == 2 * num_imgs
            processed_images = list(cropped[:num_imgs])
            labels = list(cropped[num_imgs:])

        for processed_image in processed_images:
            processed_image.set_shape([crop_height, crop_width, 3])

        if has_labels:
            for label in labels:
                label.set_shape([crop_height, crop_width, 1])

    if is_training:
        # Randomly left-right flip the images and labels in a single call.
        # Labels are guaranteed to be present here by the check at the top.
        res = preprocess_utils.flip_dim(processed_images + labels,
                                        _PROB_OF_FLIP,
                                        dim=1)
        # The last element of the result is not an image or label.
        maybe_flipped = res[:-1]
        assert len(maybe_flipped) == 2 * num_imgs
        processed_images = list(maybe_flipped[:num_imgs])
        labels = list(maybe_flipped[num_imgs:])

    if not has_labels:
        # Hand back None rather than the internal list of None placeholders.
        labels = None

    return original_images, processed_images, labels
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
    """Resizes, scales, pads, crops and flips one image/label pair.

    Args:
        image: Input image.
        label: Ground truth annotation label; may be None when not training.
        crop_height: The height value used to crop the image and label.
        crop_width: The width value used to crop the image and label. Padding
          and cropping are skipped unless both crop values are given.
        min_resize_value: Desired size of the smaller image side.
        max_resize_value: Maximum allowed size of the larger image side.
        resize_factor: Resized dimensions are multiple of factor plus one.
        min_scale_factor: Minimum scale factor value.
        max_scale_factor: Maximum scale factor value.
        scale_factor_step_size: The step size from min scale factor to max
          scale factor. The input is randomly scaled based on the value of
          (min_scale_factor, max_scale_factor, scale_factor_step_size).
        ignore_label: The value used to pad the label.
        is_training: If the preprocessing is used for training or not. Random
          cropping and flipping only happen when true.
        model_variant: Model variant (string) passed to
          `feature_extractor.mean_pixel` to obtain the image padding value.

    Returns:
        original_image: The input image, or the resized image if a resize
          range was given.
        processed_image: Preprocessed image.
        label: Preprocessed ground truth segmentation label.

    Raises:
        ValueError: Ground truth label not provided during training.
    """
    if is_training and label is None:
        raise ValueError('During training, label must be provided.')
    if model_variant is None:
        tf.logging.warning(
            'Default mean-subtraction is performed. Please specify '
            'a model_variant. See feature_extractor.network_map for '
            'supported model variants.')

    # Unless a resize happens below, the caller gets back the untouched input.
    original_image = image

    float_image = tf.cast(image, tf.float32)
    if label is not None:
        label = tf.cast(label, tf.int32)

    # Bring the pair into the requested size range when one is configured.
    resize_requested = not (min_resize_value is None
                            and max_resize_value is None)
    if resize_requested:
        float_image, label = preprocess_utils.resize_to_range(
            image=float_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True)
        # From here on the resized image counts as the "original".
        original_image = tf.identity(float_image)

    # Scale augmentation: draw one factor and apply it to image and label.
    random_scale = get_random_scale(min_scale_factor, max_scale_factor,
                                    scale_factor_step_size)
    float_image, label = randomly_scale_image_and_label(
        float_image, label, random_scale)

    float_image.set_shape([None, None, 3])

    if not (crop_height is None or crop_width is None):
        # Grow both tensors so that each side is at least the crop size.
        dynamic_shape = tf.shape(float_image)
        current_height = dynamic_shape[0]
        current_width = dynamic_shape[1]

        height_deficit = tf.maximum(crop_height - current_height, 0)
        padded_height = current_height + height_deficit
        width_deficit = tf.maximum(crop_width - current_width, 0)
        padded_width = current_width + width_deficit

        # The image is padded with the model's mean pixel ...
        pad_value = tf.reshape(feature_extractor.mean_pixel(model_variant),
                               [1, 1, 3])
        float_image = preprocess_utils.pad_to_bounding_box(
            float_image, 0, 0, padded_height, padded_width, pad_value)

        # ... and the label with the ignore value.
        if label is not None:
            label = preprocess_utils.pad_to_bounding_box(
                label, 0, 0, padded_height, padded_width, ignore_label)

        # One random crop window shared by image and label.
        if is_training and label is not None:
            float_image, label = preprocess_utils.random_crop(
                [float_image, label], crop_height, crop_width)

        float_image.set_shape([crop_height, crop_width, 3])

        if label is not None:
            label.set_shape([crop_height, crop_width, 1])

    if is_training:
        # Left-right flip applied to image and label together.
        float_image, label, _ = preprocess_utils.flip_dim(
            [float_image, label], _PROB_OF_FLIP, dim=1)

    return original_image, float_image, label
Beispiel #5
0
    def aug_single_data_tf(self, image, label):
        """Augments one 10-channel image and its label, then normalizes.

        Depending on the instance flags this applies resize/scale/pad/crop
        (`with_aug_scale_crop`), a random flip (`with_aug_flip`) and an HSV
        colour jitter on the first three channels (`with_aug_gaus_noise`).
        The image is finally passed through `self.normal_data`.

        Args:
            image: Input image; cast to float32. The channel layout used
              below is 3 + 3 + 1 + 3, named rgb, hha, dep and xyz.
            label: Ground truth label; cast to int32.

        Returns:
            A `(processed_image, label)` tuple.
        """
        processed_image = tf.cast(image, tf.float32)
        label = tf.cast(label, tf.int32)

        if self.with_aug_scale_crop:
            # Resize image and label to the desired range.
            if self.min_resize_value or self.max_resize_value:
                [processed_image, label] = (preprocess_utils.resize_to_range(
                    image=processed_image,
                    label=label,
                    min_size=self.min_resize_value,
                    max_size=self.max_resize_value,
                    factor=self.resize_factor))

            # Data augmentation by randomly scaling the inputs.
            scale = preprocess_utils.get_random_scale(
                self.min_scale_factor, self.max_scale_factor,
                self.scale_factor_step_size)
            processed_image, label = preprocess_utils.randomly_scale_image_and_label(
                processed_image, label, scale)
            processed_image.set_shape([None, None, 10])

            # Pad image and label to have dimensions >= [crop_height, crop_width]
            image_shape = tf.shape(processed_image)
            image_height = image_shape[0]
            image_width = image_shape[1]

            target_height = image_height + tf.maximum(
                self.crop_height - image_height, 0)
            target_width = image_width + tf.maximum(
                self.crop_width - image_width, 0)
            # Padding is added on the bottom/right only (zero offsets).
            offset_height = 0
            offset_width = 0

            # Pad image with mean pixel value.
            # NOTE(review): assumes the four mean_* attributes are Python
            # sequences whose concatenation has 10 entries -- confirm.
            mean_pixel = tf.reshape(
                self.mean_rgb + self.mean_hha + self.mean_dep + self.mean_xyz,
                [1, 1, 10])
            processed_image = preprocess_utils.pad_to_bounding_box(
                processed_image, offset_height, offset_width, target_height,
                target_width, mean_pixel)

            label = preprocess_utils.pad_to_bounding_box(
                label, offset_height, offset_width, target_height,
                target_width, self.ignore_label)

            # Randomly crop the image and label.
            processed_image, label = preprocess_utils.random_crop(
                [processed_image, label], self.crop_height, self.crop_width)

            processed_image.set_shape([self.crop_height, self.crop_width, 10])
            label.set_shape([self.crop_height, self.crop_width, 1])

        # Randomly left-right flip the image and label.
        if self.with_aug_flip:
            processed_image, label, _ = preprocess_utils.random_flip(
                [processed_image, label], prob=0.5)

        # Colour jitter in HSV space on the rgb channels. Despite the flag
        # name, no Gaussian noise is drawn here.
        if self.with_aug_gaus_noise:

            def tf_rand(minval=0., maxval=1., dtype=tf.float32):
                # A single uniform sample, reduced to a scalar tensor.
                return tf.reduce_sum(
                    tf.random.uniform(shape=[1],
                                      minval=minval,
                                      maxval=maxval,
                                      dtype=dtype))

            rgb, hha, dep, xyz = tf.split(axis=-1,
                                          num_or_size_splits=[3, 3, 1, 3],
                                          value=processed_image)
            hsv = tf.image.rgb_to_hsv(rgb)
            hue, sat, val = tf.split(axis=-1, num_or_size_splits=3, value=hsv)

            # tf.image.rgb_to_hsv returns hue in [0, 1], not in degrees, so
            # the +/-35 degree shift is converted to hue units and the result
            # is kept inside [0, 1] for hsv_to_rgb.
            hue_shift = (tf_rand() * 70 - 35) / 360.
            hue = tf.keras.backend.clip(hue + hue_shift, 0, 1.)
            sat = tf.keras.backend.clip(sat + tf_rand() * 0.3 - 0.15, 0, 1.)
            # NOTE(review): the +/-25 shift and the 255 bound presume rgb
            # values in 0..255 -- confirm against the data loader.
            val = tf.keras.backend.clip(val + tf_rand() * 50 - 25, 0, 255.)

            hsv = tf.concat([hue, sat, val], axis=-1)
            rgb = tf.image.hsv_to_rgb(hsv)

            processed_image = tf.concat([rgb, hha, dep, xyz], axis=-1)

        processed_image = self.normal_data(processed_image)
        return processed_image, label