def testFlipWhenProbIsOne(self):
  numpy_image = np.dstack([[[5., 6.], [9., 0.]], [[4., 3.], [3., 5.]]])
  dim0_flipped = np.dstack([[[9., 0.], [5., 6.]], [[3., 5.], [4., 3.]]])
  dim1_flipped = np.dstack([[[6., 5.], [0., 9.]], [[3., 4.], [5., 3.]]])
  dim2_flipped = np.dstack([[[4., 3.], [3., 5.]], [[5., 6.], [9., 0.]]])
  image = tf.convert_to_tensor(numpy_image)

  with self.test_session():
    actual, is_flipped = preprocess_utils.flip_dim([image], prob=1, dim=0)
    self.assertAllEqual(dim0_flipped, actual.eval())
    self.assertAllEqual(True, is_flipped.eval())

    actual, is_flipped = preprocess_utils.flip_dim([image], prob=1, dim=1)
    self.assertAllEqual(dim1_flipped, actual.eval())
    self.assertAllEqual(True, is_flipped.eval())

    actual, is_flipped = preprocess_utils.flip_dim([image], prob=1, dim=2)
    self.assertAllEqual(dim2_flipped, actual.eval())
    self.assertAllEqual(True, is_flipped.eval())
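# A small numpy sanity check (not part of the original test suite) of what
# each `dim` means for the 2x2x2 test tensor above: dim=0 flips rows
# (up-down), dim=1 flips columns (left-right), dim=2 reverses the channels.
import numpy as np

_example = np.dstack([[[5., 6.], [9., 0.]], [[4., 3.], [3., 5.]]])
assert _example.shape == (2, 2, 2)
assert np.array_equal(
    _example[::-1, :, :],
    np.dstack([[[9., 0.], [5., 6.]], [[3., 5.], [4., 3.]]]))  # dim=0
assert np.array_equal(
    _example[:, ::-1, :],
    np.dstack([[[6., 5.], [0., 9.]], [[3., 4.], [5., 3.]]]))  # dim=1
assert np.array_equal(
    _example[:, :, ::-1],
    np.dstack([[[4., 3.], [3., 5.]], [[5., 6.], [9., 0.]]]))  # dim=2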
def preprocess_image_and_label_flip_only(image, vis, label, mask,
                                         is_training=True):
  """Preprocesses the sample by randomly flipping it DURING TRAINING ONLY.

  Args:
    image: Input image.
    vis: Additional input tensor to flip consistently with the image.
    label: Ground truth annotation label.
    mask: Additional mask tensor to flip consistently with the image.
    is_training: If the preprocessing is used for training or not.

  Returns:
    image: Preprocessed image.
    vis: Preprocessed vis tensor.
    label: Preprocessed ground truth segmentation label.
    mask: Preprocessed mask.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')

  if is_training:
    # Randomly left-right flip the image and label.
    image, vis, label, mask, _ = preprocess_utils.flip_dim(
        [image, vis, label, mask], _PROB_OF_FLIP, dim=1)

  return image, vis, label, mask
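# A hypothetical usage sketch for the flip-only preprocessing above: all four
# tensors are flipped together (or none of them are), so their spatial
# correspondence is preserved. The shapes are illustrative assumptions.
image = tf.zeros([512, 512, 3], dtype=tf.float32)
vis = tf.zeros([512, 512, 1], dtype=tf.float32)
label = tf.zeros([512, 512, 1], dtype=tf.int32)
mask = tf.zeros([512, 512, 1], dtype=tf.int32)
image, vis, label, mask = preprocess_image_and_label_flip_only(
    image, vis, label, mask, is_training=True)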
def testNoFlipWhenProbIsZero(self):
  numpy_image = np.dstack([[[5., 6.], [9., 0.]], [[4., 3.], [3., 5.]]])
  image = tf.convert_to_tensor(numpy_image)

  with self.test_session():
    actual, is_flipped = preprocess_utils.flip_dim([image], prob=0, dim=0)
    self.assertAllEqual(numpy_image, actual.eval())
    self.assertAllEqual(False, is_flipped.eval())

    actual, is_flipped = preprocess_utils.flip_dim([image], prob=0, dim=1)
    self.assertAllEqual(numpy_image, actual.eval())
    self.assertAllEqual(False, is_flipped.eval())

    actual, is_flipped = preprocess_utils.flip_dim([image], prob=0, dim=2)
    self.assertAllEqual(numpy_image, actual.eval())
    self.assertAllEqual(False, is_flipped.eval())
def testReturnRandomFlipsOnMultipleEvals(self):
  numpy_image = np.dstack([[[5., 6.], [9., 0.]], [[4., 3.], [3., 5.]]])
  dim1_flipped = np.dstack([[[6., 5.], [0., 9.]], [[3., 4.], [5., 3.]]])
  image = tf.convert_to_tensor(numpy_image)
  tf.set_random_seed(53)

  with self.test_session() as sess:
    actual, is_flipped = preprocess_utils.flip_dim([image], prob=0.5, dim=1)
    actual_image, actual_is_flipped = sess.run([actual, is_flipped])
    self.assertAllEqual(numpy_image, actual_image)
    self.assertEqual(False, actual_is_flipped)

    actual_image, actual_is_flipped = sess.run([actual, is_flipped])
    self.assertAllEqual(dim1_flipped, actual_image)
    self.assertEqual(True, actual_is_flipped)
def testFlipMultipleImagesConsistentlyWhenProbIsOne(self):
  numpy_image = np.dstack([[[5., 6.], [9., 0.]], [[4., 3.], [3., 5.]]])
  numpy_label = np.dstack([[[0., 1.], [2., 3.]]])
  image_dim1_flipped = np.dstack([[[6., 5.], [0., 9.]], [[3., 4.], [5., 3.]]])
  label_dim1_flipped = np.dstack([[[1., 0.], [3., 2.]]])
  image = tf.convert_to_tensor(numpy_image)
  label = tf.convert_to_tensor(numpy_label)

  with self.test_session() as sess:
    image, label, is_flipped = preprocess_utils.flip_dim(
        [image, label], prob=1, dim=1)
    actual_image, actual_label = sess.run([image, label])
    self.assertAllEqual(image_dim1_flipped, actual_image)
    self.assertAllEqual(label_dim1_flipped, actual_label)
    self.assertEqual(True, is_flipped.eval())
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None,
                               non_uniform_sampling=None,
                               output_target_sampling=False):
  """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract
      the images. See feature_extractor.network_map for supported model
      variants.
    non_uniform_sampling: If not None, the non-uniform sampling strategy to
      apply; either 'net' (sampling locations predicted by a network) or
      'uniform' (uniform sampling locations).
    output_target_sampling: If True, also compute and return sampling
      locations near label boundaries, deferring any resize_to_range until
      after they are computed.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.
    When non_uniform_sampling or output_target_sampling is set, the sampling
    locations are additionally returned as a fourth value.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')
  if model_variant is None:
    tf.logging.warning(
        'Default mean-subtraction is performed. Please specify '
        'a model_variant. See feature_extractor.network_map for '
        'supported model variants.')

  # Keep reference to original image.
  original_image = image
  processed_image = tf.cast(image, tf.float32)
  if label is not None:
    label = tf.cast(label, tf.int32)

  # Resize image and label to the desired range.
  if not output_target_sampling and (min_resize_value or max_resize_value):
    [processed_image, label] = (
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True))
    # The `original_image` becomes the resized image.
    original_image = tf.identity(processed_image)

  # Data augmentation by randomly scaling the inputs.
  if is_training:
    scale = preprocess_utils.get_random_scale(
        min_scale_factor, max_scale_factor, scale_factor_step_size)
    processed_image, label = preprocess_utils.randomly_scale_image_and_label(
        processed_image, label, scale)
  processed_image.set_shape([None, None, 3])

  # Pad image and label to have dimensions >= [crop_height, crop_width].
  image_shape = tf.shape(processed_image)
  image_height = image_shape[0]
  image_width = image_shape[1]

  target_height = image_height + tf.maximum(crop_height - image_height, 0)
  target_width = image_width + tf.maximum(crop_width - image_width, 0)

  # Pad image with mean pixel value.
  mean_pixel = tf.reshape(
      feature_extractor.mean_pixel(model_variant), [1, 1, 3])
  processed_image = preprocess_utils.pad_to_bounding_box(
      processed_image, 0, 0, target_height, target_width, mean_pixel)

  if label is not None:
    label = preprocess_utils.pad_to_bounding_box(
        label, 0, 0, target_height, target_width, ignore_label)

  # Randomly crop the image and label.
  if is_training and label is not None:
    processed_image, label = preprocess_utils.random_crop(
        [processed_image, label], crop_height, crop_width)
  if not is_training and label is not None:
    with tf.name_scope("CentralCrop"):
      offset_height = (target_height - crop_height) // 2
      offset_width = (target_width - crop_width) // 2
      processed_image = tf.image.crop_to_bounding_box(
          processed_image, offset_height, offset_width, crop_height,
          crop_width)
      label = tf.image.crop_to_bounding_box(
          label, offset_height, offset_width, crop_height, crop_width)

  processed_image.set_shape([crop_height, crop_width, 3])

  if label is not None:
    label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip the image and label.
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  if non_uniform_sampling is not None:
    if non_uniform_sampling == "net":
      sampling = tf.py_func(
          lambda image: get_nus_predictor()(image),
          [processed_image[None, ...]],
          tf.float32,
          name="nus_preprocess_sampling",
      )
      sampling = _resize_locations(sampling)
    elif non_uniform_sampling == "uniform":
      sampling = _nus_uniform_locations()
    else:
      raise Exception("Unknown non-uniform sampling type: %s" %
                      non_uniform_sampling)
    samples = {common.IMAGE: processed_image[None, ...]}
    if is_training:
      samples[common.LABEL] = label[None, ...]
    samples = _nus_sample(samples, sampling)
    processed_image = samples[common.IMAGE][0]
    if is_training:
      label = samples[common.LABEL][0]
    return original_image, processed_image, label, sampling[0]

  if output_target_sampling:
    target_sampling = _get_near_boundary_sampling_locations(
        label[None, ...], ignore_label)[0]
    if min_resize_value or max_resize_value:
      [processed_image, label] = preprocess_utils.resize_to_range(
          image=processed_image,
          label=label,
          min_size=min_resize_value,
          max_size=max_resize_value,
          factor=resize_factor,
          align_corners=True)
    return original_image, processed_image, label, target_sampling

  return original_image, processed_image, label
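# A hedged usage sketch for the non-uniform-sampling variant above, using the
# 'uniform' strategy so no predictor network is needed; the fourth return
# value is the sampling locations. Input shapes and model_variant are
# illustrative assumptions, not values from the source.
image = tf.zeros([600, 800, 3], dtype=tf.uint8)
label = tf.zeros([600, 800, 1], dtype=tf.uint8)
_, processed_image, label, sampling = preprocess_image_and_label(
    image,
    label,
    crop_height=513,
    crop_width=513,
    is_training=True,
    model_variant='xception_65',
    non_uniform_sampling='uniform')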
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
  """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract
      the images. See feature_extractor.network_map for supported model
      variants.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')
  if model_variant is None:
    tf.logging.warning(
        'Default mean-subtraction is performed. Please specify '
        'a model_variant. See feature_extractor.network_map for '
        'supported model variants.')

  # Keep reference to original image.
  original_image = image
  processed_image = tf.cast(image, tf.float32)
  if label is not None:
    label = tf.cast(label, tf.int32)

  # Resize image and label to the desired range.
  if min_resize_value is not None or max_resize_value is not None:
    [processed_image, label] = (
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True))
    # The `original_image` becomes the resized image.
    original_image = tf.identity(processed_image)

  # Data augmentation by randomly scaling the inputs.
  scale = get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_image, label = randomly_scale_image_and_label(
      processed_image, label, scale)
  processed_image.set_shape([None, None, 3])

  if crop_height is not None and crop_width is not None:
    # Pad image and label to have dimensions >= [crop_height, crop_width].
    image_shape = tf.shape(processed_image)
    image_height = image_shape[0]
    image_width = image_shape[1]

    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad image with mean pixel value.
    mean_pixel = tf.reshape(
        feature_extractor.mean_pixel(model_variant), [1, 1, 3])
    processed_image = preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)

    if label is not None:
      label = preprocess_utils.pad_to_bounding_box(
          label, 0, 0, target_height, target_width, ignore_label)

    # Randomly crop the image and label.
    if is_training and label is not None:
      processed_image, label = preprocess_utils.random_crop(
          [processed_image, label], crop_height, crop_width)

    processed_image.set_shape([crop_height, crop_width, 3])

    if label is not None:
      label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip the image and label.
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  return original_image, processed_image, label
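# A sketch of wiring this preprocessing into a TF 1.x input pipeline. The
# dataset, shapes, and hyperparameters are assumptions for illustration.
def _preprocess_fn(image, label):
  _, processed_image, processed_label = preprocess_image_and_label(
      image,
      label,
      crop_height=513,
      crop_width=513,
      min_scale_factor=0.5,
      max_scale_factor=2.0,
      scale_factor_step_size=0.25,
      is_training=True,
      model_variant='xception_65')
  return processed_image, processed_label

# dataset = dataset.map(_preprocess_fn).batch(8).prefetch(1)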
def preprocess_images_and_labels_consistently(images,
                                              labels,
                                              crop_height,
                                              crop_width,
                                              min_resize_value=None,
                                              max_resize_value=None,
                                              resize_factor=None,
                                              min_scale_factor=1.,
                                              max_scale_factor=1.,
                                              scale_factor_step_size=0,
                                              ignore_label=255,
                                              is_training=True,
                                              model_variant=None):
  """Preprocesses images and labels in a consistent way.

  Similar to preprocess_image_and_label, but works on a list of images and a
  list of labels, uses the same crop coordinates for all of them, and either
  flips all images and labels or none of them.

  Args:
    images: List of input images.
    labels: List of ground truth annotation labels.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract
      the images. See feature_extractor.network_map for supported model
      variants.

  Returns:
    original_images: Original images (could be resized).
    processed_images: Preprocessed images.
    labels: Preprocessed ground truth segmentation labels.

  Raises:
    ValueError: Ground truth labels not provided during training.
  """
  if is_training and labels is None:
    raise ValueError('During training, labels must be provided.')
  if model_variant is None:
    tf.logging.warning(
        'Default mean-subtraction is performed. Please specify '
        'a model_variant. See feature_extractor.network_map for '
        'supported model variants.')

  if labels is not None:
    assert len(images) == len(labels)
  num_imgs = len(images)

  # Keep reference to original images.
  original_images = images
  processed_images = [tf.cast(image, tf.float32) for image in images]
  if labels is not None:
    labels = [tf.cast(label, tf.int32) for label in labels]

  # Resize images and labels to the desired range.
  if min_resize_value is not None or max_resize_value is not None:
    processed_images, labels = zip(*[
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True)
        for processed_image, label in zip(processed_images, labels)
    ])
    # The `original_images` becomes the resized images.
    original_images = [
        tf.identity(processed_image) for processed_image in processed_images
    ]

  # Data augmentation by randomly scaling all inputs with the same scale.
  scale = get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_images, labels = zip(*[
      randomly_scale_image_and_label(processed_image, label, scale)
      for processed_image, label in zip(processed_images, labels)
  ])
  for processed_image in processed_images:
    processed_image.set_shape([None, None, 3])

  if crop_height is not None and crop_width is not None:
    # Pad images and labels to have dimensions >= [crop_height, crop_width].
    image_shape = tf.shape(processed_images[0])
    image_height = image_shape[0]
    image_width = image_shape[1]

    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad image with mean pixel value.
    mean_pixel = tf.reshape(
        feature_extractor.mean_pixel(model_variant), [1, 1, 3])
    processed_images = [
        preprocess_utils.pad_to_bounding_box(processed_image, 0, 0,
                                             target_height, target_width,
                                             mean_pixel)
        for processed_image in processed_images
    ]

    if labels is not None:
      labels = [
          preprocess_utils.pad_to_bounding_box(label, 0, 0, target_height,
                                               target_width, ignore_label)
          for label in labels
      ]

    # Randomly crop the images and labels with the same crop window.
    if is_training and labels is not None:
      cropped = preprocess_utils.random_crop(processed_images + labels,
                                             crop_height, crop_width)
      assert len(cropped) == 2 * num_imgs
      processed_images = cropped[:num_imgs]
      labels = cropped[num_imgs:]

    for processed_image in processed_images:
      processed_image.set_shape([crop_height, crop_width, 3])

    if labels is not None:
      for label in labels:
        label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip all images and labels together.
    res = preprocess_utils.flip_dim(
        list(processed_images + labels), _PROB_OF_FLIP, dim=1)
    maybe_flipped = res[:-1]
    assert len(maybe_flipped) == 2 * num_imgs
    processed_images = maybe_flipped[:num_imgs]
    labels = maybe_flipped[num_imgs:]

  return original_images, processed_images, labels
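# A hedged sketch for the consistent variant above: two frames (e.g. from a
# video clip) and their labels share one random scale, one crop window, and
# one flip decision. Shapes and hyperparameters are illustrative assumptions.
frames = [tf.zeros([480, 640, 3], dtype=tf.uint8) for _ in range(2)]
frame_labels = [tf.zeros([480, 640, 1], dtype=tf.uint8) for _ in range(2)]
_, processed_frames, processed_labels = (
    preprocess_images_and_labels_consistently(
        frames,
        frame_labels,
        crop_height=465,
        crop_width=465,
        min_scale_factor=0.5,
        max_scale_factor=2.0,
        scale_factor_step_size=0.25,
        is_training=True,
        model_variant='xception_65'))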
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
  """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract
      the images. See feature_extractor.network_map for supported model
      variants.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')
  if model_variant is None:
    tf.logging.warning('Default mean-subtraction is performed. Please specify '
                       'a model_variant. See feature_extractor.network_map '
                       'for supported model variants.')

  # Keep reference to original image.
  original_image = image
  processed_image = tf.cast(image, tf.float32)
  if label is not None:
    label = tf.cast(label, tf.int32)

  # Resize image and label to the desired range.
  if min_resize_value is not None or max_resize_value is not None:
    [processed_image, label] = (
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True))
    # The `original_image` becomes the resized image.
    original_image = tf.identity(processed_image)

  # Data augmentation by randomly scaling the inputs.
  scale = preprocess_utils.get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_image, label = preprocess_utils.randomly_scale_image_and_label(
      processed_image, label, scale)
  processed_image.set_shape([None, None, 3])

  # Pad image and label to have dimensions >= [crop_height, crop_width].
  image_shape = tf.shape(processed_image)
  image_height = image_shape[0]
  image_width = image_shape[1]

  target_height = image_height + tf.maximum(crop_height - image_height, 0)
  target_width = image_width + tf.maximum(crop_width - image_width, 0)

  # Pad image with mean pixel value.
  mean_pixel = tf.reshape(
      feature_extractor.mean_pixel(model_variant), [1, 1, 3])
  processed_image = preprocess_utils.pad_to_bounding_box(
      processed_image, 0, 0, target_height, target_width, mean_pixel)

  if label is not None:
    label = preprocess_utils.pad_to_bounding_box(
        label, 0, 0, target_height, target_width, ignore_label)

  # Randomly crop the image and label.
  if is_training and label is not None:
    processed_image, label = preprocess_utils.random_crop(
        [processed_image, label], crop_height, crop_width)

  processed_image.set_shape([crop_height, crop_width, 3])

  if label is not None:
    label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip the image and label.
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  return original_image, processed_image, label
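# An eval-mode sketch for the canonical variant above: with is_training=False
# the random crop and flip are skipped, and the default scale factors
# (1., 1.) make the random-scale step a no-op, so the image is only padded to
# the crop size. The 513x513 crop and input shape are illustrative
# assumptions.
image = tf.zeros([366, 500, 3], dtype=tf.uint8)
label = tf.zeros([366, 500, 1], dtype=tf.uint8)
original_image, processed_image, label = preprocess_image_and_label(
    image, label, crop_height=513, crop_width=513,
    is_training=False, model_variant='xception_65')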
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
  """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract
      the images. See feature_extractor.network_map for supported model
      variants.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')
  if model_variant is None:
    tf.logging.warning(
        'Default mean-subtraction is performed. Please specify '
        'a model_variant. See feature_extractor.network_map for '
        'supported model variants.')

  # Keep reference to original image.
  original_image = image
  processed_image = tf.cast(image, tf.float32)
  if label is not None:
    label = tf.cast(label, tf.int32)

  # Resize image and label to the desired range.
  if min_resize_value or max_resize_value:
    [processed_image, label] = (
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True))
    # The `original_image` becomes the resized image.
    original_image = tf.identity(processed_image)

  # Data augmentation by randomly scaling the inputs.
  if is_training:
    scale = preprocess_utils.get_random_scale(
        min_scale_factor, max_scale_factor, scale_factor_step_size)
    processed_image, label = preprocess_utils.randomly_scale_image_and_label(
        processed_image, label, scale)
  processed_image.set_shape([None, None, 3])

  # Pad image and label to have dimensions >= [crop_height, crop_width].
  image_shape = tf.shape(processed_image)
  image_height = image_shape[0]
  image_width = image_shape[1]

  target_height = image_height + tf.maximum(crop_height - image_height, 0)
  target_width = image_width + tf.maximum(crop_width - image_width, 0)

  # Pad image with mean pixel value.
  mean_pixel = tf.reshape(
      feature_extractor.mean_pixel(model_variant), [1, 1, 3])
  processed_image = preprocess_utils.pad_to_bounding_box(
      processed_image, 0, 0, target_height, target_width, mean_pixel)

  if label is not None:
    label = preprocess_utils.pad_to_bounding_box(
        label, 0, 0, target_height, target_width, ignore_label)

  # Randomly crop the image and label.
  if is_training and label is not None:
    processed_image, label = preprocess_utils.random_crop(
        [processed_image, label], crop_height, crop_width)

  processed_image.set_shape([crop_height, crop_width, 3])

  if label is not None:
    label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip the image and label.
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  # def aug_image(image):
  #   # Brightness.
  #   image = tf.image.random_brightness(image, max_delta=0.1, seed=None)
  #   # Contrast.
  #   image = tf.image.random_contrast(image, lower=0.9, upper=1.3, seed=None)
  #   # Hue.
  #   image = tf.image.random_hue(image, max_delta=0.05, seed=None)
  #   # Saturation.
  #   image = tf.image.random_saturation(image, lower=0.7, upper=1.3,
  #                                      seed=None)
  #   # image = image + tf.random_normal(shape=(crop_height, crop_width, 3),
  #   #                                  mean=0, stddev=5, dtype=tf.float32,
  #   #                                  seed=2)
  #   image = tf.clip_by_value(image, 0.0, 255.0)
  #   return image

  # if is_training:
  #   random = tf.random_uniform([], 0, 1, seed=2)
  #   processed_image = tf.cond(random < 0.3, lambda: processed_image,
  #                             lambda: aug_image(processed_image))

  def _aug_image(image, label):
    # Runs an albumentations pipeline on numpy arrays (invoked via
    # tf.py_func), applying one of four geometric transforms to the image
    # and mask together.
    aug = A.Compose([
        A.OneOf(
            [
                A.HorizontalFlip(p=1),
                A.VerticalFlip(p=1),
                A.RandomRotate90(p=1),
                A.Transpose(p=1)
            ],
            p=0.8,
        )
    ])
    auged = aug(image=image, mask=label)
    return auged['image'], auged['mask']

  def aug_image(image, label):
    processed_image, processed_label = tf.py_func(
        _aug_image, [image, label], [tf.float32, tf.int32])
    return processed_image, processed_label

  if is_training:
    processed_image, label = aug_image(processed_image, label)
    # tf.py_func loses static shape information; restore it explicitly.
    processed_image.set_shape([crop_height, crop_width, 3])
    label.set_shape([crop_height, crop_width, 1])

  return original_image, processed_image, label
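# A standalone sketch of the albumentations pipeline used above, run directly
# on numpy arrays outside the TF graph (assumes `import albumentations as A`
# and `import numpy as np`); handy for checking that image and mask stay
# spatially aligned.
img = np.random.rand(64, 64, 3).astype(np.float32)
msk = np.zeros((64, 64, 1), dtype=np.int32)
aug = A.Compose([
    A.OneOf(
        [
            A.HorizontalFlip(p=1),
            A.VerticalFlip(p=1),
            A.RandomRotate90(p=1),
            A.Transpose(p=1)
        ],
        p=0.8,
    )
])
auged = aug(image=img, mask=msk)
assert auged['image'].shape == img.shape
assert auged['mask'].shape == msk.shape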
def preprocess_image_and_label(image,  # Preprocess the image and label.
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
  """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: If the preprocessing is used for training or not.
    model_variant: Model variant (string) for choosing how to mean-subtract
      the images. See feature_extractor.network_map for supported model
      variants.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')
  if model_variant is None:  # No model variant specified.
    tf.logging.warning(
        'Default mean-subtraction is performed. Please specify '
        'a model_variant. See feature_extractor.network_map for '
        'supported model variants.')

  # Keep reference to original image.
  original_image = image  # Save the original image.
  processed_image = tf.cast(image, tf.float32)  # Cast pixel values to float.
  if label is not None:
    label = tf.cast(label, tf.int32)  # Cast the label to int32.

  # Resize image and label to the desired range.
  if min_resize_value or max_resize_value:  # Skipped when both are None.
    [processed_image, label] = (
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True))
    # The `original_image` becomes the resized image.
    original_image = tf.identity(processed_image)

  # Data augmentation by randomly scaling the inputs.
  if is_training:
    # Randomly pick a scale factor.
    scale = preprocess_utils.get_random_scale(
        min_scale_factor, max_scale_factor, scale_factor_step_size)
    # Scale the image and label by the chosen factor.
    processed_image, label = preprocess_utils.randomly_scale_image_and_label(
        processed_image, label, scale)
  processed_image.set_shape([None, None, 3])

  # Pad image and label to have dimensions >= [crop_height, crop_width],
  # i.e. pad whenever the processed image is smaller than the crop size.
  image_shape = tf.shape(processed_image)
  image_height = image_shape[0]
  image_width = image_shape[1]

  # Take the larger of the image size and the crop size.
  target_height = image_height + tf.maximum(crop_height - image_height, 0)
  target_width = image_width + tf.maximum(crop_width - image_width, 0)

  # Pad image with mean pixel value.
  mean_pixel = tf.reshape(
      feature_extractor.mean_pixel(model_variant), [1, 1, 3])
  # Pad the image up to target_height and target_width.
  processed_image = preprocess_utils.pad_to_bounding_box(
      processed_image, 0, 0, target_height, target_width, mean_pixel)

  if label is not None:
    # Pad the label with the ignore_label value.
    label = preprocess_utils.pad_to_bounding_box(
        label, 0, 0, target_height, target_width, ignore_label)

  # Randomly crop the image and label.
  if is_training and label is not None:
    # Crops the given list of tensors with the same offsets.
    processed_image, label = preprocess_utils.random_crop(
        [processed_image, label], crop_height, crop_width)

  processed_image.set_shape([crop_height, crop_width, 3])

  if label is not None:
    label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip the image and label for data augmentation.
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  return original_image, processed_image, label