def preprocess_for_train(image, labels, bboxes, out_shape, scope='ssd_preprocessing_train'):
    """Preprocesses the given image for SSD training.

    Pipeline: random bbox-constrained crop -> bilinear resize to
    `out_shape` -> random horizontal flip -> random color distortion ->
    rescale to [0, 255] and subtract the per-channel means (VGG input).

    Args:
        image: A 3-D `Tensor` [height, width, channels] of arbitrary size.
        labels: `Tensor` of object labels, one entry per bounding box.
        bboxes: `Tensor` of bounding boxes, one row per box (assumed to be
            in normalized coordinates — confirm against the caller).
        out_shape: Target (height, width) the image is resized to.
        scope: Optional name scope for the preprocessing ops.

    Returns:
        A `(image, labels, bboxes)` tuple: the preprocessed image and the
        labels/bboxes that survive the random crop.

    Raises:
        ValueError: If `image` is not a 3-D tensor.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes with a random crop.
        dst_image, labels, bboxes, distort_bbox = \
            distorted_bounding_box_crop(image, labels, bboxes,
                                        aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.
        dst_image = tf_image.resize_image(dst_image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
        tf_summary_image(dst_image, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally (boxes are mirrored too).
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
            dst_image,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Rescale [0, 1] -> [0, 255] and subtract the channel means.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes, out_shape, data_format='NHWC', scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Randomly crops the image around its boxes, resizes the crop to
    `out_shape`, flips it horizontally at random, distorts its colors,
    and converts it to mean-subtracted VGG input scale, optionally
    transposed to NCHW layout.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Work in float32 scaled to [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Random crop constrained by the boxes; labels (n,) / bboxes (n, 4)
        # are filtered, and the crop window itself is returned as well.
        distorted, labels, bboxes, distort_bbox = distorted_bounding_box_crop(
            image, labels, bboxes,
            min_object_covered=MIN_OBJECT_COVERED,
            aspect_ratio_range=CROP_RATIO_RANGE)

        # Bring the crop to the network input resolution.
        distorted = tf_image.resize_image(distorted, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
        tf_summary_image(distorted, bboxes, 'image_shape_distorted')

        # Horizontal flip, applied to the boxes as well.
        distorted, bboxes = tf_image.random_flip_left_right(distorted, bboxes)

        # One of four color-distortion orderings, chosen at random.
        color_fn = lambda x, ordering: distort_color(x, ordering, fast_mode)
        distorted = apply_with_random_selector(distorted, color_fn, num_cases=4)
        tf_summary_image(distorted, bboxes, 'image_color_distorted')

        # Back to [0, 255] and subtract the per-channel VGG means.
        image = tf_image_whitened(distorted * 255., [_R_MEAN, _G_MEAN, _B_MEAN])

        # Optionally convert HWC -> CHW for the requested data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes, out_shape=EVAL_SIZE, data_format='NHWC', scope='ssd_preprocessing_train'):
    """Preprocesses the given image for SSD training.

    Pipeline: random bbox-constrained crop -> bilinear resize to
    `out_shape` -> random horizontal flip -> random color distortion ->
    rescale to [0, 255] and subtract the per-channel means, with optional
    NCHW transpose. Each stage emits an image summary numbered 0-4.

    Args:
        image: A 3-D `Tensor` [height, width, channels] of arbitrary size.
        labels: `Tensor` of object labels, one entry per bounding box.
        bboxes: `Tensor` of bounding boxes, one row per box (assumed to be
            in normalized coordinates — confirm against the caller).
        out_shape: Target (height, width); defaults to `EVAL_SIZE`.
        data_format: 'NHWC' (default) or 'NCHW' for the returned image.
        scope: Optional name scope for the preprocessing ops.

    Returns:
        A `(image, labels, bboxes)` tuple: the preprocessed image and the
        labels/bboxes that survive the random crop.

    Raises:
        ValueError: If `image` is not a 3-D tensor.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, '0_original_image')

        # Distort image and bounding boxes with a random crop.
        dst_image, labels, bboxes, distort_bbox = \
            distorted_bounding_box_crop(image, labels, bboxes)
        # Visualize where the crop landed on the original image.
        tf_summary_image(image, tf.reshape(distort_bbox, (1, -1)),
                         '1_cropped_position')

        # Resize image to output size.
        dst_image = tf_image.resize_image(
            dst_image, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(dst_image, bboxes, '2_resized_image')

        # Randomly flip the image horizontally. (Vertical flip and 90-degree
        # rotation exist in tf_image but are intentionally not applied here.)
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)
        tf_summary_image(dst_image, bboxes, '3_flipped_image')

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
            dst_image,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        tf_summary_image(dst_image, bboxes, '4_color_distorted_image')

        # Rescale [0, 1] -> [0, 255] and subtract the channel means.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def ron_preprocess_for_train(image, labels, bboxes, out_shape, data_format='NHWC', scope='ron_preprocessing_train'):
    """Preprocesses the given image for RON training.

    Pipeline: optional 2x canvas expansion (with probability 0.5) ->
    random patch sampling -> random horizontal flip -> bilinear resize to
    `out_shape` -> random color distortion -> rescale to [0, 255] and
    subtract the per-channel means, with optional NCHW transpose.

    Args:
        image: A 3-D `Tensor` [height, width, channels] of arbitrary size.
        labels: `Tensor` of object labels, one entry per bounding box.
        bboxes: `Tensor` of bounding boxes, one row per box (assumed to be
            in normalized coordinates — confirm against the caller).
        out_shape: Target (height, width) the image is resized to.
        data_format: 'NHWC' (default) or 'NCHW' for the returned image.
        scope: Optional name scope for the preprocessing ops.

    Returns:
        A `(image, labels, bboxes)` tuple: the preprocessed image and the
        labels/bboxes that survive the random patch sampling.

    Raises:
        ValueError: If `image` is not a 3-D tensor.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ron_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes_0')

        # With probability 0.5, place the image on a larger (2x) canvas.
        image, bboxes = control_flow_ops.cond(
            tf.random_uniform([1], minval=0., maxval=1., dtype=tf.float32)[0] < 0.5,
            lambda: (image, bboxes),
            lambda: tf_image.ssd_random_expand(image, bboxes, 2))
        tf_summary_image(image, bboxes, 'image_on_canvas_1')

        # Distort image and bounding boxes by sampling a random patch.
        random_sample_image, labels, bboxes = tf_image.ssd_random_sample_patch(
            image, labels, bboxes)
        tf_summary_image(random_sample_image, bboxes, 'image_shape_distorted_2')

        # Randomly flip the image horizontally.
        random_sample_flip_image, bboxes = tf_image.random_flip_left_right(
            random_sample_image, bboxes)
        random_sample_flip_resized_image = tf_image.resize_image(
            random_sample_flip_image, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(random_sample_flip_resized_image, bboxes,
                         'image_fliped_and_resized_3')

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
            random_sample_flip_resized_image,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted_4')
        # FIX: the original code reassigned dst_image back to the
        # un-distorted resized image here, silently discarding the color
        # distortion that the summary above claims was applied. If the
        # intent really was to disable color distortion, remove the
        # apply_with_random_selector call instead of discarding its result.

        # Rescale to VGG input scale ([0, 1] -> [0, 255]) and subtract means.
        dst_image.set_shape([None, None, 3])
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes, out_shape=EVAL_SIZE, data_format='NHWC', scope='ssd_preprocessing_train'):
    '''
    Pre-processes the given image for training.

    Pipeline: clip boxes to [0, 1] -> random bbox-constrained crop ->
    bilinear resize to `out_shape` -> random horizontal flip -> random
    color distortion -> rescale to [0, 255] and subtract the per-channel
    means, with optional NCHW transpose.

    :param image: 3-D image `Tensor` [height, width, channels]
    :param labels: labels for faces, one entry per bounding box
    :param bboxes: bounding boxes, one row per box, normalized coordinates
                   (clipped to [0, 1] here before cropping)
    :param out_shape: output (height, width); defaults to `EVAL_SIZE`
    :param data_format: 'NHWC' (default) or 'NCHW' for the returned image
    :param scope: optional name scope for the preprocessing ops
    :return: (image, labels, bboxes) after preprocessing
    :raises ValueError: if `image` is not a 3-D tensor
    '''
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'original_image')

        # Clamp boxes into the valid normalized range before cropping.
        bboxes = tf.clip_by_value(bboxes, 0., 1.)

        # Distort image and bounding boxes with a random crop.
        dst_image, labels, bboxes, distort_bbox = distorted_bounding_box_crop(
            image, labels, bboxes)

        # Resize image to output size.
        dst_image = tf_image.resize_image(
            dst_image, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)
        tf_summary_image(dst_image, bboxes, 'resized_image')

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
            dst_image,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        tf_summary_image(dst_image, bboxes, 'color_distorted_image')

        # Rescale [0, 1] -> [0, 255] and subtract the channel means.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes, out_shape, data_format='NHWC', scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    The pipeline: random object-aware crop -> bilinear resize to
    `out_shape` -> random horizontal flip -> random color distortion ->
    rescale to [0, 255] with per-channel mean subtraction -> optional
    NCHW transpose.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Make sure we operate on float32 values scaled to [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Sample a crop that keeps enough of the objects visible; the
        # surviving labels/bboxes are re-expressed in crop coordinates.
        cropped, labels, bboxes, distort_bbox = distorted_bounding_box_crop(
            image, labels, bboxes,
            min_object_covered=MIN_OBJECT_COVERED,
            aspect_ratio_range=CROP_RATIO_RANGE)

        # The crop comes back with an arbitrary size, so resize it to the
        # resolution the network expects.
        cropped = tf_image.resize_image(cropped, out_shape,
                                        method=tf.image.ResizeMethod.BILINEAR,
                                        align_corners=False)
        tf_summary_image(cropped, bboxes, 'image_shape_distorted')

        # Mirror the image and its boxes together, half of the time.
        cropped, bboxes = tf_image.random_flip_left_right(cropped, bboxes)

        # Adjust the colors using one of four randomly chosen orderings.
        cropped = apply_with_random_selector(
            cropped,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        tf_summary_image(cropped, bboxes, 'image_color_distorted')

        # The VGG-style input expects mean-subtracted values in [0, 255].
        image = tf_image_whitened(cropped * 255., [_R_MEAN, _G_MEAN, _B_MEAN])

        # Transpose HWC -> CHW when the caller asked for NCHW layout.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes