def preprocess_for_test(image, out_shape=IMAGE_SIZE, scope='ssd_preprocessing_test'):
    """Prepare a single image for test-time inference.

    The image is cast to float, passed through
    `tf_image.tf_image_whitened` together with the module's per-channel
    means, and resized to `out_shape`.

    Args:
        image: Image tensor; cast with `tf.to_float`.
        out_shape: Target size handed to `tf_image.resize_image`.
        scope: Name scope under which the ops are created.

    Returns:
        A tuple `(image, bbox_img)` where `bbox_img` is the constant
        `[0., 0., 1., 1.]`.
    """
    channel_means = [_R_MEAN, _G_MEAN, _B_MEAN]
    with tf.name_scope(scope):
        as_float = tf.to_float(image)
        whitened = tf_image.tf_image_whitened(as_float, channel_means)
        resized = tf_image.resize_image(whitened, out_shape)
        # Presumably the whole-image box in normalised coordinates -- confirm.
        whole_image_box = tf.constant([0., 0., 1., 1.])
        return resized, whole_image_box
def preprocess_for_train(image, labels, bboxes, height, width, out_shape,
                         data_format='NHWC', use_whiten=True,
                         scope='textbox_process_train'):
    """Build the training-time preprocessing ops for one image.

    Args:
        image: A `Tensor` holding one image; must have rank 3.
        labels: A Tensor with the labels of the boxes.
        bboxes: A Tensor with the box coordinates (shape [N, 4] according
            to the original note; not checked here).
        height: Indexable; `height[0]` is the target height passed to
            `tf_image.resize_image_bboxes_with_crop_or_pad2`.
        width: Indexable; `width[0]` is the target width passed to the
            same helper.
        out_shape: Final image size; `out_shape[0]` and `out_shape[1]` are
            also used for the static shape, with 3 channels.
        data_format: When it is anything other than 'NHWC' the image is
            transposed with perm (2, 0, 1).
        use_whiten: Not read by this function; `tf_image_whitened` is
            always called.
        scope: Optional name scope.

    Returns:
        A tuple `(image, labels, bboxes, num)` where `num` is the sum of
        `labels` cast to int32 (taken after the random crop).

    Raises:
        ValueError: If `image` does not have rank 3.
    """
    with tf.name_scope(scope, 'textbox_process_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # Clamp the boxes to [0, 1] before any geometric transform.
        bboxes = tf.maximum(tf.minimum(bboxes, 1.0), 0.0)

        image = tf_image.distorter(image)
        image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad2(
            image, bboxes, height[0], width[0])
        image, labels, bboxes = tf_image.Random_crop(image, labels, bboxes)
        image = tf_image.resize_image(
            image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        image = tf.clip_by_value(image, 0., 1.)
        image, bboxes = tf_image.random_flip_left_right(image, bboxes)

        num = tf.reduce_sum(tf.cast(labels, tf.int32))
        image.set_shape([out_shape[0], out_shape[1], 3])
        tf_image.tf_summary_image(image, bboxes)

        # Scale up by 255 before handing the image to the whitening helper.
        image = image * 255.
        image = tf_image.tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Clamp again after the crop and flip.
        bboxes = tf.maximum(tf.minimum(bboxes, 1.0), 0.0)

        if data_format != 'NHWC':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, num
def preprocess_for_train(image, labels, bboxes, out_shape, data_format='NHWC',
                         use_whiten=True, scope='textbox_process_train'):
    """Build the training-time preprocessing ops for one image.

    The image is converted to float32 when it is not already, cropped via
    `tf_image.distorted_bounding_box_crop`, colour-distorted, randomly
    flipped left-right, resized to `out_shape`, multiplied by 255 and
    passed to `tf_image.tf_image_whitened`.

    Args:
        image: Image tensor.
        labels: Labels tensor, forwarded to the crop helper.
        bboxes: Bounding boxes; first passed through
            `tf_image.clip_bboxes`.
        out_shape: Target size handed to `tf_image.resize_image`.
        data_format: Not read by this function.
        use_whiten: Not read by this function.
        scope: Optional name scope.

    Returns:
        A tuple `(image, labels, bboxes, num)` where `num` is the sum of
        the cropped `labels` cast to int32.
    """
    with tf.name_scope(scope, 'textbox_process_train', [image, labels, bboxes]):
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        bboxes = tf_image.clip_bboxes(bboxes)

        augmented, labels, bboxes = tf_image.distorted_bounding_box_crop(
            image, labels, bboxes)
        augmented = tf_image.distort_color(augmented)
        augmented, bboxes = tf_image.random_flip_left_right(augmented, bboxes)
        augmented = tf_image.resize_image(augmented, out_shape)

        num = tf.reduce_sum(tf.cast(labels, tf.int32))

        scaled = augmented * 255.0
        whitened = tf_image.tf_image_whitened(scaled, [_R_MEAN, _G_MEAN, _B_MEAN])
        return whitened, labels, bboxes, num
def preprocess_for_eval(image, labels, bboxes, cord, out_shape,
                        data_format='NHWC', scope='txt_preprocessing_test'):
    """Build the evaluation-time preprocessing ops for one image.

    Args:
        image: A `Tensor` holding one image; must have rank 3.
        labels: A Tensor with the labels, or None.
        bboxes: A Tensor with the box coordinates, or None.
        cord: Returned unchanged.
        out_shape: Output size; `out_shape[0]` and `out_shape[1]` are also
            used for the static shape, with 3 channels.
        data_format: Not read by this function.
        scope: Name scope under which the ops are created.

    Returns:
        A tuple `(image, labels, bboxes, cord, num)`. `image` is resized
        and divided by 255. `bboxes` has the box `[0., 0., 1., 1.]` as its
        first row, or is only that box when `bboxes` was None.
        NOTE(review): that extra row is not removed again here, so
        `bboxes` has one more row than `labels` -- confirm callers expect
        this. `num` is the sum of `labels` cast to int32, or the Python
        int 0 when `labels` is None.

    Raises:
        ValueError: If `image` does not have rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)

        if labels is None:
            num = 0
        else:
            num = tf.reduce_sum(tf.cast(labels, tf.int32))

        # Prepend the whole-image rectangle to the boxes.
        full_frame = tf.constant([[0., 0., 1., 1.]])
        if bboxes is not None:
            bboxes = tf.concat([full_frame, bboxes], axis=0)
        else:
            bboxes = full_frame

        image = tf_image.resize_image(
            image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        image.set_shape([out_shape[0], out_shape[1], 3])
        image = image / 255.
        return image, labels, bboxes, cord, num
def preprocess_for_eval(image, labels, bboxes, out_shape=EVAL_SIZE,
                        data_format='NHWC', difficults=None,
                        resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Build the evaluation-time preprocessing ops for one image.

    Args:
        image: A `Tensor` holding one image; must have rank 3.
        labels: Labels tensor; filtered together with `bboxes` when
            `difficults` is given.
        bboxes: Bounding boxes tensor, or None.
        out_shape: Output size used by every resize strategy except
            `Resize.NONE`.
        data_format: When equal to 'NCHW' the image is transposed with
            perm (2, 0, 1).
        difficults: Optional tensor; entries that cast to True mark boxes
            (and labels) to drop.
        resize: One of the `Resize` strategies.
        scope: Name scope under which the ops are created.

    Returns:
        A tuple `(image, labels, bboxes, bbox_img)`. `bbox_img` is the
        whole-image rectangle after it went through the same resize or
        crop as the boxes.

    Raises:
        ValueError: If `image` does not have rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Carry the whole-image rectangle as row 0 so that it is
        # transformed together with the real boxes.
        full_frame = tf.constant([[0., 0., 1., 1.]])
        if bboxes is not None:
            bboxes = tf.concat([full_frame, bboxes], axis=0)
        else:
            bboxes = full_frame

        if resize == Resize.NONE:
            # Image is left at its input size.
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Scale factor is capped at 1.0 and is the smaller of the two
            # out_shape / input ratios; the result is then padded.
            in_shape = tf.shape(image)
            unit = tf.to_double(1.0)
            ratio_h = tf.to_double(out_shape[0] / in_shape[0])
            ratio_w = tf.to_double(out_shape[1] / in_shape[1])
            factor = tf.minimum(unit, tf.minimum(ratio_h, ratio_w))
            scaled_hw = tf.cast(
                tf.floor(factor * tf.to_double(in_shape[0:2])), tf.int32)
            image = tf_image.resize_image(
                image,
                scaled_hw,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            image = tf_image.resize_image(
                image,
                out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Row 0 is the transformed image rectangle; the rest are the boxes.
        bbox_img, bboxes = bboxes[0], bboxes[1:]

        if difficults is not None:
            keep = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, keep)
            bboxes = tf.boolean_mask(bboxes, keep)

        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img
def preprocess_for_train(image, labels, bboxes, out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Build the training-time preprocessing ops for one image.

    The image is converted to float32 when it is not already, cropped via
    `distorted_bounding_box_crop`, resized to `out_shape`, randomly
    flipped left-right, colour-distorted with one of 4 orderings,
    multiplied by 255 and passed to `tf_image_whitened`. Image summaries
    are emitted through `tf_summary_image` at three stages.

    Args:
        image: A `Tensor` holding one image; must have rank 3.
        labels: Labels tensor, forwarded to the crop helper.
        bboxes: Bounding boxes tensor, transformed with the image.
        out_shape: Target size handed to `tf_image.resize_image`.
        data_format: When equal to 'NCHW' the image is transposed with
            perm (2, 0, 1).
        scope: Optional name scope.

    Returns:
        A tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If `image` does not have rank 3.
    """
    def _distort(x, ordering):
        # The third argument is the fast_mode flag, fixed to False here.
        return distort_color(x, ordering, False)

    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # The crop helper also returns the sampled box; it is not used.
        augmented, labels, bboxes, _ = distorted_bounding_box_crop(
            image, labels, bboxes, aspect_ratio_range=CROP_RATIO_RANGE)

        augmented = tf_image.resize_image(
            augmented,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(augmented, bboxes, 'image_shape_distorted')

        augmented, bboxes = tf_image.random_flip_left_right(augmented, bboxes)

        augmented = apply_with_random_selector(augmented, _distort, num_cases=4)
        tf_summary_image(augmented, bboxes, 'image_color_distorted')

        # Scale up by 255 before handing the image to the whitening helper.
        image = augmented * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes, xs, ys, out_shape, data_format='NHWC', scope='ssd_preprocessing_train', clip=True, crop_area_range=AREA_RANGE): """Preprocesses the given image for training. The image is converted to float32 when it is not already, cropped with `distorted_bounding_box_crop`, resized to `out_shape`, randomly flipped left-right and colour-distorted, then converted back to the input dtype with saturation, cast to float32 and passed to `_mean_image_subtraction`. Args: image: A `Tensor` representing an image; must have rank 3. labels: Labels forwarded to and returned from `distorted_bounding_box_crop`. bboxes: Bounding boxes, transformed together with the image. xs: Values transformed alongside `bboxes` by the crop and the flip (presumably x coordinates of box corner points; TODO confirm). ys: Same as `xs` (presumably y coordinates; TODO confirm). out_shape: Target size passed to `tf_image.resize_image`. data_format: When equal to 'NCHW' the image is transposed with perm (2, 0, 1). scope: Optional name scope. clip: If true, `bboxes`, `xs` and `ys` are clamped element-wise to [0, 1]. crop_area_range: Passed as `area_range` to `distorted_bounding_box_crop`. Returns: A tuple `(image, labels, bboxes, xs, ys)`. Raises: ValueError: If `image` does not have rank 3. """ fast_mode = False with tf.compat.v1.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]): if image.get_shape().ndims != 3: raise ValueError('Input must be of size [height, width, C>0]') orig_dtype = image.dtype print('orig_dtype:', orig_dtype) # Convert to float scaled [0, 1]. if image.dtype != tf.float32: image = tf.image.convert_image_dtype(image, dtype=tf.float32) # tf_summary_image(image, bboxes, 'image_with_bboxes') # Distort image and bounding boxes. dst_image = image dst_image, labels, bboxes,xs, ys, distort_bbox = \ distorted_bounding_box_crop(image, labels, bboxes,xs, ys, aspect_ratio_range=CROP_RATIO_RANGE,min_object_covered=MIN_OBJECT_COVERED,area_range=crop_area_range) # Resize image to output size. dst_image = tf_image.resize_image(dst_image, out_shape, method=tf.image.ResizeMethod.BILINEAR, align_corners=False) #tf_summary_image(dst_image, bboxes, 'image_shape_distorted') # Randomly flip the image horizontally. #bboxes and xs ys all need to random dst_image, bboxes, xs, ys = tf_image.random_flip_left_right(dst_image, bboxes, xs, ys) # Randomly distort the colors. There are 4 ways to do it. 
dst_image = apply_with_random_selector( dst_image, lambda x, ordering: distort_color(x, ordering, fast_mode), num_cases=4) tf_summary_image(dst_image, bboxes, 'image_color_distorted') # Rescale to VGG input scale. image = tf.cast(tf.image.convert_image_dtype(dst_image, orig_dtype, saturate=True), dtype=tf.float32) # image = dst_image * 255. # image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN]) image = _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN] ) # Image data format. if data_format == 'NCHW': image = tf.transpose(a=image, perm=(2, 0, 1)) if clip: xy_clip_min = tf.constant([0., 0., 0., 0.]) xy_clip_max = tf.constant([1., 1., 1., 1.]) bbox_img_max = tf.constant([1., 1., 1. , 1.]) bbox_img_min = tf.constant([0., 0., 0., 0.]) bboxes = tf.minimum(bboxes, bbox_img_max) bboxes = tf.maximum(bboxes, bbox_img_min) xs = tf.maximum(xs, xy_clip_min) ys = tf.maximum(ys, xy_clip_min) xs = tf.minimum(xs, xy_clip_max) ys = tf.minimum(ys, xy_clip_max) tf_summary_image(image, bboxes, ' image whitened') # image = tf.Print(image, [image[0]], ' image: ', summarize=20) # xs = tf.Print(xs, [xs, tf.shape(xs)], ' xs ', summarize=20) # ys = tf.Print(ys, [ys, tf.shape(ys)], ' ys ', summarize=20) # bboxes = tf.Print(bboxes, [bboxes, tf.shape(bboxes)], ' bboxes ',summarize=20) return image, labels, bboxes, xs, ys
def preprocess_for_eval(image, labels, bboxes, height, width,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        use_whiten=True, difficults=None,
                        resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Build the evaluation-time preprocessing ops for one image.

    Args:
        image: A `Tensor` holding one image; must have rank 3.
        labels: A Tensor with the labels, or None.
        bboxes: A Tensor with the box coordinates, or None.
        height: Not read by this function.
        width: Not read by this function.
        out_shape: Output size used by every resize strategy except
            `Resize.NONE`.
        data_format: When it is anything other than 'NHWC' the image is
            transposed with perm (2, 0, 1).
        use_whiten: Not read by this function; `tf_image_whitened` is
            always called.
        difficults: Optional tensor; entries that cast to True mark boxes
            (and labels) to drop.
        resize: One of the `Resize` strategies.
        scope: Name scope under which the ops are created.

    Returns:
        A tuple `(image, labels, bboxes, bbox_img, num)`. `bbox_img` is
        the whole-image rectangle after it went through the same resize or
        crop as the boxes. `num` is the sum of `labels` cast to int32
        (taken before any filtering), or the Python int 0 when `labels`
        is None.

    Raises:
        ValueError: If `image` does not have rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)

        if labels is None:
            num = 0
        else:
            num = tf.reduce_sum(tf.cast(labels, tf.int32))

        # Carry the whole-image rectangle as row 0 so that it is
        # transformed together with the real boxes.
        full_frame = tf.constant([[0., 0., 1., 1.]])
        if bboxes is not None:
            bboxes = tf.concat([full_frame, bboxes], axis=0)
        else:
            bboxes = full_frame

        if resize == Resize.NONE:
            # Image is left at its input size.
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Scale factor is capped at 1.0 and is the smaller of the two
            # out_shape / input ratios; the result is then padded.
            in_shape = tf.shape(image)
            unit = tf.to_double(1.0)
            ratio_h = tf.to_double(out_shape[0] / in_shape[0])
            ratio_w = tf.to_double(out_shape[1] / in_shape[1])
            factor = tf.minimum(unit, tf.minimum(ratio_h, ratio_w))
            scaled_hw = tf.cast(
                tf.floor(factor * tf.to_double(in_shape[0:2])), tf.int32)
            image = tf_image.resize_image(
                image,
                scaled_hw,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            image = tf_image.resize_image(
                image,
                out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Row 0 is the transformed image rectangle; the rest are the boxes.
        bbox_img, bboxes = bboxes[0], bboxes[1:]

        if difficults is not None:
            keep = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, keep)
            bboxes = tf.boolean_mask(bboxes, keep)

        image = tf.clip_by_value(image, 0., 255.)
        image = tf_image.tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        if data_format != 'NHWC':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img, num
def preprocess_for_train(image, labels, bboxes, out_shape, data_format='NHWC',
                         scope='textbox_process_train'):
    """Build the training-time preprocessing ops for one image.

    The image is converted to float32 when it is not already, cropped via
    `distorted_bounding_box_crop`, resized to `out_shape`, randomly
    flipped left-right, colour-distorted with one of 4 orderings,
    multiplied by 255 and passed to `tf_image.tf_image_whitened`.

    Side effects: an image summary named 'image_with_box' is added, `num`
    is added to the 'EXTRA_LOSSES' collection, and
    `tf_image.tf_summary_image` is called for the colour-distorted image.

    Args:
        image: A `Tensor` holding one image; must have rank 3.
        labels: A Tensor with the labels of the boxes.
        bboxes: A Tensor with the box coordinates (shape [N, 4] according
            to the original note; not checked here). Clamped to
            [0.0001, 0.9999] before cropping.
        out_shape: Output size; `out_shape[0]` and `out_shape[1]` are also
            used for the static shape, with 3 channels.
        data_format: Not read by this function.
        scope: Optional name scope.

    Returns:
        A tuple `(image, labels, bboxes, num)`; `num` is the value
        returned by `distorted_bounding_box_crop`.

    Raises:
        ValueError: If `image` does not have rank 3.
    """
    with tf.name_scope(scope, 'textbox_process_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # Keep the coordinates strictly inside the unit square.
        bboxes = tf.minimum(bboxes, 0.9999)
        bboxes = tf.maximum(bboxes, 0.0001)

        # Distort image and bounding boxes. `num` comes from the crop
        # helper; the earlier label sum was overwritten here without ever
        # being read, so it is no longer computed.
        image, labels, bboxes, distort_bbox, num = distorted_bounding_box_crop(
            image, labels, bboxes, aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.
        dst_image = tf_image.resize_image(
            image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        bbox_image = tf.image.draw_bounding_boxes(
            tf.expand_dims(dst_image, 0), tf.expand_dims(bboxes, 0))
        tf.summary.image('image_with_box', bbox_image)
        tf.add_to_collection('EXTRA_LOSSES', num)

        dst_image = tf_image.apply_with_random_selector(
            dst_image,
            lambda x, ordering: tf_image.distort_color_2(x, ordering, True),
            num_cases=4)
        tf_image.tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Pin the static shape BEFORE deriving `image`: set_shape only
        # annotates the tensor it is called on, so calling it after the
        # multiplication left the returned tensor without this shape.
        dst_image.set_shape([out_shape[0], out_shape[1], 3])

        # Rescale to normal range.
        image = dst_image * 255.
        image = tf_image.tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        return image, labels, bboxes, num
def preprocess_for_train(image, labels, bboxes, out_shape, data_format='NHWC', use_whiten=True, scope='textbox_process_train'): """Preprocesses the given image for training. The image is converted to float32 when it is not already, cropped with `distorted_bounding_box_crop`, resized to `out_shape`, randomly flipped left-right, colour-distorted and multiplied by 255. Args: image: A `Tensor` representing an image; must have rank 3. labels: A Tensor with the labels; forwarded to the crop. bboxes: A Tensor with the bbox coordinates (shape [N, 4] per the original note; not checked here). Clamped to [0, 1] on entry and again before returning. out_shape: Output size; `out_shape[0]` and `out_shape[1]` are also used for the static shape, with 3 channels. data_format: Not read by this function. use_whiten: Guards the `tf_image.tf_image_whitened` call. NOTE(review): whether the following division by 255.0 also belongs to this branch must be checked against the indented original. scope: Optional name scope. Returns: A tuple `(image, labels, bboxes, num)`; `num` is the value returned by `distorted_bounding_box_crop`. Raises: ValueError: If `image` does not have rank 3. NOTE(review): `min_object_covered` is picked with `np.random.randint` in Python, i.e. when this function is called rather than by a TensorFlow op. """ with tf.name_scope(scope, 'textbox_process_train', [image, labels, bboxes]): if image.get_shape().ndims != 3: raise ValueError('Input must be of size [height, width, C>0]') # Convert to float scaled [0, 1]. if image.dtype != tf.float32: image = tf.image.convert_image_dtype(image, dtype=tf.float32) num = tf.reduce_sum(tf.cast(labels, tf.int32)) bboxes = tf.minimum(bboxes, 1.0) bboxes = tf.maximum(bboxes, 0.0) #image, boxes = zoom_out(image, boxes) # Distort image and bounding boxes. object_covered = np.random.randint(5) min_object_covered = OBJECT_COVERED[object_covered] image, labels, bboxes, distort_bbox ,num= \ distorted_bounding_box_crop(image, labels, bboxes, min_object_covered=min_object_covered, aspect_ratio_range=CROP_RATIO_RANGE) # Resize image to output size. 
dst_image = tf_image.resize_image( image, out_shape, method=tf.image.ResizeMethod.BILINEAR, align_corners=False) dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes) #dst_image.set_shape([out_shape[0], out_shape[1], 3]) #bbox_image = tf.image.draw_bounding_boxes(tf.expand_dims(dst_image,0), tf.expand_dims(bboxes,0)) #tf.summary.image('image_with_box', bbox_image) dst_image = tf_image.apply_with_random_selector( dst_image, lambda x, ordering: tf_image.distort_color_2(x, ordering, False), num_cases=4) # Rescale to normal range image = dst_image * 255 image.set_shape([out_shape[0], out_shape[1], 3]) if use_whiten: image = tf_image.tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN]) image = image / 255.0 bboxes = tf.minimum(bboxes, 1.0) bboxes = tf.maximum(bboxes, 0.0) return image, labels, bboxes, num