def preprocessing_training(image, mask, out_shape, prob=0.5):
    """Randomly augment an image/mask pair, then resize both to `out_shape`.

    Three augmentations are attempted in order -- a rotation by a random
    number of quarter turns, a left/right flip and an up/down flip. Each one
    is applied when a fresh uniform draw falls below `prob`, and is always
    applied to `image` and `mask` together.

    Args:
        image: Image tensor.
        mask: Mask tensor paired with `image`.
        out_shape: Target size handed to `tf_image.resize_image`.
        prob: Threshold compared against a uniform draw for each step.

    Returns:
        `(image, mask)`; the image is resized with bilinear interpolation and
        the mask with nearest-neighbour interpolation.
    """
    def _apply_with_prob(step_name, transform, img, msk):
        # One uniform draw per step decides whether the transform is used.
        with tf.name_scope(step_name):
            draw = tf.random_uniform((), minval=0, maxval=1, name=step_name)
            return tf.cond(tf.less(draw, prob),
                           lambda: transform(img, msk),
                           lambda: (img, msk))

    def _rot90_pair(img, msk):
        # rot90 is given the same random integer for both tensors.
        turns = tf.cast(random_ops.random_uniform([], 0, 10000), tf.int32)
        return tf.image.rot90(img, k=turns), tf.image.rot90(msk, k=turns)

    def _flip_lr_pair(img, msk):
        return tf.image.flip_left_right(img), tf.image.flip_left_right(msk)

    def _flip_ud_pair(img, msk):
        return tf.image.flip_up_down(img), tf.image.flip_up_down(msk)

    with tf.name_scope('preprocessing_training'):
        image, mask = _apply_with_prob('rotate', _rot90_pair, image, mask)
        image, mask = _apply_with_prob(
            'flip_left_right', _flip_lr_pair, image, mask)
        image, mask = _apply_with_prob(
            'flip_up_down', _flip_ud_pair, image, mask)

        image = tf_image.resize_image(
            image, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        # Nearest neighbour keeps mask values from being blended.
        mask = tf_image.resize_image(
            mask, out_shape,
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
            align_corners=False)
        return image, mask
def preprocess_for_eval(image, labels, bboxes, xs, ys,
                        out_shape, data_format='NHWC',
                        resize=Resize.WARP_RESIZE,
                        do_resize=True,
                        scope='ssd_preprocessing_train'):
    """Preprocess a single image for evaluation.

    The image is cast to float, passed through `tf_image_whitened` with the
    module's channel means, optionally resized to `out_shape` and optionally
    transposed to channels-first. `labels`, `bboxes`, `xs` and `ys` are
    returned untouched.

    Args:
        image: Rank-3 image tensor.
        labels: Passed through unchanged.
        bboxes: Passed through unchanged.
        xs: Passed through unchanged.
        ys: Passed through unchanged.
        out_shape: Target size used when resizing.
        data_format: 'NHWC' (default) or 'NCHW'.
        resize: Resize strategy; any value other than `Resize.NONE` results
            in a bilinear resize to `out_shape`.
        do_resize: When False, resizing is skipped regardless of `resize`.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes, xs, ys)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    with tf.name_scope(scope):
        rank = image.get_shape().ndims
        if rank != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf_image_whitened(tf.to_float(image),
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        if do_resize and resize != Resize.NONE:
            image = tf_image.resize_image(
                image, out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys
def preprocess_for_eval(image, labels, bboxes, xs, ys,
                        out_shape, data_format='NHWC',
                        resize=Resize.WARP_RESIZE,
                        do_resize=True,
                        scope='ssd_preprocessing_train'):
    """Preprocess a single image for evaluation.

    The image is cast to float, passed through `tf_image_whitened` with the
    module's channel means, optionally resized to `out_shape` and optionally
    transposed to channels-first. `labels`, `bboxes`, `xs` and `ys` are
    returned untouched.

    Args:
        image: Rank-3 image tensor.
        labels: Passed through unchanged.
        bboxes: Passed through unchanged.
        xs: Passed through unchanged.
        ys: Passed through unchanged.
        out_shape: Target size used when resizing.
        data_format: 'NHWC' (default) or 'NCHW'.
        resize: Resize strategy; any value other than `Resize.NONE` results
            in a bilinear resize to `out_shape`.
        do_resize: When False, resizing is skipped regardless of `resize`.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes, xs, ys)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    with tf.name_scope(scope):
        rank = image.get_shape().ndims
        if rank != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf_image_whitened(tf.to_float(image),
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        if do_resize and resize != Resize.NONE:
            image = tf_image.resize_image(
                image, out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys
def preprocess_for_eval(image, labels, bboxes, out_shape, resize,
                        scope='ssd_preprocessing_train'):
    """Preprocess a single image and its boxes for evaluation.

    The image is cast to float and passed through `tf_image_whitened` with
    the module's channel means. A box covering the whole image is prepended
    to `bboxes` so that it goes through the same crop/pad as the real boxes;
    it is split off again and returned separately.

    Args:
        image: Rank-3 image tensor.
        labels: Passed through unchanged.
        bboxes: Box tensor, or None when there are no boxes.
        out_shape: Target `(height, width)`; only read by the
            `CENTRAL_CROP` and `PAD_AND_RESIZE` strategies.
        resize: One of `Resize.NONE`, `Resize.CENTRAL_CROP` or
            `Resize.PAD_AND_RESIZE`. Any other value leaves the image as is.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes, bbox_img)` where `bbox_img` is the
        transformed whole-image box.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf_image_whitened(tf.to_float(image),
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        whole_image_box = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = whole_image_box
        else:
            # NOTE(review): (axis, values) is the pre-1.0 TensorFlow argument
            # order -- confirm against the TF version this file targets.
            bboxes = tf.concat(0, [whole_image_box, bboxes])

        if resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink (never enlarge) so the image fits inside out_shape.
            shape = tf.shape(image)
            factor = tf.minimum(
                tf.to_double(1.0),
                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                           tf.to_double(out_shape[1] / shape[1])))
            scaled_hw = factor * tf.to_double(shape[0:2])
            scaled_hw = tf.cast(tf.floor(scaled_hw), tf.int32)
            image = tf_image.resize_image(
                image, scaled_hw,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        # Resize.NONE (and any unrecognised strategy) leaves the image as is.

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
def preprocess_for_train(image, labels, bboxes,
                         out_shape, scope='ssd_preprocessing_train'):
    """Augment and normalise a single image for training.

    Steps, in order: convert to float32, crop with
    `distorted_bounding_box_crop`, resize to `out_shape`, random left/right
    flip, random colour distortion (one of four orderings), multiply by 255
    and pass through `tf_image_whitened` with the module's channel means.
    Image summaries are emitted after several of the stages.

    Args:
        image: Rank-3 image tensor.
        labels: Labels, updated by the crop step.
        bboxes: Boxes, updated by the crop and flip steps.
        out_shape: Target size handed to `tf_image.resize_image`.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    use_fast_color_ops = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes; the crop window itself is unused.
        cropped, labels, bboxes, _ = distorted_bounding_box_crop(
            image, labels, bboxes,
            aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        flipped, bboxes = tf_image.random_flip_left_right(resized, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(x, ordering):
            return distort_color(x, ordering, use_fast_color_ops)

        distorted = apply_with_random_selector(flipped, _distort, num_cases=4)
        tf_summary_image(distorted, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = tf_image_whitened(distorted * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes,
                         out_shape, scope='ssd_preprocessing_train'):
    """Augment and normalise a single image for training.

    Steps, in order: convert to float32, crop with
    `distorted_bounding_box_crop`, resize to `out_shape`, random left/right
    flip, random colour distortion (one of four orderings), multiply by 255
    and pass through `tf_image_whitened` with the module's channel means.
    Image summaries are emitted after several of the stages.

    Args:
        image: Rank-3 image tensor.
        labels: Labels, updated by the crop step.
        bboxes: Boxes, updated by the crop and flip steps.
        out_shape: Target size handed to `tf_image.resize_image`.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    use_fast_color_ops = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes; the crop window itself is unused.
        cropped, labels, bboxes, _ = distorted_bounding_box_crop(
            image, labels, bboxes,
            aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        flipped, bboxes = tf_image.random_flip_left_right(resized, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(x, ordering):
            return distort_color(x, ordering, use_fast_color_ops)

        distorted = apply_with_random_selector(flipped, _distort, num_cases=4)
        tf_summary_image(distorted, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = tf_image_whitened(distorted * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])
        return image, labels, bboxes
def preprocessing_val(image, out_shape):
    """Convert a validation image to float32 and resize it to `out_shape`.

    Args:
        image: Image tensor; it is printed once for debugging when the graph
            is built.
        out_shape: Target size handed to `tf_image.resize_image`.

    Returns:
        The bilinearly resized float32 image.
    """
    with tf.name_scope('preprocessing_val'):
        print('image is ', image)
        as_float = tf.image.convert_image_dtype(image, dtype=tf.float32)
        return tf_image.resize_image(
            as_float, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
def preprocess_for_eval(image, labels, bboxes, out_shape, resize,
                        scope='ssd_preprocessing_train'):
    """Preprocess a single image and its boxes for evaluation.

    The image is cast to float and passed through `tf_image_whitened` with
    the module's channel means. A box covering the whole image is prepended
    to `bboxes` so that it goes through the same crop/pad as the real boxes;
    it is split off again and returned separately.

    Args:
        image: Rank-3 image tensor.
        labels: Passed through unchanged.
        bboxes: Box tensor, or None when there are no boxes.
        out_shape: Target `(height, width)`; only read by the
            `CENTRAL_CROP` and `PAD_AND_RESIZE` strategies.
        resize: One of `Resize.NONE`, `Resize.CENTRAL_CROP` or
            `Resize.PAD_AND_RESIZE`. Any other value leaves the image as is.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes, bbox_img)` where `bbox_img` is the
        transformed whole-image box.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf_image_whitened(tf.to_float(image),
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        whole_image_box = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = whole_image_box
        else:
            # NOTE(review): (axis, values) is the pre-1.0 TensorFlow argument
            # order -- confirm against the TF version this file targets.
            bboxes = tf.concat(0, [whole_image_box, bboxes])

        if resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink (never enlarge) so the image fits inside out_shape.
            shape = tf.shape(image)
            factor = tf.minimum(
                tf.to_double(1.0),
                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                           tf.to_double(out_shape[1] / shape[1])))
            scaled_hw = factor * tf.to_double(shape[0:2])
            scaled_hw = tf.cast(tf.floor(scaled_hw), tf.int32)
            image = tf_image.resize_image(
                image, scaled_hw,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        # Resize.NONE (and any unrecognised strategy) leaves the image as is.

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
def preprocess_for_eval_multi(image, labels, bboxes, out_shape, resize,
                              scope='ssd_preprocessing_train'):
    """Preprocess image data and boxes for evaluation with fixed means.

    The input is cast to float and a hard-coded per-channel mean, reshaped
    to `(1, 1, 1, 3)`, is subtracted. A box covering the whole image is
    prepended to `bboxes` so that it goes through the same crop/pad as the
    real boxes; it is split off again and returned separately.

    Args:
        image: Image tensor. No rank check is performed here.
        labels: Passed through unchanged.
        bboxes: Box tensor, or None when there are no boxes.
        out_shape: Target `(height, width)`; only read by the
            `CENTRAL_CROP` and `PAD_AND_RESIZE` strategies.
        resize: One of `Resize.NONE`, `Resize.CENTRAL_CROP` or
            `Resize.PAD_AND_RESIZE`. Any other value leaves the image as is.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes, bbox_img)` where `bbox_img` is the
        transformed whole-image box.
    """
    with tf.name_scope(scope):
        image = tf.to_float(image)
        # NOTE(review): the mean is broadcast as a 4-D array, while the
        # PAD_AND_RESIZE branch below reads shape[0:2] as height/width --
        # confirm the expected rank of `image`.
        channel_means = np.array([123.6800, 116.7790, 103.9390]).reshape(
            (1, 1, 1, 3))
        image = image - channel_means

        # Add image rectangle to bboxes.
        whole_image_box = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = whole_image_box
        else:
            # NOTE(review): (axis, values) is the pre-1.0 TensorFlow argument
            # order -- confirm against the TF version this file targets.
            bboxes = tf.concat(0, [whole_image_box, bboxes])

        if resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink (never enlarge) so the image fits inside out_shape.
            shape = tf.shape(image)
            factor = tf.minimum(
                tf.to_double(1.0),
                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                           tf.to_double(out_shape[1] / shape[1])))
            scaled_hw = factor * tf.to_double(shape[0:2])
            scaled_hw = tf.cast(tf.floor(scaled_hw), tf.int32)
            image = tf_image.resize_image(
                image, scaled_hw,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        # Resize.NONE (and any unrecognised strategy) leaves the image as is.

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
def preprocess_for_train(image, labels, bboxes,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Augment and normalise a single image for training.

    Steps, in order: convert to float32, crop with
    `distorted_bounding_box_crop`, resize to `out_shape`, random left/right
    flip, random colour distortion (one of four orderings), multiply by 255,
    pass through `tf_image_whitened` with the module's channel means and
    optionally transpose to channels-first.

    Args:
        image: Rank-3 image tensor.
        labels: Labels, updated by the crop step.
        bboxes: Boxes, updated by the crop and flip steps.
        out_shape: Target size handed to `tf_image.resize_image`.
        data_format: 'NHWC' (default) or 'NCHW'.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    use_fast_color_ops = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')
        # From here on the image is rank 3 and tf.float32.

        # Constrained random crop. The crop window in original-image
        # coordinates is returned as well but is not used here.
        # (Original author's note: labels are (n,), bboxes (n, 4) and the
        # window (4,) -- not verifiable from this function alone.)
        cropped, labels, bboxes, _ = distorted_bounding_box_crop(
            image, labels, bboxes,
            min_object_covered=MIN_OBJECT_COVERED,
            aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        flipped, bboxes = tf_image.random_flip_left_right(resized, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(x, ordering):
            return distort_color(x, ordering, use_fast_color_ops)

        distorted = apply_with_random_selector(flipped, _distort, num_cases=4)
        tf_summary_image(distorted, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = tf_image_whitened(distorted * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Augment and normalise a single image for training.

    Steps, in order: convert to float32, crop with
    `distorted_bounding_box_crop`, resize to `out_shape`, random left/right
    flip, random colour distortion (one of four orderings), multiply by 255,
    pass through `tf_image_whitened` with the module's channel means and
    optionally transpose to channels-first.

    Args:
        image: Rank-3 image tensor.
        labels: Labels, updated by the crop step.
        bboxes: Boxes, updated by the crop and flip steps.
        out_shape: Target size handed to `tf_image.resize_image`.
        data_format: 'NHWC' (default) or 'NCHW'.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    use_fast_color_ops = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes; the crop window itself is unused.
        cropped, labels, bboxes, _ = distorted_bounding_box_crop(
            image, labels, bboxes,
            min_object_covered=MIN_OBJECT_COVERED,
            aspect_ratio_range=CROP_RATIO_RANGE)

        # The crop has no fixed spatial size (per the original author's note
        # its static shape is [None, None, 3]), so every crop is resized to
        # the network input size `out_shape`.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        flipped, bboxes = tf_image.random_flip_left_right(resized, bboxes)

        # Randomly distort the colors, using one of 4 random orderings.
        def _distort(x, ordering):
            return distort_color(x, ordering, use_fast_color_ops)

        distorted = apply_with_random_selector(flipped, _distort, num_cases=4)
        tf_summary_image(distorted, bboxes, 'image_color_distorted')

        # The distorted image is in the [0, 1] float range; scale it back to
        # [0, 255] (the VGG input scale) before whitening.
        image = tf_image_whitened(distorted * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes, xs, ys,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Augment and normalise a single image with boxes and point coordinates.

    Steps, in order: optional random 90-degree rotation, optional random
    expansion via crop-or-pad, conversion to float32, crop with
    `distorted_bounding_box_crop`, resize to `out_shape`, optional filtering
    by shorter side, random colour distortion, multiply by 255, pass through
    `tf_image_whitened` and optionally transpose to channels-first.

    Args:
        image: Rank-3 image tensor.
        labels: Labels, updated by the crop and filter steps.
        bboxes: Boxes, updated alongside the image.
        xs: X coordinates, updated alongside the image.
        ys: Y coordinates, updated alongside the image.
        out_shape: Target `(height, width)` of the output image.
        data_format: 'NHWC' (default) or 'NCHW'.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes, xs, ys)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    use_fast_color_ops = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Rotate image by 0, 0.5 * pi, pi, 1.5 * pi randomly.
        if USE_ROTATION:
            rotate_draw = tf.random_uniform((), minval=0, maxval=1)
            before_rotation = (image, bboxes, xs, ys)
            image, bboxes, xs, ys = tf.cond(
                tf.less(rotate_draw, config.rotation_prob),
                lambda: tf_image.random_rotate90(*before_rotation),
                lambda: before_rotation)

        # Expand image.
        if MAX_EXPAND_SCALE > 1:
            expand_draw = tf.random_uniform((), minval=0, maxval=1)
            before_expand = (image, bboxes, xs, ys)

            def _expand():
                src_image, src_bboxes, src_xs, src_ys = before_expand
                factor = tf.random_uniform([], minval=1.0,
                                           maxval=MAX_EXPAND_SCALE,
                                           dtype=tf.float32)
                float_shape = tf.cast(tf.shape(src_image), dtype=tf.float32)
                src_h, src_w = float_shape[0], float_shape[1]
                new_h = tf.cast(src_h * factor, dtype=tf.int32)
                new_w = tf.cast(src_w * factor, dtype=tf.int32)
                # Logged while the branch is being built, not per example.
                tf.logging.info('expanded')
                return tf_image.resize_image_bboxes_with_crop_or_pad(
                    src_image, src_bboxes, src_xs, src_ys, new_h, new_w)

            image, bboxes, xs, ys = tf.cond(
                tf.less(expand_draw, config.expand_prob),
                _expand,
                lambda: before_expand)

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # Distort image and bounding boxes; the crop window itself is unused.
        cropped, labels, bboxes, xs, ys, _ = distorted_bounding_box_crop(
            image, labels, bboxes, xs, ys,
            min_object_covered=MIN_OBJECT_COVERED,
            aspect_ratio_range=CROP_ASPECT_RATIO_RANGE,
            area_range=AREA_RANGE)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Filter bboxes using the length of shorter sides. Coordinates are
        # scaled by the output size for the filter, then scaled back.
        if USING_SHORTER_SIDE_FILTERING:
            xs = xs * out_shape[1]
            ys = ys * out_shape[0]
            labels, bboxes, xs, ys = tfe.bboxes_filter_by_shorter_side(
                labels, bboxes, xs, ys,
                min_height=MIN_SHORTER_SIDE,
                max_height=MAX_SHORTER_SIDE,
                assign_value=LABEL_IGNORE)
            xs = xs / out_shape[1]
            ys = ys / out_shape[0]

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(x, ordering):
            return distort_color(x, ordering, use_fast_color_ops)

        distorted = apply_with_random_selector(resized, _distort, num_cases=4)
        tf_summary_image(distorted, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = tf_image_whitened(distorted * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys
def preprocess_for_train(image, labels, bboxes,
                         out_shape=EVAL_SIZE, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Augment and normalise a single image for training.

    Steps, in order: convert to float32, crop with
    `distorted_bounding_box_crop`, resize to `out_shape`, random left/right
    flip, random colour distortion (one of four orderings), multiply by 255,
    pass through `tf_image_whitened` with the module's channel means and
    optionally transpose to channels-first. A numbered image summary is
    emitted after each stage.

    Args:
        image: Rank-3 image tensor.
        labels: Labels, updated by the crop step.
        bboxes: Boxes, updated by the crop and flip steps.
        out_shape: Target size handed to `tf_image.resize_image`.
        data_format: 'NHWC' (default) or 'NCHW'.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    use_fast_color_ops = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, '0_original_image')

        # Distort image and bounding boxes.
        cropped, labels, bboxes, crop_window = distorted_bounding_box_crop(
            image, labels, bboxes)
        # Draw the crop window on the uncropped image.
        tf_summary_image(image, tf.reshape(crop_window, (1, -1)),
                         '1_cropped_position')

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, '2_resized_image')

        # Randomly flip the image horizontally.
        flipped, bboxes = tf_image.random_flip_left_right(resized, bboxes)
        tf_summary_image(flipped, bboxes, '3_flipped_image')

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(x, ordering):
            return distort_color(x, ordering, use_fast_color_ops)

        distorted = apply_with_random_selector(flipped, _distort, num_cases=4)
        tf_summary_image(distorted, bboxes, '4_color_distorted_image')

        # Rescale to VGG input scale.
        image = tf_image_whitened(distorted * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes, xs, ys,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Augment and normalise a single image with boxes and point coordinates.

    Steps, in order: optional random 90-degree rotation, optional random
    expansion via crop-or-pad, conversion to float32, crop with
    `distorted_bounding_box_crop`, resize to `out_shape`, optional filtering
    by shorter side, random colour distortion, multiply by 255, pass through
    `tf_image_whitened` and optionally transpose to channels-first.

    Args:
        image: Rank-3 image tensor.
        labels: Labels, updated by the crop and filter steps.
        bboxes: Boxes, updated alongside the image.
        xs: X coordinates, updated alongside the image.
        ys: Y coordinates, updated alongside the image.
        out_shape: Target `(height, width)` of the output image.
        data_format: 'NHWC' (default) or 'NCHW'.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes, xs, ys)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    use_fast_color_ops = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Rotate image by 0, 0.5 * pi, pi, 1.5 * pi randomly.
        if USE_ROTATION:
            rotate_draw = tf.random_uniform((), minval=0, maxval=1)
            before_rotation = (image, bboxes, xs, ys)
            image, bboxes, xs, ys = tf.cond(
                tf.less(rotate_draw, config.rotation_prob),
                lambda: tf_image.random_rotate90(*before_rotation),
                lambda: before_rotation)

        # Expand image.
        if MAX_EXPAND_SCALE > 1:
            expand_draw = tf.random_uniform((), minval=0, maxval=1)
            before_expand = (image, bboxes, xs, ys)

            def _expand():
                src_image, src_bboxes, src_xs, src_ys = before_expand
                factor = tf.random_uniform([], minval=1.0,
                                           maxval=MAX_EXPAND_SCALE,
                                           dtype=tf.float32)
                float_shape = tf.cast(tf.shape(src_image), dtype=tf.float32)
                src_h, src_w = float_shape[0], float_shape[1]
                new_h = tf.cast(src_h * factor, dtype=tf.int32)
                new_w = tf.cast(src_w * factor, dtype=tf.int32)
                # Logged while the branch is being built, not per example.
                tf.logging.info('expanded')
                return tf_image.resize_image_bboxes_with_crop_or_pad(
                    src_image, src_bboxes, src_xs, src_ys, new_h, new_w)

            image, bboxes, xs, ys = tf.cond(
                tf.less(expand_draw, config.expand_prob),
                _expand,
                lambda: before_expand)

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # Distort image and bounding boxes; the crop window itself is unused.
        cropped, labels, bboxes, xs, ys, _ = distorted_bounding_box_crop(
            image, labels, bboxes, xs, ys,
            min_object_covered=MIN_OBJECT_COVERED,
            aspect_ratio_range=CROP_ASPECT_RATIO_RANGE,
            area_range=AREA_RANGE)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Filter bboxes using the length of shorter sides. Coordinates are
        # scaled by the output size for the filter, then scaled back.
        if USING_SHORTER_SIDE_FILTERING:
            xs = xs * out_shape[1]
            ys = ys * out_shape[0]
            labels, bboxes, xs, ys = tfe.bboxes_filter_by_shorter_side(
                labels, bboxes, xs, ys,
                min_height=MIN_SHORTER_SIDE,
                max_height=MAX_SHORTER_SIDE,
                assign_value=LABEL_IGNORE)
            xs = xs / out_shape[1]
            ys = ys / out_shape[0]

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(x, ordering):
            return distort_color(x, ordering, use_fast_color_ops)

        distorted = apply_with_random_selector(resized, _distort, num_cases=4)
        tf_summary_image(distorted, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = tf_image_whitened(distorted * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys
def preprocessing_val(image, out_shape):
    """Resize a validation image to `out_shape` with bilinear interpolation.

    Args:
        image: Image tensor.
        out_shape: Target size handed to `tf_image.resize_image`.

    Returns:
        The resized image.
    """
    return tf_image.resize_image(
        image, out_shape,
        method=tf.image.ResizeMethod.BILINEAR,
        align_corners=False)
# Resize bboxes to original image shape. rbboxes = tf_image.bboxes_resize(rbboxes, img) return rscores, rbboxes # Test on demo images. path = '../test/' image_names = sorted(os.listdir(path)) image_raw = tf.gfile.FastGFile(image_names[0], 'rb').read() image = tf.image.decode_png(image_raw) #img = mpimg.imread(path + image_names[0]) #resized_img = cv2.resize(img, (512, 512)) resized_img = tf_image.resize_image(image, net_shape, method=tf.image.ResizeMethod.BILINEAR, align_corners=False) saver = tf.train.Saver() saver.restore(isess, ckpt_filename) rscores, rbboxes = process_image(resized_img) print(rscores) print('--------------------------') print(rbboxes) # Draw results. # img_bboxes = np.copy(img) # bboxes_draw_on_img(img_bboxes, rclasses, rscores, rbboxes, colors_tableau, thickness=2) # mpimg.imsave('output.jpeg', img_bboxes)
def preprocess_for_train(image, labels, bboxes, orientations,
                         out_shape, data_format='NHWC',
                         scope='simplifyssd_preprocessing_train'):
    """Normalise and augment a 3-channel image with boxes and orientations.

    The image is cast to float32 and each channel is rescaled separately:
    channels 0 and 1 become `x / 256 - 0.5`, channel 2 becomes
    `x / 32 - 0.5`. The result is cropped with
    `distorted_bounding_box_crop`, resized to `out_shape`, randomly flipped
    left/right and optionally transposed to channels-first. No colour
    distortion or mean subtraction is applied.

    Args:
        image: Image tensor with exactly 3 channels on axis 2.
        labels: Labels, updated by the crop step.
        bboxes: Boxes, updated by the crop and flip steps.
        orientations: Orientations, updated by the crop and flip steps.
        out_shape: Target size handed to `tf_image.resize_image`.
        data_format: 'NHWC' (default) or 'NCHW'.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes, orientations)`.
    """
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes, orientations]):
        image = tf.cast(image, tf.float32)
        chan0, chan1, chan2 = tf.split(image, [1, 1, 1], axis=2)

        # Per-channel rescale: divide by the channel's scale, subtract 0.5.
        divisor = tf.constant(256.0)
        offset = tf.constant(0.5)
        chan0 = tf.subtract(tf.divide(chan0, divisor), offset)
        divisor = tf.constant(256.0)
        chan1 = tf.subtract(tf.divide(chan1, divisor), offset)
        # The third channel uses a different scale (32 instead of 256).
        divisor = tf.constant(32.0)
        chan2 = tf.subtract(tf.divide(chan2, divisor), offset)
        normalised = tf.concat([chan0, chan1, chan2], axis=2)
        tf_summary_image(normalised, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes; the crop window itself is unused.
        cropped, labels, bboxes, _, orientations = \
            distorted_bounding_box_crop(
                normalised, labels, bboxes, orientations,
                min_object_covered=MIN_OBJECT_COVERED,
                aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        flipped, bboxes, orientations = \
            tf_image.random_flip_left_right_with_ori(
                resized, bboxes, orientations)

        # Colour distortion is disabled in this variant; the summary tag is
        # kept as is.
        tf_summary_image(flipped, bboxes, 'image_color_distorted')

        image = flipped
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, orientations
def ron_preprocess_for_train(image, labels, bboxes,
                             out_shape, data_format='NHWC',
                             scope='ron_preprocessing_train'):
    """Augment and normalise a single image for training.

    Steps, in order: convert to float32, random expansion via
    `tf_image.ssd_random_expand` (skipped when a uniform draw is below 0.5),
    patch sampling via `tf_image.ssd_random_sample_patch`, random left/right
    flip, resize to `out_shape`, multiply by 255, pass through
    `tf_image_whitened` and optionally transpose to channels-first.

    A colour-distorted copy is built and written to the summaries, but the
    returned image is derived from the undistorted resized tensor.

    Args:
        image: Rank-3 image tensor.
        labels: Labels, updated by the patch-sampling step.
        bboxes: Boxes, updated by the expand, sampling and flip steps.
        out_shape: Target size handed to `tf_image.resize_image`.
        data_format: 'NHWC' (default) or 'NCHW'.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    use_fast_color_ops = False
    with tf.name_scope(scope, 'ron_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes_0')

        # Keep the input as is when the draw is below 0.5, otherwise expand.
        keep_as_is = tf.random_uniform(
            [1], minval=0., maxval=1., dtype=tf.float32)[0] < 0.5
        unexpanded = (image, bboxes)
        image, bboxes = control_flow_ops.cond(
            keep_as_is,
            lambda: unexpanded,
            lambda: tf_image.ssd_random_expand(
                unexpanded[0], unexpanded[1], 2))
        tf_summary_image(image, bboxes, 'image_on_canvas_1')

        # Distort image and bounding boxes.
        patch, labels, bboxes = tf_image.ssd_random_sample_patch(
            image, labels, bboxes)
        tf_summary_image(patch, bboxes, 'image_shape_distorted_2')

        # Randomly flip the image horizontally.
        flipped, bboxes = tf_image.random_flip_left_right(patch, bboxes)
        resized = tf_image.resize_image(
            flipped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_fliped_and_resized_3')

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(x, ordering):
            return distort_color(x, ordering, use_fast_color_ops)

        color_distorted = apply_with_random_selector(
            resized, _distort, num_cases=4)
        tf_summary_image(color_distorted, bboxes, 'image_color_distorted_4')

        # NOTE(review): the colour-distorted tensor is only summarised; the
        # pipeline continues from the undistorted `resized` tensor. Confirm
        # whether disabling colour augmentation here is intentional.
        resized.set_shape([None, None, 3])

        # Rescale to VGG input scale.
        image = tf_image_whitened(resized * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes, out_shape=EVAL_SIZE,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Augment and normalise a single image for training.

    Steps, in order: convert to float32, clip `bboxes` to [0, 1], crop with
    `distorted_bounding_box_crop`, resize to `out_shape`, random left/right
    flip, random colour distortion (one of four orderings), multiply by 255,
    pass through `tf_image_whitened` with the module's channel means and
    optionally transpose to channels-first.

    Args:
        image: Rank-3 image tensor.
        labels: Labels, updated by the crop step.
        bboxes: Boxes; clipped to [0, 1], then updated by the crop and flip
            steps.
        out_shape: Target size handed to `tf_image.resize_image`.
        data_format: 'NHWC' (default) or 'NCHW'.
        scope: Name scope for the created ops.

    Returns:
        Tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    use_fast_color_ops = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'original_image')

        # Clamp box coordinates to [0, 1] before cropping.
        bboxes = tf.clip_by_value(bboxes, 0., 1.)

        # Distort image and bounding boxes; the crop window itself is unused.
        cropped, labels, bboxes, _ = distorted_bounding_box_crop(
            image, labels, bboxes)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)

        # Randomly flip the image horizontally.
        flipped, bboxes = tf_image.random_flip_left_right(resized, bboxes)
        tf_summary_image(flipped, bboxes, 'resized_image')

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(x, ordering):
            return distort_color(x, ordering, use_fast_color_ops)

        distorted = apply_with_random_selector(flipped, _distort, num_cases=4)
        tf_summary_image(distorted, bboxes, 'color_distorted_image')

        # Rescale to VGG input scale.
        image = tf_image_whitened(distorted * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_eval(image, labels, bboxes,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Build the evaluation-time preprocessing graph for a single image.

    The image is cast to float, passed through `tf_image_whitened` with the
    per-channel means and then resized according to `resize`. A box covering
    the whole image, [0, 0, 1, 1], is prepended to `bboxes` before resizing
    and split off again afterwards, so it goes through the same box
    transformation as the real boxes.

    Args:
        image: Image tensor; must have static rank 3.
        labels: Labels that accompany `bboxes`.
        bboxes: Bounding boxes, or None for no boxes.
        out_shape: Output size used by every strategy except `Resize.NONE`.
        data_format: 'NHWC' or 'NCHW'. For 'NCHW' the image is transposed
            with perm (2, 0, 1).
        difficults: Optional per-box flags; boxes and labels whose flag
            casts to True are removed.
        resize: One of `Resize.NONE`, `Resize.CENTRAL_CROP`,
            `Resize.PAD_AND_RESIZE` or `Resize.WARP_RESIZE`.
        scope: Name scope under which the ops are created.

    Returns:
        Tuple `(image, labels, bboxes, bbox_img)`, where `bbox_img` is the
        whole-image box after the resize step.

    Raises:
        ValueError: If `image` does not have static rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf_image_whitened(tf.to_float(image),
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Track the full image rectangle as an extra leading box.
        full_frame = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = full_frame
        else:
            bboxes = tf.concat([full_frame, bboxes], axis=0)

        if resize == Resize.NONE:
            # Leave the image at its native size.
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Crop (or pad) around the centre to reach the output size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink factor: never above 1, and small enough that both
            # sides fit inside out_shape.
            dims = tf.shape(image)
            unit = tf.to_double(1.0)
            ratio_h = tf.to_double(out_shape[0] / dims[0])
            ratio_w = tf.to_double(out_shape[1] / dims[1])
            shrink = tf.minimum(unit, tf.minimum(ratio_h, ratio_w))
            shrunk_size = shrink * tf.to_double(dims[0:2])
            shrunk_size = tf.cast(tf.floor(shrunk_size), tf.int32)
            image = tf_image.resize_image(
                image, shrunk_size,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Then pad up to the expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Plain resize that ignores the aspect ratio.
            image = tf_image.resize_image(
                image, out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Separate the whole-image box from the real boxes again.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]

        # Drop boxes (and their labels) flagged as difficult.
        if difficults is not None:
            keep = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, keep)
            bboxes = tf.boolean_mask(bboxes, keep)

        # Optional channels-first layout.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img
def preprocess_for_train(image, labels, bboxes, xs, ys,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Build the training-time augmentation graph for a single image.

    Which steps run is controlled by module-level switches (`FLIP`,
    `ROTATE`, `USE_NM_CROP`, `ROTATE_90`, `MAX_EXPAND_SCALE`,
    `USING_SHORTER_SIDE_FILTERING`). In order:

    1. optional flip and small-angle rotation;
    2. crop: either a window produced by `generate_sample` or a random
       window of side 640 / scale, resampled to 640x640;
    3. boxes are mapped into the crop and filtered by overlap;
    4. optional 90-degree rotation and optional expansion;
    5. bilinear resize to `out_shape`;
    6. optional filtering of boxes by their shorter side;
    7. colour distortion and mean subtraction via `tf_image_whitened`.

    Args:
        image: Image tensor; must have static rank 3. Pixel values are
            presumably in [0, 255], since the image is divided by 255
            before colour distortion -- confirm against the caller.
        labels: Per-box labels. With `USE_NM_CROP`, only boxes whose label
            is >= 1 are passed to `generate_sample`.
        bboxes: Bounding boxes matching `labels`.
        xs: Per-box x coordinates, transformed alongside `bboxes`.
        ys: Per-box y coordinates, transformed alongside `bboxes`.
        out_shape: Output size; `out_shape[0]` scales `ys` and
            `out_shape[1]` scales `xs` during shorter-side filtering.
        data_format: 'NHWC' or 'NCHW'. For 'NCHW' the image is transposed
            with perm (2, 0, 1).
        scope: Name scope under which the ops are created.

    Returns:
        Tuple `(image, labels, bboxes, xs, ys)`.

    Raises:
        ValueError: If `image` does not have static rank 3.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Randomly flip the image horizontally.
        if FLIP:
            image, bboxes, xs, ys = tf_image.random_flip_left_right_bbox(
                image, bboxes, xs, ys)
        if ROTATE:
            # Random small-angle rotation (original note: [-10, 10]).
            image, bboxes, xs, ys = tf_rotate_image(image, xs, ys)

        image_shape = tf.cast(tf.shape(image), dtype=tf.float32)
        image_h, image_w = image_shape[0], image_shape[1]

        if USE_NM_CROP:
            # Let generate_sample pick the crop from the positive boxes.
            mask = tf.greater_equal(labels, 1)
            valid_bboxes = tf.boolean_mask(bboxes, mask)
            # FIXME: valid_bboxes may be empty.
            # NOTE: the function wrapped by tf.py_func must return numpy
            # values matching the declared output dtype.
            crop_bbox = tf.py_func(generate_sample,
                                   [image_shape, valid_bboxes], tf.float32)
        else:
            # Random square window whose side is 640 / scale, with the
            # scale taken from a shuffle of {0.5, 1.0}.
            scales = tf.random_shuffle([0.5, 1.])
            # NOTE(review): `crop` is not defined inside this function; if
            # it is not a module-level name this branch raises NameError.
            scales = tf.Print(scales, [crop])
            target_h = tf.cast(640 / scales[0], dtype=tf.float32)
            target_w = tf.cast(640 / scales[0], dtype=tf.float32)
            bbox_begin_h_max = tf.maximum(image_h - target_h, 0)
            bbox_begin_w_max = tf.maximum(image_w - target_w, 0)
            bbox_begin_h = tf.random_uniform(
                [], minval=0, maxval=bbox_begin_h_max, dtype=tf.float32)
            bbox_begin_w = tf.random_uniform(
                [], minval=0, maxval=bbox_begin_w_max, dtype=tf.float32)
            # Normalised [y1, x1, y2, x2] window.
            crop_bbox = [bbox_begin_h / image_h,
                         bbox_begin_w / image_w,
                         (bbox_begin_h + target_h) / image_h,
                         (bbox_begin_w + target_w) / image_w]

        # Resample the window to 640x640; pixels outside the source image
        # are filled with 128.
        image = tf.image.crop_and_resize(tf.expand_dims(image, 0),
                                         [crop_bbox], [0], (640, 640),
                                         extrapolation_value=128)
        image = tf.squeeze(image, 0)

        # Map the annotations into the crop and filter them by overlap.
        bboxes, xs, ys = tfe.bboxes_resize(crop_bbox, bboxes, xs, ys)
        labels, bboxes, xs, ys = tfe.bboxes_filter_overlap(
            labels, bboxes, xs, ys,
            threshold=BBOX_CROP_OVERLAP, assign_value=LABEL_IGNORE)

        if ROTATE_90:
            # Rotate by 90 degrees with probability 0.2.
            rnd = tf.random_uniform((), minval=0, maxval=1)
            image, bboxes, xs, ys = tf.cond(
                tf.less(rnd, 0.2),
                lambda: tf_image.random_rotate90(image, bboxes, xs, ys),
                lambda: (image, bboxes, xs, ys))

        # Expand: with probability config.expand_prob, place the image on
        # a canvas up to MAX_EXPAND_SCALE times larger.
        if MAX_EXPAND_SCALE > 1:
            rnd2 = tf.random_uniform((), minval=0, maxval=1)

            def expand():
                scale = tf.random_uniform([], minval=1.0,
                                          maxval=MAX_EXPAND_SCALE,
                                          dtype=tf.float32)
                cur_shape = tf.cast(tf.shape(image), dtype=tf.float32)
                cur_h, cur_w = cur_shape[0], cur_shape[1]
                target_h = tf.cast(cur_h * scale, dtype=tf.int32)
                target_w = tf.cast(cur_w * scale, dtype=tf.int32)
                # Runs when the branch is built, not once per sample.
                tf.logging.info('expanded')
                return tf_image.resize_image_bboxes_with_crop_or_pad(
                    image, bboxes, xs, ys, target_h, target_w)

            def no_expand():
                return image, bboxes, xs, ys

            image, bboxes, xs, ys = tf.cond(
                tf.less(rnd2, config.expand_prob), expand, no_expand)

        dst_image = image

        # Disabled on purpose: tf.image.sample_distorted_bounding_box()
        # based cropping cannot control the crop scale.
        if False:
            dst_image, labels, bboxes, xs, ys, distort_bbox = \
                distorted_bounding_box_crop(
                    image, labels, bboxes, xs, ys,
                    min_object_covered=MIN_OBJECT_COVERED,
                    aspect_ratio_range=CROP_ASPECT_RATIO_RANGE,
                    area_range=AREA_RANGE)

        # Resize image to output size.
        dst_image = tf_image.resize_image(
            dst_image, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)

        # Filter boxes by the length of their shorter side. The
        # coordinates are scaled by out_shape for the test and scaled
        # back afterwards.
        if USING_SHORTER_SIDE_FILTERING:
            xs = xs * out_shape[1]
            ys = ys * out_shape[0]
            labels, bboxes, xs, ys = tfe.bboxes_filter_by_shorter_side(
                labels, bboxes, xs, ys,
                min_height=MIN_SHORTER_SIDE, max_height=MAX_SHORTER_SIDE,
                assign_value=LABEL_IGNORE)
            xs = xs / out_shape[1]
            ys = ys / out_shape[0]

        # Colour distortion (one of 4 orderings) is applied to the image
        # divided by 255; the result is scaled back by 255 afterwards.
        dst_image = apply_with_random_selector(
            dst_image / 255.0,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        dst_image = dst_image * 255.

        # Whiten the augmented image. Whitening `image` here would discard
        # the resize to out_shape and the colour distortion above.
        # NOTE: resnet v1 uses the VGG preprocessing, so the same
        # mean-subtraction is used here.
        image = tf_image_whitened(dst_image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys