def preprocess_for_eval(image, labels, bboxes, out_shape, resize,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    The image is cast to float and passed through `tf_image_whitened` with
    the module-level `_R_MEAN`/`_G_MEAN`/`_B_MEAN`, then resized according
    to `resize`. A whole-image box `[0, 0, 1, 1]` is prepended to `bboxes`
    so that it goes through the same crop/pad step as the real boxes; it
    is split off again at the end and returned as `bbox_img`.

    Args:
        image: A `Tensor` of static rank 3 (`[height, width, channels]`)
            representing an image of arbitrary size.
        labels: Labels that accompany `bboxes`; returned unchanged.
        bboxes: Bounding boxes `Tensor` with 4 values per row, or `None`
            when there are no boxes.
        out_shape: Output `(height, width)`; ignored for `Resize.NONE`.
        resize: Resize strategy (`Resize.NONE`, `Resize.CENTRAL_CROP` or
            `Resize.PAD_AND_RESIZE`). Any other value leaves the image
            size untouched.
        scope: Name scope under which the ops are created.

    Returns:
        A tuple `(image, labels, bboxes, bbox_img)`.

    Raises:
        ValueError: If `image` does not have rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            # TF >= 1.0 takes (values, axis); the old (axis, values) order
            # no longer works.
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        # Resize strategy...
        if resize == Resize.NONE:
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor. It is capped at
            # 1.0, so the image is only ever shrunk, never enlarged.
            shape = tf.shape(image)
            # Cast both operands before dividing so the ratio is a true
            # float division regardless of the interpreter's `/` semantics
            # for integer tensors.
            factor = tf.minimum(
                tf.to_double(1.0),
                tf.minimum(
                    tf.to_double(out_shape[0]) / tf.to_double(shape[0]),
                    tf.to_double(out_shape[1]) / tf.to_double(shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(
                image, resize_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
def preprocess_for_eval(image, labels, bboxes, out_shape, resize,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    Casts the image to float, applies `tf_image_whitened` with the
    module-level channel means, and resizes it following `resize`. The
    whole-image rectangle `[0, 0, 1, 1]` travels through the crop/pad step
    as an extra first row of `bboxes` and is returned separately as
    `bbox_img`.

    Args:
        image: A `Tensor` of static rank 3 (`[height, width, channels]`)
            representing an image of arbitrary size.
        labels: Labels that accompany `bboxes`; returned unchanged.
        bboxes: Bounding boxes `Tensor` with 4 values per row, or `None`
            when there are no boxes.
        out_shape: Output `(height, width)`; ignored for `Resize.NONE`.
        resize: Resize strategy (`Resize.NONE`, `Resize.CENTRAL_CROP` or
            `Resize.PAD_AND_RESIZE`). Any other value leaves the image
            size untouched.
        scope: Name scope under which the ops are created.

    Returns:
        A tuple `(image, labels, bboxes, bbox_img)`.

    Raises:
        ValueError: If `image` does not have rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            # tf.concat is (values, axis) from TF 1.0 onwards.
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        # Resize strategy...
        if resize == Resize.NONE:
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor (never above 1.0,
            # i.e. the image is shrunk to fit but not enlarged).
            shape = tf.shape(image)
            # Explicit float64 casts keep this a true division even where
            # `/` on integer tensors would truncate.
            height_ratio = tf.to_double(out_shape[0]) / tf.to_double(shape[0])
            width_ratio = tf.to_double(out_shape[1]) / tf.to_double(shape[1])
            factor = tf.minimum(tf.to_double(1.0),
                                tf.minimum(height_ratio, width_ratio))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(
                image, resize_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
def preprocess_for_eval_multi(image, labels, bboxes, out_shape, resize,
                              scope='ssd_preprocessing_train'):
    """Preprocess image data for evaluation, subtracting fixed channel means.

    Unlike `preprocess_for_eval`, this variant performs no rank check and
    subtracts hard-coded per-channel means reshaped to `(1, 1, 1, 3)`
    instead of calling `tf_image_whitened`.

    Args:
        image: Image `Tensor` whose last dimension holds 3 channels.
            Presumably a 4-D batch, given the `(1, 1, 1, 3)` mean shape --
            TODO confirm against callers.
        labels: Labels that accompany `bboxes`; returned unchanged.
        bboxes: Bounding boxes `Tensor` with 4 values per row, or `None`
            when there are no boxes.
        out_shape: Output `(height, width)`; ignored for `Resize.NONE`.
        resize: Resize strategy (`Resize.NONE`, `Resize.CENTRAL_CROP` or
            `Resize.PAD_AND_RESIZE`). Any other value leaves the image
            size untouched.
        scope: Name scope under which the ops are created.

    Returns:
        A tuple `(image, labels, bboxes, bbox_img)`.
    """
    with tf.name_scope(scope):
        image = tf.to_float(image)
        # Mean subtraction is done inline here rather than through
        # tf_image_whitened; the reshape lets the means broadcast over the
        # channel (last) axis.
        image = image - np.array([123.6800, 116.7790, 103.9390]).reshape(
            (1, 1, 1, 3))

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            # TF >= 1.0 takes (values, axis); the old (axis, values) order
            # no longer works.
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        # Resize strategy...
        if resize == Resize.NONE:
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            # NOTE(review): this indexes shape[0] / shape[1] as
            # height / width. If `image` really is a 4-D batch these are
            # batch / height instead -- confirm before relying on this
            # branch.
            shape = tf.shape(image)
            # Explicit float64 casts keep this a true division even where
            # `/` on integer tensors would truncate.
            factor = tf.minimum(
                tf.to_double(1.0),
                tf.minimum(
                    tf.to_double(out_shape[0]) / tf.to_double(shape[0]),
                    tf.to_double(out_shape[1]) / tf.to_double(shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(
                image, resize_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
def expand():
    """Pad the image (and its boxes) out to a randomly enlarged canvas.

    Reads `image`, `bboxes`, `xs` and `ys` from the enclosing scope. The
    target height and width are the current ones multiplied by a single
    random factor drawn between 1.0 and `MAX_EXPAND_SCALE`.

    Returns:
        Whatever `tf_image.resize_image_bboxes_with_crop_or_pad` returns
        for the enlarged target size.
    """
    zoom = tf.random_uniform(
        [], minval=1.0, maxval=MAX_EXPAND_SCALE, dtype=tf.float32)

    # Work in float32 so the dimensions can be multiplied by `zoom`.
    dims = tf.cast(tf.shape(image), dtype=tf.float32)
    src_h = dims[0]
    src_w = dims[1]

    # Casting back to int32 truncates the scaled dimensions.
    new_h = tf.cast(src_h * zoom, dtype=tf.int32)
    new_w = tf.cast(src_w * zoom, dtype=tf.int32)

    tf.logging.info('expanded')
    return tf_image.resize_image_bboxes_with_crop_or_pad(
        image, bboxes, xs, ys, new_h, new_w)
def expand():
    """Enlarge the canvas around the image by a random scale factor.

    `image`, `bboxes`, `xs` and `ys` are taken from the enclosing scope.
    One scalar factor is sampled between 1.0 and `MAX_EXPAND_SCALE` and
    applied to both the height and the width.

    Returns:
        The result of `tf_image.resize_image_bboxes_with_crop_or_pad`
        called with the scaled target height and width.
    """
    factor = tf.random_uniform(
        [],
        minval=1.0,
        maxval=MAX_EXPAND_SCALE,
        dtype=tf.float32,
    )
    float_shape = tf.cast(tf.shape(image), dtype=tf.float32)
    height, width = float_shape[0], float_shape[1]

    # int32 cast truncates the scaled sizes.
    scaled_height = tf.cast(height * factor, dtype=tf.int32)
    scaled_width = tf.cast(width * factor, dtype=tf.int32)

    tf.logging.info('expanded')
    return tf_image.resize_image_bboxes_with_crop_or_pad(
        image, bboxes, xs, ys, scaled_height, scaled_width)
def preprocess_for_eval(image, labels, bboxes,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    The image is cast to float, passed through `tf_image_whitened` with
    the module-level channel means, and resized according to `resize`. A
    whole-image box `[0, 0, 1, 1]` is prepended to `bboxes` so that it
    follows the same crop/pad step; it is split off again and returned as
    `bbox_img`.

    Args:
        image: A `Tensor` of static rank 3 (`[height, width, channels]`)
            representing an image of arbitrary size.
        labels: Labels that accompany `bboxes`.
        bboxes: Bounding boxes `Tensor` with 4 values per row, or `None`
            when there are no boxes.
        out_shape: Output `(height, width)` after pre-processing; ignored
            for `Resize.NONE`.
        data_format: `'NHWC'` keeps the channels-last layout; `'NCHW'`
            transposes the returned image to channels-first.
        difficults: Optional per-box flags. Boxes (and their labels) whose
            flag casts to `True` are removed from the outputs.
        resize: Resize strategy (`Resize.NONE`, `Resize.CENTRAL_CROP`,
            `Resize.PAD_AND_RESIZE` or `Resize.WARP_RESIZE`). Any other
            value leaves the image size untouched.
        scope: Name scope under which the ops are created.

    Returns:
        A tuple `(image, labels, bboxes, bbox_img)`.

    Raises:
        ValueError: If `image` does not have rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        if resize == Resize.NONE:
            # No resizing...
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor. It is capped at
            # 1.0, so the image is only ever shrunk, never enlarged.
            shape = tf.shape(image)
            # Cast both operands before dividing so the ratio is a true
            # float division regardless of the interpreter's `/` semantics
            # for integer tensors.
            factor = tf.minimum(
                tf.to_double(1.0),
                tf.minimum(
                    tf.to_double(out_shape[0]) / tf.to_double(shape[0]),
                    tf.to_double(out_shape[1]) / tf.to_double(shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(
                image, resize_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image. The boxes are left untouched here
            # (presumably they are normalized coordinates -- confirm).
            image = tf_image.resize_image(
                image, out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]

        # Remove difficult boxes.
        if difficults is not None:
            mask = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, mask)
            bboxes = tf.boolean_mask(bboxes, mask)

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img
def preprocess_for_train(image, labels, bboxes, xs, ys, out_shape,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocess the given image and its annotations for training.

    Steps, in order: optional random 90-degree rotation, pad/crop to a
    square whose side is the longer image edge, optional random canvas
    expansion, conversion to float32, bilinear resize to `out_shape`,
    optional filtering of boxes by shorter-side length, random colour
    distortion, rescale by 255 and whitening with the module-level
    channel means.

    Args:
        image: A `Tensor` of static rank 3 (`[height, width, channels]`)
            representing an image of arbitrary size.
        labels: Labels that accompany the boxes.
        bboxes: Bounding boxes, transformed together with the image.
        xs: Per-box x coordinates, transformed together with the image
            (presumably normalized to the image width, since they are
            multiplied by `out_shape[1]` before filtering -- confirm).
        ys: Per-box y coordinates; same remarks as `xs`, with
            `out_shape[0]`.
        out_shape: Output `(height, width)` of the image.
        data_format: `'NHWC'` keeps the channels-last layout; `'NCHW'`
            transposes the returned image to channels-first.
        scope: Name scope under which the ops are created.

    Returns:
        A tuple `(image, labels, bboxes, xs, ys)`.

    Raises:
        ValueError: If `image` does not have rank 3.
    """
    fast_mode = True
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Rotate the image by a random multiple of 90 degrees, with
        # probability config.rotation_prob.
        if USE_ROTATION:
            rotate_draw = tf.random_uniform((), minval=0, maxval=1)

            def _rotated():
                return tf_image.random_rotate90(image, bboxes, xs, ys)

            def _unrotated():
                return image, bboxes, xs, ys

            should_rotate = tf.less(rotate_draw, config.rotation_prob)
            image, bboxes, xs, ys = tf.cond(
                should_rotate, _rotated, _unrotated)

        # Pad/crop to a square canvas sized by the longer image edge.
        float_shape = tf.cast(tf.shape(image), dtype=tf.float32)
        height, width = float_shape[0], float_shape[1]
        side = tf.cast(tf.maximum(height, width), dtype=tf.int32)
        image, bboxes, xs, ys = \
            tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, xs, ys, side, side)

        # Expand the canvas by a random factor, with probability
        # config.expand_prob.
        if MAX_EXPAND_SCALE > 1:
            expand_draw = tf.random_uniform((), minval=0, maxval=1)

            def _expanded():
                zoom = tf.random_uniform(
                    [], minval=1.0, maxval=MAX_EXPAND_SCALE,
                    dtype=tf.float32)
                dims = tf.cast(tf.shape(image), dtype=tf.float32)
                src_h, src_w = dims[0], dims[1]
                new_h = tf.cast(src_h * zoom, dtype=tf.int32)
                new_w = tf.cast(src_w * zoom, dtype=tf.int32)
                # NOTE(review): this is a Python-level log call, so it
                # fires when this branch function is invoked to build the
                # graph, not once per expanded example -- confirm intent.
                tf.logging.info('expanded')
                return tf_image.resize_image_bboxes_with_crop_or_pad(
                    image, bboxes, xs, ys, new_h, new_w)

            def _not_expanded():
                return image, bboxes, xs, ys

            should_expand = tf.less(expand_draw, config.expand_prob)
            image, bboxes, xs, ys = tf.cond(
                should_expand, _expanded, _not_expanded)

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # The random distorted_bounding_box_crop step is currently
        # disabled, so the image goes straight to the resize below.
        distorted = image

        # Resize image to output size.
        distorted = tf_image.resize_image(
            distorted, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(distorted, bboxes, 'image_shape_distorted')

        # Filter bboxes using the length of shorter sides. Coordinates are
        # scaled to output pixels for the filter and scaled back after.
        if USING_SHORTER_SIDE_FILTERING:
            xs = xs * out_shape[1]
            ys = ys * out_shape[0]
            labels, bboxes, xs, ys = tfe.bboxes_filter_by_shorter_side(
                labels, bboxes, xs, ys,
                min_height=MIN_SHORTER_SIDE,
                max_height=MAX_SHORTER_SIDE,
                assign_value=LABEL_IGNORE)
            xs = xs / out_shape[1]
            ys = ys / out_shape[0]

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort_with_ordering(img, ordering):
            return distort_color(img, ordering, fast_mode)

        distorted = apply_with_random_selector(
            distorted, _distort_with_ordering, num_cases=4)
        tf_summary_image(distorted, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = distorted * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys