def eval_image(image, height, width, bbox, thread_id, resize):
  """Get the image for model evaluation.

  Args:
    image: image Tensor; it is indexed below as [rows, columns, ...] and the
      result is declared to have 3 channels.
    height: integer height of the returned image.
    width: integer width of the returned image.
    bbox: bounding boxes passed to `tf.image.sample_distorted_bounding_box`.
      Only read when `resize` is not 'crop'.
    thread_id: preprocessing thread index. Image summaries are written only
      when this is falsy.
    resize: 'crop' to rescale the shorter side to 256 pixels (keeping the
      aspect ratio) and take a central `height` x `width` crop; otherwise one
      of 'nearest', 'bilinear', 'bicubic' or 'area', which samples a bounding
      box, crops to it and resizes with the named method.

  Returns:
    The processed image Tensor with static shape [height, width, 3].

  Raises:
    KeyError: if `resize` is neither 'crop' nor one of the four method names.
  """
  with tf.name_scope('eval_image'):
    if not thread_id:
      tf.summary.image('original_image', tf.expand_dims(image, 0))

    if resize == 'crop':
      # Rescale so that the shorter side becomes 256 pixels. Floor division
      # keeps the computed side an int32 tensor; true division ('/' under
      # Python 3) would produce a float64 tensor that cannot be packed into
      # the int32 size vector.
      shape = tf.shape(image)
      image = tf.cond(
          tf.less(shape[0], shape[1]),
          lambda: tf.image.resize_images(
              image,
              tf.convert_to_tensor([256, 256 * shape[1] // shape[0]],
                                   dtype=tf.int32)),
          lambda: tf.image.resize_images(
              image,
              tf.convert_to_tensor([256 * shape[0] // shape[1], 256],
                                   dtype=tf.int32)))

      # Central crop. crop_to_bounding_box is used instead of
      # tf.image.resize_image_with_crop_or_pad because the redundant pad
      # step of the latter was observed to have a large overhead.
      shape = tf.shape(image)
      y0 = (shape[0] - height) // 2
      x0 = (shape[1] - width) // 2
      distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height,
                                                      width)
    else:
      sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
          tf.shape(image),
          bounding_boxes=bbox,
          min_object_covered=0.5,
          aspect_ratio_range=[0.90, 1.10],
          area_range=[0.10, 1.0],
          max_attempts=100,
          use_image_if_no_bounding_boxes=True)
      bbox_begin, bbox_size, _ = sample_distorted_bounding_box
      # Crop the image to the specified bounding box.
      distorted_image = tf.slice(image, bbox_begin, bbox_size)
      resize_method = {
          'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR,
          'bilinear': tf.image.ResizeMethod.BILINEAR,
          'bicubic': tf.image.ResizeMethod.BICUBIC,
          'area': tf.image.ResizeMethod.AREA
      }[resize]
      # This resizing operation may distort the images because the aspect
      # ratio is not respected.
      if cnn_util.tensorflow_version() >= 11:
        distorted_image = tf.image.resize_images(
            distorted_image, [height, width],
            resize_method,
            align_corners=False)
      else:
        # Older releases take height and width as separate arguments.
        distorted_image = tf.image.resize_images(
            distorted_image, height, width, resize_method,
            align_corners=False)

    # The dynamic crop/slice loses the static shape, so restore it.
    distorted_image.set_shape([height, width, 3])
    if not thread_id:
      tf.summary.image('cropped_resized_image',
                       tf.expand_dims(distorted_image, 0))
    image = distorted_image
  return image
def eval_image(image, height, width, bbox, thread_id, resize):
  """Build the evaluation-time version of `image`.

  Args:
    image: image Tensor to preprocess.
    height: integer height of the returned image.
    width: integer width of the returned image.
    bbox: bounding boxes handed to `tf.image.sample_distorted_bounding_box`;
      only read when `resize` names a plain resize method.
    thread_id: preprocessing thread index; image summaries are written only
      when it is falsy.
    resize: 'crop', 'crop_inception' or 'crop_vgg' to delegate to the
      matching cropping helper, otherwise one of 'nearest', 'bilinear',
      'bicubic' or 'area'.

  Returns:
    The processed image Tensor with static shape [height, width, 3].

  Raises:
    KeyError: if `resize` is none of the values listed above.
  """
  emit_summaries = not thread_id
  with tf.name_scope('eval_image'):
    if emit_summaries:
      tf.summary.image('original_image', tf.expand_dims(image, 0))

    if resize == 'crop':
      # Simple center cropping.
      result = eval_image_crop(image, height, width)
    elif resize == 'crop_inception':
      # Eval code from Inception pre-processing.
      result = eval_image_crop_inception(image, height, width,
                                         CROP_CENTRAL_FRACTION)
    elif resize == 'crop_vgg':
      # Eval code from VGG pre-processing.
      rescaled = vgg_preprocessing._aspect_preserving_resize(
          image, VGG_RESIZE_SIDE_MIN)
      result = vgg_preprocessing._central_crop([rescaled], height, width)[0]
    else:
      sampled_box = tf.image.sample_distorted_bounding_box(
          tf.shape(image),
          bounding_boxes=bbox,
          min_object_covered=MIN_OBJECT_COVERED,
          aspect_ratio_range=ASPECT_RATIO_RANGE,
          area_range=AREA_RANGE,
          max_attempts=MAX_ATTEMPTS,
          use_image_if_no_bounding_boxes=True)
      box_begin, box_size, _ = sampled_box
      # Cut out the sampled bounding box.
      result = tf.slice(image, box_begin, box_size)

      methods_by_name = {
          'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR,
          'bilinear': tf.image.ResizeMethod.BILINEAR,
          'bicubic': tf.image.ResizeMethod.BICUBIC,
          'area': tf.image.ResizeMethod.AREA
      }
      method = methods_by_name[resize]

      # The aspect ratio is not respected here, so the image may be
      # distorted. Newer releases take the target size as a single list,
      # older ones as two separate positional arguments.
      if cnn_util.tensorflow_version() >= 11:
        size_args = ([height, width],)
      else:
        size_args = (height, width)
      result = tf.image.resize_images(
          result, *(size_args + (method,)), align_corners=False)

    result.set_shape([height, width, 3])
    if emit_summaries:
      tf.summary.image('cropped_resized_image', tf.expand_dims(result, 0))
  return result
def eval_image(image, height, width, bbox, thread_id, resize):
  """Produce the image fed to the model during evaluation.

  Args:
    image: image Tensor; indexed below as [rows, columns, ...].
    height: integer height of the returned image.
    width: integer width of the returned image.
    bbox: bounding boxes handed to `tf.image.sample_distorted_bounding_box`;
      only read when `resize` is not 'crop'.
    thread_id: preprocessing thread index; image summaries are written only
      when it is falsy.
    resize: 'crop' for a central `height` x `width` crop, otherwise one of
      'nearest', 'bilinear', 'bicubic' or 'area'.

  Returns:
    The processed image Tensor with static shape [height, width, 3].

  Raises:
    KeyError: if `resize` is neither 'crop' nor a known method name.
  """
  first_thread = not thread_id
  with tf.name_scope('eval_image'):
    if first_thread:
      tf.summary.image('original_image', tf.expand_dims(image, 0))

    if resize == 'crop':
      # crop_to_bounding_box is used rather than
      # tf.image.resize_image_with_crop_or_pad, whose redundant pad step was
      # observed to be much slower.
      dims = tf.shape(image)
      top = (dims[0] - height) // 2
      left = (dims[1] - width) // 2
      output = tf.image.crop_to_bounding_box(image, top, left, height, width)
    else:
      sampled_box = tf.image.sample_distorted_bounding_box(
          tf.shape(image),
          bounding_boxes=bbox,
          min_object_covered=0.1,
          aspect_ratio_range=[0.75, 1.33],
          area_range=[0.05, 1.0],
          max_attempts=100,
          use_image_if_no_bounding_boxes=True)
      box_begin, box_size, _ = sampled_box
      # Cut out the sampled bounding box.
      output = tf.slice(image, box_begin, box_size)

      method = {
          'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR,
          'bilinear': tf.image.ResizeMethod.BILINEAR,
          'bicubic': tf.image.ResizeMethod.BICUBIC,
          'area': tf.image.ResizeMethod.AREA
      }[resize]

      # Resizing ignores the aspect ratio, so the image may be distorted.
      # The target size is a single list on newer releases and two separate
      # positional arguments on older ones.
      use_size_list = cnn_util.tensorflow_version() >= 11
      if use_size_list:
        output = tf.image.resize_images(
            output, [height, width], method, align_corners=False)
      else:
        output = tf.image.resize_images(
            output, height, width, method, align_corners=False)

    output.set_shape([height, width, 3])
    if first_thread:
      tf.summary.image('cropped_resized_image', tf.expand_dims(output, 0))
  return output
def eval_image(image, height, width, bbox, thread_id, resize_method):
  """Prepare one image for model evaluation.

  Args:
    image: image Tensor; indexed below as [rows, columns, ...].
    height: integer height of the returned image.
    width: integer width of the returned image.
    bbox: bounding boxes handed to `tf.image.sample_distorted_bounding_box`;
      only read when `resize_method` is not 'crop'.
    thread_id: preprocessing thread index; image summaries are written only
      when it is falsy and `FLAGS.summary_verbosity` is at least 2. It is
      also forwarded to `get_image_resize_method`.
    resize_method: 'crop' for a central `height` x `width` crop; any other
      value is resolved through `get_image_resize_method`.

  Returns:
    The processed image Tensor with static shape [height, width, 3].
  """
  with tf.name_scope('eval_image'):
    write_summaries = not thread_id and FLAGS.summary_verbosity >= 2
    if write_summaries:
      tf.summary.image('original_image', tf.expand_dims(image, 0))

    if resize_method == 'crop':
      # crop_to_bounding_box is used rather than
      # tf.image.resize_image_with_crop_or_pad, whose redundant pad step was
      # observed to be much slower.
      dims = tf.shape(image)
      top = (dims[0] - height) // 2
      left = (dims[1] - width) // 2
      processed = tf.image.crop_to_bounding_box(
          image, top, left, height, width)
    else:
      sampled_box = tf.image.sample_distorted_bounding_box(
          tf.shape(image),
          bounding_boxes=bbox,
          min_object_covered=0.1,
          aspect_ratio_range=[0.75, 1.33],
          area_range=[0.05, 1.0],
          max_attempts=100,
          use_image_if_no_bounding_boxes=True)
      box_begin, box_size, _ = sampled_box
      # Cut out the sampled bounding box.
      processed = tf.slice(image, box_begin, box_size)

      # TODO(reedwm): revise this resize method for eval.
      method = get_image_resize_method(resize_method, thread_id)

      # Resizing ignores the aspect ratio, so the image may be distorted.
      # The target size is a single list on newer releases and two separate
      # positional arguments on older ones.
      if cnn_util.tensorflow_version() >= 11:
        size_args = ([height, width],)
      else:
        size_args = (height, width)
      processed = tf.image.resize_images(
          processed, *(size_args + (method,)), align_corners=False)

    processed.set_shape([height, width, 3])
    if write_summaries:
      tf.summary.image('cropped_resized_image',
                       tf.expand_dims(processed, 0))
  return processed
def train_image(image_buffer,
                height,
                width,
                bbox,
                batch_position,
                resize_method,
                distortions,
                scope=None,
                summary_verbosity=0,
                distort_color_in_yiq=False,
                fuse_decode_and_crop=False):
  """Decode and randomly distort one JPEG image for training.

  Random cropping, flipping and (optionally) color distortion augment the
  data set so that the network becomes invariant to aspects of the image
  that do not affect the label.

  Args:
    image_buffer: scalar string Tensor representing the raw JPEG image buffer.
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    batch_position: position of the image in a batch, which affects how images
      are distorted and resized. NOTE: this argument can be an integer or a
      tensor
    resize_method: round_robin, nearest, bilinear, bicubic, or area.
    distortions: If true, apply full distortions for image colors.
    scope: Optional scope for op_scope.
    summary_verbosity: Verbosity level for summary ops. Image summaries are
      written when it is at least 3.
    distort_color_in_yiq: distort color of input images in YIQ space.
    fuse_decode_and_crop: fuse the decode/crop operation.

  Returns:
    3-D float Tensor of distorted image used for training.
  """
  with tf.name_scope(scope or 'distort_image'):
    # Many datasets ship a human-annotated bounding box around the object of
    # interest. A randomly distorted version of that box is sampled here,
    # constrained by the allowed aspect ratios, areas and overlap. When no
    # box is supplied the whole image is used as the box.
    sampled_box = tf.image.sample_distorted_bounding_box(
        tf.image.extract_jpeg_shape(image_buffer),
        bounding_boxes=bbox,
        min_object_covered=0.1,
        aspect_ratio_range=[0.75, 1.33],
        area_range=[0.05, 1.0],
        max_attempts=100,
        use_image_if_no_bounding_boxes=True)
    box_begin, box_size, box_for_drawing = sampled_box

    if summary_verbosity >= 3:
      # Decode the full image only to visualise the sampled box on it.
      full_image = tf.image.decode_jpeg(image_buffer, channels=3,
                                        dct_method='INTEGER_FAST')
      full_image = tf.image.convert_image_dtype(full_image, dtype=tf.float32)
      boxed_image = tf.image.draw_bounding_boxes(
          tf.expand_dims(full_image, 0), box_for_drawing)
      tf.summary.image('images_with_distorted_bounding_box', boxed_image)

    # Crop the image to the sampled bounding box.
    if fuse_decode_and_crop:
      begin_y, begin_x, _ = tf.unstack(box_begin)
      crop_height, crop_width, _ = tf.unstack(box_size)
      window = tf.stack([begin_y, begin_x, crop_height, crop_width])
      image = tf.image.decode_and_crop_jpeg(image_buffer, window, channels=3)
    else:
      image = tf.image.decode_jpeg(image_buffer, channels=3,
                                   dct_method='INTEGER_FAST')
      image = tf.slice(image, box_begin, box_size)

    if distortions:
      # From here on pixels are floats in [0, 1]; before they were uint8s in
      # the range [0, 255].
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # Resizing ignores the aspect ratio, so the image may be distorted. The
    # target size is a single list on newer releases and two separate
    # positional arguments on older ones.
    method = get_image_resize_method(resize_method, batch_position)
    if cnn_util.tensorflow_version() >= 11:
      size_args = ([height, width],)
    else:
      size_args = (height, width)
    result = tf.image.resize_images(
        image, *(size_args + (method,)), align_corners=False)

    # The dynamic slice based on the box size loses the third dimension, so
    # restore the static shape.
    result.set_shape([height, width, 3])
    if summary_verbosity >= 3:
      tf.summary.image('cropped_resized_image', tf.expand_dims(result, 0))

    # Randomly flip the image horizontally.
    result = tf.image.random_flip_left_right(result)

    if distortions:
      # Randomly distort the colors.
      result = distort_color(result, batch_position,
                             distort_color_in_yiq=distort_color_in_yiq)
      # Note: This ensures the scaling matches the output of eval_image
      result *= 255

    if summary_verbosity >= 3:
      tf.summary.image('final_distorted_image', tf.expand_dims(result, 0))
    return result
def distort_image(image, height, width, bbox, thread_id=0, scope=None):
  """Distort one image for training a network.

  Distorting images provides a useful technique for augmenting the data
  set during training in order to make the network invariant to aspects
  of the image that do not affect the label.

  Args:
    image: 3-D float Tensor of image
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    thread_id: integer indicating the preprocessing thread. Summaries are
      written only for thread 0, and the resize method is chosen round robin
      from it.
    scope: Optional scope for op_scope.

  Returns:
    3-D float Tensor of distorted image used for training, rescaled from
    [0, 1] back to the [0, 255] range.
  """
  with tf.compat.v1.name_scope(scope or 'distort_image'):
    # Each bounding box has shape [1, num_boxes, box coords] and
    # the coordinates are ordered [ymin, xmin, ymax, xmax].

    # After this point, all image pixels reside in [0, 1] until the final
    # rescale. The various adjust_* ops all require this range for dtype
    # float.
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # Display the bounding box in the first thread only.
    if not thread_id:
      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                    bbox)
      tf.compat.v1.summary.image(
          'image_with_bounding_boxes', image_with_box)

    # A large fraction of image datasets contain a human-annotated bounding
    # box delineating the region of the image containing the object of
    # interest. We choose to create a new bounding box for the object which
    # is a randomly distorted version of the human-annotated bounding box
    # that obeys an allowed range of aspect ratios, sizes and overlap with
    # the human-annotated bounding box. If no box is supplied, then we assume
    # the bounding box is the entire image.
    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
        image_size=tf.shape(input=image),
        bounding_boxes=bbox,
        min_object_covered=0.1,
        aspect_ratio_range=[0.99, 1.01],
        area_range=[0.05, 1.0],
        max_attempts=100,
        use_image_if_no_bounding_boxes=True)
    bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box
    if not thread_id:
      image_with_distorted_box = tf.image.draw_bounding_boxes(
          tf.expand_dims(image, 0), distort_bbox)
      tf.compat.v1.summary.image(
          'images_with_distorted_bounding_box',
          image_with_distorted_box)

    # Crop the image to the specified bounding box.
    distorted_image = tf.slice(image, bbox_begin, bbox_size)

    # This resizing operation may distort the images because the aspect
    # ratio is not respected. We select a resize method in a round robin
    # fashion based on the thread number. The enum members are named
    # explicitly (in the order of the legacy integer codes 0..3) instead of
    # passing the bare integer, so the call does not depend on ResizeMethod
    # values being integers.
    resize_methods = (
        tf.image.ResizeMethod.BILINEAR,
        tf.image.ResizeMethod.NEAREST_NEIGHBOR,
        tf.image.ResizeMethod.BICUBIC,
        tf.image.ResizeMethod.AREA,
    )
    resize_method = resize_methods[thread_id % 4]
    if cnn_util.tensorflow_version() >= 11:
      distorted_image = tf.image.resize(
          distorted_image, [height, width], resize_method)
    else:
      # NOTE(review): legacy call form kept as-is; presumably unreachable on
      # releases that provide tf.compat.v1 -- confirm before removing.
      distorted_image = tf.image.resize(
          distorted_image, height, width, resize_method)
    # Restore the shape since the dynamic slice based upon the bbox_size loses
    # the third dimension.
    distorted_image.set_shape([height, width, 3])
    if not thread_id:
      tf.compat.v1.summary.image(
          'cropped_resized_image',
          tf.expand_dims(distorted_image, 0))

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    # Randomly distort the colors.
    distorted_image = distort_color(distorted_image, thread_id)

    # Note: This ensures the scaling matches the output of eval_image.
    # convert_image_dtype maps uint8 values onto [0, 1] using 255 as the
    # maximum, so 255 (not 256) is the factor that restores that range.
    distorted_image *= 255

    if not thread_id:
      tf.compat.v1.summary.image(
          'final_distorted_image',
          tf.expand_dims(distorted_image, 0))
    return distorted_image
def distort_image(image, height, width, bbox, thread_id=0, scope=None):
  """Distort one image for training a network.

  Distorting images provides a useful technique for augmenting the data
  set during training in order to make the network invariant to aspects
  of the image that do not affect the label.

  Args:
    image: 3-D float Tensor of image
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    thread_id: integer indicating the preprocessing thread. Summaries are
      written only for thread 0, and the resize method is chosen round robin
      from it.
    scope: Optional scope for op_scope.

  Returns:
    3-D float Tensor of distorted image used for training, rescaled from
    [0, 1] back to the [0, 255] range.
  """
  with tf.name_scope(scope or 'distort_image'):
    # Each bounding box has shape [1, num_boxes, box coords] and
    # the coordinates are ordered [ymin, xmin, ymax, xmax].

    # After this point, all image pixels reside in [0, 1] until the final
    # rescale. The various adjust_* ops all require this range for dtype
    # float.
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # Display the bounding box in the first thread only.
    if not thread_id:
      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                    bbox)
      tf.summary.image('image_with_bounding_boxes', image_with_box)

    # A large fraction of image datasets contain a human-annotated bounding
    # box delineating the region of the image containing the object of
    # interest. We choose to create a new bounding box for the object which
    # is a randomly distorted version of the human-annotated bounding box
    # that obeys an allowed range of aspect ratios, sizes and overlap with
    # the human-annotated bounding box. If no box is supplied, then we assume
    # the bounding box is the entire image.
    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=bbox,
        min_object_covered=0.1,
        aspect_ratio_range=[0.75, 1.33],
        area_range=[0.05, 1.0],
        max_attempts=100,
        use_image_if_no_bounding_boxes=True)
    bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box
    if not thread_id:
      image_with_distorted_box = tf.image.draw_bounding_boxes(
          tf.expand_dims(image, 0), distort_bbox)
      tf.summary.image('images_with_distorted_bounding_box',
                       image_with_distorted_box)

    # Crop the image to the specified bounding box.
    distorted_image = tf.slice(image, bbox_begin, bbox_size)

    # This resizing operation may distort the images because the aspect
    # ratio is not respected. We select a resize method in a round robin
    # fashion based on the thread number.
    # Note that ResizeMethod contains 4 enumerated resizing methods.
    resize_method = thread_id % 4
    if cnn_util.tensorflow_version() >= 11:
      distorted_image = tf.image.resize_images(
          distorted_image, [height, width], resize_method,
          align_corners=False)
    else:
      # Older releases take height and width as separate arguments.
      distorted_image = tf.image.resize_images(
          distorted_image, height, width, resize_method,
          align_corners=False)
    # Restore the shape since the dynamic slice based upon the bbox_size loses
    # the third dimension.
    distorted_image.set_shape([height, width, 3])
    if not thread_id:
      tf.summary.image('cropped_resized_image',
                       tf.expand_dims(distorted_image, 0))

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    # Randomly distort the colors.
    distorted_image = distort_color(distorted_image, thread_id)

    # Note: This ensures the scaling matches the output of eval_image.
    # convert_image_dtype maps uint8 values onto [0, 1] using 255 as the
    # maximum, so 255 (not 256) is the factor that restores that range.
    distorted_image *= 255

    if not thread_id:
      tf.summary.image('final_distorted_image',
                       tf.expand_dims(distorted_image, 0))
    return distorted_image
def train_image(image, height, width, bbox, batch_position, resize_method,
                distortions, scope=None):
  """Distort one image for training a network.

  Distorting images provides a useful technique for augmenting the data
  set during training in order to make the network invariant to aspects
  of the image that do not affect the label.

  Args:
    image: 3-D float Tensor of image
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    batch_position: position of the image in a batch, which affects how images
      are distorted and resized. NOTE: this argument can be an integer or a
      tensor
    resize_method: round_robin, nearest, bilinear, bicubic, or area.
    distortions: If true, apply full distortions for image colors.
    scope: Optional scope for op_scope.

  Returns:
    3-D float Tensor of distorted image used for training. When `distortions`
    is true the image is converted to [0, 1] floats for the color ops and
    rescaled by 255 at the end.
  """
  with tf.name_scope(scope or 'distort_image'):
    # Each bounding box has shape [1, num_boxes, box coords] and
    # the coordinates are ordered [ymin, xmin, ymax, xmax].

    if distortions:
      # After this point, all image pixels reside in [0, 1] until the final
      # rescale. The various adjust_* ops all require this range for dtype
      # float.
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # Display the bounding box.
    if FLAGS.summary_verbosity >= 2:
      image_with_box = tf.image.draw_bounding_boxes(
          tf.expand_dims(image, 0), bbox)
      tf.summary.image('image_with_bounding_boxes', image_with_box)

    # A large fraction of image datasets contain a human-annotated bounding
    # box delineating the region of the image containing the object of
    # interest. We choose to create a new bounding box for the object which
    # is a randomly distorted version of the human-annotated bounding box
    # that obeys an allowed range of aspect ratios, sizes and overlap with
    # the human-annotated bounding box. If no box is supplied, then we assume
    # the bounding box is the entire image.
    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=bbox,
        min_object_covered=0.1,
        aspect_ratio_range=[0.75, 1.33],
        area_range=[0.05, 1.0],
        max_attempts=100,
        use_image_if_no_bounding_boxes=True)
    bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box
    if FLAGS.summary_verbosity >= 2:
      image_with_distorted_box = tf.image.draw_bounding_boxes(
          tf.expand_dims(image, 0), distort_bbox)
      tf.summary.image('images_with_distorted_bounding_box',
                       image_with_distorted_box)

    # Crop the image to the specified bounding box.
    distorted_image = tf.slice(image, bbox_begin, bbox_size)

    # This resizing operation may distort the images because the aspect
    # ratio is not respected.
    image_resize_method = get_image_resize_method(resize_method,
                                                  batch_position)
    if cnn_util.tensorflow_version() >= 11:
      distorted_image = tf.image.resize_images(
          distorted_image, [height, width], image_resize_method,
          align_corners=False)
    else:
      # Older releases take height and width as separate arguments.
      distorted_image = tf.image.resize_images(
          distorted_image, height, width, image_resize_method,
          align_corners=False)
    # Restore the shape since the dynamic slice based upon the bbox_size loses
    # the third dimension.
    distorted_image.set_shape([height, width, 3])
    if FLAGS.summary_verbosity >= 2:
      tf.summary.image('cropped_resized_image',
                       tf.expand_dims(distorted_image, 0))

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    if distortions:
      # Randomly distort the colors.
      distorted_image = distort_color(distorted_image, batch_position)

      # Note: This ensures the scaling matches the output of eval_image.
      # convert_image_dtype maps uint8 values onto [0, 1] using 255 as the
      # maximum, so 255 (not 256) is the factor that restores that range.
      distorted_image *= 255

    if FLAGS.summary_verbosity >= 2:
      tf.summary.image('final_distorted_image',
                       tf.expand_dims(distorted_image, 0))
    return distorted_image
def train_image(image_buffer,
                height,
                width,
                bbox,
                batch_position,
                resize_method,
                distortions,
                scope=None,
                summary_verbosity=0,
                distort_color_in_yiq=False,
                fuse_decode_and_crop=False):
  """Turn a raw JPEG buffer into a randomly distorted training image.

  The image is cropped to a randomly sampled bounding box, resized, randomly
  flipped and optionally color-distorted. Such augmentation helps make the
  network invariant to aspects of the image that do not affect the label.

  Args:
    image_buffer: scalar string Tensor representing the raw JPEG image buffer.
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    batch_position: position of the image in a batch, which affects how images
      are distorted and resized. NOTE: this argument can be an integer or a
      tensor
    resize_method: round_robin, nearest, bilinear, bicubic, or area.
    distortions: If true, apply full distortions for image colors.
    scope: Optional scope for op_scope.
    summary_verbosity: Verbosity level for summary ops. Image summaries are
      written when it is at least 3.
    distort_color_in_yiq: distort color of input images in YIQ space.
    fuse_decode_and_crop: fuse the decode/crop operation.

  Returns:
    3-D float Tensor of distorted image used for training.
  """
  verbose_summaries = summary_verbosity >= 3

  with tf.name_scope(scope or 'distort_image'):
    # Sample a randomly distorted version of the annotated bounding box that
    # respects the allowed aspect ratios, areas and overlap with the
    # annotation. Without an annotation the whole image acts as the box.
    jpeg_shape = tf.image.extract_jpeg_shape(image_buffer)
    crop_begin, crop_size, drawn_box = (
        tf.image.sample_distorted_bounding_box(
            jpeg_shape,
            bounding_boxes=bbox,
            min_object_covered=0.1,
            aspect_ratio_range=[0.75, 1.33],
            area_range=[0.05, 1.0],
            max_attempts=100,
            use_image_if_no_bounding_boxes=True))

    if verbose_summaries:
      # A full decode is needed only to draw the sampled box for inspection.
      preview = tf.image.decode_jpeg(image_buffer, channels=3,
                                     dct_method='INTEGER_FAST')
      preview = tf.image.convert_image_dtype(preview, dtype=tf.float32)
      preview_with_box = tf.image.draw_bounding_boxes(
          tf.expand_dims(preview, 0), drawn_box)
      tf.summary.image('images_with_distorted_bounding_box',
                       preview_with_box)

    # Crop the image to the sampled bounding box, either fused with the
    # decode or as a separate slice of the fully decoded image.
    if fuse_decode_and_crop:
      y_offset, x_offset, _ = tf.unstack(crop_begin)
      window_height, window_width, _ = tf.unstack(crop_size)
      decode_window = tf.stack(
          [y_offset, x_offset, window_height, window_width])
      cropped = tf.image.decode_and_crop_jpeg(
          image_buffer, decode_window, channels=3)
    else:
      decoded = tf.image.decode_jpeg(image_buffer, channels=3,
                                     dct_method='INTEGER_FAST')
      cropped = tf.slice(decoded, crop_begin, crop_size)

    if distortions:
      # Pixels become floats in [0, 1] here; until now they were uint8s in
      # the range [0, 255].
      cropped = tf.image.convert_image_dtype(cropped, dtype=tf.float32)

    # The aspect ratio is not respected by this resize, so the image may be
    # distorted.
    chosen_method = get_image_resize_method(resize_method, batch_position)
    if cnn_util.tensorflow_version() >= 11:
      output = tf.image.resize_images(
          cropped, [height, width], chosen_method, align_corners=False)
    else:
      # Older releases take height and width as separate arguments.
      output = tf.image.resize_images(
          cropped, height, width, chosen_method, align_corners=False)

    # The dynamic slice based on the box size loses the third dimension, so
    # restore the static shape.
    output.set_shape([height, width, 3])
    if verbose_summaries:
      tf.summary.image('cropped_resized_image', tf.expand_dims(output, 0))

    # Randomly flip the image horizontally.
    output = tf.image.random_flip_left_right(output)

    if distortions:
      # Randomly distort the colors.
      output = distort_color(output, batch_position,
                             distort_color_in_yiq=distort_color_in_yiq)
      # Note: This ensures the scaling matches the output of eval_image
      output *= 255

    if verbose_summaries:
      tf.summary.image('final_distorted_image', tf.expand_dims(output, 0))
    return output