コード例 #1
0
ファイル: preprocessing.py プロジェクト: seanlin2000/IntelAI
def eval_image(image, height, width, bbox, thread_id, resize):
    """Build the evaluation-time version of one image.

    With `resize == 'crop'` the image is rescaled so that its shorter side is
    256 pixels (aspect ratio kept up to integer rounding) and the central
    `height` x `width` window is cut out. For any other value a bounding box
    is sampled with `tf.image.sample_distorted_bounding_box`, the image is
    sliced to it and the slice is resized to `height` x `width` without
    respecting the aspect ratio.

    Args:
      image: image tensor; its shape is indexed as [rows, columns, ...] and
        the result is asserted to have 3 channels.
      height: output height in pixels.
      width: output width in pixels.
      bbox: bounding boxes forwarded to
        `tf.image.sample_distorted_bounding_box`; unused in 'crop' mode.
      thread_id: index of the preprocessing thread. Image summaries are
        emitted only when it is falsy (e.g. 0).
      resize: 'crop', or one of 'nearest', 'bilinear', 'bicubic', 'area'.

    Returns:
      The processed image tensor with static shape [height, width, 3].

    Raises:
      KeyError: if `resize` is neither 'crop' nor a known resize method name.
    """
    with tf.name_scope('eval_image'):
        if not thread_id:
            tf.summary.image(
                'original_image', tf.expand_dims(image, 0))

        if resize == 'crop':
            # crop_to_bounding_box is used on purpose: the original author
            # measured tf.image.resize_image_with_crop_or_pad as much slower,
            # apparently because of its redundant padding step.
            src_shape = tf.shape(image)
            # Floor division keeps the computed side length an int32 tensor.
            # True division (`/` under Python 3 semantics) would promote the
            # int32 operands to float64, which cannot be packed into the
            # int32 size vector below.
            image = tf.cond(
                tf.less(src_shape[0], src_shape[1]),
                lambda: tf.image.resize_images(
                    image,
                    tf.convert_to_tensor(
                        [256, 256 * src_shape[1] // src_shape[0]],
                        dtype=tf.int32)),
                lambda: tf.image.resize_images(
                    image,
                    tf.convert_to_tensor(
                        [256 * src_shape[0] // src_shape[1], 256],
                        dtype=tf.int32)))
            shape = tf.shape(image)

            # Offsets of the centered crop window.
            y0 = (shape[0] - height) // 2
            x0 = (shape[1] - width) // 2
            distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height,
                                                            width)
        else:
            sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
                tf.shape(image),
                bounding_boxes=bbox,
                min_object_covered=0.5,
                aspect_ratio_range=[0.90, 1.10],
                area_range=[0.10, 1.0],
                max_attempts=100,
                use_image_if_no_bounding_boxes=True)
            bbox_begin, bbox_size, _ = sample_distorted_bounding_box
            # Crop the image to the specified bounding box.
            distorted_image = tf.slice(image, bbox_begin, bbox_size)
            resize_method = {
                'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                'bilinear': tf.image.ResizeMethod.BILINEAR,
                'bicubic': tf.image.ResizeMethod.BICUBIC,
                'area': tf.image.ResizeMethod.AREA
            }[resize]
            # This resizing operation may distort the images because the aspect
            # ratio is not respected.
            if cnn_util.tensorflow_version() >= 11:
                distorted_image = tf.image.resize_images(
                    distorted_image, [height, width],
                    resize_method,
                    align_corners=False)
            else:
                # Older releases take height and width as separate arguments.
                distorted_image = tf.image.resize_images(
                    distorted_image, height, width, resize_method, align_corners=False)
        # The dynamic crop/slice loses static shape information; restore it.
        distorted_image.set_shape([height, width, 3])
        if not thread_id:
            tf.summary.image(
                'cropped_resized_image', tf.expand_dims(distorted_image, 0))
        image = distorted_image
    return image
コード例 #2
0
def eval_image(image, height, width, bbox, thread_id, resize):
    """Produce the evaluation-time version of a single image.

    Args:
      image: image tensor to preprocess.
      height: output height in pixels.
      width: output width in pixels.
      bbox: bounding boxes forwarded to
        `tf.image.sample_distorted_bounding_box`; only consulted when
        `resize` is not one of the 'crop*' modes.
      thread_id: index of the preprocessing thread. Image summaries are
        written only when it is falsy (e.g. 0).
      resize: 'crop', 'crop_inception', 'crop_vgg', or one of 'nearest',
        'bilinear', 'bicubic', 'area'.

    Returns:
      The processed image tensor with static shape [height, width, 3].

    Raises:
      KeyError: if `resize` is none of the values listed above.
    """
    with tf.name_scope('eval_image'):
        log_images = not thread_id
        if log_images:
            tf.summary.image('original_image', tf.expand_dims(image, 0))

        if resize == 'crop':
            # Plain center crop.
            output = eval_image_crop(image, height, width)
        elif resize == 'crop_inception':
            # Evaluation path borrowed from the Inception pre-processing.
            output = eval_image_crop_inception(
                image, height, width, CROP_CENTRAL_FRACTION)
        elif resize == 'crop_vgg':
            # Evaluation path borrowed from the VGG pre-processing.
            resized = vgg_preprocessing._aspect_preserving_resize(
                image, VGG_RESIZE_SIDE_MIN)
            crops = vgg_preprocessing._central_crop([resized], height, width)
            output = crops[0]
        else:
            crop_begin, crop_size, _ = tf.image.sample_distorted_bounding_box(
                tf.shape(image),
                bounding_boxes=bbox,
                min_object_covered=MIN_OBJECT_COVERED,
                aspect_ratio_range=ASPECT_RATIO_RANGE,
                area_range=AREA_RANGE,
                max_attempts=MAX_ATTEMPTS,
                use_image_if_no_bounding_boxes=True)

            # Cut the sampled window out of the image.
            output = tf.slice(image, crop_begin, crop_size)
            methods_by_name = {
                'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                'bilinear': tf.image.ResizeMethod.BILINEAR,
                'bicubic': tf.image.ResizeMethod.BICUBIC,
                'area': tf.image.ResizeMethod.AREA
            }
            method = methods_by_name[resize]
            # The aspect ratio is ignored here, so the result may look
            # stretched.
            if cnn_util.tensorflow_version() >= 11:
                output = tf.image.resize_images(
                    output, [height, width], method, align_corners=False)
            else:
                # Older releases take height and width as separate arguments.
                output = tf.image.resize_images(
                    output, height, width, method, align_corners=False)
        # Re-attach the static shape that the dynamic crop dropped.
        output.set_shape([height, width, 3])
        if log_images:
            tf.summary.image('cropped_resized_image',
                             tf.expand_dims(output, 0))
    return output
コード例 #3
0
ファイル: preprocessing.py プロジェクト: cyliustack/benchmark
def eval_image(image, height, width, bbox, thread_id, resize):
  """Prepare a single image for evaluation.

  Args:
    image: image tensor; its shape is indexed as [rows, columns, ...].
    height: output height in pixels.
    width: output width in pixels.
    bbox: bounding boxes forwarded to
      `tf.image.sample_distorted_bounding_box`; unused in 'crop' mode.
    thread_id: index of the preprocessing thread. Image summaries are written
      only when it is falsy (e.g. 0).
    resize: 'crop' for a central crop, or one of 'nearest', 'bilinear',
      'bicubic', 'area' to crop a sampled box and resize it.

  Returns:
    The processed image tensor with static shape [height, width, 3].

  Raises:
    KeyError: if `resize` is neither 'crop' nor a known resize method name.
  """
  with tf.name_scope('eval_image'):
    log_images = not thread_id
    if log_images:
      tf.summary.image('original_image', tf.expand_dims(image, 0))

    if resize == 'crop':
      # Central crop through crop_to_bounding_box. The original author noted
      # that tf.image.resize_image_with_crop_or_pad is much slower, apparently
      # because of its redundant padding step.
      dims = tf.shape(image)
      top = (dims[0] - height) // 2
      left = (dims[1] - width) // 2
      output = tf.image.crop_to_bounding_box(image, top, left, height, width)
    else:
      crop_begin, crop_size, _ = tf.image.sample_distorted_bounding_box(
          tf.shape(image),
          bounding_boxes=bbox,
          min_object_covered=0.1,
          aspect_ratio_range=[0.75, 1.33],
          area_range=[0.05, 1.0],
          max_attempts=100,
          use_image_if_no_bounding_boxes=True)
      # Cut the sampled window out of the image.
      output = tf.slice(image, crop_begin, crop_size)
      methods_by_name = {
          'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR,
          'bilinear': tf.image.ResizeMethod.BILINEAR,
          'bicubic': tf.image.ResizeMethod.BICUBIC,
          'area': tf.image.ResizeMethod.AREA
      }
      method = methods_by_name[resize]
      # The aspect ratio is ignored here, so the result may look stretched.
      if cnn_util.tensorflow_version() >= 11:
        output = tf.image.resize_images(
            output, [height, width], method, align_corners=False)
      else:
        # Older releases take height and width as separate arguments.
        output = tf.image.resize_images(
            output, height, width, method, align_corners=False)
    # Re-attach the static shape that the dynamic crop dropped.
    output.set_shape([height, width, 3])
    if log_images:
      tf.summary.image('cropped_resized_image', tf.expand_dims(output, 0))
  return output
コード例 #4
0
def eval_image(image, height, width, bbox, thread_id, resize_method):
    """Prepare a single image for evaluation.

    Args:
      image: image tensor; its shape is indexed as [rows, columns, ...].
      height: output height in pixels.
      width: output width in pixels.
      bbox: bounding boxes forwarded to
        `tf.image.sample_distorted_bounding_box`; unused in 'crop' mode.
      thread_id: index of the preprocessing thread. Image summaries are
        written only when it is falsy and `FLAGS.summary_verbosity >= 2`.
      resize_method: 'crop' for a central crop; any other value is handed to
        `get_image_resize_method` to pick the resize method.

    Returns:
      The processed image tensor with static shape [height, width, 3].
    """
    with tf.name_scope('eval_image'):
        log_images = not thread_id and FLAGS.summary_verbosity >= 2
        if log_images:
            tf.summary.image('original_image', tf.expand_dims(image, 0))

        if resize_method == 'crop':
            # Central crop through crop_to_bounding_box. The original author
            # noted that tf.image.resize_image_with_crop_or_pad is much
            # slower, apparently because of its redundant padding step.
            dims = tf.shape(image)
            top = (dims[0] - height) // 2
            left = (dims[1] - width) // 2
            output = tf.image.crop_to_bounding_box(
                image, top, left, height, width)
        else:
            crop_begin, crop_size, _ = tf.image.sample_distorted_bounding_box(
                tf.shape(image),
                bounding_boxes=bbox,
                min_object_covered=0.1,
                aspect_ratio_range=[0.75, 1.33],
                area_range=[0.05, 1.0],
                max_attempts=100,
                use_image_if_no_bounding_boxes=True)
            # Cut the sampled window out of the image.
            output = tf.slice(image, crop_begin, crop_size)
            # TODO(reedwm): revise this resize method for eval.
            method = get_image_resize_method(resize_method, thread_id)
            # The aspect ratio is ignored here, so the result may look
            # stretched.
            if cnn_util.tensorflow_version() >= 11:
                output = tf.image.resize_images(
                    output, [height, width], method, align_corners=False)
            else:
                # Older releases take height and width as separate arguments.
                output = tf.image.resize_images(
                    output, height, width, method, align_corners=False)
        # Re-attach the static shape that the dynamic crop dropped.
        output.set_shape([height, width, 3])
        if log_images:
            tf.summary.image('cropped_resized_image',
                             tf.expand_dims(output, 0))
    return output
コード例 #5
0
def train_image(image_buffer,
                height,
                width,
                bbox,
                batch_position,
                resize_method,
                distortions,
                scope=None,
                summary_verbosity=0,
                distort_color_in_yiq=False,
                fuse_decode_and_crop=False):
    """Decode and randomly distort one JPEG image for training.

    Random distortion augments the training set so that the network becomes
    invariant to image properties that do not affect the label.

    Args:
      image_buffer: scalar string Tensor holding the raw JPEG bytes.
      height: integer, output height.
      width: integer, output width.
      bbox: 3-D float Tensor of bounding boxes arranged
        [1, num_boxes, coords], each coordinate in [0, 1) and ordered
        [ymin, xmin, ymax, xmax].
      batch_position: position of the image in its batch; it influences how
        the image is resized and color-distorted. May be an integer or a
        tensor.
      resize_method: round_robin, nearest, bilinear, bicubic, or area.
      distortions: if true, apply the full color distortions.
      scope: optional name scope; defaults to 'distort_image'.
      summary_verbosity: verbosity level for summary ops; image summaries are
        written at level 3 and above.
      distort_color_in_yiq: distort colors in YIQ space.
      fuse_decode_and_crop: use the fused decode-and-crop op instead of
        decoding the full image and slicing it.

    Returns:
      3-D float Tensor with the distorted image, static shape
      [height, width, 3].
    """
    with tf.name_scope(scope or 'distort_image'):
        log_images = summary_verbosity >= 3

        # Many datasets ship a human-annotated box around the object of
        # interest. A new box is sampled as a random distortion of it, subject
        # to the aspect-ratio, area and overlap limits below. When no box is
        # supplied the whole image is used.
        crop_begin, crop_size, sampled_box = (
            tf.image.sample_distorted_bounding_box(
                tf.image.extract_jpeg_shape(image_buffer),
                bounding_boxes=bbox,
                min_object_covered=0.1,
                aspect_ratio_range=[0.75, 1.33],
                area_range=[0.05, 1.0],
                max_attempts=100,
                use_image_if_no_bounding_boxes=True))
        if log_images:
            # Decode a full copy only to visualize the sampled box.
            preview = tf.image.decode_jpeg(image_buffer,
                                           channels=3,
                                           dct_method='INTEGER_FAST')
            preview = tf.image.convert_image_dtype(preview, dtype=tf.float32)
            preview_with_box = tf.image.draw_bounding_boxes(
                tf.expand_dims(preview, 0), sampled_box)
            tf.summary.image('images_with_distorted_bounding_box',
                             preview_with_box)

        # Obtain the pixels inside the sampled box.
        if fuse_decode_and_crop:
            top, left, _ = tf.unstack(crop_begin)
            crop_height, crop_width, _ = tf.unstack(crop_size)
            window = tf.stack([top, left, crop_height, crop_width])
            image = tf.image.decode_and_crop_jpeg(image_buffer,
                                                  window,
                                                  channels=3)
        else:
            image = tf.image.decode_jpeg(image_buffer,
                                         channels=3,
                                         dct_method='INTEGER_FAST')
            image = tf.slice(image, crop_begin, crop_size)

        if distortions:
            # From here on pixel values are floats in [0, 1]; before this
            # point they were uint8 values in [0, 255].
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # The aspect ratio is ignored by this resize, so the result may look
        # stretched.
        method = get_image_resize_method(resize_method, batch_position)
        if cnn_util.tensorflow_version() >= 11:
            result = tf.image.resize_images(
                image, [height, width], method, align_corners=False)
        else:
            # Older releases take height and width as separate arguments.
            result = tf.image.resize_images(
                image, height, width, method, align_corners=False)
        # The dynamic slice driven by the sampled size loses the third
        # dimension; restore the static shape.
        result.set_shape([height, width, 3])
        if log_images:
            tf.summary.image('cropped_resized_image',
                             tf.expand_dims(result, 0))

        # Random horizontal flip.
        result = tf.image.random_flip_left_right(result)

        if distortions:
            # Random color distortion.
            result = distort_color(
                result,
                batch_position,
                distort_color_in_yiq=distort_color_in_yiq)

            # Rescale so the value range matches what eval_image produces.
            result *= 255

        if log_images:
            tf.summary.image('final_distorted_image',
                             tf.expand_dims(result, 0))
        return result
def distort_image(image, height, width, bbox, thread_id=0, scope=None):
  """Randomly distort one image for training.

  Random distortion augments the training set so that the network becomes
  invariant to image properties that do not affect the label.

  Args:
    image: 3-D float Tensor of image.
    height: integer, output height.
    width: integer, output width.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords],
      each coordinate in [0, 1) and ordered [ymin, xmin, ymax, xmax].
    thread_id: integer index of the preprocessing thread. It selects the
      resize method and is passed to `distort_color`; summaries are written
      only when it is falsy.
    scope: optional name scope; defaults to 'distort_image'.

  Returns:
    3-D float Tensor with the distorted image, static shape
    [height, width, 3].
  """
  with tf.compat.v1.name_scope(scope or 'distort_image'):
    # From here on pixel values are floats in [0, 1), as required by the
    # adjust_* color ops for float inputs.
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # Summaries are produced by the first thread only.
    log_images = not thread_id
    if log_images:
      annotated = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), bbox)
      tf.compat.v1.summary.image('image_with_bounding_boxes', annotated)

    # Many datasets ship a human-annotated box around the object of interest.
    # A new box is sampled as a random distortion of it, subject to the
    # aspect-ratio, area and overlap limits below. When no box is supplied the
    # whole image is used.
    crop_begin, crop_size, sampled_box = (
        tf.image.sample_distorted_bounding_box(
            image_size=tf.shape(input=image),
            bounding_boxes=bbox,
            min_object_covered=0.1,
            aspect_ratio_range=[0.99, 1.01],
            area_range=[0.05, 1.0],
            max_attempts=100,
            use_image_if_no_bounding_boxes=True))
    if log_images:
      annotated_sample = tf.image.draw_bounding_boxes(
          tf.expand_dims(image, 0), sampled_box)
      tf.compat.v1.summary.image(
          'images_with_distorted_bounding_box', annotated_sample)

    # Cut the sampled window out of the image.
    result = tf.slice(image, crop_begin, crop_size)

    # The resize ignores the aspect ratio, so the result may look stretched.
    # The method is picked round-robin from the thread number, relying on
    # ResizeMethod having 4 enumerated values.
    # NOTE(review): newer `tf.image.resize` releases may expect a method name
    # rather than an integer index -- confirm against the TF version in use.
    method = thread_id % 4
    if cnn_util.tensorflow_version() >= 11:
      result = tf.image.resize(result, [height, width], method)
    else:
      # NOTE(review): legacy call form with separate height/width arguments;
      # verify that `tf.image.resize` accepts it on the targeted release.
      result = tf.image.resize(result, height, width, method)
    # The dynamic slice driven by the sampled size loses the third dimension;
    # restore the static shape.
    result.set_shape([height, width, 3])
    if log_images:
      tf.compat.v1.summary.image(
          'cropped_resized_image', tf.expand_dims(result, 0))

    # Random horizontal flip.
    result = tf.image.random_flip_left_right(result)

    # Random color distortion.
    result = distort_color(result, thread_id)

    # Rescale so the value range matches what eval_image produces.
    # NOTE(review): the factor is 256 here; confirm it is the intended scale.
    result *= 256

    if log_images:
      tf.compat.v1.summary.image(
          'final_distorted_image', tf.expand_dims(result, 0))
    return result
コード例 #7
0
ファイル: preprocessing.py プロジェクト: cyliustack/benchmark
def distort_image(image, height, width, bbox, thread_id=0, scope=None):
  """Randomly distort one image for training.

  Random distortion augments the training set so that the network becomes
  invariant to image properties that do not affect the label.

  Args:
    image: 3-D float Tensor of image.
    height: integer, output height.
    width: integer, output width.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords],
      each coordinate in [0, 1) and ordered [ymin, xmin, ymax, xmax].
    thread_id: integer index of the preprocessing thread. It selects the
      resize method and is passed to `distort_color`; summaries are written
      only when it is falsy.
    scope: optional name scope; defaults to 'distort_image'.

  Returns:
    3-D float Tensor with the distorted image, static shape
    [height, width, 3].
  """
  with tf.name_scope(scope or 'distort_image'):
    # From here on pixel values are floats in [0, 1), as required by the
    # adjust_* color ops for float inputs.
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # Summaries are produced by the first thread only.
    log_images = not thread_id
    if log_images:
      annotated = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), bbox)
      tf.summary.image('image_with_bounding_boxes', annotated)

    # Many datasets ship a human-annotated box around the object of interest.
    # A new box is sampled as a random distortion of it, subject to the
    # aspect-ratio, area and overlap limits below. When no box is supplied the
    # whole image is used.
    crop_begin, crop_size, sampled_box = (
        tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=bbox,
            min_object_covered=0.1,
            aspect_ratio_range=[0.75, 1.33],
            area_range=[0.05, 1.0],
            max_attempts=100,
            use_image_if_no_bounding_boxes=True))
    if log_images:
      annotated_sample = tf.image.draw_bounding_boxes(
          tf.expand_dims(image, 0), sampled_box)
      tf.summary.image('images_with_distorted_bounding_box', annotated_sample)

    # Cut the sampled window out of the image.
    result = tf.slice(image, crop_begin, crop_size)

    # The resize ignores the aspect ratio, so the result may look stretched.
    # The method is picked round-robin from the thread number, relying on
    # ResizeMethod having 4 enumerated values.
    method = thread_id % 4
    if cnn_util.tensorflow_version() >= 11:
      result = tf.image.resize_images(
          result, [height, width], method, align_corners=False)
    else:
      # Older releases take height and width as separate arguments.
      result = tf.image.resize_images(
          result, height, width, method, align_corners=False)
    # The dynamic slice driven by the sampled size loses the third dimension;
    # restore the static shape.
    result.set_shape([height, width, 3])
    if log_images:
      tf.summary.image('cropped_resized_image', tf.expand_dims(result, 0))

    # Random horizontal flip.
    result = tf.image.random_flip_left_right(result)

    # Random color distortion.
    result = distort_color(result, thread_id)

    # Rescale so the value range matches what eval_image produces.
    # NOTE(review): the factor is 256 here; confirm it is the intended scale.
    result *= 256

    if log_images:
      tf.summary.image('final_distorted_image', tf.expand_dims(result, 0))
    return result
コード例 #8
0
def train_image(image,
                height,
                width,
                bbox,
                batch_position,
                resize_method,
                distortions,
                scope=None):
    """Randomly distort one image for training.

    Random distortion augments the training set so that the network becomes
    invariant to image properties that do not affect the label.

    Args:
      image: 3-D float Tensor of image.
      height: integer, output height.
      width: integer, output width.
      bbox: 3-D float Tensor of bounding boxes arranged
        [1, num_boxes, coords], each coordinate in [0, 1) and ordered
        [ymin, xmin, ymax, xmax].
      batch_position: position of the image in its batch; it influences how
        the image is resized and color-distorted. May be an integer or a
        tensor.
      resize_method: round_robin, nearest, bilinear, bicubic, or area.
      distortions: if true, apply the full color distortions.
      scope: optional name scope; defaults to 'distort_image'.

    Returns:
      3-D float Tensor with the distorted image, static shape
      [height, width, 3].
    """
    with tf.name_scope(scope or 'distort_image'):
        # Image summaries are written only at summary verbosity 2 and above.
        log_images = FLAGS.summary_verbosity >= 2

        if distortions:
            # From here on pixel values are floats in [0, 1), as required by
            # the adjust_* color ops for float inputs.
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

            # Visualize the annotated bounding box.
            if log_images:
                annotated = tf.image.draw_bounding_boxes(
                    tf.expand_dims(image, 0), bbox)
                tf.summary.image('image_with_bounding_boxes', annotated)

        # Many datasets ship a human-annotated box around the object of
        # interest. A new box is sampled as a random distortion of it, subject
        # to the aspect-ratio, area and overlap limits below. When no box is
        # supplied the whole image is used.
        crop_begin, crop_size, sampled_box = (
            tf.image.sample_distorted_bounding_box(
                tf.shape(image),
                bounding_boxes=bbox,
                min_object_covered=0.1,
                aspect_ratio_range=[0.75, 1.33],
                area_range=[0.05, 1.0],
                max_attempts=100,
                use_image_if_no_bounding_boxes=True))
        if log_images:
            annotated_sample = tf.image.draw_bounding_boxes(
                tf.expand_dims(image, 0), sampled_box)
            tf.summary.image('images_with_distorted_bounding_box',
                             annotated_sample)

        # Cut the sampled window out of the image.
        result = tf.slice(image, crop_begin, crop_size)

        # The aspect ratio is ignored by this resize, so the result may look
        # stretched.
        method = get_image_resize_method(resize_method, batch_position)
        if cnn_util.tensorflow_version() >= 11:
            result = tf.image.resize_images(
                result, [height, width], method, align_corners=False)
        else:
            # Older releases take height and width as separate arguments.
            result = tf.image.resize_images(
                result, height, width, method, align_corners=False)
        # The dynamic slice driven by the sampled size loses the third
        # dimension; restore the static shape.
        result.set_shape([height, width, 3])
        if log_images:
            tf.summary.image('cropped_resized_image',
                             tf.expand_dims(result, 0))

        # Random horizontal flip.
        result = tf.image.random_flip_left_right(result)

        if distortions:
            # Random color distortion.
            result = distort_color(result, batch_position)

            # Rescale so the value range matches what eval_image produces.
            # NOTE(review): the factor is 256 here; confirm it is the
            # intended scale.
            result *= 256

        if log_images:
            tf.summary.image('final_distorted_image',
                             tf.expand_dims(result, 0))
        return result
コード例 #9
0
def train_image(image_buffer,
                height,
                width,
                bbox,
                batch_position,
                resize_method,
                distortions,
                scope=None,
                summary_verbosity=0,
                distort_color_in_yiq=False,
                fuse_decode_and_crop=False):
  """Build the training-time preprocessing ops for a single JPEG image.

  The image is cropped to a randomly sampled bounding box, resized to
  `height` x `width`, randomly flipped left/right and, optionally, color
  distorted. Augmenting the data this way helps make the network invariant
  to aspects of the image that do not affect the label.

  Args:
    image_buffer: scalar string Tensor representing the raw JPEG image buffer.
    height: integer, output image height.
    width: integer, output image width.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax].
    batch_position: position of the image in a batch, which affects how images
      are distorted and resized. NOTE: this argument can be an integer or a
      tensor.
    resize_method: round_robin, nearest, bilinear, bicubic, or area.
    distortions: If true, convert the crop to float32 and apply the color
      distortions; the result is then multiplied by 255.
    scope: Optional name for the enclosing name scope; 'distort_image' is
      used when it is falsy.
    summary_verbosity: Verbosity level for summary ops. Image summaries are
      only emitted by this function when the level is at least 3.
    distort_color_in_yiq: forwarded to `distort_color`; distort color of input
      images in YIQ space.
    fuse_decode_and_crop: If true, decode and crop in a single
      `decode_and_crop_jpeg` op instead of a full decode followed by a slice.

  Returns:
    3-D image Tensor with static shape [height, width, 3].
  """
  emit_summaries = summary_verbosity >= 3

  with tf.name_scope(scope or 'distort_image'):
    # Many datasets ship a human-annotated box around the object of interest.
    # Sample a new box that is a randomly distorted version of it, constrained
    # by the aspect-ratio, area and coverage limits below. When no box is
    # supplied the whole image is used as the reference box.
    jpeg_shape = tf.image.extract_jpeg_shape(image_buffer)
    bbox_begin, bbox_size, distort_bbox = (
        tf.image.sample_distorted_bounding_box(
            jpeg_shape,
            bounding_boxes=bbox,
            min_object_covered=0.1,
            aspect_ratio_range=[0.75, 1.33],
            area_range=[0.05, 1.0],
            max_attempts=100,
            use_image_if_no_bounding_boxes=True))

    if emit_summaries:
      # Full decode used only to visualize the sampled box on the original.
      full_image = tf.image.convert_image_dtype(
          tf.image.decode_jpeg(image_buffer, channels=3,
                               dct_method='INTEGER_FAST'),
          dtype=tf.float32)
      boxed_image = tf.image.draw_bounding_boxes(
          tf.expand_dims(full_image, 0), distort_bbox)
      tf.summary.image('images_with_distorted_bounding_box', boxed_image)

    # Produce the crop described by the sampled bounding box.
    if not fuse_decode_and_crop:
      decoded = tf.image.decode_jpeg(image_buffer, channels=3,
                                     dct_method='INTEGER_FAST')
      cropped = tf.slice(decoded, bbox_begin, bbox_size)
    else:
      top, left, _ = tf.unstack(bbox_begin)
      crop_height, crop_width, _ = tf.unstack(bbox_size)
      window = tf.stack([top, left, crop_height, crop_width])
      cropped = tf.image.decode_and_crop_jpeg(
          image_buffer, window, channels=3)

    if distortions:
      # From here on pixels live in [0, 1]; before this they were uint8
      # values in [0, 255].
      cropped = tf.image.convert_image_dtype(cropped, dtype=tf.float32)

    # The resize ignores the crop's aspect ratio, so it may distort the image.
    image_resize_method = get_image_resize_method(resize_method, batch_position)
    # Newer TensorFlow takes the target size as one [height, width] argument;
    # older versions take height and width as separate positional arguments.
    if cnn_util.tensorflow_version() >= 11:
      target_size = ([height, width],)
    else:
      target_size = (height, width)
    resize_args = (cropped,) + target_size + (image_resize_method,)
    distorted_image = tf.image.resize_images(*resize_args, align_corners=False)

    # The dynamic crop based on bbox_size drops the static channel dimension,
    # so pin the full static shape again.
    distorted_image.set_shape([height, width, 3])
    if emit_summaries:
      tf.summary.image('cropped_resized_image',
                       tf.expand_dims(distorted_image, 0))

    # Random horizontal flip.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    if distortions:
      # Random color distortion.
      distorted_image = distort_color(
          distorted_image, batch_position,
          distort_color_in_yiq=distort_color_in_yiq)

      # Note: This ensures the scaling matches the output of eval_image
      distorted_image *= 255

    if emit_summaries:
      tf.summary.image('final_distorted_image',
                       tf.expand_dims(distorted_image, 0))
    return distorted_image