def preprocessing_training(image, mask, out_shape, prob=0.5):
    """Randomly augment an image/mask pair, then resize both to `out_shape`.

    Three augmentations are attempted in order: a rotation by a random
    number of quarter turns, a left-right flip and an up-down flip.  Each is
    applied when its own uniform draw from [0, 1) is below `prob`, and always
    to the image and the mask together.

    Args:
        image: Image tensor to augment.
        mask: Mask tensor that receives the same geometric transforms.
        out_shape: Target size forwarded to `tf_image.resize_image`.
        prob: Threshold each uniform draw is compared against.

    Returns:
        A tuple `(image, mask)`.  The image is resized with bilinear
        interpolation and the mask with nearest-neighbour interpolation
        (presumably so mask values are not blended -- confirm with callers).
    """

    def below_prob(op_name):
        # One independent uniform draw per augmentation.
        draw = tf.random_uniform((), minval=0, maxval=1, name=op_name)
        return tf.less(draw, prob)

    def unchanged():
        # Reads the *current* image/mask, i.e. the output of earlier steps.
        return image, mask

    with tf.name_scope('preprocessing_training'):
        with tf.name_scope('rotate'):

            def rotated():
                # The float draw from [0, 10000) is truncated to an int and
                # used as the number of quarter turns for both tensors.
                quarter_turns = tf.cast(
                    random_ops.random_uniform([], 0, 10000), tf.int32)
                return (tf.image.rot90(image, k=quarter_turns),
                        tf.image.rot90(mask, k=quarter_turns))

            image, mask = tf.cond(below_prob('rotate'), rotated, unchanged)

        with tf.name_scope('flip_left_right'):
            image, mask = tf.cond(
                below_prob('flip_left_right'),
                lambda: (tf.image.flip_left_right(image),
                         tf.image.flip_left_right(mask)),
                unchanged)

        with tf.name_scope('flip_up_down'):
            image, mask = tf.cond(
                below_prob('flip_up_down'),
                lambda: (tf.image.flip_up_down(image),
                         tf.image.flip_up_down(mask)),
                unchanged)

        image = tf_image.resize_image(
            image, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        mask = tf_image.resize_image(
            mask, out_shape,
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
            align_corners=False)
    return image, mask
def preprocess_for_eval(image, labels, bboxes, xs, ys,
                        out_shape, data_format='NHWC',
                        resize=Resize.WARP_RESIZE,
                        do_resize=True,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    Only `image` is transformed; `labels`, `bboxes`, `xs` and `ys` are
    returned exactly as they were passed in.

    Args:
        image: A rank-3 `Tensor` representing an image.
        labels: Returned unchanged.
        bboxes: Returned unchanged.
        xs: Returned unchanged.
        ys: Returned unchanged.
        out_shape: Output size used when the image is resized.
        data_format: 'NHWC' (default) or 'NCHW'.  With 'NCHW' the image axes
            are permuted with `perm=(2, 0, 1)`.
        resize: Resize strategy.  `Resize.NONE` disables resizing; every
            other value results in the same bilinear resize to `out_shape`.
        do_resize: When falsy, resizing is skipped regardless of `resize`.
        scope: Name scope for the created ops.

    Returns:
        A tuple `(image, labels, bboxes, xs, ys)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Cast to float, then whiten with the module-level RGB constants
        # (presumably per-channel mean subtraction -- see tf_image_whitened).
        image = tf_image_whitened(tf.to_float(image),
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        if do_resize and resize != Resize.NONE:
            image = tf_image.resize_image(
                image, out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys
def preprocess_for_eval(image, labels, bboxes, xs, ys,
                        out_shape, data_format='NHWC',
                        resize=Resize.WARP_RESIZE,
                        do_resize=True,
                        scope='ssd_preprocessing_train'):
    """Whiten, optionally resize and optionally transpose an evaluation image.

    `labels`, `bboxes`, `xs` and `ys` are not modified; they are passed
    through so the function can be dropped into a pipeline that carries them.

    Args:
        image: A rank-3 `Tensor` representing an image.
        labels: Passed through untouched.
        bboxes: Passed through untouched.
        xs: Passed through untouched.
        ys: Passed through untouched.
        out_shape: Target size for the bilinear resize.
        data_format: 'NHWC' keeps the axis order; 'NCHW' transposes the image
            with `perm=(2, 0, 1)`.
        resize: Resize strategy; only `Resize.NONE` changes behaviour (it
            suppresses the resize).
        do_resize: Master switch for resizing.
        scope: Name scope for the created ops.

    Returns:
        A tuple `(image, labels, bboxes, xs, ys)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        as_float = tf.to_float(image)
        image = tf_image_whitened(as_float, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Resizing is skipped when it is switched off or the strategy is NONE.
        skip_resize = (not do_resize) or resize == Resize.NONE
        if not skip_resize:
            image = tf_image.resize_image(
                image,
                out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys
Example #4
0
def preprocess_for_eval(image,
                        labels,
                        bboxes,
                        out_shape,
                        resize,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    The image is cast to float and whitened with the module-level RGB
    constants, then resized according to `resize`.  A full-image box
    `[0, 0, 1, 1]` is prepended to `bboxes` so that it goes through the same
    crop/pad transform as the real boxes; it is split off again before
    returning.

    Args:
        image: A rank-3 `Tensor` representing an image of arbitrary size.
        labels: Returned unchanged.
        bboxes: Bounding boxes, or `None` when there are none.
        out_shape: Output shape after pre-processing (unused when
            `resize == Resize.NONE`).
        resize: Resize strategy.  `Resize.NONE`, `Resize.CENTRAL_CROP` and
            `Resize.PAD_AND_RESIZE` are handled; any other value falls
            through without resizing the image.
        scope: Name scope for the created ops.

    Returns:
        A tuple `(image, labels, bboxes, bbox_img)` where `bbox_img` is the
        transformed full-image box.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            # NOTE(review): this is the pre-1.0 `tf.concat(concat_dim, values)`
            # argument order; TensorFlow >= 1.0 expects
            # `tf.concat(values, axis)`.  Confirm the targeted TF version.
            bboxes = tf.concat(0, [bbox_img, bboxes])

        # Resize strategy...
        if resize == Resize.NONE:
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            shape = tf.shape(image)
            # Cast both operands to double *before* dividing.  Dividing the
            # Python int by the int32 shape tensor floor-divides under
            # Python 2 semantics, which collapses the factor to 0 whenever
            # the image is larger than `out_shape`.
            factor = tf.minimum(
                tf.to_double(1.0),
                tf.minimum(
                    tf.to_double(out_shape[0]) / tf.to_double(shape[0]),
                    tf.to_double(out_shape[1]) / tf.to_double(shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(
                image,
                resize_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
def preprocess_for_train(image, labels, bboxes, out_shape,
                         scope='ssd_preprocessing_train'):
    """Build the training-time augmentation pipeline for one image.

    Steps, in order: convert to float32, random crop through
    `distorted_bounding_box_crop`, bilinear resize to `out_shape`, random
    horizontal flip, random colour distortion, rescale by 255 and whiten.

    Args:
        image: A rank-3 `Tensor` representing an image of arbitrary size.
        labels: Labels, updated by the random crop.
        bboxes: Bounding boxes, updated by the random crop and the flip.
        out_shape: Size the cropped image is resized to.
        scope: Name scope for the created ops.

    Returns:
        A tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes.  The crop window returned as the
        # fourth value is not needed here.
        cropped, labels, bboxes, _ = distorted_bounding_box_crop(
            image, labels, bboxes, aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        flipped, bboxes = tf_image.random_flip_left_right(resized, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(img, ordering):
            return distort_color(img, ordering, fast_mode)

        recolored = apply_with_random_selector(flipped, _distort, num_cases=4)
        tf_summary_image(recolored, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = tf_image_whitened(recolored * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])
        return image, labels, bboxes
Example #6
0
def preprocess_for_train(image, labels, bboxes, out_shape,
                         scope='ssd_preprocessing_train'):
    """Preprocess the given image (with its labels and boxes) for training.

    The image is converted to float32, randomly cropped, resized to
    `out_shape`, randomly flipped left-right, colour-distorted with one of
    four randomly selected orderings, multiplied by 255 and whitened.

    Args:
        image: A rank-3 `Tensor` representing an image of arbitrary size.
        labels: Labels; replaced by the output of the random crop.
        bboxes: Bounding boxes; replaced by the outputs of the random crop
            and of the random flip.
        out_shape: Size of the image after the resize step.
        scope: Name scope for the created ops.

    Returns:
        A tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Convert to float scaled [0, 1].
        needs_cast = image.dtype != tf.float32
        if needs_cast:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes (the returned crop window is
        # discarded).
        crop_result = distorted_bounding_box_crop(
            image, labels, bboxes,
            aspect_ratio_range=CROP_RATIO_RANGE)
        augmented, labels, bboxes, _unused_window = crop_result

        # Resize image to output size.
        augmented = tf_image.resize_image(
            augmented,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(augmented, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        augmented, bboxes = tf_image.random_flip_left_right(augmented, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        augmented = apply_with_random_selector(
            augmented,
            lambda img, order: distort_color(img, order, fast_mode),
            num_cases=4)
        tf_summary_image(augmented, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        scaled = augmented * 255.
        whitened = tf_image_whitened(scaled, [_R_MEAN, _G_MEAN, _B_MEAN])
        return whitened, labels, bboxes
def preprocessing_val(image, out_shape):
    """Convert a validation image to float32 and resize it to `out_shape`.

    Args:
        image: Image tensor.
        out_shape: Target size forwarded to `tf_image.resize_image`.

    Returns:
        The float32 image, resized with bilinear interpolation.
    """
    with tf.name_scope('preprocessing_val'):
        # Debug output emitted while the graph is being built.
        print('image is ', image)
        as_float = tf.image.convert_image_dtype(image, dtype=tf.float32)
        resized = tf_image.resize_image(
            as_float, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
    return resized
def preprocess_for_eval(image, labels, bboxes, out_shape, resize,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    The image is cast to float, whitened with the module-level RGB
    constants and resized according to `resize`.  The full-image rectangle
    `[0, 0, 1, 1]` is temporarily stacked in front of `bboxes` so it is
    transformed together with them, and is returned separately.

    Args:
        image: A rank-3 `Tensor` representing an image of arbitrary size.
        labels: Returned unchanged.
        bboxes: Bounding boxes, or `None` when there are none.
        out_shape: Output shape after pre-processing (unused when
            `resize == Resize.NONE`).
        resize: Resize strategy.  Only `Resize.NONE`, `Resize.CENTRAL_CROP`
            and `Resize.PAD_AND_RESIZE` are handled; any other value leaves
            the image size untouched.
        scope: Name scope for the created ops.

    Returns:
        A tuple `(image, labels, bboxes, bbox_img)` where `bbox_img` is the
        transformed full-image rectangle.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            # NOTE(review): pre-1.0 `tf.concat(concat_dim, values)` argument
            # order; TensorFlow >= 1.0 expects `tf.concat(values, axis)`.
            # Confirm the targeted TF version.
            bboxes = tf.concat(0, [bbox_img, bboxes])

        # Resize strategy...
        if resize == Resize.NONE:
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            shape = tf.shape(image)
            # Convert to double before dividing: int / int32-tensor is a
            # floor division under Python 2 semantics, which would make the
            # factor 0 for any image larger than `out_shape`.
            height_ratio = tf.to_double(out_shape[0]) / tf.to_double(shape[0])
            width_ratio = tf.to_double(out_shape[1]) / tf.to_double(shape[1])
            # Never upscale: the factor is capped at 1.
            factor = tf.minimum(tf.to_double(1.0),
                                tf.minimum(height_ratio, width_ratio))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(image, resize_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
Example #9
0
def preprocess_for_eval_multi(image,
                              labels,
                              bboxes,
                              out_shape,
                              resize,
                              scope='ssd_preprocessing_train'):
    """Preprocess image data for evaluation without a rank check.

    The input is cast to float, three hard-coded channel values are
    subtracted, and the result is resized according to `resize`.  A
    full-image box `[0, 0, 1, 1]` is prepended to `bboxes` so that it goes
    through the same crop/pad transform, and is returned separately.

    Args:
        image: Image tensor.  The subtracted constant is reshaped to
            `(1, 1, 1, 3)`, so presumably a rank-4 `(N, H, W, 3)` batch is
            expected -- TODO confirm with callers.
        labels: Returned unchanged.
        bboxes: Bounding boxes, or `None` when there are none.
        out_shape: Output shape after pre-processing (unused when
            `resize == Resize.NONE`).
        resize: Resize strategy.  `Resize.NONE`, `Resize.CENTRAL_CROP` and
            `Resize.PAD_AND_RESIZE` are handled; any other value falls
            through without resizing.
        scope: Name scope for the created ops.

    Returns:
        A tuple `(image, labels, bboxes, bbox_img)`.
    """
    with tf.name_scope(scope):

        image = tf.to_float(image)
        # Used in place of tf_image_whitened: subtract fixed per-channel
        # values, broadcast over the three leading axes.
        image = image - np.array([123.6800, 116.7790, 103.9390]).reshape(
            (1, 1, 1, 3))

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            # NOTE(review): pre-1.0 `tf.concat(concat_dim, values)` argument
            # order; TensorFlow >= 1.0 expects `tf.concat(values, axis)`.
            # Confirm the targeted TF version.
            bboxes = tf.concat(0, [bbox_img, bboxes])

        # Resize strategy...
        if resize == Resize.NONE:
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            shape = tf.shape(image)
            # Cast both operands to double before dividing so the ratio is a
            # true division even where int / int32-tensor would floor (as it
            # does under Python 2 semantics).
            factor = tf.minimum(
                tf.to_double(1.0),
                tf.minimum(
                    tf.to_double(out_shape[0]) / tf.to_double(shape[0]),
                    tf.to_double(out_shape[1]) / tf.to_double(shape[1])))
            # NOTE(review): shape[0:2] are the first two axes; for a rank-4
            # input these would not be height/width -- verify.
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(
                image,
                resize_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
Example #10
0
def preprocess_for_train(image, labels, bboxes,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Pipeline: float32 conversion, random crop, bilinear resize to
    `out_shape`, random horizontal flip, random colour distortion, rescale
    by 255, whitening and an optional transpose to channels-first.

    Args:
        image: A rank-3 `Tensor` representing an image.
        labels: Labels, updated by the random crop.
        bboxes: Bounding boxes, updated by the random crop and the flip.
        out_shape: Size the cropped image is resized to.
        data_format: 'NHWC' (default) or 'NCHW'; with 'NCHW' the image is
            transposed with `perm=(2, 0, 1)`.
        scope: Name scope for the created ops.

    Returns:
        A tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')
        # From here on the image is known to be rank-3 and tf.float32.

        # (Conditional) random crop.  Per the original notes: labels are
        # (n,), bboxes are (n, 4) and the fourth return value holds the crop
        # window in original-image coordinates (4,); it is not used here.
        # The original notes also give MIN_OBJECT_COVERED as 0.25 and
        # CROP_RATIO_RANGE as (0.6, 1.67) -- confirm against the constants.
        cropped, labels, bboxes, _ = distorted_bounding_box_crop(
            image, labels, bboxes,
            min_object_covered=MIN_OBJECT_COVERED,
            aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        flipped, bboxes = tf_image.random_flip_left_right(resized, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(img, ordering):
            return distort_color(img, ordering, fast_mode)

        recolored = apply_with_random_selector(flipped, _distort, num_cases=4)
        tf_summary_image(recolored, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = tf_image_whitened(recolored * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         out_shape,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Pipeline: float32 conversion, random crop, bilinear resize to
    `out_shape`, random horizontal flip, random colour distortion, rescale
    by 255, whitening and an optional transpose to channels-first.

    Args:
        image: A rank-3 `Tensor` representing an image of arbitrary size.
        labels: Labels, updated by the random crop.
        bboxes: Bounding boxes, updated by the random crop and the flip.
        out_shape: Size the cropped image is resized to.
        data_format: 'NHWC' (default) or 'NCHW'; with 'NCHW' the image is
            transposed with `perm=(2, 0, 1)`.
        scope: Name scope for the created ops.

    Returns:
        A tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes (the crop window returned last is
        # not used).
        augmented, labels, bboxes, _ = distorted_bounding_box_crop(
            image, labels, bboxes,
            min_object_covered=MIN_OBJECT_COVERED,
            aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.  Per the original note, the crop comes
        # back with its static shape set to [None, None, 3], so everything is
        # resized to the network input size `out_shape`.
        augmented = tf_image.resize_image(
            augmented, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(augmented, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        augmented, bboxes = tf_image.random_flip_left_right(augmented, bboxes)

        # Randomly distort the colors, using one of 4 randomly chosen
        # orderings.
        def _distort(img, ordering):
            return distort_color(img, ordering, fast_mode)

        augmented = apply_with_random_selector(
            augmented, _distort, num_cases=4)
        tf_summary_image(augmented, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale: the augmented image is in [0, 1] (per
        # the original note) and is brought back to [0, 255] before whitening.
        scaled = augmented * 255.
        image = tf_image_whitened(scaled, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes, xs, ys,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image and its box annotations for training.

    Steps, in order: optional random 90-degree rotation, optional random
    expansion by crop-or-pad, float32 conversion, random crop, bilinear
    resize to `out_shape`, optional filtering of boxes by shorter side,
    random colour distortion, rescale by 255, whitening and an optional
    transpose to channels-first.

    Args:
        image: A rank-3 `Tensor` representing an image of arbitrary size.
        labels: Labels, updated by the crop and by the shorter-side filter.
        bboxes: Bounding boxes, updated alongside the image.
        xs: Box x coordinates, updated alongside the image.
        ys: Box y coordinates, updated alongside the image.
        out_shape: `(height, width)` the cropped image is resized to.
        data_format: 'NHWC' (default) or 'NCHW'; with 'NCHW' the image is
            transposed with `perm=(2, 0, 1)`.
        scope: Name scope for the created ops.

    Returns:
        A tuple `(image, labels, bboxes, xs, ys)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        def _untouched():
            # Reads the current values, i.e. the output of earlier steps.
            return image, bboxes, xs, ys

        # Rotate image by 0, 0.5 * pi, pi, 1.5 * pi randomly, with
        # probability config.rotation_prob.
        if USE_ROTATION:
            rotate_draw = tf.random_uniform((), minval=0, maxval=1)
            image, bboxes, xs, ys = tf.cond(
                tf.less(rotate_draw, config.rotation_prob),
                lambda: tf_image.random_rotate90(image, bboxes, xs, ys),
                _untouched)

        # Expand image, with probability config.expand_prob.
        if MAX_EXPAND_SCALE > 1:
            expand_draw = tf.random_uniform((), minval=0, maxval=1)

            def _expanded():
                scale = tf.random_uniform([], minval=1.0,
                                          maxval=MAX_EXPAND_SCALE,
                                          dtype=tf.float32)
                dims = tf.cast(tf.shape(image), dtype=tf.float32)
                height, width = dims[0], dims[1]
                target_h = tf.cast(height * scale, dtype=tf.int32)
                target_w = tf.cast(width * scale, dtype=tf.int32)
                # NOTE(review): this logs from Python whenever the branch
                # function is called, which in graph mode is while tf.cond
                # builds the graph -- it does not show that the expansion was
                # selected at run time.
                tf.logging.info('expanded')
                return tf_image.resize_image_bboxes_with_crop_or_pad(
                    image, bboxes, xs, ys, target_h, target_w)

            image, bboxes, xs, ys = tf.cond(
                tf.less(expand_draw, config.expand_prob),
                _expanded,
                _untouched)

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # Distort image and bounding boxes (the crop window returned last is
        # not used).
        cropped, labels, bboxes, xs, ys, _ = distorted_bounding_box_crop(
            image, labels, bboxes, xs, ys,
            min_object_covered=MIN_OBJECT_COVERED,
            aspect_ratio_range=CROP_ASPECT_RATIO_RANGE,
            area_range=AREA_RANGE)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Filter bboxes using the length of shorter sides.  xs/ys are scaled
        # by the output width/height for the filter and scaled back
        # afterwards (presumably normalized -> pixel coordinates; confirm).
        if USING_SHORTER_SIDE_FILTERING:
            xs = xs * out_shape[1]
            ys = ys * out_shape[0]
            labels, bboxes, xs, ys = tfe.bboxes_filter_by_shorter_side(
                labels, bboxes, xs, ys,
                min_height=MIN_SHORTER_SIDE,
                max_height=MAX_SHORTER_SIDE,
                assign_value=LABEL_IGNORE)
            xs = xs / out_shape[1]
            ys = ys / out_shape[0]

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(img, ordering):
            return distort_color(img, ordering, fast_mode)

        recolored = apply_with_random_selector(resized, _distort, num_cases=4)
        tf_summary_image(recolored, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = tf_image_whitened(recolored * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys
Example #13
0
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         out_shape=EVAL_SIZE,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training, logging each stage.

    Pipeline: float32 conversion, random crop, bilinear resize to
    `out_shape`, random horizontal flip, random colour distortion, rescale
    by 255, whitening and an optional transpose to channels-first.  An image
    summary is emitted after every stage (tags `0_...` to `4_...`).

    Args:
        image: A rank-3 `Tensor` representing an image of arbitrary size.
        labels: Labels, updated by the random crop.
        bboxes: Bounding boxes, updated by the random crop and the flip.
        out_shape: Size the cropped image is resized to (defaults to the
            module-level `EVAL_SIZE`).
        data_format: 'NHWC' (default) or 'NCHW'; with 'NCHW' the image is
            transposed with `perm=(2, 0, 1)`.
        scope: Name scope for the created ops.

    Returns:
        A tuple `(image, labels, bboxes)`.

    Raises:
        ValueError: If the static rank of `image` is not 3.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, '0_original_image')

        # Distort image and bounding boxes.
        augmented, labels, bboxes, crop_window = \
            distorted_bounding_box_crop(image, labels, bboxes)

        # Draw the crop window, as a single box, on the uncropped image.
        tf_summary_image(image, tf.reshape(crop_window, (1, -1)),
                         '1_cropped_position')

        # Resize image to output size.
        augmented = tf_image.resize_image(
            augmented, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(augmented, bboxes, '2_resized_image')

        # Randomly flip the image horizontally.  Vertical flips and
        # 90-degree rotations are not applied in this variant.
        augmented, bboxes = tf_image.random_flip_left_right(augmented, bboxes)
        tf_summary_image(augmented, bboxes, '3_flipped_image')

        # Randomly distort the colors. There are 4 ways to do it.
        def _distort(img, ordering):
            return distort_color(img, ordering, fast_mode)

        augmented = apply_with_random_selector(
            augmented, _distort, num_cases=4)
        tf_summary_image(augmented, bboxes, '4_color_distorted_image')

        # Rescale to VGG input scale.
        image = tf_image_whitened(augmented * 255.,
                                  [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes, xs, ys,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Build the training-time augmentation graph for a single image.

    Steps, in order: optional random 90-degree rotation (`USE_ROTATION`),
    optional random expansion (`MAX_EXPAND_SCALE > 1`), conversion to
    float32, a distorted bounding-box crop, a bilinear resize to
    `out_shape`, optional shorter-side filtering of the boxes, random colour
    distortion, multiplication by 255 and `tf_image_whitened` with the RGB
    means.

    Args:
        image: Image `Tensor`; its static rank must be 3.
        labels: Labels that accompany `bboxes`.
        bboxes: Bounding boxes that accompany the image.
        xs: x coordinates carried through every geometric transform together
            with `bboxes` (presumably normalised corner coordinates, since
            they are multiplied by `out_shape[1]` before filtering -- TODO
            confirm against the caller).
        ys: y coordinates, handled like `xs` but scaled by `out_shape[0]`.
        out_shape: Target size given to `tf_image.resize_image`; indexed as
            `out_shape[0]` and `out_shape[1]`.
        data_format: 'NHWC' (default) or 'NCHW'. With 'NCHW' the returned
            image is transposed with `perm=(2, 0, 1)`.
        scope: Name scope for the created ops.

    Returns:
        The tuple `(image, labels, bboxes, xs, ys)` after preprocessing.

    Raises:
        ValueError: If `image` does not have static rank 3.
    """
    use_fast_color_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Rotate by a random multiple of 90 degrees, with probability
        # config.rotation_prob.
        if USE_ROTATION:
            rotate_draw = tf.random_uniform((), minval=0, maxval=1)
            image, bboxes, xs, ys = tf.cond(
                tf.less(rotate_draw, config.rotation_prob),
                lambda: tf_image.random_rotate90(image, bboxes, xs, ys),
                lambda: (image, bboxes, xs, ys))

        # Expand the image, with probability config.expand_prob.
        if MAX_EXPAND_SCALE > 1:
            expand_draw = tf.random_uniform((), minval=0, maxval=1)

            def _expanded():
                factor = tf.random_uniform([], minval=1.0,
                                           maxval=MAX_EXPAND_SCALE,
                                           dtype=tf.float32)
                dims = tf.cast(tf.shape(image), dtype=tf.float32)
                height, width = dims[0], dims[1]
                new_height = tf.cast(height * factor, dtype=tf.int32)
                new_width = tf.cast(width * factor, dtype=tf.int32)
                # Emitted once, while the graph is being built.
                tf.logging.info('expanded')
                return tf_image.resize_image_bboxes_with_crop_or_pad(
                    image, bboxes, xs, ys, new_height, new_width)

            image, bboxes, xs, ys = tf.cond(
                tf.less(expand_draw, config.expand_prob),
                _expanded,
                lambda: (image, bboxes, xs, ys))

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # Distort image and bounding boxes; the sampled crop box is unused.
        cropped, labels, bboxes, xs, ys, _crop_box = \
            distorted_bounding_box_crop(
                image, labels, bboxes, xs, ys,
                min_object_covered=MIN_OBJECT_COVERED,
                aspect_ratio_range=CROP_ASPECT_RATIO_RANGE,
                area_range=AREA_RANGE)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Filter bboxes using the length of shorter sides. The coordinates
        # are scaled by the output size for the filter and scaled back after.
        if USING_SHORTER_SIDE_FILTERING:
            xs = xs * out_shape[1]
            ys = ys * out_shape[0]
            labels, bboxes, xs, ys = tfe.bboxes_filter_by_shorter_side(
                labels, bboxes, xs, ys,
                min_height=MIN_SHORTER_SIDE,
                max_height=MAX_SHORTER_SIDE,
                assign_value=LABEL_IGNORE)
            xs = xs / out_shape[1]
            ys = ys / out_shape[0]

        # Randomly distort the colors. There are 4 ways to do it.
        distorted = apply_with_random_selector(
            resized,
            lambda x, ordering: distort_color(x, ordering, use_fast_color_mode),
            num_cases=4)
        tf_summary_image(distorted, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        result = tf_image_whitened(distorted * 255.,
                                   [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            result = tf.transpose(result, perm=(2, 0, 1))
        return result, labels, bboxes, xs, ys
def preprocessing_val(image, out_shape):
    """Resize `image` to `out_shape` for validation.

    Args:
        image: Image handed to `tf_image.resize_image`.
        out_shape: Target size handed to `tf_image.resize_image`.

    Returns:
        The result of the bilinear resize (`align_corners=False`).
    """
    resize_options = {
        'method': tf.image.ResizeMethod.BILINEAR,
        'align_corners': False,
    }
    return tf_image.resize_image(image, out_shape, **resize_options)
Example #16
0
    # Resize bboxes to original image shape.
    rbboxes = tf_image.bboxes_resize(rbboxes, img)

    return rscores, rbboxes


# Test on demo images.
path = '../test/'
image_names = sorted(os.listdir(path))
# os.listdir() returns bare file names, so the directory must be joined back
# on; otherwise the open only works when the current directory is `path`.
# The `with` block closes the file handle once the bytes have been read.
with tf.gfile.FastGFile(os.path.join(path, image_names[0]), 'rb') as image_file:
    image_raw = image_file.read()
image = tf.image.decode_png(image_raw)
#img = mpimg.imread(path + image_names[0])
#resized_img = cv2.resize(img, (512, 512))
resized_img = tf_image.resize_image(image,
                                    net_shape,
                                    method=tf.image.ResizeMethod.BILINEAR,
                                    align_corners=False)

# Restore the trained weights into the session before running detection.
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

rscores, rbboxes = process_image(resized_img)
print(rscores)
print('--------------------------')
print(rbboxes)

# Draw results.
# img_bboxes = np.copy(img)
# bboxes_draw_on_img(img_bboxes, rclasses, rscores, rbboxes, colors_tableau, thickness=2)

# mpimg.imsave('output.jpeg', img_bboxes)
Example #17
0
def preprocess_for_train(image, labels, bboxes, orientations,
                         out_shape, data_format='NHWC',
                         scope='simplifyssd_preprocessing_train'):
    """Build the training-time preprocessing graph for a 3-channel image.

    The image is cast to float32 and split into its three channels along
    axis 2. Channels 0 and 1 are mapped to `value / 256 - 0.5` and channel 2
    to `value / 32 - 0.5` before being concatenated again. The result then
    goes through a distorted bounding-box crop, a bilinear resize to
    `out_shape` and a random horizontal flip. No colour distortion and no
    mean subtraction are applied.

    Args:
        image: Image `Tensor` with exactly three channels on axis 2.
        labels: Labels that accompany `bboxes`.
        bboxes: Bounding boxes that accompany the image.
        orientations: Per-box orientations, transformed together with the
            boxes by the crop and the flip.
        out_shape: Target size handed to `tf_image.resize_image`.
        data_format: 'NHWC' (default) or 'NCHW'. With 'NCHW' the returned
            image is transposed with `perm=(2, 0, 1)`.
        scope: Name scope for the created ops.

    Returns:
        The tuple `(image, labels, bboxes, orientations)`.
    """
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes, orientations]):
        # Per-channel normalisation; the rank check and the
        # convert_image_dtype step used by sibling pipelines are not applied.
        pixels = tf.cast(image, tf.float32)
        chan_a, chan_b, chan_c = tf.split(pixels, [1, 1, 1], axis=2)
        divisor = tf.constant(256.0)
        offset = tf.constant(0.5)
        chan_a = tf.subtract(tf.divide(chan_a, divisor), offset)
        divisor = tf.constant(256.0)
        chan_b = tf.subtract(tf.divide(chan_b, divisor), offset)
        # The third channel uses a different divisor than the first two.
        divisor = tf.constant(32.0)
        chan_c = tf.subtract(tf.divide(chan_c, divisor), offset)

        normalized = tf.concat([chan_a, chan_b, chan_c], axis=2)

        tf_summary_image(normalized, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes; the sampled crop box is unused.
        cropped, labels, bboxes, _crop_box, orientations = \
            distorted_bounding_box_crop(
                normalized, labels, bboxes, orientations,
                min_object_covered=MIN_OBJECT_COVERED,
                aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(resized, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        flipped, bboxes, orientations = \
            tf_image.random_flip_left_right_with_ori(
                resized, bboxes, orientations)

        # Colour distortion is disabled here; the summary tag is kept, so it
        # shows the flipped image.
        tf_summary_image(flipped, bboxes, 'image_color_distorted')

        # No rescaling by 255 and no mean subtraction in this variant.
        result = flipped
        # Image data format.
        if data_format == 'NCHW':
            result = tf.transpose(result, perm=(2, 0, 1))
        return result, labels, bboxes, orientations
def ron_preprocess_for_train(image,
                             labels,
                             bboxes,
                             out_shape,
                             data_format='NHWC',
                             scope='ron_preprocessing_train'):
    """Build the RON training-time augmentation graph for one image.

    Steps, in order: conversion to float32, random expansion
    (`tf_image.ssd_random_expand`, taken when a uniform draw is not below
    0.5), random patch sampling, random horizontal flip, bilinear resize to
    `out_shape`, multiplication by 255 and `tf_image_whitened` with the RGB
    means. A colour-distorted copy is built for the summary only.

    Args:
        image: Image `Tensor`; its static rank must be 3.
        labels: Labels that accompany `bboxes`.
        bboxes: Bounding boxes that accompany the image.
        out_shape: Target size handed to `tf_image.resize_image`.
        data_format: 'NHWC' (default) or 'NCHW'. With 'NCHW' the returned
            image is transposed with `perm=(2, 0, 1)`.
        scope: Name scope for the created ops.

    Returns:
        The tuple `(image, labels, bboxes)` after preprocessing.

    Raises:
        ValueError: If `image` does not have static rank 3.
    """
    use_fast_color_mode = False
    with tf.name_scope(scope, 'ron_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes_0')

        # A draw below 0.5 keeps the image as is; otherwise it is expanded.
        keep_draw = tf.random_uniform(
            [1], minval=0., maxval=1., dtype=tf.float32)[0]
        image, bboxes = control_flow_ops.cond(
            keep_draw < 0.5,
            lambda: (image, bboxes),
            lambda: tf_image.ssd_random_expand(image, bboxes, 2))
        tf_summary_image(image, bboxes, 'image_on_canvas_1')

        # Distort image and bounding boxes.
        patch, labels, bboxes = tf_image.ssd_random_sample_patch(
            image, labels, bboxes)
        tf_summary_image(patch, bboxes, 'image_shape_distorted_2')

        # Randomly flip the image horizontally.
        flipped, bboxes = tf_image.random_flip_left_right(patch, bboxes)

        resized = tf_image.resize_image(
            flipped,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)

        tf_summary_image(resized, bboxes, 'image_fliped_and_resized_3')

        # Randomly distort the colors. There are 4 ways to do it.
        color_preview = apply_with_random_selector(
            resized,
            lambda x, ordering: distort_color(x, ordering, use_fast_color_mode),
            num_cases=4)
        tf_summary_image(color_preview, bboxes, 'image_color_distorted_4')
        # NOTE(review): the colour-distorted tensor only feeds the summary
        # above; the undistorted resized image is what gets whitened and
        # returned. Confirm whether colour distortion is meant to be off.
        output = resized
        # Rescale to VGG input scale.
        output.set_shape([None, None, 3])
        result = tf_image_whitened(output * 255.,
                                   [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            result = tf.transpose(result, perm=(2, 0, 1))
        return result, labels, bboxes
Example #19
0
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         out_shape=EVAL_SIZE,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Build the training-time augmentation graph for one image.

    Steps, in order: conversion to float32, clipping of the boxes to
    [0, 1], a distorted bounding-box crop, a bilinear resize to `out_shape`,
    random horizontal flip, random colour distortion, multiplication by 255
    and `tf_image_whitened` with the RGB means.

    Args:
        image: Image `Tensor`; its static rank must be 3.
        labels: Labels that accompany `bboxes`.
        bboxes: Bounding boxes that accompany the image; values are clipped
            to [0, 1] before cropping.
        out_shape: Target size handed to `tf_image.resize_image`.
        data_format: 'NHWC' (default) or 'NCHW'. With 'NCHW' the returned
            image is transposed with `perm=(2, 0, 1)`.
        scope: Name scope for the created ops.

    Returns:
        The tuple `(image, labels, bboxes)` after preprocessing.

    Raises:
        ValueError: If `image` does not have static rank 3.
    """
    use_fast_color_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'original_image')

        # Distort image and bounding boxes; the sampled crop box is unused.
        bboxes = tf.clip_by_value(bboxes, 0., 1.)
        cropped, labels, bboxes, _crop_box = distorted_bounding_box_crop(
            image, labels, bboxes)

        # Resize image to output size.
        resized = tf_image.resize_image(
            cropped,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        # Randomly flip the image horizontally.
        flipped, bboxes = tf_image.random_flip_left_right(resized, bboxes)
        tf_summary_image(flipped, bboxes, 'resized_image')

        # Randomly distort the colors. There are 4 ways to do it.
        distorted = apply_with_random_selector(
            flipped,
            lambda x, ordering: distort_color(x, ordering, use_fast_color_mode),
            num_cases=4)
        tf_summary_image(distorted, bboxes, 'color_distorted_image')

        # Rescale to VGG input scale.
        result = tf_image_whitened(distorted * 255.,
                                   [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            result = tf.transpose(result, perm=(2, 0, 1))
        return result, labels, bboxes
def preprocess_for_eval(image, labels, bboxes,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Build the evaluation-time preprocessing graph for one image.

    The image is converted to float and whitened with the RGB means, then
    resized according to `resize`. A box covering the whole image is
    prepended to `bboxes` so it goes through the same geometric transform,
    and is split off again and returned separately.

    Args:
        image: Image `Tensor`; its static rank must be 3.
        labels: Labels that accompany `bboxes`; masked together with the
            boxes when `difficults` is given.
        bboxes: Bounding boxes, or None to track only the whole-image box.
        out_shape: Output size; indexed as `out_shape[0]`, `out_shape[1]`.
        data_format: 'NHWC' (default) or 'NCHW'. With 'NCHW' the returned
            image is transposed with `perm=(2, 0, 1)`.
        difficults: Optional flags; entries that cast to True are removed
            from `labels` and `bboxes`.
        resize: One of the `Resize` strategies. `Resize.NONE` and any value
            not handled below leave the image size unchanged.
        scope: Name scope for the created ops.

    Returns:
        The tuple `(image, labels, bboxes, bbox_img)`, where `bbox_img` is
        the transformed whole-image box.

    Raises:
        ValueError: If `image` does not have static rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf_image_whitened(tf.to_float(image),
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        full_frame = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = full_frame
        else:
            bboxes = tf.concat([full_frame, bboxes], axis=0)

        target_h, target_w = out_shape[0], out_shape[1]
        if resize == Resize.NONE:
            # No resizing...
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, target_h, target_w)
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: the factor is capped at 1.0, so the image
            # is never enlarged before padding.
            # NOTE(review): `/` between a Python number and an int tensor
            # relies on true division -- confirm the Python/TF version.
            in_shape = tf.shape(image)
            unity = tf.to_double(1.0)
            ratio_h = tf.to_double(target_h / in_shape[0])
            ratio_w = tf.to_double(target_w / in_shape[1])
            shrink = tf.minimum(unity, tf.minimum(ratio_h, ratio_w))
            scaled_hw = shrink * tf.to_double(in_shape[0:2])
            scaled_hw = tf.cast(tf.floor(scaled_hw), tf.int32)

            image = tf_image.resize_image(
                image,
                scaled_hw,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, target_h, target_w)
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image.
            image = tf_image.resize_image(
                image,
                out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Split back bounding boxes: row 0 is the whole-image box.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        # Remove difficult boxes.
        if difficults is not None:
            keep = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, keep)
            bboxes = tf.boolean_mask(bboxes, keep)
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         xs,
                         ys,
                         out_shape,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Build the training-time augmentation graph for one image.

    Steps, in order: optional random horizontal flip (`FLIP`), optional
    rotation (`ROTATE`), a crop resampled to 640x640 with
    `tf.image.crop_and_resize`, remapping and overlap filtering of the boxes
    against the crop, optional random 90-degree rotation (`ROTATE_90`, taken
    when a uniform draw is below 0.2), optional random expansion
    (`MAX_EXPAND_SCALE > 1`), optional shorter-side filtering, random colour
    distortion and `tf_image_whitened` with the RGB means.

    Args:
        image: Image `Tensor`; its static rank must be 3. Colour distortion
            divides by 255 first, so values are presumably in [0, 255] --
            TODO confirm against the caller.
        labels: Labels that accompany `bboxes`.
        bboxes: Bounding boxes that accompany the image.
        xs: x coordinates carried through every geometric transform together
            with `bboxes`.
        ys: y coordinates, handled like `xs`.
        out_shape: Only used to scale `xs`/`ys` for the shorter-side filter
            (`out_shape[1]` for xs, `out_shape[0]` for ys). The crop size is
            fixed at 640x640.
        data_format: 'NHWC' (default) or 'NCHW'. With 'NCHW' the returned
            image is transposed with `perm=(2, 0, 1)`.
        scope: Name scope for the created ops.

    Returns:
        The tuple `(image, labels, bboxes, xs, ys)` after preprocessing.

    Raises:
        ValueError: If `image` does not have static rank 3.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Randomly flip the image horizontally.
        if FLIP:
            image, bboxes, xs, ys = tf_image.random_flip_left_right_bbox(
                image, bboxes, xs, ys)
        if ROTATE:
            # Random rotation of the image in [-10, 10].
            # NOTE(review): three arguments are passed but four values are
            # unpacked -- confirm the signature of tf_rotate_image.
            image, bboxes, xs, ys = tf_rotate_image(image, xs, ys)

        image_shape = tf.cast(tf.shape(image), dtype=tf.float32)
        image_h, image_w = image_shape[0], image_shape[1]

        if USE_NM_CROP:
            # Only boxes whose label is >= 1 steer the crop sampler.
            mask = tf.greater_equal(labels, 1)
            valid_bboxes = tf.boolean_mask(bboxes, mask)
            # FIXME: valid_bboxes may be empty.
            # NOTE: the function wrapped by tf.py_func must return NumPy
            # values.
            crop_bbox = tf.py_func(generate_sample,
                                   [image_shape, valid_bboxes], tf.float32)
        else:
            scales = tf.random_shuffle([0.5, 1.])
            # Debug print of the shuffled scales. This used to reference an
            # undefined name (`crop`), which raised NameError as soon as
            # this branch was taken.
            scales = tf.Print(scales, [scales])
            # Both sides use scales[0], so the crop window is square.
            target_h = tf.cast(640 / scales[0], dtype=tf.float32)
            target_w = tf.cast(640 / scales[0], dtype=tf.float32)
            bbox_begin_h_max = tf.maximum(image_h - target_h, 0)
            bbox_begin_w_max = tf.maximum(image_w - target_w, 0)
            bbox_begin_h = tf.random_uniform([],
                                             minval=0,
                                             maxval=bbox_begin_h_max,
                                             dtype=tf.float32)
            bbox_begin_w = tf.random_uniform([],
                                             minval=0,
                                             maxval=bbox_begin_w_max,
                                             dtype=tf.float32)

            # Crop window as fractions of the image size, in the
            # [y1, x1, y2, x2] order crop_and_resize expects.
            crop_bbox = [bbox_begin_h / image_h,
                         bbox_begin_w / image_w,
                         (bbox_begin_h + target_h) / image_h,
                         (bbox_begin_w + target_w) / image_w]

        # Parts of the window outside the image are filled with 128.
        image = tf.image.crop_and_resize(tf.expand_dims(image, 0), [crop_bbox],
                                         [0], (640, 640),
                                         extrapolation_value=128)
        image = tf.squeeze(image, 0)
        bboxes, xs, ys = tfe.bboxes_resize(crop_bbox, bboxes, xs, ys)
        labels, bboxes, xs, ys = tfe.bboxes_filter_overlap(
            labels,
            bboxes,
            xs,
            ys,
            threshold=BBOX_CROP_OVERLAP,
            assign_value=LABEL_IGNORE)

        if ROTATE_90:
            rnd = tf.random_uniform((), minval=0, maxval=1)
            image, bboxes, xs, ys = tf.cond(
                tf.less(rnd, 0.2),
                lambda: tf_image.random_rotate90(image, bboxes, xs, ys),
                lambda: (image, bboxes, xs, ys))

        # Expand image, with probability config.expand_prob.
        if MAX_EXPAND_SCALE > 1:
            rnd2 = tf.random_uniform((), minval=0, maxval=1)

            def expand():
                scale = tf.random_uniform([],
                                          minval=1.0,
                                          maxval=MAX_EXPAND_SCALE,
                                          dtype=tf.float32)
                image_shape = tf.cast(tf.shape(image), dtype=tf.float32)
                image_h, image_w = image_shape[0], image_shape[1]
                target_h = tf.cast(image_h * scale, dtype=tf.int32)
                target_w = tf.cast(image_w * scale, dtype=tf.int32)
                # Emitted once, while the graph is being built.
                tf.logging.info('expanded')
                return tf_image.resize_image_bboxes_with_crop_or_pad(
                    image, bboxes, xs, ys, target_h, target_w)

            def no_expand():
                return image, bboxes, xs, ys

            image, bboxes, xs, ys = tf.cond(tf.less(rnd2, config.expand_prob),
                                            expand, no_expand)

        # The tf.image.sample_distorted_bounding_box() based crop is not
        # used in this pipeline because it cannot control the scale; the
        # explicit crop above replaces it.
        dst_image = image

        # Filter bboxes using the length of shorter sides. The coordinates
        # are scaled by the output size for the filter and scaled back after.
        if USING_SHORTER_SIDE_FILTERING:
            xs = xs * out_shape[1]
            ys = ys * out_shape[0]
            labels, bboxes, xs, ys = tfe.bboxes_filter_by_shorter_side(
                labels,
                bboxes,
                xs,
                ys,
                min_height=MIN_SHORTER_SIDE,
                max_height=MAX_SHORTER_SIDE,
                assign_value=LABEL_IGNORE)
            xs = xs / out_shape[1]
            ys = ys / out_shape[0]

        # Randomly distort the colors. There are 4 ways to do it. The image
        # is divided by 255 for the distortion and multiplied back after.
        dst_image = apply_with_random_selector(
            dst_image / 255.0,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        dst_image = dst_image * 255.

        # NOTE: resnet v1 uses the VGG data processing, so the same mean
        # subtraction is applied here. The colour-distorted image is
        # whitened; previously the undistorted `image` was passed, which
        # silently discarded the distortion computed above.
        image = tf_image_whitened(dst_image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys