Example #1
def preprocess_for_eval(image,
                        labels,
                        bboxes,
                        out_shape,
                        resize,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: A `Tensor` of labels associated with the bounding boxes.
        bboxes: A `Tensor` of bounding boxes associated with the image.
        out_shape: Output shape after pre-processing (if resize != None).
        resize: Resize strategy.

    Returns:
        A tuple (image, labels, bboxes, bbox_img) of preprocessed tensors.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        # Resize strategy...
        if resize == Resize.NONE:
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            shape = tf.shape(image)
            factor = tf.minimum(
                tf.to_double(1.0),
                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                           tf.to_double(out_shape[1] / shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(
                image,
                resize_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
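The PAD_AND_RESIZE branch above first shrinks the image so it fits inside out_shape (the factor is capped at 1.0, so the image is never enlarged and the aspect ratio is preserved), then pads to the exact output size. A minimal NumPy sketch of that shape computation; the helper name pad_and_resize_shape is purely illustrative and not part of the module:

import numpy as np

def pad_and_resize_shape(image_shape, out_shape):
    """Return the intermediate size an image is scaled to before padding."""
    h, w = float(image_shape[0]), float(image_shape[1])
    # Never enlarge (factor capped at 1.0) and keep the aspect ratio.
    factor = min(1.0, min(out_shape[0] / h, out_shape[1] / w))
    return int(np.floor(h * factor)), int(np.floor(w * factor))

# A 1200x800 image targeted at 300x300 is first resized to 300x200,
# then padded to 300x300 by resize_image_bboxes_with_crop_or_pad.
print(pad_and_resize_shape((1200, 800), (300, 300)))  # (300, 200)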
Example #3
def preprocess_for_eval_multi(image,
                              labels,
                              bboxes,
                              out_shape,
                              resize,
                              scope='ssd_preprocessing_train'):
    with tf.name_scope(scope):

        image = tf.to_float(image)
        #image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        image = image - np.array([123.6800, 116.7790, 103.9390]).reshape(
            (1, 1, 1, 3))

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        # Resize strategy...
        if resize == Resize.NONE:
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            shape = tf.shape(image)
            factor = tf.minimum(
                tf.to_double(1.0),
                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                           tf.to_double(out_shape[1] / shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(
                image,
                resize_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
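This variant replaces tf_image_whitened with a direct per-channel mean subtraction on an NHWC batch. A small self-contained NumPy sketch of just that step, reusing the hard-coded VGG means from the example; the helper name is illustrative:

import numpy as np

def subtract_vgg_means(images):
    """Subtract per-channel RGB means from an NHWC batch of images."""
    means = np.array([123.6800, 116.7790, 103.9390], dtype=np.float32)
    return images.astype(np.float32) - means.reshape((1, 1, 1, 3))

batch = np.random.randint(0, 256, size=(2, 4, 4, 3)).astype(np.float32)
print(subtract_vgg_means(batch).shape)  # (2, 4, 4, 3)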
def expand():
    scale = tf.random_uniform([], minval=1.0,
                              maxval=MAX_EXPAND_SCALE, dtype=tf.float32)
    image_shape = tf.cast(tf.shape(image), dtype=tf.float32)
    image_h, image_w = image_shape[0], image_shape[1]
    target_h = tf.cast(image_h * scale, dtype=tf.int32)
    target_w = tf.cast(image_w * scale, dtype=tf.int32)
    tf.logging.info('expanded')
    return tf_image.resize_image_bboxes_with_crop_or_pad(
        image, bboxes, xs, ys, target_h, target_w)
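The expand closure above is meant to be selected by tf.cond with some probability (see preprocess_for_train below); it grows the canvas so the original content covers a smaller area, a standard zoom-out augmentation. A self-contained NumPy sketch of just the target-size computation, with MAX_EXPAND_SCALE set to an assumed placeholder value:

import numpy as np

MAX_EXPAND_SCALE = 4.0  # assumed placeholder; the real value comes from the config

def expanded_canvas_size(image_h, image_w, rng=np.random):
    # Sample a scale in [1, MAX_EXPAND_SCALE) and grow both dimensions by it.
    scale = rng.uniform(1.0, MAX_EXPAND_SCALE)
    return int(image_h * scale), int(image_w * scale)

print(expanded_canvas_size(300, 400))  # e.g. (712, 950)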
def preprocess_for_eval(image, labels, bboxes,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: A `Tensor` of labels associated with the bounding boxes.
        bboxes: A `Tensor` of bounding boxes associated with the image.
        out_shape: Output shape after pre-processing (if resize != None).
        data_format: 'NHWC' or 'NCHW' output layout.
        difficults: Optional `Tensor` flagging difficult boxes to remove.
        resize: Resize strategy.

    Returns:
        A tuple (image, labels, bboxes, bbox_img) of preprocessed tensors.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        if resize == Resize.NONE:
            # No resizing...
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            shape = tf.shape(image)
            factor = tf.minimum(tf.to_double(1.0),
                                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                                           tf.to_double(out_shape[1] / shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(image, resize_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image.
            image = tf_image.resize_image(image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        # Remove difficult boxes.
        if difficults is not None:
            mask = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, mask)
            bboxes = tf.boolean_mask(bboxes, mask)
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img
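A hedged usage sketch for the evaluation preprocessing above, assuming the function is importable from a module named ssd_vgg_preprocessing and that Resize is defined alongside it (both are assumptions about the surrounding project layout):

import tensorflow as tf
import ssd_vgg_preprocessing as ssd_prep  # assumed import path

# Placeholders standing in for one decoded example from the input pipeline.
image = tf.placeholder(tf.uint8, shape=(None, None, 3))
labels = tf.constant([1], dtype=tf.int64)
bboxes = tf.constant([[0.1, 0.1, 0.9, 0.9]], dtype=tf.float32)

image_pre, labels_pre, bboxes_pre, bbox_img = ssd_prep.preprocess_for_eval(
    image, labels, bboxes,
    out_shape=(300, 300),
    data_format='NHWC',
    resize=ssd_prep.Resize.WARP_RESIZE)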
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         xs,
                         ys,
                         out_shape,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Note that the actual resizing scale is sampled from
        [`resize_size_min`, `resize_size_max`].

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        output_height: The height of the image after preprocessing.
        output_width: The width of the image after preprocessing.
        resize_side_min: The lower bound for the smallest side of the image for
            aspect-preserving resizing.
        resize_side_max: The upper bound for the smallest side of the image for
            aspect-preserving resizing.

    Returns:
        A preprocessed image.
    """
    fast_mode = True
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Rotate image by 0, 0.5 * pi, pi or 1.5 * pi randomly.
        if USE_ROTATION:
            rnd = tf.random_uniform((), minval=0, maxval=1)

            def rotate():
                return tf_image.random_rotate90(image, bboxes, xs, ys)

            def no_rotate():
                return image, bboxes, xs, ys

            image, bboxes, xs, ys = tf.cond(tf.less(rnd, config.rotation_prob),
                                            rotate, no_rotate)

        # Pad the image to a square canvas (side = max(h, w)) so the later
        # resize to out_shape does not distort the aspect ratio.
        image_shape = tf.cast(tf.shape(image), dtype=tf.float32)
        image_h, image_w = image_shape[0], image_shape[1]
        target = tf.cast(tf.maximum(image_h, image_w), dtype=tf.int32)
        image, bboxes, xs, ys = tf_image.resize_image_bboxes_with_crop_or_pad(
            image, bboxes, xs, ys, target, target)

        # expand image
        if MAX_EXPAND_SCALE > 1:
            rnd2 = tf.random_uniform((), minval=0, maxval=1)

            def expand():
                scale = tf.random_uniform([],
                                          minval=1.0,
                                          maxval=MAX_EXPAND_SCALE,
                                          dtype=tf.float32)
                image_shape = tf.cast(tf.shape(image), dtype=tf.float32)
                image_h, image_w = image_shape[0], image_shape[1]
                target_h = tf.cast(image_h * scale, dtype=tf.int32)
                target_w = tf.cast(image_w * scale, dtype=tf.int32)
                tf.logging.info('expanded')
                return tf_image.resize_image_bboxes_with_crop_or_pad(
                    image, bboxes, xs, ys, target_h, target_w)

            def no_expand():
                return image, bboxes, xs, ys

            image, bboxes, xs, ys = tf.cond(tf.less(rnd2, config.expand_prob),
                                            expand, no_expand)

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)


        # tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes.
        dst_image = image
        # dst_image, labels, bboxes, xs, ys, distort_bbox = \
        #     distorted_bounding_box_crop(image, labels, bboxes, xs, ys,
        #                                 min_object_covered = MIN_OBJECT_COVERED,
        #                                 aspect_ratio_range = CROP_ASPECT_RATIO_RANGE,
        #                                 area_range = AREA_RANGE)
        # Resize image to output size.
        dst_image = tf_image.resize_image(
            dst_image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(dst_image, bboxes, 'image_shape_distorted')

        # Filter bboxes using the length of shorter sides
        if USING_SHORTER_SIDE_FILTERING:
            xs = xs * out_shape[1]
            ys = ys * out_shape[0]
            labels, bboxes, xs, ys = tfe.bboxes_filter_by_shorter_side(
                labels,
                bboxes,
                xs,
                ys,
                min_height=MIN_SHORTER_SIDE,
                max_height=MAX_SHORTER_SIDE,
                assign_value=LABEL_IGNORE)
            xs = xs / out_shape[1]
            ys = ys / out_shape[0]

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
            dst_image,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys
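The final steps of preprocess_for_train rescale the [0, 1] image back to [0, 255], subtract the channel means, and optionally transpose to NCHW. A compact NumPy sketch of just those steps; the mean values are assumed stand-ins for _R_MEAN, _G_MEAN, _B_MEAN:

import numpy as np

# Assumed stand-ins for _R_MEAN, _G_MEAN, _B_MEAN.
_MEANS = np.array([123.0, 117.0, 104.0], dtype=np.float32)

def finalize_image(image01, data_format='NHWC'):
    """image01: float HWC array in [0, 1]; returns the whitened image."""
    image = image01.astype(np.float32) * 255.0 - _MEANS
    if data_format == 'NCHW':
        image = np.transpose(image, (2, 0, 1))
    return image

img = np.random.rand(300, 300, 3).astype(np.float32)
print(finalize_image(img, 'NCHW').shape)  # (3, 300, 300)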