Exemple #1
0
def preprocess_for_train(image, labels, bboxes, out_shape,
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Note that the actual resizing scale is sampled from
        [`resize_size_min`, `resize_size_max`].

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        output_height: The height of the image after preprocessing.
        output_width: The width of the image after preprocessing.
        resize_side_min: The lower bound for the smallest side of the image for
            aspect-preserving resizing.
        resize_side_max: The upper bound for the smallest side of the image for
            aspect-preserving resizing.

    Returns:
        A preprocessed image.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # # Remove DontCare labels.
        # labels, bboxes = ssd_common.tf_bboxes_filter_labels(out_label,
        #                                                     labels,
        #                                                     bboxes)

        # Distort image and bounding boxes.
        dst_image = image
        dst_image, labels, bboxes, distort_bbox = \
            distorted_bounding_box_crop(image, labels, bboxes,
                                        aspect_ratio_range=CROP_RATIO_RANGE)
        # Resize image to output size.
        dst_image = tf_image.resize_image(dst_image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
        tf_summary_image(dst_image, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
                dst_image,
                lambda x, ordering: distort_color(x, ordering, fast_mode),
                num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        return image, labels, bboxes
def preprocess_for_train(image, labels, bboxes, out_shape,
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Note that the actual resizing scale is sampled from
        [`resize_size_min`, `resize_size_max`].

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        output_height: The height of the image after preprocessing.
        output_width: The width of the image after preprocessing.
        resize_side_min: The lower bound for the smallest side of the image for
            aspect-preserving resizing.
        resize_side_max: The upper bound for the smallest side of the image for
            aspect-preserving resizing.

    Returns:
        A preprocessed image.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # # Remove DontCare labels.
        # labels, bboxes = ssd_common.tf_bboxes_filter_labels(out_label,
        #                                                     labels,
        #                                                     bboxes)

        # Distort image and bounding boxes.
        dst_image = image
        dst_image, labels, bboxes, distort_bbox = \
            distorted_bounding_box_crop(image, labels, bboxes,
                                        aspect_ratio_range=CROP_RATIO_RANGE)
        # Resize image to output size.
        dst_image = tf_image.resize_image(dst_image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
        tf_summary_image(dst_image, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
                dst_image,
                lambda x, ordering: distort_color(x, ordering, fast_mode),
                num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        return image, labels, bboxes
Exemple #3
0
def preprocess_for_train(image, labels, bboxes,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')
        # 上面保证了图片是3维的tf.float32格式

        # (有条件的)随机裁剪,labels(n,)、bboxes(n, 4),裁剪图片对应原图坐标(4,)
        dst_image, labels, bboxes, distort_bbox = \
            distorted_bounding_box_crop(image, labels, bboxes,
                                        min_object_covered=MIN_OBJECT_COVERED,  # 0.25
                                        aspect_ratio_range=CROP_RATIO_RANGE)  # (0.6, 1.67)

        # Resize image to output size.
        dst_image = tf_image.resize_image(dst_image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
        tf_summary_image(dst_image, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
                dst_image,
                lambda x, ordering: distort_color(x, ordering, fast_mode),
                num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # mean = tf.constant(means, dtype=image.dtype)
        # image = image - mean

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        # 'NHWC' (n,) (n, 4)
        return image, labels, bboxes
Exemple #4
0
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         out_shape=EVAL_SIZE,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Note that the actual resizing scale is sampled from
        [`resize_size_min`, `resize_size_max`].

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        output_height: The height of the image after preprocessing.
        output_width: The width of the image after preprocessing.
        resize_side_min: The lower bound for the smallest side of the image for
            aspect-preserving resizing.
        resize_side_max: The upper bound for the smallest side of the image for
            aspect-preserving resizing.

    Returns:
        A preprocessed image.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, '0_original_image')

        # # Remove DontCare labels.
        # labels, bboxes = ssd_common.tf_bboxes_filter_labels(out_label,
        #                                                     labels,
        #                                                     bboxes)

        # Distort image and bounding boxes.
        dst_image = image
        dst_image, labels, bboxes, distort_bbox = \
            distorted_bounding_box_crop(dst_image, labels, bboxes)

        tf_summary_image(image, tf.reshape(distort_bbox, (1, -1)),
                         '1_cropped_position')

        # Resize image to output size.
        dst_image = tf_image.resize_image(
            dst_image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(dst_image, bboxes, '2_resized_image')
        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)
        # Randomly flip the image vertically.
        # dst_image, bboxes = tf_image.random_flip_up_down(dst_image, bboxes)
        # Randomly rotate the image 90 degrees counterclockwise.
        # dst_image, bboxes = tf_image.random_rot90(dst_image, bboxes)
        tf_summary_image(dst_image, bboxes, '3_flipped_image')

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
            dst_image,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        tf_summary_image(dst_image, bboxes, '4_color_distorted_image')

        # Rescale to VGG input scale.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
def ron_preprocess_for_train(image,
                             labels,
                             bboxes,
                             out_shape,
                             data_format='NHWC',
                             scope='ron_preprocessing_train'):
    """Preprocesses the given image for training.

    Note that the actual resizing scale is sampled from
        [`resize_size_min`, `resize_size_max`].

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        output_height: The height of the image after preprocessing.
        output_width: The width of the image after preprocessing.
        resize_side_min: The lower bound for the smallest side of the image for
            aspect-preserving resizing.
        resize_side_max: The upper bound for the smallest side of the image for
            aspect-preserving resizing.

    Returns:
        A preprocessed image.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ron_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes_0')

        image, bboxes = control_flow_ops.cond(
            tf.random_uniform([1], minval=0., maxval=1., dtype=tf.float32)[0] <
            0.5, lambda: (image, bboxes),
            lambda: tf_image.ssd_random_expand(image, bboxes, 2))
        tf_summary_image(image, bboxes, 'image_on_canvas_1')

        # Distort image and bounding boxes.
        random_sample_image, labels, bboxes = tf_image.ssd_random_sample_patch(
            image, labels, bboxes)
        tf_summary_image(random_sample_image, bboxes,
                         'image_shape_distorted_2')

        # Randomly flip the image horizontally.
        random_sample_flip_image, bboxes = tf_image.random_flip_left_right(
            random_sample_image, bboxes)

        random_sample_flip_resized_image = tf_image.resize_image(
            random_sample_flip_image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)

        tf_summary_image(random_sample_flip_resized_image, bboxes,
                         'image_fliped_and_resized_3')

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
            random_sample_flip_resized_image,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted_4')
        dst_image = random_sample_flip_resized_image
        # Rescale to VGG input scale.
        dst_image.set_shape([None, None, 3])
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
Exemple #6
0
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         out_shape=EVAL_SIZE,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    '''
    Pre-processes the given image for training.
    :param image: images
    :param labels: labels for faces
    :param bboxes: bounding boxes
    :param out_shape: output shape
    :param data_format:
    :param scope:
    :return:
    '''

    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'original_image')
        #print(tfe.get_shape(image),image.get_shape().as_list(),'debug')
        # # Remove DontCare labels.
        # labels, bboxes = ssd_common.tf_bboxes_filter_labels(out_label,
        #                                                     labels,
        #                                                     bboxes)

        # Distort image and bounding boxes.
        dst_image = image
        debug_image = image
        bboxes = tf.clip_by_value(bboxes, 0., 1.)
        dst_image, labels, bboxes, distort_bbox = distorted_bounding_box_crop(
            dst_image, labels, bboxes)

        #tf_summary_image(image, tf.reshape(distort_bbox, (1,-1)), 'cropped_position')

        # Resize image to output size.
        dst_image = tf_image.resize_image(
            dst_image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)
        tf_summary_image(dst_image, bboxes, 'resized_image')

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
            dst_image,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        tf_summary_image(dst_image, bboxes, 'color_distorted_image')

        # Rescale to VGG input scale.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes,
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         out_shape,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training.

    Note that the actual resizing scale is sampled from
        [`resize_size_min`, `resize_size_max`].
    #注意底下所给的参数和上面提供的参数不一致,因此我们在程序中关注它的实际参数就可以了!
    Args:
        image: A `Tensor` representing an image of arbitrary size.
        output_height: The height of the image after preprocessing.
        output_width: The width of the image after preprocessing.
        resize_side_min: The lower bound for the smallest side of the image for
            aspect-preserving resizing.
        resize_side_max: The upper bound for the smallest side of the image for
            aspect-preserving resizing.

    Returns:
        A preprocessed image.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # # Remove DontCare labels.
        # labels, bboxes = ssd_common.tf_bboxes_filter_labels(out_label,
        #                                                     labels,
        #                                                     bboxes)

        # Distort image and bounding boxes.
        dst_image = image
        dst_image, labels, bboxes, distort_bbox = \
            distorted_bounding_box_crop(image, labels, bboxes,
                                        min_object_covered=MIN_OBJECT_COVERED,
                                        aspect_ratio_range=CROP_RATIO_RANGE)
        # Resize image to output size.
        #因为distorted_bounding_box_crop返回的图像我们都已经set_shape为了[None,None,3],我们需要将其调整为网络所需要的输入大小,
        #所以统一resize为out_shape大小!!!
        dst_image = tf_image.resize_image(
            dst_image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(dst_image, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        #随机左右翻转图像
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        # 使用一种随机的顺序调整图像的色彩!!!
        dst_image = apply_with_random_selector(
            dst_image,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        # 注意dst_image的输出为0~1.0之间,我们需要进行调整恢复为0~255.0作为VGG网络的输入!
        image = dst_image * 255.
        #对图像进行白化操作!
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes