Esempio n. 1
0
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Take a random crop of `image` and update `labels`/`bboxes` for it.

    The crop window is drawn by `tf.image.sample_distorted_bounding_box`
    (see its documentation for the exact sampling rules).

    Args:
        image: Image Tensor. The cropped result is declared with static
            shape [None, None, 3].
        labels: Labels Tensor, filtered together with `bboxes` by
            `tfe.bboxes_filter_overlap`.
        bboxes: Float Tensor of bounding boxes, coordinates ordered
            [ymin, xmin, ymax, xmax]. Values are clipped to [0, 1] and a
            leading batch dimension is added before sampling.
        min_object_covered: Forwarded to the sampler; the crop must contain
            at least this fraction of any supplied bounding box.
        aspect_ratio_range: Forwarded to the sampler; allowed range for the
            crop's aspect ratio (width / height).
        area_range: Forwarded to the sampler; allowed range for the fraction
            of the image area covered by the crop.
        max_attempts: Forwarded to the sampler; number of sampling attempts
            before it falls back to the entire image.
        clip_bboxes: Not read by this function; boxes are always clipped.
        scope: Optional scope for name_scope.
    Returns:
        A tuple `(cropped_image, labels, bboxes, distort_bbox)` where
        `distort_bbox` is element [0, 0] of the box returned by the sampler.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # Keep every coordinate inside the unit square before sampling.
        bboxes = tf.clip_by_value(bboxes, 0.0, 1.0)

        # The sampler takes the image shape and a batched box tensor.
        image_shape = tf.shape(image)
        batched_boxes = tf.expand_dims(bboxes, 0)
        crop_begin, crop_size, crop_window = (
            tf.image.sample_distorted_bounding_box(
                image_shape,
                bounding_boxes=batched_boxes,
                min_object_covered=min_object_covered,
                aspect_ratio_range=aspect_ratio_range,
                area_range=area_range,
                max_attempts=max_attempts,
                use_image_if_no_bounding_boxes=True))
        # Drop the two leading dimensions of the sampled window.
        crop_window = crop_window[0, 0]

        # Cut the window out of the image; the dynamic slice loses the
        # static channel dimension, so declare it again.
        cropped_image = tf.slice(image, crop_begin, crop_size)
        cropped_image.set_shape([None, None, 3])

        # Resize the boxes against the crop window, then filter them (and
        # their labels) by overlap.
        resized_boxes = tfe.bboxes_resize(crop_window, bboxes)
        kept_labels, kept_boxes = tfe.bboxes_filter_overlap(
            labels,
            resized_boxes,
            threshold=BBOX_CROP_OVERLAP,
            assign_negative=False)
        return cropped_image, kept_labels, kept_boxes, crop_window
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Randomly crop `image` and carry `labels`/`bboxes` over to the crop.

    Sampling is delegated to `tf.image.sample_distorted_bounding_box`; see
    that function for more documentation.

    Args:
        image: Image Tensor. The crop is declared as [None, None, 3].
        labels: Labels Tensor, passed with `bboxes` through
            `tfe.bboxes_filter_overlap`.
        bboxes: Float Tensor of bounding boxes with coordinates arranged as
            [ymin, xmin, ymax, xmax]; a leading batch dimension is added
            before it is handed to the sampler.
        min_object_covered: The cropped area must contain at least this
            fraction of any bounding box supplied (sampler argument).
        aspect_ratio_range: The cropped area must have an aspect ratio
            (width / height) within this range (sampler argument).
        area_range: The cropped area must contain a fraction of the image
            within this range (sampler argument).
        max_attempts: Number of attempts at generating a valid crop; after
            that many failures the sampler returns the entire image.
        clip_bboxes: Not read by this function.
        scope: Optional scope for name_scope.
    Returns:
        A tuple `(cropped_image, labels, bboxes, distort_bbox)`;
        `distort_bbox` is element [0, 0] of the sampled box.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        sampled = tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=tf.expand_dims(bboxes, 0),
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        slice_begin, slice_size, sampled_box = sampled
        window = sampled_box[0, 0]

        # Extract the crop and re-declare the channel count, which the
        # dynamic slice does not preserve statically.
        crop = tf.slice(image, slice_begin, slice_size)
        crop.set_shape([None, None, 3])

        # Resize boxes against the window, then filter by overlap.
        bboxes = tfe.bboxes_resize(window, bboxes)
        filtered = tfe.bboxes_filter_overlap(labels, bboxes,
                                             threshold=BBOX_CROP_OVERLAP,
                                             assign_negative=False)
        labels, bboxes = filtered
        return crop, labels, bboxes, window
Esempio n. 3
0
def distorted_image(image, height, labels, width, bbox, thread_id, scope=None):
    """Randomly crop, resize, flip and colour-distort a training image.

    Args:
        image: Image Tensor; the result is declared as [height, width, 3].
        height: Output height used for the bilinear resize.
        labels: Labels Tensor passed to `tfe.bboxes_filter_overlap`.
        width: Output width used for the bilinear resize.
        bbox: Bounding boxes, passed unchanged to
            `tf.image.draw_bounding_boxes` and
            `tf.image.sample_distorted_bounding_box`, so it must already
            carry the leading batch dimension those ops expect.
        thread_id: Preprocessing thread identifier. Image summaries are
            only added when it is falsy (i.e. the first thread).
        scope: Optional scope for name_scope.
    Returns:
        A tuple `(distorted_image, label, bboxes, num)`; the last three
        values are whatever `tfe.bboxes_filter_overlap` returns.
    """
    # Each bounding box has shape [1, num_boxes, box coords] and
    # the coordinates are ordered [ymin, xmin, ymax, xmax].

    # Display the bounding box in the first thread only.
    add_summaries = not thread_id
    with tf.name_scope(scope, 'distorted_bounding_box_crop',
                       [image, bbox, height, width]):
        if add_summaries:
            image_with_box = tf.image.draw_bounding_boxes(
                tf.expand_dims(image, 0), bbox)
            tf.summary.image('image_with_bounding_boxes', image_with_box)

        bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=bbox,
            min_object_covered=0.1,
            aspect_ratio_range=(0.9, 1.1),
            area_range=(0.1, 1.0),
            max_attempts=200,
            use_image_if_no_bounding_boxes=True)

        if add_summaries:
            image_with_distorted_box = tf.image.draw_bounding_boxes(
                tf.expand_dims(image, 0), distort_bbox)
            tf.summary.image('images_with_distorted_bounding_box',
                             image_with_distorted_box)

        distort_bbox = distort_bbox[0, 0]

        # Crop the image to the specified bounding box.
        cropped_image = tf.slice(image, bbox_begin, bbox_size)

        distorted = tf.image.resize_images(
            cropped_image, [height, width],
            method=tf.image.ResizeMethod.BILINEAR)
        # Restore the static shape, which the dynamic slice above loses.
        distorted.set_shape([height, width, 3])
        if add_summaries:
            tf.summary.image('cropped_resized_image',
                             tf.expand_dims(distorted, 0))
        distorted = tf.image.random_flip_left_right(distorted)
        # Randomly distort the colors.
        distorted = distort_color(distorted, thread_id)

        if add_summaries:
            tf.summary.image('final_distorted_image',
                             tf.expand_dims(distorted, 0))

        # Update bounding boxes: resize and filter out.
        bboxes = tfe.bboxes_resize(distort_bbox, bbox)
        # Debug output emitted once, while the graph is being built. The
        # parenthesised form is valid on both Python 2 and Python 3 (the
        # original used the Python 2-only print statement).
        print("labels: %s " % (labels))
        label, bboxes, num = tfe.bboxes_filter_overlap(labels,
                                                       bboxes,
                                                       threshold=0.4)

        return distorted, label, bboxes, num
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                cord,
                                min_object_covered=0.1,
                                aspect_ratio_range=(0.8, 1.2),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                scope=None):
    """Randomly crop `image` and update labels, boxes and `cord` for it.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: A Tensor with all labels.
        bboxes: A Tensor with the box coordinates; a leading batch
            dimension is added before sampling.
        cord: Tensor resized with `tfe.polybox_resize` and filtered together
            with `bboxes`.
        min_object_covered: The cropped area of the image must contain at
            least this fraction of any bounding box supplied.
        aspect_ratio_range: The cropped area of the image must have an
            aspect ratio = width / height within this range.
        area_range: The cropped area of the image must contain a fraction of
            the supplied image within this range.
        max_attempts: Number of attempts at generating a cropped region of
            the image of the specified constraints.
        scope: Optional scope for name_scope.
    Returns:
        A tuple `(cropped_image, labels, bboxes, cord, distort_bbox, num)`.
        `labels`, `bboxes`, `cord` and `num` come from
        `tfe.bboxes_filter_overlap`; `distort_bbox` is element [0, 0] of the
        sampled box.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes,cord]):
        # Boxes are ordered [ymin, xmin, ymax, xmax]. The sampler is told
        # not to substitute the whole image when no boxes are supplied.
        image_shape = tf.shape(image)
        batched_boxes = tf.expand_dims(bboxes, 0)
        origin, extent, window = tf.image.sample_distorted_bounding_box(
            image_shape,
            bounding_boxes=batched_boxes,
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=False)
        window = window[0, 0]

        # Cut the sampled window out of the image.
        cropped_image = tf.slice(image, origin, extent)

        # Resize boxes and `cord` against the window, then filter by overlap.
        resized_boxes = tfe.bboxes_resize(window, bboxes)
        resized_cord = tfe.polybox_resize(window, cord)
        filtered = tfe.bboxes_filter_overlap(labels, resized_boxes,
                                             resized_cord, BBOX_CROP_OVERLAP)
        labels, bboxes, cord, num = filtered
        return cropped_image, labels, bboxes, cord, window, num
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                xs,
                                ys,
                                min_object_covered,
                                aspect_ratio_range,
                                area_range,
                                max_attempts=200,
                                scope=None):
    """Randomly crop `image` and update labels, boxes and corner points.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    When `bboxes` has no rows, a single random 0.1 x 0.1 box (with label 0
    and matching corner coordinates) is substituted before sampling.

    Args:
        image: Image Tensor. The crop is declared as [None, None, 3].
        labels: Labels Tensor, filtered together with the boxes.
        bboxes: 2-D float Tensor of bounding boxes arranged
            [num_boxes, coords], coordinates ordered
            [ymin, xmin, ymax, xmax].
        xs: x coordinates accompanying each box; resized and filtered with
            the boxes.
        ys: y coordinates accompanying each box; resized and filtered with
            the boxes.
        min_object_covered: The cropped area of the image must contain at
            least this fraction of any bounding box supplied.
        aspect_ratio_range: The cropped area of the image must have an
            aspect ratio = width / height within this range.
        area_range: The cropped area of the image must contain a fraction of
            the supplied image within this range.
        max_attempts: Number of attempts at generating a cropped region;
            after that many failures the entire image is returned.
        scope: Optional scope for name_scope.
    Returns:
        A tuple `(cropped_image, labels, bboxes, xs, ys, distort_bbox)`.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop',
                       [image, bboxes, xs, ys]):
        box_count = tf.shape(bboxes)[0]

        def keep_inputs():
            # Boxes are present: use the inputs untouched.
            return bboxes, labels, xs, ys

        def make_random_box():
            # Top-left corner drawn from [0, 0.9) so that a box with side
            # 0.1 stays inside the unit square.
            left = tf.random_uniform((1, 1), minval=0, maxval=0.9)
            top = tf.random_uniform((1, 1), minval=0, maxval=0.9)
            side = tf.constant(0.1, dtype=tf.float32)
            right = left + side
            bottom = top + side
            box = tf.concat([top, left, bottom, right], axis=1)
            label = tf.constant([0], dtype=tf.int32)
            # Corners listed as (left, top), (right, top), (right, bottom),
            # (left, bottom).
            corner_xs = tf.concat([left, right, right, left], axis=1)
            corner_ys = tf.concat([top, top, bottom, bottom], axis=1)

            return box, label, corner_xs, corner_ys

        bboxes, labels, xs, ys = tf.cond(box_count > 0, keep_inputs,
                                         make_random_box)

        # Sample the crop window from the (possibly substituted) boxes.
        image_shape = tf.shape(image)
        batched_boxes = tf.expand_dims(bboxes, 0)
        crop_begin, crop_size, crop_window = (
            tf.image.sample_distorted_bounding_box(
                image_shape,
                bounding_boxes=batched_boxes,
                min_object_covered=min_object_covered,
                aspect_ratio_range=aspect_ratio_range,
                area_range=area_range,
                max_attempts=max_attempts,
                use_image_if_no_bounding_boxes=True))
        crop_window = crop_window[0, 0]

        # Cut out the window; the dynamic slice loses the static channel
        # dimension, so declare it again.
        cropped_image = tf.slice(image, crop_begin, crop_size)
        cropped_image.set_shape([None, None, 3])

        # Resize boxes and corner points against the window, then filter
        # them by overlap.
        bboxes, xs, ys = tfe.bboxes_resize(crop_window, bboxes, xs, ys)
        labels, bboxes, xs, ys = tfe.bboxes_filter_overlap(
            labels, bboxes, xs, ys,
            threshold=BBOX_CROP_OVERLAP,
            assign_value=LABEL_IGNORE)
        return cropped_image, labels, bboxes, xs, ys, crop_window
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                xs, ys, 
                                min_object_covered,
                                aspect_ratio_range,
                                area_range,
                                max_attempts = 200,
                                scope=None):
    """Crop `image` at random and update labels, boxes and corner points.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    If `bboxes` has no rows, one random 0.1 x 0.1 box labelled
    `config.background_label` is used in its place for sampling.

    Args:
        image: Image Tensor. The crop is declared as [None, None, 3].
        labels: Labels Tensor, filtered together with the boxes.
        bboxes: 2-D float Tensor of bounding boxes arranged
            [num_boxes, coords], coordinates ordered
            [ymin, xmin, ymax, xmax].
        xs: x coordinates accompanying each box.
        ys: y coordinates accompanying each box.
        min_object_covered: The cropped area of the image must contain at
            least this fraction of any bounding box supplied.
        aspect_ratio_range: The cropped area of the image must have an
            aspect ratio = width / height within this range.
        area_range: The cropped area of the image must contain a fraction of
            the supplied image within this range.
        max_attempts: Number of attempts at generating a cropped region;
            after that many failures the entire image is returned.
        scope: Optional scope for name_scope.
    Returns:
        A tuple `(cropped_image, labels, bboxes, xs, ys, distort_bbox)`.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes, xs, ys]):
        n_boxes = tf.shape(bboxes)[0]

        def passthrough():
            # At least one box: nothing to substitute.
            return bboxes, labels, xs, ys

        def background_box():
            # A 0.1-sided square whose top-left corner lies in [0, 0.9).
            x0 = tf.random_uniform((1, 1), minval=0, maxval=0.9)
            y0 = tf.random_uniform((1, 1), minval=0, maxval=0.9)
            edge = tf.constant(0.1, dtype=tf.float32)
            x1 = x0 + edge
            y1 = y0 + edge
            fake_boxes = tf.concat([y0, x0, y1, x1], axis=1)
            fake_labels = tf.constant([config.background_label],
                                      dtype=tf.int64)
            fake_xs = tf.concat([x0, x1, x1, x0], axis=1)
            fake_ys = tf.concat([y0, y0, y1, y1], axis=1)
            return fake_boxes, fake_labels, fake_xs, fake_ys

        bboxes, labels, xs, ys = tf.cond(n_boxes > 0, passthrough,
                                         background_box)

        sampled = tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=tf.expand_dims(bboxes, 0),
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        begin, size, window = sampled
        window = window[0, 0]

        # Slice out the crop and restore the static channel dimension that
        # the dynamic slice drops.
        crop = tf.slice(image, begin, size)
        crop.set_shape([None, None, 3])

        # Resize boxes and corner points against the window, then filter.
        resized = tfe.bboxes_resize(window, bboxes, xs, ys)
        bboxes, xs, ys = resized
        filtered = tfe.bboxes_filter_overlap(labels, bboxes, xs, ys,
                                             threshold=BBOX_CROP_OVERLAP,
                                             assign_value=LABEL_IGNORE)
        labels, bboxes, xs, ys = filtered
        return crop, labels, bboxes, xs, ys, window
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         xs,
                         ys,
                         out_shape,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image and its annotations for training.

    Steps, each gated by a module-level switch where noted: random
    horizontal flip (FLIP), random rotation (ROTATE), crop to a 640x640
    patch (sampled by `generate_sample` when USE_NM_CROP is set, otherwise a
    random window of side 640 / scale), random 90-degree rotation
    (ROTATE_90), random expansion (MAX_EXPAND_SCALE > 1), shorter-side
    filtering (USING_SHORTER_SIDE_FILTERING), random colour distortion and
    mean subtraction via `tf_image_whitened`.

    Args:
        image: A 3-D `Tensor` [height, width, channels] of arbitrary size.
        labels: Labels Tensor; values >= 1 are treated as valid boxes when
            USE_NM_CROP is set.
        bboxes: Bounding boxes Tensor, updated alongside the image.
        xs: x coordinates accompanying each box.
        ys: y coordinates accompanying each box.
        out_shape: (height, width) used to scale `xs`/`ys` for shorter-side
            filtering and by the disabled distorted-crop path.
            NOTE(review): the crop itself is hard-coded to 640x640 and does
            not read `out_shape` - confirm this is intended.
        data_format: 'NHWC' (default) or 'NCHW'; for 'NCHW' the image is
            transposed to channels-first.
        scope: Optional scope for name_scope.

    Returns:
        A tuple `(image, labels, bboxes, xs, ys)`.

    Raises:
        ValueError: If `image` is not 3-D.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Randomly flip the image horizontally.
        if FLIP:
            image, bboxes, xs, ys = tf_image.random_flip_left_right_bbox(
                image, bboxes, xs, ys)
        if ROTATE:
            # random rotate the image [-10, 10]
            image, bboxes, xs, ys = tf_rotate_image(image, xs, ys)

        # samples = tf.multinomial(tf.log([[0.25, 0.25, 0.25, 0.25]]), 1) # note log-prob
        # scale=elems[tf.cast(samples[0][0], tf.int32)]
        # if SCALE:
        #     image,bboxes,xs,ys=tf_scale_image(image,bboxes,xs,ys,640)

        image_shape = tf.cast(tf.shape(image), dtype=tf.float32)
        image_h, image_w = image_shape[0], image_shape[1]

        if USE_NM_CROP:
            mask = tf.greater_equal(labels, 1)
            valid_bboxes = tf.boolean_mask(bboxes, mask)
            # FIXME: valid_bboxes may be empty.
            # NOTE: the function given to tf.py_func must return numpy
            # values.
            crop_bbox = tf.py_func(generate_sample,
                                   [image_shape, valid_bboxes], tf.float32)
        else:
            scales = tf.random_shuffle([0.5, 1.])
            # Debug print of the shuffled scales. The original passed an
            # undefined name (`crop`) here, which raised NameError whenever
            # this branch was taken.
            scales = tf.Print(scales, [scales])
            target_h = tf.cast(640 / scales[0], dtype=tf.float32)
            target_w = tf.cast(640 / scales[0], dtype=tf.float32)
            # Largest top-left offset that keeps the window inside the
            # image (0 when the window is larger than the image).
            bbox_begin_h_max = tf.maximum(image_h - target_h, 0)
            bbox_begin_w_max = tf.maximum(image_w - target_w, 0)
            bbox_begin_h = tf.random_uniform([],
                                             minval=0,
                                             maxval=bbox_begin_h_max,
                                             dtype=tf.float32)
            bbox_begin_w = tf.random_uniform([],
                                             minval=0,
                                             maxval=bbox_begin_w_max,
                                             dtype=tf.float32)

            # Normalized [ymin, xmin, ymax, xmax] of the crop window.
            crop_bbox = [bbox_begin_h / image_h,
                         bbox_begin_w / image_w,
                         (bbox_begin_h + target_h) / image_h,
                         (bbox_begin_w + target_w) / image_w]

        # Crop and resize in one step; regions outside the image are filled
        # with the value 128.
        image = tf.image.crop_and_resize(tf.expand_dims(image, 0), [crop_bbox],
                                         [0], (640, 640),
                                         extrapolation_value=128)
        image = tf.squeeze(image, 0)
        bboxes, xs, ys = tfe.bboxes_resize(crop_bbox, bboxes, xs, ys)
        labels, bboxes, xs, ys = tfe.bboxes_filter_overlap(
            labels,
            bboxes,
            xs,
            ys,
            threshold=BBOX_CROP_OVERLAP,
            assign_value=LABEL_IGNORE)

        if ROTATE_90:
            # Apply the 90-degree rotation with probability 0.2.
            rnd = tf.random_uniform((), minval=0, maxval=1)
            image, bboxes, xs, ys = tf.cond(
                tf.less(rnd, 0.2),
                lambda: tf_image.random_rotate90(image, bboxes, xs, ys),
                lambda: (image, bboxes, xs, ys))

        # tf_summary_image(tf.to_float(image), bboxes, 'crop_image')

        # Optionally enlarge the image canvas by a random factor in
        # [1, MAX_EXPAND_SCALE), with probability config.expand_prob.
        if MAX_EXPAND_SCALE > 1:
            rnd2 = tf.random_uniform((), minval=0, maxval=1)

            def expand():
                scale = tf.random_uniform([],
                                          minval=1.0,
                                          maxval=MAX_EXPAND_SCALE,
                                          dtype=tf.float32)
                image_shape = tf.cast(tf.shape(image), dtype=tf.float32)
                image_h, image_w = image_shape[0], image_shape[1]
                target_h = tf.cast(image_h * scale, dtype=tf.int32)
                target_w = tf.cast(image_w * scale, dtype=tf.int32)
                # Logged once, when the branch is built into the graph.
                tf.logging.info('expanded')
                return tf_image.resize_image_bboxes_with_crop_or_pad(
                    image, bboxes, xs, ys, target_h, target_w)

            def no_expand():
                return image, bboxes, xs, ys

            image, bboxes, xs, ys = tf.cond(tf.less(rnd2, config.expand_prob),
                                            expand, no_expand)

        # Convert to float scaled [0, 1].
        # if image.dtype != tf.float32:
        # image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Distort image and bounding boxes.
        dst_image = image
        # Disabled path: tf.image.sample_distorted_bounding_box() can draw a
        # random training patch, but gives no control over the scale.
        if False:
            dst_image, labels, bboxes, xs, ys, distort_bbox = \
                distorted_bounding_box_crop(image, labels, bboxes, xs, ys,
                                            min_object_covered=MIN_OBJECT_COVERED,
                                            aspect_ratio_range=CROP_ASPECT_RATIO_RANGE,
                                            area_range=AREA_RANGE)
            # Resize image to output size.
            dst_image = tf_image.resize_image(
                dst_image,
                out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Filter bboxes using the length of shorter sides
        if USING_SHORTER_SIDE_FILTERING:
            # Scale by out_shape for the side-length test, then scale back.
            xs = xs * out_shape[1]
            ys = ys * out_shape[0]
            labels, bboxes, xs, ys = tfe.bboxes_filter_by_shorter_side(
                labels,
                bboxes,
                xs,
                ys,
                min_height=MIN_SHORTER_SIDE,
                max_height=MAX_SHORTER_SIDE,
                assign_value=LABEL_IGNORE)
            xs = xs / out_shape[1]
            ys = ys / out_shape[0]

        # Randomly distort the colors. There are 4 ways to do it. The
        # distortion is applied on a [0, 1] scale and then scaled back.
        dst_image = apply_with_random_selector(
            dst_image / 255.0,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        dst_image = dst_image * 255.
        # tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # FIXME: change the input value
        # NOTE: resnet v1 use VGG data process, so we use the same way
        # Whiten the colour-distorted image. The original whitened `image`
        # here, which silently discarded the colour distortion above.
        image = tf_image_whitened(dst_image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys
Esempio n. 8
0
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Draw a random crop of `image` and update `labels`/`bboxes` for it.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: Image Tensor. The crop is declared as [None, None, 3].
        labels: Labels Tensor, filtered together with `bboxes`.
        bboxes: Float Tensor of bounding boxes, coordinates in [0.0, 1.0]
            relative to the image and ordered [ymin, xmin, ymax, xmax]. A
            leading batch dimension is added before sampling.
        min_object_covered: The cropped area of the image must contain at
            least this fraction of any bounding box supplied.
        aspect_ratio_range: The cropped area of the image must have an
            aspect ratio = width / height within this range.
        area_range: The cropped area of the image must contain a fraction of
            the supplied image within this range.
        max_attempts: Number of attempts at generating a cropped region;
            after that many failures the entire image is returned.
        clip_bboxes: Not read by this function.
        scope: Optional scope for name_scope.
    Returns:
        A tuple `(cropped_image, labels, bboxes, distort_bbox)`: the random
        crop, the filtered labels and boxes, and the crop window taken from
        element [0, 0] of the sampled box.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # Random crop driven by the supplied boxes. Per the TensorFlow
        # documentation the sampler returns (begin, size, bboxes):
        #   begin:  1-D, [offset_height, offset_width, 0], for tf.slice.
        #   size:   1-D, [target_height, target_width, -1], for tf.slice.
        #   bboxes: float32 of shape [1, 1, 4] holding the distorted box.
        image_shape = tf.shape(image)
        batched_boxes = tf.expand_dims(bboxes, 0)  # [1, n, 4]
        slice_begin, slice_size, sampled_box = (
            tf.image.sample_distorted_bounding_box(
                image_shape,
                bounding_boxes=batched_boxes,
                min_object_covered=min_object_covered,
                aspect_ratio_range=aspect_ratio_range,
                area_range=area_range,
                max_attempts=max_attempts,
                use_image_if_no_bounding_boxes=True))
        # [1, 1, 4] -> [4]
        crop_window = sampled_box[0, 0]

        # Crop the image to the sampled window.
        cropped_image = tf.slice(image, slice_begin, slice_size)
        # The static shape is set here, because the dynamic slice loses the
        # 3rd dimension.
        cropped_image.set_shape([None, None, 3])

        # Resize the boxes ([n, 4]) against the window ([4]), then filter
        # boxes and labels by overlap (threshold noted as 0.5 by the
        # original author).
        resized_boxes = tfe.bboxes_resize(crop_window, bboxes)
        kept_labels, kept_boxes = tfe.bboxes_filter_overlap(
            labels, resized_boxes,
            threshold=BBOX_CROP_OVERLAP,
            assign_negative=False)
        # Randomly cropped image, filtered labels (n,), filtered boxes
        # (n, 4) and the crop window in original-image coordinates (4,).
        return cropped_image, kept_labels, kept_boxes, crop_window
Esempio n. 9
0
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Randomly crop an image and carry its labels and boxes over to the crop.

    A crop window is drawn with `tf.image.sample_distorted_bounding_box` (see
    its documentation for the exact sampling rules), the image is sliced to
    that window, and the ground-truth boxes are passed through
    `tfe.bboxes_resize` and `tfe.bboxes_filter_overlap`.

    Args:
        image: 3-D image Tensor. The crop is declared to have 3 channels.
        labels: Labels matching `bboxes`; forwarded to
            `tfe.bboxes_filter_overlap` together with the boxes.
        bboxes: Float Tensor of boxes ordered [ymin, xmin, ymax, xmax] with
            coordinates relative to the image. A leading batch axis is added
            before sampling, so this is expected to be [num_boxes, 4]. With no
            boxes, the whole image is treated as the implicit box.
        min_object_covered: Optional `float`, defaults to `0.3`. The crop must
            contain at least this fraction of any supplied bounding box.
        aspect_ratio_range: Optional pair of `floats`. Allowed range for the
            crop's aspect ratio (width / height).
        area_range: Optional pair of `floats`. Allowed range for the fraction
            of the image area covered by the crop.
        max_attempts: Optional `int`. Number of sampling attempts; after that
            many failures the entire image is returned.
        clip_bboxes: Accepted for interface compatibility; this function does
            not read it.
        scope: Optional scope for name_scope.
    Returns:
        A 4-tuple `(cropped_image, labels, bboxes, distort_bbox)`: the cropped
        3-D image, the labels and boxes returned by
        `tfe.bboxes_filter_overlap`, and the sampled crop window as a
        `[4]` Tensor.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # The sampler takes the image size as a 1-D [height, width, channels]
        # integer tensor and the boxes as a batched [1, num_boxes, 4] float
        # tensor whose rows are [ymin, xmin, ymax, xmax].
        image_size = tf.shape(image)
        batched_bboxes = tf.expand_dims(bboxes, 0)

        # Outputs of the sampler:
        #   crop_begin:     1-D [offset_height, offset_width, 0], for tf.slice.
        #   crop_size:      1-D [target_height, target_width, -1], for tf.slice.
        #   sampled_window: [1, 1, 4] float box of the sampled crop; usable
        #                   with tf.image.draw_bounding_boxes.
        crop_begin, crop_size, sampled_window = tf.image.sample_distorted_bounding_box(
            image_size,
            bounding_boxes=batched_bboxes,
            # Minimum fraction of some box the crop must contain; with 0 the
            # crop need not overlap any box.
            min_object_covered=min_object_covered,
            # Allowed width / height ratio of the crop.
            aspect_ratio_range=aspect_ratio_range,
            # Allowed fraction of the image area taken by the crop.
            area_range=area_range,
            # After this many failed attempts the whole image is returned.
            max_attempts=max_attempts,
            # With no boxes supplied, assume one box covering the whole image
            # instead of raising an error.
            use_image_if_no_bounding_boxes=True)
        # Drop the two singleton axes: [1, 1, 4] -> [4].
        crop_window = sampled_window[0, 0]

        # Cut the sampled window out of the image.
        cropped = tf.slice(image, crop_begin, crop_size)
        # The dynamic slice loses the static channel dimension; restore it.
        cropped.set_shape([None, None, 3])

        # Update the ground truth for the crop: resize the boxes against the
        # crop window, then filter boxes/labels by overlap
        # (presumably dropping boxes below BBOX_CROP_OVERLAP - confirm in tfe).
        resized_bboxes = tfe.bboxes_resize(crop_window, bboxes)
        kept_labels, kept_bboxes = tfe.bboxes_filter_overlap(
            labels,
            resized_bboxes,
            threshold=BBOX_CROP_OVERLAP,
            assign_negative=False)
        return cropped, kept_labels, kept_bboxes, crop_window
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Crop a random window from an image and update its labels and boxes.

    The window is sampled by `tf.image.sample_distorted_bounding_box` under
    the constraints given below; see that function's documentation for the
    sampling details. Only a single window is sampled per call, and it is
    returned alongside the crop.

    Args:
        image: 3-D image Tensor. The crop is declared to have 3 channels.
        labels: Labels matching `bboxes`; forwarded to
            `tfe.bboxes_filter_overlap` together with the boxes.
        bboxes: Float Tensor of ground-truth boxes ordered
            [ymin, xmin, ymax, xmax] with coordinates relative to the image.
            A leading batch axis is added before sampling, so this is
            expected to be [num_boxes, 4]. With no boxes, the whole image is
            treated as the implicit box.
        min_object_covered: Optional `float`, defaults to `0.3`. The crop must
            contain at least this fraction of any supplied bounding box.
        aspect_ratio_range: Optional pair of `floats`. Allowed range for the
            crop's aspect ratio (width / height).
        area_range: Optional pair of `floats`. Allowed range for the fraction
            of the image area covered by the crop.
        max_attempts: Optional `int`. Number of sampling attempts; after that
            many failures the entire image is returned.
        clip_bboxes: Accepted for interface compatibility; this function does
            not read it.
        scope: Optional scope for name_scope.
    Returns:
        A 4-tuple `(cropped_image, labels, bboxes, distort_bbox)`: the cropped
        3-D image, the labels and boxes returned by
        `tfe.bboxes_filter_overlap`, and the sampled crop window as a
        `[4]` Tensor.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # Sampling a random window lets training see a random patch of the
        # image, which reduces the influence of object size on the detector.
        # Background on the sampler (in Chinese):
        # https://blog.csdn.net/tz_zs/article/details/77920116
        image_size = tf.shape(image)
        # Constraints for the sampler. The boxes must be batched as
        # [1, num_boxes, 4] with rows ordered [ymin, xmin, ymax, xmax].
        sampler_options = dict(
            bounding_boxes=tf.expand_dims(bboxes, 0),
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)

        # The sampler returns three tensors:
        #   slice_begin: 1-D [offset_height, offset_width, 0], same type as
        #                the image size; input for tf.slice.
        #   slice_size:  1-D [target_height, target_width, -1], same type as
        #                the image size; input for tf.slice.
        #   window:      float32 [1, 1, 4], the single randomly distorted box
        #                that was sampled; mainly useful for drawing with
        #                tf.image.draw_bounding_boxes.
        slice_begin, slice_size, window = tf.image.sample_distorted_bounding_box(
            image_size, **sampler_options)
        # Drop the two singleton axes: [1, 1, 4] -> [4].
        window = window[0, 0]

        # Crop the image to the sampled window. The last entries of the begin
        # and size vectors are 0 and -1, so every channel is kept.
        patch = tf.slice(image, slice_begin, slice_size)
        # The dynamic slice loses the static channel dimension; restore it.
        patch.set_shape([None, None, 3])

        # Update the ground truth for the crop: resize the boxes against the
        # sampled window, then filter boxes/labels by overlap
        # (presumably dropping boxes below BBOX_CROP_OVERLAP - confirm in tfe).
        boxes_in_crop = tfe.bboxes_resize(window, bboxes)
        labels_in_crop, boxes_in_crop = tfe.bboxes_filter_overlap(
            labels,
            boxes_in_crop,
            threshold=BBOX_CROP_OVERLAP,
            assign_negative=False)

        # The crop's static shape is [None, None, 3]; the original author's
        # note says preprocess_for_train resizes it to the SSD input size
        # afterwards (that function is not visible here - confirm).
        return patch, labels_in_crop, boxes_in_crop, window