Example #1
def test_random_bbox_crop_cpu():
    batch_size = 8  # assumption: defined at module scope in the original test
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    test_box_shape = [200, 4]

    def get_boxes():
        out = [
            (np.random.randint(0, 255, size=test_box_shape, dtype=np.uint8) /
             255).astype(dtype=np.float32) for _ in range(batch_size)
        ]
        return out

    test_labels_shape = [200, 1]

    def get_labels():
        out = [
            np.random.randint(0, 255, size=test_labels_shape, dtype=np.int32)
            for _ in range(batch_size)
        ]
        return out

    boxes = fn.external_source(source=get_boxes)
    labels = fn.external_source(source=get_labels)
    processed, _, _, _ = fn.random_bbox_crop(boxes,
                                             labels,
                                             aspect_ratio=[0.5, 2.0],
                                             thresholds=[0.1, 0.3, 0.5],
                                             scaling=[0.8, 1.0],
                                             bbox_layout="xyXY")
    pipe.set_outputs(processed)
    pipe.build()
    for _ in range(3):
        pipe.run()
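
A quick way to sanity-check the test above is to look at its single output. A minimal inspection sketch, assuming the pipeline was built as shown:

(cropped_boxes,) = pipe.run()        # one TensorList per declared output
print(cropped_boxes.at(0).shape)     # (M, 4): boxes that survived the crop for sample 0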

Example #2

def random_bbox_crop_pipeline(get_boxes, get_labels, seed):
    boxes = fn.external_source(source=get_boxes)
    labels = fn.external_source(source=get_labels)
    out = fn.random_bbox_crop(boxes, labels,
                              aspect_ratio=[0.5, 2.0],
                              thresholds=[0.1, 0.3, 0.5],
                              scaling=[0.8, 1.0],
                              bbox_layout="xyXY",
                              seed=seed)

    return tuple(out)
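
The function above only builds the graph. A minimal usage sketch, assuming my_boxes_source and my_labels_source are hypothetical batch-returning callables in the style of get_boxes/get_labels from Example #1:

from nvidia.dali import pipeline_def

@pipeline_def(batch_size=8, num_threads=4, device_id=None)
def my_pipeline():
    return random_bbox_crop_pipeline(my_boxes_source, my_labels_source, seed=1234)

pipe = my_pipeline()
pipe.build()
anchor, shape, boxes, labels = pipe.run()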
Example #3
    def define_graph(self):
        inputs, bboxes, labels, polygons, vertices = fn.readers.coco(
                                            file_root=self.file_root,
                                            annotations_file=self.annotation_file,
                                            skip_empty=True,
                                            shard_id=self.shard_id,
                                            num_shards=self.num_gpus,
                                            ratio=True,
                                            ltrb=True,
                                            polygon_masks=True,
                                            random_shuffle=self.random_shuffle,
                                            shuffle_after_epoch=self.shuffle_after_epoch,
                                            name="Reader")

        input_shape = fn.slice(fn.cast(fn.peek_image_shape(inputs), dtype=types.INT32), 0, 2, axes=[0])
        h = fn.slice(input_shape, 0, 1, axes=[0], dtype=types.FLOAT)
        w = fn.slice(input_shape, 1, 1, axes=[0], dtype=types.FLOAT)
        short_side = math.min(w, h)
        scale = fn.random.uniform(range=[0.3, 1.])
        crop_side = fn.cast(math.ceil(scale * short_side), dtype=types.INT32)    
        crop_shape = fn.cat(crop_side, crop_side)
        anchor_rel, shape_rel, bboxes, labels, bbox_indices = fn.random_bbox_crop(
                        bboxes,
                        labels,
                        input_shape=input_shape,
                        crop_shape=crop_shape,
                        shape_layout="HW",
                        thresholds=[0.],            # No minimum intersection-over-union, for demo purposes
                        allow_no_crop=False,        # No-crop is disallowed, for demo purposes 
                        seed=-1,                    # -1 lets DALI pick the seed automatically
                        bbox_layout="xyXY",         # left, top, right, bottom
                        output_bbox_indices=True,   # Output indices of the filtered bounding boxes
                        total_num_attempts=1024,
        )
        polygons, vertices = fn.segmentation.select_masks(
            bbox_indices, polygons, vertices
        )
        images = fn.decoders.image_slice(
            inputs, anchor_rel, shape_rel, normalized_anchor=False, normalized_shape=False, device='mixed'
        )
        images = fn.color_space_conversion(images, image_type=types.RGB, output_type=types.BGR)
        MT_1_vertices = fn.transforms.crop(
            to_start=(0.0, 0.0), to_end=fn.cat(w, h)
        )    
        MT_2_vertices = fn.transforms.crop(
            from_start=anchor_rel, from_end=(anchor_rel + shape_rel),
            to_start=(0.0, 0.0), to_end=(1., 1.)
        )    
        vertices = fn.coord_transform(fn.coord_transform(vertices, MT=MT_1_vertices), MT=MT_2_vertices)    
        targets = fn.cat(bboxes, fn.reshape(vertices, shape=[-1, 10]), axis=1)

        interp_methods = [types.INTERP_LINEAR, types.INTERP_CUBIC, types.INTERP_LANCZOS3, types.INTERP_GAUSSIAN, types.INTERP_NN, types.INTERP_TRIANGULAR]
        interp_method = fn.random.uniform(values=[int(x) for x in interp_methods], dtype=types.INT32)
        interp_method = fn.reinterpret(interp_method, dtype=types.INTERP_TYPE)
        images = fn.resize(images, dtype=types.FLOAT, size=self.input_dim, interp_type=interp_method)

        labels = labels.gpu()
        targets = targets.gpu()
        return (images, targets, labels)
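
define_graph here overrides the legacy Pipeline.define_graph hook, so the class it belongs to is not shown. A minimal skeleton of the attributes the method relies on (names inferred from the code above, so treat the defaults as assumptions):

from nvidia.dali.pipeline import Pipeline

class COCODetectionPipeline(Pipeline):
    def __init__(self, file_root, annotation_file, batch_size=8, num_threads=4,
                 device_id=0, num_gpus=1, input_dim=(300, 300)):
        super().__init__(batch_size, num_threads, device_id)
        self.file_root = file_root
        self.annotation_file = annotation_file
        self.shard_id = device_id      # assumption: one shard per GPU
        self.num_gpus = num_gpus
        self.input_dim = list(input_dim)
        self.random_shuffle = True
        self.shuffle_after_epoch = False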
Example #4
def create_coco_pipeline(file_root,
                         annotations_file,
                         batch_size=1,
                         device_id=0,
                         num_threads=4,
                         local_rank=0,
                         world_size=1):
    pipeline = Pipeline(batch_size, num_threads,
                        local_rank,  # device_id: one pipeline per local GPU
                        seed=42 + device_id)

    with pipeline:
        images, bboxes, labels = fn.coco_reader(file_root=file_root,
                                                annotations_file=annotations_file,
                                                skip_empty=True,
                                                shard_id=local_rank,
                                                num_shards=world_size,
                                                ratio=True,
                                                ltrb=True,
                                                random_shuffle=False,
                                                shuffle_after_epoch=True,
                                                name="Reader")

        crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(bboxes, labels,
                                                                    device="cpu",
                                                                    aspect_ratio=[0.5, 2.0],
                                                                    thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                                                                    scaling=[0.3, 1.0],
                                                                    bbox_layout="xyXY",
                                                                    allow_no_crop=True,
                                                                    num_attempts=50)
        images = fn.image_decoder_slice(images, crop_begin, crop_size, device="mixed", output_type=types.RGB)
        flip_coin = fn.coin_flip(probability=0.5)
        images = fn.resize(images,
                           resize_x=300,
                           resize_y=300,
                           min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

        # use float to avoid clipping and quantizing the intermediate result
        images = fn.hsv(images, dtype=types.FLOAT, hue=fn.uniform(range=[-0.5, 0.5]),
                        saturation=fn.uniform(range=[0.5, 1.5]))

        images = fn.brightness_contrast(images,
                                        contrast_center=128,  # input is float, but in the 0..255 range
                                        dtype=types.UINT8,
                                        brightness=fn.uniform(range=[0.875, 1.125]),
                                        contrast=fn.uniform(range=[0.5, 1.5]))

        bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
        images = fn.crop_mirror_normalize(images,
                                          mean=[104., 117., 123.],
                                          std=[1., 1., 1.],
                                          mirror=flip_coin,
                                          dtype=types.FLOAT,
                                          output_layout="CHW",
                                          pad_output=False)

        pipeline.set_outputs(images, bboxes, labels)
    return pipeline
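
A usage sketch for the factory above, feeding the pipeline to PyTorch through DALI's generic iterator (paths and batch size are placeholders):

from nvidia.dali.plugin.pytorch import DALIGenericIterator

pipe = create_coco_pipeline(file_root="/data/coco/train2017",
                            annotations_file="/data/coco/annotations/instances_train2017.json",
                            batch_size=32)
pipe.build()
loader = DALIGenericIterator(pipe, ["images", "bboxes", "labels"], reader_name="Reader")
for batch in loader:
    images = batch[0]["images"]  # already a torch tensor on the pipeline's device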

Example #5

def random_bbox_crop_fixed_aspect_ratio():
    in_sh = fn.random.uniform(range=(400, 600), shape=(2,), dtype=types.INT32)
    inputs = fn.external_source(source=bbox_source, num_outputs=bbox_source.num_outputs)
    outputs = fn.random_bbox_crop(
        *inputs,
        device='cpu',
        aspect_ratio=(1.0, 1.0),
        scaling=(0.5, 0.8),
        thresholds=[0.0],
        threshold_type='iou',
        bbox_layout="xyXY",
        total_num_attempts=100,
        allow_no_crop=False,
        input_shape=in_sh if use_input_shape else None,
    )
    return in_sh, outputs[1]
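
The fragment above references bbox_source and use_input_shape from an enclosing test scope that is not shown. A minimal sketch of a compatible source (class name, shapes, and label range are assumptions):

import numpy as np

class BBoxSource:
    num_outputs = 2  # boxes and labels, matching fn.external_source(num_outputs=...)

    def __init__(self, batch_size=4):
        self.batch_size = batch_size

    def __call__(self):
        boxes, labels = [], []
        for _ in range(self.batch_size):
            n = np.random.randint(1, 10)
            lo = np.random.uniform(0.0, 0.5, size=(n, 2)).astype(np.float32)
            wh = np.random.uniform(0.0, 0.5, size=(n, 2)).astype(np.float32)
            boxes.append(np.concatenate([lo, lo + wh], axis=1))  # relative xyXY
            labels.append(np.random.randint(0, 80, size=(n, 1), dtype=np.int32))
        return boxes, labels

bbox_source = BBoxSource()
use_input_shape = True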
Example #6
def create_coco_pipeline(default_boxes, args):
    try:
        shard_id = torch.distributed.get_rank()
        num_shards = torch.distributed.get_world_size()
    except RuntimeError:
        shard_id = 0
        num_shards = 1

    images, bboxes, labels = fn.readers.coco(
        file_root=args.train_coco_root,
        annotations_file=args.train_annotate,
        skip_empty=True,
        shard_id=shard_id,
        num_shards=num_shards,
        ratio=True,
        ltrb=True,
        random_shuffle=False,
        shuffle_after_epoch=True,
        name="Reader")

    crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
        bboxes,
        labels,
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        bbox_layout="xyXY",
        allow_no_crop=True,
        num_attempts=50)
    images = fn.image_decoder_slice(images,
                                    crop_begin,
                                    crop_size,
                                    device="mixed",
                                    output_type=types.RGB)
    flip_coin = fn.random.coin_flip(probability=0.5)
    images = fn.resize(images,
                       resize_x=300,
                       resize_y=300,
                       min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    saturation = fn.uniform(range=[0.5, 1.5])
    contrast = fn.uniform(range=[0.5, 1.5])
    brightness = fn.uniform(range=[0.875, 1.125])
    hue = fn.uniform(range=[-0.5, 0.5])

    # use float to avoid clipping and quantizing the intermediate result
    images = fn.hsv(images, dtype=types.FLOAT, hue=hue, saturation=saturation)
    images = fn.brightness_contrast(
        images,
        contrast_center=128,  # input is in float, but in 0..255 range
        dtype=types.UINT8,
        brightness=brightness,
        contrast=contrast)

    dtype = types.FLOAT16 if args.fp16 else types.FLOAT

    bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
    images = fn.crop_mirror_normalize(
        images,
        crop=(300, 300),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=flip_coin,
        dtype=dtype,
        output_layout="CHW",
        pad_output=False)

    bboxes, labels = fn.box_encoder(bboxes,
                                    labels,
                                    criteria=0.5,
                                    anchors=default_boxes.as_ltrb_list())

    labels = labels.gpu()
    bboxes = bboxes.gpu()

    return images, bboxes, labels
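
default_boxes is supplied by the caller (in NVIDIA's SSD example it is an anchor generator). fn.box_encoder only needs the anchors as a flat list of floats in ltrb format, so a hypothetical stand-in could look like:

import numpy as np

class DefaultBoxes:
    """Hypothetical stand-in holding (N, 4) relative ltrb anchors."""
    def __init__(self, anchors_ltrb):
        self._anchors = np.asarray(anchors_ltrb, dtype=np.float32)

    def as_ltrb_list(self):
        return self._anchors.flatten().tolist()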

Example #7

def check_bbox_random_crop_adjust_polygons(file_root,
                                           annotations_file,
                                           batch_size=3,
                                           num_iters=4,
                                           num_threads=4,
                                           device_id=0,
                                           seed=1234):
    pipe = Pipeline(batch_size=batch_size,
                    num_threads=num_threads,
                    device_id=device_id,
                    seed=seed)
    with pipe:
        # Read data from COCO
        # ratio=True means both bboxes and masks coordinates will be
        # relative to the image dimensions (range [0.0, 1.0])
        inputs, in_bboxes, labels, in_polygons, in_vertices = \
            fn.readers.coco(
                file_root=file_root, annotations_file=annotations_file, shard_id=0, num_shards=1,
                ratio=True, ltrb=True, polygon_masks=True
            )

        # Generate a random crop. out_bboxes are adjusted to the crop window
        slice_anchor, slice_shape, out_bboxes, labels, bbox_indices = \
            fn.random_bbox_crop(
                in_bboxes, labels,
                aspect_ratio=[0.5, 2.0], thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
                scaling=[0.3, 1.0], bbox_layout='xyXY', output_bbox_indices=True
            )
        # Crop the image
        _ = fn.decoders.image_slice(inputs,
                                    slice_anchor,
                                    slice_shape,
                                    device='mixed',
                                    axis_names='WH')

        sel_polygons, sel_vertices = fn.segmentation.select_masks(
            bbox_indices, in_polygons, in_vertices)

        # Adjust masks coordinates to the coordinate space of the cropped image
        MT = fn.transforms.crop(from_start=slice_anchor,
                                from_end=(slice_anchor + slice_shape))
        out_vertices = fn.coord_transform(sel_vertices, MT=MT)

        # Converting to absolute coordinates (demo purposes)
        image_shape = fn.peek_image_shape(inputs, dtype=types.FLOAT)
        h = fn.slice(image_shape, 0, 1, axes=[0])
        w = fn.slice(image_shape, 1, 1, axes=[0])

        # Original bboxes
        bbox_x = fn.slice(in_bboxes, 0, 1, axes=[1])
        bbox_y = fn.slice(in_bboxes, 1, 1, axes=[1])
        bbox_X = fn.slice(in_bboxes, 2, 1, axes=[1])
        bbox_Y = fn.slice(in_bboxes, 3, 1, axes=[1])
        in_bboxes_abs = fn.cat(bbox_x * w,
                               bbox_y * h,
                               bbox_X * w,
                               bbox_Y * h,
                               axis=1)

        # Transform to convert relative coordinates to absolute
        scale_rel_to_abs = fn.transforms.scale(scale=fn.cat(w, h))

        # Selected vertices (relative coordinates)
        sel_vertices_abs = fn.coord_transform(out_vertices,
                                              MT=scale_rel_to_abs)

        # Output bboxes
        bbox2_x = fn.slice(out_bboxes, 0, 1, axes=[1])
        bbox2_y = fn.slice(out_bboxes, 1, 1, axes=[1])
        bbox2_X = fn.slice(out_bboxes, 2, 1, axes=[1])
        bbox2_Y = fn.slice(out_bboxes, 3, 1, axes=[1])
        out_bboxes_abs = fn.cat(bbox2_x * w,
                                bbox2_y * h,
                                bbox2_X * w,
                                bbox2_Y * h,
                                axis=1)

        # Output vertices (absolute coordinates)
        out_vertices_abs = fn.coord_transform(out_vertices,
                                              MT=scale_rel_to_abs)

        # Clamped coordinates
        out_vertices_clamped = math.clamp(out_vertices, 0.0, 1.0)
        out_vertices_clamped_abs = fn.coord_transform(out_vertices_clamped,
                                                      MT=scale_rel_to_abs)

    pipe.set_outputs(in_vertices, sel_vertices, sel_vertices_abs, out_vertices,
                     out_vertices_clamped, out_vertices_abs,
                     out_vertices_clamped_abs, in_bboxes, in_bboxes_abs,
                     out_bboxes, out_bboxes_abs, in_polygons, sel_polygons,
                     image_shape, slice_anchor, slice_shape, bbox_indices)
    pipe.build()
    # Enough iterations to see an example with more than one bounding box
    for i in range(num_iters):
        outs = pipe.run()
        for j in range(batch_size):
            (in_vertices, sel_vertices, sel_vertices_abs, out_vertices,
             out_vertices_clamped, out_vertices_abs, out_vertices_clamped_abs,
             in_bboxes, in_bboxes_abs, out_bboxes, out_bboxes_abs, in_polygons,
             sel_polygons, image_shape, slice_anchor, slice_shape,
             bbox_indices) = (outs[k].at(j) for k in range(len(outs)))

            # Checking that the output polygon descriptors are the ones associated with the
            # selected bounding boxes
            expected_polygons_list = []
            expected_vertices_list = []
            ver_count = 0
            for k in range(in_polygons.shape[0]):
                mask_id = in_polygons[k][0]
                in_ver_start_idx = in_polygons[k][1]
                in_ver_end_idx = in_polygons[k][2]
                pol_nver = in_ver_end_idx - in_ver_start_idx
                if mask_id in bbox_indices:
                    expected_polygons_list.append(
                        [mask_id, ver_count, ver_count + pol_nver])
                    for v_idx in range(in_ver_start_idx, in_ver_end_idx):
                        expected_vertices_list.append(in_vertices[v_idx])
                    ver_count = ver_count + pol_nver
            expected_sel_polygons = np.array(expected_polygons_list)
            np.testing.assert_equal(expected_sel_polygons, sel_polygons)

            # Checking the selected vertices correspond to the selected masks
            expected_sel_vertices = np.array(expected_vertices_list)
            np.testing.assert_equal(expected_sel_vertices, sel_vertices)

            # Check that the vertices are correctly mapped to the cropping window
            expected_out_vertices = np.copy(expected_sel_vertices)
            crop_x, crop_y = slice_anchor
            crop_w, crop_h = slice_shape
            for v in range(expected_out_vertices.shape[0]):
                expected_out_vertices[v, 0] = (expected_out_vertices[v, 0] -
                                               crop_x) / crop_w
                expected_out_vertices[v, 1] = (expected_out_vertices[v, 1] -
                                               crop_y) / crop_h
            np.testing.assert_allclose(expected_out_vertices,
                                       out_vertices,
                                       rtol=1e-4)

            # Checking the conversion to absolute coordinates
            h, w, _ = image_shape
            wh = np.array([w, h])
            whwh = np.array([w, h, w, h])
            expected_out_vertices_abs = expected_out_vertices * wh
            np.testing.assert_allclose(expected_out_vertices_abs,
                                       out_vertices_abs,
                                       rtol=1e-4)

            # Checking clamping of the relative coordinates
            expected_out_vertices_clamped = np.clip(expected_out_vertices,
                                                    a_min=0.0,
                                                    a_max=1.0)
            np.testing.assert_allclose(expected_out_vertices_clamped,
                                       out_vertices_clamped,
                                       rtol=1e-4)

            # Checking clamping of the absolute coordinates
            expected_out_vertices_clamped_abs = np.clip(
                expected_out_vertices_abs, 0, wh)
            np.testing.assert_allclose(expected_out_vertices_clamped_abs,
                                       out_vertices_clamped_abs,
                                       rtol=1e-4)

            # Checking scaling of the bounding boxes
            expected_in_bboxes_abs = in_bboxes * whwh
            np.testing.assert_allclose(expected_in_bboxes_abs,
                                       in_bboxes_abs,
                                       rtol=1e-4)

            # Check box selection and mapping to the cropping window
            expected_out_bboxes = np.copy(in_bboxes[bbox_indices, :])
            for k in range(expected_out_bboxes.shape[0]):
                expected_out_bboxes[k, 0] = (expected_out_bboxes[k, 0] -
                                             crop_x) / crop_w
                expected_out_bboxes[k, 1] = (expected_out_bboxes[k, 1] -
                                             crop_y) / crop_h
                expected_out_bboxes[k, 2] = (expected_out_bboxes[k, 2] -
                                             crop_x) / crop_w
                expected_out_bboxes[k, 3] = (expected_out_bboxes[k, 3] -
                                             crop_y) / crop_h
            expected_out_bboxes = np.clip(expected_out_bboxes,
                                          a_min=0.0,
                                          a_max=1.0)
            np.testing.assert_allclose(expected_out_bboxes,
                                       out_bboxes,
                                       rtol=1e-4)

            expected_out_bboxes_abs = expected_out_bboxes * whwh
            np.testing.assert_allclose(expected_out_bboxes_abs,
                                       out_bboxes_abs,
                                       rtol=1e-4)
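
A hypothetical invocation of the check above (paths are placeholders):

check_bbox_random_crop_adjust_polygons(
    file_root="/data/coco/val2017",
    annotations_file="/data/coco/annotations/instances_val2017.json")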