import numpy as np
import torch

from nvidia.dali import pipeline_def
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.fn as fn
import nvidia.dali.math as math  # DALI's math module (clamp etc.); deliberately shadows stdlib math
import nvidia.dali.types as types


def create_coco_pipeline(file_root, annotations_file, batch_size=1, device_id=0,
                         num_threads=4, local_rank=0, world_size=1):
    pipeline = Pipeline(batch_size, num_threads, local_rank, seed=42 + device_id)
    with pipeline:
        images, bboxes, labels = fn.coco_reader(file_root=file_root,
                                                annotations_file=annotations_file,
                                                skip_empty=True,
                                                shard_id=local_rank,
                                                num_shards=world_size,
                                                ratio=True,
                                                ltrb=True,
                                                random_shuffle=False,
                                                shuffle_after_epoch=True,
                                                name="Reader")
        crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
            bboxes, labels,
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            bbox_layout="xyXY",
            allow_no_crop=True,
            num_attempts=50)
        images = fn.image_decoder_slice(images, crop_begin, crop_size,
                                        device="mixed", output_type=types.RGB)
        flip_coin = fn.coin_flip(probability=0.5)
        images = fn.resize(images, resize_x=300, resize_y=300,
                           min_filter=types.DALIInterpType.INTERP_TRIANGULAR)
        # use float to avoid clipping and quantizing the intermediate result
        images = fn.hsv(images, dtype=types.FLOAT,
                        hue=fn.uniform(range=[-0.5, 0.5]),
                        saturation=fn.uniform(range=[0.5, 1.5]))
        images = fn.brightness_contrast(images,
                                        contrast_center=128,  # input is in float, but in 0..255 range
                                        dtype=types.UINT8,
                                        brightness=fn.uniform(range=[0.875, 1.125]),
                                        contrast=fn.uniform(range=[0.5, 1.5]))
        bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
        images = fn.crop_mirror_normalize(images,
                                          mean=[104., 117., 123.],
                                          std=[1., 1., 1.],
                                          mirror=flip_coin,
                                          dtype=types.FLOAT,
                                          output_layout="CHW",
                                          pad_output=False)
        pipeline.set_outputs(images, bboxes, labels)
    return pipeline
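
# A minimal usage sketch for the pipeline above: build it and pull one batch.
# The COCO paths are hypothetical placeholders, not part of the original snippet.
# The pipeline function is bound via a default argument at definition time,
# because a second create_coco_pipeline variant appears later in this file.
def _demo_coco_pipeline(make_pipeline=create_coco_pipeline):
    pipe = make_pipeline(file_root="coco/train2017",  # hypothetical path
                         annotations_file="annotations/instances_train2017.json",  # hypothetical path
                         batch_size=8)
    pipe.build()
    images, bboxes, labels = pipe.run()  # images live on the GPU (device="mixed" decoder)
    return images, bboxes, labels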
def image_decoder_slice_pipe(max_batch_size, input_data, device):
    pipe = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)
    encoded = fn.external_source(source=input_data, cycle=False, device='cpu')
    anch = fn.constant(fdata=.1)
    sh = fn.constant(fdata=.4)
    decoded = fn.image_decoder_slice(encoded, anch, sh, axes=0, device=device)
    pipe.set_outputs(decoded)
    return pipe
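
# Hedged usage sketch for image_decoder_slice_pipe: feeds one batch of raw JPEG
# bytes through external_source and decodes a slice anchored at 10% with 40%
# extent along axis 0. "sample.jpg" is a hypothetical file name.
def _demo_image_decoder_slice():
    with open("sample.jpg", "rb") as f:  # hypothetical input file
        jpeg = np.frombuffer(f.read(), dtype=np.uint8)
    # source is a list of batches; one batch containing one encoded sample
    pipe = image_decoder_slice_pipe(max_batch_size=1, input_data=[[jpeg]], device="cpu")
    pipe.build()
    (decoded,) = pipe.run()
    return decoded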
def test_image_decoder_slice_cpu():
    # `batch_size` and `images_dir` are assumed to be module-level globals of the
    # original test file (as is customary in DALI's test suite).
    anch_shape = [1, 2]

    def get_anchors():
        out = [(np.random.randint(1, 128, size=anch_shape, dtype=np.uint8) / 255)
               .astype(dtype=np.float32) for _ in range(batch_size)]
        return out

    def get_shape():
        out = [(np.random.randint(1, 128, size=anch_shape, dtype=np.uint8) / 255)
               .astype(dtype=np.float32) for _ in range(batch_size)]
        return out

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    input, _ = fn.file_reader(file_root=images_dir, shard_id=0, num_shards=1)
    anchors = fn.external_source(source=get_anchors)
    shape = fn.external_source(source=get_shape)
    processed = fn.image_decoder_slice(input, anchors, shape)
    pipe.set_outputs(processed)
    pipe.build()
    for _ in range(3):
        pipe.run()
# This variant only defines the processing graph and returns its outputs, so it
# must be wrapped with DALI's pipeline_def decorator (batch_size, num_threads and
# device_id are then supplied when the pipeline is instantiated).
@pipeline_def
def create_coco_pipeline(default_boxes, args):
    try:
        shard_id = torch.distributed.get_rank()
        num_shards = torch.distributed.get_world_size()
    except RuntimeError:
        shard_id = 0
        num_shards = 1

    images, bboxes, labels = fn.readers.coco(file_root=args.train_coco_root,
                                             annotations_file=args.train_annotate,
                                             skip_empty=True,
                                             shard_id=shard_id,
                                             num_shards=num_shards,
                                             ratio=True,
                                             ltrb=True,
                                             random_shuffle=False,
                                             shuffle_after_epoch=True,
                                             name="Reader")

    crop_begin, crop_size, bboxes, labels = fn.random_bbox_crop(
        bboxes, labels,
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        bbox_layout="xyXY",
        allow_no_crop=True,
        num_attempts=50)
    images = fn.image_decoder_slice(images, crop_begin, crop_size,
                                    device="mixed", output_type=types.RGB)
    flip_coin = fn.random.coin_flip(probability=0.5)
    images = fn.resize(images, resize_x=300, resize_y=300,
                       min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    saturation = fn.uniform(range=[0.5, 1.5])
    contrast = fn.uniform(range=[0.5, 1.5])
    brightness = fn.uniform(range=[0.875, 1.125])
    hue = fn.uniform(range=[-0.5, 0.5])

    # use float to avoid clipping and quantizing the intermediate result
    images = fn.hsv(images, dtype=types.FLOAT, hue=hue, saturation=saturation)
    images = fn.brightness_contrast(images,
                                    contrast_center=128,  # input is in float, but in 0..255 range
                                    dtype=types.UINT8,
                                    brightness=brightness,
                                    contrast=contrast)

    dtype = types.FLOAT16 if args.fp16 else types.FLOAT

    bboxes = fn.bb_flip(bboxes, ltrb=True, horizontal=flip_coin)
    images = fn.crop_mirror_normalize(images,
                                      crop=(300, 300),
                                      mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                      std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
                                      mirror=flip_coin,
                                      dtype=dtype,
                                      output_layout="CHW",
                                      pad_output=False)

    bboxes, labels = fn.box_encoder(bboxes, labels,
                                    criteria=0.5,
                                    anchors=default_boxes.as_ltrb_list())

    labels = labels.gpu()
    bboxes = bboxes.gpu()
    return images, bboxes, labels
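
# Hedged usage sketch: instantiating the pipeline_def-decorated function above
# and wrapping it for PyTorch training. `args.batch_size` and `args.local_rank`
# are assumed attributes mirroring the SSD example's argument namespace.
def _demo_ssd_iterator(default_boxes, args):
    from nvidia.dali.plugin.pytorch import DALIGenericIterator

    pipe = create_coco_pipeline(default_boxes, args,
                                batch_size=args.batch_size,  # assumed attribute
                                num_threads=4,
                                device_id=args.local_rank)   # assumed attribute
    pipe.build()
    # reader_name must match name="Reader" above so the iterator knows the epoch size
    return DALIGenericIterator(pipe, ["images", "boxes", "labels"], reader_name="Reader")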
def check_bbox_random_crop_adjust_polygons(file_root, annotations_file, batch_size=3,
                                           num_iters=4, num_threads=4, device_id=0, seed=1234):
    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads,
                    device_id=device_id, seed=seed)
    with pipe:
        # Read data from COCO.
        # ratio=True means both bboxes and masks coordinates will be relative to
        # the image dimensions (range [0.0, 1.0])
        inputs, in_bboxes, labels, in_polygons, in_vertices = fn.readers.coco(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=0,
            num_shards=1,
            ratio=True,
            ltrb=True,
            polygon_masks=True)

        # Generate a random crop. out_bboxes are adjusted to the crop window
        slice_anchor, slice_shape, out_bboxes, labels, bbox_indices = fn.random_bbox_crop(
            in_bboxes, labels,
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            bbox_layout='xyXY',
            output_bbox_indices=True)

        # Crop the image
        images = fn.image_decoder_slice(inputs, slice_anchor, slice_shape,
                                        device='mixed', axis_names='WH')

        # Keep only the polygons and vertices that belong to the selected boxes
        sel_polygons, sel_vertices = fn.segmentation.select_masks(
            bbox_indices, in_polygons, in_vertices)

        # Adjust masks coordinates to the coordinate space of the cropped image
        MT = fn.transforms.crop(from_start=slice_anchor,
                                from_end=(slice_anchor + slice_shape))
        out_vertices = fn.coord_transform(sel_vertices, MT=MT)

        # Converting to absolute coordinates (demo purposes)
        image_shape = fn.peek_image_shape(inputs, type=types.FLOAT)
        h = fn.slice(image_shape, 0, 1, axes=[0])
        w = fn.slice(image_shape, 1, 1, axes=[0])

        # Original bboxes
        bbox_x = fn.slice(in_bboxes, 0, 1, axes=[1])
        bbox_y = fn.slice(in_bboxes, 1, 1, axes=[1])
        bbox_X = fn.slice(in_bboxes, 2, 1, axes=[1])
        bbox_Y = fn.slice(in_bboxes, 3, 1, axes=[1])
        in_bboxes_abs = fn.cat(bbox_x * w, bbox_y * h, bbox_X * w, bbox_Y * h, axis=1)

        # Transform to convert relative coordinates to absolute
        scale_rel_to_abs = fn.transforms.scale(scale=fn.cat(w, h))

        # Selected vertices converted to absolute coordinates
        sel_vertices_abs = fn.coord_transform(sel_vertices, MT=scale_rel_to_abs)

        # Output bboxes
        bbox2_x = fn.slice(out_bboxes, 0, 1, axes=[1])
        bbox2_y = fn.slice(out_bboxes, 1, 1, axes=[1])
        bbox2_X = fn.slice(out_bboxes, 2, 1, axes=[1])
        bbox2_Y = fn.slice(out_bboxes, 3, 1, axes=[1])
        out_bboxes_abs = fn.cat(bbox2_x * w, bbox2_y * h, bbox2_X * w, bbox2_Y * h, axis=1)

        # Output vertices (absolute coordinates)
        out_vertices_abs = fn.coord_transform(out_vertices, MT=scale_rel_to_abs)

        # Clamped coordinates
        out_vertices_clamped = math.clamp(out_vertices, 0.0, 1.0)
        out_vertices_clamped_abs = fn.coord_transform(out_vertices_clamped, MT=scale_rel_to_abs)

        pipe.set_outputs(in_vertices, sel_vertices, sel_vertices_abs,
                         out_vertices, out_vertices_clamped,
                         out_vertices_abs, out_vertices_clamped_abs,
                         in_bboxes, in_bboxes_abs, out_bboxes, out_bboxes_abs,
                         in_polygons, sel_polygons,
                         image_shape, slice_anchor, slice_shape, bbox_indices)
    pipe.build()
    # Enough iterations to see an example with more than one bounding box
    for i in range(num_iters):
        outs = pipe.run()
        for j in range(batch_size):
            in_vertices, sel_vertices, sel_vertices_abs, \
                out_vertices, out_vertices_clamped, out_vertices_abs, out_vertices_clamped_abs, \
                in_bboxes, in_bboxes_abs, out_bboxes, out_bboxes_abs, \
                in_polygons, sel_polygons, \
                image_shape, slice_anchor, slice_shape, bbox_indices = \
                (outs[k].at(j) for k in range(len(outs)))

            # Checking that the output polygon descriptors are the ones associated
            # with the selected bounding boxes
            expected_polygons_list = []
            expected_vertices_list = []
            ver_count = 0
            for k in range(in_polygons.shape[0]):
                mask_id = in_polygons[k][0]
                in_ver_start_idx = in_polygons[k][1]
                in_ver_end_idx = in_polygons[k][2]
                pol_nver = in_ver_end_idx - in_ver_start_idx
                if mask_id in bbox_indices:
                    expected_polygons_list.append([mask_id, ver_count, ver_count + pol_nver])
                    # dedicated index so the batch index `j` is not shadowed
                    for vi in range(in_ver_start_idx, in_ver_end_idx):
                        expected_vertices_list.append(in_vertices[vi])
                    ver_count = ver_count + pol_nver
            expected_sel_polygons = np.array(expected_polygons_list)
            np.testing.assert_equal(expected_sel_polygons, sel_polygons)

            # Checking the selected vertices correspond to the selected masks
            expected_sel_vertices = np.array(expected_vertices_list)
            np.testing.assert_equal(expected_sel_vertices, sel_vertices)

            # Check that the vertices are correctly mapped to the cropping window
            expected_out_vertices = np.copy(expected_sel_vertices)
            crop_x, crop_y = slice_anchor
            crop_w, crop_h = slice_shape
            for v in range(expected_out_vertices.shape[0]):
                expected_out_vertices[v, 0] = (expected_out_vertices[v, 0] - crop_x) / crop_w
                expected_out_vertices[v, 1] = (expected_out_vertices[v, 1] - crop_y) / crop_h
            np.testing.assert_allclose(expected_out_vertices, out_vertices, rtol=1e-4)

            # Checking the conversion to absolute coordinates
            h, w, c = image_shape
            expected_out_vertices_abs = np.copy(expected_out_vertices)
            for v in range(expected_out_vertices_abs.shape[0]):
                expected_out_vertices_abs[v, 0] = expected_out_vertices[v, 0] * w
                expected_out_vertices_abs[v, 1] = expected_out_vertices[v, 1] * h
            np.testing.assert_allclose(expected_out_vertices_abs, out_vertices_abs, rtol=1e-4)

            # Checking clamping of the relative coordinates.
            # np.clip does not operate in place, so its result must be assigned,
            # and the comparison is against the clamped pipeline output.
            expected_out_vertices_clamped = np.clip(expected_out_vertices, a_min=0.0, a_max=1.0)
            np.testing.assert_allclose(expected_out_vertices_clamped, out_vertices_clamped,
                                       rtol=1e-4)

            # Checking clamping of the absolute coordinates
            expected_out_vertices_clamped_abs = np.copy(expected_out_vertices_abs)
            for v in range(expected_out_vertices_clamped_abs.shape[0]):
                expected_out_vertices_clamped_abs[v, 0] = np.clip(
                    expected_out_vertices_abs[v, 0], a_min=0, a_max=w)
                expected_out_vertices_clamped_abs[v, 1] = np.clip(
                    expected_out_vertices_abs[v, 1], a_min=0, a_max=h)
            np.testing.assert_allclose(expected_out_vertices_clamped_abs,
                                       out_vertices_clamped_abs, rtol=1e-4)

            # Checking scaling of the bounding boxes
            expected_in_bboxes_abs = np.copy(in_bboxes)
            for v in range(expected_in_bboxes_abs.shape[0]):
                expected_in_bboxes_abs[v, 0] = expected_in_bboxes_abs[v, 0] * w
                expected_in_bboxes_abs[v, 1] = expected_in_bboxes_abs[v, 1] * h
                expected_in_bboxes_abs[v, 2] = expected_in_bboxes_abs[v, 2] * w
                expected_in_bboxes_abs[v, 3] = expected_in_bboxes_abs[v, 3] * h
            np.testing.assert_allclose(expected_in_bboxes_abs, in_bboxes_abs, rtol=1e-4)

            # Check box selection and mapping to the cropping window
            expected_out_bboxes = np.copy(in_bboxes[bbox_indices, :])
            for k in range(expected_out_bboxes.shape[0]):
                expected_out_bboxes[k, 0] = (expected_out_bboxes[k, 0] - crop_x) / crop_w
                expected_out_bboxes[k, 1] = (expected_out_bboxes[k, 1] - crop_y) / crop_h
                expected_out_bboxes[k, 2] = (expected_out_bboxes[k, 2] - crop_x) / crop_w
                expected_out_bboxes[k, 3] = (expected_out_bboxes[k, 3] - crop_y) / crop_h
            expected_out_bboxes = np.clip(expected_out_bboxes, a_min=0.0, a_max=1.0)
            np.testing.assert_allclose(expected_out_bboxes, out_bboxes, rtol=1e-4)

            expected_out_bboxes_abs = np.copy(expected_out_bboxes)
            expected_out_bboxes_abs[:, 0] = expected_out_bboxes_abs[:, 0] * w
            expected_out_bboxes_abs[:, 1] = expected_out_bboxes_abs[:, 1] * h
            expected_out_bboxes_abs[:, 2] = expected_out_bboxes_abs[:, 2] * w
            expected_out_bboxes_abs[:, 3] = expected_out_bboxes_abs[:, 3] * h
            np.testing.assert_allclose(expected_out_bboxes_abs, out_bboxes_abs, rtol=1e-4)
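
# Hedged usage sketch: running the polygon-adjustment check against a local COCO
# subset. Both paths are hypothetical placeholders.
def _demo_bbox_crop_check():
    check_bbox_random_crop_adjust_polygons(
        file_root="coco/val2017",                                # hypothetical path
        annotations_file="annotations/instances_val2017.json",  # hypothetical path
        batch_size=3, num_iters=4)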