def get_pipeline(folder="train", custom_reader=None):
    """Assemble the grayscale augmentation pipeline and wrap it in a DALI iterator.

    Args:
        folder: Directory passed as ``file_root`` to the file reader when
            ``custom_reader`` is not supplied.
        custom_reader: Optional two-element sequence ``(raw_files, labels)``
            used instead of the built-in shuffled file reader.

    Returns:
        A ``DALIClassificationIterator`` constructed over the built pipeline.
    """
    pipeline = Pipeline(batch_size=64, num_threads=1, device_id=1)

    if not custom_reader:
        raw_files, labels = fn.file_reader(file_root="%s" % folder, random_shuffle=True)
    else:
        raw_files, labels = custom_reader

    images = fn.image_decoder(raw_files, device="mixed", output_type=types.GRAY)
    images = fn.resize(
        images,
        device="gpu",
        image_type=types.RGB,
        interp_type=types.INTERP_LINEAR,
        resize_x=WIDTH,
        resize_y=HEIGHT,
    )

    # Colour jitter: random nodes are created in hue, saturation, value order.
    hue_shift = fn.uniform(range=(-10, 10))
    saturation_shift = fn.uniform(range=(-.5, .5))
    value_scale = fn.uniform(range=(0.9, 1.2))
    images = fn.hsv(
        images,
        hue=hue_shift,
        saturation=saturation_shift,
        value=value_scale,
        device="gpu",
        dtype=types.UINT8,
    )

    brightness = fn.uniform(range=(.9, 1.1))
    images = fn.brightness_contrast(images, device="gpu", brightness=brightness)

    images = fn.crop_mirror_normalize(
        images,
        device="gpu",
        output_dtype=types.FLOAT,
        output_layout=types.NHWC,
        image_type=types.GRAY,
        mean=[255 // 2],
        std=[255 // 2],
    )

    rotation_angle = fn.uniform(range=(-40, 40))
    images = fn.rotate(images, angle=rotation_angle, device="gpu", keep_size=True)

    # Reshaping to a format PyTorch likes
    images = fn.transpose(images, perm=(2, 0, 1), device="gpu")

    pipeline.set_outputs(images, labels)
    pipeline.build()
    return DALIClassificationIterator([pipeline], -1)
def create_coco_pipeline(file_root, annotations_file, batch_size=1, device_id=0,
                         num_threads=4, local_rank=0, world_size=1):
    """Define (but do not build) an SSD-style COCO training pipeline.

    Args:
        file_root: Image directory handed to the COCO reader.
        annotations_file: Annotation file handed to the COCO reader.
        batch_size: Pipeline batch size.
        device_id: Only used here as an offset to the pipeline seed (42 + device_id).
        num_threads: Number of pipeline worker threads.
        local_rank: Passed as the pipeline's third positional argument and as
            the reader's ``shard_id``.
        world_size: Passed as the reader's ``num_shards``.

    Returns:
        The pipeline with outputs ``(images, bboxes, labels)`` set.
    """
    pipeline = Pipeline(batch_size, num_threads, local_rank, seed=42 + device_id)

    with pipeline:
        encoded, boxes, classes = fn.coco_reader(
            file_root=file_root,
            annotations_file=annotations_file,
            skip_empty=True,
            shard_id=local_rank,
            num_shards=world_size,
            ratio=True,
            ltrb=True,
            random_shuffle=False,
            shuffle_after_epoch=True,
            name="Reader",
        )

        crop_begin, crop_size, boxes, classes = fn.random_bbox_crop(
            boxes,
            classes,
            device="cpu",
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            bbox_layout="xyXY",
            allow_no_crop=True,
            num_attempts=50,
        )

        pixels = fn.image_decoder_slice(encoded, crop_begin, crop_size,
                                        device="mixed", output_type=types.RGB)

        # One coin per sample, shared by the box flip and the image mirror below.
        flip_coin = fn.coin_flip(probability=0.5)

        pixels = fn.resize(pixels, resize_x=300, resize_y=300,
                           min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

        # use float to avoid clipping and quantizing the intermediate result
        hue = fn.uniform(range=[-0.5, 0.5])
        saturation = fn.uniform(range=[0.5, 1.5])
        pixels = fn.hsv(pixels, dtype=types.FLOAT, hue=hue, saturation=saturation)

        brightness = fn.uniform(range=[0.875, 1.125])
        contrast = fn.uniform(range=[0.5, 1.5])
        pixels = fn.brightness_contrast(
            pixels,
            contrast_center=128,  # input is in float, but in 0..255 range
            dtype=types.UINT8,
            brightness=brightness,
            contrast=contrast,
        )

        boxes = fn.bb_flip(boxes, ltrb=True, horizontal=flip_coin)

        pixels = fn.crop_mirror_normalize(
            pixels,
            mean=[104., 117., 123.],
            std=[1., 1., 1.],
            mirror=flip_coin,
            dtype=types.FLOAT,
            output_layout="CHW",
            pad_output=False,
        )

        pipeline.set_outputs(pixels, boxes, classes)

    return pipeline
def check_per_sample_gaussian_blur(batch_size, sigma_dim, window_size_dim, shape, layout, axes,
                                   op_type="cpu"):
    """Compare ``fn.gaussian_blur`` with per-sample arguments against a baseline.

    Args:
        batch_size: Pipeline batch size.
        sigma_dim: Length of the per-sample sigma vector, or ``None`` to leave
            ``sigma`` unspecified for the operator.
        window_size_dim: Length of the per-sample window-size vector, or
            ``None`` to leave ``window_size`` unspecified for the operator.
        shape: Maximum sample shape given to the random data iterator.
        layout: Layout string of the input data.
        axes: Forwarded to ``gaussian_baseline``.
        op_type: Operator backend, ``"cpu"`` or ``"gpu"``.

    Runs ``test_iters`` iterations (module-level setting) and checks each batch
    with ``check_batch``.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    data = RandomlyShapedDataIterator(batch_size, max_shape=shape)

    with pipe:
        if sigma_dim is not None:
            sigma = fn.uniform(range=[0.5, 3], shape=[sigma_dim])
            sigma_arg = sigma
        else:
            # placeholder, so we can return something
            sigma = fn.coin_flip(probability=0)
            sigma_arg = None

        if window_size_dim is not None:
            window_radius = fn.uniform(range=[5, 10], shape=[window_size_dim])
            # 2 * radius + 1 keeps the window size odd.
            window_size = fn.cast(window_radius, dtype=types.INT32) * 2 + 1
            window_arg = window_size
        else:
            # placeholder, so we can return something
            window_size = fn.coin_flip(probability=0)
            window_arg = None

        in_data = fn.external_source(data, layout=layout)
        if op_type == "gpu":
            in_data = in_data.gpu()
        blurred = fn.gaussian_blur(in_data, device=op_type, sigma=sigma_arg,
                                   window_size=window_arg)
        pipe.set_outputs(blurred, in_data, sigma, window_size)
    pipe.build()

    # Depends only on the layout, so compute it once rather than per sample.
    skip_axes = count_skip_axes(layout)

    for _ in range(test_iters):
        result, in_batch, sigma_batch, window_batch = pipe.run()
        if op_type == "gpu":
            result = result.as_cpu()
            in_batch = in_batch.as_cpu()
        in_batch = to_batch(in_batch, batch_size)
        sigma_batch = to_batch(sigma_batch, batch_size)
        window_batch = to_batch(window_batch, batch_size)

        baseline = []
        for i in range(batch_size):
            # The batches always exist (they may hold the placeholder output),
            # so whether an argument was really supplied is decided by the
            # *_dim parameters, not by the batch objects themselves.
            sample_sigma = sigma_batch[i] if sigma_dim is not None else None
            sample_window = window_batch[i] if window_size_dim is not None else None
            baseline.append(
                gaussian_baseline(in_batch[i], sample_sigma, sample_window, axes, skip_axes))
        check_batch(result, baseline, batch_size, max_allowed_error=1, expected_layout=layout)
def pipe(max_batch_size, input_data, device):
    """Create an unbuilt pipeline that applies ``fn.bbox_paste`` to external data.

    Args:
        max_batch_size: Batch size given to the pipeline.
        input_data: Source passed to ``fn.external_source`` (non-cycling).
        device: Device of the external source.

    Returns:
        The pipeline with the pasted boxes as its only output.
    """
    pipeline = Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0)

    boxes = fn.external_source(source=input_data, cycle=False, device=device)

    # Random paste parameters, created in x, y, ratio order.
    pos_x = fn.uniform(range=(0, 1))
    pos_y = fn.uniform(range=(0, 1))
    ratio = fn.uniform(range=(1, 2))

    pasted = fn.bbox_paste(boxes, paste_x=pos_x, paste_y=pos_y, ratio=ratio)
    pipeline.set_outputs(pasted)
    return pipeline
def make_param(kind, shape):
    """Produce a test parameter of the requested kind.

    Args:
        kind: One of ``"input"``, ``"scalar input"``, ``"vector"`` or
            ``"scalar"``; any other value yields ``None``.
        shape: Shape used by the ``"input"`` and ``"vector"`` kinds.

    Returns:
        A DALI random node for the two input kinds, a float32 NumPy array for
        ``"vector"``, a Python float for ``"scalar"``, otherwise ``None``.
    """
    if kind == "input":
        return fn.uniform(range=(0, 1), shape=shape)

    if kind == "scalar input":
        # Reshape to an empty shape so the node yields true scalars.
        random_node = fn.uniform(range=(0, 1))
        return fn.reshape(random_node, shape=[])

    if kind == "vector":
        values = np.random.rand(*shape)
        return values.astype(np.float32)

    if kind == "scalar":
        return np.random.rand()

    return None
def get_random_pipeline(batch_size):
    """Create an unbuilt CPU pipeline applying ``fn.grid_mask`` with random arguments.

    Args:
        batch_size: Pipeline batch size.

    Returns:
        The pipeline with outputs ``(masked, decoded, tile, ratio, angle)``.
    """
    pipeline = Pipeline(batch_size, 4, None)
    with pipeline:
        encoded, _ = fn.readers.file(file_root=img_dir)
        decoded = fn.image_decoder(encoded, device='cpu', output_type=types.RGB)

        # Per-sample one-element arguments, created in tile, ratio, angle order.
        tile_float = fn.uniform(range=(50, 200), shape=[1])
        tile = fn.cast(tile_float, dtype=types.INT32)
        ratio = fn.uniform(range=(0.3, 0.7), shape=[1])
        angle = fn.uniform(range=(-math.pi, math.pi), shape=[1])

        masked = fn.grid_mask(decoded, device='cpu', tile=tile, ratio=ratio, angle=angle)
        pipeline.set_outputs(masked, decoded, tile, ratio, angle)
    return pipeline
def check_pad_per_sample_shapes_and_alignment(device='cpu', batch_size=3, ndim=2, num_iter=3):
    """Check ``fn.pad`` with per-sample ``shape`` and ``align`` arguments.

    Three variants are exercised on the same random input: explicit shape
    only, alignment only, and both together.

    Args:
        device: ``'cpu'`` or ``'gpu'``; on GPU the input is moved with ``.gpu()``.
        batch_size: Pipeline batch size.
        ndim: Number of elements in the random shape/alignment vectors.
        num_iter: Number of pipeline runs to verify.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=0, seed=1234)
    axes = (0, 1)
    with pipe:
        shape_node = fn.cast(fn.uniform(range=(10, 20), shape=(ndim, )), dtype=types.INT32)
        data_node = fn.uniform(range=(0., 1.), shape=shape_node)
        if device == 'gpu':
            data_node = data_node.gpu()
        req_shape_node = fn.cast(fn.uniform(range=(21, 30), shape=(ndim, )), dtype=types.INT32)
        req_align_node = fn.cast(fn.uniform(range=(3, 5), shape=(ndim, )), dtype=types.INT32)
        padded_to_shape = fn.pad(data_node, axes=axes, align=None, shape=req_shape_node)
        padded_to_align = fn.pad(data_node, axes=axes, align=req_align_node, shape=None)
        padded_to_both = fn.pad(data_node, axes=axes, align=req_align_node,
                                shape=req_shape_node)
        pipe.set_outputs(shape_node, data_node, req_shape_node, req_align_node,
                         padded_to_shape, padded_to_align, padded_to_both)
    pipe.build()

    for _ in range(num_iter):
        # Bring any GPU outputs to the host so samples can be read with .at().
        outs = []
        for out in pipe.run():
            outs.append(out.as_cpu() if isinstance(out, TensorListGPU) else out)

        for i in range(batch_size):
            (in_shape, in_data, req_shape, req_align,
             out_pad_shape, out_pad_align, out_pad_both) = [out.at(i) for out in outs]

            assert (in_shape == in_data.shape).all()

            # Pad to explicit shape
            assert (out_pad_shape.shape >= in_shape).all()
            assert (req_shape == out_pad_shape.shape).all()

            # Alignment only
            assert (out_pad_align.shape >= in_shape).all()
            assert is_aligned(out_pad_align.shape, req_align, axes)

            # Explicit shape + alignment
            assert (out_pad_both.shape >= in_shape).all()
            assert (req_shape <= out_pad_both.shape).all()
            assert is_aligned(out_pad_both.shape, req_align, axes)
def check_transform_scale_op(scale, center=None, has_input=False, reverse_order=False,
                             batch_size=1, num_threads=4, device_id=0):
    """Run ``fn.transforms.scale`` once and compare it with ``scale_affine_mat``.

    Args:
        scale: Per-axis scale factors; their count defines the dimensionality.
        center: Optional scaling center; must have one entry per axis.
        has_input: When true, a random input transform is fed to the operator.
        reverse_order: Forwarded to the operator (input case) and to the check.
        batch_size: Pipeline batch size.
        num_threads: Pipeline thread count.
        device_id: Pipeline device id.
    """
    ndim = len(scale)
    assert center is None or len(center) == ndim

    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    with pipe:
        if has_input:
            T0 = fn.uniform(range=(-1, 1), shape=(ndim, ndim + 1), seed=1234)
            T1 = fn.transforms.scale(T0, device='cpu', scale=scale, center=center,
                                     reverse_order=reverse_order)
            graph_outputs = (T1, T0)
        else:
            T1 = fn.transforms.scale(device='cpu', scale=scale, center=center)
            graph_outputs = (T1,)
        pipe.set_outputs(*graph_outputs)
    pipe.build()

    results = pipe.run()
    expected = scale_affine_mat(scale=scale, center=center)
    input_batch = results[1] if has_input else None
    check_results(results[0], batch_size, expected, input_batch, reverse_order)
def get_random_pipeline(device, batch_size):
    """Create an unbuilt pipeline applying ``fn.grid_mask`` with random scalar arguments.

    Args:
        device: Backend for ``grid_mask``; with ``'gpu'`` the decoded images
            are moved to the GPU first.
        batch_size: Pipeline batch size.

    Returns:
        The pipeline with outputs ``(masked, decoded, tile, ratio, angle)``.
    """
    pipeline = Pipeline(batch_size, 4, 0)
    with pipeline:
        encoded, _ = fn.readers.file(file_root=img_dir)
        decoded = fn.decoders.image(encoded, device='cpu', output_type=types.RGB)
        if device == 'gpu':
            decoded = decoded.gpu()

        # Random arguments, created in tile, ratio, angle order.
        tile = fn.cast(fn.uniform(range=(50, 200)), dtype=types.INT32)
        ratio = fn.uniform(range=(0.3, 0.7))
        angle = fn.uniform(range=(-math.pi, math.pi))

        masked = fn.grid_mask(decoded, device=device, tile=tile, ratio=ratio, angle=angle)
        pipeline.set_outputs(masked, decoded, tile, ratio, angle)
    return pipeline
def check_transform_translation_op(offset, has_input=False, reverse_order=False,
                                   batch_size=1, num_threads=4, device_id=0):
    """Run ``fn.transforms.translation`` once and compare with ``translate_affine_mat``.

    Args:
        offset: Translation vector; its length defines the dimensionality.
        has_input: When true, a random input transform is fed to the operator.
        reverse_order: Forwarded to the operator (input case) and to the check.
        batch_size: Pipeline batch size.
        num_threads: Pipeline thread count.
        device_id: Pipeline device id.
    """
    ndim = len(offset)

    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    with pipe:
        if has_input:
            T0 = fn.uniform(range=(-1, 1), shape=(ndim, ndim + 1), seed=1234)
            T1 = fn.transforms.translation(T0, device='cpu', offset=offset,
                                           reverse_order=reverse_order)
            graph_outputs = (T1, T0)
        else:
            T1 = fn.transforms.translation(device='cpu', offset=offset)
            graph_outputs = (T1,)
        pipe.set_outputs(*graph_outputs)
    pipe.build()

    results = pipe.run()
    expected = translate_affine_mat(offset=offset)
    input_batch = results[1] if has_input else None
    check_results(results[0], batch_size, expected, input_batch, reverse_order)
def check_combine_transforms(num_transforms=2, ndim=2, reverse_order=False,
                             batch_size=1, num_threads=4, device_id=0):
    """Check ``fn.transforms.combine`` against a NumPy matrix product.

    Args:
        num_transforms: Number of random input transforms to combine.
        ndim: Dimensionality; each transform has shape ``(ndim, ndim + 1)``.
        reverse_order: Selects the multiplication order of the reference.
        batch_size: Pipeline batch size.
        num_threads: Pipeline thread count.
        device_id: Pipeline device id.
    """
    # NOTE(review): `reverse_order` only affects the reference below; it is
    # not forwarded to `fn.transforms.combine`, and every input uses the same
    # seed. Confirm whether that is intentional.
    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    with pipe:
        transforms = []
        for _ in range(num_transforms):
            transforms.append(fn.uniform(range=(-1, 1), shape=(ndim, ndim + 1), seed=1234))
        combined = fn.transforms.combine(*transforms)
        pipe.set_outputs(combined, *transforms)
    pipe.build()

    outs = pipe.run()
    for idx in range(batch_size):
        num_mats = len(outs) - 1
        assert num_mats >= 2

        # Promote each (ndim, ndim + 1) sample to a full homogeneous matrix.
        mats = []
        for in_idx in range(num_mats):
            full = np.identity(ndim + 1)
            full[:ndim, :] = outs[1 + in_idx].at(idx)
            mats.append(full)

        # by default we want to access them in opposite order
        ordered = mats if reverse_order else mats[::-1]

        ref_mat = np.identity(ndim + 1)
        for mat in ordered:
            ref_mat = np.dot(mat, ref_mat)

        assert np.allclose(outs[0].at(idx), ref_mat[:ndim, :], atol=1e-6)
def check_transform_crop_op(from_start=None, from_end=None, to_start=None, to_end=None,
                            absolute=False, has_input=False, reverse_order=False,
                            batch_size=1, num_threads=4, device_id=0):
    """Run ``fn.transforms.crop`` once and compare with ``crop_affine_mat``.

    Without an input transform, the result is additionally checked to map
    ``from_start`` onto ``to_start`` and ``from_end`` onto ``to_end``.

    Args:
        from_start, from_end: Source window bounds (may be ``None``).
        to_start, to_end: Destination window bounds (may be ``None``).
        absolute: Forwarded to the operator and the reference.
        has_input: When true, a random input transform is fed to the operator.
        reverse_order: Forwarded to the operator (input case) and to the check.
        batch_size: Pipeline batch size.
        num_threads: Pipeline thread count.
        device_id: Pipeline device id.
    """
    ndim = get_ndim(from_start, from_end, to_start, to_end)

    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    with pipe:
        if has_input:
            T0 = fn.uniform(range=(-1, 1), shape=(ndim, ndim + 1), seed=1234)
            T1 = fn.transforms.crop(T0, device='cpu',
                                    from_start=from_start, from_end=from_end,
                                    to_start=to_start, to_end=to_end,
                                    absolute=absolute, reverse_order=reverse_order)
            graph_outputs = (T1, T0)
        else:
            T1 = fn.transforms.crop(device='cpu',
                                    from_start=from_start, from_end=from_end,
                                    to_start=to_start, to_end=to_end,
                                    absolute=absolute)
            graph_outputs = (T1,)
        pipe.set_outputs(*graph_outputs)
    pipe.build()

    outs = pipe.run()
    ref_mat = crop_affine_mat(from_start, from_end, to_start, to_end, absolute=absolute)
    input_batch = outs[1] if has_input else None
    result_batch = outs[0]
    check_results(result_batch, batch_size, ref_mat, input_batch, reverse_order, atol=1e-6)

    # The point-mapping check is only performed for the stand-alone transform.
    if has_input:
        return

    from_start, from_end, to_start, to_end = expand_dims(from_start, from_end, to_start, to_end)
    if absolute:
        # Order each pair of bounds so that start <= end element-wise.
        from_lo, from_hi = np.minimum(from_start, from_end), np.maximum(from_start, from_end)
        from_start, from_end = from_lo, from_hi
        to_lo, to_hi = np.minimum(to_start, to_end), np.maximum(to_start, to_end)
        to_start, to_end = to_lo, to_hi

    for idx in range(batch_size):
        affine = result_batch.at(idx)
        linear = affine[:ndim, :ndim]
        shift = affine[:, ndim]
        assert np.allclose(np.dot(linear, from_start) + shift, to_start, atol=1e-6)
        assert np.allclose(np.dot(linear, from_end) + shift, to_end, atol=1e-6)
def create_video_reader_pipeline(sequence_length, files, crop_size):
    """Define the video reading graph: read, randomly crop, then transpose.

    Args:
        sequence_length: Number of frames per sequence for the video reader.
        files: File names handed to the video reader.
        crop_size: Crop window passed to ``fn.crop``.

    Returns:
        The transposed, cropped frame sequences (a DALI graph node).
    """
    frames = fn.video_reader(
        device="gpu",
        filenames=files,
        sequence_length=sequence_length,
        normalized=False,
        random_shuffle=True,
        image_type=types.RGB,
        dtype=types.UINT8,
        initial_fill=16,
        pad_last_batch=True,
        name="Reader",
    )

    # Random crop anchor, created in x then y order.
    anchor_x = fn.uniform(range=(0.0, 1.0))
    anchor_y = fn.uniform(range=(0.0, 1.0))
    frames = fn.crop(frames, crop=crop_size, dtype=types.FLOAT,
                     crop_pos_x=anchor_x, crop_pos_y=anchor_y)

    # Move the last axis to the front.
    return fn.transpose(frames, perm=[3, 0, 1, 2])
def check_transform_rotation_op(angle=None, axis=None, center=None, has_input=False,
                                reverse_order=False, batch_size=1, num_threads=4, device_id=0):
    """Check ``fn.transforms.rotation`` per sample against ``rotate_affine_mat``.

    Args:
        angle: Rotation angle; when ``None`` a random per-sample angle is
            generated inside the pipeline and returned as an extra output.
        axis: Optional 3-element rotation axis; its presence selects 3D.
        center: Optional rotation center with one entry per dimension.
        has_input: When true, a random input transform is fed to the operator.
        reverse_order: Forwarded to the operator (input case) and to the check.
        batch_size: Pipeline batch size.
        num_threads: Pipeline thread count.
        device_id: Pipeline device id.
    """
    assert axis is None or len(axis) == 3
    ndim = 2 if axis is None else 3
    assert center is None or len(center) == ndim
    random_angle = angle is None

    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id,
                    seed=12345)
    with pipe:
        if random_angle:
            angle = fn.uniform(range=(-90, 90))

        if has_input:
            T0 = fn.uniform(range=(-1, 1), shape=(ndim, ndim+1))
            T1 = fn.transforms.rotation(T0, device='cpu', angle=angle, axis=axis,
                                        center=center, reverse_order=reverse_order)
            outputs = [T1, T0]
        else:
            T1 = fn.transforms.rotation(device='cpu', angle=angle, axis=axis, center=center)
            outputs = [T1]

        if random_angle:
            outputs.append(angle)

        pipe.set_outputs(*outputs)
    pipe.build()
    outs = pipe.run()

    # Output layout: [result, (input transform), (random angle)].
    out_T0 = outs[1] if has_input else None
    angle_slot = 2 if has_input else 1
    out_angle = outs[angle_slot] if random_angle else None

    for idx in range(batch_size):
        sample_T0 = out_T0.at(idx) if has_input else None
        if random_angle:
            angle = out_angle.at(idx)
        ref_mat = rotate_affine_mat(angle=angle, axis=axis, center=center)
        check_results_sample(outs[0].at(idx), ref_mat, sample_T0, reverse_order, atol=1e-6)
def check_transform_shear_op_runtime_args(ndim, use_angles, use_center, has_input=False,
                                          reverse_order=False, batch_size=1, num_threads=4,
                                          device_id=0):
    """Check ``fn.transforms.shear`` when its arguments are per-sample graph nodes.

    Args:
        ndim: Dimensionality of the transform.
        use_angles: Provide random ``angles`` instead of random ``shear`` factors.
        use_center: Also provide a random ``center``.
        has_input: When true, a random input transform is fed to the operator.
        reverse_order: Forwarded to the operator and to the check.
        batch_size: Pipeline batch size.
        num_threads: Pipeline thread count.
        device_id: Pipeline device id.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id,
                    seed=1234)
    with pipe:
        inputs = []
        if has_input:
            inputs.append(fn.uniform(range=(-1, 1), shape=(ndim, ndim+1)))

        angles_arg = shear_arg = center_arg = None
        if use_angles:
            angles_arg = fn.uniform(range=(-80,80), shape=[ndim, ndim-1])
            params = [angles_arg]
        else:
            shear_arg = fn.uniform(range=(-2,2), shape=[ndim, ndim-1])
            params = [shear_arg]

        if use_center:
            center_arg = fn.uniform(range=(-10,10), shape=[ndim])
            params.append(center_arg)

        T1 = fn.transforms.shear(*inputs, device='cpu', shear=shear_arg, angles=angles_arg,
                                 center=center_arg, reverse_order=reverse_order)
        pipe.set_outputs(T1, *inputs, *params)
    pipe.build()

    # Output layout: [result, (input transform), shear-or-angles, (center)].
    first_param = 2 if has_input else 1
    for _ in range(3):
        outs = pipe.run()
        in_batch = outs[1] if has_input else None
        shear_param = outs[first_param]
        center_param = outs[first_param + 1] if use_center else None

        for idx in range(batch_size):
            shear_or_angles = shear_param.at(idx)
            angles = shear_or_angles if use_angles else None
            shear = None if use_angles else shear_or_angles
            center = center_param.at(idx) if use_center else None

            ref_mat = shear_affine_mat(shear=shear, angles=angles, center=center)
            inp = None if in_batch is None else in_batch.at(idx)
            check_results_sample(outs[0].at(idx), ref_mat, inp, reverse_order, atol=1e-6)
def zoom_fn(self, img, lbl):
    """Crop image and label to a randomly scaled window, then resize both back.

    The crop size is ``self.crop_shape`` multiplied by whatever
    ``self.random_augmentation(0.15, <uniform 0.7..1.0>, 1.0)`` returns.
    The image is resized with cubic interpolation and the label with
    nearest-neighbour, both to ``self.crop_shape_float``.

    Returns:
        The ``(img, lbl)`` pair after crop and resize.
    """
    zoom_factor = self.random_augmentation(0.15, fn.uniform(range=(0.7, 1.0)), 1.0)
    resized_shape = self.crop_shape * zoom_factor

    img = fn.crop(img, crop=resized_shape)
    lbl = fn.crop(lbl, crop=resized_shape)

    img = fn.resize(img, interp_type=types.DALIInterpType.INTERP_CUBIC,
                    size=self.crop_shape_float)
    lbl = fn.resize(lbl, interp_type=types.DALIInterpType.INTERP_NN,
                    size=self.crop_shape_float)
    return img, lbl
def check_pad_to_square(device='cpu', batch_size=3, ndim=2, num_iter=3):
    """Check padding random "HW" samples to a square whose side is max(H, W).

    Args:
        device: ``'cpu'`` or ``'gpu'``; on GPU the input is moved with ``.gpu()``.
        batch_size: Pipeline batch size.
        ndim: Number of elements in the random input shape.
        num_iter: Number of pipeline runs to verify.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=0, seed=1234)
    with pipe:
        shape_node = fn.cast(fn.uniform(range=(10, 20), shape=(ndim, )), dtype=types.INT32)
        data_node = fn.reshape(fn.uniform(range=(0., 1.), shape=shape_node), layout="HW")

        # Square side computed inside the graph: max of the two extents.
        extents = fn.shapes(data_node, dtype=types.INT32)
        h = fn.slice(extents, 0, 1, axes=[0])
        w = fn.slice(extents, 1, 1, axes=[0])
        side = math.max(h, w)

        if device == 'gpu':
            data_node = data_node.gpu()
        padded_node = fn.pad(data_node, axis_names="HW", shape=fn.cat(side, side, axis=0))
        pipe.set_outputs(data_node, padded_node)
    pipe.build()

    for _ in range(num_iter):
        # Bring any GPU outputs to the host so samples can be read with .at().
        outs = []
        for out in pipe.run():
            outs.append(out.as_cpu() if isinstance(out, TensorListGPU) else out)

        for i in range(batch_size):
            in_data, out_data = [out.at(i) for out in outs]
            in_shape = in_data.shape
            max_side = max(in_shape)
            for extent in out_data.shape:
                assert extent == max_side
            # Original content is preserved in the top-left corner, rest is zero.
            np.testing.assert_equal(out_data[:in_shape[0], :in_shape[1]], in_data)
            np.testing.assert_equal(out_data[in_shape[0]:, :], 0)
            np.testing.assert_equal(out_data[:, in_shape[1]:], 0)
def test_bbox_paste_cpu():
    """Smoke-test ``fn.bbox_paste`` in a CPU-only pipeline (``device_id=None``).

    Feeds random float32 arrays of shape ``[200, 4]`` with values in [0, 1)
    and runs the pipeline three times; only successful execution is checked.
    Uses the module-level ``batch_size``.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    test_data_shape = [200, 4]

    def get_data():
        batch = []
        for _ in range(batch_size):
            raw = np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8)
            batch.append((raw / 255).astype(dtype=np.float32))
        return batch

    boxes = fn.external_source(source=get_data)

    # Random paste parameters, created in x, y, ratio order.
    pos_x = fn.uniform(range=(0, 1))
    pos_y = fn.uniform(range=(0, 1))
    ratio = fn.uniform(range=(1, 2))
    pasted = fn.bbox_paste(boxes, paste_x=pos_x, paste_y=pos_y, ratio=ratio)

    pipe.set_outputs(pasted)
    pipe.build()

    for _ in range(3):
        pipe.run()
def test_compose_change_device():
    """Check that ``ops.Compose`` of a CPU decoder and a GPU resize yields GPU output.

    The composed result is compared against the same chain written out with
    the functional API and an explicit ``.gpu()`` transfer.
    """
    batch_size = 3
    pipe = Pipeline(batch_size, 1, 0)

    size = fn.uniform(shape=2, range=(300,500))
    decode_cpu = ops.ImageDecoder(device="cpu")
    resize_gpu = ops.Resize(size=size, device="gpu")
    composed = ops.Compose([decode_cpu, resize_gpu])

    files, labels = fn.caffe_reader(path=caffe_db_folder, seed=1)

    via_compose = composed(files)
    via_functions = fn.resize(fn.image_decoder(files).gpu(), size=size)
    pipe.set_outputs(via_compose, via_functions)
    pipe.build()

    out = pipe.run()
    assert isinstance(out[0], dali.backend.TensorListGPU)
    test_utils.check_batch(out[0], out[1], batch_size=batch_size)
def check_transform_rotation_op(angle, axis=None, center=None, has_input=False,
                                reverse_order=False, batch_size=1, num_threads=4, device_id=0):
    """Run ``fn.transforms.rotation`` once and compare with ``rotate_affine_mat``.

    Args:
        angle: Rotation angle passed to the operator and the reference.
        axis: Optional 3-element rotation axis; its presence selects 3D.
        center: Optional rotation center with one entry per dimension.
        has_input: When true, a random input transform is fed to the operator.
        reverse_order: Forwarded to the operator (input case) and to the check.
        batch_size: Pipeline batch size.
        num_threads: Pipeline thread count.
        device_id: Pipeline device id.
    """
    assert axis is None or len(axis) == 3
    ndim = 2 if axis is None else 3
    assert center is None or len(center) == ndim

    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    with pipe:
        if has_input:
            T0 = fn.uniform(range=(-1, 1), shape=(ndim, ndim+1), seed=1234)
            T1 = fn.transforms.rotation(T0, device='cpu', angle=angle, axis=axis,
                                        center=center, reverse_order=reverse_order)
            graph_outputs = (T1, T0)
        else:
            T1 = fn.transforms.rotation(device='cpu', angle=angle, axis=axis, center=center)
            graph_outputs = (T1,)
        pipe.set_outputs(*graph_outputs)
    pipe.build()

    results = pipe.run()
    expected = rotate_affine_mat(angle=angle, axis=axis, center=center)
    input_batch = results[1] if has_input else None
    check_results(results[0], batch_size, expected, input_batch, reverse_order, rtol=1e-5)
def check_transform_shear_op(shear=None, angles=None, center=None, has_input=False,
                             reverse_order=False, batch_size=1, num_threads=4, device_id=0):
    """Run ``fn.transforms.shear`` once and compare with ``shear_affine_mat``.

    Args:
        shear: Shear factors (2 values for 2D, 6 for 3D), or ``None``.
        angles: Shear angles (2 values for 2D, 6 for 3D); consulted for the
            dimensionality only when ``shear`` is ``None``.
        center: Optional shear center with one entry per dimension.
        has_input: When true, a random input transform is fed to the operator.
        reverse_order: Forwarded to the operator (input case) and to the check.
        batch_size: Pipeline batch size.
        num_threads: Pipeline thread count.
        device_id: Pipeline device id.
    """
    assert shear is not None or angles is not None

    # Whichever argument is present determines the dimensionality.
    sized_arg = shear if shear is not None else angles
    assert len(sized_arg) in (2, 6)
    ndim = 3 if len(sized_arg) == 6 else 2
    assert center is None or len(center) == ndim

    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    with pipe:
        if has_input:
            T0 = fn.uniform(range=(-1, 1), shape=(ndim, ndim + 1), seed=1234)
            T1 = fn.transforms.shear(T0, device='cpu', shear=shear, angles=angles,
                                     center=center, reverse_order=reverse_order)
            graph_outputs = (T1, T0)
        else:
            T1 = fn.transforms.shear(device='cpu', shear=shear, angles=angles, center=center)
            graph_outputs = (T1,)
        pipe.set_outputs(*graph_outputs)
    pipe.build()

    results = pipe.run()
    expected = shear_affine_mat(shear=shear, angles=angles, center=center)
    input_batch = results[1] if has_input else None
    check_results(results[0], batch_size, expected, input_batch, reverse_order, atol=1e-6)
def brightness_fn(self, img):
    """Multiply the image by a brightness factor.

    The factor is whatever ``self.random_augmentation`` returns for
    ``(0.15, <uniform 0.7..1.3>, 1.0)``.
    """
    random_factor = fn.uniform(range=(0.7, 1.3))
    brightness_scale = self.random_augmentation(0.15, random_factor, 1.0)
    return img * brightness_scale
def blur_fn(self, img):
    """Gaussian-blur the image with a random sigma in (0.5, 1.5).

    Returns the result of ``self.random_augmentation(0.15, blurred, img)``.
    """
    sigma = fn.uniform(range=(0.5, 1.5))
    blurred = fn.gaussian_blur(img, sigma=sigma)
    return self.random_augmentation(0.15, blurred, img)
def noise_fn(self, img):
    """Add normally distributed noise with a random stddev in (0.0, 0.33).

    Returns the result of ``self.random_augmentation(0.15, noised, img)``.
    """
    stddev = fn.uniform(range=(0.0, 0.33))
    noise = fn.normal_distribution(img, stddev=stddev)
    noised = img + noise
    return self.random_augmentation(0.15, noised, img)
def contrast_fn(self, img):
    """Scale the image by a contrast factor and clamp to its original min/max.

    The factor is whatever ``self.random_augmentation`` returns for
    ``(0.15, <uniform 0.65..1.5>, 1.0)``.
    """
    # Capture the original value range before scaling so it can be restored.
    lowest = fn.reductions.min(img)
    highest = fn.reductions.max(img)
    random_factor = fn.uniform(range=(0.65, 1.5))
    scale = self.random_augmentation(0.15, random_factor, 1.0)
    return math.clamp(img * scale, lowest, highest)
W, H = int(args.sz * 120), int(args.sz * 400) ImageBytes = W * H * 3 * 4 TestingPipe = Pipeline(batch_size=args.bs, num_threads=4, device_id=0) with TestingPipe: files, labels = fn.file_reader(files=DataList, labels=labels) images = fn.image_decoder(files, device='cpu', use_fast_idct=True) images = fn.resize(images.gpu(), device='gpu', bytes_per_sample_hint=ImageBytes, size=(H, W)) images = fn.gaussian_blur(images, device='gpu', bytes_per_sample_hint=ImageBytes, sigma=fn.uniform(range=(0.1, 2)), window_size=11) images = fn.color_twist(images, device='gpu', bytes_per_sample_hint=ImageBytes, brightness=fn.uniform(range=(0.5, 1.5)), contrast=fn.uniform(range=(0.5, 2.5)), saturation=fn.uniform(range=(0.1, 2))) images = fn.cast(images, device='gpu', bytes_per_sample_hint=ImageBytes, dtype=DALIDataType.FLOAT) images = fn.normalize( images, device='gpu', bytes_per_sample_hint=ImageBytes,
def build_pipes(device, dim, batch_size, channel_first, mode, interp, dtype,
                w_input, h_input, d_input, use_size_arg, use_size_input, use_roi):
    """Build the DALI resize pipeline under test and its PIL reference pipeline.

    Args:
        device: ``"cpu"`` or anything else for GPU (input moved with ``.gpu()``).
        dim: Spatial dimensionality; 2 reads images from the Caffe database,
            otherwise random volumes are supplied through an external source.
        batch_size: Batch size of both pipelines.
        channel_first: Transpose the input so that channels come first.
        mode, interp, dtype: Forwarded to ``resize_dali`` / ``resize_PIL``.
        w_input, h_input, d_input: Use a random per-sample graph node (zeroed
            with probability 0.2 via a coin flip) instead of a fixed extent.
            Only consulted when ``use_size_arg`` is false.
        use_size_arg: Pass a single ``size`` argument instead of w/h/d.
        use_size_input: With ``use_size_arg``, make ``size`` a random node.
        use_roi: Also generate a random absolute region of interest.

    Returns:
        ``(dali_pipe, pil_pipe)``, both already built.
    """
    dali_pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0, seed=1234)
    with dali_pipe:
        if dim == 2:
            files, labels = dali.fn.caffe_reader(path=db_2d_folder, random_shuffle=True)
            images_cpu = dali.fn.image_decoder(files, device="cpu")
        else:
            images_cpu = dali.fn.external_source(source=random_3d_loader(batch_size),
                                                 layout="DHWC")

        images_hwc = images_cpu if device == "cpu" else images_cpu.gpu()

        if channel_first:
            images = dali.fn.transpose(images_hwc,
                                       perm=[3, 0, 1, 2] if dim == 3 else [2, 0, 1],
                                       transpose_layout=True)
        else:
            images = images_hwc

        roi_start = None
        roi_end = None
        w = None
        h = None
        d = None
        size = None

        minibatch_size = 2 if dim == 3 else 8

        if use_roi:
            # Calculate absolute RoI
            in_size = fn.slice(fn.shapes(images_cpu),
                               types.Constant(0, dtype=types.FLOAT, device="cpu"),
                               types.Constant(dim, dtype=types.FLOAT, device="cpu"),
                               axes=[0],
                               normalized_shape=False)
            roi_start = fn.uniform(range=(0, 0.4), shape=[dim]) * in_size
            roi_end = fn.uniform(range=(0.6, 1.0), shape=[dim]) * in_size

        size_range = (10, 200) if dim == 3 else (10, 1000)

        if use_size_arg:
            if use_size_input:
                mask = fn.cast(fn.uniform(range=(0.8, 1.9), shape=[dim]), dtype=types.INT32)
                size = fn.uniform(range=size_range, shape=[dim]) * mask
            else:
                size = [300, 400] if dim == 2 else [80, 100, 120]

            resized = resize_dali(images, channel_first, dtype, interp, mode,
                                  size, None, None, None, roi_start, roi_end,
                                  minibatch_size=minibatch_size, max_size=max_size(dim))
        else:
            if w_input:
                has_w = fn.coin_flip(probability=0.8)
                w = fn.uniform(range=size_range) * has_w
            else:
                w = 320  # some fixed value

            if h_input:
                has_h = fn.coin_flip(probability=0.8)
                h = fn.uniform(range=size_range) * has_h
            else:
                h = 240  # some other fixed value

            if dim >= 3:
                if d_input:
                    has_d = fn.coin_flip(probability=0.8)
                    d = fn.uniform(range=size_range) * has_d
                else:
                    d = 31  # some other fixed value

            resized = resize_dali(images, channel_first, dtype, interp, mode,
                                  None, w, h, d, roi_start, roi_end,
                                  minibatch_size=minibatch_size, max_size=max_size(dim))

        outputs = [images, resized]
        # roi_start / roi_end are graph nodes when set; test for presence
        # explicitly instead of relying on the truth value of a node.
        if roi_start is not None and roi_end is not None:
            outputs += [roi_start, roi_end]

        # Expose whichever size arguments were used; plain Python values are
        # wrapped as float32 constants so they can be pipeline outputs.
        for x in (d, h, w, size):
            if x is not None:
                if isinstance(x, _DataNode):
                    outputs.append(x)
                else:
                    outputs.append(types.Constant(np.array(x, dtype=np.float32)))

        dali_pipe.set_outputs(*outputs)

    pil_pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0,
                        exec_async=False, exec_pipelined=False)
    with pil_pipe:
        images = fn.external_source(name="images", layout=layout_str(dim, channel_first))
        sizes = fn.external_source(name="size")
        roi_start = fn.external_source(name="roi_start")
        roi_end = fn.external_source(name="roi_end")
        resized = resize_PIL(dim, channel_first, dtype, interp, images, sizes,
                             roi_start, roi_end)
        resized = fn.reshape(resized, layout=layout_str(dim, channel_first))
        pil_pipe.set_outputs(resized)

    dali_pipe.build()
    pil_pipe.build()

    return dali_pipe, pil_pipe
def create_coco_pipeline(default_boxes, args):
    """Define the COCO training graph with box encoding for SSD.

    Args:
        default_boxes: Object whose ``as_ltrb_list()`` supplies the anchors
            for ``fn.box_encoder``.
        args: Namespace providing ``train_coco_root``, ``train_annotate`` and
            ``fp16``.

    Returns:
        ``(images, bboxes, labels)`` graph nodes; boxes and labels are moved
        to the GPU with ``.gpu()``.
    """
    # Fall back to a single shard when torch.distributed raises RuntimeError.
    try:
        shard_id = torch.distributed.get_rank()
        num_shards = torch.distributed.get_world_size()
    except RuntimeError:
        shard_id, num_shards = 0, 1

    encoded, boxes, classes = fn.readers.coco(
        file_root=args.train_coco_root,
        annotations_file=args.train_annotate,
        skip_empty=True,
        shard_id=shard_id,
        num_shards=num_shards,
        ratio=True,
        ltrb=True,
        random_shuffle=False,
        shuffle_after_epoch=True,
        name="Reader",
    )

    crop_begin, crop_size, boxes, classes = fn.random_bbox_crop(
        boxes,
        classes,
        device="cpu",
        aspect_ratio=[0.5, 2.0],
        thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
        scaling=[0.3, 1.0],
        bbox_layout="xyXY",
        allow_no_crop=True,
        num_attempts=50,
    )

    pixels = fn.image_decoder_slice(encoded, crop_begin, crop_size,
                                    device="mixed", output_type=types.RGB)

    # One coin per sample, shared by the box flip and the image mirror below.
    flip_coin = fn.random.coin_flip(probability=0.5)

    pixels = fn.resize(pixels, resize_x=300, resize_y=300,
                       min_filter=types.DALIInterpType.INTERP_TRIANGULAR)

    saturation = fn.uniform(range=[0.5, 1.5])
    contrast = fn.uniform(range=[0.5, 1.5])
    brightness = fn.uniform(range=[0.875, 1.125])
    hue = fn.uniform(range=[-0.5, 0.5])

    # use float to avoid clipping and quantizing the intermediate result
    pixels = fn.hsv(pixels, dtype=types.FLOAT, hue=hue, saturation=saturation)

    pixels = fn.brightness_contrast(
        pixels,
        contrast_center=128,  # input is in float, but in 0..255 range
        dtype=types.UINT8,
        brightness=brightness,
        contrast=contrast,
    )

    out_dtype = types.FLOAT16 if args.fp16 else types.FLOAT

    boxes = fn.bb_flip(boxes, ltrb=True, horizontal=flip_coin)
    pixels = fn.crop_mirror_normalize(
        pixels,
        crop=(300, 300),
        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        mirror=flip_coin,
        dtype=out_dtype,
        output_layout="CHW",
        pad_output=False,
    )

    boxes, classes = fn.box_encoder(boxes, classes, criteria=0.5,
                                    anchors=default_boxes.as_ltrb_list())

    classes = classes.gpu()
    boxes = boxes.gpu()

    return pixels, boxes, classes
def check_random_mask_pixel(ndim=2, batch_size=3, min_extent=20, max_extent=50):
    """Check ``fn.segmentation.random_mask_pixel`` on random integer masks.

    Several selection modes are exercised (default foreground, threshold,
    exact value, any pixel, coin-flip-controlled foreground), and one selected
    pixel is turned into a slice anchor to verify it yields a valid crop.

    Args:
        ndim: Number of mask dimensions.
        batch_size: Pipeline batch size.
        min_extent: Lower bound of each random mask extent.
        max_extent: Upper bound used as ``max_extent + 1`` in the random range.
    """
    pipe = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=4, device_id=0, seed=1234)
    with pipe:
        # Input mask
        extent_nodes = []
        for _ in range(ndim):
            extent = fn.uniform(range=(min_extent, max_extent + 1), shape=(1, ), device='cpu')
            extent_nodes.append(fn.cast(extent, dtype=types.INT32))
        shape_node = fn.cat(*extent_nodes, axis=0)
        mask_node = fn.cast(fn.uniform(range=(0, 2), device='cpu', shape=shape_node),
                            dtype=types.INT32)

        fg1_node = fn.segmentation.random_mask_pixel(mask_node, foreground=1)  # > 0
        fg2_node = fn.segmentation.random_mask_pixel(mask_node, foreground=1,
                                                     threshold=0.99)  # > 0.99
        fg3_node = fn.segmentation.random_mask_pixel(mask_node, foreground=1,
                                                     value=2)  # == 2
        rnd_node = fn.segmentation.random_mask_pixel(mask_node, foreground=0)
        coin_node = fn.coin_flip(probability=0.7)
        biased_node = fn.segmentation.random_mask_pixel(mask_node, foreground=coin_node)

        # Demo purposes: Taking a random pixel and produce a valid anchor to feed slice
        # We want to force the center adjustment, therefore the large crop shape
        crop_shape_node = shape_node - 2
        anchor_node = fn.cast(fg1_node, dtype=types.INT32) - crop_shape_node // 2
        anchor_node = math.min(math.max(0, anchor_node), shape_node - crop_shape_node)
        cropped_node = fn.slice(mask_node, anchor_node, crop_shape_node,
                                axes=tuple(range(ndim)))

        pipe.set_outputs(mask_node, fg1_node, fg2_node, fg3_node, rnd_node, coin_node,
                         biased_node, anchor_node, crop_shape_node, cropped_node)
    pipe.build()

    for _ in range(3):
        outputs = pipe.run()
        for idx in range(batch_size):
            in_mask = outputs[0].at(idx)
            # Outputs 1..8 are converted to plain Python lists/scalars.
            (fg_pixel1, fg_pixel2, fg_pixel3, rnd_pixel, coin_flip,
             fg_biased, anchor, crop_shape) = [outputs[k].at(idx).tolist() for k in range(1, 9)]
            out_mask = outputs[9].at(idx)

            assert in_mask[tuple(fg_pixel1)] > 0
            assert in_mask[tuple(fg_pixel2)] > 0.99
            assert in_mask[tuple(fg_pixel3)] == 2
            assert in_mask[tuple(fg_biased)] > 0 or not coin_flip

            # The crop window must lie entirely inside the mask.
            for axis in range(ndim):
                assert 0 <= anchor[axis] and anchor[axis] + crop_shape[axis] <= in_mask.shape[axis]
            assert out_mask.shape == tuple(crop_shape)
def contrast_fn(self, img):
    """Scale the image by a contrast factor and clamp to its original min/max.

    The factor is whatever ``self.random_augmentation`` returns for
    ``(RAND_AUG_PROB, <uniform 0.65..1.5>, 1.0)``; the random node receives
    ``self.aug_seed_kwargs`` as extra keyword arguments.
    """
    # Capture the original value range before scaling so it can be restored.
    lowest = fn.reductions.min(img)
    highest = fn.reductions.max(img)
    random_factor = fn.uniform(range=(0.65, 1.5), **self.aug_seed_kwargs)
    scale = self.random_augmentation(RAND_AUG_PROB, random_factor, 1.0)
    return math.clamp(img * scale, lowest, highest)