def test_external_source_collection_cycling_raise():
    """Check ``cycle="raise"`` for a list source and a generator-function source.

    Within every pass each batch of ``batches`` is expected from both outputs;
    after the last batch ``pipe.run()`` must raise ``StopIteration`` and
    ``pipe.reset()`` is called before the next pass.
    """
    pipe = Pipeline(1, 3, 0, prefetch_queue_depth=1)

    first_batch = [make_array([1.5, 2.5], dtype=datapy.float32)]
    second_batch = [make_array([-1, 3.5, 4.5], dtype=datapy.float32)]
    batches = [first_batch, second_batch]

    def batch_gen():
        yield from batches

    from_list = fn.external_source(batches, cycle="raise")
    from_generator = fn.external_source(batch_gen, cycle="raise")
    pipe.set_outputs(from_list, from_generator)
    pipe.build()

    # One epoch is one full cycle over the source iterable.
    for _ in range(3):
        for source_batch in batches:
            pipe_out = pipe.run()
            reference = asnumpy(source_batch)
            # Both outputs are fed from the same data, so the reference is duplicated.
            check_output(pipe_out, (reference, reference))

        with assert_raises(StopIteration):
            pipe.run()
        pipe.reset()
def _test_scalar(device, as_tensors):
    """Test propagation of scalars from external source.

    A source pipeline produces batches of ``np.float32`` scalars. Each batch is
    fed to a second pipeline through ``feed_input`` and the outputs of the two
    pipelines are compared with ``check_batch``.

    Args:
        device: forwarded as ``device`` to both ``fn.external_source`` calls.
        as_tensors: when true, the batch is unpacked into a list of its
            elements before it is fed to the destination pipeline.
    """
    batch_size = 4

    def scalar_batch(iteration):
        # The iteration index contributes to the values so consecutive batches
        # differ; previously the comprehension variable shadowed the callback
        # argument and every batch was identical.
        return [np.float32(iteration * 10 + i + 1) for i in range(batch_size)]

    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext = fn.external_source(source=scalar_batch, device=device)
    src_pipe.set_outputs(src_ext)
    src_pipe.build()

    dst_pipe = Pipeline(batch_size, 1, 0, exec_async=False, exec_pipelined=False)
    dst_pipe.set_outputs(fn.external_source(name="ext", device=device))
    dst_pipe.build()

    for _ in range(3):
        src = src_pipe.run()
        data = src[0]
        if as_tensors:
            data = [data[i] for i in range(len(data))]
        dst_pipe.feed_input("ext", data)
        dst = dst_pipe.run()
        check_batch(src[0], dst[0], batch_size, 0, 0, "")
def _test_batch_info_flag_default(cb, batch_size):
    """Build a pipeline with one external source reading from ``cb`` and run it once.

    ``batch_info`` is deliberately not passed, so the operator's default applies.
    """
    pipeline = Pipeline(batch_size, 1, 0)
    with pipeline:
        pipeline.set_outputs(fn.external_source(source=cb))
    pipeline.build()
    pipeline.run()
def _test_epoch_idx(batch_size, epoch_size, cb, batch_info, batch_mode):
    """Run three epochs of an external source fed by ``cb`` and check every sample.

    In batch mode each sample must equal ``[iteration, epoch_idx]`` (with ``-1``
    in place of the epoch when ``batch_info`` is false). In sample mode it must
    equal ``[index in epoch, index in batch, iteration, epoch_idx]``. After
    ``epoch_size`` iterations one more ``run()`` has to raise ``StopIteration``.
    """
    num_epochs = 3

    pipe = Pipeline(batch_size, 1, 0)
    with pipe:
        pipe.set_outputs(
            fn.external_source(source=cb, batch_info=batch_info, batch=batch_mode))
    pipe.build()

    def expected_sample(epoch_idx, iteration, sample_i):
        if not batch_mode:
            idx_in_epoch = iteration * batch_size + sample_i
            return np.array([idx_in_epoch, sample_i, iteration, epoch_idx])
        reported_epoch = epoch_idx if batch_info else -1
        return np.array([iteration, reported_epoch])

    for epoch_idx in range(num_epochs):
        for iteration in range(epoch_size):
            (batch, ) = pipe.run()
            assert len(batch) == batch_size
            for sample_i, sample in enumerate(batch):
                np.testing.assert_array_equal(
                    sample, expected_sample(epoch_idx, iteration, sample_i))
        # The epoch is exhausted now: the next run must signal the end of data.
        try:
            pipe.run()
        except StopIteration:
            pipe.reset()
        else:
            assert False, "expected StopIteration"
def test_external_source_mixed_contiguous():
    """Alternate between a single array and a list of arrays as GPU input.

    Odd iterations return one 2D ``cupy`` array for the whole batch, even
    iterations return a list that repeats one 1D array. Running the pipeline is
    wrapped in ``check_output_pattern`` with the warning text about mixing
    contiguous and noncontiguous inputs.
    """
    batch_size = 2
    iterations = 4

    def generator(i):
        value = 100 + i * 10 + 1.5
        if i % 2 == 0:
            # The very same array object is repeated for every sample.
            return batch_size * [cp.array([value], dtype=cp.float32)]
        return cp.array([[value]] * batch_size, dtype=cp.float32)

    pipe = Pipeline(batch_size, 3, 0)
    source = fn.external_source(device="gpu", source=generator, no_copy=True)
    pipe.set_outputs(source)
    pipe.build()

    pattern = (
        "ExternalSource operator should not mix contiguous and noncontiguous inputs. "
        "In such a case the internal memory used to gather data in a contiguous chunk of "
        "memory would be trashed."
    )
    with check_output_pattern(pattern):
        for _ in range(iterations):
            pipe.run()
def test_incorrect_dtype_arg():
    """Feed float32 samples to an external source declared with ``dtype=UINT8``.

    NOTE(review): the declared dtype does not match the data, so this run is
    presumably expected to fail; no such expectation is visible inside this
    function -- confirm it is asserted elsewhere (e.g. by a decorator).
    """
    batch_size = 2
    sample = np.ones((120, 120, 3), dtype=np.float32)
    src_data = [[sample] * batch_size]

    pipeline = Pipeline(batch_size, 1, 0)
    pipeline.set_outputs(
        fn.external_source(source=src_data, dtype=DALIDataType.UINT8))
    pipeline.build()
    pipeline.run()
def test_ndim_changing():
    """Run an external source whose second batch has fewer dimensions than the first.

    The first batch holds 3D uint8 samples, the second one 2D uint8 samples.

    NOTE(review): the second run is presumably expected to fail; no such
    expectation is visible inside this function -- confirm it is asserted
    elsewhere (e.g. by a decorator).
    """
    batch_size = 2
    batch_3d = [np.ones((120, 120, 3), dtype=np.uint8)] * batch_size
    batch_2d = [np.ones((120, 120), dtype=np.uint8)] * batch_size
    src_data = [batch_3d, batch_2d]

    pipeline = Pipeline(batch_size, 1, 0)
    pipeline.set_outputs(
        fn.external_source(source=src_data, dtype=DALIDataType.UINT8))
    pipeline.build()
    # One run per batch in ``src_data``.
    for _ in range(2):
        pipeline.run()
def test_changing_dtype():
    """Run an external source whose second batch has a different dtype than the first.

    The first batch holds float32 samples, the second one uint8 samples of the
    same shape; no ``dtype`` is declared on the operator.

    NOTE(review): the second run is presumably expected to fail; no such
    expectation is visible inside this function -- confirm it is asserted
    elsewhere (e.g. by a decorator).
    """
    batch_size = 2
    shape = (120, 120, 3)
    float_batch = [np.ones(shape, dtype=np.float32)] * batch_size
    uint8_batch = [np.ones(shape, dtype=np.uint8)] * batch_size
    src_data = [float_batch, uint8_batch]

    pipeline = Pipeline(batch_size, 1, 0)
    pipeline.set_outputs(fn.external_source(source=src_data))
    pipeline.build()
    # One run per batch in ``src_data``.
    for _ in range(2):
        pipeline.run()
def test_ndim_data_mismatch():
    """Declare ``ndim=3`` on a two-output external source whose second output is 2D.

    NOTE(review): the 2D output contradicts the declared ``ndim``, so the run is
    presumably expected to fail; no such expectation is visible inside this
    function -- confirm it is asserted elsewhere (e.g. by a decorator).
    """
    batch_size = 2
    first_output = [np.ones((120, 120, 3), dtype=np.uint8)] * batch_size
    second_output = [np.ones((120, 120), dtype=np.uint8)] * batch_size
    # A single iteration that provides data for both outputs.
    src_data = [[first_output, second_output]]

    pipeline = Pipeline(batch_size, 1, 0)
    outputs = fn.external_source(source=src_data,
                                 num_outputs=2,
                                 dtype=DALIDataType.UINT8,
                                 ndim=3)
    out_a, out_b = outputs
    pipeline.set_outputs(out_a, out_b)
    pipeline.build()
    pipeline.run()
def check_dim_mismatch(device, test_data_root, names):
    """Expect the numpy reader to raise ``RuntimeError`` mentioning "Inconsistent data".

    Args:
        device: forwarded as ``device`` to ``fn.readers.numpy``.
        test_data_root: forwarded as ``file_root``.
        names: forwarded as ``files``.
    """
    pipe = Pipeline(2, 2, 0)
    reader = fn.readers.numpy(device=device, file_root=test_data_root, files=names)
    pipe.set_outputs(reader)
    pipe.build()

    caught = None
    try:
        pipe.run()
    except RuntimeError as thrown:
        caught = thrown

    # The checks live outside of the except block so that a failure does not
    # get reported as a nested exception.
    assert caught, "Exception not thrown"
    message = str(caught)
    assert "Inconsistent data" in message, "Unexpected error message: {}".format(caught)
def test_ndim_arg_multioutput():
    """Check per-output ``dtype`` with per-output ``ndim`` or ``layout`` lists.

    Two external sources with two outputs each read the same data: a uint8 HWC
    sample and a float32 HW sample. One operator declares ``ndim=[3, 2]``, the
    other ``layout=["HWC", "HW"]``; all four outputs are compared with arrays of
    ones.
    """
    batch_size = 2
    src_data = [[[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size,
                 [np.ones((120, 120), dtype=np.float32)] * batch_size]]
    dtypes = [DALIDataType.UINT8, DALIDataType.FLOAT]

    src_pipe = Pipeline(batch_size, 1, 0)
    by_ndim = fn.external_source(source=src_data, num_outputs=2, dtype=dtypes,
                                 ndim=[3, 2])
    by_layout = fn.external_source(source=src_data, num_outputs=2, dtype=dtypes,
                                   layout=["HWC", "HW"])
    src_pipe.set_outputs(by_ndim[0], by_ndim[1], by_layout[0], by_layout[1])
    src_pipe.build()

    out11, out12, out21, out22 = src_pipe.run()
    expected_hwc = np.ones((120, 120, 3), dtype=np.uint8)
    expected_hw = [np.ones((120, 120), dtype=np.float32)]
    for i in range(batch_size):
        for hwc_out, hw_out in ((out11, out12), (out21, out22)):
            hwc_sample = hwc_out.at(i)
            hw_sample = hw_out.at(i)
            assert np.array_equal(hwc_sample, expected_hwc)
            assert np.allclose(hw_sample, expected_hw)
def _test_reader_files_arg(use_root, use_labels, shuffle):
    """Read ``g_files`` through ``fn.readers.file(files=...)`` and verify the contents.

    Args:
        use_root: when false, the file names are joined with ``g_root`` and no
            ``file_root`` is passed to the reader.
        use_labels: when true, explicit labels ``10000 + index`` are passed.
        shuffle: forwarded as ``random_shuffle``.

    The label returned with every sample is mapped back to an index into the
    file list and the sample is compared with ``ref_contents`` of that file.
    """
    label_offset = 10000
    batch_size = 3

    if use_root:
        root, fnames = g_root, g_files
    else:
        root, fnames = None, [os.path.join(g_root, name) for name in g_files]

    lbl = None
    if use_labels:
        lbl = list(range(label_offset, label_offset + len(fnames)))

    pipe = Pipeline(batch_size, 1, 0)
    files, labels = fn.readers.file(file_root=root,
                                    files=fnames,
                                    labels=lbl,
                                    random_shuffle=shuffle)
    pipe.set_outputs(files, labels)
    pipe.build()

    num_iters = (len(fnames) + 2 * batch_size) // batch_size
    for _ in range(num_iters):
        out_f, out_l = pipe.run()
        for j in range(batch_size):
            contents = bytes(out_f.at(j)).decode('utf-8')
            label = out_l.at(j)[0]
            index = label - label_offset if use_labels else label
            assert contents == ref_contents(fnames[index])
def check_slice_named_args_errors(device, batch_size):
    """Expect an error when ``fn.slice`` gets both ``end`` and ``shape``.

    The slice is defined with positional and named start/shape arguments plus a
    named ``end``; building and running the pipeline must raise ``RuntimeError``
    whose message matches the "mutually exclusive" glob.

    Args:
        device: not used by this check.
        batch_size: number of random ``5x4x3`` uint8 samples per batch.
    """
    test_data_shape = [5, 4, 3]

    def get_data():
        return [
            np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8)
            for _ in range(batch_size)
        ]

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        data = fn.external_source(source=get_data, layout="HWC")
        start = np.array([1, 2])
        shape = np.array([3, 1])
        conflicting = fn.slice(data, start, shape,
                               start=start, end=start + shape, shape=shape,
                               axes=(0, 1))
        pipe.set_outputs(conflicting)

    expected_message = \
        '"end", "rel_end", "shape", and "rel_shape" arguments are mutually exclusive'
    with assert_raises(RuntimeError, glob=expected_message):
        pipe.build()
        pipe.run()
def _test_cross_device(src, dst, use_dali_tensor=False):
    """Feed data created on GPU ``src`` to a pipeline running on GPU ``dst``.

    Args:
        src: device id used for ``cp.cuda.Device`` when creating the data.
        dst: device id passed to the ``Pipeline``.
        use_dali_tensor: when true the data is wrapped (via DLPack) in a DALI
            ``TensorGPU`` before it is returned from the callback.
    """
    # With use_dali_tensor the data is converted (via DLPack) to the DALI native Tensor
    # before feeding, to additionally check that the constructor handles device_id correctly.
    # TODO(klecki): [device_id] currently the device_id is not exposed in Python Tensors, so there
    # is no other way we may verify it.
    import nvidia.dali.fn as fn
    import numpy as np

    base = [[1, 2, 3, 4], [5, 6, 7, 8]]
    pipe = Pipeline(1, 3, dst)

    calls = 0

    def get_data():
        nonlocal calls
        with cp.cuda.Device(src):
            data = cp.array(base, dtype=cp.float32) + calls
            calls += 1
        return TensorGPU(data.toDlpack()) if use_dali_tensor else data

    with pipe:
        source = fn.external_source(get_data, batch=False, device='gpu')
        pipe.set_outputs(source)

    pipe.build()
    for i in range(10):
        out, = pipe.run()
        result = np.array(out[0].as_cpu())
        assert np.array_equal(result, np.array(base) + i)
def check_operator_coco_reader_label_remap(avoid_remap):
    """Compare labels returned by the COCO reader with the reference in ``test_data``.

    Args:
        avoid_remap: forwarded as ``avoid_class_remapping``; it also selects
            whether ``cls`` or ``mapped_cls`` of each reference entry is the
            expected label.
    """
    batch_size = 2
    images = list(test_data.keys())
    expected_attr = 'cls' if avoid_remap else 'mapped_cls'
    ids_map = {s.id: getattr(s, expected_attr) for s in test_data.values()}

    pipeline = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipeline:
        _, _, labels, ids = fn.readers.coco(file_root=file_root,
                                            annotations_file=train_annotations,
                                            image_ids=True,
                                            images=images,
                                            avoid_class_remapping=avoid_remap)
        pipeline.set_outputs(ids, labels)
    pipeline.build()

    assert len(images) % batch_size == 0
    for batch_start in range(0, len(images), batch_size):
        out_ids, out_labels = pipeline.run()
        for s in range(batch_size):
            i = batch_start + s  # running sample counter, reported on failure
            image_id = out_ids.at(s)
            label = out_labels.at(s)
            print(image_id, label)
            expected = ids_map[int(image_id)]
            assert expected == int(label), f"{i}, {expected} vs {label}"
def _test_file_reader_filter(filters, glob_filters, batch_size, num_threads,
                             subpath, case_sensitive_filter):
    """Check that ``fn.readers.file`` with ``file_filters`` returns the expected files.

    The reference list is built by globbing every first-level directory of the
    data root with ``glob_filters``; the sorted directory index serves as the
    label. Entries are sorted by (label, file name, path) and the reader output
    is compared with the file contents in that order.

    Args:
        filters: forwarded as ``file_filters`` to the reader.
        glob_filters: glob patterns used to build the reference file list.
        batch_size: pipeline batch size.
        num_threads: number of pipeline threads.
        subpath: directory relative to ``$DALI_EXTRA_PATH``.
        case_sensitive_filter: forwarded to the reader.
    """
    pipe = Pipeline(batch_size, num_threads, 0)
    root = os.path.join(os.environ['DALI_EXTRA_PATH'], subpath)
    files, labels = fn.readers.file(
        file_root=root,
        file_filters=filters,
        case_sensitive_filter=case_sensitive_filter)
    pipe.set_outputs(files, labels)
    pipe.build()

    # A set is used because several patterns may match the same file.
    expected = set()
    subdirs = sorted(next(os.walk(root))[1])
    for label, subdir in enumerate(subdirs):
        for pattern in glob_filters:
            for path in glob.glob(os.path.join(root, subdir, pattern)):
                expected.add((label, os.path.basename(path), path))
    expected = sorted(expected)

    for i in range(len(expected) // batch_size):
        out_f, _ = pipe.run()
        for j in range(batch_size):
            path = expected[i * batch_size + j][2]
            with open(path, 'rb') as stream:
                contents = np.frombuffer(stream.read(), dtype=np.uint8)
            # array_equal also covers a length mismatch, which an element-wise
            # ``==`` followed by ``all`` cannot report as a plain test failure.
            assert np.array_equal(contents, out_f.at(j)), \
                "Unexpected contents for file: {}".format(path)
def _test_very_small_output(dim, device):
    """Resize images to a size of ``1e-10`` and expect every spatial extent to be 1.

    Args:
        dim: 2 reads and decodes images from the Caffe database; any other
            value uses ``random_3d_loader`` with the "DHWC" layout.
        device: "cpu" keeps the data on the CPU, anything else moves it to GPU.
    """
    batch_size = 8
    pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0, seed=1234)

    if dim != 2:
        images_cpu = dali.fn.external_source(
            source=random_3d_loader(batch_size), layout="DHWC")
    else:
        files, labels = dali.fn.readers.caffe(path=db_2d_folder, random_shuffle=True)
        images_cpu = dali.fn.decoders.image(files, device="cpu")

    if device == "cpu":
        images = images_cpu
    else:
        images = images_cpu.gpu()

    pipe.set_outputs(fn.resize(images, size=1e-10))
    pipe.build()

    ref_size = [1, 1, 1, 1] if dim == 3 else [1, 1, 3]
    for _ in range(3):
        out, = pipe.run()
        for tensor in out:
            assert tensor.shape() == ref_size
def check_slice_named_args_default_start_or_end(device, batch_size):
    """Check that omitting ``start`` or ``end`` in ``fn.slice`` gives the same result
    as passing them explicitly.

    The slice with ``start`` and ``end=in_shape`` is compared with the slice with
    only ``start``; the slice with ``start=[0, 0]`` and ``end`` is compared with
    the slice with only ``end``.

    Args:
        device: not used by this check.
        batch_size: number of random ``5x4x3`` uint8 samples per batch.
    """
    test_data_shape = [5, 4, 3]

    def get_data():
        return [
            np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8)
            for _ in range(batch_size)
        ]

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        data = fn.external_source(source=get_data, layout="HWC")
        in_shape = np.array([5, 4])
        start = np.array([1, 2])
        shape = np.array([3, 1])
        end = start + shape
        outs = [
            fn.slice(data, start=start, end=in_shape, axes=(0, 1)),
            fn.slice(data, start=[0, 0], end=end, axes=(0, 1)),
            fn.slice(data, start=start, axes=(0, 1)),
            fn.slice(data, end=end, axes=(0, 1)),
        ]
        pipe.set_outputs(*outs)
    pipe.build()

    for _ in range(3):
        outs = pipe.run()
        for sample in range(batch_size):
            # explicit end vs. default end
            np.testing.assert_equal(np.array(outs[0][sample]), np.array(outs[2][sample]))
            # explicit start vs. default start
            np.testing.assert_equal(np.array(outs[1][sample]), np.array(outs[3][sample]))
def _test_external_source_callback_torch_stream(src_device, gen_device):
    """Feed DLPack-wrapped torch tensors produced on a non-default CUDA stream.

    Ten pipelines are built in turn. The callback of each adds 10 in place to a
    tensor before every batch, and the output is compared with the value
    expected for the given attempt and iteration.

    Args:
        src_device: forwarded as ``device`` to ``fn.external_source``.
        gen_device: torch device on which the tensors are created.
    """
    with torch.cuda.stream(torch.cuda.Stream()):
        for attempt in range(10):
            base = attempt * 100 + 1.5
            t0 = torch.tensor([base], dtype=torch.float32, device=gen_device)
            step = torch.tensor([10], dtype=torch.float32, device=gen_device)

            pipe = Pipeline(1, 3, 0)

            def gen_batch():
                nonlocal t0
                t0 += step
                return [to_dlpack(t0)]

            source = fn.external_source(source=gen_batch,
                                        device=src_device,
                                        cuda_stream=torch.cuda.current_stream())
            pipe.set_outputs(source)
            pipe.build()

            for i in range(10):
                expected = np.array([attempt * 100 + (i + 1) * 10 + 1.5],
                                    dtype=np.float32)
                check_output(pipe.run(), [expected])
def test_external_source_generator():
    """Use a generator object as the source and check the five batches it yields."""
    num_batches = 5
    pipe = Pipeline(1, 3, 0)

    def gen():
        for i in range(num_batches):
            yield [make_array([i + 1.5], dtype=datapy.float32)]

    source = fn.external_source(gen())
    pipe.set_outputs(source)
    pipe.build()

    for i in range(num_batches):
        expected = [np.array([i + 1.5], dtype=np.float32)]
        check_output(pipe.run(), expected)
def test_dtype_arg():
    """Check that an external source with ``dtype=UINT8`` passes uint8 data through.

    Every output sample must have the uint8 dtype and must be equal to the
    array of ones that was fed in.
    """
    batch_size = 2
    expected = np.ones((120, 120, 3), dtype=np.uint8)
    src_data = [[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size]

    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext = fn.external_source(source=src_data, dtype=DALIDataType.UINT8)
    src_pipe.set_outputs(src_ext)
    src_pipe.build()

    out, = src_pipe.run()
    for i in range(batch_size):
        t = out.at(i)
        assert t.dtype == np.uint8
        # The comparison result used to be discarded; it is asserted now.
        assert np.array_equal(t, expected)
def test_constant_promotion_mixed():
    """Decode the same JPEG from a file reader and from a numpy constant.

    The encoded file is passed to the "mixed" image decoder once as a reader
    output and once as a numpy array; both decoded batches are compared with
    ``check_batch``.
    """
    filename = os.path.join(jpeg_folder, "241", "cute-4074304_1280.jpg")
    encoded = np.fromfile(filename, dtype=np.uint8)

    pipe = Pipeline(1, 3, 0)
    with pipe:
        jpegs, _ = fn.readers.file(files=[filename])
        decoded_reader = fn.image_decoder(jpegs, device="mixed")
        decoded_constant = fn.image_decoder(encoded, device="mixed")
        pipe.set_outputs(decoded_constant, decoded_reader)
    pipe.build()

    # The outputs come back in the order given to set_outputs.
    constant_out, reader_out = pipe.run()
    check_batch(constant_out, reader_out, 1)
def test_operator_coco_reader_same_images():
    """Check that three COCO readers with different mask options return the same
    images, boxes and labels.

    For each dataset directory, one reader without masks, one with
    ``polygon_masks=True`` and one with ``pixelwise_masks=True`` are placed in a
    single pipeline and their first three outputs are compared for a full epoch.
    """
    # Overwritten by the loop variable of the same name below.
    file_root = os.path.join(test_data_root, 'db', 'coco_pixelwise', 'images')
    train_annotations = os.path.join(test_data_root, 'db', 'coco_pixelwise', 'instances.json')

    coco_dir = os.path.join(test_data_root, 'db', 'coco')
    coco_dir_imgs = os.path.join(coco_dir, 'images')
    coco_pixelwise_dir = os.path.join(test_data_root, 'db', 'coco_pixelwise')
    coco_pixelwise_dir_imgs = os.path.join(coco_pixelwise_dir, 'images')

    datasets = [
        (coco_dir_imgs, os.path.join(coco_dir, 'instances.json')),
        (coco_pixelwise_dir_imgs, os.path.join(coco_pixelwise_dir, 'instances.json')),
        (coco_pixelwise_dir_imgs, os.path.join(coco_pixelwise_dir, 'instances_rle_counts.json')),
    ]

    # NOTE(review): ``annotations_file`` from the loop is never used; every reader
    # receives ``train_annotations``. Confirm whether that is intended.
    for file_root, annotations_file in datasets:
        pipe = Pipeline(batch_size=1, num_threads=4, device_id=0)
        with pipe:
            inputs1, boxes1, labels1, *other = fn.readers.coco(
                file_root=file_root,
                annotations_file=train_annotations,
                name="reader1",
                seed=1234)
            inputs2, boxes2, labels2, *other = fn.readers.coco(
                file_root=file_root,
                annotations_file=train_annotations,
                polygon_masks=True,
                name="reader2")
            inputs3, boxes3, labels3, *other = fn.readers.coco(
                file_root=file_root,
                annotations_file=train_annotations,
                pixelwise_masks=True,
                name="reader3")
            pipe.set_outputs(inputs1, boxes1, labels1,
                             inputs2, boxes2, labels2,
                             inputs3, boxes3, labels3)
        pipe.build()

        epoch_sz = pipe.epoch_size("reader1")
        assert epoch_sz == pipe.epoch_size("reader2")
        assert epoch_sz == pipe.epoch_size("reader3")

        for _ in range(epoch_sz):
            (inputs1, boxes1, labels1,
             inputs2, boxes2, labels2,
             inputs3, boxes3, labels3) = pipe.run()
            # The plain reader is the reference for the two mask-producing ones.
            comparisons = (
                (inputs1, inputs2, inputs3),
                (labels1, labels2, labels3),
                (boxes1, boxes2, boxes3),
            )
            for reference, polygon, pixelwise in comparisons:
                np.testing.assert_array_equal(reference.at(0), polygon.at(0))
                np.testing.assert_array_equal(reference.at(0), pixelwise.at(0))
def test_external_source_gen_function_cycle():
    """Use a generator function with ``cycle=True`` and read its five batches three times."""
    num_batches = 5
    num_cycles = 3
    pipe = Pipeline(1, 3, 0)

    def gen():
        for i in range(num_batches):
            yield [make_array([i + 1.5], dtype=datapy.float32)]

    source = fn.external_source(gen, cycle=True)
    pipe.set_outputs(source)
    pipe.build()

    for _ in range(num_cycles):
        for i in range(num_batches):
            expected = [np.array([i + 1.5], dtype=np.float32)]
            check_output(pipe.run(), expected)
def test_external_source_collection_cycling():
    """Use a list of batches with ``cycle=True`` and read the whole list three times."""
    pipe = Pipeline(1, 3, 0)

    first_batch = [make_array([1.5, 2.5], dtype=datapy.float32)]
    second_batch = [make_array([-1, 3.5, 4.5], dtype=datapy.float32)]
    batches = [first_batch, second_batch]

    source = fn.external_source(batches, cycle=True)
    pipe.set_outputs(source)
    pipe.build()

    # One epoch is one full cycle over the source iterable.
    for _ in range(3):
        for source_batch in batches:
            reference = asnumpy(source_batch)
            check_output(pipe.run(), reference)
def check_pad_per_sample_shapes_and_alignment(device='cpu', batch_size=3, ndim=2, num_iter=3):
    """Check ``fn.pad`` with per-sample ``shape`` and ``align`` arguments.

    Random input shapes, requested shapes and alignments are generated inside
    the pipeline. The data is padded with only ``shape``, with only ``align``
    and with both, and the resulting sample shapes are verified.

    Args:
        device: 'gpu' moves the input data to the GPU before padding.
        batch_size: pipeline batch size.
        ndim: number of elements of the generated shape/alignment vectors.
        num_iter: number of pipeline runs to check.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=0, seed=1234)
    axes = (0, 1)

    def random_int_vector(low, high):
        # Per-sample vector of ``ndim`` values drawn from (low, high), cast to INT32.
        values = fn.random.uniform(range=(low, high), shape=(ndim, ))
        return fn.cast(values, dtype=types.INT32)

    with pipe:
        in_shape = random_int_vector(10, 20)
        in_data = fn.random.uniform(range=(0., 1.), shape=in_shape)
        if device == 'gpu':
            in_data = in_data.gpu()
        req_shape = random_int_vector(21, 30)
        req_align = random_int_vector(3, 5)
        out_pad_shape = fn.pad(in_data, axes=axes, align=None, shape=req_shape)
        out_pad_align = fn.pad(in_data, axes=axes, align=req_align, shape=None)
        out_pad_both = fn.pad(in_data, axes=axes, align=req_align, shape=req_shape)
        pipe.set_outputs(in_shape, in_data, req_shape, req_align,
                         out_pad_shape, out_pad_align, out_pad_both)
    pipe.build()

    for _ in range(num_iter):
        outs = []
        for out in pipe.run():
            outs.append(out.as_cpu() if isinstance(out, TensorListGPU) else out)

        for i in range(batch_size):
            (shape_i, data_i, req_shape_i, req_align_i,
             padded_shape_i, padded_align_i, padded_both_i) = [o.at(i) for o in outs]
            assert (shape_i == data_i.shape).all()

            # Pad to explicit shape
            assert (padded_shape_i.shape >= shape_i).all()
            assert (req_shape_i == padded_shape_i.shape).all()

            # Alignment only
            assert (padded_align_i.shape >= shape_i).all()
            assert is_aligned(padded_align_i.shape, req_align_i, axes)

            # Explicit shape + alignment
            assert (padded_both_i.shape >= shape_i).all()
            assert (req_shape_i <= padded_both_i.shape).all()
            assert is_aligned(padded_both_i.shape, req_align_i, axes)
def test_external_source_with_iter():
    """Use a one-argument callback as the source and check that it receives the
    iteration index.

    Ten pipelines are built in turn; the value produced by the callback encodes
    both the attempt number and the index passed to it.
    """
    for attempt in range(10):
        pipe = Pipeline(1, 3, 0)

        def make_batch(i):
            return [make_array([attempt * 100 + i * 10 + 1.5], dtype=datapy.float32)]

        pipe.set_outputs(fn.external_source(make_batch))
        pipe.build()

        for i in range(10):
            expected = [np.array([attempt * 100 + i * 10 + 1.5], dtype=np.float32)]
            check_output(pipe.run(), expected)
def _test_feed_input(device):
    """Pass the output of a source pipeline to another pipeline via ``feed_input``.

    Three batches from ``build_src_pipe(device)`` are fed to an external source
    named "ext" in a synchronous pipeline, and the outputs of both pipelines
    are compared with ``check_batch``.
    """
    src_pipe, batch_size = build_src_pipe(device)

    dst_pipe = Pipeline(batch_size, 1, 0, exec_async=False, exec_pipelined=False)
    passthrough = fn.external_source(name="ext", device=device)
    dst_pipe.set_outputs(passthrough)
    dst_pipe.build()

    for _ in range(3):
        produced = src_pipe.run()
        dst_pipe.feed_input("ext", produced[0])
        received = dst_pipe.run()
        check_batch(received[0], produced[0], batch_size, 0, 0, "XY")
def _test_empty_input(dim, device):
    """Resize a batch in which every second sample was sliced down to zero volume.

    The same images are resized twice: directly, and after a slice whose
    relative shape is zero for even-indexed samples. Odd samples must be
    identical in both outputs; even samples of the sliced variant must be empty.

    Args:
        dim: 2 reads and decodes images from the Caffe database; any other
            value uses ``random_3d_loader`` with the "DHWC" layout.
        device: "cpu" keeps the data on the CPU, anything else moves it to GPU.
    """
    batch_size = 8
    pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0, seed=1234)

    if dim != 2:
        images_cpu = dali.fn.external_source(
            source=random_3d_loader(batch_size), layout="DHWC")
    else:
        files, labels = dali.fn.readers.caffe(path=db_2d_folder, random_shuffle=True)
        images_cpu = dali.fn.decoders.image(files, device="cpu")

    if device == "cpu":
        images = images_cpu
    else:
        images = images_cpu.gpu()

    in_rel_shapes = np.ones([batch_size, dim], dtype=np.float32)
    in_rel_shapes[::2, :] = 0  # all zeros in every second sample

    rel_shape_input = fn.external_source(lambda: in_rel_shapes)
    degenerate_images = fn.slice(images, np.zeros([dim]), rel_shape_input,
                                 axes=list(range(dim)))

    sizes = np.random.randint(20, 50, [batch_size, dim], dtype=np.int32)
    size_inp = fn.external_source(lambda: [row.astype(np.float32) for row in sizes])

    resize_no_empty = fn.resize(images, size=size_inp, mode="not_larger")
    resize_with_empty = fn.resize(degenerate_images, size=size_inp, mode="not_larger")

    pipe.set_outputs(resize_no_empty, resize_with_empty)
    pipe.build()

    for _ in range(3):
        out_no_empty, out_with_empty = pipe.run()
        if device == "gpu":
            out_no_empty = out_no_empty.as_cpu()
            out_with_empty = out_with_empty.as_cpu()
        for i in range(batch_size):
            if i % 2 == 0:
                # degenerate sample: the output must have no elements
                assert np.prod(out_with_empty.at(i).shape) == 0
            else:
                assert np.array_equal(out_no_empty.at(i), out_with_empty.at(i))
def check_slice_named_args(device, batch_size):
    """Check that all supported ways of naming the ``fn.slice`` window are equivalent.

    The reference output uses positional absolute start/shape. It is compared
    with positional relative arguments and with every named combination of
    ``start``/``rel_start`` with ``shape``/``end``/``rel_shape``/``rel_end``,
    each given both as a numpy array and as a Python list.

    Args:
        device: not used by this check.
        batch_size: number of random ``5x4x3`` uint8 samples per batch.
    """
    test_data_shape = [5, 4, 3]
    axes = (0, 1)

    def get_data():
        return [
            np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8)
            for _ in range(batch_size)
        ]

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        data = fn.external_source(source=get_data, layout="HWC")

        # The same window described as plain lists ...
        in_shape_list = [5, 4]
        start_list = [1, 2]
        shape_list = [3, 1]
        end_list = [s + e for s, e in zip(start_list, shape_list)]
        rel_start_list = [v / full for v, full in zip(start_list, in_shape_list)]
        rel_shape_list = [v / full for v, full in zip(shape_list, in_shape_list)]
        rel_end_list = [v / full for v, full in zip(end_list, in_shape_list)]

        # ... and as numpy arrays.
        in_shape = np.array(in_shape_list)
        start = np.array(start_list)
        shape = np.array(shape_list)
        end = start + shape
        rel_start = start / in_shape
        rel_shape = shape / in_shape
        rel_end = end / in_shape

        outs = [
            fn.slice(data, start, shape, axes=axes),
            fn.slice(data, rel_start, rel_shape, axes=axes),
        ]

        for start_arg in (start, start_list):
            for shape_arg in (shape, shape_list):
                outs.append(fn.slice(data, start=start_arg, shape=shape_arg, axes=axes))
            for end_arg in (end, end_list):
                outs.append(fn.slice(data, start=start_arg, end=end_arg, axes=axes))

        for rel_start_arg in (rel_start, rel_start_list):
            for rel_shape_arg in (rel_shape, rel_shape_list):
                outs.append(fn.slice(data, rel_start=rel_start_arg,
                                     rel_shape=rel_shape_arg, axes=axes))
            for rel_end_arg in (rel_end, rel_end_list):
                outs.append(fn.slice(data, rel_start=rel_start_arg,
                                     rel_end=rel_end_arg, axes=axes))
            for shape_arg in (shape, shape_list):
                outs.append(fn.slice(data, rel_start=rel_start_arg,
                                     shape=shape_arg, axes=axes))

        pipe.set_outputs(*outs)
    pipe.build()

    for _ in range(3):
        outs = pipe.run()
        reference = outs[0]
        for out_idx in range(1, len(outs)):
            candidate = outs[out_idx]
            for sample in range(batch_size):
                np.testing.assert_equal(np.array(reference[sample]),
                                        np.array(candidate[sample]))