def test_tf_experimental_source_disabled():
    pipe = Pipeline(10, 4, 0)
    with pipe:
        input = fn.external_source(source=lambda: np.full((4, 4), 0), batch=False)
        pipe.set_outputs(fn.pad(input))
    dali_tf.DALIDataset(pipe, output_dtypes=tf.int32)


def reference_pipeline(flip_vertical, flip_horizontal, ref_batch_size=max_batch_size):
    pipeline = Pipeline(ref_batch_size, num_threads, device_id)
    with pipeline:
        data, _ = fn.readers.file(file_root=images_dir)
        img = fn.decoders.image(data)
        flipped = fn.flip(img, horizontal=flip_horizontal, vertical=flip_vertical)
        pipeline.set_outputs(flipped, img)
    return pipeline


def get_pipeline_desc(batch_size, num_threads, device, device_id, shard_id, num_shards,
                      def_for_dataset):
    pipe = Pipeline(batch_size, num_threads, device_id)
    with pipe:
        # Our callbacks may have state; to be able to run them twice, once in the Dataset and
        # once in the baseline test, we need to make a copy to preserve that state.
        es = fn.external_source(device=es_device, **copy.deepcopy(es_args))
        if device == "gpu" and es_device == "cpu":
            es = es.gpu()
        pad = fn.pad(es, device=device)
        pipe.set_outputs(pad)
    return pipe, None, dtype


def _test_batch_info_flag_default(cb, batch_size):
    pipe = Pipeline(batch_size, 1, 0)
    with pipe:
        ext = fn.external_source(source=cb)
        pipe.set_outputs(ext)
    pipe.build()
    pipe.run()


def test_external_source_mixed_contiguous():
    batch_size = 2
    iterations = 4

    def generator(i):
        if i % 2:
            return cp.array([[100 + i * 10 + 1.5]] * batch_size, dtype=cp.float32)
        else:
            return batch_size * [cp.array([100 + i * 10 + 1.5], dtype=cp.float32)]

    pipe = Pipeline(batch_size, 3, 0)
    pipe.set_outputs(fn.external_source(device="gpu", source=generator, no_copy=True))
    pipe.build()

    pattern = "ExternalSource operator should not mix contiguous and noncontiguous inputs. " \
              "In such a case the internal memory used to gather data in a contiguous chunk of " \
              "memory would be trashed."
    with check_output_pattern(pattern):
        for _ in range(iterations):
            pipe.run()


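# For reference, a simplified, hypothetical stand-in for the `check_output_pattern`
# helper used above: a context manager that captures Python-level stderr while the
# block runs and asserts the expected text appeared. The real test utility also has
# to capture output written by the C++ backend through the raw file descriptors,
# which plain Python redirection cannot see. Shown under a different name so it
# does not shadow the real helper:
import contextlib
import io


@contextlib.contextmanager
def _check_output_pattern_sketch(pattern):
    capture = io.StringIO()
    with contextlib.redirect_stderr(capture):
        yield
    assert pattern in capture.getvalue(), \
        f"pattern {pattern!r} not found in captured output"

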
def test_incorrect_dtype_arg():
    # feeding float32 data while UINT8 was declared should be rejected at run time
    batch_size = 2
    src_data = [[np.ones((120, 120, 3), dtype=np.float32)] * batch_size]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext = fn.external_source(source=src_data, dtype=DALIDataType.UINT8)
    src_pipe.set_outputs(src_ext)
    src_pipe.build()
    src_pipe.run()


def test_operator_coco_reader_same_images():
    coco_dir = os.path.join(test_data_root, 'db', 'coco')
    coco_dir_imgs = os.path.join(coco_dir, 'images')
    coco_pixelwise_dir = os.path.join(test_data_root, 'db', 'coco_pixelwise')
    coco_pixelwise_dir_imgs = os.path.join(coco_pixelwise_dir, 'images')

    for file_root, annotations_file in [
            (coco_dir_imgs, os.path.join(coco_dir, 'instances.json')),
            (coco_pixelwise_dir_imgs, os.path.join(coco_pixelwise_dir, 'instances.json')),
            (coco_pixelwise_dir_imgs,
             os.path.join(coco_pixelwise_dir, 'instances_rle_counts.json'))]:
        pipe = Pipeline(batch_size=1, num_threads=4, device_id=0)
        with pipe:
            inputs1, boxes1, labels1, *other = fn.readers.coco(
                file_root=file_root,
                annotations_file=annotations_file,
                name="reader1",
                seed=1234
            )
            inputs2, boxes2, labels2, *other = fn.readers.coco(
                file_root=file_root,
                annotations_file=annotations_file,
                polygon_masks=True,
                name="reader2"
            )
            inputs3, boxes3, labels3, *other = fn.readers.coco(
                file_root=file_root,
                annotations_file=annotations_file,
                pixelwise_masks=True,
                name="reader3"
            )
            pipe.set_outputs(
                inputs1, boxes1, labels1, inputs2, boxes2, labels2, inputs3, boxes3, labels3
            )
        pipe.build()

        epoch_sz = pipe.epoch_size("reader1")
        assert epoch_sz == pipe.epoch_size("reader2")
        assert epoch_sz == pipe.epoch_size("reader3")

        for i in range(epoch_sz):
            inputs1, boxes1, labels1, inputs2, boxes2, labels2, inputs3, boxes3, labels3 = \
                pipe.run()
            np.testing.assert_array_equal(inputs1.at(0), inputs2.at(0))
            np.testing.assert_array_equal(inputs1.at(0), inputs3.at(0))
            np.testing.assert_array_equal(labels1.at(0), labels2.at(0))
            np.testing.assert_array_equal(labels1.at(0), labels3.at(0))
            np.testing.assert_array_equal(boxes1.at(0), boxes2.at(0))
            np.testing.assert_array_equal(boxes1.at(0), boxes3.at(0))


def test_ndim_changing():
    batch_size = 2
    src_data = [[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size,
                [np.ones((120, 120), dtype=np.uint8)] * batch_size]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext1 = fn.external_source(source=src_data, dtype=DALIDataType.UINT8)
    src_pipe.set_outputs(src_ext1)
    src_pipe.build()
    src_pipe.run()
    src_pipe.run()


def test_changing_dtype():
    batch_size = 2
    src_data = [[np.ones((120, 120, 3), dtype=np.float32)] * batch_size,
                [np.ones((120, 120, 3), dtype=np.uint8)] * batch_size]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext = fn.external_source(source=src_data)
    src_pipe.set_outputs(src_ext)
    src_pipe.build()
    src_pipe.run()
    src_pipe.run()


def test_ndim_arg_multioutput():
    batch_size = 2
    src_data = [[[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size,
                 [np.ones((120, 120), dtype=np.float32)] * batch_size]]
    src_pipe = Pipeline(batch_size, 1, 0)
    src1_ext, src1_ext2 = fn.external_source(
        source=src_data, num_outputs=2,
        dtype=[DALIDataType.UINT8, DALIDataType.FLOAT],
        ndim=[3, 2])
    src2_ext, src2_ext2 = fn.external_source(
        source=src_data, num_outputs=2,
        dtype=[DALIDataType.UINT8, DALIDataType.FLOAT],
        layout=["HWC", "HW"])
    src_pipe.set_outputs(src1_ext, src1_ext2, src2_ext, src2_ext2)
    src_pipe.build()
    out11, out12, out21, out22 = src_pipe.run()
    for i in range(batch_size):
        t1 = out11.at(i)
        t2 = out12.at(i)
        assert np.array_equal(t1, np.ones((120, 120, 3), dtype=np.uint8))
        assert np.allclose(t2, [np.ones((120, 120), dtype=np.float32)])
        t3 = out21.at(i)
        t4 = out22.at(i)
        assert np.array_equal(t3, np.ones((120, 120, 3), dtype=np.uint8))
        assert np.allclose(t4, [np.ones((120, 120), dtype=np.float32)])


def _test_reader_files_arg(use_root, use_labels, shuffle):
    root = g_root
    fnames = g_files
    if not use_root:
        fnames = [os.path.join(root, f) for f in fnames]
        root = None

    lbl = None
    if use_labels:
        lbl = [10000 + i for i in range(len(fnames))]

    batch_size = 3
    pipe = Pipeline(batch_size, 1, 0)
    files, labels = fn.readers.file(file_root=root, files=fnames, labels=lbl,
                                    random_shuffle=shuffle)
    pipe.set_outputs(files, labels)
    pipe.build()

    num_iters = (len(fnames) + 2 * batch_size) // batch_size
    for i in range(num_iters):
        out_f, out_l = pipe.run()
        for j in range(batch_size):
            contents = bytes(out_f.at(j)).decode('utf-8')
            label = out_l.at(j)[0]
            index = label - 10000 if use_labels else label
            assert contents == ref_contents(fnames[index])


def check_slice_named_args_errors(device, batch_size):
    test_data_shape = [5, 4, 3]

    def get_data():
        out = [np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8)
               for _ in range(batch_size)]
        return out

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        data = fn.external_source(source=get_data, layout="HWC")
        start = np.array([1, 2])
        shape = np.array([3, 1])
        outs = [
            fn.slice(data, start, shape, start=start, end=start + shape, shape=shape,
                     axes=(0, 1)),
        ]
        pipe.set_outputs(*outs)

    with assert_raises(
            RuntimeError,
            glob='"end", "rel_end", "shape", and "rel_shape" arguments are mutually exclusive'):
        pipe.build()
        for _ in range(1):
            outs = pipe.run()


def _test_cross_device(src, dst, use_dali_tensor=False):
    # `use_dali_tensor` converts the data (via DLPack) to a native DALI Tensor before feeding
    # it, to additionally check that the constructor handles device_id correctly.
    # TODO(klecki): [device_id] currently the device_id is not exposed in Python Tensors, so
    # there is no other way we can verify it.
    import nvidia.dali.fn as fn
    import numpy as np

    pipe = Pipeline(1, 3, dst)

    iter = 0

    def get_data():
        nonlocal iter
        with cp.cuda.Device(src):
            data = cp.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=cp.float32) + iter
            iter += 1
            if use_dali_tensor:
                return TensorGPU(data.toDlpack())
        return data

    with pipe:
        pipe.set_outputs(fn.external_source(get_data, batch=False, device='gpu'))
    pipe.build()

    for i in range(10):
        out, = pipe.run()
        assert np.array_equal(np.array(out[0].as_cpu()),
                              np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) + i)


def check_operator_coco_reader_label_remap(avoid_remap):
    batch_size = 2
    images = list(test_data.keys())
    ids_map = {s.id: s.cls if avoid_remap else s.mapped_cls for s in test_data.values()}

    pipeline = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipeline:
        _, _, labels, ids = fn.readers.coco(file_root=file_root,
                                            annotations_file=train_annotations,
                                            image_ids=True, images=images,
                                            avoid_class_remapping=avoid_remap)
        pipeline.set_outputs(ids, labels)
    pipeline.build()

    i = 0
    assert len(images) % batch_size == 0
    while i < len(images):
        out = pipeline.run()
        for s in range(batch_size):
            print(out[0].at(s), out[1].at(s))
            assert ids_map[int(out[0].at(s))] == int(out[1].at(s)), \
                f"{i}, {ids_map[int(out[0].at(s))]} vs {out[1].at(s)}"
            i = i + 1


def _test_file_reader_filter(filters, glob_filters, batch_size, num_threads, subpath,
                             case_sensitive_filter):
    pipe = Pipeline(batch_size, num_threads, 0)
    root = os.path.join(os.environ['DALI_EXTRA_PATH'], subpath)
    files, labels = fn.readers.file(file_root=root, file_filters=filters,
                                    case_sensitive_filter=case_sensitive_filter)
    pipe.set_outputs(files, labels)
    pipe.build()

    fnames = set()
    for label, dir in enumerate(sorted(next(os.walk(root))[1])):
        for filter in glob_filters:
            for file in glob.glob(os.path.join(root, dir, filter)):
                fnames.add((label, file.split('/')[-1], file))

    fnames = sorted(fnames)

    for i in range(len(fnames) // batch_size):
        out_f, _ = pipe.run()
        for j in range(batch_size):
            with open(fnames[i * batch_size + j][2], 'rb') as file:
                contents = np.array(list(file.read()))
                assert all(contents == out_f.at(j))


def _test_very_small_output(dim, device):
    batch_size = 8
    pipe = Pipeline(batch_size=batch_size, num_threads=8, device_id=0, seed=1234)
    if dim == 2:
        files, labels = dali.fn.readers.caffe(path=db_2d_folder, random_shuffle=True)
        images_cpu = dali.fn.decoders.image(files, device="cpu")
    else:
        images_cpu = dali.fn.external_source(source=random_3d_loader(batch_size),
                                             layout="DHWC")

    images = images_cpu if device == "cpu" else images_cpu.gpu()

    resize_tiny = fn.resize(images, size=1e-10)

    pipe.set_outputs(resize_tiny)
    pipe.build()

    for it in range(3):
        out, = pipe.run()
        ref_size = [1, 1, 1, 1] if dim == 3 else [1, 1, 3]
        for t in out:
            assert t.shape() == ref_size


def _test_external_source_callback_torch_stream(src_device, gen_device):
    with torch.cuda.stream(torch.cuda.Stream()):
        for attempt in range(10):
            t0 = torch.tensor([attempt * 100 + 1.5], dtype=torch.float32, device=gen_device)
            increment = torch.tensor([10], dtype=torch.float32, device=gen_device)
            pipe = Pipeline(1, 3, 0)

            def gen_batch():
                nonlocal t0
                t0 += increment
                return [to_dlpack(t0)]

            pipe.set_outputs(fn.external_source(source=gen_batch, device=src_device,
                                                cuda_stream=torch.cuda.current_stream()))
            pipe.build()

            for i in range(10):
                check_output(
                    pipe.run(),
                    [np.array([attempt * 100 + (i + 1) * 10 + 1.5], dtype=np.float32)])


def check_slice_named_args_default_start_or_end(device, batch_size):
    test_data_shape = [5, 4, 3]

    def get_data():
        out = [np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8)
               for _ in range(batch_size)]
        return out

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        data = fn.external_source(source=get_data, layout="HWC")
        in_shape = np.array([5, 4])
        start = np.array([1, 2])
        shape = np.array([3, 1])
        end = start + shape
        rel_start = start / in_shape
        rel_shape = shape / in_shape
        rel_end = end / in_shape
        outs = [
            fn.slice(data, start=start, end=in_shape, axes=(0, 1)),
            fn.slice(data, start=[0, 0], end=end, axes=(0, 1)),
            fn.slice(data, start=start, axes=(0, 1)),
            fn.slice(data, end=end, axes=(0, 1)),
        ]
        pipe.set_outputs(*outs)
    pipe.build()
    for _ in range(3):
        outs = pipe.run()
        for sample in range(batch_size):
            np.testing.assert_equal(np.array(outs[0][sample]), np.array(outs[2][sample]))
            np.testing.assert_equal(np.array(outs[1][sample]), np.array(outs[3][sample]))


def check_layout(kwargs, input_datasets, layout):
    pipe = Pipeline(10, 4, 0)
    with pipe:
        input = fn.external_source(**kwargs)
        # Rely on Pad's internal check to ensure that External Source set the layout.
        pipe.set_outputs(fn.pad(input, axis_names=layout))

    with tf.device('/cpu:0'):
        dali_dataset = dali_tf.experimental.DALIDatasetWithInputs(
            input_datasets=input_datasets,
            pipeline=pipe,
            batch_size=pipe.max_batch_size,
            output_shapes=None,
            output_dtypes=tf.int64,
            num_threads=pipe.num_threads,
            device_id=pipe.device_id)

    run_dataset_eager_mode(dali_dataset, 10)


def _test_feed_input(device):
    src_pipe, batch_size = build_src_pipe(device)

    dst_pipe = Pipeline(batch_size, 1, 0, exec_async=False, exec_pipelined=False)
    dst_pipe.set_outputs(fn.external_source(name="ext", device=device))
    dst_pipe.build()

    for iter in range(3):
        out1 = src_pipe.run()
        dst_pipe.feed_input("ext", out1[0])
        out2 = dst_pipe.run()
        check_batch(out2[0], out1[0], batch_size, 0, 0, "XY")


def test_ndim_data_mismatch():
    batch_size = 2
    src_data = [[[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size,
                 [np.ones((120, 120), dtype=np.uint8)] * batch_size]]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext1, src_ext2 = fn.external_source(source=src_data, num_outputs=2,
                                            dtype=DALIDataType.UINT8, ndim=3)
    src_pipe.set_outputs(src_ext1, src_ext2)
    src_pipe.build()
    src_pipe.run()


def check_dim_mismatch(device, test_data_root, names):
    pipe = Pipeline(2, 2, 0)
    pipe.set_outputs(fn.readers.numpy(device=device, file_root=test_data_root, files=names))
    pipe.build()
    err = None
    try:
        pipe.run()
    except RuntimeError as thrown:
        err = thrown
    # asserts should not be in the except block, to avoid printing a nested exception on failure
    assert err, "Exception not thrown"
    assert "Inconsistent data" in str(err), "Unexpected error message: {}".format(err)


def _test_external_source_callback_split(use_fn_api, batch, as_tensor, device):
    iter_num = 5
    batch_size = 9
    pipe = Pipeline(batch_size, 3, 0)

    # this should produce a two-element list of Tensor(Lists), the first
    # being 2D, the second being 3D (+ batch dimension)
    source = TestIterator(iter_num, batch_size, [2, 3], as_tensor)
    iter_in = iter(source) if batch else iter(SampleIterator(iter(source), True))

    if use_fn_api:
        inputs = fn.external_source(lambda: next(iter_in), 2, device=device, batch=batch)
    else:
        ext_source = ops.ExternalSource(lambda: next(iter_in), num_outputs=2,
                                        device=device, batch=batch)
        inputs = ext_source()
    pipe.set_outputs(*inputs)
    pipe.build()
    run_and_check(pipe, source)


def test_wrong_feature_shape():
    features = {
        'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
        'image/object/bbox': tfrec.FixedLenFeature([], tfrec.float32, -1.0),
        'image/object/class/label': tfrec.FixedLenFeature([], tfrec.int64, -1),
    }
    test_dummy_data_path = os.path.join(get_dali_extra_path(), 'db', 'coco_dummy')
    pipe = Pipeline(1, 1, 0)
    with pipe:
        input = fn.readers.tfrecord(path=os.path.join(test_dummy_data_path,
                                                      'small_coco.tfrecord'),
                                    index_path=os.path.join(test_dummy_data_path,
                                                            'small_coco_index.idx'),
                                    features=features)
        pipe.set_outputs(input['image/encoded'],
                         input['image/object/class/label'],
                         input['image/object/bbox'])
    pipe.build()
    # the error is raised because FixedLenFeature is used with a shape too small to hold the input
    assert_raises(RuntimeError, pipe.run,
                  glob="Error when executing CPU operator*readers*tfrecord*"
                       "Output tensor shape is too small*[]*Expected at least 4 elements")


def NumpyReaderPipeline(path, batch_size, device="cpu", file_list=None, files=None,
                        file_filter="*.npy", num_threads=1, device_id=0,
                        cache_header_information=False):
    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    data = fn.readers.numpy(device=device,
                            file_list=file_list,
                            files=files,
                            file_root=path,
                            file_filter=file_filter,
                            shard_id=0,
                            num_shards=1,
                            cache_header_information=cache_header_information)
    pipe.set_outputs(data)
    return pipe


def test_external_source_iterate_ndarray():
    pipe = Pipeline(4, 3, 0)
    batch = make_array([1.5, 2.5, 2, 3], dtype=datapy.float32)
    pipe.set_outputs(fn.external_source(batch, batch=False))
    pipe.build()
    run_and_check(pipe, [batch])


def test_external_source_generator():
    pipe = Pipeline(1, 3, 0)

    def gen():
        for i in range(5):
            yield [make_array([i + 1.5], dtype=datapy.float32)]

    pipe.set_outputs(fn.external_source(gen()))
    pipe.build()

    for i in range(5):
        check_output(pipe.run(), [np.array([i + 1.5], dtype=np.float32)])


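# For reference, a simplified, hypothetical stand-in for the `check_output` helper
# used by the external-source tests above: it compares each sample of the first
# pipeline output against the corresponding reference array (CPU outputs assumed;
# GPU outputs would first need `.as_cpu()`). Shown under a different name so it
# does not shadow the real helper:
def _check_output_sketch(outputs, refs):
    out = outputs[0]
    assert len(out) == len(refs)
    for i, ref in enumerate(refs):
        np.testing.assert_allclose(np.array(out.at(i)), ref)

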
def test_constant_promotion_mixed():
    filename = os.path.join(jpeg_folder, "241", "cute-4074304_1280.jpg")
    file_contents = np.fromfile(filename, dtype=np.uint8)
    pipe = Pipeline(1, 3, 0)
    with pipe:
        jpegs, _ = fn.readers.file(files=[filename])
        from_reader = fn.image_decoder(jpegs, device="mixed")
        from_constant = fn.image_decoder(file_contents, device="mixed")
        pipe.set_outputs(from_constant, from_reader)
    pipe.build()
    from_constant, from_reader = pipe.run()
    check_batch(from_reader, from_constant, 1)


def test_dtype_arg():
    batch_size = 2
    src_data = [[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext = fn.external_source(source=src_data, dtype=DALIDataType.UINT8)
    src_pipe.set_outputs(src_ext)
    src_pipe.build()
    out, = src_pipe.run()
    for i in range(batch_size):
        t = out.at(i)
        assert t.dtype == np.uint8
        assert np.array_equal(t, np.ones((120, 120, 3), dtype=np.uint8))


def test_external_source_collection():
    pipe = Pipeline(1, 3, 0)
    batches = [[make_array([1.5, 2.5], dtype=datapy.float32)],
               [make_array([-1, 3.5, 4.5], dtype=datapy.float32)]]
    pipe.set_outputs(fn.external_source(batches))
    pipe.build()
    run_and_check(pipe, batches)


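# For reference, a simplified, hypothetical stand-in for `run_and_check` as used by
# the collection and ndarray tests above: run the pipeline once per reference batch
# and compare every output sample with its reference (CPU outputs and a plain list
# or array of reference batches assumed; the iterator-based tests use a richer
# version). Shown under a different name so it does not shadow the real helper:
def _run_and_check_sketch(pipe, ref_batches):
    for ref_batch in ref_batches:
        out, = pipe.run()
        for i in range(len(out)):
            np.testing.assert_allclose(np.array(out.at(i)), np.asarray(ref_batch)[i])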