Example #1
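These excerpts come from a DALI test suite and share a common preamble. Below is a minimal sketch of the imports they assume (aliases inferred from usage; suite-level fixtures and helpers such as test_data_root, g_files, check_batch, run_and_check or assert_raises are defined elsewhere in the suite and left as-is):

import copy
import glob
import os

import numpy as np
import cupy as cp
import tensorflow as tf

import nvidia.dali as dali
import nvidia.dali.fn as fn
import nvidia.dali.ops as ops
import nvidia.dali.plugin.tf as dali_tf
import nvidia.dali.tfrecord as tfrec
from nvidia.dali.pipeline import Pipeline
from nvidia.dali.types import DALIDataType
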
def test_tf_experimental_source_disabled():
    pipe = Pipeline(10, 4, 0)
    with pipe:
        input = fn.external_source(source=lambda: np.full((4, 4), 0),
                                   batch=False)
        pipe.set_outputs(fn.pad(input))
    dali_tf.DALIDataset(pipe, output_dtypes=tf.int32)
def reference_pipeline(flip_vertical, flip_horizontal, ref_batch_size=max_batch_size):
    pipeline = Pipeline(ref_batch_size, num_threads, device_id)
    with pipeline:
        data, _ = fn.readers.file(file_root=images_dir)
        img = fn.decoders.image(data)
        flipped = fn.flip(img, horizontal=flip_horizontal, vertical=flip_vertical)
        pipeline.set_outputs(flipped, img)
    return pipeline
def get_pipeline_desc(batch_size, num_threads, device, device_id, shard_id,
                      num_shards, def_for_dataset):
    pipe = Pipeline(batch_size, num_threads, device_id)
    with pipe:
        # Our callbacks may have state. To be able to run them twice (once in the
        # Dataset and once in the baseline test), we deep-copy the arguments so
        # that each run preserves its own state.
        es = fn.external_source(device=es_device, **copy.deepcopy(es_args))
        if device == "gpu" and es_device == "cpu":
            es = es.gpu()
        pad = fn.pad(es, device=device)
        pipe.set_outputs(pad)
    return pipe, None, dtype
def _test_batch_info_flag_default(cb, batch_size):
    pipe = Pipeline(batch_size, 1, 0)
    with pipe:
        ext = fn.external_source(source=cb)
        pipe.set_outputs(ext)
    pipe.build()
    pipe.run()
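
A hypothetical invocation of the helper above: with the default batch_info=False, a callable source that takes one argument receives the plain iteration index rather than a nvidia.dali.types.BatchInfo object.

def cb(iteration_idx):
    # Default mode: the argument is a plain integer iteration index.
    assert isinstance(iteration_idx, int)
    return [np.zeros((2, 2), dtype=np.float32)] * 4

_test_batch_info_flag_default(cb, batch_size=4)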
def test_external_source_mixed_contiguous():
    batch_size = 2
    iterations = 4

    def generator(i):
        if i % 2:
            return cp.array([[100 + i * 10 + 1.5]] * batch_size,
                            dtype=cp.float32)
        else:
            return batch_size * [
                cp.array([100 + i * 10 + 1.5], dtype=cp.float32)
            ]

    pipe = Pipeline(batch_size, 3, 0)

    pipe.set_outputs(
        fn.external_source(device="gpu", source=generator, no_copy=True))
    pipe.build()

    pattern = "ExternalSource operator should not mix contiguous and noncontiguous inputs. " \
              "In such a case the internal memory used to gather data in a contiguous chunk of " \
              "memory would be trashed."
    with check_output_pattern(pattern):
        for _ in range(iterations):
            pipe.run()
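
check_output_pattern is a test utility not shown in this excerpt. A minimal stand-in, under the assumption that the warning is printed to Python-visible stderr (the real helper may capture output at the file-descriptor level), could look like:

import contextlib
import io

@contextlib.contextmanager
def check_output_pattern(pattern):
    # Capture stderr for the duration of the block, then assert the pattern occurred.
    buf = io.StringIO()
    with contextlib.redirect_stderr(buf):
        yield
    assert pattern in buf.getvalue(), f"pattern not found in output: {pattern!r}"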
def test_incorrect_dtype_arg():
    batch_size = 2
    src_data = [[np.ones((120, 120, 3), dtype=np.float32)] * batch_size]
    src_pipe = Pipeline(batch_size, 1, 0)
    # The declared dtype (uint8) contradicts the float32 data, so running
    # the pipeline is expected to fail.
    src_ext = fn.external_source(source=src_data, dtype=DALIDataType.UINT8)
    src_pipe.set_outputs(src_ext)
    src_pipe.build()
    assert_raises(RuntimeError, src_pipe.run)
def test_operator_coco_reader_same_images():
    coco_dir = os.path.join(test_data_root, 'db', 'coco')
    coco_dir_imgs = os.path.join(coco_dir, 'images')
    coco_pixelwise_dir = os.path.join(test_data_root, 'db', 'coco_pixelwise')
    coco_pixelwise_dir_imgs = os.path.join(coco_pixelwise_dir, 'images')

    for file_root, annotations_file in [
            (coco_dir_imgs, os.path.join(coco_dir, 'instances.json')),
            (coco_pixelwise_dir_imgs, os.path.join(coco_pixelwise_dir, 'instances.json')),
            (coco_pixelwise_dir_imgs, os.path.join(coco_pixelwise_dir, 'instances_rle_counts.json'))]:
        pipe = Pipeline(batch_size=1, num_threads=4, device_id=0)
        with pipe:
            inputs1, boxes1, labels1, *other = fn.readers.coco(
                file_root=file_root,
                annotations_file=annotations_file,
                name="reader1",
                seed=1234
            )
            inputs2, boxes2, labels2, *other = fn.readers.coco(
                file_root=file_root,
                annotations_file=annotations_file,
                polygon_masks=True,
                name="reader2"
            )
            inputs3, boxes3, labels3, *other = fn.readers.coco(
                file_root=file_root,
                annotations_file=annotations_file,
                pixelwise_masks=True,
                name="reader3"
            )
            pipe.set_outputs(
                inputs1, boxes1, labels1,
                inputs2, boxes2, labels2,
                inputs3, boxes3, labels3
            )
        pipe.build()

        epoch_sz = pipe.epoch_size("reader1")
        assert epoch_sz == pipe.epoch_size("reader2")
        assert epoch_sz == pipe.epoch_size("reader3")

        for _ in range(epoch_sz):
            inputs1, boxes1, labels1, inputs2, boxes2, labels2, inputs3, boxes3, labels3 = \
                pipe.run()
            np.testing.assert_array_equal(inputs1.at(0), inputs2.at(0))
            np.testing.assert_array_equal(inputs1.at(0), inputs3.at(0))
            np.testing.assert_array_equal(labels1.at(0), labels2.at(0))
            np.testing.assert_array_equal(labels1.at(0), labels3.at(0))
            np.testing.assert_array_equal(boxes1.at(0), boxes2.at(0))
            np.testing.assert_array_equal(boxes1.at(0), boxes3.at(0))
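
As an aside, epoch_size() called without a reader name returns a dict keyed by reader name, so the equality checks above can be collapsed:

sizes = pipe.epoch_size()  # e.g. {'reader1': n, 'reader2': n, 'reader3': n}
assert len(set(sizes.values())) == 1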
def test_ndim_changing():
    batch_size = 2
    src_data = [[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size,
                [np.ones((120, 120), dtype=np.uint8)] * batch_size]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext1 = fn.external_source(source=src_data, dtype=DALIDataType.UINT8)
    src_pipe.set_outputs(src_ext1)
    src_pipe.build()
    src_pipe.run()
    # The second batch changes the sample dimensionality from 3D to 2D,
    # which is expected to fail.
    assert_raises(RuntimeError, src_pipe.run)
def test_changing_dtype():
    batch_size = 2
    src_data = [[np.ones((120, 120, 3), dtype=np.float32)] * batch_size,
                [np.ones((120, 120, 3), dtype=np.uint8)] * batch_size]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext = fn.external_source(source=src_data)
    src_pipe.set_outputs(src_ext)
    src_pipe.build()
    src_pipe.run()
    # The second batch changes the data type from float32 to uint8,
    # which is expected to fail.
    assert_raises(RuntimeError, src_pipe.run)
def test_ndim_arg_multioutput():
    batch_size = 2
    src_data = [[[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size,
                 [np.ones((120, 120), dtype=np.float32)] * batch_size]]
    src_pipe = Pipeline(batch_size, 1, 0)
    src1_ext, src1_ext2 = fn.external_source(
        source=src_data,
        num_outputs=2,
        dtype=[DALIDataType.UINT8, DALIDataType.FLOAT],
        ndim=[3, 2])

    src2_ext, src2_ext2 = fn.external_source(
        source=src_data,
        num_outputs=2,
        dtype=[DALIDataType.UINT8, DALIDataType.FLOAT],
        layout=["HWC", "HW"])

    src_pipe.set_outputs(src1_ext, src1_ext2, src2_ext, src2_ext2)
    src_pipe.build()
    out11, out12, out21, out22 = src_pipe.run()
    for i in range(batch_size):
        t1 = out11.at(i)
        t2 = out12.at(i)
        assert np.array_equal(t1, np.ones((120, 120, 3), dtype=np.uint8))
        assert np.allclose(t2, [np.ones((120, 120), dtype=np.float32)])
        t3 = out21.at(i)
        t4 = out22.at(i)
        assert np.array_equal(t3, np.ones((120, 120, 3), dtype=np.uint8))
        assert np.allclose(t4, [np.ones((120, 120), dtype=np.float32)])
Example #11
def _test_reader_files_arg(use_root, use_labels, shuffle):
    root = g_root
    fnames = g_files
    if not use_root:
        fnames = [os.path.join(root, f) for f in fnames]
        root = None

    lbl = None
    if use_labels:
        lbl = [10000 + i for i in range(len(fnames))]

    batch_size = 3
    pipe = Pipeline(batch_size, 1, 0)
    files, labels = fn.readers.file(file_root=root,
                                    files=fnames,
                                    labels=lbl,
                                    random_shuffle=shuffle)
    pipe.set_outputs(files, labels)
    pipe.build()

    num_iters = (len(fnames) + 2 * batch_size) // batch_size
    for i in range(num_iters):
        out_f, out_l = pipe.run()
        for j in range(batch_size):
            contents = bytes(out_f.at(j)).decode('utf-8')
            label = out_l.at(j)[0]
            index = label - 10000 if use_labels else label
            assert contents == ref_contents(fnames[index])
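
A hypothetical nose-style driver enumerating the flag combinations for the helper above (the actual test generator is not shown in this excerpt):

def test_reader_files_arg():
    for use_root in (False, True):
        for use_labels in (False, True):
            for shuffle in (False, True):
                yield _test_reader_files_arg, use_root, use_labels, shuffle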
Example #12
def check_slice_named_args_errors(device, batch_size):
    test_data_shape = [5, 4, 3]

    def get_data():
        out = [
            np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8)
            for _ in range(batch_size)
        ]
        return out

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)

    with pipe:
        data = fn.external_source(source=get_data, layout="HWC")
        start = np.array([1, 2])
        shape = np.array([3, 1])
        outs = [
            fn.slice(data,
                     start,
                     shape,
                     start=start,
                     end=start + shape,
                     shape=shape,
                     axes=(0, 1)),
        ]
        pipe.set_outputs(*outs)

    with assert_raises(
            RuntimeError,
            glob='"end", "rel_end", "shape", and "rel_shape" arguments '
                 'are mutually exclusive'):
        pipe.build()
        for _ in range(1):
            outs = pipe.run()
def _test_cross_device(src, dst, use_dali_tensor=False):
    # use_dali_tensor converts the data (via DLPack) to a native DALI tensor before
    # feeding it, to additionally check that the constructor handles device_id
    # correctly.
    # TODO(klecki): [device_id] currently the device_id is not exposed in Python
    # Tensors, so there is no other way to verify it.
    import nvidia.dali.fn as fn
    import numpy as np

    pipe = Pipeline(1, 3, dst)

    iteration = 0

    def get_data():
        nonlocal iteration
        with cp.cuda.Device(src):
            data = cp.array([[1, 2, 3, 4], [5, 6, 7, 8]],
                            dtype=cp.float32) + iteration
            iteration += 1
        if use_dali_tensor:
            return TensorGPU(data.toDlpack())
        return data

    with pipe:
        pipe.set_outputs(
            fn.external_source(get_data, batch=False, device='gpu'))

    pipe.build()
    for i in range(10):
        out, = pipe.run()
        assert np.array_equal(np.array(out[0].as_cpu()),
                              np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) + i)
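
A sketch of a nose-style driver for the helper above, assuming a machine with at least two GPUs:

def test_cross_device():
    if cp.cuda.runtime.getDeviceCount() >= 2:
        for src, dst in ((0, 1), (1, 0)):
            for use_dali_tensor in (False, True):
                yield _test_cross_device, src, dst, use_dali_tensor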
def check_operator_coco_reader_label_remap(avoid_remap):
    batch_size = 2
    images = list(test_data.keys())
    ids_map = {
        s.id: s.cls if avoid_remap else s.mapped_cls
        for s in test_data.values()
    }

    pipeline = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipeline:
        _, _, labels, ids = fn.readers.coco(file_root=file_root,
                                            annotations_file=train_annotations,
                                            image_ids=True,
                                            images=images,
                                            avoid_class_remapping=avoid_remap)
        pipeline.set_outputs(ids, labels)
    pipeline.build()

    i = 0
    assert len(images) % batch_size == 0
    while i < len(images):
        out = pipeline.run()
        for s in range(batch_size):
            image_id = int(out[0].at(s))
            label = int(out[1].at(s))
            print(image_id, label)
            assert ids_map[image_id] == label, f"{i}, {ids_map[image_id]} vs {label}"
            i += 1
Example #15
def _test_file_reader_filter(filters, glob_filters, batch_size, num_threads,
                             subpath, case_sensitive_filter):
    pipe = Pipeline(batch_size, num_threads, 0)
    root = os.path.join(os.environ['DALI_EXTRA_PATH'], subpath)
    files, labels = fn.readers.file(
        file_root=root,
        file_filters=filters,
        case_sensitive_filter=case_sensitive_filter)
    pipe.set_outputs(files, labels)
    pipe.build()

    fnames = set()
    for label, dirname in enumerate(sorted(next(os.walk(root))[1])):
        for pattern in glob_filters:
            for path in glob.glob(os.path.join(root, dirname, pattern)):
                fnames.add((label, os.path.basename(path), path))

    fnames = sorted(fnames)

    for i in range(len(fnames) // batch_size):
        out_f, _ = pipe.run()
        for j in range(batch_size):
            with open(fnames[i * batch_size + j][2], 'rb') as f:
                contents = np.array(list(f.read()))
                assert all(contents == out_f.at(j))
Example #16
def _test_very_small_output(dim, device):
    batch_size = 8
    pipe = Pipeline(batch_size=batch_size,
                    num_threads=8,
                    device_id=0,
                    seed=1234)
    if dim == 2:
        files, labels = dali.fn.readers.caffe(path=db_2d_folder,
                                              random_shuffle=True)
        images_cpu = dali.fn.decoders.image(files, device="cpu")
    else:
        images_cpu = dali.fn.external_source(
            source=random_3d_loader(batch_size), layout="DHWC")

    images = images_cpu if device == "cpu" else images_cpu.gpu()

    resize_tiny = fn.resize(images, size=1e-10)

    pipe.set_outputs(resize_tiny)
    pipe.build()

    for _ in range(3):
        out, = pipe.run()
        ref_size = [1, 1, 1, 1] if dim == 3 else [1, 1, 3]
        for t in out:
            assert t.shape() == ref_size
def _test_external_source_callback_torch_stream(src_device, gen_device):
    with torch.cuda.stream(torch.cuda.Stream()):
        for attempt in range(10):
            t0 = torch.tensor([attempt * 100 + 1.5],
                              dtype=torch.float32,
                              device=gen_device)
            increment = torch.tensor([10],
                                     dtype=torch.float32,
                                     device=gen_device)
            pipe = Pipeline(1, 3, 0)

            def gen_batch():
                nonlocal t0
                t0 += increment
                return [to_dlpack(t0)]

            pipe.set_outputs(
                fn.external_source(source=gen_batch,
                                   device=src_device,
                                   cuda_stream=torch.cuda.current_stream()))
            pipe.build()

            for i in range(10):
                check_output(pipe.run(), [
                    np.array([attempt * 100 + (i + 1) * 10 + 1.5],
                             dtype=np.float32)
                ])
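
The snippet above assumes the standard PyTorch DLPack bridge:

import torch
from torch.utils.dlpack import to_dlpack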
Example #18
def check_slice_named_args_default_start_or_end(device, batch_size):
    test_data_shape = [5, 4, 3]

    def get_data():
        out = [np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8)
               for _ in range(batch_size)]
        return out

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        data = fn.external_source(source=get_data, layout="HWC")
        in_shape = np.array([5, 4])
        start = np.array([1, 2])
        shape = np.array([3, 1])
        end = start + shape
        rel_start = start / in_shape
        rel_shape = shape / in_shape
        rel_end = end / in_shape
        outs = [
            fn.slice(data, start=start, end=in_shape, axes=(0, 1)),
            fn.slice(data, start=[0, 0], end=end, axes=(0, 1)),
            fn.slice(data, start=start, axes=(0, 1)),
            fn.slice(data, end=end, axes=(0, 1)),
        ]
        pipe.set_outputs(*outs)
        pipe.set_outputs(*outs)
    pipe.build()
    for _ in range(3):
        outs = pipe.run()
        for sample in range(batch_size):
            np.testing.assert_equal(np.array(outs[0][sample]), np.array(outs[2][sample]))
            np.testing.assert_equal(np.array(outs[1][sample]), np.array(outs[3][sample]))
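
The rel_start, rel_shape and rel_end values computed above are not consumed by the four slices; for reference, equivalent relative-argument slices would look like this (a sketch using fn.slice's rel_* parameters, placed inside the same pipeline scope):

# Inside the same `with pipe:` block as above (sketch, not an additional test):
slice_rel_1 = fn.slice(data, rel_start=rel_start, rel_end=rel_end, axes=(0, 1))
slice_rel_2 = fn.slice(data, rel_start=rel_start, rel_shape=rel_shape, axes=(0, 1))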
Example #19
def check_layout(kwargs, input_datasets, layout):
    pipe = Pipeline(10, 4, 0)
    with pipe:
        input = fn.external_source(**kwargs)
        # Rely on Pad's internal check to verify that the external source set the layout
        pipe.set_outputs(fn.pad(input, axis_names=layout))

    with tf.device('/cpu:0'):
        dali_dataset = dali_tf.experimental.DALIDatasetWithInputs(
            input_datasets=input_datasets,
            pipeline=pipe,
            batch_size=pipe.max_batch_size,
            output_shapes=None,
            output_dtypes=tf.int64,
            num_threads=pipe.num_threads,
            device_id=pipe.device_id)

    run_dataset_eager_mode(dali_dataset, 10)
def _test_feed_input(device):
    src_pipe, batch_size = build_src_pipe(device)

    dst_pipe = Pipeline(batch_size,
                        1,
                        0,
                        exec_async=False,
                        exec_pipelined=False)
    dst_pipe.set_outputs(fn.external_source(name="ext", device=device))
    dst_pipe.build()
    for _ in range(3):
        out1 = src_pipe.run()
        dst_pipe.feed_input("ext", out1[0])
        out2 = dst_pipe.run()
        check_batch(out2[0], out1[0], batch_size, 0, 0, "XY")
def test_ndim_data_mismatch():
    batch_size = 2
    src_data = [[[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size,
                 [np.ones((120, 120), dtype=np.uint8)] * batch_size]]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext1, src_ext2 = fn.external_source(source=src_data,
                                            num_outputs=2,
                                            dtype=DALIDataType.UINT8,
                                            ndim=3)
    src_pipe.set_outputs(src_ext1, src_ext2)
    src_pipe.build()
    # The second output is 2D while ndim=3 was declared, so the run is expected to fail.
    assert_raises(RuntimeError, src_pipe.run)
Example #22
def check_dim_mismatch(device, test_data_root, names):
    pipe = Pipeline(2, 2, 0)
    pipe.set_outputs(
        fn.readers.numpy(device=device, file_root=test_data_root, files=names))
    pipe.build()
    err = None
    try:
        pipe.run()
    except RuntimeError as thrown:
        err = thrown
    # asserts should not be in except block to avoid printing nested exception on failure
    assert err, "Exception not thrown"
    assert "Inconsistent data" in str(
        err), "Unexpected error message: {}".format(err)
Example #23
def _test_external_source_callback_split(use_fn_api, batch, as_tensor, device):
    iter_num = 5
    batch_size = 9
    pipe = Pipeline(batch_size, 3, 0)

    # this should produce a two-element list of Tensor(Lists), the first
    # being 2D, the second being 3D (+ batch dimension)
    source = TestIterator(iter_num, batch_size, [2, 3], as_tensor)
    iter_in = iter(source) if batch else iter(
        SampleIterator(iter(source), True))

    if use_fn_api:
        inputs = fn.external_source(lambda: next(iter_in),
                                    num_outputs=2,
                                    device=device,
                                    batch=batch)
    else:
        ext_source = ops.ExternalSource(lambda: next(iter_in),
                                        num_outputs=2,
                                        device=device,
                                        batch=batch)
        inputs = ext_source()
    pipe.set_outputs(*inputs)
    pipe.build()

    run_and_check(pipe, source)
Example #24
def test_wrong_feature_shape():
    features = {
        'image/encoded': tfrec.FixedLenFeature((), tfrec.string, ""),
        'image/object/bbox': tfrec.FixedLenFeature([], tfrec.float32, -1.0),
        'image/object/class/label': tfrec.FixedLenFeature([], tfrec.int64, -1),
    }
    test_dummy_data_path = os.path.join(get_dali_extra_path(), 'db',
                                        'coco_dummy')
    pipe = Pipeline(1, 1, 0)
    with pipe:
        input = fn.readers.tfrecord(path=os.path.join(test_dummy_data_path,
                                                      'small_coco.tfrecord'),
                                    index_path=os.path.join(
                                        test_dummy_data_path,
                                        'small_coco_index.idx'),
                                    features=features)
    pipe.set_outputs(input['image/encoded'], input['image/object/class/label'],
                     input['image/object/bbox'])
    pipe.build()
    # The error is raised because FixedLenFeature is used with a shape too small
    # to hold the input.
    assert_raises(
        RuntimeError,
        pipe.run,
        glob="Error when executing CPU operator*readers*tfrecord*"
        "Output tensor shape is too small*[]*Expected at least 4 elements")
def NumpyReaderPipeline(path,
                        batch_size,
                        device="cpu",
                        file_list=None,
                        files=None,
                        file_filter="*.npy",
                        num_threads=1,
                        device_id=0,
                        cache_header_information=False):
    pipe = Pipeline(batch_size=batch_size,
                    num_threads=num_threads,
                    device_id=device_id)
    data = fn.readers.numpy(device=device,
                            file_list=file_list,
                            files=files,
                            file_root=path,
                            file_filter=file_filter,
                            shard_id=0,
                            num_shards=1,
                            cache_header_information=cache_header_information)
    pipe.set_outputs(data)
    return pipe
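
Hypothetical usage of the helper above, reading every .npy file under a directory (the path is an assumption for illustration):

pipe = NumpyReaderPipeline("/path/to/npy_dir", batch_size=8)
pipe.build()
out, = pipe.run()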
Example #26
def test_external_source_iterate_ndarray():
    pipe = Pipeline(4, 3, 0)

    batch = make_array([1.5, 2.5, 2, 3], dtype=datapy.float32)

    pipe.set_outputs(fn.external_source(batch, batch=False))
    pipe.build()
    run_and_check(pipe, [batch])
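
make_array and datapy are assumed backend aliases that let the same test body run on CPU (NumPy) or GPU (CuPy); for instance:

# CPU variant
import numpy as datapy
make_array = datapy.array

# GPU variant (swapped in for CuPy-backed runs)
# import cupy as datapy
# make_array = datapy.array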
Example #27
def test_external_source_generator():
    pipe = Pipeline(1, 3, 0)

    def gen():
        for i in range(5):
            yield [make_array([i + 1.5], dtype=datapy.float32)]

    pipe.set_outputs(fn.external_source(gen()))
    pipe.build()

    for i in range(5):
        check_output(pipe.run(), [np.array([i + 1.5], dtype=np.float32)])
Example #28
def test_constant_promotion_mixed():
    filename = os.path.join(jpeg_folder, "241", "cute-4074304_1280.jpg")
    file_contents = np.fromfile(filename, dtype=np.uint8)
    pipe = Pipeline(1, 3, 0)
    with pipe:
        jpegs, _ = fn.readers.file(files=[filename])
        from_reader = fn.image_decoder(jpegs, device="mixed")
        from_constant = fn.image_decoder(file_contents, device="mixed")
        pipe.set_outputs(from_constant, from_reader)
    pipe.build()
    # Outputs come back in set_outputs order: (from_constant, from_reader).
    from_constant, from_reader = pipe.run()
    check_batch(from_reader, from_constant, 1)
def test_dtype_arg():
    batch_size = 2
    src_data = [[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext = fn.external_source(source=src_data, dtype=DALIDataType.UINT8)
    src_pipe.set_outputs(src_ext)
    src_pipe.build()
    out, = src_pipe.run()
    for i in range(batch_size):
        t = out.at(i)
        assert t.dtype == np.uint8
        assert np.array_equal(t, np.ones((120, 120, 3), dtype=np.uint8))
Example #30
def test_external_source_collection():
    pipe = Pipeline(1, 3, 0)

    batches = [[make_array([1.5, 2.5], dtype=datapy.float32)],
               [make_array([-1, 3.5, 4.5], dtype=datapy.float32)]]

    pipe.set_outputs(fn.external_source(batches))
    pipe.build()
    run_and_check(pipe, batches)