Example #1
def test_reduce_std_cpu():
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    data = fn.external_source(source=get_data)
    mean = fn.reductions.mean(data)
    reduced = fn.reductions.std_dev(data, mean)
    pipe.set_outputs(reduced)
    pipe.build()
    for _ in range(3):
        pipe.run()
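Note: the snippets in this listing are excerpted from the DALI test suite and rely on module-level fixtures that are not shown. Below is a minimal sketch of that assumed context; the fixture values and shapes are illustrative, not the suite's actual definitions.

import numpy as np
import nvidia.dali.fn as fn
import nvidia.dali.types as types
from nvidia.dali.pipeline import Pipeline

batch_size = 4  # assumed fixture value

def get_data():
    # one batch of random HWC uint8 images (shape is an assumption)
    return [np.random.randint(0, 255, size=(100, 100, 3), dtype=np.uint8)
            for _ in range(batch_size)]

def get_audio_data():
    # one batch of random 1D float32 waveforms (length is an assumption)
    return [np.random.ranf(size=2000).astype(np.float32)
            for _ in range(batch_size)]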
Example #2
def test_mel_filter_bank_cpu():
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    data = fn.external_source(source=get_audio_data)
    spectrum = fn.spectrogram(data, nfft=60, window_length=50, window_step=25)
    processed = fn.mel_filter_bank(spectrum)
    pipe.set_outputs(processed)
    pipe.build()
    for _ in range(3):
        pipe.run()
Example #3
def _test_invalid_args(device, args, message, run):
    data = [np.uint8([[1, 2, 3]]), np.uint8([[1, 2]])]
    pipe = Pipeline(2, 1, 0)
    src = fn.external_source(lambda: data, device=device)
    pipe.set_outputs(fn.tensor_subscript(src, **args))
    with assert_raises(RuntimeError, glob=message):
        pipe.build()
        if run:
            pipe.run()
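A hypothetical invocation of this helper; the subscript argument name at_0 and the error-message pattern below are assumptions for illustration, not taken from the original suite.

# at_0=2 indexes past the end of the first dimension (size 1) of both samples
_test_invalid_args("cpu", dict(at_0=2), "*out of range*", run=True)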
Example #4
def test_batch_permute_cpu():
    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=None)
    data = fn.external_source(source=get_data, layout="HWC")
    perm = fn.batch_permutation(seed=420)
    processed = fn.permute_batch(data, indices=perm)
    pipe.set_outputs(processed)
    pipe.build()
    for _ in range(3):
        pipe.run()
Example #5
def test_external_source_collection():
    pipe = Pipeline(1, 3, 0)

    batches = [[make_array([1.5, 2.5], dtype=datapy.float32)],
               [make_array([-1, 3.5, 4.5], dtype=datapy.float32)]]

    pipe.set_outputs(fn.external_source(batches))
    pipe.build()
    run_and_check(pipe, batches)
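make_array, datapy, and run_and_check are test-harness helpers not shown in this listing. A minimal CPU-only stand-in, under the assumption that datapy aliases NumPy:

import numpy as datapy
make_array = datapy.array

def run_and_check(pipe, ref_batches):
    # run the pipeline once per reference batch and compare sample by sample
    for batch in ref_batches:
        out, = pipe.run()
        for i, ref in enumerate(batch):
            assert np.array_equal(np.asarray(out.at(i)), ref)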
Example #6
def check_generic_gaussian_blur(batch_size,
                                sigma,
                                window_size,
                                shape,
                                layout,
                                axes,
                                op_type="cpu",
                                in_dtype=np.uint8,
                                out_dtype=types.NO_TYPE,
                                random_shape=True):
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    min_shape = None if random_shape else shape
    data = RandomlyShapedDataIterator(batch_size,
                                      min_shape=min_shape,
                                      max_shape=shape,
                                      dtype=in_dtype)
    # Determine the expected numpy result type: float32, or the same type as the input
    if out_dtype == types.NO_TYPE:
        result_type = in_dtype
    elif dali_type(in_dtype) == out_dtype:
        result_type = in_dtype
    else:
        result_type = np.float32
    with pipe:
        input = fn.external_source(data, layout=layout)
        if op_type == "gpu":
            input = input.gpu()
        blurred = fn.gaussian_blur(input,
                                   device=op_type,
                                   sigma=sigma,
                                   window_size=window_size,
                                   dtype=out_dtype)
        pipe.set_outputs(blurred, input)
    pipe.build()

    for _ in range(test_iters):
        result, input = pipe.run()
        if op_type == "gpu":
            result = result.as_cpu()
            input = input.as_cpu()
        input = to_batch(input, batch_size)
        skip_axes = count_skip_axes(layout)
        baseline = [
            gaussian_baseline(img,
                              sigma,
                              window_size,
                              axes,
                              skip_axes,
                              dtype=result_type) for img in input
        ]
        max_error = 1 if result_type != np.float32 else 1e-04
        check_batch(result,
                    baseline,
                    batch_size,
                    max_allowed_error=max_error,
                    expected_layout=layout)
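For reference, a typical call to this checker could look as follows. The parameter values are illustrative, and the helpers used above (RandomlyShapedDataIterator, dali_type, gaussian_baseline, to_batch, count_skip_axes, check_batch, test_iters) all come from the surrounding test suite.

check_generic_gaussian_blur(batch_size=10, sigma=1.0, window_size=5,
                            shape=(128, 128, 3), layout="HWC", axes=2,
                            op_type="cpu")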
Example #7
def check_coin_flip(device='cpu',
                    batch_size=32,
                    max_shape=[1e5],
                    p=None,
                    use_shape_like_input=False):
    pipe = Pipeline(batch_size=batch_size,
                    device_id=0,
                    num_threads=3,
                    seed=123456)
    with pipe:

        def shape_gen_f():
            return random_shape(max_shape)

        shape_arg = None
        inputs = []
        shape_out = None
        if max_shape is not None:
            if use_shape_like_input:
                shape_like_in = dali.fn.external_source(
                    lambda: np.zeros(shape_gen_f()),
                    device=device,
                    batch=False)
                inputs += [shape_like_in]
                shape_out = dali.fn.shapes(shape_like_in)
            else:
                shape_arg = dali.fn.external_source(shape_gen_f, batch=False)
                shape_out = shape_arg
        outputs = [
            dali.fn.random.coin_flip(*inputs,
                                     device=device,
                                     probability=p,
                                     shape=shape_arg)
        ]
        if shape_out is not None:
            outputs += [shape_out]
        pipe.set_outputs(*outputs)
    pipe.build()
    outputs = pipe.run()
    data_out = outputs[0].as_cpu() if isinstance(outputs[0],
                                                 TensorListGPU) else outputs[0]
    shapes_out = None
    if max_shape is not None:
        shapes_out = outputs[1].as_cpu() if isinstance(
            outputs[1], TensorListGPU) else outputs[1]
    p = p if p is not None else 0.5
    for i in range(batch_size):
        data = np.array(data_out[i])
        assert np.logical_or(data == 0, data == 1).all()
        if max_shape is not None:
            sample_shape = np.array(shapes_out[i])
            assert (data.shape == sample_shape).all()
            total = len(data)
            positive = np.count_nonzero(data)
            np.testing.assert_allclose(p, positive / total,
                                       atol=0.005)  # +/- 0.5%
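An illustrative call (random_shape and TensorListGPU are assumed to come from the test harness); this checks that roughly 70% of the generated values are ones:

check_coin_flip(device='cpu', batch_size=32, max_shape=[100000], p=0.7)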
Example #8
def dali_data_iter(batch_size: int,
                   rec_file: str,
                   idx_file: str,
                   num_threads: int,
                   initial_fill=32768,
                   random_shuffle=True,
                   prefetch_queue_depth=1,
                   local_rank=0,
                   name="reader",
                   mean=(127.5, 127.5, 127.5),
                   std=(127.5, 127.5, 127.5)):
    """
    Parameters:
    ----------
    initial_fill: int
        Size of the buffer that is used for shuffling. If random_shuffle is False, this parameter is ignored.

    """
    rank: int = distributed.get_rank()
    world_size: int = distributed.get_world_size()
    import nvidia.dali.fn as fn
    import nvidia.dali.types as types
    from nvidia.dali.pipeline import Pipeline
    from nvidia.dali.plugin.pytorch import DALIClassificationIterator

    pipe = Pipeline(
        batch_size=batch_size,
        num_threads=num_threads,
        device_id=local_rank,
        prefetch_queue_depth=prefetch_queue_depth,
    )
    with pipe:
        condition_flip = fn.random.coin_flip(probability=0.5)
        jpegs, labels = fn.readers.mxnet(path=rec_file,
                                         index_path=idx_file,
                                         initial_fill=initial_fill,
                                         num_shards=world_size,
                                         shard_id=rank,
                                         random_shuffle=random_shuffle,
                                         pad_last_batch=False,
                                         name=name)
        images = fn.decoders.image(jpegs,
                                   device="mixed",
                                   output_type=types.RGB)
        images = fn.crop_mirror_normalize(images,
                                          dtype=types.FLOAT,
                                          mean=mean,
                                          std=std,
                                          mirror=condition_flip)
        pipe.set_outputs(images, labels)
    pipe.build()
    return DALIWarper(
        DALIClassificationIterator(
            pipelines=[pipe],
            reader_name=name,
        ))
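A sketch of how this iterator might be used; it assumes torch.distributed has already been initialized and that RecordIO files exist at the (illustrative) paths below. DALIWarper is a wrapper class from the originating codebase, not part of DALI.

import torch.distributed as distributed
# distributed.init_process_group(...) must have been called before this point
train_loader = dali_data_iter(batch_size=128,
                              rec_file="/data/train.rec",
                              idx_file="/data/train.idx",
                              num_threads=4,
                              local_rank=0)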
Example #9
def test_set_outputs_err_msg_unpack():
    data = [[[np.random.rand(1, 3, 2)], [np.random.rand(1, 4, 5)]]]
    pipe = Pipeline(batch_size=1, num_threads=1, device_id=None)
    pipe.set_outputs(fn.external_source(data, num_outputs=2, cycle='quiet'))
    with assert_raises(
            TypeError,
            glob='Illegal pipeline output type. The output * contains a nested `DataNode`'):
        pipe.build()
Example #10
def test_incorrect_dtype_arg():
    batch_size = 2
    src_data = [
        [np.ones((120, 120, 3), dtype=np.float32)]*batch_size
    ]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext = fn.external_source(source=src_data, device='cpu', dtype=DALIDataType.UINT8)
    src_pipe.set_outputs(src_ext)
    src_pipe.build()
    # the source yields float32 data while dtype=UINT8 was declared above,
    # so this run is expected to raise an error
    src_pipe.run()
Example #11
def test_set_outputs_err_msg_unpack():
    data = [[[np.random.rand(1, 3, 2)], [np.random.rand(1, 4, 5)]]]
    pipe = Pipeline(batch_size=1, num_threads=1, device_id=None)
    pipe.set_outputs(fn.external_source(data, num_outputs=2, cycle='quiet'))
    pattern = 'Illegal pipeline output type. The output 0 contains a nested `DataNode`. ' + \
              'Missing list/tuple expansion (*) is the likely cause.'
    try:
        pipe.build()
    except TypeError as e:
        assert str(e) == pattern
    else:
        assert False, "Expected TypeError was not raised"
Example #12
def test_ndim_changing():
    batch_size = 2
    src_data = [[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size,
                [np.ones((120, 120), dtype=np.uint8)] * batch_size]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext1 = fn.external_source(source=src_data, dtype=DALIDataType.UINT8)
    src_pipe.set_outputs(src_ext1)
    src_pipe.build()
    src_pipe.run()
    # the second batch has 2D samples after a 3D first batch,
    # so this run is expected to raise an error
    src_pipe.run()
Example #13
def test_sequence_reader_cpu():
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    processed = fn.readers.sequence(file_root=sequence_dir,
                                    sequence_length=2,
                                    shard_id=0,
                                    num_shards=1)
    pipe.set_outputs(processed)
    pipe.build()
    for _ in range(3):
        pipe.run()
Example #14
def test_image_decoder_crop_device():
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    input, _ = fn.readers.file(file_root=images_dir, shard_id=0, num_shards=1)
    decoded = fn.decoders.image_crop(input,
                                     output_type=types.RGB,
                                     crop=(10, 10))
    pipe.set_outputs(decoded)
    pipe.build()
    for _ in range(3):
        pipe.run()
Example #15
def test_set_outputs_err_msg_random_type():
    pipe = Pipeline(batch_size=1, num_threads=1, device_id=None)
    pipe.set_outputs("test")
    pattern = "Illegal output type. The output 0 is a `<class 'str'>`. Allowed types are " + \
              "``DataNode`` and types convertible to `types.Constant` (numerical constants, " + \
              "1D lists/tuple of numbers and ND arrays)."
    try:
        pipe.build()
    except TypeError as e:
        assert str(e) == pattern
    else:
        assert False, "Expected TypeError was not raised"
Example #16
def test_stack_cpu():
    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=None)
    data = fn.external_source(source=get_data, layout="HWC")
    data2 = fn.external_source(source=get_data, layout="HWC")
    data3 = fn.external_source(source=get_data, layout="HWC")
    pixel_pos = fn.stack(data, data2, data3)
    pipe.set_outputs(pixel_pos)
    pipe.build()
    for _ in range(3):
        pipe.run()
Example #17
def test_changing_dtype():
    batch_size = 2
    src_data = [[np.ones((120, 120, 3), dtype=np.float32)] * batch_size,
                [np.ones((120, 120, 3), dtype=np.uint8)] * batch_size]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext = fn.external_source(source=src_data)
    src_pipe.set_outputs(src_ext)
    src_pipe.build()
    src_pipe.run()
    # the second batch changes dtype from float32 to uint8,
    # so this run is expected to raise an error
    src_pipe.run()
Example #18
def test_invalid_args():
    pipeline = Pipeline(batch_size=2, num_threads=4, device_id=0)
    with pipeline:
        inputs, _, _, ids = fn.coco_reader(file_root=file_root,
                                           annotations_file=train_annotations,
                                           image_ids=True,
                                           images=images,
                                           preprocessed_annotations_dir='/tmp')
        pipeline.set_outputs(ids)
    pipeline.build()
Example #19
def test_coco_reader_cpu():
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    out, _, _ = fn.readers.coco(file_root=coco_dir,
                                annotations_file=coco_annotation,
                                shard_id=0,
                                num_shards=1)
    pipe.set_outputs(out)
    pipe.build()
    for _ in range(3):
        pipe.run()
Example #20
def test_mxnet_iterator_empty_array():
    from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator
    import mxnet as mx

    batch_size = 4
    size = 5

    all_np_types = [
        np.bool_, np.int_, np.intc, np.intp, np.int8, np.int16, np.int32,
        np.int64, np.uint8, np.uint16, np.uint32, np.uint64, np.float_,
        np.float32, np.float16, np.short, np.long, np.longlong, np.ushort,
        np.ulonglong
    ]
    np_types = []
    # store in np_types only types supported by MXNet
    for t in all_np_types:
        try:
            mx.nd.zeros([2, 2, 2], ctx=None, dtype=t)
            np_types.append(t)
        except mx.base.MXNetError:
            pass

    test_data_shape = [1, 3, 0, 4]

    def get_data():
        # create batch of [type_a, type_a, type_b, type_b, ...]
        out = [[np.empty(test_data_shape, dtype=t)] * batch_size
               for t in np_types]
        out = [val for pair in zip(out, out) for val in pair]
        return out

    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=0)
    outs = fn.external_source(source=get_data, num_outputs=len(np_types) * 2)
    pipe.set_outputs(*outs)
    pipe.build()

    # create map of [(data, type_a), (label, type_a), ...]
    data_map = [('data_{}'.format(i), MXNetIterator.DATA_TAG)
                for i, t in enumerate(np_types)]
    label_map = [('label_{}'.format(i), MXNetIterator.LABEL_TAG)
                 for i, t in enumerate(np_types)]
    out_map = [val for pair in zip(data_map, label_map) for val in pair]

    iterator = MXNetIterator(pipe,
                             output_map=out_map,
                             size=size,
                             dynamic_shape=True)

    for batch in iterator:
        for d, t in zip(batch[0].data, np_types):
            shape = d.asnumpy().shape
            assert shape[0] == batch_size
            print(shape)
            assert np.array_equal(shape[1:], test_data_shape)
            assert d.asnumpy().dtype == t
Example #21
def test_combine_transforms_cpu():
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    with pipe:
        t = fn.transforms.translation(offset=(1, 2))
        r = fn.transforms.rotation(angle=30.0)
        s = fn.transforms.scale(scale=(2, 3))
        out = fn.transforms.combine(t, r, s)
    pipe.set_outputs(out)
    pipe.build()
    for _ in range(3):
        pipe.run()
Example #22
def test_mxnet_reader_cpu():
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    out, _ = fn.readers.mxnet(path=os.path.join(recordio_dir, "train.rec"),
                              index_path=os.path.join(recordio_dir,
                                                      "train.idx"),
                              shard_id=0,
                              num_shards=1)
    pipe.set_outputs(out)
    pipe.build()
    for _ in range(3):
        pipe.run()
Example #23
def _test_feed_input(device):
    src_pipe, batch_size = build_src_pipe(device)

    dst_pipe = Pipeline(batch_size, 1, 0, exec_async=False, exec_pipelined=False)
    dst_pipe.set_outputs(fn.external_source(name="ext", device=device))
    dst_pipe.build()
    for _ in range(3):
        out1 = src_pipe.run()
        dst_pipe.feed_input("ext", out1[0])
        out2 = dst_pipe.run()
        check_batch(out2[0], out1[0], batch_size, 0, 0, "XY")
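build_src_pipe is not shown in this listing; below is a minimal sketch consistent with the batch-size return value and the "XY" layout checked above. The shapes and values are assumptions.

def build_src_pipe(device, batch_size=4):
    pipe = Pipeline(batch_size, 1, 0)
    data = fn.external_source(
        source=lambda: [np.full((2, 3), i, dtype=np.float32)
                        for i in range(batch_size)],
        device=device, layout="XY")
    pipe.set_outputs(data)
    pipe.build()
    return pipe, batch_size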
Example #24
def check_no_input(op, get_data=get_data, **kwargs):
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    with pipe:
        processed = op(**kwargs)
        if isinstance(processed, Iterable):
            pipe.set_outputs(*processed)
        else:
            pipe.set_outputs(processed)
    pipe.build()
    for _ in range(3):
        pipe.run()
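Illustrative uses of this helper with operators that take no positional inputs:

check_no_input(fn.transforms.rotation, angle=30.0)
check_no_input(fn.random.coin_flip, probability=0.5)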
Example #25
def test_external_source_generator():
    pipe = Pipeline(1, 3, 0)

    def gen():
        for i in range(5):
            yield [make_array([i + 1.5], dtype=datapy.float32)]

    pipe.set_outputs(fn.external_source(gen()))
    pipe.build()

    for i in range(5):
        check_output(pipe.run(), [np.array([i + 1.5], dtype=np.float32)])
Example #26
def check_per_sample_gaussian_blur(batch_size,
                                   sigma_dim,
                                   window_size_dim,
                                   shape,
                                   layout,
                                   axes,
                                   op_type="cpu"):
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    data = RandomlyShapedDataIterator(batch_size, max_shape=shape)
    with pipe:
        if sigma_dim is not None:
            sigma = fn.uniform(range=[0.5, 3], shape=[sigma_dim])
            sigma_arg = sigma
        else:
            # placeholder, so we can return something
            sigma = fn.coin_flip(probability=0)
            sigma_arg = None

        if window_size_dim is not None:
            window_radius = fn.uniform(range=[5, 10], shape=[window_size_dim])
            window_size = fn.cast(window_radius, dtype=types.INT32) * 2 + 1
            window_arg = window_size
        else:
            window_size = fn.coin_flip(probability=0)
            window_arg = None

        input = fn.external_source(data, layout=layout)
        if op_type == "gpu":
            input = input.gpu()
        blurred = fn.gaussian_blur(input,
                                   device=op_type,
                                   sigma=sigma_arg,
                                   window_size=window_arg)
        pipe.set_outputs(blurred, input, sigma, window_size)
    pipe.build()

    for _ in range(test_iters):
        result, input, sigma, window_size = pipe.run()
        if op_type == "gpu":
            result = result.as_cpu()
            input = input.as_cpu()
        input = to_batch(input, batch_size)
        sigma = to_batch(sigma, batch_size)
        window_size = to_batch(window_size, batch_size)
        baseline = []
        for i in range(batch_size):
            # sigma was only passed to the operator when sigma_dim was set
            sigma_arg = sigma[i] if sigma_dim is not None else None
            window_arg = window_size[i] if window_size_dim is not None else None
            skip_axes = count_skip_axes(layout)
            baseline.append(
                gaussian_baseline(input[i], sigma_arg, window_arg, axes,
                                  skip_axes))
        check_batch(result, baseline, batch_size, max_allowed_error=1)
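A typical call, with illustrative parameters, checking per-sample scalar sigma and window size on HWC data:

check_per_sample_gaussian_blur(batch_size=10, sigma_dim=1, window_size_dim=1,
                               shape=(128, 128, 3), layout="HWC", axes=2,
                               op_type="cpu")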
Example #27
def test_operator_coco_reader_same_images():
    coco_dir = os.path.join(test_data_root, 'db', 'coco')
    coco_dir_imgs = os.path.join(coco_dir, 'images')
    coco_pixelwise_dir = os.path.join(test_data_root, 'db', 'coco_pixelwise')
    coco_pixelwise_dir_imgs = os.path.join(coco_pixelwise_dir, 'images')

    for file_root, annotations_file in [
            (coco_dir_imgs, os.path.join(coco_dir, 'instances.json')),
            (coco_pixelwise_dir_imgs, os.path.join(coco_pixelwise_dir, 'instances.json')),
            (coco_pixelwise_dir_imgs, os.path.join(coco_pixelwise_dir, 'instances_rle_counts.json'))]:
        pipe = Pipeline(batch_size=1, num_threads=4, device_id=0)
        with pipe:
            inputs1, boxes1, labels1, *other = fn.coco_reader(
                file_root=file_root,
                annotations_file=annotations_file,
                name="reader1",
                seed=1234
            )
            inputs2, boxes2, labels2, *other = fn.coco_reader(
                file_root=file_root,
                annotations_file=annotations_file,
                polygon_masks=True,
                name="reader2"
            )
            inputs3, boxes3, labels3, *other = fn.coco_reader(
                file_root=file_root,
                annotations_file=annotations_file,
                pixelwise_masks=True,
                name="reader3"
            )
            pipe.set_outputs(
                inputs1, boxes1, labels1,
                inputs2, boxes2, labels2,
                inputs3, boxes3, labels3
            )
        pipe.build()

        epoch_sz = pipe.epoch_size("reader1")
        assert epoch_sz == pipe.epoch_size("reader2")
        assert epoch_sz == pipe.epoch_size("reader3")

        for i in range(epoch_sz):
            inputs1, boxes1, labels1, inputs2, boxes2, labels2, inputs3, boxes3, labels3 = \
                pipe.run()
            np.testing.assert_array_equal(inputs1.at(0), inputs2.at(0))
            np.testing.assert_array_equal(inputs1.at(0), inputs3.at(0))
            np.testing.assert_array_equal(labels1.at(0), labels2.at(0))
            np.testing.assert_array_equal(labels1.at(0), labels3.at(0))
            np.testing.assert_array_equal(boxes1.at(0), boxes2.at(0))
            np.testing.assert_array_equal(boxes1.at(0), boxes3.at(0))
Example #28
def test_dtype_arg():
    batch_size = 2
    src_data = [[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext = fn.external_source(source=src_data, dtype=DALIDataType.UINT8)
    src_pipe.set_outputs(src_ext)
    src_pipe.build()
    out, = src_pipe.run()
    for i in range(batch_size):
        t = out.at(i)
        assert t.dtype == np.uint8
        assert np.array_equal(t, np.ones((120, 120, 3), dtype=np.uint8))
Example #29
def _test_permute_batch_out_of_range(device):
    batch_size = 10
    pipe = Pipeline(batch_size, 4, 0)
    data = fn.external_source(source=lambda: gen_data(batch_size, np.int32),
                              device=device,
                              layout="abc")
    perm = fn.batch_permutation()
    pipe.set_outputs(
        data, fn.permute_batch(data, indices=[0, 1, 2, 3, 4, 5, 10, 7, 8, 9]),
        perm)
    pipe.build()
    # index 10 is out of range for a batch of 10 samples (valid indices: 0..9),
    # so this run is expected to raise an error
    pipe.run()
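gen_data is a suite fixture; below is a plausible stand-in, together with the way this negative test would typically be invoked. Both are assumptions for illustration.

def gen_data(batch_size, dtype):
    # one batch of random 3D tensors matching the "abc" layout
    return [np.random.randint(0, 100, size=(2, 3, 4)).astype(dtype)
            for _ in range(batch_size)]

with assert_raises(RuntimeError):
    _test_permute_batch_out_of_range("cpu")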
Example #30
def test_ndim_data_mismatch():
    batch_size = 2
    src_data = [[[np.ones((120, 120, 3), dtype=np.uint8)] * batch_size,
                 [np.ones((120, 120), dtype=np.uint8)] * batch_size]]
    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext1, src_ext2 = fn.external_source(source=src_data,
                                            num_outputs=2,
                                            dtype=DALIDataType.UINT8,
                                            ndim=3)
    src_pipe.set_outputs(src_ext1, src_ext2)
    src_pipe.build()
    # the second output has 2D samples while ndim=3 was declared,
    # so this run is expected to raise an error
    src_pipe.run()