Code Example #1
def test_arithm_op_context_manager_deep_nested():
    tl_1 = tensors.TensorListCPU(np.ones((8, 16, 16)))
    tl_2 = tensors.TensorListCPU(np.ones((8, 16, 16)))
    expected_sum = np.full(shape=(8, 16, 16), fill_value=2)

    eager.arithmetic(True)

    assert np.array_equal((tl_1 + tl_2).as_array(), expected_sum)

    with eager.arithmetic(False):
        with assert_raises(TypeError, glob="unsupported operand type*"):
            tl_1 + tl_2

        with eager.arithmetic(True):
            assert np.array_equal((tl_1 + tl_2).as_array(), expected_sum)

            with eager.arithmetic(False):
                with assert_raises(TypeError,
                                   glob="unsupported operand type*"):
                    tl_1 + tl_2

        with assert_raises(TypeError, glob="unsupported operand type*"):
            tl_1 + tl_2

    assert np.array_equal((tl_1 + tl_2).as_array(), expected_sum)
    eager.arithmetic(False)
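The test above relies on eager.arithmetic working both as a plain toggle and as a nestable context manager. A minimal sketch of that dual use, assuming the experimental eager module (nvidia.dali.experimental.eager) and nvidia.dali.tensors are what these examples import as eager and tensors:

import numpy as np
import nvidia.dali.tensors as tensors
from nvidia.dali.experimental import eager

a = tensors.TensorListCPU(np.ones((2, 4)))
b = tensors.TensorListCPU(np.ones((2, 4)))

eager.arithmetic(True)         # plain call: flips the global switch
with eager.arithmetic(False):  # context manager: scoped override
    pass                       # here `a + b` would raise TypeError
print((a + b).as_array())      # global switch restored, so this works
eager.arithmetic(False)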
Code Example #2
def test_arithm_op_context_manager_enabled():
    eager.arithmetic(True)
    tl_1 = tensors.TensorListCPU(np.ones((8, 16, 16)))
    tl_2 = tensors.TensorListCPU(np.ones((8, 16, 16)))

    assert np.array_equal((tl_1 + tl_2).as_array(),
                          np.full(shape=(8, 16, 16), fill_value=2))
    eager.arithmetic(False)
Code Example #3
    def run(self):
        """Run the pipeline and return the result."""
        import numpy as np
        if not self._built:
            raise RuntimeError('Pipeline must be built first.')

        self._debug_on = True
        self._cur_subpipeline_id = -1
        _pipeline.Pipeline.push_current(self)

        res = self._exec_func()
        if res is None:
            res = ()
        elif not isinstance(res, tuple):
            res = (res, )

        self._debug_on = False
        if not self._subpipelines_built:
            self._subpipelines_built = True
        _pipeline.Pipeline.pop_current()

        # Transform every output to a TensorList; plain constants are
        # tiled (np.tile) across the whole batch.
        return tuple([
            val.get()
            if isinstance(val, DataNodeDebug) else _tensors.TensorListCPU(
                np.tile(val,
                        (self._max_batch_size, *[1] * np.array(val).ndim)))
            for val in res
        ])
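The np.tile call in the fallback branch above broadcasts a constant output to every sample in the batch; in isolation (a DALI-free sketch):

import numpy as np

val = np.array([1.0, 2.0])      # a constant, non-DataNodeDebug output
max_batch_size = 8
batch = np.tile(val, (max_batch_size, *[1] * val.ndim))
print(batch.shape)              # (8, 2): one copy of `val` per sample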
Code Example #4
def compare_eager_with_pipeline(path,
                                batch_size=batch_size,
                                N_iterations=5,
                                fn_op=None,
                                eager_op=None,
                                **kwargs):
    import_path = path.split('.')
    if fn_op is None:
        fn_op = reduce(reduce_getattr, [fn] + import_path)
    if eager_op is None:
        eager_op = reduce(reduce_getattr, [eager] + import_path)

    pipe = single_op_pipe(fn_op, kwargs)
    pipe.build()

    for i in range(N_iterations):
        input_tl = tensors.TensorListCPU(np.array(get_data(i)), layout="HWC")
        out1, = pipe.run()
        out2 = eager_op(input_tl, **kwargs)

        out1_data = out1.as_cpu() if isinstance(
            out1, tensors.TensorListGPU) else out1
        out2_data = out2.as_cpu() if isinstance(
            out2, tensors.TensorListGPU) else out2

        check_batch(out1_data, out2_data, batch_size)
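reduce_getattr is not defined in this snippet; judging by how it is folded over [fn] + import_path, it walks a dotted path such as 'segmentation.random_object_bbox' down the fn and eager module trees. A plausible definition, labeled as an assumption:

from functools import reduce

def reduce_getattr(obj, name):
    # Hypothetical helper matching its use above: one attribute step per fold.
    return getattr(obj, name)

# e.g. reduce(reduce_getattr, [fn, 'segmentation', 'random_object_bbox'])
# resolves to fn.segmentation.random_object_bbox.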
Code Example #5
File: eager_util.py  Project: NVIDIA/DALI
def _transform_data_to_tensorlist(data, batch_size, layout=None, device_id=None):
    data = _prep_data_for_feed_input(data, batch_size, layout, device_id)

    if isinstance(data, list):
        if isinstance(data[0], _tensors.TensorGPU):
            data = _tensors.TensorListGPU(data, layout or "")
        else:
            data = _tensors.TensorListCPU(data, layout or "")

    return data
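A hedged usage sketch for the helper above: a list of per-sample NumPy arrays is first normalized by _prep_data_for_feed_input into TensorCPU objects and then wrapped into a single TensorListCPU:

import numpy as np
import nvidia.dali.tensors as _tensors

batch = [np.zeros((16, 16, 3), dtype=np.uint8)] * 4
tl = _transform_data_to_tensorlist(batch, batch_size=4, layout="HWC")
assert isinstance(tl, _tensors.TensorListCPU)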
Code Example #6
def test_objective_eager_resize():
    from nvidia.dali._utils import eager_utils

    resize_class = eager_utils._eager_op_object_factory(
        ops.python_op_factory('Resize'), 'Resize')
    tl = tensors.TensorListCPU(np.random.default_rng().integers(
        256, size=(8, 200, 200, 3), dtype=np.uint8))

    obj_resize = resize_class(resize_x=50, resize_y=50)
    out_obj = obj_resize(tl)
    out_fun = eager.resize(tl, resize_x=50, resize_y=50)

    assert np.array_equal(out_obj.as_tensor(), out_fun.as_tensor())
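Since the class produced by _eager_op_object_factory keeps its constructor arguments, the same obj_resize instance can be applied to further batches; a hedged follow-up using the names defined in the test above:

tl_2 = tensors.TensorListCPU(np.random.default_rng().integers(
    256, size=(8, 100, 100, 3), dtype=np.uint8))
assert np.array_equal(obj_resize(tl_2).as_tensor(),
                      eager.resize(tl_2, resize_x=50, resize_y=50).as_tensor())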
Code Example #7
def test_regular_containers():
    samples_cpu = [
        (test_array, test_array),
        (tensors.TensorCPU(test_array), test_array)
    ]
    batches_cpu = [
        ([test_array], [test_array]),
        ([test_array] * 4, [test_array] * 4),
        ([tensors.TensorCPU(test_array)], [test_array]),
        ([tensors.TensorCPU(test_array)] * 4, [test_array] * 4),
        (tensors.TensorListCPU(test_array), test_array),
    ]
    yield from run_checks(samples_cpu, batches_cpu, [], [])
Code Example #8
File: eager_util.py  Project: NVIDIA/DALI
        def classify_array_kwarg(arr):
            if _types._is_torch_tensor(arr):
                if arr.is_cuda:
                    arr = arr.cpu().numpy()
            elif _types._is_mxnet_array(arr):
                import mxnet as mx

                if 'gpu' in str(arr.context):
                    arr = arr.copyto(mx.cpu())
            elif not _types._is_numpy_array(arr):
                raise RuntimeError(f"Unsupported array type '{type(arr)}'.")

            arr = _types._preprocess_constant_array_type(arr)
            arr = _tensors.TensorListCPU([_tensors.TensorCPU(arr)] * arg_constant_len)
            return True, 'cpu', arr
Code Example #9
def test_random_object_bbox():
    data = tensors.TensorListCPU([
        tensors.TensorCPU(np.int32([[1, 0, 0, 0],
                                    [1, 2, 2, 1],
                                    [1, 1, 2, 0],
                                    [2, 0, 0, 1]])),
        tensors.TensorCPU(np.int32([[0, 3, 3, 0],
                                    [1, 0, 1, 2],
                                    [0, 1, 1, 0],
                                    [0, 2, 0, 1],
                                    [0, 2, 2, 1]]))])

    def source(*_):
        return data

    check_single_input_stateful('segmentation.random_object_bbox',
                                fn_source=source, eager_source=source, layout="")
Code Example #10
File: pipeline.py  Project: wanboyang/DALI
    def feed_input(self, ref, data, layout=""):
        """Bind a NumPy array (or a list thereof) to a tensor produced by the
        ExternalSource operator. Note that `ref` must not be overridden by
        other operator outputs."""
        if not self._built:
            raise RuntimeError("Pipeline must be built first.")
        Edge._validate_edge_reference(ref)
        if isinstance(data, list):
            if self._batch_size != len(data):
                raise RuntimeError(
                    "Data list provided to feed_input needs to have batch_size length")
            inputs = []
            for datum in data:
                inputs.append(Tensors.TensorCPU(datum, layout))
            self._pipe.SetExternalTensorInput(ref.name, inputs)
        else:
            inp = Tensors.TensorListCPU(data, layout)
            self._pipe.SetExternalTLInput(ref.name, inp)
Code Example #11
File: pipeline.py  Project: tomzhang/DALI
    def feed_input(self, data_node, data, layout=""):
        """Bind a NumPy array (or a list thereof) to an output of ExternalSource.

        Parameters
        ----------
        data_node : :class:`DataNode` or str
            The :class:`DataNode` returned by a call to ExternalSource or a name of the
            :class:`nvidia.dali.ops.ExternalSource`

        data : numpy.ndarray or a list thereof
            The data to be used as the output of the ExternalSource referred to by `data_node`.
            In case of GPU external sources, this must be a ``numpy.ndarray``.

        layout : str
            The description of the data layout (or empty string, if not specified).
            It should be a string of the length that matches the dimensionality of the data, batch
            dimension excluded. For a batch of channel-first images, this should be "CHW", for
            channel-last video it's "FHWC" and so on.
        """
        if not self._built:
            raise RuntimeError("Pipeline must be built first.")
        if isinstance(data_node, str):
            name = data_node
        else:
            _data_node._check(data_node)
            name = data_node.name

        from nvidia.dali.external_source import _check_data_batch
        _check_data_batch(data, self._batch_size, layout)

        if isinstance(data, list):
            inputs = []
            for datum in data:
                inputs.append(Tensors.TensorCPU(datum, layout))
            self._pipe.SetExternalTensorInput(name, inputs)
        else:
            inp = Tensors.TensorListCPU(data, layout)
            self._pipe.SetExternalTLInput(name, inp)
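For context, a minimal sketch of how this feed_input is driven from user code. The node name "images" and the pipeline shape are illustrative, not taken from the source above:

import numpy as np
from nvidia.dali import pipeline_def, fn

@pipeline_def(batch_size=4, num_threads=1, device_id=None)
def es_pipe():
    return fn.external_source(name="images")

pipe = es_pipe()
pipe.build()
# A list of per-sample arrays; "HWC" matches the 3-D samples.
pipe.feed_input("images", [np.zeros((16, 16, 3), dtype=np.uint8)] * 4,
                layout="HWC")
out, = pipe.run()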
Code Example #12
File: _debug_mode.py  Project: NVIDIA/DALI
    def run(self):
        """Run the pipeline and return the result."""
        import numpy as np
        if not self._built:
            raise RuntimeError('Pipeline must be built first.')

        self._debug_on = True
        self._cur_operator_id = -1
        self._cur_iter_batch_info.reset()
        _pipeline.Pipeline.push_current(self)

        res = self._exec_func()
        if res is None:
            res = ()
        elif not isinstance(res, tuple):
            res = (res, )

        self._debug_on = False
        if not self._operators_built:
            self._operators_built = True
        _pipeline.Pipeline.pop_current()

        # Transform every output to a TensorList; plain constants are
        # tiled (np.tile) across the whole batch.
        outputs = []

        for i, val in enumerate(res):
            if isinstance(val, DataNodeDebug):
                outputs.append(val.get())
            elif isinstance(val, (list, tuple)):
                raise TypeError(
                    f'Illegal pipeline output type. The output {i} contains a nested `DataNodeDebug`'
                )
            else:
                outputs.append(
                    _tensors.TensorListCPU(
                        np.tile(val, (self._max_batch_size,
                                      *[1] * np.array(val).ndim))))
        return tuple(outputs)
Code Example #13
def create_tmp(idx):
    a = np.full((4, 4), idx)
    dlt = to_dlpack(torch.from_numpy(a))
    return tensors.TensorListCPU(dlt, "")
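A hedged round-trip check for the helper above (it assumes, as the snippet implies, that torch and torch.utils.dlpack.to_dlpack are in scope, and that TensorListCPU treats the first DLPack dimension as the sample dimension):

import numpy as np

tl = create_tmp(7)
assert np.array_equal(tl.as_array(), np.full((4, 4), 7))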
Code Example #14
def _test_disqualified_argument(key):
    tl = tensors.TensorListCPU(np.zeros((8, 256, 256, 3)))
    eager.crop(tl, crop=[64, 64], **{key: 0})  # passing the disqualified argument is expected to fail
Code Example #15
    def feed_input(self,
                   data_node,
                   data,
                   layout="",
                   cuda_stream=None,
                   use_copy_kernel=False):
        """Pass a mutlidimensional array or DLPack (or a list thereof) to an output of ExternalSource.
        In the case of the GPU input, the data must be modified on the same stream as the one
        used by feed_input. See ``cuda_stream`` parameter for details.

        Parameters
        ----------
        data_node : :class:`DataNode` or str
            The name of the :class:`nvidia.dali.ops.ExternalSource` node or a :class:`DataNode`
            object returned by a call to that ExternalSource.

        data : an ndarray or DLPack or a list thereof
            The array(s) may be one of:
              * NumPy ndarray (CPU)
              * MXNet ndarray (CPU)
              * PyTorch tensor (CPU or GPU)
              * CuPy array (GPU)
              * objects implementing ``__cuda_array_interface__``

            The data to be used as the output of the ExternalSource referred to by `data_node`.

        layout : str
            The description of the data layout (or empty string, if not specified).
            It should be a string of the length that matches the dimensionality of the data, batch
            dimension excluded. For a batch of channel-first images, this should be "CHW", for
            channel-last video it's "FHWC" and so on.

        cuda_stream : optional, `cudaStream_t` or an object convertible to `cudaStream_t`, e.g. `cupy.cuda.Stream`, `torch.cuda.Stream`
            The CUDA stream, which is going to be used for copying data to GPU or from a GPU
            source. If not set, best effort will be taken to maintain correctness - i.e. if the data
            is provided as a tensor/array from a recognized library (CuPy, PyTorch), the library's
            current stream is used. This should work in typical scenarios, but advanced use cases
            (and code using unsupported libraries) may still need to supply the stream handle
            explicitly.

            Special values:
              *  0 - use default CUDA stream
              * -1 - use DALI's internal stream

            If internal stream is used, the call to ``feed_input`` will block until the copy to
            internal buffer is complete, since there's no way to synchronize with this stream to
            prevent overwriting the array with new data in another stream.

        use_copy_kernel : optional, `bool`
            If set to True, DALI will use a CUDA kernel to feed the data (only applicable when copying
            data to/from GPU memory) instead of cudaMemcpyAsync (default).
        """
        if not self._built:
            raise RuntimeError("Pipeline must be built first.")
        if isinstance(data_node, str):
            name = data_node
        else:
            _data_node._check(data_node)
            name = data_node.name

        from nvidia.dali.external_source import _check_data_batch

        infer_stream = False
        if cuda_stream is None:
            infer_stream = True
        if cuda_stream == -1:
            cuda_stream = None
        else:
            cuda_stream = types._raw_cuda_stream(cuda_stream)

        def to_numpy(x):
            if types._is_mxnet_array(x):
                return x.asnumpy()
            elif types._is_torch_tensor(x):
                return x.numpy()
            else:
                return x

        # __cuda_array_interface__ doesn't provide any way to pass the information about the device
        # where the memory is located. It is assumed that the current device is the one that the memory belongs to,
        # unless the user sets the device explicitly creating TensorGPU/TensorListGPU
        if isinstance(data, list):
            inputs = []
            checked = False
            for datum in data:
                info = CheckDLPackCapsule(datum)
                if not info[0] and not checked:
                    _check_data_batch(data, self._batch_size, layout)
                    checked = True
                if hasattr(datum, "__cuda_array_interface__") or (info[0]
                                                                  and info[1]):
                    if infer_stream:
                        cuda_stream = _get_default_stream_for_array(datum)
                    inp = Tensors.TensorGPU(datum, layout)
                else:
                    datum = to_numpy(datum)
                    inp = Tensors.TensorCPU(datum, layout)
                inputs.append(inp)
            assert all(isinstance(inp, type(inputs[0])) for inp in inputs), \
                   "Mixed input types are not support, all need to reside on the CPU or GPU"
            self._pipe.SetExternalTensorInput(name, inputs,
                                              ctypes.c_void_p(cuda_stream),
                                              use_copy_kernel)
        else:
            info = CheckDLPackCapsule(data)
            if not info[0]:
                _check_data_batch(data, self._batch_size, layout)
            if hasattr(data, "__cuda_array_interface__") or (info[0]
                                                             and info[1]):
                if infer_stream:
                    cuda_stream = _get_default_stream_for_array(data)
                inp = Tensors.TensorListGPU(data, layout)
            else:
                data = to_numpy(data)
                inp = Tensors.TensorListCPU(data, layout)
            self._pipe.SetExternalTLInput(name, inp,
                                          ctypes.c_void_p(cuda_stream),
                                          use_copy_kernel)
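The cuda_stream semantics documented above can be exercised as follows; a sketch assuming a built pipeline `pipe` with an ExternalSource named "images" and a CUDA-enabled PyTorch:

import torch

gpu_batch = torch.zeros(4, 16, 16, 3, dtype=torch.uint8, device="cuda")
# Default (cuda_stream=None): DALI infers torch's current stream.
pipe.feed_input("images", gpu_batch, layout="HWC")
# cuda_stream=-1: copy on DALI's internal stream; the call blocks until done.
pipe.feed_input("images", gpu_batch, layout="HWC", cuda_stream=-1)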
Code Example #16
def test_arithm_op_context_manager_disabled():
    tl_1 = tensors.TensorListCPU(np.ones((8, 16, 16)))
    tl_2 = tensors.TensorListCPU(np.ones((8, 16, 16)))

    tl_1 + tl_2  # with eager arithmetic disabled (the default), this raises TypeError
Code Example #17
File: external_source.py  Project: hixio-mh/DALI
def _prep_data_for_feed_input(data, batch_size, layout, device_id=None):
    def to_numpy(x):
        if _types._is_mxnet_array(x):
            return x.asnumpy()
        elif _types._is_torch_tensor(x):
            return x.numpy()
        else:
            return x

    # __cuda_array_interface__ doesn't provide any way to pass the information about the device
    # where the memory is located. It is assumed that the current device is the one that the memory belongs to,
    # unless the user sets the device explicitly creating TensorGPU/TensorListGPU
    if isinstance(data, (_tensors.TensorListCPU, _tensors.TensorListGPU)):
        if layout is not None:
            _check_data_batch(data, batch_size, layout)
            data = type(data)(data, layout)
    elif isinstance(data, list):
        inputs = []
        checked = False
        for datum in data:
            (is_dlpack, is_gpu_data) = _b.CheckDLPackCapsule(datum)
            if not is_dlpack and not checked:
                _check_data_batch(data, batch_size, layout)
                checked = True
            if isinstance(datum, (_tensors.TensorCPU, _tensors.TensorGPU)):
                inp = type(datum)(datum, layout=layout) if layout is not None else datum
            elif is_dlpack:
                if is_gpu_data:
                    inp = _tensors.TensorGPU(datum, layout or "")
                else:
                    inp = _tensors.TensorCPU(datum, layout or "")
            elif hasattr(datum, "__cuda_array_interface__"):
                array_device_id = _types._get_device_id_for_array(datum)
                if array_device_id is None:
                    array_device_id = device_id
                inp = _tensors.TensorGPU(datum, layout or "", array_device_id)
            else:
                datum = to_numpy(datum)
                inp = _tensors.TensorCPU(datum, layout or "")
            inputs.append(inp)
        assert all(isinstance(inp, type(inputs[0])) for inp in inputs), \
            "Mixed input types are not support, all need to reside on the CPU or GPU"
        data = inputs
    else:
        (is_dlpack, is_gpu_data) = _b.CheckDLPackCapsule(data)
        if not is_dlpack:
            _check_data_batch(data, batch_size, layout)
        if hasattr(data, "__cuda_array_interface__"):
            array_device_id = _types._get_device_id_for_array(data)
            if array_device_id is None:
                array_device_id = device_id
            data = _tensors.TensorListGPU(data, layout or "", array_device_id)
        elif is_dlpack:
            if is_gpu_data:
                data = _tensors.TensorListGPU(data, layout or "")
            else:
                data = _tensors.TensorListCPU(data, layout or "")
        else:
            data = to_numpy(data)
            data = _tensors.TensorListCPU(data, layout or "")
    return data
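A hedged example of the normalization this helper performs, with shapes assumed for illustration:

import numpy as np

# The list branch returns a list of TensorCPU objects ...
samples = [np.zeros((2, 3), dtype=np.float32)] * 4
as_list = _prep_data_for_feed_input(samples, batch_size=4, layout="HW")
# ... while a single dense array comes back as one TensorListCPU.
as_tl = _prep_data_for_feed_input(np.zeros((4, 2, 3), dtype=np.float32),
                                  batch_size=4, layout="HW")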
Code Example #18
def create_tl(idx):
    a = np.full((3, 4), idx)
    return tensors.TensorListCPU(a, "")
Code Example #19
def get_tl(data, layout='HWC'):
    """ Utility function to create a TensorListCPU with given data and layout. """
    layout = '' if layout is None or (data.ndim != 4 and layout == 'HWC') else layout
    return tensors.TensorListCPU(data, layout=layout)
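Usage follows directly from the layout rule above: the default 'HWC' is kept only for 4-D (batched image) data. For example:

import numpy as np

batch = np.zeros((4, 32, 32, 3), dtype=np.uint8)
tl = get_tl(batch)          # 4-D data: layout stays "HWC"
tl_flat = get_tl(batch[0])  # 3-D data with the default layout: drops to ""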
Code Example #20
def test_es_device_change():
    cpu_data = np.zeros((8, 1))
    gpu_data = tensors.TensorListCPU(cpu_data)._as_gpu()
    for data, device in zip([cpu_data, gpu_data], ['gpu', 'cpu']):
        yield _test_es_device_change, data, device