# Assumed imports: CheckDLPackCapsule from DALI's backend, to_dlpack from PyTorch.
import torch
from torch.utils.dlpack import to_dlpack
from nvidia.dali.backend import CheckDLPackCapsule

def test_CheckDLPackCapsuleNone():
    # None is not a DLPack capsule, so neither flag is set.
    info = CheckDLPackCapsule(None)
    assert info == (False, False)

def test_CheckDLPackCapsuleGpu():
    # A capsule wrapping GPU memory reports both flags set.
    arr = torch.rand(size=[3, 5, 6], device="cuda")
    info = CheckDLPackCapsule(to_dlpack(arr))
    assert info == (True, True)
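A CPU-backed capsule can be checked the same way. A minimal sketch of the complementary test, assuming (as the GPU branch in ``feed_input`` below implies) that the second flag reports GPU residency:

def test_CheckDLPackCapsuleCpu():
    # Hypothetical test, not from the source: a valid capsule over CPU memory
    # should report (is_capsule, is_gpu) == (True, False).
    arr = torch.rand(size=[3, 5, 6])  # CPU tensor
    info = CheckDLPackCapsule(to_dlpack(arr))
    assert info == (True, False)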
Example #3
    def feed_input(self,
                   data_node,
                   data,
                   layout="",
                   cuda_stream=None,
                   use_copy_kernel=False):
        """Pass a mutlidimensional array or DLPack (or a list thereof) to an output of ExternalSource.
        In the case of the GPU input, the data must be modified on the same stream as the one
        used by feed_input. See ``cuda_stream`` parameter for details.

        Parameters
        ----------
        data_node : :class:`DataNode` or str
            The name of the :class:`nvidia.dali.ops.ExternalSource` node or a :class:`DataNode`
            object returned by a call to that ExternalSource.

        data : an ndarray or DLPack or a list thereof
            The array(s) may be one of:
              * NumPy ndarray (CPU)
              * MXNet ndarray (CPU)
              * PyTorch tensor (CPU or GPU)
              * CuPy array (GPU)
              * objects implementing ``__cuda_array_interface__``

            The data to be used as the output of the ExternalSource referred to by `data_node`.

        layout : str
            The description of the data layout (or empty string, if not specified).
            It should be a string whose length matches the dimensionality of the data, excluding
            the batch dimension. For a batch of channel-first images, this should be "CHW"; for
            channel-last video it's "FHWC", and so on.

        cuda_stream : optional, `cudaStream_t` or an object convertible to `cudaStream_t`, e.g. `cupy.cuda.Stream`, `torch.cuda.Stream`
            The CUDA stream to be used for copying data to the GPU or from a GPU
            source. If not set, a best effort is made to maintain correctness - i.e. if the data
            is provided as a tensor/array from a recognized library (CuPy, PyTorch), the library's
            current stream is used. This works in typical scenarios, but advanced use cases
            (and code using unsupported libraries) may still need to supply the stream handle
            explicitly.

            Special values:
              *  0 - use default CUDA stream
              * -1 - use DALI's internal stream

            If the internal stream is used, the call to ``feed_input`` blocks until the copy to the
            internal buffer is complete, since there is no way to synchronize with this stream to
            prevent the array from being overwritten by new data in another stream.

        use_copy_kernel : optional, `bool`
            If set to True, DALI uses a CUDA kernel to feed the data (applicable only when copying
            data to/from GPU memory) instead of ``cudaMemcpyAsync`` (the default).
        """
        if not self._built:
            raise RuntimeError("Pipeline must be built first.")
        if isinstance(data_node, str):
            name = data_node
        else:
            _data_node._check(data_node)
            name = data_node.name

        from nvidia.dali.external_source import _check_data_batch

        # cuda_stream=None means "infer the stream from the data source";
        # -1 selects DALI's internal stream.
        infer_stream = False
        if cuda_stream is None:
            infer_stream = True
        if cuda_stream == -1:
            cuda_stream = None
        else:
            cuda_stream = types._raw_cuda_stream(cuda_stream)

        def to_numpy(x):
            # Normalize CPU inputs from supported frameworks to NumPy arrays.
            if types._is_mxnet_array(x):
                return x.asnumpy()
            elif types._is_torch_tensor(x):
                return x.numpy()
            else:
                return x

        # __cuda_array_interface__ doesn't provide any way to pass information about the device
        # where the memory is located. It is assumed that the current device is the one the memory
        # belongs to, unless the user sets the device explicitly by creating a TensorGPU/TensorListGPU.
        if isinstance(data, list):
            inputs = []
            checked = False
            for datum in data:
                info = CheckDLPackCapsule(datum)
                if not info[0] and not checked:
                    _check_data_batch(data, self._batch_size, layout)
                    checked = True
                if hasattr(datum, "__cuda_array_interface__") or (info[0] and info[1]):
                    if infer_stream:
                        cuda_stream = _get_default_stream_for_array(datum)
                    inp = Tensors.TensorGPU(datum, layout)
                else:
                    datum = to_numpy(datum)
                    inp = Tensors.TensorCPU(datum, layout)
                inputs.append(inp)
            assert all(isinstance(inp, type(inputs[0])) for inp in inputs), \
                   "Mixed input types are not supported; all inputs must reside on the CPU or all on the GPU"
            self._pipe.SetExternalTensorInput(name, inputs,
                                              ctypes.c_void_p(cuda_stream),
                                              use_copy_kernel)
        else:
            info = CheckDLPackCapsule(data)
            if not info[0]:
                _check_data_batch(data, self._batch_size, layout)
            if hasattr(data, "__cuda_array_interface__") or (info[0] and info[1]):
                if infer_stream:
                    cuda_stream = _get_default_stream_for_array(data)
                inp = Tensors.TensorListGPU(data, layout)
            else:
                data = to_numpy(data)
                inp = Tensors.TensorListCPU(data, layout)
            self._pipe.SetExternalTLInput(name, inp,
                                          ctypes.c_void_p(cuda_stream),
                                          use_copy_kernel)
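For reference, a minimal usage sketch of ``feed_input`` with the class-based Pipeline API; the pipeline class, batch contents, and layout below are illustrative assumptions rather than part of the code above:

import numpy as np
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops

class ExternalSourcePipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id):
        super().__init__(batch_size, num_threads, device_id)
        self.source = ops.ExternalSource()

    def define_graph(self):
        # self.batch becomes the DataNode that feed_input refers to.
        self.batch = self.source()
        return self.batch

pipe = ExternalSourcePipeline(batch_size=2, num_threads=1, device_id=0)
pipe.build()
# A batch of two channel-last images; layout describes sample dims, batch excluded.
data = [np.random.rand(480, 640, 3).astype(np.float32) for _ in range(2)]
pipe.feed_input(pipe.batch, data, layout="HWC")
images, = pipe.run()

For GPU sources, the stream can be pinned explicitly, e.g. ``cuda_stream=0`` for the default CUDA stream or ``cuda_stream=-1`` for DALI's internal stream (which makes the call block until the internal copy completes).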