Example 1
# Imports this snippet relies on (it is excerpted from DALI's MXNet plugin):
import ctypes

import mxnet as mx

from nvidia.dali import types
from nvidia.dali.backend import TensorGPU, TensorListGPU

def feed_ndarray(dali_tensor, arr, cuda_stream=None):
    """
    Copy contents of DALI tensor to MXNet's NDArray.

    Parameters
    ----------
    `dali_tensor` : nvidia.dali.backend.TensorCPU or nvidia.dali.backend.TensorGPU
                    Tensor from which to copy
    `arr` : mxnet.nd.NDArray
            Destination of the copy
    `cuda_stream` : cudaStream_t handle or any value that can be cast to cudaStream_t.
                    CUDA stream to be used for the copy
                    (if not provided, an internal user stream will be selected)
                    In most cases, using the default internal user stream or stream 0
                    is expected.
    """
    # Wait until arr is no longer used by the engine
    # (_wait_to_write is a private helper defined alongside this function
    # in DALI's MXNet plugin)
    _wait_to_write(arr)
    assert dali_tensor.shape() == list(arr.shape), \
            ("Shapes do not match: DALI tensor has shape {0}"
            ", but NDArray has shape {1}".format(dali_tensor.shape(), list(arr.shape)))
    # Get CTypes void pointer to the underlying memory held by arr
    ptr = ctypes.c_void_p()
    mx.base._LIB.MXNDArrayGetData(arr.handle, ctypes.byref(ptr))

    cuda_stream = types._raw_cuda_stream(cuda_stream)

    # Copy data from DALI tensor to ptr
    if isinstance(dali_tensor, (TensorGPU, TensorListGPU)):
        dali_tensor.copy_to_external(ptr, None if cuda_stream is None else ctypes.c_void_p(cuda_stream))
    else:
        dali_tensor.copy_to_external(ptr)
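
A minimal usage sketch for this MXNet variant. It assumes `pipe` is an already built DALI pipeline whose first output resides on the CPU; the pipeline definition and the dtype are placeholders for illustration, not part of the original source.

# Hypothetical usage of feed_ndarray with MXNet (assumes a built pipeline `pipe`)
import mxnet as mx

outputs = pipe.run()
t = outputs[0].as_tensor()           # view the whole batch as one dense tensor
arr = mx.nd.zeros(tuple(t.shape()))  # dtype must match the DALI tensor's
                                     # element type; float32 is assumed here
feed_ndarray(t, arr)                 # CPU-to-CPU copy; no CUDA stream needed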
Example 2
# Imports this snippet relies on (it is excerpted from DALI's Paddle plugin):
import ctypes

from nvidia.dali import types
from nvidia.dali.backend import TensorGPU, TensorListGPU

def feed_ndarray(dali_tensor, ptr, cuda_stream=None):
    """
    Copy contents of DALI tensor to Paddle's Tensor.

    Parameters
    ----------
    `dali_tensor` : dali.backend.TensorCPU or dali.backend.TensorGPU
                    Tensor from which to copy
    `ptr` : LoDTensor data pointer
            Destination of the copy
    `cuda_stream` : cudaStream_t handle or any value that can be cast to cudaStream_t
                    CUDA stream to be used for the copy
                    (if not provided, an internal user stream will be selected)
    """

    cuda_stream = types._raw_cuda_stream(cuda_stream)

    c_type_pointer = ctypes.c_void_p(ptr)
    if isinstance(dali_tensor, (TensorGPU, TensorListGPU)):
        dali_tensor.copy_to_external(
            c_type_pointer,
            None if cuda_stream is None else ctypes.c_void_p(cuda_stream))
    else:
        dali_tensor.copy_to_external(c_type_pointer)
    return ptr
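
A heavily hedged sketch of how this Paddle variant might be driven, mirroring the way DALI's Paddle iterator allocates its destination. The private LoDTensor methods `_set_dims` and `_mutable_data`, the place, and the dtype constant are assumptions for illustration, not confirmed by the original source.

# Hypothetical usage with Paddle (assumes a built pipeline `pipe` with a CPU output)
import paddle.fluid as fluid

outputs = pipe.run()
t = outputs[0].as_tensor()
lod_tensor = fluid.core.LoDTensor()
lod_tensor._set_dims(t.shape())                    # assumed private API
ptr = lod_tensor._mutable_data(                    # assumed private API; returns
    fluid.CPUPlace(),                              # a raw pointer to the buffer
    fluid.core.VarDesc.VarType.FP32)               # dtype must match the DALI tensor
feed_ndarray(t, ptr)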
Example 3
# Imports this snippet relies on (it is excerpted from DALI's PyTorch plugin):
import ctypes

from nvidia.dali import types
from nvidia.dali.backend import TensorGPU, TensorListGPU

def feed_ndarray(dali_tensor, arr, cuda_stream=None):
    """
    Copy contents of DALI tensor to PyTorch's Tensor.

    Parameters
    ----------
    `dali_tensor` : nvidia.dali.backend.TensorCPU or nvidia.dali.backend.TensorGPU
                    Tensor from which to copy
    `arr` : torch.Tensor
            Destination of the copy
    `cuda_stream` : torch.cuda.Stream, cudaStream_t or any value that can be cast to cudaStream_t.
                    CUDA stream to be used for the copy
                    (if not provided, an internal user stream will be selected)
                    In most cases, using PyTorch's current stream is expected (for example,
                    if we are copying to a tensor allocated with torch.zeros(...))
    """
    assert dali_tensor.shape() == list(arr.size()), \
            ("Shapes do not match: DALI tensor has size {0}"
            ", but PyTorch Tensor has size {1}".format(dali_tensor.shape(), list(arr.size())))
    cuda_stream = types._raw_cuda_stream(cuda_stream)

    # turn the raw pointer (a Python int) into a C void pointer
    c_type_pointer = ctypes.c_void_p(arr.data_ptr())
    if isinstance(dali_tensor, (TensorGPU, TensorListGPU)):
        dali_tensor.copy_to_external(
            c_type_pointer,
            None if cuda_stream is None else ctypes.c_void_p(cuda_stream))
    else:
        dali_tensor.copy_to_external(c_type_pointer)
    return arr
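
A sketch of the typical PyTorch usage the docstring describes: copying a GPU batch into a preallocated torch.Tensor on PyTorch's current stream. Here `pipe` and the dtype are assumptions; the dtype must match the DALI tensor's element type.

# Hypothetical usage with PyTorch (assumes a built pipeline `pipe` with a GPU output)
import torch

outputs = pipe.run()
t = outputs[0].as_tensor()                      # dense TensorGPU, shape [N, ...]
arr = torch.empty(t.shape(), dtype=torch.uint8, device='cuda')  # dtype assumed
stream = torch.cuda.current_stream(device=arr.device)
feed_ndarray(t, arr, cuda_stream=stream)        # copy runs on PyTorch's current stream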
Example 4
    def feed_input(self,
                   data_node,
                   data,
                   layout="",
                   cuda_stream=None,
                   use_copy_kernel=False):
        """Pass a mutlidimensional array or DLPack (or a list thereof) to an output of ExternalSource.
        In the case of the GPU input, the data must be modified on the same stream as the one
        used by feed_input. See ``cuda_stream`` parameter for details.

        Parameters
        ----------
        data_node : :class:`DataNode` or str
            The name of the :class:`nvidia.dali.ops.ExternalSource` node or a :class:`DataNode`
            object returned by a call to that ExternalSource.

        data : an ndarray or DLPack or a list thereof
            The array(s) may be one of:
              * NumPy ndarray (CPU)
              * MXNet ndarray (CPU)
              * PyTorch tensor (CPU or GPU)
              * CuPy array (GPU)
              * objects implementing ``__cuda_array_interface__``

            The data to be used as the output of the ExternalSource referred to by `data_node`.

        layout : str
            The description of the data layout (or empty string, if not specified).
            It should be a string of the length that matches the dimensionality of the data, batch
            dimension excluded. For a batch of channel-first images, this should be "CHW", for
            channel-last video it's "FHWC" and so on.

        cuda_stream : optional, `cudaStream_t` or an object convertible to `cudaStream_t`, e.g. `cupy.cuda.Stream`, `torch.cuda.Stream`
            The CUDA stream, which is going to be used for copying data to GPU or from a GPU
            source. If not set, best effort will be taken to maintain correctness - i.e. if the data
            is provided as a tensor/array from a recognized library (CuPy, PyTorch), the library's
            current stream is used. This should work in typical scenarios, but advanced use cases
            (and code using unsupported libraries) may still need to supply the stream handle
            explicitly.

            Special values:
              *  0 - use default CUDA stream
              * -1 - use DALI's internal stream

            If internal stream is used, the call to ``feed_input`` will block until the copy to
            internal buffer is complete, since there's no way to synchronize with this stream to
            prevent overwriting the array with new data in another stream.

        use_copy_kernel : optional, `bool`
            If set to True, DALI will use a CUDA kernel to feed the data (only applicable when copying
            data to/from GPU memory) instead of cudaMemcpyAsync (default).
        """
        if not self._built:
            raise RuntimeError("Pipeline must be built first.")
        if isinstance(data_node, str):
            name = data_node
        else:
            _data_node._check(data_node)
            name = data_node.name

        from nvidia.dali.external_source import _check_data_batch

        # Resolve the stream argument: None means "infer the stream from the input
        # array's library", -1 selects DALI's internal stream (passed down as a null
        # pointer), and anything else is converted to a raw cudaStream_t handle.
        infer_stream = False
        if cuda_stream is None:
            infer_stream = True
        if cuda_stream == -1:
            cuda_stream = None
        else:
            cuda_stream = types._raw_cuda_stream(cuda_stream)

        def to_numpy(x):
            if types._is_mxnet_array(x):
                return x.asnumpy()
            elif types._is_torch_tensor(x):
                return x.numpy()
            else:
                return x

        # __cuda_array_interface__ doesn't provide any way to pass information about the device
        # where the memory is located. It is assumed that the current device is the one that
        # the memory belongs to, unless the user sets the device explicitly by creating
        # TensorGPU/TensorListGPU.
        if isinstance(data, list):
            inputs = []
            checked = False
            for datum in data:
                info = CheckDLPackCapsule(datum)
                if not info[0] and not checked:
                    _check_data_batch(data, self._batch_size, layout)
                    checked = True
                if hasattr(datum, "__cuda_array_interface__") or (info[0]
                                                                  and info[1]):
                    if infer_stream:
                        cuda_stream = _get_default_stream_for_array(datum)
                    inp = Tensors.TensorGPU(datum, layout)
                else:
                    datum = to_numpy(datum)
                    inp = Tensors.TensorCPU(datum, layout)
                inputs.append(inp)
            assert all(isinstance(inp, type(inputs[0])) for inp in inputs), \
                "Mixed input types are not supported; all inputs must reside on the CPU or all on the GPU"
            self._pipe.SetExternalTensorInput(name, inputs,
                                              ctypes.c_void_p(cuda_stream),
                                              use_copy_kernel)
        else:
            info = CheckDLPackCapsule(data)
            if not info[0]:
                _check_data_batch(data, self._batch_size, layout)
            if hasattr(data, "__cuda_array_interface__") or (info[0]
                                                             and info[1]):
                if infer_stream:
                    cuda_stream = _get_default_stream_for_array(data)
                inp = Tensors.TensorListGPU(data, layout)
            else:
                data = to_numpy(data)
                inp = Tensors.TensorListCPU(data, layout)
            self._pipe.SetExternalTLInput(name, inp,
                                          ctypes.c_void_p(cuda_stream),
                                          use_copy_kernel)
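
A minimal end-to-end sketch of this method with the classic ops.ExternalSource API that the docstring refers to. The pipeline class, names, and shapes below are illustrative assumptions, not part of the original source.

# Hypothetical usage of feed_input with ops.ExternalSource
import numpy as np
import nvidia.dali.ops as ops
from nvidia.dali.pipeline import Pipeline

class ExtSrcPipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id):
        super(ExtSrcPipeline, self).__init__(batch_size, num_threads, device_id)
        self.input = ops.ExternalSource()

    def define_graph(self):
        self.images = self.input()
        return self.images

pipe = ExtSrcPipeline(batch_size=4, num_threads=1, device_id=0)
pipe.build()

# A batch of channel-last images, so layout "HWC" (batch dimension excluded).
batch = [np.random.rand(64, 64, 3).astype(np.float32) for _ in range(4)]
pipe.feed_input(pipe.images, batch, layout="HWC")
images, = pipe.run()

Passing cuda_stream=-1 here would force DALI's internal stream and make the call block until the copy to the internal buffer completes, as the docstring notes.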