Python Device Exemples, cupy.cuda.Device Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : test_join.py Projet : carterbox/cupy

 def test_concatenate_large_different_devices(self):
     """Expect ``ValueError`` when concatenating arrays from two devices.

     Ten arrays are allocated, alternating between device 0 and device 1.
     """
     shape = (2, 3, 4)
     arrays = []
     for index in range(10):
         # Even indices allocate on device 0, odd indices on device 1.
         with cuda.Device(index % 2):
             arrays.append(cupy.empty(shape))
     with pytest.raises(ValueError):
         cupy.concatenate(arrays)

Exemple #2

0

Afficher le fichier

def copy(a):
    """Return a copy of ``a`` allocated on the current device.

    The new array is always allocated on the current device. When ``a``
    lives on a different device, its contents are copied across devices.

    Args:
        a (cupy.ndarray): The source array.

    Returns:
        cupy.ndarray: The copy of ``a`` on the current device.

    See: :func:`numpy.copy`, :meth:`cupy.ndarray.copy`

    """
    # TODO(beam2d): Support ordering option
    if a.size == 0:
        # Nothing to transfer for an empty array.
        return cupy.empty_like(a)

    if a.flags.c_contiguous:
        source = a
    else:
        source = ascontiguousarray(a)
        # NOTE(review): this assumes ``ascontiguousarray`` already returned a
        # fresh array for non-contiguous input, so it can be handed back
        # directly when it sits on the current device -- confirm.
        if source.data.device == cuda.Device():
            return source

    # Allocate on the current device and copy the raw bytes over.
    result = cupy.empty_like(source)
    result.data.copy_from(source.data, source.nbytes)
    return result

Exemple #3

0

Afficher le fichier

 def test_copy_multi_device_non_contiguous(self, order):
     """Copy a strided array while device 1 is active and check the result.

     The copy must report device 1 as its device and compare equal to the
     source array.
     """
     base = core.ndarray((20, ))
     strided = base[::2]  # every other element of ``base``
     target = cuda.Device(1)
     with target:
         copied = strided.copy(order)
     assert copied.device == target
     testing.assert_array_equal(strided, copied)

Exemple #4

0

Afficher le fichier

Fichier : test_device.py Projet : zivzone/cupy

 def test_device_attributes(self):
     """Check the container type and value types of ``Device.attributes``."""
     attrs = cuda.Device().attributes
     assert isinstance(attrs, dict)
     for value in attrs.values():
         assert isinstance(value, int)
     # This specific attribute is expected to exist on any supported GPU.
     assert 'MaxThreadsPerBlock' in attrs

Exemple #5

0

Afficher le fichier

 def t1_seq():
     """Second-thread sequence: nest two device contexts and record the result.

     NOTE(review): ``t0_setup``, ``t1_setup`` and ``t0_first_exit`` are
     presumably ``threading.Event``-like objects shared with another thread,
     and ``dev0``/``dev1`` device objects from the enclosing scope -- confirm.
     """
     # Block until the other thread signals that its setup is done.
     t0_setup.wait()
     with dev1:
         with dev0:
             # Signal that both contexts are entered, then wait for the other
             # thread's first exit before leaving the inner context.
             t1_setup.set()
             t0_first_exit.wait()
         # Recorded after leaving ``dev0`` but while still inside ``dev1``.
         t1_exit_device.append(cuda.Device().id)

Exemple #6

0

Afficher le fichier

def inv_gpu(b):
    """Invert a matrix or batch of matrices via batched LU on the GPU.

    The input is brought into batch form by ``_as_batch_mat`` and copied,
    because the factorization modifies its operand in place.

    Args:
        b: Input matrix or batch of matrices.
            NOTE(review): presumably a float32 ``cupy.ndarray``, since the
            ``sgetrfBatched``/``sgetriBatched`` routines are used -- confirm.

    Returns:
        The output array written by ``sgetriBatched``; it has the same shape
        and dtype as the batched copy of ``b``.
    """
    # Work on a copy: the elements will be modified in-place.
    batch = _as_batch_mat(b).copy()
    dim = batch.shape[1]
    count = len(batch)
    # Pivot array
    pivots = cupy.empty((dim, count), dtype=np.int32)
    # Output array
    inverse = cupy.empty_like(batch)
    # Holds information on the execution success, or whether a matrix was
    # singular. It is filled by the routines but not inspected here.
    status = cupy.empty(count, dtype=np.int32)
    batch_ptrs = _mat_ptrs(batch)
    inverse_ptrs = _mat_ptrs(inverse)
    _, lda = _get_ld(batch)
    _, ldc = _get_ld(inverse)
    cublas_handle = cuda.Device().cublas_handle
    # Factorize first, then compute the inverses from the factors.
    cuda.cublas.sgetrfBatched(
        cublas_handle, dim, batch_ptrs.data.ptr, lda, pivots.data.ptr,
        status.data.ptr, count)
    cuda.cublas.sgetriBatched(
        cublas_handle, dim, batch_ptrs.data.ptr, lda, pivots.data.ptr,
        inverse_ptrs.data.ptr, ldc, status.data.ptr, count)
    return inverse

Exemple #7

0

Afficher le fichier

Fichier : cuda.py Projet : xsd1221/chainer

def to_gpu(array, device=None, stream=None):
    """Copy the given CPU array (or arrays) to the specified device.

    Args:
        array (*array*, None, list or tuple):
            Array or arrays to be sent to GPU.
        device: CUDA device specifier. If ``None`` or :data:`cuda.DummyDevice`,
            the arrays will be copied to the current CUDA device.
        stream (~cupy.cuda.Stream): *(deprecated since v3.0.0)*
            CUDA stream. If not ``None``, the copy runs asynchronously.

    Returns:
        cupy.ndarray, list or tuple: Array or arrays on GPU.

        Arrays that are already on GPU are returned as they are, without
        any copy being performed.

        A `None` among the inputs is returned as `None`.

    """
    if stream is not None:
        warnings.warn(
            'The stream option is deprecated in chainer.backends.cuda.to_gpu. '
            'Please remove it.', DeprecationWarning)

    check_cuda_available()
    # The dummy device stands for "whatever device is current".
    device = (cuda.Device() if device is DummyDevice
              else _get_device_or_current(device))

    def _send(arr):
        return _array_to_gpu(arr, device, stream)

    return _backend._convert_arrays(array, _send)

Exemple #8

0

Afficher le fichier

def get_handle():
    """Return the handle cached for the current device, creating it if absent.

    Handles are stored in the module-level ``_handles`` mapping, keyed by
    device id, and are produced by ``cudnn.create()`` on first request.
    """
    device_id = cuda.Device().id
    cached = _handles.get(device_id, None)
    if cached is not None:
        return cached
    created = cudnn.create()
    _handles[device_id] = created
    return created

Exemple #9

0

Afficher le fichier

 def run_send_recv(rank, n_workers, dtype):
     """Call ``send_recv`` once per peer index and check output equals input.

     ``dtype`` is accepted but not used; the arrays use dtype ``'f'``.
     """
     # Make the device whose index equals ``rank`` current for this worker.
     cuda.Device(rank).use()
     backend = NCCLBackend(n_workers, rank)
     payload = cupy.arange(10, dtype='f')
     for peer in range(n_workers):
         # Fresh zeroed buffer for every exchange.
         received = cupy.zeros((10, ), dtype='f')
         backend.send_recv(payload, received, peer)
         testing.assert_allclose(received, payload)

Exemple #10

0

Afficher le fichier

    def run_all_reduce(rank, n_workers, dtype):
        """All-reduce a (2, 3, 4) array and expect twice the input.

        ``dtype`` is accepted but not used; the arrays use dtype ``'f'``.
        """
        # Make the device whose index equals ``rank`` current for this worker.
        cuda.Device(rank).use()
        backend = NCCLBackend(n_workers, rank)
        source = cupy.arange(2 * 3 * 4, dtype='f').reshape(2, 3, 4)
        reduced = cupy.zeros((2, 3, 4), dtype='f')

        backend.all_reduce(source, reduced)
        # NOTE(review): the factor 2 presumably assumes two workers
        # contributing identical inputs -- confirm against the caller.
        testing.assert_allclose(reduced, 2 * source)

Exemple #11

0

Afficher le fichier

def check_args_device(args):
    """Verify that every ``cupy.ndarray`` in ``args`` is on the current device.

    Arguments that are not ``cupy.ndarray`` instances are ignored.

    Args:
        args: Iterable of arguments to inspect.

    Raises:
        ValueError: If an array's device differs from the current device.
    """
    current = cuda.Device()
    for candidate in args:
        if not isinstance(candidate, cupy.ndarray):
            continue
        owner = candidate.data.device
        if owner != current:
            raise ValueError('Array device must be same as the current '
                             'device: array device = %d while current = %d'
                             % (owner.id, current.id))

Exemple #12

0

Afficher le fichier

        def ret(*args, **kwargs):
            """Return the cached result of ``f`` for these arguments.

            The cache key is built from the positional arguments and the
            keyword items; when ``for_each_device`` is set, the current
            device id is added so each device gets its own entry.
            """
            key = (args, frozenset(kwargs.items()))
            if for_each_device:
                key = (cuda.Device().id, key)

            # ``none`` is the sentinel from the enclosing scope that marks
            # a missing entry.
            cached = memo.get(key, none)
            if cached is not none:
                return cached
            computed = f(*args, **kwargs)
            memo[key] = computed
            return computed

Exemple #13

0

Afficher le fichier

    def run_reduce_scatter(rank, n_workers, dtype):
        """Reduce-scatter an (n_workers, 10) array and check this rank's row.

        ``dtype`` is accepted but not used; the arrays use dtype ``'f'``.
        """
        # Make the device whose index equals ``rank`` current for this worker.
        cuda.Device(rank).use()
        backend = NCCLBackend(n_workers, rank)
        flat = cupy.arange(n_workers * 10, dtype='f')
        source = 1 + flat.reshape(n_workers, 10)
        scattered = cupy.zeros((10, ), dtype='f')

        backend.reduce_scatter(source, scattered, 10)
        # NOTE(review): the factor 2 presumably assumes two workers
        # contributing identical inputs -- confirm against the caller.
        testing.assert_allclose(scattered, 2 * source[rank])

Exemple #14

0

Afficher le fichier

 def run_init(rank, n_workers):
     """Create a communicator via ``init_process_group`` and smoke-test it."""
     # Make the device whose index equals ``rank`` current for this worker.
     cuda.Device(rank).use()
     communicator = init_process_group(n_workers, rank)
     # Do a simple broadcast to verify we got a valid communicator: only
     # rank 0 starts with a one, and every rank must end up holding a one.
     value = cupy.zeros(1)
     if rank == 0:
         value = value + 1
     communicator.broadcast(value, 0)
     testing.assert_allclose(value, cupy.ones(1))

Exemple #15

0

Afficher le fichier

Fichier : test_ndarray.py Projet : reshmarabi/cupy

    def test_copy_multi_device_with_stream(self):
        """Check a cross-device copy made while a slow kernel is pending."""
        # Kernel that takes long enough then finally writes values.
        slow_kernel = cupy.RawKernel(
            _test_copy_multi_device_with_stream_src, 'wait_and_write')

        # Allocate memory and launch the kernel on device 0 within its own
        # stream.
        with cuda.Device(0), cuda.Stream():
            src = cupy.zeros((2,), dtype=numpy.uint64)
            slow_kernel((1,), (1,), src)

        # D2D copy to another device with another stream should get the
        # original values of the memory before the kernel on the first device
        # finally makes the write.
        with cuda.Device(1), cuda.Stream():
            dst = src.copy()
            expected = numpy.array([0, 0], dtype=numpy.uint64)
            testing.assert_array_equal(dst, expected)

Exemple #16

0

Afficher le fichier

Fichier : test_join.py Projet : toslunar/cupy

 def test_concatenate_large_different_devices(self):
     """Concatenate arrays from two devices; outcome depends on peer access.

     If device 0 can access device 1 as a peer, a ``PerformanceWarning`` is
     expected; otherwise a ``ValueError`` is expected.
     """
     shape = (2, 3, 4)
     arrays = []
     for index in range(10):
         # Even indices allocate on device 0, odd indices on device 1.
         with cuda.Device(index % 2):
             arrays.append(cupy.empty(shape))

     peer_access = cuda.runtime.deviceCanAccessPeer(0, 1) == 1
     if peer_access:
         expectation = pytest.warns(cupy._util.PerformanceWarning)
     else:
         expectation = pytest.raises(ValueError)
     with expectation:
         cupy.concatenate(arrays)

Exemple #17

0

Afficher le fichier

 def run_send_and_recv(rank, n_workers, dtype):
     """Rank 0 sends an array to rank 1; other ranks receive from rank 0.

     ``dtype`` is accepted but not used; the arrays use dtype ``'f'``.
     """
     # Make the device whose index equals ``rank`` current for this worker.
     cuda.Device(rank).use()
     backend = NCCLBackend(n_workers, rank)
     payload = cupy.arange(10, dtype='f')
     received = cupy.zeros((10, ), dtype='f')
     if rank == 0:
         backend.send(payload, 1)
         return
     # Receiving side: the buffer must end up equal to the sender's payload.
     backend.recv(received, 0)
     testing.assert_allclose(received, payload)

Exemple #18

0

Afficher le fichier

 def run_all_to_all(rank, n_workers, dtype):
     """Run ``all_to_all`` on an (n_workers, 10) array and check the output.

     ``dtype`` is accepted but not used; the arrays use dtype ``'f'``.
     """
     # Make the device whose index equals ``rank`` current for this worker.
     cuda.Device(rank).use()
     backend = NCCLBackend(n_workers, rank)
     flat = cupy.arange(n_workers * 10, dtype='f')
     source = flat.reshape(n_workers, 10)
     gathered = cupy.zeros((n_workers, 10), dtype='f')
     backend.all_to_all(source, gathered)
     # Every expected row is 0..9 shifted by ten times this worker's rank.
     row = cupy.arange(10, dtype='f')
     expected = (10 * rank) + cupy.broadcast_to(row, (n_workers, 10))
     testing.assert_allclose(gathered, expected)

Exemple #19

0

Afficher le fichier

 def run_barrier(rank, n_workers):
     """Check that a barrier makes every rank wait for rank 0's sleep.

     Rank 0 sleeps for two seconds between two barriers; each rank asserts
     that the whole-second elapsed time between them equals 2.
     """
     # Make the device whose index equals ``rank`` current for this worker.
     cuda.Device(rank).use()
     backend = NCCLBackend(n_workers, rank)
     backend.barrier()
     started = time.time()
     if rank == 0:
         time.sleep(2)
     backend.barrier()
     elapsed = time.time() - started
     assert int(elapsed) == 2

Exemple #20

0

Afficher le fichier

 def run_broadcast(rank, n_workers, root, dtype):
     """Broadcast a (2, 3, 4) array from ``root`` and check every rank's copy."""
     # Make the device whose index equals ``rank`` current for this worker.
     cuda.Device(rank).use()
     backend = NCCLBackend(n_workers, rank)
     expected = cupy.arange(2 * 3 * 4, dtype=dtype).reshape((2, 3, 4))
     # The root starts with the reference data; other ranks start zeroed.
     buffer = (expected if rank == root
               else cupy.zeros((2, 3, 4), dtype=dtype))
     backend.broadcast(buffer, root)
     testing.assert_allclose(buffer, expected)

Exemple #21

0

Afficher le fichier

Fichier : test_nccl.py Projet : zhaohb/cupy

 def test_single_proc_single_dev(self):
     """All-reduce (sum) on a single communicator leaves the data unchanged."""
     nccl = cuda.nccl
     communicators = nccl.NcclCommunicator.initAll(1)
     nccl.groupStart()
     for communicator in communicators:
         # Switch to the communicator's device before allocating buffers.
         device = cuda.Device(communicator.device_id())
         device.use()
         sendbuf = cupy.arange(10)
         recvbuf = cupy.zeros_like(sendbuf)
         communicator.allReduce(
             sendbuf.data.ptr, recvbuf.data.ptr, 10,
             nccl.NCCL_INT64, nccl.NCCL_SUM, cuda.Stream.null.ptr)
     nccl.groupEnd()
     # The buffers compared are those of the last loop iteration.
     assert cupy.allclose(sendbuf, recvbuf)

Exemple #22

0

Afficher le fichier

 def run_gather(rank, n_workers, root, dtype):
     """Gather one row per rank at ``root`` and check the assembled matrix.

     ``dtype`` is accepted but not used; the arrays use dtype ``'f'``.
     Only the root rank verifies the gathered output.
     """
     # Make the device whose index equals ``rank`` current for this worker.
     cuda.Device(rank).use()
     backend = NCCLBackend(n_workers, rank)
     contribution = (rank + 1) * cupy.arange(10, dtype='f')
     gathered = cupy.zeros((n_workers, 10), dtype='f')
     backend.gather(contribution, gathered, root)
     if rank != root:
         return
     # Row ``i`` of the expected matrix is ``(i + 1) * arange(10)``.
     scale = 1 + cupy.arange(n_workers).reshape(n_workers, 1)
     row = cupy.arange(10, dtype='f')
     expected = scale * cupy.broadcast_to(row, (n_workers, 10))
     testing.assert_allclose(gathered, expected)

Exemple #23

0

Afficher le fichier

def _check_args(args):
    """Validate argument types and the device of array arguments.

    Each argument must be either a ``cupy.ndarray`` or an instance of
    ``_scalar_type``. Arrays whose ``device`` is not ``None`` must be on the
    current device.

    Args:
        args: Iterable of arguments to validate.

    Raises:
        ValueError: If an array's device differs from the current device.
        TypeError: If an argument is neither an array nor a supported scalar.
    """
    current = cuda.Device()
    ndarray_type = cupy.ndarray
    scalars = _scalar_type
    for item in args:
        if not isinstance(item, ndarray_type):
            if not isinstance(item, scalars):
                raise TypeError('Unsupported type %s' % type(item))
            continue
        owner = item.device
        if owner is not None and owner != current:
            raise ValueError('Array device must be same as the current '
                             'device: array device = %d while current = %d'
                             % (owner.id, current.id))

Exemple #24

0

Afficher le fichier

Fichier : test_ndarray.py Projet : cupy/cupy

    def test_copy_multi_device_with_stream(self):
        """Check a cross-device copy made while a slow kernel is pending.

        A kernel is launched on device 0 inside its own stream; a copy made
        on device 1 inside another stream is then expected to hold zeros.
        """
        # Kernel that takes long enough then finally writes values.
        src = _test_copy_multi_device_with_stream_src
        # On HIP builds at or above this version number the source gets an
        # extra ``<ctime>`` include prepended.
        if runtime.is_hip and driver.get_build_version() >= 5_00_00000:
            src = '#include <ctime>\n' + src
        kern = cupy.RawKernel(src, 'wait_and_write')

        # Allocates a memory and launches the kernel on a device with its
        # stream.
        with cuda.Device(0):
            # Keep this stream alive over the D2D copy below for HIP
            # (binding it to ``s1`` holds a reference after the block exits).
            with cuda.Stream() as s1:  # NOQA
                a = cupy.zeros((2,), dtype=numpy.uint64)
                kern((1,), (1,), a)

        # D2D copy to another device with another stream should get the
        # original values of the memory before the kernel on the first device
        # finally makes the write.
        with cuda.Device(1):
            with cuda.Stream():
                b = a.copy()
                testing.assert_array_equal(
                    b, numpy.array([0, 0], dtype=numpy.uint64))

Exemple #25

0

Afficher le fichier

Fichier : util.py Projet : yanweifu/chainer

        def ret(*args, **kwargs):
            """Return the memoized result of ``f`` for these arguments.

            The cache lives on the wrapped function as ``f._cupy_memo`` and
            is created on first use, at which point ``f`` is also appended
            to the module-level ``_memoized_funcs`` list. The key is built
            from the positional arguments and the keyword items; when
            ``for_each_device`` is set, the current device id is added so
            each device gets its own entry.

            A result of ``None`` is cached like any other value.
            """
            arg_key = (args, frozenset(kwargs.items()))
            if for_each_device:
                arg_key = (cuda.Device().id, arg_key)

            memo = getattr(f, '_cupy_memo', None)
            if memo is None:
                memo = f._cupy_memo = {}
                # Only mutated, never rebound, so no ``global`` is needed.
                _memoized_funcs.append(f)

            # Look the key up directly instead of comparing against ``None``:
            # a ``None`` default cannot tell "not cached yet" from "cached
            # None", which would re-run ``f`` on every call.
            try:
                return memo[arg_key]
            except KeyError:
                pass

            result = f(*args, **kwargs)
            memo[arg_key] = result
            return result

Exemple #26

0

Afficher le fichier

Fichier : test_device.py Projet : toslunar/cupy

    def test_context_and_use(self):
        """Check the current device id when ``use()`` and ``with`` are mixed.

        The assertions expect device 0 to be current after the inner
        ``with`` block exits and also after the outer one exits.
        """
        first = cuda.Device(0)
        second = cuda.Device(1)

        second.use()
        with first:
            assert cuda.Device().id == 0
            second.use()
            with second:
                assert cuda.Device().id == 1
            # Leaving the inner context is expected to restore device 0.
            assert cuda.Device().id == 0
        assert cuda.Device().id == 0

Exemple #27

0

Afficher le fichier

Fichier : generator.py Projet : yanweifu/chainer

def get_random_state():
    """Return the random number generator state for the current device.

    When no state exists yet for the current device, a new
    ``RandomState`` is created, stored under the device id, and returned.

    Returns:
        RandomState: The state of the random number generator for the
        device.

    """
    device_id = cuda.Device().id
    state = _random_states.get(device_id, None)
    if state is not None:
        return state
    state = RandomState()
    _random_states[device_id] = state
    return state

Exemple #28

0

Afficher le fichier

def get_random_state():
    """Return the random number generator state for the current device.

    When no state exists yet for the current device, a new
    ``RandomState`` is created and stored under the device id. Its seed is
    read from the ``CUPY_SEED`` environment variable, falling back to
    ``CHAINER_SEED`` when the former is unset.

    Returns:
        RandomState: The state of the random number generator for the
        device.

    """
    device_id = cuda.Device().id
    state = _random_states.get(device_id, None)
    if state is not None:
        return state

    seed = os.getenv('CUPY_SEED')
    if seed is None:
        seed = os.getenv('CHAINER_SEED')
    fresh = RandomState(seed)
    # ``setdefault`` returns the stored entry, which is an already present
    # one if the key was filled in the meantime.
    return _random_states.setdefault(device_id, fresh)

Exemple #29

0

Afficher le fichier

Fichier : test_ndarray.py Projet : mnicely/cupy

 def test_copy_multi_device_non_contiguous_K(self):
     """Expect ``NotImplementedError`` for a 'K'-order strided copy on device 1."""
     base = core.ndarray((20,))
     strided = base[::2]  # every other element of ``base``
     with cuda.Device(1), self.assertRaises(NotImplementedError):
         strided.copy('K')

Exemple #30

0

Afficher le fichier

Fichier : test_ndarray.py Projet : mnicely/cupy

 def test_deepcopy_multi_device(self):
     """Deep-copy an array while device 1 is current; device must not change."""
     original = core.ndarray(self.shape)
     with cuda.Device(1):
         duplicate = copy.deepcopy(original)
     self._check_deepcopy(original, duplicate)
     # The copy is expected to report the same device as the source array.
     assert duplicate.device == original.device