Exemple #1
0
    def copy_to_device(self, ary, stream=0):
        """Copy `ary` to `self`.

        If `ary` is a CUDA memory, perform a device-to-device transfer.
        Otherwise, perform a host-to-device transfer.

        Parameters
        ----------
        ary
            Source of the copy. Must expose ``size``; when it is zero the
            call returns immediately without touching the device.
        stream
            Stream argument; it is resolved through
            ``self._default_stream`` before being handed to the driver.

        Notes
        -----
        ``self`` must pass ``sentry_contiguous``; a device-side source must
        as well. Source and destination are validated with
        ``check_array_compatibility`` before the transfer, and the size
        handed to the driver is ``self.alloc_size``.
        """
        if ary.size == 0:
            # Nothing to do
            return

        sentry_contiguous(self)
        stream = self._default_stream(stream)

        self_core, ary_core = array_core(self), array_core(ary)
        if _driver.is_device_memory(ary):
            sentry_contiguous(ary)
            check_array_compatibility(self_core, ary_core)
            _driver.device_to_device(self, ary, self.alloc_size, stream=stream)
        else:
            # Ensure same contiguity. Only makes a host-side copy if necessary
            # (i.e., in order to materialize a writable strided view)
            if not ary_core.flags['WRITEABLE']:
                # Read-only input is always copied, as before.
                copy = True
            elif int(np.__version__.split('.')[0]) >= 2:
                # NumPy >= 2.0: ``copy=False`` means "never copy" and raises
                # ValueError when a copy is unavoidable (e.g. to change the
                # memory order). ``None`` is the "copy only if needed" mode.
                copy = None
            else:
                # NumPy 1.x: ``copy=False`` already means "copy only if
                # needed".
                copy = False
            ary_core = np.array(
                ary_core,
                order='C' if self_core.flags['C_CONTIGUOUS'] else 'F',
                subok=True,
                copy=copy)
            check_array_compatibility(self_core, ary_core)
            _driver.host_to_device(self,
                                   ary_core,
                                   self.alloc_size,
                                   stream=stream)
Exemple #2
0
    def _do_setitem(self, key, value, stream=0):
        """Assign `value` to the field named `key` of this record.

        The field's dtype and offset are looked up in ``self.dtype.fields``.
        A view of ``self.gpu_data`` at that offset is wrapped in a new
        instance of ``type(self)``, `value` is converted to the field's
        scalar type and passed through ``auto_device``, and the result is
        copied into the field with a device-to-device transfer.

        If ``self._default_stream(stream)`` yields a falsy stream, the
        context's default stream is used and synchronized before returning.
        """
        stream = self._default_stream(stream)

        # With neither a record-level default stream nor a caller-supplied
        # one, fall back to the context's default stream and block on it
        # once the copy has been issued.
        must_sync = not stream
        if must_sync:
            stream = devices.get_context().get_default_stream()

        # Destination: a record of the field's dtype over the field's memory.
        field_dtype, field_offset = self.dtype.fields[key]
        field_mem = self.gpu_data.view(field_offset)
        target = type(self)(dtype=field_dtype, stream=stream,
                            gpu_data=field_mem)

        # Source: the value coerced to the field's scalar type.
        scalar = target.dtype.type(value)
        source, _ = auto_device(scalar, stream=stream)

        # Transfer one item's worth of data from source to destination.
        _driver.device_to_device(target, source, source.dtype.itemsize, stream)

        if must_sync:
            stream.synchronize()
Exemple #3
0
 def test_d2d(self):
     # Round trip: host -> device buffer A -> device buffer B -> host,
     # then check that the data read back equals the data sent.
     expected = np.arange(100, dtype=np.uint32)
     received = np.empty_like(expected)
     nbytes = expected.nbytes

     # Two device allocations of the same size as the host array.
     dev_a = self.context.memalloc(nbytes)
     dev_b = self.context.memalloc(nbytes)

     driver.host_to_device(dev_a, expected, nbytes)
     driver.device_to_device(dev_b, dev_a, nbytes)
     driver.device_to_host(received, dev_b, nbytes)

     self.assertTrue(np.all(expected == received))
Exemple #4
0
 def test_d2d(self):
     # Upload a host array, copy it between two device allocations, and
     # download it again; the result must match the original element-wise.
     src_host = np.arange(100, dtype=np.uint32)
     dst_host = np.empty_like(src_host)
     byte_count = src_host.dtype.itemsize * src_host.size

     first_buf = self.context.memalloc(byte_count)
     second_buf = self.context.memalloc(byte_count)

     # host -> first device buffer
     driver.host_to_device(first_buf, src_host, byte_count)
     # first device buffer -> second device buffer
     driver.device_to_device(second_buf, first_buf, byte_count)
     # second device buffer -> host
     driver.device_to_host(dst_host, second_buf, byte_count)

     matches = src_host == dst_host
     self.assertTrue(np.all(matches))