Example #1
0
    def copy_to_host(self, ary=None, stream=None):
        """Copy ``self`` to ``ary`` or create a new Numpy ndarray
        if ``ary`` is ``None``.

        The transfer is synchronous: the function returns after the copy
        is finished.

        Always returns the host array.

        Example::

            import numpy as np
            from numba import hsa

            arr = np.arange(1000)
            d_arr = hsa.to_device(arr)

            my_kernel[100, 100](d_arr)

            result_array = d_arr.copy_to_host()
        """
        if ary is None:  # destination does not exist
            hostary = np.empty(shape=self.alloc_size, dtype=np.byte)
        else:  # destination does exist, it's `ary`, check it
            if ary.dtype != self.dtype:
                raise TypeError('incompatible dtype')

            if ary.shape != self.shape:
                scalshapes = (), (1, )
                if not (ary.shape in scalshapes and self.shape in scalshapes):
                    raise TypeError('incompatible shape; device %s; host %s' %
                                    (self.shape, ary.shape))
            if ary.strides != self.strides:
                scalstrides = (), (self.dtype.itemsize, )
                if not (ary.strides in scalstrides
                        and self.strides in scalstrides):
                    raise TypeError(
                        'incompatible strides; device %s; host %s' %
                        (self.strides, ary.strides))
            hostary = ary  # this is supposed to be a ptr for writing

        # a location for the data exists as `hostary`
        assert self.alloc_size >= 0, "Negative memory size"

        context = self._context

        # copy the data from the device to the hostary
        if self.alloc_size != 0:
            sz = self.alloc_size
            if stream is None:
                _driver.hsa.implicit_sync()
                _driver.dGPU_to_host(context, hostary, self, sz)
            else:
                _driver.async_dGPU_to_host(dst_ctx=devices.get_cpu_context(),
                                           src_ctx=self._context,
                                           dst=hostary,
                                           src=self,
                                           size=sz,
                                           stream=stream)

        # if the location for the data was originally None
        # then create a new ndarray and plumb in the new memory
        if ary is None:
            if self.size == 0:
                hostary = np.ndarray(shape=self.shape,
                                     dtype=self.dtype,
                                     buffer=hostary)
            else:
                hostary = np.ndarray(shape=self.shape,
                                     dtype=self.dtype,
                                     strides=self.strides,
                                     buffer=hostary)
        else:  # else hostary points to ary and how has the right memory
            hostary = ary

        return hostary
Example #2
0
    def copy_to_host(self, ary=None, stream=None):
        """Copy ``self`` to ``ary`` or create a new Numpy ndarray
        if ``ary`` is ``None``.

        The transfer is synchronous: the function returns after the copy
        is finished.

        Always returns the host array.

        Example::

            import numpy as np
            from numba import hsa

            arr = np.arange(1000)
            d_arr = hsa.to_device(arr)

            my_kernel[100, 100](d_arr)

            result_array = d_arr.copy_to_host()
        """
        if ary is None:  # destination does not exist
            hostary = np.empty(shape=self.alloc_size, dtype=np.byte)
        else: # destination does exist, it's `ary`, check it
            if ary.dtype != self.dtype:
                raise TypeError('incompatible dtype')

            if ary.shape != self.shape:
                scalshapes = (), (1,)
                if not (ary.shape in scalshapes and self.shape in scalshapes):
                    raise TypeError('incompatible shape; device %s; host %s' %
                                    (self.shape, ary.shape))
            if ary.strides != self.strides:
                scalstrides = (), (self.dtype.itemsize,)
                if not (ary.strides in scalstrides and
                                self.strides in scalstrides):
                    raise TypeError('incompatible strides; device %s; host %s' %
                                    (self.strides, ary.strides))
            hostary = ary  # this is supposed to be a ptr for writing

        # a location for the data exists as `hostary`
        assert self.alloc_size >= 0, "Negative memory size"

        context = self._context

        # copy the data from the device to the hostary
        if self.alloc_size != 0:
            sz = self.alloc_size
            if stream is None:
                _driver.hsa.implicit_sync()
                _driver.dGPU_to_host(context, hostary, self, sz)
            else:
                _driver.async_dGPU_to_host(dst_ctx=devices.get_cpu_context(),
                                           src_ctx=self._context,
                                           dst=hostary, src=self,
                                           size=sz, stream=stream)

        # if the location for the data was originally None
        # then create a new ndarray and plumb in the new memory
        if ary is None:
            if self.size == 0:
                hostary = np.ndarray(shape=self.shape, dtype=self.dtype,
                                     buffer=hostary)
            else:
                hostary = np.ndarray(shape=self.shape, dtype=self.dtype,
                                     strides=self.strides, buffer=hostary)
        else: # else hostary points to ary and how has the right memory
            hostary = ary

        return hostary