def copy_to_host(self, ary=None, stream=None): """Copy ``self`` to ``ary`` or create a new Numpy ndarray if ``ary`` is ``None``. The transfer is synchronous: the function returns after the copy is finished. Always returns the host array. Example:: import numpy as np from numba import hsa arr = np.arange(1000) d_arr = hsa.to_device(arr) my_kernel[100, 100](d_arr) result_array = d_arr.copy_to_host() """ if ary is None: # destination does not exist hostary = np.empty(shape=self.alloc_size, dtype=np.byte) else: # destination does exist, it's `ary`, check it if ary.dtype != self.dtype: raise TypeError('incompatible dtype') if ary.shape != self.shape: scalshapes = (), (1, ) if not (ary.shape in scalshapes and self.shape in scalshapes): raise TypeError('incompatible shape; device %s; host %s' % (self.shape, ary.shape)) if ary.strides != self.strides: scalstrides = (), (self.dtype.itemsize, ) if not (ary.strides in scalstrides and self.strides in scalstrides): raise TypeError( 'incompatible strides; device %s; host %s' % (self.strides, ary.strides)) hostary = ary # this is supposed to be a ptr for writing # a location for the data exists as `hostary` assert self.alloc_size >= 0, "Negative memory size" context = self._context # copy the data from the device to the hostary if self.alloc_size != 0: sz = self.alloc_size if stream is None: _driver.hsa.implicit_sync() _driver.dGPU_to_host(context, hostary, self, sz) else: _driver.async_dGPU_to_host(dst_ctx=devices.get_cpu_context(), src_ctx=self._context, dst=hostary, src=self, size=sz, stream=stream) # if the location for the data was originally None # then create a new ndarray and plumb in the new memory if ary is None: if self.size == 0: hostary = np.ndarray(shape=self.shape, dtype=self.dtype, buffer=hostary) else: hostary = np.ndarray(shape=self.shape, dtype=self.dtype, strides=self.strides, buffer=hostary) else: # else hostary points to ary and how has the right memory hostary = ary return hostary
def copy_to_host(self, ary=None, stream=None): """Copy ``self`` to ``ary`` or create a new Numpy ndarray if ``ary`` is ``None``. The transfer is synchronous: the function returns after the copy is finished. Always returns the host array. Example:: import numpy as np from numba import hsa arr = np.arange(1000) d_arr = hsa.to_device(arr) my_kernel[100, 100](d_arr) result_array = d_arr.copy_to_host() """ if ary is None: # destination does not exist hostary = np.empty(shape=self.alloc_size, dtype=np.byte) else: # destination does exist, it's `ary`, check it if ary.dtype != self.dtype: raise TypeError('incompatible dtype') if ary.shape != self.shape: scalshapes = (), (1,) if not (ary.shape in scalshapes and self.shape in scalshapes): raise TypeError('incompatible shape; device %s; host %s' % (self.shape, ary.shape)) if ary.strides != self.strides: scalstrides = (), (self.dtype.itemsize,) if not (ary.strides in scalstrides and self.strides in scalstrides): raise TypeError('incompatible strides; device %s; host %s' % (self.strides, ary.strides)) hostary = ary # this is supposed to be a ptr for writing # a location for the data exists as `hostary` assert self.alloc_size >= 0, "Negative memory size" context = self._context # copy the data from the device to the hostary if self.alloc_size != 0: sz = self.alloc_size if stream is None: _driver.hsa.implicit_sync() _driver.dGPU_to_host(context, hostary, self, sz) else: _driver.async_dGPU_to_host(dst_ctx=devices.get_cpu_context(), src_ctx=self._context, dst=hostary, src=self, size=sz, stream=stream) # if the location for the data was originally None # then create a new ndarray and plumb in the new memory if ary is None: if self.size == 0: hostary = np.ndarray(shape=self.shape, dtype=self.dtype, buffer=hostary) else: hostary = np.ndarray(shape=self.shape, dtype=self.dtype, strides=self.strides, buffer=hostary) else: # else hostary points to ary and how has the right memory hostary = ary return hostary