def _init_devices():
    """Build the Python-side table of physical offload targets.

    Asks the runtime how many devices exist and wraps every device
    number in an OffloadDevice object.

    Returns
    -------
    out : dict
        Maps each device number in ``range(number_of_devices())`` to
        the OffloadDevice instance created for it.
    """
    device_count = number_of_devices()
    debug(5, "found {0} device(s)", device_count)
    # devices are created in ascending order of their number
    return dict(
        (index, OffloadDevice(index)) for index in range(device_count))
def __init__(self, device=None):
    """Create a new stream on the given device.

    Parameters
    ----------
    device : device object
        Device that owns the stream; must not be None.  Its
        ``device_id`` attribute selects the target of the stream.
    """
    assert device is not None

    # remember the owning device and its numerical id
    self._device = device
    self._device_id = device.device_id

    # ask the native layer for a stream and keep the returned handle
    stream_id = _pymic_impl_stream_create(self._device_id, 'stream')
    self._stream_id = stream_id
    message = 'created stream 0x{0:x} for device {1}'.format(
        stream_id, self._device_id)
    debug(1, message)
def __init__(self, device=None):
    """Create a new stream on the given device.

    Parameters
    ----------
    device : device object
        Device that owns the stream; must not be None.  Its
        ``device_id`` attribute selects the target of the stream.
    """
    assert device is not None

    # remember the owning device and its numerical id
    self._device = device
    self._device_id = device.device_id

    # ask the native layer for a stream and keep the returned handle
    stream_id = _pymic_impl_stream_create(self._device_id, 'stream')
    self._stream_id = stream_id
    message = 'created stream 0x{0:x} for device {1}'.format(
        stream_id, self._device_id)
    debug(1, message)
def _trace_func(func):
    """Decorate ``func`` so that every call is timed and recorded.

    The returned wrapper measures the wall-clock time of each call with
    ``timeit.default_timer``, collects stack information through
    ``_stack_walk_func`` and registers the call (name, start time, end
    time, positional and keyword arguments, stack information) in
    ``_trace_database``.  If ``func`` raises, the exception propagates
    and nothing is registered for that call.

    Parameters
    ----------
    func : callable
        Function to be traced; its ``__name__`` is used as the key
        under which calls are registered.

    Returns
    -------
    out : callable
        Wrapper with the same call signature and return value as
        ``func``.  The wrapper keeps the name and docstring of ``func``.
    """
    # imported locally so the block does not depend on a file-level import
    import functools

    funcname = func.__name__
    debug(5, "collecting statistics for {0}", funcname)

    # functools.wraps keeps __name__/__doc__ of the traced function intact,
    # so introspection and help() still show the original function
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        tstart = timeit.default_timer()
        rv = func(*args, **kwargs)
        tend = timeit.default_timer()
        stack_info = _stack_walk_func()
        _trace_database.register(funcname, tstart, tend,
                                 args, kwargs, stack_info)
        return rv

    return wrapper
def __init__(self, library, device=None):
    """Set up this OffloadLibrary and load the library on the device.

    Not meant to be called from outside pymic.

    Parameters
    ----------
    library : str
        Name or path of the library; it is resolved through
        OffloadLibrary._find_library.
    device : device object
        Device on which the library is loaded; must not be None.

    Raises
    ------
    OffloadError
        If no suitable library file can be located.
    """
    assert device is not None

    # record what was requested and where it will live
    self._library = library
    self._device = device
    self._device_id = device._map_dev_id()
    self.unloader = _pymic_impl_unload_library
    self._cache = {}

    # resolve the library to a file on the host
    debug(5, "searching for {0} in {1}", library, config._search_path)
    filename = OffloadLibrary._find_library(library)
    if filename is None:
        debug(5, "no suitable library found for '{0}'", library)
        message = "Cannot find library '{0}' in PYMIC_LIBRARY_PATH"
        raise OffloadError(message.format(library))

    # hand the file to the native layer and keep what it returns
    debug(5, "loading '{0}' on device {1}", filename, self._device_id)
    loaded = _pymic_impl_load_library(self._device_id, filename)
    self._tempfile, self._handle = loaded
    debug(5,
          "successfully loaded '{0}' on device {1} with handle 0x{2:x}",
          filename, self._device_id, self._handle)
def __init__(self, library, device=None):
    """Set up this OffloadLibrary and load the library on the device.

    Not meant to be called from outside pymic.

    Parameters
    ----------
    library : str
        Name or path of the library; it is resolved through
        OffloadLibrary._find_library.
    device : device object
        Device on which the library is loaded; must not be None.

    Raises
    ------
    OffloadError
        If no suitable library file can be located.
    """
    assert device is not None

    # record what was requested and where it will live
    self._library = library
    self._device = device
    self._device_id = device._map_dev_id()
    self.unloader = _pymic_impl_unload_library
    self._cache = {}

    # resolve the library to a file on the host
    debug(5, "searching for {0} in {1}", library, config._search_path)
    filename = OffloadLibrary._find_library(library)
    if filename is None:
        debug(5, "no suitable library found for '{0}'", library)
        message = "Cannot find library '{0}' in PYMIC_LIBRARY_PATH"
        raise OffloadError(message.format(library))

    # hand the file to the native layer and keep what it returns
    debug(5, "loading '{0}' on device {1}", filename, self._device_id)
    loaded = _pymic_impl_load_library(self._device_id, filename)
    self._tempfile, self._handle = loaded
    debug(5,
          "successfully loaded '{0}' on device {1} with handle 0x{2:x}",
          filename, self._device_id, self._handle)
def _find_library(library):
    """Locate a loadable library file on the host.

    An absolute path that passes OffloadLibrary._check_k1om is used
    as is.  Otherwise every directory of ``config._search_path``
    (split at ``os.pathsep``) is probed in order and the first existing
    file that passes the same check wins.

    Parameters
    ----------
    library : str
        Absolute path or file name of the library.

    Returns
    -------
    out : str or None
        Path of the library file, or None if nothing suitable exists.
    """
    if os.path.isabs(library) and OffloadLibrary._check_k1om(library):
        return library

    for directory in config._search_path.split(os.pathsep):
        debug(5, " looking for {0} in {1}", library, directory)
        candidate = os.path.join(directory, library)
        if not os.path.isfile(candidate):
            continue
        if OffloadLibrary._check_k1om(candidate):
            return candidate

    # search path exhausted without a match
    return None
def allocate_device_memory(self, nbytes, alignment=64, sticky=False):
    """Allocate device memory on device associated with the invoking
       stream object.  Though it is part of the stream interface,
       the operation is synchronous.

       Caution: this is a low-level function, do not use it unless you
       have a very specific reason to do so.  Better use the high-level
       interfaces of OffloadArray instead.

       Parameters
       ----------
       nbytes : int
          Number of bytes to allocate; must be greater than zero.
       alignment : int, optional, default 64
          Alignment of the data on the target device.
       sticky : bool, optional, default False
          Forwarded unchanged to the SmartPtr that wraps the allocation.
          NOTE(review): presumably controls whether the pointer outlives
          automatic release -- confirm against SmartPtr.

       See Also
       --------
       deallocate_device_memory, transfer_host2device,
       transfer_device2host, transfer_device2device

       Returns
       -------
       out : SmartPtr
          Fake pointer that identifies the allocated memory

       Raises
       ------
       ValueError
          If nbytes is zero or negative.

       Examples
       --------
       >>> ptr = stream.allocate_device_memory(4096)
       >>> print ptr
       140297169571840
    """
    device = self._device_id
    # zero is rejected as well, so the message must not claim "negative"
    if nbytes <= 0:
        raise ValueError('Cannot allocate non-positive amount of '
                         'memory: {0}'.format(nbytes))

    device_ptr = _pymic_impl_stream_allocate(device, self._stream_id,
                                             nbytes, alignment)
    debug(2, 'allocated {0} bytes on device {1} at 0x{2:x}'
             ', alignment {3}',
          nbytes, device, device_ptr, alignment)

    # wrap the raw value so later calls can type-check the pointer
    return SmartPtr(self, device, device_ptr, sticky)
def deallocate_device_memory(self, device_ptr):
    """Release device memory obtained from allocate_device_memory.

       The call is part of the stream interface but is carried out
       synchronously.

       Caution: low-level function.  Prefer the high-level interfaces
       of OffloadArray unless there is a specific reason not to.

       Parameters
       ----------
       device_ptr : SmartPtr
          Fake pointer of the memory to deallocate

       See Also
       --------
       allocate_device_memory, transfer_host2device,
       transfer_device2host, transfer_device2device

       Returns
       -------
       None

       Raises
       ------
       ValueError
          If device_ptr is None or is not a SmartPtr.

       Examples
       --------
       >>> ptr = stream.allocate_device_memory(4096)
       >>> stream.deallocate_device_memory(ptr)
    """
    owner = self._device

    # reject anything that is not a usable fake pointer
    if device_ptr is None:
        raise ValueError('Cannot deallocate None pointer')
    if not isinstance(device_ptr, SmartPtr):
        raise ValueError('Wrong argument, no device pointer given')
    # TODO: add more safety checks here (e.g., pointer from right device
    #       and stream)

    raw_ptr = device_ptr._device_ptr
    _pymic_impl_stream_deallocate(self._device_id, self._stream_id, raw_ptr)
    debug(2, 'deallocated pointer {0} on device {1}', device_ptr, owner)
def invoke(self, kernel, *args):
    """Enqueue the execution of a native kernel in this stream.

       The kernel is obtained by reading the attribute named like the
       kernel function from a library object returned by load_library.
       Each argument is marshalled according to its type:

       * OffloadArray: the device pointer of the array is passed on.
       * numpy.ndarray: a temporary device buffer is allocated, the host
         data is copied in before the kernel is invoked, copied back
         afterwards, and the buffer is released again.
       * anything else: the value is converted with numpy.asarray and
         passed as a scalar through its host pointer.

       The transfers and the invocation are issued through this stream.

       Parameters
       ----------
       kernel : kernel
          Kernel to be invoked.  It is indexed as ``kernel[0]`` (name),
          ``kernel[1]`` (function pointer) and ``kernel[2]`` (device the
          library was loaded on).
       args : OffloadArray, numpy.ndarray, or scalar type
          Arguments to be passed to the kernel function

       See Also
       --------
       load_library

       Returns
       -------
       None

       Raises
       ------
       OffloadError
          If the kernel's library was loaded on a different device.

       Examples
       --------
       >>> library = device.load_library("libdgemm")
       >>> stream.invoke(library.dgemm, A, B, C, n, m, k)
    """
    # a wrapper hands over the real arguments packed into one tuple in
    # first position; peel that extra layer off
    if len(args) and type(args[0]) == tuple:
        args = args[0]

    # a kernel may only run on the device its library was loaded on
    if kernel[2] is not self._device:
        raise OffloadError("Cannot invoke kernel, "
                           "library not loaded on device")

    # per-argument descriptors; pointers travel as 64-bit integers
    argc = len(args)
    arg_dims = numpy.empty((argc,), dtype=numpy.int64)
    arg_type = numpy.empty((argc,), dtype=numpy.int64)
    arg_ptrs = numpy.empty((argc,), dtype=numpy.int64)
    arg_size = numpy.empty((argc,), dtype=numpy.int64)
    copy_in_out = []
    scalars = []

    for index, value in enumerate(args):
        if isinstance(value, pymic.OffloadArray):
            # data already lives on the device: pass its fake pointer
            arg_dims[index] = 1
            arg_type[index] = map_data_types(value.dtype)
            arg_ptrs[index] = value._device_ptr._device_ptr
            arg_size[index] = value._nbytes
            continue

        if isinstance(value, numpy.ndarray):
            # host array: give it a temporary device buffer and remember
            # it for the copy-in / copy-out passes below
            host_ptr = value.ctypes.data  # raw C pointer to host data
            nbytes = value.dtype.itemsize * value.size
            dev_ptr = self.allocate_device_memory(nbytes)
            copy_in_out.append((host_ptr, dev_ptr, nbytes, value))
            arg_dims[index] = 1
            arg_type[index] = map_data_types(value.dtype)
            arg_ptrs[index] = dev_ptr._device_ptr
            arg_size[index] = nbytes
            continue

        # everything else is treated as a scalar; wrapping it in a numpy
        # array is a hack that yields a host pointer and a dtype.  The
        # wrapper is kept in `scalars` so it stays alive during the call.
        wrapped = numpy.asarray(value)
        host_ptr = wrapped.ctypes.data  # raw C pointer to host data
        nbytes = wrapped.dtype.itemsize * wrapped.size
        scalars.append(wrapped)
        arg_dims[index] = 0
        arg_type[index] = map_data_types(wrapped.dtype)
        arg_ptrs[index] = host_ptr
        arg_size[index] = nbytes

    debug(1,
          "(device {0}, stream 0x{1:x}) invoking kernel '{2}' "
          "(pointer 0x{3:x}) with {4} "
          "argument(s) ({5} copy-in/copy-out, {6} scalars)",
          self._device_id, self._stream_id, kernel[0], kernel[1],
          len(args), len(copy_in_out), len(scalars))

    # copy-in pass for plain numpy arrays
    for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
        self.transfer_host2device(host_ptr, dev_ptr, nbytes)

    _pymic_impl_invoke_kernel(self._device_id, self._stream_id, kernel[1],
                              arg_dims, arg_type, arg_ptrs, arg_size,
                              len(args))

    # copy-out pass; each temporary buffer is released right after its
    # transfer has been issued
    for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
        self.transfer_device2host(dev_ptr, host_ptr, nbytes)
        self.deallocate_device_memory(dev_ptr)

    return None
def __del__(self):
    """Destroy the native stream when this object is finalized."""
    stream_id = self._stream_id
    device_id = self._device_id
    message = 'destroying stream 0x{0:0x} for device {1}'.format(
        stream_id, device_id)
    debug(1, message)
    _pymic_impl_stream_destroy(device_id, stream_id)
def invoke(self, kernel, *args):
    """Enqueue the execution of a native kernel in this stream.

       The kernel is obtained by reading the attribute named like the
       kernel function from a library object returned by load_library.
       Each argument is marshalled according to its type:

       * OffloadArray: the device pointer of the array is passed on.
       * numpy.ndarray: a temporary device buffer is allocated, the host
         data is copied in before the kernel is invoked, copied back
         afterwards, and the buffer is released again.
       * anything else: the value is converted with numpy.asarray and
         passed as a scalar through its host pointer.

       The transfers and the invocation are issued through this stream.

       Parameters
       ----------
       kernel : kernel
          Kernel to be invoked.  It is indexed as ``kernel[0]`` (name),
          ``kernel[1]`` (function pointer) and ``kernel[2]`` (device the
          library was loaded on).
       args : OffloadArray, numpy.ndarray, or scalar type
          Arguments to be passed to the kernel function

       See Also
       --------
       load_library

       Returns
       -------
       None

       Raises
       ------
       OffloadError
          If the kernel's library was loaded on a different device.

       Examples
       --------
       >>> library = device.load_library("libdgemm")
       >>> stream.invoke(library.dgemm, A, B, C, n, m, k)
    """
    # a wrapper hands over the real arguments packed into one tuple in
    # first position; peel that extra layer off
    if len(args) and type(args[0]) == tuple:
        args = args[0]

    # a kernel may only run on the device its library was loaded on
    if kernel[2] is not self._device:
        raise OffloadError("Cannot invoke kernel, "
                           "library not loaded on device")

    # per-argument descriptors; pointers travel as 64-bit integers
    argc = len(args)
    arg_dims = numpy.empty((argc,), dtype=numpy.int64)
    arg_type = numpy.empty((argc,), dtype=numpy.int64)
    arg_ptrs = numpy.empty((argc,), dtype=numpy.int64)
    arg_size = numpy.empty((argc,), dtype=numpy.int64)
    copy_in_out = []
    scalars = []

    for index, value in enumerate(args):
        if isinstance(value, pymic.OffloadArray):
            # data already lives on the device: pass its fake pointer
            arg_dims[index] = 1
            arg_type[index] = map_data_types(value.dtype)
            arg_ptrs[index] = value._device_ptr._device_ptr
            arg_size[index] = value._nbytes
            continue

        if isinstance(value, numpy.ndarray):
            # host array: give it a temporary device buffer and remember
            # it for the copy-in / copy-out passes below
            host_ptr = value.ctypes.data  # raw C pointer to host data
            nbytes = value.dtype.itemsize * value.size
            dev_ptr = self.allocate_device_memory(nbytes)
            copy_in_out.append((host_ptr, dev_ptr, nbytes, value))
            arg_dims[index] = 1
            arg_type[index] = map_data_types(value.dtype)
            arg_ptrs[index] = dev_ptr._device_ptr
            arg_size[index] = nbytes
            continue

        # everything else is treated as a scalar; wrapping it in a numpy
        # array is a hack that yields a host pointer and a dtype.  The
        # wrapper is kept in `scalars` so it stays alive during the call.
        wrapped = numpy.asarray(value)
        host_ptr = wrapped.ctypes.data  # raw C pointer to host data
        nbytes = wrapped.dtype.itemsize * wrapped.size
        scalars.append(wrapped)
        arg_dims[index] = 0
        arg_type[index] = map_data_types(wrapped.dtype)
        arg_ptrs[index] = host_ptr
        arg_size[index] = nbytes

    debug(1,
          "(device {0}, stream 0x{1:x}) invoking kernel '{2}' "
          "(pointer 0x{3:x}) with {4} "
          "argument(s) ({5} copy-in/copy-out, {6} scalars)",
          self._device_id, self._stream_id, kernel[0], kernel[1],
          len(args), len(copy_in_out), len(scalars))

    # copy-in pass for plain numpy arrays
    for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
        self.transfer_host2device(host_ptr, dev_ptr, nbytes)

    _pymic_impl_invoke_kernel(self._device_id, self._stream_id, kernel[1],
                              arg_dims, arg_type, arg_ptrs, arg_size,
                              len(args))

    # copy-out pass; each temporary buffer is released right after its
    # transfer has been issued
    for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
        self.transfer_device2host(dev_ptr, host_ptr, nbytes)
        self.deallocate_device_memory(dev_ptr)

    return None
def transfer_device2device(self, device_ptr_src, device_ptr_dst,
                           nbytes, offset_device_src=0,
                           offset_device_dst=0):
    """Transfer data from a device memory location (identified by its
       fake pointer) to another memory region on the same device. The
       operation is enqueued into this stream.

       Caution: this is a low-level function, do not use it unless you
       have a very specific reason to do so.  Better use the high-level
       interfaces of OffloadArray instead.

       Parameters
       ----------
       device_ptr_src : SmartPtr
          Fake pointer to the source memory location
       device_ptr_dst : SmartPtr
          Fake pointer to the destination memory location
       nbytes : int
          Number of bytes to copy; must be greater than zero.
       offset_device_src : int, optional, default 0
          Transfer offset (bytes) to be added to the address of the device
          memory (source).
       offset_device_dst : int, optional, default 0
          Transfer offset (bytes) to be added to the address of the device
          memory (destination).

       See Also
       --------
       transfer_host2device, allocate_device_memory,
       deallocate_device_memory

       Returns
       -------
       None

       Raises
       ------
       ValueError
          If a pointer is None or not a SmartPtr, if an offset is
          negative, or if nbytes is not positive.

       Examples
       --------
       >>> a = numpy.arange(0.0, 16.0)
       >>> nbytes = a.dtype.itemsize * a.size
       >>> ptr_a_host = a.ctypes.data
       >>> device_ptr_1 = stream.allocate_device_memory(nbytes)
       >>> stream.transfer_host2device(ptr_a_host, device_ptr_1, nbytes)
       >>> device_ptr_2 = stream.allocate_device_memory(nbytes)
       >>> stream.transfer_device2device(device_ptr_1, device_ptr_2, nbytes)
       >>> b = numpy.empty_like(a)
       >>> print b
       [  6.95303066e-310   6.83874600e-317   3.95252517e-322   0.00000000e+000
          9.31741387e+242   0.00000000e+000   0.00000000e+000   0.00000000e+000
          4.94065646e-324   3.30519641e-317   1.72409659e+212   1.20070123e-089
          5.05907223e-085   4.87883721e+199   0.00000000e+000   6.78545805e-317]
       # random data
       >>> ptr_b_host = b.ctypes.data
       >>> stream.transfer_device2host(device_ptr_2, ptr_b_host, nbytes)
       >>> stream.sync()
       >>> print b
       [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.  13.
         14.  15.]
    """
    # None has to be tested before the type check; otherwise the type
    # check rejects it first and the specific message is never reported
    if device_ptr_src is None:
        raise ValueError('Invalid None device pointer')
    if device_ptr_dst is None:
        raise ValueError('Invalid None device pointer')

    if not isinstance(device_ptr_src, SmartPtr):
        raise ValueError('Wrong argument, no device pointer given')
    if not isinstance(device_ptr_dst, SmartPtr):
        raise ValueError('Wrong argument, no device pointer given')
    # TODO: add more safety checks here (e.g., pointer from right device
    #       and stream)

    if offset_device_src < 0:
        raise ValueError("Negative offset passed for offset_device_src")
    if offset_device_dst < 0:
        raise ValueError("Negative offset passed for offset_device_dst")
    if nbytes <= 0:
        raise ValueError('Invalid byte count: {0}'.format(nbytes))

    # the native layer works on the raw values inside the smart pointers
    device_ptr_src = device_ptr_src._device_ptr
    device_ptr_dst = device_ptr_dst._device_ptr
    debug(1, '(device {0} -> device {0}) transferring {1} bytes '
             '(source ptr {2}, destination ptr {3})',
          self._device_id, nbytes, device_ptr_src, device_ptr_dst)
    _pymic_impl_stream_memcpy_d2d(self._device_id, self._stream_id,
                                  device_ptr_src, device_ptr_dst,
                                  nbytes, offset_device_src,
                                  offset_device_dst)
    return None
def transfer_device2host(self, device_ptr, host_ptr,
                         nbytes, offset_device=0, offset_host=0):
    """Transfer data from a device memory location (identified by its
       fake pointer) to a host memory region identified by its raw
       pointer (i.e., a C pointer). The operation is enqueued into
       this stream.

       Caution: this is a low-level function, do not use it unless you
       have a very specific reason to do so.  Better use the high-level
       interfaces of OffloadArray instead.

       Parameters
       ----------
       device_ptr : SmartPtr
          Fake pointer of the source memory on the device.  If it carries
          a non-zero offset of its own, that offset is used as the device
          offset and offset_device must be left at 0.
       host_ptr : int
          Raw pointer to the destination memory on the host
       nbytes : int
          Number of bytes to copy; must be greater than zero.
       offset_device : int, optional, default 0
          Transfer offset (bytes) to be added to the address of the device
          memory.
       offset_host : int, optional, default 0
          Transfer offset (bytes) to be added to raw host pointer

       See Also
       --------
       transfer_host2device, transfer_device2device,
       allocate_device_memory, deallocate_device_memory

       Returns
       -------
       None

       Raises
       ------
       ValueError
          If a pointer is None, device_ptr is not a SmartPtr, an offset
          is negative, nbytes is not positive, or both the fake pointer
          and offset_device specify a non-zero offset.

       Examples
       --------
       >>> a = numpy.arange(0.0, 16.0)
       >>> nbytes = a.dtype.itemsize * a.size
       >>> ptr_a_host = a.ctypes.data
       >>> device_ptr = stream.allocate_device_memory(nbytes)
       >>> stream.transfer_host2device(ptr_a_host, device_ptr, nbytes)
       >>> b = numpy.empty_like(a)
       >>> print b
       [  6.90762927e-310   7.73120247e-317   3.60667921e-322   0.00000000e+000
          0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
          4.94065646e-324   9.76815212e-317   7.98912845e-317   0.00000000e+000
          5.53353523e-322   1.58101007e-322   0.00000000e+000   7.38839996e-317]
       # random data
       >>> ptr_b_host = b.ctypes.data
       >>> stream.transfer_device2host(device_ptr, ptr_b_host, nbytes)
       >>> stream.sync()
       >>> print b
       [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.  13.
         14.  15.]
    """
    # None has to be tested before the type check; otherwise the type
    # check rejects it first and the specific message is never reported
    if device_ptr is None:
        raise ValueError('Invalid None device pointer')
    if host_ptr is None:
        raise ValueError('Invalid None host pointer')

    if not isinstance(device_ptr, SmartPtr):
        raise ValueError('Wrong argument, no device pointer given')
    # TODO: add more safety checks here (e.g., pointer from right device
    #       and stream)

    if offset_device < 0:
        raise ValueError("Negative offset passed for offset_device")
    if offset_host < 0:
        raise ValueError("Negative offset passed for offset_host")
    if nbytes <= 0:
        raise ValueError('Invalid byte count: {0}'.format(nbytes))

    # an offset stored in the fake pointer replaces offset_device; giving
    # both would be ambiguous, so that combination is rejected
    if device_ptr._offset != 0:
        if offset_device != 0:
            raise ValueError('Offset cannot be non-zero if fake pointer '
                             'has an offset.')
        offset_device = device_ptr._offset

    debug(1, '(device {0} -> host) transferring {1} bytes '
             '(device ptr {2}, host ptr 0x{3:x})',
          self._device_id, nbytes, device_ptr, host_ptr)
    # the native layer works on the raw value inside the smart pointer
    device_ptr = device_ptr._device_ptr
    _pymic_impl_stream_memcpy_d2h(self._device_id, self._stream_id,
                                  device_ptr, host_ptr,
                                  nbytes, offset_device, offset_host)
    return None
def __del__(self):
    """Destroy the native stream when this object is finalized."""
    stream_id = self._stream_id
    device_id = self._device_id
    message = 'destroying stream 0x{0:0x} for device {1}'.format(
        stream_id, device_id)
    debug(1, message)
    _pymic_impl_stream_destroy(device_id, stream_id)
traceback = inspect.getframeinfo(current_frame) stack.append((traceback.function, (traceback.filename, traceback.lineno))) current_frame = current_frame.f_back return stack def _trace_func(func): # this is the do-nothing-wrapper def wrapper(*args, **kwargs): return func(*args, **kwargs) return wrapper if config._trace_level == 1: debug(5, "tracing is enabled", config._trace_level) elif config._trace_level is not None: debug(5, "tracing is disabled", config._trace_level) if config._trace_level >= 1: _stack_walk_func = _stack_walk_compact if config._collect_stacks_str.lower() == "none": debug(5, "stack collection is set to '{0}'", config._collect_stacks_str) _stack_walk_func = _stack_walk_none elif config._collect_stacks_str.lower() == "compact": debug(5, "stack collection is set to '{0}'", config._collect_stacks_str) _stack_walk_func = _stack_walk_compact elif config._collect_stacks_str.lower() == "full": debug(5, "stack collection is set to '{0}'",
stack.append( (traceback.function, (traceback.filename, traceback.lineno))) current_frame = current_frame.f_back return stack def _trace_func(func): # this is the do-nothing-wrapper def wrapper(*args, **kwargs): return func(*args, **kwargs) return wrapper if config._trace_level == 1: debug(5, "tracing is enabled", config._trace_level) elif config._trace_level is not None: debug(5, "tracing is disabled", config._trace_level) if config._trace_level >= 1: _stack_walk_func = _stack_walk_compact if config._collect_stacks_str.lower() == "none": debug(5, "stack collection is set to '{0}'", config._collect_stacks_str) _stack_walk_func = _stack_walk_none elif config._collect_stacks_str.lower() == "compact": debug(5, "stack collection is set to '{0}'", config._collect_stacks_str) _stack_walk_func = _stack_walk_compact elif config._collect_stacks_str.lower() == "full": debug(5, "stack collection is set to '{0}'",
def transfer_device2device(self, device_ptr_src, device_ptr_dst,
                           nbytes, offset_device_src=0,
                           offset_device_dst=0):
    """Transfer data from a device memory location (identified by its
       fake pointer) to another memory region on the same device. The
       operation is enqueued into this stream.

       Caution: this is a low-level function, do not use it unless you
       have a very specific reason to do so.  Better use the high-level
       interfaces of OffloadArray instead.

       Parameters
       ----------
       device_ptr_src : SmartPtr
          Fake pointer to the source memory location
       device_ptr_dst : SmartPtr
          Fake pointer to the destination memory location
       nbytes : int
          Number of bytes to copy; must be greater than zero.
       offset_device_src : int, optional, default 0
          Transfer offset (bytes) to be added to the address of the device
          memory (source).
       offset_device_dst : int, optional, default 0
          Transfer offset (bytes) to be added to the address of the device
          memory (destination).

       See Also
       --------
       transfer_host2device, allocate_device_memory,
       deallocate_device_memory

       Returns
       -------
       None

       Raises
       ------
       ValueError
          If a pointer is None or not a SmartPtr, if an offset is
          negative, or if nbytes is not positive.

       Examples
       --------
       >>> a = numpy.arange(0.0, 16.0)
       >>> nbytes = a.dtype.itemsize * a.size
       >>> ptr_a_host = a.ctypes.data
       >>> device_ptr_1 = stream.allocate_device_memory(nbytes)
       >>> stream.transfer_host2device(ptr_a_host, device_ptr_1, nbytes)
       >>> device_ptr_2 = stream.allocate_device_memory(nbytes)
       >>> stream.transfer_device2device(device_ptr_1, device_ptr_2, nbytes)
       >>> b = numpy.empty_like(a)
       >>> print b
       [  6.95303066e-310   6.83874600e-317   3.95252517e-322   0.00000000e+000
          9.31741387e+242   0.00000000e+000   0.00000000e+000   0.00000000e+000
          4.94065646e-324   3.30519641e-317   1.72409659e+212   1.20070123e-089
          5.05907223e-085   4.87883721e+199   0.00000000e+000   6.78545805e-317]
       # random data
       >>> ptr_b_host = b.ctypes.data
       >>> stream.transfer_device2host(device_ptr_2, ptr_b_host, nbytes)
       >>> stream.sync()
       >>> print b
       [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.  13.
         14.  15.]
    """
    # None has to be tested before the type check; otherwise the type
    # check rejects it first and the specific message is never reported
    if device_ptr_src is None:
        raise ValueError('Invalid None device pointer')
    if device_ptr_dst is None:
        raise ValueError('Invalid None device pointer')

    if not isinstance(device_ptr_src, SmartPtr):
        raise ValueError('Wrong argument, no device pointer given')
    if not isinstance(device_ptr_dst, SmartPtr):
        raise ValueError('Wrong argument, no device pointer given')
    # TODO: add more safety checks here (e.g., pointer from right device
    #       and stream)

    if offset_device_src < 0:
        raise ValueError("Negative offset passed for offset_device_src")
    if offset_device_dst < 0:
        raise ValueError("Negative offset passed for offset_device_dst")
    if nbytes <= 0:
        raise ValueError('Invalid byte count: {0}'.format(nbytes))

    # the native layer works on the raw values inside the smart pointers
    device_ptr_src = device_ptr_src._device_ptr
    device_ptr_dst = device_ptr_dst._device_ptr
    debug(1, '(device {0} -> device {0}) transferring {1} bytes '
             '(source ptr {2}, destination ptr {3})',
          self._device_id, nbytes, device_ptr_src, device_ptr_dst)
    _pymic_impl_stream_memcpy_d2d(self._device_id, self._stream_id,
                                  device_ptr_src, device_ptr_dst,
                                  nbytes, offset_device_src,
                                  offset_device_dst)
    return None
def transfer_device2host(self, device_ptr, host_ptr,
                         nbytes, offset_device=0, offset_host=0):
    """Transfer data from a device memory location (identified by its
       fake pointer) to a host memory region identified by its raw
       pointer (i.e., a C pointer). The operation is enqueued into
       this stream.

       Caution: this is a low-level function, do not use it unless you
       have a very specific reason to do so.  Better use the high-level
       interfaces of OffloadArray instead.

       Parameters
       ----------
       device_ptr : SmartPtr
          Fake pointer of the source memory on the device.  If it carries
          a non-zero offset of its own, that offset is used as the device
          offset and offset_device must be left at 0.
       host_ptr : int
          Raw pointer to the destination memory on the host
       nbytes : int
          Number of bytes to copy; must be greater than zero.
       offset_device : int, optional, default 0
          Transfer offset (bytes) to be added to the address of the device
          memory.
       offset_host : int, optional, default 0
          Transfer offset (bytes) to be added to raw host pointer

       See Also
       --------
       transfer_host2device, transfer_device2device,
       allocate_device_memory, deallocate_device_memory

       Returns
       -------
       None

       Raises
       ------
       ValueError
          If a pointer is None, device_ptr is not a SmartPtr, an offset
          is negative, nbytes is not positive, or both the fake pointer
          and offset_device specify a non-zero offset.

       Examples
       --------
       >>> a = numpy.arange(0.0, 16.0)
       >>> nbytes = a.dtype.itemsize * a.size
       >>> ptr_a_host = a.ctypes.data
       >>> device_ptr = stream.allocate_device_memory(nbytes)
       >>> stream.transfer_host2device(ptr_a_host, device_ptr, nbytes)
       >>> b = numpy.empty_like(a)
       >>> print b
       [  6.90762927e-310   7.73120247e-317   3.60667921e-322   0.00000000e+000
          0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
          4.94065646e-324   9.76815212e-317   7.98912845e-317   0.00000000e+000
          5.53353523e-322   1.58101007e-322   0.00000000e+000   7.38839996e-317]
       # random data
       >>> ptr_b_host = b.ctypes.data
       >>> stream.transfer_device2host(device_ptr, ptr_b_host, nbytes)
       >>> stream.sync()
       >>> print b
       [  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.  13.
         14.  15.]
    """
    # None has to be tested before the type check; otherwise the type
    # check rejects it first and the specific message is never reported
    if device_ptr is None:
        raise ValueError('Invalid None device pointer')
    if host_ptr is None:
        raise ValueError('Invalid None host pointer')

    if not isinstance(device_ptr, SmartPtr):
        raise ValueError('Wrong argument, no device pointer given')
    # TODO: add more safety checks here (e.g., pointer from right device
    #       and stream)

    if offset_device < 0:
        raise ValueError("Negative offset passed for offset_device")
    if offset_host < 0:
        raise ValueError("Negative offset passed for offset_host")
    if nbytes <= 0:
        raise ValueError('Invalid byte count: {0}'.format(nbytes))

    # an offset stored in the fake pointer replaces offset_device; giving
    # both would be ambiguous, so that combination is rejected
    if device_ptr._offset != 0:
        if offset_device != 0:
            raise ValueError('Offset cannot be non-zero if fake pointer '
                             'has an offset.')
        offset_device = device_ptr._offset

    debug(1, '(device {0} -> host) transferring {1} bytes '
             '(device ptr {2}, host ptr 0x{3:x})',
          self._device_id, nbytes, device_ptr, host_ptr)
    # the native layer works on the raw value inside the smart pointer
    device_ptr = device_ptr._device_ptr
    _pymic_impl_stream_memcpy_d2h(self._device_id, self._stream_id,
                                  device_ptr, host_ptr,
                                  nbytes, offset_device, offset_host)
    return None