def fill(self, value):
    """Set every element of this array to ``value``.

    The fill kernel (``pymic_offload_array_fill``) is enqueued on the
    array's stream.

    Parameters
    ----------
    value : type
        Fill value. Its Python type has to compare equal to the
        array's ``dtype``.

    Returns
    -------
    out : OffloadArray
        This very instance, so calls can be chained.

    Raises
    ------
    ValueError
        If ``type(value)`` does not match the array's data type.

    See Also
    --------
    zero
    """
    value_type = type(value)
    if self.dtype != value_type:
        raise ValueError("Data types do not match: "
                         "{0} != {1}".format(self.dtype, value_type))

    # Resolve the kernel arguments up front, then enqueue the kernel.
    type_code = map_data_types(self.dtype)
    count = int(self.size)
    fill_kernel = self._library.pymic_offload_array_fill
    self.stream.invoke(fill_kernel, type_code, count, self, value)
    return self
def __imul__(self, other):
    """In-place element-wise multiplication (``self *= other``).

    ``other`` may be an OffloadArray, a numpy.ndarray, or a scalar.
    Array operands must have the same shape and dtype as this array; a
    scalar's Python type must match the array's dtype. The multiply
    kernel is enqueued on the array's stream and writes its result back
    into this array.

    Returns
    -------
    out : OffloadArray
        This instance.

    Raises
    ------
    ValueError
        On a shape or data type mismatch.
    """
    type_code = map_data_types(self.dtype)
    count = int(self.size)
    unit_step = int(1)

    operand_is_offload = isinstance(other, OffloadArray)
    if operand_is_offload or isinstance(other, numpy.ndarray):
        # Both array flavours go through the same validation; only the
        # attribute that holds the shape differs.
        if self.array.shape != (other.array.shape if operand_is_offload
                                else other.shape):
            raise ValueError("shapes of the arrays need to match: "
                             "{0} != {1}".format(self.array.shape,
                                                 other.shape))
        if self.dtype != other.dtype:
            raise ValueError("Data types do not match: "
                             "{0} != {1}".format(self.dtype, other.dtype))
        operand_step = int(1)
    else:
        # scalar operand
        if self.dtype != type(other):
            raise ValueError("Data types do not match: "
                             "{0} != {1}".format(self.dtype, type(other)))
        # presumably an increment of 0 makes the kernel reuse the single
        # scalar for every element -- confirm against the native kernel
        operand_step = int(0)

    self.stream.invoke(self._library.pymic_offload_array_mul,
                       type_code, count,
                       self, unit_step,
                       other, operand_step,
                       self, unit_step)
    return self
def __pow__(self, other):
    """Element-wise pow() function (``self ** other``).

    The operation is enqueued into the array's default stream object
    and completes asynchronously.

    Parameters
    ----------
    other : OffloadArray, numpy.ndarray, or scalar
        Right-hand operand of ``**``. An array operand must have the
        same shape and dtype as this array; a scalar's Python type must
        match this array's dtype.

    Returns
    -------
    out : OffloadArray
        A newly created array (same shape, dtype, device and stream as
        this one) that receives the result.

    Raises
    ------
    ValueError
        On a shape or data type mismatch.
    """
    dt = map_data_types(self.dtype)
    n = int(self.size)
    x = self
    incx = int(1)
    incr = int(1)
    if isinstance(other, OffloadArray):
        if self.array.shape != other.array.shape:
            raise ValueError("shapes of the arrays need to match ("
                             + str(self.array.shape) + " != "
                             + str(other.array.shape) + ")")
        if self.dtype != other.dtype:
            raise ValueError("Data types do not match: "
                             "{0} != {1}".format(self.dtype, other.dtype))
        # Hand over the OffloadArray itself, not its host-side
        # `other.array`: invoke() would treat the ndarray as a plain
        # host buffer and copy the (possibly stale) host data to a
        # temporary device buffer instead of using the data that
        # already lives on the device.
        y = other
        incy = int(1)
    elif isinstance(other, numpy.ndarray):
        if self.array.shape != other.shape:
            raise ValueError("shapes of the arrays need to match ("
                             + str(self.array.shape) + " != "
                             + str(other.shape) + ")")
        if self.dtype != other.dtype:
            raise ValueError("Data types do not match: "
                             "{0} != {1}".format(self.dtype, other.dtype))
        y = other
        incy = int(1)
    else:
        # scalar
        if self.dtype != type(other):
            raise ValueError("Data types do not match: "
                             "{0} != {1}".format(self.dtype, type(other)))
        y = other
        # presumably an increment of 0 makes the kernel reuse the single
        # scalar for every element -- confirm against the native kernel
        incy = int(0)
    result = OffloadArray(self.shape, self.dtype,
                          device=self.device, stream=self.stream)
    self.stream.invoke(self._library.pymic_offload_array_pow,
                       dt, n, x, incx, y, incy, result, incr)
    return result
def reverse(self):
    """Return a new OffloadArray with all elements in reverse order.

    The operation is enqueued into the array's default stream object
    and completes asynchronously.

    Returns
    -------
    out : OffloadArray
        A new array, created via ``stream.empty_like(self)``, that
        receives the reversed elements.

    Raises
    ------
    ValueError
        If this array has more than one dimension.
    """
    if len(self.shape) > 1:
        raise ValueError("Multi-dimensional arrays cannot be reversed.")
    dt = map_data_types(self.dtype)
    n = int(self.array.size)
    result = self.stream.empty_like(self)
    self.stream.invoke(self._library.pymic_offload_array_reverse,
                       dt, n, self, result)
    return result
def __abs__(self):
    """Return a new OffloadArray with the absolute values of the
    elements of `self`.

    The operation is enqueued into the array's default stream object
    and completes asynchronously.

    Returns
    -------
    out : OffloadArray
        A new array created without a host update. For complex input
        the result is allocated with a real floating-point dtype and
        the same shape and order; otherwise it is created with
        ``empty_like(self)``.
    """
    dt = map_data_types(self.dtype)
    n = int(self.array.size)
    x = self
    if dt == 2:  # complex data
        # The builtin `float` replaces the `numpy.float` alias, which
        # was deprecated in NumPy 1.20 and removed in 1.24; the alias
        # referred to exactly this builtin, so the dtype is unchanged.
        result = self.stream.empty(self.shape, dtype=float,
                                   order=self.order, update_host=False)
    else:
        result = self.stream.empty_like(self, update_host=False)
    self.stream.invoke(self._library.pymic_offload_array_abs,
                       dt, n, x, result)
    return result
def __setslice__(self, i, j, sequence):
    """Assign ``sequence`` to the slice ``[i:j]`` of this OffloadArray.

    The slice bounds are clamped to the array size. Depending on the
    type of ``sequence``:

    * OffloadArray: the set-slice kernel is enqueued on the array's
      stream.
    * numpy.ndarray: the array is first bound to the stream, the kernel
      is enqueued, and the stream is synchronized afterwards.
    * anything else: the value is forwarded to ``fill``; note that the
      slice bounds are not passed along in this case.
    """
    # TODO: raise errors here: shape/size/data data type
    lower = min(i, self.size)
    upper = min(j, self.size)
    type_code = map_data_types(self.dtype)

    if isinstance(sequence, OffloadArray):
        self.stream.invoke(self._library.pymic_offload_array_setslice,
                           type_code, lower, upper, self, sequence)
        return

    if isinstance(sequence, numpy.ndarray):
        bound_sequence = self.stream.bind(sequence)
        self.stream.invoke(self._library.pymic_offload_array_setslice,
                           type_code, lower, upper, self, bound_sequence)
        self.stream.sync()
        return

    self.fill(sequence)
def invoke(self, kernel, *args):
    """Invoke a native kernel on the target device by enqueuing a request
    in the current stream.

    The kernel is identified by accessing its library's attribute with
    the same name as the kernel name. The kernel function needs to be in
    a shared-object library that has been loaded by calling the
    load_library of the target device before invoke.

    The additional arguments of invoke can be either instances of
    OffloadArray, numpy.ndarray, or scalar data. For numpy.ndarray
    arguments, invoke automatically performs copy-in and copy-out of
    the argument, that is, before the kernel is invoked, the argument
    is automatically transferred to a temporary buffer on the target
    device and transferred back after the kernel has finished. Scalars
    are wrapped in a numpy array and passed by host pointer.

    All operations (copy in/copy out and invocation) are enqueued into
    the stream object and complete asynchronously.

    Parameters
    ----------
    kernel : kernel
        Kernel to be invoked. It is indexed as ``kernel[0]`` (name used
        in the debug output), ``kernel[1]`` (function pointer) and
        ``kernel[2]`` (device the library was loaded on).
    args : OffloadArray, numpy.ndarray, or scalar type
        Arguments to be passed to the kernel function

    See Also
    --------
    load_library

    Returns
    -------
    None

    Raises
    ------
    OffloadError
        If the kernel's library was loaded on a different device.

    Examples
    --------
    >>> library = device.load_library("libdgemm")
    >>> stream.invoke(library.dgemm, A, B, C, n, m, k)
    """

    # if called from wrapper, actual arguments are wrapped in an
    # extra tuple, so we unwrap them (exact type check on purpose, so
    # tuple subclasses passed as data are left alone)
    if args and type(args[0]) is tuple:
        args = args[0]

    # safety check: avoid invoking a kernel if its library has been
    # loaded on a different device
    if kernel[2] is not self._device:
        raise OffloadError("Cannot invoke kernel, "
                           "library not loaded on device")

    # determine the types of the arguments (scalar vs arrays);
    # we store the device pointers as 64-bit integers in an ndarray
    nargs = len(args)
    arg_dims = numpy.empty((nargs,), dtype=numpy.int64)
    arg_type = numpy.empty((nargs,), dtype=numpy.int64)
    arg_ptrs = numpy.empty((nargs,), dtype=numpy.int64)
    arg_size = numpy.empty((nargs,), dtype=numpy.int64)
    copy_in_out = []   # (host_ptr, dev_ptr, nbytes, ndarray) per ndarray
    scalars = []       # wrapped scalars, referenced until we return
    released = 0       # number of temporary buffers already handed back
    try:
        for i, a in enumerate(args):
            if isinstance(a, pymic.OffloadArray):
                # get the device pointer of the OffloadArray and
                # pass it to the kernel
                arg_dims[i] = 1
                arg_type[i] = map_data_types(a.dtype)
                arg_ptrs[i] = a._device_ptr._device_ptr  # fake pointer
                arg_size[i] = a._nbytes
            elif isinstance(a, numpy.ndarray):
                # allocate device buffer on the target of the invoke
                # and mark the numpy.ndarray for copyin/copyout semantics
                host_ptr = a.ctypes.data  # raw C pointer to host data
                nbytes = a.dtype.itemsize * a.size
                dev_ptr = self.allocate_device_memory(nbytes)
                # register right away so the buffer is released even if
                # a later step fails
                copy_in_out.append((host_ptr, dev_ptr, nbytes, a))
                arg_dims[i] = 1
                arg_type[i] = map_data_types(a.dtype)
                arg_ptrs[i] = dev_ptr._device_ptr  # fake pointer
                arg_size[i] = nbytes
            else:
                # this is a hack, but let's wrap scalars as numpy arrays
                cvtd = numpy.asarray(a)
                host_ptr = cvtd.ctypes.data  # raw C pointer to host data
                nbytes = cvtd.dtype.itemsize * cvtd.size
                scalars.append(cvtd)
                arg_dims[i] = 0
                arg_type[i] = map_data_types(cvtd.dtype)
                arg_ptrs[i] = host_ptr
                arg_size[i] = nbytes
        debug(1,
              "(device {0}, stream 0x{1:x}) invoking kernel '{2}' "
              "(pointer 0x{3:x}) with {4} "
              "argument(s) ({5} copy-in/copy-out, {6} scalars)",
              self._device_id, self._stream_id, kernel[0], kernel[1],
              len(args), len(copy_in_out), len(scalars))

        # iterate over the copyin arguments and transfer them
        for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
            self.transfer_host2device(host_ptr, dev_ptr, nbytes)

        _pymic_impl_invoke_kernel(self._device_id, self._stream_id,
                                  kernel[1], arg_dims, arg_type,
                                  arg_ptrs, arg_size, len(args))

        # iterate over the copyout arguments, transfer them back,
        # and release buffers
        for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
            self.transfer_device2host(dev_ptr, host_ptr, nbytes)
            # count the buffer as handled before releasing it so a
            # failing release is not retried in the cleanup below
            released += 1
            self.deallocate_device_memory(dev_ptr)
    finally:
        # On the regular path nothing is left here. After an error,
        # release the temporary buffers that were allocated but not yet
        # handed back, so they do not leak.
        for entry in copy_in_out[released:]:
            self.deallocate_device_memory(entry[1])

    return None
def invoke(self, kernel, *args):
    """Invoke a native kernel on the target device by enqueuing a request
    in the current stream.

    The kernel is identified by accessing its library's attribute with
    the same name as the kernel name. The kernel function needs to be in
    a shared-object library that has been loaded by calling the
    load_library of the target device before invoke.

    The additional arguments of invoke can be either instances of
    OffloadArray, numpy.ndarray, or scalar data. For numpy.ndarray
    arguments, invoke automatically performs copy-in and copy-out of
    the argument, that is, before the kernel is invoked, the argument
    is automatically transferred to a temporary buffer on the target
    device and transferred back after the kernel has finished. Scalars
    are wrapped in a numpy array and passed by host pointer.

    All operations (copy in/copy out and invocation) are enqueued into
    the stream object and complete asynchronously.

    Parameters
    ----------
    kernel : kernel
        Kernel to be invoked. It is indexed as ``kernel[0]`` (name used
        in the debug output), ``kernel[1]`` (function pointer) and
        ``kernel[2]`` (device the library was loaded on).
    args : OffloadArray, numpy.ndarray, or scalar type
        Arguments to be passed to the kernel function

    See Also
    --------
    load_library

    Returns
    -------
    None

    Raises
    ------
    OffloadError
        If the kernel's library was loaded on a different device.

    Examples
    --------
    >>> library = device.load_library("libdgemm")
    >>> stream.invoke(library.dgemm, A, B, C, n, m, k)
    """

    # if called from wrapper, actual arguments are wrapped in an
    # extra tuple, so we unwrap them (exact type check on purpose, so
    # tuple subclasses passed as data are left alone)
    if args and type(args[0]) is tuple:
        args = args[0]

    # safety check: avoid invoking a kernel if its library has been
    # loaded on a different device
    if kernel[2] is not self._device:
        raise OffloadError("Cannot invoke kernel, "
                           "library not loaded on device")

    # determine the types of the arguments (scalar vs arrays);
    # we store the device pointers as 64-bit integers in an ndarray
    nargs = len(args)
    arg_dims = numpy.empty((nargs,), dtype=numpy.int64)
    arg_type = numpy.empty((nargs,), dtype=numpy.int64)
    arg_ptrs = numpy.empty((nargs,), dtype=numpy.int64)
    arg_size = numpy.empty((nargs,), dtype=numpy.int64)
    copy_in_out = []   # (host_ptr, dev_ptr, nbytes, ndarray) per ndarray
    scalars = []       # wrapped scalars, referenced until we return
    released = 0       # number of temporary buffers already handed back
    try:
        for i, a in enumerate(args):
            if isinstance(a, pymic.OffloadArray):
                # get the device pointer of the OffloadArray and
                # pass it to the kernel
                arg_dims[i] = 1
                arg_type[i] = map_data_types(a.dtype)
                arg_ptrs[i] = a._device_ptr._device_ptr  # fake pointer
                arg_size[i] = a._nbytes
            elif isinstance(a, numpy.ndarray):
                # allocate device buffer on the target of the invoke
                # and mark the numpy.ndarray for copyin/copyout semantics
                host_ptr = a.ctypes.data  # raw C pointer to host data
                nbytes = a.dtype.itemsize * a.size
                dev_ptr = self.allocate_device_memory(nbytes)
                # register right away so the buffer is released even if
                # a later step fails
                copy_in_out.append((host_ptr, dev_ptr, nbytes, a))
                arg_dims[i] = 1
                arg_type[i] = map_data_types(a.dtype)
                arg_ptrs[i] = dev_ptr._device_ptr  # fake pointer
                arg_size[i] = nbytes
            else:
                # this is a hack, but let's wrap scalars as numpy arrays
                cvtd = numpy.asarray(a)
                host_ptr = cvtd.ctypes.data  # raw C pointer to host data
                nbytes = cvtd.dtype.itemsize * cvtd.size
                scalars.append(cvtd)
                arg_dims[i] = 0
                arg_type[i] = map_data_types(cvtd.dtype)
                arg_ptrs[i] = host_ptr
                arg_size[i] = nbytes
        debug(1,
              "(device {0}, stream 0x{1:x}) invoking kernel '{2}' "
              "(pointer 0x{3:x}) with {4} "
              "argument(s) ({5} copy-in/copy-out, {6} scalars)",
              self._device_id, self._stream_id, kernel[0], kernel[1],
              len(args), len(copy_in_out), len(scalars))

        # iterate over the copyin arguments and transfer them
        for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
            self.transfer_host2device(host_ptr, dev_ptr, nbytes)

        _pymic_impl_invoke_kernel(self._device_id, self._stream_id,
                                  kernel[1], arg_dims, arg_type,
                                  arg_ptrs, arg_size, len(args))

        # iterate over the copyout arguments, transfer them back,
        # and release buffers
        for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
            self.transfer_device2host(dev_ptr, host_ptr, nbytes)
            # count the buffer as handled before releasing it so a
            # failing release is not retried in the cleanup below
            released += 1
            self.deallocate_device_memory(dev_ptr)
    finally:
        # On the regular path nothing is left here. After an error,
        # release the temporary buffers that were allocated but not yet
        # handed back, so they do not leak.
        for entry in copy_in_out[released:]:
            self.deallocate_device_memory(entry[1])

    return None