def fill(self, value):
    """Set every element of the array to `value`.

    The fill kernel is enqueued into the array's stream object.

    Parameters
    ----------
    value : type
        Value to write into the array.  Its Python type has to compare
        equal to the data type of the array.

    Returns
    -------
    out : OffloadArray
        This very instance.

    Raises
    ------
    ValueError
        If the type of `value` does not match the array's data type.

    See Also
    --------
    zero
    """
    value_type = type(value)
    if self.dtype != value_type:
        template = "Data types do not match: {0} != {1}"
        raise ValueError(template.format(self.dtype, value_type))

    type_code = map_data_types(self.dtype)
    count = int(self.size)
    self.stream.invoke(self._library.pymic_offload_array_fill,
                       type_code, count, self, value)
    return self
def __iadd__(self, other):
    """Add an array or scalar to an array (in-place operation).

    The add kernel is enqueued into the array's stream object and
    writes its result back into this array.

    Parameters
    ----------
    other : OffloadArray, numpy.ndarray, or scalar
        Operand to add.  An array operand must not have more
        dimensions than this array.

    Returns
    -------
    out : OffloadArray
        This very instance.

    Raises
    ------
    ValueError
        If `other` is an array with more dimensions than this array.
    """
    dt = map_data_types(self.dtype)
    x = self
    y = other
    incx = 1
    if isinstance(other, (OffloadArray, numpy.ndarray)):
        # the output is this array, so the operand cannot be broadcast
        # to something of higher dimensionality
        if x.ndim < y.ndim:
            raise ValueError("non broadcastable output:"
                             " {} doesn't match {}".format(
                                 x.shape, y.shape))
        broadcast_dim = _get_broadcast_dim(self.shape, other.shape)
        dim_x = self.ndim + broadcast_dim
        niter = _get_dimsize(self.shape, dim_x)
        n = incix = _get_stride(self.shape, dim_x)
        # NOTE(review): an outer increment of 0 presumably makes the
        # kernel re-read the same `y` data on every outer iteration --
        # confirm against the native kernel
        inciy = 0
        incy = 1
    else:
        # scalar
        y = _cast_scalar(self, other)
        n = int(self.size)
        niter = 1
        incix = _get_stride(self.shape, -1)
        inciy = incy = 0
    # `x` is passed as both the first input and the output
    self.stream.invoke(self._library.pymic_offload_array_add,
                       dt, niter, n,
                       x, incix, incx,
                       y, inciy, incy,
                       x, incix, incx)
    return self
def __imul__(self, other):
    """Multiply this array by an array or a scalar (in-place operation).

    Array operands need the same shape and data type as this array; a
    scalar operand needs a type that compares equal to the array's data
    type.  The multiplication kernel is enqueued into the array's
    stream object and writes its result back into this array.

    Raises
    ------
    ValueError
        If shapes or data types of the operands do not match.
    """
    type_code = map_data_types(self.dtype)
    count = int(self.size)

    # find out which kind of operand we got and which shape to compare
    if isinstance(other, OffloadArray):
        operand_is_array = True
        operand_shape = other.array.shape
    elif isinstance(other, numpy.ndarray):
        operand_is_array = True
        operand_shape = other.shape
    else:
        operand_is_array = False

    dtype_template = "Data types do not match: {0} != {1}"
    if operand_is_array:
        if self.array.shape != operand_shape:
            raise ValueError("shapes of the arrays need to match: "
                             "{0} != {1}".format(self.array.shape,
                                                 other.shape))
        if self.dtype != other.dtype:
            raise ValueError(dtype_template.format(self.dtype, other.dtype))
        operand_stride = 1
    else:
        # scalar
        if self.dtype != type(other):
            raise ValueError(dtype_template.format(self.dtype, type(other)))
        operand_stride = 0

    self.stream.invoke(self._library.pymic_offload_array_mul,
                       type_code, count,
                       self, 1,
                       other, operand_stride,
                       self, 1)
    return self
def fill(self, value):
    """Fill an array with the specified value.

    The value is converted to the Python scalar type matching the
    array's data type (float, int, or complex) before the fill kernel
    is enqueued into the array's stream object.

    Parameters
    ----------
    value : type
        Value to fill the array with.

    Returns
    -------
    out : OffloadArray
        The object instance of this OffloadArray.

    Raises
    ------
    ValueError
        If the array's data type is neither a floating-point, an
        integer, nor a complex type.

    See Also
    --------
    zero
    """
    # numpy.float, numpy.int and numpy.complex were aliases of the
    # builtins and are gone from current NumPy releases; the abstract
    # scalar types express the intended "kind of dtype" test
    if numpy.issubdtype(self.dtype, numpy.floating):
        val = float(value)
    elif numpy.issubdtype(self.dtype, numpy.integer):
        val = int(value)
    elif numpy.issubdtype(self.dtype, numpy.complexfloating):
        val = complex(value)
    else:
        # previously `val` stayed unbound here (UnboundLocalError)
        raise ValueError("Cannot fill array of data type "
                         "{0}".format(self.dtype))
    dt = map_data_types(self.dtype)
    n = int(self.size)
    x = self
    self.stream.invoke(self._library.pymic_offload_array_fill,
                       dt, n, x, val)
    return self
def __pow__(self, other):
    """Raise the elements of this array to the power of `other`.

    `other` is either an array (OffloadArray or numpy.ndarray) or a
    scalar; it is validated against this array before the kernel is
    enqueued.  The result is returned as a new OffloadArray.

    The operation is enqueued into the array's default stream object
    and completes asynchronously.
    """
    type_code = map_data_types(self.dtype)
    count = int(self.size)

    # arrays are passed with increment 1, a scalar with increment 0
    if isinstance(other, (OffloadArray, numpy.ndarray)):
        _check_arrays(self, other)
        exponent_stride = 1
    else:
        _check_scalar(self, other)
        exponent_stride = 0

    powers = OffloadArray(self.shape, self.dtype,
                          device=self.device, stream=self.stream)
    self.stream.invoke(self._library.pymic_offload_array_pow,
                       type_code, count,
                       self, 1,
                       other, exponent_stride,
                       powers, 1)
    return powers
def __pow__(self, other):
    """Raise the elements of this array to the power of `other`.

    An array operand (OffloadArray or numpy.ndarray) is validated
    against this array; a scalar operand is converted with
    `_cast_scalar`.  The result is returned as a new array of the same
    class as this one.

    The operation is enqueued into the array's default stream object
    and completes asynchronously.
    """
    type_code = map_data_types(self.dtype)
    count = int(self.size)

    # arrays are passed with increment 1, a scalar with increment 0
    if isinstance(other, (OffloadArray, numpy.ndarray)):
        _check_arrays(self, other)
        exponent, exponent_stride = other, 1
    else:
        exponent, exponent_stride = _cast_scalar(self, other), 0

    powers = type(self)(self.shape, self.dtype,
                        device=self.device, stream=self.stream)
    self.stream.invoke(self._library.pymic_offload_array_pow,
                       type_code, count,
                       self, 1,
                       exponent, exponent_stride,
                       powers, 1)
    return powers
def __setitem__(self, index, sequence):
    """Assign `sequence` to the slice `index` of this OffloadArray.

    `index` is read through its ``start``, ``stop`` and ``step``
    attributes; missing values default to 0, the array size, and 1.
    Only a step of 1 is accepted.

    The operation is enqueued into the array's default stream object
    and completes asynchronously.  For a numpy.ndarray source the
    stream is synchronized before returning.

    Raises
    ------
    ValueError
        If the step of `index` is not 1.
    """
    start, stop, step = index.start, index.stop, index.step
    start = 0 if start is None else start
    stop = self.size if stop is None else stop
    step = 1 if step is None else step

    # TODO: add additional checks here: shape/size/data type
    if step != 1:
        raise ValueError('Cannot assign with stride not equal to 1')

    type_code = map_data_types(self.dtype)

    if isinstance(sequence, OffloadArray):
        source = sequence
        wait_for_completion = False
    elif isinstance(sequence, numpy.ndarray):
        # host data is bound to the stream first
        source = self.stream.bind(sequence)
        wait_for_completion = True
    else:
        # NOTE(review): the slice bounds are not passed on here, so this
        # looks like it fills the whole array -- confirm this is intended
        self.fill(sequence)
        return

    self.stream.invoke(self._library.pymic_offload_array_setslice,
                       type_code, start, stop, self, source)
    if wait_for_completion:
        self.stream.sync()
def __mul__(self, other):
    """Multiply this array with an array or a scalar.

    `other` is validated against this array before the kernel is
    enqueued.  The product is returned as a new OffloadArray.

    The operation is enqueued into the array's default stream object
    and completes asynchronously.
    """
    type_code = map_data_types(self.dtype)
    count = int(self.size)

    # arrays are passed with increment 1, a scalar with increment 0
    if isinstance(other, (OffloadArray, numpy.ndarray)):
        _check_arrays(self, other)
        factor_stride = 1
    else:
        _check_scalar(self, other)
        factor_stride = 0

    product = OffloadArray(self.shape, self.dtype,
                           device=self.device, stream=self.stream)
    self.stream.invoke(self._library.pymic_offload_array_mul,
                       type_code, count,
                       self, 1,
                       other, factor_stride,
                       product, 1)
    return product
def reverse(self):
    """Return a new OffloadArray with all elements in reverse order.

    The operation is enqueued into the array's default stream object
    and completes asynchronously.

    Returns
    -------
    out : OffloadArray
        Newly allocated array (created with the stream's `empty_like`)
        that receives the reversed elements.

    Raises
    ------
    ValueError
        If the array has more than one dimension.
    """
    if self.ndim > 1:
        # message used to read "revered"
        raise ValueError("Multi-dimensional arrays cannot be reversed.")
    dt = map_data_types(self.dtype)
    n = int(self.array.size)
    result = self.stream.empty_like(self)
    self.stream.invoke(self._library.pymic_offload_array_reverse,
                       dt, n, self, result)
    return result
def reverse(self):
    """Return a new OffloadArray with all elements in reverse order.

    The operation is enqueued into the array's default stream object
    and completes asynchronously.

    Returns
    -------
    out : OffloadArray
        Newly allocated array (created with the stream's `empty_like`)
        that receives the reversed elements.

    Raises
    ------
    ValueError
        If the array's shape has more than one entry.
    """
    if len(self.shape) > 1:
        # message used to read "revered"
        raise ValueError("Multi-dimensional arrays cannot be reversed.")
    dt = map_data_types(self.dtype)
    n = int(self.array.size)
    result = self.stream.empty_like(self)
    self.stream.invoke(self._library.pymic_offload_array_reverse,
                       dt, n, self, result)
    return result
def __pow__(self, other):
    """Element-wise pow() function.

    The operation is enqueued into the array's default stream object
    and completes asynchronously.

    Parameters
    ----------
    other : OffloadArray, numpy.ndarray, or scalar
        Exponent(s).  Arrays need the same shape and data type as this
        array; a scalar needs a type that compares equal to the array's
        data type.

    Returns
    -------
    out : OffloadArray
        Newly allocated array that receives the result.

    Raises
    ------
    ValueError
        If shapes or data types of the operands do not match.
    """
    dt = map_data_types(self.dtype)
    n = int(self.size)
    x = self
    # the operand is handed to the kernel as it was received; an
    # OffloadArray is passed as such (not as its host-side `array`), so
    # that invoke does not treat it as a host array to copy in and out
    y = other
    incx = 1
    incr = 1
    if isinstance(other, OffloadArray):
        if self.array.shape != other.array.shape:
            raise ValueError("shapes of the arrays need to match (" +
                             str(self.array.shape) + " != " +
                             str(other.array.shape) + ")")
        if self.dtype != other.dtype:
            raise ValueError("Data types do not match: "
                             "{0} != {1}".format(self.dtype, other.dtype))
        incy = 1
    elif isinstance(other, numpy.ndarray):
        if self.array.shape != other.shape:
            raise ValueError("shapes of the arrays need to match (" +
                             str(self.array.shape) + " != " +
                             str(other.shape) + ")")
        if self.dtype != other.dtype:
            raise ValueError("Data types do not match: "
                             "{0} != {1}".format(self.dtype, other.dtype))
        incy = 1
    else:
        # scalar
        if self.dtype != type(other):
            raise ValueError("Data types do not match: "
                             "{0} != {1}".format(self.dtype, type(other)))
        incy = 0
    result = OffloadArray(self.shape, self.dtype,
                          device=self.device, stream=self.stream)
    self.stream.invoke(self._library.pymic_offload_array_pow,
                       dt, n, x, incx, y, incy, result, incr)
    return result
def __imul__(self, other):
    """Multiply this array by an array or a scalar (in-place operation).

    `other` is validated against this array before the kernel is
    enqueued into the array's stream object; the result is written back
    into this array.
    """
    type_code = map_data_types(self.dtype)
    count = int(self.size)

    # arrays are passed with increment 1, a scalar with increment 0
    if isinstance(other, (OffloadArray, numpy.ndarray)):
        _check_arrays(self, other)
        factor_stride = 1
    else:
        _check_scalar(self, other)
        factor_stride = 0

    # this array is both the first input and the output
    self.stream.invoke(self._library.pymic_offload_array_mul,
                       type_code, count,
                       self, 1,
                       other, factor_stride,
                       self, 1)
    return self
def __idiv__(self, other):
    """Divide this array by an array or a scalar (in-place operation).

    An array operand is validated against this array; a scalar operand
    is converted with `_cast_scalar`.  The division kernel is enqueued
    into the array's stream object and writes its result back into this
    array.
    """
    type_code = map_data_types(self.dtype)
    count = int(self.size)

    # arrays are passed with increment 1, a scalar with increment 0
    if isinstance(other, (OffloadArray, numpy.ndarray)):
        _check_arrays(self, other)
        divisor, divisor_stride = other, 1
    else:
        divisor, divisor_stride = _cast_scalar(self, other), 0

    # this array is both the dividend and the output
    self.stream.invoke(self._library.pymic_offload_array_div,
                       type_code, count,
                       self, 1,
                       divisor, divisor_stride,
                       self, 1)
    return self
def __abs__(self):
    """Return a new OffloadArray with the absolute values of the
    elements of `self`.

    For arrays of a complex data type the result array is allocated
    with data type ``float``; otherwise it is allocated like `self`.

    The operation is enqueued into the array's default stream object
    and completes asynchronously.

    Returns
    -------
    out : OffloadArray
        Newly allocated array that receives the absolute values.
    """
    dt = map_data_types(self.dtype)
    n = int(self.array.size)
    x = self
    if is_complex_type(self.dtype):
        # `numpy.float` was an alias of the builtin `float` and is gone
        # from current NumPy releases; the builtin selects the same dtype
        result = self.stream.empty(self.shape, dtype=float,
                                   order=self.order, update_host=False)
    else:
        result = self.stream.empty_like(self, update_host=False)
    self.stream.invoke(self._library.pymic_offload_array_abs,
                       dt, n, x, result)
    return result
def __add__(self, other):
    """Add an array or scalar to an array.

    The operation is enqueued into the array's default stream object
    and completes asynchronously.

    Parameters
    ----------
    other : OffloadArray, numpy.ndarray, or scalar
        Operand to add.  If an array operand has more dimensions than
        this array, the two operands swap roles.

    Returns
    -------
    out : OffloadArray
        Newly allocated array of the same class as `self`, shaped like
        the operand with more dimensions.
    """
    dt = map_data_types(self.dtype)
    x = self
    y = other
    incx = 1
    incr = 1
    if isinstance(other, (OffloadArray, numpy.ndarray)):
        broadcast_dim = _get_broadcast_dim(self.shape, other.shape)
        # make `x` the operand with more dimensions; it determines the
        # shape of the result
        if y.ndim > self.ndim:
            x, y = y, x
        shape = x.shape
        dim_x = x.ndim + broadcast_dim
        niter = _get_dimsize(x.shape, dim_x)
        n = incix = _get_stride(x.shape, dim_x)
        incir = incix
        # NOTE(review): an outer increment of 0 presumably makes the
        # kernel re-read the same `y` data on every outer iteration --
        # confirm against the native kernel
        inciy = 0
        incy = 1
    else:
        # scalar
        y = _cast_scalar(self, other)
        n = int(self.size)
        shape = self.shape
        niter = 1
        incix = incir = _get_stride(x.shape, -1)
        inciy = incy = 0
    result = type(self)(shape, self.dtype,
                        device=self.device, stream=self.stream)
    self.stream.invoke(self._library.pymic_offload_array_add,
                       dt, niter, n,
                       x, incix, incx,
                       y, inciy, incy,
                       result, incir, incr)
    return result
def __rdiv__(self, other):
    """Divide an array or a scalar by this array (reverse operation).

    An array operand is validated against this array; a scalar operand
    is converted with `_cast_scalar`.  The quotient is returned as a new
    array of the same class as this one; the division kernel is
    enqueued into the array's stream object.
    """
    type_code = map_data_types(self.dtype)
    count = int(self.size)

    # arrays are passed with increment 1, a scalar with increment 0
    if isinstance(other, (OffloadArray, numpy.ndarray)):
        _check_arrays(self, other)
        dividend, dividend_stride = other, 1
    else:
        dividend, dividend_stride = _cast_scalar(self, other), 0

    quotient = type(self)(self.shape, self.dtype,
                          device=self.device, stream=self.stream)
    # reverse operation: `other` comes first, this array second
    self.stream.invoke(self._library.pymic_offload_array_div,
                       type_code, count,
                       dividend, dividend_stride,
                       self, 1,
                       quotient, 1)
    return quotient
def invoke(self, kernel, *args):
    """Enqueue the invocation of a native kernel into this stream.

    The kernel is obtained by accessing the attribute of its library
    that has the same name as the kernel.  The shared-object library
    containing the kernel function has to be loaded through the
    load_library of the target device before invoke is called.

    Kernel arguments may be instances of OffloadArray, numpy.ndarray,
    or scalar data.  numpy.ndarray arguments get copy-in/copy-out
    treatment: a device buffer is allocated, the data is transferred
    to the target device before the kernel runs and transferred back
    after it has finished.  Scalars are wrapped into numpy arrays and
    passed to the kernel through their host pointer.

    All operations (copy in/copy out and invocation) are enqueued into
    the stream object and complete asynchronously; if at least one
    argument needed copy-in/copy-out, the stream is synchronized
    before invoke returns.

    Parameters
    ----------
    kernel : kernel
        Kernel to be invoked
    args : OffloadArray, numpy.ndarray, or scalar type
        Arguments to be passed to the kernel function

    See Also
    --------
    load_library

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If more than 16 kernel arguments are given.
    OffloadError
        If the kernel's library was loaded on a different device.

    Examples
    --------
    >>> library = device.load_library("libdgemm")
    >>> stream.invoke(library.dgemm, A, B, C, n, m, k)
    """
    # a wrapper hands over the actual arguments packed into one extra
    # tuple; unpack it in that case
    if args and type(args[0]) == tuple:
        args = args[0]
    nargs = len(args)

    # libxstream currently cannot handle more than 16 kernel arguments
    if nargs > 16:
        raise ValueError("Kernels with more than 16 arguments "
                         "are not supported")

    # refuse to run a kernel whose library lives on another device
    if kernel[2] is not self._device:
        raise OffloadError("Cannot invoke kernel, "
                           "library not loaded on device")

    # per-argument descriptors; device pointers are kept as 64-bit
    # integers inside ndarrays
    arg_dims, arg_type, arg_ptrs, arg_size = (
        numpy.empty((nargs,), dtype=numpy.int64) for _ in range(4))
    copy_in_out = []
    scalars = []

    prefix = "(device {0}, stream 0x{1:x}) kernel '{2}' "
    for position, argument in enumerate(args):
        if argument is None:
            # None is handed to the kernel as a nullptr; the type code
            # -1 is the magic number that marks nullptrs
            descriptor = (1, -1, 0, 0)
            template = prefix + "arg {3} is None (device pointer 'nullptr')"
            details = ()
        elif isinstance(argument, pymic.OffloadArray):
            # hand the device pointer of the OffloadArray to the kernel
            descriptor = (1,
                          map_data_types(argument.dtype),
                          argument._device_ptr._device_ptr,  # fake pointer
                          argument._nbytes)
            template = (prefix +
                        "arg {3} is offload array (device pointer {4})")
            details = (argument._device_ptr,)
        elif isinstance(argument, numpy.ndarray):
            # allocate a device buffer on the target of the invoke and
            # remember the array for copy-in/copy-out
            host_ptr = argument.ctypes.data  # raw C pointer to host data
            nbytes = argument.dtype.itemsize * argument.size
            dev_ptr = self.allocate_device_memory(nbytes)
            copy_in_out.append((host_ptr, dev_ptr, nbytes, argument))
            descriptor = (1,
                          map_data_types(argument.dtype),
                          dev_ptr._device_ptr,  # fake pointer
                          nbytes)
            template = (prefix +
                        "arg {3} is copy-in/-out array (host pointer {4}, "
                        "device pointer {5})")
            details = (host_ptr, dev_ptr)
        else:
            # this is a hack, but scalars get wrapped as numpy arrays;
            # the wrappers are collected in `scalars`
            wrapped = numpy.asarray(argument)
            host_ptr = wrapped.ctypes.data  # raw C pointer to host data
            nbytes = wrapped.dtype.itemsize * wrapped.size
            scalars.append(wrapped)
            descriptor = (0,
                          map_data_types(wrapped.dtype),
                          host_ptr,
                          nbytes)
            template = prefix + "arg {3} is scalar {4} (host pointer {5})"
            details = (argument, host_ptr)

        (arg_dims[position], arg_type[position],
         arg_ptrs[position], arg_size[position]) = descriptor
        debug(3, template.format(self._device_id, self._stream_id,
                                 kernel[0], position, *details))

    debug(1,
          "(device {0}, stream 0x{1:x}) invoking kernel '{2}' "
          "(pointer 0x{3:x}) with {4} "
          "argument(s) ({5} copy-in/copy-out, {6} scalars)",
          self._device_id, self._stream_id, kernel[0], kernel[1],
          nargs, len(copy_in_out), len(scalars))

    # copy-in: move the host arrays to their device buffers
    for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
        self.transfer_host2device(host_ptr, dev_ptr, nbytes)

    pymic_stream_invoke_kernel(self._device_id, self._stream_id,
                               kernel[1], nargs,
                               arg_dims, arg_type, arg_ptrs, arg_size)

    # copy-out: bring the device buffers back to the host arrays
    for host_ptr, dev_ptr, nbytes, _ in copy_in_out:
        self.transfer_device2host(dev_ptr, host_ptr, nbytes)

    if copy_in_out:
        self.sync()