def get_dimshuffle(dtype, shape, axes, src, dst):
    """
    Builds the dimshuffle kernel and its launch parameters for two
    same-sized tensors.

    Arguments:
        dtype: tensor data type
        shape (tuple): source shape
        axes (tuple): new order of axes
        src (TensorDescriptionWrapper): source tensor
        dst (TensorDescriptionWrapper): dest tensor

    Returns:
        (kernel, params) tuple: the copy/transpose kernel and the flat
        argument list to launch it with
    """
    kernel = _get_copy_transpose_kernel(dtype, shape, axes)

    # Launch arguments: destination and source descriptors first, then the
    # kernel's own args, then element strides for source and destination.
    launch_args = [dst.td, src.td]
    launch_args.extend(kernel.args)
    launch_args.extend(src.strides)
    launch_args.extend(dst.strides)

    return (kernel, launch_args)
def from_contiguous(self, tensor):
    """
    Copies from a contiguous GPUArray with the same dimensions into this
    tensor.

    Arguments:
        tensor (GPUArray): Contiguous tensor with same dimensions to use
            as source
    """
    # Convert byte strides to element strides as the kernel expects.
    src_elem_strides = [s // tensor.dtype.itemsize for s in tensor.strides]
    dst_elem_strides = [s // self.tensor.dtype.itemsize
                        for s in self.tensor.strides]

    # Identity axis order: a straight copy that honors both stride layouts.
    kernel = _get_copy_transpose_kernel(tensor.dtype, tensor.shape,
                                        range(len(tensor.shape)))

    launch_args = [self.tensor.gpudata, tensor.gpudata]
    launch_args.extend(kernel.args)
    launch_args.extend(src_elem_strides)
    launch_args.extend(dst_elem_strides)

    kernel.prepared_async_call(kernel.grid, kernel.block, None, *launch_args)
def as_contiguous(self):
    """
    Creates a new GPUArray with the same dimensions, but using contiguous
    memory.

    Returns:
        New contiguous GPUArray with separate underlying device allocation
    """
    out = GPUArray(self.tensor.shape, dtype=self.tensor.dtype)

    # Byte strides -> element strides for both source and the new buffer.
    src_elem_strides = [s // self.tensor.dtype.itemsize
                        for s in self.tensor.strides]
    dst_elem_strides = [s // out.dtype.itemsize for s in out.strides]

    # Identity axis order copies element-by-element across stride layouts.
    kernel = _get_copy_transpose_kernel(self.tensor.dtype,
                                        self.tensor.shape,
                                        range(len(self.tensor.shape)))

    launch_args = [out.gpudata, self.tensor.gpudata]
    launch_args.extend(kernel.args)
    launch_args.extend(src_elem_strides)
    launch_args.extend(dst_elem_strides)

    kernel.prepared_async_call(kernel.grid, kernel.block, None, *launch_args)
    return out
def from_other(self, tensor, dest=None):
    """
    Copies from another GPUArray with the same dimensions into this tensor.
    Handles discontiguous strides on both source and destination.

    Arguments:
        tensor (GPUArray): Tensor with same dimensions to use as source
        dest (GPUArray, optional): Destination tensor; defaults to this
            wrapper's own tensor when not given
    """
    target = self.tensor if dest is None else dest

    # Byte strides -> element strides as the kernel expects.
    src_elem_strides = [s // tensor.dtype.itemsize for s in tensor.strides]
    dst_elem_strides = [s // target.dtype.itemsize for s in target.strides]

    # Identity axis order: a straight copy that honors both stride layouts.
    kernel = _get_copy_transpose_kernel(tensor.dtype, tensor.shape,
                                        range(len(tensor.shape)))

    launch_args = [target.gpudata, tensor.gpudata]
    launch_args.extend(kernel.args)
    launch_args.extend(src_elem_strides)
    launch_args.extend(dst_elem_strides)

    kernel.prepared_async_call(kernel.grid, kernel.block, None, *launch_args)