def inverse(self, ary, out=None):
    '''Perform inverse FFT

    :param ary: Input array
    :param out: Optional output array

    :returns: The output array or a new numpy array if `out` is None.

    :raises TypeError: If this plan's ``direction`` is neither ``'both'``
                       nor ``'inverse'``.

    .. note:: If `ary` is `out`, an inplace operation is performed.
    '''
    if self.direction not in ('both', 'inverse'):
        raise TypeError("Invalid operation")

    d_ary, d_out, h_out, do_host_copy = self._prepare(ary, out)
    used_in, used_out, transpose_out = self._maybe_transpose(d_ary, d_out)
    self._plan.inverse(used_in, used_out)

    if transpose_out:
        # The plan wrote into `used_out`, which is a stand-in for `d_out`
        # whenever `transpose_out` is set.  The result must be moved back
        # into `d_out` even when no host copy is requested; otherwise a
        # caller-supplied device output would never receive the data.
        #
        # reshape the array so it can be transposed back
        used_out.shape = (used_out.shape[1], used_out.shape[0])
        # NOTE: `shape` has already been swapped, so shape[0] here is the
        # original second dimension.
        used_out.strides = (used_out.dtype.itemsize,
                            used_out.dtype.itemsize * used_out.shape[0])
        # and finally transpose it
        transpose(used_out, d_out)

    if do_host_copy:
        d_out.copy_to_host(h_out)
    return h_out
def _maybe_transpose(self, d_ary, d_out):
    """Pick the device buffers that should be handed to the FFT plan.

    cuFFT can't handle column-major data, so 2D Fortran-contiguous arrays
    are replaced by row-major stand-ins.

    :param d_ary: Device input array.
    :param d_out: Device output array.
    :returns: ``(used_in, used_out, transpose_out)``.  ``used_in`` is
              ``d_ary`` itself, or a new ``DeviceNDArray`` filled by
              ``transpose(d_ary, used_in)``.  ``used_out`` is ``d_out``
              itself, or a new, unfilled ``DeviceNDArray``.
              ``transpose_out`` is truthy when the output was replaced.
    """
    transpose_in = len(d_ary.shape) == 2 and d_ary.is_f_contiguous()
    transpose_out = len(d_out.shape) == 2 and d_out.is_f_contiguous()

    used_in = d_ary
    if transpose_in:
        # Row-major stand-in for the input, then fill it from d_ary.
        in_item = d_ary.dtype.itemsize
        n_rows, n_cols = d_ary.shape[0], d_ary.shape[1]
        used_in = DeviceNDArray(
            shape=(n_cols, n_rows),
            strides=(in_item, in_item * n_cols),
            dtype=d_ary.dtype)
        transpose(d_ary, used_in)

    used_out = d_out
    if transpose_out:
        # Row-major stand-in for the output; left unfilled here.
        out_item = d_out.dtype.itemsize
        used_out = DeviceNDArray(
            shape=d_out.shape,
            strides=(out_item * d_out.shape[1], out_item),
            dtype=d_out.dtype)

    return used_in, used_out, transpose_out
def test_transpose(self, rows, cols, dtype):
    """Transpose a ``(cols, rows)`` device array and compare with NumPy.

    :param rows: Number of rows of the transposed result.
    :param cols: Number of columns of the transposed result.
    :param dtype: Element type used for both host arrays.
    """
    count = rows * cols
    host_src = np.arange(count, dtype=dtype).reshape(cols, rows)
    host_dst = np.zeros(count, dtype=dtype).reshape(rows, cols)

    dev_src = cuda.to_device(host_src)
    dev_dst = cuda.cudadrv.devicearray.from_array_like(host_dst)

    transpose(dev_src, dev_dst)
    # Bring the device result back into the host destination buffer.
    dev_dst.copy_to_host(host_dst)

    matches = host_src.transpose() == host_dst
    self.assertTrue(np.all(matches))
def test_transpose(self):
    """Transpose a 4x4 SmartArray twice and expect the original values."""
    # To verify non-redundant data movement run this test with NUMBA_TRACE=1
    original = SmartArray(np.arange(16, dtype=float).reshape(4, 4))
    once = SmartArray(where='gpu', shape=(4, 4), dtype=float)
    twice = SmartArray(where='gpu', shape=(4, 4), dtype=float)
    event("initialization done")

    transpose(original, once)
    event("checkpoint")

    transpose(once, twice)
    event("done")

    # Transposing twice must round-trip back to the starting values.
    round_trip_ok = (twice.host() == original.host()).all()
    self.assertTrue(round_trip_ok)
def test_transpose_bool(self):
    """Check ``transpose`` on random boolean arrays for every small variant."""
    for rows, cols in self.small_variants:
        with self.subTest(rows=rows, cols=cols):
            host_in = np.random.randint(2, size=(rows, cols), dtype=np.bool_)
            expected = host_in.T

            dev_in = cuda.to_device(host_in)
            # Device buffer modelled on the expected (transposed) result.
            dev_out = cuda.device_array_like(expected)
            transpose(dev_in, dev_out)

            actual = dev_out.copy_to_host()
            np.testing.assert_array_equal(expected, actual)
def test_transpose(self):
    """Check ``transpose`` against NumPy for several shapes and dtypes."""
    variants = (
        (5, 6, np.float64),
        (128, 128, np.complex128),
        (1025, 512, np.float64),
    )
    for rows, cols, dtype in variants:
        with self.subTest(rows=rows, cols=cols, dtype=dtype):
            count = rows * cols
            host_src = np.arange(count, dtype=dtype).reshape(cols, rows)
            host_dst = np.zeros(count, dtype=dtype).reshape(rows, cols)

            dev_src = cuda.to_device(host_src)
            dev_dst = cuda.cudadrv.devicearray.from_array_like(host_dst)

            transpose(dev_src, dev_dst)
            # Bring the device result back into the host destination buffer.
            dev_dst.copy_to_host(host_dst)

            np.testing.assert_array_equal(host_src.transpose(), host_dst)
def test_transpose_record(self):
    """Check ``transpose`` on record arrays for every small variant.

    Each record gets a distinct ``i`` value and a distinct 3x2 float32
    payload in ``j``, so misplaced elements show up in the comparison.
    """
    # Same base payload for every record; scaled and shifted per element.
    base = np.arange(3 * 2, dtype=np.float32)

    for rows, cols in self.small_variants:
        with self.subTest(rows=rows, cols=cols):
            host_in = np.recarray((rows, cols), dtype=recordwith2darray)
            # np.ndindex walks (x, y) in the same row-major order as two
            # nested range loops.
            for x, y in np.ndindex(rows, cols):
                host_in[x, y].i = x**2 + y
                host_in[x, y].j = base.reshape(3, 2) * x + y

            expected = host_in.T
            dev_in = cuda.to_device(host_in)
            dev_out = cuda.device_array_like(expected)
            transpose(dev_in, dev_out)

            actual = dev_out.copy_to_host()
            np.testing.assert_array_equal(expected, actual)
def transpose(self, axes=None):
    """Return this array with its two axes swapped.

    :param axes: Optional iterable giving the requested axis order.
                 ``None`` requests the default transpose.
    :returns: ``self`` when ``axes`` is the identity order
              ``(0, 1, ..., ndim - 1)``; otherwise the result of
              ``numba.cuda.kernels.transpose.transpose(self)``.
    :raises NotImplementedError: If a non-identity transpose is requested
                                 on an array that is not 2D.
    :raises ValueError: If ``axes`` is not a permutation of
                        ``range(self.ndim)``.
    """
    identity = tuple(range(self.ndim))
    # Materialise once so NumPy arrays and one-shot iterables behave like
    # lists/tuples in the checks below.
    order = None if axes is None else tuple(axes)

    if order and order == identity:
        return self
    if self.ndim != 2:
        raise NotImplementedError("transposing a non-2D DeviceNDArray isn't supported")
    # The length check rejects lists such as (1, 0, 1), whose *set* of
    # entries matches but which are not permutations.
    if order is not None and (len(order) != self.ndim
                              or set(order) != set(identity)):
        raise ValueError("invalid axes list %r" % (axes,))

    # Only (1, 0) or None can reach this point for a 2D array, so the
    # kernel helper does not need the axes.
    from numba.cuda.kernels.transpose import transpose
    return transpose(self)