Beispiel #1
0
    def inverse(self, ary, out=None):
        '''Perform inverse FFT

        :param ary: Input array
        :param out: Optional output array

        :returns: The output array, or a new numpy array if `out` is None.

        :raises TypeError: If the plan's ``direction`` is neither ``'both'``
                           nor ``'inverse'``.

        .. note:: If `ary` is `out`, an inplace operation is performed.
        '''
        if self.direction not in ('both', 'inverse'):
            raise TypeError("Invalid operation")
        d_ary, d_out, h_out, do_host_copy = self._prepare(ary, out)
        used_in, used_out, transpose_out = self._maybe_transpose(d_ary, d_out)
        self._plan.inverse(used_in, used_out)
        if transpose_out:
            # When `transpose_out` is set the plan wrote into `used_out`, a
            # buffer distinct from `d_out`.  The result therefore has to be
            # moved into `d_out` regardless of whether a host copy follows;
            # otherwise a column-major device output would never be filled.
            #
            # Reinterpret the buffer with swapped shape and column-major
            # strides so it can be transposed back.  The shape must be
            # assigned first: the strides expression reads the *new* shape[0].
            used_out.shape = (used_out.shape[1], used_out.shape[0])
            used_out.strides = (used_out.dtype.itemsize,
                                used_out.dtype.itemsize *
                                used_out.shape[0])
            # and finally transpose it into the caller-visible output
            transpose(used_out, d_out)
        if do_host_copy:
            d_out.copy_to_host(h_out)
        return h_out
Beispiel #2
0
    def _maybe_transpose(self, d_ary, d_out):
        """Transpose device arrays into row-major format if needed, as cuFFT
        can't handle column-major data."""

        transpose_in = len(d_ary.shape) == 2 and d_ary.is_f_contiguous()
        transpose_out = len(d_out.shape) == 2 and d_out.is_f_contiguous()
        if transpose_in:
            # Create a row-major device array
            used_in = DeviceNDArray(
                shape=(d_ary.shape[1], d_ary.shape[0]),
                strides=(d_ary.dtype.itemsize,
                         d_ary.dtype.itemsize * d_ary.shape[1]),
                dtype=d_ary.dtype)
            transpose(d_ary, used_in)
        else:
            used_in = d_ary
        if transpose_out:
            # Create a row-major device array
            used_out = DeviceNDArray(
                shape=d_out.shape,
                strides=(d_out.dtype.itemsize * d_out.shape[1],
                         d_out.dtype.itemsize),
                dtype=d_out.dtype)
        else:
            used_out = d_out
        return used_in, used_out, transpose_out
    def test_transpose(self, rows, cols, dtype):
        """Check the device `transpose` against NumPy's transpose.

        :param rows: Row count of the transposed result.
        :param cols: Column count of the transposed result.
        :param dtype: Element type of the arrays under test.
        """
        n_items = rows * cols
        # Source is (cols, rows); the destination has the transposed shape.
        x = np.arange(n_items, dtype=dtype).reshape(cols, rows)
        y = np.zeros(n_items, dtype=dtype).reshape(rows, cols)

        d_src = cuda.to_device(x)
        d_dst = cuda.cudadrv.devicearray.from_array_like(y)
        transpose(d_src, d_dst)

        # Copy the device result back into the host buffer `y`.
        d_dst.copy_to_host(y)
        expected = x.transpose()
        self.assertTrue(np.all(expected == y))
Beispiel #4
0
    def test_transpose(self, rows, cols, dtype):
        """Transpose a (cols, rows) array on the device and compare it with
        the host-side NumPy transpose.

        :param rows: Row count of the transposed result.
        :param cols: Column count of the transposed result.
        :param dtype: Element type of the arrays under test.
        """
        total = rows * cols
        host_in = np.arange(total, dtype=dtype).reshape(cols, rows)
        host_out = np.zeros(total, dtype=dtype).reshape(rows, cols)

        dev_in = cuda.to_device(host_in)
        dev_out = cuda.cudadrv.devicearray.from_array_like(host_out)
        transpose(dev_in, dev_out)

        # Bring the result back into the preallocated host buffer.
        dev_out.copy_to_host(host_out)
        matches = np.all(host_in.transpose() == host_out)
        self.assertTrue(matches)
Beispiel #5
0
 def test_transpose(self):
     """Transpose a 4x4 array twice and expect the original values back."""
     # To verify non-redundant data movement run this test with NUMBA_TRACE=1
     source = SmartArray(np.arange(16, dtype=float).reshape(4,4))
     once = SmartArray(where='gpu', shape=(4,4), dtype=float)
     twice = SmartArray(where='gpu', shape=(4,4), dtype=float)
     event("initialization done")

     transpose(source, once)
     event("checkpoint")

     transpose(once, twice)
     event("done")

     # The second transpose undoes the first, so the host views must agree.
     round_trip_ok = (twice.host() == source.host()).all()
     self.assertTrue(round_trip_ok)
Beispiel #6
0
    def test_transpose_bool(self):
        """Device transpose of random boolean arrays matches ``ndarray.T``
        for every shape in ``self.small_variants``."""
        for n_rows, n_cols in self.small_variants:
            with self.subTest(rows=n_rows, cols=n_cols):
                host_in = np.random.randint(
                    2, size=(n_rows, n_cols), dtype=np.bool_)
                expected = host_in.T

                dev_in = cuda.to_device(host_in)
                # The output buffer mirrors the expected transposed array.
                dev_out = cuda.device_array_like(expected)
                transpose(dev_in, dev_out)

                np.testing.assert_array_equal(expected,
                                              dev_out.copy_to_host())
Beispiel #7
0
    def test_transpose(self):
        """Device transpose matches NumPy's transpose for a handful of
        (rows, cols, dtype) combinations."""
        variants = (
            (5, 6, np.float64),
            (128, 128, np.complex128),
            (1025, 512, np.float64),
        )

        for rows, cols, dtype in variants:
            with self.subTest(rows=rows, cols=cols, dtype=dtype):
                count = rows * cols
                # Source is (cols, rows); destination is its transposed shape.
                host_in = np.arange(count, dtype=dtype).reshape(cols, rows)
                host_out = np.zeros(count, dtype=dtype).reshape(rows, cols)

                dev_in = cuda.to_device(host_in)
                dev_out = cuda.cudadrv.devicearray.from_array_like(host_out)
                transpose(dev_in, dev_out)

                dev_out.copy_to_host(host_out)
                np.testing.assert_array_equal(host_in.transpose(), host_out)
Beispiel #8
0
    def test_transpose_record(self):
        """Device transpose of record arrays matches ``ndarray.T`` for every
        shape in ``self.small_variants``.

        Each record gets a scalar field ``i`` and a 3x2 float32 field ``j``
        whose values depend on the record's position.
        """
        # Loop-invariant 3x2 template that the ``j`` field is derived from.
        template = np.arange(3 * 2, dtype=np.float32).reshape(3, 2)

        for n_rows, n_cols in self.small_variants:
            with self.subTest(rows=n_rows, cols=n_cols):
                records = np.recarray((n_rows, n_cols),
                                      dtype=recordwith2darray)
                # Visit every (row, col) position in row-major order.
                for r, c in np.ndindex(n_rows, n_cols):
                    records[r, c].i = r**2 + c
                    records[r, c].j = template * r + c

                expected = records.T
                dev_in = cuda.to_device(records)
                dev_out = cuda.device_array_like(expected)
                transpose(dev_in, dev_out)

                np.testing.assert_array_equal(expected,
                                              dev_out.copy_to_host())
Beispiel #9
0
 def transpose(self, axes=None):
     """Return this array with its axes permuted.

     :param axes: Optional iterable of axis indices.  ``None`` requests the
                  default transpose.
     :returns: ``self`` when `axes` is the non-empty identity permutation;
               otherwise the result of
               ``numba.cuda.kernels.transpose.transpose(self)``.
     :raises NotImplementedError: If a non-identity transpose is requested
                                  on an array that is not 2D.
     :raises ValueError: If `axes` is not a permutation of
                         ``range(self.ndim)``.
     """
     identity = tuple(range(self.ndim))
     requested = None if axes is None else tuple(axes)

     # The identity permutation is a no-op for any rank.
     if requested and requested == identity:
         return self
     if self.ndim != 2:
         raise NotImplementedError("transposing a non-2D DeviceNDArray isn't supported")
     # A set comparison alone would accept wrong-length lists with repeated
     # entries such as (0, 1, 1); the length check rules those out.
     if requested is not None and (
             len(requested) != self.ndim
             or set(requested) != set(identity)):
         raise ValueError("invalid axes list %r" % (axes,))

     # Imported lazily, inside the method, as in the original code.
     from numba.cuda.kernels.transpose import transpose
     return transpose(self)
Beispiel #10
0
 def transpose(self, axes=None):
     """Permute the axes of this array.

     :param axes: Optional iterable of axis indices.  ``None`` requests the
                  default transpose.
     :returns: ``self`` when `axes` is the non-empty identity permutation;
               otherwise the result of
               ``numba.cuda.kernels.transpose.transpose(self)``.
     :raises NotImplementedError: If a non-identity transpose is requested
                                  on an array that is not 2D.
     :raises ValueError: If `axes` is not a permutation of
                         ``range(self.ndim)``.
     """
     natural_order = tuple(range(self.ndim))
     wanted = tuple(axes) if axes is not None else None

     # Asking for the axes in their natural order changes nothing.
     if wanted and wanted == natural_order:
         return self
     if self.ndim != 2:
         raise NotImplementedError("transposing a non-2D DeviceNDArray isn't supported")
     # Check the length as well as the set of indices: a set comparison on
     # its own lets lists with duplicates, e.g. (1, 0, 0), slip through.
     if wanted is not None and (
             len(wanted) != self.ndim
             or set(wanted) != set(natural_order)):
         raise ValueError("invalid axes list %r" % (axes,))

     # Imported lazily, inside the method, as in the original code.
     from numba.cuda.kernels.transpose import transpose
     return transpose(self)