예제 #1
0
    def test_cublasDtrsmBatched(self):
        """Check batched double-precision triangular solves against NumPy.

        Builds ``l`` upper-triangular ``m x m`` systems ``A[i] @ X[i] = B[i]``
        with ``m x n`` right-hand sides, solves them on the host with
        ``np.linalg.solve`` and compares with what ``cublasDtrsmBatched``
        leaves in the right-hand-side buffer on the device.
        """
        l, m, n = 11, 7, 5
        A = np.random.rand(l, m, m).astype(np.float64)
        B = np.random.rand(l, m, n).astype(np.float64)

        # Keep only the upper triangle of every matrix in the batch so each
        # system is a genuine triangular solve.
        A = np.array(list(map(np.triu, A)))
        # Host-side reference solution, one system at a time.
        X = np.array([np.linalg.solve(a, b) for a, b in zip(A, B)])

        alpha = np.float64(1.0)

        a_gpu = gpuarray.to_gpu(A)
        b_gpu = gpuarray.to_gpu(B)

        # NOTE(review): bptrs is defined elsewhere; presumably it returns a
        # device array holding one pointer per matrix in the batch -- confirm.
        a_arr = bptrs(a_gpu)
        b_arr = bptrs(b_gpu)

        # cuBLAS works on column-major data, while the NumPy arrays are
        # row-major. A row-major A[i] / B[i] read as column-major is A[i].T /
        # B[i].T, so A X = B is posed as X.T A.T = B.T: side 'r' (right),
        # uplo 'l' (A.T is lower triangular), no transpose, non-unit diagonal,
        # with the row/column counts and leading dimensions swapped to match.
        cublas.cublasDtrsmBatched(self.cublas_handle, 'r', 'l', 'n', 'n', n, m,
                                  alpha, a_arr.gpudata, m, b_arr.gpudata, n, l)

        # The third positional argument of np.allclose is rtol, so the former
        # `np.allclose(X, b_gpu.get(), 5)` allowed a 500% relative error and
        # could practically never fail. Use explicit, meaningful tolerances;
        # assert_allclose also survives `python -O` and reports the mismatch.
        np.testing.assert_allclose(b_gpu.get(), X, rtol=1e-5, atol=1e-8)
예제 #2
0
    def test_cublasDtrsmBatched(self):
        """Check batched double-precision triangular solves against NumPy.

        Builds ``l`` upper-triangular ``m x m`` systems ``A[i] @ X[i] = B[i]``
        with ``m x n`` right-hand sides, solves them on the host with
        ``np.linalg.solve`` and compares with what ``cublasDtrsmBatched``
        leaves in the right-hand-side buffer on the device.
        """
        l, m, n = 11, 7, 5
        A = np.random.rand(l, m, m).astype(np.float64)
        B = np.random.rand(l, m, n).astype(np.float64)

        # Zero everything below the diagonal of each matrix in the batch so
        # each system is a genuine triangular solve.
        A = np.array(list(map(np.triu, A)))
        # Host-side reference solution, one system at a time.
        X = np.array([np.linalg.solve(a, b) for a, b in zip(A, B)])

        alpha = np.float64(1.0)

        a_gpu = gpuarray.to_gpu(A)
        b_gpu = gpuarray.to_gpu(B)

        # NOTE(review): bptrs is defined elsewhere; presumably it returns a
        # device array holding one pointer per matrix in the batch -- confirm.
        a_arr = bptrs(a_gpu)
        b_arr = bptrs(b_gpu)

        # cuBLAS works on column-major data, while the NumPy arrays are
        # row-major. A row-major A[i] / B[i] read as column-major is A[i].T /
        # B[i].T, so A X = B is posed as X.T A.T = B.T: side 'r' (right),
        # uplo 'l' (A.T is lower triangular), no transpose, non-unit diagonal,
        # with the row/column counts and leading dimensions swapped to match.
        cublas.cublasDtrsmBatched(self.cublas_handle, 'r', 'l', 'n', 'n',
                                  n, m, alpha,
                                  a_arr.gpudata, m,
                                  b_arr.gpudata, n, l)

        # The third positional argument of np.allclose is rtol, so the former
        # `np.allclose(X, b_gpu.get(), 5)` allowed a 500% relative error and
        # could practically never fail. Use explicit, meaningful tolerances;
        # assert_allclose also survives `python -O` and reports the mismatch.
        np.testing.assert_allclose(b_gpu.get(), X, rtol=1e-5, atol=1e-8)