def test_cublasDtrsmBatched(self):
    """Check batched double-precision triangular solves against NumPy.

    Builds ``l`` random upper-triangular ``m x m`` systems with ``m x n``
    right-hand sides, solves each one on the host with ``np.linalg.solve``,
    then solves the whole batch on the device with
    ``cublas.cublasDtrsmBatched`` (which overwrites the right-hand sides in
    place) and compares the two results.
    """
    l, m, n = 11, 7, 5
    A = np.random.rand(l, m, m).astype(np.float64)
    B = np.random.rand(l, m, n).astype(np.float64)

    # Keep only the upper triangle of every matrix in the batch.
    A = np.array([np.triu(a) for a in A])

    # Host-side reference solution of A[i] @ X[i] = B[i] for each batch item.
    X = np.array([np.linalg.solve(a, b) for a, b in zip(A, B)])

    alpha = np.float64(1.0)

    a_gpu = gpuarray.to_gpu(A)
    b_gpu = gpuarray.to_gpu(B)

    # Per-matrix device pointer arrays for the batched interface.
    a_arr = bptrs(a_gpu)
    b_arr = bptrs(b_gpu)

    # NOTE(review): the arrays are C-ordered while cuBLAS is column-major,
    # so the device presumably sees A[i].T (lower triangular) and B[i].T;
    # solving X.T @ A.T = B.T from the right ('r', 'l') with swapped
    # dimensions (n, m) then yields X in C order -- confirm against the
    # wrapper's argument order.
    cublas.cublasDtrsmBatched(self.cublas_handle, 'r', 'l', 'n', 'n',
                              n, m, alpha,
                              a_arr.gpudata, m, b_arr.gpudata, n, l)

    # The third positional argument of np.allclose is ``rtol``; passing a
    # bare 5 there would accept a 500% relative error and make this check
    # nearly vacuous.  Spell the tolerances out explicitly instead.
    assert np.allclose(X, b_gpu.get(), rtol=1e-5, atol=1e-8)