def test_cublasSgetrfBatched(self):
    """Check batched single-precision LU factorization against SciPy.

    Builds a batch of symmetric matrices, factorizes them on the GPU with
    ``cublasSgetrfBatched`` (in place) and compares the packed LU factors
    with ``scipy.linalg.lu_factor`` computed on the host.
    """
    from scipy.linalg import lu_factor

    batch_size, n = 11, 7
    A = np.random.rand(batch_size, n, n).astype(np.float32)
    # a * a^T is symmetric, so the row-major host buffer and the
    # column-major view cuBLAS takes of it describe the same matrix.
    A = np.array([np.dot(a, a.T) for a in A])

    a_gpu = gpuarray.to_gpu(A)
    # NOTE(review): bptrs presumably returns a device array holding one
    # pointer per matrix of a_gpu -- confirm against the helper.
    a_arr = bptrs(a_gpu)
    p_gpu = gpuarray.empty((batch_size, n), np.int32)
    # cuBLAS writes one status code per matrix, so the info array needs
    # batch_size entries; a single-element buffer would be overrun.
    i_gpu = gpuarray.zeros(batch_size, np.int32)

    # Host reference: packed LU factors of every matrix in the batch.
    X = np.array([lu_factor(a)[0] for a in A])

    cublas.cublasSgetrfBatched(self.cublas_handle,
                               n, a_arr.gpudata, n,
                               p_gpu.gpudata, i_gpu.gpudata, batch_size)

    # A non-zero entry reports a failed factorization for that matrix.
    assert not i_gpu.get().any()

    # The factors come back column-major; transpose each matrix to compare
    # with SciPy's row-major result.
    X_ = np.array([a.T for a in a_gpu.get()])

    assert np.allclose(X, X_, atol=10 * _SEPS)
def test_cublasSgetrfBatched(self):
    """Verify ``cublasSgetrfBatched`` against ``scipy.linalg.lu_factor``.

    A batch of symmetric float32 matrices is factorized in place on the
    GPU; the resulting packed LU factors must match the host reference to
    within ``10 * _SEPS``.
    """
    from scipy.linalg import lu_factor

    batch_size, n = 11, 7
    A = np.random.rand(batch_size, n, n).astype(np.float32)
    # Symmetrize each matrix (a * a^T) so that the memory layout expected
    # by cuBLAS (column-major) and the NumPy layout (row-major) agree on
    # the input.
    A = np.array([np.dot(a, a.T) for a in A])

    a_gpu = gpuarray.to_gpu(A)
    # NOTE(review): bptrs is assumed to build the device array of
    # per-matrix pointers that the batched API takes -- confirm.
    a_arr = bptrs(a_gpu)
    p_gpu = gpuarray.empty((batch_size, n), np.int32)
    # The info array must hold one entry per matrix in the batch;
    # allocating only one element lets cuBLAS write out of bounds.
    i_gpu = gpuarray.zeros(batch_size, np.int32)

    # Reference factorization computed on the host.
    X = np.array([lu_factor(a)[0] for a in A])

    cublas.cublasSgetrfBatched(self.cublas_handle,
                               n, a_arr.gpudata, n,
                               p_gpu.gpudata, i_gpu.gpudata, batch_size)

    # Every factorization must report success (info == 0).
    assert not i_gpu.get().any()

    # Undo the column-major layout of the in-place GPU result.
    X_ = np.array([a.T for a in a_gpu.get()])

    assert np.allclose(X, X_, atol=10 * _SEPS)