Example #1
0
    def test_cublasSgetrfBatched(self):
        """Check batched single-precision LU factorization against SciPy.

        A batch of random symmetric matrices is factored in place on the
        device with ``cublasSgetrfBatched`` and the packed LU factors are
        compared with ``scipy.linalg.lu_factor`` applied to each matrix
        on the host.
        """
        from scipy.linalg import lu_factor
        l, m = 11, 7
        A = np.random.rand(l, m, m).astype(np.float32)
        # a * a^T is symmetric, so the row-major host layout and the
        # column-major layout cuBLAS reads describe the same matrix.
        A = np.array([a.dot(a.T) for a in A])

        a_gpu = gpuarray.to_gpu(A)
        # presumably a device array of per-matrix pointers into a_gpu,
        # as the batched cuBLAS routines require -- confirm against bptrs.
        a_arr = bptrs(a_gpu)
        p_gpu = gpuarray.empty((l, m), np.int32)
        # cuBLAS writes one status value per matrix in the batch, so the
        # info buffer needs l entries; a single-element buffer would be
        # overrun on the device.
        i_gpu = gpuarray.zeros(l, np.int32)
        X = np.array([lu_factor(a)[0] for a in A])

        cublas.cublasSgetrfBatched(self.cublas_handle, m, a_arr.gpudata, m,
                                   p_gpu.gpudata, i_gpu.gpudata, l)

        # A non-zero info entry means that matrix was not factored
        # successfully.
        assert not i_gpu.get().any()

        # The factors come back in column-major order; transpose each one
        # to compare with SciPy's row-major result.
        X_ = np.array([a.T for a in a_gpu.get()])

        assert np.allclose(X, X_, atol=10 * _SEPS)
Example #2
0
    def test_cublasSgetrfBatched(self):
        """Verify ``cublasSgetrfBatched`` against ``scipy.linalg.lu_factor``.

        Builds ``l`` random symmetric ``m x m`` float32 matrices, factors
        them in place on the device in a single batched call, and checks
        that every packed LU result matches the host reference.
        """
        from scipy.linalg import lu_factor
        l, m = 11, 7
        A = np.random.rand(l, m, m).astype(np.float32)
        # Symmetric inputs (a * a^T) read the same in row-major and
        # column-major order, so no transpose is needed on the way in.
        A = np.array([a.dot(a.T) for a in A])

        a_gpu = gpuarray.to_gpu(A)
        # NOTE(review): bptrs looks like it returns the device pointer
        # array expected by batched cuBLAS calls -- confirm.
        a_arr = bptrs(a_gpu)
        p_gpu = gpuarray.empty((l, m), np.int32)
        # The info array must hold one int per batch entry; allocating
        # just one element lets cuBLAS write past the end of the buffer.
        i_gpu = gpuarray.zeros(l, np.int32)
        X = np.array([lu_factor(a)[0] for a in A])

        cublas.cublasSgetrfBatched(self.cublas_handle,
                                   m, a_arr.gpudata, m,
                                   p_gpu.gpudata, i_gpu.gpudata, l)

        # Every factorization must report success (info == 0).
        assert not i_gpu.get().any()

        # Results are column-major on the device; transpose per matrix
        # before comparing with the row-major SciPy factors.
        X_ = np.array([a.T for a in a_gpu.get()])

        assert np.allclose(X, X_, atol=10 * _SEPS)