def test_dot_diag_t_complex128(self):
    """Compare ``linalg.dot_diag(..., 't')`` with NumPy for complex128 data.

    A float64 vector of length 5 and a 3x5 complex128 matrix are uploaded
    to the GPU, and the result is checked against
    ``np.dot(np.diag(d), a.T).T``.  The check runs twice: once with the
    matrix as created and once with a copy made using ``order="F"``.
    """
    diag_host = np.asarray(np.random.rand(5), np.float64)
    # Draw the real part before the imaginary part so the random stream is
    # consumed in the same order as a single inline expression would.
    real_part = np.random.rand(3, 5)
    imag_part = np.random.rand(3, 5)
    mat_host = np.asarray(real_part + 1j * imag_part, np.complex128)

    # First pass: matrix exactly as created above.
    diag_dev = gpuarray.to_gpu(diag_host)
    mat_dev = gpuarray.to_gpu(mat_host)
    result_dev = linalg.dot_diag(diag_dev, mat_dev, 't')
    expected = np.dot(np.diag(diag_host), mat_host.T).T
    assert np.allclose(expected, result_dev.get())

    # Second pass: same values, copied with order="F".
    mat_host = mat_host.astype(np.complex128, order="F", copy=True)
    diag_dev = gpuarray.to_gpu(diag_host)
    mat_dev = gpuarray.to_gpu(mat_host)
    result_dev = linalg.dot_diag(diag_dev, mat_dev, 't')
    expected = np.dot(np.diag(diag_host), mat_host.T).T
    assert np.allclose(expected, result_dev.get())
def test_dot_diag_t_float64(self):
    """Compare ``linalg.dot_diag(..., 't')`` with NumPy for float64 data.

    Uses a length-5 float64 vector and a 3x5 float64 matrix; the reference
    value is ``np.dot(np.diag(d), a.T).T``.  The comparison is made for the
    matrix as created and again for an ``order="F"`` copy of it.
    """
    vec = np.asarray(np.random.rand(5), np.float64)
    mat = np.asarray(np.random.rand(3, 5), np.float64)

    def check(host_matrix):
        # Upload both operands afresh for every check.
        vec_dev = gpuarray.to_gpu(vec)
        mat_dev = gpuarray.to_gpu(host_matrix)
        out_dev = linalg.dot_diag(vec_dev, mat_dev, 't')
        reference = np.dot(np.diag(vec), host_matrix.T).T
        assert np.allclose(reference, out_dev.get())

    check(mat)
    check(mat.astype(np.float64, order="F", copy=True))
def test_dot_diag_float32(self):
    """Compare ``linalg.dot_diag`` with NumPy for float32 data.

    A length-5 float32 vector and a 5x3 float32 matrix are uploaded and
    the GPU result is compared against ``np.dot(np.diag(d), a)``, first for
    the matrix as created and then for a copy made with ``order="F"``.
    """
    scale = np.asarray(np.random.rand(5), np.float32)
    matrix = np.asarray(np.random.rand(5, 3), np.float32)

    # Pass 1: matrix as created.
    scale_gpu = gpuarray.to_gpu(scale)
    matrix_gpu = gpuarray.to_gpu(matrix)
    product_gpu = linalg.dot_diag(scale_gpu, matrix_gpu)
    reference = np.dot(np.diag(scale), matrix)
    assert np.allclose(reference, product_gpu.get())

    # Pass 2: identical values in an order="F" copy.
    matrix = matrix.astype(np.float32, order="F", copy=True)
    scale_gpu = gpuarray.to_gpu(scale)
    matrix_gpu = gpuarray.to_gpu(matrix)
    product_gpu = linalg.dot_diag(scale_gpu, matrix_gpu)
    reference = np.dot(np.diag(scale), matrix)
    assert np.allclose(reference, product_gpu.get())
def test_dot_diag_complex64(self):
    """Compare ``linalg.dot_diag`` with NumPy for complex64 data.

    The vector is float32 of length 5 and the matrix is 5x3 complex64.
    The expected value is ``np.dot(np.diag(d), a)``; the comparison is
    repeated after the matrix is copied with ``order="F"``.
    """
    weights = np.asarray(np.random.rand(5), np.float32)
    # Real part is drawn first, then the imaginary part.
    re_vals = np.random.rand(5, 3)
    im_vals = np.random.rand(5, 3)
    host = np.asarray(re_vals + 1j * im_vals, np.complex64)

    for use_f_order in (False, True):
        if use_f_order:
            # Second iteration only: re-lay the matrix out with order="F".
            host = host.astype(np.complex64, order="F", copy=True)
        weights_dev = gpuarray.to_gpu(weights)
        host_dev = gpuarray.to_gpu(host)
        out_dev = linalg.dot_diag(weights_dev, host_dev)
        wanted = np.dot(np.diag(weights), host)
        assert np.allclose(wanted, out_dev.get())
def impl_test_dot_diag_t(self, dtype):
    """Shared check of ``linalg.dot_diag(..., 't')`` for a given dtype.

    Parameters
    ----------
    dtype
        NumPy dtype used for both the length-5 vector and the 3x5 matrix.

    The GPU result is compared with ``np.dot(np.diag(d), a.T).T`` for the
    matrix as created and for an ``order="F"`` copy of it.
    """
    diag_vals = np.asarray(np.random.rand(5), dtype)
    mat_vals = np.asarray(np.random.rand(3, 5), dtype)

    def verify(host_mat):
        # Both operands are re-uploaded for each verification.
        diag_dev = gpuarray.to_gpu(diag_vals)
        mat_dev = gpuarray.to_gpu(host_mat)
        got = linalg.dot_diag(diag_dev, mat_dev, 't')
        want = np.dot(np.diag(diag_vals), host_mat.T).T
        assert np.allclose(want, got.get())

    verify(mat_vals)
    verify(mat_vals.astype(dtype, order="F", copy=True))
def ___impl_test_dot_diag_t(self, dtype):
    """Shared check of ``linalg.dot_diag(..., "t")`` for a given dtype.

    Parameters
    ----------
    dtype
        NumPy dtype used for both the length-5 vector and the 3x5 matrix.

    The first comparison uses the matrix as created and the default
    ``overwrite`` setting; the second uses an ``order="F"`` copy and passes
    ``overwrite=True`` explicitly.
    """
    vec_host = np.asarray(np.random.rand(5), dtype)
    mat_host = np.asarray(np.random.rand(3, 5), dtype)

    # Matrix as created, default overwrite behaviour.
    vec_dev = gpuarray.to_gpu(vec_host)
    mat_dev = gpuarray.to_gpu(mat_host)
    res_dev = linalg.dot_diag(vec_dev, mat_dev, "t")
    expected = np.dot(np.diag(vec_host), mat_host.T).T
    assert np.allclose(expected, res_dev.get())

    # order="F" copy of the same matrix.
    mat_host = mat_host.astype(dtype, order="F", copy=True)
    vec_dev = gpuarray.to_gpu(vec_host)
    mat_dev = gpuarray.to_gpu(mat_host)
    # overwrite=True is deliberate: per the original note, pycuda issue #66
    # makes this case fail when overwrite=False.
    res_dev = linalg.dot_diag(vec_dev, mat_dev, "t", overwrite=True)
    expected = np.dot(np.diag(vec_host), mat_host.T).T
    assert np.allclose(expected, res_dev.get())
def test_dot_diag_t_float32(self):
    """Compare ``linalg.dot_diag(..., 't')`` with NumPy for float32 data.

    A length-5 float32 vector and a 3x5 float32 matrix are uploaded to the
    GPU; the result must be close to ``np.dot(np.diag(d), a.T).T``.
    """
    diag_host = np.asarray(np.random.rand(5), np.float32)
    mat_host = np.asarray(np.random.rand(3, 5), np.float32)

    diag_dev = gpuarray.to_gpu(diag_host)
    mat_dev = gpuarray.to_gpu(mat_host)
    result_dev = linalg.dot_diag(diag_dev, mat_dev, 't')

    # Host-side reference for the 't' variant.
    expected = np.dot(np.diag(diag_host), mat_host.T).T
    assert np.allclose(expected, result_dev.get())
def test_dot_diag_float64(self):
    """Compare ``linalg.dot_diag`` with NumPy for float64 data.

    A length-5 float64 vector and a 5x3 float64 matrix are uploaded to the
    GPU; the result must be close to ``np.dot(np.diag(d), a)``.
    """
    vec = np.asarray(np.random.rand(5), np.float64)
    mat = np.asarray(np.random.rand(5, 3), np.float64)

    vec_on_gpu = gpuarray.to_gpu(vec)
    mat_on_gpu = gpuarray.to_gpu(mat)
    out_on_gpu = linalg.dot_diag(vec_on_gpu, mat_on_gpu)

    # Host-side reference: explicit diagonal matrix times the input.
    reference = np.dot(np.diag(vec), mat)
    assert np.allclose(reference, out_on_gpu.get())
def test_dot_diag_complex128(self):
    """Compare ``linalg.dot_diag`` with NumPy for complex128 data.

    The vector is float64 of length 5 and the matrix is 5x3 complex128;
    the GPU result must be close to ``np.dot(np.diag(d), a)``.
    """
    scale = np.asarray(np.random.rand(5), np.float64)
    # Real part is drawn before the imaginary part.
    re_block = np.random.rand(5, 3)
    im_block = np.random.rand(5, 3)
    cmat = np.asarray(re_block + 1j * im_block, np.complex128)

    scale_dev = gpuarray.to_gpu(scale)
    cmat_dev = gpuarray.to_gpu(cmat)
    prod_dev = linalg.dot_diag(scale_dev, cmat_dev)

    wanted = np.dot(np.diag(scale), cmat)
    assert np.allclose(wanted, prod_dev.get())
else: nalphas = alpha # Compute weights for each alpha ualphas = np.unique(nalphas) wt = np.zeros((stim.shape[1], resp.shape[1]), order='F') # Make wt column major Vh_gpu = gpuarray.to_gpu(np.copy(Vh, order='F')) for ua in ualphas: selvox = np.nonzero(nalphas==ua)[0] # list of indices equal to ua # TODO determine if this should be a GPU op # Vh is output from SVD, i think NxN (~200x200 or 15000x15000) # TODO determine how reduce works Sd = S/(S**2+ua**2) Sd_gpu = gpuarray.to_gpu(Sd) UR_gpu = gpuarray.to_gpu(np.copy(UR[:,selvox], order='F')) linalg.dot_diag(Sd_gpu, UR_gpu, overwrite=True) del Sd_gpu if selvox.shape[0] > 5000: N=selvox.shape[0]/4 inter_gpu = linalg.dot(Vh_gpu, UR_gpu[:,0:N], transa='T') wt[:,selvox[0:N]] = inter_gpu.get() del inter_gpu inter_gpu = linalg.dot(Vh_gpu, UR_gpu[:,N:2*N], transa='T') wt[:,selvox[N:2*N]] = inter_gpu.get() del inter_gpu inter_gpu = linalg.dot(Vh_gpu, UR_gpu[:,2*N:3*N], transa='T') wt[:,selvox[2*N:3*N]] = inter_gpu.get() del inter_gpu inter_gpu = linalg.dot(Vh_gpu, UR_gpu[:,3*N:], transa='T') wt[:,selvox[3*N:]] = inter_gpu.get() del inter_gpu