def add_dot(X, Y, out, beta=0.0, handle=None):
    """Route an ``add_dot`` request to a backend chosen by operand rank.

    The rank of each operand is taken as ``len(operand.shape)``:

    * ``Y`` has rank 1: ``add_vdot(X, Y, out, ...)`` is called.
    * otherwise, ``X`` has rank 1: ``add_vdot(X, Y, out, transM='T', ...)``
      is called.
    * otherwise: ``scl.add_dot(X, Y, out, ...)`` is called.

    ``Y`` is inspected first, so when both operands have rank 1 the first
    case applies.

    Parameters
    ----------
    X, Y :
        Operands; each must expose a ``shape`` attribute.
    out :
        Passed through as the third positional argument of the selected
        routine.
    beta : float, optional
        Forwarded unchanged as the ``beta`` keyword (default ``0.0``).
    handle : optional
        Forwarded unchanged as the ``handle`` keyword (default ``None``).

    Returns
    -------
    None
        Whatever the selected routine returns is discarded.
    """
    # Keywords shared by every backend call.
    forwarded = {"beta": beta, "handle": handle}

    if len(Y.shape) == 1:
        add_vdot(X, Y, out, **forwarded)
        return

    if len(X.shape) == 1:
        # NOTE(review): the operands are passed in (X, Y) order with
        # transM='T' even though X is the rank-1 one here -- confirm that
        # this matches the argument order add_vdot expects.
        add_vdot(X, Y, out, transM='T', **forwarded)
        return

    scl.add_dot(X, Y, out, **forwarded)
def _impl_add_dot_matrix_tests(self, dtype, transa, transb):
    """Check ``linalg.add_dot`` on full matrices in two host memory layouts.

    Random operands are generated once, then the same check is run twice:
    first with the arrays as created, then with Fortran-ordered copies
    (``astype(..., order="F", copy=True)``).  In both runs the array
    returned by ``linalg.add_dot`` must be close to ``c + dot(op(a), op(b))``,
    where ``op`` transposes an operand unless its flag is ``"n"``.

    Parameters
    ----------
    dtype :
        Element type used for every host array.
    transa, transb :
        Flags passed straight to ``linalg.add_dot``; ``"n"`` means the
        reference product uses the operand as is, anything else means its
        transpose is used.
    """
    lhs = np.asarray(np.random.rand(4, 2), dtype)
    # A square right operand keeps the inner dimensions compatible for
    # either value of ``transb``: 2 when ``lhs`` is used as is, 4 when it
    # is transposed.
    rhs_shape = (2, 2) if transa == "n" else (4, 4)
    rhs = np.asarray(np.random.rand(*rhs_shape), dtype)

    # Host-side operands of the reference product.
    lhs_op = lhs if transa == "n" else lhs.T
    rhs_op = rhs if transb == "n" else rhs.T
    acc = np.asarray(
        np.random.rand(lhs_op.shape[0], rhs_op.shape[1]), dtype
    )

    def run_and_check(a_host, b_host, c_host):
        # Upload the three arrays, run add_dot, and compare its returned
        # array against the host reference.
        a_dev = gpuarray.to_gpu(a_host)
        b_dev = gpuarray.to_gpu(b_host)
        c_dev = gpuarray.to_gpu(c_host)
        result = linalg.add_dot(a_dev, b_dev, c_dev, transa, transb)
        assert np.allclose(c_host + np.dot(lhs_op, rhs_op), result.get())

    # First pass: arrays exactly as generated.
    run_and_check(lhs, rhs, acc)

    # Second pass: same values, Fortran-ordered copies.
    lhs_f, rhs_f, acc_f = (
        m.astype(dtype, order="F", copy=True) for m in (lhs, rhs, acc)
    )
    run_and_check(lhs_f, rhs_f, acc_f)
def _impl_test_dot_strided(self, dtype):
    """Check ``linalg.add_dot`` on column-sliced views of device arrays.

    Four transpose combinations are exercised (n/n, t/n, n/t, t/t).  Each
    case creates fresh random ``a`` and ``b`` and a zero-filled ``c`` with
    30 columns, passes column slices of the three device arrays to
    ``linalg.add_dot``, and then checks that the same column slice of the
    downloaded ``c`` is close to the NumPy product of the (optionally
    transposed) host slices.  Because ``c`` starts at zero, the expected
    value is the plain product.

    Parameters
    ----------
    dtype :
        Element type used for every host array.
    """
    # ``b`` is sliced to the same columns in every case.
    b_cols = slice(2, 8)

    # (transa, transb, a_shape, b_shape, c_rows, a_cols, c_cols)
    cases = (
        ("n", "n", (4, 10), (2, 20), 4, slice(4, 6), slice(1, 7)),
        ("t", "n", (4, 10), (4, 20), 2, slice(4, 6), slice(1, 7)),
        ("n", "t", (4, 10), (6, 20), 4, slice(4, 10), slice(1, 7)),
        ("t", "t", (6, 10), (8, 20), 2, slice(4, 6), slice(1, 9)),
    )

    for transa, transb, a_shape, b_shape, c_rows, a_cols, c_cols in cases:
        a = np.asarray(np.random.rand(*a_shape), dtype)
        b = np.asarray(np.random.rand(*b_shape), dtype)
        c = np.zeros((c_rows, 30), dtype)

        a_gpu = gpuarray.to_gpu(a)
        b_gpu = gpuarray.to_gpu(b)
        c_gpu = gpuarray.to_gpu(c)

        linalg.add_dot(
            a_gpu[:, a_cols], b_gpu[:, b_cols], c_gpu[:, c_cols],
            transa, transb,
        )
        res = c_gpu.get()

        # Host reference: transpose a slice when its flag is "t".
        lhs = a[:, a_cols]
        if transa == "t":
            lhs = lhs.T
        rhs = b[:, b_cols]
        if transb == "t":
            rhs = rhs.T

        assert np.allclose(np.dot(lhs, rhs), res[:, c_cols])