# Set up a batch of random Hermitian test matrices; A holds them row-major,
# B holds their transposes (the column-major layout CUSOLVER expects):
A = np.empty((n*batchSize, n), dtype=np.complex64)
B = np.empty((n*batchSize, n), dtype=np.complex64)
for i in range(batchSize):
    x = np.random.randn(n, n) + 1j*np.random.randn(n, n)
    x = x + x.conj().T
    x = x.astype(np.complex64)
    A[i*n:(i+1)*n, :] = x
    # Need to reverse dimensions because CUSOLVER expects column-major matrices:
    B[i*n:(i+1)*n, :] = x.T.copy()

x_gpu = gpuarray.to_gpu(B)

# Set up output buffer for the (real-valued) eigenvalues:
w_gpu = gpuarray.empty((batchSize, n), dtype=np.float32)

# Set up syevj parameters:
params = solver.cusolverDnCreateSyevjInfo()
solver.cusolverDnXsyevjSetTolerance(params, 1e-7)
solver.cusolverDnXsyevjSetMaxSweeps(params, 15)

# Set up work buffers:
lwork = solver.cusolverDnCheevjBatched_bufferSize(handle, 'CUSOLVER_EIG_MODE_VECTOR', 'u', n,
                                                  x_gpu.gpudata, n, w_gpu.gpudata,
                                                  params, batchSize)
workspace_gpu = gpuarray.zeros(lwork, dtype=A.dtype)
info = gpuarray.zeros(batchSize, dtype=np.int32)

# Compute:
solver.cusolverDnCheevjBatched(handle, 'CUSOLVER_EIG_MODE_VECTOR', 'u', n,
                               x_gpu.gpudata, n, w_gpu.gpudata,
                               workspace_gpu.gpudata, lwork, info.gpudata,
                               params, batchSize)
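Because CUSOLVER overwrites the input with the eigenvectors in column-major order, each n-by-n block read back from x_gpu has to be transposed before use. A minimal verification sketch along these lines (not part of the original listing; it assumes the A, n, and batchSize defined above and a tolerance loose enough for single precision) might look like:

# Assumed follow-up, not in the original listing: verify x @ V == V @ diag(w)
# for every matrix in the batch, to single-precision accuracy.
assert (info.get() == 0).all()            # 0 means syevj converged for that matrix
w = w_gpu.get()                           # eigenvalues (float32), one row per matrix
V_all = x_gpu.get()                       # eigenvectors, column-major per n x n block
for i in range(batchSize):
    xi = A[i*n:(i+1)*n, :]
    Vi = V_all[i*n:(i+1)*n, :].T          # transpose back to row-major
    resid = np.linalg.norm(xi @ Vi - Vi * w[i])
    assert resid < 1e-3 * np.linalg.norm(xi)

The double-precision routine cusolverDnDsyevjBatched follows the same pattern: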
A = np.empty((n*batchSize, n), dtype=np.double)
for i in range(batchSize):
    x = np.random.randn(n, n)
    x = x + x.T
    A[i*n:(i+1)*n, :] = x

# x is real and symmetric, so the row-major blocks already match the
# column-major layout CUSOLVER expects and no transpose is needed:
x_gpu = gpuarray.to_gpu(A)

# Set up output buffers:
w_gpu = gpuarray.empty((batchSize, n), dtype=A.dtype)

# Set up parameters:
params = solver.cusolverDnCreateSyevjInfo()
solver.cusolverDnXsyevjSetTolerance(params, 1e-7)
solver.cusolverDnXsyevjSetMaxSweeps(params, 15)

# Set up work buffers:
lwork = solver.cusolverDnDsyevjBatched_bufferSize(handle, 'CUSOLVER_EIG_MODE_VECTOR', 'u', n,
                                                  x_gpu.gpudata, n, w_gpu.gpudata,
                                                  params, batchSize)
workspace_gpu = gpuarray.zeros(lwork, dtype=A.dtype)
info = gpuarray.zeros(batchSize, dtype=np.int32)

# Compute:
solver.cusolverDnDsyevjBatched(handle, 'CUSOLVER_EIG_MODE_VECTOR', 'u', n,
                               x_gpu.gpudata, n, w_gpu.gpudata,
                               workspace_gpu.gpudata, lwork, info.gpudata,
                               params, batchSize)
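As a cross-check (again not part of the original example; it assumes the n, batchSize, and A defined above), the device eigenvalues can be compared against numpy.linalg.eigvalsh. Sorting both sides keeps the comparison valid regardless of whether syevj's eigenvalue sorting option is enabled:

# Assumed sanity check, not in the original listing: compare the device
# eigenvalues with numpy.linalg.eigvalsh for every matrix in the batch.
assert (info.get() == 0).all()            # 0 means syevj converged for that matrix
w = w_gpu.get()
for i in range(batchSize):
    w_ref = np.linalg.eigvalsh(A[i*n:(i+1)*n, :])   # ascending order
    assert np.allclose(np.sort(w[i]), w_ref, atol=1e-6 * np.abs(w_ref).max())

If the wrapper used here also exposes cusolverDnDestroySyevjInfo (the corresponding cuSOLVER C routine exists, but its availability in the Python wrapper is an assumption), the params structure can be released once the results have been retrieved.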