def thunk():
    # Runs as a thunk returned by make_thunk: `inputs`, `outputs`,
    # `cusolver_handle`, `gpuarray` (presumably pygpu.gpuarray) and the
    # `cusolver` wrapper come from the enclosing scope, and the cached
    # buffers below (thunk.workspace, thunk.pivots, thunk.dev_info) are
    # expected to be initialized to None there.
    context = inputs[0][0].context

    # Output storage cell.
    z = outputs[0]

    # Matrix.
    A = inputs[0][0]

    # Solution vectors.
    b = inputs[1][0]

    assert len(A.shape) == 2
    assert len(b.shape) == 2

    if self.trans in ['T', 'C']:
        trans = 1
        l, n = A.shape
        k, m = b.shape
    elif self.trans == 'N':
        trans = 0
        n, l = A.shape
        k, m = b.shape
    else:
        raise ValueError('Invalid value for trans')

    if l != n:
        raise ValueError('A must be a square matrix')
    if n != k:
        raise ValueError('A and b must be aligned.')

    lda = max(1, n)
    ldb = max(1, k)

    # We copy A and b as cusolver operates in place.
    b = gpuarray.array(b, copy=True, order='F')
    if not self.inplace:
        A = gpuarray.array(A, copy=True)
    A_ptr = A.gpudata
    b_ptr = b.gpudata

    # cusolver expects an F-ordered matrix, but A is not explicitly
    # converted between C and F order; instead we switch the
    # "transpose" flag.
    if A.flags['C_CONTIGUOUS']:
        trans = 1 - trans

    workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
        cusolver_handle, n, n, A_ptr, lda)

    # Workspace, pivot and info buffers are cached on the thunk object so
    # they are reused across calls.  Pivots and dev_info are integer
    # arrays, as cusolverDnSgetrf expects.
    if (thunk.workspace is None or
            thunk.workspace.size != workspace_size):
        thunk.workspace = gpuarray.zeros((workspace_size,),
                                         dtype='float32',
                                         context=context)

    if thunk.pivots is None or thunk.pivots.size != n:
        thunk.pivots = gpuarray.zeros((n,), dtype='int32',
                                      context=context)

    if thunk.dev_info is None:
        thunk.dev_info = gpuarray.zeros((1,), dtype='int32',
                                        context=context)

    workspace_ptr = thunk.workspace.gpudata
    pivots_ptr = thunk.pivots.gpudata
    dev_info_ptr = thunk.dev_info.gpudata

    # LU factorization of A, then solve for every right-hand side in b.
    cusolver.cusolverDnSgetrf(
        cusolver_handle, n, n, A_ptr, lda,
        workspace_ptr, pivots_ptr, dev_info_ptr)

    cusolver.cusolverDnSgetrs(
        cusolver_handle, trans, n, m, A_ptr, lda,
        pivots_ptr, b_ptr, ldb, dev_info_ptr)

    z[0] = b
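# A minimal CPU reference for what the thunk above computes with
# cusolverDnSgetrf / cusolverDnSgetrs: an LU factorization with partial
# pivoting followed by a solve against the cached factors.  This is an
# illustrative sketch only (the sizes and arrays below are made up), but
# it can serve as a correctness check for the GPU path.
import numpy as np
from scipy.linalg import lu_factor, lu_solve

rng = np.random.RandomState(0)
n, m = 5, 3
A = rng.rand(n, n).astype('float32') + n * np.eye(n, dtype='float32')
b = rng.rand(n, m).astype('float32')

# getrf equivalent: factor A with partial pivoting (piv plays the role
# of cusolver's devIpiv output).
lu, piv = lu_factor(A)

# getrs equivalent: reuse the factors for one or more right-hand sides;
# trans=0 solves A x = b, trans=1 solves A.T x = b.
x = lu_solve((lu, piv), b, trans=0)

assert np.allclose(A.dot(x), b, atol=1e-4)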
def perform(self, node, inputs, outputs):
    # Assumes the module-level imports used by Theano's gpuarray linalg
    # code, e.g. `import pygpu` and `from skcuda import cusolver`.
    context = inputs[0][0].context

    # Output storage cell.
    z = outputs[0]

    # Matrix.
    A = inputs[0]

    # Solution vectors.
    b = inputs[1]

    assert len(A.shape) == 2
    assert len(b.shape) == 2

    if self.trans in ['T', 'C']:
        trans = 1
        l, n = A.shape
        k, m = b.shape
    elif self.trans == 'N':
        trans = 0
        n, l = A.shape
        k, m = b.shape
    else:
        raise ValueError('Invalid value for trans')

    if l != n:
        raise ValueError('A must be a square matrix')
    if n != k:
        raise ValueError('A and b must be aligned.')

    lda = max(1, n)
    ldb = max(1, k)

    # We copy A and b as cusolver operates in place.
    b = pygpu.array(b, copy=True, order='F')
    if not self.inplace:
        A = pygpu.array(A, copy=True)
    A_ptr = A.gpudata
    b_ptr = b.gpudata

    # cusolver expects an F-ordered matrix, but A is not explicitly
    # converted between C and F order; instead we switch the
    # "transpose" flag.
    if A.flags['C_CONTIGUOUS']:
        trans = 1 - trans

    if self.A_structure == 'symmetric':
        # Cholesky factorization (potrf) then solve (potrs).  The second
        # argument (0) is the fill mode, i.e. CUBLAS_FILL_MODE_LOWER.
        with context:
            workspace_size = cusolver.cusolverDnSpotrf_bufferSize(
                context.cusolver_handle, 0, n, A_ptr, lda)

        workspace = pygpu.zeros(workspace_size, dtype='float32',
                                context=context)

        dev_info = pygpu.zeros((1,), dtype='int32', context=context)

        workspace_ptr = workspace.gpudata
        dev_info_ptr = dev_info.gpudata

        with context:
            cusolver.cusolverDnSpotrf(
                context.cusolver_handle, 0, n, A_ptr, lda,
                workspace_ptr, workspace_size, dev_info_ptr)
            self.check_dev_info(dev_info)

            # `cusolverDnSpotrs` is presumably a module-level wrapper
            # (older scikit-cuda releases do not expose it).
            cusolverDnSpotrs(
                context.cusolver_handle, 0, n, m, A_ptr, lda,
                b_ptr, ldb, dev_info_ptr)

    else:
        # General case for A: LU factorization (getrf) then solve (getrs).
        with context:
            workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
                context.cusolver_handle, n, n, A_ptr, lda)

        workspace = pygpu.zeros(workspace_size, dtype='float32',
                                context=context)

        pivots = pygpu.zeros(n, dtype='int32', context=context)

        dev_info = pygpu.zeros((1,), dtype='int32', context=context)

        workspace_ptr = workspace.gpudata
        pivots_ptr = pivots.gpudata
        dev_info_ptr = dev_info.gpudata

        with context:
            cusolver.cusolverDnSgetrf(
                context.cusolver_handle, n, n, A_ptr, lda,
                workspace_ptr, pivots_ptr, dev_info_ptr)
            self.check_dev_info(dev_info)

            cusolver.cusolverDnSgetrs(
                context.cusolver_handle, trans, n, m, A_ptr, lda,
                pivots_ptr, b_ptr, ldb, dev_info_ptr)

    z[0] = b
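# A CPU sketch of the two branches above: for A_structure == 'symmetric'
# (in practice symmetric positive definite), potrf/potrs amount to a
# Cholesky factor-and-solve; the general branch is a pivoted LU
# factor-and-solve (getrf/getrs).  The helper name and data below are
# hypothetical, for illustration only.
import numpy as np
from scipy.linalg import cho_factor, cho_solve, lu_factor, lu_solve

def cpu_solve(A, b, A_structure='general'):
    if A_structure == 'symmetric':
        # potrf / potrs equivalent (requires A symmetric positive definite).
        c, lower = cho_factor(A, lower=True)
        return cho_solve((c, lower), b)
    # getrf / getrs equivalent (any square, non-singular A).
    lu, piv = lu_factor(A)
    return lu_solve((lu, piv), b)

rng = np.random.RandomState(0)
n, m = 4, 2
M = rng.rand(n, n)
A = M.dot(M.T) + n * np.eye(n)   # symmetric positive definite
b = rng.rand(n, m)

# Both paths agree on a symmetric positive definite system.
assert np.allclose(cpu_solve(A, b, 'symmetric'), cpu_solve(A, b, 'general'))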