def perform(self, node, inputs, outputs):
    """Factorize a square matrix with cuSOLVER's ``potrf`` routine.

    The factorization runs on a copy of the input unless ``self.inplace``
    is set, in which case the input buffer itself is overwritten. The
    result is stored in ``outputs[0][0]``.

    :param node: not used by this method.
    :param inputs: sequence whose first element is the matrix ``A``.
    :param outputs: output storage; ``outputs[0][0]`` receives the factor.
    :raises ValueError: if ``A`` is not square.
    :raises LinAlgError: if cuSOLVER reports a positive ``dev_info``.
    """
    # Input matrix.
    A = inputs[0]

    # Read the context from the array itself. Indexing its first row
    # (``A[0].context``) fails on an empty matrix and builds a throwaway view.
    context = A.context

    l, n = A.shape
    if l != n:
        raise ValueError('A must be a square matrix')

    # Leading dimension handed to cusolver; never smaller than 1.
    lda = max(1, n)

    # cusolver operates on F ordered matrices, but A is expected
    # to be symmetric so it does not matter.
    # We copy A if needed
    if self.inplace:
        L = A
    else:
        L = pygpu.array(A, copy=True)

    # The output matrix will contain only the upper or lower
    # triangular factorization of A. If L is C ordered (it
    # probably is as it is the default in Theano) we just switch
    # the fill mode parameter of cusolver
    l_parameter = 0 if self.lower else 1
    if L.flags['C_CONTIGUOUS']:
        l_parameter = 1 - l_parameter

    L_ptr = L.gpudata

    with context:
        # Ask cusolver how much scratch space the factorization needs.
        workspace_size = cusolver.cusolverDnSpotrf_bufferSize(
            context.cusolver_handle, l_parameter, n, L_ptr, lda)

        workspace = pygpu.zeros(workspace_size, dtype='float32',
                                context=context)

        # One-element device buffer that receives the status code.
        dev_info = pygpu.zeros((1,), dtype='int32', context=context)

        workspace_ptr = workspace.gpudata
        dev_info_ptr = dev_info.gpudata

        cusolver.cusolverDnSpotrf(
            context.cusolver_handle, l_parameter, n, L_ptr, lda,
            workspace_ptr, workspace_size, dev_info_ptr)

        # Copy the status back to the host; only positive values are
        # treated as a failed factorization here.
        val_dev_info = np.asarray(dev_info)[0]
        if val_dev_info > 0:
            raise LinAlgError('Cholesky decomposition failed (is A SPD?)')

    # cusolver leaves the elements in the matrix outside the considered
    # upper or lower triangle unchanged, so we need to put zeros outside
    # the triangle
    if self.lower:
        tril(L)
    else:
        triu(L)

    outputs[0][0] = L
def perform(self, node, inputs, outputs):
    """Factorize a square matrix with cuSOLVER's ``potrf`` routine.

    The factorization runs on a copy of the input unless ``self.inplace``
    is set, in which case the input buffer itself is overwritten. The
    result is stored in ``outputs[0][0]``.

    :param node: not used by this method.
    :param inputs: sequence whose first element is the matrix ``A``.
    :param outputs: output storage; ``outputs[0][0]`` receives the factor.
    :raises ValueError: if ``A`` is not square.
    :raises LinAlgError: if cuSOLVER reports a positive ``dev_info``.
    """
    # Input matrix.
    A = inputs[0]

    # Read the context from the array itself. Indexing its first row
    # (``A[0].context``) fails on an empty matrix and builds a throwaway view.
    context = A.context

    l, n = A.shape
    if l != n:
        raise ValueError('A must be a square matrix')

    # Leading dimension handed to cusolver; never smaller than 1.
    lda = max(1, n)

    # cusolver operates on F ordered matrices, but A is expected
    # to be symmetric so it does not matter.
    # We copy A if needed
    if self.inplace:
        L = A
    else:
        L = pygpu.array(A, copy=True)

    # The output matrix will contain only the upper or lower
    # triangular factorization of A. If L is C ordered (it
    # probably is as it is the default in Theano) we just switch
    # the fill mode parameter of cusolver
    l_parameter = 0 if self.lower else 1
    if L.flags['C_CONTIGUOUS']:
        l_parameter = 1 - l_parameter

    L_ptr = L.gpudata

    with context:
        # Ask cusolver how much scratch space the factorization needs.
        workspace_size = cusolver.cusolverDnSpotrf_bufferSize(
            context.cusolver_handle, l_parameter, n, L_ptr, lda)

        workspace = pygpu.zeros(workspace_size, dtype='float32',
                                context=context)

        # One-element device buffer that receives the status code.
        dev_info = pygpu.zeros((1,), dtype='int32', context=context)

        workspace_ptr = workspace.gpudata
        dev_info_ptr = dev_info.gpudata

        cusolver.cusolverDnSpotrf(
            context.cusolver_handle, l_parameter, n, L_ptr, lda,
            workspace_ptr, workspace_size, dev_info_ptr)

        # Copy the status back to the host; only positive values are
        # treated as a failed factorization here.
        val_dev_info = np.asarray(dev_info)[0]
        if val_dev_info > 0:
            raise LinAlgError('Cholesky decomposition failed (is A SPD?)')

    # cusolver leaves the elements in the matrix outside the considered
    # upper or lower triangle unchanged, so we need to put zeros outside
    # the triangle
    if self.lower:
        tril(L)
    else:
        triu(L)

    outputs[0][0] = L
def run_triu(dtype, shape, order, inplace):
    """Check ``triu`` against ``numpy.triu`` for one configuration.

    :param dtype: element type forwarded to ``gen_gpuarray``.
    :param shape: array shape forwarded to ``gen_gpuarray``.
    :param order: memory order forwarded to ``gen_gpuarray``.
    :param inplace: forwarded to ``triu``; also selects which post-condition
        on the input array is checked.
    """
    # Presumably a host array and its device counterpart -- the helper is
    # defined outside this block.
    host, device = gen_gpuarray(shape, dtype, order=order, ctx=context)

    out = triu(device, inplace=inplace)
    expected = numpy.triu(host)
    assert numpy.all(expected == out)

    # In-place: the input itself must now hold the upper triangle.
    # Otherwise: the input must still equal the untouched host data.
    reference = expected if inplace else host
    assert numpy.all(reference == device)
def test_triu():
    """Exercise ``triu`` over several shapes, memory orders and in-place modes."""
    shapes = [(10, 5), (5, 10), (10, 10)]
    orders = ['c', 'f']
    inplace_modes = [True, False]

    # Flattened in the same order as three nested loops:
    # shape outermost, then order, then the in-place flag.
    cases = [(shp, ordr, flag)
             for shp in shapes
             for ordr in orders
             for flag in inplace_modes]

    for shape, order, inplace in cases:
        host, device = gen_gpuarray(shape, 'float32', order=order,
                                    ctx=context)
        out = triu(device, inplace=inplace)
        expected = numpy.triu(host)
        assert numpy.all(expected == out)

        # In-place: the input holds the triangle; otherwise it is unchanged.
        reference = expected if inplace else host
        assert numpy.all(reference == device)
def test_triu():
    """Exercise ``triu`` over several shapes, memory orders and in-place modes."""
    shapes = [(10, 5), (5, 10), (10, 10)]
    orders = ['c', 'f']
    inplace_modes = [True, False]

    # Flattened in the same order as three nested loops:
    # shape outermost, then order, then the in-place flag.
    cases = [(shp, ordr, flag)
             for shp in shapes
             for ordr in orders
             for flag in inplace_modes]

    for shape, order, inplace in cases:
        host, device = gen_gpuarray(shape, 'float32', order=order,
                                    ctx=context)
        out = triu(device, inplace=inplace)
        expected = numpy.triu(host)
        assert numpy.all(expected == out)

        # In-place: the input holds the triangle; otherwise it is unchanged.
        reference = expected if inplace else host
        assert numpy.all(reference == device)
def run_noncontiguous_triu(self):
    """Call ``triu`` on a device array that is neither C- nor F-contiguous.

    ``self`` is not used. The result of ``triu`` is discarded; presumably
    the caller checks that this call raises -- confirm against the caller.
    """
    host = numpy.random.rand(5, 5)
    # Upload first, then reverse the rows of the device array.
    device = pygpu.array(host, context=context)[::-1]

    # Sanity check that the array really is non-contiguous in both orders.
    c_contig = device.flags.c_contiguous
    f_contig = device.flags.f_contiguous
    assert c_contig is False and f_contig is False

    triu(device)
def run_3d_triu(self):
    """Call ``triu`` on a three-dimensional device array.

    ``self`` is not used. The result of ``triu`` is discarded; presumably
    the caller checks that this call raises -- confirm against the caller.
    """
    cube_shape = (10, 10, 10)
    # Only the device array is needed; the host copy is ignored.
    _host, device = gen_gpuarray(cube_shape, 'float32', ctx=context)
    triu(device)
def run_noncontiguous_triu(self):
    """Call ``triu`` on a device array built from a row-reversed host array.

    ``self`` is not used. The result of ``triu`` is discarded; presumably
    the caller checks that this call raises -- confirm against the caller.
    """
    # Reverse the rows on the host before uploading.
    host = numpy.random.rand(5, 5)[::-1]
    device = pygpu.array(host, context=context)

    # NOTE(review): this relies on ``pygpu.array`` preserving the host
    # view's non-contiguous layout. If it makes a contiguous copy instead,
    # the assertion below fails before ``triu`` is reached -- confirm.
    c_contig = device.flags.c_contiguous
    f_contig = device.flags.f_contiguous
    assert c_contig is False and f_contig is False

    triu(device)
def run_3d_triu(self):
    """Call ``triu`` on a three-dimensional device array.

    ``self`` is not used. The result of ``triu`` is discarded; presumably
    the caller checks that this call raises -- confirm against the caller.
    """
    cube_shape = (10, 10, 10)
    # Only the device array is needed; the host copy is ignored.
    _host, device = gen_gpuarray(cube_shape, 'float32', ctx=context)
    triu(device)