def emitInPlaceCopy(defDict, indent=0):
    '''Emit a C++ loop nest that assigns every entry of Y to the same entry of X.

    Sequential CSC-format sparse triangular solve works "in place": it
    overwrites its vector X with the result.  To get "out-of-place"
    behavior, where the input Y is left untouched, Y is first copied
    into X.  This function generates that copy.

    The loop order depends on defDict['dataLayout'] so that the inner
    loop makes the stride-1 accesses: 'column major' puts the row index
    i innermost, 'row major' puts the vector index j innermost.

    Arguments:
    defDict -- dictionary of code generation options.  'dataLayout' is
      read here; the dictionary is also handed to emitDenseAref to
      build the X and Y element references.
    indent -- number of spaces put in front of every emitted line.

    Returns the generated code as a string, followed by one empty line.
    Raises ValueError if the data layout is neither 'column major' nor
    'row major'.'''
    dst = emitDenseAref(defDict, 'X', 'i', 'j')
    src = emitDenseAref(defDict, 'Y', 'i', 'j')
    outer = ' ' * indent
    inner = outer + ' ' * 2
    innermost = outer + ' ' * 4
    layout = defDict['dataLayout']

    overVecs = 'for (Ordinal j = 0; j < numVecs; ++j) {\n'
    overRows = 'for (Ordinal i = 0; i < numRows; ++i) {\n'
    # Pick which index runs in the inner loop (the stride-1 one).
    if layout == 'column major':
        outerLoop, innerLoop = overVecs, overRows
    elif layout == 'row major':
        outerLoop, innerLoop = overRows, overVecs
    else:
        raise ValueError('Invalid dataLayout "' + layout + '"')

    pieces = [
        outer, outerLoop,
        inner, innerLoop,
        innermost, dst, ' = ', src, ';\n',
        inner, '}\n',
        outer, '}\n',
        '\n',  # trailing blank line after the loop nest
    ]
    return ''.join(pieces)
def emitInPlaceCopy (defDict, indent=0):
    '''Generate the C++ code that copies the dense vector(s) Y into X.

    The sequential CSC-format sparse triangular solve overwrites X with
    its output (it is an "in-place" algorithm).  When the caller wants
    "out-of-place" behavior, so that the input Y is not modified, Y has
    to be copied into X before the solve; the code emitted here does
    that copy.

    defDict['dataLayout'] selects the loop nesting: for 'column major'
    the loop over rows (i) is innermost, for 'row major' the loop over
    vectors (j) is innermost.  Either way the stride-1 index ends up in
    the inner loop.

    Arguments:
    defDict -- code generation options; 'dataLayout' is read here and
      the dictionary is forwarded to emitDenseAref.
    indent -- how many spaces to prepend to each line of output.

    Returns a string holding the loop nest plus a trailing empty line.
    Raises ValueError for any other value of defDict['dataLayout'].'''
    xEntry = emitDenseAref (defDict, 'X', 'i', 'j')
    yEntry = emitDenseAref (defDict, 'Y', 'i', 'j')
    base = ' ' * indent
    step = ' ' * 2
    layout = defDict['dataLayout']

    loopHeader = {
        'j': 'for (Ordinal j = 0; j < numVecs; ++j) {\n',
        'i': 'for (Ordinal i = 0; i < numRows; ++i) {\n',
    }
    # (outer index, inner index); the inner one is the stride-1 index.
    if layout == 'column major':
        nesting = ('j', 'i')
    elif layout == 'row major':
        nesting = ('i', 'j')
    else:
        raise ValueError ('Invalid dataLayout "' + layout + '"')

    code = base + loopHeader[nesting[0]]
    code += base + step + loopHeader[nesting[1]]
    code += base + step * 2 + xEntry + ' = ' + yEntry + ';\n'
    code += base + step + '}\n'
    code += base + '}\n'
    return code + '\n'
def emitCscOuterLoopBody(defDict, indent=0):
    '''Generate the body of the outer loop for CSC sparse triangular solve.

    This only works for CSC-format sparse matrices.  Sequential CSC
    sparse triangular solve is always done in place (X is both read
    and overwritten).  This is why the algorithm has to copy first if
    the user doesn't want in-place behavior.

    The emitted code handles one column c of the matrix and expects
    ptr, ind, val, numVecs, c, and the X storage to be in scope.

    If defDict['unitDiag'] is false, the emitted code does three
    things, in this order:

    1. Sum all stored entries of column c whose row index equals c
       into A_cc.  No assumption is made about where the diagonal
       entry sits within the column.
    2. Divide X(c,j) by A_cc for every vector j, exactly once.
    3. For every stored entry with row index r != c, subtract
       A_rc * X(c,j) from X(r,j).

    If defDict['unitDiag'] is true, no division is emitted and every
    stored entry of the column is treated as off-diagonal.

    Arguments:
    defDict -- dictionary of code generation options.  Reads
      'sparseFormat', 'unitDiag', and 'conjugateMatrixEntries', and
      forwards the dictionary to emitDenseAref.
    indent -- number of spaces to prepend to every emitted line.

    Returns the generated code as a string.  Raises ValueError if
    defDict['sparseFormat'] is not 'CSC'.
    '''
    if defDict['sparseFormat'] != 'CSC':
        raise ValueError('This function requires CSC-format sparse matrices.')

    indStr = ' ' * indent
    X_rj = emitDenseAref(defDict, 'X', 'r', 'j')
    X_cj = emitDenseAref(defDict, 'X', 'c', 'j')

    unitDiag = defDict['unitDiag']
    if defDict['conjugateMatrixEntries']:
        valExpr = 'STS::conjugate (val[k])'
    else:
        valExpr = 'val[k]'

    colLoop = 'for (size_t k = ptr[c]; k < ptr[c+1]; ++k) {'
    vecLoop = 'for (Ordinal j = 0; j < numVecs; ++j) {'

    # Each entry is (extra indent, text of one emitted line).
    if not unitDiag:
        lines = [
            # Pass 1: accumulate the diagonal entry.  This must finish
            # before X(c,:) is scaled, and X(c,:) must be scaled before
            # any off-diagonal update reads it.
            (0, 'MatrixScalar A_cc = STS::zero ();'),
            (0, colLoop),
            (2, 'if (ind[k] == c) {'),
            (4, 'A_cc = A_cc + ' + valExpr + ';'),
            (2, '}'),
            (0, '}'),
            # Scale X(c,:) once per column, outside the loop over k.
            (0, vecLoop),
            (2, X_cj + ' = ' + X_cj + ' / A_cc;'),
            (0, '}'),
            # Pass 2: apply the off-diagonal entries of column c.
            (0, colLoop),
            (2, 'const Ordinal r = ind[k];'),
            (2, 'if (r != c) {'),
            (4, 'MatrixScalar A_rc = ' + valExpr + ';'),
            (4, vecLoop),
            (6, X_rj + ' -= A_rc * ' + X_cj + ';'),
            (4, '}'),
            (2, '}'),
            (0, '}'),
        ]
    else:
        lines = [
            (0, colLoop),
            (2, 'const Ordinal r = ind[k];'),
            (2, 'MatrixScalar A_rc = ' + valExpr + ';'),
            (2, vecLoop),
            (4, X_rj + ' -= A_rc * ' + X_cj + ';'),
            (2, '}'),
            (0, '}'),
        ]

    return ''.join([indStr + ' ' * extra + text + '\n'
                    for (extra, text) in lines])
def emitCscOuterLoopBody (defDict, indent=0):
    '''Generate the body of the outer loop for CSC sparse triangular solve.

    This only works for CSC-format sparse matrices.  Sequential CSC
    sparse triangular solve is always done in place (X is both read
    and overwritten).  This is why the algorithm has to copy first if
    the user doesn't want in-place behavior.

    The emitted code handles one column c of the matrix and expects
    ptr, ind, val, numVecs, c, and the X storage to be in scope.

    If defDict['unitDiag'] is false, the emitted code does three
    things, in this order:

    1. Sum all stored entries of column c whose row index equals c
       into A_cc.  No assumption is made about where the diagonal
       entry sits within the column.
    2. Divide X(c,j) by A_cc for every vector j, exactly once.
    3. For every stored entry with row index r != c, subtract
       A_rc * X(c,j) from X(r,j).

    If defDict['unitDiag'] is true, no division is emitted and every
    stored entry of the column is treated as off-diagonal.

    Arguments:
    defDict -- dictionary of code generation options.  Reads
      'sparseFormat', 'unitDiag', and 'conjugateMatrixEntries', and
      forwards the dictionary to emitDenseAref.
    indent -- number of spaces to prepend to every emitted line.

    Returns the generated code as a string.  Raises ValueError if
    defDict['sparseFormat'] is not 'CSC'.
    '''
    if defDict['sparseFormat'] != 'CSC':
        raise ValueError('This function requires CSC-format sparse matrices.')

    indStr = ' ' * indent
    X_rj = emitDenseAref(defDict, 'X', 'r', 'j')
    X_cj = emitDenseAref(defDict, 'X', 'c', 'j')

    unitDiag = defDict['unitDiag']
    if defDict['conjugateMatrixEntries']:
        valExpr = 'STS::conjugate (val[k])'
    else:
        valExpr = 'val[k]'

    colLoop = 'for (size_t k = ptr[c]; k < ptr[c+1]; ++k) {'
    vecLoop = 'for (Ordinal j = 0; j < numVecs; ++j) {'

    # Each entry is (extra indent, text of one emitted line).
    if not unitDiag:
        lines = [
            # Pass 1: accumulate the diagonal entry.  This must finish
            # before X(c,:) is scaled, and X(c,:) must be scaled before
            # any off-diagonal update reads it.
            (0, 'MatrixScalar A_cc = STS::zero ();'),
            (0, colLoop),
            (2, 'if (ind[k] == c) {'),
            (4, 'A_cc = A_cc + ' + valExpr + ';'),
            (2, '}'),
            (0, '}'),
            # Scale X(c,:) once per column, outside the loop over k.
            (0, vecLoop),
            (2, X_cj + ' = ' + X_cj + ' / A_cc;'),
            (0, '}'),
            # Pass 2: apply the off-diagonal entries of column c.
            (0, colLoop),
            (2, 'const Ordinal r = ind[k];'),
            (2, 'if (r != c) {'),
            (4, 'MatrixScalar A_rc = ' + valExpr + ';'),
            (4, vecLoop),
            (6, X_rj + ' -= A_rc * ' + X_cj + ';'),
            (4, '}'),
            (2, '}'),
            (0, '}'),
        ]
    else:
        lines = [
            (0, colLoop),
            (2, 'const Ordinal r = ind[k];'),
            (2, 'MatrixScalar A_rc = ' + valExpr + ';'),
            (2, vecLoop),
            (4, X_rj + ' -= A_rc * ' + X_cj + ';'),
            (2, '}'),
            (0, '}'),
        ]

    return ''.join([indStr + ' ' * extra + text + '\n'
                    for (extra, text) in lines])
def emitCsrOuterLoopBody(defDict, indent=0):
    '''Emit the C++ code for one row r of a CSR sparse triangular solve.

    Only CSR-format matrices and out-of-place computation are
    supported; there is no in-place version of the sequential CSR
    solve.

    The emitted code:
    - copies Y(r,j) into X(r,j) for every vector j;
    - for each off-diagonal entry A_rc of row r, subtracts
      A_rc * X(c,j) from X(r,j);
    - unless defDict['unitDiag'] is true, divides X(r,j) by the
      diagonal entry A_rr.

    When defDict['unitDiag'] is false, the emitted code takes the
    first stored entry of the row (val[ptr[r]]) as the diagonal and
    starts the off-diagonal loop at ptr[r]+1.  When it is true, the
    loop starts at ptr[r] and no diagonal is read.

    Matrix values are wrapped in STS::conjugate (...) when
    defDict['conjugateMatrixEntries'] is true.

    Arguments:
    defDict -- dictionary of code generation options.  Reads
      'sparseFormat', 'inPlace', 'conjugateMatrixEntries', and
      'unitDiag', and forwards the dictionary to emitDenseAref.
    indent -- number of spaces to prepend to every emitted line.

    Returns the generated code as a string.  Raises ValueError if the
    sparse format is not 'CSR' or if in-place computation is
    requested.'''
    if defDict['sparseFormat'] != 'CSR':
        raise ValueError('This function requires CSR-format sparse matrices.')
    if defDict['inPlace']:
        raise ValueError('This function requires out-of-place computation.')

    margin = ' ' * indent
    xRow = emitDenseAref(defDict, 'X', 'r', 'j')
    yRow = emitDenseAref(defDict, 'Y', 'r', 'j')
    xCol = emitDenseAref(defDict, 'X', 'c', 'j')

    conjugate = defDict['conjugateMatrixEntries']
    diagVal = 'STS::conjugate (val[ptr[r]])' if conjugate else 'val[ptr[r]]'
    offDiagVal = 'STS::conjugate (val[k])' if conjugate else 'val[k]'

    out = []

    def emit(extra, text):
        # Append one line of output at the given extra indentation.
        out.append(margin + ' ' * extra + text + '\n')

    vecLoop = 'for (Ordinal j = 0; j < numVecs; ++j) {'

    # Start from the right-hand side: X(r,:) = Y(r,:).
    emit(0, vecLoop)
    emit(2, xRow + ' = ' + yRow + ';')
    emit(0, '}')

    hasUnitDiag = defDict['unitDiag']
    if hasUnitDiag:
        emit(0, 'for (size_t k = ptr[r]; k < ptr[r+1]; ++k) {')
    else:
        emit(0, '// We assume the diagonal entry is first in the row.')
        emit(0, 'const MatrixScalar A_rr = ' + diagVal + ';')
        emit(0, 'for (size_t k = ptr[r]+1; k < ptr[r+1]; ++k) {')

    # Body of the loop over the row's off-diagonal entries.
    emit(2, 'const MatrixScalar A_rc = ' + offDiagVal + ';')
    emit(2, 'const Ordinal c = ind[k];')
    emit(2, vecLoop)
    emit(4, xRow + ' -= A_rc * ' + xCol + ';')
    emit(2, '}')
    emit(0, '}')

    if not hasUnitDiag:
        emit(0, vecLoop)
        emit(2, xRow + ' = ' + xRow + ' / A_rr;')
        emit(0, '}')

    return ''.join(out)
def emitCsrOuterLoopBody (defDict, indent=0):
    '''Generate the per-row body of the CSR sparse triangular solve loop.

    This requires a CSR-format sparse matrix and out-of-place
    computation; the sequential CSR solve has no in-place variant.

    The generated C++ is made of up to three consecutive pieces:

    1. A loop over vectors j that sets X(r,j) = Y(r,j).
    2. A loop over the stored entries k of row r that subtracts
       A_rc * X(c,j) from X(r,j), with c = ind[k].  If
       defDict['unitDiag'] is false, the entry at ptr[r] is read as the
       diagonal A_rr (the generated code assumes it comes first in the
       row) and the loop starts at ptr[r]+1; otherwise the loop starts
       at ptr[r].
    3. Only if defDict['unitDiag'] is false: a loop over vectors j
       that divides X(r,j) by A_rr.

    If defDict['conjugateMatrixEntries'] is true, matrix values are
    read through STS::conjugate (...).

    Arguments:
    defDict -- code generation options; 'sparseFormat', 'inPlace',
      'conjugateMatrixEntries', and 'unitDiag' are read here, and the
      dictionary is passed to emitDenseAref.
    indent -- number of spaces prepended to each generated line.

    Returns the generated code as a string.  Raises ValueError when
    the sparse format is not 'CSR' or when defDict['inPlace'] is
    true.'''
    if defDict['sparseFormat'] != 'CSR':
        raise ValueError('This function requires CSR-format sparse matrices.')
    if defDict['inPlace']:
        raise ValueError('This function requires out-of-place computation.')

    pad0 = ' ' * indent
    pad1 = pad0 + ' ' * 2
    pad2 = pad0 + ' ' * 4
    X_r = emitDenseAref(defDict, 'X', 'r', 'j')
    Y_r = emitDenseAref(defDict, 'Y', 'r', 'j')
    X_c = emitDenseAref(defDict, 'X', 'c', 'j')

    diagEntry = 'val[ptr[r]]'
    offDiagEntry = 'val[k]'
    if defDict['conjugateMatrixEntries']:
        diagEntry = 'STS::conjugate (' + diagEntry + ')'
        offDiagEntry = 'STS::conjugate (' + offDiagEntry + ')'

    forEachVec = pad0 + 'for (Ordinal j = 0; j < numVecs; ++j) {\n'
    closeBrace = pad0 + '}\n'

    # Piece 1: X(r,:) = Y(r,:).
    copyRow = forEachVec + pad1 + X_r + ' = ' + Y_r + ';\n' + closeBrace

    # Pieces that depend on whether the diagonal is read and divided by.
    if defDict['unitDiag']:
        rowLoopOpen = pad0 + 'for (size_t k = ptr[r]; k < ptr[r+1]; ++k) {\n'
        scaleRow = ''
    else:
        rowLoopOpen = (
            pad0 + '// We assume the diagonal entry is first in the row.\n'
            + pad0 + 'const MatrixScalar A_rr = ' + diagEntry + ';\n'
            + pad0 + 'for (size_t k = ptr[r]+1; k < ptr[r+1]; ++k) {\n')
        scaleRow = (
            forEachVec + pad1 + X_r + ' = ' + X_r + ' / A_rr;\n' + closeBrace)

    # Piece 2 (after its opening line): the off-diagonal updates.
    rowLoopRest = (
        pad1 + 'const MatrixScalar A_rc = ' + offDiagEntry + ';\n'
        + pad1 + 'const Ordinal c = ind[k];\n'
        + pad1 + 'for (Ordinal j = 0; j < numVecs; ++j) {\n'
        + pad2 + X_r + ' -= A_rc * ' + X_c + ';\n'
        + pad1 + '}\n'
        + closeBrace)

    return copyRow + rowLoopOpen + rowLoopRest + scaleRow