def writeBenchmarkFiles(stepBaseDir, solutions, problemSizes, stepName, filesToCopy):
    """Write every generated file needed to build one benchmark step.

    Emits the solution and kernel sources, the Generated.cmake listing and
    ClientParameters.h into globalParameters["WorkingPath"].

    Args:
        stepBaseDir: Base directory of the benchmark step.  Kept for caller
            compatibility; nothing in this function reads it.
        solutions: Solutions to benchmark.  Must be non-empty: the problem
            type is taken from solutions[0]["ProblemType"].
        problemSizes: Problem sizes forwarded to writeClientParameters.
        stepName: Step name; used for the client name and forwarded to
            writeClientParameters.
        filesToCopy: Static source files forwarded to writeCMake and
            writeClientParameters.
    """
    workingPath = globalParameters["WorkingPath"]

    # With unmerged output every solution/kernel gets its own file, so the
    # target sub-directories have to exist up front.
    if not globalParameters["MergeFiles"]:
        ensurePath(os.path.join(workingPath, "Solutions"))
        ensurePath(os.path.join(workingPath, "Kernels"))

    ##########################################################################
    # Min Naming
    ##########################################################################
    # Collect the distinct kernels over all solutions, keeping first-seen
    # order (membership test rather than a set, as in the rest of the file).
    kernels = []
    kernelsBetaOnly = []
    for solution in solutions:
        for kernel in solution.getKernels():
            if kernel not in kernels:
                kernels.append(kernel)
        for kernel in solution.getKernelsBetaOnly():
            if kernel not in kernelsBetaOnly:
                kernelsBetaOnly.append(kernel)

    solutionSerialNaming = Solution.getSerialNaming(solutions)
    kernelSerialNaming = Solution.getSerialNaming(kernels)
    solutionMinNaming = Solution.getMinNaming(solutions)
    kernelMinNaming = Solution.getMinNaming(kernels)
    solutionWriter = SolutionWriter(
        solutionMinNaming, solutionSerialNaming,
        kernelMinNaming, kernelSerialNaming)
    kernelWriterSource = KernelWriterSource(
        kernelMinNaming, kernelSerialNaming)
    kernelWriterAssembly = KernelWriterAssembly(
        kernelMinNaming, kernelSerialNaming)

    # write solution, kernels and CMake
    problemType = solutions[0]["ProblemType"]
    # NOTE(review): TensileCreateLibrary calls writeSolutionsAndKernels
    # without the problem-type list argument; confirm which call matches the
    # function's actual signature.
    writeSolutionsAndKernels(
        workingPath, [problemType], solutions, kernels, kernelsBetaOnly,
        solutionWriter, kernelWriterSource, kernelWriterAssembly)

    ##########################################################################
    # Write CMake
    ##########################################################################
    clientName = "TensileBenchmark_%s" % stepName
    writeCMake(workingPath, solutions, kernels, filesToCopy, clientName)

    # writeClientParameters accepts exactly five positional arguments;
    # passing stepBaseDir as a sixth raised TypeError on every call.
    forBenchmark = True
    writeClientParameters(forBenchmark, solutions, problemSizes, stepName,
                          filesToCopy)
def writeCMake(outputPath, solutions, kernels, libraryStaticFiles, clientName):
    """Write Generated.cmake and copy the static client sources.

    The generated file defines three CMake lists: TensileClient_SOLUTIONS,
    TensileClient_KERNELS and TensileClient_SOURCE.

    Args:
        outputPath: Directory that receives Generated.cmake and the copied
            static files.
        solutions: Solutions whose .h/.cpp files are listed (one pair per
            solution unless globalParameters["MergeFiles"] is set).
        kernels: Kernels whose .h/.cpp files are listed (same rule).
        libraryStaticFiles: File names, relative to
            globalParameters["SourcePath"], to copy into outputPath and list
            under TensileClient_SOURCE.
        clientName: Not used by this function; kept for the callers.
    """
    print1("# Writing Custom CMake")

    ##########################################################################
    # Min Naming
    ##########################################################################
    # Serial (short) names are only requested when every solution/kernel is
    # written to its own file.
    if globalParameters["ShortNames"] and not globalParameters["MergeFiles"]:
        solutionSerialNaming = Solution.getSerialNaming(solutions)
        kernelSerialNaming = Solution.getSerialNaming(kernels)
    else:
        solutionSerialNaming = None
        kernelSerialNaming = None
    solutionMinNaming = Solution.getMinNaming(solutions)
    kernelMinNaming = Solution.getMinNaming(kernels)
    solutionWriter = SolutionWriter(
        solutionMinNaming, solutionSerialNaming,
        kernelMinNaming, kernelSerialNaming)
    kernelWriterSource = KernelWriterSource(
        kernelMinNaming, kernelSerialNaming)
    kernelWriterAssembly = KernelWriterAssembly(
        kernelMinNaming, kernelSerialNaming)

    mergeFiles = globalParameters["MergeFiles"]

    # The context manager closes the file even when a name lookup or a file
    # copy below raises.
    with open(os.path.join(outputPath, "Generated.cmake"), "w") as generatedFile:
        generatedFile.write(CMakeHeader)

        # write solution names
        generatedFile.write("set( TensileClient_SOLUTIONS\n")
        if mergeFiles:
            generatedFile.write(" ${CMAKE_SOURCE_DIR}/Solutions.h\n")
            generatedFile.write(" ${CMAKE_SOURCE_DIR}/Solutions.cpp\n")
        else:
            for solution in solutions:
                solutionName = solutionWriter.getSolutionName(solution)
                generatedFile.write(
                    " ${CMAKE_SOURCE_DIR}/Solutions/%s.h\n" % solutionName)
                generatedFile.write(
                    " ${CMAKE_SOURCE_DIR}/Solutions/%s.cpp\n" % solutionName)
        generatedFile.write(" )\n")

        # write kernel names
        generatedFile.write("set( TensileClient_KERNELS\n")
        if mergeFiles:
            generatedFile.write(" ${CMAKE_SOURCE_DIR}/Kernels.h\n")
            generatedFile.write(" ${CMAKE_SOURCE_DIR}/Kernels.cpp\n")
        else:
            for kernel in kernels:
                # The kernel's language decides which writer names it.
                if kernel["KernelLanguage"] == "Source":
                    kernelName = kernelWriterSource.getKernelName(kernel)
                else:
                    kernelName = kernelWriterAssembly.getKernelName(kernel)
                generatedFile.write(
                    " ${CMAKE_SOURCE_DIR}/Kernels/%s.h\n" % kernelName)
                generatedFile.write(
                    " ${CMAKE_SOURCE_DIR}/Kernels/%s.cpp\n" % kernelName)
        generatedFile.write(" )\n")

        # static client sources: copy each file next to the generated ones
        # and add it to the CMake list
        generatedFile.write("set( TensileClient_SOURCE\n")
        for fileName in libraryStaticFiles:
            shutil_copy(
                os.path.join(globalParameters["SourcePath"], fileName),
                outputPath)
            generatedFile.write(" ${CMAKE_SOURCE_DIR}/%s\n" % fileName)
        generatedFile.write(" )\n\n")
def _indexAssignmentTable(arrayName, assignmentKey, problemTypes, width):
    """Return the C++ definition of one [numProblemTypes][width] index table.

    Rows shorter than width are padded with static_cast<unsigned int>(-1).
    """
    text = "const unsigned int %s[numProblemTypes][maxNumIndicesAB] = {\n" \
        % arrayName
    lastRow = len(problemTypes) - 1
    for row, problemType in enumerate(problemTypes):
        indices = problemType[assignmentKey]
        text += " { %u" % indices[0]
        for i in range(1, width):
            if i < len(indices):
                text += ", %u" % indices[i]
            else:
                text += ", static_cast<unsigned int>(-1)"
        text += " },\n" if row < lastRow else " }\n"
    text += "};\n"
    return text


def _clampedSizeFactor(index):
    """Return the stride factor used by the benchmark client for one index."""
    return "* std::max(minStrides[%i], sizes[%i])" % (index, index)


def _sizeFactor(index):
    """Return the stride factor used by the library client for one index."""
    return "*sizes[%i]" % index


def _strideDeclarations(tensorChar, assignments, indexChars, factor):
    """Return C++ declarations of the packed strides of one tensor.

    Stride i is 1 multiplied by factor(index) for every index assigned to a
    dimension before i.
    """
    text = ""
    for i in range(len(assignments)):
        text += " unsigned int stride%s%u%s = 1" % (
            tensorChar, i, indexChars[assignments[i]])
        for j in range(i):
            text += factor(assignments[j])
        text += ";\n"
    return text


def _strideArguments(tensorChar, assignments, indexChars, firstStride):
    """Return the stride variables of one tensor as call arguments."""
    return "".join(
        " stride%s%u%s,\n" % (tensorChar, i, indexChars[assignments[i]])
        for i in range(firstStride, len(assignments)))


def writeClientParameters(forBenchmark, solutions, problemSizes, stepName,
                          functionList):
    """Generate ClientParameters.h in globalParameters["WorkingPath"].

    The header (prefixed with CHeader) holds the tables and generated call
    wrappers of the C++ client.

    Args:
        forBenchmark: True to generate a benchmark client for the single
            problem type solutions[0]["ProblemType"]; False to generate a
            library client for the functions in functionList.
        solutions: Solutions to benchmark; only read when forBenchmark.
        problemSizes: Only read when forBenchmark (totalProblemSizes, sizes,
            minStrides, maxC, maxA, maxB).
        stepName: Base name of the results CSV; only read when forBenchmark.
        functionList: Only read when not forBenchmark.  Each entry is indexed
            as [0] schedule name and [1] problem type.

    Raises:
        IndexError: If there is no problem type to generate for (empty
            solutions in benchmark mode, empty functionList otherwise).
    """
    out = []
    emit = out.append
    runtimeIsOCL = globalParameters["RuntimeLanguage"] == "OCL"
    indexChars = globalParameters["IndexChars"]

    ##########################################################################
    # Min Naming / includes
    ##########################################################################
    if forBenchmark:
        kernels = []
        for solution in solutions:
            for kernel in solution.getKernels():
                if kernel not in kernels:
                    kernels.append(kernel)

        solutionSerialNaming = Solution.getSerialNaming(solutions)
        kernelSerialNaming = Solution.getSerialNaming(kernels)
        solutionMinNaming = Solution.getMinNaming(solutions)
        kernelMinNaming = Solution.getMinNaming(kernels)
        solutionWriter = SolutionWriter(
            solutionMinNaming, solutionSerialNaming,
            kernelMinNaming, kernelSerialNaming)

        if globalParameters["MergeFiles"]:
            emit("#include \"Solutions.h\"\n")
        else:
            for solution in solutions:
                solutionName = solutionWriter.getSolutionName(solution)
                emit("#include \"" + solutionName + ".h\"\n")
        emit("\n")
    else:
        emit("#include \"Tensile.h\"\n")

    emit("typedef enum {\n")
    emit(" enum_float,\n")
    emit(" enum_double,\n")
    emit(" enum_TensileComplexFloat,\n")
    emit(" enum_TensileComplexDouble\n")
    emit("#ifdef Tensile_ENABLE_HALF\n")
    emit(" ,enum_TensileHalf\n")
    emit("#endif\n")
    emit("} DataTypeEnum;\n")
    emit("\n")

    emit("// Debug Params\n")
    emit("const bool printTensorA=%s;\n"
         % toCppBool(globalParameters["PrintTensorA"]))
    emit("const bool printTensorB=%s;\n"
         % toCppBool(globalParameters["PrintTensorB"]))
    emit("const bool printTensorC=%s;\n"
         % toCppBool(globalParameters["PrintTensorC"]))
    emit("const bool printWinnersOnly=%s;\n"
         % toCppBool(globalParameters["PrintWinnersOnly"]))
    emit("\n")

    # +1 leaves room for the terminating NUL of the C string.
    emit("const char indexChars[%u] = \"%s" % (
        len(indexChars) + 1, indexChars[0]))
    for i in range(1, len(indexChars)):
        emit(indexChars[i])
    emit("\";\n")

    emit("unsigned int functionIdx;\n")
    emit("unsigned int dataTypeIdx;\n")
    emit("unsigned int problemTypeIdx;\n")
    emit("\n")

    ##########################################################################
    # Problem Types
    ##########################################################################
    dataTypes = []
    problemTypes = []
    problemTypesForDataType = {}  # data type -> its problem types
    schedulesForProblemType = {}  # problem type -> its schedule names
    # rows of: dataTypeIdxSerial, problemTypeIdxForDataType,
    # problemTypeIdxSerial, functionIdxSerial, functionIdxForDataType,
    # functionIdxForProblemType
    functionInfo = []

    if forBenchmark:
        problemType = solutions[0]["ProblemType"]
        dataTypes.append(problemType["DataType"])
        problemTypes.append(problemType)
    else:
        for function in functionList:
            scheduleName = function[0]
            problemType = function[1]
            dataType = problemType["DataType"]
            if dataType not in dataTypes:
                dataTypes.append(dataType)
                problemTypesForDataType[dataType] = []
            if problemType not in problemTypesForDataType[dataType]:
                problemTypesForDataType[dataType].append(problemType)
                schedulesForProblemType[problemType] = []
            schedulesForProblemType[problemType].append(scheduleName)

        # sort so that the generated tables have a deterministic order
        dataTypes = sorted(dataTypes)
        for dataType in dataTypes:
            problemTypesForDataType[dataType] = \
                sorted(problemTypesForDataType[dataType])
            for problemType in problemTypesForDataType[dataType]:
                schedulesForProblemType[problemType] = \
                    sorted(schedulesForProblemType[problemType])

        # assign info
        functionIdxSerial = 0
        for dataTypeIdxSerial, dataType in enumerate(dataTypes):
            functionIdxForDataType = 0
            for problemTypeIdxForDataType, problemType in enumerate(
                    problemTypesForDataType[dataType]):
                problemTypeIdxSerial = len(problemTypes)
                problemTypes.append(problemType)
                for functionIdxForProblemType in range(
                        len(schedulesForProblemType[problemType])):
                    functionInfo.append((
                        dataTypeIdxSerial,
                        problemTypeIdxForDataType,
                        problemTypeIdxSerial,
                        functionIdxSerial,
                        functionIdxForDataType,
                        functionIdxForProblemType))
                    functionIdxForDataType += 1
                    functionIdxSerial += 1
        emit("const unsigned int numFunctions = %u;\n" % functionIdxSerial)

    numProblemTypes = len(problemTypes)
    # Everything below needs at least one problem type; indexing here raises
    # IndexError before any partial output is produced.
    firstProblemType = problemTypes[0]

    ##########################################################################
    # Data Types
    ##########################################################################
    emit("/* data types */\n")
    emit("const unsigned int numDataTypes = %u;\n" % len(dataTypes))
    emit("const DataTypeEnum dataTypeEnums[numDataTypes] = { %s };\n"
         % ", ".join("enum_%s" % dt.toCpp() for dt in dataTypes))
    # bytes per elements
    emit("const unsigned int bytesPerElement[numDataTypes] = { %s };\n"
         % ", ".join("%u" % dt.numBytes() for dt in dataTypes))
    # flops per mac
    emit("const unsigned int numFlopsPerMac[numDataTypes] = { %s };\n"
         % ", ".join("%u" % (2 if dt.isReal() else 8) for dt in dataTypes))
    for dataType in dataTypes:
        emit("#define Tensile_DATA_TYPE_%s\n" % dataType.toCpp().upper())

    ##########################################################################
    # Problem Types
    ##########################################################################
    emit("/* problem types */\n")
    emit("const unsigned int numProblemTypes = %u;\n" % numProblemTypes)
    # Num C Indices
    emit("const unsigned int numIndicesC[numProblemTypes] = { %s };\n"
         % ", ".join("%u" % pt["NumIndicesC"] for pt in problemTypes))
    # Num AB Indices
    numIndicesAB = [len(pt["IndexAssignmentsA"]) for pt in problemTypes]
    maxNumIndicesAB = max(numIndicesAB)
    emit("const unsigned int numIndicesAB[numProblemTypes] = { %s };\n"
         % ", ".join("%u" % count for count in numIndicesAB))
    emit("const unsigned int maxNumIndicesAB = %u;\n" % maxNumIndicesAB)
    # Index Assignments A / B (both tables are maxNumIndicesAB wide)
    emit(_indexAssignmentTable(
        "indexAssignmentsA", "IndexAssignmentsA", problemTypes,
        maxNumIndicesAB))
    emit(_indexAssignmentTable(
        "indexAssignmentsB", "IndexAssignmentsB", problemTypes,
        maxNumIndicesAB))
    # beta
    emit("bool useBeta[numProblemTypes] = { %s };\n"
         % ", ".join("true" if pt["UseBeta"] else "false"
                     for pt in problemTypes))
    # Complex Conjugates: one entry per problem type.  Each entry reads its
    # own problem type; previously every entry repeated problemTypes[0].
    emit("const bool complexConjugateA[numProblemTypes] = { %s };\n"
         % ", ".join("true" if pt["ComplexConjugateA"] else "false"
                     for pt in problemTypes))
    emit("const bool complexConjugateB[numProblemTypes] = { %s };\n"
         % ", ".join("true" if pt["ComplexConjugateB"] else "false"
                     for pt in problemTypes))
    emit("\n")

    if not forBenchmark:
        emit("// dataTypeIdxSerial, problemTypeIdxForDataType, problemTypeIdxSerial, functionIdxSerial, functionIdxForDataType, functionIdxForProblemType\n")
        emit("const unsigned int functionInfo[numFunctions][6] = {\n")
        for position, info in enumerate(functionInfo):
            separator = " " if position == 0 else ",\n "
            emit("%s{ %u, %u, %u, %u, %u, %u }" % ((separator,) + info))
        emit(" };\n")

    ##########################################################################
    # Problem Sizes
    ##########################################################################
    maxNumIndices = max(pt["TotalIndices"] for pt in problemTypes)
    emit("const unsigned int maxNumIndices = %u;\n" % maxNumIndices)
    emit("const unsigned int totalIndices[numProblemTypes] = { %s };\n"
         % ", ".join("%u" % pt["TotalIndices"] for pt in problemTypes))
    if forBenchmark:
        numProblems = problemSizes.totalProblemSizes
        totalIndices = firstProblemType["TotalIndices"]
        emit("const unsigned int numProblems = %u;\n" % numProblems)
        emit("const unsigned int problemSizes[numProblems][%u] = {\n"
             % totalIndices)
        for i in range(numProblems):
            sizes = problemSizes.sizes[i]
            row = " {%5u" % sizes[0]
            for j in range(1, totalIndices):
                row += ",%5u" % sizes[j]
            emit(row + " }")
            # the last row also closes the array initializer
            emit("," if i < numProblems - 1 else "};")
            emit("\n")
        emit("const unsigned int minStrides[%u] = {%s};\n" % (
            totalIndices,
            ", ".join(str(stride) for stride in problemSizes.minStrides)))
    else:
        emit("unsigned int userSizes[maxNumIndices];\n")
        # the library client always starts with 0 for every minStride
        emit("unsigned int minStrides[%u] = {%s};\n" % (
            maxNumIndices, ", ".join(["0"] * maxNumIndices)))

    if forBenchmark:
        emit("/* problem sizes */\n")

    ##########################################################################
    # Max Problem Sizes
    ##########################################################################
    if forBenchmark:
        emit("size_t maxSizeC = %u;\n" % (problemSizes.maxC))
        emit("size_t maxSizeA = %u;\n" % (problemSizes.maxA))
        emit("size_t maxSizeB = %u;\n" % (problemSizes.maxB))
        emit("\n")
    else:
        emit("size_t maxSizeC;\n")
        emit("size_t maxSizeA;\n")
        emit("size_t maxSizeB;\n")
        emit("\n")

    ##########################################################################
    # Current Problem Size
    ##########################################################################
    emit("/* current problem size */\n")
    emit("\n")

    ##########################################################################
    # Solutions
    ##########################################################################
    if forBenchmark:
        emit("/* solutions */\n")
        emit("const unsigned int maxNumSolutions = %u;\n" % len(solutions))
        emit("float solutionPerf[numProblems][maxNumSolutions]; // milliseconds\n")
        emit("\n")

        # Solution Ptrs
        emit("typedef TensileStatus (*SolutionFunctionPointer)(\n")
        argList = solutionWriter.getArgList(
            solutions[0]["ProblemType"], True, True, True)
        for i, arg in enumerate(argList):
            emit(" %s %s%s" % (
                arg[0], arg[1],
                ",\n" if i < len(argList) - 1 else ");\n\n"))

        solutionNames = [solutionWriter.getSolutionName(solution)
                         for solution in solutions]
        emit("const SolutionFunctionPointer solutions[maxNumSolutions] = {\n")
        emit(",\n".join(" %s" % name for name in solutionNames) + "\n")
        emit(" };\n")
        emit("\n")

        # Solution Names
        emit("const char *solutionNames[maxNumSolutions] = {\n")
        emit(",\n".join(" \"%s\"" % name for name in solutionNames) + "\n")
        emit(" };\n")
        emit("\n")
    else:
        # Function Names: one entry per (problem type, schedule) pair
        functionNames = []
        for dataType in dataTypes:
            for problemType in problemTypesForDataType[dataType]:
                for scheduleName in schedulesForProblemType[problemType]:
                    functionNames.append("tensile_%s" % (problemType))
        emit("const char *functionNames[numFunctions] = {\n")
        for position, functionName in enumerate(functionNames):
            emit(" \"%s\"%s\n" % (
                functionName,
                "," if position < len(functionNames) - 1 else ""))
        emit(" };\n")

    ##########################################################################
    # Runtime Structures
    ##########################################################################
    emit("/* runtime structures */\n")
    emit("TensileStatus status;\n")
    if runtimeIsOCL:
        emit("cl_platform_id platform;\n")
        emit("cl_device_id device;\n")
        emit("cl_context context;\n")
        emit("cl_command_queue stream;\n")
    else:
        emit("hipStream_t stream;\n")
    emit("\n")
    emit("void *deviceC;\n")
    emit("void *deviceA;\n")
    emit("void *deviceB;\n")

    ##########################################################################
    # Benchmarking and Validation Parameters
    ##########################################################################
    emit("\n/* benchmarking parameters */\n")
    emit("size_t validationStride;\n")
    # Only the last problem type decides this flag (that is what the leaked
    # loop variable held here before); with several problem types the others
    # are not consulted.
    if problemTypes[-1]["HighPrecisionAccumulate"]:
        emit("static bool useHighPrecisionAccumulate = true;\n")
    else:
        emit("static bool useHighPrecisionAccumulate = false;\n")
    emit("\n")

    ##########################################################################
    # Generated Call to Reference
    ##########################################################################
    emit("/* generated call to reference */\n")
    emit("template<typename DataType>\n")
    emit("TensileStatus generatedCallToReferenceCPU(\n")
    emit(" const unsigned int *sizes,\n")
    emit(" const unsigned int *minStrides,\n")
    emit(" DataType *referenceC,\n")
    emit(" DataType *initialA,\n")
    emit(" DataType *initialB,\n")
    emit(" const unsigned int stride_a,\n")
    emit(" const unsigned int stride_b,\n")
    emit(" const unsigned int stride_c,\n")
    emit(" DataType alpha,\n")
    emit(" DataType beta,\n")
    emit(" bool useHighPrecisionAccumulate) {\n")
    emit(" return tensileReferenceCPU(\n")
    emit(" referenceC,\n")
    emit(" initialA,\n")
    emit(" initialB,\n")
    emit(" stride_a,\n")
    emit(" stride_b,\n")
    emit(" stride_c,\n")
    emit(" alpha,\n")
    emit(" beta,\n")
    emit(" totalIndices[problemTypeIdx],\n")
    emit(" sizes,\n")
    emit(" minStrides,\n")
    emit(" numIndicesC[problemTypeIdx],\n")
    emit(" numIndicesAB[problemTypeIdx],\n")
    emit(" indexAssignmentsA[problemTypeIdx],\n")
    emit(" indexAssignmentsB[problemTypeIdx],\n")
    emit(" complexConjugateA[problemTypeIdx],\n")
    emit(" complexConjugateB[problemTypeIdx],\n")
    emit(" validationStride,\n")
    emit(" useHighPrecisionAccumulate);\n")
    emit("};\n")
    emit("\n")

    if forBenchmark:
        ######################################################################
        # Generated Call to Solution
        ######################################################################
        problemType = solutions[0]["ProblemType"]
        emit("/* generated call to solution */\n")
        emit("template<typename DataType>\n")
        emit("TensileStatus generatedCallToSolution(\n")
        emit(" unsigned int solutionIdx,\n")
        emit(" const unsigned int *sizes,\n")
        emit(" const unsigned int *minStrides,\n")
        emit(" DataType alpha,\n")
        emit(" DataType beta, \n")
        emit(" unsigned int numEvents = 0, \n")
        if runtimeIsOCL:
            emit(" cl_event *event_wait_list = NULL,\n")
            emit(" cl_event *outputEvent = NULL ) {\n")
        else:
            emit(" hipEvent_t *startEvent = NULL,\n")
            emit(" hipEvent_t *stopEvent = NULL ) {\n")

        emit(" // calculate parameters assuming packed data\n")
        # strides: the first stride is only passed when UseInitialStrides
        firstStride = 0 if problemType["UseInitialStrides"] else 1
        assignmentsC = list(range(problemType["NumIndicesC"]))
        assignmentsA = problemType["IndexAssignmentsA"]
        assignmentsB = problemType["IndexAssignmentsB"]

        # calculate strides
        emit(_strideDeclarations(
            "C", assignmentsC, indexChars, _clampedSizeFactor))
        emit(_strideDeclarations(
            "A", assignmentsA, indexChars, _clampedSizeFactor))
        emit(_strideDeclarations(
            "B", assignmentsB, indexChars, _clampedSizeFactor))
        for i in range(problemType["TotalIndices"]):
            emit(" unsigned int size%s = sizes[%u];\n" % (indexChars[i], i))
        emit("\n")

        # function call
        emit(" // call solution function\n")
        if runtimeIsOCL:
            emit(" return solutions[solutionIdx]( static_cast<cl_mem>(deviceC), static_cast<cl_mem>(deviceA), static_cast<cl_mem>(deviceB),\n")
        else:
            typeName = dataTypes[0].toCpp()
            emit(" return solutions[solutionIdx]( static_cast<%s *>(deviceC), static_cast<%s *>(deviceA), static_cast<%s *>(deviceB),\n"
                 % (typeName, typeName, typeName))
        emit(" alpha,\n")
        if problemType["UseBeta"]:
            emit(" beta,\n")
        emit(" 0, 0, 0, // offsets\n")
        emit(_strideArguments("C", assignmentsC, indexChars, firstStride))
        emit(_strideArguments("A", assignmentsA, indexChars, firstStride))
        emit(_strideArguments("B", assignmentsB, indexChars, firstStride))
        for i in range(problemType["TotalIndices"]):
            emit(" size%s,\n" % indexChars[i])
        emit(" stream,\n")
        if runtimeIsOCL:
            emit(" numEvents, event_wait_list, outputEvent ); // events\n")
        else:
            emit(" numEvents, startEvent, stopEvent); // events\n")
        emit("};\n")
        emit("\n")
    else:
        ######################################################################
        # Generated Call to Function
        ######################################################################
        # Two wrappers are generated: one that enqueues the function and one
        # that only asks for the name of the solution that would run.
        for enqueue in [True, False]:
            functionName = "tensile" if enqueue else "tensileGetSolutionName"
            returnName = "TensileStatus" if enqueue else "const char *"
            emit("/* generated call to function */\n")
            emit("template<typename DataType>\n")
            emit("%s generatedCallTo_%s(\n" % (returnName, functionName))
            emit(" unsigned int *sizes,\n")
            emit(" unsigned int *minStrides,\n")
            emit(" DataType alpha,\n")
            emit(" DataType beta, \n")
            emit(" unsigned int strideA, \n")
            emit(" unsigned int strideB, \n")
            emit(" unsigned int strideC, \n")
            emit(" unsigned int numEvents = 0, \n")
            if runtimeIsOCL:
                emit(" cl_event *event_wait_list = NULL,\n")
                emit(" cl_event *outputEvent = NULL );\n\n")
            else:
                emit(" hipEvent_t *startEvent = NULL,\n")
                emit(" hipEvent_t *stopEvent = NULL );\n\n")

            for dataType in dataTypes:
                typeName = dataType.toCpp()
                # one branch per (problem type, schedule) pair of this type
                functionsForDataType = [
                    problemType
                    for problemType in problemTypesForDataType[dataType]
                    for _ in schedulesForProblemType[problemType]]
                numBranches = len(functionsForDataType)

                emit("template<>\n")
                emit("inline %s generatedCallTo_%s<%s>(\n"
                     % (returnName, functionName, typeName))
                emit(" unsigned int *sizes,\n")
                emit(" unsigned int *minStrides,\n")
                emit(" %s alpha,\n" % typeName)
                emit(" %s beta,\n" % typeName)
                emit(" unsigned int strideA, \n")
                emit(" unsigned int strideB, \n")
                emit(" unsigned int strideC, \n")
                emit(" unsigned int numEvents, \n")
                if runtimeIsOCL:
                    emit(" cl_event *event_wait_list,\n")
                    emit(" cl_event *outputEvent ) {\n\n")
                else:
                    emit(" hipEvent_t *startEvent,\n")
                    emit(" hipEvent_t *stopEvent ) {\n\n")

                emit(" unsigned int functionIdxForDataType = functionInfo[functionIdx][4];\n")

                for branchIdx, problemType in enumerate(functionsForDataType):
                    if numBranches > 1:
                        if branchIdx == 0:
                            emit(" if (functionIdxForDataType == %u) {\n"
                                 % branchIdx)
                        elif branchIdx == numBranches - 1:
                            emit(" } else {\n")
                        else:
                            emit(" } else if (functionIdxForDataType == %u) {\n"
                                 % branchIdx)

                    # strides
                    firstStride = 0 if problemType["UseInitialStrides"] else 1
                    lastStrideC = problemType["NumIndicesC"]
                    assignmentsC = list(range(lastStrideC))
                    assignmentsA = problemType["IndexAssignmentsA"]
                    assignmentsB = problemType["IndexAssignmentsB"]

                    # calculate strides; a caller-supplied stride (anything
                    # but UINT_MAX) overrides the last packed stride
                    emit(_strideDeclarations(
                        "C", assignmentsC, indexChars, _sizeFactor))
                    emit(" if (strideC != std::numeric_limits<unsigned int>::max()) strideC%u%s = strideC;\n"
                         % (lastStrideC - 1, indexChars[lastStrideC - 1]))

                    # The A/B override must name the variable exactly as it
                    # was declared, i.e. with the index *assigned* to the
                    # last dimension, not with the dimension's position.
                    emit(_strideDeclarations(
                        "A", assignmentsA, indexChars, _sizeFactor))
                    emit(" if (strideA != std::numeric_limits<unsigned int>::max()) strideA%u%s = strideA;\n"
                         % (len(assignmentsA) - 1,
                            indexChars[assignmentsA[len(assignmentsA) - 1]]))

                    emit(_strideDeclarations(
                        "B", assignmentsB, indexChars, _sizeFactor))
                    emit(" if (strideB != std::numeric_limits<unsigned int>::max()) strideB%u%s = strideB;\n"
                         % (len(assignmentsB) - 1,
                            indexChars[assignmentsB[len(assignmentsB) - 1]]))

                    for i in range(problemType["TotalIndices"]):
                        emit(" unsigned int size%s = sizes[%u];\n"
                             % (indexChars[i], i))

                    # function call
                    emit(" // call solution function\n")
                    emit(" return %s_%s(\n" % (functionName, problemType))
                    if enqueue:
                        # data pointers, scalars and offsets are only passed
                        # to the enqueueing function
                        if runtimeIsOCL:
                            emit(" static_cast<cl_mem>(deviceC),\n")
                            emit(" static_cast<cl_mem>(deviceA),\n")
                            emit(" static_cast<cl_mem>(deviceB),\n")
                        else:
                            emit(" static_cast<%s *>(deviceC),\n" % typeName)
                            emit(" static_cast<%s *>(deviceA),\n" % typeName)
                            emit(" static_cast<%s *>(deviceB),\n" % typeName)
                        emit(" alpha,\n")
                        if problemType["UseBeta"]:
                            emit(" beta,\n")
                        emit(" 0, 0, 0, // offsets\n")
                    emit(_strideArguments(
                        "C", assignmentsC, indexChars, firstStride))
                    emit(_strideArguments(
                        "A", assignmentsA, indexChars, firstStride))
                    emit(_strideArguments(
                        "B", assignmentsB, indexChars, firstStride))
                    for i in range(problemType["TotalIndices"]):
                        emit(" size%s,\n" % indexChars[i])
                    emit(" stream")
                    if enqueue:
                        if runtimeIsOCL:
                            emit(",\n numEvents, event_wait_list, outputEvent")
                        else:
                            emit(",\n numEvents, startEvent, stopEvent")
                    emit(");\n")

                if numBranches > 1:
                    emit(" }\n")  # close last if
                emit("};\n")  # close callToFunction

    ##########################################################################
    # Results File Name
    ##########################################################################
    if forBenchmark:
        emit("/* results file name */\n")
        resultsFileName = os.path.join(
            globalParameters["WorkingPath"], "../../Data",
            "%s.csv" % stepName)
        # double the backslashes so the path survives as a C string literal
        resultsFileName = resultsFileName.replace("\\", "\\\\")
        emit("const char *resultsFileName = \"%s\";\n" % resultsFileName)

    ##########################################################################
    # Write File
    ##########################################################################
    headerPath = os.path.join(
        globalParameters["WorkingPath"], "ClientParameters.h")
    # the context manager closes the file even if a write fails
    with open(headerPath, "w") as clientParametersFile:
        clientParametersFile.write(CHeader)
        clientParametersFile.write("".join(out))
def TensileCreateLibrary():
    """Create library source files from a directory of library-logic YAML.

    Command-line arguments (read from ``sys.argv`` through argparse):
      LogicPath        directory scanned (non-recursively) for ``.yaml`` files
      OutputPath       directory the generated files are written to
      RuntimeLanguage  one of "OCL", "HIP", "HSA"
      --[no-]merge-files, --[no-]short-file-names, --[no-]library-print-debug
                       boolean switches forwarded to assignGlobalParameters

    The parsed values are handed to assignGlobalParameters, every logic file
    is read with YAMLIO.readLibraryLogicForSchedule, the unique solutions and
    kernels are collected, and the solutions/kernels, the CMake file and the
    logic are written to OutputPath.
    """
    print1("")
    print1(HR)
    print1("# Tensile Create Library")
    print2(HR)
    print2("")

    ##########################################################################
    # Parse Command Line Arguments
    ##########################################################################
    print2("Arguments: %s" % sys.argv)
    argParser = argparse.ArgumentParser()
    argParser.add_argument("LogicPath", help="Path to LibraryLogic.yaml files.")
    argParser.add_argument("OutputPath", help="Where to write library files?")
    argParser.add_argument("RuntimeLanguage", help="Which runtime language?",
                           choices=["OCL", "HIP", "HSA"])
    # Each boolean option is a pair of switches writing to the same dest.
    argParser.add_argument("--merge-files", dest="MergeFiles",
                           action="store_true")
    argParser.add_argument("--no-merge-files", dest="MergeFiles",
                           action="store_false")
    argParser.add_argument("--short-file-names", dest="ShortNames",
                           action="store_true")
    argParser.add_argument("--no-short-file-names", dest="ShortNames",
                           action="store_false")
    argParser.add_argument("--library-print-debug", dest="LibraryPrintDebug",
                           action="store_true")
    argParser.add_argument("--no-library-print-debug",
                           dest="LibraryPrintDebug", action="store_false")
    args = argParser.parse_args()

    logicPath = args.LogicPath
    outputPath = args.OutputPath
    print2("OutputPath: %s" % outputPath)
    ensurePath(outputPath)

    arguments = {}
    arguments["RuntimeLanguage"] = args.RuntimeLanguage
    arguments["MergeFiles"] = args.MergeFiles
    arguments["ShortNames"] = args.ShortNames
    arguments["LibraryPrintDebug"] = args.LibraryPrintDebug
    arguments["CodeFromFiles"] = False
    assignGlobalParameters(arguments)

    if not os.path.exists(logicPath):
        printExit("LogicPath %s doesn't exist" % logicPath)

    # Only regular files ending in ".yaml" directly inside logicPath.
    logicFiles = [os.path.join(logicPath, f) for f in os.listdir(logicPath)
                  if (os.path.isfile(os.path.join(logicPath, f))
                      and os.path.splitext(f)[1] == ".yaml")]

    # The header carries no conversion specifier; the files themselves are
    # listed one per line below.
    print1("# LibraryLogicFiles:")
    for logicFile in logicFiles:
        print1("# %s" % logicFile)

    ##########################################################################
    # Parse config files
    ##########################################################################
    solutions = []
    logicData = {}  # keys are problemTypes, values are schedules
    for logicFileName in logicFiles:
        (scheduleName, deviceNames, problemType, solutionsForSchedule,
         indexOrder, exactLogic, rangeLogic) \
            = YAMLIO.readLibraryLogicForSchedule(logicFileName)
        if problemType not in logicData:
            logicData[problemType] = []
        logicData[problemType].append((scheduleName, deviceNames,
                                       solutionsForSchedule, indexOrder,
                                       exactLogic, rangeLogic))
        # Keep the first occurrence of every solution, preserving order.
        for solution in solutionsForSchedule:
            if solution not in solutions:
                solutions.append(solution)

    # Collect the unique kernels (and beta-only kernels) of all solutions.
    kernels = []
    kernelsBetaOnly = []
    for solution in solutions:
        solutionKernels = solution.getKernels()
        for kernel in solutionKernels:
            if kernel not in kernels:
                kernels.append(kernel)
        solutionKernelsBetaOnly = solution.getKernelsBetaOnly()
        for kernel in solutionKernelsBetaOnly:
            if kernel not in kernelsBetaOnly:
                kernelsBetaOnly.append(kernel)

    # Serial naming is only computed when short names are requested and the
    # output is not merged into single files.
    if globalParameters["ShortNames"] and not globalParameters["MergeFiles"]:
        solutionSerialNaming = Solution.getSerialNaming(solutions)
        kernelSerialNaming = Solution.getSerialNaming(kernels)
    else:
        solutionSerialNaming = None
        kernelSerialNaming = None
    solutionMinNaming = Solution.getMinNaming(solutions)
    kernelMinNaming = Solution.getMinNaming(kernels)

    # create solution writer and kernel writers
    solutionWriter = SolutionWriter(
        solutionMinNaming, solutionSerialNaming,
        kernelMinNaming, kernelSerialNaming)
    kernelWriterSource = KernelWriterSource(
        kernelMinNaming, kernelSerialNaming)
    kernelWriterAssembly = KernelWriterAssembly(
        kernelMinNaming, kernelSerialNaming)

    # write solutions and kernels
    # NOTE(review): writeBenchmarkFiles passes an additional problem-type
    # list as the second argument to writeSolutionsAndKernels; confirm which
    # signature is current.
    writeSolutionsAndKernels(outputPath, solutions, kernels, kernelsBetaOnly,
                             solutionWriter, kernelWriterSource,
                             kernelWriterAssembly)

    libraryStaticFiles = [
        "SolutionMapper.h",
        "TensileTypes.h",
        "KernelHeader.h",
        "SolutionHelper.cpp",
        "SolutionHelper.h",
        "Tools.cpp",
        "Tools.h",
    ]

    # write cmake
    clientName = "LibraryClient"
    writeCMake(outputPath, solutions, kernels, libraryStaticFiles, clientName)

    # write logic
    writeLogic(outputPath, logicData, solutionWriter)
    print1("# Tensile Library Writer DONE")
    print1(HR)
    print1("")
def _parseGfxISA(isa):
    """Extract (major, minor, step) from a string such as ``gfx803``.

    Each of the three characters following the first ``gfx`` is converted
    with int(). Returns None when ``gfx`` is absent or when any of the three
    characters is missing or not a digit.
    """
    gfxIdx = isa.find("gfx")
    if gfxIdx < 0:
        return None
    start = gfxIdx + 3
    try:
        # One-character slices: an empty slice (string too short) or a
        # non-digit makes int() raise ValueError.
        return tuple(int(isa[i:i + 1]) for i in range(start, start + 3))
    except ValueError:
        return None


def TensileCreateLibrary():
    """Create library source files from a directory of library-logic YAML.

    Command-line arguments (read from ``sys.argv`` through argparse):
      LogicPath        directory scanned (non-recursively) for ``.yaml`` files
      OutputPath       directory the generated files are written to
      RuntimeLanguage  one of "OCL", "HIP", "HSA"
      --[no-]merge-files, --[no-]short-file-names, --[no-]library-print-debug
                       boolean switches forwarded to assignGlobalParameters
      --isa gfxNNN     may be repeated; restricts the ISA list used for
                       assembly kernels to the requested entries that are
                       already in globalParameters["SupportedISA"]

    The parsed values are handed to assignGlobalParameters, every logic file
    is read with YAMLIO.readLibraryLogicForSchedule, and the unique solutions
    and kernels are collected. For the HIP runtime every assembly kernel is
    replicated once per supported ISA. Finally the solutions/kernels, the
    CMake file and the logic are written to OutputPath.
    """
    print1("")
    print1(HR)
    print1("# Tensile Create Library")
    print2(HR)
    print2("")

    ##########################################################################
    # Parse Command Line Arguments
    ##########################################################################
    print2("Arguments: %s" % sys.argv)
    argParser = argparse.ArgumentParser()
    argParser.add_argument("LogicPath", help="Path to LibraryLogic.yaml files.")
    argParser.add_argument("OutputPath", help="Where to write library files?")
    argParser.add_argument("RuntimeLanguage", help="Which runtime language?",
                           choices=["OCL", "HIP", "HSA"])
    # Each boolean option is a pair of switches writing to the same dest.
    argParser.add_argument("--merge-files", dest="MergeFiles",
                           action="store_true")
    argParser.add_argument("--no-merge-files", dest="MergeFiles",
                           action="store_false")
    argParser.add_argument("--short-file-names", dest="ShortNames",
                           action="store_true")
    argParser.add_argument("--no-short-file-names", dest="ShortNames",
                           action="store_false")
    argParser.add_argument("--library-print-debug", dest="LibraryPrintDebug",
                           action="store_true")
    argParser.add_argument("--no-library-print-debug",
                           dest="LibraryPrintDebug", action="store_false")
    argParser.add_argument(
        "--isa", dest="isa", action="append",
        help="which architectures for assembly kernels to target")
    args = argParser.parse_args()

    logicPath = args.LogicPath
    outputPath = args.OutputPath
    print2("OutputPath: %s" % outputPath)
    ensurePath(outputPath)

    arguments = {}
    arguments["RuntimeLanguage"] = args.RuntimeLanguage
    arguments["MergeFiles"] = args.MergeFiles
    arguments["ShortNames"] = args.ShortNames
    arguments["LibraryPrintDebug"] = args.LibraryPrintDebug
    if args.isa:
        newISA = []
        for isa in args.isa:
            isaTuple = _parseGfxISA(isa)
            if isaTuple is None:
                # Malformed values are reported and skipped rather than
                # aborting with a ValueError from int().
                printWarning("isa parameter must be formed as: --isa gfx803")
                continue
            # Accept only ISAs that are already supported, once each.
            if isaTuple in globalParameters["SupportedISA"] \
                    and isaTuple not in newISA:
                print1("# User-Specified ISA: gfx%u%u%u" % isaTuple)
                newISA.append(isaTuple)
        arguments["SupportedISA"] = newISA
    assignGlobalParameters(arguments)

    if not os.path.exists(logicPath):
        printExit("LogicPath %s doesn't exist" % logicPath)

    # Only regular files ending in ".yaml" directly inside logicPath.
    logicFiles = [os.path.join(logicPath, f) for f in os.listdir(logicPath)
                  if (os.path.isfile(os.path.join(logicPath, f))
                      and os.path.splitext(f)[1] == ".yaml")]

    # The header carries no conversion specifier; the files themselves are
    # listed one per line below.
    print1("# LibraryLogicFiles:")
    for logicFile in logicFiles:
        print1("# %s" % logicFile)

    ##########################################################################
    # Parse config files
    ##########################################################################
    solutions = []
    logicData = {}  # keys are problemTypes, values are schedules
    for logicFileName in logicFiles:
        (scheduleName, deviceNames, problemType, solutionsForSchedule,
         indexOrder, exactLogic, rangeLogic) \
            = YAMLIO.readLibraryLogicForSchedule(logicFileName)
        if problemType not in logicData:
            logicData[problemType] = []
        logicData[problemType].append((scheduleName, deviceNames,
                                       solutionsForSchedule, indexOrder,
                                       exactLogic, rangeLogic))
        # Keep the first occurrence of every solution, preserving order.
        for solution in solutionsForSchedule:
            if solution not in solutions:
                solutions.append(solution)

    # Collect the unique kernels (and beta-only kernels) of all solutions.
    kernels = []
    kernelsBetaOnly = []
    for solution in solutions:
        solutionKernels = solution.getKernels()
        for kernel in solutionKernels:
            if kernel not in kernels:
                kernels.append(kernel)
        solutionKernelsBetaOnly = solution.getKernelsBetaOnly()
        for kernel in solutionKernelsBetaOnly:
            if kernel not in kernelsBetaOnly:
                kernelsBetaOnly.append(kernel)

    # if any kernels are assembly, append every ISA supported
    if globalParameters["RuntimeLanguage"] == "HIP":
        supportedISA = globalParameters["SupportedISA"]
        newKernels = []
        for kernel in kernels:
            if kernel["KernelLanguage"] == "Assembly":
                if not supportedISA:
                    # An empty list (e.g. no --isa value was accepted) would
                    # otherwise surface as a bare IndexError below.
                    # NOTE(review): printExit is assumed to terminate the
                    # program - confirm.
                    printExit("No supported ISA available for assembly "
                              "kernels; check the --isa arguments")
                # The existing kernel takes the first ISA; copies are made
                # for the remaining ones and appended after the loop so the
                # list is not modified while it is being iterated.
                kernel["ISA"] = supportedISA[0]
                for isaTuple in supportedISA[1:]:
                    newKernel = deepcopy(kernel)
                    newKernel["ISA"] = isaTuple
                    newKernels.append(newKernel)
            else:
                kernel["ISA"] = (0, 0, 0)
        kernels.extend(newKernels)

    # Serial naming is only computed when short names are requested and the
    # output is not merged into single files.
    if globalParameters["ShortNames"] and not globalParameters["MergeFiles"]:
        solutionSerialNaming = Solution.getSerialNaming(solutions)
        kernelSerialNaming = Solution.getSerialNaming(kernels)
    else:
        solutionSerialNaming = None
        kernelSerialNaming = None
    solutionMinNaming = Solution.getMinNaming(solutions)
    kernelMinNaming = Solution.getMinNaming(kernels)

    # create solution writer and kernel writers
    solutionWriter = SolutionWriter(
        solutionMinNaming, solutionSerialNaming,
        kernelMinNaming, kernelSerialNaming)
    kernelWriterSource = KernelWriterSource(
        kernelMinNaming, kernelSerialNaming)
    kernelWriterAssembly = KernelWriterAssembly(
        kernelMinNaming, kernelSerialNaming)

    # write solutions and kernels
    # NOTE(review): writeBenchmarkFiles passes an additional problem-type
    # list as the second argument to writeSolutionsAndKernels; confirm which
    # signature is current.
    writeSolutionsAndKernels(outputPath, solutions, kernels, kernelsBetaOnly,
                             solutionWriter, kernelWriterSource,
                             kernelWriterAssembly)

    libraryStaticFiles = [
        "TensileTypes.h",
        "KernelHeader.h",
        "SolutionHelper.cpp",
        "SolutionHelper.h",
        "Tools.cpp",
        "Tools.h",
    ]

    # write cmake
    clientName = "LibraryClient"
    writeCMake(outputPath, solutions, kernels, libraryStaticFiles, clientName)

    # write logic
    writeLogic(outputPath, logicData, solutionWriter)
    print1("# Tensile Library Writer DONE")
    print1(HR)
    print1("")