def addResults(self, hardcodedParameterList, benchmarkPermutations,
        solutions, results):
    """Store the best-scoring benchmark parameters for each hardcoded set.

    For every index of ``results`` the benchmark entry with the highest
    score is selected, the values of the benchmark parameter names (the
    keys of ``benchmarkPermutations[0]``) are read from the matching
    solution, and the ``self.winners`` entry that matches the hardcoded
    parameters is updated with those values and the score.

    hardcodedParameterList -- hardcoded parameter sets, indexed like results
    benchmarkPermutations  -- list of permutation dicts; only the keys of
                              the first one are used
    solutions              -- solutions[hardcodedIdx][benchmarkIdx]
    results                -- results[hardcodedIdx][benchmarkIdx] is a
                              sequence of scores; its max() is the score

    Calls printExit() unless exactly one winners entry matches.
    """
    if globalParameters["PrintLevel"] >= 1:
        print1("# Adding Results to Solution Database")
        progressBar = ProgressBar(len(results))

    for hcIdx, scoresPerBenchmark in enumerate(results):
        hcParameters = hardcodedParameterList[hcIdx]

        # -1 is the score of an invalid run, so start from -9999
        bestIdx = -1
        bestScore = -9999
        # find fastest benchmark parameters for this hardcoded
        for candidateIdx, candidateScores in enumerate(scoresPerBenchmark):
            candidateScore = max(candidateScores)  # fastest regardless of size
            if candidateScore > bestScore:
                bestScore, bestIdx = candidateScore, candidateIdx

        bestSolution = solutions[hcIdx][bestIdx]
        bestParameters = dict(
            (name, bestSolution[name]) for name in benchmarkPermutations[0])

        found = WinningParameterDict.get(hcParameters, self.winners)
        if len(found) != 1:
            printExit("Didn't find exactly 1 match")
        winnerKey = found[0][0]

        winnerEntry = self.winners[winnerKey]
        winnerEntry[0].update(bestParameters)
        self.winners[winnerKey][1] = bestScore

        if globalParameters["PrintLevel"] >= 1:
            progressBar.increment()
def wpdUpdate(self, newHardcodedParameterList):
    """Rebuild ``self.winners`` for a new list of hardcoded parameter sets.

    If there were no previous winners, every new set gets a dummy entry
    ``[{}, -1]``. Otherwise each new set is looked up in the previous
    winners via ``WinningParameterDict.get``:
      * exactly one match: the new set is extended with the matched key's
        parameters and inherits its winning parameters and score;
      * several matches ("join"): the match with the highest score
        (strictly greater than -1) is used the same way;
      * no match: the set gets no entry.

    The dicts in ``newHardcodedParameterList`` are updated in place.
    Returns the list of ``.parameters`` of all keys now in ``self.winners``.
    """
    # TODO when new list is joining, we need to choose the fastest
    previousWinners = self.winners
    self.winners = {}

    if len(previousWinners) == 0:
        # first time: populate with dummies
        for freshParameters in newHardcodedParameterList:
            self.winners[FrozenDictionary(freshParameters)] = [{}, -1]
    else:
        if globalParameters["PrintLevel"] >= 1:
            print1("# Updating Solution Database")
            progressBar = ProgressBar(len(newHardcodedParameterList))

        for freshParameters in newHardcodedParameterList:
            found = WinningParameterDict.get(freshParameters, previousWinners)
            numFound = len(found)

            if numFound == 1:
                # plain update
                only = found[0]
                frozenKey = only[0]
                keptWinning = only[1]
                keptScore = only[2]
                freshParameters.update(frozenKey.parameters)
                self.winners[FrozenDictionary(freshParameters)] = \
                    [keptWinning, keptScore]
            elif numFound > 1:
                # join: keep the fastest of the matches
                topScore = -1
                topHardcoded = {}
                topWinning = {}
                for candidate in found:
                    frozenKey = candidate[0]
                    candidateWinning = candidate[1]
                    candidateScore = candidate[2]
                    if candidateScore > topScore:
                        topScore = candidateScore
                        topWinning = candidateWinning
                        topHardcoded = frozenKey.parameters
                freshParameters.update(topHardcoded)
                self.winners[FrozenDictionary(freshParameters)] = \
                    [topWinning, topScore]

            if globalParameters["PrintLevel"] >= 1:
                progressBar.increment()

    # return resulting hardcodedParameterList
    return [frozenKey.parameters for frozenKey in self.winners]
def benchmarkProblemType(problemTypeConfig, problemSizeGroupConfig,
        problemSizeGroupIdx):
    """Run every benchmark step of one problem type / problem-size group.

    A BenchmarkProcess is built from the two configs. For each of its steps
    this function:
      1. refreshes the step's hardcoded parameters from the winners so far,
      2. copies the client source files into a temporary source directory,
      3. enumerates benchmark permutations and the resulting valid,
         de-duplicated solutions (hardcoded x benchmark),
      4. drops hardcoded sets that produced no valid solution,
      5. writes the benchmark files and runs the generated script unless a
         results CSV already exists (and ForceRedoBenchmarkProblems is off),
      6. feeds the results into the winners and writes the solutions YAML.

    problemTypeConfig, problemSizeGroupConfig -- passed to BenchmarkProcess
    problemSizeGroupIdx -- index used in the working-directory name

    Returns (resultsFileBaseFinal, benchmarkTestFails): the results file
    base (no extension) of the step for which isFinal() is true, or None,
    and the number of benchmark runs that exited with a non-zero code.
    Calls printExit() when a step yields no valid solution at all.
    """
    benchmarkTestFails = 0

    # convert config to full benchmark process (resolves defaults)
    print1("")
    print1(HR)
    print1("# Converting Config to BenchmarkProcess Object")
    print1(HR)
    print1("")
    benchmarkProcess = BenchmarkProcess(problemTypeConfig,
                                        problemSizeGroupConfig)

    problemTypeName = str(benchmarkProcess.problemType)
    problemSizeGroupName = "%s_%02u" % (problemTypeName, problemSizeGroupIdx)
    pushWorkingPath(problemSizeGroupName)
    ensurePath(os.path.join(globalParameters["WorkingPath"], "Data"))

    totalBenchmarkSteps = len(benchmarkProcess)
    resultsFileBaseFinal = None
    winners = WinningParameterDict()
    print1("# NumBenchmarkSteps: %u" % totalBenchmarkSteps)
    print1("")
    print1(HR)
    print1("# Done Creating BenchmarkProcess Object")
    print1(HR)

    ##########################################################################
    # For Each Benchmark Step
    ##########################################################################
    for benchmarkStepIdx in range(0, totalBenchmarkSteps):

        benchmarkStep = benchmarkProcess[benchmarkStepIdx]
        if winners.winners == {}:
            # perf optimization to skip the initial winners creation; the
            # winners are populated after enumeration below instead
            print1(
                "# Empty winners - use fast initialization of hardcodedParameters"
            )
            resultingHardcodedParameterList = benchmarkStep.hardcodedParameters
        else:
            resultingHardcodedParameterList = \
                winners.wpdUpdate(benchmarkStep.hardcodedParameters)

        benchmarkStep.hardcodedParameters = resultingHardcodedParameterList
        numHardcoded = len(benchmarkStep.hardcodedParameters)
        stepName = str(benchmarkStep)
        shortName = benchmarkStep.abbreviation()
        print1("\n")
        print1(HR)
        currentTime = time.time()
        elapsedTime = currentTime - startTime
        print1("# BenchmarkStep: %s - %s %.3fs"
               % (problemSizeGroupName, stepName, elapsedTime))
        print1("# NumProblems: %u" % benchmarkStep.problemSizes.totalProblemSizes)
        print1("# BenchmarkParameters:")
        for paramName in benchmarkStep.benchmarkParameters:
            paramValues = benchmarkStep.benchmarkParameters[paramName]
            print1("# %s = { %s }"
                   % (paramName, ", ".join(str(v) for v in paramValues)))

        pushWorkingPath(shortName)

        ######################################################################
        # Copy Files to Benchmark Source Directory
        ######################################################################
        stepBaseDir = globalParameters["WorkingPath"]
        sourceDir = os.path.join(stepBaseDir, "source")
        ensurePath(sourceDir)
        pushWorkingPath("sourceTmp")
        filesToCopy = [
            "SolutionMapper.h",
            "Client.cpp",
            "Client.h",
            "CMakeLists.txt",
            "DeviceStats.h",
            "TensorUtils.h",
            "MathTemplates.cpp",
            "MathTemplates.h",
            "TensileTypes.h",
            "tensile_bfloat16.h",
            "KernelHeader.h",
            "ReferenceCPU.h",
            "SolutionHelper.cpp",
            "SolutionHelper.h",
            "Tools.cpp",
            "Tools.h",
        ]

        for f in filesToCopy:
            shutil_copy(os.path.join(globalParameters["SourcePath"], f),
                        globalParameters["WorkingPath"])
        if globalParameters["RuntimeLanguage"] == "OCL":
            shutil_copy(
                os.path.join(globalParameters["SourcePath"], "FindOpenCL.cmake"),
                globalParameters["WorkingPath"])
        else:
            shutil_copy(
                os.path.join(globalParameters["SourcePath"], "FindHIP.cmake"),
                globalParameters["WorkingPath"])
            shutil_copy(
                os.path.join(globalParameters["SourcePath"], "FindHCC.cmake"),
                globalParameters["WorkingPath"])

        ######################################################################
        # Enumerate Benchmark Permutations
        ######################################################################
        solutions = []
        totalBenchmarkPermutations = 1
        for benchmarkParamName in benchmarkStep.benchmarkParameters:
            totalBenchmarkPermutations *= len(
                benchmarkStep.benchmarkParameters[benchmarkParamName])
        maxPossibleSolutions = totalBenchmarkPermutations * numHardcoded
        print1("# MaxPossibleSolutions: %u = %u (hardcoded) * %u (benchmark)" %
               (maxPossibleSolutions, numHardcoded, totalBenchmarkPermutations))

        benchmarkPermutations = []
        for i in range(0, totalBenchmarkPermutations):
            permutation = {}
            pIdx = i  # decoded as a mixed-radix number, one digit per parameter
            for benchmarkParamName in benchmarkStep.benchmarkParameters:
                benchmarkParamValues = \
                    benchmarkStep.benchmarkParameters[benchmarkParamName]
                numValues = len(benchmarkParamValues)
                # copy only the chosen value so permutations never alias it
                permutation[benchmarkParamName] = deepcopy(
                    benchmarkParamValues[pIdx % numValues])
                # floor division keeps pIdx an int under true division too
                pIdx //= numValues
            benchmarkPermutations.append(permutation)

        ######################################################################
        # Enumerate Solutions = Hardcoded * Benchmark
        ######################################################################
        print1("# Enumerating Solutions")
        if globalParameters["PrintLevel"] >= 1:
            progressBar = ProgressBar(maxPossibleSolutions)
        solutionSet = set()  # avoid duplicates for nlca=-1, 1
        for hardcodedIdx in range(0, numHardcoded):
            solutions.append([])
            hardcodedParamDict = benchmarkStep.hardcodedParameters[hardcodedIdx]
            for benchmarkPermutation in benchmarkPermutations:
                solution = {
                    "ProblemType": deepcopy(benchmarkProcess.problemType.state)
                }
                solution.update(benchmarkPermutation)
                solution.update(hardcodedParamDict)
                if benchmarkStepIdx > 0:
                    winningParameters = winners[hardcodedParamDict]
                    if winningParameters is None:
                        # this is a joined parameter that didn't have a
                        # winner, that's okay
                        continue
                    solution.update(winningParameters)

                # append default parameters where necessary
                for initialName in benchmarkStep.initialSolutionParameters:
                    if initialName not in solution:
                        solution[initialName] = \
                            benchmarkStep.initialSolutionParameters[initialName]
                # TODO check if solution matches problem size for exact tile kernels
                solutionObject = Solution(solution)
                if solutionObject["Valid"]:
                    if solutionObject not in solutionSet:
                        solutionSet.add(solutionObject)
                        solutions[hardcodedIdx].append(solutionObject)
                else:
                    if globalParameters["PrintSolutionRejectionReason"]:
                        print1("rejecting solution %s" % str(solutionObject))
                if globalParameters["PrintLevel"] >= 1:
                    progressBar.increment()

        # Remove hardcoded sets that don't have any valid benchmarks. They are
        # dropped by position (not by value) so the list stays aligned with
        # `solutions` even when two hardcoded dicts compare equal, and no
        # dict-by-dict comparison is needed.
        keptHardcoded = [
            hardcodedParamDict
            for hardcodedParamDict, solutionsForHardcoded
            in zip(benchmarkStep.hardcodedParameters, solutions)
            if len(solutionsForHardcoded) > 0
        ]
        numRemoved = numHardcoded - len(keptHardcoded)

        if numRemoved > 0:
            # mutate in place: other references to this list must see the change
            benchmarkStep.hardcodedParameters[:] = keptHardcoded
            print1(
                "# Updating winners since enumeration removed unused hardcoded solutions. removeHardcoded=%u winners=%u"
                % (numRemoved, len(winners.winners)))
            winners.wpdUpdate(benchmarkStep.hardcodedParameters)
            if globalParameters["PrintLevel"] >= 1:
                print1("")
            numHardcoded = len(benchmarkStep.hardcodedParameters)
            # remove from solution 2D list also
            solutions[:] = [s for s in solutions if len(s) > 0]
        elif winners.winners == {}:
            print1("# Populating initial winners (%u solutions)\n"
                   % len(benchmarkStep.hardcodedParameters))
            for hcParm in benchmarkStep.hardcodedParameters:
                winners.winners[FrozenDictionary(hcParm)] = [{}, -1]

        print1("# Actual Solutions: %u / %u\n"
               % (len(solutions), maxPossibleSolutions))

        # create linear list
        solutionList = []
        for solutionsForHardcoded in solutions:
            solutionList.extend(solutionsForHardcoded)
        if len(solutionList) == 0:
            msg = "Your parameters resulted in 0 valid solutions."
            if globalParameters["PrintSolutionRejectionReason"]:
                msg += "\nExamine reject and backtrace messages above to see why and where solutions were rejected."
            else:
                msg += "\nYou should re-run with \"PrintSolutionRejectionReason: True\" to see why each parameter combination was rejected."
            printExit(msg)
        if globalParameters["PrintLevel"] >= 1:
            for i, solutionsForHardcoded in enumerate(solutions):
                for j, solution in enumerate(solutionsForHardcoded):
                    print2("# (%u:%u) %s"
                           % (i, j, Solution.getNameFull(solution)))
            print2(HR)

        # write benchmarkFiles
        writeBenchmarkFiles(stepBaseDir, solutionList,
                            benchmarkStep.problemSizes, shortName, filesToCopy)

        # Only files that changed are copied into `source`, leaving unchanged
        # files there untouched.
        print1("# Copying files that differ from sourceTmp -> source")
        sourceTmp = globalParameters["WorkingPath"]
        for f in os.listdir(sourceTmp):
            f0 = os.path.join(sourceTmp, f)
            f1 = os.path.join(sourceDir, f)
            if os.path.isdir(f0):
                if os.path.isdir(f1):
                    shutil.rmtree(f1, True)
                shutil.copytree(f0, f1)
            elif not os.path.exists(f1) or not filecmp.cmp(f0, f1):
                shutil.copy(f0, f1)
        shutil.rmtree(sourceTmp, True)

        popWorkingPath()  # source

        ######################################################################
        # Run Benchmark Script
        ######################################################################
        resultsFileBase = os.path.normpath(os.path.join(
            globalParameters["WorkingPath"], "../Data", shortName))
        if benchmarkStep.isFinal():
            resultsFileBaseFinal = resultsFileBase
        resultsFileName = resultsFileBase + ".csv"
        solutionsFileName = resultsFileBase + ".yaml"
        if not os.path.exists(resultsFileName) or \
                globalParameters["ForceRedoBenchmarkProblems"]:
            pushWorkingPath("build")

            # write runScript
            libraryLogicPath = None
            path = globalParameters["WorkingPath"]
            forBenchmark = True
            runScriptName = writeRunScript(path, libraryLogicPath, forBenchmark)

            # run runScript
            process = Popen(runScriptName, cwd=globalParameters["WorkingPath"])
            process.communicate()
            if process.returncode:
                benchmarkTestFails += 1
                printWarning(
                    "BenchmarkProblems: Benchmark Process exited with code %u"
                    % process.returncode)
            popWorkingPath()  # build
        else:
            print1("# Already benchmarked; skipping.")

        ######################################################################
        # Winners -> Determined Parameters
        ######################################################################
        results = getResults(resultsFileName, solutions)
        print2("CSV Results: %s" % results)
        winners.addResults(benchmarkStep.hardcodedParameters,
                           benchmarkPermutations, solutions, results)

        ######################################################################
        # Write Solutions YAML
        ######################################################################
        YAMLIO.writeSolutions(solutionsFileName, benchmarkStep.problemSizes,
                              solutions)

        # End Iteration
        popWorkingPath()  # stepName
        currentTime = time.time()
        elapsedTime = currentTime - startTime
        print1("%s\n# %s\n# %s: End - %.3fs\n%s\n"
               % (HR, problemSizeGroupName, shortName, elapsedTime, HR))

    popWorkingPath()  # ProblemType
    return (resultsFileBaseFinal, benchmarkTestFails)
def writeSolutionsAndKernels(outputPath, solutions, kernels, kernelsBetaOnly,
        solutionWriter, kernelWriterSource, kernelWriterAssembly):
    """Generate kernel and solution source/header files under ``outputPath``.

    With globalParameters["MergeFiles"] set, everything goes into
    Kernels.cpp/.h and Solutions.cpp/.h; otherwise each kernel and solution
    gets its own pair of files in the "Kernels" / "Solutions" directories.

    Kernel source is produced by processKernelSourceChunk over index ranges
    of ``kernels``: in child processes connected by pipes when the CPU count
    derived from globalParameters["CpuThreads"] is non-zero, in this process
    otherwise. Kernels reported with an error are collected and passed to
    solutionWriter.getSourceFileString for every solution.

    Calls printExit() if a child process exits with a non-zero code, or at
    the end when globalParameters["ExitAfterKernelGen"] is set.
    """
    start = time.time()
    print1("# Writing Kernels...")
    mergeFiles = globalParameters["MergeFiles"]
    showProgressBar = globalParameters["ShowProgressBar"]
    if not mergeFiles:
        ensurePath(os.path.join(outputPath, "Solutions"))
        ensurePath(os.path.join(outputPath, "Kernels"))

    if showProgressBar:
        progressBar = ProgressBar(len(kernels))

    ##########################################################################
    # Write Kernels
    ##########################################################################
    kernelSourceFile = None
    kernelHeaderFile = None
    if mergeFiles:
        kernelSourceFile = open(os.path.join(outputPath, "Kernels.cpp"), "w")
        kernelHeaderFile = open(os.path.join(outputPath, "Kernels.h"), "w")
        kernelSourceFile.write(CHeader)
        kernelHeaderFile.write(CHeader)
        kernelSourceFile.write("#include \"Kernels.h\"\n")
        kernelHeaderFile.write("#pragma once\n")
        if globalParameters["RuntimeLanguage"] == "HIP":
            kernelHeaderFile.write("#define HCC_ENABLE_ACCELERATOR_PRINTF\n\n")
            kernelHeaderFile.write("#include <hip/hip_runtime.h>\n")
            kernelHeaderFile.write("#include \"TensileTypes.h\"\n")
            kernelHeaderFile.write("#include \"KernelHeader.h\"\n")
            kernelHeaderFile.write("\n\n")
            kernelHeaderFile.write(
                "__device__ inline int GenDot4(int a, int b, int c) { \n")
            kernelHeaderFile.write(
                " typedef struct { int c0:8,c1:8,c2:8,c3:8; } C4I8;\n")
            kernelHeaderFile.write(
                " typedef union { int32_t i; C4I8 z; } PkInt8x4;\n")
            kernelHeaderFile.write(" PkInt8x4 va, vb; va.i = a; vb.i = b;\n")
            kernelHeaderFile.write(
                " return c + (vb.z.c3*va.z.c3 + vb.z.c2*va.z.c2 + vb.z.c1*va.z.c1 + vb.z.c0*va.z.c0); }\n"
            )
            kernelHeaderFile.write("\n\n")
        else:
            kernelHeaderFile.write("#include <string>\n")

    kernelsWithBuildErrs = {}

    def writeKernelResults(chunkResults):
        # Write one chunk of (err, src, header, kernelName) tuples and record
        # the kernels that reported an error.
        for (err, src, header, kernelName) in chunkResults:
            if err:
                kernelsWithBuildErrs[kernelName] = err
            if mergeFiles:
                kernelSourceFile.write(src)
                kernelHeaderFile.write(header)
            else:
                with open(os.path.join(outputPath, "Kernels",
                                       kernelName + ".cpp"), "w") as cppFile:
                    cppFile.write(CHeader)
                    cppFile.write(src)
                with open(os.path.join(outputPath, "Kernels",
                                       kernelName + ".h"), "w") as hFile:
                    hFile.write(CHeader)
                    hFile.write(header)

    prepAsm()

    if globalParameters["CpuThreads"] == 0:
        cpus = 0
    elif globalParameters["CodeFromFiles"]:
        cpu_count = multiprocessing.cpu_count()
        cpus = cpu_count * 4 if globalParameters["CpuThreads"] == -1 \
            else globalParameters["CpuThreads"]
    else:  #! CodeFromFiles is not thread-safe since code merged into same file
        cpus = 1

    # floor division: chunk bounds must stay integers to be usable as indices
    workPerCpu = max(10, (len(kernels) + cpus - 1) // cpus) if cpus else 1
    print("# Launching kernel compilation processes (cpus=%u kernelsPerCpu=%u)"
          % (cpus, workPerCpu))

    kiStart = 0
    threads = []
    if cpus and showProgressBar:
        processLaunchProgressBar = ProgressBar(len(kernels))
    else:
        processLaunchProgressBar = None
    while kiStart < len(kernels):
        kiStop = min(len(kernels), kiStart + workPerCpu)
        if cpus:
            parentConn, child = multiprocessing.Pipe()
            args = (kernels, kernelWriterSource, kernelWriterAssembly,
                    kiStart, kiStop, child)
            t = multiprocessing.Process(target=processKernelSourceChunk,
                                        args=args)
            t.start()
            child.close()  # close child pipe in the parent process
            threads.append([t, kiStart, kiStop, parentConn])
            if processLaunchProgressBar:
                processLaunchProgressBar.increment(kiStop - kiStart)
            else:
                sys.stderr.write(
                    " # launched process %s for kernels %d..%d\n"
                    % (t, kiStart, kiStop - 1))
        else:  # non-threaded version
            # Without a pipe the chunk results can only come back as the
            # return value; write them here, otherwise no kernel is emitted.
            # NOTE(review): assumes processKernelSourceChunk returns the
            # result list when its pipe argument is None - confirm.
            results = processKernelSourceChunk(
                kernels, kernelWriterSource, kernelWriterAssembly,
                kiStart, kiStop, None) or []
            if showProgressBar:
                progressBar.increment(kiStop - kiStart)
            writeKernelResults(results)
        kiStart += workPerCpu
    sys.stderr.write("# Waiting for kernel compilation processes...\n")

    someError = 0
    for (t, kiStart, kiStop, parentConn) in threads:
        try:
            results = parentConn.recv()
        except EOFError as pipeErr:
            print("*** warning: process %s returned pipe EOF %s %s"
                  % (t, t, pipeErr))
            # never re-use the previous chunk's results for this chunk
            results = []

        t.join()
        e = t.exitcode
        if e != 0:
            print("*** warning: process %s returned %s %s" % (t, t, e))
            someError = 1
            results = []

        if showProgressBar:
            progressBar.increment(kiStop - kiStart)
        writeKernelResults(results)

    if someError:
        print("\nKernel compilation failed in one or more subprocesses. May want to set CpuThreads=0 and re-run to make debug easier")
        printExit("** kernel compilation failure **")

    # beta-only kernels
    for kernel in kernelsBetaOnly:
        kernelWriter = kernelWriterSource
        kernelName = kernelWriter.getKernelNameBetaOnly(kernel)

        if mergeFiles:
            (err, src) = kernelWriter.getSourceFileStringBetaOnly(kernel)
            kernelSourceFile.write(src)
        else:
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".cpp"), "w") as cppFile:
                cppFile.write(CHeader)
                (err, src) = kernelWriter.getSourceFileStringBetaOnly(kernel)
                cppFile.write(src)
        if err:
            # kernelName is a string, so it must be formatted with %s
            print("*** warning: invalid kernel#%s" % kernelName)

        if mergeFiles:
            kernelHeaderFile.write(
                kernelWriter.getHeaderFileStringBetaOnly(kernel))
        else:
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".h"), "w") as hFile:
                hFile.write(CHeader)
                hFile.write(kernelWriter.getHeaderFileStringBetaOnly(kernel))

    # close merged
    if mergeFiles:
        kernelSourceFile.close()
        kernelHeaderFile.close()

    stop = time.time()
    print("# Kernel Building elapsed time = %.1f secs" % (stop - start))

    print1("# Writing Solutions")
    if showProgressBar:
        progressBar = ProgressBar(len(solutions))
    ##########################################################################
    # Write Solutions
    ##########################################################################
    if mergeFiles:
        solutionSourceFile = open(os.path.join(outputPath, "Solutions.cpp"), "w")
        solutionHeaderFile = open(os.path.join(outputPath, "Solutions.h"), "w")
        solutionSourceFile.write(CHeader)
        solutionHeaderFile.write(CHeader)
        solutionSourceFile.write("#include \"Solutions.h\"\n")
        solutionSourceFile.write("#include <algorithm>\n")
        solutionHeaderFile.write("#include \"TensileTypes.h\"\n")
        solutionHeaderFile.write("#include \"Kernels.h\"\n")
        solutionHeaderFile.write("#include \"SolutionHelper.h\"\n")
        solutionHeaderFile.write("#include \"Tools.h\"\n")
        if globalParameters["CodeFromFiles"]:
            solutionHeaderFile.write("#include <unistd.h>\n")

    for solution in solutions:
        if mergeFiles:
            solutionSourceFile.write(
                solutionWriter.getSourceFileString(solution,
                                                   kernelsWithBuildErrs))
            solutionHeaderFile.write(
                solutionWriter.getHeaderFileString(solution))
        else:
            solutionFileName = solutionWriter.getSolutionName(solution)
            with open(os.path.join(outputPath, "Solutions",
                                   solutionFileName + ".cpp"), "w") as cppFile:
                cppFile.write(CHeader)
                cppFile.write(
                    solutionWriter.getSourceFileString(solution,
                                                       kernelsWithBuildErrs))
            with open(os.path.join(outputPath, "Solutions",
                                   solutionFileName + ".h"), "w") as hFile:
                hFile.write(CHeader)
                hFile.write(solutionWriter.getHeaderFileString(solution))
        if showProgressBar:
            progressBar.increment()

    # close merged (per-solution files are already closed above)
    if mergeFiles:
        solutionSourceFile.close()
        solutionHeaderFile.close()

    if globalParameters["ExitAfterKernelGen"]:
        printExit(
            "** Exiting after kernel generation due to ExitAfterKernelGen=1")
def writeSolutionsAndKernels(outputPath, solutions, kernels, kernelsBetaOnly,
        solutionWriter, kernelWriterSource, kernelWriterAssembly):
    """Write solution and kernel source/header files under ``outputPath``.

    With globalParameters["MergeFiles"] set, all solutions go into
    Solutions.cpp/.h and all kernels into Kernels.cpp/.h; otherwise each
    solution and kernel gets its own .cpp/.h pair in the "Solutions" and
    "Kernels" directories. Kernels whose "KernelLanguage" is "Source" are
    written with kernelWriterSource, all others with kernelWriterAssembly;
    beta-only kernels always use kernelWriterSource.
    """
    print1("# Writing Solutions and Kernels")
    mergeFiles = globalParameters["MergeFiles"]
    if not mergeFiles:
        ensurePath(os.path.join(outputPath, "Solutions"))
        ensurePath(os.path.join(outputPath, "Kernels"))

    progressBar = ProgressBar(len(solutions) + len(kernels))

    ##########################################################################
    # Write Solutions
    ##########################################################################
    if mergeFiles:
        solutionSourceFile = open(os.path.join(outputPath, "Solutions.cpp"), "w")
        solutionHeaderFile = open(os.path.join(outputPath, "Solutions.h"), "w")
        solutionSourceFile.write(CHeader)
        solutionHeaderFile.write(CHeader)
        solutionSourceFile.write("#include \"Solutions.h\"\n")
        solutionHeaderFile.write("#include \"TensileTypes.h\"\n")
        solutionHeaderFile.write("#include \"Kernels.h\"\n")
        solutionHeaderFile.write("#include \"SolutionHelper.h\"\n")
        solutionHeaderFile.write("#include \"Tools.h\"\n")

    for solution in solutions:
        if mergeFiles:
            solutionSourceFile.write(
                solutionWriter.getSourceFileString(solution))
            solutionHeaderFile.write(
                solutionWriter.getHeaderFileString(solution))
        else:
            solutionFileName = solutionWriter.getSolutionName(solution)
            with open(os.path.join(outputPath, "Solutions",
                                   solutionFileName + ".cpp"), "w") as cppFile:
                cppFile.write(CHeader)
                cppFile.write(solutionWriter.getSourceFileString(solution))
            with open(os.path.join(outputPath, "Solutions",
                                   solutionFileName + ".h"), "w") as hFile:
                hFile.write(CHeader)
                hFile.write(solutionWriter.getHeaderFileString(solution))
        progressBar.increment()

    # close merged (per-solution files are already closed above)
    if mergeFiles:
        solutionSourceFile.close()
        solutionHeaderFile.close()

    ##########################################################################
    # Write Kernels
    ##########################################################################
    if mergeFiles:
        kernelSourceFile = open(os.path.join(outputPath, "Kernels.cpp"), "w")
        kernelHeaderFile = open(os.path.join(outputPath, "Kernels.h"), "w")
        kernelSourceFile.write(CHeader)
        kernelHeaderFile.write(CHeader)
        kernelSourceFile.write("#include \"Kernels.h\"\n")
        kernelHeaderFile.write("#pragma once\n")
        if globalParameters["RuntimeLanguage"] == "HIP":
            kernelHeaderFile.write("#include <hip/hip_runtime.h>\n")
            kernelHeaderFile.write("#include \"TensileTypes.h\"\n")
            kernelHeaderFile.write("#include \"KernelHeader.h\"\n")
        else:
            kernelHeaderFile.write("#include <string>\n")

    # tensor contraction kernels
    for kernel in kernels:
        if kernel["KernelLanguage"] == "Source":
            kernelWriter = kernelWriterSource
        else:
            kernelWriter = kernelWriterAssembly

        if mergeFiles:
            kernelSourceFile.write(kernelWriter.getSourceFileString(kernel))
            kernelHeaderFile.write(kernelWriter.getHeaderFileString(kernel))
        else:
            kernelName = kernelWriter.getKernelName(kernel)
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".cpp"), "w") as cppFile:
                cppFile.write(CHeader)
                cppFile.write(kernelWriter.getSourceFileString(kernel))
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".h"), "w") as hFile:
                hFile.write(CHeader)
                hFile.write(kernelWriter.getHeaderFileString(kernel))
        progressBar.increment()

    # beta-only kernels
    for kernel in kernelsBetaOnly:
        kernelWriter = kernelWriterSource
        kernelName = kernelWriter.getKernelNameBetaOnly(kernel)

        if mergeFiles:
            kernelSourceFile.write(
                kernelWriter.getSourceFileStringBetaOnly(kernel))
            kernelHeaderFile.write(
                kernelWriter.getHeaderFileStringBetaOnly(kernel))
        else:
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".cpp"), "w") as cppFile:
                cppFile.write(CHeader)
                cppFile.write(kernelWriter.getSourceFileStringBetaOnly(kernel))
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".h"), "w") as hFile:
                hFile.write(CHeader)
                hFile.write(kernelWriter.getHeaderFileStringBetaOnly(kernel))

    # close merged
    if mergeFiles:
        kernelSourceFile.close()
        kernelHeaderFile.close()
def writeSolutionsAndKernels(outputPath, problemTypes, solutions, kernels,
        kernelsBetaOnly, solutionWriter, kernelWriterSource,
        kernelWriterAssembly):
    """Generate kernel and solution source/header files under ``outputPath``.

    With globalParameters["MergeFiles"] set, output goes into Kernels.cpp/.h
    and Solutions.cpp/.h; otherwise per-kernel / per-solution files are
    written into the "Kernels" / "Solutions" directories.

    Kernel source is produced by processKernelSourceChunk over index ranges
    of ``kernels`` (in child processes connected by pipes when the CPU count
    derived from globalParameters["CpuThreads"] is non-zero, in this process
    otherwise) and handed to processResults together with the dict that
    collects kernels with build errors. A solution-pointer typedef is
    emitted for every entry of ``problemTypes``.

    Calls printExit() if a child process exits with a non-zero code, or at
    the end when globalParameters["ExitAfterKernelGen"] is set.
    """
    start = time.time()
    print1("# Writing Kernels...")
    mergeFiles = globalParameters["MergeFiles"]
    showProgressBar = globalParameters["ShowProgressBar"]
    if not mergeFiles:
        ensurePath(os.path.join(outputPath, "Solutions"))
        ensurePath(os.path.join(outputPath, "Kernels"))

    if showProgressBar:
        progressBar = ProgressBar(len(kernels))

    ##########################################################################
    # Write Kernels
    ##########################################################################
    # Bound in every mode so they can always be passed to processResults.
    # NOTE(review): assumes processResults ignores them (opens its own files)
    # when MergeFiles is off - confirm.
    kernelSourceFile = None
    kernelHeaderFile = None
    if mergeFiles:
        kernelSourceFile = open(os.path.join(outputPath, "Kernels.cpp"), "w")
        kernelHeaderFile = open(os.path.join(outputPath, "Kernels.h"), "w")
        kernelSourceFile.write(CHeader)
        kernelHeaderFile.write(CHeader)
        kernelSourceFile.write("#include \"Kernels.h\"\n")
        kernelHeaderFile.write("#pragma once\n")
        if globalParameters["RuntimeLanguage"] == "HIP":
            kernelHeaderFile.write("// Also set env var HCC_ENABLE_PRINTF=1 for printf\n")
            kernelHeaderFile.write("#define HCC_ENABLE_ACCELERATOR_PRINTF\n\n")
            kernelHeaderFile.write("#include <hip/hip_runtime.h>\n")
            kernelHeaderFile.write("#include \"TensileTypes.h\"\n")
            kernelHeaderFile.write("#include \"KernelHeader.h\"\n")
            kernelHeaderFile.write("\n\n")
            kernelHeaderFile.write("__device__ inline int GenDot4(int a, int b, int c) { \n")
            kernelHeaderFile.write("#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__\n")
            kernelHeaderFile.write(" typedef union { int32_t i; char4 z; } PkInt8x4;\n")
            kernelHeaderFile.write("#else\n")
            kernelHeaderFile.write(" typedef struct { int c0:8,c1:8,c2:8,c3:8; } C4I8;\n")
            kernelHeaderFile.write(" typedef union { int32_t i; C4I8 z; } PkInt8x4;\n")
            kernelHeaderFile.write("#endif\n")
            kernelHeaderFile.write(" PkInt8x4 va, vb; va.i = a; vb.i = b;\n")
            kernelHeaderFile.write("#if (__hcc_workweek__ >= 19015) || __HIP_CLANG_ONLY__\n")
            kernelHeaderFile.write(" return amd_mixed_dot(va.z, vb.z, c, true); }\n")
            kernelHeaderFile.write("#else\n")
            kernelHeaderFile.write(" return c + (vb.z.c3*va.z.c3 + vb.z.c2*va.z.c2 + vb.z.c1*va.z.c1 + vb.z.c0*va.z.c0); }\n")
            kernelHeaderFile.write("#endif\n")
            kernelHeaderFile.write("\n\n")
        else:
            kernelHeaderFile.write("#include <string>\n")

    kernelsWithBuildErrs = {}

    prepAsm()

    if globalParameters["CpuThreads"] == 0:
        cpus = 0
    elif globalParameters["CodeFromFiles"]:
        cpu_count = multiprocessing.cpu_count()
        cpuThreads = globalParameters["CpuThreads"]
        # a negative setting is a multiplier of the core count,
        # a positive one is capped at the core count
        cpus = cpu_count * abs(cpuThreads) if cpuThreads < 0 \
            else min(cpu_count, cpuThreads)
    else:  #! CodeFromFiles is not thread-safe since code merged into same file
        cpus = 1

    # floor division: chunk bounds must stay integers to be usable as indices
    workPerCpu = max(10, (len(kernels) + cpus - 1) // cpus) if cpus else 1

    kiStart = 0
    threads = []
    if cpus:
        print("# Launching kernel compilation processes (cpus=%u kernelsPerCpu=%u)"
              % (cpus, workPerCpu))
    else:
        print("# Compiling kernels (no multiprocessing, cpus=%u #kernels=%u)"
              % (cpus, len(kernels)))
    if cpus and showProgressBar:
        processLaunchProgressBar = ProgressBar(len(kernels))
    else:
        processLaunchProgressBar = None

    while kiStart < len(kernels):
        kiStop = min(len(kernels), kiStart + workPerCpu)
        if cpus:
            parentConn, child = multiprocessing.Pipe()
            args = (kernels, kernelWriterSource, kernelWriterAssembly,
                    kiStart, kiStop, child)
            t = multiprocessing.Process(target=processKernelSourceChunk,
                                        args=args)
            t.start()
            child.close()  # close child pipe in the parent process
            threads.append([t, kiStart, kiStop, parentConn])
            if processLaunchProgressBar:
                processLaunchProgressBar.increment(kiStop - kiStart)
            else:
                sys.stderr.write(" # launched process %s for kernels %d..%d\n"
                                 % (t, kiStart, kiStop - 1))
        else:  # non-threaded version
            results = processKernelSourceChunk(
                kernels, kernelWriterSource, kernelWriterAssembly,
                kiStart, kiStop, None)
            if showProgressBar:
                progressBar.increment(kiStop - kiStart)
            processResults(results, outputPath, kernelsWithBuildErrs,
                           kernelSourceFile, kernelHeaderFile)
        kiStart += workPerCpu
    sys.stderr.write("# Waiting for kernel compilation processes...\n")

    someError = 0
    if cpus:
        for (t, kiStart, kiStop, parentConn) in threads:
            try:
                results = parentConn.recv()
            except EOFError as pipeErr:
                print("*** warning: process %s returned pipe EOF %s %s"
                      % (t, t, pipeErr))
                # never re-use the previous chunk's results for this chunk
                results = []

            t.join()
            e = t.exitcode
            if e != 0:
                print("*** warning: process %s returned %s %s" % (t, t, e))
                someError = 1
                results = []

            if showProgressBar:
                progressBar.increment(kiStop - kiStart)
            processResults(results, outputPath, kernelsWithBuildErrs,
                           kernelSourceFile, kernelHeaderFile)

    if someError:
        print("\nKernel compilation failed in one or more subprocesses. May want to set CpuThreads=0 and re-run to make debug easier")
        printExit("** kernel compilation failure **")

    # beta-only kernels
    for kernel in kernelsBetaOnly:
        kernelWriter = kernelWriterSource
        kernelName = kernelWriter.getKernelNameBetaOnly(kernel)

        if mergeFiles:
            (err, src) = kernelWriter.getSourceFileStringBetaOnly(kernel)
            kernelSourceFile.write(src)
        else:
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".cpp"), "w") as cppFile:
                cppFile.write(CHeader)
                (err, src) = kernelWriter.getSourceFileStringBetaOnly(kernel)
                cppFile.write(src)
        if err:
            # kernelName is a string, so it must be formatted with %s
            print("*** warning: invalid kernel#%s" % kernelName)

        if mergeFiles:
            kernelHeaderFile.write(
                kernelWriter.getHeaderFileStringBetaOnly(kernel))
        else:
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".h"), "w") as hFile:
                hFile.write(CHeader)
                hFile.write(kernelWriter.getHeaderFileStringBetaOnly(kernel))

    # close merged
    if mergeFiles:
        kernelSourceFile.close()
        kernelHeaderFile.close()

    stop = time.time()
    print("# Kernel Building elapsed time = %.1f secs" % (stop - start))

    print1("# Writing Solutions")
    if showProgressBar:
        progressBar = ProgressBar(len(solutions))
    ##########################################################################
    # Write Solutions
    ##########################################################################
    if mergeFiles:
        solutionSourceFile = open(os.path.join(outputPath, "Solutions.cpp"), "w")
        solutionHeaderFile = open(os.path.join(outputPath, "Solutions.h"), "w")
        solutionSourceFile.write(CHeader)
        solutionHeaderFile.write(CHeader)
        solutionSourceFile.write("#include \"Solutions.h\"\n")
        solutionSourceFile.write("#include <algorithm>\n")
        solutionHeaderFile.write("#include \"TensileTypes.h\"\n")
        solutionHeaderFile.write("#include \"Kernels.h\"\n")
        solutionHeaderFile.write("#include \"SolutionHelper.h\"\n")
        solutionHeaderFile.write("#include \"Tools.h\"\n")
        if globalParameters["CodeFromFiles"]:
            solutionHeaderFile.write("#include <unistd.h>\n")

    # Write a solution pointer typedef for each problemType:
    h = ""
    for problemType in problemTypes:
        argListAll = solutionWriter.getArgList(problemType, True, True, True, True)
        # declare TensileSolutionPointer_ProblemType
        h += "\n// solution pointer\n"
        h += "typedef TensileStatus (*TensileSolutionPointer_%s)(\n" \
            % problemType
        lastArgIdx = len(argListAll) - 1
        for i, arg in enumerate(argListAll):
            h += " %s %s%s" % (arg[0], arg[1],
                               ",\n" if i < lastArgIdx else ");\n\n")
        h += "\n"

    # NOTE(review): solutionHeaderFile is only opened above when MergeFiles
    # is set; in non-merged mode this write has no target - confirm whether
    # non-merged output is still supported here.
    solutionHeaderFile.write(h)

    for solution in solutions:
        if mergeFiles:
            solutionSourceFile.write(
                solutionWriter.getProblemSourceString(
                    solution["ProblemType"], solution, kernelsWithBuildErrs))
            solutionHeaderFile.write(
                solutionWriter.getHeaderFileString(solution))
        else:
            solutionFileName = solutionWriter.getSolutionName(solution)
            with open(os.path.join(outputPath, "Solutions",
                                   solutionFileName + ".cpp"), "w") as cppFile:
                cppFile.write(CHeader)
                cppFile.write(
                    solutionWriter.getProblemSourceString(
                        solution["ProblemType"], solution,
                        kernelsWithBuildErrs))
            with open(os.path.join(outputPath, "Solutions",
                                   solutionFileName + ".h"), "w") as hFile:
                hFile.write(CHeader)
                hFile.write(solutionWriter.getHeaderFileString(solution))
        if showProgressBar:
            progressBar.increment()

    # close merged (per-solution files are already closed above)
    if mergeFiles:
        solutionSourceFile.close()
        solutionHeaderFile.close()

    if globalParameters["ExitAfterKernelGen"]:
        printExit("** Exiting after kernel generation due to ExitAfterKernelGen=1")
def writeSolutionsAndKernels(outputPath, solutions, kernels, kernelsBetaOnly,
        solutionWriter, kernelWriterSource, kernelWriterAssembly):
    """Write the kernel and solution source/header files under outputPath.

    With globalParameters["MergeFiles"] set, everything goes into
    Kernels.cpp / Kernels.h / Solutions.cpp / Solutions.h directly under
    outputPath; otherwise one .cpp/.h pair per beta-only kernel and per
    solution is written into the "Kernels" and "Solutions" sub-directories.

    Parameters:
      outputPath           -- directory that receives the generated files
      solutions            -- iterable of solutions handed to solutionWriter
      kernels              -- sequence of kernels; processed in chunks by
                              processKernelSourceChunk
      kernelsBetaOnly      -- iterable of beta-only kernels, always written
                              with kernelWriterSource
      solutionWriter       -- provides getSolutionName, getSourceFileString
                              and getHeaderFileString
      kernelWriterSource   -- writer forwarded to processKernelSourceChunk and
                              used for the beta-only kernels
      kernelWriterAssembly -- writer forwarded to processKernelSourceChunk

    Calls printExit() at the end when globalParameters["ExitAfterKernelGen"]
    is set.
    """
    print1("# Writing Kernels")
    mergeFiles = globalParameters["MergeFiles"]
    if not mergeFiles:
        ensurePath(os.path.join(outputPath, "Solutions"))
        ensurePath(os.path.join(outputPath, "Kernels"))

    # progressBar is forwarded to processKernelSourceChunk unconditionally, so
    # it must be bound even when no progress bar is shown.
    progressBar = None
    if globalParameters["ShowProgressBar"]:
        progressBar = ProgressBar(len(kernels))

    ##########################################################################
    # Write Kernels
    ##########################################################################
    # The merged file handles are forwarded to processKernelSourceChunk as
    # well; they stay None when every kernel gets its own file.
    kernelSourceFile = None
    kernelHeaderFile = None
    if mergeFiles:
        kernelSourceFile = open(os.path.join(outputPath, "Kernels.cpp"), "w")
        kernelHeaderFile = open(os.path.join(outputPath, "Kernels.h"), "w")
        kernelSourceFile.write(CHeader)
        kernelHeaderFile.write(CHeader)
        kernelSourceFile.write("#include \"Kernels.h\"\n")
        kernelHeaderFile.write("#pragma once\n")
        if globalParameters["RuntimeLanguage"] == "HIP":
            kernelHeaderFile.write("#define HCC_ENABLE_ACCELERATOR_PRINTF\n\n")
            kernelHeaderFile.write("#include <hip/hip_runtime.h>\n")
            kernelHeaderFile.write("#include \"TensileTypes.h\"\n")
            kernelHeaderFile.write("#include \"KernelHeader.h\"\n")
        else:
            kernelHeaderFile.write("#include <string>\n")

    # Passed to processKernelSourceChunk and later to
    # solutionWriter.getSourceFileString; presumably filled in by the chunk
    # workers with the kernels that failed to build -- TODO confirm.
    kernelsWithBuildErrs = {}

    # tensor contraction kernels - dispatch as multiple threads:
    kLock = threading.Lock()
    pLock = threading.Lock()

    prepAsm()

    if globalParameters["CpuThreads"] == 0:
        # 0 disables threading: chunks are processed inline below.
        cpus = 0
    elif globalParameters["CodeFromFiles"]:
        cpu_count = multiprocessing.cpu_count()
        # -1 means "use every CPU"; any other value is an upper bound.
        cpus = cpu_count if globalParameters["CpuThreads"] == -1 \
            else min(cpu_count, globalParameters["CpuThreads"])
    else:
        # Original note: without CodeFromFiles the code is merged into the
        # same file, which is not thread-safe, so use a single worker.
        cpus = 1

    # Ceiling division, with at least 10 kernels per worker thread; floor
    # division keeps the chunk size an integer on every Python version.
    workPerCpu = max(10, (len(kernels) + cpus - 1) // cpus) if cpus else 1
    print("info: cpus=%u kernelsPerCpu=%u" % (cpus, workPerCpu))

    threads = []
    for kiStart in range(0, len(kernels), workPerCpu):
        kiStop = min(len(kernels), kiStart + workPerCpu)
        # One argument tuple for both the threaded and the inline path so the
        # two calls can never disagree on the positional arguments.
        chunkArgs = (outputPath, kernels, kernelSourceFile, kernelHeaderFile,
                     kernelWriterSource, kernelWriterAssembly,
                     kernelsWithBuildErrs, progressBar, kLock, pLock,
                     kiStart, kiStop)
        if cpus:
            t = threading.Thread(target=processKernelSourceChunk,
                                 args=chunkArgs)
            t.start()
            threads.append(t)
        else:
            processKernelSourceChunk(*chunkArgs)

    for t in threads:
        t.join()

    # beta-only kernels
    for kernel in kernelsBetaOnly:
        kernelWriter = kernelWriterSource
        kernelName = kernelWriter.getKernelNameBetaOnly(kernel)

        # write kernel.cpp
        if mergeFiles:
            (err, src) = kernelWriter.getSourceFileStringBetaOnly(kernel)
            kernelSourceFile.write(src)
        else:
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".cpp"), "w") as sourceFile:
                sourceFile.write(CHeader)
                (err, src) = kernelWriter.getSourceFileStringBetaOnly(kernel)
                sourceFile.write(src)
        if err:
            # kernelName is a string (it is concatenated with ".cpp" above),
            # so it must be formatted with %s rather than %u.
            print("*** warning: invalid kernel#%s" % kernelName)

        # write kernel.h
        if mergeFiles:
            kernelHeaderFile.write(
                kernelWriter.getHeaderFileStringBetaOnly(kernel))
        else:
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".h"), "w") as headerFile:
                headerFile.write(CHeader)
                headerFile.write(
                    kernelWriter.getHeaderFileStringBetaOnly(kernel))

    # close merged
    if mergeFiles:
        kernelSourceFile.close()
        kernelHeaderFile.close()

    print1("# Writing Solutions")
    if globalParameters["ShowProgressBar"]:
        progressBar = ProgressBar(len(solutions))

    ##########################################################################
    # Write Solutions
    ##########################################################################
    if mergeFiles:
        solutionSourceFile = open(
            os.path.join(outputPath, "Solutions.cpp"), "w")
        solutionHeaderFile = open(
            os.path.join(outputPath, "Solutions.h"), "w")
        solutionSourceFile.write(CHeader)
        solutionHeaderFile.write(CHeader)
        solutionSourceFile.write("#include \"Solutions.h\"\n")
        solutionHeaderFile.write("#include \"TensileTypes.h\"\n")
        solutionHeaderFile.write("#include \"Kernels.h\"\n")
        solutionHeaderFile.write("#include \"SolutionHelper.h\"\n")
        solutionHeaderFile.write("#include \"Tools.h\"\n")
        if globalParameters["CodeFromFiles"]:
            solutionHeaderFile.write("#include <unistd.h>\n")

    for solution in solutions:
        if mergeFiles:
            solutionSourceFile.write(
                solutionWriter.getSourceFileString(solution,
                                                   kernelsWithBuildErrs))
            solutionHeaderFile.write(
                solutionWriter.getHeaderFileString(solution))
        else:
            # get solution name
            solutionFileName = solutionWriter.getSolutionName(solution)

            # write solution.cpp
            with open(os.path.join(outputPath, "Solutions",
                                   solutionFileName + ".cpp"), "w") as sourceFile:
                sourceFile.write(CHeader)
                sourceFile.write(
                    solutionWriter.getSourceFileString(solution,
                                                       kernelsWithBuildErrs))

            # write solution.h
            with open(os.path.join(outputPath, "Solutions",
                                   solutionFileName + ".h"), "w") as headerFile:
                headerFile.write(CHeader)
                headerFile.write(
                    solutionWriter.getHeaderFileString(solution))

        if globalParameters["ShowProgressBar"]:
            progressBar.increment()

    # close merged: only the merged files are still open at this point; the
    # per-solution files were closed inside the loop.
    if mergeFiles:
        solutionSourceFile.close()
        solutionHeaderFile.close()

    if globalParameters["ExitAfterKernelGen"]:
        printExit(
            "** Exiting after kernel generation due to ExitAfterKernelGen=1")
def writeSolutionsAndKernels(outputPath, solutions, kernels, kernelsBetaOnly,
        solutionWriter, kernelWriterSource, kernelWriterAssembly):
    """Write the kernel and solution source/header files under outputPath.

    With globalParameters["MergeFiles"] set, everything goes into
    Kernels.cpp / Kernels.h / Solutions.cpp / Solutions.h directly under
    outputPath; otherwise one .cpp/.h pair per kernel and per solution is
    written into the "Kernels" and "Solutions" sub-directories.

    Parameters:
      outputPath           -- directory that receives the generated files
      solutions            -- iterable of solutions handed to solutionWriter
      kernels              -- sequence of kernels; each is a mapping with a
                              "KernelLanguage" entry selecting the writer
      kernelsBetaOnly      -- iterable of beta-only kernels, always written
                              with kernelWriterSource
      solutionWriter       -- provides getSolutionName, getSourceFileString
                              and getHeaderFileString
      kernelWriterSource   -- writer used when KernelLanguage == "Source" and
                              for the beta-only kernels
      kernelWriterAssembly -- writer used for every other KernelLanguage

    Calls printExit() at the end when globalParameters["ExitAfterKernelGen"]
    is set.
    """
    print1("# Writing Kernels")
    mergeFiles = globalParameters["MergeFiles"]
    if not mergeFiles:
        ensurePath(os.path.join(outputPath, "Solutions"))
        ensurePath(os.path.join(outputPath, "Kernels"))

    if globalParameters["ShowProgressBar"]:
        progressBar = ProgressBar(len(kernels))

    ##########################################################################
    # Write Kernels
    ##########################################################################
    if mergeFiles:
        kernelSourceFile = open(os.path.join(outputPath, "Kernels.cpp"), "w")
        kernelHeaderFile = open(os.path.join(outputPath, "Kernels.h"), "w")
        kernelSourceFile.write(CHeader)
        kernelHeaderFile.write(CHeader)
        kernelSourceFile.write("#include \"Kernels.h\"\n")
        kernelHeaderFile.write("#pragma once\n")
        if globalParameters["RuntimeLanguage"] == "HIP":
            kernelHeaderFile.write("#define HCC_ENABLE_ACCELERATOR_PRINTF\n\n")
            kernelHeaderFile.write("#include <hip/hip_runtime.h>\n")
            kernelHeaderFile.write("#include \"TensileTypes.h\"\n")
            kernelHeaderFile.write("#include \"KernelHeader.h\"\n")
        else:
            kernelHeaderFile.write("#include <string>\n")

    # tensor contraction kernels
    for ki, kernel in enumerate(kernels):
        kernelWriter = kernelWriterSource \
            if kernel["KernelLanguage"] == "Source" else kernelWriterAssembly

        # write kernel.cpp
        if mergeFiles:
            (err, src) = kernelWriter.getSourceFileString(kernel)
            kernelSourceFile.write(src)
        else:
            # get kernel name (only needed for the per-kernel file names)
            kernelName = kernelWriter.getKernelName(kernel)
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".cpp"), "w") as sourceFile:
                sourceFile.write(CHeader)
                (err, src) = kernelWriter.getSourceFileString(kernel)
                sourceFile.write(src)
        if err:
            print("*** warning: invalid kernel#%u" % ki)

        # write kernel.h
        if mergeFiles:
            kernelHeaderFile.write(kernelWriter.getHeaderFileString(kernel))
        else:
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".h"), "w") as headerFile:
                headerFile.write(CHeader)
                headerFile.write(kernelWriter.getHeaderFileString(kernel))

        if globalParameters["ShowProgressBar"]:
            progressBar.increment()

    # beta-only kernels
    for kernel in kernelsBetaOnly:
        kernelWriter = kernelWriterSource
        kernelName = kernelWriter.getKernelNameBetaOnly(kernel)

        # write kernel.cpp
        if mergeFiles:
            (err, src) = kernelWriter.getSourceFileStringBetaOnly(kernel)
            kernelSourceFile.write(src)
        else:
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".cpp"), "w") as sourceFile:
                sourceFile.write(CHeader)
                (err, src) = kernelWriter.getSourceFileStringBetaOnly(kernel)
                sourceFile.write(src)
        if err:
            # Identify the beta-only kernel by its own name: the index of the
            # tensor-contraction loop above is unrelated to this kernel and is
            # not even bound when `kernels` is empty.
            print("*** warning: invalid kernel#%s" % kernelName)

        # write kernel.h
        if mergeFiles:
            kernelHeaderFile.write(
                kernelWriter.getHeaderFileStringBetaOnly(kernel))
        else:
            with open(os.path.join(outputPath, "Kernels",
                                   kernelName + ".h"), "w") as headerFile:
                headerFile.write(CHeader)
                headerFile.write(
                    kernelWriter.getHeaderFileStringBetaOnly(kernel))

    # close merged
    if mergeFiles:
        kernelSourceFile.close()
        kernelHeaderFile.close()

    print1("# Writing Solutions")
    if globalParameters["ShowProgressBar"]:
        progressBar = ProgressBar(len(solutions))

    ##########################################################################
    # Write Solutions
    ##########################################################################
    if mergeFiles:
        solutionSourceFile = open(
            os.path.join(outputPath, "Solutions.cpp"), "w")
        solutionHeaderFile = open(
            os.path.join(outputPath, "Solutions.h"), "w")
        solutionSourceFile.write(CHeader)
        solutionHeaderFile.write(CHeader)
        solutionSourceFile.write("#include \"Solutions.h\"\n")
        solutionHeaderFile.write("#include \"TensileTypes.h\"\n")
        solutionHeaderFile.write("#include \"Kernels.h\"\n")
        solutionHeaderFile.write("#include \"SolutionHelper.h\"\n")
        solutionHeaderFile.write("#include \"Tools.h\"\n")

    for solution in solutions:
        if mergeFiles:
            solutionSourceFile.write(
                solutionWriter.getSourceFileString(solution))
            solutionHeaderFile.write(
                solutionWriter.getHeaderFileString(solution))
        else:
            # get solution name
            solutionFileName = solutionWriter.getSolutionName(solution)

            # write solution.cpp
            with open(os.path.join(outputPath, "Solutions",
                                   solutionFileName + ".cpp"), "w") as sourceFile:
                sourceFile.write(CHeader)
                sourceFile.write(
                    solutionWriter.getSourceFileString(solution))

            # write solution.h
            with open(os.path.join(outputPath, "Solutions",
                                   solutionFileName + ".h"), "w") as headerFile:
                headerFile.write(CHeader)
                headerFile.write(
                    solutionWriter.getHeaderFileString(solution))

        if globalParameters["ShowProgressBar"]:
            progressBar.increment()

    # close merged: only the merged files are still open at this point; the
    # per-solution files were closed inside the loop.
    if mergeFiles:
        solutionSourceFile.close()
        solutionHeaderFile.close()

    if globalParameters["ExitAfterKernelGen"]:
        printExit(
            "** Exiting after kernel generation due to ExitAfterKernelGen=1")