def main(config):
    """Benchmark every problem type / problem-size group found in ``config``.

    ``config`` is iterated as a sequence of entries.  Element 0 of each entry
    is the problem type config; any further elements are problem-size group
    configs (a single empty group ``{}`` is used when none are given).

    For each group the final results (.csv) and solutions (.yaml) are copied
    into the benchmark data directory under the name
    ``<problemType>_<groupIdx>``.  A group whose results file already exists is
    skipped unless ``globalParameters["ForceRedoBenchmarkProblems"]`` is set.

    Calls ``sys.exit(1)`` when ``globalParameters["ExitOnFails"]`` is set and
    at least one benchmark reported a failure.
    """
    # dataPath is computed before the working path is pushed, as in the
    # original statement order.
    dataPath = os.path.join(globalParameters["WorkingPath"],
                            globalParameters["BenchmarkDataPath"])
    pushWorkingPath(globalParameters["BenchmarkProblemsPath"])
    ensurePath(dataPath)
    totalTestFails = 0
    for benchmarkProblemTypeConfig in config:
        problemTypeConfig = benchmarkProblemTypeConfig[0]
        if len(benchmarkProblemTypeConfig) < 2:
            problemSizeGroupConfigs = [{}]
        else:
            problemSizeGroupConfigs = benchmarkProblemTypeConfig[1:]
        for problemSizeGroupIdx, problemSizeGroupConfig in enumerate(
                problemSizeGroupConfigs):
            print2("ProblemTypeConfig: %s" % problemTypeConfig)
            problemTypeObj = ProblemType(problemTypeConfig)
            globalParameters["EnableHalf"] = \
                problemTypeObj["DataType"].isHalf()

            # results files will be named
            groupName = "%s_%02u" % (str(problemTypeObj), problemSizeGroupIdx)
            newResultsFileName = os.path.join(dataPath, "%s.csv" % groupName)
            newSolutionsFileName = os.path.join(dataPath,
                                                "%s.yaml" % groupName)

            # skip if possible
            if globalParameters["ForceRedoBenchmarkProblems"] \
                    or not os.path.exists(newResultsFileName):

                # Benchmark Problem Size Group
                (resultsFileBaseFinal, benchmarkErrors) = \
                    benchmarkProblemType(problemTypeConfig,
                                         problemSizeGroupConfig,
                                         problemSizeGroupIdx)
                totalTestFails += benchmarkErrors

                # Parenthesised single-argument form: prints the same text
                # under Python 2's print statement and Python 3's print().
                print("clientExit=%u %s for %s" %
                      (totalTestFails,
                       "(ERROR)" if totalTestFails else "(PASS)",
                       globalParameters["ConfigPath"]))

                # Copy Data
                resultsFileBase = resultsFileBaseFinal
                resultsFileName = "%s.csv" % (resultsFileBase)
                solutionsFileName = "%s.yaml" % (resultsFileBase)
                shutil_copy(resultsFileName, newResultsFileName)
                shutil_copy(solutionsFileName, newSolutionsFileName)
            else:
                print1("# %s_%02u already benchmarked; skipping." %
                       (str(problemTypeObj), problemSizeGroupIdx))

    popWorkingPath()

    if globalParameters["ExitOnFails"] and totalTestFails:
        sys.exit(1)
def __init__(self, problemTypeConfig, problemSizeGroupConfig):
    """Build the benchmark process for one problem type and size group.

    Creates the ProblemType, seeds ``initialSolutionParameters`` from
    ``defaultSolution`` plus any "InitialSolutionParameters" overrides in
    ``problemSizeGroupConfig``, initialises the per-phase parameter
    containers, then fills in defaults and converts parameters to steps.
    """
    # read problem type
    self.problemType = ProblemType(problemTypeConfig)
    self.isBatched = bool("Batched" in problemTypeConfig
                          and problemTypeConfig["Batched"])
    print2("# BenchmarkProcess beginning %s" % str(self.problemType))

    # read initial solution parameters
    self.initialSolutionParameters = {"ProblemType": problemTypeConfig}
    self.initialSolutionParameters.update(defaultSolution)
    if "InitialSolutionParameters" in problemSizeGroupConfig:
        userOverrides = problemSizeGroupConfig["InitialSolutionParameters"]
        if userOverrides is not None:
            for overrideDict in userOverrides:
                for name in overrideDict:
                    value = overrideDict[name]
                    if isinstance(value, list):
                        # A list is only warned about when its length is
                        # not 1; its first element is used either way.
                        if len(value) != 1:
                            printWarning(
                                "InitialSolutionParameters must have length=1: %s:%s"
                                % (name, value))
                        value = value[0]
                    self.initialSolutionParameters[name] = value
    else:
        print2("No InitialSolutionParameters; using defaults.")
    print2("# InitialSolutionParameters: %s"
           % str(self.initialSolutionParameters))

    # containers filled by the two phases below
    self.benchmarkCommonParameters = []
    self.forkParameters = []
    self.benchmarkForkParameters = []
    self.joinParameters = []
    self.benchmarkJoinParameters = []
    self.benchmarkFinalParameters = []
    self.benchmarkSteps = []
    self.hardcodedParameters = [{}]
    self.singleValueParameters = {}

    # (I) fill in missing steps using defaults
    # NOTE(review): two arguments are passed here; confirm the method's
    # signature in this class accepts (isBatched, config).
    self.fillInMissingStepsWithDefaults(self.isBatched,
                                        problemSizeGroupConfig)

    # (II) convert list of parameters to list of steps
    self.currentProblemSizes = []
    self.benchmarkStepIdx = 0
    self.convertParametersToSteps()
def benchmarkProblemType(problemTypeConfig, problemSizeGroupConfig,
                         problemSizeGroupIdx):
    """Run every benchmark step for one problem type / problem-size group.

    Builds a BenchmarkProcess from the two configs and, for each step:
    copies the client sources into a temporary source directory, enumerates
    candidate solutions (hardcoded parameters x benchmark permutations),
    writes the benchmark files, runs the generated run script unless results
    already exist, and feeds the results back into the winners table.

    Returns:
        (resultsFileBaseFinal, benchmarkTestFails): the results file path
        without extension for the step reporting ``isFinal()`` (``None`` if no
        step did), and the number of run-script invocations that exited with
        a non-zero return code.
    """
    benchmarkTestFails = 0

    # convert config to full benchmark process (resolves defaults)
    print1("")
    print1(HR)
    print1("# Converting Config to BenchmarkProcess Object")
    print1(HR)
    print1("")
    benchmarkProcess = BenchmarkProcess(problemTypeConfig,
                                        problemSizeGroupConfig)

    problemTypeName = str(benchmarkProcess.problemType)
    problemSizeGroupName = "%s_%02u" % (problemTypeName, problemSizeGroupIdx)
    pushWorkingPath(problemSizeGroupName)
    ensurePath(os.path.join(globalParameters["WorkingPath"], "Data"))

    totalBenchmarkSteps = len(benchmarkProcess)
    resultsFileBaseFinal = None
    winners = WinningParameterDict()
    print1("# NumBenchmarkSteps: %u" % totalBenchmarkSteps)
    print1("")
    print1(HR)
    print1("# Done Creating BenchmarkProcess Object")
    print1(HR)

    ##########################################################################
    # For Each Benchmark Step
    ##########################################################################
    for benchmarkStepIdx in range(0, totalBenchmarkSteps):

        benchmarkStep = benchmarkProcess[benchmarkStepIdx]
        if winners.winners == {}:
            # perf optimization to skip the initial winners creation
            # this helps a little here but really helps below with avoiding
            # the super-expensive removeHardcoded step below - that can use a
            # fast-path to create winners when needed.
            print1(
                "# Empty winners - use fast initialization of hardcodedParameters"
            )
            resultingHardcodedParameterList = \
                benchmarkStep.hardcodedParameters
        else:
            resultingHardcodedParameterList = \
                winners.wpdUpdate(benchmarkStep.hardcodedParameters)

        benchmarkStep.hardcodedParameters = resultingHardcodedParameterList
        numHardcoded = len(benchmarkStep.hardcodedParameters)
        stepName = str(benchmarkStep)
        shortName = benchmarkStep.abbreviation()
        print1("\n")
        print1(HR)
        currentTime = time.time()
        # NOTE(review): startTime is not assigned in this function;
        # presumably a module-level timestamp - confirm.
        elapsedTime = currentTime - startTime
        print1("# BenchmarkStep: %s - %s %.3fs" %
               (problemSizeGroupName, stepName, elapsedTime))
        print1("# NumProblems: %u" %
               benchmarkStep.problemSizes.totalProblemSizes)
        print1("# BenchmarkParameters:")
        for paramName in benchmarkStep.benchmarkParameters:
            paramValues = benchmarkStep.benchmarkParameters[paramName]
            printStr = "# %s = { %s" % (paramName, paramValues[0])
            for paramValueIdx in range(1, len(paramValues)):
                printStr += ", %s" % str(paramValues[paramValueIdx])
            printStr += " }"
            print1(printStr)

        # Disabled debug output, kept for maintainers.
        if False:
            # print1(hardcoded parameters and their winners
            print1("# HardcodedParameters | WinningParameters:")
            paramDictIdx = 0
            hardcodedMinNaming = \
                Solution.getMinNaming(benchmarkStep.hardcodedParameters)
            for paramDict in benchmarkStep.hardcodedParameters:
                winningParameters = winners[paramDict]
                print1("# (%u) %s | %s" %
                       (paramDictIdx,
                        Solution.getNameMin(paramDict, hardcodedMinNaming),
                        Solution.getNameFull(winningParameters)))
                paramDictIdx += 1
        pushWorkingPath(shortName)

        ######################################################################
        # Copy Files to Benchmark Source Directory
        ######################################################################
        stepBaseDir = globalParameters["WorkingPath"]
        sourceDir = os.path.join(stepBaseDir, "source")
        ensurePath(sourceDir)
        pushWorkingPath("sourceTmp")
        filesToCopy = [
            "SolutionMapper.h",
            "Client.cpp",
            "Client.h",
            "CMakeLists.txt",
            "DeviceStats.h",
            "TensorUtils.h",
            "MathTemplates.cpp",
            "MathTemplates.h",
            "TensileTypes.h",
            "tensile_bfloat16.h",
            "KernelHeader.h",
            "ReferenceCPU.h",
            "SolutionHelper.cpp",
            "SolutionHelper.h",
            "Tools.cpp",
            "Tools.h",
        ]

        for f in filesToCopy:
            shutil_copy(os.path.join(globalParameters["SourcePath"], f),
                        globalParameters["WorkingPath"])
        if globalParameters["RuntimeLanguage"] == "OCL":
            shutil_copy(
                os.path.join(globalParameters["SourcePath"],
                             "FindOpenCL.cmake"),
                globalParameters["WorkingPath"])
        else:
            shutil_copy(
                os.path.join(globalParameters["SourcePath"],
                             "FindHIP.cmake"),
                globalParameters["WorkingPath"])
            shutil_copy(
                os.path.join(globalParameters["SourcePath"],
                             "FindHCC.cmake"),
                globalParameters["WorkingPath"])

        ######################################################################
        # Enumerate Benchmark Permutations
        ######################################################################
        solutions = []
        totalBenchmarkPermutations = 1
        for benchmarkParamName in benchmarkStep.benchmarkParameters:
            totalBenchmarkPermutations *= len(
                benchmarkStep.benchmarkParameters[benchmarkParamName])
        maxPossibleSolutions = totalBenchmarkPermutations * numHardcoded
        print1("# MaxPossibleSolutions: %u = %u (hardcoded) * %u (benchmark)" %
               (maxPossibleSolutions, numHardcoded,
                totalBenchmarkPermutations))

        benchmarkPermutations = []
        for i in range(0, totalBenchmarkPermutations):
            permutation = {}
            # Decode i as a mixed-radix number: one digit per parameter.
            pIdx = i
            for benchmarkParamName in benchmarkStep.benchmarkParameters:
                benchmarkParamValues = deepcopy(
                    benchmarkStep.benchmarkParameters[benchmarkParamName])
                valueIdx = pIdx % len(benchmarkParamValues)
                permutation[benchmarkParamName] = \
                    benchmarkParamValues[valueIdx]
                # Floor division keeps pIdx an int; plain "/" would produce a
                # float under true division and break the list index above.
                pIdx //= len(benchmarkParamValues)
            benchmarkPermutations.append(permutation)

        ######################################################################
        # Enumerate Solutions = Hardcoded * Benchmark
        ######################################################################
        print1("# Enumerating Solutions")
        if globalParameters["PrintLevel"] >= 1:
            progressBar = ProgressBar(maxPossibleSolutions)
        solutionSet = set()  # avoid duplicates for nlca=-1, 1
        for hardcodedIdx in range(0, numHardcoded):
            solutions.append([])
            hardcodedParamDict = \
                benchmarkStep.hardcodedParameters[hardcodedIdx]
            for benchmarkIdx in range(0, len(benchmarkPermutations)):
                benchmarkPermutation = benchmarkPermutations[benchmarkIdx]
                solution = {
                    "ProblemType": deepcopy(benchmarkProcess.problemType.state)
                }
                solution.update(benchmarkPermutation)
                solution.update(hardcodedParamDict)
                if benchmarkStepIdx > 0:
                    winningParameters = winners[hardcodedParamDict]
                    if winningParameters is None:
                        # this is a joined parameter that didn't have a
                        # winner, that's okay
                        continue
                    solution.update(winningParameters)

                # append default parameters where necessary
                for initialSolutionParameterName \
                        in benchmarkStep.initialSolutionParameters:
                    if initialSolutionParameterName not in solution:
                        solution[initialSolutionParameterName] = \
                            benchmarkStep.initialSolutionParameters[
                                initialSolutionParameterName]
                # TODO check if solution matches problem size for exact tile
                # kernels
                solutionObject = Solution(solution)
                if solutionObject["Valid"]:
                    if solutionObject not in solutionSet:
                        solutionSet.add(solutionObject)
                        solutions[hardcodedIdx].append(solutionObject)
                else:
                    if globalParameters["PrintSolutionRejectionReason"]:
                        print1("rejecting solution %s" % str(solutionObject))
                if globalParameters["PrintLevel"] >= 1:
                    progressBar.increment()

        # remove hardcoded that don't have any valid benchmarks
        removeHardcoded = []
        for hardcodedIdx in range(0, numHardcoded):
            if len(solutions[hardcodedIdx]) == 0:
                hardcodedParamDict = \
                    benchmarkStep.hardcodedParameters[hardcodedIdx]
                removeHardcoded.append(hardcodedParamDict)
        removesExist = len(removeHardcoded) > 0
        for hardcodedParam in removeHardcoded:
            benchmarkStep.hardcodedParameters.remove(hardcodedParam)

        if removesExist:
            print1(
                "# Updating winners since enumeration removed unused hardcoded solutions. removeHardcoded=%u winners=%u"
                % (len(removeHardcoded), len(winners.winners)))
            winners.wpdUpdate(benchmarkStep.hardcodedParameters)
            if globalParameters["PrintLevel"] >= 1:
                print1("")
            numHardcoded = len(benchmarkStep.hardcodedParameters)
            # remove from solution 2D list also
            for solutionList in shallowcopy(solutions):
                if len(solutionList) == 0:
                    solutions.remove(solutionList)
        elif winners.winners == {}:
            print1("# Populating initial winners (%u solutions)\n" %
                   len(benchmarkStep.hardcodedParameters))
            for hcParm in benchmarkStep.hardcodedParameters:
                winners.winners[FrozenDictionary(hcParm)] = [{}, -1]

        print1("# Actual Solutions: %u / %u\n" %
               (len(solutions), maxPossibleSolutions))

        # create linear list
        solutionList = []
        for i in range(0, len(solutions)):
            solutionsForHardcoded = solutions[i]
            for j in range(0, len(solutionsForHardcoded)):
                solution = solutionsForHardcoded[j]
                solutionList.append(solution)
        if len(solutionList) == 0:
            msg = "Your parameters resulted in 0 valid solutions."
            if globalParameters["PrintSolutionRejectionReason"]:
                msg += "\nExamine reject and backtrace messages above to see why and where solutions were rejected."
            else:
                msg += "\nYou should re-run with \"PrintSolutionRejectionReason: True\" to see why each parameter combination was rejected."
            printExit(msg)
        if globalParameters["PrintLevel"] >= 1:
            for i in range(0, len(solutions)):
                solutionsForHardcoded = solutions[i]
                for j in range(0, len(solutionsForHardcoded)):
                    solution = solutionsForHardcoded[j]
                    print2("# (%u:%u) %s" %
                           (i, j, Solution.getNameFull(solution)))
            print2(HR)

        # write benchmarkFiles
        writeBenchmarkFiles(stepBaseDir, solutionList,
                            benchmarkStep.problemSizes, shortName,
                            filesToCopy)

        print1("# Copying files that differ from sourceTmp -> source")
        sourceTmp = globalParameters["WorkingPath"]
        files = os.listdir(sourceTmp)
        for f in files:
            f0 = os.path.join(sourceTmp, f)
            f1 = os.path.join(sourceDir, f)
            if os.path.isdir(f0):
                # directories are replaced wholesale
                if os.path.isdir(f1):
                    shutil.rmtree(f1, True)
                shutil.copytree(f0, f1)
            elif not os.path.exists(f1) or not filecmp.cmp(f0, f1):
                # only files that are new or differ are copied
                shutil.copy(f0, f1)
        shutil.rmtree(sourceTmp, True)

        popWorkingPath()  # source

        ######################################################################
        # Run Benchmark Script
        ######################################################################
        resultsFileBase = os.path.normpath(
            os.path.join(globalParameters["WorkingPath"], "../Data",
                         shortName))
        if benchmarkStep.isFinal():
            resultsFileBaseFinal = resultsFileBase
        resultsFileName = resultsFileBase + ".csv"
        solutionsFileName = resultsFileBase + ".yaml"
        if not os.path.exists(resultsFileName) \
                or globalParameters["ForceRedoBenchmarkProblems"]:
            pushWorkingPath("build")

            # write runScript
            libraryLogicPath = None
            path = globalParameters["WorkingPath"]
            forBenchmark = True
            runScriptName = writeRunScript(path, libraryLogicPath,
                                           forBenchmark)

            # run runScript
            process = Popen(runScriptName,
                            cwd=globalParameters["WorkingPath"])
            process.communicate()
            if process.returncode:
                benchmarkTestFails += 1
                printWarning(
                    "BenchmarkProblems: Benchmark Process exited with code %u"
                    % process.returncode)
            popWorkingPath()  # build
        else:
            print1("# Already benchmarked; skipping.")

        ######################################################################
        # Winners -> Determined Parameters
        ######################################################################
        results = getResults(resultsFileName, solutions)
        print2("CSV Results: %s" % results)
        winners.addResults(benchmarkStep.hardcodedParameters,
                           benchmarkPermutations, solutions, results)

        ######################################################################
        # Write Solutions YAML
        ######################################################################
        YAMLIO.writeSolutions(solutionsFileName, benchmarkStep.problemSizes,
                              solutions)

        # End Iteration
        popWorkingPath()  # stepName
        currentTime = time.time()
        elapsedTime = currentTime - startTime
        print1("%s\n# %s\n# %s: End - %.3fs\n%s\n" %
               (HR, problemSizeGroupName, shortName, elapsedTime, HR))

    popWorkingPath()  # ProblemType
    return (resultsFileBaseFinal, benchmarkTestFails)
def TensileCreateLibrary():
    """Command-line entry point: generate library sources from logic files.

    Parses ``LogicPath``, ``OutputPath`` and ``RuntimeLanguage`` (plus the
    paired ``--[no-]merge-files``, ``--[no-]short-file-names`` and
    ``--[no-]library-print-debug`` switches) from ``sys.argv``, reads every
    ``.yaml`` file directly inside ``LogicPath``, collects the unique
    solutions and kernels, and writes solutions/kernels, the CMake file and
    the logic into ``OutputPath``.
    """
    print1("")
    print1(HR)
    print1("# Tensile Create Library")
    print2(HR)
    print2("")

    ##########################################################################
    # Parse Command Line Arguments
    ##########################################################################
    print2("Arguments: %s" % sys.argv)
    argParser = argparse.ArgumentParser()
    argParser.add_argument("LogicPath",
                           help="Path to LibraryLogic.yaml files.")
    argParser.add_argument("OutputPath", help="Where to write library files?")
    argParser.add_argument("RuntimeLanguage",
                           help="Which runtime language?",
                           choices=["OCL", "HIP", "HSA"])
    # Each switch is registered as an on/off pair sharing one destination.
    for flagName, destName in (("merge-files", "MergeFiles"),
                               ("short-file-names", "ShortNames"),
                               ("library-print-debug", "LibraryPrintDebug")):
        argParser.add_argument("--" + flagName, dest=destName,
                               action="store_true")
        argParser.add_argument("--no-" + flagName, dest=destName,
                               action="store_false")
    args = argParser.parse_args()

    logicPath = args.LogicPath
    outputPath = args.OutputPath
    print2("OutputPath: %s" % outputPath)
    ensurePath(outputPath)
    arguments = {}
    arguments["RuntimeLanguage"] = args.RuntimeLanguage
    arguments["MergeFiles"] = args.MergeFiles
    arguments["ShortNames"] = args.ShortNames
    arguments["LibraryPrintDebug"] = args.LibraryPrintDebug
    arguments["CodeFromFiles"] = False
    assignGlobalParameters(arguments)

    if not os.path.exists(logicPath):
        printExit("LogicPath %s doesn't exist" % logicPath)

    logicFiles = [os.path.join(logicPath, f) for f in os.listdir(logicPath)
                  if (os.path.isfile(os.path.join(logicPath, f))
                      and os.path.splitext(f)[1] == ".yaml")]

    # The header has no placeholder; the files are listed one per line below.
    print1("# LibraryLogicFiles:")
    for logicFile in logicFiles:
        print1("# %s" % logicFile)

    ##########################################################################
    # Parse config files
    ##########################################################################
    solutions = []
    logicData = {}  # keys are problemTypes, values are schedules
    for logicFileName in logicFiles:
        (scheduleName, deviceNames, problemType, solutionsForSchedule,
         indexOrder, exactLogic, rangeLogic) \
            = YAMLIO.readLibraryLogicForSchedule(logicFileName)
        if problemType not in logicData:
            logicData[problemType] = []
        logicData[problemType].append(
            (scheduleName, deviceNames, solutionsForSchedule, indexOrder,
             exactLogic, rangeLogic))
        for solution in solutionsForSchedule:
            if solution not in solutions:
                solutions.append(solution)

    # collect the unique kernels of all solutions
    kernels = []
    kernelsBetaOnly = []
    for solution in solutions:
        solutionKernels = solution.getKernels()
        for kernel in solutionKernels:
            if kernel not in kernels:
                kernels.append(kernel)
        solutionKernelsBetaOnly = solution.getKernelsBetaOnly()
        for kernel in solutionKernelsBetaOnly:
            if kernel not in kernelsBetaOnly:
                kernelsBetaOnly.append(kernel)

    # create solution writer and kernel writer
    if globalParameters["ShortNames"] and not globalParameters["MergeFiles"]:
        solutionSerialNaming = Solution.getSerialNaming(solutions)
        kernelSerialNaming = Solution.getSerialNaming(kernels)
    else:
        solutionSerialNaming = None
        kernelSerialNaming = None
    solutionMinNaming = Solution.getMinNaming(solutions)
    kernelMinNaming = Solution.getMinNaming(kernels)
    solutionWriter = SolutionWriter(solutionMinNaming, solutionSerialNaming,
                                    kernelMinNaming, kernelSerialNaming)
    kernelWriterSource = KernelWriterSource(kernelMinNaming,
                                            kernelSerialNaming)
    kernelWriterAssembly = KernelWriterAssembly(kernelMinNaming,
                                                kernelSerialNaming)

    # write solutions and kernels
    writeSolutionsAndKernels(outputPath, solutions, kernels, kernelsBetaOnly,
                             solutionWriter, kernelWriterSource,
                             kernelWriterAssembly)

    libraryStaticFiles = [
        "SolutionMapper.h",
        "TensileTypes.h",
        "KernelHeader.h",
        "SolutionHelper.cpp",
        "SolutionHelper.h",
        "Tools.cpp",
        "Tools.h",
    ]

    # write cmake
    clientName = "LibraryClient"
    writeCMake(outputPath, solutions, kernels, libraryStaticFiles, clientName)

    # write logic
    writeLogic(outputPath, logicData, solutionWriter)
    print1("# Tensile Library Writer DONE")
    print1(HR)
    print1("")
def TensileCreateLibrary():
    """Command-line entry point: generate library sources from logic files.

    Parses ``LogicPath``, ``OutputPath`` and ``RuntimeLanguage`` (plus the
    paired ``--[no-]merge-files``, ``--[no-]short-file-names`` and
    ``--[no-]library-print-debug`` switches and the repeatable ``--isa``
    option) from ``sys.argv``, reads every ``.yaml`` file directly inside
    ``LogicPath``, collects the unique solutions and kernels, replicates
    assembly kernels per supported ISA when the runtime language is HIP, and
    writes solutions/kernels, the CMake file and the logic into
    ``OutputPath``.
    """
    print1("")
    print1(HR)
    print1("# Tensile Create Library")
    print2(HR)
    print2("")

    ##########################################################################
    # Parse Command Line Arguments
    ##########################################################################
    print2("Arguments: %s" % sys.argv)
    argParser = argparse.ArgumentParser()
    argParser.add_argument("LogicPath",
                           help="Path to LibraryLogic.yaml files.")
    argParser.add_argument("OutputPath", help="Where to write library files?")
    argParser.add_argument("RuntimeLanguage",
                           help="Which runtime language?",
                           choices=["OCL", "HIP", "HSA"])
    # Each switch is registered as an on/off pair sharing one destination.
    for flagName, destName in (("merge-files", "MergeFiles"),
                               ("short-file-names", "ShortNames"),
                               ("library-print-debug", "LibraryPrintDebug")):
        argParser.add_argument("--" + flagName, dest=destName,
                               action="store_true")
        argParser.add_argument("--no-" + flagName, dest=destName,
                               action="store_false")
    argParser.add_argument(
        "--isa", dest="isa", action="append",
        help="which architectures for assembly kernels to target")
    args = argParser.parse_args()

    logicPath = args.LogicPath
    outputPath = args.OutputPath
    print2("OutputPath: %s" % outputPath)
    ensurePath(outputPath)
    arguments = {}
    arguments["RuntimeLanguage"] = args.RuntimeLanguage
    arguments["MergeFiles"] = args.MergeFiles
    arguments["ShortNames"] = args.ShortNames
    arguments["LibraryPrintDebug"] = args.LibraryPrintDebug
    if args.isa:
        newISA = []
        for isa in args.isa:
            gfxIdx = isa.find("gfx")
            if gfxIdx >= 0:
                # One digit each for major / minor / step, e.g. gfx803.
                try:
                    major = int(isa[gfxIdx + 3:gfxIdx + 4])
                    minor = int(isa[gfxIdx + 4:gfxIdx + 5])
                    step = int(isa[gfxIdx + 5:gfxIdx + 6])
                except ValueError:
                    # Missing or non-numeric digits: warn like any other
                    # malformed value instead of aborting with a traceback.
                    printWarning(
                        "isa parameter must be formed as: --isa gfx803")
                    continue
                isaTuple = (major, minor, step)
                if isaTuple in globalParameters["SupportedISA"] \
                        and isaTuple not in newISA:
                    print1("# User-Specified ISA: gfx%u%u%u" %
                           (major, minor, step))
                    newISA.append(isaTuple)
            else:
                printWarning("isa parameter must be formed as: --isa gfx803")
        arguments["SupportedISA"] = newISA
    assignGlobalParameters(arguments)

    if not os.path.exists(logicPath):
        printExit("LogicPath %s doesn't exist" % logicPath)

    logicFiles = [os.path.join(logicPath, f) for f in os.listdir(logicPath)
                  if (os.path.isfile(os.path.join(logicPath, f))
                      and os.path.splitext(f)[1] == ".yaml")]

    # The header has no placeholder; the files are listed one per line below.
    print1("# LibraryLogicFiles:")
    for logicFile in logicFiles:
        print1("# %s" % logicFile)

    ##########################################################################
    # Parse config files
    ##########################################################################
    solutions = []
    logicData = {}  # keys are problemTypes, values are schedules
    for logicFileName in logicFiles:
        (scheduleName, deviceNames, problemType, solutionsForSchedule,
         indexOrder, exactLogic, rangeLogic) \
            = YAMLIO.readLibraryLogicForSchedule(logicFileName)
        if problemType not in logicData:
            logicData[problemType] = []
        logicData[problemType].append(
            (scheduleName, deviceNames, solutionsForSchedule, indexOrder,
             exactLogic, rangeLogic))
        for solution in solutionsForSchedule:
            if solution not in solutions:
                solutions.append(solution)

    # collect the unique kernels of all solutions
    kernels = []
    kernelsBetaOnly = []
    for solution in solutions:
        solutionKernels = solution.getKernels()
        for kernel in solutionKernels:
            if kernel not in kernels:
                kernels.append(kernel)
        solutionKernelsBetaOnly = solution.getKernelsBetaOnly()
        for kernel in solutionKernelsBetaOnly:
            if kernel not in kernelsBetaOnly:
                kernelsBetaOnly.append(kernel)

    # if any kernels are assembly, append every ISA supported
    if globalParameters["RuntimeLanguage"] == "HIP":
        newKernels = []
        for kernel in kernels:
            if kernel["KernelLanguage"] == "Assembly":
                # The existing kernel takes the first supported ISA; a deep
                # copy is made for each remaining one.
                kernel["ISA"] = globalParameters["SupportedISA"][0]
                for i in range(1, len(globalParameters["SupportedISA"])):
                    newKernel = deepcopy(kernel)
                    newKernel["ISA"] = globalParameters["SupportedISA"][i]
                    newKernels.append(newKernel)
            else:
                kernel["ISA"] = (0, 0, 0)
        kernels.extend(newKernels)

    # create solution writer and kernel writer
    if globalParameters["ShortNames"] and not globalParameters["MergeFiles"]:
        solutionSerialNaming = Solution.getSerialNaming(solutions)
        kernelSerialNaming = Solution.getSerialNaming(kernels)
    else:
        solutionSerialNaming = None
        kernelSerialNaming = None
    solutionMinNaming = Solution.getMinNaming(solutions)
    kernelMinNaming = Solution.getMinNaming(kernels)
    solutionWriter = SolutionWriter(solutionMinNaming, solutionSerialNaming,
                                    kernelMinNaming, kernelSerialNaming)
    kernelWriterSource = KernelWriterSource(kernelMinNaming,
                                            kernelSerialNaming)
    kernelWriterAssembly = KernelWriterAssembly(kernelMinNaming,
                                                kernelSerialNaming)

    # write solutions and kernels
    writeSolutionsAndKernels(outputPath, solutions, kernels, kernelsBetaOnly,
                             solutionWriter, kernelWriterSource,
                             kernelWriterAssembly)

    libraryStaticFiles = [
        "TensileTypes.h",
        "KernelHeader.h",
        "SolutionHelper.cpp",
        "SolutionHelper.h",
        "Tools.cpp",
        "Tools.h",
    ]

    # write cmake
    clientName = "LibraryClient"
    writeCMake(outputPath, solutions, kernels, libraryStaticFiles, clientName)

    # write logic
    writeLogic(outputPath, logicData, solutionWriter)
    print1("# Tensile Library Writer DONE")
    print1(HR)
    print1("")
def fillInMissingStepsWithDefaults(self, config):
    """Populate the benchmark phases from ``config`` plus module defaults.

    Reads the six phase lists from ``config`` (BenchmarkCommonParameters,
    ForkParameters, BenchmarkForkParameters, JoinParameters,
    BenchmarkJoinParameters, BenchmarkFinalParameters), validates requested
    parameter names and values against ``validParameters``, and fills
    ``self.benchmarkCommonParameters``, ``self.forkParameters``,
    ``self.benchmarkForkParameters``, ``self.joinParameters``,
    ``self.benchmarkJoinParameters`` and ``self.benchmarkFinalParameters``.
    A default parameter is added to a phase only when no phase already
    mentions it.  Parameters left with fewer than two values are moved into
    ``self.hardcodedParameters[0]``, ``self.singleValueParameters`` and
    ``self.initialSolutionParameters``.

    Default parameter dicts are shallow-copied before being stored, because
    step (I-7) pops entries from the stored dicts and would otherwise empty
    the shared module-level defaults for every later call.  Dicts that come
    from ``config`` are still stored and modified in place.
    """
    print2("")
    print2(
        "####################################################################"
    )
    print1("# Filling in Parameters With Defaults")
    print2(
        "####################################################################"
    )
    print2("")

    ##########################################################################
    # (I-0) get 6 phases from config
    configBenchmarkCommonParameters = config["BenchmarkCommonParameters"] \
        if "BenchmarkCommonParameters" in config \
        else [{"ProblemSizes": defaultProblemSizes}]
    configForkParameters = config["ForkParameters"] \
        if "ForkParameters" in config else []
    configBenchmarkForkParameters = config["BenchmarkForkParameters"] \
        if "BenchmarkForkParameters" in config \
        else []
    configJoinParameters = config["JoinParameters"] \
        if "JoinParameters" in config else []
    configBenchmarkJoinParameters = config["BenchmarkJoinParameters"] \
        if "BenchmarkJoinParameters" in config \
        else []
    configBenchmarkFinalParameters = config["BenchmarkFinalParameters"] \
        if "BenchmarkFinalParameters" in config \
        and config["BenchmarkFinalParameters"] is not None \
        and len(config["BenchmarkFinalParameters"]) > 0 \
        else [{"ProblemSizes": defaultBenchmarkFinalProblemSizes}]

    ##########################################################################
    # Ensure only valid solution parameters were requested
    validParameterNames = validParameters.keys()
    for paramDictList in [configBenchmarkCommonParameters,
                          configForkParameters,
                          configBenchmarkForkParameters,
                          configBenchmarkJoinParameters]:
        if paramDictList is not None:
            for paramDict in paramDictList:
                for paramName in paramDict:
                    if paramName in ["ProblemSizes"]:
                        continue
                    else:
                        if paramName not in validParameterNames:
                            printExit("Invalid parameter name: %s\nValid parameters are %s."
                                      % (paramName, validParameterNames))
                        paramValues = paramDict[paramName]
                        for paramValue in paramValues:
                            if paramValue not in validParameters[paramName]:
                                printExit("Invalid parameter value: %s = %s\nValid values for %s are %s."
                                          % (paramName, paramValue, paramName,
                                             validParameters[paramName]))

    ##########################################################################
    # (I-1) get current problem sizes
    currentProblemSizes = defaultProblemSizes
    if configBenchmarkCommonParameters is not None:
        if len(configBenchmarkCommonParameters) > 0:
            if "ProblemSizes" in configBenchmarkCommonParameters[0]:
                # user specified, so use it, remove it from config and insert
                # later
                currentProblemSizes = \
                    configBenchmarkCommonParameters[0]["ProblemSizes"]
                del configBenchmarkCommonParameters[0]
    # into common we put in all Dcommon that
    # don't show up in Ccommon/Cfork/CBfork/Cjoin/CBjoin
    # followed by Ccommon
    self.benchmarkCommonParameters = [{"ProblemSizes": currentProblemSizes}]
    for paramDict in defaultBenchmarkCommonParameters:
        for paramName in paramDict:
            if not hasParam(paramName, [configBenchmarkCommonParameters,
                                        configForkParameters,
                                        configBenchmarkForkParameters,
                                        configJoinParameters,
                                        configBenchmarkJoinParameters]) \
                    or paramName == "ProblemSizes":
                # copy: (I-7) pops from the stored dict
                self.benchmarkCommonParameters.append(dict(paramDict))
    if configBenchmarkCommonParameters is not None:
        for paramDict in configBenchmarkCommonParameters:
            self.benchmarkCommonParameters.append(paramDict)
    else:  # make empty
        self.benchmarkCommonParameters = [
            {"ProblemSizes": currentProblemSizes}
        ]

    ##########################################################################
    # (I-2) into fork we put in all Dfork that
    # don't show up in Bcommon/Cfork/CBfork/Cjoin/CBjoin
    # followed by Cfork
    self.forkParameters = []
    for paramDict in defaultForkParameters:
        for paramName in paramDict:
            if not hasParam(paramName, [self.benchmarkCommonParameters,
                                        configForkParameters,
                                        configBenchmarkForkParameters,
                                        configJoinParameters,
                                        configBenchmarkJoinParameters]) \
                    or paramName == "ProblemSizes":
                # copy: (I-7) pops from the stored dict
                self.forkParameters.append(dict(paramDict))
    if configForkParameters is not None:
        for paramDict in configForkParameters:
            self.forkParameters.append(paramDict)
    else:  # make empty
        self.forkParameters = []

    ##########################################################################
    # (I-3) get current problem sizes
    if configBenchmarkForkParameters is not None:
        if len(configBenchmarkForkParameters) > 0:
            if "ProblemSizes" in configBenchmarkForkParameters[0]:
                # user specified, so use it, remove it from config and insert
                # later
                currentProblemSizes = \
                    configBenchmarkForkParameters[0]["ProblemSizes"]
                del configBenchmarkForkParameters[0]
    # into Bfork we put in all DBfork that
    # don't show up in Bcommon/Bfork/CBfork/Cjoin/CBjoin
    # followed by CBforked
    self.benchmarkForkParameters = [{"ProblemSizes": currentProblemSizes}]
    for paramDict in defaultBenchmarkForkParameters:
        for paramName in paramDict:
            if not hasParam(paramName, [self.benchmarkCommonParameters,
                                        self.forkParameters,
                                        configBenchmarkForkParameters,
                                        configJoinParameters,
                                        configBenchmarkJoinParameters]) \
                    or paramName == "ProblemSizes":
                # copy: (I-7) pops from the stored dict
                self.benchmarkForkParameters.append(dict(paramDict))
    if configBenchmarkForkParameters is not None:
        for paramDict in configBenchmarkForkParameters:
            self.benchmarkForkParameters.append(paramDict)
    else:  # make empty
        self.benchmarkForkParameters = [
            {"ProblemSizes": currentProblemSizes}
        ]

    ##########################################################################
    # (I-4) into join we put in all non-derived Djoin that
    # don't show up in Bcommon/Bfork/CBfork/Cjoin/CBjoin
    # followed by CBforked
    self.joinParameters = []
    for paramName in defaultJoinParameters:
        if not hasParam(paramName, [self.benchmarkCommonParameters,
                                    self.forkParameters,
                                    self.benchmarkForkParameters,
                                    configJoinParameters,
                                    configBenchmarkJoinParameters]) \
                or paramName == "ProblemSizes":
            # A default "MacroTile" join is only added when the config has
            # no "JoinParameters" key at all.
            if "JoinParameters" not in config \
                    or (paramName != "MacroTile"):
                self.joinParameters.append(paramName)
    if configJoinParameters is not None:
        for paramName in configJoinParameters:
            self.joinParameters.append(paramName)
    else:  # make empty
        self.joinParameters = []

    ##########################################################################
    # (I-5) benchmark join
    if configBenchmarkJoinParameters is not None:
        if len(configBenchmarkJoinParameters) > 0:
            if "ProblemSizes" in configBenchmarkJoinParameters[0]:
                # user specified, so use it, remove it from config and insert
                # later
                currentProblemSizes = \
                    configBenchmarkJoinParameters[0]["ProblemSizes"]
                del configBenchmarkJoinParameters[0]
    # into Bjoin we put in all DBjoin that
    # don't show up in Bcommon/Bfork/BBfork/Bjoin/CBjoin
    # followed by CBjoin
    self.benchmarkJoinParameters = [{"ProblemSizes": currentProblemSizes}]
    for paramDict in defaultBenchmarkJoinParameters:
        for paramName in paramDict:
            if not hasParam(paramName, [self.benchmarkCommonParameters,
                                        self.forkParameters,
                                        self.benchmarkForkParameters,
                                        self.joinParameters,
                                        configBenchmarkJoinParameters]) \
                    or paramName == "ProblemSizes":
                # copy: (I-7) pops from the stored dict
                self.benchmarkJoinParameters.append(dict(paramDict))
    if configBenchmarkJoinParameters is not None:
        for paramDict in configBenchmarkJoinParameters:
            self.benchmarkJoinParameters.append(paramDict)
    else:  # make empty
        self.benchmarkJoinParameters = [
            {"ProblemSizes": currentProblemSizes}
        ]

    ##########################################################################
    # (I-6) benchmark final sizes
    self.benchmarkFinalParameters = configBenchmarkFinalParameters
    # no other parameters besides problem sizes

    ##########################################################################
    # (I-7) any default param with 1 value will be hardcoded; move to
    # beginning
    for stepList in [self.benchmarkCommonParameters,
                     self.forkParameters, self.benchmarkForkParameters,
                     self.benchmarkJoinParameters]:
        # Iterate over shallow copies because both the list and the dicts
        # are modified inside the loops.
        for paramDict in copy(stepList):
            for paramName in copy(paramDict):
                paramValues = paramDict[paramName]
                if paramValues is None:
                    printExit(
                        "You must specify value for parameters \"%s\"" %
                        paramName)
                if len(paramValues) < 2 and paramName != "ProblemSizes":
                    paramDict.pop(paramName)
                    self.hardcodedParameters[0][paramName] = paramValues[0]
                    self.singleValueParameters[paramName] = [paramValues[0]]
                    self.initialSolutionParameters[paramName] = \
                        paramValues[0]
                    if len(paramDict) == 0:
                        stepList.remove(paramDict)

    ##########################################################################
    # (I-8) if fork and join, but no benchmark fork, append dummy
    # benchmarkFork
    if len(self.forkParameters) > 0 and len(self.joinParameters) > 0 \
            and (len(self.benchmarkForkParameters) == 0
                 or (len(self.benchmarkForkParameters) == 1
                     and hasParam("ProblemSizes",
                                  self.benchmarkForkParameters))):
        self.benchmarkForkParameters.append({"BenchmarkFork": [0]})

    ##########################################################################
    # (I-9) if join, but no benchmark join: no dummy benchmarkJoin is
    # appended; this is handled by the Final Benchmark.

    ##########################################################################
    # (I-10) Parameter Lists
    # hardcodedParameters
    print2("HardcodedParameters:")
    for paramName in self.hardcodedParameters[0]:
        paramValues = self.hardcodedParameters[0][paramName]
        print2(" %s: %s" % (paramName, paramValues))
    # benchmarkCommonParameters
    print2("BenchmarkCommonParameters:")
    for step in self.benchmarkCommonParameters:
        print2(" %s" % step)
    # forkParameters
    print2("ForkParameters:")
    for param in self.forkParameters:
        print2(" %s" % param)
    # benchmarkForkParameters
    print2("BenchmarkForkParameters:")
    for step in self.benchmarkForkParameters:
        print2(" %s" % step)
    # joinParameters
    print2("JoinParameters:")
    for param in self.joinParameters:
        print2(" %s" % param)
    # benchmarkJoinParameters
    print2("BenchmarkJoinParameters:")
    for step in self.benchmarkJoinParameters:
        print2(" %s" % step)
    # benchmarkFinalParameters
    print2("BenchmarkFinalParameters:")
    for step in self.benchmarkFinalParameters:
        print2(" %s" % step)
def convertParametersToSteps(self):
    """Turn the parameter lists held on self into an ordered list of steps.

    The phases run in a fixed order:

      (II-1) benchmarkCommonParameters  -> addStepsForParameters
      (II-2) forkParameters             -> forkHardcodedParameters
      (II-3) benchmarkForkParameters    -> addStepsForParameters
      (II-4) joinParameters             -> joinHardcodedParameters
      (II-5) benchmarkJoinParameters    -> addStepsForParameters
      (II-6) benchmarkFinalParameters   -> one BenchmarkStep appended to
                                           self.benchmarkSteps per entry

    Fork and join permutations are enumerated by decoding the permutation
    index as a mixed-radix number in which the first parameter varies
    fastest.  The decoding uses divmod so the running index stays an
    integer regardless of how the "/" operator is defined for ints
    (Python 2 classic division vs. true division).
    """
    banner = "####################################################################"

    print2("")
    print2(banner)
    print1("# Convert Parameters to Steps")
    print2(banner)
    print2("")

    ############################################################################
    # (II-1) benchmark common parameters
    print2("")
    print2(banner)
    print1("# Benchmark Common Parameters")
    self.addStepsForParameters(self.benchmarkCommonParameters)

    ############################################################################
    # (II-2) fork parameters
    # calculate permutations of the forked values
    print2("")
    print2(banner)
    print1("# Fork Parameters")
    print2(self.forkParameters)
    totalPermutations = 1
    for param in self.forkParameters:
        for name in param:  # only 1
            totalPermutations *= len(param[name])
    forkPermutations = []
    for i in range(totalPermutations):
        permutation = {}
        pIdx = i
        for param in self.forkParameters:
            for name in param:
                values = param[name]
                # peel off this parameter's digit; quotient feeds the next one
                pIdx, valueIdx = divmod(pIdx, len(values))
                permutation[name] = values[valueIdx]
        forkPermutations.append(permutation)
    if forkPermutations:
        self.forkHardcodedParameters(forkPermutations)

    ############################################################################
    # (II-3) benchmark fork parameters
    print2("")
    print2(banner)
    print1("# Benchmark Fork Parameters")
    self.addStepsForParameters(self.benchmarkForkParameters)

    ############################################################################
    # (II-4.1) join parameters
    # answer should go in hard-coded parameters
    # does it remove the prior forks? Yes.
    print2("")
    print2(banner)
    print1("# Join Parameters")
    macroTileJoinSet = set()
    totalPermutations = 1
    # NOTE(review): the enumeration in (II-4.4) is kept inside this guard so
    # that joinHardcodedParameters is not invoked when there is nothing to
    # join -- confirm against the original indentation.
    if len(self.joinParameters) > 0:
        for joinName in self.joinParameters:
            # joining a parameter with only a single value
            if hasParam(joinName, self.singleValueParameters):
                pass
            elif hasParam(joinName, self.forkParameters):
                # count permutations
                for param in self.forkParameters:
                    if joinName in param:
                        localPermutations = len(param[joinName])
                        print2("JoinParameter %s has %u possibilities"
                               % (joinName, localPermutations))
                        totalPermutations *= localPermutations

            ####################################################################
            # (II-4.2) Join MacroTile
            elif joinName == "MacroTile":
                print2("JoinParam: MacroTile")
                # get possible WorkGroupEdges from forked
                print2("currentForkParameters = %s" % str(self.forkParameters))
                threadTileValues = []
                workGroupValues = []
                # todo having MacroTile as join parameter causes trouble if
                # one parameter is benchmarked rather than forked
                # however, this may still be the right way to do it

                # count permutations; a later list overrides an earlier one
                for paramList in [self.benchmarkCommonParameters,
                                  self.forkParameters,
                                  self.benchmarkForkParameters,
                                  self.benchmarkJoinParameters,
                                  self.singleValueParameters]:
                    if hasParam("ThreadTile", paramList):
                        threadTileValues = getParamValues(
                            "ThreadTile", paramList)
                    if hasParam("WorkGroup", paramList):
                        workGroupValues = getParamValues(
                            "WorkGroup", paramList)
                macroTilePermutations = \
                    len(workGroupValues) * len(threadTileValues)
                print2("# Total JoinMacroTile Permutations: %u"
                       % macroTilePermutations)

                # enumerate permutations; work-group index varies fastest,
                # which keeps the set insertion order of the index-based form
                for threadTile in threadTileValues:
                    for workGroup in workGroupValues:
                        macroTile0 = workGroup[0] * threadTile[0]
                        macroTile1 = workGroup[1] * threadTile[1]
                        macroTileJoinSet.add((macroTile0, macroTile1))
                totalPermutations *= len(macroTileJoinSet)
                print2("JoinMacroTileSet(%u): %s"
                       % (len(macroTileJoinSet), macroTileJoinSet))

            # invalid join parameter
            else:
                validJoinNames = ["MacroTile"]
                for validParam in self.forkParameters:
                    for validName in validParam:  # only 1
                        validJoinNames.append(validName)
                printExit("JoinParameter \"%s\" not in %s"
                          % (joinName, validJoinNames))

        ########################################################################
        # (II-4.4) Enumerate Permutations Other * MacroTile * DepthU
        macroTiles = list(macroTileJoinSet)
        print2("# TotalJoinPermutations = %u" % (totalPermutations))
        joinPermutations = []
        for i in range(totalPermutations):
            permutation = {}
            pIdx = i
            for joinName in self.joinParameters:
                if hasParam(joinName, self.forkParameters):
                    for paramDict in self.forkParameters:
                        if joinName in paramDict:
                            paramValues = paramDict[joinName]
                            pIdx, valueIdx = divmod(pIdx, len(paramValues))
                            permutation[joinName] = paramValues[valueIdx]
                            break
                elif joinName == "MacroTile":
                    pIdx, valueIdx = divmod(pIdx, len(macroTiles))
                    permutation["MacroTile0"] = macroTiles[valueIdx][0]
                    permutation["MacroTile1"] = macroTiles[valueIdx][1]
            joinPermutations.append(permutation)
        if joinPermutations:
            self.joinHardcodedParameters(joinPermutations)

    ############################################################################
    # (II-5) benchmark join parameters
    print2("")
    print2(banner)
    print1("# Benchmark Join Parameters")
    self.addStepsForParameters(self.benchmarkJoinParameters)

    ############################################################################
    # (II-6) benchmark final
    print2("")
    print2(banner)
    print1("# Benchmark Final")
    for problemSizesDict in self.benchmarkFinalParameters:
        problemSizes = problemSizesDict["ProblemSizes"]
        self.currentProblemSizes = ProblemSizes(self.problemType, problemSizes)
        # the final step benchmarks no additional parameters
        currentBenchmarkParameters = {}
        benchmarkStep = BenchmarkStep(
            self.hardcodedParameters,
            currentBenchmarkParameters,
            self.initialSolutionParameters,
            self.currentProblemSizes,
            self.benchmarkStepIdx)
        self.benchmarkSteps.append(benchmarkStep)
        self.benchmarkStepIdx += 1