def writeIncludes():
    """Generate the AutoGemm include files.

    Creates the include directory when it is missing, then walks every
    combination of precision, order, transA, transB, beta and tile exposed
    by ``AutoGemmParameters`` and registers the resulting kernel with each
    include-file builder.  Every builder is flushed with ``writeToFile()``
    once all combinations have been visited.

    A single ``KernelParameters`` object is reused and mutated in place for
    all combinations; the builders receive that same object on every call.
    """
    # Use the call form of print so the module parses on Python 3 as well as
    # Python 2, matching the other functions in this file.
    print("AutoGemm.py: Generating include files.")
    if not os.path.exists(Common.getIncludePath()):
        os.makedirs(Common.getIncludePath())

    kernelSourceIncludes = KernelSourceIncludes()
    kernelBinaryIncludes = KernelBinaryIncludes()
    clKernelIncludes = ClKernelIncludes()
    kernelSourceBuildOptions = KernelSourceBuildOptions()
    kernelBinaryBuildOptions = KernelBinaryBuildOptions()
    cppKernelEnumeration = CppKernelEnumeration()

    # for each precision
    kernel = KernelParameters.KernelParameters()
    for precision in AutoGemmParameters.precisions:
        kernel.precision = precision
        cppKernelEnumeration.newPrecision(precision)

        # valid tiles for this precision
        tiles = AutoGemmParameters.getTilesForPrecision(precision)

        # add tiles for this precision to Cpp
        for tile in tiles:
            cppKernelEnumeration.addTile(tile)

        # for non tile parameters
        for order in AutoGemmParameters.orders:
            kernel.order = order
            for transA in AutoGemmParameters.transposes[precision]:
                kernel.transA = transA
                for transB in AutoGemmParameters.transposes[precision]:
                    kernel.transB = transB
                    for beta in AutoGemmParameters.betas:
                        kernel.beta = beta

                        # add this nonTile combo for this precision to Cpp
                        cppKernelEnumeration.addNonTile(kernel)

                        # for tile parameters
                        for tile in tiles:
                            kernel.useTile(tile)
                            kernelSourceIncludes.addKernel(kernel)
                            kernelBinaryIncludes.addKernel(kernel)
                            kernelSourceBuildOptions.addKernel(kernel)
                            kernelBinaryBuildOptions.addKernel(kernel)
                            clKernelIncludes.addKernel(kernel)
                            cppKernelEnumeration.addKernel(kernel)

    # save written files
    kernelSourceIncludes.writeToFile()
    kernelBinaryIncludes.writeToFile()
    clKernelIncludes.writeToFile()
    kernelSourceBuildOptions.writeToFile()
    kernelBinaryBuildOptions.writeToFile()
    cppKernelEnumeration.writeToFile()
def writeOfflineCompilation(args):
    """Write the header listing the kernels to pre-compile.

    Produces ``AutoGemmKernelsToPreCompile.h`` inside the include directory.
    The header holds a ``gemmPreCompile[][8]`` table with one row per
    selected (precision, order, transA, transB, beta, tile) combination,
    followed by ``gemmPreCompileNum`` with the number of rows.

    Args:
        args: object exposing ``precisions``, ``orders``, ``transposes`` and
            ``betas`` iterables.  Each ``transposes`` item is indexed with
            ``[0]`` for transA and ``[1]`` for transB.
    """
    print("AutoGemm.py: Generating list of kernels to pre-compile.")
    if not os.path.exists(Common.getIncludePath()):
        os.makedirs(Common.getIncludePath())

    ocFileName = Common.getIncludePath() + "AutoGemmKernelsToPreCompile.h"
    rowFormat = " { %1u, %1u, %1u, %1u, %1u, %3u, %3u, %2u },\n"

    count = 0
    # The context manager closes the file even if a lookup below raises.
    with open(ocFileName, "w") as ocFile:
        ocFile.write(Common.getAutoGemmHeader())
        ocFile.write(
            "\n/*precision, order, transA, transB, beta, tileNumRows, tileNumCols, unroll*/\n"
        )
        ocFile.write("\nunsigned int gemmPreCompile[][8] = {\n")

        for precision in args.precisions:
            isRealPrecision = precision in ("s", "d")
            validTiles = AutoGemmParameters.getTilesForPrecision(precision)
            rows = []
            for order in args.orders:
                for transpose in args.transposes:
                    transA = transpose[0]
                    transB = transpose[1]
                    if isRealPrecision and "C" in (transA, transB):
                        # real precision doesn't have conjugate transpose
                        continue
                    for beta in args.betas:
                        for tile in validTiles:
                            rows.append(rowFormat % (
                                Common.precisionInt[precision],
                                Common.orderInt[order],
                                Common.transposeInt[transA],
                                Common.transposeInt[transB],
                                beta,
                                tile.macroTileNumRows,
                                tile.macroTileNumCols,
                                tile.unroll,
                            ))
            count += len(rows)
            # Flush one precision at a time so the buffered text stays small.
            ocFile.write("".join(rows))

        # Compare by value: "is 0" tests object identity, which is not a
        # reliable integer comparison.
        if count == 0:
            # Presumably a placeholder row so the C initializer is never
            # empty -- TODO confirm against the consumer of this header.
            ocFile.write(rowFormat % (0, 0, 0, 0, 0, 0, 0, 0))
        ocFile.write("};\n")
        ocFile.write("unsigned int gemmPreCompileNum = " + str(count) + ";\n")

    # The reported total is four times the row count; presumably each row
    # expands to tile, row, column and corner kernels -- TODO confirm.
    count *= 4
    print("AutoGemm.py: %u kernels will be pre-compiled." % count)
def writeOfflineCompilation(args):
    """Write the header listing the kernels to pre-compile.

    Produces ``AutoGemmKernelsToPreCompile.h`` inside the include directory.
    The header holds a ``gemmPreCompile[][8]`` table with one row per
    selected (precision, order, transA, transB, beta, tile) combination,
    followed by ``gemmPreCompileNum`` with the number of rows.

    Args:
        args: object exposing ``precisions``, ``orders``, ``transposes`` and
            ``betas`` iterables.  Each ``transposes`` item is indexed with
            ``[0]`` for transA and ``[1]`` for transB.
    """
    print("AutoGemm.py: Generating list of kernels to pre-compile.")
    if not os.path.exists(Common.getIncludePath()):
        os.makedirs(Common.getIncludePath())

    ocFileName = Common.getIncludePath() + "AutoGemmKernelsToPreCompile.h"
    rowFormat = " { %1u, %1u, %1u, %1u, %1u, %3u, %3u, %2u },\n"

    count = 0
    # The context manager closes the file even if a lookup below raises.
    with open(ocFileName, "w") as ocFile:
        ocFile.write(Common.getAutoGemmHeader())
        ocFile.write(
            "\n/*precision, order, transA, transB, beta, tileNumRows, tileNumCols, unroll*/\n"
        )
        ocFile.write("\nunsigned int gemmPreCompile[][8] = {\n")

        for precision in args.precisions:
            isRealPrecision = precision in ("s", "d")
            validTiles = AutoGemmParameters.getTilesForPrecision(precision)
            rows = []
            for order in args.orders:
                for transpose in args.transposes:
                    transA = transpose[0]
                    transB = transpose[1]
                    if isRealPrecision and "C" in (transA, transB):
                        # real precision doesn't have conjugate transpose
                        continue
                    for beta in args.betas:
                        for tile in validTiles:
                            rows.append(rowFormat % (
                                Common.precisionInt[precision],
                                Common.orderInt[order],
                                Common.transposeInt[transA],
                                Common.transposeInt[transB],
                                beta,
                                tile.macroTileNumRows,
                                tile.macroTileNumCols,
                                tile.unroll,
                            ))
            count += len(rows)
            # Flush one precision at a time so the buffered text stays small.
            ocFile.write("".join(rows))

        # Compare by value: "is 0" tests object identity, which is not a
        # reliable integer comparison.
        if count == 0:
            # Presumably a placeholder row so the C initializer is never
            # empty -- TODO confirm against the consumer of this header.
            ocFile.write(rowFormat % (0, 0, 0, 0, 0, 0, 0, 0))
        ocFile.write("};\n")
        ocFile.write("unsigned int gemmPreCompileNum = " + str(count) + ";\n")

    # The reported total is four times the row count; presumably each row
    # expands to tile, row, column and corner kernels -- TODO confirm.
    count *= 4
    print("AutoGemm.py: %u kernels will be pre-compiled." % count)
def writeKernelSelection():
    """Generate the kernel-selection output files.

    Two writers are driven here.  ``KernelSelectionSpecific`` is told about
    every precision, order, transpose pair, beta and tile in nested order
    and then written out.  ``KernelSelection`` is built directly from the
    parameter tables in ``AutoGemmParameters`` and written afterwards.
    """
    print("AutoGemm.py: Generating kernel selection.")
    if not os.path.exists(Common.getIncludePath()):
        os.makedirs(Common.getIncludePath())

    # ---- specific kernel selection ----
    specificSelection = KernelSelectionSpecific()
    # One parameter object is mutated in place for every combination.
    params = KernelParameters.KernelParameters()
    for precision in AutoGemmParameters.precisions:
        params.precision = precision
        specificSelection.newPrecision(precision)

        tilesForPrecision = AutoGemmParameters.getTilesForPrecision(precision)
        transposeChoices = AutoGemmParameters.transposes[precision]

        for order in AutoGemmParameters.orders:
            params.order = order
            specificSelection.newOrder(order)
            for transA in transposeChoices:
                params.transA = transA
                for transB in transposeChoices:
                    params.transB = transB
                    specificSelection.newTrans(transA, transB)
                    for beta in AutoGemmParameters.betas:
                        params.beta = beta
                        specificSelection.newBeta(beta)
                        for tile in tilesForPrecision:
                            params.useTile(tile)
                            specificSelection.newKernel(params)
    specificSelection.writeToFile()

    # ---- general kernel selection ----
    selection = KernelSelection(
        AutoGemmParameters.precisions,
        AutoGemmParameters.orders,
        AutoGemmParameters.transposes,
        AutoGemmParameters.betas,
        AutoGemmParameters.unrolls,
        AutoGemmParameters.kernelSelectionData,
    )
    selection.writeToFile()
def writeKernelSelection():
    """Generate the kernel-selection output files.

    Two writers are driven here.  ``KernelSelectionSpecific`` is told about
    every precision, order, transpose pair, beta and tile in nested order
    and then written out.  ``KernelSelection`` is built directly from the
    parameter tables in ``AutoGemmParameters`` and written afterwards.
    """
    print("AutoGemm.py: Generating kernel selection.")
    if not os.path.exists(Common.getIncludePath()):
        os.makedirs(Common.getIncludePath())

    # ---- specific kernel selection ----
    specificSelection = KernelSelectionSpecific()
    # One parameter object is mutated in place for every combination.
    params = KernelParameters.KernelParameters()
    for precision in AutoGemmParameters.precisions:
        params.precision = precision
        specificSelection.newPrecision(precision)

        tilesForPrecision = AutoGemmParameters.getTilesForPrecision(precision)
        transposeChoices = AutoGemmParameters.transposes[precision]

        for order in AutoGemmParameters.orders:
            params.order = order
            specificSelection.newOrder(order)
            for transA in transposeChoices:
                params.transA = transA
                for transB in transposeChoices:
                    params.transB = transB
                    specificSelection.newTrans(transA, transB)
                    for beta in AutoGemmParameters.betas:
                        params.beta = beta
                        specificSelection.newBeta(beta)
                        for tile in tilesForPrecision:
                            params.useTile(tile)
                            specificSelection.newKernel(params)
    specificSelection.writeToFile()

    # ---- general kernel selection ----
    selection = KernelSelection(
        AutoGemmParameters.precisions,
        AutoGemmParameters.orders,
        AutoGemmParameters.transposes,
        AutoGemmParameters.betas,
        AutoGemmParameters.unrolls,
        AutoGemmParameters.kernelSelectionData,
    )
    selection.writeToFile()
def writeOpenCLKernels():
    """Write one OpenCL kernel file per parameter combination and edge case.

    Ensures the kernel source and kernel binary directories exist, then for
    every precision, order, transA, transB, beta and tile writes four
    kernels via ``writeOpenCLKernelToFile``: the tile kernel itself, plus
    shallow copies with ``macroTileNumRows`` set to 1, ``macroTileNumCols``
    set to 1, and both set to 1.  The total written is printed at the end.
    """
    for getDirectory in (Common.getKernelSourcePath, Common.getKernelBinaryPath):
        if not os.path.exists(getDirectory()):
            os.makedirs(getDirectory())

    written = 0
    # One parameter object is mutated in place for every combination.
    params = KernelParameters.KernelParameters()
    for precision in AutoGemmParameters.precisions:
        params.precision = precision
        tilesForPrecision = AutoGemmParameters.getTilesForPrecision(precision)

        for order in AutoGemmParameters.orders:
            params.order = order
            for transA in AutoGemmParameters.transposes[precision]:
                params.transA = transA
                for transB in AutoGemmParameters.transposes[precision]:
                    params.transB = transB
                    for beta in AutoGemmParameters.betas:
                        params.beta = beta
                        for tile in tilesForPrecision:
                            # full tile
                            params.useTile(tile)
                            writeOpenCLKernelToFile(params)
                            written += 1

                            # single-row variant
                            rowVariant = copy.copy(params)
                            rowVariant.macroTileNumRows = 1
                            writeOpenCLKernelToFile(rowVariant)
                            written += 1

                            # single-column variant
                            colVariant = copy.copy(params)
                            colVariant.macroTileNumCols = 1
                            writeOpenCLKernelToFile(colVariant)
                            written += 1

                            # corner variant: one row and one column
                            cornerVariant = copy.copy(params)
                            cornerVariant.macroTileNumRows = 1
                            cornerVariant.macroTileNumCols = 1
                            writeOpenCLKernelToFile(cornerVariant)
                            written += 1

    print("AutoGemm.py: generated %d kernels" % written)
def writeOpenCLKernels():
    """Write one OpenCL kernel file per parameter combination and edge case.

    Ensures the kernel source and kernel binary directories exist, then for
    every precision, order, transA, transB, beta and tile writes four
    kernels via ``writeOpenCLKernelToFile``: the tile kernel itself, plus
    shallow copies with ``macroTileNumRows`` set to 1, ``macroTileNumCols``
    set to 1, and both set to 1.  The total written is printed at the end.
    """
    for getDirectory in (Common.getKernelSourcePath, Common.getKernelBinaryPath):
        if not os.path.exists(getDirectory()):
            os.makedirs(getDirectory())

    written = 0
    # One parameter object is mutated in place for every combination.
    params = KernelParameters.KernelParameters()
    for precision in AutoGemmParameters.precisions:
        params.precision = precision
        tilesForPrecision = AutoGemmParameters.getTilesForPrecision(precision)

        for order in AutoGemmParameters.orders:
            params.order = order
            for transA in AutoGemmParameters.transposes[precision]:
                params.transA = transA
                for transB in AutoGemmParameters.transposes[precision]:
                    params.transB = transB
                    for beta in AutoGemmParameters.betas:
                        params.beta = beta
                        for tile in tilesForPrecision:
                            # full tile
                            params.useTile(tile)
                            writeOpenCLKernelToFile(params)
                            written += 1

                            # single-row variant
                            rowVariant = copy.copy(params)
                            rowVariant.macroTileNumRows = 1
                            writeOpenCLKernelToFile(rowVariant)
                            written += 1

                            # single-column variant
                            colVariant = copy.copy(params)
                            colVariant.macroTileNumCols = 1
                            writeOpenCLKernelToFile(colVariant)
                            written += 1

                            # corner variant: one row and one column
                            cornerVariant = copy.copy(params)
                            cornerVariant.macroTileNumRows = 1
                            cornerVariant.macroTileNumCols = 1
                            writeOpenCLKernelToFile(cornerVariant)
                            written += 1

    print("AutoGemm.py: generated %d kernels" % written)