def run(self, desired_result, input, limit):
    """
    Run a given configuration and return its performance.

    The M/N/K values of the configuration are scaled by
    ``self.granularity`` and handed to ``./xgemm.sh`` through the
    LIBXSMM_TGEMM_M/N/K environment variables (together with CHECK=0).
    The script is launched once per matrix shape loaded from
    ``self.args.mnk``; the number following "LIBXSMM:" in each output
    is taken as the performance figure (presumably GFLOPS) and all
    figures are combined into a geometric mean.

    Args:
        desired_result: supplies ``configuration.data``, a mapping with
            the keys "M", "N" and "K".
        input: unused.
        limit: unused.

    Returns:
        Result with time=1000000/geomean, accuracy=geomean, and
        size=granularity**3 * M * N * K.

    Raises:
        ValueError: if no matrix shape was loaded (the mean would be
            undefined).
        AssertionError: if a run exits with a non-zero code, prints no
            "LIBXSMM:" figure, or reports a non-positive figure.
    """
    cfg = desired_result.configuration.data
    scale = self.granularity
    run_cmd = (
        "CHECK=0"
        " LIBXSMM_TGEMM_M=" + str(scale * cfg["M"]) +
        " LIBXSMM_TGEMM_N=" + str(scale * cfg["N"]) +
        " LIBXSMM_TGEMM_K=" + str(scale * cfg["K"]) +
        " ./xgemm.sh")
    dimset = libxsmm_utilities.load_mnklist(self.args.mnk, 0, -1)
    if not dimset:
        # otherwise the division by len(dimset) fails with an opaque
        # ZeroDivisionError
        raise ValueError("no matrix shapes to benchmark")
    # compiled once instead of on every iteration
    pattern = re.compile(r"\s*LIBXSMM:\s+([0-9]+(\.[0-9]*)*)")
    log_perf = []
    for dims in dimset:
        command = run_cmd + " " + " ".join(map(str, dims))
        run_result = self.call_program(command)
        # The checks below raise explicitly (rather than using the assert
        # statement) so they stay active under "python -O"; the exception
        # type is kept as AssertionError for existing callers.
        if run_result["returncode"] != 0:
            raise AssertionError(
                "benchmark failed with exit code "
                + str(run_result["returncode"]) + ": " + command)
        output = run_result["stdout"]
        if isinstance(output, bytes):
            # NOTE(review): call_program may hand back bytes (subprocess
            # pipes on Python 3) - decode so the str pattern applies.
            output = output.decode(errors="replace")
        match = pattern.search(output)
        if match is None:
            raise AssertionError(
                "no 'LIBXSMM:' figure in output of: " + command)
        gflops = float(match.group(1))
        if not 0 < gflops:
            raise AssertionError(
                "non-positive performance reported by: " + command)
        log_perf.append(math.log(gflops))
    # geometric mean = exp(mean(log(x))); fsum is the accurately rounded
    # standard-library sum and replaces the hand-written Kahan summation
    geoperf = math.exp(math.fsum(log_perf) / len(log_perf))
    geotime = 1000000.0 / geoperf
    mnk = (scale ** 3) * cfg["M"] * cfg["N"] * cfg["K"]
    return Result(time=geotime, accuracy=geoperf, size=mnk)
def run(self, desired_result, input, limit):
    """
    Run a given configuration and return its performance.

    The M/N/K values of the configuration are scaled by
    ``self.granularity`` and handed to ``./xgemm.sh`` through the
    LIBXSMM_TGEMM_M/N/K environment variables (together with CHECK=0).
    The script is launched once per matrix shape loaded from
    ``self.args.mnk``; the number following "LIBXSMM:" in each output
    is taken as the performance figure (presumably GFLOPS) and all
    figures are combined into a geometric mean.

    Args:
        desired_result: supplies ``configuration.data``, a mapping with
            the keys "M", "N" and "K".
        input: unused.
        limit: unused.

    Returns:
        Result with time=1000000/geomean, accuracy=geomean, and
        size=granularity**3 * M * N * K.

    Raises:
        ValueError: if no matrix shape was loaded (the mean would be
            undefined).
        AssertionError: if a run exits with a non-zero code, prints no
            "LIBXSMM:" figure, or reports a non-positive figure.
    """
    cfg = desired_result.configuration.data
    scale = self.granularity
    run_cmd = (
        "CHECK=0"
        " LIBXSMM_TGEMM_M=" + str(scale * cfg["M"]) +
        " LIBXSMM_TGEMM_N=" + str(scale * cfg["N"]) +
        " LIBXSMM_TGEMM_K=" + str(scale * cfg["K"]) +
        " ./xgemm.sh")
    dimset = libxsmm_utilities.load_mnklist(self.args.mnk, 0, -1)
    if not dimset:
        # otherwise the division by len(dimset) fails with an opaque
        # ZeroDivisionError
        raise ValueError("no matrix shapes to benchmark")
    # compiled once instead of on every iteration
    pattern = re.compile(r"\s*LIBXSMM:\s+([0-9]+(\.[0-9]*)*)")
    log_perf = []
    for dims in dimset:
        command = run_cmd + " " + " ".join(map(str, dims))
        run_result = self.call_program(command)
        # The checks below raise explicitly (rather than using the assert
        # statement) so they stay active under "python -O"; the exception
        # type is kept as AssertionError for existing callers.
        if run_result["returncode"] != 0:
            raise AssertionError(
                "benchmark failed with exit code "
                + str(run_result["returncode"]) + ": " + command)
        output = run_result["stdout"]
        if isinstance(output, bytes):
            # NOTE(review): call_program may hand back bytes (subprocess
            # pipes on Python 3) - decode so the str pattern applies.
            output = output.decode(errors="replace")
        match = pattern.search(output)
        if match is None:
            raise AssertionError(
                "no 'LIBXSMM:' figure in output of: " + command)
        gflops = float(match.group(1))
        if not 0 < gflops:
            raise AssertionError(
                "non-positive performance reported by: " + command)
        log_perf.append(math.log(gflops))
    # geometric mean = exp(mean(log(x))); fsum is the accurately rounded
    # standard-library sum and replaces the hand-written Kahan summation
    geoperf = math.exp(math.fsum(log_perf) / len(log_perf))
    geotime = 1000000.0 / geoperf
    mnk = (scale ** 3) * cfg["M"] * cfg["N"] * cfg["K"]
    return Result(time=geotime, accuracy=geoperf, size=mnk)
def save_final_config(self, configuration):
    """
    Called at the end of tuning: report and store the final configuration.

    The output file is named "xgemm-<shapes><timestamp>.json". <shapes>
    lists every shape loaded from ``self.args.mnk`` with its components
    joined by "x" and the shapes separated by "-"; <timestamp> is
    ``time.strftime("-%Y%m%d-%H%M%S")``. The configuration data is
    printed and then written through the manipulator's ``save_to_file``.
    """
    dimset = libxsmm_utilities.load_mnklist(self.args.mnk, 0, -1)
    # one "MxNxK"-like token per requested matrix shape
    shape_names = ["x".join(str(dim) for dim in shape) for shape in dimset]
    stamp = time.strftime("-%Y%m%d-%H%M%S")
    filename = "xgemm-" + "-".join(shape_names) + stamp + ".json"
    print("Optimal block size written to " + filename + ": ",
          configuration.data)
    self.manipulator().save_to_file(configuration.data, filename)
def save_final_config(self, configuration):
    """
    Called at the end of tuning: print the final configuration and save it.

    The target file name is built from the matrix shapes loaded from
    ``self.args.mnk`` (components joined by "x", shapes joined by "-"),
    prefixed with "xgemm-" and followed by a "-%Y%m%d-%H%M%S" timestamp
    and the ".json" extension. Saving is delegated to ``save_to_file``
    of the object returned by ``self.manipulator()``.
    """
    dimset = libxsmm_utilities.load_mnklist(self.args.mnk, 0, -1)
    # collect the requested matrix shapes into a single string
    matrices = "-".join("x".join(map(str, mnk)) for mnk in dimset)
    filename = "xgemm-" + matrices + time.strftime("-%Y%m%d-%H%M%S") + ".json"
    message = "Optimal block size written to " + filename + ": "
    print(message, configuration.data)
    target = self.manipulator()
    target.save_to_file(configuration.data, filename)
def manipulator(self):
    """
    Define the search space by creating a ConfigurationManipulator.

    Side effects: (re)loads ``self.dimset`` from ``self.args.mnk`` and
    sets ``self.granularity`` to 1.

    Returns:
        A ConfigurationManipulator holding the integer parameters "M",
        "N" and "K". Each ranges from ``self.granularity`` up to the
        limit (64 for M, 256 for N and K) divided by the granularity,
        rounded up.
    """
    self.dimset = libxsmm_utilities.load_mnklist(self.args.mnk, 0, -1)
    self.granularity = 1
    assert (0 < self.granularity)
    # Ceiling division. "//" keeps the bounds integral: with "/" they
    # become floats on Python 3 and are then passed to IntegerParameter.
    m_max = (64 + self.granularity - 1) // self.granularity
    n_max = (256 + self.granularity - 1) // self.granularity
    k_max = (256 + self.granularity - 1) // self.granularity
    params = [
        IntegerParameter("M", self.granularity, m_max),
        IntegerParameter("N", self.granularity, n_max),
        IntegerParameter("K", self.granularity, k_max),
    ]
    manipulator = ConfigurationManipulator()
    for param in params:
        manipulator.add_parameter(param)
    return manipulator
if (1 < argc): # required argument(s) filename = sys.argv[1] # default configuration if no arguments are given precision = 0 # all prefetch = -1 # auto mnklist = list() # optional argument(s) if (2 < argc): precision = int(sys.argv[2]) if (3 < argc): prefetch = int(sys.argv[3]) if (4 < argc): mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[4:], 0)) template = Template(open(filename, "r").read()) if (fnmatch.fnmatch(filename, "*.h*")): optional = [", ...", ""][0 <= prefetch] substitute = {"MNK_INTERFACE_LIST": ""} for mnk in mnklist: mnkstr = "_".join(map(str, mnk)) if (2 != precision): pfsig = [optional + ");", ",\n " "const float* pa, " "const float* pb, " "const float* pc);"][0 < prefetch] substitute["MNK_INTERFACE_LIST"] += ( "\nLIBXSMM_API void libxsmm_smm_" + mnkstr + "(const float* a, const float* b, float* c" +
else: alignment = 64 if (6 < argc): prefetch = int(sys.argv[6]) else: prefetch = 0 if (7 < argc): threshold = int(sys.argv[7]) else: threshold = 0 if (8 < argc): sync = int(sys.argv[8]) else: sync = 0 if (9 < argc): jit = int(sys.argv[9]) else: jit = 0 if (10 < argc): flags = int(sys.argv[10]) else: flags = 0 if (11 < argc): alpha = int(sys.argv[11]) else: alpha = 1 if (12 < argc): beta = int(sys.argv[12]) else: beta = 1 if (13 < argc): mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[13:], 0)) else: mnklist = list() template = Template(open(filename, "r").read()) maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold) maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5) avgdim = int(0.5 * maxdim + 0.5) avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)), avgdim, False) avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)), avgdim, False) avgk = libxsmm_utilities.median(list(map(lambda mnk: mnk[2], mnklist)), avgdim, False) maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0) maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1) maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2)
if 8 < argc: sync = int(sys.argv[8]) if 9 < argc: jit = int(sys.argv[9]) if 10 < argc: flags = int(sys.argv[10]) if 11 < argc: alpha = int(sys.argv[11]) if 12 < argc: beta = int(sys.argv[12]) if 13 < argc: wrap = int(sys.argv[13]) if 14 < argc: malloc = int(sys.argv[14]) if 15 < argc: mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[15:], 0)) version, branch, realversion = libxsmm_utilities.version_branch() major, minor, update, patch = libxsmm_utilities.version_numbers( version) if 0 == threshold: threshold = 64 * 64 * 64 maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold) maxdim = int(maxmnk**(1.0 / 3.0) + 0.5) avgdim = int(0.5 * maxdim + 0.5) avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)), avgdim, False) avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)), avgdim, False)
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ############################################################################### # Hans Pabst (Intel Corp.) ############################################################################### import libxsmm_utilities import sys if __name__ == "__main__": argc = len(sys.argv) if (3 < argc): precision = int(sys.argv[1]) threshold = int(sys.argv[2]) mnklist = libxsmm_utilities.load_mnklist(sys.argv[3:], 0) print("libxsmm_gemm_descriptor desc;") print("libxsmm_xmmfunction func;") print("unsigned int hash, indx;") print("#if defined(_MSC_VER)") print("# pragma warning(push)") print("# pragma warning(disable: 4127)") print("#endif") for mnk in mnklist: mstr, nstr, kstr, mnkstr = \ str(mnk[0]), str(mnk[1]), str(mnk[2]), "_".join(map(str, mnk)) mnksig = mstr + ", " + nstr + ", " + kstr ldxsig = mstr + ", " + kstr + ", " + mstr # prefer registering double-precision kernels # when approaching an exhausted registry
## NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ## ## SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ## ############################################################################### ## Hans Pabst (Intel Corp.) ############################################################################### import libxsmm_utilities import sys import os if __name__ == "__main__": argc = len(sys.argv) if (3 < argc): precision = int(sys.argv[1]) threshold = int(sys.argv[2]) mnklist = libxsmm_utilities.load_mnklist(sys.argv[3:], threshold) print("libxsmm_gemm_descriptor desc;") print("libxsmm_xmmfunction func;") print("unsigned int hash, indx;") for mnk in mnklist: mstr, nstr, kstr, mnkstr = str(mnk[0]), str(mnk[1]), str(mnk[2]), "_".join(map(str, mnk)) mnksig = "LIBXSMM_LD(" + mstr + ", " + nstr + "), LIBXSMM_LD(" + nstr + ", " + mstr + "), " + kstr ldxsig = "LIBXSMM_LD(" + mstr + ", " + nstr + "), " + kstr + ", LIBXSMM_LD(" + mstr + ", " + nstr + ")" # prefer registering double-precision kernels when approaching an exhausted registry if (1 != precision): # only double-precision print("LIBXSMM_GEMM_DESCRIPTOR(desc, LIBXSMM_ALIGNMENT, LIBXSMM_FLAGS,") print(" " + mnksig + ", " + ldxsig + ",") print(" LIBXSMM_ALPHA, LIBXSMM_BETA, INTERNAL_PREFETCH);") print("LIBXSMM_HASH_FUNCTION_CALL(hash, indx, desc);") print("func.dmm = (libxsmm_dmmfunction)libxsmm_dmm_" + mnkstr + ";")
arg1_filename = [sys.argv[1], ""]["0" == sys.argv[1]] arg1_isfile = os.path.isfile(arg1_filename) base = 1 if (arg1_isfile): print("#if !defined(_WIN32)") print("{ static const char *const build_state =") print("# include \"" + sys.argv[1] + "\"") print(" ;") print(" internal_build_state = build_state;") print("}") print("#endif") base = 2 if ((base + 2) < argc): precision = int(sys.argv[base+0]) threshold = int(sys.argv[base+1]) mnklist = libxsmm_utilities.load_mnklist(sys.argv[base+2:], 0) print("/* omit registering code if JIT is enabled" " and if an ISA extension is found") print(" * which is beyond the static code" " path used to compile the library") print(" */") print("#if (0 != LIBXSMM_JIT) && !defined(__MIC__)") print("/* check if target arch. permits execution" " (arch. may be overridden) */") print("if (LIBXSMM_STATIC_TARGET_ARCH" " <= libxsmm_target_archid &&") print(" (LIBXSMM_X86_SSE3 > libxsmm_target_archid " "/* JIT code gen. is not available */") print(" /* condition allows to avoid JIT " "(if static code is good enough) */") print(" || LIBXSMM_STATIC_TARGET_ARCH"
if (1 < argc): # required argument(s) filename = sys.argv[1] # optional argument(s) precision = int(sys.argv[2]) if (2 < argc) else 0 ilp64 = int(sys.argv[3]) if (3 < argc) else 0 alignment = libxsmm_utilities.sanitize_alignment(int(sys.argv[4])) if (4 < argc) else 64 row_major = int(sys.argv[5]) if (5 < argc) else 0 prefetch = int(sys.argv[6]) if (6 < argc) else 0 threshold = int(sys.argv[7]) if (7 < argc) else 0 jit = int(sys.argv[8]) if (8 < argc) else 0 flags = int(sys.argv[9]) if (9 < argc) else 0 alpha = int(sys.argv[10]) if (10 < argc) else 1 beta = int(sys.argv[11]) if (11 < argc) else 1 mnklist = libxsmm_utilities.load_mnklist(sys.argv[12:], threshold) if (12 < argc) else list() template = Template(open(filename, "r").read()) maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold) maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5) avgdim = int(0.5 * maxdim + 0.5) avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)), avgdim, False) avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)), avgdim, False) avgk = libxsmm_utilities.median(list(map(lambda mnk: mnk[2], mnklist)), avgdim, False) maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0) maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1) maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2) version, branch = libxsmm_utilities.version_branch()
if __name__ == "__main__": argc = len(sys.argv) if (1 < argc): # required argument(s) filename = sys.argv[1] # optional argument(s) row_major = int(sys.argv[2]) if (2 < argc) else 0 alignment = libxsmm_utilities.sanitize_alignment(int(sys.argv[3])) if (3 < argc) else 64 aligned_stores = libxsmm_utilities.sanitize_alignment(int(sys.argv[4])) if (4 < argc) else 1 aligned_loads = libxsmm_utilities.sanitize_alignment(int(sys.argv[5])) if (5 < argc) else 1 prefetch = int(sys.argv[6]) if (6 < argc) else 0 jit = int(sys.argv[7]) if (7 < argc) else 0 threshold = int(sys.argv[8]) if (8 < argc) else 0 beta = int(sys.argv[9]) if (9 < argc) else 1 mnklist = libxsmm_utilities.load_mnklist(sys.argv[10:], 0, threshold) if (10 < argc) else list() template = Template(open(filename, "r").read()) maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold) maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5) avgdim = int(0.5 * maxdim + 0.5) avgm = libxsmm_utilities.median(map(lambda mnk: mnk[0], mnklist), avgdim, False) avgn = libxsmm_utilities.median(map(lambda mnk: mnk[1], mnklist), avgdim, False) avgk = libxsmm_utilities.median(map(lambda mnk: mnk[2], mnklist), avgdim, False) maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0) maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1) maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2) substitute = { \
## NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ## ## SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ## ############################################################################### ## Hans Pabst (Intel Corp.) ############################################################################### import libxsmm_utilities import sys import os if __name__ == "__main__": argc = len(sys.argv) if (3 < argc): precision = int(sys.argv[1]) threshold = int(sys.argv[2]) mnklist = libxsmm_utilities.load_mnklist(sys.argv[3:], threshold) print("libxsmm_gemm_descriptor desc;") print("unsigned int indx;") for mnk in mnklist: mstr, nstr, kstr, mnkstr = str(mnk[0]), str(mnk[1]), str(mnk[2]), "_".join(map(str, mnk)) mnksig = "LIBXSMM_LD(" + mstr + ", " + nstr + "), LIBXSMM_LD(" + nstr + ", " + mstr + "), " + kstr ldxsig = "LIBXSMM_LD(" + mstr + ", " + nstr + "), " + kstr + ", LIBXSMM_LD(" + mstr + ", " + nstr + ")" if (2 != precision): # only double-precision print("LIBXSMM_GEMM_DESCRIPTOR(desc, LIBXSMM_ALIGNMENT, LIBXSMM_FLAGS | LIBXSMM_GEMM_FLAG_F32PREC,") print(" " + mnksig + ", " + ldxsig + ",") print(" LIBXSMM_ALPHA, LIBXSMM_BETA, LIBXSMM_PREFETCH);") print("indx = libxsmm_crc32(&desc, LIBXSMM_GEMM_DESCRIPTOR_SIZE, LIBXSMM_HASH_SEED) % (LIBXSMM_CACHESIZE);") print("if (0 == result[indx].code.xmm) { /* no further effort to handle collision */") print(" result[indx].code.smm = (libxsmm_smmfunction)libxsmm_smm_" + mnkstr + ";") print(" result[indx].code_size = 0; /* statically generated code */")
else: alignment = 64 if (6 < argc): prefetch = int(sys.argv[6]) else: prefetch = 0 if (7 < argc): threshold = int(sys.argv[7]) else: threshold = 0 if (8 < argc): sync = int(sys.argv[8]) else: sync = 0 if (9 < argc): jit = int(sys.argv[9]) else: jit = 0 if (10 < argc): flags = int(sys.argv[10]) else: flags = 0 if (11 < argc): alpha = int(sys.argv[11]) else: alpha = 1 if (12 < argc): beta = int(sys.argv[12]) else: beta = 1 if (13 < argc): mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[13:], threshold)) else: mnklist = list() template = Template(open(filename, "r").read()) maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold) maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5) avgdim = int(0.5 * maxdim + 0.5) avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)), avgdim, False) avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)), avgdim, False) avgk = libxsmm_utilities.median(list(map(lambda mnk: mnk[2], mnklist)), avgdim, False) maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0) maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1) maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2)
else: alignment = 64 if (5 < argc): prefetch = int(sys.argv[5]) else: prefetch = 0 if (6 < argc): threshold = int(sys.argv[6]) else: threshold = 0 if (7 < argc): sync = int(sys.argv[7]) else: sync = 0 if (8 < argc): jit = int(sys.argv[8]) else: jit = 0 if (9 < argc): flags = int(sys.argv[9]) else: flags = 0 if (10 < argc): alpha = int(sys.argv[10]) else: alpha = 1 if (11 < argc): beta = int(sys.argv[11]) else: beta = 1 if (12 < argc): mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[12:], 0)) else: mnklist = list() template = Template(open(filename, "r").read()) maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold) maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5) avgdim = int(0.5 * maxdim + 0.5) avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)), avgdim, False) avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)), avgdim, False) avgk = libxsmm_utilities.median(list(map(lambda mnk: mnk[2], mnklist)), avgdim, False) maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0) maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1) maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2)