def create_dispatch_direct_function(typeflag, mnklist):
    """Write the C source of a table-driven dispatch function to stdout.

    The generated function is named ``libxsmm_<typeflag>mm_dispatch`` and
    takes ``(int m, int n, int k)``.  It contains a static array
    ``functions`` and returns ``functions[d*(h*m+n)+k]`` when m, n and k do
    not exceed the maxima reported by ``libxsmm_utilities.max_mnk``, and
    ``0`` otherwise (``d = maxk + 1``, ``h = maxn + 1``).

    For every entry of ``mnklist`` the slot ``calc_direct_index(mnk, d, h)``
    holds ``libxsmm_<typeflag>mm_<mnk joined by "_">``; the slots skipped
    between consecutive entries are filled with ``0``.

    Args:
        typeflag: Text spliced into the generated type and function names.
        mnklist: Iterable of sequences whose items are joined with ``"_"``
            to form kernel names.  Presumably (m, n, k) triples in
            ascending, duplicate-free index order -- TODO confirm against
            the caller; entries are emitted strictly in iteration order.

    Returns:
        None.  All output goes through ``sys.stdout.write`` so the function
        behaves the same on Python 2 and Python 3.
    """
    emit = sys.stdout.write
    prefix = "libxsmm_" + typeflag + "mm_"

    emit("LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE " + prefix + "function "
         + prefix + "dispatch(int m, int n, int k)\n")
    emit("{\n")

    maxm = libxsmm_utilities.max_mnk(mnklist, 0, 0)
    maxn = libxsmm_utilities.max_mnk(mnklist, 0, 1)
    maxk = libxsmm_utilities.max_mnk(mnklist, 0, 2)
    d, h = maxk + 1, maxn + 1

    # The full table size is only emitted inside a C comment, so the real
    # array length is whatever the initializer below provides.
    # NOTE(review): the initializer stops at the last listed kernel.  If
    # calc_direct_index matches the lookup formula in the return statement,
    # an (m, n, k) that passes the bounds check but lies beyond that last
    # slot (or is negative) would index outside the array -- confirm.
    emit(" static /*const*/ " + prefix + "function functions[/*"
         + str(d * h * (maxm + 1)) + "*/] = {\n")
    emit(" ")

    # Line wrapping: ``width`` grows by 1 per "0" and by ``name_cost`` per
    # kernel name; a line break is emitted relative to ``line_budget``.
    name_cost = 8
    line_budget = name_cost * 6
    next_index, width, count = 0, 0, 0
    for mnk in mnklist:
        end = calc_direct_index(mnk, d, h)

        # Fill the unused slots in front of this kernel with null pointers.
        for _ in range(next_index, end):
            width += 1
            count += 1
            if 0 == width % line_budget:
                emit(",\n ")
            elif 1 < count:
                emit(", ")
            emit("0")

        next_index = end + 1
        width += name_cost
        count += 1
        if name_cost > width % line_budget:
            emit(",\n ")
            width += width % line_budget
        elif 1 < count:  # no separator in front of the very first element
            emit(", ")
        emit(prefix + "_".join(map(str, mnk)))

    emit("\n")
    emit(" };\n")
    emit(" return (" + str(maxm) + " >= m && " + str(maxn) + " >= n && "
         + str(maxk) + " >= k) "
         + "? functions[" + str(d) + "*(" + str(h) + "*m+n)+k] "
         + ": 0;\n")
    emit("}\n")
else: threshold = 0 if (8 < argc): sync = int(sys.argv[8]) else: sync = 0 if (9 < argc): jit = int(sys.argv[9]) else: jit = 0 if (10 < argc): flags = int(sys.argv[10]) else: flags = 0 if (11 < argc): alpha = int(sys.argv[11]) else: alpha = 1 if (12 < argc): beta = int(sys.argv[12]) else: beta = 1 if (13 < argc): mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[13:], 0)) else: mnklist = list() template = Template(open(filename, "r").read()) maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold) maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5) avgdim = int(0.5 * maxdim + 0.5) avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)), avgdim, False) avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)), avgdim, False) avgk = libxsmm_utilities.median(list(map(lambda mnk: mnk[2], mnklist)), avgdim, False) maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0) maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1) maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2) substitute = { \ "LIBXSMM_OFFLOAD_BUILD": ["", "\n#define LIBXSMM_OFFLOAD_BUILD"][0!=offload], \ "ALIGNMENT": alignment, \ "PREFETCH": [-1, prefetch][0<=prefetch], \
alpha = int(sys.argv[11]) if (12 < argc): beta = int(sys.argv[12]) if (13 < argc): wrap = int(sys.argv[13]) if (14 < argc): mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[14:], 0)) version, branch, realversion = \ libxsmm_utilities.version_branch() major, minor, update, patch = \ libxsmm_utilities.version_numbers(version) if (0 == threshold): threshold = 64 * 64 * 64 maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold) maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5) avgdim = int(0.5 * maxdim + 0.5) avgm = libxsmm_utilities.median( list(map(lambda mnk: mnk[0], mnklist)), avgdim, False) avgn = libxsmm_utilities.median( list(map(lambda mnk: mnk[1], mnklist)), avgdim, False) avgk = libxsmm_utilities.median( list(map(lambda mnk: mnk[2], mnklist)), avgdim, False) maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0) maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1) maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2) substitute = {