def _write_null_entries(begin, end, m, n, s):
    """Write one "0" table entry to stdout for every index in [begin, end).

    `m` is the running width counter used for line wrapping and `n` the
    number of entries written so far; both are advanced by one per entry
    and returned as the tuple (m, n). A line break is emitted whenever `m`
    reaches a multiple of `s`; otherwise a ", " separator is emitted before
    every entry except the very first one of the table.
    """
    for _ in range(begin, end):
        m += 1
        n += 1
        if 0 == (m % s):
            sys.stdout.write(",\n    ")
        elif 1 < n:
            sys.stdout.write(", ")
        sys.stdout.write("0")
    return m, n


def create_dispatch_direct_function(typeflag, mnklist):
    """Write the C source of `libxsmm_<typeflag>mm_dispatch` to stdout.

    The emitted function holds a static lookup table and returns
    `functions[d*(h*m+n)+k]` when m, n and k do not exceed the per-dimension
    maxima of `mnklist`, and 0 otherwise.

    typeflag -- string spliced into the emitted identifiers
                (`libxsmm_<typeflag>mm_...`).
    mnklist  -- iterable of index tuples; each tuple becomes the entry
                `libxsmm_<typeflag>mm_<a>_<b>_<c>` at the position returned
                by calc_direct_index(mnk, d, h). Gaps between consecutive
                positions are filled with 0, so the list has to be ordered
                by ascending position.
                NOTE(review): calc_direct_index is defined elsewhere; it is
                presumed to compute d*(h*m+n)+k like the emitted lookup.

    The table is padded with trailing 0 entries up to d*h*(maxm+1). The
    array size is only emitted as a comment, so the C compiler sizes the
    array from the initializer list; without the padding any (m, n, k) that
    passes the emitted bounds check but lies behind the last listed kernel
    would index past the end of the array (and an empty mnklist would
    produce an empty initializer).

    NOTE(review): the emitted bounds check does not reject negative m/n/k.
    """
    out = sys.stdout.write  # same bytes under Python 2 and Python 3
    prefix = "libxsmm_" + typeflag + "mm_"

    out("LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE " + prefix + "function "
        + prefix + "dispatch(int m, int n, int k)\n")
    out("{\n")

    maxm = libxsmm_utilities.max_mnk(mnklist, 0, 0)
    maxn = libxsmm_utilities.max_mnk(mnklist, 0, 1)
    maxk = libxsmm_utilities.max_mnk(mnklist, 0, 2)
    d, h = maxk + 1, maxn + 1
    size = d * h * (maxm + 1)

    out("  static /*const*/ " + prefix + "function functions[/*"
        + str(size) + "*/] = {\n")
    out("    ")

    # r: width charged to the wrap counter for one function name;
    # s: wrap width (a "0" entry is charged a width of 1).
    begin, m, n, r = 0, 0, 0, 8
    s = r * 6
    for mnk in mnklist:
        end = calc_direct_index(mnk, d, h)
        # fill the gap in front of this kernel with null entries
        m, n = _write_null_entries(begin, end, m, n, s)
        begin, m, n = end + 1, m + r, n + 1
        if r > (m % s):
            # the name crossed a wrap boundary: start a new output line
            out(",\n    ")
            m = m + m % s  # only affects where later line breaks fall
        elif 1 < n:
            out(", ")
        out(prefix + "_".join(map(str, mnk)))

    # pad the tail so that every index admitted by the bounds check below
    # is backed by an initializer
    _write_null_entries(begin, size, m, n, s)

    out("\n")
    out("  };\n")
    out("  return (" + str(maxm) + " >= m && " + str(maxn) + " >= n && "
        + str(maxk) + " >= k) "
        + "? functions[" + str(d) + "*(" + str(h) + "*m+n)+k] "
        + ": 0;\n")
    out("}\n")
Exemple #2
0
        else: threshold = 0
        if (8 < argc): sync = int(sys.argv[8])
        else: sync = 0
        if (9 < argc): jit = int(sys.argv[9])
        else: jit = 0
        if (10 < argc): flags = int(sys.argv[10])
        else: flags = 0
        if (11 < argc): alpha = int(sys.argv[11])
        else: alpha = 1
        if (12 < argc): beta = int(sys.argv[12])
        else: beta = 1
        if (13 < argc): mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[13:], 0))
        else: mnklist = list()

        template = Template(open(filename, "r").read())
        maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold)
        maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5)
        avgdim = int(0.5 * maxdim + 0.5)

        avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)), avgdim, False)
        avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)), avgdim, False)
        avgk = libxsmm_utilities.median(list(map(lambda mnk: mnk[2], mnklist)), avgdim, False)

        maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0)
        maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1)
        maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2)

        substitute = { \
            "LIBXSMM_OFFLOAD_BUILD": ["", "\n#define LIBXSMM_OFFLOAD_BUILD"][0!=offload], \
            "ALIGNMENT":  alignment, \
            "PREFETCH":   [-1, prefetch][0<=prefetch], \
Exemple #3
0
            alpha = int(sys.argv[11])
        if (12 < argc):
            beta = int(sys.argv[12])
        if (13 < argc):
            wrap = int(sys.argv[13])
        if (14 < argc):
            mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[14:], 0))

        version, branch, realversion = \
            libxsmm_utilities.version_branch()
        major, minor, update, patch = \
            libxsmm_utilities.version_numbers(version)

        if (0 == threshold):
            threshold = 64 * 64 * 64
        maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold)
        maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5)
        avgdim = int(0.5 * maxdim + 0.5)

        avgm = libxsmm_utilities.median(
            list(map(lambda mnk: mnk[0], mnklist)), avgdim, False)
        avgn = libxsmm_utilities.median(
            list(map(lambda mnk: mnk[1], mnklist)), avgdim, False)
        avgk = libxsmm_utilities.median(
            list(map(lambda mnk: mnk[2], mnklist)), avgdim, False)

        maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0)
        maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1)
        maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2)

        substitute = {