Esempio n. 1
0
 def run(self, desired_result, input, limit):
     """
     Run the benchmark for a given configuration and
     return the measured performance.

     For every entry loaded from self.args.mnk, ./xgemm.sh is
     called with CHECK=0 and LIBXSMM_TGEMM_M/N/K assignments
     (configuration values scaled by self.granularity) prefixed
     to the command line. The number following "LIBXSMM:" in
     the program's stdout is taken as the performance figure.

     Returns a Result with accuracy set to the geometric mean
     of all figures, time set to 1000000.0 divided by that mean,
     and size set to the product of the scaled M, N, and K.
     Failed runs, unparsable output, and non-positive figures
     trip an assertion.
     """
     cfg = desired_result.configuration.data
     tile_m = str(self.granularity * cfg["M"])
     tile_n = str(self.granularity * cfg["N"])
     tile_k = str(self.granularity * cfg["K"])
     run_cmd = "".join((
         "CHECK=0",
         " LIBXSMM_TGEMM_M=", tile_m,
         " LIBXSMM_TGEMM_N=", tile_n,
         " LIBXSMM_TGEMM_K=", tile_k,
         " ./xgemm.sh"))
     dimset = libxsmm_utilities.load_mnklist(self.args.mnk, 0, -1)
     perf_pattern = "\\s*LIBXSMM:\\s+([0-9]+(\\.[0-9]*)*)"
     log_sum = 0  # running sum of log(gflops)
     lost = 0  # low-order bits lost so far (Kahan summation)
     for dims in dimset:
         shape_args = " ".join(str(dim) for dim in dims)
         run_result = self.call_program(run_cmd + " " + shape_args)
         assert run_result["returncode"] == 0
         match = re.search(perf_pattern, run_result["stdout"])
         assert match is not None
         gflops = float(match.group(1))
         assert 0 < gflops
         # compensated accumulation; the order of operations matters
         term = math.log(gflops) - lost
         total = log_sum + term
         lost = (total - log_sum) - term
         log_sum = total
     # mean of the logarithms, mapped back: geometric mean
     geoperf = math.exp(log_sum / len(dimset))
     return Result(
         time=1000000.0 / geoperf,
         accuracy=geoperf,
         size=(self.granularity**3) * cfg["M"] * cfg["N"] * cfg["K"])
Esempio n. 2
0
 def run(self, desired_result, input, limit):
     """
     Execute ./xgemm.sh for the given configuration and
     return the aggregated performance.

     The command line starts with CHECK=0 and the assignments
     LIBXSMM_TGEMM_M/N/K (each configuration value multiplied by
     self.granularity), followed by ./xgemm.sh and the elements
     of one entry of the list loaded from self.args.mnk. One
     run is performed per entry, and the number printed after
     "LIBXSMM:" on stdout is collected from each run.

     The Result holds the geometric mean of the collected
     numbers (accuracy), 1000000.0 divided by that mean (time),
     and granularity**3 * M * N * K (size). An assertion fails
     if a run returns non-zero, if the output cannot be parsed,
     or if a collected number is not positive.
     """
     cfg = desired_result.configuration.data
     run_cmd = "CHECK=0"
     run_cmd += " LIBXSMM_TGEMM_M=" + str(self.granularity * cfg["M"])
     run_cmd += " LIBXSMM_TGEMM_N=" + str(self.granularity * cfg["N"])
     run_cmd += " LIBXSMM_TGEMM_K=" + str(self.granularity * cfg["K"])
     run_cmd += " ./xgemm.sh"
     dimset = libxsmm_utilities.load_mnklist(self.args.mnk, 0, -1)
     log_total, kahan_err = 0, 0  # sum of logarithms, Kahan correction
     for dims in dimset:
         arguments = " ".join([str(dim) for dim in dims])
         outcome = self.call_program(run_cmd + " " + arguments)
         assert outcome["returncode"] == 0
         found = re.search(
             "\\s*LIBXSMM:\\s+([0-9]+(\\.[0-9]*)*)",
             outcome["stdout"])
         assert found is not None
         gflops = float(found.group(1))
         assert 0 < gflops
         # Kahan step: keep the sequence of operations as is
         corrected = math.log(gflops) - kahan_err
         updated = log_total + corrected
         kahan_err = (updated - log_total) - corrected
         log_total = updated
     geoperf = math.exp(log_total / len(dimset))  # geometric mean
     geotime = 1000000.0 / geoperf
     scale = self.granularity**3
     return Result(time=geotime, accuracy=geoperf,
                   size=scale * cfg["M"] * cfg["N"] * cfg["K"])
Esempio n. 3
0
 def save_final_config(self, configuration):
     """
     Called at the end of tuning: print the final configuration
     and save it to a JSON-named file.

     The file name is "xgemm-" plus the entries loaded from
     self.args.mnk (elements of an entry joined by "x", entries
     joined by "-"), a "-%Y%m%d-%H%M%S" time stamp, and ".json".
     The data is written by self.manipulator().save_to_file.
     """
     dimset = libxsmm_utilities.load_mnklist(self.args.mnk, 0, -1)
     # one "MxNxK"-like token per requested matrix shape
     shapes = ["x".join(str(dim) for dim in mnk) for mnk in dimset]
     stamp = time.strftime("-%Y%m%d-%H%M%S")
     filename = "xgemm-" + "-".join(shapes) + stamp + ".json"
     print("Optimal block size written to " + filename + ": ",
           configuration.data)
     self.manipulator().save_to_file(configuration.data, filename)
Esempio n. 4
0
 def save_final_config(self, configuration):
     """
     Report and store the configuration when tuning is finished.

     Builds a file name of the form
     "xgemm-<shapes>-<YYYYmmdd>-<HHMMSS>.json", where <shapes>
     lists every entry loaded from self.args.mnk with its
     elements joined by "x" and the entries joined by "-".
     The name and configuration.data are printed, and the data
     is handed to save_to_file of self.manipulator().
     """
     dimset = libxsmm_utilities.load_mnklist(self.args.mnk, 0, -1)
     matrices = "-".join(
         "x".join([str(extent) for extent in mnk])
         for mnk in dimset)
     timestamp = time.strftime("-%Y%m%d-%H%M%S")
     filename = "xgemm-" + matrices + timestamp + ".json"
     message = "Optimal block size written to " + filename + ": "
     print(message, configuration.data)
     self.manipulator().save_to_file(configuration.data, filename)
Esempio n. 5
0
 def manipulator(self):
     """
     Define the search space by creating a
     ConfigurationManipulator

     Side effects: (re)loads self.dimset from self.args.mnk and
     sets self.granularity to 1.

     Three integer parameters are registered in the order "M",
     "N", and "K". Each ranges from self.granularity up to the
     limit (64 for M, 256 for N and K) divided by the granularity
     and rounded up.

     Returns the populated ConfigurationManipulator.
     """
     self.dimset = libxsmm_utilities.load_mnklist(self.args.mnk, 0, -1)
     self.granularity = 1
     assert (0 < self.granularity)
     # Floor division keeps the bounds integral: with Python 3, "/"
     # is true division and would hand floats (e.g., 64.0) to
     # IntegerParameter. With Python 2 integers the value is unchanged.
     m_max = (64 + self.granularity - 1) // self.granularity
     n_max = (256 + self.granularity - 1) // self.granularity
     k_max = (256 + self.granularity - 1) // self.granularity
     manipulator = ConfigurationManipulator()
     for name, upper in (("M", m_max), ("N", n_max), ("K", k_max)):
         manipulator.add_parameter(
             IntegerParameter(name, self.granularity, upper))
     return manipulator
Esempio n. 6
0
    if (1 < argc):
        # required argument(s)
        filename = sys.argv[1]

        # default configuration if no arguments are given
        precision = 0  # all
        prefetch = -1  # auto
        mnklist = list()

        # optional argument(s)
        if (2 < argc):
            precision = int(sys.argv[2])
        if (3 < argc):
            prefetch = int(sys.argv[3])
        if (4 < argc):
            mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[4:], 0))

        template = Template(open(filename, "r").read())
        if (fnmatch.fnmatch(filename, "*.h*")):
            optional = [", ...", ""][0 <= prefetch]
            substitute = {"MNK_INTERFACE_LIST": ""}
            for mnk in mnklist:
                mnkstr = "_".join(map(str, mnk))
                if (2 != precision):
                    pfsig = [optional + ");", ",\n  "
                             "const float* pa, "
                             "const float* pb, "
                             "const float* pc);"][0 < prefetch]
                    substitute["MNK_INTERFACE_LIST"] += (
                        "\nLIBXSMM_API void libxsmm_smm_" + mnkstr +
                        "(const float* a, const float* b, float* c" +
Esempio n. 7
0
        else: alignment = 64
        if (6 < argc): prefetch = int(sys.argv[6])
        else: prefetch = 0
        if (7 < argc): threshold = int(sys.argv[7])
        else: threshold = 0
        if (8 < argc): sync = int(sys.argv[8])
        else: sync = 0
        if (9 < argc): jit = int(sys.argv[9])
        else: jit = 0
        if (10 < argc): flags = int(sys.argv[10])
        else: flags = 0
        if (11 < argc): alpha = int(sys.argv[11])
        else: alpha = 1
        if (12 < argc): beta = int(sys.argv[12])
        else: beta = 1
        if (13 < argc): mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[13:], 0))
        else: mnklist = list()

        template = Template(open(filename, "r").read())
        maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold)
        maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5)
        avgdim = int(0.5 * maxdim + 0.5)

        avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)), avgdim, False)
        avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)), avgdim, False)
        avgk = libxsmm_utilities.median(list(map(lambda mnk: mnk[2], mnklist)), avgdim, False)

        maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0)
        maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1)
        maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2)
Esempio n. 8
0
        if 8 < argc:
            sync = int(sys.argv[8])
        if 9 < argc:
            jit = int(sys.argv[9])
        if 10 < argc:
            flags = int(sys.argv[10])
        if 11 < argc:
            alpha = int(sys.argv[11])
        if 12 < argc:
            beta = int(sys.argv[12])
        if 13 < argc:
            wrap = int(sys.argv[13])
        if 14 < argc:
            malloc = int(sys.argv[14])
        if 15 < argc:
            mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[15:], 0))

        version, branch, realversion = libxsmm_utilities.version_branch()
        major, minor, update, patch = libxsmm_utilities.version_numbers(
            version)

        if 0 == threshold:
            threshold = 64 * 64 * 64
        maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold)
        maxdim = int(maxmnk**(1.0 / 3.0) + 0.5)
        avgdim = int(0.5 * maxdim + 0.5)

        avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)),
                                        avgdim, False)
        avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)),
                                        avgdim, False)
Esempio n. 9
0
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF      #
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING        #
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS          #
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                #
###############################################################################
# Hans Pabst (Intel Corp.)
###############################################################################
import libxsmm_utilities
import sys

if __name__ == "__main__":
    argc = len(sys.argv)
    if (3 < argc):
        precision = int(sys.argv[1])
        threshold = int(sys.argv[2])
        mnklist = libxsmm_utilities.load_mnklist(sys.argv[3:], 0)

        print("libxsmm_gemm_descriptor desc;")
        print("libxsmm_xmmfunction func;")
        print("unsigned int hash, indx;")
        print("#if defined(_MSC_VER)")
        print("# pragma warning(push)")
        print("# pragma warning(disable: 4127)")
        print("#endif")
        for mnk in mnklist:
            mstr, nstr, kstr, mnkstr = \
                str(mnk[0]), str(mnk[1]), str(mnk[2]), "_".join(map(str, mnk))
            mnksig = mstr + ", " + nstr + ", " + kstr
            ldxsig = mstr + ", " + kstr + ", " + mstr
            # prefer registering double-precision kernels
            # when approaching an exhausted registry
Esempio n. 10
0
## NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        ##
## SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              ##
###############################################################################
## Hans Pabst (Intel Corp.)
###############################################################################
import libxsmm_utilities
import sys
import os


if __name__ == "__main__":
    argc = len(sys.argv)
    if (3 < argc):
        precision = int(sys.argv[1])
        threshold = int(sys.argv[2])
        mnklist = libxsmm_utilities.load_mnklist(sys.argv[3:], threshold)

        print("libxsmm_gemm_descriptor desc;")
        print("libxsmm_xmmfunction func;")
        print("unsigned int hash, indx;")
        for mnk in mnklist:
            mstr, nstr, kstr, mnkstr = str(mnk[0]), str(mnk[1]), str(mnk[2]), "_".join(map(str, mnk))
            mnksig = "LIBXSMM_LD(" + mstr + ", " + nstr + "), LIBXSMM_LD(" + nstr + ", " + mstr + "), " + kstr
            ldxsig = "LIBXSMM_LD(" + mstr + ", " + nstr + "), " + kstr + ", LIBXSMM_LD(" + mstr + ", " + nstr + ")"
            # prefer registering double-precision kernels when approaching an exhausted registry
            if (1 != precision): # only double-precision
                print("LIBXSMM_GEMM_DESCRIPTOR(desc, LIBXSMM_ALIGNMENT, LIBXSMM_FLAGS,")
                print("  " + mnksig + ", " + ldxsig + ",")
                print("  LIBXSMM_ALPHA, LIBXSMM_BETA, INTERNAL_PREFETCH);")
                print("LIBXSMM_HASH_FUNCTION_CALL(hash, indx, desc);")
                print("func.dmm = (libxsmm_dmmfunction)libxsmm_dmm_" + mnkstr + ";")
Esempio n. 11
0
 arg1_filename = [sys.argv[1], ""]["0" == sys.argv[1]]
 arg1_isfile = os.path.isfile(arg1_filename)
 base = 1
 if (arg1_isfile):
     print("#if !defined(_WIN32)")
     print("{ static const char *const build_state =")
     print("#   include \"" + sys.argv[1] + "\"")
     print("  ;")
     print("  internal_build_state = build_state;")
     print("}")
     print("#endif")
     base = 2
 if ((base + 2) < argc):
     precision = int(sys.argv[base+0])
     threshold = int(sys.argv[base+1])
     mnklist = libxsmm_utilities.load_mnklist(sys.argv[base+2:], 0)
     print("/* omit registering code if JIT is enabled"
           " and if an ISA extension is found")
     print(" * which is beyond the static code"
           " path used to compile the library")
     print(" */")
     print("#if (0 != LIBXSMM_JIT) && !defined(__MIC__)")
     print("/* check if target arch. permits execution"
           " (arch. may be overridden) */")
     print("if (LIBXSMM_STATIC_TARGET_ARCH"
           " <= libxsmm_target_archid &&")
     print("   (LIBXSMM_X86_SSE3 > libxsmm_target_archid "
           "/* JIT code gen. is not available */")
     print("    /* condition allows to avoid JIT "
           "(if static code is good enough) */")
     print("    || LIBXSMM_STATIC_TARGET_ARCH"
Esempio n. 12
0
    if (1 < argc):
        # required argument(s)
        filename = sys.argv[1]

        # optional argument(s)
        precision = int(sys.argv[2]) if (2 < argc) else 0
        ilp64 = int(sys.argv[3]) if (3 < argc) else 0
        alignment = libxsmm_utilities.sanitize_alignment(int(sys.argv[4])) if (4 < argc) else 64
        row_major = int(sys.argv[5]) if (5 < argc) else 0
        prefetch = int(sys.argv[6]) if (6 < argc) else 0
        threshold = int(sys.argv[7]) if (7 < argc) else 0
        jit = int(sys.argv[8]) if (8 < argc) else 0
        flags = int(sys.argv[9]) if (9 < argc) else 0
        alpha = int(sys.argv[10]) if (10 < argc) else 1
        beta = int(sys.argv[11]) if (11 < argc) else 1
        mnklist = libxsmm_utilities.load_mnklist(sys.argv[12:], threshold) if (12 < argc) else list()

        template = Template(open(filename, "r").read())
        maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold)
        maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5)
        avgdim = int(0.5 * maxdim + 0.5)

        avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)), avgdim, False)
        avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)), avgdim, False)
        avgk = libxsmm_utilities.median(list(map(lambda mnk: mnk[2], mnklist)), avgdim, False)

        maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0)
        maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1)
        maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2)

        version, branch = libxsmm_utilities.version_branch()
Esempio n. 13
0
if __name__ == "__main__":
    argc = len(sys.argv)
    if (1 < argc):
        # required argument(s)
        filename = sys.argv[1]

        # optional argument(s)
        row_major = int(sys.argv[2]) if (2 < argc) else 0
        alignment = libxsmm_utilities.sanitize_alignment(int(sys.argv[3])) if (3 < argc) else 64
        aligned_stores = libxsmm_utilities.sanitize_alignment(int(sys.argv[4])) if (4 < argc) else 1
        aligned_loads = libxsmm_utilities.sanitize_alignment(int(sys.argv[5])) if (5 < argc) else 1
        prefetch = int(sys.argv[6]) if (6 < argc) else 0
        jit = int(sys.argv[7]) if (7 < argc) else 0
        threshold = int(sys.argv[8]) if (8 < argc) else 0
        beta = int(sys.argv[9]) if (9 < argc) else 1
        mnklist = libxsmm_utilities.load_mnklist(sys.argv[10:], 0, threshold) if (10 < argc) else list()

        template = Template(open(filename, "r").read())
        maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold)
        maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5)
        avgdim = int(0.5 * maxdim + 0.5)

        avgm = libxsmm_utilities.median(map(lambda mnk: mnk[0], mnklist), avgdim, False)
        avgn = libxsmm_utilities.median(map(lambda mnk: mnk[1], mnklist), avgdim, False)
        avgk = libxsmm_utilities.median(map(lambda mnk: mnk[2], mnklist), avgdim, False)

        maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0)
        maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1)
        maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2)

        substitute = { \
Esempio n. 14
0
## NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        ##
## SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              ##
###############################################################################
## Hans Pabst (Intel Corp.)
###############################################################################
import libxsmm_utilities
import sys
import os


if __name__ == "__main__":
    argc = len(sys.argv)
    if (3 < argc):
        precision = int(sys.argv[1])
        threshold = int(sys.argv[2])
        mnklist = libxsmm_utilities.load_mnklist(sys.argv[3:], threshold)

        print("libxsmm_gemm_descriptor desc;")
        print("unsigned int indx;")
        for mnk in mnklist:
            mstr, nstr, kstr, mnkstr = str(mnk[0]), str(mnk[1]), str(mnk[2]), "_".join(map(str, mnk))
            mnksig = "LIBXSMM_LD(" + mstr + ", " + nstr + "), LIBXSMM_LD(" + nstr + ", " + mstr + "), " + kstr
            ldxsig = "LIBXSMM_LD(" + mstr + ", " + nstr + "), " + kstr + ", LIBXSMM_LD(" + mstr + ", " + nstr + ")"
            if (2 != precision): # only double-precision
                print("LIBXSMM_GEMM_DESCRIPTOR(desc, LIBXSMM_ALIGNMENT, LIBXSMM_FLAGS | LIBXSMM_GEMM_FLAG_F32PREC,")
                print("  " + mnksig + ", " + ldxsig + ",")
                print("  LIBXSMM_ALPHA, LIBXSMM_BETA, LIBXSMM_PREFETCH);")
                print("indx = libxsmm_crc32(&desc, LIBXSMM_GEMM_DESCRIPTOR_SIZE, LIBXSMM_HASH_SEED) % (LIBXSMM_CACHESIZE);")
                print("if (0 == result[indx].code.xmm) { /* no further effort to handle collision */")
                print("  result[indx].code.smm = (libxsmm_smmfunction)libxsmm_smm_" + mnkstr + ";")
                print("  result[indx].code_size = 0; /* statically generated code */")
Esempio n. 15
0
        else: alignment = 64
        if (6 < argc): prefetch = int(sys.argv[6])
        else: prefetch = 0
        if (7 < argc): threshold = int(sys.argv[7])
        else: threshold = 0
        if (8 < argc): sync = int(sys.argv[8])
        else: sync = 0
        if (9 < argc): jit = int(sys.argv[9])
        else: jit = 0
        if (10 < argc): flags = int(sys.argv[10])
        else: flags = 0
        if (11 < argc): alpha = int(sys.argv[11])
        else: alpha = 1
        if (12 < argc): beta = int(sys.argv[12])
        else: beta = 1
        if (13 < argc): mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[13:], threshold))
        else: mnklist = list()

        template = Template(open(filename, "r").read())
        maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold)
        maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5)
        avgdim = int(0.5 * maxdim + 0.5)

        avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)), avgdim, False)
        avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)), avgdim, False)
        avgk = libxsmm_utilities.median(list(map(lambda mnk: mnk[2], mnklist)), avgdim, False)

        maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0)
        maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1)
        maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2)
Esempio n. 16
0
        else: alignment = 64
        if (5 < argc): prefetch = int(sys.argv[5])
        else: prefetch = 0
        if (6 < argc): threshold = int(sys.argv[6])
        else: threshold = 0
        if (7 < argc): sync = int(sys.argv[7])
        else: sync = 0
        if (8 < argc): jit = int(sys.argv[8])
        else: jit = 0
        if (9 < argc): flags = int(sys.argv[9])
        else: flags = 0
        if (10 < argc): alpha = int(sys.argv[10])
        else: alpha = 1
        if (11 < argc): beta = int(sys.argv[11])
        else: beta = 1
        if (12 < argc): mnklist = sorted(libxsmm_utilities.load_mnklist(sys.argv[12:], 0))
        else: mnklist = list()

        template = Template(open(filename, "r").read())
        maxmnk = libxsmm_utilities.max_mnk(mnklist, threshold)
        maxdim = int(maxmnk ** (1.0 / 3.0) + 0.5)
        avgdim = int(0.5 * maxdim + 0.5)

        avgm = libxsmm_utilities.median(list(map(lambda mnk: mnk[0], mnklist)), avgdim, False)
        avgn = libxsmm_utilities.median(list(map(lambda mnk: mnk[1], mnklist)), avgdim, False)
        avgk = libxsmm_utilities.median(list(map(lambda mnk: mnk[2], mnklist)), avgdim, False)

        maxm = libxsmm_utilities.max_mnk(mnklist, avgdim, 0)
        maxn = libxsmm_utilities.max_mnk(mnklist, avgdim, 1)
        maxk = libxsmm_utilities.max_mnk(mnklist, avgdim, 2)