コード例 #1
0
 def prepare_temp_files_for_error_msg(self):
     """Write the kernel source to a temp file and list all temp files of this instance.

     The kernel string is dumped into a newly named temporary file whose
     suffix is taken from ``self.kernel_source.get_suffix()``.

     :returns: A list that starts with the name of the newly written file,
         followed by the values of ``self.temp_files``.
     :rtype: list
     """
     source_suffix = self.kernel_source.get_suffix()
     dump_path = util.get_temp_filename(suffix=source_suffix)
     util.write_file(dump_path, self.kernel_string)
     return [dump_path] + list(self.temp_files.values())
コード例 #2
0
    def prepare_list_of_files(self, kernel_name, params, grid, threads, block_size_names):
        """ prepare the kernel string along with any additional files

        The first file in the list is allowed to include or read in the others
        The files beyond the first are considered additional files that may also contain tunable parameters

        For each file beyond the first this function creates a temporary file with
        preprocessor statements inserted. Occurrences of the original filenames in the
        first file are replaced with their temporary counterparts.

        :param kernel_name: A string specifying the kernel name.
        :type kernel_name: string

        :param params: A dictionary with the tunable parameters for this particular
            instance.
        :type params: dict()

        :param grid: The grid dimensions for this instance. The grid dimensions are
            also inserted into the code as if they are tunable parameters for
            convenience.
        :type grid: tuple()

        :param threads: The thread block dimensions for this instance. The thread block are
            also inserted into the code as if they are tunable parameters for
            convenience.
        :type threads: tuple()

        :param block_size_names: A list of strings that denote the names
            for the thread block dimensions.
        :type block_size_names: list(string)

        :returns: A tuple ``(name, kernel_string, temp_files)`` holding the name and
            the prepared source of the primary kernel source, and a dictionary that
            maps each secondary filename to the temporary file written for it.
        :rtype: tuple(string, string, dict)

        :raises ValueError: If a kernel source other than the first one does not
            look like a filename.

        """
        # imported locally so this method does not rely on a module-level import
        import os

        temp_files = {}

        for i, f in enumerate(self.kernel_sources):
            if i > 0 and not util.looks_like_a_filename(f):
                raise ValueError('When passing multiple kernel sources, the secondary entries must be filenames')

            ks = self.get_kernel_string(i, params)
            # add preprocessor statements
            n, ks = util.prepare_kernel_string(kernel_name, ks, params, grid, threads, block_size_names, self.lang)

            if i == 0:
                # primary kernel source
                name = n
                kernel_string = ks
                continue

            # save secondary kernel sources to temporary files

            # generate temp filename with the same extension; splitext only looks at
            # the last path component, so a dot in a directory name (e.g. "../common/helper")
            # is not mistaken for the start of an extension
            temp_file = util.get_temp_filename(suffix=os.path.splitext(f)[1])
            temp_files[f] = temp_file
            util.write_file(temp_file, ks)
            # replace occurrences of the additional file's name in the first kernel_string with the name of the temp file
            kernel_string = kernel_string.replace(f, temp_file)

        return name, kernel_string, temp_files
コード例 #3
0
ファイル: core.py プロジェクト: TaihuLight/kernel_tuner
    def compile_and_benchmark(self, gpu_args, params, kernel_options,
                              tuning_options):
        """ Compile and benchmark a kernel instance based on kernel strings and parameters

        :param gpu_args: The kernel arguments, passed on unchanged to the
            correctness check and the benchmark.

        :param params: The tunable parameters of this instance, used to create
            the kernel instance and the instance string that is logged.

        :param kernel_options: The kernel options; ``cmem_args`` is read here and
            the object is passed on to ``create_kernel_instance``.

        :param tuning_options: The tuning options; ``verbose``, ``answer``,
            ``atol`` and ``verify`` are read here.

        :returns: The value returned by ``self.benchmark``, or None when no
            kernel instance could be created or ``compile_kernel`` returned None.

        :raises Exception: Any exception raised while compiling, verifying, or
            benchmarking is re-raised after the kernel source has been dumped
            to a temporary file and the file names have been printed.
        """

        instance_string = util.get_instance_string(params)

        logging.debug('compile_and_benchmark %s', instance_string)
        mem_usage = round(
            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0, 1)
        logging.debug('Memory usage : %2.2f MB', mem_usage)

        verbose = tuning_options.verbose

        instance = self.create_kernel_instance(kernel_options, params, verbose)
        if instance is None:
            return None

        result = None
        try:
            #compile the kernel
            func = self.compile_kernel(instance, verbose)

            #without a compiled function there is nothing to run, but fall
            #through to the cleanup below instead of returning right here, so
            #the temporary files of this instance are not left behind
            if func is not None:
                #add constant memory arguments to compiled module
                if kernel_options.cmem_args is not None:
                    self.dev.copy_constant_memory_args(kernel_options.cmem_args)

                #test kernel for correctness and benchmark
                if tuning_options.answer is not None:
                    self.check_kernel_correctness(func, gpu_args, instance,
                                                  tuning_options.answer,
                                                  tuning_options.atol,
                                                  tuning_options.verify, verbose)

                #benchmark
                result = self.benchmark(func, gpu_args, instance, verbose)

        except Exception:
            #dump kernel_string to temp file
            temp_filename = util.get_temp_filename(suffix=".c")
            util.write_file(temp_filename, instance.kernel_string)
            #the temporary files are kept on purpose here, so they can be inspected
            source_files = [temp_filename] + list(instance.temp_files.values())
            print("Error while compiling or benchmarking, see source files: " +
                  " ".join(source_files))
            #bare raise keeps the original traceback intact
            raise

        #clean up any temporary files, if no error occurred
        for v in instance.temp_files.values():
            util.delete_temp_file(v)

        return result
コード例 #4
0
ファイル: core.py プロジェクト: benvanwerkhoven/kernel_tuner
    def compile_and_benchmark(self, gpu_args, params, kernel_options, tuning_options):
        """ Compile and benchmark a kernel instance based on kernel strings and parameters

        :param gpu_args: The kernel arguments, passed on unchanged to the
            output check and the benchmark.

        :param params: The tunable parameters of this instance, used to create
            the kernel instance and the instance string that is logged.

        :param kernel_options: The kernel options; ``cmem_args`` and
            ``texmem_args`` are read here and the object is passed on to
            ``create_kernel_instance``.

        :param tuning_options: The tuning options; ``verbose``, ``answer``,
            ``atol``, ``verify`` and ``times`` are read here.

        :returns: The value returned by ``self.benchmark``, or None when no
            kernel instance could be created or ``compile_kernel`` returned None.

        :raises Exception: Any exception raised while compiling, verifying, or
            benchmarking is re-raised after the kernel source has been dumped
            to a temporary file and the file names have been printed.
        """

        instance_string = util.get_instance_string(params)

        logging.debug('compile_and_benchmark %s', instance_string)
        mem_usage = round(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024.0, 1)
        logging.debug('Memory usage : %2.2f MB', mem_usage)

        verbose = tuning_options.verbose

        instance = self.create_kernel_instance(kernel_options, params, verbose)
        if instance is None:
            return None

        result = None
        try:
            #compile the kernel
            func = self.compile_kernel(instance, verbose)

            #without a compiled function there is nothing to run, but fall
            #through to the cleanup below instead of returning right here, so
            #the temporary files of this instance are not left behind
            if func is not None:
                #add constant memory arguments to compiled module
                if kernel_options.cmem_args is not None:
                    self.dev.copy_constant_memory_args(kernel_options.cmem_args)
                #add texture memory arguments to compiled module
                if kernel_options.texmem_args is not None:
                    self.dev.copy_texture_memory_args(kernel_options.texmem_args)

                #test kernel for correctness and benchmark
                if tuning_options.answer is not None:
                    self.check_kernel_output(func, gpu_args, instance, tuning_options.answer, tuning_options.atol, tuning_options.verify, verbose)

                #benchmark
                result = self.benchmark(func, gpu_args, instance, tuning_options.times, verbose)

        except Exception:
            #dump kernel_string to temp file
            temp_filename = util.get_temp_filename(suffix=".c")
            util.write_file(temp_filename, instance.kernel_string)
            #the temporary files are kept on purpose here, so they can be inspected
            source_files = [temp_filename] + list(instance.temp_files.values())
            print("Error while compiling or benchmarking, see source files: " + " ".join(source_files))
            #bare raise keeps the original traceback intact
            raise

        #clean up any temporary files, if no error occurred
        for v in instance.temp_files.values():
            util.delete_temp_file(v)

        return result
コード例 #5
0
ファイル: c.py プロジェクト: bayesianopt/kernel_tuner
    def compile(self, kernel_name, kernel_string):
        """call the configured compiler to compile the kernel, return the function

        The source is written to a temporary file, compiled into an object
        file, linked into a shared library, and loaded through
        ``numpy.ctypeslib.load_library``. The temporary source, object, and
        library files are deleted before returning, whether or not the
        compilation succeeded.

        Side effects on the instance: a previously loaded library is cleaned
        up, ``self.using_openmp`` is set when OpenMP usage is detected,
        ``self.compiler`` is switched from "g++" to "nvcc" when the source
        looks like CUDA code and nvcc is available, and ``self.lib`` is set to
        the newly loaded library.

        :param kernel_name: The name of the kernel to be compiled, used to lookup the
            function after compilation.
        :type kernel_name: string

        :param kernel_string: The source code that contains the function `kernel_name`
        :type kernel_string: string

        :returns: A ctypes function that can be called directly.
        :rtype: ctypes._FuncPtr

        :raises subprocess.CalledProcessError: If the compile or the link
            command exits with a non-zero status.
        """
        logging.debug('compiling ' + kernel_name)

        #release the library of a previous compilation before building a new one
        if self.lib != None:
            self.cleanup_lib()

        compiler_options = ["-fPIC"]

        #detect openmp, in C/C++ (omp.h include) as well as in Fortran (omp_lib module)
        if "#include <omp.h>" in kernel_string or "use omp_lib" in kernel_string:
            logging.debug('set using_openmp to true')
            self.using_openmp = True
            #pgfortran uses -mp to enable OpenMP, the other compilers get -fopenmp
            if self.compiler == "pgfortran":
                compiler_options.append("-mp")
            else:
                compiler_options.append("-fopenmp")

        #detect whether to use nvcc as default instead of g++, may overrule an explicitly passed g++
        #note that this changes self.compiler for all later calls as well
        if ("#include <cuda" in kernel_string) or ("__global__"
                                                   in kernel_string):
            if self.compiler == "g++" and self.nvcc_available:
                self.compiler = "nvcc"

        #select right suffix based on compiler
        suffix = ".cc"
        if self.compiler in ["gfortran", "pgfortran", "ftn", "ifort"]:
            suffix = ".F90"
        if self.compiler == "nvcc":
            #turns ".cc" into ".cu"
            suffix = suffix[:-1] + "u"
            #the options collected so far are forwarded to the host compiler via -Xcompiler
            compiler_options = ["-Xcompiler=" + c for c in compiler_options]

        #".c" matches the ".cc" and ".cu" suffixes but not ".F90", so only
        #non-Fortran code is wrapped in extern "C" (unless it already uses it)
        if ".c" in suffix:
            if not "extern \"C\"" in kernel_string:
                kernel_string = "extern \"C\" {\n" + kernel_string + "\n}"

        #copy user specified compiler options to current list
        #these are added after the -Xcompiler rewrite above, so they are passed as is
        if self.compiler_options:
            compiler_options += self.compiler_options

        #link against OpenCL when the source includes the OpenCL header
        lib_args = []
        if "CL/cl.h" in kernel_string:
            lib_args = ["-lOpenCL"]

        logging.debug('using compiler ' + self.compiler)
        logging.debug('compiler_options ' + " ".join(compiler_options))
        logging.debug('lib_args ' + " ".join(lib_args))

        source_file = get_temp_filename(suffix=suffix)
        #name of the source file without its extension, used as base name for the build outputs
        filename = ".".join(source_file.split(".")[:-1])

        #detect Fortran modules
        #the symbol name of a function inside a module depends on the compiler,
        #so kernel_name is rewritten to the name used in the compiled library
        match = re.search(r"\s*module\s+([a-zA-Z_]*)", kernel_string)
        if match:
            if self.compiler == "gfortran":
                kernel_name = "__" + match.group(1) + "_MOD_" + kernel_name
            elif self.compiler in ["ftn", "ifort"]:
                kernel_name = match.group(1) + "_mp_" + kernel_name + "_"
            elif self.compiler == "pgfortran":
                kernel_name = match.group(1) + "_" + kernel_name + "_"

        try:
            write_file(source_file, kernel_string)

            #shared libraries get the .dylib extension on macOS and .so elsewhere
            lib_extension = ".so"
            if platform.system() == "Darwin":
                lib_extension = ".dylib"

            #first compile the source into an object file, then link it into a shared library
            subprocess.check_call([self.compiler, "-c", source_file] +
                                  compiler_options + ["-o", filename + ".o"])
            subprocess.check_call([self.compiler, filename + ".o"] +
                                  compiler_options +
                                  ["-shared", "-o", filename + lib_extension] +
                                  lib_args)

            #NOTE(review): the library is looked up relative to '.', so this presumably
            #relies on get_temp_filename returning names in the working directory - confirm
            self.lib = numpy.ctypeslib.load_library(filename, '.')
            func = getattr(self.lib, kernel_name)
            #declare that calls through this function return a C float
            func.restype = C.c_float

        finally:
            #remove every file that may have been created, for both library extensions
            delete_temp_file(source_file)
            delete_temp_file(filename + ".o")
            delete_temp_file(filename + ".so")
            delete_temp_file(filename + ".dylib")

        return func
コード例 #6
0
ファイル: c.py プロジェクト: wjp/kernel_tuner
    def compile(self, kernel_instance):
        """call the configured compiler to compile the kernel, return the function

        The kernel source of the instance is written to a temporary file,
        compiled into an object file, linked into a shared library, and loaded
        through ``numpy.ctypeslib.load_library``. The temporary source, object,
        and library files are deleted before returning, whether or not the
        compilation succeeded.

        Side effects on the instance: a previously loaded library is cleaned
        up, ``self.using_openmp`` is set when OpenMP usage is detected,
        ``self.compiler`` is switched from "g++" to "nvcc" when the source
        looks like CUDA code and nvcc is available, and ``self.lib`` is set to
        the newly loaded library.

        :param kernel_instance: An object representing the specific instance of the tunable kernel
            in the parameter space.
        :type kernel_instance: kernel_tuner.core.KernelInstance

        :returns: A ctypes function that can be called directly.
        :rtype: ctypes._FuncPtr

        :raises subprocess.CalledProcessError: If the compile or the link
            command exits with a non-zero status.
        """
        logging.debug('compiling ' + kernel_instance.name)

        kernel_string = kernel_instance.kernel_string
        kernel_name = kernel_instance.name

        #release the library of a previous compilation before building a new one
        if self.lib != None:
            self.cleanup_lib()

        compiler_options = ["-fPIC"]

        #detect openmp, in C/C++ (omp.h include) as well as in Fortran (omp_lib module)
        if "#include <omp.h>" in kernel_string or "use omp_lib" in kernel_string:
            logging.debug('set using_openmp to true')
            self.using_openmp = True
            #pgfortran uses -mp to enable OpenMP, the other compilers get -fopenmp
            if self.compiler == "pgfortran":
                compiler_options.append("-mp")
            else:
                compiler_options.append("-fopenmp")

        #if filename is known, use that one
        #presumably get_user_suffix returns None when no filename was passed;
        #the "suffix is None" check below relies on that - verify against kernel_source
        suffix = kernel_instance.kernel_source.get_user_suffix()

        #if code contains device code, suffix .cu is required
        #this overrides a suffix obtained from the user's filename
        device_code_signals = ["__global", "__syncthreads()", "threadIdx"]
        if any([snippet in kernel_string for snippet in device_code_signals]):
            suffix = ".cu"

        #detect whether to use nvcc as default instead of g++, may overrule an explicitly passed g++
        #note that this changes self.compiler for all later calls as well
        if (suffix == ".cu") or ("#include <cuda"
                                 in kernel_string) or ("cudaMemcpy"
                                                       in kernel_string):
            if self.compiler == "g++" and self.nvcc_available:
                self.compiler = "nvcc"

        if suffix is None:
            #select right suffix based on compiler
            suffix = ".cc"

            if self.compiler in ["gfortran", "pgfortran", "ftn", "ifort"]:
                suffix = ".F90"

        #the options collected so far are forwarded to the host compiler via -Xcompiler
        if self.compiler == "nvcc":
            compiler_options = ["-Xcompiler=" + c for c in compiler_options]

        #this basically checks if we aren't compiling Fortran
        #at the moment any C, C++, or CUDA code is assumed to use extern "C" linkage
        if ".c" in suffix:
            if not "extern \"C\"" in kernel_string:
                kernel_string = "extern \"C\" {\n" + kernel_string + "\n}"

        #copy user specified compiler options to current list
        #these are added after the -Xcompiler rewrite above, so they are passed as is
        if self.compiler_options:
            compiler_options += self.compiler_options

        #link against OpenCL when the source includes the OpenCL header
        lib_args = []
        if "CL/cl.h" in kernel_string:
            lib_args = ["-lOpenCL"]

        logging.debug('using compiler ' + self.compiler)
        logging.debug('compiler_options ' + " ".join(compiler_options))
        logging.debug('lib_args ' + " ".join(lib_args))

        source_file = get_temp_filename(suffix=suffix)
        #name of the source file without its extension, used as base name for the build outputs
        filename = ".".join(source_file.split(".")[:-1])

        #detect Fortran modules
        #the symbol name of a function inside a module depends on the compiler,
        #so kernel_name is rewritten to the name used in the compiled library
        match = re.search(r"\s*module\s+([a-zA-Z_]*)", kernel_string)
        if match:
            if self.compiler == "gfortran":
                kernel_name = "__" + match.group(1) + "_MOD_" + kernel_name
            elif self.compiler in ["ftn", "ifort"]:
                kernel_name = match.group(1) + "_mp_" + kernel_name + "_"
            elif self.compiler == "pgfortran":
                kernel_name = match.group(1) + "_" + kernel_name + "_"
        else:
            #for functions outside of modules
            #with a Fortran compiler the symbol is looked up with a trailing underscore
            if self.compiler in ["gfortran", "ftn", "ifort", "pgfortran"]:
                kernel_name = kernel_name + "_"

        try:
            write_file(source_file, kernel_string)

            #shared libraries get the .dylib extension on macOS and .so elsewhere
            lib_extension = ".so"
            if platform.system() == "Darwin":
                lib_extension = ".dylib"

            #first compile the source into an object file, then link it into a shared library
            subprocess.check_call([self.compiler, "-c", source_file] +
                                  compiler_options + ["-o", filename + ".o"])
            subprocess.check_call([self.compiler, filename + ".o"] +
                                  compiler_options +
                                  ["-shared", "-o", filename + lib_extension] +
                                  lib_args)

            #NOTE(review): the library is looked up relative to '.', so this presumably
            #relies on get_temp_filename returning names in the working directory - confirm
            self.lib = numpy.ctypeslib.load_library(filename, '.')
            func = getattr(self.lib, kernel_name)
            #declare that calls through this function return a C float
            func.restype = C.c_float

        finally:
            #remove every file that may have been created, for both library extensions
            delete_temp_file(source_file)
            delete_temp_file(filename + ".o")
            delete_temp_file(filename + ".so")
            delete_temp_file(filename + ".dylib")

        return func
コード例 #7
0
ファイル: c.py プロジェクト: TaihuLight/kernel_tuner
    def compile(self, kernel_name, kernel_string):
        """Build the kernel source into a shared library and return the kernel function.

        The source is wrapped in ``extern "C"`` when it does not contain that
        already, written to a temporary file, compiled, linked into a shared
        library, and loaded through ``numpy.ctypeslib.load_library``. The
        temporary source, object, and library files are deleted before
        returning, whether or not the compilation succeeded.

        Side effects on the instance: a previously loaded library is cleaned
        up, ``self.using_openmp`` is set when the OpenMP header is included,
        ``self.compiler`` is set to "nvcc" when the source looks like CUDA code
        and nvcc is available, and ``self.lib`` is set to the loaded library.

        :param kernel_name: The name of the kernel to be compiled, used to lookup the
            function after compilation.
        :type kernel_name: string

        :param kernel_string: The C code that contains the function `kernel_name`
        :type kernel_string: string

        :returns: A ctypes function that can be called directly.
        :rtype: ctypes._FuncPtr
        """
        logging.debug('compiling ' + kernel_name)

        #release the library of a previous compilation first
        if self.lib != None:
            self.cleanup_lib()

        #the function is looked up by kernel_name below, so make sure it has C linkage
        if "extern \"C\"" not in kernel_string:
            kernel_string = "extern \"C\" {\n" + kernel_string + "\n}"

        flags = ["-fPIC"]
        if "#include <omp.h>" in kernel_string:
            logging.debug('set using_openmp to true')
            self.using_openmp = True
            flags.append("-fopenmp")

        #CUDA code is handed to nvcc whenever nvcc is available
        has_cuda_code = ("#include <cuda" in kernel_string) or ("__global__" in kernel_string)
        if has_cuda_code and self.nvcc_available:
            self.compiler = "nvcc"

        if self.compiler == "nvcc":
            suffix = ".cu"
            #nvcc forwards these options to the host compiler
            flags = ["-Xcompiler=" + flag for flag in flags]
        else:
            suffix = ".cc"

        #user specified options are appended unmodified
        if self.compiler_options:
            flags.extend(self.compiler_options)

        lib_args = ["-lOpenCL"] if "CL/cl.h" in kernel_string else []

        logging.debug('using compiler ' + self.compiler)
        logging.debug('compiler_options ' + " ".join(flags))
        logging.debug('lib_args ' + " ".join(lib_args))

        source_file = get_temp_filename(suffix=suffix)
        #everything in front of the last dot serves as base name for the build outputs
        filename = source_file.rpartition(".")[0]
        object_file = filename + ".o"
        shared_library = filename + ".so"

        try:
            write_file(source_file, kernel_string)

            compile_command = [self.compiler, "-c", source_file] + flags + ["-o", object_file]
            subprocess.check_call(compile_command)

            link_command = [self.compiler, object_file] + flags + ["-shared", "-o", shared_library] + lib_args
            subprocess.check_call(link_command)

            self.lib = numpy.ctypeslib.load_library(filename, '.')

            func = getattr(self.lib, kernel_name)
            func.restype = C.c_float

        finally:
            #remove the build artifacts, also when compiling or loading failed
            for leftover in (source_file, object_file, shared_library):
                delete_temp_file(leftover)

        return func
コード例 #8
0
ファイル: c.py プロジェクト: benvanwerkhoven/kernel_tuner
    def compile(self, kernel_name, kernel_string):
        """call the configured compiler to compile the kernel, return the function

        The source is written to a temporary file, compiled into an object
        file, linked into a shared library, and loaded through
        ``numpy.ctypeslib.load_library``. The temporary source, object, and
        library files are deleted before returning, whether or not the
        compilation succeeded.

        Side effects on the instance: a previously loaded library is cleaned
        up, ``self.using_openmp`` is set when OpenMP usage is detected,
        ``self.compiler`` is switched from "g++" to "nvcc" when the source
        uses the CUDA runtime and nvcc is available, and ``self.lib`` is set to
        the newly loaded library.

        :param kernel_name: The name of the kernel to be compiled, used to lookup the
            function after compilation.
        :type kernel_name: string

        :param kernel_string: The source code that contains the function `kernel_name`
        :type kernel_string: string

        :returns: A ctypes function that can be called directly.
        :rtype: ctypes._FuncPtr

        :raises subprocess.CalledProcessError: If the compile or the link
            command exits with a non-zero status.
        """
        logging.debug('compiling ' + kernel_name)

        #release the library of a previous compilation before building a new one
        if self.lib != None:
            self.cleanup_lib()

        compiler_options = ["-fPIC"]

        #detect openmp, in C/C++ (omp.h include) as well as in Fortran (omp_lib module)
        if "#include <omp.h>" in kernel_string or "use omp_lib" in kernel_string:
            logging.debug('set using_openmp to true')
            self.using_openmp = True
            #pgfortran uses -mp to enable OpenMP, the other compilers get -fopenmp
            if self.compiler == "pgfortran":
                compiler_options.append("-mp")
            else:
                compiler_options.append("-fopenmp")

        #select right suffix based on compiler
        suffix = ".cc"

        #detect whether to use nvcc as default instead of g++, may overrule an explicitly passed g++
        #note that this changes self.compiler for all later calls as well
        if ("#include <cuda" in kernel_string) or ("cudaMemcpy" in kernel_string):
            if self.compiler == "g++" and self.nvcc_available:
                self.compiler = "nvcc"

        #if contains device code suffix .cu is required by nvcc
        #host-only code compiled with nvcc keeps the .cc suffix
        if self.compiler == "nvcc" and "__global__" in kernel_string:
            suffix = ".cu"
        if self.compiler in ["gfortran", "pgfortran", "ftn", "ifort"]:
            suffix = ".F90"

        #the options collected so far are forwarded to the host compiler via -Xcompiler
        if self.compiler == "nvcc":
            compiler_options = ["-Xcompiler=" + c for c in compiler_options]

        #".c" matches the ".cc" and ".cu" suffixes but not ".F90", so only
        #non-Fortran code is wrapped in extern "C" (unless it already uses it)
        if ".c" in suffix:
            if not "extern \"C\"" in kernel_string:
                kernel_string = "extern \"C\" {\n" + kernel_string + "\n}"

        #copy user specified compiler options to current list
        #these are added after the -Xcompiler rewrite above, so they are passed as is
        if self.compiler_options:
            compiler_options += self.compiler_options

        #link against OpenCL when the source includes the OpenCL header
        lib_args = []
        if "CL/cl.h" in kernel_string:
            lib_args = ["-lOpenCL"]

        logging.debug('using compiler ' + self.compiler)
        logging.debug('compiler_options ' + " ".join(compiler_options))
        logging.debug('lib_args ' + " ".join(lib_args))

        source_file = get_temp_filename(suffix=suffix)
        #name of the source file without its extension, used as base name for the build outputs
        filename = ".".join(source_file.split(".")[:-1])

        #detect Fortran modules
        #the symbol name of a function inside a module depends on the compiler,
        #so kernel_name is rewritten to the name used in the compiled library
        match = re.search(r"\s*module\s+([a-zA-Z_]*)", kernel_string)
        if match:
            if self.compiler == "gfortran":
                kernel_name = "__" + match.group(1) + "_MOD_" + kernel_name
            elif self.compiler in ["ftn", "ifort"]:
                kernel_name = match.group(1) + "_mp_" + kernel_name + "_"
            elif self.compiler == "pgfortran":
                kernel_name = match.group(1) + "_" + kernel_name + "_"


        try:
            write_file(source_file, kernel_string)

            #shared libraries get the .dylib extension on macOS and .so elsewhere
            lib_extension = ".so"
            if platform.system() == "Darwin":
                lib_extension = ".dylib"

            #first compile the source into an object file, then link it into a shared library
            subprocess.check_call([self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"])
            subprocess.check_call([self.compiler, filename + ".o"] + compiler_options + ["-shared", "-o", filename + lib_extension] + lib_args)


            #NOTE(review): the library is looked up relative to '.', so this presumably
            #relies on get_temp_filename returning names in the working directory - confirm
            self.lib = numpy.ctypeslib.load_library(filename, '.')
            func = getattr(self.lib, kernel_name)
            #declare that calls through this function return a C float
            func.restype = C.c_float

        finally:
            #remove every file that may have been created, for both library extensions
            delete_temp_file(source_file)
            delete_temp_file(filename+".o")
            delete_temp_file(filename+".so")
            delete_temp_file(filename+".dylib")

        return func