Esempio n. 1
0
    def create_kernel_instance(self, kernel_source, kernel_options, params,
                               verbose):
        """Build a KernelInstance for a single parameter configuration.

        The thread block and grid dimensions are derived from the problem
        size, the grid divisors and ``params``; ``kernel_source`` is then
        asked to prepare the kernel string and any additional files.

        Side effect: when ``kernel_options.block_size_names`` is falsy it is
        replaced in place by ``util.default_block_size_names``.

        Returns ``None`` when the product of the thread block dimensions
        exceeds ``self.dev.max_threads`` (a message is printed first if
        ``verbose`` is truthy); otherwise returns the new ``KernelInstance``.
        """
        config_label = util.get_instance_string(params)
        divisors = (kernel_options.grid_div_x,
                    kernel_options.grid_div_y,
                    kernel_options.grid_div_z)

        # fall back to the default block size names when none were given
        if not kernel_options.block_size_names:
            kernel_options.block_size_names = util.default_block_size_names

        # derive the launch dimensions for this configuration
        threads, grid = util.setup_block_and_grid(kernel_options.problem_size,
                                                  divisors, params,
                                                  kernel_options.block_size_names)

        # configurations that do not fit on the device are skipped
        if numpy.prod(threads) > self.dev.max_threads:
            if verbose:
                print("skipping config", config_label,
                      "reason: too many threads per block")
            return None

        # let the kernel source produce the kernel string and extra files
        prepared = kernel_source.prepare_list_of_files(
            kernel_options.kernel_name, params, grid, threads,
            kernel_options.block_size_names)
        name, kernel_string, temp_files = prepared

        # bundle everything known about this instance
        return KernelInstance(name, kernel_source, kernel_string, temp_files,
                              threads, grid, params, kernel_options.arguments)
Esempio n. 2
0
    def create_kernel_instance(self, kernel_options, params, verbose):
        """Build a KernelInstance for a single parameter configuration.

        The kernel source is taken from ``kernel_options.kernel_string``; a
        value that is not a list is wrapped in a one-element list before it is
        handed to ``util.prepare_list_of_files``.

        Side effect: when ``kernel_options.block_size_names`` is falsy it is
        replaced in place by ``util.default_block_size_names``.

        Returns ``None`` when the product of the thread block dimensions
        exceeds ``self.dev.max_threads`` (a message is printed first if
        ``verbose`` is truthy); otherwise returns the new ``KernelInstance``.
        """
        config_label = util.get_instance_string(params)
        divisors = (
            kernel_options.grid_div_x,
            kernel_options.grid_div_y,
            kernel_options.grid_div_z,
        )

        # fall back to the default block size names when none were given
        if not kernel_options.block_size_names:
            kernel_options.block_size_names = util.default_block_size_names

        # derive the launch dimensions for this configuration
        threads, grid = util.setup_block_and_grid(
            kernel_options.problem_size, divisors, params,
            kernel_options.block_size_names)

        # configurations that do not fit on the device are skipped
        if numpy.prod(threads) > self.dev.max_threads:
            if verbose:
                print("skipping config", config_label, "reason: too many threads per block")
            return None

        # the kernel source may be a single item or a list of items
        raw_source = kernel_options.kernel_string
        sources = raw_source if isinstance(raw_source, list) else [raw_source]

        # obtain the kernel string and prepare additional files, if any
        name, kernel_string, temp_files = util.prepare_list_of_files(
            kernel_options.kernel_name, sources, params, grid, threads,
            kernel_options.block_size_names)

        # bundle everything known about this instance
        return KernelInstance(name, kernel_string, temp_files, threads, grid,
                              params, kernel_options.arguments)
Esempio n. 3
0
def _select_best_common_config(results, objective, objective_higher_is_better):
    """Return the tunable parameters of the most frequently occurring config.

    Each entry of ``results`` must provide ``"tunable_parameters"`` and a
    value under the key ``objective``. Entries are grouped by the string
    produced by ``util.get_instance_string``. Among the groups that occur
    most often, the one with the largest (``objective_higher_is_better``
    truthy) or smallest summed objective value is selected, and the tunable
    parameters last stored for that group are returned.
    """
    occurrences = {}
    summed_objective = {}
    params_by_key = {}

    for entry in results:
        tunables = entry["tunable_parameters"]
        key = util.get_instance_string(tunables)

        # tally how often this configuration was seen
        occurrences[key] = occurrences.get(key, 0) + 1
        # accumulate its objective value over all occurrences
        summed_objective[key] = summed_objective.get(key, 0) + entry[objective]
        # remember how to get back from the string to the parameters
        params_by_key[key] = tunables

    # only the most frequently seen configurations remain candidates
    highest_count = max(occurrences.values())
    candidates = [key for key, count in occurrences.items()
                  if count == highest_count]

    # choose the candidate with the best accumulated objective
    pick = max if objective_higher_is_better else min
    winner = pick(candidates, key=lambda key: summed_objective[key])

    return params_by_key[winner]
Esempio n. 4
0
    def compile_and_benchmark(self, kernel_source, gpu_args, params,
                              kernel_options, tuning_options):
        """Compile and benchmark a kernel instance based on kernel strings and parameters.

        Args:
            kernel_source: forwarded to ``self.create_kernel_instance``.
            gpu_args: forwarded to ``self.check_kernel_output`` and
                ``self.benchmark``.
            params: the parameter configuration; used to build the instance
                and to resolve shared memory arguments.
            kernel_options: supplies ``smem_args``, ``cmem_args`` and
                ``texmem_args``, each of which is only applied when it is
                not ``None``.
            tuning_options: supplies ``verbose``, ``answer``, ``atol`` and
                ``verify``.

        Returns:
            Whatever ``self.benchmark`` returns, or ``None`` when no instance
            could be created or ``self.compile_kernel`` returned ``None``.

        Raises:
            Exception: anything raised while compiling, checking or
                benchmarking is re-raised after the locations of the kernel
                source files have been printed.
        """

        instance_string = util.get_instance_string(params)

        logging.debug('compile_and_benchmark ' + instance_string)
        mem_usage = round(
            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0, 1)
        logging.debug('Memory usage : %2.2f MB', mem_usage)

        verbose = tuning_options.verbose

        instance = self.create_kernel_instance(kernel_source, kernel_options,
                                               params, verbose)
        if instance is None:
            return None

        # stays None when the kernel could not be compiled for this config
        result = None
        try:
            #compile the kernel
            func = self.compile_kernel(instance, verbose)

            # Do not return early here: falling through guarantees that the
            # temporary files of a skipped configuration are still removed.
            if func is not None:
                #add shared memory arguments to compiled module
                if kernel_options.smem_args is not None:
                    self.dev.copy_shared_memory_args(
                        util.get_smem_args(kernel_options.smem_args, params))
                #add constant memory arguments to compiled module
                if kernel_options.cmem_args is not None:
                    self.dev.copy_constant_memory_args(
                        kernel_options.cmem_args)
                #add texture memory arguments to compiled module
                if kernel_options.texmem_args is not None:
                    self.dev.copy_texture_memory_args(
                        kernel_options.texmem_args)

                #test kernel for correctness
                if tuning_options.answer is not None or tuning_options.verify is not None:
                    self.check_kernel_output(func, gpu_args, instance,
                                             tuning_options.answer,
                                             tuning_options.atol,
                                             tuning_options.verify, verbose)

                #benchmark
                result = self.benchmark(func, gpu_args, instance, verbose)

        except Exception:
            #dump kernel_string to temp file
            temp_filenames = instance.prepare_temp_files_for_error_msg()
            print("Error while compiling or benchmarking, see source files: " +
                  " ".join(temp_filenames))
            # bare raise re-raises the active exception unchanged
            raise

        #clean up any temporary files, if no error occurred
        instance.delete_temp_files()

        return result
Esempio n. 5
0
    def compile_and_benchmark(self, gpu_args, params, kernel_options,
                              tuning_options):
        """Compile and benchmark a kernel instance based on kernel strings and parameters.

        Args:
            gpu_args: forwarded to ``self.check_kernel_correctness`` and
                ``self.benchmark``.
            params: the parameter configuration used to build the instance.
            kernel_options: supplies ``cmem_args``, which is only applied
                when it is not ``None``.
            tuning_options: supplies ``verbose``, ``answer``, ``atol`` and
                ``verify``; the correctness check only runs when ``answer``
                is not ``None``.

        Returns:
            Whatever ``self.benchmark`` returns, or ``None`` when no instance
            could be created or ``self.compile_kernel`` returned ``None``.

        Raises:
            Exception: anything raised while compiling, checking or
                benchmarking is re-raised after the kernel string has been
                written to a temporary file and the file names printed.
        """

        instance_string = util.get_instance_string(params)

        logging.debug('compile_and_benchmark ' + instance_string)
        mem_usage = round(
            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0, 1)
        logging.debug('Memory usage : %2.2f MB', mem_usage)

        verbose = tuning_options.verbose

        instance = self.create_kernel_instance(kernel_options, params, verbose)
        if instance is None:
            return None

        # stays None when the kernel could not be compiled for this config
        result = None
        try:
            #compile the kernel
            func = self.compile_kernel(instance, verbose)

            # Do not return early here: falling through guarantees that the
            # temporary files of a skipped configuration are still removed.
            if func is not None:
                #add constant memory arguments to compiled module
                if kernel_options.cmem_args is not None:
                    self.dev.copy_constant_memory_args(
                        kernel_options.cmem_args)

                #test kernel for correctness
                if tuning_options.answer is not None:
                    self.check_kernel_correctness(func, gpu_args, instance,
                                                  tuning_options.answer,
                                                  tuning_options.atol,
                                                  tuning_options.verify,
                                                  verbose)

                #benchmark
                result = self.benchmark(func, gpu_args, instance, verbose)

        except Exception:
            #dump kernel_string to temp file
            temp_filename = util.get_temp_filename(suffix=".c")
            util.write_file(temp_filename, instance.kernel_string)
            # join all names with a separator so that the dumped kernel file
            # is not fused with the first additional file in the message
            source_files = [temp_filename] + list(instance.temp_files.values())
            print("Error while compiling or benchmarking, see source files: " +
                  " ".join(source_files))
            # bare raise re-raises the active exception unchanged
            raise

        #clean up any temporary files, if no error occurred
        for temp_file in instance.temp_files.values():
            util.delete_temp_file(temp_file)

        return result
Esempio n. 6
0
    def compile_and_benchmark(self, gpu_args, params, kernel_options, tuning_options):
        """Compile and benchmark a kernel instance based on kernel strings and parameters.

        Args:
            gpu_args: forwarded to ``self.check_kernel_output`` and
                ``self.benchmark``.
            params: the parameter configuration used to build the instance.
            kernel_options: supplies ``cmem_args`` and ``texmem_args``, each
                of which is only applied when it is not ``None``.
            tuning_options: supplies ``verbose``, ``answer``, ``atol``,
                ``verify`` and ``times``; the output check only runs when
                ``answer`` is not ``None``.

        Returns:
            Whatever ``self.benchmark`` returns, or ``None`` when no instance
            could be created or ``self.compile_kernel`` returned ``None``.

        Raises:
            Exception: anything raised while compiling, checking or
                benchmarking is re-raised after the kernel string has been
                written to a temporary file and the file names printed.
        """

        instance_string = util.get_instance_string(params)

        logging.debug('compile_and_benchmark ' + instance_string)
        mem_usage = round(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024.0, 1)
        logging.debug('Memory usage : %2.2f MB', mem_usage)

        verbose = tuning_options.verbose

        instance = self.create_kernel_instance(kernel_options, params, verbose)
        if instance is None:
            return None

        # stays None when the kernel could not be compiled for this config
        result = None
        try:
            #compile the kernel
            func = self.compile_kernel(instance, verbose)

            # Do not return early here: falling through guarantees that the
            # temporary files of a skipped configuration are still removed.
            if func is not None:
                #add constant memory arguments to compiled module
                if kernel_options.cmem_args is not None:
                    self.dev.copy_constant_memory_args(kernel_options.cmem_args)
                #add texture memory arguments to compiled module
                if kernel_options.texmem_args is not None:
                    self.dev.copy_texture_memory_args(kernel_options.texmem_args)

                #test kernel for correctness
                if tuning_options.answer is not None:
                    self.check_kernel_output(func, gpu_args, instance,
                                             tuning_options.answer,
                                             tuning_options.atol,
                                             tuning_options.verify, verbose)

                #benchmark
                result = self.benchmark(func, gpu_args, instance,
                                        tuning_options.times, verbose)

        except Exception:
            #dump kernel_string to temp file
            temp_filename = util.get_temp_filename(suffix=".c")
            util.write_file(temp_filename, instance.kernel_string)
            # join all names with a separator so that the dumped kernel file
            # is not fused with the first additional file in the message
            source_files = [temp_filename] + list(instance.temp_files.values())
            print("Error while compiling or benchmarking, see source files: " +
                  " ".join(source_files))
            # bare raise re-raises the active exception unchanged
            raise

        #clean up any temporary files, if no error occurred
        for temp_file in instance.temp_files.values():
            util.delete_temp_file(temp_file)

        return result
Esempio n. 7
0
 def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options, tuning_options):
     """Log the requested configuration, then raise the stored device error.

     Nothing is compiled or benchmarked here: after the debug message has
     been emitted, ``self.device_access_error`` is raised unconditionally.
     """
     config_label = get_instance_string(params)
     message = 'compile_and_benchmark ' + config_label
     logging.debug(message)
     raise self.device_access_error