Ejemplo n.º 1
0
    def create_kernel_instance(self, kernel_options, params, verbose):
        """Create a kernel instance for a single parameter configuration.

        Thread block and grid dimensions are derived from the problem size,
        the grid divisors and ``params``; the kernel source (plus any
        additional files) is then prepared for this configuration.

        :param kernel_options: Options object. The attributes read here are
            ``grid_div_x``, ``grid_div_y``, ``grid_div_z``,
            ``block_size_names``, ``problem_size``, ``kernel_string``,
            ``kernel_name`` and ``arguments``. If ``block_size_names`` is
            falsy it is overwritten in place with
            ``util.default_block_size_names``.
        :param params: The parameter configuration to instantiate.
        :param verbose: When truthy, print a message if the configuration is
            skipped.
        :returns: A ``KernelInstance``, or ``None`` when the product of the
            thread block dimensions exceeds ``self.dev.max_threads``.
        """
        instance_string = util.get_instance_string(params)
        grid_div = (
            kernel_options.grid_div_x,
            kernel_options.grid_div_y,
            kernel_options.grid_div_z,
        )

        # insert default block_size_names if needed; this is written back to
        # kernel_options, so the default stays set on the object after the call
        if not kernel_options.block_size_names:
            kernel_options.block_size_names = util.default_block_size_names

        # setup thread block and grid dimensions
        threads, grid = util.setup_block_and_grid(
            kernel_options.problem_size,
            grid_div,
            params,
            kernel_options.block_size_names,
        )
        if numpy.prod(threads) > self.dev.max_threads:
            if verbose:
                print("skipping config", instance_string, "reason: too many threads per block")
            return None

        # obtain the kernel_string and prepare additional files, if any;
        # a single source is wrapped so the helper always receives a list
        kernel_source = kernel_options.kernel_string
        if not isinstance(kernel_source, list):
            kernel_source = [kernel_source]
        name, kernel_string, temp_files = util.prepare_list_of_files(
            kernel_options.kernel_name,
            kernel_source,
            params,
            grid,
            threads,
            kernel_options.block_size_names,
        )

        # collect everything we know about this instance and return it
        return KernelInstance(
            name,
            kernel_string,
            temp_files,
            threads,
            grid,
            params,
            kernel_options.arguments,
        )
Ejemplo n.º 2
0
    def create_kernel_instance(self, kernel_source, kernel_options, params,
                               verbose):
        """Assemble a KernelInstance for one parameter configuration.

        Computes the thread block and grid dimensions, asks ``kernel_source``
        to prepare its file(s) for this configuration and, for CUDA kernels
        whose name contains both ``<`` and ``>``, passes the result through
        ``wrap_templated_kernel``.

        If ``kernel_options.block_size_names`` is falsy it is replaced in
        place by ``util.default_block_size_names``.

        Returns ``None`` (optionally printing a message when ``verbose`` is
        truthy) if the product of the thread block dimensions exceeds
        ``self.dev.max_threads``; otherwise returns the ``KernelInstance``.
        """
        opts = kernel_options
        divisors = (opts.grid_div_x, opts.grid_div_y, opts.grid_div_z)

        # fall back to the default block size names when none were supplied
        if not opts.block_size_names:
            opts.block_size_names = util.default_block_size_names

        # work out thread block and grid dimensions for this configuration
        block_dims, grid_dims = util.setup_block_and_grid(
            opts.problem_size, divisors, params, opts.block_size_names)

        # reject configurations the device cannot launch
        if np.prod(block_dims) > self.dev.max_threads:
            if verbose:
                skipped = util.get_instance_string(params)
                print(f"skipping config {skipped} reason: too many threads per block")
            return None

        # let the kernel source prepare its string and any additional files
        prepared = kernel_source.prepare_list_of_files(
            opts.kernel_name, params, grid_dims, block_dims,
            opts.block_size_names)
        kernel_name, source_text, extra_files = prepared

        # templated CUDA kernels get wrapped before being handed on
        is_templated = (kernel_source.lang == "CUDA"
                        and "<" in kernel_name
                        and ">" in kernel_name)
        if is_templated:
            source_text, kernel_name = wrap_templated_kernel(
                source_text, kernel_name)

        # bundle everything known about this instance
        return KernelInstance(kernel_name, kernel_source, source_text,
                              extra_files, block_dims, grid_dims, params,
                              opts.arguments)
Ejemplo n.º 3
0
    def create_kernel_instance(self, kernel_options, params, verbose):
        """Assemble a KernelInstance for one parameter configuration.

        Computes the thread block and grid dimensions, obtains the kernel
        string (through ``util.prepare_list_of_files`` when
        ``kernel_options.kernel_string`` is a list, otherwise through
        ``util.get_kernel_string``) and finalises it with
        ``util.setup_kernel_strings``.

        Returns ``None`` (optionally printing a message when ``verbose`` is
        truthy) if the product of the thread block dimensions exceeds
        ``self.dev.max_threads``; otherwise returns the ``KernelInstance``.
        """
        config_label = util.get_instance_string(params)
        divisors = (kernel_options.grid_div_x,
                    kernel_options.grid_div_y,
                    kernel_options.grid_div_z)

        # work out thread block and grid dimensions for this configuration
        block_dims, grid_dims = util.setup_block_and_grid(
            kernel_options.problem_size, divisors, params,
            kernel_options.block_size_names)

        # reject configurations the device cannot launch
        if numpy.prod(block_dims) > self.dev.max_threads:
            if verbose:
                print("skipping config", config_label,
                      "reason: too many threads per block")
            return None

        # fetch the kernel source; only a list of sources yields extra files
        source = kernel_options.kernel_string
        if not isinstance(source, list):
            extra_files = {}
            source_text = util.get_kernel_string(source, params)
        else:
            source_text, extra_files = util.prepare_list_of_files(
                source, params, grid_dims)

        # make the kernel string ready for compilation
        kernel_name, source_text = util.setup_kernel_strings(
            kernel_options.kernel_name, source_text, params, grid_dims)

        # bundle everything known about this instance
        return KernelInstance(kernel_name, source_text, extra_files,
                              block_dims, grid_dims, params,
                              kernel_options.arguments)