def get_function_declaration(self, codegen_state, codegen_result,
        schedule_index):
    """Return the function declaration, marked as an OpenCL kernel.

    The declaration produced by the parent class is unwrapped and wrapped
    in :class:`cgen.opencl.CLKernel`. If the local sizes of the schedule
    block at *schedule_index* have no dependencies, they are additionally
    attached through :class:`cgen.opencl.CLRequiredWorkGroupSize`.

    :returns: a :class:`loopy.target.c.FunctionDeclarationWrapper` around
        the resulting declaration.
    """
    from cgen.opencl import CLKernel, CLRequiredWorkGroupSize
    from loopy.schedule import get_insn_ids_for_block_at
    from loopy.symbolic import get_dependencies
    from loopy.target.c import FunctionDeclarationWrapper

    wrapped = super(OpenCLCASTBuilder, self).get_function_declaration(
            codegen_state, codegen_result, schedule_index)
    assert isinstance(wrapped, FunctionDeclarationWrapper)

    kernel_decl = CLKernel(wrapped.subdecl)

    knl = codegen_state.kernel
    block_insn_ids = get_insn_ids_for_block_at(knl.schedule, schedule_index)
    _, wg_sizes = knl.get_grid_sizes_for_insn_ids_as_exprs(block_insn_ids)

    if get_dependencies(wg_sizes):
        # Sizes that depend on parameters cannot be used as a static
        # work-group size, so the kernel is emitted without one.
        return FunctionDeclarationWrapper(kernel_decl)

    return FunctionDeclarationWrapper(
            CLRequiredWorkGroupSize(wg_sizes, kernel_decl))
def get_function_declaration(self, codegen_state, codegen_result,
        schedule_index):
    """Return the function declaration, marked as a CUDA global function.

    The declaration produced by the parent class is unwrapped and wrapped
    in :class:`cgen.cuda.CudaGlobal`. When ``self.target.extern_c`` is
    true, it is further wrapped in ``Extern("C", ...)``. If the local grid
    size of the schedule block at *schedule_index* has no dependencies,
    the product of its entries is attached through
    :class:`cgen.cuda.CudaLaunchBounds`.

    :returns: a :class:`loopy.target.c.FunctionDeclarationWrapper` around
        the resulting declaration.
    """
    from cgen.cuda import CudaGlobal, CudaLaunchBounds
    from loopy.schedule import get_insn_ids_for_block_at
    from loopy.symbolic import get_dependencies
    from loopy.target.c import FunctionDeclarationWrapper

    wrapped = super(CUDACASTBuilder, self).get_function_declaration(
            codegen_state, codegen_result, schedule_index)
    assert isinstance(wrapped, FunctionDeclarationWrapper)

    decl = CudaGlobal(wrapped.subdecl)

    if self.target.extern_c:
        from cgen import Extern
        decl = Extern("C", decl)

    knl = codegen_state.kernel
    block_insn_ids = get_insn_ids_for_block_at(knl.schedule, schedule_index)
    _, block_dims = knl.get_grid_sizes_for_insn_ids_as_exprs(block_insn_ids)

    has_param_deps = bool(get_dependencies(block_dims))
    if not has_param_deps:
        # Only sizes without parameter dependencies can serve as a
        # static thread block size.
        from pytools import product
        thread_count = product(block_dims)
        decl = CudaLaunchBounds(thread_count, decl)

    return FunctionDeclarationWrapper(decl)
def get_function_declaration(self, codegen_state, codegen_result,
        schedule_index):
    """Return a ``void`` ISPC function declaration for the current program.

    The function name is taken from
    ``codegen_result.current_program(codegen_state)`` and the argument
    declarations from ``self._arg_names_and_decls(codegen_state)``. The
    declaration is wrapped in :class:`cgen.ispc.ISPCTask` when
    ``codegen_state.is_generating_device_code`` is true and in
    :class:`cgen.ispc.ISPCExport` otherwise.

    *schedule_index* is accepted but not used here.

    :returns: a :class:`loopy.target.c.FunctionDeclarationWrapper` around
        the resulting declaration.
    """
    from cgen import FunctionDeclaration, Value
    from cgen.ispc import ISPCExport, ISPCTask
    from loopy.target.c import FunctionDeclarationWrapper

    fn_name = codegen_result.current_program(codegen_state).name

    # Only the declarations are needed; the argument names are discarded.
    _, arg_decls = self._arg_names_and_decls(codegen_state)

    if codegen_state.is_generating_device_code:
        qualifier = ISPCTask
    else:
        qualifier = ISPCExport

    signature = FunctionDeclaration(Value("void", fn_name), arg_decls)
    return FunctionDeclarationWrapper(qualifier(signature))