def get_code(self, arg_to_dtype=None):
    """Return the device code generated for this executor's kernel.

    :arg arg_to_dtype: optional mapping from argument name to dtype. When
        given, its items are frozen into a :class:`frozenset` before being
        handed to ``get_typed_and_scheduled_kernel``; ``None`` is passed
        through unchanged.
    :returns: the result of calling ``device_code()`` on the code generation
        result.
    """
    if arg_to_dtype is None:
        dtype_items = None
    else:
        # A frozenset of (name, dtype) pairs is what gets passed downstream.
        dtype_items = frozenset(six.iteritems(arg_to_dtype))

    scheduled_kernel = self.get_typed_and_scheduled_kernel(dtype_items)

    from loopy.codegen import generate_code_v2
    return generate_code_v2(scheduled_kernel).device_code()
def program_info(self, entrypoint, arg_to_dtype_set=frozenset(),
        all_kwargs=None):
    """Generate code for *entrypoint* and wrap it into a ``_KernelInfo``.

    Device and host code are generated, optionally written out and/or
    opened in an editor (controlled by the ``write_cl``, ``highlight_cl``
    and ``edit_cl`` options of ``self.program[entrypoint]``), and one
    ``CompiledCKernel`` is created per device program.

    :arg entrypoint: name used to index ``self.program`` and
        ``codegen_result.implemented_data_infos``.
    :arg arg_to_dtype_set: forwarded unchanged to
        ``get_typed_and_scheduled_translation_unit``.
    :arg all_kwargs: accepted but not read by this method.
    :returns: a ``_KernelInfo`` holding the typed program, the compiled
        kernels, the implemented data info and the invoker.
    """
    program = self.get_typed_and_scheduled_translation_unit(
            entrypoint, arg_to_dtype_set)

    from loopy.codegen import generate_code_v2
    codegen_result = generate_code_v2(program)

    dev_code = codegen_result.device_code()
    host_code = codegen_result.host_code()
    all_code = "\n".join((dev_code, "", host_code))

    options = self.program[entrypoint].options

    destination = options.write_cl
    if destination:
        rendered = all_code
        if options.highlight_cl:
            rendered = get_highlighted_code(rendered)

        # write_cl is either exactly True (print) or a value given to open().
        if destination is True:
            print(rendered)
        else:
            with open(destination, "w") as outf:
                outf.write(rendered)

    if options.edit_cl:
        from pytools import invoke_editor
        dev_code = invoke_editor(dev_code, "code.c")
        # The combined source must reflect whatever the editor returned.
        all_code = "\n".join((dev_code, "", host_code))

    c_kernels = [
            CompiledCKernel(
                dev_prg,
                codegen_result.implemented_data_infos[entrypoint],
                all_code,
                self.program.target,
                self.compiler)
            for dev_prg in codegen_result.device_programs]

    return _KernelInfo(
            program=program,
            c_kernels=c_kernels,
            implemented_data_info=(
                codegen_result.implemented_data_infos[entrypoint]),
            invoker=self.get_invoker(program, entrypoint, codegen_result))
def translation_unit_info(self, entrypoint, arg_to_dtype_set=frozenset(),
        all_kwargs=None):
    """Generate, build and wrap the OpenCL code for *entrypoint*.

    Device code is generated for the typed and scheduled translation unit,
    optionally written out and/or opened in an editor, then built with
    :mod:`pyopencl`. Every kernel named by the built program is attached as
    an attribute to a fresh ``_Kernels`` object.

    :arg entrypoint: name used to index the translation unit and
        ``codegen_result.implemented_data_infos``.
    :arg arg_to_dtype_set: forwarded unchanged to
        ``get_typed_and_scheduled_translation_unit``.
    :arg all_kwargs: accepted but not read by this method.
    :returns: a ``_KernelInfo`` holding the typed program, the built
        kernels, the implemented data info and the invoker.
    """
    program = self.get_typed_and_scheduled_translation_unit(
            entrypoint, arg_to_dtype_set)

    # FIXME: now just need to add the types to the arguments

    from loopy.codegen import generate_code_v2
    from loopy.target.execution import get_highlighted_code

    codegen_result = generate_code_v2(program)
    dev_code = codegen_result.device_code()

    # Options of the typed/scheduled program; the write branch below
    # additionally consults the options stored on self.program.
    typed_options = program[entrypoint].options

    # FIXME: redirect to "translation unit" level option as well.
    if typed_options.write_cl:
        unit_options = self.program[entrypoint].options

        rendered = dev_code
        if unit_options.highlight_cl:
            rendered = get_highlighted_code(rendered)

        destination = unit_options.write_cl
        # Exactly True means "print"; anything else is given to open().
        if destination is True:
            print(rendered)
        else:
            with open(destination, "w") as outf:
                outf.write(rendered)

    # FIXME: redirect to "translation unit" level option as well.
    if typed_options.edit_cl:
        from pytools import invoke_editor
        dev_code = invoke_editor(dev_code, "code.cl")

    import pyopencl as cl

    # FIXME: redirect to "translation unit" level option as well.
    cl_program = cl.Program(self.context, dev_code).build(
            options=typed_options.cl_build_options)

    cl_kernels = _Kernels()
    # kernel_names is split on ";" to obtain the individual kernel names.
    for knl_name in cl_program.kernel_names.split(";"):
        setattr(cl_kernels, knl_name, getattr(cl_program, knl_name))

    return _KernelInfo(
            program=program,
            cl_kernels=cl_kernels,
            implemented_data_info=(
                codegen_result.implemented_data_infos[entrypoint]),
            invoker=self.get_invoker(program, entrypoint, codegen_result))
def get_code(self, arg_to_dtype=None):
    """Return the device code generated for this executor's kernel.

    :arg arg_to_dtype: optional mapping from argument name to dtype. Values
        that are numpy scalar classes or :class:`numpy.dtype` instances are
        wrapped in ``NumpyType`` (bound to ``self.kernel.target``); any other
        value is passed through untouched. ``None`` skips this step.
    :returns: the result of calling ``device_code()`` on the code generation
        result.
    """
    def _to_loopy_type(candidate):
        # Subclasses of np.generic are first turned into dtype instances.
        if isinstance(candidate, type) and issubclass(candidate, np.generic):
            candidate = np.dtype(candidate)

        if not isinstance(candidate, np.dtype):
            return candidate
        return NumpyType(candidate, self.kernel.target)

    if arg_to_dtype is not None:
        arg_to_dtype = frozenset(
                (arg_name, _to_loopy_type(raw_dtype))
                for arg_name, raw_dtype in six.iteritems(arg_to_dtype))

    scheduled_kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype)

    from loopy.codegen import generate_code_v2
    return generate_code_v2(scheduled_kernel).device_code()
def get_code(self, arg_to_dtype=None):
    """Generate and return the device code for this executor's kernel.

    :arg arg_to_dtype: optional mapping from argument name to dtype. Numpy
        scalar classes and :class:`numpy.dtype` instances are converted to
        ``loopy.types.NumpyType`` using ``self.kernel.target``; other values
        are kept as they are. ``None`` is forwarded unchanged.
    :returns: the result of calling ``device_code()`` on the code generation
        result.
    """
    def _normalize(dtype_like):
        is_numpy_scalar_class = (
                isinstance(dtype_like, type)
                and issubclass(dtype_like, np.generic))
        if is_numpy_scalar_class:
            dtype_like = np.dtype(dtype_like)

        if isinstance(dtype_like, np.dtype):
            # Imported only when a numpy dtype actually has to be wrapped.
            from loopy.types import NumpyType
            return NumpyType(dtype_like, self.kernel.target)

        return dtype_like

    if arg_to_dtype is not None:
        normalized_pairs = (
                (arg_name, _normalize(raw_dtype))
                for arg_name, raw_dtype in six.iteritems(arg_to_dtype))
        arg_to_dtype = frozenset(normalized_pairs)

    scheduled_kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype)

    from loopy.codegen import generate_code_v2
    codegen_result = generate_code_v2(scheduled_kernel)
    return codegen_result.device_code()
def cl_kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None):
    """Generate, build and wrap the OpenCL code for this executor's kernel.

    Device code is generated for the typed and scheduled kernel, optionally
    written out and/or opened in an editor (controlled by the ``write_cl``,
    ``highlight_cl`` and ``edit_cl`` options of ``self.kernel``), and built
    with :mod:`pyopencl`. Each device program's kernel is attached by name
    to a fresh ``_CLKernels`` object.

    :arg arg_to_dtype_set: forwarded unchanged to
        ``get_typed_and_scheduled_kernel``.
    :arg all_kwargs: accepted but not read by this method.
    :returns: a ``_CLKernelInfo`` holding the typed kernel, the built
        kernels, the implemented data info and the generated invoker.
    """
    kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set)

    from loopy.codegen import generate_code_v2
    codegen_result = generate_code_v2(kernel)
    dev_code = codegen_result.device_code()

    # Dump/edit switches come from the untyped kernel on self; the build
    # options below come from the typed kernel.
    options = self.kernel.options

    destination = options.write_cl
    if destination:
        rendered = dev_code
        if options.highlight_cl:
            rendered = get_highlighted_cl_code(rendered)

        # Exactly True means "print"; anything else is given to open().
        if destination is True:
            print(rendered)
        else:
            with open(destination, "w") as outf:
                outf.write(rendered)

    if options.edit_cl:
        from pytools import invoke_editor
        dev_code = invoke_editor(dev_code, "code.cl")

    import pyopencl as cl

    logger.info("%s: opencl compilation start" % self.kernel.name)

    cl_program = cl.Program(self.context, dev_code).build(
            options=kernel.options.cl_build_options)

    cl_kernels = _CLKernels()
    for dev_prg in codegen_result.device_programs:
        prg_name = dev_prg.name
        setattr(cl_kernels, prg_name, getattr(cl_program, prg_name))

    logger.info("%s: opencl compilation done" % self.kernel.name)

    return _CLKernelInfo(
            kernel=kernel,
            cl_kernels=cl_kernels,
            implemented_data_info=codegen_result.implemented_data_info,
            invoker=generate_invoker(kernel, codegen_result))
def get_code(self, entrypoint, arg_to_dtype=None):
    """Return the device code generated for *entrypoint*.

    :arg entrypoint: forwarded to
        ``get_typed_and_scheduled_translation_unit``.
    :arg arg_to_dtype: optional mapping from argument name to dtype. Numpy
        scalar classes and :class:`numpy.dtype` instances are converted to
        ``loopy.types.NumpyType`` using ``self.program.target``; other values
        are kept as they are. ``None`` is forwarded unchanged.
    :returns: the result of calling ``device_code()`` on the code generation
        result.
    """
    def _to_loopy_type(candidate):
        # Subclasses of np.generic are first turned into dtype instances.
        if isinstance(candidate, type) and issubclass(candidate, np.generic):
            candidate = np.dtype(candidate)

        if not isinstance(candidate, np.dtype):
            return candidate

        from loopy.types import NumpyType
        return NumpyType(candidate, self.program.target)

    if arg_to_dtype is not None:
        arg_to_dtype = frozenset(
                (arg_name, _to_loopy_type(raw_dtype))
                for arg_name, raw_dtype in arg_to_dtype.items())

    typed_unit = self.get_typed_and_scheduled_translation_unit(
            entrypoint, arg_to_dtype)

    from loopy.codegen import generate_code_v2
    return generate_code_v2(typed_unit).device_code()
def kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None):
    """Generate code for this executor's kernel and wrap it for execution.

    Device and host code are generated, optionally written out and/or
    opened in an editor (controlled by the ``write_cl``, ``highlight_cl``
    and ``edit_cl`` options of ``self.kernel``), and one ``CompiledCKernel``
    is created per device program.

    :arg arg_to_dtype_set: forwarded unchanged to
        ``get_typed_and_scheduled_kernel``.
    :arg all_kwargs: accepted but not read by this method.
    :returns: a ``_KernelInfo`` holding the typed kernel, the compiled
        kernels, the implemented data info and the invoker.
    """
    kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set)

    from loopy.codegen import generate_code_v2
    codegen_result = generate_code_v2(kernel)

    dev_code = codegen_result.device_code()
    host_code = codegen_result.host_code()
    all_code = "\n".join((dev_code, "", host_code))

    options = self.kernel.options

    destination = options.write_cl
    if destination:
        rendered = all_code
        if options.highlight_cl:
            rendered = get_highlighted_code(rendered)

        # Exactly True means "print"; anything else is given to open().
        if destination is True:
            print(rendered)
        else:
            with open(destination, "w") as outf:
                outf.write(rendered)

    if options.edit_cl:
        from pytools import invoke_editor
        dev_code = invoke_editor(dev_code, "code.c")
        # The combined source must reflect whatever the editor returned.
        all_code = "\n".join((dev_code, "", host_code))

    c_kernels = [
            CompiledCKernel(
                dev_prg,
                codegen_result.implemented_data_info,
                all_code,
                self.kernel.target,
                self.compiler)
            for dev_prg in codegen_result.device_programs]

    return _KernelInfo(
            kernel=kernel,
            c_kernels=c_kernels,
            implemented_data_info=codegen_result.implemented_data_info,
            invoker=self.get_invoker(kernel, codegen_result))
def generate_header(kernel, codegen_result=None):
    """Collect function declarations for the generated device functions.

    :arg kernel: a :class:`loopy.LoopKernel` whose ``target`` must be a
        ``CTarget`` instance.
    :arg codegen_result: an instance of :class:`loopy.CodeGenerationResult`.
        When omitted, code is generated from *kernel* first.
    :returns: a list of AST nodes (which may have :func:`str` called on them
        to produce a string) representing the function declarations.
    :raises LoopyError: if ``kernel.target`` is not a ``CTarget``.
    """
    target_is_c_based = isinstance(kernel.target, CTarget)
    if not target_is_c_based:
        raise LoopyError(
                'Header generation for non C-based languages are not implemented')

    if codegen_result is None:
        from loopy.codegen import generate_code_v2
        codegen_result = generate_code_v2(kernel)

    # The extractor is called on each device program's AST in turn; the
    # collected declarations are read from its ``decls`` attribute afterwards.
    extractor = CFunctionDeclExtractor()
    for device_program in codegen_result.device_programs:
        extractor(device_program.ast)

    return extractor.decls