예제 #1
0
    def get_code(self, arg_to_dtype=None):
        """Generate and return the device code for this executor's kernel.

        :arg arg_to_dtype: an optional mapping from argument names to
            dtypes. When given, its items are frozen into a
            :class:`frozenset` before being handed to
            ``get_typed_and_scheduled_kernel``; *None* is passed through
            unchanged.
        :returns: whatever ``device_code()`` of the code generation result
            returns.
        """
        # Freeze the mapping's (name, dtype) items into a hashable frozenset.
        dtype_items = (
            None if arg_to_dtype is None
            else frozenset(six.iteritems(arg_to_dtype)))

        typed_kernel = self.get_typed_and_scheduled_kernel(dtype_items)

        from loopy.codegen import generate_code_v2
        return generate_code_v2(typed_kernel).device_code()
예제 #2
0
    def program_info(self,
                     entrypoint,
                     arg_to_dtype_set=frozenset(),
                     all_kwargs=None):
        """Generate code for *entrypoint* and wrap it in a ``_KernelInfo``.

        The generated device and host code are joined into one source
        string. Depending on the options of ``self.program[entrypoint]``
        the source may be printed or written to a file (``write_cl``,
        optionally highlighted via ``highlight_cl``) and the device code
        may be passed through an editor (``edit_cl``).

        :arg entrypoint: key used to index ``self.program`` and the
            per-entrypoint data of the code generation result.
        :arg arg_to_dtype_set: forwarded to
            ``get_typed_and_scheduled_translation_unit``.
        :arg all_kwargs: not used by this method.
        :returns: a ``_KernelInfo`` holding the typed program, one
            ``CompiledCKernel`` per device program, the implemented data
            info for *entrypoint* and the invoker.
        """
        program = self.get_typed_and_scheduled_translation_unit(
            entrypoint, arg_to_dtype_set)

        from loopy.codegen import generate_code_v2
        codegen_result = generate_code_v2(program)

        dev_code = codegen_result.device_code()
        host_code = codegen_result.host_code()
        all_code = "\n".join([dev_code, "", host_code])

        # Options are looked up on ``self.program``, not on the freshly
        # typed ``program``.
        options = self.program[entrypoint].options

        write_target = options.write_cl
        if write_target:
            if options.highlight_cl:
                output = get_highlighted_code(all_code)
            else:
                output = all_code

            # ``True`` means "print"; any other truthy value is used as a
            # file name.
            if write_target is True:
                print(output)
            else:
                with open(write_target, "w") as outf:
                    outf.write(output)

        if options.edit_cl:
            from pytools import invoke_editor
            dev_code = invoke_editor(dev_code, "code.c")
            # Rebuild the combined source so it reflects the edited code.
            all_code = "\n".join([dev_code, "", host_code])

        data_info = codegen_result.implemented_data_infos[entrypoint]

        c_kernels = [
            CompiledCKernel(
                device_program, data_info, all_code,
                self.program.target, self.compiler)
            for device_program in codegen_result.device_programs]

        invoker = self.get_invoker(program, entrypoint, codegen_result)

        return _KernelInfo(program=program,
                           c_kernels=c_kernels,
                           implemented_data_info=data_info,
                           invoker=invoker)
예제 #3
0
    def translation_unit_info(self, entrypoint, arg_to_dtype_set=frozenset(),
            all_kwargs=None):
        """Generate and build the OpenCL code for *entrypoint*.

        The device code may be printed or written to a file
        (``write_cl``, optionally highlighted via ``highlight_cl``) and
        may be passed through an editor (``edit_cl``) before it is built
        with :mod:`pyopencl` using ``cl_build_options``.

        :arg entrypoint: key used to index the program and the
            per-entrypoint data of the code generation result.
        :arg arg_to_dtype_set: forwarded to
            ``get_typed_and_scheduled_translation_unit``.
        :arg all_kwargs: not used by this method.
        :returns: a ``_KernelInfo`` holding the typed program, a
            ``_Kernels`` object with one attribute per built kernel name,
            the implemented data info for *entrypoint* and the invoker.
        """
        program = self.get_typed_and_scheduled_translation_unit(
                entrypoint, arg_to_dtype_set)

        # FIXME: now just need to add the types to the arguments
        from loopy.codegen import generate_code_v2
        from loopy.target.execution import get_highlighted_code
        codegen_result = generate_code_v2(program)

        dev_code = codegen_result.device_code()

        typed_options = program[entrypoint].options

        if typed_options.write_cl:
            #FIXME: redirect to "translation unit" level option as well.
            # NOTE(review): the guard above reads the typed program's
            # options while the body reads ``self.program``'s; kept as-is.
            own_options = self.program[entrypoint].options

            if own_options.highlight_cl:
                output = get_highlighted_code(dev_code)
            else:
                output = dev_code

            # ``True`` means "print"; any other value is used as a file name.
            write_target = own_options.write_cl
            if write_target is True:
                print(output)
            else:
                with open(write_target, "w") as outf:
                    outf.write(output)

        if typed_options.edit_cl:
            #FIXME: redirect to "translation unit" level option as well.
            from pytools import invoke_editor
            dev_code = invoke_editor(dev_code, "code.cl")

        import pyopencl as cl

        #FIXME: redirect to "translation unit" level option as well.
        unbuilt_program = cl.Program(self.context, dev_code)
        cl_program = unbuilt_program.build(
                options=typed_options.cl_build_options)

        # Expose every built kernel as an attribute named after the kernel.
        cl_kernels = _Kernels()
        for kernel_name in cl_program.kernel_names.split(";"):
            setattr(cl_kernels, kernel_name, getattr(cl_program, kernel_name))

        data_info = codegen_result.implemented_data_infos[entrypoint]
        invoker = self.get_invoker(program, entrypoint, codegen_result)

        return _KernelInfo(
                program=program,
                cl_kernels=cl_kernels,
                implemented_data_info=data_info,
                invoker=invoker)
예제 #4
0
    def get_code(self, arg_to_dtype=None):
        """Generate and return the device code for this executor's kernel.

        :arg arg_to_dtype: an optional mapping from argument names to
            dtypes. numpy scalar classes and :class:`numpy.dtype`
            instances are wrapped in ``NumpyType`` bound to
            ``self.kernel.target``; any other value is passed through
            unchanged.
        :returns: whatever ``device_code()`` of the code generation result
            returns.
        """
        def to_loopy_type(dtype):
            # numpy scalar classes (subclasses of np.generic) are turned
            # into np.dtype instances first.
            if isinstance(dtype, type) and issubclass(dtype, np.generic):
                dtype = np.dtype(dtype)

            if not isinstance(dtype, np.dtype):
                return dtype

            return NumpyType(dtype, self.kernel.target)

        if arg_to_dtype is None:
            typed_args = None
        else:
            typed_args = frozenset(
                (name, to_loopy_type(dtype))
                for name, dtype in six.iteritems(arg_to_dtype))

        typed_kernel = self.get_typed_and_scheduled_kernel(typed_args)

        from loopy.codegen import generate_code_v2
        return generate_code_v2(typed_kernel).device_code()
예제 #5
0
파일: execution.py 프로젝트: inducer/loopy
    def get_code(self, arg_to_dtype=None):
        """Return the generated device code for this executor's kernel.

        :arg arg_to_dtype: *None*, or a mapping from argument names to
            dtypes. Values that are numpy scalar classes or
            :class:`numpy.dtype` instances are converted to
            :class:`loopy.types.NumpyType` using ``self.kernel.target``;
            other values are left untouched.
        :returns: the result of ``device_code()`` on the code generation
            result.
        """
        def normalize(dtype):
            is_scalar_class = (
                    isinstance(dtype, type) and issubclass(dtype, np.generic))
            if is_scalar_class:
                dtype = np.dtype(dtype)

            if isinstance(dtype, np.dtype):
                # Imported lazily, only when a numpy dtype is encountered.
                from loopy.types import NumpyType
                return NumpyType(dtype, self.kernel.target)

            return dtype

        if arg_to_dtype is not None:
            name_dtype_pairs = (
                    (name, normalize(dtype))
                    for name, dtype in six.iteritems(arg_to_dtype))
            arg_to_dtype = frozenset(name_dtype_pairs)

        typed_kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype)

        from loopy.codegen import generate_code_v2
        codegen_result = generate_code_v2(typed_kernel)
        return codegen_result.device_code()
예제 #6
0
    def cl_kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None):
        """Generate, optionally dump/edit, and build the OpenCL code.

        The device code may be printed or written to a file
        (``write_cl``, optionally highlighted via ``highlight_cl``) and
        may be passed through an editor (``edit_cl``) before it is built
        with :mod:`pyopencl` using ``cl_build_options``.

        :arg arg_to_dtype_set: forwarded to
            ``get_typed_and_scheduled_kernel``.
        :arg all_kwargs: not used by this method.
        :returns: a ``_CLKernelInfo`` holding the typed kernel, a
            ``_CLKernels`` object with one attribute per device program
            name, the implemented data info and the generated invoker.
        """
        kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set)

        from loopy.codegen import generate_code_v2
        codegen_result = generate_code_v2(kernel)

        dev_code = codegen_result.device_code()

        if self.kernel.options.write_cl:
            output = dev_code
            if self.kernel.options.highlight_cl:
                output = get_highlighted_cl_code(output)

            # ``True`` means "print"; any other truthy value is used as a
            # file name.
            if self.kernel.options.write_cl is True:
                print(output)
            else:
                with open(self.kernel.options.write_cl, "w") as outf:
                    outf.write(output)

        if self.kernel.options.edit_cl:
            from pytools import invoke_editor
            dev_code = invoke_editor(dev_code, "code.cl")

        import pyopencl as cl

        # Hand the name to the logger as an argument so the message is only
        # formatted if the record is actually emitted.
        logger.info("%s: opencl compilation start", self.kernel.name)

        cl_program = (
                cl.Program(self.context, dev_code)
                .build(options=kernel.options.cl_build_options))

        # Expose every device program as an attribute named after it.
        cl_kernels = _CLKernels()
        for dp in codegen_result.device_programs:
            setattr(cl_kernels, dp.name, getattr(cl_program, dp.name))

        logger.info("%s: opencl compilation done", self.kernel.name)

        return _CLKernelInfo(
                kernel=kernel,
                cl_kernels=cl_kernels,
                implemented_data_info=codegen_result.implemented_data_info,
                invoker=generate_invoker(kernel, codegen_result))
예제 #7
0
    def get_code(self, entrypoint, arg_to_dtype=None):
        """Return the generated device code for *entrypoint*.

        :arg entrypoint: forwarded to
            ``get_typed_and_scheduled_translation_unit``.
        :arg arg_to_dtype: *None*, or a mapping from argument names to
            dtypes. Values that are numpy scalar classes or
            :class:`numpy.dtype` instances are converted to
            :class:`loopy.types.NumpyType` using ``self.program.target``;
            other values are left untouched.
        :returns: the result of ``device_code()`` on the code generation
            result.
        """
        def as_loopy_type(dtype):
            if isinstance(dtype, type) and issubclass(dtype, np.generic):
                # numpy scalar class -> np.dtype instance
                dtype = np.dtype(dtype)

            if not isinstance(dtype, np.dtype):
                return dtype

            from loopy.types import NumpyType
            return NumpyType(dtype, self.program.target)

        if arg_to_dtype is None:
            typed_args = None
        else:
            typed_args = frozenset(
                    (name, as_loopy_type(dtype))
                    for name, dtype in arg_to_dtype.items())

        translation_unit = self.get_typed_and_scheduled_translation_unit(
                entrypoint, typed_args)

        from loopy.codegen import generate_code_v2
        return generate_code_v2(translation_unit).device_code()
예제 #8
0
    def kernel_info(self, arg_to_dtype_set=frozenset(), all_kwargs=None):
        """Generate code for the kernel and wrap it in a ``_KernelInfo``.

        Device and host code are joined into one source string. Depending
        on ``self.kernel.options`` the source may be printed or written to
        a file (``write_cl``, optionally highlighted via ``highlight_cl``)
        and the device code may be passed through an editor (``edit_cl``).

        :arg arg_to_dtype_set: forwarded to
            ``get_typed_and_scheduled_kernel``.
        :arg all_kwargs: not used by this method.
        :returns: a ``_KernelInfo`` holding the typed kernel, one
            ``CompiledCKernel`` per device program, the implemented data
            info and the invoker.
        """
        kernel = self.get_typed_and_scheduled_kernel(arg_to_dtype_set)

        from loopy.codegen import generate_code_v2
        codegen_result = generate_code_v2(kernel)

        dev_code = codegen_result.device_code()
        host_code = codegen_result.host_code()
        all_code = "\n".join([dev_code, "", host_code])

        # Options are looked up on ``self.kernel``, not on the freshly
        # typed ``kernel``.
        options = self.kernel.options

        write_target = options.write_cl
        if write_target:
            if options.highlight_cl:
                output = get_highlighted_code(all_code)
            else:
                output = all_code

            # ``True`` means "print"; any other truthy value is used as a
            # file name.
            if write_target is True:
                print(output)
            else:
                with open(write_target, "w") as outf:
                    outf.write(output)

        if options.edit_cl:
            from pytools import invoke_editor
            dev_code = invoke_editor(dev_code, "code.c")
            # Rebuild the combined source so it reflects the edited code.
            all_code = "\n".join([dev_code, "", host_code])

        c_kernels = [
            CompiledCKernel(
                device_program, codegen_result.implemented_data_info,
                all_code, self.kernel.target, self.compiler)
            for device_program in codegen_result.device_programs]

        invoker = self.get_invoker(kernel, codegen_result)

        return _KernelInfo(
                kernel=kernel,
                c_kernels=c_kernels,
                implemented_data_info=codegen_result.implemented_data_info,
                invoker=invoker)
예제 #9
0
파일: __init__.py 프로젝트: inducer/loopy
def generate_header(kernel, codegen_result=None):
    """
    Collect function declarations for the generated device functions.

    :arg kernel: a :class:`loopy.LoopKernel`
    :arg codegen_result: an instance of :class:`loopy.CodeGenerationResult`.
        If *None*, it is obtained by running
        :func:`loopy.codegen.generate_code_v2` on *kernel*.
    :returns: a list of AST nodes (which may have :func:`str`
        called on them to produce a string) representing
        function declarations for the generated device
        functions.
    :raises LoopyError: if ``kernel.target`` is not a :class:`CTarget`.
    """

    target_is_c_based = isinstance(kernel.target, CTarget)
    if not target_is_c_based:
        raise LoopyError(
                'Header generation for non C-based languages are not implemented')

    if codegen_result is None:
        from loopy.codegen import generate_code_v2
        codegen_result = generate_code_v2(kernel)

    # The extractor is called on every device program's AST and collects
    # what it finds in its ``decls`` attribute.
    decl_extractor = CFunctionDeclExtractor()
    for device_program in codegen_result.device_programs:
        decl_extractor(device_program.ast)

    return decl_extractor.decls
예제 #10
0
def generate_header(kernel, codegen_result=None):
    """
    Return the function declarations of the generated device functions.

    :arg kernel: a :class:`loopy.LoopKernel`
    :arg codegen_result: an instance of :class:`loopy.CodeGenerationResult`;
        when omitted (*None*), code is generated from *kernel* with
        :func:`loopy.codegen.generate_code_v2`.
    :returns: a list of AST nodes (which may have :func:`str`
        called on them to produce a string) representing
        function declarations for the generated device
        functions.
    :raises LoopyError: if ``kernel.target`` is not an instance of
        :class:`CTarget`.
    """

    # Only C-based targets are supported.
    if isinstance(kernel.target, CTarget):
        pass
    else:
        raise LoopyError(
            'Header generation for non C-based languages are not implemented')

    result = codegen_result
    if result is None:
        from loopy.codegen import generate_code_v2
        result = generate_code_v2(kernel)

    extractor = CFunctionDeclExtractor()
    for program in result.device_programs:
        # Each call feeds one device program's AST to the extractor.
        extractor(program.ast)

    return extractor.decls