Ejemplo n.º 1
0
Archivo: api.py Proyecto: fjarri/reikna
    def compile(
            self, template_src, render_args=None, render_kwds=None, fast_math=False,
            compiler_options=None, constant_arrays=None, keep=False):
        """
        Renders the given template and wraps the result in a program object.

        :param template_src: the Mako template source that will be rendered.
        :param render_args: an iterable of positional arguments handed to the template.
        :param render_kwds: a dictionary of keyword arguments handed to the template.
        :param fast_math: whether fast mathematical operations should be enabled
            during compilation.
        :param compiler_options: a list of strings passed to the compiler as arguments.
        :param constant_arrays: (**CUDA only**) a dictionary ``{name: metadata}``
            describing constant memory arrays to declare in the compiled program.
            Each ``metadata`` is either an array-like object (one that has ``shape``
            and ``dtype`` attributes), or a ``(shape, dtype)`` pair.
        :param keep: if `True`, the source file being compiled and the accompanying
            binaries (if any) are preserved.
            The PyCUDA backend uses it as the ``keep`` option of ``SourceModule``.
            The PyOpenCL backend uses it as the ``cache_dir`` option of ``Program.build()``
            (the kernel source itself is additionally placed there).
        :returns: a :py:class:`Program` object.
        """
        # Everything except the rendered source is forwarded to Program untouched.
        build_kwds = dict(
            fast_math=fast_math,
            compiler_options=compiler_options,
            constant_arrays=constant_arrays,
            keep=keep)
        rendered = render_template_source(
            template_src, render_args=render_args, render_kwds=render_kwds)
        return Program(self, rendered, **build_kwds)
Ejemplo n.º 2
0
Archivo: api.py Proyecto: fjarri/reikna
    def __init__(self, thr, template_src, name, global_size, local_size=None,
            render_args=None, render_kwds=None, fast_math=False, compiler_options=None,
            constant_arrays=None, keep=False):
        """__init__()""" # hide the signature from Sphinx

        self._thr = thr

        # Absent template arguments are rendered as empty collections.
        render_args = [] if render_args is None else render_args
        render_kwds = {} if render_kwds is None else render_kwds

        rendered_src = render_template_source(
            template_src, render_args=render_args, render_kwds=render_kwds)

        # These options are identical for every compilation attempt below.
        build_kwds = dict(
            static=True, fast_math=fast_math, compiler_options=compiler_options,
            constant_arrays=constant_arrays, keep=keep)

        # The virtual size functions need registers of their own, which lowers
        # the local size the kernel can actually run with. Begin with the device-wide
        # maximum work group size and keep recompiling with a tighter cap
        # until the compiled kernel accepts the chosen local size.
        size_cap = thr.device_params.max_work_group_size

        while True:

            # Look for launch parameters matching the requested local size.
            # OutOfResourcesError may be raised here if none exist;
            # it is intentionally left to propagate to the caller.
            sizes = VirtualSizes(
                thr.device_params, global_size,
                virtual_local_size=local_size,
                max_local_size=size_cap)

            # Compile the kernel together with the matching virtual size functions.
            program = Program(
                self._thr, sizes.vsize_functions + rendered_src, **build_kwds)
            kernel = getattr(program, name)

            fits = kernel.max_work_group_size >= product(sizes.real_local_size)
            if fits:
                # The kernel can be launched with this local size, so we are done.
                break

            # VirtualSizes guarantees product(sizes.real_local_size) <= size_cap,
            # and reaching this point means
            # kernel.max_work_group_size < product(sizes.real_local_size).
            # Hence the cap strictly decreases on every iteration.
            size_cap = kernel.max_work_group_size

        self._program, self._kernel = program, kernel
        self.virtual_local_size, self.virtual_global_size = (
            sizes.virtual_local_size, sizes.virtual_global_size)
        self.local_size, self.global_size = (
            sizes.real_local_size, sizes.real_global_size)

        self._kernel.prepare(self.global_size, local_size=self.local_size)
Ejemplo n.º 3
0
    def __init__(self, thr, template_src, name, global_size, local_size=None,
            render_args=None, render_kwds=None, fast_math=False):
        """__init__()""" # hide the signature from Sphinx

        self._thr = thr

        # Absent template arguments are rendered as empty collections.
        render_args = [] if render_args is None else render_args
        render_kwds = {} if render_kwds is None else render_kwds

        rendered_src = render_template_source(
            template_src, render_args=render_args, render_kwds=render_kwds)

        # The virtual size functions need registers of their own, which lowers
        # the local size the kernel can actually run with. Begin with the device-wide
        # maximum work group size and keep recompiling with a tighter cap
        # until the compiled kernel accepts the chosen local size.
        size_cap = thr.device_params.max_work_group_size

        while True:

            # Look for launch parameters matching the requested local size.
            # OutOfResourcesError may be raised here if none exist;
            # it is intentionally left to propagate to the caller.
            sizes = VirtualSizes(
                thr.device_params, global_size,
                virtual_local_size=local_size,
                max_local_size=size_cap)

            # Compile the kernel together with the matching virtual size functions.
            full_src = sizes.vsize_functions + rendered_src
            program = Program(self._thr, full_src, static=True, fast_math=fast_math)
            kernel = getattr(program, name)

            fits = kernel.max_work_group_size >= product(sizes.real_local_size)
            if fits:
                # The kernel can be launched with this local size, so we are done.
                break

            # VirtualSizes guarantees product(sizes.real_local_size) <= size_cap,
            # and reaching this point means
            # kernel.max_work_group_size < product(sizes.real_local_size).
            # Hence the cap strictly decreases on every iteration.
            size_cap = kernel.max_work_group_size

        self._program, self._kernel = program, kernel
        self.virtual_local_size, self.virtual_global_size = (
            sizes.virtual_local_size, sizes.virtual_global_size)
        self.local_size, self.global_size = (
            sizes.real_local_size, sizes.real_global_size)

        self._kernel.prepare(self.global_size, local_size=self.local_size)
Ejemplo n.º 4
0
    def compile(self, template_src, render_args=None, render_kwds=None, fast_math=False):
        """
        Creates a module object from the given template.

        :param template_src: Mako template source to render
        :param render_args: an iterable with positional arguments to pass to the template.
        :param render_kwds: a dictionary with keyword parameters to pass to the template.
        :param fast_math: whether to enable fast mathematical operations during compilation.
        :returns: a :py:class:`Program` object.
        """
        # Render the template first; the resulting source is what gets compiled.
        src = render_template_source(
            template_src, render_args=render_args, render_kwds=render_kwds)
        return Program(self, src, fast_math=fast_math)
Ejemplo n.º 5
0
    def compile(self, template_src, render_args=None, render_kwds=None, fast_math=False):
        """
        Creates a module object from the given template.

        :param template_src: Mako template source to render
        :param render_args: an iterable with positional arguments to pass to the template.
        :param render_kwds: a dictionary with keyword parameters to pass to the template.
        :param fast_math: whether to enable fast mathematical operations during compilation.
        :returns: a :py:class:`Program` object.
        """
        # Render the template first; the resulting source is what gets compiled.
        src = render_template_source(
            template_src, render_args=render_args, render_kwds=render_kwds)
        return Program(self, src, fast_math=fast_math)
Ejemplo n.º 6
0
    def compile(self,
                template_src,
                render_args=None,
                render_kwds=None,
                fast_math=False,
                compiler_options=None,
                constant_arrays=None,
                keep=False):
        """
        Renders the given template and wraps the result in a program object.

        :param template_src: the Mako template source that will be rendered.
        :param render_args: an iterable of positional arguments handed to the template.
        :param render_kwds: a dictionary of keyword arguments handed to the template.
        :param fast_math: whether fast mathematical operations should be enabled
            during compilation.
        :param compiler_options: a list of strings passed to the compiler as arguments.
        :param constant_arrays: (**CUDA only**) a dictionary ``{name: metadata}``
            describing constant memory arrays to declare in the compiled program.
            Each ``metadata`` is either an array-like object (one that has ``shape``
            and ``dtype`` attributes), or a ``(shape, dtype)`` pair.
        :param keep: if `True`, the source file being compiled and the accompanying
            binaries (if any) are preserved.
            The PyCUDA backend uses it as the ``keep`` option of ``SourceModule``.
            The PyOpenCL backend uses it as the ``cache_dir`` option of ``Program.build()``
            (the kernel source itself is additionally placed there).
        :returns: a :py:class:`Program` object.
        """
        rendered_src = render_template_source(
            template_src, render_args=render_args, render_kwds=render_kwds)
        # All build-related options are passed through to Program unchanged.
        program = Program(
            self, rendered_src,
            fast_math=fast_math, compiler_options=compiler_options,
            constant_arrays=constant_arrays, keep=keep)
        return program