Example #1
0
def get_reduction_kernel(stage,
                         ctx,
                         dtype_out,
                         neutral,
                         reduce_expr,
                         arguments=None,
                         name="reduce_kernel",
                         preamble="",
                         map_exprs=None,
                         device=None,
                         options=[],
                         max_group_size=None):

    if map_exprs is None:
        raise ValueError("map_exprs has to be given!")

    for i, m in enumerate(map_exprs):
        if m is None:
            if stage == 2:
                map_exprs[i] = "pyopencl_reduction_inp_%i[i]" % i
            else:
                map_exprs[i] = "in[i]"

    from pyopencl.tools import (parse_arg_list, get_arg_list_scalar_arg_dtypes,
                                get_arg_offset_adjuster_code, VectorArg)

    arg_prep = ""
    if stage == 1 and arguments is not None:
        arguments = parse_arg_list(arguments, with_offset=True)
        arg_prep = get_arg_offset_adjuster_code(arguments)

    if stage == 2 and arguments is not None:
        arguments = parse_arg_list(arguments)
        arguments = ([
            VectorArg(dtype_out, "pyopencl_reduction_inp_%i" % i)
            for i in range(len(map_exprs))
        ] + arguments)

    inf = _get_reduction_source(ctx, dtype_to_ctype(dtype_out),
                                dtype_out.itemsize, neutral, reduce_expr,
                                map_exprs, arguments, name, preamble, arg_prep,
                                device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)

    inf.arg_types = arguments

    inf.kernel.set_scalar_arg_dtypes(
        [
            None,
        ] * len(map_exprs) + [np.int64] +
        get_arg_list_scalar_arg_dtypes(inf.arg_types) + [np.uint32] * 2)

    return inf
Example #2
0
def get_reduction_kernel(stage,
                         ctx, dtype_out,
                         neutral, reduce_expr, arguments=None,
                         name="reduce_kernel", preamble="",
                         map_exprs = None,
                         device=None, options=[], max_group_size=None):

    if map_exprs is None:
        raise ValueError("map_exprs has to be given!")

    for i, m in enumerate(map_exprs):
        if m is None:
            if stage==2:
                map_exprs[i] = "pyopencl_reduction_inp_%i[i]"%i
            else:
                map_exprs[i] = "in[i]"


    from pyopencl.tools import (
        parse_arg_list, get_arg_list_scalar_arg_dtypes,
        get_arg_offset_adjuster_code, VectorArg)

    arg_prep = ""
    if stage==1 and arguments is not None:
        arguments = parse_arg_list(arguments, with_offset=True)
        arg_prep = get_arg_offset_adjuster_code(arguments)

    if stage==2 and arguments is not None:
        arguments = parse_arg_list(arguments)
        arguments = (
            [VectorArg(dtype_out, "pyopencl_reduction_inp_%i"%i) for i in xrange(len(map_exprs))]
            +arguments)


    inf = _get_reduction_source(
        ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
        neutral, reduce_expr, map_exprs, arguments,
        name, preamble, arg_prep, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)

    inf.arg_types = arguments

    inf.kernel.set_scalar_arg_dtypes(
        [None, ]*len(map_exprs)+[np.int64]
        +get_arg_list_scalar_arg_dtypes(inf.arg_types)
        +[np.uint32]*2)


    return inf
def get_reduction_kernel(stage,
                         ctx,
                         dtype_out,
                         neutral,
                         reduce_expr,
                         map_expr=None,
                         arguments=None,
                         name="reduce_kernel",
                         preamble="",
                         device=None,
                         options=[],
                         max_group_size=None):

    if map_expr is None:
        if stage == 2:
            map_expr = "pyopencl_reduction_inp[i]"
        else:
            map_expr = "in[i]"

    from pyopencl.tools import (parse_arg_list, get_arg_list_scalar_arg_dtypes,
                                get_arg_offset_adjuster_code, VectorArg)

    arg_prep = ""
    if stage == 1 and arguments is not None:
        arguments = parse_arg_list(arguments, with_offset=True)
        arg_prep = get_arg_offset_adjuster_code(arguments)

    if stage == 2 and arguments is not None:
        arguments = parse_arg_list(arguments)
        arguments = ([VectorArg(dtype_out, "pyopencl_reduction_inp")] +
                     arguments)

    inf = _get_reduction_source(ctx, dtype_to_ctype(dtype_out),
                                dtype_out.itemsize, neutral, reduce_expr,
                                map_expr, arguments, name, preamble, arg_prep,
                                device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)

    inf.arg_types = arguments

    inf.kernel.set_scalar_arg_dtypes(
        [None, np.int64] + get_arg_list_scalar_arg_dtypes(inf.arg_types) +
        [np.int64] * 3 + [np.uint32, np.int64])

    return inf
Example #4
0
def get_elwise_kernel_and_types(context,
                                arguments,
                                operation,
                                name="elwise_kernel",
                                options=[],
                                preamble="",
                                use_range=False,
                                **kwargs):

    from pyopencl.tools import parse_arg_list
    parsed_args = parse_arg_list(arguments)

    auto_preamble = kwargs.pop("auto_preamble", True)

    pragmas = []
    includes = []
    have_double_pragma = False
    have_complex_include = False

    if auto_preamble:
        for arg in parsed_args:
            if arg.dtype in [np.float64, np.complex128]:
                if not have_double_pragma:
                    pragmas.append(
                        "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
                        "#define PYOPENCL_DEFINE_CDOUBLE\n")
                    have_double_pragma = True
            if arg.dtype.kind == 'c':
                if not have_complex_include:
                    includes.append("#include <pyopencl-complex.h>\n")
                    have_complex_include = True

    if pragmas or includes:
        preamble = "\n".join(pragmas + includes) + "\n" + preamble

    if use_range:
        parsed_args.extend([
            ScalarArg(np.intp, "start"),
            ScalarArg(np.intp, "stop"),
            ScalarArg(np.intp, "step"),
        ])
    else:
        parsed_args.append(ScalarArg(np.intp, "n"))

    prg = get_elwise_program(context,
                             parsed_args,
                             operation,
                             name=name,
                             options=options,
                             preamble=preamble,
                             use_range=use_range,
                             **kwargs)

    from pyopencl.tools import get_arg_list_scalar_arg_dtypes

    kernel = getattr(prg, name)
    kernel.set_scalar_arg_dtypes(get_arg_list_scalar_arg_dtypes(parsed_args))

    return kernel, parsed_args
Example #5
0
def get_reduction_kernel(stage,
         ctx, dtype_out,
         neutral, reduce_expr, map_expr=None, arguments=None,
         name="reduce_kernel", preamble="",
         device=None, options=[], max_group_size=None):

    if map_expr is None:
        if stage == 2:
            map_expr = "pyopencl_reduction_inp[i]"
        else:
            map_expr = "in[i]"

    from pyopencl.tools import (
            parse_arg_list, get_arg_list_scalar_arg_dtypes,
            get_arg_offset_adjuster_code, VectorArg)

    arg_prep = ""
    if stage == 1 and arguments is not None:
        arguments = parse_arg_list(arguments, with_offset=True)
        arg_prep = get_arg_offset_adjuster_code(arguments)

    if stage == 2 and arguments is not None:
        arguments = parse_arg_list(arguments)
        arguments = (
                [VectorArg(dtype_out, "pyopencl_reduction_inp")]
                + arguments)

    inf = _get_reduction_source(
            ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
            neutral, reduce_expr, map_expr, arguments,
            name, preamble, arg_prep, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)

    inf.arg_types = arguments

    inf.kernel.set_scalar_arg_dtypes(
            [None, np.int64]
            + get_arg_list_scalar_arg_dtypes(inf.arg_types)
            + [np.int64]*3 + [np.uint32, np.int64]
            )

    return inf
Example #6
0
def get_elwise_kernel_and_types(context, arguments, operation,
        name="elwise_kernel", options=[], preamble="", use_range=False,
        **kwargs):

    from pyopencl.tools import parse_arg_list, get_arg_offset_adjuster_code
    parsed_args = parse_arg_list(arguments, with_offset=True)

    auto_preamble = kwargs.pop("auto_preamble", True)

    pragmas = []
    includes = []
    have_double_pragma = False
    have_complex_include = False

    if auto_preamble:
        for arg in parsed_args:
            if arg.dtype in [np.float64, np.complex128]:
                if not have_double_pragma:
                    pragmas.append("""
                        #if __OPENCL_C_VERSION__ < 120
                        #pragma OPENCL EXTENSION cl_khr_fp64: enable
                        #endif
                        #define PYOPENCL_DEFINE_CDOUBLE
                        """)
                    have_double_pragma = True
            if arg.dtype.kind == 'c':
                if not have_complex_include:
                    includes.append("#include <pyopencl-complex.h>\n")
                    have_complex_include = True

    if pragmas or includes:
        preamble = "\n".join(pragmas+includes) + "\n" + preamble

    if use_range:
        parsed_args.extend([
            ScalarArg(np.intp, "start"),
            ScalarArg(np.intp, "stop"),
            ScalarArg(np.intp, "step"),
            ])
    else:
        parsed_args.append(ScalarArg(np.intp, "n"))

    loop_prep = kwargs.pop("loop_prep", "")
    loop_prep = get_arg_offset_adjuster_code(parsed_args) + loop_prep
    prg = get_elwise_program(
        context, parsed_args, operation,
        name=name, options=options, preamble=preamble,
        use_range=use_range, loop_prep=loop_prep, **kwargs)

    from pyopencl.tools import get_arg_list_scalar_arg_dtypes

    kernel = getattr(prg, name)
    kernel.set_scalar_arg_dtypes(get_arg_list_scalar_arg_dtypes(parsed_args))

    return kernel, parsed_args
Example #7
0
    def __init__(self,
                 ctx,
                 dtype_out,
                 neutral,
                 reduce_expr,
                 map_expr=None,
                 arguments=None,
                 name="reduce_kernel",
                 options=None,
                 preamble=""):
        from pyopencl.tools import parse_arg_list
        arguments = parse_arg_list(arguments, with_offset=True)

        dtype_out = self.dtype_out = np.dtype(dtype_out)

        max_group_size = None
        trip_count = 0

        while True:
            self.stage_1_inf = get_reduction_kernel(
                1,
                ctx,
                dtype_out,
                neutral,
                reduce_expr,
                map_expr,
                arguments,
                name=name + "_stage1",
                options=options,
                preamble=preamble,
                max_group_size=max_group_size)

            kernel_max_wg_size = self.stage_1_inf.kernel.get_work_group_info(
                cl.kernel_work_group_info.WORK_GROUP_SIZE, ctx.devices[0])

            if self.stage_1_inf.group_size <= kernel_max_wg_size:
                break
            else:
                max_group_size = kernel_max_wg_size

            trip_count += 1
            assert trip_count <= 2

        self.stage_2_inf = get_reduction_kernel(2,
                                                ctx,
                                                dtype_out,
                                                neutral,
                                                reduce_expr,
                                                arguments=arguments,
                                                name=name + "_stage2",
                                                options=options,
                                                preamble=preamble,
                                                max_group_size=max_group_size)
Example #8
0
def get_reduction_kernel(stage,
                         ctx,
                         dtype_out,
                         neutral,
                         reduce_expr,
                         map_expr=None,
                         arguments=None,
                         name="reduce_kernel",
                         preamble="",
                         device=None,
                         options=None,
                         max_group_size=None):

    if map_expr is None:
        if stage == 2:
            map_expr = "pyopencl_reduction_inp[i]"
        else:
            map_expr = "in[i]"

    from pyopencl.tools import (parse_arg_list, get_arg_list_scalar_arg_dtypes,
                                get_arg_offset_adjuster_code, VectorArg)

    if arguments is None:
        raise ValueError("arguments must not be None")

    arguments = parse_arg_list(arguments, with_offset=True)
    arg_prep = get_arg_offset_adjuster_code(arguments)

    if stage == 2 and arguments is not None:
        arguments = ([VectorArg(dtype_out, "pyopencl_reduction_inp")] +
                     arguments)

    source, group_size = _get_reduction_source(ctx, dtype_to_ctype(dtype_out),
                                               dtype_out.itemsize, neutral,
                                               reduce_expr, map_expr,
                                               arguments, name, preamble,
                                               arg_prep, device,
                                               max_group_size)

    program = cl.Program(ctx, source)
    program.build(options)

    kernel = getattr(program, name)
    kernel.set_scalar_arg_dtypes([None, np.int64] +
                                 get_arg_list_scalar_arg_dtypes(arguments) +
                                 [np.int64] * 3 + [np.uint32, np.int64])

    return _ReductionInfo(context=ctx,
                          source=source,
                          group_size=group_size,
                          program=program,
                          kernel=kernel,
                          arg_types=arguments)
Example #9
0
def get_reduction_kernel(stage,
                         ctx,
                         out_type,
                         out_type_size,
                         neutral,
                         reduce_expr,
                         map_expr=None,
                         arguments=None,
                         name="reduce_kernel",
                         preamble="",
                         device=None,
                         options=[],
                         max_group_size=None):
    if map_expr is None:
        if stage == 2:
            map_expr = "pyopencl_reduction_inp[i]"
        else:
            map_expr = "in[i]"

    if stage == 2:
        in_arg = "const %s *pyopencl_reduction_inp" % out_type
        if arguments:
            arguments = in_arg + ", " + arguments
        else:
            arguments = in_arg

    from pyopencl.tools import parse_arg_list, get_arg_list_scalar_arg_dtypes
    parsed_args = parse_arg_list(arguments)

    inf = _get_reduction_source(ctx, out_type, out_type_size, neutral,
                                reduce_expr, map_expr, parsed_args, name,
                                preamble, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)

    inf.arg_types = parsed_args

    inf.kernel.set_scalar_arg_dtypes(
        [None] + get_arg_list_scalar_arg_dtypes(inf.arg_types) +
        [np.uint32] * 2)

    return inf
Example #10
0
def get_reduction_kernel(stage,
         ctx, out_type, out_type_size,
         neutral, reduce_expr, map_expr=None, arguments=None,
         name="reduce_kernel", preamble="",
         device=None, options=[], max_group_size=None):
    if map_expr is None:
        if stage == 2:
            map_expr = "pyopencl_reduction_inp[i]"
        else:
            map_expr = "in[i]"

    if stage == 2:
        in_arg = "const %s *pyopencl_reduction_inp" % out_type
        if arguments:
            arguments = in_arg + ", " + arguments
        else:
            arguments = in_arg

    from pyopencl.tools import parse_arg_list, get_arg_list_scalar_arg_dtypes
    parsed_args = parse_arg_list(arguments)

    inf = _get_reduction_source(
            ctx, out_type, out_type_size,
            neutral, reduce_expr, map_expr, parsed_args,
            name, preamble, device, max_group_size)

    inf.program = cl.Program(ctx, inf.source)
    inf.program.build(options)
    inf.kernel = getattr(inf.program, name)

    inf.arg_types = parsed_args

    inf.kernel.set_scalar_arg_dtypes(
            [None]
            + get_arg_list_scalar_arg_dtypes(inf.arg_types)
            + [np.uint32]*2)

    return inf
    def __init__(self,
                 context,
                 list_names_and_dtypes,
                 generate_template,
                 arg_decls,
                 count_sharing=None,
                 devices=None,
                 name_prefix="plb_build_list",
                 options=[],
                 preamble="",
                 debug=False,
                 complex_kernel=False):
        """
        :arg context: A :class:`pyopencl.Context`.
        :arg list_names_and_dtypes: a list of `(name, dtype)` tuples
            indicating the lists to be built.
        :arg generate_template: a snippet of C as described below
        :arg arg_decls: A string of comma-separated C argument declarations.
        :arg count_sharing: A mapping consisting of `(child, mother)`
            indicating that `mother` and `child` will always have the
            same number of indices, and the `APPEND` to `mother`
            will always happen *before* the `APPEND` to the child.
        :arg name_prefix: the name prefix to use for the compiled kernels
        :arg options: OpenCL compilation options for kernels using
            *generate_template*.
        :arg complex_kernel: If `True`, prevents vectorization on CPUs.

        *generate_template* may use the following C macros/identifiers:

        * `index_type`: expands to C identifier for the index type used
          for the calculation
        * `USER_ARG_DECL`: expands to the C declarator for `arg_decls`
        * `USER_ARGS`: a list of C argument values corresponding to
          `user_arg_decl`
        * `LIST_ARG_DECL`: expands to a C argument list representing the
          data for the output lists. These are escaped prefixed with
          `"plg_"` so as to not interfere with user-provided names.
        * `LIST_ARGS`: a list of C argument values corresponding to
          `LIST_ARG_DECL`
        * `APPEND_name(entry)`: inserts `entry` into the list `name`.
          *entry* must be a valid C expression of the correct type.

        All argument-list related macros have a trailing comma included
        if they are non-empty.

        *generate_template* must supply a function:

        .. code-block:: c

            void generate(USER_ARG_DECL LIST_ARG_DECL index_type i)
            {
                APPEND_mylist(5);
            }

        Internally, the `kernel_template` is expanded (at least) twice. Once,
        for a 'counting' stage where the size of all the lists is determined,
        and a second time, for a 'generation' stage where the lists are
        actually filled. A `generate` function that has side effects beyond
        calling `append` is therefore ill-formed.
        """

        if devices is None:
            devices = context.devices

        if count_sharing is None:
            count_sharing = {}

        self.context = context
        self.devices = devices

        self.list_names_and_dtypes = list_names_and_dtypes
        self.generate_template = generate_template

        from pyopencl.tools import parse_arg_list
        self.arg_decls = parse_arg_list(arg_decls)

        self.count_sharing = count_sharing

        self.name_prefix = name_prefix
        self.preamble = preamble
        self.options = options

        self.debug = debug

        self.complex_kernel = complex_kernel
    def __init__(self,
                 context,
                 arguments,
                 key_expr,
                 sort_arg_names,
                 bits_at_a_time=2,
                 index_dtype=np.int32,
                 key_dtype=np.uint32,
                 options=[]):
        """
        :arg arguments: A string of comma-separated C argument declarations.
            If *arguments* is specified, then *input_expr* must also be
            specified. All types used here must be known to PyOpenCL.
            (see :func:`pyopencl.tools.get_or_register_dtype`).
        :arg key_expr: An integer-valued C expression returning the
            key based on which the sort is performed. The array index
            for which the key is to be computed is available as `i`.
            The expression may refer to any of the *arguments*.
        :arg sort_arg_names: A list of argument names whose corresponding
            array arguments will be sorted according to *key_expr*.
        """

        # {{{ arg processing

        from pyopencl.tools import parse_arg_list
        self.arguments = parse_arg_list(arguments)
        del arguments

        self.sort_arg_names = sort_arg_names
        self.bits = int(bits_at_a_time)
        self.index_dtype = np.dtype(index_dtype)
        self.key_dtype = np.dtype(key_dtype)

        self.options = options

        # }}}

        # {{{ kernel creation

        scan_ctype, scan_dtype, scan_t_cdecl = \
                _make_sort_scan_type(context.devices[0], self.bits, self.index_dtype)

        from pyopencl.tools import VectorArg, ScalarArg
        scan_arguments = (list(self.arguments) + [
            VectorArg(arg.dtype, "sorted_" + arg.name)
            for arg in self.arguments if arg.name in sort_arg_names
        ] + [ScalarArg(np.int32, "base_bit")])

        def get_count_branch(known_bits):
            if len(known_bits) == self.bits:
                return "s.c%s" % known_bits

            boundary_mnr = known_bits + "1" + (self.bits - len(known_bits) -
                                               1) * "0"

            return ("((mnr < %s) ? %s : %s)" %
                    (int(boundary_mnr, 2), get_count_branch(known_bits + "0"),
                     get_count_branch(known_bits + "1")))

        codegen_args = dict(
            bits=self.bits,
            key_ctype=dtype_to_ctype(self.key_dtype),
            key_expr=key_expr,
            index_ctype=dtype_to_ctype(self.index_dtype),
            index_type_max=np.iinfo(self.index_dtype).max,
            padded_bin=_padded_bin,
            scan_ctype=scan_ctype,
            sort_arg_names=sort_arg_names,
            get_count_branch=get_count_branch,
        )

        preamble = scan_t_cdecl + RADIX_SORT_PREAMBLE_TPL.render(
            **codegen_args)
        scan_preamble = preamble \
                + RADIX_SORT_SCAN_PREAMBLE_TPL.render(**codegen_args)

        from pyopencl.scan import GenericScanKernel
        self.scan_kernel = GenericScanKernel(
            context,
            scan_dtype,
            arguments=scan_arguments,
            input_expr="scan_t_from_value(%s, base_bit, i)" % key_expr,
            scan_expr="scan_t_add(a, b, across_seg_boundary)",
            neutral="scan_t_neutral()",
            output_statement=RADIX_SORT_OUTPUT_STMT_TPL.render(**codegen_args),
            preamble=scan_preamble,
            options=self.options)

        for i, arg in enumerate(self.arguments):
            if isinstance(arg, VectorArg):
                self.first_array_arg_idx = i
Example #13
0
    def __init__(
        self,
        context,
        list_names_and_dtypes,
        generate_template,
        arg_decls,
        count_sharing=None,
        devices=None,
        name_prefix="plb_build_list",
        options=[],
        preamble="",
        debug=False,
        complex_kernel=False,
    ):
        """
        :arg context: A :class:`pyopencl.Context`.
        :arg list_names_and_dtypes: a list of `(name, dtype)` tuples
            indicating the lists to be built.
        :arg generate_template: a snippet of C as described below
        :arg arg_decls: A string of comma-separated C argument declarations.
        :arg count_sharing: A mapping consisting of `(child, mother)`
            indicating that `mother` and `child` will always have the
            same number of indices, and the `APPEND` to `mother`
            will always happen *before* the `APPEND` to the child.
        :arg name_prefix: the name prefix to use for the compiled kernels
        :arg options: OpenCL compilation options for kernels using
            *generate_template*.
        :arg complex_kernel: If `True`, prevents vectorization on CPUs.

        *generate_template* may use the following C macros/identifiers:

        * `index_type`: expands to C identifier for the index type used
          for the calculation
        * `USER_ARG_DECL`: expands to the C declarator for `arg_decls`
        * `USER_ARGS`: a list of C argument values corresponding to
          `user_arg_decl`
        * `LIST_ARG_DECL`: expands to a C argument list representing the
          data for the output lists. These are escaped prefixed with
          `"plg_"` so as to not interfere with user-provided names.
        * `LIST_ARGS`: a list of C argument values corresponding to
          `LIST_ARG_DECL`
        * `APPEND_name(entry)`: inserts `entry` into the list `name`.
          Both arguments are Python strings, the latter representing
          a valid C expression of the correct dtype.

        All argument-list related macros have a trailing comma included
        if they are non-empty.

        *generate_template* must supply a function:

        .. code-block:: c

            void generate(USER_ARG_DECL LIST_ARG_DECL index_type i)
            {
                APPEND(mylist, 5);
            }

        Internally, the `kernel_template` is expanded (at least) twice. Once,
        for a 'counting' stage where the size of all the lists is determined,
        and a second time, for a 'generation' stage where the lists are
        actually filled. A `generate` function that has side effects beyond
        calling `append` is therefore ill-formed.
        """

        if devices is None:
            devices = context.devices

        if count_sharing is None:
            count_sharing = {}

        self.context = context
        self.devices = devices

        self.list_names_and_dtypes = list_names_and_dtypes
        self.generate_template = generate_template

        from pyopencl.tools import parse_arg_list

        self.arg_decls = parse_arg_list(arg_decls)

        self.count_sharing = count_sharing

        self.name_prefix = name_prefix
        self.preamble = preamble
        self.options = options

        self.debug = debug

        self.complex_kernel = complex_kernel
Example #14
0
    def __init__(
        self,
        context,
        arguments,
        key_expr,
        sort_arg_names,
        bits_at_a_time=2,
        index_dtype=np.int32,
        key_dtype=np.uint32,
        options=[],
    ):
        """
        :arg arguments: A string of comma-separated C argument declarations.
            If *arguments* is specified, then *input_expr* must also be
            specified. All types used here must be known to PyOpenCL.
            (see :func:`pyopencl.tools.get_or_register_dtype`).
        :arg key_expr: An integer-valued C expression returning the
            key based on which the sort is performed. The array index
            for which the key is to be computed is available as `i`.
            The expression may refer to any of the *arguments*.
        :arg sort_arg_names: A list of argument names whose corresponding
            array arguments will be sorted according to *key_expr*.
        """

        # {{{ arg processing

        from pyopencl.tools import parse_arg_list

        self.arguments = parse_arg_list(arguments)
        del arguments

        self.sort_arg_names = sort_arg_names
        self.bits = int(bits_at_a_time)
        self.index_dtype = np.dtype(index_dtype)
        self.key_dtype = np.dtype(key_dtype)

        self.options = options

        # }}}

        # {{{ kernel creation

        scan_ctype, scan_dtype, scan_t_cdecl = _make_sort_scan_type(context.devices[0], self.bits, self.index_dtype)

        from pyopencl.tools import VectorArg, ScalarArg

        scan_arguments = (
            list(self.arguments)
            + [VectorArg(arg.dtype, "sorted_" + arg.name) for arg in self.arguments if arg.name in sort_arg_names]
            + [ScalarArg(np.int32, "base_bit")]
        )

        def get_count_branch(known_bits):
            if len(known_bits) == self.bits:
                return "s.c%s" % known_bits

            boundary_mnr = known_bits + "1" + (self.bits - len(known_bits) - 1) * "0"

            return "((mnr < %s) ? %s : %s)" % (
                int(boundary_mnr, 2),
                get_count_branch(known_bits + "0"),
                get_count_branch(known_bits + "1"),
            )

        codegen_args = dict(
            bits=self.bits,
            key_ctype=dtype_to_ctype(self.key_dtype),
            key_expr=key_expr,
            index_ctype=dtype_to_ctype(self.index_dtype),
            index_type_max=np.iinfo(self.index_dtype).max,
            padded_bin=_padded_bin,
            scan_ctype=scan_ctype,
            sort_arg_names=sort_arg_names,
            get_count_branch=get_count_branch,
        )

        preamble = scan_t_cdecl + RADIX_SORT_PREAMBLE_TPL.render(**codegen_args)
        scan_preamble = preamble + RADIX_SORT_SCAN_PREAMBLE_TPL.render(**codegen_args)

        from pyopencl.scan import GenericScanKernel

        self.scan_kernel = GenericScanKernel(
            context,
            scan_dtype,
            arguments=scan_arguments,
            input_expr="scan_t_from_value(%s, base_bit, i)" % key_expr,
            scan_expr="scan_t_add(a, b, across_seg_boundary)",
            neutral="scan_t_neutral()",
            output_statement=RADIX_SORT_OUTPUT_STMT_TPL.render(**codegen_args),
            preamble=scan_preamble,
            options=self.options,
        )

        for i, arg in enumerate(self.arguments):
            if isinstance(arg, VectorArg):
                self.first_array_arg_idx = i
Example #15
0
    def __init__(self, context, list_names_and_dtypes, generate_template,
            arg_decls, count_sharing=None, devices=None,
            name_prefix="plb_build_list", options=[], preamble="",
            debug=False, complex_kernel=False,
            eliminate_empty_output_lists=[]):
        """
        :arg context: A :class:`pyopencl.Context`.
        :arg list_names_and_dtypes: a list of `(name, dtype)` tuples
            indicating the lists to be built.
        :arg generate_template: a snippet of C as described below
        :arg arg_decls: A string of comma-separated C argument declarations.
        :arg count_sharing: A mapping consisting of `(child, mother)`
            indicating that `mother` and `child` will always have the
            same number of indices, and the `APPEND` to `mother`
            will always happen *before* the `APPEND` to the child.
        :arg name_prefix: the name prefix to use for the compiled kernels
        :arg options: OpenCL compilation options for kernels using
            *generate_template*.
        :arg complex_kernel: If `True`, prevents vectorization on CPUs.
        :arg eliminate_empty_output_lists: A Python list of list names
            for which the empty output lists are eliminated.

        *generate_template* may use the following C macros/identifiers:

        * `index_type`: expands to C identifier for the index type used
          for the calculation
        * `USER_ARG_DECL`: expands to the C declarator for `arg_decls`
        * `USER_ARGS`: a list of C argument values corresponding to
          `user_arg_decl`
        * `LIST_ARG_DECL`: expands to a C argument list representing the
          data for the output lists. These are escaped prefixed with
          `"plg_"` so as to not interfere with user-provided names.
        * `LIST_ARGS`: a list of C argument values corresponding to
          `LIST_ARG_DECL`
        * `APPEND_name(entry)`: inserts `entry` into the list `name`.
          *entry* must be a valid C expression of the correct type.

        All argument-list related macros have a trailing comma included
        if they are non-empty.

        *generate_template* must supply a function:

        .. code-block:: c

            void generate(USER_ARG_DECL LIST_ARG_DECL index_type i)
            {
                APPEND_mylist(5);
            }

        Internally, the `kernel_template` is expanded (at least) twice. Once,
        for a 'counting' stage where the size of all the lists is determined,
        and a second time, for a 'generation' stage where the lists are
        actually filled. A `generate` function that has side effects beyond
        calling `append` is therefore ill-formed.

        .. versionchanged:: 2018.1

            Change *eliminate_empty_output_lists* argument type from `bool` to
            `list`.
        """

        if devices is None:
            devices = context.devices

        if count_sharing is None:
            count_sharing = {}

        self.context = context
        self.devices = devices

        self.list_names_and_dtypes = list_names_and_dtypes
        self.generate_template = generate_template

        from pyopencl.tools import parse_arg_list
        self.arg_decls = parse_arg_list(arg_decls)

        # To match with the signature of the user-supplied generate(), arguments
        # can't appear to have offsets.
        arg_decls_no_offset = []
        from pyopencl.tools import VectorArg
        for arg in self.arg_decls:
            if isinstance(arg, VectorArg) and arg.with_offset:
                arg = VectorArg(arg.dtype, arg.name)
            arg_decls_no_offset.append(arg)
        self.arg_decls_no_offset = arg_decls_no_offset

        self.count_sharing = count_sharing

        self.name_prefix = name_prefix
        self.preamble = preamble
        self.options = options

        self.debug = debug

        self.complex_kernel = complex_kernel

        if eliminate_empty_output_lists is True:
            eliminate_empty_output_lists = \
                    [name for name, _ in self.list_names_and_dtypes]

        if eliminate_empty_output_lists is False:
            eliminate_empty_output_lists = []

        self.eliminate_empty_output_lists = eliminate_empty_output_lists
        for list_name in self.eliminate_empty_output_lists:
            if not any(list_name == name for name, _ in self.list_names_and_dtypes):
                raise ValueError(
                    "invalid list name '%s' in eliminate_empty_output_lists"
                    % list_name)
Example #16
0
    def __init__(self, context, list_names_and_dtypes, generate_template,
            arg_decls, count_sharing=None, devices=None,
            name_prefix="plb_build_list", options=[], preamble="",
            debug=False, complex_kernel=False,
            eliminate_empty_output_lists=[]):
        """
        :arg context: A :class:`pyopencl.Context`.
        :arg list_names_and_dtypes: a list of `(name, dtype)` tuples
            indicating the lists to be built.
        :arg generate_template: a snippet of C as described below
        :arg arg_decls: A string of comma-separated C argument declarations.
        :arg count_sharing: A mapping consisting of `(child, mother)`
            indicating that `mother` and `child` will always have the
            same number of indices, and the `APPEND` to `mother`
            will always happen *before* the `APPEND` to the child.
        :arg name_prefix: the name prefix to use for the compiled kernels
        :arg options: OpenCL compilation options for kernels using
            *generate_template*.
        :arg complex_kernel: If `True`, prevents vectorization on CPUs.
        :arg eliminate_empty_output_lists: A Python list of list names
            for which the empty output lists are eliminated.

        *generate_template* may use the following C macros/identifiers:

        * `index_type`: expands to C identifier for the index type used
          for the calculation
        * `USER_ARG_DECL`: expands to the C declarator for `arg_decls`
        * `USER_ARGS`: a list of C argument values corresponding to
          `user_arg_decl`
        * `LIST_ARG_DECL`: expands to a C argument list representing the
          data for the output lists. These are escaped prefixed with
          `"plg_"` so as to not interfere with user-provided names.
        * `LIST_ARGS`: a list of C argument values corresponding to
          `LIST_ARG_DECL`
        * `APPEND_name(entry)`: inserts `entry` into the list `name`.
          *entry* must be a valid C expression of the correct type.

        All argument-list related macros have a trailing comma included
        if they are non-empty.

        *generate_template* must supply a function:

        .. code-block:: c

            void generate(USER_ARG_DECL LIST_ARG_DECL index_type i)
            {
                APPEND_mylist(5);
            }

        Internally, the `kernel_template` is expanded (at least) twice. Once,
        for a 'counting' stage where the size of all the lists is determined,
        and a second time, for a 'generation' stage where the lists are
        actually filled. A `generate` function that has side effects beyond
        calling `append` is therefore ill-formed.

        .. versionchanged:: 2018.1

            Change *eliminate_empty_output_lists* argument type from `bool` to
            `list`.
        """

        if devices is None:
            devices = context.devices

        if count_sharing is None:
            count_sharing = {}

        self.context = context
        self.devices = devices

        self.list_names_and_dtypes = list_names_and_dtypes
        self.generate_template = generate_template

        from pyopencl.tools import parse_arg_list
        self.arg_decls = parse_arg_list(arg_decls)

        # To match with the signature of the user-supplied generate(), arguments
        # can't appear to have offsets.
        arg_decls_no_offset = []
        from pyopencl.tools import VectorArg
        for arg in self.arg_decls:
            if isinstance(arg, VectorArg) and arg.with_offset:
                arg = VectorArg(arg.dtype, arg.name)
            arg_decls_no_offset.append(arg)
        self.arg_decls_no_offset = arg_decls_no_offset

        self.count_sharing = count_sharing

        self.name_prefix = name_prefix
        self.preamble = preamble
        self.options = options

        self.debug = debug

        self.complex_kernel = complex_kernel

        if eliminate_empty_output_lists is True:
            eliminate_empty_output_lists = \
                    [name for name, _ in self.list_names_and_dtypes]

        if eliminate_empty_output_lists is False:
            eliminate_empty_output_lists = []

        self.eliminate_empty_output_lists = eliminate_empty_output_lists
        for list_name in self.eliminate_empty_output_lists:
            if not any(list_name == name for name, _ in self.list_names_and_dtypes):
                raise ValueError(
                    "invalid list name '%s' in eliminate_empty_output_lists"
                    % list_name)