Beispiel #1
0
    def preprocess_translation_unit_for_passed_args(self, t_unit, epoint,
                                                    passed_args_dict):
        """Adapt the entrypoint kernel's arguments to the values being passed.

        Every :class:`loopy.kernel.data.ValueArg` of the kernel ``t_unit[epoint]``
        for which *passed_args_dict* holds a :class:`pyopencl.array.Array` of
        shape ``()`` is replaced by an input-only
        :class:`loopy.kernel.data.GlobalArg` of shape ``()`` with the same name
        and dtype. All other arguments are kept as they are, in order.

        :arg t_unit: the translation unit containing the kernel.
        :arg epoint: the key used to look up the kernel in *t_unit*.
        :arg passed_args_dict: a mapping from argument names to the values
            supplied by the caller.
        :returns: *t_unit* with the kernel replaced by a copy carrying the
            adjusted argument list.
        """

        # {{{ ValueArgs -> GlobalArgs if passed as array shapes

        from loopy.kernel.data import ValueArg, GlobalArg
        import pyopencl.array as cla

        kernel = t_unit[epoint]

        def _passed_as_scalar_array(name):
            # True iff the caller supplied a zero-dimensional device array
            # under this argument name.
            if name not in passed_args_dict:
                return False
            value = passed_args_dict[name]
            return isinstance(value, cla.Array) and value.shape == ()

        converted_args = [
            GlobalArg(name=arg.name,
                      dtype=arg.dtype,
                      shape=(),
                      is_output=False,
                      is_input=True)
            if isinstance(arg, ValueArg) and _passed_as_scalar_array(arg.name)
            else arg
            for arg in kernel.args
        ]

        # }}}

        return t_unit.with_kernel(kernel.copy(args=converted_args))
Beispiel #2
0
def to_batched(knl,
               nbatches,
               batch_varying_args,
               batch_iname_prefix="ibatch",
               sequential=False):
    """Return a kernel that performs a batch of the operations done by *knl*.

    A fresh batch iname (derived from *batch_iname_prefix*) with the domain
    ``0 <= iname < nbatches`` is prepended to the kernel's domains, every
    argument named in *batch_varying_args* gains a leading batch axis, and
    the batch iname is added to ``within_inames`` of every instruction.

    .. note::

       For temporaries in a kernel that are private or read-only globals,
       and if ``sequential=True``, loopy does not batch these variables
       unless they are explicitly mentioned in *batch_varying_args*.

    :arg nbatches: the number of batches. May be a constant non-negative
        integer or a string, which will be added as an integer argument.
    :arg batch_varying_args: a list of argument names that vary per-batch.
        Each such variable will have a batch index added.
    :arg batch_iname_prefix: the prefix from which the (unique) name of the
        batch iname is generated.
    :arg sequential: A :class:`bool`. If *True*, do not duplicate
        temporary variables for each batch. This automatically tags the batch
        iname for sequential execution.
    """

    from pymbolic import var

    name_gen = knl.get_var_name_generator()
    batch_iname = name_gen(batch_iname_prefix)
    batch_index = var(batch_iname)

    domain_str = "{[%(iname)s]: 0 <= %(iname)s < %(nbatches)s}" % {
        "iname": batch_iname,
        "nbatches": nbatches,
    }

    batched_args = []

    if isinstance(nbatches, int):
        batch_count = nbatches
    else:
        # A symbolic batch count becomes a parameter of the batch domain and
        # an additional integer argument of the kernel.
        domain_str = "[%s] -> " % nbatches + domain_str
        batched_args.append(ValueArg(nbatches, dtype=knl.index_dtype))
        batch_count = var(nbatches)

    batched_domains = [isl.BasicSet(domain_str)] + knl.domains

    for arg in knl.args:
        if arg.name not in batch_varying_args:
            batched_args.append(arg)
        elif isinstance(arg, ValueArg):
            # A per-batch scalar turns into a one-dimensional global array.
            batched_args.append(
                GlobalArg(arg.name,
                          arg.dtype,
                          shape=(batch_count, ),
                          dim_tags="c"))
        else:
            # Array arguments get the batch axis prepended to their shape.
            batched_args.append(
                arg.copy(shape=(batch_count, ) + arg.shape,
                         dim_tags=("c", ) * (len(arg.shape) + 1),
                         dim_names=_add_unique_dim_name(
                             "ibatch", arg.dim_names)))

    knl = knl.copy(domains=batched_domains, args=batched_args)

    if sequential:
        # Temporaries are left untouched; the batch loop is forced to run
        # sequentially instead.
        import loopy as lp
        from loopy.kernel.data import ForceSequentialTag
        knl = lp.tag_inames(knl, [(batch_iname, ForceSequentialTag())])
    else:
        # Give each temporary that needs it a leading batch axis.
        batched_temps = {
            tv.name: (
                tv.copy(
                    shape=(batch_count, ) + tv.shape,
                    dim_tags=("c", ) * (len(tv.shape) + 1),
                    dim_names=_add_unique_dim_name("ibatch", tv.dim_names))
                if temp_needs_batching_if_not_sequential(
                    tv, batch_varying_args)
                else tv)
            for tv in six.itervalues(knl.temporary_variables)
        }
        knl = knl.copy(temporary_variables=batched_temps)

    mapping_ctx = SubstitutionRuleMappingContext(knl.substitutions, name_gen)
    changer = _BatchVariableChanger(mapping_ctx,
                                    knl,
                                    batch_varying_args,
                                    batch_index,
                                    sequential=sequential)
    batched_knl = mapping_ctx.finish_kernel(changer.map_kernel(knl))

    # Nest every instruction inside the batch iname.
    batch_inames = frozenset([batch_iname])
    return batched_knl.copy(instructions=[
        stmt.copy(within_inames=stmt.within_inames | batch_inames)
        for stmt in batched_knl.instructions
    ])