def preprocess_translation_unit_for_passed_args(self, t_unit, epoint,
        passed_args_dict):
    """Adapt the entrypoint kernel's arguments to the values actually passed.

    Every :class:`loopy.kernel.data.ValueArg` of ``t_unit[epoint]`` whose
    entry in *passed_args_dict* is a :class:`pyopencl.array.Array` with shape
    ``()`` is replaced by an input-only :class:`loopy.kernel.data.GlobalArg`
    of shape ``()`` carrying the same name and dtype. All other arguments
    are kept as they are.

    :arg t_unit: the translation unit containing the kernel.
    :arg epoint: the key used to look up the kernel in *t_unit*.
    :arg passed_args_dict: a mapping from argument names to passed values.
    :returns: *t_unit* with the updated kernel installed via ``with_kernel``.
    """
    # {{{ ValueArgs -> GlobalArgs if passed as array shapes

    from loopy.kernel.data import ValueArg, GlobalArg
    import pyopencl.array as cla

    def _passed_as_scalar_array(arg_name):
        # True only when a value was supplied and it is a 0-d device array.
        if arg_name not in passed_args_dict:
            return False
        value = passed_args_dict[arg_name]
        return isinstance(value, cla.Array) and value.shape == ()

    kernel = t_unit[epoint]

    converted_args = [
        GlobalArg(name=a.name, dtype=a.dtype, shape=(),
                  is_output=False, is_input=True)
        if isinstance(a, ValueArg) and _passed_as_scalar_array(a.name)
        else a
        for a in kernel.args]

    # }}}

    return t_unit.with_kernel(kernel.copy(args=converted_args))
def to_batched(knl, nbatches, batch_varying_args,
        batch_iname_prefix="ibatch", sequential=False):
    """Return a kernel that carries out a batch of the operations performed
    by *knl*.

    .. note::

        For temporaries in a kernel that are private or read-only globals,
        and if ``sequential=True``, loopy does not batch these variables
        unless they are explicitly mentioned in *batch_varying_args*.

    :arg nbatches: the number of batches. May be a constant non-negative
        integer or a string, which will be added as an integer argument.
    :arg batch_varying_args: a list of argument names that vary per-batch.
        Each such variable will have a batch index added.
    :arg batch_iname_prefix: the prefix handed to the kernel's variable name
        generator to obtain the name of the batch iname.
    :arg sequential: A :class:`bool`. If *True*, do not duplicate
        temporary variables for each batch. This automatically tags the batch
        iname for sequential execution.
    """
    from pymbolic import var

    name_gen = knl.get_var_name_generator()
    batch_iname = name_gen(batch_iname_prefix)
    batch_index = var(batch_iname)

    domain_text = "{[%(iname)s]: 0 <= %(iname)s < %(nbatches)s}" % {
            "iname": batch_iname,
            "nbatches": nbatches,
            }

    batched_args = []
    if isinstance(nbatches, int):
        batch_count = nbatches
    else:
        # A non-integer batch count becomes a domain parameter and an extra
        # value argument of the kernel.
        domain_text = "[%s] -> " % nbatches + domain_text
        batched_args.append(ValueArg(nbatches, dtype=knl.index_dtype))
        batch_count = var(nbatches)

    domains_with_batch = [isl.BasicSet(domain_text)] + knl.domains

    def _with_batch_axis(array_like):
        # Prepend an axis of length batch_count, tagged "c" like all others.
        return array_like.copy(
                shape=(batch_count,) + array_like.shape,
                dim_tags=("c",) * (len(array_like.shape) + 1),
                dim_names=_add_unique_dim_name(
                    "ibatch", array_like.dim_names))

    for arg in knl.args:
        if arg.name not in batch_varying_args:
            batched_args.append(arg)
        elif isinstance(arg, ValueArg):
            # A per-batch scalar turns into a one-dimensional global array.
            batched_args.append(
                    GlobalArg(arg.name, arg.dtype, shape=(batch_count,),
                        dim_tags="c"))
        else:
            batched_args.append(_with_batch_axis(arg))

    knl = knl.copy(domains=domains_with_batch, args=batched_args)

    if sequential:
        import loopy as lp
        from loopy.kernel.data import ForceSequentialTag
        knl = lp.tag_inames(knl, [(batch_iname, ForceSequentialTag())])
    else:
        knl = knl.copy(temporary_variables={
            tv.name: (
                _with_batch_axis(tv)
                if temp_needs_batching_if_not_sequential(
                    tv, batch_varying_args)
                else tv)
            for tv in six.itervalues(knl.temporary_variables)})

    rule_ctx = SubstitutionRuleMappingContext(knl.substitutions, name_gen)
    changer = _BatchVariableChanger(rule_ctx, knl, batch_varying_args,
            batch_index, sequential=sequential)
    batched_knl = rule_ctx.finish_kernel(changer.map_kernel(knl))

    # Every instruction is placed inside the batch loop.
    batch_inames = frozenset([batch_iname])
    return batched_knl.copy(instructions=[
        insn.copy(within_inames=insn.within_inames | batch_inames)
        for insn in batched_knl.instructions])
def to_batched(knl, nbatches, batch_varying_args,
        batch_iname_prefix="ibatch", sequential=False):
    """Return a kernel that carries out a batch of the operations performed
    by *knl*.

    :arg nbatches: the number of batches. May be a constant non-negative
        integer or a string, which will be added as an integer argument.
    :arg batch_varying_args: a list of argument names that vary per-batch.
        Each such variable will have a batch index added.
    :arg batch_iname_prefix: the prefix handed to the kernel's variable name
        generator to obtain the name of the batch iname.
    :arg sequential: A :class:`bool`. If *True*, do not duplicate
        temporary variables for each batch. This automatically tags the batch
        iname for sequential execution.
    """
    from pymbolic import var

    name_gen = knl.get_var_name_generator()
    batch_iname = name_gen(batch_iname_prefix)
    batch_index = var(batch_iname)

    domain_text = "{[%(iname)s]: 0 <= %(iname)s < %(nbatches)s}" % {
            "iname": batch_iname,
            "nbatches": nbatches,
            }

    batched_args = []
    if isinstance(nbatches, int):
        batch_count = nbatches
    else:
        # A non-integer batch count becomes a domain parameter and an extra
        # value argument of the kernel.
        domain_text = "[%s] -> " % nbatches + domain_text
        batched_args.append(ValueArg(nbatches, dtype=knl.index_dtype))
        batch_count = var(nbatches)

    domains_with_batch = [isl.BasicSet(domain_text)] + knl.domains

    def _with_batch_axis(array_like):
        # Prepend an axis of length batch_count, tagged "c" like all others.
        return array_like.copy(
                shape=(batch_count,) + array_like.shape,
                dim_tags=("c",) * (len(array_like.shape) + 1),
                dim_names=_add_unique_dim_name(
                    "ibatch", array_like.dim_names))

    for arg in knl.args:
        if arg.name not in batch_varying_args:
            batched_args.append(arg)
        elif isinstance(arg, ValueArg):
            # A per-batch scalar turns into a one-dimensional global array.
            batched_args.append(
                    GlobalArg(arg.name, arg.dtype, shape=(batch_count,),
                        dim_tags="c"))
        else:
            batched_args.append(_with_batch_axis(arg))

    knl = knl.copy(domains=domains_with_batch, args=batched_args)

    if sequential:
        import loopy as lp
        from loopy.kernel.data import ForceSequentialTag
        knl = lp.tag_inames(knl, [(batch_iname, ForceSequentialTag())])
    else:
        # Read-only temporaries that carry an initializer are left untouched;
        # every other temporary gets its own slot per batch.
        knl = knl.copy(temporary_variables={
            tv.name: (
                tv
                if tv.initializer is not None and tv.read_only
                else _with_batch_axis(tv))
            for tv in six.itervalues(knl.temporary_variables)})

    rule_ctx = SubstitutionRuleMappingContext(knl.substitutions, name_gen)
    changer = _BatchVariableChanger(rule_ctx, knl, batch_varying_args,
            batch_index, sequential=sequential)
    batched_knl = rule_ctx.finish_kernel(changer.map_kernel(knl))

    # Every instruction is made to depend on the batch iname.
    batch_inames = frozenset([batch_iname])
    return batched_knl.copy(instructions=[
        insn.copy(forced_iname_deps=insn.forced_iname_deps | batch_inames)
        for insn in batched_knl.instructions])
def to_batched(knl, nbatches, batch_varying_args,
        batch_iname_prefix="ibatch"):
    """Takes in a kernel that carries out an operation and returns a kernel
    that carries out a batch of these operations.

    Each argument named in *batch_varying_args* and every temporary variable
    of *knl* receives an additional leading axis of length *nbatches*.

    :arg nbatches: the number of batches. May be a constant non-negative
        integer or a string, which will be added as an integer argument.
    :arg batch_varying_args: a list of argument names that vary per-batch.
        Each such variable will have a batch index added.
    :arg batch_iname_prefix: the prefix handed to the kernel's variable name
        generator to obtain the name of the batch iname.
    :returns: the kernel produced by running :class:`_BatchVariableChanger`
        over the modified copy of *knl*.
    """

    from pymbolic import var

    vng = knl.get_var_name_generator()
    batch_iname = vng(batch_iname_prefix)
    batch_iname_expr = var(batch_iname)

    new_args = []

    batch_dom_str = "{[%(iname)s]: 0 <= %(iname)s < %(nbatches)s}" % {
            "iname": batch_iname,
            "nbatches": nbatches,
            }

    if not isinstance(nbatches, int):
        # A non-integer batch count becomes a domain parameter and an extra
        # value argument of the kernel.
        batch_dom_str = "[%s] -> " % nbatches + batch_dom_str
        new_args.append(ValueArg(nbatches, dtype=knl.index_dtype))

        nbatches_expr = var(nbatches)
    else:
        nbatches_expr = nbatches

    batch_domain = isl.BasicSet(batch_dom_str)
    new_domains = [batch_domain] + knl.domains

    for arg in knl.args:
        if arg.name in batch_varying_args:
            if isinstance(arg, ValueArg):
                # A per-batch scalar turns into a one-dimensional global
                # array.
                arg = GlobalArg(arg.name, arg.dtype, shape=(nbatches_expr,),
                        dim_tags="c")
            else:
                arg = arg.copy(
                        shape=(nbatches_expr,) + arg.shape,
                        dim_tags=("c",) * (len(arg.shape) + 1))

        new_args.append(arg)

    new_temps = {}

    for temp in six.itervalues(knl.temporary_variables):
        # The number of dim tags must follow the temporary's own rank, not
        # the rank of whichever argument the loop above visited last.
        new_temps[temp.name] = temp.copy(
                shape=(nbatches_expr,) + temp.shape,
                dim_tags=("c",) * (len(temp.shape) + 1))

    knl = knl.copy(
            domains=new_domains,
            args=new_args,
            temporary_variables=new_temps)

    rule_mapping_context = SubstitutionRuleMappingContext(
            knl.substitutions, vng)
    bvc = _BatchVariableChanger(rule_mapping_context,
            knl, batch_varying_args, batch_iname_expr)
    return rule_mapping_context.finish_kernel(
            bvc.map_kernel(knl))