def augment_domain_for_temporary_promotion(
        kernel, domain, promoted_temporary, mode, name_gen):
    """
    Add new axes to the domain corresponding to the dimensions of
    `promoted_temporary`.

    One set dimension is appended to *domain* per entry of
    ``promoted_temporary.orig_temporary.shape`` and constrained to the
    half-open range ``0 <= iname < size``. Afterwards every iname in
    ``promoted_temporary.hw_inames`` is duplicated under a fresh name.

    :arg kernel: object whose ``iname_to_tag`` mapping supplies the tags
        copied onto the duplicated hardware inames.
    :arg domain: the isl set to extend.
    :arg promoted_temporary: object providing ``orig_temporary`` (with
        ``name`` and ``shape``) and ``hw_inames``.
    :arg mode: value interpolated into the generated iname names.
    :arg name_gen: callable that receives a proposed name and returns the
        name to use.

    :returns: a tuple ``(domain, hw_inames, dim_inames, iname_to_tag)``
        where *domain* is a single basic set, *hw_inames* are the names of
        the duplicated hardware axes, *dim_inames* the names of the new
        per-dimension axes and *iname_to_tag* maps each new hardware iname
        to the tag of the iname it duplicates.
    """
    import islpy as isl
    from loopy.isl_helpers import duplicate_axes
    from loopy.symbolic import aff_from_expr

    orig_temporary = promoted_temporary.orig_temporary
    orig_dim = domain.dim(isl.dim_type.set)
    dims_to_insert = len(orig_temporary.shape)

    iname_to_tag = {}

    # Add dimension-dependent inames.
    dim_inames = []

    domain = domain.add(isl.dim_type.set, dims_to_insert)
    for t_idx, size in enumerate(orig_temporary.shape):
        new_iname = name_gen("{name}_{mode}_dim_{dim}".format(
            name=orig_temporary.name,
            mode=mode,
            dim=orig_dim + t_idx))
        domain = domain.set_dim_name(
            isl.dim_type.set, orig_dim + t_idx, new_iname)
        dim_inames.append(new_iname)

        # Add size information: 0 <= new_iname < size. The upper bound must
        # be strict, since `size` itself is one past the last valid index
        # along this axis.
        aff = isl.affs_from_space(domain.space)
        domain &= aff[0].le_set(aff[new_iname])
        domain &= aff[new_iname].lt_set(aff_from_expr(domain.space, size))

    hw_inames = []

    # Add hardware inames duplicates.
    for t_idx, hw_iname in enumerate(promoted_temporary.hw_inames):
        new_iname = name_gen("{name}_{mode}_hw_dim_{dim}".format(
            name=orig_temporary.name,
            mode=mode,
            dim=t_idx))
        hw_inames.append(new_iname)
        iname_to_tag[new_iname] = kernel.iname_to_tag[hw_iname]

    domain = duplicate_axes(
        domain, promoted_temporary.hw_inames, hw_inames)

    # The operations on the domain above return a Set object, but the
    # underlying domain should be expressible as a single BasicSet.
    domain_list = domain.get_basic_set_list()
    assert domain_list.n_basic_set() == 1
    domain = domain_list.get_basic_set(0)
    return domain, hw_inames, dim_inames, iname_to_tag
def augment_domain_for_temporary_promotion(kernel, domain, promoted_temporary,
                                           mode, name_gen):
    """
    Extend `domain` with one axis per dimension of the temporary behind
    `promoted_temporary`, plus a duplicate of each of its hardware inames.

    Returns ``(domain, hw_inames, dim_inames, iname_to_tag)``: the extended
    domain as a single basic set, the names of the duplicated hardware
    axes, the names of the per-dimension axes, and the tags assigned to the
    newly created inames.
    """
    import islpy as isl

    temporary = promoted_temporary.orig_temporary
    first_new_axis = domain.dim(isl.dim_type.set)
    axis_count = len(temporary.shape)

    # Tags for the inames created below.
    tags = {}

    # Per-dimension inames, appended after the existing set dimensions.
    axis_inames = []
    domain = domain.add(isl.dim_type.set, axis_count)

    for axis, extent in enumerate(temporary.shape):
        proposed = "{name}_{mode}_dim_{dim}".format(
            name=temporary.name, mode=mode, dim=axis)
        iname = name_gen(proposed)
        domain = domain.set_dim_name(
            isl.dim_type.set, first_new_axis + axis, iname)

        if temporary.is_local:
            # If the temporary has local scope, then loads / stores can be
            # done in parallel.
            from loopy.kernel.data import AutoFitLocalIndexTag
            tags[iname] = AutoFitLocalIndexTag()

        axis_inames.append(iname)

        # Bound the new axis: 0 <= iname < extent.
        affs = isl.affs_from_space(domain.space)
        domain &= affs[0].le_set(affs[iname])
        from loopy.symbolic import aff_from_expr
        extent_aff = aff_from_expr(domain.space, extent)
        domain &= affs[iname].lt_set(extent_aff)

    # Fresh names for the hardware inames; each keeps the tag of the iname
    # it is duplicated from.
    duplicated_hw_inames = []
    for position, source_iname in enumerate(promoted_temporary.hw_inames):
        proposed = "{name}_{mode}_hw_dim_{dim}".format(
            name=temporary.name, mode=mode, dim=position)
        iname = name_gen(proposed)
        duplicated_hw_inames.append(iname)
        tags[iname] = kernel.iname_to_tag[source_iname]

    from loopy.isl_helpers import duplicate_axes
    domain = duplicate_axes(
        domain, promoted_temporary.hw_inames, duplicated_hw_inames)

    # The steps above yield a Set; it is expected to consist of exactly one
    # BasicSet, which is what gets returned.
    pieces = domain.get_basic_set_list()
    assert pieces.n_basic_set() == 1
    domain = pieces.get_basic_set(0)

    return domain, duplicated_hw_inames, axis_inames, tags
def test_affs_from_space():
    """Combine constraints built from `isl.affs_from_space` and print the
    resulting set."""
    base = isl.Set("[n] -> {[i,j,k]: 0<=i,j,k<n}")
    affs = isl.affs_from_space(base.space)

    # Each constraint is built separately, then all four are intersected.
    sum_nonnegative = affs[0].le_set(affs["i"] + affs["j"])
    sum_below_n = (affs["i"] + affs["j"]).lt_set(affs["n"])
    i_nonnegative = affs[0].le_set(affs["i"])
    i_at_most_n_plus_13 = affs["i"].le_set(13 + affs["n"])

    myset = sum_nonnegative & sum_below_n & i_nonnegative & i_at_most_n_plus_13

    print(myset)
def test_affs_from_space():
    """Intersect several affine constraints over ``[n] -> {[i,j,k]}`` and
    print the result."""
    space = isl.Set("[n] -> {[i,j,k]: 0<=i,j,k<n}").space
    v = isl.affs_from_space(space)

    zero = v[0]
    i_plus_j = v["i"] + v["j"]

    constraints = [
        zero.le_set(i_plus_j),
        (v["i"] + v["j"]).lt_set(v["n"]),
        zero.le_set(v["i"]),
        v["i"].le_set(13 + v["n"]),
    ]

    # Fold the constraints together left to right.
    myset = constraints[0]
    for constraint in constraints[1:]:
        myset = myset & constraint

    print(myset)
def augment_domain_for_save_or_reload(self, domain, promoted_temporary,
                                      mode, subkernel):
    """
    Extend `domain` with the axes used by the save / reload stage of
    `promoted_temporary`.

    One axis is added per entry of ``promoted_temporary.non_hw_dims``,
    followed by one per entry of ``promoted_temporary.hw_dims``; the new
    axes occupy the set dimensions after the ones `domain` already has.
    Tags of the hardware axes are recorded in
    ``self.updated_iname_to_tag``.

    Returns ``(domain, hw_inames, dim_inames, iname_to_tag)``.
    """
    assert mode in ("save", "reload")
    import islpy as isl

    temporary = self.kernel.temporary_variables[
        promoted_temporary.orig_temporary_name]
    base_dim = domain.dim(isl.dim_type.set)

    # Tags for the non-hardware inames created here.
    new_tags = {}

    from loopy.symbolic import aff_from_expr

    # FIXME: Restrict size of new inames to access footprint.

    # Reserve room for all new axes up front.
    axis_inames = []
    extra_dims = (len(promoted_temporary.non_hw_dims)
                  + len(promoted_temporary.hw_dims))
    domain = domain.add(isl.dim_type.set, extra_dims)

    # Non-hardware (per-dimension) axes.
    for axis, extent in enumerate(promoted_temporary.non_hw_dims):
        proposed = "{name}_{mode}_axis_{dim}_{sk}".format(
            name=temporary.name, mode=mode, dim=axis, sk=subkernel)
        iname = self.insn_name_gen(proposed)
        domain = domain.set_dim_name(
            isl.dim_type.set, base_dim + axis, iname)

        if temporary.is_local:
            # If the temporary has local scope, then loads / stores can
            # be done in parallel.
            from loopy.kernel.data import AutoFitLocalIndexTag
            new_tags[iname] = AutoFitLocalIndexTag()

        axis_inames.append(iname)

        # Bound the axis: 0 <= iname < extent.
        affs = isl.affs_from_space(domain.space)
        domain &= affs[0].le_set(affs[iname])
        domain &= affs[iname].lt_set(aff_from_expr(domain.space, extent))

    hw_base = base_dim + len(promoted_temporary.non_hw_dims)

    # Hardware axes, placed after the non-hardware ones.
    hardware_inames = []
    tagged_extents = zip(promoted_temporary.hw_tags,
                         promoted_temporary.hw_dims)
    for position, (tag, extent) in enumerate(tagged_extents):
        proposed = "{name}_{mode}_hw_dim_{dim}_{sk}".format(
            name=temporary.name, mode=mode, dim=position, sk=subkernel)
        iname = self.insn_name_gen(proposed)
        domain = domain.set_dim_name(
            isl.dim_type.set, hw_base + position, iname)

        affs = isl.affs_from_space(domain.space)
        lower = affs[0].le_set(affs[iname])
        upper = affs[iname].lt_set(aff_from_expr(domain.space, extent))
        domain = domain & lower & upper

        self.updated_iname_to_tag[iname] = tag
        hardware_inames.append(iname)

    # The steps above yield a Set; it is expected to consist of exactly one
    # BasicSet, which is what gets returned.
    pieces = domain.get_basic_set_list()
    assert pieces.n_basic_set() == 1
    domain = pieces.get_basic_set(0)

    return domain, hardware_inames, axis_inames, new_tags
def generate(builder, wrapper_name=None):
    """Build the loopy wrapper kernel described by *builder*.

    :arg builder: object supplying the instructions
        (``emit_instructions()``), wrapper arguments (``wrapper_args``),
        layer extents, loop / layer indices, the ``single_cell`` and
        ``extruded`` flags, ``argument_accesses`` and the user ``kernel``.
    :arg wrapper_name: name of the generated kernel; when ``None`` it
        defaults to ``"wrap_%s" % builder.kernel.name``.
    :returns: the kernel created by ``loopy.make_kernel`` after loop
        prioritisation and registration of the user kernel, the preamble
        and the PETSc function lookup.
    """
    # The outer inames are the loop index and, when present, the layer index.
    if builder.layer_index is not None:
        outer_inames = frozenset(
            [builder._loop_index.name, builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # Mutable state shared with the helpers called below through `context`.
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    # One slot per wrapper argument; slots still None later on are filled
    # with a GlobalArg.
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    # presumably populated by replace_materialise as a side effect -- TODO confirm
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    # A second merger is built from the initialisers and applied to them.
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [
        tuple(merger(i) for i in inits) for inits in mapper.initialisers
    ]

    # rename indices and nodes (so that the counters start from zero)
    # Names of the form <letters/underscores><digits>[_offset] are renumbered
    # with an independent counter per (prefix, postfix) pair.
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node, (Index, RuntimeIndex, Variable, Argument, NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            replacements[node] = "%s%d%s" % (
                prefix, next(counter[(prefix, postfix)]), postfix)

    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [
        rename_nodes(inits, replacements) for inits in mapper.initialisers
    ]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments,
                                                replacements)
    # The layer extents are mapped and renamed too, so the stored names
    # match the renamed nodes.
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents], replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames,
                                 parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps

    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        # Pin the loop-index domain to a single value.
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(
                    d.add_constraint(
                        isl.Constraint.eq_from_names(d.space, {
                            "n": 1,
                            "start": -1
                        })))
            else:
                new_domains.append(d)
        domains = new_domains
        # NOTE(review): the extruded handling is placed inside the
        # single_cell branch here; confirm against the intended indentation.
        if builder.extruded:
            new_domains = []
            for d in domains:
                if d.get_dim_name(isl.dim_type.set, 0) == builder.layer_index.name:
                    # layer = t1 - 1
                    t1 = parameters.layer_end
                    new_domains.append(
                        d.add_constraint(
                            isl.Constraint.eq_from_names(
                                d.space, {
                                    "layer": 1,
                                    t1: -1,
                                    1: 1
                                })))
                else:
                    new_domains.append(d)
        domains = new_domains

    # Intersect all recorded assumptions and keep only their parameter part;
    # the tuple-unpacking requires the result to be exactly one basic set.
    assumptions, = reduce(
        operator.and_,
        parameters.assumptions.values()).params().get_basic_sets()
    options = loopy.Options(check_dep_resolution=True,
                            ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # Extra assumptions on the iteration bounds: start >= 0, start < end
    # (single cell) or start <= end (otherwise), and for extruded kernels
    # layer_start <= layer_end.
    pwaffd = isl.affs_from_space(assumptions.get_space())
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(
            pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(
        ["#include <math.h>", "#include <complex.h>", "#include <petsc.h>"])
    # Sorted so that the emitted preamble is deterministic.
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # The user kernel is itself a loopy kernel: register and inline it.
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code,
                              tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper,
                                                 [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
        wrapper, petsc_function_lookup)

    return wrapper
def domain_for_shape(
        dim_names: Tuple[str, ...],
        shape: ShapeType,
        reductions: Dict[str, Tuple[ScalarExpression, ScalarExpression]],
        ) -> isl.BasicSet:  # noqa
    """Build the :class:`islpy.BasicSet` index domain for an array of
    (potentially symbolic) shape *shape* with reduction dimensions
    *reductions*.

    :param dim_names: Names of the array axes; each becomes a set dimension
        of the returned domain.
    :param shape: A tuple of constant or quasi-affine :mod:`pymbolic`
        expressions, one per entry of *dim_names*. Variables occurring in
        them become parameter dimensions of the returned set.
    :param reductions: Maps each reduction iname to its ``(lower, upper)``
        bounds, interpreted as a half-open integer range. Variables
        occurring in the bounds become parameter dimensions as well.
    """
    assert len(dim_names) == len(shape)

    # Gather every variable the shape or the reduction bounds depend on.
    param_names_set: Set[str] = set()
    for extent in shape:
        param_names_set |= scalar_expr.get_dependencies(extent)

    for bounds in reductions.values():
        for bound in bounds:
            # FIXME: Assumes that reduction bounds are not data-dependent.
            param_names_set |= scalar_expr.get_dependencies(bound)

    set_names = sorted([*dim_names, *reductions])
    param_names = sorted(param_names_set)

    # Start from the unconstrained set over the collected names.
    space = isl.Space.create_from_names(
        isl.DEFAULT_CONTEXT, set=set_names, params=param_names)
    dom = isl.BasicSet.universe(space)

    from loopy.symbolic import aff_from_expr

    affs = isl.affs_from_space(dom.space)

    # Array axes: 0 <= iname < extent.
    for iname, extent in zip(dim_names, shape):
        dom &= affs[0].le_set(affs[iname])
        dom &= affs[iname].lt_set(aff_from_expr(dom.space, extent))

    # Reduction axes: lower <= iname < upper.
    for iname, (lower, upper) in reductions.items():
        dom &= aff_from_expr(dom.space, lower).le_set(affs[iname])
        dom &= affs[iname].lt_set(aff_from_expr(dom.space, upper))

    pieces = dom.get_basic_sets()
    if len(pieces) != 0:
        dom, = pieces
    else:
        # No basic sets at all: the domain is empty.
        dom = isl.BasicSet.empty(dom.get_space())

    return dom
def augment_domain_for_save_or_reload(self, domain, promoted_temporary,
                                      mode, subkernel):
    """
    Extend `domain` with the axes used by the save / reload stage of
    `promoted_temporary`.

    The axes for ``promoted_temporary.non_hw_dims`` come first, then those
    for ``promoted_temporary.hw_dims``; all of them are placed after the
    set dimensions `domain` already has. Tags of the hardware axes are
    stored in ``self.updated_iname_to_tags``.

    Returns ``(domain, hw_inames, dim_inames, iname_to_tags)``.
    """
    assert mode in ("save", "reload")
    import islpy as isl

    temporary = self.kernel.temporary_variables[
        promoted_temporary.orig_temporary_name]
    existing_dims = domain.dim(isl.dim_type.set)

    # Tag sets for the non-hardware inames created here.
    tags_by_iname = {}

    from loopy.symbolic import aff_from_expr

    # FIXME: Restrict size of new inames to access footprint.

    # Allocate all new set dimensions in one go.
    plain_inames = []
    domain = domain.add_dims(
        isl.dim_type.set,
        len(promoted_temporary.non_hw_dims)
        + len(promoted_temporary.hw_dims))

    # Non-hardware (per-dimension) axes.
    for idx, extent in enumerate(promoted_temporary.non_hw_dims):
        iname = self.insn_name_gen(
            "{name}_{mode}_axis_{dim}_{sk}".format(
                name=temporary.name,
                mode=mode,
                dim=idx,
                sk=subkernel))
        domain = domain.set_dim_name(
            isl.dim_type.set, existing_dims + idx, iname)

        if temporary.address_space == AddressSpace.LOCAL:
            # If the temporary has local scope, then loads / stores can
            # be done in parallel.
            from loopy.kernel.data import AutoFitLocalIndexTag
            tags_by_iname[iname] = frozenset([AutoFitLocalIndexTag()])

        plain_inames.append(iname)

        # Bound the axis: 0 <= iname < extent.
        affs = isl.affs_from_space(domain.space)
        domain &= affs[0].le_set(affs[iname])
        extent_aff = aff_from_expr(domain.space, extent)
        domain &= affs[iname].lt_set(extent_aff)

    first_hw_dim = existing_dims + len(promoted_temporary.non_hw_dims)

    # Hardware axes, placed after the non-hardware ones.
    hw_axis_inames = []
    hw_pairs = zip(promoted_temporary.hw_tags, promoted_temporary.hw_dims)
    for idx, (tag, extent) in enumerate(hw_pairs):
        iname = self.insn_name_gen(
            "{name}_{mode}_hw_dim_{dim}_{sk}".format(
                name=temporary.name,
                mode=mode,
                dim=idx,
                sk=subkernel))
        domain = domain.set_dim_name(
            isl.dim_type.set, first_hw_dim + idx, iname)

        affs = isl.affs_from_space(domain.space)
        nonnegative = affs[0].le_set(affs[iname])
        below_extent = affs[iname].lt_set(
            aff_from_expr(domain.space, extent))
        domain = domain & nonnegative & below_extent

        self.updated_iname_to_tags[iname] = frozenset([tag])
        hw_axis_inames.append(iname)

    # The steps above yield a Set; it is expected to consist of exactly one
    # BasicSet, which is what gets returned.
    basic_sets = domain.get_basic_set_list()
    assert basic_sets.n_basic_set() == 1
    domain = basic_sets.get_basic_set(0)

    return domain, hw_axis_inames, plain_inames, tags_by_iname
def generate(builder, wrapper_name=None):
    """Build the loopy wrapper kernel described by *builder*.

    :arg builder: object supplying the instructions
        (``emit_instructions()``), wrapper arguments (``wrapper_args``),
        layer extents, loop / layer indices, the ``single_cell`` and
        ``extruded`` flags, ``argument_accesses`` and the user ``kernel``.
    :arg wrapper_name: name of the generated kernel; when ``None`` it
        defaults to ``"wrap_%s" % builder.kernel.name``.
    :returns: the kernel created by ``loopy.make_kernel`` after loop
        prioritisation and registration of the user kernel, the preamble
        and the PETSc function lookup.
    """
    # The outer inames are the loop index and, when present, the layer index.
    if builder.layer_index is not None:
        outer_inames = frozenset([builder._loop_index.name,
                                  builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # Mutable state shared with the helpers called below through `context`.
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    # One slot per wrapper argument; slots still None later on are filled
    # with a GlobalArg.
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    # presumably populated by replace_materialise as a side effect -- TODO confirm
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    # A second merger is built from the initialisers and applied to them.
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [tuple(merger(i) for i in inits) for inits in mapper.initialisers]

    # rename indices and nodes (so that the counters start from zero)
    # Names of the form <letters/underscores><digits>[_offset] are renumbered
    # with an independent counter per (prefix, postfix) pair.
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node, (Index, RuntimeIndex, Variable, Argument, NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            replacements[node] = "%s%d%s" % (prefix, next(counter[(prefix, postfix)]), postfix)

    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [rename_nodes(inits, replacements) for inits in mapper.initialisers]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments, replacements)
    # The layer extents are mapped and renamed too, so the stored names
    # match the renamed nodes.
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents], replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames, parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps

    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        # Pin the loop-index domain to a single value.
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(d.add_constraint(isl.Constraint.eq_from_names(d.space, {"n": 1, "start": -1})))
            else:
                new_domains.append(d)
        domains = new_domains
        # NOTE(review): the extruded handling is placed inside the
        # single_cell branch here; confirm against the intended indentation.
        if builder.extruded:
            new_domains = []
            for d in domains:
                if d.get_dim_name(isl.dim_type.set, 0) == builder.layer_index.name:
                    # layer = t1 - 1
                    t1 = parameters.layer_end
                    new_domains.append(d.add_constraint(isl.Constraint.eq_from_names(d.space, {"layer": 1, t1: -1, 1: 1})))
                else:
                    new_domains.append(d)
        domains = new_domains

    # Intersect all recorded assumptions and keep only their parameter part;
    # the tuple-unpacking requires the result to be exactly one basic set.
    assumptions, = reduce(operator.and_,
                          parameters.assumptions.values()).params().get_basic_sets()
    options = loopy.Options(check_dep_resolution=True, ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # Extra assumptions on the iteration bounds: start >= 0, start < end
    # (single cell) or start <= end (otherwise), and for extruded kernels
    # layer_start <= layer_end.
    pwaffd = isl.affs_from_space(assumptions.get_space())
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(["#include <math.h>"])
    # Sorted so that the emitted preamble is deterministic.
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # The user kernel is itself a loopy kernel: register and inline it.
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code, tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper, [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(wrapper, petsc_function_lookup)

    return wrapper