def runtime_indices(expressions):
    """Return the names of all :class:`RuntimeIndex` nodes in *expressions*.

    :arg expressions: iterable of expression nodes to search.
    :returns: frozenset of runtime index names.
    """
    return frozenset(node.name
                     for node in traversal(expressions)
                     if isinstance(node, RuntimeIndex))
def loop_nesting(instructions, deps, outer_inames, kernel_name):
    """Compute, for each imperative instruction, the set of inames
    (loop index names) the instruction must nest inside.

    :arg instructions: the instructions to schedule.
    :arg deps: mapping of instruction -> (name, dependency names); used to
        propagate nesting along data dependencies.
    :arg outer_inames: frozenset of iname names forming the outermost
        loop(s); some instructions nest only inside these.
    :arg kernel_name: name of the local kernel function call.
    :returns: dict mapping each imperative instruction to a frozenset of
        iname names.
    """
    nesting = {}

    # Initial nesting: accumulations of Zero/Literal values and calls to
    # PETSc functions or the local kernel sit directly in the outer loops;
    # everything else nests inside the runtime indices it references.
    for insn in imperatives(instructions):
        if isinstance(insn, Accumulate):
            if isinstance(insn.children[1], (Zero, Literal)):
                nesting[insn] = outer_inames
            else:
                nesting[insn] = runtime_indices([insn])
        else:
            assert isinstance(insn, FunctionCall)
            if insn.name in (petsc_functions | {kernel_name}):
                nesting[insn] = outer_inames
            else:
                nesting[insn] = runtime_indices([insn])

    # take care of dependencies. e.g. t1[i] = A[i], t2[j] = B[t1[j]], then
    # t2 should depend on {i, j}: transitively union in the nesting of
    # every instruction this one depends on (the worklist excludes the
    # instruction's own name to avoid cycling back to itself).
    name_to_insn = dict((n, i) for i, (n, _) in deps.items())
    for insn, (name, _deps) in deps.items():
        s = set(_deps)
        while s:
            d = s.pop()
            nesting[insn] = nesting[insn] | nesting[name_to_insn[d]]
            s = s | set(deps[name_to_insn[d]][1]) - set([name])

    # boost inames, if one instruction is inside inner inames (free indices),
    # it should be inside the outer inames as dictated by other instructions.
    index_nesting = defaultdict(frozenset)  # free index -> {runtime indices}
    for insn in instructions:
        # For When nodes, key the nesting lookup on children[1] (the
        # guarded instruction), not the When wrapper itself.
        if isinstance(insn, When):
            key = insn.children[1]
        else:
            key = insn
        for fi in traversal([insn]):
            if isinstance(fi, Index):
                index_nesting[fi] |= nesting[key]

    for insn in imperatives(instructions):
        outer = reduce(
            operator.or_,
            iter(index_nesting[fi] for fi in traversal([insn])
                 if isinstance(fi, Index)),
            frozenset())
        nesting[insn] = nesting[insn] | outer

    return nesting
def loop_nesting(instructions, deps, outer_inames, kernel_name):
    """Compute, for each imperative instruction, the set of inames
    (loop index names) the instruction must nest inside.

    :arg instructions: the instructions to schedule.
    :arg deps: mapping of instruction -> (name, dependency names); used to
        propagate nesting along data dependencies.
    :arg outer_inames: frozenset of iname names forming the outermost
        loop(s); some instructions nest only inside these.
    :arg kernel_name: name of the local kernel function call.
    :returns: dict mapping each imperative instruction to a frozenset of
        iname names.
    """
    nesting = {}

    # Initial nesting: accumulations of Zero/Literal values and calls to
    # PETSc functions or the local kernel sit directly in the outer loops;
    # everything else nests inside the runtime indices it references.
    for insn in imperatives(instructions):
        if isinstance(insn, Accumulate):
            if isinstance(insn.children[1], (Zero, Literal)):
                nesting[insn] = outer_inames
            else:
                nesting[insn] = runtime_indices([insn])
        else:
            assert isinstance(insn, FunctionCall)
            if insn.name in (petsc_functions | {kernel_name}):
                nesting[insn] = outer_inames
            else:
                nesting[insn] = runtime_indices([insn])

    # take care of dependencies. e.g. t1[i] = A[i], t2[j] = B[t1[j]], then
    # t2 should depend on {i, j}: transitively union in the nesting of
    # every instruction this one depends on (the worklist excludes the
    # instruction's own name to avoid cycling back to itself).
    name_to_insn = dict((n, i) for i, (n, _) in deps.items())
    for insn, (name, _deps) in deps.items():
        s = set(_deps)
        while s:
            d = s.pop()
            nesting[insn] = nesting[insn] | nesting[name_to_insn[d]]
            s = s | set(deps[name_to_insn[d]][1]) - set([name])

    # boost inames, if one instruction is inside inner inames (free indices),
    # it should be inside the outer inames as dictated by other instructions.
    index_nesting = defaultdict(frozenset)  # free index -> {runtime indices}
    for insn in instructions:
        # For When nodes, key the nesting lookup on children[1] (the
        # guarded instruction), not the When wrapper itself.
        if isinstance(insn, When):
            key = insn.children[1]
        else:
            key = insn
        for fi in traversal([insn]):
            if isinstance(fi, Index):
                index_nesting[fi] |= nesting[key]

    for insn in imperatives(instructions):
        outer = reduce(operator.or_,
                       iter(index_nesting[fi] for fi in traversal([insn])
                            if isinstance(fi, Index)),
                       frozenset())
        nesting[insn] = nesting[insn] | outer

    return nesting
def collect_indices(expressions):
    """Collect indices in expressions.

    :arg expressions: an iterable of expressions to collect indices from.
    :returns: iterable of nodes of type :class:`Index` or
        :class:`RuntimeIndex`.
    """
    index_types = (Index, RuntimeIndex)
    for candidate in traversal(expressions):
        if isinstance(candidate, index_types):
            yield candidate
def expression_runtimeindex(expr, parameters):
    """Translate a RuntimeIndex into a pymbolic variable.

    Side effect: the first time an index name is seen, its ISL domain
    ``lo <= name < hi`` is recorded in ``parameters.domains`` and, if the
    index carries a constraint, the translated constraint is recorded in
    ``parameters.assumptions``.

    :arg expr: the RuntimeIndex expression; its children are the lower
        bound, upper bound, and (possibly None) constraint.
    :arg parameters: code-generation parameter bag providing the
        ``domains`` and ``assumptions`` mappings.
    :returns: a ``pym.Variable`` named after the index.
    """
    # Local translator from expression nodes to ISL affine objects,
    # dispatched on node type.
    @singledispatch
    def translate(expr, vars):
        raise AssertionError("Unhandled type '%s' in domain translation" % type(expr))

    @translate.register(Sum)
    def translate_sum(expr, vars):
        return operator.add(*(translate(c, vars) for c in expr.children))

    @translate.register(Argument)
    def translate_argument(expr, vars):
        # Lower the argument through the generic expression translator
        # first, then look its name up among the ISL parameters.
        expr = expression(expr, parameters)
        return vars[expr.name]

    @translate.register(Variable)
    def translate_variable(expr, vars):
        return vars[expr.name]

    @translate.register(Zero)
    def translate_zero(expr, vars):
        assert expr.shape == ()
        # vars[0] is the zero affine expression.
        return vars[0]

    @translate.register(LogicalAnd)
    def translate_logicaland(expr, vars):
        a, b = (translate(c, vars) for c in expr.children)
        return a & b

    @translate.register(Comparison)
    def translate_comparison(expr, vars):
        a, b = (translate(c, vars) for c in expr.children)
        # Map the comparison operator onto the corresponding islpy
        # set-building method.
        fn = {
            ">": "gt_set",
            ">=": "ge_set",
            "==": "eq_set",
            "!=": "ne_set",
            "<": "lt_set",
            "<=": "le_set"
        }[expr.operator]
        return getattr(a, fn)(b)

    name = expr.name
    if name not in parameters.domains:
        lo, hi, constraint = expr.children
        # Every Argument/Variable appearing in the bounds becomes an ISL
        # parameter of the domain.
        params = list(v.name for v in traversal([lo, hi])
                      if isinstance(v, (Argument, Variable)))
        vars = isl.make_zero_and_vars([name], params)
        domain = (vars[name].ge_set(translate(lo, vars))
                  & vars[name].lt_set(translate(hi, vars)))
        parameters.domains[name] = domain
        if constraint is not None:
            parameters.assumptions[name] = translate(constraint, vars)
    return pym.Variable(name)
def expression_runtimeindex(expr, parameters):
    """Translate a RuntimeIndex into a pymbolic variable.

    Side effect: the first time an index name is seen, its ISL domain
    ``lo <= name < hi`` is recorded in ``parameters.domains`` and, if the
    index carries a constraint, the translated constraint is recorded in
    ``parameters.assumptions``.

    :arg expr: the RuntimeIndex expression; its children are the lower
        bound, upper bound, and (possibly None) constraint.
    :arg parameters: code-generation parameter bag providing the
        ``domains`` and ``assumptions`` mappings.
    :returns: a ``pym.Variable`` named after the index.
    """
    # Local translator from expression nodes to ISL affine objects,
    # dispatched on node type.
    @singledispatch
    def translate(expr, vars):
        raise AssertionError("Unhandled type '%s' in domain translation" % type(expr))

    @translate.register(Sum)
    def translate_sum(expr, vars):
        return operator.add(*(translate(c, vars) for c in expr.children))

    @translate.register(Argument)
    def translate_argument(expr, vars):
        # Lower the argument through the generic expression translator
        # first, then look its name up among the ISL parameters.
        expr = expression(expr, parameters)
        return vars[expr.name]

    @translate.register(Variable)
    def translate_variable(expr, vars):
        return vars[expr.name]

    @translate.register(Zero)
    def translate_zero(expr, vars):
        assert expr.shape == ()
        # vars[0] is the zero affine expression.
        return vars[0]

    @translate.register(LogicalAnd)
    def translate_logicaland(expr, vars):
        a, b = (translate(c, vars) for c in expr.children)
        return a & b

    @translate.register(Comparison)
    def translate_comparison(expr, vars):
        a, b = (translate(c, vars) for c in expr.children)
        # Map the comparison operator onto the corresponding islpy
        # set-building method.
        fn = {">": "gt_set",
              ">=": "ge_set",
              "==": "eq_set",
              "!=": "ne_set",
              "<": "lt_set",
              "<=": "le_set"}[expr.operator]
        return getattr(a, fn)(b)

    name = expr.name
    if name not in parameters.domains:
        lo, hi, constraint = expr.children
        # Every Argument/Variable appearing in the bounds becomes an ISL
        # parameter of the domain.
        params = list(v.name for v in traversal([lo, hi])
                      if isinstance(v, (Argument, Variable)))
        vars = isl.make_zero_and_vars([name], params)
        domain = (vars[name].ge_set(translate(lo, vars))
                  & vars[name].lt_set(translate(hi, vars)))
        parameters.domains[name] = domain
        if constraint is not None:
            parameters.assumptions[name] = translate(constraint, vars)
    return pym.Variable(name)
def generate(builder, wrapper_name=None):
    """Generate the loopy wrapper kernel from *builder*.

    :arg builder: the wrapper builder supplying instructions, wrapper
        arguments, loop index and layer extents.
    :arg wrapper_name: optional name for the generated wrapper kernel;
        defaults to ``"wrap_<kernel name>"``.
    :returns: a loopy kernel wrapping ``builder.kernel``.
    """
    # Outermost inames: the entity loop index, plus the layer index when
    # the iteration is layered (extruded).
    if builder.layer_index is not None:
        outer_inames = frozenset([builder._loop_index.name,
                                  builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # Mutable bag of code-generation state threaded through the helpers.
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [
        tuple(merger(i) for i in inits) for inits in mapper.initialisers
    ]

    # rename indices and nodes (so that the counters start from zero)
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node, (Index, RuntimeIndex, Variable, Argument,
                             NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            replacements[node] = "%s%d%s" % (
                prefix, next(counter[(prefix, postfix)]), postfix)
    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [
        rename_nodes(inits, replacements) for inits in mapper.initialisers
    ]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments,
                                                replacements)
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents],
                        replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames,
                                 parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps
    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements
                      if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        # Pin the entity loop to a single iteration: n = start.
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(
                    d.add_constraint(
                        isl.Constraint.eq_from_names(d.space, {
                            "n": 1,
                            "start": -1
                        })))
            else:
                new_domains.append(d)
        domains = new_domains
    if builder.extruded:
        # Pin the layer loop to its top value: layer = t1 - 1.
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder.layer_index.name:
                # layer = t1 - 1
                t1 = parameters.layer_end
                new_domains.append(
                    d.add_constraint(
                        isl.Constraint.eq_from_names(
                            d.space, {
                                "layer": 1,
                                t1: -1,
                                1: 1
                            })))
            else:
                new_domains.append(d)
        domains = new_domains

    # Fold all recorded assumptions together; the unpacking asserts the
    # result is a single basic set.
    assumptions, = reduce(
        operator.and_,
        parameters.assumptions.values()).params().get_basic_sets()
    options = loopy.Options(check_dep_resolution=True,
                            ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # Add bounds assumptions on the loop extents: start >= 0 and
    # start < end (single cell) or start <= end (general case).
    pwaffd = isl.affs_from_space(assumptions.get_space())
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(
            pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(
        ["#include <math.h>", "#include <complex.h>", "#include <petsc.h>"])
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # Kernel already is a loopy kernel: register and inline it.
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code,
                              tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper,
                                                 [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
        wrapper, petsc_function_lookup)

    return wrapper
def bounds(exprs):
    """Yield the Argument/Variable nodes referenced by the extents of
    every :class:`RuntimeIndex` found in *exprs*."""
    for node in traversal(exprs):
        if isinstance(node, RuntimeIndex):
            yield from variables(node.extents)
def variables(exprs):
    """Yield every Argument or Variable node appearing in *exprs*."""
    yield from (node for node in traversal(exprs)
                if isinstance(node, (Argument, Variable)))
def imperatives(exprs):
    """Yield every imperative node (Accumulate or FunctionCall) in *exprs*."""
    yield from (node for node in traversal(exprs)
                if isinstance(node, (Accumulate, FunctionCall)))
def generate(builder, wrapper_name=None):
    """Generate the loopy wrapper kernel from *builder*.

    :arg builder: the wrapper builder supplying instructions, wrapper
        arguments, loop index and layer extents.
    :arg wrapper_name: optional name for the generated wrapper kernel;
        defaults to ``"wrap_<kernel name>"``.
    :returns: a loopy kernel wrapping ``builder.kernel``.
    """
    # Outermost inames: the entity loop index, plus the layer index when
    # the iteration is layered (extruded).
    if builder.layer_index is not None:
        outer_inames = frozenset([builder._loop_index.name,
                                  builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # Mutable bag of code-generation state threaded through the helpers.
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [tuple(merger(i) for i in inits)
                           for inits in mapper.initialisers]

    # rename indices and nodes (so that the counters start from zero)
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node, (Index, RuntimeIndex, Variable, Argument,
                             NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            replacements[node] = "%s%d%s" % (prefix,
                                             next(counter[(prefix, postfix)]),
                                             postfix)
    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [rename_nodes(inits, replacements)
                           for inits in mapper.initialisers]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments,
                                                replacements)
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents],
                        replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames,
                                 parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps
    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements
                      if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        # Pin the entity loop to a single iteration: n = start.
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(d.add_constraint(
                    isl.Constraint.eq_from_names(d.space,
                                                 {"n": 1, "start": -1})))
            else:
                new_domains.append(d)
        domains = new_domains
    if builder.extruded:
        # Pin the layer loop to its top value: layer = t1 - 1.
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder.layer_index.name:
                # layer = t1 - 1
                t1 = parameters.layer_end
                new_domains.append(d.add_constraint(
                    isl.Constraint.eq_from_names(d.space,
                                                 {"layer": 1, t1: -1, 1: 1})))
            else:
                new_domains.append(d)
        domains = new_domains

    # Fold all recorded assumptions together; the unpacking asserts the
    # result is a single basic set.
    assumptions, = reduce(operator.and_,
                          parameters.assumptions.values()).params().get_basic_sets()
    options = loopy.Options(check_dep_resolution=True,
                            ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # Add bounds assumptions on the loop extents: start >= 0 and
    # start < end (single cell) or start <= end (general case).
    pwaffd = isl.affs_from_space(assumptions.get_space())
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(["#include <math.h>"])
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # Kernel already is a loopy kernel: register and inline it.
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code,
                              tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper,
                                                 [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(wrapper, petsc_function_lookup)

    return wrapper