def test_forced_iname_deps_and_reduction():
    """Regression test for https://github.com/inducer/loopy/issues/24.

    Builds a kernel in which a C instruction inside iname ``i`` writes
    ``phi`` and a sum-reduction over ``j`` reads ``phi[j]`` with an empty,
    final set of forced inames, then checks that the reduction's update
    instruction is not placed inside ``i`` after preprocessing.
    """
    from pymbolic.primitives import Subscript, Variable

    # This is (purposefully) somewhat un-idiomatic, to replicate the
    # conditions under which the above bug was found. If assignees were
    # phi[i], then the iname propagation heuristic would not assume that
    # dependent instructions need to run inside of 'i', and hence the
    # forced_iname_* bits below would not be needed.
    phi_insn = lp.CInstruction(
        "i", "doSomethingToGetPhi();", assignees="phi")

    phi_at_j = Subscript(Variable("phi"), Variable("j"))
    sum_insn = lp.Assignment(
        "a",
        lp.Reduction("sum", "j", phi_at_j),
        forced_iname_deps=frozenset(),
        forced_iname_deps_is_final=True)

    kernel_data = [
        lp.GlobalArg("a", dtype=np.float32, shape=()),
        lp.ValueArg("n", dtype=np.int32),
        lp.TemporaryVariable("phi", dtype=np.float32, shape=("n", )),
    ]

    knl = lp.make_kernel(
        "{[i,j] : 0<=i,j<n}",
        [phi_insn, sum_insn],
        kernel_data,
        target=lp.CTarget(),
    )
    knl = lp.preprocess_kernel(knl)

    # The reduction update must not have been pulled into the 'i' loop.
    assert 'i' not in knl.insn_inames("insn_0_j_update")

    print(knl.stringify(with_dependencies=True))
def cumsum(self, arg):
    """
    Registers a substitution rule for the cumulative sum of the elements
    of the one-dimensional array ``arg``. Mimics :func:`numpy.cumsum`:
    entry ``i`` of the result is the sum of entries ``0, ..., i``
    (inclusive) of ``arg``.

    :arg arg: the array to sum; its ``shape``, ``dtype`` and ``name``
        attributes are read, and it must be one-dimensional.
    :return: An instance of :class:`numloopy.ArraySymbol` which is
        registered as the cumulative summed-substitution rule.
    """
    # Note: this can remain as a substitution but loopy does not have
    # support for translating inames for substitutions to the kernel
    # domains
    assert len(arg.shape) == 1, "cumsum only supports 1-D arrays"

    # The order of the name_generator calls is kept fixed so that the
    # generated names do not change.
    i_iname = self.name_generator(based_on="i")
    j_iname = self.name_generator(based_on="i")

    space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT,
                                        [i_iname, j_iname])
    domain = isl.BasicSet.universe(space)

    arg_name = self.name_generator(based_on="arr")
    subst_name = self.name_generator(based_on="subst")

    # 0 <= i < arg.shape[0]
    domain = domain & make_slab(space, i_iname, 0, arg.shape[0])
    # j >= 0
    domain = domain.add_constraint(
        isl.Constraint.ineq_from_names(space, {j_iname: 1}))
    # i - j >= 0, i.e. j <= i. The bound is inclusive so that entry i also
    # contains arg[i], as numpy.cumsum does. (A constant term of -1 here
    # would give j < i: an exclusive scan with no points for i == 0.)
    domain = domain.add_constraint(
        isl.Constraint.ineq_from_names(space, {
            j_iname: -1,
            i_iname: 1,
        }))

    cumsummed_arg = ArraySymbol(stack=self,
                                name=arg_name,
                                shape=arg.shape,
                                dtype=arg.dtype)
    cumsummed_subst = ArraySymbol(stack=self,
                                  name=subst_name,
                                  shape=arg.shape,
                                  dtype=arg.dtype)

    # subst(i) := arr[i]; the substitution simply reads the stored array.
    subst_iname = self.name_generator(based_on="i")
    rule = lp.SubstitutionRule(
        subst_name, (subst_iname, ),
        Subscript(Variable(arg_name), (Variable(subst_iname), )))

    from loopy.library.reduction import SumReductionOperation

    # arr[i] = sum(j, arg(j)) over the triangular domain built above.
    insn = lp.Assignment(
        assignee=Subscript(Variable(arg_name), (Variable(i_iname), )),
        expression=lp.Reduction(
            SumReductionOperation(), (j_iname, ),
            parse('{}({})'.format(arg.name, j_iname))))

    self.data.append(cumsummed_arg)
    self.substs_to_arrays[subst_name] = arg_name
    self.register_implicit_assignment(insn)
    self.domains.append(domain)
    self.register_substitution(rule)

    return cumsummed_subst
def map_matrix_product(self, expr: MatrixProduct,
                       state: CodeGenState) -> ImplementedResult:
    """Generate code for a matrix product and return its stored result.

    The last axis of ``expr.x1`` and the first axis of ``expr.x2`` are both
    indexed by the reduction iname ``_r0`` and summed over; the result is
    written to a temporary and memoized in ``state.results``.

    :arg expr: the matrix product node to implement.
    :arg state: the code generation state; ``state.results`` is used as a
        cache and is updated with the new result.
    :return: the (possibly cached) result for *expr*.
    """
    if expr in state.results:
        return state.results[expr]

    lhs_result = self.rec(expr.x1, state)
    rhs_result = self.rec(expr.x2, state)

    expr_context = LoopyExpressionContext(state, num_indices=expr.ndim)
    expr_context.reduction_bounds["_r0"] = (0, expr.x2.shape[0])

    # Figure out inames: x1 uses output indices _0, _1, ... with its last
    # axis replaced by the reduction iname.
    lhs_inames = tuple(
        var("_r0") if axis == expr.x1.ndim - 1 else var(f"_{axis}")
        for axis in range(expr.x1.ndim))

    # x2 starts with the reduction iname; its remaining axes continue the
    # output numbering after x1's non-reduction axes.
    rhs_shift = len(lhs_inames) - 2
    rhs_inames = tuple(
        var("_r0") if axis == 0 else var(f"_{axis + rhs_shift}")
        for axis in range(expr.x2.ndim))

    lhs_expr = lhs_result.to_loopy_expression(lhs_inames, expr_context)
    rhs_expr = rhs_result.to_loopy_expression(rhs_inames, expr_context)
    product = lhs_expr * rhs_expr

    import loopy.library.reduction as red
    reduction = lp.Reduction(
        operation=red.parse_reduction_op("sum"),
        inames=("_r0", ),
        expr=product,
        allow_simultaneous=False)

    inlined = InlinedResult.from_loopy_expression(reduction, expr_context)

    out_name = state.var_name_gen("matmul")
    store_id = add_store(out_name, expr, inlined, state,
                         output_to_temporary=True)

    stored = StoredResult(out_name, expr.ndim, frozenset([store_id]))
    state.results[expr] = stored
    return stored
def sum(self, arg, axis=None):
    """
    Registers a substitution rule in order to sum the elements of array
    ``arg`` along ``axis``.

    :arg arg: the array to sum; its ``shape``, ``dtype`` and ``name``
        attributes are read.
    :arg axis: the axis or axes to sum over: an :class:`int`, an iterable
        of ints, or *None* (or any empty value) to sum over every axis.
        Negative values count from the last axis, as in numpy.
    :raises ValueError: if an entry of ``axis`` is out of range for
        ``arg``.
    :return: An instance of :class:`numloopy.ArraySymbol` which is
        registered as the sum-substitution rule.
    """
    ndim = len(arg.shape)

    if isinstance(axis, int):
        axis = (axis, )
    if not axis:
        axis = tuple(range(ndim))

    # Normalize before touching any state. Negative axes would otherwise
    # never match an axis index below and silently reduce over nothing.
    normalized_axes = set()
    for ax in axis:
        if not -ndim <= ax < ndim:
            raise ValueError(
                "axis {} is out of bounds for array of dimension {}".format(
                    ax, ndim))
        normalized_axes.add(ax % ndim)
    axis = frozenset(normalized_axes)

    # One iname per axis of ``arg``, each ranging over that axis' extent.
    inames = [self.name_generator(based_on="i") for _ in arg.shape]

    space = isl.Space.create_from_names(isl.DEFAULT_CONTEXT, inames)
    domain = isl.BasicSet.universe(space)
    for axis_len, iname in zip(arg.shape, inames):
        domain &= make_slab(space, iname, 0, axis_len)

    self.domains.append(domain)

    reduction_inames = tuple(
        iname for i, iname in enumerate(inames) if i in axis)
    left_inames = tuple(
        iname for i, iname in enumerate(inames) if i not in axis)

    # A full reduction is represented with shape (1, ) rather than ().
    result_shape = tuple(
        axis_len for i, axis_len in enumerate(arg.shape)
        if i not in axis) or (1, )

    subst_name = self.name_generator(based_on="subst")

    summed_arg = ArraySymbol(
        stack=self,
        name=subst_name,
        shape=result_shape,
        dtype=arg.dtype)

    from loopy.library.reduction import SumReductionOperation

    # subst(<kept inames>) := sum(<reduced inames>, arg(<all inames>))
    rule = lp.SubstitutionRule(
        subst_name, left_inames,
        lp.Reduction(
            SumReductionOperation(), reduction_inames,
            parse('{}({})'.format(arg.name, ', '.join(inames)))))

    self.register_substitution(rule)

    return summed_arg
def build_ass():
    """Build the assignment ``A_T[i,j] = sum(k, A0[i,j,k] * G_T[k])``.

    The index variables are read from the enclosing ``inames`` mapping and
    the array symbols from the enclosing ``args`` mapping.
    """
    # Get variable symbols for all required variables
    idx_i = inames["i"]
    idx_j = inames["j"]
    idx_k = inames["k"]
    out_sym = args["A_T"]
    ref_sym = args["A0"]
    geo_sym = args["G_T"]

    # The target of the assignment
    lhs = pb.Subscript(out_sym, (idx_i, idx_j))

    # The rhs expression: Frobenius inner product <A0[i,j],G_T>
    sum_op = lp.library.reduction.SumReductionOperation()
    ref_entry = pb.Subscript(ref_sym, (idx_i, idx_j, idx_k))
    geo_entry = pb.Subscript(geo_sym, idx_k)
    rhs = lp.Reduction(sum_op, idx_k, ref_entry * geo_entry)

    return lp.Assignment(lhs, rhs)
def build_ass():
    """Build the assignment ``A[i,j] = c*sum(k, B[k,i]*B[k,j])``.

    The index variables are read from the enclosing ``inames`` mapping and
    the array/scalar symbols from the enclosing ``args`` mapping.
    """
    # The target of the assignment
    out_sym = args["A"]
    row, col = inames["i"], inames["j"]
    lhs = pb.Subscript(out_sym, (row, col))

    # The rhs expression: A reduce operation of the matrix columns
    # Maybe replace with manual increment?
    sum_op = lp.library.reduction.SumReductionOperation()
    mat_sym = args["B"]
    red = inames["k"]
    column_product = (pb.Subscript(mat_sym, (red, row))
                      * pb.Subscript(mat_sym, (red, col)))

    scale = args["c"]
    rhs = scale * lp.Reduction(sum_op, red, column_product)

    return lp.Assignment(lhs, rhs)