def test_bound():
    """Print the bounds of ``i * m + j`` over a parametric 2D box.

    The polynomial is bounded once with ``isl.fold.min`` and once with
    ``isl.fold.max``; both results are printed.
    """
    poly_text = (
        """[n, m] -> {[i, j] -> i * m + j : 0 <= i < n and 0 <= j < m}""")

    for fold_name in ("min", "max"):
        # Parse a fresh polynomial for each bound so the two computations
        # stay independent of each other.
        print(
            isl.PwQPolynomial(poly_text).bound(getattr(isl.fold, fold_name)))
def test_pwqpoly():
    """Print the coefficient of every term in every piece of ``[n] -> { n }``."""

    def show_coefficient(term):
        print(term.get_coefficient_val())

    def visit_piece(domain, qpoly):
        # The piece's domain is not inspected; only its polynomial is walked.
        qpoly.foreach_term(show_coefficient)

    isl.PwQPolynomial('[n] -> { n }').foreach_piece(visit_piece)
def test_subst_into_pwqpolynomial():
    """Check parameter substitution into a piecewise quasi-polynomial.

    ``m`` and ``n`` are replaced by ``3 * nx`` and ``3 * ny`` and the outcome
    is compared against a hand-written polynomial in ``nx``, ``ny``, ``nz``.
    """
    from pymbolic.primitives import Variable

    nx = Variable("nx")
    ny = Variable("ny")
    nz = Variable("nz")
    substitutions = {
            "m": 3 * nx,
            "n": 3 * ny,
            "nx": nx,
            "ny": ny,
            "nz": nz,
            }

    target_space = isl.Set("[nx, ny, nz] -> { []: }").space
    source_poly = isl.PwQPolynomial(
        "[m, n] -> { (256 * m + 256 * m * n) : "
        "m > 0 and n > 0; 256 * m : m > 0 and n <= 0 }")

    from loopy.isl_helpers import subst_into_pwqpolynomial
    actual = subst_into_pwqpolynomial(target_space, source_poly, substitutions)

    expected = isl.PwQPolynomial(
        "[nx, ny, nz] -> {"
        "(768 * nx + 2304 * nx * ny) : nx > 0 and ny > 0;"
        "768 * nx : nx > 0 and ny <= 0 }")

    # Two piecewise polynomials agree exactly when their difference is zero.
    difference = actual - expected
    assert difference.is_zero()
def __getitem__(self, index):
    """Return the entry stored under *index* in ``self.dict``.

    A key that is not present yields the constant polynomial ``{ 0 }``
    instead of raising :class:`KeyError`.
    """
    try:
        entry = self.dict[index]
    except KeyError:
        # Missing keys are reported as a zero polynomial.
        entry = isl.PwQPolynomial('{ 0 }')
    return entry
def get_synchronization_poly(knl):
    """Count the number of synchronization events each thread encounters in a
    loopy kernel.

    :parameter knl: A :class:`loopy.LoopKernel` whose barriers are to be
        counted.

    :return: A dictionary mapping each type of synchronization event to a
        :class:`islpy.PwQPolynomial` holding the number of such events
        per thread.

        Keys are ``kernel_launch`` and ``barrier_<kind>``, where ``<kind>``
        is the ``kind`` of the barrier found in the schedule, e.g.
        ``barrier_local`` or ``barrier_global`` (if supported by the target).

    :raises LoopyError: if the schedule contains an item whose type is not
        handled here.

    Example usage::

        # (first create loopy kernel and specify array data types)

        sync_poly = get_synchronization_poly(knl)
        params = {'n': 512, 'm': 256, 'l': 128}
        barrier_count = sync_poly['barrier_local'].eval_with_dict(params)

        # (now use this count to predict performance)

    """

    from loopy.preprocess import preprocess_kernel, infer_unknown_types
    from loopy.schedule import (EnterLoop, LeaveLoop, Barrier,
                                CallKernel, ReturnFromKernel, RunInstruction)

    knl = infer_unknown_types(knl, expect_completion=True)
    knl = preprocess_kernel(knl)
    knl = lp.get_one_scheduled_kernel(knl)

    # Inames of the loops that enclose the schedule item being visited.
    iname_list = []

    result = ToCountMap()

    one = isl.PwQPolynomial('{ 1 }')

    def get_count_poly(inames):
        # Outside of any loop an event is counted exactly once.
        if not inames:
            return one

        # Restrict the domain to the enclosing inames and count its points.
        domain = knl.get_inames_domain(inames).project_out_except(
            inames, [dim_type.set])
        return count(knl, domain)

    for sched_item in knl.schedule:
        if isinstance(sched_item, EnterLoop):
            if sched_item.iname:  # (if not empty)
                iname_list.append(sched_item.iname)

        elif isinstance(sched_item, LeaveLoop):
            if sched_item.iname:  # (if not empty)
                iname_list.pop()

        elif isinstance(sched_item, Barrier):
            result = result + ToCountMap(
                {"barrier_%s" % sched_item.kind: get_count_poly(iname_list)})

        elif isinstance(sched_item, CallKernel):
            result = result + ToCountMap(
                {"kernel_launch": get_count_poly(iname_list)})

        elif isinstance(sched_item, (ReturnFromKernel, RunInstruction)):
            # These items are not synchronization events.
            pass

        else:
            raise LoopyError("unexpected schedule item: %s"
                             % type(sched_item).__name__)

    return result.dict