def remove_unused_arguments(knl):
    """Return a copy of *knl* whose argument list holds only referenced args.

    An argument is kept when its name is in ``knl.all_params()``, is among
    the dependency names of an instruction of the substitution-expanded
    kernel, or occurs in the shape, offset or a fixed stride of any argument
    or temporary variable of *knl*.
    """
    import loopy as lp
    from itertools import chain
    from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag
    from loopy.symbolic import get_dependencies

    def tolerant_get_deps(expr):
        # ``None`` and ``lp.auto`` are treated as having no dependencies.
        if expr is None or expr is lp.auto:
            return set()
        return get_dependencies(expr)

    # Instruction dependencies are gathered from the kernel with its
    # substitution rules expanded.
    expanded = lp.expand_subst(knl)

    referenced = set(knl.all_params())
    for insn in expanded.instructions:
        referenced.update(insn.dependency_names())

    # Names appearing in array metadata count as referenced as well.
    arrays = chain(knl.args, six.itervalues(knl.temporary_variables))
    for ary in arrays:
        if not isinstance(ary, ArrayBase):
            continue

        shape_deps = tolerant_get_deps(ary.shape)
        offset_deps = tolerant_get_deps(ary.offset)
        referenced.update(shape_deps | offset_deps)

        for dim_tag in ary.dim_tags:
            if isinstance(dim_tag, FixedStrideArrayDimTag):
                referenced.update(tolerant_get_deps(dim_tag.stride))

    kept_args = [arg for arg in knl.args if arg.name in referenced]
    return knl.copy(args=kept_args)
def test_complicated_subst(ctx_factory):
    """Selectively expand nested substitution rules and count what remains.

    After ``lp.expand_subst`` is applied with the given match expression,
    the kernel's remaining substitution rules are counted by the first
    letter of their name: one starting with ``f``, one with ``g`` and two
    with ``h`` are expected.
    """
    # NOTE: *ctx_factory* is not used; the test only inspects the
    # transformed kernel and never executes it.

    # One rule/instruction per line: the kernel text must keep its line
    # structure, otherwise the rules and the instruction run together.
    knl = lp.make_kernel(
            "{[i]: 0<=i<n}",
            """
                f(x) := x*a[x]
                g(x) := 12 + f(x)
                h(x) := 1 + g(x) + 20*g$two(x)
                a[i] = h$one(i) * h$two(i)
                """)

    knl = lp.expand_subst(knl, "... > id:h and tag:two > id:g and tag:two")

    print(knl)

    sr_keys = list(knl.substitutions.keys())
    for letter, how_many in [
            ("f", 1),
            ("g", 1),
            ("h", 2)
            ]:
        substs_with_letter = sum(1 for k in sr_keys if k.startswith(letter))
        assert substs_with_letter == how_many
def variant_gpu(knl):
    """Return *knl* after the "gpu" sequence of loopy transformations.

    Steps, in order: expand substitution rules, split ``i`` and ``j`` by
    256, prefetch ``x[j,k]``, tag the two fetch inames, prefetch ``x[i,k]``,
    and prioritize ``j_outer`` over ``j_inner``.
    """
    chunk = 256
    fetch_iname_tags = {"x_fetch_k": "unr", "x_fetch_j": "l.0"}

    knl = lp.expand_subst(knl)

    knl = lp.split_iname(knl, "i", chunk, outer_tag="g.0", inner_tag="l.0")
    knl = lp.split_iname(knl, "j", chunk)

    # The fetch inames are named explicitly here so they can be tagged below.
    knl = lp.add_prefetch(
        knl,
        "x[j,k]",
        ["j_inner", "k"],
        ["x_fetch_j", "x_fetch_k"],
        default_tag=None,
    )
    knl = lp.tag_inames(knl, fetch_iname_tags)

    knl = lp.add_prefetch(knl, "x[i,k]", ["k"], default_tag=None)

    return lp.prioritize_loops(knl, ["j_outer", "j_inner"])
def remove_unused_inames(knl, inames=None):
    """Delete those among *inames* that are unused, i.e. project them
    out of the domain. If these inames pose implicit restrictions on
    other inames, these restrictions will persist as existentially
    quantified variables.

    :arg inames: may be an iterable of inames or a string of comma-separated
        inames. In the string form, whitespace around each name is ignored
        and empty entries are skipped. If *None*, all inames of *knl* are
        considered.
    :returns: the kernel with the unused inames projected out of its
        domains (*knl* itself if there was nothing to remove).
    """

    # {{{ normalize arguments

    if inames is None:
        inames = knl.all_inames()
    elif isinstance(inames, str):
        # Accept "i, j" as well as "i,j", and do not turn "" or a trailing
        # comma into a bogus empty iname.
        inames = [
                name.strip()
                for name in inames.split(",")
                if name.strip()]

    # }}}

    # {{{ check which inames are unused

    import loopy as lp
    exp_knl = lp.expand_subst(knl)

    inames = set(inames)
    used_inames = set()
    for insn in exp_knl.instructions:
        used_inames.update(
                exp_knl.insn_inames(insn.id)
                | insn.reduction_inames())

    unused_inames = inames - used_inames

    # }}}

    # {{{ remove them

    from loopy.kernel.tools import DomainChanger

    # Sorted so that the order of domain modifications does not depend on
    # set iteration order.
    for iname in sorted(unused_inames):
        # A fresh DomainChanger per iname: *knl* is replaced on every
        # iteration, so each lookup must see the already-updated domains.
        domch = DomainChanger(knl, (iname,))

        dom = domch.domain
        dt, idx = dom.get_var_dict()[iname]
        dom = dom.project_out(dt, idx, 1)

        knl = knl.copy(domains=domch.get_domains_with(dom))

    # }}}

    return knl
def test_nested_substs_in_insns(ctx_factory):
    """Expand nested substitution-rule calls and test against the original.

    Builds a kernel whose single instruction calls ``c(b(a(i)))``, expands
    all substitution rules, asserts that none remain, and passes both
    kernels to ``lp.auto_test_vs_ref``.
    """
    ctx = ctx_factory()

    import loopy as lp

    # One rule/instruction per line: the kernel text must keep its line
    # structure, otherwise the rules and the instruction run together.
    ref_knl = lp.make_kernel(
        "{[i]: 0<=i<10}",
        """
        a(x) := 2 * x
        b(x) := x**2
        c(x) := 7 * x
        f[i] = c(b(a(i)))
        """)

    knl = lp.expand_subst(ref_knl)
    assert not knl.substitutions

    lp.auto_test_vs_ref(ref_knl, ctx, knl)
def test_nested_substs_in_insns(ctx_factory):
    """Expand nested substitution-rule calls and test against the original.

    Builds a program whose single instruction calls ``c(b(a(i)))``, expands
    all substitution rules, asserts that no entry of the result's
    ``callables_table`` has substitutions left on its ``subkernel``, and
    passes both programs to ``lp.auto_test_vs_ref``.
    """
    ctx = ctx_factory()

    import loopy as lp

    # One rule/instruction per line: the kernel text must keep its line
    # structure, otherwise the rules and the instruction run together.
    ref_prg = lp.make_kernel(
        "{[i]: 0<=i<10}",
        """
        a(x) := 2 * x
        b(x) := x**2
        c(x) := 7 * x
        f[i] = c(b(a(i)))
        """)

    t_unit = lp.expand_subst(ref_prg)
    assert not any(
            cknl.subkernel.substitutions
            for cknl in t_unit.callables_table.values())

    lp.auto_test_vs_ref(ref_prg, ctx, t_unit)
def test_nested_substs_in_insns(ctx_factory):
    """Check that nested substitution rules are fully expanded.

    The reference kernel computes ``f[i] = c(b(a(i)))`` through three
    substitution rules. After ``lp.expand_subst`` the kernel must have no
    substitutions left; both kernels are then handed to
    ``lp.auto_test_vs_ref``.
    """
    ctx = ctx_factory()

    import loopy as lp

    # One rule/instruction per line: the kernel text must keep its line
    # structure, otherwise the rules and the instruction run together.
    ref_knl = lp.make_kernel(
        "{[i]: 0<=i<10}",
        """
        a(x) := 2 * x
        b(x) := x**2
        c(x) := 7 * x
        f[i] = c(b(a(i)))
        """
    )

    knl = lp.expand_subst(ref_knl)
    assert not knl.substitutions

    lp.auto_test_vs_ref(ref_knl, ctx, knl)
def test_complicated_subst(ctx_factory):
    """Selectively expand nested substitution rules and count what remains.

    After ``lp.expand_subst`` is applied with the given match expression,
    the kernel's remaining substitution rules are counted by the first
    letter of their name: one starting with ``f``, one with ``g`` and two
    with ``h`` are expected.
    """
    # NOTE: *ctx_factory* is not used; the test only inspects the
    # transformed kernel and never executes it.

    # One rule/instruction per line: the kernel text must keep its line
    # structure, otherwise the rules and the instruction run together.
    knl = lp.make_kernel(
        "{[i]: 0<=i<n}",
        """
        f(x) := x*a[x]
        g(x) := 12 + f(x)
        h(x) := 1 + g(x) + 20*g$two(x)
        a[i] = h$one(i) * h$two(i)
        """)

    knl = lp.expand_subst(knl, "... > id:h and tag:two > id:g and tag:two")

    print(knl)

    sr_keys = list(knl.substitutions.keys())
    for letter, how_many in [("f", 1), ("g", 1), ("h", 2)]:
        substs_with_letter = sum(1 for k in sr_keys if k.startswith(letter))
        assert substs_with_letter == how_many
def variant_cpu(knl):
    """Return *knl* after the "cpu" sequence of loopy transformations.

    Steps, in order: expand substitution rules, split ``i`` by 1024 (outer
    iname tagged ``"g.0"``, ``slabs=(0, 1)``), and prefetch ``x[i,k]`` over
    ``k``.
    """
    expanded = lp.expand_subst(knl)
    split = lp.split_iname(
        expanded, "i", 1024, outer_tag="g.0", slabs=(0, 1))
    return lp.add_prefetch(split, "x[i,k]", ["k"], default_tag=None)
def test_lbm(ctx_factory):
    """Build the lattice Boltzmann kernel, transform it and test it.

    The untransformed kernel (with dtypes inferred from ``f`` being
    float32) serves as the reference. The transformed kernel has ``ii`` and
    ``jj`` split by 16, its substitution rules expanded, and a prefetch of
    ``f`` added; both are passed to ``lp.auto_test_vs_ref`` with
    ``nx = ny = 20``.
    """
    ctx = ctx_factory()

    # D2Q4Q4Q4 lattice Boltzmann scheme for the shallow water equations
    # Example by Loic Gouarin <*****@*****.**>

    # The kernel text needs one statement per line: the leading "# noqa"
    # comment must sit on a line of its own, and the for/with/end blocks
    # rely on the line structure.
    knl = lp.make_kernel(
        "{[ii,jj]:0<=ii<nx-2 and 0<=jj<ny-2}",
        """  # noqa (silences flake8 line length warning)
        i := ii + 1
        j := jj + 1
        for ii, jj
            with {id_prefix=init_m}
                <> m[0] = + f[i-1, j, 0] + f[i, j-1, 1] + f[i+1, j, 2] + f[i, j+1, 3]
                m[1] = + 4.*f[i-1, j, 0] - 4.*f[i+1, j, 2]
                m[2] = + 4.*f[i, j-1, 1] - 4.*f[i, j+1, 3]
                m[3] = + f[i-1, j, 0] - f[i, j-1, 1] + f[i+1, j, 2] - f[i, j+1, 3]
                m[4] = + f[i-1, j, 4] + f[i, j-1, 5] + f[i+1, j, 6] + f[i, j+1, 7]
                m[5] = + 4.*f[i-1, j, 4] - 4.*f[i+1, j, 6]
                m[6] = + 4.*f[i, j-1, 5] - 4.*f[i, j+1, 7]
                m[7] = + f[i-1, j, 4] - f[i, j-1, 5] + f[i+1, j, 6] - f[i, j+1, 7]
                m[8] = + f[i-1, j, 8] + f[i, j-1, 9] + f[i+1, j, 10] + f[i, j+1, 11]
                m[9] = + 4.*f[i-1, j, 8] - 4.*f[i+1, j, 10]
                m[10] = + 4.*f[i, j-1, 9] - 4.*f[i, j+1, 11]
                m[11] = + f[i-1, j, 8] - f[i, j-1, 9] + f[i+1, j, 10] - f[i, j+1, 11]
            end
            with {id_prefix=update_m,dep=init_m*}
                m[1] = m[1] + 2.*(m[4] - m[1])
                m[2] = m[2] + 2.*(m[8] - m[2])
                m[3] = m[3]*(1. - 1.5)
                m[5] = m[5] + 1.5*(0.5*(m[0]*m[0]) + (m[4]*m[4])/m[0] - m[5])
                m[6] = m[6] + 1.5*(m[4]*m[8]/m[0] - m[6])
                m[7] = m[7]*(1. - 1.2000000000000000)
                m[9] = m[9] + 1.5*(m[4]*m[8]/m[0] - m[9])
                m[10] = m[10] + 1.5*(0.5*(m[0]*m[0]) + (m[8]*m[8])/m[0] - m[10])
                m[11] = m[11]*(1. - 1.2)
            end
            with {dep=update_m*}
                f_new[i, j, 0] = + 0.25*m[0] + 0.125*m[1] + 0.25*m[3]
                f_new[i, j, 1] = + 0.25*m[0] + 0.125*m[2] - 0.25*m[3]
                f_new[i, j, 2] = + 0.25*m[0] - 0.125*m[1] + 0.25*m[3]
                f_new[i, j, 3] = + 0.25*m[0] - 0.125*m[2] - 0.25*m[3]
                f_new[i, j, 4] = + 0.25*m[4] + 0.125*m[5] + 0.25*m[7]
                f_new[i, j, 5] = + 0.25*m[4] + 0.125*m[6] - 0.25*m[7]
                f_new[i, j, 6] = + 0.25*m[4] - 0.125*m[5] + 0.25*m[7]
                f_new[i, j, 7] = + 0.25*m[4] - 0.125*m[6] - 0.25*m[7]
                f_new[i, j, 8] = + 0.25*m[8] + 0.125*m[9] + 0.25*m[11]
                f_new[i, j, 9] = + 0.25*m[8] + 0.125*m[10] - 0.25*m[11]
                f_new[i, j, 10] = + 0.25*m[8] - 0.125*m[9] + 0.25*m[11]
                f_new[i, j, 11] = + 0.25*m[8] - 0.125*m[10] - 0.25*m[11]
            end
        end
        """)

    knl = lp.add_and_infer_dtypes(knl, {"f": np.float32})

    # Keep the untransformed kernel as the reference for the comparison.
    ref_knl = knl

    knl = lp.split_iname(knl, "ii", 16, outer_tag="g.1", inner_tag="l.1")
    knl = lp.split_iname(knl, "jj", 16, outer_tag="g.0", inner_tag="l.0")
    knl = lp.expand_subst(knl)
    knl = lp.add_prefetch(knl, "f", "ii_inner,jj_inner",
            fetch_bounding_box=True)

    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"nx": 20, "ny": 20})
def test_lbm(ctx_factory):
    """Build the lattice Boltzmann kernel, transform it and test it.

    The untransformed kernel (with dtypes inferred from ``f`` being
    float32) serves as the reference. The transformed kernel has ``ii`` and
    ``jj`` split by 16, its substitution rules expanded, and a prefetch of
    ``f`` added with ``default_tag="l.auto"``; both are passed to
    ``lp.auto_test_vs_ref`` with ``nx = ny = 20``.
    """
    ctx = ctx_factory()

    # D2Q4Q4Q4 lattice Boltzmann scheme for the shallow water equations
    # Example by Loic Gouarin <*****@*****.**>

    # The kernel text needs one statement per line: the leading "# noqa"
    # comment must sit on a line of its own, and the for/with/end blocks
    # rely on the line structure.
    knl = lp.make_kernel(
        "{[ii,jj]:0<=ii<nx-2 and 0<=jj<ny-2}",
        """  # noqa (silences flake8 line length warning)
        i := ii + 1
        j := jj + 1
        for ii, jj
            with {id_prefix=init_m}
                <> m[0] = + f[i-1, j, 0] + f[i, j-1, 1] + f[i+1, j, 2] + f[i, j+1, 3]
                m[1] = + 4.*f[i-1, j, 0] - 4.*f[i+1, j, 2]
                m[2] = + 4.*f[i, j-1, 1] - 4.*f[i, j+1, 3]
                m[3] = + f[i-1, j, 0] - f[i, j-1, 1] + f[i+1, j, 2] - f[i, j+1, 3]
                m[4] = + f[i-1, j, 4] + f[i, j-1, 5] + f[i+1, j, 6] + f[i, j+1, 7]
                m[5] = + 4.*f[i-1, j, 4] - 4.*f[i+1, j, 6]
                m[6] = + 4.*f[i, j-1, 5] - 4.*f[i, j+1, 7]
                m[7] = + f[i-1, j, 4] - f[i, j-1, 5] + f[i+1, j, 6] - f[i, j+1, 7]
                m[8] = + f[i-1, j, 8] + f[i, j-1, 9] + f[i+1, j, 10] + f[i, j+1, 11]
                m[9] = + 4.*f[i-1, j, 8] - 4.*f[i+1, j, 10]
                m[10] = + 4.*f[i, j-1, 9] - 4.*f[i, j+1, 11]
                m[11] = + f[i-1, j, 8] - f[i, j-1, 9] + f[i+1, j, 10] - f[i, j+1, 11]
            end
            with {id_prefix=update_m,dep=init_m*}
                m[1] = m[1] + 2.*(m[4] - m[1])
                m[2] = m[2] + 2.*(m[8] - m[2])
                m[3] = m[3]*(1. - 1.5)
                m[5] = m[5] + 1.5*(0.5*(m[0]*m[0]) + (m[4]*m[4])/m[0] - m[5])
                m[6] = m[6] + 1.5*(m[4]*m[8]/m[0] - m[6])
                m[7] = m[7]*(1. - 1.2000000000000000)
                m[9] = m[9] + 1.5*(m[4]*m[8]/m[0] - m[9])
                m[10] = m[10] + 1.5*(0.5*(m[0]*m[0]) + (m[8]*m[8])/m[0] - m[10])
                m[11] = m[11]*(1. - 1.2)
            end
            with {dep=update_m*}
                f_new[i, j, 0] = + 0.25*m[0] + 0.125*m[1] + 0.25*m[3]
                f_new[i, j, 1] = + 0.25*m[0] + 0.125*m[2] - 0.25*m[3]
                f_new[i, j, 2] = + 0.25*m[0] - 0.125*m[1] + 0.25*m[3]
                f_new[i, j, 3] = + 0.25*m[0] - 0.125*m[2] - 0.25*m[3]
                f_new[i, j, 4] = + 0.25*m[4] + 0.125*m[5] + 0.25*m[7]
                f_new[i, j, 5] = + 0.25*m[4] + 0.125*m[6] - 0.25*m[7]
                f_new[i, j, 6] = + 0.25*m[4] - 0.125*m[5] + 0.25*m[7]
                f_new[i, j, 7] = + 0.25*m[4] - 0.125*m[6] - 0.25*m[7]
                f_new[i, j, 8] = + 0.25*m[8] + 0.125*m[9] + 0.25*m[11]
                f_new[i, j, 9] = + 0.25*m[8] + 0.125*m[10] - 0.25*m[11]
                f_new[i, j, 10] = + 0.25*m[8] - 0.125*m[9] + 0.25*m[11]
                f_new[i, j, 11] = + 0.25*m[8] - 0.125*m[10] - 0.25*m[11]
            end
        end
        """)

    knl = lp.add_and_infer_dtypes(knl, {"f": np.float32})

    # Keep the untransformed kernel as the reference for the comparison.
    ref_knl = knl

    knl = lp.split_iname(knl, "ii", 16, outer_tag="g.1", inner_tag="l.1")
    knl = lp.split_iname(knl, "jj", 16, outer_tag="g.0", inner_tag="l.0")
    knl = lp.expand_subst(knl)
    knl = lp.add_prefetch(knl, "f", "ii_inner,jj_inner",
            fetch_bounding_box=True, default_tag="l.auto")

    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"nx": 20, "ny": 20})