def test_sum_factorization():
    """Build a tensor-product quadrature kernel, then mark the test xfail.

    ``pytest.xfail`` raises, so the ``extract_subst`` calls and the final
    ``print`` are never reached; they record the factorization this test is
    meant to exercise.
    """
    domain = (
        "{[i,j,ip,jp,k,l]: "
        "0<=i<I and 0<=j<J and 0<=ip<IP and 0<=jp<JP and 0<=k,l<Q}")
    instructions = """
        phi1(i, x) := x**i
        phi2(i, x) := x**i
        psi1(i, x) := x**i
        psi2(i, x) := x**i
        a(x, y) := 1

        A[i,j,ip,jp] = sum(k,sum(l,
            phi1(i,x[0,k]) * phi2(j,x[1,l])
            * psi1(ip, x[0,k]) * psi2(jp, x[1, l])
            * w[0,k] * w[1,l]
            * a(x[0,k], x[1,l])
        ))
        """
    knl = lp.make_kernel(domain, instructions)

    pytest.xfail("extract_subst is currently too stupid for sum factorization")

    # Both extractions target the same rule name, first without and then
    # with the enclosing reduction over k.
    templates = (
        "phi1(i,x[0,k]) *psi1(ip, x[0,k]) * w[0,k]",
        "sum(k, phi1(i,x[0,k]) *psi1(ip, x[0,k]) * w[0,k])",
    )
    for template in templates:
        knl = lp.extract_subst(knl, "temp_array", template)

    print(knl)
def test_precompute_with_preexisting_inames(ctx_factory):
    """Precompute two substitution rules that share the inames ``ii,jj``.

    Accesses to ``D1`` and ``D2`` are extracted into rules, both rules are
    precomputed with ``precompute_inames="ii,jj"``, and the result is compared
    against the kernel as it was before those transformations.

    :arg ctx_factory: callable returning the context handed to
        ``lp.auto_test_vs_ref``.
    """
    ctx = ctx_factory()

    domain = "{[e,i,j,k]: 0<=e<E and 0<=i,j,k<n}"
    instructions = """
        result[e,i] = sum(j, D1[i,j]*u[e,j])
        result2[e,i] = sum(k, D2[i,k]*u[e,k])
        """
    knl = lp.make_kernel(domain, instructions)

    dtypes = dict.fromkeys(("u", "D1", "D2"), np.float32)
    knl = lp.add_and_infer_dtypes(knl, dtypes)
    knl = lp.fix_parameters(knl, n=13)

    # Reference is taken before any substitution/precompute transformation.
    ref_knl = knl

    extractions = (("D1_subst", "D1[ii,jj]"), ("D2_subst", "D2[ii,jj]"))
    for rule_name, template in extractions:
        knl = lp.extract_subst(knl, rule_name, template, parameters="ii,jj")

    # Same precompute inames for both rules; only the sweep inames differ.
    sweeps = (("D1_subst", "i,j"), ("D2_subst", "i,k"))
    for rule_name, sweep_inames in sweeps:
        knl = lp.precompute(
            knl, rule_name, sweep_inames,
            default_tag="for", precompute_inames="ii,jj")

    knl = lp.prioritize_loops(knl, "ii,jj,e,j,k")

    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"E": 200})
def test_precompute_with_preexisting_inames(ctx_factory):
    """Precompute two substitution rules onto the same inames ``ii,jj``.

    ``D1`` and ``D2`` accesses are turned into substitution rules, each rule
    is precomputed with ``precompute_inames="ii,jj"``, and the transformed
    kernel is checked against the untransformed one.

    :arg ctx_factory: callable returning the context handed to
        ``lp.auto_test_vs_ref``.
    """
    ctx = ctx_factory()

    knl = lp.make_kernel(
        "{[e,i,j,k]: 0<=e<E and 0<=i,j,k<n}",
        """
        result[e,i] = sum(j, D1[i,j]*u[e,j])
        result2[e,i] = sum(k, D2[i,k]*u[e,k])
        """)

    knl = lp.add_and_infer_dtypes(knl, {
        "u": np.float32,
        "D1": np.float32,
        "D2": np.float32,
        })

    knl = lp.fix_parameters(knl, n=13)

    # Keep the untransformed kernel as the reference for the comparison.
    ref_knl = knl

    knl = lp.extract_subst(knl, "D1_subst", "D1[ii,jj]", parameters="ii,jj")
    knl = lp.extract_subst(knl, "D2_subst", "D2[ii,jj]", parameters="ii,jj")

    knl = lp.precompute(knl, "D1_subst", "i,j", default_tag="for",
            precompute_inames="ii,jj")
    knl = lp.precompute(knl, "D2_subst", "i,k", default_tag="for",
            precompute_inames="ii,jj")

    # prioritize_loops replaces the deprecated set_loop_priority; no
    # priorities were set earlier, so the resulting ordering is the same.
    knl = lp.prioritize_loops(knl, "ii,jj,e,j,k")

    lp.auto_test_vs_ref(
            ref_knl, ctx, knl,
            parameters=dict(E=200))
def test_sum_factorization():
    """Construct the sum-factorization kernel and bail out via xfail.

    Everything after ``pytest.xfail`` is unreachable (the call raises) and
    is kept as a record of the transformations the test is aiming for.
    """
    kernel = lp.make_kernel(
        "{[i,j,ip,jp,k,l]: "
        "0<=i<I and 0<=j<J and 0<=ip<IP and 0<=jp<JP and 0<=k,l<Q}",
        """
        phi1(i, x) := x**i
        phi2(i, x) := x**i
        psi1(i, x) := x**i
        psi2(i, x) := x**i
        a(x, y) := 1

        A[i,j,ip,jp] = sum(k,sum(l,
            phi1(i,x[0,k]) * phi2(j,x[1,l])
            * psi1(ip, x[0,k]) * psi2(jp, x[1, l])
            * w[0,k] * w[1,l]
            * a(x[0,k], x[1,l])
        ))
        """)

    pytest.xfail("extract_subst is currently too stupid for sum factorization")

    # Integrand in the first coordinate, then the same wrapped in sum(k, ...).
    integrand_template = "phi1(i,x[0,k]) *psi1(ip, x[0,k]) * w[0,k]"
    reduction_template = "sum(k, phi1(i,x[0,k]) *psi1(ip, x[0,k]) * w[0,k])"

    kernel = lp.extract_subst(kernel, "temp_array", integrand_template)
    kernel = lp.extract_subst(kernel, "temp_array", reduction_template)

    print(kernel)
def test_precompute_with_preexisting_inames_fail():
    """Expect ``lp.LoopyError`` when a second precompute reuses ``ii,jj``.

    ``k`` ranges over ``2*n`` while ``i`` and ``j`` range over ``n``. The
    first precompute (``D1_subst``) creates ``ii,jj``; the second one
    (``D2_subst``, swept over ``i,k``) asks for the same inames and must raise.
    """
    domain = "{[e,i,j,k]: 0<=e<E and 0<=i,j<n and 0<=k<2*n}"
    instructions = """
        result[e,i] = sum(j, D1[i,j]*u[e,j])
        result2[e,i] = sum(k, D2[i,k]*u[e,k])
        """
    knl = lp.make_kernel(domain, instructions)

    dtypes = dict.fromkeys(("u", "D1", "D2"), np.float32)
    knl = lp.add_and_infer_dtypes(knl, dtypes)
    knl = lp.fix_parameters(knl, n=13)

    for rule_name, template in (
            ("D1_subst", "D1[ii,jj]"),
            ("D2_subst", "D2[ii,jj]")):
        knl = lp.extract_subst(knl, rule_name, template, parameters="ii,jj")

    # This one succeeds and introduces the inames ii,jj.
    knl = lp.precompute(
        knl, "D1_subst", "i,j",
        default_tag="for", precompute_inames="ii,jj")

    # Only the second precompute is inside the raises-block.
    with pytest.raises(lp.LoopyError):
        lp.precompute(
            knl, "D2_subst", "i,k",
            default_tag="for", precompute_inames="ii,jj")
def test_matmul(ctx_factory, buffer_inames):
    """Tile, prefetch and buffer a Fortran DGEMM and compare to the original.

    :arg ctx_factory: callable returning the context used for the comparison.
    :arg buffer_inames: forwarded unchanged to ``lp.buffer_array``; when it is
        truthy and the first device's platform is pocl, the test is skipped.
    """
    ctx = ctx_factory()

    # Platform is only inspected when buffer_inames is truthy.
    if buffer_inames:
        if ctx.devices[0].platform.name == "Portable Computing Language":
            pytest.skip("crashes on pocl")

    logging.basicConfig(level=logging.INFO)

    fortran_src = """
        subroutine dgemm(m,n,ell,a,b,c)
          implicit none
          real*8 a(m,ell),b(ell,n),c(m,n)
          integer m,n,k,i,j,ell

          do j = 1,n
            do i = 1,m
              do k = 1,ell
                c(i,j) = c(i,j) + b(k,j)*a(i,k)
              end do
            end do
          end do
        end subroutine
        """

    (knl,) = lp.parse_fortran(fortran_src)
    assert len(knl.domains) == 1

    # Untransformed kernel serves as the reference.
    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8, outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 32)

    for assumption in ("n mod 32 = 0", "m mod 32 = 0", "ell mod 16 = 0"):
        knl = lp.assume(knl, assumption)

    for rule_name, template in (("a_acc", "a[i1,i2]"), ("b_acc", "b[i1,i2]")):
        knl = lp.extract_subst(knl, rule_name, template, parameters="i1, i2")

    for rule_name, sweep_inames in (
            ("a_acc", "k_inner,i_inner"),
            ("b_acc", "j_inner,k_inner")):
        knl = lp.precompute(
            knl, rule_name, sweep_inames,
            precompute_outer_inames="i_outer, j_outer, k_outer",
            default_tag="l.auto")

    knl = lp.buffer_array(
        knl, "c",
        buffer_inames=buffer_inames,
        init_expression="0",
        store_expression="base+buffer")

    sizes = {"n": 128, "m": 128, "ell": 128}
    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=sizes)
def test_precompute_some_exist(ctx_factory):
    """Precompute ``b_acc`` onto an iname that ``a_acc`` already created.

    The ``a_acc`` precompute introduces ``ktemp,itemp``; the ``b_acc``
    precompute then asks for ``itemp,k2temp``, so ``itemp`` already exists
    when it runs. The transformed kernel is compared with the kernel as
    parsed from Fortran.

    :arg ctx_factory: callable returning the context used for the comparison.
    """
    fortran_src = """
        subroutine dgemm(m,n,ell,a,b,c)
          implicit none
          real*8 a(m,ell),b(ell,n),c(m,n)
          integer m,n,k,i,j,ell

          do j = 1,n
            do i = 1,m
              do k = 1,ell
                c(i,j) = c(i,j) + b(k,j)*a(i,k)
              end do
            end do
          end do
        end subroutine
        """

    knl = lp.parse_fortran(fortran_src)

    assert len(knl["dgemm"].domains) == 1

    # Capture the reference BEFORE transforming. Binding it after the
    # precompute calls would compare the transformed kernel with itself and
    # could never detect an incorrect transformation.
    ref_knl = knl

    knl = lp.split_iname(knl, "i", 8,
            outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8,
            outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 8)
    knl = lp.assume(knl, "n mod 8 = 0")
    knl = lp.assume(knl, "m mod 8 = 0")
    knl = lp.assume(knl, "ell mod 8 = 0")

    knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
    knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
    knl = lp.precompute(knl, "a_acc", "k_inner,i_inner",
            precompute_inames="ktemp,itemp",
            precompute_outer_inames="i_outer, j_outer, k_outer",
            default_tag="l.auto")
    # itemp was created by the a_acc precompute above; k2temp is new.
    knl = lp.precompute(knl, "b_acc", "j_inner,k_inner",
            precompute_inames="itemp,k2temp",
            precompute_outer_inames="i_outer, j_outer, k_outer",
            default_tag="l.auto")

    ctx = ctx_factory()
    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=128, m=128, ell=128))
def test_matmul(ctx_factory, buffer_inames):
    """Tile, precompute and buffer a Fortran DGEMM; compare to the original.

    :arg ctx_factory: callable returning the context used for the comparison;
        it is invoked only after all transformations have been applied.
    :arg buffer_inames: forwarded unchanged to ``lp.buffer_array``.
    """
    logging.basicConfig(level=logging.INFO)

    fortran_src = """
        subroutine dgemm(m,n,l,a,b,c)
          implicit none
          real*8 a(m,l),b(l,n),c(m,n)
          integer m,n,k,i,j,l

          do j = 1,n
            do i = 1,m
              do k = 1,l
                c(i,j) = c(i,j) + b(k,j)*a(i,k)
              end do
            end do
          end do
        end subroutine
        """

    (knl,) = lp.parse_fortran(fortran_src)
    assert len(knl.domains) == 1

    # Untransformed kernel serves as the reference.
    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8, outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 32)

    for assumption in ("n mod 32 = 0", "m mod 32 = 0", "l mod 16 = 0"):
        knl = lp.assume(knl, assumption)

    for rule_name, template in (("a_acc", "a[i1,i2]"), ("b_acc", "b[i1,i2]")):
        knl = lp.extract_subst(knl, rule_name, template, parameters="i1, i2")

    for rule_name, sweep_inames in (
            ("a_acc", "k_inner,i_inner"),
            ("b_acc", "j_inner,k_inner")):
        knl = lp.precompute(knl, rule_name, sweep_inames)

    knl = lp.buffer_array(
        knl, "c",
        buffer_inames=buffer_inames,
        init_expression="0",
        store_expression="base+buffer")

    ctx = ctx_factory()
    sizes = {"n": 128, "m": 128, "l": 128}
    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=sizes)
def test_funny_shape_matrix_mul(ctx_factory):
    """Check a tiled, precomputing matmul whose three extents all differ.

    ``a`` is indexed over ``n x m`` and ``b`` over ``m x ell``, with
    ``m = n + 12`` and ``ell = m + 12``. The transformed kernel is compared
    with the untransformed one.

    :arg ctx_factory: callable returning the context used for sizing and for
        the comparison.
    """
    ctx = ctx_factory()

    n = get_suitable_size(ctx)
    m = n + 12
    ell = m + 12

    knl = lp.make_kernel(
        "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
        ["c[i, j] = sum(k, a[i, k]*b[k, j])"],
        name="matmul",
        assumptions="n,m,ell >= 1")

    knl = lp.add_dtypes(knl, {
        "a": np.float32,
        "b": np.float32,
    })

    # Untransformed kernel serves as the reference.
    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8, outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 32)

    # NOTE: the operand fetches are written as extract_subst + precompute.
    # Earlier revisions had (disabled) lp.add_prefetch calls here, on "a"
    # with ["k_inner", "i_inner"] and on "b" with ["j_inner", "k_inner"],
    # both with default_tag="l.auto".
    knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
    knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
    knl = lp.precompute(knl, "a_acc", "k_inner,i_inner",
                        precompute_outer_inames="i_outer, j_outer, k_outer",
                        default_tag="l.auto")
    knl = lp.precompute(knl, "b_acc", "j_inner,k_inner",
                        precompute_outer_inames="i_outer, j_outer, k_outer",
                        default_tag="l.auto")

    # c has n*ell entries, each a sum of m products, i.e. 2*n*m*ell flops.
    # The previous 2*n**3 ignored that m and ell differ from n, so the
    # reported rate was wrong.
    gflop_count = 2 * n * m * ell / 1e9

    lp.auto_test_vs_ref(ref_knl, ctx, knl,
                        op_count=[gflop_count],
                        op_label=["GFlops"],
                        parameters={
                            "n": n,
                            "m": m,
                            "ell": ell
                        })
def test_precompute_some_exist(ctx_factory):
    """Precompute ``b_acc`` onto an iname that ``a_acc`` already created.

    The ``a_acc`` precompute introduces ``ktemp,itemp``; the ``b_acc``
    precompute then requests ``itemp,k2temp``, so ``itemp`` already exists
    at that point. The transformed kernel is compared with the kernel as
    parsed from Fortran.

    :arg ctx_factory: callable returning the context used for the comparison.
    """
    fortran_src = """
        subroutine dgemm(m,n,ell,a,b,c)
          implicit none
          real*8 a(m,ell),b(ell,n),c(m,n)
          integer m,n,k,i,j,ell

          do j = 1,n
            do i = 1,m
              do k = 1,ell
                c(i,j) = c(i,j) + b(k,j)*a(i,k)
              end do
            end do
          end do
        end subroutine
        """

    knl, = lp.parse_fortran(fortran_src)

    assert len(knl.domains) == 1

    # Take the reference before any transformation. Assigning it after the
    # precompute calls made auto_test_vs_ref compare the kernel with itself,
    # which cannot reveal a wrong result.
    ref_knl = knl

    knl = lp.split_iname(knl, "i", 8,
            outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8,
            outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 8)
    knl = lp.assume(knl, "n mod 8 = 0")
    knl = lp.assume(knl, "m mod 8 = 0")
    knl = lp.assume(knl, "ell mod 8 = 0")

    knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
    knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
    knl = lp.precompute(knl, "a_acc", "k_inner,i_inner",
            precompute_inames="ktemp,itemp",
            default_tag="l.auto")
    # itemp was created by the a_acc precompute above; k2temp is new.
    knl = lp.precompute(knl, "b_acc", "j_inner,k_inner",
            precompute_inames="itemp,k2temp",
            default_tag="l.auto")

    ctx = ctx_factory()
    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=128, m=128, ell=128))
def test_extract_subst_with_iname_deps_in_templ(ctx_factory):
    """Extract a rule whose template mentions the kernel iname ``i``.

    The template ``x[i, arg1, arg2]`` has ``arg1``/``arg2`` as rule
    parameters while ``i`` is an iname of the kernel. The kernel after
    extraction is compared with the kernel before it.

    :arg ctx_factory: callable returning the context used for the comparison.
    """
    knl = lp.make_kernel(
            "{[i, j, k]: 0<=i<100 and 0<=j,k<5}",
            """
            y[i, j, k] = x[i, j, k]
            """,
            [lp.GlobalArg('x,y', shape=lp.auto, dtype=float)],
            lang_version=(2018, 2))

    # Reference is the kernel before extraction. Passing the transformed
    # kernel as its own reference only checked that it runs, not that
    # extract_subst preserved the result.
    ref_knl = knl

    knl = lp.extract_subst(knl, 'rule1', 'x[i, arg1, arg2]',
            parameters=('arg1', 'arg2'))

    lp.auto_test_vs_ref(ref_knl, ctx_factory(), knl)
def test_funny_shape_matrix_mul(ctx_factory):
    """Check a tiled, precomputing matmul with three distinct extents.

    ``a`` is indexed over ``n x m`` and ``b`` over ``m x ell``, where
    ``m = n+12`` and ``ell = m+12``. The transformed kernel is compared with
    the untransformed one.

    :arg ctx_factory: callable returning the context used for sizing and for
        the comparison.
    """
    ctx = ctx_factory()

    n = get_suitable_size(ctx)
    m = n+12
    ell = m+12

    knl = lp.make_kernel(
            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
            [
                "c[i, j] = sum(k, a[i, k]*b[k, j])"
                ],
            name="matmul", assumptions="n,m,ell >= 1")

    knl = lp.add_dtypes(knl, {
        "a": np.float32,
        "b": np.float32,
        })

    # Untransformed kernel serves as the reference.
    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16,
            outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8,
            outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 32)

    # NOTE: operand fetches are spelled out as extract_subst + precompute.
    # Earlier revisions had (disabled) lp.add_prefetch calls here, on "a"
    # with ["k_inner", "i_inner"] and on "b" with ["j_inner", "k_inner"],
    # both with default_tag="l.auto".
    knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
    knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
    knl = lp.precompute(knl, "a_acc", "k_inner,i_inner", default_tag="l.auto")
    knl = lp.precompute(knl, "b_acc", "j_inner,k_inner", default_tag="l.auto")

    # Each of the n*ell entries of c sums m products: 2*n*m*ell flops.
    # The former 2*n**3 was only right for n == m == ell, which never holds
    # here.
    lp.auto_test_vs_ref(ref_knl, ctx, knl,
            op_count=[2*n*m*ell/1e9], op_label=["GFlops"],
            parameters={"n": n, "m": m, "ell": ell})
def test_extract_subst(ctx_factory):
    """Extract ``alpha*b[i]**2`` as rule ``bsquare`` and check the rewrite.

    After extraction the single instruction's right-hand side must equal
    ``bsquare(23) + bsquare(25)``.

    :arg ctx_factory: accepted but not used by the body.
    """
    from loopy.symbolic import parse

    domain = "{[i]: 0<=i<n}"
    instructions = """
        a[i] = 23*b[i]**2 + 25*b[i]**2
        """
    knl = lp.make_kernel(domain, instructions)

    knl = lp.extract_subst(knl, "bsquare", "alpha*b[i]**2", "alpha")
    print(knl)

    # Unpacking doubles as a check that exactly one instruction exists.
    (insn,) = knl.instructions
    expected = parse("bsquare(23) + bsquare(25)")
    assert insn.expression == expected
def test_fd_1d(ctx_factory):
    """Precompute the ``u`` accesses of a 1D difference and compare results.

    The kernel ``result[i] = u[i+1]-u[i]`` is split on ``i`` by 16, accesses
    to ``u`` are extracted as rule ``u_acc`` and precomputed over ``i_inner``;
    the result is checked against the untransformed kernel with ``n=2048``.

    :arg ctx_factory: callable returning the context used for the comparison.
    """
    ctx = ctx_factory()

    domain = "{[i]: 0<=i<n}"
    stencil = "result[i] = u[i+1]-u[i]"
    knl = lp.add_and_infer_dtypes(
        lp.make_kernel(domain, stencil),
        {"u": np.float32})

    # Untransformed kernel serves as the reference.
    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16)
    knl = lp.extract_subst(knl, "u_acc", "u[j]", parameters="j")
    knl = lp.precompute(knl, "u_acc", "i_inner", default_tag="for")
    knl = lp.assume(knl, "n mod 16 = 0")

    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"n": 2048})
def test_extract_subst(ctx_factory):
    """Extract ``alpha*b[i]**2`` as rule ``bsquare`` and check the rewrite.

    The kernel is created under the name ``extract_subst`` and looked up by
    that name afterwards; its single instruction's right-hand side must equal
    ``bsquare(23) + bsquare(25)``.

    :arg ctx_factory: accepted but not used by the body.
    """
    from loopy.symbolic import parse

    kernel_name = "extract_subst"
    prog = lp.make_kernel(
        "{[i]: 0<=i<n}",
        """
        a[i] = 23*b[i]**2 + 25*b[i]**2
        """,
        name=kernel_name)

    prog = lp.extract_subst(prog, "bsquare", "alpha*b[i]**2", "alpha")
    print(prog)

    # Unpacking doubles as a check that exactly one instruction exists.
    (insn,) = prog[kernel_name].instructions
    expected = parse("bsquare(23) + bsquare(25)")
    assert insn.expression == expected
def test_fd_1d(ctx_factory):
    """Compare a precomputed 1D forward difference with the plain kernel.

    Steps applied to ``result[i] = u[i+1]-u[i]``: split ``i`` by 16, extract
    the ``u`` accesses as rule ``u_acc``, precompute that rule over
    ``i_inner``, and assume ``n`` is a multiple of 16.

    :arg ctx_factory: callable returning the context used for the comparison.
    """
    ctx = ctx_factory()

    base_knl = lp.make_kernel(
        "{[i]: 0<=i<n}",
        "result[i] = u[i+1]-u[i]")
    base_knl = lp.add_and_infer_dtypes(base_knl, {"u": np.float32})

    # base_knl stays untouched and is the reference; transformed_knl
    # receives the transformations.
    transformed_knl = lp.split_iname(base_knl, "i", 16)
    transformed_knl = lp.extract_subst(
        transformed_knl, "u_acc", "u[j]", parameters="j")
    transformed_knl = lp.precompute(
        transformed_knl, "u_acc", "i_inner", default_tag="for")
    transformed_knl = lp.assume(transformed_knl, "n mod 16 = 0")

    lp.auto_test_vs_ref(
        base_knl, ctx, transformed_knl,
        parameters={"n": 2048})