예제 #1
0
def test_sum_factorization():
    knl = lp.make_kernel(
        "{[i,j,ip,jp,k,l]: "
        "0<=i<I and 0<=j<J and 0<=ip<IP and 0<=jp<JP and 0<=k,l<Q}",
        """
        phi1(i, x) := x**i
        phi2(i, x) := x**i
        psi1(i, x) := x**i
        psi2(i, x) := x**i
        a(x, y) := 1

        A[i,j,ip,jp] = sum(k,sum(l,
            phi1(i,x[0,k]) * phi2(j,x[1,l])
            * psi1(ip, x[0,k]) * psi2(jp, x[1, l])
            * w[0,k] * w[1,l]
            * a(x[0,k], x[1,l])
        ))
        """)

    pytest.xfail("extract_subst is currently too stupid for sum factorization")

    knl = lp.extract_subst(knl, "temp_array",
            "phi1(i,x[0,k]) *psi1(ip, x[0,k]) * w[0,k]")
    knl = lp.extract_subst(knl, "temp_array",
            "sum(k, phi1(i,x[0,k]) *psi1(ip, x[0,k]) * w[0,k])")

    print(knl)
예제 #2
0
def test_precompute_with_preexisting_inames(ctx_factory):
    ctx = ctx_factory()

    knl = lp.make_kernel(
        "{[e,i,j,k]: 0<=e<E and 0<=i,j,k<n}",
        """
        result[e,i] = sum(j, D1[i,j]*u[e,j])
        result2[e,i] = sum(k, D2[i,k]*u[e,k])
        """)

    knl = lp.add_and_infer_dtypes(knl, {
        "u": np.float32,
        "D1": np.float32,
        "D2": np.float32,
        })

    knl = lp.fix_parameters(knl, n=13)

    ref_knl = knl

    knl = lp.extract_subst(knl, "D1_subst", "D1[ii,jj]", parameters="ii,jj")
    knl = lp.extract_subst(knl, "D2_subst", "D2[ii,jj]", parameters="ii,jj")

    knl = lp.precompute(knl, "D1_subst", "i,j", default_tag="for",
            precompute_inames="ii,jj")
    knl = lp.precompute(knl, "D2_subst", "i,k", default_tag="for",
            precompute_inames="ii,jj")

    knl = lp.prioritize_loops(knl, "ii,jj,e,j,k")

    lp.auto_test_vs_ref(
            ref_knl, ctx, knl,
            parameters=dict(E=200))
예제 #3
0
def test_precompute_with_preexisting_inames(ctx_factory):
    ctx = ctx_factory()

    knl = lp.make_kernel(
        "{[e,i,j,k]: 0<=e<E and 0<=i,j,k<n}",
        """
        result[e,i] = sum(j, D1[i,j]*u[e,j])
        result2[e,i] = sum(k, D2[i,k]*u[e,k])
        """)

    knl = lp.add_and_infer_dtypes(knl, {
        "u": np.float32,
        "D1": np.float32,
        "D2": np.float32,
        })

    knl = lp.fix_parameters(knl, n=13)

    ref_knl = knl

    knl = lp.extract_subst(knl, "D1_subst", "D1[ii,jj]", parameters="ii,jj")
    knl = lp.extract_subst(knl, "D2_subst", "D2[ii,jj]", parameters="ii,jj")

    knl = lp.precompute(knl, "D1_subst", "i,j", default_tag="for",
            precompute_inames="ii,jj")
    knl = lp.precompute(knl, "D2_subst", "i,k", default_tag="for",
            precompute_inames="ii,jj")

    knl = lp.set_loop_priority(knl, "ii,jj,e,j,k")

    lp.auto_test_vs_ref(
            ref_knl, ctx, knl,
            parameters=dict(E=200))
예제 #4
0
파일: test_apps.py 프로젝트: dokempf/loopy
def test_sum_factorization():
    knl = lp.make_kernel(
        "{[i,j,ip,jp,k,l]: "
        "0<=i<I and 0<=j<J and 0<=ip<IP and 0<=jp<JP and 0<=k,l<Q}", """
        phi1(i, x) := x**i
        phi2(i, x) := x**i
        psi1(i, x) := x**i
        psi2(i, x) := x**i
        a(x, y) := 1

        A[i,j,ip,jp] = sum(k,sum(l,
            phi1(i,x[0,k]) * phi2(j,x[1,l])
            * psi1(ip, x[0,k]) * psi2(jp, x[1, l])
            * w[0,k] * w[1,l]
            * a(x[0,k], x[1,l])
        ))
        """)

    pytest.xfail("extract_subst is currently too stupid for sum factorization")

    knl = lp.extract_subst(knl, "temp_array",
                           "phi1(i,x[0,k]) *psi1(ip, x[0,k]) * w[0,k]")
    knl = lp.extract_subst(
        knl, "temp_array", "sum(k, phi1(i,x[0,k]) *psi1(ip, x[0,k]) * w[0,k])")

    print(knl)
예제 #5
0
def test_precompute_with_preexisting_inames_fail():
    knl = lp.make_kernel(
        "{[e,i,j,k]: 0<=e<E and 0<=i,j<n and 0<=k<2*n}", """
        result[e,i] = sum(j, D1[i,j]*u[e,j])
        result2[e,i] = sum(k, D2[i,k]*u[e,k])
        """)

    knl = lp.add_and_infer_dtypes(knl, {
        "u": np.float32,
        "D1": np.float32,
        "D2": np.float32,
    })

    knl = lp.fix_parameters(knl, n=13)

    knl = lp.extract_subst(knl, "D1_subst", "D1[ii,jj]", parameters="ii,jj")
    knl = lp.extract_subst(knl, "D2_subst", "D2[ii,jj]", parameters="ii,jj")

    knl = lp.precompute(knl,
                        "D1_subst",
                        "i,j",
                        default_tag="for",
                        precompute_inames="ii,jj")
    with pytest.raises(lp.LoopyError):
        lp.precompute(knl,
                      "D2_subst",
                      "i,k",
                      default_tag="for",
                      precompute_inames="ii,jj")
예제 #6
0
def test_matmul(ctx_factory, buffer_inames):
    ctx = ctx_factory()

    if (buffer_inames and
            ctx.devices[0].platform.name == "Portable Computing Language"):
        pytest.skip("crashes on pocl")

    logging.basicConfig(level=logging.INFO)

    fortran_src = """
        subroutine dgemm(m,n,ell,a,b,c)
          implicit none
          real*8 a(m,ell),b(ell,n),c(m,n)
          integer m,n,k,i,j,ell

          do j = 1,n
            do i = 1,m
              do k = 1,ell
                c(i,j) = c(i,j) + b(k,j)*a(i,k)
              end do
            end do
          end do
        end subroutine
        """

    knl, = lp.parse_fortran(fortran_src)

    assert len(knl.domains) == 1

    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16,
            outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8,
            outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 32)
    knl = lp.assume(knl, "n mod 32 = 0")
    knl = lp.assume(knl, "m mod 32 = 0")
    knl = lp.assume(knl, "ell mod 16 = 0")

    knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
    knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
    knl = lp.precompute(knl, "a_acc", "k_inner,i_inner",
            precompute_outer_inames='i_outer, j_outer, k_outer',
            default_tag="l.auto")
    knl = lp.precompute(knl, "b_acc", "j_inner,k_inner",
            precompute_outer_inames='i_outer, j_outer, k_outer',
            default_tag="l.auto")

    knl = lp.buffer_array(knl, "c", buffer_inames=buffer_inames,
            init_expression="0", store_expression="base+buffer")

    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=128, m=128, ell=128))
예제 #7
0
def test_precompute_some_exist(ctx_factory):
    fortran_src = """
        subroutine dgemm(m,n,ell,a,b,c)
          implicit none
          real*8 a(m,ell),b(ell,n),c(m,n)
          integer m,n,k,i,j,ell

          do j = 1,n
            do i = 1,m
              do k = 1,ell
                c(i,j) = c(i,j) + b(k,j)*a(i,k)
              end do
            end do
          end do
        end subroutine
        """

    knl = lp.parse_fortran(fortran_src)

    assert len(knl["dgemm"].domains) == 1

    knl = lp.split_iname(knl, "i", 8, outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8, outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 8)
    knl = lp.assume(knl, "n mod 8 = 0")
    knl = lp.assume(knl, "m mod 8 = 0")
    knl = lp.assume(knl, "ell mod 8 = 0")

    knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
    knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
    knl = lp.precompute(knl,
                        "a_acc",
                        "k_inner,i_inner",
                        precompute_inames="ktemp,itemp",
                        precompute_outer_inames="i_outer, j_outer, k_outer",
                        default_tag="l.auto")
    knl = lp.precompute(knl,
                        "b_acc",
                        "j_inner,k_inner",
                        precompute_inames="itemp,k2temp",
                        precompute_outer_inames="i_outer, j_outer, k_outer",
                        default_tag="l.auto")

    ref_knl = knl

    ctx = ctx_factory()
    lp.auto_test_vs_ref(ref_knl,
                        ctx,
                        knl,
                        parameters=dict(n=128, m=128, ell=128))
예제 #8
0
def test_matmul(ctx_factory, buffer_inames):
    logging.basicConfig(level=logging.INFO)

    fortran_src = """
        subroutine dgemm(m,n,l,a,b,c)
          implicit none
          real*8 a(m,l),b(l,n),c(m,n)
          integer m,n,k,i,j,l

          do j = 1,n
            do i = 1,m
              do k = 1,l
                c(i,j) = c(i,j) + b(k,j)*a(i,k)
              end do
            end do
          end do
        end subroutine
        """

    knl, = lp.parse_fortran(fortran_src)

    assert len(knl.domains) == 1

    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8, outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 32)
    knl = lp.assume(knl, "n mod 32 = 0")
    knl = lp.assume(knl, "m mod 32 = 0")
    knl = lp.assume(knl, "l mod 16 = 0")

    knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
    knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
    knl = lp.precompute(knl, "a_acc", "k_inner,i_inner")
    knl = lp.precompute(knl, "b_acc", "j_inner,k_inner")

    knl = lp.buffer_array(knl,
                          "c",
                          buffer_inames=buffer_inames,
                          init_expression="0",
                          store_expression="base+buffer")

    ctx = ctx_factory()
    lp.auto_test_vs_ref(ref_knl,
                        ctx,
                        knl,
                        parameters=dict(n=128, m=128, l=128))
예제 #9
0
def test_funny_shape_matrix_mul(ctx_factory):
    ctx = ctx_factory()

    n = get_suitable_size(ctx)
    m = n + 12
    ell = m + 12

    knl = lp.make_kernel("{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
                         ["c[i, j] = sum(k, a[i, k]*b[k, j])"],
                         name="matmul",
                         assumptions="n,m,ell >= 1")

    knl = lp.add_dtypes(knl, {
        "a": np.float32,
        "b": np.float32,
    })

    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8, outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 32)

    #knl = lp.add_prefetch(knl, "a", ["k_inner", "i_inner"], default_tag="l.auto")
    #knl = lp.add_prefetch(knl, "b", ["j_inner", "k_inner"], default_tag="l.auto")
    knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
    knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
    knl = lp.precompute(knl,
                        "a_acc",
                        "k_inner,i_inner",
                        precompute_outer_inames="i_outer, j_outer, k_outer",
                        default_tag="l.auto")
    knl = lp.precompute(knl,
                        "b_acc",
                        "j_inner,k_inner",
                        precompute_outer_inames="i_outer, j_outer, k_outer",
                        default_tag="l.auto")

    lp.auto_test_vs_ref(ref_knl,
                        ctx,
                        knl,
                        op_count=[2 * n**3 / 1e9],
                        op_label=["GFlops"],
                        parameters={
                            "n": n,
                            "m": m,
                            "ell": ell
                        })
예제 #10
0
def test_matmul(ctx_factory, buffer_inames):
    logging.basicConfig(level=logging.INFO)

    fortran_src = """
        subroutine dgemm(m,n,l,a,b,c)
          implicit none
          real*8 a(m,l),b(l,n),c(m,n)
          integer m,n,k,i,j,l

          do j = 1,n
            do i = 1,m
              do k = 1,l
                c(i,j) = c(i,j) + b(k,j)*a(i,k)
              end do
            end do
          end do
        end subroutine
        """

    knl, = lp.parse_fortran(fortran_src)

    assert len(knl.domains) == 1

    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16,
            outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8,
            outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 32)
    knl = lp.assume(knl, "n mod 32 = 0")
    knl = lp.assume(knl, "m mod 32 = 0")
    knl = lp.assume(knl, "l mod 16 = 0")

    knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
    knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
    knl = lp.precompute(knl, "a_acc", "k_inner,i_inner")
    knl = lp.precompute(knl, "b_acc", "j_inner,k_inner")

    knl = lp.buffer_array(knl, "c", buffer_inames=buffer_inames,
            init_expression="0", store_expression="base+buffer")

    ctx = ctx_factory()
    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=128, m=128, l=128))
예제 #11
0
def test_precompute_some_exist(ctx_factory):
    fortran_src = """
        subroutine dgemm(m,n,ell,a,b,c)
          implicit none
          real*8 a(m,ell),b(ell,n),c(m,n)
          integer m,n,k,i,j,ell

          do j = 1,n
            do i = 1,m
              do k = 1,ell
                c(i,j) = c(i,j) + b(k,j)*a(i,k)
              end do
            end do
          end do
        end subroutine
        """

    knl, = lp.parse_fortran(fortran_src)

    assert len(knl.domains) == 1

    knl = lp.split_iname(knl, "i", 8,
            outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8,
            outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 8)
    knl = lp.assume(knl, "n mod 8 = 0")
    knl = lp.assume(knl, "m mod 8 = 0")
    knl = lp.assume(knl, "ell mod 8 = 0")

    knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
    knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
    knl = lp.precompute(knl, "a_acc", "k_inner,i_inner",
            precompute_inames="ktemp,itemp",
            default_tag="l.auto")
    knl = lp.precompute(knl, "b_acc", "j_inner,k_inner",
            precompute_inames="itemp,k2temp",
            default_tag="l.auto")

    ref_knl = knl

    ctx = ctx_factory()
    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=128, m=128, ell=128))
예제 #12
0
def test_extract_subst_with_iname_deps_in_templ(ctx_factory):
    knl = lp.make_kernel("{[i, j, k]: 0<=i<100 and 0<=j,k<5}",
                         """
            y[i, j, k] = x[i, j, k]
            """, [lp.GlobalArg('x,y', shape=lp.auto, dtype=float)],
                         lang_version=(2018, 2))

    knl = lp.extract_subst(knl,
                           'rule1',
                           'x[i, arg1, arg2]',
                           parameters=('arg1', 'arg2'))

    lp.auto_test_vs_ref(knl, ctx_factory(), knl)
예제 #13
0
def test_funny_shape_matrix_mul(ctx_factory):
    ctx = ctx_factory()

    n = get_suitable_size(ctx)
    m = n+12
    ell = m+12

    knl = lp.make_kernel(
            "{[i,k,j]: 0<=i<n and 0<=k<m and 0<=j<ell}",
            [
                "c[i, j] = sum(k, a[i, k]*b[k, j])"
                ],
            name="matmul", assumptions="n,m,ell >= 1")

    knl = lp.add_dtypes(knl, {
        "a": np.float32,
        "b": np.float32,
        })

    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16,
            outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 8,
            outer_tag="g.1", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 32)

    #knl = lp.add_prefetch(knl, "a", ["k_inner", "i_inner"], default_tag="l.auto")
    #knl = lp.add_prefetch(knl, "b", ["j_inner", "k_inner"], default_tag="l.auto")
    knl = lp.extract_subst(knl, "a_acc", "a[i1,i2]", parameters="i1, i2")
    knl = lp.extract_subst(knl, "b_acc", "b[i1,i2]", parameters="i1, i2")
    knl = lp.precompute(knl, "a_acc", "k_inner,i_inner",
            default_tag="l.auto")
    knl = lp.precompute(knl, "b_acc", "j_inner,k_inner",
            default_tag="l.auto")

    lp.auto_test_vs_ref(ref_knl, ctx, knl,
            op_count=[2*n**3/1e9], op_label=["GFlops"],
            parameters={"n": n, "m": m, "ell": ell})
예제 #14
0
def test_extract_subst(ctx_factory):
    knl = lp.make_kernel(
        "{[i]: 0<=i<n}", """
                a[i] = 23*b[i]**2 + 25*b[i]**2
                """)

    knl = lp.extract_subst(knl, "bsquare", "alpha*b[i]**2", "alpha")

    print(knl)

    from loopy.symbolic import parse

    insn, = knl.instructions
    assert insn.expression == parse("bsquare(23) + bsquare(25)")
예제 #15
0
파일: test_apps.py 프로젝트: arghdos/loopy
def test_fd_1d(ctx_factory):
    ctx = ctx_factory()

    knl = lp.make_kernel("{[i]: 0<=i<n}", "result[i] = u[i+1]-u[i]")

    knl = lp.add_and_infer_dtypes(knl, {"u": np.float32})
    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16)
    knl = lp.extract_subst(knl, "u_acc", "u[j]", parameters="j")
    knl = lp.precompute(knl, "u_acc", "i_inner", default_tag="for")
    knl = lp.assume(knl, "n mod 16 = 0")

    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=2048))
예제 #16
0
def test_extract_subst(ctx_factory):
    knl = lp.make_kernel(
            "{[i]: 0<=i<n}",
            """
                a[i] = 23*b[i]**2 + 25*b[i]**2
                """)

    knl = lp.extract_subst(knl, "bsquare", "alpha*b[i]**2", "alpha")

    print(knl)

    from loopy.symbolic import parse

    insn, = knl.instructions
    assert insn.expression == parse("bsquare(23) + bsquare(25)")
예제 #17
0
def test_precompute_with_preexisting_inames_fail():
    knl = lp.make_kernel(
        "{[e,i,j,k]: 0<=e<E and 0<=i,j<n and 0<=k<2*n}",
        """
        result[e,i] = sum(j, D1[i,j]*u[e,j])
        result2[e,i] = sum(k, D2[i,k]*u[e,k])
        """)

    knl = lp.add_and_infer_dtypes(knl, {
        "u": np.float32,
        "D1": np.float32,
        "D2": np.float32,
        })

    knl = lp.fix_parameters(knl, n=13)

    knl = lp.extract_subst(knl, "D1_subst", "D1[ii,jj]", parameters="ii,jj")
    knl = lp.extract_subst(knl, "D2_subst", "D2[ii,jj]", parameters="ii,jj")

    knl = lp.precompute(knl, "D1_subst", "i,j", default_tag="for",
            precompute_inames="ii,jj")
    with pytest.raises(lp.LoopyError):
        lp.precompute(knl, "D2_subst", "i,k", default_tag="for",
                precompute_inames="ii,jj")
예제 #18
0
def test_extract_subst(ctx_factory):
    prog = lp.make_kernel("{[i]: 0<=i<n}",
                          """
                a[i] = 23*b[i]**2 + 25*b[i]**2
                """,
                          name="extract_subst")

    prog = lp.extract_subst(prog, "bsquare", "alpha*b[i]**2", "alpha")

    print(prog)

    from loopy.symbolic import parse

    insn, = prog["extract_subst"].instructions
    assert insn.expression == parse("bsquare(23) + bsquare(25)")
예제 #19
0
def test_fd_1d(ctx_factory):
    ctx = ctx_factory()

    knl = lp.make_kernel(
        "{[i]: 0<=i<n}",
        "result[i] = u[i+1]-u[i]")

    knl = lp.add_and_infer_dtypes(knl, {"u": np.float32})
    ref_knl = knl

    knl = lp.split_iname(knl, "i", 16)
    knl = lp.extract_subst(knl, "u_acc", "u[j]", parameters="j")
    knl = lp.precompute(knl, "u_acc", "i_inner", default_tag="for")
    knl = lp.assume(knl, "n mod 16 = 0")

    lp.auto_test_vs_ref(
            ref_knl, ctx, knl,
            parameters=dict(n=2048))