コード例 #1
0
def test_vectorize(ctx_factory):
    """Check a ``vec``-tagged kernel against an ``unr``-tagged reference.

    A two-instruction kernel (``temp = 2*b[i]``, ``a[i] = temp``) is built,
    axis 0 of ``a`` and ``b`` is split by 4, and the resulting data axes are
    tagged ``c,vec``.  The ``i_inner`` iname is then tagged ``unr`` for the
    reference and ``vec`` for the kernel under test, and the two are
    compared with ``lp.auto_test_vs_ref`` for ``n=30``.

    :arg ctx_factory: zero-argument callable returning the context that is
        handed to ``lp.auto_test_vs_ref``.
    """
    context = ctx_factory()

    instructions = """
        <> temp = 2*b[i]
        a[i] = temp
        """
    base = lp.make_kernel("{[i]: 0<=i<n}", instructions)
    base = lp.add_and_infer_dtypes(base, {"b": np.float32})
    base = lp.set_array_dim_names(base, "a,b", "i")
    base = lp.split_array_dim(
        base, [("a", 0), ("b", 0)], 4, split_kwargs={"slabs": (0, 1)})
    base = lp.tag_data_axes(base, "a,b", "c,vec")

    # Both variants derive from the same split kernel; only the tag placed
    # on i_inner differs.
    reference = lp.tag_inames(base, {"i_inner": "unr"})
    vectorized = lp.tag_inames(base, {"i_inner": "vec"})

    vectorized = lp.preprocess_kernel(vectorized)
    vectorized = lp.get_one_scheduled_kernel(vectorized)
    # The generated code is not inspected; the call itself is the check.
    _code, _info = lp.generate_code(vectorized)

    lp.auto_test_vs_ref(reference, context, vectorized, parameters={"n": 30})
コード例 #2
0
ファイル: test_linalg.py プロジェクト: cmsquared/loopy
def test_small_batched_matvec(ctx_factory):
    """Compare a prefetching, padded batched mat-vec kernel to the plain one.

    The kernel computes ``result[k, i] = sum(j, d[i, j]*f[k, j])``.  The
    untransformed kernel serves as the reference; the tested variant adds a
    prefetch of ``d`` and splits/pads axis 0 of ``f`` using a multiple
    obtained from ``lp.find_padding_multiple``.

    :arg ctx_factory: zero-argument callable returning the context that is
        handed to ``lp.auto_test_vs_ref``.
    """
    real = np.float32
    context = ctx_factory()

    layout = "C"
    batch_count = 9997
    block_dim = 36

    domain = "{[i,j,k]: 0<=k<K and 0<= i,j < %d}" % block_dim
    kernel_args = [
        lp.GlobalArg("d", real, shape=(block_dim, block_dim), order=layout),
        lp.GlobalArg("f", real, shape=("K", block_dim), order=layout),
        lp.GlobalArg("result", real, shape=("K", block_dim), order=layout),
        lp.ValueArg("K", np.int32, approximately=1000),
    ]
    reference = lp.make_kernel(
        domain,
        ["result[k, i] = sum(j, d[i, j]*f[k, j])"],
        kernel_args,
        name="batched_matvec", assumptions="K>=1")

    alignment = 64
    tuned = lp.add_prefetch(reference, 'd[:,:]')
    multiple = lp.find_padding_multiple(tuned, "f", 0, alignment)
    tuned = lp.split_array_dim(tuned, ("f", 0), multiple)
    tuned = lp.add_padding(tuned, "f", 0, alignment)

    # Operation count scaled by 1e9 to match the "GFlops" label.
    giga_ops = batch_count*2*block_dim**2/1e9
    lp.auto_test_vs_ref(
        reference, context, tuned,
        op_count=[giga_ops], op_label=["GFlops"],
        parameters={"K": batch_count})
コード例 #3
0
ファイル: test_linalg.py プロジェクト: shwina/loopy
def test_small_batched_matvec(ctx_factory):
    """Test a transformed batched mat-vec kernel against its sequential form.

    Builds ``result[k, i] = sum(j, d[i, j]*f[k, j])``, keeps the
    untransformed kernel as the reference, and then applies a prefetch of
    ``d`` (with ``default_tag="l.auto"``) plus a split and padding of axis 0
    of ``f`` before comparing both via ``lp.auto_test_vs_ref``.

    :arg ctx_factory: zero-argument callable returning the context that is
        handed to ``lp.auto_test_vs_ref``.
    """
    scalar_type = np.float32
    context = ctx_factory()

    storage_order = "C"
    num_batches = 9997
    side = 36

    def global_arg(arg_name, shape):
        # All three arrays share the same dtype and storage order.
        return lp.GlobalArg(
            arg_name, scalar_type, shape=shape, order=storage_order)

    sequential = lp.make_kernel(
        "{[i,j,k]: 0<=k<K and 0<= i,j < %d}" % side,
        ["result[k, i] = sum(j, d[i, j]*f[k, j])"],
        [
            global_arg("d", (side, side)),
            global_arg("f", ("K", side)),
            global_arg("result", ("K", side)),
            lp.ValueArg("K", np.int32, approximately=1000),
        ],
        name="batched_matvec",
        assumptions="K>=1")

    byte_alignment = 64
    padded = lp.add_prefetch(sequential, 'd[:,:]', default_tag="l.auto")
    split_factor = lp.find_padding_multiple(padded, "f", 0, byte_alignment)
    padded = lp.split_array_dim(padded, ("f", 0), split_factor)
    padded = lp.add_padding(padded, "f", 0, byte_alignment)

    # Operation count scaled by 1e9 to match the "GFlops" label.
    scaled_ops = num_batches * 2 * side**2 / 1e9
    lp.auto_test_vs_ref(
        sequential, context, padded,
        op_count=[scaled_ops],
        op_label=["GFlops"],
        parameters={"K": num_batches})
コード例 #4
0
ファイル: test_transform.py プロジェクト: cmsquared/loopy
def test_vectorize(ctx_factory):
    """Verify that tagging ``i_inner`` as ``vec`` matches an ``unr`` reference.

    The kernel doubles ``b[i]`` into a temporary and stores it in ``a[i]``.
    Axis 0 of both arrays is split by 4 and the data axes are tagged
    ``c,vec`` before the reference (``unr``) and tested (``vec``) kernels
    diverge.  The tested kernel is additionally preprocessed, scheduled and
    run through code generation before the comparison with ``n=30``.

    :arg ctx_factory: zero-argument callable returning the context that is
        handed to ``lp.auto_test_vs_ref``.
    """
    context = ctx_factory()

    shared = lp.make_kernel(
        "{[i]: 0<=i<n}",
        """
        <> temp = 2*b[i]
        a[i] = temp
        """)
    shared = lp.add_and_infer_dtypes(shared, {"b": np.float32})
    shared = lp.set_array_dim_names(shared, "a,b", "i")

    split_targets = [("a", 0), ("b", 0)]
    shared = lp.split_array_dim(
        shared, split_targets, 4, split_kwargs={"slabs": (0, 1)})
    shared = lp.tag_data_axes(shared, "a,b", "c,vec")

    # Reference and tested kernels differ only in the i_inner tag.
    unrolled = lp.tag_inames(shared, {"i_inner": "unr"})
    tested = lp.tag_inames(shared, {"i_inner": "vec"})

    tested = lp.get_one_scheduled_kernel(lp.preprocess_kernel(tested))
    # Code generation is exercised here; its output is not examined.
    _source, _details = lp.generate_code(tested)

    lp.auto_test_vs_ref(unrolled, context, tested, parameters={"n": 30})
コード例 #5
0
def set_q_storage_format(kernel, name):
    """Return *kernel* with the storage layout of array *name* reconfigured.

    Three transformations are applied in order: the array's dimensions are
    named ``i,j,k,field,e``; dimension 3 is split by 4 without automatically
    splitting inames; and the resulting axes are tagged
    ``N0,N1,N2,vec,N4,N3``.

    :arg kernel: the kernel to transform.
    :arg name: name of the array whose layout is changed.
    :returns: the transformed kernel.
    """
    named = lp.set_array_dim_names(kernel, name, "i,j,k,field,e")

    # The third tuple entry is forwarded as-is to split_array_dim
    # (presumably an ordering spec -- confirm against the loopy docs).
    split_spec = (name, 3, "F")
    split = lp.split_array_dim(named, split_spec, 4, auto_split_inames=False)

    return lp.tag_data_axes(split, name, "N0,N1,N2,vec,N4,N3")
コード例 #6
0
def set_q_storage_format(kernel, name):
    """Return *kernel* with the storage layout of array *name* reconfigured.

    The array's axes are first named ``i,j,k,field,e``, then axis 3 is split
    by 4 (with automatic iname splitting turned off), and finally the axes
    are tagged ``N0,N1,N2,vec,N4,N3``.

    :arg kernel: the kernel to transform.
    :arg name: name of the array whose layout is changed.
    :returns: the transformed kernel.
    """
    axis_names = "i,j,k,field,e"
    axis_tags = "N0,N1,N2,vec,N4,N3"

    result = lp.set_array_axis_names(kernel, name, axis_names)
    # The third tuple entry is forwarded as-is to split_array_dim
    # (presumably an ordering spec -- confirm against the loopy docs).
    result = lp.split_array_dim(
        result, (name, 3, "F"), 4, auto_split_inames=False)
    result = lp.tag_array_axes(result, name, axis_tags)
    return result
コード例 #7
0
    def variant_fancy_padding(knl):
        """Tag iname ``n`` as ``l.0`` and split axis 0 of the field arrays.

        The split factor comes from ``lp.find_padding_multiple`` evaluated
        on axis 1 of ``u`` with an alignment argument of 32.  The arrays
        split are ``u``, ``v``, ``w``, ``p`` and their ``rhs``-prefixed
        counterparts.

        :arg knl: the kernel to transform.
        :returns: the transformed kernel.
        """
        tagged = lp.tag_inames(knl, {"n": "l.0"})

        multiple = lp.find_padding_multiple(tagged, "u", 1, 32)

        # Each base name is immediately followed by its "rhs" twin:
        # u, rhsu, v, rhsv, w, rhsw, p, rhsp.
        split_axes = []
        for base in ("u", "v", "w", "p"):
            split_axes.append((base, 0))
            split_axes.append(("rhs" + base, 0))

        return lp.split_array_dim(tagged, split_axes, multiple)
コード例 #8
0
ファイル: test_dg.py プロジェクト: navjotk/loopy
    def variant_fancy_padding(knl):
        """Return *knl* with iname ``n`` tagged ``l.0`` and field arrays split.

        Axis 0 of ``u``, ``v``, ``w``, ``p`` and of the matching
        ``rhs``-prefixed arrays is split by the multiple that
        ``lp.find_padding_multiple`` reports for axis 1 of ``u`` with an
        alignment argument of 32.

        :arg knl: the kernel to transform.
        :returns: the transformed kernel.
        """
        result = lp.tag_inames(knl, {"n": "l.0"})

        split_factor = lp.find_padding_multiple(result, "u", 1, 32)

        # Build names field by field so the unprefixed name precedes its
        # "rhs" variant: u, rhsu, v, rhsv, w, rhsw, p, rhsp.
        array_names = []
        for field in ["u", "v", "w", "p"]:
            array_names.extend(prefix + field for prefix in ("", "rhs"))

        axis_specs = [(array_name, 0) for array_name in array_names]
        result = lp.split_array_dim(result, axis_specs, split_factor)
        return result