Esempio n. 1
0
 def __get_knl():
     """Build the ``cache_test`` kernel that copies ``b`` into ``a``.

     The loop variable ``i`` ranges over ``0 <= i < 10``.  ``a`` is declared
     as an int32 global array and ``b`` as a constant array, both with shape
     ``(10,)``.  The kernel is created for ``ExecutableCTarget``.

     :returns: whatever ``lp.make_kernel`` returns for this description.
     """
     instructions = """
         a[i] = b[i]
     """
     kernel_args = [
         lp.GlobalArg('a', shape=(10,), dtype=np.int32),
         # ``(10)`` is just the integer 10; spell the shape as a one-element
         # tuple so it matches the declaration of ``a``.
         lp.ConstantArg('b', shape=(10,)),
     ]
     return lp.make_kernel(
         '{[i]: 0 <= i < 10}',
         instructions,
         kernel_args,
         target=ExecutableCTarget(),
         name='cache_test')
Esempio n. 2
0
 def __get_knl():
     """Build the ``cache_test`` kernel that copies ``b`` into ``a``.

     The loop variable ``i`` ranges over ``0 <= i < 10``.  ``a`` is declared
     as an int32 global array and ``b`` as a constant array, both with shape
     ``(10,)``.  The kernel is created for ``ExecutableCTarget``.

     :returns: whatever ``lp.make_kernel`` returns for this description.
     """
     instructions = """
         a[i] = b[i]
     """
     kernel_args = [
         lp.GlobalArg("a", shape=(10,), dtype=np.int32),
         # ``(10)`` is just the integer 10; spell the shape as a one-element
         # tuple so it matches the declaration of ``a``.
         lp.ConstantArg("b", shape=(10,)),
     ]
     return lp.make_kernel(
         "{[i]: 0 <= i < 10}",
         instructions,
         kernel_args,
         target=ExecutableCTarget(),
         name="cache_test")
Esempio n. 3
0
def test_function_decl_extractor():
    """Run a kernel mixing a global array, a constant array and a scalar.

    The kernel computes ``a[i] = b[i] + v`` for ``0 <= i < 10`` on
    ``ExecutableCTarget``.  It is invoked with ``b = arange(10)`` and
    ``v = -1`` and the result is compared against ``arange(10) - 1``.
    """
    # ensure that we can tell the difference between pointers, constants, etc.
    # in execution
    from loopy.target.c import ExecutableCTarget

    knl = lp.make_kernel(
        '{[i]: 0 <= i < 10}',
        """
            a[i] = b[i] + v
        """,
        [
            lp.GlobalArg('a', shape=(10,), dtype=np.int32),
            # ``(10)`` is just the integer 10; use a one-element tuple so the
            # shape is spelled the same way as for ``a``.
            lp.ConstantArg('b', shape=(10,)),
            lp.ValueArg('v', dtype=np.int32),
        ],
        target=ExecutableCTarget())

    expected = np.arange(10) - 1
    # Element 1 of the value returned by the kernel call is what gets checked.
    computed = knl(b=np.arange(10), v=-1)[1]
    assert np.allclose(computed, expected)
Esempio n. 4
0
    def get_vars(self, cname):
        """Collect the loopy declarations for the component template ``cname``.

        The template is read from ``self.templates[cname]``.  The returned
        list contains, in this order:

        * one ``lp.ConstantArg`` per entry of the template's ``parameters``;
        * for every input, output and state name: an int32 ``lp.ValueArg``
          for each dimension that has not been emitted yet, followed by the
          name itself as ``lp.ValueArg`` (no dimensions) or ``lp.GlobalArg``;
        * one ``lp.TemporaryVariable`` per output of each statement whose
          ``op_type`` and ``op_cat`` are both ``'declaration'``.

        :arg cname: key into ``self.templates``.
        :returns: list of the loopy declaration objects described above.
        """
        template = self.templates[cname]

        def type_and_dims(edge_name):
            # Declared type first, then dimensions, for the named edge.
            edge = template.edge_info[edge_name]
            return (hu.get_attribute_value(edge.attributes['type']),
                    self.get_edge_dims(edge))

        declared = []

        # Parameters are constant arguments
        for param in list(template.parameters):
            elem_type, extents = type_and_dims(param)
            declared.append(lp.ConstantArg(param, elem_type, shape=extents))

        # TODO: add a read-only option for inputs.
        # Inputs, outputs and states are all handled the same way here.
        known_dims = []
        arg_names = (list(template.input) + list(template.output)
                     + list(template.state))
        for name in arg_names:
            elem_type, extents = type_and_dims(name)

            # Each dimension is emitted once, ahead of the first argument
            # that uses it.
            for extent in extents:
                if extent in known_dims:
                    continue
                known_dims.append(extent)
                declared.append(lp.ValueArg(extent, dtype=np.int32))

            if len(extents) == 0:
                arg = lp.ValueArg(name, dtype=elem_type)
            else:
                arg = lp.GlobalArg(name, dtype=elem_type, shape=extents)
            declared.append(arg)

        # Each flow declaration is a temporary variable declared in the
        # component scope
        for stmt in template.statements:
            if stmt.op_type != 'declaration' or stmt.op_cat != 'declaration':
                continue
            for temp_name in list(stmt.output):
                temp_edge = template.edge_info[temp_name]
                extents = self.get_edge_dims(temp_edge)
                elem_type = hu.get_attribute_value(
                    temp_edge.attributes['type'])
                declared.append(
                    lp.TemporaryVariable(temp_name, dtype=elem_type,
                                         shape=extents))

        return declared
def test_laplacian_stiffness(ctx_factory):
    """Build the ``lapquad`` kernel and auto-test transformed variants of it.

    The kernel writes ``A[K, i, j]`` from ``w``, ``jacDet``, ``jacInv`` and
    ``DPsi`` using the instructions passed to ``lp.make_kernel`` below.  Each
    nested ``variant_*`` helper applies a sequence of loopy transformations
    and returns ``(kernel, names)``, where ``names`` is handed to
    ``lp.generate_loop_schedules`` as ``loop_priority``.  Only the variants
    listed in the loop at the end of this function are actually run; each is
    checked against ``seq_knl`` with ``lp.auto_test_vs_ref``.

    :arg ctx_factory: zero-argument callable returning the context; its
        ``devices[0]`` is passed to ``lp.make_kernel``.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    dim = 2  # (baked into code)

    Nq = 40  # num. quadrature points (baked into code)
    Nb = 20  # num. basis functions (baked into code)
    Nc = 100  # num. cells (run-time symbolic)

    from pymbolic import var
    # Symbolic stand-in for Nc, used in the argument shapes below.
    Nc_sym = var("Nc")

    # NOTE(review): passing a device as the first argument here, and the
    # generate_loop_schedules/check_kernels calls at the bottom, look like an
    # older loopy API -- confirm against the loopy version in use.
    knl = lp.make_kernel(
        ctx.devices[0],
        "[Nc] -> {[K,i,j,q, dx_axis, ax_b]: 0<=K<Nc and 0<=i,j<%(Nb)d and 0<=q<%(Nq)d "
        "and 0<= dx_axis, ax_b < %(dim)d}" % dict(Nb=Nb, Nq=Nq, dim=dim), [
            "dPsi(ij, dxi) := sum_float32(@ax_b,"
            "  jacInv[ax_b,dxi,K,q] * DPsi[ax_b,ij,q])",
            "A[K, i, j] = sum_float32(q, w[q] * jacDet[K,q] * ("
            "sum_float32(dx_axis, dPsi$one(i,dx_axis)*dPsi$two(j,dx_axis))))"
        ], [
            lp.GlobalArg(
                "jacInv", dtype, shape=(dim, dim, Nc_sym, Nq), order=order),
            lp.ConstantArg("DPsi", dtype, shape=(dim, Nb, Nq), order=order),
            lp.GlobalArg("jacDet", dtype, shape=(Nc_sym, Nq), order=order),
            lp.ConstantArg("w", dtype, shape=(Nq, ), order=order),
            lp.GlobalArg("A", dtype, shape=(Nc_sym, Nb, Nb), order=order),
            lp.ValueArg("Nc", np.int32, approximately=1000),
        ],
        name="lapquad",
        assumptions="Nc>=1")

    knl = lp.tag_inames(knl, dict(ax_b="unr"))
    # Keep the kernel as it stands here as the reference passed to
    # lp.auto_test_vs_ref; the variants below start from the same kernel.
    seq_knl = knl

    def variant_fig31(knl):
        # This (mostly) reproduces Figure 3.1.

        knl = lp.tag_inames(knl, {"dx_axis": "unr"})
        return knl, ["K", "i", "j", "q", "ax_b_insn"]

    def variant_pg4(knl):
        # This (mostly) reproduces the unlabeled code snippet on pg. 4.

        knl = lp.tag_inames(knl, {"dx_axis": "unr"})
        Ncloc = 16
        knl = lp.split_iname(knl,
                             "K",
                             Ncloc,
                             outer_iname="Ko",
                             inner_iname="Kloc")
        return knl, ["Ko", "Kloc", "i", "j", "q", "ax_b_insn"]

    def variant_fig32(knl):
        # This (mostly) reproduces Figure 3.2.

        Ncloc = 16
        knl = lp.split_iname(knl,
                             "K",
                             Ncloc,
                             outer_iname="Ko",
                             inner_iname="Kloc")
        knl = lp.precompute(knl,
                            "dPsi",
                            np.float32, ["i", "q", "dx_axis"],
                            default_tag=None)
        knl = lp.tag_inames(knl, {"dx_axis": "unr", "dxi": "unr"})
        return knl, ["Ko", "Kloc", "dPsi_q", "ij", "i", "j", "q", "ax_b_insn"]

    def variant_fig33(knl):
        # This is meant to (mostly) reproduce Figure 3.3.

        Ncloc = 16
        knl = lp.split_iname(knl,
                             "K",
                             Ncloc,
                             outer_iname="Ko",
                             inner_iname="Kloc")
        knl = lp.precompute(knl,
                            "dPsi$one",
                            np.float32, ["dx_axis"],
                            default_tag=None)
        knl = lp.tag_inames(knl, {"j": "ilp.seq"})

        return knl, ["Ko", "Kloc"]

    def variant_simple_gpu(knl):
        # This is a simple GPU-ish variant.

        # It's not the same thing as Matt's code, but I'll need some more time
        # to reverse-engineer what is going on there. Some discussion might
        # help, too. :)

        knl = lp.tag_inames(knl, {"dx_axis": "unr"})
        Ncloc = 16
        knl = lp.split_iname(knl,
                             "K",
                             Ncloc,
                             outer_iname="Ko",
                             inner_iname="Kloc",
                             outer_tag="g.0")
        knl = lp.tag_inames(knl, {"i": "l.1", "j": "l.0"})
        # NOTE(review): the returned list still names "K" although K was
        # split into Ko/Kloc above -- confirm this is intended.
        return knl, ["K", "i", "j", "q", "ax_b_insn"]

    def variant_simple_gpu_prefetch(knl):
        # This adds prefetching to the GPU variant above.

        # In this variant (on my machine), loopy makes a silly choice
        # for the upper bound of Kloc (it uses Nc). I'll investigate and
        # fix that. (FIXME)

        knl = lp.tag_inames(knl, {"dx_axis": "unr"})
        Ncloc = 16
        knl = lp.split_iname(knl,
                             "K",
                             Ncloc,
                             outer_iname="Ko",
                             inner_iname="Kloc",
                             outer_tag="g.0")
        knl = lp.tag_inames(knl, {"i": "l.1", "j": "l.0"})
        knl = lp.add_prefetch(knl, "w", ["q"], default_tag="l.auto")
        knl = lp.add_prefetch(knl, "DPsi", [0, 1, 2], default_tag="l.auto")
        knl = lp.add_prefetch(knl, "jacInv", [0, 1, 3], default_tag="l.auto")
        knl = lp.add_prefetch(knl, "jacDet", [1], default_tag="l.auto")
        # NOTE(review): as in variant_simple_gpu, the list names "K" after the
        # split into Ko/Kloc -- confirm this is intended.
        return knl, ["K", "i", "j", "q", "ax_b_insn"]

    # Plug in variant name here
    #                        |
    #                        v
    for variant in [variant_fig33]:
        # Every variant starts from the same base kernel `knl`.
        var_knl, loop_prio = variant(knl)
        kernel_gen = lp.generate_loop_schedules(var_knl,
                                                loop_priority=loop_prio)
        kernel_gen = lp.check_kernels(kernel_gen, dict(Nc=Nc))

        #print lp.preprocess_kernel(var_knl)

        lp.auto_test_vs_ref(seq_knl,
                            ctx,
                            kernel_gen,
                            op_count=0,
                            op_label="GFlops",
                            parameters={"Nc": Nc},
                            print_ref_code=True)