Beispiel #1
0
 def knl():
     """Build the ``oversample_mat`` loopy program.

     The single instruction copies ``resample_mat[idof, j]`` into ``result``
     at a row/column position computed from ``itgt_base``/``isrc_base`` and
     the per-element entries of ``to_element_indices`` and
     ``from_element_indices``.
     """
     domain = """{[iel, idof, j]:
             0<=iel<nelements and
             0<=idof<n_to_nodes and
             0<=j<n_from_nodes}"""
     # The leading whitespace of the continuation lines is part of the string.
     scatter_insn = "result[itgt_base + to_element_indices[iel]*n_to_nodes + idof, \
                 isrc_base + from_element_indices[iel]*n_from_nodes + j] \
             = resample_mat[idof, j]"
     kernel_data = [
         lp.GlobalArg(
             "result", None,
             shape="nnodes_tgt, nnodes_src",
             offset=lp.auto),
         lp.ValueArg("itgt_base,isrc_base", np.int32),
         lp.ValueArg("nnodes_tgt,nnodes_src", np.int32),
         "...",
     ]
     return make_loopy_program(
         domain, scatter_insn, kernel_data, name="oversample_mat")
Beispiel #2
0
    def make_kernels(self, seq_dependencies):
        """Create one loopy kernel for every entry of ``self.kernels``.

        Argument names with an entry in ``dim_map`` become ``lp.GlobalArg``,
        all other argument names become ``lp.ValueArg``. Each remaining known
        name that is neither an argument nor an iname is declared as an
        ``lp.TemporaryVariable``; it is skipped only when both its type and
        ``sub.implicit_types`` are ``None``.

        :arg seq_dependencies: forwarded unchanged to ``lp.make_kernel``.
        :returns: list of kernels (after ``fuse_loop_domains`` and
            ``lp.fold_constants``), in the order of ``self.kernels``.
        """
        kernels = []

        for sub in self.kernels:
            # {{{ figure out arguments

            kernel_data = []
            for arg_name in sub.arg_names:
                if sub.dim_map.get(arg_name) is None:
                    arg = lp.ValueArg(arg_name, dtype=sub.get_type(arg_name))
                else:
                    # default order is set to "F" in kernel creation below
                    arg = lp.GlobalArg(
                        arg_name,
                        dtype=sub.get_type(arg_name),
                        shape=sub.get_loopy_shape(arg_name))
                kernel_data.append(arg)

            # }}}

            # {{{ figure out temporary variables

            temp_names = (
                sub.known_names() - set(sub.arg_names) - sub.all_inames())
            for var_name in temp_names:
                dtype = sub.get_type(var_name, none_ok=True)
                if not (sub.implicit_types is None and dtype is None):
                    kernel_data.append(
                        lp.TemporaryVariable(
                            var_name,
                            dtype=dtype,
                            shape=sub.get_loopy_shape(var_name)))

            # }}}

            from loopy.version import MOST_RECENT_LANGUAGE_VERSION
            make_kernel_kwargs = dict(
                name=sub.subprogram_name,
                default_order="F",
                index_dtype=self.index_dtype,
                target=self.target,
                seq_dependencies=seq_dependencies,
                lang_version=MOST_RECENT_LANGUAGE_VERSION)
            knl = lp.make_kernel(
                sub.index_sets, sub.instructions, kernel_data,
                **make_kernel_kwargs)

            from loopy.loop import fuse_loop_domains
            kernels.append(lp.fold_constants(fuse_loop_domains(knl)))

        return kernels
Beispiel #3
0
    def get_args(self):
        """Return the argument list for this kernel.

        The list holds a single ``KernelArgument`` wrapping an
        ``lp.ValueArg`` named ``self.helmholtz_k_name``. Its dtype is
        ``np.complex128`` when ``self.allow_evanescent`` is true and
        ``np.float64`` otherwise.
        """
        k_dtype = np.complex128 if self.allow_evanescent else np.float64
        k_arg = lp.ValueArg(self.helmholtz_k_name, k_dtype)
        return [KernelArgument(loopy_arg=k_arg)]
Beispiel #4
0
 def write_into_mat_prg():
     """Build the ``write_into_global_matrix`` kernel.

     The kernel copies ``mat[idof, jdof]`` into ``result`` at the position
     shifted by ``offset_i`` / ``offset_j``.
     """
     domains = ["{[idof]: 0 <= idof < ndofs}", "{[jdof]: 0 <= jdof < mdofs}"]
     instructions = (
         """
             result[offset_i + idof, offset_j + jdof] = mat[idof, jdof]
         """
     )
     kernel_data = [
         lp.GlobalArg("result", None, shape="n, m", offset=lp.auto),
         lp.ValueArg("n, m", np.int32),
         lp.GlobalArg("mat", None, shape="ndofs, mdofs", offset=lp.auto),
         lp.ValueArg("offset_i", np.int32),
         lp.ValueArg("offset_j", np.int32),
         "...",
     ]
     return lp.make_kernel(
         domains,
         instructions,
         kernel_data,
         options=lp.Options(return_dict=True),
         default_offset=lp.auto,
         name="write_into_global_matrix",
     )
Beispiel #5
0
def test_memory_tools_defn():
    """Check the declaration strings returned by the memory manager's ``define``.

    For every test case a dummy ``CallgenResult`` and a memory manager are
    built, and ``mem.define`` is checked against the expected declaration
    for global arrays, value arguments and a read-only initialized temporary.

    :raises NotImplementedError: for a language other than ``'opencl'``
        or ``'c'``.
    """
    wrapper = __test_cases()
    for opts in wrapper:
        # create a dummy callgen
        callgen = CallgenResult(order=opts.order, lang=opts.lang,
                                dev_mem_type=wrapper.state['dev_mem_type'],
                                type_map=type_map(opts.lang))
        # create a memory manager
        mem = get_memory(callgen, host_namer=HostNamer(), device_namer=DeviceNamer())

        # NOTE: a one-element shape needs the trailing comma to be a tuple
        a1 = lp.GlobalArg('a1', shape=(arc.problem_size,), dtype=np.int32)
        a2 = lp.GlobalArg('a2', shape=(arc.problem_size, 10), dtype=np.int64)
        d3 = lp.GlobalArg('d3', shape=(arc.problem_size, 10, 10), dtype=np.float64)
        a4 = lp.ValueArg('a4', dtype=np.int64)
        a5 = lp.ValueArg('a5', dtype=np.int32)
        a6 = lp.TemporaryVariable('a6', initializer=np.array([0, 1, 2]),
                                  read_only=True)

        # first argument of define: True yields the 'd_' name, False the 'h_' one
        if opts.lang == 'opencl':
            assert mem.define(True, a1) == 'cl_mem d_a1;'
            assert mem.define(False, a2) == 'long int* h_a2;'
            assert mem.define(True, d3) == 'cl_mem d_d3;'
            assert mem.define(False, a4) == 'long int h_a4;'
            assert mem.define(True, a5) == 'cl_uint d_a5;'
        elif opts.lang == 'c':
            assert mem.define(True, a1) == 'int* d_a1;'
            assert mem.define(False, a2) == 'long int* h_a2;'
            assert mem.define(True, d3) == 'double* d_d3;'
            assert mem.define(False, a4) == 'long int h_a4;'
            assert mem.define(True, a5) == 'int d_a5;'
        else:
            raise NotImplementedError

        # host constants behave the same for both languages: the device-side
        # definition must fail, the host-side one is a const array
        with assert_raises(Exception):
            mem.define(True, a6, host_constant=True)
        assert mem.define(False, a6, host_constant=True) == \
            'const long int h_a6[3] = {0, 1, 2};'
Beispiel #6
0
def expression_argument(expr, parameters):
    """Create the loopy argument for *expr* and store it in *parameters*.

    An expression with shape ``()`` becomes a ``loopy.ValueArg``; any other
    shape becomes a ``loopy.GlobalArg`` of that shape. The argument is
    written to ``parameters.kernel_data`` at the index *expr* has in
    ``parameters.wrapper_arguments``.

    :returns: a ``pym.Variable`` carrying the name of *expr*.
    """
    name, shape, dtype = expr.name, expr.shape, expr.dtype
    kernel_arg = (
        loopy.ValueArg(name, dtype=dtype)
        if shape == ()
        else loopy.GlobalArg(name, dtype=dtype, shape=shape))
    slot = parameters.wrapper_arguments.index(expr)
    parameters.kernel_data[slot] = kernel_arg
    return pym.Variable(name)
Beispiel #7
0
    def pick_used_centers(self):
        """Build the ``pick_used_centers`` kernel.

        For each target ``i`` with ``target_to_center[i] >= 0`` the kernel
        sets ``center_is_used[target_to_center[i]]`` to 1. The write-race
        warning for that instruction is silenced. The ``i`` loop is split by
        128 with tags ``g.0`` (outer) and ``l.0`` (inner).
        """
        instructions = (
            """
                <>target_has_center = (target_to_center[i] >= 0)
                center_is_used[target_to_center[i]] = 1 \
                    {id=center_is_used_write,if=target_has_center}
            """
        )
        kernel_data = [
            lp.GlobalArg("target_to_center", shape="ntargets", offset=lp.auto),
            lp.GlobalArg("center_is_used", shape="ncenters"),
            lp.ValueArg("ncenters", np.int32),
            lp.ValueArg("ntargets", np.int32),
        ]
        unsplit_knl = lp.make_kernel(
            """{[i]: 0<=i<ntargets}""",
            instructions,
            kernel_data,
            name="pick_used_centers",
            silenced_warnings="write_race(center_is_used_write)",
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

        return lp.split_iname(
            unsplit_knl, "i", 128, inner_tag="l.0", outer_tag="g.0")
Beispiel #8
0
def create_globals(signature, edges):
    """Build the list of loopy kernel arguments described by *signature*.

    Each name in ``signature['args']`` that is also in ``signature['param']``
    becomes an ``lp.ValueArg`` typed by ``edges[name]['type']``. Every other
    name becomes an ``lp.GlobalArg`` whose shape is
    ``edges[name]['dimensions']``; each dimension name seen for the first
    time is declared beforehand as an ``np.int32`` ``lp.ValueArg``.

    :arg signature: mapping with ``'args'`` and ``'param'`` entries.
    :arg edges: mapping from name to a mapping with a ``'type'`` entry and,
        for arrays, a ``'dimensions'`` entry.
    :returns: list of loopy arguments in declaration order.
    """
    # NOTE: the result list is deliberately not called ``globals`` so the
    # builtin of that name stays usable.
    kernel_args = []
    seen_dims = set()
    for name in signature['args']:
        if name in signature['param']:
            kernel_args.append(lp.ValueArg(name, dtype=edges[name]['type']))
            continue

        dims = edges[name]['dimensions']
        for dim in dims:
            if dim in seen_dims:
                continue
            seen_dims.add(dim)
            kernel_args.append(lp.ValueArg(dim, dtype=np.int32))
        kernel_args.append(
            lp.GlobalArg(name, shape=tuple(dims), dtype=edges[name]['type']))

    return kernel_args
Beispiel #9
0
    def map_size_param(self, expr: SizeParam,
                       state: CodeGenState) -> ImplementedResult:
        """Return the stored result for size parameter *expr*.

        On the first visit an ``lp.ValueArg`` named ``expr.name`` is appended
        to the kernel arguments and a ``StoredResult`` is recorded in
        ``state.results``; later visits return the recorded result.
        """
        if expr not in state.results:
            size_arg = lp.ValueArg(expr.name, dtype=expr.dtype)
            state.update_kernel(
                state.kernel.copy(args=state.kernel.args + [size_arg]))
            state.results[expr] = StoredResult(
                expr.name, expr.ndim, frozenset())

        return state.results[expr]
Beispiel #10
0
def left_V(ctx):
    """Build the kernel computing ``l`` from ``u`` and ``w``.

    ``l[alpha,alpha1]`` is the product of ``sum_i u[alpha,i]*u[alpha1,i]``
    and ``sum_k w[alpha,k]*w[alpha1,k]``. All arrays are ``float32`` in
    ``'C'`` order. The kernel is created for ``ctx.devices[0]``.
    """
    order = 'C'
    dtype = np.float32

    domains = ["{[i,k,alpha,alpha1]: 0<=alpha,alpha1<r and 0<=i,k<n}"]
    instructions = [
        "l[alpha,alpha1]=sum((i), u[alpha,i]*u[alpha1,i])*sum((k),w[alpha,k]*w[alpha1,k])",
    ]
    kernel_data = [
        lp.GlobalArg("u", dtype, shape="r, n", order=order),
        lp.GlobalArg("w", dtype, shape="r, n", order=order),
        lp.GlobalArg("l", dtype, shape="r, r", order=order),
        lp.ValueArg("n", np.int64),
        lp.ValueArg("r", np.int64),
    ]
    knl = lp.make_kernel(
        ctx.devices[0], domains, instructions, kernel_data,
        assumptions="n>=1")

    # output inames go to the work-group grid, reduction inames are only split
    splits = (
        ("alpha1", 16, {"outer_tag": "g.0", "inner_tag": "l.0"}),
        ("alpha", 3, {"outer_tag": "g.1", "inner_tag": "l.1"}),
        ("i", 16, {}),
        ("k", 16, {}),
    )
    for iname, size, tags in splits:
        knl = lp.split_iname(knl, iname, size, **tags)

    return knl
Beispiel #11
0
def left_W(ctx):
    """Build the kernel computing ``l`` from ``u`` and ``v``.

    ``l[alpha,alpha1]`` is the product of ``sum_i u[i,alpha]*u[i,alpha1]``
    and ``sum_j v[j,alpha]*v[j,alpha1]``. All arrays are ``float64`` in
    ``'C'`` order. The kernel is created for ``ctx.devices[0]``.
    """
    order = 'C'
    dtype = np.float64

    domains = ["{[j,i,alpha,alpha1]: 0<=alpha,alpha1<r and 0<=j,i<n}"]
    instructions = [
        "l[alpha,alpha1]=sum((i), u[i,alpha]*u[i,alpha1])*sum((j),v[j,alpha]*v[j,alpha1])",
    ]
    kernel_data = [
        lp.GlobalArg("v", dtype, shape="n, r", order=order),
        lp.GlobalArg("u", dtype, shape="n, r", order=order),
        lp.GlobalArg("l", dtype, shape="r, r", order=order),
        lp.ValueArg("n", np.int64),
        lp.ValueArg("r", np.int64),
    ]
    knl = lp.make_kernel(
        ctx.devices[0], domains, instructions, kernel_data,
        assumptions="n>=1")

    # output inames go to the work-group grid, reduction inames are only split
    splits = (
        ("alpha1", 16, {"outer_tag": "g.0", "inner_tag": "l.0"}),
        ("alpha", 3, {"outer_tag": "g.1", "inner_tag": "l.1"}),
        ("j", 16, {}),
        ("i", 16, {}),
    )
    for iname, size, tags in splits:
        knl = lp.split_iname(knl, iname, size, **tags)

    return knl
Beispiel #12
0
def test_add_inames_for_unused_hw_axes(ctx_factory):
    """Check ``lp.add_inames_for_unused_hw_axes`` on a rank-one update kernel.

    After splitting ``i`` and ``j`` onto hardware axes and prefetching ``a``
    and ``b``, the ``init_alpha`` instruction and both fetch rules must sit
    within all four split inames. The result is compared against the
    untransformed kernel.
    """
    ctx = ctx_factory()
    dtype = np.float32
    order = "F"

    n = 16**3

    instructions = [
        """
                <> alpha = 2.0 {id=init_alpha}
                for i
                  for j
                    c[i, j] = alpha*a[i]*b[j] {id=outerproduct}
                  end
                end
                """
    ]
    kernel_data = [
        lp.GlobalArg("a", dtype, shape=("n", ), order=order),
        lp.GlobalArg("b", dtype, shape=("n", ), order=order),
        lp.GlobalArg("c", dtype, shape=("n, n"), order=order),
        lp.ValueArg("n", np.int32, approximately=n),
    ]
    ref_knl = lp.make_kernel(
        "[n] -> {[i,j]: 0<=i,j<n}",
        instructions,
        kernel_data,
        name="rank_one",
        assumptions="n >= 16",
        lang_version=(2018, 2))

    knl = lp.split_iname(ref_knl, "i", 16, outer_tag="g.0", inner_tag="l.0")
    knl = lp.split_iname(knl, "j", 16, outer_tag="g.1", inner_tag="l.1")

    for prefetched in ("a", "b"):
        knl = lp.add_prefetch(knl, prefetched)

    knl = lp.add_inames_for_unused_hw_axes(knl)

    all_hw_inames = frozenset(["i_inner", "i_outer", "j_outer", "j_inner"])
    for insn_id in ("init_alpha", "a_fetch_rule", "b_fetch_rule"):
        assert (
            knl["rank_one"].id_to_insn[insn_id].within_inames
            == all_hw_inames)

    lp.auto_test_vs_ref(ref_knl,
                        ctx,
                        knl,
                        op_count=[np.dtype(dtype).itemsize * n**2 / 1e9],
                        op_label=["GBytes"],
                        parameters={"n": n})
Beispiel #13
0
    def get_kernel(self):
        """Assemble the loopy kernel that evaluates selected matrix entries.

        For every ``imat`` the kernel looks up ``itgt``/``isrc`` from
        ``tgtindices``/``srcindices``, forms the difference vector ``d`` and
        writes ``knl_{i}_scaling * pair_result_{i}`` to ``result_{i}[imat]``
        for each kernel in ``self.kernels``. ``idim`` inames are tagged
        ``unr``, and each kernel's ``prepare_loopy_kernel`` is applied last.
        """
        loopy_insns, result_names = self.get_loopy_insns_and_result_names()
        kernel_exprs = self.get_kernel_exprs(result_names)

        index_args = [
            lp.GlobalArg("srcindices", None, shape="nresult"),
            lp.GlobalArg("tgtindices", None, shape="nresult"),
            lp.ValueArg("nresult", None)
        ]
        result_args = [
            lp.GlobalArg("result_%d" % i, dtype, shape="nresult")
            for i, dtype in enumerate(self.value_dtypes)
        ]
        arguments = (
            self.get_default_src_tgt_arguments() + index_args + result_args)

        scaling_assignments = self.get_kernel_scaling_assignments()

        # NOTE: itgt, isrc need to always be defined in case a statement
        # in loopy_insns or kernel_exprs needs them (e.g. hardcoded in
        # places like get_kernel_exprs)
        index_setup = [
                """
                for imat
                    <> itgt = tgtindices[imat]
                    <> isrc = srcindices[imat]

                    <> d[idim] = targets[idim, itgt] - sources[idim, isrc]
            """
            ]
        self_check = [
                """
                    <> is_self = (isrc == target_to_source[itgt])
                """ if self.exclude_self else ""
            ]
        result_writes = [
                """
                    result_{i}[imat] = \
                        knl_{i}_scaling * pair_result_{i} \
                            {{id_prefix=write_p2p}}
                """.format(i=iknl) for iknl in range(len(self.kernels))
            ]
        instructions = (
            scaling_assignments
            + index_setup
            + self_check
            + loopy_insns
            + kernel_exprs
            + result_writes
            + ["end"])

        loopy_knl = lp.make_kernel(
            "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}",
            instructions,
            arguments,
            assumptions="nresult>=1",
            silenced_warnings="write_race(write_p2p*)",
            name=self.name,
            fixed_parameters={"dim": self.dim},
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

        loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr")
        loopy_knl = lp.add_dtypes(
            loopy_knl, {"nsources": np.int32, "ntargets": np.int32})

        for knl in self.kernels:
            loopy_knl = knl.prepare_loopy_kernel(loopy_knl)

        return loopy_knl
Beispiel #14
0
        def knl():
            """Build the ``oversample_mat`` kernel.

            ``resample_mat[i, j]`` is copied into ``result`` at a position
            derived from ``itgt_base``/``isrc_base`` and the per-element
            index arrays. ``i`` is split by 16 (inner tag ``l.0``) and ``k``
            is tagged ``g.0``.
            """
            import loopy as lp

            domain = """{[k,i,j]:
                    0<=k<nelements and
                    0<=i<n_to_nodes and
                    0<=j<n_from_nodes}"""
            # The leading whitespace of the continuation lines is part of
            # the string.
            scatter_insn = "result[itgt_base + to_element_indices[k]*n_to_nodes + i, \
                        isrc_base + from_element_indices[k]*n_from_nodes + j] \
                    = resample_mat[i, j]"
            kernel_data = [
                lp.GlobalArg(
                    "result", None,
                    shape="nnodes_tgt, nnodes_src",
                    offset=lp.auto),
                lp.ValueArg("itgt_base,isrc_base", np.int32),
                lp.ValueArg("nnodes_tgt,nnodes_src", np.int32),
                "...",
            ]
            unsplit = lp.make_kernel(
                domain, scatter_insn, kernel_data, name="oversample_mat")

            split = lp.split_iname(unsplit, "i", 16, inner_tag="l.0")
            return lp.tag_inames(split, {"k": "g.0"})
Beispiel #15
0
        def pick_knl():
            """Build the ``resample_by_picking`` loopy program.

            For each element, entry ``pick_list[idof]`` of the source row
            ``from_element_indices[iel]`` of ``ary`` is copied to column
            ``idof`` of row ``to_element_indices[iel]`` of ``result``.
            """
            domain = """{[iel, idof]:
                    0<=iel<nelements and
                    0<=idof<n_to_nodes}"""
            pick_insn = "result[to_element_indices[iel], idof] \
                    = ary[from_element_indices[iel], pick_list[idof]]"
            kernel_data = [
                lp.GlobalArg(
                    "result", None,
                    shape="nelements_result, n_to_nodes",
                    offset=lp.auto),
                lp.GlobalArg(
                    "ary", None,
                    shape="nelements_vec, n_from_nodes",
                    offset=lp.auto),
                lp.ValueArg("nelements_result", np.int32),
                lp.ValueArg("nelements_vec", np.int32),
                lp.ValueArg("n_from_nodes", np.int32),
                "...",
            ]
            return make_loopy_program(
                domain, pick_insn, kernel_data, name="resample_by_picking")
Beispiel #16
0
def Prav_U(ctx):
    """Build the kernel computing ``f`` from ``a``, ``v`` and ``w``.

    ``f[alpha,i] = sum_{j,k} a[i,j,k]*v[alpha,j]*w[alpha,k]``. All arrays
    are ``float32`` in ``'C'`` order. The kernel is created for
    ``ctx.devices[0]``. As a side effect the highlighted generated code is
    printed.

    :returns: the transformed loopy kernel.
    """
    order = 'C'
    dtype = np.float32
    knl = lp.make_kernel(ctx.devices[0], [
        "{[i,j,k,alpha]: 0<=alpha<r and 0<=i,j,k<n}",
    ], [
        "f[alpha,i]=sum((j,k), a[i,j,k]*v[alpha,j]*w[alpha,k])",
    ], [
        lp.GlobalArg("a", dtype, shape="n, n, n", order=order),
        lp.GlobalArg("v", dtype, shape="r, n", order=order),
        lp.GlobalArg("w", dtype, shape="r, n", order=order),
        lp.GlobalArg("f", dtype, shape="r, n", order=order),
        lp.ValueArg("n", np.int64),
        lp.ValueArg("r", np.int64),
    ],
                         assumptions="n>=1")
    knl = lp.split_iname(knl, "i", 16, outer_tag="g.0", inner_tag="l.0")
    knl = lp.split_iname(knl, "alpha", 1, outer_tag="g.1", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", 16)
    knl = lp.split_iname(knl, "k", 16)
    # Call form of print: valid on Python 3 and, with a single argument,
    # prints the same text on Python 2.
    print(lp.CompiledKernel(ctx, knl).get_highlighted_code())
    return knl
Beispiel #17
0
def Prav_V(ctx):
    """Build the kernel computing ``f`` from ``a``, ``w`` and ``u``.

    ``f[alpha,j] = sum_{k,i} a[i,j,k]*w[k,alpha]*u[i,alpha]``. All arrays
    are ``float64`` in ``'C'`` order. The kernel is created for
    ``ctx.devices[0]``.
    """
    order = 'C'
    dtype = np.float64

    domains = ["{[i,j,k,alpha]: 0<=alpha<r and 0<=i,j,k<n}"]
    instructions = [
        "f[alpha,j]=sum((k,i), a[i,j,k]*w[k,alpha]*u[i,alpha])",
    ]
    kernel_data = [
        lp.GlobalArg("a", dtype, shape="n, n, n", order=order),
        lp.GlobalArg("u", dtype, shape="n, r", order=order),
        lp.GlobalArg("w", dtype, shape="n, r", order=order),
        lp.GlobalArg("f", dtype, shape="r, n", order=order),
        lp.ValueArg("n", np.int64),
        lp.ValueArg("r", np.int64),
    ]
    knl = lp.make_kernel(
        ctx.devices[0], domains, instructions, kernel_data,
        assumptions="n>=1")

    # output inames go to the work-group grid, reduction inames are only split
    splits = (
        ("j", 16, {"outer_tag": "g.0", "inner_tag": "l.0"}),
        ("alpha", 3, {"outer_tag": "g.1", "inner_tag": "l.1"}),
        ("i", 16, {}),
        ("k", 16, {}),
    )
    for iname, size, tags in splits:
        knl = lp.split_iname(knl, iname, size, **tags)

    return knl
Beispiel #18
0
def get_tensor(ctx):
    """Build the kernel assembling ``res`` from the factors ``u``, ``v``, ``w``.

    ``res[i,j,k] = sum_alpha u[i,alpha]*v[j,alpha]*w[k,alpha]``. All arrays
    are ``float64`` in ``'C'`` order. The kernel is created for
    ``ctx.devices[0]``.
    """
    order = 'C'
    dtype = np.float64

    domains = ["{[j,i,alpha,k]: 0<=alpha<r and 0<=i,j,k<n}"]
    instructions = [
        "res[i,j,k]=sum((alpha), u[i,alpha]*v[j,alpha]*w[k,alpha])",
    ]
    kernel_data = [
        lp.GlobalArg("res", dtype, shape="n, n, n", order=order),
        lp.GlobalArg("v", dtype, shape="n, r", order=order),
        lp.GlobalArg("u", dtype, shape="n, r", order=order),
        lp.GlobalArg("w", dtype, shape="n, r", order=order),
        lp.ValueArg("n", np.int32),
        lp.ValueArg("r", np.int32),
    ]
    knl = lp.make_kernel(
        ctx.devices[0], domains, instructions, kernel_data,
        assumptions="n>=1")

    # the three output inames map to the grid; alpha is only split
    splits = (
        ("i", 8, {"outer_tag": "g.0", "inner_tag": "l.0"}),
        ("j", 8, {"outer_tag": "g.1", "inner_tag": "l.1"}),
        ("alpha", 2, {}),
        ("k", 8, {"outer_tag": "g.2", "inner_tag": "l.2"}),
    )
    for iname, size, tags in splits:
        knl = lp.split_iname(knl, iname, size, **tags)

    return knl
Beispiel #19
0
    def prg():
        """Build the ``compute_block_centers_knl`` kernel.

        For ``norm_type == "l2"`` the center is the mean of the selected
        source coordinates; for ``"linf"`` it is the midpoint of their
        minimum and maximum. ``norm_type`` and ``ndim`` come from the
        enclosing scope.

        :raises ValueError: if ``norm_type`` is neither ``"l2"`` nor
            ``"linf"``.
        """
        if norm_type not in ("l2", "linf"):
            raise ValueError(f"unknown norm type: '{norm_type}'")

        if norm_type == "l2":
            # NOTE: computes first-order approximation of the source centroids
            insns = """
            proxy_center[idim, irange] = 1.0 / npoints \
                    * simul_reduce(sum, i, sources[idim, srcindices[i + ioffset]])
            """
        else:
            # NOTE: computes the centers of the bounding box
            insns = """
            <> bbox_max = \
                    simul_reduce(max, i, sources[idim, srcindices[i + ioffset]])
            <> bbox_min = \
                    simul_reduce(min, i, sources[idim, srcindices[i + ioffset]])

            proxy_center[idim, irange] = (bbox_max + bbox_min) / 2.0
            """

        domains = [
            "{[irange]: 0 <= irange < nranges}",
            "{[i]: 0 <= i < npoints}",
            "{[idim]: 0 <= idim < ndim}",
        ]
        loop_body = (
            """
            for irange
                <> ioffset = srcranges[irange]
                <> npoints = srcranges[irange + 1] - srcranges[irange]

                %(insns)s
            end
            """ % {"insns": insns})
        kernel_data = [
            lp.GlobalArg(
                "sources", None,
                shape=(ndim, "nsources"),
                dim_tags="sep,C"),
            lp.ValueArg("nsources", np.int64),
            ...,
        ]

        unsplit = lp.make_kernel(
            domains,
            loop_body,
            kernel_data,
            name="compute_block_centers_knl",
            assumptions="ndim>=1 and nranges>=1",
            fixed_parameters={"ndim": ndim},
            lang_version=MOST_RECENT_LANGUAGE_VERSION,
        )

        unrolled = lp.tag_inames(unsplit, "idim*:unr")
        return lp.split_iname(unrolled, "irange", 64, outer_tag="g.0")
Beispiel #20
0
def test_unknown_stride_to_callee(ctx_factory):
    """Call the ``twice`` callee on a strided slice of ``x``.

    The caller passes ``x[2*i2, i3, i]`` to ``twice`` and stores the result
    in ``y``. The merged program is run through ``lp.auto_test_vs_ref``
    against itself.
    """
    ctx = ctx_factory()

    callee_args = [lp.ValueArg("n"), lp.GlobalArg("a"), lp.GlobalArg("b")]
    twice = lp.make_function(
        "{[i, j]: 0<=i, j < n}",
        """
            b[i, j] = 2*a[i, j]
            """,
        callee_args,
        name="twice")

    caller_insns = (
        """
            [i0, i1]: y[i0, i1, i] = twice(N, [i2, i3]: x[2*i2, i3, i])
            """
    )
    caller_args = [
        lp.ValueArg("N", dtype=np.int32),
        lp.ValueArg("Nvar", dtype=np.int32),
        lp.GlobalArg("x", shape=lp.auto, dtype=np.float64),
        ...,
    ]
    caller = lp.make_kernel(
        "{[i,i0,i1,i2,i3]: 0<=i0, i1, i2, i3< N and 0<=i<Nvar}",
        caller_insns,
        caller_args)

    prog = lp.merge([caller, twice])

    lp.auto_test_vs_ref(prog, ctx, prog, parameters={"N": 4, "Nvar": 5})
Beispiel #21
0
def test_nonlinear_index(ctx_factory):
    """Build a kernel writing to the nonlinear index ``a[i*i]``.

    The kernel and its highlighted generated code are printed; nothing is
    asserted.
    """
    ctx = ctx_factory()

    instructions = (
        """
                a[i*i] = 17
                """
    )
    kernel_data = [
        lp.GlobalArg("a", shape="n"),
        lp.ValueArg("n"),
    ]
    knl = lp.make_kernel(
        "{[i,j]: 0<=i,j<n }",
        instructions,
        kernel_data,
        assumptions="n>=1")

    print(knl)
    compiled = lp.CompiledKernel(ctx, knl)
    print(compiled.get_highlighted_code())
Beispiel #22
0
        def mat_knl():
            """Build the ``resample_by_mat`` loopy program.

            For each element, row ``to_element_indices[iel]`` of ``result``
            receives ``resample_mat`` applied (summing over ``j``) to row
            ``from_element_indices[iel]`` of ``ary``.
            """
            domain = """{[iel, idof, j]:
                    0<=iel<nelements and
                    0<=idof<n_to_nodes and
                    0<=j<n_from_nodes}"""
            matvec_insn = "result[to_element_indices[iel], idof] \
                    = sum(j, resample_mat[idof, j] \
                    * ary[from_element_indices[iel], j])"
            kernel_data = [
                lp.GlobalArg(
                    "result", None,
                    shape="nelements_result, n_to_nodes",
                    offset=lp.auto),
                lp.GlobalArg(
                    "ary", None,
                    shape="nelements_vec, n_from_nodes",
                    offset=lp.auto),
                lp.ValueArg("nelements_result", np.int32),
                lp.ValueArg("nelements_vec", np.int32),
                "...",
            ]
            return make_loopy_program(
                domain, matvec_insn, kernel_data, name="resample_by_mat")
Beispiel #23
0
def test_function_decl_extractor():
    """Run ``a[i] = b[i] + v`` on the executable C target.

    The kernel mixes a global array, a constant array and a scalar value
    argument; the output must equal ``b - 1`` for ``v = -1``.
    """
    # ensure that we can tell the difference between pointers, constants, etc.
    # in execution
    from loopy.target.c import ExecutableCTarget

    knl = lp.make_kernel('{[i]: 0 <= i < 10}',
        """
            a[i] = b[i] + v
        """,
        [lp.GlobalArg('a', shape=(10,), dtype=np.int32),
         # NOTE: trailing comma makes this a 1-tuple, matching 'a' above
         lp.ConstantArg('b', shape=(10,)),
         lp.ValueArg('v', dtype=np.int32)],
        target=ExecutableCTarget())

    assert np.allclose(knl(b=np.arange(10), v=-1)[1], np.arange(10) - 1)
Beispiel #24
0
    def map_size_param(self, expr: SizeParam,
                       state: CodeGenState) -> ImplementedResult:
        """Return the stored result for size parameter *expr*.

        On the first visit an ``lp.ValueArg`` named ``expr.name`` is appended
        to the kernel arguments, carrying the tags returned by
        ``_filter_tags_not_of_type``, and a ``StoredResult`` is recorded in
        ``state.results``. Later visits return the recorded result.
        """
        if expr not in state.results:
            propagated_tags = _filter_tags_not_of_type(
                expr, self.array_tag_t_to_not_propagate)
            size_arg = lp.ValueArg(
                expr.name, dtype=expr.dtype, tags=propagated_tags)
            state.update_kernel(
                state.kernel.copy(args=state.kernel.args + [size_arg]))
            assert expr.name is not None
            state.results[expr] = StoredResult(
                expr.name, expr.ndim, frozenset())

        return state.results[expr]
Beispiel #25
0
    def prg():
        """Build the ``compute_block_radii_knl`` kernel.

        For each range, ``proxy_radius[irange]`` is ``radius_factor`` times
        the largest Euclidean distance between ``proxy_centers[:, irange]``
        and the sources selected through ``srcindices``. ``ndim`` comes from
        the enclosing scope.
        """
        domains = [
            "{[irange]: 0 <= irange < nranges}",
            "{[i]: 0 <= i < npoints}",
            "{[idim]: 0 <= idim < ndim}",
        ]
        instructions = (
            """
            for irange
                <> ioffset = srcranges[irange]
                <> npoints = srcranges[irange + 1] - srcranges[irange]
                <> rblk = reduce(max, i, sqrt(simul_reduce(sum, idim, \
                        (proxy_centers[idim, irange]
                        - sources[idim, srcindices[i + ioffset]]) ** 2)
                        ))

                proxy_radius[irange] = radius_factor * rblk
            end
            """
        )
        kernel_data = [
            lp.GlobalArg(
                "sources", None,
                shape=(ndim, "nsources"),
                dim_tags="sep,C"),
            lp.ValueArg("nsources", np.int64),
            lp.ValueArg("radius_factor", np.float64),
            ...,
        ]

        unsplit = lp.make_kernel(
            domains,
            instructions,
            kernel_data,
            name="compute_block_radii_knl",
            assumptions="ndim>=1 and nranges>=1",
            fixed_parameters={"ndim": ndim},
            lang_version=MOST_RECENT_LANGUAGE_VERSION,
        )

        unrolled = lp.tag_inames(unsplit, "idim*:unr")
        return lp.split_iname(unrolled, "irange", 64, outer_tag="g.0")
Beispiel #26
0
 def init_global_mat_prg():
     """Build the ``init_a_global_matrix`` kernel.

     The kernel sets every entry of the ``n`` by ``m`` array ``result`` to 0.
     """
     domains = ["{[idof]: 0 <= idof < n}", "{[jdof]: 0 <= jdof < m}"]
     instructions = (
         """
             result[idof, jdof]  = 0 {id=init}
         """
     )
     kernel_data = [
         lp.GlobalArg("result", None, shape="n, m", offset=lp.auto),
         lp.ValueArg("n, m", np.int32),
         "...",
     ]
     return lp.make_kernel(
         domains,
         instructions,
         kernel_data,
         options=lp.Options(return_dict=True),
         default_offset=lp.auto,
         name="init_a_global_matrix",
     )
Beispiel #27
0
    def qbx_center_to_target_box_lookup(self, particle_id_dtype, box_id_dtype):
        """Build the ``qbx_center_to_target_box_lookup`` kernel.

        For each box the kernel walks the box's non-child targets, maps each
        to its user index and, when that index is below ``ncenters``, stores
        ``box_to_target_box[ibox]`` in ``qbx_center_to_target_box``. The
        ``ibox`` loop is split by 128 with tags ``g.0``/``l.0``.

        :arg particle_id_dtype: dtype of the ``ncenters`` value argument.
        :arg box_id_dtype: dtype of ``qbx_center_to_target_box`` and
            ``box_to_target_box``.
        """
        # FIXME Iterating over all boxes to find which ones have QBX centers
        # is inefficient.

        domains = [
            "{[ibox]: 0<=ibox<nboxes}",
            "{[itarget_tree]: b_t_start <= itarget_tree < b_t_start + ntargets}",
        ]
        instructions = (
            """
            for ibox
                <> b_t_start = box_target_starts[ibox]
                <> ntargets = box_target_counts_nonchild[ibox]

                for itarget_tree
                    <> itarget_user = user_target_from_tree_target[itarget_tree]
                    <> in_bounds = itarget_user < ncenters

                    # This write is race-free because each center only belongs
                    # to one box.
                    if in_bounds
                        qbx_center_to_target_box[itarget_user] = \
                                box_to_target_box[ibox] {id=tgt_write}
                    end
                end
            end
            """
        )
        kernel_data = [
            lp.GlobalArg(
                "qbx_center_to_target_box", box_id_dtype, shape="ncenters"),
            lp.GlobalArg("box_to_target_box", box_id_dtype),
            lp.GlobalArg("user_target_from_tree_target", None, shape=None),
            lp.ValueArg("ncenters", particle_id_dtype),
            "..."
        ]

        unsplit = lp.make_kernel(
            domains,
            instructions,
            kernel_data,
            name="qbx_center_to_target_box_lookup",
            silenced_warnings="write_race(tgt_write)",
            lang_version=MOST_RECENT_LANGUAGE_VERSION)

        return lp.split_iname(
            unsplit, "ibox", 128, inner_tag="l.0", outer_tag="g.0")
Beispiel #28
0
 def keval():
     """Build the ``conn_evaluate_knl`` loopy program.

     The program adds ``coefficients[iel, ibasis] * basis[idof]`` to
     ``result[iel, idof]``.
     """
     domains = [
         "{[iel]: 0 <= iel < nelements}",
         "{[idof]: 0 <= idof < n_to_nodes}"
     ]
     accumulate_insn = (
         """
             result[iel, idof] = result[iel, idof] + \
                     coefficients[iel, ibasis] * basis[idof]
         """
     )
     kernel_data = [
         lp.GlobalArg(
             "coefficients", None, shape=("nelements", "n_to_nodes")),
         lp.ValueArg("ibasis", np.int32),
         "...",
     ]
     return make_loopy_program(
         domains, accumulate_insn, kernel_data, name="conn_evaluate_knl")
Beispiel #29
0
def test_divide_precedence(ctx_factory):
    """Check that parenthesized ``a/b`` and ``a%b`` keep their grouping.

    With ``a=5, b=2, c=2`` the kernel must yield ``x[0] == 4`` and
    ``y[0] == 2``. The generated device code is printed.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    instructions = (
        """
            x[0] = c*(a/b)
            y[0] = c*(a%b)
            """
    )
    kernel_data = [
        lp.ValueArg("a, b, c", np.int32),
        lp.GlobalArg("x, y", np.int32),
    ]
    knl = lp.make_kernel("{:}", instructions, kernel_data)
    print(lp.generate_code_v2(knl).device_code())

    evt, outputs = knl(queue, c=2, b=2, a=5)
    x_out, y_out = outputs
    evt.wait()
    assert x_out.get() == 4
    assert y_out.get() == 2
Beispiel #30
0
def test_fuzz_code_generator(ctx_factory):
    """Compare loopy-generated code against ``pymbolic.evaluate`` on random input.

    For each of 50 random expressions, the expression is evaluated with
    pymbolic and with a compiled loopy kernel. Examples whose reference
    evaluation raises ``ZeroDivisionError`` are skipped. A relative error
    above ``1e-10`` prints diagnostics and fails the test.

    :raises AssertionError: when the loopy result deviates from the
        reference value.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if ctx.devices[0].platform.vendor.startswith("Advanced Micro"):
        pytest.skip("crashes on AMD 15.12")

    from pymbolic import evaluate

    def get_dtype(x):
        # complex inputs need a complex kernel argument; the rest is double
        if isinstance(x, (complex, np.complexfloating)):
            return np.complex128
        return np.float64

    # Alternative example source:
    #   from expr_fuzz import get_fuzz_examples
    #   for expr, var_values in get_fuzz_examples():
    for expr, var_values in generate_random_fuzz_examples(50):
        try:
            true_value = evaluate(expr, var_values)
        except ZeroDivisionError:
            continue

        knl = lp.make_kernel("{ : }", [lp.Assignment("value", expr)],
                             [lp.GlobalArg("value", np.complex128, shape=())] +
                             [
                                 lp.ValueArg(name, get_dtype(val))
                                 for name, val in var_values.items()
                             ])
        ck = lp.CompiledKernel(ctx, knl)
        evt, (lp_value, ) = ck(queue, out_host=True, **var_values)
        err = abs(true_value - lp_value) / abs(true_value)
        if abs(err) > 1e-10:
            print(80 * "-")
            print("WRONG: rel error=%g" % err)
            print("true=%r" % true_value)
            print("loopy=%r" % lp_value)
            print(80 * "-")
            print(ck.get_code())
            print(80 * "-")
            print(var_values)
            print(80 * "-")
            print(repr(expr))
            print(80 * "-")
            print(expr)
            print(80 * "-")
            # fail explicitly instead of provoking a ZeroDivisionError
            raise AssertionError(
                "loopy result deviates from reference: rel error=%g" % err)