Exemple #1
0
    def get_kernel(self):
        ncoeffs = len(self.expansion)

        loopy_insns, result_names = self.get_loopy_insns_and_result_names()

        loopy_knl = lp.make_kernel(
                [
                    "{[iglobal_center]: 0<=iglobal_center<nglobal_qbx_centers}",
                    "{[icenter_tgt]: \
                            icenter_tgt_start<=icenter_tgt<icenter_tgt_end}",
                    "{[idim]: 0<=idim<dim}",
                    ],
                loopy_insns
                + ["""
                    <> src_icenter = global_qbx_centers[iglobal_center]

                    <> icenter_tgt_start = center_to_targets_starts[src_icenter]
                    <> icenter_tgt_end = center_to_targets_starts[src_icenter+1]

                    <> center_itgt = center_to_targets_lists[icenter_tgt]

                    <> center[idim] = qbx_centers[idim, src_icenter] \
                            {id=fetch_center}
                    <> b[idim] = targets[idim, center_itgt] - center[idim] \
                            {id=compute_b}
                    <> coeff${COEFFIDX} = qbx_expansions[src_icenter, ${COEFFIDX}]
                    result[${RESULTIDX},center_itgt] = \
                            kernel_scaling * result_${RESULTIDX}_p \
                            {id_prefix=write_result}
                """],
                [
                    lp.GlobalArg("result", None, shape="nresults, ntargets",
                        dim_tags="sep,C"),
                    lp.GlobalArg("qbx_centers", None, shape="dim, ncenters",
                        dim_tags="sep,c"),
                    lp.GlobalArg("center_to_targets_starts,center_to_targets_lists",
                        None, shape=None),
                    lp.GlobalArg("qbx_expansions", None,
                        shape=("ncenters", ncoeffs)),
                    lp.GlobalArg("targets", None, shape=(self.dim, "ntargets"),
                        dim_tags="sep,C"),
                    lp.ValueArg("ncenters,ntargets", np.int32),
                    "..."
                ] + [arg.loopy_arg for arg in self.expansion.get_args()],
                name=self.name, assumptions="nglobal_qbx_centers>=1",
                defines=dict(
                    dim=self.dim,
                    COEFFIDX=[str(i) for i in range(ncoeffs)],
                    RESULTIDX=[str(i) for i in range(len(result_names))],
                    nresults=len(result_names),
                    ),
                silenced_warnings="write_race(write_result*)")

        loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "compute_b",
                tags={"idim": "unr"})
        loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "fetch_center",
                tags={"idim": "unr"})
        loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl)

        return loopy_knl
Exemple #2
0
def test_unschedulable_kernel_detection():
    knl = lp.make_kernel(["{[i,j]:0<=i,j<n}"], """
                         mat1[i,j] = mat1[i,j] + 1 {inames=i:j, id=i1}
                         mat2[j] = mat2[j] + 1 {inames=j, id=i2}
                         mat3[i] = mat3[i] + 1 {inames=i, id=i3}
                         """)

    knl = lp.preprocess_kernel(knl)

    # Check that loopy can detect the unschedulability of the kernel
    assert lp.needs_iname_duplication(knl)
    assert len(list(lp.get_iname_duplication_options(knl))) == 4

    for inames, insns in lp.get_iname_duplication_options(knl):
        fixed_knl = lp.duplicate_inames(knl, inames, insns)
        assert not lp.needs_iname_duplication(fixed_knl)

    knl = lp.make_kernel(["{[i,j,k,l,m]:0<=i,j,k,l,m<n}"], """
                         mat1[l,m,i,j,k] = mat1[l,m,i,j,k] + 1 {inames=i:j:k:l:m}
                         mat2[l,m,j,k] = mat2[l,m,j,k] + 1 {inames=j:k:l:m}
                         mat3[l,m,k] = mat3[l,m,k] + 11 {inames=k:l:m}
                         mat4[l,m,i] = mat4[l,m,i] + 1 {inames=i:l:m}
                         """)

    assert lp.needs_iname_duplication(knl)
    assert len(list(lp.get_iname_duplication_options(knl))) == 10
Exemple #3
0
def test_unschedulable_kernel_detection():
    knl = lp.make_kernel(["{[i,j]:0<=i,j<n}"],
                         """
                         mat1[i,j] = mat1[i,j] + 1 {inames=i:j, id=i1}
                         mat2[j] = mat2[j] + 1 {inames=j, id=i2}
                         mat3[i] = mat3[i] + 1 {inames=i, id=i3}
                         """)

    knl = lp.preprocess_kernel(knl)

    # Check that loopy can detect the unschedulability of the kernel
    assert lp.needs_iname_duplication(knl)
    assert len(list(lp.get_iname_duplication_options(knl))) == 4

    for inames, insns in lp.get_iname_duplication_options(knl):
        fixed_knl = lp.duplicate_inames(knl, inames, insns)
        assert not lp.needs_iname_duplication(fixed_knl)

    knl = lp.make_kernel(["{[i,j,k,l,m]:0<=i,j,k,l,m<n}"],
                         """
                         mat1[l,m,i,j,k] = mat1[l,m,i,j,k] + 1 {inames=i:j:k:l:m}
                         mat2[l,m,j,k] = mat2[l,m,j,k] + 1 {inames=j:k:l:m}
                         mat3[l,m,k] = mat3[l,m,k] + 11 {inames=k:l:m}
                         mat4[l,m,i] = mat4[l,m,i] + 1 {inames=i:l:m}
                         """)

    assert lp.needs_iname_duplication(knl)
    assert len(list(lp.get_iname_duplication_options(knl))) == 10
Exemple #4
0
    def make_inames_unique(self, knl):

        for i in knl.instructions:
            print(type(i.expression))
            inames = self.recurse_expr(i.expression)
            if len(inames) > 0:
                iid = i.id
                iname_str = ",".join(inames)
                knl = lp.duplicate_inames(knl, iname_str, within=f"id:{iid}")

        return knl
Exemple #5
0
def make_kernel(domains, instructions, kernel_data=["..."], **kwargs):
    """User-facing kernel creation entrypoint.

    :arg domains: :class:`islpy.BasicSet`
    :arg instructions:
    :arg kernel_data:

        A list of :class:`ValueArg`, :class:`GlobalArg`, ... (etc.) instances.
        The order of these arguments determines the order of the arguments
        to the generated kernel.

        May also contain :class:`TemporaryVariable` instances(which do not
        give rise to kernel-level arguments).

        The string ``"..."`` may be passed as one of the entries
        of the list, in which case loopy will infer names, shapes,
        and types of arguments from the kernel code. It is
        possible to just pass the list ``["..."]``, in which case
        all arguments are inferred.

        In Python 3, the string ``"..."`` may be spelled somewhat more sensibly
        as just ``...`` (the ellipsis), for the same meaning.

        As an additional option, each argument may be specified as just a name
        (a string). This is useful to specify argument ordering. All other
        characteristics of the named arguments are inferred.

    The following keyword arguments are recognized:

    :arg preambles: a list of (tag, code) tuples that identify preamble
        snippets.
        Each tag's snippet is only included once, at its first occurrence.
        The preambles will be inserted in order of their tags.
    :arg preamble_generators: a list of functions of signature
        (seen_dtypes, seen_functions) where seen_functions is a set of
        (name, c_name, arg_dtypes), generating extra entries for *preambles*.
    :arg defines: a dictionary of replacements to be made in instructions given
        as strings before parsing. A macro instance intended to be replaced
        should look like "MACRO" in the instruction code. The expansion given
        in this parameter is allowed to be a list. In this case, instructions
        are generated for *each* combination of macro values.

        These defines may also be used in the domain and in argument shapes and
        strides. They are expanded only upon kernel creation.
    :arg default_order: "C" (default) or "F"
    :arg default_offset: 0 or :class:`loopy.auto`. The default value of
        *offset* in :attr:`loopy.kernel.data.GlobalArg` for guessed arguments.
        Defaults to 0.
    :arg function_manglers: list of functions of signature (name, arg_dtypes)
        returning a tuple (result_dtype, c_name)
        or a tuple (result_dtype, c_name, arg_dtypes),
        where c_name is the C-level function to be called.
    :arg symbol_manglers: list of functions of signature (name) returning
        a tuple (result_dtype, c_name), where c_name is the C-level symbol to
        be evaluated.
    :arg assumptions: the initial implemented_domain, captures assumptions
        on loop domain parameters. (an isl.Set or a string in
        :ref:`isl-syntax`.  If given as a string, only the CONDITIONS part of
        the set notation should be given.)
    :arg local_sizes: A dictionary from integers to integers, mapping
        workgroup axes to their sizes, e.g. *{0: 16}* forces axis 0 to be
        length 16.
    :arg silenced_warnings: a list (or semicolon-separated string) or warnings
        to silence
    :arg options: an instance of :class:`loopy.Options` or an equivalent
        string representation
    :arg target: an instance of :class:`loopy.target.TargetBase`, or *None*,
        to use an OpenCL target.
    """

    defines = kwargs.pop("defines", {})
    default_order = kwargs.pop("default_order", "C")
    default_offset = kwargs.pop("default_offset", 0)
    silenced_warnings = kwargs.pop("silenced_warnings", [])
    options = kwargs.pop("options", None)
    flags = kwargs.pop("flags", None)
    target = kwargs.pop("target", None)

    if target is None:
        try:
            import pyopencl  # noqa
        except ImportError:
            from loopy.target.opencl import OpenCLTarget
            target = OpenCLTarget()
        else:
            from loopy.target.pyopencl import PyOpenCLTarget
            target = PyOpenCLTarget()

    if flags is not None:
        if options is not None:
            raise TypeError("may not pass both 'options' and 'flags'")

        from warnings import warn
        warn("'flags' is deprecated. Use 'options' instead",
                DeprecationWarning, stacklevel=2)
        options = flags

    from loopy.options import make_options
    options = make_options(options)

    if isinstance(silenced_warnings, str):
        silenced_warnings = silenced_warnings.split(";")

    # {{{ separate temporary variables and arguments, take care of names with commas

    from loopy.kernel.data import TemporaryVariable, ArrayBase

    if isinstance(kernel_data, str):
        kernel_data = kernel_data.split(",")

    kernel_args = []
    temporary_variables = kwargs.pop("temporary_variables", {}).copy()
    for dat in kernel_data:
        if dat is Ellipsis or isinstance(dat, str):
            kernel_args.append(dat)
            continue

        if isinstance(dat, ArrayBase) and isinstance(dat.shape, tuple):
            new_shape = []
            for shape_axis in dat.shape:
                if shape_axis is not None:
                    new_shape.append(expand_defines_in_expr(shape_axis, defines))
                else:
                    new_shape.append(shape_axis)
            dat = dat.copy(shape=tuple(new_shape))

        for arg_name in dat.name.split(","):
            arg_name = arg_name.strip()
            if not arg_name:
                continue

            my_dat = dat.copy(name=arg_name)
            if isinstance(dat, TemporaryVariable):
                temporary_variables[my_dat.name] = dat
            else:
                kernel_args.append(my_dat)

    del kernel_data

    # }}}

    # {{{ instruction/subst parsing

    parsed_instructions = []
    kwargs["substitutions"] = substitutions = {}
    inames_to_dup = []

    if isinstance(instructions, str):
        instructions = [instructions]

    for insn in instructions:
        for new_insn, insn_inames_to_dup in parse_if_necessary(insn, defines):
            if isinstance(new_insn, InstructionBase):
                parsed_instructions.append(new_insn)

                # Need to maintain 1-to-1 correspondence to instructions
                inames_to_dup.append(insn_inames_to_dup)

            elif isinstance(new_insn, SubstitutionRule):
                substitutions[new_insn.name] = new_insn

                assert not insn_inames_to_dup
            else:
                raise RuntimeError("unexpected type in instruction parsing")

    instructions = parsed_instructions
    del parsed_instructions

    # }}}

    # {{{ find/create isl_context

    for domain in domains:
        if isinstance(domain, isl.BasicSet):
            assert domain.get_ctx() == isl.DEFAULT_CONTEXT

    # }}}

    domains = parse_domains(domains, defines)

    arg_guesser = ArgumentGuesser(domains, instructions,
            temporary_variables, substitutions,
            default_offset)

    kernel_args = arg_guesser.convert_names_to_full_args(kernel_args)
    kernel_args = arg_guesser.guess_kernel_args_if_requested(kernel_args)

    from loopy.kernel import LoopKernel
    knl = LoopKernel(domains, instructions, kernel_args,
            temporary_variables=temporary_variables,
            silenced_warnings=silenced_warnings,
            options=options,
            target=target,
            **kwargs)

    from loopy import duplicate_inames
    for insn, insn_inames_to_dup in zip(knl.instructions, inames_to_dup):
        for old_iname, new_iname in insn_inames_to_dup:
            knl = duplicate_inames(knl, old_iname,
                    within=insn.id, new_inames=new_iname)

    check_for_nonexistent_iname_deps(knl)

    knl = tag_reduction_inames_as_sequential(knl)
    knl = create_temporaries(knl, default_order)
    knl = determine_shapes_of_temporaries(knl)
    knl = expand_cses(knl)
    knl = expand_defines_in_shapes(knl, defines)
    knl = guess_arg_shape_if_requested(knl, default_order)
    knl = apply_default_order_to_args(knl, default_order)
    knl = resolve_wildcard_deps(knl)

    # -------------------------------------------------------------------------
    # Ordering dependency:
    # -------------------------------------------------------------------------
    # Must create temporaries before checking for writes to temporary variables
    # that are domain parameters.
    # -------------------------------------------------------------------------

    check_for_multiple_writes_to_loop_bounds(knl)
    check_for_duplicate_names(knl)
    check_written_variable_names(knl)

    from loopy.preprocess import prepare_for_caching
    knl = prepare_for_caching(knl)

    return knl
Exemple #6
0
def test_tim2d(ctx_factory):
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    n = 8

    from pymbolic import var
    K_sym = var("K")  # noqa

    field_shape = (K_sym, n, n)

    # K - run-time symbolic
    knl = lp.make_kernel(
            "[K] -> {[i,j,e,m,o,gi]: 0<=i,j,m,o<%d and 0<=e<K and 0<=gi<3}" % n,
            [
                "ur(a,b) := sum(o, D[a,o]*u[e,o,b])",
                "us(a,b) := sum(o, D[b,o]*u[e,a,o])",

                #"Gu(mat_entry,a,b) := G[mat_entry,e,m,j]*ur(m,j)",

                "Gux(a,b) := G$x[0,e,a,b]*ur(a,b)+G$x[1,e,a,b]*us(a,b)",
                "Guy(a,b) := G$y[1,e,a,b]*ur(a,b)+G$y[2,e,a,b]*us(a,b)",
                "lap[e,i,j]  = "
                "  sum(m, D[m,i]*Gux(m,j))"
                "+ sum(m, D[m,j]*Guy(i,m))"

            ],
            [
                lp.GlobalArg("u", dtype, shape=field_shape, order=order),
                lp.GlobalArg("lap", dtype, shape=field_shape, order=order),
                lp.GlobalArg("G", dtype, shape=(3,)+field_shape, order=order),
                # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order),
                lp.GlobalArg("D", dtype, shape=(n, n), order=order),
                # lp.ImageArg("D", dtype, shape=(n, n)),
                lp.ValueArg("K", np.int32, approximately=1000),
                ],
            name="semlap2D", assumptions="K>=1")

    knl = lp.duplicate_inames(knl, "o", within="id:ur")
    knl = lp.duplicate_inames(knl, "o", within="id:us")

    seq_knl = knl

    def variant_orig(knl):
        knl = lp.tag_inames(knl, dict(i="l.0", j="l.1", e="g.0"))

        knl = lp.add_prefetch(knl, "D[:,:]")
        knl = lp.add_prefetch(knl, "u[e, :, :]")

        knl = lp.precompute(knl, "ur(m,j)", ["m", "j"])
        knl = lp.precompute(knl, "us(i,m)", ["i", "m"])

        knl = lp.precompute(knl, "Gux(m,j)", ["m", "j"])
        knl = lp.precompute(knl, "Guy(i,m)", ["i", "m"])

        knl = lp.add_prefetch(knl, "G$x[:,e,:,:]")
        knl = lp.add_prefetch(knl, "G$y[:,e,:,:]")

        knl = lp.tag_inames(knl, dict(o="unr"))
        knl = lp.tag_inames(knl, dict(m="unr"))

        knl = lp.set_instruction_priority(knl, "id:D_fetch", 5)
        print(knl)

        return knl

    for variant in [variant_orig]:
        K = 1000  # noqa
        lp.auto_test_vs_ref(seq_knl, ctx, variant(knl),
                op_count=[K*(n*n*n*2*2 + n*n*2*3 + n**3 * 2*2)/1e9],
                op_label=["GFlops"],
                parameters={"K": K})
Exemple #7
0
    def get_kernel(self):
        ncoeffs = len(self.expansion)

        from sumpy.tools import gather_loopy_source_arguments
        arguments = (
                [
                    lp.GlobalArg("sources", None, shape=(self.dim, "nsources"),
                        dim_tags="sep,c"),
                    lp.GlobalArg("strengths", None, shape="nsources"),
                    lp.GlobalArg("qbx_center_to_target_box",
                        None, shape=None),
                    lp.GlobalArg("source_box_starts,source_box_lists",
                        None, shape=None),
                    lp.GlobalArg("box_source_starts,box_source_counts_nonchild",
                        None, shape=None),
                    lp.GlobalArg("qbx_centers", None, shape="dim, ncenters",
                        dim_tags="sep,c"),
                    lp.GlobalArg("qbx_expansions", None,
                        shape=("ncenters", ncoeffs)),
                    lp.ValueArg("ncenters", np.int32),
                    lp.ValueArg("nsources", np.int32),
                    "..."
                ] + gather_loopy_source_arguments([self.expansion]))

        loopy_knl = lp.make_kernel(
                [
                    "{[itgt_center]: 0<=itgt_center<ntgt_centers}",
                    "{[isrc_box]: isrc_box_start<=isrc_box<isrc_box_stop}",
                    "{[isrc,idim]: isrc_start<=isrc<isrc_end and 0<=idim<dim}",
                    ],
                self.get_loopy_instructions()
                + ["""
                    <> tgt_icenter = global_qbx_centers[itgt_center]

                    <> itgt_box = qbx_center_to_target_box[tgt_icenter]

                    <> isrc_box_start = source_box_starts[itgt_box]
                    <> isrc_box_stop = source_box_starts[itgt_box+1]

                    <> src_ibox = source_box_lists[isrc_box]
                    <> isrc_start = box_source_starts[src_ibox]
                    <> isrc_end = isrc_start+box_source_counts_nonchild[src_ibox]

                    <> center[idim] = qbx_centers[idim, tgt_icenter] \
                            {id=fetch_center}
                    <> a[idim] = center[idim] - sources[idim, isrc] {id=compute_a}
                    <> strength = strengths[isrc]
                    qbx_expansions[tgt_icenter, ${COEFFIDX}] = \
                            sum((isrc_box, isrc), strength*coeff${COEFFIDX}) \
                            {id_prefix=write_expn}
                    """],
                arguments,
                name=self.name, assumptions="ntgt_centers>=1",
                defines=dict(
                    dim=self.dim,
                    COEFFIDX=[str(i) for i in range(ncoeffs)]
                    ),
                silenced_warnings="write_race(write_expn*)")

        loopy_knl = self.expansion.prepare_loopy_kernel(loopy_knl)
        loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "fetch_center",
                tags={"idim": "unr"})
        loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr"))

        return loopy_knl
Exemple #8
0
    def get_kernel(self):
        ncoeff_src = len(self.src_expansion)
        ncoeff_tgt = len(self.tgt_expansion)

        from sumpy.tools import gather_loopy_arguments
        loopy_knl = lp.make_kernel(
                [
                    "{[icenter]: 0<=icenter<ncenters}",
                    "{[idim]: 0<=idim<dim}",
                    ],
                self.get_translation_loopy_insns()
                + ["""
                    <> isrc_box = qbx_center_to_target_box[icenter]

                    # The box's expansions which we're translating here
                    # (our source) is, globally speaking, a target box.

                    <> src_ibox = target_boxes[isrc_box] \
                        {id=read_src_ibox}

                    <> tgt_center[idim] = qbx_centers[idim, icenter] \
                        {id=fetch_tgt_center}

                    <> src_center[idim] = centers[idim, src_ibox] \
                        {id=fetch_src_center}
                    <> d[idim] = tgt_center[idim] - src_center[idim]

                    <> src_coeff${SRC_COEFFIDX} = \
                        expansions[src_ibox, ${SRC_COEFFIDX}] \
                        {dep=read_src_ibox}
                    qbx_expansions[icenter, ${TGT_COEFFIDX}] = \
                        qbx_expansions[icenter, ${TGT_COEFFIDX}] \
                        + coeff${TGT_COEFFIDX} \
                        {id_prefix=write_expn}
                    """],
                [
                    lp.GlobalArg("target_boxes", None, shape=None,
                        offset=lp.auto),
                    lp.GlobalArg("centers", None, shape="dim, naligned_boxes"),
                    lp.GlobalArg("qbx_centers", None, shape="dim, ncenters",
                        dim_tags="sep,c"),
                    lp.ValueArg("naligned_boxes,nboxes", np.int32),
                    lp.GlobalArg("expansions", None,
                        shape=("nboxes", ncoeff_src)),
                    "..."
                ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]),
                name=self.name, assumptions="ncenters>=1",
                defines=dict(
                    dim=self.dim,
                    nchildren=2**self.dim,
                    SRC_COEFFIDX=[str(i) for i in range(ncoeff_src)],
                    TGT_COEFFIDX=[str(i) for i in range(ncoeff_tgt)],
                    ),
                silenced_warnings="write_race(write_expn*)")

        for expn in [self.src_expansion, self.tgt_expansion]:
            loopy_knl = expn.prepare_loopy_kernel(loopy_knl)

        loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "fetch_tgt_center",
                tags={"idim": "unr"})
        loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr"))

        return loopy_knl
Exemple #9
0
    def get_kernel(self):
        ncoeff_src = len(self.src_expansion)
        ncoeff_tgt = len(self.tgt_expansion)

        from sumpy.tools import gather_loopy_arguments
        loopy_knl = lp.make_kernel(
                [
                    "{[icenter]: 0<=icenter<ncenters}",
                    "{[isrc_box]: isrc_start<=isrc_box<isrc_stop}",
                    "{[idim]: 0<=idim<dim}",
                    ],
                self.get_translation_loopy_insns()
                + ["""
                    <> icontaining_tgt_box = qbx_center_to_target_box[icenter]

                    <> tgt_center[idim] = qbx_centers[idim, icenter] \
                            {id=fetch_tgt_center}

                    <> isrc_start = src_box_starts[icontaining_tgt_box]
                    <> isrc_stop = src_box_starts[icontaining_tgt_box+1]

                    <> src_ibox = src_box_lists[isrc_box] \
                            {id=read_src_ibox}
                    <> src_center[idim] = centers[idim, src_ibox] \
                            {id=fetch_src_center}
                    <> d[idim] = tgt_center[idim] - src_center[idim]
                    <> src_coeff${SRC_COEFFIDX} = \
                        src_expansions[src_ibox, ${SRC_COEFFIDX}] \
                        {dep=read_src_ibox}

                    qbx_expansions[icenter, ${TGT_COEFFIDX}] = \
                            sum(isrc_box, coeff${TGT_COEFFIDX}) \
                            {id_prefix=write_expn}
                    """],
                [
                    lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"),
                    lp.GlobalArg("src_box_starts, src_box_lists",
                        None, shape=None, strides=(1,)),
                    lp.GlobalArg("qbx_centers", None, shape="dim, ncenters",
                        dim_tags="sep,c"),
                    lp.ValueArg("aligned_nboxes,nboxes", np.int32),
                    lp.GlobalArg("src_expansions", None,
                        shape=("nboxes", ncoeff_src)),
                    lp.GlobalArg("qbx_expansions", None,
                        shape=("ncenters", ncoeff_tgt)),
                    "..."
                ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]),
                name=self.name, assumptions="ncenters>=1",
                defines=dict(
                    dim=self.dim,
                    SRC_COEFFIDX=[str(i) for i in range(ncoeff_src)],
                    TGT_COEFFIDX=[str(i) for i in range(ncoeff_tgt)],
                    ),
                silenced_warnings="write_race(write_expn*)")

        for expn in [self.src_expansion, self.tgt_expansion]:
            loopy_knl = expn.prepare_loopy_kernel(loopy_knl)

        loopy_knl = lp.duplicate_inames(loopy_knl, "idim", "fetch_tgt_center",
                tags={"idim": "unr"})
        loopy_knl = lp.tag_inames(loopy_knl, dict(idim="unr"))

        return loopy_knl
Exemple #10
0
def test_tim2d(ctx_factory):
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    n = 8

    from pymbolic import var
    K_sym = var("K")  # noqa

    field_shape = (K_sym, n, n)

    # K - run-time symbolic
    knl = lp.make_kernel(
        "[K] -> {[i,j,e,m,o,gi]: 0<=i,j,m,o<%d and 0<=e<K and 0<=gi<3}" % n,
        [
            "ur(a,b) := simul_reduce(sum, o, D[a,o]*u[e,o,b])",
            "us(a,b) := simul_reduce(sum, o, D[b,o]*u[e,a,o])",

            #"Gu(mat_entry,a,b) := G[mat_entry,e,m,j]*ur(m,j)",
            "Gux(a,b) := G$x[0,e,a,b]*ur(a,b)+G$x[1,e,a,b]*us(a,b)",
            "Guy(a,b) := G$y[1,e,a,b]*ur(a,b)+G$y[2,e,a,b]*us(a,b)",
            "lap[e,i,j]  = "
            "  simul_reduce(sum, m, D[m,i]*Gux(m,j))"
            "+ simul_reduce(sum, m, D[m,j]*Guy(i,m))"
        ],
        [
            lp.GlobalArg("u", dtype, shape=field_shape, order=order),
            lp.GlobalArg("lap", dtype, shape=field_shape, order=order),
            lp.GlobalArg("G", dtype, shape=(3, ) + field_shape, order=order),
            # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order),
            lp.GlobalArg("D", dtype, shape=(n, n), order=order),
            # lp.ImageArg("D", dtype, shape=(n, n)),
            lp.ValueArg("K", np.int32, approximately=1000),
        ],
        name="semlap2D",
        assumptions="K>=1")

    knl = lp.duplicate_inames(knl, "o", within="id:ur")
    knl = lp.duplicate_inames(knl, "o", within="id:us")

    seq_knl = knl

    def variant_orig(knl):
        knl = lp.tag_inames(knl, dict(i="l.0", j="l.1", e="g.0"))

        knl = lp.add_prefetch(knl, "D[:,:]")
        knl = lp.add_prefetch(knl, "u[e, :, :]")

        knl = lp.precompute(knl, "ur(m,j)", ["m", "j"])
        knl = lp.precompute(knl, "us(i,m)", ["i", "m"])

        knl = lp.precompute(knl, "Gux(m,j)", ["m", "j"])
        knl = lp.precompute(knl, "Guy(i,m)", ["i", "m"])

        knl = lp.add_prefetch(knl, "G$x[:,e,:,:]")
        knl = lp.add_prefetch(knl, "G$y[:,e,:,:]")

        knl = lp.tag_inames(knl, dict(o="unr"))
        knl = lp.tag_inames(knl, dict(m="unr"))

        knl = lp.set_instruction_priority(knl, "id:D_fetch", 5)
        print(knl)

        return knl

    for variant in [variant_orig]:
        K = 1000  # noqa
        lp.auto_test_vs_ref(
            seq_knl,
            ctx,
            variant(knl),
            op_count=[
                K * (n * n * n * 2 * 2 + n * n * 2 * 3 + n**3 * 2 * 2) / 1e9
            ],
            op_label=["GFlops"],
            parameters={"K": K})