コード例 #1
0
ファイル: test_codegen.py プロジェクト: nchristensen/pytato
def test_slice(ctx_factory, shape):
    """Check that slicing a pytato array matches the equivalent numpy slice.

    Builds one output per slice produced by ``generate_test_slices(shape)``
    and compares the generated-kernel results against numpy's slicing.
    """
    cl_ctx = ctx_factory()
    queue = cl.CommandQueue(cl_ctx)

    from numpy.random import default_rng
    rng = default_rng()

    x_in = rng.random(size=shape)
    namespace = pt.Namespace()
    x = pt.make_data_wrapper(namespace, x_in)

    outputs = {}
    ref_outputs = {}

    # enumerate replaces the original manual `i = 0 ... i += 1` counter
    for i, slice_ in enumerate(generate_test_slices(shape)):
        outputs[f"out_{i}"] = x[slice_]
        ref_outputs[f"out_{i}"] = x_in[slice_]

    prog = pt.generate_loopy(pt.make_dict_of_named_arrays(outputs),
                             target=pt.PyOpenCLTarget(queue),
                             options=lp.Options(return_dict=True))

    _, outputs = prog()

    for output in outputs:
        x_out = outputs[output]
        x_ref = ref_outputs[output]
        assert (x_out == x_ref).all()
コード例 #2
0
ファイル: test_codegen.py プロジェクト: nchristensen/pytato
def test_codegen_with_DictOfNamedArrays(ctx_factory):  # noqa
    """Check DictOfNamedArrays codegen with and without return_dict."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    namespace = pt.Namespace()
    # np.int was removed in NumPy 1.24; np.dtype(int) is the drop-in
    # equivalent (np.int was simply an alias for the builtin int).
    x = Placeholder(namespace, "x", (5, ), np.dtype(int))
    y = Placeholder(namespace, "y", (5, ), np.dtype(int))
    x_in = np.array([1, 2, 3, 4, 5])
    y_in = np.array([6, 7, 8, 9, 10])

    result = pt.DictOfNamedArrays(dict(x_out=x, y_out=y))

    # Without return_dict.
    prog = pt.generate_loopy(result, target=pt.PyOpenCLTarget(queue))
    _, (x_out, y_out) = prog(x=x_in, y=y_in)
    assert (x_out == x_in).all()
    assert (y_out == y_in).all()

    # With return_dict.
    prog = pt.generate_loopy(result,
                             target=pt.PyOpenCLTarget(queue),
                             options=lp.Options(return_dict=True))

    _, outputs = prog(x=x_in, y=y_in)
    assert (outputs["x_out"] == x_in).all()
    assert (outputs["y_out"] == y_in).all()
コード例 #3
0
ファイル: test_codegen.py プロジェクト: nchristensen/pytato
def test_unary_arith(ctx_factory, which):
    """Compare a unary operator applied via pytato against numpy, for
    every dtype in ARITH_DTYPES."""
    queue = cl.CommandQueue(ctx_factory())

    op = getattr(operator, which)

    x_orig = np.array([1, 2, 3, 4, 5])
    namespace = pt.Namespace()

    # one named output per dtype
    exprs = {
        dtype: op(pt.make_data_wrapper(namespace, x_orig.astype(dtype)))
        for dtype in ARITH_DTYPES
    }

    prog = pt.generate_loopy(pt.make_dict_of_named_arrays(exprs),
                             target=pt.PyOpenCLTarget(queue),
                             options=lp.Options(return_dict=True))

    _, outputs = prog()

    for dtype in ARITH_DTYPES:
        expected = op(x_orig.astype(dtype))
        actual = outputs[dtype]

        assert actual.dtype == expected.dtype
        assert np.array_equal(actual, expected)
コード例 #4
0
ファイル: test_codegen.py プロジェクト: nchristensen/pytato
def test_scalar_array_binary_arith(ctx_factory, which, reverse):
    """Compare scalar-array arithmetic in pytato against numpy for all
    combinations of scalar type and array dtype."""
    queue = cl.CommandQueue(ctx_factory())

    op = getattr(operator, which)
    if reverse:
        op = reverse_args(op)

    x_orig = 7
    y_orig = np.array([1, 2, 3, 4, 5])

    for first_dtype in (int, float, complex):
        namespace = pt.Namespace()
        x_in = first_dtype(x_orig)

        # one named output per array dtype
        exprs = {
            dtype: op(x_in, pt.make_data_wrapper(namespace,
                                                 y_orig.astype(dtype),
                                                 name=f"y{dtype}"))
            for dtype in ARITH_DTYPES
        }

        prog = pt.generate_loopy(pt.make_dict_of_named_arrays(exprs),
                                 target=pt.PyOpenCLTarget(queue),
                                 options=lp.Options(return_dict=True))

        _, outputs = prog()

        for dtype in exprs:
            expected = op(x_in, y_orig.astype(dtype))
            actual = outputs[dtype]

            assert actual.dtype == expected.dtype, (actual.dtype, expected.dtype)
            # In some cases ops are done in float32 in loopy but float64 in numpy.
            assert np.allclose(actual, expected), (actual, expected)
コード例 #5
0
def make_loopy_program(domains,
                       statements,
                       kernel_data=None,
                       name="mm_actx_kernel"):
    """Return a :class:`loopy.LoopKernel` suitable for use with
    :meth:`ArrayContext.call_loopy`.

    :param domains: ISL domain(s) forwarded to :func:`loopy.make_kernel`.
    :param statements: the kernel instructions.
    :param kernel_data: argument/temporary specification; defaults to
        ``["..."]`` (let loopy infer the arguments).
    :param name: name of the generated kernel.
    """
    # Avoid the mutable-default-argument pitfall: build the default per call.
    if kernel_data is None:
        kernel_data = ["..."]
    return lp.make_kernel(domains,
                          statements,
                          kernel_data=kernel_data,
                          options=lp.Options(no_numpy=True, return_dict=True),
                          default_offset=lp.auto,
                          name=name,
                          lang_version=MOST_RECENT_LANGUAGE_VERSION)
コード例 #6
0
 def init_global_mat_prg():
     """Return a :mod:`loopy` kernel that zero-fills an ``n x m`` global
     matrix named ``result`` (instruction id ``init``).
     """
     return lp.make_kernel(
         ["{[idof]: 0 <= idof < n}", "{[jdof]: 0 <= jdof < m}"],
         """
             result[idof, jdof]  = 0 {id=init}
         """,
         [
             # dtype None: let loopy infer the matrix dtype at call time
             lp.GlobalArg("result", None, shape="n, m", offset=lp.auto),
             lp.ValueArg("n, m", np.int32),
             "...",
         ],
         options=lp.Options(return_dict=True),
         default_offset=lp.auto,
         name="init_a_global_matrix",
     )
コード例 #7
0
    def __init__(self, fft, dk, dx, effective_k):
        """Build an elementwise spectral-solve kernel.

        :param fft: FFT object providing ``sub_k`` momenta arrays and the
            ``rdtype``/``cdtype`` dtypes and ``grid_shape``.
        :param dk: per-axis momentum-space spacing.
        :param dx: per-axis position-space spacing.
        :param effective_k: callable ``(k, dx) -> effective wavenumber``
            applied to each axis's momenta.
        """
        self.fft = fft
        grid_size = fft.grid_shape[0] * fft.grid_shape[1] * fft.grid_shape[2]

        queue = self.fft.sub_k["momenta_x"].queue
        sub_k = list(x.get().astype("int") for x in self.fft.sub_k.values())
        k_names = ("k_x", "k_y", "k_z")
        # Transfer the effective momenta to the device, one array per axis.
        # (The original initialized self.momenta twice; once suffices.)
        self.momenta = {}
        for mu, (name, kk) in enumerate(zip(k_names, sub_k)):
            kk_mu = effective_k(dk[mu] * kk.astype(fft.rdtype), dx[mu])
            self.momenta[name] = cla.to_device(queue, kk_mu)

        args = [
            lp.GlobalArg("fk", fft.cdtype, shape="(Nx, Ny, Nz)"),
            lp.GlobalArg("k_x", fft.rdtype, shape=("Nx", )),
            lp.GlobalArg("k_y", fft.rdtype, shape=("Ny", )),
            lp.GlobalArg("k_z", fft.rdtype, shape=("Nz", )),
            lp.ValueArg("m_squared", fft.rdtype),
        ]

        from pystella.field import Field
        from pymbolic.primitives import Variable, If, Comparison

        fk = Field("fk")
        indices = fk.indices
        rho_tmp = Variable("rho_tmp")
        # normalize the source by the grid size into a temporary
        # (presumably the FFT-normalization convention — confirm)
        tmp_insns = [(rho_tmp, Field("rhok") * (1 / grid_size))]

        mom_vars = tuple(Variable(name) for name in k_names)
        minus_k_squared = sum(kk_i[x_i]
                              for kk_i, x_i in zip(mom_vars, indices))
        sol = rho_tmp / (minus_k_squared - Variable("m_squared"))

        # only write the solution where the summed wavenumber term is
        # negative; otherwise write 0 (guards against division blow-up)
        solution = {
            Field("fk"): If(Comparison(minus_k_squared, "<", 0), sol, 0)
        }

        from pystella.elementwise import ElementWiseMap
        options = lp.Options(return_dict=True)
        self.knl = ElementWiseMap(solution,
                                  args=args,
                                  halo_shape=0,
                                  options=options,
                                  tmp_instructions=tmp_insns,
                                  lsize=(16, 2, 1))
コード例 #8
0
ファイル: step.py プロジェクト: mfkiwl/pystella
    def make_steps(self, MapKernel=ElementWiseMap, **kwargs):
        """Build and return one kernel per Runge-Kutta stage."""
        rhs = var("rhs")
        dt = var("dt")
        q = var("q")
        fixed_parameters = kwargs.pop("fixed_parameters", dict())

        rhs_statements = {
            rhs[i]: index_fields(value, prepend_with=(q, ))
            for i, value in enumerate(self.rhs_dict.values())
        }

        def _is_field_key(f):
            # keys must be a Field or a Subscript of a Field so that
            # index_fields can prepend the q index
            if isinstance(f, Field):
                return True
            return isinstance(f, Subscript) and isinstance(f.aggregate, Field)

        steps = []
        for stage in range(self.num_stages):
            RK_dict = {}
            for i, f in enumerate(self.rhs_dict.keys()):
                if not _is_field_key(f):
                    raise ValueError("rhs_dict keys must be Field instances")

                RK_dict.update(self.step_statements(stage, f, dt, rhs[i]))

            fixed_parameters.update(q=0 if stage == 0 else 1)

            options = lp.Options(enforce_variable_access_ordered="no_check")
            step = MapKernel(RK_dict,
                             tmp_instructions=rhs_statements,
                             args=self.args,
                             **kwargs,
                             options=options,
                             fixed_parameters=fixed_parameters)
            steps.append(step)

        return steps
コード例 #9
0
 def write_into_mat_prg():
     """Return a :mod:`loopy` kernel that copies an ``ndofs x mdofs`` block
     ``mat`` into the global ``n x m`` matrix ``result`` at offset
     ``(offset_i, offset_j)``.
     """
     return lp.make_kernel(
         ["{[idof]: 0 <= idof < ndofs}", "{[jdof]: 0 <= jdof < mdofs}"],
         """
             result[offset_i + idof, offset_j + jdof] = mat[idof, jdof]
         """,
         [
             # dtype None: let loopy infer the dtypes at call time
             lp.GlobalArg("result", None, shape="n, m", offset=lp.auto),
             lp.ValueArg("n, m", np.int32),
             lp.GlobalArg("mat", None, shape="ndofs, mdofs",
                          offset=lp.auto),
             lp.ValueArg("offset_i", np.int32),
             lp.ValueArg("offset_j", np.int32),
             "...",
         ],
         options=lp.Options(return_dict=True),
         default_offset=lp.auto,
         name="write_into_global_matrix",
     )
コード例 #10
0
ファイル: test_codegen.py プロジェクト: nchristensen/pytato
def test_array_array_binary_arith(ctx_factory, which, reverse):
    """Compare elementwise array-array arithmetic in pytato against numpy
    for every pair of dtypes in ARITH_DTYPES."""
    if which == "sub":
        pytest.skip("https://github.com/inducer/loopy/issues/131")

    queue = cl.CommandQueue(ctx_factory())

    op = getattr(operator, which)
    if reverse:
        op = reverse_args(op)

    x_orig = np.array([1, 2, 3, 4, 5])
    y_orig = np.array([10, 9, 8, 7, 6])

    for first_dtype in ARITH_DTYPES:
        namespace = pt.Namespace()
        x_in = x_orig.astype(first_dtype)
        x = pt.make_data_wrapper(namespace, x_in, name="x")

        # one named output per second-operand dtype
        exprs = {
            dtype: op(x, pt.make_data_wrapper(namespace,
                                              y_orig.astype(dtype),
                                              name=f"y{dtype}"))
            for dtype in ARITH_DTYPES
        }

        prog = pt.generate_loopy(pt.make_dict_of_named_arrays(exprs),
                                 target=pt.PyOpenCLTarget(queue),
                                 options=lp.Options(return_dict=True))

        _, outputs = prog()

        for dtype in ARITH_DTYPES:
            expected = op(x_in, y_orig.astype(dtype))
            actual = outputs[dtype]

            assert actual.dtype == expected.dtype, (actual.dtype, expected.dtype)
            # In some cases ops are done in float32 in loopy but float64 in numpy.
            assert np.allclose(actual, expected), (actual, expected)
コード例 #11
0
    def __init__(self, fft, dk):
        """Build spectral derivative kernels (first derivatives, gradient,
        Laplacian, and divergence-increment variants).

        :param fft: FFT object providing ``sub_k`` momenta arrays,
            ``rdtype`` and ``grid_shape``.
        :param dk: per-axis momentum-space spacing.
        """
        self.fft = fft
        grid_size = fft.grid_shape[0] * fft.grid_shape[1] * fft.grid_shape[2]

        queue = self.fft.sub_k["momenta_x"].queue
        sub_k = list(x.get().astype("int") for x in self.fft.sub_k.values())
        k_names = ("k_x", "k_y", "k_z")
        # two device-side momenta arrays per axis: "_2" keeps all modes
        # (used for the Laplacian), "_1" has the Nyquist and zero modes
        # zeroed (used for first derivatives)
        self.momenta = {}
        for mu, (name, kk) in enumerate(zip(k_names, sub_k)):
            kk_mu = dk[mu] * kk.astype(fft.rdtype)
            self.momenta[name + "_2"] = cla.to_device(queue, kk_mu)

            # zero Nyquist mode for first derivatives
            kk_mu[abs(sub_k[mu]) == fft.grid_shape[mu] // 2] = 0.
            kk_mu[sub_k[mu] == 0] = 0.
            self.momenta[name + "_1"] = cla.to_device(queue, kk_mu)

        args = [
            lp.GlobalArg("fk", shape="(Nx, Ny, Nz)"),
            lp.GlobalArg("k_x_1, k_x_2", fft.rdtype, shape=("Nx", )),
            lp.GlobalArg("k_y_1, k_y_2", fft.rdtype, shape=("Ny", )),
            lp.GlobalArg("k_z_1, k_z_2", fft.rdtype, shape=("Nz", )),
        ]

        from pystella.field import Field
        fk = Field("fk")
        pd = tuple(Field(pdi) for pdi in ("pdx_k", "pdy_k", "pdz_k"))

        indices = fk.indices

        from pymbolic import var
        mom_vars = tuple(var(name + "_1") for name in k_names)

        # normalize input by the grid size into a temporary
        # (presumably the FFT-normalization convention — confirm)
        fk_tmp = var("fk_tmp")
        tmp_insns = [(fk_tmp, fk * (1 / grid_size))]

        # i*k_mu * fk: one single-assignment dict per axis
        pdx, pdy, pdz = ({
            pdi: kk_i[indices[i]] * 1j * fk_tmp
        } for i, (pdi, kk_i) in enumerate(zip(pd, mom_vars)))

        # accumulate i*k_mu * fk onto an existing "div" field
        pdx_incr, pdy_incr, pdz_incr = ({
            Field("div"):
            Field("div") + kk_i[indices[i]] * 1j * fk_tmp
        } for i, kk_i in enumerate(mom_vars))

        # Laplacian uses the un-filtered "_2" momenta
        mom_vars = tuple(var(name + "_2") for name in k_names)
        kmag_sq = sum(kk_i[x_i]**2 for kk_i, x_i in zip(mom_vars, indices))
        lap = {Field("lap_k"): -kmag_sq * fk_tmp}

        from pystella.elementwise import ElementWiseMap
        common_args = dict(halo_shape=0,
                           args=args,
                           lsize=(16, 2, 1),
                           tmp_instructions=tmp_insns,
                           options=lp.Options(return_dict=True))
        self.pdx_knl = ElementWiseMap(pdx, **common_args)
        self.pdy_knl = ElementWiseMap(pdy, **common_args)
        self.pdz_knl = ElementWiseMap(pdz, **common_args)
        self.pdx_incr_knl = ElementWiseMap(pdx_incr, **common_args)
        self.pdy_incr_knl = ElementWiseMap(pdy_incr, **common_args)
        self.pdz_incr_knl = ElementWiseMap(pdz_incr, **common_args)
        self.lap_knl = ElementWiseMap(lap, **common_args)

        # fused gradient / gradient+Laplacian kernels use a smaller lsize
        common_args["lsize"] = (16, 1, 1)
        self.grad_knl = ElementWiseMap({**pdx, **pdy, **pdz}, **common_args)
        self.grad_lap_knl = ElementWiseMap({
            **pdx,
            **pdy,
            **pdz,
            **lap
        }, **common_args)
コード例 #12
0
def generate(builder, wrapper_name=None):
    """Generate a loopy wrapper kernel from *builder*'s recorded state.

    Emits the builder's instructions, merges and renames indices so the
    counters start from zero, schedules/nests loops, constructs the loopy
    kernel, and finally registers or inlines the user kernel.  Returns the
    finished :class:`loopy.LoopKernel` wrapper.

    :param builder: the wrapper-kernel builder holding instructions,
        arguments, loop indices and layer extents.
    :param wrapper_name: name for the generated kernel; defaults to
        ``"wrap_<kernel name>"``.
    """
    if builder.layer_index is not None:
        outer_inames = frozenset(
            [builder._loop_index.name, builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # collect the kernel-construction parameters in one namespace object
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [
        tuple(merger(i) for i in inits) for inits in mapper.initialisers
    ]

    # rename indices and nodes (so that the counters start from zero)
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node,
                      (Index, RuntimeIndex, Variable, Argument, NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            replacements[node] = "%s%d%s" % (
                prefix, next(counter[(prefix, postfix)]), postfix)

    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [
        rename_nodes(inits, replacements) for inits in mapper.initialisers
    ]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments,
                                                replacements)
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents],
                        replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames,
                                 parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps

    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements
                      if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(
                    d.add_constraint(
                        isl.Constraint.eq_from_names(d.space, {
                            "n": 1,
                            "start": -1
                        })))
            else:
                new_domains.append(d)
        domains = new_domains
        if builder.extruded:
            new_domains = []
            for d in domains:
                if d.get_dim_name(isl.dim_type.set,
                                  0) == builder.layer_index.name:
                    # layer = t1 - 1
                    t1 = parameters.layer_end
                    new_domains.append(
                        d.add_constraint(
                            isl.Constraint.eq_from_names(
                                d.space, {
                                    "layer": 1,
                                    t1: -1,
                                    1: 1
                                })))
                else:
                    new_domains.append(d)
        # NOTE(review): this assignment sits outside the `if
        # builder.extruded:` block — harmless when not extruded (it
        # re-assigns the same list), but confirm the indentation is
        # intentional.
        domains = new_domains

    assumptions, = reduce(
        operator.and_,
        parameters.assumptions.values()).params().get_basic_sets()
    options = loopy.Options(check_dep_resolution=True,
                            ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # constrain start/end (and layer extents, if extruded) in the assumptions
    pwaffd = isl.affs_from_space(assumptions.get_space())
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(
            pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(
        ["#include <math.h>", "#include <complex.h>", "#include <petsc.h>"])
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code,
                              tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper,
                                                 [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
        wrapper, petsc_function_lookup)

    return wrapper
コード例 #13
0
    def __init__(self, decomp, input, **kwargs):
        """Collect reduction expressions from *input* and build the kernel.

        *input* may be a Sector, a list of Sectors, or a reducers dict.
        """
        self.decomp = decomp
        from pystella import Sector
        if isinstance(input, Sector):
            self.reducers = input.reducers
        elif isinstance(input, list):
            self.reducers = dict(item for sector in input
                                 for item in sector.reducers.items())
        elif isinstance(input, dict):
            self.reducers = input
        else:
            raise NotImplementedError

        reducers = self.reducers
        self.grid_size = kwargs.pop("grid_size", None)
        self.callback = kwargs.pop("callback", lambda x: x)

        self.num_reductions = sum(len(val) for val in reducers.values())

        from pymbolic import var
        tmp = var("tmp")
        # map each reducer key to its contiguous range of rows in "tmp"
        self.tmp_dict = {}
        offset = 0
        for key, val in reducers.items():
            self.tmp_dict[key] = range(offset, offset + len(val))
            offset += len(val)

        # flatten inputs into parallel lists of expressions and reduction
        # operations; bare expressions default to an "avg" reduction
        flat_reducers = []
        reduction_ops = []
        for val in reducers.values():
            for entry in val:
                if isinstance(entry, tuple):
                    flat_reducers.append(entry[0])
                    reduction_ops.append(entry[1])
                else:
                    flat_reducers.append(entry)
                    reduction_ops.append("avg")
        self.reduction_ops = reduction_ops

        def reduction(expr, op):
            return lp.symbolic.Reduction(operation=op, inames=("i",), expr=expr,
                                         allow_simultaneous=True)

        # "avg" is computed as a sum here (presumably divided by the grid
        # size later — confirm against the caller)
        statements = [
            (tmp[row, var("j"), var("k")],
             reduction(expr, "sum" if op == "avg" else op))
            for row, (expr, op) in enumerate(zip(flat_reducers, reduction_ops))
        ]
        statements.append(
            lp.Assignment(
                var("Nx_"), var("Nx"),
                id="Nx_assign",
                predicates={"i == 0", "j == 0", "k == 0"})
        )

        args = [lp.GlobalArg("Nx_", shape=(), dtype="int")]
        args += kwargs.pop("args", [...])
        lsize = kwargs.pop("lsize", (32, 2, 1))

        silenced_warnings = kwargs.pop("silenced_warnings", [])
        silenced_warnings += ["write_race(Nx_assign)"]

        super().__init__(statements, **kwargs, args=args, seq_dependencies=False,
                         lsize=lsize, options=lp.Options(return_dict=True),
                         silenced_warnings=silenced_warnings)
コード例 #14
0
ファイル: codegen.py プロジェクト: inducer/pytato
def generate_loopy(
    result: Union[Array, DictOfNamedArrays, Dict[str, Array]],
    target: Optional[LoopyTarget] = None,
    options: Optional[lp.Options] = None,
    *,
    cl_device: Optional["pyopencl.Device"] = None,
    array_tag_t_to_not_propagate: FrozenSet[Type[Tag]] = frozenset(
        [ImplStored, Named, PrefixNamed]),
    axis_tag_t_to_not_propagate: FrozenSet[Type[Tag]] = frozenset(),
) -> BoundProgram:
    r"""Code generation entry point.

    :param result: Outputs of the computation.
    :param target: Code generation target.
    :param options: Code generation options for the kernel.
    :returns: A :class:`pytato.target.BoundProgram` wrapping the generated
        :mod:`loopy` program.

    If *result* is a :class:`dict` or a :class:`pytato.DictOfNamedArrays` and
    *options* is not supplied, then the Loopy option
    :attr:`~loopy.Options.return_dict` will be set to *True*. If it is supplied,
    :attr:`~loopy.Options.return_dict` must already be set to *True*.

    .. note::

        :mod:`pytato` metadata :math:`\mapsto` :mod:`loopy` metadata semantics:

        - Inames that index over an :class:`~pytato.array.Array`'s axis in the
          allocation instruction are tagged with the corresponding
          :class:`~pytato.array.Axis`'s tags. The caller may choose to not
          propagate axis tags of type *axis_tag_t_to_not_propagate*.
        - :attr:`pytato.Array.tags` of inputs/outputs in *outputs*
          would be copied over to the tags of the corresponding
          :class:`loopy.ArrayArg`. The caller may choose to not
          propagate array tags of type *array_tag_t_to_not_propagate*.
        - Arrays tagged with :class:`pytato.tags.ImplStored` would have their
          tags copied over to the tags of corresponding
          :class:`loopy.TemporaryVariable`. The caller may choose to not
          propagate array tags of type *array_tag_t_to_not_propagate*.
    """

    result_is_dict = isinstance(result, (dict, DictOfNamedArrays))
    orig_outputs: DictOfNamedArrays = normalize_outputs(result)
    del result

    if target is None:
        target = LoopyPyOpenCLTarget(device=cl_device)
    else:
        if cl_device is not None:
            raise TypeError("may not pass both 'target' and 'cl_device'")

    preproc_result = preprocess(orig_outputs, target)
    outputs = preproc_result.outputs
    compute_order = preproc_result.compute_order

    if options is None:
        options = lp.Options(return_dict=result_is_dict)
    elif isinstance(options, dict):
        from warnings import warn
        warn(
            "Passing a dict for options is deprecated and will stop working in "
            "2022. Pass an actual loopy.Options object instead.",
            DeprecationWarning,
            stacklevel=2)
        options = lp.Options(**options)

    if options.return_dict != result_is_dict:
        # The error message previously referred to a nonexistent
        # "options.result_is_dict" attribute; the option is "return_dict".
        raise ValueError("options.return_dict is expected to match "
                         "whether the returned value is a dictionary")

    state = get_initial_codegen_state(target, options)

    from pytato.transform import InputGatherer
    ing = InputGatherer()

    # reserve the names of all named inputs so generated names don't collide
    state.var_name_gen.add_names({
        input_expr.name
        for name in compute_order for input_expr in ing(outputs[name].expr)
        if isinstance(input_expr, (Placeholder, SizeParam, DataWrapper))
        if input_expr.name is not None
    })

    state.var_name_gen.add_names(outputs)

    cg_mapper = CodeGenMapper(array_tag_t_to_not_propagate,
                              axis_tag_t_to_not_propagate)

    # Generate code for outputs.
    for name in compute_order:
        expr = outputs[name].expr
        insn_id = add_store(name, expr, cg_mapper(expr, state), state,
                            cg_mapper)
        # replace "expr" with the created stored variable
        state.results[expr] = StoredResult(name, expr.ndim,
                                           frozenset([insn_id]))

    # Why call make_reduction_inames_unique?
    # Consider pt.generate_loopy(pt.sum(x) + pt.sum(x)), the generated program
    # would be a single instruction with rhs: `_pt_subst() + _pt_subst()`.
    # The result of pt.sum(x) is cached => same instance of InlinedResult is
    # emitted for both invocations and we would be required to avoid such
    # reduction iname collisions.
    program = lp.make_reduction_inames_unique(state.program)

    return target.bind_program(program=program,
                               bound_arguments=preproc_result.bound_arguments)
コード例 #15
0
ファイル: compiler.py プロジェクト: kaushikcfd/grudge
    def map_insn_assign(self, insn):
        """Lower an assignment instruction to a loopy kernel instruction.

        Operator bindings and external calls are passed through unchanged;
        purely scalar assignments are also passed through.  Everything else
        is turned into a per-element loopy kernel wrapped in a
        ``LoopyKernelInstruction``.
        """
        from grudge.symbolic.primitives import OperatorBinding

        if (
                len(insn.exprs) == 1
                and (
                    isinstance(insn.exprs[0], OperatorBinding)
                    or is_external_call(
                        insn.exprs[0], self.function_registry))):
            return insn

        # FIXME: These names and the size names could clash with user-given names.
        # Need better metadata tracking in loopy.
        iel = "iel"
        idof = "idof"

        # names that are written but should not be returned to the caller
        temp_names = [
                name
                for name, dnr in zip(insn.names, insn.do_not_return)
                if dnr]

        from pymbolic import var
        expr_mapper = ToLoopyExpressionMapper(
                self.dd_inference_mapper, temp_names, (var(iel), var(idof)))
        insns = []

        import loopy as lp
        # (a duplicate "from pymbolic import var" import was removed here)
        for name, expr, dnr in zip(insn.names, insn.exprs, insn.do_not_return):
            insns.append(
                    lp.Assignment(
                        expr_mapper(var(name)),
                        expr_mapper(expr),
                        temp_var_type=lp.Optional(None) if dnr else lp.Optional(),
                        no_sync_with=frozenset([
                            ("*", "any"),
                            ]),
                        ))

        if not expr_mapper.non_scalar_vars:
            return insn

        knl = lp.make_kernel(
                "{[%(iel)s, %(idof)s]: "
                "0 <= %(iel)s < nelements and 0 <= %(idof)s < nunit_dofs}"
                % {"iel": iel, "idof": idof},
                insns,

                name="grudge_assign_%d" % self.insn_count,

                # Single-insn kernels may have their no_sync_with resolve to an
                # empty set, that's OK.
                options=lp.Options(
                    check_dep_resolution=False,
                    return_dict=True,
                    no_numpy=True,
                    )
                )

        self.insn_count += 1

        from pytools import single_valued
        governing_dd = single_valued(
                self.dd_inference_mapper(expr)
                for expr in insn.exprs)

        knl = lp.register_preamble_generators(knl,
                [bessel_preamble_generator])
        knl = lp.register_function_manglers(knl,
                [bessel_function_mangler])

        input_mappings = {}
        output_mappings = {}

        from grudge.symbolic.mappers import DependencyMapper
        dep_mapper = DependencyMapper(composite_leaves=False)

        # classify each mapped expression as an input or an output of the
        # generated kernel based on whether its dependency is assigned here
        for expr, name in expr_mapper.expr_to_name.items():
            deps = dep_mapper(expr)
            assert len(deps) <= 1
            if not deps:
                is_output = False
            else:
                dep, = deps
                is_output = dep.name in insn.names

            if is_output:
                tgt_dict = output_mappings
            else:
                tgt_dict = input_mappings

            tgt_dict[name] = expr

        return LoopyKernelInstruction(
            LoopyKernelDescriptor(
                loopy_kernel=knl,
                input_mappings=input_mappings,
                output_mappings=output_mappings,
                fixed_arguments={},
                governing_dd=governing_dd)
            )
Code example #16
0
    def make_kernel(self, map_instructions, tmp_instructions, args, domains,
                    **kwargs):
        """Assemble a :mod:`loopy` translation unit from the given instructions.

        *tmp_instructions* become temporary-variable assignments (each
        temporary is declared exactly once), *map_instructions* become the
        output statements. Arguments inferred from the instructions are
        merged with *args*, untyped kernel arguments receive ``self.dtype``,
        and a ``round`` callable is registered on the result.
        """
        tmp_stmts = []
        declared_tmps = []

        from pystella.field import index_fields
        tmp_insns = index_fields(tmp_instructions)
        map_insns = index_fields(map_instructions)

        for insn in tmp_insns:
            if isinstance(insn, lp.InstructionBase):
                tmp_stmts.append(insn)
                continue

            lhs, rhs = insn
            # Figure out which variable is being written so each temporary
            # is declared only once.
            if isinstance(lhs, pp.Variable):
                written = lhs
            elif isinstance(lhs, pp.Subscript):
                written = lhs.aggregate
            else:
                written = None

            if written is not None and written not in declared_tmps:
                declared_tmps.append(written)
                tv_type = lp.Optional(None)
            else:
                tv_type = lp.Optional()

            tmp_stmts.append(
                self._assignment(lhs, rhs, temp_var_type=tv_type))

        out_stmts = []
        for insn in map_insns:
            if isinstance(insn, lp.InstructionBase):
                out_stmts.append(insn)
            else:
                lhs, rhs = insn
                tmp_stmts.append(self._assignment(lhs, rhs))

        options = kwargs.pop("options", lp.Options())
        # ignore lack of supposed dependency for single-instruction kernels
        if len(map_instructions) + len(tmp_instructions) == 1:
            options.check_dep_resolution = False

        from pystella import get_field_args
        inferred_args = get_field_args([map_instructions, tmp_instructions])
        all_args = append_new_args(args, inferred_args)

        t_unit = lp.make_kernel(
            domains,
            tmp_stmts + out_stmts,
            all_args + [lp.ValueArg("Nx, Ny, Nz", dtype="int"), ...],
            options=options,
            **kwargs,
        )

        knl = t_unit.default_entrypoint
        # Fill in the caller-supplied default dtype for any argument whose
        # dtype loopy could not infer.
        patched_args = [
            arg.copy(dtype=self.dtype)
            if isinstance(arg, lp.KernelArgument) and arg.dtype is None
            else arg
            for arg in knl.args
        ]
        t_unit = t_unit.with_kernel(knl.copy(args=patched_args))
        t_unit = lp.remove_unused_arguments(t_unit)
        t_unit = lp.register_callable(
            t_unit, "round", UnaryOpenCLCallable("round"))

        return t_unit
Code example #17
0
File: compiler.py  Project: matthiasdiener/grudge
    def map_insn_assign(self, insn):
        """Lower an assignment instruction into a :class:`LoopyKernelInstruction`.

        Builds an elementwise loopy kernel over a single flat index for the
        instruction's expressions, splitting the iname for GPU execution, and
        classifies each mapped expression as a kernel input or output. Returns
        *insn* unchanged when it is a pure operator binding / external call or
        when only scalar variables are involved.
        """
        from grudge.symbolic.primitives import OperatorBinding

        # Operator bindings and external calls are handled elsewhere; pass
        # the instruction through untouched.
        if (len(insn.exprs) == 1 and
            (isinstance(insn.exprs[0], OperatorBinding)
             or is_external_call(insn.exprs[0], self.function_registry))):
            return insn

        iname = "grdg_i"
        size_name = "grdg_n"

        # Names flagged "do not return" become kernel-private temporaries.
        temp_names = [
            name for name, dnr in zip(insn.names, insn.do_not_return) if dnr
        ]

        expr_mapper = ToLoopyExpressionMapper(self.dd_inference_mapper,
                                              temp_names, iname)
        insns = []

        import loopy as lp
        from pymbolic import var
        for name, expr, dnr in zip(insn.names, insn.exprs, insn.do_not_return):
            insns.append(
                lp.Assignment(
                    expr_mapper(var(name)),
                    expr_mapper(expr),
                    temp_var_type=lp.Optional(None) if dnr else lp.Optional(),
                    no_sync_with=frozenset([
                        ("*", "any"),
                    ]),
                ))

        # Purely scalar assignments need no kernel.
        if not expr_mapper.non_scalar_vars:
            return insn

        knl = lp.make_kernel(
            "{[%s]: 0 <= %s < %s}" % (iname, iname, size_name),
            insns,
            default_offset=lp.auto,
            name="grudge_assign_%d" % self.insn_count,
            # Single-insn kernels may have their no_sync_with resolve to an
            # empty set, that's OK.
            options=lp.Options(check_dep_resolution=False))

        knl = lp.set_options(knl, return_dict=True)
        knl = lp.split_iname(knl, iname, 128, outer_tag="g.0", inner_tag="l.0")

        self.insn_count += 1

        from pytools import single_valued
        governing_dd = single_valued(
            self.dd_inference_mapper(expr) for expr in insn.exprs)

        knl = lp.register_preamble_generators(knl, [bessel_preamble_generator])
        knl = lp.register_function_manglers(knl, [bessel_function_mangler])

        input_mappings = {}
        output_mappings = {}

        from grudge.symbolic.mappers import DependencyMapper
        dep_mapper = DependencyMapper(composite_leaves=False)

        # Drop the obsolete six.iteritems: plain dict.items() is equivalent
        # on Python 3 and removes the Python-2 compatibility dependency.
        for expr, name in expr_mapper.expr_to_name.items():
            deps = dep_mapper(expr)
            assert len(deps) <= 1
            if not deps:
                is_output = False
            else:
                dep, = deps
                # An expression writing one of the instruction's names is
                # an output of the kernel; everything else is an input.
                is_output = dep.name in insn.names

            if is_output:
                tgt_dict = output_mappings
            else:
                tgt_dict = input_mappings

            tgt_dict[name] = expr

        return LoopyKernelInstruction(
            LoopyKernelDescriptor(loopy_kernel=knl,
                                  input_mappings=input_mappings,
                                  output_mappings=output_mappings,
                                  fixed_arguments={},
                                  governing_dd=governing_dd))