예제 #1
0
def generate_loopy_kernel(slate_expr, tsfc_parameters=None):
    cpu_time = time.time()
    if len(slate_expr.ufl_domains()) > 1:
        raise NotImplementedError("Multiple domains not implemented.")

    Citations().register("Gibson2018")

    # Create a loopy builder for the Slate expression,
    # e.g. contains the loopy kernels coming from TSFC
    gem_expr, var2terminal = slate_to_gem(slate_expr)

    scalar_type = tsfc_parameters["scalar_type"]
    slate_loopy, output_arg = gem_to_loopy(gem_expr, var2terminal, scalar_type)

    builder = LocalLoopyKernelBuilder(expression=slate_expr,
                                      tsfc_parameters=tsfc_parameters)

    name = "slate_wrapper"
    loopy_merged = merge_loopy(slate_loopy, output_arg, builder, var2terminal,
                               name)
    loopy_merged = loopy.register_callable(loopy_merged, INVCallable.name,
                                           INVCallable())
    loopy_merged = loopy.register_callable(loopy_merged, SolveCallable.name,
                                           SolveCallable())

    loopykernel = op2.Kernel(loopy_merged,
                             name,
                             include_dirs=BLASLAPACK_INCLUDE.split(),
                             ldargs=BLASLAPACK_LIB.split())

    kinfo = KernelInfo(
        kernel=loopykernel,
        integral_type=
        "cell",  # slate can only do things as contributions to the cell integrals
        oriented=builder.bag.needs_cell_orientations,
        subdomain_id="otherwise",
        domain_number=0,
        coefficient_map=tuple(range(len(slate_expr.coefficients()))),
        needs_cell_facets=builder.bag.needs_cell_facets,
        pass_layer_arg=builder.bag.needs_mesh_layers,
        needs_cell_sizes=builder.bag.needs_cell_sizes)

    # Cache the resulting kernel
    # Slate kernels are never split, so indicate that with None in the index slot.
    idx = tuple([None] * slate_expr.rank)
    logger.info(GREEN % "compile_slate_expression finished in %g seconds.",
                time.time() - cpu_time)
    return (SplitKernel(idx, kinfo), )
예제 #2
0
def test_register_function_lookup(ctx_factory):
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    from testlib import Log2Callable

    x = np.random.rand(10)
    queue = cl.CommandQueue(ctx)

    prog = lp.make_kernel("{[i]: 0<=i<10}", """
            y[i] = log2(x[i])
            """)
    prog = lp.register_callable(prog, "log2", Log2Callable("log2"))

    evt, (out, ) = prog(queue, x=x)

    assert np.linalg.norm(np.log2(x) - out) / np.linalg.norm(
        np.log2(x)) < 1e-15
예제 #3
0
    def make_kernel(self, map_instructions, tmp_instructions, args, domains,
                    **kwargs):
        temp_statements = []
        temp_vars = []

        from pystella.field import index_fields
        indexed_tmp_insns = index_fields(tmp_instructions)
        indexed_map_insns = index_fields(map_instructions)

        for statement in indexed_tmp_insns:
            if isinstance(statement, lp.InstructionBase):
                temp_statements += [statement]
            else:
                assignee, expression = statement
                # only declare temporary variables once
                if isinstance(assignee, pp.Variable):
                    current_tmp = assignee
                elif isinstance(assignee, pp.Subscript):
                    current_tmp = assignee.aggregate
                else:
                    current_tmp = None
                if current_tmp is not None and current_tmp not in temp_vars:
                    temp_vars += [current_tmp]
                    tvt = lp.Optional(None)
                else:
                    tvt = lp.Optional()

                temp_statements += [
                    self._assignment(assignee, expression, temp_var_type=tvt)
                ]

        output_statements = []
        for statement in indexed_map_insns:
            if isinstance(statement, lp.InstructionBase):
                output_statements += [statement]
            else:
                assignee, expression = statement
                temp_statements += [self._assignment(assignee, expression)]

        options = kwargs.pop("options", lp.Options())
        # ignore lack of supposed dependency for single-instruction kernels
        if len(map_instructions) + len(tmp_instructions) == 1:
            options.check_dep_resolution = False

        from pystella import get_field_args
        inferred_args = get_field_args([map_instructions, tmp_instructions])
        all_args = append_new_args(args, inferred_args)

        t_unit = lp.make_kernel(
            domains,
            temp_statements + output_statements,
            all_args + [lp.ValueArg("Nx, Ny, Nz", dtype="int"), ...],
            options=options,
            **kwargs,
        )

        new_args = []
        knl = t_unit.default_entrypoint
        for arg in knl.args:
            if isinstance(arg, lp.KernelArgument) and arg.dtype is None:
                new_arg = arg.copy(dtype=self.dtype)
                new_args.append(new_arg)
            else:
                new_args.append(arg)
        t_unit = t_unit.with_kernel(knl.copy(args=new_args))
        t_unit = lp.remove_unused_arguments(t_unit)
        t_unit = lp.register_callable(t_unit, "round",
                                      UnaryOpenCLCallable("round"))

        return t_unit
예제 #4
0
            ecm(vec).expr, 1,
            ecm(result).expr, 1
        ]
        return (
            var(self.name_in_target)(*c_parameters),
            False  # cblas_gemv does not return anything
        )

    def generate_preambles(self, target):
        assert isinstance(target, CTarget)
        yield ("99_cblas", "#include <cblas.h>")
        return


# }}}

n = 10

knl = lp.make_kernel("{:}",
                     """
        y[:] = gemv(A[:, :], x[:])
        """, [
                         lp.GlobalArg("A", dtype=np.float64, shape=(n, n)),
                         lp.GlobalArg("x", dtype=np.float64, shape=(n, )),
                         lp.GlobalArg("y", shape=(n, )), ...
                     ],
                     target=CTarget())

knl = lp.register_callable(knl, "gemv", CBLASGEMV(name="gemv"))
print(lp.generate_code_v2(knl).device_code())