def build_loopy_kernel_A_text():
    """Build the ``kernel_tensor_A`` loopy kernel from a textual description.

    The kernel instruction is ``A[i,j] = c*sum(k, B[k,i]*B[k,j])`` with ``i``
    and ``j`` ranging over ``[0, n)`` and ``k`` over ``[0, m)``.  ``A``, ``B``
    and ``c`` are typed as doubles and the sizes are fixed to ``n=3`` and
    ``m=2`` before C code is generated.

    Returns
    -------
    tuple
        ``(knl_name, knl_call, knl_c, knl_h)``: the kernel name, a C statement
        calling the kernel, the generated device code and the generated
        header.  Both code strings are passed through
        ``utils.replace_strings`` with the pair
        ``("__restrict__", "restrict")``.
    """
    knl_name = "kernel_tensor_A"

    # Textual description of the kernel: iteration domain and instruction
    domain = "{ [i,j,k]: 0<=i,j<n and 0<=k<m }"
    instructions = """
            A[i,j] = c*sum(k, B[k,i]*B[k,j])
        """

    kernel = lp.make_kernel(
        domain,
        instructions,
        name=knl_name,
        assumptions="n >= 1 and m >= 1",
        lang_version=lp.MOST_RECENT_LANGUAGE_VERSION,
        target=lp.CTarget(),
    )

    # Every argument of the kernel is a double
    double = np.dtype(np.double)
    kernel = lp.add_and_infer_dtypes(kernel, {"A": double, "B": double, "c": double})

    # Fix the problem sizes and request the i, j loop nesting order
    kernel = lp.fix_parameters(kernel, n=3, m=2)
    kernel = lp.prioritize_loops(kernel, "i,j")

    # Generate the C source first, then the matching header
    source = lp.generate_code_v2(kernel).device_code()
    header = str(lp.generate_header(kernel)[0])

    # Postprocess both generated strings with the same replacement list
    replacements = [("__restrict__", "restrict")]
    source = utils.replace_strings(source, replacements)
    header = utils.replace_strings(header, replacements)

    knl_call = "kernel_tensor_A(A, &B[0][0], 1.0/(2.0*Ae));"

    return knl_name, knl_call, source, header
def build_loopy_kernel_b_text():
    """Build the ``kernel_tensor_b`` loopy kernel from a textual description.

    The kernel instruction is ``b[i] = c`` for ``0 <= i < n``.  ``b`` and
    ``c`` are typed as doubles and ``n`` is fixed to 3 before C code is
    generated.

    Returns
    -------
    tuple
        ``(knl_name, knl_call, knl_c, knl_h)``: the kernel name, a C statement
        calling the kernel, the generated device code and the generated
        header.  Both code strings are passed through
        ``utils.replace_strings`` with the pair
        ``("__restrict__", "restrict")``.
    """
    knl_name = "kernel_tensor_b"

    # Textual description of the kernel: iteration domain and instruction
    domain = "{ [i]: 0<=i<n }"
    instructions = """
            b[i] = c
        """

    kernel = lp.make_kernel(
        domain,
        instructions,
        name=knl_name,
        lang_version=lp.MOST_RECENT_LANGUAGE_VERSION,
        target=lp.CTarget(),
    )

    # Both the output vector and the scalar are doubles
    double = np.dtype(np.double)
    kernel = lp.add_and_infer_dtypes(kernel, {"b": double, "c": double})
    kernel = lp.fix_parameters(kernel, n=3)

    # Generate the C source first, then the matching header
    source = lp.generate_code_v2(kernel).device_code()
    header = str(lp.generate_header(kernel)[0])

    # Postprocess both generated strings with the same replacement list
    replacements = [("__restrict__", "restrict")]
    source = utils.replace_strings(source, replacements)
    header = utils.replace_strings(header, replacements)

    knl_call = "kernel_tensor_b(b, Ae / 6.0);"

    return knl_name, knl_call, source, header
예제 #3
0
def generate_kernel():
    """Generate and return source and header for a kernel using loopy.

    The kernel instruction is ``out[i] = 2*a[i]`` for ``0 <= i < n``, built
    for the C target.  Only ``a`` is given an explicit dtype (double); the
    remaining dtypes are left to ``lp.add_and_infer_dtypes``.

    Returns
    -------
    tuple
        ``(code, header)``: the result of ``all_code()`` on the code
        generation result, and the string form of the first entry returned by
        ``lp.generate_header``.
    """
    kernel = lp.make_kernel(
        "{ [i]: 0<=i<n }",
        "out[i] = 2*a[i]",
        lang_version=lp.MOST_RECENT_LANGUAGE_VERSION,
        target=lp.CTarget(),
    )

    input_dtypes = {"a": np.dtype(np.double)}
    kernel = lp.add_and_infer_dtypes(kernel, input_dtypes)

    # Source is generated before the header
    code = lp.generate_code_v2(kernel).all_code()
    header = str(lp.generate_header(kernel)[0])
    return code, header
예제 #4
0
def get_header(knl):
    """
    Return the header definition code for a :class:`loopy.LoopKernel`

    Parameters
    ----------
    knl : :class:`loopy.LoopKernel`
        The kernel whose header definition is generated

    Returns
    -------
    The string form of the first entry returned by
    :func:`loopy.generate_header` for `knl`

    Notes
    -----
    The kernel's Target and name should be set for proper functioning
    """

    headers = lp.generate_header(knl)
    first_header = headers[0]
    return str(first_header)
예제 #5
0
def get_header(knl, codegen_result=None):
    """
    Return the header definition code for a :class:`loopy.LoopKernel`

    Parameters
    ----------
    knl : :class:`loopy.LoopKernel`
        The kernel whose header definition is generated
    codegen_result : :class:`loopy.CodeGenerationResult`
        If supplied, the pre-generated code-gen result for this kernel; it is
        forwarded to :func:`loopy.generate_header` (speeds up header
        generation)

    Returns
    -------
    The string form of the first entry returned by
    :func:`loopy.generate_header` for `knl`

    Notes
    -----
    The kernel's Target and name should be set for proper functioning
    """

    headers = lp.generate_header(knl, codegen_result=codegen_result)
    first_header = headers[0]
    return str(first_header)
    def __generate_loopy(self, knl_name: str, verbose: bool = False, **kwargs):
        """Generate the cell kernel for the Laplace operator using Loopy.

        The kernel consists of the single instruction
        ``A_T[i,j] = sum(k, A0[i,j,k] * G_T[k])``, assembled from ISL loop
        domains and pymbolic expressions.  ``n`` is fixed to ``self.n_dof``
        and ``m`` to ``self.n_dim**2`` before C code is generated.

        Parameters
        ----------
        knl_name : str
            Name given to the generated kernel.
        verbose : bool
            If true, print the loop domains, the assignment, the kernel and
            the generated device code.
        **kwargs
            Accepted but not used by this method.

        Returns
        -------
        tuple
            ``(knl_c, knl_h)``: generated device code and header text, each
            with ``__restrict__`` replaced by ``restrict``.
        """

        n_dof, n_dim = self.n_dof, self.n_dim

        def report(*items):
            # Diagnostic output, one item per line, only when requested
            if verbose:
                for item in items:
                    print(item)

        def loop_domain(iname, extent):
            # Set of all points with 0 <= iname < extent
            vs = isl.make_zero_and_vars([iname], [extent])
            lower_bound = vs[0].le_set(vs[iname])
            upper_bound = vs[iname].lt_set(vs[0] + vs[extent])
            return lower_bound & upper_bound

        # Names of the kernel inputs, of the parameters that get fixed below,
        # and (iname, extent) pairs describing the loops
        arg_names = ["A_T", "A0", "G_T"]
        param_names = ["n", "m"]
        loops = [("i", "n"), ("j", "n"), ("k", "m")]

        # One ISL domain per loop
        isl_domains = [loop_domain(iname, extent) for iname, extent in loops]
        report("ISL loop domains:", isl_domains, "")

        # pymbolic variables for every symbol that appears in the kernel
        args = {name: pb.Variable(name) for name in arg_names}
        params = {name: pb.Variable(name) for name in param_names}
        inames = {name: pb.Variable(name) for name, _ in loops}

        # Array arguments of the kernel; shapes are symbolic in n and m.
        # The shape of G_T is given as the bare expression m.
        n, m = params["n"], params["m"]
        lp_args = {
            "A_T": lp.GlobalArg("A_T", dtype=np.double, shape=(n, n)),
            "A0": lp.GlobalArg("A0", dtype=np.double, shape=(n, n, m)),
            "G_T": lp.GlobalArg("G_T", dtype=np.double, shape=m),
        }

        # Everything handed to loopy as kernel data: arrays first, then the
        # size parameters as value arguments
        data = list(lp_args.values())
        data.extend(lp.ValueArg(name) for name in param_names)

        def build_ass():
            """Build the assignment A_T[i,j] = sum(k, A0[i,j,k] * G_T[k])."""
            i, j, k = inames["i"], inames["j"], inames["k"]
            A_T, A0, G_T = args["A_T"], args["A0"], args["G_T"]

            # Left-hand side: one entry of the element matrix
            lhs = pb.Subscript(A_T, (i, j))

            # Right-hand side: Frobenius inner product <A0[i,j], G_T>,
            # expressed as a sum reduction over k (G_T is indexed by the
            # bare iname k)
            summand = pb.Subscript(A0, (i, j, k)) * pb.Subscript(G_T, k)
            sum_op = lp.library.reduction.SumReductionOperation()
            rhs = lp.Reduction(sum_op, k, summand)

            return lp.Assignment(lhs, rhs)

        ass = build_ass()
        report("Assignment expression:", ass, "")

        # Construct the kernel from the domains, the instruction and the data
        knl = lp.make_kernel(
            isl_domains,
            [ass],
            data,
            name=knl_name,
            target=lp.CTarget(),
            lang_version=lp.MOST_RECENT_LANGUAGE_VERSION,
        )

        # Fix the sizes and request the i, j loop nesting order
        knl = lp.fix_parameters(knl, n=n_dof, m=n_dim**2)
        knl = lp.prioritize_loops(knl, "i,j")
        report("", knl, "")

        # Generate the C source first, then the matching header
        knl_c = lp.generate_code_v2(knl).device_code()
        knl_h = str(lp.generate_header(knl)[0])
        report(knl_c, "")

        # Postprocess: use the C99 spelling of the restrict qualifier
        old, new = "__restrict__", "restrict"
        return knl_c.replace(old, new), knl_h.replace(old, new)
def build_loopy_kernel_A_auto(*, verbose: bool = True):
    """Build the ``kernel_tensor_A`` loopy kernel programmatically.

    The kernel consists of the single instruction
    ``A[i,j] = c*sum(k, B[k,i]*B[k,j])``, assembled from ISL loop domains and
    pymbolic expressions instead of a textual description.  The sizes are
    fixed to ``n=3`` and ``m=2`` before C code is generated.

    Parameters
    ----------
    verbose : bool, keyword-only
        If true, print the loop domains, the assignment, the kernel and the
        generated device code to stdout.  Defaults to ``True``.

    Returns
    -------
    tuple
        ``(knl_name, knl_call, knl_c, knl_h)``: the kernel name, a C statement
        calling the kernel, the generated device code and the generated
        header.  Both code strings are passed through
        ``utils.replace_strings`` with the pair
        ``("__restrict__", "restrict")``.
    """
    knl_name = "kernel_tensor_A"

    def report(*items):
        # Diagnostic output, one item per line, only when requested
        if verbose:
            for item in items:
                print(item)

    # Inputs to the kernel
    arg_names = ["A", "B", "c"]
    # Kernel parameters that will be fixed later
    param_names = ["n", "m"]
    # Tuples of inames and extents of their loops
    loops = [("i", "n"), ("j", "n"), ("k", "m")]

    # Generate the domains for the loops
    isl_domains = []
    for idx, extent in loops:
        # Create dict of loop variables (inames) and parameters
        vs = isl.make_zero_and_vars([idx], [extent])
        # Create the loop domain 0 <= idx < extent
        isl_domains.append(
            vs[0].le_set(vs[idx]) & vs[idx].lt_set(vs[0] + vs[extent]))

    report("ISL loop domains:", isl_domains, "")

    # Generate pymbolic variables for all used symbols
    args = {arg: pb.Variable(arg) for arg in arg_names}
    params = {param: pb.Variable(param) for param in param_names}
    inames = {iname: pb.Variable(iname) for iname, _ in loops}

    # Input arguments for the loopy kernel; array shapes are symbolic in n, m
    n, m = params["n"], params["m"]
    lp_args = {
        "A": lp.GlobalArg("A", dtype=np.double, shape=(n, n)),
        "B": lp.GlobalArg("B", dtype=np.double, shape=(m, n)),
        "c": lp.ValueArg("c", dtype=np.double),
    }

    # Generate the list of arguments & parameters that will be passed to loopy
    data = list(lp_args.values())
    data += [lp.ValueArg(param) for param in param_names]

    # Build the kernel instruction: computation and assignment of the element
    # matrix
    def build_ass():
        """
        A[i,j] = c*sum(k, B[k,i]*B[k,j])
        """
        i, j, k = inames["i"], inames["j"], inames["k"]
        B = args["B"]

        # The target of the assignment
        target = pb.Subscript(args["A"], (i, j))

        # The rhs expression: a sum reduction over k of the product of
        # columns i and j of B, scaled by c
        reduce_op = lp.library.reduction.SumReductionOperation()
        reduce_expr = pb.Subscript(B, (k, i)) * pb.Subscript(B, (k, j))
        expr = args["c"] * lp.Reduction(reduce_op, k, reduce_expr)

        return lp.Assignment(target, expr)

    ass = build_ass()
    report("Assignment expression:", ass, "")

    instructions = [ass]

    # Construct the kernel
    knl = lp.make_kernel(isl_domains,
                         instructions,
                         data,
                         name=knl_name,
                         target=lp.CTarget(),
                         lang_version=lp.MOST_RECENT_LANGUAGE_VERSION)

    # Fix the sizes and request the i, j loop nesting order
    knl = lp.fix_parameters(knl, n=3, m=2)
    knl = lp.prioritize_loops(knl, "i,j")
    report(knl, "")

    # Generate kernel code: C source first, then the matching header
    knl_c = lp.generate_code_v2(knl).device_code()
    knl_h = str(lp.generate_header(knl)[0])
    report(knl_c, "")

    # Postprocess kernel code
    replacements = [("__restrict__", "restrict")]
    knl_c = utils.replace_strings(knl_c, replacements)
    knl_h = utils.replace_strings(knl_h, replacements)

    knl_call = "kernel_tensor_A(A, &B[0][0], 1.0/(2.0*Ae));"

    return knl_name, knl_call, knl_c, knl_h