Esempio n. 1
0
 def _make_slab_set(iname, size):
     """Return the single basic set bounding *iname* by ``0 <= iname < size``.

     :arg iname: name of the set variable handed to
         ``isl.make_zero_and_vars``.
     :arg size: value added to the zero expression to form the exclusive
         upper bound.
     :returns: the only entry of ``get_basic_sets()`` for the bounded set.
         The one-element unpacking raises :class:`ValueError` if that
         list does not hold exactly one entry.
     """
     affs = isl.make_zero_and_vars([iname])
     zero, var = affs[0], affs[iname]
     bounded = zero.le_set(var) & var.lt_set(zero + size)
     (slab,) = bounded.get_basic_sets()
     return slab
Esempio n. 2
0
 def _make_slab_set(iname, size):
     """Build the one-variable slab ``0 <= iname < size`` as a basic set.

     :arg iname: name of the variable the slab is expressed in.
     :arg size: exclusive upper bound, added to the zero expression.
     :returns: the sole basic set of the resulting set; unpacking fails
         with :class:`ValueError` when ``get_basic_sets()`` yields any
         other number of pieces.
     """
     names = isl.make_zero_and_vars([iname])
     lower_bound = names[0].le_set(names[iname])
     upper_bound = names[iname].lt_set(names[0] + size)
     pieces = (lower_bound & upper_bound).get_basic_sets()
     (only_piece,) = pieces
     return only_piece
Esempio n. 3
0
    def _get_scalar_func_loopy_program(self, name, nargs, naxes):
        """Build a loopy program applying the scalar function *name* elementwise.

        The program assigns ``out[i0, ...] = name(inp0[i0, ...], ...)`` over
        the domain ``0 <= i<k> < n<k>`` for each of the *naxes* axes.

        :arg name: function name. ``"arctan2"`` is mapped to ``"atan2"``;
            passing ``"atan2"`` directly still works but emits a
            :class:`DeprecationWarning`.
        :arg nargs: number of inputs, named ``inp0``, ``inp1``, ...
        :arg naxes: number of array axes (inames ``i<k>``, sizes ``n<k>``).
        :returns: the result of ``make_loopy_program`` for a kernel named
            ``actx_special_<name>``.
        """
        # The two spellings are mutually exclusive, so the order of the
        # checks does not matter: only the legacy one warns.
        if name == "atan2":
            from warnings import warn
            warn(
                "'atan2' in ArrayContext.np is deprecated. Use 'arctan2', "
                "as in numpy2. This will be disallowed in 2021.",
                DeprecationWarning,
                stacklevel=3)
        elif name == "arctan2":
            name = "atan2"

        from pymbolic import var

        axis_names = ["i%d" % axis for axis in range(naxes)]
        extent_names = ["n%d" % axis for axis in range(naxes)]
        index = tuple(var(axis_name) for axis_name in axis_names)

        from islpy import make_zero_and_vars
        affs = make_zero_and_vars(axis_names, params=extent_names)
        zero = affs[0]

        # Start from the domain of the zero expression and intersect the
        # bounds of one axis at a time.
        domain = zero.domain()
        for axis_name, extent_name in zip(axis_names, extent_names):
            axis = affs[axis_name]
            domain = domain & zero.le_set(axis) & axis.lt_set(affs[extent_name])

        (domain_bset,) = domain.get_basic_sets()

        operands = [var("inp%d" % i)[index] for i in range(nargs)]
        assignment = lp.Assignment(var("out")[index], var(name)(*operands))

        return make_loopy_program(
            [domain_bset],
            [assignment],
            name="actx_special_%s" % name)
Esempio n. 4
0
def expression_kernel(expr, args):
    r"""Build the :class:`pyop2.Kernel` named ``"expression"`` for *expr*.

    The kernel iterates a single iname ``d`` over ``0 <= d < cdim``, where
    ``cdim`` is read from ``dof_dset`` of the function space of ``args[0]``.

    :arg expr: the processed UFL expression. A ``Zero`` (exact type match)
        yields no kernel.
    :arg args: the corresponding arguments; each contributes its ``.arg``
        to the kernel data.
    :returns: the kernel, or ``None`` when *expr* is a ``Zero``.
    """
    # A Zero stands for an empty slot (assignment to an indexed LHS):
    # there is nothing to generate.
    if type(expr) is Zero:
        return None

    function_space = args[0].function.function_space()

    import islpy as isl
    affs = isl.make_zero_and_vars(["d"])
    zero, d = affs[0], affs["d"]
    lower = zero.le_set(d)
    upper = d.lt_set(zero + function_space.dof_dset.cdim)
    domain = lower & upper

    context = Bag()
    context.within_inames = frozenset(["d"])
    context.indices = (p.Variable("d"),)

    insn = loopy_instructions(expr, context)
    kernel_data = [arg.arg for arg in args]
    knl = loopy.make_function(
        [domain],
        [insn],
        kernel_data,
        name="expression",
        silenced_warnings=["summing_if_branches_ops"],
    )

    return op2.Kernel(knl, "expression")
Esempio n. 5
0
 def _get_lp_domains(_inames, _extents):
     """Return one set ``0 <= iname < extent`` per (iname, extent) pair.

     :arg _inames: iterable of iname names.
     :arg _extents: iterable of extents, paired with *_inames* via ``zip``
         (so surplus entries on either side are ignored).
     :returns: list of sets, in pairing order.
     """
     def _bounded(iname, extent):
         # Each iname gets its own zero/variable expressions.
         affs = isl.make_zero_and_vars([iname])
         zero, var = affs[0], affs[iname]
         return zero.le_set(var) & var.lt_set(zero + extent)

     return [_bounded(iname, extent)
             for iname, extent in zip(_inames, _extents)]
Esempio n. 6
0
def expression_index(expr, parameters):
    """Return the pymbolic variable for index *expr*, registering its domain.

    On first sight of ``expr.name`` the set ``0 <= name < expr.extent`` is
    stored in ``parameters.domains``; later calls leave the stored entry
    untouched.

    :arg expr: index object providing ``name`` and ``extent``.
    :arg parameters: object whose ``domains`` mapping is updated in place.
    :returns: ``pym.Variable(name)``.
    """
    name = expr.name
    if name in parameters.domains:
        return pym.Variable(name)

    affs = isl.make_zero_and_vars([name])
    zero, index = affs[0], affs[name]
    lower = index.ge_set(zero)
    upper = index.lt_set(zero + expr.extent)
    parameters.domains[name] = lower & upper
    return pym.Variable(name)
Esempio n. 7
0
def expression_index(expr, parameters):
    """Translate the index *expr* to a pymbolic variable of the same name.

    As a side effect, the iteration set ``0 <= name < expr.extent`` is
    recorded under ``parameters.domains[name]`` unless an entry for that
    name is already present.

    :arg expr: index object; its ``name`` and ``extent`` are read.
    :arg parameters: holder of the ``domains`` mapping to populate.
    :returns: ``pym.Variable(expr.name)``.
    """
    name = expr.name
    needs_domain = name not in parameters.domains
    if needs_domain:
        symbols = isl.make_zero_and_vars([name])
        origin = symbols[0]
        at_least_zero = symbols[name].ge_set(origin)
        below_extent = symbols[name].lt_set(origin + expr.extent)
        parameters.domains[name] = at_least_zero & below_extent
    return pym.Variable(name)
Esempio n. 8
0
def test_make_zero_and_vars():
    """Smoke-test ``isl.make_zero_and_vars`` by building and printing a set.

    The set combines ``0 <= i + j``, ``i + j < n``, ``0 <= i`` and
    ``i <= 13 + n`` over set variables ``i, j, k`` and parameter ``n``.
    """
    affs = isl.make_zero_and_vars("i,j,k", "n")
    zero, i, j, n = affs[0], affs["i"], affs["j"], affs["n"]

    constraints = [
        zero.le_set(i + j),
        (i + j).lt_set(n),
        zero.le_set(i),
        i.le_set(13 + n),
    ]

    # Intersect left to right, matching a chained ``a & b & c & d``.
    myset = constraints[0]
    for constraint in constraints[1:]:
        myset = myset & constraint

    print(myset)
Esempio n. 9
0
def generate(impero_c, args, precision, scalar_type, kernel_name="loopy_kernel", index_names=None):
    """Generates loopy code.

    :arg impero_c: ImperoC tuple with Impero AST and other data
    :arg args: list of loopy.GlobalArgs
    :arg precision: floating-point precision for printing
    :arg scalar_type: type of scalars as C typename string
    :arg kernel_name: function name of the kernel
    :arg index_names: pre-assigned index names, in any form accepted by the
        ``dict`` constructor; ``None`` (the default) means none
    :returns: loopy kernel
    """
    # A ``None`` sentinel replaces the former mutable ``[]`` default so that
    # no list object is shared between calls.
    if index_names is None:
        index_names = ()

    ctx = LoopyContext()
    ctx.indices = impero_c.indices
    # Indices without a pre-assigned name fall back to "i".
    ctx.index_names = defaultdict(lambda: "i", index_names)
    ctx.precision = precision
    ctx.scalar_type = scalar_type
    ctx.epsilon = 10.0 ** (-precision)

    # Create arguments: the caller's args followed by one temporary "t<k>"
    # per entry of impero_c.temporaries.
    data = list(args)
    for number, temp in enumerate(impero_c.temporaries):
        name = "t%d" % number
        if isinstance(temp, gem.Constant):
            # Constants become read-only temporaries initialised from their
            # array.
            data.append(lp.TemporaryVariable(
                name,
                shape=temp.shape,
                dtype=temp.array.dtype,
                initializer=temp.array,
                address_space=lp.AddressSpace.LOCAL,
                read_only=True))
        else:
            # Leading dimensions come from the extents of the indices
            # associated with this temporary.
            shape = tuple(index.extent for index in ctx.indices[temp]) + temp.shape
            # NOTE(review): dtype is fixed to float64 here and scalar_type is
            # not consulted -- confirm this is intended.
            data.append(lp.TemporaryVariable(
                name,
                shape=shape,
                dtype=numpy.float64,
                initializer=None,
                address_space=lp.AddressSpace.LOCAL,
                read_only=False))
        ctx.gem_to_pymbolic[temp] = p.Variable(name)

    # Create instructions
    instructions = statement(impero_c.tree, ctx)

    # Create domains: one set 0 <= idx < extent per entry of ctx.index_extent
    # (read after the instructions have been created).
    domains = []
    for idx, extent in ctx.index_extent.items():
        inames = isl.make_zero_and_vars([idx])
        zero, iname = inames[0], inames[idx]
        domains.append(zero.le_set(iname) & iname.lt_set(zero + extent))

    # Fall back to a single zero-dimensional domain when there are no loops.
    if not domains:
        domains = [isl.BasicSet("[] -> {[]}")]

    # Create loopy kernel
    knl = lp.make_function(domains, instructions, data, name=kernel_name, target=lp.CTarget(),
                           seq_dependencies=True, silenced_warnings=["summing_if_branches_ops"])

    # Prevent loopy interchange by loopy
    knl = lp.prioritize_loops(knl, ",".join(ctx.index_extent.keys()))

    # Help loopy in scheduling by assigning priority to instructions:
    # earlier instructions get the larger priority value.
    n_insns = len(knl.instructions)
    prioritized = [insn.copy(priority=n_insns - position)
                   for position, insn in enumerate(knl.instructions)]
    knl = knl.copy(instructions=prioritized)

    return knl
Esempio n. 10
0
def expression_runtimeindex(expr, parameters):
    """Register the runtime-bounded index *expr* and return its variable.

    ``expr.children`` is unpacked as ``(lo, hi, constraint)``. On first
    sight of ``expr.name`` the set ``lo <= name < hi`` is stored in
    ``parameters.domains``, with the names of every ``Argument`` or
    ``Variable`` found by ``traversal([lo, hi])`` used as set parameters.
    When *constraint* is not ``None`` its translation is stored in
    ``parameters.assumptions``.

    :arg expr: index object providing ``name`` and ``children``.
    :arg parameters: object whose ``domains`` and ``assumptions`` mappings
        are updated in place.
    :returns: ``pym.Variable(expr.name)``.
    """
    # Comparison operator -> method called on the translated left operand.
    comparison_methods = {
        ">": "gt_set",
        ">=": "ge_set",
        "==": "eq_set",
        "!=": "ne_set",
        "<": "lt_set",
        "<=": "le_set",
    }

    @singledispatch
    def translate(node, affs):
        # No handler registered for this node type.
        raise AssertionError("Unhandled type '%s' in domain translation" %
                             type(node))

    @translate.register(Sum)
    def _translate_sum(node, affs):
        return operator.add(*(translate(child, affs) for child in node.children))

    @translate.register(Argument)
    def _translate_argument(node, affs):
        # Arguments are looked up under the name of their processed form.
        processed = expression(node, parameters)
        return affs[processed.name]

    @translate.register(Variable)
    def _translate_variable(node, affs):
        return affs[node.name]

    @translate.register(Zero)
    def _translate_zero(node, affs):
        assert node.shape == ()
        return affs[0]

    @translate.register(LogicalAnd)
    def _translate_logicaland(node, affs):
        lhs, rhs = (translate(child, affs) for child in node.children)
        return lhs & rhs

    @translate.register(Comparison)
    def _translate_comparison(node, affs):
        lhs, rhs = (translate(child, affs) for child in node.children)
        method = getattr(lhs, comparison_methods[node.operator])
        return method(rhs)

    name = expr.name
    if name in parameters.domains:
        return pym.Variable(name)

    lo, hi, constraint = expr.children
    params = [v.name for v in traversal([lo, hi])
              if isinstance(v, (Argument, Variable))]
    affs = isl.make_zero_and_vars([name], params)
    lower = affs[name].ge_set(translate(lo, affs))
    upper = affs[name].lt_set(translate(hi, affs))
    parameters.domains[name] = lower & upper
    if constraint is not None:
        parameters.assumptions[name] = translate(constraint, affs)
    return pym.Variable(name)
Esempio n. 11
0
def test_make_zero_and_vars():
    """Build a set from ``isl.make_zero_and_vars`` expressions and print it.

    Set variables are ``i, j, k`` and the parameter is ``n``; the printed
    set is the intersection of ``0 <= i + j``, ``i + j < n``, ``0 <= i``
    and ``i <= 13 + n``.
    """
    v = isl.make_zero_and_vars("i,j,k", "n")

    sum_nonnegative = v[0].le_set(v["i"] + v["j"])
    sum_below_n = (v["i"] + v["j"]).lt_set(v["n"])
    i_nonnegative = v[0].le_set(v["i"])
    i_bounded_above = v["i"].le_set(13 + v["n"])

    myset = sum_nonnegative & sum_below_n & i_nonnegative & i_bounded_above

    print(myset)
Esempio n. 12
0
def expression_runtimeindex(expr, parameters):
    """Return the pymbolic variable of a runtime index, recording its domain.

    The three children of *expr* are its lower bound, upper bound and an
    optional extra constraint. Unless ``parameters.domains`` already has an
    entry for ``expr.name``, this stores the set ``lo <= name < hi`` there
    and, if the constraint is not ``None``, its translation under
    ``parameters.assumptions``.

    :arg expr: index object; ``name`` and ``children`` are read.
    :arg parameters: holder of the ``domains`` and ``assumptions`` mappings.
    :returns: ``pym.Variable(expr.name)``.
    """
    @singledispatch
    def to_isl(node, symbols):
        # Fallback: reached only for node types without a handler below.
        raise AssertionError("Unhandled type '%s' in domain translation" % type(node))

    @to_isl.register(Sum)
    def sum_to_isl(node, symbols):
        return operator.add(*(to_isl(c, symbols) for c in node.children))

    @to_isl.register(Argument)
    def argument_to_isl(node, symbols):
        # The symbol is keyed by the name of the processed argument.
        resolved = expression(node, parameters)
        return symbols[resolved.name]

    @to_isl.register(Variable)
    def variable_to_isl(node, symbols):
        return symbols[node.name]

    @to_isl.register(Zero)
    def zero_to_isl(node, symbols):
        assert node.shape == ()
        return symbols[0]

    @to_isl.register(LogicalAnd)
    def logicaland_to_isl(node, symbols):
        left, right = (to_isl(c, symbols) for c in node.children)
        return left & right

    @to_isl.register(Comparison)
    def comparison_to_isl(node, symbols):
        left, right = (to_isl(c, symbols) for c in node.children)
        # Map the operator to the set-building method of the left operand.
        method_by_operator = {
            ">": "gt_set",
            ">=": "ge_set",
            "==": "eq_set",
            "!=": "ne_set",
            "<": "lt_set",
            "<=": "le_set",
        }
        build = getattr(left, method_by_operator[node.operator])
        return build(right)

    name = expr.name
    if name not in parameters.domains:
        lo, hi, constraint = expr.children
        # Every Argument/Variable reachable from the bounds becomes a
        # parameter of the set.
        params = [
            v.name
            for v in traversal([lo, hi])
            if isinstance(v, (Argument, Variable))
        ]
        symbols = isl.make_zero_and_vars([name], params)
        at_least_lo = symbols[name].ge_set(to_isl(lo, symbols))
        below_hi = symbols[name].lt_set(to_isl(hi, symbols))
        parameters.domains[name] = at_least_lo & below_hi
        if constraint is not None:
            parameters.assumptions[name] = to_isl(constraint, symbols)
    return pym.Variable(name)
Esempio n. 13
0
def create_domains(indices):
    """ Create ISL domains from indices

    Each pair contributes the set ``0 <= index_name < extent``. When
    *indices* is empty, a single zero-dimensional set is returned instead.

    :arg indices: iterable of (index_name, extent) pairs
    :returns: A list of ISL sets representing the iteration domain of the indices."""

    def bounded(idx, extent):
        # Each index gets its own zero/variable expressions.
        affs = isl.make_zero_and_vars([idx])
        zero, iname = affs[0], affs[idx]
        return zero.le_set(iname) & iname.lt_set(zero + extent)

    domains = [bounded(idx, extent) for idx, extent in indices]
    if domains:
        return domains
    return [isl.BasicSet("[] -> {[]}")]
Esempio n. 14
0
    def _get_scalar_func_loopy_program(self, c_name, nargs, naxes):
        """Build a loopy program applying the function *c_name* elementwise.

        The single instruction is
        ``out[i0, ...] = c_name(inp0[i0, ...], ..., inp<nargs-1>[i0, ...])``
        over the domain ``0 <= i<k> < n<k>`` for every axis ``k``.

        :arg c_name: name of the function to call; also used in the kernel
            name ``actx_special_<c_name>``.
        :arg nargs: number of input arrays.
        :arg naxes: number of axes of the arrays.
        :returns: the result of ``make_loopy_program``.
        """
        from pymbolic import var

        iname_names = ["i%d" % axis for axis in range(naxes)]
        extent_names = ["n%d" % axis for axis in range(naxes)]
        index = tuple(var(iname) for iname in iname_names)

        from islpy import make_zero_and_vars
        affs = make_zero_and_vars(iname_names, params=extent_names)
        zero = affs[0]

        # Intersect the bounds of one axis at a time, starting from the
        # domain of the zero expression.
        domain = zero.domain()
        for iname, extent in zip(iname_names, extent_names):
            lower = zero.le_set(affs[iname])
            upper = affs[iname].lt_set(affs[extent])
            domain = domain & lower & upper

        (domain_bset,) = domain.get_basic_sets()

        inputs = [var("inp%d" % i)[index] for i in range(nargs)]
        insn = lp.Assignment(var("out")[index], var(c_name)(*inputs))

        return make_loopy_program(
                [domain_bset],
                [insn],
                name="actx_special_%s" % c_name)
def expression_kernel(expr, args):
    r"""Turn the processed UFL expression *expr* into a :class:`pyop2.Kernel`.

    The generated loopy function is named ``"expression"`` and loops a
    single iname ``d`` over ``0 <= d < cdim``; ``cdim`` is taken from
    ``dof_dset`` of the function space belonging to ``args[0]``.

    :arg expr: the processed expression. Nothing is generated when its
        type is exactly ``Zero``.
    :arg args: arguments matching *expr*; their ``.arg`` attributes form
        the kernel data.
    :returns: the kernel, or ``None`` for a ``Zero`` expression.
    """
    # Empty slot indicating assignment to indexed LHS, so don't do anything
    if type(expr) is Zero:
        return None

    fs = args[0].function.function_space()

    import islpy as isl
    symbols = isl.make_zero_and_vars(["d"])
    origin = symbols[0]
    d_nonnegative = origin.le_set(symbols["d"])
    d_below_cdim = symbols["d"].lt_set(origin + fs.dof_dset.cdim)
    domain = d_nonnegative & d_below_cdim

    context = Bag()
    context.within_inames = frozenset(["d"])
    context.indices = (p.Variable("d"),)

    insn = loopy_instructions(expr, context)
    data = [arg.arg for arg in args]
    knl = loopy.make_function(
        [domain],
        [insn],
        data,
        name="expression",
        silenced_warnings=["summing_if_branches_ops"])

    return op2.Kernel(knl, "expression")
    def __generate_loopy(self, knl_name: str, verbose: bool = False, **kwargs):
        """Generate cell kernel for the Laplace operator using Loopy.

        The kernel computes ``A_T[i,j] = sum(k, A0[i,j,k] * G_T[k])`` with
        ``i`` and ``j`` in ``[0, n)`` and ``k`` in ``[0, m)``. The parameters
        are then fixed to ``n = self.n_dof`` and ``m = self.n_dim**2``.

        :arg knl_name: name given to the loopy kernel.
        :arg verbose: when true, print the loop domains, the assignment,
            the kernel and the generated code.
        :arg kwargs: accepted but not used by this method.
        :returns: tuple ``(knl_c, knl_h)`` of generated device code and
            header text, each with ``__restrict__`` replaced by ``restrict``.
        """
        n_dof, n_dim = self.n_dof, self.n_dim

        # All diagnostic output goes through here; a no-op unless verbose.
        emit = print if verbose else (lambda *_: None)

        def loop_domain(iname, extent):
            # Set 0 <= iname < extent, where extent is a set parameter.
            affs = isl.make_zero_and_vars([iname], [extent])
            zero, var = affs[0], affs[iname]
            return zero.le_set(var) & var.lt_set(zero + affs[extent])

        # Tuples of inames and the parameters bounding their loops
        loops = [("i", "n"), ("j", "n"), ("k", "m")]
        isl_domains = [loop_domain(iname, extent) for iname, extent in loops]

        emit("ISL loop domains:")
        emit(isl_domains)
        emit("")

        # pymbolic symbols for inames, parameters and array arguments
        i, j, k = (pb.Variable(iname) for iname, _ in loops)
        n, m = pb.Variable("n"), pb.Variable("m")
        A_T, A0, G_T = (pb.Variable(arg) for arg in ("A_T", "A0", "G_T"))

        # Array arguments first, then the value parameters n and m.
        # NOTE(review): ``(m)`` is a parenthesised expression, not a
        # one-element tuple; it is kept exactly as originally written.
        kernel_data = [
            lp.GlobalArg("A_T", dtype=np.double, shape=(n, n)),
            lp.GlobalArg("A0", dtype=np.double, shape=(n, n, m)),
            lp.GlobalArg("G_T", dtype=np.double, shape=(m)),
            lp.ValueArg("n"),
            lp.ValueArg("m"),
        ]

        # A_T[i,j] = sum(k, A0[i,j,k] * G_T[k])
        target = pb.Subscript(A_T, (i, j))
        summand = pb.Subscript(A0, (i, j, k)) * pb.Subscript(G_T, (k))
        reduction = lp.Reduction(
            lp.library.reduction.SumReductionOperation(), k, summand)
        ass = lp.Assignment(target, reduction)

        emit("Assignment expression:")
        emit(ass)
        emit("")

        # Construct the kernel, then fix its parameters and loop priority
        knl = lp.make_kernel(
            isl_domains,
            [ass],
            kernel_data,
            name=knl_name,
            target=lp.CTarget(),
            lang_version=lp.MOST_RECENT_LANGUAGE_VERSION)
        knl = lp.fix_parameters(knl, n=n_dof, m=n_dim**2)
        knl = lp.prioritize_loops(knl, "i,j")

        emit("")
        emit(knl)
        emit("")

        # Generate kernel code and header
        knl_c = lp.generate_code_v2(knl).device_code()
        knl_h = str(lp.generate_header(knl)[0])

        emit(knl_c)
        emit("")

        # Postprocess kernel code
        return (knl_c.replace("__restrict__", "restrict"),
                knl_h.replace("__restrict__", "restrict"))
def build_loopy_kernel_A_auto():
    """Build the loopy kernel ``kernel_tensor_A`` and generate its C code.

    The kernel computes ``A[i,j] = c*sum(k, B[k,i]*B[k,j])`` with ``i`` and
    ``j`` in ``[0, n)`` and ``k`` in ``[0, m)``; the parameters are then
    fixed to ``n=3`` and ``m=2``. The loop domains, the assignment, the
    kernel and the generated code are printed to stdout along the way.

    :returns: tuple ``(knl_name, knl_call, knl_c, knl_h)``: the kernel
        name, a C call statement for it, and the generated code and header
        after ``utils.replace_strings`` has replaced ``__restrict__`` with
        ``restrict``.
    """
    knl_name = "kernel_tensor_A"

    def loop_domain(iname, extent):
        # Set 0 <= iname < extent, where extent is a set parameter.
        affs = isl.make_zero_and_vars([iname], [extent])
        zero, var = affs[0], affs[iname]
        return zero.le_set(var) & var.lt_set(zero + affs[extent])

    # Tuples of inames and the parameters bounding their loops
    loops = [("i", "n"), ("j", "n"), ("k", "m")]
    isl_domains = [loop_domain(iname, extent) for iname, extent in loops]

    print("ISL loop domains:")
    print(isl_domains)
    print("")

    # pymbolic symbols for inames, parameters and kernel arguments
    i, j, k = (pb.Variable(iname) for iname, _ in loops)
    n, m = pb.Variable("n"), pb.Variable("m")
    A, B, c = (pb.Variable(arg) for arg in ("A", "B", "c"))

    # Kernel arguments first, then the value parameters n and m
    kernel_data = [
        lp.GlobalArg("A", dtype=np.double, shape=(n, n)),
        lp.GlobalArg("B", dtype=np.double, shape=(m, n)),
        lp.ValueArg("c", dtype=np.double),
        lp.ValueArg("n"),
        lp.ValueArg("m"),
    ]

    # A[i,j] = c*sum(k, B[k,i]*B[k,j])
    target = pb.Subscript(A, (i, j))
    column_product = pb.Subscript(B, (k, i)) * pb.Subscript(B, (k, j))
    reduction = lp.Reduction(
        lp.library.reduction.SumReductionOperation(), k, column_product)
    ass = lp.Assignment(target, c * reduction)

    print("Assignment expression:")
    print(ass)
    print("")

    # Construct the kernel, then fix its parameters and loop priority
    knl = lp.make_kernel(
        isl_domains,
        [ass],
        kernel_data,
        name=knl_name,
        target=lp.CTarget(),
        lang_version=lp.MOST_RECENT_LANGUAGE_VERSION)
    knl = lp.fix_parameters(knl, n=3, m=2)
    knl = lp.prioritize_loops(knl, "i,j")
    print(knl)
    print("")

    # Generate kernel code and header
    knl_c = lp.generate_code_v2(knl).device_code()
    knl_h = str(lp.generate_header(knl)[0])
    print(knl_c)
    print("")

    # Postprocess kernel code
    replacements = [("__restrict__", "restrict")]
    knl_c = utils.replace_strings(knl_c, replacements)
    knl_h = utils.replace_strings(knl_h, replacements)

    knl_call = "kernel_tensor_A(A, &B[0][0], 1.0/(2.0*Ae));"

    return knl_name, knl_call, knl_c, knl_h