Beispiel #1
0
    def make_codepy_module(self, toolchain, dtype):
        from codepy.libraries import add_codepy
        toolchain = toolchain.copy()
        add_codepy(toolchain)

        from cgen import (Value, Include, Statement,
                Typedef, FunctionBody, FunctionDeclaration, Block, Const,
                Line, POD, Initializer, CustomLoop)
        S = Statement

        from codepy.bpl import BoostPythonModule
        mod = BoostPythonModule()

        mod.add_to_preamble([
            Include("vector"),
            Include("algorithm"),
            Include("hedge/base.hpp"),
            Include("hedge/volume_operators.hpp"),
            Include("boost/foreach.hpp"),
            Include("boost/numeric/ublas/io.hpp"),
            ]+self.get_cpu_extra_includes())

        mod.add_to_module([
            S("namespace ublas = boost::numeric::ublas"),
            S("using namespace hedge"),
            S("using namespace pyublas"),
            Line(),
            Typedef(POD(dtype, "value_type")),
            Line(),
            ])

        mod.add_function(FunctionBody(
            FunctionDeclaration(Value("void", "process_elements"), [
                Const(Value("uniform_element_ranges", "ers")),
                Const(Value("numpy_vector<value_type>", "field")),
                Value("numpy_vector<value_type>", "result"),
                ]+self.get_cpu_extra_parameter_declarators()),
            Block([
                Typedef(Value("numpy_vector<value_type>::iterator",
                    "it_type")),
                Typedef(Value("numpy_vector<value_type>::const_iterator",
                    "cit_type")),
                Line(),
                Initializer(Value("it_type", "result_it"),
                    "result.begin()"),
                Initializer(Value("cit_type", "field_it"),
                    "field.begin()"),
                Line() ]+self.get_cpu_extra_preamble()+[ Line(),
                CustomLoop(
                    "BOOST_FOREACH(const element_range er, ers)",
                    Block(self.get_cpu_per_element_code())
                    )
                ])))

        #print mod.generate()
        #toolchain = toolchain.copy()
        #toolchain.enable_debugging
        return mod.compile(toolchain)
Beispiel #2
0
def make_greet_mod(greeting):
    from cgen import FunctionBody, FunctionDeclaration, Block, \
            Const, Pointer, Value, Statement
    from codepy.bpl import BoostPythonModule

    mod = BoostPythonModule()

    mod.add_function(
        FunctionBody(
            FunctionDeclaration(Const(Pointer(Value("char", "greet"))), []),
            Block([Statement('return "%s"' % greeting)])))

    from codepy.toolchain import guess_toolchain
    return mod.compile(guess_toolchain(), wait_on_error=True)
def make_greet_mod(greeting):
    from codepy.cgen import FunctionBody, FunctionDeclaration, Block, \
            Const, Pointer, Value, Statement
    from codepy.bpl import BoostPythonModule

    mod = BoostPythonModule()

    mod.add_function(
            FunctionBody(
                FunctionDeclaration(Const(Pointer(Value("char", "greet"))), []),
                Block([Statement('return "%s"' % greeting)])
                ))

    from codepy.toolchain import guess_toolchain
    return mod.compile(guess_toolchain(), wait_on_error=True)
Beispiel #4
0

import pycuda
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np

from codepy.cgen import *
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule

#Make a host_module, compiled for CPU
host_mod = BoostPythonModule()

#Make a device module, compiled with NVCC
nvcc_mod = CudaModule(host_mod)

#Describe device module code
#NVCC includes
nvcc_includes = [
    'thrust/sort.h',
    'thrust/device_vector.h',
    'cuda.h',
    ]
#Add includes to module
nvcc_mod.add_to_preamble([Include(x) for x in nvcc_includes])

#NVCC function
nvcc_function = FunctionBody(
    FunctionDeclaration(Value('void', 'my_sort'),
                        [Value('CUdeviceptr', 'input_ptr'),
Beispiel #5
0
def get_boundary_flux_mod(fluxes, fvi, discr, dtype):
    from cgen import \
            FunctionDeclaration, FunctionBody, Typedef, Struct, \
            Const, Reference, Value, POD, MaybeUnused, \
            Statement, Include, Line, Block, Initializer, Assign, \
            CustomLoop, For

    from pytools import to_uncomplex_dtype, flatten

    from codepy.bpl import BoostPythonModule
    mod = BoostPythonModule()

    mod.add_to_preamble([
        Include("cstdlib"),
        Include("algorithm"),
        Line(),
        Include("boost/foreach.hpp"),
        Line(),
        Include("hedge/face_operators.hpp"),
        ])

    S = Statement
    mod.add_to_module([
        S("using namespace hedge"),
        S("using namespace pyublas"),
        Line(),
        Typedef(POD(dtype, "value_type")),
        Typedef(POD(to_uncomplex_dtype(dtype), "uncomplex_type")),
        ])

    arg_struct = Struct("arg_struct", [
        Value("numpy_array<value_type>", "flux%d_on_faces" % i)
        for i in range(len(fluxes))
        ]+[
        Value("numpy_array<value_type>", arg_name)
        for arg_name in fvi.arg_names
        ])

    mod.add_struct(arg_struct, "ArgStruct")
    mod.add_to_module([Line()])

    fdecl = FunctionDeclaration(
                Value("void", "gather_flux"),
                [
                    Const(Reference(Value("face_group<face_pair<straight_face> >" , "fg"))),
                    Reference(Value("arg_struct", "args"))
                    ])

    from pymbolic.mapper.stringifier import PREC_PRODUCT

    def gen_flux_code():
        f2cm = FluxToCodeMapper()

        result = [
                Assign("fof%d_it[loc_fof_base+i]" % flux_idx,
                    "uncomplex_type(fp.int_side.face_jacobian) * " +
                    flux_to_code(f2cm, False, flux_idx, fvi, flux.op.flux, PREC_PRODUCT))
                for flux_idx, flux in enumerate(fluxes)
                ]

        return [
            Initializer(Value("value_type", cse_name), cse_str)
            for cse_name, cse_str in f2cm.cse_name_list] + result

    fbody = Block([
        Initializer(
            Const(Value("numpy_array<value_type>::iterator", "fof%d_it" % i)),
            "args.flux%d_on_faces.begin()" % i)
        for i in range(len(fluxes))
        ]+[
        Initializer(
            Const(Value("numpy_array<value_type>::const_iterator",
                "%s_it" % arg_name)),
            "args.%s.begin()" % arg_name)
        for arg_name in fvi.arg_names
        ]+[
        Line(),
        CustomLoop("BOOST_FOREACH(const face_pair<straight_face> &fp, fg.face_pairs)", Block(
            list(flatten([
            Initializer(Value("node_number_t", "%s_ebi" % where),
                "fp.%s.el_base_index" % where),
            Initializer(Value("index_lists_t::const_iterator", "%s_idx_list" % where),
                "fg.index_list(fp.%s.face_index_list_number)" % where),
            Line(),
            ]
            for where in ["int_side", "ext_side"]
            ))+[
            Line(),
            Initializer(Value("node_number_t", "loc_fof_base"),
                "fg.face_length()*(fp.%(where)s.local_el_number*fg.face_count"
                " + fp.%(where)s.face_id)" % {"where": "int_side"}),
            Line(),
            For(
                "unsigned i = 0",
                "i < fg.face_length()",
                "++i",
                Block(
                    [
                    Initializer(MaybeUnused(
                        Value("node_number_t", "%s_idx" % where)),
                        "%(where)s_ebi + %(where)s_idx_list[i]"
                        % {"where": where})
                    for where in ["int_side", "ext_side"]
                    ]+gen_flux_code()
                    )
                )
            ]))
        ])

    mod.add_function(FunctionBody(fdecl, fbody))

    #print "----------------------------------------------------------------"
    #print mod.generate()
    #raw_input("[Enter]")

    return mod.compile(get_flux_toolchain(discr, fluxes))
from codepy.cgen import *
from codepy.bpl import BoostPythonModule
mod = BoostPythonModule()

mod.add_function(
        FunctionBody(
            FunctionDeclaration(Const(Pointer(Value("char", "greet"))), []),
            Block([Statement('return "hello world"')])
            ))

from codepy.toolchain import guess_distutils_toolchain
cmod = mod.compile(guess_distutils_toolchain())

print cmod.greet()

Beispiel #7
0
    def make_lift(self, fgroup, with_scale, dtype):
        discr = self.discr
        from cgen import (
                FunctionDeclaration, FunctionBody, Typedef,
                Const, Reference, Value, POD,
                Statement, Include, Line, Block, Initializer, Assign,
                For, If,
                Define)

        from pytools import to_uncomplex_dtype

        from codepy.bpl import BoostPythonModule
        mod = BoostPythonModule()

        S = Statement
        mod.add_to_preamble([
            Include("hedge/face_operators.hpp"),
            Include("hedge/volume_operators.hpp"),
            Include("boost/foreach.hpp"),
            ])

        mod.add_to_module([
            S("namespace ublas = boost::numeric::ublas"),
            S("using namespace hedge"),
            S("using namespace pyublas"),
            Line(),
            Define("DOFS_PER_EL", fgroup.ldis_loc.node_count()),
            Define("FACES_PER_EL", fgroup.ldis_loc.face_count()),
            Define("DIMENSIONS", discr.dimensions),
            Line(),
            Typedef(POD(dtype, "value_type")),
            Typedef(POD(to_uncomplex_dtype(dtype), "uncomplex_type")),
            ])

        def if_(cond, result, else_=None):
            if cond:
                return [result]
            else:
                if else_ is None:
                    return []
                else:
                    return [else_]

        fdecl = FunctionDeclaration(
                    Value("void", "lift"),
                    [
                    Const(Reference(Value("face_group<face_pair<straight_face> >", "fg"))),
                    Value("ublas::matrix<uncomplex_type>", "matrix"),
                    Value("numpy_array<value_type>", "field"),
                    Value("numpy_array<value_type>", "result")
                    ]+if_(with_scale,
                        Const(Reference(Value("numpy_array<double>",
                            "elwise_post_scaling"))))
                    )

        def make_it(name, is_const=True, tpname="value_type"):
            if is_const:
                const = "const_"
            else:
                const = ""

            return Initializer(
                Value("numpy_array<%s>::%siterator" % (tpname, const), name+"_it"),
                "%s.begin()" % name)

        fbody = Block([
            make_it("field"),
            make_it("result", is_const=False),
            ]+if_(with_scale, make_it("elwise_post_scaling", tpname="double"))+[
            Line(),
            For("unsigned fg_el_nr = 0",
                "fg_el_nr < fg.element_count()",
                "++fg_el_nr",
                Block([
                    Initializer(
                        Value("node_number_t", "dest_el_base"),
                        "fg.local_el_write_base[fg_el_nr]"),
                    Initializer(
                        Value("node_number_t", "src_el_base"),
                        "FACES_PER_EL*fg.face_length()*fg_el_nr"),
                    Line(),
                    For("unsigned i = 0",
                        "i < DOFS_PER_EL",
                        "++i",
                        Block([
                            Initializer(Value("value_type", "tmp"), 0),
                            Line(),
                            For("unsigned j = 0",
                                "j < FACES_PER_EL*fg.face_length()",
                                "++j",
                                S("tmp += matrix(i, j)*field_it[src_el_base+j]")
                                ),
                            Line(),
                            ]+if_(with_scale,
                                Assign("result_it[dest_el_base+i]",
                                    "tmp * value_type(*elwise_post_scaling_it)"),
                                Assign("result_it[dest_el_base+i]", "tmp"))
                            )
                        ),
                    ]+if_(with_scale, S("elwise_post_scaling_it++"))
                    )
                )
            ])

        mod.add_function(FunctionBody(fdecl, fbody))

        #print "----------------------------------------------------------------"
        #print FunctionBody(fdecl, fbody)
        #raw_input()

        return mod.compile(self.discr.toolchain).lift
from cgen import FunctionBody, \
        FunctionDeclaration, Typedef, POD, Value, \
        Pointer, Module, Block, Initializer, Assign, \
        Include, Statement, If
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule

import codepy.jit, codepy.toolchain

# INFO: the following code goes much along the tutorial to be found at
# http://wiki.tiker.net/PyCuda/Examples/ThrustInterop
# many thanks to Bryan Catanzaro!

#Make a host_module, compiled for CPU
host_mod = BoostPythonModule()

#Make a device module, compiled with NVCC
nvcc_mod = CudaModule(host_mod)

#Describe device module code
#NVCC includes
nvcc_includes = [
    'thrust/sort.h',
    'thrust/binary_search.h',
    'thrust/device_vector.h',
    'cuda.h',
    ]
#Add includes to module
nvcc_mod.add_to_preamble([Include(x) for x in nvcc_includes])

#NVCC function
from codepy.cgen import *
from codepy.bpl import BoostPythonModule
mod = BoostPythonModule()

mod.add_function(
        FunctionBody(
            FunctionDeclaration(Const(Pointer(Value("char", "greet"))), []),
            Block([Statement('return "hello world"')])
            ))

from codepy.toolchain import guess_toolchain
cmod = mod.compile(guess_toolchain(), wait_on_error=True, debug=True)

print cmod.greet()

Beispiel #10
0
import cgen as c
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule
from cgen.cuda import CudaGlobal

# This file tests the ability to use compile and link CUDA code into the
# Python interpreter.  Running this test requires PyCUDA
# as well as CUDA 3.0beta (or greater)

# The host module should include a function which is callable from Python
host_mod = BoostPythonModule()

# Are we on a 32 or 64 bit platform?
import sys
import math
bitness = math.log(sys.maxsize) + 1
ptr_sz_uint_conv = "K" if bitness > 32 else "I"

# This host function extracts a pointer and shape information from a PyCUDA
# GPUArray, and then sends them to a CUDA function.  The CUDA function
# returns a pointer to an array of the same type and shape as the input array.
# The host function then constructs a GPUArray with the result.

statements = [
    # Extract information from incoming GPUArray
    'PyObject* shape = PyObject_GetAttrString(gpuArray, "shape")',
    'PyObject* type = PyObject_GetAttrString(gpuArray, "dtype")',
    'PyObject* pointer = PyObject_GetAttrString(gpuArray, "gpudata")',
    "CUdeviceptr cudaPointer = boost::python::extract<CUdeviceptr>(pointer)",
    "PyObject* length = PySequence_GetItem(shape, 0)",
    "int intLength = boost::python::extract<int>(length)",
Beispiel #11
0
def get_elwise_module_descriptor(arguments, operation, name="kernel"):
    from codepy.bpl import BoostPythonModule

    from cgen import FunctionBody, FunctionDeclaration, \
            Value, POD, Struct, For, Initializer, Include, Statement, \
            Line, Block

    S = Statement

    mod = BoostPythonModule()
    mod.add_to_preamble([
        Include("pyublas/numpy.hpp"),
        ])

    mod.add_to_module([
        S("namespace ublas = boost::numeric::ublas"),
        S("using namespace pyublas"),
        Line(),
        ])

    body = Block([
        Initializer(
            Value("numpy_array<%s >::iterator"
                % dtype_to_ctype(varg.dtype),
                varg.name),
            "args.%s_ary.begin()" % varg.name)
        for varg in arguments if isinstance(varg, VectorArg)]
        +[Initializer(
            sarg.declarator(), "args." + sarg.name)
        for sarg in arguments if isinstance(sarg, ScalarArg)]
        )

    body.extend([
        Line(),
        For("unsigned i = 0",
            "i < codepy_length",
            "++i",
            Block([S(operation)])
            )
        ])

    arg_struct = Struct("arg_struct", 
            [arg.declarator() for arg in arguments])
    mod.add_struct(arg_struct, "ArgStruct")
    mod.add_to_module([Line()])

    mod.add_function(
            FunctionBody(
                FunctionDeclaration(
                    Value("void", name),
                    [POD(numpy.uintp, "codepy_length"),
                        Value("arg_struct", "args")]),
                body))

    return mod
Beispiel #12
0
    def make_lift(self, fgroup, with_scale, dtype):
        discr = self.discr
        from cgen import (FunctionDeclaration, FunctionBody, Typedef, Const,
                          Reference, Value, POD, Statement, Include, Line,
                          Block, Initializer, Assign, For, If, Define)

        from pytools import to_uncomplex_dtype

        from codepy.bpl import BoostPythonModule
        mod = BoostPythonModule()

        S = Statement
        mod.add_to_preamble([
            Include("hedge/face_operators.hpp"),
            Include("hedge/volume_operators.hpp"),
            Include("boost/foreach.hpp"),
        ])

        mod.add_to_module([
            S("namespace ublas = boost::numeric::ublas"),
            S("using namespace hedge"),
            S("using namespace pyublas"),
            Line(),
            Define("DOFS_PER_EL", fgroup.ldis_loc.node_count()),
            Define("FACES_PER_EL", fgroup.ldis_loc.face_count()),
            Define("DIMENSIONS", discr.dimensions),
            Line(),
            Typedef(POD(dtype, "value_type")),
            Typedef(POD(to_uncomplex_dtype(dtype), "uncomplex_type")),
        ])

        def if_(cond, result, else_=None):
            if cond:
                return [result]
            else:
                if else_ is None:
                    return []
                else:
                    return [else_]

        fdecl = FunctionDeclaration(Value("void", "lift"), [
            Const(
                Reference(Value("face_group<face_pair<straight_face> >",
                                "fg"))),
            Value("ublas::matrix<uncomplex_type>", "matrix"),
            Value("numpy_array<value_type>", "field"),
            Value("numpy_array<value_type>", "result")
        ] + if_(
            with_scale,
            Const(
                Reference(Value("numpy_array<double>",
                                "elwise_post_scaling")))))

        def make_it(name, is_const=True, tpname="value_type"):
            if is_const:
                const = "const_"
            else:
                const = ""

            return Initializer(
                Value("numpy_array<%s>::%siterator" % (tpname, const),
                      name + "_it"), "%s.begin()" % name)

        fbody = Block([
            make_it("field"),
            make_it("result", is_const=False),
        ] + if_(with_scale, make_it("elwise_post_scaling", tpname="double")) + [
            Line(),
            For(
                "unsigned fg_el_nr = 0", "fg_el_nr < fg.element_count()",
                "++fg_el_nr",
                Block([
                    Initializer(Value("node_number_t", "dest_el_base"),
                                "fg.local_el_write_base[fg_el_nr]"),
                    Initializer(Value("node_number_t", "src_el_base"),
                                "FACES_PER_EL*fg.face_length()*fg_el_nr"),
                    Line(),
                    For(
                        "unsigned i = 0", "i < DOFS_PER_EL", "++i",
                        Block([
                            Initializer(Value("value_type", "tmp"), 0),
                            Line(),
                            For(
                                "unsigned j = 0",
                                "j < FACES_PER_EL*fg.face_length()", "++j",
                                S("tmp += matrix(i, j)*field_it[src_el_base+j]"
                                  )),
                            Line(),
                        ] + if_(
                            with_scale,
                            Assign(
                                "result_it[dest_el_base+i]",
                                "tmp * value_type(*elwise_post_scaling_it)"),
                            Assign("result_it[dest_el_base+i]", "tmp")))),
                ] + if_(with_scale, S("elwise_post_scaling_it++"))))
        ])

        mod.add_function(FunctionBody(fdecl, fbody))

        #print "----------------------------------------------------------------"
        #print FunctionBody(fdecl, fbody)
        #raw_input()

        return mod.compile(self.discr.toolchain).lift
Beispiel #13
0
    def make_diff(self, elgroup, dtype, shape):
        """
        :param shape: If non-square, the resulting code takes two element_ranges
          arguments and supports non-square matrices.
        """
        from hedge._internal import UniformElementRanges

        assert isinstance(elgroup.ranges, UniformElementRanges)

        ldis = elgroup.local_discretization
        discr = self.discr
        from cgen import (
            FunctionDeclaration,
            FunctionBody,
            Typedef,
            Const,
            Reference,
            Value,
            POD,
            Statement,
            Include,
            Line,
            Block,
            Initializer,
            Assign,
            For,
            If,
            Define,
        )

        from pytools import to_uncomplex_dtype

        from codepy.bpl import BoostPythonModule

        mod = BoostPythonModule()

        # {{{ preamble
        S = Statement
        mod.add_to_preamble([Include("hedge/volume_operators.hpp"), Include("boost/foreach.hpp")])

        mod.add_to_module(
            [
                S("namespace ublas = boost::numeric::ublas"),
                S("using namespace hedge"),
                S("using namespace pyublas"),
                Line(),
                Define("ROW_COUNT", shape[0]),
                Define("COL_COUNT", shape[1]),
                Define("DIMENSIONS", discr.dimensions),
                Line(),
                Typedef(POD(dtype, "value_type")),
                Typedef(POD(to_uncomplex_dtype(dtype), "uncomplex_type")),
            ]
        )

        fdecl = FunctionDeclaration(
            Value("void", "diff"),
            [
                Const(Reference(Value("uniform_element_ranges", "from_ers"))),
                Const(Reference(Value("uniform_element_ranges", "to_ers"))),
                Value("numpy_array<value_type>", "field"),
            ]
            + [Value("ublas::matrix<uncomplex_type>", "diffmat_rst%d" % rst) for rst in range(discr.dimensions)]
            + [Value("numpy_array<value_type>", "result%d" % i) for i in range(discr.dimensions)],
        )
        # }}}

        # {{{ set-up
        def make_it(name, is_const=True, tpname="value_type"):
            if is_const:
                const = "const_"
            else:
                const = ""

            return Initializer(
                Value("numpy_array<%s>::%siterator" % (tpname, const), name + "_it"), "%s.begin()" % name
            )

        fbody = Block(
            [
                If("ROW_COUNT != diffmat_rst%d.size1()" % i, S('throw(std::runtime_error("unexpected matrix size"))'))
                for i in range(discr.dimensions)
            ]
            + [
                If("COL_COUNT != diffmat_rst%d.size2()" % i, S('throw(std::runtime_error("unexpected matrix size"))'))
                for i in range(discr.dimensions)
            ]
            + [
                If("ROW_COUNT != to_ers.el_size()", S('throw(std::runtime_error("unsupported image element size"))')),
                If(
                    "COL_COUNT != from_ers.el_size()",
                    S('throw(std::runtime_error("unsupported preimage element size"))'),
                ),
                If(
                    "from_ers.size() != to_ers.size()",
                    S(
                        'throw(std::runtime_error("image and preimage element groups '
                        'do nothave the same element count"))'
                    ),
                ),
                Line(),
                make_it("field"),
            ]
            + [make_it("result%d" % i, is_const=False) for i in range(discr.dimensions)]
            + [
                Line(),
                # }}}
                # {{{ computation
                For(
                    "element_number_t eg_el_nr = 0",
                    "eg_el_nr < to_ers.size()",
                    "++eg_el_nr",
                    Block(
                        [
                            Initializer(
                                Value("node_number_t", "from_el_base"), "from_ers.start() + eg_el_nr*COL_COUNT"
                            ),
                            Initializer(Value("node_number_t", "to_el_base"), "to_ers.start() + eg_el_nr*ROW_COUNT"),
                            Line(),
                            For(
                                "unsigned i = 0",
                                "i < ROW_COUNT",
                                "++i",
                                Block(
                                    [
                                        Initializer(Value("value_type", "drst_%d" % rst), 0)
                                        for rst in range(discr.dimensions)
                                    ]
                                    + [Line()]
                                    + [
                                        For(
                                            "unsigned j = 0",
                                            "j < COL_COUNT",
                                            "++j",
                                            Block(
                                                [
                                                    S(
                                                        "drst_%(rst)d += "
                                                        "diffmat_rst%(rst)d(i, j)*field_it[from_el_base+j]"
                                                        % {"rst": rst}
                                                    )
                                                    for rst in range(discr.dimensions)
                                                ]
                                            ),
                                        ),
                                        Line(),
                                    ]
                                    + [
                                        Assign("result%d_it[to_el_base+i]" % rst, "drst_%d" % rst)
                                        for rst in range(discr.dimensions)
                                    ]
                                ),
                            ),
                        ]
                    ),
                ),
            ]
        )
        # }}}

        # {{{ compilation
        mod.add_function(FunctionBody(fdecl, fbody))

        # print "----------------------------------------------------------------"
        # print mod.generate()
        # raw_input()

        compiled_func = mod.compile(self.discr.toolchain).diff

        if self.discr.instrumented:
            from hedge.tools import time_count_flop

            compiled_func = time_count_flop(
                compiled_func,
                discr.diff_timer,
                discr.diff_counter,
                discr.diff_flop_counter,
                flops=discr.dimensions
                * (
                    2 * ldis.node_count() * len(elgroup.members) * ldis.node_count()  # mul+add
                    + 2 * discr.dimensions * len(elgroup.members) * ldis.node_count()
                ),
                increment=discr.dimensions,
            )

        return compiled_func
Beispiel #14
0
#!python

import pycuda
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np

from cgen import *
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule

#Make a host_module, compiled for CPU
host_mod = BoostPythonModule()

#Make a device module, compiled with NVCC
nvcc_mod = CudaModule(host_mod)

#Describe device module code
#NVCC includes
nvcc_includes = [
    'thrust/sort.h',
    'thrust/device_vector.h',
    'cuda.h',
]
#Add includes to module
nvcc_mod.add_to_preamble([Include(x) for x in nvcc_includes])

#NVCC function
nvcc_function = FunctionBody(
    FunctionDeclaration(
        Value('void', 'my_sort'),
Beispiel #15
0
def get_boundary_flux_mod(fluxes, fvi, discr, dtype):
    from cgen import \
            FunctionDeclaration, FunctionBody, Typedef, Struct, \
            Const, Reference, Value, POD, MaybeUnused, \
            Statement, Include, Line, Block, Initializer, Assign, \
            CustomLoop, For

    from pytools import to_uncomplex_dtype, flatten

    from codepy.bpl import BoostPythonModule
    mod = BoostPythonModule()

    mod.add_to_preamble([
        Include("cstdlib"),
        Include("algorithm"),
        Line(),
        Include("boost/foreach.hpp"),
        Line(),
        Include("hedge/face_operators.hpp"),
        ])

    S = Statement
    mod.add_to_module([
        S("using namespace hedge"),
        S("using namespace pyublas"),
        Line(),
        Typedef(POD(dtype, "value_type")),
        Typedef(POD(to_uncomplex_dtype(dtype), "uncomplex_type")),
        ])

    arg_struct = Struct("arg_struct", [
        Value("numpy_array<value_type>", "flux%d_on_faces" % i)
        for i in range(len(fluxes))
        ]+[
        Value("numpy_array<value_type>", arg_name)
        for arg_name in fvi.arg_names
        ])

    mod.add_struct(arg_struct, "ArgStruct")
    mod.add_to_module([Line()])

    fdecl = FunctionDeclaration(
                Value("void", "gather_flux"),
                [
                    Const(Reference(Value("face_group<face_pair<straight_face> >" , "fg"))),
                    Reference(Value("arg_struct", "args"))
                    ])

    from pymbolic.mapper.stringifier import PREC_PRODUCT

    def gen_flux_code():
        f2cm = FluxToCodeMapper()

        result = [
                Assign("fof%d_it[loc_fof_base+i]" % flux_idx,
                    "uncomplex_type(fp.int_side.face_jacobian) * " +
                    flux_to_code(f2cm, False, flux_idx, fvi, flux.op.flux, PREC_PRODUCT))
                for flux_idx, flux in enumerate(fluxes)
                ]

        return [
            Initializer(Value("value_type", cse_name), cse_str)
            for cse_name, cse_str in f2cm.cse_name_list] + result

    fbody = Block([
        Initializer(
            Const(Value("numpy_array<value_type>::iterator", "fof%d_it" % i)),
            "args.flux%d_on_faces.begin()" % i)
        for i in range(len(fluxes))
        ]+[
        Initializer(
            Const(Value("numpy_array<value_type>::const_iterator",
                "%s_it" % arg_name)),
            "args.%s.begin()" % arg_name)
        for arg_name in fvi.arg_names
        ]+[
        Line(),
        CustomLoop("BOOST_FOREACH(const face_pair<straight_face> &fp, fg.face_pairs)", Block(
            list(flatten([
            Initializer(Value("node_number_t", "%s_ebi" % where),
                "fp.%s.el_base_index" % where),
            Initializer(Value("index_lists_t::const_iterator", "%s_idx_list" % where),
                "fg.index_list(fp.%s.face_index_list_number)" % where),
            Line(),
            ]
            for where in ["int_side", "ext_side"]
            ))+[
            Line(),
            Initializer(Value("node_number_t", "loc_fof_base"),
                "fg.face_length()*(fp.%(where)s.local_el_number*fg.face_count"
                " + fp.%(where)s.face_id)" % {"where": "int_side"}),
            Line(),
            For(
                "unsigned i = 0",
                "i < fg.face_length()",
                "++i",
                Block(
                    [
                    Initializer(MaybeUnused(
                        Value("node_number_t", "%s_idx" % where)),
                        "%(where)s_ebi + %(where)s_idx_list[i]"
                        % {"where": where})
                    for where in ["int_side", "ext_side"]
                    ]+gen_flux_code()
                    )
                )
            ]))
        ])

    mod.add_function(FunctionBody(fdecl, fbody))

    #print "----------------------------------------------------------------"
    #print mod.generate()
    #raw_input("[Enter]")

    return mod.compile(get_flux_toolchain(discr, fluxes))
Beispiel #16
0
def get_elwise_module_descriptor(arguments, operation, name="kernel"):
    from codepy.bpl import BoostPythonModule

    from cgen import FunctionBody, FunctionDeclaration, \
            Value, POD, Struct, For, Initializer, Include, Statement, \
            Line, Block

    S = Statement

    mod = BoostPythonModule()
    mod.add_to_preamble([
        Include("pyublas/numpy.hpp"),
    ])

    mod.add_to_module([
        S("namespace ublas = boost::numeric::ublas"),
        S("using namespace pyublas"),
        Line(),
    ])

    body = Block([
        Initializer(
            Value("numpy_array<%s >::iterator" % dtype_to_ctype(varg.dtype),
                  varg.name), "args.%s_ary.begin()" % varg.name)
        for varg in arguments if isinstance(varg, VectorArg)
    ] + [
        Initializer(sarg.declarator(), "args." + sarg.name)
        for sarg in arguments if isinstance(sarg, ScalarArg)
    ])

    body.extend([
        Line(),
        For("unsigned i = 0", "i < codepy_length", "++i",
            Block([S(operation)]))
    ])

    arg_struct = Struct("arg_struct", [arg.declarator() for arg in arguments])
    mod.add_struct(arg_struct, "ArgStruct")
    mod.add_to_module([Line()])

    mod.add_function(
        FunctionBody(
            FunctionDeclaration(Value("void", name), [
                POD(numpy.uintp, "codepy_length"),
                Value("arg_struct", "args")
            ]), body))

    return mod
Beispiel #17
0
import cgen as c
from codepy.bpl import BoostPythonModule
mod = BoostPythonModule()

mod.add_function(
        c.FunctionBody(
            c.FunctionDeclaration(c.Const(c.Pointer(c.Value("char", "greet"))), []),
            c.Block([c.Statement('return "hello world"')])
            ))

from codepy.toolchain import guess_toolchain
cmod = mod.compile(guess_toolchain())

print(cmod.greet())
Beispiel #18
0
from codepy.cgen import *
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule
from cgen.cuda import CudaGlobal

# This file tests the ability to use compile and link CUDA code into the
# Python interpreter.  Running this test requires PyCUDA
# as well as CUDA 3.0beta (or greater)


# The host module should include a function which is callable from Python
host_mod = BoostPythonModule()

# Are we on a 32 or 64 bit platform?
import sys, math
bitness = math.log(sys.maxsize) + 1
ptr_sz_uint_conv = 'K' if bitness > 32 else 'I'

# This host function extracts a pointer and shape information from a PyCUDA
# GPUArray, and then sends them to a CUDA function.  The CUDA function
# returns a pointer to an array of the same type and shape as the input array.
# The host function then constructs a GPUArray with the result.

statements = [
    # Extract information from incoming GPUArray
    'PyObject* shape = PyObject_GetAttrString(gpuArray, "shape")',
    'PyObject* type = PyObject_GetAttrString(gpuArray, "dtype")',
    'PyObject* pointer = PyObject_GetAttrString(gpuArray, "gpudata")',
    'CUdeviceptr cudaPointer = boost::python::extract<CUdeviceptr>(pointer)',
    'PyObject* length = PySequence_GetItem(shape, 0)',
    'int intLength = boost::python::extract<int>(length)',
Beispiel #19
0
def _cusp_solver(M, parameters):
    cache_key = lambda t, p: (t, p['ksp_type'], p['pc_type'], p['ksp_rtol'], p[
        'ksp_atol'], p['ksp_max_it'], p['ksp_gmres_restart'], p['ksp_monitor'])
    module = _cusp_cache.get(cache_key(M.ctype, parameters))
    if module:
        return module

    import codepy.toolchain
    from cgen import FunctionBody, FunctionDeclaration
    from cgen import Block, Statement, Include, Value
    from codepy.bpl import BoostPythonModule
    from codepy.cuda import CudaModule
    gcc_toolchain = codepy.toolchain.guess_toolchain()
    nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain()
    if 'CUSP_HOME' in os.environ:
        nvcc_toolchain.add_library('cusp', [os.environ['CUSP_HOME']], [], [])
    host_mod = BoostPythonModule()
    nvcc_mod = CudaModule(host_mod)
    nvcc_includes = [
        'thrust/device_vector.h', 'thrust/fill.h', 'cusp/csr_matrix.h',
        'cusp/krylov/cg.h', 'cusp/krylov/bicgstab.h', 'cusp/krylov/gmres.h',
        'cusp/precond/diagonal.h', 'cusp/precond/smoothed_aggregation.h',
        'cusp/precond/ainv.h', 'string'
    ]
    nvcc_mod.add_to_preamble([Include(s) for s in nvcc_includes])
    nvcc_mod.add_to_preamble([Statement('using namespace std')])

    # We're translating PETSc preconditioner types to CUSP
    diag = Statement(
        'cusp::precond::diagonal< ValueType, cusp::device_memory >M(A)')
    ainv = Statement(
        'cusp::precond::scaled_bridson_ainv< ValueType, cusp::device_memory >M(A)'
    )
    amg = Statement(
        'cusp::precond::smoothed_aggregation< IndexType, ValueType, cusp::device_memory >M(A)'
    )
    none = Statement(
        'cusp::identity_operator< ValueType, cusp::device_memory >M(nrows, ncols)'
    )
    preconditioners = {
        'diagonal': diag,
        'jacobi': diag,
        'ainv': ainv,
        'ainvcusp': ainv,
        'amg': amg,
        'hypre': amg,
        'none': none,
        None: none
    }
    try:
        precond_call = preconditioners[parameters['pc_type']]
    except KeyError:
        raise RuntimeError("Cusp does not support preconditioner type %s" %
                           parameters['pc_type'])
    solvers = {
        'cg':
        Statement('cusp::krylov::cg(A, x, b, monitor, M)'),
        'bicgstab':
        Statement('cusp::krylov::bicgstab(A, x, b, monitor, M)'),
        'gmres':
        Statement(
            'cusp::krylov::gmres(A, x, b, %(ksp_gmres_restart)d, monitor, M)' %
            parameters)
    }
    try:
        solve_call = solvers[parameters['ksp_type']]
    except KeyError:
        raise RuntimeError("Cusp does not support solver type %s" %
                           parameters['ksp_type'])
    monitor = 'monitor(b, %(ksp_max_it)d, %(ksp_rtol)g, %(ksp_atol)g)' % parameters

    nvcc_function = FunctionBody(
        FunctionDeclaration(Value('void', '__cusp_solve'), [
            Value('CUdeviceptr', '_rowptr'),
            Value('CUdeviceptr', '_colidx'),
            Value('CUdeviceptr', '_csrdata'),
            Value('CUdeviceptr', '_b'),
            Value('CUdeviceptr', '_x'),
            Value('int', 'nrows'),
            Value('int', 'ncols'),
            Value('int', 'nnz')
        ]),
        Block([
            Statement('typedef int IndexType'),
            Statement('typedef %s ValueType' % M.ctype),
            Statement(
                'typedef typename cusp::array1d_view< thrust::device_ptr<IndexType> > indices'
            ),
            Statement(
                'typedef typename cusp::array1d_view< thrust::device_ptr<ValueType> > values'
            ),
            Statement(
                'typedef cusp::csr_matrix_view< indices, indices, values, IndexType, ValueType, cusp::device_memory > matrix'
            ),
            Statement(
                'thrust::device_ptr< IndexType > rowptr((IndexType *)_rowptr)'
            ),
            Statement(
                'thrust::device_ptr< IndexType > colidx((IndexType *)_colidx)'
            ),
            Statement(
                'thrust::device_ptr< ValueType > csrdata((ValueType *)_csrdata)'
            ),
            Statement('thrust::device_ptr< ValueType > d_b((ValueType *)_b)'),
            Statement('thrust::device_ptr< ValueType > d_x((ValueType *)_x)'),
            Statement('indices row_offsets(rowptr, rowptr + nrows + 1)'),
            Statement('indices column_indices(colidx, colidx + nnz)'),
            Statement('values matrix_values(csrdata, csrdata + nnz)'),
            Statement('values b(d_b, d_b + nrows)'),
            Statement('values x(d_x, d_x + ncols)'),
            Statement('thrust::fill(x.begin(), x.end(), (ValueType)0)'),
            Statement(
                'matrix A(nrows, ncols, nnz, row_offsets, column_indices, matrix_values)'
            ),
            Statement('cusp::%s_monitor< ValueType > %s' %
                      ('verbose' if parameters['ksp_monitor'] else 'default',
                       monitor)), precond_call, solve_call
        ]))

    host_mod.add_to_preamble(
        [Include('boost/python/extract.hpp'),
         Include('string')])
    host_mod.add_to_preamble([Statement('using namespace boost::python')])
    host_mod.add_to_preamble([Statement('using namespace std')])

    nvcc_mod.add_function(nvcc_function)

    host_mod.add_function(
        FunctionBody(
            FunctionDeclaration(Value('void', 'solve'), [
                Value('object', '_rowptr'),
                Value('object', '_colidx'),
                Value('object', '_csrdata'),
                Value('object', '_b'),
                Value('object', '_x'),
                Value('object', '_nrows'),
                Value('object', '_ncols'),
                Value('object', '_nnz')
            ]),
            Block([
                Statement(
                    'CUdeviceptr rowptr = extract<CUdeviceptr>(_rowptr.attr("gpudata"))'
                ),
                Statement(
                    'CUdeviceptr colidx = extract<CUdeviceptr>(_colidx.attr("gpudata"))'
                ),
                Statement(
                    'CUdeviceptr csrdata = extract<CUdeviceptr>(_csrdata.attr("gpudata"))'
                ),
                Statement(
                    'CUdeviceptr b = extract<CUdeviceptr>(_b.attr("gpudata"))'
                ),
                Statement(
                    'CUdeviceptr x = extract<CUdeviceptr>(_x.attr("gpudata"))'
                ),
                Statement('int nrows = extract<int>(_nrows)'),
                Statement('int ncols = extract<int>(_ncols)'),
                Statement('int nnz = extract<int>(_nnz)'),
                Statement(
                    '__cusp_solve(rowptr, colidx, csrdata, b, x, nrows, ncols, nnz)'
                )
            ])))

    nvcc_toolchain.cflags.append('-arch')
    nvcc_toolchain.cflags.append('sm_20')
    nvcc_toolchain.cflags.append('-O3')
    module = nvcc_mod.compile(gcc_toolchain,
                              nvcc_toolchain,
                              debug=configuration["debug"])

    _cusp_cache[cache_key(M.ctype, parameters)] = module
    return module
Beispiel #20
0
import cgen as c
from codepy.bpl import BoostPythonModule
mod = BoostPythonModule()

mod.add_function(
    c.FunctionBody(
        c.FunctionDeclaration(c.Const(c.Pointer(c.Value("char", "greet"))),
                              []),
        c.Block([c.Statement('return "hello world"')])))

from codepy.toolchain import guess_toolchain
cmod = mod.compile(guess_toolchain())

print(cmod.greet())
from codepy.cgen import *
from codepy.bpl import BoostPythonModule
mod = BoostPythonModule()

mod.add_function(
    FunctionBody(
        FunctionDeclaration(Const(Pointer(Value("char", "greet"))), []),
        Block([Statement('return "hello world"')])))

from codepy.toolchain import guess_distutils_toolchain
cmod = mod.compile(guess_distutils_toolchain())

print cmod.greet()
Beispiel #22
0
def _cusp_solver(M, parameters):
    cache_key = lambda t, p: (t,
                              p['ksp_type'],
                              p['pc_type'],
                              p['ksp_rtol'],
                              p['ksp_atol'],
                              p['ksp_max_it'],
                              p['ksp_gmres_restart'],
                              p['ksp_monitor'])
    module = _cusp_cache.get(cache_key(M.ctype, parameters))
    if module:
        return module

    import codepy.toolchain
    from cgen import FunctionBody, FunctionDeclaration
    from cgen import Block, Statement, Include, Value
    from codepy.bpl import BoostPythonModule
    from codepy.cuda import CudaModule
    gcc_toolchain = codepy.toolchain.guess_toolchain()
    nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain()
    if 'CUSP_HOME' in os.environ:
        nvcc_toolchain.add_library('cusp', [os.environ['CUSP_HOME']], [], [])
    host_mod = BoostPythonModule()
    nvcc_mod = CudaModule(host_mod)
    nvcc_includes = ['thrust/device_vector.h',
                     'thrust/fill.h',
                     'cusp/csr_matrix.h',
                     'cusp/krylov/cg.h',
                     'cusp/krylov/bicgstab.h',
                     'cusp/krylov/gmres.h',
                     'cusp/precond/diagonal.h',
                     'cusp/precond/smoothed_aggregation.h',
                     'cusp/precond/ainv.h',
                     'string']
    nvcc_mod.add_to_preamble([Include(s) for s in nvcc_includes])
    nvcc_mod.add_to_preamble([Statement('using namespace std')])

    # We're translating PETSc preconditioner types to CUSP
    diag = Statement('cusp::precond::diagonal< ValueType, cusp::device_memory >M(A)')
    ainv = Statement(
        'cusp::precond::scaled_bridson_ainv< ValueType, cusp::device_memory >M(A)')
    amg = Statement(
        'cusp::precond::smoothed_aggregation< IndexType, ValueType, cusp::device_memory >M(A)')
    none = Statement(
        'cusp::identity_operator< ValueType, cusp::device_memory >M(nrows, ncols)')
    preconditioners = {
        'diagonal': diag,
        'jacobi': diag,
        'ainv': ainv,
        'ainvcusp': ainv,
        'amg': amg,
        'hypre': amg,
        'none': none,
        None: none
    }
    try:
        precond_call = preconditioners[parameters['pc_type']]
    except KeyError:
        raise RuntimeError("Cusp does not support preconditioner type %s" %
                           parameters['pc_type'])
    solvers = {
        'cg': Statement('cusp::krylov::cg(A, x, b, monitor, M)'),
        'bicgstab': Statement('cusp::krylov::bicgstab(A, x, b, monitor, M)'),
        'gmres': Statement('cusp::krylov::gmres(A, x, b, %(ksp_gmres_restart)d, monitor, M)' % parameters)
    }
    try:
        solve_call = solvers[parameters['ksp_type']]
    except KeyError:
        raise RuntimeError("Cusp does not support solver type %s" %
                           parameters['ksp_type'])
    monitor = 'monitor(b, %(ksp_max_it)d, %(ksp_rtol)g, %(ksp_atol)g)' % parameters

    nvcc_function = FunctionBody(
        FunctionDeclaration(Value('void', '__cusp_solve'),
                            [Value('CUdeviceptr', '_rowptr'),
                             Value('CUdeviceptr', '_colidx'),
                             Value('CUdeviceptr', '_csrdata'),
                             Value('CUdeviceptr', '_b'),
                             Value('CUdeviceptr', '_x'),
                             Value('int', 'nrows'),
                             Value('int', 'ncols'),
                             Value('int', 'nnz')]),
        Block([
            Statement('typedef int IndexType'),
            Statement('typedef %s ValueType' % M.ctype),
            Statement(
                'typedef typename cusp::array1d_view< thrust::device_ptr<IndexType> > indices'),
            Statement(
                'typedef typename cusp::array1d_view< thrust::device_ptr<ValueType> > values'),
            Statement(
                'typedef cusp::csr_matrix_view< indices, indices, values, IndexType, ValueType, cusp::device_memory > matrix'),
            Statement('thrust::device_ptr< IndexType > rowptr((IndexType *)_rowptr)'),
            Statement('thrust::device_ptr< IndexType > colidx((IndexType *)_colidx)'),
            Statement('thrust::device_ptr< ValueType > csrdata((ValueType *)_csrdata)'),
            Statement('thrust::device_ptr< ValueType > d_b((ValueType *)_b)'),
            Statement('thrust::device_ptr< ValueType > d_x((ValueType *)_x)'),
            Statement('indices row_offsets(rowptr, rowptr + nrows + 1)'),
            Statement('indices column_indices(colidx, colidx + nnz)'),
            Statement('values matrix_values(csrdata, csrdata + nnz)'),
            Statement('values b(d_b, d_b + nrows)'),
            Statement('values x(d_x, d_x + ncols)'),
            Statement('thrust::fill(x.begin(), x.end(), (ValueType)0)'),
            Statement(
                'matrix A(nrows, ncols, nnz, row_offsets, column_indices, matrix_values)'),
            Statement('cusp::%s_monitor< ValueType > %s' %
                      ('verbose' if parameters['ksp_monitor'] else 'default',
                       monitor)),
            precond_call,
            solve_call
        ]))

    host_mod.add_to_preamble([Include('boost/python/extract.hpp'), Include('string')])
    host_mod.add_to_preamble([Statement('using namespace boost::python')])
    host_mod.add_to_preamble([Statement('using namespace std')])

    nvcc_mod.add_function(nvcc_function)

    host_mod.add_function(
        FunctionBody(
            FunctionDeclaration(Value('void', 'solve'),
                                [Value('object', '_rowptr'),
                                 Value('object', '_colidx'),
                                 Value('object', '_csrdata'),
                                 Value('object', '_b'),
                                 Value('object', '_x'),
                                 Value('object', '_nrows'),
                                 Value('object', '_ncols'),
                                 Value('object', '_nnz')]),
            Block([
                Statement(
                    'CUdeviceptr rowptr = extract<CUdeviceptr>(_rowptr.attr("gpudata"))'),
                Statement(
                    'CUdeviceptr colidx = extract<CUdeviceptr>(_colidx.attr("gpudata"))'),
                Statement(
                    'CUdeviceptr csrdata = extract<CUdeviceptr>(_csrdata.attr("gpudata"))'),
                Statement('CUdeviceptr b = extract<CUdeviceptr>(_b.attr("gpudata"))'),
                Statement('CUdeviceptr x = extract<CUdeviceptr>(_x.attr("gpudata"))'),
                Statement('int nrows = extract<int>(_nrows)'),
                Statement('int ncols = extract<int>(_ncols)'),
                Statement('int nnz = extract<int>(_nnz)'),
                Statement('__cusp_solve(rowptr, colidx, csrdata, b, x, nrows, ncols, nnz)')
            ])))

    nvcc_toolchain.cflags.append('-arch')
    nvcc_toolchain.cflags.append('sm_20')
    nvcc_toolchain.cflags.append('-O3')
    module = nvcc_mod.compile(gcc_toolchain, nvcc_toolchain, debug=configuration["debug"])

    _cusp_cache[cache_key(M.ctype, parameters)] = module
    return module
Beispiel #23
0
    def make_diff(self, elgroup, dtype, shape):
        """
        :param shape: If non-square, the resulting code takes two element_ranges
          arguments and supports non-square matrices.
        """
        from hedge._internal import UniformElementRanges
        assert isinstance(elgroup.ranges, UniformElementRanges)

        ldis = elgroup.local_discretization
        discr = self.discr
        from cgen import (
                FunctionDeclaration, FunctionBody, Typedef,
                Const, Reference, Value, POD,
                Statement, Include, Line, Block, Initializer, Assign,
                For, If,
                Define)

        from pytools import to_uncomplex_dtype

        from codepy.bpl import BoostPythonModule
        mod = BoostPythonModule()

        # {{{ preamble
        S = Statement
        mod.add_to_preamble([
            Include("hedge/volume_operators.hpp"),
            Include("boost/foreach.hpp"),
            ])

        mod.add_to_module([
            S("namespace ublas = boost::numeric::ublas"),
            S("using namespace hedge"),
            S("using namespace pyublas"),
            Line(),
            Define("ROW_COUNT", shape[0]),
            Define("COL_COUNT", shape[1]),
            Define("DIMENSIONS", discr.dimensions),
            Line(),
            Typedef(POD(dtype, "value_type")),
            Typedef(POD(to_uncomplex_dtype(dtype), "uncomplex_type")),
            ])

        fdecl = FunctionDeclaration(
                    Value("void", "diff"),
                    [
                    Const(Reference(Value("uniform_element_ranges", "from_ers"))),
                    Const(Reference(Value("uniform_element_ranges", "to_ers"))),
                    Value("numpy_array<value_type>", "field")
                    ]+[
                    Value("ublas::matrix<uncomplex_type>", "diffmat_rst%d" % rst)
                    for rst in range(discr.dimensions)
                    ]+[
                    Value("numpy_array<value_type>", "result%d" % i)
                    for i in range(discr.dimensions)
                    ]
                    )
        # }}}

        # {{{ set-up
        def make_it(name, is_const=True, tpname="value_type"):
            if is_const:
                const = "const_"
            else:
                const = ""

            return Initializer(
                Value("numpy_array<%s>::%siterator" % (tpname, const), name+"_it"),
                "%s.begin()" % name)

        fbody = Block([
            If("ROW_COUNT != diffmat_rst%d.size1()" % i,
                S('throw(std::runtime_error("unexpected matrix size"))'))
            for i in range(discr.dimensions)
            ] + [
            If("COL_COUNT != diffmat_rst%d.size2()" % i,
                S('throw(std::runtime_error("unexpected matrix size"))'))
            for i in range(discr.dimensions) 
            ]+[
            If("ROW_COUNT != to_ers.el_size()",
                S('throw(std::runtime_error("unsupported image element size"))')),
            If("COL_COUNT != from_ers.el_size()",
                S('throw(std::runtime_error("unsupported preimage element size"))')),
            If("from_ers.size() != to_ers.size()",
                S('throw(std::runtime_error("image and preimage element groups '
                    'do nothave the same element count"))')),
            Line(),
            make_it("field"),
            ]+[
            make_it("result%d" % i, is_const=False)
            for i in range(discr.dimensions)
            ]+[
            Line(),
        # }}}

        # {{{ computation
            For("element_number_t eg_el_nr = 0",
                "eg_el_nr < to_ers.size()",
                "++eg_el_nr",
                Block([
                    Initializer(
                        Value("node_number_t", "from_el_base"),
                        "from_ers.start() + eg_el_nr*COL_COUNT"),
                    Initializer(
                        Value("node_number_t", "to_el_base"),
                        "to_ers.start() + eg_el_nr*ROW_COUNT"),
                    Line(),
                    For("unsigned i = 0",
                        "i < ROW_COUNT",
                        "++i",
                        Block([
                            Initializer(Value("value_type", "drst_%d" % rst), 0)
                            for rst in range(discr.dimensions)
                            ]+[
                            Line(),
                            ]+[
                            For("unsigned j = 0",
                                "j < COL_COUNT",
                                "++j",
                                Block([
                                    S("drst_%(rst)d += "
                                        "diffmat_rst%(rst)d(i, j)*field_it[from_el_base+j]"
                                        % {"rst":rst})
                                    for rst in range(discr.dimensions)
                                    ])
                                ),
                            Line(),
                            ]+[
                            Assign("result%d_it[to_el_base+i]" % rst,
                                "drst_%d" % rst)
                            for rst in range(discr.dimensions)
                            ])
                        )
                    ])
                )
            ])
        # }}}

        # {{{ compilation
        mod.add_function(FunctionBody(fdecl, fbody))

        #print "----------------------------------------------------------------"
        #print mod.generate()
        #raw_input()

        compiled_func = mod.compile(self.discr.toolchain).diff

        if self.discr.instrumented:
            from hedge.tools import time_count_flop

            compiled_func = time_count_flop(compiled_func,
                    discr.diff_timer, discr.diff_counter,
                    discr.diff_flop_counter,
                    flops=discr.dimensions*(
                        2 # mul+add
                        * ldis.node_count() * len(elgroup.members)
                        * ldis.node_count()
                        +
                        2 * discr.dimensions
                        * len(elgroup.members) * ldis.node_count()),
                    increment=discr.dimensions)

        return compiled_func