Python BoostPythonModule.BoostPythonModule Beispiele, codepy.bpl.BoostPythonModule.BoostPythonModule Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: bad_cell.py Projekt: yangzilongdmgy/hedge

    def make_codepy_module(self, toolchain, dtype):
        from codepy.libraries import add_codepy
        toolchain = toolchain.copy()
        add_codepy(toolchain)

        from cgen import (Value, Include, Statement,
                Typedef, FunctionBody, FunctionDeclaration, Block, Const,
                Line, POD, Initializer, CustomLoop)
        S = Statement

        from codepy.bpl import BoostPythonModule
        mod = BoostPythonModule()

        mod.add_to_preamble([
            Include("vector"),
            Include("algorithm"),
            Include("hedge/base.hpp"),
            Include("hedge/volume_operators.hpp"),
            Include("boost/foreach.hpp"),
            Include("boost/numeric/ublas/io.hpp"),
            ]+self.get_cpu_extra_includes())

        mod.add_to_module([
            S("namespace ublas = boost::numeric::ublas"),
            S("using namespace hedge"),
            S("using namespace pyublas"),
            Line(),
            Typedef(POD(dtype, "value_type")),
            Line(),
            ])

        mod.add_function(FunctionBody(
            FunctionDeclaration(Value("void", "process_elements"), [
                Const(Value("uniform_element_ranges", "ers")),
                Const(Value("numpy_vector<value_type>", "field")),
                Value("numpy_vector<value_type>", "result"),
                ]+self.get_cpu_extra_parameter_declarators()),
            Block([
                Typedef(Value("numpy_vector<value_type>::iterator",
                    "it_type")),
                Typedef(Value("numpy_vector<value_type>::const_iterator",
                    "cit_type")),
                Line(),
                Initializer(Value("it_type", "result_it"),
                    "result.begin()"),
                Initializer(Value("cit_type", "field_it"),
                    "field.begin()"),
                Line() ]+self.get_cpu_extra_preamble()+[ Line(),
                CustomLoop(
                    "BOOST_FOREACH(const element_range er, ers)",
                    Block(self.get_cpu_per_element_code())
                    )
                ])))

        #print mod.generate()
        #toolchain = toolchain.copy()
        #toolchain.enable_debugging
        return mod.compile(toolchain)

Beispiel #2

0

Datei anzeigen

def make_greet_mod(greeting):
    from cgen import FunctionBody, FunctionDeclaration, Block, \
            Const, Pointer, Value, Statement
    from codepy.bpl import BoostPythonModule

    mod = BoostPythonModule()

    mod.add_function(
        FunctionBody(
            FunctionDeclaration(Const(Pointer(Value("char", "greet"))), []),
            Block([Statement('return "%s"' % greeting)])))

    from codepy.toolchain import guess_toolchain
    return mod.compile(guess_toolchain(), wait_on_error=True)

Beispiel #3

0

Datei anzeigen

Datei: elementwise.py Projekt: inducer/codepy

def get_elwise_module_descriptor(arguments, operation, name="kernel"):
    from codepy.bpl import BoostPythonModule

    from cgen import FunctionBody, FunctionDeclaration, \
            Value, POD, Struct, For, Initializer, Include, Statement, \
            Line, Block

    S = Statement  # noqa: N806

    mod = BoostPythonModule()
    mod.add_to_preamble([
        Include("pyublas/numpy.hpp"),
    ])

    mod.add_to_module([
        S("namespace ublas = boost::numeric::ublas"),
        S("using namespace pyublas"),
        Line(),
    ])

    body = Block([
        Initializer(
            Value(
                "numpy_array<{} >::iterator".format(dtype_to_ctype(
                    varg.dtype)), varg.name), f"args.{varg.name}_ary.begin()")
        for varg in arguments if isinstance(varg, VectorArg)
    ] + [
        Initializer(sarg.declarator(), f"args.{sarg.name}")
        for sarg in arguments if isinstance(sarg, ScalarArg)
    ])

    body.extend([
        Line(),
        For("unsigned i = 0", "i < codepy_length", "++i",
            Block([S(operation)]))
    ])

    arg_struct = Struct("arg_struct", [arg.declarator() for arg in arguments])
    mod.add_struct(arg_struct, "ArgStruct")
    mod.add_to_module([Line()])

    mod.add_function(
        FunctionBody(
            FunctionDeclaration(Value("void", name), [
                POD(numpy.uintp, "codepy_length"),
                Value("arg_struct", "args")
            ]), body))

    return mod

Beispiel #4

0

Datei anzeigen

Datei: nvcc-test.py Projekt: simudream/codepy

from codepy.cgen import *
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule
from cgen.cuda import CudaGlobal

# This file tests the ability to use compile and link CUDA code into the
# Python interpreter.  Running this test requires PyCUDA
# as well as CUDA 3.0beta (or greater)


# The host module should include a function which is callable from Python
host_mod = BoostPythonModule()

# Are we on a 32 or 64 bit platform?
import sys, math
bitness = math.log(sys.maxsize) + 1
ptr_sz_uint_conv = 'K' if bitness > 32 else 'I'

# This host function extracts a pointer and shape information from a PyCUDA
# GPUArray, and then sends them to a CUDA function.  The CUDA function
# returns a pointer to an array of the same type and shape as the input array.
# The host function then constructs a GPUArray with the result.

statements = [
    # Extract information from incoming GPUArray
    'PyObject* shape = PyObject_GetAttrString(gpuArray, "shape")',
    'PyObject* type = PyObject_GetAttrString(gpuArray, "dtype")',
    'PyObject* pointer = PyObject_GetAttrString(gpuArray, "gpudata")',
    'CUdeviceptr cudaPointer = boost::python::extract<CUdeviceptr>(pointer)',
    'PyObject* length = PySequence_GetItem(shape, 0)',
    'int intLength = boost::python::extract<int>(length)',

Beispiel #5

0

Datei anzeigen

Datei: cuda.py Projekt: POETSII/PyOP2

def _cusp_solver(M, parameters):
    cache_key = lambda t, p: (t, p['ksp_type'], p['pc_type'], p['ksp_rtol'], p[
        'ksp_atol'], p['ksp_max_it'], p['ksp_gmres_restart'], p['ksp_monitor'])
    module = _cusp_cache.get(cache_key(M.ctype, parameters))
    if module:
        return module

    import codepy.toolchain
    from cgen import FunctionBody, FunctionDeclaration
    from cgen import Block, Statement, Include, Value
    from codepy.bpl import BoostPythonModule
    from codepy.cuda import CudaModule
    gcc_toolchain = codepy.toolchain.guess_toolchain()
    nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain()
    if 'CUSP_HOME' in os.environ:
        nvcc_toolchain.add_library('cusp', [os.environ['CUSP_HOME']], [], [])
    host_mod = BoostPythonModule()
    nvcc_mod = CudaModule(host_mod)
    nvcc_includes = [
        'thrust/device_vector.h', 'thrust/fill.h', 'cusp/csr_matrix.h',
        'cusp/krylov/cg.h', 'cusp/krylov/bicgstab.h', 'cusp/krylov/gmres.h',
        'cusp/precond/diagonal.h', 'cusp/precond/smoothed_aggregation.h',
        'cusp/precond/ainv.h', 'string'
    ]
    nvcc_mod.add_to_preamble([Include(s) for s in nvcc_includes])
    nvcc_mod.add_to_preamble([Statement('using namespace std')])

    # We're translating PETSc preconditioner types to CUSP
    diag = Statement(
        'cusp::precond::diagonal< ValueType, cusp::device_memory >M(A)')
    ainv = Statement(
        'cusp::precond::scaled_bridson_ainv< ValueType, cusp::device_memory >M(A)'
    )
    amg = Statement(
        'cusp::precond::smoothed_aggregation< IndexType, ValueType, cusp::device_memory >M(A)'
    )
    none = Statement(
        'cusp::identity_operator< ValueType, cusp::device_memory >M(nrows, ncols)'
    )
    preconditioners = {
        'diagonal': diag,
        'jacobi': diag,
        'ainv': ainv,
        'ainvcusp': ainv,
        'amg': amg,
        'hypre': amg,
        'none': none,
        None: none
    }
    try:
        precond_call = preconditioners[parameters['pc_type']]
    except KeyError:
        raise RuntimeError("Cusp does not support preconditioner type %s" %
                           parameters['pc_type'])
    solvers = {
        'cg':
        Statement('cusp::krylov::cg(A, x, b, monitor, M)'),
        'bicgstab':
        Statement('cusp::krylov::bicgstab(A, x, b, monitor, M)'),
        'gmres':
        Statement(
            'cusp::krylov::gmres(A, x, b, %(ksp_gmres_restart)d, monitor, M)' %
            parameters)
    }
    try:
        solve_call = solvers[parameters['ksp_type']]
    except KeyError:
        raise RuntimeError("Cusp does not support solver type %s" %
                           parameters['ksp_type'])
    monitor = 'monitor(b, %(ksp_max_it)d, %(ksp_rtol)g, %(ksp_atol)g)' % parameters

    nvcc_function = FunctionBody(
        FunctionDeclaration(Value('void', '__cusp_solve'), [
            Value('CUdeviceptr', '_rowptr'),
            Value('CUdeviceptr', '_colidx'),
            Value('CUdeviceptr', '_csrdata'),
            Value('CUdeviceptr', '_b'),
            Value('CUdeviceptr', '_x'),
            Value('int', 'nrows'),
            Value('int', 'ncols'),
            Value('int', 'nnz')
        ]),
        Block([
            Statement('typedef int IndexType'),
            Statement('typedef %s ValueType' % M.ctype),
            Statement(
                'typedef typename cusp::array1d_view< thrust::device_ptr<IndexType> > indices'
            ),
            Statement(
                'typedef typename cusp::array1d_view< thrust::device_ptr<ValueType> > values'
            ),
            Statement(
                'typedef cusp::csr_matrix_view< indices, indices, values, IndexType, ValueType, cusp::device_memory > matrix'
            ),
            Statement(
                'thrust::device_ptr< IndexType > rowptr((IndexType *)_rowptr)'
            ),
            Statement(
                'thrust::device_ptr< IndexType > colidx((IndexType *)_colidx)'
            ),
            Statement(
                'thrust::device_ptr< ValueType > csrdata((ValueType *)_csrdata)'
            ),
            Statement('thrust::device_ptr< ValueType > d_b((ValueType *)_b)'),
            Statement('thrust::device_ptr< ValueType > d_x((ValueType *)_x)'),
            Statement('indices row_offsets(rowptr, rowptr + nrows + 1)'),
            Statement('indices column_indices(colidx, colidx + nnz)'),
            Statement('values matrix_values(csrdata, csrdata + nnz)'),
            Statement('values b(d_b, d_b + nrows)'),
            Statement('values x(d_x, d_x + ncols)'),
            Statement('thrust::fill(x.begin(), x.end(), (ValueType)0)'),
            Statement(
                'matrix A(nrows, ncols, nnz, row_offsets, column_indices, matrix_values)'
            ),
            Statement('cusp::%s_monitor< ValueType > %s' %
                      ('verbose' if parameters['ksp_monitor'] else 'default',
                       monitor)), precond_call, solve_call
        ]))

    host_mod.add_to_preamble(
        [Include('boost/python/extract.hpp'),
         Include('string')])
    host_mod.add_to_preamble([Statement('using namespace boost::python')])
    host_mod.add_to_preamble([Statement('using namespace std')])

    nvcc_mod.add_function(nvcc_function)

    host_mod.add_function(
        FunctionBody(
            FunctionDeclaration(Value('void', 'solve'), [
                Value('object', '_rowptr'),
                Value('object', '_colidx'),
                Value('object', '_csrdata'),
                Value('object', '_b'),
                Value('object', '_x'),
                Value('object', '_nrows'),
                Value('object', '_ncols'),
                Value('object', '_nnz')
            ]),
            Block([
                Statement(
                    'CUdeviceptr rowptr = extract<CUdeviceptr>(_rowptr.attr("gpudata"))'
                ),
                Statement(
                    'CUdeviceptr colidx = extract<CUdeviceptr>(_colidx.attr("gpudata"))'
                ),
                Statement(
                    'CUdeviceptr csrdata = extract<CUdeviceptr>(_csrdata.attr("gpudata"))'
                ),
                Statement(
                    'CUdeviceptr b = extract<CUdeviceptr>(_b.attr("gpudata"))'
                ),
                Statement(
                    'CUdeviceptr x = extract<CUdeviceptr>(_x.attr("gpudata"))'
                ),
                Statement('int nrows = extract<int>(_nrows)'),
                Statement('int ncols = extract<int>(_ncols)'),
                Statement('int nnz = extract<int>(_nnz)'),
                Statement(
                    '__cusp_solve(rowptr, colidx, csrdata, b, x, nrows, ncols, nnz)'
                )
            ])))

    nvcc_toolchain.cflags.append('-arch')
    nvcc_toolchain.cflags.append('sm_20')
    nvcc_toolchain.cflags.append('-O3')
    module = nvcc_mod.compile(gcc_toolchain,
                              nvcc_toolchain,
                              debug=configuration["debug"])

    _cusp_cache[cache_key(M.ctype, parameters)] = module
    return module

Beispiel #6

0

Datei anzeigen

Datei: flux.py Projekt: allansnielsen/hedge

def get_boundary_flux_mod(fluxes, fvi, discr, dtype):
    from cgen import \
            FunctionDeclaration, FunctionBody, Typedef, Struct, \
            Const, Reference, Value, POD, MaybeUnused, \
            Statement, Include, Line, Block, Initializer, Assign, \
            CustomLoop, For

    from pytools import to_uncomplex_dtype, flatten

    from codepy.bpl import BoostPythonModule
    mod = BoostPythonModule()

    mod.add_to_preamble([
        Include("cstdlib"),
        Include("algorithm"),
        Line(),
        Include("boost/foreach.hpp"),
        Line(),
        Include("hedge/face_operators.hpp"),
        ])

    S = Statement
    mod.add_to_module([
        S("using namespace hedge"),
        S("using namespace pyublas"),
        Line(),
        Typedef(POD(dtype, "value_type")),
        Typedef(POD(to_uncomplex_dtype(dtype), "uncomplex_type")),
        ])

    arg_struct = Struct("arg_struct", [
        Value("numpy_array<value_type>", "flux%d_on_faces" % i)
        for i in range(len(fluxes))
        ]+[
        Value("numpy_array<value_type>", arg_name)
        for arg_name in fvi.arg_names
        ])

    mod.add_struct(arg_struct, "ArgStruct")
    mod.add_to_module([Line()])

    fdecl = FunctionDeclaration(
                Value("void", "gather_flux"),
                [
                    Const(Reference(Value("face_group<face_pair<straight_face> >" , "fg"))),
                    Reference(Value("arg_struct", "args"))
                    ])

    from pymbolic.mapper.stringifier import PREC_PRODUCT

    def gen_flux_code():
        f2cm = FluxToCodeMapper()

        result = [
                Assign("fof%d_it[loc_fof_base+i]" % flux_idx,
                    "uncomplex_type(fp.int_side.face_jacobian) * " +
                    flux_to_code(f2cm, False, flux_idx, fvi, flux.op.flux, PREC_PRODUCT))
                for flux_idx, flux in enumerate(fluxes)
                ]

        return [
            Initializer(Value("value_type", cse_name), cse_str)
            for cse_name, cse_str in f2cm.cse_name_list] + result

    fbody = Block([
        Initializer(
            Const(Value("numpy_array<value_type>::iterator", "fof%d_it" % i)),
            "args.flux%d_on_faces.begin()" % i)
        for i in range(len(fluxes))
        ]+[
        Initializer(
            Const(Value("numpy_array<value_type>::const_iterator",
                "%s_it" % arg_name)),
            "args.%s.begin()" % arg_name)
        for arg_name in fvi.arg_names
        ]+[
        Line(),
        CustomLoop("BOOST_FOREACH(const face_pair<straight_face> &fp, fg.face_pairs)", Block(
            list(flatten([
            Initializer(Value("node_number_t", "%s_ebi" % where),
                "fp.%s.el_base_index" % where),
            Initializer(Value("index_lists_t::const_iterator", "%s_idx_list" % where),
                "fg.index_list(fp.%s.face_index_list_number)" % where),
            Line(),
            ]
            for where in ["int_side", "ext_side"]
            ))+[
            Line(),
            Initializer(Value("node_number_t", "loc_fof_base"),
                "fg.face_length()*(fp.%(where)s.local_el_number*fg.face_count"
                " + fp.%(where)s.face_id)" % {"where": "int_side"}),
            Line(),
            For(
                "unsigned i = 0",
                "i < fg.face_length()",
                "++i",
                Block(
                    [
                    Initializer(MaybeUnused(
                        Value("node_number_t", "%s_idx" % where)),
                        "%(where)s_ebi + %(where)s_idx_list[i]"
                        % {"where": where})
                    for where in ["int_side", "ext_side"]
                    ]+gen_flux_code()
                    )
                )
            ]))
        ])

    mod.add_function(FunctionBody(fdecl, fbody))

    #print "----------------------------------------------------------------"
    #print mod.generate()
    #raw_input("[Enter]")

    return mod.compile(get_flux_toolchain(discr, fluxes))

Beispiel #7

0

Datei anzeigen

    def make_lift(self, fgroup, with_scale, dtype):
        discr = self.discr
        from cgen import (FunctionDeclaration, FunctionBody, Typedef, Const,
                          Reference, Value, POD, Statement, Include, Line,
                          Block, Initializer, Assign, For, If, Define)

        from pytools import to_uncomplex_dtype

        from codepy.bpl import BoostPythonModule
        mod = BoostPythonModule()

        S = Statement
        mod.add_to_preamble([
            Include("hedge/face_operators.hpp"),
            Include("hedge/volume_operators.hpp"),
            Include("boost/foreach.hpp"),
        ])

        mod.add_to_module([
            S("namespace ublas = boost::numeric::ublas"),
            S("using namespace hedge"),
            S("using namespace pyublas"),
            Line(),
            Define("DOFS_PER_EL", fgroup.ldis_loc.node_count()),
            Define("FACES_PER_EL", fgroup.ldis_loc.face_count()),
            Define("DIMENSIONS", discr.dimensions),
            Line(),
            Typedef(POD(dtype, "value_type")),
            Typedef(POD(to_uncomplex_dtype(dtype), "uncomplex_type")),
        ])

        def if_(cond, result, else_=None):
            if cond:
                return [result]
            else:
                if else_ is None:
                    return []
                else:
                    return [else_]

        fdecl = FunctionDeclaration(Value("void", "lift"), [
            Const(
                Reference(Value("face_group<face_pair<straight_face> >",
                                "fg"))),
            Value("ublas::matrix<uncomplex_type>", "matrix"),
            Value("numpy_array<value_type>", "field"),
            Value("numpy_array<value_type>", "result")
        ] + if_(
            with_scale,
            Const(
                Reference(Value("numpy_array<double>",
                                "elwise_post_scaling")))))

        def make_it(name, is_const=True, tpname="value_type"):
            if is_const:
                const = "const_"
            else:
                const = ""

            return Initializer(
                Value("numpy_array<%s>::%siterator" % (tpname, const),
                      name + "_it"), "%s.begin()" % name)

        fbody = Block([
            make_it("field"),
            make_it("result", is_const=False),
        ] + if_(with_scale, make_it("elwise_post_scaling", tpname="double")) + [
            Line(),
            For(
                "unsigned fg_el_nr = 0", "fg_el_nr < fg.element_count()",
                "++fg_el_nr",
                Block([
                    Initializer(Value("node_number_t", "dest_el_base"),
                                "fg.local_el_write_base[fg_el_nr]"),
                    Initializer(Value("node_number_t", "src_el_base"),
                                "FACES_PER_EL*fg.face_length()*fg_el_nr"),
                    Line(),
                    For(
                        "unsigned i = 0", "i < DOFS_PER_EL", "++i",
                        Block([
                            Initializer(Value("value_type", "tmp"), 0),
                            Line(),
                            For(
                                "unsigned j = 0",
                                "j < FACES_PER_EL*fg.face_length()", "++j",
                                S("tmp += matrix(i, j)*field_it[src_el_base+j]"
                                  )),
                            Line(),
                        ] + if_(
                            with_scale,
                            Assign(
                                "result_it[dest_el_base+i]",
                                "tmp * value_type(*elwise_post_scaling_it)"),
                            Assign("result_it[dest_el_base+i]", "tmp")))),
                ] + if_(with_scale, S("elwise_post_scaling_it++"))))
        ])

        mod.add_function(FunctionBody(fdecl, fbody))

        #print "----------------------------------------------------------------"
        #print FunctionBody(fdecl, fbody)
        #raw_input()

        return mod.compile(self.discr.toolchain).lift

Beispiel #8

0

Datei anzeigen

Datei: diff.py Projekt: yangzilongdmgy/hedge

    def make_diff(self, elgroup, dtype, shape):
        """
        :param shape: If non-square, the resulting code takes two element_ranges
          arguments and supports non-square matrices.
        """
        from hedge._internal import UniformElementRanges
        assert isinstance(elgroup.ranges, UniformElementRanges)

        ldis = elgroup.local_discretization
        discr = self.discr
        from cgen import (
                FunctionDeclaration, FunctionBody, Typedef,
                Const, Reference, Value, POD,
                Statement, Include, Line, Block, Initializer, Assign,
                For, If,
                Define)

        from pytools import to_uncomplex_dtype

        from codepy.bpl import BoostPythonModule
        mod = BoostPythonModule()

        # {{{ preamble
        S = Statement
        mod.add_to_preamble([
            Include("hedge/volume_operators.hpp"),
            Include("boost/foreach.hpp"),
            ])

        mod.add_to_module([
            S("namespace ublas = boost::numeric::ublas"),
            S("using namespace hedge"),
            S("using namespace pyublas"),
            Line(),
            Define("ROW_COUNT", shape[0]),
            Define("COL_COUNT", shape[1]),
            Define("DIMENSIONS", discr.dimensions),
            Line(),
            Typedef(POD(dtype, "value_type")),
            Typedef(POD(to_uncomplex_dtype(dtype), "uncomplex_type")),
            ])

        fdecl = FunctionDeclaration(
                    Value("void", "diff"),
                    [
                    Const(Reference(Value("uniform_element_ranges", "from_ers"))),
                    Const(Reference(Value("uniform_element_ranges", "to_ers"))),
                    Value("numpy_array<value_type>", "field")
                    ]+[
                    Value("ublas::matrix<uncomplex_type>", "diffmat_rst%d" % rst)
                    for rst in range(discr.dimensions)
                    ]+[
                    Value("numpy_array<value_type>", "result%d" % i)
                    for i in range(discr.dimensions)
                    ]
                    )
        # }}}

        # {{{ set-up
        def make_it(name, is_const=True, tpname="value_type"):
            if is_const:
                const = "const_"
            else:
                const = ""

            return Initializer(
                Value("numpy_array<%s>::%siterator" % (tpname, const), name+"_it"),
                "%s.begin()" % name)

        fbody = Block([
            If("ROW_COUNT != diffmat_rst%d.size1()" % i,
                S('throw(std::runtime_error("unexpected matrix size"))'))
            for i in range(discr.dimensions)
            ] + [
            If("COL_COUNT != diffmat_rst%d.size2()" % i,
                S('throw(std::runtime_error("unexpected matrix size"))'))
            for i in range(discr.dimensions) 
            ]+[
            If("ROW_COUNT != to_ers.el_size()",
                S('throw(std::runtime_error("unsupported image element size"))')),
            If("COL_COUNT != from_ers.el_size()",
                S('throw(std::runtime_error("unsupported preimage element size"))')),
            If("from_ers.size() != to_ers.size()",
                S('throw(std::runtime_error("image and preimage element groups '
                    'do nothave the same element count"))')),
            Line(),
            make_it("field"),
            ]+[
            make_it("result%d" % i, is_const=False)
            for i in range(discr.dimensions)
            ]+[
            Line(),
        # }}}

        # {{{ computation
            For("element_number_t eg_el_nr = 0",
                "eg_el_nr < to_ers.size()",
                "++eg_el_nr",
                Block([
                    Initializer(
                        Value("node_number_t", "from_el_base"),
                        "from_ers.start() + eg_el_nr*COL_COUNT"),
                    Initializer(
                        Value("node_number_t", "to_el_base"),
                        "to_ers.start() + eg_el_nr*ROW_COUNT"),
                    Line(),
                    For("unsigned i = 0",
                        "i < ROW_COUNT",
                        "++i",
                        Block([
                            Initializer(Value("value_type", "drst_%d" % rst), 0)
                            for rst in range(discr.dimensions)
                            ]+[
                            Line(),
                            ]+[
                            For("unsigned j = 0",
                                "j < COL_COUNT",
                                "++j",
                                Block([
                                    S("drst_%(rst)d += "
                                        "diffmat_rst%(rst)d(i, j)*field_it[from_el_base+j]"
                                        % {"rst":rst})
                                    for rst in range(discr.dimensions)
                                    ])
                                ),
                            Line(),
                            ]+[
                            Assign("result%d_it[to_el_base+i]" % rst,
                                "drst_%d" % rst)
                            for rst in range(discr.dimensions)
                            ])
                        )
                    ])
                )
            ])
        # }}}

        # {{{ compilation
        mod.add_function(FunctionBody(fdecl, fbody))

        #print "----------------------------------------------------------------"
        #print mod.generate()
        #raw_input()

        compiled_func = mod.compile(self.discr.toolchain).diff

        if self.discr.instrumented:
            from hedge.tools import time_count_flop

            compiled_func = time_count_flop(compiled_func,
                    discr.diff_timer, discr.diff_counter,
                    discr.diff_flop_counter,
                    flops=discr.dimensions*(
                        2 # mul+add
                        * ldis.node_count() * len(elgroup.members)
                        * ldis.node_count()
                        +
                        2 * discr.dimensions
                        * len(elgroup.members) * ldis.node_count()),
                    increment=discr.dimensions)

        return compiled_func