Python Scalar 예제들, theano.scalar.Scalar Python 예제들

예제 #1

0

파일 보기

파일: elemwise.py 프로젝트: smorfopoulou/viral_denovo_pipeline

    def generate_kernel(self, node, nodename):
        inps = [
            make_argument(i, 'i%d' % (n, )) for n, i in enumerate(node.inputs)
        ]
        scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs]

        outs = [
            make_argument(o, 'o%d' % (n, )) for n, o in enumerate(node.outputs)
            if not n in self.inplace_pattern
        ]
        scal_out = [scalar.Scalar(o.dtype) for o in node.outputs]

        fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
                          [o() for o in scal_out])

        try:
            code = self.scalar_op.c_support_code_apply(fake_node, nodename)
            if code:
                raise SupportCodeError(code)
        except MethodNotDefined:
            pass

        support_code = ""
        try:
            support_code = self.scalar_op.c_support_code()
        except MethodNotDefined:
            pass

        if (support_code.strip() != "#define THEANO_MACRO_MOD(x,y) (x % y)"
                and support_code.strip() != ""):
            # The macro is fine, the C++ struct is not.
            raise SupportCodeError(support_code)

        scal_out = []
        oi = 0
        for n in range(len(fake_node.outputs)):
            if n in self.inplace_pattern:
                scal_out.append(inps[self.inplace_pattern[n]].name + '[i]')
            else:
                scal_out.append(outs[oi].name + '[i]')
                oi += 1

        kop = self.scalar_op.c_code(fake_node, nodename + '_scalar',
                                    [i.name + '[i]' for i in inps], scal_out,
                                    dict(fail='return;'))

        # Translate types for scalar composite ops (except complex).
        support_code += """
#define npy_float64 ga_double
#define npy_float32 ga_float
#define npy_uint8 ga_ubyte
#define npy_int8 ga_byte
#define npy_uint16 ga_ushort
#define npy_int16 ga_short
#define npy_uint32 ga_uint
#define npy_int32 ga_int
#define npy_uint64 ga_ulong
#define npy_int64 ga_long
"""
        return ElemwiseKernel(None, inps + outs, kop, preamble=support_code)

예제 #2

0

파일 보기

    def make_node(self, *inputs):
        res = Elemwise.make_node(self, *inputs)
        outputs = [GpuArrayType(broadcastable=o.type.broadcastable,
                                dtype=o.type.dtype)() for o in res.outputs]
        inputs = [as_gpuarray_variable(i) for i in inputs]
        node = Apply(self, inputs, outputs)

        # Try to generate the kernel to catch SupportCodeErrors
        try:
            inps = [make_argument(i, 'i%d' % (n,)) for n, i in
                    enumerate(node.inputs)]
            scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs]

            outs = [make_argument(o, 'o%d' % (n,)) for n, o in
                    enumerate(node.outputs) if not n in self.inplace_pattern]
            scal_out = [scalar.Scalar(o.dtype) for o in node.outputs]

            fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
                              [o() for o in scal_out])
            code = self.scalar_op.c_support_code_apply(fake_node, "test")
            if code:
                raise SupportCodeError(code)
        except MethodNotDefined:
            pass
        try:
            support_code = self.scalar_op.c_support_code()
            if (support_code.strip() != "#define THEANO_MACRO_MOD(x,y) (x % y)" and
                support_code.strip() != ""):
                # The macro is fine, the C++ struct is not.
                raise SupportCodeError(support_code)
        except MethodNotDefined:
            pass

        return node

예제 #3

0

파일 보기

 def task_code(d):
     print >> sio, self.scalar_op.c_code(
         Apply(self.scalar_op,
             [scalar.Scalar(dtype = input.type.dtype)() for input in node.inputs],
             [scalar.Scalar(dtype = output.type.dtype)() for output in node.outputs])
         , nodename + '_scalar_'
         , ['i%i_data_%i[0]'%(ipos,d) for ipos, i in enumerate(node.inputs)]
         , ['o%i_data_%i[0]'%(ipos,d) for ipos, i in enumerate(node.outputs)]
         , sub=dict(fail='return;')) #TODO: set a failure code somehow!!!

예제 #4

0

파일 보기

    def generate_kernel(self, node, nodename):
        inps = [make_argument(i, 'i%d' % (n,)) for n, i in
                enumerate(node.inputs)]
        scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs]

        outs = [make_argument(o, 'o%d' % (n,)) for n, o in
                enumerate(node.outputs) if not n in self.inplace_pattern]
        scal_out = [scalar.Scalar(o.dtype) for o in node.outputs]

        fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
                          [o() for o in scal_out])

        scal_out = []
        oi = 0
        for n in range(len(node.outputs)):
            if n in self.inplace_pattern:
                scal_out.append(inps[self.inplace_pattern[n]].name+'[i]')
            else:
                scal_out.append(outs[oi].name+'[i]')
                oi += 1

        kop = self.scalar_op.c_code(fake_node, nodename+'_scalar',
                                    [i.name+'[i]' for i in inps],
                                    scal_out,
                                    dict(fail='return;'))

        # Translate types for scalar composite ops (except complex).
        support_code = """
#ifdef _MSC_VER
#define signed __int8 int8_t
#define unsigned __int8 uint8_t
#define signed __int16 int16_t
#define unsigned __int16 uint16_t
#define signed __int32 int32_t
#define unsigned __int32 uint32_t
#define signed __int64 int64_t
#define unsigned __int64 uint64_t
#else
#include <stdint.h>
#endif
#define ga_bool uint8_t
#define ga_byte int8_t
#define ga_ubyte uint8_t
#define ga_short int16_t
#define ga_ushort uint16_t
#define ga_int int32_t
#define ga_uint uint32_t
#define ga_long int64_t
#define ga_ulong uint64_t
#define ga_float float
#define ga_double double
#define ga_half uint16_t

#include <Python.h>
#include <numpy/npy_common.h>
"""
        return ElemwiseKernel(None, inps+outs, kop, preamble=support_code)

예제 #5

0

파일 보기

파일: elemwise.py 프로젝트: xinfanmeng/Theano

    def c_src_kernel_Ccontiguous(self, node, nodename):
        nd = node.outputs[0].type.ndim
        sio = StringIO.StringIO()
        #print 'C_SRC_KERNEL', sio.getvalue()

        for ipos, i in enumerate(node.inputs):
            print >> sio, "//    Input  ", ipos, str(i.type)
        for ipos, i in enumerate(node.outputs):
            print >> sio, "//    Output ", ipos, str(i.type)
        print >> sio, "static __global__ void kernel_%s_%s_Ccontiguous (unsigned int numEls" % (
            self.scalar_op.__class__.__name__, nodename)
        #declare inputs
        for ipos, i in enumerate(node.inputs):
            print >> sio, "\t,", "const float * i%i_data" % ipos
        #declare outputs
        for ipos, i in enumerate(node.outputs):
            print >> sio, "\t,", "float * o%i_data" % ipos
        print >> sio, "\t)\n{"
        print >> sio, "    const int idx = blockIdx.x * blockDim.x + threadIdx.x;"
        print >> sio, "    const int numThreads = blockDim.x * gridDim.x;"

        # For each input that is a scalar which has been broadcasted to a tensor,
        #     load it into a local variable
        for ipos, i in enumerate(node.inputs):
            if _logical_scalar(i):
                print >> sio, "    const float ii_i%i_value = i%i_data[0];" % (
                    ipos, ipos)

        #loop over the elements to be treated by this kernel call
        print >> sio, "    for (int i = idx; i < numEls; i += numThreads) {"
        # perform the scalar operation on the input and output references
        #TODO: What if the scalar_op needs support_code??
        task_code = self.scalar_op.c_code(
            Apply(self.scalar_op, [
                scalar.Scalar(dtype=input.type.dtype)()
                for input in node.inputs
            ], [
                scalar.Scalar(dtype=output.type.dtype)()
                for output in node.outputs
            ]),
            nodename + '_scalar_'
            #, ['i%i_data[i]'%ipos for ipos, i in enumerate(node.inputs)]
            ,
            get_str_list_logical_scalar(node, data_str='i%i_data[i]'),
            ['o%i_data[i]' % ipos for ipos, i in enumerate(node.outputs)],
            sub=dict(fail='return;'))  #TODO: set a failure code somehow!!!
        print >> sio, "       ", task_code
        print >> sio, "    }"
        print >> sio, "}"

        #print sio.getvalue()
        return sio.getvalue()

예제 #6

0

파일 보기

 def PRelu(x):
     out_dtype = scalar.upgrade_to_float(
         scalar.Scalar(dtype=x.dtype))[0].dtype
     a = T.constant(0.625, dtype=out_dtype)
     b = T.constant(0.375, dtype=out_dtype)
     # x = (x * slope) + shift
     y = x * a + abs(x) * b
     r = T.clip(y, 0, 1)
     return r

예제 #7

0

파일 보기

 def Relu(x):
     out_dtype = scalar.upgrade_to_float(
         scalar.Scalar(dtype=x.dtype))[0].dtype
     a = T.constant(0.5, dtype=out_dtype)
     # ab = T.constant(abs(x), dtype=out_dtype)
     # x = (x * slope) + shift
     y = (x + abs(x)) * a
     r = T.clip(y, 0, 1)
     return r

예제 #8

0

파일 보기

def safe_new(x, tag='', dtype=None):
    """
    Internal function that constructs a new variable from x with the same
    type, but with a different name (old name + tag). This function is used
    by gradient, or the R-op to construct new variables for the inputs of
    the inner graph such that there is no interference between the original
    graph and the newly constructed graph.
    """
    if hasattr(x, 'name') and x.name is not None:
        nw_name = x.name + tag
    else:
        nw_name = None
    if isinstance(x, theano.Constant):
        if dtype and x.dtype != dtype:
            casted_x = x.astype(dtype)
            nwx = x.__class__(casted_x.type, x.data, x.name)
            nwx.tag = copy(x.tag)
            return nwx
        else:
            return x.clone()
    # Note, as_tensor_variable will convert the Scalar into a
    # TensorScalar that will require a ScalarFromTensor op,
    # making the pushout optimization fail
    elif isinstance(x, scalar.ScalarVariable):
        if dtype:
            nw_x = scalar.Scalar(dtype=dtype)()
        else:
            nw_x = x.type()
        nw_x.name = nw_name
        return nw_x
    else:
        try:
            x = tensor.as_tensor_variable(x)
        except TypeError:
            # This could happen for example for random states, and I really
            # want to avoid the convoluted logic that checks for cuda
            # ndarrays
            pass
    nw_x = x.type()
    if dtype and nw_x.dtype != dtype:
        nw_x = nw_x.astype(dtype).type()
    nw_x.name = nw_name
    # Preserve test values so that the 'compute_test_value' option can be used.
    # The test value is deep-copied to ensure there can be no interactions
    # between test values, due to inplace operations for instance. This may
    # not be the most efficient memory-wise, though.
    if theano.config.compute_test_value != 'off':
        try:
            nw_x.tag.test_value = copy.deepcopy(gof.op.get_test_value(x))
        except AttributeError:
            # This means `x` has no test value.
            pass

    return nw_x

예제 #9

0

파일 보기

파일: sigm.py 프로젝트: MatthieuCourbariaux/Theano

def hard_sigmoid(x):
    """An approximation of sigmoid.

    More approximate and faster than ultra_fast_sigmoid.

    Approx in 3 parts: 0, scaled linear, 1

    Removing the slope and shift does not make it faster.

    """
    # Use the same dtype as determined by "upgrade_to_float",
    # and perform computation in that dtype.
    out_dtype = scalar.upgrade_to_float(scalar.Scalar(dtype=x.dtype))[0].dtype
    slope = tensor.constant(0.2, dtype=out_dtype)
    shift = tensor.constant(0.5, dtype=out_dtype)
    x = (x * slope) + shift
    x = tensor.clip(x, 0, 1)
    return x

예제 #10

0

파일 보기

 def to_scalar_type(self):
     return scal.Scalar(dtype=self.dtype)

예제 #11

0

파일 보기

from theano import scalar
from theano import function

s0 = scalar.Scalar(dtype='float32')(name='s0')
s1 = scalar.Scalar(dtype='float32')(name='s1')

mx0 = scalar.maximum(s0, s1)
mx0.name = 'mx0'
mx1 = scalar.maximum(mx0, s0)
mx1.name = 'mx1'

E = scalar.second(mx1, 1)
E.name = 'E'
D = scalar.eq(mx1, mx0)
D.name = 'D'
C = D * E
C.name = 'C'
B = scalar.eq(mx0, s1)
B.name = 'B'
A = B * C
A.name = 'A'

function([s0, s1], [A, D])

예제 #12

0

파일 보기

 def c_init_code(self):
     return scal.Scalar(self.dtype).c_init_code()

예제 #13

0

파일 보기

 def c_code_cache_version(self):
     scalar_version = scal.Scalar(self.dtype).c_code_cache_version()
     if scalar_version:
         return (10, ) + scalar_version
     else:
         return ()

예제 #14

0

파일 보기

 def c_compile_args(self):
     return scal.Scalar(self.dtype).c_compile_args()

예제 #15

0

파일 보기

 def c_support_code(self):
     """Override `CLinkerObject.c_support_code` """
     return scal.Scalar(self.dtype).c_support_code()

예제 #16

0

파일 보기

 def c_libraries(self):
     return scal.Scalar(self.dtype).c_libraries()

예제 #17

0

파일 보기

 def c_headers(self):
     """Override `CLinkerObject.c_headers` """
     return scal.Scalar(self.dtype).c_headers()

예제 #18

0

파일 보기

파일: elemwise.py 프로젝트: wanasit/Theano

    def c_src_kernel_tiling(self, node, nodename):
        """ The kernel applies to problems with <= 5 dimensions """

        #The kernel is intended to be structured roughly like this:
        """
        static __global__ void kernel()
        {
            for (int v = blockIdx.y; v < dim0; v += gridDim.x)
            {
                for (int w = blockIdx.y; w < dim1; w += gridDim.y)
                {
                    for (int x = threadIdx.x; x < dim2; x += blockDim.x)
                    {
                        for (int y = threadIdx.y; y < dim3; y += blockDim.y)
                        {
                            for (int z = threadIdx.z; z < dim4; z += blockDim.z)
                            {
                                out[v * out_stride[0] + ...] = f(in1[...],  in2[...])
                            }
                        }
                    }
                }
            }
        }

        """

        nd = node.outputs[0].type.ndim
        sio = StringIO()
        #print 'C_SRC_KERNEL', sio.getvalue()

        if nd in (4, ):
            # print some leading comments to make the code easier to read
            for ipos, i in enumerate(node.inputs):
                print >> sio, "//    Input  ", ipos, str(i.type)
            for ipos, i in enumerate(node.outputs):
                print >> sio, "//    Output ", ipos, str(i.type)
            print >> sio, "static __global__ void kernel_%s_%s_%s(unsigned int numEls" % (
                self.scalar_op.__class__.__name__, nodename, 'tiling%i' % nd)
            if (nd):
                print >> sio, "\t,", ", ".join("const int dim%i" % i
                                               for i in xrange(nd))
            #declare inputs
            for ipos, i in enumerate(node.inputs):
                s = ", ".join(["const float * i%i_data" % ipos] +
                              list("int i%i_str_%i" % (ipos, d)
                                   for d in xrange(nd)))
                print >> sio, "\t,", s
            #declare outputs
            for ipos, i in enumerate(node.outputs):
                s = ", ".join(["float * o%i_data" % ipos] +
                              list("int o%i_str_%i" % (ipos, d)
                                   for d in xrange(nd)))
                print >> sio, "\t,", s
                #print >> sio, "\t,", ", ".join("int o%i_str_%i" % (ipos, d) for d in xrange(nd))
                #print >> sio, "\t,", "float * o%i_data" % ipos
            print >> sio, "\t)\n{"

            # For each input that is a scalar which has been broadcasted to a tensor,
            #     load it into a local variable
            print >> sio, "    __shared__ float value0[%i];" % len(node.inputs)
            print >> sio, "    __shared__ int shared_dims[%(nd)s];" % locals()
            #print >> sio, "    __shared__ int shared_i_str[%(n_in)s][%(nd)s]"
            print >> sio, "    if ((threadIdx.x == 0) && (threadIdx.y == 0)) {"
            for ipos, i in enumerate(node.inputs):
                if _logical_scalar(i):
                    print >> sio, "    value0[%i] = i%i_data[0];" % (ipos,
                                                                     ipos)
            for ipos in xrange(nd):
                print >> sio, "    shared_dims[%i] = dim%i;" % (ipos, ipos)
            print >> sio, "    }"
            print >> sio, "    __syncthreads();"

            if (nd == 4):
                print >> sio, """
                for (int pos0 = blockIdx.x; pos0 < shared_dims[0]; pos0 += gridDim.x)
                {
                    for (int pos1 = blockIdx.y; pos1 < shared_dims[1]; pos1 += gridDim.y)
                    {
                        //for (int pos2 = threadIdx.x; pos2 < shared_dims[2]; pos2 += blockDim.x)
                        for (int pos2 = threadIdx.y; pos2 < shared_dims[2]; pos2 += blockDim.y)
                        {
                            //for (int pos3 = threadIdx.y; pos3 < shared_dims[3]; pos3 += blockDim.y)
                            for (int pos3 = threadIdx.x; pos3 < shared_dims[3]; pos3 += blockDim.x)
                            {
                """
            else:
                raise NotImplementedError()

            for ipos, i in enumerate(node.inputs):
                if not _logical_scalar(i):
                    print >> sio, "        const float * ii_i%i_data = i%i_data;" % (
                        ipos, ipos)
            for ipos, i in enumerate(node.outputs):
                print >> sio, "        float * ii_o%i_data = o%i_data;" % (
                    ipos, ipos)
            for d in xrange(nd):
                for ipos, i in enumerate(node.inputs):
                    if not _logical_scalar(i):
                        print >> sio, "        ii_i%i_data += pos%i * i%i_str_%i;" % (
                            ipos, d, ipos, d)
                for ipos, i in enumerate(node.outputs):
                    print >> sio, "        ii_o%i_data += pos%i * o%i_str_%i;" % (
                        ipos, d, ipos, d)

            # perform the scalar operation on the input and output references
            #TODO: What if the scalar_op needs support_code??
            task_code = self.scalar_op.c_code(
                Apply(self.scalar_op, [
                    scalar.Scalar(dtype=input.type.dtype).make_variable()
                    for input in node.inputs
                ], [
                    scalar.Scalar(dtype=output.type.dtype).make_variable()
                    for output in node.outputs
                ]),
                nodename + '_scalar_',
                get_str_list_logical_scalar(node, value_str='value0[%i]'), [
                    'ii_o%i_data[0]' % ipos
                    for ipos, i in enumerate(node.outputs)
                ],
                sub=dict(fail='return;'))  #TODO: set a failure code somehow!!!
            print >> sio, "       ", task_code

            print >> sio, "    }" * nd

            #TODO: insert runtime stride checks that select the best loop order either here, or in
            # the host code that launched the  kernel (host code probably better spot)

            #indent = " "*(4*d+7)
            #for ipos, i in enumerate(node.inputs):
            #print >> sio, indent, "const float * i%i" % ipos, '= i%i_data', ''
            print >> sio, "}"

        print sio.getvalue()
        return sio.getvalue()

예제 #19

0

파일 보기

    def make_node(self, *inputs):
        _inputs = [as_gpuarray_variable(i) for i in inputs]
        if self.nin > 0 and len(_inputs) != self.nin:
            raise TypeError("Wrong argument count", (self.nin, len(_inputs)))
        for i in _inputs[1:]:
            if i.type.ndim != inputs[0].type.ndim:
                raise TypeError('mismatched rank amongst inputs')

        broadcastable = []
        for d in xrange(_inputs[0].type.ndim):
            bcast_d = True
            for i in _inputs:
                if not i.type.broadcastable[d]:
                    bcast_d = False
                    break
            broadcastable.append(bcast_d)
        assert len(broadcastable) == _inputs[0].type.ndim

        assert self.nout > 0
        inps = [make_argument(i, 'i%d' % (n, )) for n, i in enumerate(inputs)]
        scal_ins = [scalar.Scalar(i.dtype) for i in inputs]

        res = Apply(self, _inputs, [
            GpuArrayType(o.dtype, broadcastable)()
            for o in self.scalar_op.output_types(scal_ins)
        ])

        outs = [
            make_argument(o, 'o%d' % (n, )) for n, o in enumerate(res.outputs)
        ]
        scal_out = [scalar.Scalar(o.dtype) for o in res.outputs]

        fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
                          [o() for o in scal_out])

        kcode = self.scalar_op.c_code(fake_node,
                                      'kcode', [i.expr() for i in inps],
                                      [o.expr() for o in outs],
                                      sub=dict(fail='return;'))
        res.tag.kcode = kcode

        try:
            code = self.scalar_op.c_support_code_apply(fake_node, 'kcode')
            if code:
                raise SupportCodeError()
        except MethodNotDefined:
            pass

        support_code = ""
        try:
            support_code += self.scalar_op.c_support_code()
        except MethodNotDefined:
            pass

        if support_code != "#define THEANO_MACRO_MOD(x,y) (x % y)":
            # Avoid the C++ complex struct
            raise SupportCodeError()

        k = ElemwiseKernel(None, inps + outs, kcode, preamble=support_code)
        res.tag.kernel = k

        return res

예제 #20

0

파일 보기

파일: elemwise.py 프로젝트: wanasit/Theano

    def c_src_kernel(self, node, nodename, nd):
        sio = StringIO()
        #print 'C_SRC_KERNEL', sio.getvalue()

        for ipos, i in enumerate(node.inputs):
            print >> sio, "//    Input  ", ipos, str(i.type)
        for ipos, i in enumerate(node.outputs):
            print >> sio, "//    Output ", ipos, str(i.type)
        print >> sio, "static __global__ void kernel_%s_%s_%s(unsigned int numEls" % (
            self.scalar_op.__class__.__name__, nodename, nd)
        if (nd):
            print >> sio, "\t,", ", ".join("const int dim%i" % i
                                           for i in xrange(nd))
        #declare inputs
        for ipos, i in enumerate(node.inputs):
            s = ", ".join(["const float * i%i_data" % ipos] +
                          ["int i%i_str_%i" % (ipos, d) for d in xrange(nd)])
            print >> sio, "\t,", s
        #declare outputs
        for ipos, i in enumerate(node.outputs):
            s = ", ".join(["float * o%i_data" % ipos] +
                          ["int o%i_str_%i" % (ipos, d) for d in xrange(nd)])
            print >> sio, "\t,", s
            #print >> sio, "\t,", ", ".join("int o%i_str_%i" % (ipos, d) for d in xrange(nd))
            #print >> sio, "\t,", "float * o%i_data" % ipos
        print >> sio, "\t)\n{"
        print >> sio, "    const int idx = blockIdx.x * blockDim.x + threadIdx.x;"
        print >> sio, "    const int numThreads = blockDim.x * gridDim.x;"

        # For each input that is a scalar which has been broadcasted to a tensor,
        #     load it into a local variable
        for ipos, i in enumerate(node.inputs):
            if _logical_scalar(i):
                print >> sio, "    const float ii_i%i_value = i%i_data[0];" % (
                    ipos, ipos)

        #loop over the elements to be treated by this kernel call
        print >> sio, "    for (int i = idx; i < numEls; i += numThreads) {"
        # calculate the data pointers for all arguments
        print >> sio, "        int ii = i;"
        for ipos, i in enumerate(node.inputs):
            if not _logical_scalar(i):
                print >> sio, "        const float * ii_i%i_data = i%i_data;" % (
                    ipos, ipos)
        for ipos, i in enumerate(node.outputs):
            print >> sio, "        float * ii_o%i_data = o%i_data;" % (ipos,
                                                                       ipos)
        for d in xrange(nd - 1, -1, -1):
            if d > 0:
                print >> sio, "        int pos%i = ii %% dim%i;" % (d, d)
                print >> sio, "        ii = ii / dim%i;" % d
            else:
                print >> sio, "        int pos%i = ii;" % d

            for ipos, i in enumerate(node.inputs):
                if not _logical_scalar(i):
                    print >> sio, "        ii_i%i_data += pos%i * i%i_str_%i;" % (
                        ipos, d, ipos, d)
            for ipos, i in enumerate(node.outputs):
                print >> sio, "        ii_o%i_data += pos%i * o%i_str_%i;" % (
                    ipos, d, ipos, d)

        # perform the scalar operation on the input and output references
        #TODO: What if the scalar_op needs support_code??
        task_code = self.scalar_op.c_code(
            Apply(self.scalar_op, [
                scalar.Scalar(dtype=input.type.dtype).make_variable()
                for input in node.inputs
            ], [
                scalar.Scalar(dtype=output.type.dtype).make_variable()
                for output in node.outputs
            ]),
            nodename + '_scalar_',
            get_str_list_logical_scalar(node),
            ['ii_o%i_data[0]' % ipos for ipos, i in enumerate(node.outputs)],
            sub=dict(fail='return;'))  # TODO: set a failure code somehow!!!
        print >> sio, "       ", task_code
        print >> sio, "    }"

        #indent = " "*(4*d+7)
        #for ipos, i in enumerate(node.inputs):
        #print >> sio, indent, "const float * i%i" % ipos, '= i%i_data', ''
        print >> sio, "}"

        #print sio.getvalue()
        return sio.getvalue()