# Assumes the usual pygpu test setup: numpy, gpuarray from pygpu,
# ElemwiseKernel from pygpu.elemwise, and the gen_gpuarray/context helpers
# from the pygpu test support module.
def elemwise_layouts_mixed(shape, offseted_outer, offseted_inner, sliced,
                           order):
    ac, ag = gen_gpuarray(shape, dtype='float32', sliced=sliced, order=order,
                          offseted_outer=offseted_outer,
                          offseted_inner=offseted_inner, ctx=context)
    b = numpy.asarray(2.0, dtype='float32')

    outg = gpuarray.empty(shape, dtype='float32', context=context)

    k = ElemwiseKernel(context, "float *a, float b, float *c",
                       "c[i] = a[i] + b")
    # __call__ picks the variant itself; here that is contig or basic.
    k(ag, b, outg)
    outc = ac + b
    assert numpy.allclose(numpy.asarray(outg), outc)

    # test basic
    outg = gpuarray.empty(shape, dtype='float32', context=context)
    k.call_basic(ag, b, outg)
    assert numpy.allclose(numpy.asarray(outg), outc)

    # test dimspec
    outg = gpuarray.empty(shape, dtype='float32', context=context)
    k.call_dimspec(ag, b, outg)
    assert numpy.allclose(numpy.asarray(outg), outc)

    # test specialized
    outg = gpuarray.empty(shape, dtype='float32', context=context)
    k.call_specialized(ag, b, outg)
    assert numpy.allclose(numpy.asarray(outg), outc)
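
The helper above is parametrized; the sketch below shows one hypothetical way to drive it. The shapes, slice steps, and orders are assumptions chosen to exercise different layouts, not the parameters of the original suite.

def test_elemwise_layouts_mixed():
    # Hypothetical driver; `sliced` is forwarded to gen_gpuarray as a step.
    for order in ['c', 'f']:
        for sliced in [1, 2]:
            for offseted_outer in [True, False]:
                elemwise_layouts_mixed((4, 5), offseted_outer, False,
                                       sliced, order)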
Example #2
    def generate_kernel(self, node, nodename):
        inps = [
            make_argument(i, 'i%d' % (n, )) for n, i in enumerate(node.inputs)
        ]
        scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs]

        outs = [
            make_argument(o, 'o%d' % (n, )) for n, o in enumerate(node.outputs)
            if n not in self.inplace_pattern
        ]
        scal_out = [scalar.Scalar(o.dtype) for o in node.outputs]

        fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
                          [o() for o in scal_out])

        try:
            code = self.scalar_op.c_support_code_apply(fake_node, nodename)
            if code:
                raise SupportCodeError(code)
        except MethodNotDefined:
            pass

        support_code = ""
        try:
            support_code = self.scalar_op.c_support_code()
        except MethodNotDefined:
            pass

        if (support_code.strip() != "#define THEANO_MACRO_MOD(x,y) (x % y)"
                and support_code.strip() != ""):
            # The macro is fine, the C++ struct is not.
            raise SupportCodeError(support_code)

        # Rebuild scal_out as the output expressions fed to c_code; outputs
        # named in inplace_pattern alias their input buffer instead.
        scal_out = []
        oi = 0
        for n in range(len(fake_node.outputs)):
            if n in self.inplace_pattern:
                scal_out.append(inps[self.inplace_pattern[n]].name + '[i]')
            else:
                scal_out.append(outs[oi].name + '[i]')
                oi += 1

        kop = self.scalar_op.c_code(fake_node, nodename + '_scalar',
                                    [i.name + '[i]' for i in inps], scal_out,
                                    dict(fail='return;'))

        # Translate types for scalar composite ops (except complex).
        support_code += """
#define npy_float64 ga_double
#define npy_float32 ga_float
#define npy_uint8 ga_ubyte
#define npy_int8 ga_byte
#define npy_uint16 ga_ushort
#define npy_int16 ga_short
#define npy_uint32 ga_uint
#define npy_int32 ga_int
#define npy_uint64 ga_ulong
#define npy_int64 ga_long
"""
        return ElemwiseKernel(None, inps + outs, kop, preamble=support_code)
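
For intuition, the kernel returned above for a two-input float32 addition is equivalent to building an ElemwiseKernel by hand. A minimal sketch, assuming a context like the one in the first example (the i0/i1/o0 names follow the naming scheme used here):

from pygpu.elemwise import ElemwiseKernel

k = ElemwiseKernel(context, "float *i0, float *i1, float *o0",
                   "o0[i] = i0[i] + i1[i]")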
Example #3
    def generate_kernel(self, node, nodename):
        inps = [make_argument(i, 'i%d' % (n,)) for n, i in
                enumerate(node.inputs)]
        scal_ins = [scalar.Scalar(i.dtype) for i in node.inputs]

        outs = [make_argument(o, 'o%d' % (n,)) for n, o in
                enumerate(node.outputs) if n not in self.inplace_pattern]
        scal_out = [scalar.Scalar(o.dtype) for o in node.outputs]

        fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
                          [o() for o in scal_out])

        scal_out = []
        oi = 0
        for n in range(len(node.outputs)):
            if n in self.inplace_pattern:
                scal_out.append(inps[self.inplace_pattern[n]].name+'[i]')
            else:
                scal_out.append(outs[oi].name+'[i]')
                oi += 1

        kop = self.scalar_op.c_code(fake_node, nodename+'_scalar',
                                    [i.name+'[i]' for i in inps],
                                    scal_out,
                                    dict(fail='return;'))

        # Translate types for scalar composite ops (except complex).
        support_code = """
#ifdef _MSC_VER
typedef signed __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef signed __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef signed __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif
#define ga_bool uint8_t
#define ga_byte int8_t
#define ga_ubyte uint8_t
#define ga_short int16_t
#define ga_ushort uint16_t
#define ga_int int32_t
#define ga_uint uint32_t
#define ga_long int64_t
#define ga_ulong uint64_t
#define ga_float float
#define ga_double double
#define ga_half uint16_t

#include <Python.h>
#include <numpy/npy_common.h>
"""
        return ElemwiseKernel(None, inps+outs, kop, preamble=support_code)
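
Both versions remap outputs listed in inplace_pattern onto their input buffers before rendering the scalar C code. A toy illustration of that aliasing loop (hypothetical names and pattern): with inplace_pattern = {0: 1}, output 0 is written through i1[i] instead of a buffer of its own.

# Toy illustration of the output-aliasing loop above (hypothetical data).
inplace_pattern = {0: 1}
inp_names = ['i0', 'i1']
out_names = ['o0']            # only outputs not covered by the pattern
scal_out, oi = [], 0
for n in range(2):            # two outputs in this toy case
    if n in inplace_pattern:
        scal_out.append(inp_names[inplace_pattern[n]] + '[i]')
    else:
        scal_out.append(out_names[oi] + '[i]')
        oi += 1
assert scal_out == ['i1[i]', 'o0[i]']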
Example #4
def elemwise_collapse(dtype1, dtype2, shape1, shape2, expected):
    assert len(shape1) == len(shape2)

    # int8 does not cause problematic upcasts
    scalar = numpy.asarray(1, dtype='int8')

    a_cpu, a_gpu = gen_gpuarray(shape1, dtype1, ctx=context)
    b_cpu, b_gpu = gen_gpuarray(shape2, dtype2, ctx=context)

    o_shape = [max(d1, d2) for d1, d2 in zip(shape1, shape2)]

    o = gpuarray.empty(o_shape, dtype=(a_cpu + b_cpu).dtype, context=context)

    n, nd, dims, strs, offsets, contig = check_args((a_gpu, b_gpu),
                                                    collapse=True,
                                                    broadcast=True)

    assert nd == expected, (shape1, shape2, dims, nd, expected)

    k = ElemwiseKernel(context, [ArrayArg(numpy.dtype(dtype1), 'a'),
                                 ArrayArg(numpy.dtype(dtype2), 'b'),
                                 ArrayArg(o.dtype, 'o')], "o[i] = a[i] + b[i]")
    out_cpu = a_cpu + b_cpu
    k(a_gpu, b_gpu, o, collapse=True, broadcast=True)

    assert numpy.allclose(numpy.asarray(o), out_cpu)

    k(a_gpu, b_gpu, o, collapse=False, broadcast=True)

    assert numpy.allclose(numpy.asarray(o), out_cpu)

    broadcast = any(i == 1 for i in shape1 + shape2)

    n, nd, dims, strs, offsets, contig = check_args((a_gpu, b_gpu, scalar),
                                                    collapse=True,
                                                    broadcast=True)
    assert nd == expected

    k = ElemwiseKernel(context, [ArrayArg(numpy.dtype(dtype1), 'a'),
                                 ArrayArg(numpy.dtype(dtype2), 'b'),
                                 ScalarArg(scalar.dtype, 's'),
                                 ArrayArg(o.dtype, 'o')],
                       "o[i] = a[i] + b[i] + s")
    out_cpu = a_cpu + b_cpu + scalar
    k(a_gpu, b_gpu, scalar, o, collapse=True, broadcast=True)

    assert numpy.allclose(numpy.asarray(o), out_cpu)

    k(a_gpu, b_gpu, scalar, o, collapse=False, broadcast=True)

    assert numpy.allclose(numpy.asarray(o), out_cpu)

    # A sliced (non-contiguous) copy prevents full collapse, so a fully
    # collapsible case gains one dimension.
    if expected == 1:
        expected2 = 2
    else:
        expected2 = expected

    # The sliced check below is only meaningful for the 4d cases.
    if len(shape1) != 4:
        return

    if shape1[0] != 1:
        c_cpu, c_gpu = gen_gpuarray(shape1, dtype=dtype1, sliced=2,
                                    ctx=context)
        n, nd, dims, strs, offsets, contig = check_args((c_gpu, b_gpu),
                                                        collapse=True,
                                                        broadcast=True)
        if broadcast:
            assert nd >= expected
        else:
            assert nd == expected2
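
An illustrative invocation; the expected value is an assumption based on contiguity (two same-shape C-contiguous operands collapse to a single dimension, and a 4d shape also exercises the sliced branch at the end):

elemwise_collapse('float32', 'float32', (4, 5, 6, 7), (4, 5, 6, 7),
                  expected=1)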
Example #5
    def make_node(self, *inputs):
        _inputs = [as_gpuarray_variable(i) for i in inputs]
        if self.nin > 0 and len(_inputs) != self.nin:
            raise TypeError("Wrong argument count", (self.nin, len(_inputs)))
        for i in _inputs[1:]:
            if i.type.ndim != _inputs[0].type.ndim:
                raise TypeError('mismatched rank amongst inputs')

        broadcastable = []
        for d in range(_inputs[0].type.ndim):
            bcast_d = True
            for i in _inputs:
                if not i.type.broadcastable[d]:
                    bcast_d = False
                    break
            broadcastable.append(bcast_d)
        assert len(broadcastable) == _inputs[0].type.ndim

        assert self.nout > 0
        inps = [make_argument(i, 'i%d' % (n, )) for n, i in
                enumerate(_inputs)]
        scal_ins = [scalar.Scalar(i.dtype) for i in _inputs]

        res = Apply(self, _inputs, [
            GpuArrayType(o.dtype, broadcastable)()
            for o in self.scalar_op.output_types(scal_ins)
        ])

        outs = [
            make_argument(o, 'o%d' % (n, )) for n, o in enumerate(res.outputs)
        ]
        scal_out = [scalar.Scalar(o.dtype) for o in res.outputs]

        fake_node = Apply(self.scalar_op, [i() for i in scal_ins],
                          [o() for o in scal_out])

        kcode = self.scalar_op.c_code(fake_node,
                                      'kcode', [i.expr() for i in inps],
                                      [o.expr() for o in outs],
                                      sub=dict(fail='return;'))
        res.tag.kcode = kcode

        try:
            code = self.scalar_op.c_support_code_apply(fake_node, 'kcode')
            if code:
                raise SupportCodeError()
        except MethodNotDefined:
            pass

        support_code = ""
        try:
            support_code += self.scalar_op.c_support_code()
        except MethodNotDefined:
            pass

        if support_code not in ("", "#define THEANO_MACRO_MOD(x,y) (x % y)"):
            # The macro is fine; any other support code (such as the C++
            # complex struct) is not supported.
            raise SupportCodeError()

        k = ElemwiseKernel(None, inps + outs, kcode, preamble=support_code)
        res.tag.kernel = k

        return res
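
A hypothetical use of this make_node, assuming a GpuElemwise-like Op wrapping a Theano scalar op; the names below are illustrative, not a public API:

op = GpuElemwise(scalar.add)    # assumed Op class wrapping scalar addition
node = op.make_node(x, y)       # x, y: GpuArrayType variables of equal rank
kernel = node.tag.kernel        # the ElemwiseKernel attached above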