Example #1
def test_pycuda_theano():
    """Simple example with pycuda function and Theano CudaNdarray object."""
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
""")

    multiply_them = mod.get_function("multiply_them")

    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)

    # Test with Theano object
    ga = cuda_ndarray.CudaNdarray(a)
    gb = cuda_ndarray.CudaNdarray(b)
    dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
    # One thread per element: the block size must match the array length.
    multiply_them(dest, ga, gb, block=(100, 1, 1), grid=(1, 1))
    assert (numpy.asarray(dest) == a * b).all()
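These snippets come from the test suite of Theano's old CUDA sandbox backend and omit their module-level context: names such as numpy, theano, cuda, tcn, cuda_ndarray, pfunc and mode_with_gpu, plus helpers like py_conv, advantage, batched_dot and VecAsRowAndCol, are defined in the surrounding test modules. A plausible common preamble follows; the exact module paths are an assumption based on the pre-0.9 Theano layout, and since the snippets mix Python 2 print statements with print functions, they cannot all share one module verbatim.

# Hypothetical common preamble for these snippets; the module paths follow
# the pre-0.9 Theano sandbox layout and are an assumption, not part of the
# original source. CudaNdarray requires an old Theano with CUDA enabled.
import copy
import gc
import sys
import time

import numpy
import theano
import theano.tensor as tensor
import theano.tensor as T
import theano.sandbox.cuda as cuda
import theano.sandbox.cuda as tcn
import theano.sandbox.cuda as cuda_ndarray  # alias used by several snippets
from theano.compile import pfunc

mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')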
Example #2
def test_copy_subtensor0():
    sizeof_float = 4
    a = theano._asarray(numpy.random.rand(30, 20, 5, 5), dtype='float32')
    cuda_a = cuda_ndarray.CudaNdarray(a)
    a_view = cuda_a.view()
    a_view_strides = a_view._strides
    # Negate the strides of the last two axes and move the base pointer to
    # their last elements: 4*5 + 4*1 = 24 elements for the C-contiguous
    # (5, 5) tail, hence the 24 * sizeof_float byte offset.
    a_view._set_stride(2, -a_view_strides[2])
    a_view._set_stride(3, -a_view_strides[3])
    a_view._dev_data += 24 * sizeof_float

    a_view_copy = copy.deepcopy(a_view)

    assert numpy.all(a[:, :, ::-1, ::-1] == numpy.asarray(a_view_copy))
Example #3
def test_leak2():
    import theano.sandbox.cuda as cuda
    for i in xrange(1000000):
        n = numpy.asarray([2.3, 4.5], dtype='f')
        c = sys.getrefcount(n)
        a = cuda.CudaNdarray(n)
        assert c == sys.getrefcount(n)
        del a
        if not i % 1000:
            print('.', end=' ')
            print(gc.collect(), end=' ')
            print(gc.collect())
        sys.stdout.flush()
Example #4
def test_elemwise_collapse5():
    """Test when only one input has two broadcastable dimensions at the
    beginning and we add a scalar."""

    shape = (4, 5)
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = (a3 + b + 2)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(5, 4, shape[0], shape[1]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # Debug aid: uncomment to inspect the optimized graph.
    # for id, n in enumerate(f.maker.env.toposort()):
    #     print id, n
    #let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v + 2)
    print "Expected collapse to 2 dimensions"
Example #5
def test_elemwise_collapse2():
    """Test when only one input has one broadcastable dimension."""

    shape = (4, 5, 9)
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(shape[0], 5, *shape[1:]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # Debug aid: uncomment to inspect the optimized graph.
    # for id, n in enumerate(f.maker.env.toposort()):
    #     print id, n
    #let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(shape[0], 1, *shape[1:]) + v)
    print "Expected collapse to 3 dimensions"
Example #6
            def thunk():
                N, nchans, winw, winh = inputs[0][0].shape
                nbins = self.nbins
                x0 = np.array([br[0] for br in self.binranges], dtype=np.float32)
                x0 = cuda.CudaNdarray(x0)
                x1 = np.array([br[1] for br in self.binranges], dtype=np.float32)
                x1 = cuda.CudaNdarray(x1)
                cx = np.array([nbins / (br[1] - br[0]) for br in self.binranges],
                              dtype=np.float32)
                cx = cuda.CudaNdarray(cx)

                z = outputs[0]
                if z[0] is None:
                    # Need to allocate
                    z[0] = cuda.CudaNdarray.zeros((N, nchans, nbins))

                # https://docs.nvidia.com/cuda/cuda-c-programming-guide/#thread-hierarchy
                # https://github.com/inducer/pycuda/blob/master/pycuda/gpuarray.py#L81
                grid, thread_blocks = pycuda.gpuarray.splay(N)
                pycuda_fct(inputs[0][0], z[0], np.intc(N), np.intc(nchans),
                           np.intc(winw), np.intc(winh), np.intc(nbins),
                           x0, x1, cx,
                           block=thread_blocks, grid=grid)
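The pycuda.gpuarray.splay call above picks a one-dimensional launch configuration for N independent elements. A minimal standalone sketch of the same call, assuming pycuda is installed and can create a context:

import pycuda.autoinit  # noqa: F401 -- creates a CUDA context on import
import pycuda.gpuarray

# splay(n) returns a (grid, block) pair sized so the launch covers n
# elements; both are tuples that can be passed straight to a kernel call.
grid, block = pycuda.gpuarray.splay(10000)
print(grid, block)  # exact values depend on the device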
Example #7
def speed_elemwise_collapse2():
    """Used to test the speed-up of the generalised collapse of
    c-contiguous dims."""

    shape = (30, 40, 50, 600)
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    a3 = a2[:, :, :, ::2]
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b * tensor.exp(1 + b ** a3)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    v = v[:, :, :, ::2]
    v = cuda_ndarray.CudaNdarray(v)
    for id, n in enumerate(f.maker.env.toposort()):
        print id, n
    t1 = time.time()
    for i in range(100):
        # let debugmode catch errors
        f(v)
    t2 = time.time()
Example #8
def test_sum():
    shape = (2, 3)
    a0 = theano._asarray(numpy.arange(shape[0] * shape[1]).reshape(shape),
                         dtype='float32')

    b0 = cuda_ndarray.CudaNdarray(a0)

    assert numpy.allclose(a0.sum(), numpy.asarray(b0.reduce_sum([1, 1])))

    a0sum = a0.sum(axis=0)
    b0sum = b0.reduce_sum([1, 0])

    # print 'asum\n',a0sum
    # print 'bsum\n',numpy.asarray(b0sum)

    assert numpy.allclose(a0.sum(axis=0), numpy.asarray(b0.reduce_sum([1, 0])))
    assert numpy.allclose(a0.sum(axis=1), numpy.asarray(b0.reduce_sum([0, 1])))
    assert numpy.allclose(a0, numpy.asarray(b0.reduce_sum([0, 0])))

    shape = (3, 4, 5, 6, 7, 8)
    a0 = theano._asarray(numpy.arange(3 * 4 * 5 * 6 * 7 * 8).reshape(shape),
                         dtype='float32')
    b0 = cuda_ndarray.CudaNdarray(a0)
    assert numpy.allclose(
        a0.sum(axis=5).sum(axis=3).sum(axis=0),
        numpy.asarray(b0.reduce_sum([1, 0, 0, 1, 0, 1])))

    shape = (16, 2048)
    a0 = theano._asarray(numpy.arange(16 * 2048).reshape(shape),
                         dtype='float32')
    b0 = cuda_ndarray.CudaNdarray(a0)
    assert numpy.allclose(a0.sum(axis=0), numpy.asarray(b0.reduce_sum([1, 0])))

    shape = (16, 10)
    a0 = theano._asarray(numpy.arange(160).reshape(shape), dtype='float32')
    b0 = cuda_ndarray.CudaNdarray(a0)
    assert numpy.allclose(a0.sum(), numpy.asarray(b0.reduce_sum([1, 1])))
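The list passed to reduce_sum above is a per-axis mask: 1 means the axis is summed away, 0 means it is kept. A NumPy restatement of the convention as the asserts imply it (a sketch for illustration, not the CudaNdarray API):

import numpy

def reduce_sum_reference(x, mask):
    # Sum over every axis whose mask entry is 1, keeping the others.
    axes = tuple(i for i, m in enumerate(mask) if m)
    return x.sum(axis=axes) if axes else x.copy()

a = numpy.arange(6, dtype='float32').reshape(2, 3)
assert numpy.allclose(reduce_sum_reference(a, [1, 0]), a.sum(axis=0))
assert numpy.allclose(reduce_sum_reference(a, [1, 1]), a.sum())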
Example #9
def _test_dummy():
    ishape = (1, 1, 5, 5)
    kshape = (1, 1, 3, 3)
    mode = 'valid'
    subsample = (1, 1)

    npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
    npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')

    img = cuda_ndarray.CudaNdarray(npy_img)
    kern = cuda_ndarray.CudaNdarray(npy_kern)

    #print >> sys.stdout, '_params_allgood trying ', ishape, kshape, mode

    t0 = time.time()
    cpuval = py_conv(npy_img, npy_kern, mode, subsample)
    t1 = time.time()
    gpuval = cuda_ndarray.conv(img, kern, mode, subsample)
    t2 = time.time()
    gpuval = numpy.asarray(gpuval)
    print gpuval
    print cpuval
Example #10
def test_setitem_assign_to_slice():
    a = numpy.arange(27)
    a.resize((3,3,3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7,8,9], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # first get a slice of a
    _c = _a[:,:,1]

    # set middle row through cube to 7,8,9
    # (this corresponds to middle row of matrix _c)
    _c[:,1] = _b

    a[:,:,1][:,1] = b
    assert numpy.allclose(a,numpy.asarray(_a))

    # test direct transfer from numpy
    _d = _a[1,:,:]
    _d[1,:] = b*10
    a[1,:,:][1,:] = b*10
    assert numpy.allclose(a,numpy.asarray(_a))
Example #11
def test_setitem_matrix_tensor3():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7, 8, 9], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # set middle row through cube to 7,8,9
    _a[:, 1, 1] = _b

    a[:, 1, 1] = b
    assert numpy.allclose(a, numpy.asarray(_a))

    # test direct transfer from numpy
    _a[:, 1, 1] = b * 100
    a[:, 1, 1] = b * 100
    assert numpy.allclose(a, numpy.asarray(_a))

    row = theano._asarray([777, 888, 999], dtype='float32')
    _a[1, 1, :] = row
    a[1, 1, :] = row
    assert numpy.allclose(a, numpy.asarray(_a))
Example #12
            def cmp(a_shp, b_shp):

                a = numpy.random.randn(*a_shp).astype(numpy.float32)
                b = numpy.random.randn(*b_shp).astype(numpy.float32)

                x = tensor.ftensor3()
                y = tensor.ftensor3()

                f = theano.function([x, y],
                                    batched_dot(x, y),
                                    mode=mode_with_gpu)

                z0 = numpy.asarray(f(a, b))

                ga = cuda_ndarray.CudaNdarray(a)
                gb = cuda_ndarray.CudaNdarray(b)

                z1 = numpy.asarray(f(ga, gb))

                z_test = numpy.sum(a[:, :, :, None] * b[:, None, :, :],
                                   axis=-2)

                unittest_tools.assert_allclose(z0, z_test)
                unittest_tools.assert_allclose(z1, z_test)
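The reference value z_test builds the batched matrix product by broadcasting and summing over the contracted axis. The same quantity can be written with einsum; plain NumPy, shown only for comparison:

import numpy

a = numpy.random.randn(4, 2, 3).astype(numpy.float32)
b = numpy.random.randn(4, 3, 5).astype(numpy.float32)

broadcast_version = numpy.sum(a[:, :, :, None] * b[:, None, :, :], axis=-2)
einsum_version = numpy.einsum('bij,bjk->bik', a, b)
assert numpy.allclose(broadcast_version, einsum_version, atol=1e-5)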
Example #13
def test_elemwise_collapse7(atol=1e-6):
    """Test when one input has one broadcastable dimension and the
    other is a scalar."""

    shape = (5, 4, 1)
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a.copy(), 'a')
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    f = pfunc([], [a3 + 2], mode=mode_with_gpu)

    #let debugmode catch errors
    out = f()[0]
    ans = (a + 2).reshape(shape[0], 1, shape[1], shape[2])
    assert numpy.allclose(out, ans, atol=atol)
Example #14
def test_setitem_matrix_bad_type():
    a = numpy.arange(27)
    a.resize((3,3,3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7,8], dtype='float64')

    # test direct transfer from numpy
    try:
        # attempt to assign the ndarray b with setitem
        _a[1,:,:] = b
        assert False
    except TypeError, e:
        #print e
        assert True
Example #15
def test_host_to_device():
    #print >>sys.stdout, 'starting test_host_to_dev'
    for shape in ((), (3, ), (2, 3), (3, 4, 5, 6)):
        a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        c = numpy.asarray(b)
        assert numpy.all(a == c)

        # test with float32 dtype
        d = numpy.asarray(b, dtype='float32')
        assert numpy.all(a == d)

        # test with not float32 dtype
        try:
            numpy.asarray(b, dtype='int8')
            assert False
        except TypeError:
            pass
Example #16
def test_deepcopy():
    a = cuda.fmatrix()
    a_v = cuda.CudaNdarray(numpy.zeros((3, 4), dtype='float32'))

    # We force the C linker, to check that we generate C code
    mode = theano.Mode("c", mode_with_gpu.optimizer)
    f = theano.function([a], a, mode=mode)
    theano.printing.debugprint(f)
    out = f(a_v)
    assert out is not a_v
    assert numpy.allclose(numpy.asarray(a_v), numpy.asarray(out))

    # We force the Python linker, as the default Python code should work for this op
    mode = theano.Mode("py", mode_with_gpu.optimizer)
    f = theano.function([a], a, mode=mode)
    theano.printing.debugprint(f)
    out = f(a_v)
    assert out is not a_v
    assert numpy.allclose(numpy.asarray(a_v), numpy.asarray(out))
Example #17
    def test_output_broadcast_cuda(self):
        from theano.sandbox import cuda
        if not cuda.cuda_available:
            raise SkipTest("Optional package Cuda disabled")
        if cuda.use.device_number is None:
            # We would normally register VecAsRowAndCol as a GPU op, but we
            # don't want to do that here as it would disable other tests in
            # this file. So we manually init the GPU if needed, to remove
            # the warning.
            cuda.use("gpu",
                     force=True,
                     default_to_move_computation_to_gpu=False,
                     move_shared_float32_to_gpu=False,
                     enable_cuda=False)
        v = cuda.fvector('v')
        c, r = VecAsRowAndCol()(v)
        f = theano.function([v], [c, r])

        v_val = cuda.CudaNdarray(self.rng.randn(5).astype('float32'))
        f(v_val)
Example #18
def test_nvcc_bug():
    """
    The fct k_elemwise_unary_rowmajor_copy(used by cuda.copy()) in cuda_ndarray.cu
    is not well compiled with nvcc 3.0 and 3.1 beta. We found a workaround, so it
    sould work correctly. Without the workaround, this test fail.
    """
    shape = (5,4)
    aa = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a = aa[::,::-1]

    b = cuda_ndarray.CudaNdarray(aa)[::,::-1]
    c = copy.copy(b)
    d = copy.deepcopy(b)

    assert numpy.allclose(a, numpy.asarray(b))
    assert numpy.allclose(a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d))
    b+=b
    assert numpy.allclose(a+a, numpy.asarray(b))
    assert numpy.allclose(a+a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d))
Example #19
def test_copy():
    print >>sys.stdout, 'starting test_copy'
    shape = (500,499)
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')

    print >>sys.stdout, '.. creating device object'
    b = cuda_ndarray.CudaNdarray(a)

    print >>sys.stdout, '.. copy'
    c = copy.copy(b)
    print >>sys.stdout, '.. deepcopy'
    d = copy.deepcopy(b)

    print >>sys.stdout, '.. comparisons'
    assert numpy.allclose(a, numpy.asarray(b))
    assert numpy.allclose(a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d))
    b+=b
    assert numpy.allclose(a+a, numpy.asarray(b))
    assert numpy.allclose(a+a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d))
Example #20
def test_stride_manipulation():

    a = theano._asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    v = b.view()
    v._dev_data += 0  # no-op pointer shift; the view must still match the parent
    c = numpy.asarray(v)
    assert numpy.all(a == c)

    sizeof_float = 4
    offset = 0

    b_strides = b._strides
    for i in xrange(len(b.shape)):
        offset += (b.shape[i] - 1) * b_strides[i]
        v._set_stride(i, -b_strides[i])

    v._dev_data += offset * sizeof_float
    c = numpy.asarray(v)

    assert numpy.all(c == [[5, 4, 3], [2, 1, 0]])
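The trick in this test (negate every stride and advance the base pointer to the last element) is the pointer-level spelling of a reversed view, which NumPy writes simply as a[::-1, ::-1]. A lower-level sketch with as_strided that mirrors the CudaNdarray manipulation (illustration only; it relies on the parent buffer staying alive):

import numpy
from numpy.lib.stride_tricks import as_strided

a = numpy.asarray([[0, 1, 2], [3, 4, 5]], dtype='float32')
# Take a view whose data pointer sits on the last element, then walk
# backwards by negating both strides.
last = a[-1:, -1:]
reversed_view = as_strided(last, shape=a.shape,
                           strides=(-a.strides[0], -a.strides[1]))
assert numpy.all(reversed_view == a[::-1, ::-1])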
Example #21
def test_hostfromgpu_shape_i():
    """
    Test that the shape is lifted over hostfromgpu
    """

    m = mode_with_gpu.including('local_dot_to_dot22',
                                'local_dot22_to_dot22scalar', 'specialize')
    a = T.fmatrix('a')
    ca = theano.sandbox.cuda.var.CudaNdarrayType((False, False))()

    av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
    cv = cuda.CudaNdarray(
        numpy.asarray(numpy.random.rand(5, 4), dtype='float32'))

    f = theano.function([a], cuda.basic_ops.gpu_from_host(a), mode=m)
    assert cuda.basic_ops.gpu_from_host in [
        x.op for x in f.maker.env.toposort()
    ]
    f = theano.function([a], cuda.basic_ops.gpu_from_host(a).shape, mode=m)
    topo = f.maker.env.toposort()
    assert isinstance(topo[0].op, T.opt.Shape_i)
    assert isinstance(topo[1].op, T.opt.Shape_i)
    assert isinstance(topo[2].op, T.opt.MakeVector)
    assert tuple(f(av)) == (5, 4)

    f = theano.function([ca], cuda.basic_ops.host_from_gpu(ca), mode=m)
    assert cuda.basic_ops.host_from_gpu in [
        x.op for x in f.maker.env.toposort()
    ]
    f = theano.function([ca], cuda.basic_ops.host_from_gpu(ca).shape, mode=m)
    topo = f.maker.env.toposort()
    assert isinstance(topo[0].op, T.opt.Shape_i)
    assert isinstance(topo[1].op, T.opt.Shape_i)
    assert isinstance(topo[2].op, T.opt.MakeVector)
    assert tuple(f(cv)) == (5, 4)
Example #22
def test_dot():
    print >>sys.stdout, 'starting test_dot'

    utt.seed_rng()
    rng = numpy.random.RandomState(utt.fetch_seed())

    a0 = theano._asarray(rng.randn(4, 7), dtype='float32')
    a1 = theano._asarray(rng.randn(7, 6), dtype='float32')

    b0 = cuda_ndarray.CudaNdarray(a0)
    b1 = cuda_ndarray.CudaNdarray(a1)

    assert _allclose(numpy.dot(a0, a1), cuda_ndarray.dot(b0, b1))


    a1 = theano._asarray(rng.randn(6, 7), dtype='float32')
    b1 = cuda_ndarray.CudaNdarray(a1)

    numpy_version = numpy.dot(a0, a1.T)
    transposed = cuda_ndarray.dimshuffle(b1, (1, 0))
    cuda_version = cuda_ndarray.dot(b0, transposed)

    assert _allclose(numpy_version, cuda_version)

    a1 = theano._asarray(rng.randn(7, 6), dtype='float32')
    b1 = cuda_ndarray.CudaNdarray(a1)


    a0 = theano._asarray(rng.randn(7, 4), dtype='float32')
    b0 = cuda_ndarray.CudaNdarray(a0)

    assert _allclose(numpy.dot(a0.T, a1),
            cuda_ndarray.dot(cuda_ndarray.dimshuffle(b0,(1,0)), b1))

    a1 = theano._asarray(rng.randn(6, 7), dtype='float32')
    b1 = cuda_ndarray.CudaNdarray(a1)

    assert _allclose(numpy.dot(a0.T, a1.T),
            cuda_ndarray.dot(cuda_ndarray.dimshuffle(b0,(1,0)),
                             cuda_ndarray.dimshuffle(b1,(1,0))))
Example #23
def test_setitem_broadcast():
    # test scalar to vector without stride
    a = numpy.arange(3)
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray(9, dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)
    _a[:] = _b.reshape((1,))
    a[:] = b.reshape((1,))
    assert numpy.allclose(numpy.asarray(_a),a)

    # test vector to matrix without stride
    a = numpy.arange(9)
    a.resize((3,3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7,8,9], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)
    _a[:,:] = _b.reshape((1,3))
    a[:,:] = b.reshape((1,3))
    assert numpy.allclose(numpy.asarray(_a),a)

    # test vector to matrix with stride
    a = numpy.arange(27)
    a.resize((3,3,3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([[7,8,9],[10,11,12]], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)[0]
    b = b[0]
    _a[:,:,1] = _b.reshape((1,3))
    a[:,:,1] = b.reshape((1,3))
    assert numpy.allclose(numpy.asarray(_a),a)
Example #24
def test_mapping_getitem_w_int():
    def _cmp(x,y):
        assert x.shape == y.shape
        if not numpy.all(x == y):
            print x
            print y
        assert numpy.all(x == y)

    def _cmpf(x,*y):
        try:
            x.__getitem__(y)
        except IndexError:
            pass
        else:
            raise Exception("Did not generate out or bound error")

    def _cmpfV(x,*y):
        try:
            if len(y)==1:
                x.__getitem__(*y)
            else:
                x.__getitem__(y)
        except ValueError:
            pass
        else:
            raise Exception("Did not generate out or bound error")

    dim = (2,)
    a = theano._asarray(numpy.random.rand(*dim), dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)
    _cmp(numpy.asarray(_a[1]), a[1])
    _cmp(numpy.asarray(_a[-1]), a[-1])
    _cmp(numpy.asarray(_a[0]), a[0])
    _cmp(numpy.asarray(_a[::1]), a[::1])
    _cmp(numpy.asarray(_a[::-1]), a[::-1])
    _cmp(numpy.asarray(_a[...]), a[...])
    _cmpf(_a,2)

    dim = ()
    a = theano._asarray(numpy.random.rand(*dim), dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)
    _cmp(numpy.asarray(_a[...]), a[...])
    _cmpf(_a,0)
    _cmpfV(_a,slice(1))

    dim = (5, 4, 3, 2)
    a = theano._asarray(numpy.random.rand(*dim), dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    _cmpf(_a,slice(-1),slice(-1),10,-10)
    _cmpf(_a,slice(-1),slice(-1),-10,slice(-1))
    _cmpf(_a,0,slice(0,-1,-20),-10)
    _cmpf(_a,10)
    _cmpf(_a,(10,0,0,0))
    _cmpf(_a,-10)

    #test with integer
    _cmp(numpy.asarray(_a[1]), a[1])
    _cmp(numpy.asarray(_a[-1]), a[-1])
    _cmp(numpy.asarray(_a[numpy.int64(1)]), a[numpy.int64(1)])
    _cmp(numpy.asarray(_a[numpy.int64(-1)]), a[numpy.int64(-1)])

    #test with slice
    _cmp(numpy.asarray(_a[1:]), a[1:])
    _cmp(numpy.asarray(_a[1:2]), a[1:2])
    _cmp(numpy.asarray(_a[-1:1]), a[-1:1])


    #test with tuple (mix slice, integer, numpy.int64)
    _cmp(numpy.asarray(_a[:,:,::numpy.int64(-1), ::-1]), a[:,:,::-1,::-1])
    _cmp(numpy.asarray(_a[:,:,numpy.int64(1),-1]), a[:,:,1,-1])
    _cmp(numpy.asarray(_a[:,:,::-1, ::-1]), a[:,:,::-1,::-1])
    _cmp(numpy.asarray(_a[:,:,::-10, ::-10]), a[:,:,::-10,::-10])
    _cmp(numpy.asarray(_a[:,:,1,-1]), a[:,:,1,-1])
    _cmp(numpy.asarray(_a[:,:,-1,:]), a[:,:,-1,:])
    _cmp(numpy.asarray(_a[:,::-2,-1,:]), a[:,::-2,-1,:])
    _cmp(numpy.asarray(_a[:,::-20,-1,:]), a[:,::-20,-1,:])
    _cmp(numpy.asarray(_a[:,::-2,-1]), a[:,::-2,-1])
    _cmp(numpy.asarray(_a[0,::-2,-1]), a[0,::-2,-1])

    _cmp(numpy.asarray(_a[-1,-1,-1,-2]), a[-1,-1,-1,-2])
    _cmp(numpy.asarray(_a[...]), a[...])
Example #25
def subtest(shape):
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert b.shape == a.shape
Example #26
def test_add_iadd_idiv():
    for shapes in (
                  [(5,5),(5,1)],
                  [(5,5),(1,5)],
                  (), (0,), (3,), (2,3),
                  (1,10000000),(10000,1000),(1000000,10),
                  (4100,33,34),(33,4100,34),(33,34,4100),
                  (4100,33,3,6),(33,4100,3,6),(33,3,4100,6),(33,3,6,4100),
                  (4100,3,34,6),(3,4100,34,6),(3,34,4100,6),(3,34,6,4100),
                  (4100,3,4,36),(3,4100,4,36),(3,4,4100,36),(3,4,36,4100),
                  (0,0,0,0,0),
                  (3,34,35,36,37),
                  (33,34,3,36,37),
                  (33,34,35,36,3),
                  ):
        if isinstance(shapes,tuple):
            shape = shapes
            shape2 = shapes
            a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32')
            a0_orig = a0.copy()
            a1 = a0.copy()
            assert numpy.allclose(a0, a1)
        else:
            shape = shapes[0]
            shape2 = shapes[1]

            a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32')
            a0_orig = a0.copy()
            a1 = theano._asarray(numpy.random.rand(*shape2), dtype='float32')

        b0 = cuda_ndarray.CudaNdarray(a0)
        b1 = cuda_ndarray.CudaNdarray(a1)
        assert numpy.allclose(a0, numpy.asarray(b0))
        assert numpy.allclose(a1, numpy.asarray(b1))

        # add doesn't support strides
        if shape == shape2:
            t0 = time.time()
            bsum = b0 + b1
            bsum = b0 + b1
            t1 = time.time()
            gpu_dt = t1 - t0
            t0 = time.time()
            asum = a0 + a1
            asum = a0 + a1
            t1 = time.time()
            cpu_dt = t1 - t0
            print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
            assert numpy.allclose(asum,  numpy.asarray(bsum))

        # Test the non-contiguous version.
        # It should raise an error, as this is not implemented.
        a0 = a0_orig.copy()
        b0 = cuda_ndarray.CudaNdarray(a0)
        if len(shape)==0:
            continue
        elif len(shape) == 1:
            _b = b1[::-1]
        elif len(shape) == 2:
            _b = b1[::, ::-1]
        elif len(shape) == 3:
            _b = b1[::, ::, ::-1]
        elif len(shape) == 4:
            _b = b1[::, ::, ::, ::-1]
        elif len(shape) == 5:
            _b = b1[::, ::, ::, ::, ::-1]
        # TODO: b0[..., ::-1] doesn't work

        if shape == shape2:
            t = False
            try:
                _c = _b+b1
            except TypeError:
                t = True
            assert t

        # test inplace version
        t0 = time.time()
        b0 += b1
        t1 = time.time()
        gpu_dt = t1 - t0
        t0 = time.time()
        a0 += a1
        t1 = time.time()
        cpu_dt = t1 - t0
        print shape, 'adding inplace', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
        assert numpy.allclose(a0, numpy.asarray(b0))
        assert numpy.allclose(a0, a0_orig + a1)

        b0 /= b1
        a0 /= a1
        assert numpy.allclose(a0, numpy.asarray(b0))
        assert numpy.allclose(a0, (a0_orig + a1)/a1)

        # test inplace version
        # for not contiguous input
        b0 += _b
        a0 += a1[..., ::-1]
        assert numpy.allclose(a0, numpy.asarray(b0))
        assert numpy.allclose(a0, (a0_orig+a1)/a1+a1[..., ::-1])

        b0 /= _b
        a0 /= a1[..., ::-1]
        assert numpy.allclose(a0, numpy.asarray(b0))
        assert numpy.allclose(a0, ((a0_orig+a1)/a1+a1[..., ::-1])/a1[..., ::-1])
Example #27
def test_sum():
    """
    test sum pattern 1, 11, 10, 01, 100, 110, 011, 001, 111, 0011, 0101, 0111, 1011, 1111

    test sum pattern implemented with reshape:
    1000, 0100, 0010, 0001, 11111

    others implemented by reshape that are not tested
    0011,0101,0110,1001,1010,1100
    1110,1101,1011

    TODO: test with broadcast
    """
    for shape, pattern in [((100,3,1300),[1]),
                           ((0,),[0]),((5,),[0]),
                           ((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),  # need something bigger than 32 for some opt tests.
                           ((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
                           ((0,0,0,0),[0,1,2,3]),
                           ((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
                           ((5,4,3,10,11),[1,2]),
                           ((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),

                           # test shapes bigger than 4096 in each dimension, to make sure we work correctly when we don't have enough threads/blocks in each dimension
                           ((4100,3),[0]),((3,4101),[0]),#10
                           ((1024,33),[0]),((33,1024),[0]),#10
                           ((1025,33),[0]),((33,1025),[0]),#10

                           ((4100,3),[1]),((3,4101),[1]),#01
                           ((1024,33),[1]),((33,1024),[1]),#01
                           ((1025,33),[1]),((33,1025),[1]),#01

                           ((4100,3),[0,1]),((3,4101),[0,1]),#11
                           ((1024,33),[0,1]),((33,1024),[0,1]),#01
                           ((1025,33),[0,1]),((33,1025),[0,1]),#01

                           ((4100,4,3),[0]),((5,4100,3),[0]),((5,4,4100),[0]),#100
                           ((4100,4,3),[1]),((5,4100,3),[1]),((5,4,4100),[1]),#010
                           ((4100,4,3),[2]),((5,4100,3),[2]),((5,4,4100),[2]),#001
                           ((4100,4,3),[0,1]),((5,4100,3),[0,1]),((5,4,4100),[0,1]),#110
                           ((4100,4,3),[1,2]),((5,4100,3),[1,2]),((5,4,4100),[1,2]),#011
                           #((4100,4,3),[0,2]),((5,4100,3),[0,2]),((5,4,4100),[0,2]),#101 ##not implemented
                           ((4100,4,3),[0,1,2]),((5,4100,3),[0,1,2]),((5,4,4100),[0,1,2]),#111

                           ((4100,4,3,2),[2,3]),((4,4100,3,2),[2,3]),((4,3,4100,2),[2,3]),((4,3,2,4100),[2,3]),#0011
                           ((4100,4,3,2),[1,3]),((4,4100,3,2),[1,3]),((4,3,4100,2),[1,3]),((4,3,2,4100),[1,3]),#0101
                           ((4100,4,3,2),[0,2,3]),((4,4100,3,2),[0,2,3]),((4,3,4100,2),[0,2,3]),#((4,3,2,4100),[0,2,3]),#1011
                           ((4100,4,3,2),[1,2,3]),((4,4100,3,2),[1,2,3]),((4,3,4100,2),[1,2,3]),((4,3,2,4100),[1,2,3]),#0111
                           ((4100,2,3,4),[0,1,2,3]),((2,4100,3,4),[0,1,2,3]),((2,3,4100,4),[0,1,2,3]),((2,3,4,4100),[0,1,2,3]),#1111


                           #test pattern implemented by reshape
                           ((4100,4,3,2),[0]),((4,4100,3,2),[0]),((4,3,4100,2),[0]),((4,3,2,4100),[0]),#1000
                           ((4100,4,3,2),[1]),((4,4100,3,2),[1]),((4,3,4100,2),[1]),((4,3,2,4100),[1]),#0100
                           ((4100,4,3,2),[2]),((4,4100,3,2),[2]),((4,3,4100,2),[2]),((4,3,2,4100),[2]),#0010
                           ((4100,4,3,2),[3]),((4,4100,3,2),[3]),((4,3,4100,2),[3]),((4,3,2,4100),[3]),#0001
                           ((1100,2,3,4,5),[0,1,2,3,4]),((2,1100,3,4,5),[0,1,2,3,4]),((2,3,1100,4,5),[0,1,2,3,4]),((2,3,4,1100,5),[0,1,2,3,4]),((2,3,4,5,1100),[0,1,2,3,4]),#11111

                           ]:
        a = tensor.TensorType('float32',(False,)*len(shape))()
        b = T.Sum(pattern)(a)
        val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
#        val = numpy.ones(shape)
#        val = numpy.arange(numpy.prod(shape)).reshape(shape)
        val = theano._asarray(val,dtype='float32')
        f = theano.function([a],b, mode=mode_with_gpu)
        f2 = theano.function([a],b, mode=mode_without_gpu)
        assert tcn.GpuSum in [x.op.__class__ for x in f.maker.env.toposort()]
        assert T.Sum in [x.op.__class__ for x in f2.maker.env.toposort()]
        if val.size==0:
            assert f2(val)==f(val), ('shape', shape, 'pattern', pattern)
        else:
            # We raise the error threshold as we sum big matrices, and this
            # causes small rounding differences with some seeds (for example
            # in debug mode with unittests.rseed=9275).
            orig_rtol = theano.tensor.basic.float32_rtol
            try:
                theano.tensor.basic.float32_rtol = 2e-5
                assert _allclose(f2(val), f(val)), ('shape', shape, 'pattern', pattern, sum([shape[i] for i in pattern]))
            finally:
                theano.tensor.basic.float32_rtol = orig_rtol


    # test with dimshuffle: we shuffle the two outer dims.
    for shape, pattern in [#((5,),[0]),
                           ((5,4),[0,1]),((5,4),[0]),
                           ((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
                           ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
        a = tensor.TensorType('float32',(False,)*len(shape))()
        dim_pattern = range(len(shape))
        dim_pattern[0]=1
        dim_pattern[1]=0
        a = a.dimshuffle(dim_pattern)
        b = T.Sum(pattern)(a)
        val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
#        val = numpy.ones(shape)
#        val = numpy.arange(numpy.prod(shape)).reshape(shape)
        val = theano._asarray(val,dtype='float32')
        f = theano.function([a],b, mode=mode_with_gpu)
        f2 = theano.function([a],b, mode=mode_without_gpu)
        assert tcn.GpuSum in [x.op.__class__ for x in f.maker.env.toposort()]
        assert T.Sum in [x.op.__class__ for x in f2.maker.env.toposort()]
        assert _allclose(f2(val),f(val)), ('shape', shape, 'pattern', pattern, sum([shape[i] for i in pattern]))


    # test with broadcast
    for shape, pattern in [((5,),[0]),
                           ((5,4),[0,1]),((5,4),[0]),
                           ((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
                           ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
        shape = numpy.asarray(shape)*2
        a = tensor.TensorType('float32',(False,)*len(shape))()
        a2 = tcn.CudaNdarrayType((False,)*len(shape))()
        b = T.Sum(pattern)(a)
        b2 = T.Sum(pattern)(a2)
        val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
#        val = numpy.ones(shape)
#        val = numpy.arange(numpy.prod(shape)).reshape(shape)
        val = theano._asarray(val,dtype='float32')
        val2 = cuda.CudaNdarray(val)
        if len(shape)==1:
            val = val[::2]
            val2 = val2[::2]
        elif len(shape)==2:
            val = val[::2,::2]
            val2 = val2[::2,::2]
        elif len(shape)==3:
            val = val[::2,::2,::2]
            val2 = val2[::2,::2,::2]
        elif len(shape)==4:
            val = val[::2,::2,::2,::2]
            val2 = val2[::2,::2,::2,::2]
        f = theano.function([a],b, mode=mode_without_gpu)
        f2 = theano.function([a2],b2, mode=mode_with_gpu)
        assert tcn.GpuSum in [x.op.__class__ for x in f2.maker.env.toposort()]
        assert T.Sum in [x.op.__class__ for x in f.maker.env.toposort()]
        assert _allclose(f2(val2),f(val)), ('shape', shape, 'pattern', pattern, sum([shape[i] for i in pattern]))
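The bit strings in the comments above ('10', '011', '1111', ...) name which axes get summed: digit i is 1 when axis i is reduced, so pattern '011' on a 3-d tensor corresponds to T.Sum([1, 2]). A NumPy restatement of the naming (a sketch):

import numpy

def sum_pattern(x, bits):
    # bits is a string like '011'; sum over every axis whose digit is 1.
    axes = tuple(i for i, bit in enumerate(bits) if bit == '1')
    return x.sum(axis=axes)

x = numpy.random.rand(5, 4, 3).astype('float32')
assert numpy.allclose(sum_pattern(x, '011'), x.sum(axis=(1, 2)))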
Example #28
def _params_allgood(ishape, kshape, mode, subsample=(1, 1), img_stride=(1, 1),
                    kern_stride=(1, 1), version=-1, verbose=0, random=True,
                    print_=None, id=None, rtol=1e-5, atol=1e-8,
                    nb_iter=0, ones=False, compile_kshp=None):
    #
    # This function is the core of several of the big unit-test drivers,
    # but it can also be used very directly on its own to test a specific
    # kind of convolution.
    #
    # See `test_example` (above) for an example of how to use this directly.
    #
    # :param kshape: (4d) The shape of the kernel at run time.
    # :param compile_kshp: (2d) Hard-code the shape of the kernel in the
    #                      generated code. This is supposed to be faster,
    #                      but we need to check that we raise an error if
    #                      the input has the wrong shape.
    #
    if ones:
        assert not random
        npy_img = theano._asarray(numpy.ones(ishape), dtype='float32')
        npy_kern = -theano._asarray(numpy.ones(kshape), dtype='float32')
    elif random:
        npy_img = theano._asarray(numpy.random.rand(*ishape) + 1,
                                  dtype='float32')
        npy_kern = theano._asarray(numpy.random.rand(*kshape) - 2,
                                   dtype='float32')
    else:
        npy_img = theano._asarray(numpy.arange(
                numpy.prod(ishape)).reshape(ishape), dtype='float32') + 1
        npy_kern = -(theano._asarray(numpy.arange(
                    numpy.prod(kshape)).reshape(kshape), dtype='float32') + 1)

    img = cuda_ndarray.CudaNdarray(npy_img)
    kern = cuda_ndarray.CudaNdarray(npy_kern)

    # We take the strides after the transfer, as we make the data
    # c-contiguous on the GPU.
    if img_stride != (1, 1):
        img = img[:, :, ::img_stride[0], ::img_stride[1]]
        npy_img = npy_img[:, :, ::img_stride[0], ::img_stride[1]]
    if kern_stride != (1, 1):
        kern = kern[:, :, ::kern_stride[0], ::kern_stride[1]]
        npy_kern = npy_kern[:, :, ::kern_stride[0], ::kern_stride[1]]

    t2 = None
    rval = True
    try:
        t0 = time.time()
        cpuval = py_conv(npy_img, npy_kern, mode, subsample)
        t1 = time.time()
        i = cuda_tensor4()
        k = cuda_tensor4()
        op = theano.sandbox.cuda.blas.GpuConv(border_mode=mode,
                                              subsample=subsample,
                                              version=version,
                                              verbose=verbose,
                                              kshp=compile_kshp)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = f(img, kern)
        t2 = time.time()
        for i in range(nb_iter):
            gpuval2 = f(img, kern)
            assert numpy.allclose(numpy.asarray(gpuval),
                                  numpy.asarray(gpuval2))
            assert (numpy.asarray(gpuval) == numpy.asarray(gpuval2)).all()
        gpuval = numpy.asarray(gpuval)
        if gpuval.shape != cpuval.shape:
            print >> sys.stdout, "ERROR: shape mismatch",
            print >> sys.stdout, gpuval.shape, cpuval.shape
            rval = False
        if rval:
            rval = numpy.allclose(cpuval, gpuval, rtol=rtol)
            assert numpy.all(numpy.isfinite(gpuval))
    except NotImplementedError, e:
        print >> sys.stdout, '_params_allgood Failed allclose', e
        rval = False
Example #29
    def setUp(self):
        self.input = cuda.ftensor4()
        self.filters = cuda.ftensor4()
        self.topgrad = cuda.ftensor4()
        self.constant_tensor = cuda.CudaNdarray(
            numpy.zeros((3, 5, 7, 11), dtype='float32'))
Example #30
    def test_dimshuffle(self):
        utt.seed_rng()
        rng = numpy.random.RandomState(utt.fetch_seed())

        # 2d -> 0d
        a = theano._asarray(rng.randn(1, 1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a),
                              cuda_ndarray.dimshuffle(b, ()))

        # Test when we drop an axis that doesn't have shape 1
        a = theano._asarray(rng.randn(2, 1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b, ())

        # Test that we can't take a dimension multiple times
        a = theano._asarray(rng.randn(2, 1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b, (1, 1))

        # 1d
        a = theano._asarray(rng.randn(3, ), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a),
                              cuda_ndarray.dimshuffle(b, (0, )))
        assert numpy.allclose(a[None, :, None],
                              cuda_ndarray.dimshuffle(b, (-1, 0, -1)))

        # 2d
        a = theano._asarray(rng.randn(3, 11), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a),
                              cuda_ndarray.dimshuffle(b, (1, 0)))
        assert numpy.allclose(
            numpy.transpose(a)[None, :, None, :, None],
            cuda_ndarray.dimshuffle(b, (-1, 1, -1, 0, -1)))

        # 2d -> 1d
        a = theano._asarray(rng.randn(1, 11), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a[:], cuda_ndarray.dimshuffle(b, (1, )))
        a = theano._asarray(rng.randn(11, 1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a.reshape((11, )),
                              cuda_ndarray.dimshuffle(b, (0, )))

        # 3d
        a = theano._asarray(rng.randn(3, 4, 5), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a, cuda_ndarray.dimshuffle(b, (0, 1, 2)))
        assert numpy.allclose(numpy.swapaxes(a, 0, 1),
                              cuda_ndarray.dimshuffle(b, (1, 0, 2)))
        assert numpy.allclose(numpy.swapaxes(a, 0, 2),
                              cuda_ndarray.dimshuffle(b, (2, 1, 0)))
        assert numpy.allclose(numpy.swapaxes(a, 1, 2),
                              cuda_ndarray.dimshuffle(b, (0, 2, 1)))
        assert numpy.allclose(
            numpy.swapaxes(a, 1, 2)[None, :, None, :, :, None],
            cuda_ndarray.dimshuffle(b, (-1, 0, -1, 2, 1, -1)))

        # 4d
        a = theano._asarray(rng.randn(3, 11, 4, 5), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.swapaxes(a, 0, 1),
                              cuda_ndarray.dimshuffle(b, (1, 0, 2, 3)))
        assert numpy.allclose(numpy.swapaxes(a, 0, 2),
                              cuda_ndarray.dimshuffle(b, (2, 1, 0, 3)))
        assert numpy.allclose(numpy.swapaxes(a, 0, 3),
                              cuda_ndarray.dimshuffle(b, (3, 1, 2, 0)))
        assert numpy.allclose(
            numpy.swapaxes(a, 0, 3)[None, :, None, :, :, :],
            cuda_ndarray.dimshuffle(b, (-1, 3, -1, 1, 2, 0)))
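In these dimshuffle patterns, -1 inserts a new broadcastable (length-1) axis, playing the role of 'x' in Theano's symbolic dimshuffle, while the non-negative entries permute the existing axes. A NumPy reference for the permute-and-insert part (a sketch; it does not cover the axis-dropping cases exercised at the start of the test):

import numpy

def dimshuffle_reference(x, pattern):
    # Keep the axes named by the non-negative entries, in that order, then
    # insert a length-1 axis wherever the pattern says -1.
    perm = [p for p in pattern if p != -1]
    index = tuple(None if p == -1 else slice(None) for p in pattern)
    return x.transpose(perm)[index]

a = numpy.random.randn(3, 11).astype('float32')
out = dimshuffle_reference(a, (-1, 1, -1, 0, -1))
assert out.shape == (1, 11, 1, 3, 1)
assert numpy.allclose(out, numpy.transpose(a)[None, :, None, :, None])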