Example #1
def test_setitem_matrix_tensor3():
    a = numpy.arange(27)
    a.resize((3,3,3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7,8,9], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    # set middle row through cube to 7,8,9
    _a[:,1,1] = _b

    a[:,1,1] = b
    assert numpy.allclose(a,numpy.asarray(_a))

    # test direct transfer from numpy
    try:
        _a[:,1,1] = b*100
        a[:,1,1] = b*100
        assert numpy.allclose(a,numpy.asarray(_a))
        raise Exception("CudaNdarray.__setitem__ should have raised an error")
    except NotImplementedError:
        pass

    row = theano._asarray([777,888,999], dtype='float32')
    _a[1,1,:] = row
    a[1,1,:] = row
    assert numpy.allclose(a,numpy.asarray(_a))
Example #2
    def __init__(self, input, n_in, n_out, activation, rng=RandomState(1234), 
        layer_name="HiddenLayer", W=None, b=None, borrow=True):

        if W is not None: self.W = shared(value=W, borrow=borrow, name=layer_name+'_W')
        elif activation in (relu,softplus): 
            W_val = _asarray(rng.normal(loc=0, scale=0.01, 
                size=(n_in, n_out)), dtype=floatX)
            self.W = shared(W_val, name=layer_name+"_W", borrow=borrow)    
        else: 
            # uniformly sampled W
            low = -sqrt(6. / (n_in + n_out))
            high = sqrt(6. / (n_in + n_out))
            values = rng.uniform(low=low, high=high, size=(n_in, n_out))
            W_val = _asarray(values, dtype=floatX)
            if activation == sigmoid: W_val *= 4
            self.W = shared(value=W_val, borrow=borrow, name=layer_name+'_W')
            

        if b is not None: self.b = shared(b, name=layer_name+"_b", borrow=borrow)
        elif activation in (relu,softplus): 
            b_val = ones((n_out,), dtype=floatX)
            self.b = shared(value=b_val, borrow=True)
        else: 
            # Initialize b with zeros
            self.b = shared(value=zeros((n_out,), dtype=floatX), borrow=True)

        # Parameters of the model
        self.params = [self.W, self.b]
        # Output of the hidden layer
        self.output = activation(T.dot(input, self.W) + self.b)
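A minimal usage sketch for the layer above (shapes are hypothetical; it assumes the module defining HiddenLayer also provides the shared, _asarray, floatX, relu, softplus and sigmoid helpers it references):

import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
# T.tanh is not in (relu, softplus), so the uniform sqrt(6/(n_in+n_out)) branch is used
layer = HiddenLayer(input=x, n_in=784, n_out=256, activation=T.tanh)
f = theano.function([x], layer.output)
out = f(numpy.random.rand(10, 784).astype(theano.config.floatX))
print(out.shape)  # (10, 256)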
Example #3
    def subtest(shape_1, shape_2, rng):
        #print >> sys.stdout, "INFO: shapes", shape_1, shape_2
        a = theano._asarray(rng.randn(*shape_1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)

        aa = a.reshape(shape_2)
        bb = b.reshape(shape_2)

        n_bb = numpy.asarray(bb)

        # print n_bb

        assert numpy.all(aa == n_bb)
        assert aa.shape == n_bb.shape

        # Test the non-contiguous case
        shape_1_2x = (shape_1[0] * 2,) + shape_1[1:]
        a = theano._asarray(rng.randn(*shape_1_2x), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        a = a[::2]
        b = b[::2]

        aa = a.reshape(shape_2)
        bb = b.reshape(shape_2)

        n_bb = numpy.asarray(bb)

        # print n_bb

        assert numpy.all(aa == n_bb)
        assert aa.shape == n_bb.shape
Example #4
    def set_input_space(self, space):
        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        self.output_space = VectorSpace(self.dim)

        # we cannot set this in __init__() as we're not sure about input dimensions yet
        if self.istdev is not None:
            W = self.rng.randn(self.input_dim, self.dim) * self.istdev
            b = self.rng.randn(self.dim,) * self.istdev
        else:
            W = np.zeros((self.input_dim, self.dim))
            b = np.zeros((self.dim,))

        self.W = theano.shared(theano._asarray(W,
                               dtype=theano.config.floatX),
                               name=(self.layer_name + '_W'))

        self.b = theano.shared(theano._asarray(b,
                               dtype=theano.config.floatX),
                               name=(self.layer_name + '_b'))
Example #5
def test_elemwise2():
    """ Several kinds of elemwise expressions with dimension permutations """
    rng = numpy.random.RandomState(int(time.time()))
    shape = (3, 5)
    for pattern in [(0, 1), (1, 0)]:
        a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),
                                                   dtype='float32'), name=None)
        b = tensor.Tensor(dtype='float32', broadcastable=[0] * len(shape))()
        f = pfunc([b], [], updates=[(a, (a + b).dimshuffle(pattern))],
                  mode=mode_with_gpu)
        has_elemwise = False
        for i, node in enumerate(f.maker.env.toposort()):
            has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
        assert not has_elemwise
        #let debugmode catch errors
        f(theano._asarray(rng.rand(*shape), dtype='float32') * .3)

    shape = (3, 4, 5, 6)
    a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),
                                               dtype='float32'), 'a')
    b = tensor.Tensor(dtype='float32', broadcastable=[0] * len(shape))()
    f = pfunc([b], [], updates=[(a, (a + b).dimshuffle([2, 0, 3, 1]) *
        tensor.exp(b ** a).dimshuffle([2, 0, 3, 1]))], mode=mode_with_gpu)
    has_elemwise = False
    for i, node in enumerate(f.maker.env.toposort()):
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    #let debugmode catch errors
    f(theano._asarray(rng.rand(*shape), dtype='float32'))
Example #6
def gemm_directly(bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsx, subsy,
                  direction):
    ishape = (bs, ch, rImg1, rImg2)
    kshape = (nf, ch, rFlt1, rFlt2)
    subsample = (subsx, subsy)

    npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
    npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')

    i = cuda_tensor4()
    k = cuda_tensor4()

    if direction == 'fprop':
        cpuval = py_conv(npy_img, npy_kern, 'valid', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM(border_mode='valid',
                                                subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = f(npy_img, npy_kern[:,:,::-1,::-1])
    elif direction == 'bprop img':
        cpuval = py_conv(npy_img, npy_kern, 'full', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM_gradInputs(
            border_mode='valid', subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = f(npy_kern.transpose(1, 0, 2, 3), npy_img)
    elif direction == 'bprop kern':
        cpuval = py_conv(npy_img, npy_kern, 'valid', subsample)
        op = theano.sandbox.cuda.blas.GpuCorrMM_gradWeights(
            border_mode='valid', subsample=subsample)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = numpy.array(f(
                npy_img.transpose(1, 0, 2, 3),
                npy_kern.transpose(1, 0, 2, 3)[:,:,::-1,::-1])).transpose(
            1, 0, 2, 3)

    assert_allclose(cpuval, gpuval, rtol=1e-4)
Example #7
def test_setitem_rightvalue_ndarray_fails():
    """
    Now we don't automatically add dimensions to broadcast
    """
    a = numpy.arange(3 * 4 * 5)
    a.resize((3, 4, 5))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7, 8, 9, 10], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)
    b5 = theano._asarray([7, 8, 9, 10, 11], dtype='float32')
    _b5 = cuda_ndarray.CudaNdarray(b5)

    # attempt to assign the ndarray b with setitem
    _a[:, :, 1] = _b
    a[:, :, 1] = b
    assert numpy.allclose(numpy.asarray(_a), a)

    # test direct transfer from numpy to a contiguous region
    # attempt to assign the ndarray b with setitem
    # same number of dim
    mat = numpy.random.rand(4, 5).astype('float32')
    _a[2, :, :] = mat
    a[2, :, :] = mat
    assert numpy.allclose(numpy.asarray(_a), a)

    # without same number of dim
    try:
        _a[0, :, :] = mat
        #a[0, :, :] = mat
        #assert numpy.allclose(numpy.asarray(_a), a)
    except ValueError as e:
        pass
Example #8
def test_setitem_matrix_bad_ndim():
    a = numpy.arange(27)
    a.resize((3, 3, 3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7, 8], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    try:
        # attempt to assign the ndarray b with setitem
        _a[:, :, 1] = _b
        assert False
    except ValueError as e:
        # print e
        assert True

    # test direct transfer from numpy
    try:
        # attempt to assign the ndarray b with setitem
        _a[1, :, :] = b
        assert False
    except ValueError as e:
        # print e
        assert True
Example #9
def test_elemwise1():
    """ Several kinds of elemwise expressions with no broadcasting,
    non power-of-two shape """

    shape = (3, 4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape),
                                               dtype='float32') + 0.5, 'a')
    b = tensor.fmatrix()

    #let debugmode catch any mistakes
    print >> sys.stdout, "STARTING FUNCTION 1"
    f = pfunc([b], [], updates=[(a, b ** a)], mode=mode_with_gpu)
    for i, node in enumerate(f.maker.env.toposort()):
        print i, node
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)

    print >> sys.stdout, "STARTING FUNCTION 2"
    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, tensor.exp(b ** a))], mode=mode_with_gpu)
    for i, node in enumerate(f.maker.env.toposort()):
        print i, node
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)

    print >> sys.stdout, "STARTING FUNCTION 3"
    #let debugmode catch any mistakes
    f = pfunc([b], [], updates=[(a, a + b * tensor.exp(b ** a))],
              mode=mode_with_gpu)
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)
Example #10
    def get_updates(self, grads):
        grads = OrderedDict(grads)
        updates = OrderedDict()

        for param in grads.keys():
            # mean_squared_grad := E[g^2]_{t-1}
            mean_square_grad = theano.shared(theano._asarray(param.get_value() * 0., dtype=theano.config.floatX), name='mean_square_grad_' + param.name, borrow=False)
            self.parameters.append(mean_square_grad)
            # mean_square_dx := E[(\Delta x)^2]_{t-1}
            mean_square_dx = theano.shared(theano._asarray(param.get_value() * 0., dtype=theano.config.floatX), name='mean_square_dx_' + param.name, borrow=False)
            self.parameters.append(mean_square_dx)

            # Accumulate gradient
            new_mean_squared_grad = self.decay * mean_square_grad + (1 - self.decay) * T.sqr(grads[param])

            # Compute update
            rms_dx_tm1 = T.sqrt(mean_square_dx + self.epsilon)
            rms_grad_t = T.sqrt(new_mean_squared_grad + self.epsilon)
            delta_x_t = - rms_dx_tm1 / rms_grad_t * grads[param]

            # Accumulate updates
            new_mean_square_dx = self.decay * mean_square_dx + (1 - self.decay) * T.sqr(delta_x_t)

            # Apply update
            updates[mean_square_grad] = new_mean_squared_grad
            updates[mean_square_dx] = new_mean_square_dx
            updates[param] = param + delta_x_t

        return updates
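The loop above implements the Adadelta rule from the comments: accumulate E[g^2], scale the gradient by RMS[dx]_{t-1} / RMS[g]_t, then accumulate E[(dx)^2]. A rough sketch of how get_updates might be wired into a training function, assuming optimizer is an instance of the class above with self.decay, self.epsilon and self.parameters already set (the tiny model below is illustrative):

import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.vector('y')
w = theano.shared(numpy.zeros(5, dtype=theano.config.floatX), name='w')
cost = T.sqr(T.dot(x, w) - y).mean()

grads = [(w, T.grad(cost, w))]          # (param, grad) pairs; get_updates turns this into an OrderedDict
updates = optimizer.get_updates(grads)  # adds the two running averages plus the parameter update
train = theano.function([x, y], cost, updates=updates)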
Example #11
def test_sum():
    shape = (2,3)
    a0 = theano._asarray(numpy.arange(shape[0]*shape[1]).reshape(shape), dtype='float32')

    b0 = cuda_ndarray.CudaNdarray(a0)

    assert numpy.allclose(a0.sum(), numpy.asarray(b0.reduce_sum([1,1])))

    a0sum = a0.sum(axis=0)
    b0sum = b0.reduce_sum([1,0])

    print 'asum\n',a0sum
    print 'bsum\n',numpy.asarray(b0sum)

    assert numpy.allclose(a0.sum(axis=0), numpy.asarray(b0.reduce_sum([1,0])))
    assert numpy.allclose(a0.sum(axis=1), numpy.asarray(b0.reduce_sum([0,1])))
    assert numpy.allclose(a0, numpy.asarray(b0.reduce_sum([0,0])))

    shape = (3,4,5,6,7,8)
    a0 = theano._asarray(numpy.arange(3*4*5*6*7*8).reshape(shape), dtype='float32')
    b0 = cuda_ndarray.CudaNdarray(a0)
    assert numpy.allclose(a0.sum(axis=5).sum(axis=3).sum(axis=0), numpy.asarray(b0.reduce_sum([1,0,0,1,0,1])))

    shape = (16,2048)
    a0 = theano._asarray(numpy.arange(16*2048).reshape(shape), dtype='float32')
    b0 = cuda_ndarray.CudaNdarray(a0)
    assert numpy.allclose(a0.sum(axis=0), numpy.asarray(b0.reduce_sum([1,0])))

    shape = (16,10)
    a0 = theano._asarray(numpy.arange(160).reshape(shape), dtype='float32')
    b0 = cuda_ndarray.CudaNdarray(a0)
    assert numpy.allclose(a0.sum(), numpy.asarray(b0.reduce_sum([1,1])))
Example #12
    def new_filters_expbounds(
        cls, rng, input, n_in, n_out, n_terms, dtype=None, eps=1e-1, exponent_range=(1.0, 3.0), filter_range=1.0
    ):
        """Return a KouhLayer instance with random parameters

        The parameters are drawn on a range [typically] suitable for fine-tuning by gradient
        descent.


        :param input: a tensor of shape (n_examples, n_in)

        :type n_in: positive int
        :param n_in: number of input dimensions

        :type n_out: positive int
        :param n_out: number of dimensions in rval.output

        :param n_terms: each (of n_out) complex-cell firing rate will be determined from this
        many 'simple cell' responses.

        :param eps: this amount is added to the softplus of filter responses as a baseline
        firing rate (that prevents a subsequent error from ``pow(0, p)``)

        :returns: KouhLayer instance with freshly-allocated random weights.

        """
        if input.type.ndim != 2:
            raise TypeError("matrix expected for input")

        if dtype is None:
            dtype = input.dtype
        _logger.debug("dtype %s" % dtype)

        def shared_uniform(low, high, size, name):
            return _shared_uniform(rng, low, high, size, dtype, name)

        f_list = [
            shared_uniform(
                low=-2.0 / numpy.sqrt(n_in), high=2.0 / numpy.sqrt(n_in), size=(n_in, n_out), name="f_%i" % i
            )
            for i in xrange(n_terms)
        ]

        b_list = [shared_uniform(low=0, high=0.01, size=(n_out,), name="b_%i" % i) for i in xrange(n_terms)]
        # x_list = [theano._asarray(eps, dtype=dtype)+softplus(tensor.dot(input, f_list[i])) for i in xrange(n_terms)]
        filter_range = theano._asarray(filter_range, dtype=dtype)
        half_filter_range = theano._asarray(filter_range / 2, dtype=dtype)
        x_list = [
            theano._asarray(filter_range + eps, dtype=dtype)
            + half_filter_range * softsign(tensor.dot(input, f_list[i]) + b_list[i])
            for i in xrange(n_terms)
        ]

        rval = cls.new_expbounds(rng, x_list, n_out, dtype=dtype, params=f_list + b_list, exponent_range=exponent_range)
        rval.f_list = f_list
        rval.input = input  # add the input to the returned object
        rval.filter_l1 = sum(abs(fi).sum() for fi in f_list)
        rval.filter_l2_sqr = sum((fi ** 2).sum() for fi in f_list)
        return rval
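Reading the x_list expression above, each simple-cell response is

    x_i = (filter_range + eps) + (filter_range / 2) * softsign(dot(input, f_i) + b_i)

and since softsign stays in (-1, 1), x_i is confined to (eps + filter_range/2, eps + 3*filter_range/2), i.e. strictly positive before the subsequent pow(x, p).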
Example #13
    def test_invalid_arg(self):
        img = theano._asarray(numpy.empty((1, 1, 1, 1)), dtype='float32')
        kern = theano._asarray(numpy.empty((1, 1, 1, 1)), dtype='float32')
        for i in self.conv_ops:
            assert_raises(ValueError, i, img, kern,
                          border_mode=(-1, 0))
            assert_raises(ValueError, i, img, kern,
                          border_mode=(0, -1))
            assert_raises(ValueError, i, img, kern,
                          border_mode='not border')
Example #14
def sharedX(value, name=None, borrow=True, keep_on_cpu=False):
    """ Transform value into a shared variable of type floatX """
    if keep_on_cpu:
        return T._shared(theano._asarray(value, dtype=theano.config.floatX),
                         name=name,
                         borrow=borrow)

    return theano.shared(theano._asarray(value, dtype=theano.config.floatX),
                         name=name,
                         borrow=borrow)
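A quick illustration of the helper above (values are arbitrary):

import numpy
import theano

w = sharedX(numpy.random.rand(3, 4), name='w')               # dtype follows theano.config.floatX
assert w.get_value().dtype == theano.config.floatX
w_cpu = sharedX([1, 2, 3], name='w_cpu', keep_on_cpu=True)   # forces a CPU-backed shared variable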
Example #15
def conv_grad(mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsample, op):
    ishape = (bs, ch, rImg1, rImg2)
    kshape = (nf, ch, rFlt1, rFlt2)

    npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
    npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')

    i = cuda.CudaNdarrayType(
        broadcastable=[sh == 1 for sh in npy_img.shape])()
    k = cuda.CudaNdarrayType(
        broadcastable=[sh == 1 for sh in npy_kern.shape])()

    # TODO: also test custom pad values
    corr_op = op(mode, subsample)(i, k)
    # try to compile reference implementation without shape,
    # so we don't have to compile hundreds of versions
    conv_op = tensor.nnet.conv2d(i, k[:, :, ::-1, ::-1],
                                 border_mode=mode, subsample=subsample)
    try:
        conv_op_di = theano.grad(conv_op.sum(), i)
        conv_op_dk = theano.grad(conv_op.sum(), k)
    except Exception:
        # compile with shape information only when needed
        conv_op = tensor.nnet.conv2d(i, k[:, :, ::-1, ::-1],
                                     ishape, kshape, mode, subsample)
    conv_op_di = theano.grad(conv_op.sum(), i)
    conv_op_dk = theano.grad(conv_op.sum(), k)
    corr_op_di = theano.grad(corr_op.sum(), i)
    corr_op_dk = theano.grad(corr_op.sum(), k)
    outputs = [corr_op, conv_op,
               corr_op_di, conv_op_di,
               corr_op_dk, conv_op_dk]
    try:
        conv_op_dik = theano.grad(conv_op_di.sum(), k)
        conv_op_dki = theano.grad(conv_op_dk.sum(), i)
        corr_op_dik = theano.grad(corr_op_di.sum(), k)
        corr_op_dki = theano.grad(corr_op_dk.sum(), i)
        outputs.extend([corr_op_dik, conv_op_dik,
                        corr_op_dki, conv_op_dki])
    except Exception:
        # skip if the reference implementation can't do it
        pass

    f = theano.function([i, k], outputs, mode=theano_mode.excluding('conv_dnn', 'conv_gemm'))

    allvals = f(npy_img, npy_kern)

    for a, b, oa, ob, p in zip(allvals[::2], allvals[1::2],
                               outputs[::2], outputs[1::2],
                               ('top', 'dtop/dbottom', 'dtop/dweight',
                                'dtop/dbottom/dweight', 'dtop/dweight/dbottom')):
        assert oa.type.broadcastable[:2] == ob.type.broadcastable[:2]

        assert_allclose(a, b, rtol=1e-4)
Example #16
    def learning_rates_setup(self, base_lr, **kwargs):
        """
        Initializes parameter-specific learning rate dictionary and shared
        variables for the annealed base learning rate and iteration number.

        Parameters
        ----------
        base_lr : float
            The base learning rate before annealing or parameter-specific
            scaling.

        Notes
        -----
        Parameter-specific learning rates can be set by passing keyword
        arguments <name>_lr, where name is the .name attribute of a given
        parameter.
        """
        # Take care of learning rate scales for individual parameters
        self.learning_rates = {}
        # Base learning rate per example.
        self.base_lr = theano._asarray(base_lr, dtype=floatX)

        # Keep track of names already seen
        lr_names_seen = set()
        for parameter in self.params:
            lr_name = '%s_lr' % parameter.name
            if lr_name in lr_names_seen:
                print >> sys.stderr, ('Warning: In SGDOptimizer, '
                        'at least two parameters have the same name. '
                        'Both will be affected by the keyword argument '
                        '%s.' % lr_name)
            lr_names_seen.add(parameter.name)

            thislr = kwargs.get(lr_name, 1.)
            self.learning_rates[parameter] = sharedX(thislr, lr_name)

        # Verify that no ..._lr keyword argument is ignored
        for lr_name in lr_names_seen:
            if lr_name in kwargs:
                kwargs.pop(lr_name)
        for kw in kwargs.iterkeys():
            if kw[-3:] == '_lr':
                print >> sys.stderr, ('Warning: in SGDOptimizer, '
                        'keyword argument %s will be ignored, '
                        'because no parameter was found with name %s.'
                        % (kw, kw[:-3]))

        # A shared variable for storing the iteration number.
        self.iteration = sharedX(theano._asarray(0, dtype='int32'),
                                 name='iter')

        # A shared variable for storing the annealed base learning rate, used
        # to lower the learning rate gradually after a certain amount of time.
        self.annealed = sharedX(base_lr, 'annealed')
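A sketch of how the per-parameter learning-rate keywords described in the docstring might be passed; opt is assumed to be the optimizer holding this method, and 'W' a parameter name appearing in opt.params:

# base learning rate 0.01; the parameter named 'W' is scaled by 0.1,
# every other parameter keeps the default scale of 1.0
opt.learning_rates_setup(base_lr=0.01, W_lr=0.1)
# the effective rate for that parameter is base_lr * scale = 0.001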
Example #17
def test_gemm_vector_vector():
    a = theano._asarray(numpy.random.rand(5,1), dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)
    b = theano._asarray(numpy.random.rand(1,5), dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    _c = cuda_ndarray.dot(_a,_b)
    assert _c.shape == (5,5)
    assert numpy.allclose(_c, numpy.dot(a, b))

    _c = cuda_ndarray.dot(_b,_a)
    assert _c.shape == (1,1)
    assert numpy.allclose(_c, numpy.dot(b, a))
Example #18
    def test_dimshuffle(self):
        utt.seed_rng()
        rng = numpy.random.RandomState(utt.fetch_seed())

        # 2d -> 0d
        a = theano._asarray(rng.randn(1,1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a), cuda_ndarray.dimshuffle(b,()))

        # Test when we drop an axis that doesn't have shape 1
        a = theano._asarray(rng.randn(2,1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b,())

        # Test that we can't take a dimension multiple times
        a = theano._asarray(rng.randn(2,1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b,(1,1))

        # 1d
        a = theano._asarray(rng.randn(3,), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a), cuda_ndarray.dimshuffle(b,(0,)))
        assert numpy.allclose(a[None,:,None], cuda_ndarray.dimshuffle(b,(-1,0,-1)))

        # 2d
        a = theano._asarray(rng.randn(3,11), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.transpose(a), cuda_ndarray.dimshuffle(b,(1,0)))
        assert numpy.allclose(numpy.transpose(a)[None,:,None,:,None], cuda_ndarray.dimshuffle(b,(-1,1,-1,0,-1)))

        # 2d -> 1d
        a = theano._asarray(rng.randn(1,11), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a[:,], cuda_ndarray.dimshuffle(b,(1,)))
        a = theano._asarray(rng.randn(11,1), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a.reshape((11,)), cuda_ndarray.dimshuffle(b,(0,)))

        # 3d
        a = theano._asarray(rng.randn(3,4,5), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(a, cuda_ndarray.dimshuffle(b,(0,1,2)))
        assert numpy.allclose(numpy.swapaxes(a,0,1), cuda_ndarray.dimshuffle(b,(1,0,2)))
        assert numpy.allclose(numpy.swapaxes(a,0,2), cuda_ndarray.dimshuffle(b,(2,1,0)))
        assert numpy.allclose(numpy.swapaxes(a,1,2), cuda_ndarray.dimshuffle(b,(0,2,1)))
        assert numpy.allclose(numpy.swapaxes(a,1,2)[None,:,None,:,:,None], cuda_ndarray.dimshuffle(b,(-1,0,-1,2,1,-1)))

        # 4d
        a = theano._asarray(rng.randn(3,11,4,5), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        assert numpy.allclose(numpy.swapaxes(a,0,1), cuda_ndarray.dimshuffle(b,(1,0,2,3)))
        assert numpy.allclose(numpy.swapaxes(a,0,2), cuda_ndarray.dimshuffle(b,(2,1,0,3)))
        assert numpy.allclose(numpy.swapaxes(a,0,3), cuda_ndarray.dimshuffle(b,(3,1,2,0)))
        assert numpy.allclose(numpy.swapaxes(a,0,3), cuda_ndarray.dimshuffle(b,(3,1,2,0)))
        assert numpy.allclose(numpy.swapaxes(a,0,3)[None,:,None,:,:,:], cuda_ndarray.dimshuffle(b,(-1,3,-1,1,2,0)))
Example #19
def conv_grad(mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsample, op):
    ishape = (bs, ch, rImg1, rImg2)
    kshape = (nf, ch, rFlt1, rFlt2)

    npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
    npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')

    i = cuda.CudaNdarrayType(
        broadcastable=[sh == 1 for sh in npy_img.shape])()
    k = cuda.CudaNdarrayType(
        broadcastable=[sh == 1 for sh in npy_kern.shape])()

    # TODO: also test custom pad values
    corr_op = op(mode, subsample)(i, k)
    conv_op = tensor.nnet.conv2d(i, k[:, :, ::-1, ::-1],
                                 border_mode=mode, subsample=subsample)
    conv_op_di = theano.grad(conv_op.sum(), i)
    conv_op_dk = theano.grad(conv_op.sum(), k)
    corr_op_di = theano.grad(corr_op.sum(), i)
    corr_op_dk = theano.grad(corr_op.sum(), k)
    outputs = [corr_op, conv_op,
               corr_op_di, conv_op_di,
               corr_op_dk, conv_op_dk]

    conv_op_dik = theano.grad(conv_op_di.sum(), k)
    conv_op_dki = theano.grad(conv_op_dk.sum(), i)
    corr_op_dik = theano.grad(corr_op_di.sum(), k)
    corr_op_dki = theano.grad(corr_op_dk.sum(), i)
    outputs.extend([corr_op_dik, conv_op_dik,
                    corr_op_dki, conv_op_dki])

    if not theano.config.blas.ldflags:
        # Some of the operations are not transferred to the GPU,
        # and without BLAS, the abstract Op will not be optimized
        # to CorrMM either, so we have to accept the use of the
        # slow Python convolution in that case.
        mode = theano_mode.excluding('AbstractConvCheck')
    else:
        mode = theano_mode

    f = theano.function([i, k], outputs, mode=mode)

    allvals = f(npy_img, npy_kern)

    for a, b, oa, ob, p in zip(allvals[::2], allvals[1::2],
                               outputs[::2], outputs[1::2],
                               ('top', 'dtop/dbottom', 'dtop/dweight',
                                'dtop/dbottom/dweight', 'dtop/dweight/dbottom')):
        assert oa.type.broadcastable[:2] == ob.type.broadcastable[:2]

        assert_allclose(a, b, rtol=1e-4)
Example #20
def test_elemwise_fusion():
    """ Test the the GpuElemwise fusion work correctly"""
    shape = (3,4)
    a = cuda.shared_constructor(theano._asarray(numpy.random.rand(*shape), dtype='float32'), 'a')
    b = tensor.fmatrix()
    c = tensor.fmatrix()
    f = pfunc([b,c], [a+b+c], mode=mode_with_gpu)
    topo = f.maker.env.toposort()
    for i, node in enumerate(topo):
        print >> sys.stdout, i, node
    assert len(topo)==4
    assert isinstance(topo[2].op.scalar_op,theano.scalar.basic.Composite)
    #let debugmode catch errors
    f(theano._asarray(numpy.random.rand(*shape), dtype='float32'), theano._asarray(numpy.random.rand(*shape), dtype='float32'))
Example #21
def test_gemm_directly():
    for direction in ['fprop', 'bprop img', 'bprop kern']:
        print 'Testing direction: ' + direction
        for bs in range(1, 5):
            for ch in range(1,4):
                for nf in range(1,4):
                    for rImg1 in range(5, 9):
                        for rImg2 in range(5, 9):
                            for rFlt1 in range(2, 4):
                                for rFlt2 in range(2, 4):
                                    for subsx in range(1, 3) if direction == 'fprop' else [1]:
                                        for subsy in range(1, 3) if direction == 'fprop' else [1]:
                                            ishape = (bs, ch, rImg1, rImg2)
                                            kshape = (nf, ch, rFlt1, rFlt2)
                                            subsample = (subsx, subsy)

                                            npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
                                            npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')

                                            i = cuda_tensor4()
                                            k = cuda_tensor4()

                                            if direction == 'fprop':
                                                cpuval = py_conv(npy_img, npy_kern, 'valid', subsample)
                                                op = theano.sandbox.cuda.blas.GpuCorrMM(border_mode='valid',
                                                        subsample=subsample)(i, k)
                                                f = theano.function([i, k], op, mode=theano_mode)
                                                gpuval = f(npy_img, npy_kern[:,:,::-1,::-1])
                                            elif direction == 'bprop img':
                                                cpuval = py_conv(npy_img, npy_kern, 'full', subsample)
                                                op = theano.sandbox.cuda.blas.GpuCorrMM_gradInputs(border_mode='valid',
                                                        subsample=subsample)(i, k)
                                                f = theano.function([i, k], op, mode=theano_mode)
                                                gpuval = f(npy_kern.transpose(1, 0, 2, 3), npy_img)
                                            elif direction == 'bprop kern':
                                                cpuval = py_conv(npy_img, npy_kern, 'valid', subsample)
                                                op = theano.sandbox.cuda.blas.GpuCorrMM_gradWeights(border_mode='valid',
                                                        subsample=subsample)(i, k)
                                                f = theano.function([i, k], op, mode=theano_mode)
                                                gpuval = numpy.array(f(npy_img.transpose(1, 0, 2, 3),
                                                        npy_kern.transpose(1, 0, 2, 3)[:,:,::-1,::-1])).transpose(1, 0, 2, 3)

                                            if not numpy.allclose(cpuval, gpuval, rtol=1e-4):
                                                print "Test failed for"
                                                print "direction: ", direction
                                                print "ishape: ", ishape
                                                print "kshape: ", kshape
                                                print "subsample: ", subsample
                                                assert False
Example #22
def test_elemwise4():
    """ Test that two vectors can be broadcast to form an outer product (by performing rank-1 matrix update"""

    shape = (3,4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape), dtype='float32'), 'a')
    b = tensor.fvector()
    c = tensor.fvector()
    f = pfunc([b,c], [], updates=[(a, (a+b.dimshuffle('x', 0)*c.dimshuffle(0, 'x')))], mode=mode_with_gpu)
    has_elemwise = False
    for i, node in enumerate(f.maker.env.toposort()):
        print >> sys.stdout, i, node
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    #let debugmode catch errors
    f(theano._asarray(numpy.random.rand(4), dtype='float32'), theano._asarray(numpy.random.rand(3), dtype='float32'))
Example #23
File: ops.py Project: intel/theano
    def perform(self, node, inp, out_):
        x, = inp
        out, = out_
        if out[0] is None:
            out[0] = theano._asarray(x.shape[self.i], dtype='int64')
        else:
            out[0][...] = x.shape[self.i]
Example #24
    def __init__(self, input, n_in, n_out, activation, rng, layer_name="LogReg", 
        W=None, b=None, borrow=True):

        # Weight matrix W
        if W is not None: self.W = shared(W, name=layer_name+"_W", borrow=borrow)
        elif activation in (relu,softplus): 
            W_val = _asarray(rng.normal(loc=0, scale=0.01, 
                size=(n_in, n_out)), dtype=floatX)
            self.W = shared(W_val, name=layer_name+"_W", borrow=borrow)
        else:
            self.W = shared(zeros((n_in, n_out), dtype=floatX), 
                name=layer_name+"_W",
                borrow=borrow)

        # Bias vector
        if b is not None: self.b = shared(b, name=layer_name+"_b", borrow=borrow)
        elif activation in (relu,softplus): 
            b_val = ones((n_out,), dtype=floatX)
            self.b = shared(value=b_val, borrow=True)
        else:
            self.b = shared(zeros((n_out,), dtype=floatX),
                name=layer_name+"_b",
                borrow=borrow)
            

        # Vector of prediction probabilities
        self.p_y_given_x = softmax(T.dot(input, self.W) + self.b)
        # Prediction
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # Parameters of the model
        self.params = [self.W, self.b]
Example #25
    def __init__(self, num_components=None, min_variance=0.0, whiten=False):
        """
        :type num_components: int
        :param num_components: this many components will be preserved, in
            decreasing order of variance (default None keeps all)

        :type min_variance: float
        :param min_variance: components with normalized variance [0-1] below
            this threshold will be discarded

        :type whiten: bool
        :param whiten: whether or not to divide projected features by their
            standard deviation
        """

        super(_PCABase, self).__init__()

        self.num_components = num_components
        self.min_variance = min_variance
        self.whiten = whiten

        self.W = None
        self.v = None
        self.mean = None

        self.component_cutoff = theano.shared(
                                    theano._asarray(0, dtype='int64'),
                                    name='component_cutoff')

        # This module really has no adjustable parameters -- once train()
        # is called once, they are frozen, and are not modified via gradient
        # descent.
        self._params = []
Example #26
def test_host_to_device():
    print >>sys.stdout, 'starting test_host_to_dev'
    for shape in ((), (3,), (2,3), (3,4,5,6)):
        a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
        c = numpy.asarray(b)
        assert numpy.all(a == c)
Example #27
def test_may_share_memory():
    a = scipy.sparse.csc_matrix(scipy.sparse.eye(5, 3))
    b = scipy.sparse.csc_matrix(scipy.sparse.eye(4, 3))
    as_ar = lambda a: theano._asarray(a, dtype="int32")
    for a_, b_, rep in [
        (a, a, True),
        (b, b, True),
        (a, b, False),
        (a, a.data, True),
        (a, a.indptr, True),
        (a, a.indices, True),
        (a, as_ar(a.shape), False),
        (a.data, a, True),
        (a.indptr, a, True),
        (a.indices, a, True),
        (as_ar(a.shape), a, False),
        (b, b.data, True),
        (b, b.indptr, True),
        (b, b.indices, True),
        (b, as_ar(b.shape), False),
        (b.data, b, True),
        (b.indptr, b, True),
        (b.indices, b, True),
        (as_ar(b.shape), b, False),
        (b.data, a, False),
        (b.indptr, a, False),
        (b.indices, a, False),
        (as_ar(b.shape), a, False),
    ]:

        assert SparseType.may_share_memory(a_, b_) == rep
Example #28
def test_setitem_matrix_bad_ndim():
    a = numpy.arange(27)
    a.resize((3,3,3))
    a = theano._asarray(a, dtype='float32')
    _a = cuda_ndarray.CudaNdarray(a)

    b = theano._asarray([7,8], dtype='float32')
    _b = cuda_ndarray.CudaNdarray(b)

    try:
        # attempt to assign the ndarray b with setitem
        _a[:,:,1] = _b
        assert False
    except NotImplementedError as e:
        #print e
        assert True
Example #29
    def perform(self, node, inputs, output_storage):
        a = inputs[0]
        axis = inputs[1]
        z = output_storage[0]
        z[0] = theano._asarray(
            np.argsort(a, axis, self.kind, self.order),
            dtype=node.outputs[0].dtype)
Example #30
    def __init__(self, which_set, multi_target=False):
        assert which_set in ['train', 'test']

        self.which_set = which_set

        X = SmallNORB.load(which_set, 'dat')

        # Casts to the GPU-supported float type, using theano._asarray(), a
        # safer alternative to numpy.asarray().
        #
        # TODO: move the dtype-casting to the view_converter's output space,
        #       once dtypes-for-spaces is merged into master.
        X = theano._asarray(X, theano.config.floatX)

        # Formats data as rows in a matrix, for DenseDesignMatrix
        X = X.reshape(-1, 2*numpy.prod(self.original_image_shape))

        # This is uint8
        y = SmallNORB.load(which_set, 'cat')
        if multi_target:
            y_extra = SmallNORB.load(which_set, 'info')
            y = numpy.hstack((y[:, numpy.newaxis], y_extra))

        datum_shape = ((2, ) +  # two stereo images
                       self.original_image_shape +
                       (1, ))  # one color channel

        # 's' is the stereo channel: 0 (left) or 1 (right)
        axes = ('b', 's', 0, 1, 'c')
        view_converter = StereoViewConverter(datum_shape, axes)

        super(SmallNORB, self).__init__(X=X,
                                        y=y,
                                        view_converter=view_converter)
Example #31
    def new_filters_expbounds(cls,
                              rng,
                              input,
                              n_in,
                              n_out,
                              n_terms,
                              dtype=None,
                              eps=1e-1,
                              exponent_range=(1.0, 3.0),
                              filter_range=1.0):
        """Return a KouhLayer instance with random parameters

        The parameters are drawn on a range [typically] suitable for fine-tuning by gradient
        descent.


        :param input: a tensor of shape (n_examples, n_in)

        :type n_in: positive int
        :param n_in: number of input dimensions

        :type n_out: positive int
        :param n_out: number of dimensions in rval.output

        :param n_terms: each (of n_out) complex-cell firing rate will be determined from this
        many 'simple cell' responses.

        :param eps: this amount is added to the softplus of filter responses as a baseline
        firing rate (that prevents a subsequent error from ``pow(0, p)``)

        :returns: KouhLayer instance with freshly-allocated random weights.

        """
        if input.type.ndim != 2:
            raise TypeError('matrix expected for input')

        if dtype is None:
            dtype = input.dtype
        _logger.debug('dtype %s' % dtype)

        def shared_uniform(low, high, size, name):
            return _shared_uniform(rng, low, high, size, dtype, name)

        f_list = [
            shared_uniform(low=-2.0 / numpy.sqrt(n_in),
                           high=2.0 / numpy.sqrt(n_in),
                           size=(n_in, n_out),
                           name='f_%i' % i) for i in xrange(n_terms)
        ]

        b_list = [
            shared_uniform(low=0, high=.01, size=(n_out, ), name='b_%i' % i)
            for i in xrange(n_terms)
        ]
        #x_list = [theano._asarray(eps, dtype=dtype)+softplus(tensor.dot(input, f_list[i])) for i in xrange(n_terms)]
        filter_range = theano._asarray(filter_range, dtype=dtype)
        half_filter_range = theano._asarray(filter_range / 2, dtype=dtype)
        x_list = [
            theano._asarray(filter_range + eps, dtype=dtype) +
            half_filter_range *
            softsign(tensor.dot(input, f_list[i]) + b_list[i])
            for i in xrange(n_terms)
        ]

        rval = cls.new_expbounds(rng,
                                 x_list,
                                 n_out,
                                 dtype=dtype,
                                 params=f_list + b_list,
                                 exponent_range=exponent_range)
        rval.f_list = f_list
        rval.input = input  # add the input to the returned object
        rval.filter_l1 = sum(abs(fi).sum() for fi in f_list)
        rval.filter_l2_sqr = sum((fi**2).sum() for fi in f_list)
        return rval
Example #32
 def perform(self, node, inputs, output_storage):
     # Fixed by GWT: ensure output from numpy matches expected output dtype
     # Addresses hyperopt issue #58
     output_storage[0][0] = theano._asarray(
         numpy.argsort(inputs[0]), dtype=node.outputs[0].type.dtype)
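The cast matters because numpy.argsort returns the platform default integer dtype (usually int64), while the node's declared output dtype may differ; a minimal illustration:

import numpy
import theano

idx = numpy.argsort(numpy.array([3.0, 1.0, 2.0]))   # platform-dependent integer dtype
idx32 = theano._asarray(idx, dtype='int32')         # cast to the dtype the output type expects
print(idx32.dtype)                                  # int32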
Example #33
            op = careduce_op(scalar_op, axis=pattern)
            pat = tensor_pattern_to_gpu_pattern(shape, pattern)
            #GpuCAReduce{maximum} support only those patterns
            if scalar_op is theano.scalar.maximum and pat not in [
                (0, 1), (0, 1, 1), (0, 1, 1)]:
                continue
            a = tensor.TensorType('float32', (False,) * len(shape))()
            dim_pattern = range(len(shape))
            dim_pattern[0] = 1
            dim_pattern[1] = 0
            a = a.dimshuffle(dim_pattern)
            b = op(a)
            val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
    #        val = numpy.ones(shape)
    #        val = numpy.arange(numpy.prod(shape)).reshape(shape)
            val = theano._asarray(val, dtype='float32')
            f = theano.function([a], b, mode=mode_with_gpu)
            f2 = theano.function([a], b, mode=mode_without_gpu)
            assert tcn.GpuCAReduce in [x.op.__class__
                                       for x in f.maker.fgraph.toposort()]
            assert op.__class__ in [x.op.__class__
                                    for x in f2.maker.fgraph.toposort()]
            assert _allclose(f2(val), f(val)), ('shape', shape,
                                                'pattern', pattern,
                                                sum([shape[i] for i in pattern]))

            #test with broadcast
        for shape, pattern in [((5,),[0]),
                               ((5,4),[0,1]),((5,4),[0]),
                               ((5,4,3),[0]),((5,4,3),[0,1]),
                               ((5,4,3),[2]),((5,4,3),[0,1,2]),
Example #34
def rand_cuda_ndarray(shape):
    return cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape),
                                                    dtype='float32'))
Example #35
def cmp_run_conv_nnet2_classif(seed,
                               isize,
                               ksize,
                               bsize,
                               ignore_error=False,
                               n_train=10,
                               gpu_only=False,
                               cpu_only=False,
                               float_atol=1e-06,
                               check_isfinite=True,
                               pickle=False,
                               verbose=0,
                               version=-1):
    """Run the nnet2 function on 1 or 2 devices, and compares the results.

       float_atol: None mean use the default value.
       check_isfinite: the debug mode option. We forward this value to debug mode.
                       For some parameter CrossentropyCategorical1Hot op generate inf when not optimized.
    """
    if config.mode == 'DEBUG_MODE':
        n_train = 1

    # Change global tolerance, used in DebugMode for instance
    orig_float32_atol = theano.tensor.basic.float32_atol
    try:
        if float_atol:
            #print "float_atol", float_atol
            theano.tensor.basic.float32_atol = float_atol

        if gpu_only and cpu_only:
            raise ValueError("Please use only one of cpu_only and gpu_only")
        elif cpu_only:
            use_gpu = False
            compare = False
        elif gpu_only:
            use_gpu = True
            compare = False
        else:
            compare = True

        if not compare:
            return run_conv_nnet2_classif(use_gpu=use_gpu,
                                          seed=seed,
                                          isize=isize,
                                          ksize=ksize,
                                          bsize=bsize,
                                          n_train=n_train,
                                          check_isfinite=check_isfinite,
                                          pickle=pickle,
                                          verbose=verbose,
                                          version=version)

        utt.seed_rng(seed)  # Seeds numpy.random with seed
        train_cpu, params_cpu, x_shape, y_shape, mode_cpu = \
                build_conv_nnet2_classif(
                        use_gpu=False,
                        isize=isize,
                        ksize=ksize,
                        n_batch=bsize,
                        verbose=verbose,
                        version=version,
                        check_isfinite=check_isfinite)

        utt.seed_rng(seed)  # Seeds numpy.random with seed
        train_gpu, params_gpu, x_shape_gpu, y_shape_gpu, mode_gpu = \
                build_conv_nnet2_classif(
                        use_gpu=True,
                        isize=isize,
                        ksize=ksize,
                        n_batch=bsize,
                        verbose=verbose,
                        version=version,
                        check_isfinite=check_isfinite)

        assert x_shape == x_shape_gpu
        assert y_shape == y_shape_gpu

        xval = my_rand(*x_shape)
        yval = my_rand(*y_shape)
        lr = theano._asarray(0.01, dtype='float32')

        time_cpu = 0
        time_gpu = 0

        for i in range(n_train):
            # Train one batch on CPU
            t0 = time.time()
            rval_cpu = train_cpu(xval, yval, lr)[0]
            t1 = time.time()
            time_cpu += (t1 - t0)

            # Train one batch on GPU
            t0 = time.time()
            rval_gpu = train_gpu(xval, yval, lr)[0]
            t1 = time.time()
            time_gpu += (t1 - t0)

            # Compare results
            if (verbose or not numpy.allclose(
                    rval_cpu, rval_gpu, rtol=1e-5, atol=float_atol)):
                print "At batch:", i + 1
                print "CPU:", rval_cpu
                print "GPU:", rval_gpu
                print "abs diff:", numpy.absolute(rval_gpu - rval_cpu)
                print "rel diff:", numpy.absolute(
                    (rval_gpu - rval_cpu) / rval_gpu)

            if not ignore_error:
                assert numpy.allclose(rval_cpu,
                                      rval_gpu,
                                      rtol=1e-5,
                                      atol=float_atol)

            # Synchronize parameters to start from the same point next time
            if i < n_train - 1:
                for cpu_p, gpu_p in zip(params_cpu, params_gpu):
                    cpu_p.set_value(gpu_p.get_value(borrow=False), borrow=True)

    finally:
        theano.tensor.basic.float32_atol = orig_float32_atol

    if pickle:
        if isinstance(mode_cpu, theano.compile.ProfileMode):
            import pickle
            print "BEGIN CPU profile mode dump"
            print pickle.dumps(cpu_mode)
            print "END CPU profile mode dump"
        if isinstance(mode_gpu, theano.compile.ProfileMode):
            import pickle
            print "BEGIN GPU profile mode dump"
            print pickle.dumps(gpu_mode)
            print "END GPU profile mode dump"
Example #36
def my_rand(*shape):
    return theano._asarray(numpy.random.rand(*shape), dtype='float32')
Example #37
    def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape,
        batch_size, activation, layer_name="Conv", rng=RandomState(1234), 
        borrow=True, stride=1, W=None, b=None, b_scale=0.1, W_scale=0.01, fast_conv=False):

        """
        video_shape: (frames, height, width)
        kernel_shape: (frames, height, width)

        W_shape: (out, in, kern_frames, kern_height, kern_width)
        """

        self.__dict__.update(locals())
        del self.self
        
        # init W
        # print type(W)
        # note: checking W elementwise against None reduces to "W was actually provided"
        W_flag = W is not None

        if W_flag: self.W = shared(array(W, dtype=floatX), borrow=borrow, name=layer_name+'_W') # wudi made it shared
        else: 
            # fan in: filter time x filter height x filter width x input maps
            fan_in = prod(kernel_shape)*n_in_maps
            norm_scale = 2. * sqrt( 1. / fan_in )
            if activation in ('relu', 'softplus', 'leaky_relu'): 
                print activation
                norm_scale = W_scale
            W_shape = (n_out_maps, n_in_maps)+kernel_shape
            W_val = _asarray(rng.normal(loc=0, scale=norm_scale, size=W_shape),\
                        dtype=floatX)
            # W_val = ones(W_shape, dtype=floatX)*W_scale
            self.W = shared(value=W_val, borrow=borrow, name=layer_name+'_W')
        self.params = [self.W]

        # init bias
        b_flag = b is not None

        if b_flag:
            self.b = shared(array(b, dtype=floatX), name=layer_name+"_b", borrow=borrow) # wudi made it shared
        elif activation in ('relu', 'softplus', 'leaky_relu'): 
            # print b_scale
            b_val = (ones((n_out_maps,), dtype=floatX)*b_scale).astype(floatX)
            self.b = shared(b_val, name=layer_name+"_b", borrow=borrow)
        else: 
            b_val = zeros((n_out_maps,), dtype=floatX)
            self.b = shared(b_val, name=layer_name+"_b", borrow=borrow)
        self.params.append(self.b)

        # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
        n_fr, h, w = video_shape
        n_fr_k, h_k, w_k = kernel_shape
        out = conv3d(
                signals=input.dimshuffle([0,2,1,3,4]), 
                filters=self.W.dimshuffle([0,2,1,3,4]), 
                signals_shape=(batch_size, n_fr, n_in_maps, h, w), 
                filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),         
                border_mode='valid',
                fast_conv=fast_conv,
                stride=stride
                ).dimshuffle([0,2,1,3,4])

        out += self.b.dimshuffle('x',0,'x','x','x')

        self.output = eval(activation)(out)
Example #38
    def perform(self, node, inp, out_):
        x, = inp
        out, = out_
        out[0] = theano._asarray(x.shape, dtype='int64')
Example #39
    def filter(self, data, strict=False, allow_downcast=None):
        """
        Convert `data` to something which can be associated to a
        `TensorVariable`.

        This function is not meant to be called in user code. It is for
        `Linker` instances to use when running a compiled graph.

        """
        # Explicit error message when one accidentally uses a Variable as
        # input (typical mistake, especially with shared variables).
        if isinstance(data, Variable):
            raise TypeError(
                'Expected an array-like object, but found a Variable: '
                'maybe you are trying to call a function on a (possibly '
                'shared) variable instead of a numeric array?')

        if ((type(data) is numpy.ndarray) and
                (data.dtype == self.numpy_dtype)):
            if data.dtype.num != self.numpy_dtype.num:
                data = theano._asarray(data, dtype=self.dtype)
            # -- now fall through to ndim check
        elif ((type(data) is numpy.memmap) and
              (data.dtype == self.numpy_dtype)):
            # numpy.memmap is a "safe" subclass of ndarray,
            # so we can use it wherever we expect a base ndarray.
            # however, casting it would defeat the purpose of not
            # loading the whole data into memory
            pass
        elif strict:
            # If any of the two conditions above was not met,
            # we raise a meaningful TypeError.
            if not (type(data) is numpy.ndarray):
                raise TypeError("%s expected a ndarray object." % self,
                                data, type(data))
            if data.dtype != self.numpy_dtype:
                raise TypeError(("%s expected a ndarray object with "
                                "dtype = %s (got %s).") %
                                (self, self.numpy_dtype, data.dtype))
            assert False, "This point should never be reached."
        else:
            if allow_downcast:
                # Convert to self.dtype, regardless of the type of data
                data = theano._asarray(data, dtype=self.dtype)
                # TODO: consider to pad shape with ones to make it consistent
                # with self.broadcastable... like vector->row type thing
            else:
                if isinstance(data, numpy.ndarray):
                    # Check if self.dtype can accurately represent data
                    # (do not try to convert the data)
                    up_dtype = scal.upcast(self.dtype, data.dtype)
                    if up_dtype == self.dtype:
                        # Bug in the following line when data is a
                        # scalar array, see
                        # http://projects.scipy.org/numpy/ticket/1611
                        # data = data.astype(self.dtype)
                        data = theano._asarray(data, dtype=self.dtype)
                    if up_dtype != self.dtype:
                        err_msg = (
                            '%s cannot store a value of dtype %s without '
                            'risking loss of precision. If you do not mind '
                            'this loss, you can: '
                            '1) explicitly cast your data to %s, or '
                            '2) set "allow_input_downcast=True" when calling '
                            '"function".'
                            % (self, data.dtype, self.dtype))
                        raise TypeError(err_msg, data)
                elif (allow_downcast is None and
                        type(data) is float and
                        self.dtype == theano.config.floatX):
                    # Special case where we allow downcasting of Python float
                    # literals to floatX, even when floatX=='float32'
                    data = theano._asarray(data, self.dtype)
                else:
                    # data has to be converted.
                    # Check that this conversion is lossless
                    converted_data = theano._asarray(data, self.dtype)
                    # We use the `values_eq` static function from TensorType
                    # to handle NaN values.
                    if TensorType.values_eq(numpy.asarray(data),
                                            converted_data,
                                            force_same_dtype=False):
                        data = converted_data
                    else:
                        # Do not print a too long description of data
                        # (ndarray truncates it, but it's not sure for data)
                        str_data = str(data)
                        if len(str_data) > 80:
                            str_data = str_data[:75] + '(...)'

                        err_msg = (
                            '%s cannot store accurately value %s, '
                            'it would be represented as %s. '
                            'If you do not mind this precision loss, you can: '
                            '1) explicitly convert your data to a numpy array '
                            'of dtype %s, or '
                            '2) set "allow_input_downcast=True" when calling '
                            '"function".'
                            % (self, data, converted_data, self.dtype))
                        raise TypeError(err_msg, data)

        if self.ndim != data.ndim:
            raise TypeError("Wrong number of dimensions: expected %s,"
                            " got %s with shape %s." % (self.ndim, data.ndim,
                                                        data.shape))
        if not data.flags.aligned:
            try:
                msg = "object buffer" + str(data.data)
            except AttributeError:
                msg = ""
            raise TypeError("The numpy.ndarray object is not aligned."
                            " Theano C code does not support that.",
                            msg,
                            "object shape", data.shape,
                            "object strides", data.strides,
                            "object dtype", data.dtype)

        i = 0
        for b in self.broadcastable:
            if b and data.shape[i] != 1:
                raise TypeError("Non-unit value on shape on a broadcastable"
                                " dimension.", data.shape, self.broadcastable)
            i += 1
        if (self.filter_checks_isfinite and
                not numpy.all(numpy.isfinite(data))):
            raise ValueError("non-finite elements not allowed")
        return data
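The filter logic above is what makes a float32 input refuse a float64 value unless downcasting is explicitly requested. A minimal sketch of that behaviour (variable names are illustrative, assuming a standard Theano install):

import numpy
import theano
import theano.tensor as T

x = T.fvector('x')  # float32 input, so its TensorType.filter is strict
f_strict = theano.function([x], x * 2)
f_loose = theano.function([x], x * 2, allow_input_downcast=True)

data64 = numpy.arange(3, dtype='float64')
try:
    f_strict(data64)  # refused: float64 -> float32 risks precision loss
except TypeError:
    pass
print(f_loose(data64))  # accepted: the downcast was explicitly allowed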
Ejemplo n.º 40
0
def sharedX(value, name=None, borrow=False, dtype=None):
    if dtype is None:
        dtype = theano.config.floatX
    return theano.shared(theano._asarray(value, dtype=dtype),
                         name=name,
                         borrow=borrow)
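A minimal usage sketch of sharedX (names and values are illustrative):

W = sharedX([[0., 1.], [2., 3.]], name='W')
assert W.dtype == theano.config.floatX
assert W.name == 'W'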
Ejemplo n.º 41
0
Archivo: mixer.py Proyecto: gunkisu/asr
def castX(value):
    return theano._asarray(value, dtype=theano.config.floatX)
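A quick check of what castX returns: a plain numpy array in floatX, not a symbolic variable (the value below is illustrative):

v = castX([1, 2, 3])
assert v.dtype == theano.config.floatX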
Ejemplo n.º 42
0
def my_zeros(*shape):
    return theano._asarray(numpy.zeros(*shape), dtype='float32')
Ejemplo n.º 43
0
 def just_vals(v):
     return T.Reshape(2)(v, theano._asarray([2, 3], dtype='int32'))
Ejemplo n.º 44
0
def run_conv_nnet2(use_gpu):  # pretend we are training LeNet for MNIST
    if use_gpu:
        shared_fn = tcn.shared_constructor
    else:
        shared_fn = shared

    # Cumulative rounding error affects this comparison of results, so we
    # lower the tolerance.
    # TODO: why do the last two examples see a lower error? Are we converging?
    # n_train=10, n_batch=3, n_kern=1, n_kern1=1, error seen of 1e-9
    # n_train=10, n_batch=3, n_kern=10, n_kern1=1, error seen of -1.27777e-06
    # n_train=10, n_batch=3, n_kern=10, n_kern1=10, error seen of -6.91377e-05
    # n_train=10, n_batch=30, n_kern=10, n_kern1=10, error seen of -0.00185963
    # n_train=10, n_batch=60, n_kern=10, n_kern1=10, error seen of -5.26905e-05
    # n_train=30, n_batch=60, n_kern=10, n_kern1=10, error seen of -3.8147e-06

    # n_train=30, n_batch=60, n_kern=20, n_kern1=10, error seen of 6.82771e-05
    # n_train=30, n_batch=60, n_kern=20, n_kern1=30, error seen of 0.000231534
    n_batch = 60
    shape_img = (n_batch, 1, 32, 32)

    n_kern = 20
    shape_kern = (n_kern, 1, 5, 5)

    n_kern1 = 10
    shape_kern1 = (n_kern1, n_kern, 5, 5)

    n_train = 30
    if config.mode == 'DEBUG_MODE':
        n_train = 1

    logical_hid_shape = tcn.blas.GpuConv.logical_output_shape_2d(
        tuple(shape_img[2:]), tuple(shape_kern[2:]), 'valid')
    logical_hid_shape1 = tcn.blas.GpuConv.logical_output_shape_2d(
        (logical_hid_shape[0] // 2, logical_hid_shape[1] // 2),
        tuple(shape_kern1[2:]), 'valid')
    n_hid = n_kern1 * logical_hid_shape1[0] * logical_hid_shape1[1]
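    # For the sizes above this works out to: a valid 5x5 convolution on 32x32
    # images gives 28x28 maps, the 2x2 subsampling below gives 14x14, and the
    # second valid 5x5 convolution gives 10x10, so n_hid = n_kern1 * 10 * 10.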
    n_out = 10

    w0 = shared_fn(0.01 * (my_rand(*shape_kern) - 0.5), 'w0')
    b0 = shared_fn(my_zeros((n_kern, )), 'b0')
    w1 = shared_fn(0.01 * (my_rand(*shape_kern1) - 0.5), 'w1')
    b1 = shared_fn(my_zeros((n_kern1, )), 'b1')
    v = shared_fn(my_zeros((n_hid, n_out)), 'v')
    c = shared_fn(my_zeros(n_out), 'c')

    x = tensor.Tensor(dtype='float32', broadcastable=(0, 1, 0, 0))('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    conv_op = conv.ConvOp(shape_img[2:], shape_kern[2:], n_kern, n_batch, 1, 1)
    conv_op1 = conv.ConvOp(
        (n_kern, logical_hid_shape[0] // 2, logical_hid_shape[1] // 2),
        shape_kern1[2:], n_kern1, n_batch, 1, 1)

    hid = tensor.tanh(conv_op(x, w0) + b0.dimshuffle((0, 'x', 'x')))
    hid1 = tensor.tanh(
        conv_op1(hid[:, :, ::2, ::2], w1) + b1.dimshuffle((0, 'x', 'x')))
    hid_flat = hid1.reshape((n_batch, n_hid))
    out = tensor.tanh(tensor.dot(hid_flat, v) + c)
    loss = tensor.sum(0.5 * (out - y)**2 * lr)
    #print 'loss type', loss.type

    params = [w0, b0, w1, b1, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)

    #print 'building pfunc ...'
    train = pfunc([x, y, lr], [loss],
                  mode=mode,
                  updates=[(p, p - g) for p, g in zip(params, gparams)])

    #    for i, n in enumerate(train.maker.fgraph.toposort()):
    #        print i, n

    xval = my_rand(*shape_img)
    yval = my_rand(n_batch, n_out)  # int32 would make all values 0...
    lr = theano._asarray(0.01, dtype='float32')
    for i in xrange(n_train):
        rval = train(xval, yval, lr)

    print_mode(mode)
    return rval
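A hedged usage sketch: the helper depends on the surrounding test module (my_rand, my_zeros, get_mode, print_mode, pfunc, tcn, conv), so it is only meant to be run from there, roughly as:

cpu_loss = run_conv_nnet2(use_gpu=False)  # reference run on the CPU
gpu_loss = run_conv_nnet2(use_gpu=True)   # same net on the GPU
# compare cpu_loss and gpu_loss within the lowered tolerance discussed above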
Ejemplo n.º 45
0
def sharedX(value, name=None, borrow=False):
    """Transform value into a shared variable of type floatX"""
    return theano.shared(theano._asarray(value, dtype=theano.config.floatX),
                         name=name,
                         borrow=borrow)
Ejemplo n.º 46
0
def _params_allgood(ishape,
                    kshape,
                    mode,
                    subsample=(1, 1),
                    img_stride=(1, 1),
                    kern_stride=(1, 1),
                    version=-1,
                    verbose=0,
                    random=True,
                    print_=None,
                    id=None,
                    rtol=1e-5,
                    atol=1e-8,
                    nb_iter=0,
                    ones=False,
                    compile_kshp=None,
                    theano_mode=None,
                    cls=None):
    #
    # This function is the core of several of the big unit-test drivers,
    # but it can also be used very directly on its own to test a specific
    # kind of convolution.
    #
    # See `test_example` (above) for an example of how to use this directly.
    #
    # :param kshape: (4d) The shape of the kernel at run time.
    # :param compile_kshp: (2d) Hard-code the shape of the kernel in
    #                      the generated code. This is supposed to be
    #                      faster, but we need to check that we raise
    #                      an error if the input has the wrong shape.
    #
    if ones:
        assert not random
        npy_img = theano._asarray(numpy.ones(ishape), dtype='float32')
        npy_kern = -theano._asarray(numpy.ones(kshape), dtype='float32')
    elif random:
        npy_img = theano._asarray(numpy.random.rand(*ishape) + 1,
                                  dtype='float32')
        npy_kern = theano._asarray(numpy.random.rand(*kshape) - 2,
                                   dtype='float32')
    else:
        npy_img = theano._asarray(numpy.arange(
            numpy.prod(ishape)).reshape(ishape),
                                  dtype='float32') + 1
        npy_kern = -(
            theano._asarray(numpy.arange(numpy.prod(kshape)).reshape(kshape),
                            dtype='float32') + 1)

    img = cuda_ndarray.CudaNdarray(npy_img)
    kern = cuda_ndarray.CudaNdarray(npy_kern)

    # We take the strides after the transfer, as we make the data
    # c-contiguous on the GPU.
    if img_stride != (1, 1):
        img = img[:, :, ::img_stride[0], ::img_stride[1]]
        npy_img = npy_img[:, :, ::img_stride[0], ::img_stride[1]]
    if kern_stride != (1, 1):
        kern = kern[:, :, ::kern_stride[0], ::kern_stride[1]]
        npy_kern = npy_kern[:, :, ::kern_stride[0], ::kern_stride[1]]

    i = cuda.CudaNdarrayType(broadcastable=[sh == 1 for sh in npy_img.shape])()
    k = cuda.CudaNdarrayType(broadcastable=[sh == 1
                                            for sh in npy_kern.shape])()
    op = theano.sandbox.cuda.blas.GpuConv(border_mode=mode,
                                          subsample=subsample,
                                          version=version,
                                          verbose=verbose,
                                          kshp=compile_kshp)(i, k)
    f = theano.function([i, k], op, mode=theano_mode)
    if cls is not None:
        assert any([
            isinstance(node.op, cls) for node in f.maker.fgraph.toposort()
        ]), "Cannot find class %r in %r" % (cls, f.maker.fgraph.toposort())
    t2 = time.time()
    gpuval = f(img, kern)
    t3 = time.time()
    for i in range(nb_iter):
        gpuval2 = f(img, kern)
        assert (numpy.asarray(gpuval) == numpy.asarray(gpuval2)).all()
    gpuval = numpy.asarray(gpuval)

    # CPU val computed after GPU val to get the GPU errors.
    t0 = time.time()
    cpuval = py_conv(npy_img, npy_kern, mode, subsample)
    t1 = time.time()

    assert gpuval.shape == cpuval.shape, ("shape mismatch", gpuval.shape,
                                          cpuval.shape)
    assert_allclose(cpuval, gpuval, rtol=rtol, atol=atol)
    assert numpy.all(numpy.isfinite(gpuval)), gpuval
    assert [(sh == 1) is br
            for sh, br in zip(cpuval.shape[:2], op.type.broadcastable[:2])]

    if (t2 is not None):
        if mode == 'valid':
            approx_fp = cpuval.size * ishape[1] * kshape[2] * kshape[3] * 2
        else:
            approx_fp = (ishape[0] * kshape[0] * kshape[1] * kshape[2] *
                         kshape[3] * ishape[2] * ishape[3] * 2)
        approx_fp /= 1e6
        cpu_mflops = approx_fp / (t1 - t0)
        gpu_mflops = approx_fp / (t3 - t2)
        if verbose > 0:
            print('%15s' % str(ishape),
                  '%15s' % str(kshape),
                  end=' ',
                  file=sys.stdout)
            print('%12.5f  %7.2f %7.2f %7.1f' %
                  (approx_fp, cpu_mflops, gpu_mflops, (t1 - t0) / (t3 - t2)),
                  file=sys.stdout)
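A hedged invocation sketch (the parameter values are illustrative, and the helper assumes the test module's imports such as cuda, cuda_ndarray, py_conv and assert_allclose):

# compare CPU and GPU 'valid' convolution on a small random problem
_params_allgood(ishape=(2, 3, 8, 8), kshape=(4, 3, 5, 5), mode='valid',
                version=-1, verbose=1)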
Ejemplo n.º 47
0
 def as_ar(a):
     return theano._asarray(a, dtype="int32")
Ejemplo n.º 48
0
 def perform(self, node, inp, out):
     from theano.sandbox.cuda import filter as cuda_filter
     x, = inp
     z, = out
     z[0] = cuda_filter(theano._asarray(x, dtype='float32'),
                        tuple([0] * x.ndim), 0, z[0])
Ejemplo n.º 49
0
 def perform(self, node, inp, out_):
     (x,) = inp
     (out,) = out_
     out[0] = theano._asarray(np.shape(x), dtype="int64")
Ejemplo n.º 50
0
def castX(x):
    return theano._asarray(x, dtype=theano.config.floatX)
Ejemplo n.º 51
0
    def new_expbounds(cls,
                      rng,
                      x_list,
                      n_out,
                      dtype=None,
                      params=None,
                      updates=None,
                      exponent_range=(1.0, 3.0)):
        """
        """
        if params is None:
            params = []
        if updates is None:
            updates = []
        if dtype is None:
            dtype = x_list[0].dtype
        n_terms = len(x_list)

        def shared_uniform(low, high, size, name):
            return _shared_uniform(rng, low, high, size, dtype, name)

        use_softmax_w = True

        if use_softmax_w:
            w = shared_uniform(low=-.1,
                               high=.1,
                               size=(n_out, n_terms),
                               name='Kouh2008::w')
            w_sm = theano.tensor.nnet.softmax(w)
            w_list = [w_sm[:, i] for i in xrange(n_terms)]
            w_l1 = abs(w).sum()
            w_l2_sqr = (w**2).sum()
        else:
            w_list = [
                shared_uniform(low=-2.0 / n_terms,
                               high=2.0 / n_terms,
                               size=(n_out, ),
                               name='w_%i' % i) for i in xrange(n_terms)
            ]
            w_l1 = sum(abs(wi).sum() for wi in w_list)
            w_l2_sqr = sum((wi**2).sum() for wi in w_list)

        e_range_low, e_range_high = exponent_range
        e_range_low = theano._asarray(e_range_low, dtype=dtype)
        e_range_high = theano._asarray(e_range_high, dtype=dtype)
        e_range_mag = e_range_high - e_range_low
        if e_range_mag < 0:
            raise ValueError('exponent range must have low <= high')

        p_unbounded = shared_uniform(low=-0.1,
                                     high=0.1,
                                     size=(n_out, ),
                                     name='p')
        q_unbounded = shared_uniform(low=-0.1,
                                     high=0.1,
                                     size=(n_out, ),
                                     name='q')
        r_unbounded = shared_uniform(low=-0.1,
                                     high=0.1,
                                     size=(n_out, ),
                                     name='r')
        k_unbounded = shared_uniform(low=-0.2,
                                     high=0.2,
                                     size=(n_out, ),
                                     name='k')  # biases

        p = tensor.nnet.sigmoid(p_unbounded) * e_range_mag + e_range_low
        q = tensor.nnet.sigmoid(q_unbounded) * e_range_mag + e_range_low
        r = tensor.nnet.sigmoid(r_unbounded) * \
                theano._asarray(1.0/e_range_low - 1.0/e_range_high, dtype=dtype) \
                + theano._asarray(1.0/e_range_high, dtype=dtype)

        k = softsign(k_unbounded)

        if use_softmax_w:
            rval = cls(
                w_list,
                x_list,
                p,
                q,
                r,
                k,
                params=[p_unbounded, q_unbounded, r_unbounded, k_unbounded, w
                        ] + params,
                updates=updates)
        else:
            rval = cls(
                w_list,
                x_list,
                p,
                q,
                r,
                k,
                params=[p_unbounded, q_unbounded, r_unbounded, k_unbounded] +
                w_list + params,
                updates=updates)
        rval.p_unbounded = p_unbounded
        rval.q_unbounded = q_unbounded
        rval.r_unbounded = r_unbounded
        rval.k_unbounded = k_unbounded
        rval.exp_l1 = abs(p_unbounded).sum() + abs(q_unbounded).sum() + abs(
            r_unbounded).sum()
        rval.exp_l2_sqr = (p_unbounded**2).sum() + (q_unbounded**2).sum() + (
            r_unbounded**2).sum()
        rval.w_l1 = w_l1
        rval.w_l2_sqr = w_l2_sqr
        return rval
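The sigmoid reparameterisation above is what keeps p, q and r inside the requested range; a standalone numeric sketch of that mapping (numpy only, illustrative values):

import numpy

def bounded(u, low, high):
    # sigmoid(u) lies in (0, 1), so this affine map stays inside (low, high)
    return 1.0 / (1.0 + numpy.exp(-u)) * (high - low) + low

for u in (-10.0, 0.0, 10.0):
    p = bounded(u, 1.0, 3.0)        # like p and q above, bounded in (1, 3)
    r = bounded(u, 1.0 / 3.0, 1.0)  # like r above, bounded in (1/high, 1/low)
    assert 1.0 < p < 3.0
    assert 1.0 / 3.0 < r < 1.0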
Ejemplo n.º 52
0
def sharedX(x):
    return theano.shared(theano._asarray(x, dtype=theano.config.floatX))
Ejemplo n.º 53
0
def _shared_uniform(rng, low, high, size, dtype, name=None):
    return shared(
        theano._asarray(rng.uniform(low=low, high=high, size=size),
                        dtype=dtype), name)
Ejemplo n.º 54
0
def _infer_ndim_bcast(ndim, shape, *args):
    """
    Infer the number of dimensions from the shape or the other arguments.

    :rtype: (int, variable, tuple) triple, where the variable is an integer
    vector, and the tuple contains Booleans.
    :returns: the first element returned is the inferred number of dimensions.
    The second element is the shape inferred (combining symbolic and constant
    information from shape and args).
    The third element is a broadcasting pattern corresponding to that shape.
    """

    # Find the minimum value of ndim required by the *args
    if args:
        args_ndim = max(arg.ndim for arg in args)
    else:
        args_ndim = 0

    # there is a convention that -1 means the corresponding shape of a
    # potentially-broadcasted symbolic arg
    if (isinstance(shape, (tuple, list))
            and numpy.all(numpy.asarray(shape)>=0)):
        bcast = [(s==1) for s in shape]
        v_shape = tensor.TensorConstant(type=tensor.lvector, data=theano._asarray(shape, dtype='int64'))
        shape_ndim = len(shape)
        if ndim is None:
            ndim = shape_ndim
        else:
            if shape_ndim != ndim:
                raise ValueError('ndim should be equal to len(shape), but\n',
                            'ndim = %s, len(shape) = %s, shape = %s'
                            % (ndim, shape_ndim, shape))
    elif isinstance(shape, (tuple, list)):
        # there is a convention that -1 means the corresponding shape of a
        # potentially-broadcasted symbolic arg
        #
        # This case combines together symbolic and non-symbolic shape
        # information
        if ndim is None:
            ndim=args_ndim
        else:
            ndim = max(args_ndim, ndim)
        ndim = max(args_ndim, len(shape))
        shape = [-1]*(ndim - len(shape))+list(shape)
        bcast = []
        pre_v_shape = []
        for i,s in enumerate(shape):
            if hasattr(s, 'type'): # s is symbolic
                bcast.append(False) # todo - introspect further
                pre_v_shape.append(s)
            else:
                if s >= 0:
                    pre_v_shape.append(tensor.as_tensor_variable(s))
                    bcast.append((s==1))
                elif s == -1:
                    n_a_i = 0
                    for a in args:
                        # ndim: _   _   _   _   _   _
                        # ashp:         s0  s1  s2  s3
                        #           i
                        if i >= ndim - a.ndim:
                            n_a_i += 1
                            a_i = i + a.ndim -ndim
                            if not a.broadcastable[a_i]:
                                pre_v_shape.append(a.shape[a_i])
                                bcast.append(False)
                                break
                    else:
                        if n_a_i == 0:
                            raise ValueError(('Auto-shape of -1 must overlap '
                                'with the shape of one of the broadcastable '
                                'inputs'))
                        else:
                            pre_v_shape.append(tensor.as_tensor_variable(1))
                            bcast.append(True)
                else:
                    raise ValueError('negative shape', s)
        # post-condition: shape may still contain both symbolic and non-symbolic things
        v_shape = tensor.stack(*pre_v_shape)

    elif shape is None:
        # The number of drawn samples will be determined automatically,
        # but we need to know ndim
        if not args:
            raise TypeError(('_infer_ndim_bcast cannot infer shape without'
                ' either shape or args'))
        template = reduce(lambda a,b:a+b, args)
        v_shape = template.shape
        bcast = template.broadcastable
        ndim = template.ndim
    else:
        v_shape = tensor.as_tensor_variable(shape)
        if ndim is None:
            ndim = tensor.get_vector_length(v_shape)
        bcast = [False]*ndim

    if not (v_shape.dtype.startswith('int') or v_shape.dtype.startswith('uint')):
        raise TypeError('shape must be an integer vector or list', v_shape.dtype)

    if args_ndim > ndim:
        raise ValueError('ndim should be at least as big as required by args value',
                    (ndim, args_ndim), args)

    assert ndim == len(bcast)
    return ndim, tensor.cast(v_shape, 'int32'), tuple(bcast)
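A minimal sketch of the constant-shape branch above (run inside a module where `tensor` and `theano` are imported as in this file; the shape is illustrative):

ndim, v_shape, bcast = _infer_ndim_bcast(None, (5, 1, 3))
assert ndim == 3
assert bcast == (False, True, False)  # the size-1 dimension is broadcastable
# v_shape is a symbolic int32 vector holding (5, 1, 3)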
Ejemplo n.º 55
0
def test_sum():
    """
    test sum pattern 1, 11, 10, 01, 001, 010, 100, 110, 011, 111,
    0011, 0101, 0111, 1011, 1111

    test sum pattern implemented with reshape:
    1000, 0100, 0010, 0001, 11111

    others implemented by reshape that are not tested
    0011,0101,0110,1001,1010,1100
    1110,1101,1011

    TODO: test with broadcast
    """
    for shape, pattern in [((100,3,1300),[1]),
                           ((0,),[0]),((5,),[0]),
                           ((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger than 32 for some opt test.
                           ((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
                           ((0,0,0,0),[0,1,2,3]),
                           ((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
                           ((5,4,3,10,11),[1,2]),
                           ((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),

                           #test shape bigger than 4096 on each dimension to make sure that we work correctly when we don't have enough threads/blocks in each dimension
                           ((4100,3),[0]),((3,4101),[0]),#10
                           ((1024,33),[0]),((33,1024),[0]),#10
                           ((1025,33),[0]),((33,1025),[0]),#10

                           ((4100,3),[1]),((3,4101),[1]),#01
                           ((1024,33),[1]),((33,1024),[1]),#01
                           ((1025,33),[1]),((33,1025),[1]),#01

                           ((4100,3),[0,1]),((3,4101),[0,1]),#11
                           ((1024,33),[0,1]),((33,1024),[0,1]),#01
                           ((1025,33),[0,1]),((33,1025),[0,1]),#01

                           ((4100,4,3),[0]),((5,4100,3),[0]),((5,4,4100),[0]),#100
                           ((4100,4,3),[1]),((5,4100,3),[1]),((5,4,4100),[1]),#010
                           ((4100,4,3),[2]),((5,4100,3),[2]),((5,4,4100),[2]),#001
                           ((4100,4,3),[0,1]),((5,4100,3),[0,1]),((5,4,4100),[0,1]),#110
                           ((4100,4,3),[1,2]),((5,4100,3),[1,2]),((5,4,4100),[1,2]),#011
                           #((4100,4,3),[0,2]),((5,4100,3),[0,2]),((5,4,4100),[0,2]),#101 ##not implemented
                           ((4100,4,3),[0,1,2]),((5,4100,3),[0,1,2]),((5,4,4100),[0,1,2]),#111

                           ((4100,4,3,2),[2,3]),((4,4100,3,2),[2,3]),((4,3,4100,2),[2,3]),((4,3,2,4100),[2,3]),#0011
                           ((4100,4,3,2),[1,3]),((4,4100,3,2),[1,3]),((4,3,4100,2),[1,3]),((4,3,2,4100),[1,3]),#0101
                           ((4100,4,3,2),[0,2,3]),((4,4100,3,2),[0,2,3]),((4,3,4100,2),[0,2,3]),#((4,3,2,4100),[0,2,3]),#1011
                           ((4100,4,3,2),[1,2,3]),((4,4100,3,2),[1,2,3]),((4,3,4100,2),[1,2,3]),((4,3,2,4100),[1,2,3]),#0111
                           ((4100,2,3,4),[0,1,2,3]),((2,4100,3,4),[0,1,2,3]),((2,3,4100,4),[0,1,2,3]),((2,3,4,4100),[0,1,2,3]),#1111


                           #test pattern implemented by reshape
                           ((4100,4,3,2),[0]),((4,4100,3,2),[0]),((4,3,4100,2),[0]),((4,3,2,4100),[0]),#1000
                           ((4100,4,3,2),[1]),((4,4100,3,2),[1]),((4,3,4100,2),[1]),((4,3,2,4100),[1]),#0100
                           ((4100,4,3,2),[2]),((4,4100,3,2),[2]),((4,3,4100,2),[2]),((4,3,2,4100),[2]),#0010
                           ((4100,4,3,2),[3]),((4,4100,3,2),[3]),((4,3,4100,2),[3]),((4,3,2,4100),[3]),#0001
                           ((1100,2,3,4,5),[0,1,2,3,4]),((2,1100,3,4,5),[0,1,2,3,4]),((2,3,1100,4,5),[0,1,2,3,4]),((2,3,4,1100,5),[0,1,2,3,4]),((2,3,4,5,1100),[0,1,2,3,4]),#11111

                           ]:
        a = tensor.TensorType('float32', (False,) * len(shape))()
        b = T.Sum(pattern)(a)
        val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
#        val = numpy.ones(shape)
#        val = numpy.arange(numpy.prod(shape)).reshape(shape)
        val = theano._asarray(val, dtype='float32')
        f = theano.function([a], b, mode=mode_with_gpu)
        f2 = theano.function([a], b, mode=mode_without_gpu)
        assert tcn.GpuSum in [x.op.__class__ for x in f.maker.env.toposort()]
        assert T.Sum in [x.op.__class__ for x in f2.maker.env.toposort()]
        if val.size == 0:
            assert f2(val) == f(val), ('shape', shape, 'pattern', pattern)
        else:
            try:
                #We raise the error threshold as we sum big matrices,
                #and this causes small rounding differences with some seeds,
                #for example in debug mode with unittests.rseed=9275
                orig_rtol = theano.tensor.basic.float32_rtol
                theano.tensor.basic.float32_rtol = 2e-5
                assert _allclose(f2(val), f(val)), ('shape', shape,
                                                    'pattern', pattern,
                                                    sum([shape[i] for i in pattern]),
                                                    f2(val), f(val), val)
            finally:
                theano.tensor.basic.float32_rtol = orig_rtol


        #test with dimshuffle
        #we shuffle the 2 outer dims.
    for shape, pattern in [#((5,),[0]),
                           ((5,4),[0,1]),((5,4),[0]),
                           ((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
                           ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
        a = tensor.TensorType('float32', (False,) * len(shape))()
        dim_pattern = range(len(shape))
        dim_pattern[0] = 1
        dim_pattern[1] = 0
        a = a.dimshuffle(dim_pattern)
        b = T.Sum(pattern)(a)
        val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
#        val = numpy.ones(shape)
#        val = numpy.arange(numpy.prod(shape)).reshape(shape)
        val = theano._asarray(val, dtype='float32')
        f = theano.function([a], b, mode=mode_with_gpu)
        f2 = theano.function([a], b, mode=mode_without_gpu)
        assert tcn.GpuSum in [x.op.__class__ for x in f.maker.env.toposort()]
        assert T.Sum in [x.op.__class__ for x in f2.maker.env.toposort()]
        assert _allclose(f2(val), f(val)), ('shape', shape,
                                            'pattern', pattern,
                                            sum([shape[i] for i in pattern]))


        #test with broadcast
    for shape, pattern in [((5,),[0]),
                           ((5,4),[0,1]),((5,4),[0]),
                           ((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
                           ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
        shape = numpy.asarray(shape) * 2
        a = tensor.TensorType('float32', (False,) * len(shape))()
        a2 = tcn.CudaNdarrayType((False,) * len(shape))()
        b = T.Sum(pattern)(a)
        b2 = T.Sum(pattern)(a2)
        val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
#        val = numpy.ones(shape)
#        val = numpy.arange(numpy.prod(shape)).reshape(shape)
        val = theano._asarray(val, dtype='float32')
        val2 = cuda.CudaNdarray(val)
        if len(shape) == 1:
            val = val[::2]
            val2 = val2[::2]
        elif len(shape) == 2:
            val = val[::2, ::2]
            val2 = val2[::2, ::2]
        elif len(shape) == 3:
            val = val[::2, ::2, ::2]
            val2 = val2[::2, ::2, ::2]
        elif len(shape) == 4:
            val = val[::2, ::2, ::2, ::2]
            val2 = val2[::2, ::2, ::2, ::2]
        f = theano.function([a], b, mode=mode_without_gpu)
        f2 = theano.function([a2], b2, mode=mode_with_gpu)
        assert tcn.GpuSum in [x.op.__class__ for x in f2.maker.env.toposort()]
        assert T.Sum in [x.op.__class__ for x in f.maker.env.toposort()]
        assert _allclose(f2(val2), f(val)), ('shape', shape,
                                             'pattern', pattern,
                                             sum([shape[i] for i in pattern]))
Ejemplo n.º 56
0
 def shared_dataset(data_x):
     """Function that loads the dataset into shared variables"""
     if conf.get('normalize', True):
         return sharedX(data_x, borrow=True)
     else:
         return theano.shared(theano._asarray(data_x), borrow=True)
Ejemplo n.º 57
0
def test_careduce():
    """
    test sum pattern 1, 11, 10, 01, 001, 010, 100, 110, 011, 111,
    0011, 0101, 0111, 1011, 1111

    test sum pattern implemented with reshape:
    1000, 0100, 0010, 0001, 11111

    others implemented by reshape that are not tested
    0011,0101,0110,1001,1010,1100
    1110,1101,1011

    TODO: test with broadcast
    """
    for scalar_op, careduce_op in [
            (theano.scalar.add, tensor.elemwise.CAReduceDtype),
            (theano.scalar.maximum, tensor.CAReduce)]:
        for shape, pattern in [((1,1),(1,)),
                               ((1,0),(1,)),
                               ((0,1),(1,)),
                               ((0,0),(1,)),
                               ((0,0,0),(1,2)),
                               ((0,0,0,0),(1,2,3)),
                               ((2,1),(1,)),
                               ((1,2),(1,)),
                               ((100,3,1300),[1]),
                               ((0,),[0]),((5,),[0]),
                               ((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger than 32 for some opt test.
                               ((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
                               ((0,0,0,0),[0,1,2,3]),
                               ((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
                               ((5,4,3,10,11),[1,2]),
                               ((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),

                               #test shape bigger than 4096 on each dimension to make sure that we work correctly when we don't have enough threads/blocks in each dimension
                               ((4100,3),[0]),((3,4101),[0]),#10
                               ((1024,33),[0]),((33,1024),[0]),#10
                               ((1025,33),[0]),((33,1025),[0]),#10

                               ((4100,3),[1]),((3,4101),[1]),#01
                               ((1024,33),[1]),((33,1024),[1]),#01
                               ((1025,33),[1]),((33,1025),[1]),#01

                               ((4100,3),[0,1]),((3,4101),[0,1]),#11
                               ((1024,33),[0,1]),((33,1024),[0,1]),#01
                               ((1025,33),[0,1]),((33,1025),[0,1]),#01

                               ((4100,4,3),[0]),((5,4100,3),[0]),((5,4,4100),[0]), ((3,65536,1), [0]),#100
                               ((4100,4,3),[1]),((5,4100,3),[1]),((5,4,4100),[1]),#010
                               ((4100,4,3),[2]),((5,4100,3),[2]),((5,4,4100),[2]),#001
                               ((4100,4,3),[0,1]),((5,4100,3),[0,1]),((5,4,4100),[0,1]),#110
                               ((4100,4,3),[1,2]),((5,4100,3),[1,2]),((5,4,4100),[1,2]),#011
                               #((4100,4,3),[0,2]),((5,4100,3),[0,2]),((5,4,4100),[0,2]),#101 ##not implemented
                               ((4100,4,3),[0,1,2]),((5,4100,3),[0,1,2]),((5,4,4100),[0,1,2]),#111

                               ((4100,4,3,2),[2,3]),((4,4100,3,2),[2,3]),((4,3,4100,2),[2,3]),((4,3,2,4100),[2,3]),#0011
                               ((4100,4,3,2),[1,3]),((4,4100,3,2),[1,3]),((4,3,4100,2),[1,3]),((4,3,2,4100),[1,3]),#0101
                               ((4100,4,3,2),[0,2,3]),((4,4100,3,2),[0,2,3]),((4,3,4100,2),[0,2,3]),#((4,3,2,4100),[0,2,3]),#1011
                               ((4100,4,3,2),[1,2,3]),((4,4100,3,2),[1,2,3]),((4,3,4100,2),[1,2,3]),((4,3,2,4100),[1,2,3]),#0111
                               ((4100,2,3,4),[0,1,2,3]),((2,4100,3,4),[0,1,2,3]),((2,3,4100,4),[0,1,2,3]),((2,3,4,4100),[0,1,2,3]),#1111


                               #test pattern implemented by reshape
                               ((4100,4,3,2),[0]),((4,4100,3,2),[0]),((4,3,4100,2),[0]),((4,3,2,4100),[0]),#1000
                               ((4100,4,3,2),[1]),((4,4100,3,2),[1]),((4,3,4100,2),[1]),((4,3,2,4100),[1]),#0100
                               ((4100,4,3,2),[2]),((4,4100,3,2),[2]),((4,3,4100,2),[2]),((4,3,2,4100),[2]),#0010
                               ((4100,4,3,2),[3]),((4,4100,3,2),[3]),((4,3,4100,2),[3]),((4,3,2,4100),[3]),#0001
                               ((1100,2,3,4,5),[0,1,2,3,4]),((2,1100,3,4,5),[0,1,2,3,4]),((2,3,1100,4,5),[0,1,2,3,4]),((2,3,4,1100,5),[0,1,2,3,4]),((2,3,4,5,1100),[0,1,2,3,4]),#11111

                               ]:

            op = careduce_op(scalar_op, axis=pattern)
            pat = tensor_pattern_to_gpu_pattern(shape, pattern)
            # GpuCAReduce{maximum} supports only those patterns
            if scalar_op is theano.scalar.maximum and pat not in [
                (0, 1), (0, 1, 1), (0, 1, 1, 1)]:
                continue

            a = tensor.TensorType('float32', (False,) * len(shape))()
            b = op(a)
            val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
    #        val = numpy.ones(shape)
    #        val = numpy.arange(numpy.prod(shape)).reshape(shape)
            val = theano._asarray(val, dtype='float32')
            f = theano.function([a], b, mode=mode_with_gpu)
            f2 = theano.function([a], b, mode=mode_without_gpu)
            assert tcn.GpuCAReduce in [x.op.__class__
                                       for x in f.maker.fgraph.toposort()]
            assert op.__class__ in [x.op.__class__
                                    for x in f2.maker.fgraph.toposort()]
            f_caused_value_error = False
            try:
                f_out = f(val)
            except ValueError, e:
                exc = e
                f_caused_value_error = True

            f2_caused_value_error = False
            try:
                f2_out = f2(val)
            except ValueError, e:
                exc2 = e
                f2_caused_value_error = True

            if f_caused_value_error != f2_caused_value_error:
                if f_caused_value_error:
                    print 'f caused this value error:'
                    print exc
                else:
                    print 'f did not raise a value error, but should have'
                if f2_caused_value_error:
                    print 'f2 caused this value error:'
                    print exc2
                else:
                    print 'f should not have raised a value error'
                print 'shape was: ', shape
                print 'pattern was: ', pattern
                assert False

            try:
                #We raise the error threshold as we sum big matrices,
                #and this causes small rounding differences with some seeds,
                #for example in debug mode with unittests.rseed=9275
                orig_rtol = theano.tensor.basic.float32_rtol
                theano.tensor.basic.float32_rtol = 2e-5
                assert _allclose(f_out, f2_out), ('shape', shape,
                                                    'pattern', pattern,
                                                    sum([shape[i] for i in pattern]),
                                                    f2(val), f(val), val)
            finally:
                theano.tensor.basic.float32_rtol = orig_rtol
Ejemplo n.º 58
0
 def perform(self, node, inputs, output_storage):
     a = inputs[0]
     axis = inputs[1]
     z = output_storage[0]
     z[0] = theano._asarray(np.argsort(a, axis, self.kind, self.order),
                            dtype=node.outputs[0].dtype)
Ejemplo n.º 59
0
def test_huge_elemwise_fusion():
    """ Test the the GpuElemwise fusion work correctly
        We check that we fuse one node with part of its input
        in case their is too many inputs and that would make it bust the 256
        bytes limits.
    """
    shape = (2, 3, 4, 5, 6)
    ttype = tensor.tensor(dtype='float32',
                          broadcastable=(False, ) * len(shape))
    gpu_ptr_size = theano.sandbox.cuda.opt.get_device_type_sizes(
    )['gpu_ptr_size']
    if gpu_ptr_size == 8:
        nb_in = 7
        len_topo = 10
    elif gpu_ptr_size == 4:
        nb_in = 8
        len_topo = 11
    else:
        raise Exception("Unexpected value for gpu_ptr_size", gpu_ptr_size)
    vars = [tensor.tanh(ttype) for x in range(nb_in)]
    f = pfunc(vars, [reduce(operator.sub, vars)], mode=mode_with_gpu)

    topo = f.maker.fgraph.toposort()
    #theano.printing.debugprint(f)
    #for i, node in enumerate(topo):
    #    print >> sys.stdout, i, node
    assert len(topo) == len_topo
    assert sum([isinstance(node.op, cuda.GpuElemwise) for node in topo]) == 2
    assert isinstance(topo[-3].op.scalar_op, theano.scalar.basic.Sub)
    assert isinstance(topo[-2].op.scalar_op, theano.scalar.basic.Composite)
    #let debugmode catch errors
    gen = lambda: theano._asarray(numpy.random.rand(*shape), dtype='float32')
    f(*[gen() for i in range(nb_in)])

    # Test the case where we can't put the computation on the GPU: the input
    # has too many dimensions for the op to take even 2 inputs.

    shape = (
        1,
        2,
        3,
        4,
        5,
        6,
        7,
        2,
        2,
        3,
        2,
        1,
        2,
        2,
        2,
    )
    ttype = tensor.tensor(dtype='float32',
                          broadcastable=(False, ) * len(shape))
    vars = [tensor.tanh(ttype) for x in range(7)]
    f = pfunc(
        vars,
        [vars[0] - vars[1] - vars[2] - vars[3] - vars[4] - vars[5] - vars[6]],
        mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    #theano.printing.debugprint(f)
    assert len(topo) == 1
    assert sum([isinstance(node.op, cuda.GpuElemwise) for node in topo]) == 0
    assert sum([isinstance(node.op, tensor.Elemwise) for node in topo]) == 1
    #let debugmode catch errors
    gen = lambda: theano._asarray(numpy.random.rand(*shape), dtype='float32')
    f(gen(), gen(), gen(), gen(), gen(), gen(), gen())

    def gen(shape):
        return theano._asarray(numpy.random.rand(*shape), dtype='float32')

    max_var = 16  # excluded
    for shape in [
        (2, ),
        (2, 2),
        (2, 2, 2),
        (2, 2, 2, 2),
        (2, 2, 2, 2, 2),  # 5d
        (2, 2, 2, 2, 2, 2),
            #                  (2, 2, 2, 2, 2, 2, 2),
            #                  (2, 2, 2, 2, 2, 2, 2, 2),
            #                  (2, 2, 2, 1, 1, 1, 1, 2, 2),  # 9d
    ]:
        vals = [cuda.shared_constructor(gen(shape)) for x in range(max_var)]
        for use_tan in [True, False]:
            if use_tan:
                vars = [tensor.tanh(x) for x in vals]
            else:
                vars = vals
            for nb_var in range(1, max_var):
                out = reduce(lambda x, y: x + y, vars[:nb_var])
                if not isinstance(out.type, CudaNdarrayType):
                    out = cuda.gpu_from_host(out)
                f = pfunc([], [out], mode=mode_with_gpu)
                topo = f.maker.fgraph.toposort()
                #print shape, nb_var, use_tan, len(topo)
                assert (sum(
                    [isinstance(node.op, cuda.GpuElemwise)
                     for node in topo]) == len(topo)
                        or (nb_var == 1 and use_tan == False))
                assert sum([
                    isinstance(node.op, tensor.Elemwise) for node in topo
                ]) == 0

                #let debugmode catch errors
                f()
Ejemplo n.º 60
0
def _params_allgood(ishape,
                    kshape,
                    mode,
                    subsample=(1, 1),
                    img_stride=(1, 1),
                    kern_stride=(1, 1),
                    version=-1,
                    verbose=0,
                    random=True,
                    print_=None,
                    id=None,
                    rtol=1e-5,
                    atol=1e-8,
                    nb_iter=0,
                    ones=False,
                    compile_kshp=None):
    #
    # This function is the core of several of the big unit-test drivers,
    # but it can also be used very directly on its own to test a specific
    # kind of convolution.
    #
    # See `test_example` (above) for an example of how to use this directly.
    #
    # :param kshape: (4d) The shape of the kernel at run time.
    # :param compile_kshp: (2d) Hard-code the shape of the kernel in the generated code.
    #                      This is supposed to be faster, but we need to check
    #                      that we raise an error if the input has the wrong shape.
    #
    if ones:
        assert not random
        npy_img = theano._asarray(numpy.ones(ishape), dtype='float32')
        npy_kern = -theano._asarray(numpy.ones(kshape), dtype='float32')
    elif random:
        npy_img = theano._asarray(numpy.random.rand(*ishape) + 1,
                                  dtype='float32')
        npy_kern = theano._asarray(numpy.random.rand(*kshape) - 2,
                                   dtype='float32')
    else:
        npy_img = theano._asarray(numpy.arange(
            numpy.prod(ishape)).reshape(ishape),
                                  dtype='float32') + 1
        npy_kern = -(
            theano._asarray(numpy.arange(numpy.prod(kshape)).reshape(kshape),
                            dtype='float32') + 1)

    img = cuda_ndarray.CudaNdarray(npy_img)
    kern = cuda_ndarray.CudaNdarray(npy_kern)

    # We take the strides after the transfer, as we make the data c-contiguous on the GPU.
    if img_stride != (1, 1):
        img = img[:, :, ::img_stride[0], ::img_stride[1]]
        npy_img = npy_img[:, :, ::img_stride[0], ::img_stride[1]]
    if kern_stride != (1, 1):
        kern = kern[:, :, ::kern_stride[0], ::kern_stride[1]]
        npy_kern = npy_kern[:, :, ::kern_stride[0], ::kern_stride[1]]

    t2 = None
    rval = True
    try:
        t0 = time.time()
        cpuval = py_conv(npy_img, npy_kern, mode, subsample)
        t1 = time.time()
        i = cuda_tensor4()
        k = cuda_tensor4()
        op = theano.sandbox.cuda.blas.GpuConv(border_mode=mode,
                                              subsample=subsample,
                                              version=version,
                                              verbose=verbose,
                                              kshp=compile_kshp)(i, k)
        f = theano.function([i, k], op, mode=theano_mode)
        gpuval = f(img, kern)
        t2 = time.time()
        for i in range(nb_iter):
            gpuval2 = f(img, kern)
            assert numpy.allclose(numpy.asarray(gpuval),
                                  numpy.asarray(gpuval2))
            assert (numpy.asarray(gpuval) == numpy.asarray(gpuval2)).all()
        gpuval = numpy.asarray(gpuval)
        if gpuval.shape != cpuval.shape:
            print >> sys.stdout, "ERROR: shape mismatch", gpuval.shape, cpuval.shape
            rval = False
        if rval:
            rval = numpy.allclose(cpuval, gpuval, rtol=rtol)
            assert numpy.all(numpy.isfinite(gpuval))
    except NotImplementedError, e:
        print >> sys.stdout, '_params_allgood Failed allclose', e
        rval = False