def __init__(self, input=None, target=None, n_input=1, n_hidden=1,
             n_output=1, lr=1e-3, **kw):
    """Build a one-hidden-layer network and compile its Theano functions.

    :param input: symbolic input vector; a fresh ``dvector('input')`` is
        created when omitted.
    :param target: symbolic target vector; a fresh ``dvector('target')``
        is created when omitted.
    :param n_input: number of columns of ``w1``.
    :param n_hidden: number of rows of ``w1`` / columns of ``w2``.
    :param n_output: number of rows of ``w2``.
    :param lr: initial value of the shared ``learning_rate`` variable.
    :param kw: forwarded unchanged to the parent constructor.
    """
    super(NNet, self).__init__(**kw)

    # Create the default symbolic variables per call.  A default written in
    # the signature is evaluated once, when the function is defined, and
    # would then be the very same object for every instance.
    if input is None:
        input = tensor.dvector('input')
    if target is None:
        target = tensor.dvector('target')

    self.input = input
    self.target = target
    self.lr = shared(lr, 'learning_rate')
    self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
    self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')

    # Forward pass: sigmoid hidden layer followed by a linear output layer.
    self.hidden = sigmoid(tensor.dot(self.w1, self.input))
    self.output = tensor.dot(self.w2, self.hidden)
    # Sum of squared differences between output and target.
    self.cost = tensor.sum((self.output - self.target) ** 2)

    # One gradient-descent step on each weight matrix.
    self.sgd_updates = {
        self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
        self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2),
    }
    self.sgd_step = pfunc(
        params=[self.input, self.target],
        outputs=[self.output, self.cost],
        updates=self.sgd_updates)

    self.compute_output = pfunc([self.input], self.output)
    self.output_from_hidden = pfunc([self.hidden], self.output)
def test_elemwise1(): """ Several kinds of elemwise expressions with no broadcasting, non power-of-two shape """ shape = (3,4) a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape), dtype='float32')+0.5, 'a') b = tensor.fmatrix() #let debugmode catch any mistakes print >> sys.stdout, "STARTING FUNCTION 1" f = pfunc([b], [], updates=[(a, b**a)], mode=mode_with_gpu) for i, node in enumerate(f.maker.env.toposort()): print i, node f(theano._asarray(numpy.random.rand(*shape), dtype='float32')+0.3) print >> sys.stdout, "STARTING FUNCTION 2" #let debugmode catch any mistakes f = pfunc([b], [], updates=[(a, tensor.exp(b**a))], mode=mode_with_gpu) for i, node in enumerate(f.maker.env.toposort()): print i, node f(theano._asarray(numpy.random.rand(*shape), dtype='float32')+0.3) print >> sys.stdout, "STARTING FUNCTION 3" #let debugmode catch any mistakes f = pfunc([b], [], updates=[(a, a+b * tensor.exp(b**a))], mode=mode_with_gpu) f(theano._asarray(numpy.random.rand(*shape), dtype='float32')+0.3)
def __init__(self, input=None, target=None, n_input=1, n_hidden=1,
             n_output=1, lr=1e-3, **kw):
    """Set up the symbolic graph of a small two-layer network.

    :param input: symbolic input vector (defaults to a new
        ``tensor.dvector('input')``).
    :param target: symbolic target vector (defaults to a new
        ``tensor.dvector('target')``).
    :param n_input: second dimension of the ``w1`` weight matrix.
    :param n_hidden: first dimension of ``w1`` and second of ``w2``.
    :param n_output: first dimension of the ``w2`` weight matrix.
    :param lr: value stored in the shared ``learning_rate`` variable.
    :param kw: extra keyword arguments passed to the parent ``__init__``.
    """
    super(NNet, self).__init__(**kw)

    # Defaults are built here rather than in the signature so that they are
    # not created once at definition time and shared between instances.
    if input is None:
        input = tensor.dvector('input')
    if target is None:
        target = tensor.dvector('target')

    self.input = input
    self.target = target
    self.lr = shared(lr, 'learning_rate')
    self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
    self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')

    self.hidden = sigmoid(tensor.dot(self.w1, self.input))
    self.output = tensor.dot(self.w2, self.hidden)
    # Squared-error cost summed over the output vector.
    self.cost = tensor.sum((self.output - self.target) ** 2)

    # Each weight moves against its gradient, scaled by the learning rate.
    self.sgd_updates = {
        self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
        self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2),
    }

    # Compiled entry points: a training step and two forward evaluations.
    self.sgd_step = pfunc(
        params=[self.input, self.target],
        outputs=[self.output, self.cost],
        updates=self.sgd_updates)
    self.compute_output = pfunc([self.input], self.output)
    self.output_from_hidden = pfunc([self.hidden], self.output)
def test_elemwise2():
    """ Elemwise expressions combined with dimension permutations must
    compile to a graph that contains no CPU ``tensor.Elemwise`` node. """
    rng = numpy.random.RandomState(int(time.time()))

    def contains_cpu_elemwise(fn):
        # True when any node of the compiled graph is a tensor.Elemwise.
        return any(isinstance(apply_node.op, tensor.Elemwise)
                   for apply_node in fn.maker.env.toposort())

    shape = (3, 5)
    for pattern in [(0, 1), (1, 0)]:
        init_val = theano._asarray(rng.rand(*shape), dtype='float32')
        a = tcn.shared_constructor(init_val, name=None)
        b = tensor.Tensor(dtype='float32',
                          broadcastable=[0] * len(shape))()
        f = pfunc([b], [],
                  updates=[(a, (a + b).dimshuffle(pattern))],
                  mode=mode_with_gpu)
        assert not contains_cpu_elemwise(f)
        # let debugmode catch errors
        f(theano._asarray(rng.rand(*shape), dtype='float32') * .3)

    # Same check with a 4-d tensor and a non-trivial permutation.
    shape = (3, 4, 5, 6)
    perm = [2, 0, 3, 1]
    a = tcn.shared_constructor(
        theano._asarray(rng.rand(*shape), dtype='float32'), 'a')
    b = tensor.Tensor(dtype='float32', broadcastable=[0] * len(shape))()
    new_a = (a + b).dimshuffle(perm) * tensor.exp(b ** a).dimshuffle(perm)
    f = pfunc([b], [], updates=[(a, new_a)], mode=mode_with_gpu)
    assert not contains_cpu_elemwise(f)
    # let debugmode catch errors
    f(theano._asarray(rng.rand(*shape), dtype='float32'))
def test_elemwise2(): """ Several kinds of elemwise expressions with dimension permutations """ rng = numpy.random.RandomState(int(time.time())) print 'random?', rng.rand(3) shape = (3,5) for pattern in [(0,1), (1,0)]: a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),dtype='float32'), name=None) b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))() f = pfunc([b], [], updates=[(a, (a+b).dimshuffle(pattern))], mode=mode_with_gpu) has_elemwise = False for i, node in enumerate(f.maker.env.toposort()): print >> sys.stdout, i, node has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise) assert not has_elemwise #let debugmode catch errors print >> sys.stdout, 'pattern', pattern f(theano._asarray(rng.rand(*shape),dtype='float32')*.3) shape = (3,4,5,6) a = tcn.shared_constructor(theano._asarray(rng.rand(*shape),dtype='float32'), 'a') b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))() f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) * tensor.exp(b**a).dimshuffle([2,0,3,1]))], mode=mode_with_gpu) has_elemwise = False for i, node in enumerate(f.maker.env.toposort()): print i, node has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise) assert not has_elemwise #let debugmode catch errors f(theano._asarray(rng.rand(*shape),dtype='float32'))
def test_downsample(): shps = [ (1, 1, 1, 12), (1, 1, 2, 2), (1, 1, 1, 1), (1, 1, 4, 4), (1, 1, 10, 11), (1, 2, 2, 2), (3, 5, 4, 4), (25, 1, 7, 7), (1, 1, 12, 12), (1, 1, 2, 14), (1, 1, 12, 14), (1, 1, 14, 14), (1, 1, 16, 16), (1, 1, 18, 18), (1, 1, 24, 24), (1, 6, 24, 24), (10, 1, 24, 24), (10, 6, 24, 24), (30, 6, 12, 12), (30, 2, 24, 24), (30, 6, 24, 24), (10, 10, 10, 11), (1, 1, 10, 1025), (1, 1, 10, 1023), (1, 1, 1025, 10), (1, 1, 1023, 10), ] numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps) for shp in shps: for ds in (2, 2), (3, 2), (1, 1): if ds[0] > shp[2]: continue if ds[1] > shp[3]: continue # GpuDownsampleFactorMax doesn't like having more than 512 columns # in the output tensor. if float(shp[3]) / ds[1] > 512: continue for ignore_border in (True, False): print "test_downsample", shp, ds, ignore_border ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border) a = tcn.shared_constructor(my_rand(*shp), "a") f = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_with_gpu) f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_without_gpu) assert any([isinstance(node.op, tcn.blas.GpuDownsampleFactorMax) for node in f.maker.env.toposort()]) assert any([isinstance(node.op, DownsampleFactorMax) for node in f2.maker.env.toposort()]) assert numpy.allclose(f(), f2()) g = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), a), mode=mode_with_gpu) g2 = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), a), mode=mode_without_gpu) assert any( [isinstance(node.op, tcn.blas.GpuDownsampleFactorMaxGrad) for node in g.maker.env.toposort()] ) assert any([isinstance(node.op, DownsampleFactorMaxGrad) for node in g2.maker.env.toposort()]) assert numpy.allclose(g(), g2())
def test_elemwise1(): """ Several kinds of elemwise expressions with no broadcasting, non power-of-two shape """ shape = (3, 4) a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.5, 'a') b = tensor.fmatrix() #let debugmode catch any mistakes print >> sys.stdout, "STARTING FUNCTION 1" f = pfunc([b], [], updates=[(a, b ** a)], mode=mode_with_gpu) for i, node in enumerate(f.maker.env.toposort()): print i, node f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3) print >> sys.stdout, "STARTING FUNCTION 2" #let debugmode catch any mistakes f = pfunc([b], [], updates=[(a, tensor.exp(b ** a))], mode=mode_with_gpu) for i, node in enumerate(f.maker.env.toposort()): print i, node f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3) print >> sys.stdout, "STARTING FUNCTION 3" #let debugmode catch any mistakes f = pfunc([b], [], updates=[(a, a + b * tensor.exp(b ** a))], mode=mode_with_gpu) f(theano._asarray(numpy.random.rand(*shape), dtype='float32') + 0.3)
def test_downsample(): import random shps = [ (1, 1, 1, 12), (1, 1, 2, 2), (1, 1, 1, 1), (1,1,4,4), (1, 1, 10, 11), (1, 2, 2, 2), (3,5,4,4), (25, 1, 7, 7), (1, 1, 12, 12), (1, 1, 2, 14), (1, 1, 12, 14), (1, 1, 14, 14), (1, 1, 16, 16), (1, 1, 18, 18), (1, 1, 24, 24), (1, 6, 24, 24), (10, 1, 24, 24), (10, 6, 24, 24), (30, 6, 12, 12), (30, 2, 24, 24), (30, 6, 24, 24), (10, 10, 10, 11), (1,1,10,1025), (1,1,10,1023), (1,1,1025,10), (1,1,1023,10), ] numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps) for shp in shps: for ds in (2, 2), (3,2), (1,1): if ds[0] > shp[2]: continue if ds[1] > shp[3]: continue #GpuDownsampleFactorMax don't having more then 512 columns in the output tensor if float(shp[3])/ds[1]>512: continue for ignore_border in (True, False): print 'test_downsample', shp, ds, ignore_border ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border) a = tcn.shared_constructor(my_rand(*shp), 'a') f = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_with_gpu) f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_without_gpu) assert any([isinstance(node.op, tcn.blas.GpuDownsampleFactorMax) for node in f.maker.env.toposort()]) assert any([isinstance(node.op, DownsampleFactorMax) for node in f2.maker.env.toposort()]) assert numpy.allclose(f(),f2()) g = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(),a), mode=mode_with_gpu) g2 = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(),a), mode=mode_without_gpu) assert any([isinstance(node.op, tcn.blas.GpuDownsampleFactorMaxGrad) for node in g.maker.env.toposort()]) assert any([isinstance(node.op, DownsampleFactorMaxGrad) for node in g2.maker.env.toposort()]) assert numpy.allclose(g(),g2())
def cmp_sigmoids(shape):
    """Time three sigmoid implementations on a random array of `shape`.

    The implementations are a NumPy function, a compiled Theano function
    taking its input as an argument, and a compiled Theano function that
    updates a shared variable built with ``tcn.shared_constructor``.  The
    timings returned by ``compare_fns`` are passed to ``showtimes``.

    :param shape: shape of the random input array.
    """
    def numpy_sigmoid(input):
        # Return the value: the result used to be stored in a local and
        # dropped, so this baseline silently returned None.
        return 1.0 / (1.0 + numpy.exp(-input))

    sinput = tensor.Tensor(dtype='float32',
                           broadcastable=(0,) * len(shape))()
    shared_input = tcn.shared_constructor(numpy.random.rand(*shape),
                                          'shared_input')
    times = compare_fns(
        dict(
            numpy=numpy_sigmoid,
            theano_cpu=pfunc([sinput], 1.0 / (1.0 + tensor.exp(-sinput))),
            theano_gpu_onboard=pfunc(
                [sinput], [],
                updates=[(shared_input,
                          1.0 / (1.0 + tensor.exp(-shared_input)))]),
        ),
        input=shared_input.value)
    showtimes(times)
def test_elemwise_empty():
    """ Elemwise addition on a shared matrix holding zero elements. """
    empty_shape = (0, 0)
    a = tcn.shared_constructor(
        theano._asarray(numpy.random.rand(*empty_shape), dtype='float32'),
        'a')
    b = tensor.fmatrix()
    updates = [(a, a + b)]

    gpu_fn = pfunc([b], [], updates=updates, mode=mode_with_gpu)
    # Compiled as well, but never called.
    cpu_fn = pfunc([b], [], updates=updates, mode=mode_without_gpu)

    # Multiply by 1.0 to keep a snapshot of the value before the update.
    before = a.get_value() * 1.0
    gpu_fn(numpy.ones(empty_shape, dtype='float32'))

    assert numpy.all(before + 1.0 == a.get_value())
def test_elemwise_composite_float64():
    """ Check that no float64 ends up inside a fused GpuElemwise composite.

    By default nvcc downcasts float64 to float32.  It would have to be told
    not to, and that is only possible on some devices.
    """
    a = tensor.fmatrix()
    b = tensor.fmatrix()
    av = theano._asarray(numpy.random.rand(4, 4), dtype='float32')
    bv = numpy.ones((4, 4), dtype='float32')

    def get_all_basic_scalar(composite_op):
        # Collect the items of the composite's graph, recursing into any
        # item that is itself a Composite.
        collected = []
        for item in composite_op.env.toposort():
            if isinstance(item, theano.scalar.Composite):
                collected.extend(get_all_basic_scalar(item))
            else:
                collected.append(item)
        return collected

    modes = [mode_with_gpu,
             mode_with_gpu.excluding('gpu_after_fusion'),
             mode_with_gpu.excluding('elemwise_fusion')]
    for mode in modes:
        squared = tensor.cast(a, 'float64') ** 2
        f = pfunc([a, b],
                  tensor.cast(tensor.lt(squared, b), 'float32'),
                  mode=mode)

        out = f(av, bv)
        assert numpy.all(out == ((av ** 2) < bv))

        for node in f.maker.env.toposort():
            if not isinstance(node.op, cuda.GpuElemwise):
                continue
            if not isinstance(node.op.scalar_op, theano.scalar.Composite):
                continue
            for s in get_all_basic_scalar(node.op.scalar_op):
                assert not any([var.type.dtype == 'float64'
                                for var in s.inputs + s.outputs])
def cmp(a_shp, b_shp):
    """Check ``dot(a, b) + c`` used both as an output and as an update of
    ``a``: the graph must contain ``gpu_gemm_no_inplace`` and the results
    must match NumPy.

    :param a_shp: shape of the shared left operand.
    :param b_shp: shape of the two right operands.
    """
    a_init = my_rand(*a_shp)
    a = tcn.shared_constructor(a_init, 'a')
    c_init = my_rand(a_shp[0], b_shp[1])
    c = tcn.shared_constructor(c_init.copy(), 'c')

    b = tcn.fmatrix('b')
    b2 = tcn.fmatrix('b2')

    f = pfunc([b, b2],
              [tensor.dot(a, b2) + c],
              updates=[(a, tensor.dot(a, b) + c)],
              mode=mode_with_gpu)

    ops_in_graph = [node.op for node in f.maker.fgraph.toposort()]
    assert any([op == tcn.blas.gpu_gemm_no_inplace for op in ops_in_graph])

    b_val = my_rand(*b_shp)
    b2_val = my_rand(*b_shp)
    result = f(b_val, b2_val)

    assert numpy.allclose(numpy.dot(a_init, b_val) + c_init, a.get_value())
    assert numpy.allclose(numpy.dot(a_init, b2_val) + c_init, result)

    # Try with a matrix equal to a0, but with strides in both dims
    a.set_value(a_init)
    reversed_view = a.get_value(borrow=True,
                                return_internal_type=True)[::-1, ::-1]
    a.set_value(reversed_view, borrow=True)
    f(b_val, b2_val)
def cmp(a_shp, b_shp):
    """Compile a function that returns ``dot(a, b2) + c`` while updating
    ``a`` to ``dot(a, b) + c``, then compare both against NumPy.

    :param a_shp: shape of the shared matrix ``a``.
    :param b_shp: shape of the inputs ``b`` and ``b2``.
    """
    host_a = my_rand(*a_shp)
    a = tcn.shared_constructor(host_a, 'a')
    host_c = my_rand(a_shp[0], b_shp[1])
    c = tcn.shared_constructor(host_c.copy(), 'c')
    b = tcn.fmatrix('b')
    b2 = tcn.fmatrix('b2')

    returned_expr = tensor.dot(a, b2) + c
    updated_expr = tensor.dot(a, b) + c
    f = pfunc([b, b2], [returned_expr], updates=[(a, updated_expr)],
              mode=mode_with_gpu)

    # The out-of-place gemm op is expected in the compiled graph.
    assert any([apply_node.op == tcn.blas.gpu_gemm_no_inplace
                for apply_node in f.maker.fgraph.toposort()])

    host_b = my_rand(*b_shp)
    host_b2 = my_rand(*b_shp)
    returned = f(host_b, host_b2)

    expected_update = numpy.dot(host_a, host_b) + host_c
    expected_output = numpy.dot(host_a, host_b2) + host_c
    assert numpy.allclose(expected_update, a.get_value())
    assert numpy.allclose(expected_output, returned)

    # Try with a matrix equal to a0, but with strides in both dims
    a.set_value(host_a)
    internal = a.get_value(borrow=True, return_internal_type=True)
    a.set_value(internal[::-1, ::-1], borrow=True)
    f(host_b, host_b2)
def cmp(a_shp, b_shp):
    """Check the update ``a <- dot(a, b) + exp(c)``: the graph must contain
    ``gpu_gemm_inplace`` and the new value of ``a`` must match NumPy.

    :param a_shp: shape of the shared matrix ``a``.
    :param b_shp: shape of the input ``b``.
    """
    a_start = my_rand(*a_shp)
    a = tcn.shared_constructor(a_start, 'a')
    b = tensor.fmatrix('b')
    c = tensor.fmatrix('c')

    new_a = tensor.dot(a, b) + tensor.exp(c)
    f = pfunc([b, c], [], updates=[(a, new_a)], mode=mode_with_gpu)

    graph_ops = [apply_node.op for apply_node in f.maker.env.toposort()]
    assert any([op == tcn.blas.gpu_gemm_inplace for op in graph_ops])

    b_val = my_rand(*b_shp)
    c_val = my_rand(a_shp[0], b_shp[1])
    f(b_val, c_val)

    expected = numpy.dot(a_start, b_val) + numpy.exp(c_val)
    assert numpy.allclose(expected, a.get_value())

    # Try with a matrix equal to a0, but with strides in both dims
    a.set_value(a_start)
    flipped = a.get_value(borrow=True,
                          return_internal_type=True)[::-1, ::-1]
    a.set_value(flipped, borrow=True)
    f(b_val, c_val)
def cmp(a_shp, b_shp):
    """Compile ``a <- dot(a, b) + exp(c)``, require ``gpu_gemm_inplace`` in
    the graph and compare the updated ``a`` with the NumPy result.

    :param a_shp: shape of the shared left operand.
    :param b_shp: shape of the right operand.
    """
    host_a = my_rand(*a_shp)
    a = tcn.shared_constructor(host_a, 'a')
    b = tensor.fmatrix('b')
    c = tensor.fmatrix('c')
    f = pfunc(
        [b, c], [],
        updates=[(a, tensor.dot(a, b) + tensor.exp(c))],
        mode=mode_with_gpu)

    # The in-place gemm op is expected in the compiled graph.
    topo = f.maker.env.toposort()
    assert any([apply_node.op == tcn.blas.gpu_gemm_inplace
                for apply_node in topo])

    host_b = my_rand(*b_shp)
    host_c = my_rand(a_shp[0], b_shp[1])
    f(host_b, host_c)
    reference = numpy.dot(host_a, host_b) + numpy.exp(host_c)
    assert numpy.allclose(reference, a.get_value())

    # Try with a matrix equal to a0, but with strides in both dims
    a.set_value(host_a)
    internal = a.get_value(borrow=True, return_internal_type=True)
    a.set_value(internal[::-1, ::-1], borrow=True)
    f(host_b, host_c)
def test_elemwise_composite_float64():
    """ Ensure composite elemwise ops placed on the GPU hold no float64.

    nvcc downcasts float64 to float32 by default; telling it not to is
    only possible on some devices.
    """
    a = tensor.fmatrix()
    b = tensor.fmatrix()
    av = theano._asarray(numpy.random.rand(4, 4), dtype='float32')
    bv = numpy.ones((4, 4), dtype='float32')

    def get_all_basic_scalar(composite_op):
        # Flatten the composite's graph, descending into nested Composite
        # items.
        flat = []
        for entry in composite_op.env.toposort():
            if isinstance(entry, theano.scalar.Composite):
                flat += get_all_basic_scalar(entry)
            else:
                flat.append(entry)
        return flat

    def is_float64(var):
        return var.type.dtype == 'float64'

    for mode in [mode_with_gpu,
                 mode_with_gpu.excluding('gpu_after_fusion'),
                 mode_with_gpu.excluding('elemwise_fusion')]:
        comparison = tensor.lt(tensor.cast(a, 'float64') ** 2, b)
        f = pfunc([a, b], tensor.cast(comparison, 'float32'), mode=mode)

        out = f(av, bv)
        assert numpy.all(out == ((av ** 2) < bv))

        for node in f.maker.env.toposort():
            on_gpu = isinstance(node.op, cuda.GpuElemwise)
            if on_gpu and isinstance(node.op.scalar_op,
                                     theano.scalar.Composite):
                for s in get_all_basic_scalar(node.op.scalar_op):
                    assert not any([is_float64(var)
                                    for var in s.inputs + s.outputs])
def run_conv_nnet1(use_gpu):
    """Train a network with one convolution layer and one dense layer.

    The network is trained on a single fixed random batch for 10 steps
    (1 step when ``config.mode`` is ``'DEBUG_MODE'``).

    :param use_gpu: when true, parameters are created with
        ``tcn.shared_constructor``; otherwise with ``shared``.  It is also
        passed to ``get_mode`` to pick the compilation mode.
    :returns: the value returned by the last call of the training function.
    """
    if use_gpu:
        shared_fn = tcn.shared_constructor
    else:
        shared_fn = shared

    n_batch = 16
    n_kern = 20
    shape_img = (n_batch, 1, 32, 32)
    shape_kern = (n_kern, 1, 5, 5)
    n_train = 10
    if config.mode == 'DEBUG_MODE':
        n_train = 1

    logical_hid_shape = tcn.blas.GpuConv.logical_output_shape_2d(
        shape_img[2:], shape_kern[2:], 'valid')
    n_hid = n_kern * logical_hid_shape[0] * logical_hid_shape[1]
    n_out = 10

    w = shared_fn(0.01 * (my_rand(*shape_kern) - 0.5), 'w')
    b = shared_fn(my_zeros((n_kern,)), 'b')
    # Labelled 'v': it used to carry the label 'c', duplicating the label
    # of the bias created on the next line.
    v = shared_fn(my_zeros((n_hid, n_out)), 'v')
    c = shared_fn(my_zeros(n_out), 'c')

    x = tensor.Tensor(dtype='float32', broadcastable=(0, 1, 0, 0))('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    conv_op = conv.ConvOp(shape_img[2:], shape_kern[2:], n_kern, n_batch,
                          1, 1)

    hid = tensor.tanh(conv_op(x, w) + b.dimshuffle((0, 'x', 'x')))
    hid_flat = hid.reshape((n_batch, n_hid))
    out = tensor.tanh(tensor.dot(hid_flat, v) + c)
    # The learning rate is folded into the loss, so the update below is
    # simply p - g.
    loss = tensor.sum(0.5 * (out - y) ** 2 * lr)

    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)

    train = pfunc(
        [x, y, lr],
        [loss],
        mode=mode,
        updates=[(p, p - g) for p, g in zip(params, gparams)])

    xval = my_rand(*shape_img)
    yval = my_rand(n_batch, n_out)
    lr_val = theano._asarray(0.01, dtype='float32')

    for i in xrange(n_train):
        rval = train(xval, yval, lr_val)
    print_mode(mode)
    return rval
def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
             n_train=100):
    """Train a two-layer tanh network on one fixed random batch and time it.

    :param use_gpu: when true, parameters are created with
        ``tcn.shared_constructor``; otherwise with ``shared``.  It is also
        passed to ``get_mode`` to pick the compilation mode.
    :param n_batch: number of rows of the random input and target.
    :param n_in: number of input features.
    :param n_hid: number of hidden units.
    :param n_out: number of output units.
    :param n_train: number of training calls (forced to 1 when
        ``config.mode`` is ``'DEBUG_MODE'``).
    :returns: ``(losses, dt)`` where ``losses`` is the array of values
        returned by each training call and ``dt`` the wall-clock time
        spent in the training loop.
    """
    if config.mode == 'DEBUG_MODE':
        n_train = 1

    if use_gpu:
        shared_fn = tcn.shared_constructor
    else:
        shared_fn = shared

    w = shared_fn(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
    b = shared_fn(my_zeros(n_hid), 'b')
    # Labelled 'v': it used to carry the label 'c', duplicating the label
    # of the output bias created on the next line.
    v = shared_fn(my_zeros((n_hid, n_out)), 'v')
    c = shared_fn(my_zeros(n_out), 'c')

    x = tensor.fmatrix('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    hid = tensor.tanh(tensor.dot(x, w) + b)
    out = tensor.tanh(tensor.dot(hid, v) + c)
    # The learning rate is folded into the loss, so the update below is
    # simply p - g.
    loss = tensor.sum(0.5 * (out - y) ** 2 * lr)

    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)

    train = pfunc([x, y, lr], [loss], mode=mode,
                  updates=[(p, p - g) for p, g in izip(params, gparams)])

    xval = my_rand(n_batch, n_in)
    yval = my_rand(n_batch, n_out)
    lr_val = theano._asarray(0.01, dtype='float32')

    t0 = time.time()
    rval = []
    for i in xrange(n_train):
        rval.append(train(xval, yval, lr_val))
    dt = time.time() - t0

    print_mode(mode)
    return numpy.asarray(rval), dt
def __init__(
    self,
    input=None,
    target=None,
    n_input=1,
    n_hidden=1,
    n_output=1,
    lr=1e-3,
    **kw,
):
    """Create the network's symbolic graph and its compiled functions.

    ``input`` and ``target`` default to freshly created ``dvector``
    variables named "input" and "target".  ``w1`` has shape
    ``(n_hidden, n_input)`` and ``w2`` has shape ``(n_output, n_hidden)``;
    both start at zero.  ``lr`` is stored in a shared variable.  Remaining
    keyword arguments go to the parent constructor.
    """
    super().__init__(**kw)

    self.input = tensor.dvector("input") if input is None else input
    self.target = tensor.dvector("target") if target is None else target

    self.lr = shared(lr, "learning_rate")
    self.w1 = shared(np.zeros((n_hidden, n_input)), "w1")
    self.w2 = shared(np.zeros((n_output, n_hidden)), "w2")

    # Forward pass and summed squared-error cost.
    pre_activation = tensor.dot(self.w1, self.input)
    self.hidden = sigmoid(pre_activation)
    self.output = tensor.dot(self.w2, self.hidden)
    residual = self.output - self.target
    self.cost = tensor.sum(residual ** 2)

    # Gradient-descent rule for each weight matrix, in w1, w2 order.
    self.sgd_updates = {}
    for weight in (self.w1, self.w2):
        gradient = tensor.grad(self.cost, weight)
        self.sgd_updates[weight] = weight - self.lr * gradient

    self.sgd_step = pfunc(
        params=[self.input, self.target],
        outputs=[self.output, self.cost],
        updates=self.sgd_updates,
    )
    self.compute_output = pfunc([self.input], self.output)
    self.output_from_hidden = pfunc([self.hidden], self.output)
def test_elemwise1():
    """ Compile and run three elemwise updates of a (3, 4) shared matrix,
    with no broadcasting and a non power-of-two shape. """
    shape = (3, 4)

    def rand_plus(offset):
        # float32 random matrix of the test shape, shifted by `offset`.
        return theano._asarray(numpy.random.rand(*shape),
                               dtype='float32') + offset

    a = tcn.shared_constructor(rand_plus(0.5), 'a')
    b = tensor.fmatrix()

    # Built lazily so each expression is created right before it is
    # compiled.
    expression_builders = (
        lambda: b ** a,
        lambda: tensor.exp(b ** a),
        lambda: a + b * tensor.exp(b ** a),
    )
    for build in expression_builders:
        # let debugmode catch any mistakes
        f = pfunc([b], [], updates=[(a, build())], mode=mode_with_gpu)
        f(rand_plus(0.3))
def cmp(a_shp, b_shp):
    """Check that the update ``a <- dot(a, b)`` matches ``numpy.dot``.

    :param a_shp: shape of the shared matrix ``a``.
    :param b_shp: shape of the input ``b``.
    """
    a = tcn.shared_constructor(my_rand(*a_shp), 'a')
    b = tensor.fmatrix()

    product_update = [(a, tensor.dot(a, b))]
    f = pfunc([b], [], updates=product_update, mode=mode_with_gpu)

    # Multiply by 1.0 to keep a snapshot of the value before the update.
    a_before = a.get_value() * 1.0
    b_val = my_rand(*b_shp)
    f(b_val)

    expected = numpy.dot(a_before, b_val)
    assert numpy.allclose(expected, a.get_value())
def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
             n_train=100):
    """Train a small two-layer tanh network and measure the training time.

    The same random batch is used for every training call.

    :param use_gpu: when true, parameters are created with
        ``tcn.shared_constructor``; otherwise with ``shared``.  It is also
        passed to ``get_mode`` to pick the compilation mode.
    :param n_batch: number of rows of the random input and target.
    :param n_in: number of input features.
    :param n_hid: number of hidden units.
    :param n_out: number of output units.
    :param n_train: number of training calls (forced to 1 when
        ``config.mode`` is ``'DEBUG_MODE'``).
    :returns: ``(losses, dt)``: the array of values returned by the
        training calls, and the elapsed wall-clock time of the loop.
    """
    if config.mode == 'DEBUG_MODE':
        n_train = 1

    if use_gpu:
        shared_fn = tcn.shared_constructor
    else:
        shared_fn = shared

    w = shared_fn(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
    b = shared_fn(my_zeros(n_hid), 'b')
    # Labelled 'v': it used to carry the label 'c', duplicating the label
    # of the output bias created on the next line.
    v = shared_fn(my_zeros((n_hid, n_out)), 'v')
    c = shared_fn(my_zeros(n_out), 'c')

    x = tensor.fmatrix('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    hid = tensor.tanh(tensor.dot(x, w) + b)
    out = tensor.tanh(tensor.dot(hid, v) + c)
    # The learning rate is folded into the loss, so the update below is
    # simply p - g.
    loss = tensor.sum(0.5 * (out - y) ** 2 * lr)

    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)

    train = pfunc([x, y, lr], [loss], mode=mode,
                  updates=[(p, p - g) for p, g in izip(params, gparams)])

    xval = my_rand(n_batch, n_in)
    yval = my_rand(n_batch, n_out)
    lr_val = theano._asarray(0.01, dtype='float32')

    t0 = time.time()
    rval = []
    for i in xrange(n_train):
        rval.append(train(xval, yval, lr_val))
    dt = time.time() - t0

    print_mode(mode)
    return numpy.asarray(rval), dt
def test_elemwise_fusion():
    """ Test that the GpuElemwise fusion works correctly: ``a + b + c``
    must compile to 4 nodes, the third one holding a Composite scalar op.
    """
    shape = (3, 4)

    def rand_f32():
        return theano._asarray(numpy.random.rand(*shape), dtype='float32')

    a = cuda.shared_constructor(rand_f32(), 'a')
    b = tensor.fmatrix()
    c = tensor.fmatrix()
    f = pfunc([b, c], [a + b + c], mode=mode_with_gpu)

    topo = f.maker.env.toposort()
    for position, apply_node in enumerate(topo):
        print >> sys.stdout, position, apply_node

    assert len(topo) == 4
    fused_scalar_op = topo[2].op.scalar_op
    assert isinstance(fused_scalar_op, theano.scalar.basic.Composite)

    # let debugmode catch errors
    first = rand_f32()
    second = rand_f32()
    f(first, second)
def test_huge_elemwise_fusion():
    """ Test that the GpuElemwise fusion works correctly.

    We check that a node is fused with only part of its inputs when there
    are too many inputs and fusing them all would bust the 256 bytes
    limit.
    """
    def chained_difference(terms):
        # terms[0] - terms[1] - ... evaluated left to right.
        total = terms[0]
        for term in terms[1:]:
            total = total - term
        return total

    def random_inputs(dims, count):
        # `count` float32 random arrays of shape `dims`, drawn in order.
        return [theano._asarray(numpy.random.rand(*dims), dtype='float32')
                for _ in range(count)]

    def count_ops(nodes, op_class):
        return sum([isinstance(apply_node.op, op_class)
                    for apply_node in nodes])

    shape = (2, 3, 4, 5, 6)
    ttype = tensor.tensor(dtype='float32',
                          broadcastable=(False,) * len(shape))
    inputs = [tensor.tanh(ttype) for _ in range(10)]
    f = pfunc(inputs, [chained_difference(inputs[:7])], mode=mode_with_gpu)
    topo = f.maker.env.toposort()
    assert len(topo) == 10
    assert count_ops(topo, cuda.GpuElemwise) == 2
    assert isinstance(topo[7].op.scalar_op, theano.scalar.basic.Sub)
    assert isinstance(topo[8].op.scalar_op, theano.scalar.basic.Composite)
    # let debugmode catch errors
    f(*random_inputs(shape, 10))

    # Test the case where we can't put the computation on the gpu: there
    # are too many dimensions in the input to have 2 inputs to the op.
    shape = (1, 2, 3, 4, 5, 6, 7, 2, 2, 3, 2, 1, 2, 2, 2,)
    ttype = tensor.tensor(dtype='float32',
                          broadcastable=(False,) * len(shape))
    inputs = [tensor.tanh(ttype) for _ in range(10)]
    f = pfunc(inputs, [chained_difference(inputs[:7])], mode=mode_with_gpu)
    topo = f.maker.env.toposort()
    assert len(topo) == 1
    assert count_ops(topo, cuda.GpuElemwise) == 0
    assert count_ops(topo, tensor.Elemwise) == 1
    # let debugmode catch errors
    f(*random_inputs(shape, 10))
def cmp(a_shp, b_shp):
    """Check the update ``a <- dot(a, b) + exp(c)`` against NumPy and
    require ``gpu_gemm_inplace`` in the compiled graph.

    :param a_shp: shape of the shared matrix ``a``.
    :param b_shp: shape of the input ``b``.
    """
    a = tcn.shared_constructor(my_rand(*a_shp), 'a')
    b = tensor.fmatrix('b')
    c = tensor.fmatrix('c')

    gemm_expr = tensor.dot(a, b) + tensor.exp(c)
    f = pfunc([b, c], [], updates=[(a, gemm_expr)], mode=mode_with_gpu)

    graph_ops = [apply_node.op for apply_node in f.maker.env.toposort()]
    assert any([op == tcn.blas.gpu_gemm_inplace for op in graph_ops])

    # Multiply by 1.0 to keep a snapshot of the value before the update.
    a_before = a.get_value() * 1.0
    b_val = my_rand(*b_shp)
    c_val = my_rand(a_shp[0], b_shp[1])
    f(b_val, c_val)

    expected = numpy.dot(a_before, b_val) + numpy.exp(c_val)
    assert numpy.allclose(expected, a.get_value())
def test_elemwise4():
    """ Test that two vectors can be broadcast to form an outer product
    (by performing a rank-1 matrix update). """
    shape = (3, 4)
    a = tcn.shared_constructor(
        theano._asarray(numpy.random.rand(*shape), dtype='float32'), 'a')
    b = tensor.fvector()
    c = tensor.fvector()

    # b becomes a row and c a column, so their product is a matrix.
    outer = b.dimshuffle('x', 0) * c.dimshuffle(0, 'x')
    f = pfunc([b, c], [], updates=[(a, (a + outer))], mode=mode_with_gpu)

    found_elemwise = False
    for position, apply_node in enumerate(f.maker.env.toposort()):
        print >> sys.stdout, position, apply_node
        if isinstance(apply_node.op, tensor.Elemwise):
            found_elemwise = True
    assert not found_elemwise

    # let debugmode catch errors
    row_vals = theano._asarray(numpy.random.rand(4), dtype='float32')
    col_vals = theano._asarray(numpy.random.rand(3), dtype='float32')
    f(row_vals, col_vals)
def cmp(a_shp, b_shp):
    """Check ``tensor.slinalg.solve(a, b)`` compiled in GPU mode: the
    graph must use ``GpuSolve`` and ``dot(a, result)`` must be close to
    ``b``.

    :param a_shp: shape of the coefficient matrix.
    :param b_shp: shape of the right-hand side.
    """
    def uniform_f32(dims):
        return numpy.random.uniform(-0.4, 0.4, dims).astype('float32')

    a_host = uniform_f32(a_shp)
    a = cuda.shared_constructor(a_host, 'a')
    b_host = uniform_f32(b_shp)
    b = cuda.shared_constructor(b_host, 'b')

    f = pfunc([], tensor.slinalg.solve(a, b), mode=mode_with_gpu)

    # The op feeding the first input of the second graph node.
    producer = f.maker.fgraph.toposort()[1].inputs[0].owner
    assert isinstance(producer.op, cuda.cula.GpuSolve)

    # The optimizer must return something truthy for a solve node.
    solve_node = tensor.slinalg.solve(a, b).owner
    assert cuda.opt.local_gpu_solve.transform(solve_node)

    solution = f()
    assert numpy.allclose(numpy.dot(a_host, solution), b_host)
def test_elemwise_fusion():
    """ Test that the GpuElemwise fusion works correctly: the graph of
    ``a + b + c`` must have 4 nodes and a Composite op at index 2. """
    shape = (3, 4)
    a_val = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a = cuda.shared_constructor(a_val, 'a')
    b = tensor.fmatrix()
    c = tensor.fmatrix()

    total = a + b + c
    f = pfunc([b, c], [total], mode=mode_with_gpu)

    nodes = f.maker.fgraph.toposort()
    for index, apply_node in enumerate(nodes):
        print >> sys.stdout, index, apply_node

    assert len(nodes) == 4
    assert isinstance(nodes[2].op.scalar_op,
                      theano.scalar.basic.Composite)

    # let debugmode catch errors
    b_val = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    c_val = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    f(b_val, c_val)
def test_elemwise0():
    """ Add a matrix of ones to a shared GPU matrix and check that the
    addition runs in place and yields the expected value. """
    a = tcn.shared_constructor(
        theano._asarray(numpy.random.rand(4, 4), dtype='float32'), 'a')

    b = tensor.fmatrix()

    f = pfunc([b], [], updates=[(a, a + b)], mode=mode_with_gpu)

    # Check that we work inplace.  items() is wrapped in list() because
    # where it returns a view object the comparison with a list is always
    # False.
    destroy_map = f.maker.env.toposort()[1].op.destroy_map
    assert list(destroy_map.items()) == [(0, [0])]

    # Multiply by 1.0 to keep a snapshot of the value before the update.
    a0 = a.get_value() * 1.0
    f(numpy.ones((4, 4), dtype='float32'))

    assert numpy.all(a0 + 1.0 == a.get_value())
def test_elemwise_collapse7(atol=1e-6):
    """ Test when one input has one broadcastable dimension and the other
    is a scalar.

    :param atol: absolute tolerance used when comparing with NumPy.
    """
    shape = (5, 4, 1)

    def rand_f32():
        return theano._asarray(numpy.random.rand(*shape), dtype='float32')

    # NOTE(review): this first value is overwritten on the next line and
    # never read; kept so the statements executed stay the same.
    a = cuda_ndarray.CudaNdarray(rand_f32())
    a = rand_f32()
    shared_a = tcn.shared_constructor(a.copy(), 'a')
    # Insert a broadcastable axis in second position.
    with_new_axis = shared_a.dimshuffle(0, 'x', 1, 2)
    f = pfunc([], [with_new_axis + 2], mode=mode_with_gpu)

    # let debugmode catch errors
    result = f()[0]
    expected = (a + 2).reshape(shape[0], 1, shape[1], shape[2])
    assert numpy.allclose(result, expected, atol=atol)
def test_elemwise0():
    """ Update a shared GPU matrix with ``a + b`` and check both that the
    op works in place and that the stored value increased by one. """
    a = tcn.shared_constructor(
        theano._asarray(numpy.random.rand(4, 4), dtype='float32'), 'a')

    b = tensor.fmatrix()

    f = pfunc([b], [], updates=[(a, a + b)], mode=mode_with_gpu)

    # Check that we work inplace.  list() makes the comparison valid where
    # items() returns a view object, which never equals a list.
    second_op = f.maker.fgraph.toposort()[1].op
    assert list(second_op.destroy_map.items()) == [(0, [0])]

    # Multiply by 1.0 to keep a snapshot of the value before the update.
    a0 = a.get_value() * 1.0
    f(numpy.ones((4, 4), dtype='float32'))

    assert numpy.all(a0 + 1.0 == a.get_value())
def test_maxpool():
    """TODO: test the gpu version!!! """
    # Each case: image height, image width, expected result when the
    # second Pool argument is True, expected result when it is False.
    cases = [
        (4, 4,
         [[[[5, 7], [13, 15]]]],
         [[[[5, 7], [13, 15]]]]),
        (5, 5,
         [[[[6, 8], [16, 18], [21, 23]]]],
         [[[[6, 8, 9], [16, 18, 19], [21, 23, 24]]]]),
    ]
    for height, width, expected_true, expected_false in cases:
        for border, expected in [(True, expected_true),
                                 (False, expected_false)]:
            expected = numpy.array(expected)
            pool_op = tcn.blas.Pool((2, 2), border)
            tensor4_type = tensor.TensorType(
                "float32", (False, False, False, False))
            b = tensor4_type()
            f = pfunc([b], [pool_op(b)], mode=mode_with_gpu)

            # Input holds 0 .. height*width-1 laid out as one image.
            image = numpy.arange(0, height * width).reshape(
                1, 1, height, width)
            pooled = f(image)[0]
            assert (expected == pooled).all()
def test_elemwise3():
    """ Several kinds of elemwise expressions with dimension permutations
    and broadcasting: the compiled graph must hold no ``tensor.Elemwise``.
    """
    shape = (3, 4, 5, 6)
    perm = [2, 0, 3, 1]
    a_val = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a = tcn.shared_constructor(a_val, 'a')
    b = tensor.fvector()

    updated = (a + b).dimshuffle(perm)
    updated *= tensor.exp(1 + b ** a).dimshuffle(perm)
    f = pfunc([b], [], updates=[(a, updated)], mode=mode_with_gpu)

    cpu_elemwise_found = any(
        isinstance(apply_node.op, tensor.Elemwise)
        for apply_node in f.maker.fgraph.toposort())
    assert not cpu_elemwise_found

    # let debugmode catch errors
    f(theano._asarray(numpy.random.rand(6), dtype='float32'))
def test_elemwise3():
    """ Elemwise expressions mixing dimension permutations with
    broadcasting of a vector against a 4-d shared tensor. """
    shape = (3, 4, 5, 6)
    order = [2, 0, 3, 1]
    a = tcn.shared_constructor(
        theano._asarray(numpy.random.rand(*shape), dtype='float32'), 'a')
    b = tensor.fvector()

    next_a = (a + b).dimshuffle(order)
    next_a *= tensor.exp(1 + b ** a).dimshuffle(order)
    f = pfunc([b], [], updates=[(a, next_a)], mode=mode_with_gpu)

    # No CPU Elemwise op may be left in the compiled graph.
    elemwise_nodes = [apply_node
                      for apply_node in f.maker.env.toposort()
                      if isinstance(apply_node.op, tensor.Elemwise)]
    assert not elemwise_nodes

    # let debugmode catch errors
    b_val = theano._asarray(numpy.random.rand(6), dtype='float32')
    f(b_val)
def cmp(a_shp, b_shp):
    """Check the update ``a <- dot(a, b)`` against ``numpy.dot``, then run
    it once more on a value reversed along both dimensions.

    :param a_shp: shape of the shared matrix ``a``.
    :param b_shp: shape of the input ``b``.
    """
    a_host = my_rand(*a_shp)
    a = tcn.shared_constructor(a_host, "a")
    b = tensor.fmatrix()

    f = pfunc([b], [], updates=[(a, tensor.dot(a, b))], mode=mode_with_gpu)

    b_host = my_rand(*b_shp)
    f(b_host)
    expected = numpy.dot(a_host, b_host)
    assert numpy.allclose(expected, a.get_value())

    # Try with a matrix equal to a0, but with strides in both dims
    a.set_value(a_host)
    internal = a.get_value(borrow=True, return_internal_type=True)
    a.set_value(internal[::-1, ::-1], borrow=True)
    f(b_host)
def test_maxpool():
    """TODO: test the gpu version!!! """
    # (rows, cols, expected with border argument True, expected with False)
    scenarios = [
        (4, 4,
         [[[[5, 7], [13, 15]]]],
         [[[[5, 7], [13, 15]]]]),
        (5, 5,
         [[[[6, 8], [16, 18], [21, 23]]]],
         [[[[6, 8, 9], [16, 18, 19], [21, 23, 24]]]]),
    ]
    for rows, cols, with_true, with_false in scenarios:
        for border, wanted in [(True, with_true), (False, with_false)]:
            wanted = numpy.array(wanted)
            downsample = tcn.blas.DownsampleFactorMax((2, 2), border)
            four_d = tensor.TensorType("float32",
                                       (False, False, False, False))
            b = four_d()
            f = pfunc([b], [downsample(b)], mode=mode_with_gpu)

            # One image whose entries count up from 0.
            b_val = numpy.arange(0, rows * cols).reshape(1, 1, rows, cols)
            got = f(b_val)[0]
            assert (wanted == got).all()
def cmp(a_shp, b_shp):
    """Check a function returning ``dot(a, b2) + c`` while updating ``a``
    to ``dot(a, b) + c``: the graph must contain ``gpu_gemm_no_inplace``
    and both results must match NumPy.

    :param a_shp: shape of the shared matrix ``a``.
    :param b_shp: shape of the inputs ``b`` and ``b2``.
    """
    a = tcn.shared_constructor(my_rand(*a_shp), 'a')
    c_host = my_rand(a_shp[0], b_shp[1])
    c = tcn.shared_constructor(c_host.copy(), 'c')

    b = tcn.fmatrix('b')
    b2 = tcn.fmatrix('b2')

    f = pfunc([b, b2],
              [tensor.dot(a, b2) + c],
              updates=[(a, tensor.dot(a, b) + c)],
              mode=mode_with_gpu)

    # Multiply by 1.0 to keep a snapshot of the value before the update.
    a_before = a.get_value() * 1.0
    graph_ops = [apply_node.op for apply_node in f.maker.env.toposort()]
    assert any([op == tcn.blas.gpu_gemm_no_inplace for op in graph_ops])

    b_host = my_rand(*b_shp)
    b2_host = my_rand(*b_shp)
    returned = f(b_host, b2_host)

    assert numpy.allclose(numpy.dot(a_before, b_host) + c_host,
                          a.get_value())
    assert numpy.allclose(numpy.dot(a_before, b2_host) + c_host, returned)
def test_elemwise_collapse7(atol=1e-6): """ Test when one input have one broadcastable dimension and the other is a scalar""" shape = (5, 4, 1) a = cuda_ndarray.CudaNdarray(theano._asarray(numpy.random.rand(*shape), dtype="float32")) a = theano._asarray(numpy.random.rand(*shape), dtype="float32") a2 = tcn.shared_constructor(a.copy(), "a") a3 = a2.dimshuffle(0, "x", 1, 2) f = pfunc([], [a3 + 2]) if False: for id, n in enumerate(f.maker.env.toposort()): print id, n # let debugmode catch errors out = f()[0] ans = (a + 2).reshape(shape[0], 1, shape[1], shape[2]) assert numpy.allclose(out, ans, atol=atol) print "Expected collapse to c contiguous"
def test_elemwise_comparaison_cast():
    """Test that an elemwise comparison followed by a cast to float32 is
    pushed to the gpu.

    For each of lt, gt, le and ge the compiled output must equal the numpy
    comparison and the graph must contain a ``cuda.GpuElemwise`` node.
    """
    a = tensor.fmatrix()
    b = tensor.fmatrix()
    av = theano._asarray(numpy.random.rand(4, 4), dtype="float32")
    bv = numpy.ones((4, 4), dtype="float32")

    cases = [
        (tensor.lt, av < bv),
        (tensor.gt, av > bv),
        (tensor.le, av <= bv),
        (tensor.ge, av >= bv),
    ]
    for g, ans in cases:
        compared = tensor.cast(g(a, b), "float32")
        f = pfunc([a, b], compared, mode=mode_with_gpu)

        out = f(av, bv)
        assert numpy.all(out == ans)

        on_gpu = [isinstance(node.op, cuda.GpuElemwise)
                  for node in f.maker.env.toposort()]
        assert any(on_gpu)
def cmp(a_shp, b_shp):
    """Check ``dot(a, b)`` of two shared variables against numpy.dot.

    a_shp and b_shp are the shapes of the random float32 operands.  Also
    asserts that ``cuda.opt.local_gpu_dot_to_dot22.transform`` returns a
    truthy value for the dot node.
    """
    a0 = numpy.random.rand(*a_shp).astype('float32')
    a = cuda.shared_constructor(a0, 'a')

    b0 = numpy.random.rand(*b_shp).astype('float32')
    # Named 'b' (it was 'a', a copy-paste slip) so the two shared variables
    # can be told apart in graph printouts.
    b = cuda.shared_constructor(b0, 'b')

    f = pfunc([], tensor.dot(a, b), mode=mode_with_gpu)
    assert cuda.opt.local_gpu_dot_to_dot22.transform(
        tensor.dot(a, b).owner)
    out = f()

    assert numpy.allclose(numpy.dot(a0, b0), out)

    # Run again on a strided input: reset a to a0, then replace its value
    # with a view of the internal value reversed along the first axis.
    # The result of this second call is not checked.
    a.set_value(a0)
    a.set_value(
        a.get_value(borrow=True, return_internal_type=True)[::-1],
        borrow=True)
    f()
def test_elemwise_collapse6():
    """Test when all inputs have two broadcastable dimensions at the
    beginning."""
    shape = (4, 5)
    # Host-side reference value.  The CudaNdarray that used to be built
    # first was overwritten by this assignment before any use, so it is gone.
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    # 'x' inserts new broadcastable axes at positions 0 and 1.
    a3 = a2.dimshuffle('x', 'x', 0, 1)
    b = tcn.CudaNdarrayType((True, True, False, False))()
    f = pfunc([b], [a3 + b], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(1, 1, shape[0], shape[1]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, 1, shape[0], shape[1]) + v)
def test_elemwise0():
    """Check that the update ``a <- a + b`` works in place and adds
    correctly.

    Feeds a 4x4 matrix of ones and asserts that every element of the shared
    variable grew by exactly 1.0.
    """
    a = tcn.shared_constructor(
        theano._asarray(numpy.random.rand(4, 4), dtype="float32"), "a")

    b = tensor.fmatrix()

    f = pfunc([b], [], updates=[(a, a + b)], mode=mode_with_gpu)

    # check that we work inplace.
    # list(...) keeps the comparison meaningful on Python 3, where
    # dict.items() is a view that never compares equal to a list.
    destroy_items = list(f.maker.env.toposort()[1].op.destroy_map.items())
    assert destroy_items == [(0, [0])]

    # Copy of the value before the update is applied.
    a0 = a.get_value() * 1.0
    # Single-argument print calls produce the same text as the former
    # "print x, y" statements on Python 2 and also parse on Python 3.
    print("BEFORE ADD %s" % (a.get_value(),))
    for i, node in enumerate(f.maker.env.toposort()):
        print("%s %s" % (i, node))
    f(numpy.ones((4, 4), dtype="float32"))
    print("AFTER ADD %s" % (a.get_value(),))

    assert numpy.all(a0 + 1.0 == a.get_value())
def test_elemwise_comparaison_cast():
    """Test that an elemwise comparison followed by a cast to float32 is
    pushed to the gpu.
    """
    lhs = tensor.fmatrix()
    rhs = tensor.fmatrix()
    lhs_val = theano._asarray(numpy.random.rand(4, 4), dtype='float32')
    rhs_val = numpy.ones((4, 4), dtype='float32')

    # Each symbolic comparison is paired with its numpy reference result.
    comparisons = (
        (tensor.lt, lhs_val < rhs_val),
        (tensor.gt, lhs_val > rhs_val),
        (tensor.le, lhs_val <= rhs_val),
        (tensor.ge, lhs_val >= rhs_val),
    )
    for compare, expected in comparisons:
        as_float = tensor.cast(compare(lhs, rhs), 'float32')
        fn = pfunc([lhs, rhs], as_float, mode=mode_with_gpu)

        result = fn(lhs_val, rhs_val)
        assert numpy.all(result == expected)

        # At least one node of the compiled graph must be a GpuElemwise.
        topo = fn.maker.env.toposort()
        assert any([isinstance(node.op, cuda.GpuElemwise) for node in topo])
def test_elemwise_collapse2():
    """Test when only one input has one broadcastable dimension."""
    shape = (4, 5, 9)
    # Host-side reference value.  The CudaNdarray that used to be built
    # first was overwritten by this assignment before any use, so it is gone.
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    # 'x' inserts a new broadcastable axis at position 1.
    a3 = a2.dimshuffle(0, 'x', 1, 2)
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(shape[0], 5, *shape[1:]),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(shape[0], 1, *shape[1:]) + v)
def cmp(a_shp, b_shp):
    """Run the update ``a <- dot(a, b)`` and compare it with numpy.dot.

    The operands come from ``my_rand(*a_shp)`` and ``my_rand(*b_shp)``.
    """
    a_start = my_rand(*a_shp)
    a_var = tcn.shared_constructor(a_start, 'a')

    b_var = tensor.fmatrix()

    update_pairs = [(a_var, tensor.dot(a_var, b_var))]
    apply_update = pfunc([b_var], [], updates=update_pairs,
                         mode=mode_with_gpu)

    b_value = my_rand(*b_shp)
    apply_update(b_value)

    assert numpy.allclose(numpy.dot(a_start, b_value), a_var.get_value())

    # Repeat on a strided value: restore the starting matrix, then install a
    # view of the internal value reversed on both axes.  This second call is
    # only executed; its result is not asserted on.
    a_var.set_value(a_start)
    reversed_view = a_var.get_value(
        borrow=True, return_internal_type=True)[::-1, ::-1]
    a_var.set_value(reversed_view, borrow=True)
    apply_update(b_value)
def cmp(a_shp, b_shp):
    """Check ``dot(a, b)`` of two shared variables against numpy.dot.

    Also asserts that ``cuda.opt.local_gpu_dot_to_dot22.transform`` returns
    a truthy value for the dot node.
    """
    lhs_val = numpy.random.rand(*a_shp).astype('float32')
    lhs = cuda.shared_constructor(lhs_val, 'a')

    rhs_val = numpy.random.rand(*b_shp).astype('float32')
    rhs = cuda.shared_constructor(rhs_val, 'b')

    product_fn = pfunc([], tensor.dot(lhs, rhs), mode=mode_with_gpu)

    dot_node = tensor.dot(lhs, rhs).owner
    assert cuda.opt.local_gpu_dot_to_dot22.transform(dot_node)

    result = product_fn()
    assert numpy.allclose(numpy.dot(lhs_val, rhs_val), result)

    # Second call on a strided input: restore the original value, then swap
    # in a view of the internal value reversed along the first axis.  The
    # output of this call is not checked.
    lhs.set_value(lhs_val)
    internal = lhs.get_value(borrow=True, return_internal_type=True)
    lhs.set_value(internal[::-1], borrow=True)
    product_fn()
def test_elemwise_collapse4():
    """Test when only one input has two broadcastable dimensions, one at
    each end, and we add a scalar."""
    shape = (4, 5)
    # Host-side reference value.  The CudaNdarray that used to be built
    # first was overwritten by this assignment before any use, so it is gone.
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    # 'x' inserts new broadcastable axes at the first and last positions.
    a3 = a2.dimshuffle('x', 0, 1, 'x')
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = (a3 + b + 2)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(5, shape[0], shape[1], 4),
                        dtype='float32')
    v = cuda_ndarray.CudaNdarray(v)
    # let debugmode catch errors
    out = f(v)[0]
    assert numpy.allclose(out, a.reshape(1, shape[0], shape[1], 1) + v + 2)
def speed_elemwise_collapse():
    """Time 100 calls of an elemwise expression on strided inputs.

    Used to time whether the collapse of c-contiguous dims is useful.

    Returns the wall-clock time in seconds spent in the 100 calls.  The
    measurement used to be discarded, so the function returned None.
    """
    shape = (30, 40, 50, 600)
    # Host-side value.  The CudaNdarray of the same shape that used to be
    # built first was overwritten before any use, so it is no longer
    # allocated.
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    a2 = tcn.shared_constructor(a, 'a')
    # Keep every other entry along the second axis.
    a3 = a2[:, ::2, :, :]
    b = tcn.CudaNdarrayType((False, False, False, False))()
    c = a3 + b * tensor.exp(1 + b ** a3)
    f = pfunc([b], [c], mode=mode_with_gpu)

    v = theano._asarray(numpy.random.rand(*shape), dtype='float32')
    # Same slicing as a3 so the fed value matches its shape.
    v = v[:, ::2, :, :]
    v = cuda_ndarray.CudaNdarray(v)
    t1 = time.time()
    for _ in range(100):
        # let debugmode catch errors
        f(v)
    t2 = time.time()
    return t2 - t1