def setUp(self):
    utt.seed_rng()
    self.mode = mode_with_gpu.excluding('constant_folding')
    self.gemv_op = gpu_sparse_block_gemv
    self.outer_op = gpu_sparse_block_outer
    self.gemv_class = GpuSparseBlockGemv
    self.outer_class = GpuSparseBlockOuter
def test_subsample():
    seed_rng()
    # implement when
    shapes = [((1, 1, 1, 1), (1, 1, 1, 1), (1, 1), (1, 1), (1, 1)),
              ((1, 1, 1, 1), (1, 1, 1, 1), (2, 2), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 1), (1, 1), (1, 1))]
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 2))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 2))

    version_valid = [-1]
    version_full = [-1]
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    exec_conv(version_valid, shapes, verbose, random, 'valid',
              print_=print_, ones=ones)
    exec_conv(version_full, shapes, verbose, random, 'full',
              print_=print_, ones=ones)
def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize,
                           n_train=10, check_isfinite=True,
                           verbose=0, version=-1):
    """Run the train function returned by build_conv_nnet2_classif on one device.
    """
    utt.seed_rng(seed)  # Seeds numpy.random with seed
    train, params, x_shape, y_shape, mode = build_conv_nnet2_classif(
        use_gpu=use_gpu,
        isize=isize,
        ksize=ksize,
        n_batch=bsize,
        verbose=verbose,
        version=version,
        check_isfinite=check_isfinite)

    xval = my_rand(*x_shape)
    yval = my_rand(*y_shape)
    lr = theano._asarray(0.01, dtype='float32')

    rvals = my_zeros(n_train)
    for i in xrange(n_train):
        rvals[i] = train(xval, yval, lr)[0]
def test_doubleop():
    utt.seed_rng()
    x = matrix()
    f = function([x], DoubleOp()(x))
    inp = numpy.asarray(numpy.random.rand(5, 4), dtype=config.floatX)
    out = f(inp)
    utt.assert_allclose(inp * 2, out)
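# A minimal sketch of the kind of DoubleOp the test above exercises, in the
# style of Theano's "creating an op" tutorial. The real test module defines
# (or imports) its own DoubleOp; this version is only illustrative.
#
# class DoubleOp(theano.Op):
#     __props__ = ()
#
#     def make_node(self, x):
#         x = theano.tensor.as_tensor_variable(x)
#         # Output has the same type as the input.
#         return theano.Apply(self, [x], [x.type()])
#
#     def perform(self, node, inputs, output_storage):
#         x, = inputs
#         z, = output_storage
#         # Simply double the input; the tests check f(inp) == inp * 2.
#         z[0] = x * 2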
def test_invalid_input_shape(self):
    """
    Tests that when the shape given at build time is not the same as at
    run time, we raise an error.
    """
    seed_rng()
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    global theano_mode
    theano_mode_orig = theano_mode
    try:
        if theano.config.mode in ['DebugMode', 'DEBUG_MODE']:
            theano_mode = theano.compile.mode.get_mode(
                'FAST_RUN').including('gpu')

        for mode in ['valid', 'full']:
            for shapes in [((3, 2, 8, 8), (4, 2, 5, 5), (8, 8)),
                           ((3, 2, 8, 8), (4, 2, 5, 5), (5, 8)),
                           # ((3, 2, 8, 8), (4, 2, 5, 5), (8, 5)),
                           # We use only the number of columns.
                           ]:
                self.assertRaises(ValueError, _params_allgood,
                                  shapes[0], shapes[1],
                                  verbose=verbose, random=random,
                                  mode=mode,
                                  print_=print_, ones=ones,
                                  compile_kshp=shapes[2])
    finally:
        theano_mode = theano_mode_orig
def test_valid_1_3_11_12():
    seed_rng()
    shapes = get_valid_shapes()
    version = [1, 3, 11, 12]
    verbose = 0

    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    shapes2 = []
    for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes):
        oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) -
                                                  numpy.asarray(kshape[2:]) +
                                                  numpy.asarray([1, 1]))
        if oshape[3] > device_prop['maxThreadsDim0']:
            continue
        if ((numpy.prod(ishape[2:]) + numpy.prod(kshape[2:])) * 4 >
                (16 * 1024 - 150)):
            continue
        if subshape == (1, 1):
            shapes2.append((ishape, kshape, subshape, istride, kstride))
    shapes = shapes2

    for t in exec_conv(version, shapes, verbose, random, 'valid',
                       print_=print_, ones=ones, rtol=1.1e-5):
        yield t
def setUp(self):
    if theano.config.mode == 'FAST_COMPILE':
        m = theano.compile.mode.get_mode('FAST_RUN').excluding(
            'local_elemwise_fusion')
    else:
        m = theano.compile.mode.get_default_mode().excluding(
            'local_elemwise_fusion')
    self.m = m
    utt.seed_rng()
def test_full():
    seed_rng()
    shapes = get_basic_shapes()
    shapes += get_shapes2()
    # test image stride
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(1, 2))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 1))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(-1, -1))
    shapes += get_shapes2(scales_img=(2, 2), kern_stride=(-1, -1))
    # test subsample done in a separate fct

    shapes += [
        # other test
        ((2, 1, 2, 2), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1)),
        ((3, 2, 4, 4), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 10, 10), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1)),
        ((1, 1, 4, 4), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 10, 10), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 10, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 20, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1)),
        ((3, 2, 8, 8), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)),  # stack, nkern, bsize
        ((3, 2, 8, 6), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)),  # stack, nkern, bsize, non-square image
        ((3, 2, 8, 6), (4, 2, 4, 3), (1, 1), (1, 1), (1, 1)),  # stack, nkern, bsize, non-square image, non-square kern
        ((3, 2, 8, 6), (4, 2, 4, 6), (1, 1), (1, 1), (1, 1)),  # stack, nkern, bsize, non-square image, non-square kern, kernsize==imgsize on one dim
        ((16, 5, 64, 64), (8, 5, 8, 8), (1, 1), (1, 1), (1, 1)),  # a big one
        ((16, 1, 28, 28), (20, 1, 5, 5), (1, 1), (1, 1), (1, 1)),  # MNIST LeNET layer 1
        ((20, 16, 32, 32), (1, 16, 28, 28), (1, 1), (1, 1), (1, 1)),  # layer 1 backprop to weights
        # other test
        ((3, 1, 1, 1), (2, 1, 5, 3), (1, 1), (1, 1), (1, 1)),  # kernel bigger than image
        ((3, 2, 1, 1), (4, 2, 1, 1), (1, 1), (1, 1), (1, 1)),
        ((3, 2, 4, 4), (4, 2, 2, 6), (1, 1), (1, 1), (1, 1)),
        ((3, 2, 4, 4), (4, 2, 8, 6), (1, 1), (1, 1), (1, 1)),  # kernel bigger than image
        ((4, 2, 10, 10), (3, 2, 2, 12), (1, 1), (1, 1), (1, 1)),
    ]
    shapes += [
        # ((60, 1, 28, 28), (20, 1, 5, 5), (1, 1), (1, 1), (1, 1)),  # test_lenet_28 1 layers
        # ((60, 20, 12, 12), (30, 20, 5, 5), (1, 1), (1, 1), (1, 1)),  # test_lenet_28 2 layers
        ((60, 30, 8, 8), (20, 30, 5, 5), (1, 1), (1, 1), (1, 1)),  # test_lenet_28 bprop 1 full
        # ((20, 60, 12, 12), (30, 60, 8, 8), (1, 1), (1, 1), (1, 1)),  # test_lenet_28 bprop 2 valid
        # ((1, 60, 28, 28), (20, 60, 24, 24), (1, 1), (1, 1), (1, 1)),  # test_lenet_28 bprop 2 valid
        # ((10, 1, 64, 64), (20, 1, 7, 7), (1, 1), (1, 1), (1, 1)),  # test_lenet_64 1 layers
        # ((10, 20, 29, 29), (30, 20, 7, 7), (1, 1), (1, 1), (1, 1)),  # test_lenet_64 2 layers
        ((10, 30, 23, 23), (20, 30, 7, 7), (1, 1), (1, 1), (1, 1)),  # test_lenet_64 full
        # ((20, 10, 29, 29), (30, 10, 23, 23), (1, 1), (1, 1), (1, 1)),  # test_lenet_64 bprop 1
        # ((1, 10, 64, 64), (20, 10, 58, 58), (1, 1), (1, 1), (1, 1)),  # test_lenet_64 bprop 2
        # Test more than maxThreadsDim0
        ((2, 4, 13, 1050), (3, 4, 10, 11), (1, 1), (1, 1), (1, 1)),
        ((2, 4, 1050, 13), (3, 4, 10, 11), (1, 1), (1, 1), (1, 1)),
    ]

    # shapes = shapes[:277]
    version = [-2, -1, 0, 1, 2, 3, 4, 5]
    verbose = 0
    # version = [4]
    random = True

    exec_conv(version, shapes, verbose, random, 'full')
def setUp(self): if theano.config.mode == "FAST_COMPILE": m = theano.compile.mode.get_mode("FAST_RUN").excluding("local_elemwise_fusion") else: m = theano.compile.mode.get_default_mode().excluding("local_elemwise_fusion") self.m = m utt.seed_rng()
def test_subsample():
    seed_rng()
    # implement when
    shapes = [((1, 1, 1, 1), (1, 1, 1, 1), (1, 1), (1, 1), (1, 1)),
              ((1, 1, 1, 1), (1, 1, 1, 1), (2, 2), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 1), (1, 1), (1, 1))]
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 2))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 2))

    # We keep only the versions that implement subsampling, to make the
    # test faster.
    version_valid = [-2, -1, 1, 3, 11, 12]
    version_full = [-2, -1]
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    exec_conv(version_valid, shapes, verbose, random, 'valid',
              print_=print_, ones=ones)
    exec_conv(version_full, shapes, verbose, random, 'full',
              print_=print_, ones=ones)
def test_valid():
    seed_rng()
    shapes = get_valid_shapes()
    # shapes = shapes[400:426]

    # I put -1 in case we forget to add a version to the test.
    # I put -2 to test the reference version.
    version = [-2, -1, 6]
    verbose = 0
    # version = [1]

    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    # exec_conv(version, shapes, verbose, random, 'valid',
    #           print_=print_, ones=ones, rtol=1.1e-5)

    mode = theano_mode.including("conv_gemm")
    version = [-1]  # Remove case not supported

    # Add tests with strided inputs but still square images and filters.
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
    shapes += get_shapes2(scales_kern=(2, 2), kern_stride=(2, 2))

    # Keep only tests with square images and filters, even with input strides.
    shapes = [shp for shp in shapes
              if (shp[0][2] / shp[3][0] == shp[0][3] / shp[3][1] and
                  shp[1][2] / shp[4][0] == shp[1][3] / shp[4][1])]

    exec_conv(version, shapes, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5,
              theano_mode=mode, cls=cuda.blas.GpuCorrMM)
def test_batch_normalization_train_without_running_averages():
    # compile and run batch_normalization_train without running averages
    utt.seed_rng()

    x, scale, bias, dy = (T.tensor4('x'), T.tensor4('scale'),
                          T.tensor4('bias'), T.tensor4('dy'))
    data_shape = (5, 10, 30, 25)
    param_shape = (1, 10, 30, 25)

    # forward pass
    out, x_mean, x_invstd = bn.batch_normalization_train(x, scale, bias,
                                                         'per-activation')
    # backward pass
    grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
    # compile
    f = theano.function([x, scale, bias, dy], [out, x_mean, x_invstd] + grads)
    # check if the abstract Ops have been replaced
    assert not any([isinstance(n.op, (bn.AbstractBatchNormTrain,
                                      bn.AbstractBatchNormInference,
                                      bn.AbstractBatchNormTrainGrad))
                    for n in f.maker.fgraph.toposort()])
    # run
    X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
    Dy = -1 + 2 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
    Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
    Bias = numpy.random.randn(*param_shape).astype(theano.config.floatX)
    f(X, Scale, Bias, Dy)
def _test_subsample(cls, mode, version_valid=[-1], version_full=[-1]):
    seed_rng()
    shapes = [((1, 1, 1, 1), (1, 1, 1, 1), (1, 1), (1, 1), (1, 1)),
              ((1, 1, 1, 1), (1, 1, 1, 1), (2, 2), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (1, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 3), (1, 1), (1, 1)),
              ((4, 2, 10, 10), (3, 2, 2, 2), (3, 1), (1, 1), (1, 1))]
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(1, 2))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 1))
    shapes += get_shapes2(scales_img=(2, 2), subsample=(2, 2))

    # We keep only the versions that implement subsampling, to make the
    # test faster.
    verbose = 0
    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    for t in exec_conv(version_valid, shapes, verbose, random, 'valid',
                       print_=print_, ones=ones,
                       theano_mode=mode, cls=cls):
        yield t
    for t in exec_conv(version_full, shapes, verbose, random, 'full',
                       print_=print_, ones=ones,
                       theano_mode=mode, cls=cls):
        yield t
def test_valid_4():
    seed_rng()
    shapes = get_valid_shapes()
    version = [4]
    verbose = 0

    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    shapes2 = []
    for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes):
        oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) -
                                                  numpy.asarray(kshape[2:]) +
                                                  numpy.asarray([1, 1]))
        if oshape[3] > device_prop["maxThreadsDim0"]:
            continue
        if ishape[1] > 1:
            continue
        if ((kshape[2] * ishape[3] * 4 + numpy.prod(kshape[2:]) * 4) >
                (16 * 1024 - 150)):
            continue
        if subshape == (1, 1):
            shapes2.append((ishape, kshape, subshape, istride, kstride))
    shapes = shapes2

    exec_conv(version, shapes, verbose, random, "valid",
              print_=print_, ones=ones, rtol=1.1e-5)
def test_valid(conv_gemm=False):
    seed_rng()
    shapes = get_valid_shapes()
    # shapes = shapes[400:426]

    # I put -1 in case we forget to add a version to the test.
    # I put -2 to test the reference version.
    version = [-2, -1, 6]
    verbose = 0

    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    if conv_gemm:
        # Test the GpuCorrMM version
        mode = theano_mode.including("conv_gemm")
        cls = cuda.blas.BaseGpuCorrMM
        # dummy version; not used by GpuCorrMM so one version is enough
        version = [-1]
        # Add tests with strided inputs but still square images and filters.
        shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
        shapes += get_shapes2(scales_kern=(2, 2), kern_stride=(2, 2))
    else:
        mode = theano_mode
        cls = None

    exec_conv(version, shapes, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5,
              theano_mode=mode, cls=cls)
def test_valid_7_8_13():
    seed_rng()
    shapes = get_valid_shapes()
    # This is to test the "new" lower shared memory usage.
    shapes.append(((10, 30, 60, 60), (20, 30, 40, 40),
                   (1, 1), (1, 1), (1, 1)))
    version = [7, 8, 13]
    verbose = 0

    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    shapes2 = []
    # print len(shapes)
    for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes):
        oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) -
                                                  numpy.asarray(kshape[2:]) +
                                                  numpy.asarray([1, 1]))
        if oshape[2] * oshape[3] > device_prop['maxThreadsDim0']:
            continue
        if max(numpy.prod(ishape[2:]) * 4 + 2 * kshape[3] * 4,
               oshape[2] * oshape[3] * 4 * 2) > (16 * 1024 - 150):
            continue
        if subshape == (1, 1):
            shapes2.append((ishape, kshape, subshape, istride, kstride))
    shapes = shapes2
    # print len(shapes2)

    exec_conv(version, shapes, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5)
def test_valid_9_10():
    seed_rng()
    shapes = get_valid_shapes()
    version = [9, 10]
    verbose = 0

    random = True
    print_ = False
    ones = False
    if ones:
        random = False

    shapes2 = []
    # print len(shapes)
    for id, (ishape, kshape, subshape, istride, kstride) in enumerate(shapes):
        oshape = [ishape[0]] + [kshape[0]] + list(numpy.asarray(ishape[2:]) -
                                                  numpy.asarray(kshape[2:]) +
                                                  numpy.asarray([1, 1]))
        if oshape[3] > device_prop['maxThreadsDim0']:
            continue
        if (kshape[3] * 4 + ishape[3]) > (16 * 1024 - 150):
            continue
        if subshape == (1, 1):
            shapes2.append((ishape, kshape, subshape, istride, kstride))
    shapes = shapes2
    # print len(shapes2)

    exec_conv(version, shapes, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5)
def test_logical_shapes(self):
    seed_rng()
    for stride in range(1, 4):
        kshp = (10, 2, 10, 10)
        featshp = (3, 10, 11, 11)

        a = tensor.ftensor4()
        A = tensor.ftensor4()

        # Need to transpose first two dimensions of kernel, and reverse
        # index kernel image dims (for correlation)
        kernel_rotated = tensor.transpose(A, axes=[1, 0, 2, 3])

        featshp_logical = (featshp[0], featshp[1],
                           featshp[2] * stride, featshp[3] * stride)
        kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])
        # print featshp, kshp_rotated, featshp_logical[1:], kshp[2:]
        image_estimate = tensor.nnet.conv2d(a, kernel_rotated,
                                            border_mode='full',
                                            image_shape=featshp,
                                            filter_shape=kshp_rotated,
                                            imshp_logical=featshp_logical[1:],
                                            kshp_logical=kshp[2:])

        func = theano.function([a, A], image_estimate, mode=theano_mode)
        # theano.printing.debugprint(func,)
        assert any([isinstance(node.op, theano.sandbox.cuda.blas.GpuConv)
                    for node in func.maker.fgraph.toposort()])

        a_in = numpy.random.randn(*featshp).astype("float32")
        A_in = numpy.random.randn(*kshp).astype("float32")

        func(a_in, A_in)
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # We check that we loop when there are too many threads.

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, theano.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    y = T.lvector('y')

    b = T.fvector('b')

    # we precompute the dot with a big shape beforehand so that the test of
    # GpuCrossentropySoftmax1HotWithBiasDx does not fail with the error
    # (the launch timed out and was terminated) on GPU cards that are not
    # powerful enough. We need the big shape to check for the corner case.
    dot_result = T.fmatrix('dot_result')

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    xx = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    yy = np.ones((batch_size,), dtype='int32')
    b_values = np.zeros((n_out,), dtype='float32')
    W_values = np.asarray(np.random.rand(n_in, n_out), dtype='float32')

    dot_value = np.asarray(np.dot(xx, W_values), dtype='float32')
    del W_values
    p_y_given_x = T.nnet.softmax(dot_result + b)
    y_pred = T.argmax(p_y_given_x, axis=-1)
    loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
    dW = T.grad(loss, dot_result)
    classify = theano.function(inputs=[y, b, dot_result],
                               outputs=[loss, y_pred, dW],
                               mode=mode_without_gpu)
    classify_gpu = theano.function(inputs=[y, b, dot_result],
                                   outputs=[loss, y_pred, dW],
                                   mode=mode_with_gpu)

    assert any([isinstance(node.op,
                           T.nnet.CrossentropySoftmaxArgmax1HotWithBias)
                for node in classify.maker.fgraph.toposort()])
    assert any([isinstance(node.op,
                           GpuCrossentropySoftmaxArgmax1HotWithBias)
                for node in classify_gpu.maker.fgraph.toposort()])

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    utt.assert_allclose(out[0], gout[0])
    utt.assert_allclose(out[2], gout[2], atol=3e-6)
    utt.assert_allclose(out[1], gout[1])
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    """
    This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.

    We check that we loop when there are too many threads.
    TODO: check that we loop when there are too many blocks (> 32 * 1024)
    """

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if theano.config.mode != "DEBUG_MODE":
        n_in = 4098
        n_out = 4099

    x = T.fmatrix('x')
    y = T.lvector('y')

    b = T.fvector('b')
    # W = T.fmatrix('W')

    # We precompute the dot with a big shape beforehand so that the test of
    # GpuCrossentropySoftmax1HotWithBiasDx does not fail with the error
    # (the launch timed out and was terminated) on GPU cards that are not
    # powerful enough. We need the big shape to check for the corner case.
    dot_result = T.fmatrix('dot_result')

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    xx = numpy.asarray(numpy.random.rand(batch_size, n_in),
                       dtype=numpy.float32)
    # ?????yy = numpy.ones((batch_size,), dtype='float32')
    yy = numpy.ones((batch_size,), dtype='int32')
    b_values = numpy.zeros((n_out,), dtype='float32')
    W_values = numpy.asarray(numpy.random.rand(n_in, n_out), dtype='float32')

    dot_value = numpy.asarray(numpy.dot(xx, W_values), dtype='float32')
    del W_values
    p_y_given_x = T.nnet.softmax(dot_result + b)
    y_pred = T.argmax(p_y_given_x, axis=-1)
    loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
    dW = T.grad(loss, dot_result)
    classify = theano.function(inputs=[x, y, b, dot_result],
                               outputs=[loss, y_pred, dW],
                               mode=mode_without_gpu)
    classify_gpu = theano.function(inputs=[x, y, b, dot_result],
                                   outputs=[loss, y_pred, dW],
                                   mode=mode_with_gpu)
    # theano.printing.debugprint(classify)
    # theano.printing.debugprint(classify_gpu)

    assert any([isinstance(node.op,
                           T.nnet.CrossentropySoftmaxArgmax1HotWithBias)
                for node in classify.maker.env.toposort()])
    assert any([isinstance(node.op,
                           cuda.nnet.GpuCrossentropySoftmaxArgmax1HotWithBias)
                for node in classify_gpu.maker.env.toposort()])

    out = classify(xx, yy, b_values, dot_value)
    gout = classify_gpu(xx, yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    assert numpy.allclose(out[0], gout[0])
    assert numpy.allclose(out[2], gout[2],
                          atol=3e-6), numpy.absolute(gout[2] - out[2]).max()
    assert numpy.allclose(out[1], gout[1]), [
        (id, out[1][id], gout[1][id], val)
        for id, val in enumerate(out[1] - gout[1]) if val != 0]
def test_conv_nnet2():
    utt.seed_rng()
    rval_gpu = run_conv_nnet2(True)
    if True:
        utt.seed_rng()
        rval_cpu = run_conv_nnet2(False)
    # print rval_cpu[0], rval_gpu[0], rval_cpu[0] - rval_gpu[0]
    utt.assert_allclose(rval_cpu, rval_gpu, rtol=1e-4, atol=1e-4)
def test_doubleop_grad():
    utt.seed_rng()
    utt.verify_grad(
        # Op instance
        DoubleOp(),
        # Numeric inputs
        [numpy.random.rand(5, 7, 2)],
    )
def test_GpuCrossentropySoftmax1HotWithBiasDx():
    """
    This is a basic test for GpuCrossentropySoftmax1HotWithBiasDx.

    We check that we loop when there are too many threads.
    TODO: check that we loop when there are too many blocks (> 32 * 1024)
    """
    n_in = 1000
    batch_size = 4097
    n_out = 1250

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    softmax_output_value = numpy.random.rand(batch_size,
                                             n_out).astype("float32")
    dnll_value = numpy.asarray(numpy.random.rand(batch_size), dtype="float32")
    y_idx_value = numpy.random.randint(low=0, high=5, size=batch_size)

    softmax_output = T.fmatrix()
    softmax_output /= softmax_output.sum(axis=1).reshape(
        softmax_output.shape[1], 1)
    op = theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
        dnll_value,
        softmax_output,
        y_idx_value)

    cpu_f = theano.function([softmax_output], op, mode=mode_without_gpu)
    gpu_f = theano.function([softmax_output], op, mode=mode_with_gpu)
    # theano.printing.debugprint(cpu_f)
    # theano.printing.debugprint(gpu_f)

    assert any([isinstance(node.op, T.nnet.CrossentropySoftmax1HotWithBiasDx)
                for node in cpu_f.maker.fgraph.toposort()])
    assert any([isinstance(node.op,
                           cuda.nnet.GpuCrossentropySoftmax1HotWithBiasDx)
                for node in gpu_f.maker.fgraph.toposort()])

    cpu_out = cpu_f(softmax_output_value)
    gpu_out = gpu_f(softmax_output_value)

    rtol = 1e-5
    atol = 1e-6
    if not numpy.allclose(cpu_out, gpu_out, rtol=rtol, atol=atol):
        abs_err, rel_err = T.numeric_grad.abs_rel_err(cpu_out, gpu_out)
        scaled_err = numpy.minimum(abs_err / atol, rel_err / rtol)
        max_i = scaled_err.argmax()

        print "max err index:", max_i, max_i / batch_size,
        print max_i % batch_size, max_i / n_out, max_i % n_out
        print "At that index:"
        print "err:", scaled_err.flatten()[max_i]
        print "absolute error:", abs_err.flatten()[max_i]
        print "relative error:", rel_err.flatten()[max_i]
        print "cpu_out:", cpu_out.flatten()[max_i]
        print "gpu_out:", gpu_out.flatten()[max_i]
        print "softmax_output_value:", softmax_output_value.flatten()[max_i]
        print "dnll_value:", dnll_value[max_i / n_out]
        print "y_idx_value:", y_idx_value[max_i / n_out]

        assert False, "numpy.allclose(cpu_out, gpu_out, rtol=%s, atol=%s)" % (
            rtol, atol)
def test_gpu_tril_triu():
    def check_l(m, k=0):
        m_symb = T.matrix(dtype=m.dtype)
        k_symb = T.iscalar()

        f = theano.function([m_symb, k_symb],
                            T.tril(m_symb, k_symb),
                            mode=mode_with_gpu)
        result = f(m, k)
        assert np.allclose(result, np.tril(m, k))
        assert result.dtype == np.dtype(dtype)
        assert any([isinstance(node.op, GpuTri)
                    for node in f.maker.fgraph.toposort()])

    def check_u(m, k=0):
        m_symb = T.matrix(dtype=m.dtype)
        k_symb = T.iscalar()
        f = theano.function([m_symb, k_symb],
                            T.triu(m_symb, k_symb),
                            mode=mode_with_gpu)
        result = f(m, k)
        assert np.allclose(result, np.triu(m, k))
        assert result.dtype == np.dtype(dtype)
        assert any([isinstance(node.op, GpuTri)
                    for node in f.maker.fgraph.toposort()])

    utt.seed_rng()
    test_rng = np.random.RandomState(seed=utt.fetch_seed())

    for dtype in ['float64', 'float32', 'float16']:
        # try a big one
        m = np.asarray(test_rng.rand(5000, 5000) * 2 - 1, dtype=dtype)
        yield check_l, m, 0
        yield check_l, m, 1
        yield check_l, m, -1

        yield check_u, m, 0
        yield check_u, m, 1
        yield check_u, m, -1

        m = np.asarray(test_rng.rand(10, 10) * 2 - 1, dtype=dtype)
        yield check_l, m, 0
        yield check_l, m, 1
        yield check_l, m, -1

        yield check_u, m, 0
        yield check_u, m, 1
        yield check_u, m, -1

        m = np.asarray(test_rng.rand(10, 5) * 2 - 1, dtype=dtype)
        yield check_l, m, 0
        yield check_l, m, 1
        yield check_l, m, -1

        yield check_u, m, 0
        yield check_u, m, 1
        yield check_u, m, -1
def setUp(self):
    unittest_tools.seed_rng()
    # we want to allow nans in the matrices, so we disable this DEBUG_MODE
    # check
    mode = theano.compile.mode.get_default_mode()
    mode = copy(mode)
    mode.check_isfinite = False

    self.mode = mode
def test_dimshuffle(self):
    utt.seed_rng()
    rng = numpy.random.RandomState(utt.fetch_seed())

    # 2d -> 0d
    a = theano._asarray(rng.randn(1,1), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(numpy.transpose(a),
                          cuda_ndarray.dimshuffle(b,()))

    # Test when we drop a axis that don't have shape 1
    a = theano._asarray(rng.randn(2,1), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b,())

    # Test that we can't take a dimensions multiple time
    a = theano._asarray(rng.randn(2,1), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b,(1,1))

    # 1d
    a = theano._asarray(rng.randn(3,), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(numpy.transpose(a),
                          cuda_ndarray.dimshuffle(b,(0,)))
    assert numpy.allclose(a[None,:,None],
                          cuda_ndarray.dimshuffle(b,(-1,0,-1)))

    # 2d
    a = theano._asarray(rng.randn(3,11), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(numpy.transpose(a),
                          cuda_ndarray.dimshuffle(b,(1,0)))
    assert numpy.allclose(numpy.transpose(a)[None,:,None,:,None],
                          cuda_ndarray.dimshuffle(b,(-1,1,-1,0,-1)))

    # 2d -> 1d
    a = theano._asarray(rng.randn(1,11), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(a[:,],
                          cuda_ndarray.dimshuffle(b,(1,)))
    a = theano._asarray(rng.randn(11,1), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(a.reshape((11,)),
                          cuda_ndarray.dimshuffle(b,(0,)))

    # 3d
    a = theano._asarray(rng.randn(3,4,5), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(a, cuda_ndarray.dimshuffle(b,(0,1,2)))
    assert numpy.allclose(numpy.swapaxes(a,0,1),
                          cuda_ndarray.dimshuffle(b,(1,0,2)))
    assert numpy.allclose(numpy.swapaxes(a,0,2),
                          cuda_ndarray.dimshuffle(b,(2,1,0)))
    assert numpy.allclose(numpy.swapaxes(a,1,2),
                          cuda_ndarray.dimshuffle(b,(0,2,1)))
    assert numpy.allclose(numpy.swapaxes(a,1,2)[None,:,None,:,:,None],
                          cuda_ndarray.dimshuffle(b,(-1,0,-1,2,1,-1)))

    # 4d
    a = theano._asarray(rng.randn(3,11,4,5), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(numpy.swapaxes(a,0,1),
                          cuda_ndarray.dimshuffle(b,(1,0,2,3)))
    assert numpy.allclose(numpy.swapaxes(a,0,2),
                          cuda_ndarray.dimshuffle(b,(2,1,0,3)))
    assert numpy.allclose(numpy.swapaxes(a,0,3),
                          cuda_ndarray.dimshuffle(b,(3,1,2,0)))
    assert numpy.allclose(numpy.swapaxes(a,0,3),
                          cuda_ndarray.dimshuffle(b,(3,1,2,0)))
    assert numpy.allclose(numpy.swapaxes(a,0,3)[None,:,None,:,:,:],
                          cuda_ndarray.dimshuffle(b,(-1,3,-1,1,2,0)))
def setUp(self):
    utt.seed_rng()
    mode = None
    if theano.config.mode == "FAST_COMPILE":
        mode = "FAST_RUN"
    self.mode = theano.compile.get_mode(mode).excluding(
        'constant_folding'
    )
    self.gemv_op = sparse_block_gemv
    self.outer_op = sparse_block_outer
def test_batchnorm_inference():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 5000:
        raise SkipTest("batch normalization requires cudnn v5+")
    utt.seed_rng()

    for mode in ('per-activation', 'spatial'):
        for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
            x, scale, bias, mean, var = (vartype(n)
                                         for n in ('x', 'scale', 'bias',
                                                   'mean', 'var'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # forward pass
            out = dnn.dnn_batch_normalization_test(x, scale, bias, mean,
                                                   var, mode, eps)
            # reference forward pass
            if mode == 'per-activation':
                axes = (0,)
            elif mode == 'spatial':
                axes = (0,) + tuple(range(2, ndim))
            scale2, bias2, mean2, var2 = (T.addbroadcast(t, *axes)
                                          for t in (scale, bias, mean, var))
            out2 = (x - mean2) * (scale2 / T.sqrt(var2 + eps)) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias, mean, var],
                           known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias, mean, var],
                            known_grads={out2: dy})
            # compile
            f = theano.function([x, scale, bias, mean, var, dy],
                                [out, out2] + grads + grads2,
                                mode=mode_with_gpu)
            # run
            for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
                data_shape = data_shape[:ndim]
                param_shape = tuple(1 if d in axes else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
                Scale = numpy.random.randn(*param_shape).astype('float32')
                Bias = numpy.random.randn(*param_shape).astype('float32')
                Mean = numpy.random.randn(*param_shape).astype('float32')
                Var = numpy.random.rand(*param_shape).astype('float32')
                outputs = f(X, Scale, Bias, Mean, Var, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[1])  # out
                # compare gradients
                utt.assert_allclose(outputs[2], outputs[2 + 5])  # dx
                utt.assert_allclose(outputs[3], outputs[3 + 5])  # dscale
                utt.assert_allclose(outputs[4], outputs[4 + 5])  # dbias
                utt.assert_allclose(outputs[5], outputs[5 + 5])  # dmean
                utt.assert_allclose(outputs[6], outputs[6 + 5],
                                    atol=2e-5)  # dvar
def setUp(self):
    utt.seed_rng()

    # Using vectors make things a lot simpler for generating the same
    # computations using scan
    self.x = tensor.vector("x")
    self.v = tensor.vector("v")
    self.rng = numpy.random.RandomState(utt.fetch_seed())
    self.in_shape = (5 + self.rng.randint(3),)

    self.mx = tensor.matrix("mx")
    self.mv = tensor.matrix("mv")
    self.mat_in_shape = (5 + self.rng.randint(3), 5 + self.rng.randint(3))
def test_dnn_batchnorm_train():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 5000:
        raise SkipTest("batch normalization requires cudnn v5+")
    utt.seed_rng()

    for mode in ('per-activation', 'spatial'):
        for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
            x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # forward pass
            out, x_mean, x_invstd = dnn.dnn_batch_normalization_train(
                x, scale, bias, mode, eps)
            # reference forward pass
            if mode == 'per-activation':
                axes = (0,)
            elif mode == 'spatial':
                axes = (0,) + tuple(range(2, ndim))
            x_mean2 = x.mean(axis=axes, keepdims=True)
            x_invstd2 = T.inv(T.sqrt(x.var(axis=axes, keepdims=True) + eps))
            scale2 = T.addbroadcast(scale, *axes)
            bias2 = T.addbroadcast(bias, *axes)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias],
                            known_grads={out2: dy})
            # compile
            f = theano.function([x, scale, bias, dy],
                                [out, x_mean, x_invstd,
                                 out2, x_mean2, x_invstd2] + grads + grads2,
                                mode=mode_with_gpu)
            # run
            for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
                data_shape = data_shape[:ndim]
                param_shape = tuple(1 if d in axes else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
                Scale = numpy.random.randn(*param_shape).astype('float32')
                Bias = numpy.random.randn(*param_shape).astype('float32')
                outputs = f(X, Scale, Bias, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 3])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 3])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 3])  # invstd
                # compare gradients
                utt.assert_allclose(outputs[6], outputs[6 + 3])  # dx
                utt.assert_allclose(outputs[7], outputs[7 + 3],
                                    rtol=3e-3)  # dscale
                utt.assert_allclose(outputs[8], outputs[8 + 3])  # dbias
def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
                               ignore_error=False, n_train=10,
                               gpu_only=False, cpu_only=False,
                               float_atol=1e-06, check_isfinite=True,
                               pickle=False, verbose=0, version=-1):
    """Run the nnet2 function on 1 or 2 devices, and compare the results.

       float_atol: None means use the default value.

       check_isfinite: the debug mode option. We forward this value to
       debug mode. For some parameters, the CrossentropyCategorical1Hot op
       generates inf when not optimized.
    """
    if config.mode == 'DEBUG_MODE':
        n_train = 1

    # Change global tolerance, used in DebugMode for instance
    orig_float32_atol = theano.tensor.basic.float32_atol
    try:
        if float_atol:
            # print "float_atol", float_atol
            theano.tensor.basic.float32_atol = float_atol

        if gpu_only and cpu_only:
            raise ValueError("Please use only one of cpu_only and gpu_only")
        elif cpu_only:
            use_gpu = False
            compare = False
        elif gpu_only:
            use_gpu = True
            compare = False
        else:
            compare = True

        if not compare:
            return run_conv_nnet2_classif(
                use_gpu=use_gpu, seed=seed, isize=isize, ksize=ksize,
                bsize=bsize, n_train=n_train,
                check_isfinite=check_isfinite, pickle=pickle,
                verbose=verbose, version=version)

        utt.seed_rng(seed)  # Seeds numpy.random with seed
        train_cpu, params_cpu, x_shape, y_shape, mode_cpu = \
            build_conv_nnet2_classif(
                use_gpu=False, isize=isize, ksize=ksize, n_batch=bsize,
                verbose=verbose, version=version,
                check_isfinite=check_isfinite)

        utt.seed_rng(seed)  # Seeds numpy.random with seed
        train_gpu, params_gpu, x_shape_gpu, y_shape_gpu, mode_gpu = \
            build_conv_nnet2_classif(
                use_gpu=True, isize=isize, ksize=ksize, n_batch=bsize,
                verbose=verbose, version=version,
                check_isfinite=check_isfinite)

        assert x_shape == x_shape_gpu
        assert y_shape == y_shape_gpu

        xval = my_rand(*x_shape)
        yval = my_rand(*y_shape)
        lr = theano._asarray(0.01, dtype='float32')

        time_cpu = 0
        time_gpu = 0

        for i in range(n_train):
            # Train one batch on CPU
            t0 = time.time()
            rval_cpu = train_cpu(xval, yval, lr)[0]
            t1 = time.time()
            time_cpu += (t1 - t0)

            # Train one batch on GPU
            t0 = time.time()
            rval_gpu = train_gpu(xval, yval, lr)[0]
            t1 = time.time()
            time_gpu += (t1 - t0)

            # Compare results
            if (verbose or not numpy.allclose(rval_cpu, rval_gpu,
                                              rtol=1e-5, atol=float_atol)):
                print "At batch:", i + 1
                print "CPU:", rval_cpu
                print "GPU:", rval_gpu
                print "abs diff:", numpy.absolute(rval_gpu - rval_cpu)
                print "rel diff:", numpy.absolute(
                    (rval_gpu - rval_cpu) / rval_gpu)

                if not ignore_error:
                    assert numpy.allclose(rval_cpu, rval_gpu,
                                          rtol=1e-5, atol=float_atol)

            # Synchronize parameters to start from the same point next time
            if i < n_train - 1:
                for cpu_p, gpu_p in zip(params_cpu, params_gpu):
                    cpu_p.set_value(gpu_p.get_value(borrow=False),
                                    borrow=True)

    finally:
        theano.tensor.basic.float32_atol = orig_float32_atol

    if pickle:
        if isinstance(mode_cpu, theano.compile.ProfileMode):
            import pickle
            print "BEGIN CPU profile mode dump"
            print pickle.dumps(mode_cpu)
            print "END CPU profile mode dump"
        if isinstance(mode_gpu, theano.compile.ProfileMode):
            import pickle
            print "BEGIN GPU profile mode dump"
            print pickle.dumps(mode_gpu)
            print "END GPU profile mode dump"
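# A hypothetical invocation of the comparison harness above, only to show the
# call shape; the argument values below are illustrative and are not taken
# from the test suite.
#
# cmp_run_conv_nnet2_classif(seed=utt.fetch_seed(), isize=28, ksize=5,
#                            bsize=10, n_train=3)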
def test_conv_nnet1():
    utt.seed_rng()
    rval_cpu = run_conv_nnet1(False)
    utt.seed_rng()
    rval_gpu = run_conv_nnet1(True)
    assert numpy.allclose(rval_cpu, rval_gpu, rtol=1e-4, atol=1e-6)
def setUp(self):
    if not cusolver_available:
        self.skipTest(
            'Optional package scikits.cuda.cusolver not available')
    utt.seed_rng()
def setUp(self):
    utt.seed_rng()
    self.mode = mode_with_gpu
    self.shared = gpuarray_shared_constructor
    self.dtypes = ['float64', 'float32']
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    """
    This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.

    We check that we loop when there are too many threads.
    """

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, theano.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    x = T.fmatrix('x')
    y = T.lvector('y')

    b = T.fvector('b')
    # W = T.fmatrix('W')

    # we precompute the dot with a big shape beforehand so that the test of
    # GpuCrossentropySoftmax1HotWithBiasDx does not fail with the error
    # (the launch timed out and was terminated) on GPU cards that are not
    # powerful enough. We need the big shape to check for the corner case.
    dot_result = T.fmatrix('dot_result')

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    xx = numpy.asarray(numpy.random.rand(batch_size, n_in),
                       dtype=numpy.float32)
    # ?????yy = numpy.ones((batch_size,), dtype='float32')
    yy = numpy.ones((batch_size,), dtype='int32')
    b_values = numpy.zeros((n_out,), dtype='float32')
    W_values = numpy.asarray(numpy.random.rand(n_in, n_out), dtype='float32')

    dot_value = numpy.asarray(numpy.dot(xx, W_values), dtype='float32')
    del W_values
    p_y_given_x = T.nnet.softmax(dot_result + b)
    y_pred = T.argmax(p_y_given_x, axis=-1)
    loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
    dW = T.grad(loss, dot_result)
    classify = theano.function(inputs=[y, b, dot_result],
                               outputs=[loss, y_pred, dW],
                               mode=mode_without_gpu)
    classify_gpu = theano.function(inputs=[y, b, dot_result],
                                   outputs=[loss, y_pred, dW],
                                   mode=mode_with_gpu)
    # theano.printing.debugprint(classify)
    # theano.printing.debugprint(classify_gpu)

    assert any([isinstance(node.op,
                           T.nnet.CrossentropySoftmaxArgmax1HotWithBias)
                for node in classify.maker.fgraph.toposort()])
    assert any([isinstance(node.op,
                           GpuCrossentropySoftmaxArgmax1HotWithBias)
                for node in classify_gpu.maker.fgraph.toposort()])

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    assert numpy.allclose(out[0], gout[0])
    assert numpy.allclose(out[2], gout[2], atol=3e-6), numpy.absolute(
        gout[2] - out[2]).max()
    assert numpy.allclose(out[1], gout[1]), [
        (id, out[1][id], gout[1][id], val)
        for id, val in enumerate(out[1] - gout[1]) if val != 0]
def _test_full(cls, mode=None, version=[-1], extra_shapes=[],
               test_bigger_kernels=True):
    seed_rng()
    shapes = get_basic_shapes()
    shapes += get_shapes2()
    # test image stride
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(1, 2))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 1))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(2, 2))
    shapes += get_shapes2(scales_img=(2, 2), img_stride=(-1, -1))
    shapes += get_shapes2(scales_img=(2, 2), kern_stride=(-1, -1))
    # test subsample done in a separate fct

    shapes += [
        # other test
        ((2, 1, 2, 2), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1)),
        ((3, 2, 4, 4), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 10, 10), (1, 1, 2, 2), (1, 1), (1, 1), (1, 1)),
        ((1, 1, 4, 4), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 10, 10), (1, 1, 2, 3), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 10, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1)),
        ((4, 1, 20, 10), (1, 1, 2, 10), (1, 1), (1, 1), (1, 1)),
        ((3, 2, 8, 8), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)),  # stack, nkern, bsize
        ((3, 2, 8, 6), (4, 2, 4, 4), (1, 1), (1, 1), (1, 1)),  # stack, nkern, bsize, non-square image
        ((3, 2, 8, 6), (4, 2, 4, 3), (1, 1), (1, 1), (1, 1)),  # stack, nkern, bsize, non-square image, non-square kern
        ((3, 2, 8, 6), (4, 2, 4, 6), (1, 1), (1, 1), (1, 1)),  # stack, nkern, bsize, non-square image, non-square kern, kernsize==imgsize on one dim
        ((16, 5, 64, 64), (8, 5, 8, 8), (1, 1), (1, 1), (1, 1)),  # a big one
        ((16, 1, 28, 28), (20, 1, 5, 5), (1, 1), (1, 1), (1, 1)),  # MNIST LeNET layer 1
        ((20, 16, 32, 32), (1, 16, 28, 28), (1, 1), (1, 1), (1, 1)),  # layer 1 backprop to weights
    ]

    if test_bigger_kernels:
        # Shapes where the kernel is larger than the image in some dimension
        shapes += [((3, 1, 1, 1), (2, 1, 5, 3), (1, 1), (1, 1), (1, 1)),
                   ((3, 2, 1, 1), (4, 2, 1, 1), (1, 1), (1, 1), (1, 1)),
                   ((3, 2, 4, 4), (4, 2, 2, 6), (1, 1), (1, 1), (1, 1)),
                   ((3, 2, 4, 4), (4, 2, 8, 6), (1, 1), (1, 1), (1, 1)),
                   ((4, 2, 10, 10), (3, 2, 2, 12), (1, 1), (1, 1), (1, 1))]

    shapes += [
        # ((60, 1, 28, 28), (20, 1, 5, 5), (1, 1), (1, 1), (1, 1)),  # test_lenet_28 1 layers
        # ((60, 20, 12, 12), (30, 20, 5, 5), (1, 1), (1, 1), (1, 1)),  # test_lenet_28 2 layers
        ((60, 30, 8, 8), (20, 30, 5, 5), (1, 1), (1, 1), (1, 1)),  # test_lenet_28 bprop 1 full
        # ((20, 60, 12, 12), (30, 60, 8, 8), (1, 1), (1, 1), (1, 1)),  # test_lenet_28 bprop 2 valid
        # ((1, 60, 28, 28), (20, 60, 24, 24), (1, 1), (1, 1), (1, 1)),  # test_lenet_28 bprop 2 valid
        # ((10, 1, 64, 64), (20, 1, 7, 7), (1, 1), (1, 1), (1, 1)),  # test_lenet_64 1 layers
        # ((10, 20, 29, 29), (30, 20, 7, 7), (1, 1), (1, 1), (1, 1)),  # test_lenet_64 2 layers
        ((10, 30, 23, 23), (20, 30, 7, 7), (1, 1), (1, 1), (1, 1)),  # test_lenet_64 full
        # ((20, 10, 29, 29), (30, 10, 23, 23), (1, 1), (1, 1), (1, 1)),  # test_lenet_64 bprop 1
        # ((1, 10, 64, 64), (20, 10, 58, 58), (1, 1), (1, 1), (1, 1)),  # test_lenet_64 bprop 2
        # Test more than maxThreadsDim0
        ((2, 4, 13, 1050), (3, 4, 10, 11), (1, 1), (1, 1), (1, 1)),
        ((2, 4, 1050, 13), (3, 4, 10, 11), (1, 1), (1, 1), (1, 1)),
        ((1, 1, 44800, 1), (6, 1, 1, 1), (1, 1), (1, 1), (1, 1)),  # This caused crash
    ]

    verbose = 0
    random = True

    shapes += extra_shapes

    return exec_conv(version, shapes, verbose, random, 'full',
                     theano_mode=mode, cls=cls)
def setup_method(self):
    super(TestLogDet, self).setup_method()
    utt.seed_rng()
    self.op_class = LogDet
    self.op = logdet
def setup_method(self):
    if not cusolver_available:
        self.skipTest(
            "Optional package scikits.cuda.cusolver not available")
    utt.seed_rng()
def setUp(self):
    utt.seed_rng()
    self.rng = np.random.RandomState(seed=utt.fetch_seed())
def setUp(self):
    super(TestConv3D, self).setUp()
    utt.seed_rng()
    self.rng = N.random.RandomState(utt.fetch_seed())

    mode = copy.copy(theano.compile.mode.get_default_mode())
    mode.check_py_code = False

    self.W = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
    self.W.name = 'W'
    self.b = shared(N.zeros(1, dtype=floatX))
    self.b.name = 'b'
    self.rb = shared(N.zeros(1, dtype=floatX))
    self.rb.name = 'rb'
    self.V = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
    self.V.name = 'V'
    self.d = shared(N.ndarray(shape=(3,), dtype=int))
    self.d.name = 'd'

    self.H = conv3D(self.V, self.W, self.b, self.d)
    self.H.name = 'H'
    self.H_func = function([], self.H, mode=mode)
    self.H_shape_func = function([], self.H.shape, mode=mode)

    self.RShape = T.vector(dtype='int64')
    self.RShape.name = 'RShape'

    self.otherH = T.TensorType(
        floatX, (False, False, False, False, False))(name='otherH')
    self.transp = convTransp3D(self.W, self.rb, self.d,
                               self.otherH, self.RShape)
    self.transp.name = 'transp'
    self.transp_func = function([self.otherH, self.RShape],
                                self.transp, mode=mode)

    self.R = convTransp3D(self.W, self.rb, self.d, self.H, self.RShape)
    self.R.name = 'R'
    self.R_func = function([self.RShape], self.R, mode=mode)
    self.R_shape_func = function([self.RShape], self.R.shape)

    diff = self.V - self.R
    diff.name = 'diff'
    sqr = T.sqr(diff)
    sqr.name = 'sqr'
    self.reconsObj = T.sum(sqr)
    self.reconsObj.name = 'reconsObj'
    self.reconsObjFunc = function([self.RShape], self.reconsObj, mode=mode)

    W_grad = T.grad(self.reconsObj, self.W)

    self.gradientsFunc = function([self.RShape],
                                  [W_grad,
                                   T.grad(self.reconsObj, self.H),
                                   T.grad(self.reconsObj, self.V),
                                   T.grad(self.reconsObj, self.b)],
                                  mode=mode)

    self.check_c_against_python = function(
        [self.RShape],
        [T.grad(self.reconsObj, self.W),
         T.grad(self.reconsObj, self.H),
         T.grad(self.reconsObj, self.V),
         T.grad(self.reconsObj, self.b)],
        mode='DEBUG_MODE')

    self.dCdW_shape_func = function([self.RShape],
                                    T.grad(self.reconsObj, self.W).shape,
                                    mode=mode)
                                move_shared_float32_to_gpu=False,
                                enable_cuda=False)
theano.sandbox.gpuarray.init_dev('cuda')

if not theano.sandbox.gpuarray.pygpu_activated:
    raise SkipTest("pygpu disabled")

from ..type import (GpuArrayType,
                    gpuarray_shared_constructor)
from ..basic_ops import (host_from_gpu, gpu_from_host, gpu_alloc, GpuAlloc,
                         gpu_from_cuda, cuda_from_gpu, HostFromGpu,
                         GpuFromHost, GpuReshape, gpu_join, GpuJoin,
                         GpuSplit, GpuEye, gpu_contiguous)
from ..subtensor import GpuSubtensor

from theano.tests import unittest_tools as utt
utt.seed_rng()
rng = numpy.random.RandomState(seed=utt.fetch_seed())

from pygpu import gpuarray

if theano.config.mode == 'FAST_COMPILE':
    mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including(
        'gpuarray').excluding('gpu')
    mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding(
        'gpuarray')
else:
    mode_with_gpu = theano.compile.mode.get_default_mode().including(
        'gpuarray').excluding('gpu')
    mode_without_gpu = theano.compile.mode.get_default_mode().excluding(
        'gpuarray')
def setUp(self):
    utt.seed_rng()
def setUp(self):
    unittest_tools.seed_rng()
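# Rough sketch of what the seeding helpers used throughout these tests do.
# This is a simplified assumption based on the comments above ("Seed
# numpy.random with config.unittests.rseed"); the real implementations live
# in theano.tests.unittest_tools and also handle invalid values.
#
# import numpy
# import theano
#
# def fetch_seed(pseed=None):
#     # Use the explicit seed if given, otherwise the unittests.rseed flag;
#     # the special value 'random' means "do not fix the seed".
#     seed = pseed or theano.config.unittests.rseed
#     return None if seed == "random" else int(seed)
#
# def seed_rng(pseed=None):
#     # Seed numpy's global RNG so every test draws reproducible data.
#     numpy.random.seed(fetch_seed(pseed))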
def test_run_nnet_small():
    utt.seed_rng()
    rval_cpu = run_nnet(False, 10, 10, 4, 4, n_train=100000)
def test_GpuCrossentropySoftmax1HotWithBiasDx():
    """
    This is a basic test for GpuCrossentropySoftmax1HotWithBiasDx.

    We check that we loop when there are too many threads.
    """
    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, theano.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    softmax_output_value = numpy.random.rand(batch_size,
                                             n_out).astype('float32')
    dnll_value = numpy.asarray(numpy.random.rand(batch_size), dtype='float32')
    y_idx_value = numpy.random.randint(low=0, high=5, size=batch_size)

    softmax_output = T.fmatrix()
    softmax_output /= softmax_output.sum(axis=1).reshape(
        softmax_output.shape[1], 1)
    op = theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
        dnll_value,
        softmax_output,
        y_idx_value)

    cpu_f = theano.function([softmax_output], op, mode=mode_without_gpu)
    gpu_f = theano.function([softmax_output], op, mode=mode_with_gpu)
    # theano.printing.debugprint(cpu_f)
    # theano.printing.debugprint(gpu_f)

    assert any([isinstance(node.op, T.nnet.CrossentropySoftmax1HotWithBiasDx)
                for node in cpu_f.maker.fgraph.toposort()])
    assert any([isinstance(node.op, GpuCrossentropySoftmax1HotWithBiasDx)
                for node in gpu_f.maker.fgraph.toposort()])

    cpu_out = cpu_f(softmax_output_value)
    gpu_out = gpu_f(softmax_output_value)

    rtol = 1e-5
    atol = 1e-6
    if not numpy.allclose(cpu_out, gpu_out, rtol=rtol, atol=atol):
        abs_err, rel_err = T.numeric_grad.abs_rel_err(cpu_out, gpu_out)
        scaled_err = numpy.minimum(abs_err / atol, rel_err / rtol)
        max_i = scaled_err.argmax()

        print('max err index:', max_i, max_i / batch_size, end=' ')
        print(max_i % batch_size, max_i / n_out, max_i % n_out)
        print('At that index:')
        print('err:', scaled_err.flatten()[max_i])
        print('absolute error:', abs_err.flatten()[max_i])
        print('relative error:', rel_err.flatten()[max_i])
        print('cpu_out:', cpu_out.flatten()[max_i])
        print('gpu_out:', gpu_out.flatten()[max_i])
        print('softmax_output_value:',
              softmax_output_value.flatten()[max_i])
        print('dnll_value:', dnll_value[max_i // n_out])
        print('y_idx_value:', y_idx_value[max_i // n_out])

        assert False, "numpy.allclose(cpu_out, gpu_out, rtol=%s, atol=%s)" % (
            rtol, atol)
def test_dimshuffle(self):
    utt.seed_rng()
    rng = numpy.random.RandomState(utt.fetch_seed())

    # 2d -> 0d
    a = theano._asarray(rng.randn(1, 1), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(numpy.transpose(a),
                          cuda_ndarray.dimshuffle(b, ()))

    # Test when we drop a axis that don't have shape 1
    a = theano._asarray(rng.randn(2, 1), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b, ())

    # Test that we can't take a dimensions multiple time
    a = theano._asarray(rng.randn(2, 1), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    self.assertRaises(ValueError, cuda_ndarray.dimshuffle, b, (1, 1))

    # 1d
    a = theano._asarray(rng.randn(3, ), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(numpy.transpose(a),
                          cuda_ndarray.dimshuffle(b, (0, )))
    assert numpy.allclose(a[None, :, None],
                          cuda_ndarray.dimshuffle(b, (-1, 0, -1)))

    # 2d
    a = theano._asarray(rng.randn(3, 11), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(numpy.transpose(a),
                          cuda_ndarray.dimshuffle(b, (1, 0)))
    assert numpy.allclose(
        numpy.transpose(a)[None, :, None, :, None],
        cuda_ndarray.dimshuffle(b, (-1, 1, -1, 0, -1)))

    # 2d -> 1d
    a = theano._asarray(rng.randn(1, 11), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(a[:, ],
                          cuda_ndarray.dimshuffle(b, (1, )))
    a = theano._asarray(rng.randn(11, 1), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(a.reshape((11, )),
                          cuda_ndarray.dimshuffle(b, (0, )))

    # 3d
    a = theano._asarray(rng.randn(3, 4, 5), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(a, cuda_ndarray.dimshuffle(b, (0, 1, 2)))
    assert numpy.allclose(numpy.swapaxes(a, 0, 1),
                          cuda_ndarray.dimshuffle(b, (1, 0, 2)))
    assert numpy.allclose(numpy.swapaxes(a, 0, 2),
                          cuda_ndarray.dimshuffle(b, (2, 1, 0)))
    assert numpy.allclose(numpy.swapaxes(a, 1, 2),
                          cuda_ndarray.dimshuffle(b, (0, 2, 1)))
    assert numpy.allclose(
        numpy.swapaxes(a, 1, 2)[None, :, None, :, :, None],
        cuda_ndarray.dimshuffle(b, (-1, 0, -1, 2, 1, -1)))

    # 4d
    a = theano._asarray(rng.randn(3, 11, 4, 5), dtype='float32')
    b = cuda_ndarray.CudaNdarray(a)
    assert numpy.allclose(numpy.swapaxes(a, 0, 1),
                          cuda_ndarray.dimshuffle(b, (1, 0, 2, 3)))
    assert numpy.allclose(numpy.swapaxes(a, 0, 2),
                          cuda_ndarray.dimshuffle(b, (2, 1, 0, 3)))
    assert numpy.allclose(numpy.swapaxes(a, 0, 3),
                          cuda_ndarray.dimshuffle(b, (3, 1, 2, 0)))
    assert numpy.allclose(numpy.swapaxes(a, 0, 3),
                          cuda_ndarray.dimshuffle(b, (3, 1, 2, 0)))
    assert numpy.allclose(
        numpy.swapaxes(a, 0, 3)[None, :, None, :, :, :],
        cuda_ndarray.dimshuffle(b, (-1, 3, -1, 1, 2, 0)))
def test_run_nnet_med():
    utt.seed_rng()
    rval_cpu = run_nnet(False, 10, 128, 50, 4, n_train=10000)