def test_gradient(self):
    utt.verify_grad(
        self.op_class(),
        [np.random.rand(5, 4), np.random.rand(5, 4)],
        n_tests=1,
        rng=TestProdOp.rng,
    )
def test_vectors(self):
    y_idx = [3]

    def f(a, b):
        return crossentropy_softmax_1hot(shape_padleft(a) + b, y_idx)[0]

    utt.verify_grad(f, [np.random.random((4,)), np.random.random((4,))])
def test_expm_grad_3():
    # with non-symmetric matrix (complex eigenvectors)
    rng = np.random.default_rng(utt.fetch_seed())
    # Always test in float64 for better numerical stability.
    A = rng.standard_normal((5, 5))

    utt.verify_grad(expm, [A], rng=rng)
def test_max_pool_2d_2D(self):
    rng = np.random.RandomState(utt.fetch_seed())
    maxpoolshps = ((1, 1), (3, 2))
    imval = rng.rand(4, 5)
    images = tensor.dmatrix()

    for maxpoolshp, ignore_border, mode in product(
        maxpoolshps,
        [True, False],
        ["max", "sum", "average_inc_pad", "average_exc_pad"],
    ):
        numpy_output_val = self.numpy_max_pool_2d(
            imval, maxpoolshp, ignore_border, mode=mode
        )
        output = pool_2d(images, maxpoolshp, ignore_border, mode=mode)
        output_val = function([images], output)(imval)
        utt.assert_allclose(output_val, numpy_output_val)

        def mp(input):
            return pool_2d(input, maxpoolshp, ignore_border, mode=mode)

        utt.verify_grad(mp, [imval], rng=rng)
def test_AveragePoolGrad_grad_stride(self, example, ignore_border, mode):
    # Checks the gradient of the gradient for the case that stride is used.
    rng = np.random.RandomState(utt.fetch_seed())
    (avgpoolshp, stride, inputsize) = example
    imval = rng.rand(*inputsize)
    grad_shape = Pool.out_shape(
        imval.shape,
        avgpoolshp,
        ndim=len(avgpoolshp),
        ignore_border=ignore_border,
        stride=stride,
    )

    # skip the grad verification when the output is empty
    if np.prod(grad_shape) != 0:
        grad_val = rng.rand(*grad_shape)

        def mp(input, grad):
            grad_op = AveragePoolGrad(
                ndim=len(avgpoolshp), ignore_border=ignore_border, mode=mode
            )
            return grad_op(input, grad, avgpoolshp, stride)

        utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxPaddingStride_grad(self):
    rng = np.random.RandomState(utt.fetch_seed())
    # maxpool, stride, pad, input sizes
    examples = (
        ((10,), (5,), (3,), (2,)),
        ((10,), (5,), (3,), (2, 2)),
        ((10,), (5,), (3,), (1, 1, 2)),
        ((10, 10), (5, 3), (3, 2), (1, 1, 2, 2)),
        ((10, 5), (3, 5), (2, 3), (1, 1, 2, 1)),
        ((5, 5), (3, 3), (3, 3), (1, 1, 2, 2)),
        ((5, 5, 5), (3, 3, 3), (3, 3, 3), (1, 1, 2, 2, 2)),
    )
    # average_inc_pad and average_exc_pad do not support grad with padding
    for mode in ["max", "sum"]:
        for example in examples:
            (maxpoolshp, stridesize, padsize, inputsize) = example
            imval = rng.rand(*inputsize) * 10.0

            def mp(input):
                return Pool(
                    ndim=len(maxpoolshp),
                    ignore_border=True,
                    mode=mode,
                )(input, maxpoolshp, stridesize, padsize)

            utt.verify_grad(mp, [imval], rng=rng)
def test_lower_triangular_and_cholesky_grad():
    # A random lower triangular system is ill-conditioned.
    #
    # Reference
    # ---------
    # Viswanath, Divakar, and L. N. Trefethen. "Condition numbers of random
    # triangular matrices." SIAM Journal on Matrix Analysis and Applications
    # 19.2 (1998): 564-581.
    #
    # Use a smaller N when using float32.
    if config.floatX == "float64":
        N = 100
    else:
        N = 5
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(N, N).astype(config.floatX)
    y = rng.rand(N, 1).astype(config.floatX)

    def f(r, y):
        PD = r.dot(r.T)
        L = gpu_cholesky(PD)
        A = gpu_solve_lower_triangular(L, y)
        AAT = aesara.tensor.dot(A, A.T)
        B = AAT + aesara.tensor.eye(N)
        LB = gpu_cholesky(B)
        return aesara.tensor.sum(aesara.tensor.log(aesara.tensor.diag(LB)))

    utt.verify_grad(f, [r, y], 3, rng)
def test_pseudoinverse_grad():
    rng = np.random.RandomState(utt.fetch_seed())
    d1 = rng.randint(4) + 2
    d2 = rng.randint(4) + 2
    r = rng.randn(d1, d2).astype(config.floatX)
    utt.verify_grad(pinv, [r])
def test_local_logsoftmax_grad_opt(self, axis):
    # Test the LogSoftmax grad substitution.
    #
    # Check that the grad of Log(Softmax(x)) is substituted with the grad of
    # LogSoftmax(x) and that the new operation does not explode for big
    # inputs. Note that only the grad is checked.
    m = config.mode
    m = aesara.compile.get_mode(m)
    m.check_isfinite = False
    # some inputs that are large enough to make the gradient explode in the
    # non-optimized case
    rng = np.random.default_rng(98324)
    a = np.exp(10 * rng.random((5, 10)).astype(config.floatX))

    def myfunc(x):
        sm = softmax(x, axis=axis)
        logsm = log(sm)
        return logsm

    # We set eps to 0.1 because for big values we need a big epsilon.
    utt.verify_grad(myfunc, [a], eps=0.1, mode=m)
    sa = aesara.shared(a)
    f = aesara.function([], myfunc(sa))
    assert check_stack_trace(f, ops_to_check="all")
def test_gradient(self, fn, input_dims):
    utt.verify_grad(
        fn,
        [np.random.rand(*input_dims).astype(config.floatX)],
        n_tests=1,
        rng=self.rng,
    )
def test_DownsampleFactorMax_grad(self):
    rng = np.random.RandomState(utt.fetch_seed())
    # maxpool, input sizes
    examples = (
        ((2,), (3,)),
        ((2,), (2, 3)),
        ((2,), (2, 3, 3)),
        ((1, 1), (2, 3, 3, 4)),
        ((3, 2), (2, 3, 3, 4)),
        ((2, 3), (2, 3, 3, 4)),
        ((1, 1, 1), (2, 3, 3)),
        ((3, 2, 2), (2, 3, 3, 4)),
        ((2, 2, 3), (2, 3, 3, 4, 4)),
    )

    for example, ignore_border, mode in product(
        examples,
        [True, False],
        ["max", "sum", "average_inc_pad", "average_exc_pad"],
    ):
        (maxpoolshp, inputsize) = example
        # more variance means the numeric gradient will be more accurate
        imval = rng.rand(*inputsize) * 10.0

        def mp(input):
            return Pool(
                ndim=len(maxpoolshp), ignore_border=ignore_border, mode=mode
            )(input, maxpoolshp)

        utt.verify_grad(mp, [imval], rng=rng)
def test_pseudoinverse_grad():
    rng = np.random.default_rng(utt.fetch_seed())
    d1 = rng.integers(4) + 2
    d2 = rng.integers(4) + 2
    r = rng.standard_normal((d1, d2)).astype(config.floatX)
    utt.verify_grad(pinv, [r])
def test_batch_normalization():
    def bn_ref(x, G, B, M, V):
        n = (x - M) / V
        return n * G + B

    np.random.seed(1234)
    X = 1 + np.random.random([10, 20]).astype("float32")
    B = 1 + np.random.random([20]).astype("float32")
    G = 1 + np.random.random([20]).astype("float32")
    M = 1 + np.random.random([20]).astype("float32")
    V = 1 + np.random.random([20]).astype("float32")

    x = matrix("x")
    b = vector("b")
    g = vector("g")
    m = vector("m")
    v = vector("v")

    bn_ref_op = bn_ref(x, g, b, m, v)
    f_ref = aesara.function([x, g, b, m, v], [bn_ref_op])
    res_ref = f_ref(X, G, B, M, V)
    for mode in ["low_mem", "high_mem"]:
        bn_op = batchnorm.batch_normalization(x, g, b, m, v, mode=mode)
        f = aesara.function([x, g, b, m, v], [bn_op])
        res = f(X, G, B, M, V)
        utt.assert_allclose(res_ref, res)

        def bn_f(inputs, gamma, beta, mean, std):
            return batchnorm.batch_normalization(
                inputs, gamma, beta, mean, std, mode=mode
            )

        utt.verify_grad(bn_f, [X, G, B, M, V])

    bn_ref_op = bn_ref(
        x, g, b, x.mean(axis=0, keepdims=True), x.std(axis=0, keepdims=True)
    )
    f_ref = aesara.function([x, b, g], [bn_ref_op])
    res_ref = f_ref(X, G, B)
    for mode in ["low_mem", "high_mem"]:
        bn_op = batchnorm.batch_normalization(
            x,
            g,
            b,
            x.mean(axis=0, keepdims=True),
            x.std(axis=0, keepdims=True),
            mode=mode,
        )
        f = aesara.function([x, b, g], [bn_op])
        res = f(X, G, B)
        utt.assert_allclose(res_ref, res)

        def bn_f(inputs, gamma, beta, mean, std):
            return batchnorm.batch_normalization(
                inputs, gamma, beta, mean, std, mode=mode
            )

        utt.verify_grad(
            bn_f, [X, G, B, X.mean(axis=0)[np.newaxis], X.std(axis=0)[np.newaxis]]
        )
def test_eigvalsh_grad():
    rng = np.random.default_rng(utt.fetch_seed())
    a = rng.standard_normal((5, 5))
    a = a + a.T
    b = 10 * np.eye(5, 5) + rng.standard_normal((5, 5))
    utt.verify_grad(
        lambda a, b: eigvalsh(a, b).dot([1, 2, 3, 4, 5]), [a, b], rng=np.random
    )
def test_grad(self):
    # Disable old warning that may be triggered by this test.
    backup = config.warn__sum_div_dimshuffle_bug
    config.warn__sum_div_dimshuffle_bug = False
    try:
        for testname, inputs in self.grad.items():
            inputs = [copy(input) for input in inputs]
            try:
                utt.verify_grad(
                    self.op,
                    inputs,
                    mode=self.mode,
                    rel_tol=_grad_rtol,
                    eps=_grad_eps,
                )
            except Exception as exc:
                err_msg = (
                    "Test %s::%s: Error occurred while"
                    " computing the gradient on the following"
                    " inputs: %s"
                ) % (self.op, testname, inputs)
                exc.args += (err_msg,)
                raise
    finally:
        config.warn__sum_div_dimshuffle_bug = backup
def test_expm_grad_3():
    # with non-symmetric matrix (complex eigenvectors)
    pytest.importorskip("scipy")
    rng = np.random.RandomState(utt.fetch_seed())
    # Always test in float64 for better numerical stability.
    A = rng.randn(5, 5)

    utt.verify_grad(expm, [A], rng=rng)
def test_abs_grad(self):
    def f(m):
        c = complex(m[0], m[1])
        return 0.5 * abs(c)

    rng = np.random.default_rng(9333)
    mval = np.asarray(rng.standard_normal((2, 5)))
    utt.verify_grad(f, [mval])
def test_complex_grads(self):
    def f(m):
        c = at_complex(m[0], m[1])
        return 0.5 * real(c) + 0.9 * imag(c)

    rng = np.random.default_rng(9333)
    mval = np.asarray(rng.standard_normal((2, 5)))
    utt.verify_grad(f, [mval])
def test_complex_grads(self):
    def f(m):
        c = complex(m[0], m[1])
        return 0.5 * real(c) + 0.9 * imag(c)

    rng = np.random.RandomState(9333)
    mval = np.asarray(rng.randn(2, 5))
    utt.verify_grad(f, [mval])
def test_abs_grad(self):
    def f(m):
        c = complex(m[0], m[1])
        return 0.5 * abs(c)

    rng = np.random.RandomState(9333)
    mval = np.asarray(rng.randn(2, 5))
    utt.verify_grad(f, [mval])
def test_vectors(self):
    y_idx = [3]

    def f(a, b):
        return crossentropy_softmax_1hot(shape_padleft(a) + b, y_idx)[0]

    rng = np.random.default_rng(utt.fetch_seed())
    utt.verify_grad(f, [rng.random((4,)), rng.random((4,))])
def test_basic(self):
    utt.verify_grad(xlogy0, [numpy.random.rand(3, 4), numpy.random.rand(3, 4)])

    x = as_tensor_variable([1, 0])
    y = as_tensor_variable([1, 0])
    z = xlogy0(x, y)
    f = theano.function([], z)
    assert numpy.all(f() == numpy.asarray([0, 0.0]))
def test_grad_ignore_border(self):
    shape = (2, 3, 5, 5)
    images_val = np.random.rand(*shape).astype("float32")

    def fn(images):
        return images2neibs(images, (2, 2), mode="ignore_borders")

    unittest_tools.verify_grad(fn, [images_val], mode=self.mode, eps=0.1)
def test_gradient(self, fn, input_dims):
    rng = np.random.default_rng(43)
    utt.verify_grad(
        fn,
        [rng.random(input_dims).astype(config.floatX)],
        n_tests=1,
        rng=rng,
    )
def test_gradient(self):
    rng = np.random.default_rng(43)
    utt.verify_grad(
        self.op_class(),
        [rng.random((5, 4)), rng.random((5, 4))],
        n_tests=1,
        rng=TestProdOp.rng,
    )
def test_inverse_grad():
    rng = np.random.default_rng(utt.fetch_seed())
    r = rng.standard_normal((4, 4))
    utt.verify_grad(matrix_inverse, [r], rng=np.random)

    rng = np.random.default_rng(utt.fetch_seed())
    r = rng.standard_normal((4, 4))
    utt.verify_grad(matrix_inverse, [r], rng=np.random)
def test_neibs2images_grad(self):
    # Say we had images of size (2, 3, 10, 10); extracting 2x2 neighbors
    # from them gives an array of shape (2 * 3 * 5 * 5, 4).
    neibs_val = np.random.rand(150, 4)

    def fn(neibs):
        return neibs2images(neibs, (2, 2), (2, 3, 10, 10))

    unittest_tools.verify_grad(fn, [neibs_val], mode=self.mode, eps=0.1)
def test_inverse_grad():
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(4, 4)
    utt.verify_grad(matrix_inverse, [r], rng=np.random)

    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(4, 4)
    utt.verify_grad(matrix_inverse, [r], rng=np.random)
def test_grad_softmax_grad():
    rng = np.random.default_rng(utt.fetch_seed())
    x = aesara.shared(rng.normal(size=(3, 4)))

    def f(inputs):
        y = softmax_legacy(x)
        return aesara.grad(None, x, known_grads={y: inputs})

    utt.verify_grad(f, [rng.random((3, 4))])
def mp(input, grad):
    out = Pool(ndim=len(maxpoolshp), ignore_border=ignore_border)(
        input, maxpoolshp, stride
    )
    grad_op = MaxPoolGrad(ndim=len(maxpoolshp), ignore_border=ignore_border)
    return grad_op(input, out, grad, maxpoolshp, stride)

utt.verify_grad(mp, [imval, grad_val], rng=rng)
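# The `mp` closure above is only a fragment: `maxpoolshp`, `stride`,
# `ignore_border`, `imval`, `grad_val`, and `rng` come from an enclosing test
# that is not shown here. A minimal sketch of that scaffolding, assuming the
# same Pool / MaxPoolGrad signatures used above; the test name and the
# concrete shapes below are illustrative, not taken from the original test.
def test_MaxPoolGrad_grad_stride_sketch():
    rng = np.random.RandomState(utt.fetch_seed())
    maxpoolshp = (3, 2)
    stride = (2, 2)
    ignore_border = True
    imval = rng.rand(1, 1, 10, 10)
    # Derive the pooled output shape so grad_val matches it, as in
    # test_AveragePoolGrad_grad_stride above.
    grad_shape = Pool.out_shape(
        imval.shape,
        maxpoolshp,
        ndim=len(maxpoolshp),
        ignore_border=ignore_border,
        stride=stride,
    )
    grad_val = rng.rand(*grad_shape)

    def mp(input, grad):
        out = Pool(ndim=len(maxpoolshp), ignore_border=ignore_border)(
            input, maxpoolshp, stride
        )
        grad_op = MaxPoolGrad(ndim=len(maxpoolshp), ignore_border=ignore_border)
        return grad_op(input, out, grad, maxpoolshp, stride)

    utt.verify_grad(mp, [imval, grad_val], rng=rng)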