def test_asymptotic_32():
    # This test makes sure that our functions behave sensibly when
    # huge values are present
    # TODO: consider adding the optimization of crossentropy into the current
    # mode for the purpose of running this test
    for dtype in "float32", "float64":
        if dtype == "float32":
            x = fmatrix()
            x2 = fvector()
        else:
            x = dmatrix()
            x2 = dvector()
        y = lvector()

        c = categorical_crossentropy(softmax(x + x2), y)
        f = aesara.function([x, y, x2], [c.sum(), grad(c.sum(), x)], mode="FAST_RUN")

        xval = np.zeros((5, 5), dtype=dtype)
        x2val = np.zeros(5, dtype=xval.dtype)
        for i in range(100):
            cval, gxval = f(xval, np.arange(5), x2val)
            xval -= 100.3 * gxval
        assert cval == 0  # no problem going to zero error

        # what about when x gets really big?
        xval = np.zeros((5, 5), dtype=dtype)
        x2val = np.zeros(5, dtype=xval.dtype)
        for i in range(100):
            cval, gxval = f(xval, np.arange(5), x2val)
            xval += 100000.3 * gxval
        assert cval > 61750000
        assert gxval[0, 0] == -1.0
        assert gxval[0, 1] == 0.25
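
# A minimal NumPy sketch (not part of the Aesara test above; names are
# illustrative) of where the asserted asymptotic gradient values come from.
# The gradient of summed categorical crossentropy w.r.t. the logits is
# softmax(logits) - one_hot(label). Row 0 has true label 0; once gradient
# ascent has pushed the true-class logit far below the other four, the
# true-class probability tends to 0 (so grad[0, 0] -> -1.0) and the remaining
# probability mass spreads evenly over the 4 wrong classes (grad[0, 1] -> 0.25).
def _asymptotic_grad_sketch():
    logits = np.zeros(5)
    logits[0] = -1e6  # true-class logit pushed far down, as in the loop above
    e = np.exp(logits - logits.max())
    p = e / e.sum()  # softmax probabilities: ~[0, 0.25, 0.25, 0.25, 0.25]
    g = p - np.eye(5)[0]  # crossentropy gradient w.r.t. the logits of row 0
    assert np.isclose(g[0], -1.0)
    assert np.isclose(g[1], 0.25)
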
def test_matrix_perform_and_opt(self):
    m = config.mode
    m = aesara.compile.get_mode(m)
    m.check_isfinite = False
    x, y = matrices("xy")

    # regular softmax and crossentropy
    sm = softmax(x)
    cm = categorical_crossentropy(sm, y)

    # numerically stable log-softmax with crossentropy
    logsm = logsoftmax(x)
    sm2 = exp(logsm)  # just used to show equivalence with sm
    cm2 = -aet_sum(y * logsm, axis=1)
    grad_node = grad(cm2.mean(), x)

    # create some large inputs into a softmax
    a = np.exp(10 * np.random.random((5, 10)).astype(config.floatX))
    # create some one-hot coded labels
    b = np.eye(5, 10).astype(config.floatX)

    # show equivalence of softmax and exponentiated numerically stable
    # log-softmax
    f1 = aesara.function([x], [sm, sm2])
    sm_, sm2_ = f1(a)
    utt.assert_allclose(sm_, sm2_)

    # now show that the two versions result in the same crossentropy cost;
    # this indicates that the forward function does provide some numerical
    # stability
    f2 = aesara.function([x, y], [cm, cm2], mode=m)
    cm_, cm2_ = f2(a, b)
    utt.assert_allclose(cm_, cm2_)

    # now, show that the gradient of the log-softmax version stays finite,
    # whereas the standard softmax formulation would blow up
    f3 = aesara.function([x, y], [grad_node])
    grad_ = f3(a, b)
    assert not np.any(np.isnan(grad_))
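
# A minimal NumPy sketch (separate from the test above; names are illustrative)
# of the numerical-stability point it relies on: computing log(softmax(x))
# naively overflows for large logits, while the log-sum-exp form
# logsoftmax(x) = (x - max(x)) - log(sum(exp(x - max(x)))) stays finite.
def _logsoftmax_stability_sketch():
    x = np.array([1000.0, 1000.0, 0.0])  # large logits, like the inputs `a` above
    with np.errstate(over="ignore", invalid="ignore", divide="ignore"):
        # naive: exp(1000) overflows to inf, so the result contains nan/-inf
        naive = np.log(np.exp(x) / np.exp(x).sum())
    # stable: shift by the maximum before exponentiating
    shifted = x - x.max()
    stable = shifted - np.log(np.exp(shifted).sum())
    assert np.any(np.isnan(naive)) or np.any(np.isinf(naive))
    assert np.all(np.isfinite(stable))
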