def test_local_sigm_times_exp(self):
    # Test the `local_sigm_times_exp` optimization:
    #   exp(x) * sigm(-x) -> sigm(x)
    #   exp(-x) * sigm(x) -> sigm(-x)

    def match(func, ops):
        assert [node.op for node in func.maker.fgraph.toposort()] == ops

    m = self.get_mode(excluding=["local_elemwise_fusion", "inplace"])
    x, y = vectors("x", "y")

    f = aesara.function([x], sigmoid(-x) * exp(x), mode=m)
    match(f, [sigmoid])
    assert check_stack_trace(f, ops_to_check=sigmoid)

    f = aesara.function([x], sigmoid(x) * exp(-x), mode=m)
    match(f, [neg, sigmoid])
    assert check_stack_trace(f, ops_to_check=sigmoid)

    f = aesara.function([x], -(-(-(sigmoid(x)))) * exp(-x), mode=m)
    match(f, [neg, sigmoid, neg])
    # assert check_stack_trace(f, ops_to_check=sigmoid)

    f = aesara.function(
        [x, y],
        (sigmoid(x) * sigmoid(-y) * -exp(-x) * exp(x * y) * exp(y)),
        mode=m,
    )
    topo = f.maker.fgraph.toposort()
    for op, nb in [(sigmoid, 2), (mul, 2), (neg, 1), (exp, 1)]:
        assert sum(n.op == op for n in topo) == nb
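
# A quick NumPy sketch of the algebraic identities behind `local_sigm_times_exp`:
# since sigmoid(x) = 1 / (1 + exp(-x)), we have exp(x) * sigmoid(-x) == sigmoid(x)
# and exp(-x) * sigmoid(x) == sigmoid(-x). Illustrative only; the helper name is
# hypothetical and is not called by the tests.
def _sketch_sigm_times_exp_identity():
    import numpy as np

    def sigm(v):
        return 1.0 / (1.0 + np.exp(-v))

    x = np.linspace(-5, 5, 101)
    assert np.allclose(np.exp(x) * sigm(-x), sigm(x))
    assert np.allclose(np.exp(-x) * sigm(x), sigm(-x))
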
def test_argmax_pushdown_bias():
    x = matrix()
    b = vector()

    out = argmax(softmax_with_bias(x, b), axis=-1)
    fgraph = FunctionGraph([x, b], [out])

    optdb.query(OPT_FAST_RUN).optimize(fgraph)

    types_to_check = (DimShuffle, Elemwise, Argmax)
    assert len(fgraph.toposort()) == 3

    for i, type in enumerate(types_to_check):
        assert isinstance(fgraph.toposort()[i].op, type)
    assert check_stack_trace(fgraph, ops_to_check=types_to_check)

    x = matrix()
    b = vector()
    out = max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
    fgraph = FunctionGraph([x, b], [out])

    optdb.query(OPT_FAST_RUN).optimize(fgraph)

    assert len(fgraph.toposort()) == 2
    assert isinstance(fgraph.toposort()[0].op, SoftmaxWithBias)
    assert isinstance(fgraph.toposort()[1].op, CAReduce)
    assert isinstance(fgraph.toposort()[1].op.scalar_op, aesara.scalar.ScalarMaximum)
    assert check_stack_trace(fgraph, ops_to_check=(SoftmaxWithBias, CAReduce))
def test_softmax_with_bias_trace(self):
    rng = np.random.default_rng(utt.fetch_seed())
    a = aesara.shared(rng.standard_normal((3,)).astype(config.floatX))
    b = aesara.shared(np.float32(rng.standard_normal()))
    sm = softmax(a + b)
    f = aesara.function([], sm)

    assert check_stack_trace(f, ops_to_check="last")
def test_local_logsoftmax_grad_opt(self, axis):
    # Test the LogSoftmax grad substitution.
    #
    # Check that the grad of Log(Softmax(x)) is substituted with the grad of
    # LogSoftmax(x) and that the new operation does not explode for big
    # inputs. Note that only the grad is checked.

    m = config.mode
    m = aesara.compile.get_mode(m)
    m.check_isfinite = False

    # Some inputs that are large enough to make the gradient explode in the
    # non-optimized case.
    rng = np.random.default_rng(98324)
    a = np.exp(10 * rng.random((5, 10)).astype(config.floatX))

    def myfunc(x):
        sm = softmax(x, axis=axis)
        logsm = log(sm)
        return logsm

    # We set the step to 0.1 because big values need a big epsilon.
    utt.verify_grad(myfunc, [a], eps=0.1, mode=m)
    sa = aesara.shared(a)
    f = aesara.function([], myfunc(sa))
    assert check_stack_trace(f, ops_to_check="all")
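
# A quick NumPy sketch of why this substitution matters numerically: a naive
# log(softmax(x)) underflows to log(0) = -inf (or produces nan) for large
# inputs, while the mathematically equivalent x - logsumexp(x) stays finite.
# Illustrative only; the helper name is hypothetical and is not called by the
# tests.
def _sketch_logsoftmax_stability():
    import numpy as np
    from scipy.special import logsumexp

    x = np.array([0.0, 600.0, 1200.0])

    with np.errstate(over="ignore", divide="ignore", invalid="ignore"):
        naive = np.log(np.exp(x) / np.exp(x).sum())  # exp overflows here
    stable = x - logsumexp(x)  # log-softmax form

    assert not np.all(np.isfinite(naive))
    assert np.all(np.isfinite(stable))
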
def test_argmax_pushdown():
    x = matrix()
    for sm in [softmax_graph, softmax_legacy]:
        # Test that max_and_argmax is pushed down if the max is not used.
        out = max_and_argmax(sm(exp(tanh(sigmoid(x)))), axis=-1)[1]
        fgraph = FunctionGraph([x], [out])

        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        assert len(fgraph.toposort()) == 1
        assert isinstance(fgraph.toposort()[0].op, Argmax)
        assert check_stack_trace(fgraph, ops_to_check=Argmax)

        x = matrix()
        # Test that max_and_argmax is not pushed down if the max is used.
        out = max_and_argmax(sm(exp(tanh(sigmoid(x)))), axis=-1)[0]
        fgraph = FunctionGraph([x], [out])

        assert hasattr(fgraph.outputs[0].tag, "trace")

        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        assert len(fgraph.toposort()) == 3
        assert isinstance(fgraph.toposort()[0].op, Elemwise)
        assert isinstance(fgraph.toposort()[1].op, Softmax)
        assert isinstance(fgraph.toposort()[2].op, CAReduce)
        assert isinstance(fgraph.toposort()[2].op.scalar_op, aesara.scalar.ScalarMaximum)
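
# A quick NumPy sketch of why the pushdown is valid: exp, tanh, and sigmoid are
# monotonically increasing, and softmax preserves the ordering along the
# reduced axis (a monotone exp divided by a shared positive sum), so none of
# them change which element is the largest. Illustrative only; the helper name
# is hypothetical and is not called by the tests.
def _sketch_argmax_pushdown_identity():
    import numpy as np
    from scipy.special import softmax as np_softmax

    rng = np.random.default_rng(0)
    x = rng.standard_normal((4, 7))
    assert np.array_equal(
        np.argmax(np_softmax(np.tanh(x), axis=-1), axis=-1),
        np.argmax(x, axis=-1),
    )
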
def test_blocksparse_inplace_gemv_opt():
    b = fmatrix()
    W = ftensor4()
    h = ftensor3()
    iIdx = lmatrix()
    oIdx = lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = aesara.function([W, h, iIdx, b, oIdx], o)

    if aesara.config.mode == "FAST_COMPILE":
        assert not f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=[sparse_block_gemv])
    else:
        assert f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=[sparse_block_gemv_inplace])
def test_blocksparse_inplace_outer_opt():
    b = fmatrix()
    W = ftensor4()
    h = ftensor3()
    iIdx = lmatrix()
    oIdx = lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = aesara.function(
        [W, h, iIdx, b, oIdx], [o, aesara.gradient.grad(o.sum(), wrt=W)]
    )

    if aesara.config.mode == "FAST_COMPILE":
        assert not f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=sparse_block_outer)
    else:
        assert f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=sparse_block_outer_inplace)
def test_local_ultra_fast_sigmoid(self):
    x = matrix("x")
    s = sigmoid(x)

    mode = self.get_mode("local_ultra_fast_sigmoid")
    f = aesara.function([x], s, mode=mode)
    assert check_stack_trace(f, ops_to_check=sigmoid)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert topo[0].op == sigmoid

    mode = self.get_mode().including("local_ultra_fast_sigmoid")
    f = aesara.function([x], s, mode=mode)
    assert check_stack_trace(f, ops_to_check=ultra_fast_sigmoid)
    topo = f.maker.fgraph.toposort()
    assert topo[0].op == ultra_fast_sigmoid
    assert len(topo) == 1
    f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])
def test_local_hard_sigmoid(self):
    x = matrix("x")
    s = sigmoid(x)

    mode = self.get_mode("local_hard_sigmoid")
    f = aesara.function([x], s, mode=mode)
    assert check_stack_trace(f, ops_to_check=sigmoid)
    topo = f.maker.fgraph.toposort()
    assert topo[0].op == sigmoid
    assert len(topo) == 1

    mode = self.get_mode().including("local_hard_sigmoid")
    f = aesara.function([x], s, mode=mode)
    topo = f.maker.fgraph.toposort()
    assert not any(n.op == sigmoid for n in topo)
    f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])

    mode2 = mode.excluding("fusion").excluding("inplace")
    f2 = aesara.function([x], s, mode=mode2)
    assert check_stack_trace(f2, ops_to_check=clip)
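
# A quick NumPy sketch of a common piecewise-linear "hard" sigmoid,
# clip(0.2 * x + 0.5, 0, 1), which is why the rewritten graph is expected to
# contain a `clip` op. The exact slope and offset used by the rewrite are not
# asserted here; the helper name is hypothetical and is not called by the
# tests.
def _sketch_hard_sigmoid_approximation():
    import numpy as np

    x = np.linspace(-3, 3, 121)
    exact = 1.0 / (1.0 + np.exp(-x))
    hard = np.clip(0.2 * x + 0.5, 0.0, 1.0)
    # The approximation is crude but stays within ~0.1 of the exact sigmoid
    # on this range.
    assert np.max(np.abs(exact - hard)) < 0.15
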
def test_local_logsoftmax_opt(self, axis):
    # Test the LogSoftmax substitution.
    #
    # Check that Log(Softmax(x)) is substituted with LogSoftmax(x). Note that
    # only the forward pass is checked (i.e., the gradient is not checked).

    x = matrix("x")
    sm = softmax(x, axis=axis)
    logsm = log(sm)
    f = aesara.function([x], logsm)
    assert isinstance(f.maker.fgraph.outputs[0].owner.op, LogSoftmax)
    assert check_stack_trace(f, ops_to_check=LogSoftmax)
def test_1msigmoid(self):
    if not register_local_1msigmoid:
        return

    m = self.get_mode()
    x = fmatrix()

    # Tests exp_over_1_plus_exp.
    f = aesara.function([x], 1 - exp(x) / (1 + exp(x)), mode=m)
    assert check_stack_trace(f, ops_to_check=[neg, sigmoid_inplace])
    assert [node.op for node in f.maker.fgraph.toposort()] == [
        neg,
        sigmoid_inplace,
    ]

    # Tests inv_1_plus_exp.
    f = aesara.function([x], 1 - aet.fill(x, 1.0) / (1 + exp(-x)), mode=m)
    assert check_stack_trace(f, ops_to_check=[neg, sigmoid_inplace])
    assert [node.op for node in f.maker.fgraph.toposort()] == [
        neg,
        sigmoid_inplace,
    ]
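
# A quick NumPy sketch of the identity behind the 1msigmoid rewrites:
# 1 - sigmoid(x) == sigmoid(-x), whether sigmoid is written as
# exp(x) / (1 + exp(x)) or as 1 / (1 + exp(-x)); that is why both graphs above
# reduce to [neg, sigmoid]. Illustrative only; the helper name is hypothetical
# and is not called by the tests.
def _sketch_one_minus_sigmoid_identity():
    import numpy as np

    x = np.linspace(-5, 5, 101)
    exp_over_1_plus_exp = np.exp(x) / (1 + np.exp(x))
    inv_1_plus_exp = 1 / (1 + np.exp(-x))
    sigm_neg = 1 / (1 + np.exp(x))  # sigmoid(-x)

    assert np.allclose(1 - exp_over_1_plus_exp, sigm_neg)
    assert np.allclose(1 - inv_1_plus_exp, sigm_neg)
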
def test_local_ultra_fast_sigmoid(self):
    x = matrix("x")
    s = sigmoid(x)

    mode = self.get_mode("local_ultra_fast_sigmoid")
    f = aesara.function([x], s, mode=mode)
    assert check_stack_trace(f, ops_to_check=sigmoid)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert topo[0].op == sigmoid

    mode = self.get_mode().including("local_ultra_fast_sigmoid")
    f = aesara.function([x], s, mode=mode)
    assert check_stack_trace(f, ops_to_check=ultra_fast_sigmoid)
    topo = f.maker.fgraph.toposort()
    assert topo[0].op == ultra_fast_sigmoid
    assert len(topo) == 1

    s = sigmoid_inplace(x)
    f = aesara.function([x], s, mode=mode, accept_inplace=True)
    assert check_stack_trace(f, ops_to_check=ultra_fast_sigmoid_inplace)
    topo = f.maker.fgraph.toposort()
    assert topo[0].op == ultra_fast_sigmoid_inplace
    assert len(topo) == 1
def test_basic_keepdims(self, axis):
    c = tensor3()
    p_y = exp(c) / exp(c).sum(axis=axis, keepdims=True)

    # Test that the function contains a softmax and no div.
    f = aesara.function([c], p_y, mode=self.mode)
    assert check_stack_trace(f, ops_to_check=Softmax)

    f_ops = [n.op for n in f.maker.fgraph.toposort()]
    assert len(f_ops) == 1
    assert isinstance(f_ops[0], Softmax)

    c_val = self.rng.random((3, 4, 5)).astype(config.floatX)
    assert np.allclose(f(c_val), sp.softmax(c_val, axis=axis))
def test_stabilize_log_softmax():
    mode = aesara.compile.mode.get_default_mode()
    mode = mode.including("local_log_softmax", "specialize")

    x = matrix()
    y = softmax(x)
    z = log(y)

    f = aesara.function([x], z, mode=mode)
    assert check_stack_trace(f, ops_to_check="all")

    # Check that the softmax has been optimized out.
    for node in f.maker.fgraph.toposort():
        assert not isinstance(node.op, y.owner.op.__class__)

    # Call the function so debug mode can verify that the optimized version
    # matches the unoptimized version.
    rng = np.random.default_rng([2012, 8, 22])
    f(np.cast[config.floatX](rng.random((2, 3))))
def test_softmax_grad_optimizations(self):
    x = matrix("x")
    one_of_n = lvector("one_of_n")
    op = crossentropy_categorical_1hot
    xe = op(softmax_legacy(x), one_of_n)
    sum_xe = aet_sum(xe)
    g_x = grad(sum_xe, x)
    fgraph = FunctionGraph([x, one_of_n], [g_x])
    assert check_stack_trace(
        fgraph,
        ops_to_check=[crossentropy_softmax_1hot_with_bias_dx, softmax_legacy],
    )

    optdb.query(OPT_FAST_RUN).optimize(fgraph)

    ops = {node.op for node in fgraph.toposort()}
    assert crossentropy_softmax_argmax_1hot_with_bias not in ops
    assert crossentropy_softmax_1hot_with_bias_dx in ops
    assert softmax_legacy in ops
    assert softmax_grad_legacy not in ops
def test_basic(self, axis):
    c = matrix()
    if axis is None:
        p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", "x")
    elif axis == 0:
        p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", 0)
    elif axis == (0, 1):
        p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", "x")
    else:
        p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle(0, "x")

    # Test that the function contains a softmax and no div.
    f = aesara.function([c], p_y, mode=self.mode)
    assert check_stack_trace(f, ops_to_check=Softmax)

    f_ops = [n.op for n in f.maker.fgraph.toposort()]
    assert len(f_ops) == 1
    assert isinstance(f_ops[0], Softmax)

    c_val = self.rng.random((3, 4)).astype(config.floatX)
    assert np.allclose(f(c_val), sp.softmax(c_val, axis=axis))
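
# A quick NumPy sketch of the pattern the softmax rewrite recognizes:
# exp(c) / exp(c).sum(axis) is exactly the softmax definition, so replacing the
# exp-and-divide subgraph with a single Softmax op preserves the value.
# Illustrative only; the helper name is hypothetical and is not called by the
# tests.
def _sketch_softmax_definition():
    import numpy as np
    from scipy.special import softmax as np_softmax

    rng = np.random.default_rng(0)
    c = rng.standard_normal((3, 4))
    manual = np.exp(c) / np.exp(c).sum(axis=-1, keepdims=True)
    assert np.allclose(manual, np_softmax(c, axis=-1))
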
def check_diagonal_subtensor_view_traces(fn):
    assert check_stack_trace(
        fn, ops_to_check=(DiagonalSubtensor, IncDiagonalSubtensor)
    )
def test_softmax_with_bias_trace(self):
    a = aesara.shared(np.random.randn(3).astype(config.floatX))
    b = aesara.shared(np.float32(np.random.randn()))
    sm = softmax(a + b)
    f = aesara.function([], sm)

    assert check_stack_trace(f, ops_to_check="last")