def test_local_sigm_times_exp(self):
    # Exercise the `local_sigm_times_exp` rewrite:
    #   exp(x) * sigm(-x) -> sigm(x)
    #   exp(-x) * sigm(x) -> sigm(-x)

    def assert_ops(compiled, expected_ops):
        # The ops of the compiled graph, in toposort order, must equal
        # `expected_ops` exactly.
        found_ops = [apply.op for apply in compiled.maker.fgraph.toposort()]
        assert found_ops == expected_ops

    mode = self.get_mode(excluding=["local_elemwise_fusion", "inplace"])
    x, y = tt.vectors("x", "y")

    # sigm(-x) * exp(x) collapses to a single sigmoid.
    fn = aesara.function([x], sigmoid(-x) * tt.exp(x), mode=mode)
    assert_ops(fn, [sigmoid])
    assert check_stack_trace(fn, ops_to_check=sigmoid)

    # sigm(x) * exp(-x) becomes sigm(-x): a negation followed by a sigmoid.
    fn = aesara.function([x], sigmoid(x) * tt.exp(-x), mode=mode)
    assert_ops(fn, [tt.neg, sigmoid])
    assert check_stack_trace(fn, ops_to_check=sigmoid)

    # Three stacked negations around the sigmoid leave one outer negation.
    # NOTE: the stack-trace check is not asserted for this case (it was
    # already disabled in the test as written).
    fn = aesara.function([x], -(-(-(sigmoid(x)))) * tt.exp(-x), mode=mode)
    assert_ops(fn, [tt.neg, sigmoid, tt.neg])

    # Mixed two-variable product: only the number of occurrences of each op
    # is checked, not their order.
    fn = aesara.function(
        [x, y],
        (sigmoid(x) * sigmoid(-y) * -tt.exp(-x) * tt.exp(x * y) * tt.exp(y)),
        mode=mode,
    )
    nodes = fn.maker.fgraph.toposort()
    expected_counts = [(sigmoid, 2), (tt.mul, 2), (tt.neg, 1), (tt.exp, 1)]
    for wanted_op, count in expected_counts:
        assert sum(node.op == wanted_op for node in nodes) == count
def test_connection_pattern_override(self, cls_ofg):
    # Build an op whose output graph does not use `x`, but whose overridden
    # gradient and explicit connection pattern still declare `x` as the
    # connected input, then check the gradient wrt `x`.
    x, y = tt.vectors("xy")

    def forward(x, y):
        # `x` is deliberately dropped from the forward graph; only the
        # gradient override knows how to backpropagate to it, and the
        # gradient wrt `y` is of no interest here.
        del x
        return y + tt.round(y)

    def backward(inputs, output_gradients):
        # Pass the output gradient straight through to `x`; `y` is
        # reported as disconnected.
        grad_wrt_x = output_gradients[0]
        return [grad_wrt_x, aesara.gradient.disconnected_type()]

    op = cls_ofg(
        inputs=[x, y],
        outputs=[forward(x, y)],
        grad_overrides=backward,
        # One row per input: `x` connected to the output, `y` not.
        connection_pattern=[[True], [False]],
        # `x` does not appear in the output graph.
        on_unused_input="ignore",
    )

    result = op(x, y)
    grad_x = aesara.grad(result.sum(), x)
    feed = {
        x: np.ones((5,), dtype=np.float32),
        y: np.ones((5,), dtype=np.float32),
    }
    grad_value = grad_x.eval(feed)
    assert np.allclose(grad_value, [1.0] * 5)
def test_parse_mul_tree(self):
    # Check the nested `[negated, payload]` lists that `parse_mul_tree`
    # returns for products with negations in different positions.
    x, y, z = tt.vectors("x", "y", "z")

    cases = [
        # Plain product.
        (x * y, [False, [[False, x], [False, y]]]),
        # Negation applied to the whole product.
        (-(x * y), [True, [[False, x], [False, y]]]),
        # Negation applied to one factor only.
        (-x * y, [False, [[True, x], [False, y]]]),
        # Negation of a single variable.
        (-x, [True, x]),
        # Nested product with a negated trailing factor.
        (
            (x * y) * -z,
            [False, [[False, [[False, x], [False, y]]], [True, z]]],
        ),
    ]
    for expression, expected_tree in cases:
        assert parse_mul_tree(expression) == expected_tree
def test_full_graph(self):
    # Test `is_same_graph` with more complex graphs.
    #
    # Each case handed to `self.check` is a pair of graphs followed by a
    # tuple of (substitution dict, bool) entries; presumably the dict is
    # the `givens` and the bool the expected answer (see `self.check`).
    x, y, z = tensor.vectors("x", "y", "z")
    t = x * y

    cases = []
    # The same expression built twice.
    cases.append((x * 2, x * 2, (({}, True),)))
    # Same structure over different inputs, substituting in either direction.
    cases.append((x * 2, y * 2, (({}, False), ({y: x}, True))))
    cases.append((x * 2, y * 2, (({}, False), ({x: y}, True))))
    # Different constants: the substitution does not help.
    cases.append((x * 2, y * 3, (({}, False), ({y: x}, False))))
    # An intermediate variable (`t = x * y`) against a plain input,
    # substituting in either direction.
    cases.append((t * 2, z * 2, (({}, False), ({t: z}, True))))
    cases.append((t * 2, z * 2, (({}, False), ({z: t}, True))))
    # Different grouping of the same product.
    cases.append((x * (y * z), (x * y) * z, (({}, False),)))

    self.check(cases)
def test_nested(self, cls_ofg):
    # Chain two ops: the first maps (x, y) to (x + y, x - y); the second
    # halves the sum and difference of its inputs, so applying it to the
    # first op's outputs should give back the original values.
    x, y = tt.vectors("xy")
    total, diff = x + y, x - y
    forward_op = cls_ofg([x, y], [total, diff])
    inverse_op = cls_ofg([x, y], [total / 2, diff / 2])

    xx = tt.vector("xx")
    yy = tt.vector("yy")
    intermediate = forward_op(xx, yy)
    xx_back, yy_back = inverse_op(*intermediate)
    fn = function([xx, yy], [xx_back, yy_back])

    x_val = np.random.rand(16).astype(config.floatX)
    y_val = np.random.rand(16).astype(config.floatX)
    x_out, y_out = fn(x_val, y_val)

    assert np.allclose(x_val, x_out)
    assert np.allclose(y_val, y_out)
def test_rop_override(self, cls_ofg):
    # Check that `rop_overrides` is honoured both when it is given as a
    # Python callable and when it is given as another `cls_ofg` instance.
    x, y = tt.vectors("xy")

    def ro(inps, epts):
        # Custom R-op for z = x * y, with deliberately non-standard
        # coefficients (2.0 and 1.5) so that the result can only match if
        # the override is actually used.
        x, y = inps
        u, v = epts
        return [u * y * 2.0 + x * v * 1.5]

    u, v = tt.vectors("uv")
    # The same override, wrapped as an op over (x, y, u, v).
    op_mul_rop = cls_ofg([x, y, u, v], ro([x, y], [u, v]))
    op_mul = cls_ofg([x, y], [x * y], rop_overrides=ro)
    op_mul2 = cls_ofg([x, y], [x * y], rop_overrides=op_mul_rop)

    # single override case (callable, then op instance)
    xx, yy = tt.vector("xx"), tt.vector("yy")
    du, dv = tt.vector("du"), tt.vector("dv")
    for op in [op_mul, op_mul2]:
        # Apply the op under test. Calling `op_mul` here regardless of the
        # loop variable would leave `op_mul2` completely unexercised.
        zz = op(xx, yy)
        dw = tt.Rop(zz, [xx, yy], [du, dv])
        fn = function([xx, yy, du, dv], dw)
        # Rows of `vals` are, in order: xx, yy, du, dv.
        vals = np.random.rand(4, 32).astype(config.floatX)
        dwval = fn(*vals)
        assert np.allclose(
            dwval, vals[0] * vals[3] * 1.5 + vals[1] * vals[2] * 2.0
        )
def test_c_thunks():
    # Compile an `ifelse` graph with the VM linker, without the C loop, for
    # `c_thunks=False` and, when a C++ compiler is configured, also for
    # `c_thunks=True`; then check that thunks carry a `cthunk` attribute
    # exactly when `c_thunks` was requested.
    a = tensor.scalars("a")
    b, c = tensor.vectors("bc")

    settings = [False, True] if aesara.config.cxx else [False]

    for c_thunks in settings:
        linker = vm.VM_Linker(c_thunks=c_thunks, use_cloop=False)
        mode = Mode(optimizer=None, linker=linker)
        f = function([a, b, c], ifelse(a, a * b, b * c), mode=mode)

        # True condition: only `a * b` is needed, so the mismatched lengths
        # of `b` and `c` are expected to go unnoticed.
        f(1, [2], [3, 2])

        # False condition: `b * c` is evaluated and the length mismatch
        # must raise.
        with pytest.raises(ValueError):
            f(0, [2], [3, 4])

        has_cthunk = any(hasattr(thunk, "cthunk") for thunk in f.fn.thunks)
        assert has_cthunk == c_thunks
def test_single_var(self):
    # Test `is_same_graph` with some trivial graphs (one Variable).
    #
    # Each case is a pair of graphs followed by a tuple of
    # (substitution dict, bool) entries, handed to `self.check`.
    # The third vector ("z") is created but not used by any case.
    x, y, _ = tensor.vectors("x", "y", "z")

    cases = [
        # A variable compared with itself.
        (x, x, (({}, True),)),
        # Two distinct variables, with and without a substitution.
        (x, y, (({}, False), ({y: x}, True))),
        # A variable against a negation.
        (x, tensor.neg(x), (({}, False),)),
        (x, tensor.neg(y), (({}, False),)),
    ]
    self.check(cases)
def test_perform_sigm_times_exp(self):
    # Test the core function doing the `sigm_times_exp` optimization.
    #
    # Working directly on parsed multiplication trees makes it easier to
    # cover different graph scenarios than compiling an aesara function.
    x, y, z, t = tt.vectors("x", "y", "z", "t")
    exp = tt.exp

    def assert_rewrites_to(before, after):
        # Parse both expressions, rewrite and simplify the first tree, then
        # require that both trees rebuild into the same graph.
        rewritten_tree, expected_tree = [
            parse_mul_tree(expr) for expr in (before, after)
        ]
        perform_sigm_times_exp(rewritten_tree)
        rewritten_tree = simplify_mul(rewritten_tree)
        same = is_same_graph(
            compute_mul(rewritten_tree), compute_mul(expected_tree)
        )
        if not same:
            # Dump both trees and both rebuilt graphs to help debugging.
            print(rewritten_tree)
            print(expected_tree)
            print("***")
            aesara.printing.debugprint(compute_mul(rewritten_tree))
            print("***")
            aesara.printing.debugprint(compute_mul(expected_tree))
        assert same

    # Simplest case.
    assert_rewrites_to(sigmoid(x) * exp(-x), sigmoid(-x))

    # The matching exp sits inside a nested product.
    assert_rewrites_to(
        -x * sigmoid(x) * (y * (-1 * z) * exp(-x)),
        -x * sigmoid(-x) * (y * (-1 * z)),
    )

    # Several sigmoid/exp pairs spread over a deeply nested product.
    assert_rewrites_to(
        -sigmoid(-x)
        * (
            exp(y)
            * (-exp(-z) * 3 * -exp(x))
            * (y * 2 * (-sigmoid(-y) * (z + t) * exp(z)) * sigmoid(z))
        )
        * -sigmoid(x),
        sigmoid(x)
        * (-sigmoid(y) * (-sigmoid(-z) * 3) * (y * 2 * ((z + t) * exp(z))))
        * (-sigmoid(x)),
    )

    # The same pair occurring twice.
    assert_rewrites_to(
        exp(-x) * -exp(-x) * (-sigmoid(x) * -sigmoid(x)),
        -sigmoid(-x) * sigmoid(-x),
    )

    # Two exps for a single sigmoid.
    assert_rewrites_to(-exp(x) * -sigmoid(-x) * -exp(-x), -sigmoid(-x))
def test_merge_only(self):
    # Test `is_same_graph` when `equal_computations` cannot be used.
    #
    # Each case is a pair of graphs followed by a tuple of
    # (substitution dict, bool) entries, handed to `self.check`.
    x, y, z = tensor.vectors("x", "y", "z")
    t = x * y

    cases = []
    # An input against an intermediate variable.
    cases.append((x, t, (({}, False), ({t: x}, True))))
    # The same, one multiplication further down.
    cases.append((t * 2, x * 2, (({}, False), ({t: x}, True))))
    # A square against a product of two inputs.
    # NOTE(review): this case appears twice, verbatim; both copies are kept
    # so the set of checks is unchanged. Confirm whether the second copy
    # was meant to be a different case.
    cases.append((x * x, x * y, (({}, False), ({y: x}, True))))
    cases.append((x * x, x * y, (({}, False), ({y: x}, True))))
    # Both substitutions are needed before the graphs match.
    cases.append(
        (
            x * x + z,
            x * y + t,
            (({}, False), ({y: x}, False), ({y: x, t: z}, True)),
        )
    )

    self.check(cases)
def test_CheckAndRaise_equal():
    # Two separately built but identical `assert_op` graphs must compare
    # equal under `equal_computations`.
    x, y = at.vectors("xy")

    first_condition = (x > y).all()
    first_graph = assert_op(x, first_condition)

    second_condition = (x > y).all()
    second_graph = assert_op(x, second_condition)

    assert equal_computations([first_graph], [second_graph])
def test_grad_override(self, cls_ofg):
    # Check `grad_overrides` in its different forms: a single callable, an
    # op instance, a per-input list (including "default"), and a list
    # containing NullType / DisconnectedType variables.
    x, y = tt.vectors("xy")

    def rand_vec():
        # Fresh random vector of length 16 in the configured float dtype.
        return np.random.rand(16).astype(config.floatX)

    def go(inps, gs):
        # Gradient of z = x * y with deliberately non-standard coefficients
        # (2 and 1.5), so the asserts below only pass if the override is used.
        x, y = inps
        (g,) = gs
        return [g * y * 2, g * x * 1.5]

    dedz = tt.vector("dedz")
    # The same override, wrapped as an op over (x, y, dedz).
    op_mul_grad = cls_ofg([x, y, dedz], go([x, y], [dedz]))

    op_mul = cls_ofg([x, y], [x * y], grad_overrides=go)
    op_mul2 = cls_ofg([x, y], [x * y], grad_overrides=op_mul_grad)

    # single override case (function or OfG instance)
    xx, yy = tt.vector("xx"), tt.vector("yy")
    for op in [op_mul, op_mul2]:
        cost = tt.sum(op(xx, yy))
        dx, dy = tt.grad(cost, [xx, yy])
        fn = function([xx, yy], [dx, dy])
        xv = rand_vec()
        yv = rand_vec()
        dxv, dyv = fn(xv, yv)
        assert np.allclose(yv * 2, dxv)
        assert np.allclose(xv * 1.5, dyv)

    # list override case
    def go1(inps, gs):
        # Override for the gradient wrt the first input.
        _, w, _ = inps
        return gs[0] * w * 2

    def go2(inps, gs):
        # Override for the gradient wrt the second input.
        x, _, _ = inps
        return gs[0] * x * 1.5

    w, b = tt.vectors("wb")
    # we make the 3rd gradient default (no override)
    op_linear = cls_ofg(
        [x, w, b],
        [x * w + b],
        grad_overrides=[go1, go2, "default"],
    )
    xx, ww, bb = tt.vector("xx"), tt.vector("yy"), tt.vector("bb")
    cost = tt.sum(op_linear(xx, ww, bb))
    dx, dw, db = tt.grad(cost, [xx, ww, bb])
    fn = function([xx, ww, bb], [dx, dw, db])
    xv = rand_vec()
    wv = rand_vec()
    bv = rand_vec()
    dxv, dwv, dbv = fn(xv, wv, bv)
    assert np.allclose(wv * 2, dxv)
    assert np.allclose(xv * 1.5, dwv)
    # Default gradient of `x * w + b` wrt `b` is all ones.
    assert np.allclose(np.ones(16, dtype=config.floatX), dbv)

    # NullType and DisconnectedType
    null_grad = NullType()()
    disconnected_grad = DisconnectedType()()
    op_linear2 = cls_ofg(
        [x, w, b],
        [x * w + b],
        grad_overrides=[go1, null_grad, disconnected_grad],
    )
    cost2 = tt.sum(op_linear2(xx, ww, bb))
    dx2, dw2, db2 = tt.grad(
        cost2,
        [xx, ww, bb],
        return_disconnected="Disconnected",
        disconnected_inputs="ignore",
        null_gradients="return",
    )
    # First gradient is an ordinary vector; the other two keep the special
    # types supplied in the override list.
    assert isinstance(dx2.type, tt.TensorType)
    assert dx2.ndim == 1
    assert isinstance(dw2.type, NullType)
    assert isinstance(db2.type, DisconnectedType)
def test_compute_mul(self):
    # `compute_mul` should undo `parse_mul_tree`: re-parsing a rebuilt graph
    # gives the same tree, and the rebuilt graph matches the original
    # expression.
    x, y, z = tt.vectors("x", "y", "z")
    expression = (x * y) * -z

    parsed = parse_mul_tree(expression)
    reparsed = parse_mul_tree(compute_mul(parsed))
    assert reparsed == parsed

    rebuilt = compute_mul(parse_mul_tree(expression))
    assert is_same_graph(rebuilt, expression)