def test_rop_override(self, cls_ofg):
    x, y = vectors("xy")

    def ro(inps, epts):
        x, y = inps
        u, v = epts
        return [u * y * 2.0 + x * v * 1.5]

    u, v = vectors("uv")
    op_mul_rop = cls_ofg([x, y, u, v], ro([x, y], [u, v]))
    op_mul = cls_ofg([x, y], [x * y], rop_overrides=ro)
    op_mul2 = cls_ofg([x, y], [x * y], rop_overrides=op_mul_rop)

    # single override case (function or OfG instance)
    xx, yy = vector("xx"), vector("yy")
    du, dv = vector("du"), vector("dv")
    for op in [op_mul, op_mul2]:
        zz = op(xx, yy)  # was `op_mul(xx, yy)`, which never exercised op_mul2
        dw = Rop(zz, [xx, yy], [du, dv])
        fn = function([xx, yy, du, dv], dw)
        vals = np.random.rand(4, 32).astype(config.floatX)
        dwval = fn(*vals)
        np.testing.assert_array_almost_equal(
            dwval, vals[0] * vals[3] * 1.5 + vals[1] * vals[2] * 2.0, 4
        )

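# A plain-NumPy companion sketch (hypothetical, not part of the original
# suite): the true R-operator of z = x * y along perturbations (u, v) is
# u * y + x * v, while `ro` above scales the two terms by 2.0 and 1.5, so a
# default (non-overridden) R-op would fail the test's assertion.
def test_rop_override_formula_sketch():
    xv, yv, uv, vv = np.random.rand(4, 8)
    true_rop = uv * yv + xv * vv                 # default R-op of x * y
    overridden = uv * yv * 2.0 + xv * vv * 1.5   # what `ro` computes
    assert not np.allclose(true_rop, overridden)
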
def test_local_sigm_times_exp(self):
    # Test the `local_sigm_times_exp` optimization:
    #   exp(x) * sigm(-x) -> sigm(x)
    #   exp(-x) * sigm(x) -> sigm(-x)

    def match(func, ops):
        assert [node.op for node in func.maker.fgraph.toposort()] == ops

    m = self.get_mode(excluding=["local_elemwise_fusion", "inplace"])
    x, y = vectors("x", "y")

    f = aesara.function([x], sigmoid(-x) * exp(x), mode=m)
    match(f, [sigmoid])
    assert check_stack_trace(f, ops_to_check=sigmoid)

    f = aesara.function([x], sigmoid(x) * exp(-x), mode=m)
    match(f, [neg, sigmoid])
    assert check_stack_trace(f, ops_to_check=sigmoid)

    f = aesara.function([x], -(-(-(sigmoid(x)))) * exp(-x), mode=m)
    match(f, [neg, sigmoid, neg])
    # assert check_stack_trace(f, ops_to_check=sigmoid)

    f = aesara.function(
        [x, y],
        (sigmoid(x) * sigmoid(-y) * -exp(-x) * exp(x * y) * exp(y)),
        mode=m,
    )
    topo = f.maker.fgraph.toposort()
    for op, nb in [(sigmoid, 2), (mul, 2), (neg, 1), (exp, 1)]:
        assert sum(n.op == op for n in topo) == nb

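# A NumPy sanity check of the identities targeted by the rewrite (a sketch,
# independent of the optimization machinery): with sigm(v) = 1 / (1 + exp(-v)),
#   exp(x) * sigm(-x) = exp(x) / (1 + exp(x)) = sigm(x)
#   exp(-x) * sigm(x) = sigm(-x)   (by symmetry)
def test_sigm_times_exp_identity_sketch():
    xv = np.linspace(-5.0, 5.0, 11)

    def sigm(v):
        return 1.0 / (1.0 + np.exp(-v))

    np.testing.assert_allclose(np.exp(xv) * sigm(-xv), sigm(xv))
    np.testing.assert_allclose(np.exp(-xv) * sigm(xv), sigm(-xv))
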
def test_connection_pattern_override(self, cls_ofg):
    x, y = vectors("xy")

    def f1(x, y):
        del x
        # ... but pretend we know how to backpropagate for x anyway,
        # and we don't care about the gradient wrt y.
        return y + tt_round(y)

    def f1_back(inputs, output_gradients):
        return [output_gradients[0], aesara.gradient.disconnected_type()]

    op = cls_ofg(
        inputs=[x, y],
        outputs=[f1(x, y)],
        grad_overrides=f1_back,
        connection_pattern=[[True], [False]],  # This is new
        on_unused_input="ignore",  # This is new
    )

    c = op(x, y)

    g1 = aesara.grad(c.sum(), x)

    out = g1.eval(
        {x: np.ones((5,), dtype=np.float32), y: np.ones((5,), dtype=np.float32)}
    )
    assert np.allclose(out, [1.0] * 5)

def test_full_graph(self):
    # Test `is_same_graph` with more complex graphs.
    # Each case is (var1, var2, ((givens, expected), ...)): e.g. the second
    # case asserts that `x * 2` and `y * 2` differ as-is, but become the
    # same graph once `y` is substituted with `x`.
    x, y, z = vectors("x", "y", "z")
    t = x * y
    self.check(
        [
            (x * 2, x * 2, (({}, True),)),
            (x * 2, y * 2, (({}, False), ({y: x}, True))),
            (x * 2, y * 2, (({}, False), ({x: y}, True))),
            (x * 2, y * 3, (({}, False), ({y: x}, False))),
            (t * 2, z * 2, (({}, False), ({t: z}, True))),
            (t * 2, z * 2, (({}, False), ({z: t}, True))),
            (x * (y * z), (x * y) * z, (({}, False),)),
        ]
    )

def test_input_dimensions_overflow(self):
    # `Elemwise.perform` used to compute the product of the input shapes to
    # check whether any of them was zero; that product overflowed in this
    # case.
    a, b, c, d, e, f = vectors("abcdef")
    s = a + b + c + d + e + f
    g = aesara.function([a, b, c, d, e, f], s, mode=Mode(linker="py"))
    g(*[np.zeros(2**11, config.floatX) for i in range(6)])

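# The arithmetic behind the regression above, sketched with plain integers:
# six inputs of length 2**11 give a shape product of 2**66, which wraps to
# zero in 64-bit arithmetic and therefore looked like an empty input.
def test_input_dimensions_overflow_arithmetic_sketch():
    assert (2**11) ** 6 % 2**64 == 0
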
def test_parse_mul_tree(self):
    # `parse_mul_tree` encodes a product as nested [is_negated, children]
    # pairs, with a bare variable as the leaf.
    x, y, z = vectors("x", "y", "z")

    assert parse_mul_tree(x * y) == [False, [[False, x], [False, y]]]
    assert parse_mul_tree(-(x * y)) == [True, [[False, x], [False, y]]]
    assert parse_mul_tree(-x * y) == [False, [[True, x], [False, y]]]
    assert parse_mul_tree(-x) == [True, x]
    assert parse_mul_tree((x * y) * -z) == [
        False,
        [[False, [[False, x], [False, y]]], [True, z]],
    ]

def test_matches_binary_crossentropy(self):
    # Test sigmoid_binary_crossentropy(p, t) ==
    #   binary_crossentropy(sigmoid(p), t).
    pred, target = inputs = vectors("pt")

    reference_val = binary_crossentropy(sigmoid(pred), target)
    f_reference = aesara.function(inputs, reference_val)

    test_val = sigmoid_binary_crossentropy(pred, target)
    f_test = aesara.function(inputs, test_val)

    test_inputs = self._get_test_inputs()
    utt.assert_allclose(f_reference(*test_inputs), f_test(*test_inputs))

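# A NumPy sketch of the identity under test, assuming the textbook
# definitions (not Aesara's internals): binary_crossentropy(q, t) with
# q = sigmoid(p) equals the numerically stable form
# max(p, 0) - p * t + log1p(exp(-|p|)).
def test_binary_crossentropy_identity_sketch():
    p = np.array([-4.0, -1.0, 0.0, 2.0, 5.0])
    t = np.array([0.0, 0.3, 0.5, 0.8, 1.0])
    q = 1.0 / (1.0 + np.exp(-p))  # sigmoid(p)
    naive = -(t * np.log(q) + (1 - t) * np.log1p(-q))
    stable = np.maximum(p, 0) - p * t + np.log1p(np.exp(-np.abs(p)))
    np.testing.assert_allclose(naive, stable)
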
def test_merge_only(self):
    # Test `is_same_graph` when `equal_computations` cannot be used.
    x, y, z = vectors("x", "y", "z")
    t = x * y
    self.check(
        [
            (x, t, (({}, False), ({t: x}, True))),
            (t * 2, x * 2, (({}, False), ({t: x}, True))),
            (x * x, x * y, (({}, False), ({y: x}, True))),
            (
                x * x + z,
                x * y + t,
                (({}, False), ({y: x}, False), ({y: x, t: z}, True)),
            ),
        ],
    )

def test_nested(self, cls_ofg):
    x, y = vectors("xy")
    u, v = x + y, x - y
    op_ft = cls_ofg([x, y], [u, v])
    op_ift = cls_ofg([x, y], [u / 2, v / 2])

    xx, yy = vector("xx"), vector("yy")
    xx2, yy2 = op_ift(*op_ft(xx, yy))
    fn = function([xx, yy], [xx2, yy2])

    xv = np.random.rand(16).astype(config.floatX)
    yv = np.random.rand(16).astype(config.floatX)
    xv2, yv2 = fn(xv, yv)
    assert np.allclose(xv, xv2)
    assert np.allclose(yv, yv2)

def test_nested(self, cls_ofg):
    x, y = vectors("xy")
    u, v = x + y, x - y
    op_ft = cls_ofg([x, y], [u, v])
    op_ift = cls_ofg([x, y], [u / 2, v / 2])

    xx, yy = vector("xx"), vector("yy")
    xx2, yy2 = op_ift(*op_ft(xx, yy))
    fn = function([xx, yy], [xx2, yy2])

    xv = np.random.random((16,)).astype(config.floatX)
    yv = np.random.random((16,)).astype(config.floatX)
    xv2, yv2 = fn(xv, yv)
    np.testing.assert_array_almost_equal(xv, xv2, 4)
    np.testing.assert_array_almost_equal(yv, yv2, 4)

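# Why the round trips above recover the inputs exactly: `op_ift` applies the
# same (a + b, a - b) pattern scaled by 1/2, so
#   op_ift(x + y, x - y) = (((x + y) + (x - y)) / 2, ((x + y) - (x - y)) / 2)
#                        = (x, y).
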
def test_optimize_graph():
    x, y = vectors("xy")

    @optimizer
    def custom_opt(fgraph):
        fgraph.replace(x, y, import_missing=True)

    x_opt = optimize_graph(x, custom_opt=custom_opt)

    assert x_opt is y

    x_opt = optimize_graph(
        FunctionGraph(outputs=[x], clone=False), custom_opt=custom_opt
    )

    assert x_opt.outputs[0] is y

def test_matches_binary_crossentropy(self):
    # Test sigmoid_binary_crossentropy(p, t) ==
    #   binary_crossentropy(sigmoid(p), t).
    pred, target = inputs = vectors("pt")

    reference_val = binary_crossentropy(sigmoid(pred), target)
    f_reference = aesara.function(inputs, reference_val)

    test_val = sigmoid_binary_crossentropy(pred, target)
    f_test = aesara.function(inputs, test_val)

    rng = np.random.default_rng(utt.fetch_seed())
    pred, target = rng.standard_normal((2, 50)).astype(config.floatX)
    test_inputs = [pred, 1 / (1 + np.exp(-target))]
    utt.assert_allclose(f_reference(*test_inputs), f_test(*test_inputs))

def test_single_var(self):
    # Test `is_same_graph` with some trivial graphs (one Variable).
    x, y, z = vectors("x", "y", "z")
    self.check(
        [
            (x, x, (({}, True),)),
            (x, y, (({}, False), ({y: x}, True))),
            (x, neg(x), (({}, False),)),
            (x, neg(y), (({}, False),)),
        ]
    )

def test_use_c_thunks():
    a_at = scalars("a")
    b_at = vectors("b")

    a = np.array(0.0).astype(config.floatX)
    b = np.array([2.0]).astype(config.floatX)

    cases = [False]
    if config.cxx:
        cases.append(True)

    for use_c_thunks in cases:
        f = function(
            [a_at, b_at],
            a_at * b_at,
            mode=Mode(
                optimizer=None,
                linker=VMLinker(c_thunks=use_c_thunks, use_cloop=False),
            ),
        )
        assert np.array_equal(a * b, f(a, b))
        assert any(hasattr(t, "cthunk") for t in f.fn.thunks) == use_c_thunks

def test_perform_sigm_times_exp(self):
    # Test the core function doing the `sigm_times_exp` optimization.
    #
    # It is easier to test different graph scenarios this way than by
    # compiling an Aesara function.
    x, y, z, t = vectors("x", "y", "z", "t")
    exp_op = exp

    def ok(expr1, expr2):
        trees = [parse_mul_tree(e) for e in (expr1, expr2)]
        perform_sigm_times_exp(trees[0])
        trees[0] = simplify_mul(trees[0])
        good = is_same_graph(compute_mul(trees[0]), compute_mul(trees[1]))
        if not good:
            # Dump both trees to help debug a failure.
            print(trees[0])
            print(trees[1])
            print("***")
            aesara.printing.debugprint(compute_mul(trees[0]))
            print("***")
            aesara.printing.debugprint(compute_mul(trees[1]))
        assert good

    ok(sigmoid(x) * exp_op(-x), sigmoid(-x))
    ok(
        -x * sigmoid(x) * (y * (-1 * z) * exp_op(-x)),
        -x * sigmoid(-x) * (y * (-1 * z)),
    )
    ok(
        -sigmoid(-x)
        * (
            exp_op(y)
            * (-exp_op(-z) * 3 * -exp_op(x))
            * (y * 2 * (-sigmoid(-y) * (z + t) * exp_op(z)) * sigmoid(z))
        )
        * -sigmoid(x),
        sigmoid(x)
        * (-sigmoid(y) * (-sigmoid(-z) * 3) * (y * 2 * ((z + t) * exp_op(z))))
        * (-sigmoid(x)),
    )
    ok(
        exp_op(-x) * -exp_op(-x) * (-sigmoid(x) * -sigmoid(x)),
        -sigmoid(-x) * sigmoid(-x),
    )
    ok(-exp_op(x) * -sigmoid(-x) * -exp_op(-x), -sigmoid(-x))

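# One more NumPy spot check (a sketch, independent of the rewriter) for the
# final case above: (-exp(x)) * (-sigm(-x)) * (-exp(-x)) = -sigm(-x), since
# exp(x) * exp(-x) = 1 and the three sign flips leave a single negation.
def test_sigm_times_exp_spot_check_sketch():
    xv = np.linspace(-3.0, 3.0, 7)

    def sigm(v):
        return 1.0 / (1.0 + np.exp(-v))

    np.testing.assert_allclose(
        -np.exp(xv) * -sigm(-xv) * -np.exp(-xv), -sigm(-xv)
    )
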
def test_compute_mul(self):
    x, y, z = vectors("x", "y", "z")
    tree = (x * y) * -z
    mul_tree = parse_mul_tree(tree)
    assert parse_mul_tree(compute_mul(mul_tree)) == mul_tree
    assert is_same_graph(compute_mul(parse_mul_tree(tree)), tree)

def test_grad_override(self, cls_ofg):
    x, y = vectors("xy")

    def go(inps, gs):
        x, y = inps
        (g,) = gs
        return [g * y * 2, g * x * 1.5]

    dedz = vector("dedz")
    op_mul_grad = cls_ofg([x, y, dedz], go([x, y], [dedz]))

    op_mul = cls_ofg([x, y], [x * y], grad_overrides=go)
    op_mul2 = cls_ofg([x, y], [x * y], grad_overrides=op_mul_grad)

    # single override case (function or OfG instance)
    xx, yy = vector("xx"), vector("yy")
    for op in [op_mul, op_mul2]:
        zz = tt_sum(op(xx, yy))
        dx, dy = grad(zz, [xx, yy])
        fn = function([xx, yy], [dx, dy])
        xv = np.random.rand(16).astype(config.floatX)
        yv = np.random.rand(16).astype(config.floatX)
        dxv, dyv = fn(xv, yv)
        assert np.allclose(yv * 2, dxv)
        assert np.allclose(xv * 1.5, dyv)

    # list override case
    def go1(inps, gs):
        x, w, b = inps
        g = gs[0]
        return g * w * 2

    def go2(inps, gs):
        x, w, b = inps
        g = gs[0]
        return g * x * 1.5

    w, b = vectors("wb")
    # we make the 3rd gradient default (no override)
    op_linear = cls_ofg(
        [x, w, b], [x * w + b], grad_overrides=[go1, go2, "default"]
    )
    # was `vector("yy")` for the second variable; renamed to match its role
    xx, ww, bb = vector("xx"), vector("ww"), vector("bb")
    zz = tt_sum(op_linear(xx, ww, bb))
    dx, dw, db = grad(zz, [xx, ww, bb])
    fn = function([xx, ww, bb], [dx, dw, db])
    xv = np.random.rand(16).astype(config.floatX)
    wv = np.random.rand(16).astype(config.floatX)
    bv = np.random.rand(16).astype(config.floatX)
    dxv, dwv, dbv = fn(xv, wv, bv)
    assert np.allclose(wv * 2, dxv)
    assert np.allclose(xv * 1.5, dwv)
    assert np.allclose(np.ones(16, dtype=config.floatX), dbv)

    # NullType and DisconnectedType overrides
    op_linear2 = cls_ofg(
        [x, w, b],
        [x * w + b],
        grad_overrides=[go1, NullType()(), DisconnectedType()()],
    )
    zz2 = tt_sum(op_linear2(xx, ww, bb))
    dx2, dw2, db2 = grad(
        zz2,
        [xx, ww, bb],
        return_disconnected="Disconnected",
        disconnected_inputs="ignore",
        null_gradients="return",
    )
    assert isinstance(dx2.type, TensorType)
    assert dx2.ndim == 1
    assert isinstance(dw2.type, NullType)
    assert isinstance(db2.type, DisconnectedType)

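# For reference, the algebra the assertions above encode: the true gradients
# of sum(x * y) w.r.t. (x, y) are (y, x), which `go` rescales to
# (2 * y, 1.5 * x); for sum(x * w + b) the true gradients are (w, x, 1), and
# only the first two are overridden.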