def test_local_sigm_times_exp(self):
    # Check the `local_sigm_times_exp` rewrite:
    #   exp(x) * sigm(-x) -> sigm(x)
    #   exp(-x) * sigm(x) -> sigm(-x)

    def assert_ops(func, expected_ops):
        # The compiled graph must contain exactly these ops, in order.
        actual_ops = [node.op for node in func.maker.fgraph.toposort()]
        assert actual_ops == expected_ops

    mode = self.get_mode(excluding=["local_elemwise_fusion", "inplace"])
    x, y = vectors("x", "y")

    fn = aesara.function([x], sigmoid(-x) * exp(x), mode=mode)
    assert_ops(fn, [sigmoid])
    assert check_stack_trace(fn, ops_to_check=sigmoid)

    fn = aesara.function([x], sigmoid(x) * exp(-x), mode=mode)
    assert_ops(fn, [neg, sigmoid])
    assert check_stack_trace(fn, ops_to_check=sigmoid)

    fn = aesara.function([x], -(-(-(sigmoid(x)))) * exp(-x), mode=mode)
    assert_ops(fn, [neg, sigmoid, neg])
    # assert check_stack_trace(fn, ops_to_check=sigmoid)

    # A larger product: the rewrite should fire twice (one sigmoid per
    # matching exp factor) while the remaining factors stay untouched.
    fn = aesara.function(
        [x, y],
        (sigmoid(x) * sigmoid(-y) * -exp(-x) * exp(x * y) * exp(y)),
        mode=mode,
    )
    graph_nodes = f.maker.fgraph.toposort() if False else fn.maker.fgraph.toposort()
    for op, expected_count in [(sigmoid, 2), (mul, 2), (neg, 1), (exp, 1)]:
        assert sum(node.op == op for node in graph_nodes) == expected_count
def test_log1msigm_to_softplus(self):
    # log(1 - sigmoid(x)) should be rewritten to -softplus(x).
    x = matrix()

    fn = aesara.function([x], log(1 - sigmoid(x)), mode=self.m)
    nodes = fn.maker.fgraph.toposort()
    assert len(nodes) == 2
    assert isinstance(nodes[0].op.scalar_op, ScalarSoftplus)
    assert isinstance(nodes[1].op.scalar_op, aesara.scalar.Neg)
    # assert check_stack_trace(f, ops_to_check='all')
    fn(np.random.rand(54, 11).astype(config.floatX))

    # The rewrite must also see through a flatten.
    fn = aesara.function([x], log(1 - aet.flatten(sigmoid(x))), mode=self.m)
    # assert check_stack_trace(f, ops_to_check='all')
    nodes = fn.maker.fgraph.toposort()
    assert len(nodes) == 3
    assert aet.is_flat(nodes[0].outputs[0])
    assert isinstance(nodes[1].op.scalar_op, ScalarSoftplus)
    assert isinstance(nodes[2].op.scalar_op, aesara.scalar.Neg)
    fn(np.random.rand(54, 11).astype(config.floatX))

    # ... and through a reshape.
    fn = aesara.function([x], log(1 - sigmoid(x).reshape([x.size])), mode=self.m)
    nodes = fn.maker.fgraph.toposort()
    # assert len(nodes) == 3
    assert any(isinstance(node.op, Reshape) for node in nodes)
    assert any(
        isinstance(getattr(node.op, "scalar_op", None), ScalarSoftplus)
        for node in nodes
    )
    fn(np.random.rand(54, 11).astype(config.floatX))
def test_logsigm_to_softplus(self):
    # log(sigmoid(x)) should be rewritten to -softplus(-x).
    x = vector()

    fn = aesara.function([x], log(sigmoid(x)), mode=self.m)

    # Fix ticket #4581 first
    # assert check_stack_trace(
    #     fn, ops_to_check=(aesara.scalar.Neg,
    #                       ScalarSoftplus))

    nodes = fn.maker.fgraph.toposort()
    assert len(nodes) == 3
    # Expected shape of the rewritten graph: neg -> softplus -> neg.
    assert isinstance(nodes[0].op.scalar_op, aesara.scalar.Neg)
    assert isinstance(nodes[1].op.scalar_op, ScalarSoftplus)
    assert isinstance(nodes[2].op.scalar_op, aesara.scalar.Neg)

    fn(np.random.rand(54).astype(config.floatX))
def test_local_ultra_fast_sigmoid(self):
    x = matrix("x")
    s = sigmoid(x)

    # With the rewrite excluded, the plain sigmoid op must survive.
    mode = self.get_mode("local_ultra_fast_sigmoid")
    fn = aesara.function([x], s, mode=mode)
    assert check_stack_trace(fn, ops_to_check=sigmoid)
    nodes = fn.maker.fgraph.toposort()
    assert len(nodes) == 1
    assert nodes[0].op == sigmoid

    # With the rewrite included, sigmoid is replaced by its fast variant.
    mode = self.get_mode().including("local_ultra_fast_sigmoid")
    fn = aesara.function([x], s, mode=mode)
    assert check_stack_trace(fn, ops_to_check=ultra_fast_sigmoid)
    nodes = fn.maker.fgraph.toposort()
    assert len(nodes) == 1
    assert nodes[0].op == ultra_fast_sigmoid

    fn([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])
def test_grad_log1msigm(self):
    # At some point, this returned nan, because (1 - sigm(x)) was
    # on both the numerator and the denominator of a fraction,
    # but the two nodes in question had not been merged.
    x = matrix("x")
    lr = scalar("lr")

    s = sigmoid(x)
    cost = log(1 - s).mean()
    updated_x = x - lr * aesara.grad(cost, x)

    # Before the rewrite, inf and NaN appear in the graph and DebugMode
    # would complain, so skip the numerical check under DebugMode.
    mode = self.get_mode()
    if isinstance(mode, aesara.compile.debugmode.DebugMode):
        return

    fn = aesara.function([x, lr], updated_x, mode=mode)
    result = fn([[50]], 0.1)
    assert not np.isnan(result)
def test_local_hard_sigmoid(self):
    x = matrix("x")
    s = sigmoid(x)

    # With the rewrite excluded, the plain sigmoid op must survive.
    mode = self.get_mode("local_hard_sigmoid")
    fn = aesara.function([x], s, mode=mode)
    assert check_stack_trace(fn, ops_to_check=sigmoid)
    nodes = fn.maker.fgraph.toposort()
    assert len(nodes) == 1
    assert nodes[0].op == sigmoid

    # With the rewrite included, no sigmoid op may remain.
    mode = self.get_mode().including("local_hard_sigmoid")
    fn = aesara.function([x], s, mode=mode)
    nodes = fn.maker.fgraph.toposort()
    assert not any(node.op == sigmoid for node in nodes)
    fn([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])

    # Disable fusion/inplace so the clip op is visible for the
    # stack-trace check.
    plain_mode = mode.excluding("fusion").excluding("inplace")
    plain_fn = aesara.function([x], s, mode=plain_mode)
    assert check_stack_trace(plain_fn, ops_to_check=clip)
def test_perform_sigm_times_exp(self):
    # Test the core function doing the `sigm_times_exp` rewrite.
    #
    # Exercising the rewrite on multiplication trees directly makes it
    # easier to cover graph scenarios than compiling Aesara functions.
    x, y, z, t = vectors("x", "y", "z", "t")
    exp_op = exp

    def check(expr, expected):
        # Rewrite `expr`'s mul tree and compare against `expected`.
        tree = parse_mul_tree(expr)
        perform_sigm_times_exp(tree)
        tree = simplify_mul(tree)
        expected_tree = parse_mul_tree(expected)
        good = is_same_graph(compute_mul(tree), compute_mul(expected_tree))
        if not good:
            print(tree)
            print(expected_tree)
            print("***")
            aesara.printing.debugprint(compute_mul(tree))
            print("***")
            aesara.printing.debugprint(compute_mul(expected_tree))
        assert good

    check(sigmoid(x) * exp_op(-x), sigmoid(-x))
    check(
        -x * sigmoid(x) * (y * (-1 * z) * exp_op(-x)),
        -x * sigmoid(-x) * (y * (-1 * z)),
    )
    check(
        -sigmoid(-x)
        * (
            exp_op(y)
            * (-exp_op(-z) * 3 * -exp_op(x))
            * (y * 2 * (-sigmoid(-y) * (z + t) * exp_op(z)) * sigmoid(z))
        )
        * -sigmoid(x),
        sigmoid(x)
        * (-sigmoid(y) * (-sigmoid(-z) * 3) * (y * 2 * ((z + t) * exp_op(z))))
        * (-sigmoid(x)),
    )
    check(
        exp_op(-x) * -exp_op(-x) * (-sigmoid(x) * -sigmoid(x)),
        -sigmoid(-x) * sigmoid(-x),
    )
    check(-exp_op(x) * -sigmoid(-x) * -exp_op(-x), -sigmoid(-x))