def test_local_sigm_times_exp(self):
    # Test the `local_sigm_times_exp` optimization:
    #   exp(x) * sigm(-x) -> sigm(x)
    #   exp(-x) * sigm(x) -> sigm(-x)

    def match(func, ops):
        # The optimized graph must contain exactly the expected ops, in order.
        assert [node.op for node in func.maker.fgraph.toposort()] == ops

    m = self.get_mode(excluding=["local_elemwise_fusion", "inplace"])
    x, y = tt.vectors("x", "y")

    f = aesara.function([x], sigmoid(-x) * tt.exp(x), mode=m)
    match(f, [sigmoid])
    assert check_stack_trace(f, ops_to_check=sigmoid)

    f = aesara.function([x], sigmoid(x) * tt.exp(-x), mode=m)
    match(f, [tt.neg, sigmoid])
    assert check_stack_trace(f, ops_to_check=sigmoid)

    f = aesara.function([x], -(-(-(sigmoid(x)))) * tt.exp(-x), mode=m)
    match(f, [tt.neg, sigmoid, tt.neg])
    # assert check_stack_trace(f, ops_to_check=sigmoid)

    f = aesara.function(
        [x, y],
        (sigmoid(x) * sigmoid(-y) * -tt.exp(-x) * tt.exp(x * y) * tt.exp(y)),
        mode=m,
    )
    topo = f.maker.fgraph.toposort()
    for op, nb in [(sigmoid, 2), (tt.mul, 2), (tt.neg, 1), (tt.exp, 1)]:
        assert sum(n.op == op for n in topo) == nb
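# A quick standalone NumPy check (illustrative only, not part of the test suite)
# of the algebraic identities that `local_sigm_times_exp` relies on:
#   sigmoid(-x) * exp(x) == sigmoid(x)   and   sigmoid(x) * exp(-x) == sigmoid(-x)
import numpy as np


def _np_sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))


_v = np.linspace(-5.0, 5.0, 11)
assert np.allclose(_np_sigmoid(-_v) * np.exp(_v), _np_sigmoid(_v))
assert np.allclose(_np_sigmoid(_v) * np.exp(-_v), _np_sigmoid(-_v))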
def test_lop_override(self, cls_ofg):
    x = vector()
    y = 1.0 / (1.0 + exp(-x))

    def lop_ov(inps, outs, grads):
        # L-op override returning twice the true sigmoid gradient,
        # i.e. 2 * y * (1 - y) * dE/dy.
        (y_,) = outs
        (dedy_,) = grads
        return [2.0 * y_ * (1.0 - y_) * dedy_]

    y_, dedy = vector(), vector()
    op_lop_ov = cls_ofg([x, y_, dedy], [2.0 * y_ * (1.0 - y_) * dedy])

    xx = vector()
    yy1 = tt_sum(sigmoid(xx))
    # Reference value: twice the ordinary gradient, to match the overrides above.
    gyy1 = 2.0 * grad(yy1, xx)

    for ov in [lop_ov, op_lop_ov]:
        op = cls_ofg([x], [y], lop_overrides=ov)
        yy2 = tt_sum(op(xx))
        gyy2 = grad(yy2, xx)
        fn = function([xx], [gyy1, gyy2])

        xval = np.random.rand(32).astype(config.floatX)
        y1val, y2val = fn(xval)
        assert np.allclose(y1val, y2val)
def rnn_step1(
    # sequences
    x,
    ri,
    zi,
    # outputs_info
    h,
):
    pre_r = ri + h.dot(U)
    pre_z = zi + h.dot(V)
    r = nnet.sigmoid(pre_r)
    z = nnet.sigmoid(pre_z)

    after_r = r * h
    pre_h = x + after_r.dot(W)
    new_h = tanh(pre_h)

    res_h = z * new_h + (1 - z) * h
    return res_h
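# A minimal sketch (not from the original tests) of driving `rnn_step1` with
# `aesara.scan`.  It assumes `rnn_step1`, `nnet`, and `tanh` are already in scope,
# and it defines the weights `U`, `V`, `W` that the step function reads from
# module scope; every name and shape below is illustrative only.
import numpy as np
import aesara
import aesara.tensor as tt
from aesara import config

n_hidden = 4
rng = np.random.default_rng(0)
U = aesara.shared(rng.normal(size=(n_hidden, n_hidden)).astype(config.floatX))
V = aesara.shared(rng.normal(size=(n_hidden, n_hidden)).astype(config.floatX))
W = aesara.shared(rng.normal(size=(n_hidden, n_hidden)).astype(config.floatX))

x_seq = tt.matrix("x_seq")    # (n_steps, n_hidden): pre-projected inputs
ri_seq = tt.matrix("ri_seq")  # (n_steps, n_hidden): reset-gate inputs
zi_seq = tt.matrix("zi_seq")  # (n_steps, n_hidden): update-gate inputs
h0 = tt.vector("h0")          # initial hidden state

# One call of `rnn_step1` per time step; `h_seq` stacks the hidden states.
h_seq, _ = aesara.scan(
    fn=rnn_step1,
    sequences=[x_seq, ri_seq, zi_seq],
    outputs_info=[h0],
)
run = aesara.function([x_seq, ri_seq, zi_seq, h0], h_seq)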
def test_log1msigm_to_softplus(self):
    x = tt.matrix()

    out = tt.log(1 - sigmoid(x))
    f = aesara.function([x], out, mode=self.m)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 2
    assert isinstance(topo[0].op.scalar_op, ScalarSoftplus)
    assert isinstance(topo[1].op.scalar_op, aesara.scalar.Neg)
    # assert check_stack_trace(f, ops_to_check='all')
    f(np.random.rand(54, 11).astype(config.floatX))

    # Same test with a flatten
    out = tt.log(1 - tt.flatten(sigmoid(x)))
    f = aesara.function([x], out, mode=self.m)
    # assert check_stack_trace(f, ops_to_check='all')
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 3
    assert tt.is_flat(topo[0].outputs[0])
    assert isinstance(topo[1].op.scalar_op, ScalarSoftplus)
    assert isinstance(topo[2].op.scalar_op, aesara.scalar.Neg)
    f(np.random.rand(54, 11).astype(config.floatX))

    # Same test with a reshape
    out = tt.log(1 - sigmoid(x).reshape([x.size]))
    f = aesara.function([x], out, mode=self.m)
    topo = f.maker.fgraph.toposort()
    # assert len(topo) == 3
    assert any(isinstance(node.op, tt.Reshape) for node in topo)
    assert any(
        isinstance(getattr(node.op, "scalar_op", None), ScalarSoftplus)
        for node in topo
    )
    f(np.random.rand(54, 11).astype(config.floatX))
def test_logsigm_to_softplus(self):
    x = tt.vector()

    out = tt.log(sigmoid(x))
    f = aesara.function([x], out, mode=self.m)

    # Fix ticket #4581 first
    # assert check_stack_trace(
    #     f, ops_to_check=(aesara.scalar.Neg, ScalarSoftplus))

    # log(sigmoid(x)) is rewritten as -softplus(-x), hence Neg -> Softplus -> Neg.
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 3
    assert isinstance(topo[0].op.scalar_op, aesara.scalar.Neg)
    assert isinstance(topo[1].op.scalar_op, ScalarSoftplus)
    assert isinstance(topo[2].op.scalar_op, aesara.scalar.Neg)
    f(np.random.rand(54).astype(config.floatX))
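# A standalone NumPy check (illustrative only, not part of the tests) of the
# identities behind the two softplus rewrites above:
#   log(1 - sigmoid(x)) == -softplus(x)   and   log(sigmoid(x)) == -softplus(-x)
import numpy as np


def _np_softplus(v):
    return np.log1p(np.exp(v))


_v = np.linspace(-5.0, 5.0, 11)
_sig = 1.0 / (1.0 + np.exp(-_v))
assert np.allclose(np.log(1.0 - _sig), -_np_softplus(_v))
assert np.allclose(np.log(_sig), -_np_softplus(-_v))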
def test_local_ultra_fast_sigmoid(self):
    x = tt.matrix("x")
    s = sigmoid(x)

    # With the rewrite excluded, the regular sigmoid must survive.
    mode = self.get_mode("local_ultra_fast_sigmoid")
    f = aesara.function([x], s, mode=mode)
    assert check_stack_trace(f, ops_to_check=sigmoid)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert topo[0].op == sigmoid

    # With the rewrite included, sigmoid is replaced by ultra_fast_sigmoid.
    mode = self.get_mode().including("local_ultra_fast_sigmoid")
    f = aesara.function([x], s, mode=mode)
    assert check_stack_trace(f, ops_to_check=ultra_fast_sigmoid)
    topo = f.maker.fgraph.toposort()
    assert topo[0].op == ultra_fast_sigmoid
    assert len(topo) == 1
    f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])
def test_nnet():
    x = vector("x")
    x.tag.test_value = np.r_[1.0, 2.0].astype(config.floatX)

    out = aet_nnet.sigmoid(x)
    fgraph = FunctionGraph([x], [out])
    compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])

    out = aet_nnet.ultra_fast_sigmoid(x)
    fgraph = FunctionGraph([x], [out])
    compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])

    out = aet_nnet.softplus(x)
    fgraph = FunctionGraph([x], [out])
    compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])

    out = aet_nnet.softmax(x)
    fgraph = FunctionGraph([x], [out])
    compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])
def test_grad_log1msigm(self):
    # At some point, this returned NaN, because (1 - sigm(x)) appeared in both
    # the numerator and the denominator of a fraction, but the two nodes in
    # question had not been merged.
    x = tt.matrix("x")
    lr = tt.scalar("lr")
    s = sigmoid(x)
    l = tt.log(1 - s)
    c = l.mean()
    ux = x - lr * aesara.grad(c, x)

    # Before the optimization, inf and NaN will be produced in the graph,
    # and DebugMode will complain.  Everything is fine afterwards.
    mode = self.get_mode()
    if not isinstance(mode, aesara.compile.DebugMode):
        f = aesara.function([x, lr], ux, mode=mode)
        ux_v = f([[50]], 0.1)
        assert not np.isnan(ux_v)
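# Standalone NumPy illustration (not part of the test) of the 0/0 problem the
# rewrite avoids: d/dx log(1 - sigmoid(x)) simplifies to -sigmoid(x), but the
# naive quotient sigmoid'(x) / (1 - sigmoid(x)) degenerates to 0/0 for large x.
import numpy as np
import warnings

_x_big = 50.0
_s = 1.0 / (1.0 + np.exp(-_x_big))  # rounds to exactly 1.0 in float64
with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence the invalid-value RuntimeWarning
    _naive_grad = -(_s * (1.0 - _s)) / (1.0 - _s)
assert np.isnan(_naive_grad)  # 0/0 -> NaN without the rewrite
assert -_s == -1.0  # the simplified gradient stays finite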
def __init__(
    self,
    input=None,
    target=None,
    n_input=1,
    n_hidden=1,
    n_output=1,
    lr=1e-3,
    **kw,
):
    super().__init__(**kw)

    if input is None:
        input = tensor.dvector("input")
    if target is None:
        target = tensor.dvector("target")

    self.input = input
    self.target = target
    self.lr = shared(lr, "learning_rate")
    self.w1 = shared(np.zeros((n_hidden, n_input)), "w1")
    self.w2 = shared(np.zeros((n_output, n_hidden)), "w2")

    # A one-hidden-layer network: sigmoid hidden layer, linear output,
    # squared-error cost, and plain SGD updates on both weight matrices.
    self.hidden = sigmoid(tensor.dot(self.w1, self.input))
    self.output = tensor.dot(self.w2, self.hidden)
    self.cost = tensor.sum((self.output - self.target) ** 2)
    self.sgd_updates = {
        self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
        self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2),
    }

    self.sgd_step = pfunc(
        params=[self.input, self.target],
        outputs=[self.output, self.cost],
        updates=self.sgd_updates,
    )
    self.compute_output = pfunc([self.input], self.output)
    self.output_from_hidden = pfunc([self.hidden], self.output)
def test_local_hard_sigmoid(self):
    x = tt.matrix("x")
    s = sigmoid(x)

    # With the rewrite excluded, the regular sigmoid must survive.
    mode = self.get_mode("local_hard_sigmoid")
    f = aesara.function([x], s, mode=mode)
    assert check_stack_trace(f, ops_to_check=sigmoid)
    topo = f.maker.fgraph.toposort()
    assert topo[0].op == sigmoid
    assert len(topo) == 1

    # With the rewrite included, no sigmoid node should be left in the graph.
    mode = self.get_mode().including("local_hard_sigmoid")
    f = aesara.function([x], s, mode=mode)
    topo = f.maker.fgraph.toposort()
    assert not any(n.op == sigmoid for n in topo)
    f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])

    # Without fusion/inplace, the clip of the hard sigmoid stays visible.
    mode2 = mode.excluding("fusion").excluding("inplace")
    f2 = aesara.function([x], s, mode=mode2)
    assert check_stack_trace(f2, ops_to_check=tt.clip)
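# Standalone NumPy sketch (not part of the test) of what a clip-based "hard"
# sigmoid looks like on the inputs the test uses.  The exact slope and intercept
# applied by the rewrite are an assumption here; the conventional piecewise-linear
# form clip(0.2 * x + 0.5, 0, 1) is used for illustration.
import numpy as np

_v = np.array([-50.0, -10.0, -4.0, -1.0, 0.0, 1.0, 4.0, 10.0, 50.0])
_exact = 1.0 / (1.0 + np.exp(-_v))
_hard = np.clip(0.2 * _v + 0.5, 0.0, 1.0)
# The approximation saturates to 0/1 outside roughly [-2.5, 2.5] and stays within
# about 0.1 of the exact sigmoid near the origin.
assert np.all(np.abs(_hard - _exact) < 0.15)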
def test_perform_sigm_times_exp(self):
    # Test the core function doing the `sigm_times_exp` optimization.
    #
    # It is easier to test different graph scenarios this way than by
    # compiling an Aesara function.
    x, y, z, t = tt.vectors("x", "y", "z", "t")
    exp = tt.exp

    def ok(expr1, expr2):
        trees = [parse_mul_tree(e) for e in (expr1, expr2)]
        perform_sigm_times_exp(trees[0])
        trees[0] = simplify_mul(trees[0])
        good = is_same_graph(compute_mul(trees[0]), compute_mul(trees[1]))
        if not good:
            # Debug output to help diagnose a mismatch between the two graphs.
            print(trees[0])
            print(trees[1])
            print("***")
            aesara.printing.debugprint(compute_mul(trees[0]))
            print("***")
            aesara.printing.debugprint(compute_mul(trees[1]))
        assert good

    ok(sigmoid(x) * exp(-x), sigmoid(-x))
    ok(
        -x * sigmoid(x) * (y * (-1 * z) * exp(-x)),
        -x * sigmoid(-x) * (y * (-1 * z)),
    )
    ok(
        -sigmoid(-x)
        * (
            exp(y)
            * (-exp(-z) * 3 * -exp(x))
            * (y * 2 * (-sigmoid(-y) * (z + t) * exp(z)) * sigmoid(z))
        )
        * -sigmoid(x),
        sigmoid(x)
        * (-sigmoid(y) * (-sigmoid(-z) * 3) * (y * 2 * ((z + t) * exp(z))))
        * (-sigmoid(x)),
    )
    ok(
        exp(-x) * -exp(-x) * (-sigmoid(x) * -sigmoid(x)),
        -sigmoid(-x) * sigmoid(-x),
    )
    ok(-exp(x) * -sigmoid(-x) * -exp(-x), -sigmoid(-x))