Example #1
    def test_local_sigm_times_exp(self):
        # Test the `local_sigm_times_exp` optimization.
        # exp(x) * sigm(-x) -> sigm(x)
        # exp(-x) * sigm(x) -> sigm(-x)

        def match(func, ops):
            # The optimized graph must contain exactly these ops, in
            # topological order.
            assert [node.op for node in func.maker.fgraph.toposort()] == ops

        m = self.get_mode(excluding=["local_elemwise_fusion", "inplace"])
        x, y = tt.vectors("x", "y")

        f = aesara.function([x], sigmoid(-x) * tt.exp(x), mode=m)
        match(f, [sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

        f = aesara.function([x], sigmoid(x) * tt.exp(-x), mode=m)
        match(f, [tt.neg, sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

        f = aesara.function([x], -(-(-(sigmoid(x)))) * tt.exp(-x), mode=m)
        match(f, [tt.neg, sigmoid, tt.neg])
        # assert check_stack_trace(f, ops_to_check=sigmoid)

        f = aesara.function(
            [x, y],
            (sigmoid(x) * sigmoid(-y) * -tt.exp(-x) * tt.exp(x * y) *
             tt.exp(y)),
            mode=m,
        )
        topo = f.maker.fgraph.toposort()
        for op, nb in [(sigmoid, 2), (tt.mul, 2), (tt.neg, 1), (tt.exp, 1)]:
            assert sum(n.op == op for n in topo) == nb
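The rewrite rests on the identity sigm(x) * exp(-x) = sigm(-x), since e^x / (1 + e^x) * e^-x = 1 / (1 + e^x). A minimal standalone NumPy check of both directions (not part of the test suite; scipy's expit is used as a stable sigmoid):

    import numpy as np
    from scipy.special import expit  # numerically stable sigmoid

    xv = np.linspace(-10.0, 10.0, 101)
    assert np.allclose(expit(-xv) * np.exp(xv), expit(xv))   # exp(x) * sigm(-x) -> sigm(x)
    assert np.allclose(expit(xv) * np.exp(-xv), expit(-xv))  # exp(-x) * sigm(x) -> sigm(-x)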
Example #2
    def test_lop_override(self, cls_ofg):
        x = vector()
        y = 1.0 / (1.0 + exp(-x))

        def lop_ov(inps, outs, grads):
            # Gradient override: twice the analytic sigmoid gradient
            # y * (1 - y), scaled by the incoming gradient.
            (y_,) = outs
            (dedy_,) = grads
            return [2.0 * y_ * (1.0 - y_) * dedy_]

        y_, dedy = vector(), vector()
        op_lop_ov = cls_ofg([x, y_, dedy], [2.0 * y_ * (1.0 - y_) * dedy])

        xx = vector()
        yy1 = tt_sum(sigmoid(xx))
        gyy1 = 2.0 * grad(yy1, xx)

        for ov in [lop_ov, op_lop_ov]:
            op = cls_ofg([x], [y], lop_overrides=ov)
            yy2 = tt_sum(op(xx))
            gyy2 = grad(yy2, xx)
            fn = function([xx], [gyy1, gyy2])

            xval = np.random.rand(32).astype(config.floatX)
            y1val, y2val = fn(xval)
            assert np.allclose(y1val, y2val)
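The override returns twice the analytic sigmoid derivative, d(sigm)/dx = sigm(x) * (1 - sigm(x)), which is why the reference gradient gyy1 is also scaled by 2. A quick standalone check of that derivative (plain NumPy, not part of the test):

    import numpy as np
    from scipy.special import expit

    xv = np.random.rand(32)
    yv = expit(xv)
    eps = 1e-6
    num_grad = (expit(xv + eps) - expit(xv - eps)) / (2 * eps)  # central difference
    assert np.allclose(2.0 * yv * (1.0 - yv), 2.0 * num_grad, atol=1e-6)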
Example #3
        def rnn_step1(
            # sequences
            x,
            ri,
            zi,
            # outputs_info
            h,
        ):
            # Reset and update gates (GRU-style).
            pre_r = ri + h.dot(U)
            pre_z = zi + h.dot(V)
            r = nnet.sigmoid(pre_r)
            z = nnet.sigmoid(pre_z)

            # Candidate state from the reset-gated previous state.
            after_r = r * h
            pre_h = x + after_r.dot(W)
            new_h = tanh(pre_h)

            # Blend candidate and previous state with the update gate.
            res_h = z * new_h + (1 - z) * h
            return res_h
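For context, W, U, V are weight matrices from the enclosing scope. A hedged sketch of how such a step function is usually driven with aesara.scan (the sequence variables xs, ris, zis and the initial state h0 are assumptions, not part of the original test):

        # xs, ris, zis: (n_steps, n_hidden) sequences; h0: (n_hidden,) initial state
        hs, updates = aesara.scan(
            rnn_step1,
            sequences=[xs, ris, zis],
            outputs_info=[h0],
        )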
Example #4
    def test_log1msigm_to_softplus(self):
        x = tt.matrix()

        out = tt.log(1 - sigmoid(x))
        f = aesara.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 2
        assert isinstance(topo[0].op.scalar_op, ScalarSoftplus)
        assert isinstance(topo[1].op.scalar_op, aesara.scalar.Neg)
        # assert check_stack_trace(f, ops_to_check='all')
        f(np.random.rand(54, 11).astype(config.floatX))

        # Same test with a flatten
        out = tt.log(1 - tt.flatten(sigmoid(x)))
        f = aesara.function([x], out, mode=self.m)

        # assert check_stack_trace(f, ops_to_check='all')
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 3
        assert tt.is_flat(topo[0].outputs[0])
        assert isinstance(topo[1].op.scalar_op, ScalarSoftplus)
        assert isinstance(topo[2].op.scalar_op, aesara.scalar.Neg)
        f(np.random.rand(54, 11).astype(config.floatX))

        # Same test with a reshape
        out = tt.log(1 - sigmoid(x).reshape([x.size]))
        f = aesara.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        # assert len(topo) == 3
        assert any(isinstance(node.op, tt.Reshape) for node in topo)
        assert any(
            isinstance(getattr(node.op, "scalar_op", None), ScalarSoftplus)
            for node in topo
        )
        f(np.random.rand(54, 11).astype(config.floatX))
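The rewrite applies log(1 - sigm(x)) = log(sigm(-x)) = -softplus(x), which is exactly the Softplus-then-Neg structure asserted above. A standalone NumPy check (not part of the test):

    import numpy as np
    from scipy.special import expit

    xv = np.linspace(-5.0, 5.0, 51)
    assert np.allclose(np.log(1 - expit(xv)), -np.log1p(np.exp(xv)))  # -softplus(x)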
Example #5
    def test_logsigm_to_softplus(self):
        x = tt.vector()

        out = tt.log(sigmoid(x))
        f = aesara.function([x], out, mode=self.m)

        # Fix ticket #4581 first
        # assert check_stack_trace(
        #     f, ops_to_check=(aesara.scalar.Neg,
        #                      ScalarSoftplus))
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 3
        assert isinstance(topo[0].op.scalar_op, aesara.scalar.Neg)
        assert isinstance(topo[1].op.scalar_op, ScalarSoftplus)
        assert isinstance(topo[2].op.scalar_op, aesara.scalar.Neg)
        f(np.random.rand(54).astype(config.floatX))
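The expected three-node graph computes log(sigm(x)) = -softplus(-x), hence the Neg, Softplus, Neg sequence. A standalone NumPy check (not part of the test):

    import numpy as np
    from scipy.special import expit

    xv = np.linspace(-5.0, 5.0, 51)
    assert np.allclose(np.log(expit(xv)), -np.log1p(np.exp(-xv)))  # -softplus(-x)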
Example #6
    def test_local_ultra_fast_sigmoid(self):
        x = tt.matrix("x")
        s = sigmoid(x)

        mode = self.get_mode("local_ultra_fast_sigmoid")
        f = aesara.function([x], s, mode=mode)
        assert check_stack_trace(f, ops_to_check=sigmoid)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 1
        assert topo[0].op == sigmoid

        mode = self.get_mode().including("local_ultra_fast_sigmoid")
        f = aesara.function([x], s, mode=mode)
        assert check_stack_trace(f, ops_to_check=ultra_fast_sigmoid)
        topo = f.maker.fgraph.toposort()
        assert topo[0].op == ultra_fast_sigmoid
        assert len(topo) == 1
        f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])
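ultra_fast_sigmoid trades accuracy for speed, so the rewritten graph only approximates the exact one. A hedged continuation of the test above (the 1e-1 tolerance is an assumption, not a documented error bound):

        f_exact = aesara.function([x], s, mode=self.get_mode("local_ultra_fast_sigmoid"))
        data = [[-50, -10, -4, -1, 0, 1, 4, 10, 50]]
        assert np.allclose(f(data), f_exact(data), atol=1e-1)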
Example #7
def test_nnet():
    x = vector("x")
    x.tag.test_value = np.r_[1.0, 2.0].astype(config.floatX)

    out = aet_nnet.sigmoid(x)
    fgraph = FunctionGraph([x], [out])
    compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])

    out = aet_nnet.ultra_fast_sigmoid(x)
    fgraph = FunctionGraph([x], [out])
    compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])

    out = aet_nnet.softplus(x)
    fgraph = FunctionGraph([x], [out])
    compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])

    out = aet_nnet.softmax(x)
    fgraph = FunctionGraph([x], [out])
    compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])
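compare_jax_and_py (a helper from the surrounding test suite) compiles the FunctionGraph with the JAX backend and checks the result against the Python backend. For orientation, the same activations exist directly in jax.nn; a standalone sketch outside Aesara (assumes jax and scipy are installed):

    import jax.nn
    import jax.numpy as jnp
    import numpy as np
    from scipy.special import expit

    xv = np.r_[1.0, 2.0]
    assert np.allclose(jax.nn.sigmoid(jnp.asarray(xv)), expit(xv))
    assert np.allclose(jax.nn.softplus(jnp.asarray(xv)), np.log1p(np.exp(xv)))
    assert np.allclose(jax.nn.softmax(jnp.asarray(xv)), np.exp(xv) / np.exp(xv).sum())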
Example #8
    def test_grad_log1msigm(self):
        # At some point, this returned nan, because (1 - sigm(x)) was
        # on both the numerator and the denominator of a fraction,
        # but the two nodes in question had not been merged.
        x = tt.matrix("x")
        lr = tt.scalar("lr")

        s = sigmoid(x)
        log1ms = tt.log(1 - s)
        c = log1ms.mean()
        ux = x - lr * aesara.grad(c, x)

        # Before the optimization, inf and NaN will be produced in the graph,
        # and DebugMode will complain. Everything is fine afterwards.
        mode = self.get_mode()
        if not isinstance(mode, aesara.compile.DebugMode):
            f = aesara.function([x, lr], ux, mode=mode)
            ux_v = f([[50]], 0.1)
            assert not np.isnan(ux_v)
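Analytically, d/dx log(1 - sigm(x)) = -sigm(x): the (1 - sigm(x)) factors cancel. Before the merge, the graph evaluated that quotient literally, and at x = 50 sigm(x) rounds to exactly 1.0, so it hit 0/0. A standalone NumPy illustration (not part of the test):

    import numpy as np
    from scipy.special import expit

    s = expit(50.0)
    print(1 - s)                   # 0.0: sigm(50) rounds to exactly 1.0
    print(-s * (1 - s) / (1 - s))  # nan, from the unmerged graph's 0/0
    print(-s)                      # -1.0: the simplified gradient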
Example #9
    def __init__(
        self,
        input=None,
        target=None,
        n_input=1,
        n_hidden=1,
        n_output=1,
        lr=1e-3,
        **kw,
    ):
        super().__init__(**kw)

        if input is None:
            input = tensor.dvector("input")
        if target is None:
            target = tensor.dvector("target")

        self.input = input
        self.target = target
        self.lr = shared(lr, "learning_rate")
        self.w1 = shared(np.zeros((n_hidden, n_input)), "w1")
        self.w2 = shared(np.zeros((n_output, n_hidden)), "w2")

        self.hidden = sigmoid(tensor.dot(self.w1, self.input))
        self.output = tensor.dot(self.w2, self.hidden)
        self.cost = tensor.sum((self.output - self.target) ** 2)

        self.sgd_updates = {
            self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
            self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2),
        }

        self.sgd_step = pfunc(
            params=[self.input, self.target],
            outputs=[self.output, self.cost],
            updates=self.sgd_updates,
        )

        self.compute_output = pfunc([self.input], self.output)

        self.output_from_hidden = pfunc([self.hidden], self.output)
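A hedged usage sketch of the training loop this class supports (NNet is a hypothetical name for the enclosing class, and the data is made up):

    net = NNet(n_input=3, n_hidden=5, n_output=2, lr=1e-2)
    xval = np.random.rand(3)
    tval = np.random.rand(2)
    for _ in range(100):
        output, cost = net.sgd_step(xval, tval)  # one SGD update of w1 and w2
    prediction = net.compute_output(xval)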
Example #10
    def test_local_hard_sigmoid(self):
        x = tt.matrix("x")
        s = sigmoid(x)

        mode = self.get_mode("local_hard_sigmoid")
        f = aesara.function([x], s, mode=mode)
        assert check_stack_trace(f, ops_to_check=sigmoid)
        topo = f.maker.fgraph.toposort()
        assert topo[0].op == sigmoid
        assert len(topo) == 1

        mode = self.get_mode().including("local_hard_sigmoid")
        f = aesara.function([x], s, mode=mode)
        topo = f.maker.fgraph.toposort()
        assert not any(n.op == sigmoid for n in topo)
        f([[-50, -10, -4, -1, 0, 1, 4, 10, 50]])

        mode2 = mode.excluding("fusion").excluding("inplace")
        f2 = aesara.function([x], s, mode=mode2)
        assert check_stack_trace(f2, ops_to_check=tt.clip)
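The substituted graph is a clipped linear approximation, which is why the last assertion checks the stack trace of tt.clip. A standalone NumPy sketch of the common hard-sigmoid form (the 0.2 slope and 0.5 shift are the conventional values, stated here as an assumption):

    import numpy as np

    def hard_sigmoid(x, slope=0.2, shift=0.5):
        # Linear in [-2.5, 2.5], saturated at 0 and 1 outside.
        return np.clip(x * slope + shift, 0.0, 1.0)

    assert hard_sigmoid(0.0) == 0.5
    assert hard_sigmoid(-50.0) == 0.0 and hard_sigmoid(50.0) == 1.0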
Example #11
    def test_perform_sigm_times_exp(self):
        # Test the core function doing the `sigm_times_exp` optimization.
        #
        # It is easier to test different graph scenarios this way than by
        # compiling an Aesara function.

        x, y, z, t = tt.vectors("x", "y", "z", "t")
        exp = tt.exp

        def ok(expr1, expr2):
            trees = [parse_mul_tree(e) for e in (expr1, expr2)]
            perform_sigm_times_exp(trees[0])
            trees[0] = simplify_mul(trees[0])
            good = is_same_graph(compute_mul(trees[0]), compute_mul(trees[1]))
            if not good:
                print(trees[0])
                print(trees[1])
                print("***")
                aesara.printing.debugprint(compute_mul(trees[0]))
                print("***")
                aesara.printing.debugprint(compute_mul(trees[1]))
            assert good

        ok(sigmoid(x) * exp(-x), sigmoid(-x))
        ok(
            -x * sigmoid(x) * (y * (-1 * z) * exp(-x)),
            -x * sigmoid(-x) * (y * (-1 * z)),
        )
        ok(
            -sigmoid(-x) * (exp(y) * (-exp(-z) * 3 * -exp(x)) *
                            (y * 2 * (-sigmoid(-y) *
                                      (z + t) * exp(z)) * sigmoid(z))) *
            -sigmoid(x),
            sigmoid(x) * (-sigmoid(y) * (-sigmoid(-z) * 3) *
                          (y * 2 * ((z + t) * exp(z)))) * (-sigmoid(x)),
        )
        ok(
            exp(-x) * -exp(-x) * (-sigmoid(x) * -sigmoid(x)),
            -sigmoid(-x) * sigmoid(-x))
        ok(-exp(x) * -sigmoid(-x) * -exp(-x), -sigmoid(-x))
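Every ok() pair above is pointwise equal via the same identity, sigm(x) * exp(-x) = sigm(-x), applied inside the multiplication tree. A standalone NumPy check of the second-to-last case (not part of the test):

    import numpy as np
    from scipy.special import expit

    xv = np.linspace(-4.0, 4.0, 9)
    lhs = np.exp(-xv) * -np.exp(-xv) * (-expit(xv) * -expit(xv))
    assert np.allclose(lhs, -expit(-xv) * expit(-xv))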