Example #1
    def test_grad(self):
        a = np.asarray(self.rng.standard_normal((5, 5)), dtype=config.floatX)

        x = matrix("x")

        expressions_gradients = [
            (x * zero_grad(x), x),
            (x * zero_grad(exp(x)), exp(x)),
            (zero_grad(x), at.constant(0.0)),
            (x**2 * zero_grad(x), 2 * x**2),
        ]

        for expr, expr_grad in expressions_gradients:
            g = grad(expr.sum(), x)
            # gradient according to aesara
            f = aesara.function([x], g, on_unused_input="ignore")
            # desired gradient
            f2 = aesara.function([x], expr_grad, on_unused_input="ignore")

            assert np.allclose(f(a), f2(a))
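For reference, the expected gradients in expressions_gradients follow from the product rule with the zero_grad factor held constant: d/dx[x * zero_grad(x)] = zero_grad(x) = x, d/dx[x * zero_grad(exp(x))] = exp(x), d/dx[zero_grad(x)] = 0, and d/dx[x**2 * zero_grad(x)] = 2x * zero_grad(x) = 2 * x**2, which is exactly what the paired expressions encode.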
Example #2
    def test_local_sigm_times_exp(self):
        # Test the `local_sigm_times_exp` optimization.
        # exp(x) * sigm(-x) -> sigm(x)
        # exp(-x) * sigm(x) -> sigm(-x)

        def match(func, ops):
            # print [node.op.scalar_op for node in func.maker.fgraph.toposort()]
            assert [node.op for node in func.maker.fgraph.toposort()] == ops

        m = self.get_mode(excluding=["local_elemwise_fusion", "inplace"])
        x, y = vectors("x", "y")

        f = aesara.function([x], sigmoid(-x) * exp(x), mode=m)
        match(f, [sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

        f = aesara.function([x], sigmoid(x) * exp(-x), mode=m)
        match(f, [neg, sigmoid])
        assert check_stack_trace(f, ops_to_check=sigmoid)

        f = aesara.function([x], -(-(-(sigmoid(x)))) * exp(-x), mode=m)
        match(f, [neg, sigmoid, neg])
        # assert check_stack_trace(f, ops_to_check=sigmoid)

        f = aesara.function(
            [x, y],
            (sigmoid(x) * sigmoid(-y) * -exp(-x) * exp(x * y) * exp(y)),
            mode=m,
        )
        topo = f.maker.fgraph.toposort()
        for op, nb in [(sigmoid, 2), (mul, 2), (neg, 1), (exp, 1)]:
            assert sum([n.op == op for n in topo]) == nb
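The rewrite exercised here relies on the identities sigm(-x) * exp(x) == sigm(x) and sigm(x) * exp(-x) == sigm(-x). A quick stand-alone NumPy check of both (a sketch, independent of the aesara test machinery):

import numpy as np

def sigm(v):
    # logistic sigmoid
    return 1.0 / (1.0 + np.exp(-v))

v = np.linspace(-5, 5, 101)
assert np.allclose(sigm(-v) * np.exp(v), sigm(v))
assert np.allclose(sigm(v) * np.exp(-v), sigm(-v))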
Example #3
    def test_1msigmoid(self):
        if not register_local_1msigmoid:
            return

        m = self.get_mode()
        x = fmatrix()

        # tests exp_over_1_plus_exp
        f = aesara.function([x], 1 - exp(x) / (1 + exp(x)), mode=m)
        assert check_stack_trace(f, ops_to_check=[neg, sigmoid_inplace])
        assert [node.op for node in f.maker.fgraph.toposort()] == [
            neg,
            sigmoid_inplace,
        ]

        # tests inv_1_plus_exp
        f = aesara.function([x], 1 - aet.fill(x, 1.0) / (1 + exp(-x)), mode=m)
        assert check_stack_trace(f, ops_to_check=[neg, sigmoid_inplace])
        assert [node.op for node in f.maker.fgraph.toposort()] == [
            neg,
            sigmoid_inplace,
        ]
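Both graphs above reduce to 1 - sigm(x), and since 1 - sigm(x) = sigm(-x), the rewritten graph is expected to contain only a negation followed by a sigmoid, which is what the toposort assertions check.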
Example #4
File: sigm.py  Project: ricardoV94/aesara
def local_exp_over_1_plus_exp(fgraph, node):
    """
    exp(x)/(1+exp(x)) -> sigm(x)
    c/(1+exp(x)) -> c*sigm(-x)

    """
    # this optimization should be done for numerical stability
    # so we don't care to check client counts
    if node.op == true_div:

        # find all the exp() terms in the numerator
        num, denom = node.inputs
        num_exp_x, num_rest, num_neg = partition_num_or_denom(num, is_exp)
        denom_1pexp, denom_rest, denom_neg = partition_num_or_denom(
            denom, is_1pexp)

        sigmoids = []
        for t in denom_1pexp:
            if t in num_exp_x:
                # case: exp(x) /(1+exp(x))
                sigmoids.append(sigmoid(t))
                del num_exp_x[num_exp_x.index(t)]
            else:
                # case: 1/(1+exp(x))
                sigmoids.append(sigmoid(-t))
            copy_stack_trace(node.outputs[0], sigmoids[-1])

        if not sigmoids:  # we didn't find any.  abort
            return
        # put the new numerator together
        new_num = sigmoids + [exp(t) for t in num_exp_x] + num_rest
        if len(new_num) == 1:
            new_num = new_num[0]
        else:
            new_num = mul(*new_num)

        if num_neg ^ denom_neg:
            new_num = -new_num

        copy_stack_trace(num, new_num)

        if len(denom_rest) == 0:
            return [new_num]
        elif len(denom_rest) == 1:
            out = new_num / denom_rest[0]
        else:
            out = new_num / mul(*denom_rest)

        copy_stack_trace(node.outputs[0], out)
        return [out]
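As a concrete walk-through of the control flow (a rough sketch, not taken from the docstring): for y * exp(x) / (1 + exp(x)), the numerator partition yields num_exp_x = [x] and num_rest = [y], the denominator partition yields denom_1pexp = [x]; the loop matches the two x terms, emits sigmoid(x), and the rewrite returns [y * sigmoid(x)]. For c / (1 + exp(x)) there is no matching exp term in the numerator, so the else branch fires and the result is c * sigmoid(-x) instead.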
Example #5
    def grad(self, inputs, cost_grad):
        """
        In defining the gradient, the Finite Fourier Transform is viewed as
        a complex-differentiable function of a complex variable
        """
        a = inputs[0]
        n = inputs[1]
        axis = inputs[2]
        grad = cost_grad[0]
        if not isinstance(axis, TensorConstant):
            raise NotImplementedError(
                f"{self.__class__.__name__}: gradient is currently implemented"
                " only for axis being an Aesara constant")
        axis = int(axis.data)
        # notice that the number of actual elements in wrto is independent of
        # possible padding or truncation:
        elem = arange(0, shape(a)[axis], 1)
        # accounts for padding:
        freq = arange(0, n, 1)
        outer_res = outer(freq, elem)
        pow_outer = exp(((-2 * math.pi * 1j) * outer_res) / (1.0 * n))
        res = tensordot(grad, pow_outer, (axis, 0))

        # This would be simpler but not implemented by aesara:
        # res = switch(lt(n, shape(a)[axis]),
        # set_subtensor(res[...,n::], 0, False, False), res)

        # Instead we resort to that to account for truncation:
        flip_shape = list(np.arange(0, a.ndim)[::-1])
        res = res.dimshuffle(flip_shape)
        res = switch(
            lt(n, shape(a)[axis]),
            set_subtensor(res[n::], 0, False, False),
            res,
        )
        res = res.dimshuffle(flip_shape)

        # insures that gradient shape conforms to input shape:
        out_shape = (list(np.arange(0, axis)) + [a.ndim - 1] +
                     list(np.arange(axis, a.ndim - 1)))
        res = res.dimshuffle(*out_shape)
        return [res, None, None]
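The pow_outer matrix built above is simply the DFT matrix W[f, e] = exp(-2*pi*1j * f * e / n). A stand-alone NumPy sketch of that view (not part of the Op), checked against np.fft.fft for the square, un-padded case:

import numpy as np

n = 8
a = np.random.rand(n)
freq = np.arange(n)
elem = np.arange(n)
W = np.exp((-2 * np.pi * 1j) * np.outer(freq, elem) / n)  # DFT matrix
assert np.allclose(W @ a, np.fft.fft(a))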
Example #6
    def test_log1pexp_to_softplus(self):
        m = aesara.config.mode
        if m == "FAST_COMPILE":
            m = "FAST_RUN"

        x = vector()

        out = log(1 + exp(x))
        f = aesara.function([x], out, mode=m)

        # Fix ticket #4581 first
        # assert check_stack_trace(f, ops_to_check='all')
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 1
        assert isinstance(topo[0].op.scalar_op, ScalarSoftplus)
        f(np.random.rand(54).astype(config.floatX))
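One motivation for mapping log(1 + exp(x)) onto softplus is numerical stability: evaluated naively in floating point, the expression overflows for large x, while a stable evaluation stays finite. A small NumPy sketch of that difference (assuming float64; np.logaddexp(0, x) computes log(1 + exp(x)) stably):

import numpy as np

x = np.array([-50.0, 0.0, 50.0, 800.0])
with np.errstate(over="ignore"):
    naive = np.log(1 + np.exp(x))  # overflows to inf at x = 800
stable = np.logaddexp(0.0, x)      # log(exp(0) + exp(x)) == log(1 + exp(x)), computed stably
assert np.isinf(naive[-1]) and np.isfinite(stable[-1])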
Example #7
def test_broadcast_grad():
    # rng = numpy.random.RandomState(utt.fetch_seed())
    x1 = tensor4("x")
    # x1_data = rng.randn(1, 1, 300, 300)
    sigma = scalar("sigma")
    # sigma_data = 20
    window_radius = 3

    filter_1d = aet.arange(-window_radius, window_radius + 1)
    filter_1d = filter_1d.astype(aesara.config.floatX)
    filter_1d = exp(-0.5 * filter_1d ** 2 / sigma ** 2)
    filter_1d = filter_1d / filter_1d.sum()

    filter_W = filter_1d.dimshuffle(["x", "x", 0, "x"])

    y = conv2d(x1, filter_W, border_mode="full", filter_shape=[1, 1, None, None])
    aesara.grad(y.sum(), sigma)
Example #8
File: test_conv.py  Project: mgorny/aesara
def test_broadcast_grad():
    x1 = tensor4("x")
    sigma = scalar("sigma")
    window_radius = 3

    filter_1d = at.arange(-window_radius, window_radius + 1)
    filter_1d = filter_1d.astype(aesara.config.floatX)
    filter_1d = exp(-0.5 * filter_1d**2 / sigma**2)
    filter_1d = filter_1d / filter_1d.sum()

    filter_W = filter_1d.dimshuffle(["x", "x", 0, "x"])

    y = conv2d(x1,
               filter_W,
               border_mode="full",
               filter_shape=[1, 1, None, None])
    # TODO FIXME: Make this a real test and `assert` something
    aesara.grad(y.sum(), sigma)
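The filter being differentiated in both versions of this test is a normalized 1-D Gaussian over a 7-tap window. A plain NumPy rendering of the same construction (a sketch; sigma = 20.0 is just a concrete value chosen for illustration, echoing the commented-out sigma_data in the earlier variant):

import numpy as np

sigma = 20.0
window_radius = 3
t = np.arange(-window_radius, window_radius + 1, dtype=np.float64)
filt = np.exp(-0.5 * t**2 / sigma**2)
filt = filt / filt.sum()  # normalize so the taps sum to 1
assert np.isclose(filt.sum(), 1.0)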
Example #9
    def test_basic(self, axis):
        c = matrix()
        if axis is None:
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", "x")
        elif axis == 0:
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", 0)
        elif axis == (0, 1):
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", "x")
        else:
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle(0, "x")

        # test that function contains softmax and no div.
        f = aesara.function([c], p_y, mode=self.mode)

        assert check_stack_trace(f, ops_to_check=Softmax)

        f_ops = [n.op for n in f.maker.fgraph.toposort()]

        assert len(f_ops) == 1
        assert isinstance(f_ops[0], Softmax)

        c_val = self.rng.random((3, 4)).astype(config.floatX)
        assert np.allclose(f(c_val), sp.softmax(c_val, axis=axis))
Example #10
    def test_matrix_perform_and_opt(self):
        m = config.mode
        m = aesara.compile.get_mode(m)
        m.check_isfinite = False
        x, y = matrices("xy")
        # regular softmax and crossentropy
        sm = softmax(x)
        cm = categorical_crossentropy(sm, y)

        # numerically stable log-softmax with crossentropy
        logsm = logsoftmax(x)
        sm2 = exp(logsm)  # just used to show equivalence with sm
        cm2 = -aet_sum(y * logsm, axis=1)
        grad_node = grad(cm2.mean(), x)

        # create some inputs into a softmax that are large and labels
        a = np.exp(10 * np.random.random((5, 10)).astype(config.floatX))
        # create some one-hot coded labels
        b = np.eye(5, 10).astype(config.floatX)

        # show equivalence of softmax and exponentiated numerically stable
        # log-softmax
        f1 = aesara.function([x], [sm, sm2])
        sm_, sm2_ = f1(a)
        utt.assert_allclose(sm_, sm2_)

        # now show that the two versions result in the same crossentropy cost
        # this indicates that the forward function does provide some numerical
        # stability
        f2 = aesara.function([x, y], [cm, cm2], mode=m)
        cm_, cm2_ = f2(a, b)
        utt.assert_allclose(cm_, cm2_)

        # now, show that in the standard softmax case the gradients blow up
        # while in the log-softmax case they don't
        f3 = aesara.function([x, y], [grad_node])
        grad_ = f3(a, b)
        assert not np.any(np.isnan(grad_))
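The stability contrast this test depends on can be seen directly in NumPy (a sketch, using scipy.special.logsumexp): with inputs on the scale of exp(10 * U(0, 1)), exponentiating them overflows and the naive softmax produces NaNs, while log-softmax computed as x - logsumexp(x) stays finite.

import numpy as np
from scipy.special import logsumexp

a = np.exp(10 * np.random.random((5, 10)))
with np.errstate(over="ignore", invalid="ignore"):
    naive = np.exp(a) / np.exp(a).sum(axis=1, keepdims=True)  # inf / inf -> nan
logsm = a - logsumexp(a, axis=1, keepdims=True)               # numerically stable
assert np.any(np.isnan(naive)) and np.all(np.isfinite(logsm))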
Example #11
def softmax_graph(c):
    return exp(c) / exp(c).sum(axis=-1, keepdims=True)
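A stand-alone NumPy equivalent of softmax_graph (a sketch), checked against scipy.special.softmax in the same way the tests below do:

import numpy as np
from scipy.special import softmax as sp_softmax

c = np.random.rand(3, 4)
p_y = np.exp(c) / np.exp(c).sum(axis=-1, keepdims=True)
assert np.allclose(p_y, sp_softmax(c, axis=-1))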
Example #12
class TestSoftmaxOpt:
    # Test that expressions of softmax in terms of exponentiated things
    # divided by row sums are replaced by softmax expressions.
    #
    # Softmax_grad isn't that interesting as an Op, but it has the signature
    # we look for when trying to insert CrossEntropySoftmax... grad.  So, for
    # now, we add softmax_grad to graphs. In the future, we may modify the
    # CrossEntropySoftmax...grad to look for the more basic pattern.
    #

    def setup_method(self):
        self.rng = np.random.default_rng(utt.fetch_seed())
        self.mode = aesara.compile.mode.get_default_mode()
        self.mode = self.mode.including("canonicalize")

    @pytest.mark.parametrize("axis", [None, 0, 1, -1, (0, 1)])
    def test_basic(self, axis):
        c = matrix()
        if axis is None:
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", "x")
        elif axis == 0:
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", 0)
        elif axis == (0, 1):
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle("x", "x")
        else:
            p_y = exp(c) / exp(c).sum(axis=axis).dimshuffle(0, "x")

        # test that function contains softmax and no div.
        f = aesara.function([c], p_y, mode=self.mode)

        assert check_stack_trace(f, ops_to_check=Softmax)

        f_ops = [n.op for n in f.maker.fgraph.toposort()]

        assert len(f_ops) == 1
        assert isinstance(f_ops[0], Softmax)

        c_val = self.rng.random((3, 4)).astype(config.floatX)
        assert np.allclose(f(c_val), sp.softmax(c_val, axis=axis))

    @pytest.mark.parametrize("axis", [None, 0, 1, 2, -1, -2, -3, (0, 1, 2)])
    def test_basic_keepdims(self, axis):
        c = tensor3()
        p_y = exp(c) / exp(c).sum(axis=axis, keepdims=True)

        # test that function contains softmax and no div.
        f = aesara.function([c], p_y, mode=self.mode)

        assert check_stack_trace(f, ops_to_check=Softmax)

        f_ops = [n.op for n in f.maker.fgraph.toposort()]

        assert len(f_ops) == 1
        assert isinstance(f_ops[0], Softmax)

        c_val = self.rng.random((3, 4, 5)).astype(config.floatX)
        assert np.allclose(f(c_val), sp.softmax(c_val, axis=axis))

    @pytest.mark.skip(reason="Optimization not enabled for the moment")
    def test_grad(self):
        c = matrix()
        p_y = exp(c) / exp(c).sum(axis=1).dimshuffle(0, "x")

        # test that function contains softmax and softmaxgrad
        w = matrix()

        g = aesara.function([c, w], grad((p_y * w).sum(), c), mode=self.mode)

        g_ops = [n.op for n in g.maker.fgraph.toposort()]

        assert len(g_ops) == 2, g_ops
        assert isinstance(g_ops[0], Softmax)
        assert isinstance(g_ops[1], SoftmaxGrad)

        g(self.rng.random((3, 4)), self.rng.uniform(0.5, 1, (3, 4)))

    def test_transpose_basic(self):
        # this should be a transposed softmax
        c = matrix()
        p_y = exp(c) / exp(c).sum(axis=0)

        # test that function contains softmax and no div.
        f = aesara.function([c], p_y, mode=self.mode)
        f_ops = [n.op for n in f.maker.fgraph.toposort()]
        assert len(f_ops) == 1
        assert isinstance(f_ops[0], Softmax)

    @pytest.mark.skip(reason="Optimization not enabled for the moment")
    def test_transpose_grad(self):
        # this should be a transposed softmax
        c = matrix()
        p_y = exp(c) / exp(c).sum(axis=0)

        # test that function contains softmax and no div.
        g = aesara.function([c], grad(p_y.sum(), c), mode=self.mode)
        g_ops = [n.op for n in g.maker.fgraph.toposort()]
        assert len(g_ops) == 2
        assert isinstance(g_ops[0], Softmax)
        assert isinstance(g_ops[1], SoftmaxGrad)

    def test_1d_basic(self):
        c = vector()
        p_y = exp(c) / exp(c).sum()

        # test that function contains softmax and no div.
        f = aesara.function([c], p_y, mode=self.mode)
        f_ops = [n.op for n in f.maker.fgraph.toposort()]
        assert len(f_ops) == 1
        assert isinstance(f_ops[0], Softmax)

    @pytest.mark.skip(reason="Optimization not enabled for the moment")
    def test_1D_grad(self):
        c = vector()
        p_y = exp(c) / exp(c).sum()

        # test that function contains softmax and no div.
        g = aesara.function([c], grad(p_y.sum(), c), mode=self.mode)
        g_ops = [n.op for n in g.maker.fgraph.toposort()]
        assert len(g_ops) == 2
        assert isinstance(g_ops[0], Softmax)
        assert isinstance(g_ops[1], SoftmaxGrad)

    @pytest.mark.parametrize(
        "f",
        [
            lambda c: exp(c) / exp(c).sum(axis=0).dimshuffle(0, 1, "x"),
            lambda c: exp(c) / exp(c).sum(axis=0).dimshuffle("x", 0, 1, "x"),
            lambda c: exp(c) / exp(c).sum(axis=0).dimshuffle("x", 1, 0),
            lambda c: exp(c) / exp(c).sum(axis=(0, 1), keepdims=True),
        ],
    )
    def test_invalid_softmax_expressions(self, f):
        # Test that graphs are not rewritten into a softmax when a dimshuffle
        # swaps or adds extra dimensions, or when more than one but not all axis
        # are summed over (which is not allowed by the Softmax Op but otherwise
        # valid)
        c = tensor3("c")
        out = f(c)
        f = aesara.function([c], out, mode=self.mode)

        f_ops = [n.op for n in f.maker.fgraph.toposort()]
        assert len(f_ops) > 1
        assert not any(isinstance(op, Softmax) for op in f_ops)
Example #13
    def test_exp_over_1_plus_exp(self):
        m = self.get_mode(excluding=["local_elemwise_fusion"])

        x = vector()
        data = np.random.rand(54).astype(config.floatX)

        backup = config.warn__identify_1pexp_bug
        config.warn__identify_1pexp_bug = False
        try:
            # tests exp_over_1_plus_exp
            f = aesara.function([x], exp(x) / (1 + exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] == [sigmoid]
            f(data)
            f = aesara.function([x], exp(x) / (2 + exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
            f = aesara.function([x], exp(x) / (1 - exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
            f = aesara.function([x], exp(x + 1) / (1 + exp(x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)

            # tests inv_1_plus_exp
            f = aesara.function([x], aet.fill(x, 1.0) / (1 + exp(-x)), mode=m)
            # todo: solve issue #4589 first
            # assert check_stack_trace(f, ops_to_check=sigmoid)
            assert [node.op for node in f.maker.fgraph.toposort()] == [sigmoid]
            f(data)
            f = aesara.function([x], aet.fill(x, 1.0) / (2 + exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
            f = aesara.function([x], aet.fill(x, 1.0) / (1 - exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)
            f = aesara.function([x], aet.fill(x, 1.1) / (1 + exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid]
            f(data)

            # tests inv_1_plus_exp with neg
            f = aesara.function([x], aet.fill(x, -1.0) / (1 + exp(-x)), mode=m)
            # todo: solve issue #4589 first
            # assert check_stack_trace(
            #     f, ops_to_check=[sigmoid, neg_inplace])
            assert [node.op for node in f.maker.fgraph.toposort()] == [
                sigmoid,
                neg_inplace,
            ]
            f(data)
            f = aesara.function([x], aet.fill(x, -1.0) / (1 - exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
                neg_inplace,
            ]
            f(data)
            f = aesara.function([x], aet.fill(x, -1.0) / (2 + exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
                neg_inplace,
            ]
            f(data)
            f = aesara.function([x], aet.fill(x, -1.1) / (1 + exp(-x)), mode=m)
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
                neg_inplace,
            ]
            f(data)

            # tests double inv_1_plus_exp with neg
            # (-1)(exp(x)) / (1+exp(x))(1+exp(-x))
            # = (-1)/(1+exp(-x)) * exp(x)/(1+exp(x))
            # = - (sigm(x) * sigm(x))
            f = aesara.function(
                [x],
                (aet.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (1 + exp(-x))),
                mode=m,
            )
            # todo: solve issue #4589 first
            # assert check_stack_trace(f, ops_to_check=[sigmoid, mul])
            assert [node.op
                    for node in f.maker.fgraph.toposort()] == [sigmoid, mul]
            f(data)
            f = aesara.function(
                [x],
                (aet.fill(x, -1.1) * exp(x)) / ((1 + exp(x)) * (1 + exp(-x))),
                mode=m,
            )
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
                mul,
                neg_inplace,
            ]
            f(data)
            f = aesara.function(
                [x],
                (aet.fill(x, -1.0) * exp(x)) / ((2 + exp(x)) * (1 + exp(-x))),
                mode=m,
            )
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
                mul,
                neg_inplace,
            ]
            f(data)
            f = aesara.function(
                [x],
                (aet.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (2 + exp(-x))),
                mode=m,
            )
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
                mul,
                neg_inplace,
            ]
            f(data)
            f = aesara.function(
                [x],
                (aet.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (1 + exp(x))),
                mode=m,
            )
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
                mul,
                neg_inplace,
            ]
            f(data)
            f = aesara.function(
                [x],
                (aet.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (2 + exp(-x))),
                mode=m,
            )
            assert [node.op for node in f.maker.fgraph.toposort()] != [
                sigmoid,
                mul,
                neg_inplace,
            ]
            f(data)

        finally:
            # Restore config option.
            config.warn__identify_1pexp_bug = backup