Example No. 1
    def test_xent_thing_int32(self):
        x = matrix("x")
        y = lvector("y")
        yi = aet.cast(y, "int32")
        expressions = [
            aet_sum(-log(softmax(x)[aet.arange(yi.shape[0]), yi])),
            -aet_sum(log(softmax(x)[aet.arange(yi.shape[0]), yi])),
            -aet_sum(log(softmax(x))[aet.arange(yi.shape[0]), yi]),
            aet_sum(-log(softmax(x))[aet.arange(yi.shape[0]), yi]),
        ]

        for expr in expressions:
            fgraph = FunctionGraph([x, y], [expr])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 5
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]

            # Also verify the gradient wrt x
            fgraph = FunctionGraph([x, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 3
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops
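For orientation, here is a rough NumPy-only sketch (not part of the test suite) of the math these fused ops implement: roughly, the per-row negative log-likelihood of the integer label under a softmax over the logits, and its gradient softmax(x) - one_hot(y). The helper name and shapes below are illustrative only.

import numpy as np

def xent_from_logits(x, y):
    # x: (batch, classes) float logits; y: (batch,) integer labels
    x_shifted = x - x.max(axis=1, keepdims=True)              # for numerical stability
    log_sm = x_shifted - np.log(np.exp(x_shifted).sum(axis=1, keepdims=True))
    nll = -log_sm[np.arange(y.shape[0]), y]                   # per-example cross-entropy
    grad = np.exp(log_sm)                                     # softmax(x)
    grad[np.arange(y.shape[0]), y] -= 1.0                     # softmax(x) - one_hot(y)
    return nll.sum(), grad                                    # gradient of the summed loss wrt x

loss, g = xent_from_logits(np.random.randn(4, 3), np.array([0, 2, 1, 2]))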
Example No. 2
    def __init__(self, input, n_in, n_out, name_prefix=""):
        """Initialize the parameters of the logistic regression

        :type input: TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """

        # initialize the weights W as a zero matrix of shape (n_in, n_out)
        self.W = aesara.shared(
            value=np.zeros((n_in, n_out), dtype=aesara.config.floatX),
            name=name_prefix + "W",
        )

        # compute vector of class-membership probabilities in symbolic form
        self.p_y_given_x = softmax(dot(input, self.W))

        # compute prediction as class whose probability is maximal in
        # symbolic form
        self.y_pred = argmax(self.p_y_given_x, axis=1)

        # parameters of the model
        self.params = [self.W]
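A minimal usage sketch of the constructor above. Assumptions (not from the original snippet): the method belongs to a class called LogisticRegression, and softmax/dot/argmax are the aesara.tensor functions imported by the surrounding module; the class name and shapes are illustrative only.

import numpy as np
import aesara
import aesara.tensor as aet

# Assumption: the __init__ above belongs to a class named LogisticRegression.
x = aet.matrix("x")  # one minibatch of inputs
layer = LogisticRegression(input=x, n_in=784, n_out=10, name_prefix="lr_")

# Compile a predictor from the symbolic graph built in __init__.
predict = aesara.function([x], layer.y_pred)
print(predict(np.zeros((2, 784), dtype=aesara.config.floatX)).shape)  # (2,)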
Example No. 3
    def test_softmax_with_bias_trace(self):
        rng = np.random.default_rng(utt.fetch_seed())
        a = aesara.shared(rng.standard_normal((3,)).astype(config.floatX))
        b = aesara.shared(np.float32(rng.standard_normal()))
        sm = softmax(a + b)
        f = aesara.function([], sm)
        assert check_stack_trace(f, ops_to_check="last")
Example No. 4
    def test_vector_perform(self):
        x = vector()
        f = aesara.function([x], softmax(x, axis=None))

        rng = np.random.default_rng(utt.fetch_seed())
        xv = rng.standard_normal((6, )).astype(config.floatX)
        assert np.allclose(f(xv), sp.softmax(xv))
Example No. 5
    def test_perform(self, axis):
        x = tensor4("x")
        rng = np.random.default_rng(utt.fetch_seed())
        xv = rng.standard_normal((2, 3, 4, 5)).astype(config.floatX)

        f = aesara.function([x], softmax(x, axis=axis))
        assert np.allclose(f(xv), sp.softmax(xv, axis=axis))
Example No. 6
    def test_local_logsoftmax_opt(self, axis):
        # Test the Logsoftmax substitution
        #
        # Check that log(softmax(x)) is substituted with LogSoftmax(x). Note that
        # only the forward pass is checked (the gradient is not).

        x = matrix("x")
        sm = softmax(x, axis=axis)
        logsm = log(sm)
        f = aesara.function([x], logsm)
        assert isinstance(f.maker.fgraph.outputs[0].owner.op, LogSoftmax)
        assert check_stack_trace(f, ops_to_check=LogSoftmax)
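The substitution matters numerically, not just structurally. A quick NumPy-only sketch (independent of Aesara) of why log(softmax(x)) is fragile while a fused log-softmax is not:

import numpy as np

x = np.array([[0.0, 1000.0]])

with np.errstate(over="ignore", divide="ignore", invalid="ignore"):
    naive = np.log(np.exp(x) / np.exp(x).sum(axis=1, keepdims=True))

# Fused form: shift by the row max, then subtract the log-sum-exp.
shift = x - x.max(axis=1, keepdims=True)
stable = shift - np.log(np.exp(shift).sum(axis=1, keepdims=True))

print(naive)   # [[-inf  nan]] -- exp(1000) overflows
print(stable)  # [[-1000.    0.]]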
Example No. 7
def test_stabilize_log_softmax():
    mode = aesara.compile.mode.get_default_mode()
    mode = mode.including("local_log_softmax", "specialize")

    x = matrix()
    y = softmax(x)
    z = log(y)

    f = aesara.function([x], z, mode=mode)
    assert check_stack_trace(f, ops_to_check="all")

    # check that the softmax has been optimized out
    for node in f.maker.fgraph.toposort():
        assert not isinstance(node.op, y.owner.op.__class__)

    # call the function so debug mode can verify the optimized
    # version matches the unoptimized version
    rng = np.random.default_rng([2012, 8, 22])
    f(rng.random((2, 3)).astype(config.floatX))
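To see the rewrite at work outside the test harness, one can compile the same graph with and without the local_log_softmax rewrite and print both compiled graphs. This is only a sketch under stated assumptions: the softmax import path has moved between versions, and Mode.including/excluding plus aesara.printing.debugprint are assumed to behave as they did in Theano.

import aesara
import aesara.tensor as aet
from aesara.tensor.special import softmax  # assumed import path; may differ by version

x = aet.matrix("x")
z = aet.log(softmax(x))

mode = aesara.compile.mode.get_default_mode()
f_opt = aesara.function([x], z, mode=mode.including("local_log_softmax", "specialize"))
f_raw = aesara.function([x], z, mode=mode.excluding("local_log_softmax"))

aesara.printing.debugprint(f_opt)  # expected to show a LogSoftmax node
aesara.printing.debugprint(f_raw)  # expected to show Log applied to a Softmax output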
Example No. 8
def test_asymptotic_32():
    # This test makes sure that our functions behave sensibly when
    # huge values are present

    # TODO: consider adding the optimization of crossentropy into the current
    # mode for the purpose of running this test

    for dtype in "float32", "float64":
        if dtype == "float32":
            x = fmatrix()
            x2 = fvector()
        else:
            x = dmatrix()
            x2 = dvector()
        y = lvector()

        c = categorical_crossentropy(softmax(x + x2), y)
        f = aesara.function([x, y, x2], [c.sum(), grad(c.sum(), x)],
                            mode="FAST_RUN")

        xval = np.zeros((5, 5), dtype=dtype)
        x2val = np.zeros(5, dtype=xval.dtype)
        for i in range(100):
            cval, gxval = f(xval, np.arange(5), x2val)
            xval -= 100.3 * gxval

        assert cval == 0  # no problem going to zero error

        # what about when x gets really big?

        xval = np.zeros((5, 5), dtype=dtype)
        x2val = np.zeros(5, dtype=xval.dtype)
        for i in range(100):

            cval, gxval = f(xval, np.arange(5), x2val)
            xval += 100000.3 * gxval

        # By now the true-class probabilities have been driven to ~0, so the
        # cost is huge and the gradient approaches softmax(x) - one_hot(y):
        # -1.0 for the true class and 0.25 for each of the other four classes.
        assert cval > 61750000
        assert gxval[0, 0] == -1.0
        assert gxval[0, 1] == 0.25
Example No. 9
    def test_logsoftmax_grad_true_div_elemwise(self):
        # Check that the gradient of an expression similar to log(softmax),
        # but with an elemwise operation other than true_div, is not optimized.

        x = matrix("x")
        y = log(softmax(x))
        g = grad(y.sum(), x)

        softmax_grad_node = g.owner
        assert softmax_grad_node.op == softmax_grad_legacy
        true_div_node = softmax_grad_node.inputs[0].owner
        assert true_div_node.op == true_div

        # We replace the elemwise true_div op by an elemwise add.
        new_g = softmax_grad_legacy(add(*true_div_node.inputs),
                                    softmax_grad_node.inputs[1])

        fgraph = FunctionGraph([x], [new_g])
        optdb.query(OPT_FAST_RUN).optimize(fgraph)

        assert softmax_grad_legacy in [n.op for n in fgraph.toposort()]
Example No. 10
    def test_matrix_perform_and_opt(self):
        m = config.mode
        m = aesara.compile.get_mode(m)
        m.check_isfinite = False
        x, y = matrices("xy")
        # regular softmax and crossentropy
        sm = softmax(x)
        cm = categorical_crossentropy(sm, y)

        # numerically stable log-softmax with crossentropy
        logsm = logsoftmax(x)
        sm2 = exp(logsm)  # just used to show equivalence with sm
        cm2 = -aet_sum(y * logsm, axis=1)
        grad_node = grad(cm2.mean(), x)

        # create some large inputs for the softmax
        a = np.exp(10 * np.random.random((5, 10)).astype(config.floatX))
        # create some one-hot coded labels
        b = np.eye(5, 10).astype(config.floatX)

        # show equivalence of softmax and exponentiated numerically stable
        # log-softmax
        f1 = aesara.function([x], [sm, sm2])
        sm_, sm2_ = f1(a)
        utt.assert_allclose(sm_, sm2_)

        # now show that the two versions result in the same crossentropy cost
        # this indicates that the forward function does provide some numerical
        # stability
        f2 = aesara.function([x, y], [cm, cm2], mode=m)
        cm_, cm2_ = f2(a, b)
        utt.assert_allclose(cm_, cm2_)

        # now show that the gradient of the log-softmax cost stays finite
        # (no NaNs) even for these large inputs, whereas the plain-softmax
        # gradient would blow up here
        f3 = aesara.function([x, y], [grad_node])
        grad_ = f3(a, b)
        assert not np.any(np.isnan(grad_))
Example No. 11
    def test_softmax_with_bias_trace(self):
        a = aesara.shared(np.random.randn(3).astype(config.floatX))
        b = aesara.shared(np.float32(np.random.randn()))
        sm = softmax(a + b)
        f = aesara.function([], sm)
        assert check_stack_trace(f, ops_to_check="last")
Example No. 12
    def test_crossentropy_softmax_1hot_with_bias_dxcale_cost(self):
        x = matrix("x")
        y = lvector("y")
        a = scalar("a")

        def validate_grad_graph(func):
            # The graph of the gradient should not have softmaxgrad anymore
            has_cx1hotdx = False
            has_softmax = False
            has_softmaxdx = False
            for node in func.maker.fgraph.toposort():
                if node.op == crossentropy_softmax_1hot_with_bias_dx:
                    has_cx1hotdx = True
                if node.op == softmax_legacy:
                    has_softmax = True
                if node.op == softmax_grad_legacy:
                    has_softmaxdx = True

            assert has_cx1hotdx
            assert has_softmax
            assert not has_softmaxdx

        # Cases to test
        expressions = [
            a * aet_sum(-log(softmax(x)[aet.arange(y.shape[0]), y])),
            -a * aet_sum(log(softmax(x)[aet.arange(y.shape[0]), y])),
            a * (-aet_sum(log(softmax(x)[aet.arange(y.shape[0]), y]))),
            a * aet_sum(log(softmax(x)[aet.arange(y.shape[0]), y])),
            a * aet_sum(-log(softmax(x))[aet.arange(y.shape[0]), y]),
            -a * aet_sum(log(softmax(x))[aet.arange(y.shape[0]), y]),
            a * (-aet_sum(log(softmax(x))[aet.arange(y.shape[0]), y])),
            a * aet_sum(log(softmax(x))[aet.arange(y.shape[0]), y]),
            a * mean(-log(softmax(x)[aet.arange(y.shape[0]), y])),
            -a * mean(log(softmax(x)[aet.arange(y.shape[0]), y])),
            a * (-mean(log(softmax(x)[aet.arange(y.shape[0]), y]))),
            a * mean(log(softmax(x)[aet.arange(y.shape[0]), y])),
            a * mean(-log(softmax(x))[aet.arange(y.shape[0]), y]),
            -a * mean(log(softmax(x))[aet.arange(y.shape[0]), y]),
            a * (-mean(log(softmax(x))[aet.arange(y.shape[0]), y])),
            a * mean(log(softmax(x))[aet.arange(y.shape[0]), y]),
        ]

        for expr in expressions:
            fgraph = FunctionGraph([x, y, a], [expr])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            assert 5 <= len(fgraph.toposort()) <= 10

            ops = {node.op for node in fgraph.toposort()}
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert softmax_legacy not in ops

            # Verify the gradient wrt x
            fgraph = FunctionGraph([x, y, a], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            assert 3 <= len(fgraph.toposort()) <= 6

            ops = {node.op for node in fgraph.toposort()}
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops

            # Verify the gradient when providing output gradient
            fgraph = FunctionGraph(
                [x, y, a], [grad(expr, x, known_grads={expr: a * x.sum()})])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            assert 6 <= len(fgraph.toposort()) <= 8

            ops = {node.op for node in fgraph.toposort()}
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops
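The known_grads argument used in the last block above lets grad start back-propagation from a supplied output gradient instead of the implicit 1.0. A minimal sketch of that mechanism on a toy graph (the variables and values are illustrative and not part of the test):

import aesara
import aesara.tensor as aet
from aesara import grad

x = aet.vector("x")
a = aet.scalar("a")
cost = (x ** 2).sum()

# Back-propagate `a` as the gradient flowing into `cost`; cost=None means
# "start only from the entries in known_grads".
g = grad(None, x, known_grads={cost: a})

f = aesara.function([x, a], g)
print(f([1.0, 2.0], 3.0))  # a * d(cost)/dx = a * 2x -> [6., 12.]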
Example No. 13
    def test_perform(self, axis):
        x = tensor4("x")
        xv = np.random.randn(2, 3, 4, 5).astype(config.floatX)

        f = aesara.function([x], softmax(x, axis=axis))
        assert np.allclose(f(xv), sp.softmax(xv, axis=axis))
Example No. 14
    def test_get_rid_of_advanced_indexing_version_of_xent(self):
        x = matrix("x")
        b = vector("b")
        y = lvector("y")

        # Basic case
        expressions = [
            aet_sum(-log(softmax(x)[aet.arange(y.shape[0]), y])),
            -aet_sum(log(softmax(x)[aet.arange(y.shape[0]), y])),
            -aet_sum(log(softmax(x))[aet.arange(y.shape[0]), y]),
            aet_sum(-log(softmax(x))[aet.arange(y.shape[0]), y]),
        ]
        for expr in expressions:

            fgraph = FunctionGraph([x, y], [expr])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 4
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]

            # Also verify the gradient wrt x
            fgraph = FunctionGraph([x, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 2
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops

        # Test that a biased softmax is optimized correctly
        bias_expressions = [
            aet_sum(-log(softmax(x + b)[aet.arange(y.shape[0]), y])),
            -aet_sum(log(softmax(b + x)[aet.arange(y.shape[0]), y])),
            -aet_sum(log(softmax(x + b))[aet.arange(y.shape[0]), y]),
            aet_sum(-log(softmax(b + x))[aet.arange(y.shape[0]), y]),
        ]

        for expr in bias_expressions:
            fgraph = FunctionGraph([x, b, y], [expr, x])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 2  # [big_op, sum]
            assert crossentropy_softmax_argmax_1hot_with_bias in ops

            fgraph = FunctionGraph([x, b, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 2
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_with_bias in ops
            assert softmax_grad_legacy not in ops

        # Test that using "mean" instead of sum works, too
        mean_expressions = [
            mean(-log(softmax(x)[aet.arange(y.shape[0]), y])),
            -mean(log(softmax(x)[aet.arange(y.shape[0]), y])),
            -mean(log(softmax(x))[aet.arange(y.shape[0]), y]),
            mean(-log(softmax(x))[aet.arange(y.shape[0]), y]),
        ]

        for expr in mean_expressions:

            fgraph = FunctionGraph([x, y], [expr])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 6
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]

            fgraph = FunctionGraph([x, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 5
            # there's an extra dimshuffle in there
            # but I can't think of a good rule to get rid of it
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_legacy in ops
            assert softmax_grad_legacy not in ops

        mean_bias_expressions = [
            mean(-log(softmax(x + b)[aet.arange(y.shape[0]), y])),
            -mean(log(softmax(b + x)[aet.arange(y.shape[0]), y])),
            -mean(log(softmax(x + b))[aet.arange(y.shape[0]), y]),
            mean(-log(softmax(b + x))[aet.arange(y.shape[0]), y]),
        ]

        for expr in mean_bias_expressions:

            fgraph = FunctionGraph([x, b, y], [expr])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 4
            assert crossentropy_softmax_argmax_1hot_with_bias in ops
            assert not [1 for o in ops if isinstance(o, AdvancedSubtensor)]

            fgraph = FunctionGraph([x, b, y], [grad(expr, x)])
            optdb.query(OPT_FAST_RUN).optimize(fgraph)

            ops = [node.op for node in fgraph.toposort()]
            assert len(ops) == 5
            assert crossentropy_softmax_1hot_with_bias_dx in ops
            assert softmax_with_bias in ops
            assert softmax_grad_legacy not in ops
Example No. 15
def myfunc(x):
    # `axis` comes from the enclosing (parametrized) test scope
    sm = softmax(x, axis=axis)
    logsm = log(sm)
    return logsm
Example No. 16
def f(a):
    # `axis` and `column` come from the enclosing (parametrized) test scope
    return softmax(a, axis=axis)[:, column]
Example No. 17
    def test_vector_perform(self):
        x = vector()
        f = aesara.function([x], softmax(x, axis=None))

        xv = np.random.randn(6).astype(config.floatX)
        assert np.allclose(f(xv), sp.softmax(xv))
Example No. 18
def f(a):
    return softmax(a, axis=None)