Example #1
    def __call__(self, model, X, Y, **kwargs):

        # Forward prop without dropout (Y_hat_e) and with dropout applied (Y_hat).
        Y_hat_e = model.fprop(X)
        Y_hat = model.fprop(X, apply_dropout=True)

        # Elementwise ratio Y_hat_e / Y_hat, computed by the numerically
        # stabilized softmax_ratio helper.
        softmax_r = softmax_ratio(Y_hat_e, Y_hat)

        # Treat the ratio as a constant so no gradient flows through it.
        softmax_r = block_gradient(softmax_r)

        neg_terms = softmax_r * Y_hat

        # Sum over classes, average over the batch.
        neg = - neg_terms.sum(axis=1).mean(axis=0)

        # Walk the Theano graph backwards from Y_hat to recover the
        # pre-softmax input z, skipping over a Print op if one is present.
        assert hasattr(Y_hat, 'owner')
        owner = Y_hat.owner
        assert owner is not None
        op = owner.op
        if isinstance(op, Print):
            assert len(owner.inputs) == 1
            Y_hat, = owner.inputs
            owner = Y_hat.owner
            op = owner.op
        assert isinstance(op, T.nnet.Softmax)
        z, = owner.inputs
        assert z.ndim == 2

        # Numerically stable log-softmax: subtract the row-wise max, then
        # use the log-sum-exp identity.
        z = z - z.max(axis=1).dimshuffle(0, 'x')
        log_prob = z - T.log(T.exp(z).sum(axis=1).dimshuffle(0, 'x'))
        # we use sum and not mean because this is really one variable per row
        log_prob_of = (Y * log_prob).sum(axis=1)
        assert log_prob_of.ndim == 1
        log_prob_of = log_prob_of.mean()

        return -(log_prob_of + neg)
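Both examples rely on pylearn2's softmax_ratio helper, which Example #2 below shows agrees with the naive elementwise division softmax(Z_numer) / softmax(Z_denom). The snippet here is only a minimal sketch of how such a ratio can be computed stably; it assumes direct access to the pre-softmax logits (the real helper presumably recovers them by inspecting the Theano graph, much as the code above does for the log-softmax), and the name stable_softmax_ratio is made up for illustration.

import theano.tensor as T

def stable_softmax_ratio(Z_numer, Z_denom):
    # Sketch: softmax(Z_numer) / softmax(Z_denom), computed via log-softmax
    # so that no individual softmax value is ever formed (and therefore
    # cannot underflow to zero before the division).
    def log_softmax(Z):
        Z = Z - Z.max(axis=1).dimshuffle(0, 'x')   # row-wise max trick
        return Z - T.log(T.exp(Z).sum(axis=1).dimshuffle(0, 'x'))
    return T.exp(log_softmax(Z_numer) - log_softmax(Z_denom))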
Example #2
import numpy as np
import theano.tensor as T

# Import paths as used in the pylearn2 codebase this test comes from.
from pylearn2.expr.nnet import softmax_ratio
from pylearn2.utils import sharedX


def test_softmax_ratio():
    # Tests that the numerically stabilized version of the softmax ratio
    # matches the naive implementation, for small input values

    n = 3
    m = 4

    rng = np.random.RandomState([2013, 3, 23])

    Z_numer = sharedX(rng.randn(m, n))
    Z_denom = sharedX(rng.randn(m, n))

    numer = T.nnet.softmax(Z_numer)
    denom = T.nnet.softmax(Z_denom)

    naive = numer / denom
    stable = softmax_ratio(numer, denom)

    naive = naive.eval()
    stable = stable.eval()

    assert np.allclose(naive, stable)
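The test above checks agreement for small input values, where the naive ratio is well behaved. A quick way to see why a stabilized version exists at all is to feed logits with large magnitudes, where some softmax entries underflow to exactly zero and the naive division blows up. This is a sketch under the same pylearn2 imports assumed above; it does not call softmax_ratio, so it makes no claim about how the real helper handles this case.

import numpy as np
import theano.tensor as T
from pylearn2.utils import sharedX

# softmax([800, 0, -800]) underflows to [1, 0, 0] in float, so the naive
# elementwise ratio against it contains infinities.
Z_numer = sharedX(np.array([[0., 0., 0.]]))
Z_denom = sharedX(np.array([[800., 0., -800.]]))

naive = (T.nnet.softmax(Z_numer) / T.nnet.softmax(Z_denom)).eval()
print(naive)                      # e.g. [[ 0.333...  inf  inf]]
print(np.isfinite(naive).all())   # False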