Example #1
    def loss(y, y_pred):
        """
        Cross-entropy (log) loss. Returns the sum (not average!) of the
        losses per-sample.

        Parameters
        ----------
        y : numpy array of shape (n, m)
            Class labels (one-hot with m possible classes) for each of n examples
        y_pred : numpy array of shape (n, m)
            Probabilities of each of m classes for the n examples in the batch

        Returns
        -------
        loss : float
            The sum of the cross-entropy across classes and examples
        """
        assert_is_binary(y)
        assert_is_stochastic(y_pred)

        # prevent taking the log of 0
        eps = np.finfo(float).eps

        # each example is associated with a single class; sum the negative log
        # probability of the correct label over all samples in the batch.
        # observe that we are taking advantage of the fact that y is one-hot
        # encoded!
        cross_entropy = -np.sum(y * np.log(y_pred + eps))
        return cross_entropy
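
A minimal usage sketch for the loss above, assuming it is callable as a plain function. The `assert_is_binary` and `assert_is_stochastic` helpers are defined elsewhere in the source library; the stand-ins below are illustrative assumptions, not the originals.

    import numpy as np

    def assert_is_binary(x):
        # stand-in: every entry must be exactly 0 or 1
        assert np.all((x == 0) | (x == 1)), "labels must be one-hot"

    def assert_is_stochastic(x):
        # stand-in: each row must be a valid probability distribution
        assert np.all(x >= 0) and np.allclose(x.sum(axis=1), 1.0), "rows must sum to 1"

    # two examples over three classes; the true classes are 0 and 2
    y = np.array([[1, 0, 0], [0, 0, 1]])
    y_pred = np.array([[0.7, 0.2, 0.1], [0.1, 0.3, 0.6]])

    print(loss(y, y_pred))  # -(log(0.7) + log(0.6)) ~= 0.8675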
Example #2
    def grad(self, y_true, y_pred, **kwargs):
        """  ???????????????????????????????????????????????????
        Let:  f(z) = cross_entropy(softmax(z)).
        Then: df / dz = softmax(z) - y_true
                      = y_pred - y_true

        Note that this gradient goes through both the cross-entropy loss AND the
        softmax non-linearity to return df / dz (rather than df / d softmax(z) ).

        Input
        -----
        y_true : numpy array of shape (n, m)
            A one-hot encoding of the true class labels. Each row constitutes a
            training example, and each column is a different class.
        y_pred: numpy array of shape (n, m)
            The network predictions for the probability of each of m class labels on
            each of n examples in a batch.

        Returns
        -------
        grad : numpy array of shape (n, m)
            The gradient of the cross-entropy loss with respect to the *input*
            to the softmax function.
        """
        assert_is_binary(y_true)
        assert_is_stochastic(y_pred)
        g = y_pred - y_true

        return g
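
The identity df / dz = y_pred - y_true stated in the docstring is the standard softmax-plus-cross-entropy result; a short derivation (shown here for completeness, not taken from the source):

$$
f(\mathbf{z}) = -\sum_k y_k \log s_k,
\qquad
s_k = \frac{e^{z_k}}{\sum_j e^{z_j}},
\qquad
\frac{\partial s_k}{\partial z_i} = s_k(\delta_{ki} - s_i)
$$

$$
\frac{\partial f}{\partial z_i}
= -\sum_k \frac{y_k}{s_k}\, s_k(\delta_{ki} - s_i)
= -y_i + s_i \sum_k y_k
= s_i - y_i
$$

since the one-hot labels satisfy $\sum_k y_k = 1$.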
Example #3
    def grad(y, y_pred):
        """
        Let:  f(z) = cross_entropy(softmax(z)).
        Then: df / dz = softmax(z) - y_true
                      = y_pred - y_true

        Note that this gradient goes through both the cross-entropy loss AND the
        softmax non-linearity to return df / dz (rather than df / d softmax(z) ).

        Input
        -----
        y : numpy array of shape (n, m)
            A one-hot encoding of the true class labels. Each row constitutes a
            training example, and each column is a different class.
        y_pred: numpy array of shape (n, m)
            The network predictions for the probability of each of m class labels on
            each of n examples in a batch.

        Returns
        -------
        grad : numpy array of shape (n, m)
            The gradient of the cross-entropy loss with respect to the *input*
            to the softmax function.
        """
        assert_is_binary(y)
        assert_is_stochastic(y_pred)

        # derivative of xe wrt z is y_pred - y_true, hence we can just
        # subtract 1 from the probability of the correct class labels
        grad = y_pred - y

        # [optional] scale the gradients by the number of examples in the batch
        # n, m = y.shape
        # grad /= n
        return grad
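
A quick numerical sanity check of the gradient above, using central finite differences on the composite cross_entropy(softmax(z)). The `softmax` and `xe` helpers, the RNG seed, and the step size are illustrative assumptions, not part of the source.

    import numpy as np

    def softmax(z):
        # subtract the row max for numerical stability
        e = np.exp(z - z.max(axis=1, keepdims=True))
        return e / e.sum(axis=1, keepdims=True)

    def xe(z, y):
        # cross-entropy of softmax(z) against one-hot labels y
        eps = np.finfo(float).eps
        return -np.sum(y * np.log(softmax(z) + eps))

    rng = np.random.default_rng(0)
    z = rng.normal(size=(2, 3))
    y = np.eye(3)[[0, 2]]                # one-hot labels for classes 0 and 2

    analytic = softmax(z) - y            # grad(y, softmax(z)) from the snippet above

    # central finite differences on each entry of z
    h = 1e-6
    numeric = np.zeros_like(z)
    for idx in np.ndindex(*z.shape):
        zp, zm = z.copy(), z.copy()
        zp[idx] += h
        zm[idx] -= h
        numeric[idx] = (xe(zp, y) - xe(zm, y)) / (2 * h)

    print(np.allclose(analytic, numeric, atol=1e-5))  # True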
Example #4
    def loss(y_true, y_pred):
        """Cross-entropy (log) loss. Returns the sum (not average!) of the
        losses per-sample.

        :param y_true: (n, m) for n_samples and m_classes
        :param y_pred: (n, m)
        :return:
        """
        assert_is_binary(y_true)
        assert_is_stochastic(y_pred)
        eps = np.finfo(float).eps

        cross_entropy = -np.sum(y_true * np.log(y_pred + eps))

        return cross_entropy