def test_backward_keepdims(self):
    w = ad.variable(np.random.random(), name='W')
    y = ad.sum(w, keepdims=True)
    self.numeric_gradient_check(y, {}, [w])
    val = np.random.random((3, 5))
    w = ad.variable(val, name='W')
    y = ad.sum(w.transpose(), keepdims=True)
    self.numeric_gradient_check(y, {}, [w])
    y = w.transpose().sum(axis=-1, keepdims=True)
    self.numeric_gradient_check(y, {}, [w])
    y = w.transpose().sum(axis=0, keepdims=True)
    self.numeric_gradient_check(y, {}, [w])
    y = w.transpose().sum(axis=(0, -1), keepdims=True)
    self.numeric_gradient_check(y, {}, [w])
    val = np.random.random((3, 4, 5))
    w = ad.variable(val, name='W')
    y = w.transpose().sum(keepdims=True)
    self.numeric_gradient_check(y, {}, [w])
    y = w.transpose().sum(axis=(0, 2), keepdims=True).sum(axis=1, keepdims=True)
    self.numeric_gradient_check(y, {}, [w])
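# A minimal NumPy sketch (not part of the library) of the rule the numeric
# gradient check above verifies: the gradient of a keepdims sum with respect
# to its input is the upstream gradient broadcast back to the input's shape,
# since every element contributes to the sum with weight 1.
import numpy as np

val = np.random.random((3, 5))
upstream = np.ones((1, 1))                   # gradient flowing into y = val.sum(keepdims=True)
grad = np.broadcast_to(upstream, val.shape)  # broadcast back to the input's shape
assert grad.shape == val.shape
assert np.allclose(grad, np.ones_like(val))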
def cross_entropy(y_true: ad.Operation, y_pred: ad.Operation) -> ad.Operation:
    """Cross entropy over the last axis.

    .. math::
       H(y, \\hat{y}) = - \\sum_i y_i \\log P \\left ( \\hat{y}_i \\right )

    :param y_true: Real label.
    :param y_pred: Probabilities for each label.
    :return: The result operation.
    """
    return ad.sum(-y_true * ad.log(y_pred), axis=-1)
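# Hedged usage sketch for cross_entropy. The ad.array constructor and the
# forward() method are assumed from the tests elsewhere in this repo; the
# values below are illustrative only.
import numpy as np

y_true = ad.array(np.array([[0.0, 1.0, 0.0]]))   # one-hot label
y_pred = ad.array(np.array([[0.2, 0.7, 0.1]]))   # predicted probabilities
loss = cross_entropy(y_true, y_pred)
print(loss.forward())                            # approx -log(0.7) ~= 0.357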
def test_forward_keepdims(self):
    val = np.random.random((3, 5))
    w = ad.array(val)
    y = ad.sum(w.transpose(), keepdims=True)
    actual = y.forward()
    expect = np.sum(val, keepdims=True)
    self.assertEqual((1, 1), y.shape)
    self.assertTrue(np.allclose(expect, actual), (expect, actual))
    y = w.transpose().sum(axis=-1, keepdims=True)
    actual = y.forward()
    expect = np.transpose(np.sum(val, axis=0, keepdims=True))
    self.assertEqual((5, 1), y.shape)
    self.assertTrue(np.allclose(expect, actual), (expect, actual))
    y = w.transpose().sum(axis=0, keepdims=True)
    actual = y.forward()
    expect = np.transpose(np.sum(val, axis=-1, keepdims=True))
    self.assertEqual((1, 3), y.shape)
    self.assertTrue(np.allclose(expect, actual), (expect, actual))
    y = w.transpose().sum(axis=(0, -1), keepdims=True)
    actual = y.forward()
    expect = np.sum(val, keepdims=True)
    self.assertEqual((1, 1), y.shape)
    self.assertTrue(np.allclose(expect, actual), (expect, actual))
def softmax(x: ad.Operation) -> ad.Operation:
    """Softmax over the last axis.

    .. math::
       \\text{softmax}(x)_i = \\frac{e^{x_i}}{\\sum_j e^{x_j}}

    The result and gradient of `exp` may be very large. For numerical
    stability, the maximum value is subtracted before exponentiation:

    .. math::
       \\text{softmax}(x)_i
       = \\frac{e^{x_i}}{\\sum_j e^{x_j}}
       = \\frac{e^{x_i} \\cdot e^{-\\max(x)}}{\\sum_j e^{x_j} \\cdot e^{-\\max(x)}}
       = \\frac{e^{x_i - \\max(x)}}{\\sum_j e^{x_j - \\max(x)}}

    :param x: Input operation.
    :return: The result operation.
    """
    m = ad.max(x, axis=-1, keepdims=True)
    e = ad.exp(x - m)
    s = ad.sum(e, axis=-1, keepdims=True)
    y = e / (s + 1e-8)
    y.name = 'softmax(%s)' % x.name
    return y
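# Hedged usage sketch for softmax; ad.array and forward() are assumed from the
# tests above. Large inputs do not overflow because the row maximum is
# subtracted before exponentiation.
import numpy as np

x = ad.array(np.array([[1000.0, 1001.0, 1002.0]]))
p = softmax(x).forward()
print(p)                   # finite values, roughly [0.090, 0.245, 0.665]
print(np.sum(p, axis=-1))  # close to 1 (up to the 1e-8 stabilizer in the denominator)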
def mean_square_error(y_true: ad.Operation, y_pred: ad.Operation) -> ad.Operation:
    """Squared error summed over the last axis.

    .. math::
       L(y, \\hat{y}) = \\sum_i \\left ( y_i - \\hat{y}_i \\right )^2

    :param y_true: Real values.
    :param y_pred: Predicted values.
    :return: The result operation.
    """
    return ad.sum(ad.square(y_true - y_pred), axis=-1)
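# Hedged usage sketch for mean_square_error; ad.array, ad.variable, and
# forward() are assumed from the tests above. The result is the squared error
# summed over the last axis: (1 - 0.5)^2 + (2 - 2.5)^2 = 0.5.
import numpy as np

y_true = ad.array(np.array([[1.0, 2.0]]))
y_pred = ad.variable(np.array([[0.5, 2.5]]), name='P')
loss = mean_square_error(y_true, y_pred)
print(loss.forward())  # approx [0.5]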