def test_SoftmaxLayerGradientCheck(self):
    x = np.random.rand(3)
    layer = Softmax()
    layer.forward(x)
    # Upstream gradient of ones (broadcast from [1.]), so the analytic gradient
    # should match the numerical gradient of sum(softmax(x)) w.r.t. x.
    grad = layer.backward(np.array([1.]))
    numgrad = numerical_gradient.calc(layer.forward, x)
    numgrad = np.sum(numgrad, axis=1)
    numerical_gradient.assert_are_similar(grad, numgrad)
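# The Softmax layer used throughout these snippets is not defined here. A
# minimal sketch consistent with how the tests call it (softmax over the last
# axis in forward, analytic gradient in backward) might look like the class
# below; the actual implementation in `layers` may differ.
import numpy as np

class Softmax:
    def __init__(self) -> None:
        self.out = None

    def forward(self, x: np.ndarray) -> np.ndarray:
        # Shift by the row max for numerical stability before exponentiating.
        x = x - np.max(x, axis=-1, keepdims=True)
        e = np.exp(x)
        self.out = e / np.sum(e, axis=-1, keepdims=True)
        return self.out

    def backward(self, dout: np.ndarray) -> np.ndarray:
        # dx_i = y_i * (dout_i - sum_j dout_j * y_j), applied along the last axis.
        dx = self.out * dout
        dx -= self.out * np.sum(dx, axis=-1, keepdims=True)
        return dx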
class AttentionWeight:
    """Computes attention weights: a softmax over the dot products between
    each encoder hidden state in hs and the decoder hidden state h."""

    def __init__(self) -> None:
        self.params = []
        self.grads = []
        self.softmax = Softmax()
        self.cache = None

    def forward(self, hs: np.ndarray, h: np.ndarray) -> np.ndarray:
        N, T, H = hs.shape
        # Broadcast h to (N, T, H) and take the dot product along the hidden axis.
        hr = h.reshape(N, 1, H).repeat(T, axis=1)
        t = hs * hr
        s = np.sum(t, axis=2)
        a = self.softmax.forward(s)
        self.cache = (hs, hr)
        return a

    def backward(self, da: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        hs, hr = self.cache
        N, T, H = hs.shape
        ds = self.softmax.backward(da)
        # Undo the sum over the hidden axis, then route gradients to both inputs.
        dt = ds.reshape(N, T, 1).repeat(H, axis=2)
        dhs = dt * hr
        dhr = dt * hs
        dh = np.sum(dhr, axis=1)
        return dhs, dh
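# A quick shape check for AttentionWeight (values are arbitrary): the attention
# weights come out as (N, T) rows that sum to one, and backward returns
# gradients matching the shapes of hs and h.
np.random.seed(0)
N, T, H = 2, 5, 4                       # batch size, sequence length, hidden size
hs = np.random.randn(N, T, H)           # encoder hidden states
h = np.random.randn(N, H)               # decoder hidden state

attn = AttentionWeight()
a = attn.forward(hs, h)
assert a.shape == (N, T)
assert np.allclose(a.sum(axis=1), 1.0)  # each row is a probability distribution

dhs, dh = attn.backward(np.ones_like(a))
assert dhs.shape == hs.shape and dh.shape == h.shape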
def test_softmax_grad(N=None):
    from layers import Softmax
    from functools import partial

    np.random.seed(12345)

    N = np.inf if N is None else N
    p_soft = partial(F.softmax, dim=1)
    gold = torch_gradient_generator(p_soft)

    i = 0
    while i < N:
        mine = Softmax()
        n_ex = np.random.randint(1, 3)
        n_dims = np.random.randint(1, 50)
        z = random_tensor((n_ex, n_dims), standardize=True)
        out = mine.forward(z)

        assert_almost_equal(
            gold(z),
            mine.backward(np.ones_like(out)),
            err_msg="Theirs:\n{}\n\nMine:\n{}\n".format(
                gold(z), mine.backward(np.ones_like(out))
            ),
            decimal=3,
        )
        print("PASSED")
        i += 1
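# torch_gradient_generator comes from the test utilities and is not shown
# here. The comparison against mine.backward(np.ones_like(out)) suggests it
# returns the gradient of the summed output with respect to the input; a
# sketch under that assumption:
import torch

def torch_gradient_generator(fn, **kwargs):
    def get_grad(z):
        # Gradient of sum(fn(z)) w.r.t. z, computed with torch autograd.
        z_t = torch.tensor(z, dtype=torch.float32, requires_grad=True)
        fn(z_t, **kwargs).sum().backward()
        return z_t.grad.numpy()

    return get_grad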
def test_softmax_activation(N=None):
    from layers import Softmax

    N = np.inf if N is None else N

    mine = Softmax()
    gold = lambda z: F.softmax(torch.FloatTensor(z), dim=1).numpy()

    i = 0
    while i < N:
        n_dims = np.random.randint(1, 100)
        z = random_stochastic_matrix(1, n_dims)
        assert_almost_equal(mine.forward(z), gold(z))
        print("PASSED")
        i += 1
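# random_tensor and random_stochastic_matrix are also test utilities that are
# not reproduced here. Plausible stand-ins, assuming `standardize` means zero
# mean / unit variance and a stochastic matrix has rows summing to one:
import numpy as np

def random_tensor(shape, standardize=False):
    X = np.random.rand(*shape)
    if standardize:
        X = (X - X.mean()) / (X.std() + 1e-8)
    return X

def random_stochastic_matrix(n_rows, n_cols):
    X = np.random.rand(n_rows, n_cols)
    return X / X.sum(axis=1, keepdims=True)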