def test_forward_step(self):
    N, D, H = 3, 4, 5

    # Create the layer
    layer = LSTMRecurrentLayer(D, H)
    layer.Wx = np.linspace(-2.1, 1.3, num=4 * D * H).reshape(D, 4 * H)
    layer.Wh = np.linspace(-0.7, 2.2, num=4 * H * H).reshape(H, 4 * H)
    layer.b = np.linspace(0.3, 0.7, num=4 * H)

    # Create some arbitrary inputs
    x = np.linspace(-0.4, 1.2, num=N * D).reshape(N, D)
    prev_h = np.linspace(-0.3, 0.7, num=N * H).reshape(N, H)
    prev_c = np.linspace(-0.4, 0.9, num=N * H).reshape(N, H)

    next_h, next_c, _ = layer._forward_step(x, prev_h, prev_c)

    expected = np.asarray(
        [[0.24635157, 0.28610883, 0.32240467, 0.35525807, 0.38474904],
         [0.49223563, 0.55611431, 0.61507696, 0.66844003, 0.7159181],
         [0.56735664, 0.66310127, 0.74419266, 0.80889665, 0.858299]])
    self.assertAlmostEqual(rel_error(expected, next_h), 1e-9, places=2)

    expected = np.asarray(
        [[0.32986176, 0.39145139, 0.451556, 0.51014116, 0.56717407],
         [0.66382255, 0.76674007, 0.87195994, 0.97902709, 1.08751345],
         [0.74192008, 0.90592151, 1.07717006, 1.25120233, 1.42395676]])
    self.assertAlmostEqual(rel_error(expected, next_c), 1e-9, places=2)
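# For reference, a minimal sketch of the single LSTM step that the test above
# exercises, assuming the conventional (i, f, o, g) ordering of the stacked
# gate weights. The name lstm_step_reference is illustrative only; the layer's
# actual _forward_step may use a different layout and cache extra values.
def lstm_step_reference(x, prev_h, prev_c, Wx, Wh, b):
    H = prev_h.shape[1]
    a = x.dot(Wx) + prev_h.dot(Wh) + b            # pre-activations, shape (N, 4H)
    i = 1.0 / (1.0 + np.exp(-a[:, 0 * H:1 * H]))  # input gate
    f = 1.0 / (1.0 + np.exp(-a[:, 1 * H:2 * H]))  # forget gate
    o = 1.0 / (1.0 + np.exp(-a[:, 2 * H:3 * H]))  # output gate
    g = np.tanh(a[:, 3 * H:4 * H])                # candidate cell values
    next_c = f * prev_c + i * g
    next_h = o * np.tanh(next_c)
    return next_h, next_c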
def test_backward(self):
    np.random.seed(271)
    N, T, H, M = 2, 3, 4, 5

    # Create the layer
    layer = TemporalAffineLayer(H, M)
    W = np.random.randn(H, M)
    b = np.random.randn(M)

    # Create some arbitrary inputs
    x = np.random.randn(N, T, H)

    out = layer.forward(x, W=W, b=b)
    grad_out = np.random.randn(*out.shape)
    grad_x, grad_W, grad_b = layer.backward(grad_out)

    fx = lambda x: layer.forward(x, W=W, b=b)
    fw = lambda W: layer.forward(x, W=W, b=b)
    fb = lambda b: layer.forward(x, W=W, b=b)

    grad_x_num = eval_numerical_gradient_array(fx, x, grad_out)
    grad_W_num = eval_numerical_gradient_array(fw, W, grad_out)
    grad_b_num = eval_numerical_gradient_array(fb, b, grad_out)

    self.assertAlmostEqual(rel_error(grad_x_num, grad_x), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_W_num, grad_W), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_b_num, grad_b), 1e-9, places=2)
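# A reference sketch of the temporal affine transform gradient-checked above:
# the same (W, b) is applied independently at every timestep by flattening the
# (N, T) axes. The *_reference names are illustrative, not the layer's API.
def temporal_affine_forward_reference(x, W, b):
    N, T, H = x.shape
    M = W.shape[1]
    return x.reshape(N * T, H).dot(W).reshape(N, T, M) + b

def temporal_affine_backward_reference(x, W, grad_out):
    N, T, H = x.shape
    M = W.shape[1]
    grad_x = grad_out.reshape(N * T, M).dot(W.T).reshape(N, T, H)
    grad_W = x.reshape(N * T, H).T.dot(grad_out.reshape(N * T, M))
    grad_b = grad_out.sum(axis=(0, 1))
    return grad_x, grad_W, grad_b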
def validate_gradient():
    """
    Function to validate the implementation of gradient computation.
    Should be used together with gradient_check.py. This is a useful thing
    to do when you implement your own gradient calculation methods.
    It is not required for this assignment.
    """
    from gradient_check import eval_numerical_gradient, rel_error

    # randomly initialize W
    dim = 4
    num_classes = 4
    num_inputs = 5

    params = {}
    std = 0.001
    params['W1'] = std * np.random.randn(dim, 10)
    params['b1'] = np.zeros(10)
    params['W2'] = std * np.random.randn(10, num_classes)
    params['b2'] = np.zeros(num_classes)

    X = np.random.randn(num_inputs, dim)
    y = np.array([0, 1, 2, 2, 1])

    loss, grads = compute_neural_net_loss(params, X, y, reg=0.1)

    # these should all be less than 1e-8 or so
    for param_name in params:
        f = lambda W: compute_neural_net_loss(params, X, y, reg=0.1)[0]
        param_grad_num = eval_numerical_gradient(f, params[param_name],
                                                 verbose=False)
        print('%s max relative error: %e' %
              (param_name, rel_error(param_grad_num, grads[param_name])))
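# For context, rel_error as used throughout these checks is typically the
# maximum elementwise relative difference. This is a sketch of the usual
# convention; the actual definition lives in gradient_check.py.
def rel_error_reference(x, y):
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))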
def test_forward(self):
    N, D, H, T = 2, 5, 4, 3

    # Create the layer
    layer = LSTMRecurrentLayer(D, H)
    layer.Wx = np.linspace(-0.2, 0.9, num=4 * D * H).reshape(D, 4 * H)
    layer.Wh = np.linspace(-0.3, 0.6, num=4 * H * H).reshape(H, 4 * H)
    layer.b = np.linspace(0.2, 0.7, num=4 * H)

    # Create some arbitrary inputs
    x = np.linspace(-0.4, 0.6, num=N * T * D).reshape(N, T, D)
    h0 = np.linspace(-0.4, 0.8, num=N * H).reshape(N, H)

    hidden_state_over_time = layer.forward(x, h0)

    expected = np.asarray(
        [[[0.01764008, 0.01823233, 0.01882671, 0.0194232],
          [0.11287491, 0.12146228, 0.13018446, 0.13902939],
          [0.31358768, 0.33338627, 0.35304453, 0.37250975]],
         [[0.45767879, 0.4761092, 0.4936887, 0.51041945],
          [0.6704845, 0.69350089, 0.71486014, 0.7346449],
          [0.81733511, 0.83677871, 0.85403753, 0.86935314]]])
    self.assertAlmostEqual(rel_error(hidden_state_over_time, expected),
                           1e-9, places=2)
def test_backward(self):
    np.random.seed(271)
    N, D, T, H = 2, 3, 10, 6

    # Create the layer
    layer = LSTMRecurrentLayer(D, H)
    Wx = np.random.randn(D, 4 * H)
    Wh = np.random.randn(H, 4 * H)
    b = np.random.randn(4 * H)

    # Create some arbitrary inputs
    x = np.random.randn(N, T, D)
    h0 = np.random.randn(N, H)

    hidden_state_over_time = layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
    grad_h_over_time = np.random.randn(*hidden_state_over_time.shape)
    grad_x, grad_h0, grad_Wx, grad_Wh, grad_b = layer.backward(
        grad_h_over_time)

    fx = lambda x: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
    fh0 = lambda h0: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
    fWx = lambda Wx: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
    fWh = lambda Wh: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)
    fb = lambda b: layer.forward(x, h0, Wx=Wx, Wh=Wh, b=b)

    grad_x_num = eval_numerical_gradient_array(fx, x, grad_h_over_time)
    grad_h0_num = eval_numerical_gradient_array(fh0, h0, grad_h_over_time)
    grad_Wx_num = eval_numerical_gradient_array(fWx, Wx, grad_h_over_time)
    grad_Wh_num = eval_numerical_gradient_array(fWh, Wh, grad_h_over_time)
    grad_b_num = eval_numerical_gradient_array(fb, b, grad_h_over_time)

    self.assertAlmostEqual(rel_error(grad_x_num, grad_x), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_h0_num, grad_h0), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_Wx_num, grad_Wx), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_Wh_num, grad_Wh), 1e-9, places=2)
    self.assertAlmostEqual(rel_error(grad_b_num, grad_b), 1e-9, places=2)
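# The numerical gradients compared against above are usually computed with
# centered finite differences, one coordinate at a time. This is a sketch of
# the common implementation; the real helper is imported from gradient_check.
def eval_numerical_gradient_array_reference(f, x, df, h=1e-5):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old_value = x[ix]
        x[ix] = old_value + h
        pos = f(x).copy()                          # f(x + h)
        x[ix] = old_value - h
        neg = f(x).copy()                          # f(x - h)
        x[ix] = old_value                          # restore original value
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad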
def test_temporal_softmax_loss(self):
    N, T, V = 7, 8, 9

    score = np.random.randn(N, T, V)
    y = np.random.randint(V, size=(N, T))
    mask = (np.random.rand(N, T) > 0.5)

    _, grad_score = temporal_softmax_loss(score, y, mask, verbose=False)
    grad_score_num = eval_numerical_gradient(
        lambda x: temporal_softmax_loss(x, y, mask)[0], score, verbose=False)

    self.assertAlmostEqual(rel_error(grad_score, grad_score_num),
                           1e-9, places=2)
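# A sketch of the masked, time-distributed softmax loss that this test
# gradient-checks: scores are flattened over (N, T), cross-entropy is counted
# only where mask is True, and the result is averaged over the minibatch size
# N. Normalization conventions vary, so treat this as illustrative only.
def temporal_softmax_loss_reference(score, y, mask):
    N, T, V = score.shape
    flat_score = score.reshape(N * T, V)
    flat_y = y.reshape(N * T)
    flat_mask = mask.reshape(N * T)

    probs = np.exp(flat_score - flat_score.max(axis=1, keepdims=True))
    probs /= probs.sum(axis=1, keepdims=True)

    loss = -np.sum(flat_mask * np.log(probs[np.arange(N * T), flat_y])) / N

    grad = probs.copy()
    grad[np.arange(N * T), flat_y] -= 1
    grad = grad * flat_mask[:, None] / N
    return loss, grad.reshape(N, T, V)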
def gradient_check(self, X, y):
    """Runs gradient check on every parameter of the model.

    Args:
        X: Input data in matrix form, of any shape
        y: Vector of labels
    """
    print("Running numeric gradient check with reg = %s" % self.reg)

    loss, grads = self.loss(X, y)

    for param_key in sorted(self.params):
        f = lambda _: self.loss(X, y)[0]
        num_grad = eval_numerical_gradient(f, self.params[param_key],
                                           verbose=False)
        print("%s relative error: %.2e" %
              (param_key, rel_error(num_grad, grads[param_key])))
def test_backward(self):
    np.random.seed(271)
    N, T, V, D = 50, 3, 5, 6

    # Create the layer
    layer = WordEmbeddingLayer(V, D)
    layer.W = np.random.randn(V, D)

    # Create some arbitrary inputs
    x = np.random.randint(V, size=(N, T))

    out = layer.forward(x)
    grad_out = np.random.randn(*out.shape)
    grad_W = layer.backward(grad_out)

    f = lambda W: layer.forward(x, W=W)
    grad_W_num = eval_numerical_gradient_array(f, layer.W, grad_out)

    self.assertAlmostEqual(rel_error(grad_W_num, grad_W), 1e-9, places=2)
def test_forward(self):
    V, D = 5, 3

    # Create the layer
    layer = WordEmbeddingLayer(V, D)
    layer.W = np.linspace(0, 1, num=V * D).reshape(V, D)

    # Create some arbitrary inputs
    x = np.asarray([[0, 3, 1, 2],
                    [2, 1, 0, 3]])

    out = layer.forward(x)

    expected = np.asarray(
        [[[0., 0.07142857, 0.14285714],
          [0.64285714, 0.71428571, 0.78571429],
          [0.21428571, 0.28571429, 0.35714286],
          [0.42857143, 0.5, 0.57142857]],
         [[0.42857143, 0.5, 0.57142857],
          [0.21428571, 0.28571429, 0.35714286],
          [0., 0.07142857, 0.14285714],
          [0.64285714, 0.71428571, 0.78571429]]])
    self.assertAlmostEqual(rel_error(expected, out), 1e-9, places=2)
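# A reference sketch of the embedding lookup tested above and its backward
# pass: forward is plain row indexing into W, and backward scatter-adds the
# upstream gradients into the rows that were looked up (np.add.at handles
# repeated indices correctly). Function names here are illustrative only.
def word_embedding_forward_reference(x, W):
    return W[x]                       # (N, T) int indices -> (N, T, D) vectors

def word_embedding_backward_reference(x, W, grad_out):
    grad_W = np.zeros_like(W)
    np.add.at(grad_W, x, grad_out)    # accumulate, since words can repeat
    return grad_W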