def test_backward(self):
    """Check LSTMLayer.backward against grad_check for inputs and parameters.

    Runs a seeded forward/backward pass, reads the gradients stored in
    ``lstm.grad`` and compares each with the value returned by
    ``grad_check`` (helper defined elsewhere; presumably a numerical
    gradient estimator -- confirm) using ``rel_error`` with a 1e-6 bound.
    """
    np.random.seed(231)
    N, D, T, H = 2, 3, 10, 6

    # The layer is built before any data is drawn so the order of RNG
    # consumption stays fixed.
    lstm = LSTMLayer(D, H)
    x = np.random.randn(N, T, D)
    h0 = np.random.randn(N, H)
    lstm.Wx = np.random.randn(D, 4 * H)
    lstm.Wh = np.random.randn(H, 4 * H)
    lstm.b = np.random.randn(4 * H)

    out = lstm.forward(x, h0)
    dnext_h = np.random.randn(*out.shape)
    lstm.backward(dnext_h)
    analytic = [lstm.grad[key] for key in ('dx', 'dh0', 'dWx', 'dWh', 'db')]

    def run_forward(_):
        # The argument is ignored; the closure re-reads x, h0 and the layer.
        return lstm.forward(x, h0)

    # List elements are evaluated in order, one grad_check call at a time.
    numeric = [
        grad_check(run_forward, x, dnext_h),
        grad_check(run_forward, h0, dnext_h),
        grad_check(run_forward, lstm.Wx, dnext_h),
        grad_check(run_forward, lstm.Wh, dnext_h),
        grad_check(run_forward, lstm.b, dnext_h),
    ]

    for estimated, computed in zip(numeric, analytic):
        assert rel_error(estimated, computed) < 1e-6
def test_backward(self):
    """Check RNNLayer.backward against grad_check for inputs and parameters.

    Runs a seeded forward/backward pass over a random sequence, reads the
    gradients stored in ``rnn.grad`` and compares each with the value
    returned by ``grad_check`` (helper defined elsewhere; presumably a
    numerical gradient estimator -- confirm) using ``rel_error`` with a
    1e-6 bound.
    """
    np.random.seed(145)
    N, D, T, H = 3, 10, 7, 5

    # Construct the layer first so the sequence of RNG draws is unchanged.
    rnn = RNNLayer(D, H)
    x = np.random.randn(N, T, D)
    h0 = np.random.randn(N, H)
    rnn.Wx = np.random.randn(D, H)
    rnn.Wh = np.random.randn(H, H)
    rnn.b = np.random.randn(H)

    out = rnn.forward(x, h0)
    dnext_h = np.random.randn(*out.shape)
    rnn.backward(dnext_h)
    analytic = [rnn.grad[key] for key in ('dx', 'dh0', 'dWx', 'dWh', 'db')]

    def run_forward(_):
        # The argument is ignored; the closure re-reads x, h0 and the layer.
        return rnn.forward(x, h0)

    # List elements are evaluated in order, one grad_check call at a time.
    numeric = [
        grad_check(run_forward, x, dnext_h),
        grad_check(run_forward, h0, dnext_h),
        grad_check(run_forward, rnn.Wx, dnext_h),
        grad_check(run_forward, rnn.Wh, dnext_h),
        grad_check(run_forward, rnn.b, dnext_h),
    ]

    for estimated, computed in zip(numeric, analytic):
        assert rel_error(estimated, computed) < 1e-6
def test_backward_step(self):
    """Check RNNLayer.backward_step against grad_check for a single step.

    The five values returned by ``backward_step`` are compared with the
    values ``grad_check`` returns for the step input, the previous hidden
    state and the three parameters, using ``rel_error`` with a 1e-6 bound.
    """
    np.random.seed(145)
    N, D, H = 3, 10, 5

    # Construct the layer first so the sequence of RNG draws is unchanged.
    rnn = RNNLayer(D, H)
    x = np.random.randn(N, D)
    prev_h = np.random.randn(N, H)
    rnn.Wx = np.random.randn(D, H)
    rnn.Wh = np.random.randn(H, H)
    rnn.b = np.random.randn(H)

    out, cache = rnn.forward_step(x, prev_h)
    # Deterministic upstream gradient, one entry per hidden unit.
    dnext_h = np.linspace(-0.2, 0.4, num=N * H).reshape(N, H)
    dx, dprev_h, dWx, dWh, db = rnn.backward_step(dnext_h, cache)
    analytic = [dx, dprev_h, dWx, dWh, db]

    def step_output(_):
        # Only the first returned element (the next hidden state) is checked.
        return rnn.forward_step(x, prev_h)[0]

    # List elements are evaluated in order, one grad_check call at a time.
    numeric = [
        grad_check(step_output, x, dnext_h),
        grad_check(step_output, prev_h, dnext_h),
        grad_check(step_output, rnn.Wx, dnext_h),
        grad_check(step_output, rnn.Wh, dnext_h),
        grad_check(step_output, rnn.b, dnext_h),
    ]

    for estimated, computed in zip(numeric, analytic):
        assert rel_error(estimated, computed) < 1e-6
def test_forward(self):
    """Check RNNLayer.forward on a deterministic sequence for every sample.

    Inputs, initial hidden state and parameters are filled with evenly
    spaced values so the output is reproducible. The full output is
    compared with ``expected_h``; comparing only the first batch element
    would leave the second sample's reference values unused and that
    sample unverified.
    """
    N, T, D, H = 2, 3, 4, 5
    rnn = RNNLayer(D, H)

    x = np.linspace(-0.1, 0.3, num=N * T * D).reshape(N, T, D)
    prev_h = np.linspace(-0.3, 0.1, num=N * H).reshape(N, H)
    rnn.Wx = np.linspace(-0.2, 0.4, num=D * H).reshape(D, H)
    rnn.Wh = np.linspace(-0.4, 0.1, num=H * H).reshape(H, H)
    rnn.b = np.linspace(-0.7, 0.1, num=H)

    h = rnn.forward(x, prev_h)

    expected_h = np.array([
        [
            [-0.42070749, -0.27279261, -0.11074945, 0.05740409, 0.22236251],
            [-0.39525808, -0.22554661, -0.0409454, 0.14649412, 0.32397316],
            [-0.42305111, -0.24223728, -0.04287027, 0.15997045, 0.35014525],
        ],
        [
            [-0.55857474, -0.39065825, -0.19198182, 0.02378408, 0.23735671],
            [-0.27150199, -0.07088804, 0.13562939, 0.33099728, 0.50158768],
            [-0.51014825, -0.30524429, -0.06755202, 0.17806392, 0.40333043],
        ],
    ])

    # Compare all N samples, not just index 0.
    assert rel_error(expected_h, h) < 1e-6
def test_forward_step(self):
    """Check one LSTMLayer.forward_step call against stored reference values.

    All inputs and parameters are evenly spaced values, so the returned
    hidden and cell states can be compared with fixed arrays using
    ``rel_error`` with a 1e-6 bound.
    """
    N, D, H = 3, 4, 5
    lstm = LSTMLayer(D, H)

    x = np.linspace(-0.4, 1.2, num=N * D).reshape(N, D)
    prev_h = np.linspace(-0.3, 0.7, num=N * H).reshape(N, H)
    prev_c = np.linspace(-0.4, 0.9, num=N * H).reshape(N, H)
    lstm.Wx = np.linspace(-2.1, 1.3, num=4 * D * H).reshape(D, 4 * H)
    lstm.Wh = np.linspace(-0.7, 2.2, num=4 * H * H).reshape(H, 4 * H)
    lstm.b = np.linspace(0.3, 0.7, num=4 * H)

    # The third returned element is not needed by this test.
    next_h, next_c, _ = lstm.forward_step(x, prev_h, prev_c)

    expected_next_h = np.asarray([
        [0.24635157, 0.28610883, 0.32240467, 0.35525807, 0.38474904],
        [0.49223563, 0.55611431, 0.61507696, 0.66844003, 0.7159181],
        [0.56735664, 0.66310127, 0.74419266, 0.80889665, 0.858299],
    ])
    expected_next_c = np.asarray([
        [0.32986176, 0.39145139, 0.451556, 0.51014116, 0.56717407],
        [0.66382255, 0.76674007, 0.87195994, 0.97902709, 1.08751345],
        [0.74192008, 0.90592151, 1.07717006, 1.25120233, 1.42395676],
    ])

    comparisons = ((expected_next_h, next_h), (expected_next_c, next_c))
    for reference, actual in comparisons:
        assert rel_error(reference, actual) < 1e-6
def test_forward_step(self):
    """Check one RNNLayer.forward_step call against stored reference values.

    Inputs and parameters are evenly spaced values, so the returned next
    hidden state can be compared with a fixed array using ``rel_error``
    with a 1e-6 bound.
    """
    N, D, H = 3, 10, 4
    rnn = RNNLayer(D, H)

    x = np.linspace(-0.4, 0.7, num=N * D).reshape(N, D)
    prev_h = np.linspace(-0.2, 0.5, num=N * H).reshape(N, H)
    rnn.Wx = np.linspace(-0.1, 0.9, num=D * H).reshape(D, H)
    rnn.Wh = np.linspace(-0.3, 0.7, num=H * H).reshape(H, H)
    rnn.b = np.linspace(-0.2, 0.4, num=H)

    # The second returned element is not needed by this test.
    next_h, _ = rnn.forward_step(x, prev_h)

    expected_next_h = np.array([
        [-0.58172089, -0.50182032, -0.41232771, -0.31410098],
        [0.66854692, 0.79562378, 0.87755553, 0.92795967],
        [0.97934501, 0.99144213, 0.99646691, 0.99854353],
    ])

    error = rel_error(expected_next_h, next_h)
    assert error < 1e-6
def test_backward(self):
    """Check GRULayer.backward against grad_check for inputs and parameters.

    Runs a seeded forward/backward pass, reads the eight gradients stored
    in ``gru.grad`` and compares each with the value returned by
    ``grad_check`` (helper defined elsewhere; presumably a numerical
    gradient estimator -- confirm) using ``rel_error`` with a 1e-6 bound.
    """
    np.random.seed(145)
    N, D, T, H = 2, 3, 10, 6

    # Construct the layer first so the sequence of RNG draws is unchanged.
    gru = GRULayer(D, H)
    x = np.random.randn(N, T, D)
    h0 = np.random.randn(N, H)
    gru.Wx = np.random.randn(D, 2 * H)
    gru.Wh = np.random.randn(H, 2 * H)
    gru.b = np.random.randn(2 * H)
    gru.Wxi = np.random.randn(D, H)
    gru.Whi = np.random.randn(H, H)
    gru.bi = np.random.randn(H)

    out = gru.forward(x, h0)
    dnext_h = np.random.randn(*out.shape)
    gru.backward(dnext_h)
    grad_keys = ('dx', 'dh0', 'dWx', 'dWh', 'db', 'dWxi', 'dWhi', 'dbi')
    analytic = [gru.grad[key] for key in grad_keys]

    def run_forward(_):
        # The argument is ignored; the closure re-reads x, h0 and the layer.
        return gru.forward(x, h0)

    # List elements are evaluated in order, one grad_check call at a time.
    numeric = [
        grad_check(run_forward, x, dnext_h),
        grad_check(run_forward, h0, dnext_h),
        grad_check(run_forward, gru.Wx, dnext_h),
        grad_check(run_forward, gru.Wh, dnext_h),
        grad_check(run_forward, gru.b, dnext_h),
        grad_check(run_forward, gru.Wxi, dnext_h),
        grad_check(run_forward, gru.Whi, dnext_h),
        grad_check(run_forward, gru.bi, dnext_h),
    ]

    for estimated, computed in zip(numeric, analytic):
        assert rel_error(estimated, computed) < 1e-6
def test_backward_step(self):
    """Check GRULayer.backward_step against grad_check for a single step.

    The ``grad_check`` values are computed first, then ``backward_step``
    is called, and its eight returned values are compared with them using
    ``rel_error`` with a 1e-6 bound.
    """
    np.random.seed(145)
    N, D, H = 4, 5, 6

    # Construct the layer first so the sequence of RNG draws is unchanged.
    gru = GRULayer(D, H)
    x = np.random.randn(N, D)
    prev_h = np.random.randn(N, H)
    gru.Wx = np.random.randn(D, 2 * H)
    gru.Wh = np.random.randn(H, 2 * H)
    gru.b = np.random.randn(2 * H)
    gru.Wxi = np.random.randn(D, H)
    gru.Whi = np.random.randn(H, H)
    gru.bi = np.random.randn(H)

    next_h, cache = gru.forward_step(x, prev_h)
    dnext_h = np.random.randn(*next_h.shape)

    def step_output(_):
        # Only the first returned element (the next hidden state) is checked.
        return gru.forward_step(x, prev_h)[0]

    # List elements are evaluated in order, one grad_check call at a time.
    numeric = [
        grad_check(step_output, x, dnext_h),
        grad_check(step_output, prev_h, dnext_h),
        grad_check(step_output, gru.Wx, dnext_h),
        grad_check(step_output, gru.Wh, dnext_h),
        grad_check(step_output, gru.b, dnext_h),
        grad_check(step_output, gru.Wxi, dnext_h),
        grad_check(step_output, gru.Whi, dnext_h),
        grad_check(step_output, gru.bi, dnext_h),
    ]

    # Analytic gradients are computed after the grad_check calls, as before.
    dx, dprev_h, dWx, dWh, db, dWxi, dWhi, dbi = gru.backward_step(dnext_h, cache)
    analytic = [dx, dprev_h, dWx, dWh, db, dWxi, dWhi, dbi]

    for estimated, computed in zip(numeric, analytic):
        assert rel_error(estimated, computed) < 1e-6
def test_forward_step(self):
    """Check one GRULayer.forward_step call against stored reference values.

    Inputs and all six parameter arrays are evenly spaced values, so the
    returned next hidden state can be compared with a fixed array using
    ``rel_error`` with a 1e-6 bound.
    """
    N, D, H = 3, 4, 5
    gru = GRULayer(D, H)

    x = np.linspace(-0.4, 1.2, num=N * D).reshape(N, D)
    prev_h = np.linspace(-0.3, 0.7, num=N * H).reshape(N, H)
    gru.Wx = np.linspace(-2.1, 1.3, num=2 * D * H).reshape(D, 2 * H)
    gru.Wh = np.linspace(-0.7, 2.2, num=2 * H * H).reshape(H, 2 * H)
    gru.b = np.linspace(0.3, 0.7, num=2 * H)
    gru.Wxi = np.linspace(-1.8, 2.1, num=D * H).reshape(D, H)
    gru.Whi = np.linspace(-0.9, 1.6, num=H * H).reshape(H, H)
    gru.bi = np.linspace(0.1, 0.9, num=H)

    # The second returned element is not needed by this test.
    next_h, _ = gru.forward_step(x, prev_h)

    expected_next_h = np.asarray([
        [-0.0999449, -0.03125071, 0.03639522, 0.10290262, 0.16817868],
        [0.2976273, 0.36884449, 0.3992976, 0.42559245, 0.45798687],
        [0.49371886, 0.70402669, 0.70335355, 0.71138395, 0.7425779],
    ])

    error = rel_error(expected_next_h, next_h)
    assert error < 1e-6
def test_forward(self):
    """Check LSTMLayer.forward on a deterministic sequence for every sample.

    Inputs, initial hidden state and parameters are filled with evenly
    spaced values so the output is reproducible. The full output is
    compared with ``expected_h``; comparing only the first batch element
    would leave the second sample's reference values unused and that
    sample unverified.
    """
    N, D, H, T = 2, 5, 4, 3
    lstm = LSTMLayer(D, H)

    x = np.linspace(-0.4, 0.6, num=N * T * D).reshape(N, T, D)
    h0 = np.linspace(-0.4, 0.8, num=N * H).reshape(N, H)
    lstm.Wx = np.linspace(-0.2, 0.9, num=4 * D * H).reshape(D, 4 * H)
    lstm.Wh = np.linspace(-0.3, 0.6, num=4 * H * H).reshape(H, 4 * H)
    lstm.b = np.linspace(0.2, 0.7, num=4 * H)

    h = lstm.forward(x, h0)

    expected_h = np.asarray([
        [
            [0.01764008, 0.01823233, 0.01882671, 0.0194232],
            [0.11287491, 0.12146228, 0.13018446, 0.13902939],
            [0.31358768, 0.33338627, 0.35304453, 0.37250975],
        ],
        [
            [0.45767879, 0.4761092, 0.4936887, 0.51041945],
            [0.6704845, 0.69350089, 0.71486014, 0.7346449],
            [0.81733511, 0.83677871, 0.85403753, 0.86935314],
        ],
    ])

    # Compare all N samples, not just index 0.
    assert rel_error(expected_h, h) < 1e-6
def test_forward(self):
    """Check GRULayer.forward on a deterministic sequence for every sample.

    Inputs, initial hidden state and all six parameter arrays are filled
    with evenly spaced values so the output is reproducible. The full
    output is compared with ``expected_h``; comparing only the first batch
    element would leave the second sample's reference values unused and
    that sample unverified.
    """
    N, D, H, T = 2, 5, 4, 3
    gru = GRULayer(D, H)

    x = np.linspace(-0.4, 0.6, num=N * T * D).reshape(N, T, D)
    h0 = np.linspace(-0.4, 0.8, num=N * H).reshape(N, H)
    gru.Wx = np.linspace(-0.2, 0.9, num=2 * D * H).reshape(D, 2 * H)
    gru.Wh = np.linspace(-0.3, 0.6, num=2 * H * H).reshape(H, 2 * H)
    gru.b = np.linspace(0.2, 0.7, num=2 * H)
    gru.Wxi = np.linspace(-0.4, 1.6, num=D * H).reshape(D, H)
    gru.Whi = np.linspace(-0.7, 0.4, num=H * H).reshape(H, H)
    gru.bi = np.linspace(0.4, 0.9, num=H)

    h = gru.forward(x, h0)

    expected_h = np.asarray([
        [
            [-0.19332136, -0.12098466, -0.0478229, 0.02618775],
            [0.03427635, 0.09687263, 0.15884181, 0.22017801],
            [0.22173995, 0.30237624, 0.36888384, 0.42432841],
        ],
        [
            [0.3842619, 0.55153297, 0.69594892, 0.83361145],
            [0.48151744, 0.63219579, 0.74877489, 0.85939455],
            [0.56501369, 0.69358245, 0.78641933, 0.87723583],
        ],
    ])

    # Compare all N samples, not just index 0.
    assert rel_error(expected_h, h) < 1e-6
def test_backward_step(self):
    """Check LSTMLayer.backward_step against grad_check for a single step.

    The step returns both a hidden and a cell state, each with its own
    upstream gradient. For every input and parameter the reference value
    is therefore the sum of two ``grad_check`` results, one through the
    hidden output and one through the cell output. These are compared
    with the six values returned by ``backward_step`` using ``rel_error``
    with a 1e-6 bound.
    """
    np.random.seed(132)
    N, D, H = 4, 5, 6

    # Construct the layer first so the sequence of RNG draws is unchanged.
    lstm = LSTMLayer(D, H)
    x = np.random.randn(N, D)
    prev_h = np.random.randn(N, H)
    prev_c = np.random.randn(N, H)
    lstm.Wx = np.random.randn(D, 4 * H)
    lstm.Wh = np.random.randn(H, 4 * H)
    lstm.b = np.random.randn(4 * H)

    next_h, next_c, cache = lstm.forward_step(x, prev_h, prev_c)
    dnext_h = np.random.randn(*next_h.shape)
    dnext_c = np.random.randn(*next_c.shape)

    def hidden_output(_):
        return lstm.forward_step(x, prev_h, prev_c)[0]

    def cell_output(_):
        return lstm.forward_step(x, prev_h, prev_c)[1]

    def combined_grad(target):
        # Hidden-path term is evaluated before the cell-path term.
        return (grad_check(hidden_output, target, dnext_h)
                + grad_check(cell_output, target, dnext_c))

    # List elements are evaluated in order, one target at a time.
    numeric = [
        combined_grad(x),
        combined_grad(prev_h),
        combined_grad(prev_c),
        combined_grad(lstm.Wx),
        combined_grad(lstm.Wh),
        combined_grad(lstm.b),
    ]

    # Analytic gradients are computed after the grad_check calls, as before.
    dx, dh, dc, dWx, dWh, db = lstm.backward_step(dnext_h, dnext_c, cache)
    analytic = [dx, dh, dc, dWx, dWh, db]

    for estimated, computed in zip(numeric, analytic):
        assert rel_error(estimated, computed) < 1e-6