def test_multilayer_bidirectional_gru(self):
    h0 = np.random.rand(4, 4, 200).astype(np.float32)
    input = np.random.rand(5, 4, 100).astype(np.float32)
    t_rnn = tnn.GRU(100, 200, num_layers=2, bidirectional=True, bias=False)
    j_rnn = nn.GRU(100, 200, num_layers=2, bidirectional=True, bias=False)
    check_equal_1(t_rnn, j_rnn, input, h0)
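# Shape note: the initial hidden state of a GRU has shape
# (num_layers * num_directions, batch, hidden_size), so two layers in two
# directions give the leading dimension of 4 used above.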
def test_cudnn_gru_train(self):
    # Trains a Torch GRU and a Jittor GRU from identical initial weights and
    # checks that losses and parameters stay in sync (assumes a CUDA device).
    dev = torch.device('cuda:0')
    t_rnn = tnn.GRU(32, 64).to(dev)
    t_optim = torch.optim.SGD(t_rnn.parameters(), lr=1e-3, momentum=0.9)
    j_rnn = nn.GRU(32, 64)
    j_rnn.load_state_dict(t_rnn.state_dict())
    j_optim = nn.SGD(j_rnn.parameters(), lr=1e-3, momentum=0.9)
    h0 = np.random.rand(1, 4, 64).astype(np.float32)
    input = np.random.rand(12, 4, 32).astype(np.float32)
    for _ in range(10):
        t_optim.zero_grad()
        t_output, th = t_rnn(
            torch.from_numpy(input).to(dev),
            torch.from_numpy(h0).to(dev))
        t_loss = (t_output**2).sum() + (th**2).sum()
        t_loss.backward()
        t_optim.step()

        j_input, jh = jt.array(input), jt.array(h0)
        j_output, jh = j_rnn(j_input, jh)
        j_loss = (j_output**2).sum() + (jh**2).sum()
        j_optim.step(j_loss)

        np.testing.assert_allclose(t_loss.item(), j_loss.item(), rtol=1e-4)
        np.testing.assert_allclose(
            t_rnn.bias_hh_l0.detach().cpu().numpy(),
            j_rnn.bias_hh_l0.data,
            atol=1e-4, rtol=1e-4)
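# API note for the training test above: PyTorch separates gradient
# computation from the update (t_loss.backward(), then t_optim.step()),
# while Jittor's optimizer takes the loss directly; j_optim.step(j_loss)
# runs the backward pass internally before applying the update.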
def test_bidirectional_gru(self):
    h0 = np.random.rand(2, 1, 200).astype(np.float32)
    input = np.random.rand(5, 1, 100).astype(np.float32)
    t_rnn = tnn.GRU(100, 200, bidirectional=True)
    j_rnn = nn.GRU(100, 200, bidirectional=True)
    check_equal_1(t_rnn, j_rnn, input, h0)
def test_multilayer_gru(self):
    h0 = np.random.rand(4, 4, 200).astype(np.float32)
    input = np.random.rand(5, 4, 100).astype(np.float32)
    t_rnn = tnn.GRU(100, 200, num_layers=4)
    j_rnn = nn.GRU(100, 200, num_layers=4)
    check_equal_1(t_rnn, j_rnn, input, h0)
def test_basic_gru(self):
    h0 = np.random.rand(1, 24, 200).astype(np.float32)
    input = np.random.rand(32, 24, 100).astype(np.float32)
    t_rnn = tnn.GRU(100, 200)
    j_rnn = nn.GRU(100, 200)
    check_equal_1(t_rnn, j_rnn, input, h0)
def test_multilayer_dropout_gru(self):
    h0 = np.random.rand(2, 4, 200).astype(np.float32)
    input = np.random.rand(5, 4, 100).astype(np.float32)
    t_rnn = tnn.GRU(100, 200, num_layers=2, dropout=0.5, bias=False)
    j_rnn = nn.GRU(100, 200, num_layers=2, dropout=0.5, bias=False)
    # eval() disables dropout so the two frameworks produce deterministic,
    # comparable outputs.
    t_rnn.eval()
    j_rnn.eval()
    check_equal_1(t_rnn, j_rnn, input, h0)
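# `check_equal_1` is defined elsewhere in this file. A minimal sketch of the
# comparison it is assumed to perform (the name `_check_equal_1_sketch` and
# the tolerances are assumptions, not the file's actual helper):
def _check_equal_1_sketch(t_rnn, j_rnn, input, h0):
    # Copy the Torch weights into the Jittor module so both networks run
    # with identical parameters.
    j_rnn.load_state_dict(t_rnn.state_dict())
    t_output, t_hn = t_rnn(torch.from_numpy(input), torch.from_numpy(h0))
    j_output, j_hn = j_rnn(jt.array(input), jt.array(h0))
    # Outputs and final hidden states should agree up to float32 tolerance.
    np.testing.assert_allclose(t_output.detach().numpy(), j_output.data,
                               rtol=1e-4, atol=1e-4)
    np.testing.assert_allclose(t_hn.detach().numpy(), j_hn.data,
                               rtol=1e-4, atol=1e-4)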