def test_independent_normal():
    loc = torch.as_tensor([[0.3, 0.7], [0.2, 0.4]])
    scale = torch.as_tensor([[0.1, 0.2], [0.3, 0.8]])
    dist = torch.distributions.Independent(
        torch.distributions.Normal(loc, scale), 1)
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode, loc)
def test_evaluate_actions_as_quantiles(self):
    sample_actions = torch.randint(self.action_size, size=(self.batch_size,))
    z = self.av.evaluate_actions_as_quantiles(sample_actions)
    self.assertIsInstance(z, torch.Tensor)
    for b in range(self.batch_size):
        torch_assert_allclose(z[b], self.quantiles[b, :, sample_actions[b]])
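# Note: given the indexing above, self.quantiles is assumed to have shape
# (batch_size, n_taus, n_actions), so evaluate_actions_as_quantiles returns,
# for each batch entry, the column of quantiles for the sampled action.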
def test_lambda():
    model = nn.Sequential(
        nn.ReLU(),
        Lambda(lambda x: x + 1),
        nn.ReLU(),
    )
    x = torch.rand(3, 2)
    # Since x is all positive, ReLU will not have any effect
    y = model(x)
    torch_assert_allclose(y, x + 1)
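# A minimal sketch of the Lambda module exercised above (an assumption
# consistent with the test, not necessarily pfrl's exact code): it wraps
# an arbitrary callable as an nn.Module so it composes with nn.Sequential.
class LambdaSketch(nn.Module):
    def __init__(self, f):
        super().__init__()
        self.f = f

    def forward(self, x):
        return self.f(x)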
def test_cosine_basis_functions(batch_size, m, n_basis_functions):
    x = torch.rand(batch_size, m, dtype=torch.float)
    y = iqn.cosine_basis_functions(x, n_basis_functions=n_basis_functions)
    assert y.shape == (batch_size, m, n_basis_functions)
    for i in range(batch_size):
        for j in range(m):
            for k in range(n_basis_functions):
                torch_assert_allclose(
                    y[i, j, k],
                    torch.cos(x[i, j] * (k + 1) * np.pi),
                    atol=1e-5,
                )
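# A vectorized sketch of what iqn.cosine_basis_functions computes, inferred
# from the element-wise checks above (a sketch, not the library code):
# basis function i is cos(x * i * pi) for i = 1, ..., n_basis_functions.
def cosine_basis_functions_sketch(x, n_basis_functions=64):
    # x: (batch_size, m) -> (batch_size, m, n_basis_functions)
    i = torch.arange(1, n_basis_functions + 1, dtype=x.dtype, device=x.device)
    return torch.cos(x.unsqueeze(-1) * i * np.pi)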
def _test(self, device):
    # A must be symmetric and positive-definite
    random_mat = torch.normal(0, 1, size=(self.n, self.n))
    # Tensor.to returns a new tensor; it does not move a tensor in place
    random_mat = random_mat.to(device)
    A = torch.matmul(random_mat, random_mat.T)
    x_ans = torch.normal(0, 1, size=(self.n,))
    x_ans = x_ans.to(device)
    b = torch.matmul(A, x_ans)

    def A_product_func(vec):
        assert vec.shape == b.shape
        return torch.matmul(A, vec)

    x = pfrl.utils.conjugate_gradient(A_product_func, b)
    torch_assert_allclose(x, x_ans, rtol=1e-1)
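# A minimal sketch of the standard conjugate gradient iteration that
# pfrl.utils.conjugate_gradient is tested against here (names and the
# iteration cap are illustrative, not pfrl's exact signature). It solves
# A x = b for symmetric positive-definite A using only A-vector products.
def conjugate_gradient_sketch(A_product_func, b, max_iter=10, tol=1e-10):
    x = torch.zeros_like(b)
    r = b.clone()  # residual b - A x, with x = 0 initially
    p = r.clone()  # search direction
    rs_old = torch.dot(r, r)
    for _ in range(max_iter):
        Ap = A_product_func(p)
        alpha = rs_old / torch.dot(p, Ap)
        x = x + alpha * p
        r = r - alpha * Ap
        rs_new = torch.dot(r, r)
        if rs_new < tol:
            break
        p = r + (rs_new / rs_old) * p
        rs_old = rs_new
    return x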
def test_compute_eltwise_huber_quantile_loss(batch_size, N, N_prime):
    # Overestimation is penalized proportionally to 1 - tau,
    # underestimation proportionally to tau.
    y = torch.randn(batch_size, N, dtype=torch.float, requires_grad=True)
    t = torch.randn(batch_size, N_prime, dtype=torch.float)
    tau = torch.rand(batch_size, N, dtype=torch.float)

    loss = iqn.compute_eltwise_huber_quantile_loss(y, t, tau)
    y_b, t_b = torch.broadcast_tensors(
        y.reshape(batch_size, N, 1),
        t.reshape(batch_size, 1, N_prime),
    )
    assert loss.shape == (batch_size, N, N_prime)
    huber_loss = nn.functional.smooth_l1_loss(y_b, t_b, reduction="none")
    assert huber_loss.shape == (batch_size, N, N_prime)

    for i in range(batch_size):
        for j in range(N):
            for k in range(N_prime):
                # The loss is always positive
                scalar_loss = loss[i, j, k]
                scalar_grad = torch.autograd.grad(
                    [scalar_loss], [y], retain_graph=True)[0][i, j]
                assert float(scalar_loss) > 0
                if y[i, j] > t[i, k]:
                    # y overestimates t:
                    # loss equals the Huber loss scaled by (1 - tau)
                    correct_scalar_loss = (1 - tau[i, j]) * huber_loss[i, j, k]
                else:
                    # y underestimates t:
                    # loss equals the Huber loss scaled by tau
                    correct_scalar_loss = tau[i, j] * huber_loss[i, j, k]
                correct_scalar_grad = torch.autograd.grad(
                    [correct_scalar_loss], [y], retain_graph=True)[0][i, j]
                torch_assert_allclose(
                    scalar_loss,
                    correct_scalar_loss,
                    atol=1e-5,
                )
                torch_assert_allclose(
                    scalar_grad,
                    correct_scalar_grad,
                    atol=1e-5,
                )
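# A vectorized reference for the quantile Huber loss checked above (a
# sketch derived from the element-wise assertions, not pfrl's code): the
# element-wise Huber loss is weighted by |tau - 1{y > t}|, which equals
# 1 - tau for overestimation and tau for underestimation.
def eltwise_huber_quantile_loss_sketch(y, t, tau):
    # y, tau: (B, N); t: (B, N') -> loss: (B, N, N')
    y_b, t_b = torch.broadcast_tensors(y.unsqueeze(2), t.unsqueeze(1))
    huber = nn.functional.smooth_l1_loss(y_b, t_b, reduction="none")
    return torch.abs(tau.unsqueeze(2) - (y_b > t_b).float()) * huber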
def test_getitem(self):
    n_batch = 7
    ndim_action = 3
    mu = np.random.randn(n_batch, ndim_action).astype(np.float32)
    mat = np.broadcast_to(
        np.eye(ndim_action, dtype=np.float32)[None],
        (n_batch, ndim_action, ndim_action),
    )
    v = np.random.randn(n_batch).astype(np.float32)
    min_action, max_action = -1, 1
    qout = action_value.QuadraticActionValue(
        torch.tensor(mu),
        torch.tensor(mat),
        torch.tensor(v),
        min_action,
        max_action,
    )
    sliced = qout[:3]
    torch_assert_allclose(sliced.mu, mu[:3])
    torch_assert_allclose(sliced.mat, mat[:3])
    torch_assert_allclose(sliced.v, v[:3])
    torch_assert_allclose(sliced.min_action[0], min_action)
    torch_assert_allclose(sliced.max_action[0], max_action)
def test_soft_copy_param_scalar(self):
    a = nn.Module()
    a.p = nn.Parameter(torch.as_tensor(0.5))
    b = nn.Module()
    b.p = nn.Parameter(torch.as_tensor(1.0))

    # a = (1 - tau) * a + tau * b
    copy_param.soft_copy_param(target_link=a, source_link=b, tau=0.1)
    torch_assert_allclose(a.p, torch.full_like(a.p, 0.55))
    torch_assert_allclose(b.p, torch.full_like(b.p, 1.0))

    copy_param.soft_copy_param(target_link=a, source_link=b, tau=0.1)
    torch_assert_allclose(a.p, torch.full_like(a.p, 0.595))
    torch_assert_allclose(b.p, torch.full_like(b.p, 1.0))
def test_soft_copy_param(self):
    a = nn.Linear(1, 5)
    b = nn.Linear(1, 5)
    with torch.no_grad():
        a.weight.fill_(0.5)
        b.weight.fill_(1)

    # a = (1 - tau) * a + tau * b
    copy_param.soft_copy_param(target_link=a, source_link=b, tau=0.1)
    torch_assert_allclose(a.weight, torch.full_like(a.weight, 0.55))
    torch_assert_allclose(b.weight, torch.full_like(b.weight, 1.0))

    copy_param.soft_copy_param(target_link=a, source_link=b, tau=0.1)
    torch_assert_allclose(a.weight, torch.full_like(a.weight, 0.595))
    torch_assert_allclose(b.weight, torch.full_like(b.weight, 1.0))
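# A minimal sketch of the Polyak (soft) update pinned down by the two
# tests above (assumed behavior of copy_param.soft_copy_param, not its
# actual code): target <- (1 - tau) * target + tau * source.
def soft_copy_param_sketch(target_link, source_link, tau):
    with torch.no_grad():
        for tp, sp in zip(target_link.parameters(), source_link.parameters()):
            tp.mul_(1 - tau).add_(sp, alpha=tau)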
def test_branched(batch_size):
    link1 = nn.Linear(2, 3)
    link2 = nn.Linear(2, 5)
    link3 = nn.Sequential(
        nn.Linear(2, 7),
        nn.Tanh(),
    )
    plink = Branched(link1, link2, link3)
    x = torch.zeros(batch_size, 2, dtype=torch.float)
    pout = plink(x)
    assert isinstance(pout, tuple)
    assert len(pout) == 3
    out1 = link1(x)
    out2 = link2(x)
    out3 = link3(x)
    torch_assert_allclose(pout[0], out1)
    torch_assert_allclose(pout[1], out2)
    torch_assert_allclose(pout[2], out3)
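# A minimal sketch of Branched consistent with the test above (assumed,
# not pfrl's exact implementation): feed the same input to every child
# module and return the outputs as a tuple.
class BranchedSketch(nn.Module):
    def __init__(self, *modules):
        super().__init__()
        self.child_modules = nn.ModuleList(modules)

    def forward(self, *args, **kwargs):
        return tuple(mod(*args, **kwargs) for mod in self.child_modules)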
def test_copy_grad(self):
    def set_random_grad(link):
        link.zero_grad()
        x = np.random.normal(size=(1, 1)).astype(np.float32)
        y = link(torch.from_numpy(x)) * np.random.normal()
        torch.sum(y).backward()

    # When source is not None and target is None
    a = nn.Linear(1, 5)
    b = nn.Linear(1, 5)
    set_random_grad(a)
    b.zero_grad()
    assert a.weight.grad is not None
    assert a.bias.grad is not None
    assert b.weight.grad is None
    assert b.bias.grad is None
    copy_param.copy_grad(target_link=b, source_link=a)
    torch_assert_allclose(a.weight.grad, b.weight.grad)
    torch_assert_allclose(a.bias.grad, b.bias.grad)
    assert a.weight.grad is not b.weight.grad
    assert a.bias.grad is not b.bias.grad

    # When both are not None
    a = nn.Linear(1, 5)
    b = nn.Linear(1, 5)
    set_random_grad(a)
    set_random_grad(b)
    assert a.weight.grad is not None
    assert a.bias.grad is not None
    assert b.weight.grad is not None
    assert b.bias.grad is not None
    copy_param.copy_grad(target_link=b, source_link=a)
    torch_assert_allclose(a.weight.grad, b.weight.grad)
    torch_assert_allclose(a.bias.grad, b.bias.grad)
    assert a.weight.grad is not b.weight.grad
    assert a.bias.grad is not b.bias.grad

    # When source is None and target is not None
    a = nn.Linear(1, 5)
    b = nn.Linear(1, 5)
    a.zero_grad()
    set_random_grad(b)
    assert a.weight.grad is None
    assert a.bias.grad is None
    assert b.weight.grad is not None
    assert b.bias.grad is not None
    copy_param.copy_grad(target_link=b, source_link=a)
    assert a.weight.grad is None
    assert a.bias.grad is None
    assert b.weight.grad is None
    assert b.bias.grad is None

    # When both are None
    a = nn.Linear(1, 5)
    b = nn.Linear(1, 5)
    a.zero_grad()
    b.zero_grad()
    assert a.weight.grad is None
    assert a.bias.grad is None
    assert b.weight.grad is None
    assert b.bias.grad is None
    copy_param.copy_grad(target_link=b, source_link=a)
    assert a.weight.grad is None
    assert a.bias.grad is None
    assert b.weight.grad is None
    assert b.bias.grad is None
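# A sketch of copy_grad's behavior as pinned down by the four cases above
# (assumed, not pfrl's actual code): each target gradient becomes a copy
# of the corresponding source gradient, including None when the source
# gradient is unset.
def copy_grad_sketch(target_link, source_link):
    for tp, sp in zip(target_link.parameters(), source_link.parameters()):
        tp.grad = None if sp.grad is None else sp.grad.clone()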
def test_ppo_dataset_recurrent_and_non_recurrent_equivalence(
    use_obs_normalizer, gamma, lambd, max_recurrent_sequence_len
):
    """Test equivalence between recurrent and non-recurrent datasets.

    When the same feed-forward model is used, the values of log_prob,
    v_pred, and next_v_pred obtained by the recurrent and non-recurrent
    dataset creation functions should be the same.
    """
    episodes = make_random_episodes()
    if use_obs_normalizer:
        obs_normalizer = pfrl.nn.EmpiricalNormalization(2, clip_threshold=5)
        obs_normalizer.experience(torch.rand(10, 2))
    else:
        obs_normalizer = None

    def phi(obs):
        return (obs * 0.5).astype(np.float32)

    device = torch.device("cpu")
    obs_size = 2
    n_actions = 3

    non_recurrent_model = pfrl.nn.Branched(
        nn.Sequential(
            nn.Linear(obs_size, n_actions),
            SoftmaxCategoricalHead(),
        ),
        nn.Linear(obs_size, 1),
    )
    recurrent_model = RecurrentSequential(non_recurrent_model)

    dataset = pfrl.agents.ppo._make_dataset(
        episodes=copy.deepcopy(episodes),
        model=non_recurrent_model,
        phi=phi,
        batch_states=batch_states,
        obs_normalizer=obs_normalizer,
        gamma=gamma,
        lambd=lambd,
        device=device,
    )

    dataset_recurrent = pfrl.agents.ppo._make_dataset_recurrent(
        episodes=copy.deepcopy(episodes),
        model=recurrent_model,
        phi=phi,
        batch_states=batch_states,
        obs_normalizer=obs_normalizer,
        gamma=gamma,
        lambd=lambd,
        max_recurrent_sequence_len=max_recurrent_sequence_len,
        device=device,
    )

    assert "log_prob" not in episodes[0][0]
    assert "log_prob" in dataset[0]
    assert "log_prob" in dataset_recurrent[0][0]
    # They are not just shallow copies
    assert dataset[0]["log_prob"] is not dataset_recurrent[0][0]["log_prob"]

    # Every stored quantity should match between the two datasets
    keys = (
        "state",
        "action",
        "reward",
        "nonterminal",
        "log_prob",
        "v_pred",
        "next_v_pred",
        "adv",
        "v_teacher",
    )
    flat_dataset_recurrent = list(
        itertools.chain.from_iterable(dataset_recurrent))
    for key in keys:
        values = [tr[key] for tr in dataset]
        recurrent_values = [tr[key] for tr in flat_dataset_recurrent]
        torch_assert_allclose(values, recurrent_values)
def _test_non_lstm(self, gpu, name):
    in_size = 2
    out_size = 3
    device = "cuda:{}".format(gpu) if gpu >= 0 else "cpu"
    seqs_x = [
        torch.rand(4, in_size, device=device),
        torch.rand(1, in_size, device=device),
        torch.rand(3, in_size, device=device),
    ]
    seqs_x = torch.nn.utils.rnn.pack_sequence(seqs_x, enforce_sorted=False)

    self.assertIn(name, ("GRU", "RNN"))
    cls = getattr(nn, name)
    link = cls(num_layers=1, input_size=in_size, hidden_size=out_size)
    link.to(device)

    # Forward twice: with None and with non-None recurrent states
    y0, h0 = link(seqs_x, None)
    y1, h1 = link(seqs_x, h0)
    y0, _ = torch.nn.utils.rnn.pad_packed_sequence(y0, batch_first=True)
    y1, _ = torch.nn.utils.rnn.pad_packed_sequence(y1, batch_first=True)
    self.assertEqual(h0.shape, (1, 3, out_size))
    self.assertEqual(h1.shape, (1, 3, out_size))
    self.assertEqual(y0.shape, (3, 4, out_size))
    self.assertEqual(y1.shape, (3, 4, out_size))

    # Masked at 0
    rs0_mask0 = mask_recurrent_state_at(h0, 0)
    y1m0, _ = link(seqs_x, rs0_mask0)
    y1m0, _ = torch.nn.utils.rnn.pad_packed_sequence(y1m0, batch_first=True)
    torch_assert_allclose(y1m0[0], y0[0])
    torch_assert_allclose(y1m0[1], y1[1])
    torch_assert_allclose(y1m0[2], y1[2])

    # Masked at (1, 2)
    rs0_mask12 = mask_recurrent_state_at(h0, (1, 2))
    y1m12, _ = link(seqs_x, rs0_mask12)
    y1m12, _ = torch.nn.utils.rnn.pad_packed_sequence(y1m12, batch_first=True)
    torch_assert_allclose(y1m12[0], y1[0])
    torch_assert_allclose(y1m12[1], y0[1])
    torch_assert_allclose(y1m12[2], y0[2])

    # Get at 1 and concat with None
    rs0_get1 = get_recurrent_state_at(h0, 1, detach=False)
    assert rs0_get1.requires_grad
    torch_assert_allclose(rs0_get1, h0[:, 1])
    concat_rs_get1 = concatenate_recurrent_states([None, rs0_get1, None])
    y1g1, _ = link(seqs_x, concat_rs_get1)
    y1g1, _ = torch.nn.utils.rnn.pad_packed_sequence(y1g1, batch_first=True)
    torch_assert_allclose(y1g1[0], y0[0])
    torch_assert_allclose(y1g1[1], y1[1])
    torch_assert_allclose(y1g1[2], y0[2])

    # Get at 1 with detach=True
    rs0_get1_detach = get_recurrent_state_at(h0, 1, detach=True)
    assert not rs0_get1_detach.requires_grad
    torch_assert_allclose(rs0_get1_detach, h0[:, 1])
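# A minimal sketch of mask_recurrent_state_at for a plain tensor state,
# consistent with the checks above (assumed; pfrl's version presumably
# also handles tuple states such as LSTM's (h, c)). Zeroing a sequence's
# recurrent state is equivalent to restarting that sequence, since GRU
# and RNN default to zero initial hidden states.
def mask_recurrent_state_at_sketch(state, indices):
    # state: (num_layers, batch_size, hidden_size)
    state = state.clone()
    state[:, indices] = 0
    return state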
def test_getitem(self):
    sliced = self.av[:10]
    torch_assert_allclose(sliced.q_values, self.q_values[:10])
    torch_assert_allclose(sliced.quantiles, self.quantiles[:10])
    self.assertEqual(sliced.n_actions, self.action_size)
    self.assertIs(sliced.q_values_formatter, self.av.q_values_formatter)
def test_normal():
    loc = torch.as_tensor([0.3, 0.5])
    scale = torch.as_tensor([0.1, 0.9])
    dist = torch.distributions.Normal(loc, scale)
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode, loc)
def test_torch_assert_allclose():
    x = [torch.zeros(2), torch.ones(2)]
    y = [[0, 0], [1, 1]]
    torch_assert_allclose(x, y)
def test_hessian_vector_product_nonzero(vec):
    # y and params are assumed to come from the enclosing test module.
    hvp = compute_hessian_vector_product(y, params, vec)
    hessian = compute_hessian(y, params)
    # Plain asserts: this is a pytest-style function, so unittest's
    # self.assert* methods are not available here.
    assert np.count_nonzero(hvp.numpy()) > 0
    assert np.count_nonzero(hessian.numpy()) > 0
    torch_assert_allclose(hvp, torch.matmul(hessian, vec), atol=1e-3)
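# A sketch of the double-backprop trick that a Hessian-vector product
# like the one tested above typically uses (illustrative names, not the
# implementation under test): Hv = d(g . v)/dtheta with g = dy/dtheta,
# which avoids materializing the full Hessian.
def hessian_vector_product_sketch(y, params, vec):
    grads = torch.autograd.grad([y], params, create_graph=True)
    flat_grads = torch.cat([g.reshape(-1) for g in grads])
    grad_dot_vec = torch.sum(flat_grads * vec)
    hvps = torch.autograd.grad([grad_dot_vec], params)
    return torch.cat([g.reshape(-1) for g in hvps])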
def test_multivariate_normal():
    loc = torch.as_tensor([0.3, 0.7])
    cov = torch.as_tensor([[0.1, 0.0], [0.0, 0.9]])
    dist = torch.distributions.MultivariateNormal(loc, cov)
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode, loc)
def _test_forward_with_modified_recurrent_state(self, gpu):
    in_size = 2
    out0_size = 2
    out1_size = 3
    par = RecurrentBranched(
        nn.GRU(num_layers=1, input_size=in_size, hidden_size=out0_size),
        RecurrentSequential(
            nn.LSTM(num_layers=1, input_size=in_size, hidden_size=out1_size),
        ),
    )
    if gpu >= 0:
        device = torch.device("cuda:{}".format(gpu))
        par.to(device)
    else:
        device = torch.device("cpu")
    seqs_x = [
        torch.rand(2, in_size, device=device),
        torch.rand(2, in_size, device=device),
    ]
    packed_x = nn.utils.rnn.pack_sequence(seqs_x, enforce_sorted=False)
    x_t0 = torch.stack((seqs_x[0][0], seqs_x[1][0]))
    x_t1 = torch.stack((seqs_x[0][1], seqs_x[1][1]))
    (gru_out, lstm_out), (gru_rs, (lstm_rs,)) = par(packed_x, None)

    # Check that a multi-step forward equals two one-step forwards
    def no_mask_forward_twice():
        _, rs = one_step_forward(par, x_t0, None)
        return one_step_forward(par, x_t1, rs)

    (
        (nomask_gru_out, nomask_lstm_out),
        (nomask_gru_rs, (nomask_lstm_rs,)),
    ) = no_mask_forward_twice()
    # GRU
    torch_assert_allclose(gru_out.data[2:], nomask_gru_out, atol=1e-5)
    torch_assert_allclose(gru_rs, nomask_gru_rs)
    # LSTM
    torch_assert_allclose(lstm_out.data[2:], nomask_lstm_out, atol=1e-5)
    torch_assert_allclose(lstm_rs[0], nomask_lstm_rs[0], atol=1e-5)
    torch_assert_allclose(lstm_rs[1], nomask_lstm_rs[1], atol=1e-5)

    # Mask only the 1st sequence and forward twice: only the 2nd should match
    def mask0_forward_twice():
        _, rs = one_step_forward(par, x_t0, None)
        rs = mask_recurrent_state_at(rs, 0)
        return one_step_forward(par, x_t1, rs)

    (
        (mask0_gru_out, mask0_lstm_out),
        (mask0_gru_rs, (mask0_lstm_rs,)),
    ) = mask0_forward_twice()
    # GRU
    with self.assertRaises(AssertionError):
        torch_assert_allclose(gru_out.data[2], mask0_gru_out[0], atol=1e-5)
    torch_assert_allclose(gru_out.data[3], mask0_gru_out[1], atol=1e-5)
    # LSTM
    with self.assertRaises(AssertionError):
        torch_assert_allclose(lstm_out.data[2], mask0_lstm_out[0], atol=1e-5)
    torch_assert_allclose(lstm_out.data[3], mask0_lstm_out[1], atol=1e-5)

    # Mask only the 2nd sequence and forward twice: only the 1st should match
    def mask1_forward_twice():
        _, rs = one_step_forward(par, x_t0, None)
        rs = mask_recurrent_state_at(rs, 1)
        return one_step_forward(par, x_t1, rs)

    (
        (mask1_gru_out, mask1_lstm_out),
        (mask1_gru_rs, (mask1_lstm_rs,)),
    ) = mask1_forward_twice()
    # GRU
    torch_assert_allclose(gru_out.data[2], mask1_gru_out[0], atol=1e-5)
    with self.assertRaises(AssertionError):
        torch_assert_allclose(gru_out.data[3], mask1_gru_out[1], atol=1e-5)
    # LSTM
    torch_assert_allclose(lstm_out.data[2], mask1_lstm_out[0], atol=1e-5)
    with self.assertRaises(AssertionError):
        torch_assert_allclose(lstm_out.data[3], mask1_lstm_out[1], atol=1e-5)

    # Mask both sequences and forward twice: both should differ
    def mask01_forward_twice():
        _, rs = one_step_forward(par, x_t0, None)
        rs = mask_recurrent_state_at(rs, [0, 1])
        return one_step_forward(par, x_t1, rs)

    (
        (mask01_gru_out, mask01_lstm_out),
        (mask01_gru_rs, (mask01_lstm_rs,)),
    ) = mask01_forward_twice()
    # GRU
    with self.assertRaises(AssertionError):
        torch_assert_allclose(gru_out.data[2], mask01_gru_out[0], atol=1e-5)
    with self.assertRaises(AssertionError):
        torch_assert_allclose(gru_out.data[3], mask01_gru_out[1], atol=1e-5)
    # LSTM
    with self.assertRaises(AssertionError):
        torch_assert_allclose(lstm_out.data[2], mask01_lstm_out[0], atol=1e-5)
    with self.assertRaises(AssertionError):
        torch_assert_allclose(lstm_out.data[3], mask01_lstm_out[1], atol=1e-5)

    # Get and concatenate recurrent states, then resume the forward pass
    def get_and_concat_rs_forward():
        _, rs = one_step_forward(par, x_t0, None)
        rs0 = get_recurrent_state_at(rs, 0, detach=True)
        rs1 = get_recurrent_state_at(rs, 1, detach=True)
        concat_rs = concatenate_recurrent_states([rs0, rs1])
        return one_step_forward(par, x_t1, concat_rs)

    (
        (getcon_gru_out, getcon_lstm_out),
        (getcon_gru_rs, (getcon_lstm_rs,)),
    ) = get_and_concat_rs_forward()
    # GRU
    torch_assert_allclose(gru_out.data[2], getcon_gru_out[0], atol=1e-5)
    torch_assert_allclose(gru_out.data[3], getcon_gru_out[1], atol=1e-5)
    # LSTM
    torch_assert_allclose(lstm_out.data[2], getcon_lstm_out[0], atol=1e-5)
    torch_assert_allclose(lstm_out.data[3], getcon_lstm_out[1], atol=1e-5)
def test_transform():
    base_dist = torch.distributions.Normal(loc=2, scale=1)
    dist = torch.distributions.TransformedDistribution(
        base_dist, [torch.distributions.transforms.TanhTransform()])
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode.tolist(), math.tanh(2))
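# Taken together, the mode tests above pin down the assumed behavior of
# mode_of_distribution; a minimal sketch (not pfrl's actual code):
def mode_of_distribution_sketch(dist):
    if isinstance(dist, (torch.distributions.Normal,
                         torch.distributions.MultivariateNormal)):
        return dist.loc
    if isinstance(dist, torch.distributions.Independent):
        return mode_of_distribution_sketch(dist.base_dist)
    if isinstance(dist, torch.distributions.TransformedDistribution):
        # Assumes monotonic transforms (e.g. tanh) that map the base
        # distribution's mode to the transformed distribution's mode.
        x = mode_of_distribution_sketch(dist.base_dist)
        for transform in dist.transforms:
            x = transform(x)
        return x
    raise NotImplementedError(type(dist))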
def test_q_values(self):
    self.assertIsInstance(self.av.q_values, torch.Tensor)
    torch_assert_allclose(self.av.q_values, self.q_values)
def _test_forward(self, gpu):
    in_size = 2
    out_size = 6
    rseq = RecurrentSequential(
        nn.Linear(in_size, 3),
        nn.ELU(),
        nn.LSTM(num_layers=1, input_size=3, hidden_size=4),
        nn.Linear(4, 5),
        nn.RNN(num_layers=1, input_size=5, hidden_size=out_size),
        nn.Tanh(),
    )
    if gpu >= 0:
        device = torch.device("cuda:{}".format(gpu))
        rseq.to(device)
    else:
        device = torch.device("cpu")

    assert len(rseq.recurrent_children) == 2
    assert rseq.recurrent_children[0] is rseq[2]
    assert rseq.recurrent_children[1] is rseq[4]

    linear1 = rseq[0]
    lstm = rseq[2]
    linear2 = rseq[3]
    rnn = rseq[4]

    seqs_x = [
        torch.rand(4, in_size, requires_grad=True, device=device),
        torch.rand(1, in_size, requires_grad=True, device=device),
        torch.rand(3, in_size, requires_grad=True, device=device),
    ]
    packed_x = nn.utils.rnn.pack_sequence(seqs_x, enforce_sorted=False)

    out, _ = rseq(packed_x, None)
    self.assertEqual(out.data.shape, (8, out_size))

    # Check that the output matches step-by-step execution
    def manual_forward(seqs_x):
        seqs_y = []
        for seq_x in seqs_x:
            lstm_st = None
            rnn_st = None
            seq_y = []
            for i in range(len(seq_x)):
                h = seq_x[i:i + 1]
                h = linear1(h)
                h = F.elu(h)
                h, lstm_st = _step_lstm(lstm, h, lstm_st)
                h = linear2(h)
                h, rnn_st = _step_rnn_tanh(rnn, h, rnn_st)
                # torch.tanh replaces the deprecated F.tanh alias
                y = torch.tanh(h)
                seq_y.append(y[0])
            seqs_y.append(torch.stack(seq_y))
        return nn.utils.rnn.pack_sequence(seqs_y, enforce_sorted=False)

    manual_out = manual_forward(seqs_x)
    torch_assert_allclose(out.data, manual_out.data, atol=1e-4)

    # Finally, check the gradients w.r.t. the inputs
    grads = torch.autograd.grad([torch.sum(out.data)], seqs_x)
    manual_grads = torch.autograd.grad([torch.sum(manual_out.data)], seqs_x)
    assert len(grads) == len(manual_grads) == 3
    for grad, manual_grad in zip(grads, manual_grads):
        torch_assert_allclose(grad, manual_grad, atol=1e-4)
def test_torch_assert_allclose_fail():
    with pytest.raises(AssertionError):
        x = [torch.zeros(2), torch.ones(2)]
        y = [[0, 0], [1, 0]]
        torch_assert_allclose(x, y)