def test_torch_shape(self):
    """Step the vectorized env on torch tensors and check the output shapes.

    The observation must have the two batch dimensions from ``self.dims``
    followed by a trailing axis of size 4; reward and done must have exactly
    the batch shape.
    """
    n_rows, n_cols = self.dims
    batch_shape = (n_rows, n_cols)

    cartpole = VectorizedCartPoleEnv()
    init_state, act = self.state_action

    # Both the environment state and the action are handed over as tensors.
    cartpole.state = torch.tensor(init_state)
    step_out = cartpole.step(torch.tensor(act))
    obs, reward, done, _info = step_out

    assert obs.shape == batch_shape + (4,)
    for per_env_output in (reward, done):
        assert per_env_output.shape == batch_shape
def test_np_shape(self):
    """Step the vectorized env on the raw fixture arrays and check shapes.

    The state and action from ``self.state_action`` are used as-is (no tensor
    conversion). The observation must have the two batch dimensions from
    ``self.dims`` followed by a trailing axis of size 4; reward and done must
    have exactly the batch shape.
    """
    n_rows, n_cols = self.dims
    batch_shape = (n_rows, n_cols)

    cartpole = VectorizedCartPoleEnv()
    init_state, act = self.state_action

    cartpole.state = init_state
    step_out = cartpole.step(act)
    obs, reward, done, _info = step_out

    assert obs.shape == batch_shape + (4,)
    for per_env_output in (reward, done):
        assert per_env_output.shape == batch_shape
def test_vectorized_original_equality(self):
    """Compare VectorizedCartPoleEnv with CartPoleEnv element by element.

    The fixture action is thresholded at zero into a 0/1 integer array. The
    scalar env receives that 0/1 value directly, while the vectorized env
    receives the same choice rescaled to -1/+1. Every batch entry of the
    vectorized step must match a single step of the scalar env started from
    the same state.
    """
    vec_env = VectorizedCartPoleEnv()
    init_state, raw_action = self.state_action
    discrete_action = (raw_action > 0).astype(int)
    n_rows, n_cols = self.dims

    vec_env.state = init_state
    # 2 * a - 1 maps the 0/1 choice onto -1/+1 for the vectorized env.
    signed_action = 2 * discrete_action - 1
    vec_obs, vec_reward, vec_done, _ = vec_env.step(signed_action)

    ref_env = CartPoleEnv()
    check_close = np.testing.assert_allclose
    batch_indices = ((r, c) for r in range(n_rows) for c in range(n_cols))
    for r, c in batch_indices:
        # Reset first, then overwrite the state so both envs start identically.
        ref_env.reset()
        ref_env.state = init_state[r, c]
        ref_obs, ref_reward, ref_done, _ = ref_env.step(discrete_action[r, c, 0])

        check_close(ref_obs, vec_obs[r, c])
        check_close(ref_reward, vec_reward[r, c])
        check_close(ref_done, vec_done[r, c])
def test_torch_np_equality(self):
    """Check that array and torch inputs give matching step results.

    The state/action pair from ``self.state_action`` is stepped twice on the
    same environment instance: once as-is and once converted with
    ``torch.tensor``. Observations must agree within ``rtol=1e-6`` /
    ``atol=1e-6``; rewards and done flags are compared with the default
    precision of ``assert_almost_equal``.
    """
    env = VectorizedCartPoleEnv()
    state, action = self.state_action

    env.state = state
    np_obs, np_reward, np_done, _ = env.step(action)

    env.state = torch.tensor(state)
    t_obs, t_reward, t_done, _ = env.step(torch.tensor(action))

    # The third and fourth positional parameters of assert_almost_equal are
    # ``decimal`` and ``err_msg``. Passing 1e-6 for both set decimal=1e-6,
    # which accepts absolute differences on the order of 1. assert_allclose
    # expresses the intended 1e-6 tolerances explicitly.
    np.testing.assert_allclose(np_obs, t_obs, rtol=1e-6, atol=1e-6)
    np.testing.assert_almost_equal(np_reward, t_reward)
    np.testing.assert_almost_equal(np_done, t_done)