Example 1: the vectorized env preserves the batch shapes when stepped with torch tensors.
    def test_torch_shape(self):
        dim1, dim2 = self.dims

        env = VectorizedCartPoleEnv()
        state, action = self.state_action

        env.state = torch.tensor(state)
        obs, reward, done, _ = env.step(torch.tensor(action))
        assert obs.shape == (dim1, dim2, 4)
        assert reward.shape == (dim1, dim2)
        assert done.shape == (dim1, dim2)
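All four snippets assume a fixture supplying `self.dims` and `self.state_action`, which is not shown here. A minimal sketch of what such a fixture could look like; the batch sizes, value ranges, and the (dim1, dim2, 1) action shape are assumptions inferred from the assertions and indexing in the tests, not taken from the original code:

import numpy as np

class TestVectorizedCartPole:
    def setup_method(self):
        # batch layout: dim1 x dim2 independent cart-pole instances
        self.dims = (8, 16)
        dim1, dim2 = self.dims
        rng = np.random.default_rng(0)
        # a CartPole state is (x, x_dot, theta, theta_dot): 4 features
        state = rng.uniform(-0.05, 0.05, size=(dim1, dim2, 4))
        # one continuous action per instance, with a trailing singleton axis
        action = rng.uniform(-1.0, 1.0, size=(dim1, dim2, 1))
        self.state_action = (state, action)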
Example 2: the same shape checks, driven with NumPy inputs.
    def test_np_shape(self):
        dim1, dim2 = self.dims

        env = VectorizedCartPoleEnv()
        state, action = self.state_action

        env.state = state
        obs, reward, done, _ = env.step(action)
        assert obs.shape == (dim1, dim2, 4)
        assert reward.shape == (dim1, dim2)
        assert done.shape == (dim1, dim2)
Example 3: every (i, j) slot of the vectorized env matches a scalar CartPoleEnv stepped with the same state and action.
    def test_vectorized_original_equality(self):
        venv = VectorizedCartPoleEnv()
        state, action = self.state_action
        action = (action > 0).astype(int)  # binarize the continuous fixture actions to {0, 1}

        dim1, dim2 = self.dims

        venv.state = state
        # the vectorized env's convention, per this test: actions in {-1, +1}
        vobs, vreward, vdone, _ = venv.step(2 * action - 1)

        env = CartPoleEnv()
        for i in range(dim1):
            for j in range(dim2):
                env.reset()
                env.state = state[i, j]
                # the scalar env takes one {0, 1} action per step
                obs, reward, done, _ = env.step(action[i, j, 0])

                np.testing.assert_allclose(obs, vobs[i, j])
                np.testing.assert_allclose(reward, vreward[i, j])
                np.testing.assert_allclose(done, vdone[i, j])
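Example 3 pins down the contract the other tests rely on: each slot of the batch must evolve exactly like a scalar CartPoleEnv. Below is a sketch of a batched step that would satisfy it, assuming the physics constants of the classic Gym CartPoleEnv and a force proportional to the {-1, +1} action; none of this is taken from the tested implementation:

import numpy as np

class VectorizedCartPoleSketch:
    # constants as in the classic Gym CartPoleEnv
    gravity = 9.8
    masscart = 1.0
    masspole = 0.1
    total_mass = masscart + masspole
    length = 0.5  # half the pole's length
    polemass_length = masspole * length
    force_mag = 10.0
    tau = 0.02
    theta_threshold = 12 * 2 * np.pi / 360
    x_threshold = 2.4

    def step(self, action):
        # unpack the trailing feature axis into four batched arrays
        x, x_dot, theta, theta_dot = np.moveaxis(self.state, -1, 0)
        # assumed action convention: force scales with the {-1, +1} action
        force = self.force_mag * np.squeeze(action, axis=-1)
        costheta, sintheta = np.cos(theta), np.sin(theta)
        temp = (force + self.polemass_length * theta_dot ** 2 * sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass)
        )
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        # Euler integration, broadcast over the leading batch axes
        x = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
        self.state = np.stack([x, x_dot, theta, theta_dot], axis=-1)
        done = (np.abs(x) > self.x_threshold) | (np.abs(theta) > self.theta_threshold)
        reward = np.ones_like(x)
        return self.state, reward, done, {}

Broadcasting the Euler update over the leading axes is what lets the nested loop in the test collapse into a single vectorized call.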
Example 4: the NumPy and torch code paths produce numerically equal results.
    def test_torch_np_equality(self):
        env = VectorizedCartPoleEnv()
        state, action = self.state_action

        env.state = state
        np_obs, np_reward, np_done, _ = env.step(action)

        env.state = torch.tensor(state)
        t_obs, t_reward, t_done, _ = env.step(torch.tensor(action))

        # compare float outputs with explicit element-wise tolerances;
        # the done flags must match exactly
        np.testing.assert_allclose(np_obs, t_obs, rtol=1e-6, atol=1e-6)
        np.testing.assert_allclose(np_reward, t_reward)
        np.testing.assert_array_equal(np_done, t_done)
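Example 4 implies that step() accepts both NumPy arrays and torch tensors. One common way to support that is to dispatch on the input type; the helper below is purely hypothetical and not part of the tested code:

import numpy as np
import torch

def _array_module(x):
    # Hypothetical dispatch helper: pick the library that matches the
    # input, so a single step() implementation can serve both backends.
    # numpy and torch expose matching names (cos, sin, abs, ...) for the
    # elementwise operations a cart-pole update needs.
    return torch if isinstance(x, torch.Tensor) else np

With such a helper, step() could call xp = _array_module(action) once and write the dynamics against xp, which is consistent with the near-identical outputs this test asserts.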