Esempio n. 1
0
    def test_stateless(self):
        """Check that replaying recorded actions reproduces each rollout.

        Three rollouts of at most five steps are recorded after consecutive
        resets, using exponentiated random integers as actions. The recorded
        instances are then revisited in reverse order: ``instance_index`` is
        set to the recorded index minus one, the environment is reset, and
        the recorded actions are replayed. Every replayed row (state, reward,
        done flag, action) must match the recording.
        """
        max_steps = 5
        env = ObservationWrapper(self.env)
        rng = np.random.default_rng(123)

        def rollout(recording=None):
            """Step ``env`` until done or ``max_steps`` and return the stacked rows.

            With ``recording=None`` a fresh action is sampled at every step;
            otherwise the action is read from the last column of the matching
            recorded row.
            """
            rows = []
            for step in range(max_steps):
                if recording is None:
                    action = np.exp(rng.integers(low=-10, high=1))
                else:
                    action = recording[step][-1]
                state, reward, done, _ = env.step(action)
                rows.append(np.concatenate([state, [reward, int(done), action]]))
                if done:
                    break
            return np.array(rows)

        recorded = []
        instance_idxs = []
        for _ in range(3):
            env.reset()
            instance_idxs.append(env.instance_index)
            recorded.append(rollout())

        # The replay phase needs no RNG: its actions come from the recordings.
        for idx, expected in zip(reversed(instance_idxs), reversed(recorded)):
            # The assertion below expects reset() to move from idx - 1 to idx.
            env.instance_index = idx - 1
            env.reset()
            self.assertEqual(env.instance_index, idx)

            np.testing.assert_allclose(expected, rollout(expected))
Esempio n. 2
0
    def test_reproducibility(self):
        """Run the same seeded rollout twice and assert both runs agree.

        Each run seeds the wrapped environment and a fresh NumPy generator
        with 123, sets ``instance_index`` to 0, stores the result of
        ``get_instance_set()``, resets, and then takes at most five steps.
        The two stored instance sets must compare equal, and the two
        trajectories must have the same size and matching values.
        """
        wrapped = ObservationWrapper(self.env)
        trajectories = []
        instance_sets = []

        for _ in range(2):
            # Identical seeds for the action source and the environment on every run.
            action_rng = np.random.default_rng(123)
            wrapped.seed(123)
            wrapped.instance_index = 0
            instance_sets.append(wrapped.get_instance_set())

            wrapped.reset()

            transitions = []
            for _step in range(5):
                action = np.exp(action_rng.integers(low=-10, high=1))
                state, reward, done, _ = wrapped.step(action)
                # One row per step: state values, then reward, done flag and action.
                transitions.append(
                    np.concatenate([state, [reward, int(done), action]])
                )
                if done:
                    break
            trajectories.append(np.array(transitions))

        first_run, second_run = trajectories
        self.assertEqual(first_run.size, second_run.size)
        self.assertEqual(instance_sets[0], instance_sets[1])
        np.testing.assert_allclose(first_run, second_run)