def test_seed_not_set(self):
    """Agents that are never seeded are expected to earn differing rewards.

    Both environments receive the same seed, but ``seed`` is never called
    on either agent; the test then requires the two reward arrays to
    differ in at least one position.
    """
    env_seed = 123

    def _run_unseeded_agent():
        # Fresh collaborators for every run; only the environment is seeded.
        env = TestEnv()
        env.seed(env_seed)
        brain = TestBrain(self._n_states, self._n_actions)
        policy = EpsGreedyPolicy(self._eps)
        memory = Memory(self._capacity, self._batch_size)
        agent = TdAgent(self._n_episodes, env, brain, policy, memory,
                        self._gamma)
        rewards, _ = agent.run()
        return rewards

    first_rewards = _run_unseeded_agent()
    second_rewards = _run_unseeded_agent()

    # Stand-in for "rewards differ": the summed absolute element-wise
    # difference must be strictly positive.
    total_abs_diff = np.sum(np.abs(first_rewards - second_rewards))
    self.assertGreater(total_abs_diff, 0)
def test_seed_set(self):
    """Two identically seeded agent runs must return equal reward lists.

    The same seed is applied to both the environment and the agent of
    each run before the rewards are compared element by element.
    """
    shared_seed = 123

    def _run_seeded_agent():
        # Environment and agent are both seeded with the shared value.
        env = TestEnv()
        env.seed(shared_seed)
        brain = TestBrain(self._n_states, self._n_actions)
        policy = EpsGreedyPolicy(self._eps)
        memory = Memory(self._capacity, self._batch_size)
        agent = TdAgent(self._n_episodes, env, brain, policy, memory,
                        self._gamma)
        agent.seed(shared_seed)
        rewards, _ = agent.run()
        # Plain lists so assertListEqual can compare them.
        return rewards.tolist()

    first_rewards = _run_seeded_agent()
    second_rewards = _run_seeded_agent()

    self.assertListEqual(first_rewards, second_rewards)
def setUp(self):
    """Create the hyperparameters and the default agent used by the tests."""
    # Plain scalar settings.
    self._n_episodes = 10
    self._n_states, self._n_actions = 3, 7
    self._eps = 0.1
    self._capacity, self._batch_size = 1000, 8
    self._gamma = 0.99
    self._train_freq = 4

    # Collaborators, each built from the settings above.
    self._env = TestEnv()
    self._brain = TestBrain(self._n_states, self._n_actions)
    self._acting = EpsGreedyPolicy(self._eps)
    self._replay_memory = Memory(self._capacity, self._batch_size)

    # The agent under test wires all collaborators together.
    self._agent = TdAgent(
        self._n_episodes,
        self._env,
        self._brain,
        self._acting,
        self._replay_memory,
        self._gamma,
        train_freq=self._train_freq,
    )
def test_seed(self):
    """Equally seeded memories with the same contents sample equal batches."""
    capacity, batch_size = 100, 16
    n_experiences = 120  # more experiences than the memory can hold
    seed = 123

    def _seeded_memory():
        memory = Memory(capacity, batch_size)
        memory.seed(seed)
        return memory

    first_memory = _seeded_memory()
    second_memory = _seeded_memory()

    # Feed the very same experience object to both memories.
    for _ in range(n_experiences):
        shared_experience = _random_experience()
        for memory in (first_memory, second_memory):
            memory.add(shared_experience)

    # Each batch is laid out as (s, a, r, s1, s1_mask).
    first_batch = first_memory.sample()
    second_batch = second_memory.sample()
    for lhs, rhs in zip(first_batch, second_batch):
        self.assertTrue(np.array_equal(lhs, rhs))
def setUp(self):
    """Create a small Memory (capacity 5, batch size 2) for each test."""
    self._capacity, self._batch_size = 5, 2
    self._memory = Memory(self._capacity, self._batch_size)
class MemoryTest(unittest.TestCase):
    """Tests for Memory: seeding, capacity, buffering, sampling, flushing."""

    def setUp(self):
        """Create a small Memory (capacity 5, batch size 2) for each test."""
        self._capacity, self._batch_size = 5, 2
        self._memory = Memory(self._capacity, self._batch_size)

    def test_seed(self):
        """Equally seeded memories with equal contents sample equal batches."""
        capacity, batch_size = 100, 16
        n_experiences = 120  # more experiences than the memory can hold
        seed = 123

        def _seeded_memory():
            memory = Memory(capacity, batch_size)
            memory.seed(seed)
            return memory

        first_memory = _seeded_memory()
        second_memory = _seeded_memory()

        # Feed the very same experience object to both memories.
        for _ in range(n_experiences):
            shared_experience = _random_experience()
            for memory in (first_memory, second_memory):
                memory.add(shared_experience)

        # Each batch is laid out as (s, a, r, s1, s1_mask).
        first_batch = first_memory.sample()
        second_batch = second_memory.sample()
        for lhs, rhs in zip(first_batch, second_batch):
            self.assertTrue(np.array_equal(lhs, rhs))

    def test_add_honors_capacity(self):
        """Adding past capacity keeps only the most recent experiences."""
        overflow = 3
        n_experiences = self._capacity + overflow
        experiences = [_random_experience() for _ in range(n_experiences)]

        for item in experiences:
            self._memory.add(item)

        stored = self._memory._samples
        self.assertEqual(self._capacity, len(stored))

        # The first `overflow` experiences are expected to be gone.
        survivors = experiences[overflow:]
        self.assertListEqual(survivors, self._memory._samples)

    def test_add_to_memory(self):
        """A plain add stores the experience in samples, not the buffer."""
        item = _random_experience()

        self._memory.add(item)

        self.assertEqual(1, len(self._memory._samples))
        self.assertIn(item, self._memory._samples)
        self.assertEqual(0, len(self._memory._buffer))

    def test_add_to_buffer(self):
        """Adding with buffer=True stores the experience in the buffer only."""
        item = _random_experience()

        self._memory.add(item, buffer=True)

        self.assertEqual(0, len(self._memory._samples))
        self.assertEqual(1, len(self._memory._buffer))
        self.assertIn(item, self._memory._buffer)

    def test_sample_via_memory(self):
        """Sampling two stored experiences yields five arrays of shape (2,)."""
        first_item = _random_experience()
        second_item = _random_experience()
        for item in (first_item, second_item):
            self._memory.add(item)

        # Unpacking also checks that exactly five components come back.
        s, a, r, s1, s1_mask = self._memory.sample()

        for component in (s, a, r, s1, s1_mask):
            self.assertEqual((2, ), component.shape)

    def test_sample_ignores_buffer(self):
        """Sampling returns None when only the buffer holds experiences."""
        n_buffered = 5 * self._batch_size
        for _ in range(n_buffered):
            item = _random_experience()
            self._memory.add(item, buffer=True)

        sampled = self._memory.sample()

        self.assertIsNone(sampled)

    def test_sample_honors_batch_size(self):
        """Every component of a sampled batch has batch_size leading entries."""
        for _ in range(self._capacity):
            self._memory.add(_random_experience())

        # The batch is laid out as (s, a, r, s1, s1_mask).
        sampled = self._memory.sample()

        for component in sampled:
            self.assertEqual(self._batch_size, component.shape[0])

    def test_flush_no_disount(self):
        """Flushing without gamma moves the buffered experience to samples."""
        item = _random_experience()
        self._memory.add(item, buffer=True)

        self._memory.flush()

        self.assertEqual(1, len(self._memory._samples))
        self.assertIn(item, self._memory._samples)
        self.assertEqual(0, len(self._memory._buffer))

    def test_flush_with_disount(self):
        """Flushing with gamma=0.9 stores the expected discounted rewards.

        Three experiences created with r=1 are buffered. After the flush,
        position 2 of each stored sample (presumably the reward, per the
        (s, a, r, s1, s1_mask) layout) is expected to be, from the last
        sample to the first: 1, 1 + 0.9 * 1 = 1.9, 1 + 0.9 * 1.9 = 2.71.
        """
        for _ in range(3):
            item = _random_experience(r=1)
            self._memory.add(item, buffer=True)

        self._memory.flush(gamma=0.9)

        stored = self._memory._samples
        self.assertAlmostEqual(1, stored[2][2], places=4)
        self.assertAlmostEqual(1.9, stored[1][2])
        self.assertAlmostEqual(2.71, stored[0][2])