예제 #1
0
    def test_seed_not_set(self):
        """Check that two runs differ when ``agent.seed`` is never called.

        Both environments are seeded with the same value, but neither agent
        is seeded; the test asserts that the two reward results are not
        element-wise identical.
        """
        seed = 123

        def run_unseeded_agent():
            # Every run gets its own environment, brain, policy and memory.
            env = TestEnv()
            env.seed(seed)
            agent = TdAgent(
                self._n_episodes,
                env,
                TestBrain(self._n_states, self._n_actions),
                EpsGreedyPolicy(self._eps),
                Memory(self._capacity, self._batch_size),
                self._gamma,
            )
            rewards, _ = agent.run()
            return rewards

        first_rewards = run_unseeded_agent()
        second_rewards = run_unseeded_agent()

        # A strictly positive summed absolute difference means at least one
        # entry of the two reward results differs.
        total_abs_diff = np.sum(np.abs(first_rewards - second_rewards))
        self.assertGreater(total_abs_diff, 0)
예제 #2
0
    def test_seed_set(self):
        """Check that two identically seeded runs yield identical rewards.

        Environment and agent are both seeded with the same value for each
        run; the reward results (converted to lists) must compare equal.
        """
        seed = 123

        def run_seeded_agent():
            # Every run gets its own environment, brain, policy and memory.
            env = TestEnv()
            env.seed(seed)
            agent = TdAgent(
                self._n_episodes,
                env,
                TestBrain(self._n_states, self._n_actions),
                EpsGreedyPolicy(self._eps),
                Memory(self._capacity, self._batch_size),
                self._gamma,
            )
            agent.seed(seed)

            rewards, _ = agent.run()
            # assertListEqual needs plain lists, so convert before returning.
            return rewards.tolist()

        first_rewards = run_seeded_agent()
        second_rewards = run_seeded_agent()

        self.assertListEqual(first_rewards, second_rewards)
예제 #3
0
    def setUp(self):
        """Build the per-test fixture: settings, collaborators and the agent."""
        # Scalar settings shared by the tests.
        self._n_episodes = 10
        self._n_states = 3
        self._n_actions = 7
        self._eps = 0.1
        self._capacity = 1000
        self._batch_size = 8
        self._gamma = 0.99
        self._train_freq = 4

        # Collaborators handed to the agent.
        self._env = TestEnv()
        self._brain = TestBrain(self._n_states, self._n_actions)
        self._acting = EpsGreedyPolicy(self._eps)
        self._replay_memory = Memory(self._capacity, self._batch_size)

        self._agent = TdAgent(
            self._n_episodes,
            self._env,
            self._brain,
            self._acting,
            self._replay_memory,
            self._gamma,
            train_freq=self._train_freq,
        )
예제 #4
0
    def test_seed(self):
        """Check that identically seeded memories sample identical batches.

        Two memories are seeded with the same value and fed the same
        experiences; every component of their sampled batches must match.
        """
        seed = 123
        capacity, batch_size = 100, 16
        n_experiences = 120

        def seeded_memory():
            memory = Memory(capacity, batch_size)
            memory.seed(seed)
            return memory

        first = seeded_memory()
        second = seeded_memory()

        for _ in range(n_experiences):
            # The very same experience object goes into both memories.
            experience = _random_experience()
            for memory in (first, second):
                memory.add(experience)

        # Each batch is a tuple (s, a, r, s1, s1_mask).
        batches = (first.sample(), second.sample())

        for left, right in zip(*batches):
            self.assertTrue(np.array_equal(left, right))
예제 #5
0
 def setUp(self):
     """Create the memory under test with capacity 5 and batch size 2."""
     self._capacity, self._batch_size = 5, 2
     self._memory = Memory(self._capacity, self._batch_size)
예제 #6
0
class MemoryTest(unittest.TestCase):
    def setUp(self):
        self._capacity = 5
        self._batch_size = 2
        self._memory = Memory(self._capacity, self._batch_size)

    def test_seed(self):
        capacity = 100
        batch_size = 16
        n_experiences = 120
        seed = 123

        memory1 = Memory(capacity, batch_size)
        memory1.seed(seed)

        memory2 = Memory(capacity, batch_size)
        memory2.seed(seed)

        for _ in range(n_experiences):
            experience = _random_experience()

            memory1.add(experience)
            memory2.add(experience)

        # (s, a, r, s1, s1_mask)
        batch1 = memory1.sample()
        batch2 = memory2.sample()

        for b1, b2 in zip(batch1, batch2):
            self.assertTrue(np.array_equal(b1, b2))

    def test_add_honors_capacity(self):
        n_experiences = self._capacity + 3
        experiences = [_random_experience() for _ in range(n_experiences)]

        for experience in experiences:
            self._memory.add(experience)

        self.assertEqual(self._capacity, len(self._memory._samples))

        expected_samples = experiences[n_experiences - self._capacity:]
        self.assertListEqual(expected_samples, self._memory._samples)

    def test_add_to_memory(self):
        experience = _random_experience()
        self._memory.add(experience)

        self.assertEqual(1, len(self._memory._samples))
        self.assertIn(experience, self._memory._samples)

        self.assertEqual(0, len(self._memory._buffer))

    def test_add_to_buffer(self):
        experience = _random_experience()
        self._memory.add(experience, buffer=True)

        self.assertEqual(0, len(self._memory._samples))

        self.assertEqual(1, len(self._memory._buffer))
        self.assertIn(experience, self._memory._buffer)

    def test_sample_via_memory(self):
        experience1 = _random_experience()
        experience2 = _random_experience()
        self._memory.add(experience1)
        self._memory.add(experience2)

        s, a, r, s1, s1_mask = self._memory.sample()

        self.assertEqual((2, ), s.shape)
        self.assertEqual((2, ), a.shape)
        self.assertEqual((2, ), r.shape)
        self.assertEqual((2, ), s1.shape)
        self.assertEqual((2, ), s1_mask.shape)

    def test_sample_ignores_buffer(self):
        for _ in range(5 * self._batch_size):
            experience = _random_experience()
            self._memory.add(experience, buffer=True)

        actual = self._memory.sample()
        self.assertIsNone(actual)

    def test_sample_honors_batch_size(self):
        for _ in range(self._capacity):
            self._memory.add(_random_experience())

        # (s, a, r, s1, s1_mask)
        batch = self._memory.sample()

        for b in batch:
            self.assertEqual(self._batch_size, b.shape[0])

    def test_flush_no_disount(self):
        experience = _random_experience()
        self._memory.add(experience, buffer=True)

        self._memory.flush()

        self.assertEqual(1, len(self._memory._samples))
        self.assertIn(experience, self._memory._samples)

        self.assertEqual(0, len(self._memory._buffer))

    def test_flush_with_disount(self):
        for _ in range(3):
            experience = _random_experience(r=1)
            self._memory.add(experience, buffer=True)

        self._memory.flush(gamma=0.9)

        self.assertAlmostEqual(1, self._memory._samples[2][2], places=4)
        self.assertAlmostEqual(1.9, self._memory._samples[1][2])
        self.assertAlmostEqual(2.71, self._memory._samples[0][2])