def _dataloader(self) -> DataLoader:
    """Initialize the replay-buffer-backed dataset and wrap it in a DataLoader.

    Side effects: rebinds ``self.buffer`` to a fresh ``MultiStepBuffer`` and
    ``self.dataset`` to a new ``ExperienceSourceDataset``.

    Returns:
        DataLoader streaming batches produced by ``self.train_batch``.
    """
    # A fresh buffer, pre-filled with warm-start experiences before training.
    replay_buffer = MultiStepBuffer(self.replay_size, self.n_steps)
    self.buffer = replay_buffer
    self.populate(self.warm_start_size)

    # The dataset pulls batches lazily from the training batch generator.
    dataset = ExperienceSourceDataset(self.train_batch)
    self.dataset = dataset
    return DataLoader(dataset=dataset, batch_size=self.batch_size)
def test_sample_3_step(self):
    """Test that the final output of a 3-step sample is correct."""
    self.buffer = MultiStepBuffer(buffer_size=10, n_step=3)
    self.buffer.append(self.experience01)
    self.buffer.append(self.experience02)
    # Fixed: the third step should be experience03 (previously experience02
    # was appended twice; this only passed because the two fixtures are
    # value-identical, and it obscured the test's intent).
    self.buffer.append(self.experience03)

    # Discounted 3-step return: 0 + 0.9 * 1 + 0.9**2 * 1 = 1.71
    reward_gt = 1.71

    batch = self.buffer.sample(1)

    # NOTE(review): comparing ``a.all()`` against ``b.all()`` only checks
    # truthiness of each array, not element-wise equality — consider
    # np.testing.assert_array_equal for a stricter check.
    self.assertEqual(batch[0].all(), self.experience01.state.all())
    self.assertEqual(batch[1], self.experience01.action)
    self.assertEqual(batch[2], reward_gt)
    self.assertEqual(batch[3], self.experience03.done)
    self.assertEqual(batch[4].all(), self.experience03.new_state.all())
def test_get_transition_info_3_step(self):
    """Test that the accumulated experience is correct with multi step."""
    self.buffer = MultiStepBuffer(buffer_size=10, n_step=3)
    self.buffer.append(self.experience01)
    self.buffer.append(self.experience02)
    # Fixed: append experience03 as the third step — the reward math below
    # already references ``self.experience03.reward``; previously
    # experience02 was appended twice, which only worked because the two
    # fixtures are value-identical.
    self.buffer.append(self.experience03)

    reward, next_state, done = self.buffer.get_transition_info()

    # n-step return accumulated back-to-front with gamma = 0.9,
    # truncated by the done flag.
    reward_01 = self.experience02.reward + 0.9 * self.experience03.reward * (1 - done)
    reward_gt = self.experience01.reward + 0.9 * reward_01 * (1 - done)

    self.assertEqual(reward, reward_gt)
    self.assertEqual(next_state.all(), self.next_state_02.all())
    self.assertEqual(self.experience03.done, done)
def test_get_transition_info_3_step(self):
    """Test that the accumulated experience is correct with multi step."""
    self.buffer = MultiStepBuffer(capacity=10, n_steps=3, gamma=self.gamma)
    self.buffer.append(self.experience01)
    self.buffer.append(self.experience02)
    # Fixed: append experience03 as the third step — the reward math below
    # already references ``self.experience03.reward``; previously
    # experience02 was appended twice, which only worked because the two
    # fixtures are value-identical.
    self.buffer.append(self.experience03)

    # First accumulated transition stored in the underlying buffer.
    reward = self.buffer.buffer[0].reward
    next_state = self.buffer.buffer[0].new_state
    done = self.buffer.buffer[0].done

    # n-step return accumulated back-to-front, truncated by the done flag.
    reward_01 = self.experience02.reward + self.gamma * self.experience03.reward * (1 - done)
    reward_gt = self.experience01.reward + self.gamma * reward_01 * (1 - done)

    self.assertEqual(reward, reward_gt)
    self.assertEqual(next_state.all(), self.next_state_02.all())
    self.assertEqual(self.experience03.done, done)
def setUp(self) -> None:
    """Create a 2-step buffer plus two distinct transition fixtures."""
    self.gamma = 0.9
    self.buffer = MultiStepBuffer(capacity=10, n_steps=2, gamma=self.gamma)

    # First transition: all-zero tensors.
    self.state = np.zeros([32, 32])
    self.next_state = np.zeros([32, 32])
    self.action = np.zeros([1])
    self.reward = np.zeros([1])
    self.done = np.zeros([1])

    # Second transition: all-one state/action/reward, done flag still zero.
    self.state_02 = np.ones([32, 32])
    self.next_state_02 = np.ones([32, 32])
    self.action_02 = np.ones([1])
    self.reward_02 = np.ones([1])
    self.done_02 = np.zeros([1])

    self.experience01 = Experience(self.state, self.action, self.reward, self.done, self.next_state)
    self.experience02 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
    # experience03 is intentionally value-identical to experience02.
    self.experience03 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
def setUp(self) -> None:
    """Create a 2-step buffer plus two distinct transition fixtures."""
    self.buffer = MultiStepBuffer(buffer_size=10, n_step=2)

    # First transition: all-zero tensors.
    self.state = np.zeros([32, 32])
    self.next_state = np.zeros([32, 32])
    self.action = np.zeros([1])
    self.reward = np.zeros([1])
    self.done = np.zeros([1])

    # Second transition: all-one state/action/reward, done flag still zero.
    self.state_02 = np.ones([32, 32])
    self.next_state_02 = np.ones([32, 32])
    self.action_02 = np.ones([1])
    self.reward_02 = np.ones([1])
    self.done_02 = np.zeros([1])

    self.experience01 = Experience(self.state, self.action, self.reward, self.done, self.next_state)
    self.experience02 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
    # experience03 is intentionally value-identical to experience02.
    self.experience03 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
class TestMultiStepReplayBuffer(TestCase):
    """Tests for ``MultiStepBuffer``: n-step accumulation of experiences."""

    def setUp(self) -> None:
        """Create a 2-step buffer plus two distinct transition fixtures."""
        self.buffer = MultiStepBuffer(buffer_size=10, n_step=2)

        # First transition: all-zero tensors.
        self.state = np.zeros([32, 32])
        self.next_state = np.zeros([32, 32])
        self.action = np.zeros([1])
        self.reward = np.zeros([1])
        self.done = np.zeros([1])

        # Second transition: all-one state/action/reward, done flag still zero.
        self.state_02 = np.ones([32, 32])
        self.next_state_02 = np.ones([32, 32])
        self.action_02 = np.ones([1])
        self.reward_02 = np.ones([1])
        self.done_02 = np.zeros([1])

        self.experience01 = Experience(self.state, self.action, self.reward, self.done, self.next_state)
        self.experience02 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
        # experience03 is intentionally value-identical to experience02.
        self.experience03 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)

    def test_append_single_experience_less_than_n(self):
        """If a single experience is added and n > 1, nothing should be added
        to the buffer as it is waiting for experiences to equal n."""
        self.assertEqual(len(self.buffer), 0)
        self.buffer.append(self.experience01)
        self.assertEqual(len(self.buffer), 0)

    def test_append_single_experience(self):
        """If a single experience is added and n > 1, nothing should be added
        to the buffer, but the pending n-step buffer should hold it."""
        self.assertEqual(len(self.buffer), 0)
        self.buffer.append(self.experience01)
        self.assertEqual(len(self.buffer), 0)
        self.assertEqual(len(self.buffer.n_step_buffer), 1)

    def test_append_single_experience2(self):
        """If the number of experiences collected >= n, the multi-step
        experience should be added to the full buffer."""
        self.assertEqual(len(self.buffer), 0)
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)
        self.assertEqual(len(self.buffer), 1)
        self.assertEqual(len(self.buffer.n_step_buffer), 2)

    def test_sample_single_experience(self):
        """If there is only a single experience added, sample should raise."""
        self.buffer.append(self.experience01)
        with self.assertRaises(Exception) as context:
            _ = self.buffer.sample(batch_size=1)
        self.assertIsInstance(context.exception, Exception)

    def test_sample_multi_experience(self):
        """Fixed docstring: once n experiences are collected, sampling should
        return a batch whose next_state comes from the latest experience.
        (Previously this docstring was a copy-paste of the single-experience
        test and described the wrong behavior.)"""
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)

        batch = self.buffer.sample(batch_size=1)

        # NOTE(review): comparing ``a.all()`` against ``b.all()`` only checks
        # truthiness of each array, not element-wise equality.
        next_state = batch[4]
        self.assertEqual(next_state.all(), self.next_state_02.all())

    def test_get_transition_info_2_step(self):
        """Test that the accumulated 2-step experience is correct."""
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)

        reward, next_state, done = self.buffer.get_transition_info()

        # 2-step return with gamma = 0.9, truncated by the done flag.
        reward_gt = self.experience01.reward + (0.9 * self.experience02.reward) * (1 - done)

        self.assertEqual(reward, reward_gt)
        self.assertEqual(next_state.all(), self.next_state_02.all())
        self.assertEqual(self.experience02.done, done)

    def test_get_transition_info_3_step(self):
        """Test that the accumulated experience is correct with multi step."""
        self.buffer = MultiStepBuffer(buffer_size=10, n_step=3)
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)
        # Fixed: append experience03 as the third step — the reward math below
        # already references ``self.experience03.reward``; previously
        # experience02 was appended twice, which only worked because the two
        # fixtures are value-identical.
        self.buffer.append(self.experience03)

        reward, next_state, done = self.buffer.get_transition_info()

        # n-step return accumulated back-to-front with gamma = 0.9.
        reward_01 = self.experience02.reward + 0.9 * self.experience03.reward * (1 - done)
        reward_gt = self.experience01.reward + 0.9 * reward_01 * (1 - done)

        self.assertEqual(reward, reward_gt)
        self.assertEqual(next_state.all(), self.next_state_02.all())
        self.assertEqual(self.experience03.done, done)

    def test_sample_3_step(self):
        """Test that the final output of a 3-step sample is correct."""
        self.buffer = MultiStepBuffer(buffer_size=10, n_step=3)
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)
        # Fixed: third step should be experience03 (was experience02 twice).
        self.buffer.append(self.experience03)

        # Discounted 3-step return: 0 + 0.9 * 1 + 0.9**2 * 1 = 1.71
        reward_gt = 1.71

        batch = self.buffer.sample(1)

        self.assertEqual(batch[0].all(), self.experience01.state.all())
        self.assertEqual(batch[1], self.experience01.action)
        self.assertEqual(batch[2], reward_gt)
        self.assertEqual(batch[3], self.experience03.done)
        self.assertEqual(batch[4].all(), self.experience03.new_state.all())