def setUp(self) -> None:
    """Build a dummy experience, a mocked source, and a warm-started buffer."""
    # Fixed-shape fake transition data; values are random but shapes matter.
    self.state = np.random.rand(32, 32)
    self.next_state = np.random.rand(32, 32)
    self.action = np.ones([1])
    self.reward = np.ones([1])
    self.done = np.zeros([1])
    self.experience = Experience(self.state, self.action, self.reward, self.done, self.next_state)

    # Mocked source: every step() yields the same experience, zero reward, not done.
    self.source = Mock()
    self.source.step = Mock(return_value=(self.experience, torch.tensor(0), False))

    # Warm-start a capacity-20 buffer with 10 copies of the experience.
    self.warm_start = 10
    self.buffer = ReplayBuffer(20)
    for _ in range(self.warm_start):
        self.buffer.append(self.experience)
def _dataloader(self) -> DataLoader:
    """Build the DataLoader over a freshly populated replay buffer.

    The buffer is recreated and warm-started, then wrapped in an
    ``ExperienceSourceDataset`` driven by ``self.train_batch``.
    """
    self.buffer = ReplayBuffer(self.replay_size)
    self.populate(self.warm_start_size)
    self.dataset = ExperienceSourceDataset(self.train_batch)
    loader = DataLoader(dataset=self.dataset, batch_size=self.batch_size)
    return loader
def prepare_data(self) -> None:
    """Initialize the experience source, replay buffer, and RL dataset.

    NOTE(review): ``populate`` presumably fills ``self.buffer`` from
    ``self.source`` — defined elsewhere; confirm the call order dependency.
    """
    self.source = ExperienceSource(self.env, self.agent)
    self.buffer = ReplayBuffer(self.replay_size)
    # Buffer must exist before populate() warm-starts it.
    self.populate(self.warm_start_size)
    self.dataset = RLDataset(self.buffer, self.sample_len)
def prepare_data(self) -> None:
    """Initialize the experience source, replay buffer, and RL dataset.

    Picks the trainer's root GPU when at least one GPU is configured,
    otherwise falls back to ``self.device``.
    NOTE(review): ``trainer.root_gpu`` / ``trainer.num_gpus`` are legacy
    Lightning attributes — verify against the pinned Lightning version.
    """
    if self.trainer.num_gpus >= 1:
        device = torch.device(self.trainer.root_gpu)
    else:
        device = self.device
    self.source = ExperienceSource(self.env, self.agent, device)
    self.buffer = ReplayBuffer(self.replay_size)
    # Buffer must exist before populate() warm-starts it.
    self.populate(self.warm_start_size)
    self.dataset = RLDataset(self.buffer, self.sample_len)
def _dataloader(self) -> DataLoader:
    """Build the DataLoader over a freshly populated replay buffer.

    A new ``ReplayBuffer`` is warm-started via ``populate`` and wrapped in
    an ``RLDataset`` of ``self.sample_len`` samples per epoch.
    """
    self.buffer = ReplayBuffer(self.replay_size)
    self.populate(self.warm_start_size)
    return DataLoader(
        dataset=RLDataset(self.buffer, self.sample_len),
        batch_size=self.batch_size,
    )
class TestReplayBuffer(TestCase):
    """Unit tests for ReplayBuffer append, populate, and sample behaviour."""

    def setUp(self) -> None:
        """Build a dummy experience, a mocked source, and a warm-started buffer."""
        # Fixed-shape fake transition data; values are random but shapes matter.
        self.state = np.random.rand(32, 32)
        self.next_state = np.random.rand(32, 32)
        self.action = np.ones([1])
        self.reward = np.ones([1])
        self.done = np.zeros([1])
        self.experience = Experience(self.state, self.action, self.reward, self.done, self.next_state)

        # Mocked source: every step() yields the same experience, zero reward, not done.
        self.source = Mock()
        self.source.step = Mock(return_value=(self.experience, torch.tensor(0), False))

        # Warm-start a capacity-20 buffer with 10 copies of the experience.
        self.warm_start = 10
        self.buffer = ReplayBuffer(20)
        for _ in range(self.warm_start):
            self.buffer.append(self.experience)

    def test_replay_buffer_append(self):
        """Appending one experience grows the buffer length by exactly one."""
        self.assertEqual(len(self.buffer), self.warm_start)
        self.buffer.append(self.experience)
        self.assertEqual(len(self.buffer), self.warm_start + 1)

    def test_replay_buffer_populate(self):
        """setUp warm-starts the underlying storage with warm_start entries."""
        self.assertEqual(len(self.buffer.buffer), self.warm_start)

    def test_replay_buffer_update(self):
        """Repeated appends accumulate in the underlying storage."""
        batch_size = 3
        self.assertEqual(len(self.buffer.buffer), self.warm_start)
        # Loop index was unused in the original; `_` makes that explicit.
        for _ in range(batch_size):
            self.buffer.append(self.experience)
        self.assertEqual(len(self.buffer.buffer), self.warm_start + batch_size)

    def test_replay_buffer_sample(self):
        """Sampling returns a 5-tuple with batch-leading shapes for each field."""
        batch_size = 3
        for _ in range(10):
            self.buffer.append(self.experience)

        batch = self.buffer.sample(batch_size)
        # Expected layout: (states, actions, rewards, dones, next_states).
        self.assertEqual(len(batch), 5)

        # states
        states = batch[0]
        self.assertEqual(states.shape, (batch_size, 32, 32))
        # actions
        actions = batch[1]
        self.assertEqual(actions.shape, (batch_size, 1))
        # rewards
        rewards = batch[2]
        self.assertEqual(rewards.shape, (batch_size, 1))
        # dones
        dones = batch[3]
        self.assertEqual(dones.shape, (batch_size, 1))
        # next states
        next_states = batch[4]
        self.assertEqual(next_states.shape, (batch_size, 32, 32))