def testMultiplePasses(self):
    """Check sample/train step counts when several SGD passes are configured.

    Builds an AsyncSamplesOptimizer with ``minibatch_buffer_size=10`` and
    ``num_sgd_iter=10``, calls ``self._wait_for(optimizer, 1000, 10000)``,
    then asserts that fewer than 5000 steps were sampled and more than 8000
    steps were trained.
    """
    local_ev, remote_evs = self._make_evs()
    settings = dict(
        minibatch_buffer_size=10,
        num_sgd_iter=10,
        sample_batch_size=10,
        train_batch_size=50,
    )
    optimizer = AsyncSamplesOptimizer(local_ev, remote_evs, **settings)
    self._wait_for(optimizer, 1000, 10000)

    # Each counter is read from its own stats() call, in this order.
    sampled = optimizer.stats()["num_steps_sampled"]
    self.assertLess(sampled, 5000)
    trained = optimizer.stats()["num_steps_trained"]
    self.assertGreater(trained, 8000)
def test_multiple_passes(self):
    """Check sample/train step counts when several SGD passes are configured.

    Wraps the workers from ``self._make_envs()`` in a WorkerSet, builds an
    AsyncSamplesOptimizer with ``minibatch_buffer_size=10`` and
    ``num_sgd_iter=10``, calls ``self._wait_for(optimizer, 1000, 10000)``,
    then asserts that fewer than 5000 steps were sampled and more than 8000
    steps were trained.
    """
    local_worker, remote_workers = self._make_envs()
    worker_set = WorkerSet._from_existing(local_worker, remote_workers)
    settings = dict(
        minibatch_buffer_size=10,
        num_sgd_iter=10,
        rollout_fragment_length=10,
        train_batch_size=50,
    )
    optimizer = AsyncSamplesOptimizer(worker_set, **settings)
    self._wait_for(optimizer, 1000, 10000)

    # Each counter is read from its own stats() call, in this order.
    sampled = optimizer.stats()["num_steps_sampled"]
    self.assertLess(sampled, 5000)
    trained = optimizer.stats()["num_steps_trained"]
    self.assertGreater(trained, 8000)
def testMultiplePasses(self):
    """Check sample/train step counts when several SGD passes are configured.

    The optimizer settings are passed as a single positional config dict.
    After ``self._wait_for(optimizer, 1000, 10000)`` the test asserts that
    fewer than 5000 steps were sampled and more than 8000 were trained.
    """
    local_ev, remote_evs = self._make_evs()
    config = {
        "minibatch_buffer_size": 10,
        "num_sgd_iter": 10,
        "sample_batch_size": 10,
        "train_batch_size": 50,
    }
    optimizer = AsyncSamplesOptimizer(local_ev, remote_evs, config)
    self._wait_for(optimizer, 1000, 10000)

    # Each counter is read from its own stats() call, in this order.
    sampled = optimizer.stats()["num_steps_sampled"]
    self.assertLess(sampled, 5000)
    trained = optimizer.stats()["num_steps_trained"]
    self.assertGreater(trained, 8000)
def testReplay(self):
    """Check step counters when a replay buffer is configured.

    Builds the optimizer from a config dict with 100 replay slots and a
    replay proportion of 10, calls ``self._wait_for(optimizer, 1000, 1000)``,
    then asserts fewer than 5000 sampled steps and more than 8000 replayed
    and trained steps.
    """
    local_ev, remote_evs = self._make_evs()
    config = {
        "replay_buffer_num_slots": 100,
        "replay_proportion": 10,
        "sample_batch_size": 10,
        "train_batch_size": 10,
    }
    optimizer = AsyncSamplesOptimizer(local_ev, remote_evs, config)
    self._wait_for(optimizer, 1000, 1000)

    # One stats() call per assertion, in the same order as the checks.
    sampled = optimizer.stats()["num_steps_sampled"]
    self.assertLess(sampled, 5000)
    replayed = optimizer.stats()["num_steps_replayed"]
    self.assertGreater(replayed, 8000)
    trained = optimizer.stats()["num_steps_trained"]
    self.assertGreater(trained, 8000)
def testReplay(self):
    """Verify sampled, replayed and trained step counts with replay enabled.

    The config dict sets ``replay_buffer_num_slots`` to 100 and
    ``replay_proportion`` to 10. After ``self._wait_for(optimizer, 1000,
    1000)`` the sampled count must be below 5000 while the replayed and
    trained counts must both exceed 8000.
    """
    local_ev, remote_evs = self._make_evs()
    replay_config = {
        "replay_buffer_num_slots": 100,
        "replay_proportion": 10,
        "sample_batch_size": 10,
        "train_batch_size": 10,
    }
    optimizer = AsyncSamplesOptimizer(local_ev, remote_evs, replay_config)
    self._wait_for(optimizer, 1000, 1000)

    # stats() is queried separately for every assertion.
    num_sampled = optimizer.stats()["num_steps_sampled"]
    self.assertLess(num_sampled, 5000)
    num_replayed = optimizer.stats()["num_steps_replayed"]
    self.assertGreater(num_replayed, 8000)
    num_trained = optimizer.stats()["num_steps_trained"]
    self.assertGreater(num_trained, 8000)
def testReplay(self):
    """Check the replay ratio and trained-step count with replay enabled.

    Builds the optimizer with 100 replay slots and a replay proportion of
    10, calls ``self._wait_for(optimizer, 1000, 1000)``, and takes a single
    stats snapshot. Asserts fewer than 5000 sampled steps, a
    replayed/sampled ratio above 0.7, and fewer trained than sampled steps.
    """
    local_ev, remote_evs = self._make_evs()
    settings = dict(
        replay_buffer_num_slots=100,
        replay_proportion=10,
        sample_batch_size=10,
        train_batch_size=10,
    )
    optimizer = AsyncSamplesOptimizer(local_ev, remote_evs, **settings)
    self._wait_for(optimizer, 1000, 1000)

    # All checks below use the same stats snapshot.
    snapshot = optimizer.stats()
    self.assertLess(snapshot["num_steps_sampled"], 5000)

    replayed = snapshot["num_steps_replayed"]
    sampled = snapshot["num_steps_sampled"]
    replay_ratio = replayed / sampled
    self.assertGreater(replay_ratio, 0.7)

    self.assertLess(
        snapshot["num_steps_trained"], snapshot["num_steps_sampled"])
def test_replay(self):
    """Check the replay ratio and trained-step count with replay enabled.

    Wraps the workers from ``self._make_envs()`` in a WorkerSet, builds the
    optimizer with 100 replay slots and a replay proportion of 10, calls
    ``self._wait_for(optimizer, 1000, 1000)``, and takes a single stats
    snapshot. Asserts fewer than 5000 sampled steps, a replayed/sampled
    ratio above 0.7, and fewer trained than sampled steps.
    """
    local_worker, remote_workers = self._make_envs()
    worker_set = WorkerSet._from_existing(local_worker, remote_workers)
    settings = dict(
        replay_buffer_num_slots=100,
        replay_proportion=10,
        rollout_fragment_length=10,
        train_batch_size=10,
    )
    optimizer = AsyncSamplesOptimizer(worker_set, **settings)
    self._wait_for(optimizer, 1000, 1000)

    # All checks below use the same stats snapshot.
    snapshot = optimizer.stats()
    self.assertLess(snapshot["num_steps_sampled"], 5000)

    replayed = snapshot["num_steps_replayed"]
    sampled = snapshot["num_steps_sampled"]
    replay_ratio = replayed / sampled
    self.assertGreater(replay_ratio, 0.7)

    self.assertLess(
        snapshot["num_steps_trained"], snapshot["num_steps_sampled"])
def testReplayAndMultiplePasses(self):
    """Check the replay ratio when replay and multiple SGD passes are combined.

    Wraps the workers from ``self._make_envs()`` in a WorkerSet and builds
    the optimizer with both minibatch/SGD-iteration settings and replay
    settings. After ``self._wait_for(optimizer, 1000, 1000)`` it prints one
    stats snapshot, then asserts fewer than 5000 sampled steps and a
    replayed/sampled ratio above 0.7.
    """
    local_worker, remote_workers = self._make_envs()
    worker_set = WorkerSet._from_existing(local_worker, remote_workers)
    settings = dict(
        minibatch_buffer_size=10,
        num_sgd_iter=10,
        replay_buffer_num_slots=100,
        replay_proportion=10,
        sample_batch_size=10,
        train_batch_size=10,
    )
    optimizer = AsyncSamplesOptimizer(worker_set, **settings)
    self._wait_for(optimizer, 1000, 1000)

    # A single snapshot is printed and then used for every check.
    snapshot = optimizer.stats()
    print(snapshot)
    self.assertLess(snapshot["num_steps_sampled"], 5000)

    replayed = snapshot["num_steps_replayed"]
    sampled = snapshot["num_steps_sampled"]
    replay_ratio = replayed / sampled
    self.assertGreater(replay_ratio, 0.7)
def testReplayAndMultiplePasses(self):
    """Check replay and train ratios with replay plus multiple SGD passes.

    The optimizer is built from one config dict that holds both the
    minibatch/SGD-iteration settings and the replay settings. After
    ``self._wait_for(optimizer, 1000, 1000)`` it prints one stats snapshot,
    then asserts fewer than 5000 sampled steps, a replayed/sampled ratio
    above 0.7, and a sampled/trained ratio below 0.4.
    """
    local_ev, remote_evs = self._make_evs()
    config = {
        "minibatch_buffer_size": 10,
        "num_sgd_iter": 10,
        "replay_buffer_num_slots": 100,
        "replay_proportion": 10,
        "sample_batch_size": 10,
        "train_batch_size": 10,
    }
    optimizer = AsyncSamplesOptimizer(local_ev, remote_evs, config)
    self._wait_for(optimizer, 1000, 1000)

    # A single snapshot is printed and then used for every check.
    snapshot = optimizer.stats()
    print(snapshot)
    self.assertLess(snapshot["num_steps_sampled"], 5000)

    # Both ratios are computed before either one is asserted.
    replay_ratio = (
        snapshot["num_steps_replayed"] / snapshot["num_steps_sampled"])
    train_ratio = (
        snapshot["num_steps_sampled"] / snapshot["num_steps_trained"])
    self.assertGreater(replay_ratio, 0.7)
    self.assertLess(train_ratio, 0.4)