Esempio n. 1
0
 def testMultiplePasses(self):
     """Check step counts when the optimizer runs with ``num_sgd_iter=10``.

     Builds an ``AsyncSamplesOptimizer`` with a minibatch buffer of 10 and
     10 SGD iterations, drives it through ``self._wait_for`` and then
     asserts that fewer than 5000 steps were sampled while more than 8000
     steps were trained.
     """
     local_ev, remote_evs = self._make_evs()
     settings = {
         "minibatch_buffer_size": 10,
         "num_sgd_iter": 10,
         "sample_batch_size": 10,
         "train_batch_size": 50,
     }
     optimizer = AsyncSamplesOptimizer(local_ev, remote_evs, **settings)
     # NOTE(review): the meaning of the two numeric arguments is defined by
     # ``_wait_for`` elsewhere in this test case -- confirm there.
     self._wait_for(optimizer, 1000, 10000)

     # Stats are queried once per assertion, as in the original test.
     sampled = optimizer.stats()["num_steps_sampled"]
     self.assertLess(sampled, 5000)
     trained = optimizer.stats()["num_steps_trained"]
     self.assertGreater(trained, 8000)
Esempio n. 2
0
 def test_multiple_passes(self):
     """Check step counts when the optimizer runs with ``num_sgd_iter=10``.

     Wraps the local and remote workers in a ``WorkerSet``, builds an
     ``AsyncSamplesOptimizer`` on top of it, drives it through
     ``self._wait_for`` and then asserts that fewer than 5000 steps were
     sampled while more than 8000 steps were trained.
     """
     local_worker, remote_workers = self._make_envs()
     worker_set = WorkerSet._from_existing(local_worker, remote_workers)
     settings = {
         "minibatch_buffer_size": 10,
         "num_sgd_iter": 10,
         "rollout_fragment_length": 10,
         "train_batch_size": 50,
     }
     optimizer = AsyncSamplesOptimizer(worker_set, **settings)
     # NOTE(review): the meaning of the two numeric arguments is defined by
     # ``_wait_for`` elsewhere in this test case -- confirm there.
     self._wait_for(optimizer, 1000, 10000)

     # Stats are queried once per assertion, as in the original test.
     sampled = optimizer.stats()["num_steps_sampled"]
     self.assertLess(sampled, 5000)
     trained = optimizer.stats()["num_steps_trained"]
     self.assertGreater(trained, 8000)
Esempio n. 3
0
 def testMultiplePasses(self):
     """Check step counts when the optimizer runs with ``num_sgd_iter`` 10.

     The optimizer settings are handed over as a single config dict (third
     positional argument). After ``self._wait_for`` returns, fewer than
     5000 steps must have been sampled and more than 8000 trained.
     """
     local_ev, remote_evs = self._make_evs()
     config = {
         "minibatch_buffer_size": 10,
         "num_sgd_iter": 10,
         "sample_batch_size": 10,
         "train_batch_size": 50,
     }
     optimizer = AsyncSamplesOptimizer(local_ev, remote_evs, config)
     # NOTE(review): the meaning of the two numeric arguments is defined by
     # ``_wait_for`` elsewhere in this test case -- confirm there.
     self._wait_for(optimizer, 1000, 10000)

     # Stats are queried once per assertion, as in the original test.
     sampled = optimizer.stats()["num_steps_sampled"]
     self.assertLess(sampled, 5000)
     trained = optimizer.stats()["num_steps_trained"]
     self.assertGreater(trained, 8000)
Esempio n. 4
0
 def testReplay(self):
     """Check step counts when the optimizer is configured with replay.

     The optimizer gets a config dict with 100 replay buffer slots and a
     ``replay_proportion`` of 10. After ``self._wait_for`` returns, fewer
     than 5000 steps must have been sampled, while both the replayed and
     the trained step counts must exceed 8000.
     """
     local_ev, remote_evs = self._make_evs()
     config = {
         "replay_buffer_num_slots": 100,
         "replay_proportion": 10,
         "sample_batch_size": 10,
         "train_batch_size": 10,
     }
     optimizer = AsyncSamplesOptimizer(local_ev, remote_evs, config)
     # NOTE(review): the meaning of the two numeric arguments is defined by
     # ``_wait_for`` elsewhere in this test case -- confirm there.
     self._wait_for(optimizer, 1000, 1000)

     # Stats are queried once per assertion, as in the original test.
     sampled = optimizer.stats()["num_steps_sampled"]
     self.assertLess(sampled, 5000)
     replayed = optimizer.stats()["num_steps_replayed"]
     self.assertGreater(replayed, 8000)
     trained = optimizer.stats()["num_steps_trained"]
     self.assertGreater(trained, 8000)
Esempio n. 5
0
 def testReplay(self):
     """Verify sampled, replayed and trained step counts with replay on.

     Passes a config dict (100 replay slots, ``replay_proportion`` 10,
     batch sizes of 10) to ``AsyncSamplesOptimizer``, lets
     ``self._wait_for`` drive it, then asserts: sampled < 5000,
     replayed > 8000 and trained > 8000.
     """
     local_evaluator, remote_evaluators = self._make_evs()
     replay_config = {
         "replay_buffer_num_slots": 100,
         "replay_proportion": 10,
         "sample_batch_size": 10,
         "train_batch_size": 10,
     }
     optimizer = AsyncSamplesOptimizer(
         local_evaluator, remote_evaluators, replay_config)
     # NOTE(review): what the two 1000 arguments bound is decided by
     # ``_wait_for``, which is not visible here -- confirm there.
     self._wait_for(optimizer, 1000, 1000)

     # A fresh stats() snapshot is taken for every check, as originally.
     num_sampled = optimizer.stats()["num_steps_sampled"]
     self.assertLess(num_sampled, 5000)
     num_replayed = optimizer.stats()["num_steps_replayed"]
     self.assertGreater(num_replayed, 8000)
     num_trained = optimizer.stats()["num_steps_trained"]
     self.assertGreater(num_trained, 8000)
Esempio n. 6
0
 def testReplay(self):
     """Check the replay ratio when the optimizer is configured with replay.

     Builds an ``AsyncSamplesOptimizer`` with 100 replay buffer slots and a
     ``replay_proportion`` of 10, drives it through ``self._wait_for`` and
     then checks, on a single stats snapshot, that:

     * fewer than 5000 steps were sampled,
     * replayed / sampled is greater than 0.7,
     * fewer steps were trained than sampled.
     """
     local_ev, remote_evs = self._make_evs()
     replay_settings = {
         "replay_buffer_num_slots": 100,
         "replay_proportion": 10,
         "sample_batch_size": 10,
         "train_batch_size": 10,
     }
     optimizer = AsyncSamplesOptimizer(local_ev, remote_evs, **replay_settings)
     # NOTE(review): the meaning of the two numeric arguments is defined by
     # ``_wait_for`` elsewhere in this test case -- confirm there.
     self._wait_for(optimizer, 1000, 1000)

     stats = optimizer.stats()
     sampled = stats["num_steps_sampled"]
     self.assertLess(sampled, 5000)

     replayed = stats["num_steps_replayed"]
     replay_ratio = replayed / sampled
     self.assertGreater(replay_ratio, 0.7)

     self.assertLess(stats["num_steps_trained"], sampled)
Esempio n. 7
0
 def test_replay(self):
     """Check the replay ratio when the optimizer is configured with replay.

     Wraps the local and remote workers in a ``WorkerSet``, builds an
     ``AsyncSamplesOptimizer`` with 100 replay buffer slots and a
     ``replay_proportion`` of 10, drives it through ``self._wait_for`` and
     then checks, on a single stats snapshot, that:

     * fewer than 5000 steps were sampled,
     * replayed / sampled is greater than 0.7,
     * fewer steps were trained than sampled.
     """
     local_worker, remote_workers = self._make_envs()
     worker_set = WorkerSet._from_existing(local_worker, remote_workers)
     replay_settings = {
         "replay_buffer_num_slots": 100,
         "replay_proportion": 10,
         "rollout_fragment_length": 10,
         "train_batch_size": 10,
     }
     optimizer = AsyncSamplesOptimizer(worker_set, **replay_settings)
     # NOTE(review): the meaning of the two numeric arguments is defined by
     # ``_wait_for`` elsewhere in this test case -- confirm there.
     self._wait_for(optimizer, 1000, 1000)

     stats = optimizer.stats()
     sampled = stats["num_steps_sampled"]
     self.assertLess(sampled, 5000)

     replayed = stats["num_steps_replayed"]
     replay_ratio = replayed / sampled
     self.assertGreater(replay_ratio, 0.7)

     self.assertLess(stats["num_steps_trained"], sampled)
Esempio n. 8
0
    def testReplayAndMultiplePasses(self):
        """Check the replay ratio with replay and ``num_sgd_iter=10`` combined.

        Wraps the local and remote workers in a ``WorkerSet``, builds an
        ``AsyncSamplesOptimizer`` that uses both a minibatch buffer and a
        replay buffer, drives it through ``self._wait_for`` and then checks,
        on a single stats snapshot, that fewer than 5000 steps were sampled
        and that replayed / sampled is greater than 0.7.
        """
        local_worker, remote_workers = self._make_envs()
        worker_set = WorkerSet._from_existing(local_worker, remote_workers)
        settings = {
            "minibatch_buffer_size": 10,
            "num_sgd_iter": 10,
            "replay_buffer_num_slots": 100,
            "replay_proportion": 10,
            "sample_batch_size": 10,
            "train_batch_size": 10,
        }
        optimizer = AsyncSamplesOptimizer(worker_set, **settings)
        # NOTE(review): the meaning of the two numeric arguments is defined
        # by ``_wait_for`` elsewhere in this test case -- confirm there.
        self._wait_for(optimizer, 1000, 1000)

        stats = optimizer.stats()
        # Emit the raw counters so they show up in the test output.
        print(stats)

        sampled = stats["num_steps_sampled"]
        self.assertLess(sampled, 5000)
        replayed = stats["num_steps_replayed"]
        replay_ratio = replayed / sampled
        self.assertGreater(replay_ratio, 0.7)
Esempio n. 9
0
    def testReplayAndMultiplePasses(self):
        """Check replay and train ratios with replay and 10 SGD iterations.

        The optimizer settings are handed over as a single config dict (third
        positional argument). After ``self._wait_for`` returns, a single
        stats snapshot must satisfy:

        * fewer than 5000 steps were sampled,
        * replayed / sampled is greater than 0.7,
        * sampled / trained is less than 0.4.
        """
        local_ev, remote_evs = self._make_evs()
        config = {
            "minibatch_buffer_size": 10,
            "num_sgd_iter": 10,
            "replay_buffer_num_slots": 100,
            "replay_proportion": 10,
            "sample_batch_size": 10,
            "train_batch_size": 10,
        }
        optimizer = AsyncSamplesOptimizer(local_ev, remote_evs, config)
        # NOTE(review): the meaning of the two numeric arguments is defined
        # by ``_wait_for`` elsewhere in this test case -- confirm there.
        self._wait_for(optimizer, 1000, 1000)

        stats = optimizer.stats()
        # Emit the raw counters so they show up in the test output.
        print(stats)

        sampled = stats["num_steps_sampled"]
        self.assertLess(sampled, 5000)

        # Both ratios are computed before either is asserted, matching the
        # original evaluation order.
        replayed = stats["num_steps_replayed"]
        replay_ratio = replayed / sampled
        trained = stats["num_steps_trained"]
        train_ratio = sampled / trained
        self.assertGreater(replay_ratio, 0.7)
        self.assertLess(train_ratio, 0.4)