Beispiel #1
0
 def test_no_error(self, env, task, algo):
     """Smoke-test that a simulator run yields a score on every iteration.

     Uses the algorithm's training policies when ``task.training`` is
     truthy, otherwise only its test policy. Each policy is paired with
     its own deep copy of ``env``.
     """
     if task.training:
         chosen_policies = algo.train_policies
     else:
         chosen_policies = [algo.test_policy]
     env_copies = [copy.deepcopy(env) for _policy in chosen_policies]
     sim = Simulator(task, chosen_policies, env_copies)
     algo.begin_epoch()
     for epoch_score in sim:
         assert epoch_score is not None
         # Close the current epoch and immediately open the next one.
         # NOTE(review): the begin_epoch() issued on the final iteration
         # is never matched by an end_epoch() -- confirm this is intended.
         algo.end_epoch()
         algo.begin_epoch()
Beispiel #2
0
    def test_individual_episode_when_parallel(self, task, policy, env):
        """Check that parallel policies agree on ``task.episode``.

        Two shallow copies of ``policy`` and two shallow copies of ``env``
        are handed to one simulator. The simulator is called
        ``task.epochs`` times, and after every call all policies must
        report the same ``task.episode`` value.
        """
        def all_same(values):
            # True when every item equals the first one (or there are none).
            values = list(values)
            for value in values:
                if not (value == values[0]):
                    return False
            return True

        first_policy = copy.copy(policy)
        second_policy = copy.copy(policy)
        policies = (first_policy, second_policy)
        first_env = copy.copy(env)
        second_env = copy.copy(env)
        envs = (first_env, second_env)
        simulator = Simulator(task, policies, envs)
        for _epoch in range(task.epochs):
            simulator()
            episodes = [p.task.episode for p in policies]
            assert all_same(episodes)
Beispiel #3
0
 def test_individual_episode_when_parallel(
         self, task, policy, second_policy, env, second_env):
     """Check that two policies run in one simulator share the episode.

     The simulator is called ``task.epochs`` times with both policies and
     both environments. After every call the ``task.episode`` value seen
     through each policy must be equal.
     """
     def all_same(values):
         # Compare every item against the first; vacuously true if empty.
         values = list(values)
         for value in values:
             if not (value == values[0]):
                 return False
         return True

     policies = [policy, second_policy]
     envs = [env, second_env]
     simulator = Simulator(task, policies, envs)
     for _epoch in range(task.epochs):
         simulator()
         episodes = [p.task.episode for p in policies]
         assert all_same(episodes)
Beispiel #4
0
 def _create_testing(self, algorithm):
     """Build the simulator used for testing ``algorithm``.

     The algorithm's single test policy is passed through
     ``_prepend_score_step`` and paired with one environment created
     from ``self._task.directory``.

     Returns:
         A ``Simulator`` over ``self._test_task`` with exactly one
         policy and one environment.
     """
     test_policy = self._prepend_score_step(algorithm.test_policy)
     test_env = self._create_env(self._task.directory)
     return Simulator(self._test_task, [test_policy], [test_env])
Beispiel #5
0
 def _create_training(self, algorithm):
     """Build the simulator used for training ``algorithm``.

     Every training policy is passed through ``_prepend_score_step``,
     and one fresh environment (``_create_env()`` with no arguments) is
     created per policy.

     Returns:
         A ``Simulator`` over ``self._train_task`` with one environment
         for each wrapped training policy.
     """
     wrapped_policies = [
         self._prepend_score_step(train_policy)
         for train_policy in algorithm.train_policies
     ]
     # Environments are created only after all policies are wrapped.
     train_envs = [self._create_env() for _policy in wrapped_policies]
     return Simulator(self._train_task, wrapped_policies, train_envs)