Beispiel #1
0
 def __init__(self):
     """Create two mock sub-environments and empty bookkeeping state.

     The wrapped environments are ``MockEnv(3)`` and ``MockEnv(5)``.
     NOTE(review): the integer argument is presumably the episode length
     of each mock env -- confirm against ``MockEnv``.
     """
     first_env, second_env = MockEnv(3), MockEnv(5)
     self.agents = [first_env, second_env]

     # Nothing has finished and nothing has been observed yet.
     self.dones = set()
     self.last_obs, self.last_rew = {}, {}
     self.last_done, self.last_info = {}, {}
     self.i = 0

     # Spaces are hard-coded rather than derived from the wrapped envs.
     self.observation_space = gym.spaces.Discrete(10)
     self.action_space = gym.spaces.Discrete(2)
Beispiel #2
0
 def testExternalEnvHorizonNotSupported(self):
     """Sampling with ``episode_horizon`` set must raise ``ValueError``."""

     def make_env(_):
         return SimpleServing(MockEnv(25))

     worker_kwargs = dict(
         env_creator=make_env,
         policy=MockPolicy,
         episode_horizon=20,
         batch_steps=10,
         batch_mode="complete_episodes",
     )
     ev = RolloutWorker(**worker_kwargs)
     # Only the sample() call is expected to fail, not the construction.
     with self.assertRaises(ValueError):
         ev.sample()
 def test_external_env_horizon_not_supported(self):
     """Sampling with ``episode_horizon`` set must raise ``ValueError``."""

     def make_env(_):
         return SimpleServing(MockEnv(25))

     worker_kwargs = dict(
         env_creator=make_env,
         policy_spec=MockPolicy,
         episode_horizon=20,
         rollout_fragment_length=10,
         batch_mode="complete_episodes",
     )
     ev = RolloutWorker(**worker_kwargs)
     # Only the sample() call is expected to fail, not the construction.
     with self.assertRaises(ValueError):
         ev.sample()
Beispiel #4
0
 def testExternalEnvBadActions(self):
     """Sampling with ``BadPolicy`` in async mode must raise an exception."""

     def make_env(_):
         return SimpleServing(MockEnv(25))

     ev = RolloutWorker(
         env_creator=make_env,
         policy=BadPolicy,
         sample_async=True,
         batch_steps=40,
         batch_mode="truncate_episodes",
     )
     # Any exception type is accepted; the test only checks that sample()
     # does not complete normally.
     with self.assertRaises(Exception):
         ev.sample()
 def test_external_env_bad_actions(self):
     """Sampling with ``BadPolicy`` in async mode must raise an exception."""

     def make_env(_):
         return SimpleServing(MockEnv(25))

     ev = RolloutWorker(
         env_creator=make_env,
         policy_spec=BadPolicy,
         sample_async=True,
         rollout_fragment_length=40,
         batch_mode="truncate_episodes",
     )
     # Any exception type is accepted; the test only checks that sample()
     # does not complete normally.
     with self.assertRaises(Exception):
         ev.sample()
Beispiel #6
0
 def testExternalEnvTruncateEpisodes(self):
     """In ``truncate_episodes`` mode every sampled batch has 40 steps."""

     def make_env(_):
         return SimpleServing(MockEnv(25))

     ev = RolloutWorker(
         env_creator=make_env,
         policy=MockPolicy,
         batch_steps=40,
         batch_mode="truncate_episodes",
     )
     # Check three consecutive batches, not only the first one.
     for _ in range(3):
         self.assertEqual(ev.sample().count, 40)
 def test_external_env_truncate_episodes(self):
     """In ``truncate_episodes`` mode every sampled batch has 40 steps."""

     def make_env(_):
         return SimpleServing(MockEnv(25))

     ev = RolloutWorker(
         env_creator=make_env,
         policy_spec=MockPolicy,
         rollout_fragment_length=40,
         batch_mode="truncate_episodes",
     )
     # Check three consecutive batches, not only the first one.
     for _ in range(3):
         self.assertEqual(ev.sample().count, 40)
Beispiel #8
0
 def testExternalEnvOffPolicy(self):
     """Off-policy serving yields 50-step batches whose actions are 42.

     The env is ``SimpleOffPolicyServing(MockEnv(25), 42)``; the test checks
     the batch size and the first and last logged action of each batch.
     """

     def make_env(_):
         return SimpleOffPolicyServing(MockEnv(25), 42)

     ev = RolloutWorker(
         env_creator=make_env,
         policy=MockPolicy,
         batch_steps=40,
         batch_mode="complete_episodes",
     )
     for _ in range(3):
         batch = ev.sample()
         self.assertEqual(batch.count, 50)
         # Look the column up only after the size check, so a failure is
         # reported in the same order as before.
         actions = batch["actions"]
         self.assertEqual(actions[0], 42)
         self.assertEqual(actions[-1], 42)
 def test_external_env_off_policy(self):
     """Off-policy serving yields 50-step batches whose actions are 42.

     The env is ``SimpleOffPolicyServing(MockEnv(25), 42)``; the test checks
     the batch size and the first and last logged action of each batch.
     """

     def make_env(_):
         return SimpleOffPolicyServing(MockEnv(25), 42)

     ev = RolloutWorker(
         env_creator=make_env,
         policy_spec=MockPolicy,
         rollout_fragment_length=40,
         batch_mode="complete_episodes",
     )
     for _ in range(3):
         batch = ev.sample()
         self.assertEqual(batch.count, 50)
         # Look the column up only after the size check, so a failure is
         # reported in the same order as before.
         actions = batch["actions"]
         self.assertEqual(actions[0], 42)
         self.assertEqual(actions[-1], 42)
Beispiel #10
0
 def __init__(self, num, increment_obs=False):
     """Create ``num`` mock sub-environments and empty bookkeeping state.

     Args:
         num: Number of sub-environments to create; also stored as
             ``self.num``.
         increment_obs: If true, wrap ``MockEnv2`` instances, whose
             observations are 0, 1, 2, 3... etc. as time advances.
             Otherwise wrap ``MockEnv`` instances, whose observations are
             all zeros.
     """
     # Pick the sub-environment class once; both take the same argument.
     env_cls = MockEnv2 if increment_obs else MockEnv
     self.agents = [env_cls(5) for _ in range(num)]

     # Nothing has finished and nothing has been observed yet.
     self.dones = set()
     self.last_obs, self.last_rew = {}, {}
     self.last_done, self.last_info = {}, {}
     self.i = 0
     self.num = num

     # Spaces are hard-coded rather than derived from the wrapped envs.
     self.observation_space = gym.spaces.Discrete(10)
     self.action_space = gym.spaces.Discrete(2)
Beispiel #11
0
 def __init__(self, num):
     """Create ``num`` ``MockEnv(25)`` sub-environments.

     Args:
         num: Number of sub-environments to create.
     """
     sub_envs = []
     for _ in range(num):
         sub_envs.append(MockEnv(25))
     self.agents = sub_envs

     self.dones = set()

     # Observation and action spaces both have two discrete values.
     two_values = 2
     self.observation_space = gym.spaces.Discrete(two_values)
     self.action_space = gym.spaces.Discrete(two_values)

     # NOTE(review): presumably flipped to True by reset() -- confirm
     # against the rest of the class.
     self.resetted = False