Example #1
0
 def testServingEnvBadActions(self):
     # An evaluator wired to BadPolicyGraph with async sampling enabled is
     # expected to raise as soon as a batch is requested from it.
     def make_env(_):
         return SimpleServing(MockEnv(25))

     evaluator = CommonPolicyEvaluator(
         env_creator=make_env,
         policy_graph=BadPolicyGraph,
         sample_async=True,
         batch_steps=40,
         batch_mode="truncate_episodes")
     with self.assertRaises(Exception):
         evaluator.sample()
Example #2
0
 def testServingEnvOffPolicy(self):
     # With batch_steps=40 in "complete_episodes" mode, every sampled batch
     # is asserted to hold exactly 50 steps (presumably two whole
     # MockEnv(25) episodes -- confirm against MockEnv).
     expected_count = 50
     num_batches = 3

     def make_env(_):
         return SimpleOffPolicyServing(MockEnv(25))

     evaluator = CommonPolicyEvaluator(
         env_creator=make_env,
         policy_graph=MockPolicyGraph,
         batch_steps=40,
         batch_mode="complete_episodes")
     # Check each batch right after it is drawn so a mismatch fails before
     # any further sampling happens.
     for _batch_idx in range(num_batches):
         self.assertEqual(evaluator.sample().count, expected_count)
Example #3
0
 def testServingEnvHorizonNotSupported(self):
     # Builds an evaluator with episode_horizon=20 on top of SimpleServing.
     # The first sample() call is expected to return normally; the second
     # one must raise.
     evaluator_kwargs = dict(
         env_creator=lambda _: SimpleServing(MockEnv(25)),
         policy_graph=MockPolicyGraph,
         episode_horizon=20,
         batch_steps=10,
         batch_mode="complete_episodes",
     )
     evaluator = CommonPolicyEvaluator(**evaluator_kwargs)
     evaluator.sample()
     with self.assertRaises(Exception):
         evaluator.sample()
Example #4
0
 def __init__(self, num, increment_obs=False):
     """Build ``num`` mock sub-environments and reset all bookkeeping.

     Args:
         num: how many sub-environments to create in ``self.agents``.
         increment_obs: when truthy the agents are ``MockEnv2(5)``
             instances, otherwise ``MockEnv(5)`` instances.
     """
     # MockEnv2: observations are 0, 1, 2, 3... etc. as time advances.
     # MockEnv: observations are all zeros.
     agent_cls = MockEnv2 if increment_obs else MockEnv
     self.agents = [agent_cls(5) for _agent_idx in range(num)]
     self.dones = set()
     # Last-seen values, all starting out empty.
     self.last_obs, self.last_rew = {}, {}
     self.last_done, self.last_info = {}, {}
     self.i = 0
     self.num = num
     n_values = 2  # both spaces are Discrete(2)
     self.observation_space = gym.spaces.Discrete(n_values)
     self.action_space = gym.spaces.Discrete(n_values)
Example #5
0
 def __init__(self, num):
     """Create ``num`` ``MockEnv(25)`` agents and two ``Discrete(2)`` spaces.

     Args:
         num: how many ``MockEnv`` instances to place in ``self.agents``.
     """
     agent_envs = [MockEnv(25) for _agent_idx in range(num)]
     self.agents = agent_envs
     # Starts out empty; nothing in this constructor adds to it.
     self.dones = set()
     n_values = 2  # both spaces are Discrete(2)
     self.observation_space = gym.spaces.Discrete(n_values)
     self.action_space = gym.spaces.Discrete(n_values)