def testMultiAgent(self):
    """Round-trip multi-agent I/O test.

    First records multi-agent experiences to ``self.test_dir`` via the
    ``output`` config, then starts a second trainer that reads them back
    with ``input`` + simulation-based input evaluation, and waits until a
    non-NaN ``episode_reward_mean`` shows up in the results.
    """
    register_env("multi_cartpole", lambda _: MultiCartpole(10))
    single_env = gym.make("CartPole-v0")

    def gen_policy():
        # Every agent in MultiCartpole shares the single-env spaces.
        obs_space = single_env.observation_space
        act_space = single_env.action_space
        return (PGPolicyGraph, obs_space, act_space, {})

    def make_multiagent_conf():
        # Build a fresh dict per trainer so neither config object can
        # mutate the other's.
        return {
            "policy_graphs": {
                name: gen_policy()
                for name in ["policy_1", "policy_2"]
            },
            "policy_mapping_fn": (
                lambda agent_id: random.choice(
                    ["policy_1", "policy_2"])),
        }

    # Phase 1: write experiences to disk.
    writer = PGAgent(
        env="multi_cartpole",
        config={
            "num_workers": 0,
            "output": self.test_dir,
            "multiagent": make_multiagent_conf(),
        })
    writer.train()
    self.assertEqual(len(os.listdir(self.test_dir)), 1)
    writer.stop()

    # Phase 2: replay the recorded experiences with simulation-based
    # input evaluation and wait for reward stats to appear.
    reader = PGAgent(
        env="multi_cartpole",
        config={
            "num_workers": 0,
            "input": self.test_dir,
            "input_evaluation": ["simulation"],
            "train_batch_size": 2000,
            "multiagent": make_multiagent_conf(),
        })
    for _ in range(50):
        result = reader.train()
        if not np.isnan(result["episode_reward_mean"]):
            return  # simulation ok
        time.sleep(0.1)
    assert False, "did not see any simulation results"
def testRolloutDictSpace(self):
    """Checks save/restore with a nested dict observation space.

    Trains briefly, snapshots the agent, then verifies that a restored
    agent can both continue training and run a rollout.
    """
    register_env("nested", lambda _: NestedDictEnv())

    # Train briefly and take a checkpoint.
    trainer = PGAgent(env="nested")
    trainer.train()
    checkpoint = trainer.save()
    trainer.stop()

    # A restored agent must support further training ...
    restored = PGAgent(env="nested")
    restored.restore(checkpoint)
    restored.train()

    # ... and evaluation via rollout.
    rollout(restored, "nested", 100)
def testMultiAgent(self):
    """Round-trip multi-agent I/O test.

    Records multi-agent experiences to ``self.test_dir``, then reads
    them back with simulation-based input evaluation and waits for a
    non-NaN ``episode_reward_mean`` to appear in the train results.
    """
    register_env("multi_cartpole", lambda _: MultiCartpole(10))
    single_env = gym.make("CartPole-v0")

    def gen_policy():
        # Every agent in MultiCartpole shares the single-env spaces.
        obs_space = single_env.observation_space
        act_space = single_env.action_space
        return (PGPolicyGraph, obs_space, act_space, {})

    # Phase 1: write experiences to disk.
    pg = PGAgent(
        env="multi_cartpole",
        config={
            "num_workers": 0,
            "output": self.test_dir,
            "multiagent": {
                "policy_graphs": {
                    "policy_1": gen_policy(),
                    "policy_2": gen_policy(),
                },
                "policy_mapping_fn": (
                    lambda agent_id: random.choice(
                        ["policy_1", "policy_2"])),
            },
        })
    pg.train()
    self.assertEqual(len(os.listdir(self.test_dir)), 1)
    pg.stop()

    # Phase 2: replay the recorded experiences.
    pg = PGAgent(
        env="multi_cartpole",
        config={
            "num_workers": 0,
            "input": self.test_dir,
            # Fix: "input_evaluation" takes a list of evaluation
            # methods; the bare string "simulation" disagreed with the
            # sibling test in this file, which passes ["simulation"].
            "input_evaluation": ["simulation"],
            "train_batch_size": 2000,
            "multiagent": {
                "policy_graphs": {
                    "policy_1": gen_policy(),
                    "policy_2": gen_policy(),
                },
                "policy_mapping_fn": (
                    lambda agent_id: random.choice(
                        ["policy_1", "policy_2"])),
            },
        })
    for _ in range(50):
        result = pg.train()
        if not np.isnan(result["episode_reward_mean"]):
            return  # simulation ok
        time.sleep(0.1)
    assert False, "did not see any simulation results"