def test_callbacks(self):
    for fw in framework_iterator(frameworks=("torch", "tf")):
        counts = Counter()
        pg = PGTrainer(
            env="CartPole-v0",
            config={
                "num_workers": 0,
                "rollout_fragment_length": 50,
                "train_batch_size": 50,
                "callbacks": {
                    "on_episode_start": lambda x: counts.update({"start": 1}),
                    "on_episode_step": lambda x: counts.update({"step": 1}),
                    "on_episode_end": lambda x: counts.update({"end": 1}),
                    "on_sample_end": lambda x: counts.update({"sample": 1}),
                },
                "framework": fw,
            })
        pg.train()
        pg.train()
        self.assertGreater(counts["sample"], 0)
        self.assertGreater(counts["start"], 0)
        self.assertGreater(counts["end"], 0)
        self.assertGreater(counts["step"], 0)
        pg.stop()
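# A minimal sketch of the same event counting via RLlib's class-based
# `DefaultCallbacks` API (the dict-of-lambdas "callbacks" config above is
# the legacy form). Assumes `ray.rllib.agents.callbacks.DefaultCallbacks`
# is importable in this RLlib version; `CountingCallbacks` is a name
# invented here, not RLlib API.
from collections import Counter

from ray.rllib.agents.callbacks import DefaultCallbacks


class CountingCallbacks(DefaultCallbacks):
    """Counts episode/sample events, mirroring the lambdas above."""

    counts = Counter()

    def on_episode_start(self, **kwargs):
        CountingCallbacks.counts.update({"start": 1})

    def on_episode_step(self, **kwargs):
        CountingCallbacks.counts.update({"step": 1})

    def on_episode_end(self, **kwargs):
        CountingCallbacks.counts.update({"end": 1})

    def on_sample_end(self, **kwargs):
        CountingCallbacks.counts.update({"sample": 1})


# Usage: pass the class itself (RLlib instantiates it), e.g.
# config={"callbacks": CountingCallbacks, ...}.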
def test_gpus_in_local_mode(self):
    # Local mode.
    ray.init(num_gpus=8, local_mode=True)

    actual_gpus_available = torch.cuda.device_count()

    config = DEFAULT_CONFIG.copy()
    config["num_workers"] = 2
    config["env"] = "CartPole-v0"

    # Expect no errors in local mode.
    for num_gpus in [0, 0.1, 1, actual_gpus_available + 4]:
        print(f"num_gpus={num_gpus}")
        for fake_gpus in [False, True]:
            print(f"_fake_gpus={fake_gpus}")
            config["num_gpus"] = num_gpus
            config["_fake_gpus"] = fake_gpus
            frameworks = ("tf", "torch") if num_gpus > 1 else \
                ("tf2", "tf", "torch")
            for _ in framework_iterator(config, frameworks=frameworks):
                print("direct RLlib")
                trainer = PGTrainer(config, env="CartPole-v0")
                trainer.stop()
                print("via ray.tune.run()")
                tune.run(
                    "PG", config=config, stop={"training_iteration": 0})
    ray.shutdown()
def test_gpus_in_non_local_mode(self):
    # Non-local mode.
    ray.init(num_cpus=8)

    actual_gpus = torch.cuda.device_count()
    print(f"Actual GPUs found (by torch): {actual_gpus}")

    config = DEFAULT_CONFIG.copy()
    config["num_workers"] = 2
    config["env"] = "CartPole-v0"

    # Expect errors when we run a config w/ num_gpus>0 w/o a GPU
    # and _fake_gpus=False.
    for num_gpus in [0, 0.1, 1, actual_gpus + 4]:
        # Only allow possible num_gpus_per_worker values (so the test
        # does not block infinitely due to a down worker).
        per_worker = [0] if actual_gpus == 0 or actual_gpus < num_gpus \
            else [0, 0.5, 1]
        for num_gpus_per_worker in per_worker:
            for fake_gpus in [False] + ([] if num_gpus == 0 else [True]):
                config["num_gpus"] = num_gpus
                config["num_gpus_per_worker"] = num_gpus_per_worker
                config["_fake_gpus"] = fake_gpus

                print(f"\n------------\nnum_gpus={num_gpus} "
                      f"num_gpus_per_worker={num_gpus_per_worker} "
                      f"_fake_gpus={fake_gpus}")

                frameworks = ("tf", "torch") if num_gpus > 1 else \
                    ("tf2", "tf", "torch")
                for _ in framework_iterator(config, frameworks=frameworks):
                    # Expect Trainer creation to fail with a num_gpu error
                    # if the machine cannot satisfy driver plus worker
                    # demands and GPUs are not faked.
                    if actual_gpus < num_gpus + 2 * num_gpus_per_worker \
                            and not fake_gpus:
                        # "Direct" RLlib (create Trainer on the driver).
                        # Cannot run through ray.tune.run() as it would
                        # simply wait infinitely for the resources to
                        # become available.
                        print("direct RLlib")
                        self.assertRaisesRegex(
                            RuntimeError,
                            "Found 0 GPUs on your machine",
                            lambda: PGTrainer(config, env="CartPole-v0"),
                        )
                    # If enough actual GPUs are available (or GPUs are
                    # faked), expect no error.
                    else:
                        print("direct RLlib")
                        trainer = PGTrainer(config, env="CartPole-v0")
                        trainer.stop()
                        # Cannot run through ray.tune.run() w/ fake GPUs
                        # as it would simply wait infinitely for the
                        # resources to become available (even though we
                        # wouldn't really need them).
                        if num_gpus == 0:
                            print("via ray.tune.run()")
                            tune.run(
                                "PG",
                                config=config,
                                stop={"training_iteration": 0})
    ray.shutdown()
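# Hedged illustration of the resource arithmetic both GPU tests above rely
# on: the driver reserves `num_gpus` and each of the `num_workers` rollout
# workers reserves `num_gpus_per_worker`, so (without `_fake_gpus`) a
# config is only satisfiable if the machine provides at least their sum.
# `required_gpus` is a hypothetical helper for illustration, not RLlib API.
def required_gpus(num_gpus, num_workers, num_gpus_per_worker):
    """Total GPU count a Trainer config will try to reserve."""
    return num_gpus + num_workers * num_gpus_per_worker


# E.g. the error branch in test_gpus_in_non_local_mode triggers exactly
# when actual_gpus < required_gpus(num_gpus, 2, num_gpus_per_worker)
# and _fake_gpus is False.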
def test_local(self):
    cf = DEFAULT_CONFIG.copy()
    cf["model"]["fcnet_hiddens"] = [10]
    for _ in framework_iterator(cf):
        agent = PGTrainer(cf, "CartPole-v0")
        print(agent.train())
        agent.stop()
def test_nested_action_spaces(self):
    config = DEFAULT_CONFIG.copy()
    config["env"] = RandomEnv
    # Write output to a tmp dir so we can check whether actions are
    # written correctly.
    tmp_dir = os.popen("mktemp -d").read()[:-1]
    if not os.path.exists(tmp_dir):
        # Last resort: Resolve via the underlying tempdir (and cut the
        # leading "/tmp").
        tmp_dir = ray._private.utils.tempfile.gettempdir() + tmp_dir[4:]
        assert os.path.exists(tmp_dir), f"'{tmp_dir}' not found!"
    config["output"] = tmp_dir
    # Switch off OPE as we don't write action-probs.
    # TODO: We should probably always write those if `output` is given.
    config["input_evaluation"] = []
    # Pretend actions in offline files are already normalized.
    config["actions_in_input_normalized"] = True

    for _ in framework_iterator(config):
        for name, action_space in SPACES.items():
            config["env_config"] = {
                "action_space": action_space,
            }
            for flatten in [False, True]:
                print(f"A={action_space} flatten={flatten}")
                shutil.rmtree(config["output"])
                config["_disable_action_flattening"] = not flatten
                trainer = PGTrainer(config)
                trainer.train()
                trainer.stop()

                # Check actions in the output file (whether properly
                # flattened or not).
                reader = JsonReader(
                    inputs=config["output"],
                    ioctx=trainer.workers.local_worker().io_context,
                )
                sample_batch = reader.next()
                if flatten:
                    assert isinstance(sample_batch["actions"], np.ndarray)
                    assert len(sample_batch["actions"].shape) == 2
                    assert sample_batch["actions"].shape[0] == len(
                        sample_batch)
                else:
                    tree.assert_same_structure(
                        trainer.get_policy().action_space_struct,
                        sample_batch["actions"],
                    )

                # Test whether the offline data can be properly read by
                # a BCTrainer, configured accordingly.
                config["input"] = config["output"]
                del config["output"]
                bc_trainer = BCTrainer(config=config)
                bc_trainer.train()
                bc_trainer.stop()
                config["output"] = tmp_dir
                config["input"] = "sampler"
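# Hedged illustration of what `_disable_action_flattening` toggles in the
# test above: with flattening enabled, a structured (Dict/Tuple) action is
# written as one flat row per timestep; with it disabled, the original
# space structure is preserved, which is what `tree.assert_same_structure`
# verifies. Uses dm-tree (`tree`) and numpy as imported for the test;
# `example_action` is made up for illustration.
example_action = {
    "a": np.array([1.0]),
    "b": (np.array([2.0]), np.array([3.0])),
}
# Flattening yields the 3 leaves in a deterministic (sorted-key) order ...
assert len(tree.flatten(example_action)) == 3
# ... while an unflattened batch must mirror the space's nesting exactly.
tree.assert_same_structure(example_action, {"a": 0.0, "b": (0.0, 0.0)})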
def test_no_step_on_init(self):
    register_env("fail", lambda _: FailOnStepEnv())
    for fw in framework_iterator(frameworks=("torch", "tf")):
        pg = PGTrainer(
            env="fail", config={
                "num_workers": 1,
                "framework": fw,
            })
        self.assertRaises(Exception, lambda: pg.train())
        pg.stop()
def test_rollout_dict_space(self):
    register_env("nested", lambda _: NestedDictEnv())

    agent = PGTrainer(env="nested", config={"framework": "tf"})
    agent.train()
    path = agent.save()
    agent.stop()

    # Test train works on restore.
    agent2 = PGTrainer(env="nested", config={"framework": "tf"})
    agent2.restore(path)
    agent2.train()

    # Test rollout works on restore.
    rollout(agent2, "nested", 100)
def test_multi_agent(self):
    register_env("multi_agent_cartpole",
                 lambda _: MultiAgentCartPole({"num_agents": 10}))

    for fw in framework_iterator():
        pg = PGTrainer(
            env="multi_agent_cartpole",
            config={
                "num_workers": 0,
                "output": self.test_dir,
                "multiagent": {
                    "policies": {"policy_1", "policy_2"},
                    "policy_mapping_fn": (
                        lambda aid, **kwargs: random.choice(
                            ["policy_1", "policy_2"])),
                },
                "framework": fw,
            },
        )
        pg.train()
        self.assertEqual(len(os.listdir(self.test_dir)), 1)
        pg.stop()
        pg = PGTrainer(
            env="multi_agent_cartpole",
            config={
                "num_workers": 0,
                "input": self.test_dir,
                "input_evaluation": ["simulation"],
                "train_batch_size": 2000,
                "multiagent": {
                    "policies": {"policy_1", "policy_2"},
                    "policy_mapping_fn": (
                        lambda aid, **kwargs: random.choice(
                            ["policy_1", "policy_2"])),
                },
                "framework": fw,
            },
        )
        for _ in range(50):
            result = pg.train()
            if not np.isnan(result["episode_reward_mean"]):
                return  # simulation ok
            time.sleep(0.1)
        assert False, "did not see any simulation results"
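# The random `policy_mapping_fn` above re-samples the policy assignment on
# every call; a deterministic alternative (a sketch, assuming
# MultiAgentCartPole uses integer agent ids; `parity_mapping_fn` is a name
# invented here) makes runs easier to reproduce:
def parity_mapping_fn(agent_id, **kwargs):
    """Map even agent ids to policy_1, odd ones to policy_2."""
    return "policy_1" if int(agent_id) % 2 == 0 else "policy_2"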
def test_query_evaluators(self):
    register_env("test", lambda _: gym.make("CartPole-v0"))
    for fw in framework_iterator(frameworks=("torch", "tf")):
        pg = PGTrainer(
            env="test",
            config={
                "num_workers": 2,
                "rollout_fragment_length": 5,
                "num_envs_per_worker": 2,
                "framework": fw,
            })
        results = pg.workers.foreach_worker(
            lambda ev: ev.rollout_fragment_length)
        results2 = pg.workers.foreach_worker_with_index(
            lambda ev, i: (i, ev.rollout_fragment_length))
        results3 = pg.workers.foreach_worker(
            lambda ev: ev.foreach_env(lambda env: 1))
        self.assertEqual(results, [10, 10, 10])
        self.assertEqual(results2, [(0, 10), (1, 10), (2, 10)])
        self.assertEqual(results3, [[1, 1], [1, 1], [1, 1]])
        pg.stop()
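# Why the asserts above expect three 10s (a reading of this RLlib
# version's behavior, worth re-verifying): `foreach_worker` visits the
# local worker plus the 2 remote workers (3 results), and each
# RolloutWorker scales its stored `rollout_fragment_length` by
# `num_envs_per_worker` (5 * 2 = 10), so every sub-env still contributes
# 5 steps per sample() call.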