def test_a2c_exec_impl(ray_start_regular):
    trainer = A2CTrainer(
        env="CartPole-v0", config={
            "min_iter_time_s": 0,
        })
    assert isinstance(trainer.train(), dict)
    check_compute_action(trainer)
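# A note on the argument above: `ray_start_regular` is a pytest fixture from
# the Ray test suite's conftest; it boots a Ray instance for the test and
# shuts it down afterwards. If these snippets are run outside that suite, a
# minimal stand-in could look like the following (a sketch only, not Ray's
# exact fixture, which also parametrizes the init options):


import pytest
import ray


@pytest.fixture
def ray_start_regular():
    # Start a fresh Ray instance for the test...
    address_info = ray.init()
    yield address_info
    # ...and tear it down afterwards.
    ray.shutdown()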
def test_impala_compilation(self):
    """Test whether an ImpalaTrainer can be built with both frameworks."""
    config = impala.DEFAULT_CONFIG.copy()
    num_iterations = 1

    for _ in framework_iterator(config, frameworks=("torch", "tf")):
        local_cfg = config.copy()
        for env in ["Pendulum-v0", "CartPole-v0"]:
            print("Env={}".format(env))

            # Test w/o LSTM.
            print("w/o LSTM")
            trainer = impala.ImpalaTrainer(config=local_cfg, env=env)
            for i in range(num_iterations):
                print(trainer.train())
            check_compute_action(trainer)
            trainer.stop()

            # Test w/ LSTM.
            print("w/ LSTM")
            local_cfg["model"]["use_lstm"] = True
            trainer = impala.ImpalaTrainer(config=local_cfg, env=env)
            for i in range(num_iterations):
                print(trainer.train())
            check_compute_action(trainer)
            trainer.stop()
def test_apex_dqn_compilation_and_per_worker_epsilon_values(self):
    """Test whether an APEX-DQNTrainer can be built on all frameworks."""
    config = apex.APEX_DEFAULT_CONFIG.copy()
    config["num_workers"] = 3
    config["prioritized_replay"] = True
    config["timesteps_per_iteration"] = 100
    config["min_iter_time_s"] = 1
    config["optimizer"]["num_replay_buffer_shards"] = 1

    for _ in framework_iterator(config, ("torch", "tf", "eager")):
        plain_config = config.copy()
        trainer = apex.ApexTrainer(config=plain_config, env="CartPole-v0")

        # Test per-worker epsilon distribution.
        infos = trainer.workers.foreach_policy(
            lambda p, _: p.get_exploration_info())
        expected = [0.4, 0.016190862, 0.00065536]
        check([i["cur_epsilon"] for i in infos], [0.0] + expected)

        check_compute_action(trainer)

        # TODO(ekl) fix iterator metrics bugs w/multiple trainers.
        # for i in range(1):
        #     results = trainer.train()
        #     print(results)

        # Test again per-worker epsilon distribution
        # (should not have changed).
        infos = trainer.workers.foreach_policy(
            lambda p, _: p.get_exploration_info())
        check([i["cur_epsilon"] for i in infos], [0.0] + expected)

        trainer.stop()
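# An aside on the hard-coded `expected` values above: they follow the Ape-X
# per-worker exploration schedule, epsilon_i = base ** (1 + i / (N - 1) *
# alpha) with base=0.4 and alpha=7 (Horgan et al., 2018); the leading 0.0 in
# the check is the non-exploring local worker. The helper below is ours,
# added only to make that arithmetic reproducible:


def _expected_apex_epsilons(num_workers, base=0.4, alpha=7.0):
    """Per-worker epsilons for `num_workers` remote workers (Ape-X)."""
    return [
        base**(1 + i / (num_workers - 1) * alpha)
        for i in range(num_workers)
    ]


# _expected_apex_epsilons(3) -> [0.4, 0.016190862..., 0.00065536]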
def test_dqn_compilation(self):
    """Test whether a DQNTrainer can be built on all frameworks."""
    config = dqn.DEFAULT_CONFIG.copy()
    config["num_workers"] = 2
    num_iterations = 1

    for fw in framework_iterator(config):
        # Double-dueling DQN.
        plain_config = config.copy()
        trainer = dqn.DQNTrainer(config=plain_config, env="CartPole-v0")
        for i in range(num_iterations):
            results = trainer.train()
            print(results)
        check_compute_action(trainer)

        # Rainbow.
        # TODO(sven): Add torch once DQN-torch supports distributional-Q.
        if fw == "torch":
            continue
        rainbow_config = config.copy()
        rainbow_config["num_atoms"] = 10
        rainbow_config["noisy"] = True
        rainbow_config["double_q"] = True
        rainbow_config["dueling"] = True
        rainbow_config["n_step"] = 5
        trainer = dqn.DQNTrainer(config=rainbow_config, env="CartPole-v0")
        for i in range(num_iterations):
            results = trainer.train()
            print(results)
        check_compute_action(trainer)
def test_a2c_exec_impl_microbatch(ray_start_regular):
    config = {
        "min_iter_time_s": 0,
        "microbatch_size": 10,
    }
    for _ in framework_iterator(config, ("tf", "torch")):
        trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
        assert isinstance(trainer.train(), dict)
        check_compute_action(trainer)
def test_a2c_exec_impl_microbatch(ray_start_regular):
    trainer = A2CTrainer(
        env="CartPole-v0",
        config={
            "min_iter_time_s": 0,
            "microbatch_size": 10,
            "use_exec_api": True,
        })
    assert isinstance(trainer.train(), dict)
    check_compute_action(trainer)
def test_ppo_compilation(self):
    """Test whether a PPOTrainer can be built on all frameworks."""
    config = ppo.DEFAULT_CONFIG.copy()
    config["num_workers"] = 0  # Run locally.
    num_iterations = 2

    for _ in framework_iterator(config):
        trainer = ppo.PPOTrainer(config=config, env="CartPole-v0")
        for i in range(num_iterations):
            trainer.train()
        check_compute_action(trainer, include_prev_action_reward=True)
def test_ddppo_compilation(self):
    """Test whether a DDPPOTrainer can be built with the torch framework."""
    config = ppo.ddppo.DEFAULT_CONFIG.copy()
    config["num_gpus_per_worker"] = 0
    num_iterations = 2

    # DD-PPO is torch-only.
    for _ in framework_iterator(config, "torch"):
        trainer = ppo.ddppo.DDPPOTrainer(config=config, env="CartPole-v0")
        for i in range(num_iterations):
            trainer.train()
        check_compute_action(trainer)
def test_marwil_compilation(self):
    """Test whether a MARWILTrainer can be built with all frameworks."""
    config = marwil.DEFAULT_CONFIG.copy()
    config["num_workers"] = 0  # Run locally.
    num_iterations = 2

    # Test for all frameworks.
    for _ in framework_iterator(config):
        trainer = marwil.MARWILTrainer(config=config, env="CartPole-v0")
        for i in range(num_iterations):
            trainer.train()
        check_compute_action(trainer, include_prev_action_reward=True)
def test_td3_compilation(self):
    """Test whether a TD3Trainer can be built with the tf framework."""
    config = td3.TD3_DEFAULT_CONFIG.copy()
    config["num_workers"] = 0  # Run locally.
    num_iterations = 2

    # Test against the tf framework only.
    for _ in framework_iterator(config, frameworks=["tf"]):
        trainer = td3.TD3Trainer(config=config, env="Pendulum-v0")
        for i in range(num_iterations):
            results = trainer.train()
            print(results)
        check_compute_action(trainer)
def test_simple_q_compilation(self):
    """Test whether a SimpleQTrainer can be built on all frameworks."""
    config = dqn.SIMPLE_Q_DEFAULT_CONFIG.copy()
    config["num_workers"] = 0  # Run locally.
    num_iterations = 2

    for _ in framework_iterator(config):
        trainer = dqn.SimpleQTrainer(config=config, env="CartPole-v0")
        for i in range(num_iterations):
            results = trainer.train()
            print(results)
        check_compute_action(trainer)
def test_ddpg_compilation(self):
    """Test whether a DDPGTrainer can be built with both frameworks."""
    config = ddpg.DEFAULT_CONFIG.copy()
    config["num_workers"] = 0  # Run locally.
    config["num_envs_per_worker"] = 2  # Use 2 (vectorized) sub-envs.
    num_iterations = 2

    # Test against all frameworks.
    for _ in framework_iterator(config, ("tf", "torch")):
        trainer = ddpg.DDPGTrainer(config=config, env="Pendulum-v0")
        for i in range(num_iterations):
            results = trainer.train()
            print(results)
        check_compute_action(trainer)
def test_a2c_compilation(self):
    """Test whether an A2CTrainer can be built with both frameworks."""
    config = a3c.DEFAULT_CONFIG.copy()
    config["num_workers"] = 2
    config["num_envs_per_worker"] = 2
    num_iterations = 1

    # Test against all frameworks.
    for fw in framework_iterator(config, ("tf", "torch")):
        config["sample_async"] = fw == "tf"
        for env in ["PongDeterministic-v0"]:
            trainer = a3c.A2CTrainer(config=config, env=env)
            for i in range(num_iterations):
                results = trainer.train()
                print(results)
            check_compute_action(trainer)
def test_es_compilation(self):
    """Test whether an ESTrainer can be built on all frameworks."""
    ray.init()
    config = es.DEFAULT_CONFIG.copy()
    # Keep it simple.
    config["model"]["fcnet_hiddens"] = [10]
    config["model"]["fcnet_activation"] = None
    num_iterations = 2

    for _ in framework_iterator(config, ("torch", "tf")):
        plain_config = config.copy()
        trainer = es.ESTrainer(config=plain_config, env="CartPole-v0")
        for i in range(num_iterations):
            results = trainer.train()
            print(results)
        check_compute_action(trainer)
def test_appo_compilation(self):
    """Test whether an APPOTrainer can be built with both frameworks."""
    config = ppo.appo.DEFAULT_CONFIG.copy()
    config["num_workers"] = 1
    num_iterations = 2

    for _ in framework_iterator(config, frameworks=("torch", "tf")):
        # Test w/o v-trace.
        _config = config.copy()
        trainer = ppo.APPOTrainer(config=_config, env="CartPole-v0")
        for i in range(num_iterations):
            print(trainer.train())
        check_compute_action(trainer)

        # Test w/ v-trace.
        _config = config.copy()
        _config["vtrace"] = True
        trainer = ppo.APPOTrainer(config=_config, env="CartPole-v0")
        for i in range(num_iterations):
            print(trainer.train())
        check_compute_action(trainer)
def test_apex_ddpg_compilation_and_per_worker_epsilon_values(self):
    """Test whether an APEX-DDPGTrainer can be built on all frameworks."""
    config = apex_ddpg.APEX_DDPG_DEFAULT_CONFIG.copy()
    config["num_workers"] = 3
    config["prioritized_replay"] = True
    config["timesteps_per_iteration"] = 100
    config["min_iter_time_s"] = 1
    config["learning_starts"] = 0
    config["optimizer"]["num_replay_buffer_shards"] = 1
    num_iterations = 1

    for _ in framework_iterator(config, ("torch", "tf")):
        plain_config = config.copy()
        trainer = apex_ddpg.ApexDDPGTrainer(
            config=plain_config, env="Pendulum-v0")

        # Test per-worker scale distribution.
        infos = trainer.workers.foreach_policy(
            lambda p, _: p.get_exploration_info())
        scale = [i["cur_scale"] for i in infos]
        expected = [
            0.4**(1 + (i + 1) / float(config["num_workers"] - 1) * 7)
            for i in range(config["num_workers"])
        ]
        check(scale, [0.0] + expected)

        for _ in range(num_iterations):
            print(trainer.train())

        check_compute_action(trainer)

        # Test again per-worker scale distribution
        # (should not have changed).
        infos = trainer.workers.foreach_policy(
            lambda p, _: p.get_exploration_info())
        scale = [i["cur_scale"] for i in infos]
        check(scale, [0.0] + expected)

        trainer.stop()
def test_sac_compilation(self):
    """Tests whether an SACTrainer can be built with all frameworks."""
    config = sac.DEFAULT_CONFIG.copy()
    config["num_workers"] = 0  # Run locally.
    config["twin_q"] = True
    config["soft_horizon"] = True
    config["clip_actions"] = False
    config["normalize_actions"] = True
    config["learning_starts"] = 0
    config["prioritized_replay"] = True
    num_iterations = 1

    for _ in framework_iterator(config, ("tf", "torch")):
        # Test for different env types (discrete w/ and w/o image, + cont).
        for env in [
                "Pendulum-v0", "MsPacmanNoFrameskip-v4", "CartPole-v0"
        ]:
            print("Env={}".format(env))
            config["use_state_preprocessor"] = \
                env == "MsPacmanNoFrameskip-v4"
            trainer = sac.SACTrainer(config=config, env=env)
            for i in range(num_iterations):
                results = trainer.train()
                print(results)
            check_compute_action(trainer)
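# ---------------------------------------------------------------------------
# The tests above lean on a few shared RLlib test utilities:
# `framework_iterator` (iterates over the requested deep-learning
# frameworks, adjusting the config in place), `check` (an approximate
# equality assert), and `check_compute_action`. For reference, here is a
# minimal sketch of what `check_compute_action` does; it is an illustrative
# approximation, not RLlib's exact implementation.
# ---------------------------------------------------------------------------


def check_compute_action(trainer, include_prev_action_reward=False):
    """Sketch: query the trainer for one action on a sampled observation."""
    # Assumes the local worker holds an env instance (true for the
    # num_workers=0 configs above); the exact lookup may differ across
    # Ray versions.
    env = trainer.workers.local_worker().env
    obs = env.observation_space.sample()
    if include_prev_action_reward:
        action = trainer.compute_action(
            obs, prev_action=env.action_space.sample(), prev_reward=0.0)
    else:
        action = trainer.compute_action(obs)
    # Whatever the framework, the action must be valid in the action space.
    assert env.action_space.contains(action)


if __name__ == "__main__":
    import pytest
    import sys
    sys.exit(pytest.main(["-v", __file__]))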