Beispiel #1
0
 def test_a2c_exec_impl(ray_start_regular):
     """Smoke-test a single A2C training iteration via the exec impl."""
     cfg = {"min_iter_time_s": 0}
     trainer = A2CTrainer(env="CartPole-v0", config=cfg)
     # One train() call must come back as a result dict.
     result = trainer.train()
     assert isinstance(result, dict)
     check_compute_action(trainer)
Beispiel #2
0
    def test_impala_compilation(self):
        """Test whether an ImpalaTrainer can be built with both frameworks.

        Runs each env once without and once with an LSTM model. Fixes two
        defects of the previous version:
        - The printed labels were swapped ("w/ LSTM" before the no-LSTM run).
        - ``config.copy()`` is shallow, so ``local_cfg["model"]`` is shared
          with ``config``; setting ``use_lstm = True`` leaked into the
          "w/o LSTM" run of the next env / framework. We now reset the flag
          explicitly before the no-LSTM run.
        """
        config = impala.DEFAULT_CONFIG.copy()
        num_iterations = 1

        for _ in framework_iterator(config, frameworks=("torch", "tf")):
            local_cfg = config.copy()
            for env in ["Pendulum-v0", "CartPole-v0"]:
                print("Env={}".format(env))

                # Test w/o LSTM.
                print("w/o LSTM")
                # Reset explicitly: the "model" sub-dict is shared across
                # (shallow) copies, so a previous iteration may have set it.
                local_cfg["model"]["use_lstm"] = False
                trainer = impala.ImpalaTrainer(config=local_cfg, env=env)
                for i in range(num_iterations):
                    print(trainer.train())
                check_compute_action(trainer)
                trainer.stop()

                # Test w/ LSTM.
                print("w/ LSTM")
                local_cfg["model"]["use_lstm"] = True
                trainer = impala.ImpalaTrainer(config=local_cfg, env=env)
                for i in range(num_iterations):
                    print(trainer.train())
                check_compute_action(trainer)
                trainer.stop()
Beispiel #3
0
    def test_apex_dqn_compilation_and_per_worker_epsilon_values(self):
        """Test whether an APEX-DQNTrainer can be built on all frameworks."""
        config = apex.APEX_DEFAULT_CONFIG.copy()
        config.update({
            "num_workers": 3,
            "prioritized_replay": True,
            "timesteps_per_iteration": 100,
            "min_iter_time_s": 1,
        })
        config["optimizer"]["num_replay_buffer_shards"] = 1

        for _ in framework_iterator(config, ("torch", "tf", "eager")):
            trainer = apex.ApexTrainer(config=config.copy(), env="CartPole-v0")

            # Check the per-worker epsilon values; the local worker
            # (first entry) always reports 0.0.
            expected = [0.4, 0.016190862, 0.00065536]
            infos = trainer.workers.foreach_policy(
                lambda p, _: p.get_exploration_info())
            check([info["cur_epsilon"] for info in infos], [0.0] + expected)

            check_compute_action(trainer)

            # TODO(ekl) fix iterator metrics bugs w/multiple trainers.
            #            for i in range(1):
            #                results = trainer.train()
            #                print(results)

            # Epsilon distribution must be unchanged after the calls above.
            infos = trainer.workers.foreach_policy(
                lambda p, _: p.get_exploration_info())
            check([info["cur_epsilon"] for info in infos], [0.0] + expected)

            trainer.stop()
Beispiel #4
0
    def test_dqn_compilation(self):
        """Test whether a DQNTrainer can be built on all frameworks."""
        config = dqn.DEFAULT_CONFIG.copy()
        config["num_workers"] = 2
        num_iterations = 1

        for fw in framework_iterator(config):
            # Plain double-dueling DQN.
            trainer = dqn.DQNTrainer(config=config.copy(), env="CartPole-v0")
            for _ in range(num_iterations):
                print(trainer.train())

            check_compute_action(trainer)

            # Rainbow.
            # TODO(sven): Add torch once DQN-torch supports distributional-Q.
            if fw == "torch":
                continue
            rainbow_config = config.copy()
            rainbow_config.update({
                "num_atoms": 10,
                "noisy": True,
                "double_q": True,
                "dueling": True,
                "n_step": 5,
            })
            trainer = dqn.DQNTrainer(config=rainbow_config, env="CartPole-v0")
            for _ in range(num_iterations):
                print(trainer.train())

            check_compute_action(trainer)
Beispiel #5
0
 def test_a2c_exec_impl_microbatch(ray_start_regular):
     """Smoke-test microbatched A2C on each framework (tf, torch)."""
     config = {
         "min_iter_time_s": 0,
         "microbatch_size": 10,
     }
     for _ in framework_iterator(config, ("tf", "torch")):
         trainer = a3c.A2CTrainer(env="CartPole-v0", config=config)
         # A single training iteration must yield a result dict.
         result = trainer.train()
         assert isinstance(result, dict)
         check_compute_action(trainer)
Beispiel #6
0
 def test_a2c_exec_impl_microbatch(ray_start_regular):
     """Smoke-test a microbatched A2C iteration with the exec API on."""
     cfg = {
         "min_iter_time_s": 0,
         "microbatch_size": 10,
         "use_exec_api": True,
     }
     trainer = A2CTrainer(env="CartPole-v0", config=cfg)
     # One training step must produce a result dict.
     result = trainer.train()
     assert isinstance(result, dict)
     check_compute_action(trainer)
Beispiel #7
0
    def test_ppo_compilation(self):
        """Test whether a PPOTrainer can be built with both frameworks."""
        config = ppo.DEFAULT_CONFIG.copy()
        config["num_workers"] = 0  # Sample locally on the driver.

        for _ in framework_iterator(config):
            trainer = ppo.PPOTrainer(config=config, env="CartPole-v0")
            # Run two training iterations per framework.
            for _ in range(2):
                trainer.train()
            check_compute_action(trainer, include_prev_action_reward=True)
Beispiel #8
0
    def test_ddppo_compilation(self):
        """Test whether a DDPPOTrainer can be built (torch framework)."""
        config = ppo.ddppo.DEFAULT_CONFIG.copy()
        config["num_gpus_per_worker"] = 0  # CPU-only workers.

        for _ in framework_iterator(config, "torch"):
            trainer = ppo.ddppo.DDPPOTrainer(config=config, env="CartPole-v0")
            # Run two training iterations.
            for _ in range(2):
                trainer.train()
            check_compute_action(trainer)
Beispiel #9
0
    def test_marwil_compilation(self):
        """Test whether a MARWILTrainer can be built with all frameworks."""
        config = marwil.DEFAULT_CONFIG.copy()
        config["num_workers"] = 0  # Sample locally on the driver.

        # Build and step the trainer under every framework.
        for _ in framework_iterator(config):
            trainer = marwil.MARWILTrainer(config=config, env="CartPole-v0")
            for _ in range(2):  # Two training iterations.
                trainer.train()
            check_compute_action(trainer, include_prev_action_reward=True)
Beispiel #10
0
    def test_td3_compilation(self):
        """Test whether a TD3Trainer can be built (tf framework)."""
        config = td3.TD3_DEFAULT_CONFIG.copy()
        config["num_workers"] = 0  # Run locally.

        for _ in framework_iterator(config, frameworks=["tf"]):
            trainer = td3.TD3Trainer(config=config, env="Pendulum-v0")
            # Two training iterations, printing each result.
            for _ in range(2):
                print(trainer.train())
            check_compute_action(trainer)
Beispiel #11
0
    def test_simple_q_compilation(self):
        """Test whether a SimpleQTrainer can be built on all frameworks."""
        config = dqn.SIMPLE_Q_DEFAULT_CONFIG.copy()
        config["num_workers"] = 0  # Run locally on the driver.

        for _ in framework_iterator(config):
            trainer = dqn.SimpleQTrainer(config=config, env="CartPole-v0")
            # Two training iterations, printing each result.
            for _ in range(2):
                print(trainer.train())

            check_compute_action(trainer)
Beispiel #12
0
    def test_ddpg_compilation(self):
        """Test whether a DDPGTrainer can be built with both frameworks."""
        config = ddpg.DEFAULT_CONFIG.copy()
        config["num_workers"] = 0  # Run locally on the driver.
        config["num_envs_per_worker"] = 2  # Vectorize: two envs per worker.

        # Build and step the trainer against each framework.
        for _ in framework_iterator(config, ("tf", "torch")):
            trainer = ddpg.DDPGTrainer(config=config, env="Pendulum-v0")
            for _ in range(2):  # Two training iterations.
                print(trainer.train())
            check_compute_action(trainer)
Beispiel #13
0
    def test_a2c_compilation(self):
        """Test whether an A2CTrainer can be built with both frameworks."""
        config = a3c.DEFAULT_CONFIG.copy()
        config["num_workers"] = 2
        config["num_envs_per_worker"] = 2

        for fw in framework_iterator(config, ("tf", "torch")):
            # Only enable async sampling under tf.
            config["sample_async"] = fw == "tf"
            for env in ["PongDeterministic-v0"]:
                trainer = a3c.A2CTrainer(config=config, env=env)
                for _ in range(1):  # One training iteration.
                    print(trainer.train())
                check_compute_action(trainer)
Beispiel #14
0
    def test_es_compilation(self):
        """Test whether an ESTrainer can be built on all frameworks."""
        ray.init()
        config = es.DEFAULT_CONFIG.copy()
        # Use a tiny model so the test runs fast.
        config["model"]["fcnet_hiddens"] = [10]
        config["model"]["fcnet_activation"] = None

        for _ in framework_iterator(config, ("torch", "tf")):
            trainer = es.ESTrainer(config=config.copy(), env="CartPole-v0")
            for _ in range(2):  # Two training iterations.
                print(trainer.train())

            check_compute_action(trainer)
Beispiel #15
0
    def test_appo_compilation(self):
        """Test whether an APPOTrainer can be built with both frameworks."""
        config = ppo.appo.DEFAULT_CONFIG.copy()
        config["num_workers"] = 1
        num_iterations = 2

        for _ in framework_iterator(config, frameworks=("torch", "tf")):
            # First pass: vtrace left at its config default.
            cfg = config.copy()
            trainer = ppo.APPOTrainer(config=cfg, env="CartPole-v0")
            for _ in range(num_iterations):
                print(trainer.train())
            check_compute_action(trainer)

            # Second pass: vtrace explicitly enabled.
            cfg = config.copy()
            cfg["vtrace"] = True
            trainer = ppo.APPOTrainer(config=cfg, env="CartPole-v0")
            for _ in range(num_iterations):
                print(trainer.train())
            check_compute_action(trainer)
Beispiel #16
0
    def test_apex_ddpg_compilation_and_per_worker_epsilon_values(self):
        """Test whether an APEX-DDPGTrainer can be built on all frameworks."""
        config = apex_ddpg.APEX_DDPG_DEFAULT_CONFIG.copy()
        config.update({
            "num_workers": 3,
            "prioritized_replay": True,
            "timesteps_per_iteration": 100,
            "min_iter_time_s": 1,
            "learning_starts": 0,
        })
        config["optimizer"]["num_replay_buffer_shards"] = 1

        for _ in framework_iterator(config, ("torch", "tf")):
            trainer = apex_ddpg.ApexDDPGTrainer(
                config=config.copy(), env="Pendulum-v0")

            # Check per-worker exploration scales; the local worker (first
            # entry) always reports 0.0.
            num_workers = config["num_workers"]
            expected = [
                0.4**(1 + (i + 1) / float(num_workers - 1) * 7)
                for i in range(num_workers)
            ]
            infos = trainer.workers.foreach_policy(
                lambda p, _: p.get_exploration_info())
            check([info["cur_scale"] for info in infos], [0.0] + expected)

            for _ in range(1):  # One training iteration.
                print(trainer.train())
            check_compute_action(trainer)

            # Per-worker scales must be unchanged after training.
            infos = trainer.workers.foreach_policy(
                lambda p, _: p.get_exploration_info())
            check([info["cur_scale"] for info in infos], [0.0] + expected)

            trainer.stop()
Beispiel #17
0
 def test_sac_compilation(self):
     """Tests whether an SACTrainer can be built with all frameworks."""
     config = sac.DEFAULT_CONFIG.copy()
     config.update({
         "num_workers": 0,  # Run locally.
         "twin_q": True,
         "soft_horizon": True,
         "clip_actions": False,
         "normalize_actions": True,
         "learning_starts": 0,
         "prioritized_replay": True,
     })
     for _ in framework_iterator(config, ("tf", "torch")):
         # Cover discrete envs (with and without image obs) plus a
         # continuous-action env.
         for env in [
                 "Pendulum-v0", "MsPacmanNoFrameskip-v4", "CartPole-v0"
         ]:
             print("Env={}".format(env))
             # Only the Atari (image-obs) env uses the state preprocessor.
             config["use_state_preprocessor"] = \
                 env == "MsPacmanNoFrameskip-v4"
             trainer = sac.SACTrainer(config=config, env=env)
             for _ in range(1):  # One training iteration.
                 print(trainer.train())
             check_compute_action(trainer)