Exemple #1
0
    def test_a2c_exec_impl(self):
        """Train A2C for one iteration on CartPole-v0 and validate the output.

        For every iteration yielded by ``framework_iterator(config)`` a fresh
        trainer is built, trained once, its results are passed to
        ``check_train_results`` and the trainer is passed to
        ``check_compute_single_action``.
        """
        config = (
            a2c.A2CConfig()
            .environment(env="CartPole-v0")
            .reporting(min_time_s_per_iteration=0)
        )

        for _ in framework_iterator(config):
            trainer = config.build()
            # Always stop the trainer, even when training or one of the
            # checks raises; otherwise a failing run never shuts it down.
            try:
                results = trainer.train()
                check_train_results(results)
                print(results)
                check_compute_single_action(trainer)
            finally:
                trainer.stop()
Exemple #2
0
    def test_a2c_compilation(self):
        """Test whether an A2C can be built and trained on several envs.

        Uses 2 rollout workers with 2 envs per worker. For every iteration
        yielded by ``framework_iterator`` (with eager tracing enabled) and
        every environment in the list below, a trainer is built, trained for
        ``num_iterations`` iterations, and checked with
        ``check_train_results`` and ``check_compute_single_action``.
        """
        config = a2c.A2CConfig().rollouts(num_rollout_workers=2, num_envs_per_worker=2)

        num_iterations = 1

        # Test against all frameworks.
        for _ in framework_iterator(config, with_eager_tracing=True):
            for env in ["CartPole-v0", "Pendulum-v1", "PongDeterministic-v0"]:
                trainer = config.build(env=env)
                # Always stop the trainer, even when training or one of the
                # checks raises; otherwise a failure on one env leaves the
                # trainer running.
                try:
                    for _ in range(num_iterations):
                        results = trainer.train()
                        check_train_results(results)
                        print(results)
                    check_compute_single_action(trainer)
                finally:
                    trainer.stop()
Exemple #3
0
def _import_a2c():
    """Lazily import the A2C module and return its class and default config.

    The import happens inside the function, so the module is only loaded
    when this function is called.

    Returns:
        A 2-tuple ``(A2C, config_dict)`` where ``config_dict`` is the result
        of calling ``to_dict()`` on a freshly constructed ``A2CConfig``.
    """
    import ray.rllib.algorithms.a2c as a2c_module

    algorithm_cls = a2c_module.A2C
    default_config = a2c_module.A2CConfig().to_dict()
    return algorithm_cls, default_config