def test_a2c_exec_impl(self):
    """Run one A2C training iteration on CartPole-v0 and validate the output.

    The config sets ``min_time_s_per_iteration=0``. For every framework
    yielded by ``framework_iterator`` a trainer is built, trained for a
    single iteration, and its results and single-action computation are
    checked.
    """
    config = (
        a2c.A2CConfig()
        .environment(env="CartPole-v0")
        .reporting(min_time_s_per_iteration=0)
    )
    for _ in framework_iterator(config):
        trainer = config.build()
        try:
            results = trainer.train()
            check_train_results(results)
            print(results)
            check_compute_single_action(trainer)
        finally:
            # Always stop the trainer, even when a check above raises, so a
            # failing assertion does not leave it running.
            trainer.stop()
def test_a2c_compilation(self):
    """Test whether an A2C trainer can be built and trained with each framework.

    Uses two rollout workers with two envs per worker. For every framework
    yielded by ``framework_iterator`` (eager tracing included) and for each
    of the listed environments, a trainer is built, trained for
    ``num_iterations`` iterations, and checked.
    """
    config = a2c.A2CConfig().rollouts(num_rollout_workers=2, num_envs_per_worker=2)
    num_iterations = 1
    env_names = ("CartPole-v0", "Pendulum-v1", "PongDeterministic-v0")

    # Test against all frameworks.
    for _ in framework_iterator(config, with_eager_tracing=True):
        for env in env_names:
            trainer = config.build(env=env)
            try:
                for _ in range(num_iterations):
                    results = trainer.train()
                    check_train_results(results)
                    print(results)
                check_compute_single_action(trainer)
            finally:
                # Always stop the trainer, even when a check above raises,
                # so a failure in one env does not leave it running.
                trainer.stop()
def _import_a2c():
    """Import A2C on demand and return ``(A2C, default_config_dict)``.

    The import happens inside the function, so ``ray.rllib.algorithms.a2c``
    is only loaded when this function is called. The second element is a
    freshly constructed ``A2CConfig`` converted with ``to_dict()``.
    """
    from ray.rllib.algorithms.a2c import A2C, A2CConfig

    default_config = A2CConfig().to_dict()
    return A2C, default_config