def test_DDPG(self):
    """A discrete-actor DDPG should produce a valid discrete-action sample."""
    config = generate_algorithm_config("DDPG", {})
    frame = config["frame_config"]
    frame["models"] = [
        "DDPGActorDisc",
        "DDPGActorDisc",
        "DDPGCritic",
        "DDPGCritic",
    ]
    # All four networks take the same CartPole dimensions
    # (4-dim observation, 2 actions).
    frame["model_kwargs"] = [{
        "state_dim": 4,
        "action_dim": 2
    }] * 4
    algorithm = init_algorithm_from_config(config)
    environment = gym.make("CartPole-v0")
    dataset = RLGymDiscActDataset(algorithm, environment,
                                  render_every_episode=1)
    self.assert_valid_disc_output(next(dataset))
def test_A2C(self):
    """A discrete-actor A2C should produce a valid discrete-action sample."""
    config = generate_algorithm_config("A2C", {})
    config["frame_config"]["models"] = ["A2CActorDisc", "A2CCritic"]
    # Actor needs the action count; the critic only sees the state.
    actor_kwargs = {"state_dim": 4, "action_num": 2}
    critic_kwargs = {"state_dim": 4}
    config["frame_config"]["model_kwargs"] = [actor_kwargs, critic_kwargs]
    algorithm = init_algorithm_from_config(config)
    environment = gym.make("CartPole-v0")
    dataset = RLGymDiscActDataset(algorithm, environment,
                                  render_every_episode=1)
    self.assert_valid_disc_output(next(dataset))
def test_A2C(self):
    """A continuous-actor A2C should produce a valid continuous-action
    sample."""
    config = generate_algorithm_config("A2C", {})
    config["frame_config"]["models"] = ["A2CActorCont", "A2CCritic"]
    # Pendulum: 3-dim observation, single bounded action in [-2, 2].
    actor_kwargs = {"state_dim": 3, "action_dim": 1, "action_range": 2}
    critic_kwargs = {"state_dim": 3}
    config["frame_config"]["model_kwargs"] = [actor_kwargs, critic_kwargs]
    algorithm = init_algorithm_from_config(config)
    environment = gym.make("Pendulum-v0")
    dataset = RLGymContActDataset(algorithm, environment,
                                  render_every_episode=1)
    self.assert_valid_cont_output(next(dataset))
def test_DQN(self):
    """DQN should produce a valid discrete-action sample on CartPole."""
    config = generate_algorithm_config("DQN", {})
    config["frame_config"]["models"] = ["QNet", "QNet"]
    # Online and target Q-networks are built with identical arguments;
    # copy the dict so the two entries stay independent.
    qnet_kwargs = {"state_dim": 4, "action_num": 2}
    config["frame_config"]["model_kwargs"] = [
        dict(qnet_kwargs),
        dict(qnet_kwargs),
    ]
    algorithm = init_algorithm_from_config(config)
    environment = gym.make("CartPole-v0")
    dataset = RLGymDiscActDataset(algorithm, environment,
                                  render_every_episode=1)
    self.assert_valid_disc_output(next(dataset))
def test_gym_env_dataset_creator():
    """The creator should select the dataset class from the environment's
    action space and raise ValueError for unsupported spaces.

    Note: the DDPG instance built here is reused for every environment —
    the creator only inspects the env config, not the algorithm.
    """
    # Discrete action environment
    config = generate_gym_env_config("CartPole-v0", {})
    config = generate_algorithm_config("DDPG", config)
    frame = config["frame_config"]
    frame["models"] = [
        "DDPGActorCont",
        "DDPGActorCont",
        "DDPGCritic",
        "DDPGCritic",
    ]
    actor_kwargs = {"state_dim": 3, "action_dim": 1, "action_range": 2}
    critic_kwargs = {"state_dim": 3, "action_dim": 1}
    frame["model_kwargs"] = [actor_kwargs] * 2 + [critic_kwargs] * 2
    ddpg = init_algorithm_from_config(config)
    for env_key in ("train_env_config", "test_env_config"):
        assert isinstance(
            gym_env_dataset_creator(ddpg, config[env_key]),
            RLGymDiscActDataset,
        )
    # Continuous action environment
    config = generate_gym_env_config("Pendulum-v0", {})
    for env_key in ("train_env_config", "test_env_config"):
        assert isinstance(
            gym_env_dataset_creator(ddpg, config[env_key]),
            RLGymContActDataset,
        )
    # Unsupported environment,
    # like algorithmic, which uses a tuple action space
    # or robotics, which uses the goal action space
    config = generate_gym_env_config("Copy-v0", {})
    for env_key in ("train_env_config", "test_env_config"):
        with pytest.raises(ValueError, match="not supported"):
            gym_env_dataset_creator(ddpg, config[env_key])
def test_DDPG(self):
    """A continuous-actor DDPG should produce a valid continuous-action
    sample."""
    config = generate_algorithm_config("DDPG", {})
    frame = config["frame_config"]
    frame["models"] = [
        "DDPGActorCont",
        "DDPGActorCont",
        "DDPGCritic",
        "DDPGCritic",
    ]
    # Pendulum: actors need the action bound, critics only the dimensions.
    actor_kwargs = {"state_dim": 3, "action_dim": 1, "action_range": 2}
    critic_kwargs = {"state_dim": 3, "action_dim": 1}
    frame["model_kwargs"] = [actor_kwargs] * 2 + [critic_kwargs] * 2
    algorithm = init_algorithm_from_config(config)
    environment = gym.make("Pendulum-v0")
    dataset = RLGymContActDataset(algorithm, environment,
                                  render_every_episode=1)
    self.assert_valid_cont_output(next(dataset))