def test_ppo_get_state():
    # Assign
    state_size, action_size = 3, 4
    init_config = {'actor_lr': 0.1, 'gamma': 0.6}
    agent = PPOAgent(state_size, action_size, device='cpu', **init_config)

    # Act
    agent_state = agent.get_state()

    # Assert
    assert isinstance(agent_state, AgentState)
    assert agent_state.model == PPOAgent.name
    assert agent_state.state_space == state_size
    assert agent_state.action_space == action_size
    assert agent_state.config == agent._config
    assert agent_state.config['actor_lr'] == 0.1
    assert agent_state.config['gamma'] == 0.6

    network_state = agent_state.network
    assert isinstance(network_state, NetworkState)
    assert {'actor', 'critic', 'policy'} == set(network_state.net.keys())

    buffer_state = agent_state.buffer
    assert isinstance(buffer_state, BufferState)
    assert buffer_state.type == agent.buffer.type
    assert buffer_state.batch_size == agent.buffer.batch_size
    assert buffer_state.buffer_size == agent.buffer.buffer_size


def test_ppo_from_state():
    # Assign
    state_shape, action_size = 10, 3
    agent = PPOAgent(state_shape, action_size)
    agent_state = agent.get_state()

    # Act
    new_agent = PPOAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, PPOAgent)
    assert new_agent.hparams == agent.hparams
    assert all(
        torch.all(x == y)
        for x, y in zip(agent.policy.parameters(), new_agent.policy.parameters())
    )
    assert all(
        torch.all(x == y)
        for x, y in zip(agent.actor.parameters(), new_agent.actor.parameters())
    )
    assert all(
        torch.all(x == y)
        for x, y in zip(agent.critic.parameters(), new_agent.critic.parameters())
    )
    assert new_agent.buffer == agent.buffer


def test_ppo_from_state_one_updated():
    # Assign
    state_shape, action_size = 10, 3
    agent = PPOAgent(state_shape, action_size)
    deterministic_interactions(agent, num_iters=100)
    agent_state = agent.get_state()
    deterministic_interactions(agent, num_iters=400)

    # Act
    new_agent = PPOAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, PPOAgent)
    # assert any(torch.any(x != y) for x, y in zip(agent.policy.parameters(), new_agent.policy.parameters()))
    assert any(
        torch.any(x != y)
        for x, y in zip(agent.actor.parameters(), new_agent.actor.parameters())
    )
    assert any(
        torch.any(x != y)
        for x, y in zip(agent.critic.parameters(), new_agent.critic.parameters())
    )
    assert new_agent.buffer != agent.buffer
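

# NOTE: `deterministic_interactions` used above is a shared helper from the test suite and
# is not shown in this excerpt. The sketch below only illustrates what such a helper might
# look like; it assumes the agent exposes `state_size`, `act(state)` and
# `step(state, action, reward, next_state, done)`, and the state/reward scheme is made up.
def deterministic_interactions(agent, num_iters=50):
    state = [0.0] * agent.state_size
    actions = []
    for i in range(num_iters):
        action = agent.act(state)
        actions.append(action)
        # Fixed transition so repeated runs see exactly the same states and rewards.
        next_state = [(s + 0.05 * (i % 5)) % 1.0 for s in state]
        reward = float(i % 2)
        done = (i + 1) % 10 == 0
        agent.step(state, action, reward, next_state, done)
        state = next_state
    return actions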


def test_agent_factory_ppo_agent_from_state_network_buffer_none():
    # Assign
    state_size, action_size = 10, 5
    agent = PPOAgent(state_size, action_size, device="cpu")
    state = agent.get_state()
    state.network = None
    state.buffer = None

    # Act
    new_agent = AgentFactory.from_state(state)

    # Assert
    assert id(new_agent) != id(agent)
    assert new_agent.hparams == agent.hparams


def test_agent_factory_ppo_agent_from_state():
    # Assign
    state_size, action_size = 10, 5
    agent = PPOAgent(state_size, action_size, device="cpu")
    state = agent.get_state()

    # Act
    new_agent = AgentFactory.from_state(state)

    # Assert
    assert id(new_agent) != id(agent)
    assert new_agent == agent
    assert new_agent.name == PPOAgent.name
    assert new_agent.hparams == agent.hparams
    assert new_agent.buffer == agent.buffer


def test_ppo_from_state_network_state_none():
    # Assign
    state_shape, action_size = 10, 3
    agent = PPOAgent(state_shape, action_size)
    agent_state = agent.get_state()
    agent_state.network = None

    # Act
    new_agent = PPOAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, PPOAgent)
    assert new_agent.hparams == agent.hparams
    assert new_agent.buffer == agent.buffer


def test_runs_ppo():
    # Assign
    task = GymTask('Pendulum-v0')
    agent = PPOAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)


def from_state(state: AgentState) -> AgentBase:
    if state.model == DQNAgent.name:
        return DQNAgent.from_state(state)
    elif state.model == PPOAgent.name:
        return PPOAgent.from_state(state)
    else:
        raise ValueError(f"Agent state contains unsupported model type: '{state.model}'")


def from_state(state: AgentState) -> AgentBase:
    norm_model = state.model.upper()
    if norm_model == DQNAgent.name.upper():
        return DQNAgent.from_state(state)
    elif norm_model == PPOAgent.name.upper():
        return PPOAgent.from_state(state)
    elif norm_model == DDPGAgent.name.upper():
        return DDPGAgent.from_state(state)
    elif norm_model == RainbowAgent.name.upper():
        return RainbowAgent.from_state(state)
    else:
        raise ValueError(f"Agent state contains unsupported model type: {state.model}")
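

# The factory dispatch above lets an agent be rebuilt purely from its serialized state.
# Below is a minimal sketch of a checkpoint round trip; it assumes `AgentState` is
# picklable so that `torch.save`/`torch.load` can serialize it, and the function name
# and path are illustrative rather than part of the library.
def example_checkpoint_roundtrip(path="/tmp/ppo_checkpoint.pt"):
    agent = PPOAgent(10, 5, device="cpu")
    torch.save(agent.get_state(), path)          # persist the full AgentState
    restored_state = torch.load(path)            # load it back from disk
    restored_agent = AgentFactory.from_state(restored_state)
    assert restored_agent.hparams == agent.hparams
    return restored_agent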


def test_multi_sync_env_runner_run_single_step_multiple_task():
    # Assign
    tasks: List[TaskType] = [test_task, test_task]
    agent = PPOAgent(test_task.state_size, test_task.action_size, num_workers=len(tasks))
    multi_sync_env_runner = MultiSyncEnvRunner(tasks, agent)

    # Act
    scores = multi_sync_env_runner.run(max_episodes=1, max_iterations=1, force_new=True)

    # Assert
    assert len(scores) == 2  # After 1 iteration both "finished" at the same time


def test_multi_sync_env_runner_run_multiple_step_multiple_task():
    # Assign
    tasks: List[TaskType] = [test_task, test_task]
    agent = PPOAgent(test_task.state_size, test_task.action_size, num_workers=len(tasks))
    multi_sync_env_runner = MultiSyncEnvRunner(tasks, agent)

    # Act
    scores = multi_sync_env_runner.run(max_episodes=3, max_iterations=100, force_new=True)

    # Assert
    # On rare occasions both tasks complete an episode at the same time, yielding an extra score.
    assert len(scores) in (3, 4)


def test_ppo_get_state_compare_different_agents():
    # Assign
    state_size, action_size = 3, 2
    agent_1 = PPOAgent(state_size, action_size, device='cpu', n_steps=1)
    agent_2 = PPOAgent(state_size, action_size, device='cpu', n_steps=2)

    # Act
    state_1 = agent_1.get_state()
    state_2 = agent_2.get_state()

    # Assert
    assert state_1 != state_2
    assert state_1.model == state_2.model


def test_ppo_seed():
    # Assign
    agent_0 = PPOAgent(4, 2, device='cpu')  # Reference
    agent_1 = PPOAgent(4, 2, device='cpu')
    agent_2 = copy.deepcopy(agent_1)

    # Act
    # Make sure the reference agent has different networks than agent_1,
    # while the deep copy (agent_2) shares identical weights with agent_1.
    assert any(
        sum(sum(l1.weight - l2.weight)) != 0
        for l1, l2 in zip(agent_0.actor.layers, agent_1.actor.layers)
    )
    assert all(
        sum(sum(l1.weight - l2.weight)) == 0
        for l1, l2 in zip(agent_1.actor.layers, agent_2.actor.layers)
    )
    assert all(
        sum(sum(l1.weight - l2.weight)) == 0
        for l1, l2 in zip(agent_1.critic.layers, agent_2.critic.layers)
    )

    agent_0.seed(32167)
    actions_0 = deterministic_interactions(agent_0)
    agent_1.seed(0)
    actions_1 = deterministic_interactions(agent_1)
    agent_2.seed(0)
    actions_2 = deterministic_interactions(agent_2)

    # Assert
    # First we check that there's definitely more than one type of action
    assert actions_1[0] != actions_1[1]
    assert actions_2[0] != actions_2[1]

    # Differently seeded agents should diverge; identically seeded agents
    # need to generate identical actions.
    assert any(a0 != a1 for (a0, a1) in zip(actions_0, actions_1))
    for idx, (a1, a2) in enumerate(zip(actions_1, actions_2)):
        assert a1 == pytest.approx(a2, 1e-4), f"Action mismatch on position {idx}: {a1} != {a2}"


import random
from typing import List

import mock

from ai_traineree.agents.ppo import PPOAgent
from ai_traineree.env_runner import EnvRunner, MultiSyncEnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

# NOTE: Some of these tests use `test_task` and `test_agent`, which are real instances.
# This is partly to make sure that the tricky parts are actually covered rather than hidden
# by aggressive mocking, and partly to avoid the burden of maintaining env mocks.
# The downside is an unnecessary performance hit; a lightweight env would be nice.

test_task = GymTask('LunarLanderContinuous-v2')
test_agent = PPOAgent(test_task.state_size, test_task.action_size)


@mock.patch("ai_traineree.env_runner.AgentBase")
@mock.patch("ai_traineree.env_runner.TaskType")
def test_env_runner_info_no_data_logger(mock_task, mock_agent):
    # Assign
    env_runner = EnvRunner(mock_task, mock_agent)
    env_runner.logger = mock.MagicMock()
    info_data = dict(episodes=[2], iterations=[10], scores=[1], mean_scores=[2], epsilons=[1])

    # Act