Example no. 1
def test_ppo_get_state():
    # Assign
    state_size, action_size = 3, 4
    init_config = {'actor_lr': 0.1, 'gamma': 0.6}
    agent = PPOAgent(state_size, action_size, device='cpu', **init_config)

    # Act
    agent_state = agent.get_state()

    # Assert
    assert isinstance(agent_state, AgentState)
    assert agent_state.model == PPOAgent.name
    assert agent_state.state_space == state_size
    assert agent_state.action_space == action_size
    assert agent_state.config == agent._config
    assert agent_state.config['actor_lr'] == 0.1
    assert agent_state.config['gamma'] == 0.6

    network_state = agent_state.network
    assert isinstance(network_state, NetworkState)
    assert {'actor', 'critic', 'policy'} == set(network_state.net.keys())

    buffer_state = agent_state.buffer
    assert isinstance(buffer_state, BufferState)
    assert buffer_state.type == agent.buffer.type
    assert buffer_state.batch_size == agent.buffer.batch_size
    assert buffer_state.buffer_size == agent.buffer.buffer_size
Example no. 2
def test_ppo_from_state():
    # Assign
    state_shape, action_size = 10, 3
    agent = PPOAgent(state_shape, action_size)
    agent_state = agent.get_state()

    # Act
    new_agent = PPOAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, PPOAgent)
    assert new_agent.hparams == agent.hparams
    assert all(
        torch.all(x == y)
        for x, y in zip(agent.policy.parameters(), new_agent.policy.parameters())
    )
    assert all(
        torch.all(x == y)
        for x, y in zip(agent.actor.parameters(), new_agent.actor.parameters())
    )
    assert all(
        torch.all(x == y)
        for x, y in zip(agent.critic.parameters(), new_agent.critic.parameters())
    )
    assert new_agent.buffer == agent.buffer
Example no. 3
def test_ppo_from_state_one_updated():
    # Assign
    state_shape, action_size = 10, 3
    agent = PPOAgent(state_shape, action_size)
    deterministic_interactions(agent, num_iters=100)
    agent_state = agent.get_state()
    deterministic_interactions(agent, num_iters=400)

    # Act
    new_agent = PPOAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, PPOAgent)
    # assert any([torch.any(x != y) for (x, y) in zip(agent.policy.parameters(), new_agent.policy.parameters())])
    assert any(
        torch.any(x != y)
        for x, y in zip(agent.actor.parameters(), new_agent.actor.parameters())
    )
    assert any(
        torch.any(x != y)
        for x, y in zip(agent.critic.parameters(), new_agent.critic.parameters())
    )
    assert new_agent.buffer != agent.buffer
Example no. 4
def test_agent_factory_ppo_agent_from_state_network_buffer_none():
    # Assign
    state_size, action_size = 10, 5
    agent = PPOAgent(state_size, action_size, device="cpu")
    state = agent.get_state()
    state.network = None
    state.buffer = None

    # Act
    new_agent = AgentFactory.from_state(state)

    # Assert
    assert id(new_agent) != id(agent)
    assert new_agent.hparams == agent.hparams
Example no. 5
def test_agent_factory_ppo_agent_from_state():
    # Assign
    state_size, action_size = 10, 5
    agent = PPOAgent(state_size, action_size, device="cpu")
    state = agent.get_state()

    # Act
    new_agent = AgentFactory.from_state(state)

    # Assert
    assert id(new_agent) != id(agent)
    assert new_agent == agent
    assert new_agent.name == PPOAgent.name
    assert new_agent.hparams == agent.hparams
    assert new_agent.buffer == agent.buffer
Example no. 6
def test_ppo_from_state_network_state_none():
    # Assign
    state_shape, action_size = 10, 3
    agent = PPOAgent(state_shape, action_size)
    agent_state = agent.get_state()
    agent_state.network = None

    # Act
    new_agent = PPOAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, PPOAgent)
    assert new_agent.hparams == agent.hparams
    assert new_agent.buffer == agent.buffer
Example no. 7
def test_runs_ppo():
    # Assign
    task = GymTask('Pendulum-v0')
    agent = PPOAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
Example no. 8
def from_state(state: AgentState) -> AgentBase:
    if state.model == DQNAgent.name:
        return DQNAgent.from_state(state)
    elif state.model == PPOAgent.name:
        return PPOAgent.from_state(state)
    else:
        raise ValueError(
            f"Agent state contains unsupported model type: '{state.model}'"
        )
Example no. 9
def from_state(state: AgentState) -> AgentBase:
    norm_model = state.model.upper()
    if norm_model == DQNAgent.name.upper():
        return DQNAgent.from_state(state)
    elif norm_model == PPOAgent.name.upper():
        return PPOAgent.from_state(state)
    elif norm_model == DDPGAgent.name.upper():
        return DDPGAgent.from_state(state)
    elif norm_model == RainbowAgent.name.upper():
        return RainbowAgent.from_state(state)
    else:
        raise ValueError(
            f"Agent state contains unsupported model type: {state.model}")
Example no. 10
def test_multi_sync_env_runner_run_single_step_multiple_task():
    # Assign
    tasks: List[TaskType] = [test_task, test_task]
    agent = PPOAgent(test_task.state_size,
                     test_task.action_size,
                     num_workers=len(tasks))
    multi_sync_env_runner = MultiSyncEnvRunner(tasks, agent)

    # Act
    scores = multi_sync_env_runner.run(max_episodes=1,
                                       max_iterations=1,
                                       force_new=True)

    # Assert
    assert len(scores) == 2  # After 1 iteration both "finished" at the same time
Example no. 11
def test_multi_sync_env_runner_run_multiple_step_multiple_task():
    # Assign
    tasks: List[TaskType] = [test_task, test_task]
    agent = PPOAgent(test_task.state_size,
                     test_task.action_size,
                     num_workers=len(tasks))
    multi_sync_env_runner = MultiSyncEnvRunner(tasks, agent)

    # Act
    scores = multi_sync_env_runner.run(max_episodes=3,
                                       max_iterations=100,
                                       force_new=True)

    # Assert
    # On rare occasions both tasks complete at the same time, yielding an extra score.
    assert len(scores) in (3, 4)
Example no. 12
def test_ppo_get_state_compare_different_agents():
    # Assign
    state_size, action_size = 3, 2
    agent_1 = PPOAgent(state_size, action_size, device='cpu', n_steps=1)
    agent_2 = PPOAgent(state_size, action_size, device='cpu', n_steps=2)

    # Act
    state_1 = agent_1.get_state()
    state_2 = agent_2.get_state()

    # Assert
    assert state_1 != state_2
    assert state_1.model == state_2.model
Example no. 13
def test_ppo_seed():
    # Assign
    agent_0 = PPOAgent(4, 2, device='cpu')  # Reference
    agent_1 = PPOAgent(4, 2, device='cpu')
    agent_2 = copy.deepcopy(agent_1)

    # Act
    # Make sure agent_1 and agent_2 start with identical networks (and differ from agent_0)
    assert any([
        sum(sum(l1.weight - l2.weight)) != 0
        for l1, l2 in zip(agent_0.actor.layers, agent_1.actor.layers)
    ])
    assert all([
        sum(sum(l1.weight - l2.weight)) == 0
        for l1, l2 in zip(agent_1.actor.layers, agent_2.actor.layers)
    ])
    assert all([
        sum(sum(l1.weight - l2.weight)) == 0
        for l1, l2 in zip(agent_1.critic.layers, agent_2.critic.layers)
    ])

    agent_0.seed(32167)
    actions_0 = deterministic_interactions(agent_0)
    agent_1.seed(0)
    actions_1 = deterministic_interactions(agent_1)
    agent_2.seed(0)
    actions_2 = deterministic_interactions(agent_2)

    # Assert
    # First we check that there's definitely more than one type of action
    assert actions_1[0] != actions_1[1]
    assert actions_2[0] != actions_2[1]

    # Differently seeded agents should produce different actions
    assert any(a0 != a1 for (a0, a1) in zip(actions_0, actions_1))
    # Identically seeded agents need to generate identical actions
    for idx, (a1, a2) in enumerate(zip(actions_1, actions_2)):
        assert a1 == pytest.approx(a2, 1e-4), f"Action mismatch on position {idx}: {a1} != {a2}"
Example no. 14
import mock
import random

from ai_traineree.agents.ppo import PPOAgent
from ai_traineree.env_runner import EnvRunner, MultiSyncEnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType
from typing import List

# NOTE: Some of these tests use `test_task` and `test_agent`, which are real instances.
#       This is partly to make sure that the tricky parts are covered rather than hidden
#       by aggressive mocking, and partly to avoid the burden of maintaining env mocks.
#       It does come with an unnecessary performance hit; a lightweight env would be
#       nice (a rough sketch of one follows below).

test_task = GymTask('LunarLanderContinuous-v2')
test_agent = PPOAgent(test_task.state_size, test_task.action_size)
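
# A hypothetical lightweight stand-in (sketch only, not part of the original suite).
# It assumes the task interface exercised here is roughly `reset() -> state` and
# `step(action) -> (state, reward, done, info)`, plus `name`, `state_size` and
# `action_size` attributes; adjust it to the actual TaskType protocol before use.
class LightweightTask:
    name = "Lightweight-v0"
    state_size = 4
    action_size = 2

    def reset(self):
        # Fixed initial state; enough for smoke tests that only exercise the runner loop.
        return [0.0] * self.state_size

    def step(self, action):
        # Terminate immediately with zero reward so episodes stay as short as possible.
        return [0.0] * self.state_size, 0.0, True, {}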


@mock.patch("ai_traineree.env_runner.AgentBase")
@mock.patch("ai_traineree.env_runner.TaskType")
def test_env_runner_info_no_data_logger(mock_task, mock_agent):
    # Assign
    env_runner = EnvRunner(mock_task, mock_agent)
    env_runner.logger = mock.MagicMock()
    info_data = dict(episodes=[2],
                     iterations=[10],
                     scores=[1],
                     mean_scores=[2],
                     epsilons=[1])

    # Act