Example #1
def test_ddpg_get_state():
    # Assign
    state_size, action_size = 3, 4
    init_config = {'actor_lr': 0.1, 'critic_lr': 0.2, 'gamma': 0.6}
    agent = DDPGAgent(state_size, action_size, device='cpu', **init_config)

    # Act
    agent_state = agent.get_state()

    # Assert
    assert isinstance(agent_state, AgentState)
    assert agent_state.model == DDPGAgent.name
    assert agent_state.state_space == state_size
    assert agent_state.action_space == action_size
    assert agent_state.config == agent._config
    assert agent_state.config['actor_lr'] == 0.1
    assert agent_state.config['critic_lr'] == 0.2
    assert agent_state.config['gamma'] == 0.6

    network_state = agent_state.network
    assert isinstance(network_state, NetworkState)
    assert {'actor', 'target_actor', 'critic',
            'target_critic'} == set(network_state.net.keys())

    buffer_state = agent_state.buffer
    assert isinstance(buffer_state, BufferState)
    assert buffer_state.type == agent.buffer.type
    assert buffer_state.batch_size == agent.buffer.batch_size
    assert buffer_state.buffer_size == agent.buffer.buffer_size
Example #2
def test_ddpg_from_state():
    # Assign
    state_shape, action_size = 10, 3
    agent = DDPGAgent(state_shape, action_size)
    agent_state = agent.get_state()

    # Act
    new_agent = DDPGAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, DDPGAgent)
    assert new_agent.hparams == agent.hparams
    assert all(
        torch.all(x == y)
        for x, y in zip(agent.actor.parameters(), new_agent.actor.parameters())
    )
    assert all(
        torch.all(x == y)
        for x, y in zip(agent.critic.parameters(), new_agent.critic.parameters())
    )
    assert all(
        torch.all(x == y)
        for x, y in zip(agent.target_actor.parameters(),
                        new_agent.target_actor.parameters())
    )
    assert all(
        torch.all(x == y)
        for x, y in zip(agent.target_critic.parameters(),
                        new_agent.target_critic.parameters())
    )
    assert new_agent.buffer == agent.buffer
Example #3
def test_ddpg_seed():
    # Assign
    agent_0 = DDPGAgent(4, 2, device='cpu')
    agent_1 = DDPGAgent(4, 2, device='cpu')
    agent_2 = copy.deepcopy(agent_1)

    # Act
    # Make sure agents have the same networks
    assert all([
        sum(sum(l1.weight - l2.weight)) == 0
        for l1, l2 in zip(agent_1.actor.layers, agent_2.actor.layers)
    ])
    assert all([
        sum(sum(l1.weight - l2.weight)) == 0
        for l1, l2 in zip(agent_1.critic.layers, agent_2.critic.layers)
    ])

    agent_0.seed(32167)
    actions_0 = deterministic_interactions(agent_0)
    agent_1.seed(0)
    actions_1 = deterministic_interactions(agent_1)
    agent_2.seed(0)
    actions_2 = deterministic_interactions(agent_2)

    # Assert
    # First we check that there's definitely more than one type of action
    assert actions_1[0] != actions_1[1]
    assert actions_2[0] != actions_2[1]

    # Agents seeded differently should produce different actions...
    assert any(a0 != a1 for (a0, a1) in zip(actions_0, actions_1))
    # ...while agents seeded identically need to produce identical actions
    for idx, (a1, a2) in enumerate(zip(actions_1, actions_2)):
        assert a1 == pytest.approx(a2, 1e-4), \
            f"Action mismatch on position {idx}: {a1} != {a2}"
Example #4
def test_agent_factory_ddpg_agent_from_state():
    # Assign
    state_size, action_size = 10, 5
    agent = DDPGAgent(state_size, action_size, device="cpu")
    state = agent.get_state()

    # Act
    new_agent = AgentFactory.from_state(state)

    # Assert
    assert id(new_agent) != id(agent)
    assert new_agent.name == DDPGAgent.name
    assert new_agent == agent
    assert new_agent.hparams == agent.hparams
    assert new_agent.buffer == agent.buffer
Example #5
def test_ddpg_from_state_network_state_none():
    # Assign
    state_shape, action_size = 10, 3
    agent = DDPGAgent(state_shape, action_size)
    agent_state = agent.get_state()
    agent_state.network = None

    # Act
    new_agent = DDPGAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, DDPGAgent)
    assert new_agent.hparams == agent.hparams
    assert new_agent.buffer == agent.buffer
Example #6
def test_runs_ddpg():
    # Assign
    task = GymTask('Pendulum-v0')
    agent = DDPGAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
Example #7
 def from_state(state: AgentState) -> AgentBase:
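     # Dispatch on the stored model name and rebuild the matching agent type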
     if state.model == DQNAgent.name:
         return DQNAgent.from_state(state)
     elif state.model == PPOAgent.name:
         return PPOAgent.from_state(state)
     elif state.model == DDPGAgent.name:
         return DDPGAgent.from_state(state)
     else:
         raise ValueError(
             f"Agent state contains unsupported model type: '{state.model}'"
         )
Example #8
 def from_state(state: AgentState) -> AgentBase:
     norm_model = state.model.upper()
     if norm_model == DQNAgent.name.upper():
         return DQNAgent.from_state(state)
     elif norm_model == PPOAgent.name.upper():
         return PPOAgent.from_state(state)
     elif norm_model == DDPGAgent.name.upper():
         return DDPGAgent.from_state(state)
     elif norm_model == RainbowAgent.name.upper():
         return RainbowAgent.from_state(state)
     else:
         raise ValueError(
             f"Agent state contains unsupported model type: {state.model}")
Example #9
def test_ddpg_from_state_one_updated():
    # Assign
    state_shape, action_size = 10, 3
    agent = DDPGAgent(state_shape, action_size)
    feed_agent(agent, 2 * agent.batch_size, discrete_action=False)  # Feed 1
    agent_state = agent.get_state()
    feed_agent(agent, 100, discrete_action=False)  # Feed 2 - to make the agents differ

    # Act
    new_agent = DDPGAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, DDPGAgent)
    assert new_agent.hparams == agent.hparams
    assert all(
        torch.all(x != y)
        for x, y in zip(agent.actor.parameters(), new_agent.actor.parameters())
    )
    assert all(
        torch.all(x != y)
        for x, y in zip(agent.target_actor.parameters(),
                        new_agent.target_actor.parameters())
    )
    assert all(
        torch.all(x != y)
        for x, y in zip(agent.critic.parameters(), new_agent.critic.parameters())
    )
    assert all(
        torch.all(x != y)
        for x, y in zip(agent.target_critic.parameters(),
                        new_agent.target_critic.parameters())
    )
    assert new_agent.buffer != agent.buffer
Example #10
    def __init__(self, env, state_size: int, action_size: int,
                 agents_number: int, config: Dict, **kwargs):

        self.env = env
        self.state_size = state_size
        self.action_size = action_size
        self.agents_number = agents_number

        hidden_layers = config.get('hidden_layers', (256, 128))
        noise_scale = float(config.get('noise_scale', 0.2))
        noise_sigma = float(config.get('noise_sigma', 0.1))
        actor_lr = float(config.get('actor_lr', 1e-3))
        critic_lr = float(config.get('critic_lr', 1e-3))

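        # Each DDPG agent takes the concatenated observations of all agents
        # (agents_number * state_size) as its state input.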
        self.maddpg_agent = [
            DDPGAgent(agents_number * state_size,
                      action_size,
                      hidden_layers=hidden_layers,
                      actor_lr=actor_lr,
                      critic_lr=critic_lr,
                      noise_scale=noise_scale,
                      noise_sigma=noise_sigma) for _ in range(agents_number)
        ]

        self.gamma: float = float(config.get('gamma', 0.99))
        self.tau: float = float(config.get('tau', 0.002))
        self.gradient_clip: Optional[float] = config.get('gradient_clip')

        self.batch_size: int = int(config.get('batch_size', 64))
        self.buffer_size = int(config.get('buffer_size', int(1e6)))
        self.buffer = ReplayBuffer(self.batch_size, self.buffer_size)

        self.warm_up: int = int(config.get('warm_up', 1e3))
        self.update_freq: int = int(config.get('update_freq', 2))
        self.number_updates: int = int(config.get('number_updates', 2))

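        # A single shared critic evaluates the joint state and joint action of all agents.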
        self.critic = CriticBody(agents_number * state_size,
                                 agents_number * action_size,
                                 hidden_layers=hidden_layers).to(DEVICE)
        self.target_critic = CriticBody(agents_number * state_size,
                                        agents_number * action_size,
                                        hidden_layers=hidden_layers).to(DEVICE)
        self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                           lr=critic_lr)
        hard_update(self.target_critic, self.critic)

        self.reset()
Example #11
def test_ddpg_get_state_compare_different_agents():
    # Assign
    state_size, action_size = 3, 2
    agent_1 = DDPGAgent(state_size, action_size, device='cpu', actor_lr=0.01)
    agent_2 = DDPGAgent(state_size, action_size, device='cpu', actor_lr=0.02)

    # Act
    state_1 = agent_1.get_state()
    state_2 = agent_2.get_state()

    # Assert
    assert state_1 != state_2
    assert state_1.model == state_2.model
Example #12
    def __init__(self, state_size: int, action_size: int, num_agents: int, **kwargs):
        """Initiation of the Multi Agent DDPG.

        All keywords are also passed to DDPG agents.

        Parameters:
            state_size (int): Dimensionality of the state.
            action_size (int): Dimensionality of the action.
            num_agents (int): Number of agents.

        Keyword Arguments:
            hidden_layers (tuple of ints): Shape for fully connected hidden layers.
            noise_scale (float): Default: 0.5. Noise amplitude.
            noise_sigma (float): Default: 1.0. Noise variance.
            actor_lr (float): Default: 3e-4. Learning rate for the actor network.
            critic_lr (float): Default: 3e-4. Learning rate for the critic network.
            gamma (float): Default: 0.99. Discount factor.
            tau (float): Default: 0.02. Soft update coefficient.
            gradient_clip (optional float): Max norm for gradient clipping. If None, no clipping is applied.
            batch_size (int): Number of samples per learning.
            buffer_size (int): Number of previous samples to remember.
            warm_up (int): Number of samples to see before start learning.
            update_freq (int): How many samples between learning sessions.
            number_updates (int): How many learning cycles per learning session.

        """

        self.device = self._register_param(kwargs, "device", DEVICE, update=True)
        self.state_size: int = state_size
        self.action_size = action_size
        self.num_agents: int = num_agents
        # Default names are materialized into a list so they can be iterated more than once.
        self.agent_names: List[str] = list(kwargs.get("agent_names", map(str, range(self.num_agents))))

        hidden_layers = to_numbers_seq(self._register_param(kwargs, 'hidden_layers', (100, 100), update=True))
        noise_scale = float(self._register_param(kwargs, 'noise_scale', 0.5))
        noise_sigma = float(self._register_param(kwargs, 'noise_sigma', 1.0))
        actor_lr = float(self._register_param(kwargs, 'actor_lr', 3e-4))
        critic_lr = float(self._register_param(kwargs, 'critic_lr', 3e-4))

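        # One DDPG instance per agent name; each acts only on its own observation.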
        self.agents: Dict[str, DDPGAgent] = OrderedDict({
            agent_name: DDPGAgent(
                state_size, action_size,
                actor_lr=actor_lr, critic_lr=critic_lr,
                noise_scale=noise_scale, noise_sigma=noise_sigma,
                **kwargs,
            ) for agent_name in self.agent_names
        })

        self.gamma = float(self._register_param(kwargs, 'gamma', 0.99))
        self.tau = float(self._register_param(kwargs, 'tau', 0.02))
        self.gradient_clip: Optional[float] = self._register_param(kwargs, 'gradient_clip')

        self.batch_size = int(self._register_param(kwargs, 'batch_size', 64))
        self.buffer_size = int(self._register_param(kwargs, 'buffer_size', int(1e6)))
        self.buffer = ReplayBuffer(self.batch_size, self.buffer_size)

        self.warm_up = int(self._register_param(kwargs, 'warm_up', 0))
        self.update_freq = int(self._register_param(kwargs, 'update_freq', 1))
        self.number_updates = int(self._register_param(kwargs, 'number_updates', 1))

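        # Shared (centralized) critic over the concatenated states and actions of all agents.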
        self.critic = CriticBody(num_agents*state_size, num_agents*action_size, hidden_layers=hidden_layers).to(self.device)
        self.target_critic = CriticBody(num_agents*state_size, num_agents*action_size, hidden_layers=hidden_layers).to(self.device)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=critic_lr)
        hard_update(self.target_critic, self.critic)

        self._step_data = {}
        self._loss_critic: float = float('inf')
        self._loss_actor: Dict[str, float] = {name: float('inf') for name in self.agent_names}
        self.reset()