def test_dqn_get_state():
    # Assign
    state_size, action_size = 3, 4
    init_config = {'lr': 0.1, 'gamma': 0.6}
    agent = DQNAgent(state_size, action_size, device='cpu', **init_config)

    # Act
    agent_state = agent.get_state()

    # Assert
    assert isinstance(agent_state, AgentState)
    assert agent_state.model == DQNAgent.name
    assert agent_state.state_space == state_size
    assert agent_state.action_space == action_size
    assert agent_state.config == agent._config
    assert agent_state.config['lr'] == 0.1
    assert agent_state.config['gamma'] == 0.6

    network_state = agent_state.network
    assert isinstance(network_state, NetworkState)
    assert {'net', 'target_net'} == set(network_state.net.keys())

    buffer_state = agent_state.buffer
    assert isinstance(buffer_state, BufferState)
    assert buffer_state.type == agent.buffer.type
    assert buffer_state.batch_size == agent.buffer.batch_size
    assert buffer_state.buffer_size == agent.buffer.buffer_size
def test_serialize_network_state_actual():
    from ai_traineree.agents.dqn import DQNAgent

    agent = DQNAgent(10, 4)
    deterministic_interactions(agent, 30)
    network_state = agent.get_network_state()

    # Act
    ser = serialize(network_state)

    # Assert
    des = json.loads(ser)
    assert set(des['net'].keys()) == set(('target_net', 'net'))
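# Several tests in this section call a `deterministic_interactions` helper that is
# not shown here. A minimal sketch of what such a helper might look like, assuming
# the `act(state)` / `step(state, action, reward, next_state, done)` agent API used
# elsewhere in ai_traineree; the transition values below are illustrative only:
def deterministic_interactions(agent, num_iters=50):
    """Run the agent through a fixed, repeatable sequence of transitions."""
    state = [0.0] * agent.state_size
    actions = []
    for i in range(num_iters):
        action = agent.act(state)
        actions.append(action)
        # Deterministic "environment": next state, reward and done depend only on i.
        next_state = [(i % 10) / 10.0] * agent.state_size
        reward = float(i % 2)
        done = (i % 10) == 9
        agent.step(state, action, reward, next_state, done)
        state = next_state
    return actions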
def test_dqn_seed():
    # Assign
    agent_0 = DQNAgent(4, 4, device='cpu')  # Reference
    agent_1 = DQNAgent(4, 4, device='cpu')
    agent_2 = copy.deepcopy(agent_1)

    # Act
    # Make sure agents have the same networks
    agent_nets = zip(agent_1.net.value_net.layers, agent_2.net.value_net.layers)
    agent_target_nets = zip(agent_1.target_net.value_net.layers, agent_2.target_net.value_net.layers)
    assert all([sum(sum(l1.weight - l2.weight)) == 0 for l1, l2 in agent_nets])
    assert all([sum(sum(l1.weight - l2.weight)) == 0 for l1, l2 in agent_target_nets])

    agent_0.seed(32167)
    actions_0 = deterministic_interactions(agent_0)
    agent_1.seed(0)
    actions_1 = deterministic_interactions(agent_1)
    agent_2.seed(0)
    actions_2 = deterministic_interactions(agent_2)

    # Assert
    assert any(a0 != a1 for (a0, a1) in zip(actions_0, actions_1))
    # All generated actions need to be identical
    for idx, (a1, a2) in enumerate(zip(actions_1, actions_2)):
        assert a1 == a2, f"Action mismatch on position {idx}: {a1} != {a2}"
def test_agent_factory_dqn_agent_from_state_network_buffer_none():
    # Assign
    state_size, action_size = 10, 5
    agent = DQNAgent(state_size, action_size, device="cpu")
    state = agent.get_state()
    state.network = None
    state.buffer = None

    # Act
    new_agent = AgentFactory.from_state(state)

    # Assert
    assert id(new_agent) != id(agent)
    assert new_agent.hparams == agent.hparams
def test_serialize_agent_state_actual():
    from ai_traineree.agents.dqn import DQNAgent

    agent = DQNAgent(10, 4)
    deterministic_interactions(agent, 30)
    state = agent.get_state()

    # Act
    ser = serialize(state)

    # Assert
    des = json.loads(ser)
    assert des['model'] == DQNAgent.name
    assert len(des['buffer']['data']) == 30
    assert set(des['network']['net'].keys()) == set(('target_net', 'net'))
def test_agent_factory_dqn_agent_from_state():
    # Assign
    state_size, action_size = 10, 5
    agent = DQNAgent(state_size, action_size, device="cpu")
    state = agent.get_state()

    # Act
    new_agent = AgentFactory.from_state(state)

    # Assert
    assert id(new_agent) != id(agent)
    assert new_agent == agent
    assert new_agent.name == DQNAgent.name
    assert new_agent.hparams == agent.hparams
    assert new_agent.buffer == agent.buffer
def test_dqn_from_state_network_state_none():
    # Assign
    state_shape, action_size = 10, 3
    agent = DQNAgent(state_shape, action_size)
    agent_state = agent.get_state()
    agent_state.network = None

    # Act
    new_agent = DQNAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, DQNAgent)
    assert new_agent.hparams == agent.hparams
    assert new_agent.buffer == agent.buffer
def __init__(self, state_size: int, action_size: int, num_agents: int, **kwargs):
    """Independent Q-Learning

    A set of independent Q-Learning agents (:py:class:`DQN <DQNAgent>` implementation)
    organized to work as a `Multi Agent` agent. These agents use the defaults of the
    DQNAgent class. All keyword parameters are passed to each agent.

    Parameters:
        state_size (int): Dimensionality of the state.
        action_size (int): Dimensionality of the action.
        num_agents (int): Number of agents.

    Keyword Arguments:
        hidden_layers (tuple of ints): Shape of the fully connected hidden layers.
        noise_scale (float): Default: 1.0. Noise amplitude.
        noise_sigma (float): Default: 0.5. Noise variance.
        actor_lr (float): Default: 0.001. Learning rate for the actor network.
        gamma (float): Default: 0.99. Discount value.
        tau (float): Default: 0.002. Soft copy value.
        gradient_clip (optional float): Max norm of the learning gradient. If None then no clip.
        batch_size (int): Number of samples per learning step.
        buffer_size (int): Number of previous samples to remember.
        warm_up (int): Number of samples to see before learning starts.
        update_freq (int): How many samples between learning sessions.
        number_updates (int): How many learning cycles per learning session.

    """
    self.state_size: int = state_size
    self.action_size = action_size
    self.num_agents = num_agents
    # Materialize the default names as a list; a bare `map` iterator would be
    # exhausted after its first use.
    self.agent_names = kwargs.get("agent_names", list(map(str, range(self.num_agents))))

    kwargs['device'] = self._register_param(kwargs, "device", DEVICE)
    kwargs['hidden_layers'] = to_numbers_seq(self._register_param(kwargs, 'hidden_layers', (64, 64)))
    kwargs['gamma'] = float(self._register_param(kwargs, 'gamma', 0.99))
    kwargs['tau'] = float(self._register_param(kwargs, 'tau', 0.002))
    kwargs['gradient_clip'] = self._register_param(kwargs, 'gradient_clip')
    kwargs['batch_size'] = int(self._register_param(kwargs, 'batch_size', 64))
    kwargs['buffer_size'] = int(self._register_param(kwargs, 'buffer_size', int(1e6)))
    kwargs['warm_up'] = int(self._register_param(kwargs, 'warm_up', 0))
    kwargs['update_freq'] = int(self._register_param(kwargs, 'update_freq', 1))
    kwargs['number_updates'] = int(self._register_param(kwargs, 'number_updates', 1))

    self.agents: Dict[str, DQNAgent] = {
        agent_name: DQNAgent(state_size, action_size, name=agent_name, **kwargs)
        for agent_name in self.agent_names
    }

    self.reset()
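# A usage sketch for this constructor. The class name `IQLAgents` and the import
# path below are assumptions for illustration; substitute the actual names from
# the codebase:
#
#     from ai_traineree.multi_agents.iql import IQLAgents  # hypothetical import path
#
#     multi_agent = IQLAgents(state_size=8, action_size=4, num_agents=3,
#                             batch_size=32, warm_up=100)
#     # Every underlying DQNAgent shares the same hyperparameters and gets a
#     # unique name ('0', '1', '2') unless `agent_names` is provided.
#     assert set(multi_agent.agents.keys()) == {'0', '1', '2'}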
def test_runs_dqn():
    # Assign
    task = GymTask('CartPole-v1')
    agent = DQNAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
def test_dqn_from_state():
    # Assign
    state_shape, action_size = 10, 3
    agent = DQNAgent(state_shape, action_size)
    agent_state = agent.get_state()

    # Act
    new_agent = DQNAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, DQNAgent)
    assert new_agent.hparams == agent.hparams
    assert all([torch.all(x == y) for (x, y) in zip(agent.net.parameters(), new_agent.net.parameters())])
    assert all([torch.all(x == y) for (x, y) in zip(agent.target_net.parameters(), new_agent.target_net.parameters())])
    assert new_agent.buffer == agent.buffer
def from_state(state: AgentState) -> AgentBase:
    if state.model == DQNAgent.name:
        return DQNAgent.from_state(state)
    elif state.model == PPOAgent.name:
        return PPOAgent.from_state(state)
    else:
        raise ValueError(f"Agent state contains unsupported model type: '{state.model}'")
def test_dqn_from_state_one_updated():
    # Assign
    state_shape, action_size = 10, 3
    agent = DQNAgent(state_shape, action_size)
    feed_agent(agent, 2 * agent.batch_size)  # Feed 1
    agent_state = agent.get_state()
    feed_agent(agent, 100)  # Feed 2 - to make the agents different

    # Act
    new_agent = DQNAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, DQNAgent)
    assert new_agent.hparams == agent.hparams
    assert any([torch.any(x != y) for (x, y) in zip(agent.net.parameters(), new_agent.net.parameters())])
    assert any([torch.any(x != y) for (x, y) in zip(agent.target_net.parameters(), new_agent.target_net.parameters())])
    assert new_agent.buffer != agent.buffer
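# `feed_agent` is another helper assumed by the test above but not shown in this
# section. A minimal sketch, assuming the same `act`/`step` agent API as the
# `deterministic_interactions` sketch earlier; the random transition values are
# illustrative only:
import random

def feed_agent(agent, num_samples):
    """Push `num_samples` random transitions through the agent, triggering learning."""
    for _ in range(num_samples):
        state = [random.random() for _ in range(agent.state_size)]
        action = agent.act(state)
        next_state = [random.random() for _ in range(agent.state_size)]
        reward = random.random()
        done = random.random() > 0.9
        agent.step(state, action, reward, next_state, done)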
def from_state(state: AgentState) -> AgentBase:
    norm_model = state.model.upper()
    if norm_model == DQNAgent.name.upper():
        return DQNAgent.from_state(state)
    elif norm_model == PPOAgent.name.upper():
        return PPOAgent.from_state(state)
    elif norm_model == DDPGAgent.name.upper():
        return DDPGAgent.from_state(state)
    elif norm_model == RainbowAgent.name.upper():
        return RainbowAgent.from_state(state)
    else:
        raise ValueError(f"Agent state contains unsupported model type: {state.model}")
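# A typical round trip through this factory, mirroring the assertions in
# test_agent_factory_dqn_agent_from_state above: capture an agent's full state
# and rebuild an equivalent agent from it.
agent = DQNAgent(10, 5, device="cpu")
restored = AgentFactory.from_state(agent.get_state())
assert restored == agent  # same hparams, networks, and buffer; different object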
def test_dqn_get_state_compare_different_agents():
    # Assign
    state_size, action_size = 3, 2
    agent_1 = DQNAgent(state_size, action_size, device='cpu', n_steps=1)
    agent_2 = DQNAgent(state_size, action_size, device='cpu', n_steps=2)

    # Act
    state_1 = agent_1.get_state()
    state_2 = agent_2.get_state()

    # Assert
    assert state_1 != state_2
    assert state_1.model == state_2.model
config = {
    "update_freq": 10,
    "batch_size": 100,
    "warm_up": 100,
    "lr": 1e-4,
    "network_fn": lambda: QNetwork2D(state_size, task.action_size, hidden_layers=(200, 200)),
    "state_transform": agent_state_tranform,
}
agent = DQNAgent(state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent, max_iterations=2000, writer=writer)
scores = env_runner.run(reward_goal=500, max_episodes=1000, log_every=1, eps_start=0.99, gif_every_episodes=100)
env_runner.interact_episode(render=True)

# Plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
import numpy as np
import pylab as plt
from torch.utils.tensorboard import SummaryWriter

from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask

writer = SummaryWriter()

env_name = 'CartPole-v1'
task = GymTask(env_name)
agent = DQNAgent(task.state_size, task.action_size, n_steps=5)
env_runner = EnvRunner(task, agent, writer=writer)

scores = env_runner.run(
    reward_goal=100,
    max_episodes=5000,
    eps_end=0.002,
    eps_decay=0.99,
    gif_every_episodes=500,
    force_new=True,
)
env_runner.interact_episode(1000, render=True)

# Plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
import pylab as plt

from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask

env_name = 'Breakout-ram-v0'
task = GymTask(env_name)
agent = DQNAgent(task.state_size, task.action_size, hidden_layers=(400, 300))
env_runner = EnvRunner(task, agent)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(reward_goal=5, max_episodes=5, log_every=1)
env_runner.interact_episode(100, render=True)

# Plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
import numpy as np
import pylab as plt

from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

env_name = 'LunarLander-v2'
task: TaskType = GymTask(env_name)
config = {'batch_size': 64}
agent = DQNAgent(task.state_size, task.action_size, **config)  # Unpack config as keyword arguments
env_runner = EnvRunner(task, agent)

env_runner.interact_episode(0, render=True)
scores = env_runner.run(50, 800, eps_start=1.0, eps_end=0.05, eps_decay=0.995)
env_runner.interact_episode(0, render=True)

# Plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()