def test_gym_task_render_cannot_render(mock_gym, fix_env):
    # Assign
    mock_gym.make.return_value = fix_env
    task = GymTask("CanRender", can_render=False)

    # Act
    task.render()

    # Assert
    assert not fix_env.render.called
def test_gym_task_render(mock_gym, fix_env):
    # Assign
    mock_gym.make.return_value = fix_env
    task = GymTask("CanRender", can_render=True)

    # Act
    task.render()

    # Assert
    fix_env.render.assert_called_once_with("rgb_array")
def test_gym_task_reset(mock_gym, fix_env):
    # Assign
    mock_gym.make.return_value = fix_env
    task = GymTask("example")

    # Act
    out = task.reset()

    # Assert
    fix_env.reset.assert_called_once()
    assert len(out) > 0
def test_gym_task_step_discrete(mock_gym, fix_env_discrete):
    # Assign
    mock_gym.make.return_value = fix_env_discrete
    task = GymTask("example")
    action = 2.

    # Act
    out = task.step(action=action)

    # Assert
    fix_env_discrete.step.assert_called_once_with(int(action))
    assert len(out) == 4
    assert hasattr(out[0], "__iter__")
    assert isinstance(out[1], numbers.Number)
    assert isinstance(out[2], bool)
    assert isinstance(out[3], str)
def test_runs_rainbow():
    # Assign
    task = GymTask('CartPole-v1')
    agent = RainbowAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
def test_runs_d3pg():
    # Assign
    task = GymTask('BipedalWalker-v3')
    agent = D3PGAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
def test_runs_td3():
    # Assign
    task = GymTask('Pendulum-v0')
    agent = TD3Agent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
def test_gym_task_actual_openai_discrete():
    # Assign
    gym_name = "CartPole-v1"

    # Act
    task = GymTask(gym_name)

    # Assert
    assert task.name == gym_name
    assert task.env is not None
    assert task.can_render is True
    assert task.is_discrete is True
    assert task.state_size == 4
    assert task.action_size == 2
def test_gym_task_actual_openai_continuous():
    # Assign
    gym_name = 'Pendulum-v0'

    # Act
    task = GymTask(gym_name, can_render=False)

    # Assert
    assert task.name == gym_name
    assert task.env is not None
    assert task.can_render is False
    assert task.is_discrete is False
    assert task.state_size == 3
    assert task.action_size == 1
    def __init__(self, env_name, agent_name: str, hyperparameters: Optional[Hyperparameters] = None):
        self._logger.info("Initiating SageMakerExecutor with env_name '%s' and agent '%s'", env_name, agent_name)
        env = gym.make(env_name)
        self.task = GymTask(env, env_name)

        agent = None
        if agent_name.upper() == "DQN":
            from ai_traineree.agents.dqn import DQNAgent
            agent = DQNAgent
        elif agent_name.upper() == "PPO":
            from ai_traineree.agents.ppo import PPOAgent
            agent = PPOAgent
        elif agent_name.upper() == "DDPG":
            from ai_traineree.agents.ddpg import DDPGAgent
            agent = DDPGAgent
        else:
            self._logger.warning("Unrecognized agent '%s'. Defaulting to the PPO agent.", agent_name)
            from ai_traineree.agents.ppo import PPOAgent
            agent = PPOAgent

        # Guard against missing hyperparameters so that the defaults below apply.
        hyperparameters = hyperparameters if hyperparameters is not None else {}

        self.max_iterations = int(hyperparameters.get("max_iterations", 10000))
        self.max_episodes = int(hyperparameters.get("max_episodes", 1000))
        self.log_every = int(hyperparameters.get("log_every", 10))
        self.score_goal = int(hyperparameters.get("score_goal", 100))

        self.eps_start: float = float(hyperparameters.get('eps_start', 1.0))
        self.eps_end: float = float(hyperparameters.get('eps_end', 0.02))
        self.eps_decay: float = float(hyperparameters.get('eps_decay', 0.995))

        self.agent: AgentType = agent(self.task.state_size, self.task.action_size, config=hyperparameters)
        self.env_runner = EnvRunner(self.task, self.agent, max_iterations=self.max_iterations)
def test_gym_seed():
    def _deterministic_gym_eval(task):
        states = []
        for _ in range(3):
            states.append(task.reset())
            for _ in range(3):
                states.append(task.step(0)[0])
        return states

    # Assign
    task = GymTask('CartPole-v1')

    # Act
    task.seed(0)
    first_states = _deterministic_gym_eval(task)
    task.seed(0)
    second_states = _deterministic_gym_eval(task)

    # Assert
    for state_1, state_2 in zip(first_states, second_states):
        assert all([s1 == s2 for (s1, s2) in zip(state_1, state_2)])
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from torch.utils.tensorboard import SummaryWriter

import numpy as np
import pylab as plt


writer = SummaryWriter()

env_name = 'CartPole-v1'
task = GymTask(env_name)
agent = DQNAgent(task.state_size, task.action_size, n_steps=5)
env_runner = EnvRunner(task, agent, writer=writer)

scores = env_runner.run(
    reward_goal=100,
    max_episodes=5000,
    eps_end=0.002,
    eps_decay=0.99,
    gif_every_episodes=500,
    force_new=True,
)
env_runner.interact_episode(1000, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
"std_max": 1.1, "std_min": 0.05, "ppo_ratio_clip": 0.2, "simple_policy": True, "using_kl_div": True, # "value_loss_weight": 2, "entropy_weight": 0.01, "gamma": 0.999, 'lambda_gae': 0.98, "critic_lr": 3e-4, "actor_lr": 3e-4, "action_scale": 1, "action_min": -20, "action_max": 20, } tasks: List[TaskType] = [GymTask(env_name) for _ in range(num_workers)] agent = Agent(tasks[0].state_size, tasks[0].action_size, hidden_layers=(100, 64, 64), **kwargs) env_runner = MultiSyncEnvRunner(tasks, agent, processes=processes, data_logger=data_logger) scores = env_runner.run(reward_goal=80, max_episodes=5000, force_new=True) # plot the scores fig = plt.figure() ax = fig.add_subplot(111) plt.plot(range(len(scores)), scores) plt.ylabel('Score')
import mock
import random

from ai_traineree.agents.ppo import PPOAgent
from ai_traineree.env_runner import EnvRunner, MultiSyncEnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType
from typing import List


# NOTE: Some of these tests use `test_task` and `test_agent`, which are real instances.
#       This is partly to make sure that the tricky parts are actually covered rather than
#       hidden by aggressive mocking. The other part, however, is the burden of maintaining
#       env mocks. This comes with an unnecessary performance hit; a lightweight env would
#       be nice (see the stub sketched below).

test_task = GymTask('LunarLanderContinuous-v2')
test_agent = PPOAgent(test_task.state_size, test_task.action_size)


@mock.patch("ai_traineree.env_runner.AgentBase")
@mock.patch("ai_traineree.env_runner.TaskType")
def test_env_runner_info_no_data_logger(mock_task, mock_agent):
    # Assign
    env_runner = EnvRunner(mock_task, mock_agent)
    env_runner.logger = mock.MagicMock()
    info_data = dict(episodes=[2], iterations=[10], scores=[1], mean_scores=[2], epsilons=[1])

    # Act
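# ---------------------------------------------------------------------------
# Hypothetical lightweight stand-in referenced in the NOTE above; this is NOT
# part of ai_traineree, only a minimal sketch assuming the runner needs just
# `name`, `state_size`, `action_size`, `reset()` and `step()` from a task.
# All names, sizes and returned values are illustrative.
# ---------------------------------------------------------------------------
class LightweightStubTask:
    name = "StubTask-v0"
    state_size = 4
    action_size = 2
    is_discrete = True

    def reset(self):
        # A fixed zero state keeps tests deterministic and avoids simulator cost.
        return [0.0] * self.state_size

    def step(self, action):
        # Mimic the gym-style (state, reward, done, info) tuple; the episode ends
        # immediately, which is enough to drive the runner's bookkeeping logic.
        return [0.0] * self.state_size, 0.0, True, ""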
import torch

from ai_traineree.agents.dqn import DQNAgent as Agent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
from typing import Any, Dict
from pprint import pprint


config_default = {'hidden_layers': (50, 50)}
config_updates = [{'n_steps': n} for n in range(1, 11)]

task = GymTask("CartPole-v1")
seeds = [32167, 1, 999, 2833700, 13]

for idx, config_update in enumerate(config_updates):
    config: Dict[str, Any] = config_default.copy()
    config.update(config_update)

    for seed in seeds:
        config['seed'] = seed
        pprint(config)
        torch.manual_seed(config['seed'])

        agent = Agent(task.state_size, task.action_size, **config)
        data_logger = TensorboardLogger(log_dir=f'runs/MultiExp-{task.name}-i{idx}-s{seed}')
        env_runner = EnvRunner(task, agent, data_logger=data_logger)
        env_runner.seed(seed)
        env_runner.run(reward_goal=99999, max_episodes=500,
        kernel_sze=(16, 8), max_pool_size=(4, 2), stride=(4, 2), device=device)
    return NetChainer(net_classes=[
        conv_net,
        nn.Flatten(),
        FcNet(conv_net.output_size, output_dim, hidden_layers=(200, 200), device=device),
    ])


env_name = 'SpaceInvaders-v0'
task = GymTask(env_name, state_transform=state_transform)

device = "cuda"
config = {
    'device': device,
    "update_freq": 50,
    "number_updates": 5,
    "batch_size": 200,
    "buffer_size": 1e4,
    "warm_up": 100,
    "lr": 1e-4,
    "pre_network_fn": lambda in_features: network_fn(in_features, 300, device),
    "hidden_layers": None,
    "state_transform": agent_state_tranform,
}
state_size = task.actual_state_size
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType


def reward_transform(reward):
    """Cap reward to [-1, 1]."""
    return max(-1, min(reward, 1))


seed = 32167
torch.manual_seed(seed)
env_name = 'LunarLanderContinuous-v2'
task: TaskType = GymTask(env_name, seed=seed, reward_transform=reward_transform)
config = {
    'warm_up': 400,
    'device': 'cpu',
    'batch_size': 60,
    'update_freq': 2,
    'number_updates': 1,
    'hidden_layers': (100, 100),
    'actor_lr': 5e-4,
    'critic_lr': 5e-4,
    'alpha_lr': 3e-5,
    'tau': 0.02,
    "alpha": 0.2,
    'action_scale': 2,
    'action_min': -2,
import pylab as plt


def state_transform(state):
    """Simple cropping of the top and bottom edges and conversion to a black-and-white scale."""
    return (state[40:-10].sum(-1) > 0)[None, ...]


def agent_state_tranform(state):
    return state


env_name = 'Breakout-v0'
task = GymTask(env_name, state_transform=state_transform)
state_size = np.array(task.reset()).shape

writer = SummaryWriter()
config = {
    "update_freq": 10,
    "batch_size": 100,
    "warm_up": 100,
    "lr": 1e-4,
    "network_fn": lambda: QNetwork2D(state_size, task.action_size, hidden_layers=(200, 200)),
    "state_transform":
def network_fn(state_dim, output_dim, device):
    conv_net = ConvNet(state_dim, hidden_layers=(10, 10), device=device)
    return NetChainer(net_classes=[
        ScaleNet(scale=1. / 255),
        conv_net,
        FlattenNet(),
        FcNet(conv_net.output_size, output_dim, hidden_layers=(100, 100, 50), device=device),
    ])


env_name = 'SpaceInvaders-v0'
data_logger = TensorboardLogger()
task = GymTask(env_name, state_transform=state_transform)

config = {
    "network_fn": lambda: network_fn(task.actual_state_size, task.action_size, "cuda"),
    "compress_state": True,
    "gamma": 0.99,
    "lr": 1e-3,
    "update_freq": 150,
    "batch_size": 400,
    "buffer_size": int(5e3),
from ai_traineree.agents.ppo import PPOAgent as Agent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

import numpy as np
import pylab as plt


env_name = 'LunarLanderContinuous-v2'
task: TaskType = GymTask(env_name)
config = {
    'rollout_length': 30,
    'batch_size': 30,
    "number_updates": 1,
    "ppo_ratio_clip": 0.2,
    "value_loss_weight": 2,
    "entropy_weight": 0.0005,
    "gamma": 0.98,
    "action_scale": 2,
    "max_grad_norm_actor": 2.0,
    "max_grad_norm_critic": 2.0,
    "critic_lr": 1e-3,
    "actor_lr": 1e-3,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(300, 300), config=config)
env_runner = EnvRunner(task, agent)
env_runner.interact_episode(0, render=True)
scores = env_runner.run(80, 4000)
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.loggers import TensorboardLogger

import numpy as np
import pylab as plt
import torch


seed = 32167
# torch.set_deterministic(True)
torch.manual_seed(seed)

data_logger = TensorboardLogger()
env_name = 'CartPole-v1'
task = GymTask(env_name, seed=seed)
agent = DQNAgent(task.state_size, task.action_size, n_steps=5, seed=seed)
env_runner = EnvRunner(task, agent, data_logger=data_logger, seed=seed)

scores = env_runner.run(reward_goal=100, max_episodes=300, force_new=True)
env_runner.interact_episode(render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()