Example #1
def test_gym_task_render_cannot_render(mock_gym, fix_env):
    # Assign
    mock_gym.make.return_value = fix_env
    task = GymTask("CanRender", can_render=False)

    # Act
    task.render()

    # Assert
    assert not fix_env.render.called
Example #2
def test_gym_task_render(mock_gym, fix_env):
    # Assign
    mock_gym.make.return_value = fix_env
    task = GymTask("CanRender", can_render=True)

    # Act
    task.render()

    # Assert
    # `called_once_with` is not a real Mock method (it is auto-created and always
    # truthy); use the actual assertion helper instead.
    fix_env.render.assert_called_once_with("rgb_array")
Example #3
def test_gym_task_reset(mock_gym, fix_env):
    # Assign
    mock_gym.make.return_value = fix_env
    task = GymTask("example")

    # Act
    out = task.reset()

    # Assert
    fix_env.reset.assert_called_once()
    assert len(out) > 0
Example #4
def test_gym_task_step_discrete(mock_gym, fix_env_discrete):
    # Assign
    mock_gym.make.return_value = fix_env_discrete
    task = GymTask("example")
    action = 2.

    # Act
    out = task.step(action=action)

    # Assert
    fix_env_discrete.step.assert_called_once_with(int(action))
    assert len(out) == 4
    assert hasattr(out[0], "__iter__")
    assert isinstance(out[1], numbers.Number)
    assert isinstance(out[2], bool)
    assert isinstance(out[3], str)
Example #5
def test_runs_rainbow():
    # Assign
    task = GymTask('CartPole-v1')
    agent = RainbowAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
Example #6
def test_runs_d3pg():
    # Assign
    task = GymTask('BipedalWalker-v3')
    agent = D3PGAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
Example #7
def test_runs_td3():
    # Assign
    task = GymTask('Pendulum-v0')
    agent = TD3Agent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
Example #8
def test_gym_task_actual_openai_discrete():
    # Assign
    gym_name = "CartPole-v1"

    # Act
    task = GymTask(gym_name)

    # Assert
    assert task.name == gym_name
    assert task.env is not None
    assert task.can_render is True
    assert task.is_discrete is True
    assert task.state_size == 4
    assert task.action_size == 2
Example #9
def test_gym_task_actual_openai_continuous():
    # Assign
    gym_name = 'Pendulum-v0'

    # Act
    task = GymTask(gym_name, can_render=False)

    # Assert
    assert task.name == gym_name
    assert task.env is not None
    assert task.can_render is False
    assert task.is_discrete is False
    assert task.state_size == 3
    assert task.action_size == 1
Example #10
    def __init__(self,
                 env_name,
                 agent_name: str,
                 hyperparameters: Optional[Hyperparameters] = None):
        self._logger.info(
            "Initiating SageMakerExecutor with env_name '%s' and agent '%s'",
            env_name, agent_name)

        # Guard the Optional default so the `.get(...)` calls below are safe.
        hyperparameters = hyperparameters if hyperparameters is not None else {}

        env = gym.make(env_name)
        self.task = GymTask(env, env_name)
        agent = None
        if agent_name.upper() == "DQN":
            from ai_traineree.agents.dqn import DQNAgent
            agent = DQNAgent
        elif agent_name.upper() == "PPO":
            from ai_traineree.agents.ppo import PPOAgent
            agent = PPOAgent
        elif agent_name.upper() == "DDPG":
            from ai_traineree.agents.ddpg import DDPGAgent
            agent = DDPGAgent
        else:
            self._logger.warning(
                "Unknown agent '%s'; defaulting to PPO.", agent_name)
            from ai_traineree.agents.ppo import PPOAgent
            agent = PPOAgent

        self.max_iterations = int(hyperparameters.get("max_iterations", 10000))
        self.max_episodes = int(hyperparameters.get("max_episodes", 1000))
        self.log_every = int(hyperparameters.get("log_every", 10))
        self.score_goal = int(hyperparameters.get("score_goal", 100))

        self.eps_start: float = float(hyperparameters.get('eps_start', 1.0))
        self.eps_end: float = float(hyperparameters.get('eps_end', 0.02))
        self.eps_decay: float = float(hyperparameters.get('eps_decay', 0.995))

        self.agent: AgentType = agent(self.task.state_size,
                                      self.task.action_size,
                                      config=hyperparameters)

        self.env_runner = EnvRunner(self.task,
                                    self.agent,
                                    max_iterations=self.max_iterations)
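
A hedged usage sketch for the constructor above (the argument values are hypothetical; nothing beyond the `__init__` shown here is assumed):

# Hypothetical usage; only the constructor signature above is assumed.
executor = SageMakerExecutor(
    env_name="CartPole-v1",
    agent_name="DQN",
    hyperparameters={"max_episodes": 200, "score_goal": 195},
)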
Example #11
def test_gym_seed():
    def _deterministic_gym_eval(task):
        states = []
        for _ in range(3):
            states.append(task.reset())
            for _ in range(3):
                states.append(task.step(0)[0])
        return states

    # Assign
    task = GymTask('CartPole-v1')

    # Act
    task.seed(0)
    first_states = _deterministic_gym_eval(task)
    task.seed(0)
    second_states = _deterministic_gym_eval(task)

    # Assert
    for state_1, state_2 in zip(first_states, second_states):
        assert all(s1 == s2 for s1, s2 in zip(state_1, state_2))
Example #12
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from torch.utils.tensorboard import SummaryWriter

import numpy as np
import pylab as plt

writer = SummaryWriter()

env_name = 'CartPole-v1'
task = GymTask(env_name)
agent = DQNAgent(task.state_size, task.action_size, n_steps=5)
env_runner = EnvRunner(task, agent, writer=writer)

scores = env_runner.run(
    reward_goal=100,
    max_episodes=5000,
    eps_end=0.002,
    eps_decay=0.99,
    gif_every_episodes=500,
    force_new=True,
)
env_runner.interact_episode(1000, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
    "std_max": 1.1,
    "std_min": 0.05,
    "ppo_ratio_clip": 0.2,
    "simple_policy": True,
    "using_kl_div": True,
    # "value_loss_weight": 2,
    "entropy_weight": 0.01,
    "gamma": 0.999,
    'lambda_gae': 0.98,
    "critic_lr": 3e-4,
    "actor_lr": 3e-4,
    "action_scale": 1,
    "action_min": -20,
    "action_max": 20,
}
tasks: List[TaskType] = [GymTask(env_name) for _ in range(num_workers)]
agent = Agent(tasks[0].state_size,
              tasks[0].action_size,
              hidden_layers=(100, 64, 64),
              **kwargs)
env_runner = MultiSyncEnvRunner(tasks,
                                agent,
                                processes=processes,
                                data_logger=data_logger)
scores = env_runner.run(reward_goal=80, max_episodes=5000, force_new=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
Example #14
import mock
import random

from ai_traineree.agents.ppo import PPOAgent
from ai_traineree.env_runner import EnvRunner, MultiSyncEnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType
from typing import List

# NOTE: Some of these tests use `test_task` and `test_agent`, which are real instances.
#       This is partly to make sure that the tricky parts are covered rather than hidden
#       by aggressive mocking, and partly to avoid the burden of maintaining env mocks.
#       It comes at an unnecessary performance cost; a lightweight env (sketched below)
#       would be nice.
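
# A minimal sketch of such a lightweight env (hypothetical; `_FakeDiscreteEnv`
# is not part of ai_traineree), matching the gym reset/step/render contract:
class _FakeDiscreteEnv:
    def reset(self):
        return [0.0, 0.0, 0.0, 0.0]

    def step(self, action):
        # (state, reward, done, info)
        return [0.0, 0.0, 0.0, 0.0], 0.0, False, {}

    def render(self, mode="human"):
        pass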

test_task = GymTask('LunarLanderContinuous-v2')
test_agent = PPOAgent(test_task.state_size, test_task.action_size)


@mock.patch("ai_traineree.env_runner.AgentBase")
@mock.patch("ai_traineree.env_runner.TaskType")
def test_env_runner_info_no_data_logger(mock_task, mock_agent):
    # Assign
    env_runner = EnvRunner(mock_task, mock_agent)
    env_runner.logger = mock.MagicMock()
    info_data = dict(episodes=[2],
                     iterations=[10],
                     scores=[1],
                     mean_scores=[2],
                     epsilons=[1])

    # Act
Example #15
import torch

from ai_traineree.agents.dqn import DQNAgent as Agent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
from typing import Any, Dict
from pprint import pprint

config_default = {'hidden_layers': (50, 50)}
config_updates = [{'n_steps': n} for n in range(1, 11)]

task = GymTask("CartPole-v1")
seeds = [32167, 1, 999, 2833700, 13]

for idx, config_update in enumerate(config_updates):
    config: Dict[str, Any] = config_default.copy()
    config.update(config_update)

    for seed in seeds:
        config['seed'] = seed
        pprint(config)
        torch.manual_seed(config['seed'])
        agent = Agent(task.state_size, task.action_size, **config)

        data_logger = TensorboardLogger(
            log_dir=f'runs/MultiExp-{task.name}-i{idx}-s{seed}')
        env_runner = EnvRunner(task, agent, data_logger=data_logger)
        env_runner.seed(seed)
        env_runner.run(reward_goal=99999,
                       max_episodes=500,
Example #16
                       kernel_size=(16, 8),
                       max_pool_size=(4, 2),
                       stride=(4, 2),
                       device=device)
    return NetChainer(net_classes=[
        conv_net,
        nn.Flatten(),
        FcNet(conv_net.output_size,
              output_dim,
              hidden_layers=(200, 200),
              device=device),
    ])


env_name = 'SpaceInvaders-v0'
task = GymTask(env_name, state_transform=state_transform)

device = "cuda"
config = {
    'device': device,
    "update_freq": 50,
    "number_updates": 5,
    "batch_size": 200,
    "buffer_size": 1e4,
    "warm_up": 100,
    "lr": 1e-4,
    "pre_network_fn": lambda in_features: network_fn(in_features, 300, device),
    "hidden_layers": None,
    "state_transform": agent_state_tranform,
}
state_size = task.actual_state_size
Example #17
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType


def reward_transform(reward):
    """Cap reward to [-1, 1]"""
    return max(-1, min(reward, 1))


seed = 32167
torch.manual_seed(seed)

env_name = 'LunarLanderContinuous-v2'
task: TaskType = GymTask(env_name,
                         seed=seed,
                         reward_transform=reward_transform)
config = {
    'warm_up': 400,
    'device': 'cpu',
    'batch_size': 60,
    'update_freq': 2,
    'number_updates': 1,
    'hidden_layers': (100, 100),
    'actor_lr': 5e-4,
    'critic_lr': 5e-4,
    'alpha_lr': 3e-5,
    'tau': 0.02,
    "alpha": 0.2,
    'action_scale': 2,
    'action_min': -2,
Example #18
import pylab as plt


def state_transform(state):
    """Crop the top and bottom edges and convert to black-and-white."""
    return (state[40:-10].sum(-1) > 0)[None, ...]


def agent_state_tranform(state):
    return state


env_name = 'Breakout-v0'
task = GymTask(env_name, state_transform=state_transform)
state_size = np.array(task.reset()).shape
writer = SummaryWriter()

config = {
    "update_freq": 10,
    "batch_size": 100,
    "warm_up": 100,
    "lr": 1e-4,
    "network_fn": lambda: QNetwork2D(state_size, task.action_size, hidden_layers=(200, 200)),
    "state_transform":
Example #19
def network_fn(state_dim, output_dim, device):
    conv_net = ConvNet(state_dim, hidden_layers=(10, 10), device=device)
    return NetChainer(net_classes=[
        ScaleNet(scale=1. / 255),
        conv_net,
        FlattenNet(),
        FcNet(conv_net.output_size,
              output_dim,
              hidden_layers=(100, 100, 50),
              device=device),
    ])


env_name = 'SpaceInvaders-v0'
data_logger = TensorboardLogger()
task = GymTask(env_name, state_transform=state_transform)
config = {
    "network_fn": lambda: network_fn(task.actual_state_size, task.action_size, "cuda"),
    "compress_state": True,
    "gamma": 0.99,
    "lr": 1e-3,
    "update_freq": 150,
    "batch_size": 400,
    "buffer_size": int(5e3),
Example #20
from ai_traineree.agents.ppo import PPOAgent as Agent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

import numpy as np
import pylab as plt


env_name = 'LunarLanderContinuous-v2'

task: TaskType = GymTask(env_name)
config = {
    'rollout_length': 30,
    'batch_size': 30,
    "number_updates": 1,
    "ppo_ratio_clip": 0.2,
    "value_loss_weight": 2,
    "entropy_weight": 0.0005,
    "gamma": 0.98,
    "action_scale": 2,
    "max_grad_norm_actor": 2.0,
    "max_grad_norm_critic": 2.0,
    "critic_lr": 1e-3,
    "actor_lr": 1e-3,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(300, 300), config=config)
env_runner = EnvRunner(task, agent)

env_runner.interact_episode(0, render=True)
scores = env_runner.run(80, 4000)
Example #21
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.loggers import TensorboardLogger

import numpy as np
import pylab as plt
import torch

seed = 32167
# torch.set_deterministic(True)
torch.manual_seed(seed)
data_logger = TensorboardLogger()

env_name = 'CartPole-v1'
task = GymTask(env_name, seed=seed)
agent = DQNAgent(task.state_size, task.action_size, n_steps=5, seed=seed)
env_runner = EnvRunner(task, agent, data_logger=data_logger, seed=seed)

scores = env_runner.run(reward_goal=100, max_episodes=300, force_new=True)
env_runner.interact_episode(render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()