Example No. 1
def test_runs_rainbow():
    # Assign
    task = GymTask('CartPole-v1')
    agent = RainbowAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
Example No. 2
def test_runs_td3():
    # Assign
    task = GymTask('Pendulum-v0')
    agent = TD3Agent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
Example No. 3
def test_runs_d3pg():
    # Assign
    task = GymTask('BipedalWalker-v3')
    agent = D3PGAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
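The three smoke tests above only check that training runs without raising. Since EnvRunner.run returns the per-episode scores (the later examples plot the returned list), a hedged variant with an explicit Assert step could look like the sketch below; the test name and the bound are illustrative and not part of the original suite.

def test_runs_rainbow_returns_scores():
    # Assign
    task = GymTask('CartPole-v1')
    agent = RainbowAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    scores = env_runner.run(reward_goal=10, max_episodes=10, force_new=True)

    # Assert
    # run() is capped at max_episodes, so there is at least one and at most 10 scores.
    assert 0 < len(scores) <= 10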
Example No. 4
def test_env_runner_save_state(mock_task, mock_agent, mock_json, mock_path):
    # Assign
    mock_task.step.return_value = ([1, 0.1], -1, False, {})
    mock_agent.act.return_value = 1
    env_runner = EnvRunner(mock_task, mock_agent, max_iterations=10)

    # Act
    env_runner.run(max_episodes=10)
    with mock.patch('builtins.open'):
        env_runner.save_state('saved_state.state')

    # Assert
    mock_agent.save_state.assert_called_once()
    state = mock_json.dump.call_args[0][0]
    assert state['episode'] == 10
    assert state['tot_iterations'] == 10 * 10
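Outside of a mocked test, the same save_state call can checkpoint a real run. A minimal sketch, assuming DQNAgent and GymTask as in the other examples, a hypothetical file name, and that your EnvRunner version also exposes a matching load_state (verify this against the API before relying on it):

task = GymTask('CartPole-v1')
agent = DQNAgent(task.state_size, task.action_size)
env_runner = EnvRunner(task, agent, max_iterations=50)

env_runner.run(reward_goal=10, max_episodes=10)
env_runner.save_state('cartpole_checkpoint.state')  # hypothetical file name

# Restore later and keep training (assumes a matching load_state method exists).
env_runner.load_state('cartpole_checkpoint.state')
env_runner.run(reward_goal=10, max_episodes=10)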
Example No. 5
import logging
from typing import Optional

import gym

from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
# The import paths for AgentType and Hyperparameters are assumed from the other
# snippets (ai_traineree.types); adjust them to the version you are using.
from ai_traineree.types import AgentType, Hyperparameters


class SageMakerExecutor:

    _logger = logging.getLogger("SageMakerExecutor")

    def __init__(self,
                 env_name,
                 agent_name: str,
                 hyperparameters: Optional[Hyperparameters] = None):
        self._logger.info(
            "Initiating SageMakerExecutor with env_name '%s' and agent '%s'",
            env_name, agent_name)

        env = gym.make(env_name)
        self.task = GymTask(env, env_name)
        agent = None
        if agent_name.upper() == "DQN":
            from ai_traineree.agents.dqn import DQNAgent
            agent = DQNAgent
        elif agent_name.upper() == "PPO":
            from ai_traineree.agents.ppo import PPOAgent
            agent = PPOAgent
        elif agent_name.upper() == "DDPG":
            from ai_traineree.agents.ddpg import DDPGAgent
            agent = DDPGAgent
        else:
            self._logger.warning(
                "Unrecognized agent '%s'; defaulting to PPOAgent.", agent_name)
            from ai_traineree.agents.ppo import PPOAgent
            agent = PPOAgent

        # `hyperparameters` defaults to None, so fall back to an empty mapping
        # before reading values from it.
        hyperparameters = hyperparameters or {}

        self.max_iterations = int(hyperparameters.get("max_iterations", 10000))
        self.max_episodes = int(hyperparameters.get("max_episodes", 1000))
        self.log_every = int(hyperparameters.get("log_every", 10))
        self.score_goal = int(hyperparameters.get("score_goal", 100))

        self.eps_start: float = float(hyperparameters.get('eps_start', 1.0))
        self.eps_end: float = float(hyperparameters.get('eps_end', 0.02))
        self.eps_decay: float = float(hyperparameters.get('eps_decay', 0.995))

        self.agent: AgentType = agent(self.task.state_size,
                                      self.task.action_size,
                                      config=hyperparameters)

        self.env_runner = EnvRunner(self.task,
                                    self.agent,
                                    max_iterations=self.max_iterations)

    def run(self) -> None:
        self._logger.info("Running model '%s' for env '%s'", self.agent.name,
                          self.task.name)
        self.env_runner.run(
            reward_goal=self.score_goal,
            max_episodes=self.max_episodes,
            eps_start=self.eps_start,
            eps_end=self.eps_end,
            eps_decay=self.eps_decay,
            log_every=self.log_every,
        )

    def save_results(self, path):
        self._logger.info("Saving the model to path %s", path)
        self.agent.save_state(path)
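A hedged usage sketch for the class above: the hyperparameter values are illustrative, a plain dict is assumed to satisfy the Hyperparameters mapping, and the output path is a placeholder (on SageMaker the model directory normally comes from the training environment).

executor = SageMakerExecutor(
    env_name='CartPole-v1',
    agent_name='DQN',
    hyperparameters={'max_episodes': 200, 'score_goal': 100, 'eps_decay': 0.99},
)
executor.run()
executor.save_results('/opt/ml/model/dqn_cartpole')  # placeholder path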
Example No. 6
    "lr": 2e-4,
    "n_steps": 3,
    "tau": 0.01,
    "max_grad_norm": 10.0,
    "hidden_layers": (1200, 1000),
}

agent = RainbowAgent(state_size, task.action_size, **config)
env_runner = EnvRunner(task,
                       agent,
                       max_iterations=2000,
                       data_logger=data_logger)

scores = env_runner.run(reward_goal=0.75,
                        max_episodes=50000,
                        log_every=1,
                        gif_every_episodes=1000,
                        force_new=True)
env_runner.interact_episode(render=True)
data_logger.close()

avg_length = 100
ma = running_mean(scores, avg_length)
# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.plot(range(avg_length, avg_length + len(ma)), ma)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
Example No. 7
from pprint import pprint
from typing import Any, Dict

import torch

from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask

# `Agent` below stands for whichever agent class is being swept over; its import
# is not shown in this snippet.

config_default = {'hidden_layers': (50, 50)}
config_updates = [{'n_steps': n} for n in range(1, 11)]

task = GymTask("CartPole-v1")
seeds = [32167, 1, 999, 2833700, 13]

for idx, config_update in enumerate(config_updates):
    config: Dict[str, Any] = config_default.copy()
    config.update(config_update)

    for seed in seeds:
        config['seed'] = seed
        pprint(config)
        torch.manual_seed(config['seed'])
        agent = Agent(task.state_size, task.action_size, **config)

        data_logger = TensorboardLogger(
            log_dir=f'runs/MultiExp-{task.name}-i{idx}-s{seed}')
        env_runner = EnvRunner(task, agent, data_logger=data_logger)
        env_runner.seed(seed)
        env_runner.run(reward_goal=99999,
                       max_episodes=500,
                       eps_decay=0.95,
                       force_new=True)
        data_logger.close()
Example No. 8
import numpy as np
import pylab as plt

from ai_traineree.env_runner import EnvRunner
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

env_name = 'LunarLander-v2'
task: TaskType = GymTask(env_name)
config = {'batch_size': 64}
agent = DQNAgent(task.state_size, task.action_size, config=config)
env_runner = EnvRunner(task, agent)

env_runner.interact_episode(0, render=True)
scores = env_runner.run(50, 800, eps_start=1.0, eps_end=0.05, eps_decay=0.995)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
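To keep the trained weights after a run like the one above, the agents expose save_state (it is what SageMakerExecutor.save_results calls in Example No. 5); the file name here is only illustrative.

agent.save_state(f'{env_name}_dqn.agent')  # illustrative file name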
Example No. 9
from torch.utils.tensorboard import SummaryWriter

from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask

import numpy as np
import pylab as plt

writer = SummaryWriter()

env_name = 'CartPole-v1'
task = GymTask(env_name)
agent = DQNAgent(task.state_size, task.action_size, n_steps=5)
env_runner = EnvRunner(task, agent, writer=writer)

scores = env_runner.run(
    reward_goal=100,
    max_episodes=5000,
    eps_end=0.002,
    eps_decay=0.99,
    gif_every_episodes=500,
    force_new=True,
)
env_runner.interact_episode(1000, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 10
task: TaskType = GymTask(env_name)
config = {
    'warm_up': 500,
    'batch_size': 200,
    'update_freq': 30,
    "number_updates": 1,
    "gamma": 0.99,
    "critic_lr": 1e-3,
    "actor_lr": 2e-3,
    "alpha": 0.2,
    "tau": 0.01,
    "max_grad_norm_alpha": 1.0,
    "max_grad_norm_actor": 10.0,
    "max_grad_norm_critic": 10.0,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(100, 100), **config)

env_runner = EnvRunner(task, agent, max_iterations=10000, data_logger=data_logger)
# env_runner.interact_episode(render=True)
scores = env_runner.run(reward_goal=10, max_episodes=500, eps_decay=0.99, log_episode_freq=1, gif_every_episodes=200, force_new=True)
env_runner.interact_episode(render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 11
from ai_traineree.agents.ddpg import DDPGAgent as DDPG
from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

import pylab as plt


data_logger = TensorboardLogger()
env_name = 'LunarLanderContinuous-v2'
task: TaskType = GymTask(env_name)
config = {'action_scale': 1, 'update_freq': 2}
agent = DDPG(task.state_size, task.action_size, hidden_layers=(100, 100), noise_scale=0.4, noise_sigma=0.2, **config)
env_runner = EnvRunner(task, agent, data_logger=data_logger)
scores = env_runner.run(reward_goal=80, max_episodes=1000, eps_start=1.0, eps_end=0.05, eps_decay=0.999, force_new=True)
# env_runner.interact_episode(0, render=True)
data_logger.close()

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 12
    int(5e3),
    "device":
    "cuda",
}

for _ in range(prev_states):
    task.reset()

agent = DQNAgent(task.state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent, data_logger=data_logger)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(
    reward_goal=1000,
    max_episodes=20000,
    log_every=1,
    eps_start=0.9,
    gif_every_episodes=200,
    force_new=True,
)
# env_runner.interact_episode(render=True)
data_logger.close()

# plot scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 13
    "gae_lambda": 0.95,
    "ppo_ratio_clip": 0.20,
    "entropy_weight": 0.005,
    "gamma": 0.99,
    "std_init": 0.5,
    "std_max": 1.0,
    "std_min": 0.1,

    "max_grad_norm_actor": 200.0,
    "max_grad_norm_critic": 200.0,
    "critic_lr": 3e-4,
    "critic_betas": (0.9, 0.999),
    "actor_lr": 3e-4,
    "actor_betas": (0.9, 0.999),
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(100, 100), **config)
env_runner = EnvRunner(task, agent, max_iterations=2000, data_logger=data_logger)
# env_runner.interact_episode(render=True)
scores = env_runner.run(300, 1000, log_episode_freq=1, gif_every_episodes=500, force_new=True)
env_runner.interact_episode(render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 14
task = GymTask(env_name)
config = {
    'warm_up': 100,
    'batch_size': 50,
    'hidden_layers': (50, 50),
    'noise_scale': 1.,
    'clip': (-2, 2),
    'actor_lr': 1e-4,
    'critic_lr': 2e-4,
}
agent = Agent(task.state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(0,
                        2000,
                        eps_start=1.0,
                        eps_end=0.05,
                        eps_decay=0.99,
                        log_every=1)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 15
import pylab as plt

from ai_traineree.agents.ddpg import DDPGAgent as DDPG
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

env_name = 'LunarLanderContinuous-v2'
task: TaskType = GymTask(env_name)
config = {'batch_size': 64, 'warm_up': 0, 'action_scale': 2, 'update_freq': 2}
agent = DDPG(task.state_size,
             task.action_size,
             hidden_layers=(300, 200),
             noise_scale=0.4,
             noise_sigma=0.2,
             config=config)
env_runner = EnvRunner(task, agent)

# interact_episode(task, agent, 0, render=True)
scores = env_runner.run(reward_goal=80,
                        max_episodes=40,
                        eps_start=1.0,
                        eps_end=0.05,
                        eps_decay=0.991)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 16
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask

import numpy as np
import pylab as plt

# `Agent` below is the agent class this snippet was written for; its import is
# not shown here.


env_name = 'Pendulum-v0'
task = GymTask(env_name)
config = {
    'warm_up': 100,
    'batch_size': 50,
    'hidden_layers': (50, 50),
    'noise_scale': 1.,
    'clip': (-2, 2),
    'actor_lr': 1e-4,
    'critic_lr': 2e-4,
}
agent = Agent(task.state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(0, 2000, eps_start=1.0, eps_end=0.05, eps_decay=0.99, log_episode_freq=1)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 17
config = {
    'batch_size': 50,
    'warm_up': 100,
    'action_scale': 2,
    'update_freq': 10,
    'hidden_layers': (200, 200),
    'noise_scale': 1.0,
    'noise_sigma': 0.2,
    'actor_lr': 1e-4,
}
agent = Agent(task.state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent)

# interact_episode(task, agent, 0, render=True)
scores = env_runner.run(reward_goal=80,
                        max_episodes=1000,
                        eps_start=1.0,
                        eps_end=0.05,
                        eps_decay=0.991,
                        log_every=1)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 18
    "actor_lr": 2e-4,
    "alpha": 0.2,
    "tau": 0.02,
    "max_grad_norm_alpha": 1.0,
    "max_grad_norm_actor": 5.0,
    "max_grad_norm_critic": 5.0,
}
agent = Agent(task.state_size,
              task.action_size,
              hidden_layers=(200, 200),
              writer=writer,
              **config)

env_runner = EnvRunner(task, agent, max_iterations=10000, writer=writer)
# env_runner.interact_episode(render=True)
scores = env_runner.run(reward_goal=10,
                        max_episodes=4000,
                        eps_decay=0.9,
                        log_every=1,
                        gif_every_episodes=200)
env_runner.interact_episode(render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 19
config = {
    'rollout_length': 60,
    'batch_size': 60,
    "number_updates": 1,

    "using_gae": False,  # Default is True
    "ppo_ratio_clip": 0.2,
    "entropy_weight": 0.0005,
    "gamma": 0.99,
    "action_scale": 1,
    "max_grad_norm_actor": 3.0,
    "max_grad_norm_critic": 5.0,
    "critic_lr": 0.001,
    "actor_lr": 0.0004,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(100, 100, 50), **config)
env_runner = EnvRunner(task, agent, data_logger=data_logger)
# env_runner.interact_episode(0, render=True)
scores = env_runner.run(80, 2000, eps_decay=0.99, force_new=True, checkpoint_every=20)
env_runner.interact_episode(0, render=True)

data_logger.close()
# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 20
    10,
    "batch_size":
    100,
    "warm_up":
    100,
    "lr":
    1e-4,
    "network_fn":
    lambda: QNetwork2D(state_size, task.action_size, hidden_layers=(200, 200)),
    "state_transform":
    agent_state_tranform,
}
agent = DQNAgent(state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent, max_iterations=2000, writer=writer)

scores = env_runner.run(reward_goal=500,
                        max_episodes=1000,
                        log_every=1,
                        eps_start=0.99,
                        gif_every_episodes=100)
env_runner.interact_episode(render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 21
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask

import pylab as plt

env_name = 'Breakout-ram-v0'
task = GymTask(env_name)
agent = DQNAgent(task.state_size, task.action_size, hidden_layers=(400, 300))
env_runner = EnvRunner(task, agent)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(reward_goal=5, max_episodes=5, log_every=1)
env_runner.interact_episode(100, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 22
    'actor_lr': 5e-4,
    'critic_lr': 5e-4,
    'alpha_lr': 3e-5,
    'tau': 0.02,
    "alpha": 0.2,
    'action_scale': 2,
    'action_min': -2,
    'action_max': 2,
    'seed': seed,
}
agent = Agent(task.state_size, task.action_size, **config)

log_dir = f"runs/{env_name}_{agent.name}-{datetime.datetime.now().isoformat()[:-7]}"
data_logger = TensorboardLogger(log_dir=log_dir)
env_runner = EnvRunner(task, agent, data_logger=data_logger, seed=seed)
scores = env_runner.run(reward_goal=30,
                        max_episodes=500,
                        eps_end=0.01,
                        eps_decay=0.95,
                        force_new=True)
env_runner.interact_episode(0, render=True)
data_logger.close()

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
Example No. 23
import pylab as plt

from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

# `Agent` below is the agent class this snippet was written for; its import is
# not shown here.

env_name = 'LunarLanderContinuous-v2'
task: TaskType = GymTask(env_name)
config = {
    'action_scale': 2,
    'batch_size': 200,
    'number_updates': 5,
    'update_freq': 10,
    'update_policy_freq': 10,
}
agent = Agent(task.state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent)

# interact_episode(task, agent, 0, render=True)
scores = env_runner.run(reward_goal=80,
                        max_episodes=2000,
                        log_episode_freq=1,
                        force_new=True)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 24

import numpy as np
import pylab as plt

from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask

# `Agent` below is the agent class used in this run; its import is not shown here.


def running_mean(x, N):
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / float(N)
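# Quick sanity check of running_mean with hypothetical values (not part of the
# original script): running_mean([1, 2, 3, 4], 2) -> array([1.5, 2.5, 3.5]),
# i.e. the mean of every window of N consecutive scores.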


env_name = 'CartPole-v1'
task = GymTask(env_name)
data_logger = TensorboardLogger()

agent = Agent(task.state_size, task.action_size, device='cpu')
env_runner = EnvRunner(task, agent, data_logger=data_logger)

scores = env_runner.run(reward_goal=100,
                        max_episodes=500,
                        eps_decay=0.9,
                        force_new=True)
env_runner.interact_episode(render=True)
data_logger.close()

avg_length = 100
ma = running_mean(scores, avg_length)
# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.plot(range(avg_length, avg_length + len(ma)), ma)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 25
task: TaskType = GymTask(env_name)
config = {
    'rollout_length': 30,
    'batch_size': 30,
    "number_updates": 1,
    "ppo_ratio_clip": 0.2,
    "value_loss_weight": 2,
    "entropy_weight": 0.0005,
    "gamma": 0.98,
    "action_scale": 2,
    "max_grad_norm_actor": 2.0,
    "max_grad_norm_critic": 2.0,
    "critic_lr": 1e-3,
    "actor_lr": 1e-3,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(300, 300), config=config)
env_runner = EnvRunner(task, agent)

env_runner.interact_episode(0, render=True)
scores = env_runner.run(80, 4000)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example No. 26
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.loggers import TensorboardLogger

import numpy as np
import pylab as plt
import torch

seed = 32167
# torch.set_deterministic(True)
torch.manual_seed(seed)
data_logger = TensorboardLogger()

env_name = 'CartPole-v1'
task = GymTask(env_name, seed=seed)
agent = DQNAgent(task.state_size, task.action_size, n_steps=5, seed=seed)
env_runner = EnvRunner(task, agent, data_logger=data_logger, seed=seed)

scores = env_runner.run(reward_goal=100, max_episodes=300, force_new=True)
env_runner.interact_episode(render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()