def test_runs_rainbow():
    # Assign
    task = GymTask('CartPole-v1')
    agent = RainbowAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
def test_runs_td3():
    # Assign
    task = GymTask('Pendulum-v0')
    agent = TD3Agent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
def test_runs_d3pg():
    # Assign
    task = GymTask('BipedalWalker-v3')
    agent = D3PGAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
def test_env_runner_save_state(mock_task, mock_agent, mock_json, mock_path):
    # Assign
    mock_task.step.return_value = ([1, 0.1], -1, False, {})
    mock_agent.act.return_value = 1
    env_runner = EnvRunner(mock_task, mock_agent, max_iterations=10)

    # Act
    env_runner.run(max_episodes=10)
    with mock.patch('builtins.open'):
        env_runner.save_state('saved_state.state')

    # Assert
    mock_agent.save_state.assert_called_once()
    state = mock_json.dump.call_args[0][0]
    assert state['episode'] == 10
    assert state['tot_iterations'] == 10 * 10
class SageMakerExecutor:
    _logger = logging.getLogger("SageMakerExecutor")

    def __init__(self, env_name, agent_name: str, hyperparameters: Optional[Hyperparameters] = None):
        self._logger.info(
            "Initiating SageMakerExecutor with env_name '%s' and agent '%s'", env_name, agent_name)
        env = gym.make(env_name)
        self.task = GymTask(env, env_name)

        agent = None
        if agent_name.upper() == "DQN":
            from ai_traineree.agents.dqn import DQNAgent
            agent = DQNAgent
        elif agent_name.upper() == "PPO":
            from ai_traineree.agents.ppo import PPOAgent
            agent = PPOAgent
        elif agent_name.upper() == "DDPG":
            from ai_traineree.agents.ddpg import DDPGAgent
            agent = DDPGAgent
        else:
            self._logger.warning("Unknown agent '%s'. Defaulting to PPOAgent.", agent_name)
            from ai_traineree.agents.ppo import PPOAgent
            agent = PPOAgent

        # Guard against the default None so the .get() lookups below don't fail.
        hyperparameters = hyperparameters if hyperparameters is not None else {}

        self.max_iterations = int(hyperparameters.get("max_iterations", 10000))
        self.max_episodes = int(hyperparameters.get("max_episodes", 1000))
        self.log_every = int(hyperparameters.get("log_every", 10))
        self.score_goal = int(hyperparameters.get("score_goal", 100))

        self.eps_start: float = float(hyperparameters.get('eps_start', 1.0))
        self.eps_end: float = float(hyperparameters.get('eps_end', 0.02))
        self.eps_decay: float = float(hyperparameters.get('eps_decay', 0.995))

        self.agent: AgentType = agent(self.task.state_size, self.task.action_size, config=hyperparameters)
        self.env_runner = EnvRunner(self.task, self.agent, max_iterations=self.max_iterations)

    def run(self) -> None:
        self._logger.info("Running model '%s' for env '%s'", self.agent.name, self.task.name)
        self.env_runner.run(
            reward_goal=self.score_goal,
            max_episodes=self.max_episodes,
            eps_start=self.eps_start,
            eps_end=self.eps_end,
            eps_decay=self.eps_decay,
            log_every=self.log_every,
        )

    def save_results(self, path):
        self._logger.info("Saving the model to path %s", path)
        self.agent.save_state(path)
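# A minimal usage sketch for SageMakerExecutor (illustrative only: the environment
# name, agent name, hyperparameter values, and the 'model.state' output path are
# assumptions for this example, not values prescribed by the library).
if __name__ == "__main__":
    executor = SageMakerExecutor(
        env_name="CartPole-v1",
        agent_name="DQN",
        hyperparameters={"max_episodes": 200, "score_goal": 100, "eps_decay": 0.99},
    )
    executor.run()
    executor.save_results("model.state")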
"lr": 2e-4, "n_steps": 3, "tau": 0.01, "max_grad_norm": 10.0, "hidden_layers": (1200, 1000), } agent = RainbowAgent(state_size, task.action_size, **config) env_runner = EnvRunner(task, agent, max_iterations=2000, data_logger=data_logger) scores = env_runner.run(reward_goal=0.75, max_episodes=50000, log_every=1, gif_every_episodes=1000, force_new=True) env_runner.interact_episode(render=True) data_logger.close() avg_length = 100 ma = running_mean(scores, avg_length) # plot the scores fig = plt.figure() ax = fig.add_subplot(111) plt.plot(range(len(scores)), scores) plt.plot(range(avg_length, avg_length + len(ma)), ma) plt.ylabel('Score') plt.xlabel('Episode #') plt.savefig(f'{env_name}.png', dpi=120)
from pprint import pprint
from typing import Any, Dict

import torch

from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask

config_default = {'hidden_layers': (50, 50)}
config_updates = [{'n_steps': n} for n in range(1, 11)]

task = GymTask("CartPole-v1")
seeds = [32167, 1, 999, 2833700, 13]

for idx, config_update in enumerate(config_updates):
    config: Dict[str, Any] = config_default.copy()
    config.update(config_update)

    for seed in seeds:
        config['seed'] = seed
        pprint(config)
        torch.manual_seed(config['seed'])

        agent = Agent(task.state_size, task.action_size, **config)
        data_logger = TensorboardLogger(log_dir=f'runs/MultiExp-{task.name}-i{idx}-s{seed}')
        env_runner = EnvRunner(task, agent, data_logger=data_logger)
        env_runner.seed(seed)

        env_runner.run(reward_goal=99999, max_episodes=500, eps_decay=0.95, force_new=True)
        data_logger.close()
import numpy as np
import pylab as plt

from ai_traineree.env_runner import EnvRunner
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

env_name = 'LunarLander-v2'
task: TaskType = GymTask(env_name)
config = {'batch_size': 64}
agent = DQNAgent(task.state_size, task.action_size, config=config)
env_runner = EnvRunner(task, agent)

env_runner.interact_episode(0, render=True)
scores = env_runner.run(50, 800, eps_start=1.0, eps_end=0.05, eps_decay=0.995)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import pylab as plt

from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask

writer = SummaryWriter()

env_name = 'CartPole-v1'
task = GymTask(env_name)

agent = DQNAgent(task.state_size, task.action_size, n_steps=5)
env_runner = EnvRunner(task, agent, writer=writer)

scores = env_runner.run(
    reward_goal=100,
    max_episodes=5000,
    eps_end=0.002,
    eps_decay=0.99,
    gif_every_episodes=500,
    force_new=True,
)
env_runner.interact_episode(1000, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
task: TaskType = GymTask(env_name)
config = {
    'warm_up': 500,
    'batch_size': 200,
    'update_freq': 30,
    "number_updates": 1,
    "gamma": 0.99,
    "critic_lr": 1e-3,
    "actor_lr": 2e-3,
    "alpha": 0.2,
    "tau": 0.01,
    "max_grad_norm_alpha": 1.0,
    "max_grad_norm_actor": 10.0,
    "max_grad_norm_critic": 10.0,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(100, 100), **config)
env_runner = EnvRunner(task, agent, max_iterations=10000, data_logger=data_logger)

# env_runner.interact_episode(render=True)
scores = env_runner.run(reward_goal=10, max_episodes=500, eps_decay=0.99, log_episode_freq=1, gif_every_episodes=200, force_new=True)
env_runner.interact_episode(render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
from ai_traineree.agents.ddpg import DDPGAgent as DDPG
from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

import pylab as plt

data_logger = TensorboardLogger()
env_name = 'LunarLanderContinuous-v2'
task: TaskType = GymTask(env_name)
config = {'action_scale': 1, 'update_freq': 2}
agent = DDPG(task.state_size, task.action_size, hidden_layers=(100, 100), noise_scale=0.4, noise_sigma=0.2, **config)
env_runner = EnvRunner(task, agent, data_logger=data_logger)

scores = env_runner.run(reward_goal=80, max_episodes=1000, eps_start=1.0, eps_end=0.05, eps_decay=0.999, force_new=True)
# env_runner.interact_episode(0, render=True)
data_logger.close()

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
    int(5e3),
    "device": "cuda",
}

for _ in range(prev_states):
    task.reset()

agent = DQNAgent(task.state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent, data_logger=data_logger)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(
    reward_goal=1000,
    max_episodes=20000,
    log_every=1,
    eps_start=0.9,
    gif_every_episodes=200,
    force_new=True,
)
# env_runner.interact_episode(render=True)
data_logger.close()

# plot scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
"gae_lambda": 0.95, "ppo_ratio_clip": 0.20, "entropy_weight": 0.005, "gamma": 0.99, "std_init": 0.5, "std_max": 1.0, "std_min": 0.1, "max_grad_norm_actor": 200.0, "max_grad_norm_critic": 200.0, "critic_lr": 3e-4, "critic_betas": (0.9, 0.999), "actor_lr": 3e-4, "actor_betas": (0.9, 0.999), } agent = Agent(task.state_size, task.action_size, hidden_layers=(100, 100), **config) env_runner = EnvRunner(task, agent, max_iterations=2000, data_logger=data_logger) # env_runner.interact_episode(render=True) scores = env_runner.run(300, 1000, log_episode_freq=1, gif_every_episodes=500, force_new=True) env_runner.interact_episode(render=True) # plot the scores fig = plt.figure() ax = fig.add_subplot(111) plt.plot(range(len(scores)), scores) plt.ylabel('Score') plt.xlabel('Episode #') plt.savefig(f'{env_name}.png', dpi=120) plt.show()
task = GymTask(env_name)
config = {
    'warm_up': 100,
    'batch_size': 50,
    'hidden_layers': (50, 50),
    'noise_scale': 1.,
    'clip': (-2, 2),
    'actor_lr': 1e-4,
    'critic_lr': 2e-4,
}
agent = Agent(task.state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(0, 2000, eps_start=1.0, eps_end=0.05, eps_decay=0.99, log_every=1)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
import pylab as plt

from ai_traineree.agents.ddpg import DDPGAgent as DDPG
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

env_name = 'LunarLanderContinuous-v2'
task: TaskType = GymTask(env_name)
config = {'batch_size': 64, 'warm_up': 0, 'action_scale': 2, 'update_freq': 2}
agent = DDPG(task.state_size, task.action_size, hidden_layers=(300, 200), noise_scale=0.4, noise_sigma=0.2, config=config)
env_runner = EnvRunner(task, agent)

# interact_episode(task, agent, 0, render=True)
scores = env_runner.run(reward_goal=80, max_episodes=40, eps_start=1.0, eps_end=0.05, eps_decay=0.991)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
import numpy as np
import pylab as plt

from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask

env_name = 'Pendulum-v0'
task = GymTask(env_name)
config = {
    'warm_up': 100,
    'batch_size': 50,
    'hidden_layers': (50, 50),
    'noise_scale': 1.,
    'clip': (-2, 2),
    'actor_lr': 1e-4,
    'critic_lr': 2e-4,
}
agent = Agent(task.state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(0, 2000, eps_start=1.0, eps_end=0.05, eps_decay=0.99, log_episode_freq=1)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
config = {
    'batch_size': 50,
    'warm_up': 100,
    'action_scale': 2,
    'update_freq': 10,
    'hidden_layers': (200, 200),
    'noise_scale': 1.0,
    'noise_sigma': 0.2,
    'actor_lr': 1e-4,
}
agent = Agent(task.state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent)

# interact_episode(task, agent, 0, render=True)
scores = env_runner.run(reward_goal=80, max_episodes=1000, eps_start=1.0, eps_end=0.05, eps_decay=0.991, log_every=1)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
"actor_lr": 2e-4, "alpha": 0.2, "tau": 0.02, "max_grad_norm_alpha": 1.0, "max_grad_norm_actor": 5.0, "max_grad_norm_critic": 5.0, } agent = Agent(task.state_size, task.action_size, hidden_layers=(200, 200), writer=writer, **config) env_runner = EnvRunner(task, agent, max_iterations=10000, writer=writer) # env_runner.interact_episode(render=True) scores = env_runner.run(reward_goal=10, max_episodes=4000, eps_decay=0.9, log_every=1, gif_every_episodes=200) env_runner.interact_episode(render=True) # plot the scores fig = plt.figure() ax = fig.add_subplot(111) plt.plot(np.arange(len(scores)), scores) plt.ylabel('Score') plt.xlabel('Episode #') plt.savefig(f'{env_name}.png', dpi=120) plt.show()
config = {
    'rollout_length': 60,
    'batch_size': 60,
    "number_updates": 1,
    "using_gae": False,  # Default is True
    "ppo_ratio_clip": 0.2,
    "entropy_weight": 0.0005,
    "gamma": 0.99,
    "action_scale": 1,
    "max_grad_norm_actor": 3.0,
    "max_grad_norm_critic": 5.0,
    "critic_lr": 0.001,
    "actor_lr": 0.0004,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(100, 100, 50), **config)
env_runner = EnvRunner(task, agent, data_logger=data_logger)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(80, 2000, eps_decay=0.99, force_new=True, checkpoint_every=20)
env_runner.interact_episode(0, render=True)
data_logger.close()

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
10, "batch_size": 100, "warm_up": 100, "lr": 1e-4, "network_fn": lambda: QNetwork2D(state_size, task.action_size, hidden_layers=(200, 200)), "state_transform": agent_state_tranform, } agent = DQNAgent(state_size, task.action_size, **config) env_runner = EnvRunner(task, agent, max_iterations=2000, writer=writer) scores = env_runner.run(reward_goal=500, max_episodes=1000, log_every=1, eps_start=0.99, gif_every_episodes=100) env_runner.interact_episode(render=True) # plot the scores fig = plt.figure() ax = fig.add_subplot(111) plt.plot(range(len(scores)), scores) plt.ylabel('Score') plt.xlabel('Episode #') plt.savefig(f'{env_name}.png', dpi=120) plt.show()
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask

import pylab as plt

env_name = 'Breakout-ram-v0'
task = GymTask(env_name)
agent = DQNAgent(task.state_size, task.action_size, hidden_layers=(400, 300))
env_runner = EnvRunner(task, agent)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(reward_goal=5, max_episodes=5, log_every=1)
env_runner.interact_episode(100, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
    'actor_lr': 5e-4,
    'critic_lr': 5e-4,
    'alpha_lr': 3e-5,
    'tau': 0.02,
    "alpha": 0.2,
    'action_scale': 2,
    'action_min': -2,
    'action_max': 2,
    'seed': seed,
}
agent = Agent(task.state_size, task.action_size, **config)

log_dir = f"runs/{env_name}_{agent.name}-{datetime.datetime.now().isoformat()[:-7]}"
data_logger = TensorboardLogger(log_dir=log_dir)
env_runner = EnvRunner(task, agent, data_logger=data_logger, seed=seed)
scores = env_runner.run(reward_goal=30, max_episodes=500, eps_end=0.01, eps_decay=0.95, force_new=True)
env_runner.interact_episode(0, render=True)
data_logger.close()

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
import pylab as plt

env_name = 'LunarLanderContinuous-v2'
task: TaskType = GymTask(env_name)
config = {
    'action_scale': 2,
    'batch_size': 200,
    'number_updates': 5,
    'update_freq': 10,
    'update_policy_freq': 10,
}
agent = Agent(task.state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent)

# interact_episode(task, agent, 0, render=True)
scores = env_runner.run(reward_goal=80, max_episodes=2000, log_episode_freq=1, force_new=True)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
def running_mean(x, N):
    # Simple moving average over a window of N consecutive scores.
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / float(N)


env_name = 'CartPole-v1'
task = GymTask(env_name)
data_logger = TensorboardLogger()

agent = Agent(task.state_size, task.action_size, device='cpu')
env_runner = EnvRunner(task, agent, data_logger=data_logger)
scores = env_runner.run(reward_goal=100, max_episodes=500, eps_decay=0.9, force_new=True)
env_runner.interact_episode(render=True)
data_logger.close()

avg_length = 100
ma = running_mean(scores, avg_length)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.plot(range(avg_length, avg_length + len(ma)), ma)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
task: TaskType = GymTask(env_name)
config = {
    'rollout_length': 30,
    'batch_size': 30,
    "number_updates": 1,
    "ppo_ratio_clip": 0.2,
    "value_loss_weight": 2,
    "entropy_weight": 0.0005,
    "gamma": 0.98,
    "action_scale": 2,
    "max_grad_norm_actor": 2.0,
    "max_grad_norm_critic": 2.0,
    "critic_lr": 1e-3,
    "actor_lr": 1e-3,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(300, 300), config=config)
env_runner = EnvRunner(task, agent)

env_runner.interact_episode(0, render=True)
scores = env_runner.run(80, 4000)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.loggers import TensorboardLogger

import numpy as np
import pylab as plt
import torch

seed = 32167
# torch.set_deterministic(True)
torch.manual_seed(seed)
data_logger = TensorboardLogger()

env_name = 'CartPole-v1'
task = GymTask(env_name, seed=seed)

agent = DQNAgent(task.state_size, task.action_size, n_steps=5, seed=seed)
env_runner = EnvRunner(task, agent, data_logger=data_logger, seed=seed)
scores = env_runner.run(reward_goal=100, max_episodes=300, force_new=True)
env_runner.interact_episode(render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()