def main():
    # run on gpu
    device = 'cuda'

    def get_agents(preset):
        agents = [getattr(preset, agent_name) for agent_name in preset.__all__]
        return [agent(device=device) for agent in agents]

    SlurmExperiment(
        get_agents(atari),
        AtariEnvironment('Breakout', device=device),
        2e7,
        sbatch_args={'partition': '1080ti-long'}
    )

    SlurmExperiment(
        get_agents(classic_control),
        GymEnvironment('CartPole-v0', device=device),
        100000,
        sbatch_args={'partition': '1080ti-short'}
    )

    SlurmExperiment(
        get_agents(continuous),
        GymEnvironment('LunarLanderContinuous-v2', device=device),
        500000,
        sbatch_args={'partition': '1080ti-short'}
    )
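For reference, a hedged expansion of what `get_agents` yields for the continuous presets (the `__all__` contents are an assumption, matching the continuous presets exercised in the tests below):

# Sketch: assuming continuous.__all__ == ['ddpg', 'ppo', 'sac'],
# get_agents(continuous) is equivalent to:
# agents = [ddpg(device='cuda'), ppo(device='cuda'), sac(device='cuda')]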
class TestContinuousPresets(unittest.TestCase):
    def setUp(self):
        self.env = GymEnvironment('LunarLanderContinuous-v2')
        self.env.reset()

    def tearDown(self):
        if os.path.exists('test_preset.pt'):
            os.remove('test_preset.pt')

    def test_ddpg(self):
        self.validate(ddpg)

    def test_ppo(self):
        self.validate(ppo)

    def test_sac(self):
        self.validate(sac)

    def validate(self, builder):
        preset = builder.device('cpu').env(self.env).build()
        # normal agent
        agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        agent.act(self.env.state)
        # test agent
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
        # test save/load
        preset.save('test_preset.pt')
        preset = torch.load('test_preset.pt')
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
def setUp(self):
    self.agent = MockAgent()
    self.env = GymEnvironment('PongNoFrameskip-v4')
    self.body = DeepmindAtariBody(
        ToLegacyBody(self.agent),
        self.env,
        noop_max=0,
        frame_stack=1
    )
def main(): parser = argparse.ArgumentParser(description="Watch a continuous agent.") parser.add_argument("env", help="ID of the Environment") parser.add_argument("filename", help="File where the model was saved.") parser.add_argument( "--device", default="cuda", help= "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)", ) parser.add_argument( "--fps", default=120, help="Playback speed", ) args = parser.parse_args() if args.env in ENVS: env = GymEnvironment(args.env, device=args.device) elif 'BulletEnv' in args.env or args.env in PybulletEnvironment.short_names: env = PybulletEnvironment(args.env, device=args.device) else: env = GymEnvironment(args.env, device=args.device) load_and_watch(args.filename, env, fps=args.fps)
def setUp(self):
    np.random.seed(0)
    torch.manual_seed(0)
    self.env = GymEnvironment('CartPole-v0')
    self.env.seed(0)
    self.experiment = MockExperiment(self.make_agent(), self.env, quiet=True)
    self.experiment._env.seed(0)
def test_reset(self):
    env = GymEnvironment('CartPole-v0')
    state = env.reset()
    self.assertEqual(state.observation.shape, (4,))
    self.assertEqual(state.reward, 0)
    self.assertFalse(state.done)
    self.assertEqual(state.mask, 1)
def test_reset_preconstructed_env(self):
    env = GymEnvironment(gym.make('CartPole-v0'))
    state = env.reset()
    self.assertEqual(state.observation.shape, (4,))
    self.assertEqual(state.reward, 0)
    self.assertFalse(state.done)
    self.assertEqual(state.mask, 1)
def setUp(self):
    self.env = GymEnvironment('LunarLanderContinuous-v2')
    self.env.reset()
    self.parallel_env = DuplicateEnvironment([
        GymEnvironment('LunarLanderContinuous-v2'),
        GymEnvironment('LunarLanderContinuous-v2'),
    ])
    self.parallel_env.reset()
class RainbowAgent(dqn.RLAgent):
    def __init__(self):
        super().__init__()
        if Settings.CUDA:
            self.device = "cuda"
        else:
            self.device = "cpu"
        self.env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=self.device)
        self.agent = None

    @classmethod
    def load(cls, path):
        rl_agent = cls()
        agent = GreedyAgent.load(path, rl_agent.env)
        rl_agent.agent = agent
        return rl_agent

    @classmethod
    def train(cls, num_frames: int):
        rl_agent = cls()
        preset = rainbow(
            device=rl_agent.device,
            lr=Settings.LEARNING_RATE,
        )
        experiment = SingleEnvExperiment(preset, rl_agent.env)
        experiment.train(num_frames)
        # move the logs from the default location to the configured one
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    @classmethod
    def resume_training(cls, path, num_frames: int):
        rl_agent = cls()
        lr = Settings.LEARNING_RATE
        agent = rainbow(device=rl_agent.device, lr=lr)
        # restore the saved distributional Q-network weights
        q_dist_module = torch.load(
            os.path.join(path, "q_dist.pt"),
            map_location='cpu'
        ).to(rl_agent.device)
        experiment = SingleEnvExperiment(agent, rl_agent.env)
        agent = experiment._agent
        old_q_dist = agent.q_dist
        old_q_dist.model.load_state_dict(q_dist_module.state_dict())
        experiment.train(frames=num_frames)
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    def get_control(self, state: prediction.HighwayState) -> float:
        vector_state = dqn.get_state_vector_from_base_state(state)
        encoded_state = self.env._make_state(vector_state, False)
        action = self.agent.eval(encoded_state, 0).item()
        return Settings.JERK_VALUES_DQN[action]

    def _cleanup(self):
        self.env.close()
class TestContinuousPresets(unittest.TestCase):
    def setUp(self):
        self.env = GymEnvironment('LunarLanderContinuous-v2')
        self.env.reset()
        self.parallel_env = DuplicateEnvironment([
            GymEnvironment('LunarLanderContinuous-v2'),
            GymEnvironment('LunarLanderContinuous-v2'),
        ])
        self.parallel_env.reset()

    def tearDown(self):
        if os.path.exists('test_preset.pt'):
            os.remove('test_preset.pt')

    def test_ddpg(self):
        self.validate(ddpg)

    def test_ppo(self):
        self.validate(ppo)

    def test_sac(self):
        self.validate(sac)

    def validate(self, builder):
        preset = builder.device('cpu').env(self.env).build()
        if isinstance(preset, ParallelPreset):
            return self.validate_parallel_preset(preset)
        return self.validate_standard_preset(preset)

    def validate_standard_preset(self, preset):
        # train agent
        agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        agent.act(self.env.state)
        # test agent
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
        # test save/load
        preset.save('test_preset.pt')
        preset = torch.load('test_preset.pt')
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)

    def validate_parallel_preset(self, preset):
        # train agent
        agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        agent.act(self.parallel_env.state_array)
        # test agent
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
        # parallel test agent
        parallel_test_agent = preset.test_agent()
        parallel_test_agent.act(self.parallel_env.state_array)
        # test save/load
        preset.save('test_preset.pt')
        preset = torch.load('test_preset.pt')
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
class TestSingleEnvExperiment(unittest.TestCase):
    def setUp(self):
        np.random.seed(0)
        torch.manual_seed(0)
        self.env = GymEnvironment('CartPole-v0')
        self.env.seed(0)
        self.experiment = None

    def test_adds_label(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True)
        self.assertEqual(experiment._writer.label, "_dqn_CartPole-v0")

    def test_writes_training_returns_eps(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True)
        experiment.train(episodes=3)
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns/episode"]["values"],
            np.array([22.0, 20.0, 24.0]),
        )
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns/episode"]["steps"],
            np.array([1, 2, 3]),
        )

    def test_writes_test_returns(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True)
        experiment.train(episodes=5)
        returns = experiment.test(episodes=4)
        expected_mean = 9.5
        expected_std = 0.5
        np.testing.assert_equal(np.mean(returns), expected_mean)
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns-test/mean"]["values"],
            np.array([expected_mean]),
        )
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns-test/std"]["values"],
            np.array([expected_std]),
        )
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns-test/mean"]["steps"],
            np.array([95.]),
        )

    def test_writes_loss(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True, write_loss=True)
        self.assertTrue(experiment._writer.write_loss)
        experiment = MockExperiment(dqn(), self.env, quiet=True, write_loss=False)
        self.assertFalse(experiment._writer.write_loss)
def main(): parser = argparse.ArgumentParser(description="Run a continuous actions benchmark.") parser.add_argument("env", help="Name of the env (e.g. 'lander', 'cheetah')") parser.add_argument( "agent", help="Name of the agent (e.g. ddpg). See presets for available agents." ) parser.add_argument( "--device", default="cuda", help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).", ) parser.add_argument( "--frames", type=int, default=2e6, help="The number of training frames." ) parser.add_argument( "--render", action="store_true", default=False, help="Render the environment." ) parser.add_argument( "--logdir", default='runs', help="The base logging directory." ) parser.add_argument("--writer", default='tensorboard', help="The backend used for tracking experiment metrics.") parser.add_argument( '--hyperparameters', default=[], nargs='*', help="Custom hyperparameters, in the format hyperparameter1=value1 hyperparameter2=value2 etc." ) args = parser.parse_args() if args.env in ENVS: env = GymEnvironment(args.env, device=args.device) elif 'BulletEnv' in args.env or args.env in PybulletEnvironment.short_names: env = PybulletEnvironment(args.env, device=args.device) else: env = GymEnvironment(args.env, device=args.device) agent_name = args.agent agent = getattr(continuous, agent_name) agent = agent.device(args.device) # parse hyperparameters hyperparameters = {} for hp in args.hyperparameters: key, value = hp.split('=') hyperparameters[key] = type(agent.default_hyperparameters[key])(value) agent = agent.hyperparameters(**hyperparameters) run_experiment( agent, env, frames=args.frames, render=args.render, logdir=args.logdir, writer=args.writer, )
def test_step_until_done(self):
    env = GymEnvironment('CartPole-v0')
    env.reset()
    for _ in range(100):
        state = env.step(1)
        if state.done:
            break
    self.assertEqual(state.observation.shape, (4,))
    self.assertEqual(state.reward, 1.)
    self.assertTrue(state.done)
    self.assertEqual(state.mask, 0)
def main():
    device = 'cpu'
    timesteps = 40000
    run_experiment(
        [dqn(), a2c()],
        [
            GymEnvironment('CartPole-v0', device),
            GymEnvironment('Acrobot-v1', device)
        ],
        timesteps,
    )
    plot_returns_100('runs', timesteps=timesteps)
class TestClassicControlPresets(unittest.TestCase):
    def setUp(self):
        self.env = GymEnvironment('CartPole-v0')
        self.env.reset()

    def tearDown(self):
        if os.path.exists('test_preset.pt'):
            os.remove('test_preset.pt')

    def test_a2c(self):
        self.validate(a2c)

    def test_c51(self):
        self.validate(c51)

    def test_ddqn(self):
        self.validate(ddqn)

    def test_dqn(self):
        self.validate(dqn)

    def test_ppo(self):
        self.validate(ppo)

    def test_rainbow(self):
        self.validate(rainbow)

    def test_vac(self):
        self.validate(vac)

    def test_vpg(self):
        self.validate(vpg)

    def test_vsarsa(self):
        self.validate(vsarsa)

    def test_vqn(self):
        self.validate(vqn)

    def validate(self, builder):
        preset = builder.device('cpu').env(self.env).build()
        # normal agent
        agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        agent.act(self.env.state)
        # test agent
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
        # test save/load
        preset.save('test_preset.pt')
        preset = torch.load('test_preset.pt')
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
class TestParallelEnvExperiment(unittest.TestCase):
    def setUp(self):
        np.random.seed(0)
        torch.manual_seed(0)
        self.env = GymEnvironment('CartPole-v0')
        self.env.seed(0)
        self.experiment = MockExperiment(self.make_agent(), self.env, quiet=True)
        for i, env in enumerate(self.experiment._envs):
            env.seed(i)

    def test_adds_default_label(self):
        self.assertEqual(self.experiment._writer.label, "a2c_CartPole-v0")

    def test_adds_custom_label(self):
        env = GymEnvironment('CartPole-v0')
        experiment = MockExperiment(self.make_agent(), env, name='a2c', quiet=True)
        self.assertEqual(experiment._writer.label, "a2c_CartPole-v0")

    def test_writes_training_returns_eps(self):
        self.experiment.train(episodes=3)
        np.testing.assert_equal(
            self.experiment._writer.data["evaluation/returns/episode"]["steps"],
            np.array([1, 2, 3]),
        )
        np.testing.assert_equal(
            self.experiment._writer.data["evaluation/returns/episode"]["values"],
            np.array([10., 11., 17.]),
        )

    def test_writes_test_returns(self):
        self.experiment.train(episodes=5)
        returns = self.experiment.test(episodes=4)
        self.assertEqual(len(returns), 4)
        np.testing.assert_equal(
            self.experiment._writer.data["evaluation/returns-test/mean"]["values"],
            np.array([np.mean(returns)]),
        )
        np.testing.assert_equal(
            self.experiment._writer.data["evaluation/returns-test/std"]["values"],
            np.array([np.std(returns)]),
        )

    def test_writes_loss(self):
        experiment = MockExperiment(self.make_agent(), self.env, quiet=True, write_loss=True)
        self.assertTrue(experiment._writer.write_loss)
        experiment = MockExperiment(self.make_agent(), self.env, quiet=True, write_loss=False)
        self.assertFalse(experiment._writer.write_loss)

    def make_agent(self):
        return a2c.device('cpu').env(self.env).build()
def setUp(self):
    np.random.seed(0)
    torch.manual_seed(0)
    self.env = GymEnvironment('CartPole-v0')
    self.experiment = MockExperiment(a2c(), self.env, quiet=True)
    for i, env in enumerate(self.experiment._envs):
        env.seed(i)
def main(): parser = argparse.ArgumentParser( description="Run a classic control benchmark.") parser.add_argument("env", help="Name of the env (e.g. CartPole-v1).") parser.add_argument( "agent", help="Name of the agent (e.g. dqn). See presets for available agents.") parser.add_argument( "--device", default="cuda", help= "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).", ) parser.add_argument("--frames", type=int, default=20000, help="The number of training frames.") parser.add_argument("--render", type=bool, default=False, help="Render the environment.") args = parser.parse_args() env = GymEnvironment(args.env, device=args.device) agent_name = args.agent agent = getattr(classic_control, agent_name) run_experiment(agent(device=args.device), env, args.frames, render=args.render)
def main(): parser = argparse.ArgumentParser(description="Watch a continuous agent.") parser.add_argument("env", help="ID of the Environment") parser.add_argument("dir", help="Directory where the agent's model was saved.") parser.add_argument( "--device", default="cpu", help= "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)", ) parser.add_argument( "--fps", default=120, help="Playback speed", ) args = parser.parse_args() if args.env in ENVS: env_id = ENVS[args.env] else: env_id = args.env env = GymEnvironment(env_id, device=args.device) agent = TimeFeature(GreedyAgent.load(args.dir, env)) watch(agent, env, fps=args.fps)
def run_atari():
    parser = argparse.ArgumentParser(description="Run a continuous actions benchmark.")
    parser.add_argument("env", help="Name of the env (see envs).")
    parser.add_argument(
        "agent",
        help="Name of the agent (e.g. actor_critic). See presets for available agents.",
    )
    parser.add_argument(
        "--frames", type=int, default=2e6, help="The number of training frames."
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument(
        "--render", action="store_true", default=False, help="Whether to render the environment."
    )
    args = parser.parse_args()

    if args.env in envs:
        env_id = envs[args.env]
    else:
        env_id = args.env

    env = GymEnvironment(env_id, device=args.device)
    agent_name = args.agent
    agent = getattr(continuous, agent_name)

    Experiment(
        agent(device=args.device), env, frames=args.frames, render=args.render
    )
def __init__(self, env, frames=None, episodes=None):
    if frames is None:
        frames = np.inf
    if episodes is None:
        episodes = np.inf
    if isinstance(env, str):
        self.env = GymEnvironment(env)
    else:
        self.env = env
    self._max_frames = frames
    self._max_episodes = episodes
    self._agent = None
    self._episode = None
    self._frames = None
    self._writer = None
    self._render = None
    self._console = None
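A usage sketch (`MyExperiment` is a hypothetical stand-in for whatever subclass defines this `__init__`): because a string id is wrapped in `GymEnvironment` automatically, both constructions below are equivalent:

# experiment = MyExperiment('CartPole-v0', frames=100000)
# experiment = MyExperiment(GymEnvironment('CartPole-v0'), frames=100000)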
class TestExperiment(unittest.TestCase):
    def setUp(self):
        np.random.seed(0)
        torch.manual_seed(0)
        self.env = GymEnvironment('CartPole-v0')
        self.env.seed(0)
        self.experiment = None

    def test_adds_label(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True, episodes=3)
        self.assertEqual(experiment._writer.label, "_dqn_CartPole-v0")

    def test_writes_returns_eps(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True, episodes=3)
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns/episode"]["values"],
            np.array([14.0, 19.0, 26.0]),
        )
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns/episode"]["steps"],
            np.array([1, 2, 3]),
        )

    def test_writes_loss(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True, write_loss=True, episodes=3)
        self.assertTrue(experiment._writer.write_loss)
        experiment = MockExperiment(dqn(), self.env, quiet=True, write_loss=False, episodes=3)
        self.assertFalse(experiment._writer.write_loss)

    def test_runs_multi_env(self):
        experiment = MockExperiment(a2c(n_envs=3), self.env, quiet=True, episodes=3)
        self.assertEqual(
            len(experiment._writer.data["evaluation/returns/episode"]["values"]),
            3
        )
def main():
    DEVICE = 'cpu'
    # DEVICE = 'cuda'  # uncomment for gpu support
    timesteps = 40000
    run_experiment(
        [
            # DQN with default hyperparameters
            dqn.device(DEVICE),
            # DQN with custom hyperparameters and a custom name
            dqn.device(DEVICE).hyperparameters(replay_buffer_size=100).name('dqn-small-buffer'),
            # A2C with a custom name
            a2c.device(DEVICE).name('not-dqn')
        ],
        [
            GymEnvironment('CartPole-v0', DEVICE),
            GymEnvironment('Acrobot-v1', DEVICE)
        ],
        timesteps,
    )
    plot_returns_100('runs', timesteps=timesteps)
def run():
    parser = argparse.ArgumentParser(description="Run a continuous actions benchmark.")
    parser.add_argument("env", help="Name of the env (see envs).")
    parser.add_argument(
        "agent",
        help="Name of the agent (e.g. cacla). See presets for available agents."
    )
    parser.add_argument(
        "--episodes", type=int, default=2000, help="The number of training episodes."
    )
    parser.add_argument(
        "--frames", type=int, default=6e10, help="The number of training frames."
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument(
        "--render", action="store_true", default=False, help="Whether to render the environment."
    )
    args = parser.parse_args()

    # create the environment
    env = GymEnvironment(args.env, device=args.device)
    agent_name = args.agent
    agent = getattr(presets, agent_name)

    # configure the desired baseline (run sequentially)
    run_baseline = False
    baseline_agent_name = "cacla"
    baseline_agent = getattr(presets, baseline_agent_name)

    result_dir = create_result_dir(agent_name, args.env)
    num_repeats = 20
    for i in range(num_repeats):
        # run the experiment
        OptimisationExperiment(
            agent(device=args.device),
            env,
            episodes=args.episodes,
            frames=args.frames,
            render=args.render,
            writer=_make_writer(agent_name, env.name, True, result_dir),
            write_episode_return=True
        )
        if run_baseline:
            # run the baseline agent for comparison
            OptimisationExperiment(
                baseline_agent(device=args.device),
                env,
                episodes=args.episodes,
                frames=args.frames,
                render=args.render
            )
def watch():
    parser = argparse.ArgumentParser(description="Run an Atari benchmark.")
    parser.add_argument(
        "env", help="Name of the environment (e.g. RoboschoolHalfCheetah-v1)."
    )
    parser.add_argument("dir", help="Directory where the agent's model was saved.")
    parser.add_argument(
        "--device",
        default="cpu",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    args = parser.parse_args()
    env = GymEnvironment(args.env, device=args.device)
    load_and_watch(args.dir, env)
def make_env(using_lander_reward_shaping=False):
    env = gym.make('LunarLanderContinuous-v2')
    env.action_space = spaces.Discrete(n_act_dim)

    # Override the step function: before running the original step function,
    # call disc_to_cont to convert the discrete action to a continuous action.
    env.unwrapped._step_orig = env.unwrapped.step

    def _step(self, action):
        obs, r, done, info = self._step_orig(disc_to_cont(action))
        return obs, r, done, info

    env.unwrapped.step = types.MethodType(_step, env.unwrapped)
    env.unwrapped.using_lander_reward_shaping = using_lander_reward_shaping
    env = GymEnvironment(env, device="cuda")
    return env
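`disc_to_cont` is referenced above but not defined in this snippet. A minimal sketch, assuming the discrete actions enumerate a fixed grid of (main engine, side engine) thrust pairs for LunarLanderContinuous-v2; the actual mapping may differ:

import numpy as np

# Hypothetical mapping from a discrete action index to a continuous
# (main engine, side engine) command; the six pairs are illustrative.
THRUST_PAIRS = np.array([
    [0.0, 0.0],   # no-op
    [1.0, 0.0],   # main engine
    [0.0, -1.0],  # left engine
    [0.0, 1.0],   # right engine
    [1.0, -1.0],  # main + left
    [1.0, 1.0],   # main + right
])
n_act_dim = len(THRUST_PAIRS)

def disc_to_cont(action):
    return THRUST_PAIRS[int(action)]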
def main(): parser = argparse.ArgumentParser(description="Run an Atari benchmark.") parser.add_argument( "env", help="Name of the environment (e.g. RoboschoolHalfCheetah-v1") parser.add_argument("filename", help="File where the model was saved.") parser.add_argument( "--device", default="cuda", help= "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)", ) parser.add_argument( "--fps", default=60, help="Playback speed", ) args = parser.parse_args() env = GymEnvironment(args.env, device=args.device) load_and_watch(args.filename, env, fps=args.fps)
def train_dqn_all():
    from all.environments import GymEnvironment
    from all.presets.classic_control import ddqn
    from all.experiments import SingleEnvExperiment

    if Settings.CUDA:
        device = "cuda"
    else:
        device = "cpu"

    env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=device)
    preset = ddqn(
        device=device,
        lr=Settings.LEARNING_RATE,
        initial_exploration=Settings.EPS_START,
        final_exploration=Settings.EPS_END
    )
    experiment = SingleEnvExperiment(preset, env)
    experiment.train(1e6)

    # move the logs from the default location to the configured one
    default_log_dir = experiment._writer.log_dir
    copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
    rmtree(default_log_dir)
def main(): parser = argparse.ArgumentParser( description="Run a continuous actions benchmark.") parser.add_argument("env", help="Name of the env (see envs)") parser.add_argument( "agent", help="Name of the agent (e.g. ddpg). See presets for available agents." ) parser.add_argument( "--device", default="cuda", help= "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).", ) parser.add_argument("--frames", type=int, default=2e6, help="The number of training frames.") parser.add_argument("--render", type=bool, default=False, help="Render the environment.") parser.add_argument("--logdir", default='runs', help="The base logging directory.") args = parser.parse_args() if args.env in ENVS: env_id = ENVS[args.env] else: env_id = args.env env = GymEnvironment(env_id, device=args.device) agent_name = args.agent agent = getattr(continuous, agent_name) run_experiment(agent(device=args.device), env, frames=args.frames, render=args.render, logdir=args.logdir)