Code Example #1
def main():
    # run on gpu
    device = 'cuda'

    def get_agents(preset):
        agents = [
            getattr(preset, agent_name)
            for agent_name in preset.__all__  # use the passed-in preset module, not classic_control
        ]
        return [agent(device=device) for agent in agents]

    SlurmExperiment(get_agents(atari),
                    AtariEnvironment('Breakout', device=device),
                    2e7,
                    sbatch_args={'partition': '1080ti-long'})

    SlurmExperiment(get_agents(classic_control),
                    GymEnvironment('CartPole-v0', device=device),
                    100000,
                    sbatch_args={'partition': '1080ti-short'})

    SlurmExperiment(get_agents(continuous),
                    GymEnvironment('LunarLanderContinuous-v2', device=device),
                    500000,
                    sbatch_args={'partition': '1080ti-short'})
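
Note: a minimal set of imports this snippet assumes, following the autonomous-learning-library package layout (treat the exact module paths as assumptions rather than verbatim source):

# Assumed imports for the snippet above.
from all.environments import AtariEnvironment, GymEnvironment
from all.experiments import SlurmExperiment
from all.presets import atari, classic_control, continuous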
Code Example #2
class TestContinuousPresets(unittest.TestCase):
    def setUp(self):
        self.env = GymEnvironment('LunarLanderContinuous-v2')
        self.env.reset()

    def tearDown(self):
        if os.path.exists('test_preset.pt'):
            os.remove('test_preset.pt')

    def test_ddpg(self):
        self.validate(ddpg)

    def test_ppo(self):
        self.validate(ppo)

    def test_sac(self):
        self.validate(sac)

    def validate(self, builder):
        preset = builder.device('cpu').env(self.env).build()
        # normal agent
        agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        agent.act(self.env.state)
        # test agent
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
        # test save/load
        preset.save('test_preset.pt')
        preset = torch.load('test_preset.pt')
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
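
Note: preset.save pickles the entire Preset object, so the torch.load call above must be allowed to unpickle arbitrary classes. On recent PyTorch releases (2.6+), where torch.load defaults to weights_only=True, the load needs an explicit opt-out; a hedged sketch:

# Sketch for newer PyTorch versions; the test as written targets older releases.
preset = torch.load('test_preset.pt', weights_only=False)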
Code Example #3
def setUp(self):
    self.agent = MockAgent()
    self.env = GymEnvironment('PongNoFrameskip-v4')
    self.body = DeepmindAtariBody(
        ToLegacyBody(self.agent),
        self.env,
        noop_max=0,
        frame_stack=1,
    )
Code Example #4
def main():
    parser = argparse.ArgumentParser(description="Watch a continuous agent.")
    parser.add_argument("env", help="ID of the Environment")
    parser.add_argument("filename", help="File where the model was saved.")
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument(
        "--fps",
        type=int,
        default=120,
        help="Playback speed in frames per second.",
    )
    args = parser.parse_args()

    if args.env in ENVS:
        env = GymEnvironment(args.env, device=args.device)
    elif 'BulletEnv' in args.env or args.env in PybulletEnvironment.short_names:
        env = PybulletEnvironment(args.env, device=args.device)
    else:
        env = GymEnvironment(args.env, device=args.device)

    load_and_watch(args.filename, env, fps=args.fps)
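
Note: a rough programmatic equivalent of one invocation of this script; the script name, environment id, and model path are all illustrative, and load_and_watch is assumed importable from all.experiments:

# Roughly: python watch_continuous.py LunarLanderContinuous-v2 preset.pt --device cpu --fps 60
from all.environments import GymEnvironment
from all.experiments import load_and_watch

env = GymEnvironment('LunarLanderContinuous-v2', device='cpu')
load_and_watch('preset.pt', env, fps=60)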
Code Example #5
def setUp(self):
    np.random.seed(0)
    torch.manual_seed(0)
    self.env = GymEnvironment('CartPole-v0')
    self.env.seed(0)
    self.experiment = MockExperiment(self.make_agent(), self.env, quiet=True)
    self.experiment._env.seed(0)
Code Example #6
def test_reset(self):
    env = GymEnvironment('CartPole-v0')
    state = env.reset()
    self.assertEqual(state.observation.shape, (4,))
    self.assertEqual(state.reward, 0)
    self.assertFalse(state.done)
    self.assertEqual(state.mask, 1)
Code Example #7
def test_reset_preconstructed_env(self):
    env = GymEnvironment(gym.make('CartPole-v0'))
    state = env.reset()
    self.assertEqual(state.observation.shape, (4,))
    self.assertEqual(state.reward, 0)
    self.assertFalse(state.done)
    self.assertEqual(state.mask, 1)
Code Example #8
def setUp(self):
    self.env = GymEnvironment('LunarLanderContinuous-v2')
    self.env.reset()
    self.parallel_env = DuplicateEnvironment([
        GymEnvironment('LunarLanderContinuous-v2'),
        GymEnvironment('LunarLanderContinuous-v2'),
    ])
    self.parallel_env.reset()
Code Example #9
class RainbowAgent(dqn.RLAgent):

    def __init__(self):
        super().__init__()

        if Settings.CUDA:
            self.device = "cuda"
        else:
            self.device = "cpu"

        self.env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=self.device)
        self.agent = None

    @classmethod
    def load(cls, path):
        rl_agent = cls()
        agent = GreedyAgent.load(path, rl_agent.env)
        rl_agent.agent = agent
        return rl_agent

    @classmethod
    def train(cls, num_frames: int):
        rl_agent = cls()
        preset = rainbow(
            device=rl_agent.device,
            lr=Settings.LEARNING_RATE,
        )
        experiment = SingleEnvExperiment(preset, rl_agent.env)
        experiment.train(num_frames)
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    @classmethod
    def resume_training(cls, path, num_frames: int):
        rl_agent = cls()
        lr = Settings.LEARNING_RATE
        agent = rainbow(device=rl_agent.device, lr=lr)
        q_dist_module = torch.load(os.path.join(path, "q_dist.pt"), map_location='cpu').to(rl_agent.device)
        experiment = SingleEnvExperiment(agent, rl_agent.env)
        agent = experiment._agent
        old_q_dist = agent.q_dist
        old_q_dist.model.load_state_dict(q_dist_module.state_dict())
        experiment.train(frames=num_frames)
        default_log_dir = experiment._writer.log_dir
        copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
        rmtree(default_log_dir)
        rl_agent.env.close()

    def get_control(self, state: prediction.HighwayState) -> float:
        vector_state = dqn.get_state_vector_from_base_state(state)
        encoded_state = self.env._make_state(vector_state, False)
        action = self.agent.eval(encoded_state, 0).item()
        return Settings.JERK_VALUES_DQN[action]

    def _cleanup(self):
        self.env.close()
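
Note: a hypothetical driver for the wrapper above; the frame count and save path are illustrative, and get_control expects a prediction.HighwayState from the surrounding project:

# Hypothetical usage of RainbowAgent; paths and values are illustrative.
RainbowAgent.train(num_frames=100_000)      # train a fresh rainbow preset
agent = RainbowAgent.load('runs/rainbow')   # wrap a saved GreedyAgent
# jerk = agent.get_control(state)           # state: a prediction.HighwayState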
Code Example #10
class TestContinuousPresets(unittest.TestCase):
    def setUp(self):
        self.env = GymEnvironment('LunarLanderContinuous-v2')
        self.env.reset()
        self.parallel_env = DuplicateEnvironment([
            GymEnvironment('LunarLanderContinuous-v2'),
            GymEnvironment('LunarLanderContinuous-v2'),
        ])
        self.parallel_env.reset()

    def tearDown(self):
        if os.path.exists('test_preset.pt'):
            os.remove('test_preset.pt')

    def test_ddpg(self):
        self.validate(ddpg)

    def test_ppo(self):
        self.validate(ppo)

    def test_sac(self):
        self.validate(sac)

    def validate(self, builder):
        preset = builder.device('cpu').env(self.env).build()
        if isinstance(preset, ParallelPreset):
            return self.validate_parallel_preset(preset)
        return self.validate_standard_preset(preset)

    def validate_standard_preset(self, preset):
        # train agent
        agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        agent.act(self.env.state)
        # test agent
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
        # test save/load
        preset.save('test_preset.pt')
        preset = torch.load('test_preset.pt')
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)

    def validate_parallel_preset(self, preset):
        # train agent
        agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        agent.act(self.parallel_env.state_array)
        # test agent
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
        # parallel test_agent
        parallel_test_agent = preset.test_agent()
        parallel_test_agent.act(self.parallel_env.state_array)
        # test save/load
        preset.save('test_preset.pt')
        preset = torch.load('test_preset.pt')
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
Code Example #11
    def __init__(self):
        super().__init__()

        if Settings.CUDA:
            self.device = "cuda"
        else:
            self.device = "cpu"

        self.env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=self.device)
        self.agent = None
Code Example #12
class TestSingleEnvExperiment(unittest.TestCase):
    def setUp(self):
        np.random.seed(0)
        torch.manual_seed(0)
        self.env = GymEnvironment('CartPole-v0')
        self.env.seed(0)
        self.experiment = None

    def test_adds_label(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True)
        self.assertEqual(experiment._writer.label, "_dqn_CartPole-v0")

    def test_writes_training_returns_eps(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True)
        experiment.train(episodes=3)
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns/episode"]["values"],
            np.array([22.0, 20.0, 24.0]),
        )
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns/episode"]["steps"],
            np.array([1, 2, 3]),
        )

    def test_writes_test_returns(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True)
        experiment.train(episodes=5)
        returns = experiment.test(episodes=4)
        expected_mean = 9.5
        expected_std = 0.5
        np.testing.assert_equal(np.mean(returns), expected_mean)
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns-test/mean"]["values"],
            np.array([expected_mean]),
        )
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns-test/std"]["values"],
            np.array([expected_std]),
        )
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns-test/mean"]["steps"],
            np.array([95.]),
        )

    def test_writes_loss(self):
        experiment = MockExperiment(dqn(),
                                    self.env,
                                    quiet=True,
                                    write_loss=True)
        self.assertTrue(experiment._writer.write_loss)
        experiment = MockExperiment(dqn(),
                                    self.env,
                                    quiet=True,
                                    write_loss=False)
        self.assertFalse(experiment._writer.write_loss)
Code Example #13
def main():
    parser = argparse.ArgumentParser(description="Run a continuous actions benchmark.")
    parser.add_argument("env", help="Name of the env (e.g. 'lander', 'cheetah')")
    parser.add_argument(
        "agent", help="Name of the agent (e.g. ddpg). See presets for available agents."
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument(
        "--frames", type=int, default=int(2e6), help="The number of training frames."
    )
    parser.add_argument(
        "--render", action="store_true", default=False, help="Render the environment."
    )
    parser.add_argument(
        "--logdir", default='runs', help="The base logging directory."
    )
    parser.add_argument("--writer", default='tensorboard', help="The backend used for tracking experiment metrics.")
    parser.add_argument(
        '--hyperparameters',
        default=[],
        nargs='*',
        help="Custom hyperparameters, in the format hyperparameter1=value1 hyperparameter2=value2 etc."
    )
    args = parser.parse_args()

    if args.env in ENVS:
        env = GymEnvironment(args.env, device=args.device)
    elif 'BulletEnv' in args.env or args.env in PybulletEnvironment.short_names:
        env = PybulletEnvironment(args.env, device=args.device)
    else:
        env = GymEnvironment(args.env, device=args.device)

    agent_name = args.agent
    agent = getattr(continuous, agent_name)
    agent = agent.device(args.device)

    # parse hyperparameters
    hyperparameters = {}
    for hp in args.hyperparameters:
        key, value = hp.split('=', 1)  # split on the first '=' so values may contain '='
        hyperparameters[key] = type(agent.default_hyperparameters[key])(value)
    agent = agent.hyperparameters(**hyperparameters)

    run_experiment(
        agent,
        env,
        frames=args.frames,
        render=args.render,
        logdir=args.logdir,
        writer=args.writer,
    )
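
Note: the hyperparameter coercion above, isolated: each value string is cast to the type of the preset's default, so an int default yields int(value) and a float default yields float(value). A self-contained sketch with illustrative defaults:

# Illustrative defaults standing in for agent.default_hyperparameters.
defaults = {'lr': 1e-4, 'batch_size': 32}
raw = ['lr=3e-4', 'batch_size=64']
parsed = {key: type(defaults[key])(value)
          for key, value in (hp.split('=', 1) for hp in raw)}
assert parsed == {'lr': 3e-4, 'batch_size': 64}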
Code Example #14
def test_step_until_done(self):
    env = GymEnvironment('CartPole-v0')
    env.reset()
    for _ in range(100):
        state = env.step(1)
        if state.done:
            break
    self.assertEqual(state.observation.shape, (4,))
    self.assertEqual(state.reward, 1.)
    self.assertTrue(state.done)
    self.assertEqual(state.mask, 0)
Code Example #15
def main():
    device = 'cpu'
    timesteps = 40000
    run_experiment(
        [dqn(), a2c()],
        [
            GymEnvironment('CartPole-v0', device),
            GymEnvironment('Acrobot-v1', device)
        ],
        timesteps,
    )
    plot_returns_100('runs', timesteps=timesteps)
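
Note: run_experiment pairs every agent with every environment, so the call above launches four runs (dqn and a2c on each of CartPole-v0 and Acrobot-v1), and plot_returns_100 reads them all back from the 'runs' directory. A minimal illustration of the pairing, not library code:

# Illustrative only: the agent-by-environment cross product run_experiment performs.
agents = ['dqn', 'a2c']
envs = ['CartPole-v0', 'Acrobot-v1']
runs = [(agent, env) for agent in agents for env in envs]
assert len(runs) == 4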
Code Example #16
class TestClassicControlPresets(unittest.TestCase):
    def setUp(self):
        self.env = GymEnvironment('CartPole-v0')
        self.env.reset()

    def tearDown(self):
        if os.path.exists('test_preset.pt'):
            os.remove('test_preset.pt')

    def test_a2c(self):
        self.validate(a2c)

    def test_c51(self):
        self.validate(c51)

    def test_ddqn(self):
        self.validate(ddqn)

    def test_dqn(self):
        self.validate(dqn)

    def test_ppo(self):
        self.validate(ppo)

    def test_rainbow(self):
        self.validate(rainbow)

    def test_vac(self):
        self.validate(vac)

    def test_vpg(self):
        self.validate(vpg)

    def test_vsarsa(self):
        self.validate(vsarsa)

    def test_vqn(self):
        self.validate(vqn)

    def validate(self, builder):
        preset = builder.device('cpu').env(self.env).build()
        # normal agent
        agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        agent.act(self.env.state)
        # test agent
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
        # test save/load
        preset.save('test_preset.pt')
        preset = torch.load('test_preset.pt')
        test_agent = preset.test_agent()
        test_agent.act(self.env.state)
Code Example #17
class TestParallelEnvExperiment(unittest.TestCase):
    def setUp(self):
        np.random.seed(0)
        torch.manual_seed(0)
        self.env = GymEnvironment('CartPole-v0')
        self.env.seed(0)
        self.experiment = MockExperiment(self.make_agent(), self.env, quiet=True)
        for i, env in enumerate(self.experiment._envs):
            env.seed(i)

    def test_adds_default_label(self):
        self.assertEqual(self.experiment._writer.label, "a2c_CartPole-v0")

    def test_adds_custom_label(self):
        env = GymEnvironment('CartPole-v0')
        experiment = MockExperiment(self.make_agent(), env, name='a2c', quiet=True)
        self.assertEqual(experiment._writer.label, "a2c_CartPole-v0")

    def test_writes_training_returns_eps(self):
        self.experiment.train(episodes=3)
        np.testing.assert_equal(
            self.experiment._writer.data["evaluation/returns/episode"]["steps"],
            np.array([1, 2, 3]),
        )
        np.testing.assert_equal(
            self.experiment._writer.data["evaluation/returns/episode"]["values"],
            np.array([10., 11., 17.]),
        )

    def test_writes_test_returns(self):
        self.experiment.train(episodes=5)
        returns = self.experiment.test(episodes=4)
        self.assertEqual(len(returns), 4)
        np.testing.assert_equal(
            self.experiment._writer.data["evaluation/returns-test/mean"]["values"],
            np.array([np.mean(returns)]),
        )
        np.testing.assert_equal(
            self.experiment._writer.data["evaluation/returns-test/std"]["values"],
            np.array([np.std(returns)]),
        )

    def test_writes_loss(self):
        experiment = MockExperiment(self.make_agent(), self.env, quiet=True, write_loss=True)
        self.assertTrue(experiment._writer.write_loss)
        experiment = MockExperiment(self.make_agent(), self.env, quiet=True, write_loss=False)
        self.assertFalse(experiment._writer.write_loss)

    def make_agent(self):
        return a2c.device('cpu').env(self.env).build()
Code Example #18
def setUp(self):
    np.random.seed(0)
    torch.manual_seed(0)
    self.env = GymEnvironment('CartPole-v0')
    self.experiment = MockExperiment(a2c(), self.env, quiet=True)
    for i, env in enumerate(self.experiment._envs):
        env.seed(i)
Code Example #19
def main():
    parser = argparse.ArgumentParser(
        description="Run a classic control benchmark.")
    parser.add_argument("env", help="Name of the env (e.g. CartPole-v1).")
    parser.add_argument(
        "agent",
        help="Name of the agent (e.g. dqn). See presets for available agents.")
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument("--frames",
                        type=int,
                        default=20000,
                        help="The number of training frames.")
    parser.add_argument("--render",
                        type=bool,
                        default=False,
                        help="Render the environment.")
    args = parser.parse_args()

    env = GymEnvironment(args.env, device=args.device)
    agent_name = args.agent
    agent = getattr(classic_control, agent_name)

    run_experiment(agent(device=args.device),
                   env,
                   args.frames,
                   render=args.render)
Code Example #20
def main():
    parser = argparse.ArgumentParser(description="Watch a continuous agent.")
    parser.add_argument("env", help="ID of the Environment")
    parser.add_argument("dir",
                        help="Directory where the agent's model was saved.")
    parser.add_argument(
        "--device",
        default="cpu",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument(
        "--fps",
        type=int,
        default=120,
        help="Playback speed in frames per second.",
    )
    args = parser.parse_args()

    if args.env in ENVS:
        env_id = ENVS[args.env]
    else:
        env_id = args.env

    env = GymEnvironment(env_id, device=args.device)
    agent = TimeFeature(GreedyAgent.load(args.dir, env))
    watch(agent, env, fps=args.fps)
Code Example #21
def run_atari():
    parser = argparse.ArgumentParser(description="Run a continuous actions benchmark.")
    parser.add_argument("env", help="Name of the env (see envs)")
    parser.add_argument(
        "agent",
        help="Name of the agent (e.g. actor_critic). See presets for available agents.",
    )
    parser.add_argument(
        "--frames", type=int, default=int(2e6), help="The number of training frames."
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument(
        "--render", action="store_true", default=False, help="Render the environment."
    )
    args = parser.parse_args()

    if args.env in envs:
        env_id = envs[args.env]
    else:
        env_id = args.env

    env = GymEnvironment(env_id, device=args.device)
    agent_name = args.agent
    agent = getattr(continuous, agent_name)

    experiment = Experiment(
        agent(device=args.device), env, frames=args.frames, render=args.render
    )
Code Example #22
def __init__(self, env, frames=None, episodes=None):
    # Treat missing limits as unbounded.
    if frames is None:
        frames = np.inf
    if episodes is None:
        episodes = np.inf
    # Accept either an environment id string or a prebuilt environment.
    if isinstance(env, str):
        self.env = GymEnvironment(env)
    else:
        self.env = env
    self._max_frames = frames
    self._max_episodes = episodes
    # Per-run state, populated when the experiment starts.
    self._agent = None
    self._episode = None
    self._frames = None
    self._writer = None
    self._render = None
    self._console = None
Code Example #23
class TestExperiment(unittest.TestCase):
    def setUp(self):
        np.random.seed(0)
        torch.manual_seed(0)
        self.env = GymEnvironment('CartPole-v0')
        self.env.seed(0)
        self.experiment = None

    def test_adds_label(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True, episodes=3)
        self.assertEqual(experiment._writer.label, "_dqn_CartPole-v0")

    def test_writes_returns_eps(self):
        experiment = MockExperiment(dqn(), self.env, quiet=True, episodes=3)
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns/episode"]["values"],
            np.array([14.0, 19.0, 26.0]),
        )
        np.testing.assert_equal(
            experiment._writer.data["evaluation/returns/episode"]["steps"],
            np.array([1, 2, 3]),
        )

    def test_writes_loss(self):
        experiment = MockExperiment(dqn(),
                                    self.env,
                                    quiet=True,
                                    write_loss=True,
                                    episodes=3)
        self.assertTrue(experiment._writer.write_loss)
        experiment = MockExperiment(dqn(),
                                    self.env,
                                    quiet=True,
                                    write_loss=False,
                                    episodes=3)
        self.assertFalse(experiment._writer.write_loss)

    def test_runs_multi_env(self):
        experiment = MockExperiment(a2c(n_envs=3),
                                    self.env,
                                    quiet=True,
                                    episodes=3)
        self.assertEqual(
            len(experiment._writer.data["evaluation/returns/episode"]["values"]),
            3,
        )
Code Example #24
def main():
    DEVICE = 'cpu'
    # DEVICE = 'cuda' # uncomment for gpu support
    timesteps = 40000
    run_experiment(
        [
            # DQN with default hyperparameters
            dqn.device(DEVICE),
            # DQN with custom hyperparameters and a custom name.
            dqn.device(DEVICE).hyperparameters(replay_buffer_size=100).name('dqn-small-buffer'),
            # A2C with a custom name
            a2c.device(DEVICE).name('not-dqn')
        ],
        [
            GymEnvironment('CartPole-v0', DEVICE),
            GymEnvironment('Acrobot-v1', DEVICE)
        ],
        timesteps,
    )
    plot_returns_100('runs', timesteps=timesteps)
Code Example #25
File: main.py, Project: gordon-frost-hwu/ALLAgents
def run():
    parser = argparse.ArgumentParser(description="Run a continuous actions benchmark.")
    parser.add_argument("env", help="Name of the env (see envs)")
    parser.add_argument("agent", help="Name of the agent (e.g. cacla). See presets for available agents")

    parser.add_argument(
        "--episodes", type=int, default=2000, help="The number of training episodes"
    )
    parser.add_argument(
        "--frames", type=int, default=int(6e10), help="The number of training frames"
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)",
    )
    parser.add_argument(
        "--render", action="store_true", default=False, help="Whether to render the environment."
    )
    args = parser.parse_args()

    # create the environment
    env = GymEnvironment(args.env, device=args.device)

    agent_name = args.agent
    agent = getattr(presets, agent_name)

    # configure desired baseline (run sequentially)
    run_baseline = False
    baseline_agent_name = "cacla"
    baseline_agent = getattr(presets, baseline_agent_name)

    result_dir = create_result_dir(agent_name, args.env)

    num_repeats = 20
    for i in range(num_repeats):
        # run the experiment
        OptimisationExperiment(
            agent(device=args.device),
            env,
            episodes=args.episodes,
            frames=args.frames,
            render=args.render,
            writer=_make_writer(agent_name, env.name, True, result_dir),
            write_episode_return=True
        )

        if run_baseline:
            # run the baseline agent for comparison
            OptimisationExperiment(
                baseline_agent(device=args.device), env, episodes=args.episodes, frames=args.frames, render=args.render
            )
Code Example #26
def watch():
    parser = argparse.ArgumentParser(description="Run an Atari benchmark.")
    parser.add_argument(
        "env", help="Name of the environment (e.g. RoboschoolHalfCheetah-v1")
    parser.add_argument("dir",
                        help="Directory where the agent's model was saved.")
    parser.add_argument(
        "--device",
        default="cpu",
        help=
        "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)",
    )
    args = parser.parse_args()
    env = GymEnvironment(args.env, device=args.device)
    load_and_watch(args.dir, env)
Code Example #27
def make_env(using_lander_reward_shaping=False):
    env = gym.make('LunarLanderContinuous-v2')
    env.action_space = spaces.Discrete(n_act_dim)

    # Override the step function: before calling the original step, run
    # disc_to_cont to convert the discrete action to a continuous one.
    env.unwrapped._step_orig = env.unwrapped.step

    def _step(self, action):
        obs, r, done, info = self._step_orig(disc_to_cont(action))
        return obs, r, done, info

    env.unwrapped.step = types.MethodType(_step, env.unwrapped)
    env.unwrapped.using_lander_reward_shaping = using_lander_reward_shaping

    env = GymEnvironment(env, device="cuda")
    return env
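
Note: disc_to_cont and n_act_dim are referenced but not defined in this snippet. A hypothetical definition consistent with the usage above; the particular 3x3 grid of engine values is illustrative, not from the source:

import itertools
import numpy as np

# Hypothetical: discretize the lander's two continuous action dimensions
# (main engine, side engines) into three levels each, giving 9 discrete actions.
_ACTION_GRID = list(itertools.product((-1.0, 0.0, 1.0), repeat=2))
n_act_dim = len(_ACTION_GRID)  # 9

def disc_to_cont(action):
    # Map a discrete action index to a continuous action vector.
    return np.array(_ACTION_GRID[action])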
Code Example #28
def main():
    parser = argparse.ArgumentParser(description="Run an Atari benchmark.")
    parser.add_argument(
        "env", help="Name of the environment (e.g. RoboschoolHalfCheetah-v1")
    parser.add_argument("filename", help="File where the model was saved.")
    parser.add_argument(
        "--device",
        default="cuda",
        help=
        "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)",
    )
    parser.add_argument(
        "--fps",
        default=60,
        help="Playback speed",
    )
    args = parser.parse_args()
    env = GymEnvironment(args.env, device=args.device)
    load_and_watch(args.filename, env, fps=args.fps)
Code Example #29
File: dqn.py, Project: jlubars/RL-MPC-LaneMerging
def train_dqn_all():
    from all.environments import GymEnvironment
    from all.presets.classic_control import ddqn
    from all.experiments import SingleEnvExperiment

    if Settings.CUDA:
        device = "cuda"
    else:
        device = "cpu"

    env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=device)
    preset = ddqn(device=device,
                  lr=Settings.LEARNING_RATE,
                  initial_exploration=Settings.EPS_START,
                  final_exploration=Settings.EPS_END)
    experiment = SingleEnvExperiment(preset, env)
    experiment.train(1E6)
    default_log_dir = experiment._writer.log_dir
    copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
    rmtree(default_log_dir)
Code Example #30
def main():
    parser = argparse.ArgumentParser(
        description="Run a continuous actions benchmark.")
    parser.add_argument("env", help="Name of the env (see envs)")
    parser.add_argument(
        "agent",
        help="Name of the agent (e.g. ddpg). See presets for available agents."
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    parser.add_argument("--frames",
                        type=int,
                        default=int(2e6),
                        help="The number of training frames.")
    parser.add_argument("--render",
                        action="store_true",
                        default=False,
                        help="Render the environment.")
    parser.add_argument("--logdir",
                        default='runs',
                        help="The base logging directory.")
    args = parser.parse_args()

    if args.env in ENVS:
        env_id = ENVS[args.env]
    else:
        env_id = args.env

    env = GymEnvironment(env_id, device=args.device)
    agent_name = args.agent
    agent = getattr(continuous, agent_name)

    run_experiment(agent(device=args.device),
                   env,
                   frames=args.frames,
                   render=args.render,
                   logdir=args.logdir)