def main(_):
    """Demos a pre-trained DQN on MountainCar-v0 and records a video."""
    training_batch = 64  # shared by the q-function and the runner.

    env = environment_impl.GymEnvironment(gym.make('MountainCar-v0'))
    env.SetGymEnvMaxEpisodeSteps(400)

    model = qfunc_impl.CreateModel(
        state_shape=env.GetStateShape(),
        action_space_size=env.GetActionSpaceSize(),
        hidden_layer_sizes=(64, ))
    qfunc = qfunc_impl.DQN(
        model=model,
        training_batch_size=training_batch,
        discount_factor=0.99,
    )
    qfunc.Load('saved_models/mountaincar_shape_64_rmsprop_gamma_099.weights')

    policy = policy_impl.GreedyPolicy()
    runner = runner_impl.NoOpRunner()

    env.TurnOnRendering(should_render=True, fps=24)
    logging.ENV.debug_verbosity = 9

    env.StartRecording(video_filename='mountaincar_demo.mp4')
    # 5 warm-up episodes with random actions, for contrast:
    random_brain = qfunc_impl.RandomQFunction(env.GetActionSpaceSize())
    runner.Run(env=env, brain=random_brain, policy=policy, num_of_episodes=5)
    # Then 10 episodes with the trained q-function:
    runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=10)
    env.StopRecording()
def main(_):
    """Trains a DQN on CartPole-v0, evaluates it, then renders a demo."""
    batch = 64  # shared by the q-function and the experience-replay runner.
    env = environment_impl.GymEnvironment(gym.make('CartPole-v0'))
    brain = qfunc_impl.DQN(
        model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(20, 20, 20)),
        training_batch_size=batch,
        discount_factor=0.99,
    )
    runner = runner_impl.ExperienceReplayRunner(
        experience_capacity=100000, experience_sample_batch_size=batch)

    # Training phase: 500 episodes with an epsilon-greedy policy.
    logging.ENV.debug_verbosity = 3
    policy = policy_impl.GreedyPolicyWithRandomness(epsilon=0.1)
    runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=500)

    # Evaluation phase: 100 greedy episodes.
    logging.ENV.debug_verbosity = 4
    policy = policy_impl.GreedyPolicy()
    runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=100)

    # Demo phase, rendered on screen.
    env.TurnOnRendering(should_render=True, fps=24)
    # env.StartRecording(video_filename='demo.mp4')  # uncomment to record video.
    # 5 episodes with random actions, for contrast:
    runner.Run(env=env,
               brain=qfunc_impl.RandomQFunction(env.GetActionSpaceSize()),
               policy=policy,
               num_of_episodes=5)
    # Then 10 episodes with the trained q-function:
    runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=10)
# Beispiel #3
# 0
 def setUp(self) -> None:
   """Builds the fixtures: a single-state env, a q-function, and a policy."""
   self.env = environment_impl.SingleStateEnvironment(
     action_space_size=1, step_limit=10)
   # NOTE(review): the q-function is built with action_space_size=3 while the
   # environment above uses action_space_size=1 — confirm this mismatch is
   # intentional.
   self.qfunc = qfunc_impl.MemoizationQFunction(
     action_space_size=3,
     discount_factor=0.9,
     learning_rate=0.9)
   self.policy = policy_impl.GreedyPolicy()
    def test_GreedyPolicy_choosesOptimalAction(self):
        """GreedyPolicy picks the action whose Q-value is largest."""
        mock_qfunc = mock.MagicMock()
        mock_qfunc.GetValues.return_value = numpy.array([[0.3, 0.7]])

        test_env = environment_impl.SingleStateEnvironment(
            action_space_size=2, step_limit=10)
        decision = policy_impl.GreedyPolicy().Decide(
            env=test_env,
            brain=mock_qfunc,
            state=numpy.array([[0]]),
            episode_idx=0,
            num_of_episodes=500)
        # Action 1 has the larger value (0.7 > 0.3), so it is chosen one-hot.
        self.assertArrayEq(numpy.array([[0, 1]]), decision)
def Demo(_):
    """Replays a trained A3C agent on SpaceInvaders-v0 and records a video."""
    env = screen_learning.ScreenGymEnvironment(gym.make('SpaceInvaders-v0'))
    model = a3c_impl.CreateModel(
        state_shape=env.GetStateShape(),
        action_space_size=env.GetActionSpaceSize(),
        hidden_layer_sizes=(12, ),
    )
    brain = a3c_impl.A3C(model=model)
    brain.Load('saved_models/a3c_invader.weights')
    policy = policy_impl.GreedyPolicy()

    env.StartRecording('a3c_invader.mp4')
    runner = runner_impl.SimpleRunner()
    runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=10)
    env.StopRecording()
# Beispiel #6
# 0
    def Demo(self,
             num_of_episodes: int = 10,
             save_video_to: t.Text = 'demo.mp4'):
        """Runs a rendered demo of the trained q-function and records a video.

        Args:
          num_of_episodes: number of episodes to demo.
          save_video_to: file name to save the demo video under; it must end
            with ".mp4".
        """
        # Rendering is enabled only for the duration of the demo.
        self.env.TurnOnRendering(should_render=True, fps=24)
        self.env.StartRecording(video_filename=save_video_to)
        demo_runner = runner_impl.NoOpRunner()
        demo_runner.Run(env=self.env,
                        brain=self.qfunc,
                        policy=policy_impl.GreedyPolicy(),
                        num_of_episodes=num_of_episodes)
        self.env.StopRecording()
        self.env.TurnOnRendering(should_render=False)
# Beispiel #7
# 0
def main(_):
    """Replays a pre-trained DQN on Acrobot-v1 with on-screen rendering."""
    batch = 64  # shared by the q-function and the runner.
    env = environment_impl.GymEnvironment(gym.make('Acrobot-v1'))
    qfunc = qfunc_impl.DQN(
        model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(20, 20, 20)),
        training_batch_size=batch,
        discount_factor=0.99,
    )
    qfunc.LoadModel(
        'saved_models/acrobot_v1_shape_20-20-20_rmsprop_gamma_0.99.model')
    policy = policy_impl.GreedyPolicy()
    runner = runner_impl.NoOpRunner()

    env.TurnOnRendering(should_render=True, fps=10)
    logging.ENV.debug_verbosity = 9
    # NOTE(review): this call passes `qfunc=`, while other Run calls in this
    # file pass `brain=` — confirm which keyword this runner version expects.
    runner.Run(env=env, qfunc=qfunc, policy=policy, num_of_episodes=10)
# Beispiel #8
# 0
 def test_GreedyPolicy(self):
   """Smoke test: a greedy run completes; quality is not checked here."""
   self.runner.Run(
     env=self.env,
     qfunc=self.qfunc,
     policy=policy_impl.GreedyPolicy(),
     num_of_episodes=1)