def main(_):
  batch_size = 64  # used in qfunc and runner.
  env = environment_impl.GymEnvironment(gym.make('MountainCar-v0'))
  env.SetGymEnvMaxEpisodeSteps(400)
  qfunc = qfunc_impl.DQN(
      model=qfunc_impl.CreateModel(
          state_shape=env.GetStateShape(),
          action_space_size=env.GetActionSpaceSize(),
          hidden_layer_sizes=(64,)),
      training_batch_size=batch_size,
      discount_factor=0.99,
  )
  qfunc.Load('saved_models/mountaincar_shape_64_rmsprop_gamma_099.weights')
  policy = policy_impl.GreedyPolicy()
  runner = runner_impl.NoOpRunner()

  env.TurnOnRendering(should_render=True, fps=24)
  logging.ENV.debug_verbosity = 9
  env.StartRecording(video_filename='mountaincar_demo.mp4')

  # First 5 runs with random actions:
  rand_qfunc = qfunc_impl.RandomQFunction(env.GetActionSpaceSize())
  runner.Run(env=env, brain=rand_qfunc, policy=policy, num_of_episodes=5)
  # Then 10 runs with trained qfunc:
  runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=10)

  env.StopRecording()
def main(_):
  batch_size = 64  # used in qfunc and runner.
  env = environment_impl.GymEnvironment(gym.make('CartPole-v0'))
  qfunc = qfunc_impl.DQN(
      model=qfunc_impl.CreateModel(
          state_shape=env.GetStateShape(),
          action_space_size=env.GetActionSpaceSize(),
          hidden_layer_sizes=(20, 20, 20)),
      training_batch_size=batch_size,
      discount_factor=0.99,
  )
  runner = runner_impl.ExperienceReplayRunner(
      experience_capacity=100000,
      experience_sample_batch_size=batch_size)

  # Train 500 episodes.
  logging.ENV.debug_verbosity = 3
  policy = policy_impl.GreedyPolicyWithRandomness(epsilon=0.1)
  runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=500)

  # Test for 100 episodes.
  logging.ENV.debug_verbosity = 4
  policy = policy_impl.GreedyPolicy()
  runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=100)

  # Demo with video.
  env.TurnOnRendering(should_render=True, fps=24)
  # env.StartRecording(video_filename='demo.mp4')  # uncomment to record video.
  # First 5 runs with random actions:
  runner.Run(
      env=env,
      brain=qfunc_impl.RandomQFunction(env.GetActionSpaceSize()),
      policy=policy,
      num_of_episodes=5)
  # Then 10 runs with trained qfunc:
  runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=10)
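The CartPole snippet above trains the network but never persists it, while the MountainCar and Acrobot snippets load weights from saved_models/. A minimal sketch of the missing step, assuming DQN exposes a Save counterpart to the Load used in the MountainCar snippet (the method name is an assumption, not confirmed by this codebase):

# Sketch: persist the trained weights so a later demo can restore them with
# qfunc.Load(...). `Save` is assumed to mirror `Load`; verify the actual API.
qfunc.Save('saved_models/cartpole_shape_20-20-20_gamma_0.99.weights')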
def setUp(self) -> None:
  self.env = environment_impl.SingleStateEnvironment(
      action_space_size=1, step_limit=10)
  self.qfunc = qfunc_impl.MemoizationQFunction(
      action_space_size=3, discount_factor=0.9, learning_rate=0.9)
  self.policy = policy_impl.GreedyPolicy()
def test_GreedyPolicy_choosesOptimalAction(self):
  mock_qfunc = mock.MagicMock()
  mock_qfunc.GetValues.return_value = numpy.array([[0.3, 0.7]])
  policy = policy_impl.GreedyPolicy()
  self.assertArrayEq(
      numpy.array([[0, 1]]),
      policy.Decide(
          env=environment_impl.SingleStateEnvironment(
              action_space_size=2, step_limit=10),
          brain=mock_qfunc,
          state=numpy.array([[0]]),
          episode_idx=0,
          num_of_episodes=500))
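For contrast, a sketch of an epsilon-greedy counterpart built from the same pieces: GreedyPolicyWithRandomness comes from the CartPole snippet, and the mock setup mirrors the greedy test above. The test name and the assumption that epsilon=0.0 makes the policy purely greedy are mine, not confirmed by the codebase:

def test_GreedyPolicyWithRandomness_epsilonZeroIsGreedy(self):
  # Sketch: with epsilon=0.0 the policy is assumed never to explore, so it
  # should pick the same one-hot action as GreedyPolicy would.
  mock_qfunc = mock.MagicMock()
  mock_qfunc.GetValues.return_value = numpy.array([[0.3, 0.7]])
  policy = policy_impl.GreedyPolicyWithRandomness(epsilon=0.0)
  self.assertArrayEq(
      numpy.array([[0, 1]]),
      policy.Decide(
          env=environment_impl.SingleStateEnvironment(
              action_space_size=2, step_limit=10),
          brain=mock_qfunc,
          state=numpy.array([[0]]),
          episode_idx=0,
          num_of_episodes=500))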
def Demo(_):
  env = screen_learning.ScreenGymEnvironment(gym.make('SpaceInvaders-v0'))
  brain = a3c_impl.A3C(model=a3c_impl.CreateModel(
      state_shape=env.GetStateShape(),
      action_space_size=env.GetActionSpaceSize(),
      hidden_layer_sizes=(12,),
  ))
  brain.Load('saved_models/a3c_invader.weights')
  policy = policy_impl.GreedyPolicy()
  env.StartRecording('a3c_invader.mp4')
  runner = runner_impl.SimpleRunner()
  runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=10)
  env.StopRecording()
def Demo(self, num_of_episodes: int = 10, save_video_to: t.Text = 'demo.mp4'):
  """Starts a demo run.

  Args:
    num_of_episodes: number of episodes to demo.
    save_video_to: saves the demo video for the run to a file of this name.
      It must end with .mp4.
  """
  self.env.TurnOnRendering(should_render=True, fps=24)
  self.env.StartRecording(video_filename=save_video_to)
  runner_impl.NoOpRunner().Run(
      env=self.env,
      brain=self.qfunc,
      policy=policy_impl.GreedyPolicy(),
      num_of_episodes=num_of_episodes)
  self.env.StopRecording()
  self.env.TurnOnRendering(should_render=False)
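A usage sketch for this helper; `pipeline` is a hypothetical instance of the class defining Demo, with self.env and self.qfunc already initialized as in the snippets above:

# `pipeline` is a hypothetical instance name; Demo renders, records the run
# to the given file, then turns rendering back off as defined above.
pipeline.Demo(num_of_episodes=3, save_video_to='cartpole_demo.mp4')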
def main(_):
  batch_size = 64  # used in qfunc and runner.
  env = environment_impl.GymEnvironment(gym.make('Acrobot-v1'))
  qfunc = qfunc_impl.DQN(
      model=qfunc_impl.CreateModel(
          state_shape=env.GetStateShape(),
          action_space_size=env.GetActionSpaceSize(),
          hidden_layer_sizes=(20, 20, 20)),
      training_batch_size=batch_size,
      discount_factor=0.99,
  )
  qfunc.LoadModel(
      'saved_models/acrobot_v1_shape_20-20-20_rmsprop_gamma_0.99.model')
  policy = policy_impl.GreedyPolicy()
  runner = runner_impl.NoOpRunner()

  env.TurnOnRendering(should_render=True, fps=10)
  logging.ENV.debug_verbosity = 9
  runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=10)
def test_GreedyPolicy(self):
  # Tests that it can run; quality is not important for this test.
  self.runner.Run(
      env=self.env,
      brain=self.qfunc,
      policy=policy_impl.GreedyPolicy(),
      num_of_episodes=1)