def setUp(self) -> None:
    """Build the environment, Q-function and runner stored on this instance."""
    # NOTE(review): the environment is created with action_space_size=1 while
    # the Q-function uses action_space_size=3 -- confirm this is intentional.
    single_state_env = environment_impl.SingleStateEnvironment(
        action_space_size=1,
        step_limit=10,
    )
    self.env = single_state_env

    memo_qfunc = qfunc_impl.MemoizationQFunction(
        action_space_size=3,
        discount_factor=0.9,
        learning_rate=0.9,
    )
    self.qfunc = memo_qfunc

    self.runner = runner_impl.SimpleRunner()
def _RunEnv(gym_env):
    """Run 10 episodes on ``gym_env`` with a ``DQN_TargetNetwork`` Q-function.

    Args:
        gym_env: Object wrapped in a ``screen_learning.ScreenGymEnvironment``.
    """
    screen_env = screen_learning.ScreenGymEnvironment(gym_env)

    # The convolution model is sized from the wrapped environment's action space.
    conv_model = screen_learning.CreateConvolutionModel(
        action_space_size=screen_env.GetActionSpaceSize())
    target_dqn = qfunc_impl.DQN_TargetNetwork(model=conv_model)

    exploring_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

    simple_runner = runner_impl.SimpleRunner()
    simple_runner.Run(
        env=screen_env,
        qfunc=target_dqn,
        policy=exploring_policy,
        num_of_episodes=10,
    )
def _RunEnv(gym_env):
    """Run 10 episodes on ``gym_env`` with a ``RandomValueQFunction``.

    Args:
        gym_env: Object wrapped in an ``environment_impl.GymEnvironment``.
    """
    wrapped_env = environment_impl.GymEnvironment(gym_env)

    # The Q-function is sized from the wrapped environment's action space.
    action_count = wrapped_env.GetActionSpaceSize()
    random_qfunc = qfunc_impl.RandomValueQFunction(
        action_space_size=action_count)

    # Reset happens after the Q-function is built, before the run starts.
    wrapped_env.Reset()

    exploring_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

    simple_runner = runner_impl.SimpleRunner()
    simple_runner.Run(
        env=wrapped_env,
        qfunc=random_qfunc,
        policy=exploring_policy,
        num_of_episodes=10,
    )
def _RunEnv(gym_env):
    """Run one episode on ``gym_env`` with a ``MemoizationQFunction``.

    Args:
        gym_env: Object wrapped in an ``environment_impl.GymEnvironment``.
    """
    wrapped_env = environment_impl.GymEnvironment(gym_env)
    wrapped_env.SetGymEnvMaxEpisodeSteps(10)

    # The Q-function is sized from the wrapped environment's action space.
    action_count = wrapped_env.GetActionSpaceSize()
    memo_qfunc = qfunc_impl.MemoizationQFunction(
        action_space_size=action_count)

    wrapped_env.Reset()

    exploring_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

    # This runner call takes the Q-function through the ``brain`` keyword.
    simple_runner = runner_impl.SimpleRunner()
    simple_runner.Run(
        env=wrapped_env,
        brain=memo_qfunc,
        policy=exploring_policy,
        num_of_episodes=1,
    )
def Demo(_):
    """Record 10 greedy-policy episodes of SpaceInvaders using saved A3C weights.

    Builds a ``ScreenGymEnvironment`` around ``SpaceInvaders-v0``, loads the
    weights from ``saved_models/a3c_invader.weights`` into an A3C brain, and
    runs the episodes while recording to ``a3c_invader.mp4``.

    Args:
        _: Unused.
    """
    env = screen_learning.ScreenGymEnvironment(gym.make('SpaceInvaders-v0'))
    brain = a3c_impl.A3C(model=a3c_impl.CreateModel(
        state_shape=env.GetStateShape(),
        action_space_size=env.GetActionSpaceSize(),
        hidden_layer_sizes=(12, ),
    ))
    brain.Load('saved_models/a3c_invader.weights')
    policy = policy_impl.GreedyPolicy()

    env.StartRecording('a3c_invader.mp4')
    try:
        runner = runner_impl.SimpleRunner()
        runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=10)
    finally:
        # Always stop the recording, even when the run raises, so a started
        # recording is never left open.
        env.StopRecording()
def _RunEnv(gym_env):
    """Run 10 episodes on ``gym_env`` with a ``DQN`` Q-function.

    Args:
        gym_env: Object wrapped in an ``environment_impl.GymEnvironment``.
    """
    wrapped_env = environment_impl.GymEnvironment(gym_env)
    wrapped_env.SetGymEnvMaxEpisodeSteps(10)

    # The model is shaped from the wrapped environment's state and action
    # space, with one hidden layer of size 4.
    dqn_model = qfunc_impl.CreateModel(
        state_shape=wrapped_env.GetStateShape(),
        action_space_size=wrapped_env.GetActionSpaceSize(),
        hidden_layer_sizes=(4, ),
    )
    dqn = qfunc_impl.DQN(model=dqn_model)

    wrapped_env.Reset()

    exploring_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

    simple_runner = runner_impl.SimpleRunner()
    simple_runner.Run(
        env=wrapped_env,
        qfunc=dqn,
        policy=exploring_policy,
        num_of_episodes=10,
    )
def test_simpleRunner(self):
    # Smoke test: only checks that a single episode can run; the quality of
    # the result is not important for this test.
    simple_runner = runner_impl.SimpleRunner()
    simple_runner.Run(
        env=self.env,
        qfunc=self.qfunc,
        policy=self.policy,
        num_of_episodes=1,
    )