예제 #1
0
 def setUp(self) -> None:
   """Create the environment, Q-function and runner fixtures on self."""
   # NOTE(review): the environment is configured with a single action while
   # the Q-function is sized for three -- confirm this mismatch is intended.
   env_kwargs = {'action_space_size': 1, 'step_limit': 10}
   qfunc_kwargs = {
     'action_space_size': 3,
     'discount_factor': 0.9,
     'learning_rate': 0.9,
   }
   self.env = environment_impl.SingleStateEnvironment(**env_kwargs)
   self.qfunc = qfunc_impl.MemoizationQFunction(**qfunc_kwargs)
   self.runner = runner_impl.SimpleRunner()
예제 #2
0
  def _RunEnv(gym_env):
    """Drive SimpleRunner on gym_env with a target-network DQN.

    gym_env is wrapped in a ScreenGymEnvironment; the convolution model is
    sized from the wrapped environment's action space, and the runner is
    invoked with num_of_episodes=10.
    """
    screen_env = screen_learning.ScreenGymEnvironment(gym_env)
    conv_model = screen_learning.CreateConvolutionModel(
      action_space_size=screen_env.GetActionSpaceSize())
    dqn = qfunc_impl.DQN_TargetNetwork(model=conv_model)
    # NOTE(review): epsilon=1.0 presumably makes every action random --
    # confirm against GreedyPolicyWithRandomness.
    exploring_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

    runner = runner_impl.SimpleRunner()
    runner.Run(
      env=screen_env,
      qfunc=dqn,
      policy=exploring_policy,
      num_of_episodes=10)
예제 #3
0
  def _RunEnv(gym_env):
    """Drive SimpleRunner on gym_env with a RandomValueQFunction.

    gym_env is wrapped in a GymEnvironment, which is reset once before the
    runner is invoked with num_of_episodes=10.
    """
    wrapped_env = environment_impl.GymEnvironment(gym_env)
    num_actions = wrapped_env.GetActionSpaceSize()
    random_qfunc = qfunc_impl.RandomValueQFunction(
      action_space_size=num_actions)
    wrapped_env.Reset()
    # NOTE(review): epsilon=1.0 presumably makes every action random --
    # confirm against GreedyPolicyWithRandomness.
    exploring_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

    runner = runner_impl.SimpleRunner()
    runner.Run(
      env=wrapped_env,
      qfunc=random_qfunc,
      policy=exploring_policy,
      num_of_episodes=10)
예제 #4
0
    def _RunEnv(gym_env):
        """Drive SimpleRunner on gym_env with a MemoizationQFunction.

        gym_env is wrapped in a GymEnvironment whose max episode steps is
        set to 10; the environment is reset once and the runner is then
        invoked with num_of_episodes=1.
        """
        wrapped_env = environment_impl.GymEnvironment(gym_env)
        wrapped_env.SetGymEnvMaxEpisodeSteps(10)
        num_actions = wrapped_env.GetActionSpaceSize()
        memo_qfunc = qfunc_impl.MemoizationQFunction(
            action_space_size=num_actions)
        wrapped_env.Reset()
        # NOTE(review): epsilon=1.0 presumably makes every action random --
        # confirm against GreedyPolicyWithRandomness.
        exploring_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

        runner = runner_impl.SimpleRunner()
        # The Q-function is handed to the runner through its `brain` keyword.
        runner.Run(
            env=wrapped_env,
            brain=memo_qfunc,
            policy=exploring_policy,
            num_of_episodes=1,
        )
예제 #5
0
def Demo(_):
    """Run a saved A3C brain on SpaceInvaders while recording the screen.

    Wraps the 'SpaceInvaders-v0' gym environment in a ScreenGymEnvironment,
    builds an A3C brain sized from the environment's state shape and action
    space, calls brain.Load on 'saved_models/a3c_invader.weights', and then
    invokes SimpleRunner.Run with a GreedyPolicy and num_of_episodes=10.
    Recording to 'a3c_invader.mp4' is started before the run and is always
    stopped afterwards, even when the run raises.

    Args:
        _: Unused. NOTE(review): presumably the argv list handed over by an
            app-style entry point -- confirm against the caller.
    """
    env = screen_learning.ScreenGymEnvironment(gym.make('SpaceInvaders-v0'))
    brain = a3c_impl.A3C(model=a3c_impl.CreateModel(
        state_shape=env.GetStateShape(),
        action_space_size=env.GetActionSpaceSize(),
        hidden_layer_sizes=(12, ),
    ))
    brain.Load('saved_models/a3c_invader.weights')
    policy = policy_impl.GreedyPolicy()

    env.StartRecording('a3c_invader.mp4')
    try:
        runner = runner_impl.SimpleRunner()
        runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=10)
    finally:
        # Stop the recording even if the run fails, so a started recording
        # is never left open; the original exception still propagates.
        env.StopRecording()
예제 #6
0
    def _RunEnv(gym_env):
        """Drive SimpleRunner on gym_env with a DQN Q-function.

        gym_env is wrapped in a GymEnvironment whose max episode steps is
        set to 10. The DQN model is created from the environment's state
        shape and action space size with hidden_layer_sizes=(4, ). The
        environment is reset once and the runner is then invoked with
        num_of_episodes=10.
        """
        wrapped_env = environment_impl.GymEnvironment(gym_env)
        wrapped_env.SetGymEnvMaxEpisodeSteps(10)

        # Query the environment in the same order the model arguments use.
        state_shape = wrapped_env.GetStateShape()
        num_actions = wrapped_env.GetActionSpaceSize()
        dqn_model = qfunc_impl.CreateModel(
            state_shape=state_shape,
            action_space_size=num_actions,
            hidden_layer_sizes=(4, ),
        )
        dqn = qfunc_impl.DQN(model=dqn_model)

        wrapped_env.Reset()
        # NOTE(review): epsilon=1.0 presumably makes every action random --
        # confirm against GreedyPolicyWithRandomness.
        exploring_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

        runner = runner_impl.SimpleRunner()
        runner.Run(
            env=wrapped_env,
            qfunc=dqn,
            policy=exploring_policy,
            num_of_episodes=10,
        )
예제 #7
0
 def test_simpleRunner(self):
   """Smoke test: SimpleRunner.Run accepts the fixtures for one episode."""
   # Only checks that the call goes through; the quality of the outcome is
   # not important for this test.
   runner = runner_impl.SimpleRunner()
   runner.Run(
     env=self.env,
     qfunc=self.qfunc,
     policy=self.policy,
     num_of_episodes=1,
   )