def test_saveLoad(self):
  """Weights saved by one A3C instance can be loaded by a fresh one.

  Trains briefly, saves the weights to a temp file, rebuilds an identical
  (but freshly initialized) brain, loads the weights, and checks that the
  loaded brain reproduces the saved brain's output values.
  """
  a3c = a3c_impl.A3C(model=a3c_impl.CreateModel(
      state_shape=(3, ),
      action_space_size=2,
      hidden_layer_sizes=(3, ),
  ), )
  # delete=False keeps the file (and its path) valid after the handle is
  # closed. The previous `tempfile.NamedTemporaryFile().name` discarded the
  # file object immediately, so the file was removed as soon as the object
  # was garbage-collected and only a stale name remained — racy, and it
  # fails outright on Windows where an open NamedTemporaryFile cannot be
  # reopened by name.
  tmp = tempfile.NamedTemporaryFile(delete=False)
  tmp_file = tmp.name
  tmp.close()
  s = numpy.array([[1, 2, 3]])
  for _ in range(10):
    a3c.UpdateFromTransitions([
        base.Transition(
            s=s, a=numpy.array([[1, 0]]), r=1.0,
            sp=numpy.array([[4, 5, 6]])),
    ])
  a3c.Save(tmp_file)
  saved_values = a3c.GetValues(s)
  # A brand-new brain with the same architecture but fresh weights.
  a3c = a3c_impl.A3C(model=a3c_impl.CreateModel(
      state_shape=(3, ),
      action_space_size=2,
      hidden_layer_sizes=(3, ),
  ), )
  a3c.Load(tmp_file)
  numpy_util.TestUtil.AssertArrayEqual(saved_values, a3c.GetValues(s))
def Train(_):
  """Trains an A3C SpaceInvaders brain with 10 parallel environment runners.

  Warm-starts from a previously saved checkpoint, then periodically traces
  progress and re-saves the best averaged model while training.
  """
  env = screen_learning.ScreenGymEnvironment(gym.make('SpaceInvaders-v0'))
  async_env_runners = [
      async_runner_impl.AsyncEnvRunner(
          env=screen_learning.ScreenGymEnvironment(
              gym.make('SpaceInvaders-v0')),
          runner=runner_impl.NStepExperienceRunner(n_step_return=10),
      ) for _ in range(10)
  ]  # type: t.List[async_runner_impl.AsyncEnvRunner]
  brain = async_runner_impl.AsyncBrain(
      a3c_impl.A3C(model=CreateModel(
          state_shape=env.GetStateShape(),
          action_space_size=env.GetActionSpaceSize(),
      )))
  brain.Load('saved_models/a3c_invader.weights')  # warm start
  policy = policy_impl.PolicyWithDecreasingRandomness(
      base_policy=policy_impl.PiWeightedPolicy(),
      initial_epsilon=0.2,
      final_epsilon=0.05,
      decay_by_half_after_num_of_episodes=500,
  )
  runner = async_runner_impl.ParallelRunner(async_env_runners)
  # Register callbacks in order: progress tracing first, then model saving.
  for extension in (
      runner_extension_impl.ProgressTracer(report_every_num_of_episodes=10),
      runner_extension_impl.ModelSaver(
          save_filepath='saved_models/a3c_invader.weights',
          use_averaged_value_over_num_of_episodes=30),
  ):
    runner.AddCallback(async_runner_impl.AsyncRunnerExtension(extension))
  runner.Run(brain=brain, policy=policy, num_of_episodes=200)
def test_convergence(self):
  """Training steps should not decrease the rewarded action's value.

  Alternates +1 reward for one action and -1 for the other for several
  steps, then verifies one more step does not lower the first action's
  value estimate.
  """
  a3c = a3c_impl.A3C(
      model=a3c_impl.CreateModel(
          state_shape=(3, ),
          action_space_size=2,
          hidden_layer_sizes=(3, ),
      ),
      # optimizer=a3c_impl.CreateDefaultOptimizer(learning_rate=0.05),
  )
  state = numpy.array([[1, 2, 3]])
  action_1 = numpy.array([[1, 0]])
  action_2 = numpy.array([[0, 1]])

  def _TrainBothActionsOnce():
    # Needs to train for both actions as one step, otherwise it shows some
    # "staggering" effect.
    a3c.UpdateFromTransitions([
        base.Transition(s=state, a=action_1, r=1.0, sp=None),
    ])
    a3c.UpdateFromTransitions([
        base.Transition(s=state, a=action_2, r=-1.0, sp=state),
    ])

  for _ in range(10):
    _TrainBothActionsOnce()
    logging.printf('%s', a3c.GetValues(state))
  old_value_a1 = a3c.GetActionValues(a3c.GetValues(state), action_1)
  # Trains for one step, for both actions.
  _TrainBothActionsOnce()
  self.assertGreaterEqual(
      a3c.GetActionValues(a3c.GetValues(state), action_1), old_value_a1)
def __init__(self):
  """Builds a CartPole Gym environment and a small A3C brain sized to it."""
  env = environment_impl.GymEnvironment(gym_env=gym.make('CartPole-v0'))
  model = a3c_impl.CreateModel(
      state_shape=env.GetStateShape(),
      action_space_size=env.GetActionSpaceSize(),
      hidden_layer_sizes=(12, ),
  )
  self._env = env
  self._brain = a3c_impl.A3C(model=model)
def Demo(_):
  """Replays a saved A3C SpaceInvaders agent greedily and records a video."""
  env = screen_learning.ScreenGymEnvironment(gym.make('SpaceInvaders-v0'))
  model = a3c_impl.CreateModel(
      state_shape=env.GetStateShape(),
      action_space_size=env.GetActionSpaceSize(),
      hidden_layer_sizes=(12, ),
  )
  brain = a3c_impl.A3C(model=model)
  brain.Load('saved_models/a3c_invader.weights')
  policy = policy_impl.GreedyPolicy()
  env.StartRecording('a3c_invader.mp4')
  runner_impl.SimpleRunner().Run(
      env=env, brain=brain, policy=policy, num_of_episodes=10)
  env.StopRecording()
def main(_):
  """Trains an A3C brain on CartPole using an n-step experience runner.

  Forces CPU execution, builds the environment/brain/policy, and runs 1200
  training episodes with periodic progress tracing.
  """
  running_environment.ForceCpuForTheRun()
  env = environment_impl.GymEnvironment(gym.make('CartPole-v0'))
  brain = a3c_impl.A3C(model=a3c_impl.CreateModel(
      state_shape=env.GetStateShape(),
      action_space_size=env.GetActionSpaceSize(),
      hidden_layer_sizes=(12, ),
  ))
  policy = policy_impl.PolicyWithDecreasingRandomness(
      base_policy=policy_impl.PiWeightedPolicy(),
      initial_epsilon=0.4,
      final_epsilon=0.05,
      decay_by_half_after_num_of_episodes=500,
  )
  # Fixed: NStepExperienceRunner is provided by runner_impl, not a3c_impl —
  # consistent with every other runner reference in this codebase (and with
  # the commented-out runner_impl.SimpleRunner alternative below).
  runner = runner_impl.NStepExperienceRunner()
  # runner = runner_impl.SimpleRunner()
  runner.AddCallback(
      runner_extension_impl.ProgressTracer(report_every_num_of_episodes=100))
  runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=1200)