def test_sac_learning_on_cart_pole_with_n_actors(self):
    """Learns CartPole-v0 with SAC using two parallel actors."""
    # Build the environment with two actors.
    env = OpenAIGymEnv("CartPole-v0", actors=2)

    # Assemble the SAC config from the json file in the test-configs folder.
    config = SACConfig.make(
        "{}/../configs/sac_cart_pole_learning_n_actors.json".format(
            os.path.dirname(__file__)),
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space)

    # Construct the algorithm and wire every actor to it.
    algo = SAC(config=config, name="my-sac")
    env.point_all_actors_to_algo(algo)

    # Execute synchronously for 2000 ticks.
    env.run(ticks=2000, sync=True, render=debug.RenderEnvInLearningTests)

    # The mean return over the final 4 episodes must exceed 160.
    num_last = 4
    avg_return = np.mean(env.historic_episodes_returns[-num_last:])
    print("Avg return over last {} episodes: {}".format(
        num_last, avg_return))
    self.assertTrue(avg_return > 160.0)

    env.terminate()
def test_dqn2015_learning_on_cart_pole_with_n_actors(self):
    """Learns CartPole-v0 with DQN2015 using four parallel actors."""
    # Environment with 4 actors.
    # NOTE(review): num_cores=None presumably auto-selects cores — confirm
    # against OpenAIGymEnv's signature.
    env = OpenAIGymEnv("CartPole-v0", actors=4, num_cores=None)

    # Load the algo config from the json file shipped with the tests.
    config = DQN2015Config.make(  # type: DQN2015Config
        "{}/../configs/dqn2015_cart_pole_learning_n_actors.json".format(
            os.path.dirname(__file__)),
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space)

    # Build the algorithm and point every actor at it.
    algo = DQN2015(config=config, name="my-dqn")
    env.point_all_actors_to_algo(algo)

    # Run synchronously for 3000 ticks.
    env.run(ticks=3000, sync=True, render=debug.RenderEnvInLearningTests)

    # The mean return over the final 10 episodes must exceed 130.
    num_last = 10
    avg_return = np.mean(env.historic_episodes_returns[-num_last:])
    print("Avg return over last {} episodes: {}".format(num_last, avg_return))
    self.assertTrue(avg_return > 130.0)

    env.terminate()
def test_dddqn_learning_on_mountain_car_4_actors(self):
    """Learns MountainCar-v0 with DDDQN using four parallel actors."""
    # MountainCar is tricky per its reward function: a generous episode
    # cutoff (5000 steps) gives random exploration a chance to reach the
    # goal at least once; with a large enough n-step the algo then learns
    # the env very quickly after having solved it once via randomness.
    env = OpenAIGymEnv("MountainCar-v0", actors=4, max_episode_steps=5000)

    # Build the config.  TODO: filename wrong (num actors).
    config = DDDQNConfig.make(
        "{}/../configs/dddqn_mountain_car_learning_n_actors.json".format(
            os.path.dirname(__file__)),
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space)

    # Create the algorithm and attach all actors to it.
    algo = DDDQN(config=config, name="my-dqn")
    env.point_all_actors_to_algo(algo)

    # Execute for 7000 synchronous ticks.
    env.run(ticks=7000, sync=True, render=debug.RenderEnvInLearningTests)

    # MountainCar returns are negative; better than -200 passes here.
    num_last = 10
    avg_return = np.mean(env.historic_episodes_returns[-num_last:])
    print("Avg return over last {} episodes: {}".format(
        num_last, avg_return))
    self.assertTrue(avg_return > -200.0)

    env.terminate()
def test_dqn2015_compilation(self):
    """
    Tests the c'tor of DQN2015.
    """
    # Fix: the docstring previously said "DDDQN", but this test builds DQN2015.
    # Small CartPole env with 3 actors, used only to obtain valid spaces.
    env = OpenAIGymEnv("CartPole-v0", actors=3)
    # Create a Config from the CartPole json.
    config = DQN2015Config.make(
        "{}/../configs/dqn2015_cart_pole_learning_n_actors.json".format(os.path.dirname(__file__)),
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space
    )
    # Constructing the algorithm must not raise.
    dqn2015 = DQN2015(config)
    print("DQN2015 built ({}).".format(dqn2015))
    env.terminate()
def test_sac_compilation(self):
    """
    Tests the c'tor of SAC.
    """
    # Use a Pong env (2 actors) purely to obtain state/action spaces.
    env = OpenAIGymEnv("Pong-v0", actors=2)
    # Build the config from the breakout json (works for any Atari game);
    # a small memory keeps this cheap.
    config = SACConfig.make(
        "{}/../configs/sac_breakout_learning.json".format(
            os.path.dirname(__file__)),
        memory_capacity=1000,
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space)
    # Construction must succeed without raising.
    sac = SAC(config)
    print("SAC built ({}).".format(sac))
    env.terminate()
def test_dddqn_learning_on_car_racing(self):
    """Learns CarRacing-v0 with DDDQN via a discrete-to-continuous action map."""
    # Action-map: Discrete to Continuous ([steer, gas, brake] triples).
    # Original design had 9 actions ("break" = brake):
    # 0=noop
    # 1=left
    # 2=right
    # 3=break only
    # 4=break and left
    # 5=break and right
    # 6=gas only
    # 7=gas and left
    # 8=gas and right
    # NOTE(review): the first three branches (noop / left / right with
    # neither gas nor brake) are commented out below, leaving 6 actions —
    # consistent with `action_space=Int(6)` further down. Effective map:
    # 0=brake, 1=brake+left, 2=brake+right, 3=gas, 4=gas+left, 5=gas+right.
    def action_map(a):
        # Reshape to a column so each np.where broadcasts against the
        # 3-element action triples, yielding an (n, 3) float array.
        b = np.reshape(a, (-1, 1))
        return np.where(
            #b == 0, [0.0, 0.0, 0.0], np.where(
            #    b == 1, [-1.0, 0.0, 0.0], np.where(
            #        b == 2, [1.0, 0.0, 0.0], np.where(
            b == 0, [0.0, 0.0, 1.0], np.where(
                b == 1, [-1.0, 0.0, 1.0], np.where(
                    b == 2, [1.0, 0.0, 1.0], np.where(
                        b == 3, [0.0, 1.0, 0.0], np.where(
                            b == 4, [-1.0, 1.0, 0.0], [1.0, 1.0, 0.0]
                        )))))

    # Create an Env object (single actor, discrete actions mapped above).
    env = OpenAIGymEnv("CarRacing-v0", actors=1, action_map=action_map)

    # Create a DDDQNConfig.
    config = DDDQNConfig.make(
        "{}/../configs/dddqn_car_racing_learning.json".format(os.path.dirname(__file__)),
        preprocessor=Preprocessor(
            #ImageCrop(x=0, y=0, width=150, height=167),
            GrayScale(keepdims=True),
            ImageResize(width=84, height=84, interpolation="bilinear"),
            # Simple preprocessor: pixel range [0,255] -> [-1.0,1.0].
            lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),
            Sequence(sequence_length=4, adddim=False)
        ),
        state_space=env.actors[0].state_space,
        # 6 discrete actions (see action_map above).
        action_space=Int(6)
    )

    # Create an Algo object.
    algo = DDDQN(config=config, name="my-dddqn")

    # Point actor(s) to the algo.
    env.point_all_actors_to_algo(algo)

    # Run and wait for env to complete.
    env.run(ticks=20000, sync=True, render=debug.RenderEnvInLearningTests)

    # Check the mean return over the last n episodes.
    n = 10
    mean_last_n = np.mean(env.historic_episodes_returns[-n:])
    print("Avg return over last {} episodes: {}".format(n, mean_last_n))
    self.assertTrue(mean_last_n > 150.0)

    env.terminate()
def test_dddqn_compilation(self):
    """
    Tests the c'tor of DDDQN.
    """
    # MsPacman env with 4 actors, used only to obtain valid spaces.
    env = OpenAIGymEnv("MsPacman-v0", actors=4)
    # Build the config from the Breakout json — as an Atari game it should
    # be the same as MsPacman. Small memory keeps the test cheap.
    config = DDDQNConfig.make(
        "{}/../configs/dddqn_breakout_learning.json".format(
            os.path.dirname(__file__)),
        memory_capacity=1000,
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space)
    # Construction must succeed without raising.
    dddqn = DDDQN(config)
    print("DDDQN built ({}).".format(dddqn))
    env.terminate()
def test_sac_learning_on_breakout(self):
    """Learns Atari Breakout with SAC on 128 parallel actors."""
    # Atari-standard env tweaks: fire after reset, episodic life, up to 6
    # random no-ops after reset, and a random frame-skip in [2, 5).
    env = OpenAIGymEnv(
        "Breakout-v4", actors=128, fire_after_reset=True, episodic_life=True,
        max_num_noops_after_reset=6, frame_skip=(2, 5))
    # Create the SAC config, requesting a set of learning summaries.
    config = SACConfig.make(
        "{}/../configs/sac_breakout_learning.json".format(
            os.path.dirname(__file__)),
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space,
        summaries=[
            "Ls_critic[0]", "Ls_critic[1]", "L_actor", "L_alpha", "alpha",
            ("actions", "a_soft.value[0]"),
            "log_pi", "entropy_error_term", "log_alpha",  # TEST
            "episode.return", "episode.time_steps",
        ])
    # Create an Algo object.
    algo = SAC(config=config, name="my-sac")
    # Point actor(s) to the algo.
    env.point_all_actors_to_algo(algo)
    # Run and wait for env to complete (20M per-actor time steps).
    env.run(actor_time_steps=20000000, sync=True, render=debug.RenderEnvInLearningTests)
    # Check last n episode returns.
    n = 10
    mean_last_n = np.mean(env.historic_episodes_returns[-n:])
    # Fix: format the message with n instead of a hard-coded "10" so it stays
    # correct if n changes (consistent with the sibling learning tests).
    print("Avg return over last {} episodes: {}".format(n, mean_last_n))
    self.assertTrue(mean_last_n > 200.0)
    env.terminate()
def test_dddqn_learning_on_breakout(self):
    """Learns Atari Breakout with DDDQN on 16 parallel actors."""
    # Atari-style env: fire-after-reset, episodic life, random no-ops,
    # random frame-skip.
    env = OpenAIGymEnv(
        "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True,
        max_num_noops_after_reset=8, frame_skip=(2, 5)
    )
    # Standard Atari pipeline: crop, grayscale, resize to 84x84, scale
    # pixel values from [0,255] into [-1.0,1.0], then stack 4 frames.
    preprocessor = Preprocessor(
        ImageCrop(x=5, y=29, width=150, height=167),
        GrayScale(keepdims=True),
        ImageResize(width=84, height=84, interpolation="bilinear"),
        lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),
        Sequence(sequence_length=4, adddim=False)
    )
    # Assemble the DDDQN config around that preprocessor.
    config = DDDQNConfig.make(
        "{}/../configs/dddqn_breakout_learning.json".format(os.path.dirname(__file__)),
        preprocessor=preprocessor,
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space
    )
    # Build the algorithm and attach every actor to it.
    dddqn = DDDQN(config=config, name="my-dddqn")
    env.point_all_actors_to_algo(dddqn)
    # Train for 10M per-actor time steps.
    env.run(actor_time_steps=10000000, sync=True, render=debug.RenderEnvInLearningTests)
    # Require a mean return above 150 over the final 10 episodes.
    num_last = 10
    avg_return = np.mean(env.historic_episodes_returns[-num_last:])
    print("Avg return over last {} episodes: {}".format(num_last, avg_return))
    self.assertTrue(avg_return > 150.0)
    env.terminate()