def test_dddqn_n_step_memory_insertion_n_step_samples_only(self):
    """
    Tests the n-step post-processing and memory-insertions of DDDQN
    (with the n_step_only option set to True).
    """
    # Create an Env object.
    env = GridWorld("2x2", actors=1)

    # Create a very standard DDDQN config.
    dqn_config = DDDQNConfig.make(
        "{}/../configs/dddqn_grid_world_2x2_learning.json".format(os.path.dirname(__file__)),
        n_step=2,  # Fix n-step to 2, just in case.
        gamma=0.5,  # Fix gamma for unique memory-check purposes.
        epsilon=[1.0, 0.5],  # Fix epsilon to get lots of random actions.
        preprocessor=Preprocessor(lambda inputs_: tf.one_hot(
            inputs_, depth=env.actors[0].state_space.num_categories)),
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space
    )
    algo = DDDQN(config=dqn_config, name="my-dddqn")

    # Point actor(s) to the algo.
    env.point_all_actors_to_algo(algo)

    # Run for n ticks, then check memory contents for correct n-step tuples.
    for _ in range(5):
        env.run(ticks=100, sync=True, render=False)
        self._check_2x2_grid_world_mem(algo.memory, n_step_only=True)

    env.terminate()
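# Illustrative sketch (not the library's actual post-processing code): with n_step=2 and
# gamma=0.5 as configured above, each memory record is expected to hold the discounted
# 2-step reward r_t + gamma * r_{t+1} and point to the state two steps ahead. The helper
# below is hypothetical and only documents the arithmetic the memory check relies on.
def _two_step_reward_sketch(rewards, gamma=0.5):
    # Accumulate r_t + gamma * r_{t+1} for each time step that has a successor.
    return [rewards[t] + gamma * rewards[t + 1] for t in range(len(rewards) - 1)]

# Example: rewards [-0.1, 1.0] collapse into a single 2-step reward of -0.1 + 0.5 * 1.0 = 0.4.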
def test_dddqn_learning_on_cart_pole_with_4_actors(self):
    # Create an Env object.
    env = OpenAIGymEnv("CartPole-v0", actors=4)

    # Create a Config.
    dqn_config = DDDQNConfig.make(
        "{}/../configs/dddqn_cart_pole_learning_n_actors.json".format(os.path.dirname(__file__)),
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space
    )

    # Create an Algo object.
    algo = DDDQN(config=dqn_config, name="my-dqn")

    # Point actor(s) to the algo.
    env.point_all_actors_to_algo(algo)

    # Run and wait for env to complete.
    env.run(ticks=2000, sync=True, render=debug.RenderEnvInLearningTests)

    # Check last n episode returns.
    last_n = 10
    mean_last_episodes = np.mean(env.historic_episodes_returns[-last_n:])
    print("Avg return over last {} episodes: {}".format(last_n, mean_last_episodes))
    self.assertTrue(mean_last_episodes > 160.0)

    env.terminate()
def test_dddqn_learning_on_mountain_car_4_actors(self):
    # Note: MountainCar is tricky due to its sparse reward function, hence we need a quite
    # large episode cutoff to solve it with ease.
    # With a large enough n-step, the algo should be able to learn the env very quickly
    # after having solved it once via randomness.
    env = OpenAIGymEnv("MountainCar-v0", actors=4, max_episode_steps=5000)

    # Create a DDDQNConfig.
    dqn_config = DDDQNConfig.make(
        "{}/../configs/dddqn_mountain_car_learning_n_actors.json".format(os.path.dirname(__file__)),
        # TODO: filename wrong (num actors)
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space
    )

    # Create an Algo object.
    algo = DDDQN(config=dqn_config, name="my-dqn")

    # Point actor(s) to the algo.
    env.point_all_actors_to_algo(algo)

    # Run and wait for env to complete.
    env.run(ticks=7000, sync=True, render=debug.RenderEnvInLearningTests)

    # Check last n episode returns.
    last_n = 10
    mean_last_episodes = np.mean(env.historic_episodes_returns[-last_n:])
    print("Avg return over last {} episodes: {}".format(last_n, mean_last_episodes))
    self.assertTrue(mean_last_episodes > -200.0)

    env.terminate()
def test_dddqn_learning_on_car_racing(self):
    # Action map: discrete to continuous (steer, gas, brake).
    # Originally 9 actions (0=noop, 1=left, 2=right, 3=brake only, 4=brake and left,
    # 5=brake and right, 6=gas only, 7=gas and left, 8=gas and right); the first three
    # (no-gas/no-brake) actions are commented out below, leaving 6 discrete actions.
    def action_map(a):
        b = np.reshape(a, (-1, 1))
        return np.where(
            #b == 0, [0.0, 0.0, 0.0], np.where(
            #b == 1, [-1.0, 0.0, 0.0], np.where(
            #b == 2, [1.0, 0.0, 0.0], np.where(
            b == 0, [0.0, 0.0, 1.0], np.where(  # brake only
            b == 1, [-1.0, 0.0, 1.0], np.where(  # brake and left
            b == 2, [1.0, 0.0, 1.0], np.where(  # brake and right
            b == 3, [0.0, 1.0, 0.0], np.where(  # gas only
            b == 4, [-1.0, 1.0, 0.0],  # gas and left
            [1.0, 1.0, 0.0]  # gas and right
        )))))

    # Create an Env object.
    env = OpenAIGymEnv("CarRacing-v0", actors=1, action_map=action_map)

    # Create a DDDQNConfig.
    config = DDDQNConfig.make(
        "{}/../configs/dddqn_car_racing_learning.json".format(os.path.dirname(__file__)),
        preprocessor=Preprocessor(
            #ImageCrop(x=0, y=0, width=150, height=167),
            GrayScale(keepdims=True),
            ImageResize(width=84, height=84, interpolation="bilinear"),
            lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),  # simple preprocessor: [0,255] to [-1.0,1.0]
            Sequence(sequence_length=4, adddim=False)
        ),
        state_space=env.actors[0].state_space,
        action_space=Int(6)
    )

    # Create an Algo object.
    algo = DDDQN(config=config, name="my-dddqn")

    # Point actor(s) to the algo.
    env.point_all_actors_to_algo(algo)

    # Run and wait for env to complete.
    env.run(ticks=20000, sync=True, render=debug.RenderEnvInLearningTests)

    # Check last n episode returns.
    n = 10
    mean_last_n = np.mean(env.historic_episodes_returns[-n:])
    print("Avg return over last {} episodes: {}".format(n, mean_last_n))
    self.assertTrue(mean_last_n > 150.0)

    env.terminate()
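# Hypothetical, equivalent sketch of the above action map using a lookup table instead of
# nested np.where calls. It is not part of the test; it only illustrates the mapping from
# the 6 discrete actions to CarRacing's continuous (steer, gas, brake) triples.
_CAR_RACING_ACTION_TABLE = np.array([
    [0.0, 0.0, 1.0],   # 0: brake only
    [-1.0, 0.0, 1.0],  # 1: brake and left
    [1.0, 0.0, 1.0],   # 2: brake and right
    [0.0, 1.0, 0.0],   # 3: gas only
    [-1.0, 1.0, 0.0],  # 4: gas and left
    [1.0, 1.0, 0.0],   # 5: gas and right
], dtype=np.float32)

def _table_action_map_sketch(a):
    # Index the table with (batched) int actions; returns an array of shape (batch, 3).
    return _CAR_RACING_ACTION_TABLE[np.reshape(a, (-1,))]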
def test_dddqn_compilation(self):
    """
    Tests the constructor of DDDQN.
    """
    env = OpenAIGymEnv("MsPacman-v0", actors=4)

    # Create a Config (for any Atari game).
    config = DDDQNConfig.make(
        # The Breakout config should work for MsPacman as well.
        "{}/../configs/dddqn_breakout_learning.json".format(os.path.dirname(__file__)),
        memory_capacity=1000,
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space
    )
    dddqn = DDDQN(config)
    print("DDDQN built ({}).".format(dddqn))

    env.terminate()
def test_dddqn_learning_on_grid_world_2x2(self):
    # Create an Env object.
    env = GridWorld("2x2", actors=1)

    # Add the preprocessor.
    preprocessor = Preprocessor(
        lambda inputs_: tf.one_hot(inputs_, depth=env.actors[0].state_space.num_categories)
    )

    # Create a Config.
    dqn_config = DDDQNConfig.make(
        "{}/../configs/dddqn_grid_world_2x2_learning.json".format(os.path.dirname(__file__)),
        preprocessor=preprocessor,
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space
    )

    # Create an Algo object.
    algo = DDDQN(config=dqn_config, name="my-dddqn")

    # Point actor(s) to the algo.
    env.point_all_actors_to_algo(algo)

    # Run and wait for env to complete.
    env.run(ticks=3000, sync=True, render=debug.RenderEnvInLearningTests)

    # Check last n episode returns.
    n = 10
    mean_last_n = np.mean(env.historic_episodes_returns[-n:])
    print("Avg return over last {} episodes: {}".format(n, mean_last_n))
    self.assertTrue(mean_last_n >= 0.6)

    # Check the learnt Q-function (using our dueling layer).
    a_and_v = algo.Q(one_hot(np.array([0, 0, 0, 0, 1, 1, 1, 1]), depth=4))
    q = dueling(a_and_v, np.array([0, 1, 2, 3, 0, 1, 2, 3]))
    print(q)
    self.assertTrue(q[1] < min(q[2:]) and q[1] < q[0])  # q(s=0,a=right) is the worst.
    check(q[5], 1.0, atol=0.4)  # Q(1,->) is close to 1.0.
    #self.assertTrue(q[5] > max(q[:4]) and q[5] > max(q[6:]))  # q(s=1,a=right) is the best.
    #check(q, [0.8, -5.0, 0.9, 0.8, 0.8, 1.0, 0.9, 0.9], decimals=1)  # a=up,down,left,right

    env.terminate()
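# Illustrative numpy sketch of the standard dueling aggregation a call like
# `dueling(a_and_v, actions)` above is assumed to perform:
#   Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a').
# The layout assumed here (first num_actions outputs are advantages, the last one is the
# state value) is an assumption for illustration, not necessarily the library's convention.
def _dueling_q_sketch(a_and_v, actions):
    a_and_v = np.asarray(a_and_v)
    advantages, values = a_and_v[:, :-1], a_and_v[:, -1]
    q_all = values[:, None] + advantages - np.mean(advantages, axis=1, keepdims=True)
    # Pick the Q-value of the given action for each batch row.
    return q_all[np.arange(len(actions)), actions]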
def test_dddqn_learning_on_breakout(self):
    # Create an Env object.
    env = OpenAIGymEnv(
        "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True,
        max_num_noops_after_reset=8, frame_skip=(2, 5)
    )

    preprocessor = Preprocessor(
        ImageCrop(x=5, y=29, width=150, height=167),
        GrayScale(keepdims=True),
        ImageResize(width=84, height=84, interpolation="bilinear"),
        lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),  # simple preprocessor: [0,255] to [-1.0,1.0]
        Sequence(sequence_length=4, adddim=False)
    )

    # Create a DDDQNConfig.
    config = DDDQNConfig.make(
        "{}/../configs/dddqn_breakout_learning.json".format(os.path.dirname(__file__)),
        preprocessor=preprocessor,
        state_space=env.actors[0].state_space,
        action_space=env.actors[0].action_space
    )

    # Create an Algo object.
    algo = DDDQN(config=config, name="my-dddqn")

    # Point actor(s) to the algo.
    env.point_all_actors_to_algo(algo)

    # Run and wait for env to complete.
    env.run(actor_time_steps=10000000, sync=True, render=debug.RenderEnvInLearningTests)

    # Check last n episode returns.
    n = 10
    mean_last_n = np.mean(env.historic_episodes_returns[-n:])
    print("Avg return over last {} episodes: {}".format(n, mean_last_n))
    self.assertTrue(mean_last_n > 150.0)

    env.terminate()
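# Illustrative sketch (an assumption, not the library's Sequence implementation): with
# sequence_length=4 and adddim=False, the preprocessor stack above is assumed to concatenate
# the last four 84x84x1 grayscale frames along the channel axis, yielding an 84x84x4 state.
def _frame_stack_sketch(frames):
    # frames: list of the four most recent preprocessed frames, each of shape (84, 84, 1).
    return np.concatenate(frames[-4:], axis=-1)  # -> shape (84, 84, 4)

# Example: four dummy frames stack into one (84, 84, 4) network input:
# _frame_stack_sketch([np.zeros((84, 84, 1), dtype=np.float32)] * 4).shape == (84, 84, 4)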