Example #1
    def test_sac_learning_on_cart_pole_with_n_actors(self):
        # Create an Env object.
        env = OpenAIGymEnv("CartPole-v0", actors=2)

        # Create a Config.
        config = SACConfig.make(
            "{}/../configs/sac_cart_pole_learning_n_actors.json".format(
                os.path.dirname(__file__)),
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space)

        # Create an Algo object.
        algo = SAC(config=config, name="my-sac")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=2000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        last_n = 4
        mean_last_episodes = np.mean(env.historic_episodes_returns[-last_n:])
        print("Avg return over last {} episodes: {}".format(
            last_n, mean_last_episodes))
        self.assertTrue(mean_last_episodes > 160.0)

        env.terminate()
Example #2
    def test_dqn2015_learning_on_cart_pole_with_n_actors(self):
        # Create an Env object.
        env = OpenAIGymEnv("CartPole-v0", actors=4, num_cores=None)

        # Create a Config.
        config = DQN2015Config.make(  # type: DQN2015Config
            "{}/../configs/dqn2015_cart_pole_learning_n_actors.json".format(
                os.path.dirname(__file__)),
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space)

        # Create an Algo object.
        algo = DQN2015(config=config, name="my-dqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=3000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        n = 10
        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
        self.assertTrue(mean_last_n > 130.0)

        env.terminate()
Example #3
    def test_dddqn_learning_on_mountain_car_4_actors(self):
        # Note: MountainCar is tricky due to its sparse reward function, so we need
        # a fairly large episode cutoff to solve it reliably.
        # With a large enough n-step, the algo should learn the env very quickly
        # after having solved it once by chance.
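        # (Illustrative aside, not part of the original test: an n-step target
        #  bootstraps with r_t + gamma*r_{t+1} + ... + gamma^(n-1)*r_{t+n-1}
        #  + gamma^n * max_a Q(s_{t+n}, a), so the sparse goal reward propagates
        #  back over n steps per update instead of just one.)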
        env = OpenAIGymEnv("MountainCar-v0", actors=4, max_episode_steps=5000)

        # Create a DDDQNConfig.
        dqn_config = DDDQNConfig.make(
            "{}/../configs/dddqn_mountain_car_learning_n_actors.json".format(
                os.path.dirname(
                    __file__)),  # TODO: filename wrong (num actors)
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space)

        # Create an Algo object.
        algo = DDDQN(config=dqn_config, name="my-dqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=7000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        last_n = 10
        mean_last_episodes = np.mean(env.historic_episodes_returns[-last_n:])
        print("Avg return over last {} episodes: {}".format(
            last_n, mean_last_episodes))
        self.assertTrue(mean_last_episodes > -200.0)

        env.terminate()
Example #4
    def test_dqn2015_compilation(self):
        """
        Tests the c'tor of DQN2015.
        """
        env = OpenAIGymEnv("CartPole-v0", actors=3)
        # Create a Config.
        config = DQN2015Config.make(
            "{}/../configs/dqn2015_cart_pole_learning_n_actors.json".format(os.path.dirname(__file__)),
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space
        )
        dqn2015 = DQN2015(config)
        print("DQN2015 built ({}).".format(dqn2015))

        env.terminate()
Example #5
    def test_sac_compilation(self):
        """
        Tests the c'tor of SAC.
        """
        env = OpenAIGymEnv("Pong-v0", actors=2)
        # Create a Config (for any Atari game).
        config = SACConfig.make(
            "{}/../configs/sac_breakout_learning.json".format(
                os.path.dirname(__file__)),
            memory_capacity=1000,
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space)
        sac = SAC(config)
        print("SAC built ({}).".format(sac))

        env.terminate()
Example #6
    def test_dddqn_learning_on_car_racing(self):
        # Action-map: discrete to continuous. The original 9-action scheme
        # (noop, left, right, brake, brake+left, brake+right, gas, gas+left,
        # gas+right) is reduced to 6 actions here; the noop and steer-only
        # branches are commented out below, so the active mapping is:
        # 0=brake only
        # 1=brake and left
        # 2=brake and right
        # 3=gas only
        # 4=gas and left
        # 5=gas and right
        def action_map(a):
            b = np.reshape(a, (-1, 1))
            # Original 9-action branches kept for reference:
            # b == 0, [0.0, 0.0, 0.0], np.where(
            #     b == 1, [-1.0, 0.0, 0.0], np.where(
            #         b == 2, [1.0, 0.0, 0.0], ...
            return np.where(
                b == 0, [0.0, 0.0, 1.0], np.where(
                    b == 1, [-1.0, 0.0, 1.0], np.where(
                        b == 2, [1.0, 0.0, 1.0], np.where(
                            b == 3, [0.0, 1.0, 0.0], np.where(
                                b == 4, [-1.0, 1.0, 0.0], [1.0, 1.0, 0.0]
                            )))))
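        # Quick sanity check of the mapping (illustrative only, plain NumPy):
        # action_map(np.array([0, 3])) -> [[0.0, 0.0, 1.0], [0.0, 1.0, 0.0]],
        # i.e. brake-only and gas-only in CarRacing's [steer, gas, brake] layout.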

        # Create an Env object.
        env = OpenAIGymEnv("CarRacing-v0", actors=1, action_map=action_map)

        # Create a DDDQNConfig.
        config = DDDQNConfig.make(
            "{}/../configs/dddqn_car_racing_learning.json".format(os.path.dirname(__file__)),
            preprocessor=Preprocessor(
                #ImageCrop(x=0, y=0, width=150, height=167),
                GrayScale(keepdims=True),
                ImageResize(width=84, height=84, interpolation="bilinear"),
                lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),
                # simple preprocessor: [0,255] to [-1.0,1.0]
                Sequence(sequence_length=4, adddim=False)
            ),
            state_space=env.actors[0].state_space,
            action_space=Int(6)
        )
        # Create an Algo object.
        algo = DDDQN(config=config, name="my-dddqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=20000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        n = 10
        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
        self.assertTrue(mean_last_n > 150.0)

        env.terminate()
Example #7
    def test_dddqn_compilation(self):
        """
        Tests the c'tor of DDDQN.
        """
        env = OpenAIGymEnv("MsPacman-v0", actors=4)
        # Create a Config (for any Atari game).
        config = DDDQNConfig.make(
            # The Breakout config should work for MsPacman as well (same Atari frame format).
            "{}/../configs/dddqn_breakout_learning.json".format(
                os.path.dirname(__file__)),
            memory_capacity=1000,
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space)
        dddqn = DDDQN(config)
        print("DDDQN built ({}).".format(dddqn))

        env.terminate()
Example #8
    def test_sac_learning_on_breakout(self):
        # Create an Env object.
        env = OpenAIGymEnv("Breakout-v4",
                           actors=128,
                           fire_after_reset=True,
                           episodic_life=True,
                           max_num_noops_after_reset=6,
                           frame_skip=(2, 5))

        # Create a SACConfig.
        config = SACConfig.make(
            "{}/../configs/sac_breakout_learning.json".format(
                os.path.dirname(__file__)),
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space,
            summaries=[
                "Ls_critic[0]",
                "Ls_critic[1]",
                "L_actor",
                "L_alpha",
                "alpha",
                ("actions", "a_soft.value[0]"),
                "log_pi",
                "entropy_error_term",
                "log_alpha",  # TEST
                "episode.return",
                "episode.time_steps",
            ])
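        # (Illustrative assumption about the summary keys above: they presumably
        #  select which SAC internals get logged -- the two critic losses, the
        #  actor loss, the temperature (alpha) loss and its value, sampled soft
        #  actions, action log-probs, and per-episode return/length.)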
        # Create an Algo object.
        algo = SAC(config=config, name="my-sac")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(actor_time_steps=20000000,
                sync=True,
                render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        n = 10
        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
        self.assertTrue(mean_last_n > 200.0)

        env.terminate()
Example #9
    def test_dddqn_learning_on_breakout(self):
        # Create an Env object.
        env = OpenAIGymEnv(
            "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8,
            frame_skip=(2, 5)
        )

        preprocessor = Preprocessor(
            ImageCrop(x=5, y=29, width=150, height=167),
            GrayScale(keepdims=True),
            ImageResize(width=84, height=84, interpolation="bilinear"),
            lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),  # simple preprocessor: [0,255] to [-1.0,1.0]
            Sequence(sequence_length=4, adddim=False)
        )
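        # (Illustrative aside, assuming standard 210x160x3 Atari frames: crop ->
        #  167x150x3, grayscale -> 167x150x1, resize -> 84x84x1, scale to roughly
        #  [-1.0, 1.0), and Sequence presumably stacks the last 4 frames into an
        #  84x84x4 float32 input.)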
        # Create a DDDQNConfig.
        config = DDDQNConfig.make(
            "{}/../configs/dddqn_breakout_learning.json".format(os.path.dirname(__file__)),
            preprocessor=preprocessor,
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space
        )
        # Create an Algo object.
        algo = DDDQN(config=config, name="my-dddqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(actor_time_steps=10000000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        n = 10
        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
        self.assertTrue(mean_last_n > 150.0)

        env.terminate()