Example #1
    def test_impala_actor_compilation(self):
        """
        Tests IMPALA agent compilation (actor).
        """
        try:
            from rlgraph.environments.deepmind_lab import DeepmindLabEnv
        except ImportError:
            print("Deepmind Lab not installed: Will skip this test.")
            return

        agent_config = config_from_path("configs/impala_agent_for_deepmind_lab_env.json")
        env = DeepmindLabEnv(
            level_id="seekavoid_arena_01", observations=["RGB_INTERLEAVED", "INSTR"], frameskip=4
        )

        actor_agent = IMPALAAgent.from_spec(
            agent_config,
            type="actor",
            state_space=env.state_space,
            action_space=env.action_space,
            internal_states_space=Tuple(FloatBox(shape=(256,)), FloatBox(shape=(256,)), add_batch_rank=True),
            # Disable monitoring; it would otherwise make session-creation hang in docker.
            execution_spec=dict(disable_monitoring=True)
        )
        # Start Specifiable Server with Env manually.
        actor_agent.environment_stepper.environment_server.start()
        print("Compiled IMPALA type=actor agent.")
        actor_agent.environment_stepper.environment_server.stop()
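These snippets are test-class methods and omit their imports. A minimal header, assuming rlgraph's usual module layout (exact paths may differ across versions), would look roughly like this:

import time

import numpy as np

from rlgraph import get_backend
from rlgraph.agents import IMPALAAgent
from rlgraph.environments import GridWorld, OpenAIGymEnv
from rlgraph.spaces import FloatBox, Tuple
from rlgraph.tests.test_util import config_from_path, recursive_assert_almost_equal
from rlgraph.utils.util import default_dict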
Example #2
    def test_impala_on_cart_pole(self):
        """
        Creates a single IMPALAAgent and runs it via a simple loop on CartPole-v0.
        """
        env_spec = dict(type="open-ai-gym",
                        gym_env="CartPole-v0",
                        seed=10,
                        visualize=self.is_windows)
        config_ = config_from_path("configs/impala_agent_for_cartpole.json")
        config_["environment_spec"] = env_spec
        dummy_env = OpenAIGymEnv.from_spec(env_spec)
        agent = IMPALAAgent.from_spec(config_,
                                      state_space=dummy_env.state_space,
                                      action_space=dummy_env.action_space,
                                      execution_spec=dict(seed=10))

        learn_updates = 300
        mean_returns = []
        for i in range(learn_updates):
            ret = agent.update()
            mean_return = self._calc_mean_return(ret)
            mean_returns.append(mean_return)
            print("i={}/{} Loss={:.4} Avg-reward={:.2}".format(
                i, learn_updates, float(ret[1]), mean_return))

        # Assume we have learned something.
        average_return_last_n_episodes = np.nanmean(mean_returns[-100:])
        print("Average return over last n episodes: {}".format(
            average_return_last_n_episodes))
        self.assertGreater(average_return_last_n_episodes, 30.0)

        time.sleep(3)
        agent.terminate()
        time.sleep(3)
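The helper self._calc_mean_return() is not shown in these snippets. A plausible sketch, assuming ret[3] carries the sampled "rewards" and "terminals" arrays (the GridWorld example further down reads "states" and "action_probs" out of ret[3] in the same way), is:

    @staticmethod
    def _calc_mean_return(ret):
        # Flatten the batched reward/terminal tensors returned by agent.update().
        size = ret[3]["rewards"].size
        rewards = ret[3]["rewards"].reshape((size,))
        terminals = ret[3]["terminals"].reshape((size,))
        # Accumulate rewards into per-episode returns, cutting at terminals.
        returns = []
        return_ = 0.0
        for reward, terminal in zip(rewards, terminals):
            return_ += reward
            if terminal:
                returns.append(return_)
                return_ = 0.0
        # NaN if no episode finished within this batch; np.nanmean in the
        # callers tolerates that.
        return np.nanmean(returns) if returns else float("nan")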
Example #3
    def test_impala_on_outbreak(self):
        """
        Creates an IMPALAAgent and runs it via a simple loop on an openAI Breakout Env.
        """
        env = OpenAIGymEnv("Breakout-v0",
                           frameskip=4,
                           max_num_noops=30,
                           episodic_life=True,
                           visualize=False)
        config_ = config_from_path("configs/impala_agent_for_breakout.json")
        agent = IMPALAAgent.from_spec(
            config_,
            state_space=env.state_space,
            action_space=env.action_space,
        )

        learn_updates = 4000000
        mean_returns = []
        for i in range(learn_updates):
            ret = agent.update()
            mean_return = self._calc_mean_return(ret)
            mean_returns.append(mean_return)
            print("i={} Loss={:.4} Avg-reward={:.2}".format(
                i, float(ret[1]), mean_return))

        time.sleep(3)
        agent.terminate()
        time.sleep(3)
Example #4
    def test_impala_learner_compilation(self):
        """
        Tests IMPALA agent compilation (learner).
        """
        # NOTE: This test is currently disabled.
        return
        if get_backend() == "pytorch":
            return
        try:
            from rlgraph.environments.deepmind_lab import DeepmindLabEnv
        except ImportError:
            print("Deepmind Lab not installed: Will skip this test.")
            return

        agent_config = config_from_path(
            "configs/impala_agent_for_deepmind_lab_env.json")
        env_spec = dict(level_id="seekavoid_arena_01",
                        observations=["RGB_INTERLEAVED", "INSTR"],
                        frameskip=4)
        dummy_env = DeepmindLabEnv.from_spec(env_spec)
        learner_agent = IMPALAAgent.from_spec(
            agent_config,
            type="learner",
            state_space=dummy_env.state_space,
            action_space=dummy_env.action_space,
            internal_states_space=IMPALAAgent.default_internal_states_space,
            environment_spec=default_dict(dict(type="deepmind-lab"), env_spec),
            # Setup distributed tf.
            execution_spec=dict(
                mode="distributed",
                #gpu_spec=dict(
                #    gpus_enabled=True,
                #    max_usable_gpus=1,
                #    num_gpus=1
                #),
                distributed_spec=dict(job="learner",
                                      task_index=0,
                                      cluster_spec=self.impala_cluster_spec),
                session_config=dict(type="monitored-training-session",
                                    allow_soft_placement=True,
                                    log_device_placement=True,
                                    auto_start=False),
                disable_monitoring=True,
                enable_timeline=True,
            ))
        print(
            "Compiled IMPALA type=learner agent without starting the session (would block waiting for actor)."
        )

        ## Take one batch from the filled up queue and run an update_from_memory with the learner.
        #update_steps = 10
        #time_start = time.perf_counter()
        #for _ in range(update_steps):
        #    agent.call_api_method("update_from_memory")
        #time_total = time.perf_counter() - time_start
        #print("Done learning {}xbatch-of-{} in {}sec ({} updates/sec).".format(
        #    update_steps, agent.update_spec["batch_size"], time_total, update_steps / time_total)
        #)

        learner_agent.terminate()
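self.impala_cluster_spec is defined elsewhere in the test class and not shown. Following distributed-TF conventions it is presumably a job-name-to-addresses mapping; the hosts and ports below are placeholders:

    # Hypothetical two-job cluster: one learner, one actor, both on localhost.
    impala_cluster_spec = dict(
        learner=["localhost:22222"],
        actor=["localhost:22223"]
    )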
Example #5
    def test_impala_single_agent_compilation(self):
        """
        Tests IMPALA agent compilation (single-node mode).
        """
        env = GridWorld("2x2")
        agent = IMPALAAgent.from_spec(
            config_from_path("configs/impala_agent_for_2x2_gridworld.json"),
            state_space=env.state_space,
            action_space=env.action_space,
            update_spec=dict(batch_size=16),
            optimizer_spec=dict(type="adam", learning_rate=0.05))
        agent.terminate()
        print("Compiled IMPALA type=single agent.")
Example #6
    def test_impala_single_agent_compilation(self):
        """
        Tests IMPALA agent compilation (single-node mode).
        """
        # NOTE: This test is currently disabled.
        return
        if get_backend() == "pytorch":
            return
        env = GridWorld("2x2")
        agent = IMPALAAgent.from_spec(
            config_from_path("configs/impala_agent_for_2x2_gridworld.json"),
            state_space=env.state_space,
            action_space=env.action_space,
            update_spec=dict(batch_size=16),
            optimizer_spec=dict(type="adam", learning_rate=0.05),
            # Disable monitoring; it would otherwise make session-creation hang in docker.
            execution_spec=dict(disable_monitoring=True))
        agent.terminate()
        print("Compiled {}".format(agent))
Example #7
    def test_impala_on_2x2_grid_world(self):
        """
        Creates a single IMPALAAgent and runs it via a simple loop on a 2x2 GridWorld.
        """
        env = GridWorld("2x2")
        agent = IMPALAAgent.from_spec(
            config_from_path("configs/impala_agent_for_2x2_gridworld.json"),
            state_space=env.state_space,
            action_space=env.action_space,
            execution_spec=dict(seed=12),
            update_spec=dict(batch_size=16),
            optimizer_spec=dict(type="adam", learning_rate=0.05))

        learn_updates = 50
        for i in range(learn_updates):
            ret = agent.update()
            mean_return = self._calc_mean_return(ret)
            print("i={} Loss={:.4} Avg-reward={:.2}".format(
                i, float(ret[1]), mean_return))

        # Assume we have learned something.
        self.assertGreater(mean_return, -0.1)

        # Check the last action probs for the 2 valid next_states (start (after a reset) and one below start).
        action_probs = ret[3]["action_probs"].reshape((80, 4))
        next_states = ret[3]["states"][:, 1:].reshape((80, ))
        for s_, probs in zip(next_states, action_probs):
            # Start state:
            # - Assume we picked "right" in state=1 (in order to step into goal state).
            # - OR we picked "up" or "left" in state=0 (unlikely, but possible).
            if s_ == 0:
                recursive_assert_almost_equal(probs[0], 0.0, decimals=2)
                self.assertTrue(probs[1] > 0.99 or probs[2] > 0.99)
                recursive_assert_almost_equal(probs[3], 0.0, decimals=2)
            # One below start:
            # - Assume we picked "down" in start state with very large probability.
            # - OR we picked "left" or "down" in state=1 (unlikely, but possible).
            elif s_ == 1:
                recursive_assert_almost_equal(probs[0], 0.0, decimals=2)
                self.assertTrue(probs[1] > 0.99 or probs[2] > 0.99)
                recursive_assert_almost_equal(probs[3], 0.0, decimals=2)

        agent.terminate()
Example #8
    def test_impala_on_2x2_grid_world(self):
        """
        Creates a single IMPALAAgent and runs it via a simple update loop on a 2x2 GridWorld.
        """
        env = GridWorld("2x2")
        agent = IMPALAAgent.from_spec(
            config_from_path("configs/impala_agent_for_2x2_gridworld.json"),
            state_space=env.state_space,
            action_space=env.action_space,
            execution_spec=dict(seed=12),
            update_spec=dict(update_interval=4, batch_size=16),
            optimizer_spec=dict(type="adam", learning_rate=0.05),
        )

        learn_updates = 1000
        # Setup the queue runner.
        agent.call_api_method("setup_queue_runner")
        for _ in range(learn_updates):
            agent.update()

        #print("STATES:\n{}".format(agent.last_q_table["states"]))
        #print("\n\nQ(s,a)-VALUES:\n{}".format(np.round_(agent.last_q_table["q_values"], decimals=2)))

        #self.assertEqual(results["timesteps_executed"], time_steps)
        #self.assertEqual(results["env_frames"], time_steps)
        #self.assertGreaterEqual(results["mean_episode_reward"], -3.5)
        #self.assertGreaterEqual(results["max_episode_reward"], 0.0)
        #self.assertLessEqual(results["episodes_executed"], 350)

        # Check q-table for correct values.
        expected_q_values_per_state = {
            (1.0, 0, 0, 0): (-1, -5, 0, -1),
            (0, 1.0, 0, 0): (-1, 1, 0, 0)
        }
        for state, q_values in zip(agent.last_q_table["states"],
                                   agent.last_q_table["q_values"]):
            state, q_values = tuple(state), tuple(q_values)
            assert state in expected_q_values_per_state, \
                "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
            recursive_assert_almost_equal(q_values,
                                          expected_q_values_per_state[state],
                                          decimals=0)
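The state keys in agent.last_q_table appear to be one-hot vectors over the four grid cells. A tiny hypothetical helper makes the expected-values table easier to read:

    @staticmethod
    def _one_hot_to_cell(state):
        # E.g. (1.0, 0, 0, 0) -> cell 0 (start), (0, 1.0, 0, 0) -> cell 1.
        return int(np.argmax(state))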
Example #9
    def test_impala_actor_compilation(self):
        """
        Tests IMPALA agent compilation (actor).
        """
        # NOTE: This test is currently disabled.
        return
        if get_backend() == "pytorch":
            return
        try:
            from rlgraph.environments.deepmind_lab import DeepmindLabEnv
        except ImportError:
            print("Deepmind Lab not installed: Will skip this test.")
            return

        agent_config = config_from_path("configs/impala_agent_for_deepmind_lab_env.json")
        env_spec = dict(level_id="seekavoid_arena_01", observations=["RGB_INTERLEAVED", "INSTR"], frameskip=4)
        dummy_env = DeepmindLabEnv.from_spec(env_spec)
        agent = IMPALAAgent.from_spec(
            agent_config,
            type="actor",
            state_space=dummy_env.state_space,
            action_space=dummy_env.action_space,
            internal_states_space=Tuple(FloatBox(shape=(256,)), FloatBox(shape=(256,)), add_batch_rank=False),
            environment_spec=default_dict(dict(type="deepmind-lab"), env_spec),
            # Disable monitoring; it would otherwise make session-creation hang in docker.
            execution_spec=dict(
                session_config=dict(
                    type="monitored-training-session",
                    auto_start=False
                ),
                disable_monitoring=True
            )
        )
        # Start Specifiable Server with Env manually (monitoring is disabled).
        agent.environment_stepper.environment_server.start_server()
        print("Compiled {}".format(agent))
        agent.environment_stepper.environment_server.stop_server()
        agent.terminate()
Example #10
    def test_impala_learner_compilation(self):
        """
        Tests IMPALA agent compilation (learner).
        """
        try:
            from rlgraph.environments.deepmind_lab import DeepmindLabEnv
        except ImportError:
            print("Deepmind Lab not installed: Will skip this test.")
            return

        agent_config = config_from_path("configs/impala_agent_for_deepmind_lab_env.json")
        env = DeepmindLabEnv(
            level_id="seekavoid_arena_01", observations=["RGB_INTERLEAVED", "INSTR"], frameskip=4
        )

        learner_agent = IMPALAAgent.from_spec(
            agent_config,
            type="learner",
            state_space=env.state_space,
            action_space=env.action_space,
            internal_states_space=Tuple(FloatBox(shape=(256,)), FloatBox(shape=(256,)), add_batch_rank=True),
        )

        print("Compiled IMPALA type=learner agent.")