Example #1
0
    def test_needs_reset(self):
        """Episodes ended via info['needs_reset'] count as finished episodes."""
        # MagicMock is required here because eval_mode must be mockable,
        # which plain Mock cannot do.
        agent = mock.MagicMock()
        agent.batch_act.side_effect = [[1, 1] for _ in range(5)]

        def make_env(idx):
            env = mock.Mock()
            if idx == 0:
                # First episode: 0 -> 1 -> 2 -> 3 (reset)
                # Second episode: 4 -> 5 -> 6 -> 7 (done)
                env.reset.side_effect = [("state", 0), ("state", 4)]
                transitions = [
                    (("state", 1), 0, False, {}),
                    (("state", 2), 0, False, {}),
                    (("state", 3), 0, False, {"needs_reset": True}),
                    (("state", 5), -0.5, False, {}),
                    (("state", 6), 0, False, {}),
                    (("state", 7), 1, True, {}),
                ]
            else:
                # First episode: 0 -> 1 (reset)
                # Second episode: 2 -> 3 (reset)
                # Third episode: 4 -> 5 -> 6 -> 7 (done)
                env.reset.side_effect = [
                    ("state", 0), ("state", 2), ("state", 4)]
                transitions = [
                    (("state", 1), 2, False, {"needs_reset": True}),
                    (("state", 3), 3, False, {"needs_reset": True}),
                    (("state", 5), -0.6, False, {}),
                    (("state", 6), 0, False, {}),
                    (("state", 7), 1, True, {}),
                ]
            env.step.side_effect = transitions
            return env

        vec_env = pfrl.envs.SerialVectorEnv([make_env(i) for i in range(2)])

        # First Env: [1 2 (3_a) 5 6 (7_a)]
        # Second Env: [(1) (3_b) 5 6 (7_b)]
        # Results: (1), (3a), (3b), (7b)
        scores = evaluator.batch_run_evaluation_episodes(
            vec_env, agent, n_steps=None, n_episodes=4)
        assert len(scores) == 4
        for actual, expected in zip(scores, [0, 2, 3, 0.4]):
            np.testing.assert_allclose(actual, expected)
        # The final batch_observe call must receive batch_reset all True.
        assert all(agent.batch_observe.call_args[0][3])
Example #2
0
    def _test_abc_batch(self,
                        steps=100000,
                        require_success=True,
                        gpu=-1,
                        load_model=False,
                        num_envs=4):
        """Train the agent on the batch ABC env, evaluate it, and save it.

        Args:
            steps: Number of training steps.
            require_success: If True, every final test episode must reach
                the successful return.
            gpu: GPU device id; negative means CPU.
            load_model: If True, restore the agent from self.agent_dirname
                before training.
            num_envs: Number of parallel environments.
        """
        env, _ = self.make_vec_env_and_successful_return(
            test=False, num_envs=num_envs)
        test_env, successful_return = self.make_vec_env_and_successful_return(
            test=True, num_envs=num_envs)
        agent = self.make_agent(env, gpu)
        # Non-episodic settings cap each episode at two steps.
        max_episode_len = None if self.episodic else 2

        if load_model:
            print("Load agent from", self.agent_dirname)
            agent.load(self.agent_dirname)

        # Train
        train_agent_batch_with_evaluation(
            agent=agent,
            env=env,
            steps=steps,
            outdir=self.tmpdir,
            eval_interval=200,
            eval_n_steps=None,
            eval_n_episodes=40,
            successful_score=successful_return,
            eval_env=test_env,
            log_interval=100,
            max_episode_len=max_episode_len,
        )
        env.close()

        # Test
        n_test_runs = 10
        eval_returns, _ = batch_run_evaluation_episodes(
            test_env,
            agent,
            n_steps=None,
            n_episodes=n_test_runs,
            max_episode_len=max_episode_len,
        )
        test_env.close()
        if require_success:
            successes = np.sum(np.asarray(eval_returns) >= successful_return)
            assert successes == n_test_runs

        # Save
        agent.save(self.agent_dirname)
Example #3
0
    def _test_batch_training(self,
                             gpu,
                             steps=5000,
                             load_model=False,
                             require_success=True):
        """Run batch training with evaluation, then test and persist the agent.

        Args:
            gpu: GPU device id; negative means CPU.
            steps: Number of training steps.
            load_model: If True, restore the agent and its replay buffer from
                self.agent_dirname / self.rbuf_filename before training.
            require_success: If True, every final test episode must reach
                the successful return.
        """
        random_seed.set_random_seed(1)
        logging.basicConfig(level=logging.DEBUG)

        env, _ = self.make_vec_env_and_successful_return(test=False)
        test_env, successful_return = self.make_vec_env_and_successful_return(
            test=True)
        agent = self.make_agent(env, gpu)

        if load_model:
            print("Load agent from", self.agent_dirname)
            agent.load(self.agent_dirname)
            agent.replay_buffer.load(self.rbuf_filename)

        # Train
        train_agent_batch_with_evaluation(
            agent=agent,
            env=env,
            steps=steps,
            outdir=self.tmpdir,
            eval_interval=200,
            eval_n_steps=None,
            eval_n_episodes=5,
            successful_score=1,
            eval_env=test_env,
        )
        env.close()

        # Test
        n_test_runs = 5
        eval_returns, _ = batch_run_evaluation_episodes(
            test_env,
            agent,
            n_steps=None,
            n_episodes=n_test_runs,
        )
        test_env.close()
        if require_success:
            # Only compute the success count when it is actually checked,
            # matching the sibling _test_abc_batch.
            n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
            assert n_succeeded == n_test_runs

        # Save
        agent.save(self.agent_dirname)
        agent.replay_buffer.save(self.rbuf_filename)
Example #4
0
    def _test_abc(self,
                  steps=1000000,
                  require_success=True,
                  gpu=-1,
                  load_model=False):
        """Train the agent on the ABC env, evaluate it, and save it.

        Args:
            steps: Number of training steps.
            require_success: If True, more than 80% of the final test
                episodes must reach the successful return.
            gpu: GPU device id; negative means CPU.
            load_model: If True, restore the agent from self.agent_dirname
                before training.
        """
        env, _ = self.make_env_and_successful_return(test=False,
                                                     n=self.num_processes)
        test_env, successful_return = self.make_env_and_successful_return(
            test=True, n=1)
        agent = self.make_agent(env, gpu)

        if load_model:
            print("Load agent from", self.agent_dirname)
            agent.load(self.agent_dirname)

        # Train
        pfrl.experiments.train_agent_batch_with_evaluation(
            agent=agent,
            env=env,
            steps=steps,
            outdir=self.tmpdir,
            log_interval=10,
            eval_interval=200,
            eval_n_steps=None,
            eval_n_episodes=50,
            successful_score=1,
            eval_env=test_env,
        )
        env.close()

        # Test
        n_test_runs = 100
        # batch_run_evaluation_episodes returns a (returns, lengths) tuple
        # (see the sibling tests that unpack it); keeping the whole tuple
        # made the success comparison below operate on episode lengths as
        # well as returns. Unpack the returns only.
        eval_returns, _ = batch_run_evaluation_episodes(
            test_env,
            agent,
            n_steps=None,
            n_episodes=n_test_runs,
        )
        test_env.close()
        n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
        if require_success:
            assert n_succeeded > 0.8 * n_test_runs

        # Save
        agent.save(self.agent_dirname)
Example #5
0
def test_batch_run_evaluation_episodes_with_n_steps(n_episodes, n_steps):
    """Step-bounded batch evaluation; giving both bounds must raise."""
    # MagicMock is required here because eval_mode must be mockable,
    # which plain Mock cannot do.
    agent = mock.MagicMock()
    agent.batch_act.side_effect = [[1, 1] for _ in range(5)]

    def make_env(idx):
        env = mock.Mock()
        if idx == 0:
            # First episode: 0 -> 1 -> 2 -> 3 (reset)
            # Second episode: 4 -> 5 -> 6 -> 7 (done)
            env.reset.side_effect = [("state", 0), ("state", 4)]
            transitions = [
                (("state", 1), 0, False, {}),
                (("state", 2), 0.1, False, {}),
                (("state", 3), 0.2, False, {"needs_reset": True}),
                (("state", 5), -0.5, False, {}),
                (("state", 6), 0, False, {}),
                (("state", 7), 1, True, {}),
            ]
        else:
            # First episode: 0 -> 1 (reset)
            # Second episode: 2 -> 3 (reset)
            # Third episode: 4 -> 5 -> 6 -> 7 (done)
            env.reset.side_effect = [("state", 0), ("state", 2), ("state", 4)]
            transitions = [
                (("state", 1), 2, False, {"needs_reset": True}),
                (("state", 3), 3, False, {"needs_reset": True}),
                (("state", 5), -0.6, False, {}),
                (("state", 6), 0, False, {}),
                (("state", 7), 1, True, {}),
            ]
        env.step.side_effect = transitions
        return env

    vec_env = pfrl.envs.SerialVectorEnv([make_env(i) for i in range(2)])
    if n_episodes:
        # Specifying both n_steps and n_episodes is rejected.
        with pytest.raises(AssertionError):
            evaluator.batch_run_evaluation_episodes(
                vec_env, agent, n_steps=n_steps, n_episodes=n_episodes)
        return

    # First Env:  [1   2   (3_a)  5  6   (7_a)]
    # Second Env: [(1)(3_b) 5     6 (7_b)]
    scores = evaluator.batch_run_evaluation_episodes(
        vec_env, agent, n_steps=n_steps, n_episodes=n_episodes)
    if n_steps == 2:
        assert len(scores) == 1
        np.testing.assert_allclose(scores[0], 0.1)
        assert agent.batch_observe.call_count == 2
    else:
        expected_scores = [0.3, 2.0, 3.0]
        assert len(scores) == len(expected_scores)
        for actual, expected in zip(scores, expected_scores):
            np.testing.assert_allclose(actual, expected)
    # batch_reset should be all True
    assert all(agent.batch_observe.call_args[0][3])