Example #1
    def _test_abc(
        self,
        t_max,
        recurrent,
        discrete=True,
        episodic=True,
        steps=100000,
        require_success=True,
    ):

        nproc = 8

        def make_env(process_idx, test):
            size = 2
            return ABC(
                size=size,
                discrete=discrete,
                episodic=episodic or test,
                partially_observable=self.recurrent,
                deterministic=test,
            )

        env = make_env(0, False)

        model = self.make_model(env)

        from pfrl.optimizers import SharedRMSpropEpsInsideSqrt

        # Optimizer statistics live in shared memory so that all asynchronous
        # actor-learner processes update the same state.
        opt = SharedRMSpropEpsInsideSqrt(model.parameters())
        gamma = 0.8
        beta = 1e-2
        agent = a3c.A3C(
            model,
            opt,
            t_max=t_max,
            gamma=gamma,
            beta=beta,
            act_deterministically=True,
            max_grad_norm=1.0,
            recurrent=recurrent,
        )

        max_episode_len = None if episodic else 2

        with warnings.catch_warnings(record=True) as warns:
            train_agent_async(
                outdir=self.outdir,
                processes=nproc,
                make_env=make_env,
                agent=agent,
                steps=steps,
                max_episode_len=max_episode_len,
                eval_interval=500,
                eval_n_steps=None,
                eval_n_episodes=5,
                successful_score=1,
            )
            assert len(warns) == 0, warns[0]

        # The agent returned by train_agent_async is not guaranteed to be
        # successful because parameters could be modified by other processes
        # after success. Thus here the successful model is loaded explicitly.
        if require_success:
            agent.load(os.path.join(self.outdir, "successful"))

        # Test
        env = make_env(0, True)
        n_test_runs = 5
        eval_returns = run_evaluation_episodes(
            env,
            agent,
            n_steps=None,
            n_episodes=n_test_runs,
            max_episode_len=max_episode_len,
        )
        successful_return = 1
        if require_success:
            n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
            assert n_succeeded == n_test_runs
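
The helper above relies on attributes set up outside the snippet: self.outdir (the checkpoint directory), self.recurrent, and self.make_model (which builds the policy/value network and is not shown). Below is a minimal sketch of how such a helper might be driven from a parametrized pytest class; the fixture name, parameter grids, and test method are illustrative assumptions, not part of the example itself.

import tempfile

import pytest


@pytest.mark.parametrize("t_max", [1, 2])
@pytest.mark.parametrize("recurrent", [True, False])
class TestA3C:
    @pytest.fixture(autouse=True)
    def setUp(self, t_max, recurrent):
        # Expose the parametrized values as attributes read by _test_abc.
        self.t_max = t_max
        self.recurrent = recurrent
        # Directory where train_agent_async saves checkpoints, including
        # the "successful" snapshot that _test_abc loads back.
        self.outdir = tempfile.mkdtemp()

    def test_discrete_episodic(self):
        self._test_abc(self.t_max, self.recurrent)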
Example #2
    def _test_abc(
        self,
        t_max,
        use_lstm,
        discrete=True,
        episodic=True,
        steps=100000,
        require_success=True,
    ):

        nproc = 8

        def make_env(process_idx, test):
            size = 2
            return ABC(
                size=size,
                discrete=discrete,
                episodic=episodic or test,
                partially_observable=self.use_lstm,
                deterministic=test,
            )

        sample_env = make_env(0, False)
        action_space = sample_env.action_space
        obs_space = sample_env.observation_space

        # ACER samples whole episodes from replay, hence an episodic buffer.
        replay_buffer = EpisodicReplayBuffer(10**4)
        obs_size = obs_space.low.size
        hidden_size = 20
        if discrete:
            n_actions = action_space.n
            head = acer.ACERDiscreteActionHead(
                pi=nn.Sequential(
                    nn.Linear(hidden_size, n_actions),
                    SoftmaxCategoricalHead(),
                ),
                q=nn.Sequential(
                    nn.Linear(hidden_size, n_actions),
                    DiscreteActionValueHead(),
                ),
            )
        else:
            action_size = action_space.low.size
            head = acer.ACERContinuousActionHead(
                pi=nn.Sequential(
                    # The Gaussian head expects both mean and variance
                    # parameters, hence an output of size action_size * 2.
                    nn.Linear(hidden_size, action_size * 2),
                    GaussianHeadWithDiagonalCovariance(),
                ),
                v=nn.Sequential(nn.Linear(hidden_size, 1)),
                adv=nn.Sequential(
                    ConcatObsAndAction(),
                    nn.Linear(hidden_size + action_size, 1),
                ),
            )
        if use_lstm:
            # The env is partially observable in this case, so the model
            # needs an LSTM to integrate observations over time.
            model = pfrl.nn.RecurrentSequential(
                nn.Linear(obs_size, hidden_size),
                nn.LeakyReLU(),
                nn.LSTM(num_layers=1,
                        input_size=hidden_size,
                        hidden_size=hidden_size),
                head,
            )
        else:
            model = nn.Sequential(
                nn.Linear(obs_size, hidden_size),
                nn.LeakyReLU(),
                head,
            )
        eps = 1e-8
        opt = pfrl.optimizers.SharedRMSpropEpsInsideSqrt(
            model.parameters(), lr=1e-3, eps=eps, alpha=0.99
        )
        gamma = 0.5
        beta = 1e-5
        if self.n_times_replay == 0 and self.disable_online_update:
            # At least one of them must be enabled
            pytest.skip()
        agent = acer.ACER(
            model,
            opt,
            replay_buffer=replay_buffer,
            t_max=t_max,
            gamma=gamma,
            beta=beta,
            n_times_replay=self.n_times_replay,
            act_deterministically=True,
            disable_online_update=self.disable_online_update,
            replay_start_size=100,
            use_trust_region=self.use_trust_region,
            recurrent=use_lstm,
        )

        max_episode_len = None if episodic else 2

        with warnings.catch_warnings(record=True) as warns:
            train_agent_async(
                outdir=self.outdir,
                processes=nproc,
                make_env=make_env,
                agent=agent,
                steps=steps,
                max_episode_len=max_episode_len,
                eval_interval=500,
                eval_n_steps=None,
                eval_n_episodes=5,
                successful_score=1,
            )
            assert len(warns) == 0, warns[0]

        # The agent returned by train_agent_async is not guaranteed to be
        # successful because parameters could be modified by other processes
        # after success. Thus here the successful model is loaded explicitly.
        if require_success:
            agent.load(os.path.join(self.outdir, "successful"))

        # Test
        env = make_env(0, True)
        n_test_runs = 5
        eval_returns = run_evaluation_episodes(
            env,
            agent,
            n_steps=None,
            n_episodes=n_test_runs,
            max_episode_len=max_episode_len,
        )
        successful_return = 1
        if require_success:
            n_succeeded = np.sum(np.asarray(eval_returns) >= successful_return)
            assert n_succeeded == n_test_runs
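
As in the first example, several attributes come from outside the snippet: self.outdir, self.use_lstm, self.n_times_replay, self.disable_online_update, and self.use_trust_region. The sketch below, under the same assumptions as before, shows a parametrized pytest class that could supply them; the names and parameter grids are illustrative, not taken from the example.

import tempfile

import pytest


@pytest.mark.parametrize("t_max", [1, 2])
@pytest.mark.parametrize("use_lstm", [True, False])
@pytest.mark.parametrize("n_times_replay", [0, 2])
@pytest.mark.parametrize("disable_online_update", [True, False])
@pytest.mark.parametrize("use_trust_region", [True, False])
class TestACER:
    @pytest.fixture(autouse=True)
    def setUp(
        self, t_max, use_lstm, n_times_replay, disable_online_update, use_trust_region
    ):
        # Expose the parametrized values as attributes read by _test_abc.
        self.t_max = t_max
        self.use_lstm = use_lstm
        self.n_times_replay = n_times_replay
        self.disable_online_update = disable_online_update
        self.use_trust_region = use_trust_region
        # Checkpoint directory used by train_agent_async and agent.load().
        self.outdir = tempfile.mkdtemp()

    def test_discrete_episodic(self):
        self._test_abc(self.t_max, self.use_lstm)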