Example 1
def _rainbow(env, writer=DummyWriter()):
    # Distributional Q-network (C51-style) with NoisyNet exploration noise.
    model = model_constructor(env, atoms=atoms, sigma=sigma).to(device)
    optimizer = Adam(model.parameters(), lr=lr)
    q = QDist(
        model,
        optimizer,
        env.action_space.n,
        atoms,
        v_min=v_min,
        v_max=v_max,
        writer=writer,
    )
    # replay_buffer = ExperienceReplayBuffer(replay_buffer_size, device=device)
    # Prioritized replay, wrapped in an n-step buffer for multi-step returns.
    replay_buffer = PrioritizedReplayBuffer(
        replay_buffer_size,
        alpha=alpha,
        beta=beta,
        device=device
    )
    replay_buffer = NStepReplayBuffer(n_steps, discount_factor, replay_buffer)
    return Rainbow(
        q,
        replay_buffer,
        # NoisyNets handle exploration, so no epsilon-greedy noise is added.
        exploration=0.,
        # Effective discount is gamma**n because the buffer returns n-step targets.
        discount_factor=discount_factor ** n_steps,
        minibatch_size=minibatch_size,
        replay_start_size=replay_start_size,
        update_frequency=update_frequency,
        writer=writer,
    )
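A note on the free variables in this example: names such as model_constructor, lr, atoms, sigma, v_min, v_max, alpha, beta, n_steps and device are not defined inside _rainbow; they are captured by closure from an enclosing preset function. A minimal sketch of that pattern follows; the outer function name and the default values are illustrative assumptions, not the library's actual preset.

def rainbow(device='cpu', lr=1e-4, atoms=51, sigma=0.5, v_min=-10, v_max=10,
            alpha=0.5, beta=0.5, n_steps=3, discount_factor=0.99,
            replay_buffer_size=20000, replay_start_size=1000,
            minibatch_size=32, update_frequency=1, model_constructor=None):
    # Placeholder defaults; the real preset's tuned values may differ.
    def _rainbow(env, writer=DummyWriter()):
        ...  # body exactly as in the example above
    return _rainbow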
Example 2

    def agent(self, writer=DummyWriter(), train_steps=float('inf')):
        n_updates = (train_steps - self.hyperparameters['replay_start_size']
                     ) / self.hyperparameters['update_frequency']

        optimizer = Adam(self.model.parameters(),
                         lr=self.hyperparameters['lr'],
                         eps=self.hyperparameters['eps'])

        q_dist = QDist(
            self.model,
            optimizer,
            self.n_actions,
            self.hyperparameters['atoms'],
            scheduler=CosineAnnealingLR(optimizer, n_updates),
            v_min=self.hyperparameters['v_min'],
            v_max=self.hyperparameters['v_max'],
            target=FixedTarget(
                self.hyperparameters['target_update_frequency']),
            writer=writer,
        )

        replay_buffer = NStepReplayBuffer(
            self.hyperparameters['n_steps'],
            self.hyperparameters['discount_factor'],
            PrioritizedReplayBuffer(self.hyperparameters['replay_buffer_size'],
                                    alpha=self.hyperparameters['alpha'],
                                    beta=self.hyperparameters['beta'],
                                    device=self.device))

        return DeepmindAtariBody(
            Rainbow(
                q_dist,
                replay_buffer,
                exploration=LinearScheduler(
                    self.hyperparameters['initial_exploration'],
                    self.hyperparameters['final_exploration'],
                    0,
                    train_steps - self.hyperparameters['replay_start_size'],
                    name="exploration",
                    writer=writer,
                ),
                discount_factor=self.hyperparameters['discount_factor']
                ** self.hyperparameters['n_steps'],
                minibatch_size=self.hyperparameters['minibatch_size'],
                replay_start_size=self.hyperparameters['replay_start_size'],
                update_frequency=self.hyperparameters['update_frequency'],
                writer=writer,
            ),
            lazy_frames=True,
            episodic_lives=True,
        )
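All of the snippets in this listing rely on the same handful of classes from torch and the autonomous-learning-library. The import paths below are an assumption about that library's layout (they can differ between versions) and are listed only to make the examples easier to trace:

# Assumed import layout for the autonomous-learning-library ("all");
# module paths may vary between library versions.
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR

from all.agents import Rainbow
from all.approximation import QDist, FixedTarget
from all.bodies import DeepmindAtariBody
from all.logging import DummyWriter
from all.memory import NStepReplayBuffer, PrioritizedReplayBuffer
from all.optim import LinearScheduler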
Example 3

    def _rainbow(env, writer=DummyWriter()):
        # Convert the total frame budget into agent timesteps (each action is
        # repeated for 4 frames), then into the number of gradient updates.
        action_repeat = 4
        last_timestep = last_frame / action_repeat
        last_update = (last_timestep - replay_start_size) / update_frequency

        model = model_constructor(env, atoms=atoms, sigma=sigma).to(device)
        optimizer = Adam(model.parameters(), lr=lr, eps=eps)
        q = QDist(
            model,
            optimizer,
            env.action_space.n,
            atoms,
            scheduler=CosineAnnealingLR(optimizer, last_update),
            v_min=v_min,
            v_max=v_max,
            target=FixedTarget(target_update_frequency),
            writer=writer,
        )
        replay_buffer = PrioritizedReplayBuffer(replay_buffer_size,
                                                alpha=alpha,
                                                beta=beta,
                                                device=device)
        replay_buffer = NStepReplayBuffer(n_steps, discount_factor,
                                          replay_buffer)

        agent = Rainbow(
            q,
            replay_buffer,
            exploration=LinearScheduler(initial_exploration,
                                        final_exploration,
                                        0,
                                        last_timestep,
                                        name='exploration',
                                        writer=writer),
            discount_factor=discount_factor**n_steps,
            minibatch_size=minibatch_size,
            replay_start_size=replay_start_size,
            update_frequency=update_frequency,
            writer=writer,
        )
        return DeepmindAtariBody(agent, lazy_frames=True, episodic_lives=True)
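To make the scheduling arithmetic concrete, the snippet below plugs in illustrative values; the numbers are assumptions, not taken from the example. last_timestep bounds the exploration LinearScheduler, and last_update is the horizon handed to CosineAnnealingLR.

# Illustrative numbers only; the example does not pin these values down.
last_frame = 40_000_000      # total environment frames for the run
action_repeat = 4            # frames consumed per agent action
replay_start_size = 80_000   # timesteps collected before learning starts
update_frequency = 4         # timesteps between gradient updates

last_timestep = last_frame / action_repeat                            # 10_000_000.0
last_update = (last_timestep - replay_start_size) / update_frequency  # 2_480_000.0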
Example 4
def agent_constructor(writer):
    return DeepmindAtariBody(
        Rainbow(
            q_dist,
            replay_buffer,
            exploration=LinearScheduler(
                self.hyperparameters['initial_exploration'],
                self.hyperparameters['final_exploration'],
                0,
                train_steps - self.hyperparameters['replay_start_size'],
                name="exploration",
                writer=writer
            ),
            discount_factor=self.hyperparameters['discount_factor'] ** self.hyperparameters["n_steps"],
            minibatch_size=self.hyperparameters['minibatch_size'],
            replay_start_size=self.hyperparameters['replay_start_size'],
            update_frequency=self.hyperparameters['update_frequency'],
            writer=writer,
        ),
        lazy_frames=True,
        episodic_lives=True
    )
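Because the closure only takes a writer, building the agent afterwards is a single call. A hedged usage sketch, assuming the surrounding preset code (not shown in the example) has already created q_dist, replay_buffer and train_steps:

# Hypothetical usage: q_dist, replay_buffer and train_steps are assumed to have
# been built earlier in the enclosing preset method (not shown in the example).
writer = DummyWriter()
agent = agent_constructor(writer)   # a DeepmindAtariBody wrapping a Rainbow agent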