def _vqn(envs, writer=DummyWriter()):
    # DeepMind-style Atari preprocessing repeats each action 4 times, so the
    # exploration schedule is expressed in agent timesteps rather than frames.
    action_repeat = 4
    final_exploration_timestep = final_exploration_frame / action_repeat

    # Build the Q-network and its optimizer from the first environment.
    env = envs[0]
    model = model_constructor(env).to(device)
    optimizer = Adam(model.parameters(), lr=lr, eps=eps)
    q = QNetwork(
        model,
        optimizer,
        writer=writer
    )
    # Epsilon-greedy exploration, annealed linearly from initial_exploration
    # to final_exploration over final_exploration_timestep steps.
    policy = ParallelGreedyPolicy(
        q,
        env.action_space.n,
        epsilon=LinearScheduler(
            initial_exploration,
            final_exploration,
            0,
            final_exploration_timestep,
            name="epsilon",
            writer=writer
        )
    )
    # Wrap the VQN agent in the standard DeepMind Atari preprocessing body.
    return DeepmindAtariBody(
        VQN(q, policy, discount_factor=discount_factor),
    )
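The closure above references several names that are not defined in the snippet itself (device, lr, eps, initial_exploration, final_exploration, final_exploration_frame, discount_factor, model_constructor); in the autonomous-learning-library presets they are captured from an enclosing factory function. The sketch below shows one plausible shape of that wrapper; the import paths are assumptions based on the library's package layout, and the default values are placeholders rather than the preset's tuned hyperparameters.

# Import paths assumed from the autonomous-learning-library ("all") layout.
from torch.optim import Adam

from all.agents import VQN
from all.approximation import QNetwork
from all.bodies import DeepmindAtariBody
from all.logging import DummyWriter
from all.optim import LinearScheduler
from all.policies import ParallelGreedyPolicy


def vqn(
        # placeholder hyperparameter values, not the library's defaults
        device="cuda",
        discount_factor=0.99,
        lr=1e-3,
        eps=1.5e-4,
        initial_exploration=1.0,
        final_exploration=0.02,
        final_exploration_frame=1000000,
        model_constructor=None,
):
    def _vqn(envs, writer=DummyWriter()):
        ...  # body as in the example above
    return _vqn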
Example 2
    def agent(self, writer=DummyWriter(), train_steps=float('inf')):
        # The schedules run in optimizer updates: one update is made per batch
        # of n_envs environment steps, so train_steps environment steps
        # correspond to train_steps / n_envs updates.
        n_updates = train_steps / self.hyperparameters['n_envs']

        optimizer = Adam(
            self.model.parameters(),
            lr=self.hyperparameters['lr'],
            eps=self.hyperparameters['eps']
        )

        # Q-network with a cosine-annealed learning rate over the training run.
        q = QNetwork(
            self.model,
            optimizer,
            scheduler=CosineAnnealingLR(optimizer, n_updates),
            writer=writer
        )

        # Epsilon-greedy exploration, annealed linearly; the end step is
        # divided by n_envs because the schedule advances once per update.
        policy = ParallelGreedyPolicy(
            q,
            self.n_actions,
            epsilon=LinearScheduler(
                self.hyperparameters['initial_exploration'],
                self.hyperparameters['final_exploration'],
                0,
                self.hyperparameters['final_exploration_step'] / self.hyperparameters['n_envs'],
                name="exploration",
                writer=writer
            )
        )

        return VQN(q, policy, discount_factor=self.hyperparameters['discount_factor'])
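Everything agent() needs comes from self.hyperparameters. The dictionary below lists the keys that the method reads; the values are illustrative placeholders, not the preset's actual defaults.

# Keys read by agent() above; all values here are placeholders.
hyperparameters = {
    'lr': 1e-3,                        # Adam learning rate
    'eps': 1.5e-4,                     # Adam epsilon term
    'n_envs': 16,                      # number of parallel environments
    'discount_factor': 0.99,
    'initial_exploration': 1.0,        # starting epsilon
    'final_exploration': 0.02,         # epsilon after annealing
    'final_exploration_step': 250000,  # total env steps; divided by n_envs in agent()
}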
    def parallel_test_agent(self):
        # Evaluation agent: a copy of the trained network (no optimizer) acting
        # greedily with a fixed test-time epsilon.
        q = QNetwork(copy.deepcopy(self.model))
        policy = ParallelGreedyPolicy(
            q,
            self.n_actions,
            epsilon=self.hyperparameters["test_exploration"])
        return VSarsaTestAgent(policy)
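For completeness, the single-environment counterpart of this method typically builds the same evaluation agent with a non-parallel greedy policy. The sketch below assumes a GreedyPolicy class whose constructor mirrors ParallelGreedyPolicy above; treat that signature as an assumption.

    def test_agent(self):
        # Hypothetical single-env variant; GreedyPolicy signature assumed to
        # mirror ParallelGreedyPolicy.
        q = QNetwork(copy.deepcopy(self.model))
        policy = GreedyPolicy(
            q,
            self.n_actions,
            epsilon=self.hyperparameters["test_exploration"])
        return VSarsaTestAgent(policy)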
def _vsarsa(envs, writer=DummyWriter()):
    # Same construction as the VQN preset above, but with a constant epsilon
    # and a vanilla SARSA update instead of Q-learning.
    env = envs[0]
    model = model_constructor(env).to(device)
    optimizer = Adam(model.parameters(), lr=lr, eps=eps)
    q = QNetwork(model, optimizer, writer=writer)
    policy = ParallelGreedyPolicy(q, env.action_space.n, epsilon=epsilon)
    return VSarsa(q, policy, discount_factor=discount_factor)
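Unlike the VQN preset at the top of the listing, _vsarsa uses a constant epsilon. If an annealed schedule is preferred, the policy accepts a LinearScheduler with the same call shape used there; the numbers below are placeholders.

# Drop-in alternative to the constant epsilon above (placeholder values).
policy = ParallelGreedyPolicy(
    q,
    env.action_space.n,
    epsilon=LinearScheduler(
        1.0,      # initial epsilon
        0.02,     # final epsilon
        0,        # annealing start step
        250000,   # annealing end step
        name="epsilon",
        writer=writer
    )
)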
Example 5
    def parallel_test_agent(self):
        # Evaluation agent for Atari: the greedy VQN test agent is wrapped in
        # the DeepMind preprocessing body, mirroring the training agent.
        q = QNetwork(copy.deepcopy(self.model))
        policy = ParallelGreedyPolicy(
            q,
            self.n_actions,
            epsilon=self.hyperparameters['test_exploration'])
        return DeepmindAtariBody(VQNTestAgent(policy))
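The agent() and parallel_test_agent() methods above are excerpts from a class-based preset. The skeleton below is a hypothetical reconstruction that shows where the attributes they rely on come from; the class name and constructor signature are assumptions, not the library's actual preset class.

import copy


# Hypothetical preset skeleton; the class name and constructor are assumptions.
class VQNAtariPreset:
    def __init__(self, env, model, hyperparameters, device="cuda"):
        self.model = model.to(device)           # network shared by train and test agents
        self.n_actions = env.action_space.n     # size of the discrete action space
        self.hyperparameters = hyperparameters  # dict like the one sketched above

    # agent(), parallel_test_agent(), ... as in the excerpts above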