Code example #1
0
File: vsarsa.py  Project: lmurph05/engg-3130-final
 def _vsarsa(envs, writer=None):
     """Build a classic-control VSarsa agent for the first of *envs*.

     Args:
         envs: sequence of environments; only ``envs[0]`` is used to size
             the model and the action space.
         writer: optional experiment writer; a fresh ``DummyWriter`` is
             created per call when omitted.

     Returns:
         A ``VSarsa`` agent wrapping an epsilon-greedy Q-network.

     NOTE(review): ``device``, ``lr``, ``eps``, ``epsilon`` and
     ``discount_factor`` are free variables from an enclosing preset
     scope not visible here — confirm against the caller.
     """
     # Fix: the original default `writer=DummyWriter()` was evaluated once
     # at function-definition time and shared across every call. Use the
     # None-sentinel idiom so each call gets its own writer.
     writer = DummyWriter() if writer is None else writer
     env = envs[0]
     model = fc_relu_q(env).to(device)
     optimizer = Adam(model.parameters(), lr=lr, eps=eps)
     q = QNetwork(model, optimizer, writer=writer)
     policy = GreedyPolicy(q, env.action_space.n, epsilon=epsilon)
     return VSarsa(q, policy, discount_factor=discount_factor)
Code example #2
0
    def agent(self, writer=None, train_steps=float('inf')):
        """Construct the Atari VSarsa agent for training.

        Args:
            writer: optional experiment writer; a fresh ``DummyWriter``
                is created per call when omitted.
            train_steps: total environment steps planned; used to size
                the cosine LR schedule. Defaults to infinity, which
                yields an infinite annealing horizon — presumably
                intentional for open-ended training (verify against
                ``CosineAnnealingLR`` semantics).

        Returns:
            A ``DeepmindAtariBody``-wrapped ``VSarsa`` agent.
        """
        # Fix: the original default `writer=DummyWriter()` was evaluated
        # once at definition time and shared across calls; use the
        # None-sentinel idiom instead.
        writer = DummyWriter() if writer is None else writer

        # Updates happen once per step across all parallel envs.
        n_updates = train_steps / self.hyperparameters['n_envs']

        optimizer = Adam(
            self.model.parameters(),
            lr=self.hyperparameters['lr'],
            eps=self.hyperparameters['eps']
        )

        q = QNetwork(
            self.model,
            optimizer,
            scheduler=CosineAnnealingLR(optimizer, n_updates),
            writer=writer
        )

        # Exploration is annealed linearly in *update* units, so the
        # step count is divided by the number of parallel environments.
        policy = ParallelGreedyPolicy(
            q,
            self.n_actions,
            epsilon=LinearScheduler(
                self.hyperparameters['initial_exploration'],
                self.hyperparameters['final_exploration'],
                0,
                self.hyperparameters["final_exploration_step"] / self.hyperparameters["n_envs"],
                name="exploration",
                writer=writer
            )
        )

        return DeepmindAtariBody(
            VSarsa(q, policy, discount_factor=self.hyperparameters['discount_factor']),
        )
Code example #3
0
 def _vsarsa(envs, writer=None):
     """Build an Atari VSarsa agent for the first of *envs*.

     Args:
         envs: sequence of environments; only ``envs[0]`` is used to
             size the model and the action space.
         writer: optional experiment writer; a fresh ``DummyWriter`` is
             created per call when omitted.

     Returns:
         A ``DeepmindAtariBody``-wrapped ``VSarsa`` agent using a
         linearly annealed epsilon-greedy policy.

     NOTE(review): ``device``, ``lr``, ``alpha``, ``eps``,
     ``initial_exploration``, ``final_exploration``,
     ``final_exploration_frame`` and ``discount_factor`` are free
     variables from an enclosing preset scope not visible here.
     """
     # Fix: the original default `writer=DummyWriter()` was evaluated once
     # at function-definition time and shared across every call. Use the
     # None-sentinel idiom so each call gets its own writer.
     writer = DummyWriter() if writer is None else writer
     env = envs[0]
     model = nature_ddqn(env).to(device)
     optimizer = RMSprop(model.parameters(), lr=lr, alpha=alpha, eps=eps)
     q = QNetwork(model,
                  optimizer,
                  env.action_space.n,
                  loss=smooth_l1_loss,
                  writer=writer)
     policy = GreedyPolicy(q,
                           env.action_space.n,
                           epsilon=LinearScheduler(initial_exploration,
                                                   final_exploration,
                                                   0,
                                                   final_exploration_frame,
                                                   name="epsilon",
                                                   writer=writer))
     return DeepmindAtariBody(VSarsa(q, policy, gamma=discount_factor), )