def _vsarsa(envs, writer=DummyWriter()):
    """Build a VSarsa agent from a batch of vectorized environments.

    Args:
        envs: list of environments; only the first is used to size the
            network and read the action space.
        writer: summary writer for logging (defaults to a no-op writer).

    Returns:
        A VSarsa agent driven by a GreedyPolicy over a Q-network.
    """
    # A single representative environment determines the network shape.
    reference_env = envs[0]
    network = fc_relu_q(reference_env).to(device)
    adam = Adam(network.parameters(), lr=lr, eps=eps)
    q_approx = QNetwork(network, adam, writer=writer)
    greedy = GreedyPolicy(q_approx, reference_env.action_space.n, epsilon=epsilon)
    return VSarsa(q_approx, greedy, discount_factor=discount_factor)
def agent(self, writer=DummyWriter(), train_steps=float('inf')):
    """Construct the VSarsa Atari agent from this preset's hyperparameters.

    Args:
        writer: summary writer for logging (defaults to a no-op writer).
        train_steps: total number of environment steps planned; used only
            to size the learning-rate schedule (defaults to infinity).

    Returns:
        The VSarsa agent wrapped in a DeepmindAtariBody.
    """
    hp = self.hyperparameters
    # One parameter update is taken per batch of parallel environments,
    # so the scheduler horizon is steps divided by the env count.
    n_updates = train_steps / hp['n_envs']
    optimizer = Adam(self.model.parameters(), lr=hp['lr'], eps=hp['eps'])
    q = QNetwork(
        self.model,
        optimizer,
        scheduler=CosineAnnealingLR(optimizer, n_updates),
        writer=writer,
    )
    # Exploration is annealed linearly; the step count is likewise scaled
    # by the number of parallel environments.
    exploration = LinearScheduler(
        hp['initial_exploration'],
        hp['final_exploration'],
        0,
        hp['final_exploration_step'] / hp['n_envs'],
        name="exploration",
        writer=writer,
    )
    policy = ParallelGreedyPolicy(q, self.n_actions, epsilon=exploration)
    return DeepmindAtariBody(
        VSarsa(q, policy, discount_factor=hp['discount_factor']),
    )
def _vsarsa(envs, writer=DummyWriter()):
    """Build a DeepMind-style Atari VSarsa agent from vectorized environments.

    Args:
        envs: list of environments; only the first is used to size the
            network and read the action space.
        writer: summary writer for logging (defaults to a no-op writer).

    Returns:
        The VSarsa agent wrapped in a DeepmindAtariBody.
    """
    env = envs[0]
    model = nature_ddqn(env).to(device)
    optimizer = RMSprop(model.parameters(), lr=lr, alpha=alpha, eps=eps)
    # NOTE(review): the sibling preset constructs QNetwork(model, optimizer, ...)
    # without an action count — confirm this QNetwork signature really accepts
    # env.action_space.n as a positional argument.
    q = QNetwork(
        model,
        optimizer,
        env.action_space.n,
        loss=smooth_l1_loss,
        writer=writer,
    )
    policy = GreedyPolicy(
        q,
        env.action_space.n,
        epsilon=LinearScheduler(
            initial_exploration,
            final_exploration,
            0,
            final_exploration_frame,
            name="epsilon",
            writer=writer,
        ),
    )
    # Fix: the sibling presets in this file call VSarsa(..., discount_factor=...);
    # the previous `gamma=` keyword does not match that constructor interface.
    return DeepmindAtariBody(
        VSarsa(q, policy, discount_factor=discount_factor),
    )