def _vqn(envs, writer=DummyWriter()):
    # DeepMind-style Atari preprocessing repeats each action 4 times, so the
    # exploration schedule is annealed over frames / action_repeat timesteps.
    action_repeat = 4
    final_exploration_timestep = final_exploration_frame / action_repeat
    env = envs[0]
    model = model_constructor(env).to(device)
    optimizer = Adam(model.parameters(), lr=lr, eps=eps)
    q = QNetwork(
        model,
        optimizer,
        writer=writer
    )
    policy = ParallelGreedyPolicy(
        q,
        env.action_space.n,
        epsilon=LinearScheduler(
            initial_exploration,
            final_exploration,
            0,
            final_exploration_timestep,
            name="epsilon",
            writer=writer
        )
    )
    return DeepmindAtariBody(
        VQN(q, policy, discount_factor=discount_factor),
    )
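# `_vqn` closes over `device`, `lr`, `eps`, `model_constructor`, and the
# exploration settings rather than taking them as arguments. A minimal sketch
# of the enclosing preset function that would supply them; the signature,
# default values, and the `nature_dqn` constructor are illustrative
# assumptions, not the library's exact API:
def vqn(
        device='cuda',
        discount_factor=0.99,
        lr=1e-3,
        eps=1.5e-4,
        initial_exploration=1.0,
        final_exploration=0.02,
        final_exploration_frame=1000000,
        model_constructor=nature_dqn
):
    # ... define _vqn here exactly as above, closing over these arguments ...
    return _vqn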
def agent(self, writer=DummyWriter(), train_steps=float('inf')):
    # One optimizer update is taken per batch of parallel transitions, so both
    # the cosine LR schedule and the exploration decay are scaled by n_envs.
    n_updates = train_steps / self.hyperparameters['n_envs']
    optimizer = Adam(
        self.model.parameters(),
        lr=self.hyperparameters['lr'],
        eps=self.hyperparameters['eps']
    )
    q = QNetwork(
        self.model,
        optimizer,
        scheduler=CosineAnnealingLR(optimizer, n_updates),
        writer=writer
    )
    policy = ParallelGreedyPolicy(
        q,
        self.n_actions,
        epsilon=LinearScheduler(
            self.hyperparameters['initial_exploration'],
            self.hyperparameters['final_exploration'],
            0,
            self.hyperparameters['final_exploration_step'] / self.hyperparameters['n_envs'],
            name="exploration",
            writer=writer
        )
    )
    return VQN(q, policy, discount_factor=self.hyperparameters['discount_factor'])
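# A hedged usage sketch for the class-based preset above; the `VQNAtariPreset`
# name and its constructor arguments are assumptions for illustration, not the
# library's exact interface:
preset = VQNAtariPreset(env, device='cuda')
train_agent = preset.agent(writer=DummyWriter(), train_steps=10_000_000)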
def parallel_test_agent(self):
    # Evaluate a frozen copy of the trained model with a small, fixed
    # exploration rate instead of the annealed training schedule.
    q = QNetwork(copy.deepcopy(self.model))
    policy = ParallelGreedyPolicy(
        q,
        self.n_actions,
        epsilon=self.hyperparameters['test_exploration']
    )
    return VSarsaTestAgent(policy)
def _vsarsa(envs, writer=DummyWriter()):
    env = envs[0]
    model = model_constructor(env).to(device)
    optimizer = Adam(model.parameters(), lr=lr, eps=eps)
    q = QNetwork(model, optimizer, writer=writer)
    # Vanilla Sarsa uses a constant epsilon rather than a linear schedule.
    policy = ParallelGreedyPolicy(q, env.action_space.n, epsilon=epsilon)
    return VSarsa(q, policy, discount_factor=discount_factor)
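# Why the two presets differ: VQN bootstraps off the greedy (max) next-state
# value, while VSarsa bootstraps off the action the behavior policy actually
# takes. A minimal torch sketch of the two targets, illustrative rather than
# the library's internal code:
import torch

def q_learning_target(reward, next_q, discount_factor):
    # VQN: r + gamma * max_a' Q(s', a')
    return reward + discount_factor * next_q.max(dim=-1).values

def sarsa_target(reward, next_q, next_action, discount_factor):
    # VSarsa: r + gamma * Q(s', a'), where a' was chosen by the behavior policy
    chosen = next_q.gather(-1, next_action.unsqueeze(-1)).squeeze(-1)
    return reward + discount_factor * chosen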
def parallel_test_agent(self):
    q = QNetwork(copy.deepcopy(self.model))
    policy = ParallelGreedyPolicy(
        q,
        self.n_actions,
        epsilon=self.hyperparameters['test_exploration']
    )
    # The Atari preset wraps the test agent in the same DeepMind preprocessing
    # body used during training, so observations match what the model saw.
    return DeepmindAtariBody(VQNTestAgent(policy))
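# A hedged sketch of querying the test agent over vectorized environments;
# the `reset`/`act` calls shown are assumptions for illustration, not the
# library's exact interface:
test_agent = preset.parallel_test_agent()
states = envs.reset()
actions = test_agent.act(states)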