def exploit(self, state):
    """Return the action taken from the policy network's third output.

    Args:
        state: Array-like observation supporting ``state[None, ...]`` and
            ``.copy()`` (presumably a NumPy array -- confirm with callers).

    Returns:
        The first entry along the leading axis of the policy network's
        third output, converted to a NumPy array.
    """
    # Prepend a length-1 leading axis; the copy is handed to torch.tensor
    # rather than the caller's own array.
    batched_state = state[None, ...].copy()
    state_tensor = torch.tensor(
        batched_state,
        dtype=torch.float,
        device=self._device,
    )

    # Inference only: no autograd graph is recorded for this forward pass.
    with torch.no_grad():
        # The network must yield exactly three values; only the last is used.
        # NOTE(review): presumably the deterministic action -- confirm
        # against the policy network's definition.
        _, _, chosen = self._policy_net(state_tensor)

    # Move to host memory and drop the leading axis added above.
    result = chosen.cpu().numpy()[0]

    # Validation helper defined elsewhere in this file.
    assert_action(result)
    return result
def explore(self, state):
    """Return the action produced by ``self.get_action`` for one state.

    Args:
        state: Array-like observation supporting ``state[None, ...]`` and
            ``.copy()`` (presumably a NumPy array -- confirm with callers).

    Returns:
        The result of ``self.get_action`` converted to a NumPy array.
    """
    # Prepend a length-1 leading axis; the copy is handed to torch.tensor
    # rather than the caller's own array.
    batched_state = state[None, ...].copy()
    state_tensor = torch.tensor(
        batched_state,
        dtype=torch.float,
        device=self._device,
    )

    # Inference only: no autograd graph is recorded for this call.
    with torch.no_grad():
        sampled = self.get_action(state_tensor)

    # NOTE(review): unlike exploit(), the result is not indexed with [0]
    # here. Whether get_action already drops the leading axis is not
    # visible from this method -- confirm the returned shape.
    result = sampled.cpu().numpy()

    # Validation helper defined elsewhere in this file.
    assert_action(result)
    return result