Example #1
0
 def exploit(self, state):
     """Return the policy network's third output for ``state`` as a NumPy array.

     A leading axis is added to ``state`` before the forward pass, and
     element 0 of the resulting array is returned, so a single state maps
     to a single action. The forward pass runs with gradients disabled.
     The action is checked with ``assert_action`` before being returned.
     """
     # Copy after adding the leading axis so the tensor is built from a
     # fresh array rather than a view of the caller's data.
     batched_state = state[None, ...].copy()
     net_input = torch.tensor(
         batched_state, dtype=torch.float, device=self._device)

     with torch.no_grad():
         # Only the last of the network's three outputs is used here.
         _, _, net_action = self._policy_net(net_input)

     # Move to host memory and strip the leading axis added above.
     action = net_action.cpu().numpy()[0]
     assert_action(action)
     return action
Example #2
0
 def explore(self, state):
     """Return the action produced by ``self.get_action`` for ``state``.

     A leading axis is added to ``state`` before it is converted to a float
     tensor on ``self._device``. ``self.get_action`` is called with
     gradients disabled, and its result is returned as a NumPy array after
     being checked with ``assert_action``.
     """
     # Copy after adding the leading axis so the tensor is built from a
     # fresh array rather than a view of the caller's data.
     batched_state = state[None, ...].copy()
     net_input = torch.tensor(
         batched_state, dtype=torch.float, device=self._device)

     with torch.no_grad():
         sampled = self.get_action(net_input)

     # NOTE(review): the result is not indexed with [0] here, so
     # presumably get_action already drops the leading axis -- confirm.
     action = sampled.cpu().numpy()
     assert_action(action)
     return action