Example 1
 def act(self, state):
     if self.body.env.clock.frame < self.training_start_step:
         # warm-up phase: act randomly until enough frames have been collected to start training
         return policy_util.random(state, self, self.body).cpu().squeeze().numpy()
     else:
         action = self.action_policy(state, self, self.body)
         if not self.body.is_discrete:
             action = self.scale_action(torch.tanh(action))  # continuous action bound
         return action.cpu().squeeze().numpy()
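The non-discrete branch squashes the raw policy output with tanh and then rescales it via scale_action. As a rough sketch of that rescaling, assuming a Gym-style action space with low/high bound arrays (hypothetical names here; the actual scale_action implementation may differ), the mapping from [-1, 1] to [low, high] is linear:

 import torch

 def scale_action(action, low, high):
     # Linearly map a tanh-squashed action in [-1, 1] to [low, high].
     low = torch.as_tensor(low, dtype=action.dtype)
     high = torch.as_tensor(high, dtype=action.dtype)
     return low + (action + 1.0) * 0.5 * (high - low)

 # Usage: a 2-D continuous action bounded to [-2, 2] per dimension
 raw = torch.tanh(torch.randn(2))  # network output squashed to [-1, 1]
 bounded = scale_action(raw, [-2.0, -2.0], [2.0, 2.0])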
Example 2
 def act(self, state):
     if self.body.env.clock.frame < self.training_start_step:
         # warm-up phase: act randomly until enough frames have been collected to start training
         return policy_util.random(state, self, self.body).cpu().squeeze().numpy()
     else:
         action = self.action_policy(state, self, self.body)
         if self.body.is_discrete:
             # discrete output is RelaxedOneHotCategorical, need to sample to int
             action = torch.distributions.Categorical(probs=action).sample()
         else:
             action = torch.tanh(action)  # continuous action bound
         return action.cpu().squeeze().numpy()
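The discrete branch treats the policy output as a probability vector (the RelaxedOneHotCategorical relaxation used during training) and samples a Categorical over it to obtain the integer index the environment expects. A minimal standalone sketch of that step, with softmaxed random logits standing in for the network output:

 import torch

 probs = torch.softmax(torch.randn(4), dim=-1)  # stand-in for the relaxed one-hot probs
 action = torch.distributions.Categorical(probs=probs).sample()
 print(action.item())  # an int in {0, 1, 2, 3}, a valid discrete env action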