Beispiel #1
0
 def choose_action(self, state):
     """Sample an action from the Gaussian policy for a single state.

     Args:
         state: NumPy array holding one observation. It is converted to a
             float32 tensor and given a leading batch axis of size 1.

     Returns:
         A tuple ``(action, log_prob)``: the sampled action as a NumPy
         array with size-1 axes squeezed out, and the log-probability of
         that sample as a Python float.
     """
     # Run on whatever device the policy lives on instead of hard-coding
     # CUDA, so the method also works on CPU-only machines.
     parameters = getattr(self.policy, "parameters", None)
     first_param = next(iter(parameters()), None) if callable(parameters) else None
     if first_param is not None:
         device = first_param.device
     else:
         # Nothing to inspect: keep the original CUDA preference when possible.
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

     state = torch.from_numpy(state).float().unsqueeze(0).to(device)
     # Only a NumPy array and a float leave this method, so no autograd
     # graph is needed for the forward pass.
     with torch.no_grad():
         mean, logstd = self.policy(state)
         # Independent(..., 1) treats the last axis as one event, so
         # log_prob is summed over that axis.
         dist = Independent(Normal(mean.squeeze(), torch.exp(logstd)), 1)
         action = dist.sample()
         log_prob = dist.log_prob(action)
     return action.squeeze().cpu().numpy(), log_prob.item()
Beispiel #2
0
 def choose_action(self, state):
     """Draw one action from the policy's diagonal Gaussian for ``state``.

     ``state`` is a NumPy array for a single observation; it is cast to
     float32 and given a leading batch axis before being passed to
     ``self.policy``, which returns the mean and log standard deviation.

     Returns the sampled action as a NumPy array with every size-1 axis
     removed.
     """
     batch = torch.from_numpy(state).to(torch.float32)[None]
     mu, log_sigma = self.policy(batch)
     gaussian = Normal(mu, torch.exp(log_sigma))
     # Treat the last axis as a single event.
     action_dist = Independent(gaussian, reinterpreted_batch_ndims=1)
     sampled = action_dist.sample()
     return sampled.squeeze().numpy()
Beispiel #3
0
 def get_act(mean, std, amount=None):
     """Sample from a diagonal Gaussian with the given mean and std.

     Args:
         mean: Tensor of means; the last axis is treated as the event axis.
         std: Tensor of standard deviations, broadcastable with ``mean``.
         amount: Optional number of samples. Either an ``int`` (draws that
             many samples along a new leading axis) or a shape given as a
             tuple / ``torch.Size``. ``None`` draws a single sample with no
             extra leading axis.

     Returns:
         A tensor of samples with shape ``sample_shape + mean/std batch
         and event shape``.
     """
     dist = Independent(Normal(mean, std), reinterpreted_batch_ndims=1)
     if amount is None:
         return dist.sample()
     # ``sample`` expects an iterable shape; a bare int would raise
     # TypeError, so promote it to a one-element shape.
     if isinstance(amount, int):
         amount = (amount,)
     return dist.sample(amount)