def choose_action(self, state):
    """Sample an action for a single observation and report its log-probability.

    Args:
        state: NumPy array holding one observation. It is converted to a
            float tensor and given a leading batch dimension of size 1.

    Returns:
        A ``(action, log_prob)`` tuple: the sampled action as a NumPy array
        (all size-1 dimensions squeezed away) and the log-probability of that
        sample as a Python float.
    """
    # Run on whatever device the policy lives on instead of hard-coding the
    # default CUDA device; keep the historical CUDA behaviour when the policy
    # exposes no parameters to inspect.
    try:
        device = next(self.policy.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device("cuda")

    # Only detached values leave this function (.numpy() / .item()), so there
    # is no reason to record an autograd graph for the forward pass.
    with torch.no_grad():
        obs = torch.from_numpy(state).float().unsqueeze(0).to(device)
        mean, logstd = self.policy(obs)
        # The policy yields a log standard deviation; exponentiate to get the
        # scale. The last dimension is treated as the event (action) dimension.
        dist = Independent(Normal(mean.squeeze(), torch.exp(logstd)), 1)
        action = dist.sample()
        log_prob = dist.log_prob(action)

    return action.squeeze().cpu().numpy(), log_prob.item()
def choose_action(self, state):
    """Sample an action for a single observation.

    Args:
        state: NumPy array holding one observation. It is converted to a
            float tensor and given a leading batch dimension of size 1.

    Returns:
        The sampled action as a NumPy array with all size-1 dimensions
        squeezed away.
    """
    # The result is handed back as a NumPy array, so gradients are never
    # needed here; skip building the autograd graph for the forward pass.
    with torch.no_grad():
        obs = torch.from_numpy(state).float().unsqueeze(0)
        mean, logstd = self.policy(obs)
        # The policy yields a log standard deviation; exponentiate to get the
        # scale. The last dimension is treated as the event (action) dimension.
        dist = Independent(Normal(mean, torch.exp(logstd)), 1)
        action = dist.sample()
    return action.squeeze().numpy()
def get_act(mean, std, amount=None):
    """Draw samples from a diagonal Gaussian defined by ``mean`` and ``std``.

    The last dimension of the parameters is treated as the event dimension
    (``reinterpreted_batch_ndims=1``).

    Args:
        mean: Location tensor of the Normal distribution.
        std: Scale tensor of the Normal distribution.
        amount: Optional number of samples. May be a plain ``int`` count or
            any shape accepted by ``Distribution.sample`` (tuple, list,
            ``torch.Size``). ``None`` draws a single sample with no extra
            leading dimension.

    Returns:
        A tensor of samples whose shape is the requested sample shape followed
        by the distribution's batch and event shapes.
    """
    dist = Independent(Normal(mean, std), reinterpreted_batch_ndims=1)
    if amount is None:
        return dist.sample()
    # ``sample`` expects a shape, not a count: a bare int would raise
    # TypeError inside torch.Size(), so wrap it into a 1-tuple.
    sample_shape = (amount,) if isinstance(amount, int) else amount
    return dist.sample(sample_shape)