def forward(self, state):
    """Sample an action from a Dirichlet policy and report its log-density.

    The concentration parameters come from ``self._get_concentration(state)``.
    The sampling routine depends on ``self.training``; the log-probability
    is always evaluated through ``Dirichlet(...).log_prob``.

    Args:
        state: Input forwarded unchanged to ``self._get_concentration``.

    Returns:
        ``rlt.ActorOutput`` whose ``action`` is the drawn sample and whose
        ``log_prob`` is the sample's log-probability with an extra axis
        inserted at position 1.
    """
    alpha = self._get_concentration(state)

    if not self.training:
        # ONNX can't export Dirichlet()
        sampled = torch._sample_dirichlet(alpha)
    else:
        # PyTorch can't backwards pass _sample_dirichlet
        sampled = Dirichlet(alpha).rsample()

    # A fresh distribution object is built here on both paths.
    log_density = Dirichlet(alpha).log_prob(sampled)
    return rlt.ActorOutput(action=sampled, log_prob=log_density.unsqueeze(1))
def get_log_prob(self, state, action):
    """Return the Dirichlet log-probability of ``action`` given ``state``.

    Args:
        state: Input forwarded unchanged to ``self._get_concentration``.
        action: Sample whose log-probability is evaluated under the
            Dirichlet distribution built from the concentration.

    Returns:
        The result of ``Dirichlet(...).log_prob(action)`` with an extra axis
        inserted at position 1.
    """
    alpha = self._get_concentration(state)
    policy = Dirichlet(alpha)
    log_density = policy.log_prob(action)
    return log_density.unsqueeze(1)