Example #1
0
    def forward(self, state):
        """Sample an action from a Dirichlet policy and score it.

        The concentration parameters come from ``self._get_concentration(state)``.
        The sampling path depends on ``self.training``; in both modes the
        log-probability of the sampled action is evaluated under
        ``Dirichlet(concentration)``.

        Returns:
            ``rlt.ActorOutput`` carrying the sampled ``action`` and its
            ``log_prob`` with an extra axis inserted at position 1.
        """
        alpha = self._get_concentration(state)

        # Training: PyTorch can't run a backward pass through
        # torch._sample_dirichlet, so draw with Dirichlet.rsample() instead.
        # Otherwise: ONNX can't export Dirichlet(), so call the raw sampling
        # op directly.
        action = (
            Dirichlet(alpha).rsample()
            if self.training
            else torch._sample_dirichlet(alpha)
        )

        # Score the sampled action, then add an axis at dim 1.
        scored = Dirichlet(alpha).log_prob(action).unsqueeze(dim=1)
        return rlt.ActorOutput(action=action, log_prob=scored)
Example #2
0
 def get_log_prob(self, state, action):
     """Return the log-probability of ``action`` under the Dirichlet policy.

     The distribution is parameterized by the concentration obtained from
     ``self._get_concentration(state)``.

     Returns:
         The result of ``Dirichlet.log_prob(action)`` with an extra axis
         inserted at position 1.
     """
     policy = Dirichlet(self._get_concentration(state))
     return policy.log_prob(action).unsqueeze(dim=1)