def predict_option_termination(self, state, current_option):
        """Sample whether the current option terminates and pick the greedy option.

        The termination head output for ``state`` is indexed at column
        ``current_option`` and squashed with a sigmoid to obtain the
        probability of terminating; a Bernoulli draw with that probability
        decides the outcome. Independently of that draw, the option with the
        highest value returned by ``self.get_Q(state)`` is reported.

        Args:
            state: Input forwarded unchanged to ``self.terminations`` and
                ``self.get_Q``.
            current_option: Column index of the currently active option.

        Returns:
            A ``(terminated, greedy_option)`` tuple: ``terminated`` is the
            sampled outcome as a ``bool``; ``greedy_option`` is the argmax
            over the last dimension of the Q-values as a Python scalar.

        Note:
            Both results go through ``.item()``, which only works on
            single-element tensors -- presumably ``state`` is a batch of
            size one; verify against the caller.
        """
        termination_logits = self.terminations(state)
        beta = termination_logits[:, current_option].sigmoid()
        terminated = Bernoulli(probs=beta).sample()

        # The greedy option is computed whether or not termination was sampled.
        greedy_option = self.get_Q(state).argmax(dim=-1)
        return bool(terminated.item()), greedy_option.item()
Esempio n. 2
0
 def predict_option_termination(self, state, current_option):
     """Sample whether the current option terminates and pick the greedy option.

     The raw ``state`` is converted to a float tensor on ``device`` and
     passed through ``self.qnetwork_local.state``; the result feeds both
     the termination head and ``self.get_Q``.

     Args:
         state: Raw observation accepted by ``torch.tensor``.
         current_option: Index of the currently active option.

     Returns:
         A ``(terminated, greedy_option)`` tuple: ``terminated`` is the
         sampled Bernoulli outcome as a ``bool``; ``greedy_option`` is the
         argmax over the last dimension of the Q-values as a Python scalar.
     """
     network = self.qnetwork_local
     features = network.state(torch.tensor(state).float().to(device))

     # Termination scores are normalised with a softmax over the last
     # dimension; a per-option sigmoid was the alternative tried earlier.
     termination_probs = network.terminations(features).softmax(dim=-1)
     beta = termination_probs[current_option]
     terminated = Bernoulli(probs=beta).sample()

     # get_Q receives the output of ``network.state``, not the raw input.
     greedy_option = self.get_Q(features).argmax(dim=-1)
     return bool(terminated.item()), greedy_option.item()