def predict_option_termination(self, state, current_option):
    """Sample whether the current option terminates and pick the greedy next option.

    Args:
        state: Input forwarded unchanged to ``self.terminations`` and
            ``self.get_Q``. The ``[:, current_option]`` indexing means the
            termination output must have the option index on its second
            axis; the ``.item()`` calls below additionally require a single
            row (presumably a batch of one -- confirm against callers).
        current_option: Index of the option currently being executed.

    Returns:
        A ``(terminated, next_option)`` tuple: ``terminated`` is a ``bool``
        drawn from a Bernoulli distribution whose probability is the sigmoid
        of the current option's termination output, and ``next_option`` is
        the index with the highest value in ``self.get_Q(state)``.
    """
    # Squash the selected option's termination output into a probability.
    termination = self.terminations(state)[:, current_option].sigmoid()
    option_termination = Bernoulli(termination).sample()

    # The greedy option is computed unconditionally; the caller decides
    # whether to use it based on the sampled termination flag.
    Q = self.get_Q(state)
    next_option = Q.argmax(dim=-1)

    return bool(option_termination.item()), next_option.item()
def predict_option_termination(self, state, current_option):
    """Sample whether the current option terminates and pick the greedy next option.

    Args:
        state: Raw observation accepted by ``torch.tensor`` (for example a
            list or array of numbers). It is converted to a float tensor on
            the module-level ``device`` and then passed through
            ``self.qnetwork_local.state``. The indexing and ``.item()``
            calls below require the termination output to be one-dimensional
            (presumably a single unbatched observation -- confirm against
            callers).
        current_option: Index of the option currently being executed.

    Returns:
        A ``(terminated, next_option)`` tuple: ``terminated`` is a ``bool``
        drawn from a Bernoulli distribution parameterised by the current
        option's entry in the softmax of the termination output, and
        ``next_option`` is the index with the highest value in
        ``self.get_Q`` evaluated on the transformed state.
    """
    state = torch.tensor(state).float().to(device)
    # From here on `state` is the output of the network's `state` layer,
    # which is what both the termination head and `get_Q` receive.
    state = self.qnetwork_local.state(state)

    # NOTE(review): softmax normalises across options, so the termination
    # probabilities of all options sum to 1. The original source also carried
    # a disabled alternative that applied a per-option `.sigmoid()` to the
    # selected entry instead -- confirm which one matches the training loss.
    termination = self.qnetwork_local.terminations(state).softmax(dim=-1)
    termination = termination[current_option]
    option_termination = Bernoulli(termination).sample()

    # The greedy option is computed unconditionally; the caller decides
    # whether to use it based on the sampled termination flag.
    Q = self.get_Q(state)
    next_option = Q.argmax(dim=-1)

    return bool(option_termination.item()), next_option.item()