Ejemplo n.º 1
0
 def return_results(self, temp):
     """Summarise the statistics gathered at the root node.

     Args:
         temp: Passed unchanged to ``stable_normalizer`` along with the
             child counts (presumably a temperature; confirm against
             that helper).

     Returns:
         A 3-tuple: the flattened ``self.root.index``, the output of
         ``stable_normalizer``, and the count-weighted mean of the
         children's ``Q`` with a leading axis added.
     """
     children = self.root.child_actions
     visit_counts = np.array([action.n for action in children])
     action_values = np.array([action.Q for action in children])

     pi_target = stable_normalizer(visit_counts, temp)

     # Weight each child's Q by its share of the total count.
     # NOTE(review): if children exist but every count is zero, this
     # division produces NaN -- confirm callers always search first.
     visit_share = visit_counts / visit_counts.sum()
     V_target = np.sum(visit_share * action_values)[np.newaxis]
     return self.root.index.flatten(), pi_target, V_target
Ejemplo n.º 2
0
 def return_results(self, temp):
     """Summarise the statistics gathered at the root node.

     Args:
         temp: Passed unchanged to ``stable_normalizer`` along with the
             child counts (presumably a temperature; confirm against
             that helper).

     Returns:
         A 3-tuple: ``self.root.index``, the output of
         ``stable_normalizer``, and the count-weighted mean of the
         children's ``q`` with a leading axis added.
     """
     children = self.root.child_actions
     visit_counts = np.array([action.n for action in children])
     action_values = np.array([action.q for action in children])

     pi_target = stable_normalizer(visit_counts, temp)

     # Weight each child's q by its share of the total count.
     # NOTE(review): if children exist but every count is zero, this
     # division produces NaN -- confirm callers always search first.
     visit_share = visit_counts / visit_counts.sum()
     v_target = np.sum(visit_share * action_values)[np.newaxis]
     return self.root.index, pi_target, v_target
 def return_results(self, temp, on_visits=False):
     """Summarise the statistics gathered at the root node.

     Args:
         temp: Passed unchanged to ``stable_normalizer`` when
             ``on_visits`` is truthy; unused otherwise.
         on_visits: When truthy the policy target is
             ``stable_normalizer(counts, temp)``; otherwise it is
             ``max_Q(Q)``.

     Returns:
         A 3-tuple: ``self.root_signature``, the policy target, and the
         count-weighted mean of the children's ``Q`` with a leading
         axis added.
     """
     children = self.root.child_actions
     visit_counts = np.array([action.n for action in children])
     action_values = np.array([action.Q for action in children])

     # Only the selected helper is ever called, as in an if/else.
     pi_target = (
         stable_normalizer(visit_counts, temp)
         if on_visits
         else max_Q(action_values)
     )

     # Weight each child's Q by its share of the total count.
     # NOTE(review): if children exist but every count is zero, this
     # division produces NaN -- confirm callers always search first.
     visit_share = visit_counts / visit_counts.sum()
     V_target = np.sum(visit_share * action_values)[np.newaxis]
     return self.root_signature, pi_target, V_target