def expand(self, state: State):
     """
     Record one sampled successor of ``state`` in ``self.tree``.

     Nothing happens when ``self.already_expanded`` is truthy or when
     ``state.is_terminal()`` is true. Otherwise a single successor is drawn
     with ``state.find_random_successor()``:

     * if ``state`` has no entry in ``self.tree`` yet, a new child list
       holding that successor is created;
     * otherwise the successor is appended to the existing child list,
       unless an equal child is already stored there.
     """
     # Guard clause: short-circuits exactly like the nested checks did, so
     # is_terminal() is not consulted once expansion has been switched off.
     if self.already_expanded or state.is_terminal():
         return

     if state not in self.tree:
         self.tree[state] = [state.find_random_successor()]
         return

     child = state.find_random_successor()
     children = self.tree[state]
     # The draw may repeat an earlier one; keep the child list duplicate-free.
     if child not in children:
         children.append(child)
 def simulate(self, state: State) -> float:
     """
     Roll out a playout from ``state`` and return the reward it ends with.

     Successors are drawn repeatedly with ``find_random_successor()`` until
     a state reports ``is_terminal()``; that state's ``reward()`` is the
     return value.

     Bookkeeping: the first time a given terminal state is reached, its
     reward is stored in ``self.states_evaluated`` and ``self.n_evaluations``
     is incremented (``self.n_positive_evaluations`` too when the reward is
     greater than zero). Every call increments
     ``self.nof_reward_function_calls`` and ``self.terminal_nodes_visits``.
     """
     leaf = state
     while True:
         if leaf.is_terminal():
             break
         leaf = leaf.find_random_successor()

     reward = leaf.reward()

     # Per-terminal statistics are only updated on the first encounter.
     seen_before = leaf in self.states_evaluated
     if not seen_before:
         self.states_evaluated[leaf] = reward
         self.n_evaluations += 1
         if reward > 0:
             self.n_positive_evaluations += 1

     # Per-call counters are updated on every rollout, repeated or not.
     self.nof_reward_function_calls += 1
     self.terminal_nodes_visits += 1
     return reward