def expand(self, state: State):
    """Record at most one new child of ``state`` in ``self.tree``.

    Nothing happens when ``self.already_expanded`` is truthy or when
    ``state`` is terminal. Otherwise one successor is drawn with
    ``state.find_random_successor()``:

    * if ``state`` has no entry in ``self.tree`` yet, an entry is created
      holding just that successor;
    * if it already has an entry, the successor is appended only when it
      is not already listed there.

    Args:
        state: The node whose child list should grow.

    Returns:
        None. The method only mutates ``self.tree``.
    """
    # NOTE(review): `already_expanded` is read as a plain attribute here; if
    # it is actually a method, this guard is always truthy -- confirm.
    if self.already_expanded or state.is_terminal():
        return

    if state not in self.tree:
        # First visit: start the child list with a single drawn successor.
        self.tree[state] = [state.find_random_successor()]
        return

    # Later visits: the draw may repeat a known child, so de-duplicate.
    child = state.find_random_successor()
    children = self.tree[state]
    if child not in children:
        children.append(child)
def simulate(self, state: State) -> float:
    """Roll out from ``state`` to a terminal state and return its reward.

    The rollout repeatedly replaces the current state with
    ``find_random_successor()`` (described in the original documentation
    as a uniform random choice) until ``is_terminal()`` holds. The reward
    of that terminal state is the result.

    Bookkeeping performed on ``self``:

    * the first time a terminal state is seen, its reward is stored in
      ``self.states_evaluated`` and ``self.n_evaluations`` is incremented;
      ``self.n_positive_evaluations`` is incremented too when the reward
      is strictly positive;
    * ``self.nof_reward_function_calls`` and ``self.terminal_nodes_visits``
      are incremented on every call.

    Args:
        state: The state the rollout starts from; may already be terminal.

    Returns:
        The value of ``reward()`` on the terminal state that was reached.
    """
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; the positive-reward counter is assumed to belong to the
    # first-seen branch -- confirm against the original layout.
    leaf = state
    while not leaf.is_terminal():
        leaf = leaf.find_random_successor()

    payoff = leaf.reward()

    if leaf not in self.states_evaluated:
        self.states_evaluated[leaf] = payoff
        self.n_evaluations += 1
        if payoff > 0:
            self.n_positive_evaluations += 1

    # reward() was invoked above whether or not the state was already cached.
    self.nof_reward_function_calls += 1
    self.terminal_nodes_visits += 1
    return payoff