def expand(self, state: State):
    """Grow the tree below ``state`` by at most one randomly drawn successor.

    Nothing happens when ``self.already_expanded`` is set or when ``state``
    is terminal.  Otherwise a single successor is drawn with
    ``state.find_random_successor()``:

    * if ``state`` has no entry in ``self.tree`` yet, the entry is created
      with that successor as its only child;
    * if ``state`` already has an entry, the successor is appended unless it
      is already listed.

    Args:
        state: The node whose child list should be extended.
    """
    # NOTE(review): reconstructed from a flattened source line; the `else`
    # branch is assumed to pair with the `state in self.tree` test -- confirm.
    if self.already_expanded or state.is_terminal():
        return

    child = state.find_random_successor()
    if state not in self.tree:
        self.tree[state] = [child]
    elif child not in self.tree[state]:
        self.tree[state].append(child)
def expand(self, state: State):
    """Step 2 (expansion): record the successors of ``state`` in the tree.

    A state that already has an entry in ``self.tree`` is left untouched;
    otherwise its entry is set to the result of ``state.find_successors()``.

    Args:
        state: The node to expand.
    """
    if state in self.tree:
        return  # already expanded earlier
    self.tree[state] = state.find_successors()
def simulate(self, state: State) -> float:
    """Roll out from ``state`` to a terminal state and return its reward.

    Successors are drawn with ``find_random_successor()`` until
    ``is_terminal()`` holds.  The reward of the terminal state is cached in
    ``self.states_evaluated`` the first time that state is reached, and the
    evaluation counters on ``self`` are updated.

    Args:
        state: The state the rollout starts from.

    Returns:
        The value of ``reward()`` on the terminal state that was reached.
    """
    leaf = state
    while not leaf.is_terminal():
        leaf = leaf.find_random_successor()

    outcome = leaf.reward()

    # Only terminal states seen for the first time count as evaluations.
    if leaf not in self.states_evaluated:
        self.states_evaluated[leaf] = outcome
        self.n_evaluations += 1
        if outcome > 0:
            self.n_positive_evaluations += 1

    # NOTE(review): nesting reconstructed from a flattened source line; these
    # two counters are assumed to be bumped on every rollout -- confirm.
    self.nof_reward_function_calls += 1
    self.terminal_nodes_visits += 1
    return outcome
def select(self, state: State) -> list[State]:
    """Walk down the tree from ``state`` and return the visited path.

    ``self.already_expanded`` is reset to ``False`` on entry.  At each node
    that has an entry with at least one child in ``self.tree``:

    1. If some recorded child has no tree entry of its own, one such child
       is appended to the path and the path is returned.
    2. Otherwise one successor is drawn with ``find_random_successor()``.
       If it is not yet among the recorded children, it is added to them,
       ``self.already_expanded`` is set to ``True``, and the path ending in
       that successor is returned.
    3. Otherwise the walk descends to ``self.best_child(state)``.

    The walk also stops when it reaches a node that has no tree entry or
    whose child collection is empty.

    Args:
        state: The node the descent starts from.

    Returns:
        The list of states from ``state`` down to the selected node.
    """
    self.already_expanded = False
    path = [state]

    # Keep descending while the node has a tree entry with children.
    while state in self.tree and self.tree[state]:
        # Children that have no tree entry of their own yet; the difference
        # against the keys view yields a set.
        unexplored = self.tree[state] - self.tree.keys()
        if unexplored:
            path.append(unexplored.pop())
            return path

        # Every recorded child has a tree entry: try to add one more child.
        candidate = state.find_random_successor()
        if candidate not in self.tree[state]:
            # Record that the child list was extended during selection.
            self.already_expanded = True
            self.tree[state].append(candidate)
            path.append(candidate)
            return path

        # The drawn successor was already known; move one level down.
        state = self.best_child(state)
        path.append(state)

    return path
def run(self, state: State) -> State:
    """Return a successor of ``state`` drawn by ``find_random_successor()``.

    Args:
        state: The state to move from.

    Returns:
        Whatever ``state.find_random_successor()`` yields.
    """
    successor = state.find_random_successor()
    return successor
def choose(self, state: State) -> State:
    """Pick the successor of ``state`` to move to.

    When ``state`` has an entry in ``self.tree``, the decision is delegated
    to ``self.selection_criteria.best_child`` together with the recorded
    children and the ``Q`` / ``N`` statistics.  A state without an entry
    falls back to ``state.find_random_successor()``.

    Args:
        state: The state to choose a successor for.

    Returns:
        The chosen successor state.
    """
    if state in self.tree:
        criteria = self.selection_criteria
        return criteria.best_child(state, self.tree[state], self.Q, self.N)
    return state.find_random_successor()
def do_rollout(self, state: State):
    """Run one simulation below ``state`` and record its statistics.

    A successor of ``state`` is drawn with ``find_random_successor()`` and
    passed to ``self.simulate``.  The resulting reward is added to
    ``self.Q`` for that successor and its counter in ``self.N`` is
    incremented.

    Args:
        state: The state whose randomly drawn successor is simulated.
    """
    leaf = state.find_random_successor()
    outcome = self.simulate(leaf)

    # Accumulate total reward and visit count for the simulated successor.
    self.Q[leaf] += outcome
    self.N[leaf] += 1