def expand(self, state: State):
     """
     Record at most one freshly sampled child of 'state' in the tree.

     Nothing happens when 'self.already_expanded' is set or when 'state'
     is terminal. Otherwise one successor is drawn with
     find_random_successor() and stored under 'state' unless it is
     already listed there.
     """
     if self.already_expanded or state.is_terminal():
         return
     candidate = state.find_random_successor()
     if state not in self.tree:
         # First child seen for this state: start its child list.
         self.tree[state] = [candidate]
     elif candidate not in self.tree[state]:
         self.tree[state] += [candidate]
 def expand(self, state: State):
     """
     Step 2: Expansion.
     Store the result of state.find_successors() as the children of
     'state', unless 'state' already has an entry in the tree.
     """
     if state in self.tree:
         return
     self.tree[state] = state.find_successors()
 def simulate(self, state: State) -> float:
     """
     Roll out from 'state' by repeatedly calling find_random_successor()
     until a terminal state is reached, then return that state's reward.

     The first time a terminal state is encountered its reward is stored
     in 'states_evaluated' and the evaluation counters are updated. The
     reward-call and terminal-visit counters grow on every call.
     """
     leaf = state
     while not leaf.is_terminal():
         leaf = leaf.find_random_successor()
     reward = leaf.reward()
     first_visit = leaf not in self.states_evaluated
     if first_visit:
         self.states_evaluated[leaf] = reward
         self.n_evaluations += 1
         if reward > 0:
             self.n_positive_evaluations += 1
     self.nof_reward_function_calls += 1
     self.terminal_nodes_visits += 1
     return reward
 def select(self, state: State) -> list[State]:
     """
     Walk down the tree starting at 'state' and return the visited path.

     'self.already_expanded' is reset to False on entry. At each node that
     has a non-empty child collection in the tree:
       * if some recorded child has no tree entry of its own, that child
         is appended to the path and the path is returned;
       * otherwise one successor is sampled; if it is not yet recorded
         under the node it is recorded, 'self.already_expanded' is set to
         True, and the path ending at it is returned;
       * otherwise the walk continues with self.best_child(state).
     The walk also ends when the current state has no tree entry or an
     empty child collection.
     """
     self.already_expanded = False
     path = [state]
     while state in self.tree and self.tree[
             state]:  # keep descending while state has recorded children
         # Recorded children that are not themselves keys of the tree.
         # NOTE(review): relies on '-' against a keys view yielding an
         # object with pop(); confirm the container type of tree values.
         unexplored = self.tree[state] - self.tree.keys()
         if unexplored:  # at least one recorded child has no tree entry
             s = unexplored.pop()
             path.append(s)
             return path
         else:
             # Every recorded child has a tree entry: sample one successor
             # to check whether the node still has an unrecorded child.
             possible_unexplored = state.find_random_successor()
             if not possible_unexplored in self.tree[
                     state]:  # the sampled successor was not recorded yet
                 # Flag that the tree was already grown during selection.
                 self.already_expanded = True
                 self.tree[state] += [possible_unexplored]
                 path.append(possible_unexplored)
                 return path
         # The sampled successor was already known: descend one level.
         state = self.best_child(state)
         path.append(state)
     return path
Exemple #5
0
 def run(self, state: State) -> State:
     """Return the successor of 'state' given by find_random_successor()."""
     successor = state.find_random_successor()
     return successor
 def choose(self, state: State) -> State:
     """
     Pick the successor of 'state' to move to.

     A state with a tree entry is resolved by
     selection_criteria.best_child, which receives the state, its
     recorded children and the Q and N tables. A state without a tree
     entry falls back to find_random_successor().
     """
     if state in self.tree:
         return self.selection_criteria.best_child(
             state, self.tree[state], self.Q, self.N)
     return state.find_random_successor()
 def do_rollout(self, state: State):
     """
     Run one simulation from a sampled successor and record its outcome.

     The successor comes from state.find_random_successor(). Its simulated
     reward is added to Q and its visit count in N is incremented.
     """
     sampled = state.find_random_successor()
     outcome = self.simulate(sampled)
     self.Q[sampled] += outcome
     self.N[sampled] += 1