def h(self, game: GameState):
    """Return a score for `game`.

    A terminal `game` is scored directly with `game.winner()`.  Otherwise
    `self.do_rollout(game)` is called `self.nbr_rollouts` times, after which
    the successor picked by `self.choose(game)` is passed to `self.h_score`
    and that result is returned.
    """
    if not game.terminal():
        # Run the rollouts first; choose() is only consulted afterwards.
        for _rollout in range(self.nbr_rollouts):
            self.do_rollout(game)
        best_successor = self.choose(game)
        return self.h_score(best_successor)

    # Finished game: no search needed.
    return game.winner()
def _simulate(self, node: GameState):
    """Play `node` out to a terminal state and return the resulting reward.

    Moves are taken from `node.random_move()` and applied with `node.push()`
    until `terminal()` is true.  The terminal state's `reward()` is returned
    unchanged when an odd number of moves were played, and as `1 - reward`
    when an even number (including zero) were played.
    """
    # NOTE(review): the per-move flip presumably accounts for the side to
    # move alternating each ply -- confirm against GameState.reward().
    flip = True
    while not node.terminal():
        node = node.push(node.random_move())
        flip = not flip

    outcome = node.reward()
    if flip:
        return 1 - outcome
    return outcome
def choose(self, node: GameState):
    """Choose the best successor of node. (Choose a move in the game)

    The successor with the highest average reward `Q / N` among
    `self.children[node]` is returned; successors with `N == 0` rank below
    every visited one.  If `node` has no entry in `self.children`,
    `node.random_child()` is returned instead.

    Raises:
        RuntimeError: if `node` is terminal.
    """
    if node.terminal():
        raise RuntimeError(f"choose called on terminal node {node}")

    if node not in self.children:
        # Nothing recorded for this node, so there are no statistics to rank.
        return node.random_child()

    def score(n):
        visits = self.N[n]
        if visits == 0:
            return float("-inf")  # avoid unseen moves
        return self.Q[n] / visits  # average reward

    return max(self.children[node], key=score)