def _playout(self, game_state: go.GameState):
    """Perform one simulation: descend to a leaf, evaluate it, back the value up.

    Starting at ``self._root``, child nodes are chosen with
    ``node.select(self._c_puct)`` until a leaf is reached, and every chosen
    action is played on ``game_state``. The leaf is then either expanded
    using the policy/value network or, when the game cannot continue,
    scored from the actual winner. The negated leaf value is finally passed
    to ``update_recursive`` on the leaf.

    ``game_state`` is mutated in place, so callers must pass a copy.
    """
    current = self._root

    # Selection: follow the tree until we hit a node that is a leaf.
    while not current.is_leaf():
        action, current = current.select(self._c_puct)
        if action == PASS_MOVE:
            game_state.do_move(PASS_MOVE)
        else:
            # Flat action index -> (action // size, action % size).
            game_state.do_move(divmod(action, game_state.size))

    # The position is treated as terminal when the game is flagged as over
    # or when get_legal_moves(False) yields nothing to play.
    is_terminal = (
        game_state.is_end_of_game
        or len(game_state.get_legal_moves(False)) == 0
    )

    if is_terminal:
        # Terminal position: use the "true" outcome as the leaf value.
        winner = game_state.get_winner()
        if winner is None:
            # Tie.
            leaf_value = 0.0
        elif winner == game_state.current_player:
            leaf_value = 1.0
        else:
            leaf_value = -1.0
    else:
        # Non-terminal leaf: ask the network for priors and a value, then
        # grow the tree below this node.
        action_probs, leaf_value = self._policy_value_net.policy_value_fn(
            game_state
        )
        current.expand(action_probs)

    # Update value and visit count of the nodes on this traversal. The sign
    # is flipped before backing up — presumably because the leaf value is
    # from the perspective of the player to move at the leaf; confirm
    # against update_recursive.
    current.update_recursive(-leaf_value)
def _evaluate_rollout(self, game_state: go.GameState, limit=1000):
    """Play the position out with the rollout policy and score the result.

    At most ``limit`` moves are played. Each step takes the action with the
    highest probability among the ``(action, probability)`` pairs returned
    by ``_rollout_policy_fn``. Play stops early once the game is flagged as
    over or ``get_legal_moves(False)`` is empty; if the move budget runs
    out first, a warning is printed.

    Returns 1.0 if the player to move when the rollout started is the
    winner, -1.0 if the other player wins, and 0.0 for a tie.
    ``game_state`` is mutated in place.
    """
    starting_player = game_state.current_player

    # Stays True only if the loop runs to completion without stopping early.
    budget_exhausted = True
    for _ in range(limit):
        nothing_to_play = (
            game_state.is_end_of_game
            or len(game_state.get_legal_moves(False)) == 0
        )
        if nothing_to_play:
            budget_exhausted = False
            break

        candidates = _rollout_policy_fn(game_state)
        best_action = max(candidates, key=itemgetter(1))[0]

        if best_action == PASS_MOVE:
            game_state.do_move(PASS_MOVE)
        else:
            # Flat action index -> (action // size, action % size).
            game_state.do_move(divmod(best_action, game_state.size))

    if budget_exhausted:
        # The loop never stopped early, so the move limit was reached.
        print("WARNING: rollout reached move limit")

    winner = game_state.get_winner()
    if winner is None:
        # Tie.
        return 0.0
    if winner == starting_player:
        return 1.0
    return -1.0