def traverse(self, node: TreeNode, board: Board):
    """
    Descend from ``node`` to a leaf and expand that leaf.

    While the current node has children, the child returned by
    ``choose_best_child`` (called with ``c=self.greedy_value``) is selected
    and its action is played on ``board``, so ``board`` is mutated in place.
    Once a childless node is reached, one child is created for every entry
    of ``board.available_actions``, all with the same prior
    ``1 / len(actions)``. Nothing is expanded when ``board.result()``
    reports that the game is over.

    :param node: Node the descent starts from.
    :param board: The board; advanced by every action chosen on the way down.
    :return: <TreeNode> The leaf that was reached.
    """
    # Selection: keep following the preferred child until a leaf is hit.
    while node.children:
        action, node = node.choose_best_child(c=self.greedy_value)
        board.step(action)

    finished, _ = board.result()
    if finished:
        # A finished game has no continuation to add to the tree.
        return node

    # Expansion: every available action gets the same prior probability.
    candidates = board.available_actions
    uniform_priors = np.ones(len(candidates)) / len(candidates)
    for candidate, prior in zip(candidates, uniform_priors):
        node.expand(candidate, prior)
    return node
def traverse(self, node: TreeNode, board: Board):
    """
    Descend from ``node`` to a leaf, then expand or score that leaf.

    While the current node has children, the child returned by
    ``choose_best_child`` (called with ``c=self.greedy_value``) is selected
    and its action is played on ``board``, so ``board`` is mutated in place.

    If ``board.result()`` reports a finished game, the leaf is not expanded
    and the value is 1.0 when the winner equals ``board.current_player``,
    -1.0 when the winner equals ``-board.current_player`` and 0.0 otherwise.
    If the game is not finished, ``self.policy_value_function(board)``
    supplies both the ``(action, probability)`` pairs used to create the
    children and the returned value.

    :param node: Node the descent starts from.
    :param board: The board; advanced by every action chosen on the way down.
    :return: (<TreeNode>, value<float>) The leaf that was reached and the
        value to be backpropagated.
    """
    # Selection: keep following the preferred child until a leaf is hit.
    while node.children:
        action, node = node.choose_best_child(c=self.greedy_value)
        board.step(action)

    finished, winner = board.result()
    if not finished:
        # Let the policy-value function propose priors for the children and
        # estimate the value of the current position.
        action_probs, value = self.policy_value_function(board)
        for action, probability in action_probs:
            node.expand(action, probability)
        return node, value

    # Game over: score the outcome relative to the board's current player.
    if winner == board.current_player:
        return node, 1.0
    if winner == -board.current_player:
        return node, -1.0
    return node, 0.0
def traverse(self, node: TreeNode, board: Board, c: float = 5.0):
    """
    Walk down the tree until the game is over or a leaf has been expanded.

    On every iteration ``board.result()`` is consulted first; a finished
    game stops the walk immediately. Otherwise, if the current node has
    children, the child returned by ``choose_best_child`` is selected and
    its action is played on ``board`` (the board is mutated in place). If
    the node has no children, one child is created for every entry of
    ``board.available_actions``, all with the same prior
    ``1 / len(actions)``, and the walk stops at that node.

    :param node: Node the walk starts from.
    :param board: The board; advanced by every action chosen on the way down.
    :param c: Value forwarded to ``choose_best_child`` as its ``c`` argument.
        Defaults to 5.0, the constant that used to be hard-coded here.
    :return: (<TreeNode>, <Board>) The node where the walk stopped and the
        same board object that was passed in.
    """
    while True:
        is_over, _ = board.result()
        if is_over:
            # Finished games are never expanded.
            break

        if node.children:
            # Selection: move to the preferred child and replay its action.
            action, node = node.choose_best_child(c=c)
            board.step(action)
            continue

        # Expansion: every available action gets the same prior probability.
        actions = board.available_actions
        probs = np.ones(len(actions)) / len(actions)
        for action, prob in zip(actions, probs):
            node.expand(action, prob)
        break

    return node, board