Esempio n. 1
0
    def traverse(self, node: TreeNode, board: Board):
        """
        Walk down the tree to a leaf, then expand it.
        :param node: Current node (search starts here).
        :param board: The board; mutated in place as moves are applied.
        :return: (<TreeNode>, value<float>) the reached node and the value
            to be backpropagated.
        """
        # Selection: follow the best child until a leaf is reached,
        # replaying each chosen action on the board.
        while node.children:
            best_action, node = node.choose_best_child(c=self.greedy_value)
            board.step(best_action)

        # Terminal position: score it from the current player's perspective.
        is_over, winner = board.result()
        if is_over:
            if winner == board.current_player:
                return node, 1.0
            if winner == -board.current_player:
                return node, -1.0
            return node, 0.0

        # Non-terminal leaf: ask the policy-value function for move priors
        # and a position evaluation, then expand one child per prior.
        action_probs, value = self.policy_value_function(board)
        for move, prior in action_probs:
            node.expand(move, prior)

        return node, value
Esempio n. 2
0
    def traverse(self, node: TreeNode, board: Board):
        """
        Descend to a leaf and, if the game is not over, expand it with
        uniform priors over all legal moves.
        :param node: Current node.
        :param board: The board; mutated in place during the descent.
        :return: <TreeNode> The reached (possibly expanded) node.
        """
        # Selection: keep picking the best child until a leaf is reached.
        while node.children:
            chosen, node = node.choose_best_child(c=self.greedy_value)
            board.step(chosen)

        over, _ = board.result()
        if over:
            # Terminal leaf: nothing to expand.
            return node

        # Expansion: every legal move gets the same prior probability.
        legal = board.available_actions
        uniform = np.ones(len(legal)) / len(legal)
        for move, prior in zip(legal, uniform):
            node.expand(move, prior)

        return node
Esempio n. 3
0
 def rollout(self, board: Board):
     """
     Play the position out with the rollout policy until the game ends.
     :param board: The board; mutated in place by each rollout move.
     :return: <int> The winner reported by the board.
     """
     is_over, winner = board.result()
     while not is_over:
         self.rollout_policy(board)
         is_over, winner = board.result()
     return winner
Esempio n. 4
0
    def traverse(self, node: TreeNode, board: Board):
        """
        Descend to a leaf, then expand it using the policy-value function.
        :param node: Current node (search starts here).
        :param board: The board; mutated in place as moves are applied.
        :return: (<TreeNode>, value<float>) the reached node and the value
            to be backpropagated.
        """
        # Selection: follow the best child until a leaf (no children).
        while True:
            if len(node.children) == 0:
                break
            action, node = node.choose_best_child(c=self.greedy_value)
            board.step(action)

        # Terminal position: score from the current player's perspective.
        is_over, winner = board.result()
        if is_over:
            if winner == board.current_player:
                value = 1.0
            elif winner == -board.current_player:
                value = -1.0
            else:
                value = 0.0
            return node, value

        # Non-terminal leaf: the policy-value function supplies per-move
        # priors and a scalar evaluation of the position.
        action_probs, value = self.policy_value_function(board)

        # Expansion: one child per (action, prior) pair.
        for action, probability in action_probs:
            _ = node.expand(action, probability)

        return node, value
Esempio n. 5
0
def start_until_game_over(player1: Player,
                          player2: Player,
                          board_renderer: BoardRenderer = None):
    """
    Player player1 and player2 play on the board until the game is over,
    and output the winner.
    :param player1: Player 1 (moves when it is BOARD.o's turn).
    :param player2: Player 2.
    :param board_renderer: The board renderer; when None the game is
        played without rendering or action output.
    :return: <int> The winner returned by board.result().
    """
    board = Board()
    while True:
        # Render the board before each move (only when a renderer is given).
        if board_renderer is not None:
            board.render(board_renderer)

        # Take action: player1 plays the BOARD.o side, player2 the other.
        if board.current_player == BOARD.o:
            player1.take_action(board,
                                is_output_action=board_renderer is not None)
        else:
            player2.take_action(board,
                                is_output_action=board_renderer is not None)

        # Game over? Render the final position once more, then report.
        is_over, winner = board.result()
        if is_over:
            if board_renderer is not None:
                board.render(board_renderer)
            return winner
    def self_play(self, temp=1e-3):
        """
        Play one full game of self-play with MCTS, collecting a training
        sample per move: the board input, the search-visit probability map,
        and (once the game ends) the outcome from each mover's perspective.
        :param temp: Temperature parameter (degree of exploration) passed
            to get_action_probs.
        :return: (board_inputs, all_action_probs, values) — parallel lists/
            array, one entry per move played.
        """
        board_inputs, all_action_probs, current_player = [], [], []
        board = Board()
        self.reset()

        while True:
            # Run the configured number of MCTS simulations from this state.
            self.run(board, self.search_times)

            # Get candidate actions and their search probabilities.
            actions, probs = self.get_action_probs(temp=temp)
            action_probs = np.zeros((BOARD.board_size, BOARD.board_size))

            # Scatter (actions, probs) into a board-shaped probability map.
            for action, prob in zip(actions, probs):
                action_probs[action[0], action[1]] = prob

            # Collect the self-play sample for this position.
            board_inputs.append(self.board_to_xlabel(board))
            all_action_probs.append(action_probs)
            current_player.append(board.current_player)

            # action (row, col) -> flattened index row * size + col.
            flatten_actions = []
            for one_action in actions:
                flatten_actions.append(one_action[0] * BOARD.board_size +
                                       one_action[1])

            # Add Dirichlet noise for exploration during training
            # (sample from 0.75 * search probs + 0.25 * Dir(0.3)).
            flatten_action = np.random.choice(
                flatten_actions,
                p=0.75 * probs +
                0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))

            # flattened index -> action (row, col).
            action = (flatten_action // BOARD.board_size,
                      flatten_action % BOARD.board_size)

            board.step(action)

            # Re-root the tree at the played child, or reset if absent.
            if action in self.root.children:
                self.root = self.root.children[action]
                self.root.parent = None
            else:
                self.reset()

            is_over, winner = board.result()
            if is_over:
                # +1 for positions where the mover won, -1 where they lost,
                # 0 everywhere on a draw (winner == 0).
                values = np.zeros(len(current_player))
                if winner != 0:
                    values[np.array(current_player) == winner] = 1
                    values[np.array(current_player) != winner] = -1
                return board_inputs, all_action_probs, values
Esempio n. 7
0
def play_web_game(is_stop, player1: Player, player2: Player, turn_to, send_board_step,
                  send_player1_running, send_player2_running, wait_human_action, game_over):
    """
    Drive one game for a web client, reporting progress through callbacks.
    :param is_stop: () -> bool; polled to abort the game early.
    :param player1: Player for the BOARD.o side (may be Human).
    :param player2: Player for the other side (may be Human).
    :param turn_to: Callback invoked with the player about to move.
    :param send_board_step: Callback (player_number, action) after each move.
    :param send_player1_running: Progress callback forwarded to player1.
    :param send_player2_running: Progress callback forwarded to player2.
    :param wait_human_action: (player_number, is_stop) -> action; blocks for
        human input.
    :param game_over: Callback invoked with the winner when the game ends.
    :return: None. Returns early (without calling game_over) when stopped.
    """
    board = Board()
    while not is_stop():
        turn_to(board.current_player)

        if board.current_player == BOARD.o:
            if isinstance(player1, Human):
                # Human move: wait for input, then apply it ourselves.
                action = wait_human_action(1, is_stop)
                if is_stop():
                    return
                board.step(action)
            else:
                # AI move: take_action applies the move to the board itself.
                action = player1.take_action(board, is_output_action=False,
                                             running_output_function=send_player1_running, is_stop=is_stop)
            send_board_step(1, action)
        else:
            if isinstance(player2, Human):
                action = wait_human_action(2, is_stop)
                if is_stop():
                    return
                board.step(action)
            else:
                action = player2.take_action(board, is_output_action=False,
                                             running_output_function=send_player2_running, is_stop=is_stop)
            send_board_step(2, action)

        is_over, winner = board.result()
        if is_over:
            game_over(winner)
            return
def start_until_game_over(player1: Player,
                          player2: Player,
                          board_renderer: BoardRenderer = None):
    """
    Let player1 and player2 alternate on a fresh board until the game is
    over, and return the winner.
    :param player1: Player 1 (moves when it is BOARD.o's turn).
    :param player2: Player 2.
    :param board_renderer: The board renderer; pass None to play silently.
    :return: <int> The winner returned by board.result().
    """
    board = Board()
    while True:
        # Show the position before every move when a renderer is given.
        if board_renderer is not None:
            board.render(board_renderer)

        # Whoever owns the current turn takes an action.
        mover = player1 if board.current_player == BOARD.o else player2
        mover.take_action(board,
                          is_output_action=board_renderer is not None)

        is_over, winner = board.result()
        if is_over:
            # Render the final position once more before reporting.
            if board_renderer is not None:
                board.render(board_renderer)
            return winner
Esempio n. 9
0
 def rollout(self, board: Board):
     """
     Simulation: play rollout-policy moves until the game ends.
     :param board: The board; mutated in place by each rollout move.
     :return: winner<int> The winner reported by the board.
     """
     game_over, winner = board.result()
     while not game_over:
         # Decide and apply the next move with the rollout policy.
         self.rollout_policy(board)
         game_over, winner = board.result()
     return winner
Esempio n. 10
0
    def self_play(self, temp=1e-3):
        """
        Play one full self-play game with MCTS and collect training data:
        per move, the board input and the search probability map; at the
        end, the outcome from each mover's perspective.
        :param temp: Temperature parameter (degree of exploration) passed
            to get_action_probs.
        :return: (board_inputs, all_action_probs, values) — one entry per
            move played.
        """
        board_inputs, all_action_probs, current_player = [], [], []
        board = Board()
        self.reset()

        while True:
            # Run MCTS simulations from the current position.
            self.run(board, self.search_times)

            actions, probs = self.get_action_probs(temp=temp)
            action_probs = np.zeros((BOARD.board_size, BOARD.board_size))

            # Scatter (actions, probs) into a board-shaped probability map.
            for action, prob in zip(actions, probs):
                action_probs[action[0], action[1]] = prob

            # Record the training sample for this position.
            board_inputs.append(self.board_to_xlabel(board))
            all_action_probs.append(action_probs)
            current_player.append(board.current_player)

            # action -> flatten_action
            flatten_actions = []
            for one_action in actions:
                flatten_actions.append(one_action[0] * BOARD.board_size + one_action[1])

            # Sample a move with Dirichlet exploration noise mixed in.
            flatten_action = np.random.choice(flatten_actions,
                                              p=0.75 * probs + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))

            # flatten_action -> action
            action = (flatten_action // BOARD.board_size, flatten_action % BOARD.board_size)

            board.step(action)

            # Re-root the tree at the played child, or reset if absent.
            if action in self.root.children:
                self.root = self.root.children[action]
                self.root.parent = None
            else:
                self.reset()

            is_over, winner = board.result()
            if is_over:
                # +1 where the mover won, -1 where they lost, 0 on a draw.
                values = np.zeros(len(current_player))
                if winner != 0:
                    values[np.array(current_player) == winner] = 1
                    values[np.array(current_player) != winner] = -1
                return board_inputs, all_action_probs, values
Esempio n. 11
0
    def traverse(self, node: TreeNode, board: Board):
        """
        Follow best children down to a leaf; expand it with uniform priors
        unless the game is already over there.
        :param node: Current node.
        :param board: The board; mutated in place during the descent.
        :return: <TreeNode> The reached (possibly expanded) node.
        """
        # Selection phase.
        while node.children:
            picked, node = node.choose_best_child(c=self.greedy_value)
            board.step(picked)

        game_over, _ = board.result()
        if game_over:
            return node

        # Expansion phase: identical prior for every legal move.
        moves = board.available_actions
        priors = np.ones(len(moves)) / len(moves)
        for move, prior in zip(moves, priors):
            node.expand(move, prior)

        return node
Esempio n. 12
0
 def rollout(self, board: Board):
     """
     Simulation: play rollout-policy moves until the game ends.
     :param board: The board; mutated in place by each rollout move.
     :return: winner<int> The winner reported by the board.
     """
     # NOTE(review): removed dead timing instrumentation — a local list
     # (named `time`, shadowing the stdlib module name) accumulated
     # datetime deltas around each rollout_policy call but was never
     # read — plus a stray `pass`; the old docstring also documented a
     # nonexistent `node` parameter. Behavior is unchanged.
     while True:
         is_over, winner = board.result()
         if is_over:
             return winner
         self.rollout_policy(board)
Esempio n. 13
0
def start_until_game_over(player1: Player, player2: Player, board_renderer: BoardRenderer = None):
    """
    Player player1 and player2 play on a fresh board until the game is
    over, then return the winner.
    :param player1: Player 1 (moves when it is BOARD.o's turn).
    :param player2: Player 2.
    :param board_renderer: The board renderer; when None the game is
        played without rendering or action output.
    :return: <int> The winner returned by board.result().
    """
    board = Board()
    while True:
        # Render the position before each move (only with a renderer).
        if board_renderer is not None:
            board.render(board_renderer)

        # The owner of the current turn takes an action.
        if board.current_player == BOARD.o:
            player1.take_action(board, is_output_action=board_renderer is not None)
        else:
            player2.take_action(board, is_output_action=board_renderer is not None)

        is_over, winner = board.result()
        if is_over:
            # Show the final position once more before reporting.
            if board_renderer is not None:
                board.render(board_renderer)
            return winner
Esempio n. 14
0
    def traverse(self, node: TreeNode, board: Board):
        """
        Expand node: descend while the game is not over, expanding the
        first childless node reached with uniform priors.
        :param node: Current node.
        :param board: The board; mutated in place during the descent.
        :return: (<TreeNode>, <Board>) The reached (possibly expanded)
            node and the board. (The old docstring claimed a bare
            <TreeNode> return; the code returns the pair.)
        """
        while True:
            # Stop descending as soon as the position is terminal.
            is_over, _ = board.result()
            if is_over:
                break
            if len(node.children) != 0:
                # NOTE(review): exploration constant is hard-coded to 5.0
                # here, unlike sibling implementations that use
                # self.greedy_value — confirm this is intentional.
                action, node = node.choose_best_child(c=5.0)
                board.step(action)
            else:
                actions = board.available_actions
                probs = np.ones(len(actions)) / len(actions)

                # Expand all child nodes with equal priors, then stop.
                for action, prob in zip(actions, probs):
                    _ = node.expand(action, prob)
                break
        return node, board