Example #1
 def get_action(self, board, temp=1e-3):
     #sensible_moves = board.availables
     # the pi vector returned by MCTS, as in the AlphaGo Zero paper
     if self._is_selfplay:
         temp = 1.5
     move_probs = np.zeros(15 * 15)
     acts, probs = self.mcts.get_move_probs(board, temp)
     if acts is None:  # the AI resigns
         return None, None
     move_probs[list(acts)] = probs
     best_chance = np.max(move_probs)
     best_move = np.where(move_probs == best_chance)[0][0]
     if self._is_selfplay:
         move = np.random.choice(
             acts,
             p=probs
             #p=0.9*probs + 0.1*np.random.dirichlet(0.3*np.ones(len(probs)))
         )
         #debug
         print("choose ", RenjuBoard.number2pos(move), "by prob ",
               move_probs[move])
         print("best move is ", RenjuBoard.number2pos(best_move),
               best_chance)
         # update the root node and reuse the search tree
     else:
         # with the default temp=1e-3, it is almost equivalent
         # to choosing the move with the highest prob
         #move = np.random.choice(acts, p=probs)
         move = best_move
         # reset the root node
         #self.mcts.update_with_move(-1)
     self.mcts.update_with_move(board, move)
     return move, move_probs
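A note on temp: get_move_probs presumably follows the AlphaGo Zero convention pi(a) ∝ N(a)^(1/temp), so temp = 1.5 softens the visit counts for self-play exploration while temp = 1e-3 is nearly argmax. A minimal sketch of that conversion (an assumption, not the project's actual get_move_probs):

import numpy as np

def visits_to_probs(visit_counts, temp):
    # pi(a) proportional to N(a)^(1/temp), computed in log space for stability
    x = np.log(np.asarray(visit_counts, dtype=np.float64) + 1e-10) / temp
    x -= x.max()
    probs = np.exp(x)
    return probs / probs.sum()

print(visits_to_probs([10, 30, 60], temp=1.5))   # softened: exploratory self-play
print(visits_to_probs([10, 30, 60], temp=1e-3))  # ~one-hot on the most-visited move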
Example #2
class Game(object):
    def __init__(self, player1, player2):
        self.player1 = player1
        self.player2 = player2
        self.board = RenjuBoard()

    # After initialization the game can start;
    # the caller controls when a match begins.
    def do_play(self):
        self.board.reset()
        states, mcts_probs = [], []
        while True:
            player = self.player2
            opponent = self.player1
            debug_stone = '◯'
            if self.board.get_current_player():
                player = self.player1
                opponent = self.player2
                debug_stone = '●'
            move, move_probs = player.get_action(self.board)
            # TODO: while the game runs, Game hands the current position to the
            # player to move; the player thinks and returns its conclusion to
            # Game; Game places the stone and should then broadcast a global
            # notification (publish/subscribe model). That is not done yet;
            # for now only the opponent is notified of the move.
            opponent.notice(self.board, move)  # after a move, Game notifies the opponent
            # resignation logic
            if move is None:
                end = True
                winner = (
                    RenjuBoard.WHITE_WIN if self.board.get_current_player()
                    else RenjuBoard.BLACK_WIN)  # the resigner's opponent wins
                print("player: ", debug_stone, " resigns.")
            else:
                # store the data
                states.append(self.board.current_state())
                mcts_probs.append(move_probs)
                #print(move_probs)
                # perform a move
                self.board.do_move_by_number(move)
                print("player: ", debug_stone)
                self.board._debug_board()
                #if len(states) >= 5:
                end, winner = self.board.game_end()
            if end:
                total_moves = len(states)

                if winner == RenjuBoard.DRAW:
                    winner_map = [0 for _i in range(total_moves)]
                    print("draw")
                elif winner == RenjuBoard.WHITE_WIN:
                    winner_map = [(_i % 2) * 2 - 1
                                  for _i in range(total_moves)]
                    print("WHITE_WIN")
                else:
                    winner_map = [((_i + 1) % 2) * 2 - 1
                                  for _i in range(total_moves)]
                    print("BLACK_WIN")
                return winner, zip(states, mcts_probs, winner_map)
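The winner_map parity labels each stored state from the perspective of the player about to move: assuming black moves first, even-indexed states (black to move) get +1 under BLACK_WIN and -1 under WHITE_WIN, with white's states interleaved. A quick check of the two expressions with a hypothetical 5-move game:

total_moves = 5
black_win_map = [((i + 1) % 2) * 2 - 1 for i in range(total_moves)]
assert black_win_map == [1, -1, 1, -1, 1]
white_win_map = [(i % 2) * 2 - 1 for i in range(total_moves)]
assert white_win_map == [-1, 1, -1, 1, -1]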
Example #3
class Game(object):
    def __init__(self, player1, player2):
        self.player1 = player1
        self.player2 = player2
        self.board = RenjuBoard()

    def do_play(self):
        self.board.reset()
        states, mcts_probs = [], []
        while True:
            player = self.player2
            opponent = self.player1
            debug_stone = '◯'
            if self.board.get_current_player():
                player = self.player1
                opponent = self.player2
                debug_stone = '●'
            move, move_probs = player.get_action(self.board)

            # notice the opponent
            opponent.notice(self.board, move)

            # Resign
            if move is None:
                end = True
                winner = (RenjuBoard.WHITE_WIN
                          if self.board.get_current_player() else
                          RenjuBoard.BLACK_WIN)
                # print ("player: ",debug_stone," resigns.")
            else:
                # store the data
                states.append(self.board.current_state())
                mcts_probs.append(move_probs)
                # perform a move
                self.board.do_move_by_number(move)

                # uncomment to show the debug board (the in-game interface)
                # print ("player: ", debug_stone)
                # self.board._debug_board()

                end, winner = self.board.game_end()

            if end:
                self.board._debug_board()
                total_moves = len(states)
                if winner == RenjuBoard.DRAW:
                    winner_map = [0 for _i in range(total_moves)]
                    print("draw")
                elif winner == RenjuBoard.WHITE_WIN:
                    winner_map = [(_i % 2) * 2 - 1
                                  for _i in range(total_moves)]
                    print("WHITE_WIN")
                else:
                    winner_map = [((_i + 1) % 2) * 2 - 1
                                  for _i in range(total_moves)]
                    print("BLACK_WIN")
                return winner, zip(states, mcts_probs, winner_map)
Example #4
 def _debug(self):
     if self.debug_mode:
         for act, _sub_node in self._root._children.items():
             if _sub_node._n_visits > 0:
                 print(RenjuBoard.number2pos(act), "\tsel ",
                       _sub_node.get_value(self._c_puct), "\tv ",
                       _sub_node._n_visits, "\tQ ", _sub_node._Q, "\tp ",
                       _sub_node._P)
Example #5
class MasterPlayer(object):
    """Master reads human game records"""
    def __init__(self, game_source='./games.log', jump_line=0):
        self.file_reader = open(game_source, 'r')
        self.board = RenjuBoard()

    def get_train_game(self):
        # NOTE: because of the RIF opening rules, the first 5 moves of human records are discarded.
        game_string = self.file_reader.readline()
        print(game_string)
        self.board.reset()
        game_string = game_string.strip()
        if len(game_string) < 10:
            return None
        states, mcts_probs = [], []
        # parse the game record and result, returning data shaped like the self-play output
        game_result = game_string.split(",")
        winner = int(game_result[1])
        for i in range(0, len(game_result[0]), 2):
            pos = game_result[0][i:i + 2]
            move_probs = np.zeros(15 * 15)
            move_number = RenjuBoard.pos2number(pos)
            move_probs[move_number] = 1.0
            # store the data
            states.append(self.board.current_state())
            mcts_probs.append(move_probs)
            self.board.do_move(pos)
            #self.board._debug_board()
            #if len(states) >= 5:
        total_moves = len(states)
        if winner == -1:
            winner_map = [0 for _i in range(total_moves)]
            print("draw")
        elif winner == 0:  # white wins
            winner_map = [(_i % 2) * 2 - 1 for _i in range(total_moves)]
            print("WHITE_WIN")
        else:
            winner_map = [((_i + 1) % 2) * 2 - 1 for _i in range(total_moves)]
            print("BLACK_WIN")
        # drop the first 5 moves (RIF rule)
        states = states[5:]
        mcts_probs = mcts_probs[5:]
        winner_map = winner_map[5:]
        return zip(states, mcts_probs, winner_map)
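The games.log format is not shown here; from the parsing above, each line appears to be "<moves>,<result>" with two characters per move and a result code of -1 for a draw, 0 for a white win, and anything else for a black win. A hypothetical record:

sample_line = "8897a8b7c6,1\n"
moves_str, result = sample_line.strip().split(",")
moves = [moves_str[i:i + 2] for i in range(0, len(moves_str), 2)]
print(moves)        # ['88', '97', 'a8', 'b7', 'c6']
print(int(result))  # 1 -> black win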
Example #6
 def get_action(self, board):
     location = input("Your move: (11 to ff)")
     # resign support:
     if location == 'RESIGN':
         return None, None
     move_number = RenjuBoard.pos2number(location.strip())
     if move_number not in board.availables:
         print("invalid move")
         return self.get_action(board)  # re-prompt; the original fell through with the invalid move
     prob = np.zeros(15 * 15)
     prob[move_number] = 1.0
     return move_number, prob
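The "11 to ff" prompt suggests each coordinate is one hex digit from 1 to f, i.e. 15 values per axis. A plausible stand-in for RenjuBoard.pos2number under that assumption (an illustrative guess, not the project's implementation):

def pos2number(pos):
    row = int(pos[0], 16) - 1  # '1'..'f' -> 0..14
    col = int(pos[1], 16) - 1
    return row * 15 + col

assert pos2number("11") == 0    # top-left corner
assert pos2number("ff") == 224  # bottom-right corner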
Example #7
# Given a specific position, the consultant suggests the moves it considers good.
from renju import RenjuBoard
from players import MCTSPlayer
#from policy_value_net import PolicyValueNet
from policy_value_net_residual import PolicyValueNet

pv_net = PolicyValueNet('./master')

consultant = MCTSPlayer(pv_net.policy_value_fn,
                        c_puct=10,
                        n_playout=10000,
                        debug=True)
board = RenjuBoard()

while True:
    board_str = input("What do you want?(e for exit)\n").strip()
    if board_str == 'e' or board_str == '':
        break
    board.reset(board_str)
    board._debug_board()
    consultant.reset_player()
    move, move_prob = consultant.get_action(board)
    # if the consultant resigns, say so and skip playing a move
    if move is None:
        print("got conclusion, resign")
        continue
    board.do_move_by_number(move)
    board._debug_board()
Example #8
 def __init__(self, player1, player2):
     self.player1 = player1
     self.player2 = player2
     self.board = RenjuBoard()
Example #9
 def __init__(self, game_source='./games.log', jump_line=0):
     self.file_reader = open(game_source, 'r')
     self.board = RenjuBoard()
Example #10
    def _playout(self, state):
        """
        从根节点开始跑一个playout,找到暂时没有结论的叶子节点
        确认其胜负,不确定的就采纳神经网络的结论值;
        """
        node = self._root
        while True:
            if node.is_leaf():
                break
            # walk down the tree; if we descended at all, the root has children
            action, node = node.select(self._c_puct)
            state.do_move_by_number(action)

        player = 1 - state.get_current_player()  # the side that just moved
        leaf_value = None

        if not (node._win or node._lose):
            end, winner = state.game_end()
            if end:
                if winner == RenjuBoard.DRAW:  # tie
                    leaf_value = 0.0
                else:
                    if (player == 1 and winner == RenjuBoard.BLACK_WIN) or (
                            player == 0 and winner == RenjuBoard.WHITE_WIN):
                        node.mark_win()
                    else:
                        node.mark_lose()
            else:
                win_move, only_defense, defense_count = state.Find_win()
                if win_move is not None:
                    node.mark_lose()
                # two or more four-threats (or an open four), or the only defense is a forbidden point
                elif (defense_count >
                      1) or (only_defense and state.get_current_player() == 1
                             and state.isForbidden(
                                 RenjuBoard.num2coordinate(only_defense))):
                    node.mark_win()
                else:  # no conclusion yet
                    if only_defense is not None:  # a four-threat exists: defend it and descend one move
                        node.expand(
                            MCTS._build_expand_prob(state.availables,
                                                    only_defense))
                        node._remain_count = 1  # only the single defense remains
                        for act, _sub_node in node._children.items():
                            if act != only_defense:
                                _sub_node.mark_lose()
                        node = node._children[
                            only_defense]  # playing only_defense here cannot decide the game
                        state.do_move_by_number(only_defense)

                    action_probs, leaf_value = self._policy(state)
                    node.expand(action_probs)
                # it is the opponent's turn now; if the opponent has a winning strategy, the current side has lost

        if node._win:
            leaf_value = 1.0
        elif node._lose:
            leaf_value = -1.0

        node.update(leaf_value)
        root_result = (self._root._win or self._root._lose
                       or self._root._remain_count == 1)
        if root_result and len(self._root._children) == 0:
            # TODO: special-case a root that has a conclusion but was never
            # expanded; returning as-is would end the playout early and crash.
            # If the root is bare, the Find_win() above must have run on the
            # root itself, so reuse its result; state has not moved since.
            try:
                win_move
            except NameError:
                win_move, only_defense, defense_count = state.Find_win()
            if win_move:
                self._root.expand(
                    MCTS._build_expand_prob(state.availables, win_move))
                self._root._children[win_move].mark_win()
            else:
                self._root.expand(
                    MCTS._build_expand_prob(state.availables, only_defense))

        return root_result
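MCTS._build_expand_prob is not shown in these examples. Its usage suggests it builds (action, prior) pairs over the available moves with all of the prior mass on the single forced move, so select() descends straight to it. A minimal sketch under that assumption:

class MCTS:
    @staticmethod
    def _build_expand_prob(availables, forced_move):
        # hypothetical helper: prior 1.0 on the forced move, 0.0 elsewhere
        return [(act, 1.0 if act == forced_move else 0.0) for act in availables]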
Example #11
    def _playout(self, state):
        node = self._root
        while True:
            if node.is_leaf():
                break
            action, node = node.select(self._c_puct)
            state.do_move_by_number(action)

        player = 1 - state.get_current_player()
        leaf_value = None

        if not (node._win or node._lose):
            end, winner = state.game_end()
            if end:
                if winner == RenjuBoard.DRAW:  # tie
                    leaf_value = 0.0
                else:
                    if (player == 1 and winner == RenjuBoard.BLACK_WIN) or (
                            player == 0 and winner == RenjuBoard.WHITE_WIN):
                        node.mark_win()
                    else:
                        node.mark_lose()
            else:
                win_move, only_defense, defense_count = state.Find_win()
                if win_move is not None:
                    node.mark_lose()

                # win if there are two or more points that make a four (or an
                # open four), or if the opponent's only defense violates the
                # forbidden-move rule
                elif (defense_count >
                      1) or (only_defense and state.get_current_player() == 1
                             and state.isForbidden(
                                 RenjuBoard.num2coordinate(only_defense))):
                    node.mark_win()
                else:
                    if only_defense is not None:
                        node.expand(
                            MCTS._build_expand_prob(state.availables,
                                                    only_defense))
                        node._remain_count = 1
                        for act, _sub_node in node._children.items():
                            if act != only_defense:
                                _sub_node.mark_lose()
                        node = node._children[only_defense]
                        state.do_move_by_number(only_defense)

                    action_probs, leaf_value = self._policy(state)
                    node.expand(action_probs)

        if node._win:
            leaf_value = 1.0
        elif node._lose:
            leaf_value = -1.0

        node.update(leaf_value)
        root_result = (self._root._win or self._root._lose
                       or self._root._remain_count == 1)
        if root_result and len(self._root._children) == 0:
            # win_move exists only if Find_win ran above; recompute it for the
            # bare-root case
            try:
                win_move
            except NameError:
                win_move, only_defense, defense_count = state.Find_win()
            if win_move:
                self._root.expand(
                    MCTS._build_expand_prob(state.availables, win_move))
                self._root._children[win_move].mark_win()
            else:
                self._root.expand(
                    MCTS._build_expand_prob(state.availables, only_defense))

        return root_result