def get_action(self, board, temp=1e-3):
    #sensible_moves = board.availables
    # the pi vector returned by MCTS, as in the AlphaGo Zero paper
    if self._is_selfplay:
        temp = 1.5
    move_probs = np.zeros(15 * 15)
    acts, probs = self.mcts.get_move_probs(board, temp)
    if acts is None:  # the AI resigns
        return None, None
    move_probs[list(acts)] = probs
    best_chance = np.max(move_probs)
    best_move = np.where(move_probs == best_chance)[0][0]
    if self._is_selfplay:
        move = np.random.choice(
            acts, p=probs
            #p=0.9*probs + 0.1*np.random.dirichlet(0.3*np.ones(len(probs)))
        )
        #debug
        print("choose ", RenjuBoard.number2pos(move), "by prob ", move_probs[move])
        print("best move is ", RenjuBoard.number2pos(best_move), best_chance)
    else:
        # with the default temp=1e-3 this is almost equivalent to
        # choosing the move with the highest probability
        #move = np.random.choice(acts, p=probs)
        move = best_move
    # update the root node and reuse the search tree
    #self.mcts.update_with_move(-1)
    self.mcts.update_with_move(board, move)
    return move, move_probs
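# A quick, self-contained illustration (not part of the player code) of what
# `temp` does, assuming get_move_probs follows the usual AlphaZero recipe of
# softmax(log(visits) / temp): temp=1e-3 is effectively argmax over visit
# counts, while the self-play temp=1.5 flattens the distribution to keep some
# exploration. The visit counts below are made up.
import numpy as np

visits = np.array([120., 40., 25., 15.])
for temp in (1e-3, 1.0, 1.5):
    logits = np.log(visits) / temp   # work in log space to avoid overflow
    logits -= logits.max()
    probs = np.exp(logits)
    probs /= probs.sum()
    print(temp, np.round(probs, 3))  # 1e-3 -> [1. 0. 0. 0.]; 1.5 is much flatter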
class Game(object):

    def __init__(self, player1, player2):
        self.player1 = player1
        self.player2 = player2
        self.board = RenjuBoard()

    # Once initialized, the game is ready to start;
    # the caller controls when a match begins.
    def do_play(self):
        self.board.reset()
        states, mcts_probs = [], []
        while True:
            player = self.player2
            opponent = self.player1
            debug_stone = '◯'
            if self.board.get_current_player():
                player = self.player1
                opponent = self.player2
                debug_stone = '●'
            move, move_probs = player.get_action(self.board)
            # TODO Note: while a game runs, the Game class hands the current
            # position to the player to move; the player thinks and returns
            # its conclusion to Game. Game places the stone and should then
            # broadcast a global notification (publish/subscribe model).
            # That is not done yet; only the opponent is notified.
            opponent.notice(self.board, move)  # after a move, Game must notify the opponent
            # resign logic
            if move is None:
                end = True
                winner = (
                    RenjuBoard.WHITE_WIN if self.board.get_current_player()
                    else RenjuBoard.BLACK_WIN)  # on resignation, the opponent wins
                print("player: ", debug_stone, " resigns.")
            else:
                # store the data
                states.append(self.board.current_state())
                mcts_probs.append(move_probs)
                #print(move_probs)
                # perform a move
                self.board.do_move_by_number(move)
                print("player: ", debug_stone)
                self.board._debug_board()
                #if len(states) >= 5:
                end, winner = self.board.game_end()
            if end:
                total_moves = len(states)
                if winner == RenjuBoard.DRAW:
                    winner_map = [0 for _i in range(total_moves)]
                    print("draw")
                elif winner == RenjuBoard.WHITE_WIN:
                    winner_map = [(_i % 2) * 2 - 1 for _i in range(total_moves)]
                    print("WHITE_WIN")
                else:
                    winner_map = [((_i + 1) % 2) * 2 - 1 for _i in range(total_moves)]
                    print("BLACK_WIN")
                return winner, zip(states, mcts_probs, winner_map)
class Game(object):

    def __init__(self, player1, player2):
        self.player1 = player1
        self.player2 = player2
        self.board = RenjuBoard()

    def do_play(self):
        self.board.reset()
        states, mcts_probs = [], []
        while True:
            player = self.player2
            opponent = self.player1
            debug_stone = '◯'
            if self.board.get_current_player():
                player = self.player1
                opponent = self.player2
                debug_stone = '●'
            move, move_probs = player.get_action(self.board)
            # notify the opponent
            opponent.notice(self.board, move)
            # resign
            if move is None:
                end = True
                winner = (RenjuBoard.WHITE_WIN
                          if self.board.get_current_player()
                          else RenjuBoard.BLACK_WIN)
                # print("player: ", debug_stone, " resigns.")
            else:
                # store the data
                states.append(self.board.current_state())
                mcts_probs.append(move_probs)
                # perform the move
                self.board.do_move_by_number(move)
                # per-move board printing now belongs to the game interface
                # print("player: ", debug_stone)
                # self.board._debug_board()
                end, winner = self.board.game_end()
            if end:
                self.board._debug_board()
                total_moves = len(states)
                if winner == RenjuBoard.DRAW:
                    winner_map = [0 for _i in range(total_moves)]
                    print("draw")
                elif winner == RenjuBoard.WHITE_WIN:
                    winner_map = [(_i % 2) * 2 - 1 for _i in range(total_moves)]
                    print("WHITE_WIN")
                else:
                    winner_map = [((_i + 1) % 2) * 2 - 1 for _i in range(total_moves)]
                    print("BLACK_WIN")
                return winner, zip(states, mcts_probs, winner_map)
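# A minimal sketch of driving one self-play game with the Game class above and
# collecting training triples. Everything here is illustrative: the model path
# and playout budget are placeholders, and setting the self-play flag directly
# is an assumption (the code above only shows that get_action reads
# self._is_selfplay), not the project's actual training loop.
from players import MCTSPlayer
from policy_value_net_residual import PolicyValueNet

pv_net = PolicyValueNet('./current_model')  # placeholder checkpoint path
selfplayer = MCTSPlayer(pv_net.policy_value_fn, c_puct=10, n_playout=400)
selfplayer._is_selfplay = True              # the flag read in get_action above

game = Game(selfplayer, selfplayer)         # one player takes both colors
winner, play_data = game.do_play()
# winner_map gives each stored state +1 if the eventual winner was to move
# there and -1 otherwise, e.g. [1, -1, 1, -1, ...] for BLACK_WIN
for state, probs, z in play_data:
    pass  # feed (state, probs, z) into the network's training step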
def _debug(self):
    if self.debug_mode:
        for act, _sub_node in self._root._children.items():
            if _sub_node._n_visits > 0:
                print(RenjuBoard.number2pos(act),
                      "\tsel ", _sub_node.get_value(self._c_puct),
                      "\tv ", _sub_node._n_visits,
                      "\tQ ", _sub_node._Q,
                      "\tp ", _sub_node._P)
class MasterPlayer(object):
    """Master reads human game records"""

    def __init__(self, game_source='./games.log', jump_line=0):
        self.file_reader = open(game_source, 'r')
        self.board = RenjuBoard()

    def get_train_game(self):
        # Note: because of the RIF opening rules, the first 5 moves
        # of a human record are discarded.
        game_string = self.file_reader.readline()
        print(game_string)
        self.board.reset()
        game_string = game_string.strip()
        if len(game_string) < 10:
            return None
        states, mcts_probs = [], []
        # parse the game record and its result, and return the same
        # kind of data that self-play produces
        game_result = game_string.split(",")
        winner = int(game_result[1])
        for i in range(0, len(game_result[0]), 2):
            pos = game_result[0][i:i + 2]
            move_probs = np.zeros(15 * 15)
            move_number = RenjuBoard.pos2number(pos)
            move_probs[move_number] = 1.0
            # store the data
            states.append(self.board.current_state())
            mcts_probs.append(move_probs)
            self.board.do_move(pos)
            #self.board._debug_board()
        #if len(states) >= 5:
        total_moves = len(states)
        if winner == -1:
            winner_map = [0 for _i in range(total_moves)]
            print("draw")
        elif winner == 0:  # White wins
            winner_map = [(_i % 2) * 2 - 1 for _i in range(total_moves)]
            print("WHITE_WIN")
        else:
            winner_map = [((_i + 1) % 2) * 2 - 1 for _i in range(total_moves)]
            print("BLACK_WIN")
        # drop the first 5 moves
        states = states[5:]
        mcts_probs = mcts_probs[5:]
        winner_map = winner_map[5:]
        return zip(states, mcts_probs, winner_map)
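# A short sketch of building a supervised dataset from master games with the
# class above. get_train_game() returns None for a blank or too-short line
# (including EOF, where readline() yields ''), so a plain loop over the file
# looks like this; the one-record-per-line "<moves>,<result>" layout is what
# the parser above expects.
master = MasterPlayer(game_source='./games.log')
dataset = []
while True:
    game_data = master.get_train_game()
    if game_data is None:
        break  # blank/short line: treated here as end of input
    dataset.extend(game_data)
print(len(dataset), "training samples")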
def get_action(self, board):
    location = input("Your move: (11 to ff)")
    # resign support:
    if location == 'RESIGN':
        return None, None
    move_number = RenjuBoard.pos2number(location.strip())
    if move_number not in board.availables:
        print("invalid move")
        return self.get_action(board)  # re-prompt until a legal move is entered
    prob = np.zeros(15 * 15)
    prob[move_number] = 1.0
    return move_number, prob
# Given a specific position, the consultant suggests the points it thinks are good.
from renju import RenjuBoard
from players import MCTSPlayer
#from policy_value_net import PolicyValueNet
from policy_value_net_residual import PolicyValueNet

pv_net = PolicyValueNet('./master')
consultant = MCTSPlayer(pv_net.policy_value_fn,
                        c_puct=10,
                        n_playout=10000,
                        debug=True)
board = RenjuBoard()
while True:
    board_str = input("What do you want? (e to exit)\n").strip()
    if board_str == 'e' or board_str == '':
        break
    board.reset(board_str)
    board._debug_board()
    consultant.reset_player()
    move, move_prob = consultant.get_action(board)
    # if the consultant resigns, say so and skip playing a move
    if move is None:
        print("got conclusion, resign")
        continue
    board.do_move_by_number(move)
    board._debug_board()
def _playout(self, state):
    """Run a single playout from the root down to a leaf that has no
    conclusion yet; prove a win/loss there where possible, otherwise
    adopt the neural network's value estimate."""
    node = self._root
    while True:
        if node.is_leaf():
            break
        # walk down the tree; if we descend at all, the root is at least not bare
        action, node = node.select(self._c_puct)
        state.do_move_by_number(action)
    player = 1 - state.get_current_player()  # the side that just moved
    leaf_value = None
    if not (node._win or node._lose):
        end, winner = state.game_end()
        if end:
            if winner == RenjuBoard.DRAW:  # tie
                leaf_value = 0.0
            else:
                if (player == 1 and winner == RenjuBoard.BLACK_WIN) or (
                        player == 0 and winner == RenjuBoard.WHITE_WIN):
                    node.mark_win()
                else:
                    node.mark_lose()
        else:
            win_move, only_defense, defense_count = state.Find_win()
            if win_move is not None:
                node.mark_lose()
            # elif there are two or more four-threat points (or a live four),
            # or the only defense point is forbidden for Black
            elif (defense_count > 1) or (only_defense and
                                         state.get_current_player() == 1 and
                                         state.isForbidden(
                                             RenjuBoard.num2coordinate(only_defense))):
                node.mark_win()
            else:
                # reaching here means no conclusion yet
                if only_defense is not None:
                    # there is a four-threat: answer it and walk one move deeper
                    node.expand(
                        MCTS._build_expand_prob(state.availables, only_defense))
                    node._remain_count = 1  # only the single defense remains
                    for act, _sub_node in node._children.items():
                        if act != only_defense:
                            _sub_node.mark_lose()
                    # playing only_defense here is guaranteed not to decide the game
                    node = node._children[only_defense]
                    state.do_move_by_number(only_defense)
                action_probs, leaf_value = self._policy(state)
                node.expand(action_probs)
    # in the resulting position it is the opponent's turn; if the opponent
    # has a winning strategy, the side that just moved has lost
    if node._win:
        leaf_value = 1.0
    elif node._lose:
        leaf_value = -1.0
    node.update(leaf_value)
    root_result = self._root._win or self._root._lose or self._root._remain_count == 1
    if root_result and len(self._root._children) == 0:
        # TODO special-case a root that has a conclusion but was never expanded;
        # returned as-is, the playouts would stop early and crash.
        # If the root is bare, the state.Find_win() above was necessarily run
        # on the root position, so reuse its result; state has not moved since.
        try:
            win_move
        except NameError:
            win_move, only_defense, defense_count = state.Find_win()
        if win_move:
            self._root.expand(
                MCTS._build_expand_prob(state.availables, win_move))
            self._root._children[win_move].mark_win()
        else:
            self._root.expand(
                MCTS._build_expand_prob(state.availables, only_defense))
    return root_result
def _playout(self, state):
    node = self._root
    while True:
        if node.is_leaf():
            break
        action, node = node.select(self._c_puct)
        state.do_move_by_number(action)
    player = 1 - state.get_current_player()
    leaf_value = None
    if not (node._win or node._lose):
        end, winner = state.game_end()
        if end:
            if winner == RenjuBoard.DRAW:  # tie
                leaf_value = 0.0
            else:
                if (player == 1 and winner == RenjuBoard.BLACK_WIN) or (
                        player == 0 and winner == RenjuBoard.WHITE_WIN):
                    node.mark_win()
                else:
                    node.mark_lose()
        else:
            win_move, only_defense, defense_count = state.Find_win()
            if win_move is not None:
                node.mark_lose()
            # mark a win when the defense cannot succeed:
            # - more than one point must be defended (double four / open four), or
            # - the opponent's only defense is a forbidden point
            elif (defense_count > 1) or (only_defense and
                                         state.get_current_player() == 1 and
                                         state.isForbidden(
                                             RenjuBoard.num2coordinate(only_defense))):
                node.mark_win()
            else:
                if only_defense is not None:
                    node.expand(
                        MCTS._build_expand_prob(state.availables, only_defense))
                    node._remain_count = 1
                    for act, _sub_node in node._children.items():
                        if act != only_defense:
                            _sub_node.mark_lose()
                    node = node._children[only_defense]
                    state.do_move_by_number(only_defense)
                action_probs, leaf_value = self._policy(state)
                node.expand(action_probs)
    if node._win:
        leaf_value = 1.0
    elif node._lose:
        leaf_value = -1.0
    node.update(leaf_value)
    root_result = self._root._win or self._root._lose or self._root._remain_count == 1
    if root_result and len(self._root._children) == 0:
        try:
            win_move
        except NameError:
            win_move, only_defense, defense_count = state.Find_win()
        if win_move:
            self._root.expand(
                MCTS._build_expand_prob(state.availables, win_move))
            self._root._children[win_move].mark_win()
        else:
            self._root.expand(
                MCTS._build_expand_prob(state.availables, only_defense))
    return root_result
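# MCTS._build_expand_prob is referenced above but not shown in this section.
# Given that node.expand(...) elsewhere consumes the (action, probability)
# pairs produced by the policy network, a plausible sketch (an assumption,
# not the project's actual implementation) is to create every legal child,
# so the siblings can be marked lost, while giving the forced move all of
# the prior mass:
@staticmethod
def _build_expand_prob(availables, forced_move):
    # all prior probability on the single forced move
    return [(act, 1.0 if act == forced_move else 0.0) for act in availables]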