def search_my_move(self, env: GoBangEnv, is_root_node=False) -> float:
    """
    Recursively search the game tree (AGZ-style MCTS) for the best move.

    Q, V is value for this Player (always white). P is value for the player
    of next_player (black or white). This method searches for possible moves,
    adds them to a search tree, and eventually returns the best move that was
    found during the search.

    :param GoBangEnv env: environment in which to search for the move
    :param boolean is_root_node: whether this is the root node of the search
    :return float: value of the move, from the point of view of the side to
        move; for a terminal position the game result, otherwise backed up
        from the value-network prediction at the leaf
    """
    if env.done:
        if env.winner == Winner.draw:
            return 0
        # assert env.whitewon != env.white_to_move # side to move can't be winner!
        # Terminal and not a draw: the side to move has just been beaten.
        return -1

    state = env.state_key

    with self.node_lock[state]:
        #print(state)
        if state not in self.tree:
            # EXPAND step: unseen state — evaluate with the network exactly once.
            leaf_p, leaf_v = self.expand_and_evaluate(env)
            self.tree[state].p = leaf_p
            return leaf_v  # I'm returning everything from the POV of side to move
        #print(self.tree.items())

        # SELECT STEP
        action_t = self.select_action_q_and_u(env, is_root_node)

        # Apply a virtual loss so concurrent searcher threads are discouraged
        # from following this same path; it is undone in the backup step below.
        virtual_loss = self.play_config.virtual_loss

        my_visit_stats = self.tree[state]
        my_stats = my_visit_stats.a[action_t]

        my_visit_stats.sum_n += virtual_loss
        my_stats.n += virtual_loss
        my_stats.w += -virtual_loss
        my_stats.q = my_stats.w / my_stats.n

    env.step(action_t)
    leaf_v = self.search_my_move(env)  # next move from enemy POV
    leaf_v = -leaf_v  # negate: opponent's value is the opposite of ours

    # BACKUP STEP
    # on returning search path
    # update: N, W, Q  (also reverts the virtual loss applied above)
    with self.node_lock[state]:
        my_visit_stats.sum_n += -virtual_loss + 1
        my_stats.n += -virtual_loss + 1
        my_stats.w += virtual_loss + leaf_v
        my_stats.q = my_stats.w / my_stats.n
        # pretty_print_panel(state)
        # print(f'state:{state}, after action:{action_t}, esimated score: {leaf_v:.4}, and n,w,q are:{my_stats.n},{my_stats.w:.4},{my_stats.q:.4}')

    return leaf_v
def play_game(config: Config, cur, robot_white: int) -> float:
    """
    Plays one console game between a human (via input()) and the current model.

    :param Config config: config for how to play the game
    :param cur: pool of pipe bundles; one is popped for the model's player and
        appended back once the game is over
    :param int robot_white: truthy iff the model plays white (the human then
        plays black and enters moves on the console)
    :return float: the score for the human (0 for loss, .5 for draw, 1 for win)
    """
    cur_pipes = cur.pop()
    env = GoBangEnv().reset()
    configs = config.eval.play_config
    # man
    # NOTE(review): this mutates the shared eval play-config in place, which
    # affects any later user of config.eval.play_config — confirm intended.
    configs.simulation_num_per_move = 1200
    configs.tau_decay_rate = 0.
    #
    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=configs)
    if robot_white:
        white, black = current_player, None
    else:
        white, black = None, current_player
    print(f"本局游戏人类为{'黑棋' if robot_white else '白棋'}.")

    while not env.done:
        if env.white_to_move and robot_white:
            action = white.action(env)
        elif env.white_to_move == False and robot_white == False:
            action = black.action(env)
        else:
            # human's turn: show the board and prompt for a move
            print('当前局面如下:')
            pretty_print_panel(env.board.panel)
            print()
            action = input("请输入您要放置的棋子位置:")
            # re-prompt until the human enters a legal move
            while action not in env.board.legal_moves:
                print("输入有误!请重新输入.")
                action = input("请输入您要放置的棋子位置:")
        env.step(action)

    print('本局游戏结束!当前棋面为:')
    pretty_print_panel(env.board.panel)
    if env.winner == Winner.draw:
        man_score = 0.5
    elif env.white_won == robot_white:
        # the robot's colour won, so the human lost
        man_score = 0
    else:
        man_score = 1
    cur.append(cur_pipes)
    return man_score
def self_play_buffer(config, cur) -> (GoBangEnv, list):
    """
    Play one self-play game and return its move data for the buffer.

    :param Config config: config for how to play
    :param list(Connection) cur: list of pipe bundles used to get predictions.
        One is removed from this list during the game, then added back.
    :return (GoBangEnv, list((str,list(float))): the final GoBangEnv state and
        the list of move data to append to the SelfPlayWorker.buffer
    """
    pipes = cur.pop()  # borrow a pipe bundle for the duration of the game
    env = GoBangEnv().reset()

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)

    # alternate moves until the game ends
    while not env.done:
        mover = white if env.white_to_move else black
        env.step(mover.action(env))

    # score each side from the final result (draws are penalised)
    if env.winner == Winner.white:
        black_score, white_score = -1, 1
    elif env.winner == Winner.black:
        black_score, white_score = 1, -1
    else:
        black_score, white_score = -0.5, -0.5
    black.finish_game(black_score)
    white.finish_game(white_score)

    # interleave the recorded moves, black's first
    data = []
    for idx, black_move in enumerate(black.moves):
        data.append(black_move)
        if idx < len(white.moves):
            data.append(white.moves[idx])

    pretty_print_panel(env.board.panel)
    print()
    cur.append(pipes)  # return the borrowed pipe bundle
    return env, data
def expand_and_evaluate(self, env: GoBangEnv) -> (np.ndarray, float):
    """
    Expand a new leaf: get the network's policy and value for this state.

    Called only once per state, and with the state locked; the caller inserts
    P(a|s) into the tree and propagates the returned value.

    :param GoBangEnv env: environment holding the position to evaluate
    :return (np.ndarray, float): canonical policy vector and value prediction
        (i.e. from the point of view of the side to move)
    """
    planes = env.canonical_input_planes()
    policy, value = self.predict(planes)
    return policy, value
def deboog(self, env: GoBangEnv):
    """
    Print a debug dump of the visit statistics for env's current state,
    one row per candidate move, sorted by visit count (most visited first).
    """
    print(env.testeval())

    node = self.tree[env.state_key]
    # one row per action: n, w, q, p and the move's lookup index
    rows = np.asarray([
        np.asarray([st.n, st.w, st.q, st.p, self.move_lookup[move]])
        for move, st in node.a.items()
    ])

    for s in rows[rows[:, 0].argsort()[::-1]]:
        print(f'{self.labels[int(s[4])]:5}: '
              f'n: {s[0]:3.0f} '
              f'w: {s[1]:7.3f} '
              f'q: {s[2]:7.3f} '
              f'p: {s[3]:7.5f}')
def search_moves(self, env: GoBangEnv) -> (float, float):
    """
    Run many parallel AGZ-MCTS simulations from *env* and aggregate values.

    Each simulation runs on its own copy of the env, so the threads only share
    the search tree.

    :param GoBangEnv env: env to search for moves within
    :return (float, float): the maximum value over all simulations, and the
        value produced by the first simulation
    """
    with ThreadPoolExecutor(max_workers=self.play_config.search_threads) as executor:
        futures = [
            executor.submit(self.search_my_move, env=env.copy(), is_root_node=True)
            for _ in range(self.play_config.simulation_num_per_move)
        ]
    values = [future.result() for future in futures]
    return np.max(values), values[0]  # values[0] is kind of racy
def play_game(config, cur, ng, current_white: bool) -> (float, GoBangEnv, bool):
    """
    Plays one evaluation game between the current and next-generation models.

    :param Config config: config for how to play the game
    :param cur: pipe pool for the current model (one bundle is borrowed)
    :param ng: pipe pool for the next generation model (one bundle is borrowed)
    :param bool current_white: whether cur should play white or black
    :return (float, GoBangEnv, bool): the score for the ng model (0 for loss,
        .5 for draw, 1 for win), the env after the game is finished, and a
        bool which is true iff cur played as white in that game
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = GoBangEnv().reset()

    eval_cfg = config.eval.play_config
    cur_player = ChessPlayer(config, pipes=cur_pipes, play_config=eval_cfg)
    challenger = ChessPlayer(config, pipes=ng_pipes, play_config=eval_cfg)
    white, black = (cur_player, challenger) if current_white else (challenger, cur_player)

    while not env.done:
        mover = white if env.white_to_move else black
        env.step(mover.action(env))
        # cut over-long games short by adjudication
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1

    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white
def play_game(config, cur, ng, current_white: bool) -> (float, GoBangEnv, bool, list):
    """
    Plays a game against models cur and ng and reports the results.

    :param Config config: config for how to play the game
    :param cur: pipe pool for the current model (one bundle is borrowed)
    :param ng: pipe pool for the next generation model (one bundle is borrowed)
    :param bool current_white: whether cur should play white or black
    :return (float, GoBangEnv, bool, list): the score for the ng model
        (0 for loss, .5 for draw, 1 for win), the env after the game is
        finished, a bool which is true iff cur played as white in that game,
        and the interleaved move data of both players
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = GoBangEnv().reset()

    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config)
    ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        # NOTE(review): unlike the other evaluation play_game, there is no
        # max_game_length adjudication here — confirm this is intended.

    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1

    # ----- assemble the moves of both players -----
    if env.winner == Winner.white:
        black_score, white_score = -1, 1
    elif env.winner == Winner.black:
        black_score, white_score = 1, -1
    else:
        black_score, white_score = -0.5, -0.5
    black.finish_game(black_score)
    white.finish_game(white_score)

    data = []
    # interleave starting with black's moves — presumably black moves first;
    # TODO confirm against GoBangEnv's turn order
    for i in range(len(black.moves)):
        data.append(black.moves[i])
        if i < len(white.moves):
            data.append(white.moves[i])
    # --------------------
    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white, data
def play_game(robot: ChessPlayer, robot_white: int) -> None:
    """
    Run one human-vs-robot game in a pygame window.

    :param ChessPlayer robot: the model-backed player
    :param int robot_white: truthy iff the robot plays white
    :return None: returns early (None) when the human clicks "restart";
        otherwise shows the result for 10 seconds and returns None
    """
    env = GoBangEnv().reset()
    screen = pygame.display.set_mode([1200, 806])  # create the window
    pygame.display.set_caption("五子棋")  # window title
    put_text(f'本局游戏人类为{Player.BNAME if robot_white else Player.WNAME}.', screen, 28)
    # draw the board, the prompt area and the buttons in the window
    draw_board(screen)
    pygame.display.flip()
    clock = pygame.time.Clock()

    while not env.done:
        # one turn of the game: announce whose move it is
        if not env.white_to_move:
            no = 1  # black stones are encoded as 1
            put_text('黑棋落子', screen, 54)
        else:
            no = -1  # white stones are encoded as -1
            put_text('白棋落子', screen, 54)
        # decide whether it is the robot's turn
        if env.white_to_move and robot_white:
            action = robot.action(env)
            print(action)
            i, j, no = action.split('_')
            plot_chess(int(i) + 1, int(j) + 1, screen, int(no))
            pygame.display.flip()
            env.step(action)
        elif not env.white_to_move and not robot_white:
            action = robot.action(env)
            i, j, no = action.split('_')
            plot_chess(int(i) + 1, int(j) + 1, screen, int(no))
            pygame.display.flip()
            print(action)
            env.step(action)
        else:
            # human's turn: handle window events
            block = False
            for event in pygame.event.get():
                # window close button
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()
                # clicks inside the window trigger the matching command
                elif event.type == MOUSEBUTTONDOWN:
                    if event.button == 1:
                        x, y = event.pos[0], event.pos[1]
                        # "restart" button clicked
                        if 900 < x < 1100 and 500 < y < 600:
                            return
                        # "quit game" button clicked: exit the program
                        elif 900 < x < 1100 and 650 < y < 750:
                            pygame.quit()
                            sys.exit()
                        # "undo" button clicked (needs at least 2 half-moves)
                        elif 900 < x < 1020 and 350 < y < 450 and env.previous_actions.shape[
                                0] >= 2:
                            env.regret_n_steps(step=2)
                            # redraw the board from the current panel
                            draw_board_with_chess(env.board.panel, screen)
                            # undo done; zero x,y so the click can't also place a stone
                            x, y = 0, 0
                        # otherwise: find the board cell the click landed in
                        for i in range(PANEL_SIZE):
                            for j in range(PANEL_SIZE):
                                # clicked an empty cell (block guards against
                                # placing more than one stone per click)
                                if i * 40 + 3 + 20 < x < i * 40 + 3 + 60 and j * 40 + 3 + 20 < y < j * 40 + 3 + 60 and not env.board.panel[
                                        i, j] and not block:
                                    block = True
                                    # place a stone of the current colour there
                                    plot_chess(i + 1, j + 1, screen, no)
                                    action = f'{i}_{j}_{no}'
                                    print(action)
                                    pygame.display.flip()
                                    env.step(action)
        clock.tick(60)

    # NOTE(review): a drawn game is not handled here — it would be reported
    # as a black win (env.white_won falsy); confirm whether draws can occur.
    if env.white_won:
        put_text('白棋胜利,请重新游戏', screen, 30)
    else:
        put_text('黑棋胜利,请重新游戏', screen, 30)
    sleep(10)