def search_my_move(self, env: ChessEnv, is_root_node=False) -> float:
    """
    Run one simulation of the MCTS search from the current position.

    Q, V is value for this Player(always white). P is value for the player of
    next_player (black or white).

    This method searches for possible moves, adds them to a search tree, and
    eventually returns the best move that was found during the search.

    All values are returned from the point of view of the side to move in
    ``env``: the recursive call's result is negated on the way back up.

    :param ChessEnv env: environment in which to search for the move
    :param boolean is_root_node: whether this is the root node of the search.
    :return float: value of the move. This is calculated by getting a
        prediction from the value network.
    """
    # Terminal position: draw is 0; otherwise the side to move has lost
    # (the previous mover delivered the result), hence -1.
    if env.done:
        if env.winner == Winner.draw:
            return 0
        # assert env.whitewon != env.white_to_move # side to move can't be winner!
        return -1

    state = state_key(env)

    # Per-state lock: this tree is shared across search threads.
    with self.node_lock[state]:
        if state not in self.tree:
            # EXPAND + EVALUATE: unseen leaf — ask the network for
            # (policy, value) and store the policy prior on the new node.
            leaf_p, leaf_v = self.expand_and_evaluate(env)
            self.tree[state].p = leaf_p
            return leaf_v  # I'm returning everything from the POV of side to move

        # SELECT STEP: pick the action maximizing Q + U (PUCT).
        action_t = self.select_action_q_and_u(env, is_root_node)

        virtual_loss = self.play_config.virtual_loss

        my_visit_stats = self.tree[state]
        my_stats = my_visit_stats.a[action_t]

        # Apply virtual loss while this thread is "inside" the subtree so
        # concurrent simulations are discouraged from following the same path.
        my_visit_stats.sum_n += virtual_loss
        my_stats.n += virtual_loss
        my_stats.w += -virtual_loss
        my_stats.q = my_stats.w / my_stats.n

    # Recurse outside the lock; the child value is from the opponent's POV,
    # so negate it.
    env.step(action_t.uci())
    leaf_v = self.search_my_move(env)  # next move from enemy POV
    leaf_v = -leaf_v

    # BACKUP STEP
    # on returning search path
    # update: N, W, Q — undo the virtual loss (-virtual_loss / +virtual_loss)
    # and record the real visit (+1) and real value (+leaf_v).
    with self.node_lock[state]:
        my_visit_stats.sum_n += -virtual_loss + 1
        my_stats.n += -virtual_loss + 1
        my_stats.w += virtual_loss + leaf_v
        my_stats.q = my_stats.w / my_stats.n

    return leaf_v
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.

    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the
        SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)

    result = game.headers["Result"]
    # Weight each player's moves by how strong the player was.
    white_weight = clip_elo_policy(config, int(game.headers["WhiteElo"]))
    black_weight = clip_elo_policy(config, int(game.headers["BlackElo"]))

    # Walk the PGN mainline and collect the moves in UCI notation.
    actions = []
    node = game
    while not node.is_end():
        node = node.variation(0)
        actions.append(node.move.uci())

    # Replay the game, feeding each recorded move to the side to move.
    for uci in actions:
        if env.done:
            break
        if env.white_to_move:
            action = white.sl_action(env.observation, uci, weight=white_weight)  # ignore=True
        else:
            action = black.sl_action(env.observation, uci, weight=black_weight)  # ignore=True
        env.step(action, False)

    # A decisive result on a non-finished board means someone resigned.
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True

    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # Interleave white/black move records in play order.
    data = []
    for idx, white_move in enumerate(white.moves):
        data.append(white_move)
        if idx < len(black.moves):
            data.append(black.moves[idx])

    return env, data
def play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game and add the play data to the buffer.

    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send
        observations to for getting predictions. One will be removed from this
        list during the game, then added back
    :return (ChessEnv,list((str,list(float)): a tuple containing the final
        ChessEnv state and then a list of data to be appended to the
        DuelWorker.buffer
    """
    pipes_1 = cur[0].pop()  # borrow
    pipes_2 = cur[1].pop()  # borrow

    env = ChessEnv().reset()
    white = ChessPlayer(config, pipes=pipes_1)
    black = ChessPlayer(config, pipes=pipes_2)

    while not env.done:
        mover = white if env.white_to_move else black
        env.step(mover.action(env))
        # Force a result if the game drags on too long.
        if env.num_halfmoves >= config.play.max_game_length:
            env.adjudicate()

    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # Return the borrowed pipe sets to their pools.
    cur[0].append(pipes_1)
    cur[1].append(pipes_2)

    # Interleave the two players' (fen, policy, score) records in play order.
    fen_data = []
    total_moves = len(white.moves) + len(black.moves)
    moves_array = np.zeros((total_moves, white.labels_n), dtype=np.float16)
    scores = np.zeros(total_moves, dtype=np.int8)
    for idx, white_move in enumerate(white.moves):
        fen_data.append(white_move[0])
        moves_array[2 * idx] = white_move[1]
        scores[2 * idx] = white_move[2]
        if idx < len(black.moves):
            black_move = black.moves[idx]
            fen_data.append(black_move[0])
            moves_array[2 * idx + 1] = black_move[1]
            scores[2 * idx + 1] = black_move[2]

    return env, fen_data, moves_array, scores
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game and add the play data to the buffer.

    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send
        observations to for getting predictions. One will be removed from this
        list during the game, then added back
    :return (ChessEnv,list((str,list(float)): a tuple containing the final
        ChessEnv state and then a list of data to be appended to the
        SelfPlayWorker.buffer
    """
    pipes = cur.pop()  # borrow — both players share one pipe set

    env = ChessEnv().reset()
    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)

    while not env.done:
        mover = white if env.white_to_move else black
        env.step(mover.action(env))
        # Force a result if the game drags on too long.
        if env.num_halfmoves >= config.play.max_game_length:
            env.adjudicate()

    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # Interleave white/black move records in play order.
    data = []
    for idx, white_move in enumerate(white.moves):
        data.append(white_move)
        if idx < len(black.moves):
            data.append(black.moves[idx])

    cur.append(pipes)  # return the borrowed pipe set
    return env, data
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    """
    Plays a game between the current model and the next-generation model and
    reports the result.

    :param Config config: config for how to play the game
    :param cur: pipe pool for the current model (a pipe set is borrowed with
        pop() and returned with append())
    :param ng: pipe pool for the next generation model
    :param bool current_white: whether cur should play white or black
    :return (float, ChessEnv, bool): the score for the ng model (0 for loss,
        .5 for draw, 1 for win), the env after the game is finished, and a
        bool which is true iff cur played as white in that game.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()

    env = ChessEnv().reset()
    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config)
    ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config)

    # Assign colours according to the requested pairing.
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        mover = white if env.white_to_move else black
        env.step(mover.action(env))
        # Force a result if the game drags on too long.
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    # Score from ng's point of view: ng loses exactly when the winner's
    # colour matches cur's colour.
    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1

    # Return the borrowed pipe sets to their pools.
    cur.append(cur_pipes)
    ng.append(ng_pipes)

    return ng_score, env, current_white
def start(config: Config):
    """
    Play an interactive game between the AI and a human on the console.

    The human's colour is chosen at random; each half-move the board is
    rendered and the FEN printed, and the final result is reported when the
    game ends.

    :param Config config: application config; its play config is overridden
        by DemoConfig
    """
    DemoConfig().update_play_config(config.play)
    chess_model = PlayWithHuman(config)

    env = ChessEnv(config).reset()
    human_is_black = random() < 0.5
    chess_model.start_game(human_is_black)

    while not env.done:
        # It's the human's turn when the side to move matches their colour.
        humans_turn = (env.board.turn == chess.BLACK) == human_is_black
        if humans_turn:
            action = chess_model.move_by_human(env)
            print("You move to: " + action)
        else:
            action = chess_model.move_by_ai(env)
            print("AI moves to: " + action)
        env.step(action)
        env.render()
        print("Board FEN = " + env.board.fen())

    print("\nEnd of the game.")
    print("Game result:")
    print(env.board.result())
def run_game(self):
    """
    Run an interactive human-vs-AI chess game in a pygame window.

    White is controlled by the human via mouse clicks (left click selects a
    piece, right click selects the destination square); the engine responds
    for black. Keeps two board representations: ``board`` (python-chess, for
    move legality and rendering) and ``env`` (ChessEnv, fed to the engine).

    NOTE(review): the source of this block was whitespace-mangled; the
    nesting below is a best-effort reconstruction — verify against history.
    """
    default_config = Config()
    PlayWithHumanConfig().update_play_config(default_config.play)
    me_player = None
    env = ChessEnv().reset()
    #mcts_player = mcts_pure(c_puct=5, n_playout=5)
    move_stack = ""  # comma-separated UCI history rendered on screen
    board = chess.Board()
    chess_sets = Settings()
    screen = pygame.display.set_mode((chess_sets.screen_width, chess_sets.screen_height))
    pygame.display.set_caption("Chess Game")
    pygame.init()
    # Piece sprites, keyed by python-chess piece type (1=pawn .. 6=king),
    # value is [white image, black image].
    image_path = '/home/k1758068/Desktop/alphaGoTest-master/image/'  # NOTE(review): hard-coded absolute path
    black_b = pygame.image.load(image_path + 'blackb.png').convert_alpha()
    black_k = pygame.image.load(image_path + 'blackk.png').convert_alpha()
    black_n = pygame.image.load(image_path + 'blackn.png').convert_alpha()
    black_p = pygame.image.load(image_path + 'blackp.png').convert_alpha()
    black_q = pygame.image.load(image_path + 'blackq.png').convert_alpha()
    black_r = pygame.image.load(image_path + 'blackr.png').convert_alpha()
    white_b = pygame.image.load(image_path + 'whiteb.png').convert_alpha()
    white_k = pygame.image.load(image_path + 'whitek.png').convert_alpha()
    white_n = pygame.image.load(image_path + 'whiten.png').convert_alpha()
    white_p = pygame.image.load(image_path + 'whitep.png').convert_alpha()
    white_q = pygame.image.load(image_path + 'whiteq.png').convert_alpha()
    white_r = pygame.image.load(image_path + 'whiter.png').convert_alpha()
    images = {3: [white_b, black_b], 6: [white_k, black_k], 2: [white_n, black_n],
              1: [white_p, black_p], 5: [white_q, black_q], 4: [white_r, black_r]}
    background_color = (230,230,230)
    #image_path = '/Desktop/alphaGoTest-master/image/'
    chess_board = pygame.image.load(image_path + 'board_image.png').convert()
    while True:
        # --- draw board and pieces ---
        screen.fill(background_color)
        chess_board_x = 100
        chess_board_y = 50
        screen.blit(chess_board, (chess_board_x, chess_board_y))
        d = (950 - 55) / 8  # square size in pixels
        for i in range(64):
            if board.piece_at(i):
                piece = board.piece_at(i).piece_type
                color = board.piece_at(i).color
                if color:
                    piece = images[piece][0]  # white sprite
                else:
                    piece = images[piece][1]  # black sprite
                # Map square index to pixel coordinates (file → x, rank → y).
                x = 177 + (i % 8) * d
                y = 23 + (8 - (i // 8)) * d
                screen.blit(piece, (x, y))
        # Render the move history text.
        myfont = pygame.font.SysFont('test', 30)
        textsurface = myfont.render(move_stack, True, (0, 0, 0))
        screen.blit(textsurface, (1000, 1000))
        for event in pygame.event.get():
            if board.turn == True:
                # Human (white) to move: left click picks up, right click drops.
                if event.type == MOUSEBUTTONDOWN:
                    if event.button == 1:
                        selected_position = self.is_chess_clicked(chess_sets.position, event)
                        # NOTE(review): piece_at may return None on an empty
                        # square — .color would then raise; confirm intent.
                        select_piece = board.piece_at(selected_position)
                        if select_piece.color == True:
                            from_position = selected_position
                            print(from_position)
                        else:
                            pass
                    if event.button == 3:
                        selected_position = self.is_chess_clicked(chess_sets.position, event)
                        to_position = selected_position
                        a = [48, 49, 50, 51, 52, 53, 54, 55]  # squares a7-h7: pawn moving from here promotes
                        # The promotion precess of PAWN pieces
                        if from_position in a and select_piece.piece_type == 1:
                            # a = self.create_dialog()# a = pop_dialog.main()
                            # NOTE(review): the dialog that should reassign `a`
                            # is commented out, so int(a) is applied to the
                            # list above and raises TypeError — looks broken.
                            move = chess.Move(from_position, to_position, promotion=int(a), drop=None)
                            print(int(a), move)
                        else:
                            move = chess.Move(from_position, to_position)
                        # Move check and make move
                        if move not in board.legal_moves:
                            print("invalide move!", move, board.legal_moves)
                        else:
                            board.push(move)
                            move_stack += "," + board.peek().uci()
                            myfont = pygame.font.SysFont('test', 30)
                            textsurface = myfont.render(move_stack, True, (0, 0, 0))
                            screen.blit(textsurface, (100, 100))
            else:
                # Engine (black) to move.
                # NOTE(review): the engine action is computed twice here;
                # `env` is stepped with the first action while `board` is
                # pushed with the second, so the two boards can diverge —
                # almost certainly a bug, confirm before relying on this.
                me_player = self.get_player(default_config)
                action = me_player.action(env, False)
                env.step(action)
                screen.blit(textsurface, (100, 100))
                me_player = self.get_player(default_config)
                action = me_player.action(env, False)
                print(f"bestmove {action}")
                board.push(chess.Move.from_uci(action))
        pygame.display.flip()
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.

    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return: (env, fen_data, moves_array, scores, ok); ok is False (with the
        other fields None) when the PGN headers lack Elo ratings, in which
        case the game is skipped
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]

    # Rare cases where elo ratings are not in the headers — skip the game.
    if "WhiteElo" not in game.headers or "BlackElo" not in game.headers:
        return None, None, None, None, False

    # Weight each player's moves by how strong the player was.
    white_weight = clip_elo_policy(config, int(game.headers["WhiteElo"]))
    black_weight = clip_elo_policy(config, int(game.headers["BlackElo"]))

    # Walk the PGN mainline and collect the moves in UCI notation.
    actions = []
    node = game
    while not node.is_end():
        node = node.variation(0)
        actions.append(node.move.uci())

    # Replay the game, feeding each recorded move to the side to move.
    for uci in actions:
        if env.done:
            break
        if env.white_to_move:
            action = white.sl_action(env.observation, uci, weight=white_weight)  # ignore=True
        else:
            action = black.sl_action(env.observation, uci, weight=black_weight)  # ignore=True
        env.step(action, False)

    # A decisive result on a non-finished board means someone resigned.
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True

    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # Interleave the two players' (fen, policy, score) records in play order.
    fen_data = []
    total_moves = len(white.moves) + len(black.moves)
    moves_array = np.zeros((total_moves, white.labels_n), dtype=np.float16)
    scores = np.zeros(total_moves, dtype=np.int8)
    for idx, white_move in enumerate(white.moves):
        fen_data.append(white_move[0])
        moves_array[2 * idx] = white_move[1]
        scores[2 * idx] = white_move[2]
        if idx < len(black.moves):
            black_move = black.moves[idx]
            fen_data.append(black_move[0])
            moves_array[2 * idx + 1] = black_move[1]
            scores[2 * idx + 1] = black_move[2]

    return env, fen_data, moves_array, scores, True