def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one self-play game and return the final environment with its move data.

    :param config: configuration passed to both ChessPlayer instances;
        ``config.play.max_game_length`` is the half-move count at which
        ``env.adjudicate()`` is called.
    :param list cur: pool of pipes objects. One entry is popped for the
        duration of the game and appended back before this function exits,
        including when the game raises.
    :return: ``(env, data)`` where ``env`` is the ChessEnv after the game and
        ``data`` interleaves ``white.moves`` and ``black.moves``, white first.
    """
    pipes = cur.pop()  # borrow
    try:
        env = ChessEnv().reset()
        white = ChessPlayer(config, pipes=pipes)
        black = ChessPlayer(config, pipes=pipes)

        while not env.done:
            mover = white if env.white_to_move else black
            env.step(mover.action(env))
            if env.num_halfmoves >= config.play.max_game_length:
                env.adjudicate()

        # Result from black's point of view: 1 = black won, -1 = white won,
        # 0 = anything else.
        if env.winner == Winner.white:
            black_win = -1
        elif env.winner == Winner.black:
            black_win = 1
        else:
            black_win = 0

        black.finish_game(black_win)
        white.finish_game(-black_win)

        # Interleave the per-player records in playing order. Black entries
        # beyond the length of white.moves are not included.
        data = []
        for i, white_move in enumerate(white.moves):
            data.append(white_move)
            if i < len(black.moves):
                data.append(black.moves[i])
        return env, data
    finally:
        # Always hand the borrowed pipes back, otherwise a failed game would
        # permanently shrink the pool.
        cur.append(pipes)
def self_play_buffer(config, cur) -> (GoBangEnv, list):
    """
    Play one game and return the play data to be added to the buffer.

    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send
        observations to for getting predictions. One is removed from this list
        during the game and added back afterwards, including when the game
        raises.
    :return (GoBangEnv,list((str,list(float)): a tuple containing the final
        GoBangEnv state and a list of data to be appended to the
        SelfPlayWorker.buffer. The list interleaves ``black.moves`` and
        ``white.moves``, black first.
    """
    pipes = cur.pop()  # borrow
    try:
        env = GoBangEnv().reset()
        white = ChessPlayer(config, pipes=pipes)
        black = ChessPlayer(config, pipes=pipes)

        # No max-game-length adjudication here: the loop only ends once
        # env.done is set.
        while not env.done:
            mover = white if env.white_to_move else black
            env.step(mover.action(env))

        # A game without a white or black winner is scored -0.5 for BOTH
        # sides, so the two scores are not simply negations of each other.
        if env.winner == Winner.white:
            black_score, white_score = -1, 1
        elif env.winner == Winner.black:
            black_score, white_score = 1, -1
        else:
            black_score, white_score = -0.5, -0.5

        black.finish_game(black_score)
        white.finish_game(white_score)

        # Interleave the per-player records, black first. White entries beyond
        # the length of black.moves are not included.
        data = []
        for i, black_move in enumerate(black.moves):
            data.append(black_move)
            if i < len(white.moves):
                data.append(white.moves[i])

        # Show the final position on stdout.
        pretty_print_panel(env.board.panel)
        print()
        return env, data
    finally:
        # Always hand the borrowed pipes back, otherwise a failed game would
        # permanently shrink the pool.
        cur.append(pipes)
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one self-play game with a search tree shared by both players.

    :param config: configuration passed to both ChessPlayer instances;
        ``config.play.max_game_length`` is the half-move count at which
        ``env.adjudicate()`` is called.
    :param list cur: pool of pipes objects. One entry is popped for the
        duration of the game and appended back before this function exits,
        including when the game raises.
    :return: ``(env, data)`` where ``env`` is the ChessEnv after the game and
        ``data`` interleaves ``white.moves`` and ``black.moves``, white first.
    """
    pipes = cur.pop()  # borrow
    try:
        env = ChessEnv().reset()

        # Both players read and write the same statistics table.
        search_tree = defaultdict(VisitStats)
        white = ChessPlayer(config, search_tree=search_tree, pipes=pipes)
        black = ChessPlayer(config, search_tree=search_tree, pipes=pipes)

        history = []
        cc = 0  # consecutive half-moves that repeat the action from 4 plies ago
        while not env.done:
            mover = white if env.white_to_move else black
            action = mover.action(env)
            env.step(action)
            history.append(action)

            # Repetition heuristic: count how many actions in a row equal the
            # action played four half-moves earlier; any mismatch resets it.
            if len(history) > 6 and history[-1] == history[-5]:
                cc += 1
            else:
                cc = 0

            if env.num_halfmoves >= config.play.max_game_length or cc >= 4:
                env.adjudicate()

        # Result from black's point of view: 1 = black won, -1 = white won,
        # 0 = anything else.
        if env.winner == Winner.white:
            black_win = -1
        elif env.winner == Winner.black:
            black_win = 1
        else:
            black_win = 0

        black.finish_game(black_win)
        white.finish_game(-black_win)

        # Interleave the per-player records in playing order. Black entries
        # beyond the length of white.moves are not included.
        data = []
        for i, white_move in enumerate(white.moves):
            data.append(white_move)
            if i < len(black.moves):
                data.append(black.moves[i])
        return env, data
    finally:
        # Always hand the borrowed pipes back, otherwise a failed game would
        # permanently shrink the pool.
        cur.append(pipes)
class PlayWithHuman:
    """Console game between a human and a ChessPlayer using the best model."""

    def __init__(self, config: Config):
        """
        Store the config and load the model weights.

        :param Config config: configuration passed to the model and the player
        :raises RuntimeError: if the best model weights cannot be loaded
        """
        self.config = config
        self.human_color = None
        self.observers = []
        self.model = self._load_model()
        self.ai = None  # type: ChessPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def start_game(self, human_is_black):
        """
        Record the human's colour and create a fresh AI player.

        :param human_is_black: truthy if the human plays black, else white
        """
        self.human_color = chess.BLACK if human_is_black else chess.WHITE
        self.ai = ChessPlayer(self.config, self.model)

    def _load_model(self):
        """
        Build a ChessModel and load the best weights into it.

        :return: the loaded model
        :raises RuntimeError: if ``load_best_model_weight`` reports failure
        """
        from chess_zero.agent.model_chess import ChessModel

        model = ChessModel(self.config)
        if not load_best_model_weight(model):
            raise RuntimeError("Best model not found!")
        return model

    def move_by_ai(self, env):
        """
        Ask the AI for an action and remember its evaluation of that position.

        :param env: environment exposing ``observation``
        :return: the action chosen by the AI
        """
        if self.ai is None:
            # start_game() was not called; create the player on first use.
            self.ai = ChessPlayer(self.config, self.model)
        action = self.ai.action(env.observation)

        self.last_history = self.ai.ask_thought_about(env.observation)
        self.last_evaluation = self.last_history.values[
            self.last_history.action]
        logger.debug(f"Evaluation history by AI = {self.last_history}")
        logger.debug(f"Evaluation by AI = {self.last_evaluation}")
        return action

    def move_by_human(self, env):
        """
        Prompt on stdin until the human enters a legal move in UCI notation.

        :param env: environment exposing ``board.legal_moves``
        :return: the accepted move as the UCI string that was typed
        """
        while True:
            # input() is deliberately outside the try block so that Ctrl-C
            # (KeyboardInterrupt) and end-of-input (EOFError) propagate
            # instead of being reported as an invalid move forever.
            move = input(
                '\nEnter your move in UCI format (a1a2, b2b6, ...): ')
            try:
                is_legal = chess.Move.from_uci(move) in env.board.legal_moves
            except ValueError:
                # Malformed UCI text.
                is_legal = False
            if is_legal:
                return move
            print("That is NOT a valid move :(.")
class PlayWithHuman:
    """Console game between a human and a ChessPlayer using the newest model."""

    def __init__(self, config: Config):
        """
        Store the config and load the model weights.

        :param Config config: configuration passed to the model and the player
        :raises RuntimeError: if the newest model weights cannot be loaded
        """
        self.config = config
        self.human_color = None
        self.observers = []
        self.model = self._load_model()
        self.ai = None  # type: ChessPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def start_game(self, human_is_white):
        """
        Record the human's colour and create a fresh AI player.

        :param human_is_white: truthy if the human plays white, else black
        """
        self.human_color = chess.WHITE if human_is_white else chess.BLACK
        # The optional third argument overrides self.config.play.
        self.ai = ChessPlayer(
            self.config, self.model, self.config.human.play_config
        )

    def _load_model(self):
        """
        Build a ChessModel and load the newest weights into it.

        :return: the loaded model
        :raises RuntimeError: if ``load_newest_model_weight`` reports failure
        """
        from chess_zero.agent.model_chess import ChessModel

        model = ChessModel(self.config)
        if not load_newest_model_weight(self.config.resource, model):
            raise RuntimeError("newest model not found!")
        return model

    def move_by_ai(self, env):
        """
        Return the AI's action for the current environment.

        :param env: environment passed straight to ``self.ai.action``
        :return: the action chosen by the AI
        """
        return self.ai.action(env)

    def move_by_human(self, env):
        """
        Prompt on stdin until the human enters a move that parses as SAN.

        :param env: environment exposing ``board.parse_san``
        :return: the parsed move
        :raises SystemExit: if the human types ``quit``
        """
        while True:
            san = input(
                '\nEnter your move in SAN format ("e4", "Nf3", ... or "quit"): '
            )
            if san == "quit":
                raise SystemExit
            try:
                move = env.board.parse_san(san)
            except ValueError:
                # Only parse failures are treated as bad input; any other
                # error is a real bug and must not be hidden in a prompt loop.
                print("That is NOT a valid move :(.")
                continue
            # NOTE(review): presumably parse_san yields a null move for
            # null-move notation such as "--" - confirm against python-chess.
            if move != chess.Move.null():
                return move
            print("That is NOT a valid move :(.")
class PlayWithEngine:
    """Pairs a ChessPlayer backed by the best model against a UciEngine."""

    def __init__(self, config: Config):
        """
        Create the engine wrapper and load the model weights.

        :param Config config: configuration passed to the model and the player
        :raises RuntimeError: if the best model weights cannot be loaded
        """
        self.config = config
        self.engine_color = None
        self.engine = UciEngine()
        self.observers = []
        self.model = self._load_model()
        self.ai = None  # type: ChessPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def start_game(self, engine_is_black):
        """
        Record the engine's colour, start the engine and create the AI player.

        :param engine_is_black: truthy if the engine plays black, else white
        """
        if engine_is_black:
            self.engine_color = chess.BLACK
        else:
            self.engine_color = chess.WHITE
        self.engine.start_game(engine_is_black)
        self.ai = ChessPlayer(self.config, self.model)

    def _load_model(self):
        """
        Build a ChessModel and load the best weights into it.

        :return: the loaded model
        :raises RuntimeError: if ``load_best_model_weight`` reports failure
        """
        from chess_zero.agent.model_chess import ChessModel

        best = ChessModel(self.config)
        loaded = load_best_model_weight(best)
        if loaded:
            return best
        raise RuntimeError("Best model not found!")

    def move_by_ai(self, env):
        """
        Ask the AI for an action and remember its evaluation of that position.

        :param env: environment exposing ``observation``
        :return: the action chosen by the AI
        """
        if self.ai is None:
            # start_game() was not called; create the player on first use.
            self.ai = ChessPlayer(self.config, self.model)

        chosen = self.ai.action(env.observation)

        self.last_history = self.ai.ask_thought_about(env.observation)
        self.last_evaluation = self.last_history.values[self.last_history.action]
        logger.debug(f"Evaluation history by AI = {self.last_history}")
        logger.debug(f"Evaluation by AI = {self.last_evaluation}")
        return chosen

    def move_by_opponent(self, env):
        """
        Sync the engine with the board and return the engine's best move.

        The engine's score is only logged; it is not stored on the instance.

        :param env: environment exposing ``board``
        :return: the move returned by ``self.engine.best_move()``
        """
        engine = self.engine
        engine.update_position(env.board)
        reply = engine.best_move()
        last_evaluation = engine.score()
        logger.debug(f"Evaluation by Stockfish = {last_evaluation}")
        return reply
class SelfPlayWorker:
    """Runs self-play games one after another and writes the play data to disk."""

    def __init__(self, config: Config, env=None, model=None):
        """
        :param config:
        :param ChessEnv|None env:
        :param chess_zero.agent.model_chess.ChessModel|None model:
        """
        self.config = config
        self.env = env  # type: ChessEnv
        self.model = model
        self.white = None  # type: ChessPlayer
        self.black = None  # type: ChessPlayer
        self.buffer = []

    def start(self):
        """
        Play games forever, starting the game counter at 1.

        The model is loaded first if none was supplied. After every
        ``nb_game_in_file``-th game the best model weights are reloaded if
        they changed.
        """
        if self.model is None:
            self.model = self.load_model()
        self.buffer = []

        idx = 1
        while True:
            start_time = time()
            env = self.start_game(idx)
            end_time = time()
            logger.debug(f"game {idx} time={end_time - start_time} sec, "
                         f"turn={env.turn}:{env.observation} - Winner:{env.winner} - by resignation?:{env.resigned}")
            file_is_full = (idx % self.config.play_data.nb_game_in_file) == 0
            if file_is_full:
                reload_best_model_weight_if_changed(self.model)
            idx += 1

    def start_game(self, idx):
        """
        Play a single game on ``self.env`` and record its data.

        :param int idx: game counter; the buffer is flushed to a file when it
            is a multiple of ``nb_game_in_file``
        :return: ``self.env`` after the game
        """
        self.env.reset()
        self.black = ChessPlayer(self.config, self.model)
        self.white = ChessPlayer(self.config, self.model)

        observation = self.env.observation
        while not self.env.done:
            black_to_move = self.env.board.turn == chess.BLACK
            mover = self.black if black_to_move else self.white
            board, info = self.env.step(mover.action(observation))
            # The next observation is the FEN of the board returned by step().
            observation = board.fen()

        self.finish_game()
        flush = idx % self.config.play_data.nb_game_in_file == 0
        self.save_play_data(write=flush)
        self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        """
        Append both players' moves to the buffer and optionally flush it.

        :param bool write: when true, write the whole buffer to a new
            timestamped file and empty the buffer
        """
        self.buffer.extend(self.black.moves + self.white.moves)
        if write:
            resource = self.config.resource
            game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
            filename = resource.play_data_filename_tmpl % game_id
            path = os.path.join(resource.play_data_dir, filename)
            logger.info(f"save play data to {path}")
            write_game_data_to_file(path, self.buffer)
            self.buffer = []

    def remove_play_data(self):
        """Delete leading play-data files so that at most ``max_file_num`` remain."""
        files = get_game_data_filenames(self.config.resource)
        surplus = len(files) - self.config.play_data.max_file_num
        if surplus < 0:
            return
        # Entries are removed from the front of the returned list.
        for stale in files[:surplus]:
            os.remove(stale)

    def finish_game(self):
        """Tell both players the result: 1 for the winner, -1 for the loser, 0 otherwise."""
        winner = self.env.winner
        if winner == Winner.black:
            black_win = 1
        else:
            black_win = -1 if winner == Winner.white else 0
        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

    def load_model(self):
        """
        Build a ChessModel, using stored best weights unless a new model is requested.

        When ``config.opts.new`` is set, or no best weights can be loaded, a
        new model is built and saved as the best model.

        :return: the model
        """
        from chess_zero.agent.model_chess import ChessModel

        model = ChessModel(self.config)
        reuse_stored = (not self.config.opts.new) and load_best_model_weight(model)
        if not reuse_stored:
            model.build()
            save_as_best_model(model)
        return model
def play_game(robot: ChessPlayer, robot_white: int) -> (float, GoBangEnv, int):
    """
    Run one pygame-driven gobang game between a human and ``robot``.

    :param ChessPlayer robot: the AI player; its ``action(env)`` result is an
        ``"i_j_no"`` string that is split on underscores for drawing.
    :param int robot_white: truthy if the robot plays white, falsy if the
        robot plays black (the human takes the other colour).
    :return: NOTE(review): every visible exit path returns None (a bare
        ``return`` on the restart button, or falling off the end), although
        the annotation advertises a 3-tuple - confirm what callers expect.
    """
    env = GoBangEnv().reset()
    screen = pygame.display.set_mode([1200, 806])  # create the window
    pygame.display.set_caption("五子棋")  # set the window title
    put_text(f'本局游戏人类为{Player.BNAME if robot_white else Player.WNAME}.', screen, 28)
    # draw the board, the prompt area and the buttons in the window
    draw_board(screen)
    pygame.display.flip()
    clock = pygame.time.Clock()
    while not env.done:  # one game starts
        # `no` is recomputed on every pass because the robot branches below
        # rebind it to a string taken from the action.
        if not env.white_to_move:
            no = 1  # black stones are numbered 1
            put_text('黑棋落子', screen, 54)
        else:
            no = -1  # white stones are numbered -1
            put_text('白棋落子', screen, 54)
        # decide whether it is the robot's turn
        if env.white_to_move and robot_white:
            action = robot.action(env)
            print(action)
            i, j, no = action.split('_')
            # board drawing is 1-based, hence the + 1
            plot_chess(int(i) + 1, int(j) + 1, screen, int(no))
            pygame.display.flip()
            env.step(action)
        elif not env.white_to_move and not robot_white:
            action = robot.action(env)
            i, j, no = action.split('_')
            plot_chess(int(i) + 1, int(j) + 1, screen, int(no))
            pygame.display.flip()
            print(action)
            env.step(action)
        else:  # the human's turn
            # `block` ensures at most one stone is placed per pass of the
            # outer loop, even if several click events are queued.
            block = False
            for event in pygame.event.get():
                # window closed
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()
                # a click inside the window triggers the matching command
                elif event.type == MOUSEBUTTONDOWN:
                    if event.button == 1:
                        x, y = event.pos[0], event.pos[1]
                        # 'restart' clicked
                        if 900 < x < 1100 and 500 < y < 600:
                            return
                        # 'quit game' clicked: exit the game
                        elif 900 < x < 1100 and 650 < y < 750:
                            pygame.quit()
                            sys.exit()
                        # 'undo' clicked (only when at least two actions exist)
                        elif 900 < x < 1020 and 350 < y < 450 and env.previous_actions.shape[
                                0] >= 2:
                            env.regret_n_steps(step=2)
                            # redraw the board with the remaining stones
                            draw_board_with_chess(env.board.panel, screen)
                            # undo finished; reset the click position so it is
                            # not handled again below
                            x, y = 0, 0
                        for i in range(PANEL_SIZE):
                            for j in range(PANEL_SIZE):
                                # the click landed on this empty board cell
                                if i * 40 + 3 + 20 < x < i * 40 + 3 + 60 and j * 40 + 3 + 20 < y < j * 40 + 3 + 60 and not env.board.panel[
                                        i, j] and not block:
                                    block = True
                                    # place a stone of the current colour on that cell
                                    plot_chess(i + 1, j + 1, screen, no)
                                    action = f'{i}_{j}_{no}'
                                    print(action)
                                    pygame.display.flip()
                                    env.step(action)
        clock.tick(60)
    # Anything other than a white win is reported with the black-wins message.
    if env.white_won:
        put_text('白棋胜利,请重新游戏', screen, 30)
    else:
        put_text('黑棋胜利,请重新游戏', screen, 30)
    sleep(10)
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game in which each side cannot see the opponent's pieces.

    Before every move the side to move is given a deep copy of the
    environment from which all opposing pieces have been removed. The chosen
    action is then applied to the real environment. A ``ValueError`` from
    that step counts as a failed play, and after 50 failed plays in a row the
    game is adjudicated. A summary line is appended to ``result.csv`` and the
    list of accepted moves to ``moves_list.csv`` in the working directory.

    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send
        observations to for getting predictions. One is removed from this list
        during the game and added back afterwards, including when the game
        raises.
    :return (ChessEnv,list((str,list(float)): a tuple containing the final
        ChessEnv state and a list of data to be appended to the
        SelfPlayWorker.buffer
    """
    # Piece symbols as produced by str(piece); built once, not per move.
    black_pieces = set("prnbqk")
    white_pieces = set("PRNBQK")
    max_consecutive_failed_plays = 50

    pipes = cur.pop()  # borrow
    try:
        env = ChessEnv().reset()
        white = ChessPlayer(config, pipes=pipes)
        black = ChessPlayer(config, pipes=pipes)

        move = 0
        failed_play = 0
        total_failed_plays = 0
        print("Match Started")
        played = []  # accepted actions, each already suffixed with ", "

        while not env.done:
            if env.white_to_move:
                mover, hidden = white, black_pieces
            else:
                mover, hidden = black, white_pieces

            # Give the mover a private copy with the opponent's pieces removed.
            temp = deepcopy(env)
            for square, piece in temp.board.piece_map().items():
                if str(piece) in hidden:
                    temp.board.remove_piece_at(square)
            action = mover.action(temp)

            print("Match in Progress: ", move,
                  "Moves made in the game, Failed Plays: ",
                  total_failed_plays, end='\r')
            try:
                env.step(action)
                played.append(action + ', ')
                failed_play = 0
                move += 1
                if env.num_halfmoves >= config.play.max_game_length:
                    env.adjudicate()
            except ValueError:
                # The action chosen on the reduced board was rejected by the
                # real environment.
                failed_play += 1
                total_failed_plays += 1
                if failed_play == max_consecutive_failed_plays:
                    logger.warning(
                        "\nEnding the Game due to lack of development")
                    env.adjudicate()

        # Only the failed-play cutoff is labelled "Adjudicated"; a game ended
        # by the max-length rule has failed_play == 0 and logs "Game End".
        if failed_play == max_consecutive_failed_plays:
            ending = ">, Adjudicated\n"
        else:
            ending = ">, Game End\n"
        result = (f"{move!s}, {total_failed_plays!s}, {env.winner!s}, "
                  f"<{env.board.fen()}") + ending
        # The with-statement closes the file; no explicit close() is needed.
        with open("result.csv", "a+") as fp:
            fp.write(result)

        with open("moves_list.csv", "a+") as fp:
            fp.write("".join(played))
            fp.write("\n")

        # Result from black's point of view: 1 = black won, -1 = white won,
        # 0 = anything else.
        if env.winner == Winner.white:
            black_win = -1
            logger.info("White wins")
        elif env.winner == Winner.black:
            black_win = 1
            logger.info("Black wins")
        else:
            black_win = 0
            logger.info("Draw Match")

        black.finish_game(black_win)
        white.finish_game(-black_win)

        # Interleave the per-player records, white first. Black entries beyond
        # the length of white.moves are not included.
        data = []
        for i, white_move in enumerate(white.moves):
            data.append(white_move)
            if i < len(black.moves):
                data.append(black.moves[i])
        return env, data
    finally:
        # Always hand the borrowed pipes back, otherwise a failed game would
        # permanently shrink the pool.
        cur.append(pipes)