def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    """Play one evaluation game between the current and next-gen models.

    Borrows one pipe group from each pool (`cur`, `ng`), plays a full game,
    then returns the pipes to their pools.

    :param Config config: configuration for the evaluation game
    :param cur: pool (list) of pipe groups for the current model
    :param ng: pool (list) of pipe groups for the next-gen model
    :param bool current_white: True if the current model plays white
    :return: (ng_score, env, current_white) where ng_score is
        1 for an ng win, 0.5 for a draw, 0 for an ng loss
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = ChessEnv().reset()
    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config)
    ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player
    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()  # force a result when the game runs too long
    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0  # the current model won, so ng scores 0
    else:
        ng_score = 1
    cur.append(cur_pipes)  # return the borrowed pipes to their pools
    ng.append(ng_pipes)
    return ng_score, env, current_white
def play_game(self, best_model, ng_model):
    """Play one game between best_model and ng_model with randomized colors.

    :param best_model: the current best model
    :param ng_model: the next-generation candidate model
    :return: (ng_win, best_is_white) where ng_win is 1 for an ng win,
        0 for an ng loss, and None when the game is drawn (winner is
        neither Winner.white nor Winner.black)
    """
    env = ChessEnv().reset()
    best_player = ChessPlayer(self.config, best_model, play_config=self.config.eval.play_config)
    ng_player = ChessPlayer(self.config, ng_model, play_config=self.config.eval.play_config)
    best_is_white = random() < 0.5  # coin flip for colors
    if not best_is_white:
        black, white = best_player, ng_player
    else:
        black, white = ng_player, best_player
    observation = env.observation
    while not env.done:
        if env.board.turn == chess.BLACK:
            action = black.action(observation)
        else:
            action = white.action(observation)
        board, info = env.step(action)
        observation = board.fen()  # players consume FEN strings, not the env
    ng_win = None
    if env.winner == Winner.white:
        if best_is_white:
            ng_win = 0
        else:
            ng_win = 1
    elif env.winner == Winner.black:
        if best_is_white:
            ng_win = 1
        else:
            ng_win = 0
    return ng_win, best_is_white
def play_game(self, current_model, ng_model, current_white: bool) -> (float, ChessEnv):
    """Play one evaluation game between current_model and ng_model.

    :param current_model: the current best model
    :param ng_model: the next-generation candidate model
    :param bool current_white: True if current_model plays white
    :return: (ng_score, env) where ng_score is 1 for an ng win,
        0.5 for a draw, 0 for an ng loss
    """
    env = ChessEnv().reset()
    current_player = ChessPlayer(self.config, model=current_model, play_config=self.config.eval.play_config)
    ng_player = ChessPlayer(self.config, model=ng_model, play_config=self.config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player
    while not env.done:
        if env.board.turn == chess.WHITE:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= self.config.eval.max_game_length:
            env.adjudicate()  # force a result when the game runs too long
    if env.winner == Winner.draw:
        ng_score = 0.5
    # NOTE(review): attribute is spelled `whitewon` here but `white_won` in
    # sibling functions — confirm against this ChessEnv implementation.
    elif env.whitewon == current_white:
        ng_score = 0  # the current model won
    else:
        ng_score = 1
    return ng_score, env
def self_play_buffer(config, cur) -> (ChessEnv, list): pipes = cur.pop() # borrow env = ChessEnv().reset() white = ChessPlayer(config, pipes=pipes) black = ChessPlayer(config, pipes=pipes) while not env.done: if env.white_to_move: action = white.action(env) else: action = black.action(env) env.step(action) if env.num_halfmoves >= config.play.max_game_length: env.adjudicate() if env.winner == Winner.white: black_win = -1 elif env.winner == Winner.black: black_win = 1 else: black_win = 0 black.finish_game(black_win) white.finish_game(-black_win) data = [] for i in range(len(white.moves)): data.append(white.moves[i]) if i < len(black.moves): data.append(black.moves[i]) cur.append(pipes) return env, data
def move_by_ai(self, env):
    """Return the AI's chosen move and record its evaluation of the position."""
    if self.ai is None:
        # lazily create the player on first use
        self.ai = ChessPlayer(self.config, self.model)
    observation = env.observation
    action = self.ai.action(observation)
    history = self.ai.ask_thought_about(observation)
    self.last_history = history
    self.last_evaluation = history.values[history.action]
    logger.debug(f"Evaluation by AI = {self.last_evaluation}")
    return action
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.
    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    # weight each side's policy by its Elo so stronger play counts more
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(
        game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)
    # walk the PGN mainline collecting UCI move strings
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight)  # ignore=True
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight)  # ignore=True
        env.step(action, False)
        k += 1
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True  # decisive result without game-over => resignation
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0
    black.finish_game(black_win)
    white.finish_game(-black_win)
    # interleave the two players' recorded moves in play order
    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])
    return env, data
def self_play_buffer(config, cur) -> (GoBangEnv, list):
    """
    Play one game and add the play data to the buffer
    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send observations to for getting
        predictions. One will be removed from this list during the game, then added back
    :return (GoBangEnv,list((str,list(float)): a tuple containing the final GoBangEnv state and then a list
        of data to be appended to the SelfPlayWorker.buffer
    """
    pipes = cur.pop()  # borrow
    env = GoBangEnv().reset()
    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)
    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        # pretty_print_panel(env.board.panel)
        # print(f'After action:{action}')
        env.step(action)
        # pretty_print_panel(env.board.panel)
        # print()
        # if env.num_halfmoves >= config.play.max_game_length:
        #     env.adjudicate()
    # draws are penalized for both sides (-0.5/-0.5) to discourage them
    if env.winner == Winner.white:
        black_score, white_score = -1, 1
    elif env.winner == Winner.black:
        black_score, white_score = 1, -1
    else:
        black_score, white_score = -0.5, -0.5
    black.finish_game(black_score)
    white.finish_game(white_score)
    # interleave moves black-first: black moves first in this GoBang env
    # NOTE(review): assumption — confirm move order against GoBangEnv
    data = []
    for i in range(len(black.moves)):
        data.append(black.moves[i])
        if i < len(white.moves):
            data.append(white.moves[i])
    pretty_print_panel(env.board.panel)
    print()
    # print(data)
    cur.append(pipes)  # return the borrowed pipes
    return env, data
def supervised_buffer(config, game) -> (ChessEnv, list):
    """Replay a PGN game through the env to produce supervised training data.

    :param Config config: configuration used by the env and players
    :param game: parsed PGN game (headers + mainline)
    :return: (env, merged move data from both players)
    """
    env = ChessEnv(config).reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    env.board = game.board()
    for move in game.main_line():
        # route the move to whichever dummy player is to move
        ai = white if env.board.turn == chess.WHITE else black
        ai.sl_action(env, move)
        env.step(move)
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True  # decisive result without game-over => resignation
    # NOTE(review): Winner members are upper-case here (WHITE/BLACK/DRAW) but
    # lower-case (white/black/draw) in sibling functions — confirm which enum
    # this module actually imports.
    if result == '1-0':
        env.winner = Winner.WHITE
        white_win = 1
    elif result == '0-1':
        env.winner = Winner.BLACK
        white_win = -1
    else:
        env.winner = Winner.DRAW
        white_win = 0
    white.finish_game(white_win)
    black.finish_game(-white_win)
    return env, merge_data(white, black)
def start_game(self, idx):
    """Play one self-play game and persist play data periodically.

    :param int idx: 1-based game index; data is written to disk every
        nb_game_in_file games
    :return: the finished env
    """
    self.env.reset()
    self.black = ChessPlayer(self.config, self.model)
    self.white = ChessPlayer(self.config, self.model)
    while not self.env.done:
        if self.env.board.turn == chess.BLACK:
            action = self.black.action(self.env)
        else:
            action = self.white.action(self.env)
        self.env.step(action)
    self.finish_game()
    self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
    self.remove_play_data()
    return self.env
def get_player(config):
    """Load the best saved model and wrap it in a ChessPlayer."""
    from chess_zero.agent.model_chess import ChessModel
    from chess_zero.lib.model_helper import load_best_model_weight

    model = ChessModel(config)
    if not load_best_model_weight(model):
        raise RuntimeError("Best model not found!")
    pipes = model.get_pipes(config.play.search_threads)
    return ChessPlayer(config, pipes)
def start(config: Config):
    """Serve best-model move suggestions over a minimal Flask HTTP API.

    Loads the best model once, then answers POST/GET /play requests whose
    JSON body carries a "position" and "moves" to apply before searching.
    Blocks forever in app.run().
    """
    PlayWithHumanConfig().update_play_config(config.play)
    me_player = None  # NOTE(review): never used below — confirm before removing
    env = ChessEnv().reset()
    app = Flask(__name__)
    model = ChessModel(config)
    if not load_best_model_weight(model):
        raise RuntimeError("Best model not found!")
    player = ChessPlayer(config, model.get_pipes(config.play.search_threads))

    @app.route('/play', methods=["GET", "POST"])
    def play():
        # request JSON is untrusted client input; env/player are shared
        # across requests via this closure
        data = request.get_json()
        print(data["position"])
        env.update(data["position"])
        env.step(data["moves"], False)
        bestmove = player.action(env, False)
        return jsonify(bestmove)
    # NOTE(review): port is passed as a string — confirm Flask/werkzeug accept it
    app.run(host="0.0.0.0", port="8080")
def play_game(config, cur, ng, current_white: bool) -> (float, GoBangEnv, bool):
    """
    Plays a game against models cur and ng and reports the results.

    :param Config config: config for how to play the game
    :param cur: pool of pipe groups for the current model (one borrowed, then returned)
    :param ng: pool of pipe groups for the next generation model (one borrowed, then returned)
    :param bool current_white: whether cur should play white or black
    :return (float, GoBangEnv, bool): the score for the ng model
        (0 for loss, .5 for draw, 1 for win), the env after the game
        is finished, and a bool which is true iff cur played as white
        in that game.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = GoBangEnv().reset()
    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config)
    ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player
    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()  # force a result when the game runs too long
    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0  # the current model won
    else:
        ng_score = 1
    cur.append(cur_pipes)  # return the borrowed pipes to their pools
    ng.append(ng_pipes)
    return ng_score, env, current_white
def play_game(config: Config, cur, robot_white: int) -> float:
    """Play one human-vs-robot GoBang game on the console.

    The robot plays one color (white iff robot_white is truthy) and the
    human enters moves via input() for the other.

    :param Config config: game configuration
    :param cur: pool of pipe groups; one is borrowed and returned
    :param robot_white: truthy if the robot plays white
    :return: the human's score (0 loss, 0.5 draw, 1 win)
    """
    cur_pipes = cur.pop()
    env = GoBangEnv().reset()
    configs = config.eval.play_config
    # human game: deterministic, deeper search
    # NOTE(review): this mutates the shared eval play_config in place —
    # confirm that is intended rather than a copy.
    configs.simulation_num_per_move = 1200
    configs.tau_decay_rate = 0.
    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=configs)
    if robot_white:
        white, black = current_player, None
    else:
        white, black = None, current_player
    print(f"本局游戏人类为{'黑棋' if robot_white else '白棋'}.")
    while not env.done:
        if env.white_to_move and robot_white:
            action = white.action(env)
        elif env.white_to_move == False and robot_white == False:
            action = black.action(env)
        else:
            # human's turn: show the board and prompt until a legal move is entered
            print('当前局面如下:')
            pretty_print_panel(env.board.panel)
            print()
            action = input("请输入您要放置的棋子位置:")
            while action not in env.board.legal_moves:
                print("输入有误!请重新输入.")
                action = input("请输入您要放置的棋子位置:")
        env.step(action)
    print('本局游戏结束!当前棋面为:')
    pretty_print_panel(env.board.panel)
    if env.winner == Winner.draw:
        man_score = 0.5
    elif env.white_won == robot_white:
        man_score = 0  # the robot won
    else:
        man_score = 1
    cur.append(cur_pipes)  # return the borrowed pipes
    return man_score
class PlayWithHuman:
    """Console helper for playing a human (UCI input) against the best model."""

    def __init__(self, config: Config):
        self.config = config
        self.human_color = None  # chess.WHITE or chess.BLACK, set in start_game
        self.observers = []
        self.model = self._load_model()
        self.ai = None  # type: ChessPlayer
        self.last_evaluation = None  # AI's value for its last chosen action
        self.last_history = None  # type: HistoryItem

    def start_game(self, human_is_black):
        """Set the human's color and create a fresh AI player."""
        self.human_color = chess.BLACK if human_is_black else chess.WHITE
        self.ai = ChessPlayer(self.config, self.model)

    def _load_model(self):
        """Load the best saved model; raise if no weights are found."""
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if not load_best_model_weight(model):
            raise RuntimeError("Best model not found!")
        return model

    def move_by_ai(self, env):
        """Return the AI's move and record its evaluation of the position."""
        if self.ai is None:
            # lazily create the player if start_game was not called
            self.ai = ChessPlayer(self.config, self.model)
        action = self.ai.action(env.observation)
        self.last_history = self.ai.ask_thought_about(env.observation)
        self.last_evaluation = self.last_history.values[
            self.last_history.action]
        logger.debug(f"Evaluation history by AI = {self.last_history}")
        logger.debug(f"Evaluation by AI = {self.last_evaluation}")
        return action

    def move_by_human(self, env):
        """Prompt until the human enters a legal move in UCI format."""
        while True:
            try:
                move = input(
                    '\nEnter your move in UCI format (a1a2, b2b6, ...): ')
                if chess.Move.from_uci(move) in env.board.legal_moves:
                    return move
                else:
                    print("That is NOT a valid move :(.")
            except:
                # malformed UCI raises in Move.from_uci; re-prompt
                print("That is NOT a valid move :(.")
def get_buffer(config, game) -> (ChessEnv, list):
    """Replay a PGN game to produce Elo-weighted supervised training data.

    :param Config config: config to use to play the game
    :param game: parsed PGN game (headers + mainline)
    :return: (env, interleaved move data for the buffer)
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    # weight each side's policy by its Elo so stronger play counts more
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)
    # walk the PGN mainline collecting UCI move strings
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight)  # ignore=True
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight)  # ignore=True
        env.step(action, False)
        k += 1
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True  # decisive result without game-over => resignation
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0
    black.finish_game(black_win)
    white.finish_game(-black_win)
    # interleave the two players' recorded moves in play order
    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])
    return env, data
def get_buffer(game, config) -> (ChessEnv, list):
    """Replay a PGN game (unweighted) to produce supervised training data.

    :param game: parsed PGN game (headers + mainline)
    :param Config config: config used to build the dummy players
    :return: (env, interleaved move data for the buffer)
    """
    env = ChessEnv().reset()
    black = ChessPlayer(config, dummy=True)
    white = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    # walk the PGN mainline collecting UCI move strings
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    observation = env.observation
    while not env.done and k < len(actions):
        if env.board.turn == chess.WHITE:
            action = white.sl_action(observation, actions[k])  # ignore=True
        else:
            action = black.sl_action(observation, actions[k])  # ignore=True
        board, info = env.step(action, False)
        observation = board.fen()  # players consume FEN strings
        k += 1
    env.done = True  # the replay is over even if the position is not terminal
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True  # decisive result without game-over => resignation
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0
    black.finish_game(black_win)
    white.finish_game(-black_win)
    # interleave the two players' recorded moves in play order
    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])
    return env, data
class PlayWithEngine:
    """Helper for playing the best model against an external UCI engine."""

    def __init__(self, config: Config):
        self.config = config
        self.engine_color = None  # chess.WHITE or chess.BLACK, set in start_game
        self.engine = UciEngine()
        self.observers = []
        self.model = self._load_model()
        self.ai = None  # type: ChessPlayer
        self.last_evaluation = None  # AI's value for its last chosen action
        self.last_history = None  # type: HistoryItem

    def start_game(self, engine_is_black):
        """Set the engine's color, notify it, and create a fresh AI player."""
        self.engine_color = chess.BLACK if engine_is_black else chess.WHITE
        self.engine.start_game(engine_is_black)
        self.ai = ChessPlayer(self.config, self.model)

    def _load_model(self):
        """Load the best saved model; raise if no weights are found."""
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if not load_best_model_weight(model):
            raise RuntimeError("Best model not found!")
        return model

    def move_by_ai(self, env):
        """Return the AI's move and record its evaluation of the position."""
        if self.ai is None:
            # lazily create the player if start_game was not called
            self.ai = ChessPlayer(self.config, self.model)
        action = self.ai.action(env.observation)
        self.last_history = self.ai.ask_thought_about(env.observation)
        self.last_evaluation = self.last_history.values[self.last_history.action]
        logger.debug(f"Evaluation history by AI = {self.last_history}")
        logger.debug(f"Evaluation by AI = {self.last_evaluation}")
        return action

    def move_by_opponent(self, env):
        """Ask the external engine for its best move in the current position."""
        self.engine.update_position(env.board)
        action = self.engine.best_move()
        last_evaluation = self.engine.score()
        logger.debug(f"Evaluation by Stockfish = {last_evaluation}")
        return action
def self_play_buffer(config, cur) -> (ChessEnv, list): pipes = cur.pop() # borrow env = ChessEnv().reset() search_tree = defaultdict(VisitStats) white = ChessPlayer(config, search_tree=search_tree, pipes=pipes) black = ChessPlayer(config, search_tree=search_tree, pipes=pipes) history = [] cc = 0 while not env.done: if env.white_to_move: action = white.action(env) else: action = black.action(env) env.step(action) history.append(action) if len(history) > 6 and history[-1] == history[-5]: cc = cc + 1 else: cc = 0 if env.num_halfmoves >= config.play.max_game_length or cc >= 4: env.adjudicate() if env.winner == Winner.white: black_win = -1 elif env.winner == Winner.black: black_win = 1 else: black_win = 0 black.finish_game(black_win) white.finish_game(-black_win) data = [] for i in range(len(white.moves)): data.append(white.moves[i]) if i < len(black.moves): data.append(black.moves[i]) cur.append(pipes) return env, data
def get_player_from_model(config):
    """Build a ChessPlayer from the best saved model; return None on any failure."""
    try:
        model = ChessModel(config)
        if not load_best_model_weight(model):
            raise RuntimeError("Best model not found!")
        pipes = model.get_pipes(config.play.search_threads)
        return ChessPlayer(config, pipes)
    except Exception as e:
        # log the full traceback and degrade gracefully
        traceback.print_exc()
        log.error(e)
        return None
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.
    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    # weight each side's policy by its Elo so stronger play counts more
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)
    # walk the PGN mainline collecting UCI move strings
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight)  # ignore=True
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight)  # ignore=True
        env.step(action, False)
        k += 1
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True  # decisive result without game-over => resignation
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0
    black.finish_game(black_win)
    white.finish_game(-black_win)
    # interleave the two players' recorded moves in play order
    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])
    return env, data
def start(self):
    """Loop forever, playing games and alternating the robot's color each game."""
    game_idx = 0
    while True:
        cur = self.cur_pipes.pop()  # borrow a pipe group
        play_config = self.play_config
        # NOTE(review): this mutates self.play_config in place on every
        # iteration — confirm a copy is not intended.
        play_config.simulation_num_per_move = 100
        play_config.tau_decay_rate = 0
        robot = ChessPlayer(self.config, pipes=cur, play_config=play_config)
        # alternate which side the robot plays: even games => True
        score = play_game(robot, (game_idx % 2) == 0)
        game_idx += 1
        self.cur_pipes.append(cur)  # return the borrowed pipes
def get_player_from_model(config):
    """Build a ChessPlayer from the best saved model; return None on any failure."""
    try:
        from chess_zero.agent.player_chess import ChessPlayer
        from chess_zero.agent.model_chess import ChessModel
        from chess_zero.lib.model_helper import load_best_model_weight

        model = ChessModel(config)
        if not load_best_model_weight(model):
            raise RuntimeError("Best model not found!")
        pipes = model.get_pipes(config.play.search_threads)
        return ChessPlayer(config, pipes)
    except Exception as e:
        # log the full traceback and degrade gracefully
        traceback.print_exc()
        log.error(e)
        return None
class PlayWithHuman:
    """Console helper for playing a human (SAN input) against the newest model."""

    def __init__(self, config: Config):
        self.config = config
        self.human_color = None  # chess.WHITE or chess.BLACK, set in start_game
        self.observers = []
        self.model = self._load_model()
        self.ai = None  # type: ChessPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def start_game(self, human_is_white):
        """Set the human's color and create a fresh AI player."""
        self.human_color = chess.WHITE if human_is_white else chess.BLACK
        self.ai = ChessPlayer(
            self.config, self.model, self.config.human.play_config
        )  # override self.config.play with optional third parameter

    def _load_model(self):
        """Load the newest saved model; raise if no weights are found."""
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if not load_newest_model_weight(self.config.resource, model):
            raise RuntimeError("newest model not found!")
        return model

    def move_by_ai(self, env):
        """Return the AI's chosen move for the current env."""
        action = self.ai.action(env)
        return action

    def move_by_human(self, env):
        """Prompt until the human enters a legal SAN move; "quit" exits."""
        while True:
            san = input(
                '\nEnter your move in SAN format ("e4", "Nf3", ... or "quit"): '
            )
            if san == "quit":
                raise SystemExit
            try:
                move = env.board.parse_san(san)
                if move != chess.Move.null():
                    return move
                else:
                    print("That is NOT a valid move :(."
                          )  # how will parse_san ever return a null move...?
            except:
                # illegal/ambiguous SAN raises in parse_san; re-prompt
                print("That is NOT a valid move :(.")
def start_game(self, human_is_black):
    """Record the human's color and create a fresh AI opponent."""
    if human_is_black:
        self.human_color = chess.BLACK
    else:
        self.human_color = chess.WHITE
    self.ai = ChessPlayer(self.config, self.model)
class SelfPlayWorker:
    """Worker that generates training data by having the model play itself."""

    def __init__(self, config: Config, env=None, model=None):
        """
        :param config:
        :param ChessEnv|None env:
        :param chess_zero.agent.model_chess.ChessModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env  # type: ChessEnv
        self.black = None  # type: ChessPlayer
        self.white = None  # type: ChessPlayer
        self.buffer = []  # accumulated move data awaiting a disk write

    def start(self):
        """Run self-play games forever, reloading weights periodically."""
        if self.model is None:
            self.model = self.load_model()
        self.buffer = []
        idx = 1
        while True:
            start_time = time()
            env = self.start_game(idx)
            end_time = time()
            logger.debug(f"game {idx} time={end_time - start_time} sec, "
                         f"turn={env.turn}:{env.observation} - Winner:{env.winner} - by resignation?:{env.resigned}")
            if (idx % self.config.play_data.nb_game_in_file) == 0:
                # pick up new best weights if training produced any
                reload_best_model_weight_if_changed(self.model)
            idx += 1

    def start_game(self, idx):
        """Play one self-play game; persist data every nb_game_in_file games."""
        self.env.reset()
        self.black = ChessPlayer(self.config, self.model)
        self.white = ChessPlayer(self.config, self.model)
        observation = self.env.observation
        while not self.env.done:
            if self.env.board.turn == chess.BLACK:
                action = self.black.action(observation)
            else:
                action = self.white.action(observation)
            board, info = self.env.step(action)
            observation = board.fen()  # players consume FEN strings
        self.finish_game()
        self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        """Append both players' moves to the buffer; flush to disk if write."""
        data = self.black.moves + self.white.moves
        self.buffer += data
        if not write:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        """Delete the oldest data files beyond the configured maximum."""
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self):
        """Score the finished game (+1 black win, -1 white win, 0 draw)."""
        if self.env.winner == Winner.black:
            black_win = 1
        elif self.env.winner == Winner.white:
            black_win = -1
        else:
            black_win = 0
        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

    def load_model(self):
        """Load the best model, building fresh weights if requested or missing."""
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(model):
            model.build()
            save_as_best_model(model)
        return model
def start_game(self, human_is_white):
    """Record the human's color and create the AI with the human play config."""
    if human_is_white:
        self.human_color = chess.WHITE
    else:
        self.human_color = chess.BLACK
    # the optional third argument overrides self.config.play
    human_play_config = self.config.human.play_config
    self.ai = ChessPlayer(self.config, self.model, human_play_config)
def start_game(self, engine_is_black):
    """Record the engine's color, notify it, and create a fresh AI player."""
    if engine_is_black:
        self.engine_color = chess.BLACK
    else:
        self.engine_color = chess.WHITE
    self.engine.start_game(engine_is_black)
    self.ai = ChessPlayer(self.config, self.model)
def start_game(self, human_is_white):
    """Record the human's color and create a fresh AI opponent."""
    if human_is_white:
        self.human_color = chess.WHITE
    else:
        self.human_color = chess.BLACK
    self.ai = ChessPlayer(self.config, self.model)
def play_game(config, cur, ng, current_white: bool) -> (float, GoBangEnv, bool, list):
    """
    Plays a game against models cur and ng and reports the results.

    :param Config config: config for how to play the game
    :param cur: pool of pipe groups for the current model (one borrowed, then returned)
    :param ng: pool of pipe groups for the next generation model (one borrowed, then returned)
    :param bool current_white: whether cur should play white or black
    :return (float, GoBangEnv, bool, list): the score for the ng model
        (0 for loss, .5 for draw, 1 for win), the env after the game is
        finished, a bool which is true iff cur played as white in that
        game, and the interleaved move data from both players.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = GoBangEnv().reset()
    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config)
    ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player
    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0  # the current model won
    else:
        ng_score = 1
    # ----- collect move data -----
    # draws are penalized for both sides (-0.5/-0.5) to discourage them
    if env.winner == Winner.white:
        black_score, white_score = -1, 1
    elif env.winner == Winner.black:
        black_score, white_score = 1, -1
    else:
        black_score, white_score = -0.5, -0.5
    black.finish_game(black_score)
    white.finish_game(white_score)
    data = []
    for i in range(len(black.moves)):
        data.append(black.moves[i])
        if i < len(white.moves):
            data.append(white.moves[i])
    # -----------------------------
    cur.append(cur_pipes)  # return the borrowed pipes to their pools
    ng.append(ng_pipes)
    return ng_score, env, current_white, data
class SupervisedLearningWorker:
    """Worker that generates training data by replaying games from PGN files."""

    def __init__(self, config: Config, env=None, model=None):
        """
        :param config:
        :param ChessEnv|None env:
        :param chess_zero.agent.model_chess.ChessModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env  # type: ChessEnv
        self.black = None  # type: ChessPlayer
        self.white = None  # type: ChessPlayer
        self.buffer = []  # accumulated move data awaiting a disk write

    def start(self):
        """Read PGN games forever, reloading weights periodically."""
        if self.model is None:
            self.model = self.load_model()
        self.buffer = []
        idx = 1
        while True:
            start_time = time()
            env = self.read_game(idx)
            end_time = time()
            logger.debug(f"game {idx} time={end_time - start_time} sec, "
                         f"turn={int(env.turn/2)}:{env.observation} - Winner:{env.winner} - by resignation?:{env.resigned}")
            if (idx % self.config.play_data.nb_game_in_file) == 0:
                reload_best_model_weight_if_changed(self.model)
            idx += 1

    def read_game(self, idx):
        """Pick a random PGN file, seek to a random game, and replay it.

        Seeks to a random byte offset, scans forward to the next header
        tag, and parses one game from there.
        """
        self.env.reset()
        self.black = ChessPlayer(self.config, self.model)
        self.white = ChessPlayer(self.config, self.model)
        files = find_pgn_files(self.config.resource.play_data_dir)
        if len(files) > 0:
            random.shuffle(files)
            filename = files[0]
            pgn = open(filename, errors='ignore')
            size = os.path.getsize(filename)
            pos = random.randint(0, size)
            pgn.seek(pos)
            line = pgn.readline()
            offset = 0
            # Parse game headers.
            while line:
                if line.isspace() or line.startswith("%"):
                    line = pgn.readline()
                    continue
                # Read header tags.
                tag_match = TAG_REGEX.match(line)
                if tag_match:
                    offset = pgn.tell()
                    break
                line = pgn.readline()
            pgn.seek(offset)
            game = chess.pgn.read_game(pgn)
            node = game
            result = game.headers["Result"]
            # walk the mainline collecting UCI move strings
            actions = []
            while not node.is_end():
                next_node = node.variation(0)
                actions.append(node.board().uci(next_node.move))
                node = next_node
            pgn.close()
            k = 0
            observation = self.env.observation
            while not self.env.done and k < len(actions):
                if self.env.board.turn == chess.BLACK:
                    action = self.black.sl_action(observation, actions[k])
                else:
                    action = self.white.sl_action(observation, actions[k])
                board, info = self.env.step(action)
                observation = board.fen()  # players consume FEN strings
                k += 1
            self.env.done = True  # the replay is over even if not terminal
            if not self.env.board.is_game_over() and result != '1/2-1/2':
                self.env.resigned = True  # decisive result => resignation
            if result == '1-0':
                self.env.winner = Winner.white
            elif result == '0-1':
                self.env.winner = Winner.black
            else:
                self.env.winner = Winner.draw
            self.finish_game()
            self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
            self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        """Append both players' moves to the buffer; flush to disk if write."""
        data = self.black.moves + self.white.moves
        self.buffer += data
        if not write:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        """Delete the oldest data files beyond the configured maximum."""
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self):
        """Score the finished game (+1 black win, -1 white win, 0 draw)."""
        if self.env.winner == Winner.black:
            black_win = 1
        elif self.env.winner == Winner.white:
            black_win = -1
        else:
            black_win = 0
        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

    def load_model(self):
        """Load the best model, building fresh weights if requested or missing."""
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(model):
            model.build()
            save_as_best_model(model)
        return model
def read_game(self, idx):
    """Pick a random PGN file, seek to a random game, and replay it.

    Seeks to a random byte offset in the file, scans forward to the next
    header tag, and parses one game from there.

    :param int idx: 1-based game index; data is written to disk every
        nb_game_in_file games
    :return: the env after the replay
    """
    self.env.reset()
    self.black = ChessPlayer(self.config, self.model)
    self.white = ChessPlayer(self.config, self.model)
    files = find_pgn_files(self.config.resource.play_data_dir)
    if len(files) > 0:
        random.shuffle(files)
        filename = files[0]
        pgn = open(filename, errors='ignore')
        size = os.path.getsize(filename)
        pos = random.randint(0, size)
        pgn.seek(pos)
        line = pgn.readline()
        offset = 0
        # Parse game headers.
        while line:
            if line.isspace() or line.startswith("%"):
                line = pgn.readline()
                continue
            # Read header tags.
            tag_match = TAG_REGEX.match(line)
            if tag_match:
                offset = pgn.tell()
                break
            line = pgn.readline()
        pgn.seek(offset)
        game = chess.pgn.read_game(pgn)
        node = game
        result = game.headers["Result"]
        # walk the mainline collecting UCI move strings
        actions = []
        while not node.is_end():
            next_node = node.variation(0)
            actions.append(node.board().uci(next_node.move))
            node = next_node
        pgn.close()
        k = 0
        observation = self.env.observation
        while not self.env.done and k < len(actions):
            if self.env.board.turn == chess.BLACK:
                action = self.black.sl_action(observation, actions[k])
            else:
                action = self.white.sl_action(observation, actions[k])
            board, info = self.env.step(action)
            observation = board.fen()  # players consume FEN strings
            k += 1
        self.env.done = True  # the replay is over even if not terminal
        if not self.env.board.is_game_over() and result != '1/2-1/2':
            self.env.resigned = True  # decisive result => resignation
        if result == '1-0':
            self.env.winner = Winner.white
        elif result == '0-1':
            self.env.winner = Winner.black
        else:
            self.env.winner = Winner.draw
        self.finish_game()
        self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()
    return self.env