def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    """
    Play one evaluation game between the current and the next-generation player.

    :param config: configuration object; ``config.eval.play_config`` is handed to both
        players and ``config.eval.max_game_length`` is the half-move count at which the
        game is adjudicated
    :param cur: pool of pipes for the current model; one entry is popped for the
        duration of the game and appended back afterwards, even if the game raises
    :param ng: pool of pipes for the next-generation model, borrowed the same way
    :param bool current_white: True if the current model plays white
    :return: ``(ng_score, env, current_white)`` where ``ng_score`` is 0 if the
        next-generation player lost, 0.5 for a draw and 1 if it won
    """
    cur_pipes = cur.pop()
    try:
        ng_pipes = ng.pop()
    except Exception:
        # Nothing was borrowed from `ng`; hand the first borrow back before failing.
        cur.append(cur_pipes)
        raise

    try:
        env = ChessEnv().reset()
        current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config)
        ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config)
        if current_white:
            white, black = current_player, ng_player
        else:
            white, black = ng_player, current_player

        while not env.done:
            mover = white if env.white_to_move else black
            env.step(mover.action(env))
            if env.num_halfmoves >= config.eval.max_game_length:
                env.adjudicate()

        if env.winner == Winner.draw:
            ng_score = 0.5
        elif env.white_won == current_white:
            # The colour that won is the colour the current model played.
            ng_score = 0
        else:
            ng_score = 1
    finally:
        # Always return the borrowed pipes so a failed game does not shrink the pools.
        cur.append(cur_pipes)
        ng.append(ng_pipes)
    return ng_score, env, current_white
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one self-play game and return the final environment plus the recorded moves.

    :param config: configuration object; ``config.play.max_game_length`` is the
        half-move count at which the game is adjudicated
    :param cur: pool of pipes; one entry is popped for the duration of the game and
        appended back afterwards, even if the game raises
    :return: ``(env, data)`` where ``data`` interleaves ``white.moves`` and
        ``black.moves``, white first
    """
    pipes = cur.pop()  # borrow
    try:
        env = ChessEnv().reset()
        # Both sides share the same borrowed pipes.
        white = ChessPlayer(config, pipes=pipes)
        black = ChessPlayer(config, pipes=pipes)

        while not env.done:
            mover = white if env.white_to_move else black
            env.step(mover.action(env))
            if env.num_halfmoves >= config.play.max_game_length:
                env.adjudicate()

        # Result from black's point of view: 1 win, -1 loss, 0 otherwise.
        if env.winner == Winner.white:
            black_win = -1
        elif env.winner == Winner.black:
            black_win = 1
        else:
            black_win = 0

        black.finish_game(black_win)
        white.finish_game(-black_win)

        data = []
        for i, white_move in enumerate(white.moves):
            data.append(white_move)
            if i < len(black.moves):
                data.append(black.moves[i])
    finally:
        # Always return the borrowed pipes so a failed game does not shrink the pool.
        cur.append(pipes)
    return env, data
def play_game(self, current_model, ng_model, current_white: bool) -> (float, ChessEnv):
    """
    Play a single game between two models and score it for the next-generation one.

    :param current_model: model handed to the "current" player
    :param ng_model: model handed to the next-generation player
    :param bool current_white: True if the current model plays white
    :return: ``(ng_score, env)`` where ``ng_score`` is 0.5 for a draw, 0 when the
        winning colour is the one the current model played, and 1 otherwise
    """
    env = ChessEnv().reset()

    eval_play_config = self.config.eval.play_config
    current_player = ChessPlayer(self.config, model=current_model, play_config=eval_play_config)
    ng_player = ChessPlayer(self.config, model=ng_model, play_config=eval_play_config)
    white, black = (current_player, ng_player) if current_white else (ng_player, current_player)

    while not env.done:
        side_to_move = white if env.board.turn == chess.WHITE else black
        env.step(side_to_move.action(env))
        if env.num_halfmoves >= self.config.eval.max_game_length:
            env.adjudicate()

    if env.winner == Winner.draw:
        return 0.5, env
    # NOTE(review): this variant reads `env.whitewon`; confirm the attribute name
    # against the ChessEnv version this file is paired with.
    ng_score = 0 if env.whitewon == current_white else 1
    return ng_score, env
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    """
    Plays a game between the current and next-generation players and reports the result.

    :param Config config: config for how to play the game (``config.eval`` is used)
    :param cur: pool of pipes for the current model; one entry is removed from it
        during the game (and passed to ChessPlayer as ``pipes``), then added back
    :param ng: pool of pipes for the next generation model, borrowed the same way
    :param bool current_white: whether cur should play white or black
    :return (float, ChessEnv, bool): the score for the ng model (0 for loss, .5 for draw,
        1 for win), the env after the game is finished, and a bool which is true iff cur
        played as white in that game.
    """
    cur_pipes = cur.pop()
    try:
        ng_pipes = ng.pop()
    except Exception:
        # Second borrow failed: give the first one back before propagating.
        cur.append(cur_pipes)
        raise

    try:
        env = ChessEnv().reset()

        eval_play_config = config.eval.play_config
        current_player = ChessPlayer(config, pipes=cur_pipes, play_config=eval_play_config)
        ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=eval_play_config)
        if current_white:
            white, black = current_player, ng_player
        else:
            white, black = ng_player, current_player

        while not env.done:
            if env.white_to_move:
                action = white.action(env)
            else:
                action = black.action(env)
            env.step(action)
            # Cut over-long games short by adjudicating them.
            if env.num_halfmoves >= config.eval.max_game_length:
                env.adjudicate()

        if env.winner == Winner.draw:
            ng_score = 0.5
        elif env.white_won == current_white:
            # The winning colour is the one cur played, so ng lost.
            ng_score = 0
        else:
            ng_score = 1
    finally:
        # Return the pipes even when the game raised; otherwise the pools leak entries.
        cur.append(cur_pipes)
        ng.append(ng_pipes)
    return ng_score, env, current_white
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game and add the play data to the buffer

    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send observations to
        for getting predictions. One will be removed from this list during the game, then
        added back (also when the game raises).
    :return (ChessEnv,list((str,list(float)): a tuple containing the final ChessEnv state
        and then a list of data to be appended to the SelfPlayWorker.buffer. The list
        interleaves the white and black players' recorded moves, white first.
    """
    pipes = cur.pop()  # borrow
    try:
        env = ChessEnv().reset()

        white = ChessPlayer(config, pipes=pipes)
        black = ChessPlayer(config, pipes=pipes)

        while not env.done:
            if env.white_to_move:
                action = white.action(env)
            else:
                action = black.action(env)
            env.step(action)
            if env.num_halfmoves >= config.play.max_game_length:
                env.adjudicate()

        # Outcome from black's point of view; white receives the negation.
        if env.winner == Winner.white:
            black_win = -1
        elif env.winner == Winner.black:
            black_win = 1
        else:
            black_win = 0

        black.finish_game(black_win)
        white.finish_game(-black_win)

        data = []
        for ply, white_move in enumerate(white.moves):
            data.append(white_move)
            if ply < len(black.moves):
                data.append(black.moves[ply])
    finally:
        # Give the pipe back no matter how the game ended.
        cur.append(pipes)
    return env, data
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one self-play game with a shared search tree and return the play data.

    Besides the usual length limit, the game is adjudicated once four consecutive
    moves each equal the move played four plies earlier.

    :param config: configuration object; ``config.play.max_game_length`` is the
        half-move count at which the game is adjudicated
    :param cur: pool of pipes; one entry is popped for the duration of the game and
        appended back afterwards, even if the game raises
    :return: ``(env, data)`` where ``data`` interleaves ``white.moves`` and
        ``black.moves``, white first
    """
    pipes = cur.pop()  # borrow
    try:
        env = ChessEnv().reset()

        # A single tree is handed to both players.
        search_tree = defaultdict(VisitStats)
        white = ChessPlayer(config, search_tree=search_tree, pipes=pipes)
        black = ChessPlayer(config, search_tree=search_tree, pipes=pipes)

        history = []
        repeat_streak = 0  # consecutive moves that matched the move four plies back
        while not env.done:
            if env.white_to_move:
                action = white.action(env)
            else:
                action = black.action(env)
            env.step(action)
            history.append(action)
            if len(history) > 6 and history[-1] == history[-5]:
                repeat_streak += 1
            else:
                repeat_streak = 0
            if env.num_halfmoves >= config.play.max_game_length or repeat_streak >= 4:
                env.adjudicate()

        # Outcome from black's point of view; white receives the negation.
        if env.winner == Winner.white:
            black_win = -1
        elif env.winner == Winner.black:
            black_win = 1
        else:
            black_win = 0

        black.finish_game(black_win)
        white.finish_game(-black_win)

        data = []
        for i, white_move in enumerate(white.moves):
            data.append(white_move)
            if i < len(black.moves):
                data.append(black.moves[i])
    finally:
        # Always return the borrowed pipes so a failed game does not shrink the pool.
        cur.append(pipes)
    return env, data
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game and add the play data to the buffer

    In this variant the side to move chooses its action on a copy of the environment
    from which all of the opponent's pieces have been removed. The chosen action is then
    applied to the real environment; a ValueError from ``env.step`` counts as a failed
    play, and 50 failed plays in a row end the game by adjudication. One summary line is
    appended to ``result.csv`` and the played moves to ``moves_list.csv``.

    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send observations to
        for getting predictions. One will be removed from this list during the game, then
        added back (also when the game raises).
    :return (ChessEnv,list((str,list(float)): a tuple containing the final ChessEnv state
        and then a list of data to be appended to the SelfPlayWorker.buffer
    """
    pipes = cur.pop()  # borrow
    try:
        env = ChessEnv().reset()  # EDIT CODE HERE TO CHANGE THE ENVIRONMENT

        white = ChessPlayer(config, pipes=pipes)
        black = ChessPlayer(config, pipes=pipes)

        # Symbols compared against str(piece); built once instead of on every move.
        black_pieces = set("prnbqk")
        white_pieces = set("PRNBQK")

        move = 0
        failed_play = 0
        total_failed_plays = 0
        played = []  # one "<action>, " chunk per successfully applied move
        print("Match Started")
        while not env.done:
            # CHANGES_MADE_HERE
            if env.white_to_move:
                mover, hidden = white, black_pieces
            else:
                mover, hidden = black, white_pieces

            # Work on a copy so the real board keeps every piece.
            temp = deepcopy(env)
            for square, piece in temp.board.piece_map().items():
                if str(piece) in hidden:
                    temp.board.remove_piece_at(square)
            action = mover.action(temp)

            print("Match in Progress: ", move, "Moves made in the game, Failed Plays: ",
                  total_failed_plays, end='\r')
            try:
                env.step(action)
                played.append(action + ', ')
                failed_play = 0
                move += 1
                if env.num_halfmoves >= config.play.max_game_length:
                    env.adjudicate()
            except ValueError:
                # presumably the action chosen on the masked board was rejected by the
                # real board — confirm against ChessEnv.step
                failed_play += 1
                total_failed_plays += 1
                if failed_play == 50:
                    logger.warning("\nEnding the Game due to lack of development")
                    env.adjudicate()
            # END_OF_CHANGES

        ending = ">, Adjudicated\n" if failed_play == 50 else ">, Game End\n"
        with open("result.csv", "a+") as fp:
            # `!s` keeps the str() rendering of env.winner used by the original concatenation.
            fp.write(f"{move}, {total_failed_plays}, {env.winner!s}, <{env.board.fen()}{ending}")

        with open("moves_list.csv", "a+") as fp:
            fp.write("".join(played))
            fp.write("\n")

        # Outcome from black's point of view; white receives the negation.
        if env.winner == Winner.white:
            black_win = -1
            logger.info("White wins")
        elif env.winner == Winner.black:
            black_win = 1
            logger.info("Black wins")
        else:
            black_win = 0
            logger.info("Draw Match")

        black.finish_game(black_win)
        white.finish_game(-black_win)

        data = []
        for i, white_move in enumerate(white.moves):
            data.append(white_move)
            if i < len(black.moves):
                data.append(black.moves[i])
    finally:
        # Always return the borrowed pipe so a failed game does not shrink the pool.
        cur.append(pipes)
    return env, data