def search_my_move(self, env: ChessEnv, is_root_node=False) -> float:
    """
    Q and V are values for this player (always white). P is the policy value
    for the next player to move (black or white).

    This method searches for possible moves, adds them to a search tree, and eventually returns the
    best move that was found during the search.

    :param ChessEnv env: environment in which to search for the move
    :param boolean is_root_node: whether this is the root node of the search.
    :return float: value of the move. This is calculated by getting a prediction
        from the value network.
    """
    if env.done:
        if env.winner == Winner.draw:
            return 0
        # assert env.whitewon != env.white_to_move  # side to move can't be winner!
        return -1

    state = state_key(env)

    with self.node_lock[state]:
        if state not in self.tree:
            leaf_p, leaf_v = self.expand_and_evaluate(env)
            self.tree[state].p = leaf_p
            return leaf_v  # I'm returning everything from the POV of side to move

        # SELECT STEP
        action_t = self.select_action_q_and_u(env, is_root_node)

        virtual_loss = self.play_config.virtual_loss

        my_visit_stats = self.tree[state]
        my_stats = my_visit_stats.a[action_t]

        my_visit_stats.sum_n += virtual_loss
        my_stats.n += virtual_loss
        my_stats.w += -virtual_loss
        my_stats.q = my_stats.w / my_stats.n

    env.step(action_t.uci())
    leaf_v = self.search_my_move(env)  # next move from enemy POV
    leaf_v = -leaf_v

    # BACKUP STEP
    # on returning search path
    # update: N, W, Q
    with self.node_lock[state]:
        my_visit_stats.sum_n += -virtual_loss + 1
        my_stats.n += -virtual_loss + 1
        my_stats.w += virtual_loss + leaf_v
        my_stats.q = my_stats.w / my_stats.n

    return leaf_v
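# select_action_q_and_u is not shown in this snippet. In AlphaZero-style MCTS it
# normally picks the move maximizing Q plus a PUCT exploration bonus built from the
# same per-action statistics updated above. A minimal sketch only, assuming ActionStats
# fields n, q, p, a VisitStats field sum_n, and an assumed exploration constant c_puct:
import math

def select_action_q_and_u_sketch(my_visit_stats, c_puct=1.5):
    best_score, best_action = -float("inf"), None
    sqrt_sum = math.sqrt(my_visit_stats.sum_n + 1)
    for action, stats in my_visit_stats.a.items():
        u = c_puct * stats.p * sqrt_sum / (1 + stats.n)  # prior-weighted exploration term
        score = stats.q + u
        if score > best_score:
            best_score, best_action = score, action
    return best_action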
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.

    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return (ChessEnv, list((str, list(float)))): the final env and the data from this game
        for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)

    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())

    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight)  # ignore=True
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight)  # ignore=True
        env.step(action, False)
        k += 1

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])
    return env, data
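# A hedged usage sketch for get_buffer: games are read one at a time with the
# python-chess PGN parser and their move data appended to a buffer. The function name,
# file path, and buffer list here are placeholders, not part of the original code.
import chess.pgn

def fill_buffer_from_pgn(config, pgn_path="games.pgn"):
    buffer = []
    with open(pgn_path) as pgn_file:
        game = chess.pgn.read_game(pgn_file)
        while game is not None:
            env, data = get_buffer(config, game)
            buffer.extend(data)
            game = chess.pgn.read_game(pgn_file)
    return buffer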
def start(config: Config):
    PlayWithHumanConfig().update_play_config(config.play)

    me_player = None
    env = ChessEnv().reset()

    while True:
        line = input()
        words = line.rstrip().split(" ", 1)
        if words[0] == "uci":
            print("id name ChessZero")
            print("id author ChessZero")
            print("uciok")
        elif words[0] == "isready":
            if not me_player:
                me_player = get_player(config)
            print("readyok")
        elif words[0] == "ucinewgame":
            env.reset()
        elif words[0] == "position":
            words = words[1].split(" ", 1)
            if words[0] == "startpos":
                env.reset()
            else:
                if words[0] == "fen":  # skip extraneous word
                    words = words[1].split(' ', 1)
                fen = words[0]
                for _ in range(5):
                    words = words[1].split(' ', 1)
                    fen += " " + words[0]
                env.update(fen)
            if len(words) > 1:
                words = words[1].split(" ", 1)
                if words[0] == "moves":
                    for w in words[1].split(" "):
                        env.step(w, False)
        elif words[0] == "go":
            if not me_player:
                me_player = get_player(config)
            action = me_player.action(env, False)
            print(f"bestmove {action}")
        elif words[0] == "stop":
            pass
        elif words[0] == "quit":
            break
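# For reference, a typical UCI exchange supported by the loop above looks like the
# following (engine replies indented; the concrete bestmove is only illustrative):
#
#   GUI:    uci
#   engine: id name ChessZero
#           id author ChessZero
#           uciok
#   GUI:    isready
#   engine: readyok
#   GUI:    ucinewgame
#   GUI:    position startpos moves e2e4 e7e5
#   GUI:    go
#   engine: bestmove g1f3
#   GUI:    quit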
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    """
    Plays a game between models cur and ng and reports the results.

    :param Config config: config for how to play the game
    :param ChessModel cur: should be the current model
    :param ChessModel ng: should be the next generation model
    :param bool current_white: whether cur should play white or black
    :return (float, ChessEnv, bool): the score for the ng model (0 for loss, .5 for draw,
        1 for win), the env after the game is finished, and a bool which is true iff cur
        played as white in that game.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = ChessEnv().reset()

    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config)
    ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0  # cur's colour won, so ng loses
    else:
        ng_score = 1  # ng's colour won
    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white
def start(config: Config):
    PlayWithHumanConfig().update_play_config(config.play)
    chess_model = PlayWithHuman(config)

    env = ChessEnv().reset()
    human_is_black = random() < 0.5
    chess_model.start_game(human_is_black)

    while not env.done:
        if (env.board.turn == chess.BLACK) == human_is_black:
            action = chess_model.move_by_human(env)
            print("You move to: " + action)
        else:
            action = chess_model.move_by_ai(env)
            print("AI moves to: " + action)
        board, info = env.step(action)
        env.render()
        print("Board FEN = " + board.fen())

    print("\nEnd of the game.")
    print("Game result:")
    print(env.board.result())
def play_game(self, best_model, ng_model):
    env = ChessEnv().reset()

    best_player = ChessPlayer(self.config, best_model, play_config=self.config.eval.play_config)
    ng_player = ChessPlayer(self.config, ng_model, play_config=self.config.eval.play_config)
    best_is_white = random() < 0.5
    if not best_is_white:
        black, white = best_player, ng_player
    else:
        black, white = ng_player, best_player

    observation = env.observation
    while not env.done:
        if env.board.turn == chess.BLACK:
            action = black.action(observation)
        else:
            action = white.action(observation)
        board, info = env.step(action)
        observation = board.fen()

    ng_win = None
    if env.winner == Winner.white:
        if best_is_white:
            ng_win = 0
        else:
            ng_win = 1
    elif env.winner == Winner.black:
        if best_is_white:
            ng_win = 1
        else:
            ng_win = 0
    return ng_win, best_is_white
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool, float, float):
    """
    Plays a game between models cur and ng and reports the results.

    :param Config config: config for how to play the game
    :param ChessModel cur: should be the current model
    :param ChessModel ng: should be the next generation model
    :param bool current_white: whether cur should play white or black
    :return (float, ChessEnv, bool, float, float): the score for the ng model (0 for loss,
        .5 for draw, 1 for win), the env after the game is finished, a bool which is true
        iff cur played as white in that game, the total time spent in the modified action
        search, and the number of timed steps.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = ChessEnv().reset()

    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config)
    ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    start_time = time()
    total = 0.0
    total_step = 0.0
    while not env.done:
        if env.white_to_move:
            if current_white:
                action = white.action(env)
            else:
                start_time = time()
                action = white.action_modify(env)
                total += time() - start_time
                total_step += 1
        else:
            if current_white:
                start_time = time()
                action = black.action_modify(env)
                total += time() - start_time
                total_step += 1
            else:
                action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    # print(f"time={total:5.1f}s total step = {total_step} average = {total/total_step:5.1f}")
    # exit()
    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1
    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white, total, total_step
def search_my_move(self, env: ChessEnv, is_root_node=False, tid=0) -> float:  # DFS to the leaf and back up
    """
    Q and V are values for this player (always white). P is the policy value
    for the next player to move (black or white).

    :return: leaf value
    """
    if env.done:
        if env.winner == Winner.draw:
            return 0
        return -1

    state = state_key(env)

    with self.node_lock[state]:
        if state not in self.tree:
            leaf_p, leaf_v = self.expand_and_evaluate(env)
            self.tree[state].p = leaf_p
            self.tree[state].legal_moves = state_moves(env)
            return leaf_v  # I'm returning everything from the POV of side to move

        if tid in self.tree[state].visit:  # loop -> loss
            return 0
        self.tree[state].visit.append(tid)

        # SELECT STEP
        canon_action = self.select_action_q_and_u(state, is_root_node)
        virtual_loss = self.config.play.virtual_loss

        my_visit_stats = self.tree[state]
        my_visit_stats.sum_n += virtual_loss
        my_stats = my_visit_stats.a[canon_action]
        my_stats.n += virtual_loss
        my_stats.w -= virtual_loss
        my_stats.q = my_stats.w / my_stats.n

    if env.white_to_move:
        env.step(canon_action)
    else:
        env.step(flip_move(canon_action))

    leaf_v = self.search_my_move(env, False, tid)  # next move from enemy POV
    leaf_v = -leaf_v

    # BACKUP STEP
    # on returning search path
    # update: N, W, Q
    with self.node_lock[state]:
        my_visit_stats = self.tree[state]
        my_visit_stats.visit.remove(tid)
        my_visit_stats.sum_n += 1 - virtual_loss
        my_stats = my_visit_stats.a[canon_action]
        my_stats.n += 1 - virtual_loss
        my_stats.w += leaf_v + virtual_loss
        my_stats.q = my_stats.w / my_stats.n

    return leaf_v
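# flip_move is not defined in this snippet. In forks that keep a canonical
# "side to move plays up the board" representation it usually mirrors a UCI move
# vertically before applying it for Black. A minimal sketch under that assumption
# (the helper name and the exact handling of promotions are assumptions):
def flip_move_sketch(uci_move: str) -> str:
    def flip_square(sq: str) -> str:
        return sq[0] + str(9 - int(sq[1]))  # rank r becomes 9 - r, file unchanged
    # e.g. "e2e4" -> "e7e5"; any promotion suffix (e.g. "q") is passed through unchanged
    return flip_square(uci_move[0:2]) + flip_square(uci_move[2:4]) + uci_move[4:]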
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game and add the play data to the buffer.

    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send observations to for getting
        predictions. One will be removed from this list during the game, then added back
    :return (ChessEnv, list((str, list(float)))): a tuple containing the final ChessEnv state and then a list
        of data to be appended to the SelfPlayWorker.buffer
    """
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()  # EDIT CODE HERE TO CHANGE THE ENVIRONMENT

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)
    move = 0
    failed_play = 0
    total_failed_plays = 0
    print("Match Started")
    moves_list = ""
    while not env.done:
        # CHANGES_MADE_HERE
        # each player acts on a copy of the position with the opponent's pieces removed
        temp = deepcopy(env)
        black_pieces = set("prnbqk")
        white_pieces = set("PRNBQK")
        if env.white_to_move:
            x = temp.board.piece_map()
            for i in x:
                if str(x[i]) in black_pieces:
                    temp.board.remove_piece_at(i)
            action = white.action(temp)
        else:
            x = temp.board.piece_map()
            for i in x:
                if str(x[i]) in white_pieces:
                    temp.board.remove_piece_at(i)
            action = black.action(temp)
        print("Match in Progress: ", move, "Moves made in the game, Failed Plays: ", total_failed_plays, end='\r')
        try:
            env.step(action)
            moves_list += action + ', '
            failed_play = 0
            move += 1
            if env.num_halfmoves >= config.play.max_game_length:
                env.adjudicate()
        except ValueError:
            # the chosen move is illegal on the full board; count the failure and retry
            failed_play += 1
            total_failed_plays += 1
            if failed_play == 50:
                logger.warning("\nEnding the Game due to lack of development")
                env.adjudicate()
            continue
        # END_OF_CHANGES

    with open("result.csv", "a+") as fp:
        result = str(move) + ", " + str(total_failed_plays) + ", " + str(env.winner) + ", <" + env.board.fen()
        result += ">, Adjudicated\n" if failed_play == 50 else ">, Game End\n"
        fp.write(result)

    with open("moves_list.csv", "a+") as fp:
        fp.write(moves_list)
        fp.write("\n")

    if env.winner == Winner.white:
        black_win = -1
        logger.info("White wins")
    elif env.winner == Winner.black:
        black_win = 1
        logger.info("Black wins")
    else:
        black_win = 0
        logger.info("Draw Match")

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
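# A small, self-contained demonstration of the piece-hiding step above using the
# python-chess API: strip Black's pieces from a copy of the starting position so the
# side to move only "sees" its own material. The position used is illustrative.
import chess

board = chess.Board()
for square, piece in list(board.piece_map().items()):
    if piece.symbol() in set("prnbqk"):  # lowercase symbols are Black's pieces
        board.remove_piece_at(square)
print(board.fen())  # the FEN now contains only the White pieces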
def search_my_move_m(self, env: ChessEnv, is_root_node=False, version=0) -> float:
    """
    Q and V are values for this player (always white). P is the policy value
    for the next player to move (black or white).

    This method searches for possible moves, adds them to a search tree, and eventually returns the
    best move that was found during the search.

    :param ChessEnv env: environment in which to search for the move
    :param boolean is_root_node: whether this is the root node of the search.
    :param int version: which virtual-loss scheme to use (0: standard, 1: constant, 2: diminishing).
    :return float: value of the move. This is calculated by getting a prediction
        from the value network.
    """
    if env.done:
        if env.winner == Winner.draw:
            return 0
        # assert env.whitewon != env.white_to_move  # side to move can't be winner!
        return -1

    state = state_key(env)

    if version == 0:
        with self.node_lock[state]:
            if state not in self.tree:
                leaf_p, leaf_v = self.expand_and_evaluate(env)
                self.tree[state].p = leaf_p
                return leaf_v  # I'm returning everything from the POV of side to move

            # SELECT STEP
            action_t = self.select_action_q_and_u(env, is_root_node)
            virtual_loss = self.play_config.virtual_loss_t

            my_visit_stats = self.tree[state]
            my_stats = my_visit_stats.a[action_t]

            my_visit_stats.sum_n += virtual_loss
            my_stats.n += virtual_loss
            my_stats.w += -virtual_loss
            my_stats.q = my_stats.w / my_stats.n

        env.step(action_t.uci())
        leaf_v = self.search_my_move_m(env, version=0)  # next move from enemy POV
        leaf_v = -leaf_v

        # BACKUP STEP
        # on returning search path
        # update: N, W, Q
        with self.node_lock[state]:
            my_visit_stats.sum_n += -virtual_loss + 1
            my_stats.n += -virtual_loss + 1
            my_stats.w += virtual_loss + leaf_v
            my_stats.q = my_stats.w / my_stats.n

    # testing version 1, using constant virtual loss
    elif version == 1:
        with self.node_lock[state]:
            if state not in self.tree:
                leaf_p, leaf_v = self.expand_and_evaluate(env)
                self.tree[state].p = leaf_p
                return leaf_v  # I'm returning everything from the POV of side to move

            # SELECT STEP
            action_t = self.select_action_q_and_u(env, is_root_node)
            virtual_loss = self.play_config.virtual_loss_t

            my_visit_stats = self.tree[state]
            my_stats = my_visit_stats.a[action_t]
            my_stats.v += -virtual_loss

        env.step(action_t.uci())
        leaf_v = self.search_my_move_m(env, version=1)  # next move from enemy POV
        leaf_v = -leaf_v

        # BACKUP STEP
        # on returning search path
        # update: N, W, Q
        with self.node_lock[state]:
            my_visit_stats.sum_n += 1
            my_stats.n += 1
            my_stats.w += leaf_v
            my_stats.q = my_stats.w / my_stats.n
            my_stats.v += virtual_loss

    # testing version 2, using diminishing virtual loss value
    elif version == 2:
        with self.node_lock[state]:
            if state not in self.tree:
                leaf_p, leaf_v = self.expand_and_evaluate(env)
                self.tree[state].p = leaf_p
                return leaf_v  # I'm returning everything from the POV of side to move

            # SELECT STEP
            action_t = self.select_action_q_and_u(env, is_root_node)

            my_visit_stats = self.tree[state]
            my_stats = my_visit_stats.a[action_t]
            virtual_loss = self.play_config.virtual_loss_t / (my_stats.n * my_stats.n + 1)
            my_stats.v += -virtual_loss

        env.step(action_t.uci())
        leaf_v = self.search_my_move_m(env, version=2)  # next move from enemy POV
        leaf_v = -leaf_v

        # BACKUP STEP
        # on returning search path
        # update: N, W, Q
        with self.node_lock[state]:
            my_visit_stats.sum_n += 1
            my_stats.n += 1
            my_stats.w += leaf_v
            my_stats.q = my_stats.w / my_stats.n
            my_stats.v += virtual_loss

    else:
        raise ValueError(f"something is wrong! unknown version: {version}")

    return leaf_v
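# How the version-2 "diminishing" scheme decays: with virtual_loss_t = 3 (an assumed
# value; the real one comes from play_config.virtual_loss_t), the temporary penalty
# applied to an edge shrinks rapidly with its visit count n.
virtual_loss_t = 3.0
for n in (0, 1, 2, 5, 10):
    print(n, virtual_loss_t / (n * n + 1))
# n=0 -> 3.0, n=1 -> 1.5, n=2 -> 0.6, n=5 -> ~0.115, n=10 -> ~0.030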