def play_game(self, best_model, ng_model):
    """
    Play one evaluation game between the best and the next-generation model.

    Colours are assigned by a coin flip and both players are created with
    ``self.config.eval.play_config``.

    :param best_model: model given to the "best" ChessPlayer
    :param ng_model: model given to the next-generation ChessPlayer
    :return: ``(ng_win, best_is_white)``; ``ng_win`` is 1 when the
        next-generation player won, 0 when it lost and None when
        ``env.winner`` is neither ``Winner.white`` nor ``Winner.black``.
    """
    env = ChessEnv().reset()
    eval_play_config = self.config.eval.play_config
    best_player = ChessPlayer(self.config, best_model, play_config=eval_play_config)
    ng_player = ChessPlayer(self.config, ng_model, play_config=eval_play_config)

    best_is_white = random() < 0.5
    if best_is_white:
        white, black = best_player, ng_player
    else:
        white, black = ng_player, best_player

    observation = env.observation
    while not env.done:
        mover = black if env.board.turn == chess.BLACK else white
        board, _info = env.step(mover.action(observation))
        # the players are fed the FEN of the position they have to move in
        observation = board.fen()

    if env.winner == Winner.white:
        ng_win = 0 if best_is_white else 1
    elif env.winner == Winner.black:
        ng_win = 1 if best_is_white else 0
    else:
        ng_win = None
    return ng_win, best_is_white
def start(config: Config):
    """
    Play one console game between a human and the AI.

    The human's colour is chosen by a coin flip. After every move the board is
    rendered and its FEN printed; the game result is printed at the end.

    :param config: configuration whose ``play`` section is updated in place
        by ``PlayWithHumanConfig`` before the game starts
    """
    PlayWithHumanConfig().update_play_config(config.play)
    chess_model = PlayWithHuman(config)

    env = ChessEnv().reset()
    human_is_black = random() < 0.5
    chess_model.start_game(human_is_black)

    while not env.done:
        black_to_move = env.board.turn == chess.BLACK
        # the human moves exactly when the side to move is the human's colour
        if black_to_move == human_is_black:
            action = chess_model.move_by_human(env)
            print("You move to: " + action)
        else:
            action = chess_model.move_by_ai(env)
            print("IA moves to: " + action)
        board, _info = env.step(action)
        env.render()
        print("Board fen = " + board.fen())

    print("\nEnd of the game.")
    print("Game result:")
    print(env.board.result())
def action(self, board):
    """
    Search the position and pick a move.

    Up to ``play_config.thinking_loop`` search rounds are run. A round's
    sampled move is accepted as soon as it matches the value-based choice or
    while ``env.turn`` is still below ``play_config.change_tau_turn``.

    :param board: position passed to ``ChessEnv().update`` and to the search
    :return: the label of the chosen move, or None to resign
    """
    env = ChessEnv().update(board)
    key = self.counter_key(env)
    pc = self.play_config

    for round_no in range(pc.thinking_loop):
        if round_no > 0 and pc.logging_thinking:
            # report the disagreement that caused another thinking round
            logger.debug(
                f"continue thinking: policy move=({action % 8}, {action // 8}), "
                f"value move=({action_by_value % 8}, {action_by_value // 8})"
            )
        self.search_moves(board)
        policy = self.calc_policy(board)
        action = int(np.random.choice(range(self.labels_n), p=policy))
        # visited moves get a +100 bonus so that an unvisited move is never the argmax
        visited_bonus = (self.var_n[key] > 0) * 100
        action_by_value = int(np.argmax(self.var_q[key] + visited_bonus))
        if action == action_by_value or env.turn < pc.change_tau_turn:
            break

    # this is for play_gui, not necessary when training.
    self.thinking_history[env.observation] = HistoryItem(
        action, policy, list(self.var_q[key]), list(self.var_n[key]))

    wants_to_resign = (
        pc.resign_threshold is not None
        and env.score_current() <= pc.resign_threshold
        and pc.min_resign_turn < env.turn
    )
    if wants_to_resign:
        return None  # means resign

    self.moves.append([env.observation, list(policy)])
    return self.config.labels[action]
def supervised_buffer(config, game) -> (ChessEnv, list):
    """
    Replay a recorded game and collect supervised-learning data from it.

    Each move of the game's main line is registered with the dummy player of
    the side to move and then played on the environment. The winner is taken
    from the ``Result`` header, not from the final board.

    :param config: configuration passed to ChessEnv and the dummy players
    :param game: parsed game exposing ``headers``, ``board()`` and ``main_line()``
    :return: the final environment and the merged data of both players
    """
    env = ChessEnv(config).reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    env.board = game.board()

    for move in game.main_line():
        mover = white if env.board.turn == chess.WHITE else black
        mover.sl_action(env, move)
        env.step(move)

    # a decisive result on a board that is not finished is recorded as a resignation
    if result != '1/2-1/2' and not env.board.is_game_over():
        env.resigned = True

    outcomes = {
        '1-0': (Winner.WHITE, 1),
        '0-1': (Winner.BLACK, -1),
    }
    env.winner, white_win = outcomes.get(result, (Winner.DRAW, 0))

    white.finish_game(white_win)
    black.finish_game(-white_win)
    return env, merge_data(white, black)
def search_my_move(self, env: ChessEnv, is_root_node=False, depth=0) -> float:
    """
    Run one search simulation from ``env`` and back its value up the path.

    All values are from the point of view of the side to move in ``env``;
    the value coming back from the recursive call is therefore negated.

    :param ChessEnv env: environment to search from (it is stepped in place)
    :param boolean is_root_node: whether this is the root node of the search
    :param int depth: recursion depth, only used in the debug print
    :return float: value of the position for the side to move
    """
    if env.done:
        # side to move can't be the winner, so a decisive result is a loss
        return 0 if env.winner == Winner.draw else -1

    state = state_key(env)

    with self.node_lock[state]:
        if state not in self.tree:
            # unseen position: evaluate it, store the priors, stop descending
            prior, value = self.expand_and_evaluate(env)
            self.tree[state].p = prior
            return value

        # SELECT STEP
        move = self.select_action_q_and_u(env, is_root_node)
        vloss = self.play_config.virtual_loss

        node = self.tree[state]
        edge = node.a[move]

        # apply the virtual loss before descending
        node.sum_n += vloss
        edge.n += vloss
        edge.w -= vloss
        edge.q = edge.w / edge.n

    print("SMM state", 'd{}'.format(depth), state, move)
    env.step(move.uci())
    value = -self.search_my_move(env, depth=depth+1)  # child value is from the enemy POV

    # BACKUP STEP: undo the virtual loss and record the real visit (N, W, Q)
    with self.node_lock[state]:
        node.sum_n += 1 - vloss
        edge.n += 1 - vloss
        edge.w += vloss + value
        edge.q = edge.w / edge.n

    return value
def search_my_move(self, env: ChessEnv, is_root_node=False) -> float:
    """
    Run one search simulation from ``env`` and update the search tree.

    Values are always from the point of view of the side to move in ``env``,
    which is why the result of the recursive call is negated.

    :param ChessEnv env: environment in which to search (stepped in place)
    :param boolean is_root_node: whether this is the root node of the search
    :return float: value of the position for the side to move; for a new
        leaf this is the value returned by ``expand_and_evaluate``
    """
    if env.done:
        if env.winner == Winner.draw:
            return 0
        # side to move can't be the winner
        return -1

    state = state_key(env)

    with self.node_lock[state]:
        if state not in self.tree:
            policy_prior, leaf_value = self.expand_and_evaluate(env)
            self.tree[state].p = policy_prior
            return leaf_value  # from the POV of side to move

        # SELECT STEP
        chosen = self.select_action_q_and_u(env, is_root_node)
        v_loss = self.play_config.virtual_loss

        visit_stats = self.tree[state]
        action_stats = visit_stats.a[chosen]

        # virtual loss: count the pending visit as a loss while it is in flight
        visit_stats.sum_n += v_loss
        action_stats.n += v_loss
        action_stats.w -= v_loss
        action_stats.q = action_stats.w / action_stats.n

    env.step(chosen.uci())
    # the recursive call answers from the enemy POV, so flip the sign
    leaf_value = -self.search_my_move(env)

    # BACKUP STEP: on returning search path update N, W, Q
    with self.node_lock[state]:
        visit_stats.sum_n += 1 - v_loss
        action_stats.n += 1 - v_loss
        action_stats.w += v_loss + leaf_value
        action_stats.q = action_stats.w / action_stats.n

    return leaf_value
async def search_my_move(self, env: ChessEnv, is_root_node=False):
    """
    Run one asynchronous search simulation from ``env``.

    The returned value is for white: terminal positions give 1 / -1 / 0 for a
    white win / black win / anything else, and a fresh leaf evaluation is
    negated when black is to move.

    :param env: environment to search from (stepped in place)
    :param is_root_node: forwarded to ``select_action_q_and_u``
    :return: leaf value
    """
    if env.done:
        if env.winner == Winner.white:
            return 1
        if env.winner == Winner.black:
            return -1
        return 0

    key = self.counter_key(env)

    # wait while this node is listed in ``now_expanding``
    while key in self.now_expanding:
        await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec)

    # is leaf?
    if key not in self.expanded:
        leaf_v = await self.expand_and_evaluate(env.copy())
        # the evaluation is converted into a value for white
        return leaf_v if env.board.turn == chess.WHITE else -leaf_v

    action_t = self.select_action_q_and_u(env, is_root_node)
    _board, _info = env.step(self.config.labels[action_t])

    # virtual loss while the simulation below this node is in flight
    virtual_loss = self.config.play.virtual_loss
    self.var_n[key][action_t] += virtual_loss
    self.var_w[key][action_t] -= virtual_loss

    leaf_v = await self.search_my_move(env)  # next move

    # on returning search path: remove the virtual loss and update N, W, Q
    n = self.var_n[key][action_t] - virtual_loss + 1
    self.var_n[key][action_t] = n
    w = self.var_w[key][action_t] + virtual_loss + leaf_v
    self.var_w[key][action_t] = w
    self.var_q[key][action_t] = w / n
    return leaf_v
def search_my_move(self, env: ChessEnv, is_root_node=False) -> float:
    """
    Run one search simulation from ``env``.

    Values are from the point of view of the side to move in ``env``; the
    value returned by the recursive call is negated before it is backed up.

    :param env: environment to search from (stepped in place)
    :param is_root_node: forwarded to ``select_action_q_and_u``
    :return: leaf value
    """
    if env.done:
        # side to move can't be the winner: draw or loss
        return 0 if env.winner == Winner.draw else -1

    state = state_key(env)

    with self.node_lock[state]:
        if state not in self.tree:
            priors, evaluation = self.expand_and_evaluate(env=env)
            self.tree[state].p = priors
            return evaluation  # from the POV of side to move

        # SELECT STEP
        picked = self.select_action_q_and_u(env, is_root_node)
        loss = self.play_config.virtual_loss

        node_stats = self.tree[state]
        edge_stats = node_stats.a[picked]

        # virtual loss for the visit that is now in flight
        edge_stats.n += loss
        node_stats.sum_n += loss
        edge_stats.w -= loss
        edge_stats.q = edge_stats.w / edge_stats.n

    env.step(picked.uci())
    evaluation = -self.search_my_move(env)  # next move is from enemy POV

    # BACKUP STEP: on returning search path update N, W, Q
    with self.node_lock[state]:
        edge_stats.n += 1 - loss
        node_stats.sum_n += 1 - loss
        edge_stats.w += loss + evaluation
        edge_stats.q = edge_stats.w / edge_stats.n

    return evaluation
def start(config: Config):
    """
    Serve moves of the best model over HTTP.

    A Flask app with a single ``/play`` route is started on port 8080 of all
    interfaces. The route expects a JSON object with the keys ``position``
    and ``moves``, updates the shared environment with them and answers with
    the player's move as JSON.

    :param config: configuration whose ``play`` section is updated in place
    :raises RuntimeError: if the best model weights cannot be loaded
    """
    PlayWithHumanConfig().update_play_config(config.play)
    env = ChessEnv().reset()
    app = Flask(__name__)

    model = ChessModel(config)
    if not load_best_model_weight(model):
        raise RuntimeError("Best model not found!")
    player = ChessPlayer(config, model.get_pipes(config.play.search_threads))

    @app.route('/play', methods=["GET", "POST"])
    def play():
        data = request.get_json()
        # A request without a JSON object (e.g. a plain GET) or without the
        # expected keys used to fail with TypeError/KeyError; reject it instead.
        if not isinstance(data, dict) or "position" not in data or "moves" not in data:
            return jsonify({"error": "expected a JSON object with 'position' and 'moves'"}), 400
        print(data["position"])
        env.update(data["position"])
        env.step(data["moves"], False)
        bestmove = player.action(env, False)
        return jsonify(bestmove)

    # the port is passed as an int, which is what Flask documents
    app.run(host="0.0.0.0", port=8080)
async def start_search_my_move(self, board):
    """
    Run one root simulation for ``board`` under the search semaphore.

    ``running_simulation_num`` is incremented before waiting for the
    semaphore and decremented again when the simulation finishes or fails.

    :param board: position passed to ``ChessEnv().update``
    :return: the leaf value returned by ``search_my_move``
    """
    self.running_simulation_num += 1
    try:
        # ``with await lock`` was removed in Python 3.9; ``async with`` is the
        # supported way to hold an asyncio semaphore.
        async with self.sem:  # reduce parallel search number
            env = ChessEnv().update(board)
            return await self.search_my_move(env, is_root_node=True)
    finally:
        # keep the counter accurate even if the search raises
        self.running_simulation_num -= 1
def start(config: Config):
    """
    Run a minimal UCI command loop on stdin/stdout.

    Handled commands: ``uci``, ``isready``, ``ucinewgame``, ``position``
    (``startpos`` or a six-field FEN, optionally preceded by the word ``fen``
    and followed by ``moves ...``), ``go``, ``stop`` (ignored) and ``quit``.
    The player is created lazily on the first ``isready`` or ``go``.

    Final replies are flushed so that a GUI reading from a pipe receives them
    immediately, and a closed stdin ends the loop like ``quit``.

    :param config: configuration whose ``play`` section is updated in place
    """
    PlayWithHumanConfig().update_play_config(config.play)

    me_player = None
    env = ChessEnv().reset()

    while True:
        try:
            line = input()
        except EOFError:
            # the GUI closed our stdin: leave quietly instead of crashing
            break
        words = line.rstrip().split(" ", 1)
        if words[0] == "uci":
            print("id name ChessZero")
            print("id author ChessZero")
            print("uciok", flush=True)
        elif words[0] == "isready":
            if not me_player:
                me_player = get_player(config)
            print("readyok", flush=True)
        elif words[0] == "ucinewgame":
            env.reset()
        elif words[0] == "position":
            words = words[1].split(" ", 1)
            if words[0] == "startpos":
                env.reset()
            else:
                if words[0] == "fen":  # skip extraneous word
                    words = words[1].split(' ', 1)
                # a FEN has six space-separated fields
                fen = words[0]
                for _ in range(5):
                    words = words[1].split(' ', 1)
                    fen += " " + words[0]
                env.update(fen)
            if len(words) > 1:
                words = words[1].split(" ", 1)
                if words[0] == "moves":
                    for w in words[1].split(" "):
                        env.step(w, False)
        elif words[0] == "go":
            if not me_player:
                me_player = get_player(config)
            action = me_player.action(env, False)
            print(f"bestmove {action}", flush=True)
        elif words[0] == "stop":
            pass
        elif words[0] == "quit":
            break
def convert_to_cheating_data(data):
    """
    Convert buffered play data into training arrays.

    The records are replayed on a single environment: ``env.deltamove`` gives
    the move leading to each recorded FEN, and None is taken as the start of a
    new game. The policy is flipped when black is to move, and the value
    target is blended with ``env.testeval()`` during the first 15 moves.

    :param data: format is SelfPlayWorker.buffer, an iterable of
        ``(state_fen, policy, value)``
    :return: ``(states, policies, values)`` as float32 numpy arrays
    """
    state_list = []
    policy_list = []
    value_list = []
    env = ChessEnv().reset()
    for state_fen, policy, value in data:
        fen_fields = state_fen.split(' ')
        side_to_move = fen_fields[1]
        move_number = int(fen_fields[5])

        next_move = env.deltamove(state_fen)
        if next_move is None:  # new game!
            assert state_fen == chess.STARTING_FEN
            env.reset()
        else:
            env.step(next_move, False)

        state_planes = env.canonical_input_planes()

        if side_to_move == 'b':
            policy = Config.flip_policy(policy)

        assert len(policy) == 1968
        assert state_planes.dtype == np.float32

        # reduces the noise of the opening: the recorded value only gets its
        # full weight from move 15 onwards
        value_certainty = min(15, move_number) / 15
        sl_value = value * value_certainty + env.testeval() * (1 - value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)

    return (np.array(state_list, dtype=np.float32),
            np.array(policy_list, dtype=np.float32),
            np.array(value_list, dtype=np.float32))
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Replay a PGN game with two dummy players and collect their move data.

    Each side's moves are weighted with ``clip_elo_policy`` of that side's
    Elo header. The winner is taken from the ``Result`` header.

    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return: the final environment and the data for the
        SupervisedLearningWorker.buffer, white's and black's records interleaved
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)

    headers = game.headers
    result = headers["Result"]
    white_elo = int(headers["WhiteElo"])
    black_elo = int(headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)

    # walk the main line (first variation at every node) and collect UCI moves
    actions = []
    node = game
    while not node.is_end():
        node = node.variation(0)
        actions.append(node.move.uci())

    for uci_move in actions:
        if env.done:
            break
        if env.white_to_move:
            action = white.sl_action(env.observation, uci_move, weight=white_weight)
        else:
            action = black.sl_action(env.observation, uci_move, weight=black_weight)
        env.step(action, False)

    # a decisive result on an unfinished board is recorded as a resignation
    if result != '1/2-1/2' and not env.board.is_game_over():
        env.resigned = True

    outcomes = {
        '1-0': (Winner.white, -1),
        '0-1': (Winner.black, 1),
    }
    env.winner, black_win = outcomes.get(result, (Winner.draw, 0))

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # interleave the records: white, black, white, black, ...
    data = []
    for i, white_record in enumerate(white.moves):
        data.append(white_record)
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
def training(self):
    """
    Alternate forever between self-play data generation and training.

    Each cycle loads the play data, runs self-play (repeatedly, while the
    training share of the dataset is smaller than one batch), trains for
    ``trainer.epoch_to_checkpoint``, saves the current model, clears the
    backend session and reloads the best model weights.

    Sets ``self.meta_writer``, ``self.early_stopping`` and ``self.check_point``.
    This method never returns.
    """
    tc = self.config.trainer
    total_steps = tc.start_total_steps

    meta_dir = 'data/model'
    meta_file = os.path.join(meta_dir, 'metadata.json')
    file_dir = 'data/model/next_generation'
    h5_file = os.path.join(file_dir, 'weights.{epoch:02d}.h5')

    self.meta_writer = OptimizeWorker(meta_file)
    self.early_stopping = EarlyStopping(monitor='val_loss')
    self.check_point = ModelCheckpoint(filepath=h5_file, monitor='val_loss', verbose=1)

    def too_little_data():
        # the training share of the dataset cannot fill a single batch
        return (self.dataset_size * (1 - self.validation)) < tc.batch_size

    def self_play_round():
        # generate new games with the current model, then reload the data
        self_play = SelfPlayWorker(self.config, env=ChessEnv(), model=self.model)
        self_play.start()
        self.load_play_data()

    while True:
        self.load_play_data()
        if too_little_data():
            while too_little_data():
                self_play_round()
        else:
            self_play_round()

        self.compile_model()
        self.update_learning_rate(total_steps)
        total_steps += self.train_epoch(tc.epoch_to_checkpoint)
        self.save_current_model()

        k.clear_session()
        load_best_model_weight(self.model)
def search_my_move(self, env: ChessEnv, is_root_node):
    """
    Run one search simulation from ``env``, keyed by its transposition key.

    Values are from the point of view of the side to move in ``env``; the
    value of the recursive call is negated before it is backed up.

    :param env: environment to search from (stepped in place)
    :param is_root_node: forwarded to ``select_action_q_and_u``
    :return: leaf value
    """
    if env.done:
        # a tricky optimization: a finished game that is not a draw can only
        # be reached when the side to move has just lost
        return 0 if env.winner == Winner.DRAW else -1

    key = env.transposition_key()

    with self.node_lock[key]:
        if key not in self.tree:
            priors, leaf_v = self.expand_and_evaluate(env)
            self.tree[key].p = priors
            return leaf_v  # from the POV of side to move

        move_t, _action_t = self.select_action_q_and_u(env, is_root_node)
        v_loss = self.play_config.virtual_loss

        node = self.tree[key]
        edge = node.a[move_t]

        node.sum_n += v_loss
        edge.n += v_loss
        edge.w -= v_loss
        edge.q = edge.w / edge.n  # q must be refreshed here as well

    env.step(move_t)
    leaf_v = -self.search_my_move(env, False)  # next move

    # on returning search path, update: N, W, Q
    with self.node_lock[key]:
        node.sum_n += 1 - v_loss
        edge.n += 1 - v_loss
        edge.w += v_loss + leaf_v
        edge.q = edge.w / edge.n

    return leaf_v
def start(config: Config):
    """
    Run a minimal UCI command loop on stdin/stdout using ``PlayWithHuman``.

    Handled commands: ``uci``, ``isready``, ``ucinewgame``, ``position``
    (``startpos`` or a six-field FEN, with or without the leading ``fen``
    keyword, optionally followed by ``moves ...``), ``go``, ``stop``
    (ignored) and ``quit``.

    The model is created lazily on the first ``isready`` or ``go``, replies
    are flushed so a GUI reading from a pipe receives them immediately, and a
    closed stdin ends the loop like ``quit``.

    :param config: configuration; its ``play`` section is updated in place
        and ``play.thinking_loop`` is forced to 1
    """
    PlayWithHumanConfig().update_play_config(config.play)
    config.play.thinking_loop = 1

    chess_model = None
    env = ChessEnv().reset()

    def reply(text):
        # flush every reply: stdout is block-buffered when it is a pipe
        print(text, flush=True)

    while True:
        try:
            line = input()
        except EOFError:
            # the GUI closed our stdin: leave quietly instead of crashing
            break
        words = line.rstrip().split(" ", 1)
        if words[0] == "uci":
            reply("id name ChessZero")
            reply("id author ChessZero")
            reply("uciok")
        elif words[0] == "isready":
            if chess_model is None:
                chess_model = PlayWithHuman(config)
            reply("readyok")
        elif words[0] == "ucinewgame":
            env.reset()
        elif words[0] == "position":
            words = words[1].split(" ", 1)
            if words[0] == "startpos":
                env.reset()
            else:
                # UCI sends "position fen <fen>"; the keyword is not part of
                # the FEN itself, so skip it when present
                if words[0] == "fen":
                    words = words[1].split(" ", 1)
                # a FEN has six space-separated fields
                fen = words[0]
                for _ in range(5):
                    words = words[1].split(' ', 1)
                    fen += " " + words[0]
                env.update(fen)
            if len(words) > 1:
                words = words[1].split(" ", 1)
                if words[0] == "moves":
                    for w in words[1].split(" "):
                        env.step(w, False)
        elif words[0] == "go":
            # "go" may arrive before "isready": make sure the model exists
            if chess_model is None:
                chess_model = PlayWithHuman(config)
            action = chess_model.move_by_ai(env)
            reply(f"bestmove {action}")
        elif words[0] == "stop":
            pass
        elif words[0] == "quit":
            break
def convert_to_training_data(data):
    """
    Turn buffered play data into numpy arrays for training.

    For every record the two planes from ``env.black_and_white_plane()`` are
    ordered so that the plane of the side to move comes first.

    :param data: format is SelfPlayWorker.buffer, an iterable of
        ``(state, policy, z)``
    :return: ``(states, policies, zs)`` as numpy arrays
    """
    states, policies, outcomes = [], [], []

    for position, policy, z in data:
        env = ChessEnv().update(position)
        black_ary, white_ary = env.black_and_white_plane()
        if env.board.turn == chess.BLACK:
            planes = [black_ary, white_ary]
        else:
            planes = [white_ary, black_ary]

        states.append(planes)
        policies.append(policy)
        outcomes.append(z)

    return np.array(states), np.array(policies), np.array(outcomes)
def convert_to_training_data(data):
    """
    Turn buffered play data into numpy arrays, feeding a move history to the env.

    Each state is stacked into 108 planes of 8x8 plus one plane for the
    current player and one for the move number (110 planes in total).

    :param data: format is SelfPlayWorker.buffer, an iterable of
        ``(state, policy, z)``
    :return: ``(states, policies, zs)`` as numpy arrays
    """
    states, policies, outcomes = [], [], []

    last_move_number = 1
    movements = []

    for state, policy, z in data:
        probe = ChessEnv().update(state, movements)
        move_number = int(probe.board.fen().split(" ")[5])

        if last_move_number < move_number:
            if len(movements) > 8:
                movements.pop(0)
            # NOTE(review): ``env`` is the environment of the PREVIOUS record;
            # it is unbound if the very first record has a move number above 1.
            movements.append(env.observation)
            last_move_number = move_number
        else:
            # NOTE(review): this also runs when the move number is unchanged,
            # so the history is dropped there too - confirm this is intended.
            last_move_number = 1
            movements = []

        env = ChessEnv().update(state, movements)
        black_ary, white_ary, current_player, move_number = env.black_and_white_plane()

        black_to_move = env.board.fen().split(" ")[1] == 'b'
        if black_to_move:
            pair = [black_ary, white_ary]
        else:
            pair = [white_ary, black_ary]

        stacked = np.reshape(np.array(pair), (18, 6, 8, 8))
        stacked = np.reshape(stacked, (108, 8, 8))
        player_plane = np.reshape(current_player, (1, 8, 8))
        move_plane = np.reshape(move_number, (1, 8, 8))
        planes = np.vstack((stacked, player_plane, move_plane))

        states.append(planes)
        policies.append(policy)
        outcomes.append(z)

    return np.array(states), np.array(policies), np.array(outcomes)
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool): """ Plays a game against models cur and ng and reports the results. :param Config config: config for how to play the game :param ChessModel cur: should be the current model :param ChessModel ng: should be the next generation model :param bool current_white: whether cur should play white or black :return (float, ChessEnv, bool): the score for the ng model (0 for loss, .5 for draw, 1 for win), the env after the game is finished, and a bool which is true iff cur played as white in that game. """ cur_pipes = cur.pop() ng_pipes = ng.pop() env = ChessEnv().reset() current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config) ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config) if current_white: white, black = current_player, ng_player else: white, black = ng_player, current_player while not env.done: if env.white_to_move: action = white.action(env) else: action = black.action(env) env.step(action) if env.num_halfmoves >= config.eval.max_game_length: env.adjudicate() if env.winner == Winner.draw: ng_score = 0.5 elif env.white_won == current_white: ng_score = 0 else: ng_score = 1 cur.append(cur_pipes) ng.append(ng_pipes) return ng_score, env, current_white
def start(config: Config):
    """
    Run a minimal UCI command loop on stdin/stdout.

    Handled commands: ``uci``, ``isready``, ``ucinewgame``, ``position``
    (``startpos`` or a six-field FEN, optionally preceded by the word ``fen``
    and followed by ``moves ...``), ``go``, ``stop`` (ignored) and ``quit``.
    The player is created lazily on the first ``isready`` or ``go``.

    Replies are flushed so that a GUI reading from a pipe receives them
    immediately, and a closed stdin ends the loop like ``quit``.

    :param config: configuration whose ``play`` section is updated in place
    """
    PlayWithHumanConfig().update_play_config(config.play)

    me_player = None
    env = ChessEnv().reset()

    def reply(text):
        # flush every reply: stdout is block-buffered when it is a pipe
        print(text, flush=True)

    while True:
        try:
            line = input()
        except EOFError:
            # the GUI closed our stdin: leave quietly instead of crashing
            break
        words = line.rstrip().split(" ", 1)
        command = words[0]
        if command == "uci":
            reply("id name ChessZero")
            reply("id author ChessZero")
            reply("uciok")
        elif command == "isready":
            if not me_player:
                me_player = get_player(config)
            reply("readyok")
        elif command == "ucinewgame":
            env.reset()
        elif command == "position":
            words = words[1].split(" ", 1)
            if words[0] == "startpos":
                env.reset()
            else:
                if words[0] == "fen":  # skip extraneous word
                    words = words[1].split(' ', 1)
                # a FEN has six space-separated fields
                fen = words[0]
                for _ in range(5):
                    words = words[1].split(' ', 1)
                    fen += " " + words[0]
                env.update(fen)
            if len(words) > 1:
                words = words[1].split(" ", 1)
                if words[0] == "moves":
                    for w in words[1].split(" "):
                        env.step(w, False)
        elif command == "go":
            if not me_player:
                me_player = get_player(config)
            action = me_player.action(env, False)
            reply(f"bestmove {action}")
        elif command == "stop":
            pass
        elif command == "quit":
            break
def start(config: Config):
    """
    Play console games between a human and the AI, one after another, forever.

    Each game starts from the normal reset position when
    ``config.play.random_endgame`` is -1, otherwise from
    ``ChessEnv.randomize(random_endgame)``. The human's colour is chosen by a
    coin flip. The finished game is logged as PGN at debug level.

    :param config: configuration used for the model and the environment
    """
    chess_model = PlayWithHuman(config)

    while True:
        random_endgame = config.play.random_endgame
        env = ChessEnv(config)
        env = env.reset() if random_endgame == -1 else env.randomize(random_endgame)

        human_is_white = random() < 0.5
        chess_model.start_game(human_is_white)

        print(env.board)
        while not env.done:
            white_to_move = env.board.turn == chess.WHITE
            if white_to_move == human_is_white:
                action = chess_model.move_by_human(env)
                print(f"You move to: {env.board.san(action)}")
            else:
                action = chess_model.move_by_ai(env)
                print(f"AI moves to: {env.board.san(action)}")
            env.step(action)
            print(env.board)
            print(f"Board FEN = {env.fen}")

        game = chess.pgn.Game.from_board(env.board)
        ai_name = f"AI {chess_model.model.digest[:10]}..."
        if human_is_white:
            game.headers['White'] = "Human"
            game.headers['Black'] = ai_name
        else:
            game.headers['White'] = ai_name
            game.headers['Black'] = "Human"
        logger.debug("\n"+str(game))
        print(f"\nEnd of the game. Game result: {env.board.result()}")
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    """
    Play one evaluation game between the current and the next-generation model.

    :param config: config for how to play the game (``config.eval`` is used)
    :param cur: pool of pipes of the current model; one entry is popped for
        the game and appended back at the end
    :param ng: pool of pipes of the next-generation model, used the same way
    :param bool current_white: whether the current model plays white
    :return: ``(ng_score, env, current_white)`` with ``ng_score`` 0.5 for a
        draw, 0 when the current model's colour won and 1 otherwise
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = ChessEnv().reset()

    current_player = ChessPlayer(
        config, pipes=cur_pipes, play_config=config.eval.play_config)
    ng_player = ChessPlayer(
        config, pipes=ng_pipes, play_config=config.eval.play_config)

    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        side = white if env.white_to_move else black
        action = side.action(env)
        env.step(action)
        # games that run too long are adjudicated
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    if env.winner == Winner.draw:
        ng_score = 0.5
    else:
        ng_score = 0 if env.white_won == current_white else 1

    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one self-play game and return its training data.

    Both colours are played by ChessPlayers sharing the same borrowed pipes.
    The game is adjudicated once ``config.play.max_game_length`` half-moves
    are reached.

    :param config: config for how to play
    :param cur: pool of pipes; one entry is popped for the game and appended
        back at the end
    :return: the final environment and the players' move records, white's and
        black's interleaved
    """
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)

    while not env.done:
        mover = white if env.white_to_move else black
        env.step(mover.action(env))
        if env.num_halfmoves >= config.play.max_game_length:
            env.adjudicate()

    # result from black's point of view
    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # interleave the records: white, black, white, black, ...
    data = []
    for i, white_record in enumerate(white.moves):
        data.append(white_record)
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
def play_game(self, current_model, ng_model, current_white: bool) -> (float, ChessEnv):
    """
    Play one evaluation game between the current and the next-generation model.

    The game is adjudicated once ``self.config.eval.max_game_length``
    half-moves are reached.

    :param current_model: model for the current player
    :param ng_model: model for the next-generation player
    :param bool current_white: whether the current model plays white
    :return: ``(ng_score, env)`` with ``ng_score`` 0.5 for a draw, 0 when the
        current model's colour won and 1 otherwise
    """
    env = ChessEnv().reset()

    eval_config = self.config.eval
    current_player = ChessPlayer(self.config, model=current_model,
                                 play_config=eval_config.play_config)
    ng_player = ChessPlayer(self.config, model=ng_model,
                            play_config=eval_config.play_config)

    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        mover = white if env.board.turn == chess.WHITE else black
        env.step(mover.action(env))
        if env.num_halfmoves >= self.config.eval.max_game_length:
            env.adjudicate()

    if env.winner == Winner.draw:
        ng_score = 0.5
    else:
        ng_score = 0 if env.whitewon == current_white else 1
    return ng_score, env
def start(config: Config):
    """
    Play one console game between the AI and an opponent driven by ``PlayWithEngine``.

    The opponent's colour is chosen by a coin flip. After every move the board
    is rendered and its FEN printed; the game result is printed at the end.

    :param config: configuration whose ``play`` section is updated in place
    """
    PlayWithHumanConfig().update_play_config(config.play)
    chess_model = PlayWithEngine(config)

    env = ChessEnv().reset()
    human_is_black = random() < 0.5
    chess_model.start_game(human_is_black)

    while not env.done:
        black_to_move = env.board.turn == chess.BLACK
        if black_to_move == human_is_black:
            action = chess_model.move_by_opponent(env)
            print("You move to: " + action)
        else:
            action = chess_model.move_by_ai(env)
            print("AI moves to: " + action)
        board, _info = env.step(action)
        env.render()
        print("Board FEN = " + board.fen())

    print("\nEnd of the game.")
    print("Game result:")
    print(env.board.result())
def sl_action(self, board, action):
    """
    Record a supervised-learning move as a one-hot policy.

    The policy has ``self.labels_n`` entries and is 1.0 at the first index of
    ``self.config.labels`` equal to ``action``; it stays all zeros if the
    action is not among the labels.

    :param board: position passed to ``ChessEnv().update``
    :param action: the move that was played
    :return: ``action`` unchanged
    """
    env = ChessEnv().update(board)

    policy = np.zeros(self.labels_n)
    for index, label in enumerate(self.config.labels):
        if label == action:
            policy[index] = 1.0
            break

    self.moves.append([env.observation, list(policy)])
    return action
def calc_policy(self, board):
    """
    calc π(a|s0) from the visit counts of the position.

    Before ``play_config.change_tau_turn`` the visit counts are normalised
    (tau = 1); from that turn on the policy is one-hot on the most visited
    move (tau = 0).

    :param board: position passed to ``ChessEnv().update``
    :return: policy array
    """
    pc = self.play_config
    env = ChessEnv().update(board)
    key = self.counter_key(env)

    if env.turn < pc.change_tau_turn:
        # tau = 1; the 1e-8 keeps the division defined when nothing was visited
        total_visits = np.sum(self.var_n[key]) + 1e-8
        return self.var_n[key] / total_visits

    # tau = 0
    most_visited = np.argmax(self.var_n[key])
    one_hot = np.zeros(self.labels_n)
    one_hot[most_visited] = 1
    return one_hot
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by replaying a game from PGN data.

    Two dummy players record the moves; each side's records are weighted with
    ``clip_elo_policy`` of that side's Elo header, and the winner is taken
    from the ``Result`` header.

    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return: the final environment and the data for the
        SupervisedLearningWorker.buffer, white's and black's records interleaved
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)

    result = game.headers["Result"]
    white_elo = int(game.headers["WhiteElo"])
    black_elo = int(game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)

    # collect the main line (first variation at every node) as UCI strings
    actions = []
    cursor = game
    while not cursor.is_end():
        cursor = cursor.variation(0)
        actions.append(cursor.move.uci())

    for planned in actions:
        if env.done:
            break
        if env.white_to_move:
            played = white.sl_action(env.observation, planned, weight=white_weight)
        else:
            played = black.sl_action(env.observation, planned, weight=black_weight)
        env.step(played, False)

    # a decisive result on an unfinished board is recorded as a resignation
    unfinished = not env.board.is_game_over()
    if unfinished and result != '1/2-1/2':
        env.resigned = True

    if result == '1-0':
        env.winner, black_win = Winner.white, -1
    elif result == '0-1':
        env.winner, black_win = Winner.black, 1
    else:
        env.winner, black_win = Winner.draw, 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # interleave the records: white, black, white, black, ...
    data = []
    for idx, record in enumerate(white.moves):
        data.append(record)
        if idx < len(black.moves):
            data.append(black.moves[idx])

    return env, data
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Replay a PGN game with two dummy players and collect their move data.

    :param config: config used for the players and for ``clip_elo_policy``
    :param game: parsed game with ``Result``, ``WhiteElo`` and ``BlackElo``
        headers
    :return: the final environment and the players' move records, white's and
        black's interleaved
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)

    headers = game.headers
    result = headers["Result"]
    white_elo = int(headers["WhiteElo"])
    black_elo = int(headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)

    # follow the first variation at every node to get the played moves
    actions = []
    node = game
    while not node.is_end():
        node = node.variation(0)
        actions.append(node.move.uci())

    for uci_move in actions:
        if env.done:
            break
        if env.white_to_move:
            action = white.sl_action(env.observation, uci_move, weight=white_weight)
        else:
            action = black.sl_action(env.observation, uci_move, weight=black_weight)
        env.step(action, False)

    # a decisive result on an unfinished board is recorded as a resignation
    if result != '1/2-1/2' and not env.board.is_game_over():
        env.resigned = True

    by_result = {
        '1-0': (Winner.white, -1),
        '0-1': (Winner.black, 1),
    }
    env.winner, black_win = by_result.get(result, (Winner.draw, 0))

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # interleave the records: white, black, white, black, ...
    data = []
    black_count = len(black.moves)
    for i, white_record in enumerate(white.moves):
        data.append(white_record)
        if i < black_count:
            data.append(black.moves[i])

    return env, data
def get_buffer(game, config) -> (ChessEnv, list):
    """
    Replay a PGN game with two dummy players and collect their move data.

    The environment is marked done after the replay, whatever the board
    state, and the winner is taken from the ``Result`` header.

    :param game: parsed game to replay
    :param config: config used for the dummy players
    :return: the final environment and the players' move records, white's and
        black's interleaved
    """
    env = ChessEnv().reset()
    black = ChessPlayer(config, dummy=True)
    white = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]

    # follow the first variation at every node to get the played moves
    actions = []
    node = game
    while not node.is_end():
        node = node.variation(0)
        actions.append(node.move.uci())

    observation = env.observation
    for uci_move in actions:
        if env.done:
            break
        mover = white if env.board.turn == chess.WHITE else black
        action = mover.sl_action(observation, uci_move)
        board, _info = env.step(action, False)
        observation = board.fen()

    env.done = True

    # a decisive result on an unfinished board is recorded as a resignation
    if result != '1/2-1/2' and not env.board.is_game_over():
        env.resigned = True

    if result == '1-0':
        env.winner, black_win = Winner.white, -1
    elif result == '0-1':
        env.winner, black_win = Winner.black, 1
    else:
        env.winner, black_win = Winner.draw, 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # interleave the records: white, black, white, black, ...
    data = []
    for i, white_record in enumerate(white.moves):
        data.append(white_record)
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool): """ Plays a game against models cur and ng and reports the results. :param Config config: config for how to play the game :param ChessModel cur: should be the current model :param ChessModel ng: should be the next generation model :param bool current_white: whether cur should play white or black :return (float, ChessEnv, bool): the score for the ng model (0 for loss, .5 for draw, 1 for win), the env after the game is finished, and a bool which is true iff cur played as white in that game. """ cur_pipes = cur.pop() ng_pipes = ng.pop() env = ChessEnv().reset() current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config) ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config) if current_white: white, black = current_player, ng_player else: white, black = ng_player, current_player while not env.done: if env.white_to_move: action = white.action(env) else: action = black.action(env) env.step(action) if env.num_halfmoves >= config.eval.max_game_length: env.adjudicate() if env.winner == Winner.draw: ng_score = 0.5 elif env.white_won == current_white: ng_score = 0 else: ng_score = 1 cur.append(cur_pipes) ng.append(ng_pipes) return ng_score, env, current_white
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one self-play game with a shared search tree and return its data.

    Both players use the same borrowed pipes and the same search tree. The
    game is adjudicated when ``config.play.max_game_length`` half-moves are
    reached, or when the latest action has equalled the action four plies
    earlier on four consecutive plies.

    :param config: config for how to play
    :param cur: pool of pipes; one entry is popped for the game and appended
        back at the end
    :return: the final environment and the players' move records, white's and
        black's interleaved
    """
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()

    search_tree = defaultdict(VisitStats)
    white = ChessPlayer(config, search_tree=search_tree, pipes=pipes)
    black = ChessPlayer(config, search_tree=search_tree, pipes=pipes)

    history = []
    repeat_streak = 0
    while not env.done:
        mover = white if env.white_to_move else black
        action = mover.action(env)
        env.step(action)
        history.append(action)

        # same action as four plies ago: the players may be shuffling back and forth
        if len(history) > 6 and history[-1] == history[-5]:
            repeat_streak += 1
        else:
            repeat_streak = 0

        if env.num_halfmoves >= config.play.max_game_length or repeat_streak >= 4:
            env.adjudicate()

    # result from black's point of view
    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # interleave the records: white, black, white, black, ...
    data = []
    for i, white_record in enumerate(white.moves):
        data.append(white_record)
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game and return the play data for the buffer.

    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send
        observations to for getting predictions. One is removed from this
        list during the game, then added back
    :return (ChessEnv,list((str,list(float)): a tuple containing the final
        ChessEnv state and then a list of data to be appended to the
        SelfPlayWorker.buffer
    """
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)

    while not env.done:
        side = white if env.white_to_move else black
        chosen = side.action(env)
        env.step(chosen)
        # over-long games are adjudicated
        if env.num_halfmoves >= config.play.max_game_length:
            env.adjudicate()

    # result from black's point of view
    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # interleave the records: white, black, white, black, ...
    data = []
    black_count = len(black.moves)
    for idx, record in enumerate(white.moves):
        data.append(record)
        if idx < black_count:
            data.append(black.moves[idx])

    cur.append(pipes)
    return env, data
def counter_key(env: ChessEnv):
    """
    Build the CounterKey of a position.

    :param env: environment to key
    :return: ``CounterKey`` made of ``env.replace_tags()`` and the side to move
    """
    position_tags = env.replace_tags()
    side_to_move = env.board.turn
    return CounterKey(position_tags, side_to_move)