def __init__(self, name):
    """
    Create the Flask application, register its URL rules and set up the playing agent.

    :param name: Name which is handed over to the Flask constructor
    """
    self.app = Flask(name)

    # (url rule, endpoint name, unbound handler) - registered in exactly this order
    routes = (
        ("/api/state", "api/state", ChessServer.serve_state),
        ("/api/new", "api/new", ChessServer.serve_new_game),
        ("/api/move", "api/move", ChessServer.serve_move),
        ("/", "serve_client_r", ChessServer.serve_client),
        ("/<path:path>", "serve_client", ChessServer.serve_client),
    )
    for rule, endpoint, handler in routes:
        # every handler is bound to this instance through _wrap_endpoint
        self.app.add_url_rule(rule, endpoint, self._wrap_endpoint(handler))

    self._gamestate = GameState()

    # Loading network
    net = NeuralNetAPI()

    # both agents are constructed, although only one of them is selected below;
    # batch_size, cpuct and dirichlet_epsilon are names taken from the enclosing scope
    raw_net_agent = RawNetAgent(net)
    mcts_agent = MCTSAgent(
        net,
        virtual_loss=3,
        threads=batch_size,
        cpuct=cpuct,
        dirichlet_epsilon=dirichlet_epsilon,
    )
    player_agents = {"raw_net": raw_net_agent, "mcts": mcts_agent}

    # Setting up agent
    self.agent = player_agents["raw_net"]
def perform_action(self, state: GameState, verbose=True):
    """
    Select a move via the parent class and register the already expanded grandchildren
    in the node look-up table so that a later search can pick them up again.

    Note: ``state`` is modified in place - the selected move is applied to it.

    :param state: Game state to act on (the selected move gets applied to this object)
    :param verbose: Not used inside this method body (it is not forwarded to the parent call)
    :return: value - q-value of the selected child taken from the root node
             selected_move - move which was chosen by the parent implementation
             confidence - confidence as returned by the parent implementation
             selected_child_idx - index of the selected child in the root node
    """
    # the value returned by the parent call is replaced by the q-value below
    _, selected_move, confidence, selected_child_idx = super().perform_action(state)

    # apply the selected move on the current board state in order to create
    # a lookup table for future board states
    state.apply_move(selected_move)

    # select the q value for the child which leads to the best calculated line
    value = self.root_node.q[selected_child_idx]

    # select the next node
    node = self.root_node.child_nodes[selected_child_idx]

    # an unexpanded child has nothing to register, so the (expensive) copies are skipped entirely
    if node is not None:
        for idx, mv in enumerate(state.get_legal_moves()):
            # indices only grow, so nothing after the last direct child can match
            if idx >= node.nb_direct_child_nodes:
                break

            child = node.child_nodes[idx]
            if child is None:
                # only children which have already been expanded are stored
                continue

            # copy the board only for children which are really stored
            state_future = deepcopy(state)
            state_future.apply_move(mv)

            # store the child node with its board fen as the hash-key
            self.node_lookup[state_future.get_board_fen()] = child

    return value, selected_move, confidence, selected_child_idx
def setup_network():
    """
    Load the libraries and the weights of the neural network.

    The work is only carried out while the module-level flag ``setup_done`` is ``False``.
    Afterwards the globals ``rawnet_agent``, ``mcts_agent`` and ``gamestate`` are set
    and ``setup_done`` is switched to ``True``.

    :return:
    """
    global gamestate, setup_done, rawnet_agent, mcts_agent, s, engine_played_move

    # the agents have already been created by an earlier call
    if setup_done is not False:
        return

    # the project modules are imported here and not at module level
    from DeepCrazyhouse.src.domain.crazyhouse.GameState import GameState
    from DeepCrazyhouse.src.domain.agent.NeuralNetAPI import NeuralNetAPI
    from DeepCrazyhouse.src.domain.agent.player.RawNetAgent import RawNetAgent
    from DeepCrazyhouse.src.domain.agent.player.MCTSAgent import MCTSAgent

    # check for valid parameter setup and do auto-corrections if possible
    param_validity_check()

    # one network handle per configured neural net service
    nets = [
        NeuralNetAPI(ctx=s["context"], batch_size=s["batch_size"])
        for _ in range(s["neural_net_services"])
    ]

    # the raw network agent only uses the first network handle
    first_net = nets[0]
    rawnet_agent = RawNetAgent(
        first_net,
        temperature=s["centi_temperature"] / 100,
        temperature_moves=s["temperature_moves"],
    )

    # options with a "centi_" prefix are divided by 100 before they are handed over
    mcts_options = {
        "cpuct": s["centi_cpuct"] / 100,
        "playouts_empty_pockets": s["playouts_empty_pockets"],
        "playouts_filled_pockets": s["playouts_filled_pockets"],
        "max_search_depth": s["max_search_depth"],
        "dirichlet_alpha": s["centi_dirichlet_alpha"] / 100,
        "q_value_weight": s["centi_q_value_weight"] / 100,
        "dirichlet_epsilon": s["centi_dirichlet_epsilon"] / 100,
        "virtual_loss": s["virtual_loss"],
        "threads": s["threads"],
        "temperature": s["centi_temperature"] / 100,
        "temperature_moves": s["temperature_moves"],
        "verbose": s["verbose"],
        "min_movetime": MIN_SEARCH_TIME_MS,
        "batch_size": s["batch_size"],
        "check_mate_in_one": s["check_mate_in_one"],
        "use_pruning": s["use_pruning"],
        "use_oscillating_cpuct": s["use_oscillating_cpuct"],
        "use_time_management": s["use_time_management"],
        "opening_guard_moves": s["opening_guard_moves"],
    }
    mcts_agent = MCTSAgent(nets, **mcts_options)

    gamestate = GameState()
    setup_done = True
def setup_network():
    """
    Load the libraries and the weights of the neural network.

    The work is only carried out while the module-level flag ``setup_done`` is ``False``.
    Afterwards the globals ``rawnet_agent``, ``mcts_agent`` and ``gamestate`` are set
    and ``setup_done`` is switched to ``True``.

    All options whose name starts with ``centi_`` are stored as hundredths and are
    divided by 100 before they are handed to an agent.

    :return:
    """
    global gamestate
    global setup_done
    global rawnet_agent
    global mcts_agent
    global s
    global engine_played_move

    if setup_done is False:
        from DeepCrazyhouse.src.domain.crazyhouse.GameState import GameState
        from DeepCrazyhouse.src.domain.agent.NeuralNetAPI import NeuralNetAPI
        from DeepCrazyhouse.src.domain.agent.player.RawNetAgent import RawNetAgent
        from DeepCrazyhouse.src.domain.agent.player.MCTSAgent import MCTSAgent

        # check for valid parameter setup and do auto-corrections if possible
        param_validity_check()

        net = NeuralNetAPI(ctx=s['context'], batch_size=s['batch_size'])

        # convert the centi-options once so that both agents receive the same scaled values
        temperature = s['centi_temperature'] / 100
        clip_quantil = s['centi_clip_quantil'] / 100

        # fix: the raw network agent used to receive the unscaled centi values
        rawnet_agent = RawNetAgent(net, temperature=temperature, clip_quantil=clip_quantil)

        mcts_agent = MCTSAgent(
            net,
            cpuct=s['centi_cpuct'] / 100,
            playouts_empty_pockets=s['playouts_empty_pockets'],
            playouts_filled_pockets=s['playouts_filled_pockets'],
            max_search_depth=s['max_search_depth'],
            dirichlet_alpha=s['centi_dirichlet_alpha'] / 100,
            q_value_weight=s['centi_q_value_weight'] / 100,
            dirichlet_epsilon=s['centi_dirichlet_epsilon'] / 100,
            virtual_loss=s['virtual_loss'],
            threads=s['threads'],
            temperature=temperature,
            verbose=s['verbose'],
            clip_quantil=clip_quantil,
            min_movetime=MIN_SEARCH_TIME_MS,
            batch_size=s['batch_size'],
            check_mate_in_one=s['check_mate_in_one'],
        )

        gamestate = GameState()
        setup_done = True
def _run_single_playout(self, state: GameState, parent_node: Node, depth=1, mv_list=None):
    """
    This function works recursively until a terminal node is reached

    :param state: Current game-state for the evaluation. This state differs between the threads
                  and is modified in place (the selected move is applied to it)
    :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call
    :param mv_list: List of moves which have been taken in the current path. For each selected child node this list
                    is expanded by one move recursively. A new list is created if None is given.
    :return: -value: The inverse value prediction of the current board state. The flipping by -1 each turn is needed
                     because the point of view changes each half-move
             depth: Current depth reach by this evaluation
             mv_list: List of moves which have been selected
    :raises Exception: if no move could be selected, if an expanded non-terminal state has no legal move
                       or if the policy look-up of a legal move fails
    """
    # a fresh list per call - a mutable default would be shared between all calls
    if mv_list is None:
        mv_list = []

    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)

    if move is None:
        raise Exception("Illegal tree setup. A 'None' move was selected which souldn't be possible")

    # update the visit counts to this node
    # temporarily reduce the attraction of this node by applying a virtual loss /
    # the effect of virtual loss will be undone if the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

    # apply the selected move on the board
    state.apply_move(move)

    # append the selected move to the move list
    mv_list.append(move)

    if node is None:
        # get the board-fen which is used as an identifier for the board positions in the look-up table
        board_fen = state.get_board_fen()

        # check if the addressed fen exist in the look-up table
        if board_fen in self.node_lookup:
            # get the node from the look-up list
            node = self.node_lookup[board_fen]

            with parent_node.lock:
                # setup a new connection from the parent to the child
                parent_node.child_nodes[child_idx] = node

            # get the value from the leaf node (the current function is called recursively)
            value, depth, mv_list = self._run_single_playout(state, node, depth + 1, mv_list)
        else:
            # expand and evaluate the new board state (the node wasn't found in the look-up table)
            # its value will be backpropagated through the tree and flipped after every layer

            # receive a free available pipe
            my_pipe = self.my_pipe_endings.pop()
            my_pipe.send(state.get_state_planes())

            # this pipe waits for the predictions of the network inference service
            [value, policy_vec] = my_pipe.recv()

            # put the used pipe back into the list
            self.my_pipe_endings.append(my_pipe)

            # initialize is_leaf by default to false
            is_leaf = False

            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            #  if the current player checkmated his opponent)
            if state.is_won() is True:
                value = -1
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            # check if you can claim a draw - its assumed that the draw is always claimed
            elif state.is_draw() is True:
                value = 0
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            else:
                # get the current legal move of its board state
                legal_moves = list(state.get_legal_moves())
                if len(legal_moves) < 1:
                    raise Exception('No legal move is available for state: %s' % state)

                # extract a sparse policy vector with normalized probabilities
                try:
                    p_vec_small = get_probs_of_move_list(
                        policy_vec, legal_moves, is_white_to_move=state.is_white_to_move(), normalize=True)
                except KeyError as err:
                    # keep the original KeyError attached as the cause
                    raise Exception('Key Error for state: %s' % state) from err

            # convert all legal moves to a string if the option check_mate_in_one was enabled
            if self.check_mate_in_one is True:
                str_legal_moves = str(state.get_legal_moves())
            else:
                str_legal_moves = ''

            # create a new node
            new_node = Node(value, p_vec_small, legal_moves, str_legal_moves, is_leaf)

            # include a reference to the new node in the look-up table
            self.node_lookup[board_fen] = new_node

            with parent_node.lock:
                # add the new node to its parent
                parent_node.child_nodes[child_idx] = new_node

            # check if the new node has a mate_in_one connection (if yes overwrite the network prediction)
            if new_node.mate_child_idx is not None:
                value = 1

    # check if we have reached a leaf node
    elif node.is_leaf is True:
        value = node.v

        # a prediction is still requested here although its result is discarded
        # NOTE(review): presumably this keeps the inference service supplied with one request
        # per playout - confirm against the prediction service
        my_pipe = self.my_pipe_endings.pop()
        my_pipe.send(state.get_state_planes())

        # this pipe waits for the predictions of the network inference service
        [_, _] = my_pipe.recv()

        # put the used pipe back into the list
        self.my_pipe_endings.append(my_pipe)
    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, mv_list = self._run_single_playout(state, node, depth + 1, mv_list)

    # revert the virtual loss and apply the predicted value by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)

    # we invert the value prediction for the parent of the above node layer
    # because the player's turn is flipped every turn
    return -value, depth, mv_list
def evaluate_board_state(self, state_in: GameState):
    """
    Analyzes the current board state

    If only a single legal move exists, no search is run: the root and its only child are
    created (or reused from the look-up table). Otherwise playouts are started in batches of
    ``self.threads`` until the maximum search depth, the playout budget or the move time is reached.

    :param state_in: Actual game state to evaluate for the MCTS (the search works on deep copies)
    :return: value - q-value of the root child which is preferred by the MCTS policy
             legal_moves - list of the legal moves of the given state
             p_vec_small - policy vector which belongs to legal_moves
    :raises Exception: if the given board state has no legal move
    """
    # store the time at which the search started
    t_start_eval = time()

    state = deepcopy(state_in)

    # check if the net prediction service has already been started
    if self.net_pred_service.running is False:
        # start the prediction daemon thread
        self.net_pred_service.start()

    # receive a list of all possible legal move in the current board position
    legal_moves = list(state.get_legal_moves())

    # store what depth has been reached at maximum in the current search tree
    # default is 1, in case only 1 move is available
    max_depth_reached = 1

    # consistency check
    if not legal_moves:
        raise Exception('The given board state has no legal move available')

    # check for fast way out
    if len(legal_moves) == 1:
        # set value 0 as a dummy value
        value = 0
        p_vec_small = np.array([1], np.float32)

        board_fen = state.get_pythonchess_board().fen()

        # check first if the the current tree can be reused
        if board_fen in self.node_lookup:
            self.root_node = self.node_lookup[board_fen]
            logging.debug('Reuse the search tree. Number of nodes in search tree: %d', self.root_node.n_sum)
        else:
            logging.debug("The given board position wasn't found in the search tree.")
            logging.debug("Starting a brand new search tree...")

            # create a new root node
            self.root_node = Node(value, p_vec_small, legal_moves, str(state.get_legal_moves()))

        # check a child node if it doesn't exists already
        if self.root_node.child_nodes[0] is None:
            state_child = deepcopy(state_in)
            state_child.apply_move(legal_moves[0])

            # initialize is_leaf by default to false
            is_leaf = False

            # fix: the terminal checks have to look at the child position. The root still has a
            # legal move, so testing the root could never detect that the only move ends the game.
            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            #  if the current player checkmated his opponent)
            if state_child.is_won() is True:
                value = -1
                is_leaf = True
                legal_moves_child = []
                p_vec_small_child = None
            # check if you can claim a draw - its assumed that the draw is always claimed
            elif state_child.is_draw() is True:
                value = 0
                is_leaf = True
                legal_moves_child = []
                p_vec_small_child = None
            else:
                legal_moves_child = list(state_child.get_legal_moves())

                # start a brand new prediction for the child
                state_planes = state_child.get_state_planes()
                [value, policy_vec] = self.net.predict_single(state_planes)

                # extract a sparse policy vector with normalized probabilities
                p_vec_small_child = get_probs_of_move_list(
                    policy_vec, legal_moves_child, state_child.is_white_to_move())

            # create a new child node
            child_node = Node(value, p_vec_small_child, legal_moves_child,
                              str(state_child.get_legal_moves()), is_leaf)

            # connect the child to the root
            self.root_node.child_nodes[0] = child_node
    else:
        board_fen = state.get_board_fen()

        # check first if the the current tree can be reused
        if board_fen in self.node_lookup:
            self.root_node = self.node_lookup[board_fen]
            logging.debug('Reuse the search tree. Number of nodes in search tree: %d',
                          self.root_node.nb_total_expanded_child_nodes)
        else:
            logging.debug("The given board position wasn't found in the search tree.")
            logging.debug("Starting a brand new search tree...")

            # initialize is_leaf by default to false
            is_leaf = False

            # start a brand new tree
            state_planes = state.get_state_planes()
            [value, policy_vec] = self.net.predict_single(state_planes)

            # extract a sparse policy vector with normalized probabilities
            p_vec_small = get_probs_of_move_list(policy_vec, legal_moves, state.is_white_to_move())

            # create a new root node
            self.root_node = Node(value, p_vec_small, legal_moves, str(state.get_legal_moves()), is_leaf)

        # clear the look up table
        self.node_lookup = {}

        # apply dirichlet noise to the prior probabilities in order to ensure
        # that every move can possibly be visited
        self.root_node.apply_dirichlet_noise_to_prior_policy(
            epsilon=self.dirichlet_epsilon, alpha=self.dirichlet_alpha)

        # set the number of playouts accordingly
        if state_in.are_pocket_empty() is True:
            nb_playouts = self.nb_playouts_empty_pockets
        else:
            nb_playouts = self.nb_playouts_filled_pockets

        t_elapsed = 0
        cur_playouts = 0
        old_time = time()

        while (max_depth_reached < self.max_search_depth
               and cur_playouts < nb_playouts
               and t_elapsed * 1000 < self.movetime_ms):

            # start searching; leaving the with-block waits until all playouts of this batch are done
            with ThreadPoolExecutor(max_workers=self.threads) as executor:
                # fix: the futures are collected per batch. The list used to grow over all batches,
                # so every finished playout was read (and printed) again in each later iteration.
                futures = [
                    executor.submit(self._run_single_playout, state=deepcopy(state),
                                    parent_node=self.root_node, depth=1, mv_list=[])
                    for _ in range(self.threads)
                ]

            cur_playouts += self.threads
            time_show_info = time() - old_time

            for f in futures:
                cur_value, cur_depth, mv_list = f.result()

                if cur_depth > max_depth_reached:
                    max_depth_reached = cur_depth

                # Print every second if verbose is true
                if self.verbose and time_show_info > 1:
                    str_moves = self._mv_list_to_str(mv_list)
                    logging.debug('Update: %d', cur_depth)
                    print('info score cp %d depth %d nodes %d pv%s'
                          % (value_to_centipawn(cur_value), cur_depth, self.root_node.n_sum, str_moves))
                    old_time = time()

            # update the current search time
            t_elapsed = time() - t_start_eval
            if self.verbose and time_show_info > 1:
                print('info nps %d time %d' % ((self.root_node.n_sum / t_elapsed), t_elapsed * 1000))

        # receive the policy vector based on the MCTS search
        p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)
        print('info string move overhead is %dms' % (t_elapsed * 1000 - self.movetime_ms))

    # store the current root in the lookup table
    self.node_lookup[state.get_board_fen()] = self.root_node

    # select the q-value according to the mcts best child value
    best_child_idx = self.root_node.get_mcts_policy(self.q_value_weight).argmax()
    value = self.root_node.q[best_child_idx]

    lst_best_moves, _ = self.get_calculated_line()
    str_moves = self._mv_list_to_str(lst_best_moves)

    # show the best calculated line
    time_e = time() - t_start_eval
    node_searched = self.root_node.n_sum
    print('info score cp %d depth %d nodes %d time %d nps %d pv%s'
          % (value_to_centipawn(value), max_depth_reached, node_searched, time_e * 1000,
             node_searched / max(1, time_e), str_moves))

    if len(legal_moves) != len(p_vec_small):
        print('Legal move list %s with length %s is uncompatible to policy vector %s with shape %s for board state %s'
              % (legal_moves, len(legal_moves), p_vec_small, p_vec_small.shape, state_in))
        self.node_lookup = {}
        # restart the search TODO: Fix this error
        # (a reused root node can carry a policy whose length differs from the current legal move
        #  list; the look-up table is dropped above so that the retry builds a brand new tree)
        return self.evaluate_board_state(state_in)

    return value, legal_moves, p_vec_small
class ChessServer(object):
    """
    Small Flask server which exposes a game against one of the engine agents.

    The server keeps a single game state, applies the move which is sent by the client,
    lets the selected agent answer and returns the serialized game state as JSON.
    """

    def __init__(self, name):
        """
        Create the Flask application, register its URL rules and set up the playing agent.

        :param name: Name which is handed over to the Flask constructor
        """
        self.app = Flask(name)

        # (url rule, endpoint name, unbound handler) - registered in exactly this order
        route_table = [
            ("/api/state", "api/state", ChessServer.serve_state),
            ("/api/new", "api/new", ChessServer.serve_new_game),
            ("/api/move", "api/move", ChessServer.serve_move),
            ("/", "serve_client_r", ChessServer.serve_client),
            ("/<path:path>", "serve_client", ChessServer.serve_client),
        ]
        for url_rule, endpoint_name, handler in route_table:
            self.app.add_url_rule(url_rule, endpoint_name, self._wrap_endpoint(handler))

        self._gamestate = GameState()

        # Loading network
        net = NeuralNetAPI()

        # batch_size, cpuct and dirichlet_epsilon are names taken from the enclosing scope
        raw_net_agent = RawNetAgent(net)
        mcts_agent = MCTSAgent(
            net,
            virtual_loss=3,
            threads=batch_size,
            cpuct=cpuct,
            dirichlet_epsilon=dirichlet_epsilon,
        )
        player_agents = {"raw_net": raw_net_agent, "mcts": mcts_agent}

        # Setting up agent
        self.agent = player_agents["raw_net"]
        # self.agent = player_agents["mcts"]

    def _wrap_endpoint(self, func):
        """
        Bind an unbound handler to this server instance.

        :param func: Handler which expects the server instance as its first argument
        :return: Callable which accepts keyword arguments only and forwards them to func
        """

        def endpoint(**kwargs):
            return func(self, **kwargs)

        return endpoint

    def run(self):
        """Start the Flask application."""
        self.app.run()

    # noinspection PyMethodMayBeStatic
    def serve_client(self, path=None):
        """
        Deliver a file of the web client.

        :param path: Requested path below the client directory, "index.html" is used if None
        :return: Result of send_from_directory for the "./client" directory
        """
        requested = "index.html" if path is None else path
        return send_from_directory("./client", requested)

    def serve_state(self):
        """Return the serialized current game state."""
        return self.serialize_game_state()

    def serve_new_game(self):
        """Reset the game and return the serialized fresh game state."""
        logging.debug("staring new game()")
        self.perform_new_game()
        return self.serialize_game_state()

    def serve_move(self):
        """
        Apply the move given by the request arguments and let the agent answer.

        The request arguments "drop", "from", "to" and "promotion" are read.

        :return: Serialized game state, including a message if the move was rejected
                 or if the agent could not move
        """
        # read move data
        params = request.args
        drop_piece = params.get("drop")
        from_square = params.get("from")
        to_square = params.get("to")
        promotion_piece = params.get("promotion")

        from_square_idx = get_square_index_from_name(from_square)
        to_square_idx = get_square_index_from_name(to_square)

        # a move needs a target square and either an origin square or a piece to drop
        origin_missing = from_square_idx is None and drop_piece is None
        if origin_missing or to_square_idx is None:
            return self.serialize_game_state("board name is invalid")

        promotion = None
        drop = None

        if drop_piece is not None:
            # a drop uses the target square as its origin as well
            from_square_idx = to_square_idx
            if drop_piece not in chess.PIECE_SYMBOLS:
                return self.serialize_game_state("drop piece name is invalid")
            drop = chess.PIECE_SYMBOLS.index(drop_piece)

        if promotion_piece is not None:
            if promotion_piece not in chess.PIECE_SYMBOLS:
                return self.serialize_game_state("promotion piece name is invalid")
            promotion = chess.PIECE_SYMBOLS.index(promotion_piece)

        move = chess.Move(from_square_idx, to_square_idx, promotion, drop)

        # perform move
        try:
            self.perform_move(move)
        except ValueError as e:
            logging.error("ValueError %s", e)
            return self.serialize_game_state(e.args[0])

        # calculate agent response
        if self.perform_agent_move():
            return self.serialize_game_state()
        return self.serialize_game_state("Black has no more moves to play", True)

    def perform_new_game(self):
        """Replace the current game state by a fresh one."""
        self._gamestate = GameState()

    def perform_move(self, move):
        """
        Apply a move to the game state after checking its legality.

        :param move: Move which shall be applied
        :return: False if the game is won after the move, otherwise None
        :raises ValueError: if the move is not legal in the current position
        """
        logging.debug("perform_move(): %s", move)

        # check if move is valid
        legal_now = list(self._gamestate.board.legal_moves)
        if move not in legal_now:
            raise ValueError("The given move %s is invalid for the current position" % move)

        self._gamestate.apply_move(move)

        if self._gamestate.is_won():
            logging.debug("Checkmate")
            return False

    def perform_agent_move(self):
        """
        Ask the agent for a move and apply it to the game state.

        :return: True if the agent move was applied,
                 False if the game is already won or the agent proposed no move
        """
        if self._gamestate.is_won():
            logging.debug("Checkmate")
            return False

        value, move, confidence, _ = self.agent.perform_action(self._gamestate)

        # the value is negated for logging when it is not white's turn
        if self._gamestate.is_white_to_move() is False:
            value = -value
        logging.debug("Value %.4f", value)

        if move is None:
            logging.error("None move proposed!")
            return False

        self.perform_move(move)
        return True

    def serialize_game_state(self, message=None, finished=None):
        """
        Convert the current game state into a JSON string.

        :param message: Optional message for the client, an empty string is used if None
        :param finished: Optional flag, the key "finished" is only added if it is not None
        :return: JSON string with the keys "board", "pocket", "message" and optionally "finished"
        """
        board = self._gamestate.board
        payload = {
            "board": str(board),
            # the pocket with index 1 is written first, separated by "|" from the pocket with index 0
            "pocket": "|".join((str(board.pockets[1]), str(board.pockets[0]))),
            "message": "" if message is None else message,
        }
        if finished is not None:
            payload["finished"] = finished
        return json.dumps(payload)
def perform_new_game(self):
    """Discard the current game by replacing the stored game state with a new GameState."""
    fresh_state = GameState()
    self._gamestate = fresh_state
def _run_single_playout(self, state: GameState, parent_node: Node, pipe_id=0, depth=1, chosen_nodes=None):
    """
    This function works recursively until a leaf or terminal node is reached.
    It ends by backpropagating the value of the new expanded node or by propagating the value of a terminal state.

    :param state: Current game-state for the evaluation. This state differs between the threads.
                  At depth 1 it is replaced by a private copy before the move is applied.
    :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
    :param pipe_id: Index of the pipe ending (and of the batch slot) which is used by this playout
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call
    :param chosen_nodes: List of the child indices which have been selected on the current path with respect to
                         the root node. It is expanded by one index per recursion.
                         A new list is created if None is given.
    :return: -value: The inverse value prediction of the current board state. The flipping by -1 each turn is needed
                     because the point of view changes each half-move
             depth: Current depth reach by this evaluation
             chosen_nodes: List of the child indices which have been selected
    :raises Exception: if no move could be selected, if an expanded non-terminal state has no legal move
                       or if the policy look-up of a legal move fails
    """
    # a fresh list per call - a mutable default would be shared between all calls
    if chosen_nodes is None:
        chosen_nodes = []

    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)

    if move is None:
        raise Exception("Illegal tree setup. A 'None' move was selected which souldn't be possible")

    # update the visit counts to this node
    # temporarily reduce the attraction of this node by applying a virtual loss /
    # the effect of virtual loss will be undone if the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

    if depth == 1:
        # work on a private copy of the board, the given state itself stays untouched
        state = GameState(deepcopy(state.get_pythonchess_board()))

    # apply the selected move on the board
    state.apply_move(move)

    # append the chosen child idx to the chosen_nodes list
    chosen_nodes.append(child_idx)

    if node is None:
        # get the transposition-key which is used as an identifier for the board positions in the look-up table
        transposition_key = state.get_transposition_key()

        # note: It's important to use also the halfmove-counter here, otherwise the system can create an infinite
        # feed-back-loop
        key = (transposition_key, state.get_halfmove_counter())

        # expand and evaluate the new board state
        # its value will be backpropagated through the tree and flipped after every layer
        my_pipe = self.my_pipe_endings[pipe_id]

        if self.send_batches is True:
            my_pipe.send(state.get_state_planes())
            # this pipe waits for the predictions of the network inference service
            [value, policy_vec] = my_pipe.recv()
        else:
            # the planes are written into the slot of this pipe and only the pipe id is sent;
            # the answer names the channel from which the results are copied
            state_planes = state.get_state_planes()
            self.batch_state_planes[pipe_id] = state_planes
            my_pipe.send(pipe_id)

            result_channel = my_pipe.recv()

            value = np.array(self.batch_value_results[result_channel])
            policy_vec = np.array(self.batch_policy_results[result_channel])

        # initialize is_leaf by default to false
        is_leaf = False

        # check if the current player has won the game
        # (we don't need to check for is_lost() because the game is already over
        #  if the current player checkmated his opponent)
        # is_won() is only consulted if the side to move is in check
        is_won = state.is_check() is True and state.is_won() is True

        if is_won is True:
            value = -1
            is_leaf = True
            legal_moves = []
            p_vec_small = None
            # establish a mate in one connection in order to stop exploring different alternatives
            parent_node.mate_child_idx = child_idx
        # check if you can claim a draw - its assumed that the draw is always claimed
        elif self.can_claim_threefold_repetition(transposition_key, chosen_nodes) or \
                state.get_pythonchess_board().can_claim_fifty_moves() is True:
            value = 0
            is_leaf = True
            legal_moves = []
            p_vec_small = None
        else:
            # get the current legal move of its board state
            legal_moves = state.get_legal_moves()
            if len(legal_moves) < 1:
                raise Exception('No legal move is available for state: %s' % state)

            # extract a sparse policy vector with normalized probabilities
            try:
                p_vec_small = get_probs_of_move_list(
                    policy_vec, legal_moves, is_white_to_move=state.is_white_to_move(), normalize=True)
            except KeyError as err:
                # keep the original KeyError attached as the cause
                raise Exception('Key Error for state: %s' % state) from err

        # convert all legal moves to a string if the option check_mate_in_one was enabled
        if self.check_mate_in_one is True:
            str_legal_moves = str(state.get_legal_moves())
        else:
            str_legal_moves = ''

        # clip the visit nodes for all nodes in the search tree except the direct opp. move
        clip_low_visit = self.use_pruning and depth != 1

        # create a new node
        new_node = Node(value, p_vec_small, legal_moves, str_legal_moves, is_leaf,
                        transposition_key, clip_low_visit)

        # NOTE(review): the nesting of the two checks below was reconstructed from a
        # whitespace-mangled source - confirm against the project history
        if depth == 1:
            # disable uncertain moves from being visited by giving them a very bad score
            if is_leaf is False:
                if self.root_node_prior_policy[child_idx] < 1e-3 and value * -1 < self.root_node.v:
                    with parent_node.lock:
                        value = 99

            if value < 0:
                # test of adding dirichlet noise to a new node
                new_node.apply_dirichlet_noise_to_prior_policy(
                    epsilon=self.dirichlet_epsilon * .02, alpha=self.dirichlet_alpha)

        if self.use_pruning is False:
            # include a reference to the new node in the look-up table
            self.node_lookup[key] = new_node

        with parent_node.lock:
            # add the new node to its parent
            parent_node.child_nodes[child_idx] = new_node

    # check if we have reached a leaf node
    elif node.is_leaf is True:
        value = node.v
    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, chosen_nodes = self._run_single_playout(state, node, pipe_id, depth + 1, chosen_nodes)

    # revert the virtual loss and apply the predicted value by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)

    # we invert the value prediction for the parent of the above node layer
    # because the player's turn is flipped every turn
    return -value, depth, chosen_nodes
def evaluate_board_state(self, state: GameState):
    """
    Analyzes the current board state. This is the main method which get called by the uci interface or
    analysis request.

    :param state: Actual game state to evaluate for the MCTS
    :return: value - q-value of the root child which is preferred by the MCTS policy
             legal_moves - legal moves of the given state
             p_vec_small - MCTS policy which belongs to legal_moves
             cp - value converted by value_to_centipawn
             depth - maximum depth which was reached by the search
             nodes - number of nodes which were added to the root during this call
             time_elapsed - elapsed time multiplied by 1000 (i.e. milliseconds)
             nps - searched nodes per second
             pv - string of the best calculated line
    :raises Exception: if the state has no legal move or if the length of the policy differs from
                       the number of legal moves
    """
    # store the time at which the search started
    self.t_start_eval = time()

    # check if the net prediction service has already been started
    if self.net_pred_services[0].running is False:
        # start the prediction daemon thread
        for net_pred_service in self.net_pred_services:
            net_pred_service.start()

    # receive a list of all possible legal move in the current board position
    legal_moves = state.get_legal_moves()

    # consistency check
    if len(legal_moves) == 0:
        raise Exception('The given board state has no legal move available')

    # check first if the the current tree can be reused
    # fix: the halfmove counter has to be called. The bound method object itself was used as part
    # of the key before, which can never equal the integer based keys written by the playouts.
    key = (state.get_transposition_key(), state.get_halfmove_counter())

    if self.use_pruning is False and key in self.node_lookup:
        self.root_node = self.node_lookup[key]

        logging.debug('Reuse the search tree. Number of nodes in search tree: %d',
                      self.root_node.nb_total_expanded_child_nodes)
        self.total_nodes_pre_search = deepcopy(self.root_node.n_sum)

        # reset potential good nodes for the root
        self.root_node.q[self.root_node.q < 1.1] = 0
    else:
        logging.debug("Starting a brand new search tree...")
        self.root_node = None
        self.total_nodes_pre_search = 0

    # check for fast way out
    if len(legal_moves) == 1:
        # if there's only a single legal move you only must go 1 depth
        max_depth_reached = 1

        if self.root_node is None:
            # conduct all necessary steps for fastest way out
            self._expand_root_node_single_move(state, legal_moves)
    else:
        if self.root_node is None:
            # run a single expansion on the root node
            self._expand_root_node_multiple_moves(state, legal_moves)

        # conduct the mcts-search based on the given settings
        max_depth_reached = self._run_mcts_search(state)

        t_elapsed = time() - self.t_start_eval
        print('info string move overhead is %dms' % (t_elapsed * 1000 - self.movetime_ms))

    # receive the policy vector based on the MCTS search
    p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)

    if self.use_pruning is False:
        # store the current root in the lookup table
        self.node_lookup[key] = self.root_node

    # select the q-value according to the mcts best child value
    best_child_idx = p_vec_small.argmax()
    value = self.root_node.q[best_child_idx]

    lst_best_moves, _ = self.get_calculated_line()
    str_moves = self._mv_list_to_str(lst_best_moves)

    # show the best calculated line
    node_searched = int(self.root_node.n_sum - self.total_nodes_pre_search)
    # In uci the depth is given using half-moves notation also called plies
    time_e = time() - self.t_start_eval

    if len(legal_moves) != len(p_vec_small):
        raise Exception(
            'Legal move list %s with length %s is uncompatible to policy vector %s'
            ' with shape %s for board state %s and nodes legal move list: %s'
            % (legal_moves, len(legal_moves), p_vec_small, p_vec_small.shape, state, self.root_node.legal_moves))

    # define the remaining return variables
    cp = value_to_centipawn(value)
    depth = max_depth_reached
    nodes = node_searched
    # the elapsed time is reported in milliseconds
    time_elapsed_ms = time_e * 1000
    # guard against a zero time difference on clocks with a coarse resolution
    nps = node_searched / time_e if time_e > 0 else 0.0
    pv = str_moves

    return value, legal_moves, p_vec_small, cp, depth, nodes, time_elapsed_ms, nps, pv