def perform_action(self, state: GameState, verbose=True):
    """
    Runs the base agent's move selection, applies the chosen move to the given
    state and registers already-expanded grandchild nodes in the board-fen
    lookup table so future searches can reuse them.

    :param state: Current game state; the selected move is applied to it in place
    :param verbose: Kept for interface compatibility with the base agent
    :return: value, selected_move, confidence, selected_child_idx
    """
    value, selected_move, confidence, selected_child_idx = super().perform_action(state)

    # apply the selected move on the current board state in order to create
    # a lookup table for future board states
    state.apply_move(selected_move)

    # prefer the Q-value of the line the search actually selected over the
    # value returned by the base agent
    value = self.root_node.q[selected_child_idx]

    # subtree the search will descend into on the next turn
    chosen_child = self.root_node.child_nodes[selected_child_idx]

    # store reference links for every possible reply position in the node lookup table
    for reply_idx, reply_move in enumerate(state.get_legal_moves()):
        future_state = deepcopy(state)
        future_state.apply_move(reply_move)

        # only cache replies whose nodes have already been expanded
        cached_node = None
        if chosen_child is not None and reply_idx < chosen_child.nb_direct_child_nodes:
            cached_node = chosen_child.child_nodes[reply_idx]
        if cached_node is not None:
            # key the expanded node by the fen of the resulting position
            self.node_lookup[future_state.get_board_fen()] = cached_node

    return value, selected_move, confidence, selected_child_idx
def _run_single_playout(self, state: GameState, parent_node: Node, depth=1, mv_list=None):
    """
    Runs one playout recursively until a terminal or leaf node is reached.

    Selects a child of ``parent_node``, applies the corresponding move to
    ``state`` and either reuses a position from the look-up table, expands a
    new node (querying the network inference service through a pipe), or
    recurses one level deeper. The resulting value is backpropagated with a
    sign flip per half-move.

    :param state: Current game-state for the evaluation. This state differs between the threads
    :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call
    :param mv_list: List of moves which have been taken in the current path. For each selected child node this
                    list is expanded by one move recursively. Defaults to a fresh list per top-level call.
    :return: -value: The inverse value prediction of the current board state. The flipping by -1 each turn is
                     needed because the point of view changes each half-move
             depth: Current depth reached by this evaluation
             mv_list: List of moves which have been selected
    """
    # FIX: the original used a mutable default argument (mv_list=[]), which is
    # shared across every call and grows forever; create a fresh list instead
    if mv_list is None:
        mv_list = []

    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)

    if move is None:
        raise Exception("Illegal tree setup. A 'None' move was selected which shouldn't be possible")

    # update the visit counts to this node
    # temporarily reduce the attraction of this node by applying a virtual loss /
    # the effect of virtual loss will be undone if the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

    # apply the selected move on the board
    state.apply_move(move)
    # append the selected move to the move list
    mv_list.append(move)

    if node is None:
        # get the board-fen which is used as an identifier for the board positions in the look-up table
        board_fen = state.get_board_fen()

        # check if the addressed fen exists in the look-up table
        if board_fen in self.node_lookup:
            # get the node from the look-up list
            node = self.node_lookup[board_fen]

            with parent_node.lock:
                # setup a new connection from the parent to the child
                parent_node.child_nodes[child_idx] = node

            # get the value from the leaf node (the current function is called recursively)
            value, depth, mv_list = self._run_single_playout(state, node, depth + 1, mv_list)
        else:
            # expand and evaluate the new board state (the node wasn't found in the look-up table)
            # its value will be backpropagated through the tree and flipped after every layer

            # receive a free available pipe
            my_pipe = self.my_pipe_endings.pop()
            my_pipe.send(state.get_state_planes())
            # this pipe waits for the predictions of the network inference service
            [value, policy_vec] = my_pipe.recv()
            # put the used pipe back into the list
            self.my_pipe_endings.append(my_pipe)

            # initialize is_leaf by default to false
            is_leaf = False

            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            #  if the current player checkmated his opponent)
            if state.is_won() is True:
                value = -1
                is_leaf = True
                legal_moves = []
                p_vec_small = None

            # check if you can claim a draw - it's assumed that the draw is always claimed
            elif state.is_draw() is True:
                value = 0
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            else:
                # get the current legal moves of its board state
                legal_moves = list(state.get_legal_moves())
                if len(legal_moves) < 1:
                    raise Exception('No legal move is available for state: %s' % state)

                # extract a sparse policy vector with normalized probabilities
                try:
                    p_vec_small = get_probs_of_move_list(policy_vec, legal_moves,
                                                         is_white_to_move=state.is_white_to_move(),
                                                         normalize=True)
                except KeyError:
                    raise Exception('Key Error for state: %s' % state)

            # convert all legal moves to a string if the option check_mate_in_one was enabled
            if self.check_mate_in_one is True:
                str_legal_moves = str(state.get_legal_moves())
            else:
                str_legal_moves = ''

            # create a new node
            new_node = Node(value, p_vec_small, legal_moves, str_legal_moves, is_leaf)

            # include a reference to the new node in the look-up table
            self.node_lookup[board_fen] = new_node

            with parent_node.lock:
                # add the new node to its parent
                parent_node.child_nodes[child_idx] = new_node

            # check if the new node has a mate_in_one connection (if yes overwrite the network prediction)
            if new_node.mate_child_idx is not None:
                value = 1

    # check if we have reached a leaf node
    elif node.is_leaf is True:
        value = node.v
        # NOTE(review): a dummy network request is issued even though the value is
        # already known — presumably to keep the inference batching in sync across
        # threads; confirm before removing
        my_pipe = self.my_pipe_endings.pop()
        my_pipe.send(state.get_state_planes())
        # this pipe waits for the predictions of the network inference service
        [_, _] = my_pipe.recv()
        # put the used pipe back into the list
        self.my_pipe_endings.append(my_pipe)
    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, mv_list = self._run_single_playout(state, node, depth + 1, mv_list)

    # revert the virtual loss and apply the predicted value by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)

    # we invert the value prediction for the parent of the above node layer
    # because the player's turn is flipped every half-move
    return -value, depth, mv_list
class ChessServer(object):
    """
    Small Flask web server that serves the static browser client and exposes a
    JSON API (/api/state, /api/new, /api/move) to play a game against an agent.
    """

    def __init__(self, name):
        self.app = Flask(name)
        self.app.add_url_rule("/api/state", "api/state", self._wrap_endpoint(ChessServer.serve_state))
        self.app.add_url_rule("/api/new", "api/new", self._wrap_endpoint(ChessServer.serve_new_game))
        self.app.add_url_rule("/api/move", "api/move", self._wrap_endpoint(ChessServer.serve_move))
        self.app.add_url_rule("/", "serve_client_r", self._wrap_endpoint(ChessServer.serve_client))
        self.app.add_url_rule("/<path:path>", "serve_client", self._wrap_endpoint(ChessServer.serve_client))

        self._gamestate = GameState()

        net = NeuralNetAPI()  # Loading network

        player_agents = {
            "raw_net": RawNetAgent(net),
            "mcts": MCTSAgent(net, virtual_loss=3, threads=batch_size, cpuct=cpuct,
                              dirichlet_epsilon=dirichlet_epsilon),
        }

        # Setting up agent
        self.agent = player_agents["raw_net"]
        # self.agent = player_agents["mcts"]

    def _wrap_endpoint(self, func):
        """Binds this instance to an unbound endpoint method so Flask can call it with URL kwargs."""
        def wrapper(kwargs):
            return func(self, **kwargs)

        return lambda **kwargs: wrapper(kwargs)

    def run(self):
        """Starts the Flask development server (blocking)."""
        self.app.run()

    # noinspection PyMethodMayBeStatic
    def serve_client(self, path=None):
        """Serves the static client files; defaults to index.html."""
        if path is None:
            path = "index.html"
        return send_from_directory("./client", path)

    def serve_state(self):
        """Returns the serialized current game state."""
        return self.serialize_game_state()

    def serve_new_game(self):
        """Resets the board to the initial position and returns the fresh state."""
        # FIX: log message typo — was "staring new game()"
        logging.debug("starting new game()")
        self.perform_new_game()
        return self.serialize_game_state()

    def serve_move(self):
        """Parses a move from the request args, applies it and lets the agent reply."""
        # read move data
        drop_piece = request.args.get("drop")
        from_square = request.args.get("from")
        to_square = request.args.get("to")
        promotion_piece = request.args.get("promotion")

        from_square_idx = get_square_index_from_name(from_square)
        to_square_idx = get_square_index_from_name(to_square)
        if (from_square_idx is None and drop_piece is None) or to_square_idx is None:
            return self.serialize_game_state("board name is invalid")

        promotion = None
        drop = None

        if drop_piece is not None:
            # a drop move: the source square equals the target square
            from_square_idx = to_square_idx
            if drop_piece not in chess.PIECE_SYMBOLS:
                return self.serialize_game_state("drop piece name is invalid")
            drop = chess.PIECE_SYMBOLS.index(drop_piece)

        if promotion_piece is not None:
            if promotion_piece not in chess.PIECE_SYMBOLS:
                return self.serialize_game_state("promotion piece name is invalid")
            promotion = chess.PIECE_SYMBOLS.index(promotion_piece)

        move = chess.Move(from_square_idx, to_square_idx, promotion, drop)

        # perform move
        try:
            self.perform_move(move)
        except ValueError as err:
            logging.error("ValueError %s", err)
            return self.serialize_game_state(err.args[0])

        # calculate agent response
        if not self.perform_agent_move():
            return self.serialize_game_state("Black has no more moves to play", True)

        return self.serialize_game_state()

    def perform_new_game(self):
        """Replaces the current game state with a fresh one."""
        self._gamestate = GameState()

    def perform_move(self, move):
        """
        Validates and applies the given move to the current game state.

        :param move: chess.Move to apply
        :return: False if the move checkmated the opponent, True otherwise
        :raises ValueError: if the move is illegal in the current position
        """
        logging.debug("perform_move(): %s", move)

        # check if move is valid
        if move not in list(self._gamestate.board.legal_moves):
            raise ValueError("The given move %s is invalid for the current position" % move)
        self._gamestate.apply_move(move)

        if self._gamestate.is_won():
            logging.debug("Checkmate")
            return False
        # FIX: previously fell through returning None; always return a bool so
        # callers can rely on a consistent result (visible callers ignore it)
        return True

    def perform_agent_move(self):
        """
        Asks the configured agent for a move and applies it.

        :return: False if the game is already won or no move was proposed, True otherwise
        """
        if self._gamestate.is_won():
            logging.debug("Checkmate")
            return False

        value, move, confidence, _ = self.agent.perform_action(self._gamestate)

        # report the value from White's point of view
        if self._gamestate.is_white_to_move() is False:
            value = -value

        logging.debug("Value %.4f", value)

        if move is None:
            logging.error("None move proposed!")
            return False

        self.perform_move(move)
        return True

    def serialize_game_state(self, message=None, finished=None):
        """
        Serializes board, pockets and an optional message/finished flag to JSON.

        :param message: Optional status/error message (defaults to "")
        :param finished: Optional flag included only when not None
        :return: JSON string with keys "board", "pocket", "message" [, "finished"]
        """
        if message is None:
            message = ""

        board_str = str(self._gamestate.board)
        # pockets[1] (white) and pockets[0] (black) joined with a '|' separator
        pocket_str = str(self._gamestate.board.pockets[1]) + "|" + str(self._gamestate.board.pockets[0])
        state = {"board": board_str, "pocket": pocket_str, "message": message}
        if finished is not None:
            state["finished"] = finished
        return json.dumps(state)
def _run_single_playout(self, state: GameState, parent_node: Node, pipe_id=0, depth=1, chosen_nodes=None):
    """
    This function works recursively until a leaf or terminal node is reached.
    It ends by backpropagating the value of the new expanded node or by propagating the value of a terminal state.

    :param state: Current game-state for the evaluation. This state differs between the threads
    :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
    :param pipe_id: Index of the pipe ending / batch slot used to talk to the network inference service
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call
    :param chosen_nodes: List of child indices that this thread has explored with respect to the root node.
                         Expanded by one entry per recursion level. Defaults to a fresh list per playout.
    :return: -value: The inverse value prediction of the current board state. The flipping by -1 each turn is
                     needed because the point of view changes each half-move
             depth: Current depth reached by this evaluation
             chosen_nodes: List of child indices which have been selected
    """
    # FIX: the original used a mutable default argument (chosen_nodes=[]), which is
    # shared across every call and grows forever; create a fresh list instead
    if chosen_nodes is None:
        chosen_nodes = []

    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)

    if move is None:
        raise Exception("Illegal tree setup. A 'None' move was selected which shouldn't be possible")

    # update the visit counts to this node
    # temporarily reduce the attraction of this node by applying a virtual loss /
    # the effect of virtual loss will be undone if the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

    if depth == 1:
        # work on a private copy of the board so parallel playouts don't interfere
        state = GameState(deepcopy(state.get_pythonchess_board()))

    # apply the selected move on the board
    state.apply_move(move)

    # append the chosen child idx to the chosen_nodes list
    chosen_nodes.append(child_idx)

    if node is None:
        # get the transposition-key which is used as an identifier for the board positions in the look-up table
        transposition_key = state.get_transposition_key()
        # note: It's important to use also the halfmove-counter here, otherwise the system can create an infinite
        # feed-back-loop
        key = (transposition_key, state.get_halfmove_counter())

        # expand and evaluate the new board state (the node wasn't found in the look-up table)
        # its value will be backpropagated through the tree and flipped after every layer

        # receive a free available pipe
        my_pipe = self.my_pipe_endings[pipe_id]

        if self.send_batches is True:
            my_pipe.send(state.get_state_planes())
            # this pipe waits for the predictions of the network inference service
            [value, policy_vec] = my_pipe.recv()
        else:
            # write the planes into the shared batch slot and wait for the result channel
            state_planes = state.get_state_planes()
            self.batch_state_planes[pipe_id] = state_planes
            my_pipe.send(pipe_id)
            result_channel = my_pipe.recv()
            value = np.array(self.batch_value_results[result_channel])
            policy_vec = np.array(self.batch_policy_results[result_channel])

        # initialize is_leaf by default to false
        is_leaf = False

        # check if the current player has won the game
        # (we don't need to check for is_lost() because the game is already over
        #  if the current player checkmated his opponent)
        is_won = False
        is_check = False

        if state.is_check() is True:
            is_check = True
            # NOTE(review): is_won() is only evaluated while in check (checkmate
            # implies check) — confirm this matches the intended short-circuit
            if state.is_won() is True:
                is_won = True

        if is_won is True:
            value = -1
            is_leaf = True
            legal_moves = []
            p_vec_small = None
            # establish a mate in one connection in order to stop exploring different alternatives
            parent_node.mate_child_idx = child_idx

        # check if you can claim a draw - it's assumed that the draw is always claimed
        elif self.can_claim_threefold_repetition(transposition_key, chosen_nodes) or \
                state.get_pythonchess_board().can_claim_fifty_moves() is True:
            value = 0
            is_leaf = True
            legal_moves = []
            p_vec_small = None
        else:
            # get the current legal moves of its board state
            legal_moves = state.get_legal_moves()
            if len(legal_moves) < 1:
                raise Exception('No legal move is available for state: %s' % state)

            # extract a sparse policy vector with normalized probabilities
            try:
                p_vec_small = get_probs_of_move_list(policy_vec, legal_moves,
                                                     is_white_to_move=state.is_white_to_move(),
                                                     normalize=True)
            except KeyError:
                raise Exception('Key Error for state: %s' % state)

        # convert all legal moves to a string if the option check_mate_in_one was enabled
        if self.check_mate_in_one is True:
            str_legal_moves = str(state.get_legal_moves())
        else:
            str_legal_moves = ''

        # clip the visit counts for all nodes in the search tree except the direct opponent move
        clip_low_visit = self.use_pruning and depth != 1

        # create a new node
        new_node = Node(value, p_vec_small, legal_moves, str_legal_moves, is_leaf,
                        transposition_key, clip_low_visit)

        if depth == 1:
            # disable uncertain moves from being visited by giving them a very bad score
            if is_leaf is False:
                if self.root_node_prior_policy[child_idx] < 1e-3 and value * -1 < self.root_node.v:
                    with parent_node.lock:
                        value = 99

            if value < 0:  # and state.are_pocket_empty():  # and pipe_id == 0:
                # test of adding dirichlet noise to a new node
                new_node.apply_dirichlet_noise_to_prior_policy(epsilon=self.dirichlet_epsilon * .02,
                                                               alpha=self.dirichlet_alpha)

        if self.use_pruning is False:
            # include a reference to the new node in the look-up table
            self.node_lookup[key] = new_node

        with parent_node.lock:
            # add the new node to its parent
            parent_node.child_nodes[child_idx] = new_node

    # check if we have reached a leaf node
    elif node.is_leaf is True:
        value = node.v
    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, chosen_nodes = self._run_single_playout(state, node, pipe_id, depth + 1, chosen_nodes)

    # revert the virtual loss and apply the predicted value by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)

    # we invert the value prediction for the parent of the above node layer
    # because the player's turn is flipped every half-move
    return -value, depth, chosen_nodes