def evaluate_board_state(self, state: AbsGameState):
    """
    The greedy agent always performs the first legal move with the highest move probability.

    :param state: Gamestate object
    :return: value - Value prediction in the current players view from [-1,1]: -1 -> 100% lost, +1 100% won
             legal_moves - List of all legal moves in the given position
             p_vec_small - Probability distribution over the legal moves
             centipawn - Centi pawn evaluation which is converted from the value prediction in current player's view
             depth - Depth which was reached (always 1 for the greedy agent)
             nodes - Number of nodes which have been evaluated (always 1)
             time_elapsed_s - Elapsed time in milliseconds (NOTE: name kept for interface
                              compatibility, but the value is ms, not seconds)
             nps - Nodes per second metric
             pv - UCI string of the single move with the highest probability
    """
    t_start_eval = time()
    # single network inference returning the value and the raw policy for this position
    pred_value, pred_policy = self._net.predict_single(state.get_state_planes())
    legal_moves = list(state.get_legal_moves())
    # project the raw policy vector onto the legal moves with normalized probabilities
    p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.is_white_to_move())

    # define the remaining return variables
    time_e = time() - t_start_eval
    centipawn = value_to_centipawn(pred_value)
    depth = nodes = 1
    time_elapsed_s = time_e * 1000
    # guard against a zero measured duration (coarse clocks) to avoid ZeroDivisionError
    nps = nodes / max(time_e, 1e-9)
    # use the move with the highest probability as the best move for logging
    pv = legal_moves[p_vec_small.argmax()].uci()

    return pred_value, legal_moves, p_vec_small, centipawn, depth, nodes, time_elapsed_s, nps, pv
def _expand_root_node_multiple_moves(self, state, legal_moves):
    """
    Expands the root node by running a single neural network inference on the
    current board state and attaching the resulting policy to a fresh Node.

    :param state: Current game state
    :param legal_moves: Available moves
    :return:
    """
    # run one inference for the root position
    net_value, raw_policy = self.nets[0].predict_single(state.get_state_planes())
    # extract a sparse policy vector with normalized probabilities over the legal moves
    sparse_policy = get_probs_of_move_list(raw_policy, legal_moves, state.is_white_to_move())
    # only materialize the legal-move string when mate-in-one detection needs it
    str_legal_moves = str(state.get_legal_moves()) if self.check_mate_in_one is True else ''
    # the new root is never a leaf node
    self.root_node = Node(net_value, sparse_policy, legal_moves, str_legal_moves, False,
                          clip_low_visit=False)
def evaluate_board_state(self, state: _GameState):
    """
    Evaluates the given board position with a single network inference (greedy, depth 1).

    :param state: Game state object for the position to evaluate
    :return: pred_value - Value prediction in the current player's view from [-1,1]
             legal_moves - List of all legal moves in the given position
             p_vec_small - Probability distribution over the legal moves
             cp - Centipawn conversion of the value prediction
             depth - Search depth reached (always 1)
             nodes - Number of evaluated nodes (always 1)
             time_elapsed_s - Elapsed time in milliseconds (name kept for interface compatibility)
             nps - Nodes per second metric
             pv - UCI string of the move with the highest probability
    """
    t_start_eval = time()
    pred_value, pred_policy = self._net.predict_single(state.get_state_planes())
    legal_moves = list(state.get_legal_moves())
    # extract a sparse policy vector with normalized probabilities over the legal moves
    p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.is_white_to_move())
    # use the move with the highest probability as the best move for logging
    instinct_move = legal_moves[p_vec_small.argmax()]

    # define the remaining return variables
    time_e = time() - t_start_eval
    cp = value_to_centipawn(pred_value)
    depth = 1
    nodes = 1
    time_elapsed_s = time_e * 1000
    # guard against a zero measured duration (coarse clocks) to avoid ZeroDivisionError
    nps = nodes / max(time_e, 1e-9)
    pv = instinct_move.uci()

    return pred_value, legal_moves, p_vec_small, cp, depth, nodes, time_elapsed_s, nps, pv
def _expand_root_node_single_move(self, state, legal_moves):
    """
    Expands the current root in the case if there's only a single move available.
    The neural network search can be omitted in this case.

    :param state: Request games state
    :param legal_moves: Available moves (exactly one entry)
    :return:
    """
    # request the value prediction for the current position (policy is irrelevant here)
    [value, _] = self.nets[0].predict_single(state.get_state_planes())
    # we can create the move probability vector without the NN this time
    p_vec_small = np.array([1], np.float32)

    # create a new root node
    self.root_node = Node(state.get_pythonchess_board(), value, p_vec_small, legal_moves,
                          clip_low_visit=False)

    if self.root_node.child_nodes[0] is None:  # check a child node if it doesn't exists already
        state_child = deepcopy(state)
        state_child.apply_move(legal_moves[0])
        is_leaf = False  # initialize is_leaf by default to false

        # we don't need to check for is_lost() because the game is already over
        if state.is_won():  # check if the current player has won the game
            value = -1
            is_leaf = True
            legal_moves_child = []
            p_vec_small_child = None
        # check if you can claim a draw - its assumed that the draw is always claimed
        elif (
            self.can_claim_threefold_repetition(state.get_transposition_key(), [0])
            or state.get_pythonchess_board().can_claim_fifty_moves()
        ):
            value = 0
            is_leaf = True
            legal_moves_child = []
            p_vec_small_child = None
        else:
            legal_moves_child = state_child.get_legal_moves()
            # start a brand new prediction for the child
            [value, policy_vec] = self.nets[0].predict_single(state_child.get_state_planes())
            # extract a sparse policy vector with normalized probabilities
            p_vec_small_child = get_probs_of_move_list(
                policy_vec, legal_moves_child, state_child.is_white_to_move()
            )

        # create a new child node
        # BUGFIX: the child node must carry the board AFTER the move was applied
        # (state_child), not the parent board - later playouts rebuild game states
        # from node.board
        child_node = Node(state_child.get_pythonchess_board(), value, p_vec_small_child,
                          legal_moves_child, is_leaf)
        self.root_node.child_nodes[0] = child_node  # connect the child to the root
        # assign the value of the root node as the q-value for the child;
        # here we must invert the value because it's the value prediction of the next state
        self.root_node.q_value[0] = -value
def evaluate_board_state(self, state: _GameState, verbose=True):
    """
    Evaluates the position with a single network inference and optionally prints
    a UCI info line for the instinct (highest-probability) move.

    :param state: Game state object to evaluate
    :param verbose: If True, print the UCI info line for the best move
    :return: pred_value, legal_moves, p_vec_small
    """
    eval_start = time()
    pred_value, pred_policy = self._net.predict_single(state.get_state_planes())
    legal_moves = list(state.get_legal_moves())
    # sparse, normalized policy over the legal moves only
    p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.is_white_to_move())

    if verbose is True:
        # the move with the highest probability serves as the reported best move
        instinct_move = legal_moves[p_vec_small.argmax()]
        elapsed_ms = (time() - eval_start) * 1000
        # show the best calculated line
        print('info score cp %d depth %d nodes %d time %d pv %s' % (
            value_to_centipawn(pred_value), 1, 1, elapsed_ms, instinct_move.uci()))

    return pred_value, legal_moves, p_vec_small
def _expand_root_node_multiple_moves(self, state, legal_moves):
    """
    Builds a brand-new root node from a single network inference, optionally
    boosting capture and check moves in the prior policy.

    :param state: Current game state
    :param legal_moves: Available moves
    :return:
    """
    chess_board = state.get_pythonchess_board()
    # single inference for the root position
    [net_value, raw_policy] = self.nets[0].predict_single(state.get_state_planes())
    # sparse policy vector with normalized probabilities over the legal moves
    prior_policy = get_probs_of_move_list(raw_policy, legal_moves, state.is_white_to_move())

    # optionally boost tactically relevant moves in the prior (captures first, then checks,
    # matching the original ordering since both mutate prior_policy in place)
    if self.enhance_captures:
        self._enhance_captures(chess_board, legal_moves, prior_policy)
    if self.enhance_checks:
        self._enhance_checks(chess_board, legal_moves, prior_policy)

    # the freshly expanded root is never a leaf
    self.root_node = Node(chess_board, net_value, prior_policy, legal_moves, False,
                          clip_low_visit=False)
def _run_single_playout(self, state: GameState, parent_node: Node, depth=1, mv_list=None):
    """
    This function works recursively until a terminal node is reached.

    :param state: Current game-state for the evaluation. This state differs between the threads
    :param parent_node: Current parent-node of the selected node. In the first expansion
                        this is the root node.
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every
                  recursive call
    :param mv_list: List of moves which have been taken in the current path. For each selected
                    child node this list is expanded by one move recursively.
                    BUGFIX: defaults to None instead of a mutable [] default, which would be
                    shared across all calls and grow indefinitely.
    :return: -value: The inverse value prediction of the current board state. The flipping
                     by -1 each turn is needed because the point of view changes each half-move
             depth: Current depth reached by this evaluation
             mv_list: List of moves which have been selected
    """
    if mv_list is None:
        mv_list = []

    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)

    if move is None:
        raise Exception(
            "Illegal tree setup. A 'None' move was selected which shouldn't be possible"
        )

    # update the visit counts to this node
    # temporarily reduce the attraction of this node by applying a virtual loss /
    # the effect of virtual loss will be undone if the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

    # apply the selected move on the board
    state.apply_move(move)
    # append the selected move to the move list
    mv_list.append(move)

    if node is None:
        # get the board-fen which is used as an identifier for the board positions
        # in the look-up table
        board_fen = state.get_board_fen()

        if board_fen in self.node_lookup:
            # get the node from the look-up list
            node = self.node_lookup[board_fen]
            with parent_node.lock:
                # setup a new connection from the parent to the child
                parent_node.child_nodes[child_idx] = node
            # get the value from the leaf node (the current function is called recursively)
            value, depth, mv_list = self._run_single_playout(state, node, depth + 1, mv_list)
        else:
            # expand and evaluate the new board state (the node wasn't found in the
            # look-up table); its value will be backpropagated through the tree and
            # flipped after every layer
            my_pipe = self.my_pipe_endings.pop()  # receive a free available pipe
            my_pipe.send(state.get_state_planes())
            # this pipe waits for the predictions of the network inference service
            [value, policy_vec] = my_pipe.recv()
            # put the used pipe back into the list
            self.my_pipe_endings.append(my_pipe)

            is_leaf = False  # initialize is_leaf by default to false

            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            # if the current player checkmated his opponent)
            if state.is_won() is True:
                value = -1
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            # check if you can claim a draw - its assumed that the draw is always claimed
            elif state.is_draw() is True:
                value = 0
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            else:
                # get the current legal move of its board state
                legal_moves = list(state.get_legal_moves())
                if len(legal_moves) < 1:
                    raise Exception('No legal move is available for state: %s' % state)

                # extract a sparse policy vector with normalized probabilities
                try:
                    p_vec_small = get_probs_of_move_list(
                        policy_vec, legal_moves,
                        is_white_to_move=state.is_white_to_move(), normalize=True)
                except KeyError:
                    raise Exception('Key Error for state: %s' % state)

            # convert all legal moves to a string if the option check_mate_in_one was enabled
            if self.check_mate_in_one is True:
                str_legal_moves = str(state.get_legal_moves())
            else:
                str_legal_moves = ''

            # create a new node
            new_node = Node(value, p_vec_small, legal_moves, str_legal_moves, is_leaf)

            # include a reference to the new node in the look-up table
            self.node_lookup[board_fen] = new_node

            with parent_node.lock:
                # add the new node to its parent
                parent_node.child_nodes[child_idx] = new_node

            # check if the new node has a mate_in_one connection
            # (if yes overwrite the network prediction)
            if new_node.mate_child_idx is not None:
                value = 1

    # check if we have reached a leaf node
    elif node.is_leaf is True:
        value = node.v
        # receive a free available pipe
        my_pipe = self.my_pipe_endings.pop()
        my_pipe.send(state.get_state_planes())
        # this pipe waits for the predictions of the network inference service
        [_, _] = my_pipe.recv()
        # put the used pipe back into the list
        self.my_pipe_endings.append(my_pipe)

    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, mv_list = self._run_single_playout(state, node, depth + 1, mv_list)

    # revert the virtual loss and apply the predicted value by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)

    # we invert the value prediction for the parent of the above node layer because
    # the player's turn is flipped every turn
    return -value, depth, mv_list
def evaluate_board_state(self, state_in: GameState):
    """
    Analyzes the current board state with an MCTS search.

    :param state_in: Actual game state to evaluate for the MCTS
    :return: value - Q-value of the best child according to the MCTS policy
             legal_moves - List of all legal moves in the given position
             p_vec_small - MCTS policy vector over the legal moves
    """
    # store the time at which the search started
    t_start_eval = time()

    state = deepcopy(state_in)

    # check if the net prediction service has already been started
    if self.net_pred_service.running is False:
        # start the prediction daemon thread
        self.net_pred_service.start()

    # receive a list of all possible legal move in the current board position
    legal_moves = list(state.get_legal_moves())

    # store what depth has been reached at maximum in the current search tree
    # default is 1, in case only 1 move is available
    max_depth_reached = 1

    # consistency check
    if len(legal_moves) == 0:
        raise Exception('The given board state has no legal move available')

    # check for fast way out
    if len(legal_moves) == 1:
        # set value 0 as a dummy value
        value = 0
        p_vec_small = np.array([1], np.float32)

        board_fen = state.get_pythonchess_board().fen()
        # check first if the the current tree can be reused
        if board_fen in self.node_lookup:
            self.root_node = self.node_lookup[board_fen]
            logging.debug('Reuse the search tree. Number of nodes in search tree: %d',
                          self.root_node.n_sum)
        else:
            logging.debug("The given board position wasn't found in the search tree.")
            logging.debug("Starting a brand new search tree...")
            # create a new root node
            self.root_node = Node(value, p_vec_small, legal_moves, str(state.get_legal_moves()))

        # check a child node if it doesn't exists already
        if self.root_node.child_nodes[0] is None:
            state_child = deepcopy(state_in)
            state_child.apply_move(legal_moves[0])
            # initialize is_leaf by default to false
            is_leaf = False

            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            # if the current player checkmated his opponent)
            if state.is_won() is True:
                value = -1
                is_leaf = True
                legal_moves_child = []
                p_vec_small_child = None
            # check if you can claim a draw - its assumed that the draw is always claimed
            elif state.is_draw() is True:
                value = 0
                is_leaf = True
                legal_moves_child = []
                p_vec_small_child = None
            else:
                legal_moves_child = list(state_child.get_legal_moves())
                # start a brand new prediction for the child
                state_planes = state_child.get_state_planes()
                [value, policy_vec] = self.net.predict_single(state_planes)
                # extract a sparse policy vector with normalized probabilities
                p_vec_small_child = get_probs_of_move_list(
                    policy_vec, legal_moves_child, state_child.is_white_to_move())

            # create a new child node
            child_node = Node(value, p_vec_small_child, legal_moves_child,
                              str(state_child.get_legal_moves()), is_leaf)
            # connect the child to the root
            self.root_node.child_nodes[0] = child_node
    else:
        board_fen = state.get_board_fen()
        # check first if the the current tree can be reused
        if board_fen in self.node_lookup:
            self.root_node = self.node_lookup[board_fen]
            logging.debug('Reuse the search tree. Number of nodes in search tree: %d',
                          self.root_node.nb_total_expanded_child_nodes)
        else:
            logging.debug("The given board position wasn't found in the search tree.")
            logging.debug("Starting a brand new search tree...")
            # initialize is_leaf by default to false
            is_leaf = False
            # start a brand new tree
            state_planes = state.get_state_planes()
            [value, policy_vec] = self.net.predict_single(state_planes)
            # extract a sparse policy vector with normalized probabilities
            p_vec_small = get_probs_of_move_list(policy_vec, legal_moves,
                                                 state.is_white_to_move())
            # create a new root node
            self.root_node = Node(value, p_vec_small, legal_moves,
                                  str(state.get_legal_moves()), is_leaf)

        # clear the look up table
        self.node_lookup = {}

        # apply dirichlet noise to the prior probabilities in order to ensure
        # that every move can possibly be visited
        self.root_node.apply_dirichlet_noise_to_prior_policy(
            epsilon=self.dirichlet_epsilon, alpha=self.dirichlet_alpha)

        futures = []

        # set the number of playouts accordingly
        if state_in.are_pocket_empty() is True:
            nb_playouts = self.nb_playouts_empty_pockets
        else:
            nb_playouts = self.nb_playouts_filled_pockets

        t_elapsed = 0
        cur_playouts = 0
        old_time = time()

        # keep searching until the depth, playout or time budget is exhausted
        while max_depth_reached < self.max_search_depth and \
                cur_playouts < nb_playouts and \
                t_elapsed * 1000 < self.movetime_ms:

            # start searching
            with ThreadPoolExecutor(max_workers=self.threads) as executor:
                for i in range(self.threads):
                    # calculate the thread id based on the current playout
                    futures.append(
                        executor.submit(self._run_single_playout, state=deepcopy(state),
                                        parent_node=self.root_node, depth=1, mv_list=[]))

            cur_playouts += self.threads
            time_show_info = time() - old_time

            for i, f in enumerate(futures):
                cur_value, cur_depth, mv_list = f.result()

                if cur_depth > max_depth_reached:
                    max_depth_reached = cur_depth

                # Print every second if verbose is true
                if self.verbose and time_show_info > 1:
                    str_moves = self._mv_list_to_str(mv_list)
                    logging.debug('Update: %d' % cur_depth)
                    print('info score cp %d depth %d nodes %d pv%s' % (
                        value_to_centipawn(cur_value), cur_depth, self.root_node.n_sum,
                        str_moves))
                    old_time = time()

            # update the current search time
            t_elapsed = time() - t_start_eval
            if self.verbose and time_show_info > 1:
                print('info nps %d time %d' % ((self.root_node.n_sum / t_elapsed),
                                               t_elapsed * 1000))

        # receive the policy vector based on the MCTS search
        p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)

        print('info string move overhead is %dms' % (t_elapsed * 1000 - self.movetime_ms))

        # store the current root in the lookup table
        self.node_lookup[state.get_board_fen()] = self.root_node

    # select the q-value according to the mcts best child value
    best_child_idx = self.root_node.get_mcts_policy(self.q_value_weight).argmax()
    value = self.root_node.q[best_child_idx]

    lst_best_moves, _ = self.get_calculated_line()
    str_moves = self._mv_list_to_str(lst_best_moves)

    # show the best calculated line
    time_e = time() - t_start_eval
    node_searched = self.root_node.n_sum
    print('info score cp %d depth %d nodes %d time %d nps %d pv%s' % (
        value_to_centipawn(value), max_depth_reached, node_searched, time_e * 1000,
        node_searched / max(1, time_e), str_moves))

    if len(legal_moves) != len(p_vec_small):
        print(
            'Legal move list %s with length %s is uncompatible to policy vector %s with shape %s for board state %s'
            % (legal_moves, len(legal_moves), p_vec_small, p_vec_small.shape, state_in))
        self.node_lookup = {}
        # restart the search TODO: Fix this error
        # NOTE(review): this mismatch between the legal-move list and the MCTS policy
        # vector is a known, unresolved inconsistency (the dead exception dump that used
        # to sit here has been removed); the search simply restarts from scratch.
        return self.evaluate_board_state(state_in)

    return value, legal_moves, p_vec_small
def negamax(self, state, depth, alpha=-math.inf, beta=math.inf, color=1, all_moves=1):
    """
    Negamax search with alpha-beta pruning over network-ranked candidate moves.
    Evaluates all nodes at a given depth and back-propagates their values to their
    respective parent nodes, keeping the node count manageable for NN evaluation.

    :param state: Game state object
    :param depth: Number of depth to reach during search
    :param alpha: Current alpha value which is used for pruning
    :param beta: Current beta value which is used for pruning
    :param color: Integer color value 1 for white, -1 for black
    :param all_moves: All possible moves
    :return: best_value - Best value for the current player until search depth
    """
    # terminal check; draw detection is deliberately skipped for runtime reasons
    if state.is_won():
        return -1

    # one network inference per node: value + raw policy
    [pred_value, policy_vec] = self.net.predict_single(state.get_state_planes())

    if depth == 0:
        # the value is always returned in the view of the current player
        return pred_value

    best_value = -math.inf
    legal_moves = state.get_legal_moves()
    p_vec_small = get_probs_of_move_list(policy_vec, state.get_legal_moves(),
                                         state.is_white_to_move())

    # rank candidate indices by descending prior probability; restrict the
    # candidate count once the full-width budget (all_moves) is used up
    ranked = np.argsort(p_vec_small)[::-1]
    mv_idces = list(ranked) if all_moves > 0 else list(ranked[:self.nb_candidate_moves])

    if self.include_check_moves:
        # always consider checking moves in addition to the ranked candidates
        check_idces, _ = get_check_move_indices(state.get_pythonchess_board(),
                                                state.get_legal_moves())
        mv_idces += check_idces

    for mv_idx in mv_idces:  # each child of position
        # skip candidates the network considers too unlikely
        if p_vec_small[mv_idx] <= 0.1:
            continue
        mv = legal_moves[mv_idx]
        state_child = copy.deepcopy(state)
        state_child.apply_move(mv)
        child_value = -self.negamax(state_child, depth - 1, -beta, -alpha, -color,
                                    all_moves - 1)
        if child_value > best_value:
            # remember the principal move selected at this ply
            self.best_moves[-depth] = mv
            self.sel_mv_idx[-depth] = mv_idx
            best_value = child_value
        alpha = max(alpha, child_value)
        if alpha >= beta:  # beta cutoff
            break

    return best_value
def _run_single_playout(self, parent_node: Node, pipe_id=0, depth=1, chosen_nodes=None):
    """
    This function works recursively until a leaf or terminal node is reached.
    It ends by back-propagating the value of the new expanded node or by propagating
    the value of a terminal state.

    :param parent_node: Current parent-node of the selected node. In the first expansion
                        this is the root node.
    :param pipe_id: Index of the inference pipe this thread uses
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every
                  recursive call
    :param chosen_nodes: List of all child indices that this thread has explored with
                         respect to the root node (expanded by one entry per recursion)
    :return: -value: The inverse value prediction of the current board state. The flipping
                     by -1 each turn is needed because the point of view changes each half-move
             depth: Current depth reached by this evaluation
             chosen_nodes: List of child indices which have been selected
    """
    if chosen_nodes is None:
        chosen_nodes = []

    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)

    if move is None:
        raise Exception("Illegal tree setup. A 'None' move was selected which shouldn't be possible")

    # update the visit counts to this node
    # temporarily reduce the attraction of this node by applying a virtual loss /
    # the effect of virtual loss will be undone if the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)
    # append the chosen child idx to the chosen_nodes list
    chosen_nodes.append(child_idx)

    if node is None:
        # get the board from the parent node and apply the selected move on it
        state = GameState(deepcopy(parent_node.board))
        state.apply_move(move)

        # get the transposition-key which is used as an identifier for the board
        # positions in the look-up table
        transposition_key = state.get_transposition_key()
        # note: It's important to use also the move counter here, otherwise the system
        # can create an infinite feed-back-loop
        key = transposition_key + (state.get_fullmove_number(),)

        # only connect to a transposition entry that has more visits than the parent,
        # so we never link to a less-explored duplicate of this position
        node_verified = False
        if key in self.node_lookup:
            node = self.node_lookup[key]  # get the node from the look-up list
            if node.n_sum > parent_node.n_sum:
                node_verified = True

        if node_verified:
            with parent_node.lock:
                # setup a new connection from the parent to the child
                parent_node.child_nodes[child_idx] = node
            # get the prior value from the leaf node which has already been expanded
            value = node.initial_value
        else:
            # expand and evaluate the new board state (the node wasn't found in the
            # look-up table); its value will be back-propagated through the tree and
            # flipped after every layer
            my_pipe = self.my_pipe_endings[pipe_id]  # receive a free available pipe

            if self.send_batches:
                my_pipe.send(state.get_state_planes())
                # this pipe waits for the predictions of the network inference service
                [value, policy_vec] = my_pipe.recv()
            else:
                state_planes = state.get_state_planes()
                self.batch_state_planes[pipe_id] = state_planes
                my_pipe.send(pipe_id)
                result_channel = my_pipe.recv()
                value = np.array(self.batch_value_results[result_channel])
                policy_vec = np.array(self.batch_policy_results[result_channel])

            is_leaf = False
            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            # if the current player checkmated his opponent);
            # the cheap is_check() test short-circuits the expensive is_won() test
            is_won = state.is_check() and state.is_won()

            if is_won:
                value = -1
                is_leaf = True
                legal_moves = []
                p_vec_small = None
                # establish a mate in one connection in order to stop exploring
                # different alternatives
                parent_node.set_check_mate_node_idx(child_idx)
            # check if you can claim a draw - its assumed that the draw is always claimed
            elif (
                self.can_claim_threefold_repetition(transposition_key, chosen_nodes)
                or state.get_pythonchess_board().can_claim_fifty_moves()
            ):
                value = 0
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            else:
                # get the current legal move of its board state
                legal_moves = state.get_legal_moves()
                if not legal_moves:
                    # stalemate occurred which is very rare for crazyhouse
                    value = 0
                    is_leaf = True
                    legal_moves = []
                    p_vec_small = None
                else:
                    try:
                        # extract a sparse policy vector with normalized probabilities
                        p_vec_small = get_probs_of_move_list(
                            policy_vec, legal_moves,
                            is_white_to_move=state.is_white_to_move(), normalize=True
                        )
                    except KeyError:
                        raise Exception("Key Error for state: %s" % state)

            # clip the visit nodes for all nodes in the search tree except the direct
            # opponent move
            clip_low_visit = self.use_pruning and depth != 1

            # create a new node
            new_node = Node(
                state.get_pythonchess_board(),
                value,
                p_vec_small,
                legal_moves,
                is_leaf,
                transposition_key,
                clip_low_visit,
            )

            if depth == 1:
                # disable uncertain moves from being visited by giving them a very bad score
                if not is_leaf and self.use_pruning:
                    if self.root_node_prior_policy[child_idx] < 1e-3 and \
                            value * -1 < self.root_node.initial_value:
                        with parent_node.lock:
                            value = 99

                # for performance reasons only apply check enhancement on depth 1 for now
                chess_board = state.get_pythonchess_board()
                if self.enhance_checks:
                    self._enhance_checks(chess_board, legal_moves, p_vec_small)
                if self.enhance_captures:
                    self._enhance_captures(chess_board, legal_moves, p_vec_small)

            if not self.use_pruning:
                # include a reference to the new node in the look-up table
                self.node_lookup[key] = new_node

            with parent_node.lock:
                # add the new node to its parent
                parent_node.child_nodes[child_idx] = new_node

    elif node.is_leaf:  # check if we have reached a leaf node
        value = node.initial_value
    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, chosen_nodes = self._run_single_playout(node, pipe_id, depth + 1,
                                                              chosen_nodes)

    # revert the virtual loss and apply the predicted value by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)

    # invert the value prediction for the parent of the above node layer because the
    # player changes every turn
    return -value, depth, chosen_nodes
def _run_single_playout(self, state: GameState, parent_node: Node, pipe_id=0, depth=1, chosen_nodes=None):
    """
    Runs a single MCTS playout recursively until a leaf or terminal node is reached and
    back-propagates the value of the newly expanded node (or of the terminal state) along
    the visited path.

    :param state: Current game-state for the evaluation. This state differs between the threads
    :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
    :param pipe_id: Index of the pipe ending used to communicate with the network inference service
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call
    :param chosen_nodes: List of child indices which have been selected on the current path with
                         respect to the root node. A fresh list is created when omitted.
    :return: -value: The inverse value prediction of the current board state. The flipping by -1
                     each turn is needed because the point of view changes each half-move
             depth: Current depth reached by this evaluation
             chosen_nodes: List of child indices which have been selected in the current path
    """
    # a mutable default argument ([]) would be shared across top-level calls; create a fresh list instead
    if chosen_nodes is None:
        chosen_nodes = []

    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)

    if move is None:
        raise Exception("Illegal tree setup. A 'None' move was selected which shouldn't be possible")

    # update the visit counts to this node
    # temporarily reduce the attraction of this node by applying a virtual loss /
    # the effect of virtual loss will be undone if the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

    if depth == 1:
        # work on a private copy of the board so concurrent playouts don't interfere with each other
        state = GameState(deepcopy(state.get_pythonchess_board()))

    # apply the selected move on the board
    state.apply_move(move)

    # append the chosen child idx to the chosen_nodes list
    chosen_nodes.append(child_idx)

    if node is None:
        # get the transposition-key which is used as an identifier for the board positions
        # in the look-up table
        transposition_key = state.get_transposition_key()
        # note: It's important to use also the halfmove-counter here, otherwise the system
        # can create an infinite feed-back-loop
        key = (transposition_key, state.get_halfmove_counter())

        # expand and evaluate the new board state (the node wasn't found in the look-up table)
        # its value will be backpropagated through the tree and flipped after every layer
        # receive a free available pipe
        my_pipe = self.my_pipe_endings[pipe_id]

        if self.send_batches:
            my_pipe.send(state.get_state_planes())
            # this pipe waits for the predictions of the network inference service
            [value, policy_vec] = my_pipe.recv()
        else:
            state_planes = state.get_state_planes()
            self.batch_state_planes[pipe_id] = state_planes
            my_pipe.send(pipe_id)
            result_channel = my_pipe.recv()
            value = np.array(self.batch_value_results[result_channel])
            policy_vec = np.array(self.batch_policy_results[result_channel])

        # initialize is_leaf by default to false
        is_leaf = False

        # check if the current player has won the game
        # (we don't need to check for is_lost() because the game is already over
        #  if the current player checkmated his opponent)
        is_won = state.is_check() and state.is_won()

        if is_won:
            value = -1
            is_leaf = True
            legal_moves = []
            p_vec_small = None
            # establish a mate in one connection in order to stop exploring different alternatives
            parent_node.mate_child_idx = child_idx

        # check if you can claim a draw - its assumed that the draw is always claimed
        elif self.can_claim_threefold_repetition(transposition_key, chosen_nodes) or \
                state.get_pythonchess_board().can_claim_fifty_moves():
            value = 0
            is_leaf = True
            legal_moves = []
            p_vec_small = None
        else:
            # get the current legal move of its board state
            legal_moves = state.get_legal_moves()
            if not legal_moves:
                raise Exception('No legal move is available for state: %s' % state)

            # extract a sparse policy vector with normalized probabilities
            try:
                p_vec_small = get_probs_of_move_list(
                    policy_vec, legal_moves, is_white_to_move=state.is_white_to_move(), normalize=True)
            except KeyError as key_err:
                raise Exception('Key Error for state: %s' % state) from key_err

        # convert all legal moves to a string if the option check_mate_in_one was enabled
        if self.check_mate_in_one:
            str_legal_moves = str(state.get_legal_moves())
        else:
            str_legal_moves = ''

        # clip the visit nodes for all nodes in the search tree except the direct opp. move
        clip_low_visit = self.use_pruning and depth != 1

        # create a new node
        new_node = Node(value, p_vec_small, legal_moves, str_legal_moves, is_leaf,
                        transposition_key, clip_low_visit)

        if depth == 1:
            # disable uncertain moves from being visited by giving them a very bad score
            if not is_leaf:
                if self.root_node_prior_policy[child_idx] < 1e-3 and value * -1 < self.root_node.v:
                    with parent_node.lock:
                        value = 99

            if value < 0:  # and state.are_pocket_empty(): #and pipe_id == 0:
                # test of adding dirichlet noise to a new node
                new_node.apply_dirichlet_noise_to_prior_policy(
                    epsilon=self.dirichlet_epsilon * .02, alpha=self.dirichlet_alpha)

        if not self.use_pruning:
            # include a reference to the new node in the look-up table
            self.node_lookup[key] = new_node

        with parent_node.lock:
            # add the new node to its parent
            parent_node.child_nodes[child_idx] = new_node

    # check if we have reached a leaf node
    elif node.is_leaf:
        value = node.v
    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, chosen_nodes = self._run_single_playout(
            state, node, pipe_id, depth + 1, chosen_nodes)

    # revert the virtual loss and apply the predicted value by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)

    # we invert the value prediction for the parent of the above node layer
    # because the player's turn is flipped every turn
    return -value, depth, chosen_nodes
def _expand_root_node_single_move(self, state, legal_moves):
    """
    Expands the current root when there's only a single move available, so the
    neural-network evaluation of the root position itself can be skipped.

    :param state: Request games state
    :param legal_moves: Available moves (a single entry)
    :return:
    """
    value = 0  # dummy value - the root's own evaluation is irrelevant for a forced move
    p_vec_small = np.array([1], np.float32)  # the single legal move receives probability 1

    # create a new root node
    self.root_node = Node(value, p_vec_small, legal_moves, str(state.get_legal_moves()))

    # nothing to do if the only child node has already been created
    if self.root_node.child_nodes[0] is not None:
        return

    child_state = deepcopy(state)
    child_state.apply_move(legal_moves[0])

    child_is_leaf = False  # assume a regular (non-terminal) child by default

    # NOTE(review): the win/draw checks below inspect `state` (the position before the forced
    # move) rather than `child_state` - verify this is intentional
    if state.is_won():
        # the current player has won the game (no is_lost() check needed because the game
        # would already be over if the current player checkmated his opponent)
        value = -1
        child_is_leaf = True
        child_moves = []
        child_policy = None
    elif self.can_claim_threefold_repetition(state.get_transposition_key(), [0]) or \
            state.get_pythonchess_board().can_claim_fifty_moves():
        # a claimable draw is assumed to always be claimed
        value = 0
        child_is_leaf = True
        child_moves = []
        child_policy = None
    else:
        child_moves = child_state.get_legal_moves()
        # run a brand new prediction for the child position
        [value, policy_vec] = self.nets[0].predict_single(child_state.get_state_planes())
        # extract a sparse policy vector with normalized probabilities
        child_policy = get_probs_of_move_list(
            policy_vec, child_moves, child_state.is_white_to_move())

    # create the child node and connect it to the root
    child_node = Node(value, child_policy, child_moves,
                      str(child_state.get_legal_moves()), child_is_leaf)
    self.root_node.child_nodes[0] = child_node