Example No. 1
    def evaluate_board_state(self, state: AbsGameState):  # Too few public methods (1/2)
        """
        The greedy agent always plays the legal move with the highest move probability.

        :param state: Game state object
        :return:
        value - Value prediction in the current player's view from [-1,1]: -1 -> 100% lost, +1 -> 100% won
        legal_moves - List of legal moves in the current board position
        p_vec_small - Probability distribution over the legal moves
        centipawn - Centipawn evaluation converted from the value prediction in the current player's view
        depth - Depth which was reached after the search
        nodes - Number of nodes which have been evaluated in the search
        time_elapsed_s - Elapsed time for the full search in milliseconds
        nps - Nodes per second metric
        pv - Calculated best line for both players
        """

        t_start_eval = time()
        pred_value, pred_policy = self._net.predict_single(state.get_state_planes())
        legal_moves = list(state.get_legal_moves())
        p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.is_white_to_move())
        # define the remaining return variables
        time_e = time() - t_start_eval
        centipawn = value_to_centipawn(pred_value)
        depth = nodes = 1
        time_elapsed_s = time_e * 1000
        nps = nodes / time_e
        # use the move with the highest probability as the best move for logging
        pv = legal_moves[p_vec_small.argmax()].uci()
        return pred_value, legal_moves, p_vec_small, centipawn, depth, nodes, time_elapsed_s, nps, pv
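A minimal usage sketch for the tuple returned above (a hypothetical caller; `agent` and `state` are assumed to match the interface shown and are not part of the example): the greedy move is simply the legal move at the argmax of the returned probability vector.

import numpy as np

def pick_greedy_move(agent, state):
    """Return the most probable legal move together with its confidence and the value prediction."""
    value, legal_moves, p_vec_small, *_ = agent.evaluate_board_state(state)
    best_idx = int(np.argmax(p_vec_small))            # index of the move with the highest probability
    return legal_moves[best_idx], float(p_vec_small[best_idx]), value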
Example No. 2
    def evaluate_board_state(self, state: AbsGameState) -> tuple:
        """
        Evaluates a given board position using an alpha-beta (negamax) search.

        :param state: Game state object
        :return: value, legal moves, policy vector, centipawn score, search depth, node count,
                 elapsed time (ms), nodes per second and the principal variation
        """
        self.t_start_eval = time()
        value = self.negamax(state,
                             depth=self.depth,
                             alpha=-math.inf,
                             beta=math.inf,
                             color=1 if state.board.turn else -1)

        legal_moves = state.get_legal_moves()
        policy = np.zeros(len(legal_moves))
        policy[self.sel_mv_idx[0]] = 1
        centipawn = value_to_centipawn(value)
        nodes = self.nodes
        # in UCI the depth is given in half-move notation, also called plies
        time_e = time() - self.t_start_eval
        time_elapsed_s = time_e * 1000
        nps = nodes / time_e
        pv = self.best_moves[0].uci()

        logging.info(f"{self.best_moves}")
        logging.info(f"Value: {value}, Centipawn: {centipawn}")
        return value, legal_moves, policy, centipawn, self.depth, nodes, time_elapsed_s, nps, pv
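The `self.negamax(...)` helper is not shown in this example. For reference, a minimal self-contained sketch of plain negamax with alpha-beta pruning; the callback names `evaluate` and `expand` are placeholders, not this engine's API.

import math

def negamax(node, depth, alpha, beta, color, evaluate, expand):
    """Plain negamax with alpha-beta pruning; `evaluate` and `expand` are caller-supplied callbacks."""
    children = expand(node)
    if depth == 0 or not children:
        return color * evaluate(node)                 # static evaluation from the side to move
    best = -math.inf
    for child in children:
        best = max(best, -negamax(child, depth - 1, -beta, -alpha, -color, evaluate, expand))
        alpha = max(alpha, best)
        if alpha >= beta:                              # beta cut-off: the opponent avoids this line
            break
    return best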
Example No. 3
    def evaluate_board_state(self, state: _GameState):
        """

        :param state:
        :return:
        """

        t_start_eval = time()
        pred_value, pred_policy = self._net.predict_single(
            state.get_state_planes())

        legal_moves = list(state.get_legal_moves())

        p_vec_small = get_probs_of_move_list(pred_policy, legal_moves,
                                             state.is_white_to_move())

        # use the move with the highest probability as the best move for logging
        instinct_move = legal_moves[p_vec_small.argmax()]

        # define the remaining return variables
        time_e = time() - t_start_eval
        cp = value_to_centipawn(pred_value)
        depth = 1
        nodes = 1
        time_elapsed_s = time_e * 1000
        nps = nodes / time_e
        pv = instinct_move.uci()

        return pred_value, legal_moves, p_vec_small, cp, depth, nodes, time_elapsed_s, nps, pv
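`get_probs_of_move_list()` is used in every example but never shown. Below is a plausible minimal sketch of such a helper, assuming the caller can already map each legal move to its index in the full policy vector (that mapping is an assumption, not part of the examples).

import numpy as np

def probs_of_move_list(full_policy, legal_move_indices):
    """Select the policy entries belonging to the legal moves and renormalize them to a distribution."""
    p = np.asarray(full_policy, dtype=np.float32)[np.asarray(legal_move_indices, dtype=np.intp)]
    total = p.sum()
    if total <= 0:
        return np.full(len(p), 1.0 / max(len(p), 1), dtype=np.float32)  # uniform fallback
    return p / total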
Example No. 4
    def evaluate_board_state(self, state: _GameState, verbose=True):
        """

        :param state:
        :return:
        """
        t_start_eval = time()
        pred_value, pred_policy = self._net.predict_single(state.get_state_planes())

        legal_moves = list(state.get_legal_moves())
        p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.is_white_to_move())

        if verbose:
            # use the move with the highest probability as the best move for logging
            instinct_move = legal_moves[p_vec_small.argmax()]

            # show the best calculated line
            print('info score cp %d depth %d nodes %d time %d pv %s' %
                  (value_to_centipawn(pred_value), 1, 1, (time() - t_start_eval) * 1000, instinct_move.uci()))

        return pred_value, legal_moves, p_vec_small
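`value_to_centipawn()` is also only referenced, never shown. The sketch below is purely illustrative: it uses a tan-based mapping with constants borrowed from other engines as an assumption, not this project's actual formula.

import math

def value_to_centipawn_sketch(value):
    """Map a value prediction in [-1, 1] to a centipawn-like score, clamping near the bounds."""
    v = max(min(value, 0.9999), -0.9999)              # avoid infinities at exactly +/-1
    return int(round(111.7 * math.tan(1.562 * v)))    # tan-based mapping used by some engines (assumption)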
Example No. 5
    def evaluate_board_state(self, state_in: GameState):
        """
        Analyzes the current board state with an MCTS search.

        :param state_in: Actual game state to evaluate for the MCTS
        :return: value, legal moves and the policy vector based on the MCTS search
        """

        # store the time at which the search started
        t_start_eval = time()

        state = deepcopy(state_in)

        # check if the net prediction service has already been started
        if self.net_pred_service.running is False:
            # start the prediction daemon thread
            self.net_pred_service.start()

        # receive a list of all legal moves in the current board position
        legal_moves = list(state.get_legal_moves())

        # store what depth has been reached at maximum in the current search tree
        # default is 1, in case only 1 move is available
        max_depth_reached = 1

        # consistency check
        if len(legal_moves) == 0:
            raise Exception(
                'The given board state has no legal move available')

        # check for fast way out
        if len(legal_moves) == 1:

            # set value 0 as a dummy value
            value = 0
            p_vec_small = np.array([1], np.float32)

            board_fen = state.get_pythonchess_board().fen()

            # check first if the current search tree can be reused
            if board_fen in self.node_lookup:
                self.root_node = self.node_lookup[board_fen]
                logging.debug(
                    'Reuse the search tree. Number of nodes in search tree: %d',
                    self.root_node.n_sum)
            else:
                logging.debug(
                    "The given board position wasn't found in the search tree."
                )
                logging.debug("Starting a brand new search tree...")

                # create a new root node
                self.root_node = Node(value, p_vec_small, legal_moves,
                                      str(state.get_legal_moves()))

                # create the child node if it doesn't exist already
                if self.root_node.child_nodes[0] is None:
                    state_child = deepcopy(state_in)
                    state_child.apply_move(legal_moves[0])

                    # initialize is_leaf by default to false
                    is_leaf = False

                    # check if the current player has won the game
                    # (we don't need to check for is_lost() because the game is already over
                    #  if the current player checkmated his opponent)
                    if state.is_won() is True:
                        value = -1
                        is_leaf = True
                        legal_moves_child = []
                        p_vec_small_child = None

                    # check if a draw can be claimed - it's assumed that the draw is always claimed
                    elif state.is_draw() is True:
                        value = 0
                        is_leaf = True
                        legal_moves_child = []
                        p_vec_small_child = None

                    else:
                        legal_moves_child = list(state_child.get_legal_moves())

                        # start a brand new prediction for the child
                        state_planes = state_child.get_state_planes()
                        [value,
                         policy_vec] = self.net.predict_single(state_planes)

                        # extract a sparse policy vector with normalized probabilities
                        p_vec_small_child = get_probs_of_move_list(
                            policy_vec, legal_moves_child,
                            state_child.is_white_to_move())

                    # create a new child node
                    child_node = Node(value, p_vec_small_child,
                                      legal_moves_child,
                                      str(state_child.get_legal_moves()),
                                      is_leaf)

                    # connect the child to the root
                    self.root_node.child_nodes[0] = child_node

        else:
            board_fen = state.get_board_fen()

            # check first if the current search tree can be reused
            if board_fen in self.node_lookup:
                self.root_node = self.node_lookup[board_fen]
                logging.debug(
                    'Reuse the search tree. Number of nodes in search tree: %d',
                    self.root_node.nb_total_expanded_child_nodes)
            else:
                logging.debug(
                    "The given board position wasn't found in the search tree."
                )
                logging.debug("Starting a brand new search tree...")

                # initialize is_leaf by default to false
                is_leaf = False

                # start a brand new tree
                state_planes = state.get_state_planes()
                [value, policy_vec] = self.net.predict_single(state_planes)

                # extract a sparse policy vector with normalized probabilities
                p_vec_small = get_probs_of_move_list(policy_vec, legal_moves,
                                                     state.is_white_to_move())

                # create a new root node
                self.root_node = Node(value, p_vec_small, legal_moves,
                                      str(state.get_legal_moves()), is_leaf)

            # clear the look up table
            self.node_lookup = {}

            # apply dirichlet noise to the prior probabilities in order to ensure
            #  that every move can possibly be visited
            self.root_node.apply_dirichlet_noise_to_prior_policy(
                epsilon=self.dirichlet_epsilon, alpha=self.dirichlet_alpha)

            futures = []

            # set the number of playouts accordingly
            if state_in.are_pocket_empty() is True:
                nb_playouts = self.nb_playouts_empty_pockets
            else:
                nb_playouts = self.nb_playouts_filled_pockets

            t_elapsed = 0
            cur_playouts = 0
            old_time = time()

            while max_depth_reached < self.max_search_depth and \
                    cur_playouts < nb_playouts and \
                    t_elapsed * 1000 < self.movetime_ms:  # and np.abs(self.root_node.q.mean()) < 0.99:

                # start searching
                with ThreadPoolExecutor(max_workers=self.threads) as executor:
                    for i in range(self.threads):
                        # calculate the thread id based on the current playout
                        futures.append(
                            executor.submit(self._run_single_playout,
                                            state=deepcopy(state),
                                            parent_node=self.root_node,
                                            depth=1,
                                            mv_list=[]))

                cur_playouts += self.threads
                time_show_info = time() - old_time

                # store the mean of all value predictions in this variable
                #mean_value = 0

                for i, f in enumerate(futures):
                    cur_value, cur_depth, mv_list = f.result()

                    # sum up all values
                    #mean_value += cur_value

                    if cur_depth > max_depth_reached:
                        max_depth_reached = cur_depth

                    # Print every second if verbose is true
                    if self.verbose and time_show_info > 1:
                        str_moves = self._mv_list_to_str(mv_list)
                        logging.debug('Update: %d' % cur_depth)
                        print('info score cp %d depth %d nodes %d pv%s' %
                              (value_to_centipawn(cur_value), cur_depth,
                               self.root_node.n_sum, str_moves))
                        old_time = time()

                # update the current search time
                t_elapsed = time() - t_start_eval
                if self.verbose and time_show_info > 1:
                    print(
                        'info nps %d time %d' %
                        ((self.root_node.n_sum / t_elapsed), t_elapsed * 1000))

            # receive the policy vector based on the MCTS search
            p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)
            print('info string move overhead is %dms' %
                  (t_elapsed * 1000 - self.movetime_ms))

        # store the current root in the lookup table
        self.node_lookup[state.get_board_fen()] = self.root_node

        # select the q value which would score the highest value

        #value = self.root_node.q.max()

        # select the q-value according to the mcts best child value
        best_child_idx = self.root_node.get_mcts_policy(
            self.q_value_weight).argmax()
        value = self.root_node.q[best_child_idx]

        lst_best_moves, _ = self.get_calculated_line()

        str_moves = self._mv_list_to_str(lst_best_moves)

        # show the best calculated line
        time_e = time() - t_start_eval
        node_searched = self.root_node.n_sum
        print('info score cp %d depth %d nodes %d time %d nps %d pv%s' %
              (value_to_centipawn(value), max_depth_reached, node_searched,
               time_e * 1000, node_searched / max(1, time_e), str_moves))

        if len(legal_moves) != len(p_vec_small):
            print(
                'Legal move list %s with length %s is incompatible with policy vector %s with shape %s for board state %s'
                % (legal_moves, len(legal_moves), p_vec_small,
                   p_vec_small.shape, state_in))
            self.node_lookup = {}
            # restart the search TODO: Fix this error
            # Note: this inconsistency previously surfaced as an exception in which the legal move list
            # (length 64) did not match the policy vector shape (68,) for a crazyhouse board position.
            return self.evaluate_board_state(state_in)

        return value, legal_moves, p_vec_small
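`get_mcts_policy(self.q_value_weight)` is not shown in these examples either. A plausible sketch of the blending it performs, assuming the weight interpolates between normalized visit counts and rescaled Q-values (the exact rule in the codebase is an assumption):

import numpy as np

def mcts_policy_sketch(child_visits, q_values, q_value_weight):
    """Blend normalized visit counts with rescaled Q-values into a single move-selection policy."""
    visits = np.asarray(child_visits, dtype=np.float32)
    visit_policy = visits / max(visits.sum(), 1e-8)
    q_policy = (np.asarray(q_values, dtype=np.float32) + 1.0) / 2.0   # rescale Q from [-1, 1] to [0, 1]
    policy = (1.0 - q_value_weight) * visit_policy + q_value_weight * q_policy
    return policy / max(policy.sum(), 1e-8)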
Example No. 6
    def _run_mcts_search(self, state):
        """
        Runs a new MCTS search or continues the MCTS on the current search tree.
        :param state: Input state given by the user
        :return: max_depth_reached (int) - The longest search path length after the whole search
        """

        self.node_lookup = {}  # clear the look up table
        self.root_node_prior_policy = deepcopy(self.root_node.policy_prob)  # save the prior policy of the root node
        # apply dirichlet noise to the prior probabilities in order to ensure
        #  that every move can possibly be visited
        self.root_node.apply_dirichlet_noise_to_prior_policy(epsilon=self.dirichlet_epsilon, alpha=self.dirichlet_alpha)
        # store what depth has been reached at maximum in the current search tree
        max_depth_reached = 1  # default is 1, in case only 1 move is available
        futures = []

        if state.are_pocket_empty():  # set the number of playouts accordingly
            nb_playouts = self.nb_playouts_empty_pockets
        else:
            nb_playouts = self.nb_playouts_filled_pockets

        t_elapsed_ms = cur_playouts = 0
        old_time = time()
        cpuct_init = self.cpuct

        if self.use_time_management:
            time_checked = time_checked_early = False
        else:
            time_checked = time_checked_early = True

        while (
            max_depth_reached < self.max_search_depth and cur_playouts < nb_playouts and t_elapsed_ms < self.movetime_ms
        ):  # and np.abs(self.root_node.q_value.mean()) < 0.99:

            # start searching
            with ThreadPoolExecutor(max_workers=self.threads) as executor:
                for i in range(self.threads):
                    # calculate the thread id based on the current playout
                    futures.append(
                        executor.submit(
                            self._run_single_playout, parent_node=self.root_node, pipe_id=i, depth=1, chosen_nodes=[]
                        )
                    )

            cur_playouts += self.threads
            time_show_info = time() - old_time

            for i, future in enumerate(futures):
                cur_value, cur_depth, chosen_nodes = future.result()

                if cur_depth > max_depth_reached:
                    max_depth_reached = cur_depth
                # Print the explored line of the last line for every x seconds if verbose is true
                if self.verbose and time_show_info > 0.5 and i == len(futures) - 1:
                    mv_list = self._create_mv_list(chosen_nodes)
                    str_moves = self._mv_list_to_str(mv_list)
                    print(
                        "info score cp %d depth %d nodes %d pv %s"
                        % (value_to_centipawn(cur_value), cur_depth, self.root_node.n_sum, str_moves)
                    )
                    logging.debug("Update info")
                    old_time = time()

            t_elapsed = time() - self.t_start_eval  # update the current search time
            t_elapsed_ms = t_elapsed * 1000
            if time_show_info > 1:
                node_searched = int(self.root_node.n_sum - self.total_nodes_pre_search)
                print("info nps %d time %d" % (int((node_searched / t_elapsed)), t_elapsed_ms))

            if not time_checked_early and t_elapsed_ms > self.movetime_ms / 2:
                if (
                    self.root_node.policy_prob.max() > 0.9
                    and self.root_node.policy_prob.argmax() == self.root_node.q_value.argmax()
                ):
                    self.time_buffer_ms += (self.movetime_ms - t_elapsed_ms) * 0.9
                    print("info early break up")
                    break
                else:
                    time_checked_early = True

            if (
                self.time_buffer_ms > 2500
                and not time_checked
                and t_elapsed_ms > self.movetime_ms * 0.9
                and self.root_node.q_value[self.root_node.child_number_visits.argmax()]
                < self.root_node.initial_value + 0.01
            ):
                print("info increase time")
                time_checked = True
                time_bonus = self.time_buffer_ms / 4
                self.time_buffer_ms -= time_bonus  # increase the movetime
                self.movetime_ms += time_bonus * 0.75
                self.root_node.initial_value = self.root_node.q_value[self.root_node.child_number_visits.argmax()]

                if self.time_buffer_ms < 0:
                    self.movetime_ms += self.time_buffer_ms
                    self.time_buffer_ms = 0
        self.cpuct = cpuct_init
        return max_depth_reached
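`apply_dirichlet_noise_to_prior_policy()` is called on the root node above. A minimal sketch of the standard AlphaZero-style mixing such a method typically performs; the exact implementation in this codebase is not shown, so treat this as an assumption.

import numpy as np

def apply_dirichlet_noise(prior_policy, epsilon, alpha):
    """Mix the prior policy with Dirichlet noise so every root move keeps a non-zero chance of being visited."""
    prior = np.asarray(prior_policy, dtype=np.float32)
    noise = np.random.dirichlet([alpha] * len(prior))
    return (1.0 - epsilon) * prior + epsilon * noise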
Example No. 7
    def evaluate_board_state(self, state: GameState):  # Probably is better to be refactored
        """
        Analyzes the current board state. This is the main method which gets called by the UCI interface or an
        analysis request.
        :param state: Actual game state to evaluate for the MCTS
        :return: value, legal moves, policy vector, centipawn score, depth, node count,
                 elapsed time (ms), nodes per second and the principal variation
        """
        # Too many local variables (28/15) - Too many branches (25/12) - Too many statements (75/50)
        self.t_start_eval = time()  # store the time at which the search started

        if not self.net_pred_services[0].running:  # check if the net prediction service has already been started
            for net_pred_service in self.net_pred_services:  # start the prediction daemon thread
                net_pred_service.start()

        legal_moves = state.get_legal_moves()  # list of all possible legal move in the current board position

        if not legal_moves:  # consistency check
            raise Exception("The given board state has no legal move available")

        # check first if the current search tree can be reused
        key = state.get_transposition_key() + (state.get_fullmove_number(),)

        if not self.use_pruning and key in self.node_lookup:
            chess_board = state.get_pythonchess_board()
            self.root_node = self.node_lookup[key]
            if self.enhance_captures:
                self._enhance_captures(chess_board, legal_moves, self.root_node.policy_prob)
                # enhance captures for all direct child nodes
                for child_node in self.root_node.child_nodes:
                    if child_node:
                        self._enhance_captures(child_node.board, child_node.legal_moves, child_node.policy_prob)

            if self.enhance_checks:
                self._enhance_checks(chess_board, legal_moves, self.root_node.policy_prob)
                # enhance checks for all direct child nodes
                for child_node in self.root_node.child_nodes:
                    if child_node:
                        self._enhance_checks(child_node.board, child_node.legal_moves, child_node.policy_prob)

            logging.debug(
                "Reuse the search tree. Number of nodes in search tree: %d",
                self.root_node.nb_total_expanded_child_nodes,
            )
            self.total_nodes_pre_search = deepcopy(self.root_node.n_sum)
        else:
            logging.debug("Starting a brand new search tree...")
            self.root_node = None
            self.total_nodes_pre_search = 0

        if len(legal_moves) == 1:  # check for fast way out
            max_depth_reached = 1  # if there's only a single legal move, we only need to search to depth 1

            if self.root_node is None:
                # conduct all necessary steps for fastest way out
                self._expand_root_node_single_move(state, legal_moves)

            # increase the move time buffer
            # subtract half a second as a constant for possible delay
            self.time_buffer_ms += max(self.movetime_ms - 500, 0)
        else:
            if self.root_node is None:
                self._expand_root_node_multiple_moves(state, legal_moves)  # run a single expansion on the root node
            # opening guard
            if state.get_fullmove_number() <= self.opening_guard_moves:  # 100: #7: #10:
                self.root_node.q_value[self.root_node.policy_prob < 5e-2] = -9999
            # elif len(legal_moves) > 50:
            #    self.root_node.q_value[self.root_node.policy_prob < 1e-3] = -9999
            # conduct the mcts-search based on the given settings
            max_depth_reached = self._run_mcts_search(state)
            t_elapsed = time() - self.t_start_eval
            print("info string move overhead is %dms" % (t_elapsed * 1000 - self.movetime_ms))

        # receive the policy vector based on the MCTS search
        p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)  # , xth_n_max=xth_n_max, is_root=True)

        if self.use_future_q_values:
            # use q-future value to update the q-values of direct child nodes
            q_future, indices = self.get_last_q_values(min_nb_visits=5, max_depth=5)
            # self.root_node.q_value = 0.5 * self.root_node.q_value + 0.5 * q_future
            # TODO: make this matrix vector form
            if max_depth_reached >= 5:
                for idx in indices:
                    self.root_node.q_value[idx] = min(self.root_node.q_value[idx], q_future[idx])
                p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)

        # if self.use_pruning is False:
        self.node_lookup[key] = self.root_node  # store the current root in the lookup table
        best_child_idx = p_vec_small.argmax()  # select the q-value according to the mcts best child value
        value = self.root_node.q_value[best_child_idx]
        # value = orig_q[best_child_idx]
        lst_best_moves, _ = self.get_calculated_line()
        str_moves = self._mv_list_to_str(lst_best_moves)
        node_searched = int(self.root_node.n_sum - self.total_nodes_pre_search)  # nodes evaluated during this search
        time_e = time() - self.t_start_eval  # In uci the depth is given using half-moves notation also called plies

        if len(legal_moves) != len(p_vec_small):
            raise Exception(
                "Legal move list %s with length %s is incompatible to policy vector %s"
                " with shape %s for board state %s and nodes legal move list: %s"
                % (legal_moves, len(legal_moves), p_vec_small, p_vec_small.shape, state, self.root_node.legal_moves)
            )

        # define the remaining return variables
        centipawns = value_to_centipawn(value)
        depth = max_depth_reached
        nodes = node_searched
        time_elapsed_s = time_e * 1000

        # avoid division by 0
        if time_e > 0.0:
            nps = node_searched / time_e
        else:
            # otherwise return a high constant
            nps = 999999999

        pv = str_moves
        if self.verbose:
            score = "score cp %d depth %d nodes %d time %d nps %d pv %s" % (
                centipawns,
                depth,
                nodes,
                time_elapsed_s,
                nps,
                pv,
            )
            logging.info("info string %s", score)
        return value, legal_moves, p_vec_small, centipawns, depth, nodes, time_elapsed_s, nps, pv
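The TODO in this example asks for a matrix/vector form of the q_future update loop. A minimal sketch of the equivalent vectorized update (a suggestion under the same semantics as the loop, not the project's code):

import numpy as np

def apply_future_q_update(q_value, q_future, indices):
    """Vectorized form of the per-index loop above: clamp the selected Q-values by their future estimates."""
    q_value = np.asarray(q_value, dtype=np.float32).copy()
    idx = np.asarray(indices, dtype=np.intp)
    q_value[idx] = np.minimum(q_value[idx], np.asarray(q_future, dtype=np.float32)[idx])
    return q_value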
Example No. 8
    def _run_mcts_search(self, state):
        """
        Runs a new MCTS search or continues the MCTS on the current search tree.

        :param state: Input state given by the user
        :return: max_depth_reached (int) - The longest search path length after the whole search
        """

        # clear the look up table
        self.node_lookup = {}

        # save the prior policy of the root node
        self.root_node_prior_policy = deepcopy(self.root_node.p)

        # apply dirichlet noise to the prior probabilities in order to ensure
        #  that every move can possibly be visited
        self.root_node.apply_dirichlet_noise_to_prior_policy(
            epsilon=self.dirichlet_epsilon, alpha=self.dirichlet_alpha)

        # iterate through all children and add dirichlet if there exists any
        for child_node in self.root_node.child_nodes:
            if child_node is not None:
                # test of adding dirichlet noise to a new node
                child_node.apply_dirichlet_noise_to_prior_policy(
                    epsilon=self.dirichlet_epsilon * .1,
                    alpha=self.dirichlet_alpha)

        # store what depth has been reached at maximum in the current search tree
        # default is 1, in case only 1 move is available
        max_depth_reached = 1

        futures = []

        # set the number of playouts accordingly
        if state.are_pocket_empty() is True:
            nb_playouts = self.nb_playouts_empty_pockets
        else:
            nb_playouts = self.nb_playouts_filled_pockets
            self.temperature_current = 0

        t_elapsed = 0
        cur_playouts = 0
        old_time = time()

        cpuct_init = self.cpuct

        decline = True

        while max_depth_reached < self.max_search_depth and \
                cur_playouts < nb_playouts and \
                t_elapsed * 1000 < self.movetime_ms:  # and np.abs(self.root_node.q.mean()) < 0.99:

            if self.use_oscillating_cpuct is True:
                # Test about decreasing CPUCT value
                if decline is True:
                    self.cpuct -= 0.01
                else:
                    self.cpuct += 0.01
                if self.cpuct < cpuct_init * .5:
                    decline = False
                elif self.cpuct > cpuct_init:
                    decline = True

            # start searching
            with ThreadPoolExecutor(max_workers=self.threads) as executor:
                for i in range(self.threads):
                    # calculate the thread id based on the current playout
                    futures.append(
                        executor.submit(self._run_single_playout,
                                        state=state,
                                        parent_node=self.root_node,
                                        pipe_id=i,
                                        depth=1,
                                        chosen_nodes=[]))

            cur_playouts += self.threads
            time_show_info = time() - old_time

            for i, f in enumerate(futures):
                cur_value, cur_depth, chosen_nodes = f.result()

                if cur_depth > max_depth_reached:
                    max_depth_reached = cur_depth

                # Print the explored line of the last line for every x seconds if verbose is true
                if self.verbose and time_show_info > 0.5 and i == len(
                        futures) - 1:
                    mv_list = self._create_mv_list(chosen_nodes)
                    str_moves = self._mv_list_to_str(mv_list)
                    print('info score cp %d depth %d nodes %d pv%s' %
                          (value_to_centipawn(cur_value), cur_depth,
                           self.root_node.n_sum, str_moves))
                    logging.debug('Update info')
                    old_time = time()

            # update the current search time
            t_elapsed = time() - self.t_start_eval
            if self.verbose and time_show_info > 1:
                node_searched = int(self.root_node.n_sum -
                                    self.total_nodes_pre_search)
                print('info nps %d time %d' % (int(
                    (node_searched / t_elapsed)), t_elapsed * 1000))

        self.cpuct = cpuct_init

        return max_depth_reached
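The cpuct value being oscillated above enters the node-selection rule inside each playout. The selection code itself is not shown in these examples; below is a sketch of the standard PUCT formula that cpuct usually scales (standard formulation, not necessarily this engine's exact variant):

import numpy as np

def puct_child_selection(q_values, prior_policy, child_visits, cpuct):
    """Standard PUCT rule: pick the child maximizing Q + cpuct * P * sqrt(N_parent) / (1 + N_child)."""
    q = np.asarray(q_values, dtype=np.float32)
    p = np.asarray(prior_policy, dtype=np.float32)
    n = np.asarray(child_visits, dtype=np.float32)
    u = cpuct * p * np.sqrt(max(n.sum(), 1.0)) / (1.0 + n)
    return int(np.argmax(q + u))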
Example No. 9
    def evaluate_board_state(self, state: GameState):
        """
        Analyzes the current board state. This is the main method which gets called by the UCI interface or an
        analysis request.

        :param state: Actual game state to evaluate for the MCTS
        :return: value, legal moves, policy vector, centipawn score, depth, node count,
                 elapsed time (ms), nodes per second and the principal variation
        """

        # store the time at which the search started
        self.t_start_eval = time()

        # check if the net prediction service has already been started
        if self.net_pred_services[0].running is False:
            # start the prediction daemon thread
            for net_pred_service in self.net_pred_services:
                net_pred_service.start()

        # receive a list of all legal moves in the current board position
        legal_moves = state.get_legal_moves()

        # consistency check
        if len(legal_moves) == 0:
            raise Exception(
                'The given board state has no legal move available')

        # check first if the current search tree can be reused
        key = (state.get_transposition_key(), state.get_halfmove_counter())

        if self.use_pruning is False and key in self.node_lookup:
            self.root_node = self.node_lookup[key]
            logging.debug(
                'Reuse the search tree. Number of nodes in search tree: %d',
                self.root_node.nb_total_expanded_child_nodes)
            self.total_nodes_pre_search = deepcopy(self.root_node.n_sum)

            # reset potential good nodes for the root
            self.root_node.q[self.root_node.q < 1.1] = 0

        else:
            logging.debug("Starting a brand new search tree...")
            self.root_node = None
            self.total_nodes_pre_search = 0

        # check for fast way out
        if len(legal_moves) == 1:

            # if there's only a single legal move, we only need to search to depth 1
            max_depth_reached = 1

            if self.root_node is None:
                # conduct all necessary steps for fastest way out
                self._expand_root_node_single_move(state, legal_moves)
        else:

            if self.root_node is None:
                # run a single expansion on the root node
                self._expand_root_node_multiple_moves(state, legal_moves)

            # conduct the mcts-search based on the given settings
            max_depth_reached = self._run_mcts_search(state)

            t_elapsed = time() - self.t_start_eval
            print('info string move overhead is %dms' %
                  (t_elapsed * 1000 - self.movetime_ms))

        # receive the policy vector based on the MCTS search
        p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)

        if self.use_pruning is False:
            # store the current root in the lookup table
            self.node_lookup[key] = self.root_node

        # select the q-value according to the mcts best child value
        best_child_idx = p_vec_small.argmax()
        value = self.root_node.q[best_child_idx]

        lst_best_moves, _ = self.get_calculated_line()
        str_moves = self._mv_list_to_str(lst_best_moves)

        # show the best calculated line
        node_searched = int(self.root_node.n_sum - self.total_nodes_pre_search)
        # In uci the depth is given using half-moves notation also called plies
        time_e = time() - self.t_start_eval

        if len(legal_moves) != len(p_vec_small):
            raise Exception(
                'Legal move list %s with length %s is incompatible with policy vector %s'
                ' with shape %s for board state %s and nodes legal move list: %s'
                % (legal_moves, len(legal_moves), p_vec_small,
                   p_vec_small.shape, state, self.root_node.legal_moves))

        # define the remaining return variables
        cp = value_to_centipawn(value)
        depth = max_depth_reached
        nodes = node_searched
        time_elapsed_s = time_e * 1000
        nps = node_searched / time_e
        pv = str_moves

        return value, legal_moves, p_vec_small, cp, depth, nodes, time_elapsed_s, nps, pv
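A short usage sketch for the returned tuple (a hypothetical caller, e.g. a UCI bestmove handler, not part of the example): the policy vector is reduced to a single move in UCI notation.

import numpy as np

def best_move_uci(legal_moves, p_vec_small):
    """Reduce the returned policy vector to the single best move in UCI notation."""
    return legal_moves[int(np.argmax(p_vec_small))].uci()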