def evaluate_board_state(self, state: AbsGameState):
    """
    The greedy agent always performs the first legal move with the highest move probability.

    :param state: Gamestate object
    :return: value - Value prediction in the current players view from [-1,1]: -1 -> 100% lost, +1 100% won
             legal_moves - List of all legal moves in the given position
             p_vec_small - Probability distribution over the legal moves
             centipawn - Centi pawn evaluation which is converted from the value prediction in current player's view
             depth - Depth which was reached (always 1 for the greedy agent)
             nodes - Number of nodes which have been evaluated (always 1)
             time_elapsed_s - Elapsed time in milliseconds (NOTE: name kept for interface
                              compatibility, but the value is ms, not seconds)
             nps - Nodes per second metric
             pv - UCI string of the single move with the highest probability
    """
    t_start_eval = time()
    # single network inference returning the value and the raw policy for this position
    pred_value, pred_policy = self._net.predict_single(state.get_state_planes())
    legal_moves = list(state.get_legal_moves())
    # project the raw policy vector onto the legal moves with normalized probabilities
    p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.is_white_to_move())

    # define the remaining return variables
    time_e = time() - t_start_eval
    centipawn = value_to_centipawn(pred_value)
    depth = nodes = 1
    time_elapsed_s = time_e * 1000
    # guard against a zero measured duration (coarse clocks) to avoid ZeroDivisionError
    nps = nodes / max(time_e, 1e-9)
    # use the move with the highest probability as the best move for logging
    pv = legal_moves[p_vec_small.argmax()].uci()

    return pred_value, legal_moves, p_vec_small, centipawn, depth, nodes, time_elapsed_s, nps, pv
def _expand_root_node_multiple_moves(self, state, legal_moves):
    """
    Expands the root node by running a single neural network inference on the
    current board state and attaching the resulting policy to a fresh Node.

    :param state: Current game state
    :param legal_moves: Available moves
    :return:
    """
    # run one inference for the root position
    net_value, raw_policy = self.nets[0].predict_single(state.get_state_planes())
    # extract a sparse policy vector with normalized probabilities over the legal moves
    sparse_policy = get_probs_of_move_list(raw_policy, legal_moves, state.is_white_to_move())
    # only materialize the legal-move string when mate-in-one detection needs it
    str_legal_moves = str(state.get_legal_moves()) if self.check_mate_in_one is True else ''
    # the new root is never a leaf node
    self.root_node = Node(net_value, sparse_policy, legal_moves, str_legal_moves, False,
                          clip_low_visit=False)
def evaluate_board_state(self, state: _GameState):
    """
    Evaluates the given board position with a single network inference (greedy, depth 1).

    :param state: Game state object for the position to evaluate
    :return: pred_value - Value prediction in the current player's view from [-1,1]
             legal_moves - List of all legal moves in the given position
             p_vec_small - Probability distribution over the legal moves
             cp - Centipawn conversion of the value prediction
             depth - Search depth reached (always 1)
             nodes - Number of evaluated nodes (always 1)
             time_elapsed_s - Elapsed time in milliseconds (name kept for interface compatibility)
             nps - Nodes per second metric
             pv - UCI string of the move with the highest probability
    """
    t_start_eval = time()
    pred_value, pred_policy = self._net.predict_single(state.get_state_planes())
    legal_moves = list(state.get_legal_moves())
    # extract a sparse policy vector with normalized probabilities over the legal moves
    p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.is_white_to_move())
    # use the move with the highest probability as the best move for logging
    instinct_move = legal_moves[p_vec_small.argmax()]

    # define the remaining return variables
    time_e = time() - t_start_eval
    cp = value_to_centipawn(pred_value)
    depth = 1
    nodes = 1
    time_elapsed_s = time_e * 1000
    # guard against a zero measured duration (coarse clocks) to avoid ZeroDivisionError
    nps = nodes / max(time_e, 1e-9)
    pv = instinct_move.uci()

    return pred_value, legal_moves, p_vec_small, cp, depth, nodes, time_elapsed_s, nps, pv
def _expand_root_node_single_move(self, state, legal_moves):
    """
    Expands the current root in the case if there's only a single move available.
    The neural network search can be omitted in this case.

    :param state: Request games state
    :param legal_moves: Available moves (exactly one entry)
    :return:
    """
    # request the value prediction for the current position (policy is irrelevant here)
    [value, _] = self.nets[0].predict_single(state.get_state_planes())
    # we can create the move probability vector without the NN this time
    p_vec_small = np.array([1], np.float32)

    # create a new root node
    self.root_node = Node(state.get_pythonchess_board(), value, p_vec_small, legal_moves,
                          clip_low_visit=False)

    if self.root_node.child_nodes[0] is None:  # check a child node if it doesn't exists already
        state_child = deepcopy(state)
        state_child.apply_move(legal_moves[0])
        is_leaf = False  # initialize is_leaf by default to false

        # we don't need to check for is_lost() because the game is already over
        if state.is_won():  # check if the current player has won the game
            value = -1
            is_leaf = True
            legal_moves_child = []
            p_vec_small_child = None
        # check if you can claim a draw - its assumed that the draw is always claimed
        elif (
            self.can_claim_threefold_repetition(state.get_transposition_key(), [0])
            or state.get_pythonchess_board().can_claim_fifty_moves()
        ):
            value = 0
            is_leaf = True
            legal_moves_child = []
            p_vec_small_child = None
        else:
            legal_moves_child = state_child.get_legal_moves()
            # start a brand new prediction for the child
            [value, policy_vec] = self.nets[0].predict_single(state_child.get_state_planes())
            # extract a sparse policy vector with normalized probabilities
            p_vec_small_child = get_probs_of_move_list(
                policy_vec, legal_moves_child, state_child.is_white_to_move()
            )

        # create a new child node
        # BUGFIX: the child node must carry the board AFTER the move was applied
        # (state_child), not the parent board - later playouts rebuild game states
        # from node.board
        child_node = Node(state_child.get_pythonchess_board(), value, p_vec_small_child,
                          legal_moves_child, is_leaf)
        self.root_node.child_nodes[0] = child_node  # connect the child to the root
        # assign the value of the root node as the q-value for the child;
        # here we must invert the value because it's the value prediction of the next state
        self.root_node.q_value[0] = -value
def evaluate_board_state(self, state: _GameState, verbose=True):
    """
    Evaluates the position with a single network inference and optionally prints
    a UCI info line for the instinct (highest-probability) move.

    :param state: Game state object to evaluate
    :param verbose: If True, print the UCI info line for the best move
    :return: pred_value, legal_moves, p_vec_small
    """
    eval_start = time()
    pred_value, pred_policy = self._net.predict_single(state.get_state_planes())
    legal_moves = list(state.get_legal_moves())
    # sparse, normalized policy over the legal moves only
    p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.is_white_to_move())

    if verbose is True:
        # the move with the highest probability serves as the reported best move
        instinct_move = legal_moves[p_vec_small.argmax()]
        elapsed_ms = (time() - eval_start) * 1000
        # show the best calculated line
        print('info score cp %d depth %d nodes %d time %d pv %s' % (
            value_to_centipawn(pred_value), 1, 1, elapsed_ms, instinct_move.uci()))

    return pred_value, legal_moves, p_vec_small
def _expand_root_node_multiple_moves(self, state, legal_moves):
    """
    Builds a brand-new root node from a single network inference, optionally
    boosting capture and check moves in the prior policy.

    :param state: Current game state
    :param legal_moves: Available moves
    :return:
    """
    chess_board = state.get_pythonchess_board()
    # single inference for the root position
    [net_value, raw_policy] = self.nets[0].predict_single(state.get_state_planes())
    # sparse policy vector with normalized probabilities over the legal moves
    prior_policy = get_probs_of_move_list(raw_policy, legal_moves, state.is_white_to_move())

    # optionally boost tactically relevant moves in the prior (captures first, then checks,
    # matching the original ordering since both mutate prior_policy in place)
    if self.enhance_captures:
        self._enhance_captures(chess_board, legal_moves, prior_policy)
    if self.enhance_checks:
        self._enhance_checks(chess_board, legal_moves, prior_policy)

    # the freshly expanded root is never a leaf
    self.root_node = Node(chess_board, net_value, prior_policy, legal_moves, False,
                          clip_low_visit=False)
def _run_single_playout(self, state: GameState, parent_node: Node, depth=1, mv_list=None):
    """
    This function works recursively until a terminal node is reached.

    :param state: Current game-state for the evaluation. This state differs between the threads
    :param parent_node: Current parent-node of the selected node. In the first expansion
                        this is the root node.
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every
                  recursive call
    :param mv_list: List of moves which have been taken in the current path. For each selected
                    child node this list is expanded by one move recursively.
                    BUGFIX: defaults to None instead of a mutable [] default, which would be
                    shared across all calls and grow indefinitely.
    :return: -value: The inverse value prediction of the current board state. The flipping
                     by -1 each turn is needed because the point of view changes each half-move
             depth: Current depth reached by this evaluation
             mv_list: List of moves which have been selected
    """
    if mv_list is None:
        mv_list = []

    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)

    if move is None:
        raise Exception(
            "Illegal tree setup. A 'None' move was selected which shouldn't be possible"
        )

    # update the visit counts to this node
    # temporarily reduce the attraction of this node by applying a virtual loss /
    # the effect of virtual loss will be undone if the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

    # apply the selected move on the board
    state.apply_move(move)
    # append the selected move to the move list
    mv_list.append(move)

    if node is None:
        # get the board-fen which is used as an identifier for the board positions
        # in the look-up table
        board_fen = state.get_board_fen()

        if board_fen in self.node_lookup:
            # get the node from the look-up list
            node = self.node_lookup[board_fen]
            with parent_node.lock:
                # setup a new connection from the parent to the child
                parent_node.child_nodes[child_idx] = node
            # get the value from the leaf node (the current function is called recursively)
            value, depth, mv_list = self._run_single_playout(state, node, depth + 1, mv_list)
        else:
            # expand and evaluate the new board state (the node wasn't found in the
            # look-up table); its value will be backpropagated through the tree and
            # flipped after every layer
            my_pipe = self.my_pipe_endings.pop()  # receive a free available pipe
            my_pipe.send(state.get_state_planes())
            # this pipe waits for the predictions of the network inference service
            [value, policy_vec] = my_pipe.recv()
            # put the used pipe back into the list
            self.my_pipe_endings.append(my_pipe)

            is_leaf = False  # initialize is_leaf by default to false

            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            # if the current player checkmated his opponent)
            if state.is_won() is True:
                value = -1
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            # check if you can claim a draw - its assumed that the draw is always claimed
            elif state.is_draw() is True:
                value = 0
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            else:
                # get the current legal move of its board state
                legal_moves = list(state.get_legal_moves())
                if len(legal_moves) < 1:
                    raise Exception('No legal move is available for state: %s' % state)

                # extract a sparse policy vector with normalized probabilities
                try:
                    p_vec_small = get_probs_of_move_list(
                        policy_vec, legal_moves,
                        is_white_to_move=state.is_white_to_move(), normalize=True)
                except KeyError:
                    raise Exception('Key Error for state: %s' % state)

            # convert all legal moves to a string if the option check_mate_in_one was enabled
            if self.check_mate_in_one is True:
                str_legal_moves = str(state.get_legal_moves())
            else:
                str_legal_moves = ''

            # create a new node
            new_node = Node(value, p_vec_small, legal_moves, str_legal_moves, is_leaf)

            # include a reference to the new node in the look-up table
            self.node_lookup[board_fen] = new_node

            with parent_node.lock:
                # add the new node to its parent
                parent_node.child_nodes[child_idx] = new_node

            # check if the new node has a mate_in_one connection
            # (if yes overwrite the network prediction)
            if new_node.mate_child_idx is not None:
                value = 1

    # check if we have reached a leaf node
    elif node.is_leaf is True:
        value = node.v
        # receive a free available pipe
        my_pipe = self.my_pipe_endings.pop()
        my_pipe.send(state.get_state_planes())
        # this pipe waits for the predictions of the network inference service
        [_, _] = my_pipe.recv()
        # put the used pipe back into the list
        self.my_pipe_endings.append(my_pipe)

    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, mv_list = self._run_single_playout(state, node, depth + 1, mv_list)

    # revert the virtual loss and apply the predicted value by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)

    # we invert the value prediction for the parent of the above node layer because
    # the player's turn is flipped every turn
    return -value, depth, mv_list
def evaluate_board_state(self, state_in: GameState):
    """
    Analyzes the current board state with an MCTS search.

    :param state_in: Actual game state to evaluate for the MCTS
    :return: value - Q-value of the best child according to the MCTS policy
             legal_moves - List of all legal moves in the given position
             p_vec_small - MCTS policy vector over the legal moves
    """
    # store the time at which the search started
    t_start_eval = time()

    state = deepcopy(state_in)

    # check if the net prediction service has already been started
    if self.net_pred_service.running is False:
        # start the prediction daemon thread
        self.net_pred_service.start()

    # receive a list of all possible legal move in the current board position
    legal_moves = list(state.get_legal_moves())

    # store what depth has been reached at maximum in the current search tree
    # default is 1, in case only 1 move is available
    max_depth_reached = 1

    # consistency check
    if len(legal_moves) == 0:
        raise Exception('The given board state has no legal move available')

    # check for fast way out
    if len(legal_moves) == 1:
        # set value 0 as a dummy value
        value = 0
        p_vec_small = np.array([1], np.float32)

        board_fen = state.get_pythonchess_board().fen()
        # check first if the the current tree can be reused
        if board_fen in self.node_lookup:
            self.root_node = self.node_lookup[board_fen]
            logging.debug('Reuse the search tree. Number of nodes in search tree: %d',
                          self.root_node.n_sum)
        else:
            logging.debug("The given board position wasn't found in the search tree.")
            logging.debug("Starting a brand new search tree...")
            # create a new root node
            self.root_node = Node(value, p_vec_small, legal_moves, str(state.get_legal_moves()))

        # check a child node if it doesn't exists already
        if self.root_node.child_nodes[0] is None:
            state_child = deepcopy(state_in)
            state_child.apply_move(legal_moves[0])
            # initialize is_leaf by default to false
            is_leaf = False

            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            # if the current player checkmated his opponent)
            if state.is_won() is True:
                value = -1
                is_leaf = True
                legal_moves_child = []
                p_vec_small_child = None
            # check if you can claim a draw - its assumed that the draw is always claimed
            elif state.is_draw() is True:
                value = 0
                is_leaf = True
                legal_moves_child = []
                p_vec_small_child = None
            else:
                legal_moves_child = list(state_child.get_legal_moves())
                # start a brand new prediction for the child
                state_planes = state_child.get_state_planes()
                [value, policy_vec] = self.net.predict_single(state_planes)
                # extract a sparse policy vector with normalized probabilities
                p_vec_small_child = get_probs_of_move_list(
                    policy_vec, legal_moves_child, state_child.is_white_to_move())

            # create a new child node
            child_node = Node(value, p_vec_small_child, legal_moves_child,
                              str(state_child.get_legal_moves()), is_leaf)
            # connect the child to the root
            self.root_node.child_nodes[0] = child_node
    else:
        board_fen = state.get_board_fen()
        # check first if the the current tree can be reused
        if board_fen in self.node_lookup:
            self.root_node = self.node_lookup[board_fen]
            logging.debug('Reuse the search tree. Number of nodes in search tree: %d',
                          self.root_node.nb_total_expanded_child_nodes)
        else:
            logging.debug("The given board position wasn't found in the search tree.")
            logging.debug("Starting a brand new search tree...")
            # initialize is_leaf by default to false
            is_leaf = False
            # start a brand new tree
            state_planes = state.get_state_planes()
            [value, policy_vec] = self.net.predict_single(state_planes)
            # extract a sparse policy vector with normalized probabilities
            p_vec_small = get_probs_of_move_list(policy_vec, legal_moves,
                                                 state.is_white_to_move())
            # create a new root node
            self.root_node = Node(value, p_vec_small, legal_moves,
                                  str(state.get_legal_moves()), is_leaf)

        # clear the look up table
        self.node_lookup = {}

        # apply dirichlet noise to the prior probabilities in order to ensure
        # that every move can possibly be visited
        self.root_node.apply_dirichlet_noise_to_prior_policy(
            epsilon=self.dirichlet_epsilon, alpha=self.dirichlet_alpha)

        futures = []

        # set the number of playouts accordingly
        if state_in.are_pocket_empty() is True:
            nb_playouts = self.nb_playouts_empty_pockets
        else:
            nb_playouts = self.nb_playouts_filled_pockets

        t_elapsed = 0
        cur_playouts = 0
        old_time = time()

        # keep searching until the depth, playout or time budget is exhausted
        while max_depth_reached < self.max_search_depth and \
                cur_playouts < nb_playouts and \
                t_elapsed * 1000 < self.movetime_ms:

            # start searching
            with ThreadPoolExecutor(max_workers=self.threads) as executor:
                for i in range(self.threads):
                    # calculate the thread id based on the current playout
                    futures.append(
                        executor.submit(self._run_single_playout, state=deepcopy(state),
                                        parent_node=self.root_node, depth=1, mv_list=[]))

            cur_playouts += self.threads
            time_show_info = time() - old_time

            for i, f in enumerate(futures):
                cur_value, cur_depth, mv_list = f.result()

                if cur_depth > max_depth_reached:
                    max_depth_reached = cur_depth

                # Print every second if verbose is true
                if self.verbose and time_show_info > 1:
                    str_moves = self._mv_list_to_str(mv_list)
                    logging.debug('Update: %d' % cur_depth)
                    print('info score cp %d depth %d nodes %d pv%s' % (
                        value_to_centipawn(cur_value), cur_depth, self.root_node.n_sum,
                        str_moves))
                    old_time = time()

            # update the current search time
            t_elapsed = time() - t_start_eval
            if self.verbose and time_show_info > 1:
                print('info nps %d time %d' % ((self.root_node.n_sum / t_elapsed),
                                               t_elapsed * 1000))

        # receive the policy vector based on the MCTS search
        p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)

        print('info string move overhead is %dms' % (t_elapsed * 1000 - self.movetime_ms))

        # store the current root in the lookup table
        self.node_lookup[state.get_board_fen()] = self.root_node

    # select the q-value according to the mcts best child value
    best_child_idx = self.root_node.get_mcts_policy(self.q_value_weight).argmax()
    value = self.root_node.q[best_child_idx]

    lst_best_moves, _ = self.get_calculated_line()
    str_moves = self._mv_list_to_str(lst_best_moves)

    # show the best calculated line
    time_e = time() - t_start_eval
    node_searched = self.root_node.n_sum
    print('info score cp %d depth %d nodes %d time %d nps %d pv%s' % (
        value_to_centipawn(value), max_depth_reached, node_searched, time_e * 1000,
        node_searched / max(1, time_e), str_moves))

    if len(legal_moves) != len(p_vec_small):
        print(
            'Legal move list %s with length %s is uncompatible to policy vector %s with shape %s for board state %s'
            % (legal_moves, len(legal_moves), p_vec_small, p_vec_small.shape, state_in))
        self.node_lookup = {}
        # restart the search TODO: Fix this error
        # NOTE(review): this mismatch between the legal-move list and the MCTS policy
        # vector is a known, unresolved inconsistency (the dead exception dump that used
        # to sit here has been removed); the search simply restarts from scratch.
        return self.evaluate_board_state(state_in)

    return value, legal_moves, p_vec_small
def negamax(self, state, depth, alpha=-math.inf, beta=math.inf, color=1, all_moves=1):
    """
    Negamax search with alpha-beta pruning over network-ranked candidate moves.
    Evaluates all nodes at a given depth and back-propagates their values to their
    respective parent nodes, keeping the node count manageable for NN evaluation.

    :param state: Game state object
    :param depth: Number of depth to reach during search
    :param alpha: Current alpha value which is used for pruning
    :param beta: Current beta value which is used for pruning
    :param color: Integer color value 1 for white, -1 for black
    :param all_moves: All possible moves
    :return: best_value - Best value for the current player until search depth
    """
    # terminal check; draw detection is deliberately skipped for runtime reasons
    if state.is_won():
        return -1

    # one network inference per node: value + raw policy
    [pred_value, policy_vec] = self.net.predict_single(state.get_state_planes())

    if depth == 0:
        # the value is always returned in the view of the current player
        return pred_value

    best_value = -math.inf
    legal_moves = state.get_legal_moves()
    p_vec_small = get_probs_of_move_list(policy_vec, state.get_legal_moves(),
                                         state.is_white_to_move())

    # rank candidate indices by descending prior probability; restrict the
    # candidate count once the full-width budget (all_moves) is used up
    ranked = np.argsort(p_vec_small)[::-1]
    mv_idces = list(ranked) if all_moves > 0 else list(ranked[:self.nb_candidate_moves])

    if self.include_check_moves:
        # always consider checking moves in addition to the ranked candidates
        check_idces, _ = get_check_move_indices(state.get_pythonchess_board(),
                                                state.get_legal_moves())
        mv_idces += check_idces

    for mv_idx in mv_idces:  # each child of position
        # skip candidates the network considers too unlikely
        if p_vec_small[mv_idx] <= 0.1:
            continue
        mv = legal_moves[mv_idx]
        state_child = copy.deepcopy(state)
        state_child.apply_move(mv)
        child_value = -self.negamax(state_child, depth - 1, -beta, -alpha, -color,
                                    all_moves - 1)
        if child_value > best_value:
            # remember the principal move selected at this ply
            self.best_moves[-depth] = mv
            self.sel_mv_idx[-depth] = mv_idx
            best_value = child_value
        alpha = max(alpha, child_value)
        if alpha >= beta:  # beta cutoff
            break

    return best_value
def _run_single_playout(self, parent_node: Node, pipe_id=0, depth=1, chosen_nodes=None):
    """
    This function works recursively until a leaf or terminal node is reached.
    It ends by back-propagating the value of the new expanded node or by propagating
    the value of a terminal state.

    :param parent_node: Current parent-node of the selected node. In the first expansion
                        this is the root node.
    :param pipe_id: Index of the inference pipe this thread uses
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every
                  recursive call
    :param chosen_nodes: List of all child indices that this thread has explored with
                         respect to the root node (expanded by one entry per recursion)
    :return: -value: The inverse value prediction of the current board state. The flipping
                     by -1 each turn is needed because the point of view changes each half-move
             depth: Current depth reached by this evaluation
             chosen_nodes: List of child indices which have been selected
    """
    if chosen_nodes is None:
        chosen_nodes = []

    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)

    if move is None:
        raise Exception("Illegal tree setup. A 'None' move was selected which shouldn't be possible")

    # update the visit counts to this node
    # temporarily reduce the attraction of this node by applying a virtual loss /
    # the effect of virtual loss will be undone if the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)
    # append the chosen child idx to the chosen_nodes list
    chosen_nodes.append(child_idx)

    if node is None:
        # get the board from the parent node and apply the selected move on it
        state = GameState(deepcopy(parent_node.board))
        state.apply_move(move)

        # get the transposition-key which is used as an identifier for the board
        # positions in the look-up table
        transposition_key = state.get_transposition_key()
        # note: It's important to use also the move counter here, otherwise the system
        # can create an infinite feed-back-loop
        key = transposition_key + (state.get_fullmove_number(),)

        # only connect to a transposition entry that has more visits than the parent,
        # so we never link to a less-explored duplicate of this position
        node_verified = False
        if key in self.node_lookup:
            node = self.node_lookup[key]  # get the node from the look-up list
            if node.n_sum > parent_node.n_sum:
                node_verified = True

        if node_verified:
            with parent_node.lock:
                # setup a new connection from the parent to the child
                parent_node.child_nodes[child_idx] = node
            # get the prior value from the leaf node which has already been expanded
            value = node.initial_value
        else:
            # expand and evaluate the new board state (the node wasn't found in the
            # look-up table); its value will be back-propagated through the tree and
            # flipped after every layer
            my_pipe = self.my_pipe_endings[pipe_id]  # receive a free available pipe

            if self.send_batches:
                my_pipe.send(state.get_state_planes())
                # this pipe waits for the predictions of the network inference service
                [value, policy_vec] = my_pipe.recv()
            else:
                state_planes = state.get_state_planes()
                self.batch_state_planes[pipe_id] = state_planes
                my_pipe.send(pipe_id)
                result_channel = my_pipe.recv()
                value = np.array(self.batch_value_results[result_channel])
                policy_vec = np.array(self.batch_policy_results[result_channel])

            is_leaf = False
            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            # if the current player checkmated his opponent);
            # the cheap is_check() test short-circuits the expensive is_won() test
            is_won = state.is_check() and state.is_won()

            if is_won:
                value = -1
                is_leaf = True
                legal_moves = []
                p_vec_small = None
                # establish a mate in one connection in order to stop exploring
                # different alternatives
                parent_node.set_check_mate_node_idx(child_idx)
            # check if you can claim a draw - its assumed that the draw is always claimed
            elif (
                self.can_claim_threefold_repetition(transposition_key, chosen_nodes)
                or state.get_pythonchess_board().can_claim_fifty_moves()
            ):
                value = 0
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            else:
                # get the current legal move of its board state
                legal_moves = state.get_legal_moves()
                if not legal_moves:
                    # stalemate occurred which is very rare for crazyhouse
                    value = 0
                    is_leaf = True
                    legal_moves = []
                    p_vec_small = None
                else:
                    try:
                        # extract a sparse policy vector with normalized probabilities
                        p_vec_small = get_probs_of_move_list(
                            policy_vec, legal_moves,
                            is_white_to_move=state.is_white_to_move(), normalize=True
                        )
                    except KeyError:
                        raise Exception("Key Error for state: %s" % state)

            # clip the visit nodes for all nodes in the search tree except the direct
            # opponent move
            clip_low_visit = self.use_pruning and depth != 1

            # create a new node
            new_node = Node(
                state.get_pythonchess_board(),
                value,
                p_vec_small,
                legal_moves,
                is_leaf,
                transposition_key,
                clip_low_visit,
            )

            if depth == 1:
                # disable uncertain moves from being visited by giving them a very bad score
                if not is_leaf and self.use_pruning:
                    if self.root_node_prior_policy[child_idx] < 1e-3 and \
                            value * -1 < self.root_node.initial_value:
                        with parent_node.lock:
                            value = 99

                # for performance reasons only apply check enhancement on depth 1 for now
                chess_board = state.get_pythonchess_board()
                if self.enhance_checks:
                    self._enhance_checks(chess_board, legal_moves, p_vec_small)
                if self.enhance_captures:
                    self._enhance_captures(chess_board, legal_moves, p_vec_small)

            if not self.use_pruning:
                # include a reference to the new node in the look-up table
                self.node_lookup[key] = new_node

            with parent_node.lock:
                # add the new node to its parent
                parent_node.child_nodes[child_idx] = new_node

    elif node.is_leaf:  # check if we have reached a leaf node
        value = node.initial_value
    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, chosen_nodes = self._run_single_playout(node, pipe_id, depth + 1,
                                                              chosen_nodes)

    # revert the virtual loss and apply the predicted value by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)

    # invert the value prediction for the parent of the above node layer because the
    # player changes every turn
    return -value, depth, chosen_nodes
def _run_single_playout(self, state: GameState, parent_node: Node, pipe_id=0, depth=1, chosen_nodes=None):
    """
    Runs a single MCTS playout recursively until a leaf or terminal node is reached and
    back-propagates the value of the newly expanded node (or of the terminal state) along
    the visited path.

    :param state: Current game-state for the evaluation. This state differs between the threads
    :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
    :param pipe_id: Index of the pipe ending used to communicate with the network inference service
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call
    :param chosen_nodes: List of child indices which have been selected on the current path with
                         respect to the root node. A fresh list is created when omitted.
    :return: -value: The inverse value prediction of the current board state. The flipping by -1
                     each turn is needed because the point of view changes each half-move
             depth: Current depth reached by this evaluation
             chosen_nodes: List of child indices which have been selected in the current path
    """
    # a mutable default argument ([]) would be shared across top-level calls; create a fresh list instead
    if chosen_nodes is None:
        chosen_nodes = []

    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)

    if move is None:
        raise Exception("Illegal tree setup. A 'None' move was selected which shouldn't be possible")

    # update the visit counts to this node
    # temporarily reduce the attraction of this node by applying a virtual loss /
    # the effect of virtual loss will be undone if the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

    if depth == 1:
        # work on a private copy of the board so concurrent playouts don't interfere with each other
        state = GameState(deepcopy(state.get_pythonchess_board()))

    # apply the selected move on the board
    state.apply_move(move)

    # append the chosen child idx to the chosen_nodes list
    chosen_nodes.append(child_idx)

    if node is None:
        # get the transposition-key which is used as an identifier for the board positions
        # in the look-up table
        transposition_key = state.get_transposition_key()
        # note: It's important to use also the halfmove-counter here, otherwise the system
        # can create an infinite feed-back-loop
        key = (transposition_key, state.get_halfmove_counter())

        # expand and evaluate the new board state (the node wasn't found in the look-up table)
        # its value will be backpropagated through the tree and flipped after every layer
        # receive a free available pipe
        my_pipe = self.my_pipe_endings[pipe_id]

        if self.send_batches:
            my_pipe.send(state.get_state_planes())
            # this pipe waits for the predictions of the network inference service
            [value, policy_vec] = my_pipe.recv()
        else:
            state_planes = state.get_state_planes()
            self.batch_state_planes[pipe_id] = state_planes
            my_pipe.send(pipe_id)
            result_channel = my_pipe.recv()
            value = np.array(self.batch_value_results[result_channel])
            policy_vec = np.array(self.batch_policy_results[result_channel])

        # initialize is_leaf by default to false
        is_leaf = False

        # check if the current player has won the game
        # (we don't need to check for is_lost() because the game is already over
        #  if the current player checkmated his opponent)
        is_won = state.is_check() and state.is_won()

        if is_won:
            value = -1
            is_leaf = True
            legal_moves = []
            p_vec_small = None
            # establish a mate in one connection in order to stop exploring different alternatives
            parent_node.mate_child_idx = child_idx

        # check if you can claim a draw - its assumed that the draw is always claimed
        elif self.can_claim_threefold_repetition(transposition_key, chosen_nodes) or \
                state.get_pythonchess_board().can_claim_fifty_moves():
            value = 0
            is_leaf = True
            legal_moves = []
            p_vec_small = None
        else:
            # get the current legal move of its board state
            legal_moves = state.get_legal_moves()
            if not legal_moves:
                raise Exception('No legal move is available for state: %s' % state)

            # extract a sparse policy vector with normalized probabilities
            try:
                p_vec_small = get_probs_of_move_list(
                    policy_vec, legal_moves, is_white_to_move=state.is_white_to_move(), normalize=True)
            except KeyError as key_err:
                raise Exception('Key Error for state: %s' % state) from key_err

        # convert all legal moves to a string if the option check_mate_in_one was enabled
        if self.check_mate_in_one:
            str_legal_moves = str(state.get_legal_moves())
        else:
            str_legal_moves = ''

        # clip the visit nodes for all nodes in the search tree except the direct opp. move
        clip_low_visit = self.use_pruning and depth != 1

        # create a new node
        new_node = Node(value, p_vec_small, legal_moves, str_legal_moves, is_leaf,
                        transposition_key, clip_low_visit)

        if depth == 1:
            # disable uncertain moves from being visited by giving them a very bad score
            if not is_leaf:
                if self.root_node_prior_policy[child_idx] < 1e-3 and value * -1 < self.root_node.v:
                    with parent_node.lock:
                        value = 99

            if value < 0:  # and state.are_pocket_empty(): #and pipe_id == 0:
                # test of adding dirichlet noise to a new node
                new_node.apply_dirichlet_noise_to_prior_policy(
                    epsilon=self.dirichlet_epsilon * .02, alpha=self.dirichlet_alpha)

        if not self.use_pruning:
            # include a reference to the new node in the look-up table
            self.node_lookup[key] = new_node

        with parent_node.lock:
            # add the new node to its parent
            parent_node.child_nodes[child_idx] = new_node

    # check if we have reached a leaf node
    elif node.is_leaf:
        value = node.v
    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, chosen_nodes = self._run_single_playout(
            state, node, pipe_id, depth + 1, chosen_nodes)

    # revert the virtual loss and apply the predicted value by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)

    # we invert the value prediction for the parent of the above node layer
    # because the player's turn is flipped every turn
    return -value, depth, chosen_nodes
def _expand_root_node_single_move(self, state, legal_moves):
    """
    Expands the current root when there's only a single move available, so the
    neural-network evaluation of the root position itself can be skipped.

    :param state: Request games state
    :param legal_moves: Available moves (a single entry)
    :return:
    """
    value = 0  # dummy value - the root's own evaluation is irrelevant for a forced move
    p_vec_small = np.array([1], np.float32)  # the single legal move receives probability 1

    # create a new root node
    self.root_node = Node(value, p_vec_small, legal_moves, str(state.get_legal_moves()))

    # nothing to do if the only child node has already been created
    if self.root_node.child_nodes[0] is not None:
        return

    child_state = deepcopy(state)
    child_state.apply_move(legal_moves[0])

    child_is_leaf = False  # assume a regular (non-terminal) child by default

    # NOTE(review): the win/draw checks below inspect `state` (the position before the forced
    # move) rather than `child_state` - verify this is intentional
    if state.is_won():
        # the current player has won the game (no is_lost() check needed because the game
        # would already be over if the current player checkmated his opponent)
        value = -1
        child_is_leaf = True
        child_moves = []
        child_policy = None
    elif self.can_claim_threefold_repetition(state.get_transposition_key(), [0]) or \
            state.get_pythonchess_board().can_claim_fifty_moves():
        # a claimable draw is assumed to always be claimed
        value = 0
        child_is_leaf = True
        child_moves = []
        child_policy = None
    else:
        child_moves = child_state.get_legal_moves()
        # run a brand new prediction for the child position
        [value, policy_vec] = self.nets[0].predict_single(child_state.get_state_planes())
        # extract a sparse policy vector with normalized probabilities
        child_policy = get_probs_of_move_list(
            policy_vec, child_moves, child_state.is_white_to_move())

    # create the child node and connect it to the root
    child_node = Node(value, child_policy, child_moves,
                      str(child_state.get_legal_moves()), child_is_leaf)
    self.root_node.child_nodes[0] = child_node