Esempio n. 1
0
    def __init__(self, name):
        """
        Create the Flask app, register all routes and instantiate the agents.

        :param name: Name which is handed over to the Flask constructor
        """
        self.app = Flask(name)

        # (url rule, endpoint name, handler) - registered in exactly this order
        routes = (
            ("/api/state", "api/state", ChessServer.serve_state),
            ("/api/new", "api/new", ChessServer.serve_new_game),
            ("/api/move", "api/move", ChessServer.serve_move),
            ("/", "serve_client_r", ChessServer.serve_client),
            ("/<path:path>", "serve_client", ChessServer.serve_client),
        )
        for url_rule, endpoint_name, handler in routes:
            self.app.add_url_rule(url_rule, endpoint_name,
                                  self._wrap_endpoint(handler))

        self._gamestate = GameState()

        # a single network handle is shared by both agents
        net = NeuralNetAPI()

        # batch_size, cpuct and dirichlet_epsilon are not defined in this method;
        # they are resolved from the enclosing module scope at call time
        raw_net_agent = RawNetAgent(net)
        mcts_agent = MCTSAgent(net,
                               virtual_loss=3,
                               threads=batch_size,
                               cpuct=cpuct,
                               dirichlet_epsilon=dirichlet_epsilon)
        player_agents = {"raw_net": raw_net_agent, "mcts": mcts_agent}

        # the raw network agent is the one which answers the moves
        self.agent = player_agents["raw_net"]
Esempio n. 2
0
    def perform_action(self, state: GameState, verbose=True):
        """
        Select a move via the parent class and register the already expanded
        children of the selected child node in the node look-up table.

        NOTE: ``state`` is modified in place - the selected move is applied to it.

        :param state: Game state for which a move is selected
        :param verbose: Not used by this method; kept for interface compatibility
        :return: value: Q-value of the selected child of the root node
                 selected_move: Move which was chosen by the parent class
                 confidence: Confidence which was returned by the parent class
                 selected_child_idx: Index of the selected move in the root node
        """

        # the value returned by the parent class is replaced by the q-value below
        _, selected_move, confidence, selected_child_idx = super(
        ).perform_action(state)

        # apply the selected move on the current board state in order to create a lookup table for future board states
        state.apply_move(selected_move)

        # select the q value for the child which leads to the best calculated line
        value = self.root_node.q[selected_child_idx]

        # select the next node
        node = self.root_node.child_nodes[selected_child_idx]

        # store the reference links for all expanded children in the node lookup table
        if node is not None:
            for idx, mv in enumerate(state.get_legal_moves()):
                # indices beyond the direct children can never be stored, so stop early
                if idx >= node.nb_direct_child_nodes:
                    break

                child = node.child_nodes[idx]
                if child is None:
                    # only copy the state for children which have been expanded
                    continue

                state_future = deepcopy(state)
                state_future.apply_move(mv)

                # store the child node with its board fen as the hash-key
                self.node_lookup[state_future.get_board_fen()] = child

        return value, selected_move, confidence, selected_child_idx
Esempio n. 3
0
def setup_network():
    """
    Create the neural network services, both agents and the initial game state.

    The setup only runs while the module-level flag ``setup_done`` is ``False``
    and sets the flag to ``True`` once it has finished.
    :return:
    """

    global gamestate, setup_done, rawnet_agent, mcts_agent, s, engine_played_move

    # nothing to do unless the flag is exactly False
    if setup_done is not False:
        return

    from DeepCrazyhouse.src.domain.crazyhouse.GameState import GameState
    from DeepCrazyhouse.src.domain.agent.NeuralNetAPI import NeuralNetAPI
    from DeepCrazyhouse.src.domain.agent.player.RawNetAgent import RawNetAgent
    from DeepCrazyhouse.src.domain.agent.player.MCTSAgent import MCTSAgent

    # check for valid parameter setup and do auto-corrections if possible
    param_validity_check()

    def scaled(key):
        """Return the setting stored under `key` divided by 100."""
        return s[key] / 100

    # one network handle per configured service
    nets = [
        NeuralNetAPI(ctx=s["context"], batch_size=s["batch_size"])
        for _ in range(s["neural_net_services"])
    ]

    # the raw network agent only uses the first network handle
    rawnet_agent = RawNetAgent(
        nets[0],
        temperature=scaled("centi_temperature"),
        temperature_moves=s["temperature_moves"],
    )

    mcts_agent = MCTSAgent(
        nets,
        cpuct=scaled("centi_cpuct"),
        playouts_empty_pockets=s["playouts_empty_pockets"],
        playouts_filled_pockets=s["playouts_filled_pockets"],
        max_search_depth=s["max_search_depth"],
        dirichlet_alpha=scaled("centi_dirichlet_alpha"),
        q_value_weight=scaled("centi_q_value_weight"),
        dirichlet_epsilon=scaled("centi_dirichlet_epsilon"),
        virtual_loss=s["virtual_loss"],
        threads=s["threads"],
        temperature=scaled("centi_temperature"),
        temperature_moves=s["temperature_moves"],
        verbose=s["verbose"],
        min_movetime=MIN_SEARCH_TIME_MS,
        batch_size=s["batch_size"],
        check_mate_in_one=s["check_mate_in_one"],
        use_pruning=s["use_pruning"],
        use_oscillating_cpuct=s["use_oscillating_cpuct"],
        use_time_management=s["use_time_management"],
        opening_guard_moves=s["opening_guard_moves"],
    )

    gamestate = GameState()
    setup_done = True
Esempio n. 4
0
def setup_network():
    """
    Load the libraries and the weights of the neural network

    Creates the network handle, the raw network agent, the MCTS agent and the
    initial game state. The setup only runs while the module-level flag
    ``setup_done`` is ``False`` and sets the flag to ``True`` afterwards.

    All settings whose key starts with ``centi_`` are divided by 100 before
    they are handed over to an agent.
    :return:
    """

    global gamestate
    global setup_done
    global rawnet_agent
    global mcts_agent
    global s
    global engine_played_move

    if setup_done is False:
        from DeepCrazyhouse.src.domain.crazyhouse.GameState import GameState
        from DeepCrazyhouse.src.domain.agent.NeuralNetAPI import NeuralNetAPI
        from DeepCrazyhouse.src.domain.agent.player.RawNetAgent import RawNetAgent
        from DeepCrazyhouse.src.domain.agent.player.MCTSAgent import MCTSAgent

        # check for valid parameter setup and do auto-corrections if possible
        param_validity_check()

        net = NeuralNetAPI(ctx=s['context'], batch_size=s['batch_size'])

        # scale the centi settings here as well, so that both agents receive
        # the same temperature and clip quantil values
        rawnet_agent = RawNetAgent(net,
                                   temperature=s['centi_temperature'] / 100,
                                   clip_quantil=s['centi_clip_quantil'] / 100)

        mcts_agent = MCTSAgent(
            net,
            cpuct=s['centi_cpuct'] / 100,
            playouts_empty_pockets=s['playouts_empty_pockets'],
            playouts_filled_pockets=s['playouts_filled_pockets'],
            max_search_depth=s['max_search_depth'],
            dirichlet_alpha=s['centi_dirichlet_alpha'] / 100,
            q_value_weight=s['centi_q_value_weight'] / 100,
            dirichlet_epsilon=s['centi_dirichlet_epsilon'] / 100,
            virtual_loss=s['virtual_loss'],
            threads=s['threads'],
            temperature=s['centi_temperature'] / 100,
            verbose=s['verbose'],
            clip_quantil=s['centi_clip_quantil'] / 100,
            min_movetime=MIN_SEARCH_TIME_MS,
            batch_size=s['batch_size'],
            check_mate_in_one=s['check_mate_in_one'])

        gamestate = GameState()

        setup_done = True
Esempio n. 5
0
    def _run_single_playout(self,
                            state: GameState,
                            parent_node: Node,
                            depth=1,
                            mv_list=None):
        """
        This function works recursively until a new node was expanded or a leaf node is reached

        NOTE: ``state`` is modified in place - every selected move is applied to it.

        :param state: Current game-state for the evaluation. This state differs between the threads
        :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
        :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call
        :param mv_list: List of moves which have been taken in the current path. For each selected child node this list
                        is expanded by one move recursively. A new list is created if None is given.
        :return: -value: The inverse value prediction of the current board state. The flipping by -1 each turn is needed
                        because the point of view changes each half-move
                depth: Current depth reach by this evaluation
                mv_list: List of moves which have been selected
        :raises Exception: If no move could be selected, if a non-terminal state has no legal move or if the
                        policy look-up fails with a KeyError
        """

        # create a fresh list for every call; a list literal as default value would be
        # shared between all calls which rely on the default
        if mv_list is None:
            mv_list = []

        # select a legal move on the chess board
        node, move, child_idx = self._select_node(parent_node)

        if move is None:
            raise Exception(
                "Illegal tree setup. A 'None' move was selected which souldn't be possible"
            )

        # update the visit counts to this node
        # temporarily reduce the attraction of this node by applying a virtual loss /
        # the effect of virtual loss will be undone if the playout is over
        parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

        # apply the selected move on the board
        state.apply_move(move)

        # append the selected move to the move list
        mv_list.append(move)

        if node is None:

            # get the board-fen which is used as an identifier for the board positions in the look-up table
            board_fen = state.get_board_fen()

            # check if the addressed fen exist in the look-up table
            if board_fen in self.node_lookup:
                # get the node from the look-up list
                node = self.node_lookup[board_fen]

                with parent_node.lock:
                    # setup a new connection from the parent to the child
                    parent_node.child_nodes[child_idx] = node

                # get the value from the leaf node (the current function is called recursively)
                value, depth, mv_list = self._run_single_playout(
                    state, node, depth + 1, mv_list)

            else:
                # expand and evaluate the new board state (the node wasn't found in the look-up table)
                # its value will be backpropagated through the tree and flipped after every layer

                # receive a free available pipe
                my_pipe = self.my_pipe_endings.pop()
                my_pipe.send(state.get_state_planes())
                # this pipe waits for the predictions of the network inference service
                [value, policy_vec] = my_pipe.recv()
                # put the used pipe back into the list
                self.my_pipe_endings.append(my_pipe)

                # initialize is_leaf by default to false
                is_leaf = False

                # check if the current player has won the game
                # (we don't need to check for is_lost() because the game is already over
                #  if the current player checkmated his opponent)
                if state.is_won() is True:
                    value = -1
                    is_leaf = True
                    legal_moves = []
                    p_vec_small = None

                # check if you can claim a draw - its assumed that the draw is always claimed
                elif state.is_draw() is True:
                    value = 0
                    is_leaf = True
                    legal_moves = []
                    p_vec_small = None
                else:
                    # get the current legal move of its board state
                    legal_moves = list(state.get_legal_moves())
                    if len(legal_moves) < 1:
                        raise Exception(
                            'No legal move is available for state: %s' % state)

                    # extract a sparse policy vector with normalized probabilities
                    try:
                        p_vec_small = get_probs_of_move_list(
                            policy_vec,
                            legal_moves,
                            is_white_to_move=state.is_white_to_move(),
                            normalize=True)

                    except KeyError as err:
                        # keep the original KeyError as the cause of the raised exception
                        raise Exception(
                            'Key Error for state: %s' % state) from err

                # convert all legal moves to a string if the option check_mate_in_one was enabled
                if self.check_mate_in_one is True:
                    str_legal_moves = str(state.get_legal_moves())
                else:
                    str_legal_moves = ''

                # create a new node
                new_node = Node(value, p_vec_small, legal_moves,
                                str_legal_moves, is_leaf)

                # include a reference to the new node in the look-up table
                self.node_lookup[board_fen] = new_node

                with parent_node.lock:
                    # add the new node to its parent
                    parent_node.child_nodes[child_idx] = new_node

                # check if the new node has a mate_in_one connection (if yes overwrite the network prediction)
                if new_node.mate_child_idx is not None:
                    value = 1

        # check if we have reached a leaf node
        elif node.is_leaf is True:
            value = node.v
            # a prediction is still requested for leaf nodes but its result is discarded
            # NOTE(review): presumably this keeps the requests of all threads in step
            # with the inference service - confirm against the prediction service
            # receive a free available pipe
            my_pipe = self.my_pipe_endings.pop()
            my_pipe.send(state.get_state_planes())
            # this pipe waits for the predictions of the network inference service
            [_, _] = my_pipe.recv()
            # put the used pipe back into the list
            self.my_pipe_endings.append(my_pipe)

        else:
            # get the value from the leaf node (the current function is called recursively)
            value, depth, mv_list = self._run_single_playout(
                state, node, depth + 1, mv_list)

        # revert the virtual loss and apply the predicted value by the network to the node
        parent_node.revert_virtual_loss_and_update(child_idx,
                                                   self.virtual_loss, -value)

        # we invert the value prediction for the parent of the above node layer because the player's turn is flipped every turn
        return -value, depth, mv_list
Esempio n. 6
0
    def evaluate_board_state(self, state_in: GameState):
        """
        Analyzes the current board state

        If only a single legal move exists no search is run: the root node is
        either taken from the look-up table or created together with its only
        child. Otherwise playouts are run in batches of ``self.threads`` until
        the depth limit, the playout limit or the move time is reached.

        Side effects: starts the prediction service if it isn't running yet,
        replaces ``self.root_node``, rewrites ``self.node_lookup`` and prints
        'info ...' lines to stdout.

        :param state_in: Actual game state to evaluate for the MCTS. It isn't modified, all work is done on copies.
        :return: value: Q-value of the child with the highest MCTS policy value
                 legal_moves: List of all legal moves of the given state
                 p_vec_small: Policy vector with one entry for every legal move
        :raises Exception: If the given state has no legal move
        """

        # store the time at which the search started
        t_start_eval = time()

        state = deepcopy(state_in)

        # check if the net prediction service has already been started
        if self.net_pred_service.running is False:
            # start the prediction daemon thread
            self.net_pred_service.start()

        # receive a list of all possible legal move in the current board position
        legal_moves = list(state.get_legal_moves())

        # store what depth has been reached at maximum in the current search tree
        # default is 1, in case only 1 move is available
        max_depth_reached = 1

        # consistency check
        if len(legal_moves) == 0:
            raise Exception(
                'The given board state has no legal move available')

        # check for fast way out
        if len(legal_moves) == 1:

            # set value 0 as a dummy value
            value = 0
            p_vec_small = np.array([1], np.float32)

            # use the same key as every other access to the look-up table
            board_fen = state.get_board_fen()

            # check first if the the current tree can be reused
            if board_fen in self.node_lookup:
                self.root_node = self.node_lookup[board_fen]
                logging.debug(
                    'Reuse the search tree. Number of nodes in search tree: %d',
                    self.root_node.n_sum)
            else:
                logging.debug(
                    "The given board position wasn't found in the search tree."
                )
                logging.debug("Starting a brand new search tree...")

                # create a new root node
                self.root_node = Node(value, p_vec_small, legal_moves,
                                      str(state.get_legal_moves()))

                # check a child node if it doesn't exists already
                if self.root_node.child_nodes[0] is None:
                    state_child = deepcopy(state_in)
                    state_child.apply_move(legal_moves[0])

                    # initialize is_leaf by default to false
                    is_leaf = False

                    # the terminal checks must look at the position after the move;
                    # the root position itself still has a legal move

                    # check if the current player has won the game
                    # (we don't need to check for is_lost() because the game is already over
                    #  if the current player checkmated his opponent)
                    if state_child.is_won() is True:
                        value = -1
                        is_leaf = True
                        legal_moves_child = []
                        p_vec_small_child = None

                    # check if you can claim a draw - its assumed that the draw is always claimed
                    elif state_child.is_draw() is True:
                        value = 0
                        is_leaf = True
                        legal_moves_child = []
                        p_vec_small_child = None

                    else:
                        legal_moves_child = list(state_child.get_legal_moves())

                        # start a brand new prediction for the child
                        state_planes = state_child.get_state_planes()
                        [value,
                         policy_vec] = self.net.predict_single(state_planes)

                        # extract a sparse policy vector with normalized probabilities
                        p_vec_small_child = get_probs_of_move_list(
                            policy_vec, legal_moves_child,
                            state_child.is_white_to_move())

                    # create a new child node
                    child_node = Node(value, p_vec_small_child,
                                      legal_moves_child,
                                      str(state_child.get_legal_moves()),
                                      is_leaf)

                    # connect the child to the root
                    self.root_node.child_nodes[0] = child_node

        else:
            board_fen = state.get_board_fen()

            # check first if the the current tree can be reused
            if board_fen in self.node_lookup:
                self.root_node = self.node_lookup[board_fen]
                logging.debug(
                    'Reuse the search tree. Number of nodes in search tree: %d',
                    self.root_node.nb_total_expanded_child_nodes)
            else:
                logging.debug(
                    "The given board position wasn't found in the search tree."
                )
                logging.debug("Starting a brand new search tree...")

                # initialize is_leaf by default to false
                is_leaf = False

                # start a brand new tree
                state_planes = state.get_state_planes()
                [value, policy_vec] = self.net.predict_single(state_planes)

                # extract a sparse policy vector with normalized probabilities
                p_vec_small = get_probs_of_move_list(policy_vec, legal_moves,
                                                     state.is_white_to_move())

                # create a new root node
                self.root_node = Node(value, p_vec_small, legal_moves,
                                      str(state.get_legal_moves()), is_leaf)

            # clear the look up table
            self.node_lookup = {}

            # apply dirichlet noise to the prior probabilities in order to ensure
            #  that every move can possibly be visited
            self.root_node.apply_dirichlet_noise_to_prior_policy(
                epsilon=self.dirichlet_epsilon, alpha=self.dirichlet_alpha)

            # set the number of playouts accordingly
            if state_in.are_pocket_empty() is True:
                nb_playouts = self.nb_playouts_empty_pockets
            else:
                nb_playouts = self.nb_playouts_filled_pockets

            t_elapsed = 0
            cur_playouts = 0
            old_time = time()

            while (max_depth_reached < self.max_search_depth
                   and cur_playouts < nb_playouts
                   and t_elapsed * 1000 < self.movetime_ms):

                # collect only the playouts of the current batch; keeping the futures
                # of earlier batches would re-read all their results in every iteration
                futures = []

                # start searching (leaving the with-block waits for all playouts)
                with ThreadPoolExecutor(max_workers=self.threads) as executor:
                    for _ in range(self.threads):
                        futures.append(
                            executor.submit(self._run_single_playout,
                                            state=deepcopy(state),
                                            parent_node=self.root_node,
                                            depth=1,
                                            mv_list=[]))

                cur_playouts += self.threads
                time_show_info = time() - old_time

                for f in futures:
                    cur_value, cur_depth, mv_list = f.result()

                    if cur_depth > max_depth_reached:
                        max_depth_reached = cur_depth

                    # Print every second if verbose is true
                    if self.verbose and time_show_info > 1:
                        str_moves = self._mv_list_to_str(mv_list)
                        logging.debug('Update: %d' % cur_depth)
                        print('info score cp %d depth %d nodes %d pv%s' %
                              (value_to_centipawn(cur_value), cur_depth,
                               self.root_node.n_sum, str_moves))
                        old_time = time()

                # update the current search time
                t_elapsed = time() - t_start_eval
                if self.verbose and time_show_info > 1:
                    print(
                        'info nps %d time %d' %
                        ((self.root_node.n_sum / t_elapsed), t_elapsed * 1000))

            # receive the policy vector based on the MCTS search
            p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)
            print('info string move overhead is %dms' %
                  (t_elapsed * 1000 - self.movetime_ms))

        # store the current root in the lookup table
        self.node_lookup[state.get_board_fen()] = self.root_node

        # select the q-value according to the mcts best child value
        best_child_idx = self.root_node.get_mcts_policy(
            self.q_value_weight).argmax()
        value = self.root_node.q[best_child_idx]

        lst_best_moves, _ = self.get_calculated_line()

        str_moves = self._mv_list_to_str(lst_best_moves)

        # show the best calculated line
        time_e = time() - t_start_eval
        node_searched = self.root_node.n_sum
        print('info score cp %d depth %d nodes %d time %d nps %d pv%s' %
              (value_to_centipawn(value), max_depth_reached, node_searched,
               time_e * 1000, node_searched / max(1, time_e), str_moves))

        if len(legal_moves) != len(p_vec_small):
            print(
                'Legal move list %s with length %s is uncompatible to policy vector %s with shape %s for board state %s'
                % (legal_moves, len(legal_moves), p_vec_small,
                   p_vec_small.shape, state_in))
            self.node_lookup = {}
            # restart the search TODO: Fix this error
            # Known case: a reused node supplied a policy vector of shape (68,) for
            # 64 legal moves in the position
            # r4rk1/ppp2pp1/3p1q1p/n1bPp3/2B1B1b1/3P1N2/PPP2PPP/R2Q1RK1[Nn] w - - 2 13
            # The look-up table was cleared above, so the retry builds a new tree.
            return self.evaluate_board_state(state_in)

        return value, legal_moves, p_vec_small
Esempio n. 7
0
class ChessServer(object):
    """
    Small Flask server which lets a browser client play against an agent.

    The server keeps a single game state and answers every accepted client
    move with a move of the configured agent.
    """

    def __init__(self, name):
        """
        Create the Flask app, register all routes and instantiate the agents.

        :param name: Name which is handed over to the Flask constructor
        """
        self.app = Flask(name)

        # (url rule, endpoint name, handler) - registered in exactly this order
        routes = (
            ("/api/state", "api/state", ChessServer.serve_state),
            ("/api/new", "api/new", ChessServer.serve_new_game),
            ("/api/move", "api/move", ChessServer.serve_move),
            ("/", "serve_client_r", ChessServer.serve_client),
            ("/<path:path>", "serve_client", ChessServer.serve_client),
        )
        for url_rule, endpoint_name, handler in routes:
            self.app.add_url_rule(url_rule, endpoint_name,
                                  self._wrap_endpoint(handler))

        self._gamestate = GameState()

        # a single network handle is shared by both agents
        net = NeuralNetAPI()

        # batch_size, cpuct and dirichlet_epsilon are not defined in this class;
        # they are resolved from the enclosing module scope at call time
        raw_net_agent = RawNetAgent(net)
        mcts_agent = MCTSAgent(net,
                               virtual_loss=3,
                               threads=batch_size,
                               cpuct=cpuct,
                               dirichlet_epsilon=dirichlet_epsilon)
        player_agents = {"raw_net": raw_net_agent, "mcts": mcts_agent}

        # the raw network agent answers the moves; "mcts" is the alternative key
        self.agent = player_agents["raw_net"]

    def _wrap_endpoint(self, func):
        """
        Bind an unbound handler to this server instance.

        :param func: Function which expects the server as its first argument
        :return: Callable which forwards its keyword arguments to `func`
        """

        def endpoint(**kwargs):
            return func(self, **kwargs)

        return endpoint

    def run(self):
        """Start the Flask application."""
        self.app.run()

    # noinspection PyMethodMayBeStatic
    def serve_client(self, path=None):
        """
        Serve a file of the client directory.

        :param path: Requested path; "index.html" is used if None is given
        :return: Result of send_from_directory for the "./client" directory
        """
        requested = "index.html" if path is None else path
        return send_from_directory("./client", requested)

    def serve_state(self):
        """Return the serialized current game state."""
        return self.serialize_game_state()

    def serve_new_game(self):
        """Start a new game and return its serialized state."""
        logging.debug("staring new game()")
        self.perform_new_game()
        return self.serialize_game_state()

    def serve_move(self):
        """
        Apply the move given by the request arguments and let the agent answer.

        Reads the query arguments "drop", "from", "to" and "promotion".
        :return: Serialized game state, with a message if the move was rejected
                 or if the agent couldn't answer
        """
        # read move data
        query = request.args
        drop_piece = query.get("drop")
        from_square = query.get("from")
        to_square = query.get("to")
        promotion_piece = query.get("promotion")
        from_square_idx = get_square_index_from_name(from_square)
        to_square_idx = get_square_index_from_name(to_square)

        # a start square is only optional for drop moves
        start_missing = from_square_idx is None and drop_piece is None
        if start_missing or to_square_idx is None:
            return self.serialize_game_state("board name is invalid")

        drop = None
        if drop_piece is not None:
            # drop moves use the target square as the start square as well
            from_square_idx = to_square_idx
            if drop_piece not in chess.PIECE_SYMBOLS:
                return self.serialize_game_state("drop piece name is invalid")
            drop = chess.PIECE_SYMBOLS.index(drop_piece)

        promotion = None
        if promotion_piece is not None:
            if promotion_piece not in chess.PIECE_SYMBOLS:
                return self.serialize_game_state(
                    "promotion piece name is invalid")
            promotion = chess.PIECE_SYMBOLS.index(promotion_piece)

        move = chess.Move(from_square_idx, to_square_idx, promotion, drop)

        # perform move
        try:
            self.perform_move(move)
        except ValueError as err:
            logging.error("ValueError %s", err)
            return self.serialize_game_state(err.args[0])

        # calculate agent response
        agent_moved = self.perform_agent_move()
        if agent_moved:
            return self.serialize_game_state()
        return self.serialize_game_state("Black has no more moves to play",
                                         True)

    def perform_new_game(self):
        """Replace the current game state by a new GameState instance."""
        self._gamestate = GameState()

    def perform_move(self, move):
        """
        Apply a move to the game state if it is legal.

        :param move: Move which shall be played
        :return: False if the game is won after the move, otherwise None
        :raises ValueError: If the move isn't part of the legal moves
        """
        logging.debug("perform_move(): %s", move)

        # check if move is valid
        legal_moves = list(self._gamestate.board.legal_moves)
        if move not in legal_moves:
            raise ValueError(
                "The given move %s is invalid for the current position" % move)
        self._gamestate.apply_move(move)

        if self._gamestate.is_won():
            logging.debug("Checkmate")
            return False

    def perform_agent_move(self):
        """
        Let the agent select a move and apply it to the game state.

        :return: True if the agent's move was played; False if the game was
                 already won or the agent proposed no move
        """
        if self._gamestate.is_won():
            logging.debug("Checkmate")
            return False

        value, move, _confidence, _ = self.agent.perform_action(
            self._gamestate)

        # the logged value is negated when it isn't white's turn
        white_to_move = self._gamestate.is_white_to_move()
        if white_to_move is False:
            value = -value

        logging.debug("Value %.4f", value)

        if move is None:
            logging.error("None move proposed!")
            return False

        self.perform_move(move)
        return True

    def serialize_game_state(self, message=None, finished=None):
        """
        Convert the current game state into a JSON string.

        :param message: Optional message for the client; None becomes ""
        :param finished: Optional flag which is only included if it isn't None
        :return: JSON string with the keys "board", "pocket", "message" and
                 optionally "finished"
        """
        board = self._gamestate.board
        pockets = board.pockets

        state = {
            "board": str(board),
            # the pocket with index 1 is listed first
            "pocket": "|".join((str(pockets[1]), str(pockets[0]))),
            "message": "" if message is None else message,
        }
        if finished is not None:
            state["finished"] = finished
        return json.dumps(state)
Esempio n. 8
0
 def perform_new_game(self):
     """Replace the current game state with a newly constructed ``GameState``."""
     self._gamestate = GameState()
Esempio n. 9
0
    def _run_single_playout(self,
                            state: GameState,
                            parent_node: Node,
                            pipe_id=0,
                            depth=1,
                            chosen_nodes=None):
        """
        This function works recursively until a leaf or terminal node is reached.
        It ends by backpropagating the value of the new expanded node or by propagating the value of a terminal state.

        :param state: Current game-state for the evaluation. At depth 1 a new GameState is built from a deep copy
                        of its board, so the moves of the playout are applied to that copy
        :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
        :param pipe_id: Index of the pipe ending (and of the batch slot) which is used for the network request
        :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call
        :param chosen_nodes: List of the child indices which have been selected on the current path with respect to
                        the root node. The list is extended in place by one entry per call.
                        If None is given a new empty list is created.
        :return: -value: The inverse value prediction of the current board state. The flipping by -1 each turn is needed
                        because the point of view changes each half-move
                depth: Depth which has been reached by this playout
                chosen_nodes: List of the child indices which have been selected on the path of this playout
        """

        # a fresh list is created for every top-level call;
        # a mutable default argument would be shared between all calls which omit the parameter
        if chosen_nodes is None:
            chosen_nodes = []

        # select a legal move on the chess board
        node, move, child_idx = self._select_node(parent_node)

        if move is None:
            raise Exception(
                "Illegal tree setup. A 'None' move was selected which souldn't be possible"
            )

        # update the visit counts to this node
        # temporarily reduce the attraction of this node by applying a virtual loss /
        # the effect of virtual loss will be undone if the playout is over
        parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

        if depth == 1:
            # work on a copy of the board for the rest of the playout
            state = GameState(deepcopy(state.get_pythonchess_board()))

        # apply the selected move on the board
        state.apply_move(move)

        # append the chosen child idx to the chosen_nodes list
        chosen_nodes.append(child_idx)

        if node is None:

            # get the transposition-key which is used as an identifier for the board positions in the look-up table
            transposition_key = state.get_transposition_key()

            # key under which the new node is stored in the look-up table
            # note: It's important to use also the halfmove-counter here, otherwise the system can create an infinite
            # feed-back-loop
            key = (transposition_key, state.get_halfmove_counter())

            # expand and evaluate the new board state
            # its value will be backpropagated through the tree and flipped after every layer
            # select the pipe which belongs to this pipe_id
            my_pipe = self.my_pipe_endings[pipe_id]

            if self.send_batches is True:
                my_pipe.send(state.get_state_planes())
                # this pipe waits for the predictions of the network inference service
                [value, policy_vec] = my_pipe.recv()
            else:
                # write the planes into the batch slot of this pipe and only send the slot index
                state_planes = state.get_state_planes()
                self.batch_state_planes[pipe_id] = state_planes

                my_pipe.send(pipe_id)

                # the answer is the index at which the results can be read
                result_channel = my_pipe.recv()

                value = np.array(self.batch_value_results[result_channel])
                policy_vec = np.array(
                    self.batch_policy_results[result_channel])

            # initialize is_leaf by default to false
            is_leaf = False

            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            #  if the current player checkmated his opponent)
            is_won = False
            is_check = False

            # is_won() is only queried if the position is a check
            if state.is_check() is True:
                is_check = True
                if state.is_won() is True:
                    is_won = True

            if is_won is True:
                value = -1
                is_leaf = True
                legal_moves = []
                p_vec_small = None
                # establish a mate in one connection in order to stop exploring different alternatives
                parent_node.mate_child_idx = child_idx

            # check if you can claim a draw - its assumed that the draw is always claimed
            elif self.can_claim_threefold_repetition(transposition_key, chosen_nodes) or \
                    state.get_pythonchess_board().can_claim_fifty_moves() is True:
                value = 0
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            else:
                # get the current legal move of its board state
                legal_moves = state.get_legal_moves()

                if len(legal_moves) < 1:
                    raise Exception(
                        'No legal move is available for state: %s' % state)

                # extract a sparse policy vector with normalized probabilities
                try:
                    p_vec_small = get_probs_of_move_list(
                        policy_vec,
                        legal_moves,
                        is_white_to_move=state.is_white_to_move(),
                        normalize=True)

                except KeyError as err:
                    # keep the original KeyError attached as the cause
                    raise Exception('Key Error for state: %s' % state) from err

            # convert all legal moves to a string if the option check_mate_in_one was enabled
            if self.check_mate_in_one is True:
                str_legal_moves = str(state.get_legal_moves())
            else:
                str_legal_moves = ''

            # clip the visit nodes for all nodes in the search tree except the direct opp. move
            clip_low_visit = self.use_pruning and depth != 1

            # create a new node
            new_node = Node(value, p_vec_small, legal_moves, str_legal_moves,
                            is_leaf, transposition_key, clip_low_visit)

            if depth == 1:

                # disable uncertain moves from being visited by giving them a very bad score
                # note: new_node has already been created at this point,
                # only the value which is backpropagated below is overwritten
                if is_leaf is False:
                    if self.root_node_prior_policy[
                            child_idx] < 1e-3 and value * -1 < self.root_node.v:
                        with parent_node.lock:
                            value = 99

                if value < 0:  # and state.are_pocket_empty(): #and pipe_id == 0:
                    # test of adding dirichlet noise to a new node
                    new_node.apply_dirichlet_noise_to_prior_policy(
                        epsilon=self.dirichlet_epsilon * .02,
                        alpha=self.dirichlet_alpha)

            if self.use_pruning is False:
                # include a reference to the new node in the look-up table
                self.node_lookup[key] = new_node

            with parent_node.lock:
                # add the new node to its parent
                parent_node.child_nodes[child_idx] = new_node

        # check if we have reached a leaf node
        elif node.is_leaf is True:
            value = node.v

        else:
            # get the value from the leaf node (the current function is called recursively)
            value, depth, chosen_nodes = self._run_single_playout(
                state, node, pipe_id, depth + 1, chosen_nodes)

        # revert the virtual loss and apply the predicted value by the network to the node
        parent_node.revert_virtual_loss_and_update(child_idx,
                                                   self.virtual_loss, -value)

        # we invert the value prediction for the parent of the above node layer because the player's turn is flipped every turn
        return -value, depth, chosen_nodes
Esempio n. 10
0
    def evaluate_board_state(self, state: GameState):
        """
        Analyzes the current board state. This is the main method which get called by the uci interface or analysis
        request.

        :param state: Actual game state to evaluate for the MCTS
        :return: value: Q-value of the root child which has the highest entry in the MCTS policy
                 legal_moves: List of all legal moves of the given state
                 p_vec_small: Policy vector based on the MCTS search (one entry per legal move)
                 cp: The value converted by value_to_centipawn()
                 depth: Maximum depth which was reached by the search (1 if there's only a single legal move)
                 nodes: Difference between the visit sum of the root node after and before this search
                 time_elapsed_ms: Elapsed time of this evaluation in milliseconds
                 nps: Searched nodes per second (0 if no measurable time has elapsed)
                 pv: Best calculated line as a string
        :raises Exception: If the state has no legal move or
                 if the length of the policy vector doesn't match the number of legal moves
        """

        # store the time at which the search started
        self.t_start_eval = time()

        # check if the net prediction service has already been started
        if self.net_pred_services[0].running is False:
            # start the prediction daemon thread
            for net_pred_service in self.net_pred_services:
                net_pred_service.start()

        # receive a list of all possible legal move in the current board position
        legal_moves = state.get_legal_moves()

        # consistency check
        if len(legal_moves) == 0:
            raise Exception(
                'The given board state has no legal move available')

        # check first if the the current tree can be reused
        # note: the halfmove counter must be called here. The playouts store their nodes under
        # (transposition_key, halfmove_counter_value); a key which holds the bound method instead never matches them
        key = (state.get_transposition_key(), state.get_halfmove_counter())

        if self.use_pruning is False and key in self.node_lookup:
            self.root_node = self.node_lookup[key]
            logging.debug(
                'Reuse the search tree. Number of nodes in search tree: %d',
                self.root_node.nb_total_expanded_child_nodes)
            # remember the visits which were already present in order to report only the newly searched nodes
            self.total_nodes_pre_search = deepcopy(self.root_node.n_sum)

            # reset potential good nodes for the root
            self.root_node.q[self.root_node.q < 1.1] = 0

        else:
            logging.debug("Starting a brand new search tree...")
            self.root_node = None
            self.total_nodes_pre_search = 0

        # check for fast way out
        if len(legal_moves) == 1:

            # if there's only a single legal move you only must go 1 depth
            max_depth_reached = 1

            if self.root_node is None:
                # conduct all necessary steps for fastest way out
                self._expand_root_node_single_move(state, legal_moves)
        else:

            if self.root_node is None:
                # run a single expansion on the root node
                self._expand_root_node_multiple_moves(state, legal_moves)

            # conduct the mcts-search based on the given settings
            max_depth_reached = self._run_mcts_search(state)

            # report how much longer the search took than the given movetime
            t_elapsed = time() - self.t_start_eval
            print('info string move overhead is %dms' %
                  (t_elapsed * 1000 - self.movetime_ms))

        # receive the policy vector based on the MCTS search
        p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)

        if self.use_pruning is False:
            # store the current root in the lookup table
            self.node_lookup[key] = self.root_node

        # select the q-value according to the mcts best child value
        best_child_idx = p_vec_small.argmax()
        value = self.root_node.q[best_child_idx]

        lst_best_moves, _ = self.get_calculated_line()
        str_moves = self._mv_list_to_str(lst_best_moves)

        # number of nodes which have been added by this search
        node_searched = int(self.root_node.n_sum - self.total_nodes_pre_search)
        # In uci the depth is given using half-moves notation also called plies
        time_e = time() - self.t_start_eval

        if len(legal_moves) != len(p_vec_small):
            raise Exception(
                'Legal move list %s with length %s is uncompatible to policy vector %s'
                ' with shape %s for board state %s and nodes legal move list: %s'
                % (legal_moves, len(legal_moves), p_vec_small,
                   p_vec_small.shape, state, self.root_node.legal_moves))

        # define the remaining return variables
        cp = value_to_centipawn(value)
        depth = max_depth_reached
        nodes = node_searched
        # the elapsed time is reported in milliseconds
        time_elapsed_ms = time_e * 1000
        # the fast way out can finish below the resolution of the clock, avoid a division by zero in this case
        nps = node_searched / time_e if time_e > 0 else 0
        pv = str_moves

        return value, legal_moves, p_vec_small, cp, depth, nodes, time_elapsed_ms, nps, pv