예제 #1
0
 def __init__(self, name):
     """Create the Flask app, register all routes and set up the playing agents."""
     self.app = Flask(name)
     # (url rule, endpoint name, handler) triples for every route we expose.
     routes = [
         ("/api/state", "api/state", ChessServer.serve_state),
         ("/api/new", "api/new", ChessServer.serve_new_game),
         ("/api/move", "api/move", ChessServer.serve_move),
         ("/", "serve_client_r", ChessServer.serve_client),
         ("/<path:path>", "serve_client", ChessServer.serve_client),
     ]
     for rule, endpoint, handler in routes:
         self.app.add_url_rule(rule, endpoint, self._wrap_endpoint(handler))
     self._gamestate = GameState()
     net = NeuralNetAPI()
     # Loading network
     player_agents = {
         "raw_net": RawNetAgent(net),
         "mcts": MCTSAgent(
             net, virtual_loss=3, threads=BATCH_SIZE, cpuct=CPUCT, dirichlet_epsilon=DIRICHLET_EPSILON
         ),
     }
     self.agent = player_agents["raw_net"]  # Setting up agent
예제 #2
0
class CrazyAra:  # Too many instance attributes (25/7)
    """Main"""
    def __init__(self):
        """Set up time-management constants, client identity, default UCI settings and the log file."""
        enable_color_logging()
        # Constants
        self.min_search_time = 100  # lower bound for a single move's search time in ms
        self.max_search_time = 10e10  # effectively "unlimited"; used when a fixed depth is requested
        # inc_factor/inc_div (7/8) of the time increment is granted per move in the late-game formula
        self.inc_factor = 7
        self.inc_div = 8
        self.min_moves_left = self.moves_left_increment = 10  # Used to reduce the movetime in the opening
        self.max_bad_pos_value = -0.10  # When pos eval [-1.0 to 1.0] is equal or worse than this then extend time
        # this is the assumed "maximum" blitz game length for calculating a constant movetime
        # after 80% of this game length a new time management starts which is based on movetime left
        self.blitz_game_length = 50
        # use less time in the opening defined by "max_move_num_to_reduce_movetime" by using the constant move time
        self.mv_time_opening_portion = 0.7
        # this variable is intended to increase variance in the moves played by using a different amount of time each
        # move
        self.random_mv_time_portion = 0.1
        # enable this variable if you want to see debug messages in certain environments, like the lichess.org api
        self.enable_lichess_debug_msg = self.setup_done = False
        # Engine identity reported to the GUI in the "uci" reply.
        self.client = {
            "name": "CrazyAra",
            "version": "0.4.0",
            "authors": "Johannes Czech, Moritz Willig, Alena Beyer"
        }
        # Agents and per-game state are created lazily in setup_network(); until then they are None.
        self.mcts_agent = (
            self.rawnet_agent
        ) = self.ab_agent = self.gamestate = self.bestmove_value = self.move_time = self.score = None
        self.engine_played_move = 0  # moves actually searched by the engine (excludes book moves)
        self.log_file_path = "CrazyAra-log.txt"
        self.score_file_path = "score-log.txt"
        # Default UCI options; every "centi_*" value is divided by 100 before use.
        self.settings = {
            "UCI_Variant": "crazyhouse",
            "search_type": "mcts",  # mcts, alpha_beta
            "ab_depth": 5,  # depth to reach for alpha_beta
            "ab_candidate_moves":
            7,  # candidate moves to consider for ab-search, clipped according to NN policy
            # set the context in which the neural networks calculation will be done
            # choose 'gpu' using the settings if there is one available
            "context": "cpu",
            "use_raw_network": False,
            "threads": 8,
            "batch_size": 8,
            "neural_net_services": 1,
            "playouts_empty_pockets": 8192,
            "playouts_filled_pockets": 8192,
            "centi_cpuct": 250,
            "centi_dirichlet_epsilon": 25,
            "centi_dirichlet_alpha": 20,
            "centi_u_init_divisor": 100,
            "max_search_depth": 40,
            "centi_temperature": 7,
            "temperature_moves": 0,
            "opening_guard_moves": 0,
            "centi_clip_quantil": 0,
            "virtual_loss": 3,
            "centi_q_value_weight": 70,
            "threshold_time_for_raw_net_ms": 100,
            "move_overhead_ms": 300,
            "moves_left": 40,
            "extend_time_on_bad_position": True,
            "max_move_num_to_reduce_movetime": 4,
            "enhance_checks": False,
            "enhance_captures": False,
            "use_pruning": False,
            "use_future_q_values": False,
            "use_time_management": True,
            "verbose": False,
            "model_architecture_dir": "default",
            "model_weights_dir": "default"
        }
        # Keep running without a log file if it cannot be opened (e.g. read-only directory).
        try:
            self.log_file = open(self.log_file_path, "w")
        except IOError:
            self.log_file = None
            # print out the error message
            print(
                "info string An error occurred while trying to open the self.log_file %s"
                % self.log_file_path)
            traceback.print_exc()

        # Start-up splash screen (ASCII art) printed to stderr by main().
        self.intro = """\
                                  _                                           
                   _..           /   ._   _.  _        /\   ._   _.           
                 .' _ `\         \_  |   (_|  /_  \/  /--\  |   (_|           
                /  /e)-,\                         /                           
               /  |  ,_ |                    __    __    __    __             
              /   '-(-.)/          bw     8 /__////__////__////__////         
            .'--.   \  `                 7 ////__////__////__////__/          
           /    `\   |                  6 /__////__////__////__////           
         /`       |  / /`\.-.          5 ////__////__////__////__/            
       .'        ;  /  \_/__/         4 /__////__////__////__////             
     .'`-'_     /_.'))).-` \         3 ////__////__////__////__/              
    / -'_.'---;`'-))).-'`\_/        2 /__////__////__////__////        
   (__.'/   /` .'`                 1 ////__////__////__////__/                
    (_.'/ /` /`                       a  b  c  d  e  f  g  h                  
      _|.' /`                                                                 
jgs.-` __.'|  Developers: Johannes Czech, Moritz Willig, Alena Beyer          
    .-'||  |  Source-Code: QueensGambit/CrazyAra (GPLv3-License)              
       \_`/   Inspiration: A0-paper by Silver, Hubert, Schrittwieser et al.  
              ASCII-Art: Joan G. Stark, Chappell, Burton                      """

    @staticmethod
    def eprint(*args, **kwargs):
        """ Wrapper of print() using stderr"""
        # UCI GUIs parse stdout, so anything meant for humans must go to stderr.
        print(*args, file=sys.stderr, **kwargs)

    def print_if_debug(self, string):
        """ Print lichess debug message on the log"""
        if self.enable_lichess_debug_msg:
            self.eprint("[debug] " + string)

    def log_print(self, text: str):
        """ Print all log messages on the log file"""
        print(text)
        self.print_if_debug(text)
        self.log(text)

    def write_score_to_file(self, score: str):
        """Send the score to score.txt"""
        with open(self.score_file_path, "w") as selected_file:
            selected_file.seek(0)
            selected_file.write(score)
            selected_file.truncate()

    def log(self, text: str):
        """ Sends the text to the log file"""
        if self.log_file:
            self.log_file.write("> %s\n" % text)
            self.log_file.flush()

    def setup_network(self):
        """
        Load the libraries and the weights of the neural network.
        Idempotent: guarded by self.setup_done, so repeated "isready" commands
        only initialize once.
        :return:
        """
        if not self.setup_done:
            # Imports are deliberately lazy so that a plain UCI handshake works
            # without pulling in the deep-learning stack and model weights.
            from DeepCrazyhouse.src.domain.crazyhouse.game_state import GameState
            from DeepCrazyhouse.src.domain.agent.neural_net_api import NeuralNetAPI
            from DeepCrazyhouse.src.domain.agent.player.raw_net_agent import RawNetAgent
            from DeepCrazyhouse.src.domain.agent.player.mcts_agent import MCTSAgent

            self.param_validity_check(
            )  # check for valid parameter setup and do auto-corrections if possible

            # One NeuralNetAPI instance per configured neural-net service.
            nets = []
            for _ in range(self.settings["neural_net_services"]):
                nets.append(
                    NeuralNetAPI(
                        ctx=self.settings["context"],
                        batch_size=self.settings["batch_size"],
                        model_architecture_dir=self.
                        settings["model_architecture_dir"],
                        model_weights_dir=self.settings["model_weights_dir"]))

            # Fast single-forward-pass agent (used below the raw-net time threshold).
            self.rawnet_agent = RawNetAgent(
                nets[0],
                temperature=self.settings["centi_temperature"] / 100,
                temperature_moves=self.settings["temperature_moves"],
            )

            # Full MCTS agent; all "centi_*" settings are rescaled from 1/100 units here.
            self.mcts_agent = MCTSAgent(
                nets,
                cpuct=self.settings["centi_cpuct"] / 100,
                playouts_empty_pockets=self.settings["playouts_empty_pockets"],
                playouts_filled_pockets=self.
                settings["playouts_filled_pockets"],
                max_search_depth=self.settings["max_search_depth"],
                dirichlet_alpha=self.settings["centi_dirichlet_alpha"] / 100,
                q_value_weight=self.settings["centi_q_value_weight"] / 100,
                dirichlet_epsilon=self.settings["centi_dirichlet_epsilon"] /
                100,
                virtual_loss=self.settings["virtual_loss"],
                threads=self.settings["threads"],
                temperature=self.settings["centi_temperature"] / 100,
                temperature_moves=self.settings["temperature_moves"],
                verbose=self.settings["verbose"],
                min_movetime=self.min_search_time,
                batch_size=self.settings["batch_size"],
                enhance_checks=self.settings["enhance_checks"],
                enhance_captures=self.settings["enhance_captures"],
                use_future_q_values=self.settings["use_future_q_values"],
                use_pruning=self.settings["use_pruning"],
                use_time_management=self.settings["use_time_management"],
                opening_guard_moves=self.settings["opening_guard_moves"],
                u_init_divisor=self.settings["centi_u_init_divisor"] / 100,
            )

            # Classic alpha-beta fallback agent (search_type == "alpha_beta").
            self.ab_agent = AlphaBetaAgent(
                nets[0],
                depth=self.settings["ab_depth"],
                nb_candidate_moves=self.settings["ab_candidate_moves"],
                include_check_moves=False,
            )

            self.gamestate = GameState()
            self.setup_done = True

    def validity_with_threads(self, optname: str):
        """
        Checks for consistency with the number of threads with the given parameter
        :param optname: Option name
        :return:
        """

        if self.settings[optname] > self.settings["threads"]:
            self.log_print(
                "info string The given batch_size %d is higher than the number of threads %d. "
                "The maximum legal batch_size is the same as the number of threads (here: %d) "
                % (self.settings[optname], self.settings["threads"],
                   self.settings["threads"]))
            self.settings[optname] = self.settings["threads"]
            self.log_print("info string The batch_size was reduced to %d" %
                           self.settings[optname])

        if self.settings["threads"] % self.settings[optname] != 0:
            self.log_print(
                "info string You requested an illegal combination of threads %d and batch_size %d."
                " The batch_size must be a divisor of the number of threads" %
                (self.settings["threads"], self.settings[optname]))
            divisor = self.settings["threads"] // self.settings[optname]
            self.settings[optname] = self.settings["threads"] // divisor
            self.log_print("info string The batch_size was changed to %d" %
                           self.settings[optname])

    def param_validity_check(self):
        """
        Handles some possible issues when giving an illegal batch_size and number of threads combination.
        :return:
        """

        self.validity_with_threads("batch_size")
        self.validity_with_threads("neural_net_services")

    def perform_action(self, cmd_list):  # Probably needs refactoring
        """
        Computes the 'best move' according to the engine and the given settings.
        After the search is done it will print out ' bestmove e2e4' for example on std-out.
        :param cmd_list: Tokenized UCI "go" command, e.g. ["go", "wtime", "60000", "btime", "60000", ...]
        :return:
        """
        # Too many local variables (21/15) - Too many branches (25/12) - Too many statements (71/50)
        movetime_ms = self.min_search_time

        if len(cmd_list) >= 5:
            if cmd_list[1] == "wtime" and cmd_list[3] == "btime":
                wtime = int(cmd_list[2])
                btime = int(cmd_list[4])

                # NOTE(review): the fixed indices below assume the exact token order
                # "go wtime <w> btime <b> winc <wi> binc <bi>" - confirm with the GUI.
                winc = binc = 0
                if "winc" in cmd_list:
                    winc = int(cmd_list[6])
                if "binc" in cmd_list:
                    binc = int(cmd_list[8])

                # Pick the clock and increment of the side to move.
                if self.gamestate.is_white_to_move():
                    my_time = wtime
                    my_inc = winc
                else:
                    my_time = btime
                    my_inc = binc

                # On the first "go" of the game, derive a constant per-move budget.
                if self.move_time is None:
                    self.move_time = (my_time + self.blitz_game_length *
                                      my_inc) / self.blitz_game_length

                # TC with period (traditional) like 40/60 or 40 moves in 60 sec repeating
                if "movestogo" in cmd_list:
                    tc_type = "traditional"
                    if "winc" in cmd_list and "binc" in cmd_list:
                        moves_left = int(cmd_list[10])
                    else:
                        moves_left = int(cmd_list[6])
                    # If we are close to the period limit, save extra time to avoid time forfeit
                    if moves_left <= 3:
                        moves_left += 1
                else:
                    tc_type = "blitz"
                    moves_left = self.settings["moves_left"]

                # Shrink/extend the horizon based on game phase and last eval.
                moves_left = self.adjust_moves_left(moves_left, tc_type,
                                                    self.bestmove_value)
                if tc_type == "blitz" and self.engine_played_move < self.blitz_game_length * 0.8:
                    # Early blitz phase: constant movetime plus a small random
                    # jitter (random_mv_time_portion) to vary the moves played.
                    movetime_ms = (
                        self.move_time + (np.random.rand() - 0.5) *
                        self.random_mv_time_portion * self.move_time)

                    if self.engine_played_move < self.settings[
                            "max_move_num_to_reduce_movetime"]:
                        # avoid spending too much time in the opening
                        movetime_ms *= self.mv_time_opening_portion
                else:
                    # Late game / traditional TC: split remaining time evenly, add a
                    # share of the increment and subtract the GUI move overhead.
                    movetime_ms = max(
                        my_time / moves_left +
                        self.inc_factor * my_inc // self.inc_div -
                        self.settings["move_overhead_ms"],
                        self.min_search_time,
                    )

        # movetime in UCI protocol, go movetime x, search exactly x ms
        # UCI protocol: http://wbec-ridderkerk.nl/html/UCIProtocol.html
        elif len(cmd_list) == 3 and cmd_list[1] == "movetime":
            movetime_ms = max(
                int(cmd_list[2]) - self.settings["move_overhead_ms"],
                self.min_search_time)

        self.mcts_agent.update_movetime(movetime_ms)
        self.log_print("info string Time for this move is %dms" % movetime_ms)
        self.log_print("info string Requested pos: %s" % self.gamestate)

        # assign search depth
        try:
            # we try to extract the search depth from the cmd list
            self.mcts_agent.set_max_search_depth(
                int(cmd_list[cmd_list.index("depth") + 1]))
            movetime_ms = self.max_search_time  # increase the movetime to maximum to make sure to reach the given depth
            self.mcts_agent.update_movetime(movetime_ms)
        except ValueError:
            pass  # the given command wasn't found in the command list

        # disable noise for short move times
        if movetime_ms < 1000:
            self.mcts_agent.dirichlet_epsilon = 0.1
        elif movetime_ms < 7000:
            # reduce noise for very short move times
            self.mcts_agent.dirichlet_epsilon = 0.2

        # NOTE(review): if "search_type" is neither "alpha_beta" nor "mcts", the
        # result variables stay unbound and the score line below raises NameError.
        if self.settings["search_type"] == "alpha_beta":
            value, selected_move, _, _, centipawn, depth, nodes, time_elapsed_s, nps, pv = self.ab_agent.perform_action(
                self.gamestate)
        elif self.settings["search_type"] == "mcts":
            if self.settings["use_raw_network"] or movetime_ms <= self.settings[
                    "threshold_time_for_raw_net_ms"]:
                self.log_print(
                    "info string Using raw network for fast mode...")
                value, selected_move, _, _, centipawn, depth, nodes, time_elapsed_s, nps, pv = self.rawnet_agent.perform_action(
                    self.gamestate)
            else:
                value, selected_move, _, _, centipawn, depth, nodes, time_elapsed_s, nps, pv = self.mcts_agent.perform_action(
                    self.gamestate)

        # Compose the UCI "info" search statistics line.
        self.score = "score cp %d depth %d nodes %d time %d nps %d pv %s" % (
            centipawn,
            depth,
            nodes,
            time_elapsed_s,
            nps,
            pv,
        )
        if self.enable_lichess_debug_msg:
            try:
                self.write_score_to_file(self.score)
            except IOError:
                traceback.print_exc()

        self.log_print("info %s" %
                       self.score)  # print out the search information
        # Save the bestmove value [-1.0 to 1.0] to modify the next movetime
        self.bestmove_value = float(value)
        self.engine_played_move += 1

        # apply CrazyAra's selected move the global gamestate
        if self.gamestate.get_pythonchess_board().is_legal(selected_move):
            # apply the last move CrazyAra played
            self._apply_move(selected_move)
        else:
            raise Exception("all_ok is false! - crazyara_last_move")

        self.log_print("bestmove %s" % selected_move.uci())

    def setup_gamestate(self, cmd_list):  # Too many branches (13/12)
        """
        Prepare the gamestate according to the user's wishes.

        Tries the cheap path first (apply only the opponent's last move on top of
        the current board) and falls back to rebuilding the whole game when that
        move is not legal in the current position.

        :param cmd_list: Input-command lists arguments
        :return:
        """
        position_type = cmd_list[1]

        if "moves" in cmd_list:
            # position startpos moves e2e4 g8f6
            if position_type == "startpos":
                mv_list = cmd_list[3:]
            else:
                # position fen rn2N2k/pp5p/3pp1pN/3p4/3q1P2/3P1p2/PP3PPP/RN3RK1/Qrbbpbb b - - 3 27 moves d4f2 f1f2
                # NOTE(review): index 9 assumes a 6-field FEN followed by the literal
                # "moves" token - confirm against the GUI's actual output format.
                mv_list = cmd_list[9:]

            # try to apply opponent last move to the board state
            if mv_list:
                # the move the opponent just played is the last move in the list
                opponent_last_move = chess.Move.from_uci(mv_list[-1])
                if self.gamestate.get_pythonchess_board().is_legal(
                        opponent_last_move):
                    # apply the last move the opponent played
                    self._apply_move(opponent_last_move)
                    mv_compatible = True
                else:
                    self.log_print(
                        "info string  all_ok is false! - opponent_last_move %s"
                        % opponent_last_move)
                    mv_compatible = False
            else:
                mv_compatible = False

            if not mv_compatible:
                self.log_print(
                    "info string The given last two moves couldn't be applied to the previous board-state."
                )
                self.log_print(
                    "info string Rebuilding the game from scratch...")

                # create a new game state from scratch
                if position_type == "startpos":
                    self.new_game()
                else:
                    fen = " ".join(cmd_list[2:8])
                    self.gamestate.set_fen(fen)

                # replay the full move list on the freshly built board
                for move in mv_list:
                    self._apply_move(chess.Move.from_uci(move))
            else:
                self.log_print("info string Move Compatible")
        else:
            # Bare "position fen ..." without moves: load the FEN and seed the
            # transposition table with the resulting position.
            if position_type == "fen":
                fen = " ".join(cmd_list[2:8])
                self.gamestate.set_fen(fen)
                self.mcts_agent.update_transposition_table(
                    (self.gamestate.get_transposition_key(), ))
                # log_print("info string Added %s - count %d" % (gamestate.get_board_fen(),
                #                                    mcts_agent.transposition_table[gamestate.get_transposition_key()]))

    def _apply_move(self, selected_move: chess.Move):
        """
        Push *selected_move* onto the gamestate and record the resulting position
        in the MCTS agent's transposition table.
        :param selected_move: Move in python chess format
        :return:
        """
        self.gamestate.apply_move(selected_move)
        new_key = self.gamestate.get_transposition_key()
        self.mcts_agent.update_transposition_table((new_key, ))

    def new_game(self):
        """Reset the board and the MCTS agent's per-game state for a fresh game."""
        self.log_print("info string >> New Game")
        self.gamestate.new_game()
        # Drop all cached positions and any banked search time from the last game.
        agent = self.mcts_agent
        agent.transposition_table = collections.Counter()
        agent.time_buffer_ms = 0
        # Restore the configured exploration noise (it may have been lowered
        # for short move times during the previous game).
        agent.dirichlet_epsilon = self.settings["centi_dirichlet_epsilon"] / 100

    def set_options(self, cmd_list):  # Too many branches (16/12)
        """
        Updates the internal options as requested by the use via the uci-protocoll
        An example call could be: "setoption name nb_threads value 1"
        :param cmd_list: List of received of commands
        :return:
        """
        # make sure there exists enough items in the given command list like "setoption name nb_threads value 1"
        if len(cmd_list) >= 5:
            if cmd_list[1] != "name" or cmd_list[3] != "value":
                self.log_print(
                    "info string The given setoption command wasn't understood"
                )
                self.log_print(
                    'info string An example call could be: "setoption name threads value 4"'
                )
            else:
                option_name = cmd_list[2]

                if option_name not in self.settings:
                    self.log_print(
                        "info string The given option %s wasn't found in the settings list"
                        % option_name)
                else:

                    if option_name in [
                            "UCI_Variant",
                            "search_type",
                            "context",
                            "use_raw_network",
                            "extend_time_on_bad_position",
                            "verbose",
                            "enhance_checks",
                            "enhance_captures",
                            "use_pruning",
                            "use_future_q_values",
                            "use_time_management",
                            "model_architecture_dir",
                            "model_weights_dir",
                    ]:
                        value = cmd_list[4]
                    else:
                        value = int(cmd_list[4])

                    if option_name == "use_raw_network":
                        self.settings["use_raw_network"] = value == "true"
                    elif option_name == "extend_time_on_bad_position":
                        self.settings[
                            "extend_time_on_bad_position"] = value == "true"
                    elif option_name == "verbose":
                        self.settings["verbose"] = value == "true"
                    elif option_name == "enhance_checks":
                        self.settings["enhance_checks"] = value == "true"
                    elif option_name == "enhance_captures":
                        self.settings["enhance_captures"] = value == "true"
                    elif option_name == "use_pruning":
                        self.settings["use_pruning"] = value == "true"
                    elif option_name == "use_future_q_values":
                        self.settings["use_future_q_values"] = value == "true"
                    elif option_name == "use_time_management":
                        self.settings["use_time_management"] = value == "true"
                    else:
                        self.settings[
                            option_name] = value  # by default all options are treated as integers
                        # Guard threads limits
                        if option_name == "threads":
                            self.settings[option_name] = min(
                                4096, max(1, self.settings[option_name]))

                    self.log_print("info string Updated option %s to %s" %
                                   (option_name, value))

    def adjust_moves_left(self, moves_left, tc_type, prev_bm_value):
        """
        We can reduce the movetime early in the opening as the NN may be able to handle it well.
        Or when the position is bad we can increase the movetime especially if there are enough time left.
        To increase/decrease the movetime, we decrease/increase the moves_left.
        movetime = time_left/moves_left
        :param moves_left: Moves left for the next period for traditional or look ahead moves for blitz
        :param tc_type: Can be blitz (60+1) or traditional (40/60)
        :param prev_bm_value: The value of the previous bestmove. value is in the range [-1 to 1]
        :return: moves_left
        """

        # Don't spend too much time in the opening, we increase the moves_left
        # so that the movetime is reduced. engine_played_move is the actual moves
        # made by the engine excluding the book moves input from a GUI.
        if self.engine_played_move < self.settings[
                "max_move_num_to_reduce_movetime"]:
            moves_left += self.moves_left_increment

        # Increase movetime by reducing the moves left if our prev bestmove value is below 0.0
        elif self.settings[
                "extend_time_on_bad_position"] and prev_bm_value and prev_bm_value <= self.max_bad_pos_value:
            if tc_type == "blitz":
                # The more the bad position is, the more that we extend the search time
                moves_left -= abs(prev_bm_value) * self.settings["moves_left"]
                moves_left = max(moves_left, self.min_moves_left)
            # Else if TC is traditional, we extend with more time if we have more time left
            elif moves_left > 4:
                moves_left -= moves_left // 8

        return moves_left

    def uci_reply(self):
        """Answer the UCI "uci" handshake: engine identity, every supported option, then "uciok"."""
        self.log_print("id name %s %s" %
                       (self.client["name"], self.client["version"]))
        self.log_print("id author %s" % self.client["authors"])
        # tell the GUI all possible options
        self.log_print(
            "option name UCI_Variant type combo default crazyhouse var crazyhouse"
        )
        self.log_print(
            "option name search_type type combo default %s var mcts var alpha_beta"
            % self.settings["search_type"])
        self.log_print(
            "option name ab_depth type spin default %d min 1 max 40" %
            self.settings["ab_depth"])
        self.log_print(
            "option name ab_candidate_moves type spin default %d min 1 max 4096"
            % self.settings["ab_candidate_moves"])
        self.log_print(
            "option name context type combo default %s var cpu var gpu" %
            self.settings["context"])
        self.log_print(
            "option name use_raw_network type check default %s" %
            ("false" if not self.settings["use_raw_network"] else "true"))
        self.log_print(
            "option name threads type spin default %d min 1 max 4096" %
            self.settings["threads"])
        self.log_print(
            "option name batch_size type spin default %d min 1 max 4096" %
            self.settings["batch_size"])
        self.log_print(
            "option name neural_net_services type spin default %d min 1 max 10"
            % self.settings["neural_net_services"])
        self.log_print(
            "option name playouts_empty_pockets type spin default %d min 56 max 8192"
            % self.settings["playouts_empty_pockets"])
        self.log_print(
            "option name playouts_filled_pockets type spin default %d min 56 max 8192"
            % self.settings["playouts_filled_pockets"])
        self.log_print(
            "option name centi_cpuct type spin default %d min 1 max 500" %
            self.settings["centi_cpuct"])
        self.log_print(
            "option name centi_dirichlet_epsilon type spin default %d min 0 max 100"
            % self.settings["centi_dirichlet_epsilon"])
        self.log_print(
            "option name centi_dirichlet_alpha type spin default %d min 0 max 100"
            % self.settings["centi_dirichlet_alpha"])
        self.log_print(
            "option name centi_u_init_divisor type spin default %d min 1 max 100"
            % self.settings["centi_u_init_divisor"])
        self.log_print(
            "option name max_search_depth type spin default %d min 1 max 100" %
            self.settings["max_search_depth"])
        self.log_print(
            "option name centi_temperature type spin default %d min 0 max 100"
            % self.settings["centi_temperature"])
        self.log_print(
            "option name temperature_moves type spin default %d min 0 max 99999"
            % self.settings["temperature_moves"])
        self.log_print(
            "option name opening_guard_moves type spin default %d min 0 max 99999"
            % self.settings["opening_guard_moves"])
        # NOTE(review): the two options below report hard-coded defaults instead of
        # the current self.settings values - confirm this is intentional.
        self.log_print(
            "option name centi_clip_quantil type spin default 0 min 0 max 100")
        self.log_print(
            "option name virtual_loss type spin default 3 min 0 max 10")
        self.log_print(
            "option name centi_q_value_weight type spin default %d min 0 max 100"
            % self.settings["centi_q_value_weight"])
        self.log_print(
            "option name threshold_time_for_raw_net_ms type spin default %d min 1 max 300000"
            % self.settings["threshold_time_for_raw_net_ms"])
        self.log_print(
            "option name move_overhead_ms type spin default %d min 0 max 60000"
            % self.settings["move_overhead_ms"])
        self.log_print(
            "option name moves_left type spin default %d min 10 max 320" %
            self.settings["moves_left"])
        self.log_print(
            "option name extend_time_on_bad_position type check default %s" %
            ("false"
             if not self.settings["extend_time_on_bad_position"] else "true"))
        self.log_print(
            "option name max_move_num_to_reduce_movetime type spin default %d min 0 max 120"
            % self.settings["max_move_num_to_reduce_movetime"])
        self.log_print(
            "option name enhance_checks type check default %s" %
            ("false" if not self.settings["enhance_checks"] else "true"))
        self.log_print(
            "option name enhance_captures type check default %s" %
            ("false" if not self.settings["enhance_captures"] else "true"))
        self.log_print(
            "option name use_pruning type check default %s" %
            ("false" if not self.settings["use_pruning"] else "true"))
        self.log_print(
            "option name use_future_q_values type check default %s" %
            ("false" if not self.settings["use_future_q_values"] else "true"))
        self.log_print(
            "option name use_time_management type check default %s" %
            ("false" if not self.settings["use_time_management"] else "true"))
        self.log_print("option name verbose type check default %s" %
                       ("false" if not self.settings["verbose"] else "true"))
        self.log_print(
            "option name model_architecture_dir type string default %s" %
            self.settings["model_architecture_dir"])
        self.log_print("option name model_weights_dir type string default %s" %
                       self.settings["model_weights_dir"])
        self.log_print("uciok")  # verify that all options have been sent

    def main(self):
        """Main waiting loop: read UCI commands from stdin and dispatch them until quit/exit."""
        self.eprint(self.intro)
        while True:
            command_line = input()
            self.print_if_debug("waiting ...")
            self.print_if_debug(command_line)
            # wait for an std-in input command
            if not command_line:
                continue  # ignore empty lines and keep waiting
            tokens = command_line.rstrip().split(" ")  # tokenize the line for easier parsing
            command = tokens[0]  # the first token selects the handler
            self.log(command_line)  # write the given command to the log-file

            try:
                if command == "uci":
                    self.uci_reply()
                elif command == "isready":
                    self.setup_network()
                    self.log_print("readyok")
                elif command == "ucinewgame":
                    self.bestmove_value = None
                    self.engine_played_move = 0
                    self.new_game()
                elif command == "position":
                    self.setup_gamestate(tokens)
                elif command == "setoption":
                    self.set_options(tokens)
                elif command == "go":
                    self.perform_action(tokens)
                elif command in ("quit", "exit"):
                    if self.log_file:
                        self.log_file.close()
                    return 0
                else:
                    # give the user a message that the command was ignored
                    print("info string Unknown command: %s" % command_line)
            except Exception:  # top-level boundary: log the traceback and terminate
                self.log_print(traceback.format_exc())
                return -1
예제 #3
0
    def setup_network(self):
        """
        Load the libraries and the weights of the neural network.

        Instantiates one NeuralNetAPI per configured prediction service and builds
        the raw-net, MCTS and alpha-beta agents from the current settings. Runs only
        once; subsequent calls are no-ops.
        :return:
        """
        if self.setup_done:
            return
        from DeepCrazyhouse.src.domain.crazyhouse.game_state import GameState
        from DeepCrazyhouse.src.domain.agent.neural_net_api import NeuralNetAPI
        from DeepCrazyhouse.src.domain.agent.player.raw_net_agent import RawNetAgent
        from DeepCrazyhouse.src.domain.agent.player.mcts_agent import MCTSAgent

        # check for valid parameter setup and do auto-corrections if possible
        self.param_validity_check()

        # one network handle per configured prediction service
        net_services = [
            NeuralNetAPI(
                ctx=self.settings["context"],
                batch_size=self.settings["batch_size"],
                model_architecture_dir=self.settings["model_architecture_dir"],
                model_weights_dir=self.settings["model_weights_dir"],
            )
            for _ in range(self.settings["neural_net_services"])
        ]

        self.rawnet_agent = RawNetAgent(
            net_services[0],
            temperature=self.settings["centi_temperature"] / 100,
            temperature_moves=self.settings["temperature_moves"],
        )

        self.mcts_agent = MCTSAgent(
            net_services,
            cpuct=self.settings["centi_cpuct"] / 100,
            playouts_empty_pockets=self.settings["playouts_empty_pockets"],
            playouts_filled_pockets=self.settings["playouts_filled_pockets"],
            max_search_depth=self.settings["max_search_depth"],
            dirichlet_alpha=self.settings["centi_dirichlet_alpha"] / 100,
            q_value_weight=self.settings["centi_q_value_weight"] / 100,
            dirichlet_epsilon=self.settings["centi_dirichlet_epsilon"] / 100,
            virtual_loss=self.settings["virtual_loss"],
            threads=self.settings["threads"],
            temperature=self.settings["centi_temperature"] / 100,
            temperature_moves=self.settings["temperature_moves"],
            verbose=self.settings["verbose"],
            min_movetime=self.min_search_time,
            batch_size=self.settings["batch_size"],
            enhance_checks=self.settings["enhance_checks"],
            enhance_captures=self.settings["enhance_captures"],
            use_future_q_values=self.settings["use_future_q_values"],
            use_pruning=self.settings["use_pruning"],
            use_time_management=self.settings["use_time_management"],
            opening_guard_moves=self.settings["opening_guard_moves"],
            u_init_divisor=self.settings["centi_u_init_divisor"] / 100,
        )

        # NOTE(review): AlphaBetaAgent is presumably imported at module level — confirm
        self.ab_agent = AlphaBetaAgent(
            net_services[0],
            depth=self.settings["ab_depth"],
            nb_candidate_moves=self.settings["ab_candidate_moves"],
            include_check_moves=False,
        )

        self.gamestate = GameState()
        self.setup_done = True  # guard against repeated initialization
예제 #4
0
class ChessServer:
    """ Helper for handling the game server (Flask HTTP API around the engine agent)"""

    def __init__(self, name):
        """
        Build the Flask application, register all HTTP routes and load the playing agent.
        :param name: Import name forwarded to the Flask constructor
        """
        self.app = Flask(name)
        self.app.add_url_rule("/api/state", "api/state", self._wrap_endpoint(ChessServer.serve_state))
        self.app.add_url_rule("/api/new", "api/new", self._wrap_endpoint(ChessServer.serve_new_game))
        self.app.add_url_rule("/api/move", "api/move", self._wrap_endpoint(ChessServer.serve_move))
        self.app.add_url_rule("/", "serve_client_r", self._wrap_endpoint(ChessServer.serve_client))
        self.app.add_url_rule("/<path:path>", "serve_client", self._wrap_endpoint(ChessServer.serve_client))
        self._gamestate = GameState()
        net = NeuralNetAPI()
        # Loading network; both agents share the same network handle
        player_agents = {
            "raw_net": RawNetAgent(net),
            "mcts": MCTSAgent(
                net, virtual_loss=3, threads=BATCH_SIZE, cpuct=CPUCT, dirichlet_epsilon=DIRICHLET_EPSILON
            ),
        }
        self.agent = player_agents["raw_net"]  # Setting up agent; switch to "mcts" for full tree search
        # self.agent = player_agents["mcts"]

    def _wrap_endpoint(self, func):
        """Bind the unbound method *func* to this instance so Flask can call it as a view function."""

        def wrapper(**kwargs):
            return func(self, **kwargs)

        return wrapper

    def run(self):
        """ Run the flask server"""
        self.app.run()

    @staticmethod
    def serve_client(path=None):
        """Serve a static client file; defaults to index.html for the root path"""
        if path is None:
            path = "index.html"
        return send_from_directory("./client", path)

    def serve_state(self):
        """Return the serialized current game state"""
        return self.serialize_game_state()

    def serve_new_game(self):
        """Start a fresh game and return the new serialized game state"""
        logging.debug("starting new game()")  # fixed typo: was "staring"
        self.perform_new_game()
        return self.serialize_game_state()

    def serve_move(self):
        """ Groups the move requests and data to the server and the response from it"""
        # read move data from the query string
        drop_piece = request.args.get("drop")
        from_square = request.args.get("from")
        to_square = request.args.get("to")
        promotion_piece = request.args.get("promotion")
        from_square_idx = get_square_index_from_name(from_square)
        to_square_idx = get_square_index_from_name(to_square)
        if (from_square_idx is None and drop_piece is None) or to_square_idx is None:
            return self.serialize_game_state("board name is invalid")

        promotion = drop = None

        if drop_piece:
            from_square_idx = to_square_idx  # drops originate from the target square in python-chess
            if drop_piece not in chess.PIECE_SYMBOLS:
                return self.serialize_game_state("drop piece name is invalid")
            drop = chess.PIECE_SYMBOLS.index(drop_piece)

        if promotion_piece:
            if promotion_piece not in chess.PIECE_SYMBOLS:
                return self.serialize_game_state("promotion piece name is invalid")
            promotion = chess.PIECE_SYMBOLS.index(promotion_piece)

        move = chess.Move(from_square_idx, to_square_idx, promotion, drop)

        # perform move
        try:
            self.perform_move(move)
        except ValueError as err:
            logging.error("ValueError %s", err)
            return self.serialize_game_state(err.args[0])

        # calculate agent response
        if not self.perform_agent_move():
            return self.serialize_game_state("Black has no more moves to play", True)

        return self.serialize_game_state()

    def perform_new_game(self):
        """Initialize a new game on the server"""
        self._gamestate = GameState()

    def perform_move(self, move):
        """
        Apply the move on the game after checking its legality.
        :param move: chess.Move to apply
        :return: False if the move delivers checkmate, True otherwise
        :raises ValueError: if the move is illegal in the current position
        """
        logging.debug("perform_move(): %s", move)
        # legality check; LegalMoveGenerator supports "in" directly, no list() needed
        if move not in self._gamestate.board.legal_moves:
            raise ValueError("The given move %s is invalid for the current position" % move)
        self._gamestate.apply_move(move)
        if self._gamestate.is_won():
            logging.debug("Checkmate")
            return False
        return True

    def perform_agent_move(self):
        """Let the engine agent reply with a move; return False if the game is over or no move exists"""
        if self._gamestate.is_won():
            logging.debug("Checkmate")
            return False

        value, move, _, _ = self.agent.perform_action(self._gamestate)

        # report the evaluation from White's point of view
        if not self._gamestate.is_white_to_move():
            value = -value

        logging.debug("Value %.4f", value)

        if move is None:
            logging.error("None move proposed!")
            return False

        self.perform_move(move)
        return True

    def serialize_game_state(self, message=None, finished=None):
        """
        Encode the game state as a JSON string.
        :param message: Optional info/error message for the client
        :param finished: Optional flag marking the game as finished
        :return: JSON string containing board, pockets and message
        """
        if message is None:
            message = ""

        board_str = str(self._gamestate.board)
        pocket_str = "%s|%s" % (self._gamestate.board.pockets[1], self._gamestate.board.pockets[0])
        state = {"board": board_str, "pocket": pocket_str, "message": message}
        if finished:
            state["finished"] = finished
        return json.dumps(state)
예제 #5
0
 def perform_new_game(self):
     """Initialize a new game on the server by replacing the state with a fresh GameState."""
     self._gamestate = GameState()
예제 #6
0
    def _run_single_playout(self, parent_node: Node, pipe_id=0, depth=1, chosen_nodes=None):
        """
        This function works recursively until a leaf or terminal node is reached.
        It ends by back-propagating the value of the new expanded node or by propagating the value of a terminal state.

        :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
        :param pipe_id: Index of the pipe ending used to exchange planes/predictions with the inference service
        :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call
        :param chosen_nodes: List of child indices which have been taken in the current path.
                        For each selected child node this list is extended by one index recursively.
                        Pass None (the default) on the initial call to start a fresh path.
        :return: -value: The inverse value prediction of the current board state. The flipping by -1 each turn is needed
                        because the point of view changes each half-move
                depth: Current depth reached by this evaluation
                chosen_nodes: List of child indices which have been selected along the path
        """
        # NOTE(review): candidate for refactoring —
        # Too many arguments (6/5) - Too many local variables (27/15) - Too many branches (28/12) -
        # Too many statements (86/50)
        if chosen_nodes is None:  # initial (non-recursive) call: start a fresh path list
            chosen_nodes = []
        node, move, child_idx = self._select_node(parent_node)  # select a legal move on the chess board

        if move is None:
            raise Exception("Illegal tree setup. A 'None' move was selected which shouldn't be possible")
        # update the visit counts to this node
        # temporarily reduce the attraction of this node by applying a virtual loss /
        # the effect of virtual loss will be undone if the playout is over
        parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

        # append the selected move to the move list
        chosen_nodes.append(child_idx)  # append the chosen child idx to the chosen_nodes list

        if node is None:
            # the selected child was never expanded: build the position and evaluate it
            state = GameState(deepcopy(parent_node.board))  # get the board from the parent node
            state.apply_move(move)  # apply the selected move on the board

            # get the transposition-key which is used as an identifier for the board positions in the look-up table
            transposition_key = state.get_transposition_key()
            # check if the addressed position exists in the look-up table
            # note: It's important to use also the halfmove-counter here, otherwise the system can create an infinite
            # feed-back-loop
            key = transposition_key + (state.get_fullmove_number(),)
            use_tran_table = True
            node_verified = False

            if use_tran_table and key in self.node_lookup:
                # if self.check_for_duplicate(transposition_key, chosen_nodes) is False:
                node = self.node_lookup[key]  # get the node from the look-up list

                if node.n_sum > parent_node.n_sum:  # make sure that you don't connect to a node with lower visits
                    node_verified = True

            if node_verified:
                with parent_node.lock:
                    # setup a new connection from the parent to the child
                    parent_node.child_nodes[child_idx] = node
                # logging.debug('found key: %s' % state.get_board_fen())
                # get the prior value from the leaf node which has already been expanded
                value = node.initial_value
            else:
                # expand and evaluate the new board state (the node wasn't found in the look-up table)
                # its value will be back-propagated through the tree and flipped after every layer
                my_pipe = self.my_pipe_endings[pipe_id]  # receive a free available pipe

                if self.send_batches:
                    my_pipe.send(state.get_state_planes())
                    # this pipe waits for the predictions of the network inference service
                    [value, policy_vec] = my_pipe.recv()
                else:
                    # write the planes into the shared batch buffer and receive the result slot index
                    state_planes = state.get_state_planes()
                    self.batch_state_planes[pipe_id] = state_planes
                    my_pipe.send(pipe_id)
                    result_channel = my_pipe.recv()
                    value = np.array(self.batch_value_results[result_channel])
                    policy_vec = np.array(self.batch_policy_results[result_channel])

                is_leaf = is_won = False  # initialize is_leaf by default to false and check if the game is won
                # check if the current player has won the game
                # (we don't need to check for is_lost() because the game is already over
                #  if the current player checkmated his opponent)
                if state.is_check():
                    if state.is_won():
                        is_won = True

                if is_won:
                    value = -1
                    is_leaf = True
                    legal_moves = []
                    p_vec_small = None
                    # establish a mate in one connection in order to stop exploring different alternatives
                    parent_node.set_check_mate_node_idx(child_idx)
                # get the value from the leaf node (the current function is called recursively)
                # check if you can claim a draw - it's assumed that the draw is always claimed
                elif (
                    self.can_claim_threefold_repetition(transposition_key, chosen_nodes)
                    or state.get_pythonchess_board().can_claim_fifty_moves() is True
                ):
                    value = 0
                    is_leaf = True
                    legal_moves = []
                    p_vec_small = None
                else:
                    legal_moves = state.get_legal_moves()  # get the current legal moves of its board state

                    if not legal_moves:
                        # stalemate occurred which is very rare for crazyhouse
                        value = 0
                        is_leaf = True
                        legal_moves = []
                        p_vec_small = None
                        # raise Exception("No legal move is available for state: %s" % state)
                    else:
                        try:  # extract a sparse policy vector with normalized probabilities
                            p_vec_small = get_probs_of_move_list(
                                policy_vec, legal_moves, is_white_to_move=state.is_white_to_move(), normalize=True
                            )
                        except KeyError:
                            raise Exception("Key Error for state: %s" % state)

                # clip the visit nodes for all nodes in the search tree except the direct opponent move
                clip_low_visit = self.use_pruning and depth != 1  # and depth > 4
                new_node = Node(
                    state.get_pythonchess_board(),
                    value,
                    p_vec_small,
                    legal_moves,
                    is_leaf,
                    transposition_key,
                    clip_low_visit,
                )  # create a new node

                if depth == 1:
                    # disable uncertain moves from being visited by giving them a very bad score
                    if not is_leaf and self.use_pruning:
                        if self.root_node_prior_policy[child_idx] < 1e-3 and value * -1 < self.root_node.initial_value:
                            with parent_node.lock:
                                value = 99

                    # for performance reasons only apply check enhancement on depth 1 for now
                    chess_board = state.get_pythonchess_board()
                    if self.enhance_checks:
                        self._enhance_checks(chess_board, legal_moves, p_vec_small)

                    if self.enhance_captures:
                        self._enhance_captures(chess_board, legal_moves, p_vec_small)

                if not self.use_pruning:
                    self.node_lookup[key] = new_node  # include a reference to the new node in the look-up table

                with parent_node.lock:
                    parent_node.child_nodes[child_idx] = new_node  # add the new node to its parent
        elif node.is_leaf:  # check if we have reached a leaf node
            value = node.initial_value
        else:
            # get the value from the leaf node (the current function is called recursively)
            value, depth, chosen_nodes = self._run_single_playout(node, pipe_id, depth + 1, chosen_nodes)
        # revert the virtual loss and apply the predicted value by the network to the node
        parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)
        # invert the value prediction for the parent of the above node layer because the player changes every turn
        return -value, depth, chosen_nodes
예제 #7
0
    def evaluate_board_state(self, state: GameState):  # Probably is better to be refactored
        """
        Analyzes the current board state. This is the main method which gets called by the uci interface or analysis
        request.
        :param state: Actual game state to evaluate for the MCTS
        :return: 9-tuple of
                value: q-value of the selected best child,
                legal_moves: list of legal moves in the position,
                p_vec_small: policy vector over the legal moves derived from the MCTS search,
                centipawns: value converted to the centipawn scale,
                depth: maximum search depth reached,
                nodes: number of nodes searched during this call,
                time_elapsed_s: elapsed time in milliseconds (note: despite the "_s" suffix),
                nps: nodes per second,
                pv: principal variation as a move string
        """
        # NOTE(review): refactoring candidate —
        # Too many local variables (28/15) - Too many branches (25/12) - Too many statements (75/50)
        self.t_start_eval = time()  # store the time at which the search started

        if not self.net_pred_services[0].running:  # check if the net prediction service has already been started
            for net_pred_service in self.net_pred_services:  # start the prediction daemon thread
                net_pred_service.start()

        legal_moves = state.get_legal_moves()  # list of all possible legal moves in the current board position

        if not legal_moves:  # consistency check
            raise Exception("The given board state has no legal move available")

        key = state.get_transposition_key() + (
            state.get_fullmove_number(),
        )  # check first if the current tree can be reused

        if not self.use_pruning and key in self.node_lookup:
            chess_board = state.get_pythonchess_board()
            self.root_node = self.node_lookup[key]  # reuse the previously expanded subtree as the new root
            if self.enhance_captures:
                self._enhance_captures(chess_board, legal_moves, self.root_node.policy_prob)
                # enhance captures for all direct child nodes
                for child_node in self.root_node.child_nodes:
                    if child_node:
                        self._enhance_captures(child_node.board, child_node.legal_moves, child_node.policy_prob)

            if self.enhance_checks:
                self._enhance_checks(chess_board, legal_moves, self.root_node.policy_prob)
                # enhance checks for all direct child nodes
                for child_node in self.root_node.child_nodes:
                    if child_node:
                        self._enhance_checks(child_node.board, child_node.legal_moves, child_node.policy_prob)

            logging.debug(
                "Reuse the search tree. Number of nodes in search tree: %d",
                self.root_node.nb_total_expanded_child_nodes,
            )
            self.total_nodes_pre_search = deepcopy(self.root_node.n_sum)
        else:
            logging.debug("Starting a brand new search tree...")
            self.root_node = None
            self.total_nodes_pre_search = 0

        if len(legal_moves) == 1:  # check for fast way out
            max_depth_reached = 1  # if there's only a single legal move you only must go 1 depth

            if self.root_node is None:
                # conduct all necessary steps for fastest way out
                self._expand_root_node_single_move(state, legal_moves)

            # increase the move time buffer
            # subtract half a second as a constant for possible delay
            self.time_buffer_ms += max(self.movetime_ms - 500, 0)
        else:
            if self.root_node is None:
                self._expand_root_node_multiple_moves(state, legal_moves)  # run a single expansion on the root node
            # opening guard: penalize low-prior moves during the opening phase
            if state.get_fullmove_number() <= self.opening_guard_moves:  # 100: #7: #10:
                self.root_node.q_value[self.root_node.policy_prob < 5e-2] = -9999
            # elif len(legal_moves) > 50:
            #    self.root_node.q_value[self.root_node.policy_prob < 1e-3] = -9999
            # conduct the mcts-search based on the given settings
            max_depth_reached = self._run_mcts_search(state)
            t_elapsed = time() - self.t_start_eval
            print("info string move overhead is %dms" % (t_elapsed * 1000 - self.movetime_ms))

        # receive the policy vector based on the MCTS search
        p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)  # , xth_n_max=xth_n_max, is_root=True)

        if self.use_future_q_values:
            # use q-future value to update the q-values of direct child nodes
            q_future, indices = self.get_last_q_values(min_nb_visits=5, max_depth=5) #25)
            # self.root_node.q_value = 0.5 * self.root_node.q_value + 0.5 * q_future
            # TODO: make this matrix vector form
            if max_depth_reached >= 5:
                for idx in indices:
                    self.root_node.q_value[idx] = min(self.root_node.q_value[idx], q_future[idx])
                p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)

        # if self.use_pruning is False:
        self.node_lookup[key] = self.root_node  # store the current root in the lookup table
        best_child_idx = p_vec_small.argmax()  # select the q-value according to the mcts best child value
        value = self.root_node.q_value[best_child_idx]
        # value = orig_q[best_child_idx]
        lst_best_moves, _ = self.get_calculated_line()
        str_moves = self._mv_list_to_str(lst_best_moves)
        node_searched = int(self.root_node.n_sum - self.total_nodes_pre_search)  # show the best calculated line
        time_e = time() - self.t_start_eval  # In uci the depth is given using half-moves notation also called plies

        # sanity check: the policy vector must have one entry per legal move
        if len(legal_moves) != len(p_vec_small):
            raise Exception(
                "Legal move list %s with length %s is incompatible to policy vector %s"
                " with shape %s for board state %s and nodes legal move list: %s"
                % (legal_moves, len(legal_moves), p_vec_small, p_vec_small.shape, state, self.root_node.legal_moves)
            )

        # define the remaining return variables
        centipawns = value_to_centipawn(value)
        depth = max_depth_reached
        nodes = node_searched
        time_elapsed_s = time_e * 1000

        # avoid division by 0
        if time_e > 0.0:
            nps = node_searched / time_e
        else:
            # return a high constant otherwise
            nps = 999999999

        pv = str_moves
        if self.verbose:
            score = "score cp %d depth %d nodes %d time %d nps %d pv %s" % (
                centipawns,
                depth,
                nodes,
                time_elapsed_s,
                nps,
                pv,
            )
            logging.info("info string %s", score)
        return value, legal_moves, p_vec_small, centipawns, depth, nodes, time_elapsed_s, nps, pv