Example #1
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = ChessEnv().reset()

    current_player = ChessPlayer(config,
                                 pipes=cur_pipes,
                                 play_config=config.eval.play_config)
    ng_player = ChessPlayer(config,
                            pipes=ng_pipes,
                            play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1
    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white
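A minimal sketch of how a caller might drive this function to evaluate the next-generation model: it tallies ng's score over a number of games while alternating colors. The game count and acceptance threshold are assumed values, not taken from the project's config.

def evaluate_ng(config, cur_pipes, ng_pipes, game_count=50, score_threshold=0.55):
    # cur_pipes / ng_pipes are the same pipe-set lists that play_game pops
    # from and appends back to; game_count and score_threshold are assumptions.
    total = 0.0
    for game_idx in range(game_count):
        ng_score, env, current_white = play_game(
            config, cur_pipes, ng_pipes, current_white=(game_idx % 2 == 0))
        total += ng_score
    return (total / game_count) >= score_threshold  # True if ng should replace cur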
Example #2
    def play_game(self, best_model, ng_model):
        env = ChessEnv().reset()

        best_player = ChessPlayer(self.config, best_model, play_config=self.config.eval.play_config)
        ng_player = ChessPlayer(self.config, ng_model, play_config=self.config.eval.play_config)
        best_is_white = random() < 0.5
        if not best_is_white:
            black, white = best_player, ng_player
        else:
            black, white = ng_player, best_player

        observation = env.observation
        while not env.done:
            if env.board.turn == chess.BLACK:
                action = black.action(observation)
            else:
                action = white.action(observation)
            board, info = env.step(action)
            observation = board.fen()

        ng_win = None
        if env.winner == Winner.white:
            if best_is_white:
                ng_win = 0
            else:
                ng_win = 1
        elif env.winner == Winner.black:
            if best_is_white:
                ng_win = 1
            else:
                ng_win = 0
        return ng_win, best_is_white
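Note that this variant leaves ng_win as None when the game ends in a draw, so callers have to handle that case themselves. A minimal sketch of one way to tally results, assuming the usual half-point convention for draws (that convention is not prescribed by the snippet above):

def tally_ng_score(results):
    # results: list of (ng_win, best_is_white) tuples as returned by play_game
    score = 0.0
    for ng_win, best_is_white in results:
        score += 0.5 if ng_win is None else ng_win  # treat a draw as half a point
    return score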
Example #3
    def play_game(self, current_model, ng_model,
                  current_white: bool) -> (float, ChessEnv):
        env = ChessEnv().reset()

        current_player = ChessPlayer(self.config,
                                     model=current_model,
                                     play_config=self.config.eval.play_config)
        ng_player = ChessPlayer(self.config,
                                model=ng_model,
                                play_config=self.config.eval.play_config)
        if current_white:
            white, black = current_player, ng_player
        else:
            white, black = ng_player, current_player

        while not env.done:
            if env.board.turn == chess.WHITE:
                action = white.action(env)
            else:
                action = black.action(env)
            env.step(action)
            if env.num_halfmoves >= self.config.eval.max_game_length:
                env.adjudicate()

        if env.winner == Winner.draw:
            ng_score = 0.5
        elif env.whitewon == current_white:
            ng_score = 0
        else:
            ng_score = 1
        return ng_score, env
Example #4
def self_play_buffer(config, cur) -> (ChessEnv, list):
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.play.max_game_length:
            env.adjudicate()

    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
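The white/black move interleaving at the end of this function recurs in several of the snippets below. A sketch of the same logic as a reusable helper, assuming white always has at least as many recorded moves as black (true when white moves first):

from itertools import chain, zip_longest

def interleave_moves(white_moves, black_moves):
    # pair the i-th white move with the i-th black move, flatten, and drop
    # the None padding added when black has one move fewer than white
    pairs = zip_longest(white_moves, black_moves)
    return [move for move in chain.from_iterable(pairs) if move is not None]

With this helper, the loop above reduces to data = interleave_moves(white.moves, black.moves).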
Example #5
    def move_by_ai(self, env):
        if self.ai is None:
            self.ai = ChessPlayer(self.config, self.model)
        action = self.ai.action(env.observation)

        self.last_history = self.ai.ask_thought_about(env.observation)
        self.last_evaluation = self.last_history.values[self.last_history.action]
        logger.debug(f"Evaluation by AI = {self.last_evaluation}")

        return action
Example #6
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.
    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(
        game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)

    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation,
                                     actions[k],
                                     weight=white_weight)  #ignore=True
        else:
            action = black.sl_action(env.observation,
                                     actions[k],
                                     weight=black_weight)  #ignore=True
        env.step(action, False)
        k += 1

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
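A hypothetical caller for get_buffer (not part of the source): it streams games out of a PGN file with python-chess and accumulates the supervised-learning data. It assumes every game carries the WhiteElo/BlackElo headers that get_buffer reads.

import chess.pgn

def buffer_from_pgn(config, pgn_path):
    buffer = []
    with open(pgn_path, errors="ignore") as handle:
        while True:
            game = chess.pgn.read_game(handle)
            if game is None:  # end of file
                break
            env, data = get_buffer(config, game)
            buffer += data
    return buffer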
def self_play_buffer(config, cur) -> (GoBangEnv, list):
    """
    Play one game and add the play data to the buffer
    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send observations to for getting
        predictions. One will be removed from this list during the game, then added back
    :return (GoBangEnv, list((str,list(float)))): a tuple containing the final GoBangEnv state and then a list
        of data to be appended to the SelfPlayWorker.buffer
    """
    pipes = cur.pop() # borrow
    env = GoBangEnv().reset()

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        # pretty_print_panel(env.board.panel)
        # print(f'After action:{action}')
        env.step(action)
        # pretty_print_panel(env.board.panel)
        # print()
        # if env.num_halfmoves >= config.play.max_game_length:
        #     env.adjudicate()

    if env.winner == Winner.white:
        black_score, white_score = -1, 1
    elif env.winner == Winner.black:
        black_score, white_score = 1, -1
    else:
        black_score, white_score = -0.5, -0.5

    black.finish_game(black_score)
    white.finish_game(white_score)

    data = []
    for i in range(len(black.moves)):
        data.append(black.moves[i])
        if i < len(white.moves):
            data.append(white.moves[i])
            
    pretty_print_panel(env.board.panel)
    print()
    #print(data)
    
    cur.append(pipes)
    return env, data
Example #8
def supervised_buffer(config, game) -> (ChessEnv, list):
    env = ChessEnv(config).reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    env.board = game.board()
    for move in game.main_line():
        ai = white if env.board.turn == chess.WHITE else black
        ai.sl_action(env, move)
        env.step(move)

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.WHITE
        white_win = 1
    elif result == '0-1':
        env.winner = Winner.BLACK
        white_win = -1
    else:
        env.winner = Winner.DRAW
        white_win = 0

    white.finish_game(white_win)
    black.finish_game(-white_win)
    return env, merge_data(white, black)
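merge_data is not shown in this snippet. A plausible implementation, inferred from the interleaving loops used by the other examples in this collection (an assumption about the original helper, not its actual source):

def merge_data(white, black):
    # interleave white's and black's recorded moves back into game order
    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])
    return data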
 def start_game(self, idx):
     self.env.reset()
     self.black = ChessPlayer(self.config, self.model)
     self.white = ChessPlayer(self.config, self.model)
     while not self.env.done:
         if self.env.board.turn == chess.BLACK:
             action = self.black.action(self.env)
         else:
             action = self.white.action(self.env)
         self.env.step(action)
     self.finish_game()
     self.save_play_data(write=idx %
                         self.config.play_data.nb_game_in_file == 0)
     self.remove_play_data()
     return self.env
def get_player(config):
    from chess_zero.agent.model_chess import ChessModel
    from chess_zero.lib.model_helper import load_best_model_weight
    model = ChessModel(config)
    if not load_best_model_weight(model):
        raise RuntimeError("Best model not found!")
    return ChessPlayer(config, model.get_pipes(config.play.search_threads))
def start(config: Config):

    PlayWithHumanConfig().update_play_config(config.play)

    me_player = None
    env = ChessEnv().reset()

    app = Flask(__name__)

    model = ChessModel(config)

    if not load_best_model_weight(model):
        raise RuntimeError("Best model not found!")

    player = ChessPlayer(config, model.get_pipes(config.play.search_threads))

    @app.route('/play', methods=["GET", "POST"])
    def play():
        data = request.get_json()
        print(data["position"])
        env.update(data["position"])
        env.step(data["moves"], False)
        bestmove = player.action(env, False)
        return jsonify(bestmove)

    app.run(host="0.0.0.0", port=8080)
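A hypothetical client call against the /play endpoint above, using the requests library. The handler reads JSON with "position" and "moves" keys and returns the chosen move as JSON; the exact field formats (a FEN string plus a move list) are an assumption based on how env.update and env.step are invoked.

import requests

payload = {
    "position": "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1",
    "moves": "",
}
response = requests.post("http://localhost:8080/play", json=payload)
print(response.json())  # best move returned by the ChessPlayer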
Example #12
def play_game(config, cur, ng,
              current_white: bool) -> (float, GoBangEnv, bool):
    """
    Plays a game between the cur and ng models and reports the results.

    :param Config config: config for how to play the game
    :param ChessModel cur: should be the current model
    :param ChessModel ng: should be the next generation model
    :param bool current_white: whether cur should play white or black
    :return (float, GoBangEnv, bool): the score for the ng model
        (0 for loss, .5 for draw, 1 for win), the env after the game is finished, and a bool
        which is true iff cur played as white in that game.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = GoBangEnv().reset()

    current_player = ChessPlayer(config,
                                 pipes=cur_pipes,
                                 play_config=config.eval.play_config)
    ng_player = ChessPlayer(config,
                            pipes=ng_pipes,
                            play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1

    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white
def play_game(config: Config, cur,
              robot_white: int) -> float:
    """
    Plays a game between the current model and a human player and reports the result.

    :param Config config: config for how to play the game
    :param list(Connection) cur: list of pipe sets for the current model; one is
        borrowed for the game and returned afterwards
    :param int robot_white: truthy if the model plays white, falsy if it plays black
        (the human takes the other color)
    :return float: the score for the human player
        (0 for loss, .5 for draw, 1 for win)
    """
    cur_pipes = cur.pop()
    env = GoBangEnv().reset()

    configs = config.eval.play_config
    # settings used when playing against a human
    configs.simulation_num_per_move = 1200
    configs.tau_decay_rate = 0.
    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=configs)

    if robot_white:
        white, black = current_player, None
    else:
        white, black = None, current_player

    print(f"本局游戏人类为{'黑棋' if robot_white else '白棋'}.")

    while not env.done:
        if env.white_to_move and robot_white:
            action = white.action(env)
        elif not env.white_to_move and not robot_white:
            action = black.action(env)
        else:
            # human's turn
            print('Current board position:')
            pretty_print_panel(env.board.panel)
            print()
            action = input("Enter the position for your piece: ")
            while action not in env.board.legal_moves:
                print("Invalid input, please try again.")
                action = input("Enter the position for your piece: ")
        env.step(action)

    print('Game over! Final board position:')
    pretty_print_panel(env.board.panel)

    if env.winner == Winner.draw:
        man_score = 0.5
    elif env.white_won == robot_white:
        man_score = 0
    else:
        man_score = 1

    cur.append(cur_pipes)
    return man_score
Example #14
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.human_color = None
        self.observers = []
        self.model = self._load_model()
        self.ai = None  # type: ChessPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def start_game(self, human_is_black):
        self.human_color = chess.BLACK if human_is_black else chess.WHITE
        self.ai = ChessPlayer(self.config, self.model)

    def _load_model(self):
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if not load_best_model_weight(model):
            raise RuntimeError("Best model not found!")
        return model

    def move_by_ai(self, env):
        if self.ai is None:
            self.ai = ChessPlayer(self.config, self.model)
        action = self.ai.action(env.observation)

        self.last_history = self.ai.ask_thought_about(env.observation)
        self.last_evaluation = self.last_history.values[
            self.last_history.action]
        logger.debug(f"Evaluation history by AI = {self.last_history}")
        logger.debug(f"Evaluation by AI = {self.last_evaluation}")

        return action

    def move_by_human(self, env):
        while True:
            try:
                move = input(
                    '\nEnter your move in UCI format (a1a2, b2b6, ...): ')
                if chess.Move.from_uci(move) in env.board.legal_moves:
                    return move
                else:
                    print("That is NOT a valid move :(.")
            except ValueError:
                print("That is NOT a valid move :(.")
Example #15
def get_buffer(config, game) -> (ChessEnv, list):
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)
    
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight) #ignore=True
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight) #ignore=True
        env.step(action, False)
        k += 1

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
Example #16
def get_buffer(game, config) -> (ChessEnv, list):
    env = ChessEnv().reset()
    black = ChessPlayer(config, dummy=True)
    white = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    observation = env.observation
    while not env.done and k < len(actions):
        if env.board.turn == chess.WHITE:
            action = white.sl_action(observation, actions[k])  #ignore=True
        else:
            action = black.sl_action(observation, actions[k])  #ignore=True
        board, info = env.step(action, False)
        observation = board.fen()
        k += 1

    env.done = True
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
Example #17
class PlayWithEngine:
    def __init__(self, config: Config):
        self.config = config
        self.engine_color = None
        self.engine = UciEngine()
        self.observers = []
        self.model = self._load_model()
        self.ai = None  # type: ChessPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def start_game(self, engine_is_black):
        self.engine_color = chess.BLACK if engine_is_black else chess.WHITE
        self.engine.start_game(engine_is_black)
        self.ai = ChessPlayer(self.config, self.model)

    def _load_model(self):
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if not load_best_model_weight(model):
            raise RuntimeError("Best model not found!")
        return model

    def move_by_ai(self, env):
        if self.ai is None:
            self.ai = ChessPlayer(self.config, self.model)
        action = self.ai.action(env.observation)

        self.last_history = self.ai.ask_thought_about(env.observation)
        self.last_evaluation = self.last_history.values[self.last_history.action]
        logger.debug(f"Evaluation history by AI = {self.last_history}")
        logger.debug(f"Evaluation by AI = {self.last_evaluation}")

        return action

    def move_by_opponent(self, env):
        self.engine.update_position(env.board)
        action = self.engine.best_move()
        last_evaluation = self.engine.score()
        logger.debug(f"Evaluation by Stockfish = {last_evaluation}")

        return action
Example #18
def self_play_buffer(config, cur) -> (ChessEnv, list):
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()
    search_tree = defaultdict(VisitStats)

    white = ChessPlayer(config, search_tree=search_tree, pipes=pipes)
    black = ChessPlayer(config, search_tree=search_tree, pipes=pipes)

    history = []

    cc = 0
    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        history.append(action)
        # count consecutive cases where a player repeats the move they made
        # two moves earlier; four in a row triggers adjudication below
        if len(history) > 6 and history[-1] == history[-5]:
            cc = cc + 1
        else:
            cc = 0
        if env.num_halfmoves >= config.play.max_game_length or cc >= 4:
            env.adjudicate()
    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
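A hypothetical outer loop for self_play_buffer (not from the source): it plays games back to back with a shared list of pipe sets and flushes the collected moves every nb_game_in_file games. The path construction and write_game_data_to_file call mirror the worker classes later in this collection, but wiring them together this way is an assumption.

import os
from datetime import datetime

def self_play_loop(config, cur_pipes, nb_games):
    buffer = []
    for game_idx in range(1, nb_games + 1):
        env, data = self_play_buffer(config, cur_pipes)
        buffer += data
        if game_idx % config.play_data.nb_game_in_file == 0:
            rc = config.resource
            game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
            path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
            write_game_data_to_file(path, buffer)
            buffer = []
    return buffer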
Example #19
 def get_player_from_model(config):
     try:
         model = ChessModel(config)
         if not load_best_model_weight(model):
             raise RuntimeError("Best model not found!")
         return ChessPlayer(config,
                            model.get_pipes(config.play.search_threads))
     except Exception as e:
         traceback.print_exc()
         log.error(e)
         return None
Example #20
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.
    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)
    
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight) #ignore=True
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight) #ignore=True
        env.step(action, False)
        k += 1

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
Example #21
 def start(self):
     game_idx = 0
     while True:
         cur = self.cur_pipes.pop()
         play_config = self.play_config
         play_config.simulation_num_per_move = 100
         play_config.tau_decay_rate = 0
         robot = ChessPlayer(self.config,
                             pipes=cur,
                             play_config=play_config)
         score = play_game(robot, (game_idx % 2) == 0)
         game_idx += 1
         self.cur_pipes.append(cur)
    def get_player_from_model(config):
        try:
            from chess_zero.agent.player_chess import ChessPlayer
            from chess_zero.agent.model_chess import ChessModel
            from chess_zero.lib.model_helper import load_best_model_weight
            model = ChessModel(config)
            if not load_best_model_weight(model):
                raise RuntimeError("Best model not found!")
            return ChessPlayer(config,
                               model.get_pipes(config.play.search_threads))

        except Exception as e:
            traceback.print_exc()
            log.error(e)
            return None
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.human_color = None
        self.observers = []
        self.model = self._load_model()
        self.ai = None  # type: ChessPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def start_game(self, human_is_white):
        self.human_color = chess.WHITE if human_is_white else chess.BLACK
        self.ai = ChessPlayer(
            self.config, self.model, self.config.human.play_config
        )  # override self.config.play with optional third parameter

    def _load_model(self):
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if not load_newest_model_weight(self.config.resource, model):
            raise RuntimeError("newest model not found!")
        return model

    def move_by_ai(self, env):
        action = self.ai.action(env)

        return action

    def move_by_human(self, env):
        while True:
            san = input(
                '\nEnter your move in SAN format ("e4", "Nf3", ... or "quit"): '
            )
            if san == "quit":
                raise SystemExit
            try:
                move = env.board.parse_san(san)
                if move != chess.Move.null():
                    return move
                else:
                    print("That is NOT a valid move :(."
                          )  # how will parse_san ever return a null move...?
            except:
                print("That is NOT a valid move :(.")
 def start_game(self, human_is_black):
     self.human_color = chess.BLACK if human_is_black else chess.WHITE
     self.ai = ChessPlayer(self.config, self.model)
Example #25
class SelfPlayWorker:
    def __init__(self, config: Config, env=None, model=None):
        """

        :param config:
        :param ChessEnv|None env:
        :param chess_zero.agent.model_chess.ChessModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env     # type: ChessEnv
        self.black = None  # type: ChessPlayer
        self.white = None  # type: ChessPlayer
        self.buffer = []

    def start(self):
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        idx = 1

        while True:
            start_time = time()
            env = self.start_game(idx)
            end_time = time()
            logger.debug(f"game {idx} time={end_time - start_time} sec, "
                         f"turn={env.turn}:{env.observation} - Winner:{env.winner} - by resignation?:{env.resigned}")
            if (idx % self.config.play_data.nb_game_in_file) == 0:
                reload_best_model_weight_if_changed(self.model)
            idx += 1

    def start_game(self, idx):
        self.env.reset()
        self.black = ChessPlayer(self.config, self.model)
        self.white = ChessPlayer(self.config, self.model)
        observation = self.env.observation
        while not self.env.done:
            if self.env.board.turn == chess.BLACK:
                action = self.black.action(observation)
            else:
                action = self.white.action(observation)
            board, info = self.env.step(action)
            observation = board.fen()
        self.finish_game()
        self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        data = self.black.moves + self.white.moves
        self.buffer += data

        if not write:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self):
        if self.env.winner == Winner.black:
            black_win = 1
        elif self.env.winner == Winner.white:
            black_win = -1
        else:
            black_win = 0

        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

    def load_model(self):
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(model):
            model.build()
            save_as_best_model(model)
        return model
 def start_game(self, human_is_white):
     self.human_color = chess.WHITE if human_is_white else chess.BLACK
     self.ai = ChessPlayer(
         self.config, self.model, self.config.human.play_config
     )  # override self.config.play with optional third parameter
Example #27
 def start_game(self, engine_is_black):
     self.engine_color = chess.BLACK if engine_is_black else chess.WHITE
     self.engine.start_game(engine_is_black)
     self.ai = ChessPlayer(self.config, self.model)
 def start_game(self, human_is_white):
     self.human_color = chess.WHITE if human_is_white else chess.BLACK
     self.ai = ChessPlayer(self.config, self.model)
Example #29
def play_game(config, cur, ng,
              current_white: bool) -> (float, GoBangEnv, bool, list):
    """
    Plays a game between the cur and ng models and reports the results.

    :param Config config: config for how to play the game
    :param ChessModel cur: should be the current model
    :param ChessModel ng: should be the next generation model
    :param bool current_white: whether cur should play white or black
    :return (float, GoBangEnv, bool, list): the score for the ng model
        (0 for loss, .5 for draw, 1 for win), the env after the game is finished, a bool
        which is true iff cur played as white in that game, and the interleaved move data
        collected from both players.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = GoBangEnv().reset()

    current_player = ChessPlayer(config,
                                 pipes=cur_pipes,
                                 play_config=config.eval.play_config)
    ng_player = ChessPlayer(config,
                            pipes=ng_pipes,
                            play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)

    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1

# ----- collect move data -----
    if env.winner == Winner.white:
        black_score, white_score = -1, 1
    elif env.winner == Winner.black:
        black_score, white_score = 1, -1
    else:
        black_score, white_score = -0.5, -0.5

    black.finish_game(black_score)
    white.finish_game(white_score)

    data = []
    for i in range(len(black.moves)):
        data.append(black.moves[i])
        if i < len(white.moves):
            data.append(white.moves[i])

# --------------------

    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white, data
Example #30
class SupervisedLearningWorker:
    def __init__(self, config: Config, env=None, model=None):
        """

        :param config:
        :param ChessEnv|None env:
        :param chess_zero.agent.model_chess.ChessModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env     # type: ChessEnv
        self.black = None  # type: ChessPlayer
        self.white = None  # type: ChessPlayer
        self.buffer = []

    def start(self):
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        idx = 1

        while True:
            start_time = time()
            env = self.read_game(idx)
            end_time = time()
            logger.debug(f"game {idx} time={end_time - start_time} sec, "
                         f"turn={int(env.turn/2)}:{env.observation} - Winner:{env.winner} - by resignation?:{env.resigned}")
            if (idx % self.config.play_data.nb_game_in_file) == 0:
                reload_best_model_weight_if_changed(self.model)
            idx += 1

    def read_game(self, idx):
        self.env.reset()
        self.black = ChessPlayer(self.config, self.model)
        self.white = ChessPlayer(self.config, self.model)
        files = find_pgn_files(self.config.resource.play_data_dir)
        if len(files) > 0:
            random.shuffle(files)
            filename = files[0]
            pgn = open(filename, errors='ignore')
            size = os.path.getsize(filename)
            pos = random.randint(0, size)
            pgn.seek(pos)

            line = pgn.readline()
            offset = 0
            # Parse game headers.
            while line:
                if line.isspace() or line.startswith("%"):
                    line = pgn.readline()
                    continue

                # Read header tags.
                tag_match = TAG_REGEX.match(line)
                if tag_match:
                    offset = pgn.tell()
                    break

                line = pgn.readline()

            pgn.seek(offset)
            game = chess.pgn.read_game(pgn)
            node = game
            result = game.headers["Result"]
            actions = []
            while not node.is_end():
                next_node = node.variation(0)
                actions.append(node.board().uci(next_node.move))
                node = next_node
            pgn.close()

            k = 0
            observation = self.env.observation
            while not self.env.done and k < len(actions):
                if self.env.board.turn == chess.BLACK:
                    action = self.black.sl_action(observation, actions[k])
                else:
                    action = self.white.sl_action(observation, actions[k])
                board, info = self.env.step(action)
                observation = board.fen()
                k += 1

            self.env.done = True
            if not self.env.board.is_game_over() and result != '1/2-1/2':
                self.env.resigned = True
            if result == '1-0':
                self.env.winner = Winner.white
            elif result == '0-1':
                self.env.winner = Winner.black
            else:
                self.env.winner = Winner.draw

            self.finish_game()
            self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
            self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        data = self.black.moves + self.white.moves
        self.buffer += data

        if not write:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self):
        if self.env.winner == Winner.black:
            black_win = 1
        elif self.env.winner == Winner.white:
            black_win = -1
        else:
            black_win = 0

        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

    def load_model(self):
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(model):
            model.build()
            save_as_best_model(model)
        return model
Example #31
    def read_game(self, idx):
        self.env.reset()
        self.black = ChessPlayer(self.config, self.model)
        self.white = ChessPlayer(self.config, self.model)
        files = find_pgn_files(self.config.resource.play_data_dir)
        if len(files) > 0:
            random.shuffle(files)
            filename = files[0]
            pgn = open(filename, errors='ignore')
            size = os.path.getsize(filename)
            pos = random.randint(0, size)
            pgn.seek(pos)

            line = pgn.readline()
            offset = 0
            # Parse game headers.
            while line:
                if line.isspace() or line.startswith("%"):
                    line = pgn.readline()
                    continue

                # Read header tags.
                tag_match = TAG_REGEX.match(line)
                if tag_match:
                    offset = pgn.tell()
                    break

                line = pgn.readline()

            pgn.seek(offset)
            game = chess.pgn.read_game(pgn)
            node = game
            result = game.headers["Result"]
            actions = []
            while not node.is_end():
                next_node = node.variation(0)
                actions.append(node.board().uci(next_node.move))
                node = next_node
            pgn.close()

            k = 0
            observation = self.env.observation
            while not self.env.done and k < len(actions):
                if self.env.board.turn == chess.BLACK:
                    action = self.black.sl_action(observation, actions[k])
                else:
                    action = self.white.sl_action(observation, actions[k])
                board, info = self.env.step(action)
                observation = board.fen()
                k += 1

            self.env.done = True
            if not self.env.board.is_game_over() and result != '1/2-1/2':
                self.env.resigned = True
            if result == '1-0':
                self.env.winner = Winner.white
            elif result == '0-1':
                self.env.winner = Winner.black
            else:
                self.env.winner = Winner.draw

            self.finish_game()
            self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
            self.remove_play_data()
        return self.env