Example #1
    def play_game(self, best_model, ng_model):
        env = ChessEnv().reset()

        best_player = ChessPlayer(self.config, best_model, play_config=self.config.eval.play_config)
        ng_player = ChessPlayer(self.config, ng_model, play_config=self.config.eval.play_config)
        best_is_white = random() < 0.5
        if best_is_white:
            white, black = best_player, ng_player
        else:
            white, black = ng_player, best_player

        observation = env.observation
        while not env.done:
            if env.board.turn == chess.BLACK:
                action = black.action(observation)
            else:
                action = white.action(observation)
            board, info = env.step(action)
            observation = board.fen()

        ng_win = None
        if env.winner == Winner.white:
            if best_is_white:
                ng_win = 0
            else:
                ng_win = 1
        elif env.winner == Winner.black:
            if best_is_white:
                ng_win = 1
            else:
                ng_win = 0
        return ng_win, best_is_white
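The (ng_win, best_is_white) pair returned above feeds naturally into a win-rate tally; note that ng_win stays None on a draw. A minimal sketch of how an evaluation loop might aggregate the results follows; evaluate_model and game_num are illustrative names, not part of the original code.

    def evaluate_model(self, best_model, ng_model, game_num=100):
        # Hypothetical aggregation sketch: draws (ng_win is None) are skipped,
        # so the returned value is wins / decisive games.
        results = []
        for _ in range(game_num):
            ng_win, best_is_white = self.play_game(best_model, ng_model)
            if ng_win is not None:
                results.append(ng_win)
        return sum(results) / len(results) if results else 0.0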
Example #2
def start(config: Config):
    PlayWithHumanConfig().update_play_config(config.play)
    chess_model = PlayWithHuman(config)

    env = ChessEnv().reset()
    human_is_black = random() < 0.5
    chess_model.start_game(human_is_black)

    while not env.done:
        if (env.board.turn == chess.BLACK) == human_is_black:
            action = chess_model.move_by_human(env)
            print("You move to: " + action)
        else:
            action = chess_model.move_by_ai(env)
            print("AI moves to: " + action)
        board, info = env.step(action)
        env.render()
        print("Board fen = " + board.fen())

    print("\nEnd of the game.")
    print("Game result:")
    print(env.board.result())
Example #3
    def action(self, board):

        env = ChessEnv().update(board)
        key = self.counter_key(env)

        for tl in range(self.play_config.thinking_loop):
            if tl > 0 and self.play_config.logging_thinking:
                logger.debug(
                    f"continue thinking: policy move=({action % 8}, {action // 8}), "
                    f"value move=({action_by_value % 8}, {action_by_value // 8})"
                )
            self.search_moves(board)
            policy = self.calc_policy(board)
            action = int(np.random.choice(range(self.labels_n), p=policy))
            action_by_value = int(
                np.argmax(self.var_q[key] + (self.var_n[key] > 0) * 100))
            if action == action_by_value or env.turn < self.play_config.change_tau_turn:
                break

        # this is for play_gui, not necessary when training.
        self.thinking_history[env.observation] = HistoryItem(
            action, policy, list(self.var_q[key]), list(self.var_n[key]))

        if self.play_config.resign_threshold is not None and \
            env.score_current() <= self.play_config.resign_threshold and \
                self.play_config.min_resign_turn < env.turn:
            return None  # means resign
        else:
            self.moves.append([env.observation, list(policy)])
            return self.config.labels[action]
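HistoryItem is not defined in this snippet; given the four positional arguments used above, a plausible (assumed) definition is a simple namedtuple:

from collections import namedtuple

# Assumed shape of HistoryItem, inferred from the call above; the real
# definition in the repository may differ.
HistoryItem = namedtuple("HistoryItem", ["action", "policy", "values", "visit"])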
Example #4
def supervised_buffer(config, game) -> (ChessEnv, list):
    env = ChessEnv(config).reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    env.board = game.board()
    for move in game.main_line():
        ai = white if env.board.turn == chess.WHITE else black
        ai.sl_action(env, move)
        env.step(move)

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.WHITE
        white_win = 1
    elif result == '0-1':
        env.winner = Winner.BLACK
        white_win = -1
    else:
        env.winner = Winner.DRAW
        white_win = 0

    white.finish_game(white_win)
    black.finish_game(-white_win)
    return env, merge_data(white, black)
Example #5
    def search_my_move(self, env: ChessEnv, is_root_node=False, depth=0) -> float:
        """
        Q and V are values for this player (always white).
        P is the value for the next player to move (black or white).

        This method searches for possible moves, adds them to a search tree, and eventually returns the
        best move that was found during the search.

        :param ChessEnv env: environment in which to search for the move
        :param boolean is_root_node: whether this is the root node of the search.
        :param int depth: current depth in the search tree (used only for the debug print below).
        :return float: value of the move. This is calculated by getting a prediction
            from the value network.
        """
        if env.done:
            if env.winner == Winner.draw:
                return 0
            # assert env.whitewon != env.white_to_move # side to move can't be winner!
            return -1

        state = state_key(env)

        with self.node_lock[state]:
            if state not in self.tree:
                leaf_p, leaf_v = self.expand_and_evaluate(env)
                self.tree[state].p = leaf_p
                return leaf_v # I'm returning everything from the POV of side to move

            # SELECT STEP
            action_t = self.select_action_q_and_u(env, is_root_node)

            virtual_loss = self.play_config.virtual_loss

            my_visit_stats = self.tree[state]
            my_stats = my_visit_stats.a[action_t]

            my_visit_stats.sum_n += virtual_loss
            my_stats.n += virtual_loss
            my_stats.w += -virtual_loss
            my_stats.q = my_stats.w / my_stats.n

        print("SMM state", 'd{}'.format(depth), state, action_t)
        env.step(action_t.uci())
        leaf_v = self.search_my_move(env, depth=depth+1)  # next move from enemy POV
        leaf_v = -leaf_v

        # BACKUP STEP
        # on returning search path
        # update: N, W, Q
        with self.node_lock[state]:
            my_visit_stats.sum_n += -virtual_loss + 1
            my_stats.n += -virtual_loss + 1
            my_stats.w += virtual_loss + leaf_v
            my_stats.q = my_stats.w / my_stats.n

        return leaf_v
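The virtual-loss bookkeeping above temporarily treats the chosen action as a loss so that concurrent searchers spread across different lines instead of piling onto one; the backup step then reverses it and records the real result. A standalone toy illustration of the same arithmetic (class and function names are illustrative):

class ActionStats:
    def __init__(self):
        self.n = 0     # visit count
        self.w = 0.0   # total action value
        self.q = 0.0   # mean action value

def apply_virtual_loss(stats, virtual_loss=3):
    # Pretend the action was visited and lost, discouraging other threads.
    stats.n += virtual_loss
    stats.w -= virtual_loss
    stats.q = stats.w / stats.n

def backup(stats, leaf_v, virtual_loss=3):
    # Undo the fake loss and record one real visit with value leaf_v.
    stats.n += -virtual_loss + 1
    stats.w += virtual_loss + leaf_v
    stats.q = stats.w / stats.n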
Example #6
    def search_my_move(self, env: ChessEnv, is_root_node=False) -> float:
        """
        Q and V are values for this player (always white).
        P is the value for the next player to move (black or white).

        This method searches for possible moves, adds them to a search tree, and eventually returns the
        best move that was found during the search.

        :param ChessEnv env: environment in which to search for the move
        :param boolean is_root_node: whether this is the root node of the search.
        :return float: value of the move. This is calculated by getting a prediction
            from the value network.
        """
        if env.done:
            if env.winner == Winner.draw:
                return 0
            # assert env.whitewon != env.white_to_move # side to move can't be winner!
            return -1

        state = state_key(env)

        with self.node_lock[state]:
            if state not in self.tree:
                leaf_p, leaf_v = self.expand_and_evaluate(env)
                self.tree[state].p = leaf_p
                return leaf_v # I'm returning everything from the POV of side to move

            # SELECT STEP
            action_t = self.select_action_q_and_u(env, is_root_node)

            virtual_loss = self.play_config.virtual_loss

            my_visit_stats = self.tree[state]
            my_stats = my_visit_stats.a[action_t]

            my_visit_stats.sum_n += virtual_loss
            my_stats.n += virtual_loss
            my_stats.w += -virtual_loss
            my_stats.q = my_stats.w / my_stats.n

        env.step(action_t.uci())
        leaf_v = self.search_my_move(env)  # next move from enemy POV
        leaf_v = -leaf_v

        # BACKUP STEP
        # on returning search path
        # update: N, W, Q
        with self.node_lock[state]:
            my_visit_stats.sum_n += -virtual_loss + 1
            my_stats.n += -virtual_loss + 1
            my_stats.w += virtual_loss + leaf_v
            my_stats.q = my_stats.w / my_stats.n

        return leaf_v
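select_action_q_and_u is not shown in these snippets. In AlphaZero-style search it typically implements the PUCT rule, maximizing Q(s,a) + c_puct * P(s,a) * sqrt(N(s)) / (1 + N(s,a)) over the legal actions. A hedged sketch under that assumption (the repository's version may add Dirichlet noise at the root and use different constants):

import math

def select_action_puct(visit_stats, c_puct=1.5):
    # visit_stats.a is assumed to map actions to stats with fields
    # p (prior), n (visits) and q (mean value), as in the snippets above.
    sqrt_sum = math.sqrt(max(visit_stats.sum_n, 1))
    best_action, best_score = None, -float("inf")
    for action, stats in visit_stats.a.items():
        u = c_puct * stats.p * sqrt_sum / (1 + stats.n)
        if stats.q + u > best_score:
            best_action, best_score = action, stats.q + u
    return best_action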
Example #7
    async def search_my_move(self, env: ChessEnv, is_root_node=False):
        """

        Q, V is value for this Player(always white).
        P is value for the player of next_player (black or white)
        :param env:
        :param is_root_node:
        :return:
        """
        if env.done:
            if env.winner == Winner.white:
                return 1
            elif env.winner == Winner.black:
                return -1
            else:
                return 0

        key = self.counter_key(env)

        while key in self.now_expanding:
            await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec)

        # is leaf?
        if key not in self.expanded:  # reach leaf node
            leaf_v = await self.expand_and_evaluate(env.copy())
            if env.board.turn == chess.WHITE:
                return leaf_v  # Value for white
            else:
                return -leaf_v  # Value for white == -Value for black

        action_t = self.select_action_q_and_u(env, is_root_node)

        _, _ = env.step(self.config.labels[action_t])

        virtual_loss = self.config.play.virtual_loss
        self.var_n[key][action_t] += virtual_loss
        self.var_w[key][action_t] -= virtual_loss

        leaf_v = await self.search_my_move(env)  # next move

        # on returning search path
        # update: N, W, Q, U
        n = self.var_n[key][
            action_t] = self.var_n[key][action_t] - virtual_loss + 1
        w = self.var_w[key][
            action_t] = self.var_w[key][action_t] + virtual_loss + leaf_v
        self.var_q[key][action_t] = w / n

        return leaf_v
Example #8
    def search_my_move(self, env: ChessEnv, is_root_node=False) -> float:
        """
		Q, V is value for this Player(always white).
		P is value for the player of next_player (black or white)
		:return: leaf value
		"""
        if env.done:
            if env.winner == Winner.draw:
                return 0
            #assert env.whitewon != (env.board.turn == chess.WHITE) # side to move can't be winner!
            return -1

        state = state_key(env)

        with self.node_lock[state]:
            if state not in self.tree:
                leaf_p, leaf_v = self.expand_and_evaluate(env=env)
                self.tree[state].p = leaf_p
                return leaf_v  # I'm returning everything from the POV of side to move
            #assert state in self.tree

            # SELECT STEP
            action_t = self.select_action_q_and_u(env, is_root_node)

            virtual_loss = self.play_config.virtual_loss

            my_visitstats = self.tree[state]
            my_stats = my_visitstats.a[action_t]

            my_stats.n += virtual_loss
            my_visitstats.sum_n += virtual_loss
            my_stats.w += -virtual_loss
            my_stats.q = my_stats.w / my_stats.n

        env.step(action_t.uci())
        leaf_v = self.search_my_move(env)  # next move from enemy POV
        leaf_v = -leaf_v

        # BACKUP STEP
        # on returning search path
        # update: N, W, Q
        with self.node_lock[state]:
            my_stats.n += -virtual_loss + 1
            my_visitstats.sum_n += -virtual_loss + 1
            my_stats.w += virtual_loss + leaf_v
            my_stats.q = my_stats.w / my_stats.n

        return leaf_v
Example #9
def start(config: Config):

    PlayWithHumanConfig().update_play_config(config.play)

    env = ChessEnv().reset()

    app = Flask(__name__)

    model = ChessModel(config)

    if not load_best_model_weight(model):
        raise RuntimeError("Best model not found!")

    player = ChessPlayer(config, model.get_pipes(config.play.search_threads))

    @app.route('/play', methods=["GET", "POST"])
    def play():
        data = request.get_json()
        print(data["position"])
        env.update(data["position"])
        env.step(data["moves"], False)
        bestmove = player.action(env, False)
        return jsonify(bestmove)

    app.run(host="0.0.0.0", port="8080")
Example #10
    async def start_search_my_move(self, board):
        self.running_simulation_num += 1
        with await self.sem:  # reduce parallel search number
            env = ChessEnv().update(board)
            leaf_v = await self.search_my_move(env, is_root_node=True)
            self.running_simulation_num -= 1
            return leaf_v
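The semaphore above caps how many simulations expand the tree at once; a caller typically fires a batch of these coroutines and gathers them. A minimal sketch of that launch pattern (run_simulations and simulation_num are illustrative names):

import asyncio

async def run_simulations(player, board, simulation_num=100):
    # Launch simulation_num concurrent searches; player.sem bounds how many
    # run simultaneously inside start_search_my_move above.
    coros = [player.start_search_my_move(board) for _ in range(simulation_num)]
    return await asyncio.gather(*coros)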
Example #11
def start(config: Config):

    PlayWithHumanConfig().update_play_config(config.play)

    me_player = None
    env = ChessEnv().reset()

    while True:
        line = input()
        words = line.rstrip().split(" ", 1)
        if words[0] == "uci":
            print("id name ChessZero")
            print("id author ChessZero")
            print("uciok")
        elif words[0] == "isready":
            if not me_player:
                me_player = get_player(config)
            print("readyok")
        elif words[0] == "ucinewgame":
            env.reset()
        elif words[0] == "position":
            words = words[1].split(" ", 1)
            if words[0] == "startpos":
                env.reset()
            else:
                if words[0] == "fen":  # skip extraneous word
                    words = words[1].split(' ', 1)
                fen = words[0]
                for _ in range(5):
                    words = words[1].split(' ', 1)
                    fen += " " + words[0]
                env.update(fen)
            if len(words) > 1:
                words = words[1].split(" ", 1)
                if words[0] == "moves":
                    for w in words[1].split(" "):
                        env.step(w, False)
        elif words[0] == "go":
            if not me_player:
                me_player = get_player(config)
            action = me_player.action(env, False)
            print(f"bestmove {action}")
        elif words[0] == "stop":
            pass
        elif words[0] == "quit":
            break
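For reference, a typical exchange with the loop above looks like this (GUI commands flush left, engine replies indented; the bestmove shown is illustrative):

uci
    id name ChessZero
    id author ChessZero
    uciok
isready
    readyok
position startpos moves e2e4 e7e5
go
    bestmove g1f3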
Example #12
def convert_to_cheating_data(data):
    """
    :param data: format is SelfPlayWorker.buffer
    :return:
    """
    state_list = []
    policy_list = []
    value_list = []
    env = ChessEnv().reset()
    for state_fen, policy, value in data:
        move_number = int(state_fen.split(' ')[5])
        # f2 = maybe_flip_fen(maybe_flip_fen(state_fen,True),True)
        # assert state_fen == f2
        next_move = env.deltamove(state_fen)
        if next_move is None:  # new game!
            assert state_fen == chess.STARTING_FEN
            env.reset()
        else:
            env.step(next_move, False)

        state_planes = env.canonical_input_planes()
        # assert env.check_current_planes(state_planes)

        side_to_move = state_fen.split(" ")[1]
        if side_to_move == 'b':
            #assert np.sum(policy) == 0
            policy = Config.flip_policy(policy)
        else:
            #assert abs(np.sum(policy) - 1) < 1e-8
            pass

        # if np.sum(policy) != 0:
        #     policy /= np.sum(policy)

        #assert abs(np.sum(policy) - 1) < 1e-8

        assert len(policy) == 1968
        assert state_planes.dtype == np.float32

        # reduces the noise of the opening... plz train faster
        value_certainty = min(15, move_number) / 15
        SL_value = value * value_certainty + env.testeval() * (1 - value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(SL_value)

    return np.array(state_list, dtype=np.float32), np.array(
        policy_list, dtype=np.float32), np.array(value_list, dtype=np.float32)
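To make the blend concrete: at move_number = 5, value_certainty = 5/15 ≈ 0.33, so the training target is one third game outcome and two thirds env.testeval() (a static evaluation); from move 15 onward the outcome counts fully. A tiny worked check with illustrative numbers:

# Worked example of the value blend above; the numbers are illustrative.
move_number = 5
value = 1.0        # final game outcome for the side to move
static_eval = 0.1  # stand-in for env.testeval()
value_certainty = min(15, move_number) / 15                       # ~0.333
SL_value = value * value_certainty + static_eval * (1 - value_certainty)
print(SL_value)    # ~0.4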
Example #13
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.
    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(
        game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)

    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation,
                                     actions[k],
                                     weight=white_weight)  #ignore=True
        else:
            action = black.sl_action(env.observation,
                                     actions[k],
                                     weight=black_weight)  #ignore=True
        env.step(action, False)
        k += 1

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
Example #14
    def training(self):
        tc = self.config.trainer
        last_load_data_step = last_save_step = total_steps = self.config.trainer.start_total_steps

        meta_dir = 'data/model'
        meta_file = os.path.join(meta_dir, 'metadata.json')
        file_dir = 'data/model/next_generation'
        h5_file = os.path.join(file_dir, 'weights.{epoch:02d}.h5')
        self.meta_writer = OptimizeWorker(meta_file)
        self.early_stopping = EarlyStopping(monitor='val_loss')
        self.check_point = ModelCheckpoint(filepath=h5_file,
                                           monitor='val_loss',
                                           verbose=1)

        while True:
            self.load_play_data()

            # generate at least one self-play game, then keep going until the
            # training split is large enough for a batch
            while True:
                self_play = SelfPlayWorker(self.config,
                                           env=ChessEnv(),
                                           model=self.model)
                self_play.start()
                self.load_play_data()
                if (self.dataset_size * (1 - self.validation)) >= tc.batch_size:
                    break

            self.compile_model()

            self.update_learning_rate(total_steps)
            steps = self.train_epoch(self.config.trainer.epoch_to_checkpoint)
            total_steps += steps
            self.save_current_model()
            last_save_step = total_steps

            #net_params = ChessModel(self.config).get_policy_param()
            #pickle.dump(net_params, open('current_policy.model', 'wb'), pickle.HIGHEST_PROTOCOL)
            k.clear_session()
            load_best_model_weight(self.model)
Example #15
    def search_my_move(self, env: ChessEnv, is_root_node):
        """

        Q, V is value for this Player (always white).
        P is value for the player of next_player (white or black)
        :param env:
        :param is_root_node:
        :return: leaf value
        """
        if env.done:
            if env.winner == Winner.DRAW:
                return 0
            else:
                return -1  # a tricky optimization: this conditional will _only_ execute if the side to move has just lost.

        key = env.transposition_key()

        with self.node_lock[key]:
            if key not in self.tree:
                leaf_p, leaf_v = self.expand_and_evaluate(env)
                self.tree[key].p = leaf_p
                return leaf_v  # returning everything from the POV of side to move
            # keep the same lock open?
            move_t, action_t = self.select_action_q_and_u(env, is_root_node)

            virtual_loss = self.play_config.virtual_loss
            my_visit_stats = self.tree[key]
            my_action_stats = my_visit_stats.a[move_t]
            my_visit_stats.sum_n += virtual_loss
            my_action_stats.n += virtual_loss
            my_action_stats.w += -virtual_loss
            my_action_stats.q = my_action_stats.w / my_action_stats.n  # fixed a bug: must update q here...

        env.step(move_t)
        leaf_v = -self.search_my_move(env, False)  # next move

        # on returning search path, update: N, W, Q
        with self.node_lock[key]:
            my_visit_stats.sum_n += -virtual_loss + 1
            my_action_stats.n += -virtual_loss + 1
            my_action_stats.w += virtual_loss + leaf_v
            my_action_stats.q = my_action_stats.w / my_action_stats.n

        return leaf_v
Example #16
def start(config: Config):

    PlayWithHumanConfig().update_play_config(config.play)
    config.play.thinking_loop = 1

    chess_model = None
    env = ChessEnv().reset()

    while True:
        line = input()
        words = line.rstrip().split(" ", 1)
        if words[0] == "uci":
            print("id name ChessZero")
            print("id author ChessZero")
            print("uciok")
        elif words[0] == "isready":
            if chess_model is None:
                chess_model = PlayWithHuman(config)
            print("readyok")
        elif words[0] == "ucinewgame":
            env.reset()
        elif words[0] == "position":
            words = words[1].split(" ", 1)
            if words[0] == "startpos":
                env.reset()
            else:
                fen = words[0]
                for _ in range(5):
                    words = words[1].split(' ', 1)
                    fen += " "+words[0]
                env.update(fen)
            if len(words) > 1:
                words = words[1].split(" ", 1)
                if words[0] == "moves":
                    for w in words[1].split(" "):
                        env.step(w, False)
        elif words[0] == "go":
            action = chess_model.move_by_ai(env)
            print(f"bestmove {action}")
        elif words[0] == "stop":
            pass
        elif words[0] == "quit":
            break
Example #17
    def convert_to_training_data(data):
        """

        :param data: format is SelfPlayWorker.buffer
        :return:
        """
        state_list = []
        policy_list = []
        z_list = []
        for state, policy, z in data:
            env = ChessEnv().update(state)

            black_ary, white_ary = env.black_and_white_plane()
            state = [black_ary, white_ary] if env.board.turn == chess.BLACK else [white_ary, black_ary]

            state_list.append(state)
            policy_list.append(policy)
            z_list.append(z)

        return np.array(state_list), np.array(policy_list), np.array(z_list)
Example #18
    def convert_to_training_data(data):
        """

        :param data: format is SelfPlayWorker.buffer
        :return:
        """
        state_list = []
        policy_list = []
        z_list = []
        aux_move_number = 1
        movements = []
        for state, policy, z in data:
            move_number = int(ChessEnv().update(state, movements).board.fen().split(" ")[5])
            if aux_move_number < move_number:
                if len(movements) > 8:
                    movements.pop(0)
                # note: env here is the ChessEnv from the previous iteration;
                # this relies on each game's first state being move number 1,
                # so the else branch runs before env is first read
                movements.append(env.observation)
                aux_move_number = move_number
            else:
                aux_move_number = 1
                movements = []

            env = ChessEnv().update(state, movements)

            black_ary, white_ary, current_player, move_number = env.black_and_white_plane()
            state = ([black_ary, white_ary] if env.board.fen().split(" ")[1] == 'b'
                     else [white_ary, black_ary])
            state = np.reshape(np.reshape(np.array(state), (18, 6, 8, 8)), (108, 8, 8))
            state = np.vstack((state,
                               np.reshape(current_player, (1, 8, 8)),
                               np.reshape(move_number, (1, 8, 8))))
            state_list.append(state)
            policy_list.append(policy)
            z_list.append(z)

        return np.array(state_list), np.array(policy_list), np.array(z_list)
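The reshape above implies 18 groups of 6 piece planes (18 × 6 = 108), plus one plane each for the current player and the move number, giving a 110×8×8 input. A quick standalone shape check under that assumption, with dummy data:

import numpy as np

# Dummy data; only the shapes matter here.
state = np.zeros((18, 6, 8, 8), dtype=np.float32)
state = np.reshape(state, (108, 8, 8))
current_player = np.ones((8, 8), dtype=np.float32)
move_number = np.full((8, 8), 0.5, dtype=np.float32)
state = np.vstack((state,
                   np.reshape(current_player, (1, 8, 8)),
                   np.reshape(move_number, (1, 8, 8))))
assert state.shape == (110, 8, 8)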
Example #19
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    """
    Plays a game between the cur and ng models and reports the result.

    :param Config config: config for how to play the game
    :param ChessModel cur: should be the current model
    :param ChessModel ng: should be the next generation model
    :param bool current_white: whether cur should play white or black
    :return (float, ChessEnv, bool): the score for the ng model
        (0 for loss, .5 for draw, 1 for win), the env after the game is finished, and a bool
        which is true iff cur played as white in that game.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = ChessEnv().reset()

    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config)
    ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1
    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white
Example #20
def start(config: Config):

    PlayWithHumanConfig().update_play_config(config.play)

    me_player = None
    env = ChessEnv().reset()

    while True:
        line = input()
        words = line.rstrip().split(" ",1)
        if words[0] == "uci":
            print("id name ChessZero")
            print("id author ChessZero")
            print("uciok")
        elif words[0] == "isready":
            if not me_player:
                me_player = get_player(config)
            print("readyok")
        elif words[0] == "ucinewgame":
            env.reset()
        elif words[0] == "position":
            words = words[1].split(" ",1)
            if words[0] == "startpos":
                env.reset()
            else:
                if words[0] == "fen": # skip extraneous word
                    words = words[1].split(' ',1)
                fen = words[0]
                for _ in range(5):
                    words = words[1].split(' ',1)
                    fen += " " + words[0]
                env.update(fen)
            if len(words) > 1:
                words = words[1].split(" ",1)
                if words[0] == "moves":
                    for w in words[1].split(" "):
                        env.step(w, False)
        elif words[0] == "go":
            if not me_player:
                me_player = get_player(config)
            action = me_player.action(env, False)
            print(f"bestmove {action}")
        elif words[0] == "stop":
            pass
        elif words[0] == "quit":
            break
Example #21
def start(config: Config):
    chess_model = PlayWithHuman(config)
    while True:
        random_endgame = config.play.random_endgame
        if random_endgame == -1:
            env = ChessEnv(config).reset()
        else:
            env = ChessEnv(config).randomize(random_endgame)
        human_is_white = random() < 0.5
        chess_model.start_game(human_is_white)

        print(env.board)
        while not env.done:
            if (env.board.turn == chess.WHITE) == human_is_white:
                action = chess_model.move_by_human(env)
                print(f"You move to: {env.board.san(action)}")
            else:
                action = chess_model.move_by_ai(env)
                print(f"AI moves to: {env.board.san(action)}")
            env.step(action)
            print(env.board)
            print(f"Board FEN = {env.fen}")

        game = chess.pgn.Game.from_board(env.board)
        game.headers['White'] = "Human" if human_is_white else f"AI {chess_model.model.digest[:10]}..."
        game.headers['Black'] = f"AI {chess_model.model.digest[:10]}..." if human_is_white else "Human"
        logger.debug("\n"+str(game))
        print(f"\nEnd of the game. Game result: {env.board.result()}")
Example #22
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = ChessEnv().reset()

    current_player = ChessPlayer(config,
                                 pipes=cur_pipes,
                                 play_config=config.eval.play_config)
    ng_player = ChessPlayer(config,
                            pipes=ng_pipes,
                            play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1
    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white
Example #23
def self_play_buffer(config, cur) -> (ChessEnv, list):
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.play.max_game_length:
            env.adjudicate()

    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
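The loop at the end interleaves the two move lists back into game order: white's i-th move, then black's i-th move, dropping the missing black entry when white moved last. An equivalent sketch using itertools (entries in the moves lists are assumed never to be None):

from itertools import chain, zip_longest

def interleave(white_moves, black_moves):
    # [w0, b0, w1, b1, ...]; the zip_longest fill value marks the missing
    # final black move and is filtered out.
    pairs = zip_longest(white_moves, black_moves)
    return [m for m in chain.from_iterable(pairs) if m is not None]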
Example #24
    def play_game(self, current_model, ng_model,
                  current_white: bool) -> (float, ChessEnv):
        env = ChessEnv().reset()

        current_player = ChessPlayer(self.config,
                                     model=current_model,
                                     play_config=self.config.eval.play_config)
        ng_player = ChessPlayer(self.config,
                                model=ng_model,
                                play_config=self.config.eval.play_config)
        if current_white:
            white, black = current_player, ng_player
        else:
            white, black = ng_player, current_player

        while not env.done:
            if env.board.turn == chess.WHITE:
                action = white.action(env)
            else:
                action = black.action(env)
            env.step(action)
            if env.num_halfmoves >= self.config.eval.max_game_length:
                env.adjudicate()

        if env.winner == Winner.draw:
            ng_score = 0.5
        elif env.whitewon == current_white:
            ng_score = 0
        else:
            ng_score = 1
        return ng_score, env
Example #25
def start(config: Config):
    PlayWithHumanConfig().update_play_config(config.play)
    chess_model = PlayWithEngine(config)

    env = ChessEnv().reset()
    human_is_black = random() < 0.5
    chess_model.start_game(human_is_black)

    while not env.done:
        if (env.board.turn == chess.BLACK) == human_is_black:
            action = chess_model.move_by_opponent(env)
            print("You move to: " + action)
        else:
            action = chess_model.move_by_ai(env)
            print("AI moves to: " + action)
        board, info = env.step(action)
        env.render()
        print("Board FEN = " + board.fen())

    print("\nEnd of the game.")  #spaces after this?
    print("Game result:")  #and this?
    print(env.board.result())
Example #26
    def sl_action(self, board, action):

        env = ChessEnv().update(board)

        policy = np.zeros(self.labels_n)
        k = 0
        for mov in self.config.labels:
            if mov == action:
                policy[k] = 1.0
                break
            k += 1

        self.moves.append([env.observation, list(policy)])
        return action
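The index scan above is a hand-rolled list.index; an equivalent, more idiomatic version (note the behavioral caveat in the comment):

import numpy as np

def one_hot_policy(labels, action):
    # Equivalent to the scan above when action is present in labels;
    # unlike the original loop, .index raises ValueError if it is not,
    # where the original silently returned an all-zero policy.
    policy = np.zeros(len(labels))
    policy[labels.index(action)] = 1.0
    return policy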
Example #27
    def calc_policy(self, board):
        """calc π(a|s0)
        :return:
        """
        pc = self.play_config
        env = ChessEnv().update(board)
        key = self.counter_key(env)
        if env.turn < pc.change_tau_turn:
            return self.var_n[key] / (np.sum(self.var_n[key]) + 1e-8)  # tau = 1
        else:
            action = np.argmax(self.var_n[key])  # tau = 0
            ret = np.zeros(self.labels_n)
            ret[action] = 1
            return ret
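The two branches implement the temperature schedule: before change_tau_turn (tau = 1) the policy is proportional to visit counts, so sampling in action() still explores; afterwards (tau = 0) the most-visited move gets all the probability mass. A small numeric illustration:

import numpy as np

var_n = np.array([10.0, 30.0, 60.0])       # visit counts for three moves
pi_tau1 = var_n / (np.sum(var_n) + 1e-8)   # [0.1, 0.3, 0.6]: sample from this
pi_tau0 = np.zeros_like(var_n)
pi_tau0[np.argmax(var_n)] = 1              # [0., 0., 1.]: always the best move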
Example #28
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.
    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)
    
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight) #ignore=True
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight) #ignore=True
        env.step(action, False)
        k += 1

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
Example #29
def get_buffer(config, game) -> (ChessEnv, list):
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"])
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)
    
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight) #ignore=True
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight) #ignore=True
        env.step(action, False)
        k += 1

    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
Example #30
def get_buffer(game, config) -> (ChessEnv, list):
    env = ChessEnv().reset()
    black = ChessPlayer(config, dummy=True)
    white = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())
    k = 0
    observation = env.observation
    while not env.done and k < len(actions):
        if env.board.turn == chess.WHITE:
            action = white.sl_action(observation, actions[k])  #ignore=True
        else:
            action = black.sl_action(observation, actions[k])  #ignore=True
        board, info = env.step(action, False)
        observation = board.fen()
        k += 1

    env.done = True
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
Example #31
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    """
    Plays a game between the cur and ng models and reports the result.

    :param Config config: config for how to play the game
    :param ChessModel cur: should be the current model
    :param ChessModel ng: should be the next generation model
    :param bool current_white: whether cur should play white or black
    :return (float, ChessEnv, bool): the score for the ng model
        (0 for loss, .5 for draw, 1 for win), the env after the game is finished, and a bool
        which is true iff cur played as white in that game.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()
    env = ChessEnv().reset()

    current_player = ChessPlayer(config,
                                 pipes=cur_pipes,
                                 play_config=config.eval.play_config)
    ng_player = ChessPlayer(config,
                            pipes=ng_pipes,
                            play_config=config.eval.play_config)
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1
    cur.append(cur_pipes)
    ng.append(ng_pipes)
    return ng_score, env, current_white
Example #32
def self_play_buffer(config, cur) -> (ChessEnv, list):
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()
    search_tree = defaultdict(VisitStats)

    white = ChessPlayer(config, search_tree=search_tree, pipes=pipes)
    black = ChessPlayer(config, search_tree=search_tree, pipes=pipes)

    history = []

    cc = 0
    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        history.append(action)
        if len(history) > 6 and history[-1] == history[-5]:
            cc = cc + 1
        else:
            cc = 0
        if env.num_halfmoves >= config.play.max_game_length or cc >= 4:
            env.adjudicate()
    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
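The cc counter detects shuffling: history[-1] == history[-5] means the side to move just repeated its own move from two full moves (four plies) earlier, and four consecutive hits trigger adjudication. A standalone sketch of the same check over a whole move list:

def is_shuffling(history, gap=4, threshold=4):
    # Mirrors the cc logic above: count consecutive plies where a move
    # equals the same side's move from gap plies earlier.
    cc = 0
    for i, move in enumerate(history):
        if i >= gap and move == history[i - gap]:
            cc += 1
        else:
            cc = 0
        if cc >= threshold:
            return True
    return False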
Example #33
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game and add the play data to the buffer
    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send observations to for getting
        predictions. One will be removed from this list during the game, then added back
    :return (ChessEnv,list((str,list(float)): a tuple containing the final ChessEnv state and then a list
        of data to be appended to the SelfPlayWorker.buffer
    """
    pipes = cur.pop()  # borrow
    env = ChessEnv().reset()

    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)

    while not env.done:
        if env.white_to_move:
            action = white.action(env)
        else:
            action = black.action(env)
        env.step(action)
        if env.num_halfmoves >= config.play.max_game_length:
            env.adjudicate()

    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
Example #34
    @staticmethod
    def counter_key(env: ChessEnv):
        return CounterKey(env.replace_tags(), env.board.turn)
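CounterKey itself is not defined in these snippets; given the call above, a plausible (assumed) definition is a hashable pair, which is what lets it index the var_n/var_w/var_q dictionaries:

from collections import namedtuple

# Assumed definition, inferred from the counter_key call above; the real
# one may differ. Hashability is the point: it serves as a dict key.
CounterKey = namedtuple("CounterKey", ["board", "next_player"])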