def search_my_move(self, env: ChessEnv, is_root_node=False) -> float:
    """
    Run one simulation of the MCTS search from the current position.

    Q, V is value for this Player(always white). P is value for the player of
    next_player (black or white).

    This method searches for possible moves, adds them to a search tree, and
    eventually returns the best move that was found during the search.

    All values are returned from the point of view of the side to move in
    ``env``: the recursive call's result is negated on the way back up.

    :param ChessEnv env: environment in which to search for the move
    :param boolean is_root_node: whether this is the root node of the search.
    :return float: value of the move. This is calculated by getting a
        prediction from the value network.
    """
    # Terminal position: draw is 0; otherwise the side to move has lost
    # (the previous mover delivered the result), hence -1.
    if env.done:
        if env.winner == Winner.draw:
            return 0
        # assert env.whitewon != env.white_to_move # side to move can't be winner!
        return -1

    state = state_key(env)

    # Per-state lock: this tree is shared across search threads.
    with self.node_lock[state]:
        if state not in self.tree:
            # EXPAND + EVALUATE: unseen leaf — ask the network for
            # (policy, value) and store the policy prior on the new node.
            leaf_p, leaf_v = self.expand_and_evaluate(env)
            self.tree[state].p = leaf_p
            return leaf_v  # I'm returning everything from the POV of side to move

        # SELECT STEP: pick the action maximizing Q + U (PUCT).
        action_t = self.select_action_q_and_u(env, is_root_node)

        virtual_loss = self.play_config.virtual_loss

        my_visit_stats = self.tree[state]
        my_stats = my_visit_stats.a[action_t]

        # Apply virtual loss while this thread is "inside" the subtree so
        # concurrent simulations are discouraged from following the same path.
        my_visit_stats.sum_n += virtual_loss
        my_stats.n += virtual_loss
        my_stats.w += -virtual_loss
        my_stats.q = my_stats.w / my_stats.n

    # Recurse outside the lock; the child value is from the opponent's POV,
    # so negate it.
    env.step(action_t.uci())
    leaf_v = self.search_my_move(env)  # next move from enemy POV
    leaf_v = -leaf_v

    # BACKUP STEP
    # on returning search path
    # update: N, W, Q — undo the virtual loss (-virtual_loss / +virtual_loss)
    # and record the real visit (+1) and real value (+leaf_v).
    with self.node_lock[state]:
        my_visit_stats.sum_n += -virtual_loss + 1
        my_stats.n += -virtual_loss + 1
        my_stats.w += virtual_loss + leaf_v
        my_stats.q = my_stats.w / my_stats.n

    return leaf_v
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.

    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return list(str,list(float)): data from this game for the
        SupervisedLearningWorker.buffer
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)

    result = game.headers["Result"]
    # Weight each player's moves by how strong the player was.
    white_weight = clip_elo_policy(config, int(game.headers["WhiteElo"]))
    black_weight = clip_elo_policy(config, int(game.headers["BlackElo"]))

    # Walk the PGN mainline and collect the moves in UCI notation.
    actions = []
    node = game
    while not node.is_end():
        node = node.variation(0)
        actions.append(node.move.uci())

    # Replay the game, feeding each recorded move to the side to move.
    for uci in actions:
        if env.done:
            break
        if env.white_to_move:
            action = white.sl_action(env.observation, uci, weight=white_weight)  # ignore=True
        else:
            action = black.sl_action(env.observation, uci, weight=black_weight)  # ignore=True
        env.step(action, False)

    # A decisive result on a non-finished board means someone resigned.
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True

    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # Interleave white/black move records in play order.
    data = []
    for idx, white_move in enumerate(white.moves):
        data.append(white_move)
        if idx < len(black.moves):
            data.append(black.moves[idx])

    return env, data
def play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game and add the play data to the buffer.

    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send
        observations to for getting predictions. One will be removed from this
        list during the game, then added back
    :return (ChessEnv,list((str,list(float)): a tuple containing the final
        ChessEnv state and then a list of data to be appended to the
        DuelWorker.buffer
    """
    pipes_1 = cur[0].pop()  # borrow
    pipes_2 = cur[1].pop()  # borrow

    env = ChessEnv().reset()
    white = ChessPlayer(config, pipes=pipes_1)
    black = ChessPlayer(config, pipes=pipes_2)

    while not env.done:
        mover = white if env.white_to_move else black
        env.step(mover.action(env))
        # Force a result if the game drags on too long.
        if env.num_halfmoves >= config.play.max_game_length:
            env.adjudicate()

    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # Return the borrowed pipe sets to their pools.
    cur[0].append(pipes_1)
    cur[1].append(pipes_2)

    # Interleave the two players' (fen, policy, score) records in play order.
    fen_data = []
    total_moves = len(white.moves) + len(black.moves)
    moves_array = np.zeros((total_moves, white.labels_n), dtype=np.float16)
    scores = np.zeros(total_moves, dtype=np.int8)
    for idx, white_move in enumerate(white.moves):
        fen_data.append(white_move[0])
        moves_array[2 * idx] = white_move[1]
        scores[2 * idx] = white_move[2]
        if idx < len(black.moves):
            black_move = black.moves[idx]
            fen_data.append(black_move[0])
            moves_array[2 * idx + 1] = black_move[1]
            scores[2 * idx + 1] = black_move[2]

    return env, fen_data, moves_array, scores
def self_play_buffer(config, cur) -> (ChessEnv, list):
    """
    Play one game and add the play data to the buffer.

    :param Config config: config for how to play
    :param list(Connection) cur: list of pipes to use to get a pipe to send
        observations to for getting predictions. One will be removed from this
        list during the game, then added back
    :return (ChessEnv,list((str,list(float)): a tuple containing the final
        ChessEnv state and then a list of data to be appended to the
        SelfPlayWorker.buffer
    """
    pipes = cur.pop()  # borrow — both players share one pipe set

    env = ChessEnv().reset()
    white = ChessPlayer(config, pipes=pipes)
    black = ChessPlayer(config, pipes=pipes)

    while not env.done:
        mover = white if env.white_to_move else black
        env.step(mover.action(env))
        # Force a result if the game drags on too long.
        if env.num_halfmoves >= config.play.max_game_length:
            env.adjudicate()

    if env.winner == Winner.white:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # Interleave white/black move records in play order.
    data = []
    for idx, white_move in enumerate(white.moves):
        data.append(white_move)
        if idx < len(black.moves):
            data.append(black.moves[idx])

    cur.append(pipes)  # return the borrowed pipe set
    return env, data
def play_game(config, cur, ng, current_white: bool) -> (float, ChessEnv, bool):
    """
    Plays a game between the current model and the next-generation model and
    reports the result.

    :param Config config: config for how to play the game
    :param cur: pipe pool for the current model (a pipe set is borrowed with
        pop() and returned with append())
    :param ng: pipe pool for the next generation model
    :param bool current_white: whether cur should play white or black
    :return (float, ChessEnv, bool): the score for the ng model (0 for loss,
        .5 for draw, 1 for win), the env after the game is finished, and a
        bool which is true iff cur played as white in that game.
    """
    cur_pipes = cur.pop()
    ng_pipes = ng.pop()

    env = ChessEnv().reset()
    current_player = ChessPlayer(config, pipes=cur_pipes, play_config=config.eval.play_config)
    ng_player = ChessPlayer(config, pipes=ng_pipes, play_config=config.eval.play_config)

    # Assign colours according to the requested pairing.
    if current_white:
        white, black = current_player, ng_player
    else:
        white, black = ng_player, current_player

    while not env.done:
        mover = white if env.white_to_move else black
        env.step(mover.action(env))
        # Force a result if the game drags on too long.
        if env.num_halfmoves >= config.eval.max_game_length:
            env.adjudicate()

    # Score from ng's point of view: ng loses exactly when the winner's
    # colour matches cur's colour.
    if env.winner == Winner.draw:
        ng_score = 0.5
    elif env.white_won == current_white:
        ng_score = 0
    else:
        ng_score = 1

    # Return the borrowed pipe sets to their pools.
    cur.append(cur_pipes)
    ng.append(ng_pipes)

    return ng_score, env, current_white
def start(config: Config):
    """
    Play an interactive game between the AI and a human on the console.

    The human's colour is chosen at random; each half-move the board is
    rendered and the FEN printed, and the final result is reported when the
    game ends.

    :param Config config: application config; its play config is overridden
        by DemoConfig
    """
    DemoConfig().update_play_config(config.play)
    chess_model = PlayWithHuman(config)

    env = ChessEnv(config).reset()
    human_is_black = random() < 0.5
    chess_model.start_game(human_is_black)

    while not env.done:
        # It's the human's turn when the side to move matches their colour.
        humans_turn = (env.board.turn == chess.BLACK) == human_is_black
        if humans_turn:
            action = chess_model.move_by_human(env)
            print("You move to: " + action)
        else:
            action = chess_model.move_by_ai(env)
            print("AI moves to: " + action)
        env.step(action)
        env.render()
        print("Board FEN = " + env.board.fen())

    print("\nEnd of the game.")
    print("Game result:")
    print(env.board.result())
def run_game(self):
    """
    Run an interactive human-vs-AI chess game in a pygame window.

    White is controlled by the human via mouse clicks (left click selects a
    piece, right click selects the destination square); the engine responds
    for black. Keeps two board representations: ``board`` (python-chess, for
    move legality and rendering) and ``env`` (ChessEnv, fed to the engine).

    NOTE(review): the source of this block was whitespace-mangled; the
    nesting below is a best-effort reconstruction — verify against history.
    """
    default_config = Config()
    PlayWithHumanConfig().update_play_config(default_config.play)
    me_player = None
    env = ChessEnv().reset()
    #mcts_player = mcts_pure(c_puct=5, n_playout=5)
    move_stack = ""  # comma-separated UCI history rendered on screen
    board = chess.Board()
    chess_sets = Settings()
    screen = pygame.display.set_mode((chess_sets.screen_width, chess_sets.screen_height))
    pygame.display.set_caption("Chess Game")
    pygame.init()
    # Piece sprites, keyed by python-chess piece type (1=pawn .. 6=king),
    # value is [white image, black image].
    image_path = '/home/k1758068/Desktop/alphaGoTest-master/image/'  # NOTE(review): hard-coded absolute path
    black_b = pygame.image.load(image_path + 'blackb.png').convert_alpha()
    black_k = pygame.image.load(image_path + 'blackk.png').convert_alpha()
    black_n = pygame.image.load(image_path + 'blackn.png').convert_alpha()
    black_p = pygame.image.load(image_path + 'blackp.png').convert_alpha()
    black_q = pygame.image.load(image_path + 'blackq.png').convert_alpha()
    black_r = pygame.image.load(image_path + 'blackr.png').convert_alpha()
    white_b = pygame.image.load(image_path + 'whiteb.png').convert_alpha()
    white_k = pygame.image.load(image_path + 'whitek.png').convert_alpha()
    white_n = pygame.image.load(image_path + 'whiten.png').convert_alpha()
    white_p = pygame.image.load(image_path + 'whitep.png').convert_alpha()
    white_q = pygame.image.load(image_path + 'whiteq.png').convert_alpha()
    white_r = pygame.image.load(image_path + 'whiter.png').convert_alpha()
    images = {3: [white_b, black_b], 6: [white_k, black_k], 2: [white_n, black_n],
              1: [white_p, black_p], 5: [white_q, black_q], 4: [white_r, black_r]}
    background_color = (230,230,230)
    #image_path = '/Desktop/alphaGoTest-master/image/'
    chess_board = pygame.image.load(image_path + 'board_image.png').convert()
    while True:
        # --- draw board and pieces ---
        screen.fill(background_color)
        chess_board_x = 100
        chess_board_y = 50
        screen.blit(chess_board, (chess_board_x, chess_board_y))
        d = (950 - 55) / 8  # square size in pixels
        for i in range(64):
            if board.piece_at(i):
                piece = board.piece_at(i).piece_type
                color = board.piece_at(i).color
                if color:
                    piece = images[piece][0]  # white sprite
                else:
                    piece = images[piece][1]  # black sprite
                # Map square index to pixel coordinates (file → x, rank → y).
                x = 177 + (i % 8) * d
                y = 23 + (8 - (i // 8)) * d
                screen.blit(piece, (x, y))
        # Render the move history text.
        myfont = pygame.font.SysFont('test', 30)
        textsurface = myfont.render(move_stack, True, (0, 0, 0))
        screen.blit(textsurface, (1000, 1000))
        for event in pygame.event.get():
            if board.turn == True:
                # Human (white) to move: left click picks up, right click drops.
                if event.type == MOUSEBUTTONDOWN:
                    if event.button == 1:
                        selected_position = self.is_chess_clicked(chess_sets.position, event)
                        # NOTE(review): piece_at may return None on an empty
                        # square — .color would then raise; confirm intent.
                        select_piece = board.piece_at(selected_position)
                        if select_piece.color == True:
                            from_position = selected_position
                            print(from_position)
                        else:
                            pass
                    if event.button == 3:
                        selected_position = self.is_chess_clicked(chess_sets.position, event)
                        to_position = selected_position
                        a = [48, 49, 50, 51, 52, 53, 54, 55]  # squares a7-h7: pawn moving from here promotes
                        # The promotion precess of PAWN pieces
                        if from_position in a and select_piece.piece_type == 1:
                            # a = self.create_dialog()# a = pop_dialog.main()
                            # NOTE(review): the dialog that should reassign `a`
                            # is commented out, so int(a) is applied to the
                            # list above and raises TypeError — looks broken.
                            move = chess.Move(from_position, to_position, promotion=int(a), drop=None)
                            print(int(a), move)
                        else:
                            move = chess.Move(from_position, to_position)
                        # Move check and make move
                        if move not in board.legal_moves:
                            print("invalide move!", move, board.legal_moves)
                        else:
                            board.push(move)
                            move_stack += "," + board.peek().uci()
                            myfont = pygame.font.SysFont('test', 30)
                            textsurface = myfont.render(move_stack, True, (0, 0, 0))
                            screen.blit(textsurface, (100, 100))
            else:
                # Engine (black) to move.
                # NOTE(review): the engine action is computed twice here;
                # `env` is stepped with the first action while `board` is
                # pushed with the second, so the two boards can diverge —
                # almost certainly a bug, confirm before relying on this.
                me_player = self.get_player(default_config)
                action = me_player.action(env, False)
                env.step(action)
                screen.blit(textsurface, (100, 100))
                me_player = self.get_player(default_config)
                action = me_player.action(env, False)
                print(f"bestmove {action}")
                board.push(chess.Move.from_uci(action))
        pygame.display.flip()
def get_buffer(config, game) -> (ChessEnv, list):
    """
    Gets data to load into the buffer by playing a game using PGN data.

    :param Config config: config to use to play the game
    :param pgn.Game game: game to play
    :return: (env, fen_data, moves_array, scores, ok); ok is False (with the
        other fields None) when the PGN headers lack Elo ratings, in which
        case the game is skipped
    """
    env = ChessEnv().reset()
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]

    # Rare cases where elo ratings are not in the headers — skip the game.
    if "WhiteElo" not in game.headers or "BlackElo" not in game.headers:
        return None, None, None, None, False

    # Weight each player's moves by how strong the player was.
    white_weight = clip_elo_policy(config, int(game.headers["WhiteElo"]))
    black_weight = clip_elo_policy(config, int(game.headers["BlackElo"]))

    # Walk the PGN mainline and collect the moves in UCI notation.
    actions = []
    node = game
    while not node.is_end():
        node = node.variation(0)
        actions.append(node.move.uci())

    # Replay the game, feeding each recorded move to the side to move.
    for uci in actions:
        if env.done:
            break
        if env.white_to_move:
            action = white.sl_action(env.observation, uci, weight=white_weight)  # ignore=True
        else:
            action = black.sl_action(env.observation, uci, weight=black_weight)  # ignore=True
        env.step(action, False)

    # A decisive result on a non-finished board means someone resigned.
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True

    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    black.finish_game(black_win)
    white.finish_game(-black_win)

    # Interleave the two players' (fen, policy, score) records in play order.
    fen_data = []
    total_moves = len(white.moves) + len(black.moves)
    moves_array = np.zeros((total_moves, white.labels_n), dtype=np.float16)
    scores = np.zeros(total_moves, dtype=np.int8)
    for idx, white_move in enumerate(white.moves):
        fen_data.append(white_move[0])
        moves_array[2 * idx] = white_move[1]
        scores[2 * idx] = white_move[2]
        if idx < len(black.moves):
            black_move = black.moves[idx]
            fen_data.append(black_move[0])
            moves_array[2 * idx + 1] = black_move[1]
            scores[2 * idx + 1] = black_move[2]

    return env, fen_data, moves_array, scores, True