Python Checkers.save_stateの例

プログラミング言語: Python

名前空間/パッケージ名: checkers

クラス/型: Checkers

メソッド/関数: save_state

hotexamples.comのコード掲載数: 3

Python Checkers.save_state - 3件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのcheckers.Checkers.save_stateの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示非表示

Checkers(20)

extractTokensForChecker(7)

legal_moves(4)

player_in_turn(3)

save_state(3)

possible_movements(3)

move(3)

flat_board(2)

restore_state(2)

sq2pos(1)

setSelected(1)

render(1)

pos2sq(1)

make_move(1)

_get_movement(1)

has_ended(1)

getSelected(1)

getModelStatsForChecker(1)

evaluate_winner(1)

checkTile(1)

captures_available(1)

start(1)

コード例 #1

ファイルを表示

def eval(agent:Agent, env: Checkers, color:str, n_games=100):
    agent.net.eval()
    opponent = Agent(gamma=agent.gamma,
                     epsilon=1,
                     lr=0,
                     input_dims=[8*8 + 1],
                     batch_size=agent.batch_size,
                     action_space=agent.action_space,
                     eps_dec=0,
                     max_mem_size=0
                     )
    opponent.net.eval()
    initial_state = env.save_state()
    score = {'black': 0, 'white': 0}

    for i in tqdm(range(n_games)):
        env.restore_state(initial_state)
        winner = None
        moves = torch.tensor(env.legal_moves())
        board, turn, last_moved_piece = env.save_state()
        brain = agent if turn == color else opponent
        board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
        encoded_turn = torch.tensor([1.]) if turn == 'black' else torch.tensor([0.])
        observation = torch.cat([board_tensor, encoded_turn])
        while not winner:
            action = brain.choose_action(observation)
            while not action_is_legal(action, moves):
                action = brain.choose_action(observation)
            new_board, new_turn, _, moves, winner = env.move(*action.tolist())
            moves = torch.tensor(moves)
            board_tensor = torch.from_numpy(env.flat_board()).view(-1).float()
            encoded_turn = torch.tensor([1. if turn == 'black' else 0.])
            observation = torch.cat([board_tensor, encoded_turn])
            brain = agent if turn == color else opponent
        score[winner] +=1
    agent.net.train()
    return score[color] / n_games

コード例 #2

ファイルを表示

ファイル: main.py プロジェクト: trushkin-ilya/rl_game

              batch_size=args.batch_size,
              action_space=action_space,
              input_dims=[8 * 8 + 1],
              lr=args.lr,
              eps_dec=args.epsilon_decay),
        'white':
        Agent(gamma=args.gamma,
              epsilon=args.epsilon,
              batch_size=args.batch_size,
              action_space=action_space,
              input_dims=[8 * 8 + 1],
              lr=args.lr,
              eps_dec=args.epsilon_decay)
    }
    env = Checkers()
    initial_state = env.save_state()
    eps_history = []
    score = {'black': 0, 'white': 0}

    os.makedirs(args.checkpoints_dir, exist_ok=True)

    for i in range(args.games):
        print(
            f"episode={i}, score={score}, black_eps:{players['black'].epsilon}, white_eps:{players['white'].epsilon}"
        )
        score = {'black': 0, 'white': 0}
        env.restore_state(initial_state)
        winner = None
        moves = torch.tensor(env.legal_moves())
        board, turn, last_moved_piece = env.save_state()
        brain = players[turn]

コード例 #3

ファイルを表示

class CheckersGame(Game):
    def __init__(self, history=[]):
        # Rollout statistics
        self.child_visits = []
        # Terminal values for the first player
        # 1 for win
        # 0 for draw
        # -1 for loss
        # None for incomplete
        self.game_value = None

        # XXX Conventions:
        # - Black player moves first
        # - Ego-centric views assume the king row are at the top, i.e. starts at the bottom (Second player has the same view as absolute)
        self.ch = Checkers()

        # Action space
        self.actions = []
        # Simple moves
        for from_sq in range(self.ch.n_positions):
            for to_sq in self.ch.neighbors[from_sq]:
                if to_sq is not None:
                    simple_move = (from_sq, to_sq)
                    self.actions.append(simple_move)

        assert 98 == len(self.actions), 'There should be 98 simple moves.'
        # Jumps
        for from_sq in range(self.ch.n_positions):
            row, col = self.ch.sq2pos(from_sq)
            # For each direction
            for di, (drow, dcol) in enumerate(Checkers.dir2del):
                next_row, next_col = row + 2 * drow, col + 2 * dcol
                if 0 <= next_row < self.ch.size and 0 <= next_col < self.ch.size:
                    # Within bound
                    to_sq = self.ch.pos2sq(next_row, next_col)
                    jump = (from_sq, to_sq)
                    self.actions.append(jump)
        self.num_actions = len(self.actions)
        assert 98 + 72 == self.num_actions, 'There should be 98 simple moves and 72 jumps.'
        # Inverse dictionary
        self.action2ind = {
            action: ind
            for ind, action in enumerate(self.actions)
        }
        # Square mapping from absolute to first player's ego-centric (reflect through the center)
        self.abs2ego_sq = {}
        for sq in range(self.ch.n_positions):
            row, col = self.ch.sq2pos(sq)
            re_row, re_col = -row + self.ch.size - 1, -col + self.ch.size - 1
            re_sq = self.ch.pos2sq(re_row, re_col)
            self.abs2ego_sq[sq] = re_sq
        # Inverse
        self.ego2abs_sq = {re_sq: sq for sq, re_sq in self.abs2ego_sq.items()}

        # Move mapping from absolute to first player's ego-centric
        self.abs2ego_ac = {}
        for ac, (from_sq, to_sq) in enumerate(self.actions):
            ego_move = (self.abs2ego_sq[from_sq], self.abs2ego_sq[to_sq])
            ego_ac = self.action2ind[ego_move]
            self.abs2ego_ac[ac] = ego_ac
        # Inverse
        self.ego2abs_ac = {
            ego_ac: ac
            for ac, ego_ac in self.abs2ego_ac.items()
        }

        # Fast forward to the last state by taking actions from history
        self.history = []
        for action in history:
            self.apply(action)

    def clone(self):
        game = CheckersGame()
        state = self.ch.save_state()
        game.ch.restore_state(state)
        return game

    def apply(self, action_index):
        from_sq, to_sq = self.actions[action_index]
        board, turn, last_moved_piece, all_next_moves, winner = self.ch.move(
            from_sq, to_sq)

        # Terminate when one player wins
        if winner == 'black':
            self.game_value = 1
        elif winner == 'white':
            self.game_value = -1

        self.history.append(action_index)

    def legal_actions(self):
        moves = self.ch.legal_moves()
        action_idices = {self.action2ind[move] for move in moves}
        return action_idices

    def is_first_player_turn(self):
        return self.ch.turn == 'black'

    def ego_board_representation(self):
        # XXX Channels
        # 0 my men
        # 1 my kings
        # 2 opponent's men
        # 3 opponent's kings
        # 4 my last moved piece
        # QUESTION: try indicating the king row and skipping ego transform?
        rep = np.zeros((self.ch.size, self.ch.size, 5))
        if self.ch.turn == 'white':
            # Same as the absolute view
            for sq in self.ch.board['white']['men']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 0] = 1
            for sq in self.ch.board['white']['kings']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 1] = 1
            for sq in self.ch.board['black']['men']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 2] = 1
            for sq in self.ch.board['black']['kings']:
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 3] = 1
            if self.ch._last_moved_piece is not None:
                row, col = self.ch.sq2pos(self.ch._last_moved_piece)
                rep[row, col, 4] = 1
        else:
            # Need to invert the board
            for sq in self.ch.board['black']['men']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 0] = 1
            for sq in self.ch.board['black']['kings']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 1] = 1
            for sq in self.ch.board['white']['men']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 2] = 1
            for sq in self.ch.board['white']['kings']:
                sq = self.abs2ego_sq[sq]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 3] = 1
            if self.ch._last_moved_piece is not None:
                sq = self.abs2ego_sq[self.ch._last_moved_piece]
                row, col = self.ch.sq2pos(sq)
                rep[row, col, 4] = 1
        return rep

    def ego_sample(self, state_index: int):
        # Fast forward
        game = CheckersGame(list(self.history[:state_index]))
        # Ego-centric views of the current player
        rep = game.ego_board_representation()
        # Zero-sum game
        ego_val = self.game_value if game.is_first_player_turn() else (
            0 - self.game_value)
        # Ego-centric actions
        if game.is_first_player_turn():
            # Invert actions for the first player
            visits = np.zeros(self.num_actions)
            for i in range(self.num_actions):
                visits[self.abs2ego_ac[i]] = self.child_visits[state_index][i]
        else:
            visits = np.asarray(self.child_visits[state_index])
        return rep, ego_val, visits

    def ego2abs_policy(self, is_first_player, ego_policy):
        if is_first_player:
            policy = np.zeros(self.num_actions)
            for ego_ac, pr in enumerate(ego_policy):
                policy[self.ego2abs_ac[ego_ac]] = pr
        else:
            policy = ego_policy
        return policy